1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License, Version 1.0 only
   6  * (the "License").  You may not use this file except in compliance
   7  * with the License.
   8  *
   9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  10  * or http://www.opensolaris.org/os/licensing.
  11  * See the License for the specific language governing permissions
  12  * and limitations under the License.
  13  *
  14  * When distributing Covered Code, include this CDDL HEADER in each
  15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  16  * If applicable, add the following below this CDDL HEADER, with the
  17  * fields enclosed by brackets "[]" replaced with your own identifying
  18  * information: Portions Copyright [yyyy] [name of copyright owner]
  19  *
  20  * CDDL HEADER END
  21  */
  22 /*
  23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 /*
  28  * Copyright 2012 DEY Storage Systems, Inc.  All rights reserved.
  29  * Copyright (c) 2014, Joyent, Inc. All rights reserved.
  30  */
  31 
  32 #include <sys/types.h>
  33 #include <sys/param.h>
  34 #include <sys/thread.h>
  35 #include <sys/sysmacros.h>
  36 #include <sys/signal.h>
  37 #include <sys/cred.h>
  38 #include <sys/priv.h>
  39 #include <sys/user.h>
  40 #include <sys/file.h>
  41 #include <sys/errno.h>
  42 #include <sys/vnode.h>
  43 #include <sys/mode.h>
  44 #include <sys/vfs.h>
  45 #include <sys/mman.h>
  46 #include <sys/kmem.h>
  47 #include <sys/proc.h>
  48 #include <sys/pathname.h>
  49 #include <sys/cmn_err.h>
  50 #include <sys/systm.h>
  51 #include <sys/elf.h>
  52 #include <sys/vmsystm.h>
  53 #include <sys/debug.h>
  54 #include <sys/procfs.h>
  55 #include <sys/regset.h>
  56 #include <sys/auxv.h>
  57 #include <sys/exec.h>
  58 #include <sys/prsystm.h>
  59 #include <sys/utsname.h>
  60 #include <sys/zone.h>
  61 #include <vm/as.h>
  62 #include <vm/rm.h>
  63 #include <sys/modctl.h>
  64 #include <sys/systeminfo.h>
  65 #include <sys/machelf.h>
  66 #include <sys/sunddi.h>
  67 #include "elf_impl.h"
  68 #if defined(__i386) || defined(__i386_COMPAT)
  69 #include <sys/sysi86.h>
  70 #endif
  71 
  72 void
  73 setup_note_header(Phdr *v, proc_t *p)
  74 {
  75         int nlwp = p->p_lwpcnt;
  76         int nzomb = p->p_zombcnt;
  77         int nfd;
  78         size_t size;
  79         prcred_t *pcrp;
  80         uf_info_t *fip;
  81         uf_entry_t *ufp;
  82         int fd;
  83 
  84         fip = P_FINFO(p);
  85         nfd = 0;
  86         mutex_enter(&fip->fi_lock);
  87         for (fd = 0; fd < fip->fi_nfiles; fd++) {
  88                 UF_ENTER(ufp, fip, fd);
  89                 if ((ufp->uf_file != NULL) && (ufp->uf_file->f_count > 0))
  90                         nfd++;
  91                 UF_EXIT(ufp);
  92         }
  93         mutex_exit(&fip->fi_lock);
  94 
  95         v[0].p_type = PT_NOTE;
  96         v[0].p_flags = PF_R;
  97         v[0].p_filesz = (sizeof (Note) * (9 + 2 * nlwp + nzomb + nfd))
  98             + roundup(sizeof (psinfo_t), sizeof (Word))
  99             + roundup(sizeof (pstatus_t), sizeof (Word))
 100             + roundup(prgetprivsize(), sizeof (Word))
 101             + roundup(priv_get_implinfo_size(), sizeof (Word))
 102             + roundup(strlen(platform) + 1, sizeof (Word))
 103             + roundup(strlen(p->p_zone->zone_name) + 1, sizeof (Word))
 104             + roundup(__KERN_NAUXV_IMPL * sizeof (aux_entry_t), sizeof (Word))
 105             + roundup(sizeof (utsname), sizeof (Word))
 106             + roundup(sizeof (core_content_t), sizeof (Word))
 107             + (nlwp + nzomb) * roundup(sizeof (lwpsinfo_t), sizeof (Word))
 108             + nlwp * roundup(sizeof (lwpstatus_t), sizeof (Word))
 109             + nfd * roundup(sizeof (prfdinfo_t), sizeof (Word));
 110 
 111         if (curproc->p_agenttp != NULL) {
 112                 v[0].p_filesz += sizeof (Note) +
 113                     roundup(sizeof (psinfo_t), sizeof (Word));
 114         }
 115 
 116         size = sizeof (prcred_t) + sizeof (gid_t) * (ngroups_max - 1);
 117         pcrp = kmem_alloc(size, KM_SLEEP);
 118         prgetcred(p, pcrp);
 119         if (pcrp->pr_ngroups != 0) {
 120                 v[0].p_filesz += sizeof (Note) + roundup(sizeof (prcred_t) +
 121                     sizeof (gid_t) * (pcrp->pr_ngroups - 1), sizeof (Word));
 122         } else {
 123                 v[0].p_filesz += sizeof (Note) +
 124                     roundup(sizeof (prcred_t), sizeof (Word));
 125         }
 126         kmem_free(pcrp, size);
 127 
 128 
 129 #if defined(__i386) || defined(__i386_COMPAT)
 130         mutex_enter(&p->p_ldtlock);
 131         size = prnldt(p) * sizeof (struct ssd);
 132         mutex_exit(&p->p_ldtlock);
 133         if (size != 0)
 134                 v[0].p_filesz += sizeof (Note) + roundup(size, sizeof (Word));
 135 #endif  /* __i386 || __i386_COMPAT */
 136 
 137         if ((size = prhasx(p)? prgetprxregsize(p) : 0) != 0)
 138                 v[0].p_filesz += nlwp * sizeof (Note)
 139                     + nlwp * roundup(size, sizeof (Word));
 140 
 141 #if defined(__sparc)
 142         /*
 143          * Figure out the number and sizes of register windows.
 144          */
 145         {
 146                 kthread_t *t = p->p_tlist;
 147                 do {
 148                         if ((size = prnwindows(ttolwp(t))) != 0) {
 149                                 size = sizeof (gwindows_t) -
 150                                     (SPARC_MAXREGWINDOW - size) *
 151                                     sizeof (struct rwindow);
 152                                 v[0].p_filesz += sizeof (Note) +
 153                                     roundup(size, sizeof (Word));
 154                         }
 155                 } while ((t = t->t_forw) != p->p_tlist);
 156         }
 157         /*
 158          * Space for the Ancillary State Registers.
 159          */
 160         if (p->p_model == DATAMODEL_LP64)
 161                 v[0].p_filesz += nlwp * sizeof (Note)
 162                     + nlwp * roundup(sizeof (asrset_t), sizeof (Word));
 163 #endif /* __sparc */
 164 }
 165 
 166 int
 167 write_elfnotes(proc_t *p, int sig, vnode_t *vp, offset_t offset,
 168     rlim64_t rlimit, cred_t *credp, core_content_t content)
 169 {
 170         union {
 171                 psinfo_t        psinfo;
 172                 pstatus_t       pstatus;
 173                 lwpsinfo_t      lwpsinfo;
 174                 lwpstatus_t     lwpstatus;
 175 #if defined(__sparc)
 176                 gwindows_t      gwindows;
 177                 asrset_t        asrset;
 178 #endif /* __sparc */
 179                 char            xregs[1];
 180                 aux_entry_t     auxv[__KERN_NAUXV_IMPL];
 181                 prcred_t        pcred;
 182                 prpriv_t        ppriv;
 183                 priv_impl_info_t prinfo;
 184                 struct utsname  uts;
 185         } *bigwad;
 186 
 187         size_t xregsize = prhasx(p)? prgetprxregsize(p) : 0;
 188         size_t crsize = sizeof (prcred_t) + sizeof (gid_t) * (ngroups_max - 1);
 189         size_t psize = prgetprivsize();
 190         size_t bigsize = MAX(psize, MAX(sizeof (*bigwad),
 191             MAX(xregsize, crsize)));
 192 
 193         priv_impl_info_t *prii;
 194 
 195         lwpdir_t *ldp;
 196         lwpent_t *lep;
 197         kthread_t *t;
 198         klwp_t *lwp;
 199         user_t *up;
 200         int i;
 201         int nlwp;
 202         int nzomb;
 203         int error;
 204         uchar_t oldsig;
 205         uf_info_t *fip;
 206         int fd;
 207         vnode_t *vroot;
 208 
 209 #if defined(__i386) || defined(__i386_COMPAT)
 210         struct ssd *ssd;
 211         size_t ssdsize;
 212 #endif  /* __i386 || __i386_COMPAT */
 213 
 214         bigsize = MAX(bigsize, priv_get_implinfo_size());
 215 
 216         bigwad = kmem_alloc(bigsize, KM_SLEEP);
 217 
 218         /*
 219          * The order of the elfnote entries should be same here
 220          * and in the gcore(1) command.  Synchronization is
 221          * needed between the kernel and gcore(1).
 222          */
 223 
 224         /*
 225          * Get the psinfo, and set the wait status to indicate that a core was
 226          * dumped.  We have to forge this since p->p_wcode is not set yet.
 227          */
 228         mutex_enter(&p->p_lock);
 229         prgetpsinfo(p, &bigwad->psinfo);
 230         mutex_exit(&p->p_lock);
 231         bigwad->psinfo.pr_wstat = wstat(CLD_DUMPED, sig);
 232 
 233         error = elfnote(vp, &offset, NT_PSINFO, sizeof (bigwad->psinfo),
 234             (caddr_t)&bigwad->psinfo, rlimit, credp);
 235         if (error)
 236                 goto done;
 237 
 238         /*
 239          * Modify t_whystop and lwp_cursig so it appears that the current LWP
 240          * is stopped after faulting on the signal that caused the core dump.
 241          * As a result, prgetstatus() will record that signal, the saved
 242          * lwp_siginfo, and its signal handler in the core file status.  We
 243          * restore lwp_cursig in case a subsequent signal was received while
 244          * dumping core.
 245          */
 246         mutex_enter(&p->p_lock);
 247         lwp = ttolwp(curthread);
 248 
 249         oldsig = lwp->lwp_cursig;
 250         lwp->lwp_cursig = (uchar_t)sig;
 251         curthread->t_whystop = PR_FAULTED;
 252 
 253         prgetstatus(p, &bigwad->pstatus, p->p_zone);
 254         bigwad->pstatus.pr_lwp.pr_why = 0;
 255 
 256         curthread->t_whystop = 0;
 257         lwp->lwp_cursig = oldsig;
 258         mutex_exit(&p->p_lock);
 259 
 260         error = elfnote(vp, &offset, NT_PSTATUS, sizeof (bigwad->pstatus),
 261             (caddr_t)&bigwad->pstatus, rlimit, credp);
 262         if (error)
 263                 goto done;
 264 
 265         error = elfnote(vp, &offset, NT_PLATFORM, strlen(platform) + 1,
 266             platform, rlimit, credp);
 267         if (error)
 268                 goto done;
 269 
 270         up = PTOU(p);
 271         for (i = 0; i < __KERN_NAUXV_IMPL; i++) {
 272                 bigwad->auxv[i].a_type = up->u_auxv[i].a_type;
 273                 bigwad->auxv[i].a_un.a_val = up->u_auxv[i].a_un.a_val;
 274         }
 275         error = elfnote(vp, &offset, NT_AUXV, sizeof (bigwad->auxv),
 276             (caddr_t)bigwad->auxv, rlimit, credp);
 277         if (error)
 278                 goto done;
 279 
 280         bcopy(&utsname, &bigwad->uts, sizeof (struct utsname));
 281         if (!INGLOBALZONE(p)) {
 282                 bcopy(p->p_zone->zone_nodename, &bigwad->uts.nodename,
 283                     _SYS_NMLN);
 284         }
 285         error = elfnote(vp, &offset, NT_UTSNAME, sizeof (struct utsname),
 286             (caddr_t)&bigwad->uts, rlimit, credp);
 287         if (error)
 288                 goto done;
 289 
 290         prgetcred(p, &bigwad->pcred);
 291 
 292         if (bigwad->pcred.pr_ngroups != 0) {
 293                 crsize = sizeof (prcred_t) +
 294                     sizeof (gid_t) * (bigwad->pcred.pr_ngroups - 1);
 295         } else
 296                 crsize = sizeof (prcred_t);
 297 
 298         error = elfnote(vp, &offset, NT_PRCRED, crsize,
 299             (caddr_t)&bigwad->pcred, rlimit, credp);
 300         if (error)
 301                 goto done;
 302 
 303         error = elfnote(vp, &offset, NT_CONTENT, sizeof (core_content_t),
 304             (caddr_t)&content, rlimit, credp);
 305         if (error)
 306                 goto done;
 307 
 308         prgetpriv(p, &bigwad->ppriv);
 309 
 310         error = elfnote(vp, &offset, NT_PRPRIV, psize,
 311             (caddr_t)&bigwad->ppriv, rlimit, credp);
 312         if (error)
 313                 goto done;
 314 
 315         prii = priv_hold_implinfo();
 316         error = elfnote(vp, &offset, NT_PRPRIVINFO, priv_get_implinfo_size(),
 317             (caddr_t)prii, rlimit, credp);
 318         priv_release_implinfo();
 319         if (error)
 320                 goto done;
 321 
 322         /* zone can't go away as long as process exists */
 323         error = elfnote(vp, &offset, NT_ZONENAME,
 324             strlen(p->p_zone->zone_name) + 1, p->p_zone->zone_name,
 325             rlimit, credp);
 326         if (error)
 327                 goto done;
 328 
 329 
 330         /* open file table */
 331         vroot = PTOU(p)->u_rdir;
 332         if (vroot == NULL)
 333                 vroot = rootdir;
 334 
 335         VN_HOLD(vroot);
 336 
 337         fip = P_FINFO(p);
 338 
 339         for (fd = 0; fd < fip->fi_nfiles; fd++) {
 340                 uf_entry_t *ufp;
 341                 vnode_t *fvp;
 342                 struct file *fp;
 343                 vattr_t vattr;
 344                 prfdinfo_t fdinfo;
 345 
 346                 bzero(&fdinfo, sizeof (fdinfo));
 347 
 348                 mutex_enter(&fip->fi_lock);
 349                 UF_ENTER(ufp, fip, fd);
 350                 if (((fp = ufp->uf_file) == NULL) || (fp->f_count < 1)) {
 351                         UF_EXIT(ufp);
 352                         mutex_exit(&fip->fi_lock);
 353                         continue;
 354                 }
 355 
 356                 fdinfo.pr_fd = fd;
 357                 fdinfo.pr_fdflags = ufp->uf_flag;
 358                 fdinfo.pr_fileflags = fp->f_flag2;
 359                 fdinfo.pr_fileflags <<= 16;
 360                 fdinfo.pr_fileflags |= fp->f_flag;
 361                 if ((fdinfo.pr_fileflags & (FSEARCH | FEXEC)) == 0)
 362                         fdinfo.pr_fileflags += FOPEN;
 363                 fdinfo.pr_offset = fp->f_offset;
 364 
 365 
 366                 fvp = fp->f_vnode;
 367                 VN_HOLD(fvp);
 368                 UF_EXIT(ufp);
 369                 mutex_exit(&fip->fi_lock);
 370 
 371                 /*
 372                  * There are some vnodes that have no corresponding
 373                  * path.  Its reasonable for this to fail, in which
 374                  * case the path will remain an empty string.
 375                  */
 376                 (void) vnodetopath(vroot, fvp, fdinfo.pr_path,
 377                     sizeof (fdinfo.pr_path), credp);
 378 
 379                 if (VOP_GETATTR(fvp, &vattr, 0, credp, NULL) != 0) {
 380                         /*
 381                          * Try to write at least a subset of information
 382                          */
 383                         fdinfo.pr_major = 0;
 384                         fdinfo.pr_minor = 0;
 385                         fdinfo.pr_ino = 0;
 386                         fdinfo.pr_mode = 0;
 387                         fdinfo.pr_uid = (uid_t)-1;
 388                         fdinfo.pr_gid = (gid_t)-1;
 389                         fdinfo.pr_rmajor = 0;
 390                         fdinfo.pr_rminor = 0;
 391                         fdinfo.pr_size = -1;
 392 
 393                         error = elfnote(vp, &offset, NT_FDINFO,
 394                             sizeof (fdinfo), &fdinfo, rlimit, credp);
 395                         VN_RELE(fvp);
 396                         if (error) {
 397                                 VN_RELE(vroot);
 398                                 goto done;
 399                         }
 400                         continue;
 401                 }
 402 
 403                 if (fvp->v_type == VSOCK)
 404                         fdinfo.pr_fileflags |= sock_getfasync(fvp);
 405 
 406                 VN_RELE(fvp);
 407 
 408                 /*
 409                  * This logic mirrors fstat(), which we cannot use
 410                  * directly, as it calls copyout().
 411                  */
 412                 fdinfo.pr_major = getmajor(vattr.va_fsid);
 413                 fdinfo.pr_minor = getminor(vattr.va_fsid);
 414                 fdinfo.pr_ino = (ino64_t)vattr.va_nodeid;
 415                 fdinfo.pr_mode = VTTOIF(vattr.va_type) | vattr.va_mode;
 416                 fdinfo.pr_uid = vattr.va_uid;
 417                 fdinfo.pr_gid = vattr.va_gid;
 418                 fdinfo.pr_rmajor = getmajor(vattr.va_rdev);
 419                 fdinfo.pr_rminor = getminor(vattr.va_rdev);
 420                 fdinfo.pr_size = (off64_t)vattr.va_size;
 421 
 422                 error = elfnote(vp, &offset, NT_FDINFO,
 423                     sizeof (fdinfo), &fdinfo, rlimit, credp);
 424                 if (error) {
 425                         VN_RELE(vroot);
 426                         goto done;
 427                 }
 428         }
 429 
 430         VN_RELE(vroot);
 431 
 432 #if defined(__i386) || defined(__i386_COMPAT)
 433         mutex_enter(&p->p_ldtlock);
 434         ssdsize = prnldt(p) * sizeof (struct ssd);
 435         if (ssdsize != 0) {
 436                 ssd = kmem_alloc(ssdsize, KM_SLEEP);
 437                 prgetldt(p, ssd);
 438                 error = elfnote(vp, &offset, NT_LDT, ssdsize,
 439                     (caddr_t)ssd, rlimit, credp);
 440                 kmem_free(ssd, ssdsize);
 441         }
 442         mutex_exit(&p->p_ldtlock);
 443         if (error)
 444                 goto done;
 445 #endif  /* __i386 || defined(__i386_COMPAT) */
 446 
 447         nlwp = p->p_lwpcnt;
 448         nzomb = p->p_zombcnt;
 449         /* for each entry in the lwp directory ... */
 450         for (ldp = p->p_lwpdir; nlwp + nzomb != 0; ldp++) {
 451 
 452                 if ((lep = ldp->ld_entry) == NULL)   /* empty slot */
 453                         continue;
 454 
 455                 if ((t = lep->le_thread) != NULL) {  /* active lwp */
 456                         ASSERT(nlwp != 0);
 457                         nlwp--;
 458                         lwp = ttolwp(t);
 459                         mutex_enter(&p->p_lock);
 460                         prgetlwpsinfo(t, &bigwad->lwpsinfo);
 461                         mutex_exit(&p->p_lock);
 462                 } else {                                /* zombie lwp */
 463                         ASSERT(nzomb != 0);
 464                         nzomb--;
 465                         bzero(&bigwad->lwpsinfo, sizeof (bigwad->lwpsinfo));
 466                         bigwad->lwpsinfo.pr_lwpid = lep->le_lwpid;
 467                         bigwad->lwpsinfo.pr_state = SZOMB;
 468                         bigwad->lwpsinfo.pr_sname = 'Z';
 469                         bigwad->lwpsinfo.pr_start.tv_sec = lep->le_start;
 470                 }
 471                 error = elfnote(vp, &offset, NT_LWPSINFO,
 472                     sizeof (bigwad->lwpsinfo), (caddr_t)&bigwad->lwpsinfo,
 473                     rlimit, credp);
 474                 if (error)
 475                         goto done;
 476                 if (t == NULL)          /* nothing more to do for a zombie */
 477                         continue;
 478 
 479                 mutex_enter(&p->p_lock);
 480                 if (t == curthread) {
 481                         /*
 482                          * Modify t_whystop and lwp_cursig so it appears that
 483                          * the current LWP is stopped after faulting on the
 484                          * signal that caused the core dump.  As a result,
 485                          * prgetlwpstatus() will record that signal, the saved
 486                          * lwp_siginfo, and its signal handler in the core file
 487                          * status.  We restore lwp_cursig in case a subsequent
 488                          * signal was received while dumping core.
 489                          */
 490                         oldsig = lwp->lwp_cursig;
 491                         lwp->lwp_cursig = (uchar_t)sig;
 492                         t->t_whystop = PR_FAULTED;
 493 
 494                         prgetlwpstatus(t, &bigwad->lwpstatus, p->p_zone);
 495                         bigwad->lwpstatus.pr_why = 0;
 496 
 497                         t->t_whystop = 0;
 498                         lwp->lwp_cursig = oldsig;
 499                 } else {
 500                         prgetlwpstatus(t, &bigwad->lwpstatus, p->p_zone);
 501                 }
 502                 mutex_exit(&p->p_lock);
 503                 error = elfnote(vp, &offset, NT_LWPSTATUS,
 504                     sizeof (bigwad->lwpstatus), (caddr_t)&bigwad->lwpstatus,
 505                     rlimit, credp);
 506                 if (error)
 507                         goto done;
 508 
 509 #if defined(__sparc)
 510                 /*
 511                  * Unspilled SPARC register windows.
 512                  */
 513                 {
 514                         size_t size = prnwindows(lwp);
 515 
 516                         if (size != 0) {
 517                                 size = sizeof (gwindows_t) -
 518                                     (SPARC_MAXREGWINDOW - size) *
 519                                     sizeof (struct rwindow);
 520                                 prgetwindows(lwp, &bigwad->gwindows);
 521                                 error = elfnote(vp, &offset, NT_GWINDOWS,
 522                                     size, (caddr_t)&bigwad->gwindows,
 523                                     rlimit, credp);
 524                                 if (error)
 525                                         goto done;
 526                         }
 527                 }
 528                 /*
 529                  * Ancillary State Registers.
 530                  */
 531                 if (p->p_model == DATAMODEL_LP64) {
 532                         prgetasregs(lwp, bigwad->asrset);
 533                         error = elfnote(vp, &offset, NT_ASRS,
 534                             sizeof (asrset_t), (caddr_t)bigwad->asrset,
 535                             rlimit, credp);
 536                         if (error)
 537                                 goto done;
 538                 }
 539 #endif /* __sparc */
 540 
 541                 if (xregsize) {
 542                         prgetprxregs(lwp, bigwad->xregs);
 543                         error = elfnote(vp, &offset, NT_PRXREG,
 544                             xregsize, bigwad->xregs, rlimit, credp);
 545                         if (error)
 546                                 goto done;
 547                 }
 548 
 549                 if (t->t_lwp->lwp_spymaster != NULL) {
 550                         void *psaddr = t->t_lwp->lwp_spymaster;
 551 #ifdef _ELF32_COMPAT
 552                         /*
 553                          * On a 64-bit kernel with 32-bit ELF compatibility,
 554                          * this file is compiled into two different objects:
 555                          * one is compiled normally, and the other is compiled
 556                          * with _ELF32_COMPAT set -- and therefore with a
 557                          * psinfo_t defined to be a psinfo32_t.  However, the
 558                          * psinfo_t denoting our spymaster is always of the
 559                          * native type; if we are in the _ELF32_COMPAT case,
 560                          * we need to explicitly convert it.
 561                          */
 562                         if (p->p_model == DATAMODEL_ILP32) {
 563                                 psinfo_kto32(psaddr, &bigwad->psinfo);
 564                                 psaddr = &bigwad->psinfo;
 565                         }
 566 #endif
 567 
 568                         error = elfnote(vp, &offset, NT_SPYMASTER,
 569                             sizeof (psinfo_t), psaddr, rlimit, credp);
 570                         if (error)
 571                                 goto done;
 572                 }
 573         }
 574         ASSERT(nlwp == 0);
 575 
 576 done:
 577         kmem_free(bigwad, bigsize);
 578         return (error);
 579 }