1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License, Version 1.0 only
   6  * (the "License").  You may not use this file except in compliance
   7  * with the License.
   8  *
   9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  10  * or http://www.opensolaris.org/os/licensing.
  11  * See the License for the specific language governing permissions
  12  * and limitations under the License.
  13  *
  14  * When distributing Covered Code, include this CDDL HEADER in each
  15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  16  * If applicable, add the following below this CDDL HEADER, with the
  17  * fields enclosed by brackets "[]" replaced with your own identifying
  18  * information: Portions Copyright [yyyy] [name of copyright owner]
  19  *
  20  * CDDL HEADER END
  21  */
  22 /*
  23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 /*
  28  * Copyright 2012 DEY Storage Systems, Inc.  All rights reserved.
  29  * Copyright (c) 2014, Joyent, Inc. All rights reserved.
  30  */
  31 
  32 #include <sys/types.h>
  33 #include <sys/param.h>
  34 #include <sys/thread.h>
  35 #include <sys/sysmacros.h>
  36 #include <sys/signal.h>
  37 #include <sys/cred.h>
  38 #include <sys/priv.h>
  39 #include <sys/user.h>
  40 #include <sys/file.h>
  41 #include <sys/errno.h>
  42 #include <sys/vnode.h>
  43 #include <sys/mode.h>
  44 #include <sys/vfs.h>
  45 #include <sys/mman.h>
  46 #include <sys/kmem.h>
  47 #include <sys/proc.h>
  48 #include <sys/pathname.h>
  49 #include <sys/cmn_err.h>
  50 #include <sys/systm.h>
  51 #include <sys/elf.h>
  52 #include <sys/vmsystm.h>
  53 #include <sys/debug.h>
  54 #include <sys/procfs.h>
  55 #include <sys/regset.h>
  56 #include <sys/auxv.h>
  57 #include <sys/exec.h>
  58 #include <sys/prsystm.h>
  59 #include <sys/utsname.h>
  60 #include <sys/zone.h>
  61 #include <vm/as.h>
  62 #include <vm/rm.h>
  63 #include <sys/modctl.h>
  64 #include <sys/systeminfo.h>
  65 #include <sys/machelf.h>
  66 #include <sys/sunddi.h>
  67 #include "elf_impl.h"
  68 #if defined(__i386) || defined(__i386_COMPAT)
  69 #include <sys/sysi86.h>
  70 #endif
  71 
  72 void
  73 setup_note_header(Phdr *v, proc_t *p)
  74 {
  75         int nlwp = p->p_lwpcnt;
  76         int nzomb = p->p_zombcnt;
  77         int nfd;
  78         size_t size;
  79         prcred_t *pcrp;
  80         uf_info_t *fip;
  81         uf_entry_t *ufp;
  82         int fd;
  83 
  84         fip = P_FINFO(p);
  85         nfd = 0;
  86         mutex_enter(&fip->fi_lock);
  87         for (fd = 0; fd < fip->fi_nfiles; fd++) {
  88                 UF_ENTER(ufp, fip, fd);
  89                 if ((ufp->uf_file != NULL) && (ufp->uf_file->f_count > 0))
  90                         nfd++;
  91                 UF_EXIT(ufp);
  92         }
  93         mutex_exit(&fip->fi_lock);
  94 
  95         v[0].p_type = PT_NOTE;
  96         v[0].p_flags = PF_R;
  97         v[0].p_filesz = (sizeof (Note) * (9 + 2 * nlwp + nzomb + nfd))
  98             + roundup(sizeof (psinfo_t), sizeof (Word))
  99             + roundup(sizeof (pstatus_t), sizeof (Word))
 100             + roundup(prgetprivsize(), sizeof (Word))
 101             + roundup(priv_get_implinfo_size(), sizeof (Word))
 102             + roundup(strlen(platform) + 1, sizeof (Word))
 103             + roundup(strlen(p->p_zone->zone_name) + 1, sizeof (Word))
 104             + roundup(__KERN_NAUXV_IMPL * sizeof (aux_entry_t), sizeof (Word))
 105             + roundup(sizeof (utsname), sizeof (Word))
 106             + roundup(sizeof (core_content_t), sizeof (Word))
 107             + (nlwp + nzomb) * roundup(sizeof (lwpsinfo_t), sizeof (Word))
 108             + nlwp * roundup(sizeof (lwpstatus_t), sizeof (Word))
 109             + nfd * roundup(sizeof (prfdinfo_t), sizeof (Word));
 110 
 111         if (curproc->p_agenttp != NULL) {
 112                 v[0].p_filesz += sizeof (Note) +
 113                     roundup(sizeof (psinfo_t), sizeof (Word));
 114         }
 115 
 116         size = sizeof (prcred_t) + sizeof (gid_t) * (ngroups_max - 1);
 117         pcrp = kmem_alloc(size, KM_SLEEP);
 118         prgetcred(p, pcrp);
 119         if (pcrp->pr_ngroups != 0) {
 120                 v[0].p_filesz += sizeof (Note) + roundup(sizeof (prcred_t) +
 121                     sizeof (gid_t) * (pcrp->pr_ngroups - 1), sizeof (Word));
 122         } else {
 123                 v[0].p_filesz += sizeof (Note) +
 124                     roundup(sizeof (prcred_t), sizeof (Word));
 125         }
 126         kmem_free(pcrp, size);
 127 
 128 
 129 #if defined(__i386) || defined(__i386_COMPAT)
 130         mutex_enter(&p->p_ldtlock);
 131         size = prnldt(p) * sizeof (struct ssd);
 132         mutex_exit(&p->p_ldtlock);
 133         if (size != 0)
 134                 v[0].p_filesz += sizeof (Note) + roundup(size, sizeof (Word));
 135 #endif  /* __i386 || __i386_COMPAT */
 136 
 137         if ((size = prhasx(p)? prgetprxregsize(p) : 0) != 0)
 138                 v[0].p_filesz += nlwp * sizeof (Note)
 139                     + nlwp * roundup(size, sizeof (Word));
 140 
 141 #if defined(__sparc)
 142         /*
 143          * Figure out the number and sizes of register windows.
 144          */
 145         {
 146                 kthread_t *t = p->p_tlist;
 147                 do {
 148                         if ((size = prnwindows(ttolwp(t))) != 0) {
 149                                 size = sizeof (gwindows_t) -
 150                                     (SPARC_MAXREGWINDOW - size) *
 151                                     sizeof (struct rwindow);
 152                                 v[0].p_filesz += sizeof (Note) +
 153                                     roundup(size, sizeof (Word));
 154                         }
 155                 } while ((t = t->t_forw) != p->p_tlist);
 156         }
 157         /*
 158          * Space for the Ancillary State Registers.
 159          */
 160         if (p->p_model == DATAMODEL_LP64)
 161                 v[0].p_filesz += nlwp * sizeof (Note)
 162                     + nlwp * roundup(sizeof (asrset_t), sizeof (Word));
 163 #endif /* __sparc */
 164 }
 165 
 166 int
 167 write_elfnotes(proc_t *p, int sig, vnode_t *vp, offset_t offset,
 168     rlim64_t rlimit, cred_t *credp, core_content_t content)
 169 {
 170         union {
 171                 psinfo_t        psinfo;
 172                 pstatus_t       pstatus;
 173                 lwpsinfo_t      lwpsinfo;
 174                 lwpstatus_t     lwpstatus;
 175 #if defined(__sparc)
 176                 gwindows_t      gwindows;
 177                 asrset_t        asrset;
 178 #endif /* __sparc */
 179                 char            xregs[1];
 180                 aux_entry_t     auxv[__KERN_NAUXV_IMPL];
 181                 prcred_t        pcred;
 182                 prpriv_t        ppriv;
 183                 priv_impl_info_t prinfo;
 184                 struct utsname  uts;
 185         } *bigwad;
 186 
 187         size_t xregsize = prhasx(p)? prgetprxregsize(p) : 0;
 188         size_t crsize = sizeof (prcred_t) + sizeof (gid_t) * (ngroups_max - 1);
 189         size_t psize = prgetprivsize();
 190         size_t bigsize = MAX(psize, MAX(sizeof (*bigwad),
 191             MAX(xregsize, crsize)));
 192 
 193         priv_impl_info_t *prii;
 194 
 195         lwpdir_t *ldp;
 196         lwpent_t *lep;
 197         kthread_t *t;
 198         klwp_t *lwp;
 199         user_t *up;
 200         int i;
 201         int nlwp;
 202         int nzomb;
 203         int error;
 204         uchar_t oldsig;
 205         uf_info_t *fip;
 206         int fd;
 207         vnode_t *vroot;
 208 
 209 #if defined(__i386) || defined(__i386_COMPAT)
 210         struct ssd *ssd;
 211         size_t ssdsize;
 212 #endif  /* __i386 || __i386_COMPAT */
 213 
 214         bigsize = MAX(bigsize, priv_get_implinfo_size());
 215 
 216         bigwad = kmem_alloc(bigsize, KM_SLEEP);
 217 
 218         /*
 219          * The order of the elfnote entries should be same here
 220          * and in the gcore(1) command.  Synchronization is
 221          * needed between the kernel and gcore(1).
 222          */
 223 
 224         /*
 225          * Get the psinfo, and set the wait status to indicate that a core was
 226          * dumped.  We have to forge this since p->p_wcode is not set yet.
 227          */
 228         mutex_enter(&p->p_lock);
 229         prgetpsinfo(p, &bigwad->psinfo);
 230         mutex_exit(&p->p_lock);
 231         bigwad->psinfo.pr_wstat = wstat(CLD_DUMPED, sig);
 232 
 233         error = elfnote(vp, &offset, NT_PSINFO, sizeof (bigwad->psinfo),
 234             (caddr_t)&bigwad->psinfo, rlimit, credp);
 235         if (error)
 236                 goto done;
 237 
 238         /*
 239          * Modify t_whystop and lwp_cursig so it appears that the current LWP
 240          * is stopped after faulting on the signal that caused the core dump.
 241          * As a result, prgetstatus() will record that signal, the saved
 242          * lwp_siginfo, and its signal handler in the core file status.  We
 243          * restore lwp_cursig in case a subsequent signal was received while
 244          * dumping core.
 245          */
 246         mutex_enter(&p->p_lock);
 247         lwp = ttolwp(curthread);
 248 
 249         oldsig = lwp->lwp_cursig;
 250         lwp->lwp_cursig = (uchar_t)sig;
 251         curthread->t_whystop = PR_FAULTED;
 252 
 253         prgetstatus(p, &bigwad->pstatus, p->p_zone);
 254         bigwad->pstatus.pr_lwp.pr_why = 0;
 255 
 256         curthread->t_whystop = 0;
 257         lwp->lwp_cursig = oldsig;
 258         mutex_exit(&p->p_lock);
 259 
 260         error = elfnote(vp, &offset, NT_PSTATUS, sizeof (bigwad->pstatus),
 261             (caddr_t)&bigwad->pstatus, rlimit, credp);
 262         if (error)
 263                 goto done;
 264 
 265         error = elfnote(vp, &offset, NT_PLATFORM, strlen(platform) + 1,
 266             platform, rlimit, credp);
 267         if (error)
 268                 goto done;
 269 
 270         up = PTOU(p);
 271         for (i = 0; i < __KERN_NAUXV_IMPL; i++) {
 272                 bigwad->auxv[i].a_type = up->u_auxv[i].a_type;
 273                 bigwad->auxv[i].a_un.a_val = up->u_auxv[i].a_un.a_val;
 274         }
 275         error = elfnote(vp, &offset, NT_AUXV, sizeof (bigwad->auxv),
 276             (caddr_t)bigwad->auxv, rlimit, credp);
 277         if (error)
 278                 goto done;
 279 
 280         bcopy(&utsname, &bigwad->uts, sizeof (struct utsname));
 281         if (!INGLOBALZONE(p)) {
 282                 bcopy(p->p_zone->zone_nodename, &bigwad->uts.nodename,
 283                     _SYS_NMLN);
 284         }
 285         error = elfnote(vp, &offset, NT_UTSNAME, sizeof (struct utsname),
 286             (caddr_t)&bigwad->uts, rlimit, credp);
 287         if (error)
 288                 goto done;
 289 
 290         prgetcred(p, &bigwad->pcred);
 291 
 292         if (bigwad->pcred.pr_ngroups != 0) {
 293                 crsize = sizeof (prcred_t) +
 294                     sizeof (gid_t) * (bigwad->pcred.pr_ngroups - 1);
 295         } else
 296                 crsize = sizeof (prcred_t);
 297 
 298         error = elfnote(vp, &offset, NT_PRCRED, crsize,
 299             (caddr_t)&bigwad->pcred, rlimit, credp);
 300         if (error)
 301                 goto done;
 302 
 303         error = elfnote(vp, &offset, NT_CONTENT, sizeof (core_content_t),
 304             (caddr_t)&content, rlimit, credp);
 305         if (error)
 306                 goto done;
 307 
 308         prgetpriv(p, &bigwad->ppriv);
 309 
 310         error = elfnote(vp, &offset, NT_PRPRIV, psize,
 311             (caddr_t)&bigwad->ppriv, rlimit, credp);
 312         if (error)
 313                 goto done;
 314 
 315         prii = priv_hold_implinfo();
 316         error = elfnote(vp, &offset, NT_PRPRIVINFO, priv_get_implinfo_size(),
 317             (caddr_t)prii, rlimit, credp);
 318         priv_release_implinfo();
 319         if (error)
 320                 goto done;
 321 
 322         /* zone can't go away as long as process exists */
 323         error = elfnote(vp, &offset, NT_ZONENAME,
 324             strlen(p->p_zone->zone_name) + 1, p->p_zone->zone_name,
 325             rlimit, credp);
 326         if (error)
 327                 goto done;
 328 
 329 
 330         /* open file table */
 331         vroot = PTOU(p)->u_rdir;
 332         if (vroot == NULL)
 333                 vroot = rootdir;
 334 
 335         VN_HOLD(vroot);
 336 
 337         fip = P_FINFO(p);
 338 
 339         for (fd = 0; fd < fip->fi_nfiles; fd++) {
 340                 uf_entry_t *ufp;
 341                 vnode_t *fvp;
 342                 struct file *fp;
 343                 vattr_t vattr;
 344                 prfdinfo_t fdinfo;
 345 
 346                 bzero(&fdinfo, sizeof (fdinfo));
 347 
 348                 mutex_enter(&fip->fi_lock);
 349                 UF_ENTER(ufp, fip, fd);
 350                 if (((fp = ufp->uf_file) == NULL) || (fp->f_count < 1)) {
 351                         UF_EXIT(ufp);
 352                         mutex_exit(&fip->fi_lock);
 353                         continue;
 354                 }
 355 
 356                 fdinfo.pr_fd = fd;
 357                 fdinfo.pr_fdflags = ufp->uf_flag;
 358                 fdinfo.pr_fileflags = fp->f_flag2;
 359                 fdinfo.pr_fileflags <<= 16;
 360                 fdinfo.pr_fileflags |= fp->f_flag;
 361                 if ((fdinfo.pr_fileflags & (FSEARCH | FEXEC)) == 0)
 362                         fdinfo.pr_fileflags += FOPEN;
 363                 fdinfo.pr_offset = fp->f_offset;
 364 
 365 
 366                 fvp = fp->f_vnode;
 367                 VN_HOLD(fvp);
 368                 UF_EXIT(ufp);
 369                 mutex_exit(&fip->fi_lock);
 370 
 371                 /*
 372                  * There are some vnodes that have no corresponding
 373                  * path.  Its reasonable for this to fail, in which
 374                  * case the path will remain an empty string.
 375                  */
 376                 (void) vnodetopath(vroot, fvp, fdinfo.pr_path,
 377                     sizeof (fdinfo.pr_path), credp);
 378 
 379                 if (VOP_GETATTR(fvp, &vattr, 0, credp, NULL) != 0) {
 380                         /*
 381                          * Try to write at least a subset of information
 382                          */
 383                         fdinfo.pr_major = 0;
 384                         fdinfo.pr_minor = 0;
 385                         fdinfo.pr_ino = 0;
 386                         fdinfo.pr_mode = 0;
 387                         fdinfo.pr_uid = (uid_t)-1;
 388                         fdinfo.pr_gid = (gid_t)-1;
 389                         fdinfo.pr_rmajor = 0;
 390                         fdinfo.pr_rminor = 0;
 391                         fdinfo.pr_size = -1;
 392 
 393                         error = elfnote(vp, &offset, NT_FDINFO,
 394                             sizeof (fdinfo), &fdinfo, rlimit, credp);
 395                         VN_RELE(fvp);
 396                         VN_RELE(vroot);
 397                         if (error)
 398                                 goto done;
 399                         continue;
 400                 }
 401 
 402                 if (fvp->v_type == VSOCK)
 403                         fdinfo.pr_fileflags |= sock_getfasync(fvp);
 404 
 405                 VN_RELE(fvp);
 406 
 407                 /*
 408                  * This logic mirrors fstat(), which we cannot use
 409                  * directly, as it calls copyout().
 410                  */
 411                 fdinfo.pr_major = getmajor(vattr.va_fsid);
 412                 fdinfo.pr_minor = getminor(vattr.va_fsid);
 413                 fdinfo.pr_ino = (ino64_t)vattr.va_nodeid;
 414                 fdinfo.pr_mode = VTTOIF(vattr.va_type) | vattr.va_mode;
 415                 fdinfo.pr_uid = vattr.va_uid;
 416                 fdinfo.pr_gid = vattr.va_gid;
 417                 fdinfo.pr_rmajor = getmajor(vattr.va_rdev);
 418                 fdinfo.pr_rminor = getminor(vattr.va_rdev);
 419                 fdinfo.pr_size = (off64_t)vattr.va_size;
 420 
 421                 error = elfnote(vp, &offset, NT_FDINFO,
 422                     sizeof (fdinfo), &fdinfo, rlimit, credp);
 423                 if (error) {
 424                         VN_RELE(vroot);
 425                         goto done;
 426                 }
 427         }
 428 
 429         VN_RELE(vroot);
 430 
 431 #if defined(__i386) || defined(__i386_COMPAT)
 432         mutex_enter(&p->p_ldtlock);
 433         ssdsize = prnldt(p) * sizeof (struct ssd);
 434         if (ssdsize != 0) {
 435                 ssd = kmem_alloc(ssdsize, KM_SLEEP);
 436                 prgetldt(p, ssd);
 437                 error = elfnote(vp, &offset, NT_LDT, ssdsize,
 438                     (caddr_t)ssd, rlimit, credp);
 439                 kmem_free(ssd, ssdsize);
 440         }
 441         mutex_exit(&p->p_ldtlock);
 442         if (error)
 443                 goto done;
 444 #endif  /* __i386 || defined(__i386_COMPAT) */
 445 
 446         nlwp = p->p_lwpcnt;
 447         nzomb = p->p_zombcnt;
 448         /* for each entry in the lwp directory ... */
 449         for (ldp = p->p_lwpdir; nlwp + nzomb != 0; ldp++) {
 450 
 451                 if ((lep = ldp->ld_entry) == NULL)   /* empty slot */
 452                         continue;
 453 
 454                 if ((t = lep->le_thread) != NULL) {  /* active lwp */
 455                         ASSERT(nlwp != 0);
 456                         nlwp--;
 457                         lwp = ttolwp(t);
 458                         mutex_enter(&p->p_lock);
 459                         prgetlwpsinfo(t, &bigwad->lwpsinfo);
 460                         mutex_exit(&p->p_lock);
 461                 } else {                                /* zombie lwp */
 462                         ASSERT(nzomb != 0);
 463                         nzomb--;
 464                         bzero(&bigwad->lwpsinfo, sizeof (bigwad->lwpsinfo));
 465                         bigwad->lwpsinfo.pr_lwpid = lep->le_lwpid;
 466                         bigwad->lwpsinfo.pr_state = SZOMB;
 467                         bigwad->lwpsinfo.pr_sname = 'Z';
 468                         bigwad->lwpsinfo.pr_start.tv_sec = lep->le_start;
 469                 }
 470                 error = elfnote(vp, &offset, NT_LWPSINFO,
 471                     sizeof (bigwad->lwpsinfo), (caddr_t)&bigwad->lwpsinfo,
 472                     rlimit, credp);
 473                 if (error)
 474                         goto done;
 475                 if (t == NULL)          /* nothing more to do for a zombie */
 476                         continue;
 477 
 478                 mutex_enter(&p->p_lock);
 479                 if (t == curthread) {
 480                         /*
 481                          * Modify t_whystop and lwp_cursig so it appears that
 482                          * the current LWP is stopped after faulting on the
 483                          * signal that caused the core dump.  As a result,
 484                          * prgetlwpstatus() will record that signal, the saved
 485                          * lwp_siginfo, and its signal handler in the core file
 486                          * status.  We restore lwp_cursig in case a subsequent
 487                          * signal was received while dumping core.
 488                          */
 489                         oldsig = lwp->lwp_cursig;
 490                         lwp->lwp_cursig = (uchar_t)sig;
 491                         t->t_whystop = PR_FAULTED;
 492 
 493                         prgetlwpstatus(t, &bigwad->lwpstatus, p->p_zone);
 494                         bigwad->lwpstatus.pr_why = 0;
 495 
 496                         t->t_whystop = 0;
 497                         lwp->lwp_cursig = oldsig;
 498                 } else {
 499                         prgetlwpstatus(t, &bigwad->lwpstatus, p->p_zone);
 500                 }
 501                 mutex_exit(&p->p_lock);
 502                 error = elfnote(vp, &offset, NT_LWPSTATUS,
 503                     sizeof (bigwad->lwpstatus), (caddr_t)&bigwad->lwpstatus,
 504                     rlimit, credp);
 505                 if (error)
 506                         goto done;
 507 
 508 #if defined(__sparc)
 509                 /*
 510                  * Unspilled SPARC register windows.
 511                  */
 512                 {
 513                         size_t size = prnwindows(lwp);
 514 
 515                         if (size != 0) {
 516                                 size = sizeof (gwindows_t) -
 517                                     (SPARC_MAXREGWINDOW - size) *
 518                                     sizeof (struct rwindow);
 519                                 prgetwindows(lwp, &bigwad->gwindows);
 520                                 error = elfnote(vp, &offset, NT_GWINDOWS,
 521                                     size, (caddr_t)&bigwad->gwindows,
 522                                     rlimit, credp);
 523                                 if (error)
 524                                         goto done;
 525                         }
 526                 }
 527                 /*
 528                  * Ancillary State Registers.
 529                  */
 530                 if (p->p_model == DATAMODEL_LP64) {
 531                         prgetasregs(lwp, bigwad->asrset);
 532                         error = elfnote(vp, &offset, NT_ASRS,
 533                             sizeof (asrset_t), (caddr_t)bigwad->asrset,
 534                             rlimit, credp);
 535                         if (error)
 536                                 goto done;
 537                 }
 538 #endif /* __sparc */
 539 
 540                 if (xregsize) {
 541                         prgetprxregs(lwp, bigwad->xregs);
 542                         error = elfnote(vp, &offset, NT_PRXREG,
 543                             xregsize, bigwad->xregs, rlimit, credp);
 544                         if (error)
 545                                 goto done;
 546                 }
 547 
 548                 if (t->t_lwp->lwp_spymaster != NULL) {
 549                         void *psaddr = t->t_lwp->lwp_spymaster;
 550 #ifdef _ELF32_COMPAT
 551                         /*
 552                          * On a 64-bit kernel with 32-bit ELF compatibility,
 553                          * this file is compiled into two different objects:
 554                          * one is compiled normally, and the other is compiled
 555                          * with _ELF32_COMPAT set -- and therefore with a
 556                          * psinfo_t defined to be a psinfo32_t.  However, the
 557                          * psinfo_t denoting our spymaster is always of the
 558                          * native type; if we are in the _ELF32_COMPAT case,
 559                          * we need to explicitly convert it.
 560                          */
 561                         if (p->p_model == DATAMODEL_ILP32) {
 562                                 psinfo_kto32(psaddr, &bigwad->psinfo);
 563                                 psaddr = &bigwad->psinfo;
 564                         }
 565 #endif
 566 
 567                         error = elfnote(vp, &offset, NT_SPYMASTER,
 568                             sizeof (psinfo_t), psaddr, rlimit, credp);
 569                         if (error)
 570                                 goto done;
 571                 }
 572         }
 573         ASSERT(nlwp == 0);
 574 
 575 done:
 576         kmem_free(bigwad, bigsize);
 577         return (error);
 578 }