1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Routines for preparing tdata trees for conversion into CTF data, and 28 * for placing the resulting data into an output file. 29 */ 30 31 #include <stdio.h> 32 #include <stdlib.h> 33 #include <strings.h> 34 #include <sys/types.h> 35 #include <sys/stat.h> 36 #include <fcntl.h> 37 #include <libelf.h> 38 #include <gelf.h> 39 #include <unistd.h> 40 41 #include "ctftools.h" 42 #include "list.h" 43 #include "memory.h" 44 #include "traverse.h" 45 #include "symbol.h" 46 47 typedef struct iidesc_match { 48 int iim_fuzzy; 49 iidesc_t *iim_ret; 50 char *iim_name; 51 char *iim_file; 52 uchar_t iim_bind; 53 } iidesc_match_t; 54 55 static int 56 burst_iitypes(void *data, void *arg) 57 { 58 iidesc_t *ii = data; 59 iiburst_t *iiburst = arg; 60 61 switch (ii->ii_type) { 62 case II_GFUN: 63 case II_SFUN: 64 case II_GVAR: 65 case II_SVAR: 66 if (!(ii->ii_flags & IIDESC_F_USED)) 67 return (0); 68 break; 69 default: 70 break; 71 } 72 73 ii->ii_dtype->t_flags |= TDESC_F_ISROOT; 74 (void) iitraverse_td(ii, iiburst->iib_tdtd); 75 return (1); 76 } 77 78 /*ARGSUSED1*/ 79 static int 80 save_type_by_id(tdesc_t *tdp, tdesc_t **tdpp, void *private) 81 { 82 iiburst_t *iiburst = private; 83 84 /* 85 * Doing this on every node is horribly inefficient, but given that 86 * we may be suppressing some types, we can't trust nextid in the 87 * tdata_t. 88 */ 89 if (tdp->t_id > iiburst->iib_maxtypeid) 90 iiburst->iib_maxtypeid = tdp->t_id; 91 92 slist_add(&iiburst->iib_types, tdp, tdesc_idcmp); 93 94 return (1); 95 } 96 97 static tdtrav_cb_f burst_types_cbs[] = { 98 NULL, 99 save_type_by_id, /* intrinsic */ 100 save_type_by_id, /* pointer */ 101 save_type_by_id, /* array */ 102 save_type_by_id, /* function */ 103 save_type_by_id, /* struct */ 104 save_type_by_id, /* union */ 105 save_type_by_id, /* enum */ 106 save_type_by_id, /* forward */ 107 save_type_by_id, /* typedef */ 108 tdtrav_assert, /* typedef_unres */ 109 save_type_by_id, /* volatile */ 110 save_type_by_id, /* const */ 111 save_type_by_id /* restrict */ 112 }; 113 114 115 static iiburst_t * 116 iiburst_new(tdata_t *td, int max) 117 { 118 iiburst_t *iiburst = xcalloc(sizeof (iiburst_t)); 119 iiburst->iib_td = td; 120 iiburst->iib_funcs = xcalloc(sizeof (iidesc_t *) * max); 121 iiburst->iib_nfuncs = 0; 122 iiburst->iib_objts = xcalloc(sizeof (iidesc_t *) * max); 123 iiburst->iib_nobjts = 0; 124 return (iiburst); 125 } 126 127 static void 128 iiburst_types(iiburst_t *iiburst) 129 { 130 tdtrav_data_t tdtd; 131 132 tdtrav_init(&tdtd, &iiburst->iib_td->td_curvgen, NULL, burst_types_cbs, 133 NULL, (void *)iiburst); 134 135 iiburst->iib_tdtd = &tdtd; 136 137 (void) hash_iter(iiburst->iib_td->td_iihash, burst_iitypes, iiburst); 138 } 139 140 static void 141 iiburst_free(iiburst_t *iiburst) 142 { 143 free(iiburst->iib_funcs); 144 free(iiburst->iib_objts); 145 list_free(iiburst->iib_types, NULL, NULL); 146 free(iiburst); 147 } 148 149 /* 150 * See if this iidesc matches the ELF symbol data we pass in. 151 * 152 * A fuzzy match is where we have a local symbol matching the name of a 153 * global type description. This is common when a mapfile is used for a 154 * DSO, but we don't accept it by default. 155 * 156 * A weak fuzzy match is when a weak symbol was resolved and matched to 157 * a global type description. 158 */ 159 static int 160 matching_iidesc(iidesc_t *iidesc, iidesc_match_t *match) 161 { 162 if (streq(iidesc->ii_name, match->iim_name) == 0) 163 return (0); 164 165 switch (iidesc->ii_type) { 166 case II_GFUN: 167 case II_GVAR: 168 if (match->iim_bind == STB_GLOBAL) { 169 match->iim_ret = iidesc; 170 return (-1); 171 } else if (match->iim_fuzzy && match->iim_ret == NULL) { 172 match->iim_ret = iidesc; 173 /* continue to look for strong match */ 174 return (0); 175 } 176 break; 177 case II_SFUN: 178 case II_SVAR: 179 if (match->iim_bind == STB_LOCAL && 180 match->iim_file != NULL && 181 streq(iidesc->ii_owner, match->iim_file)) { 182 match->iim_ret = iidesc; 183 return (-1); 184 } 185 break; 186 } 187 return (0); 188 } 189 190 static iidesc_t * 191 find_iidesc(tdata_t *td, iidesc_match_t *match) 192 { 193 match->iim_ret = NULL; 194 iter_iidescs_by_name(td, match->iim_name, 195 (int (*)())matching_iidesc, match); 196 return (match->iim_ret); 197 } 198 199 /* 200 * If we have a weak symbol, attempt to find the strong symbol it will 201 * resolve to. Note: the code where this actually happens is in 202 * sym_process() in cmd/sgs/libld/common/syms.c 203 * 204 * Finding the matching symbol is unfortunately not trivial. For a 205 * symbol to be a candidate, it must: 206 * 207 * - have the same type (function, object) 208 * - have the same value (address) 209 * - have the same size 210 * - not be another weak symbol 211 * - belong to the same section (checked via section index) 212 * 213 * If such a candidate is global, then we assume we've found it. The 214 * linker generates the symbol table such that the curfile might be 215 * incorrect; this is OK for global symbols, since find_iidesc() doesn't 216 * need to check for the source file for the symbol. 217 * 218 * We might have found a strong local symbol, where the curfile is 219 * accurate and matches that of the weak symbol. We assume this is a 220 * reasonable match. 221 * 222 * If we've got a local symbol with a non-matching curfile, there are 223 * two possibilities. Either this is a completely different symbol, or 224 * it's a once-global symbol that was scoped to local via a mapfile. In 225 * the latter case, curfile is likely inaccurate since the linker does 226 * not preserve the needed curfile in the order of the symbol table (see 227 * the comments about locally scoped symbols in libld's update_osym()). 228 * As we can't tell this case from the former one, we use this symbol 229 * iff no other matching symbol is found. 230 * 231 * What we really need here is a SUNW section containing weak<->strong 232 * mappings that we can consume. 233 */ 234 static int 235 check_for_weak(GElf_Sym *weak, char const *weakfile, 236 Elf_Data *data, int nent, Elf_Data *strdata, 237 GElf_Sym *retsym, char **curfilep) 238 { 239 char *curfile = NULL; 240 char *tmpfile; 241 GElf_Sym tmpsym; 242 int candidate = 0; 243 int i; 244 245 if (GELF_ST_BIND(weak->st_info) != STB_WEAK) 246 return (0); 247 248 for (i = 0; i < nent; i++) { 249 GElf_Sym sym; 250 uchar_t type; 251 252 if (gelf_getsym(data, i, &sym) == NULL) 253 continue; 254 255 type = GELF_ST_TYPE(sym.st_info); 256 257 if (type == STT_FILE) 258 curfile = (char *)strdata->d_buf + sym.st_name; 259 260 if (GELF_ST_TYPE(weak->st_info) != type || 261 weak->st_value != sym.st_value) 262 continue; 263 264 if (weak->st_size != sym.st_size) 265 continue; 266 267 if (GELF_ST_BIND(sym.st_info) == STB_WEAK) 268 continue; 269 270 if (sym.st_shndx != weak->st_shndx) 271 continue; 272 273 if (GELF_ST_BIND(sym.st_info) == STB_LOCAL && 274 (curfile == NULL || weakfile == NULL || 275 strcmp(curfile, weakfile) != 0)) { 276 candidate = 1; 277 tmpfile = curfile; 278 tmpsym = sym; 279 continue; 280 } 281 282 *curfilep = curfile; 283 *retsym = sym; 284 return (1); 285 } 286 287 if (candidate) { 288 *curfilep = tmpfile; 289 *retsym = tmpsym; 290 return (1); 291 } 292 293 return (0); 294 } 295 296 /* 297 * When we've found the underlying symbol's type description 298 * for a weak symbol, we need to copy it and rename it to match 299 * the weak symbol. We also need to add it to the td so it's 300 * handled along with the others later. 301 */ 302 static iidesc_t * 303 copy_from_strong(tdata_t *td, GElf_Sym *sym, iidesc_t *strongdesc, 304 const char *weakname, const char *weakfile) 305 { 306 iidesc_t *new = iidesc_dup_rename(strongdesc, weakname, weakfile); 307 uchar_t type = GELF_ST_TYPE(sym->st_info); 308 309 switch (type) { 310 case STT_OBJECT: 311 new->ii_type = II_GVAR; 312 break; 313 case STT_FUNC: 314 new->ii_type = II_GFUN; 315 break; 316 } 317 318 hash_add(td->td_iihash, new); 319 320 return (new); 321 } 322 323 /* 324 * Process the symbol table of the output file, associating each symbol 325 * with a type description if possible, and sorting them into functions 326 * and data, maintaining symbol table order. 327 */ 328 static iiburst_t * 329 sort_iidescs(Elf *elf, const char *file, tdata_t *td, int fuzzymatch, 330 int dynsym) 331 { 332 iiburst_t *iiburst; 333 Elf_Scn *scn; 334 GElf_Shdr shdr; 335 Elf_Data *data, *strdata; 336 int i, stidx; 337 int nent; 338 iidesc_match_t match; 339 340 match.iim_fuzzy = fuzzymatch; 341 match.iim_file = NULL; 342 343 if ((stidx = findelfsecidx(elf, file, 344 dynsym ? ".dynsym" : ".symtab")) < 0) 345 terminate("%s: Can't open symbol table\n", file); 346 scn = elf_getscn(elf, stidx); 347 data = elf_getdata(scn, NULL); 348 gelf_getshdr(scn, &shdr); 349 nent = shdr.sh_size / shdr.sh_entsize; 350 351 scn = elf_getscn(elf, shdr.sh_link); 352 strdata = elf_getdata(scn, NULL); 353 354 iiburst = iiburst_new(td, nent); 355 356 for (i = 0; i < nent; i++) { 357 GElf_Sym sym; 358 iidesc_t **tolist; 359 GElf_Sym ssym; 360 iidesc_match_t smatch; 361 int *curr; 362 iidesc_t *iidesc; 363 364 if (gelf_getsym(data, i, &sym) == NULL) 365 elfterminate(file, "Couldn't read symbol %d", i); 366 367 match.iim_name = (char *)strdata->d_buf + sym.st_name; 368 match.iim_bind = GELF_ST_BIND(sym.st_info); 369 370 switch (GELF_ST_TYPE(sym.st_info)) { 371 case STT_FILE: 372 match.iim_file = match.iim_name; 373 continue; 374 case STT_OBJECT: 375 tolist = iiburst->iib_objts; 376 curr = &iiburst->iib_nobjts; 377 break; 378 case STT_FUNC: 379 tolist = iiburst->iib_funcs; 380 curr = &iiburst->iib_nfuncs; 381 break; 382 default: 383 continue; 384 } 385 386 if (ignore_symbol(&sym, match.iim_name)) 387 continue; 388 389 iidesc = find_iidesc(td, &match); 390 391 if (iidesc != NULL) { 392 tolist[*curr] = iidesc; 393 iidesc->ii_flags |= IIDESC_F_USED; 394 (*curr)++; 395 continue; 396 } 397 398 if (!check_for_weak(&sym, match.iim_file, data, nent, strdata, 399 &ssym, &smatch.iim_file)) { 400 (*curr)++; 401 continue; 402 } 403 404 smatch.iim_fuzzy = fuzzymatch; 405 smatch.iim_name = (char *)strdata->d_buf + ssym.st_name; 406 smatch.iim_bind = GELF_ST_BIND(ssym.st_info); 407 408 debug(3, "Weak symbol %s resolved to %s\n", match.iim_name, 409 smatch.iim_name); 410 411 iidesc = find_iidesc(td, &smatch); 412 413 if (iidesc != NULL) { 414 tolist[*curr] = copy_from_strong(td, &sym, 415 iidesc, match.iim_name, match.iim_file); 416 tolist[*curr]->ii_flags |= IIDESC_F_USED; 417 } 418 419 (*curr)++; 420 } 421 422 /* 423 * Stabs are generated for every function declared in a given C source 424 * file. When converting an object file, we may encounter a stab that 425 * has no symbol table entry because the optimizer has decided to omit 426 * that item (for example, an unreferenced static function). We may 427 * see iidescs that do not have an associated symtab entry, and so 428 * we do not write records for those functions into the CTF data. 429 * All others get marked as a root by this function. 430 */ 431 iiburst_types(iiburst); 432 433 /* 434 * By not adding some of the functions and/or objects, we may have 435 * caused some types that were referenced solely by those 436 * functions/objects to be suppressed. This could cause a label, 437 * generated prior to the evisceration, to be incorrect. Find the 438 * highest type index, and change the label indicies to be no higher 439 * than this value. 440 */ 441 tdata_label_newmax(td, iiburst->iib_maxtypeid); 442 443 return (iiburst); 444 } 445 446 static void 447 write_file(Elf *src, const char *srcname, Elf *dst, const char *dstname, 448 caddr_t ctfdata, size_t ctfsize, int flags) 449 { 450 GElf_Ehdr sehdr, dehdr; 451 Elf_Scn *sscn, *dscn; 452 Elf_Data *sdata, *ddata; 453 GElf_Shdr shdr; 454 GElf_Word symtab_type; 455 int symtab_idx = -1; 456 off_t new_offset = 0; 457 off_t ctfnameoff = 0; 458 int dynsym = (flags & CTF_USE_DYNSYM); 459 int *secxlate; 460 int srcidx, dstidx; 461 int curnmoff = 0; 462 int changing = 0; 463 int pad; 464 int i; 465 466 if (gelf_newehdr(dst, gelf_getclass(src)) == 0) 467 elfterminate(dstname, "Cannot copy ehdr to temp file"); 468 gelf_getehdr(src, &sehdr); 469 memcpy(&dehdr, &sehdr, sizeof (GElf_Ehdr)); 470 gelf_update_ehdr(dst, &dehdr); 471 472 symtab_type = dynsym ? SHT_DYNSYM : SHT_SYMTAB; 473 474 /* 475 * Neither the existing stab sections nor the SUNW_ctf sections (new or 476 * existing) are SHF_ALLOC'd, so they won't be in areas referenced by 477 * program headers. As such, we can just blindly copy the program 478 * headers from the existing file to the new file. 479 */ 480 if (sehdr.e_phnum != 0) { 481 (void) elf_flagelf(dst, ELF_C_SET, ELF_F_LAYOUT); 482 if (gelf_newphdr(dst, sehdr.e_phnum) == 0) 483 elfterminate(dstname, "Cannot make phdrs in temp file"); 484 485 for (i = 0; i < sehdr.e_phnum; i++) { 486 GElf_Phdr phdr; 487 488 gelf_getphdr(src, i, &phdr); 489 gelf_update_phdr(dst, i, &phdr); 490 } 491 } 492 493 secxlate = xmalloc(sizeof (int) * sehdr.e_shnum); 494 for (srcidx = dstidx = 0; srcidx < sehdr.e_shnum; srcidx++) { 495 Elf_Scn *scn = elf_getscn(src, srcidx); 496 GElf_Shdr shdr; 497 char *sname; 498 499 gelf_getshdr(scn, &shdr); 500 sname = elf_strptr(src, sehdr.e_shstrndx, shdr.sh_name); 501 if (sname == NULL) { 502 elfterminate(srcname, "Can't find string at %u", 503 shdr.sh_name); 504 } 505 506 if (strcmp(sname, CTF_ELF_SCN_NAME) == 0) { 507 secxlate[srcidx] = -1; 508 } else if (dynsym && shdr.sh_type == SHT_SYMTAB) { 509 /* 510 * If we're building CTF against the dynsym, 511 * we'll rip out the symtab so debuggers aren't 512 * confused. 513 */ 514 secxlate[srcidx] = -1; 515 } else { 516 secxlate[srcidx] = dstidx++; 517 curnmoff += strlen(sname) + 1; 518 } 519 520 new_offset = (off_t)dehdr.e_phoff; 521 } 522 523 for (srcidx = 1; srcidx < sehdr.e_shnum; srcidx++) { 524 char *sname; 525 526 sscn = elf_getscn(src, srcidx); 527 gelf_getshdr(sscn, &shdr); 528 529 if (secxlate[srcidx] == -1) { 530 changing = 1; 531 continue; 532 } 533 534 dscn = elf_newscn(dst); 535 536 /* 537 * If this file has program headers, we need to explicitly lay 538 * out sections. If none of the sections prior to this one have 539 * been removed, then we can just use the existing location. If 540 * one or more sections have been changed, then we need to 541 * adjust this one to avoid holes. 542 */ 543 if (changing && sehdr.e_phnum != 0) { 544 pad = new_offset % shdr.sh_addralign; 545 546 if (pad) 547 new_offset += shdr.sh_addralign - pad; 548 shdr.sh_offset = new_offset; 549 } 550 551 shdr.sh_link = secxlate[shdr.sh_link]; 552 553 if (shdr.sh_type == SHT_REL || shdr.sh_type == SHT_RELA) 554 shdr.sh_info = secxlate[shdr.sh_info]; 555 556 sname = elf_strptr(src, sehdr.e_shstrndx, shdr.sh_name); 557 if (sname == NULL) { 558 elfterminate(srcname, "Can't find string at %u", 559 shdr.sh_name); 560 } 561 if ((sdata = elf_getdata(sscn, NULL)) == NULL) 562 elfterminate(srcname, "Cannot get sect %s data", sname); 563 if ((ddata = elf_newdata(dscn)) == NULL) 564 elfterminate(dstname, "Can't make sect %s data", sname); 565 bcopy(sdata, ddata, sizeof (Elf_Data)); 566 567 if (srcidx == sehdr.e_shstrndx) { 568 char seclen = strlen(CTF_ELF_SCN_NAME); 569 570 ddata->d_buf = xmalloc(ddata->d_size + shdr.sh_size + 571 seclen + 1); 572 bcopy(sdata->d_buf, ddata->d_buf, shdr.sh_size); 573 strcpy((caddr_t)ddata->d_buf + shdr.sh_size, 574 CTF_ELF_SCN_NAME); 575 ctfnameoff = (off_t)shdr.sh_size; 576 shdr.sh_size += seclen + 1; 577 ddata->d_size += seclen + 1; 578 579 if (sehdr.e_phnum != 0) 580 changing = 1; 581 } 582 583 if (shdr.sh_type == symtab_type && shdr.sh_entsize != 0) { 584 int nsym = shdr.sh_size / shdr.sh_entsize; 585 586 symtab_idx = secxlate[srcidx]; 587 588 ddata->d_buf = xmalloc(shdr.sh_size); 589 bcopy(sdata->d_buf, ddata->d_buf, shdr.sh_size); 590 591 for (i = 0; i < nsym; i++) { 592 GElf_Sym sym; 593 short newscn; 594 595 (void) gelf_getsym(ddata, i, &sym); 596 597 if (sym.st_shndx >= SHN_LORESERVE) 598 continue; 599 600 if ((newscn = secxlate[sym.st_shndx]) != 601 sym.st_shndx) { 602 sym.st_shndx = 603 (newscn == -1 ? 1 : newscn); 604 605 gelf_update_sym(ddata, i, &sym); 606 } 607 } 608 } 609 610 if (gelf_update_shdr(dscn, &shdr) == 0) 611 elfterminate(dstname, "Cannot update sect %s", sname); 612 613 new_offset = (off_t)shdr.sh_offset; 614 if (shdr.sh_type != SHT_NOBITS) 615 new_offset += shdr.sh_size; 616 } 617 618 if (symtab_idx == -1) { 619 terminate("%s: Cannot find %s section\n", srcname, 620 dynsym ? "SHT_DYNSYM" : "SHT_SYMTAB"); 621 } 622 623 /* Add the ctf section */ 624 dscn = elf_newscn(dst); 625 gelf_getshdr(dscn, &shdr); 626 shdr.sh_name = ctfnameoff; 627 shdr.sh_type = SHT_PROGBITS; 628 shdr.sh_size = ctfsize; 629 shdr.sh_link = symtab_idx; 630 shdr.sh_addralign = 4; 631 if (changing && sehdr.e_phnum != 0) { 632 pad = new_offset % shdr.sh_addralign; 633 634 if (pad) 635 new_offset += shdr.sh_addralign - pad; 636 637 shdr.sh_offset = new_offset; 638 new_offset += shdr.sh_size; 639 } 640 641 ddata = elf_newdata(dscn); 642 ddata->d_buf = ctfdata; 643 ddata->d_size = ctfsize; 644 ddata->d_align = shdr.sh_addralign; 645 646 gelf_update_shdr(dscn, &shdr); 647 648 /* update the section header location */ 649 if (sehdr.e_phnum != 0) { 650 size_t align = gelf_fsize(dst, ELF_T_ADDR, 1, EV_CURRENT); 651 size_t r = new_offset % align; 652 653 if (r) 654 new_offset += align - r; 655 656 dehdr.e_shoff = new_offset; 657 } 658 659 /* commit to disk */ 660 dehdr.e_shstrndx = secxlate[sehdr.e_shstrndx]; 661 gelf_update_ehdr(dst, &dehdr); 662 if (elf_update(dst, ELF_C_WRITE) < 0) 663 elfterminate(dstname, "Cannot finalize temp file"); 664 665 free(secxlate); 666 } 667 668 static caddr_t 669 make_ctf_data(tdata_t *td, Elf *elf, const char *file, size_t *lenp, int flags) 670 { 671 iiburst_t *iiburst; 672 caddr_t data; 673 674 iiburst = sort_iidescs(elf, file, td, flags & CTF_FUZZY_MATCH, 675 flags & CTF_USE_DYNSYM); 676 data = ctf_gen(iiburst, lenp, flags & CTF_COMPRESS); 677 678 iiburst_free(iiburst); 679 680 return (data); 681 } 682 683 void 684 write_ctf(tdata_t *td, const char *curname, const char *newname, int flags) 685 { 686 struct stat st; 687 Elf *elf = NULL; 688 Elf *telf = NULL; 689 caddr_t data; 690 size_t len; 691 int fd = -1; 692 int tfd = -1; 693 694 (void) elf_version(EV_CURRENT); 695 if ((fd = open(curname, O_RDONLY)) < 0 || fstat(fd, &st) < 0) 696 terminate("%s: Cannot open for re-reading", curname); 697 if ((elf = elf_begin(fd, ELF_C_READ, NULL)) == NULL) 698 elfterminate(curname, "Cannot re-read"); 699 700 if ((tfd = open(newname, O_RDWR | O_CREAT | O_TRUNC, st.st_mode)) < 0) 701 terminate("Cannot open temp file %s for writing", newname); 702 if ((telf = elf_begin(tfd, ELF_C_WRITE, NULL)) == NULL) 703 elfterminate(curname, "Cannot write"); 704 705 data = make_ctf_data(td, elf, curname, &len, flags); 706 write_file(elf, curname, telf, newname, data, len, flags); 707 free(data); 708 709 elf_end(telf); 710 elf_end(elf); 711 (void) close(fd); 712 (void) close(tfd); 713 }