Print this page
10812 ctf tools shouldn't add blank labels
10813 ctf symbol mapping needs work
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>


  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 /*
  26  * Copyright 2012 Jason King.  All rights reserved.
  27  * Use is subject to license terms.
  28  */
  29 
  30 /*
  31  * Copyright 2018 Joyent, Inc.
  32  */
  33 
  34 /*
  35  * CTF DWARF conversion theory.
  36  *
  37  * DWARF data contains a series of compilation units. Each compilation unit
  38  * generally refers to an object file or what once was, in the case of linked
  39  * binaries and shared objects. Each compilation unit has a series of what DWARF
  40  * calls a DIE (Debugging Information Entry). The set of entries that we care
  41  * about have type information stored in a series of attributes. Each DIE also
  42  * has a tag that identifies the kind of attributes that it has.
  43  *
  44  * A given DIE may itself have children. For example, a DIE that represents a
  45  * structure has children which represent members. Whenever we encounter a DIE
  46  * that has children or other values or types associated with it, we recursively
  47  * process those children first so that way we can then refer to the generated
  48  * CTF type id while processing its parent. This reduces the amount of unknowns
  49  * and fixups that we need. It also ensures that we don't accidentally add types
  50  * that an overzealous compiler might add to the DWARF data but aren't used by
  51  * anything in the system.


 265         avl_tree_t      cu_map;         /* map die offsets to CTF types */
 266         char            *cu_errbuf;     /* error message buffer */
 267         size_t          cu_errlen;      /* error message buffer length */
 268         size_t          cu_ptrsz;       /* object's pointer size */
 269         boolean_t       cu_bigend;      /* is it big endian */
 270         boolean_t       cu_doweaks;     /* should we convert weak symbols? */
 271         uint_t          cu_mach;        /* machine type */
 272         ctf_id_t        cu_voidtid;     /* void pointer */
 273         ctf_id_t        cu_longtid;     /* id for a 'long' */
 274 } ctf_cu_t;
 275 
/*
 * Forward declarations of static routines so that they can be referenced
 * before their definitions.
 */
static int ctf_dwarf_offset(ctf_cu_t *, Dwarf_Die, Dwarf_Off *);
static int ctf_dwarf_convert_die(ctf_cu_t *, Dwarf_Die);
static int ctf_dwarf_convert_type(ctf_cu_t *, Dwarf_Die, ctf_id_t *, int);

static int ctf_dwarf_function_count(ctf_cu_t *, Dwarf_Die, ctf_funcinfo_t *,
    boolean_t);
static int ctf_dwarf_convert_fargs(ctf_cu_t *, Dwarf_Die, ctf_funcinfo_t *,
    ctf_id_t *);

/*
 * Callback type used by ctf_dwarf_symtab_iter(). The iterator passes, in
 * order: the compilation unit, the symbol, the symbol's index in the symbol
 * table, the name from the most recent STT_FILE symbol (NULL if none has been
 * seen yet), the symbol's name, and the caller's private argument. A non-zero
 * return stops the iteration and is returned to the iterator's caller.
 */
typedef int (ctf_dwarf_symtab_f)(ctf_cu_t *, const GElf_Sym *, ulong_t,
    const char *, const char *, void *);
 287 
 288 /*
 289  * This is a generic way to set a CTF Conversion backend error depending on what
 290  * we were doing. Unless it was one of a specific set of errors that don't
 291  * indicate a programming / translation bug, eg. ENOMEM, then we transform it
 292  * into a CTF backend error and fill in the error buffer.
 293  */
 294 static int
 295 ctf_dwarf_error(ctf_cu_t *cup, ctf_file_t *cfp, int err, const char *fmt, ...)
 296 {
 297         va_list ap;
 298         int ret;
 299         size_t off = 0;
 300         ssize_t rem = cup->cu_errlen;
 301         if (cfp != NULL)
 302                 err = ctf_errno(cfp);
 303 
 304         if (err == ENOMEM)
 305                 return (err);
 306 
 307         ret = snprintf(cup->cu_errbuf, rem, "die %s: ", cup->cu_name);


2149 }
2150 
2151 static int
2152 ctf_dwarf_fixup_die(ctf_cu_t *cup, boolean_t addpass)
2153 {
2154         ctf_dwmap_t *map;
2155 
2156         for (map = avl_first(&cup->cu_map); map != NULL;
2157             map = AVL_NEXT(&cup->cu_map, map)) {
2158                 int ret;
2159                 if (map->cdm_fix == B_FALSE)
2160                         continue;
2161                 if ((ret = ctf_dwarf_fixup_sou(cup, map->cdm_die, map->cdm_id,
2162                     addpass)) != 0)
2163                         return (ret);
2164         }
2165 
2166         return (0);
2167 }
2168 
























































2169 static ctf_dwfunc_t *
2170 ctf_dwarf_match_func(ctf_cu_t *cup, const char *file, const char *name,
2171     int bind)
2172 {
2173         ctf_dwfunc_t *cdf;
2174 
2175         if (bind == STB_WEAK)
2176                 return (NULL);
2177 
2178         /* Nothing we can do if we can't find a name to compare it to. */
2179         if (bind == STB_LOCAL && (file == NULL || cup->cu_name == NULL))
2180                 return (NULL);
2181 
2182         for (cdf = ctf_list_next(&cup->cu_funcs); cdf != NULL;
2183             cdf = ctf_list_next(cdf)) {
2184                 if (bind == STB_GLOBAL && cdf->cdf_global == B_FALSE)







2185                         continue;
2186                 if (bind == STB_LOCAL && cdf->cdf_global == B_TRUE)
2187                         continue;
2188                 if (strcmp(name, cdf->cdf_name) != 0)
2189                         continue;
2190                 if (bind == STB_LOCAL && strcmp(file, cup->cu_name) != 0)
2191                         continue;
2192                 return (cdf);
2193         }


2194 
2195         return (NULL);
2196 }

2197 static ctf_dwvar_t *
2198 ctf_dwarf_match_var(ctf_cu_t *cup, const char *file, const char *name,
2199     int bind)
2200 {
2201         ctf_dwvar_t *cdv;
2202 
2203         /* Nothing we can do if we can't find a name to compare it to. */


2204         if (bind == STB_LOCAL && (file == NULL || cup->cu_name == NULL))
2205                 return (NULL);
2206         ctf_dprintf("Still considering %s\n", name);
2207 
2208         for (cdv = ctf_list_next(&cup->cu_vars); cdv != NULL;
2209             cdv = ctf_list_next(cdv)) {
2210                 if (bind == STB_GLOBAL && cdv->cdv_global == B_FALSE)
2211                         continue;
2212                 if (bind == STB_LOCAL && cdv->cdv_global == B_TRUE)
2213                         continue;
2214                 if (strcmp(name, cdv->cdv_name) != 0)
2215                         continue;
2216                 if (bind == STB_LOCAL && strcmp(file, cup->cu_name) != 0)
2217                         continue;
2218                 return (cdv);
2219         }
2220 
2221         return (NULL);
2222 }
2223 
2224 static int
2225 ctf_dwarf_symtab_iter(ctf_cu_t *cup, ctf_dwarf_symtab_f *func, void *arg)
2226 {
2227         int ret;
2228         ulong_t i;
2229         ctf_file_t *fp = cup->cu_ctfp;
2230         const char *file = NULL;
2231         uintptr_t symbase = (uintptr_t)fp->ctf_symtab.cts_data;
2232         uintptr_t strbase = (uintptr_t)fp->ctf_strtab.cts_data;
2233 
2234         for (i = 0; i < fp->ctf_nsyms; i++) {
2235                 const char *name;
2236                 int type;
2237                 GElf_Sym gsym;
2238                 const GElf_Sym *gsymp;
2239 
2240                 if (fp->ctf_symtab.cts_entsize == sizeof (Elf32_Sym)) {
2241                         const Elf32_Sym *symp = (Elf32_Sym *)symbase + i;
2242                         type = ELF32_ST_TYPE(symp->st_info);
2243                         if (type == STT_FILE) {
2244                                 file = (char *)(strbase + symp->st_name);
2245                                 continue;
2246                         }
2247                         if (type != STT_OBJECT && type != STT_FUNC)
2248                                 continue;
2249                         if (ctf_sym_valid(strbase, type, symp->st_shndx,
2250                             symp->st_value, symp->st_name) == B_FALSE)
2251                                 continue;
2252                         name = (char *)(strbase + symp->st_name);
2253                         gsym.st_name = symp->st_name;
2254                         gsym.st_value = symp->st_value;
2255                         gsym.st_size = symp->st_size;
2256                         gsym.st_info = symp->st_info;
2257                         gsym.st_other = symp->st_other;
2258                         gsym.st_shndx = symp->st_shndx;
2259                         gsymp = &gsym;
2260                 } else {
2261                         const Elf64_Sym *symp = (Elf64_Sym *)symbase + i;
2262                         type = ELF64_ST_TYPE(symp->st_info);
2263                         if (type == STT_FILE) {
2264                                 file = (char *)(strbase + symp->st_name);
2265                                 continue;
2266                         }
2267                         if (type != STT_OBJECT && type != STT_FUNC)
2268                                 continue;
2269                         if (ctf_sym_valid(strbase, type, symp->st_shndx,
2270                             symp->st_value, symp->st_name) == B_FALSE)
2271                                 continue;
2272                         name = (char *)(strbase + symp->st_name);
2273                         gsymp = symp;
2274                 }
2275 
2276                 ret = func(cup, gsymp, i, file, name, arg);
2277                 if (ret != 0)
2278                         return (ret);
2279         }
2280 
2281         return (0);
2282 }
2283 
2284 static int
2285 ctf_dwarf_conv_funcvars_cb(ctf_cu_t *cup, const GElf_Sym *symp, ulong_t idx,
2286     const char *file, const char *name, void *arg)
2287 {
2288         int ret, bind, type;


2289 
2290         bind = GELF_ST_BIND(symp->st_info);
2291         type = GELF_ST_TYPE(symp->st_info);
2292 
2293         /*
2294          * Come back to weak symbols in another pass
2295          */
2296         if (bind == STB_WEAK)
2297                 return (0);
2298 
2299         if (type == STT_OBJECT) {
2300                 ctf_dwvar_t *cdv = ctf_dwarf_match_var(cup, file, name,
2301                     bind);
2302                 ctf_dprintf("match for %s (%d): %p\n", name, idx, cdv);
2303                 if (cdv == NULL)
2304                         return (0);
2305                 ret = ctf_add_object(cup->cu_ctfp, idx, cdv->cdv_type);
2306                 ctf_dprintf("added object %s\n", name);
2307         } else {
2308                 ctf_dwfunc_t *cdf = ctf_dwarf_match_func(cup, file, name,
2309                     bind);
2310                 if (cdf == NULL)
2311                         return (0);
2312                 ret = ctf_add_function(cup->cu_ctfp, idx, &cdf->cdf_fip,
2313                     cdf->cdf_argv);

2314         }
2315 
2316         if (ret == CTF_ERR) {
2317                 return (ctf_errno(cup->cu_ctfp));
2318         }
2319 
2320         return (0);
2321 }
2322 
2323 static int
2324 ctf_dwarf_conv_funcvars(ctf_cu_t *cup)
2325 {
2326         return (ctf_dwarf_symtab_iter(cup, ctf_dwarf_conv_funcvars_cb, NULL));
2327 }
2328 
2329 /*
2330  * If we have a weak symbol, attempt to find the strong symbol it will resolve
2331  * to.  Note: the code where this actually happens is in sym_process() in
2332  * cmd/sgs/libld/common/syms.c
2333  *
2334  * Finding the matching symbol is unfortunately not trivial.  For a symbol to be
2335  * a candidate, it must:
2336  *
2337  * - have the same type (function, object)
2338  * - have the same value (address)
2339  * - have the same size
2340  * - not be another weak symbol
2341  * - belong to the same section (checked via section index)
2342  *
2343  * To perform this check, we first iterate over the symbol table. For each weak
2344  * symbol that we encounter, we then do a second walk over the symbol table,
2345  * calling ctf_dwarf_conv_check_weak(). If a symbol matches the above, then it's
2346  * either a local or global symbol. If we find a global symbol then we go with


2348  *
2349  * If instead, we find a local symbol, things are more complicated. The first
2350  * thing we do is to try and see if we have file information about both symbols
2351  * (STT_FILE). If they both have file information and it matches, then we treat
2352  * that as a good match and stop searching for additional matches.
2353  *
2354  * Otherwise, this means we have a non-matching file and a local symbol. We
2355  * treat this as a candidate and if we find a better match (one of the two cases
2356  * above), use that instead. There are two different ways this can happen.
2357  * Either this is a completely different symbol, or it's a once-global symbol
2358  * that was scoped to local via a mapfile.  In the latter case, curfile is
2359  * likely inaccurate since the linker does not preserve the needed curfile in
2360  * the order of the symbol table (see the comments about locally scoped symbols
2361  * in libld's update_osym()).  As we can't tell this case from the former one,
2362  * we use this symbol iff no other matching symbol is found.
2363  *
2364  * What we really need here is a SUNW section containing weak<->strong mappings
2365  * that we can consume.
2366  */
/*
 * Search state that ctf_dwarf_conv_weaks_cb() fills in and passes as the
 * private argument of the symbol table walk that uses
 * ctf_dwarf_conv_check_weak() as its callback.
 */
typedef struct ctf_dwarf_weak_arg {
	const GElf_Sym *cweak_symp;	/* the weak symbol being resolved */
	const char *cweak_file;		/* file name seen with that symbol */
	/*
	 * Starts as B_FALSE. NOTE(review): presumably set by
	 * ctf_dwarf_conv_check_weak() when it records a fallback match;
	 * confirm against that function.
	 */
	boolean_t cweak_candidate;
	ulong_t cweak_idx;		/* index of the symbol to copy from */
} ctf_dwarf_weak_arg_t;
2373 
2374 static int
2375 ctf_dwarf_conv_check_weak(ctf_cu_t *cup, const GElf_Sym *symp,
2376     ulong_t idx, const char *file, const char *name, void *arg)
2377 {
2378         ctf_dwarf_weak_arg_t *cweak = arg;
2379         const GElf_Sym *wsymp = cweak->cweak_symp;
2380 


2381         ctf_dprintf("comparing weak to %s\n", name);
2382 
2383         if (GELF_ST_BIND(symp->st_info) == STB_WEAK) {
2384                 return (0);
2385         }
2386 
2387         if (GELF_ST_TYPE(wsymp->st_info) != GELF_ST_TYPE(symp->st_info)) {
2388                 return (0);
2389         }
2390 
2391         if (wsymp->st_value != symp->st_value) {
2392                 return (0);
2393         }
2394 
2395         if (wsymp->st_size != symp->st_size) {
2396                 return (0);
2397         }
2398 
2399         if (wsymp->st_shndx != symp->st_shndx) {
2400                 return (0);


2459                 if (args == NULL)
2460                         return (ENOMEM);
2461 
2462                 if (ctf_func_args(cup->cu_ctfp, matchidx, fip.ctc_argc, args) ==
2463                     CTF_ERR) {
2464                         ctf_free(args, sizeof (ctf_id_t) * fip.ctc_argc);
2465                         return (ctf_errno(cup->cu_ctfp));
2466                 }
2467         }
2468 
2469         ret = ctf_add_function(cup->cu_ctfp, idx, &fip, args);
2470         if (args != NULL)
2471                 ctf_free(args, sizeof (ctf_id_t) * fip.ctc_argc);
2472         if (ret == CTF_ERR)
2473                 return (ctf_errno(cup->cu_ctfp));
2474 
2475         return (0);
2476 }
2477 
2478 static int
2479 ctf_dwarf_conv_weaks_cb(ctf_cu_t *cup, const GElf_Sym *symp,
2480     ulong_t idx, const char *file, const char *name, void *arg)
2481 {
2482         int ret, type;
2483         ctf_dwarf_weak_arg_t cweak;

2484 
2485         /*
2486          * We only care about weak symbols.
2487          */
2488         if (GELF_ST_BIND(symp->st_info) != STB_WEAK)
2489                 return (0);
2490 
2491         type = GELF_ST_TYPE(symp->st_info);
2492         ASSERT(type == STT_OBJECT || type == STT_FUNC);
2493 
2494         /*
2495          * For each weak symbol we encounter, we need to do a second iteration
2496          * to try and find a match. We should probably think about other
2497          * techniques to try and save us time in the future.
2498          */
2499         cweak.cweak_symp = symp;
2500         cweak.cweak_file = file;
2501         cweak.cweak_candidate = B_FALSE;
2502         cweak.cweak_idx = 0;
2503 
2504         ctf_dprintf("Trying to find weak equiv for %s\n", name);
2505 
2506         ret = ctf_dwarf_symtab_iter(cup, ctf_dwarf_conv_check_weak, &cweak);
2507         VERIFY(ret == 0 || ret == 1);
2508 
2509         /*
2510          * Nothing was ever found, we're not going to add anything for this
2511          * entry.
2512          */
2513         if (ret == 0 && cweak.cweak_candidate == B_FALSE) {
2514                 ctf_dprintf("found no weak match for %s\n", name);
2515                 return (0);
2516         }
2517 
2518         /*
2519          * Now, finally go and add the type based on the match.
2520          */

2521         if (type == STT_OBJECT) {
2522                 ret = ctf_dwarf_duplicate_sym(cup, idx, cweak.cweak_idx);
2523         } else {
2524                 ret = ctf_dwarf_duplicate_func(cup, idx, cweak.cweak_idx);
2525         }
2526 
2527         return (ret);
2528 }
2529 
2530 static int
2531 ctf_dwarf_conv_weaks(ctf_cu_t *cup)
2532 {
2533         return (ctf_dwarf_symtab_iter(cup, ctf_dwarf_conv_weaks_cb, NULL));
2534 }
2535 
2536 /* ARGSUSED */
2537 static int
2538 ctf_dwarf_convert_one(void *arg, void *unused)
2539 {
2540         int ret;
2541         ctf_file_t *dedup;
2542         ctf_cu_t *cup = arg;
2543 
2544         ctf_dprintf("converting die: %s\n", cup->cu_name);
2545         ctf_dprintf("max offset: %x\n", cup->cu_maxoff);
2546         VERIFY(cup != NULL);
2547 
2548         ret = ctf_dwarf_convert_die(cup, cup->cu_cu);
2549         ctf_dprintf("ctf_dwarf_convert_die (%s) returned %d\n", cup->cu_name,
2550             ret);
2551         if (ret != 0) {
2552                 return (ret);
2553         }


2584                     "failed to convert strong functions and variables"));
2585         }
2586 
2587         if (ctf_update(cup->cu_ctfp) != 0) {
2588                 return (ctf_dwarf_error(cup, cup->cu_ctfp, 0,
2589                     "failed to update output ctf container"));
2590         }
2591 
2592         if (cup->cu_doweaks == B_TRUE) {
2593                 if ((ret = ctf_dwarf_conv_weaks(cup)) != 0) {
2594                         return (ctf_dwarf_error(cup, NULL, ret,
2595                             "failed to convert weak functions and variables"));
2596                 }
2597 
2598                 if (ctf_update(cup->cu_ctfp) != 0) {
2599                         return (ctf_dwarf_error(cup, cup->cu_ctfp, 0,
2600                             "failed to update output ctf container"));
2601                 }
2602         }
2603 
2604         ctf_phase_dump(cup->cu_ctfp, "pre-dedup");
2605         ctf_dprintf("adding inputs for dedup\n");
2606         if ((ret = ctf_merge_add(cup->cu_cmh, cup->cu_ctfp)) != 0) {
2607                 return (ctf_dwarf_error(cup, NULL, ret,
2608                     "failed to add inputs for merge"));
2609         }
2610 
2611         ctf_dprintf("starting merge\n");
2612         if ((ret = ctf_merge_dedup(cup->cu_cmh, &dedup)) != 0) {
2613                 return (ctf_dwarf_error(cup, NULL, ret,
2614                     "failed to deduplicate die"));
2615         }
2616         ctf_close(cup->cu_ctfp);
2617         cup->cu_ctfp = dedup;

2618 
2619         return (0);
2620 }
2621 
2622 /*
2623  * Note, we expect that if we're returning a ctf_file_t from one of the dies,
2624  * say in the single node case, it's been saved and the entry here has been set
2625  * to NULL, which ctf_close happily ignores.
2626  */
2627 static void
2628 ctf_dwarf_free_die(ctf_cu_t *cup)
2629 {
2630         ctf_dwfunc_t *cdf, *ndf;
2631         ctf_dwvar_t *cdv, *ndv;
2632         ctf_dwbitf_t *cdb, *ndb;
2633         ctf_dwmap_t *map;
2634         void *cookie;
2635         Dwarf_Error derr;
2636 
2637         ctf_dprintf("Beginning to free die: %p\n", cup);


2887         }
2888 
2889         for (i = 0; i < ndies; i++) {
2890                 cup = &cdies[i];
2891                 ret = dwarf_elf_init(elf, DW_DLC_READ, NULL, NULL,
2892                     &cup->cu_dwarf, &derr);
2893                 if (ret != 0) {
2894                         ctf_free(cdies, sizeof (ctf_cu_t) * ndies);
2895                         (void) snprintf(errmsg, errlen,
2896                             "failed to initialize DWARF: %s\n",
2897                             dwarf_errmsg(derr));
2898                         *errp = ECTF_CONVBKERR;
2899                         return (CTF_CONV_ERROR);
2900                 }
2901 
2902                 ret = ctf_dwarf_init_die(fd, elf, &cdies[i], i, errmsg, errlen);
2903                 if (ret != 0) {
2904                         *errp = ret;
2905                         goto out;
2906                 }

2907                 cup->cu_doweaks = ndies > 1 ? B_FALSE : B_TRUE;
2908         }
2909 
2910         ctf_dprintf("found %d DWARF die(s)\n", ndies);
2911 
2912         /*
2913          * If we only have one compilation unit, there's no reason to use
2914          * multiple threads, even if the user requested them. After all, they
2915          * just gave us an upper bound.
2916          */
2917         if (ndies == 1)
2918                 nthrs = 1;
2919 
2920         if (workq_init(&wqp, nthrs) == -1) {
2921                 *errp = errno;
2922                 goto out;
2923         }
2924 
2925         for (i = 0; i < ndies; i++) {
2926                 cup = &cdies[i];
2927                 ctf_dprintf("adding die %s: %p, %x %x\n", cup->cu_name,
2928                     cup->cu_cu, cup->cu_cuoff, cup->cu_maxoff);
2929                 if (workq_add(wqp, cup) == -1) {
2930                         *errp = errno;
2931                         goto out;
2932                 }
2933         }
2934 
2935         ret = workq_work(wqp, ctf_dwarf_convert_one, NULL, errp);
2936         if (ret == WORKQ_ERROR) {
2937                 *errp = errno;
2938                 goto out;
2939         } else if (ret == WORKQ_UERROR) {
2940                 ctf_dprintf("internal convert failed: %s\n",
2941                     ctf_errmsg(*errp));
2942                 goto out;
2943         }
2944 
2945         ctf_dprintf("Determining next phase: have %d dies\n", ndies);
2946         if (ndies != 1) {
2947                 ctf_merge_t *cmp;
2948 
2949                 cmp = ctf_merge_init(fd, &ret);
2950                 if (cmp == NULL) {
2951                         *errp = ret;
2952                         goto out;
2953                 }
2954 
2955                 ctf_dprintf("setting threads\n");
2956                 if ((ret = ctf_merge_set_nthreads(cmp, nthrs)) != 0) {
2957                         ctf_merge_fini(cmp);
2958                         *errp = ret;
2959                         goto out;
2960                 }
2961 
2962                 ctf_dprintf("adding dies\n");
2963                 for (i = 0; i < ndies; i++) {
2964                         cup = &cdies[i];


2965                         if ((ret = ctf_merge_add(cmp, cup->cu_ctfp)) != 0) {
2966                                 ctf_merge_fini(cmp);
2967                                 *errp = ret;
2968                                 goto out;
2969                         }
2970                 }
2971 
2972                 ctf_dprintf("performing merge\n");
2973                 ret = ctf_merge_merge(cmp, fpp);
2974                 if (ret != 0) {
2975                         ctf_dprintf("failed merge!\n");
2976                         *fpp = NULL;
2977                         ctf_merge_fini(cmp);
2978                         *errp = ret;
2979                         goto out;
2980                 }
2981                 ctf_merge_fini(cmp);
2982                 *errp = 0;
2983                 ctf_dprintf("successfully converted!\n");
2984         } else {


  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 /*
  26  * Copyright 2012 Jason King.  All rights reserved.
  27  * Use is subject to license terms.
  28  */
  29 
  30 /*
  31  * Copyright 2019 Joyent, Inc.
  32  */
  33 
  34 /*
  35  * CTF DWARF conversion theory.
  36  *
  37  * DWARF data contains a series of compilation units. Each compilation unit
  38  * generally refers to an object file or what once was, in the case of linked
  39  * binaries and shared objects. Each compilation unit has a series of what DWARF
  40  * calls a DIE (Debugging Information Entry). The set of entries that we care
  41  * about have type information stored in a series of attributes. Each DIE also
  42  * has a tag that identifies the kind of attributes that it has.
  43  *
  44  * A given DIE may itself have children. For example, a DIE that represents a
  45  * structure has children which represent members. Whenever we encounter a DIE
  46  * that has children or other values or types associated with it, we recursively
  47  * process those children first so that way we can then refer to the generated
  48  * CTF type id while processing its parent. This reduces the amount of unknowns
  49  * and fixups that we need. It also ensures that we don't accidentally add types
  50  * that an overzealous compiler might add to the DWARF data but aren't used by
  51  * anything in the system.


 265         avl_tree_t      cu_map;         /* map die offsets to CTF types */
 266         char            *cu_errbuf;     /* error message buffer */
 267         size_t          cu_errlen;      /* error message buffer length */
 268         size_t          cu_ptrsz;       /* object's pointer size */
 269         boolean_t       cu_bigend;      /* is it big endian */
 270         boolean_t       cu_doweaks;     /* should we convert weak symbols? */
 271         uint_t          cu_mach;        /* machine type */
 272         ctf_id_t        cu_voidtid;     /* void pointer */
 273         ctf_id_t        cu_longtid;     /* id for a 'long' */
 274 } ctf_cu_t;
 275 
/*
 * Forward declarations of static routines so that they can be referenced
 * before their definitions.
 */
static int ctf_dwarf_offset(ctf_cu_t *, Dwarf_Die, Dwarf_Off *);
static int ctf_dwarf_convert_die(ctf_cu_t *, Dwarf_Die);
static int ctf_dwarf_convert_type(ctf_cu_t *, Dwarf_Die, ctf_id_t *, int);

static int ctf_dwarf_function_count(ctf_cu_t *, Dwarf_Die, ctf_funcinfo_t *,
    boolean_t);
static int ctf_dwarf_convert_fargs(ctf_cu_t *, Dwarf_Die, ctf_funcinfo_t *,
    ctf_id_t *);
 284 



 285 /*
 286  * This is a generic way to set a CTF Conversion backend error depending on what
 287  * we were doing. Unless it was one of a specific set of errors that don't
 288  * indicate a programming / translation bug, eg. ENOMEM, then we transform it
 289  * into a CTF backend error and fill in the error buffer.
 290  */
 291 static int
 292 ctf_dwarf_error(ctf_cu_t *cup, ctf_file_t *cfp, int err, const char *fmt, ...)
 293 {
 294         va_list ap;
 295         int ret;
 296         size_t off = 0;
 297         ssize_t rem = cup->cu_errlen;
 298         if (cfp != NULL)
 299                 err = ctf_errno(cfp);
 300 
 301         if (err == ENOMEM)
 302                 return (err);
 303 
 304         ret = snprintf(cup->cu_errbuf, rem, "die %s: ", cup->cu_name);


2146 }
2147 
2148 static int
2149 ctf_dwarf_fixup_die(ctf_cu_t *cup, boolean_t addpass)
2150 {
2151         ctf_dwmap_t *map;
2152 
2153         for (map = avl_first(&cup->cu_map); map != NULL;
2154             map = AVL_NEXT(&cup->cu_map, map)) {
2155                 int ret;
2156                 if (map->cdm_fix == B_FALSE)
2157                         continue;
2158                 if ((ret = ctf_dwarf_fixup_sou(cup, map->cdm_die, map->cdm_id,
2159                     addpass)) != 0)
2160                         return (ret);
2161         }
2162 
2163         return (0);
2164 }
2165 
2166 /*
2167  * The DWARF information about a symbol and the information in the symbol table
2168  * may not be the same due to symbol reduction that is performed by ld due to a
2169  * mapfile or other such directive. We process weak symbols at a later time.
2170  *
2171  * The following are the rules that we employ:
2172  *
2173  * 1. A DWARF function that is considered exported matches STB_GLOBAL entries
2174  * with the same name.
2175  *
2176  * 2. A DWARF function that is considered exported matches STB_LOCAL entries
2177  * with the same name and the same file. This case may happen due to mapfile
2178  * reduction.
2179  *
2180  * 3. A DWARF function that is not considered exported matches STB_LOCAL entries
2181  * with the same name and the same file.
2182  *
2183  * 4. A DWARF function that is considered exported and has the same name as an
2184  * STB_LOCAL entry, but a different file, is a 'fuzzy' match. This may also
2185  * happen due to mapfile reduction. Fuzzy matching is only used when we know
2186  * that the file in question refers to the primary object. This is because
2187  * when a symbol is reduced in a mapfile, it's always going to be tagged as a
2188  * local value in the generated output and it is considered to belong to the
2189  * primary file, which is the first STT_FILE symbol we see.
2190  */
2191 static boolean_t
2192 ctf_dwarf_symbol_match(const char *symtab_file, const char *symtab_name,
2193     uint_t symtab_bind, const char *dwarf_file, const char *dwarf_name,
2194     boolean_t dwarf_global, boolean_t *is_fuzzy)
2195 {
2196         *is_fuzzy = B_FALSE;
2197 
2198         if (symtab_bind != STB_LOCAL && symtab_bind != STB_GLOBAL) {
2199                 return (B_FALSE);
2200         }
2201 
2202         if (strcmp(symtab_name, dwarf_name) != 0) {
2203                 return (B_FALSE);
2204         }
2205 
2206         if (symtab_bind == STB_GLOBAL) {
2207                 return (dwarf_global);
2208         }
2209 
2210         if (strcmp(symtab_file, dwarf_file) == 0) {
2211                 return (B_TRUE);
2212         }
2213 
2214         if (dwarf_global) {
2215                 *is_fuzzy = B_TRUE;
2216                 return (B_TRUE);
2217         }
2218 
2219         return (B_FALSE);
2220 }
2221 
2222 static ctf_dwfunc_t *
2223 ctf_dwarf_match_func(ctf_cu_t *cup, const char *file, const char *name,
2224     uint_t bind, boolean_t primary)
2225 {
2226         ctf_dwfunc_t *cdf, *fuzzy = NULL;
2227 
2228         if (bind == STB_WEAK)
2229                 return (NULL);
2230 

2231         if (bind == STB_LOCAL && (file == NULL || cup->cu_name == NULL))
2232                 return (NULL);
2233 
2234         for (cdf = ctf_list_next(&cup->cu_funcs); cdf != NULL;
2235             cdf = ctf_list_next(cdf)) {
2236                 boolean_t is_fuzzy = B_FALSE;
2237 
2238                 if (ctf_dwarf_symbol_match(file, name, bind, cup->cu_name,
2239                     cdf->cdf_name, cdf->cdf_global, &is_fuzzy)) {
2240                         if (is_fuzzy) {
2241                                 if (primary) {
2242                                         fuzzy = cdf;
2243                                 }
2244                                 continue;
2245                         } else {





2246                                 return (cdf);
2247                         }
2248                 }
2249         }
2250 
2251         return (fuzzy);
2252 }
2253 
2254 static ctf_dwvar_t *
2255 ctf_dwarf_match_var(ctf_cu_t *cup, const char *file, const char *name,
2256     uint_t bind, boolean_t primary)
2257 {
2258         ctf_dwvar_t *cdv, *fuzzy = NULL;
2259 
2260         if (bind == STB_WEAK)
2261                 return (NULL);
2262 
2263         if (bind == STB_LOCAL && (file == NULL || cup->cu_name == NULL))
2264                 return (NULL);

2265 
2266         for (cdv = ctf_list_next(&cup->cu_vars); cdv != NULL;
2267             cdv = ctf_list_next(cdv)) {
2268                 boolean_t is_fuzzy = B_FALSE;









2269 
2270                 if (ctf_dwarf_symbol_match(file, name, bind, cup->cu_name,
2271                     cdv->cdv_name, cdv->cdv_global, &is_fuzzy)) {
2272                         if (is_fuzzy) {
2273                                 if (primary) {
2274                                         fuzzy = cdv;




















2275                                 }













2276                         } else {
2277                                 return (cdv);




2278                         }







2279                 }




2280         }
2281 
2282         return (fuzzy);
2283 }
2284 
2285 static int
2286 ctf_dwarf_conv_funcvars_cb(const Elf64_Sym *symp, ulong_t idx,
2287     const char *file, const char *name, boolean_t primary, void *arg)
2288 {
2289         int ret;
2290         uint_t bind, type;
2291         ctf_cu_t *cup = arg;
2292 
2293         bind = GELF_ST_BIND(symp->st_info);
2294         type = GELF_ST_TYPE(symp->st_info);
2295 
2296         /*
2297          * Come back to weak symbols in another pass
2298          */
2299         if (bind == STB_WEAK)
2300                 return (0);
2301 
2302         if (type == STT_OBJECT) {
2303                 ctf_dwvar_t *cdv = ctf_dwarf_match_var(cup, file, name,
2304                     bind, primary);

2305                 if (cdv == NULL)
2306                         return (0);
2307                 ret = ctf_add_object(cup->cu_ctfp, idx, cdv->cdv_type);
2308                 ctf_dprintf("added object %s->%ld\n", name, cdv->cdv_type);
2309         } else {
2310                 ctf_dwfunc_t *cdf = ctf_dwarf_match_func(cup, file, name,
2311                     bind, primary);
2312                 if (cdf == NULL)
2313                         return (0);
2314                 ret = ctf_add_function(cup->cu_ctfp, idx, &cdf->cdf_fip,
2315                     cdf->cdf_argv);
2316                 ctf_dprintf("added function %s\n", name);
2317         }
2318 
2319         if (ret == CTF_ERR) {
2320                 return (ctf_errno(cup->cu_ctfp));
2321         }
2322 
2323         return (0);
2324 }
2325 
2326 static int
2327 ctf_dwarf_conv_funcvars(ctf_cu_t *cup)
2328 {
2329         return (ctf_symtab_iter(cup->cu_ctfp, ctf_dwarf_conv_funcvars_cb, cup));
2330 }
2331 
2332 /*
2333  * If we have a weak symbol, attempt to find the strong symbol it will resolve
2334  * to.  Note: the code where this actually happens is in sym_process() in
2335  * cmd/sgs/libld/common/syms.c
2336  *
2337  * Finding the matching symbol is unfortunately not trivial.  For a symbol to be
2338  * a candidate, it must:
2339  *
2340  * - have the same type (function, object)
2341  * - have the same value (address)
2342  * - have the same size
2343  * - not be another weak symbol
2344  * - belong to the same section (checked via section index)
2345  *
2346  * To perform this check, we first iterate over the symbol table. For each weak
2347  * symbol that we encounter, we then do a second walk over the symbol table,
2348  * calling ctf_dwarf_conv_check_weak(). If a symbol matches the above, then it's
2349  * either a local or global symbol. If we find a global symbol then we go with
2350  * it and stop searching for additional matches.
2351  *
2352  * If instead, we find a local symbol, things are more complicated. The first
2353  * thing we do is to try and see if we have file information about both symbols
2354  * (STT_FILE). If they both have file information and it matches, then we treat
2355  * that as a good match and stop searching for additional matches.
2356  *
2357  * Otherwise, this means we have a non-matching file and a local symbol. We
2358  * treat this as a candidate and if we find a better match (one of the two cases
2359  * above), use that instead. There are two different ways this can happen.
2360  * Either this is a completely different symbol, or it's a once-global symbol
2361  * that was scoped to local via a mapfile.  In the latter case, curfile is
2362  * likely inaccurate since the linker does not preserve the needed curfile in
2363  * the order of the symbol table (see the comments about locally scoped symbols
2364  * in libld's update_osym()).  As we can't tell this case from the former one,
2365  * we use this symbol iff no other matching symbol is found.
2366  *
2367  * What we really need here is a SUNW section containing weak<->strong mappings
2368  * that we can consume.
2369  */
2370 typedef struct ctf_dwarf_weak_arg {
2371         const Elf64_Sym *cweak_symp;
2372         const char *cweak_file;
2373         boolean_t cweak_candidate;
2374         ulong_t cweak_idx;
2375 } ctf_dwarf_weak_arg_t;
2376 
2377 static int
2378 ctf_dwarf_conv_check_weak(const Elf64_Sym *symp, ulong_t idx, const char *file,
2379     const char *name, boolean_t primary, void *arg)
2380 {
2381         ctf_dwarf_weak_arg_t *cweak = arg;

2382 
2383         const Elf64_Sym *wsymp = cweak->cweak_symp;
2384 
2385         ctf_dprintf("comparing weak to %s\n", name);
2386 
2387         if (GELF_ST_BIND(symp->st_info) == STB_WEAK) {
2388                 return (0);
2389         }
2390 
2391         if (GELF_ST_TYPE(wsymp->st_info) != GELF_ST_TYPE(symp->st_info)) {
2392                 return (0);
2393         }
2394 
2395         if (wsymp->st_value != symp->st_value) {
2396                 return (0);
2397         }
2398 
2399         if (wsymp->st_size != symp->st_size) {
2400                 return (0);
2401         }
2402 
2403         if (wsymp->st_shndx != symp->st_shndx) {
2404                 return (0);


2463                 if (args == NULL)
2464                         return (ENOMEM);
2465 
2466                 if (ctf_func_args(cup->cu_ctfp, matchidx, fip.ctc_argc, args) ==
2467                     CTF_ERR) {
2468                         ctf_free(args, sizeof (ctf_id_t) * fip.ctc_argc);
2469                         return (ctf_errno(cup->cu_ctfp));
2470                 }
2471         }
2472 
2473         ret = ctf_add_function(cup->cu_ctfp, idx, &fip, args);
2474         if (args != NULL)
2475                 ctf_free(args, sizeof (ctf_id_t) * fip.ctc_argc);
2476         if (ret == CTF_ERR)
2477                 return (ctf_errno(cup->cu_ctfp));
2478 
2479         return (0);
2480 }
2481 
2482 static int
2483 ctf_dwarf_conv_weaks_cb(const Elf64_Sym *symp, ulong_t idx, const char *file,
2484     const char *name, boolean_t primary, void *arg)
2485 {
2486         int ret, type;
2487         ctf_dwarf_weak_arg_t cweak;
2488         ctf_cu_t *cup = arg;
2489 
2490         /*
2491          * We only care about weak symbols.
2492          */
2493         if (GELF_ST_BIND(symp->st_info) != STB_WEAK)
2494                 return (0);
2495 
2496         type = GELF_ST_TYPE(symp->st_info);
2497         ASSERT(type == STT_OBJECT || type == STT_FUNC);
2498 
2499         /*
2500          * For each weak symbol we encounter, we need to do a second iteration
2501          * to try and find a match. We should probably think about other
2502          * techniques to try and save us time in the future.
2503          */
2504         cweak.cweak_symp = symp;
2505         cweak.cweak_file = file;
2506         cweak.cweak_candidate = B_FALSE;
2507         cweak.cweak_idx = 0;
2508 
2509         ctf_dprintf("Trying to find weak equiv for %s\n", name);
2510 
2511         ret = ctf_symtab_iter(cup->cu_ctfp, ctf_dwarf_conv_check_weak, &cweak);
2512         VERIFY(ret == 0 || ret == 1);
2513 
2514         /*
2515          * Nothing was ever found, we're not going to add anything for this
2516          * entry.
2517          */
2518         if (ret == 0 && cweak.cweak_candidate == B_FALSE) {
2519                 ctf_dprintf("found no weak match for %s\n", name);
2520                 return (0);
2521         }
2522 
2523         /*
2524          * Now, finally go and add the type based on the match.
2525          */
2526         ctf_dprintf("matched weak symbol %lu to %lu\n", idx, cweak.cweak_idx);
2527         if (type == STT_OBJECT) {
2528                 ret = ctf_dwarf_duplicate_sym(cup, idx, cweak.cweak_idx);
2529         } else {
2530                 ret = ctf_dwarf_duplicate_func(cup, idx, cweak.cweak_idx);
2531         }
2532 
2533         return (ret);
2534 }
2535 
2536 static int
2537 ctf_dwarf_conv_weaks(ctf_cu_t *cup)
2538 {
2539         return (ctf_symtab_iter(cup->cu_ctfp, ctf_dwarf_conv_weaks_cb, cup));
2540 }
2541 
2542 /* ARGSUSED */
2543 static int
2544 ctf_dwarf_convert_one(void *arg, void *unused)
2545 {
2546         int ret;
2547         ctf_file_t *dedup;
2548         ctf_cu_t *cup = arg;
2549 
2550         ctf_dprintf("converting die: %s\n", cup->cu_name);
2551         ctf_dprintf("max offset: %x\n", cup->cu_maxoff);
2552         VERIFY(cup != NULL);
2553 
2554         ret = ctf_dwarf_convert_die(cup, cup->cu_cu);
2555         ctf_dprintf("ctf_dwarf_convert_die (%s) returned %d\n", cup->cu_name,
2556             ret);
2557         if (ret != 0) {
2558                 return (ret);
2559         }


2590                     "failed to convert strong functions and variables"));
2591         }
2592 
2593         if (ctf_update(cup->cu_ctfp) != 0) {
2594                 return (ctf_dwarf_error(cup, cup->cu_ctfp, 0,
2595                     "failed to update output ctf container"));
2596         }
2597 
2598         if (cup->cu_doweaks == B_TRUE) {
2599                 if ((ret = ctf_dwarf_conv_weaks(cup)) != 0) {
2600                         return (ctf_dwarf_error(cup, NULL, ret,
2601                             "failed to convert weak functions and variables"));
2602                 }
2603 
2604                 if (ctf_update(cup->cu_ctfp) != 0) {
2605                         return (ctf_dwarf_error(cup, cup->cu_ctfp, 0,
2606                             "failed to update output ctf container"));
2607                 }
2608         }
2609 
2610         ctf_phase_dump(cup->cu_ctfp, "pre-dwarf-dedup", cup->cu_name);
2611         ctf_dprintf("adding inputs for dedup\n");
2612         if ((ret = ctf_merge_add(cup->cu_cmh, cup->cu_ctfp)) != 0) {
2613                 return (ctf_dwarf_error(cup, NULL, ret,
2614                     "failed to add inputs for merge"));
2615         }
2616 
2617         ctf_dprintf("starting dedup of %s\n", cup->cu_name);
2618         if ((ret = ctf_merge_dedup(cup->cu_cmh, &dedup)) != 0) {
2619                 return (ctf_dwarf_error(cup, NULL, ret,
2620                     "failed to deduplicate die"));
2621         }
2622         ctf_close(cup->cu_ctfp);
2623         cup->cu_ctfp = dedup;
2624         ctf_phase_dump(cup->cu_ctfp, "post-dwarf-dedup", cup->cu_name);
2625 
2626         return (0);
2627 }
2628 
2629 /*
2630  * Note, we expect that if we're returning a ctf_file_t from one of the dies,
2631  * say in the single node case, it's been saved and the entry here has been set
2632  * to NULL, which ctf_close happily ignores.
2633  */
2634 static void
2635 ctf_dwarf_free_die(ctf_cu_t *cup)
2636 {
2637         ctf_dwfunc_t *cdf, *ndf;
2638         ctf_dwvar_t *cdv, *ndv;
2639         ctf_dwbitf_t *cdb, *ndb;
2640         ctf_dwmap_t *map;
2641         void *cookie;
2642         Dwarf_Error derr;
2643 
2644         ctf_dprintf("Beginning to free die: %p\n", cup);


2894         }
2895 
2896         for (i = 0; i < ndies; i++) {
2897                 cup = &cdies[i];
2898                 ret = dwarf_elf_init(elf, DW_DLC_READ, NULL, NULL,
2899                     &cup->cu_dwarf, &derr);
2900                 if (ret != 0) {
2901                         ctf_free(cdies, sizeof (ctf_cu_t) * ndies);
2902                         (void) snprintf(errmsg, errlen,
2903                             "failed to initialize DWARF: %s\n",
2904                             dwarf_errmsg(derr));
2905                         *errp = ECTF_CONVBKERR;
2906                         return (CTF_CONV_ERROR);
2907                 }
2908 
2909                 ret = ctf_dwarf_init_die(fd, elf, &cdies[i], i, errmsg, errlen);
2910                 if (ret != 0) {
2911                         *errp = ret;
2912                         goto out;
2913                 }
2914 
2915                 cup->cu_doweaks = ndies > 1 ? B_FALSE : B_TRUE;
2916         }
2917 
2918         ctf_dprintf("found %d DWARF CUs\n", ndies);
2919 
2920         /*
2921          * If we only have one compilation unit, there's no reason to use
2922          * multiple threads, even if the user requested them. After all, they
2923          * just gave us an upper bound.
2924          */
2925         if (ndies == 1)
2926                 nthrs = 1;
2927 
2928         if (workq_init(&wqp, nthrs) == -1) {
2929                 *errp = errno;
2930                 goto out;
2931         }
2932 
2933         for (i = 0; i < ndies; i++) {
2934                 cup = &cdies[i];
2935                 ctf_dprintf("adding cu %s: %p, %x %x\n", cup->cu_name,
2936                     cup->cu_cu, cup->cu_cuoff, cup->cu_maxoff);
2937                 if (workq_add(wqp, cup) == -1) {
2938                         *errp = errno;
2939                         goto out;
2940                 }
2941         }
2942 
2943         ret = workq_work(wqp, ctf_dwarf_convert_one, NULL, errp);
2944         if (ret == WORKQ_ERROR) {
2945                 *errp = errno;
2946                 goto out;
2947         } else if (ret == WORKQ_UERROR) {
2948                 ctf_dprintf("internal convert failed: %s\n",
2949                     ctf_errmsg(*errp));
2950                 goto out;
2951         }
2952 
2953         ctf_dprintf("Determining next phase: have %d CUs\n", ndies);
2954         if (ndies != 1) {
2955                 ctf_merge_t *cmp;
2956 
2957                 cmp = ctf_merge_init(fd, &ret);
2958                 if (cmp == NULL) {
2959                         *errp = ret;
2960                         goto out;
2961                 }
2962 
2963                 ctf_dprintf("setting threads\n");
2964                 if ((ret = ctf_merge_set_nthreads(cmp, nthrs)) != 0) {
2965                         ctf_merge_fini(cmp);
2966                         *errp = ret;
2967                         goto out;
2968                 }
2969 

2970                 for (i = 0; i < ndies; i++) {
2971                         cup = &cdies[i];
2972                         ctf_dprintf("adding cu %s (%p)\n", cup->cu_name,
2973                             cup->cu_ctfp);
2974                         if ((ret = ctf_merge_add(cmp, cup->cu_ctfp)) != 0) {
2975                                 ctf_merge_fini(cmp);
2976                                 *errp = ret;
2977                                 goto out;
2978                         }
2979                 }
2980 
2981                 ctf_dprintf("performing merge\n");
2982                 ret = ctf_merge_merge(cmp, fpp);
2983                 if (ret != 0) {
2984                         ctf_dprintf("failed merge!\n");
2985                         *fpp = NULL;
2986                         ctf_merge_fini(cmp);
2987                         *errp = ret;
2988                         goto out;
2989                 }
2990                 ctf_merge_fini(cmp);
2991                 *errp = 0;
2992                 ctf_dprintf("successfully converted!\n");
2993         } else {