11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25 /*
26 * Copyright 2012 Jason King. All rights reserved.
27 * Use is subject to license terms.
28 */
29
30 /*
31 * Copyright 2018 Joyent, Inc.
32 */
33
34 /*
35 * CTF DWARF conversion theory.
36 *
37 * DWARF data contains a series of compilation units. Each compilation unit
38 * generally refers to an object file or what once was, in the case of linked
39 * binaries and shared objects. Each compilation unit has a series of what DWARF
40 * calls a DIE (Debugging Information Entry). The set of entries that we care
41 * about have type information stored in a series of attributes. Each DIE also
42 * has a tag that identifies the kind of attributes that it has.
43 *
44 * A given DIE may itself have children. For example, a DIE that represents a
45 * structure has children which represent members. Whenever we encounter a DIE
46 * that has children or other values or types associated with it, we recursively
47 * process those children first so that way we can then refer to the generated
48 * CTF type id while processing its parent. This reduces the amount of unknowns
49 * and fixups that we need. It also ensures that we don't accidentally add types
50 * that an overzealous compiler might add to the DWARF data but aren't used by
51 * anything in the system.
265 avl_tree_t cu_map; /* map die offsets to CTF types */
266 char *cu_errbuf; /* error message buffer */
267 size_t cu_errlen; /* error message buffer length */
268 size_t cu_ptrsz; /* object's pointer size */
269 boolean_t cu_bigend; /* is it big endian */
270 boolean_t cu_doweaks; /* should we convert weak symbols? */
271 uint_t cu_mach; /* machine type */
272 ctf_id_t cu_voidtid; /* void pointer */
273 ctf_id_t cu_longtid; /* id for a 'long' */
274 } ctf_cu_t;
275
276 static int ctf_dwarf_offset(ctf_cu_t *, Dwarf_Die, Dwarf_Off *);
277 static int ctf_dwarf_convert_die(ctf_cu_t *, Dwarf_Die);
278 static int ctf_dwarf_convert_type(ctf_cu_t *, Dwarf_Die, ctf_id_t *, int);
279
280 static int ctf_dwarf_function_count(ctf_cu_t *, Dwarf_Die, ctf_funcinfo_t *,
281 boolean_t);
282 static int ctf_dwarf_convert_fargs(ctf_cu_t *, Dwarf_Die, ctf_funcinfo_t *,
283 ctf_id_t *);
284
285 typedef int (ctf_dwarf_symtab_f)(ctf_cu_t *, const GElf_Sym *, ulong_t,
286 const char *, const char *, void *);
287
288 /*
289 * This is a generic way to set a CTF Conversion backend error depending on what
290 * we were doing. Unless it was one of a specific set of errors that don't
291 * indicate a programming / translation bug, eg. ENOMEM, then we transform it
292 * into a CTF backend error and fill in the error buffer.
293 */
294 static int
295 ctf_dwarf_error(ctf_cu_t *cup, ctf_file_t *cfp, int err, const char *fmt, ...)
296 {
297 va_list ap;
298 int ret;
299 size_t off = 0;
300 ssize_t rem = cup->cu_errlen;
301 if (cfp != NULL)
302 err = ctf_errno(cfp);
303
304 if (err == ENOMEM)
305 return (err);
306
307 ret = snprintf(cup->cu_errbuf, rem, "die %s: ", cup->cu_name);
2149 }
2150
2151 static int
2152 ctf_dwarf_fixup_die(ctf_cu_t *cup, boolean_t addpass)
2153 {
2154 ctf_dwmap_t *map;
2155
2156 for (map = avl_first(&cup->cu_map); map != NULL;
2157 map = AVL_NEXT(&cup->cu_map, map)) {
2158 int ret;
2159 if (map->cdm_fix == B_FALSE)
2160 continue;
2161 if ((ret = ctf_dwarf_fixup_sou(cup, map->cdm_die, map->cdm_id,
2162 addpass)) != 0)
2163 return (ret);
2164 }
2165
2166 return (0);
2167 }
2168
2169 static ctf_dwfunc_t *
2170 ctf_dwarf_match_func(ctf_cu_t *cup, const char *file, const char *name,
2171 int bind)
2172 {
2173 ctf_dwfunc_t *cdf;
2174
2175 if (bind == STB_WEAK)
2176 return (NULL);
2177
2178 /* Nothing we can do if we can't find a name to compare it to. */
2179 if (bind == STB_LOCAL && (file == NULL || cup->cu_name == NULL))
2180 return (NULL);
2181
2182 for (cdf = ctf_list_next(&cup->cu_funcs); cdf != NULL;
2183 cdf = ctf_list_next(cdf)) {
2184 if (bind == STB_GLOBAL && cdf->cdf_global == B_FALSE)
2185 continue;
2186 if (bind == STB_LOCAL && cdf->cdf_global == B_TRUE)
2187 continue;
2188 if (strcmp(name, cdf->cdf_name) != 0)
2189 continue;
2190 if (bind == STB_LOCAL && strcmp(file, cup->cu_name) != 0)
2191 continue;
2192 return (cdf);
2193 }
2194
2195 return (NULL);
2196 }
2197 static ctf_dwvar_t *
2198 ctf_dwarf_match_var(ctf_cu_t *cup, const char *file, const char *name,
2199 int bind)
2200 {
2201 ctf_dwvar_t *cdv;
2202
2203 /* Nothing we can do if we can't find a name to compare it to. */
2204 if (bind == STB_LOCAL && (file == NULL || cup->cu_name == NULL))
2205 return (NULL);
2206 ctf_dprintf("Still considering %s\n", name);
2207
2208 for (cdv = ctf_list_next(&cup->cu_vars); cdv != NULL;
2209 cdv = ctf_list_next(cdv)) {
2210 if (bind == STB_GLOBAL && cdv->cdv_global == B_FALSE)
2211 continue;
2212 if (bind == STB_LOCAL && cdv->cdv_global == B_TRUE)
2213 continue;
2214 if (strcmp(name, cdv->cdv_name) != 0)
2215 continue;
2216 if (bind == STB_LOCAL && strcmp(file, cup->cu_name) != 0)
2217 continue;
2218 return (cdv);
2219 }
2220
2221 return (NULL);
2222 }
2223
2224 static int
2225 ctf_dwarf_symtab_iter(ctf_cu_t *cup, ctf_dwarf_symtab_f *func, void *arg)
2226 {
2227 int ret;
2228 ulong_t i;
2229 ctf_file_t *fp = cup->cu_ctfp;
2230 const char *file = NULL;
2231 uintptr_t symbase = (uintptr_t)fp->ctf_symtab.cts_data;
2232 uintptr_t strbase = (uintptr_t)fp->ctf_strtab.cts_data;
2233
2234 for (i = 0; i < fp->ctf_nsyms; i++) {
2235 const char *name;
2236 int type;
2237 GElf_Sym gsym;
2238 const GElf_Sym *gsymp;
2239
2240 if (fp->ctf_symtab.cts_entsize == sizeof (Elf32_Sym)) {
2241 const Elf32_Sym *symp = (Elf32_Sym *)symbase + i;
2242 type = ELF32_ST_TYPE(symp->st_info);
2243 if (type == STT_FILE) {
2244 file = (char *)(strbase + symp->st_name);
2245 continue;
2246 }
2247 if (type != STT_OBJECT && type != STT_FUNC)
2248 continue;
2249 if (ctf_sym_valid(strbase, type, symp->st_shndx,
2250 symp->st_value, symp->st_name) == B_FALSE)
2251 continue;
2252 name = (char *)(strbase + symp->st_name);
2253 gsym.st_name = symp->st_name;
2254 gsym.st_value = symp->st_value;
2255 gsym.st_size = symp->st_size;
2256 gsym.st_info = symp->st_info;
2257 gsym.st_other = symp->st_other;
2258 gsym.st_shndx = symp->st_shndx;
2259 gsymp = &gsym;
2260 } else {
2261 const Elf64_Sym *symp = (Elf64_Sym *)symbase + i;
2262 type = ELF64_ST_TYPE(symp->st_info);
2263 if (type == STT_FILE) {
2264 file = (char *)(strbase + symp->st_name);
2265 continue;
2266 }
2267 if (type != STT_OBJECT && type != STT_FUNC)
2268 continue;
2269 if (ctf_sym_valid(strbase, type, symp->st_shndx,
2270 symp->st_value, symp->st_name) == B_FALSE)
2271 continue;
2272 name = (char *)(strbase + symp->st_name);
2273 gsymp = symp;
2274 }
2275
2276 ret = func(cup, gsymp, i, file, name, arg);
2277 if (ret != 0)
2278 return (ret);
2279 }
2280
2281 return (0);
2282 }
2283
2284 static int
2285 ctf_dwarf_conv_funcvars_cb(ctf_cu_t *cup, const GElf_Sym *symp, ulong_t idx,
2286 const char *file, const char *name, void *arg)
2287 {
2288 int ret, bind, type;
2289
2290 bind = GELF_ST_BIND(symp->st_info);
2291 type = GELF_ST_TYPE(symp->st_info);
2292
2293 /*
2294 * Come back to weak symbols in another pass
2295 */
2296 if (bind == STB_WEAK)
2297 return (0);
2298
2299 if (type == STT_OBJECT) {
2300 ctf_dwvar_t *cdv = ctf_dwarf_match_var(cup, file, name,
2301 bind);
2302 ctf_dprintf("match for %s (%d): %p\n", name, idx, cdv);
2303 if (cdv == NULL)
2304 return (0);
2305 ret = ctf_add_object(cup->cu_ctfp, idx, cdv->cdv_type);
2306 ctf_dprintf("added object %s\n", name);
2307 } else {
2308 ctf_dwfunc_t *cdf = ctf_dwarf_match_func(cup, file, name,
2309 bind);
2310 if (cdf == NULL)
2311 return (0);
2312 ret = ctf_add_function(cup->cu_ctfp, idx, &cdf->cdf_fip,
2313 cdf->cdf_argv);
2314 }
2315
2316 if (ret == CTF_ERR) {
2317 return (ctf_errno(cup->cu_ctfp));
2318 }
2319
2320 return (0);
2321 }
2322
2323 static int
2324 ctf_dwarf_conv_funcvars(ctf_cu_t *cup)
2325 {
2326 return (ctf_dwarf_symtab_iter(cup, ctf_dwarf_conv_funcvars_cb, NULL));
2327 }
2328
2329 /*
2330 * If we have a weak symbol, attempt to find the strong symbol it will resolve
2331 * to. Note: the code where this actually happens is in sym_process() in
2332 * cmd/sgs/libld/common/syms.c
2333 *
2334 * Finding the matching symbol is unfortunately not trivial. For a symbol to be
2335 * a candidate, it must:
2336 *
2337 * - have the same type (function, object)
2338 * - have the same value (address)
2339 * - have the same size
2340 * - not be another weak symbol
2341 * - belong to the same section (checked via section index)
2342 *
2343 * To perform this check, we first iterate over the symbol table. For each weak
2344 * symbol that we encounter, we then do a second walk over the symbol table,
2345 * calling ctf_dwarf_conv_check_weak(). If a symbol matches the above, then it's
2346 * either a local or global symbol. If we find a global symbol then we go with
2347 * it and stop looking for additional matches.
2348 *
2349 * If instead, we find a local symbol, things are more complicated. The first
2350 * thing we do is to try and see if we have file information about both symbols
2351 * (STT_FILE). If they both have file information and it matches, then we treat
2352 * that as a good match and stop searching for additional matches.
2353 *
2354 * Otherwise, this means we have a non-matching file and a local symbol. We
2355 * treat this as a candidate and if we find a better match (one of the two cases
2356 * above), use that instead. There are two different ways this can happen.
2357 * Either this is a completely different symbol, or it's a once-global symbol
2358 * that was scoped to local via a mapfile. In the latter case, curfile is
2359 * likely inaccurate since the linker does not preserve the needed curfile in
2360 * the order of the symbol table (see the comments about locally scoped symbols
2361 * in libld's update_osym()). As we can't tell this case from the former one,
2362 * we use this symbol iff no other matching symbol is found.
2363 *
2364 * What we really need here is a SUNW section containing weak<->strong mappings
2365 * that we can consume.
2366 */
2367 typedef struct ctf_dwarf_weak_arg {
2368 const GElf_Sym *cweak_symp;
2369 const char *cweak_file;
2370 boolean_t cweak_candidate;
2371 ulong_t cweak_idx;
2372 } ctf_dwarf_weak_arg_t;
2373
2374 static int
2375 ctf_dwarf_conv_check_weak(ctf_cu_t *cup, const GElf_Sym *symp,
2376 ulong_t idx, const char *file, const char *name, void *arg)
2377 {
2378 ctf_dwarf_weak_arg_t *cweak = arg;
2379 const GElf_Sym *wsymp = cweak->cweak_symp;
2380
2381 ctf_dprintf("comparing weak to %s\n", name);
2382
2383 if (GELF_ST_BIND(symp->st_info) == STB_WEAK) {
2384 return (0);
2385 }
2386
2387 if (GELF_ST_TYPE(wsymp->st_info) != GELF_ST_TYPE(symp->st_info)) {
2388 return (0);
2389 }
2390
2391 if (wsymp->st_value != symp->st_value) {
2392 return (0);
2393 }
2394
2395 if (wsymp->st_size != symp->st_size) {
2396 return (0);
2397 }
2398
2399 if (wsymp->st_shndx != symp->st_shndx) {
2400 return (0);
2459 if (args == NULL)
2460 return (ENOMEM);
2461
2462 if (ctf_func_args(cup->cu_ctfp, matchidx, fip.ctc_argc, args) ==
2463 CTF_ERR) {
2464 ctf_free(args, sizeof (ctf_id_t) * fip.ctc_argc);
2465 return (ctf_errno(cup->cu_ctfp));
2466 }
2467 }
2468
2469 ret = ctf_add_function(cup->cu_ctfp, idx, &fip, args);
2470 if (args != NULL)
2471 ctf_free(args, sizeof (ctf_id_t) * fip.ctc_argc);
2472 if (ret == CTF_ERR)
2473 return (ctf_errno(cup->cu_ctfp));
2474
2475 return (0);
2476 }
2477
2478 static int
2479 ctf_dwarf_conv_weaks_cb(ctf_cu_t *cup, const GElf_Sym *symp,
2480 ulong_t idx, const char *file, const char *name, void *arg)
2481 {
2482 int ret, type;
2483 ctf_dwarf_weak_arg_t cweak;
2484
2485 /*
2486 * We only care about weak symbols.
2487 */
2488 if (GELF_ST_BIND(symp->st_info) != STB_WEAK)
2489 return (0);
2490
2491 type = GELF_ST_TYPE(symp->st_info);
2492 ASSERT(type == STT_OBJECT || type == STT_FUNC);
2493
2494 /*
2495 * For each weak symbol we encounter, we need to do a second iteration
2496 * to try and find a match. We should probably think about other
2497 * techniques to try and save us time in the future.
2498 */
2499 cweak.cweak_symp = symp;
2500 cweak.cweak_file = file;
2501 cweak.cweak_candidate = B_FALSE;
2502 cweak.cweak_idx = 0;
2503
2504 ctf_dprintf("Trying to find weak equiv for %s\n", name);
2505
2506 ret = ctf_dwarf_symtab_iter(cup, ctf_dwarf_conv_check_weak, &cweak);
2507 VERIFY(ret == 0 || ret == 1);
2508
2509 /*
2510 * Nothing was ever found, we're not going to add anything for this
2511 * entry.
2512 */
2513 if (ret == 0 && cweak.cweak_candidate == B_FALSE) {
2514 ctf_dprintf("found no weak match for %s\n", name);
2515 return (0);
2516 }
2517
2518 /*
2519 * Now, finally go and add the type based on the match.
2520 */
2521 if (type == STT_OBJECT) {
2522 ret = ctf_dwarf_duplicate_sym(cup, idx, cweak.cweak_idx);
2523 } else {
2524 ret = ctf_dwarf_duplicate_func(cup, idx, cweak.cweak_idx);
2525 }
2526
2527 return (ret);
2528 }
2529
2530 static int
2531 ctf_dwarf_conv_weaks(ctf_cu_t *cup)
2532 {
2533 return (ctf_dwarf_symtab_iter(cup, ctf_dwarf_conv_weaks_cb, NULL));
2534 }
2535
2536 /* ARGSUSED */
2537 static int
2538 ctf_dwarf_convert_one(void *arg, void *unused)
2539 {
2540 int ret;
2541 ctf_file_t *dedup;
2542 ctf_cu_t *cup = arg;
2543
2544 ctf_dprintf("converting die: %s\n", cup->cu_name);
2545 ctf_dprintf("max offset: %x\n", cup->cu_maxoff);
2546 VERIFY(cup != NULL);
2547
2548 ret = ctf_dwarf_convert_die(cup, cup->cu_cu);
2549 ctf_dprintf("ctf_dwarf_convert_die (%s) returned %d\n", cup->cu_name,
2550 ret);
2551 if (ret != 0) {
2552 return (ret);
2553 }
2584 "failed to convert strong functions and variables"));
2585 }
2586
2587 if (ctf_update(cup->cu_ctfp) != 0) {
2588 return (ctf_dwarf_error(cup, cup->cu_ctfp, 0,
2589 "failed to update output ctf container"));
2590 }
2591
2592 if (cup->cu_doweaks == B_TRUE) {
2593 if ((ret = ctf_dwarf_conv_weaks(cup)) != 0) {
2594 return (ctf_dwarf_error(cup, NULL, ret,
2595 "failed to convert weak functions and variables"));
2596 }
2597
2598 if (ctf_update(cup->cu_ctfp) != 0) {
2599 return (ctf_dwarf_error(cup, cup->cu_ctfp, 0,
2600 "failed to update output ctf container"));
2601 }
2602 }
2603
2604 ctf_phase_dump(cup->cu_ctfp, "pre-dedup");
2605 ctf_dprintf("adding inputs for dedup\n");
2606 if ((ret = ctf_merge_add(cup->cu_cmh, cup->cu_ctfp)) != 0) {
2607 return (ctf_dwarf_error(cup, NULL, ret,
2608 "failed to add inputs for merge"));
2609 }
2610
2611 ctf_dprintf("starting merge\n");
2612 if ((ret = ctf_merge_dedup(cup->cu_cmh, &dedup)) != 0) {
2613 return (ctf_dwarf_error(cup, NULL, ret,
2614 "failed to deduplicate die"));
2615 }
2616 ctf_close(cup->cu_ctfp);
2617 cup->cu_ctfp = dedup;
2618
2619 return (0);
2620 }
2621
2622 /*
2623 * Note, we expect that if we're returning a ctf_file_t from one of the dies,
2624 * say in the single node case, it's been saved and the entry here has been set
2625 * to NULL, which ctf_close happily ignores.
2626 */
2627 static void
2628 ctf_dwarf_free_die(ctf_cu_t *cup)
2629 {
2630 ctf_dwfunc_t *cdf, *ndf;
2631 ctf_dwvar_t *cdv, *ndv;
2632 ctf_dwbitf_t *cdb, *ndb;
2633 ctf_dwmap_t *map;
2634 void *cookie;
2635 Dwarf_Error derr;
2636
2637 ctf_dprintf("Beginning to free die: %p\n", cup);
2887 }
2888
2889 for (i = 0; i < ndies; i++) {
2890 cup = &cdies[i];
2891 ret = dwarf_elf_init(elf, DW_DLC_READ, NULL, NULL,
2892 &cup->cu_dwarf, &derr);
2893 if (ret != 0) {
2894 ctf_free(cdies, sizeof (ctf_cu_t) * ndies);
2895 (void) snprintf(errmsg, errlen,
2896 "failed to initialize DWARF: %s\n",
2897 dwarf_errmsg(derr));
2898 *errp = ECTF_CONVBKERR;
2899 return (CTF_CONV_ERROR);
2900 }
2901
2902 ret = ctf_dwarf_init_die(fd, elf, &cdies[i], i, errmsg, errlen);
2903 if (ret != 0) {
2904 *errp = ret;
2905 goto out;
2906 }
2907 cup->cu_doweaks = ndies > 1 ? B_FALSE : B_TRUE;
2908 }
2909
2910 ctf_dprintf("found %d DWARF die(s)\n", ndies);
2911
2912 /*
2913 * If we only have one compilation unit, there's no reason to use
2914 * multiple threads, even if the user requested them. After all, they
2915 * just gave us an upper bound.
2916 */
2917 if (ndies == 1)
2918 nthrs = 1;
2919
2920 if (workq_init(&wqp, nthrs) == -1) {
2921 *errp = errno;
2922 goto out;
2923 }
2924
2925 for (i = 0; i < ndies; i++) {
2926 cup = &cdies[i];
2927 ctf_dprintf("adding die %s: %p, %x %x\n", cup->cu_name,
2928 cup->cu_cu, cup->cu_cuoff, cup->cu_maxoff);
2929 if (workq_add(wqp, cup) == -1) {
2930 *errp = errno;
2931 goto out;
2932 }
2933 }
2934
2935 ret = workq_work(wqp, ctf_dwarf_convert_one, NULL, errp);
2936 if (ret == WORKQ_ERROR) {
2937 *errp = errno;
2938 goto out;
2939 } else if (ret == WORKQ_UERROR) {
2940 ctf_dprintf("internal convert failed: %s\n",
2941 ctf_errmsg(*errp));
2942 goto out;
2943 }
2944
2945 ctf_dprintf("Determining next phase: have %d dies\n", ndies);
2946 if (ndies != 1) {
2947 ctf_merge_t *cmp;
2948
2949 cmp = ctf_merge_init(fd, &ret);
2950 if (cmp == NULL) {
2951 *errp = ret;
2952 goto out;
2953 }
2954
2955 ctf_dprintf("setting threads\n");
2956 if ((ret = ctf_merge_set_nthreads(cmp, nthrs)) != 0) {
2957 ctf_merge_fini(cmp);
2958 *errp = ret;
2959 goto out;
2960 }
2961
2962 ctf_dprintf("adding dies\n");
2963 for (i = 0; i < ndies; i++) {
2964 cup = &cdies[i];
2965 if ((ret = ctf_merge_add(cmp, cup->cu_ctfp)) != 0) {
2966 ctf_merge_fini(cmp);
2967 *errp = ret;
2968 goto out;
2969 }
2970 }
2971
2972 ctf_dprintf("performing merge\n");
2973 ret = ctf_merge_merge(cmp, fpp);
2974 if (ret != 0) {
2975 ctf_dprintf("failed merge!\n");
2976 *fpp = NULL;
2977 ctf_merge_fini(cmp);
2978 *errp = ret;
2979 goto out;
2980 }
2981 ctf_merge_fini(cmp);
2982 *errp = 0;
2983 ctf_dprintf("successfully converted!\n");
2984 } else {
|
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25 /*
26 * Copyright 2012 Jason King. All rights reserved.
27 * Use is subject to license terms.
28 */
29
30 /*
31 * Copyright 2019 Joyent, Inc.
32 */
33
34 /*
35 * CTF DWARF conversion theory.
36 *
37 * DWARF data contains a series of compilation units. Each compilation unit
38 * generally refers to an object file or what once was, in the case of linked
39 * binaries and shared objects. Each compilation unit has a series of what DWARF
40 * calls a DIE (Debugging Information Entry). The set of entries that we care
41 * about have type information stored in a series of attributes. Each DIE also
42 * has a tag that identifies the kind of attributes that it has.
43 *
44 * A given DIE may itself have children. For example, a DIE that represents a
45 * structure has children which represent members. Whenever we encounter a DIE
46 * that has children or other values or types associated with it, we recursively
47 * process those children first so that way we can then refer to the generated
48 * CTF type id while processing its parent. This reduces the amount of unknowns
49 * and fixups that we need. It also ensures that we don't accidentally add types
50 * that an overzealous compiler might add to the DWARF data but aren't used by
51 * anything in the system.
265 avl_tree_t cu_map; /* map die offsets to CTF types */
266 char *cu_errbuf; /* error message buffer */
267 size_t cu_errlen; /* error message buffer length */
268 size_t cu_ptrsz; /* object's pointer size */
269 boolean_t cu_bigend; /* is it big endian */
270 boolean_t cu_doweaks; /* should we convert weak symbols? */
271 uint_t cu_mach; /* machine type */
272 ctf_id_t cu_voidtid; /* void pointer */
273 ctf_id_t cu_longtid; /* id for a 'long' */
274 } ctf_cu_t;
275
276 static int ctf_dwarf_offset(ctf_cu_t *, Dwarf_Die, Dwarf_Off *);
277 static int ctf_dwarf_convert_die(ctf_cu_t *, Dwarf_Die);
278 static int ctf_dwarf_convert_type(ctf_cu_t *, Dwarf_Die, ctf_id_t *, int);
279
280 static int ctf_dwarf_function_count(ctf_cu_t *, Dwarf_Die, ctf_funcinfo_t *,
281 boolean_t);
282 static int ctf_dwarf_convert_fargs(ctf_cu_t *, Dwarf_Die, ctf_funcinfo_t *,
283 ctf_id_t *);
284
285 /*
286 * This is a generic way to set a CTF Conversion backend error depending on what
287 * we were doing. Unless it was one of a specific set of errors that don't
288 * indicate a programming / translation bug, eg. ENOMEM, then we transform it
289 * into a CTF backend error and fill in the error buffer.
290 */
291 static int
292 ctf_dwarf_error(ctf_cu_t *cup, ctf_file_t *cfp, int err, const char *fmt, ...)
293 {
294 va_list ap;
295 int ret;
296 size_t off = 0;
297 ssize_t rem = cup->cu_errlen;
298 if (cfp != NULL)
299 err = ctf_errno(cfp);
300
301 if (err == ENOMEM)
302 return (err);
303
304 ret = snprintf(cup->cu_errbuf, rem, "die %s: ", cup->cu_name);
2146 }
2147
2148 static int
2149 ctf_dwarf_fixup_die(ctf_cu_t *cup, boolean_t addpass)
2150 {
2151 ctf_dwmap_t *map;
2152
2153 for (map = avl_first(&cup->cu_map); map != NULL;
2154 map = AVL_NEXT(&cup->cu_map, map)) {
2155 int ret;
2156 if (map->cdm_fix == B_FALSE)
2157 continue;
2158 if ((ret = ctf_dwarf_fixup_sou(cup, map->cdm_die, map->cdm_id,
2159 addpass)) != 0)
2160 return (ret);
2161 }
2162
2163 return (0);
2164 }
2165
2166 /*
2167 * The DWARF information about a symbol and the information in the symbol table
2168 * may not be the same due to symbol reduction that is performed by ld due to a
2169 * mapfile or other such directive. We process weak symbols at a later time.
2170 *
2171 * The following are the rules that we employ:
2172 *
2173 * 1. A DWARF function that is considered exported matches STB_GLOBAL entries
2174 * with the same name.
2175 *
2176 * 2. A DWARF function that is considered exported matches STB_LOCAL entries
2177 * with the same name and the same file. This case may happen due to mapfile
2178 * reduction.
2179 *
2180 * 3. A DWARF function that is not considered exported matches STB_LOCAL entries
2181 * with the same name and the same file.
2182 *
2183 * 4. A DWARF function that has the same name as the symbol table entry, but the
2184 * files do not match. This is considered a 'fuzzy' match. This may also happen
2185 * due to a mapfile reduction. Fuzzy matching is only used when we know that the
2186 * file in question refers to the primary object. This is because when a symbol
2187 * is reduced in a mapfile, it's always going to be tagged as a local value in
2188 * the generated output and it is considered as to belong to the primary file
2189 * which is the first STT_FILE symbol we see.
2190 */
2191 static boolean_t
2192 ctf_dwarf_symbol_match(const char *symtab_file, const char *symtab_name,
2193 uint_t symtab_bind, const char *dwarf_file, const char *dwarf_name,
2194 boolean_t dwarf_global, boolean_t *is_fuzzy)
2195 {
2196 *is_fuzzy = B_FALSE;
2197
2198 if (symtab_bind != STB_LOCAL && symtab_bind != STB_GLOBAL) {
2199 return (B_FALSE);
2200 }
2201
2202 if (strcmp(symtab_name, dwarf_name) != 0) {
2203 return (B_FALSE);
2204 }
2205
2206 if (symtab_bind == STB_GLOBAL) {
2207 return (dwarf_global);
2208 }
2209
2210 if (strcmp(symtab_file, dwarf_file) == 0) {
2211 return (B_TRUE);
2212 }
2213
2214 if (dwarf_global) {
2215 *is_fuzzy = B_TRUE;
2216 return (B_TRUE);
2217 }
2218
2219 return (B_FALSE);
2220 }
2221
2222 static ctf_dwfunc_t *
2223 ctf_dwarf_match_func(ctf_cu_t *cup, const char *file, const char *name,
2224 uint_t bind, boolean_t primary)
2225 {
2226 ctf_dwfunc_t *cdf, *fuzzy = NULL;
2227
2228 if (bind == STB_WEAK)
2229 return (NULL);
2230
2231 if (bind == STB_LOCAL && (file == NULL || cup->cu_name == NULL))
2232 return (NULL);
2233
2234 for (cdf = ctf_list_next(&cup->cu_funcs); cdf != NULL;
2235 cdf = ctf_list_next(cdf)) {
2236 boolean_t is_fuzzy = B_FALSE;
2237
2238 if (ctf_dwarf_symbol_match(file, name, bind, cup->cu_name,
2239 cdf->cdf_name, cdf->cdf_global, &is_fuzzy)) {
2240 if (is_fuzzy) {
2241 if (primary) {
2242 fuzzy = cdf;
2243 }
2244 continue;
2245 } else {
2246 return (cdf);
2247 }
2248 }
2249 }
2250
2251 return (fuzzy);
2252 }
2253
2254 static ctf_dwvar_t *
2255 ctf_dwarf_match_var(ctf_cu_t *cup, const char *file, const char *name,
2256 uint_t bind, boolean_t primary)
2257 {
2258 ctf_dwvar_t *cdv, *fuzzy = NULL;
2259
2260 if (bind == STB_WEAK)
2261 return (NULL);
2262
2263 if (bind == STB_LOCAL && (file == NULL || cup->cu_name == NULL))
2264 return (NULL);
2265
2266 for (cdv = ctf_list_next(&cup->cu_vars); cdv != NULL;
2267 cdv = ctf_list_next(cdv)) {
2268 boolean_t is_fuzzy = B_FALSE;
2269
2270 if (ctf_dwarf_symbol_match(file, name, bind, cup->cu_name,
2271 cdv->cdv_name, cdv->cdv_global, &is_fuzzy)) {
2272 if (is_fuzzy) {
2273 if (primary) {
2274 fuzzy = cdv;
2275 }
2276 } else {
2277 return (cdv);
2278 }
2279 }
2280 }
2281
2282 return (fuzzy);
2283 }
2284
2285 static int
2286 ctf_dwarf_conv_funcvars_cb(const Elf64_Sym *symp, ulong_t idx,
2287 const char *file, const char *name, boolean_t primary, void *arg)
2288 {
2289 int ret;
2290 uint_t bind, type;
2291 ctf_cu_t *cup = arg;
2292
2293 bind = GELF_ST_BIND(symp->st_info);
2294 type = GELF_ST_TYPE(symp->st_info);
2295
2296 /*
2297 * Come back to weak symbols in another pass
2298 */
2299 if (bind == STB_WEAK)
2300 return (0);
2301
2302 if (type == STT_OBJECT) {
2303 ctf_dwvar_t *cdv = ctf_dwarf_match_var(cup, file, name,
2304 bind, primary);
2305 if (cdv == NULL)
2306 return (0);
2307 ret = ctf_add_object(cup->cu_ctfp, idx, cdv->cdv_type);
2308 ctf_dprintf("added object %s->%ld\n", name, cdv->cdv_type);
2309 } else {
2310 ctf_dwfunc_t *cdf = ctf_dwarf_match_func(cup, file, name,
2311 bind, primary);
2312 if (cdf == NULL)
2313 return (0);
2314 ret = ctf_add_function(cup->cu_ctfp, idx, &cdf->cdf_fip,
2315 cdf->cdf_argv);
2316 ctf_dprintf("added function %s\n", name);
2317 }
2318
2319 if (ret == CTF_ERR) {
2320 return (ctf_errno(cup->cu_ctfp));
2321 }
2322
2323 return (0);
2324 }
2325
2326 static int
2327 ctf_dwarf_conv_funcvars(ctf_cu_t *cup)
2328 {
2329 return (ctf_symtab_iter(cup->cu_ctfp, ctf_dwarf_conv_funcvars_cb, cup));
2330 }
2331
2332 /*
2333 * If we have a weak symbol, attempt to find the strong symbol it will resolve
2334 * to. Note: the code where this actually happens is in sym_process() in
2335 * cmd/sgs/libld/common/syms.c
2336 *
2337 * Finding the matching symbol is unfortunately not trivial. For a symbol to be
2338 * a candidate, it must:
2339 *
2340 * - have the same type (function, object)
2341 * - have the same value (address)
2342 * - have the same size
2343 * - not be another weak symbol
2344 * - belong to the same section (checked via section index)
2345 *
2346 * To perform this check, we first iterate over the symbol table. For each weak
2347 * symbol that we encounter, we then do a second walk over the symbol table,
2348 * calling ctf_dwarf_conv_check_weak(). If a symbol matches the above, then it's
2349 * either a local or global symbol. If we find a global symbol then we go with
2350 * it and stop looking for additional matches.
2351 *
2352 * If instead, we find a local symbol, things are more complicated. The first
2353 * thing we do is to try and see if we have file information about both symbols
2354 * (STT_FILE). If they both have file information and it matches, then we treat
2355 * that as a good match and stop searching for additional matches.
2356 *
2357 * Otherwise, this means we have a non-matching file and a local symbol. We
2358 * treat this as a candidate and if we find a better match (one of the two cases
2359 * above), use that instead. There are two different ways this can happen.
2360 * Either this is a completely different symbol, or it's a once-global symbol
2361 * that was scoped to local via a mapfile. In the latter case, curfile is
2362 * likely inaccurate since the linker does not preserve the needed curfile in
2363 * the order of the symbol table (see the comments about locally scoped symbols
2364 * in libld's update_osym()). As we can't tell this case from the former one,
2365 * we use this symbol iff no other matching symbol is found.
2366 *
2367 * What we really need here is a SUNW section containing weak<->strong mappings
2368 * that we can consume.
2369 */
2370 typedef struct ctf_dwarf_weak_arg {
2371 const Elf64_Sym *cweak_symp;
2372 const char *cweak_file;
2373 boolean_t cweak_candidate;
2374 ulong_t cweak_idx;
2375 } ctf_dwarf_weak_arg_t;
2376
2377 static int
2378 ctf_dwarf_conv_check_weak(const Elf64_Sym *symp, ulong_t idx, const char *file,
2379 const char *name, boolean_t primary, void *arg)
2380 {
2381 ctf_dwarf_weak_arg_t *cweak = arg;
2382
2383 const Elf64_Sym *wsymp = cweak->cweak_symp;
2384
2385 ctf_dprintf("comparing weak to %s\n", name);
2386
2387 if (GELF_ST_BIND(symp->st_info) == STB_WEAK) {
2388 return (0);
2389 }
2390
2391 if (GELF_ST_TYPE(wsymp->st_info) != GELF_ST_TYPE(symp->st_info)) {
2392 return (0);
2393 }
2394
2395 if (wsymp->st_value != symp->st_value) {
2396 return (0);
2397 }
2398
2399 if (wsymp->st_size != symp->st_size) {
2400 return (0);
2401 }
2402
2403 if (wsymp->st_shndx != symp->st_shndx) {
2404 return (0);
2463 if (args == NULL)
2464 return (ENOMEM);
2465
2466 if (ctf_func_args(cup->cu_ctfp, matchidx, fip.ctc_argc, args) ==
2467 CTF_ERR) {
2468 ctf_free(args, sizeof (ctf_id_t) * fip.ctc_argc);
2469 return (ctf_errno(cup->cu_ctfp));
2470 }
2471 }
2472
2473 ret = ctf_add_function(cup->cu_ctfp, idx, &fip, args);
2474 if (args != NULL)
2475 ctf_free(args, sizeof (ctf_id_t) * fip.ctc_argc);
2476 if (ret == CTF_ERR)
2477 return (ctf_errno(cup->cu_ctfp));
2478
2479 return (0);
2480 }
2481
2482 static int
2483 ctf_dwarf_conv_weaks_cb(const Elf64_Sym *symp, ulong_t idx, const char *file,
2484 const char *name, boolean_t primary, void *arg)
2485 {
2486 int ret, type;
2487 ctf_dwarf_weak_arg_t cweak;
2488 ctf_cu_t *cup = arg;
2489
2490 /*
2491 * We only care about weak symbols.
2492 */
2493 if (GELF_ST_BIND(symp->st_info) != STB_WEAK)
2494 return (0);
2495
2496 type = GELF_ST_TYPE(symp->st_info);
2497 ASSERT(type == STT_OBJECT || type == STT_FUNC);
2498
2499 /*
2500 * For each weak symbol we encounter, we need to do a second iteration
2501 * to try and find a match. We should probably think about other
2502 * techniques to try and save us time in the future.
2503 */
2504 cweak.cweak_symp = symp;
2505 cweak.cweak_file = file;
2506 cweak.cweak_candidate = B_FALSE;
2507 cweak.cweak_idx = 0;
2508
2509 ctf_dprintf("Trying to find weak equiv for %s\n", name);
2510
2511 ret = ctf_symtab_iter(cup->cu_ctfp, ctf_dwarf_conv_check_weak, &cweak);
2512 VERIFY(ret == 0 || ret == 1);
2513
2514 /*
2515 * Nothing was ever found, we're not going to add anything for this
2516 * entry.
2517 */
2518 if (ret == 0 && cweak.cweak_candidate == B_FALSE) {
2519 ctf_dprintf("found no weak match for %s\n", name);
2520 return (0);
2521 }
2522
2523 /*
2524 * Now, finally go and add the type based on the match.
2525 */
2526 ctf_dprintf("matched weak symbol %lu to %lu\n", idx, cweak.cweak_idx);
2527 if (type == STT_OBJECT) {
2528 ret = ctf_dwarf_duplicate_sym(cup, idx, cweak.cweak_idx);
2529 } else {
2530 ret = ctf_dwarf_duplicate_func(cup, idx, cweak.cweak_idx);
2531 }
2532
2533 return (ret);
2534 }
2535
2536 static int
2537 ctf_dwarf_conv_weaks(ctf_cu_t *cup)
2538 {
2539 return (ctf_symtab_iter(cup->cu_ctfp, ctf_dwarf_conv_weaks_cb, cup));
2540 }
2541
2542 /* ARGSUSED */
2543 static int
2544 ctf_dwarf_convert_one(void *arg, void *unused)
2545 {
2546 int ret;
2547 ctf_file_t *dedup;
2548 ctf_cu_t *cup = arg;
2549
2550 ctf_dprintf("converting die: %s\n", cup->cu_name);
2551 ctf_dprintf("max offset: %x\n", cup->cu_maxoff);
2552 VERIFY(cup != NULL);
2553
2554 ret = ctf_dwarf_convert_die(cup, cup->cu_cu);
2555 ctf_dprintf("ctf_dwarf_convert_die (%s) returned %d\n", cup->cu_name,
2556 ret);
2557 if (ret != 0) {
2558 return (ret);
2559 }
2590 "failed to convert strong functions and variables"));
2591 }
2592
2593 if (ctf_update(cup->cu_ctfp) != 0) {
2594 return (ctf_dwarf_error(cup, cup->cu_ctfp, 0,
2595 "failed to update output ctf container"));
2596 }
2597
2598 if (cup->cu_doweaks == B_TRUE) {
2599 if ((ret = ctf_dwarf_conv_weaks(cup)) != 0) {
2600 return (ctf_dwarf_error(cup, NULL, ret,
2601 "failed to convert weak functions and variables"));
2602 }
2603
2604 if (ctf_update(cup->cu_ctfp) != 0) {
2605 return (ctf_dwarf_error(cup, cup->cu_ctfp, 0,
2606 "failed to update output ctf container"));
2607 }
2608 }
2609
2610 ctf_phase_dump(cup->cu_ctfp, "pre-dwarf-dedup", cup->cu_name);
2611 ctf_dprintf("adding inputs for dedup\n");
2612 if ((ret = ctf_merge_add(cup->cu_cmh, cup->cu_ctfp)) != 0) {
2613 return (ctf_dwarf_error(cup, NULL, ret,
2614 "failed to add inputs for merge"));
2615 }
2616
2617 ctf_dprintf("starting dedup of %s\n", cup->cu_name);
2618 if ((ret = ctf_merge_dedup(cup->cu_cmh, &dedup)) != 0) {
2619 return (ctf_dwarf_error(cup, NULL, ret,
2620 "failed to deduplicate die"));
2621 }
2622 ctf_close(cup->cu_ctfp);
2623 cup->cu_ctfp = dedup;
2624 ctf_phase_dump(cup->cu_ctfp, "post-dwarf-dedup", cup->cu_name);
2625
2626 return (0);
2627 }
2628
2629 /*
2630 * Note, we expect that if we're returning a ctf_file_t from one of the dies,
2631 * say in the single node case, it's been saved and the entry here has been set
2632 * to NULL, which ctf_close happily ignores.
2633 */
2634 static void
2635 ctf_dwarf_free_die(ctf_cu_t *cup)
2636 {
2637 ctf_dwfunc_t *cdf, *ndf;
2638 ctf_dwvar_t *cdv, *ndv;
2639 ctf_dwbitf_t *cdb, *ndb;
2640 ctf_dwmap_t *map;
2641 void *cookie;
2642 Dwarf_Error derr;
2643
2644 ctf_dprintf("Beginning to free die: %p\n", cup);
2894 }
2895
2896 for (i = 0; i < ndies; i++) {
2897 cup = &cdies[i];
2898 ret = dwarf_elf_init(elf, DW_DLC_READ, NULL, NULL,
2899 &cup->cu_dwarf, &derr);
2900 if (ret != 0) {
2901 ctf_free(cdies, sizeof (ctf_cu_t) * ndies);
2902 (void) snprintf(errmsg, errlen,
2903 "failed to initialize DWARF: %s\n",
2904 dwarf_errmsg(derr));
2905 *errp = ECTF_CONVBKERR;
2906 return (CTF_CONV_ERROR);
2907 }
2908
2909 ret = ctf_dwarf_init_die(fd, elf, &cdies[i], i, errmsg, errlen);
2910 if (ret != 0) {
2911 *errp = ret;
2912 goto out;
2913 }
2914
2915 cup->cu_doweaks = ndies > 1 ? B_FALSE : B_TRUE;
2916 }
2917
2918 ctf_dprintf("found %d DWARF CUs\n", ndies);
2919
2920 /*
2921 * If we only have one compilation unit, there's no reason to use
2922 * multiple threads, even if the user requested them. After all, they
2923 * just gave us an upper bound.
2924 */
2925 if (ndies == 1)
2926 nthrs = 1;
2927
2928 if (workq_init(&wqp, nthrs) == -1) {
2929 *errp = errno;
2930 goto out;
2931 }
2932
2933 for (i = 0; i < ndies; i++) {
2934 cup = &cdies[i];
2935 ctf_dprintf("adding cu %s: %p, %x %x\n", cup->cu_name,
2936 cup->cu_cu, cup->cu_cuoff, cup->cu_maxoff);
2937 if (workq_add(wqp, cup) == -1) {
2938 *errp = errno;
2939 goto out;
2940 }
2941 }
2942
2943 ret = workq_work(wqp, ctf_dwarf_convert_one, NULL, errp);
2944 if (ret == WORKQ_ERROR) {
2945 *errp = errno;
2946 goto out;
2947 } else if (ret == WORKQ_UERROR) {
2948 ctf_dprintf("internal convert failed: %s\n",
2949 ctf_errmsg(*errp));
2950 goto out;
2951 }
2952
2953 ctf_dprintf("Determining next phase: have %d CUs\n", ndies);
2954 if (ndies != 1) {
2955 ctf_merge_t *cmp;
2956
2957 cmp = ctf_merge_init(fd, &ret);
2958 if (cmp == NULL) {
2959 *errp = ret;
2960 goto out;
2961 }
2962
2963 ctf_dprintf("setting threads\n");
2964 if ((ret = ctf_merge_set_nthreads(cmp, nthrs)) != 0) {
2965 ctf_merge_fini(cmp);
2966 *errp = ret;
2967 goto out;
2968 }
2969
2970 for (i = 0; i < ndies; i++) {
2971 cup = &cdies[i];
2972 ctf_dprintf("adding cu %s (%p)\n", cup->cu_name,
2973 cup->cu_ctfp);
2974 if ((ret = ctf_merge_add(cmp, cup->cu_ctfp)) != 0) {
2975 ctf_merge_fini(cmp);
2976 *errp = ret;
2977 goto out;
2978 }
2979 }
2980
2981 ctf_dprintf("performing merge\n");
2982 ret = ctf_merge_merge(cmp, fpp);
2983 if (ret != 0) {
2984 ctf_dprintf("failed merge!\n");
2985 *fpp = NULL;
2986 ctf_merge_fini(cmp);
2987 *errp = ret;
2988 goto out;
2989 }
2990 ctf_merge_fini(cmp);
2991 *errp = 0;
2992 ctf_dprintf("successfully converted!\n");
2993 } else {
|