1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2019, Joyent, Inc.
14 */
15
16 /*
 * To perform a merge of two CTF containers, we first diff the two containers'
 * types. For every type that's in the src container, but not in the dst
 * container, we note it and add it to the dst container. If there are any
 * objects or functions associated with src, we go through and update the types
 * that they refer to such that they all refer to types in the dst container.
22 *
23 * The bulk of the logic for the merge, after we've run the diff, occurs in
24 * ctf_merge_common().
25 *
 * In terms of exported APIs, we don't really export a simple "merge two
 * containers" operation, as the general way this is used, in something like
 * ctfmerge(1), is to add all the containers and then let us figure out the
 * best way to merge them.
30 */
31
32 #include <libctf_impl.h>
33 #include <sys/debug.h>
34 #include <sys/list.h>
35 #include <stddef.h>
36 #include <fcntl.h>
37 #include <sys/types.h>
38 #include <sys/stat.h>
39 #include <mergeq.h>
40 #include <errno.h>
41
42 typedef struct ctf_merge_tinfo {
43 uint16_t cmt_map; /* Map to the type in out */
44 boolean_t cmt_fixup;
45 boolean_t cmt_forward;
46 boolean_t cmt_missing;
47 } ctf_merge_tinfo_t;
48
49 /*
50 * State required for doing an individual merge of two containers.
51 */
52 typedef struct ctf_merge_types {
53 ctf_file_t *cm_out; /* Output CTF file */
54 ctf_file_t *cm_src; /* Input CTF file */
55 ctf_merge_tinfo_t *cm_tmap; /* Type state information */
56 boolean_t cm_dedup; /* Are we doing a dedup? */
57 boolean_t cm_unique; /* are we doing a uniquify? */
58 } ctf_merge_types_t;
59
60 typedef struct ctf_merge_objmap {
61 list_node_t cmo_node;
62 const char *cmo_name; /* Symbol name */
63 const char *cmo_file; /* Symbol file */
64 ulong_t cmo_idx; /* Symbol ID */
65 Elf64_Sym cmo_sym; /* Symbol Entry */
66 ctf_id_t cmo_tid; /* Type ID */
67 } ctf_merge_objmap_t;
68
69 typedef struct ctf_merge_funcmap {
70 list_node_t cmf_node;
71 const char *cmf_name; /* Symbol name */
72 const char *cmf_file; /* Symbol file */
73 ulong_t cmf_idx; /* Symbol ID */
74 Elf64_Sym cmf_sym; /* Symbol Entry */
75 ctf_id_t cmf_rtid; /* Type ID */
76 uint_t cmf_flags; /* ctf_funcinfo_t ctc_flags */
77 uint_t cmf_argc; /* Number of arguments */
78 ctf_id_t cmf_args[]; /* Types of arguments */
79 } ctf_merge_funcmap_t;
80
81 typedef struct ctf_merge_input {
82 list_node_t cmi_node;
83 ctf_file_t *cmi_input;
84 list_t cmi_omap;
85 list_t cmi_fmap;
86 boolean_t cmi_created;
87 } ctf_merge_input_t;
88
89 struct ctf_merge_handle {
90 list_t cmh_inputs; /* Input list */
91 uint_t cmh_ninputs; /* Number of inputs */
92 uint_t cmh_nthreads; /* Number of threads to use */
93 ctf_file_t *cmh_unique; /* ctf to uniquify against */
94 boolean_t cmh_msyms; /* Should we merge symbols/funcs? */
95 int cmh_ofd; /* FD for output file */
96 int cmh_flags; /* Flags that control merge behavior */
97 char *cmh_label; /* Optional label */
98 char *cmh_pname; /* Parent name */
99 };
100
101 typedef struct ctf_merge_symbol_arg {
102 list_t *cmsa_objmap;
103 list_t *cmsa_funcmap;
104 ctf_file_t *cmsa_out;
105 boolean_t cmsa_dedup;
106 } ctf_merge_symbol_arg_t;
107
108 static int ctf_merge_add_type(ctf_merge_types_t *, ctf_id_t);
109
110 static ctf_id_t
111 ctf_merge_gettype(ctf_merge_types_t *cmp, ctf_id_t id)
112 {
113 if (cmp->cm_dedup == B_FALSE) {
114 VERIFY(cmp->cm_tmap[id].cmt_map != 0);
115 return (cmp->cm_tmap[id].cmt_map);
116 }
117
118 while (cmp->cm_tmap[id].cmt_missing == B_FALSE) {
119 VERIFY(cmp->cm_tmap[id].cmt_map != 0);
120 id = cmp->cm_tmap[id].cmt_map;
121 }
122 VERIFY(cmp->cm_tmap[id].cmt_map != 0);
123 return (cmp->cm_tmap[id].cmt_map);
124 }
125
126 static void
127 ctf_merge_diffcb(ctf_file_t *ifp, ctf_id_t iid, boolean_t same, ctf_file_t *ofp,
128 ctf_id_t oid, void *arg)
129 {
130 ctf_merge_types_t *cmp = arg;
131 ctf_merge_tinfo_t *cmt = cmp->cm_tmap;
132
133 if (same == B_TRUE) {
134 if (ctf_type_kind(ifp, iid) == CTF_K_FORWARD &&
135 ctf_type_kind(ofp, oid) != CTF_K_FORWARD) {
136 VERIFY(cmt[oid].cmt_map == 0);
137
138 /*
139 * If we're uniquifying types, it's possible for the
140 * container that we're uniquifying against to have a
141 * forward which exists in the container being reduced.
142 * For example, genunix has the machcpu structure as a
143 * forward which is actually in unix and we uniquify
144 * unix against genunix. In such cases, we explicitly do
145 * not do any mapping of the forward information, lest
146 * we risk losing the real definition. Instead, mark
147 * that it's missing.
148 */
149 if (cmp->cm_unique == B_TRUE) {
150 cmt[oid].cmt_missing = B_TRUE;
151 return;
152 }
153
154 cmt[oid].cmt_map = iid;
155 cmt[oid].cmt_forward = B_TRUE;
156 ctf_dprintf("merge diff forward mapped %d->%d\n", oid,
157 iid);
158 return;
159 }
160
161 /*
162 * We could have multiple things that a given type ends up
163 * matching in the world of forwards and pointers to forwards.
164 * For now just take the first one...
165 */
166 if (cmt[oid].cmt_map != 0)
167 return;
168 cmt[oid].cmt_map = iid;
169 ctf_dprintf("merge diff mapped %d->%d\n", oid, iid);
170 } else if (ifp == cmp->cm_src) {
171 VERIFY(cmt[iid].cmt_map == 0);
172 cmt[iid].cmt_missing = B_TRUE;
173 ctf_dprintf("merge diff said %d is missing\n", iid);
174 }
175 }
176
177 static int
178 ctf_merge_add_number(ctf_merge_types_t *cmp, ctf_id_t id)
179 {
180 int ret, flags;
181 const ctf_type_t *tp;
182 const char *name;
183 ctf_encoding_t en;
184
185 if (ctf_type_encoding(cmp->cm_src, id, &en) != 0)
186 return (CTF_ERR);
187
188 tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
189 name = ctf_strraw(cmp->cm_src, tp->ctt_name);
190 if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
191 flags = CTF_ADD_ROOT;
192 else
193 flags = CTF_ADD_NONROOT;
194
195 ret = ctf_add_encoded(cmp->cm_out, flags, name, &en,
196 ctf_type_kind(cmp->cm_src, id));
197
198 if (ret == CTF_ERR)
199 return (ret);
200
201 VERIFY(cmp->cm_tmap[id].cmt_map == 0);
202 cmp->cm_tmap[id].cmt_map = ret;
203 return (0);
204 }
205
206 static int
207 ctf_merge_add_array(ctf_merge_types_t *cmp, ctf_id_t id)
208 {
209 int ret, flags;
210 const ctf_type_t *tp;
211 ctf_arinfo_t ar;
212
213 if (ctf_array_info(cmp->cm_src, id, &ar) == CTF_ERR)
214 return (CTF_ERR);
215
216 tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
217 if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
218 flags = CTF_ADD_ROOT;
219 else
220 flags = CTF_ADD_NONROOT;
221
222 if (cmp->cm_tmap[ar.ctr_contents].cmt_map == 0) {
223 ret = ctf_merge_add_type(cmp, ar.ctr_contents);
224 if (ret != 0)
225 return (ret);
226 ASSERT(cmp->cm_tmap[ar.ctr_contents].cmt_map != 0);
227 }
228 ar.ctr_contents = ctf_merge_gettype(cmp, ar.ctr_contents);
229
230 if (cmp->cm_tmap[ar.ctr_index].cmt_map == 0) {
231 ret = ctf_merge_add_type(cmp, ar.ctr_index);
232 if (ret != 0)
233 return (ret);
234 ASSERT(cmp->cm_tmap[ar.ctr_index].cmt_map != 0);
235 }
236 ar.ctr_index = ctf_merge_gettype(cmp, ar.ctr_index);
237
238 ret = ctf_add_array(cmp->cm_out, flags, &ar);
239 if (ret == CTF_ERR)
240 return (ret);
241
242 VERIFY(cmp->cm_tmap[id].cmt_map == 0);
243 cmp->cm_tmap[id].cmt_map = ret;
244
245 return (0);
246 }
247
248 static int
249 ctf_merge_add_reftype(ctf_merge_types_t *cmp, ctf_id_t id)
250 {
251 int ret, flags;
252 const ctf_type_t *tp;
253 ctf_id_t reftype;
254 const char *name;
255
256 tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
257 name = ctf_strraw(cmp->cm_src, tp->ctt_name);
258 if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
259 flags = CTF_ADD_ROOT;
260 else
261 flags = CTF_ADD_NONROOT;
262
263 reftype = ctf_type_reference(cmp->cm_src, id);
264 if (reftype == CTF_ERR)
265 return (ctf_set_errno(cmp->cm_out, ctf_errno(cmp->cm_src)));
266
267 if (cmp->cm_tmap[reftype].cmt_map == 0) {
268 ret = ctf_merge_add_type(cmp, reftype);
269 if (ret != 0)
270 return (ret);
271 ASSERT(cmp->cm_tmap[reftype].cmt_map != 0);
272 }
273 reftype = ctf_merge_gettype(cmp, reftype);
274
275 ret = ctf_add_reftype(cmp->cm_out, flags, name, reftype,
276 ctf_type_kind(cmp->cm_src, id));
277 if (ret == CTF_ERR)
278 return (ret);
279
280 VERIFY(cmp->cm_tmap[id].cmt_map == 0);
281 cmp->cm_tmap[id].cmt_map = ret;
282 return (0);
283 }
284
285 static int
286 ctf_merge_add_typedef(ctf_merge_types_t *cmp, ctf_id_t id)
287 {
288 int ret, flags;
289 const ctf_type_t *tp;
290 const char *name;
291 ctf_id_t reftype;
292
293 tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
294 name = ctf_strraw(cmp->cm_src, tp->ctt_name);
295 if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
296 flags = CTF_ADD_ROOT;
297 else
298 flags = CTF_ADD_NONROOT;
299
300 reftype = ctf_type_reference(cmp->cm_src, id);
301 if (reftype == CTF_ERR)
302 return (ctf_set_errno(cmp->cm_out, ctf_errno(cmp->cm_src)));
303
304 if (cmp->cm_tmap[reftype].cmt_map == 0) {
305 ret = ctf_merge_add_type(cmp, reftype);
306 if (ret != 0)
307 return (ret);
308 ASSERT(cmp->cm_tmap[reftype].cmt_map != 0);
309 }
310 reftype = ctf_merge_gettype(cmp, reftype);
311
312 ret = ctf_add_typedef(cmp->cm_out, flags, name, reftype);
313 if (ret == CTF_ERR)
314 return (ret);
315
316 VERIFY(cmp->cm_tmap[id].cmt_map == 0);
317 cmp->cm_tmap[id].cmt_map = ret;
318 return (0);
319 }
320
321 typedef struct ctf_merge_enum {
322 ctf_file_t *cme_fp;
323 ctf_id_t cme_id;
324 } ctf_merge_enum_t;
325
326 static int
327 ctf_merge_add_enumerator(const char *name, int value, void *arg)
328 {
329 ctf_merge_enum_t *cmep = arg;
330
331 return (ctf_add_enumerator(cmep->cme_fp, cmep->cme_id, name, value) ==
332 CTF_ERR);
333 }
334
335 static int
336 ctf_merge_add_enum(ctf_merge_types_t *cmp, ctf_id_t id)
337 {
338 int flags;
339 const ctf_type_t *tp;
340 const char *name;
341 ctf_id_t enumid;
342 ctf_merge_enum_t cme;
343
344 tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
345 name = ctf_strraw(cmp->cm_src, tp->ctt_name);
346 if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
347 flags = CTF_ADD_ROOT;
348 else
349 flags = CTF_ADD_NONROOT;
350
351 enumid = ctf_add_enum(cmp->cm_out, flags, name);
352 if (enumid == CTF_ERR)
353 return (enumid);
354
355 cme.cme_fp = cmp->cm_out;
356 cme.cme_id = enumid;
357 if (ctf_enum_iter(cmp->cm_src, id, ctf_merge_add_enumerator,
358 &cme) != 0)
359 return (CTF_ERR);
360
361 VERIFY(cmp->cm_tmap[id].cmt_map == 0);
362 cmp->cm_tmap[id].cmt_map = enumid;
363 return (0);
364 }
365
366 static int
367 ctf_merge_add_func(ctf_merge_types_t *cmp, ctf_id_t id)
368 {
369 int ret, flags, i;
370 const ctf_type_t *tp;
371 ctf_funcinfo_t ctc;
372 ctf_id_t *argv;
373
374 tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
375 if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
376 flags = CTF_ADD_ROOT;
377 else
378 flags = CTF_ADD_NONROOT;
379
380 if (ctf_func_info_by_id(cmp->cm_src, id, &ctc) == CTF_ERR)
381 return (ctf_set_errno(cmp->cm_out, ctf_errno(cmp->cm_src)));
382
383 argv = ctf_alloc(sizeof (ctf_id_t) * ctc.ctc_argc);
384 if (argv == NULL)
385 return (ctf_set_errno(cmp->cm_out, ENOMEM));
386 if (ctf_func_args_by_id(cmp->cm_src, id, ctc.ctc_argc, argv) ==
387 CTF_ERR) {
388 ctf_free(argv, sizeof (ctf_id_t) * ctc.ctc_argc);
389 return (ctf_set_errno(cmp->cm_out, ctf_errno(cmp->cm_src)));
390 }
391
392 if (cmp->cm_tmap[ctc.ctc_return].cmt_map == 0) {
393 ret = ctf_merge_add_type(cmp, ctc.ctc_return);
394 if (ret != 0)
395 return (ret);
396 ASSERT(cmp->cm_tmap[ctc.ctc_return].cmt_map != 0);
397 }
398 ctc.ctc_return = ctf_merge_gettype(cmp, ctc.ctc_return);
399
400 for (i = 0; i < ctc.ctc_argc; i++) {
401 if (cmp->cm_tmap[argv[i]].cmt_map == 0) {
402 ret = ctf_merge_add_type(cmp, argv[i]);
403 if (ret != 0)
404 return (ret);
405 ASSERT(cmp->cm_tmap[argv[i]].cmt_map != 0);
406 }
407 argv[i] = ctf_merge_gettype(cmp, argv[i]);
408 }
409
410 ret = ctf_add_funcptr(cmp->cm_out, flags, &ctc, argv);
411 ctf_free(argv, sizeof (ctf_id_t) * ctc.ctc_argc);
412 if (ret == CTF_ERR)
413 return (ret);
414
415 VERIFY(cmp->cm_tmap[id].cmt_map == 0);
416 cmp->cm_tmap[id].cmt_map = ret;
417 return (0);
418 }
419
420 static int
421 ctf_merge_add_forward(ctf_merge_types_t *cmp, ctf_id_t id)
422 {
423 int ret, flags;
424 const ctf_type_t *tp;
425 const char *name;
426
427 tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
428 name = ctf_strraw(cmp->cm_src, tp->ctt_name);
429 if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
430 flags = CTF_ADD_ROOT;
431 else
432 flags = CTF_ADD_NONROOT;
433
434 /*
435 * ctf_add_forward tries to check to see if a given forward already
436 * exists in one of its hash tables. If we're here then we know that we
437 * have a forward in a container that isn't present in another.
438 * Therefore, we choose a token hash table to satisfy the API choice
439 * here.
440 */
441 ret = ctf_add_forward(cmp->cm_out, flags, name, CTF_K_STRUCT);
442 if (ret == CTF_ERR)
443 return (CTF_ERR);
444
445 VERIFY(cmp->cm_tmap[id].cmt_map == 0);
446 cmp->cm_tmap[id].cmt_map = ret;
447 return (0);
448 }
449
450 typedef struct ctf_merge_su {
451 ctf_merge_types_t *cms_cm;
452 ctf_id_t cms_id;
453 } ctf_merge_su_t;
454
455 static int
456 ctf_merge_add_member(const char *name, ctf_id_t type, ulong_t offset, void *arg)
457 {
458 ctf_merge_su_t *cms = arg;
459
460 VERIFY(cms->cms_cm->cm_tmap[type].cmt_map != 0);
461 type = cms->cms_cm->cm_tmap[type].cmt_map;
462
463 ctf_dprintf("Trying to add member %s to %d\n", name, cms->cms_id);
464 return (ctf_add_member(cms->cms_cm->cm_out, cms->cms_id, name,
465 type, offset) == CTF_ERR);
466 }
467
468 /*
469 * During the first pass, we always add the generic structure and union but none
470 * of its members as they might not all have been mapped yet. Instead we just
471 * mark all structures and unions as needing to be fixed up.
472 */
473 static int
474 ctf_merge_add_sou(ctf_merge_types_t *cmp, ctf_id_t id, boolean_t forward)
475 {
476 int flags, kind;
477 const ctf_type_t *tp;
478 const char *name;
479 ctf_id_t suid;
480
481 tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
482 name = ctf_strraw(cmp->cm_src, tp->ctt_name);
483 if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
484 flags = CTF_ADD_ROOT;
485 else
486 flags = CTF_ADD_NONROOT;
487 kind = ctf_type_kind(cmp->cm_src, id);
488
489 if (kind == CTF_K_STRUCT)
490 suid = ctf_add_struct(cmp->cm_out, flags, name);
491 else
492 suid = ctf_add_union(cmp->cm_out, flags, name);
493
494 if (suid == CTF_ERR)
495 return (suid);
496
497 /*
498 * If this is a forward reference then its mapping should already
499 * exist.
500 */
501 if (forward == B_FALSE) {
502 VERIFY(cmp->cm_tmap[id].cmt_map == 0);
503 cmp->cm_tmap[id].cmt_map = suid;
504 ctf_dprintf("added sou \"%s\" as (%d) %d->%d\n", name, kind, id,
505 suid);
506 } else {
507 VERIFY(cmp->cm_tmap[id].cmt_map == suid);
508 }
509 cmp->cm_tmap[id].cmt_fixup = B_TRUE;
510
511 return (0);
512 }
513
514 static int
515 ctf_merge_add_type(ctf_merge_types_t *cmp, ctf_id_t id)
516 {
517 int kind, ret;
518
519 /*
520 * We may end up evaluating a type more than once as we may deal with it
521 * as we recursively evaluate some kind of reference and then we may see
522 * it normally.
523 */
524 if (cmp->cm_tmap[id].cmt_map != 0)
525 return (0);
526
527 kind = ctf_type_kind(cmp->cm_src, id);
528 switch (kind) {
529 case CTF_K_INTEGER:
530 case CTF_K_FLOAT:
531 ret = ctf_merge_add_number(cmp, id);
532 break;
533 case CTF_K_ARRAY:
534 ret = ctf_merge_add_array(cmp, id);
535 break;
536 case CTF_K_POINTER:
537 case CTF_K_VOLATILE:
538 case CTF_K_CONST:
539 case CTF_K_RESTRICT:
540 ret = ctf_merge_add_reftype(cmp, id);
541 break;
542 case CTF_K_TYPEDEF:
543 ret = ctf_merge_add_typedef(cmp, id);
544 break;
545 case CTF_K_ENUM:
546 ret = ctf_merge_add_enum(cmp, id);
547 break;
548 case CTF_K_FUNCTION:
549 ret = ctf_merge_add_func(cmp, id);
550 break;
551 case CTF_K_FORWARD:
552 ret = ctf_merge_add_forward(cmp, id);
553 break;
554 case CTF_K_STRUCT:
555 case CTF_K_UNION:
556 ret = ctf_merge_add_sou(cmp, id, B_FALSE);
557 break;
558 case CTF_K_UNKNOWN:
559 /*
560 * We don't add unknown types, and we later assert that nothing
561 * should reference them.
562 */
563 return (0);
564 default:
565 abort();
566 }
567
568 return (ret);
569 }
570
571 static int
572 ctf_merge_fixup_sou(ctf_merge_types_t *cmp, ctf_id_t id)
573 {
574 ctf_dtdef_t *dtd;
575 ctf_merge_su_t cms;
576 ctf_id_t mapid;
577 ssize_t size;
578
579 mapid = cmp->cm_tmap[id].cmt_map;
580 VERIFY(mapid != 0);
581 dtd = ctf_dtd_lookup(cmp->cm_out, mapid);
582 VERIFY(dtd != NULL);
583
584 ctf_dprintf("Trying to fix up sou %d\n", id);
585 cms.cms_cm = cmp;
586 cms.cms_id = mapid;
587 if (ctf_member_iter(cmp->cm_src, id, ctf_merge_add_member, &cms) != 0)
588 return (CTF_ERR);
589
590 if ((size = ctf_type_size(cmp->cm_src, id)) == CTF_ERR)
591 return (CTF_ERR);
592 if (ctf_set_size(cmp->cm_out, mapid, size) == CTF_ERR)
593 return (CTF_ERR);
594
595 return (0);
596 }
597
598 static int
599 ctf_merge_fixup_type(ctf_merge_types_t *cmp, ctf_id_t id)
600 {
601 int kind, ret;
602
603 kind = ctf_type_kind(cmp->cm_src, id);
604 switch (kind) {
605 case CTF_K_STRUCT:
606 case CTF_K_UNION:
607 ret = ctf_merge_fixup_sou(cmp, id);
608 break;
609 default:
610 VERIFY(0);
611 ret = CTF_ERR;
612 }
613
614 return (ret);
615 }
616
617 /*
618 * Now that we've successfully merged everything, we're going to remap the type
619 * table.
620 *
621 * Remember we have two containers: ->cm_src is what we're working from, and
622 * ->cm_out is where we are building the de-duplicated CTF.
623 *
624 * The index of this table is always the type IDs in ->cm_src.
625 *
626 * When we built this table originally in ctf_diff_self(), if we found a novel
627 * type, we marked it as .cmt_missing to indicate it needs adding to ->cm_out.
628 * Otherwise, .cmt_map indicated the ->cm_src type ID that this type duplicates.
629 *
630 * Then, in ctf_merge_common(), we walked through and added all "cmt_missing"
631 * types to ->cm_out with ctf_merge_add_type(). These routines update cmt_map
632 * to be the *new* type ID in ->cm_out. In this function, you can read
633 * "cmt_missing" as meaning "added to ->cm_out, and cmt_map updated".
634 *
635 * So at this point, we need to mop up all types where .cmt_missing == B_FALSE,
636 * making sure *their* .cmt_map values also point to the ->cm_out container.
637 */
638 static void
639 ctf_merge_dedup_remap(ctf_merge_types_t *cmp)
640 {
641 int i;
642
643 for (i = 1; i < cmp->cm_src->ctf_typemax + 1; i++) {
644 ctf_id_t tid;
645
646 if (cmp->cm_tmap[i].cmt_missing == B_TRUE) {
647 VERIFY(cmp->cm_tmap[i].cmt_map != 0);
648 continue;
649 }
650
651 tid = i;
652 while (cmp->cm_tmap[tid].cmt_missing == B_FALSE) {
653 VERIFY(cmp->cm_tmap[tid].cmt_map != 0);
654 tid = cmp->cm_tmap[tid].cmt_map;
655 }
656 VERIFY(cmp->cm_tmap[tid].cmt_map != 0);
657 cmp->cm_tmap[i].cmt_map = cmp->cm_tmap[tid].cmt_map;
658 }
659 }
660
661
662 /*
663 * We're going to do three passes over the containers.
664 *
665 * Pass 1 checks for forward references in the output container that we know
666 * exist in the source container.
667 *
668 * Pass 2 adds all the missing types from the source container. As part of this
669 * we may be adding a type as a forward reference that doesn't exist yet.
670 * Any types that we encounter in this form, we need to add to a third pass.
671 *
672 * Pass 3 is the fixup pass. Here we go through and find all the types that were
673 * missing in the first.
674 *
675 * Importantly, we *must* call ctf_update between the second and third pass,
676 * otherwise several of the libctf functions will not properly find the data in
677 * the container. If we're doing a dedup we also fix up the type mapping.
678 */
679 static int
680 ctf_merge_common(ctf_merge_types_t *cmp)
681 {
682 int ret, i;
683
684 ctf_phase_dump(cmp->cm_src, "merge-common-src", NULL);
685 ctf_phase_dump(cmp->cm_out, "merge-common-dest", NULL);
686
687 /* Pass 1 */
688 for (i = 1; i <= cmp->cm_src->ctf_typemax; i++) {
689 if (cmp->cm_tmap[i].cmt_forward == B_TRUE) {
690 ret = ctf_merge_add_sou(cmp, i, B_TRUE);
691 if (ret != 0) {
692 return (ret);
693 }
694 }
695 }
696
697 /* Pass 2 */
698 for (i = 1; i <= cmp->cm_src->ctf_typemax; i++) {
699 if (cmp->cm_tmap[i].cmt_missing == B_TRUE) {
700 ret = ctf_merge_add_type(cmp, i);
701 if (ret != 0) {
702 ctf_dprintf("Failed to merge type %d\n", i);
703 return (ret);
704 }
705 }
706 }
707
708 ret = ctf_update(cmp->cm_out);
709 if (ret != 0)
710 return (ret);
711
712 if (cmp->cm_dedup == B_TRUE) {
713 ctf_merge_dedup_remap(cmp);
714 }
715
716 ctf_dprintf("Beginning merge pass 3\n");
717 /* Pass 3 */
718 for (i = 1; i <= cmp->cm_src->ctf_typemax; i++) {
719 if (cmp->cm_tmap[i].cmt_fixup == B_TRUE) {
720 ret = ctf_merge_fixup_type(cmp, i);
721 if (ret != 0)
722 return (ret);
723 }
724 }
725
726 return (0);
727 }
728
729 /*
730 * Uniquification is slightly different from a stock merge. For starters, we
731 * don't need to replace any forward references in the output. In this case
732 * though, the types that already exist are in a parent container to the empty
733 * output container.
734 */
735 static int
736 ctf_merge_uniquify_types(ctf_merge_types_t *cmp)
737 {
738 int i, ret;
739
740 for (i = 1; i <= cmp->cm_src->ctf_typemax; i++) {
741 if (cmp->cm_tmap[i].cmt_missing == B_FALSE)
742 continue;
743 ret = ctf_merge_add_type(cmp, i);
744 if (ret != 0)
745 return (ret);
746 }
747
748 ret = ctf_update(cmp->cm_out);
749 if (ret != 0)
750 return (ret);
751
752 for (i = 1; i <= cmp->cm_src->ctf_typemax; i++) {
753 if (cmp->cm_tmap[i].cmt_fixup == B_FALSE)
754 continue;
755 ret = ctf_merge_fixup_type(cmp, i);
756 if (ret != 0)
757 return (ret);
758 }
759
760 return (0);
761 }
762
763 static int
764 ctf_merge_types_init(ctf_merge_types_t *cmp)
765 {
766 cmp->cm_tmap = ctf_alloc(sizeof (ctf_merge_tinfo_t) *
767 (cmp->cm_src->ctf_typemax + 1));
768 if (cmp->cm_tmap == NULL)
769 return (ctf_set_errno(cmp->cm_out, ENOMEM));
770 bzero(cmp->cm_tmap, sizeof (ctf_merge_tinfo_t) *
771 (cmp->cm_src->ctf_typemax + 1));
772 return (0);
773 }
774
775 static void
776 ctf_merge_types_fini(ctf_merge_types_t *cmp)
777 {
778 ctf_free(cmp->cm_tmap, sizeof (ctf_merge_tinfo_t) *
779 (cmp->cm_src->ctf_typemax + 1));
780 }
781
782 /*
783 * After performing a pass, we need to go through the object and function type
784 * maps and potentially fix them up based on the new maps that we have.
785 */
786 static void
787 ctf_merge_fixup_symmaps(ctf_merge_types_t *cmp, ctf_merge_input_t *cmi)
788 {
789 ctf_merge_objmap_t *cmo;
790 ctf_merge_funcmap_t *cmf;
791
792 for (cmo = list_head(&cmi->cmi_omap); cmo != NULL;
793 cmo = list_next(&cmi->cmi_omap, cmo)) {
794 VERIFY3S(cmo->cmo_tid, !=, 0);
795 VERIFY(cmp->cm_tmap[cmo->cmo_tid].cmt_map != 0);
796 cmo->cmo_tid = cmp->cm_tmap[cmo->cmo_tid].cmt_map;
797 }
798
799 for (cmf = list_head(&cmi->cmi_fmap); cmf != NULL;
800 cmf = list_next(&cmi->cmi_fmap, cmf)) {
801 int i;
802
803 VERIFY(cmp->cm_tmap[cmf->cmf_rtid].cmt_map != 0);
804 cmf->cmf_rtid = cmp->cm_tmap[cmf->cmf_rtid].cmt_map;
805 for (i = 0; i < cmf->cmf_argc; i++) {
806 VERIFY(cmp->cm_tmap[cmf->cmf_args[i]].cmt_map != 0);
807 cmf->cmf_args[i] =
808 cmp->cm_tmap[cmf->cmf_args[i]].cmt_map;
809 }
810 }
811 }
812
813 /*
814 * Merge the types contained inside of two input files. The second input file is
815 * always going to be the destination. We're guaranteed that it's always
816 * writeable.
817 */
818 static int
819 ctf_merge_types(void *arg, void *arg2, void **outp, void *unsued)
820 {
821 int ret;
822 ctf_merge_types_t cm;
823 ctf_diff_t *cdp;
824 ctf_merge_input_t *scmi = arg;
825 ctf_merge_input_t *dcmi = arg2;
826 ctf_file_t *out = dcmi->cmi_input;
827 ctf_file_t *source = scmi->cmi_input;
828
829 ctf_dprintf("merging %p->%p\n", source, out);
830
831 if (!(out->ctf_flags & LCTF_RDWR))
832 return (ctf_set_errno(out, ECTF_RDONLY));
833
834 if (ctf_getmodel(out) != ctf_getmodel(source))
835 return (ctf_set_errno(out, ECTF_DMODEL));
836
837 if ((ret = ctf_diff_init(out, source, &cdp)) != 0)
838 return (ret);
839
840 cm.cm_out = out;
841 cm.cm_src = source;
842 cm.cm_dedup = B_FALSE;
843 cm.cm_unique = B_FALSE;
844 ret = ctf_merge_types_init(&cm);
845 if (ret != 0) {
846 ctf_diff_fini(cdp);
847 return (ctf_set_errno(out, ret));
848 }
849
850 ret = ctf_diff_types(cdp, ctf_merge_diffcb, &cm);
851 if (ret != 0)
852 goto cleanup;
853 ret = ctf_merge_common(&cm);
854 ctf_dprintf("merge common returned with %d\n", ret);
855 if (ret == 0) {
856 ret = ctf_update(out);
857 ctf_dprintf("update returned with %d\n", ret);
858 } else {
859 goto cleanup;
860 }
861
862 /*
863 * Now we need to fix up the object and function maps.
864 */
865 ctf_merge_fixup_symmaps(&cm, scmi);
866
867 /*
868 * Now that we've fixed things up, we need to give our function and
869 * object maps to the destination, such that it can continue to update
870 * them going forward.
871 */
872 list_move_tail(&dcmi->cmi_fmap, &scmi->cmi_fmap);
873 list_move_tail(&dcmi->cmi_omap, &scmi->cmi_omap);
874
875 cleanup:
876 if (ret == 0)
877 *outp = dcmi;
878 ctf_merge_types_fini(&cm);
879 ctf_diff_fini(cdp);
880 if (ret != 0)
881 return (ctf_errno(out));
882 ctf_phase_bump();
883 return (0);
884 }
885
886 static int
887 ctf_uniquify_types(ctf_merge_t *cmh, ctf_file_t *src, ctf_file_t **outp)
888 {
889 int err, ret;
890 ctf_file_t *out;
891 ctf_merge_types_t cm;
892 ctf_diff_t *cdp;
893 ctf_merge_input_t *cmi;
894 ctf_file_t *parent = cmh->cmh_unique;
895
896 *outp = NULL;
897 out = ctf_fdcreate(cmh->cmh_ofd, &err);
898 if (out == NULL)
899 return (ctf_set_errno(src, err));
900
901 out->ctf_parname = cmh->cmh_pname;
902 if (ctf_setmodel(out, ctf_getmodel(parent)) != 0) {
903 (void) ctf_set_errno(src, ctf_errno(out));
904 ctf_close(out);
905 return (CTF_ERR);
906 }
907
908 if (ctf_import(out, parent) != 0) {
909 (void) ctf_set_errno(src, ctf_errno(out));
910 ctf_close(out);
911 return (CTF_ERR);
912 }
913
914 if ((ret = ctf_diff_init(parent, src, &cdp)) != 0) {
915 ctf_close(out);
916 return (ctf_set_errno(src, ctf_errno(parent)));
917 }
918
919 cm.cm_out = parent;
920 cm.cm_src = src;
921 cm.cm_dedup = B_FALSE;
922 cm.cm_unique = B_TRUE;
923 ret = ctf_merge_types_init(&cm);
924 if (ret != 0) {
925 ctf_close(out);
926 ctf_diff_fini(cdp);
927 return (ctf_set_errno(src, ret));
928 }
929
930 ret = ctf_diff_types(cdp, ctf_merge_diffcb, &cm);
931 if (ret == 0) {
932 cm.cm_out = out;
933 ret = ctf_merge_uniquify_types(&cm);
934 if (ret == 0)
935 ret = ctf_update(out);
936 }
937
938 if (ret != 0) {
939 ctf_merge_types_fini(&cm);
940 ctf_diff_fini(cdp);
941 return (ctf_set_errno(src, ctf_errno(cm.cm_out)));
942 }
943
944 for (cmi = list_head(&cmh->cmh_inputs); cmi != NULL;
945 cmi = list_next(&cmh->cmh_inputs, cmi)) {
946 ctf_merge_fixup_symmaps(&cm, cmi);
947 }
948
949 ctf_merge_types_fini(&cm);
950 ctf_diff_fini(cdp);
951 *outp = out;
952 return (0);
953 }
954
955 static void
956 ctf_merge_fini_input(ctf_merge_input_t *cmi)
957 {
958 ctf_merge_objmap_t *cmo;
959 ctf_merge_funcmap_t *cmf;
960
961 while ((cmo = list_remove_head(&cmi->cmi_omap)) != NULL)
962 ctf_free(cmo, sizeof (ctf_merge_objmap_t));
963
964 while ((cmf = list_remove_head(&cmi->cmi_fmap)) != NULL)
965 ctf_free(cmf, sizeof (ctf_merge_funcmap_t) +
966 sizeof (ctf_id_t) * cmf->cmf_argc);
967
968 if (cmi->cmi_created == B_TRUE && cmi->cmi_input != NULL)
969 ctf_close(cmi->cmi_input);
970
971 ctf_free(cmi, sizeof (ctf_merge_input_t));
972 }
973
974 void
975 ctf_merge_fini(ctf_merge_t *cmh)
976 {
977 size_t len;
978 ctf_merge_input_t *cmi;
979
980 if (cmh->cmh_label != NULL) {
981 len = strlen(cmh->cmh_label) + 1;
982 ctf_free(cmh->cmh_label, len);
983 }
984
985 if (cmh->cmh_pname != NULL) {
986 len = strlen(cmh->cmh_pname) + 1;
987 ctf_free(cmh->cmh_pname, len);
988 }
989
990 while ((cmi = list_remove_head(&cmh->cmh_inputs)) != NULL)
991 ctf_merge_fini_input(cmi);
992
993 ctf_free(cmh, sizeof (ctf_merge_t));
994 }
995
996 ctf_merge_t *
997 ctf_merge_init(int fd, int *errp)
998 {
999 int err;
1000 ctf_merge_t *out;
1001 struct stat st;
1002
1003 if (errp == NULL)
1004 errp = &err;
1005
1006 if (fd != -1 && fstat(fd, &st) != 0) {
1007 *errp = EINVAL;
1008 return (NULL);
1009 }
1010
1011 out = ctf_alloc(sizeof (ctf_merge_t));
1012 if (out == NULL) {
1013 *errp = ENOMEM;
1014 return (NULL);
1015 }
1016
1017 if (fd == -1) {
1018 out->cmh_msyms = B_FALSE;
1019 } else {
1020 out->cmh_msyms = B_TRUE;
1021 }
1022
1023 list_create(&out->cmh_inputs, sizeof (ctf_merge_input_t),
1024 offsetof(ctf_merge_input_t, cmi_node));
1025 out->cmh_ninputs = 0;
1026 out->cmh_nthreads = 1;
1027 out->cmh_unique = NULL;
1028 out->cmh_ofd = fd;
1029 out->cmh_flags = 0;
1030 out->cmh_label = NULL;
1031 out->cmh_pname = NULL;
1032
1033 return (out);
1034 }
1035
1036 int
1037 ctf_merge_label(ctf_merge_t *cmh, const char *label)
1038 {
1039 char *dup;
1040
1041 if (label == NULL)
1042 return (EINVAL);
1043
1044 dup = ctf_strdup(label);
1045 if (dup == NULL)
1046 return (EAGAIN);
1047
1048 if (cmh->cmh_label != NULL) {
1049 size_t len = strlen(cmh->cmh_label) + 1;
1050 ctf_free(cmh->cmh_label, len);
1051 }
1052
1053 cmh->cmh_label = dup;
1054 return (0);
1055 }
1056
1057 static int
1058 ctf_merge_add_function(ctf_merge_input_t *cmi, ctf_funcinfo_t *fip, ulong_t idx,
1059 const char *file, const char *name, const Elf64_Sym *symp)
1060 {
1061 ctf_merge_funcmap_t *fmap;
1062
1063 fmap = ctf_alloc(sizeof (ctf_merge_funcmap_t) +
1064 sizeof (ctf_id_t) * fip->ctc_argc);
1065 if (fmap == NULL)
1066 return (ENOMEM);
1067
1068 fmap->cmf_idx = idx;
1069 fmap->cmf_sym = *symp;
1070 fmap->cmf_rtid = fip->ctc_return;
1071 fmap->cmf_flags = fip->ctc_flags;
1072 fmap->cmf_argc = fip->ctc_argc;
1073 fmap->cmf_name = name;
1074 if (ELF64_ST_BIND(symp->st_info) == STB_LOCAL) {
1075 fmap->cmf_file = file;
1076 } else {
1077 fmap->cmf_file = NULL;
1078 }
1079
1080 if (ctf_func_args(cmi->cmi_input, idx, fmap->cmf_argc,
1081 fmap->cmf_args) != 0) {
1082 ctf_free(fmap, sizeof (ctf_merge_funcmap_t) +
1083 sizeof (ctf_id_t) * fip->ctc_argc);
1084 return (ctf_errno(cmi->cmi_input));
1085 }
1086
1087 ctf_dprintf("added initial function %s, %lu, %s %u\n", name, idx,
1088 fmap->cmf_file != NULL ? fmap->cmf_file : "global",
1089 ELF64_ST_BIND(symp->st_info));
1090 list_insert_tail(&cmi->cmi_fmap, fmap);
1091 return (0);
1092 }
1093
1094 static int
1095 ctf_merge_add_object(ctf_merge_input_t *cmi, ctf_id_t id, ulong_t idx,
1096 const char *file, const char *name, const Elf64_Sym *symp)
1097 {
1098 ctf_merge_objmap_t *cmo;
1099
1100 cmo = ctf_alloc(sizeof (ctf_merge_objmap_t));
1101 if (cmo == NULL)
1102 return (ENOMEM);
1103
1104 cmo->cmo_name = name;
1105 if (ELF64_ST_BIND(symp->st_info) == STB_LOCAL) {
1106 cmo->cmo_file = file;
1107 } else {
1108 cmo->cmo_file = NULL;
1109 }
1110 cmo->cmo_idx = idx;
1111 cmo->cmo_tid = id;
1112 cmo->cmo_sym = *symp;
1113 list_insert_tail(&cmi->cmi_omap, cmo);
1114
1115 ctf_dprintf("added initial object %s, %lu, %ld, %s\n", name, idx, id,
1116 cmo->cmo_file != NULL ? cmo->cmo_file : "global");
1117
1118 return (0);
1119 }
1120
1121 static int
1122 ctf_merge_add_symbol(const Elf64_Sym *symp, ulong_t idx, const char *file,
1123 const char *name, boolean_t primary, void *arg)
1124 {
1125 ctf_merge_input_t *cmi = arg;
1126 ctf_file_t *fp = cmi->cmi_input;
1127 ushort_t *data, funcbase;
1128 uint_t type;
1129 ctf_funcinfo_t fi;
1130
1131 /*
1132 * See if there is type information for this. If there is no
1133 * type information for this entry or no translation, then we
1134 * will find the value zero. This indicates no type ID for
1135 * objects and encodes unknown information for functions.
1136 */
1137 if (fp->ctf_sxlate[idx] == -1u)
1138 return (0);
1139 data = (ushort_t *)((uintptr_t)fp->ctf_buf + fp->ctf_sxlate[idx]);
1140 if (*data == 0)
1141 return (0);
1142
1143 type = ELF64_ST_TYPE(symp->st_info);
1144
1145 switch (type) {
1146 case STT_FUNC:
1147 funcbase = *data;
1148 if (LCTF_INFO_KIND(fp, funcbase) != CTF_K_FUNCTION)
1149 return (0);
1150 data++;
1151 fi.ctc_return = *data;
1152 data++;
1153 fi.ctc_argc = LCTF_INFO_VLEN(fp, funcbase);
1154 fi.ctc_flags = 0;
1155
1156 if (fi.ctc_argc != 0 && data[fi.ctc_argc - 1] == 0) {
1157 fi.ctc_flags |= CTF_FUNC_VARARG;
1158 fi.ctc_argc--;
1159 }
1160 return (ctf_merge_add_function(cmi, &fi, idx, file, name,
1161 symp));
1162 case STT_OBJECT:
1163 return (ctf_merge_add_object(cmi, *data, idx, file, name,
1164 symp));
1165 default:
1166 return (0);
1167 }
1168 }
1169
1170 /*
1171 * Whenever we create an entry to merge, we then go and add a second empty
1172 * ctf_file_t which we use for the purposes of our merging. It's not the best,
1173 * but it's the best that we've got at the moment.
1174 */
1175 int
1176 ctf_merge_add(ctf_merge_t *cmh, ctf_file_t *input)
1177 {
1178 int ret;
1179 ctf_merge_input_t *cmi;
1180 ctf_file_t *empty;
1181
1182 ctf_dprintf("adding input %p\n", input);
1183
1184 if (input->ctf_flags & LCTF_CHILD)
1185 return (ECTF_MCHILD);
1186
1187 cmi = ctf_alloc(sizeof (ctf_merge_input_t));
1188 if (cmi == NULL)
1189 return (ENOMEM);
1190
1191 cmi->cmi_created = B_FALSE;
1192 cmi->cmi_input = input;
1193 list_create(&cmi->cmi_fmap, sizeof (ctf_merge_funcmap_t),
1194 offsetof(ctf_merge_funcmap_t, cmf_node));
1195 list_create(&cmi->cmi_omap, sizeof (ctf_merge_funcmap_t),
1196 offsetof(ctf_merge_objmap_t, cmo_node));
1197
1198 if (cmh->cmh_msyms == B_TRUE) {
1199 if ((ret = ctf_symtab_iter(input, ctf_merge_add_symbol,
1200 cmi)) != 0) {
1201 ctf_merge_fini_input(cmi);
1202 return (ret);
1203 }
1204 }
1205
1206 list_insert_tail(&cmh->cmh_inputs, cmi);
1207 cmh->cmh_ninputs++;
1208
1209 /* And now the empty one to merge into this */
1210 cmi = ctf_alloc(sizeof (ctf_merge_input_t));
1211 if (cmi == NULL)
1212 return (ENOMEM);
1213 list_create(&cmi->cmi_fmap, sizeof (ctf_merge_funcmap_t),
1214 offsetof(ctf_merge_funcmap_t, cmf_node));
1215 list_create(&cmi->cmi_omap, sizeof (ctf_merge_funcmap_t),
1216 offsetof(ctf_merge_objmap_t, cmo_node));
1217
1218 empty = ctf_fdcreate(cmh->cmh_ofd, &ret);
1219 if (empty == NULL)
1220 return (ret);
1221 cmi->cmi_input = empty;
1222 cmi->cmi_created = B_TRUE;
1223
1224 if (ctf_setmodel(empty, ctf_getmodel(input)) == CTF_ERR) {
1225 return (ctf_errno(empty));
1226 }
1227
1228 list_insert_tail(&cmh->cmh_inputs, cmi);
1229 cmh->cmh_ninputs++;
1230 ctf_dprintf("added containers %p and %p\n", input, empty);
1231 return (0);
1232 }
1233
1234 int
1235 ctf_merge_uniquify(ctf_merge_t *cmh, ctf_file_t *u, const char *pname)
1236 {
1237 char *dup;
1238
1239 if (u->ctf_flags & LCTF_CHILD)
1240 return (ECTF_MCHILD);
1241 if (pname == NULL)
1242 return (EINVAL);
1243 dup = ctf_strdup(pname);
1244 if (dup == NULL)
1245 return (EINVAL);
1246 if (cmh->cmh_pname != NULL) {
1247 size_t len = strlen(cmh->cmh_pname) + 1;
1248 ctf_free(cmh->cmh_pname, len);
1249 }
1250 cmh->cmh_pname = dup;
1251 cmh->cmh_unique = u;
1252 return (0);
1253 }
1254
1255 /*
1256 * Symbol matching rules: the purpose of this is to verify that the type
1257 * information that we have for a given symbol actually matches the output
1258 * symbol. This is unfortunately complicated by several different factors:
1259 *
1260 * 1. When merging multiple .o's into a single item, the symbol table index will
1261 * not match.
1262 *
1263 * 2. Visibility of a symbol may not be identical to the object file or the
1264 * DWARF information due to symbol reduction via a mapfile.
1265 *
1266 * As such, we have to employ the following rules:
1267 *
1268 * 1. A global symbol table entry always matches a global CTF symbol with the
1269 * same name.
1270 *
1271 * 2. A local symbol table entry always matches a local CTF symbol if they have
1272 * the same name and they belong to the same file.
1273 *
1274 * 3. A weak symbol matches a non-weak symbol. This happens if we find that the
1275 * types match, the values match, the sizes match, and the section indexes
1276 * match. This happens when we do a conversion in one pass, it almost never
1277 * happens when we're merging multiple object files. If we match a CTF global
1278 * symbol, that's a fixed match, otherwise it's a fuzzy match.
1279 *
1280 * 4. A local symbol table entry matches a global CTF entry if the
1281 * other pieces fail, but they have the same name. This is considered a fuzzy
1282 * match and is not used unless we have no other options.
1283 *
1284 * 5. A weak symbol table entry matches a weak CTF entry if the other pieces
1285 * fail, but they have the same name. This is considered a fuzzy match and is
1286 * not used unless we have no other options. When merging independent .o files,
1287 * this is often the only recourse we have to matching weak symbols.
1288 *
1289 * In the end, this would all be much simpler if we were able to do this as part
1290 * of libld which would be able to do all the symbol transformations.
1291 */
1292 static boolean_t
1293 ctf_merge_symbol_match(const char *ctf_file, const char *ctf_name,
1294 const Elf64_Sym *ctf_symp, const char *symtab_file, const char *symtab_name,
1295 const Elf64_Sym *symtab_symp, boolean_t *is_fuzzy)
1296 {
1297 *is_fuzzy = B_FALSE;
1298 uint_t symtab_bind, ctf_bind;
1299
1300 symtab_bind = ELF64_ST_BIND(symtab_symp->st_info);
1301 ctf_bind = ELF64_ST_BIND(ctf_symp->st_info);
1302
1303 ctf_dprintf("comparing merge match for %s/%s/%u->%s/%s/%u\n",
1304 symtab_file, symtab_name, symtab_bind,
1305 ctf_file, ctf_name, ctf_bind);
1306 if (strcmp(ctf_name, symtab_name) != 0) {
1307 return (B_FALSE);
1308 }
1309
1310 if (symtab_bind == STB_GLOBAL && ctf_bind == STB_GLOBAL) {
1311 return (B_TRUE);
1312 } else if (symtab_bind == STB_GLOBAL) {
1313 return (B_FALSE);
1314 }
1315
1316 if (ctf_bind == STB_LOCAL && ctf_bind == symtab_bind &&
1317 ctf_file != NULL && symtab_file != NULL &&
1318 strcmp(ctf_file, symtab_file) == 0) {
1319 return (B_TRUE);
1320 }
1321
1322 if (symtab_bind == STB_WEAK && ctf_bind != STB_WEAK &&
1323 ELF64_ST_TYPE(symtab_symp->st_info) ==
1324 ELF64_ST_TYPE(ctf_symp->st_info) &&
1325 symtab_symp->st_value == ctf_symp->st_value &&
1326 symtab_symp->st_size == ctf_symp->st_size &&
1327 symtab_symp->st_shndx == ctf_symp->st_shndx) {
1328 if (ctf_bind == STB_GLOBAL) {
1329 return (B_TRUE);
1330 }
1331
1332 if (ctf_bind == STB_LOCAL && ctf_file != NULL &&
1333 symtab_file != NULL && strcmp(ctf_file, symtab_file) == 0) {
1334 *is_fuzzy = B_TRUE;
1335 return (B_TRUE);
1336 }
1337 }
1338
1339 if (ctf_bind == STB_GLOBAL ||
1340 (ctf_bind == STB_WEAK && symtab_bind == STB_WEAK)) {
1341 *is_fuzzy = B_TRUE;
1342 return (B_TRUE);
1343 }
1344
1345 return (B_FALSE);
1346 }
1347
1348 /*
1349 * For each symbol, try and find a match. We will attempt to find an exact
1350 * match; however, we will settle for a fuzzy match in general. There is one
1351 * case where we will not opt to use a fuzzy match, which is when performing the
1352 * deduplication of a container. In such a case we are trying to reduce common
1353 * types and a fuzzy match would be inappropriate as if we're in the context of
1354 * a single container, the conversion process should have identified any exact
1355 * or fuzzy matches that were required.
1356 */
1357 static int
1358 ctf_merge_symbols(const Elf64_Sym *symp, ulong_t idx, const char *file,
1359 const char *name, boolean_t primary, void *arg)
1360 {
1361 int err;
1362 uint_t type, bind;
1363 ctf_merge_symbol_arg_t *csa = arg;
1364 ctf_file_t *fp = csa->cmsa_out;
1365
1366 type = ELF64_ST_TYPE(symp->st_info);
1367 bind = ELF64_ST_BIND(symp->st_info);
1368
1369 ctf_dprintf("Trying to find match for %s/%s/%u\n", file, name,
1370 ELF64_ST_BIND(symp->st_info));
1371
1372 if (type == STT_OBJECT) {
1373 ctf_merge_objmap_t *cmo, *match = NULL;
1374
1375 for (cmo = list_head(csa->cmsa_objmap); cmo != NULL;
1376 cmo = list_next(csa->cmsa_objmap, cmo)) {
1377 boolean_t is_fuzzy = B_FALSE;
1378 if (ctf_merge_symbol_match(cmo->cmo_file, cmo->cmo_name,
1379 &cmo->cmo_sym, file, name, symp, &is_fuzzy)) {
1380 if (is_fuzzy && csa->cmsa_dedup &&
1381 bind != STB_WEAK) {
1382 continue;
1383 }
1384 match = cmo;
1385 if (is_fuzzy) {
1386 continue;
1387 }
1388 break;
1389 }
1390 }
1391
1392 if (match == NULL) {
1393 return (0);
1394 }
1395
1396 if ((err = ctf_add_object(fp, idx, match->cmo_tid)) != 0) {
1397 ctf_dprintf("Failed to add symbol %s->%d: %s\n", name,
1398 match->cmo_tid, ctf_errmsg(ctf_errno(fp)));
1399 return (ctf_errno(fp));
1400 }
1401 ctf_dprintf("mapped object into output %s/%s->%ld\n", file,
1402 name, match->cmo_tid);
1403 } else {
1404 ctf_merge_funcmap_t *cmf, *match = NULL;
1405 ctf_funcinfo_t fi;
1406
1407 for (cmf = list_head(csa->cmsa_funcmap); cmf != NULL;
1408 cmf = list_next(csa->cmsa_funcmap, cmf)) {
1409 boolean_t is_fuzzy = B_FALSE;
1410 if (ctf_merge_symbol_match(cmf->cmf_file, cmf->cmf_name,
1411 &cmf->cmf_sym, file, name, symp, &is_fuzzy)) {
1412 if (is_fuzzy && csa->cmsa_dedup &&
1413 bind != STB_WEAK) {
1414 continue;
1415 }
1416 match = cmf;
1417 if (is_fuzzy) {
1418 continue;
1419 }
1420 break;
1421 }
1422 }
1423
1424 if (match == NULL) {
1425 return (0);
1426 }
1427
1428 fi.ctc_return = match->cmf_rtid;
1429 fi.ctc_argc = match->cmf_argc;
1430 fi.ctc_flags = match->cmf_flags;
1431 if ((err = ctf_add_function(fp, idx, &fi, match->cmf_args)) !=
1432 0) {
1433 ctf_dprintf("Failed to add function %s: %s\n", name,
1434 ctf_errmsg(ctf_errno(fp)));
1435 return (ctf_errno(fp));
1436 }
1437 ctf_dprintf("mapped function into output %s/%s\n", file,
1438 name);
1439 }
1440
1441 return (0);
1442 }
1443
1444 int
1445 ctf_merge_merge(ctf_merge_t *cmh, ctf_file_t **outp)
1446 {
1447 int err, merr;
1448 ctf_merge_input_t *cmi;
1449 ctf_id_t ltype;
1450 mergeq_t *mqp;
1451 ctf_merge_input_t *final;
1452 ctf_file_t *out;
1453
1454 ctf_dprintf("Beginning ctf_merge_merge()\n");
1455 if (cmh->cmh_label != NULL && cmh->cmh_unique != NULL) {
1456 const char *label = ctf_label_topmost(cmh->cmh_unique);
1457 if (label == NULL)
1458 return (ECTF_NOLABEL);
1459 if (strcmp(label, cmh->cmh_label) != 0)
1460 return (ECTF_LCONFLICT);
1461 }
1462
1463 if (mergeq_init(&mqp, cmh->cmh_nthreads) == -1) {
1464 return (errno);
1465 }
1466
1467 VERIFY(cmh->cmh_ninputs % 2 == 0);
1468 for (cmi = list_head(&cmh->cmh_inputs); cmi != NULL;
1469 cmi = list_next(&cmh->cmh_inputs, cmi)) {
1470 if (mergeq_add(mqp, cmi) == -1) {
1471 err = errno;
1472 mergeq_fini(mqp);
1473 }
1474 }
1475
1476 err = mergeq_merge(mqp, ctf_merge_types, NULL, (void **)&final, &merr);
1477 mergeq_fini(mqp);
1478
1479 if (err == MERGEQ_ERROR) {
1480 return (errno);
1481 } else if (err == MERGEQ_UERROR) {
1482 return (merr);
1483 }
1484
1485 /*
1486 * Disassociate the generated ctf_file_t from the original input. That
1487 * way when the input gets cleaned up, we don't accidentally kill the
1488 * final reference to the ctf_file_t. If it gets uniquified then we'll
1489 * kill it.
1490 */
1491 VERIFY(final->cmi_input != NULL);
1492 out = final->cmi_input;
1493 final->cmi_input = NULL;
1494
1495 ctf_dprintf("preparing to uniquify against: %p\n", cmh->cmh_unique);
1496 if (cmh->cmh_unique != NULL) {
1497 ctf_file_t *u;
1498 err = ctf_uniquify_types(cmh, out, &u);
1499 if (err != 0) {
1500 err = ctf_errno(out);
1501 ctf_close(out);
1502 return (err);
1503 }
1504 ctf_close(out);
1505 out = u;
1506 }
1507
1508 ltype = out->ctf_typemax;
1509 if ((out->ctf_flags & LCTF_CHILD) && ltype != 0)
1510 ltype += CTF_CHILD_START;
1511 ctf_dprintf("trying to add the label\n");
1512 if (cmh->cmh_label != NULL &&
1513 ctf_add_label(out, cmh->cmh_label, ltype, 0) != 0) {
1514 ctf_close(out);
1515 return (ctf_errno(out));
1516 }
1517
1518 ctf_dprintf("merging symbols and the like\n");
1519 if (cmh->cmh_msyms == B_TRUE) {
1520 ctf_merge_symbol_arg_t arg;
1521 arg.cmsa_objmap = &final->cmi_omap;
1522 arg.cmsa_funcmap = &final->cmi_fmap;
1523 arg.cmsa_out = out;
1524 arg.cmsa_dedup = B_FALSE;
1525 err = ctf_symtab_iter(out, ctf_merge_symbols, &arg);
1526 if (err != 0) {
1527 ctf_close(out);
1528 return (err);
1529 }
1530 }
1531
1532 err = ctf_update(out);
1533 if (err != 0) {
1534 err = ctf_errno(out);
1535 ctf_close(out);
1536 return (err);
1537 }
1538
1539 *outp = out;
1540 return (0);
1541 }
1542
1543 /*
1544 * When we get told that something is unique, eg. same is B_FALSE, then that
1545 * tells us that we need to add it to the output. If same is B_TRUE, then we'll
1546 * want to record it in the mapping table so that we know how to redirect types
1547 * to the extant ones.
1548 */
1549 static void
1550 ctf_dedup_cb(ctf_file_t *ifp, ctf_id_t iid, boolean_t same, ctf_file_t *ofp,
1551 ctf_id_t oid, void *arg)
1552 {
1553 ctf_merge_types_t *cmp = arg;
1554 ctf_merge_tinfo_t *cmt = cmp->cm_tmap;
1555
1556 if (same == B_TRUE) {
1557 /*
1558 * The output id here may itself map to something else.
1559 * Therefore, we need to basically walk a chain and see what it
1560 * points to until it itself points to a base type, eg. -1.
1561 * Otherwise we'll dedup to something which no longer exists.
1562 */
1563 while (cmt[oid].cmt_missing == B_FALSE)
1564 oid = cmt[oid].cmt_map;
1565 cmt[iid].cmt_map = oid;
1566 ctf_dprintf("%d->%d \n", iid, oid);
1567 } else {
1568 VERIFY(cmt[iid].cmt_map == 0);
1569 cmt[iid].cmt_missing = B_TRUE;
1570 ctf_dprintf("%d is missing\n", iid);
1571 }
1572 }
1573
1574 /*
1575 * Dedup a CTF container.
1576 *
1577 * DWARF and other encoding formats that we use to create CTF data may create
1578 * multiple copies of a given type. However, after doing a conversion, and
1579 * before doing a merge, we'd prefer, if possible, to have every input container
1580 * to be unique.
1581 *
1582 * Doing a deduplication is like a normal merge. However, when we diff the types
1583 * in the container, rather than doing a normal diff, we instead want to diff
1584 * against any already processed types. eg, for a given type i in a container,
1585 * we want to diff it from 0 to i - 1.
1586 */
1587 int
1588 ctf_merge_dedup(ctf_merge_t *cmp, ctf_file_t **outp)
1589 {
1590 int ret;
1591 ctf_diff_t *cdp = NULL;
1592 ctf_merge_input_t *cmi, *cmc;
1593 ctf_file_t *ifp, *ofp;
1594 ctf_merge_types_t cm;
1595
1596 if (cmp == NULL || outp == NULL)
1597 return (EINVAL);
1598
1599 ctf_dprintf("encountered %d inputs\n", cmp->cmh_ninputs);
1600 if (cmp->cmh_ninputs != 2)
1601 return (EINVAL);
1602
1603 ctf_dprintf("passed argument sanity check\n");
1604
1605 cmi = list_head(&cmp->cmh_inputs);
1606 VERIFY(cmi != NULL);
1607 cmc = list_next(&cmp->cmh_inputs, cmi);
1608 VERIFY(cmc != NULL);
1609 ifp = cmi->cmi_input;
1610 ofp = cmc->cmi_input;
1611 VERIFY(ifp != NULL);
1612 VERIFY(ofp != NULL);
1613 cm.cm_src = ifp;
1614 cm.cm_out = ofp;
1615 cm.cm_dedup = B_TRUE;
1616 cm.cm_unique = B_FALSE;
1617
1618 if ((ret = ctf_merge_types_init(&cm)) != 0) {
1619 return (ret);
1620 }
1621
1622 if ((ret = ctf_diff_init(ifp, ifp, &cdp)) != 0)
1623 goto err;
1624
1625 ctf_dprintf("Successfully initialized dedup\n");
1626 if ((ret = ctf_diff_self(cdp, ctf_dedup_cb, &cm)) != 0)
1627 goto err;
1628
1629 ctf_dprintf("Successfully diffed types\n");
1630 ret = ctf_merge_common(&cm);
1631 ctf_dprintf("deduping types result: %d\n", ret);
1632 if (ret == 0)
1633 ret = ctf_update(cm.cm_out);
1634 if (ret != 0)
1635 goto err;
1636
1637 ctf_dprintf("Successfully deduped types\n");
1638 ctf_phase_dump(cm.cm_out, "dedup-pre-syms", NULL);
1639
1640 /*
1641 * Now we need to fix up the object and function maps.
1642 */
1643 ctf_merge_fixup_symmaps(&cm, cmi);
1644
1645 if (cmp->cmh_msyms == B_TRUE) {
1646 ctf_merge_symbol_arg_t arg;
1647 arg.cmsa_objmap = &cmi->cmi_omap;
1648 arg.cmsa_funcmap = &cmi->cmi_fmap;
1649 arg.cmsa_out = cm.cm_out;
1650 arg.cmsa_dedup = B_TRUE;
1651 ret = ctf_symtab_iter(cm.cm_out, ctf_merge_symbols, &arg);
1652 if (ret != 0) {
1653 ctf_dprintf("failed to dedup symbols: %s\n",
1654 ctf_errmsg(ret));
1655 goto err;
1656 }
1657 }
1658
1659 ret = ctf_update(cm.cm_out);
1660 if (ret == 0) {
1661 cmc->cmi_input = NULL;
1662 *outp = cm.cm_out;
1663 }
1664 ctf_phase_dump(cm.cm_out, "dedup-post-syms", NULL);
1665 err:
1666 ctf_merge_types_fini(&cm);
1667 ctf_diff_fini(cdp);
1668 return (ret);
1669 }
1670
1671 int
1672 ctf_merge_set_nthreads(ctf_merge_t *cmp, const uint_t nthrs)
1673 {
1674 if (nthrs == 0)
1675 return (EINVAL);
1676 cmp->cmh_nthreads = nthrs;
1677 return (0);
1678 }