1 /*
   2  * This file and its contents are supplied under the terms of the
   3  * Common Development and Distribution License ("CDDL"), version 1.0.
   4  * You may only use this file in accordance with the terms of version
   5  * 1.0 of the CDDL.
   6  *
   7  * A full copy of the text of the CDDL should have accompanied this
   8  * source.  A copy of the CDDL is also available via the Internet at
   9  * http://www.illumos.org/license/CDDL.
  10  */
  11 
  12 /*
  13  * Copyright 2019, Joyent, Inc.
  14  */
  15 
  16 /*
 * To perform a merge of two CTF containers, we first diff the two containers'
 * types. For every type that's in the src container, but not in the dst
  19  * container, we note it and add it to dst container. If there are any objects
  20  * or functions associated with src, we go through and update the types that
  21  * they refer to such that they all refer to types in the dst container.
  22  *
  23  * The bulk of the logic for the merge, after we've run the diff, occurs in
  24  * ctf_merge_common().
  25  *
 * In terms of exported APIs, we don't really export a simple merge of two
 * containers, as the general way this is used, in something like ctfmerge(1),
 * is to add all the containers and then let us figure out the best way to merge
 * them.
  30  */
  31 
  32 #include <libctf_impl.h>
  33 #include <sys/debug.h>
  34 #include <sys/list.h>
  35 #include <stddef.h>
  36 #include <fcntl.h>
  37 #include <sys/types.h>
  38 #include <sys/stat.h>
  39 #include <mergeq.h>
  40 #include <errno.h>
  41 
  42 typedef struct ctf_merge_tinfo {
  43         uint16_t cmt_map;       /* Map to the type in out */
  44         boolean_t cmt_fixup;
  45         boolean_t cmt_forward;
  46         boolean_t cmt_missing;
  47 } ctf_merge_tinfo_t;
  48 
  49 /*
  50  * State required for doing an individual merge of two containers.
  51  */
  52 typedef struct ctf_merge_types {
  53         ctf_file_t *cm_out;             /* Output CTF file */
  54         ctf_file_t *cm_src;             /* Input CTF file */
  55         ctf_merge_tinfo_t *cm_tmap;     /* Type state information */
  56         boolean_t cm_dedup;             /* Are we doing a dedup? */
  57         boolean_t cm_unique;            /* are we doing a uniquify? */
  58 } ctf_merge_types_t;
  59 
  60 typedef struct ctf_merge_objmap {
  61         list_node_t cmo_node;
  62         const char *cmo_name;           /* Symbol name */
  63         const char *cmo_file;           /* Symbol file */
  64         ulong_t cmo_idx;                /* Symbol ID */
  65         Elf64_Sym cmo_sym;              /* Symbol Entry */
  66         ctf_id_t cmo_tid;               /* Type ID */
  67 } ctf_merge_objmap_t;
  68 
  69 typedef struct ctf_merge_funcmap {
  70         list_node_t cmf_node;
  71         const char *cmf_name;           /* Symbol name */
  72         const char *cmf_file;           /* Symbol file */
  73         ulong_t cmf_idx;                /* Symbol ID */
  74         Elf64_Sym cmf_sym;              /* Symbol Entry */
  75         ctf_id_t cmf_rtid;              /* Type ID */
  76         uint_t cmf_flags;               /* ctf_funcinfo_t ctc_flags */
  77         uint_t cmf_argc;                /* Number of arguments */
  78         ctf_id_t cmf_args[];            /* Types of arguments */
  79 } ctf_merge_funcmap_t;
  80 
  81 typedef struct ctf_merge_input {
  82         list_node_t cmi_node;
  83         ctf_file_t *cmi_input;
  84         list_t cmi_omap;
  85         list_t cmi_fmap;
  86         boolean_t cmi_created;
  87 } ctf_merge_input_t;
  88 
  89 struct ctf_merge_handle {
  90         list_t cmh_inputs;              /* Input list */
  91         uint_t cmh_ninputs;             /* Number of inputs */
  92         uint_t cmh_nthreads;            /* Number of threads to use */
  93         ctf_file_t *cmh_unique;         /* ctf to uniquify against */
  94         boolean_t cmh_msyms;            /* Should we merge symbols/funcs? */
  95         int cmh_ofd;                    /* FD for output file */
  96         int cmh_flags;                  /* Flags that control merge behavior */
  97         char *cmh_label;                /* Optional label */
  98         char *cmh_pname;                /* Parent name */
  99 };
 100 
 101 typedef struct ctf_merge_symbol_arg {
 102         list_t *cmsa_objmap;
 103         list_t *cmsa_funcmap;
 104         ctf_file_t *cmsa_out;
 105         boolean_t cmsa_dedup;
 106 } ctf_merge_symbol_arg_t;
 107 
 108 static int ctf_merge_add_type(ctf_merge_types_t *, ctf_id_t);
 109 
 110 static ctf_id_t
 111 ctf_merge_gettype(ctf_merge_types_t *cmp, ctf_id_t id)
 112 {
 113         if (cmp->cm_dedup == B_FALSE) {
 114                 VERIFY(cmp->cm_tmap[id].cmt_map != 0);
 115                 return (cmp->cm_tmap[id].cmt_map);
 116         }
 117 
 118         while (cmp->cm_tmap[id].cmt_missing == B_FALSE) {
 119                 VERIFY(cmp->cm_tmap[id].cmt_map != 0);
 120                 id = cmp->cm_tmap[id].cmt_map;
 121         }
 122         VERIFY(cmp->cm_tmap[id].cmt_map != 0);
 123         return (cmp->cm_tmap[id].cmt_map);
 124 }
 125 
 126 static void
 127 ctf_merge_diffcb(ctf_file_t *ifp, ctf_id_t iid, boolean_t same, ctf_file_t *ofp,
 128     ctf_id_t oid, void *arg)
 129 {
 130         ctf_merge_types_t *cmp = arg;
 131         ctf_merge_tinfo_t *cmt = cmp->cm_tmap;
 132 
 133         if (same == B_TRUE) {
 134                 if (ctf_type_kind(ifp, iid) == CTF_K_FORWARD &&
 135                     ctf_type_kind(ofp, oid) != CTF_K_FORWARD) {
 136                         VERIFY(cmt[oid].cmt_map == 0);
 137 
 138                         /*
 139                          * If we're uniquifying types, it's possible for the
 140                          * container that we're uniquifying against to have a
 141                          * forward which exists in the container being reduced.
 142                          * For example, genunix has the machcpu structure as a
 143                          * forward which is actually in unix and we uniquify
 144                          * unix against genunix. In such cases, we explicitly do
 145                          * not do any mapping of the forward information, lest
 146                          * we risk losing the real definition. Instead, mark
 147                          * that it's missing.
 148                          */
 149                         if (cmp->cm_unique == B_TRUE) {
 150                                 cmt[oid].cmt_missing = B_TRUE;
 151                                 return;
 152                         }
 153 
 154                         cmt[oid].cmt_map = iid;
 155                         cmt[oid].cmt_forward = B_TRUE;
 156                         ctf_dprintf("merge diff forward mapped %d->%d\n", oid,
 157                             iid);
 158                         return;
 159                 }
 160 
 161                 /*
 162                  * We could have multiple things that a given type ends up
 163                  * matching in the world of forwards and pointers to forwards.
 164                  * For now just take the first one...
 165                  */
 166                 if (cmt[oid].cmt_map != 0)
 167                         return;
 168                 cmt[oid].cmt_map = iid;
 169                 ctf_dprintf("merge diff mapped %d->%d\n", oid, iid);
 170         } else if (ifp == cmp->cm_src) {
 171                 VERIFY(cmt[iid].cmt_map == 0);
 172                 cmt[iid].cmt_missing = B_TRUE;
 173                 ctf_dprintf("merge diff said %d is missing\n", iid);
 174         }
 175 }
 176 
 177 static int
 178 ctf_merge_add_number(ctf_merge_types_t *cmp, ctf_id_t id)
 179 {
 180         int ret, flags;
 181         const ctf_type_t *tp;
 182         const char *name;
 183         ctf_encoding_t en;
 184 
 185         if (ctf_type_encoding(cmp->cm_src, id, &en) != 0)
 186                 return (CTF_ERR);
 187 
 188         tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
 189         name = ctf_strraw(cmp->cm_src, tp->ctt_name);
 190         if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
 191                 flags = CTF_ADD_ROOT;
 192         else
 193                 flags = CTF_ADD_NONROOT;
 194 
 195         ret = ctf_add_encoded(cmp->cm_out, flags, name, &en,
 196             ctf_type_kind(cmp->cm_src, id));
 197 
 198         if (ret == CTF_ERR)
 199                 return (ret);
 200 
 201         VERIFY(cmp->cm_tmap[id].cmt_map == 0);
 202         cmp->cm_tmap[id].cmt_map = ret;
 203         return (0);
 204 }
 205 
 206 static int
 207 ctf_merge_add_array(ctf_merge_types_t *cmp, ctf_id_t id)
 208 {
 209         int ret, flags;
 210         const ctf_type_t *tp;
 211         ctf_arinfo_t ar;
 212 
 213         if (ctf_array_info(cmp->cm_src, id, &ar) == CTF_ERR)
 214                 return (CTF_ERR);
 215 
 216         tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
 217         if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
 218                 flags = CTF_ADD_ROOT;
 219         else
 220                 flags = CTF_ADD_NONROOT;
 221 
 222         if (cmp->cm_tmap[ar.ctr_contents].cmt_map == 0) {
 223                 ret = ctf_merge_add_type(cmp, ar.ctr_contents);
 224                 if (ret != 0)
 225                         return (ret);
 226                 ASSERT(cmp->cm_tmap[ar.ctr_contents].cmt_map != 0);
 227         }
 228         ar.ctr_contents = ctf_merge_gettype(cmp, ar.ctr_contents);
 229 
 230         if (cmp->cm_tmap[ar.ctr_index].cmt_map == 0) {
 231                 ret = ctf_merge_add_type(cmp, ar.ctr_index);
 232                 if (ret != 0)
 233                         return (ret);
 234                 ASSERT(cmp->cm_tmap[ar.ctr_index].cmt_map != 0);
 235         }
 236         ar.ctr_index = ctf_merge_gettype(cmp, ar.ctr_index);
 237 
 238         ret = ctf_add_array(cmp->cm_out, flags, &ar);
 239         if (ret == CTF_ERR)
 240                 return (ret);
 241 
 242         VERIFY(cmp->cm_tmap[id].cmt_map == 0);
 243         cmp->cm_tmap[id].cmt_map = ret;
 244 
 245         return (0);
 246 }
 247 
 248 static int
 249 ctf_merge_add_reftype(ctf_merge_types_t *cmp, ctf_id_t id)
 250 {
 251         int ret, flags;
 252         const ctf_type_t *tp;
 253         ctf_id_t reftype;
 254         const char *name;
 255 
 256         tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
 257         name = ctf_strraw(cmp->cm_src, tp->ctt_name);
 258         if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
 259                 flags = CTF_ADD_ROOT;
 260         else
 261                 flags = CTF_ADD_NONROOT;
 262 
 263         reftype = ctf_type_reference(cmp->cm_src, id);
 264         if (reftype == CTF_ERR)
 265                 return (ctf_set_errno(cmp->cm_out, ctf_errno(cmp->cm_src)));
 266 
 267         if (cmp->cm_tmap[reftype].cmt_map == 0) {
 268                 ret = ctf_merge_add_type(cmp, reftype);
 269                 if (ret != 0)
 270                         return (ret);
 271                 ASSERT(cmp->cm_tmap[reftype].cmt_map != 0);
 272         }
 273         reftype = ctf_merge_gettype(cmp, reftype);
 274 
 275         ret = ctf_add_reftype(cmp->cm_out, flags, name, reftype,
 276             ctf_type_kind(cmp->cm_src, id));
 277         if (ret == CTF_ERR)
 278                 return (ret);
 279 
 280         VERIFY(cmp->cm_tmap[id].cmt_map == 0);
 281         cmp->cm_tmap[id].cmt_map = ret;
 282         return (0);
 283 }
 284 
 285 static int
 286 ctf_merge_add_typedef(ctf_merge_types_t *cmp, ctf_id_t id)
 287 {
 288         int ret, flags;
 289         const ctf_type_t *tp;
 290         const char *name;
 291         ctf_id_t reftype;
 292 
 293         tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
 294         name = ctf_strraw(cmp->cm_src, tp->ctt_name);
 295         if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
 296                 flags = CTF_ADD_ROOT;
 297         else
 298                 flags = CTF_ADD_NONROOT;
 299 
 300         reftype = ctf_type_reference(cmp->cm_src, id);
 301         if (reftype == CTF_ERR)
 302                 return (ctf_set_errno(cmp->cm_out, ctf_errno(cmp->cm_src)));
 303 
 304         if (cmp->cm_tmap[reftype].cmt_map == 0) {
 305                 ret = ctf_merge_add_type(cmp, reftype);
 306                 if (ret != 0)
 307                         return (ret);
 308                 ASSERT(cmp->cm_tmap[reftype].cmt_map != 0);
 309         }
 310         reftype = ctf_merge_gettype(cmp, reftype);
 311 
 312         ret = ctf_add_typedef(cmp->cm_out, flags, name, reftype);
 313         if (ret == CTF_ERR)
 314                 return (ret);
 315 
 316         VERIFY(cmp->cm_tmap[id].cmt_map == 0);
 317         cmp->cm_tmap[id].cmt_map = ret;
 318         return (0);
 319 }
 320 
 321 typedef struct ctf_merge_enum {
 322         ctf_file_t *cme_fp;
 323         ctf_id_t cme_id;
 324 } ctf_merge_enum_t;
 325 
 326 static int
 327 ctf_merge_add_enumerator(const char *name, int value, void *arg)
 328 {
 329         ctf_merge_enum_t *cmep = arg;
 330 
 331         return (ctf_add_enumerator(cmep->cme_fp, cmep->cme_id, name, value) ==
 332             CTF_ERR);
 333 }
 334 
 335 static int
 336 ctf_merge_add_enum(ctf_merge_types_t *cmp, ctf_id_t id)
 337 {
 338         int flags;
 339         const ctf_type_t *tp;
 340         const char *name;
 341         ctf_id_t enumid;
 342         ctf_merge_enum_t cme;
 343 
 344         tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
 345         name = ctf_strraw(cmp->cm_src, tp->ctt_name);
 346         if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
 347                 flags = CTF_ADD_ROOT;
 348         else
 349                 flags = CTF_ADD_NONROOT;
 350 
 351         enumid = ctf_add_enum(cmp->cm_out, flags, name);
 352         if (enumid == CTF_ERR)
 353                 return (enumid);
 354 
 355         cme.cme_fp = cmp->cm_out;
 356         cme.cme_id = enumid;
 357         if (ctf_enum_iter(cmp->cm_src, id, ctf_merge_add_enumerator,
 358             &cme) != 0)
 359                 return (CTF_ERR);
 360 
 361         VERIFY(cmp->cm_tmap[id].cmt_map == 0);
 362         cmp->cm_tmap[id].cmt_map = enumid;
 363         return (0);
 364 }
 365 
 366 static int
 367 ctf_merge_add_func(ctf_merge_types_t *cmp, ctf_id_t id)
 368 {
 369         int ret, flags, i;
 370         const ctf_type_t *tp;
 371         ctf_funcinfo_t ctc;
 372         ctf_id_t *argv;
 373 
 374         tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
 375         if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
 376                 flags = CTF_ADD_ROOT;
 377         else
 378                 flags = CTF_ADD_NONROOT;
 379 
 380         if (ctf_func_info_by_id(cmp->cm_src, id, &ctc) == CTF_ERR)
 381                 return (ctf_set_errno(cmp->cm_out, ctf_errno(cmp->cm_src)));
 382 
 383         argv = ctf_alloc(sizeof (ctf_id_t) * ctc.ctc_argc);
 384         if (argv == NULL)
 385                 return (ctf_set_errno(cmp->cm_out, ENOMEM));
 386         if (ctf_func_args_by_id(cmp->cm_src, id, ctc.ctc_argc, argv) ==
 387             CTF_ERR) {
 388                 ctf_free(argv, sizeof (ctf_id_t) * ctc.ctc_argc);
 389                 return (ctf_set_errno(cmp->cm_out, ctf_errno(cmp->cm_src)));
 390         }
 391 
 392         if (cmp->cm_tmap[ctc.ctc_return].cmt_map == 0) {
 393                 ret = ctf_merge_add_type(cmp, ctc.ctc_return);
 394                 if (ret != 0)
 395                         return (ret);
 396                 ASSERT(cmp->cm_tmap[ctc.ctc_return].cmt_map != 0);
 397         }
 398         ctc.ctc_return = ctf_merge_gettype(cmp, ctc.ctc_return);
 399 
 400         for (i = 0; i < ctc.ctc_argc; i++) {
 401                 if (cmp->cm_tmap[argv[i]].cmt_map == 0) {
 402                         ret = ctf_merge_add_type(cmp, argv[i]);
 403                         if (ret != 0)
 404                                 return (ret);
 405                         ASSERT(cmp->cm_tmap[argv[i]].cmt_map != 0);
 406                 }
 407                 argv[i] = ctf_merge_gettype(cmp, argv[i]);
 408         }
 409 
 410         ret = ctf_add_funcptr(cmp->cm_out, flags, &ctc, argv);
 411         ctf_free(argv, sizeof (ctf_id_t) * ctc.ctc_argc);
 412         if (ret == CTF_ERR)
 413                 return (ret);
 414 
 415         VERIFY(cmp->cm_tmap[id].cmt_map == 0);
 416         cmp->cm_tmap[id].cmt_map = ret;
 417         return (0);
 418 }
 419 
 420 static int
 421 ctf_merge_add_forward(ctf_merge_types_t *cmp, ctf_id_t id)
 422 {
 423         int ret, flags;
 424         const ctf_type_t *tp;
 425         const char *name;
 426 
 427         tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
 428         name = ctf_strraw(cmp->cm_src, tp->ctt_name);
 429         if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
 430                 flags = CTF_ADD_ROOT;
 431         else
 432                 flags = CTF_ADD_NONROOT;
 433 
 434         /*
 435          * ctf_add_forward tries to check to see if a given forward already
 436          * exists in one of its hash tables.  If we're here then we know that we
 437          * have a forward in a container that isn't present in another.
 438          * Therefore, we choose a token hash table to satisfy the API choice
 439          * here.
 440          */
 441         ret = ctf_add_forward(cmp->cm_out, flags, name, CTF_K_STRUCT);
 442         if (ret == CTF_ERR)
 443                 return (CTF_ERR);
 444 
 445         VERIFY(cmp->cm_tmap[id].cmt_map == 0);
 446         cmp->cm_tmap[id].cmt_map = ret;
 447         return (0);
 448 }
 449 
 450 typedef struct ctf_merge_su {
 451         ctf_merge_types_t *cms_cm;
 452         ctf_id_t cms_id;
 453 } ctf_merge_su_t;
 454 
 455 static int
 456 ctf_merge_add_member(const char *name, ctf_id_t type, ulong_t offset, void *arg)
 457 {
 458         ctf_merge_su_t *cms = arg;
 459 
 460         VERIFY(cms->cms_cm->cm_tmap[type].cmt_map != 0);
 461         type = cms->cms_cm->cm_tmap[type].cmt_map;
 462 
 463         ctf_dprintf("Trying to add member %s to %d\n", name, cms->cms_id);
 464         return (ctf_add_member(cms->cms_cm->cm_out, cms->cms_id, name,
 465             type, offset) == CTF_ERR);
 466 }
 467 
 468 /*
 469  * During the first pass, we always add the generic structure and union but none
 470  * of its members as they might not all have been mapped yet. Instead we just
 471  * mark all structures and unions as needing to be fixed up.
 472  */
 473 static int
 474 ctf_merge_add_sou(ctf_merge_types_t *cmp, ctf_id_t id, boolean_t forward)
 475 {
 476         int flags, kind;
 477         const ctf_type_t *tp;
 478         const char *name;
 479         ctf_id_t suid;
 480 
 481         tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
 482         name = ctf_strraw(cmp->cm_src, tp->ctt_name);
 483         if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
 484                 flags = CTF_ADD_ROOT;
 485         else
 486                 flags = CTF_ADD_NONROOT;
 487         kind = ctf_type_kind(cmp->cm_src, id);
 488 
 489         if (kind == CTF_K_STRUCT)
 490                 suid = ctf_add_struct(cmp->cm_out, flags, name);
 491         else
 492                 suid = ctf_add_union(cmp->cm_out, flags, name);
 493 
 494         if (suid == CTF_ERR)
 495                 return (suid);
 496 
 497         /*
 498          * If this is a forward reference then its mapping should already
 499          * exist.
 500          */
 501         if (forward == B_FALSE) {
 502                 VERIFY(cmp->cm_tmap[id].cmt_map == 0);
 503                 cmp->cm_tmap[id].cmt_map = suid;
 504                 ctf_dprintf("added sou \"%s\" as (%d) %d->%d\n", name, kind, id,
 505                     suid);
 506         } else {
 507                 VERIFY(cmp->cm_tmap[id].cmt_map == suid);
 508         }
 509         cmp->cm_tmap[id].cmt_fixup = B_TRUE;
 510 
 511         return (0);
 512 }
 513 
 514 static int
 515 ctf_merge_add_type(ctf_merge_types_t *cmp, ctf_id_t id)
 516 {
 517         int kind, ret;
 518 
 519         /*
 520          * We may end up evaluating a type more than once as we may deal with it
 521          * as we recursively evaluate some kind of reference and then we may see
 522          * it normally.
 523          */
 524         if (cmp->cm_tmap[id].cmt_map != 0)
 525                 return (0);
 526 
 527         kind = ctf_type_kind(cmp->cm_src, id);
 528         switch (kind) {
 529         case CTF_K_INTEGER:
 530         case CTF_K_FLOAT:
 531                 ret = ctf_merge_add_number(cmp, id);
 532                 break;
 533         case CTF_K_ARRAY:
 534                 ret = ctf_merge_add_array(cmp, id);
 535                 break;
 536         case CTF_K_POINTER:
 537         case CTF_K_VOLATILE:
 538         case CTF_K_CONST:
 539         case CTF_K_RESTRICT:
 540                 ret = ctf_merge_add_reftype(cmp, id);
 541                 break;
 542         case CTF_K_TYPEDEF:
 543                 ret = ctf_merge_add_typedef(cmp, id);
 544                 break;
 545         case CTF_K_ENUM:
 546                 ret = ctf_merge_add_enum(cmp, id);
 547                 break;
 548         case CTF_K_FUNCTION:
 549                 ret = ctf_merge_add_func(cmp, id);
 550                 break;
 551         case CTF_K_FORWARD:
 552                 ret = ctf_merge_add_forward(cmp, id);
 553                 break;
 554         case CTF_K_STRUCT:
 555         case CTF_K_UNION:
 556                 ret = ctf_merge_add_sou(cmp, id, B_FALSE);
 557                 break;
 558         case CTF_K_UNKNOWN:
 559                 /*
 560                  * We don't add unknown types, and we later assert that nothing
 561                  * should reference them.
 562                  */
 563                 return (0);
 564         default:
 565                 abort();
 566         }
 567 
 568         return (ret);
 569 }
 570 
 571 static int
 572 ctf_merge_fixup_sou(ctf_merge_types_t *cmp, ctf_id_t id)
 573 {
 574         ctf_dtdef_t *dtd;
 575         ctf_merge_su_t cms;
 576         ctf_id_t mapid;
 577         ssize_t size;
 578 
 579         mapid = cmp->cm_tmap[id].cmt_map;
 580         VERIFY(mapid != 0);
 581         dtd = ctf_dtd_lookup(cmp->cm_out, mapid);
 582         VERIFY(dtd != NULL);
 583 
 584         ctf_dprintf("Trying to fix up sou %d\n", id);
 585         cms.cms_cm = cmp;
 586         cms.cms_id = mapid;
 587         if (ctf_member_iter(cmp->cm_src, id, ctf_merge_add_member, &cms) != 0)
 588                 return (CTF_ERR);
 589 
 590         if ((size = ctf_type_size(cmp->cm_src, id)) == CTF_ERR)
 591                 return (CTF_ERR);
 592         if (ctf_set_size(cmp->cm_out, mapid, size) == CTF_ERR)
 593                 return (CTF_ERR);
 594 
 595         return (0);
 596 }
 597 
 598 static int
 599 ctf_merge_fixup_type(ctf_merge_types_t *cmp, ctf_id_t id)
 600 {
 601         int kind, ret;
 602 
 603         kind = ctf_type_kind(cmp->cm_src, id);
 604         switch (kind) {
 605         case CTF_K_STRUCT:
 606         case CTF_K_UNION:
 607                 ret = ctf_merge_fixup_sou(cmp, id);
 608                 break;
 609         default:
 610                 VERIFY(0);
 611                 ret = CTF_ERR;
 612         }
 613 
 614         return (ret);
 615 }
 616 
 617 /*
 618  * Now that we've successfully merged everything, we're going to remap the type
 619  * table.
 620  *
 621  * Remember we have two containers: ->cm_src is what we're working from, and
 622  * ->cm_out is where we are building the de-duplicated CTF.
 623  *
 624  * The index of this table is always the type IDs in ->cm_src.
 625  *
 626  * When we built this table originally in ctf_diff_self(), if we found a novel
 627  * type, we marked it as .cmt_missing to indicate it needs adding to ->cm_out.
 628  * Otherwise, .cmt_map indicated the ->cm_src type ID that this type duplicates.
 629  *
 630  * Then, in ctf_merge_common(), we walked through and added all "cmt_missing"
 631  * types to ->cm_out with ctf_merge_add_type(). These routines update cmt_map
 632  * to be the *new* type ID in ->cm_out.  In this function, you can read
 633  * "cmt_missing" as meaning "added to ->cm_out, and cmt_map updated".
 634  *
 635  * So at this point, we need to mop up all types where .cmt_missing == B_FALSE,
 636  * making sure *their* .cmt_map values also point to the ->cm_out container.
 637  */
 638 static void
 639 ctf_merge_dedup_remap(ctf_merge_types_t *cmp)
 640 {
 641         int i;
 642 
 643         for (i = 1; i < cmp->cm_src->ctf_typemax + 1; i++) {
 644                 ctf_id_t tid;
 645 
 646                 if (cmp->cm_tmap[i].cmt_missing == B_TRUE) {
 647                         VERIFY(cmp->cm_tmap[i].cmt_map != 0);
 648                         continue;
 649                 }
 650 
 651                 tid = i;
 652                 while (cmp->cm_tmap[tid].cmt_missing == B_FALSE) {
 653                         VERIFY(cmp->cm_tmap[tid].cmt_map != 0);
 654                         tid = cmp->cm_tmap[tid].cmt_map;
 655                 }
 656                 VERIFY(cmp->cm_tmap[tid].cmt_map != 0);
 657                 cmp->cm_tmap[i].cmt_map = cmp->cm_tmap[tid].cmt_map;
 658         }
 659 }
 660 
 661 
 662 /*
 663  * We're going to do three passes over the containers.
 664  *
 665  * Pass 1 checks for forward references in the output container that we know
 666  * exist in the source container.
 667  *
 668  * Pass 2 adds all the missing types from the source container. As part of this
 669  * we may be adding a type as a forward reference that doesn't exist yet.
 670  * Any types that we encounter in this form, we need to add to a third pass.
 671  *
 672  * Pass 3 is the fixup pass. Here we go through and find all the types that were
 673  * missing in the first.
 674  *
 675  * Importantly, we *must* call ctf_update between the second and third pass,
 676  * otherwise several of the libctf functions will not properly find the data in
 677  * the container. If we're doing a dedup we also fix up the type mapping.
 678  */
 679 static int
 680 ctf_merge_common(ctf_merge_types_t *cmp)
 681 {
 682         int ret, i;
 683 
 684         ctf_phase_dump(cmp->cm_src, "merge-common-src", NULL);
 685         ctf_phase_dump(cmp->cm_out, "merge-common-dest", NULL);
 686 
 687         /* Pass 1 */
 688         for (i = 1; i <= cmp->cm_src->ctf_typemax; i++) {
 689                 if (cmp->cm_tmap[i].cmt_forward == B_TRUE) {
 690                         ret = ctf_merge_add_sou(cmp, i, B_TRUE);
 691                         if (ret != 0) {
 692                                 return (ret);
 693                         }
 694                 }
 695         }
 696 
 697         /* Pass 2 */
 698         for (i = 1; i <= cmp->cm_src->ctf_typemax; i++) {
 699                 if (cmp->cm_tmap[i].cmt_missing == B_TRUE) {
 700                         ret = ctf_merge_add_type(cmp, i);
 701                         if (ret != 0) {
 702                                 ctf_dprintf("Failed to merge type %d\n", i);
 703                                 return (ret);
 704                         }
 705                 }
 706         }
 707 
 708         ret = ctf_update(cmp->cm_out);
 709         if (ret != 0)
 710                 return (ret);
 711 
 712         if (cmp->cm_dedup == B_TRUE) {
 713                 ctf_merge_dedup_remap(cmp);
 714         }
 715 
 716         ctf_dprintf("Beginning merge pass 3\n");
 717         /* Pass 3 */
 718         for (i = 1; i <= cmp->cm_src->ctf_typemax; i++) {
 719                 if (cmp->cm_tmap[i].cmt_fixup == B_TRUE) {
 720                         ret = ctf_merge_fixup_type(cmp, i);
 721                         if (ret != 0)
 722                                 return (ret);
 723                 }
 724         }
 725 
 726         return (0);
 727 }
 728 
 729 /*
 730  * Uniquification is slightly different from a stock merge. For starters, we
 731  * don't need to replace any forward references in the output. In this case
 732  * though, the types that already exist are in a parent container to the empty
 733  * output container.
 734  */
 735 static int
 736 ctf_merge_uniquify_types(ctf_merge_types_t *cmp)
 737 {
 738         int i, ret;
 739 
 740         for (i = 1; i <= cmp->cm_src->ctf_typemax; i++) {
 741                 if (cmp->cm_tmap[i].cmt_missing == B_FALSE)
 742                         continue;
 743                 ret = ctf_merge_add_type(cmp, i);
 744                 if (ret != 0)
 745                         return (ret);
 746         }
 747 
 748         ret = ctf_update(cmp->cm_out);
 749         if (ret != 0)
 750                 return (ret);
 751 
 752         for (i = 1; i <= cmp->cm_src->ctf_typemax; i++) {
 753                 if (cmp->cm_tmap[i].cmt_fixup == B_FALSE)
 754                         continue;
 755                 ret = ctf_merge_fixup_type(cmp, i);
 756                 if (ret != 0)
 757                         return (ret);
 758         }
 759 
 760         return (0);
 761 }
 762 
 763 static int
 764 ctf_merge_types_init(ctf_merge_types_t *cmp)
 765 {
 766         cmp->cm_tmap = ctf_alloc(sizeof (ctf_merge_tinfo_t) *
 767             (cmp->cm_src->ctf_typemax + 1));
 768         if (cmp->cm_tmap == NULL)
 769                 return (ctf_set_errno(cmp->cm_out, ENOMEM));
 770         bzero(cmp->cm_tmap, sizeof (ctf_merge_tinfo_t) *
 771             (cmp->cm_src->ctf_typemax + 1));
 772         return (0);
 773 }
 774 
 775 static void
 776 ctf_merge_types_fini(ctf_merge_types_t *cmp)
 777 {
 778         ctf_free(cmp->cm_tmap, sizeof (ctf_merge_tinfo_t) *
 779             (cmp->cm_src->ctf_typemax + 1));
 780 }
 781 
 782 /*
 783  * After performing a pass, we need to go through the object and function type
 784  * maps and potentially fix them up based on the new maps that we have.
 785  */
 786 static void
 787 ctf_merge_fixup_symmaps(ctf_merge_types_t *cmp, ctf_merge_input_t *cmi)
 788 {
 789         ctf_merge_objmap_t *cmo;
 790         ctf_merge_funcmap_t *cmf;
 791 
 792         for (cmo = list_head(&cmi->cmi_omap); cmo != NULL;
 793             cmo = list_next(&cmi->cmi_omap, cmo)) {
 794                 VERIFY3S(cmo->cmo_tid, !=, 0);
 795                 VERIFY(cmp->cm_tmap[cmo->cmo_tid].cmt_map != 0);
 796                 cmo->cmo_tid = cmp->cm_tmap[cmo->cmo_tid].cmt_map;
 797         }
 798 
 799         for (cmf = list_head(&cmi->cmi_fmap); cmf != NULL;
 800             cmf = list_next(&cmi->cmi_fmap, cmf)) {
 801                 int i;
 802 
 803                 VERIFY(cmp->cm_tmap[cmf->cmf_rtid].cmt_map != 0);
 804                 cmf->cmf_rtid = cmp->cm_tmap[cmf->cmf_rtid].cmt_map;
 805                 for (i = 0; i < cmf->cmf_argc; i++) {
 806                         VERIFY(cmp->cm_tmap[cmf->cmf_args[i]].cmt_map != 0);
 807                         cmf->cmf_args[i] =
 808                             cmp->cm_tmap[cmf->cmf_args[i]].cmt_map;
 809                 }
 810         }
 811 }
 812 
 813 /*
 814  * Merge the types contained inside of two input files. The second input file is
 815  * always going to be the destination. We're guaranteed that it's always
 816  * writeable.
 817  */
 818 static int
 819 ctf_merge_types(void *arg, void *arg2, void **outp, void *unsued)
 820 {
 821         int ret;
 822         ctf_merge_types_t cm;
 823         ctf_diff_t *cdp;
 824         ctf_merge_input_t *scmi = arg;
 825         ctf_merge_input_t *dcmi = arg2;
 826         ctf_file_t *out = dcmi->cmi_input;
 827         ctf_file_t *source = scmi->cmi_input;
 828 
 829         ctf_dprintf("merging %p->%p\n", source, out);
 830 
 831         if (!(out->ctf_flags & LCTF_RDWR))
 832                 return (ctf_set_errno(out, ECTF_RDONLY));
 833 
 834         if (ctf_getmodel(out) != ctf_getmodel(source))
 835                 return (ctf_set_errno(out, ECTF_DMODEL));
 836 
 837         if ((ret = ctf_diff_init(out, source, &cdp)) != 0)
 838                 return (ret);
 839 
 840         cm.cm_out = out;
 841         cm.cm_src = source;
 842         cm.cm_dedup = B_FALSE;
 843         cm.cm_unique = B_FALSE;
 844         ret = ctf_merge_types_init(&cm);
 845         if (ret != 0) {
 846                 ctf_diff_fini(cdp);
 847                 return (ctf_set_errno(out, ret));
 848         }
 849 
 850         ret = ctf_diff_types(cdp, ctf_merge_diffcb, &cm);
 851         if (ret != 0)
 852                 goto cleanup;
 853         ret = ctf_merge_common(&cm);
 854         ctf_dprintf("merge common returned with %d\n", ret);
 855         if (ret == 0) {
 856                 ret = ctf_update(out);
 857                 ctf_dprintf("update returned with %d\n", ret);
 858         } else {
 859                 goto cleanup;
 860         }
 861 
 862         /*
 863          * Now we need to fix up the object and function maps.
 864          */
 865         ctf_merge_fixup_symmaps(&cm, scmi);
 866 
 867         /*
 868          * Now that we've fixed things up, we need to give our function and
 869          * object maps to the destination, such that it can continue to update
 870          * them going forward.
 871          */
 872         list_move_tail(&dcmi->cmi_fmap, &scmi->cmi_fmap);
 873         list_move_tail(&dcmi->cmi_omap, &scmi->cmi_omap);
 874 
 875 cleanup:
 876         if (ret == 0)
 877                 *outp = dcmi;
 878         ctf_merge_types_fini(&cm);
 879         ctf_diff_fini(cdp);
 880         if (ret != 0)
 881                 return (ctf_errno(out));
 882         ctf_phase_bump();
 883         return (0);
 884 }
 885 
 886 static int
 887 ctf_uniquify_types(ctf_merge_t *cmh, ctf_file_t *src, ctf_file_t **outp)
 888 {
 889         int err, ret;
 890         ctf_file_t *out;
 891         ctf_merge_types_t cm;
 892         ctf_diff_t *cdp;
 893         ctf_merge_input_t *cmi;
 894         ctf_file_t *parent = cmh->cmh_unique;
 895 
 896         *outp = NULL;
 897         out = ctf_fdcreate(cmh->cmh_ofd, &err);
 898         if (out == NULL)
 899                 return (ctf_set_errno(src, err));
 900 
 901         out->ctf_parname = cmh->cmh_pname;
 902         if (ctf_setmodel(out, ctf_getmodel(parent)) != 0) {
 903                 (void) ctf_set_errno(src, ctf_errno(out));
 904                 ctf_close(out);
 905                 return (CTF_ERR);
 906         }
 907 
 908         if (ctf_import(out, parent) != 0) {
 909                 (void) ctf_set_errno(src, ctf_errno(out));
 910                 ctf_close(out);
 911                 return (CTF_ERR);
 912         }
 913 
 914         if ((ret = ctf_diff_init(parent, src, &cdp)) != 0) {
 915                 ctf_close(out);
 916                 return (ctf_set_errno(src, ctf_errno(parent)));
 917         }
 918 
 919         cm.cm_out = parent;
 920         cm.cm_src = src;
 921         cm.cm_dedup = B_FALSE;
 922         cm.cm_unique = B_TRUE;
 923         ret = ctf_merge_types_init(&cm);
 924         if (ret != 0) {
 925                 ctf_close(out);
 926                 ctf_diff_fini(cdp);
 927                 return (ctf_set_errno(src, ret));
 928         }
 929 
 930         ret = ctf_diff_types(cdp, ctf_merge_diffcb, &cm);
 931         if (ret == 0) {
 932                 cm.cm_out = out;
 933                 ret = ctf_merge_uniquify_types(&cm);
 934                 if (ret == 0)
 935                         ret = ctf_update(out);
 936         }
 937 
 938         if (ret != 0) {
 939                 ctf_merge_types_fini(&cm);
 940                 ctf_diff_fini(cdp);
 941                 return (ctf_set_errno(src, ctf_errno(cm.cm_out)));
 942         }
 943 
 944         for (cmi = list_head(&cmh->cmh_inputs); cmi != NULL;
 945             cmi = list_next(&cmh->cmh_inputs, cmi)) {
 946                 ctf_merge_fixup_symmaps(&cm, cmi);
 947         }
 948 
 949         ctf_merge_types_fini(&cm);
 950         ctf_diff_fini(cdp);
 951         *outp = out;
 952         return (0);
 953 }
 954 
 955 static void
 956 ctf_merge_fini_input(ctf_merge_input_t *cmi)
 957 {
 958         ctf_merge_objmap_t *cmo;
 959         ctf_merge_funcmap_t *cmf;
 960 
 961         while ((cmo = list_remove_head(&cmi->cmi_omap)) != NULL)
 962                 ctf_free(cmo, sizeof (ctf_merge_objmap_t));
 963 
 964         while ((cmf = list_remove_head(&cmi->cmi_fmap)) != NULL)
 965                 ctf_free(cmf, sizeof (ctf_merge_funcmap_t) +
 966                     sizeof (ctf_id_t) * cmf->cmf_argc);
 967 
 968         if (cmi->cmi_created == B_TRUE && cmi->cmi_input != NULL)
 969                 ctf_close(cmi->cmi_input);
 970 
 971         ctf_free(cmi, sizeof (ctf_merge_input_t));
 972 }
 973 
 974 void
 975 ctf_merge_fini(ctf_merge_t *cmh)
 976 {
 977         size_t len;
 978         ctf_merge_input_t *cmi;
 979 
 980         if (cmh->cmh_label != NULL) {
 981                 len = strlen(cmh->cmh_label) + 1;
 982                 ctf_free(cmh->cmh_label, len);
 983         }
 984 
 985         if (cmh->cmh_pname != NULL) {
 986                 len = strlen(cmh->cmh_pname) + 1;
 987                 ctf_free(cmh->cmh_pname, len);
 988         }
 989 
 990         while ((cmi = list_remove_head(&cmh->cmh_inputs)) != NULL)
 991                 ctf_merge_fini_input(cmi);
 992 
 993         ctf_free(cmh, sizeof (ctf_merge_t));
 994 }
 995 
 996 ctf_merge_t *
 997 ctf_merge_init(int fd, int *errp)
 998 {
 999         int err;
1000         ctf_merge_t *out;
1001         struct stat st;
1002 
1003         if (errp == NULL)
1004                 errp = &err;
1005 
1006         if (fd != -1 && fstat(fd, &st) != 0) {
1007                 *errp = EINVAL;
1008                 return (NULL);
1009         }
1010 
1011         out = ctf_alloc(sizeof (ctf_merge_t));
1012         if (out == NULL) {
1013                 *errp = ENOMEM;
1014                 return (NULL);
1015         }
1016 
1017         if (fd == -1) {
1018                 out->cmh_msyms = B_FALSE;
1019         } else {
1020                 out->cmh_msyms = B_TRUE;
1021         }
1022 
1023         list_create(&out->cmh_inputs, sizeof (ctf_merge_input_t),
1024             offsetof(ctf_merge_input_t, cmi_node));
1025         out->cmh_ninputs = 0;
1026         out->cmh_nthreads = 1;
1027         out->cmh_unique = NULL;
1028         out->cmh_ofd = fd;
1029         out->cmh_flags = 0;
1030         out->cmh_label = NULL;
1031         out->cmh_pname = NULL;
1032 
1033         return (out);
1034 }
1035 
1036 int
1037 ctf_merge_label(ctf_merge_t *cmh, const char *label)
1038 {
1039         char *dup;
1040 
1041         if (label == NULL)
1042                 return (EINVAL);
1043 
1044         dup = ctf_strdup(label);
1045         if (dup == NULL)
1046                 return (EAGAIN);
1047 
1048         if (cmh->cmh_label != NULL) {
1049                 size_t len = strlen(cmh->cmh_label) + 1;
1050                 ctf_free(cmh->cmh_label, len);
1051         }
1052 
1053         cmh->cmh_label = dup;
1054         return (0);
1055 }
1056 
1057 static int
1058 ctf_merge_add_function(ctf_merge_input_t *cmi, ctf_funcinfo_t *fip, ulong_t idx,
1059     const char *file, const char *name, const Elf64_Sym *symp)
1060 {
1061         ctf_merge_funcmap_t *fmap;
1062 
1063         fmap = ctf_alloc(sizeof (ctf_merge_funcmap_t) +
1064             sizeof (ctf_id_t) * fip->ctc_argc);
1065         if (fmap == NULL)
1066                 return (ENOMEM);
1067 
1068         fmap->cmf_idx = idx;
1069         fmap->cmf_sym = *symp;
1070         fmap->cmf_rtid = fip->ctc_return;
1071         fmap->cmf_flags = fip->ctc_flags;
1072         fmap->cmf_argc = fip->ctc_argc;
1073         fmap->cmf_name = name;
1074         if (ELF64_ST_BIND(symp->st_info) == STB_LOCAL) {
1075                 fmap->cmf_file = file;
1076         } else {
1077                 fmap->cmf_file = NULL;
1078         }
1079 
1080         if (ctf_func_args(cmi->cmi_input, idx, fmap->cmf_argc,
1081             fmap->cmf_args) != 0) {
1082                 ctf_free(fmap, sizeof (ctf_merge_funcmap_t) +
1083                     sizeof (ctf_id_t) * fip->ctc_argc);
1084                 return (ctf_errno(cmi->cmi_input));
1085         }
1086 
1087         ctf_dprintf("added initial function %s, %lu, %s %u\n", name, idx,
1088             fmap->cmf_file != NULL ? fmap->cmf_file : "global",
1089             ELF64_ST_BIND(symp->st_info));
1090         list_insert_tail(&cmi->cmi_fmap, fmap);
1091         return (0);
1092 }
1093 
1094 static int
1095 ctf_merge_add_object(ctf_merge_input_t *cmi, ctf_id_t id, ulong_t idx,
1096     const char *file, const char *name, const Elf64_Sym *symp)
1097 {
1098         ctf_merge_objmap_t *cmo;
1099 
1100         cmo = ctf_alloc(sizeof (ctf_merge_objmap_t));
1101         if (cmo == NULL)
1102                 return (ENOMEM);
1103 
1104         cmo->cmo_name = name;
1105         if (ELF64_ST_BIND(symp->st_info) == STB_LOCAL) {
1106                 cmo->cmo_file = file;
1107         } else {
1108                 cmo->cmo_file = NULL;
1109         }
1110         cmo->cmo_idx = idx;
1111         cmo->cmo_tid = id;
1112         cmo->cmo_sym = *symp;
1113         list_insert_tail(&cmi->cmi_omap, cmo);
1114 
1115         ctf_dprintf("added initial object %s, %lu, %ld, %s\n", name, idx, id,
1116             cmo->cmo_file != NULL ? cmo->cmo_file : "global");
1117 
1118         return (0);
1119 }
1120 
1121 static int
1122 ctf_merge_add_symbol(const Elf64_Sym *symp, ulong_t idx, const char *file,
1123     const char *name, boolean_t primary, void *arg)
1124 {
1125         ctf_merge_input_t *cmi = arg;
1126         ctf_file_t *fp = cmi->cmi_input;
1127         ushort_t *data, funcbase;
1128         uint_t type;
1129         ctf_funcinfo_t fi;
1130 
1131         /*
1132          * See if there is type information for this. If there is no
1133          * type information for this entry or no translation, then we
1134          * will find the value zero. This indicates no type ID for
1135          * objects and encodes unknown information for functions.
1136          */
1137         if (fp->ctf_sxlate[idx] == -1u)
1138                 return (0);
1139         data = (ushort_t *)((uintptr_t)fp->ctf_buf + fp->ctf_sxlate[idx]);
1140         if (*data == 0)
1141                 return (0);
1142 
1143         type = ELF64_ST_TYPE(symp->st_info);
1144 
1145         switch (type) {
1146         case STT_FUNC:
1147                 funcbase = *data;
1148                 if (LCTF_INFO_KIND(fp, funcbase) != CTF_K_FUNCTION)
1149                         return (0);
1150                 data++;
1151                 fi.ctc_return = *data;
1152                 data++;
1153                 fi.ctc_argc = LCTF_INFO_VLEN(fp, funcbase);
1154                 fi.ctc_flags = 0;
1155 
1156                 if (fi.ctc_argc != 0 && data[fi.ctc_argc - 1] == 0) {
1157                         fi.ctc_flags |= CTF_FUNC_VARARG;
1158                         fi.ctc_argc--;
1159                 }
1160                 return (ctf_merge_add_function(cmi, &fi, idx, file, name,
1161                     symp));
1162         case STT_OBJECT:
1163                 return (ctf_merge_add_object(cmi, *data, idx, file, name,
1164                     symp));
1165         default:
1166                 return (0);
1167         }
1168 }
1169 
1170 /*
1171  * Whenever we create an entry to merge, we then go and add a second empty
1172  * ctf_file_t which we use for the purposes of our merging. It's not the best,
1173  * but it's the best that we've got at the moment.
1174  */
1175 int
1176 ctf_merge_add(ctf_merge_t *cmh, ctf_file_t *input)
1177 {
1178         int ret;
1179         ctf_merge_input_t *cmi;
1180         ctf_file_t *empty;
1181 
1182         ctf_dprintf("adding input %p\n", input);
1183 
1184         if (input->ctf_flags & LCTF_CHILD)
1185                 return (ECTF_MCHILD);
1186 
1187         cmi = ctf_alloc(sizeof (ctf_merge_input_t));
1188         if (cmi == NULL)
1189                 return (ENOMEM);
1190 
1191         cmi->cmi_created = B_FALSE;
1192         cmi->cmi_input = input;
1193         list_create(&cmi->cmi_fmap, sizeof (ctf_merge_funcmap_t),
1194             offsetof(ctf_merge_funcmap_t, cmf_node));
1195         list_create(&cmi->cmi_omap, sizeof (ctf_merge_funcmap_t),
1196             offsetof(ctf_merge_objmap_t, cmo_node));
1197 
1198         if (cmh->cmh_msyms == B_TRUE) {
1199                 if ((ret = ctf_symtab_iter(input, ctf_merge_add_symbol,
1200                     cmi)) != 0) {
1201                         ctf_merge_fini_input(cmi);
1202                         return (ret);
1203                 }
1204         }
1205 
1206         list_insert_tail(&cmh->cmh_inputs, cmi);
1207         cmh->cmh_ninputs++;
1208 
1209         /* And now the empty one to merge into this */
1210         cmi = ctf_alloc(sizeof (ctf_merge_input_t));
1211         if (cmi == NULL)
1212                 return (ENOMEM);
1213         list_create(&cmi->cmi_fmap, sizeof (ctf_merge_funcmap_t),
1214             offsetof(ctf_merge_funcmap_t, cmf_node));
1215         list_create(&cmi->cmi_omap, sizeof (ctf_merge_funcmap_t),
1216             offsetof(ctf_merge_objmap_t, cmo_node));
1217 
1218         empty = ctf_fdcreate(cmh->cmh_ofd, &ret);
1219         if (empty == NULL)
1220                 return (ret);
1221         cmi->cmi_input = empty;
1222         cmi->cmi_created = B_TRUE;
1223 
1224         if (ctf_setmodel(empty, ctf_getmodel(input)) == CTF_ERR) {
1225                 return (ctf_errno(empty));
1226         }
1227 
1228         list_insert_tail(&cmh->cmh_inputs, cmi);
1229         cmh->cmh_ninputs++;
1230         ctf_dprintf("added containers %p and %p\n", input, empty);
1231         return (0);
1232 }
1233 
1234 int
1235 ctf_merge_uniquify(ctf_merge_t *cmh, ctf_file_t *u, const char *pname)
1236 {
1237         char *dup;
1238 
1239         if (u->ctf_flags & LCTF_CHILD)
1240                 return (ECTF_MCHILD);
1241         if (pname == NULL)
1242                 return (EINVAL);
1243         dup = ctf_strdup(pname);
1244         if (dup == NULL)
1245                 return (EINVAL);
1246         if (cmh->cmh_pname != NULL) {
1247                 size_t len = strlen(cmh->cmh_pname) + 1;
1248                 ctf_free(cmh->cmh_pname, len);
1249         }
1250         cmh->cmh_pname = dup;
1251         cmh->cmh_unique = u;
1252         return (0);
1253 }
1254 
1255 /*
1256  * Symbol matching rules: the purpose of this is to verify that the type
1257  * information that we have for a given symbol actually matches the output
1258  * symbol. This is unfortunately complicated by several different factors:
1259  *
1260  * 1. When merging multiple .o's into a single item, the symbol table index will
1261  * not match.
1262  *
1263  * 2. Visibility of a symbol may not be identical to the object file or the
1264  * DWARF information due to symbol reduction via a mapfile.
1265  *
1266  * As such, we have to employ the following rules:
1267  *
1268  * 1. A global symbol table entry always matches a global CTF symbol with the
1269  * same name.
1270  *
1271  * 2. A local symbol table entry always matches a local CTF symbol if they have
1272  * the same name and they belong to the same file.
1273  *
1274  * 3. A weak symbol matches a non-weak symbol. This happens if we find that the
1275  * types match, the values match, the sizes match, and the section indexes
1276  * match. This happens when we do a conversion in one pass, it almost never
1277  * happens when we're merging multiple object files. If we match a CTF global
1278  * symbol, that's a fixed match, otherwise it's a fuzzy match.
1279  *
1280  * 4. A local symbol table entry matches a global CTF entry if the
1281  * other pieces fail, but they have the same name. This is considered a fuzzy
1282  * match and is not used unless we have no other options.
1283  *
1284  * 5. A weak symbol table entry matches a weak CTF entry if the other pieces
1285  * fail, but they have the same name. This is considered a fuzzy match and is
1286  * not used unless we have no other options. When merging independent .o files,
1287  * this is often the only recourse we have to matching weak symbols.
1288  *
1289  * In the end, this would all be much simpler if we were able to do this as part
1290  * of libld which would be able to do all the symbol transformations.
1291  */
1292 static boolean_t
1293 ctf_merge_symbol_match(const char *ctf_file, const char *ctf_name,
1294     const Elf64_Sym *ctf_symp, const char *symtab_file, const char *symtab_name,
1295     const Elf64_Sym *symtab_symp, boolean_t *is_fuzzy)
1296 {
1297         *is_fuzzy = B_FALSE;
1298         uint_t symtab_bind, ctf_bind;
1299 
1300         symtab_bind = ELF64_ST_BIND(symtab_symp->st_info);
1301         ctf_bind = ELF64_ST_BIND(ctf_symp->st_info);
1302 
1303         ctf_dprintf("comparing merge match for %s/%s/%u->%s/%s/%u\n",
1304             symtab_file, symtab_name, symtab_bind,
1305             ctf_file, ctf_name, ctf_bind);
1306         if (strcmp(ctf_name, symtab_name) != 0) {
1307                 return (B_FALSE);
1308         }
1309 
1310         if (symtab_bind == STB_GLOBAL && ctf_bind == STB_GLOBAL) {
1311                 return (B_TRUE);
1312         } else if (symtab_bind == STB_GLOBAL) {
1313                 return (B_FALSE);
1314         }
1315 
1316         if (ctf_bind == STB_LOCAL && ctf_bind == symtab_bind &&
1317             ctf_file != NULL && symtab_file != NULL &&
1318             strcmp(ctf_file, symtab_file) == 0) {
1319                 return (B_TRUE);
1320         }
1321 
1322         if (symtab_bind == STB_WEAK && ctf_bind != STB_WEAK &&
1323             ELF64_ST_TYPE(symtab_symp->st_info) ==
1324             ELF64_ST_TYPE(ctf_symp->st_info) &&
1325             symtab_symp->st_value == ctf_symp->st_value &&
1326             symtab_symp->st_size == ctf_symp->st_size &&
1327             symtab_symp->st_shndx == ctf_symp->st_shndx) {
1328                 if (ctf_bind == STB_GLOBAL) {
1329                         return (B_TRUE);
1330                 }
1331 
1332                 if (ctf_bind == STB_LOCAL && ctf_file != NULL &&
1333                     symtab_file != NULL && strcmp(ctf_file, symtab_file) == 0) {
1334                         *is_fuzzy = B_TRUE;
1335                         return (B_TRUE);
1336                 }
1337         }
1338 
1339         if (ctf_bind == STB_GLOBAL ||
1340             (ctf_bind == STB_WEAK && symtab_bind == STB_WEAK)) {
1341                 *is_fuzzy = B_TRUE;
1342                 return (B_TRUE);
1343         }
1344 
1345         return (B_FALSE);
1346 }
1347 
1348 /*
1349  * For each symbol, try and find a match. We will attempt to find an exact
1350  * match; however, we will settle for a fuzzy match in general. There is one
1351  * case where we will not opt to use a fuzzy match, which is when performing the
1352  * deduplication of a container. In such a case we are trying to reduce common
1353  * types and a fuzzy match would be inappropriate as if we're in the context of
1354  * a single container, the conversion process should have identified any exact
1355  * or fuzzy matches that were required.
1356  */
1357 static int
1358 ctf_merge_symbols(const Elf64_Sym *symp, ulong_t idx, const char *file,
1359     const char *name, boolean_t primary, void *arg)
1360 {
1361         int err;
1362         uint_t type, bind;
1363         ctf_merge_symbol_arg_t *csa = arg;
1364         ctf_file_t *fp = csa->cmsa_out;
1365 
1366         type = ELF64_ST_TYPE(symp->st_info);
1367         bind = ELF64_ST_BIND(symp->st_info);
1368 
1369         ctf_dprintf("Trying to find match for %s/%s/%u\n", file, name,
1370             ELF64_ST_BIND(symp->st_info));
1371 
1372         if (type == STT_OBJECT) {
1373                 ctf_merge_objmap_t *cmo, *match = NULL;
1374 
1375                 for (cmo = list_head(csa->cmsa_objmap); cmo != NULL;
1376                     cmo = list_next(csa->cmsa_objmap, cmo)) {
1377                         boolean_t is_fuzzy = B_FALSE;
1378                         if (ctf_merge_symbol_match(cmo->cmo_file, cmo->cmo_name,
1379                             &cmo->cmo_sym, file, name, symp, &is_fuzzy)) {
1380                                 if (is_fuzzy && csa->cmsa_dedup &&
1381                                     bind != STB_WEAK) {
1382                                         continue;
1383                                 }
1384                                 match = cmo;
1385                                 if (is_fuzzy) {
1386                                         continue;
1387                                 }
1388                                 break;
1389                         }
1390                 }
1391 
1392                 if (match == NULL) {
1393                         return (0);
1394                 }
1395 
1396                 if ((err = ctf_add_object(fp, idx, match->cmo_tid)) != 0) {
1397                         ctf_dprintf("Failed to add symbol %s->%d: %s\n", name,
1398                             match->cmo_tid, ctf_errmsg(ctf_errno(fp)));
1399                         return (ctf_errno(fp));
1400                 }
1401                 ctf_dprintf("mapped object into output %s/%s->%ld\n", file,
1402                     name, match->cmo_tid);
1403         } else {
1404                 ctf_merge_funcmap_t *cmf, *match = NULL;
1405                 ctf_funcinfo_t fi;
1406 
1407                 for (cmf = list_head(csa->cmsa_funcmap); cmf != NULL;
1408                     cmf = list_next(csa->cmsa_funcmap, cmf)) {
1409                         boolean_t is_fuzzy = B_FALSE;
1410                         if (ctf_merge_symbol_match(cmf->cmf_file, cmf->cmf_name,
1411                             &cmf->cmf_sym, file, name, symp, &is_fuzzy)) {
1412                                 if (is_fuzzy && csa->cmsa_dedup &&
1413                                     bind != STB_WEAK) {
1414                                         continue;
1415                                 }
1416                                 match = cmf;
1417                                 if (is_fuzzy) {
1418                                         continue;
1419                                 }
1420                                 break;
1421                         }
1422                 }
1423 
1424                 if (match == NULL) {
1425                         return (0);
1426                 }
1427 
1428                 fi.ctc_return = match->cmf_rtid;
1429                 fi.ctc_argc = match->cmf_argc;
1430                 fi.ctc_flags = match->cmf_flags;
1431                 if ((err = ctf_add_function(fp, idx, &fi, match->cmf_args)) !=
1432                     0) {
1433                         ctf_dprintf("Failed to add function %s: %s\n", name,
1434                             ctf_errmsg(ctf_errno(fp)));
1435                         return (ctf_errno(fp));
1436                 }
1437                 ctf_dprintf("mapped function into output %s/%s\n", file,
1438                     name);
1439         }
1440 
1441         return (0);
1442 }
1443 
1444 int
1445 ctf_merge_merge(ctf_merge_t *cmh, ctf_file_t **outp)
1446 {
1447         int err, merr;
1448         ctf_merge_input_t *cmi;
1449         ctf_id_t ltype;
1450         mergeq_t *mqp;
1451         ctf_merge_input_t *final;
1452         ctf_file_t *out;
1453 
1454         ctf_dprintf("Beginning ctf_merge_merge()\n");
1455         if (cmh->cmh_label != NULL && cmh->cmh_unique != NULL) {
1456                 const char *label = ctf_label_topmost(cmh->cmh_unique);
1457                 if (label == NULL)
1458                         return (ECTF_NOLABEL);
1459                 if (strcmp(label, cmh->cmh_label) != 0)
1460                         return (ECTF_LCONFLICT);
1461         }
1462 
1463         if (mergeq_init(&mqp, cmh->cmh_nthreads) == -1) {
1464                 return (errno);
1465         }
1466 
1467         VERIFY(cmh->cmh_ninputs % 2 == 0);
1468         for (cmi = list_head(&cmh->cmh_inputs); cmi != NULL;
1469             cmi = list_next(&cmh->cmh_inputs, cmi)) {
1470                 if (mergeq_add(mqp, cmi) == -1) {
1471                         err = errno;
1472                         mergeq_fini(mqp);
1473                 }
1474         }
1475 
1476         err = mergeq_merge(mqp, ctf_merge_types, NULL, (void **)&final, &merr);
1477         mergeq_fini(mqp);
1478 
1479         if (err == MERGEQ_ERROR) {
1480                 return (errno);
1481         } else if (err == MERGEQ_UERROR) {
1482                 return (merr);
1483         }
1484 
1485         /*
1486          * Disassociate the generated ctf_file_t from the original input. That
1487          * way when the input gets cleaned up, we don't accidentally kill the
1488          * final reference to the ctf_file_t. If it gets uniquified then we'll
1489          * kill it.
1490          */
1491         VERIFY(final->cmi_input != NULL);
1492         out = final->cmi_input;
1493         final->cmi_input = NULL;
1494 
1495         ctf_dprintf("preparing to uniquify against: %p\n", cmh->cmh_unique);
1496         if (cmh->cmh_unique != NULL) {
1497                 ctf_file_t *u;
1498                 err = ctf_uniquify_types(cmh, out, &u);
1499                 if (err != 0) {
1500                         err = ctf_errno(out);
1501                         ctf_close(out);
1502                         return (err);
1503                 }
1504                 ctf_close(out);
1505                 out = u;
1506         }
1507 
1508         ltype = out->ctf_typemax;
1509         if ((out->ctf_flags & LCTF_CHILD) && ltype != 0)
1510                 ltype += CTF_CHILD_START;
1511         ctf_dprintf("trying to add the label\n");
1512         if (cmh->cmh_label != NULL &&
1513             ctf_add_label(out, cmh->cmh_label, ltype, 0) != 0) {
1514                 ctf_close(out);
1515                 return (ctf_errno(out));
1516         }
1517 
1518         ctf_dprintf("merging symbols and the like\n");
1519         if (cmh->cmh_msyms == B_TRUE) {
1520                 ctf_merge_symbol_arg_t arg;
1521                 arg.cmsa_objmap = &final->cmi_omap;
1522                 arg.cmsa_funcmap = &final->cmi_fmap;
1523                 arg.cmsa_out = out;
1524                 arg.cmsa_dedup = B_FALSE;
1525                 err = ctf_symtab_iter(out, ctf_merge_symbols, &arg);
1526                 if (err != 0) {
1527                         ctf_close(out);
1528                         return (err);
1529                 }
1530         }
1531 
1532         err = ctf_update(out);
1533         if (err != 0) {
1534                 err = ctf_errno(out);
1535                 ctf_close(out);
1536                 return (err);
1537         }
1538 
1539         *outp = out;
1540         return (0);
1541 }
1542 
1543 /*
1544  * When we get told that something is unique, eg. same is B_FALSE, then that
1545  * tells us that we need to add it to the output. If same is B_TRUE, then we'll
1546  * want to record it in the mapping table so that we know how to redirect types
1547  * to the extant ones.
1548  */
1549 static void
1550 ctf_dedup_cb(ctf_file_t *ifp, ctf_id_t iid, boolean_t same, ctf_file_t *ofp,
1551     ctf_id_t oid, void *arg)
1552 {
1553         ctf_merge_types_t *cmp = arg;
1554         ctf_merge_tinfo_t *cmt = cmp->cm_tmap;
1555 
1556         if (same == B_TRUE) {
1557                 /*
1558                  * The output id here may itself map to something else.
1559                  * Therefore, we need to basically walk a chain and see what it
1560                  * points to until it itself points to a base type, eg. -1.
1561                  * Otherwise we'll dedup to something which no longer exists.
1562                  */
1563                 while (cmt[oid].cmt_missing == B_FALSE)
1564                         oid = cmt[oid].cmt_map;
1565                 cmt[iid].cmt_map = oid;
1566                 ctf_dprintf("%d->%d \n", iid, oid);
1567         } else {
1568                 VERIFY(cmt[iid].cmt_map == 0);
1569                 cmt[iid].cmt_missing = B_TRUE;
1570                 ctf_dprintf("%d is missing\n", iid);
1571         }
1572 }
1573 
1574 /*
1575  * Dedup a CTF container.
1576  *
1577  * DWARF and other encoding formats that we use to create CTF data may create
1578  * multiple copies of a given type. However, after doing a conversion, and
1579  * before doing a merge, we'd prefer, if possible, to have every input container
1580  * to be unique.
1581  *
1582  * Doing a deduplication is like a normal merge. However, when we diff the types
1583  * in the container, rather than doing a normal diff, we instead want to diff
1584  * against any already processed types. eg, for a given type i in a container,
1585  * we want to diff it from 0 to i - 1.
1586  */
1587 int
1588 ctf_merge_dedup(ctf_merge_t *cmp, ctf_file_t **outp)
1589 {
1590         int ret;
1591         ctf_diff_t *cdp = NULL;
1592         ctf_merge_input_t *cmi, *cmc;
1593         ctf_file_t *ifp, *ofp;
1594         ctf_merge_types_t cm;
1595 
1596         if (cmp == NULL || outp == NULL)
1597                 return (EINVAL);
1598 
1599         ctf_dprintf("encountered %d inputs\n", cmp->cmh_ninputs);
1600         if (cmp->cmh_ninputs != 2)
1601                 return (EINVAL);
1602 
1603         ctf_dprintf("passed argument sanity check\n");
1604 
1605         cmi = list_head(&cmp->cmh_inputs);
1606         VERIFY(cmi != NULL);
1607         cmc = list_next(&cmp->cmh_inputs, cmi);
1608         VERIFY(cmc != NULL);
1609         ifp = cmi->cmi_input;
1610         ofp = cmc->cmi_input;
1611         VERIFY(ifp != NULL);
1612         VERIFY(ofp != NULL);
1613         cm.cm_src = ifp;
1614         cm.cm_out = ofp;
1615         cm.cm_dedup = B_TRUE;
1616         cm.cm_unique = B_FALSE;
1617 
1618         if ((ret = ctf_merge_types_init(&cm)) != 0) {
1619                 return (ret);
1620         }
1621 
1622         if ((ret = ctf_diff_init(ifp, ifp, &cdp)) != 0)
1623                 goto err;
1624 
1625         ctf_dprintf("Successfully initialized dedup\n");
1626         if ((ret = ctf_diff_self(cdp, ctf_dedup_cb, &cm)) != 0)
1627                 goto err;
1628 
1629         ctf_dprintf("Successfully diffed types\n");
1630         ret = ctf_merge_common(&cm);
1631         ctf_dprintf("deduping types result: %d\n", ret);
1632         if (ret == 0)
1633                 ret = ctf_update(cm.cm_out);
1634         if (ret != 0)
1635                 goto err;
1636 
1637         ctf_dprintf("Successfully deduped types\n");
1638         ctf_phase_dump(cm.cm_out, "dedup-pre-syms", NULL);
1639 
1640         /*
1641          * Now we need to fix up the object and function maps.
1642          */
1643         ctf_merge_fixup_symmaps(&cm, cmi);
1644 
1645         if (cmp->cmh_msyms == B_TRUE) {
1646                 ctf_merge_symbol_arg_t arg;
1647                 arg.cmsa_objmap = &cmi->cmi_omap;
1648                 arg.cmsa_funcmap = &cmi->cmi_fmap;
1649                 arg.cmsa_out = cm.cm_out;
1650                 arg.cmsa_dedup = B_TRUE;
1651                 ret = ctf_symtab_iter(cm.cm_out, ctf_merge_symbols, &arg);
1652                 if (ret != 0) {
1653                         ctf_dprintf("failed to dedup symbols: %s\n",
1654                             ctf_errmsg(ret));
1655                         goto err;
1656                 }
1657         }
1658 
1659         ret = ctf_update(cm.cm_out);
1660         if (ret == 0) {
1661                 cmc->cmi_input = NULL;
1662                 *outp = cm.cm_out;
1663         }
1664         ctf_phase_dump(cm.cm_out, "dedup-post-syms", NULL);
1665 err:
1666         ctf_merge_types_fini(&cm);
1667         ctf_diff_fini(cdp);
1668         return (ret);
1669 }
1670 
1671 int
1672 ctf_merge_set_nthreads(ctf_merge_t *cmp, const uint_t nthrs)
1673 {
1674         if (nthrs == 0)
1675                 return (EINVAL);
1676         cmp->cmh_nthreads = nthrs;
1677         return (0);
1678 }