1 /*
   2  * This file and its contents are supplied under the terms of the
   3  * Common Development and Distribution License ("CDDL"), version 1.0.
   4  * You may only use this file in accordance with the terms of version
   5  * 1.0 of the CDDL.
   6  *
   7  * A full copy of the text of the CDDL should have accompanied this
   8  * source.  A copy of the CDDL is also available via the Internet at
   9  * http://www.illumos.org/license/CDDL.
  10  */
  11 
  12 /*
  13  * Copyright 2020 Joyent, Inc.
  14  */
  15 
/*
 * To perform a merge of two CTF containers, we first diff the two containers'
 * types. For every type that's in the src container, but not in the dst
 * container, we note it and add it to the dst container. If there are any
 * objects or functions associated with src, we go through and update the types
 * that they refer to such that they all refer to types in the dst container.
 *
 * The bulk of the logic for the merge, after we've run the diff, occurs in
 * ctf_merge_common().
 *
 * In terms of exported APIs, we don't really export a simple "merge two
 * containers" operation, as the general way this is used, in something like
 * ctfmerge(1), is to add all the containers and then let us figure out the
 * best way to merge them.
 */
  31 
  32 #include <libctf_impl.h>
  33 #include <sys/debug.h>
  34 #include <sys/list.h>
  35 #include <stddef.h>
  36 #include <fcntl.h>
  37 #include <sys/types.h>
  38 #include <sys/stat.h>
  39 #include <mergeq.h>
  40 #include <errno.h>
  41 
/*
 * Per-type bookkeeping for a merge. ctf_merge_types_init() allocates one entry
 * for every type ID in the source container (plus slot 0, which the loops in
 * this file never visit) and the array is indexed by source type ID.
 */
typedef struct ctf_merge_tinfo {
        uint16_t cmt_map;       /* Mapped type ID; 0 means not yet mapped */
        boolean_t cmt_fixup;    /* struct/union added; members still needed */
        boolean_t cmt_forward;  /* matched a forward; see ctf_merge_diffcb() */
        boolean_t cmt_missing;  /* type must be added to the output */
} ctf_merge_tinfo_t;
  48 
/*
 * State required for doing an individual merge of two containers. cm_tmap is
 * allocated by ctf_merge_types_init() and released by ctf_merge_types_fini();
 * it holds one ctf_merge_tinfo_t per type ID in cm_src.
 */
typedef struct ctf_merge_types {
        ctf_file_t *cm_out;             /* Output CTF file */
        ctf_file_t *cm_src;             /* Input CTF file */
        ctf_merge_tinfo_t *cm_tmap;     /* Type state information */
        boolean_t cm_dedup;             /* Are we doing a dedup? */
        boolean_t cm_unique;            /* are we doing a uniquify? */
} ctf_merge_types_t;
  59 
/*
 * A data object symbol and the type it refers to. Entries live on the cmi_omap
 * list of a ctf_merge_input_t; ctf_merge_fixup_symmaps() rewrites cmo_tid
 * through the type map after a merge.
 */
typedef struct ctf_merge_objmap {
        list_node_t cmo_node;
        const char *cmo_name;           /* Symbol name */
        const char *cmo_file;           /* Symbol file */
        ulong_t cmo_idx;                /* Symbol ID */
        Elf64_Sym cmo_sym;              /* Symbol Entry */
        ctf_id_t cmo_tid;               /* Type ID */
} ctf_merge_objmap_t;
  68 
/*
 * A function symbol together with its return and argument types. Entries live
 * on the cmi_fmap list of a ctf_merge_input_t; ctf_merge_fixup_symmaps()
 * rewrites cmf_rtid and each of the cmf_argc entries of cmf_args through the
 * type map after a merge.
 */
typedef struct ctf_merge_funcmap {
        list_node_t cmf_node;
        const char *cmf_name;           /* Symbol name */
        const char *cmf_file;           /* Symbol file */
        ulong_t cmf_idx;                /* Symbol ID */
        Elf64_Sym cmf_sym;              /* Symbol Entry */
        ctf_id_t cmf_rtid;              /* Type ID */
        uint_t cmf_flags;               /* ctf_funcinfo_t ctc_flags */
        uint_t cmf_argc;                /* Number of arguments */
        ctf_id_t cmf_args[];            /* Types of arguments */
} ctf_merge_funcmap_t;
  80 
/*
 * One input to a merge: a CTF container plus the object and function symbol
 * maps whose type IDs refer to that container.
 */
typedef struct ctf_merge_input {
        list_node_t cmi_node;
        ctf_file_t *cmi_input;          /* The container itself */
        list_t cmi_omap;                /* List of ctf_merge_objmap_t */
        list_t cmi_fmap;                /* List of ctf_merge_funcmap_t */
        /* NOTE(review): presumably "we created cmi_input ourselves"; confirm */
        boolean_t cmi_created;
} ctf_merge_input_t;
  88 
/*
 * State for an overall merge operation. ctf_uniquify_types() receives this as
 * a ctf_merge_t and reads cmh_unique from it.
 */
struct ctf_merge_handle {
        list_t cmh_inputs;              /* Input list */
        uint_t cmh_ninputs;             /* Number of inputs */
        uint_t cmh_nthreads;            /* Number of threads to use */
        ctf_file_t *cmh_unique;         /* ctf to uniquify against */
        boolean_t cmh_msyms;            /* Should we merge symbols/funcs? */
        int cmh_ofd;                    /* FD for output file */
        int cmh_flags;                  /* Flags that control merge behavior */
        char *cmh_label;                /* Optional label */
        char *cmh_pname;                /* Parent name */
};
 100 
/*
 * Argument bundle for symbol processing.
 * NOTE(review): no user of this type is visible in this part of the file; the
 * field descriptions below are inferred from the names and should be confirmed
 * against the code that consumes them.
 */
typedef struct ctf_merge_symbol_arg {
        list_t *cmsa_objmap;            /* presumably ctf_merge_objmap_t list */
        list_t *cmsa_funcmap;           /* presumably ctf_merge_funcmap_t list */
        ctf_file_t *cmsa_out;           /* presumably the output container */
        boolean_t cmsa_dedup;           /* presumably "is this a dedup?" */
} ctf_merge_symbol_arg_t;
 107 
/*
 * Forward declaration: the ctf_merge_add_*() helpers below call
 * ctf_merge_add_type() to add the types they reference, and it in turn
 * dispatches back to them.
 */
static int ctf_merge_add_type(ctf_merge_types_t *, ctf_id_t);
 109 
 110 static ctf_id_t
 111 ctf_merge_gettype(ctf_merge_types_t *cmp, ctf_id_t id)
 112 {
 113         if (cmp->cm_dedup == B_FALSE) {
 114                 VERIFY(cmp->cm_tmap[id].cmt_map != 0);
 115                 return (cmp->cm_tmap[id].cmt_map);
 116         }
 117 
 118         while (cmp->cm_tmap[id].cmt_missing == B_FALSE) {
 119                 VERIFY(cmp->cm_tmap[id].cmt_map != 0);
 120                 id = cmp->cm_tmap[id].cmt_map;
 121         }
 122         VERIFY(cmp->cm_tmap[id].cmt_map != 0);
 123         return (cmp->cm_tmap[id].cmt_map);
 124 }
 125 
 126 static void
 127 ctf_merge_diffcb(ctf_file_t *ifp, ctf_id_t iid, boolean_t same, ctf_file_t *ofp,
 128     ctf_id_t oid, void *arg)
 129 {
 130         ctf_merge_types_t *cmp = arg;
 131         ctf_merge_tinfo_t *cmt = cmp->cm_tmap;
 132 
 133         if (same == B_TRUE) {
 134                 if (ctf_type_kind(ifp, iid) == CTF_K_FORWARD &&
 135                     ctf_type_kind(ofp, oid) != CTF_K_FORWARD) {
 136                         VERIFY(cmt[oid].cmt_map == 0);
 137 
 138                         /*
 139                          * If we're uniquifying types, it's possible for the
 140                          * container that we're uniquifying against to have a
 141                          * forward which exists in the container being reduced.
 142                          * For example, genunix has the machcpu structure as a
 143                          * forward which is actually in unix and we uniquify
 144                          * unix against genunix. In such cases, we explicitly do
 145                          * not do any mapping of the forward information, lest
 146                          * we risk losing the real definition. Instead, mark
 147                          * that it's missing.
 148                          */
 149                         if (cmp->cm_unique == B_TRUE) {
 150                                 cmt[oid].cmt_missing = B_TRUE;
 151                                 return;
 152                         }
 153 
 154                         cmt[oid].cmt_map = iid;
 155                         cmt[oid].cmt_forward = B_TRUE;
 156                         ctf_dprintf("merge diff forward mapped %d->%d\n", oid,
 157                             iid);
 158                         return;
 159                 }
 160 
 161                 /*
 162                  * We could have multiple things that a given type ends up
 163                  * matching in the world of forwards and pointers to forwards.
 164                  * For now just take the first one...
 165                  */
 166                 if (cmt[oid].cmt_map != 0)
 167                         return;
 168                 cmt[oid].cmt_map = iid;
 169                 ctf_dprintf("merge diff mapped %d->%d\n", oid, iid);
 170         } else if (ifp == cmp->cm_src) {
 171                 VERIFY(cmt[iid].cmt_map == 0);
 172                 cmt[iid].cmt_missing = B_TRUE;
 173                 ctf_dprintf("merge diff said %d is missing\n", iid);
 174         }
 175 }
 176 
 177 static int
 178 ctf_merge_add_number(ctf_merge_types_t *cmp, ctf_id_t id)
 179 {
 180         int ret, flags;
 181         const ctf_type_t *tp;
 182         const char *name;
 183         ctf_encoding_t en;
 184 
 185         if (ctf_type_encoding(cmp->cm_src, id, &en) != 0)
 186                 return (CTF_ERR);
 187 
 188         tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
 189         name = ctf_strraw(cmp->cm_src, tp->ctt_name);
 190         if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
 191                 flags = CTF_ADD_ROOT;
 192         else
 193                 flags = CTF_ADD_NONROOT;
 194 
 195         ret = ctf_add_encoded(cmp->cm_out, flags, name, &en,
 196             ctf_type_kind(cmp->cm_src, id));
 197 
 198         if (ret == CTF_ERR)
 199                 return (ret);
 200 
 201         VERIFY(cmp->cm_tmap[id].cmt_map == 0);
 202         cmp->cm_tmap[id].cmt_map = ret;
 203         return (0);
 204 }
 205 
 206 static int
 207 ctf_merge_add_array(ctf_merge_types_t *cmp, ctf_id_t id)
 208 {
 209         int ret, flags;
 210         const ctf_type_t *tp;
 211         ctf_arinfo_t ar;
 212 
 213         if (ctf_array_info(cmp->cm_src, id, &ar) == CTF_ERR)
 214                 return (CTF_ERR);
 215 
 216         tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
 217         if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
 218                 flags = CTF_ADD_ROOT;
 219         else
 220                 flags = CTF_ADD_NONROOT;
 221 
 222         if (cmp->cm_tmap[ar.ctr_contents].cmt_map == 0) {
 223                 ret = ctf_merge_add_type(cmp, ar.ctr_contents);
 224                 if (ret != 0)
 225                         return (ret);
 226                 ASSERT(cmp->cm_tmap[ar.ctr_contents].cmt_map != 0);
 227         }
 228         ar.ctr_contents = ctf_merge_gettype(cmp, ar.ctr_contents);
 229 
 230         if (cmp->cm_tmap[ar.ctr_index].cmt_map == 0) {
 231                 ret = ctf_merge_add_type(cmp, ar.ctr_index);
 232                 if (ret != 0)
 233                         return (ret);
 234                 ASSERT(cmp->cm_tmap[ar.ctr_index].cmt_map != 0);
 235         }
 236         ar.ctr_index = ctf_merge_gettype(cmp, ar.ctr_index);
 237 
 238         ret = ctf_add_array(cmp->cm_out, flags, &ar);
 239         if (ret == CTF_ERR)
 240                 return (ret);
 241 
 242         VERIFY(cmp->cm_tmap[id].cmt_map == 0);
 243         cmp->cm_tmap[id].cmt_map = ret;
 244 
 245         return (0);
 246 }
 247 
 248 static int
 249 ctf_merge_add_reftype(ctf_merge_types_t *cmp, ctf_id_t id)
 250 {
 251         int ret, flags;
 252         const ctf_type_t *tp;
 253         ctf_id_t reftype;
 254         const char *name;
 255 
 256         tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
 257         name = ctf_strraw(cmp->cm_src, tp->ctt_name);
 258         if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
 259                 flags = CTF_ADD_ROOT;
 260         else
 261                 flags = CTF_ADD_NONROOT;
 262 
 263         reftype = ctf_type_reference(cmp->cm_src, id);
 264         if (reftype == CTF_ERR)
 265                 return (ctf_set_errno(cmp->cm_out, ctf_errno(cmp->cm_src)));
 266 
 267         if (cmp->cm_tmap[reftype].cmt_map == 0) {
 268                 ret = ctf_merge_add_type(cmp, reftype);
 269                 if (ret != 0)
 270                         return (ret);
 271                 ASSERT(cmp->cm_tmap[reftype].cmt_map != 0);
 272         }
 273         reftype = ctf_merge_gettype(cmp, reftype);
 274 
 275         ret = ctf_add_reftype(cmp->cm_out, flags, name, reftype,
 276             ctf_type_kind(cmp->cm_src, id));
 277         if (ret == CTF_ERR)
 278                 return (ret);
 279 
 280         VERIFY(cmp->cm_tmap[id].cmt_map == 0);
 281         cmp->cm_tmap[id].cmt_map = ret;
 282         return (0);
 283 }
 284 
 285 static int
 286 ctf_merge_add_typedef(ctf_merge_types_t *cmp, ctf_id_t id)
 287 {
 288         int ret, flags;
 289         const ctf_type_t *tp;
 290         const char *name;
 291         ctf_id_t reftype;
 292 
 293         tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
 294         name = ctf_strraw(cmp->cm_src, tp->ctt_name);
 295         if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
 296                 flags = CTF_ADD_ROOT;
 297         else
 298                 flags = CTF_ADD_NONROOT;
 299 
 300         reftype = ctf_type_reference(cmp->cm_src, id);
 301         if (reftype == CTF_ERR)
 302                 return (ctf_set_errno(cmp->cm_out, ctf_errno(cmp->cm_src)));
 303 
 304         if (cmp->cm_tmap[reftype].cmt_map == 0) {
 305                 ret = ctf_merge_add_type(cmp, reftype);
 306                 if (ret != 0)
 307                         return (ret);
 308                 ASSERT(cmp->cm_tmap[reftype].cmt_map != 0);
 309         }
 310         reftype = ctf_merge_gettype(cmp, reftype);
 311 
 312         ret = ctf_add_typedef(cmp->cm_out, flags, name, reftype);
 313         if (ret == CTF_ERR)
 314                 return (ret);
 315 
 316         VERIFY(cmp->cm_tmap[id].cmt_map == 0);
 317         cmp->cm_tmap[id].cmt_map = ret;
 318         return (0);
 319 }
 320 
/*
 * Argument handed to ctf_merge_add_enumerator() while iterating over the
 * enumerators of a source enum.
 */
typedef struct ctf_merge_enum {
        ctf_file_t *cme_fp;     /* Container receiving the enumerators */
        ctf_id_t cme_id;        /* ID of the enum in cme_fp */
} ctf_merge_enum_t;
 325 
 326 static int
 327 ctf_merge_add_enumerator(const char *name, int value, void *arg)
 328 {
 329         ctf_merge_enum_t *cmep = arg;
 330 
 331         return (ctf_add_enumerator(cmep->cme_fp, cmep->cme_id, name, value) ==
 332             CTF_ERR);
 333 }
 334 
 335 static int
 336 ctf_merge_add_enum(ctf_merge_types_t *cmp, ctf_id_t id)
 337 {
 338         int flags;
 339         const ctf_type_t *tp;
 340         const char *name;
 341         ctf_id_t enumid;
 342         ctf_merge_enum_t cme;
 343         size_t size;
 344 
 345         tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
 346         if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
 347                 flags = CTF_ADD_ROOT;
 348         else
 349                 flags = CTF_ADD_NONROOT;
 350 
 351         name = ctf_strraw(cmp->cm_src, tp->ctt_name);
 352         size = ctf_get_ctt_size(cmp->cm_src, tp, NULL, NULL);
 353 
 354         enumid = ctf_add_enum(cmp->cm_out, flags, name, size);
 355         if (enumid == CTF_ERR)
 356                 return (enumid);
 357 
 358         cme.cme_fp = cmp->cm_out;
 359         cme.cme_id = enumid;
 360         if (ctf_enum_iter(cmp->cm_src, id, ctf_merge_add_enumerator,
 361             &cme) != 0)
 362                 return (CTF_ERR);
 363 
 364         VERIFY(cmp->cm_tmap[id].cmt_map == 0);
 365         cmp->cm_tmap[id].cmt_map = enumid;
 366         return (0);
 367 }
 368 
 369 static int
 370 ctf_merge_add_func(ctf_merge_types_t *cmp, ctf_id_t id)
 371 {
 372         int ret, flags, i;
 373         const ctf_type_t *tp;
 374         ctf_funcinfo_t ctc;
 375         ctf_id_t *argv;
 376 
 377         tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
 378         if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
 379                 flags = CTF_ADD_ROOT;
 380         else
 381                 flags = CTF_ADD_NONROOT;
 382 
 383         if (ctf_func_info_by_id(cmp->cm_src, id, &ctc) == CTF_ERR)
 384                 return (ctf_set_errno(cmp->cm_out, ctf_errno(cmp->cm_src)));
 385 
 386         argv = ctf_alloc(sizeof (ctf_id_t) * ctc.ctc_argc);
 387         if (argv == NULL)
 388                 return (ctf_set_errno(cmp->cm_out, ENOMEM));
 389         if (ctf_func_args_by_id(cmp->cm_src, id, ctc.ctc_argc, argv) ==
 390             CTF_ERR) {
 391                 ctf_free(argv, sizeof (ctf_id_t) * ctc.ctc_argc);
 392                 return (ctf_set_errno(cmp->cm_out, ctf_errno(cmp->cm_src)));
 393         }
 394 
 395         if (cmp->cm_tmap[ctc.ctc_return].cmt_map == 0) {
 396                 ret = ctf_merge_add_type(cmp, ctc.ctc_return);
 397                 if (ret != 0)
 398                         return (ret);
 399                 ASSERT(cmp->cm_tmap[ctc.ctc_return].cmt_map != 0);
 400         }
 401         ctc.ctc_return = ctf_merge_gettype(cmp, ctc.ctc_return);
 402 
 403         for (i = 0; i < ctc.ctc_argc; i++) {
 404                 if (cmp->cm_tmap[argv[i]].cmt_map == 0) {
 405                         ret = ctf_merge_add_type(cmp, argv[i]);
 406                         if (ret != 0)
 407                                 return (ret);
 408                         ASSERT(cmp->cm_tmap[argv[i]].cmt_map != 0);
 409                 }
 410                 argv[i] = ctf_merge_gettype(cmp, argv[i]);
 411         }
 412 
 413         ret = ctf_add_funcptr(cmp->cm_out, flags, &ctc, argv);
 414         ctf_free(argv, sizeof (ctf_id_t) * ctc.ctc_argc);
 415         if (ret == CTF_ERR)
 416                 return (ret);
 417 
 418         VERIFY(cmp->cm_tmap[id].cmt_map == 0);
 419         cmp->cm_tmap[id].cmt_map = ret;
 420         return (0);
 421 }
 422 
 423 static int
 424 ctf_merge_add_forward(ctf_merge_types_t *cmp, ctf_id_t id)
 425 {
 426         int ret, flags;
 427         const ctf_type_t *tp;
 428         const char *name;
 429 
 430         tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
 431         name = ctf_strraw(cmp->cm_src, tp->ctt_name);
 432         if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
 433                 flags = CTF_ADD_ROOT;
 434         else
 435                 flags = CTF_ADD_NONROOT;
 436 
 437         /*
 438          * ctf_add_forward tries to check to see if a given forward already
 439          * exists in one of its hash tables.  If we're here then we know that we
 440          * have a forward in a container that isn't present in another.
 441          * Therefore, we choose a token hash table to satisfy the API choice
 442          * here.
 443          */
 444         ret = ctf_add_forward(cmp->cm_out, flags, name, CTF_K_STRUCT);
 445         if (ret == CTF_ERR)
 446                 return (CTF_ERR);
 447 
 448         VERIFY(cmp->cm_tmap[id].cmt_map == 0);
 449         cmp->cm_tmap[id].cmt_map = ret;
 450         return (0);
 451 }
 452 
/*
 * Argument handed to ctf_merge_add_member() while iterating over the members
 * of a source structure or union.
 */
typedef struct ctf_merge_su {
        ctf_merge_types_t *cms_cm;      /* Merge state, including the map */
        ctf_id_t cms_id;                /* ID of the struct/union in cm_out */
} ctf_merge_su_t;
 457 
 458 static int
 459 ctf_merge_add_member(const char *name, ctf_id_t type, ulong_t offset, void *arg)
 460 {
 461         ctf_merge_su_t *cms = arg;
 462 
 463         VERIFY(cms->cms_cm->cm_tmap[type].cmt_map != 0);
 464         type = cms->cms_cm->cm_tmap[type].cmt_map;
 465 
 466         ctf_dprintf("Trying to add member %s to %d\n", name, cms->cms_id);
 467         return (ctf_add_member(cms->cms_cm->cm_out, cms->cms_id, name,
 468             type, offset) == CTF_ERR);
 469 }
 470 
 471 /*
 472  * During the first pass, we always add the generic structure and union but none
 473  * of its members as they might not all have been mapped yet. Instead we just
 474  * mark all structures and unions as needing to be fixed up.
 475  */
 476 static int
 477 ctf_merge_add_sou(ctf_merge_types_t *cmp, ctf_id_t id, boolean_t forward)
 478 {
 479         int flags, kind;
 480         const ctf_type_t *tp;
 481         const char *name;
 482         ctf_id_t suid;
 483 
 484         tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
 485         name = ctf_strraw(cmp->cm_src, tp->ctt_name);
 486         if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
 487                 flags = CTF_ADD_ROOT;
 488         else
 489                 flags = CTF_ADD_NONROOT;
 490         kind = ctf_type_kind(cmp->cm_src, id);
 491 
 492         if (kind == CTF_K_STRUCT)
 493                 suid = ctf_add_struct(cmp->cm_out, flags, name);
 494         else
 495                 suid = ctf_add_union(cmp->cm_out, flags, name);
 496 
 497         if (suid == CTF_ERR)
 498                 return (suid);
 499 
 500         /*
 501          * If this is a forward reference then its mapping should already
 502          * exist.
 503          */
 504         if (forward == B_FALSE) {
 505                 VERIFY(cmp->cm_tmap[id].cmt_map == 0);
 506                 cmp->cm_tmap[id].cmt_map = suid;
 507                 ctf_dprintf("added sou \"%s\" as (%d) %d->%d\n", name, kind, id,
 508                     suid);
 509         } else {
 510                 VERIFY(cmp->cm_tmap[id].cmt_map == suid);
 511         }
 512         cmp->cm_tmap[id].cmt_fixup = B_TRUE;
 513 
 514         return (0);
 515 }
 516 
 517 static int
 518 ctf_merge_add_type(ctf_merge_types_t *cmp, ctf_id_t id)
 519 {
 520         int kind, ret;
 521 
 522         /*
 523          * We may end up evaluating a type more than once as we may deal with it
 524          * as we recursively evaluate some kind of reference and then we may see
 525          * it normally.
 526          */
 527         if (cmp->cm_tmap[id].cmt_map != 0)
 528                 return (0);
 529 
 530         kind = ctf_type_kind(cmp->cm_src, id);
 531         switch (kind) {
 532         case CTF_K_INTEGER:
 533         case CTF_K_FLOAT:
 534                 ret = ctf_merge_add_number(cmp, id);
 535                 break;
 536         case CTF_K_ARRAY:
 537                 ret = ctf_merge_add_array(cmp, id);
 538                 break;
 539         case CTF_K_POINTER:
 540         case CTF_K_VOLATILE:
 541         case CTF_K_CONST:
 542         case CTF_K_RESTRICT:
 543                 ret = ctf_merge_add_reftype(cmp, id);
 544                 break;
 545         case CTF_K_TYPEDEF:
 546                 ret = ctf_merge_add_typedef(cmp, id);
 547                 break;
 548         case CTF_K_ENUM:
 549                 ret = ctf_merge_add_enum(cmp, id);
 550                 break;
 551         case CTF_K_FUNCTION:
 552                 ret = ctf_merge_add_func(cmp, id);
 553                 break;
 554         case CTF_K_FORWARD:
 555                 ret = ctf_merge_add_forward(cmp, id);
 556                 break;
 557         case CTF_K_STRUCT:
 558         case CTF_K_UNION:
 559                 ret = ctf_merge_add_sou(cmp, id, B_FALSE);
 560                 break;
 561         case CTF_K_UNKNOWN:
 562                 /*
 563                  * We don't add unknown types, and we later assert that nothing
 564                  * should reference them.
 565                  */
 566                 return (0);
 567         default:
 568                 abort();
 569         }
 570 
 571         return (ret);
 572 }
 573 
 574 static int
 575 ctf_merge_fixup_sou(ctf_merge_types_t *cmp, ctf_id_t id)
 576 {
 577         ctf_dtdef_t *dtd;
 578         ctf_merge_su_t cms;
 579         ctf_id_t mapid;
 580         ssize_t size;
 581 
 582         mapid = cmp->cm_tmap[id].cmt_map;
 583         VERIFY(mapid != 0);
 584         dtd = ctf_dtd_lookup(cmp->cm_out, mapid);
 585         VERIFY(dtd != NULL);
 586 
 587         ctf_dprintf("Trying to fix up sou %d\n", id);
 588         cms.cms_cm = cmp;
 589         cms.cms_id = mapid;
 590         if (ctf_member_iter(cmp->cm_src, id, ctf_merge_add_member, &cms) != 0)
 591                 return (CTF_ERR);
 592 
 593         if ((size = ctf_type_size(cmp->cm_src, id)) == CTF_ERR)
 594                 return (CTF_ERR);
 595         if (ctf_set_size(cmp->cm_out, mapid, size) == CTF_ERR)
 596                 return (CTF_ERR);
 597 
 598         return (0);
 599 }
 600 
 601 static int
 602 ctf_merge_fixup_type(ctf_merge_types_t *cmp, ctf_id_t id)
 603 {
 604         int kind, ret;
 605 
 606         kind = ctf_type_kind(cmp->cm_src, id);
 607         switch (kind) {
 608         case CTF_K_STRUCT:
 609         case CTF_K_UNION:
 610                 ret = ctf_merge_fixup_sou(cmp, id);
 611                 break;
 612         default:
 613                 VERIFY(0);
 614                 ret = CTF_ERR;
 615         }
 616 
 617         return (ret);
 618 }
 619 
 620 /*
 621  * Now that we've successfully merged everything, we're going to remap the type
 622  * table.
 623  *
 624  * Remember we have two containers: ->cm_src is what we're working from, and
 625  * ->cm_out is where we are building the de-duplicated CTF.
 626  *
 627  * The index of this table is always the type IDs in ->cm_src.
 628  *
 629  * When we built this table originally in ctf_diff_self(), if we found a novel
 630  * type, we marked it as .cmt_missing to indicate it needs adding to ->cm_out.
 631  * Otherwise, .cmt_map indicated the ->cm_src type ID that this type duplicates.
 632  *
 633  * Then, in ctf_merge_common(), we walked through and added all "cmt_missing"
 634  * types to ->cm_out with ctf_merge_add_type(). These routines update cmt_map
 635  * to be the *new* type ID in ->cm_out.  In this function, you can read
 636  * "cmt_missing" as meaning "added to ->cm_out, and cmt_map updated".
 637  *
 638  * So at this point, we need to mop up all types where .cmt_missing == B_FALSE,
 639  * making sure *their* .cmt_map values also point to the ->cm_out container.
 640  */
 641 static void
 642 ctf_merge_dedup_remap(ctf_merge_types_t *cmp)
 643 {
 644         int i;
 645 
 646         for (i = 1; i < cmp->cm_src->ctf_typemax + 1; i++) {
 647                 ctf_id_t tid;
 648 
 649                 if (cmp->cm_tmap[i].cmt_missing == B_TRUE) {
 650                         VERIFY(cmp->cm_tmap[i].cmt_map != 0);
 651                         continue;
 652                 }
 653 
 654                 tid = i;
 655                 while (cmp->cm_tmap[tid].cmt_missing == B_FALSE) {
 656                         VERIFY(cmp->cm_tmap[tid].cmt_map != 0);
 657                         tid = cmp->cm_tmap[tid].cmt_map;
 658                 }
 659                 VERIFY(cmp->cm_tmap[tid].cmt_map != 0);
 660                 cmp->cm_tmap[i].cmt_map = cmp->cm_tmap[tid].cmt_map;
 661         }
 662 }
 663 
 664 
 665 /*
 666  * We're going to do three passes over the containers.
 667  *
 668  * Pass 1 checks for forward references in the output container that we know
 669  * exist in the source container.
 670  *
 671  * Pass 2 adds all the missing types from the source container. As part of this
 672  * we may be adding a type as a forward reference that doesn't exist yet.
 673  * Any types that we encounter in this form, we need to add to a third pass.
 674  *
 675  * Pass 3 is the fixup pass. Here we go through and find all the types that were
 676  * missing in the first.
 677  *
 678  * Importantly, we *must* call ctf_update between the second and third pass,
 679  * otherwise several of the libctf functions will not properly find the data in
 680  * the container. If we're doing a dedup we also fix up the type mapping.
 681  */
 682 static int
 683 ctf_merge_common(ctf_merge_types_t *cmp)
 684 {
 685         int ret, i;
 686 
 687         ctf_phase_dump(cmp->cm_src, "merge-common-src", NULL);
 688         ctf_phase_dump(cmp->cm_out, "merge-common-dest", NULL);
 689 
 690         /* Pass 1 */
 691         for (i = 1; i <= cmp->cm_src->ctf_typemax; i++) {
 692                 if (cmp->cm_tmap[i].cmt_forward == B_TRUE) {
 693                         ret = ctf_merge_add_sou(cmp, i, B_TRUE);
 694                         if (ret != 0) {
 695                                 return (ret);
 696                         }
 697                 }
 698         }
 699 
 700         /* Pass 2 */
 701         for (i = 1; i <= cmp->cm_src->ctf_typemax; i++) {
 702                 if (cmp->cm_tmap[i].cmt_missing == B_TRUE) {
 703                         ret = ctf_merge_add_type(cmp, i);
 704                         if (ret != 0) {
 705                                 ctf_dprintf("Failed to merge type %d\n", i);
 706                                 return (ret);
 707                         }
 708                 }
 709         }
 710 
 711         ret = ctf_update(cmp->cm_out);
 712         if (ret != 0)
 713                 return (ret);
 714 
 715         if (cmp->cm_dedup == B_TRUE) {
 716                 ctf_merge_dedup_remap(cmp);
 717         }
 718 
 719         ctf_dprintf("Beginning merge pass 3\n");
 720         /* Pass 3 */
 721         for (i = 1; i <= cmp->cm_src->ctf_typemax; i++) {
 722                 if (cmp->cm_tmap[i].cmt_fixup == B_TRUE) {
 723                         ret = ctf_merge_fixup_type(cmp, i);
 724                         if (ret != 0)
 725                                 return (ret);
 726                 }
 727         }
 728 
 729         return (0);
 730 }
 731 
 732 /*
 733  * Uniquification is slightly different from a stock merge. For starters, we
 734  * don't need to replace any forward references in the output. In this case
 735  * though, the types that already exist are in a parent container to the empty
 736  * output container.
 737  */
 738 static int
 739 ctf_merge_uniquify_types(ctf_merge_types_t *cmp)
 740 {
 741         int i, ret;
 742 
 743         for (i = 1; i <= cmp->cm_src->ctf_typemax; i++) {
 744                 if (cmp->cm_tmap[i].cmt_missing == B_FALSE)
 745                         continue;
 746                 ret = ctf_merge_add_type(cmp, i);
 747                 if (ret != 0)
 748                         return (ret);
 749         }
 750 
 751         ret = ctf_update(cmp->cm_out);
 752         if (ret != 0)
 753                 return (ret);
 754 
 755         for (i = 1; i <= cmp->cm_src->ctf_typemax; i++) {
 756                 if (cmp->cm_tmap[i].cmt_fixup == B_FALSE)
 757                         continue;
 758                 ret = ctf_merge_fixup_type(cmp, i);
 759                 if (ret != 0)
 760                         return (ret);
 761         }
 762 
 763         return (0);
 764 }
 765 
 766 static int
 767 ctf_merge_types_init(ctf_merge_types_t *cmp)
 768 {
 769         cmp->cm_tmap = ctf_alloc(sizeof (ctf_merge_tinfo_t) *
 770             (cmp->cm_src->ctf_typemax + 1));
 771         if (cmp->cm_tmap == NULL)
 772                 return (ctf_set_errno(cmp->cm_out, ENOMEM));
 773         bzero(cmp->cm_tmap, sizeof (ctf_merge_tinfo_t) *
 774             (cmp->cm_src->ctf_typemax + 1));
 775         return (0);
 776 }
 777 
 778 static void
 779 ctf_merge_types_fini(ctf_merge_types_t *cmp)
 780 {
 781         ctf_free(cmp->cm_tmap, sizeof (ctf_merge_tinfo_t) *
 782             (cmp->cm_src->ctf_typemax + 1));
 783 }
 784 
 785 /*
 786  * After performing a pass, we need to go through the object and function type
 787  * maps and potentially fix them up based on the new maps that we have.
 788  */
 789 static void
 790 ctf_merge_fixup_symmaps(ctf_merge_types_t *cmp, ctf_merge_input_t *cmi)
 791 {
 792         ctf_merge_objmap_t *cmo;
 793         ctf_merge_funcmap_t *cmf;
 794 
 795         for (cmo = list_head(&cmi->cmi_omap); cmo != NULL;
 796             cmo = list_next(&cmi->cmi_omap, cmo)) {
 797                 VERIFY3S(cmo->cmo_tid, !=, 0);
 798                 VERIFY(cmp->cm_tmap[cmo->cmo_tid].cmt_map != 0);
 799                 cmo->cmo_tid = cmp->cm_tmap[cmo->cmo_tid].cmt_map;
 800         }
 801 
 802         for (cmf = list_head(&cmi->cmi_fmap); cmf != NULL;
 803             cmf = list_next(&cmi->cmi_fmap, cmf)) {
 804                 int i;
 805 
 806                 VERIFY(cmp->cm_tmap[cmf->cmf_rtid].cmt_map != 0);
 807                 cmf->cmf_rtid = cmp->cm_tmap[cmf->cmf_rtid].cmt_map;
 808                 for (i = 0; i < cmf->cmf_argc; i++) {
 809                         VERIFY(cmp->cm_tmap[cmf->cmf_args[i]].cmt_map != 0);
 810                         cmf->cmf_args[i] =
 811                             cmp->cm_tmap[cmf->cmf_args[i]].cmt_map;
 812                 }
 813         }
 814 }
 815 
 816 /*
 817  * Merge the types contained inside of two input files. The second input file is
 818  * always going to be the destination. We're guaranteed that it's always
 819  * writeable.
 820  */
 821 static int
 822 ctf_merge_types(void *arg, void *arg2, void **outp, void *unsued)
 823 {
 824         int ret;
 825         ctf_merge_types_t cm;
 826         ctf_diff_t *cdp;
 827         ctf_merge_input_t *scmi = arg;
 828         ctf_merge_input_t *dcmi = arg2;
 829         ctf_file_t *out = dcmi->cmi_input;
 830         ctf_file_t *source = scmi->cmi_input;
 831 
 832         ctf_dprintf("merging %p->%p\n", source, out);
 833 
 834         if (!(out->ctf_flags & LCTF_RDWR))
 835                 return (ctf_set_errno(out, ECTF_RDONLY));
 836 
 837         if (ctf_getmodel(out) != ctf_getmodel(source))
 838                 return (ctf_set_errno(out, ECTF_DMODEL));
 839 
 840         if ((ret = ctf_diff_init(out, source, &cdp)) != 0)
 841                 return (ret);
 842 
 843         cm.cm_out = out;
 844         cm.cm_src = source;
 845         cm.cm_dedup = B_FALSE;
 846         cm.cm_unique = B_FALSE;
 847         ret = ctf_merge_types_init(&cm);
 848         if (ret != 0) {
 849                 ctf_diff_fini(cdp);
 850                 return (ctf_set_errno(out, ret));
 851         }
 852 
 853         ret = ctf_diff_types(cdp, ctf_merge_diffcb, &cm);
 854         if (ret != 0)
 855                 goto cleanup;
 856         ret = ctf_merge_common(&cm);
 857         ctf_dprintf("merge common returned with %d\n", ret);
 858         if (ret == 0) {
 859                 ret = ctf_update(out);
 860                 ctf_dprintf("update returned with %d\n", ret);
 861         } else {
 862                 goto cleanup;
 863         }
 864 
 865         /*
 866          * Now we need to fix up the object and function maps.
 867          */
 868         ctf_merge_fixup_symmaps(&cm, scmi);
 869 
 870         /*
 871          * Now that we've fixed things up, we need to give our function and
 872          * object maps to the destination, such that it can continue to update
 873          * them going forward.
 874          */
 875         list_move_tail(&dcmi->cmi_fmap, &scmi->cmi_fmap);
 876         list_move_tail(&dcmi->cmi_omap, &scmi->cmi_omap);
 877 
 878 cleanup:
 879         if (ret == 0)
 880                 *outp = dcmi;
 881         ctf_merge_types_fini(&cm);
 882         ctf_diff_fini(cdp);
 883         if (ret != 0)
 884                 return (ctf_errno(out));
 885         ctf_phase_bump();
 886         return (0);
 887 }
 888 
 889 static int
 890 ctf_uniquify_types(ctf_merge_t *cmh, ctf_file_t *src, ctf_file_t **outp)
 891 {
 892         int err, ret;
 893         ctf_file_t *out;
 894         ctf_merge_types_t cm;
 895         ctf_diff_t *cdp;
 896         ctf_merge_input_t *cmi;
 897         ctf_file_t *parent = cmh->cmh_unique;
 898 
 899         *outp = NULL;
 900         out = ctf_fdcreate(cmh->cmh_ofd, &err);
 901         if (out == NULL)
 902                 return (ctf_set_errno(src, err));
 903 
 904         out->ctf_parname = cmh->cmh_pname;
 905         if (ctf_setmodel(out, ctf_getmodel(parent)) != 0) {
 906                 (void) ctf_set_errno(src, ctf_errno(out));
 907                 ctf_close(out);
 908                 return (CTF_ERR);
 909         }
 910 
 911         if (ctf_import(out, parent) != 0) {
 912                 (void) ctf_set_errno(src, ctf_errno(out));
 913                 ctf_close(out);
 914                 return (CTF_ERR);
 915         }
 916 
 917         if ((ret = ctf_diff_init(parent, src, &cdp)) != 0) {
 918                 ctf_close(out);
 919                 return (ctf_set_errno(src, ctf_errno(parent)));
 920         }
 921 
 922         cm.cm_out = parent;
 923         cm.cm_src = src;
 924         cm.cm_dedup = B_FALSE;
 925         cm.cm_unique = B_TRUE;
 926         ret = ctf_merge_types_init(&cm);
 927         if (ret != 0) {
 928                 ctf_close(out);
 929                 ctf_diff_fini(cdp);
 930                 return (ctf_set_errno(src, ret));
 931         }
 932 
 933         ret = ctf_diff_types(cdp, ctf_merge_diffcb, &cm);
 934         if (ret == 0) {
 935                 cm.cm_out = out;
 936                 ret = ctf_merge_uniquify_types(&cm);
 937                 if (ret == 0)
 938                         ret = ctf_update(out);
 939         }
 940 
 941         if (ret != 0) {
 942                 ctf_merge_types_fini(&cm);
 943                 ctf_diff_fini(cdp);
 944                 return (ctf_set_errno(src, ctf_errno(cm.cm_out)));
 945         }
 946 
 947         for (cmi = list_head(&cmh->cmh_inputs); cmi != NULL;
 948             cmi = list_next(&cmh->cmh_inputs, cmi)) {
 949                 ctf_merge_fixup_symmaps(&cm, cmi);
 950         }
 951 
 952         ctf_merge_types_fini(&cm);
 953         ctf_diff_fini(cdp);
 954         *outp = out;
 955         return (0);
 956 }
 957 
 958 static void
 959 ctf_merge_fini_input(ctf_merge_input_t *cmi)
 960 {
 961         ctf_merge_objmap_t *cmo;
 962         ctf_merge_funcmap_t *cmf;
 963 
 964         while ((cmo = list_remove_head(&cmi->cmi_omap)) != NULL)
 965                 ctf_free(cmo, sizeof (ctf_merge_objmap_t));
 966 
 967         while ((cmf = list_remove_head(&cmi->cmi_fmap)) != NULL)
 968                 ctf_free(cmf, sizeof (ctf_merge_funcmap_t) +
 969                     sizeof (ctf_id_t) * cmf->cmf_argc);
 970 
 971         if (cmi->cmi_created == B_TRUE && cmi->cmi_input != NULL)
 972                 ctf_close(cmi->cmi_input);
 973 
 974         ctf_free(cmi, sizeof (ctf_merge_input_t));
 975 }
 976 
 977 void
 978 ctf_merge_fini(ctf_merge_t *cmh)
 979 {
 980         size_t len;
 981         ctf_merge_input_t *cmi;
 982 
 983         if (cmh->cmh_label != NULL) {
 984                 len = strlen(cmh->cmh_label) + 1;
 985                 ctf_free(cmh->cmh_label, len);
 986         }
 987 
 988         if (cmh->cmh_pname != NULL) {
 989                 len = strlen(cmh->cmh_pname) + 1;
 990                 ctf_free(cmh->cmh_pname, len);
 991         }
 992 
 993         while ((cmi = list_remove_head(&cmh->cmh_inputs)) != NULL)
 994                 ctf_merge_fini_input(cmi);
 995 
 996         ctf_free(cmh, sizeof (ctf_merge_t));
 997 }
 998 
 999 ctf_merge_t *
1000 ctf_merge_init(int fd, int *errp)
1001 {
1002         int err;
1003         ctf_merge_t *out;
1004         struct stat st;
1005 
1006         if (errp == NULL)
1007                 errp = &err;
1008 
1009         if (fd != -1 && fstat(fd, &st) != 0) {
1010                 *errp = EINVAL;
1011                 return (NULL);
1012         }
1013 
1014         out = ctf_alloc(sizeof (ctf_merge_t));
1015         if (out == NULL) {
1016                 *errp = ENOMEM;
1017                 return (NULL);
1018         }
1019 
1020         if (fd == -1) {
1021                 out->cmh_msyms = B_FALSE;
1022         } else {
1023                 out->cmh_msyms = B_TRUE;
1024         }
1025 
1026         list_create(&out->cmh_inputs, sizeof (ctf_merge_input_t),
1027             offsetof(ctf_merge_input_t, cmi_node));
1028         out->cmh_ninputs = 0;
1029         out->cmh_nthreads = 1;
1030         out->cmh_unique = NULL;
1031         out->cmh_ofd = fd;
1032         out->cmh_flags = 0;
1033         out->cmh_label = NULL;
1034         out->cmh_pname = NULL;
1035 
1036         return (out);
1037 }
1038 
1039 int
1040 ctf_merge_label(ctf_merge_t *cmh, const char *label)
1041 {
1042         char *dup;
1043 
1044         if (label == NULL)
1045                 return (EINVAL);
1046 
1047         dup = ctf_strdup(label);
1048         if (dup == NULL)
1049                 return (EAGAIN);
1050 
1051         if (cmh->cmh_label != NULL) {
1052                 size_t len = strlen(cmh->cmh_label) + 1;
1053                 ctf_free(cmh->cmh_label, len);
1054         }
1055 
1056         cmh->cmh_label = dup;
1057         return (0);
1058 }
1059 
1060 static int
1061 ctf_merge_add_function(ctf_merge_input_t *cmi, ctf_funcinfo_t *fip, ulong_t idx,
1062     const char *file, const char *name, const Elf64_Sym *symp)
1063 {
1064         ctf_merge_funcmap_t *fmap;
1065 
1066         fmap = ctf_alloc(sizeof (ctf_merge_funcmap_t) +
1067             sizeof (ctf_id_t) * fip->ctc_argc);
1068         if (fmap == NULL)
1069                 return (ENOMEM);
1070 
1071         fmap->cmf_idx = idx;
1072         fmap->cmf_sym = *symp;
1073         fmap->cmf_rtid = fip->ctc_return;
1074         fmap->cmf_flags = fip->ctc_flags;
1075         fmap->cmf_argc = fip->ctc_argc;
1076         fmap->cmf_name = name;
1077         if (ELF64_ST_BIND(symp->st_info) == STB_LOCAL) {
1078                 fmap->cmf_file = file;
1079         } else {
1080                 fmap->cmf_file = NULL;
1081         }
1082 
1083         if (ctf_func_args(cmi->cmi_input, idx, fmap->cmf_argc,
1084             fmap->cmf_args) != 0) {
1085                 ctf_free(fmap, sizeof (ctf_merge_funcmap_t) +
1086                     sizeof (ctf_id_t) * fip->ctc_argc);
1087                 return (ctf_errno(cmi->cmi_input));
1088         }
1089 
1090         ctf_dprintf("added initial function %s, %lu, %s %u\n", name, idx,
1091             fmap->cmf_file != NULL ? fmap->cmf_file : "global",
1092             ELF64_ST_BIND(symp->st_info));
1093         list_insert_tail(&cmi->cmi_fmap, fmap);
1094         return (0);
1095 }
1096 
1097 static int
1098 ctf_merge_add_object(ctf_merge_input_t *cmi, ctf_id_t id, ulong_t idx,
1099     const char *file, const char *name, const Elf64_Sym *symp)
1100 {
1101         ctf_merge_objmap_t *cmo;
1102 
1103         cmo = ctf_alloc(sizeof (ctf_merge_objmap_t));
1104         if (cmo == NULL)
1105                 return (ENOMEM);
1106 
1107         cmo->cmo_name = name;
1108         if (ELF64_ST_BIND(symp->st_info) == STB_LOCAL) {
1109                 cmo->cmo_file = file;
1110         } else {
1111                 cmo->cmo_file = NULL;
1112         }
1113         cmo->cmo_idx = idx;
1114         cmo->cmo_tid = id;
1115         cmo->cmo_sym = *symp;
1116         list_insert_tail(&cmi->cmi_omap, cmo);
1117 
1118         ctf_dprintf("added initial object %s, %lu, %ld, %s\n", name, idx, id,
1119             cmo->cmo_file != NULL ? cmo->cmo_file : "global");
1120 
1121         return (0);
1122 }
1123 
1124 static int
1125 ctf_merge_add_symbol(const Elf64_Sym *symp, ulong_t idx, const char *file,
1126     const char *name, boolean_t primary, void *arg)
1127 {
1128         ctf_merge_input_t *cmi = arg;
1129         ctf_file_t *fp = cmi->cmi_input;
1130         ushort_t *data, funcbase;
1131         uint_t type;
1132         ctf_funcinfo_t fi;
1133 
1134         /*
1135          * See if there is type information for this. If there is no
1136          * type information for this entry or no translation, then we
1137          * will find the value zero. This indicates no type ID for
1138          * objects and encodes unknown information for functions.
1139          */
1140         if (fp->ctf_sxlate[idx] == -1u)
1141                 return (0);
1142         data = (ushort_t *)((uintptr_t)fp->ctf_buf + fp->ctf_sxlate[idx]);
1143         if (*data == 0)
1144                 return (0);
1145 
1146         type = ELF64_ST_TYPE(symp->st_info);
1147 
1148         switch (type) {
1149         case STT_FUNC:
1150                 funcbase = *data;
1151                 if (LCTF_INFO_KIND(fp, funcbase) != CTF_K_FUNCTION)
1152                         return (0);
1153                 data++;
1154                 fi.ctc_return = *data;
1155                 data++;
1156                 fi.ctc_argc = LCTF_INFO_VLEN(fp, funcbase);
1157                 fi.ctc_flags = 0;
1158 
1159                 if (fi.ctc_argc != 0 && data[fi.ctc_argc - 1] == 0) {
1160                         fi.ctc_flags |= CTF_FUNC_VARARG;
1161                         fi.ctc_argc--;
1162                 }
1163                 return (ctf_merge_add_function(cmi, &fi, idx, file, name,
1164                     symp));
1165         case STT_OBJECT:
1166                 return (ctf_merge_add_object(cmi, *data, idx, file, name,
1167                     symp));
1168         default:
1169                 return (0);
1170         }
1171 }
1172 
1173 /*
1174  * Whenever we create an entry to merge, we then go and add a second empty
1175  * ctf_file_t which we use for the purposes of our merging. It's not the best,
1176  * but it's the best that we've got at the moment.
1177  */
1178 int
1179 ctf_merge_add(ctf_merge_t *cmh, ctf_file_t *input)
1180 {
1181         int ret;
1182         ctf_merge_input_t *cmi;
1183         ctf_file_t *empty;
1184 
1185         ctf_dprintf("adding input %p\n", input);
1186 
1187         if (input->ctf_flags & LCTF_CHILD)
1188                 return (ECTF_MCHILD);
1189 
1190         cmi = ctf_alloc(sizeof (ctf_merge_input_t));
1191         if (cmi == NULL)
1192                 return (ENOMEM);
1193 
1194         cmi->cmi_created = B_FALSE;
1195         cmi->cmi_input = input;
1196         list_create(&cmi->cmi_fmap, sizeof (ctf_merge_funcmap_t),
1197             offsetof(ctf_merge_funcmap_t, cmf_node));
1198         list_create(&cmi->cmi_omap, sizeof (ctf_merge_funcmap_t),
1199             offsetof(ctf_merge_objmap_t, cmo_node));
1200 
1201         if (cmh->cmh_msyms == B_TRUE) {
1202                 if ((ret = ctf_symtab_iter(input, ctf_merge_add_symbol,
1203                     cmi)) != 0) {
1204                         ctf_merge_fini_input(cmi);
1205                         return (ret);
1206                 }
1207         }
1208 
1209         list_insert_tail(&cmh->cmh_inputs, cmi);
1210         cmh->cmh_ninputs++;
1211 
1212         /* And now the empty one to merge into this */
1213         cmi = ctf_alloc(sizeof (ctf_merge_input_t));
1214         if (cmi == NULL)
1215                 return (ENOMEM);
1216         list_create(&cmi->cmi_fmap, sizeof (ctf_merge_funcmap_t),
1217             offsetof(ctf_merge_funcmap_t, cmf_node));
1218         list_create(&cmi->cmi_omap, sizeof (ctf_merge_funcmap_t),
1219             offsetof(ctf_merge_objmap_t, cmo_node));
1220 
1221         empty = ctf_fdcreate(cmh->cmh_ofd, &ret);
1222         if (empty == NULL)
1223                 return (ret);
1224         cmi->cmi_input = empty;
1225         cmi->cmi_created = B_TRUE;
1226 
1227         if (ctf_setmodel(empty, ctf_getmodel(input)) == CTF_ERR) {
1228                 return (ctf_errno(empty));
1229         }
1230 
1231         list_insert_tail(&cmh->cmh_inputs, cmi);
1232         cmh->cmh_ninputs++;
1233         ctf_dprintf("added containers %p and %p\n", input, empty);
1234         return (0);
1235 }
1236 
1237 int
1238 ctf_merge_uniquify(ctf_merge_t *cmh, ctf_file_t *u, const char *pname)
1239 {
1240         char *dup;
1241 
1242         if (u->ctf_flags & LCTF_CHILD)
1243                 return (ECTF_MCHILD);
1244         if (pname == NULL)
1245                 return (EINVAL);
1246         dup = ctf_strdup(pname);
1247         if (dup == NULL)
1248                 return (EINVAL);
1249         if (cmh->cmh_pname != NULL) {
1250                 size_t len = strlen(cmh->cmh_pname) + 1;
1251                 ctf_free(cmh->cmh_pname, len);
1252         }
1253         cmh->cmh_pname = dup;
1254         cmh->cmh_unique = u;
1255         return (0);
1256 }
1257 
1258 /*
1259  * Symbol matching rules: the purpose of this is to verify that the type
1260  * information that we have for a given symbol actually matches the output
1261  * symbol. This is unfortunately complicated by several different factors:
1262  *
1263  * 1. When merging multiple .o's into a single item, the symbol table index will
1264  * not match.
1265  *
1266  * 2. Visibility of a symbol may not be identical to the object file or the
1267  * DWARF information due to symbol reduction via a mapfile.
1268  *
1269  * As such, we have to employ the following rules:
1270  *
1271  * 1. A global symbol table entry always matches a global CTF symbol with the
1272  * same name.
1273  *
1274  * 2. A local symbol table entry always matches a local CTF symbol if they have
1275  * the same name and they belong to the same file.
1276  *
1277  * 3. A weak symbol matches a non-weak symbol. This happens if we find that the
1278  * types match, the values match, the sizes match, and the section indexes
1279  * match. This happens when we do a conversion in one pass, it almost never
1280  * happens when we're merging multiple object files. If we match a CTF global
1281  * symbol, that's a fixed match, otherwise it's a fuzzy match.
1282  *
1283  * 4. A local symbol table entry matches a global CTF entry if the
1284  * other pieces fail, but they have the same name. This is considered a fuzzy
1285  * match and is not used unless we have no other options.
1286  *
1287  * 5. A weak symbol table entry matches a weak CTF entry if the other pieces
1288  * fail, but they have the same name. This is considered a fuzzy match and is
1289  * not used unless we have no other options. When merging independent .o files,
1290  * this is often the only recourse we have to matching weak symbols.
1291  *
1292  * In the end, this would all be much simpler if we were able to do this as part
1293  * of libld which would be able to do all the symbol transformations.
1294  */
1295 static boolean_t
1296 ctf_merge_symbol_match(const char *ctf_file, const char *ctf_name,
1297     const Elf64_Sym *ctf_symp, const char *symtab_file, const char *symtab_name,
1298     const Elf64_Sym *symtab_symp, boolean_t *is_fuzzy)
1299 {
1300         *is_fuzzy = B_FALSE;
1301         uint_t symtab_bind, ctf_bind;
1302 
1303         symtab_bind = ELF64_ST_BIND(symtab_symp->st_info);
1304         ctf_bind = ELF64_ST_BIND(ctf_symp->st_info);
1305 
1306         ctf_dprintf("comparing merge match for %s/%s/%u->%s/%s/%u\n",
1307             symtab_file, symtab_name, symtab_bind,
1308             ctf_file, ctf_name, ctf_bind);
1309         if (strcmp(ctf_name, symtab_name) != 0) {
1310                 return (B_FALSE);
1311         }
1312 
1313         if (symtab_bind == STB_GLOBAL && ctf_bind == STB_GLOBAL) {
1314                 return (B_TRUE);
1315         } else if (symtab_bind == STB_GLOBAL) {
1316                 return (B_FALSE);
1317         }
1318 
1319         if (ctf_bind == STB_LOCAL && ctf_bind == symtab_bind &&
1320             ctf_file != NULL && symtab_file != NULL &&
1321             strcmp(ctf_file, symtab_file) == 0) {
1322                 return (B_TRUE);
1323         }
1324 
1325         if (symtab_bind == STB_WEAK && ctf_bind != STB_WEAK &&
1326             ELF64_ST_TYPE(symtab_symp->st_info) ==
1327             ELF64_ST_TYPE(ctf_symp->st_info) &&
1328             symtab_symp->st_value == ctf_symp->st_value &&
1329             symtab_symp->st_size == ctf_symp->st_size &&
1330             symtab_symp->st_shndx == ctf_symp->st_shndx) {
1331                 if (ctf_bind == STB_GLOBAL) {
1332                         return (B_TRUE);
1333                 }
1334 
1335                 if (ctf_bind == STB_LOCAL && ctf_file != NULL &&
1336                     symtab_file != NULL && strcmp(ctf_file, symtab_file) == 0) {
1337                         *is_fuzzy = B_TRUE;
1338                         return (B_TRUE);
1339                 }
1340         }
1341 
1342         if (ctf_bind == STB_GLOBAL ||
1343             (ctf_bind == STB_WEAK && symtab_bind == STB_WEAK)) {
1344                 *is_fuzzy = B_TRUE;
1345                 return (B_TRUE);
1346         }
1347 
1348         return (B_FALSE);
1349 }
1350 
1351 /*
1352  * For each symbol, try and find a match. We will attempt to find an exact
1353  * match; however, we will settle for a fuzzy match in general. There is one
1354  * case where we will not opt to use a fuzzy match, which is when performing the
1355  * deduplication of a container. In such a case we are trying to reduce common
1356  * types and a fuzzy match would be inappropriate as if we're in the context of
1357  * a single container, the conversion process should have identified any exact
1358  * or fuzzy matches that were required.
1359  */
1360 static int
1361 ctf_merge_symbols(const Elf64_Sym *symp, ulong_t idx, const char *file,
1362     const char *name, boolean_t primary, void *arg)
1363 {
1364         int err;
1365         uint_t type, bind;
1366         ctf_merge_symbol_arg_t *csa = arg;
1367         ctf_file_t *fp = csa->cmsa_out;
1368 
1369         type = ELF64_ST_TYPE(symp->st_info);
1370         bind = ELF64_ST_BIND(symp->st_info);
1371 
1372         ctf_dprintf("Trying to find match for %s/%s/%u\n", file, name,
1373             ELF64_ST_BIND(symp->st_info));
1374 
1375         if (type == STT_OBJECT) {
1376                 ctf_merge_objmap_t *cmo, *match = NULL;
1377 
1378                 for (cmo = list_head(csa->cmsa_objmap); cmo != NULL;
1379                     cmo = list_next(csa->cmsa_objmap, cmo)) {
1380                         boolean_t is_fuzzy = B_FALSE;
1381                         if (ctf_merge_symbol_match(cmo->cmo_file, cmo->cmo_name,
1382                             &cmo->cmo_sym, file, name, symp, &is_fuzzy)) {
1383                                 if (is_fuzzy && csa->cmsa_dedup &&
1384                                     bind != STB_WEAK) {
1385                                         continue;
1386                                 }
1387                                 match = cmo;
1388                                 if (is_fuzzy) {
1389                                         continue;
1390                                 }
1391                                 break;
1392                         }
1393                 }
1394 
1395                 if (match == NULL) {
1396                         return (0);
1397                 }
1398 
1399                 if ((err = ctf_add_object(fp, idx, match->cmo_tid)) != 0) {
1400                         ctf_dprintf("Failed to add symbol %s->%d: %s\n", name,
1401                             match->cmo_tid, ctf_errmsg(ctf_errno(fp)));
1402                         return (ctf_errno(fp));
1403                 }
1404                 ctf_dprintf("mapped object into output %s/%s->%ld\n", file,
1405                     name, match->cmo_tid);
1406         } else {
1407                 ctf_merge_funcmap_t *cmf, *match = NULL;
1408                 ctf_funcinfo_t fi;
1409 
1410                 for (cmf = list_head(csa->cmsa_funcmap); cmf != NULL;
1411                     cmf = list_next(csa->cmsa_funcmap, cmf)) {
1412                         boolean_t is_fuzzy = B_FALSE;
1413                         if (ctf_merge_symbol_match(cmf->cmf_file, cmf->cmf_name,
1414                             &cmf->cmf_sym, file, name, symp, &is_fuzzy)) {
1415                                 if (is_fuzzy && csa->cmsa_dedup &&
1416                                     bind != STB_WEAK) {
1417                                         continue;
1418                                 }
1419                                 match = cmf;
1420                                 if (is_fuzzy) {
1421                                         continue;
1422                                 }
1423                                 break;
1424                         }
1425                 }
1426 
1427                 if (match == NULL) {
1428                         return (0);
1429                 }
1430 
1431                 fi.ctc_return = match->cmf_rtid;
1432                 fi.ctc_argc = match->cmf_argc;
1433                 fi.ctc_flags = match->cmf_flags;
1434                 if ((err = ctf_add_function(fp, idx, &fi, match->cmf_args)) !=
1435                     0) {
1436                         ctf_dprintf("Failed to add function %s: %s\n", name,
1437                             ctf_errmsg(ctf_errno(fp)));
1438                         return (ctf_errno(fp));
1439                 }
1440                 ctf_dprintf("mapped function into output %s/%s\n", file,
1441                     name);
1442         }
1443 
1444         return (0);
1445 }
1446 
1447 int
1448 ctf_merge_merge(ctf_merge_t *cmh, ctf_file_t **outp)
1449 {
1450         int err, merr;
1451         ctf_merge_input_t *cmi;
1452         ctf_id_t ltype;
1453         mergeq_t *mqp;
1454         ctf_merge_input_t *final;
1455         ctf_file_t *out;
1456 
1457         ctf_dprintf("Beginning ctf_merge_merge()\n");
1458         if (cmh->cmh_label != NULL && cmh->cmh_unique != NULL) {
1459                 const char *label = ctf_label_topmost(cmh->cmh_unique);
1460                 if (label == NULL)
1461                         return (ECTF_NOLABEL);
1462                 if (strcmp(label, cmh->cmh_label) != 0)
1463                         return (ECTF_LCONFLICT);
1464         }
1465 
1466         if (mergeq_init(&mqp, cmh->cmh_nthreads) == -1) {
1467                 return (errno);
1468         }
1469 
1470         VERIFY(cmh->cmh_ninputs % 2 == 0);
1471         for (cmi = list_head(&cmh->cmh_inputs); cmi != NULL;
1472             cmi = list_next(&cmh->cmh_inputs, cmi)) {
1473                 if (mergeq_add(mqp, cmi) == -1) {
1474                         err = errno;
1475                         mergeq_fini(mqp);
1476                 }
1477         }
1478 
1479         err = mergeq_merge(mqp, ctf_merge_types, NULL, (void **)&final, &merr);
1480         mergeq_fini(mqp);
1481 
1482         if (err == MERGEQ_ERROR) {
1483                 return (errno);
1484         } else if (err == MERGEQ_UERROR) {
1485                 return (merr);
1486         }
1487 
1488         /*
1489          * Disassociate the generated ctf_file_t from the original input. That
1490          * way when the input gets cleaned up, we don't accidentally kill the
1491          * final reference to the ctf_file_t. If it gets uniquified then we'll
1492          * kill it.
1493          */
1494         VERIFY(final->cmi_input != NULL);
1495         out = final->cmi_input;
1496         final->cmi_input = NULL;
1497 
1498         ctf_dprintf("preparing to uniquify against: %p\n", cmh->cmh_unique);
1499         if (cmh->cmh_unique != NULL) {
1500                 ctf_file_t *u;
1501                 err = ctf_uniquify_types(cmh, out, &u);
1502                 if (err != 0) {
1503                         err = ctf_errno(out);
1504                         ctf_close(out);
1505                         return (err);
1506                 }
1507                 ctf_close(out);
1508                 out = u;
1509         }
1510 
1511         ltype = out->ctf_typemax;
1512         if ((out->ctf_flags & LCTF_CHILD) && ltype != 0)
1513                 ltype += CTF_CHILD_START;
1514         ctf_dprintf("trying to add the label\n");
1515         if (cmh->cmh_label != NULL &&
1516             ctf_add_label(out, cmh->cmh_label, ltype, 0) != 0) {
1517                 ctf_close(out);
1518                 return (ctf_errno(out));
1519         }
1520 
1521         ctf_dprintf("merging symbols and the like\n");
1522         if (cmh->cmh_msyms == B_TRUE) {
1523                 ctf_merge_symbol_arg_t arg;
1524                 arg.cmsa_objmap = &final->cmi_omap;
1525                 arg.cmsa_funcmap = &final->cmi_fmap;
1526                 arg.cmsa_out = out;
1527                 arg.cmsa_dedup = B_FALSE;
1528                 err = ctf_symtab_iter(out, ctf_merge_symbols, &arg);
1529                 if (err != 0) {
1530                         ctf_close(out);
1531                         return (err);
1532                 }
1533         }
1534 
1535         err = ctf_update(out);
1536         if (err != 0) {
1537                 err = ctf_errno(out);
1538                 ctf_close(out);
1539                 return (err);
1540         }
1541 
1542         *outp = out;
1543         return (0);
1544 }
1545 
1546 /*
1547  * When we get told that something is unique, eg. same is B_FALSE, then that
1548  * tells us that we need to add it to the output. If same is B_TRUE, then we'll
1549  * want to record it in the mapping table so that we know how to redirect types
1550  * to the extant ones.
1551  */
1552 static void
1553 ctf_dedup_cb(ctf_file_t *ifp, ctf_id_t iid, boolean_t same, ctf_file_t *ofp,
1554     ctf_id_t oid, void *arg)
1555 {
1556         ctf_merge_types_t *cmp = arg;
1557         ctf_merge_tinfo_t *cmt = cmp->cm_tmap;
1558 
1559         if (same == B_TRUE) {
1560                 /*
1561                  * The output id here may itself map to something else.
1562                  * Therefore, we need to basically walk a chain and see what it
1563                  * points to until it itself points to a base type, eg. -1.
1564                  * Otherwise we'll dedup to something which no longer exists.
1565                  */
1566                 while (cmt[oid].cmt_missing == B_FALSE)
1567                         oid = cmt[oid].cmt_map;
1568                 cmt[iid].cmt_map = oid;
1569                 ctf_dprintf("%d->%d \n", iid, oid);
1570         } else {
1571                 VERIFY(cmt[iid].cmt_map == 0);
1572                 cmt[iid].cmt_missing = B_TRUE;
1573                 ctf_dprintf("%d is missing\n", iid);
1574         }
1575 }
1576 
1577 /*
1578  * Dedup a CTF container.
1579  *
1580  * DWARF and other encoding formats that we use to create CTF data may create
1581  * multiple copies of a given type. However, after doing a conversion, and
1582  * before doing a merge, we'd prefer, if possible, to have every input container
1583  * to be unique.
1584  *
1585  * Doing a deduplication is like a normal merge. However, when we diff the types
1586  * in the container, rather than doing a normal diff, we instead want to diff
1587  * against any already processed types. eg, for a given type i in a container,
1588  * we want to diff it from 0 to i - 1.
1589  */
1590 int
1591 ctf_merge_dedup(ctf_merge_t *cmp, ctf_file_t **outp)
1592 {
1593         int ret;
1594         ctf_diff_t *cdp = NULL;
1595         ctf_merge_input_t *cmi, *cmc;
1596         ctf_file_t *ifp, *ofp;
1597         ctf_merge_types_t cm;
1598 
1599         if (cmp == NULL || outp == NULL)
1600                 return (EINVAL);
1601 
1602         ctf_dprintf("encountered %d inputs\n", cmp->cmh_ninputs);
1603         if (cmp->cmh_ninputs != 2)
1604                 return (EINVAL);
1605 
1606         ctf_dprintf("passed argument sanity check\n");
1607 
1608         cmi = list_head(&cmp->cmh_inputs);
1609         VERIFY(cmi != NULL);
1610         cmc = list_next(&cmp->cmh_inputs, cmi);
1611         VERIFY(cmc != NULL);
1612         ifp = cmi->cmi_input;
1613         ofp = cmc->cmi_input;
1614         VERIFY(ifp != NULL);
1615         VERIFY(ofp != NULL);
1616         cm.cm_src = ifp;
1617         cm.cm_out = ofp;
1618         cm.cm_dedup = B_TRUE;
1619         cm.cm_unique = B_FALSE;
1620 
1621         if ((ret = ctf_merge_types_init(&cm)) != 0) {
1622                 return (ret);
1623         }
1624 
1625         if ((ret = ctf_diff_init(ifp, ifp, &cdp)) != 0)
1626                 goto err;
1627 
1628         ctf_dprintf("Successfully initialized dedup\n");
1629         if ((ret = ctf_diff_self(cdp, ctf_dedup_cb, &cm)) != 0)
1630                 goto err;
1631 
1632         ctf_dprintf("Successfully diffed types\n");
1633         ret = ctf_merge_common(&cm);
1634         ctf_dprintf("deduping types result: %d\n", ret);
1635         if (ret == 0)
1636                 ret = ctf_update(cm.cm_out);
1637         if (ret != 0)
1638                 goto err;
1639 
1640         ctf_dprintf("Successfully deduped types\n");
1641         ctf_phase_dump(cm.cm_out, "dedup-pre-syms", NULL);
1642 
1643         /*
1644          * Now we need to fix up the object and function maps.
1645          */
1646         ctf_merge_fixup_symmaps(&cm, cmi);
1647 
1648         if (cmp->cmh_msyms == B_TRUE) {
1649                 ctf_merge_symbol_arg_t arg;
1650                 arg.cmsa_objmap = &cmi->cmi_omap;
1651                 arg.cmsa_funcmap = &cmi->cmi_fmap;
1652                 arg.cmsa_out = cm.cm_out;
1653                 arg.cmsa_dedup = B_TRUE;
1654                 ret = ctf_symtab_iter(cm.cm_out, ctf_merge_symbols, &arg);
1655                 if (ret != 0) {
1656                         ctf_dprintf("failed to dedup symbols: %s\n",
1657                             ctf_errmsg(ret));
1658                         goto err;
1659                 }
1660         }
1661 
1662         ret = ctf_update(cm.cm_out);
1663         if (ret == 0) {
1664                 cmc->cmi_input = NULL;
1665                 *outp = cm.cm_out;
1666         }
1667         ctf_phase_dump(cm.cm_out, "dedup-post-syms", NULL);
1668 err:
1669         ctf_merge_types_fini(&cm);
1670         ctf_diff_fini(cdp);
1671         return (ret);
1672 }
1673 
1674 int
1675 ctf_merge_set_nthreads(ctf_merge_t *cmp, const uint_t nthrs)
1676 {
1677         if (nthrs == 0)
1678                 return (EINVAL);
1679         cmp->cmh_nthreads = nthrs;
1680         return (0);
1681 }