1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 /*
  26  * Copyright 2012 Jason King.  All rights reserved.
  27  * Use is subject to license terms.
  28  */
  29 
  30 /*
  31  * Copyright 2020 Joyent, Inc.
  32  */
  33 
  34 /*
  35  * CTF DWARF conversion theory.
  36  *
  37  * DWARF data contains a series of compilation units. Each compilation unit
  38  * generally refers to an object file or what once was, in the case of linked
  39  * binaries and shared objects. Each compilation unit has a series of what DWARF
  40  * calls a DIE (Debugging Information Entry). The set of entries that we care
  41  * about have type information stored in a series of attributes. Each DIE also
  42  * has a tag that identifies the kind of attributes that it has.
  43  *
  44  * A given DIE may itself have children. For example, a DIE that represents a
  45  * structure has children which represent members. Whenever we encounter a DIE
  46  * that has children or other values or types associated with it, we recursively
  47  * process those children first so that way we can then refer to the generated
  48  * CTF type id while processing its parent. This reduces the amount of unknowns
  49  * and fixups that we need. It also ensures that we don't accidentally add types
  50  * that an overzealous compiler might add to the DWARF data but aren't used by
  51  * anything in the system.
  52  *
  53  * Once we do a conversion, we store a mapping in an AVL tree that goes from the
  54  * DWARF's die offset, which is relative to the given compilation unit, to a
  55  * ctf_id_t.
  56  *
  57  * Unfortunately, some compilers actually will emit duplicate entries for a
  58  * given type that look similar, but aren't quite. To that end, we go through
  59  * and do a variant on a merge once we're done processing a single compilation
  60  * unit which deduplicates all of the types that are in the unit.
  61  *
  62  * Finally, if we encounter an object that has multiple compilation units, then
  63  * we'll convert all of the compilation units separately and then do a merge, so
  64  * that way we can result in one single ctf_file_t that represents everything
  65  * for the object.
  66  *
  67  * Conversion Steps
  68  * ----------------
  69  *
  70  * Because a given object we've been given to convert may have multiple
  71  * compilation units, we break the work into two halves. The first half
  72  * processes each compilation unit (potentially in parallel) and then the second
  73  * half optionally merges all of the dies in the first half. First, we'll cover
  74  * what's involved in converting a single ctf_cu_t's dwarf to CTF. This covers
  75  * the work done in ctf_dwarf_convert_one().
  76  *
  77  * An individual ctf_cu_t, which represents a compilation unit, is converted to
  78  * CTF in a series of multiple passes.
  79  *
  80  * Pass 1: During the first pass we walk all of the top-level dies and if we
  81  * find a function, variable, struct, union, enum or typedef, we recursively
  82  * transform all of its types. We don't recurse or process everything, because
  83  * we don't want to add some of the types that compilers may add which are
  84  * effectively unused.
  85  *
  86  * During pass 1, if we encounter any structures or unions we mark them for
  87  * fixing up later. This is necessary because we may not be able to determine
  88  * the full size of a structure at the beginning of time. This will happen if
  89  * the DWARF attribute DW_AT_byte_size is not present for a member. Because of
  90  * this possibility we defer adding members to structures or even converting
  91  * them during pass 1 and save that for pass 2. Adding all of the base
  92  * structures without any of their members helps deal with any circular
  93  * dependencies that we might encounter.
  94  *
  95  * Pass 2: This pass is used to do the first half of fixing up structures and
  96  * unions. Rather than walk the entire type space again, we actually walk the
  97  * list of structures and unions that we marked for later fixing up. Here, we
  98  * iterate over every structure and add members to the underlying ctf_file_t,
  99  * but not to the structs themselves. One might wonder why we don't, and the
 100  * main reason is that libctf requires a ctf_update() be done before adding the
 101  * members to structures or unions.
 102  *
 103  * Pass 3: This pass is used to do the second half of fixing up structures and
 104  * unions. During this part we always go through and add members to structures
 105  * and unions that we added to the container in the previous pass. In addition,
 106  * we set the structure and union's actual size, which may have additional
 107  * padding added by the compiler, it isn't simply the last offset. DWARF always
 108  * guarantees an attribute exists for this. Importantly no ctf_id_t's change
 109  * during pass 2.
 110  *
 111  * Pass 4: The next phase is to add CTF entries for all of the symbols and
 112  * variables that are present in this die. During pass 1 we added entries to a
 113  * map for each variable and function. During this pass, we iterate over the
 114  * symbol table and when we encounter a symbol that we have in our lists of
 115  * translated information which matches, we then add it to the ctf_file_t.
 116  *
 117  * Pass 5: Here we go and look for any weak symbols and functions and see if
 118  * they match anything that we recognize. If so, then we add type information
 119  * for them at this point based on the matching type.
 120  *
 121  * Pass 6: This pass is actually a variant on a merge. The traditional merge
 122  * process expects there to be no duplicate types. As such, at the end of
 123  * conversion, we do a dedup on all of the types in the system. The
 124  * deduplication process is described in lib/libctf/common/ctf_merge.c.
 125  *
 126  * Once pass 6 is done, we've finished processing the individual compilation
 127  * unit.
 128  *
 129  * The following steps reflect the general process of doing a conversion.
 130  *
 131  * 1) Walk the dwarf section and determine the number of compilation units
 132  * 2) Create a ctf_cu_t for each compilation unit
 133  * 3) Add all ctf_cu_t's to a workq
 134  * 4) Have the workq process each die with ctf_dwarf_convert_one. This itself
 135  *    is comprised of several steps, which were already enumerated.
 136  * 5) If we have multiple cu's, we do a ctf merge of all the dies. The mechanics
 137  *    of the merge are discussed in lib/libctf/common/ctf_merge.c.
 138  * 6) Free everything up and return a ctf_file_t to the user. If we only had a
 139  *    single compilation unit, then we give that to the user. Otherwise, we
 140  *    return the merged ctf_file_t.
 141  *
 142  * Threading
 143  * ---------
 144  *
 145  * The process has been designed to be amenable to threading. Each compilation
 146  * unit has its own type stream, therefore the logical place to divide and
 147  * conquer is at the compilation unit. Each ctf_cu_t has been built to be able
 148  * to be processed independently of the others. It has its own libdwarf handle,
 149  * as a given libdwarf handle may only be used by a single thread at a time.
 150  * This allows the various ctf_cu_t's to be processed in parallel by different
 151  * threads.
 152  *
 153  * All of the ctf_cu_t's are loaded into a workq which allows for a number of
 154  * threads to be specified and used as a thread pool to process all of the
 155  * queued work. We set the number of threads to use in the workq equal to the
 156  * number of threads that the user has specified.
 157  *
 158  * After all of the compilation units have been drained, we use the same number
 159  * of threads when performing a merge of multiple compilation units, if they
 160  * exist.
 161  *
 162  * While all of these different parts do support and allow for multiple threads,
 163  * it's important that when only a single thread is specified, that it be the
 164  * calling thread. This allows the conversion routines to be used in a context
 165  * that doesn't allow additional threads, such as rtld.
 166  *
 167  * Common DWARF Mechanics and Notes
 168  * --------------------------------
 169  *
 170  * At this time, we really only support DWARFv2, though support for DWARFv4 is
 171  * mostly there. There is no intent to support DWARFv3.
 172  *
 173  * Generally types for something are stored in the DW_AT_type attribute. For
 174  * example, a function's return type will be stored in the local DW_AT_type
 175  * attribute while the arguments will be in child DIEs. There are also various
 176  * times when we don't have any DW_AT_type. In that case, the lack of a type
 177  * implies, at least for C, that its C type is void. Because DWARF doesn't emit
 178  * one, we have a synthetic void type that we create and manipulate instead and
 179  * pass it off to consumers on an as-needed basis. If nothing has a void type,
 180  * it will not be emitted.
 181  *
 182  * Architecture Specific Parts
 183  * ---------------------------
 184  *
 185  * The CTF tooling encodes various information about the various architectures
 186  * in the system. Importantly, the tool assumes that every architecture has a
 187  * data model where long and pointer are the same size. This is currently the
 188  * case, as the two data models illumos supports are ILP32 and LP64.
 189  *
 190  * In addition, we encode the mapping of various floating point sizes to various
 191  * types for each architecture. If a new architecture is being added, it should
 192  * be added to the list. The general design of the ctf conversion tools is to be
 193  * architecture independent. eg. any of the tools here should be able to convert
 194  * any architecture's DWARF into ctf; however, this has not been rigorously
 195  * tested and more importantly, the ctf routines don't currently write out the
 196  * data in an endian-aware form, they only use that of the currently running
 197  * library.
 198  */
 199 
 200 #include <libctf_impl.h>
 201 #include <sys/avl.h>
 202 #include <sys/debug.h>
 203 #include <gelf.h>
 204 #include <libdwarf.h>
 205 #include <dwarf.h>
 206 #include <libgen.h>
 207 #include <workq.h>
 208 #include <errno.h>
 209 
/* Numeric value of DWARF version 2 (see the DWARF notes in the theory above). */
#define DWARF_VERSION_TWO       2
/* NOTE(review): presumably the name given to a varargs parameter; confirm. */
#define DWARF_VARARGS_NAME      "..."
 212 
 213 /*
 214  * Dwarf may refer recursively to other types that we've already processed. To
 215  * see if we've already converted them, we look them up in an AVL tree that's
 216  * sorted by the DWARF id.
 217  */
 218 typedef struct ctf_dwmap {
 219         avl_node_t      cdm_avl;
 220         Dwarf_Off       cdm_off;
 221         Dwarf_Die       cdm_die;
 222         ctf_id_t        cdm_id;
 223         boolean_t       cdm_fix;
 224 } ctf_dwmap_t;
 225 
 226 typedef struct ctf_dwvar {
 227         ctf_list_t      cdv_list;
 228         char            *cdv_name;
 229         ctf_id_t        cdv_type;
 230         boolean_t       cdv_global;
 231 } ctf_dwvar_t;
 232 
 233 typedef struct ctf_dwfunc {
 234         ctf_list_t      cdf_list;
 235         char            *cdf_name;
 236         ctf_funcinfo_t  cdf_fip;
 237         ctf_id_t        *cdf_argv;
 238         boolean_t       cdf_global;
 239 } ctf_dwfunc_t;
 240 
 241 typedef struct ctf_dwbitf {
 242         ctf_list_t      cdb_list;
 243         ctf_id_t        cdb_base;
 244         uint_t          cdb_nbits;
 245         ctf_id_t        cdb_id;
 246 } ctf_dwbitf_t;
 247 
 248 /*
 249  * The ctf_cu_t represents a single top-level DWARF die unit. While generally,
 250  * the typical object file has only a single die, if we're asked to convert
 251  * something that's been linked from multiple sources, multiple dies will exist.
 252  */
 253 typedef struct ctf_die {
 254         Elf             *cu_elf;        /* shared libelf handle */
 255         char            *cu_name;       /* basename of the DIE */
 256         ctf_merge_t     *cu_cmh;        /* merge handle */
 257         ctf_list_t      cu_vars;        /* List of variables */
 258         ctf_list_t      cu_funcs;       /* List of functions */
 259         ctf_list_t      cu_bitfields;   /* Bit field members */
 260         Dwarf_Debug     cu_dwarf;       /* libdwarf handle */
 261         Dwarf_Die       cu_cu;          /* libdwarf compilation unit */
 262         Dwarf_Off       cu_cuoff;       /* cu's offset */
 263         Dwarf_Off       cu_maxoff;      /* maximum offset */
 264         ctf_file_t      *cu_ctfp;       /* output CTF file */
 265         avl_tree_t      cu_map;         /* map die offsets to CTF types */
 266         char            *cu_errbuf;     /* error message buffer */
 267         size_t          cu_errlen;      /* error message buffer length */
 268         size_t          cu_ptrsz;       /* object's pointer size */
 269         boolean_t       cu_bigend;      /* is it big endian */
 270         boolean_t       cu_doweaks;     /* should we convert weak symbols? */
 271         uint_t          cu_mach;        /* machine type */
 272         ctf_id_t        cu_voidtid;     /* void pointer */
 273         ctf_id_t        cu_longtid;     /* id for a 'long' */
 274 } ctf_cu_t;
 275 
/*
 * Forward declarations. ctf_dwarf_offset() is called by ctf_dwmap_add() ahead
 * of its definition; the remaining functions are not defined in this part of
 * the file.
 */
static int ctf_dwarf_offset(ctf_cu_t *, Dwarf_Die, Dwarf_Off *);
static int ctf_dwarf_convert_die(ctf_cu_t *, Dwarf_Die);
static int ctf_dwarf_convert_type(ctf_cu_t *, Dwarf_Die, ctf_id_t *, int);

static int ctf_dwarf_function_count(ctf_cu_t *, Dwarf_Die, ctf_funcinfo_t *,
    boolean_t);
static int ctf_dwarf_convert_fargs(ctf_cu_t *, Dwarf_Die, ctf_funcinfo_t *,
    ctf_id_t *);
 284 
 285 /*
 286  * This is a generic way to set a CTF Conversion backend error depending on what
 287  * we were doing. Unless it was one of a specific set of errors that don't
 288  * indicate a programming / translation bug, eg. ENOMEM, then we transform it
 289  * into a CTF backend error and fill in the error buffer.
 290  */
 291 static int
 292 ctf_dwarf_error(ctf_cu_t *cup, ctf_file_t *cfp, int err, const char *fmt, ...)
 293 {
 294         va_list ap;
 295         int ret;
 296         size_t off = 0;
 297         ssize_t rem = cup->cu_errlen;
 298         if (cfp != NULL)
 299                 err = ctf_errno(cfp);
 300 
 301         if (err == ENOMEM)
 302                 return (err);
 303 
 304         ret = snprintf(cup->cu_errbuf, rem, "die %s: ", cup->cu_name);
 305         if (ret < 0)
 306                 goto err;
 307         off += ret;
 308         rem = MAX(rem - ret, 0);
 309 
 310         va_start(ap, fmt);
 311         ret = vsnprintf(cup->cu_errbuf + off, rem, fmt, ap);
 312         va_end(ap);
 313         if (ret < 0)
 314                 goto err;
 315 
 316         off += ret;
 317         rem = MAX(rem - ret, 0);
 318         if (fmt[strlen(fmt) - 1] != '\n') {
 319                 (void) snprintf(cup->cu_errbuf + off, rem,
 320                     ": %s\n", ctf_errmsg(err));
 321         }
 322         va_end(ap);
 323         return (ECTF_CONVBKERR);
 324 
 325 err:
 326         cup->cu_errbuf[0] = '\0';
 327         return (ECTF_CONVBKERR);
 328 }
 329 
 330 /*
 331  * DWARF often opts to put no explicit type to describe a void type. eg. if we
 332  * have a reference type whose DW_AT_type member doesn't exist, then we should
 333  * instead assume it points to void. Because this isn't represented, we
 334  * instead cause it to come into existence.
 335  */
 336 static ctf_id_t
 337 ctf_dwarf_void(ctf_cu_t *cup)
 338 {
 339         if (cup->cu_voidtid == CTF_ERR) {
 340                 ctf_encoding_t enc = { CTF_INT_SIGNED, 0, 0 };
 341                 cup->cu_voidtid = ctf_add_integer(cup->cu_ctfp, CTF_ADD_ROOT,
 342                     "void", &enc);
 343                 if (cup->cu_voidtid == CTF_ERR) {
 344                         (void) snprintf(cup->cu_errbuf, cup->cu_errlen,
 345                             "failed to create void type: %s\n",
 346                             ctf_errmsg(ctf_errno(cup->cu_ctfp)));
 347                 }
 348         }
 349 
 350         return (cup->cu_voidtid);
 351 }
 352 
 353 /*
 354  * There are many different forms that an array index may take. However, we just
 355  * always force it to be of a type long no matter what. Therefore we use this to
 356  * have a single instance of long across everything.
 357  */
 358 static ctf_id_t
 359 ctf_dwarf_long(ctf_cu_t *cup)
 360 {
 361         if (cup->cu_longtid == CTF_ERR) {
 362                 ctf_encoding_t enc;
 363 
 364                 enc.cte_format = CTF_INT_SIGNED;
 365                 enc.cte_offset = 0;
 366                 /* All illumos systems are LP */
 367                 enc.cte_bits = cup->cu_ptrsz * 8;
 368                 cup->cu_longtid = ctf_add_integer(cup->cu_ctfp, CTF_ADD_NONROOT,
 369                     "long", &enc);
 370                 if (cup->cu_longtid == CTF_ERR) {
 371                         (void) snprintf(cup->cu_errbuf, cup->cu_errlen,
 372                             "failed to create long type: %s\n",
 373                             ctf_errmsg(ctf_errno(cup->cu_ctfp)));
 374                 }
 375 
 376         }
 377 
 378         return (cup->cu_longtid);
 379 }
 380 
 381 static int
 382 ctf_dwmap_comp(const void *a, const void *b)
 383 {
 384         const ctf_dwmap_t *ca = a;
 385         const ctf_dwmap_t *cb = b;
 386 
 387         if (ca->cdm_off > cb->cdm_off)
 388                 return (1);
 389         if (ca->cdm_off < cb->cdm_off)
 390                 return (-1);
 391         return (0);
 392 }
 393 
 394 static int
 395 ctf_dwmap_add(ctf_cu_t *cup, ctf_id_t id, Dwarf_Die die, boolean_t fix)
 396 {
 397         int ret;
 398         avl_index_t index;
 399         ctf_dwmap_t *dwmap;
 400         Dwarf_Off off;
 401 
 402         VERIFY(id > 0 && id < CTF_MAX_TYPE);
 403 
 404         if ((ret = ctf_dwarf_offset(cup, die, &off)) != 0)
 405                 return (ret);
 406 
 407         if ((dwmap = ctf_alloc(sizeof (ctf_dwmap_t))) == NULL)
 408                 return (ENOMEM);
 409 
 410         dwmap->cdm_die = die;
 411         dwmap->cdm_off = off;
 412         dwmap->cdm_id = id;
 413         dwmap->cdm_fix = fix;
 414 
 415         ctf_dprintf("dwmap: %p %" DW_PR_DUx "->%d\n", dwmap, off, id);
 416         VERIFY(avl_find(&cup->cu_map, dwmap, &index) == NULL);
 417         avl_insert(&cup->cu_map, dwmap, index);
 418         return (0);
 419 }
 420 
 421 static int
 422 ctf_dwarf_attribute(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half name,
 423     Dwarf_Attribute *attrp)
 424 {
 425         int ret;
 426         Dwarf_Error derr;
 427 
 428         if ((ret = dwarf_attr(die, name, attrp, &derr)) == DW_DLV_OK)
 429                 return (0);
 430         if (ret == DW_DLV_NO_ENTRY) {
 431                 *attrp = NULL;
 432                 return (ENOENT);
 433         }
 434         (void) snprintf(cup->cu_errbuf, cup->cu_errlen,
 435             "failed to get attribute for type: %s\n",
 436             dwarf_errmsg(derr));
 437         return (ECTF_CONVBKERR);
 438 }
 439 
 440 static int
 441 ctf_dwarf_ref(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half name, Dwarf_Off *refp)
 442 {
 443         int ret;
 444         Dwarf_Attribute attr;
 445         Dwarf_Error derr;
 446 
 447         if ((ret = ctf_dwarf_attribute(cup, die, name, &attr)) != 0)
 448                 return (ret);
 449 
 450         if (dwarf_formref(attr, refp, &derr) == DW_DLV_OK) {
 451                 dwarf_dealloc(cup->cu_dwarf, attr, DW_DLA_ATTR);
 452                 return (0);
 453         }
 454 
 455         (void) snprintf(cup->cu_errbuf, cup->cu_errlen,
 456             "failed to get unsigned attribute for type: %s\n",
 457             dwarf_errmsg(derr));
 458         return (ECTF_CONVBKERR);
 459 }
 460 
 461 static int
 462 ctf_dwarf_refdie(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half name,
 463     Dwarf_Die *diep)
 464 {
 465         int ret;
 466         Dwarf_Off off;
 467         Dwarf_Error derr;
 468 
 469         if ((ret = ctf_dwarf_ref(cup, die, name, &off)) != 0)
 470                 return (ret);
 471 
 472         off += cup->cu_cuoff;
 473         if ((ret = dwarf_offdie(cup->cu_dwarf, off, diep, &derr)) !=
 474             DW_DLV_OK) {
 475                 (void) snprintf(cup->cu_errbuf, cup->cu_errlen,
 476                     "failed to get die from offset %" DW_PR_DUu ": %s\n",
 477                     off, dwarf_errmsg(derr));
 478                 return (ECTF_CONVBKERR);
 479         }
 480 
 481         return (0);
 482 }
 483 
 484 static int
 485 ctf_dwarf_signed(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half name,
 486     Dwarf_Signed *valp)
 487 {
 488         int ret;
 489         Dwarf_Attribute attr;
 490         Dwarf_Error derr;
 491 
 492         if ((ret = ctf_dwarf_attribute(cup, die, name, &attr)) != 0)
 493                 return (ret);
 494 
 495         if (dwarf_formsdata(attr, valp, &derr) == DW_DLV_OK) {
 496                 dwarf_dealloc(cup->cu_dwarf, attr, DW_DLA_ATTR);
 497                 return (0);
 498         }
 499 
 500         (void) snprintf(cup->cu_errbuf, cup->cu_errlen,
 501             "failed to get unsigned attribute for type: %s\n",
 502             dwarf_errmsg(derr));
 503         return (ECTF_CONVBKERR);
 504 }
 505 
 506 static int
 507 ctf_dwarf_unsigned(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half name,
 508     Dwarf_Unsigned *valp)
 509 {
 510         int ret;
 511         Dwarf_Attribute attr;
 512         Dwarf_Error derr;
 513 
 514         if ((ret = ctf_dwarf_attribute(cup, die, name, &attr)) != 0)
 515                 return (ret);
 516 
 517         if (dwarf_formudata(attr, valp, &derr) == DW_DLV_OK) {
 518                 dwarf_dealloc(cup->cu_dwarf, attr, DW_DLA_ATTR);
 519                 return (0);
 520         }
 521 
 522         (void) snprintf(cup->cu_errbuf, cup->cu_errlen,
 523             "failed to get unsigned attribute for type: %s\n",
 524             dwarf_errmsg(derr));
 525         return (ECTF_CONVBKERR);
 526 }
 527 
 528 static int
 529 ctf_dwarf_boolean(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half name,
 530     Dwarf_Bool *val)
 531 {
 532         int ret;
 533         Dwarf_Attribute attr;
 534         Dwarf_Error derr;
 535 
 536         if ((ret = ctf_dwarf_attribute(cup, die, name, &attr)) != 0)
 537                 return (ret);
 538 
 539         if (dwarf_formflag(attr, val, &derr) == DW_DLV_OK) {
 540                 dwarf_dealloc(cup->cu_dwarf, attr, DW_DLA_ATTR);
 541                 return (0);
 542         }
 543 
 544         (void) snprintf(cup->cu_errbuf, cup->cu_errlen,
 545             "failed to get boolean attribute for type: %s\n",
 546             dwarf_errmsg(derr));
 547 
 548         return (ECTF_CONVBKERR);
 549 }
 550 
 551 static int
 552 ctf_dwarf_string(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half name, char **strp)
 553 {
 554         int ret;
 555         char *s;
 556         Dwarf_Attribute attr;
 557         Dwarf_Error derr;
 558 
 559         *strp = NULL;
 560         if ((ret = ctf_dwarf_attribute(cup, die, name, &attr)) != 0)
 561                 return (ret);
 562 
 563         if (dwarf_formstring(attr, &s, &derr) == DW_DLV_OK) {
 564                 if ((*strp = ctf_strdup(s)) == NULL)
 565                         ret = ENOMEM;
 566                 else
 567                         ret = 0;
 568                 dwarf_dealloc(cup->cu_dwarf, attr, DW_DLA_ATTR);
 569                 return (ret);
 570         }
 571 
 572         (void) snprintf(cup->cu_errbuf, cup->cu_errlen,
 573             "failed to get string attribute for type: %s\n",
 574             dwarf_errmsg(derr));
 575         return (ECTF_CONVBKERR);
 576 }
 577 
 578 static int
 579 ctf_dwarf_member_location(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Unsigned *valp)
 580 {
 581         int ret;
 582         Dwarf_Error derr;
 583         Dwarf_Attribute attr;
 584         Dwarf_Locdesc *loc;
 585         Dwarf_Signed locnum;
 586 
 587         if ((ret = ctf_dwarf_attribute(cup, die, DW_AT_data_member_location,
 588             &attr)) != 0)
 589                 return (ret);
 590 
 591         if (dwarf_loclist(attr, &loc, &locnum, &derr) != DW_DLV_OK) {
 592                 (void) snprintf(cup->cu_errbuf, cup->cu_errlen,
 593                     "failed to obtain location list for member offset: %s",
 594                     dwarf_errmsg(derr));
 595                 dwarf_dealloc(cup->cu_dwarf, attr, DW_DLA_ATTR);
 596                 return (ECTF_CONVBKERR);
 597         }
 598         dwarf_dealloc(cup->cu_dwarf, attr, DW_DLA_ATTR);
 599 
 600         if (locnum != 1 || loc->ld_s->lr_atom != DW_OP_plus_uconst) {
 601                 (void) snprintf(cup->cu_errbuf, cup->cu_errlen,
 602                     "failed to parse location structure for member");
 603                 dwarf_dealloc(cup->cu_dwarf, loc->ld_s, DW_DLA_LOC_BLOCK);
 604                 dwarf_dealloc(cup->cu_dwarf, loc, DW_DLA_LOCDESC);
 605                 return (ECTF_CONVBKERR);
 606         }
 607 
 608         *valp = loc->ld_s->lr_number;
 609 
 610         dwarf_dealloc(cup->cu_dwarf, loc->ld_s, DW_DLA_LOC_BLOCK);
 611         dwarf_dealloc(cup->cu_dwarf, loc, DW_DLA_LOCDESC);
 612         return (0);
 613 }
 614 
 615 
 616 static int
 617 ctf_dwarf_offset(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Off *offsetp)
 618 {
 619         Dwarf_Error derr;
 620 
 621         if (dwarf_dieoffset(die, offsetp, &derr) == DW_DLV_OK)
 622                 return (0);
 623 
 624         (void) snprintf(cup->cu_errbuf, cup->cu_errlen,
 625             "failed to get die offset: %s\n",
 626             dwarf_errmsg(derr));
 627         return (ECTF_CONVBKERR);
 628 }
 629 
 630 /* simpler variant for debugging output */
 631 static Dwarf_Off
 632 ctf_die_offset(Dwarf_Die die)
 633 {
 634         Dwarf_Off off = -1;
 635         Dwarf_Error derr;
 636 
 637         (void) dwarf_dieoffset(die, &off, &derr);
 638         return (off);
 639 }
 640 
 641 static int
 642 ctf_dwarf_tag(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half *tagp)
 643 {
 644         Dwarf_Error derr;
 645 
 646         if (dwarf_tag(die, tagp, &derr) == DW_DLV_OK)
 647                 return (0);
 648 
 649         (void) snprintf(cup->cu_errbuf, cup->cu_errlen,
 650             "failed to get tag type: %s\n",
 651             dwarf_errmsg(derr));
 652         return (ECTF_CONVBKERR);
 653 }
 654 
 655 static int
 656 ctf_dwarf_sib(ctf_cu_t *cup, Dwarf_Die base, Dwarf_Die *sibp)
 657 {
 658         Dwarf_Error derr;
 659         int ret;
 660 
 661         *sibp = NULL;
 662         ret = dwarf_siblingof(cup->cu_dwarf, base, sibp, &derr);
 663         if (ret == DW_DLV_OK || ret == DW_DLV_NO_ENTRY)
 664                 return (0);
 665 
 666         (void) snprintf(cup->cu_errbuf, cup->cu_errlen,
 667             "failed to sibling from die: %s\n",
 668             dwarf_errmsg(derr));
 669         return (ECTF_CONVBKERR);
 670 }
 671 
 672 static int
 673 ctf_dwarf_child(ctf_cu_t *cup, Dwarf_Die base, Dwarf_Die *childp)
 674 {
 675         Dwarf_Error derr;
 676         int ret;
 677 
 678         *childp = NULL;
 679         ret = dwarf_child(base, childp, &derr);
 680         if (ret == DW_DLV_OK || ret == DW_DLV_NO_ENTRY)
 681                 return (0);
 682 
 683         (void) snprintf(cup->cu_errbuf, cup->cu_errlen,
 684             "failed to child from die: %s\n",
 685             dwarf_errmsg(derr));
 686         return (ECTF_CONVBKERR);
 687 }
 688 
 689 /*
 690  * Compilers disagree on what to do to determine if something has global
 691  * visiblity. Traditionally gcc has used DW_AT_external to indicate this while
 692  * Studio has used DW_AT_visibility. We check DW_AT_visibility first and then
 693  * fall back to DW_AT_external. Lack of DW_AT_external implies that it is not.
 694  */
 695 static int
 696 ctf_dwarf_isglobal(ctf_cu_t *cup, Dwarf_Die die, boolean_t *igp)
 697 {
 698         int ret;
 699         Dwarf_Signed vis;
 700         Dwarf_Bool ext;
 701 
 702         if ((ret = ctf_dwarf_signed(cup, die, DW_AT_visibility, &vis)) == 0) {
 703                 *igp = vis == DW_VIS_exported;
 704                 return (0);
 705         } else if (ret != ENOENT) {
 706                 return (ret);
 707         }
 708 
 709         if ((ret = ctf_dwarf_boolean(cup, die, DW_AT_external, &ext)) != 0) {
 710                 if (ret == ENOENT) {
 711                         *igp = B_FALSE;
 712                         return (0);
 713                 }
 714                 return (ret);
 715         }
 716         *igp = ext != 0 ? B_TRUE : B_FALSE;
 717         return (0);
 718 }
 719 
 720 static int
 721 ctf_dwarf_die_elfenc(Elf *elf, ctf_cu_t *cup, char *errbuf, size_t errlen)
 722 {
 723         GElf_Ehdr ehdr;
 724 
 725         if (gelf_getehdr(elf, &ehdr) == NULL) {
 726                 (void) snprintf(errbuf, errlen,
 727                     "failed to get ELF header: %s\n",
 728                     elf_errmsg(elf_errno()));
 729                 return (ECTF_CONVBKERR);
 730         }
 731 
 732         cup->cu_mach = ehdr.e_machine;
 733 
 734         if (ehdr.e_ident[EI_CLASS] == ELFCLASS32) {
 735                 cup->cu_ptrsz = 4;
 736                 VERIFY(ctf_setmodel(cup->cu_ctfp, CTF_MODEL_ILP32) == 0);
 737         } else if (ehdr.e_ident[EI_CLASS] == ELFCLASS64) {
 738                 cup->cu_ptrsz = 8;
 739                 VERIFY(ctf_setmodel(cup->cu_ctfp, CTF_MODEL_LP64) == 0);
 740         } else {
 741                 (void) snprintf(errbuf, errlen,
 742                     "unknown ELF class %d", ehdr.e_ident[EI_CLASS]);
 743                 return (ECTF_CONVBKERR);
 744         }
 745 
 746         if (ehdr.e_ident[EI_DATA] == ELFDATA2LSB) {
 747                 cup->cu_bigend = B_FALSE;
 748         } else if (ehdr.e_ident[EI_DATA] == ELFDATA2MSB) {
 749                 cup->cu_bigend = B_TRUE;
 750         } else {
 751                 (void) snprintf(errbuf, errlen,
 752                     "unknown ELF data encoding: %hhu", ehdr.e_ident[EI_DATA]);
 753                 return (ECTF_CONVBKERR);
 754         }
 755 
 756         return (0);
 757 }
 758 
 759 typedef struct ctf_dwarf_fpent {
 760         size_t  cdfe_size;
 761         uint_t  cdfe_enc[3];
 762 } ctf_dwarf_fpent_t;
 763 
 764 typedef struct ctf_dwarf_fpmap {
 765         uint_t                  cdf_mach;
 766         ctf_dwarf_fpent_t       cdf_ents[4];
 767 } ctf_dwarf_fpmap_t;
 768 
 769 static const ctf_dwarf_fpmap_t ctf_dwarf_fpmaps[] = {
 770         { EM_SPARC, {
 771                 { 4, { CTF_FP_SINGLE, CTF_FP_CPLX, CTF_FP_IMAGRY } },
 772                 { 8, { CTF_FP_DOUBLE, CTF_FP_DCPLX, CTF_FP_DIMAGRY } },
 773                 { 16, { CTF_FP_LDOUBLE, CTF_FP_LDCPLX, CTF_FP_LDIMAGRY } },
 774                 { 0, { 0 } }
 775         } },
 776         { EM_SPARC32PLUS, {
 777                 { 4, { CTF_FP_SINGLE, CTF_FP_CPLX, CTF_FP_IMAGRY } },
 778                 { 8, { CTF_FP_DOUBLE, CTF_FP_DCPLX, CTF_FP_DIMAGRY } },
 779                 { 16, { CTF_FP_LDOUBLE, CTF_FP_LDCPLX, CTF_FP_LDIMAGRY } },
 780                 { 0, { 0 } }
 781         } },
 782         { EM_SPARCV9, {
 783                 { 4, { CTF_FP_SINGLE, CTF_FP_CPLX, CTF_FP_IMAGRY } },
 784                 { 8, { CTF_FP_DOUBLE, CTF_FP_DCPLX, CTF_FP_DIMAGRY } },
 785                 { 16, { CTF_FP_LDOUBLE, CTF_FP_LDCPLX, CTF_FP_LDIMAGRY } },
 786                 { 0, { 0 } }
 787         } },
 788         { EM_386, {
 789                 { 4, { CTF_FP_SINGLE, CTF_FP_CPLX, CTF_FP_IMAGRY } },
 790                 { 8, { CTF_FP_DOUBLE, CTF_FP_DCPLX, CTF_FP_DIMAGRY } },
 791                 { 12, { CTF_FP_LDOUBLE, CTF_FP_LDCPLX, CTF_FP_LDIMAGRY } },
 792                 { 0, { 0 } }
 793         } },
 794         { EM_X86_64, {
 795                 { 4, { CTF_FP_SINGLE, CTF_FP_CPLX, CTF_FP_IMAGRY } },
 796                 { 8, { CTF_FP_DOUBLE, CTF_FP_DCPLX, CTF_FP_DIMAGRY } },
 797                 { 16, { CTF_FP_LDOUBLE, CTF_FP_LDCPLX, CTF_FP_LDIMAGRY } },
 798                 { 0, { 0 } }
 799         } },
 800         { EM_NONE }
 801 };
 802 
 803 static int
 804 ctf_dwarf_float_base(ctf_cu_t *cup, Dwarf_Signed type, ctf_encoding_t *enc)
 805 {
 806         const ctf_dwarf_fpmap_t *map = &ctf_dwarf_fpmaps[0];
 807         const ctf_dwarf_fpent_t *ent;
 808         uint_t col = 0, mult = 1;
 809 
 810         for (map = &ctf_dwarf_fpmaps[0]; map->cdf_mach != EM_NONE; map++) {
 811                 if (map->cdf_mach == cup->cu_mach)
 812                         break;
 813         }
 814 
 815         if (map->cdf_mach == EM_NONE) {
 816                 (void) snprintf(cup->cu_errbuf, cup->cu_errlen,
 817                     "Unsupported machine type: %d\n", cup->cu_mach);
 818                 return (ENOTSUP);
 819         }
 820 
 821         if (type == DW_ATE_complex_float) {
 822                 mult = 2;
 823                 col = 1;
 824         } else if (type == DW_ATE_imaginary_float ||
 825             type == DW_ATE_SUN_imaginary_float) {
 826                 col = 2;
 827         }
 828 
 829         ent = &map->cdf_ents[0];
 830         for (ent = &map->cdf_ents[0]; ent->cdfe_size != 0; ent++) {
 831                 if (ent->cdfe_size * mult * 8 == enc->cte_bits) {
 832                         enc->cte_format = ent->cdfe_enc[col];
 833                         return (0);
 834                 }
 835         }
 836 
 837         (void) snprintf(cup->cu_errbuf, cup->cu_errlen,
 838             "failed to find valid fp mapping for encoding %d, size %d bits\n",
 839             type, enc->cte_bits);
 840         return (EINVAL);
 841 }
 842 
 843 static int
 844 ctf_dwarf_dwarf_base(ctf_cu_t *cup, Dwarf_Die die, int *kindp,
 845     ctf_encoding_t *enc)
 846 {
 847         int ret;
 848         Dwarf_Signed type;
 849 
 850         if ((ret = ctf_dwarf_signed(cup, die, DW_AT_encoding, &type)) != 0)
 851                 return (ret);
 852 
 853         switch (type) {
 854         case DW_ATE_unsigned:
 855         case DW_ATE_address:
 856                 *kindp = CTF_K_INTEGER;
 857                 enc->cte_format = 0;
 858                 break;
 859         case DW_ATE_unsigned_char:
 860                 *kindp = CTF_K_INTEGER;
 861                 enc->cte_format = CTF_INT_CHAR;
 862                 break;
 863         case DW_ATE_signed:
 864                 *kindp = CTF_K_INTEGER;
 865                 enc->cte_format = CTF_INT_SIGNED;
 866                 break;
 867         case DW_ATE_signed_char:
 868                 *kindp = CTF_K_INTEGER;
 869                 enc->cte_format = CTF_INT_SIGNED | CTF_INT_CHAR;
 870                 break;
 871         case DW_ATE_boolean:
 872                 *kindp = CTF_K_INTEGER;
 873                 enc->cte_format = CTF_INT_SIGNED | CTF_INT_BOOL;
 874                 break;
 875         case DW_ATE_float:
 876         case DW_ATE_complex_float:
 877         case DW_ATE_imaginary_float:
 878         case DW_ATE_SUN_imaginary_float:
 879         case DW_ATE_SUN_interval_float:
 880                 *kindp = CTF_K_FLOAT;
 881                 if ((ret = ctf_dwarf_float_base(cup, type, enc)) != 0)
 882                         return (ret);
 883                 break;
 884         default:
 885                 (void) snprintf(cup->cu_errbuf, cup->cu_errlen,
 886                     "encountered unknown DWARF encoding: %d", type);
 887                 return (ECTF_CONVBKERR);
 888         }
 889 
 890         return (0);
 891 }
 892 
 893 /*
 894  * Different compilers (at least GCC and Studio) use different names for types.
 895  * This parses the types and attempts to unify them. If this fails, we just fall
 896  * back to using the DWARF itself.
 897  */
 898 static int
 899 ctf_dwarf_parse_base(const char *name, int *kindp, ctf_encoding_t *enc,
 900     char **newnamep)
 901 {
 902         char buf[256];
 903         char *base, *c, *last;
 904         int nlong = 0, nshort = 0, nchar = 0, nint = 0;
 905         int sign = 1;
 906 
 907         if (strlen(name) + 1 > sizeof (buf))
 908                 return (EINVAL);
 909 
 910         (void) strlcpy(buf, name, sizeof (buf));
 911         for (c = strtok_r(buf, " ", &last); c != NULL;
 912             c = strtok_r(NULL, " ", &last)) {
 913                 if (strcmp(c, "signed") == 0) {
 914                         sign = 1;
 915                 } else if (strcmp(c, "unsigned") == 0) {
 916                         sign = 0;
 917                 } else if (strcmp(c, "long") == 0) {
 918                         nlong++;
 919                 } else if (strcmp(c, "char") == 0) {
 920                         nchar++;
 921                 } else if (strcmp(c, "short") == 0) {
 922                         nshort++;
 923                 } else if (strcmp(c, "int") == 0) {
 924                         nint++;
 925                 } else {
 926                         /*
 927                          * If we don't recognize any of the tokens, we'll tell
 928                          * the caller to fall back to the dwarf-provided
 929                          * encoding information.
 930                          */
 931                         return (EINVAL);
 932                 }
 933         }
 934 
 935         if (nchar > 1 || nshort > 1 || nint > 1 || nlong > 2)
 936                 return (EINVAL);
 937 
 938         if (nchar > 0) {
 939                 if (nlong > 0 || nshort > 0 || nint > 0)
 940                         return (EINVAL);
 941                 base = "char";
 942         } else if (nshort > 0) {
 943                 if (nlong > 0)
 944                         return (EINVAL);
 945                 base = "short";
 946         } else if (nlong > 0) {
 947                 base = "long";
 948         } else {
 949                 base = "int";
 950         }
 951 
 952         if (nchar > 0)
 953                 enc->cte_format = CTF_INT_CHAR;
 954         else
 955                 enc->cte_format = 0;
 956 
 957         if (sign > 0)
 958                 enc->cte_format |= CTF_INT_SIGNED;
 959 
 960         (void) snprintf(buf, sizeof (buf), "%s%s%s",
 961             (sign ? "" : "unsigned "),
 962             (nlong > 1 ? "long " : ""),
 963             base);
 964 
 965         *newnamep = ctf_strdup(buf);
 966         if (*newnamep == NULL)
 967                 return (ENOMEM);
 968         *kindp = CTF_K_INTEGER;
 969         return (0);
 970 }
 971 
 972 static int
 973 ctf_dwarf_create_base(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp, int isroot,
 974     Dwarf_Off off)
 975 {
 976         int ret;
 977         char *name, *nname;
 978         Dwarf_Unsigned sz;
 979         int kind;
 980         ctf_encoding_t enc;
 981         ctf_id_t id;
 982 
 983         if ((ret = ctf_dwarf_string(cup, die, DW_AT_name, &name)) != 0)
 984                 return (ret);
 985         if ((ret = ctf_dwarf_unsigned(cup, die, DW_AT_byte_size, &sz)) != 0) {
 986                 goto out;
 987         }
 988         ctf_dprintf("Creating base type %s from off %llu, size: %d\n", name,
 989             off, sz);
 990 
 991         bzero(&enc, sizeof (ctf_encoding_t));
 992         enc.cte_bits = sz * 8;
 993         if ((ret = ctf_dwarf_parse_base(name, &kind, &enc, &nname)) == 0) {
 994                 ctf_free(name, strlen(name) + 1);
 995                 name = nname;
 996         } else {
 997                 if (ret != EINVAL)
 998                         return (ret);
 999                 ctf_dprintf("falling back to dwarf for base type %s\n", name);
1000                 if ((ret = ctf_dwarf_dwarf_base(cup, die, &kind, &enc)) != 0)
1001                         return (ret);
1002         }
1003 
1004         id = ctf_add_encoded(cup->cu_ctfp, isroot, name, &enc, kind);
1005         if (id == CTF_ERR) {
1006                 ret = ctf_errno(cup->cu_ctfp);
1007         } else {
1008                 *idp = id;
1009                 ret = ctf_dwmap_add(cup, id, die, B_FALSE);
1010         }
1011 out:
1012         ctf_free(name, strlen(name) + 1);
1013         return (ret);
1014 }
1015 
1016 /*
1017  * Getting a member's offset is a surprisingly intricate dance. It works as
1018  * follows:
1019  *
1020  * 1) If we're in DWARFv4, then we either have a DW_AT_data_bit_offset or we
1021  * have a DW_AT_data_member_location. We won't have both. Thus we check first
1022  * for DW_AT_data_bit_offset, and if it exists, we're set.
1023  *
1024  * Next, if we have a bitfield and we don't have a DW_AT_data_bit_offset, then
1025  * we have to grab the data location and use the following dance:
1026  *
1027  * 2) Gather the set of DW_AT_byte_size, DW_AT_bit_offset, and DW_AT_bit_size.
1028  * Of course, the DW_AT_byte_size may be omitted, even though it isn't always.
1029  * When it's been omitted, we then have to say that the size is that of the
1030  * underlying type, which forces that to be after a ctf_update(). Here, we have
1031  * to do different things based on whether or not we're using big endian or
1032  * little endian to obtain the proper offset.
1033  */
1034 static int
1035 ctf_dwarf_member_offset(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t mid,
1036     ulong_t *offp)
1037 {
1038         int ret;
1039         Dwarf_Unsigned loc, bitsz, bytesz;
1040         Dwarf_Signed bitoff;
1041         size_t off;
1042         ssize_t tsz;
1043 
1044         if ((ret = ctf_dwarf_unsigned(cup, die, DW_AT_data_bit_offset,
1045             &loc)) == 0) {
1046                 *offp = loc;
1047                 return (0);
1048         } else if (ret != ENOENT) {
1049                 return (ret);
1050         }
1051 
1052         if ((ret = ctf_dwarf_member_location(cup, die, &loc)) != 0)
1053                 return (ret);
1054         off = loc * 8;
1055 
1056         if ((ret = ctf_dwarf_signed(cup, die, DW_AT_bit_offset,
1057             &bitoff)) != 0) {
1058                 if (ret != ENOENT)
1059                         return (ret);
1060                 *offp = off;
1061                 return (0);
1062         }
1063 
1064         /* At this point we have to have DW_AT_bit_size */
1065         if ((ret = ctf_dwarf_unsigned(cup, die, DW_AT_bit_size, &bitsz)) != 0)
1066                 return (ret);
1067 
1068         if ((ret = ctf_dwarf_unsigned(cup, die, DW_AT_byte_size,
1069             &bytesz)) != 0) {
1070                 if (ret != ENOENT)
1071                         return (ret);
1072                 if ((tsz = ctf_type_size(cup->cu_ctfp, mid)) == CTF_ERR) {
1073                         int e = ctf_errno(cup->cu_ctfp);
1074                         (void) snprintf(cup->cu_errbuf, cup->cu_errlen,
1075                             "failed to get type size: %s", ctf_errmsg(e));
1076                         return (ECTF_CONVBKERR);
1077                 }
1078         } else {
1079                 tsz = bytesz;
1080         }
1081         tsz *= 8;
1082         if (cup->cu_bigend == B_TRUE) {
1083                 *offp = off + bitoff;
1084         } else {
1085                 *offp = off + tsz - bitoff - bitsz;
1086         }
1087 
1088         return (0);
1089 }
1090 
1091 /*
1092  * We need to determine if the member in question is a bitfield. If it is, then
1093  * we need to go through and create a new type that's based on the actual base
1094  * type, but has a different size. We also rename the type as a result to help
1095  * deal with future collisions.
1096  *
1097  * Here we need to look and see if we have a DW_AT_bit_size value. If we have a
1098  * bit size member and it does not equal the byte size member, then we need to
1099  * create a bitfield type based on this.
1100  *
1101  * Note: When we support DWARFv4, there may be a chance that we need to also
1102  * search for the DW_AT_byte_size if we don't have a DW_AT_bit_size member.
1103  */
1104 static int
1105 ctf_dwarf_member_bitfield(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp)
1106 {
1107         int ret;
1108         Dwarf_Unsigned bitsz;
1109         ctf_encoding_t e;
1110         ctf_dwbitf_t *cdb;
1111         ctf_dtdef_t *dtd;
1112         ctf_id_t base = *idp;
1113         int kind;
1114 
1115         if ((ret = ctf_dwarf_unsigned(cup, die, DW_AT_bit_size, &bitsz)) != 0) {
1116                 if (ret == ENOENT)
1117                         return (0);
1118                 return (ret);
1119         }
1120 
1121         ctf_dprintf("Trying to deal with bitfields on %d:%d\n", base, bitsz);
1122         /*
1123          * Given that we now have a bitsize, time to go do something about it.
1124          * We're going to create a new type based on the current one, but first
1125          * we need to find the base type. This means we need to traverse any
1126          * typedef's, consts, and volatiles until we get to what should be
1127          * something of type integer or enumeration.
1128          */
1129         VERIFY(bitsz < UINT32_MAX);
1130         dtd = ctf_dtd_lookup(cup->cu_ctfp, base);
1131         VERIFY(dtd != NULL);
1132         kind = CTF_INFO_KIND(dtd->dtd_data.ctt_info);
1133         while (kind == CTF_K_TYPEDEF || kind == CTF_K_CONST ||
1134             kind == CTF_K_VOLATILE) {
1135                 dtd = ctf_dtd_lookup(cup->cu_ctfp, dtd->dtd_data.ctt_type);
1136                 VERIFY(dtd != NULL);
1137                 kind = CTF_INFO_KIND(dtd->dtd_data.ctt_info);
1138         }
1139         ctf_dprintf("got kind %d\n", kind);
1140         VERIFY(kind == CTF_K_INTEGER || kind == CTF_K_ENUM);
1141 
1142         /*
1143          * As surprising as it may be, it is strictly possible to create a
1144          * bitfield that is based on an enum. Of course, the C standard leaves
1145          * enums sizing as an ABI concern more or less. To that effect, today on
1146          * all illumos platforms the size of an enum is generally that of an
1147          * int as our supported data models and ABIs all agree on that. So what
1148          * we'll do is fake up a CTF encoding here to use. In this case, we'll
1149          * treat it as an unsigned value of whatever size the underlying enum
1150          * currently has (which is in the ctt_size member of its dynamic type
1151          * data).
1152          */
1153         if (kind == CTF_K_INTEGER) {
1154                 e = dtd->dtd_u.dtu_enc;
1155         } else {
1156                 bzero(&e, sizeof (ctf_encoding_t));
1157                 e.cte_bits = dtd->dtd_data.ctt_size * NBBY;
1158         }
1159 
1160         for (cdb = ctf_list_next(&cup->cu_bitfields); cdb != NULL;
1161             cdb = ctf_list_next(cdb)) {
1162                 if (cdb->cdb_base == base && cdb->cdb_nbits == bitsz)
1163                         break;
1164         }
1165 
1166         /*
1167          * Create a new type if none exists. We name all types in a way that is
1168          * guaranteed not to conflict with the corresponding C type. We do this
1169          * by using the ':' operator.
1170          */
1171         if (cdb == NULL) {
1172                 size_t namesz;
1173                 char *name;
1174 
1175                 e.cte_bits = bitsz;
1176                 namesz = snprintf(NULL, 0, "%s:%d", dtd->dtd_name,
1177                     (uint32_t)bitsz);
1178                 name = ctf_alloc(namesz + 1);
1179                 if (name == NULL)
1180                         return (ENOMEM);
1181                 cdb = ctf_alloc(sizeof (ctf_dwbitf_t));
1182                 if (cdb == NULL) {
1183                         ctf_free(name, namesz + 1);
1184                         return (ENOMEM);
1185                 }
1186                 (void) snprintf(name, namesz + 1, "%s:%d", dtd->dtd_name,
1187                     (uint32_t)bitsz);
1188 
1189                 cdb->cdb_base = base;
1190                 cdb->cdb_nbits = bitsz;
1191                 cdb->cdb_id = ctf_add_integer(cup->cu_ctfp, CTF_ADD_NONROOT,
1192                     name, &e);
1193                 if (cdb->cdb_id == CTF_ERR) {
1194                         (void) snprintf(cup->cu_errbuf, cup->cu_errlen,
1195                             "failed to get add bitfield type %s: %s", name,
1196                             ctf_errmsg(ctf_errno(cup->cu_ctfp)));
1197                         ctf_free(name, namesz + 1);
1198                         ctf_free(cdb, sizeof (ctf_dwbitf_t));
1199                         return (ECTF_CONVBKERR);
1200                 }
1201                 ctf_free(name, namesz + 1);
1202                 ctf_list_append(&cup->cu_bitfields, cdb);
1203         }
1204 
1205         *idp = cdb->cdb_id;
1206 
1207         return (0);
1208 }
1209 
1210 static int
1211 ctf_dwarf_fixup_sou(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t base, boolean_t add)
1212 {
1213         int ret, kind;
1214         Dwarf_Die child, memb;
1215         Dwarf_Unsigned size;
1216 
1217         kind = ctf_type_kind(cup->cu_ctfp, base);
1218         VERIFY(kind != CTF_ERR);
1219         VERIFY(kind == CTF_K_STRUCT || kind == CTF_K_UNION);
1220 
1221         /*
1222          * Members are in children. However, gcc also allows empty ones.
1223          */
1224         if ((ret = ctf_dwarf_child(cup, die, &child)) != 0)
1225                 return (ret);
1226         if (child == NULL)
1227                 return (0);
1228 
1229         memb = child;
1230         while (memb != NULL) {
1231                 Dwarf_Die sib, tdie;
1232                 Dwarf_Half tag;
1233                 ctf_id_t mid;
1234                 char *mname;
1235                 ulong_t memboff = 0;
1236 
1237                 if ((ret = ctf_dwarf_tag(cup, memb, &tag)) != 0)
1238                         return (ret);
1239 
1240                 if (tag != DW_TAG_member)
1241                         continue;
1242 
1243                 if ((ret = ctf_dwarf_refdie(cup, memb, DW_AT_type, &tdie)) != 0)
1244                         return (ret);
1245 
1246                 if ((ret = ctf_dwarf_convert_type(cup, tdie, &mid,
1247                     CTF_ADD_NONROOT)) != 0)
1248                         return (ret);
1249                 ctf_dprintf("Got back type id: %d\n", mid);
1250 
1251                 /*
1252                  * If we're not adding a member, just go ahead and return.
1253                  */
1254                 if (add == B_FALSE) {
1255                         if ((ret = ctf_dwarf_member_bitfield(cup, memb,
1256                             &mid)) != 0)
1257                                 return (ret);
1258                         goto next;
1259                 }
1260 
1261                 if ((ret = ctf_dwarf_string(cup, memb, DW_AT_name,
1262                     &mname)) != 0 && ret != ENOENT)
1263                         return (ret);
1264                 if (ret == ENOENT)
1265                         mname = NULL;
1266 
1267                 if (kind == CTF_K_UNION) {
1268                         memboff = 0;
1269                 } else if ((ret = ctf_dwarf_member_offset(cup, memb, mid,
1270                     &memboff)) != 0) {
1271                         if (mname != NULL)
1272                                 ctf_free(mname, strlen(mname) + 1);
1273                         return (ret);
1274                 }
1275 
1276                 if ((ret = ctf_dwarf_member_bitfield(cup, memb, &mid)) != 0)
1277                         return (ret);
1278 
1279                 ret = ctf_add_member(cup->cu_ctfp, base, mname, mid, memboff);
1280                 if (ret == CTF_ERR) {
1281                         (void) snprintf(cup->cu_errbuf, cup->cu_errlen,
1282                             "failed to add member %s: %s",
1283                             mname, ctf_errmsg(ctf_errno(cup->cu_ctfp)));
1284                         if (mname != NULL)
1285                                 ctf_free(mname, strlen(mname) + 1);
1286                         return (ECTF_CONVBKERR);
1287                 }
1288 
1289                 if (mname != NULL)
1290                         ctf_free(mname, strlen(mname) + 1);
1291 
1292 next:
1293                 if ((ret = ctf_dwarf_sib(cup, memb, &sib)) != 0)
1294                         return (ret);
1295                 memb = sib;
1296         }
1297 
1298         /*
1299          * If we're not adding members, then we don't know the final size of the
1300          * structure, so end here.
1301          */
1302         if (add == B_FALSE)
1303                 return (0);
1304 
1305         /* Finally set the size of the structure to the actual byte size */
1306         if ((ret = ctf_dwarf_unsigned(cup, die, DW_AT_byte_size, &size)) != 0)
1307                 return (ret);
1308         if ((ctf_set_size(cup->cu_ctfp, base, size)) == CTF_ERR) {
1309                 int e = ctf_errno(cup->cu_ctfp);
1310                 (void) snprintf(cup->cu_errbuf, cup->cu_errlen,
1311                     "failed to set type size for %d to 0x%x: %s", base,
1312                     (uint32_t)size, ctf_errmsg(e));
1313                 return (ECTF_CONVBKERR);
1314         }
1315 
1316         return (0);
1317 }
1318 
1319 static int
1320 ctf_dwarf_create_sou(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp,
1321     int kind, int isroot)
1322 {
1323         int ret;
1324         char *name;
1325         ctf_id_t base;
1326         Dwarf_Die child;
1327         Dwarf_Bool decl;
1328 
1329         /*
1330          * Deal with the terribly annoying case of anonymous structs and unions.
1331          * If they don't have a name, set the name to the empty string.
1332          */
1333         if ((ret = ctf_dwarf_string(cup, die, DW_AT_name, &name)) != 0 &&
1334             ret != ENOENT)
1335                 return (ret);
1336         if (ret == ENOENT)
1337                 name = NULL;
1338 
1339         /*
1340          * We need to check if we just have a declaration here. If we do, then
1341          * instead of creating an actual structure or union, we're just going to
1342          * go ahead and create a forward. During a dedup or merge, the forward
1343          * will be replaced with the real thing.
1344          */
1345         if ((ret = ctf_dwarf_boolean(cup, die, DW_AT_declaration,
1346             &decl)) != 0) {
1347                 if (ret != ENOENT)
1348                         return (ret);
1349                 decl = 0;
1350         }
1351 
1352         if (decl != 0) {
1353                 base = ctf_add_forward(cup->cu_ctfp, isroot, name, kind);
1354         } else if (kind == CTF_K_STRUCT) {
1355                 base = ctf_add_struct(cup->cu_ctfp, isroot, name);
1356         } else {
1357                 base = ctf_add_union(cup->cu_ctfp, isroot, name);
1358         }
1359         ctf_dprintf("added sou %s (%d) (%d)\n", name, kind, base);
1360         if (name != NULL)
1361                 ctf_free(name, strlen(name) + 1);
1362         if (base == CTF_ERR)
1363                 return (ctf_errno(cup->cu_ctfp));
1364         *idp = base;
1365 
1366         /*
1367          * If it's just a declaration, we're not going to mark it for fix up or
1368          * do anything else.
1369          */
1370         if (decl == B_TRUE)
1371                 return (ctf_dwmap_add(cup, base, die, B_FALSE));
1372         if ((ret = ctf_dwmap_add(cup, base, die, B_TRUE)) != 0)
1373                 return (ret);
1374 
1375         /*
1376          * Members are in children. However, gcc also allows empty ones.
1377          */
1378         if ((ret = ctf_dwarf_child(cup, die, &child)) != 0)
1379                 return (ret);
1380         if (child == NULL)
1381                 return (0);
1382 
1383         return (0);
1384 }
1385 
1386 static int
1387 ctf_dwarf_create_array_range(ctf_cu_t *cup, Dwarf_Die range, ctf_id_t *idp,
1388     ctf_id_t base, int isroot)
1389 {
1390         int ret;
1391         Dwarf_Die sib;
1392         Dwarf_Unsigned val;
1393         Dwarf_Signed sval;
1394         ctf_arinfo_t ar;
1395 
1396         ctf_dprintf("creating array range\n");
1397 
1398         if ((ret = ctf_dwarf_sib(cup, range, &sib)) != 0)
1399                 return (ret);
1400         if (sib != NULL) {
1401                 ctf_id_t id;
1402                 if ((ret = ctf_dwarf_create_array_range(cup, sib, &id,
1403                     base, CTF_ADD_NONROOT)) != 0)
1404                         return (ret);
1405                 ar.ctr_contents = id;
1406         } else {
1407                 ar.ctr_contents = base;
1408         }
1409 
1410         if ((ar.ctr_index = ctf_dwarf_long(cup)) == CTF_ERR)
1411                 return (ctf_errno(cup->cu_ctfp));
1412 
1413         /*
1414          * Array bounds can be signed or unsigned, but there are several kinds
1415          * of signless forms (data1, data2, etc) that take their sign from the
1416          * routine that is trying to interpret them.  That is, data1 can be
1417          * either signed or unsigned, depending on whether you use the signed or
1418          * unsigned accessor function.  GCC will use the signless forms to store
1419          * unsigned values which have their high bit set, so we need to try to
1420          * read them first as unsigned to get positive values.  We could also
1421          * try signed first, falling back to unsigned if we got a negative
1422          * value.
1423          */
1424         if ((ret = ctf_dwarf_unsigned(cup, range, DW_AT_upper_bound,
1425             &val)) == 0) {
1426                 ar.ctr_nelems = val + 1;
1427         } else if (ret != ENOENT) {
1428                 return (ret);
1429         } else if ((ret = ctf_dwarf_signed(cup, range, DW_AT_upper_bound,
1430             &sval)) == 0) {
1431                 ar.ctr_nelems = sval + 1;
1432         } else if (ret != ENOENT) {
1433                 return (ret);
1434         } else {
1435                 ar.ctr_nelems = 0;
1436         }
1437 
1438         if ((*idp = ctf_add_array(cup->cu_ctfp, isroot, &ar)) == CTF_ERR)
1439                 return (ctf_errno(cup->cu_ctfp));
1440 
1441         return (0);
1442 }
1443 
1444 /*
1445  * Try and create an array type. First, the kind of the array is specified in
1446  * the DW_AT_type entry. Next, the number of entries is stored in a more
1447  * complicated form, we should have a child that has the DW_TAG_subrange type.
1448  */
1449 static int
1450 ctf_dwarf_create_array(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp, int isroot)
1451 {
1452         int ret;
1453         Dwarf_Die tdie, rdie;
1454         ctf_id_t tid;
1455         Dwarf_Half rtag;
1456 
1457         if ((ret = ctf_dwarf_refdie(cup, die, DW_AT_type, &tdie)) != 0)
1458                 return (ret);
1459         if ((ret = ctf_dwarf_convert_type(cup, tdie, &tid,
1460             CTF_ADD_NONROOT)) != 0)
1461                 return (ret);
1462 
1463         if ((ret = ctf_dwarf_child(cup, die, &rdie)) != 0)
1464                 return (ret);
1465         if ((ret = ctf_dwarf_tag(cup, rdie, &rtag)) != 0)
1466                 return (ret);
1467         if (rtag != DW_TAG_subrange_type) {
1468                 (void) snprintf(cup->cu_errbuf, cup->cu_errlen,
1469                     "encountered array without DW_TAG_subrange_type child\n");
1470                 return (ECTF_CONVBKERR);
1471         }
1472 
1473         /*
1474          * The compiler may opt to describe a multi-dimensional array as one
1475          * giant array or it may opt to instead encode it as a series of
1476          * subranges. If it's the latter, then for each subrange we introduce a
1477          * type. We can always use the base type.
1478          */
1479         if ((ret = ctf_dwarf_create_array_range(cup, rdie, idp, tid,
1480             isroot)) != 0)
1481                 return (ret);
1482         ctf_dprintf("Got back id %d\n", *idp);
1483         return (ctf_dwmap_add(cup, *idp, die, B_FALSE));
1484 }
1485 
1486 /*
1487  * Given "const int const_array3[11]", GCC7 at least will create a DIE tree of
1488  * DW_TAG_const_type:DW_TAG_array_type:DW_Tag_const_type:<member_type>.
1489  *
1490  * Given C's syntax, this renders out as "const const int const_array3[11]".  To
1491  * get closer to round-tripping (and make the unit tests work), we'll peek for
1492  * this case, and avoid adding the extraneous qualifier if we see that the
1493  * underlying array referent already has the same qualifier.
1494  *
1495  * This is unfortunately less trivial than it could be: this issue applies to
1496  * qualifier sets like "const volatile", as well as multi-dimensional arrays, so
1497  * we need to descend down those.
1498  *
1499  * Returns CTF_ERR on error, or a boolean value otherwise.
1500  */
1501 static int
1502 needed_array_qualifier(ctf_cu_t *cup, int kind, ctf_id_t ref_id)
1503 {
1504         const ctf_type_t *t;
1505         ctf_arinfo_t arinfo;
1506         int akind;
1507 
1508         if (kind != CTF_K_CONST && kind != CTF_K_VOLATILE &&
1509             kind != CTF_K_RESTRICT)
1510                 return (1);
1511 
1512         if ((t = ctf_dyn_lookup_by_id(cup->cu_ctfp, ref_id)) == NULL)
1513                 return (CTF_ERR);
1514 
1515         if (LCTF_INFO_KIND(cup->cu_ctfp, t->ctt_info) != CTF_K_ARRAY)
1516                 return (1);
1517 
1518         if (ctf_dyn_array_info(cup->cu_ctfp, ref_id, &arinfo) != 0)
1519                 return (CTF_ERR);
1520 
1521         ctf_id_t id = arinfo.ctr_contents;
1522 
1523         for (;;) {
1524                 if ((t = ctf_dyn_lookup_by_id(cup->cu_ctfp, id)) == NULL)
1525                         return (CTF_ERR);
1526 
1527                 akind = LCTF_INFO_KIND(cup->cu_ctfp, t->ctt_info);
1528 
1529                 if (akind == kind)
1530                         break;
1531 
1532                 if (akind == CTF_K_ARRAY) {
1533                         if (ctf_dyn_array_info(cup->cu_ctfp,
1534                             id, &arinfo) != 0)
1535                                 return (CTF_ERR);
1536                         id = arinfo.ctr_contents;
1537                         continue;
1538                 }
1539 
1540                 if (akind != CTF_K_CONST && akind != CTF_K_VOLATILE &&
1541                     akind != CTF_K_RESTRICT)
1542                         break;
1543 
1544                 id = t->ctt_type;
1545         }
1546 
1547         if (kind == akind) {
1548                 ctf_dprintf("ignoring extraneous %s qualifier for array %d\n",
1549                     ctf_kind_name(cup->cu_ctfp, kind), ref_id);
1550         }
1551 
1552         return (kind != akind);
1553 }
1554 
1555 static int
1556 ctf_dwarf_create_reference(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp,
1557     int kind, int isroot)
1558 {
1559         int ret;
1560         ctf_id_t id;
1561         Dwarf_Die tdie;
1562         char *name;
1563         size_t namelen;
1564 
1565         if ((ret = ctf_dwarf_string(cup, die, DW_AT_name, &name)) != 0 &&
1566             ret != ENOENT)
1567                 return (ret);
1568         if (ret == ENOENT) {
1569                 name = NULL;
1570                 namelen = 0;
1571         } else {
1572                 namelen = strlen(name);
1573         }
1574 
1575         ctf_dprintf("reference kind %d %s\n", kind, name != NULL ? name : "<>");
1576 
1577         if ((ret = ctf_dwarf_refdie(cup, die, DW_AT_type, &tdie)) != 0) {
1578                 if (ret != ENOENT) {
1579                         ctf_free(name, namelen);
1580                         return (ret);
1581                 }
1582                 if ((id = ctf_dwarf_void(cup)) == CTF_ERR) {
1583                         ctf_free(name, namelen);
1584                         return (ctf_errno(cup->cu_ctfp));
1585                 }
1586         } else {
1587                 if ((ret = ctf_dwarf_convert_type(cup, tdie, &id,
1588                     CTF_ADD_NONROOT)) != 0) {
1589                         ctf_free(name, namelen);
1590                         return (ret);
1591                 }
1592         }
1593 
1594         if ((ret = needed_array_qualifier(cup, kind, id)) <= 0) {
1595                 if (ret != 0) {
1596                         ret = (ctf_errno(cup->cu_ctfp));
1597                 } else {
1598                         *idp = id;
1599                 }
1600 
1601                 ctf_free(name, namelen);
1602                 return (ret);
1603         }
1604 
1605         if ((*idp = ctf_add_reftype(cup->cu_ctfp, isroot, name, id, kind)) ==
1606             CTF_ERR) {
1607                 ctf_free(name, namelen);
1608                 return (ctf_errno(cup->cu_ctfp));
1609         }
1610 
1611         ctf_free(name, namelen);
1612         return (ctf_dwmap_add(cup, *idp, die, B_FALSE));
1613 }
1614 
1615 /*
1616  * Get the size of the type of a particular die. Note that this is a simple
1617  * version that doesn't attempt to traverse further than expecting a single
1618  * sized type reference (so no qualifiers etc.). Nor does it attempt to do as
1619  * much as ctf_type_size() - which we cannot use here as that doesn't look up
1620  * dynamic types, and we don't yet want to do a ctf_update().
1621  */
1622 static int
1623 ctf_dwarf_get_type_size(ctf_cu_t *cup, Dwarf_Die die, size_t *sizep)
1624 {
1625         const ctf_type_t *t;
1626         Dwarf_Die tdie;
1627         ctf_id_t tid;
1628         int ret;
1629 
1630         if ((ret = ctf_dwarf_refdie(cup, die, DW_AT_type, &tdie)) != 0)
1631                 return (ret);
1632 
1633         if ((ret = ctf_dwarf_convert_type(cup, tdie, &tid,
1634             CTF_ADD_NONROOT)) != 0)
1635                 return (ret);
1636 
1637         if ((t = ctf_dyn_lookup_by_id(cup->cu_ctfp, tid)) == NULL)
1638                 return (ENOENT);
1639 
1640         *sizep = ctf_get_ctt_size(cup->cu_ctfp, t, NULL, NULL);
1641         return (0);
1642 }
1643 
1644 static int
1645 ctf_dwarf_create_enum(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp, int isroot)
1646 {
1647         size_t size = 0;
1648         Dwarf_Die child;
1649         ctf_id_t id;
1650         char *name;
1651         int ret;
1652 
1653         if ((ret = ctf_dwarf_string(cup, die, DW_AT_name, &name)) != 0 &&
1654             ret != ENOENT)
1655                 return (ret);
1656         if (ret == ENOENT)
1657                 name = NULL;
1658 
1659         (void) ctf_dwarf_get_type_size(cup, die, &size);
1660 
1661         id = ctf_add_enum(cup->cu_ctfp, isroot, name, size);
1662         ctf_dprintf("added enum %s (%d)\n", name, id);
1663         if (name != NULL)
1664                 ctf_free(name, strlen(name) + 1);
1665         if (id == CTF_ERR)
1666                 return (ctf_errno(cup->cu_ctfp));
1667         *idp = id;
1668         if ((ret = ctf_dwmap_add(cup, id, die, B_FALSE)) != 0)
1669                 return (ret);
1670 
1671         if ((ret = ctf_dwarf_child(cup, die, &child)) != 0) {
1672                 if (ret == ENOENT)
1673                         ret = 0;
1674                 return (ret);
1675         }
1676 
1677         while (child != NULL) {
1678                 Dwarf_Half tag;
1679                 Dwarf_Signed sval;
1680                 Dwarf_Unsigned uval;
1681                 Dwarf_Die arg = child;
1682                 int eval;
1683 
1684                 if ((ret = ctf_dwarf_sib(cup, arg, &child)) != 0)
1685                         return (ret);
1686 
1687                 if ((ret = ctf_dwarf_tag(cup, arg, &tag)) != 0)
1688                         return (ret);
1689 
1690                 if (tag != DW_TAG_enumerator) {
1691                         if ((ret = ctf_dwarf_convert_type(cup, arg, NULL,
1692                             CTF_ADD_NONROOT)) != 0)
1693                                 return (ret);
1694                         continue;
1695                 }
1696 
1697                 /*
1698                  * DWARF v4 section 5.7 tells us we'll always have names.
1699                  */
1700                 if ((ret = ctf_dwarf_string(cup, arg, DW_AT_name, &name)) != 0)
1701                         return (ret);
1702 
1703                 /*
1704                  * We have to be careful here: newer GCCs generate DWARF where
1705                  * an unsigned value will happily pass ctf_dwarf_signed().
1706                  * Since negative values will fail ctf_dwarf_unsigned(), we try
1707                  * that first to make sure we get the right value.
1708                  */
1709                 if ((ret = ctf_dwarf_unsigned(cup, arg, DW_AT_const_value,
1710                     &uval)) == 0) {
1711                         eval = (int)uval;
1712                 } else if ((ret = ctf_dwarf_signed(cup, arg, DW_AT_const_value,
1713                     &sval)) == 0) {
1714                         eval = sval;
1715                 }
1716 
1717                 if (ret != 0) {
1718                         if (ret != ENOENT)
1719                                 return (ret);
1720 
1721                         (void) snprintf(cup->cu_errbuf, cup->cu_errlen,
1722                             "encountered enumeration without constant value\n");
1723                         return (ECTF_CONVBKERR);
1724                 }
1725 
1726                 ret = ctf_add_enumerator(cup->cu_ctfp, id, name, eval);
1727                 if (ret == CTF_ERR) {
1728                         (void) snprintf(cup->cu_errbuf, cup->cu_errlen,
1729                             "failed to add enumarator %s (%d) to %d\n",
1730                             name, eval, id);
1731                         ctf_free(name, strlen(name) + 1);
1732                         return (ctf_errno(cup->cu_ctfp));
1733                 }
1734                 ctf_free(name, strlen(name) + 1);
1735         }
1736 
1737         return (0);
1738 }
1739 
1740 /*
1741  * For a function pointer, walk over and process all of its children, unless we
1742  * encounter one that's just a declaration. In which case, we error on it.
1743  */
1744 static int
1745 ctf_dwarf_create_fptr(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp, int isroot)
1746 {
1747         int ret;
1748         Dwarf_Bool b;
1749         ctf_funcinfo_t fi;
1750         Dwarf_Die retdie;
1751         ctf_id_t *argv = NULL;
1752 
1753         bzero(&fi, sizeof (ctf_funcinfo_t));
1754 
1755         if ((ret = ctf_dwarf_boolean(cup, die, DW_AT_declaration, &b)) != 0) {
1756                 if (ret != ENOENT)
1757                         return (ret);
1758         } else {
1759                 if (b != 0)
1760                         return (EPROTOTYPE);
1761         }
1762 
1763         /*
1764          * Return type is in DW_AT_type, if none, it returns void.
1765          */
1766         if ((ret = ctf_dwarf_refdie(cup, die, DW_AT_type, &retdie)) != 0) {
1767                 if (ret != ENOENT)
1768                         return (ret);
1769                 if ((fi.ctc_return = ctf_dwarf_void(cup)) == CTF_ERR)
1770                         return (ctf_errno(cup->cu_ctfp));
1771         } else {
1772                 if ((ret = ctf_dwarf_convert_type(cup, retdie, &fi.ctc_return,
1773                     CTF_ADD_NONROOT)) != 0)
1774                         return (ret);
1775         }
1776 
1777         if ((ret = ctf_dwarf_function_count(cup, die, &fi, B_TRUE)) != 0) {
1778                 return (ret);
1779         }
1780 
1781         if (fi.ctc_argc != 0) {
1782                 argv = ctf_alloc(sizeof (ctf_id_t) * fi.ctc_argc);
1783                 if (argv == NULL)
1784                         return (ENOMEM);
1785 
1786                 if ((ret = ctf_dwarf_convert_fargs(cup, die, &fi, argv)) != 0) {
1787                         ctf_free(argv, sizeof (ctf_id_t) * fi.ctc_argc);
1788                         return (ret);
1789                 }
1790         }
1791 
1792         if ((*idp = ctf_add_funcptr(cup->cu_ctfp, isroot, &fi, argv)) ==
1793             CTF_ERR) {
1794                 ctf_free(argv, sizeof (ctf_id_t) * fi.ctc_argc);
1795                 return (ctf_errno(cup->cu_ctfp));
1796         }
1797 
1798         ctf_free(argv, sizeof (ctf_id_t) * fi.ctc_argc);
1799         return (ctf_dwmap_add(cup, *idp, die, B_FALSE));
1800 }
1801 
1802 static int
1803 ctf_dwarf_convert_type(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp,
1804     int isroot)
1805 {
1806         int ret;
1807         Dwarf_Off offset;
1808         Dwarf_Half tag;
1809         ctf_dwmap_t lookup, *map;
1810         ctf_id_t id;
1811 
1812         if (idp == NULL)
1813                 idp = &id;
1814 
1815         if ((ret = ctf_dwarf_offset(cup, die, &offset)) != 0)
1816                 return (ret);
1817 
1818         if (offset > cup->cu_maxoff) {
1819                 (void) snprintf(cup->cu_errbuf, cup->cu_errlen,
1820                     "die offset %llu beyond maximum for header %llu\n",
1821                     offset, cup->cu_maxoff);
1822                 return (ECTF_CONVBKERR);
1823         }
1824 
1825         /*
1826          * If we've already added an entry for this offset, then we're done.
1827          */
1828         lookup.cdm_off = offset;
1829         if ((map = avl_find(&cup->cu_map, &lookup, NULL)) != NULL) {
1830                 *idp = map->cdm_id;
1831                 return (0);
1832         }
1833 
1834         if ((ret = ctf_dwarf_tag(cup, die, &tag)) != 0)
1835                 return (ret);
1836 
1837         ret = ENOTSUP;
1838         switch (tag) {
1839         case DW_TAG_base_type:
1840                 ctf_dprintf("base\n");
1841                 ret = ctf_dwarf_create_base(cup, die, idp, isroot, offset);
1842                 break;
1843         case DW_TAG_array_type:
1844                 ctf_dprintf("array\n");
1845                 ret = ctf_dwarf_create_array(cup, die, idp, isroot);
1846                 break;
1847         case DW_TAG_enumeration_type:
1848                 ctf_dprintf("enum\n");
1849                 ret = ctf_dwarf_create_enum(cup, die, idp, isroot);
1850                 break;
1851         case DW_TAG_pointer_type:
1852                 ctf_dprintf("pointer\n");
1853                 ret = ctf_dwarf_create_reference(cup, die, idp, CTF_K_POINTER,
1854                     isroot);
1855                 break;
1856         case DW_TAG_structure_type:
1857                 ctf_dprintf("struct\n");
1858                 ret = ctf_dwarf_create_sou(cup, die, idp, CTF_K_STRUCT,
1859                     isroot);
1860                 break;
1861         case DW_TAG_subroutine_type:
1862                 ctf_dprintf("fptr\n");
1863                 ret = ctf_dwarf_create_fptr(cup, die, idp, isroot);
1864                 break;
1865         case DW_TAG_typedef:
1866                 ctf_dprintf("typedef\n");
1867                 ret = ctf_dwarf_create_reference(cup, die, idp, CTF_K_TYPEDEF,
1868                     isroot);
1869                 break;
1870         case DW_TAG_union_type:
1871                 ctf_dprintf("union\n");
1872                 ret = ctf_dwarf_create_sou(cup, die, idp, CTF_K_UNION,
1873                     isroot);
1874                 break;
1875         case DW_TAG_const_type:
1876                 ctf_dprintf("const\n");
1877                 ret = ctf_dwarf_create_reference(cup, die, idp, CTF_K_CONST,
1878                     isroot);
1879                 break;
1880         case DW_TAG_volatile_type:
1881                 ctf_dprintf("volatile\n");
1882                 ret = ctf_dwarf_create_reference(cup, die, idp, CTF_K_VOLATILE,
1883                     isroot);
1884                 break;
1885         case DW_TAG_restrict_type:
1886                 ctf_dprintf("restrict\n");
1887                 ret = ctf_dwarf_create_reference(cup, die, idp, CTF_K_RESTRICT,
1888                     isroot);
1889                 break;
1890         default:
1891                 ctf_dprintf("ignoring tag type %x\n", tag);
1892                 *idp = CTF_ERR;
1893                 ret = 0;
1894                 break;
1895         }
1896         ctf_dprintf("ctf_dwarf_convert_type tag specific handler returned %d\n",
1897             ret);
1898 
1899         return (ret);
1900 }
1901 
1902 static int
1903 ctf_dwarf_walk_lexical(ctf_cu_t *cup, Dwarf_Die die)
1904 {
1905         int ret;
1906         Dwarf_Die child;
1907 
1908         if ((ret = ctf_dwarf_child(cup, die, &child)) != 0)
1909                 return (ret);
1910 
1911         if (child == NULL)
1912                 return (0);
1913 
1914         return (ctf_dwarf_convert_die(cup, die));
1915 }
1916 
1917 static int
1918 ctf_dwarf_function_count(ctf_cu_t *cup, Dwarf_Die die, ctf_funcinfo_t *fip,
1919     boolean_t fptr)
1920 {
1921         int ret;
1922         Dwarf_Die child, sib, arg;
1923 
1924         if ((ret = ctf_dwarf_child(cup, die, &child)) != 0)
1925                 return (ret);
1926 
1927         arg = child;
1928         while (arg != NULL) {
1929                 Dwarf_Half tag;
1930 
1931                 if ((ret = ctf_dwarf_tag(cup, arg, &tag)) != 0)
1932                         return (ret);
1933 
1934                 /*
1935                  * We have to check for a varargs type declaration. This will
1936                  * happen in one of two ways. If we have a function pointer
1937                  * type, then it'll be done with a tag of type
1938                  * DW_TAG_unspecified_parameters. However, it only means we have
1939                  * a variable number of arguments, if we have more than one
1940                  * argument found so far. Otherwise, when we have a function
1941                  * type, it instead uses a formal parameter whose name is '...'
1942                  * to indicate a variable arguments member.
1943                  *
1944                  * Also, if we have a function pointer, then we have to expect
1945                  * that we might not get a name at all.
1946                  */
1947                 if (tag == DW_TAG_formal_parameter && fptr == B_FALSE) {
1948                         char *name;
1949                         if ((ret = ctf_dwarf_string(cup, die, DW_AT_name,
1950                             &name)) != 0)
1951                                 return (ret);
1952                         if (strcmp(name, DWARF_VARARGS_NAME) == 0)
1953                                 fip->ctc_flags |= CTF_FUNC_VARARG;
1954                         else
1955                                 fip->ctc_argc++;
1956                         ctf_free(name, strlen(name) + 1);
1957                 } else if (tag == DW_TAG_formal_parameter) {
1958                         fip->ctc_argc++;
1959                 } else if (tag == DW_TAG_unspecified_parameters &&
1960                     fip->ctc_argc > 0) {
1961                         fip->ctc_flags |= CTF_FUNC_VARARG;
1962                 }
1963                 if ((ret = ctf_dwarf_sib(cup, arg, &sib)) != 0)
1964                         return (ret);
1965                 arg = sib;
1966         }
1967 
1968         return (0);
1969 }
1970 
1971 static int
1972 ctf_dwarf_convert_fargs(ctf_cu_t *cup, Dwarf_Die die, ctf_funcinfo_t *fip,
1973     ctf_id_t *argv)
1974 {
1975         int ret;
1976         int i = 0;
1977         Dwarf_Die child, sib, arg;
1978 
1979         if ((ret = ctf_dwarf_child(cup, die, &child)) != 0)
1980                 return (ret);
1981 
1982         arg = child;
1983         while (arg != NULL) {
1984                 Dwarf_Half tag;
1985 
1986                 if ((ret = ctf_dwarf_tag(cup, arg, &tag)) != 0)
1987                         return (ret);
1988                 if (tag == DW_TAG_formal_parameter) {
1989                         Dwarf_Die tdie;
1990 
1991                         if ((ret = ctf_dwarf_refdie(cup, arg, DW_AT_type,
1992                             &tdie)) != 0)
1993                                 return (ret);
1994 
1995                         if ((ret = ctf_dwarf_convert_type(cup, tdie, &argv[i],
1996                             CTF_ADD_ROOT)) != 0)
1997                                 return (ret);
1998                         i++;
1999 
2000                         /*
2001                          * Once we hit argc entries, we're done. This ensures we
2002                          * don't accidentally hit a varargs which should be the
2003                          * last entry.
2004                          */
2005                         if (i == fip->ctc_argc)
2006                                 break;
2007                 }
2008 
2009                 if ((ret = ctf_dwarf_sib(cup, arg, &sib)) != 0)
2010                         return (ret);
2011                 arg = sib;
2012         }
2013 
2014         return (0);
2015 }
2016 
2017 static int
2018 ctf_dwarf_convert_function(ctf_cu_t *cup, Dwarf_Die die)
2019 {
2020         ctf_dwfunc_t *cdf;
2021         Dwarf_Die tdie;
2022         Dwarf_Bool b;
2023         char *name;
2024         int ret;
2025 
2026         /*
2027          * Functions that don't have a name are generally functions that have
2028          * been inlined and thus most information about them has been lost. If
2029          * we can't get a name, then instead of returning ENOENT, we silently
2030          * swallow the error.
2031          */
2032         if ((ret = ctf_dwarf_string(cup, die, DW_AT_name, &name)) != 0) {
2033                 if (ret == ENOENT)
2034                         return (0);
2035                 return (ret);
2036         }
2037 
2038         ctf_dprintf("beginning work on function %s (die %llx)\n",
2039             name, ctf_die_offset(die));
2040 
2041         if ((ret = ctf_dwarf_boolean(cup, die, DW_AT_declaration, &b)) != 0) {
2042                 if (ret != ENOENT)
2043                         return (ret);
2044         } else if (b != 0) {
2045                 /*
2046                  * GCC7 at least creates empty DW_AT_declarations for functions
2047                  * defined in headers.  As they lack details on the function
2048                  * prototype, we need to ignore them.  If we later actually
2049                  * see the relevant function's definition, we will see another
2050                  * DW_TAG_subprogram that is more complete.
2051                  */
2052                 ctf_dprintf("ignoring declaration of function %s (die %llx)\n",
2053                     name, ctf_die_offset(die));
2054                 return (0);
2055         }
2056 
2057         if ((cdf = ctf_alloc(sizeof (ctf_dwfunc_t))) == NULL) {
2058                 ctf_free(name, strlen(name) + 1);
2059                 return (ENOMEM);
2060         }
2061         bzero(cdf, sizeof (ctf_dwfunc_t));
2062         cdf->cdf_name = name;
2063 
2064         if ((ret = ctf_dwarf_refdie(cup, die, DW_AT_type, &tdie)) == 0) {
2065                 if ((ret = ctf_dwarf_convert_type(cup, tdie,
2066                     &(cdf->cdf_fip.ctc_return), CTF_ADD_ROOT)) != 0) {
2067                         ctf_free(name, strlen(name) + 1);
2068                         ctf_free(cdf, sizeof (ctf_dwfunc_t));
2069                         return (ret);
2070                 }
2071         } else if (ret != ENOENT) {
2072                 ctf_free(name, strlen(name) + 1);
2073                 ctf_free(cdf, sizeof (ctf_dwfunc_t));
2074                 return (ret);
2075         } else {
2076                 if ((cdf->cdf_fip.ctc_return = ctf_dwarf_void(cup)) ==
2077                     CTF_ERR) {
2078                         ctf_free(name, strlen(name) + 1);
2079                         ctf_free(cdf, sizeof (ctf_dwfunc_t));
2080                         return (ctf_errno(cup->cu_ctfp));
2081                 }
2082         }
2083 
2084         /*
2085          * A function has a number of children, some of which may not be ones we
2086          * care about. Children that we care about have a type of
2087          * DW_TAG_formal_parameter. We're going to do two passes, the first to
2088          * count the arguments, the second to process them. Afterwards, we
2089          * should be good to go ahead and add this function.
2090          *
2091          * Note, we already got the return type by going in and grabbing it out
2092          * of the DW_AT_type.
2093          */
2094         if ((ret = ctf_dwarf_function_count(cup, die, &cdf->cdf_fip,
2095             B_FALSE)) != 0) {
2096                 ctf_free(name, strlen(name) + 1);
2097                 ctf_free(cdf, sizeof (ctf_dwfunc_t));
2098                 return (ret);
2099         }
2100 
2101         ctf_dprintf("beginning to convert function arguments %s\n", name);
2102         if (cdf->cdf_fip.ctc_argc != 0) {
2103                 uint_t argc = cdf->cdf_fip.ctc_argc;
2104                 cdf->cdf_argv = ctf_alloc(sizeof (ctf_id_t) * argc);
2105                 if (cdf->cdf_argv == NULL) {
2106                         ctf_free(name, strlen(name) + 1);
2107                         ctf_free(cdf, sizeof (ctf_dwfunc_t));
2108                         return (ENOMEM);
2109                 }
2110                 if ((ret = ctf_dwarf_convert_fargs(cup, die,
2111                     &cdf->cdf_fip, cdf->cdf_argv)) != 0) {
2112                         ctf_free(cdf->cdf_argv, sizeof (ctf_id_t) * argc);
2113                         ctf_free(name, strlen(name) + 1);
2114                         ctf_free(cdf, sizeof (ctf_dwfunc_t));
2115                         return (ret);
2116                 }
2117         } else {
2118                 cdf->cdf_argv = NULL;
2119         }
2120 
2121         if ((ret = ctf_dwarf_isglobal(cup, die, &cdf->cdf_global)) != 0) {
2122                 ctf_free(cdf->cdf_argv, sizeof (ctf_id_t) *
2123                     cdf->cdf_fip.ctc_argc);
2124                 ctf_free(name, strlen(name) + 1);
2125                 ctf_free(cdf, sizeof (ctf_dwfunc_t));
2126                 return (ret);
2127         }
2128 
2129         ctf_list_append(&cup->cu_funcs, cdf);
2130         return (ret);
2131 }
2132 
2133 /*
2134  * Convert variables, but only if they're not prototypes and have names.
2135  */
2136 static int
2137 ctf_dwarf_convert_variable(ctf_cu_t *cup, Dwarf_Die die)
2138 {
2139         int ret;
2140         char *name;
2141         Dwarf_Bool b;
2142         Dwarf_Die tdie;
2143         ctf_id_t id;
2144         ctf_dwvar_t *cdv;
2145 
2146         /* Skip "Non-Defining Declarations" */
2147         if ((ret = ctf_dwarf_boolean(cup, die, DW_AT_declaration, &b)) == 0) {
2148                 if (b != 0)
2149                         return (0);
2150         } else if (ret != ENOENT) {
2151                 return (ret);
2152         }
2153 
2154         /*
2155          * If we find a DIE of "Declarations Completing Non-Defining
2156          * Declarations", we will use the referenced type's DIE.  This isn't
2157          * quite correct, e.g. DW_AT_decl_line will be the forward declaration
2158          * not this site.  It's sufficient for what we need, however: in
2159          * particular, we should find DW_AT_external as needed there.
2160          */
2161         if ((ret = ctf_dwarf_refdie(cup, die, DW_AT_specification,
2162             &tdie)) == 0) {
2163                 Dwarf_Off offset;
2164                 if ((ret = ctf_dwarf_offset(cup, tdie, &offset)) != 0)
2165                         return (ret);
2166                 ctf_dprintf("die 0x%llx DW_AT_specification -> die 0x%llx\n",
2167                     ctf_die_offset(die), ctf_die_offset(tdie));
2168                 die = tdie;
2169         } else if (ret != ENOENT) {
2170                 return (ret);
2171         }
2172 
2173         if ((ret = ctf_dwarf_string(cup, die, DW_AT_name, &name)) != 0 &&
2174             ret != ENOENT)
2175                 return (ret);
2176         if (ret == ENOENT)
2177                 return (0);
2178 
2179         if ((ret = ctf_dwarf_refdie(cup, die, DW_AT_type, &tdie)) != 0) {
2180                 ctf_free(name, strlen(name) + 1);
2181                 return (ret);
2182         }
2183 
2184         if ((ret = ctf_dwarf_convert_type(cup, tdie, &id,
2185             CTF_ADD_ROOT)) != 0)
2186                 return (ret);
2187 
2188         if ((cdv = ctf_alloc(sizeof (ctf_dwvar_t))) == NULL) {
2189                 ctf_free(name, strlen(name) + 1);
2190                 return (ENOMEM);
2191         }
2192 
2193         cdv->cdv_name = name;
2194         cdv->cdv_type = id;
2195 
2196         if ((ret = ctf_dwarf_isglobal(cup, die, &cdv->cdv_global)) != 0) {
2197                 ctf_free(cdv, sizeof (ctf_dwvar_t));
2198                 ctf_free(name, strlen(name) + 1);
2199                 return (ret);
2200         }
2201 
2202         ctf_list_append(&cup->cu_vars, cdv);
2203         return (0);
2204 }
2205 
2206 /*
2207  * Walk through our set of top-level types and process them.
2208  */
2209 static int
2210 ctf_dwarf_walk_toplevel(ctf_cu_t *cup, Dwarf_Die die)
2211 {
2212         int ret;
2213         Dwarf_Off offset;
2214         Dwarf_Half tag;
2215 
2216         if ((ret = ctf_dwarf_offset(cup, die, &offset)) != 0)
2217                 return (ret);
2218 
2219         if (offset > cup->cu_maxoff) {
2220                 (void) snprintf(cup->cu_errbuf, cup->cu_errlen,
2221                     "die offset %llu beyond maximum for header %llu\n",
2222                     offset, cup->cu_maxoff);
2223                 return (ECTF_CONVBKERR);
2224         }
2225 
2226         if ((ret = ctf_dwarf_tag(cup, die, &tag)) != 0)
2227                 return (ret);
2228 
2229         ret = 0;
2230         switch (tag) {
2231         case DW_TAG_subprogram:
2232                 ctf_dprintf("top level func\n");
2233                 ret = ctf_dwarf_convert_function(cup, die);
2234                 break;
2235         case DW_TAG_variable:
2236                 ctf_dprintf("top level var\n");
2237                 ret = ctf_dwarf_convert_variable(cup, die);
2238                 break;
2239         case DW_TAG_lexical_block:
2240                 ctf_dprintf("top level block\n");
2241                 ret = ctf_dwarf_walk_lexical(cup, die);
2242                 break;
2243         case DW_TAG_enumeration_type:
2244         case DW_TAG_structure_type:
2245         case DW_TAG_typedef:
2246         case DW_TAG_union_type:
2247                 ctf_dprintf("top level type\n");
2248                 ret = ctf_dwarf_convert_type(cup, die, NULL, B_TRUE);
2249                 break;
2250         default:
2251                 break;
2252         }
2253 
2254         return (ret);
2255 }
2256 
2257 
2258 /*
2259  * We're given a node. At this node we need to convert it and then proceed to
2260  * convert any siblings that are associaed with this die.
2261  */
2262 static int
2263 ctf_dwarf_convert_die(ctf_cu_t *cup, Dwarf_Die die)
2264 {
2265         while (die != NULL) {
2266                 int ret;
2267                 Dwarf_Die sib;
2268 
2269                 if ((ret = ctf_dwarf_walk_toplevel(cup, die)) != 0)
2270                         return (ret);
2271 
2272                 if ((ret = ctf_dwarf_sib(cup, die, &sib)) != 0)
2273                         return (ret);
2274                 die = sib;
2275         }
2276         return (0);
2277 }
2278 
2279 static int
2280 ctf_dwarf_fixup_die(ctf_cu_t *cup, boolean_t addpass)
2281 {
2282         ctf_dwmap_t *map;
2283 
2284         for (map = avl_first(&cup->cu_map); map != NULL;
2285             map = AVL_NEXT(&cup->cu_map, map)) {
2286                 int ret;
2287                 if (map->cdm_fix == B_FALSE)
2288                         continue;
2289                 if ((ret = ctf_dwarf_fixup_sou(cup, map->cdm_die, map->cdm_id,
2290                     addpass)) != 0)
2291                         return (ret);
2292         }
2293 
2294         return (0);
2295 }
2296 
2297 /*
2298  * The DWARF information about a symbol and the information in the symbol table
2299  * may not be the same due to symbol reduction that is performed by ld due to a
2300  * mapfile or other such directive. We process weak symbols at a later time.
2301  *
2302  * The following are the rules that we employ:
2303  *
2304  * 1. A DWARF function that is considered exported matches STB_GLOBAL entries
2305  * with the same name.
2306  *
2307  * 2. A DWARF function that is considered exported matches STB_LOCAL entries
2308  * with the same name and the same file. This case may happen due to mapfile
2309  * reduction.
2310  *
2311  * 3. A DWARF function that is not considered exported matches STB_LOCAL entries
2312  * with the same name and the same file.
2313  *
2314  * 4. A DWARF function that has the same name as the symbol table entry, but the
2315  * files do not match. This is considered a 'fuzzy' match. This may also happen
2316  * due to a mapfile reduction. Fuzzy matching is only used when we know that the
2317  * file in question refers to the primary object. This is because when a symbol
2318  * is reduced in a mapfile, it's always going to be tagged as a local value in
2319  * the generated output and it is considered as to belong to the primary file
2320  * which is the first STT_FILE symbol we see.
2321  */
2322 static boolean_t
2323 ctf_dwarf_symbol_match(const char *symtab_file, const char *symtab_name,
2324     uint_t symtab_bind, const char *dwarf_file, const char *dwarf_name,
2325     boolean_t dwarf_global, boolean_t *is_fuzzy)
2326 {
2327         *is_fuzzy = B_FALSE;
2328 
2329         if (symtab_bind != STB_LOCAL && symtab_bind != STB_GLOBAL) {
2330                 return (B_FALSE);
2331         }
2332 
2333         if (strcmp(symtab_name, dwarf_name) != 0) {
2334                 return (B_FALSE);
2335         }
2336 
2337         if (symtab_bind == STB_GLOBAL) {
2338                 return (dwarf_global);
2339         }
2340 
2341         if (strcmp(symtab_file, dwarf_file) == 0) {
2342                 return (B_TRUE);
2343         }
2344 
2345         if (dwarf_global) {
2346                 *is_fuzzy = B_TRUE;
2347                 return (B_TRUE);
2348         }
2349 
2350         return (B_FALSE);
2351 }
2352 
2353 static ctf_dwfunc_t *
2354 ctf_dwarf_match_func(ctf_cu_t *cup, const char *file, const char *name,
2355     uint_t bind, boolean_t primary)
2356 {
2357         ctf_dwfunc_t *cdf, *fuzzy = NULL;
2358 
2359         if (bind == STB_WEAK)
2360                 return (NULL);
2361 
2362         if (bind == STB_LOCAL && (file == NULL || cup->cu_name == NULL))
2363                 return (NULL);
2364 
2365         for (cdf = ctf_list_next(&cup->cu_funcs); cdf != NULL;
2366             cdf = ctf_list_next(cdf)) {
2367                 boolean_t is_fuzzy = B_FALSE;
2368 
2369                 if (ctf_dwarf_symbol_match(file, name, bind, cup->cu_name,
2370                     cdf->cdf_name, cdf->cdf_global, &is_fuzzy)) {
2371                         if (is_fuzzy) {
2372                                 if (primary) {
2373                                         fuzzy = cdf;
2374                                 }
2375                                 continue;
2376                         } else {
2377                                 return (cdf);
2378                         }
2379                 }
2380         }
2381 
2382         return (fuzzy);
2383 }
2384 
2385 static ctf_dwvar_t *
2386 ctf_dwarf_match_var(ctf_cu_t *cup, const char *file, const char *name,
2387     uint_t bind, boolean_t primary)
2388 {
2389         ctf_dwvar_t *cdv, *fuzzy = NULL;
2390 
2391         if (bind == STB_WEAK)
2392                 return (NULL);
2393 
2394         if (bind == STB_LOCAL && (file == NULL || cup->cu_name == NULL))
2395                 return (NULL);
2396 
2397         for (cdv = ctf_list_next(&cup->cu_vars); cdv != NULL;
2398             cdv = ctf_list_next(cdv)) {
2399                 boolean_t is_fuzzy = B_FALSE;
2400 
2401                 if (ctf_dwarf_symbol_match(file, name, bind, cup->cu_name,
2402                     cdv->cdv_name, cdv->cdv_global, &is_fuzzy)) {
2403                         if (is_fuzzy) {
2404                                 if (primary) {
2405                                         fuzzy = cdv;
2406                                 }
2407                         } else {
2408                                 return (cdv);
2409                         }
2410                 }
2411         }
2412 
2413         return (fuzzy);
2414 }
2415 
2416 static int
2417 ctf_dwarf_conv_funcvars_cb(const Elf64_Sym *symp, ulong_t idx,
2418     const char *file, const char *name, boolean_t primary, void *arg)
2419 {
2420         int ret;
2421         uint_t bind, type;
2422         ctf_cu_t *cup = arg;
2423 
2424         bind = GELF_ST_BIND(symp->st_info);
2425         type = GELF_ST_TYPE(symp->st_info);
2426 
2427         /*
2428          * Come back to weak symbols in another pass
2429          */
2430         if (bind == STB_WEAK)
2431                 return (0);
2432 
2433         if (type == STT_OBJECT) {
2434                 ctf_dwvar_t *cdv = ctf_dwarf_match_var(cup, file, name,
2435                     bind, primary);
2436                 if (cdv == NULL)
2437                         return (0);
2438                 ret = ctf_add_object(cup->cu_ctfp, idx, cdv->cdv_type);
2439                 ctf_dprintf("added object %s->%ld\n", name, cdv->cdv_type);
2440         } else {
2441                 ctf_dwfunc_t *cdf = ctf_dwarf_match_func(cup, file, name,
2442                     bind, primary);
2443                 if (cdf == NULL)
2444                         return (0);
2445                 ret = ctf_add_function(cup->cu_ctfp, idx, &cdf->cdf_fip,
2446                     cdf->cdf_argv);
2447                 ctf_dprintf("added function %s\n", name);
2448         }
2449 
2450         if (ret == CTF_ERR) {
2451                 return (ctf_errno(cup->cu_ctfp));
2452         }
2453 
2454         return (0);
2455 }
2456 
2457 static int
2458 ctf_dwarf_conv_funcvars(ctf_cu_t *cup)
2459 {
2460         return (ctf_symtab_iter(cup->cu_ctfp, ctf_dwarf_conv_funcvars_cb, cup));
2461 }
2462 
2463 /*
2464  * If we have a weak symbol, attempt to find the strong symbol it will resolve
2465  * to.  Note: the code where this actually happens is in sym_process() in
2466  * cmd/sgs/libld/common/syms.c
2467  *
2468  * Finding the matching symbol is unfortunately not trivial.  For a symbol to be
2469  * a candidate, it must:
2470  *
2471  * - have the same type (function, object)
2472  * - have the same value (address)
2473  * - have the same size
2474  * - not be another weak symbol
2475  * - belong to the same section (checked via section index)
2476  *
2477  * To perform this check, we first iterate over the symbol table. For each weak
2478  * symbol that we encounter, we then do a second walk over the symbol table,
2479  * calling ctf_dwarf_conv_check_weak(). If a symbol matches the above, then it's
2480  * either a local or global symbol. If we find a global symbol then we go with
2481  * it and stop searching for additional matches.
2482  *
2483  * If instead, we find a local symbol, things are more complicated. The first
2484  * thing we do is to try and see if we have file information about both symbols
2485  * (STT_FILE). If they both have file information and it matches, then we treat
2486  * that as a good match and stop searching for additional matches.
2487  *
2488  * Otherwise, this means we have a non-matching file and a local symbol. We
2489  * treat this as a candidate and if we find a better match (one of the two cases
2490  * above), use that instead. There are two different ways this can happen.
2491  * Either this is a completely different symbol, or it's a once-global symbol
 * that was scoped to local via a mapfile.  In the latter case, curfile is
 * likely inaccurate since the linker does not preserve the needed curfile in
 * the order of the symbol table (see the comments about locally scoped symbols
 * in libld's update_osym()).  As we can't tell this case from the former one,
 * we use this symbol iff no other matching symbol is found.
2497  *
2498  * What we really need here is a SUNW section containing weak<->strong mappings
2499  * that we can consume.
2500  */
/*
 * State shared between ctf_dwarf_conv_weaks_cb(), which fills it in for one
 * weak symbol, and ctf_dwarf_conv_check_weak(), which is run against every
 * other symbol to look for the strong symbol that the weak one resolves to.
 */
typedef struct ctf_dwarf_weak_arg {
        const Elf64_Sym *cweak_symp;    /* the weak symbol being resolved */
        const char *cweak_file;         /* file of the weak symbol, or NULL */
        boolean_t cweak_candidate;      /* a fallback local match was seen */
        ulong_t cweak_idx;              /* index of the latest match/candidate */
} ctf_dwarf_weak_arg_t;
2507 
2508 static int
2509 ctf_dwarf_conv_check_weak(const Elf64_Sym *symp, ulong_t idx, const char *file,
2510     const char *name, boolean_t primary, void *arg)
2511 {
2512         ctf_dwarf_weak_arg_t *cweak = arg;
2513 
2514         const Elf64_Sym *wsymp = cweak->cweak_symp;
2515 
2516         ctf_dprintf("comparing weak to %s\n", name);
2517 
2518         if (GELF_ST_BIND(symp->st_info) == STB_WEAK) {
2519                 return (0);
2520         }
2521 
2522         if (GELF_ST_TYPE(wsymp->st_info) != GELF_ST_TYPE(symp->st_info)) {
2523                 return (0);
2524         }
2525 
2526         if (wsymp->st_value != symp->st_value) {
2527                 return (0);
2528         }
2529 
2530         if (wsymp->st_size != symp->st_size) {
2531                 return (0);
2532         }
2533 
2534         if (wsymp->st_shndx != symp->st_shndx) {
2535                 return (0);
2536         }
2537 
2538         /*
2539          * Check if it's a weak candidate.
2540          */
2541         if (GELF_ST_BIND(symp->st_info) == STB_LOCAL &&
2542             (file == NULL || cweak->cweak_file == NULL ||
2543             strcmp(file, cweak->cweak_file) != 0)) {
2544                 cweak->cweak_candidate = B_TRUE;
2545                 cweak->cweak_idx = idx;
2546                 return (0);
2547         }
2548 
2549         /*
2550          * Found a match, break.
2551          */
2552         cweak->cweak_idx = idx;
2553         return (1);
2554 }
2555 
2556 static int
2557 ctf_dwarf_duplicate_sym(ctf_cu_t *cup, ulong_t idx, ulong_t matchidx)
2558 {
2559         ctf_id_t id = ctf_lookup_by_symbol(cup->cu_ctfp, matchidx);
2560 
2561         /*
2562          * If we matched something that for some reason didn't have type data,
2563          * we don't consider that a fatal error and silently swallow it.
2564          */
2565         if (id == CTF_ERR) {
2566                 if (ctf_errno(cup->cu_ctfp) == ECTF_NOTYPEDAT)
2567                         return (0);
2568                 else
2569                         return (ctf_errno(cup->cu_ctfp));
2570         }
2571 
2572         if (ctf_add_object(cup->cu_ctfp, idx, id) == CTF_ERR)
2573                 return (ctf_errno(cup->cu_ctfp));
2574 
2575         return (0);
2576 }
2577 
2578 static int
2579 ctf_dwarf_duplicate_func(ctf_cu_t *cup, ulong_t idx, ulong_t matchidx)
2580 {
2581         int ret;
2582         ctf_funcinfo_t fip;
2583         ctf_id_t *args = NULL;
2584 
2585         if (ctf_func_info(cup->cu_ctfp, matchidx, &fip) == CTF_ERR) {
2586                 if (ctf_errno(cup->cu_ctfp) == ECTF_NOFUNCDAT)
2587                         return (0);
2588                 else
2589                         return (ctf_errno(cup->cu_ctfp));
2590         }
2591 
2592         if (fip.ctc_argc != 0) {
2593                 args = ctf_alloc(sizeof (ctf_id_t) * fip.ctc_argc);
2594                 if (args == NULL)
2595                         return (ENOMEM);
2596 
2597                 if (ctf_func_args(cup->cu_ctfp, matchidx, fip.ctc_argc, args) ==
2598                     CTF_ERR) {
2599                         ctf_free(args, sizeof (ctf_id_t) * fip.ctc_argc);
2600                         return (ctf_errno(cup->cu_ctfp));
2601                 }
2602         }
2603 
2604         ret = ctf_add_function(cup->cu_ctfp, idx, &fip, args);
2605         if (args != NULL)
2606                 ctf_free(args, sizeof (ctf_id_t) * fip.ctc_argc);
2607         if (ret == CTF_ERR)
2608                 return (ctf_errno(cup->cu_ctfp));
2609 
2610         return (0);
2611 }
2612 
2613 static int
2614 ctf_dwarf_conv_weaks_cb(const Elf64_Sym *symp, ulong_t idx, const char *file,
2615     const char *name, boolean_t primary, void *arg)
2616 {
2617         int ret, type;
2618         ctf_dwarf_weak_arg_t cweak;
2619         ctf_cu_t *cup = arg;
2620 
2621         /*
2622          * We only care about weak symbols.
2623          */
2624         if (GELF_ST_BIND(symp->st_info) != STB_WEAK)
2625                 return (0);
2626 
2627         type = GELF_ST_TYPE(symp->st_info);
2628         ASSERT(type == STT_OBJECT || type == STT_FUNC);
2629 
2630         /*
2631          * For each weak symbol we encounter, we need to do a second iteration
2632          * to try and find a match. We should probably think about other
2633          * techniques to try and save us time in the future.
2634          */
2635         cweak.cweak_symp = symp;
2636         cweak.cweak_file = file;
2637         cweak.cweak_candidate = B_FALSE;
2638         cweak.cweak_idx = 0;
2639 
2640         ctf_dprintf("Trying to find weak equiv for %s\n", name);
2641 
2642         ret = ctf_symtab_iter(cup->cu_ctfp, ctf_dwarf_conv_check_weak, &cweak);
2643         VERIFY(ret == 0 || ret == 1);
2644 
2645         /*
2646          * Nothing was ever found, we're not going to add anything for this
2647          * entry.
2648          */
2649         if (ret == 0 && cweak.cweak_candidate == B_FALSE) {
2650                 ctf_dprintf("found no weak match for %s\n", name);
2651                 return (0);
2652         }
2653 
2654         /*
2655          * Now, finally go and add the type based on the match.
2656          */
2657         ctf_dprintf("matched weak symbol %lu to %lu\n", idx, cweak.cweak_idx);
2658         if (type == STT_OBJECT) {
2659                 ret = ctf_dwarf_duplicate_sym(cup, idx, cweak.cweak_idx);
2660         } else {
2661                 ret = ctf_dwarf_duplicate_func(cup, idx, cweak.cweak_idx);
2662         }
2663 
2664         return (ret);
2665 }
2666 
2667 static int
2668 ctf_dwarf_conv_weaks(ctf_cu_t *cup)
2669 {
2670         return (ctf_symtab_iter(cup->cu_ctfp, ctf_dwarf_conv_weaks_cb, cup));
2671 }
2672 
2673 /* ARGSUSED */
2674 static int
2675 ctf_dwarf_convert_one(void *arg, void *unused)
2676 {
2677         int ret;
2678         ctf_file_t *dedup;
2679         ctf_cu_t *cup = arg;
2680 
2681         ctf_dprintf("converting die: %s\n", cup->cu_name);
2682         ctf_dprintf("max offset: %x\n", cup->cu_maxoff);
2683         VERIFY(cup != NULL);
2684 
2685         ret = ctf_dwarf_convert_die(cup, cup->cu_cu);
2686         ctf_dprintf("ctf_dwarf_convert_die (%s) returned %d\n", cup->cu_name,
2687             ret);
2688         if (ret != 0) {
2689                 return (ret);
2690         }
2691         if (ctf_update(cup->cu_ctfp) != 0) {
2692                 return (ctf_dwarf_error(cup, cup->cu_ctfp, 0,
2693                     "failed to update output ctf container"));
2694         }
2695 
2696         ret = ctf_dwarf_fixup_die(cup, B_FALSE);
2697         ctf_dprintf("ctf_dwarf_fixup_die (%s) returned %d\n", cup->cu_name,
2698             ret);
2699         if (ret != 0) {
2700                 return (ret);
2701         }
2702         if (ctf_update(cup->cu_ctfp) != 0) {
2703                 return (ctf_dwarf_error(cup, cup->cu_ctfp, 0,
2704                     "failed to update output ctf container"));
2705         }
2706 
2707         ret = ctf_dwarf_fixup_die(cup, B_TRUE);
2708         ctf_dprintf("ctf_dwarf_fixup_die (%s) returned %d\n", cup->cu_name,
2709             ret);
2710         if (ret != 0) {
2711                 return (ret);
2712         }
2713         if (ctf_update(cup->cu_ctfp) != 0) {
2714                 return (ctf_dwarf_error(cup, cup->cu_ctfp, 0,
2715                     "failed to update output ctf container"));
2716         }
2717 
2718 
2719         if ((ret = ctf_dwarf_conv_funcvars(cup)) != 0) {
2720                 return (ctf_dwarf_error(cup, NULL, ret,
2721                     "failed to convert strong functions and variables"));
2722         }
2723 
2724         if (ctf_update(cup->cu_ctfp) != 0) {
2725                 return (ctf_dwarf_error(cup, cup->cu_ctfp, 0,
2726                     "failed to update output ctf container"));
2727         }
2728 
2729         if (cup->cu_doweaks == B_TRUE) {
2730                 if ((ret = ctf_dwarf_conv_weaks(cup)) != 0) {
2731                         return (ctf_dwarf_error(cup, NULL, ret,
2732                             "failed to convert weak functions and variables"));
2733                 }
2734 
2735                 if (ctf_update(cup->cu_ctfp) != 0) {
2736                         return (ctf_dwarf_error(cup, cup->cu_ctfp, 0,
2737                             "failed to update output ctf container"));
2738                 }
2739         }
2740 
2741         ctf_phase_dump(cup->cu_ctfp, "pre-dwarf-dedup", cup->cu_name);
2742         ctf_dprintf("adding inputs for dedup\n");
2743         if ((ret = ctf_merge_add(cup->cu_cmh, cup->cu_ctfp)) != 0) {
2744                 return (ctf_dwarf_error(cup, NULL, ret,
2745                     "failed to add inputs for merge"));
2746         }
2747 
2748         ctf_dprintf("starting dedup of %s\n", cup->cu_name);
2749         if ((ret = ctf_merge_dedup(cup->cu_cmh, &dedup)) != 0) {
2750                 return (ctf_dwarf_error(cup, NULL, ret,
2751                     "failed to deduplicate die"));
2752         }
2753         ctf_close(cup->cu_ctfp);
2754         cup->cu_ctfp = dedup;
2755         ctf_phase_dump(cup->cu_ctfp, "post-dwarf-dedup", cup->cu_name);
2756 
2757         return (0);
2758 }
2759 
2760 /*
2761  * Note, we expect that if we're returning a ctf_file_t from one of the dies,
2762  * say in the single node case, it's been saved and the entry here has been set
2763  * to NULL, which ctf_close happily ignores.
2764  */
2765 static void
2766 ctf_dwarf_free_die(ctf_cu_t *cup)
2767 {
2768         ctf_dwfunc_t *cdf, *ndf;
2769         ctf_dwvar_t *cdv, *ndv;
2770         ctf_dwbitf_t *cdb, *ndb;
2771         ctf_dwmap_t *map;
2772         void *cookie;
2773         Dwarf_Error derr;
2774 
2775         ctf_dprintf("Beginning to free die: %p\n", cup);
2776         cup->cu_elf = NULL;
2777         ctf_dprintf("Trying to free name: %p\n", cup->cu_name);
2778         if (cup->cu_name != NULL)
2779                 ctf_free(cup->cu_name, strlen(cup->cu_name) + 1);
2780         ctf_dprintf("Trying to free merge handle: %p\n", cup->cu_cmh);
2781         if (cup->cu_cmh != NULL) {
2782                 ctf_merge_fini(cup->cu_cmh);
2783                 cup->cu_cmh = NULL;
2784         }
2785 
2786         ctf_dprintf("Trying to free functions\n");
2787         for (cdf = ctf_list_next(&cup->cu_funcs); cdf != NULL; cdf = ndf) {
2788                 ndf = ctf_list_next(cdf);
2789                 ctf_free(cdf->cdf_name, strlen(cdf->cdf_name) + 1);
2790                 if (cdf->cdf_fip.ctc_argc != 0) {
2791                         ctf_free(cdf->cdf_argv,
2792                             sizeof (ctf_id_t) * cdf->cdf_fip.ctc_argc);
2793                 }
2794                 ctf_free(cdf, sizeof (ctf_dwfunc_t));
2795         }
2796 
2797         ctf_dprintf("Trying to free variables\n");
2798         for (cdv = ctf_list_next(&cup->cu_vars); cdv != NULL; cdv = ndv) {
2799                 ndv = ctf_list_next(cdv);
2800                 ctf_free(cdv->cdv_name, strlen(cdv->cdv_name) + 1);
2801                 ctf_free(cdv, sizeof (ctf_dwvar_t));
2802         }
2803 
2804         ctf_dprintf("Trying to free bitfields\n");
2805         for (cdb = ctf_list_next(&cup->cu_bitfields); cdb != NULL; cdb = ndb) {
2806                 ndb = ctf_list_next(cdb);
2807                 ctf_free(cdb, sizeof (ctf_dwbitf_t));
2808         }
2809 
2810         ctf_dprintf("Trying to clean up dwarf_t: %p\n", cup->cu_dwarf);
2811         if (cup->cu_dwarf != NULL)
2812                 (void) dwarf_finish(cup->cu_dwarf, &derr);
2813         cup->cu_dwarf = NULL;
2814         ctf_close(cup->cu_ctfp);
2815 
2816         cookie = NULL;
2817         while ((map = avl_destroy_nodes(&cup->cu_map, &cookie)) != NULL) {
2818                 ctf_free(map, sizeof (ctf_dwmap_t));
2819         }
2820         avl_destroy(&cup->cu_map);
2821         cup->cu_errbuf = NULL;
2822 }
2823 
2824 static void
2825 ctf_dwarf_free_dies(ctf_cu_t *cdies, int ndies)
2826 {
2827         int i;
2828 
2829         ctf_dprintf("Beginning to free dies\n");
2830         for (i = 0; i < ndies; i++) {
2831                 ctf_dwarf_free_die(&cdies[i]);
2832         }
2833 
2834         ctf_free(cdies, sizeof (ctf_cu_t) * ndies);
2835 }
2836 
2837 static int
2838 ctf_dwarf_count_dies(Dwarf_Debug dw, Dwarf_Error *derr, int *ndies,
2839     char *errbuf, size_t errlen)
2840 {
2841         int ret;
2842         Dwarf_Half vers;
2843         Dwarf_Unsigned nexthdr;
2844 
2845         while ((ret = dwarf_next_cu_header(dw, NULL, &vers, NULL, NULL,
2846             &nexthdr, derr)) != DW_DLV_NO_ENTRY) {
2847                 if (ret != DW_DLV_OK) {
2848                         (void) snprintf(errbuf, errlen,
2849                             "file does not contain valid DWARF data: %s\n",
2850                             dwarf_errmsg(*derr));
2851                         return (ECTF_CONVBKERR);
2852                 }
2853 
2854                 if (vers != DWARF_VERSION_TWO) {
2855                         (void) snprintf(errbuf, errlen,
2856                             "unsupported DWARF version: %d\n", vers);
2857                         return (ECTF_CONVBKERR);
2858                 }
2859                 *ndies = *ndies + 1;
2860         }
2861 
2862         return (0);
2863 }
2864 
2865 static int
2866 ctf_dwarf_init_die(int fd, Elf *elf, ctf_cu_t *cup, int ndie, char *errbuf,
2867     size_t errlen)
2868 {
2869         int ret;
2870         Dwarf_Unsigned hdrlen, abboff, nexthdr;
2871         Dwarf_Half addrsz;
2872         Dwarf_Unsigned offset = 0;
2873         Dwarf_Error derr;
2874 
2875         while ((ret = dwarf_next_cu_header(cup->cu_dwarf, &hdrlen, NULL,
2876             &abboff, &addrsz, &nexthdr, &derr)) != DW_DLV_NO_ENTRY) {
2877                 char *name;
2878                 Dwarf_Die cu, child;
2879 
2880                 /* Based on the counting above, we should be good to go */
2881                 VERIFY(ret == DW_DLV_OK);
2882                 if (ndie > 0) {
2883                         ndie--;
2884                         offset = nexthdr;
2885                         continue;
2886                 }
2887 
2888                 /*
2889                  * Compilers are apparently inconsistent. Some emit no DWARF for
2890                  * empty files and others emit empty compilation unit.
2891                  */
2892                 cup->cu_voidtid = CTF_ERR;
2893                 cup->cu_longtid = CTF_ERR;
2894                 cup->cu_elf = elf;
2895                 cup->cu_maxoff = nexthdr - 1;
2896                 cup->cu_ctfp = ctf_fdcreate(fd, &ret);
2897                 if (cup->cu_ctfp == NULL)
2898                         return (ret);
2899 
2900                 avl_create(&cup->cu_map, ctf_dwmap_comp, sizeof (ctf_dwmap_t),
2901                     offsetof(ctf_dwmap_t, cdm_avl));
2902                 cup->cu_errbuf = errbuf;
2903                 cup->cu_errlen = errlen;
2904                 bzero(&cup->cu_vars, sizeof (ctf_list_t));
2905                 bzero(&cup->cu_funcs, sizeof (ctf_list_t));
2906                 bzero(&cup->cu_bitfields, sizeof (ctf_list_t));
2907 
2908                 if ((ret = ctf_dwarf_die_elfenc(elf, cup, errbuf,
2909                     errlen)) != 0)
2910                         return (ret);
2911 
2912                 if ((ret = ctf_dwarf_sib(cup, NULL, &cu)) != 0)
2913                         return (ret);
2914 
2915                 if (cu == NULL) {
2916                         (void) snprintf(errbuf, errlen,
2917                             "file does not contain DWARF data");
2918                         return (ECTF_CONVNODEBUG);
2919                 }
2920 
2921                 if ((ret = ctf_dwarf_child(cup, cu, &child)) != 0)
2922                         return (ret);
2923 
2924                 if (child == NULL) {
2925                         (void) snprintf(errbuf, errlen,
2926                             "file does not contain DWARF data");
2927                         return (ECTF_CONVNODEBUG);
2928                 }
2929 
2930                 cup->cu_cuoff = offset;
2931                 cup->cu_cu = child;
2932 
2933                 if ((cup->cu_cmh = ctf_merge_init(fd, &ret)) == NULL)
2934                         return (ret);
2935 
2936                 if (ctf_dwarf_string(cup, cu, DW_AT_name, &name) == 0) {
2937                         size_t len = strlen(name) + 1;
2938                         char *b = basename(name);
2939                         cup->cu_name = strdup(b);
2940                         ctf_free(name, len);
2941                 }
2942                 break;
2943         }
2944 
2945         return (0);
2946 }
2947 
2948 /*
2949  * This is our only recourse to identify a C source file that is missing debug
2950  * info: it will be mentioned as an STT_FILE, but not have a compile unit entry.
2951  * (A traditional ctfmerge works on individual files, so can identify missing
2952  * DWARF more directly, via ctf_has_c_source() on the .o file.)
2953  *
2954  * As we operate on basenames, this can of course miss some cases, but it's
2955  * better than not checking at all.
2956  *
2957  * We explicitly whitelist some CRT components.  Failing that, there's always
2958  * the -m option.
2959  */
2960 static boolean_t
2961 c_source_has_debug(const char *file, ctf_cu_t *cus, size_t nr_cus)
2962 {
2963         const char *basename = strrchr(file, '/');
2964 
2965         if (basename == NULL)
2966                 basename = file;
2967         else
2968                 basename++;
2969 
2970         if (strcmp(basename, "common-crt.c") == 0 ||
2971             strcmp(basename, "gmon.c") == 0 ||
2972             strcmp(basename, "dlink_init.c") == 0 ||
2973             strcmp(basename, "dlink_common.c") == 0 ||
2974             strncmp(basename, "crt", strlen("crt")) == 0 ||
2975             strncmp(basename, "values-", strlen("values-")) == 0)
2976                 return (B_TRUE);
2977 
2978         for (size_t i = 0; i < nr_cus; i++) {
2979                 if (strcmp(basename, cus[i].cu_name) == 0)
2980                         return (B_TRUE);
2981         }
2982 
2983         return (B_FALSE);
2984 }
2985 
2986 static int
2987 ctf_dwarf_check_missing(ctf_cu_t *cus, size_t nr_cus, Elf *elf,
2988     char *errmsg, size_t errlen)
2989 {
2990         Elf_Scn *scn, *strscn;
2991         Elf_Data *data, *strdata;
2992         GElf_Shdr shdr;
2993         ulong_t i;
2994 
2995         scn = NULL;
2996         while ((scn = elf_nextscn(elf, scn)) != NULL) {
2997                 if (gelf_getshdr(scn, &shdr) == NULL) {
2998                         (void) snprintf(errmsg, errlen,
2999                             "failed to get section header: %s\n",
3000                             elf_errmsg(elf_errno()));
3001                         return (EINVAL);
3002                 }
3003 
3004                 if (shdr.sh_type == SHT_SYMTAB)
3005                         break;
3006         }
3007 
3008         if (scn == NULL)
3009                 return (0);
3010 
3011         if ((strscn = elf_getscn(elf, shdr.sh_link)) == NULL) {
3012                 (void) snprintf(errmsg, errlen,
3013                     "failed to get str section: %s\n",
3014                     elf_errmsg(elf_errno()));
3015                 return (EINVAL);
3016         }
3017 
3018         if ((data = elf_getdata(scn, NULL)) == NULL) {
3019                 (void) snprintf(errmsg, errlen, "failed to read section: %s\n",
3020                     elf_errmsg(elf_errno()));
3021                 return (EINVAL);
3022         }
3023 
3024         if ((strdata = elf_getdata(strscn, NULL)) == NULL) {
3025                 (void) snprintf(errmsg, errlen,
3026                     "failed to read string table: %s\n",
3027                     elf_errmsg(elf_errno()));
3028                 return (EINVAL);
3029         }
3030 
3031         for (i = 0; i < shdr.sh_size / shdr.sh_entsize; i++) {
3032                 GElf_Sym sym;
3033                 const char *file;
3034                 size_t len;
3035 
3036                 if (gelf_getsym(data, i, &sym) == NULL) {
3037                         (void) snprintf(errmsg, errlen,
3038                             "failed to read sym %lu: %s\n",
3039                             i, elf_errmsg(elf_errno()));
3040                         return (EINVAL);
3041                 }
3042 
3043                 if (GELF_ST_TYPE(sym.st_info) != STT_FILE)
3044                         continue;
3045 
3046                 file = (const char *)((uintptr_t)strdata->d_buf + sym.st_name);
3047                 len = strlen(file);
3048                 if (len < 2 || strncmp(".c", &file[len - 2], 2) != 0)
3049                         continue;
3050 
3051                 if (!c_source_has_debug(file, cus, nr_cus)) {
3052                         (void) snprintf(errmsg, errlen,
3053                             "file %s is missing debug info\n", file);
3054                         return (ECTF_CONVNODEBUG);
3055                 }
3056         }
3057 
3058         return (0);
3059 }
3060 
3061 int
3062 ctf_dwarf_convert(int fd, Elf *elf, uint_t nthrs, uint_t flags,
3063     ctf_file_t **fpp, char *errbuf, size_t errlen)
3064 {
3065         int err, ret, ndies, i;
3066         Dwarf_Debug dw;
3067         Dwarf_Error derr;
3068         ctf_cu_t *cdies = NULL, *cup;
3069         workq_t *wqp = NULL;
3070 
3071         *fpp = NULL;
3072 
3073         ret = dwarf_elf_init(elf, DW_DLC_READ, NULL, NULL, &dw, &derr);
3074         if (ret != DW_DLV_OK) {
3075                 if (ret == DW_DLV_NO_ENTRY ||
3076                     dwarf_errno(derr) == DW_DLE_DEBUG_INFO_NULL) {
3077                         (void) snprintf(errbuf, errlen,
3078                             "file does not contain DWARF data\n");
3079                         return (ECTF_CONVNODEBUG);
3080                 }
3081 
3082                 (void) snprintf(errbuf, errlen,
3083                     "dwarf_elf_init() failed: %s\n", dwarf_errmsg(derr));
3084                 return (ECTF_CONVBKERR);
3085         }
3086 
3087         /*
3088          * Iterate over all of the compilation units and create a ctf_cu_t for
3089          * each of them.  This is used to determine if we have zero, one, or
3090          * multiple dies to convert. If we have zero, that's an error. If
3091          * there's only one die, that's the simple case.  No merge needed and
3092          * only a single Dwarf_Debug as well.
3093          */
3094         ndies = 0;
3095         err = ctf_dwarf_count_dies(dw, &derr, &ndies, errbuf, errlen);
3096 
3097         ctf_dprintf("found %d DWARF CUs\n", ndies);
3098 
3099         if (ndies == 0) {
3100                 (void) snprintf(errbuf, errlen,
3101                     "file does not contain DWARF data\n");
3102                 return (ECTF_CONVNODEBUG);
3103         }
3104 
3105         (void) dwarf_finish(dw, &derr);
3106         cdies = ctf_alloc(sizeof (ctf_cu_t) * ndies);
3107         if (cdies == NULL) {
3108                 return (ENOMEM);
3109         }
3110 
3111         bzero(cdies, sizeof (ctf_cu_t) * ndies);
3112 
3113         for (i = 0; i < ndies; i++) {
3114                 cup = &cdies[i];
3115                 ret = dwarf_elf_init(elf, DW_DLC_READ, NULL, NULL,
3116                     &cup->cu_dwarf, &derr);
3117                 if (ret != 0) {
3118                         ctf_free(cdies, sizeof (ctf_cu_t) * ndies);
3119                         (void) snprintf(errbuf, errlen,
3120                             "failed to initialize DWARF: %s\n",
3121                             dwarf_errmsg(derr));
3122                         return (ECTF_CONVBKERR);
3123                 }
3124 
3125                 err = ctf_dwarf_init_die(fd, elf, cup, i, errbuf, errlen);
3126                 if (err != 0)
3127                         goto out;
3128 
3129                 cup->cu_doweaks = ndies > 1 ? B_FALSE : B_TRUE;
3130         }
3131 
3132         if (!(flags & CTF_ALLOW_MISSING_DEBUG) &&
3133             (err = ctf_dwarf_check_missing(cdies, ndies,
3134             elf, errbuf, errlen)) != 0)
3135                 goto out;
3136 
3137         /*
3138          * If we only have one compilation unit, there's no reason to use
3139          * multiple threads, even if the user requested them. After all, they
3140          * just gave us an upper bound.
3141          */
3142         if (ndies == 1)
3143                 nthrs = 1;
3144 
3145         if (workq_init(&wqp, nthrs) == -1) {
3146                 err = errno;
3147                 goto out;
3148         }
3149 
3150         for (i = 0; i < ndies; i++) {
3151                 cup = &cdies[i];
3152                 ctf_dprintf("adding cu %s: %p, %x %x\n", cup->cu_name,
3153                     cup->cu_cu, cup->cu_cuoff, cup->cu_maxoff);
3154                 if (workq_add(wqp, cup) == -1) {
3155                         err = errno;
3156                         goto out;
3157                 }
3158         }
3159 
3160         ret = workq_work(wqp, ctf_dwarf_convert_one, NULL, &err);
3161         if (ret == WORKQ_ERROR) {
3162                 err = errno;
3163                 goto out;
3164         } else if (ret == WORKQ_UERROR) {
3165                 ctf_dprintf("internal convert failed: %s\n",
3166                     ctf_errmsg(err));
3167                 goto out;
3168         }
3169 
3170         ctf_dprintf("Determining next phase: have %d CUs\n", ndies);
3171         if (ndies != 1) {
3172                 ctf_merge_t *cmp;
3173 
3174                 cmp = ctf_merge_init(fd, &err);
3175                 if (cmp == NULL)
3176                         goto out;
3177 
3178                 ctf_dprintf("setting threads\n");
3179                 if ((err = ctf_merge_set_nthreads(cmp, nthrs)) != 0) {
3180                         ctf_merge_fini(cmp);
3181                         goto out;
3182                 }
3183 
3184                 for (i = 0; i < ndies; i++) {
3185                         cup = &cdies[i];
3186                         if ((err = ctf_merge_add(cmp, cup->cu_ctfp)) != 0) {
3187                                 ctf_merge_fini(cmp);
3188                                 goto out;
3189                         }
3190                 }
3191 
3192                 ctf_dprintf("performing merge\n");
3193                 err = ctf_merge_merge(cmp, fpp);
3194                 if (err != 0) {
3195                         ctf_dprintf("failed merge!\n");
3196                         *fpp = NULL;
3197                         ctf_merge_fini(cmp);
3198                         goto out;
3199                 }
3200                 ctf_merge_fini(cmp);
3201                 err = 0;
3202                 ctf_dprintf("successfully converted!\n");
3203         } else {
3204                 err = 0;
3205                 *fpp = cdies->cu_ctfp;
3206                 cdies->cu_ctfp = NULL;
3207                 ctf_dprintf("successfully converted!\n");
3208         }
3209 
3210 out:
3211         workq_fini(wqp);
3212         ctf_dwarf_free_dies(cdies, ndies);
3213         return (err);
3214 }