Print this page
10812 ctf tools shouldn't add blank labels
10813 ctf symbol mapping needs work
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>

Split Close
Expand all
Collapse all
          --- old/usr/src/lib/libctf/common/ctf_merge.c
          +++ new/usr/src/lib/libctf/common/ctf_merge.c
↓ open down ↓ 2 lines elided ↑ open up ↑
   3    3   * Common Development and Distribution License ("CDDL"), version 1.0.
   4    4   * You may only use this file in accordance with the terms of version
   5    5   * 1.0 of the CDDL.
   6    6   *
   7    7   * A full copy of the text of the CDDL should have accompanied this
   8    8   * source.  A copy of the CDDL is also available via the Internet at
   9    9   * http://www.illumos.org/license/CDDL.
  10   10   */
  11   11  
  12   12  /*
  13      - * Copyright (c) 2015 Joyent, Inc.
       13 + * Copyright (c) 2019 Joyent, Inc.
  14   14   */
  15   15  
  16   16  /*
  17   17   * To perform a merge of two CTF containers, we first diff the two containers'
  18   18   * types. For every type that's in the src container, but not in the dst
  19   19   * container, we note it and add it to dst container. If there are any objects
  20   20   * or functions associated with src, we go through and update the types that
  21   21   * they refer to such that they all refer to types in the dst container.
  22   22   *
  23   23   * The bulk of the logic for the merge, after we've run the diff, occurs in
↓ open down ↓ 29 lines elided ↑ open up ↑
  53   53          ctf_file_t *cm_out;             /* Output CTF file */
  54   54          ctf_file_t *cm_src;             /* Input CTF file */
  55   55          ctf_merge_tinfo_t *cm_tmap;     /* Type state information */
  56   56          boolean_t cm_dedup;             /* Are we doing a dedup? */
  57   57          boolean_t cm_unique;            /* are we doing a uniquify? */
  58   58  } ctf_merge_types_t;
  59   59  
  60   60  typedef struct ctf_merge_objmap {
  61   61          list_node_t cmo_node;
  62   62          const char *cmo_name;           /* Symbol name */
       63 +        const char *cmo_file;           /* Symbol file */
  63   64          ulong_t cmo_idx;                /* Symbol ID */
       65 +        Elf64_Sym cmo_sym;              /* Symbol Entry */
  64   66          ctf_id_t cmo_tid;               /* Type ID */
  65   67  } ctf_merge_objmap_t;
  66   68  
  67   69  typedef struct ctf_merge_funcmap {
  68   70          list_node_t cmf_node;
  69   71          const char *cmf_name;           /* Symbol name */
       72 +        const char *cmf_file;           /* Symbol file */
  70   73          ulong_t cmf_idx;                /* Symbol ID */
       74 +        Elf64_Sym cmf_sym;              /* Symbol Entry */
  71   75          ctf_id_t cmf_rtid;              /* Type ID */
  72   76          uint_t cmf_flags;               /* ctf_funcinfo_t ctc_flags */
  73   77          uint_t cmf_argc;                /* Number of arguments */
  74   78          ctf_id_t cmf_args[];            /* Types of arguments */
  75   79  } ctf_merge_funcmap_t;
  76   80  
  77   81  typedef struct ctf_merge_input {
  78   82          list_node_t cmi_node;
  79   83          ctf_file_t *cmi_input;
  80   84          list_t cmi_omap;
↓ open down ↓ 6 lines elided ↑ open up ↑
  87   91          uint_t cmh_ninputs;             /* Number of inputs */
  88   92          uint_t cmh_nthreads;            /* Number of threads to use */
  89   93          ctf_file_t *cmh_unique;         /* ctf to uniquify against */
  90   94          boolean_t cmh_msyms;            /* Should we merge symbols/funcs? */
  91   95          int cmh_ofd;                    /* FD for output file */
  92   96          int cmh_flags;                  /* Flags that control merge behavior */
  93   97          char *cmh_label;                /* Optional label */
  94   98          char *cmh_pname;                /* Parent name */
  95   99  };
  96  100  
      101 +typedef struct ctf_merge_symbol_arg {
      102 +        list_t *cmsa_objmap;
      103 +        list_t *cmsa_funcmap;
      104 +        ctf_file_t *cmsa_out;
      105 +        boolean_t cmsa_dedup;
      106 +} ctf_merge_symbol_arg_t;
      107 +
  97  108  static int ctf_merge_add_type(ctf_merge_types_t *, ctf_id_t);
  98  109  
  99  110  static ctf_id_t
 100  111  ctf_merge_gettype(ctf_merge_types_t *cmp, ctf_id_t id)
 101  112  {
 102  113          if (cmp->cm_dedup == B_FALSE) {
 103  114                  VERIFY(cmp->cm_tmap[id].cmt_map != 0);
 104  115                  return (cmp->cm_tmap[id].cmt_map);
 105  116          }
 106  117  
↓ open down ↓ 547 lines elided ↑ open up ↑
 654  665   *
 655  666   * Importantly, we *must* call ctf_update between the second and third pass,
 656  667   * otherwise several of the libctf functions will not properly find the data in
 657  668   * the container. If we're doing a dedup we also fix up the type mapping.
 658  669   */
 659  670  static int
 660  671  ctf_merge_common(ctf_merge_types_t *cmp)
 661  672  {
 662  673          int ret, i;
 663  674  
 664      -        ctf_phase_dump(cmp->cm_src, "merge-common-src");
 665      -        ctf_phase_dump(cmp->cm_out, "merge-common-dest");
      675 +        ctf_phase_dump(cmp->cm_src, "merge-common-src", NULL);
      676 +        ctf_phase_dump(cmp->cm_out, "merge-common-dest", NULL);
 666  677  
 667  678          /* Pass 1 */
 668  679          for (i = 1; i <= cmp->cm_src->ctf_typemax; i++) {
 669  680                  if (cmp->cm_tmap[i].cmt_forward == B_TRUE) {
 670  681                          ret = ctf_merge_add_sou(cmp, i, B_TRUE);
 671  682                          if (ret != 0) {
 672  683                                  return (ret);
 673  684                          }
 674  685                  }
 675  686          }
↓ open down ↓ 81 lines elided ↑ open up ↑
 757  768  }
 758  769  
 759  770  static void
 760  771  ctf_merge_types_fini(ctf_merge_types_t *cmp)
 761  772  {
 762  773          ctf_free(cmp->cm_tmap, sizeof (ctf_merge_tinfo_t) *
 763  774              (cmp->cm_src->ctf_typemax + 1));
 764  775  }
 765  776  
 766  777  /*
      778 + * After performing a pass, we need to go through the object and function type
      779 + * maps and potentially fix them up based on the new maps that we have.
      780 + */
      781 +static void
      782 +ctf_merge_fixup_symmaps(ctf_merge_types_t *cmp, ctf_merge_input_t *cmi)
      783 +{
      784 +        ctf_merge_objmap_t *cmo;
      785 +        ctf_merge_funcmap_t *cmf;
      786 +
      787 +        for (cmo = list_head(&cmi->cmi_omap); cmo != NULL;
      788 +            cmo = list_next(&cmi->cmi_omap, cmo)) {
      789 +                VERIFY3S(cmo->cmo_tid, !=, 0);
      790 +                VERIFY(cmp->cm_tmap[cmo->cmo_tid].cmt_map != 0);
      791 +                cmo->cmo_tid = cmp->cm_tmap[cmo->cmo_tid].cmt_map;
      792 +        }
      793 +
      794 +        for (cmf = list_head(&cmi->cmi_fmap); cmf != NULL;
      795 +            cmf = list_next(&cmi->cmi_fmap, cmf)) {
      796 +                int i;
      797 +
      798 +                VERIFY(cmp->cm_tmap[cmf->cmf_rtid].cmt_map != 0);
      799 +                cmf->cmf_rtid = cmp->cm_tmap[cmf->cmf_rtid].cmt_map;
      800 +                for (i = 0; i < cmf->cmf_argc; i++) {
      801 +                        VERIFY(cmp->cm_tmap[cmf->cmf_args[i]].cmt_map != 0);
      802 +                        cmf->cmf_args[i] =
      803 +                            cmp->cm_tmap[cmf->cmf_args[i]].cmt_map;
      804 +                }
      805 +        }
      806 +}
      807 +
      808 +/*
 767  809   * Merge the types contained inside of two input files. The second input file is
 768  810   * always going to be the destination. We're guaranteed that it's always
 769  811   * writeable.
 770  812   */
 771  813  static int
 772  814  ctf_merge_types(void *arg, void *arg2, void **outp, void *unsued)
 773  815  {
 774  816          int ret;
 775  817          ctf_merge_types_t cm;
 776  818          ctf_diff_t *cdp;
 777      -        ctf_merge_objmap_t *cmo;
 778      -        ctf_merge_funcmap_t *cmf;
 779  819          ctf_merge_input_t *scmi = arg;
 780  820          ctf_merge_input_t *dcmi = arg2;
 781  821          ctf_file_t *out = dcmi->cmi_input;
 782  822          ctf_file_t *source = scmi->cmi_input;
 783  823  
 784  824          ctf_dprintf("merging %p->%p\n", source, out);
 785  825  
 786  826          if (!(out->ctf_flags & LCTF_RDWR))
 787  827                  return (ctf_set_errno(out, ECTF_RDONLY));
 788  828  
↓ open down ↓ 21 lines elided ↑ open up ↑
 810  850          if (ret == 0) {
 811  851                  ret = ctf_update(out);
 812  852                  ctf_dprintf("update returned with %d\n", ret);
 813  853          } else {
 814  854                  goto cleanup;
 815  855          }
 816  856  
 817  857          /*
 818  858           * Now we need to fix up the object and function maps.
 819  859           */
 820      -        for (cmo = list_head(&scmi->cmi_omap); cmo != NULL;
 821      -            cmo = list_next(&scmi->cmi_omap, cmo)) {
 822      -                if (cmo->cmo_tid == 0)
 823      -                        continue;
 824      -                VERIFY(cm.cm_tmap[cmo->cmo_tid].cmt_map != 0);
 825      -                cmo->cmo_tid = cm.cm_tmap[cmo->cmo_tid].cmt_map;
 826      -        }
      860 +        ctf_merge_fixup_symmaps(&cm, scmi);
 827  861  
 828      -        for (cmf = list_head(&scmi->cmi_fmap); cmf != NULL;
 829      -            cmf = list_next(&scmi->cmi_fmap, cmf)) {
 830      -                int i;
 831      -
 832      -                VERIFY(cm.cm_tmap[cmf->cmf_rtid].cmt_map != 0);
 833      -                cmf->cmf_rtid = cm.cm_tmap[cmf->cmf_rtid].cmt_map;
 834      -                for (i = 0; i < cmf->cmf_argc; i++) {
 835      -                        VERIFY(cm.cm_tmap[cmf->cmf_args[i]].cmt_map != 0);
 836      -                        cmf->cmf_args[i] = cm.cm_tmap[cmf->cmf_args[i]].cmt_map;
 837      -                }
 838      -        }
 839      -
 840  862          /*
 841  863           * Now that we've fixed things up, we need to give our function and
 842  864           * object maps to the destination, such that it can continue to update
 843  865           * them going forward.
 844  866           */
 845  867          list_move_tail(&dcmi->cmi_fmap, &scmi->cmi_fmap);
 846  868          list_move_tail(&dcmi->cmi_omap, &scmi->cmi_omap);
 847  869  
 848  870  cleanup:
 849  871          if (ret == 0)
 850  872                  *outp = dcmi;
 851  873          ctf_merge_types_fini(&cm);
 852  874          ctf_diff_fini(cdp);
 853  875          if (ret != 0)
 854  876                  return (ctf_errno(out));
      877 +        ctf_phase_bump();
 855  878          return (0);
 856  879  }
 857  880  
 858      -/*
 859      - * After performing a pass, we need to go through the object and function type
 860      - * maps and potentially fix them up based on the new maps that we haev.
 861      - */
 862      -static void
 863      -ctf_merge_fixup_nontypes(ctf_merge_types_t *cmp, ctf_merge_input_t *cmi)
 864      -{
 865      -        ctf_merge_objmap_t *cmo;
 866      -        ctf_merge_funcmap_t *cmf;
 867      -
 868      -        for (cmo = list_head(&cmi->cmi_omap); cmo != NULL;
 869      -            cmo = list_next(&cmi->cmi_omap, cmo)) {
 870      -                if (cmo->cmo_tid == 0)
 871      -                        continue;
 872      -                VERIFY(cmp->cm_tmap[cmo->cmo_tid].cmt_map != 0);
 873      -                cmo->cmo_tid = cmp->cm_tmap[cmo->cmo_tid].cmt_map;
 874      -        }
 875      -
 876      -        for (cmf = list_head(&cmi->cmi_fmap); cmf != NULL;
 877      -            cmf = list_next(&cmi->cmi_fmap, cmf)) {
 878      -                int i;
 879      -
 880      -                VERIFY(cmp->cm_tmap[cmf->cmf_rtid].cmt_map != 0);
 881      -                cmf->cmf_rtid = cmp->cm_tmap[cmf->cmf_rtid].cmt_map;
 882      -                for (i = 0; i < cmf->cmf_argc; i++) {
 883      -                        VERIFY(cmp->cm_tmap[cmf->cmf_args[i]].cmt_map !=
 884      -                            0);
 885      -                        cmf->cmf_args[i] =
 886      -                            cmp->cm_tmap[cmf->cmf_args[i]].cmt_map;
 887      -                }
 888      -        }
 889      -}
 890      -
 891  881  static int
 892  882  ctf_uniquify_types(ctf_merge_t *cmh, ctf_file_t *src, ctf_file_t **outp)
 893  883  {
 894  884          int err, ret;
 895  885          ctf_file_t *out;
 896  886          ctf_merge_types_t cm;
 897  887          ctf_diff_t *cdp;
 898  888          ctf_merge_input_t *cmi;
 899  889          ctf_file_t *parent = cmh->cmh_unique;
 900  890  
↓ open down ↓ 40 lines elided ↑ open up ↑
 941  931          }
 942  932  
 943  933          if (ret != 0) {
 944  934                  ctf_merge_types_fini(&cm);
 945  935                  ctf_diff_fini(cdp);
 946  936                  return (ctf_set_errno(src, ctf_errno(cm.cm_out)));
 947  937          }
 948  938  
 949  939          for (cmi = list_head(&cmh->cmh_inputs); cmi != NULL;
 950  940              cmi = list_next(&cmh->cmh_inputs, cmi)) {
 951      -                ctf_merge_fixup_nontypes(&cm, cmi);
      941 +                ctf_merge_fixup_symmaps(&cm, cmi);
 952  942          }
 953  943  
 954  944          ctf_merge_types_fini(&cm);
 955  945          ctf_diff_fini(cdp);
 956  946          *outp = out;
 957  947          return (0);
 958  948  }
 959  949  
 960  950  static void
 961  951  ctf_merge_fini_input(ctf_merge_input_t *cmi)
↓ open down ↓ 91 lines elided ↑ open up ↑
1053 1043          if (cmh->cmh_label != NULL) {
1054 1044                  size_t len = strlen(cmh->cmh_label) + 1;
1055 1045                  ctf_free(cmh->cmh_label, len);
1056 1046          }
1057 1047  
1058 1048          cmh->cmh_label = dup;
1059 1049          return (0);
1060 1050  }
1061 1051  
1062 1052  static int
1063      -ctf_merge_add_funcs_cb(const char *name, ulong_t idx, ctf_funcinfo_t *fip,
1064      -    void *arg)
     1053 +ctf_merge_add_function(ctf_merge_input_t *cmi, ctf_funcinfo_t *fip, ulong_t idx,
     1054 +    const char *file, const char *name, const Elf64_Sym *symp)
1065 1055  {
1066      -        ctf_merge_input_t *cmi = arg;
1067 1056          ctf_merge_funcmap_t *fmap;
1068 1057  
1069 1058          fmap = ctf_alloc(sizeof (ctf_merge_funcmap_t) +
1070 1059              sizeof (ctf_id_t) * fip->ctc_argc);
1071 1060          if (fmap == NULL)
1072 1061                  return (ENOMEM);
1073 1062  
1074 1063          fmap->cmf_idx = idx;
     1064 +        fmap->cmf_sym = *symp;
1075 1065          fmap->cmf_rtid = fip->ctc_return;
1076 1066          fmap->cmf_flags = fip->ctc_flags;
1077 1067          fmap->cmf_argc = fip->ctc_argc;
1078 1068          fmap->cmf_name = name;
     1069 +        if (ELF64_ST_BIND(symp->st_info) == STB_LOCAL) {
     1070 +                fmap->cmf_file = file;
     1071 +        } else {
     1072 +                fmap->cmf_file = NULL;
     1073 +        }
1079 1074  
1080 1075          if (ctf_func_args(cmi->cmi_input, idx, fmap->cmf_argc,
1081 1076              fmap->cmf_args) != 0) {
1082 1077                  ctf_free(fmap, sizeof (ctf_merge_funcmap_t) +
1083 1078                      sizeof (ctf_id_t) * fip->ctc_argc);
1084 1079                  return (ctf_errno(cmi->cmi_input));
1085 1080          }
1086 1081  
     1082 +        ctf_dprintf("added initial function %s, %lu, %s %u\n", name, idx,
     1083 +            fmap->cmf_file != NULL ? fmap->cmf_file : "global",
     1084 +            ELF64_ST_BIND(symp->st_info));
1087 1085          list_insert_tail(&cmi->cmi_fmap, fmap);
1088 1086          return (0);
1089 1087  }
1090 1088  
1091 1089  static int
1092      -ctf_merge_add_objs_cb(const char *name, ctf_id_t id, ulong_t idx, void *arg)
     1090 +ctf_merge_add_object(ctf_merge_input_t *cmi, ctf_id_t id, ulong_t idx,
     1091 +    const char *file, const char *name, const Elf64_Sym *symp)
1093 1092  {
1094      -        ctf_merge_input_t *cmi = arg;
1095 1093          ctf_merge_objmap_t *cmo;
1096 1094  
1097 1095          cmo = ctf_alloc(sizeof (ctf_merge_objmap_t));
1098 1096          if (cmo == NULL)
1099 1097                  return (ENOMEM);
1100 1098  
1101 1099          cmo->cmo_name = name;
     1100 +        if (ELF64_ST_BIND(symp->st_info) == STB_LOCAL) {
     1101 +                cmo->cmo_file = file;
     1102 +        } else {
     1103 +                cmo->cmo_file = NULL;
     1104 +        }
1102 1105          cmo->cmo_idx = idx;
1103 1106          cmo->cmo_tid = id;
     1107 +        cmo->cmo_sym = *symp;
1104 1108          list_insert_tail(&cmi->cmi_omap, cmo);
     1109 +
     1110 +        ctf_dprintf("added initial object %s, %lu, %ld, %s\n", name, idx, id,
     1111 +            cmo->cmo_file != NULL ? cmo->cmo_file : "global");
     1112 +
1105 1113          return (0);
1106 1114  }
1107 1115  
     1116 +static int
     1117 +ctf_merge_add_symbol(const Elf64_Sym *symp, ulong_t idx, const char *file,
     1118 +    const char *name, boolean_t primary, void *arg)
     1119 +{
     1120 +        ctf_merge_input_t *cmi = arg;
     1121 +        ctf_file_t *fp = cmi->cmi_input;
     1122 +        ushort_t *data, funcbase;
     1123 +        uint_t type;
     1124 +        ctf_funcinfo_t fi;
     1125 +
     1126 +        /*
     1127 +         * See if there is type information for this. If there is no
     1128 +         * type information for this entry or no translation, then we
     1129 +         * will find the value zero. This indicates no type ID for
     1130 +         * objects and encodes unknown information for functions.
     1131 +         */
     1132 +        if (fp->ctf_sxlate[idx] == -1u)
     1133 +                return (0);
     1134 +        data = (ushort_t *)((uintptr_t)fp->ctf_buf + fp->ctf_sxlate[idx]);
     1135 +        if (*data == 0)
     1136 +                return (0);
     1137 +
     1138 +        type = ELF64_ST_TYPE(symp->st_info);
     1139 +
     1140 +        switch (type) {
     1141 +        case STT_FUNC:
     1142 +                funcbase = *data;
     1143 +                if (LCTF_INFO_KIND(fp, funcbase) != CTF_K_FUNCTION)
     1144 +                        return (0);
     1145 +                data++;
     1146 +                fi.ctc_return = *data;
     1147 +                data++;
     1148 +                fi.ctc_argc = LCTF_INFO_VLEN(fp, funcbase);
     1149 +                fi.ctc_flags = 0;
     1150 +
     1151 +                if (fi.ctc_argc != 0 && data[fi.ctc_argc - 1] == 0) {
     1152 +                        fi.ctc_flags |= CTF_FUNC_VARARG;
     1153 +                        fi.ctc_argc--;
     1154 +                }
     1155 +                return (ctf_merge_add_function(cmi, &fi, idx, file, name,
     1156 +                    symp));
     1157 +        case STT_OBJECT:
     1158 +                return (ctf_merge_add_object(cmi, *data, idx, file, name,
     1159 +                    symp));
     1160 +        default:
     1161 +                return (0);
     1162 +        }
     1163 +}
     1164 +
1108 1165  /*
1109 1166   * Whenever we create an entry to merge, we then go and add a second empty
1110 1167   * ctf_file_t which we use for the purposes of our merging. It's not the best,
1111 1168   * but it's the best that we've got at the moment.
1112 1169   */
1113 1170  int
1114 1171  ctf_merge_add(ctf_merge_t *cmh, ctf_file_t *input)
1115 1172  {
1116 1173          int ret;
1117 1174          ctf_merge_input_t *cmi;
1118 1175          ctf_file_t *empty;
1119 1176  
     1177 +        ctf_dprintf("adding input %p\n", input);
     1178 +
1120 1179          if (input->ctf_flags & LCTF_CHILD)
1121 1180                  return (ECTF_MCHILD);
1122 1181  
1123 1182          cmi = ctf_alloc(sizeof (ctf_merge_input_t));
1124 1183          if (cmi == NULL)
1125 1184                  return (ENOMEM);
1126 1185  
1127 1186          cmi->cmi_created = B_FALSE;
1128 1187          cmi->cmi_input = input;
1129 1188          list_create(&cmi->cmi_fmap, sizeof (ctf_merge_funcmap_t),
1130 1189              offsetof(ctf_merge_funcmap_t, cmf_node));
1131 1190          list_create(&cmi->cmi_omap, sizeof (ctf_merge_funcmap_t),
1132 1191              offsetof(ctf_merge_objmap_t, cmo_node));
1133 1192  
1134 1193          if (cmh->cmh_msyms == B_TRUE) {
1135      -                if ((ret = ctf_function_iter(input, ctf_merge_add_funcs_cb,
     1194 +                if ((ret = ctf_symtab_iter(input, ctf_merge_add_symbol,
1136 1195                      cmi)) != 0) {
1137 1196                          ctf_merge_fini_input(cmi);
1138 1197                          return (ret);
1139 1198                  }
1140      -
1141      -                if ((ret = ctf_object_iter(input, ctf_merge_add_objs_cb,
1142      -                    cmi)) != 0) {
1143      -                        ctf_merge_fini_input(cmi);
1144      -                        return (ret);
1145      -                }
1146 1199          }
1147 1200  
1148 1201          list_insert_tail(&cmh->cmh_inputs, cmi);
1149 1202          cmh->cmh_ninputs++;
1150 1203  
1151 1204          /* And now the empty one to merge into this */
1152 1205          cmi = ctf_alloc(sizeof (ctf_merge_input_t));
1153 1206          if (cmi == NULL)
1154 1207                  return (ENOMEM);
1155 1208          list_create(&cmi->cmi_fmap, sizeof (ctf_merge_funcmap_t),
↓ open down ↓ 31 lines elided ↑ open up ↑
1187 1240                  return (EINVAL);
1188 1241          if (cmh->cmh_pname != NULL) {
1189 1242                  size_t len = strlen(cmh->cmh_pname) + 1;
1190 1243                  ctf_free(cmh->cmh_pname, len);
1191 1244          }
1192 1245          cmh->cmh_pname = dup;
1193 1246          cmh->cmh_unique = u;
1194 1247          return (0);
1195 1248  }
1196 1249  
1197      -static int
1198      -ctf_merge_symbols(ctf_merge_t *cmh, ctf_file_t *fp)
     1250 +/*
     1251 + * Symbol matching rules: the purpose of this is to verify that the type
     1252 + * information that we have for a given symbol actually matches the output
     1253 + * symbol. This is unfortunately complicated by several different factors:
     1254 + *
     1255 + * 1. When merging multiple .o's into a single item, the symbol table index will
     1256 + * not match.
     1257 + *
     1258 + * 2. Visibility of a symbol may not be identical to the object file or the
     1259 + * DWARF information due to symbol reduction via a mapfile.
     1260 + *
     1261 + * As such, we have to employ the following rules:
     1262 + *
     1263 + * 1. A global symbol table entry always matches a global CTF symbol with the
     1264 + * same name.
     1265 + *
     1266 + * 2. A local symbol table entry always matches a local CTF symbol if they have
     1267 + * the same name and they belong to the same file.
     1268 + *
     1269 + * 3. A weak symbol table entry matches a non-weak CTF symbol if the types,
     1270 + * values, sizes, and section indexes all match. This happens when we do a
     1271 + * conversion in one pass; it almost never happens when we're merging multiple
     1272 + * object files. A match against a CTF global symbol is a fixed match; one
     1273 + * against a CTF local symbol from the same file is a fuzzy match.
     1274 + *
     1275 + * 4. A local symbol table entry matches a global CTF entry if the
     1276 + * other pieces fail, but they have the same name. This is considered a fuzzy
     1277 + * match and is not used unless we have no other options.
     1278 + *
     1279 + * 5. A weak symbol table entry matches a weak CTF entry if the other pieces
     1280 + * fail, but they have the same name. This is considered a fuzzy match and is
     1281 + * not used unless we have no other options. When merging independent .o files,
     1282 + * this is often the only recourse we have to matching weak symbols.
     1283 + *
     1284 + * In the end, this would all be much simpler if we were able to do this as part
     1285 + * of libld which would be able to do all the symbol transformations.
     1286 + */
     1287 +static boolean_t
     1288 +ctf_merge_symbol_match(const char *ctf_file, const char *ctf_name,
     1289 +    const Elf64_Sym *ctf_symp, const char *symtab_file, const char *symtab_name,
     1290 +    const Elf64_Sym *symtab_symp, boolean_t *is_fuzzy)
1199 1291  {
1200      -        int err;
1201      -        ulong_t i;
     1292 +        *is_fuzzy = B_FALSE;
     1293 +        uint_t symtab_bind, ctf_bind;
1202 1294  
1203      -        uintptr_t symbase = (uintptr_t)fp->ctf_symtab.cts_data;
1204      -        uintptr_t strbase = (uintptr_t)fp->ctf_strtab.cts_data;
     1295 +        symtab_bind = ELF64_ST_BIND(symtab_symp->st_info);
     1296 +        ctf_bind = ELF64_ST_BIND(ctf_symp->st_info);
1205 1297  
1206      -        for (i = 0; i < fp->ctf_nsyms; i++) {
1207      -                const char *name;
1208      -                ctf_merge_input_t *cmi;
1209      -                ctf_merge_objmap_t *cmo;
     1298 +        ctf_dprintf("comparing merge match for %s/%s/%u->%s/%s/%u\n",
     1299 +            symtab_file, symtab_name, symtab_bind,
     1300 +            ctf_file, ctf_name, ctf_bind);
     1301 +        if (strcmp(ctf_name, symtab_name) != 0) {
     1302 +                return (B_FALSE);
     1303 +        }
1210 1304  
1211      -                if (fp->ctf_symtab.cts_entsize == sizeof (Elf32_Sym)) {
1212      -                        const Elf32_Sym *symp = (Elf32_Sym *)symbase + i;
1213      -                        int type = ELF32_ST_TYPE(symp->st_info);
1214      -                        if (type != STT_OBJECT)
1215      -                                continue;
1216      -                        if (ctf_sym_valid(strbase, type, symp->st_shndx,
1217      -                            symp->st_value, symp->st_name) == B_FALSE)
1218      -                                continue;
1219      -                        name = (char *)(strbase + symp->st_name);
1220      -                } else {
1221      -                        const Elf64_Sym *symp = (Elf64_Sym *)symbase + i;
1222      -                        int type = ELF64_ST_TYPE(symp->st_info);
1223      -                        if (type != STT_OBJECT)
1224      -                                continue;
1225      -                        if (ctf_sym_valid(strbase, type, symp->st_shndx,
1226      -                            symp->st_value, symp->st_name) == B_FALSE)
1227      -                                continue;
1228      -                        name = (char *)(strbase + symp->st_name);
     1305 +        if (symtab_bind == STB_GLOBAL && ctf_bind == STB_GLOBAL) {
     1306 +                return (B_TRUE);
     1307 +        } else if (symtab_bind == STB_GLOBAL) {
     1308 +                return (B_FALSE);
     1309 +        }
     1310 +
     1311 +        if (ctf_bind == STB_LOCAL && ctf_bind == symtab_bind &&
     1312 +            ctf_file != NULL && symtab_file != NULL &&
     1313 +            strcmp(ctf_file, symtab_file) == 0) {
     1314 +                return (B_TRUE);
     1315 +        }
     1316 +
     1317 +        if (symtab_bind == STB_WEAK && ctf_bind != STB_WEAK &&
     1318 +            ELF64_ST_TYPE(symtab_symp->st_info) ==
     1319 +            ELF64_ST_TYPE(ctf_symp->st_info) &&
     1320 +            symtab_symp->st_value == ctf_symp->st_value &&
     1321 +            symtab_symp->st_size == ctf_symp->st_size &&
     1322 +            symtab_symp->st_shndx == ctf_symp->st_shndx) {
     1323 +                if (ctf_bind == STB_GLOBAL) {
     1324 +                        return (B_TRUE);
1229 1325                  }
1230 1326  
1231      -                cmo = NULL;
1232      -                for (cmi = list_head(&cmh->cmh_inputs); cmi != NULL;
1233      -                    cmi = list_next(&cmh->cmh_inputs, cmi)) {
1234      -                        for (cmo = list_head(&cmi->cmi_omap); cmo != NULL;
1235      -                            cmo = list_next(&cmi->cmi_omap, cmo)) {
1236      -                                if (strcmp(cmo->cmo_name, name) == 0)
1237      -                                        goto found;
1238      -                        }
     1327 +                if (ctf_bind == STB_LOCAL && ctf_file != NULL &&
     1328 +                    symtab_file != NULL && strcmp(ctf_file, symtab_file) == 0) {
     1329 +                        *is_fuzzy = B_TRUE;
     1330 +                        return (B_TRUE);
1239 1331                  }
1240      -found:
1241      -                if (cmo != NULL) {
1242      -                        if (cmo->cmo_tid == 0)
1243      -                                continue;
1244      -                        if ((err = ctf_add_object(fp, i, cmo->cmo_tid)) != 0) {
1245      -                                ctf_dprintf("Failed to add symbol %s->%d: %s\n",
1246      -                                    name, cmo->cmo_tid,
1247      -                                    ctf_errmsg(ctf_errno(fp)));
1248      -                                return (err);
1249      -                        }
1250      -                }
1251 1332          }
1252 1333  
1253      -        return (0);
     1334 +        if (ctf_bind == STB_GLOBAL ||
     1335 +            (ctf_bind == STB_WEAK && symtab_bind == STB_WEAK)) {
     1336 +                *is_fuzzy = B_TRUE;
     1337 +                return (B_TRUE);
     1338 +        }
     1339 +
     1340 +        return (B_FALSE);
1254 1341  }
1255 1342  
     1343 +/*
     1344 + * For each symbol, try and find a match. We will attempt to find an exact
     1345 + * match; however, we will settle for a fuzzy match in general. There is one
     1346 + * case where we will not opt to use a fuzzy match, which is when performing the
     1347 + * deduplication of a container. In such a case we are trying to reduce common
     1348 + * types and a fuzzy match would be inappropriate: within the context of a
     1349 + * single container, the conversion process should already have identified any
     1350 + * exact or fuzzy matches that were required.
     1351 + */
1256 1352  static int
1257      -ctf_merge_functions(ctf_merge_t *cmh, ctf_file_t *fp)
     1353 +ctf_merge_symbols(const Elf64_Sym *symp, ulong_t idx, const char *file,
     1354 +    const char *name, boolean_t primary, void *arg)
1258 1355  {
1259 1356          int err;
1260      -        ulong_t i;
1261      -        ctf_funcinfo_t fi;
     1357 +        uint_t type, bind;
     1358 +        ctf_merge_symbol_arg_t *csa = arg;
     1359 +        ctf_file_t *fp = csa->cmsa_out;
1262 1360  
1263      -        uintptr_t symbase = (uintptr_t)fp->ctf_symtab.cts_data;
1264      -        uintptr_t strbase = (uintptr_t)fp->ctf_strtab.cts_data;
     1361 +        type = ELF64_ST_TYPE(symp->st_info);
     1362 +        bind = ELF64_ST_BIND(symp->st_info);
1265 1363  
1266      -        for (i = 0; i < fp->ctf_nsyms; i++) {
1267      -                const char *name;
1268      -                ctf_merge_input_t *cmi;
1269      -                ctf_merge_funcmap_t *cmf;
     1364 +        ctf_dprintf("Trying to find match for %s/%s/%u\n", file, name,
     1365 +            ELF64_ST_BIND(symp->st_info));
1270 1366  
1271      -                if (fp->ctf_symtab.cts_entsize == sizeof (Elf32_Sym)) {
1272      -                        const Elf32_Sym *symp = (Elf32_Sym *)symbase + i;
1273      -                        int type = ELF32_ST_TYPE(symp->st_info);
1274      -                        if (ELF32_ST_TYPE(symp->st_info) != STT_FUNC)
1275      -                                continue;
1276      -                        if (ctf_sym_valid(strbase, type, symp->st_shndx,
1277      -                            symp->st_value, symp->st_name) == B_FALSE)
1278      -                                continue;
1279      -                        name = (char *)(strbase + symp->st_name);
1280      -                } else {
1281      -                        const Elf64_Sym *symp = (Elf64_Sym *)symbase + i;
1282      -                        int type = ELF64_ST_TYPE(symp->st_info);
1283      -                        if (ELF64_ST_TYPE(symp->st_info) != STT_FUNC)
1284      -                                continue;
1285      -                        if (ctf_sym_valid(strbase, type, symp->st_shndx,
1286      -                            symp->st_value, symp->st_name) == B_FALSE)
1287      -                                continue;
1288      -                        name = (char *)(strbase + symp->st_name);
     1367 +        if (type == STT_OBJECT) {
     1368 +                ctf_merge_objmap_t *cmo, *match = NULL;
     1369 +
     1370 +                for (cmo = list_head(csa->cmsa_objmap); cmo != NULL;
     1371 +                    cmo = list_next(csa->cmsa_objmap, cmo)) {
     1372 +                        boolean_t is_fuzzy = B_FALSE;
     1373 +                        if (ctf_merge_symbol_match(cmo->cmo_file, cmo->cmo_name,
     1374 +                            &cmo->cmo_sym, file, name, symp, &is_fuzzy)) {
     1375 +                                if (is_fuzzy && csa->cmsa_dedup &&
     1376 +                                    bind != STB_WEAK) {
     1377 +                                        continue;
     1378 +                                }
     1379 +                                match = cmo;
     1380 +                                if (is_fuzzy) {
     1381 +                                        continue;
     1382 +                                }
     1383 +                                break;
     1384 +                        }
1289 1385                  }
1290 1386  
1291      -                cmf = NULL;
1292      -                for (cmi = list_head(&cmh->cmh_inputs); cmi != NULL;
1293      -                    cmi = list_next(&cmh->cmh_inputs, cmi)) {
1294      -                        for (cmf = list_head(&cmi->cmi_fmap); cmf != NULL;
1295      -                            cmf = list_next(&cmi->cmi_fmap, cmf)) {
1296      -                                if (strcmp(cmf->cmf_name, name) == 0)
1297      -                                        goto found;
     1387 +                if (match == NULL) {
     1388 +                        return (0);
     1389 +                }
     1390 +
     1391 +                if ((err = ctf_add_object(fp, idx, match->cmo_tid)) != 0) {
     1392 +                        ctf_dprintf("Failed to add symbol %s->%d: %s\n", name,
     1393 +                            match->cmo_tid, ctf_errmsg(ctf_errno(fp)));
     1394 +                        return (ctf_errno(fp));
     1395 +                }
     1396 +                ctf_dprintf("mapped object into output %s/%s->%ld\n", file,
     1397 +                    name, match->cmo_tid);
     1398 +        } else {
     1399 +                ctf_merge_funcmap_t *cmf, *match = NULL;
     1400 +                ctf_funcinfo_t fi;
     1401 +
     1402 +                for (cmf = list_head(csa->cmsa_funcmap); cmf != NULL;
     1403 +                    cmf = list_next(csa->cmsa_funcmap, cmf)) {
     1404 +                        boolean_t is_fuzzy = B_FALSE;
     1405 +                        if (ctf_merge_symbol_match(cmf->cmf_file, cmf->cmf_name,
     1406 +                            &cmf->cmf_sym, file, name, symp, &is_fuzzy)) {
     1407 +                                if (is_fuzzy && csa->cmsa_dedup &&
     1408 +                                    bind != STB_WEAK) {
     1409 +                                        continue;
     1410 +                                }
     1411 +                                match = cmf;
     1412 +                                if (is_fuzzy) {
     1413 +                                        continue;
     1414 +                                }
     1415 +                                break;
1298 1416                          }
1299 1417                  }
1300      -found:
1301      -                if (cmf != NULL) {
1302      -                        fi.ctc_return = cmf->cmf_rtid;
1303      -                        fi.ctc_argc = cmf->cmf_argc;
1304      -                        fi.ctc_flags = cmf->cmf_flags;
1305      -                        if ((err = ctf_add_function(fp, i, &fi,
1306      -                            cmf->cmf_args)) != 0)
1307      -                                return (err);
     1418 +
     1419 +                if (match == NULL) {
     1420 +                        return (0);
1308 1421                  }
     1422 +
     1423 +                fi.ctc_return = match->cmf_rtid;
     1424 +                fi.ctc_argc = match->cmf_argc;
     1425 +                fi.ctc_flags = match->cmf_flags;
     1426 +                if ((err = ctf_add_function(fp, idx, &fi, match->cmf_args)) !=
     1427 +                    0) {
     1428 +                        ctf_dprintf("Failed to add function %s: %s\n", name,
     1429 +                            ctf_errmsg(ctf_errno(fp)));
     1430 +                        return (ctf_errno(fp));
     1431 +                }
     1432 +                ctf_dprintf("mapped function into output %s/%s\n", file,
     1433 +                    name);
1309 1434          }
1310 1435  
1311 1436          return (0);
1312      -
1313 1437  }
1314 1438  
1315 1439  int
1316 1440  ctf_merge_merge(ctf_merge_t *cmh, ctf_file_t **outp)
1317 1441  {
1318 1442          int err, merr;
1319 1443          ctf_merge_input_t *cmi;
1320 1444          ctf_id_t ltype;
1321 1445          mergeq_t *mqp;
1322 1446          ctf_merge_input_t *final;
1323 1447          ctf_file_t *out;
1324 1448  
     1449 +        ctf_dprintf("Beginning ctf_merge_merge()\n");
1325 1450          if (cmh->cmh_label != NULL && cmh->cmh_unique != NULL) {
1326 1451                  const char *label = ctf_label_topmost(cmh->cmh_unique);
1327 1452                  if (label == NULL)
1328 1453                          return (ECTF_NOLABEL);
1329 1454                  if (strcmp(label, cmh->cmh_label) != 0)
1330 1455                          return (ECTF_LCONFLICT);
1331 1456          }
1332 1457  
1333 1458          if (mergeq_init(&mqp, cmh->cmh_nthreads) == -1) {
1334 1459                  return (errno);
↓ open down ↓ 45 lines elided ↑ open up ↑
1380 1505                  ltype += CTF_CHILD_START;
1381 1506          ctf_dprintf("trying to add the label\n");
1382 1507          if (cmh->cmh_label != NULL &&
1383 1508              ctf_add_label(out, cmh->cmh_label, ltype, 0) != 0) {
1384 1509                  ctf_close(out);
1385 1510                  return (ctf_errno(out));
1386 1511          }
1387 1512  
1388 1513          ctf_dprintf("merging symbols and the like\n");
1389 1514          if (cmh->cmh_msyms == B_TRUE) {
1390      -                err = ctf_merge_symbols(cmh, out);
     1515 +                ctf_merge_symbol_arg_t arg;
     1516 +                arg.cmsa_objmap = &final->cmi_omap;
     1517 +                arg.cmsa_funcmap = &final->cmi_fmap;
     1518 +                arg.cmsa_out = out;
     1519 +                arg.cmsa_dedup = B_FALSE;
     1520 +                err = ctf_symtab_iter(out, ctf_merge_symbols, &arg);
1391 1521                  if (err != 0) {
1392 1522                          ctf_close(out);
1393      -                        return (ctf_errno(out));
     1523 +                        return (err);
1394 1524                  }
1395      -
1396      -                err = ctf_merge_functions(cmh, out);
1397      -                if (err != 0) {
1398      -                        ctf_close(out);
1399      -                        return (ctf_errno(out));
1400      -                }
1401 1525          }
1402 1526  
1403 1527          err = ctf_update(out);
1404 1528          if (err != 0) {
     1529 +                err = ctf_errno(out);
1405 1530                  ctf_close(out);
1406      -                return (ctf_errno(out));
     1531 +                return (err);
1407 1532          }
1408 1533  
1409 1534          *outp = out;
1410 1535          return (0);
1411 1536  }
1412 1537  
1413 1538  /*
1414 1539   * When we get told that something is unique, eg. same is B_FALSE, then that
1415 1540   * tells us that we need to add it to the output. If same is B_TRUE, then we'll
1416 1541   * want to record it in the mapping table so that we know how to redirect types
↓ open down ↓ 81 lines elided ↑ open up ↑
1498 1623  
1499 1624          ctf_dprintf("Successfully diffed types\n");
1500 1625          ret = ctf_merge_common(&cm);
1501 1626          ctf_dprintf("deduping types result: %d\n", ret);
1502 1627          if (ret == 0)
1503 1628                  ret = ctf_update(cm.cm_out);
1504 1629          if (ret != 0)
1505 1630                  goto err;
1506 1631  
1507 1632          ctf_dprintf("Successfully deduped types\n");
1508      -        ctf_phase_dump(cm.cm_out, "dedup-pre-syms");
     1633 +        ctf_phase_dump(cm.cm_out, "dedup-pre-syms", NULL);
1509 1634  
1510 1635          /*
1511 1636           * Now we need to fix up the object and function maps.
1512 1637           */
1513      -        ctf_merge_fixup_nontypes(&cm, cmi);
     1638 +        ctf_merge_fixup_symmaps(&cm, cmi);
1514 1639  
1515 1640          if (cmp->cmh_msyms == B_TRUE) {
1516      -                ret = ctf_merge_symbols(cmp, cm.cm_out);
     1641 +                ctf_merge_symbol_arg_t arg;
     1642 +                arg.cmsa_objmap = &cmi->cmi_omap;
     1643 +                arg.cmsa_funcmap = &cmi->cmi_fmap;
     1644 +                arg.cmsa_out = cm.cm_out;
     1645 +                arg.cmsa_dedup = B_TRUE;
     1646 +                ret = ctf_symtab_iter(cm.cm_out, ctf_merge_symbols, &arg);
1517 1647                  if (ret != 0) {
1518      -                        ret = ctf_errno(cm.cm_out);
1519 1648                          ctf_dprintf("failed to dedup symbols: %s\n",
1520 1649                              ctf_errmsg(ret));
1521 1650                          goto err;
1522 1651                  }
1523      -
1524      -                ret = ctf_merge_functions(cmp, cm.cm_out);
1525      -                if (ret != 0) {
1526      -                        ret = ctf_errno(cm.cm_out);
1527      -                        ctf_dprintf("failed to dedup functions: %s\n",
1528      -                            ctf_errmsg(ret));
1529      -                        goto err;
1530      -                }
1531 1652          }
1532 1653  
1533 1654          ret = ctf_update(cm.cm_out);
1534 1655          if (ret == 0) {
1535 1656                  cmc->cmi_input = NULL;
1536 1657                  *outp = cm.cm_out;
1537 1658          }
     1659 +        ctf_phase_dump(cm.cm_out, "dedup-post-syms", NULL);
1538 1660  err:
1539 1661          ctf_merge_types_fini(&cm);
1540 1662          ctf_diff_fini(cdp);
1541 1663          return (ret);
1542 1664  }
1543 1665  
1544 1666  int
1545 1667  ctf_merge_set_nthreads(ctf_merge_t *cmp, const uint_t nthrs)
1546 1668  {
1547 1669          if (nthrs == 0)
1548 1670                  return (EINVAL);
1549 1671          cmp->cmh_nthreads = nthrs;
1550 1672          return (0);
1551 1673  }
    
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX