1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*      Copyright (c) 1988 AT&T     */
  27 /*        All Rights Reserved   */
  28 
  29 #include <stdlib.h>
  30 #include <errno.h>
  31 #include <libelf.h>
  32 #include "decl.h"
  33 #include "msg.h"
  34 
  35 
  36 /*
  37  * Convert archive symbol table to memory format
  38  *
  39  * This takes a pointer to file's archive symbol table, alignment
  40  * unconstrained.  Returns null terminated vector of Elf_Arsym
  41  * structures. Elf_Arsym uses size_t to represent offsets, which
  42  * will be 32-bit in 32-bit versions, and 64-bits otherwise.
  43  *
  44  * There are two forms of archive symbol table, the original 32-bit
  45  * form, and a 64-bit form originally found in IRIX64. The two formats
  46  * differ only in the width of the integer word:
  47  *
  48  *              # offsets       4/8-byte word
  49  *              offset[0...]    4/8-byte word each
  50  *              strings         null-terminated, for offset[x]
  51  *
  52  * By default, the 64-bit form is only used when the archive exceeds
  53  * the limits of 32-bits (4GB) in size. However, this is not required,
  54  * and the ar -S option can be used to create a 64-bit symbol table in
  55  * an archive that is under 4GB.
  56  *
  57  * Both 32 and 64-bit versions of libelf can read the 32-bit format
  58  * without loss of information. Similarly, a 64-bit version of libelf
  59  * will have no problem reading a 64-bit symbol table. This leaves the
  60  * case where a 32-bit libelf reads a 64-bit symbol table, which requires
  61  * some explanation. The offsets in a 64-bit symbol table will have zeros
  62  * in the upper half of the words until the size of the archive exceeds 4GB.
  63  * However, 32-bit libelf is unable to read any files larger than 2GB
  64  * (see comments in update.c). As such, any archive that the 32-bit version
  65  * of this code will encounter will be under 4GB in size. The upper 4
  66  * bytes of each word will be zero, and can be safely ignored.
  67  */
  68 
  69 
/*
 * Offsets in archive headers are written in MSB (big endian) order
 * on all platforms, regardless of native byte order. These macros read
 * 4 and 8 byte values from unaligned memory.
 *
 * note:
 * -    The get8() macro for 32-bit code can ignore the first 4 bytes
 *      of the word, because they are known to be 0.
 *
 * -    The inner most value in these macros is cast to an unsigned integer
 *      of the final width in order to prevent the C compiler from doing
 *      unwanted sign extension when the topmost bit of a byte is set.
 */
  83 #define get4(p) (((((((uint32_t)p[0]<<8)+p[1])<<8)+p[2])<<8)+p[3])
  84 
  85 #ifdef _LP64
  86 #define get8(p) (((((((((((((((uint64_t)p[0]<<8)+p[1])<<8)+p[2])<<8)+ \
  87     p[3])<<8)+p[4])<<8)+p[5])<<8)+p[6])<<8)+p[7])
  88 #else
  89 #define get8(p) (((((((uint64_t)p[4]<<8)+p[5])<<8)+p[6])<<8)+p[7])
  90 #endif
  91 
  92 
  93 static Elf_Void *
  94 arsym(Byte *off, size_t sz, size_t *e, int is64)
  95 {
  96         char            *endstr = (char *)off + sz;
  97         register char   *str;
  98         Byte            *endoff;
  99         Elf_Void        *oas;
 100         size_t          eltsize = is64 ? 8 : 4;
 101 
 102         {
 103                 register size_t n;
 104 
 105                 if (is64) {
 106                         if (sz < 8 || (sz - 8) / 8 < (n = get8(off))) {
 107                                 _elf_seterr(EFMT_ARSYMSZ, 0);
 108                                 return (NULL);
 109                         }
 110                 } else {
 111                         if (sz < 4 || (sz - 4) / 4 < (n = get4(off))) {
 112                                 _elf_seterr(EFMT_ARSYMSZ, 0);
 113                                 return (NULL);
 114                         }
 115                 }
 116                 off += eltsize;
 117                 endoff = off + n * eltsize;
 118 
 119                 /*
 120                  * If there are symbols in the symbol table, a
 121                  * string table must be present and NULL terminated.
 122                  *
 123                  * The format dictates that the string table must always be
 124                  * present, however in the case of an archive containing no
 125                  * symbols GNU ar will not create one.  We are permissive for
 126                  * the sake of compatibility.
 127                  */
 128                 if ((n > 0) && (((str = (char *)endoff) >= endstr) ||
 129                     (*(endstr - 1) != '\0'))) {
 130                         _elf_seterr(EFMT_ARSYM, 0);
 131                         return (NULL);
 132                 }
 133 
 134                 /*
 135                  * There is always at least one entry returned if a symtab
 136                  * exists since the table's last entry is an artificial one
 137                  * with a NULL as_name, but is included in the count.
 138                  *
 139                  * overflow can occur here, but not likely
 140                  */
 141                 *e = n + 1;
 142                 if ((oas = calloc(n + 1, sizeof (Elf_Arsym))) == NULL) {
 143                         _elf_seterr(EMEM_ARSYM, errno);
 144                         return (NULL);
 145                 }
 146         }
 147         {
 148                 register Elf_Arsym      *as = (Elf_Arsym *)oas;
 149 
 150                 while (off < endoff) {
 151                         if (str >= endstr) {
 152                                 _elf_seterr(EFMT_ARSYMSTR, 0);
 153                                 free(oas);
 154                                 return (NULL);
 155                         }
 156                         if (is64)
 157                                 as->as_off = get8(off);
 158                         else
 159                                 as->as_off = get4(off);
 160                         as->as_name = str;
 161                         as->as_hash = elf_hash(str);
 162                         ++as;
 163                         off += eltsize;
 164                         while (*str++ != '\0')
 165                                 /* LINTED */
 166                                 ;
 167                 }
 168                 as->as_name = NULL;
 169                 as->as_off = 0;
 170                 as->as_hash = ~(unsigned long)0L;
 171         }
 172         return (oas);
 173 }
 174 
 175 
 176 Elf_Arsym *
 177 elf_getarsym(Elf *elf, size_t *ptr)
 178 {
 179         Byte            *as;
 180         size_t          sz;
 181         Elf_Arsym       *rc;
 182         int             is64;
 183 
 184         if (ptr != 0)
 185                 *ptr = 0;
 186         if (elf == NULL)
 187                 return (0);
 188         ELFRLOCK(elf);
 189         if (elf->ed_kind != ELF_K_AR) {
 190                 ELFUNLOCK(elf);
 191                 _elf_seterr(EREQ_AR, 0);
 192                 return (0);
 193         }
 194         if ((as = (Byte *)elf->ed_arsym) == 0) {
 195                 ELFUNLOCK(elf);
 196                 return (0);
 197         }
 198         if (elf->ed_myflags & EDF_ASALLOC) {
 199                 if (ptr != 0)
 200                         *ptr = elf->ed_arsymsz;
 201                 ELFUNLOCK(elf);
 202                 /* LINTED */
 203                 return ((Elf_Arsym *)as);
 204         }
 205         is64 = (elf->ed_myflags & EDF_ARSYM64) != 0;
 206 
 207         /*
 208          * We're gonna need a write lock.
 209          */
 210         ELFUNLOCK(elf)
 211         ELFWLOCK(elf)
 212         sz = elf->ed_arsymsz;
 213         if (_elf_vm(elf, (size_t)(as - (Byte *)elf->ed_ident), sz) !=
 214             OK_YES) {
 215                 ELFUNLOCK(elf);
 216                 return (0);
 217         }
 218         if ((elf->ed_arsym = arsym(as, sz, &elf->ed_arsymsz, is64)) == 0) {
 219                 ELFUNLOCK(elf);
 220                 return (0);
 221         }
 222         elf->ed_myflags |= EDF_ASALLOC;
 223         if (ptr != 0)
 224                 *ptr = elf->ed_arsymsz;
 225         rc = (Elf_Arsym *)elf->ed_arsym;
 226         ELFUNLOCK(elf);
 227         return (rc);
 228 }
 229 
 230 /*
 231  * Private function to obtain the value sizeof() would return
 232  * for a word from the symbol table from the given archive. Normally,
 233  * this is an unimportant implementation detail hidden within
 234  * elf_getarsym(). However, it is useful to elfdump for formatting the
 235  * output correctly, and for the file command.
 236  *
 237  * exit:
 238  *      Returns 4 (32-bit) or 8 (64-bit) if a symbol table is present.
 239  *      Returns 0 in all other cases.
 240  */
 241 size_t
 242 _elf_getarsymwordsize(Elf *elf)
 243 {
 244         size_t  size;
 245 
 246         if (elf == NULL)
 247                 return (0);
 248 
 249         ELFRLOCK(elf);
 250         if ((elf->ed_kind == ELF_K_AR) && (elf->ed_arsym != 0))
 251                 size = (elf->ed_myflags & EDF_ARSYM64) ? 8 : 4;
 252         else
 253                 size = 0;
 254         ELFUNLOCK(elf);
 255 
 256         return (size);
 257 }