1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 *
26 * Copyright 2011 Jason King. All rights reserved.
27 */
28
29 #include <ctype.h>
30 #include <getopt.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <sys/sysmacros.h>
35 #include <sys/elf_SPARC.h>
36
37 #include <libdisasm.h>
38
39 #include "dis_target.h"
40 #include "dis_util.h"
41 #include "dis_list.h"
42
int g_demangle;		/* Demangle C++ names (set by -C) */
int g_quiet;		/* Quiet mode (set by -q): suppress headings */
int g_numeric;		/* Numeric mode (set by -n): print raw addresses */
int g_flags;		/* libdisasm language flags, plus DIS_OCTAL for -o */
int g_doall;		/* true if no functions or sections were given */

dis_namelist_t *g_funclist;	/* list of functions to disassemble, if any */
dis_namelist_t *g_seclist;	/* list of sections to disassemble, if any */
51
/*
 * Section options recorded in g_seclist by main():
 *
 *	-d sec	DIS_DATA_RELATIVE	dump as data, addresses start at 0
 *	-D sec	DIS_DATA_ABSOLUTE	dump as data, using the section address
 *	-t sec	DIS_TEXT		disassemble as text
 */
#define	DIS_DATA_RELATIVE	1
#define	DIS_DATA_ABSOLUTE	2
#define	DIS_TEXT		3
58
/*
 * libdisasm callback data.  Keeps track of current data (function or section)
 * and offset within that data.  dis_data() fills one of these in and hands it
 * to libdisasm with dis_set_data(); do_read() and do_lookup() receive it back
 * as their opaque 'data' argument.
 */
typedef struct dis_buffer {
	dis_tgt_t	*db_tgt;	/* current dis target */
	void		*db_data;	/* function or section data */
	uint64_t	db_addr;	/* address of the first byte of db_data */
	size_t		db_size;	/* size of data, in bytes */
	uint64_t	db_nextaddr;	/* next address to be read; do_read() */
					/* sets it just past the bytes copied */
} dis_buffer_t;
70
/*
 * Minimum width of symbol portion of line.
 * NOTE(review): no code visible in this file references this constant;
 * dis_data() starts its column width at 0 -- confirm whether it is still
 * needed.
 */
#define	MINSYMWIDTH	22
72
73 /*
74 * Given a symbol+offset as returned by dis_tgt_lookup(), print an appropriately
75 * formatted symbol, based on the offset and current setttings.
76 */
77 void
78 getsymname(uint64_t addr, const char *symbol, off_t offset, char *buf,
79 size_t buflen)
80 {
81 if (symbol == NULL || g_numeric) {
82 if (g_flags & DIS_OCTAL)
83 (void) snprintf(buf, buflen, "0%llo", addr);
84 else
85 (void) snprintf(buf, buflen, "0x%llx", addr);
86 } else {
87 if (g_demangle)
88 symbol = dis_demangle(symbol);
89
90 if (offset == 0)
91 (void) snprintf(buf, buflen, "%s", symbol);
92 else if (g_flags & DIS_OCTAL)
93 (void) snprintf(buf, buflen, "%s+0%o", symbol, offset);
94 else
95 (void) snprintf(buf, buflen, "%s+0x%x", symbol, offset);
96 }
97 }
98
99 /*
100 * The main disassembly routine. Given a fixed-sized buffer and starting
101 * address, disassemble the data using the supplied target and libdisasm handle.
102 */
103 void
104 dis_data(dis_tgt_t *tgt, dis_handle_t *dhp, uint64_t addr, void *data,
105 size_t datalen)
106 {
107 dis_buffer_t db = { 0 };
108 char buf[BUFSIZE];
109 char symbuf[BUFSIZE];
110 const char *symbol;
111 const char *last_symbol;
112 off_t symoffset;
113 int i;
114 int bytesperline;
115 size_t symsize;
116 int isfunc;
117 size_t symwidth = 0;
118
119 db.db_tgt = tgt;
120 db.db_data = data;
121 db.db_addr = addr;
122 db.db_size = datalen;
123
124 dis_set_data(dhp, &db);
125
126 if ((bytesperline = dis_max_instrlen(dhp)) > 6)
127 bytesperline = 6;
128
129 symbol = NULL;
130
131 while (addr < db.db_addr + db.db_size) {
132
133 if (dis_disassemble(dhp, addr, buf, BUFSIZE) != 0) {
134 #if defined(__sparc)
135 /*
136 * Since sparc instructions are fixed size, we
137 * always know the address of the next instruction
138 */
139 (void) snprintf(buf, sizeof (buf),
140 "*** invalid opcode ***");
141 db.db_nextaddr = addr + 4;
142
143 #else
144 off_t next;
145
146 (void) snprintf(buf, sizeof (buf),
147 "*** invalid opcode ***");
148
149 /*
150 * On architectures with variable sized instructions
151 * we have no way to figure out where the next
152 * instruction starts if we encounter an invalid
153 * instruction. Instead we print the rest of the
154 * instruction stream as hex until we reach the
155 * next valid symbol in the section.
156 */
157 if ((next = dis_tgt_next_symbol(tgt, addr)) == 0) {
158 db.db_nextaddr = db.db_addr + db.db_size;
159 } else {
160 if (next > db.db_size)
161 db.db_nextaddr = db.db_addr +
162 db.db_size;
163 else
164 db.db_nextaddr = addr + next;
165 }
166 #endif
167 }
168
169 /*
170 * Print out the line as:
171 *
172 * address: bytes text
173 *
174 * If there are more than 6 bytes in any given instruction,
175 * spread the bytes across two lines. We try to get symbolic
176 * information for the address, but if that fails we print out
177 * the numeric address instead.
178 *
179 * We try to keep the address portion of the text aligned at
180 * MINSYMWIDTH characters. If we are disassembling a function
181 * with a long name, this can be annoying. So we pick a width
182 * based on the maximum width that the current symbol can be.
183 * This at least produces text aligned within each function.
184 */
185 last_symbol = symbol;
186 symbol = dis_tgt_lookup(tgt, addr, &symoffset, 1, &symsize,
187 &isfunc);
188 if (symbol == NULL) {
189 symbol = dis_find_section(tgt, addr, &symoffset);
190 symsize = symoffset;
191 }
192
193 if (symbol != last_symbol)
194 getsymname(addr, symbol, symsize, symbuf,
195 sizeof (symbuf));
196
197 symwidth = MAX(symwidth, strlen(symbuf));
198 getsymname(addr, symbol, symoffset, symbuf, sizeof (symbuf));
199
200 /*
201 * If we've crossed a new function boundary, print out the
202 * function name on a blank line.
203 */
204 if (!g_quiet && symoffset == 0 && symbol != NULL && isfunc)
205 (void) printf("%s()\n", symbol);
206
207 (void) printf(" %s:%*s ", symbuf,
208 symwidth - strlen(symbuf), "");
209
210 /* print bytes */
211 for (i = 0; i < MIN(bytesperline, (db.db_nextaddr - addr));
212 i++) {
213 int byte = *((uchar_t *)data + (addr - db.db_addr) + i);
214 if (g_flags & DIS_OCTAL)
215 (void) printf("%03o ", byte);
216 else
217 (void) printf("%02x ", byte);
218 }
219
220 /* trailing spaces for missing bytes */
221 for (; i < bytesperline; i++) {
222 if (g_flags & DIS_OCTAL)
223 (void) printf(" ");
224 else
225 (void) printf(" ");
226 }
227
228 /* contents of disassembly */
229 (void) printf(" %s", buf);
230
231 /* excess bytes that spill over onto subsequent lines */
232 for (; i < db.db_nextaddr - addr; i++) {
233 int byte = *((uchar_t *)data + (addr - db.db_addr) + i);
234 if (i % bytesperline == 0)
235 (void) printf("\n %*s ", symwidth, "");
236 if (g_flags & DIS_OCTAL)
237 (void) printf("%03o ", byte);
238 else
239 (void) printf("%02x ", byte);
240 }
241
242 (void) printf("\n");
243
244 addr = db.db_nextaddr;
245 }
246 }
247
248 /*
249 * libdisasm wrapper around symbol lookup. Invoke the target-specific lookup
250 * function, and convert the result using getsymname().
251 */
252 int
253 do_lookup(void *data, uint64_t addr, char *buf, size_t buflen, uint64_t *start,
254 size_t *symlen)
255 {
256 dis_buffer_t *db = data;
257 const char *symbol;
258 off_t offset;
259 size_t size;
260
261 /*
262 * If NULL symbol is returned, getsymname takes care of
263 * printing appropriate address in buf instead of symbol.
264 */
265 symbol = dis_tgt_lookup(db->db_tgt, addr, &offset, 0, &size, NULL);
266
267 if (buf != NULL)
268 getsymname(addr, symbol, offset, buf, buflen);
269
270 if (start != NULL)
271 *start = addr - offset;
272 if (symlen != NULL)
273 *symlen = size;
274
275 if (symbol == NULL)
276 return (-1);
277
278 return (0);
279 }
280
281 /*
282 * libdisasm wrapper around target reading. libdisasm will always read data
283 * in order, so update our current offset within the buffer appropriately.
284 * We only support reading from within the current object; libdisasm should
285 * never ask us to do otherwise.
286 */
287 int
288 do_read(void *data, uint64_t addr, void *buf, size_t len)
289 {
290 dis_buffer_t *db = data;
291 size_t offset;
292
293 if (addr < db->db_addr || addr >= db->db_addr + db->db_size)
294 return (-1);
295
296 offset = addr - db->db_addr;
297 len = MIN(len, db->db_size - offset);
298
299 (void) memcpy(buf, (char *)db->db_data + offset, len);
300
301 db->db_nextaddr = addr + len;
302
303 return (len);
304 }
305
/*
 * Routine to dump raw data in a human-readable format.  Used by the -d and -D
 * options.  We model our output after the xxd(1) program, which gives nicely
 * formatted output, along with an ASCII translation of the result.
 *
 *	addr	address to display for the first byte of 'data'
 *	data	the bytes to dump
 *	datalen	number of bytes in 'data'
 *
 * Each output row covers 16 bytes starting at a 16-byte aligned address.
 * Positions in the first and last rows that fall outside
 * [addr, addr + datalen) are left blank.  Nothing is printed for an empty
 * range.
 */
void
dump_data(uint64_t addr, void *data, size_t datalen)
{
	/*
	 * Addresses are tracked in 64 bits regardless of the native pointer
	 * size, so they are neither truncated here nor when printed.
	 */
	uint64_t curaddr = addr & ~(uint64_t)0xf;
	uint64_t end = addr + datalen;
	const uint8_t *bytes = data;
	int i;
	int width;

	if (datalen == 0)
		return;

	/*
	 * Determine if the address given to us fits in 32-bit range, in which
	 * case use a 4-byte (8 hex digit) width.
	 */
	if ((end & 0xffffffff00000000ULL) == 0ULL)
		width = 8;
	else
		width = 16;

	while (curaddr < end) {
		/*
		 * Display leading address
		 */
		(void) printf("%0*llx: ", width, (unsigned long long)curaddr);

		/*
		 * Print out data in two-byte chunks.  If the current address
		 * is before the starting address or after the end of the
		 * section, print spaces -- two of them, so that a missing
		 * byte is as wide as a printed one.
		 */
		for (i = 0; i < 16; i++) {
			uint64_t cur = curaddr + i;

			if (cur < addr || cur >= end)
				(void) printf("  ");
			else
				(void) printf("%02x", bytes[cur - addr]);

			if (i & 1)
				(void) printf(" ");
		}

		(void) printf(" ");

		/*
		 * Print out the ASCII representation
		 */
		for (i = 0; i < 16; i++) {
			uint64_t cur = curaddr + i;

			if (cur < addr || cur >= end) {
				(void) printf(" ");
			} else {
				uint8_t byte = bytes[cur - addr];
				if (isprint(byte))
					(void) printf("%c", byte);
				else
					(void) printf(".");
			}
		}

		(void) printf("\n");

		curaddr += 16;
	}
}
373
374 /*
375 * Disassemble a section implicitly specified as part of a file. This function
376 * is called for all sections when no other flags are specified. We ignore any
377 * data sections, and print out only those sections containing text.
378 */
379 void
380 dis_text_section(dis_tgt_t *tgt, dis_scn_t *scn, void *data)
381 {
382 dis_handle_t *dhp = data;
383
384 /* ignore data sections */
385 if (!dis_section_istext(scn))
386 return;
387
388 if (!g_quiet)
389 (void) printf("\nsection %s\n", dis_section_name(scn));
390
391 dis_data(tgt, dhp, dis_section_addr(scn), dis_section_data(scn),
392 dis_section_size(scn));
393 }
394
/*
 * Structure passed to dis_named_{section,function} which keeps track of both
 * the target and the libdisasm handle.  dis_file() fills one in per target
 * and passes its address as the iterator callback argument.
 */
typedef struct callback_arg {
	dis_tgt_t	*ca_tgt;	/* target being disassembled */
	dis_handle_t	*ca_handle;	/* libdisasm handle for that target */
} callback_arg_t;
403
404 /*
405 * Disassemble a section explicitly named with -s, -d, or -D. The 'type'
406 * argument contains the type of argument given. Pass the data onto the
407 * appropriate helper routine.
408 */
409 void
410 dis_named_section(dis_scn_t *scn, int type, void *data)
411 {
412 callback_arg_t *ca = data;
413
414 if (!g_quiet)
415 (void) printf("\nsection %s\n", dis_section_name(scn));
416
417 switch (type) {
418 case DIS_DATA_RELATIVE:
419 dump_data(0, dis_section_data(scn), dis_section_size(scn));
420 break;
421 case DIS_DATA_ABSOLUTE:
422 dump_data(dis_section_addr(scn), dis_section_data(scn),
423 dis_section_size(scn));
424 break;
425 case DIS_TEXT:
426 dis_data(ca->ca_tgt, ca->ca_handle, dis_section_addr(scn),
427 dis_section_data(scn), dis_section_size(scn));
428 break;
429 }
430 }
431
432 /*
433 * Disassemble a function explicitly specified with '-F'. The 'type' argument
434 * is unused.
435 */
436 /* ARGSUSED */
437 void
438 dis_named_function(dis_func_t *func, int type, void *data)
439 {
440 callback_arg_t *ca = data;
441
442 dis_data(ca->ca_tgt, ca->ca_handle, dis_function_addr(func),
443 dis_function_data(func), dis_function_size(func));
444 }
445
446 /*
447 * Disassemble a complete file. First, we determine the type of the file based
448 * on the ELF machine type, and instantiate a version of the disassembler
449 * appropriate for the file. We then resolve any named sections or functions
450 * against the file, and iterate over the results (or all sections if no flags
451 * were specified).
452 */
453 void
454 dis_file(const char *filename)
455 {
456 dis_tgt_t *tgt, *current;
457 dis_scnlist_t *sections;
458 dis_funclist_t *functions;
459 dis_handle_t *dhp;
460 GElf_Ehdr ehdr;
461
462 /*
463 * First, initialize the target
464 */
465 if ((tgt = dis_tgt_create(filename)) == NULL)
466 return;
467
468 if (!g_quiet)
469 (void) printf("disassembly for %s\n\n", filename);
470
471 /*
472 * A given file may contain multiple targets (if it is an archive, for
473 * example). We iterate over all possible targets if this is the case.
474 */
475 for (current = tgt; current != NULL; current = dis_tgt_next(current)) {
476 dis_tgt_ehdr(current, &ehdr);
477
478 /*
479 * Eventually, this should probably live within libdisasm, and
480 * we should be able to disassemble targets from different
481 * architectures. For now, we only support objects as the
482 * native machine type.
483 */
484 switch (ehdr.e_machine) {
485 #ifdef __sparc
486 case EM_SPARC:
487 if (ehdr.e_ident[EI_CLASS] != ELFCLASS32 ||
488 ehdr.e_ident[EI_DATA] != ELFDATA2MSB) {
489 warn("invalid E_IDENT field for SPARC object");
490 return;
491 }
492 g_flags |= DIS_SPARC_V8;
493 break;
494
495 case EM_SPARC32PLUS:
496 {
497 uint64_t flags = ehdr.e_flags & EF_SPARC_32PLUS_MASK;
498
499 if (ehdr.e_ident[EI_CLASS] != ELFCLASS32 ||
500 ehdr.e_ident[EI_DATA] != ELFDATA2MSB) {
501 warn("invalid E_IDENT field for SPARC object");
502 return;
503 }
504
505 if (flags != 0 &&
506 (flags & (EF_SPARC_32PLUS | EF_SPARC_SUN_US1 |
507 EF_SPARC_SUN_US3)) != EF_SPARC_32PLUS)
508 g_flags |= DIS_SPARC_V9 | DIS_SPARC_V9_SGI;
509 else
510 g_flags |= DIS_SPARC_V9;
511 break;
512 }
513
514 case EM_SPARCV9:
515 if (ehdr.e_ident[EI_CLASS] != ELFCLASS64 ||
516 ehdr.e_ident[EI_DATA] != ELFDATA2MSB) {
517 warn("invalid E_IDENT field for SPARC object");
518 return;
519 }
520
521 g_flags |= DIS_SPARC_V9 | DIS_SPARC_V9_SGI;
522 break;
523 #endif /* __sparc */
524
525 #if defined(__i386) || defined(__amd64)
526 case EM_386:
527 g_flags |= DIS_X86_SIZE32;
528 break;
529
530 case EM_AMD64:
531 g_flags |= DIS_X86_SIZE64;
532 break;
533 #endif /* __i386 || __amd64 */
534
535 default:
536 die("%s: unsupported ELF machine 0x%x", filename,
537 ehdr.e_machine);
538 }
539
540 /*
541 * If ET_REL (.o), printing immediate symbols is likely to
542 * result in garbage, as symbol lookups on unrelocated
543 * immediates find false and useless matches.
544 */
545
546 if (ehdr.e_type == ET_REL)
547 g_flags |= DIS_NOIMMSYM;
548
549 if (!g_quiet && dis_tgt_member(current) != NULL)
550 (void) printf("\narchive member %s\n",
551 dis_tgt_member(current));
552
553 /*
554 * Instantiate a libdisasm handle based on the file type.
555 */
556 if ((dhp = dis_handle_create(g_flags, current, do_lookup,
557 do_read)) == NULL)
558 die("%s: failed to initialize disassembler: %s",
559 filename, dis_strerror(dis_errno()));
560
561 if (g_doall) {
562 /*
563 * With no arguments, iterate over all sections and
564 * disassemble only those that contain text.
565 */
566 dis_tgt_section_iter(current, dis_text_section, dhp);
567 } else {
568 callback_arg_t ca;
569
570 ca.ca_tgt = current;
571 ca.ca_handle = dhp;
572
573 /*
574 * If sections or functions were explicitly specified,
575 * resolve those names against the object, and iterate
576 * over just the resulting data.
577 */
578 sections = dis_namelist_resolve_sections(g_seclist,
579 current);
580 functions = dis_namelist_resolve_functions(g_funclist,
581 current);
582
583 dis_scnlist_iter(sections, dis_named_section, &ca);
584 dis_funclist_iter(functions, dis_named_function, &ca);
585
586 dis_scnlist_destroy(sections);
587 dis_funclist_destroy(functions);
588 }
589
590 dis_handle_destroy(dhp);
591 }
592
593 dis_tgt_destroy(tgt);
594 }
595
/*
 * Print the command synopsis to stderr and exit with status 2.
 */
void
usage(void)
{
	(void) fprintf(stderr,
	    "usage: dis [-CVoqn] [-d sec] \n"
	    "\t[-D sec] [-F function] [-t sec] file ..\n");
	exit(2);
}
603
/*
 * Singly linked list of archive paths built by main() from '-l' options.
 * main() frees each path and node once the file has been processed.
 */
typedef struct lib_node {
	char		*path;	/* allocated "<dir>/lib<name>.a" path */
	struct lib_node	*next;	/* next entry, or NULL at the end */
} lib_node_t;
608
609 int
610 main(int argc, char **argv)
611 {
612 int optchar;
613 int i;
614 lib_node_t *libs = NULL;
615
616 g_funclist = dis_namelist_create();
617 g_seclist = dis_namelist_create();
618
619 while ((optchar = getopt(argc, argv, "Cd:D:F:l:Lot:Vqn")) != -1) {
620 switch (optchar) {
621 case 'C':
622 g_demangle = 1;
623 break;
624 case 'd':
625 dis_namelist_add(g_seclist, optarg, DIS_DATA_RELATIVE);
626 break;
627 case 'D':
628 dis_namelist_add(g_seclist, optarg, DIS_DATA_ABSOLUTE);
629 break;
630 case 'F':
631 dis_namelist_add(g_funclist, optarg, 0);
632 break;
633 case 'l': {
634 /*
635 * The '-l foo' option historically would attempt to
636 * disassemble '$LIBDIR/libfoo.a'. The $LIBDIR
637 * environment variable has never been supported or
638 * documented for our linker. However, until this
639 * option is formally EOLed, we have to support it.
640 */
641 char *dir;
642 lib_node_t *node;
643 size_t len;
644
645 if ((dir = getenv("LIBDIR")) == NULL ||
646 dir[0] == '\0')
647 dir = "/usr/lib";
648 node = safe_malloc(sizeof (lib_node_t));
649 len = strlen(optarg) + strlen(dir) + sizeof ("/lib.a");
650 node->path = safe_malloc(len);
651
652 (void) snprintf(node->path, len, "%s/lib%s.a", dir,
653 optarg);
654 node->next = libs;
655 libs = node;
656 break;
657 }
658 case 'L':
659 /*
660 * The '-L' option historically would attempt to read
661 * the .debug section of the target to determine source
662 * line information in order to annotate the output.
663 * No compiler has emitted these sections in many years,
664 * and the option has never done what it purported to
665 * do. We silently consume the option for
666 * compatibility.
667 */
668 break;
669 case 'n':
670 g_numeric = 1;
671 break;
672 case 'o':
673 g_flags |= DIS_OCTAL;
674 break;
675 case 'q':
676 g_quiet = 1;
677 break;
678 case 't':
679 dis_namelist_add(g_seclist, optarg, DIS_TEXT);
680 break;
681 case 'V':
682 (void) printf("Solaris disassembler version 1.0\n");
683 return (0);
684 default:
685 usage();
686 break;
687 }
688 }
689
690 argc -= optind;
691 argv += optind;
692
693 if (argc == 0 && libs == NULL) {
694 warn("no objects specified");
695 usage();
696 }
697
698 if (dis_namelist_empty(g_funclist) && dis_namelist_empty(g_seclist))
699 g_doall = 1;
700
701 /*
702 * See comment for 'l' option, above.
703 */
704 while (libs != NULL) {
705 lib_node_t *node = libs->next;
706
707 dis_file(libs->path);
708 free(libs->path);
709 free(libs);
710 libs = node;
711 }
712
713 for (i = 0; i < argc; i++)
714 dis_file(argv[i]);
715
716 dis_namelist_destroy(g_funclist);
717 dis_namelist_destroy(g_seclist);
718
719 return (g_error);
720 }