1 /* 2 * Linearize - walk the statement tree (but _not_ the expressions) 3 * to generate a linear version of it and the basic blocks. 4 * 5 * NOTE! We're not interested in the actual sub-expressions yet, 6 * even though they can generate conditional branches and 7 * subroutine calls. That's all "local" behaviour. 8 * 9 * Copyright (C) 2004 Linus Torvalds 10 * Copyright (C) 2004 Christopher Li 11 */ 12 13 #include <string.h> 14 #include <stdarg.h> 15 #include <stdlib.h> 16 #include <stdio.h> 17 #include <assert.h> 18 19 #include "parse.h" 20 #include "expression.h" 21 #include "linearize.h" 22 #include "optimize.h" 23 #include "flow.h" 24 #include "target.h" 25 26 static pseudo_t linearize_statement(struct entrypoint *ep, struct statement *stmt); 27 static pseudo_t linearize_expression(struct entrypoint *ep, struct expression *expr); 28 29 static pseudo_t add_cast(struct entrypoint *ep, struct symbol *to, struct symbol *from, int op, pseudo_t src); 30 static pseudo_t add_binary_op(struct entrypoint *ep, struct symbol *ctype, int op, pseudo_t left, pseudo_t right); 31 static pseudo_t add_setval(struct entrypoint *ep, struct symbol *ctype, struct expression *val); 32 static pseudo_t linearize_one_symbol(struct entrypoint *ep, struct symbol *sym); 33 34 struct access_data; 35 static pseudo_t add_load(struct entrypoint *ep, struct access_data *); 36 static pseudo_t linearize_initializer(struct entrypoint *ep, struct expression *initializer, struct access_data *); 37 static pseudo_t cast_pseudo(struct entrypoint *ep, pseudo_t src, struct symbol *from, struct symbol *to); 38 39 struct pseudo void_pseudo = {}; 40 41 static struct position current_pos; 42 43 ALLOCATOR(pseudo_user, "pseudo_user"); 44 45 static struct instruction *alloc_instruction(int opcode, int size) 46 { 47 struct instruction * insn = __alloc_instruction(0); 48 insn->opcode = opcode; 49 insn->size = size; 50 insn->pos = current_pos; 51 return insn; 52 } 53 54 static inline int type_size(struct symbol *type) 55 { 56 return type ? type->bit_size > 0 ? type->bit_size : 0 : 0; 57 } 58 59 static struct instruction *alloc_typed_instruction(int opcode, struct symbol *type) 60 { 61 struct instruction *insn = alloc_instruction(opcode, type_size(type)); 62 insn->type = type; 63 return insn; 64 } 65 66 static struct entrypoint *alloc_entrypoint(void) 67 { 68 return __alloc_entrypoint(0); 69 } 70 71 static struct basic_block *alloc_basic_block(struct entrypoint *ep, struct position pos) 72 { 73 static int nr; 74 struct basic_block *bb = __alloc_basic_block(0); 75 bb->pos = pos; 76 bb->ep = ep; 77 bb->nr = nr++; 78 return bb; 79 } 80 81 static struct multijmp *alloc_multijmp(struct basic_block *target, long long begin, long long end) 82 { 83 struct multijmp *multijmp = __alloc_multijmp(0); 84 multijmp->target = target; 85 multijmp->begin = begin; 86 multijmp->end = end; 87 return multijmp; 88 } 89 90 const char *show_label(struct basic_block *bb) 91 { 92 static int n; 93 static char buffer[4][16]; 94 char *buf = buffer[3 & ++n]; 95 96 if (!bb) 97 return ".L???"; 98 snprintf(buf, 64, ".L%u", bb->nr); 99 return buf; 100 } 101 102 const char *show_pseudo(pseudo_t pseudo) 103 { 104 static int n; 105 static char buffer[4][64]; 106 char *buf; 107 int i; 108 109 if (!pseudo) 110 return "no pseudo"; 111 if (pseudo == VOID) 112 return "VOID"; 113 buf = buffer[3 & ++n]; 114 switch(pseudo->type) { 115 case PSEUDO_SYM: { 116 struct symbol *sym = pseudo->sym; 117 struct expression *expr; 118 119 if (!sym) { 120 snprintf(buf, 64, "<bad symbol>"); 121 break; 122 } 123 if (sym->bb_target) { 124 snprintf(buf, 64, "%s", show_label(sym->bb_target)); 125 break; 126 } 127 if (sym->ident) { 128 snprintf(buf, 64, "%s", show_ident(sym->ident)); 129 break; 130 } 131 expr = sym->initializer; 132 snprintf(buf, 64, "<anon symbol:%p>", verbose ? sym : NULL); 133 if (expr) { 134 switch (expr->type) { 135 case EXPR_VALUE: 136 snprintf(buf, 64, "<symbol value: %lld>", expr->value); 137 break; 138 case EXPR_STRING: 139 return show_string(expr->string); 140 default: 141 break; 142 } 143 } 144 break; 145 } 146 case PSEUDO_REG: 147 i = snprintf(buf, 64, "%%r%d", pseudo->nr); 148 if (pseudo->ident) 149 sprintf(buf+i, "(%s)", show_ident(pseudo->ident)); 150 break; 151 case PSEUDO_VAL: { 152 long long value = pseudo->value; 153 if (value > 1000 || value < -1000) 154 snprintf(buf, 64, "$%#llx", value); 155 else 156 snprintf(buf, 64, "$%lld", value); 157 break; 158 } 159 case PSEUDO_ARG: 160 snprintf(buf, 64, "%%arg%d", pseudo->nr); 161 break; 162 case PSEUDO_PHI: 163 i = snprintf(buf, 64, "%%phi%d", pseudo->nr); 164 if (pseudo->ident) 165 sprintf(buf+i, "(%s)", show_ident(pseudo->ident)); 166 break; 167 case PSEUDO_UNDEF: 168 return "UNDEF"; 169 default: 170 snprintf(buf, 64, "<bad pseudo type %d>", pseudo->type); 171 } 172 return buf; 173 } 174 175 static const char *opcodes[] = { 176 [OP_BADOP] = "bad_op", 177 178 /* Fn entrypoint */ 179 [OP_ENTRY] = "<entry-point>", 180 181 /* Terminator */ 182 [OP_RET] = "ret", 183 [OP_BR] = "br", 184 [OP_CBR] = "cbr", 185 [OP_SWITCH] = "switch", 186 [OP_COMPUTEDGOTO] = "jmp *", 187 188 /* Binary */ 189 [OP_ADD] = "add", 190 [OP_SUB] = "sub", 191 [OP_MUL] = "mul", 192 [OP_DIVU] = "divu", 193 [OP_DIVS] = "divs", 194 [OP_MODU] = "modu", 195 [OP_MODS] = "mods", 196 [OP_SHL] = "shl", 197 [OP_LSR] = "lsr", 198 [OP_ASR] = "asr", 199 200 /* Floating-point Binary */ 201 [OP_FADD] = "fadd", 202 [OP_FSUB] = "fsub", 203 [OP_FMUL] = "fmul", 204 [OP_FDIV] = "fdiv", 205 206 /* Logical */ 207 [OP_AND] = "and", 208 [OP_OR] = "or", 209 [OP_XOR] = "xor", 210 211 /* Binary comparison */ 212 [OP_SET_EQ] = "seteq", 213 [OP_SET_NE] = "setne", 214 [OP_SET_LE] = "setle", 215 [OP_SET_GE] = "setge", 216 [OP_SET_LT] = "setlt", 217 [OP_SET_GT] = "setgt", 218 [OP_SET_B] = "setb", 219 [OP_SET_A] = "seta", 220 [OP_SET_BE] = "setbe", 221 [OP_SET_AE] = "setae", 222 223 /* floating-point comparison */ 224 [OP_FCMP_ORD] = "fcmpord", 225 [OP_FCMP_OEQ] = "fcmpoeq", 226 [OP_FCMP_ONE] = "fcmpone", 227 [OP_FCMP_OLE] = "fcmpole", 228 [OP_FCMP_OGE] = "fcmpoge", 229 [OP_FCMP_OLT] = "fcmpolt", 230 [OP_FCMP_OGT] = "fcmpogt", 231 [OP_FCMP_UEQ] = "fcmpueq", 232 [OP_FCMP_UNE] = "fcmpune", 233 [OP_FCMP_ULE] = "fcmpule", 234 [OP_FCMP_UGE] = "fcmpuge", 235 [OP_FCMP_ULT] = "fcmpult", 236 [OP_FCMP_UGT] = "fcmpugt", 237 [OP_FCMP_UNO] = "fcmpuno", 238 239 /* Uni */ 240 [OP_NOT] = "not", 241 [OP_NEG] = "neg", 242 [OP_FNEG] = "fneg", 243 244 /* Special three-input */ 245 [OP_SEL] = "select", 246 247 /* Memory */ 248 [OP_LOAD] = "load", 249 [OP_STORE] = "store", 250 [OP_SETVAL] = "set", 251 [OP_SETFVAL] = "setfval", 252 [OP_SYMADDR] = "symaddr", 253 254 /* Other */ 255 [OP_PHI] = "phi", 256 [OP_PHISOURCE] = "phisrc", 257 [OP_SEXT] = "sext", 258 [OP_ZEXT] = "zext", 259 [OP_TRUNC] = "trunc", 260 [OP_FCVTU] = "fcvtu", 261 [OP_FCVTS] = "fcvts", 262 [OP_UCVTF] = "ucvtf", 263 [OP_SCVTF] = "scvtf", 264 [OP_FCVTF] = "fcvtf", 265 [OP_UTPTR] = "utptr", 266 [OP_PTRTU] = "ptrtu", 267 [OP_PTRCAST] = "ptrcast", 268 [OP_INLINED_CALL] = "# call", 269 [OP_CALL] = "call", 270 [OP_SLICE] = "slice", 271 [OP_NOP] = "nop", 272 [OP_DEATHNOTE] = "dead", 273 [OP_ASM] = "asm", 274 275 /* Sparse tagging (line numbers, context, whatever) */ 276 [OP_CONTEXT] = "context", 277 [OP_RANGE] = "range-check", 278 279 [OP_COPY] = "copy", 280 }; 281 282 static char *show_asm_constraints(char *buf, const char *sep, struct asm_constraint_list *list) 283 { 284 struct asm_constraint *entry; 285 286 FOR_EACH_PTR(list, entry) { 287 buf += sprintf(buf, "%s\"%s\"", sep, entry->constraint); 288 if (entry->pseudo) 289 buf += sprintf(buf, " (%s)", show_pseudo(entry->pseudo)); 290 if (entry->ident) 291 buf += sprintf(buf, " [%s]", show_ident(entry->ident)); 292 sep = ", "; 293 } END_FOR_EACH_PTR(entry); 294 return buf; 295 } 296 297 static char *show_asm(char *buf, struct instruction *insn) 298 { 299 struct asm_rules *rules = insn->asm_rules; 300 301 buf += sprintf(buf, "\"%s\"", insn->string); 302 buf = show_asm_constraints(buf, "\n\t\tout: ", rules->outputs); 303 buf = show_asm_constraints(buf, "\n\t\tin: ", rules->inputs); 304 buf = show_asm_constraints(buf, "\n\t\tclobber: ", rules->clobbers); 305 return buf; 306 } 307 308 const char *show_instruction(struct instruction *insn) 309 { 310 int opcode = insn->opcode; 311 static char buffer[4096]; 312 char *buf; 313 314 buf = buffer; 315 if (!insn->bb) 316 buf += sprintf(buf, "# "); 317 318 if (opcode < ARRAY_SIZE(opcodes)) { 319 const char *op = opcodes[opcode]; 320 if (!op) 321 buf += sprintf(buf, "opcode:%d", opcode); 322 else 323 buf += sprintf(buf, "%s", op); 324 if (insn->size) 325 buf += sprintf(buf, ".%d", insn->size); 326 memset(buf, ' ', 20); 327 buf++; 328 } 329 330 if (buf < buffer + 12) 331 buf = buffer + 12; 332 switch (opcode) { 333 case OP_RET: 334 if (insn->src && insn->src != VOID) 335 buf += sprintf(buf, "%s", show_pseudo(insn->src)); 336 break; 337 338 case OP_CBR: 339 buf += sprintf(buf, "%s, %s, %s", show_pseudo(insn->cond), show_label(insn->bb_true), show_label(insn->bb_false)); 340 break; 341 342 case OP_BR: 343 buf += sprintf(buf, "%s", show_label(insn->bb_true)); 344 break; 345 346 case OP_SETVAL: { 347 struct expression *expr = insn->val; 348 buf += sprintf(buf, "%s <- ", show_pseudo(insn->target)); 349 350 if (!expr) { 351 buf += sprintf(buf, "%s", "<none>"); 352 break; 353 } 354 355 switch (expr->type) { 356 case EXPR_VALUE: 357 buf += sprintf(buf, "%lld", expr->value); 358 break; 359 case EXPR_FVALUE: 360 buf += sprintf(buf, "%Le", expr->fvalue); 361 break; 362 case EXPR_STRING: 363 buf += sprintf(buf, "%.40s", show_string(expr->string)); 364 break; 365 case EXPR_SYMBOL: 366 buf += sprintf(buf, "%s", show_ident(expr->symbol->ident)); 367 break; 368 case EXPR_LABEL: 369 buf += sprintf(buf, "%s", show_label(expr->symbol->bb_target)); 370 break; 371 default: 372 buf += sprintf(buf, "SETVAL EXPR TYPE %d", expr->type); 373 } 374 break; 375 } 376 case OP_SETFVAL: 377 buf += sprintf(buf, "%s <- ", show_pseudo(insn->target)); 378 buf += sprintf(buf, "%Le", insn->fvalue); 379 break; 380 381 case OP_SWITCH: { 382 struct multijmp *jmp; 383 buf += sprintf(buf, "%s", show_pseudo(insn->cond)); 384 FOR_EACH_PTR(insn->multijmp_list, jmp) { 385 if (jmp->begin == jmp->end) 386 buf += sprintf(buf, ", %lld -> %s", jmp->begin, show_label(jmp->target)); 387 else if (jmp->begin < jmp->end) 388 buf += sprintf(buf, ", %lld ... %lld -> %s", jmp->begin, jmp->end, show_label(jmp->target)); 389 else 390 buf += sprintf(buf, ", default -> %s", show_label(jmp->target)); 391 } END_FOR_EACH_PTR(jmp); 392 break; 393 } 394 case OP_COMPUTEDGOTO: { 395 struct multijmp *jmp; 396 buf += sprintf(buf, "%s", show_pseudo(insn->src)); 397 FOR_EACH_PTR(insn->multijmp_list, jmp) { 398 buf += sprintf(buf, ", %s", show_label(jmp->target)); 399 } END_FOR_EACH_PTR(jmp); 400 break; 401 } 402 403 case OP_PHISOURCE: { 404 struct instruction *phi; 405 buf += sprintf(buf, "%s <- %s ", show_pseudo(insn->target), show_pseudo(insn->phi_src)); 406 FOR_EACH_PTR(insn->phi_users, phi) { 407 buf += sprintf(buf, " (%s)", show_pseudo(phi->target)); 408 } END_FOR_EACH_PTR(phi); 409 break; 410 } 411 412 case OP_PHI: { 413 pseudo_t phi; 414 const char *s = " <-"; 415 buf += sprintf(buf, "%s", show_pseudo(insn->target)); 416 FOR_EACH_PTR(insn->phi_list, phi) { 417 if (phi == VOID && !verbose) 418 continue; 419 buf += sprintf(buf, "%s %s", s, show_pseudo(phi)); 420 s = ","; 421 } END_FOR_EACH_PTR(phi); 422 break; 423 } 424 case OP_LOAD: 425 buf += sprintf(buf, "%s <- %d[%s]", show_pseudo(insn->target), insn->offset, show_pseudo(insn->src)); 426 break; 427 case OP_STORE: 428 buf += sprintf(buf, "%s -> %d[%s]", show_pseudo(insn->target), insn->offset, show_pseudo(insn->src)); 429 break; 430 case OP_INLINED_CALL: 431 case OP_CALL: { 432 struct pseudo *arg; 433 if (insn->target && insn->target != VOID) 434 buf += sprintf(buf, "%s <- ", show_pseudo(insn->target)); 435 buf += sprintf(buf, "%s", show_pseudo(insn->func)); 436 FOR_EACH_PTR(insn->arguments, arg) { 437 buf += sprintf(buf, ", %s", show_pseudo(arg)); 438 } END_FOR_EACH_PTR(arg); 439 break; 440 } 441 case OP_SEXT: case OP_ZEXT: 442 case OP_TRUNC: 443 case OP_FCVTU: case OP_FCVTS: 444 case OP_UCVTF: case OP_SCVTF: 445 case OP_FCVTF: 446 case OP_UTPTR: 447 case OP_PTRTU: 448 case OP_PTRCAST: 449 buf += sprintf(buf, "%s <- (%d) %s", 450 show_pseudo(insn->target), 451 type_size(insn->orig_type), 452 show_pseudo(insn->src)); 453 break; 454 case OP_BINARY ... OP_BINARY_END: 455 case OP_FPCMP ... OP_FPCMP_END: 456 case OP_BINCMP ... OP_BINCMP_END: 457 buf += sprintf(buf, "%s <- %s, %s", show_pseudo(insn->target), show_pseudo(insn->src1), show_pseudo(insn->src2)); 458 break; 459 460 case OP_SEL: 461 buf += sprintf(buf, "%s <- %s, %s, %s", show_pseudo(insn->target), 462 show_pseudo(insn->src1), show_pseudo(insn->src2), show_pseudo(insn->src3)); 463 break; 464 465 case OP_SLICE: 466 buf += sprintf(buf, "%s <- %s, %d, %d", show_pseudo(insn->target), show_pseudo(insn->base), insn->from, insn->len); 467 break; 468 469 case OP_NOT: case OP_NEG: 470 case OP_FNEG: 471 case OP_SYMADDR: 472 buf += sprintf(buf, "%s <- %s", show_pseudo(insn->target), show_pseudo(insn->src1)); 473 break; 474 475 case OP_CONTEXT: 476 buf += sprintf(buf, "%s%d", insn->check ? "check: " : "", insn->increment); 477 break; 478 case OP_RANGE: 479 buf += sprintf(buf, "%s between %s..%s", show_pseudo(insn->src1), show_pseudo(insn->src2), show_pseudo(insn->src3)); 480 break; 481 case OP_NOP: 482 buf += sprintf(buf, "%s <- %s", show_pseudo(insn->target), show_pseudo(insn->src1)); 483 break; 484 case OP_DEATHNOTE: 485 buf += sprintf(buf, "%s", show_pseudo(insn->target)); 486 break; 487 case OP_ASM: 488 buf = show_asm(buf, insn); 489 break; 490 case OP_COPY: 491 buf += sprintf(buf, "%s <- %s", show_pseudo(insn->target), show_pseudo(insn->src)); 492 break; 493 default: 494 break; 495 } 496 497 if (buf >= buffer + sizeof(buffer)) 498 die("instruction buffer overflowed %td\n", buf - buffer); 499 do { --buf; } while (*buf == ' '); 500 *++buf = 0; 501 return buffer; 502 } 503 504 void show_bb(struct basic_block *bb) 505 { 506 struct instruction *insn; 507 508 printf("%s:\n", show_label(bb)); 509 if (verbose) { 510 pseudo_t needs, defines; 511 printf("%s:%d\n", stream_name(bb->pos.stream), bb->pos.line); 512 513 FOR_EACH_PTR(bb->needs, needs) { 514 struct instruction *def = needs->def; 515 if (def->opcode != OP_PHI) { 516 printf(" **uses %s (from %s)**\n", show_pseudo(needs), show_label(def->bb)); 517 } else { 518 pseudo_t phi; 519 const char *sep = " "; 520 printf(" **uses %s (from", show_pseudo(needs)); 521 FOR_EACH_PTR(def->phi_list, phi) { 522 if (phi == VOID) 523 continue; 524 printf("%s(%s:%s)", sep, show_pseudo(phi), show_label(phi->def->bb)); 525 sep = ", "; 526 } END_FOR_EACH_PTR(phi); 527 printf(")**\n"); 528 } 529 } END_FOR_EACH_PTR(needs); 530 531 FOR_EACH_PTR(bb->defines, defines) { 532 printf(" **defines %s **\n", show_pseudo(defines)); 533 } END_FOR_EACH_PTR(defines); 534 535 if (bb->parents) { 536 struct basic_block *from; 537 FOR_EACH_PTR(bb->parents, from) { 538 printf(" **from %s (%s:%d:%d)**\n", show_label(from), 539 stream_name(from->pos.stream), from->pos.line, from->pos.pos); 540 } END_FOR_EACH_PTR(from); 541 } 542 543 if (bb->children) { 544 struct basic_block *to; 545 FOR_EACH_PTR(bb->children, to) { 546 printf(" **to %s (%s:%d:%d)**\n", show_label(to), 547 stream_name(to->pos.stream), to->pos.line, to->pos.pos); 548 } END_FOR_EACH_PTR(to); 549 } 550 } 551 552 FOR_EACH_PTR(bb->insns, insn) { 553 if (!insn->bb && verbose < 2) 554 continue; 555 printf("\t%s\n", show_instruction(insn)); 556 } END_FOR_EACH_PTR(insn); 557 if (!bb_terminated(bb)) 558 printf("\tEND\n"); 559 } 560 561 static void show_symbol_usage(pseudo_t pseudo) 562 { 563 struct pseudo_user *pu; 564 565 if (pseudo) { 566 FOR_EACH_PTR(pseudo->users, pu) { 567 printf("\t%s\n", show_instruction(pu->insn)); 568 } END_FOR_EACH_PTR(pu); 569 } 570 } 571 572 void show_entry(struct entrypoint *ep) 573 { 574 struct symbol *sym; 575 struct basic_block *bb; 576 577 printf("%s:\n", show_ident(ep->name->ident)); 578 579 if (verbose) { 580 printf("ep %p: %s\n", ep, show_ident(ep->name->ident)); 581 582 FOR_EACH_PTR(ep->syms, sym) { 583 if (!sym->pseudo) 584 continue; 585 if (!sym->pseudo->users) 586 continue; 587 printf(" sym: %p %s\n", sym, show_ident(sym->ident)); 588 if (sym->ctype.modifiers & (MOD_EXTERN | MOD_STATIC | MOD_ADDRESSABLE)) 589 printf("\texternal visibility\n"); 590 show_symbol_usage(sym->pseudo); 591 } END_FOR_EACH_PTR(sym); 592 593 printf("\n"); 594 } 595 596 FOR_EACH_PTR(ep->bbs, bb) { 597 if (!bb) 598 continue; 599 if (!bb->parents && !bb->children && !bb->insns && verbose < 2) 600 continue; 601 show_bb(bb); 602 printf("\n"); 603 } END_FOR_EACH_PTR(bb); 604 605 printf("\n"); 606 } 607 608 static void bind_label(struct symbol *label, struct basic_block *bb, struct position pos) 609 { 610 if (label->bb_target) 611 warning(pos, "label '%s' already bound", show_ident(label->ident)); 612 label->bb_target = bb; 613 } 614 615 static struct basic_block * get_bound_block(struct entrypoint *ep, struct symbol *label) 616 { 617 struct basic_block *bb = label->bb_target; 618 619 if (!bb) { 620 bb = alloc_basic_block(ep, label->pos); 621 label->bb_target = bb; 622 } 623 return bb; 624 } 625 626 static void finish_block(struct entrypoint *ep) 627 { 628 struct basic_block *src = ep->active; 629 if (bb_reachable(src)) 630 ep->active = NULL; 631 } 632 633 static void add_goto(struct entrypoint *ep, struct basic_block *dst) 634 { 635 struct basic_block *src = ep->active; 636 if (bb_reachable(src)) { 637 struct instruction *br = alloc_instruction(OP_BR, 0); 638 br->bb_true = dst; 639 add_bb(&dst->parents, src); 640 add_bb(&src->children, dst); 641 br->bb = src; 642 add_instruction(&src->insns, br); 643 ep->active = NULL; 644 } 645 } 646 647 static void add_one_insn(struct entrypoint *ep, struct instruction *insn) 648 { 649 struct basic_block *bb = ep->active; 650 651 if (bb_reachable(bb)) { 652 insn->bb = bb; 653 add_instruction(&bb->insns, insn); 654 } 655 } 656 657 static void set_activeblock(struct entrypoint *ep, struct basic_block *bb) 658 { 659 if (!bb_terminated(ep->active)) 660 add_goto(ep, bb); 661 662 ep->active = bb; 663 if (bb_reachable(bb)) 664 add_bb(&ep->bbs, bb); 665 } 666 667 static void remove_parent(struct basic_block *child, struct basic_block *parent) 668 { 669 remove_bb_from_list(&child->parents, parent, 1); 670 if (!child->parents) 671 repeat_phase |= REPEAT_CFG_CLEANUP; 672 } 673 674 /* Change a "switch" or a conditional branch into a branch */ 675 void insert_branch(struct basic_block *bb, struct instruction *jmp, struct basic_block *target) 676 { 677 struct instruction *br, *old; 678 struct basic_block *child; 679 680 /* Remove the switch */ 681 old = delete_last_instruction(&bb->insns); 682 assert(old == jmp); 683 kill_instruction(old); 684 685 br = alloc_instruction(OP_BR, 0); 686 br->bb = bb; 687 br->bb_true = target; 688 add_instruction(&bb->insns, br); 689 690 FOR_EACH_PTR(bb->children, child) { 691 if (child == target) { 692 target = NULL; /* Trigger just once */ 693 continue; 694 } 695 DELETE_CURRENT_PTR(child); 696 remove_parent(child, bb); 697 } END_FOR_EACH_PTR(child); 698 PACK_PTR_LIST(&bb->children); 699 } 700 701 702 void insert_select(struct basic_block *bb, struct instruction *br, struct instruction *phi_node, pseudo_t if_true, pseudo_t if_false) 703 { 704 pseudo_t target; 705 struct instruction *select; 706 707 /* Remove the 'br' */ 708 delete_last_instruction(&bb->insns); 709 710 select = alloc_typed_instruction(OP_SEL, phi_node->type); 711 select->bb = bb; 712 713 assert(br->cond); 714 use_pseudo(select, br->cond, &select->src1); 715 716 target = phi_node->target; 717 assert(target->def == phi_node); 718 select->target = target; 719 target->def = select; 720 721 use_pseudo(select, if_true, &select->src2); 722 use_pseudo(select, if_false, &select->src3); 723 724 add_instruction(&bb->insns, select); 725 add_instruction(&bb->insns, br); 726 } 727 728 static inline int bb_empty(struct basic_block *bb) 729 { 730 return !bb->insns; 731 } 732 733 /* Add a label to the currently active block, return new active block */ 734 static struct basic_block * add_label(struct entrypoint *ep, struct symbol *label) 735 { 736 struct basic_block *bb = label->bb_target; 737 738 if (bb) { 739 set_activeblock(ep, bb); 740 return bb; 741 } 742 bb = ep->active; 743 if (!bb_reachable(bb) || !bb_empty(bb)) { 744 bb = alloc_basic_block(ep, label->pos); 745 set_activeblock(ep, bb); 746 } 747 label->bb_target = bb; 748 return bb; 749 } 750 751 static void add_branch(struct entrypoint *ep, pseudo_t cond, struct basic_block *bb_true, struct basic_block *bb_false) 752 { 753 struct basic_block *bb = ep->active; 754 struct instruction *br; 755 756 if (bb_reachable(bb)) { 757 br = alloc_instruction(OP_CBR, 0); 758 use_pseudo(br, cond, &br->cond); 759 br->bb_true = bb_true; 760 br->bb_false = bb_false; 761 add_bb(&bb_true->parents, bb); 762 add_bb(&bb_false->parents, bb); 763 add_bb(&bb->children, bb_true); 764 add_bb(&bb->children, bb_false); 765 add_one_insn(ep, br); 766 } 767 } 768 769 pseudo_t alloc_pseudo(struct instruction *def) 770 { 771 static int nr = 0; 772 struct pseudo * pseudo = __alloc_pseudo(0); 773 pseudo->type = PSEUDO_REG; 774 pseudo->nr = ++nr; 775 pseudo->def = def; 776 return pseudo; 777 } 778 779 static pseudo_t symbol_pseudo(struct entrypoint *ep, struct symbol *sym) 780 { 781 pseudo_t pseudo; 782 783 if (!sym) 784 return VOID; 785 786 pseudo = sym->pseudo; 787 if (!pseudo) { 788 pseudo = __alloc_pseudo(0); 789 pseudo->nr = -1; 790 pseudo->type = PSEUDO_SYM; 791 pseudo->sym = sym; 792 pseudo->ident = sym->ident; 793 sym->pseudo = pseudo; 794 add_pseudo(&ep->accesses, pseudo); 795 } 796 /* Symbol pseudos have neither nr nor def */ 797 return pseudo; 798 } 799 800 pseudo_t value_pseudo(long long val) 801 { 802 #define MAX_VAL_HASH 64 803 static struct pseudo_list *prev[MAX_VAL_HASH]; 804 int hash = val & (MAX_VAL_HASH-1); 805 struct pseudo_list **list = prev + hash; 806 pseudo_t pseudo; 807 808 FOR_EACH_PTR(*list, pseudo) { 809 if (pseudo->value == val) 810 return pseudo; 811 } END_FOR_EACH_PTR(pseudo); 812 813 pseudo = __alloc_pseudo(0); 814 pseudo->type = PSEUDO_VAL; 815 pseudo->value = val; 816 add_pseudo(list, pseudo); 817 818 /* Value pseudos have neither nr, usage nor def */ 819 return pseudo; 820 } 821 822 pseudo_t undef_pseudo(void) 823 { 824 pseudo_t pseudo = __alloc_pseudo(0); 825 pseudo->type = PSEUDO_UNDEF; 826 return pseudo; 827 } 828 829 static pseudo_t argument_pseudo(struct entrypoint *ep, int nr) 830 { 831 pseudo_t pseudo = __alloc_pseudo(0); 832 struct instruction *entry = ep->entry; 833 834 pseudo->type = PSEUDO_ARG; 835 pseudo->nr = nr; 836 pseudo->def = entry; 837 add_pseudo(&entry->arg_list, pseudo); 838 839 /* Argument pseudos have neither usage nor def */ 840 return pseudo; 841 } 842 843 struct instruction *alloc_phisrc(pseudo_t pseudo, struct symbol *type) 844 { 845 struct instruction *insn = alloc_typed_instruction(OP_PHISOURCE, type); 846 pseudo_t phi = __alloc_pseudo(0); 847 static int nr = 0; 848 849 phi->type = PSEUDO_PHI; 850 phi->nr = ++nr; 851 phi->def = insn; 852 853 use_pseudo(insn, pseudo, &insn->phi_src); 854 insn->target = phi; 855 return insn; 856 } 857 858 pseudo_t alloc_phi(struct basic_block *source, pseudo_t pseudo, struct symbol *type) 859 { 860 struct instruction *insn; 861 862 if (!source) 863 return VOID; 864 865 insn = alloc_phisrc(pseudo, type); 866 insn->bb = source; 867 add_instruction(&source->insns, insn); 868 return insn->target; 869 } 870 871 struct instruction *alloc_phi_node(struct basic_block *bb, struct symbol *type, struct ident *ident) 872 { 873 struct instruction *phi_node = alloc_typed_instruction(OP_PHI, type); 874 pseudo_t phi; 875 876 phi = alloc_pseudo(phi_node); 877 phi->ident = ident; 878 phi->def = phi_node; 879 phi_node->target = phi; 880 phi_node->bb = bb; 881 return phi_node; 882 } 883 884 void add_phi_node(struct basic_block *bb, struct instruction *phi_node) 885 { 886 struct instruction *insn; 887 888 FOR_EACH_PTR(bb->insns, insn) { 889 enum opcode op = insn->opcode; 890 if (op == OP_PHI) 891 continue; 892 INSERT_CURRENT(phi_node, insn); 893 return; 894 } END_FOR_EACH_PTR(insn); 895 896 // FIXME 897 add_instruction(&bb->insns, phi_node); 898 } 899 900 struct instruction *insert_phi_node(struct basic_block *bb, struct symbol *var) 901 { 902 struct instruction *phi_node = alloc_phi_node(bb, var, var->ident); 903 add_phi_node(bb, phi_node); 904 return phi_node; 905 } 906 907 /* 908 * We carry the "access_data" structure around for any accesses, 909 * which simplifies things a lot. It contains all the access 910 * information in one place. 911 */ 912 struct access_data { 913 struct symbol *type; // ctype 914 struct symbol *btype; // base type of bitfields 915 pseudo_t address; // pseudo containing address .. 916 unsigned int offset; // byte offset 917 }; 918 919 static int linearize_simple_address(struct entrypoint *ep, 920 struct expression *addr, 921 struct access_data *ad) 922 { 923 if (addr->type == EXPR_SYMBOL) { 924 linearize_one_symbol(ep, addr->symbol); 925 ad->address = symbol_pseudo(ep, addr->symbol); 926 return 1; 927 } 928 if (addr->type == EXPR_BINOP) { 929 if (addr->right->type == EXPR_VALUE) { 930 if (addr->op == '+') { 931 ad->offset += get_expression_value(addr->right); 932 return linearize_simple_address(ep, addr->left, ad); 933 } 934 } 935 } 936 ad->address = linearize_expression(ep, addr); 937 return 1; 938 } 939 940 static struct symbol *bitfield_base_type(struct symbol *sym) 941 { 942 struct symbol *base = sym; 943 944 if (sym) { 945 if (sym->type == SYM_NODE) 946 base = base->ctype.base_type; 947 if (base->type == SYM_BITFIELD) 948 return base->ctype.base_type; 949 } 950 return sym; 951 } 952 953 static int linearize_address_gen(struct entrypoint *ep, 954 struct expression *expr, 955 struct access_data *ad) 956 { 957 struct symbol *ctype = expr->ctype; 958 959 if (!ctype) 960 return 0; 961 ad->type = ctype; 962 if (expr->type == EXPR_PREOP && expr->op == '*') 963 return linearize_simple_address(ep, expr->unop, ad); 964 965 warning(expr->pos, "generating address of non-lvalue (%d)", expr->type); 966 return 0; 967 } 968 969 static pseudo_t add_load(struct entrypoint *ep, struct access_data *ad) 970 { 971 struct instruction *insn; 972 pseudo_t new; 973 974 if (!ep->active) 975 return VOID; 976 977 insn = alloc_typed_instruction(OP_LOAD, ad->btype); 978 new = alloc_pseudo(insn); 979 980 insn->target = new; 981 insn->offset = ad->offset; 982 insn->is_volatile = ad->type && (ad->type->ctype.modifiers & MOD_VOLATILE); 983 use_pseudo(insn, ad->address, &insn->src); 984 add_one_insn(ep, insn); 985 return new; 986 } 987 988 static void add_store(struct entrypoint *ep, struct access_data *ad, pseudo_t value) 989 { 990 struct basic_block *bb = ep->active; 991 struct instruction *store; 992 993 if (!bb) 994 return; 995 996 store = alloc_typed_instruction(OP_STORE, ad->btype); 997 store->offset = ad->offset; 998 store->is_volatile = ad->type && (ad->type->ctype.modifiers & MOD_VOLATILE); 999 use_pseudo(store, value, &store->target); 1000 use_pseudo(store, ad->address, &store->src); 1001 add_one_insn(ep, store); 1002 } 1003 1004 static pseudo_t linearize_bitfield_insert(struct entrypoint *ep, 1005 pseudo_t ori, pseudo_t val, struct symbol *ctype, struct symbol *btype) 1006 { 1007 unsigned int shift = ctype->bit_offset; 1008 unsigned int size = ctype->bit_size; 1009 unsigned long long mask = ((1ULL << size) - 1); 1010 unsigned long long smask= bits_mask(btype->bit_size); 1011 1012 val = add_cast(ep, btype, ctype, OP_ZEXT, val); 1013 if (shift) { 1014 val = add_binary_op(ep, btype, OP_SHL, val, value_pseudo(shift)); 1015 mask <<= shift; 1016 } 1017 ori = add_binary_op(ep, btype, OP_AND, ori, value_pseudo(~mask & smask)); 1018 val = add_binary_op(ep, btype, OP_OR, ori, val); 1019 1020 return val; 1021 } 1022 1023 static pseudo_t linearize_store_gen(struct entrypoint *ep, 1024 pseudo_t value, 1025 struct access_data *ad) 1026 { 1027 struct symbol *ctype = ad->type; 1028 struct symbol *btype; 1029 pseudo_t store = value; 1030 1031 if (!ep->active) 1032 return VOID; 1033 1034 btype = ad->btype = bitfield_base_type(ctype); 1035 if (type_size(btype) != type_size(ctype)) { 1036 pseudo_t orig = add_load(ep, ad); 1037 store = linearize_bitfield_insert(ep, orig, value, ctype, btype); 1038 } 1039 add_store(ep, ad, store); 1040 return value; 1041 } 1042 1043 static void taint_undefined_behaviour(struct instruction *insn) 1044 { 1045 pseudo_t src2; 1046 1047 switch (insn->opcode) { 1048 case OP_LSR: 1049 case OP_ASR: 1050 case OP_SHL: 1051 src2 = insn->src2; 1052 if (src2->type != PSEUDO_VAL) 1053 break; 1054 if ((unsigned long long)src2->value >= insn->size) 1055 insn->tainted = 1; 1056 break; 1057 } 1058 } 1059 1060 static pseudo_t add_binary_op(struct entrypoint *ep, struct symbol *ctype, int op, pseudo_t left, pseudo_t right) 1061 { 1062 struct instruction *insn = alloc_typed_instruction(op, ctype); 1063 pseudo_t target = alloc_pseudo(insn); 1064 insn->target = target; 1065 use_pseudo(insn, left, &insn->src1); 1066 use_pseudo(insn, right, &insn->src2); 1067 add_one_insn(ep, insn); 1068 return target; 1069 } 1070 1071 static pseudo_t add_setval(struct entrypoint *ep, struct symbol *ctype, struct expression *val) 1072 { 1073 struct instruction *insn = alloc_typed_instruction(OP_SETVAL, ctype); 1074 pseudo_t target = alloc_pseudo(insn); 1075 insn->target = target; 1076 insn->val = val; 1077 add_one_insn(ep, insn); 1078 return target; 1079 } 1080 1081 static pseudo_t add_setfval(struct entrypoint *ep, struct symbol *ctype, long double fval) 1082 { 1083 struct instruction *insn = alloc_typed_instruction(OP_SETFVAL, ctype); 1084 pseudo_t target = alloc_pseudo(insn); 1085 insn->target = target; 1086 insn->fvalue = fval; 1087 add_one_insn(ep, insn); 1088 return target; 1089 } 1090 1091 static pseudo_t add_symbol_address(struct entrypoint *ep, struct symbol *sym) 1092 { 1093 struct instruction *insn = alloc_instruction(OP_SYMADDR, bits_in_pointer); 1094 pseudo_t target = alloc_pseudo(insn); 1095 1096 insn->target = target; 1097 use_pseudo(insn, symbol_pseudo(ep, sym), &insn->src); 1098 add_one_insn(ep, insn); 1099 return target; 1100 } 1101 1102 static pseudo_t linearize_bitfield_extract(struct entrypoint *ep, 1103 pseudo_t val, struct symbol *ctype, struct symbol *btype) 1104 { 1105 unsigned int off = ctype->bit_offset; 1106 1107 if (off) { 1108 pseudo_t shift = value_pseudo(off); 1109 val = add_binary_op(ep, btype, OP_LSR, val, shift); 1110 } 1111 val = cast_pseudo(ep, val, btype, ctype); 1112 return val; 1113 } 1114 1115 static pseudo_t linearize_load_gen(struct entrypoint *ep, struct access_data *ad) 1116 { 1117 struct symbol *ctype = ad->type; 1118 struct symbol *btype; 1119 pseudo_t new; 1120 1121 if (!ep->active) 1122 return VOID; 1123 1124 btype = ad->btype = bitfield_base_type(ctype); 1125 new = add_load(ep, ad); 1126 if (ctype->bit_size != type_size(btype)) 1127 new = linearize_bitfield_extract(ep, new, ctype, btype); 1128 return new; 1129 } 1130 1131 static pseudo_t linearize_access(struct entrypoint *ep, struct expression *expr) 1132 { 1133 struct access_data ad = { NULL, }; 1134 pseudo_t value; 1135 1136 if (!linearize_address_gen(ep, expr, &ad)) 1137 return VOID; 1138 value = linearize_load_gen(ep, &ad); 1139 return value; 1140 } 1141 1142 static pseudo_t linearize_inc_dec(struct entrypoint *ep, struct expression *expr, int postop) 1143 { 1144 struct access_data ad = { NULL, }; 1145 pseudo_t old, new, one; 1146 int op = expr->op == SPECIAL_INCREMENT ? OP_ADD : OP_SUB; 1147 1148 if (!linearize_address_gen(ep, expr->unop, &ad)) 1149 return VOID; 1150 1151 old = linearize_load_gen(ep, &ad); 1152 op = opcode_float(op, expr->ctype); 1153 if (is_float_type(expr->ctype)) 1154 one = add_setfval(ep, expr->ctype, expr->op_value); 1155 else 1156 one = value_pseudo(expr->op_value); 1157 if (ad.btype != ad.type) 1158 old = cast_pseudo(ep, old, ad.type, ad.btype); 1159 new = add_binary_op(ep, ad.btype, op, old, one); 1160 if (ad.btype != ad.type) 1161 new = cast_pseudo(ep, new, ad.btype, ad.type); 1162 linearize_store_gen(ep, new, &ad); 1163 return postop ? old : new; 1164 } 1165 1166 static pseudo_t add_unop(struct entrypoint *ep, struct symbol *ctype, int op, pseudo_t src) 1167 { 1168 struct instruction *insn = alloc_typed_instruction(op, ctype); 1169 pseudo_t new = alloc_pseudo(insn); 1170 1171 insn->target = new; 1172 use_pseudo(insn, src, &insn->src1); 1173 add_one_insn(ep, insn); 1174 return new; 1175 } 1176 1177 static pseudo_t add_cast(struct entrypoint *ep, struct symbol *to, 1178 struct symbol *from, int op, pseudo_t src) 1179 { 1180 pseudo_t new = add_unop(ep, to, op, src); 1181 new->def->orig_type = from; 1182 return new; 1183 } 1184 1185 static pseudo_t linearize_slice(struct entrypoint *ep, struct expression *expr) 1186 { 1187 pseudo_t pre = linearize_expression(ep, expr->base); 1188 struct instruction *insn = alloc_typed_instruction(OP_SLICE, expr->ctype); 1189 pseudo_t new = alloc_pseudo(insn); 1190 1191 insn->target = new; 1192 insn->from = expr->r_bitpos; 1193 insn->len = expr->r_nrbits; 1194 use_pseudo(insn, pre, &insn->base); 1195 add_one_insn(ep, insn); 1196 return new; 1197 } 1198 1199 static pseudo_t linearize_regular_preop(struct entrypoint *ep, struct expression *expr) 1200 { 1201 pseudo_t pre = linearize_expression(ep, expr->unop); 1202 struct symbol *ctype = expr->ctype; 1203 switch (expr->op) { 1204 case '+': 1205 return pre; 1206 case '!': { 1207 pseudo_t zero = value_pseudo(0); 1208 return add_binary_op(ep, ctype, OP_SET_EQ, pre, zero); 1209 } 1210 case '~': 1211 return add_unop(ep, ctype, OP_NOT, pre); 1212 case '-': 1213 return add_unop(ep, ctype, opcode_float(OP_NEG, ctype), pre); 1214 } 1215 return VOID; 1216 } 1217 1218 static pseudo_t linearize_preop(struct entrypoint *ep, struct expression *expr) 1219 { 1220 /* 1221 * '*' is an lvalue access, and is fundamentally different 1222 * from an arithmetic operation. Maybe it should have an 1223 * expression type of its own.. 1224 */ 1225 if (expr->op == '*') 1226 return linearize_access(ep, expr); 1227 if (expr->op == SPECIAL_INCREMENT || expr->op == SPECIAL_DECREMENT) 1228 return linearize_inc_dec(ep, expr, 0); 1229 return linearize_regular_preop(ep, expr); 1230 } 1231 1232 static pseudo_t linearize_postop(struct entrypoint *ep, struct expression *expr) 1233 { 1234 return linearize_inc_dec(ep, expr, 1); 1235 } 1236 1237 /* 1238 * Casts to pointers are "less safe" than other casts, since 1239 * they imply type-unsafe accesses. "void *" is a special 1240 * case, since you can't access through it anyway without another 1241 * cast. 1242 */ 1243 enum mtype { 1244 MTYPE_UINT, 1245 MTYPE_SINT, 1246 MTYPE_PTR, 1247 MTYPE_VPTR, // TODO: must be removed ? 1248 MTYPE_FLOAT, 1249 MTYPE_BAD, 1250 }; 1251 1252 static enum mtype get_mtype(struct symbol *s) 1253 { 1254 int sign = (s->ctype.modifiers & MOD_SIGNED) ? 1 : 0; 1255 1256 retry: switch (s->type) { 1257 case SYM_NODE: 1258 s = s->ctype.base_type; 1259 goto retry; 1260 case SYM_PTR: 1261 if (s->ctype.base_type == &void_ctype) 1262 return MTYPE_VPTR; 1263 return MTYPE_PTR; 1264 case SYM_BITFIELD: 1265 case SYM_RESTRICT: 1266 case SYM_FOULED: 1267 case SYM_ENUM: 1268 s = s->ctype.base_type; 1269 /* fall-through */ 1270 case_int: 1271 return sign ? MTYPE_SINT : MTYPE_UINT; 1272 case SYM_BASETYPE: 1273 if (s->ctype.base_type == &fp_type) 1274 return MTYPE_FLOAT; 1275 if (s->ctype.base_type == &int_type) 1276 goto case_int; 1277 /* fall-through */ 1278 default: 1279 return MTYPE_BAD; 1280 } 1281 } 1282 1283 static int get_cast_opcode(struct symbol *dst, struct symbol *src) 1284 { 1285 enum mtype stype = get_mtype(src); 1286 enum mtype dtype = get_mtype(dst); 1287 1288 switch (dtype) { 1289 case MTYPE_FLOAT: 1290 switch (stype) { 1291 case MTYPE_FLOAT: 1292 if (dst->bit_size == src->bit_size) 1293 return OP_NOP; 1294 return OP_FCVTF; 1295 case MTYPE_UINT: 1296 return OP_UCVTF; 1297 case MTYPE_SINT: 1298 return OP_SCVTF; 1299 default: 1300 return OP_BADOP; 1301 } 1302 case MTYPE_PTR: 1303 switch (stype) { 1304 case MTYPE_UINT: 1305 case MTYPE_SINT: 1306 return OP_UTPTR; 1307 case MTYPE_PTR: 1308 case MTYPE_VPTR: 1309 return OP_PTRCAST; 1310 default: 1311 return OP_BADOP; 1312 } 1313 case MTYPE_VPTR: 1314 switch (stype) { 1315 case MTYPE_PTR: 1316 case MTYPE_VPTR: 1317 case MTYPE_UINT: 1318 stype = MTYPE_UINT; 1319 /* fall through */ 1320 case MTYPE_SINT: 1321 break; 1322 default: 1323 return OP_BADOP; 1324 } 1325 /* fall through */ 1326 case MTYPE_UINT: 1327 case MTYPE_SINT: 1328 switch (stype) { 1329 case MTYPE_FLOAT: 1330 return dtype == MTYPE_UINT ? OP_FCVTU : OP_FCVTS; 1331 case MTYPE_PTR: 1332 return OP_PTRTU; 1333 case MTYPE_VPTR: 1334 case MTYPE_UINT: 1335 case MTYPE_SINT: 1336 if (dst->bit_size ==src->bit_size) 1337 return OP_NOP; 1338 if (dst->bit_size < src->bit_size) 1339 return OP_TRUNC; 1340 return stype == MTYPE_SINT ? OP_SEXT : OP_ZEXT; 1341 default: 1342 return OP_BADOP; 1343 } 1344 /* fall through */ 1345 default: 1346 if (src->type == SYM_NODE) 1347 src = src->ctype.base_type; 1348 if (dst->type == SYM_NODE) 1349 dst = dst->ctype.base_type; 1350 if (src == dst) 1351 return OP_NOP; 1352 return OP_BADOP; 1353 } 1354 } 1355 1356 static pseudo_t cast_pseudo(struct entrypoint *ep, pseudo_t src, struct symbol *from, struct symbol *to) 1357 { 1358 const struct position pos = current_pos; 1359 pseudo_t result; 1360 struct instruction *insn; 1361 int opcode; 1362 1363 if (src == VOID) 1364 return VOID; 1365 if (!from || !to) 1366 return VOID; 1367 if (from->bit_size < 0 || to->bit_size < 0) 1368 return VOID; 1369 opcode = get_cast_opcode(to, from); 1370 switch (opcode) { 1371 case OP_NOP: 1372 return src; 1373 case OP_UTPTR: 1374 if (from->bit_size == to->bit_size) 1375 break; 1376 if (src == value_pseudo(0)) 1377 break; 1378 if (Wint_to_pointer_cast) 1379 warning(pos, "non size-preserving integer to pointer cast"); 1380 src = cast_pseudo(ep, src, from, size_t_ctype); 1381 from = size_t_ctype; 1382 break; 1383 case OP_PTRTU: 1384 if (from->bit_size == to->bit_size) 1385 break; 1386 if (Wpointer_to_int_cast) 1387 warning(pos, "non size-preserving pointer to integer cast"); 1388 src = cast_pseudo(ep, src, from, size_t_ctype); 1389 return cast_pseudo(ep, src, size_t_ctype, to); 1390 case OP_BADOP: 1391 return VOID; 1392 default: 1393 break; 1394 } 1395 insn = alloc_typed_instruction(opcode, to); 1396 result = alloc_pseudo(insn); 1397 insn->target = result; 1398 insn->orig_type = from; 1399 use_pseudo(insn, src, &insn->src); 1400 add_one_insn(ep, insn); 1401 return result; 1402 } 1403 1404 static int map_opcode(int opcode, struct symbol *ctype) 1405 { 1406 if (ctype && is_float_type(ctype)) 1407 return opcode_table[opcode].to_float; 1408 if (ctype && (ctype->ctype.modifiers & MOD_SIGNED)) { 1409 switch(opcode) { 1410 case OP_DIVU: case OP_MODU: case OP_LSR: 1411 opcode++; 1412 } 1413 } 1414 return opcode; 1415 } 1416 1417 static inline pseudo_t add_convert_to_bool(struct entrypoint *ep, pseudo_t src, struct symbol *type) 1418 { 1419 pseudo_t zero; 1420 int op; 1421 1422 if (!type || src == VOID) 1423 return VOID; 1424 if (is_bool_type(type)) 1425 return src; 1426 if (src->type == PSEUDO_VAL && (src->value == 0 || src->value == 1)) 1427 return src; 1428 if (is_float_type(type)) { 1429 zero = add_setfval(ep, type, 0.0); 1430 op = map_opcode(OP_SET_NE, type); 1431 } else { 1432 zero = value_pseudo(0); 1433 op = OP_SET_NE; 1434 } 1435 return add_binary_op(ep, &bool_ctype, op, src, zero); 1436 } 1437 1438 static pseudo_t linearize_expression_to_bool(struct entrypoint *ep, struct expression *expr) 1439 { 1440 pseudo_t dst; 1441 dst = linearize_expression(ep, expr); 1442 dst = add_convert_to_bool(ep, dst, expr->ctype); 1443 return dst; 1444 } 1445 1446 static pseudo_t linearize_assignment(struct entrypoint *ep, struct expression *expr) 1447 { 1448 struct access_data ad = { NULL, }; 1449 struct expression *target = expr->left; 1450 struct expression *src = expr->right; 1451 struct symbol *ctype; 1452 pseudo_t value; 1453 1454 value = linearize_expression(ep, src); 1455 if (!target || !linearize_address_gen(ep, target, &ad)) 1456 return value; 1457 if (expr->op != '=') { 1458 pseudo_t oldvalue = linearize_load_gen(ep, &ad); 1459 pseudo_t dst; 1460 static const int op_trans[] = { 1461 [SPECIAL_ADD_ASSIGN - SPECIAL_BASE] = OP_ADD, 1462 [SPECIAL_SUB_ASSIGN - SPECIAL_BASE] = OP_SUB, 1463 [SPECIAL_MUL_ASSIGN - SPECIAL_BASE] = OP_MUL, 1464 [SPECIAL_DIV_ASSIGN - SPECIAL_BASE] = OP_DIVU, 1465 [SPECIAL_MOD_ASSIGN - SPECIAL_BASE] = OP_MODU, 1466 [SPECIAL_SHL_ASSIGN - SPECIAL_BASE] = OP_SHL, 1467 [SPECIAL_SHR_ASSIGN - SPECIAL_BASE] = OP_LSR, 1468 [SPECIAL_AND_ASSIGN - SPECIAL_BASE] = OP_AND, 1469 [SPECIAL_OR_ASSIGN - SPECIAL_BASE] = OP_OR, 1470 [SPECIAL_XOR_ASSIGN - SPECIAL_BASE] = OP_XOR 1471 }; 1472 int opcode; 1473 1474 if (!src) 1475 return VOID; 1476 1477 ctype = src->ctype; 1478 oldvalue = cast_pseudo(ep, oldvalue, target->ctype, ctype); 1479 opcode = map_opcode(op_trans[expr->op - SPECIAL_BASE], ctype); 1480 dst = add_binary_op(ep, ctype, opcode, oldvalue, value); 1481 taint_undefined_behaviour(dst->def); 1482 value = cast_pseudo(ep, dst, ctype, expr->ctype); 1483 } 1484 value = linearize_store_gen(ep, value, &ad); 1485 return value; 1486 } 1487 1488 static pseudo_t linearize_call_expression(struct entrypoint *ep, struct expression *expr) 1489 { 1490 struct expression *arg, *fn; 1491 struct instruction *insn = alloc_typed_instruction(OP_CALL, expr->ctype); 1492 pseudo_t retval, call; 1493 struct ctype *ctype = NULL; 1494 struct symbol *fntype; 1495 struct context *context; 1496 1497 if (!expr->ctype) 1498 return VOID; 1499 1500 fn = expr->fn; 1501 fntype = fn->ctype; 1502 ctype = &fntype->ctype; 1503 if (fntype->type == SYM_NODE) 1504 fntype = fntype->ctype.base_type; 1505 1506 add_symbol(&insn->fntypes, fntype); 1507 FOR_EACH_PTR(expr->args, arg) { 1508 pseudo_t new = linearize_expression(ep, arg); 1509 use_pseudo(insn, new, add_pseudo(&insn->arguments, new)); 1510 add_symbol(&insn->fntypes, arg->ctype); 1511 } END_FOR_EACH_PTR(arg); 1512 1513 if (fn->type == EXPR_PREOP && fn->op == '*' && is_func_type(fn->ctype)) 1514 fn = fn->unop; 1515 1516 if (fn->type == EXPR_SYMBOL) { 1517 call = symbol_pseudo(ep, fn->symbol); 1518 } else { 1519 call = linearize_expression(ep, fn); 1520 } 1521 use_pseudo(insn, call, &insn->func); 1522 retval = VOID; 1523 if (expr->ctype != &void_ctype) 1524 retval = alloc_pseudo(insn); 1525 insn->target = retval; 1526 add_one_insn(ep, insn); 1527 1528 if (ctype) { 1529 FOR_EACH_PTR(ctype->contexts, context) { 1530 int in = context->in; 1531 int out = context->out; 1532 int check = 0; 1533 int context_diff; 1534 if (in < 0) { 1535 check = 1; 1536 in = 0; 1537 } 1538 if (out < 0) { 1539 check = 0; 1540 out = 0; 1541 } 1542 context_diff = out - in; 1543 if (check || context_diff) { 1544 insn = alloc_instruction(OP_CONTEXT, 0); 1545 insn->increment = context_diff; 1546 insn->check = check; 1547 insn->context_expr = context->context; 1548 add_one_insn(ep, insn); 1549 } 1550 } END_FOR_EACH_PTR(context); 1551 } 1552 1553 return retval; 1554 } 1555 1556 static pseudo_t linearize_binop_bool(struct entrypoint *ep, struct expression *expr) 1557 { 1558 pseudo_t src1, src2, dst; 1559 int op = (expr->op == SPECIAL_LOGICAL_OR) ? OP_OR : OP_AND; 1560 1561 src1 = linearize_expression_to_bool(ep, expr->left); 1562 src2 = linearize_expression_to_bool(ep, expr->right); 1563 dst = add_binary_op(ep, &bool_ctype, op, src1, src2); 1564 if (expr->ctype != &bool_ctype) 1565 dst = cast_pseudo(ep, dst, &bool_ctype, expr->ctype); 1566 return dst; 1567 } 1568 1569 static pseudo_t linearize_binop(struct entrypoint *ep, struct expression *expr) 1570 { 1571 pseudo_t src1, src2, dst; 1572 static const int opcode[] = { 1573 ['+'] = OP_ADD, ['-'] = OP_SUB, 1574 ['*'] = OP_MUL, ['/'] = OP_DIVU, 1575 ['%'] = OP_MODU, ['&'] = OP_AND, 1576 ['|'] = OP_OR, ['^'] = OP_XOR, 1577 [SPECIAL_LEFTSHIFT] = OP_SHL, 1578 [SPECIAL_RIGHTSHIFT] = OP_LSR, 1579 }; 1580 int op; 1581 1582 src1 = linearize_expression(ep, expr->left); 1583 src2 = linearize_expression(ep, expr->right); 1584 op = map_opcode(opcode[expr->op], expr->ctype); 1585 dst = add_binary_op(ep, expr->ctype, op, src1, src2); 1586 taint_undefined_behaviour(dst->def); 1587 return dst; 1588 } 1589 1590 static pseudo_t linearize_logical_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false); 1591 1592 static pseudo_t linearize_cond_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false); 1593 1594 static pseudo_t linearize_select(struct entrypoint *ep, struct expression *expr) 1595 { 1596 pseudo_t cond, valt, valf, res; 1597 struct instruction *insn; 1598 1599 valt = linearize_expression(ep, expr->cond_true); 1600 valf = linearize_expression(ep, expr->cond_false); 1601 cond = linearize_expression(ep, expr->conditional); 1602 1603 insn = alloc_typed_instruction(OP_SEL, expr->ctype); 1604 if (!expr->cond_true) 1605 valt = cond; 1606 use_pseudo(insn, cond, &insn->src1); 1607 use_pseudo(insn, valt, &insn->src2); 1608 use_pseudo(insn, valf, &insn->src3); 1609 1610 res = alloc_pseudo(insn); 1611 insn->target = res; 1612 add_one_insn(ep, insn); 1613 return res; 1614 } 1615 1616 static pseudo_t add_join_conditional(struct entrypoint *ep, struct expression *expr, 1617 pseudo_t phi1, pseudo_t phi2) 1618 { 1619 pseudo_t target; 1620 struct instruction *phi_node; 1621 1622 if (phi1 == VOID) 1623 return phi2; 1624 if (phi2 == VOID) 1625 return phi1; 1626 1627 phi_node = alloc_typed_instruction(OP_PHI, expr->ctype); 1628 use_pseudo(phi_node, phi1, add_pseudo(&phi_node->phi_list, phi1)); 1629 use_pseudo(phi_node, phi2, add_pseudo(&phi_node->phi_list, phi2)); 1630 phi_node->target = target = alloc_pseudo(phi_node); 1631 add_one_insn(ep, phi_node); 1632 return target; 1633 } 1634 1635 static pseudo_t linearize_short_conditional(struct entrypoint *ep, struct expression *expr, 1636 struct expression *cond, 1637 struct expression *expr_false) 1638 { 1639 pseudo_t src1, src2; 1640 struct basic_block *bb_false; 1641 struct basic_block *merge; 1642 pseudo_t phi1, phi2; 1643 1644 if (!expr_false || !ep->active) 1645 return VOID; 1646 1647 bb_false = alloc_basic_block(ep, expr_false->pos); 1648 merge = alloc_basic_block(ep, expr->pos); 1649 1650 src1 = linearize_expression(ep, cond); 1651 phi1 = alloc_phi(ep->active, src1, expr->ctype); 1652 add_branch(ep, src1, merge, bb_false); 1653 1654 set_activeblock(ep, bb_false); 1655 src2 = linearize_expression(ep, expr_false); 1656 phi2 = alloc_phi(ep->active, src2, expr->ctype); 1657 set_activeblock(ep, merge); 1658 1659 return add_join_conditional(ep, expr, phi1, phi2); 1660 } 1661 1662 static pseudo_t linearize_conditional(struct entrypoint *ep, struct expression *expr, 1663 struct expression *cond, 1664 struct expression *expr_true, 1665 struct expression *expr_false) 1666 { 1667 pseudo_t src1, src2; 1668 pseudo_t phi1, phi2; 1669 struct basic_block *bb_true, *bb_false, *merge; 1670 1671 if (!cond || !expr_true || !expr_false || !ep->active) 1672 return VOID; 1673 bb_true = alloc_basic_block(ep, expr_true->pos); 1674 bb_false = alloc_basic_block(ep, expr_false->pos); 1675 merge = alloc_basic_block(ep, expr->pos); 1676 1677 linearize_cond_branch(ep, cond, bb_true, bb_false); 1678 1679 set_activeblock(ep, bb_true); 1680 src1 = linearize_expression(ep, expr_true); 1681 phi1 = alloc_phi(ep->active, src1, expr->ctype); 1682 add_goto(ep, merge); 1683 1684 set_activeblock(ep, bb_false); 1685 src2 = linearize_expression(ep, expr_false); 1686 phi2 = alloc_phi(ep->active, src2, expr->ctype); 1687 set_activeblock(ep, merge); 1688 1689 return add_join_conditional(ep, expr, phi1, phi2); 1690 } 1691 1692 static void insert_phis(struct basic_block *bb, pseudo_t src, struct symbol *ctype, 1693 struct instruction *node) 1694 { 1695 struct basic_block *parent; 1696 1697 FOR_EACH_PTR(bb->parents, parent) { 1698 struct instruction *br = delete_last_instruction(&parent->insns); 1699 pseudo_t phi = alloc_phi(parent, src, ctype); 1700 add_instruction(&parent->insns, br); 1701 use_pseudo(node, phi, add_pseudo(&node->phi_list, phi)); 1702 } END_FOR_EACH_PTR(parent); 1703 } 1704 1705 static pseudo_t linearize_logical(struct entrypoint *ep, struct expression *expr) 1706 { 1707 struct symbol *ctype = expr->ctype; 1708 struct basic_block *other, *merge; 1709 struct instruction *node; 1710 pseudo_t src1, src2, phi2; 1711 1712 if (!ep->active || !expr->left || !expr->right) 1713 return VOID; 1714 1715 other = alloc_basic_block(ep, expr->right->pos); 1716 merge = alloc_basic_block(ep, expr->pos); 1717 node = alloc_phi_node(merge, ctype, NULL); 1718 1719 // LHS and its shortcut 1720 if (expr->op == SPECIAL_LOGICAL_OR) { 1721 linearize_cond_branch(ep, expr->left, merge, other); 1722 src1 = value_pseudo(1); 1723 } else { 1724 linearize_cond_branch(ep, expr->left, other, merge); 1725 src1 = value_pseudo(0); 1726 } 1727 insert_phis(merge, src1, ctype, node); 1728 1729 // RHS 1730 set_activeblock(ep, other); 1731 src2 = linearize_expression_to_bool(ep, expr->right); 1732 src2 = cast_pseudo(ep, src2, &bool_ctype, ctype); 1733 phi2 = alloc_phi(ep->active, src2, ctype); 1734 use_pseudo(node, phi2, add_pseudo(&node->phi_list, phi2)); 1735 1736 // join 1737 set_activeblock(ep, merge); 1738 add_instruction(&merge->insns, node); 1739 return node->target; 1740 } 1741 1742 static pseudo_t linearize_compare(struct entrypoint *ep, struct expression *expr) 1743 { 1744 static const int cmpop[] = { 1745 ['>'] = OP_SET_GT, ['<'] = OP_SET_LT, 1746 [SPECIAL_EQUAL] = OP_SET_EQ, 1747 [SPECIAL_NOTEQUAL] = OP_SET_NE, 1748 [SPECIAL_GTE] = OP_SET_GE, 1749 [SPECIAL_LTE] = OP_SET_LE, 1750 [SPECIAL_UNSIGNED_LT] = OP_SET_B, 1751 [SPECIAL_UNSIGNED_GT] = OP_SET_A, 1752 [SPECIAL_UNSIGNED_LTE] = OP_SET_BE, 1753 [SPECIAL_UNSIGNED_GTE] = OP_SET_AE, 1754 }; 1755 int op = opcode_float(cmpop[expr->op], expr->right->ctype); 1756 pseudo_t src1 = linearize_expression(ep, expr->left); 1757 pseudo_t src2 = linearize_expression(ep, expr->right); 1758 pseudo_t dst = add_binary_op(ep, expr->ctype, op, src1, src2); 1759 return dst; 1760 } 1761 1762 1763 static pseudo_t linearize_cond_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false) 1764 { 1765 pseudo_t cond; 1766 1767 if (!expr || !bb_reachable(ep->active)) 1768 return VOID; 1769 1770 switch (expr->type) { 1771 1772 case EXPR_STRING: 1773 case EXPR_VALUE: 1774 add_goto(ep, expr->value ? bb_true : bb_false); 1775 return VOID; 1776 1777 case EXPR_FVALUE: 1778 add_goto(ep, expr->fvalue ? bb_true : bb_false); 1779 return VOID; 1780 1781 case EXPR_LOGICAL: 1782 linearize_logical_branch(ep, expr, bb_true, bb_false); 1783 return VOID; 1784 1785 case EXPR_COMPARE: 1786 cond = linearize_compare(ep, expr); 1787 add_branch(ep, cond, bb_true, bb_false); 1788 break; 1789 1790 case EXPR_PREOP: 1791 if (expr->op == '!') 1792 return linearize_cond_branch(ep, expr->unop, bb_false, bb_true); 1793 /* fall through */ 1794 default: { 1795 cond = linearize_expression_to_bool(ep, expr); 1796 add_branch(ep, cond, bb_true, bb_false); 1797 1798 return VOID; 1799 } 1800 } 1801 return VOID; 1802 } 1803 1804 1805 1806 static pseudo_t linearize_logical_branch(struct entrypoint *ep, struct expression *expr, struct basic_block *bb_true, struct basic_block *bb_false) 1807 { 1808 struct basic_block *next = alloc_basic_block(ep, expr->pos); 1809 1810 if (expr->op == SPECIAL_LOGICAL_OR) 1811 linearize_cond_branch(ep, expr->left, bb_true, next); 1812 else 1813 linearize_cond_branch(ep, expr->left, next, bb_false); 1814 set_activeblock(ep, next); 1815 linearize_cond_branch(ep, expr->right, bb_true, bb_false); 1816 return VOID; 1817 } 1818 1819 static pseudo_t linearize_cast(struct entrypoint *ep, struct expression *expr) 1820 { 1821 pseudo_t src; 1822 struct expression *orig = expr->cast_expression; 1823 1824 if (!orig) 1825 return VOID; 1826 1827 src = linearize_expression(ep, orig); 1828 return cast_pseudo(ep, src, orig->ctype, expr->ctype); 1829 } 1830 1831 static pseudo_t linearize_initializer(struct entrypoint *ep, struct expression *initializer, struct access_data *ad) 1832 { 1833 switch (initializer->type) { 1834 case EXPR_INITIALIZER: { 1835 struct expression *expr; 1836 FOR_EACH_PTR(initializer->expr_list, expr) { 1837 linearize_initializer(ep, expr, ad); 1838 } END_FOR_EACH_PTR(expr); 1839 break; 1840 } 1841 case EXPR_POS: 1842 ad->offset = initializer->init_offset; 1843 linearize_initializer(ep, initializer->init_expr, ad); 1844 break; 1845 default: { 1846 pseudo_t value = linearize_expression(ep, initializer); 1847 ad->type = initializer->ctype; 1848 linearize_store_gen(ep, value, ad); 1849 return value; 1850 } 1851 } 1852 1853 return VOID; 1854 } 1855 1856 static void linearize_argument(struct entrypoint *ep, struct symbol *arg, int nr) 1857 { 1858 struct access_data ad = { NULL, }; 1859 1860 ad.type = arg; 1861 ad.address = symbol_pseudo(ep, arg); 1862 linearize_store_gen(ep, argument_pseudo(ep, nr), &ad); 1863 } 1864 1865 static pseudo_t linearize_expression(struct entrypoint *ep, struct expression *expr) 1866 { 1867 if (!expr) 1868 return VOID; 1869 1870 current_pos = expr->pos; 1871 switch (expr->type) { 1872 case EXPR_SYMBOL: 1873 linearize_one_symbol(ep, expr->symbol); 1874 return add_symbol_address(ep, expr->symbol); 1875 1876 case EXPR_VALUE: 1877 return value_pseudo(expr->value); 1878 1879 case EXPR_STRING: 1880 case EXPR_LABEL: 1881 return add_setval(ep, expr->ctype, expr); 1882 1883 case EXPR_FVALUE: 1884 return add_setfval(ep, expr->ctype, expr->fvalue); 1885 1886 case EXPR_STATEMENT: 1887 return linearize_statement(ep, expr->statement); 1888 1889 case EXPR_CALL: 1890 return linearize_call_expression(ep, expr); 1891 1892 case EXPR_BINOP: 1893 if (expr->op == SPECIAL_LOGICAL_AND || expr->op == SPECIAL_LOGICAL_OR) 1894 return linearize_binop_bool(ep, expr); 1895 return linearize_binop(ep, expr); 1896 1897 case EXPR_LOGICAL: 1898 return linearize_logical(ep, expr); 1899 1900 case EXPR_COMPARE: 1901 return linearize_compare(ep, expr); 1902 1903 case EXPR_SELECT: 1904 return linearize_select(ep, expr); 1905 1906 case EXPR_CONDITIONAL: 1907 if (!expr->cond_true) 1908 return linearize_short_conditional(ep, expr, expr->conditional, expr->cond_false); 1909 1910 return linearize_conditional(ep, expr, expr->conditional, 1911 expr->cond_true, expr->cond_false); 1912 1913 case EXPR_COMMA: 1914 linearize_expression(ep, expr->left); 1915 return linearize_expression(ep, expr->right); 1916 1917 case EXPR_ASSIGNMENT: 1918 return linearize_assignment(ep, expr); 1919 1920 case EXPR_PREOP: 1921 return linearize_preop(ep, expr); 1922 1923 case EXPR_POSTOP: 1924 return linearize_postop(ep, expr); 1925 1926 case EXPR_CAST: 1927 case EXPR_FORCE_CAST: 1928 case EXPR_IMPLIED_CAST: 1929 return linearize_cast(ep, expr); 1930 1931 case EXPR_SLICE: 1932 return linearize_slice(ep, expr); 1933 1934 case EXPR_INITIALIZER: 1935 case EXPR_POS: 1936 warning(expr->pos, "unexpected initializer expression (%d %d)", expr->type, expr->op); 1937 return VOID; 1938 default: 1939 warning(expr->pos, "unknown expression (%d %d)", expr->type, expr->op); 1940 return VOID; 1941 } 1942 return VOID; 1943 } 1944 1945 static pseudo_t linearize_one_symbol(struct entrypoint *ep, struct symbol *sym) 1946 { 1947 struct access_data ad = { NULL, }; 1948 pseudo_t value; 1949 1950 if (!sym || !sym->initializer || sym->initialized) 1951 return VOID; 1952 1953 /* We need to output these puppies some day too.. */ 1954 if (sym->ctype.modifiers & (MOD_STATIC | MOD_TOPLEVEL)) 1955 return VOID; 1956 1957 sym->initialized = 1; 1958 ad.address = symbol_pseudo(ep, sym); 1959 1960 if (sym->initializer && !is_scalar_type(sym)) { 1961 // default zero initialization [6.7.9.21] 1962 // FIXME: this init the whole aggregate while 1963 // only the existing fields need to be initialized. 1964 // FIXME: this init the whole aggregate even if 1965 // all fields arelater explicitely initialized. 1966 ad.type = sym; 1967 ad.address = symbol_pseudo(ep, sym); 1968 linearize_store_gen(ep, value_pseudo(0), &ad); 1969 } 1970 1971 value = linearize_initializer(ep, sym->initializer, &ad); 1972 return value; 1973 } 1974 1975 static pseudo_t linearize_compound_statement(struct entrypoint *ep, struct statement *stmt) 1976 { 1977 pseudo_t pseudo; 1978 struct statement *s; 1979 1980 pseudo = VOID; 1981 FOR_EACH_PTR(stmt->stmts, s) { 1982 pseudo = linearize_statement(ep, s); 1983 } END_FOR_EACH_PTR(s); 1984 1985 return pseudo; 1986 } 1987 1988 static void add_return(struct entrypoint *ep, struct basic_block *bb, struct symbol *ctype, pseudo_t src) 1989 { 1990 struct instruction *phi_node = first_instruction(bb->insns); 1991 pseudo_t phi; 1992 if (!phi_node) { 1993 phi_node = alloc_typed_instruction(OP_PHI, ctype); 1994 phi_node->target = alloc_pseudo(phi_node); 1995 phi_node->bb = bb; 1996 add_instruction(&bb->insns, phi_node); 1997 } 1998 phi = alloc_phi(ep->active, src, ctype); 1999 phi->ident = &return_ident; 2000 use_pseudo(phi_node, phi, add_pseudo(&phi_node->phi_list, phi)); 2001 } 2002 2003 static pseudo_t linearize_fn_statement(struct entrypoint *ep, struct statement *stmt) 2004 { 2005 struct instruction *phi_node; 2006 struct basic_block *bb; 2007 pseudo_t pseudo; 2008 2009 pseudo = linearize_compound_statement(ep, stmt); 2010 if (!is_void_type(stmt->ret)) { // non-void function 2011 struct basic_block *active = ep->active; 2012 if (active && !bb_terminated(active)) { // missing return 2013 struct basic_block *bb_ret; 2014 bb_ret = get_bound_block(ep, stmt->ret); 2015 add_return(ep, bb_ret, stmt->ret, undef_pseudo()); 2016 } 2017 } 2018 bb = add_label(ep, stmt->ret); 2019 phi_node = first_instruction(bb->insns); 2020 if (phi_node) 2021 pseudo = phi_node->target; 2022 return pseudo; 2023 } 2024 2025 static pseudo_t linearize_inlined_call(struct entrypoint *ep, struct statement *stmt) 2026 { 2027 struct instruction *insn = alloc_instruction(OP_INLINED_CALL, 0); 2028 struct statement *args = stmt->args; 2029 struct basic_block *bb; 2030 pseudo_t pseudo; 2031 2032 if (args) { 2033 struct symbol *sym; 2034 2035 concat_symbol_list(args->declaration, &ep->syms); 2036 FOR_EACH_PTR(args->declaration, sym) { 2037 pseudo_t value = linearize_one_symbol(ep, sym); 2038 add_pseudo(&insn->arguments, value); 2039 } END_FOR_EACH_PTR(sym); 2040 } 2041 2042 pseudo = linearize_fn_statement(ep, stmt); 2043 insn->target = pseudo; 2044 2045 use_pseudo(insn, symbol_pseudo(ep, stmt->inline_fn), &insn->func); 2046 bb = ep->active; 2047 if (!bb->insns) 2048 bb->pos = stmt->pos; 2049 add_one_insn(ep, insn); 2050 return pseudo; 2051 } 2052 2053 static pseudo_t linearize_context(struct entrypoint *ep, struct statement *stmt) 2054 { 2055 struct instruction *insn = alloc_instruction(OP_CONTEXT, 0); 2056 struct expression *expr = stmt->expression; 2057 2058 insn->increment = get_expression_value(expr); 2059 insn->context_expr = stmt->context; 2060 add_one_insn(ep, insn); 2061 return VOID; 2062 } 2063 2064 static pseudo_t linearize_range(struct entrypoint *ep, struct statement *stmt) 2065 { 2066 struct instruction *insn = alloc_instruction(OP_RANGE, 0); 2067 2068 use_pseudo(insn, linearize_expression(ep, stmt->range_expression), &insn->src1); 2069 use_pseudo(insn, linearize_expression(ep, stmt->range_low), &insn->src2); 2070 use_pseudo(insn, linearize_expression(ep, stmt->range_high), &insn->src3); 2071 add_one_insn(ep, insn); 2072 return VOID; 2073 } 2074 2075 ALLOCATOR(asm_rules, "asm rules"); 2076 ALLOCATOR(asm_constraint, "asm constraints"); 2077 2078 static void add_asm_input(struct entrypoint *ep, struct instruction *insn, struct expression *expr, 2079 const char *constraint, const struct ident *ident) 2080 { 2081 pseudo_t pseudo = linearize_expression(ep, expr); 2082 struct asm_constraint *rule = __alloc_asm_constraint(0); 2083 2084 rule->ident = ident; 2085 rule->constraint = constraint; 2086 use_pseudo(insn, pseudo, &rule->pseudo); 2087 add_ptr_list(&insn->asm_rules->inputs, rule); 2088 } 2089 2090 static void add_asm_output(struct entrypoint *ep, struct instruction *insn, struct expression *expr, 2091 const char *constraint, const struct ident *ident) 2092 { 2093 struct access_data ad = { NULL, }; 2094 pseudo_t pseudo = alloc_pseudo(insn); 2095 struct asm_constraint *rule; 2096 2097 if (!expr || !linearize_address_gen(ep, expr, &ad)) 2098 return; 2099 linearize_store_gen(ep, pseudo, &ad); 2100 rule = __alloc_asm_constraint(0); 2101 rule->ident = ident; 2102 rule->constraint = constraint; 2103 use_pseudo(insn, pseudo, &rule->pseudo); 2104 add_ptr_list(&insn->asm_rules->outputs, rule); 2105 } 2106 2107 static pseudo_t linearize_asm_statement(struct entrypoint *ep, struct statement *stmt) 2108 { 2109 struct expression *expr; 2110 struct instruction *insn; 2111 struct asm_rules *rules; 2112 const char *constraint; 2113 2114 insn = alloc_instruction(OP_ASM, 0); 2115 expr = stmt->asm_string; 2116 if (!expr || expr->type != EXPR_STRING) { 2117 warning(stmt->pos, "expected string in inline asm"); 2118 return VOID; 2119 } 2120 insn->string = expr->string->data; 2121 2122 rules = __alloc_asm_rules(0); 2123 insn->asm_rules = rules; 2124 2125 /* Gather the inputs.. */ 2126 FOR_EACH_PTR(stmt->asm_inputs, expr) { 2127 constraint = expr->constraint ? expr->constraint->string->data : ""; 2128 add_asm_input(ep, insn, expr->expr, constraint, expr->name); 2129 } END_FOR_EACH_PTR(expr); 2130 2131 add_one_insn(ep, insn); 2132 2133 /* Assign the outputs */ 2134 FOR_EACH_PTR(stmt->asm_outputs, expr) { 2135 constraint = expr->constraint ? expr->constraint->string->data : ""; 2136 add_asm_output(ep, insn, expr->expr, constraint, expr->name); 2137 } END_FOR_EACH_PTR(expr); 2138 2139 return VOID; 2140 } 2141 2142 static int multijmp_cmp(const void *_a, const void *_b) 2143 { 2144 const struct multijmp *a = _a; 2145 const struct multijmp *b = _b; 2146 2147 // "default" case? 2148 if (a->begin > a->end) { 2149 if (b->begin > b->end) 2150 return 0; 2151 return 1; 2152 } 2153 if (b->begin > b->end) 2154 return -1; 2155 if (a->begin == b->begin) { 2156 if (a->end == b->end) 2157 return 0; 2158 return (a->end < b->end) ? -1 : 1; 2159 } 2160 return a->begin < b->begin ? -1 : 1; 2161 } 2162 2163 static void sort_switch_cases(struct instruction *insn) 2164 { 2165 sort_list((struct ptr_list **)&insn->multijmp_list, multijmp_cmp); 2166 } 2167 2168 static pseudo_t linearize_declaration(struct entrypoint *ep, struct statement *stmt) 2169 { 2170 struct symbol *sym; 2171 2172 concat_symbol_list(stmt->declaration, &ep->syms); 2173 2174 FOR_EACH_PTR(stmt->declaration, sym) { 2175 linearize_one_symbol(ep, sym); 2176 } END_FOR_EACH_PTR(sym); 2177 return VOID; 2178 } 2179 2180 static pseudo_t linearize_return(struct entrypoint *ep, struct statement *stmt) 2181 { 2182 struct expression *expr = stmt->expression; 2183 struct symbol *ret = stmt->ret_target; 2184 struct basic_block *bb_return = get_bound_block(ep, ret); 2185 struct basic_block *active; 2186 pseudo_t src = linearize_expression(ep, expr); 2187 active = ep->active; 2188 if (active && !is_void_type(ret)) { 2189 add_return(ep, bb_return, ret, src); 2190 } 2191 add_goto(ep, bb_return); 2192 return VOID; 2193 } 2194 2195 static pseudo_t linearize_switch(struct entrypoint *ep, struct statement *stmt) 2196 { 2197 struct symbol *sym; 2198 struct instruction *switch_ins; 2199 struct basic_block *switch_end = alloc_basic_block(ep, stmt->pos); 2200 struct basic_block *active, *default_case; 2201 struct expression *expr = stmt->switch_expression; 2202 struct multijmp *jmp; 2203 pseudo_t pseudo; 2204 2205 if (!expr || !expr->ctype) 2206 return VOID; 2207 pseudo = linearize_expression(ep, expr); 2208 active = ep->active; 2209 if (!active) { 2210 active = alloc_basic_block(ep, stmt->pos); 2211 set_activeblock(ep, active); 2212 } 2213 2214 switch_ins = alloc_typed_instruction(OP_SWITCH, expr->ctype); 2215 use_pseudo(switch_ins, pseudo, &switch_ins->cond); 2216 add_one_insn(ep, switch_ins); 2217 finish_block(ep); 2218 2219 default_case = NULL; 2220 FOR_EACH_PTR(stmt->switch_case->symbol_list, sym) { 2221 struct statement *case_stmt = sym->stmt; 2222 struct basic_block *bb_case = get_bound_block(ep, sym); 2223 2224 if (!case_stmt->case_expression) { 2225 default_case = bb_case; 2226 continue; 2227 } else if (case_stmt->case_expression->type != EXPR_VALUE) { 2228 continue; 2229 } else { 2230 struct expression *case_to = case_stmt->case_to; 2231 long long begin, end; 2232 2233 begin = end = case_stmt->case_expression->value; 2234 if (case_to && case_to->type == EXPR_VALUE) 2235 end = case_to->value; 2236 if (begin > end) 2237 jmp = alloc_multijmp(bb_case, end, begin); 2238 else 2239 jmp = alloc_multijmp(bb_case, begin, end); 2240 2241 } 2242 add_multijmp(&switch_ins->multijmp_list, jmp); 2243 add_bb(&bb_case->parents, active); 2244 add_bb(&active->children, bb_case); 2245 } END_FOR_EACH_PTR(sym); 2246 2247 bind_label(stmt->switch_break, switch_end, stmt->pos); 2248 2249 /* And linearize the actual statement */ 2250 linearize_statement(ep, stmt->switch_statement); 2251 set_activeblock(ep, switch_end); 2252 2253 if (!default_case) 2254 default_case = switch_end; 2255 2256 jmp = alloc_multijmp(default_case, 1, 0); 2257 add_multijmp(&switch_ins->multijmp_list, jmp); 2258 add_bb(&default_case->parents, active); 2259 add_bb(&active->children, default_case); 2260 sort_switch_cases(switch_ins); 2261 2262 return VOID; 2263 } 2264 2265 static pseudo_t linearize_iterator(struct entrypoint *ep, struct statement *stmt) 2266 { 2267 struct statement *pre_statement = stmt->iterator_pre_statement; 2268 struct expression *pre_condition = stmt->iterator_pre_condition; 2269 struct statement *statement = stmt->iterator_statement; 2270 struct statement *post_statement = stmt->iterator_post_statement; 2271 struct expression *post_condition = stmt->iterator_post_condition; 2272 struct basic_block *loop_top, *loop_body, *loop_continue, *loop_end; 2273 struct symbol *sym; 2274 2275 FOR_EACH_PTR(stmt->iterator_syms, sym) { 2276 linearize_one_symbol(ep, sym); 2277 } END_FOR_EACH_PTR(sym); 2278 concat_symbol_list(stmt->iterator_syms, &ep->syms); 2279 linearize_statement(ep, pre_statement); 2280 2281 loop_body = loop_top = alloc_basic_block(ep, stmt->pos); 2282 loop_continue = alloc_basic_block(ep, stmt->pos); 2283 loop_end = alloc_basic_block(ep, stmt->pos); 2284 2285 /* An empty post-condition means that it's the same as the pre-condition */ 2286 if (!post_condition) { 2287 loop_top = alloc_basic_block(ep, stmt->pos); 2288 set_activeblock(ep, loop_top); 2289 } 2290 2291 if (pre_condition) 2292 linearize_cond_branch(ep, pre_condition, loop_body, loop_end); 2293 2294 bind_label(stmt->iterator_continue, loop_continue, stmt->pos); 2295 bind_label(stmt->iterator_break, loop_end, stmt->pos); 2296 2297 set_activeblock(ep, loop_body); 2298 linearize_statement(ep, statement); 2299 add_goto(ep, loop_continue); 2300 2301 set_activeblock(ep, loop_continue); 2302 linearize_statement(ep, post_statement); 2303 if (!post_condition) 2304 add_goto(ep, loop_top); 2305 else 2306 linearize_cond_branch(ep, post_condition, loop_top, loop_end); 2307 set_activeblock(ep, loop_end); 2308 2309 return VOID; 2310 } 2311 2312 static pseudo_t linearize_statement(struct entrypoint *ep, struct statement *stmt) 2313 { 2314 struct basic_block *bb; 2315 2316 if (!stmt) 2317 return VOID; 2318 2319 bb = ep->active; 2320 if (bb && !bb->insns) 2321 bb->pos = stmt->pos; 2322 current_pos = stmt->pos; 2323 2324 switch (stmt->type) { 2325 case STMT_NONE: 2326 break; 2327 2328 case STMT_DECLARATION: 2329 return linearize_declaration(ep, stmt); 2330 2331 case STMT_CONTEXT: 2332 return linearize_context(ep, stmt); 2333 2334 case STMT_RANGE: 2335 return linearize_range(ep, stmt); 2336 2337 case STMT_EXPRESSION: 2338 return linearize_expression(ep, stmt->expression); 2339 2340 case STMT_ASM: 2341 return linearize_asm_statement(ep, stmt); 2342 2343 case STMT_RETURN: 2344 return linearize_return(ep, stmt); 2345 2346 case STMT_CASE: { 2347 add_label(ep, stmt->case_label); 2348 linearize_statement(ep, stmt->case_statement); 2349 break; 2350 } 2351 2352 case STMT_LABEL: { 2353 struct symbol *label = stmt->label_identifier; 2354 2355 if (label->used) { 2356 add_label(ep, label); 2357 } 2358 return linearize_statement(ep, stmt->label_statement); 2359 } 2360 2361 case STMT_GOTO: { 2362 struct symbol *sym; 2363 struct expression *expr; 2364 struct instruction *goto_ins; 2365 struct basic_block *active; 2366 pseudo_t pseudo; 2367 2368 active = ep->active; 2369 if (!bb_reachable(active)) 2370 break; 2371 2372 if (stmt->goto_label) { 2373 add_goto(ep, get_bound_block(ep, stmt->goto_label)); 2374 break; 2375 } 2376 2377 expr = stmt->goto_expression; 2378 if (!expr) 2379 break; 2380 2381 /* This can happen as part of simplification */ 2382 if (expr->type == EXPR_LABEL) { 2383 add_goto(ep, get_bound_block(ep, expr->label_symbol)); 2384 break; 2385 } 2386 2387 pseudo = linearize_expression(ep, expr); 2388 goto_ins = alloc_instruction(OP_COMPUTEDGOTO, 0); 2389 use_pseudo(goto_ins, pseudo, &goto_ins->src); 2390 add_one_insn(ep, goto_ins); 2391 2392 FOR_EACH_PTR(stmt->target_list, sym) { 2393 struct basic_block *bb_computed = get_bound_block(ep, sym); 2394 struct multijmp *jmp = alloc_multijmp(bb_computed, 1, 0); 2395 add_multijmp(&goto_ins->multijmp_list, jmp); 2396 add_bb(&bb_computed->parents, ep->active); 2397 add_bb(&active->children, bb_computed); 2398 } END_FOR_EACH_PTR(sym); 2399 2400 finish_block(ep); 2401 break; 2402 } 2403 2404 case STMT_COMPOUND: 2405 if (stmt->inline_fn) 2406 return linearize_inlined_call(ep, stmt); 2407 return linearize_compound_statement(ep, stmt); 2408 2409 /* 2410 * This could take 'likely/unlikely' into account, and 2411 * switch the arms around appropriately.. 2412 */ 2413 case STMT_IF: { 2414 struct basic_block *bb_true, *bb_false, *endif; 2415 struct expression *cond = stmt->if_conditional; 2416 2417 bb_true = alloc_basic_block(ep, stmt->pos); 2418 bb_false = endif = alloc_basic_block(ep, stmt->pos); 2419 2420 linearize_cond_branch(ep, cond, bb_true, bb_false); 2421 2422 set_activeblock(ep, bb_true); 2423 linearize_statement(ep, stmt->if_true); 2424 2425 if (stmt->if_false) { 2426 endif = alloc_basic_block(ep, stmt->pos); 2427 add_goto(ep, endif); 2428 set_activeblock(ep, bb_false); 2429 linearize_statement(ep, stmt->if_false); 2430 } 2431 set_activeblock(ep, endif); 2432 break; 2433 } 2434 2435 case STMT_SWITCH: 2436 return linearize_switch(ep, stmt); 2437 2438 case STMT_ITERATOR: 2439 return linearize_iterator(ep, stmt); 2440 2441 default: 2442 break; 2443 } 2444 return VOID; 2445 } 2446 2447 static struct entrypoint *linearize_fn(struct symbol *sym, struct symbol *base_type) 2448 { 2449 struct statement *stmt = base_type->stmt; 2450 struct entrypoint *ep; 2451 struct basic_block *bb; 2452 struct symbol *ret_type; 2453 struct symbol *arg; 2454 struct instruction *entry; 2455 struct instruction *ret; 2456 pseudo_t result; 2457 int i; 2458 2459 if (!stmt) 2460 return NULL; 2461 2462 ep = alloc_entrypoint(); 2463 ep->name = sym; 2464 sym->ep = ep; 2465 bb = alloc_basic_block(ep, sym->pos); 2466 set_activeblock(ep, bb); 2467 2468 if (stmt->type == STMT_ASM) { // top-level asm 2469 linearize_asm_statement(ep, stmt); 2470 return ep; 2471 } 2472 2473 entry = alloc_instruction(OP_ENTRY, 0); 2474 add_one_insn(ep, entry); 2475 ep->entry = entry; 2476 2477 concat_symbol_list(base_type->arguments, &ep->syms); 2478 2479 /* FIXME!! We should do something else about varargs.. */ 2480 i = 0; 2481 FOR_EACH_PTR(base_type->arguments, arg) { 2482 linearize_argument(ep, arg, ++i); 2483 } END_FOR_EACH_PTR(arg); 2484 2485 result = linearize_fn_statement(ep, stmt); 2486 ret_type = base_type->ctype.base_type; 2487 ret = alloc_typed_instruction(OP_RET, ret_type); 2488 if (type_size(ret_type) > 0) 2489 use_pseudo(ret, result, &ret->src); 2490 add_one_insn(ep, ret); 2491 2492 optimize(ep); 2493 return ep; 2494 } 2495 2496 struct entrypoint *linearize_symbol(struct symbol *sym) 2497 { 2498 struct symbol *base_type; 2499 2500 if (!sym) 2501 return NULL; 2502 current_pos = sym->pos; 2503 base_type = sym->ctype.base_type; 2504 if (!base_type) 2505 return NULL; 2506 if (base_type->type == SYM_FN) 2507 return linearize_fn(sym, base_type); 2508 return NULL; 2509 }