1 /* 2 * Example usage: 3 * ./sparse-llvm hello.c | llc | as -o hello.o 4 */ 5 6 #include <llvm-c/Core.h> 7 #include <llvm-c/BitWriter.h> 8 #include <llvm-c/Analysis.h> 9 #include <llvm-c/Target.h> 10 11 #include <stdbool.h> 12 #include <stdio.h> 13 #include <unistd.h> 14 #include <string.h> 15 #include <assert.h> 16 17 #include "symbol.h" 18 #include "expression.h" 19 #include "linearize.h" 20 #include "flow.h" 21 22 struct function { 23 LLVMBuilderRef builder; 24 LLVMValueRef fn; 25 LLVMModuleRef module; 26 }; 27 28 static LLVMTypeRef symbol_type(struct symbol *sym); 29 30 static LLVMTypeRef func_return_type(struct symbol *sym) 31 { 32 return symbol_type(sym->ctype.base_type); 33 } 34 35 static LLVMTypeRef sym_func_type(struct symbol *sym) 36 { 37 int n_arg = symbol_list_size(sym->arguments); 38 LLVMTypeRef *arg_type = calloc(n_arg, sizeof(LLVMTypeRef)); 39 LLVMTypeRef ret_type = func_return_type(sym); 40 struct symbol *arg; 41 int idx = 0; 42 43 FOR_EACH_PTR(sym->arguments, arg) { 44 struct symbol *arg_sym = arg->ctype.base_type; 45 46 arg_type[idx++] = symbol_type(arg_sym); 47 } END_FOR_EACH_PTR(arg); 48 49 return LLVMFunctionType(ret_type, arg_type, n_arg, sym->variadic); 50 } 51 52 static LLVMTypeRef sym_array_type(struct symbol *sym) 53 { 54 LLVMTypeRef elem_type; 55 struct symbol *base_type; 56 57 base_type = sym->ctype.base_type; 58 /* empty struct is undefined [6.7.2.1(8)] */ 59 assert(base_type->bit_size > 0); 60 61 elem_type = symbol_type(base_type); 62 if (!elem_type) 63 return NULL; 64 65 return LLVMArrayType(elem_type, sym->bit_size / base_type->bit_size); 66 } 67 68 #define MAX_STRUCT_MEMBERS 64 69 70 static LLVMTypeRef sym_struct_type(struct symbol *sym) 71 { 72 LLVMTypeRef elem_types[MAX_STRUCT_MEMBERS]; 73 struct symbol *member; 74 char buffer[256]; 75 LLVMTypeRef ret; 76 unsigned nr = 0; 77 78 snprintf(buffer, sizeof(buffer), "struct.%s", sym->ident ? sym->ident->name : "anno"); 79 ret = LLVMStructCreateNamed(LLVMGetGlobalContext(), buffer); 80 /* set ->aux to avoid recursion */ 81 sym->aux = ret; 82 83 FOR_EACH_PTR(sym->symbol_list, member) { 84 LLVMTypeRef member_type; 85 86 assert(nr < MAX_STRUCT_MEMBERS); 87 88 member_type = symbol_type(member); 89 90 elem_types[nr++] = member_type; 91 } END_FOR_EACH_PTR(member); 92 93 LLVMStructSetBody(ret, elem_types, nr, 0 /* packed? */); 94 return ret; 95 } 96 97 static LLVMTypeRef sym_union_type(struct symbol *sym) 98 { 99 LLVMTypeRef elements; 100 unsigned union_size; 101 102 /* 103 * There's no union support in the LLVM API so we treat unions as 104 * opaque structs. The downside is that we lose type information on the 105 * members but as LLVM doesn't care, neither do we. 106 */ 107 union_size = sym->bit_size / 8; 108 109 elements = LLVMArrayType(LLVMInt8Type(), union_size); 110 111 return LLVMStructType(&elements, 1, 0 /* packed? */); 112 } 113 114 static LLVMTypeRef sym_ptr_type(struct symbol *sym) 115 { 116 LLVMTypeRef type; 117 118 /* 'void *' is treated like 'char *' */ 119 if (is_void_type(sym->ctype.base_type)) 120 type = LLVMInt8Type(); 121 else 122 type = symbol_type(sym->ctype.base_type); 123 124 return LLVMPointerType(type, 0); 125 } 126 127 static LLVMTypeRef sym_basetype_type(struct symbol *sym) 128 { 129 LLVMTypeRef ret = NULL; 130 131 if (is_float_type(sym)) { 132 switch (sym->bit_size) { 133 case 32: 134 ret = LLVMFloatType(); 135 break; 136 case 64: 137 ret = LLVMDoubleType(); 138 break; 139 case 80: 140 ret = LLVMX86FP80Type(); 141 break; 142 default: 143 die("invalid bit size %d for type %d", sym->bit_size, sym->type); 144 break; 145 } 146 } else { 147 switch (sym->bit_size) { 148 case -1: 149 ret = LLVMVoidType(); 150 break; 151 case 1: 152 ret = LLVMInt1Type(); 153 break; 154 case 8: 155 ret = LLVMInt8Type(); 156 break; 157 case 16: 158 ret = LLVMInt16Type(); 159 break; 160 case 32: 161 ret = LLVMInt32Type(); 162 break; 163 case 64: 164 ret = LLVMInt64Type(); 165 break; 166 default: 167 die("invalid bit size %d for type %d", sym->bit_size, sym->type); 168 break; 169 } 170 } 171 172 return ret; 173 } 174 175 static LLVMTypeRef symbol_type(struct symbol *sym) 176 { 177 LLVMTypeRef ret = NULL; 178 179 /* don't cache the result for SYM_NODE */ 180 if (sym->type == SYM_NODE) 181 return symbol_type(sym->ctype.base_type); 182 183 if (sym->aux) 184 return sym->aux; 185 186 switch (sym->type) { 187 case SYM_BITFIELD: 188 ret = LLVMIntType(sym->bit_size); 189 break; 190 case SYM_RESTRICT: 191 case SYM_ENUM: 192 ret = symbol_type(sym->ctype.base_type); 193 break; 194 case SYM_BASETYPE: 195 ret = sym_basetype_type(sym); 196 break; 197 case SYM_PTR: 198 ret = sym_ptr_type(sym); 199 break; 200 case SYM_UNION: 201 ret = sym_union_type(sym); 202 break; 203 case SYM_STRUCT: 204 ret = sym_struct_type(sym); 205 break; 206 case SYM_ARRAY: 207 ret = sym_array_type(sym); 208 break; 209 case SYM_FN: 210 ret = sym_func_type(sym); 211 break; 212 default: 213 assert(0); 214 } 215 216 /* cache the result */ 217 sym->aux = ret; 218 return ret; 219 } 220 221 static LLVMTypeRef insn_symbol_type(struct instruction *insn) 222 { 223 if (insn->type) 224 return symbol_type(insn->type); 225 226 switch (insn->size) { 227 case 8: return LLVMInt8Type(); 228 case 16: return LLVMInt16Type(); 229 case 32: return LLVMInt32Type(); 230 case 64: return LLVMInt64Type(); 231 232 default: 233 die("invalid bit size %d", insn->size); 234 break; 235 } 236 237 return NULL; /* not reached */ 238 } 239 240 static LLVMLinkage data_linkage(struct symbol *sym) 241 { 242 if (sym->ctype.modifiers & MOD_STATIC) 243 return LLVMPrivateLinkage; 244 245 return LLVMExternalLinkage; 246 } 247 248 static LLVMLinkage function_linkage(struct symbol *sym) 249 { 250 if (sym->ctype.modifiers & MOD_STATIC) 251 return LLVMInternalLinkage; 252 253 return LLVMExternalLinkage; 254 } 255 256 #define MAX_PSEUDO_NAME 64 257 258 static const char *pseudo_name(pseudo_t pseudo, char *buf) 259 { 260 switch (pseudo->type) { 261 case PSEUDO_REG: 262 snprintf(buf, MAX_PSEUDO_NAME, "R%d.", pseudo->nr); 263 break; 264 case PSEUDO_PHI: 265 snprintf(buf, MAX_PSEUDO_NAME, "PHI%d.", pseudo->nr); 266 break; 267 case PSEUDO_SYM: 268 case PSEUDO_VAL: 269 case PSEUDO_ARG: 270 case PSEUDO_VOID: 271 buf[0] = '\0'; 272 break; 273 case PSEUDO_UNDEF: 274 assert(0); 275 break; 276 default: 277 assert(0); 278 } 279 280 return buf; 281 } 282 283 static LLVMValueRef get_sym_value(LLVMModuleRef module, struct symbol *sym) 284 { 285 const char *name = show_ident(sym->ident); 286 LLVMTypeRef type = symbol_type(sym); 287 LLVMValueRef result = NULL; 288 struct expression *expr; 289 290 assert(sym->bb_target == NULL); 291 292 expr = sym->initializer; 293 if (expr && !sym->ident) { 294 switch (expr->type) { 295 case EXPR_STRING: { 296 const char *s = expr->string->data; 297 LLVMValueRef indices[] = { LLVMConstInt(LLVMInt64Type(), 0, 0), LLVMConstInt(LLVMInt64Type(), 0, 0) }; 298 LLVMValueRef data; 299 300 data = LLVMAddGlobal(module, LLVMArrayType(LLVMInt8Type(), strlen(s) + 1), ".str"); 301 LLVMSetLinkage(data, LLVMPrivateLinkage); 302 LLVMSetGlobalConstant(data, 1); 303 LLVMSetInitializer(data, LLVMConstString(strdup(s), strlen(s) + 1, true)); 304 305 result = LLVMConstGEP(data, indices, ARRAY_SIZE(indices)); 306 return result; 307 } 308 default: 309 break; 310 } 311 } 312 313 if (LLVMGetTypeKind(type) == LLVMFunctionTypeKind) { 314 result = LLVMGetNamedFunction(module, name); 315 if (!result) 316 result = LLVMAddFunction(module, name, type); 317 } else { 318 result = LLVMGetNamedGlobal(module, name); 319 if (!result) 320 result = LLVMAddGlobal(module, type, name); 321 } 322 323 return result; 324 } 325 326 static LLVMValueRef constant_value(unsigned long long val, LLVMTypeRef dtype) 327 { 328 LLVMValueRef result; 329 330 switch (LLVMGetTypeKind(dtype)) { 331 case LLVMPointerTypeKind: 332 if (val != 0) { // for example: ... = (void*) 0x123; 333 LLVMTypeRef itype = LLVMIntType(bits_in_pointer); 334 result = LLVMConstInt(itype, val, 1); 335 result = LLVMConstIntToPtr(result, dtype); 336 } else { 337 result = LLVMConstPointerNull(dtype); 338 } 339 break; 340 case LLVMIntegerTypeKind: 341 result = LLVMConstInt(dtype, val, 1); 342 break; 343 case LLVMArrayTypeKind: 344 case LLVMStructTypeKind: 345 if (val != 0) 346 return NULL; 347 result = LLVMConstNull(dtype); 348 break; 349 default: 350 return NULL; 351 } 352 return result; 353 } 354 355 static LLVMValueRef val_to_value(unsigned long long val, struct symbol *ctype) 356 { 357 LLVMValueRef result; 358 LLVMTypeRef dtype; 359 360 assert(ctype); 361 dtype = symbol_type(ctype); 362 result = constant_value(val, dtype); 363 if (result) 364 return result; 365 sparse_error(ctype->pos, "no value possible for %s", show_typename(ctype)); 366 return LLVMGetUndef(symbol_type(ctype)); 367 } 368 369 static LLVMValueRef pseudo_to_value(struct function *fn, struct symbol *ctype, pseudo_t pseudo) 370 { 371 LLVMValueRef result = NULL; 372 373 switch (pseudo->type) { 374 case PSEUDO_REG: 375 result = pseudo->priv; 376 break; 377 case PSEUDO_SYM: 378 result = get_sym_value(fn->module, pseudo->sym); 379 break; 380 case PSEUDO_VAL: 381 result = val_to_value(pseudo->value, ctype); 382 break; 383 case PSEUDO_ARG: { 384 result = LLVMGetParam(fn->fn, pseudo->nr - 1); 385 break; 386 } 387 case PSEUDO_PHI: 388 result = pseudo->priv; 389 break; 390 case PSEUDO_VOID: 391 result = NULL; 392 break; 393 case PSEUDO_UNDEF: 394 result = LLVMGetUndef(symbol_type(ctype)); 395 break; 396 default: 397 assert(0); 398 } 399 400 return result; 401 } 402 403 static LLVMValueRef pseudo_to_rvalue(struct function *fn, struct symbol *ctype, pseudo_t pseudo) 404 { 405 LLVMValueRef val = pseudo_to_value(fn, ctype, pseudo); 406 LLVMTypeRef dtype = symbol_type(ctype); 407 char name[MAX_PSEUDO_NAME]; 408 409 pseudo_name(pseudo, name); 410 return LLVMBuildBitCast(fn->builder, val, dtype, name); 411 } 412 413 static LLVMValueRef value_to_ivalue(struct function *fn, struct symbol *ctype, LLVMValueRef val) 414 { 415 const char *name = LLVMGetValueName(val); 416 LLVMTypeRef dtype = symbol_type(ctype); 417 418 if (LLVMGetTypeKind(LLVMTypeOf(val)) == LLVMPointerTypeKind) { 419 LLVMTypeRef dtype = LLVMIntType(bits_in_pointer); 420 val = LLVMBuildPtrToInt(fn->builder, val, dtype, name); 421 } 422 if (ctype && is_int_type(ctype)) { 423 val = LLVMBuildIntCast(fn->builder, val, dtype, name); 424 } 425 return val; 426 } 427 428 static LLVMValueRef value_to_pvalue(struct function *fn, struct symbol *ctype, LLVMValueRef val) 429 { 430 const char *name = LLVMGetValueName(val); 431 LLVMTypeRef dtype = symbol_type(ctype); 432 433 assert(is_ptr_type(ctype)); 434 switch (LLVMGetTypeKind(LLVMTypeOf(val))) { 435 case LLVMIntegerTypeKind: 436 val = LLVMBuildIntToPtr(fn->builder, val, dtype, name); 437 break; 438 case LLVMPointerTypeKind: 439 val = LLVMBuildBitCast(fn->builder, val, dtype, name); 440 break; 441 default: 442 break; 443 } 444 return val; 445 } 446 447 static LLVMValueRef adjust_type(struct function *fn, struct symbol *ctype, LLVMValueRef val) 448 { 449 if (is_int_type(ctype)) 450 return value_to_ivalue(fn, ctype, val); 451 if (is_ptr_type(ctype)) 452 return value_to_pvalue(fn, ctype, val); 453 return val; 454 } 455 456 /* 457 * Get the LLVMValue corresponding to the pseudo 458 * and force the type corresponding to ctype. 459 */ 460 static LLVMValueRef get_operand(struct function *fn, struct symbol *ctype, pseudo_t pseudo) 461 { 462 LLVMValueRef target = pseudo_to_value(fn, ctype, pseudo); 463 return adjust_type(fn, ctype, target); 464 } 465 466 /* 467 * Get the LLVMValue corresponding to the pseudo 468 * and force the type corresponding to ctype but 469 * map all pointers to intptr_t. 470 */ 471 static LLVMValueRef get_ioperand(struct function *fn, struct symbol *ctype, pseudo_t pseudo) 472 { 473 LLVMValueRef target = pseudo_to_value(fn, ctype, pseudo); 474 return value_to_ivalue(fn, ctype, target); 475 } 476 477 static LLVMValueRef calc_gep(LLVMBuilderRef builder, LLVMValueRef base, LLVMValueRef off) 478 { 479 LLVMTypeRef type = LLVMTypeOf(base); 480 unsigned int as = LLVMGetPointerAddressSpace(type); 481 LLVMTypeRef bytep = LLVMPointerType(LLVMInt8Type(), as); 482 LLVMValueRef addr; 483 const char *name = LLVMGetValueName(off); 484 485 /* convert base to char* type */ 486 base = LLVMBuildPointerCast(builder, base, bytep, name); 487 /* addr = base + off */ 488 addr = LLVMBuildInBoundsGEP(builder, base, &off, 1, name); 489 /* convert back to the actual pointer type */ 490 addr = LLVMBuildPointerCast(builder, addr, type, name); 491 return addr; 492 } 493 494 static LLVMRealPredicate translate_fop(int opcode) 495 { 496 static const LLVMRealPredicate trans_tbl[] = { 497 [OP_FCMP_ORD] = LLVMRealORD, 498 [OP_FCMP_OEQ] = LLVMRealOEQ, 499 [OP_FCMP_ONE] = LLVMRealONE, 500 [OP_FCMP_OLE] = LLVMRealOLE, 501 [OP_FCMP_OGE] = LLVMRealOGE, 502 [OP_FCMP_OLT] = LLVMRealOLT, 503 [OP_FCMP_OGT] = LLVMRealOGT, 504 [OP_FCMP_UEQ] = LLVMRealUEQ, 505 [OP_FCMP_UNE] = LLVMRealUNE, 506 [OP_FCMP_ULE] = LLVMRealULE, 507 [OP_FCMP_UGE] = LLVMRealUGE, 508 [OP_FCMP_ULT] = LLVMRealULT, 509 [OP_FCMP_UGT] = LLVMRealUGT, 510 [OP_FCMP_UNO] = LLVMRealUNO, 511 }; 512 513 return trans_tbl[opcode]; 514 } 515 516 static LLVMIntPredicate translate_op(int opcode) 517 { 518 static const LLVMIntPredicate trans_tbl[] = { 519 [OP_SET_EQ] = LLVMIntEQ, 520 [OP_SET_NE] = LLVMIntNE, 521 [OP_SET_LE] = LLVMIntSLE, 522 [OP_SET_GE] = LLVMIntSGE, 523 [OP_SET_LT] = LLVMIntSLT, 524 [OP_SET_GT] = LLVMIntSGT, 525 [OP_SET_B] = LLVMIntULT, 526 [OP_SET_A] = LLVMIntUGT, 527 [OP_SET_BE] = LLVMIntULE, 528 [OP_SET_AE] = LLVMIntUGE, 529 }; 530 531 return trans_tbl[opcode]; 532 } 533 534 static void output_op_binary(struct function *fn, struct instruction *insn) 535 { 536 LLVMValueRef lhs, rhs, target; 537 char target_name[64]; 538 539 lhs = get_ioperand(fn, insn->type, insn->src1); 540 rhs = get_ioperand(fn, insn->type, insn->src2); 541 542 pseudo_name(insn->target, target_name); 543 544 switch (insn->opcode) { 545 /* Binary */ 546 case OP_ADD: 547 target = LLVMBuildAdd(fn->builder, lhs, rhs, target_name); 548 break; 549 case OP_SUB: 550 target = LLVMBuildSub(fn->builder, lhs, rhs, target_name); 551 break; 552 case OP_MUL: 553 target = LLVMBuildMul(fn->builder, lhs, rhs, target_name); 554 break; 555 case OP_DIVU: 556 target = LLVMBuildUDiv(fn->builder, lhs, rhs, target_name); 557 break; 558 case OP_DIVS: 559 assert(!is_float_type(insn->type)); 560 target = LLVMBuildSDiv(fn->builder, lhs, rhs, target_name); 561 break; 562 case OP_MODU: 563 assert(!is_float_type(insn->type)); 564 target = LLVMBuildURem(fn->builder, lhs, rhs, target_name); 565 break; 566 case OP_MODS: 567 assert(!is_float_type(insn->type)); 568 target = LLVMBuildSRem(fn->builder, lhs, rhs, target_name); 569 break; 570 case OP_SHL: 571 assert(!is_float_type(insn->type)); 572 target = LLVMBuildShl(fn->builder, lhs, rhs, target_name); 573 break; 574 case OP_LSR: 575 assert(!is_float_type(insn->type)); 576 target = LLVMBuildLShr(fn->builder, lhs, rhs, target_name); 577 break; 578 case OP_ASR: 579 assert(!is_float_type(insn->type)); 580 target = LLVMBuildAShr(fn->builder, lhs, rhs, target_name); 581 break; 582 583 /* floating-point */ 584 case OP_FADD: 585 target = LLVMBuildFAdd(fn->builder, lhs, rhs, target_name); 586 break; 587 case OP_FSUB: 588 target = LLVMBuildFSub(fn->builder, lhs, rhs, target_name); 589 break; 590 case OP_FMUL: 591 target = LLVMBuildFMul(fn->builder, lhs, rhs, target_name); 592 break; 593 case OP_FDIV: 594 target = LLVMBuildFDiv(fn->builder, lhs, rhs, target_name); 595 break; 596 597 /* Logical */ 598 case OP_AND: 599 assert(!is_float_type(insn->type)); 600 target = LLVMBuildAnd(fn->builder, lhs, rhs, target_name); 601 break; 602 case OP_OR: 603 assert(!is_float_type(insn->type)); 604 target = LLVMBuildOr(fn->builder, lhs, rhs, target_name); 605 break; 606 case OP_XOR: 607 assert(!is_float_type(insn->type)); 608 target = LLVMBuildXor(fn->builder, lhs, rhs, target_name); 609 break; 610 default: 611 assert(0); 612 break; 613 } 614 615 target = adjust_type(fn, insn->type, target); 616 insn->target->priv = target; 617 } 618 619 static void output_op_compare(struct function *fn, struct instruction *insn) 620 { 621 LLVMValueRef lhs, rhs, target; 622 char target_name[64]; 623 624 lhs = pseudo_to_value(fn, NULL, insn->src1); 625 if (insn->src2->type == PSEUDO_VAL) 626 rhs = constant_value(insn->src2->value, LLVMTypeOf(lhs)); 627 else 628 rhs = pseudo_to_value(fn, NULL, insn->src2); 629 if (!rhs) 630 rhs = LLVMGetUndef(symbol_type(insn->type)); 631 632 pseudo_name(insn->target, target_name); 633 634 LLVMTypeRef dst_type = insn_symbol_type(insn); 635 636 switch (LLVMGetTypeKind(LLVMTypeOf(lhs))) { 637 case LLVMPointerTypeKind: 638 lhs = value_to_pvalue(fn, &ptr_ctype, lhs); 639 rhs = value_to_pvalue(fn, &ptr_ctype, rhs); 640 /* fall through */ 641 642 case LLVMIntegerTypeKind: { 643 LLVMIntPredicate op = translate_op(insn->opcode); 644 645 if (LLVMGetTypeKind(LLVMTypeOf(rhs)) == LLVMPointerTypeKind) { 646 LLVMTypeRef ltype = LLVMTypeOf(lhs); 647 rhs = LLVMBuildPtrToInt(fn->builder, rhs, ltype, ""); 648 } 649 target = LLVMBuildICmp(fn->builder, op, lhs, rhs, target_name); 650 break; 651 } 652 case LLVMHalfTypeKind: 653 case LLVMFloatTypeKind: 654 case LLVMDoubleTypeKind: 655 case LLVMX86_FP80TypeKind: 656 case LLVMFP128TypeKind: 657 case LLVMPPC_FP128TypeKind: { 658 LLVMRealPredicate op = translate_fop(insn->opcode); 659 660 target = LLVMBuildFCmp(fn->builder, op, lhs, rhs, target_name); 661 break; 662 } 663 default: 664 assert(0); 665 } 666 667 target = LLVMBuildZExt(fn->builder, target, dst_type, target_name); 668 669 insn->target->priv = target; 670 } 671 672 static void output_op_ret(struct function *fn, struct instruction *insn) 673 { 674 pseudo_t pseudo = insn->src; 675 676 if (pseudo && pseudo != VOID) { 677 LLVMValueRef result = get_operand(fn, insn->type, pseudo); 678 LLVMBuildRet(fn->builder, result); 679 } else 680 LLVMBuildRetVoid(fn->builder); 681 } 682 683 static LLVMValueRef calc_memop_addr(struct function *fn, struct instruction *insn) 684 { 685 LLVMTypeRef int_type, addr_type; 686 LLVMValueRef src, off, addr; 687 unsigned int as; 688 689 /* int type large enough to hold a pointer */ 690 int_type = LLVMIntType(bits_in_pointer); 691 off = LLVMConstInt(int_type, insn->offset, 0); 692 693 /* convert src to the effective pointer type */ 694 src = pseudo_to_value(fn, insn->type, insn->src); 695 as = LLVMGetPointerAddressSpace(LLVMTypeOf(src)); 696 addr_type = LLVMPointerType(insn_symbol_type(insn), as); 697 src = LLVMBuildPointerCast(fn->builder, src, addr_type, LLVMGetValueName(src)); 698 699 /* addr = src + off */ 700 addr = calc_gep(fn->builder, src, off); 701 return addr; 702 } 703 704 705 static void output_op_load(struct function *fn, struct instruction *insn) 706 { 707 LLVMValueRef addr, target; 708 char name[MAX_PSEUDO_NAME]; 709 710 addr = calc_memop_addr(fn, insn); 711 712 /* perform load */ 713 pseudo_name(insn->target, name); 714 target = LLVMBuildLoad(fn->builder, addr, name); 715 716 insn->target->priv = target; 717 } 718 719 static void output_op_store(struct function *fn, struct instruction *insn) 720 { 721 LLVMValueRef addr, target_in; 722 723 addr = calc_memop_addr(fn, insn); 724 725 target_in = pseudo_to_rvalue(fn, insn->type, insn->target); 726 727 /* perform store */ 728 LLVMBuildStore(fn->builder, target_in, addr); 729 } 730 731 static LLVMValueRef bool_value(struct function *fn, LLVMValueRef value) 732 { 733 if (LLVMTypeOf(value) != LLVMInt1Type()) 734 value = LLVMBuildIsNotNull(fn->builder, value, LLVMGetValueName(value)); 735 736 return value; 737 } 738 739 static void output_op_cbr(struct function *fn, struct instruction *br) 740 { 741 LLVMValueRef cond = bool_value(fn, 742 pseudo_to_value(fn, NULL, br->cond)); 743 744 LLVMBuildCondBr(fn->builder, cond, 745 br->bb_true->priv, 746 br->bb_false->priv); 747 } 748 749 static void output_op_br(struct function *fn, struct instruction *br) 750 { 751 LLVMBuildBr(fn->builder, br->bb_true->priv); 752 } 753 754 static void output_op_sel(struct function *fn, struct instruction *insn) 755 { 756 LLVMValueRef target, src1, src2, src3; 757 char name[MAX_PSEUDO_NAME]; 758 759 src1 = bool_value(fn, pseudo_to_value(fn, NULL, insn->src1)); 760 src2 = get_operand(fn, insn->type, insn->src2); 761 src3 = get_operand(fn, insn->type, insn->src3); 762 763 pseudo_name(insn->target, name); 764 target = LLVMBuildSelect(fn->builder, src1, src2, src3, name); 765 766 insn->target->priv = adjust_type(fn, insn->type, target); 767 } 768 769 static void output_op_switch(struct function *fn, struct instruction *insn) 770 { 771 LLVMValueRef sw_val, target; 772 struct basic_block *def = NULL; 773 struct multijmp *jmp; 774 int n_jmp = 0; 775 776 FOR_EACH_PTR(insn->multijmp_list, jmp) { 777 if (jmp->begin <= jmp->end) { 778 n_jmp += (jmp->end - jmp->begin) + 1; 779 } else /* default case */ 780 def = jmp->target; 781 } END_FOR_EACH_PTR(jmp); 782 783 sw_val = get_ioperand(fn, insn->type, insn->cond); 784 target = LLVMBuildSwitch(fn->builder, sw_val, 785 def ? def->priv : NULL, n_jmp); 786 787 FOR_EACH_PTR(insn->multijmp_list, jmp) { 788 long long val; 789 790 for (val = jmp->begin; val <= jmp->end; val++) { 791 LLVMValueRef Val = val_to_value(val, insn->type); 792 LLVMAddCase(target, Val, jmp->target->priv); 793 } 794 } END_FOR_EACH_PTR(jmp); 795 } 796 797 static void output_op_call(struct function *fn, struct instruction *insn) 798 { 799 LLVMValueRef target, func; 800 struct symbol *ctype; 801 int n_arg = 0, i; 802 struct pseudo *arg; 803 LLVMValueRef *args; 804 char name[64]; 805 806 n_arg = pseudo_list_size(insn->arguments); 807 args = calloc(n_arg, sizeof(LLVMValueRef)); 808 809 PREPARE_PTR_LIST(insn->fntypes, ctype); 810 if (insn->func->type == PSEUDO_REG || insn->func->type == PSEUDO_PHI) 811 func = get_operand(fn, ctype, insn->func); 812 else 813 func = pseudo_to_value(fn, ctype, insn->func); 814 i = 0; 815 FOR_EACH_PTR(insn->arguments, arg) { 816 NEXT_PTR_LIST(ctype); 817 args[i++] = pseudo_to_rvalue(fn, ctype, arg); 818 } END_FOR_EACH_PTR(arg); 819 FINISH_PTR_LIST(ctype); 820 821 pseudo_name(insn->target, name); 822 target = LLVMBuildCall(fn->builder, func, args, n_arg, name); 823 824 insn->target->priv = target; 825 } 826 827 static void output_op_phisrc(struct function *fn, struct instruction *insn) 828 { 829 LLVMValueRef v; 830 struct instruction *phi; 831 832 assert(insn->target->priv == NULL); 833 834 /* target = src */ 835 v = get_operand(fn, insn->type, insn->phi_src); 836 837 FOR_EACH_PTR(insn->phi_users, phi) { 838 LLVMValueRef load, ptr; 839 840 assert(phi->opcode == OP_PHI); 841 /* phi must be load from alloca */ 842 load = phi->target->priv; 843 assert(LLVMGetInstructionOpcode(load) == LLVMLoad); 844 ptr = LLVMGetOperand(load, 0); 845 /* store v to alloca */ 846 LLVMBuildStore(fn->builder, v, ptr); 847 } END_FOR_EACH_PTR(phi); 848 } 849 850 static void output_op_phi(struct function *fn, struct instruction *insn) 851 { 852 LLVMValueRef load = insn->target->priv; 853 854 /* forward load */ 855 assert(LLVMGetInstructionOpcode(load) == LLVMLoad); 856 /* forward load has no parent block */ 857 assert(!LLVMGetInstructionParent(load)); 858 /* finalize load in current block */ 859 LLVMInsertIntoBuilder(fn->builder, load); 860 } 861 862 static void output_op_ptrcast(struct function *fn, struct instruction *insn) 863 { 864 LLVMValueRef src, target; 865 LLVMTypeRef dtype; 866 struct symbol *otype = insn->orig_type; 867 LLVMOpcode op; 868 char target_name[64]; 869 870 src = get_operand(fn, otype, insn->src); 871 pseudo_name(insn->target, target_name); 872 873 dtype = symbol_type(insn->type); 874 switch (insn->opcode) { 875 case OP_UTPTR: 876 case OP_SEXT: // FIXME 877 assert(is_int_type(otype)); 878 assert(is_ptr_type(insn->type)); 879 op = LLVMIntToPtr; 880 break; 881 case OP_PTRTU: 882 assert(is_ptr_type(otype)); 883 assert(is_int_type(insn->type)); 884 op = LLVMPtrToInt; 885 break; 886 case OP_PTRCAST: 887 case OP_ZEXT: // FIXME 888 assert(is_ptr_type(otype)); 889 assert(is_ptr_type(insn->type)); 890 op = LLVMBitCast; 891 break; 892 default: 893 assert(0); 894 } 895 896 target = LLVMBuildCast(fn->builder, op, src, dtype, target_name); 897 insn->target->priv = target; 898 } 899 900 static void output_op_cast(struct function *fn, struct instruction *insn, LLVMOpcode op) 901 { 902 LLVMValueRef src, target; 903 LLVMTypeRef dtype; 904 struct symbol *otype = insn->orig_type; 905 char target_name[64]; 906 907 if (is_ptr_type(insn->type)) // cast to void* is OP_CAST ... 908 return output_op_ptrcast(fn, insn); 909 910 assert(is_int_type(insn->type)); 911 912 src = get_operand(fn, otype, insn->src); 913 pseudo_name(insn->target, target_name); 914 915 dtype = symbol_type(insn->type); 916 if (is_ptr_type(otype)) { 917 op = LLVMPtrToInt; 918 } else if (is_float_type(otype)) { 919 assert(op == LLVMFPToUI || op == LLVMFPToSI); 920 } else if (is_int_type(otype)) { 921 unsigned int width = otype->bit_size; 922 if (insn->size < width) 923 op = LLVMTrunc; 924 else if (insn->size == width) 925 op = LLVMBitCast; 926 } else { 927 assert(0); 928 } 929 930 target = LLVMBuildCast(fn->builder, op, src, dtype, target_name); 931 insn->target->priv = target; 932 } 933 934 static void output_op_fpcast(struct function *fn, struct instruction *insn) 935 { 936 LLVMTypeRef dtype = symbol_type(insn->type); 937 LLVMValueRef src, target; 938 struct symbol *otype = insn->orig_type; 939 char name[64]; 940 941 assert(is_float_type(insn->type)); 942 943 pseudo_name(insn->target, name); 944 src = get_operand(fn, otype, insn->src); 945 switch (insn->opcode) { 946 case OP_FCVTF: 947 target = LLVMBuildFPCast(fn->builder, src, dtype, name); 948 break; 949 case OP_SCVTF: 950 target = LLVMBuildSIToFP(fn->builder, src, dtype, name); 951 break; 952 case OP_UCVTF: 953 target = LLVMBuildUIToFP(fn->builder, src, dtype, name); 954 break; 955 default: 956 assert(0); 957 } 958 insn->target->priv = target; 959 } 960 961 static void output_op_setval(struct function *fn, struct instruction *insn) 962 { 963 struct expression *val = insn->val; 964 LLVMValueRef target; 965 966 switch (val->type) { 967 case EXPR_LABEL: 968 target = LLVMBlockAddress(fn->fn, val->symbol->bb_target->priv); 969 break; 970 default: 971 assert(0); 972 } 973 974 insn->target->priv = target; 975 } 976 977 static void output_op_setfval(struct function *fn, struct instruction *insn) 978 { 979 LLVMTypeRef dtype = symbol_type(insn->type); 980 LLVMValueRef target; 981 982 target = LLVMConstReal(dtype, insn->fvalue); 983 insn->target->priv = target; 984 } 985 986 static void output_insn(struct function *fn, struct instruction *insn) 987 { 988 switch (insn->opcode) { 989 case OP_RET: 990 output_op_ret(fn, insn); 991 break; 992 case OP_BR: 993 output_op_br(fn, insn); 994 break; 995 case OP_CBR: 996 output_op_cbr(fn, insn); 997 break; 998 case OP_SYMADDR: 999 assert(0); 1000 break; 1001 case OP_SETVAL: 1002 output_op_setval(fn, insn); 1003 break; 1004 case OP_SETFVAL: 1005 output_op_setfval(fn, insn); 1006 break; 1007 case OP_SWITCH: 1008 output_op_switch(fn, insn); 1009 break; 1010 case OP_COMPUTEDGOTO: 1011 assert(0); 1012 break; 1013 case OP_PHISOURCE: 1014 output_op_phisrc(fn, insn); 1015 break; 1016 case OP_PHI: 1017 output_op_phi(fn, insn); 1018 break; 1019 case OP_LOAD: 1020 output_op_load(fn, insn); 1021 break; 1022 case OP_STORE: 1023 output_op_store(fn, insn); 1024 break; 1025 case OP_INLINED_CALL: 1026 break; 1027 case OP_CALL: 1028 output_op_call(fn, insn); 1029 break; 1030 case OP_ZEXT: 1031 output_op_cast(fn, insn, LLVMZExt); 1032 break; 1033 case OP_SEXT: 1034 output_op_cast(fn, insn, LLVMSExt); 1035 break; 1036 case OP_TRUNC: 1037 output_op_cast(fn, insn, LLVMTrunc); 1038 break; 1039 case OP_FCVTU: 1040 output_op_cast(fn, insn, LLVMFPToUI); 1041 break; 1042 case OP_FCVTS: 1043 output_op_cast(fn, insn, LLVMFPToSI); 1044 break; 1045 case OP_UCVTF: case OP_SCVTF: 1046 case OP_FCVTF: 1047 output_op_fpcast(fn, insn); 1048 break; 1049 case OP_UTPTR: 1050 case OP_PTRTU: 1051 case OP_PTRCAST: 1052 output_op_ptrcast(fn, insn); 1053 break; 1054 case OP_BINARY ... OP_BINARY_END: 1055 output_op_binary(fn, insn); 1056 break; 1057 case OP_FPCMP ... OP_BINCMP_END: 1058 output_op_compare(fn, insn); 1059 break; 1060 case OP_SEL: 1061 output_op_sel(fn, insn); 1062 break; 1063 case OP_SLICE: 1064 assert(0); 1065 break; 1066 case OP_NOT: { 1067 LLVMValueRef src, target; 1068 char target_name[64]; 1069 1070 src = pseudo_to_value(fn, insn->type, insn->src); 1071 1072 pseudo_name(insn->target, target_name); 1073 1074 target = LLVMBuildNot(fn->builder, src, target_name); 1075 1076 insn->target->priv = target; 1077 break; 1078 } 1079 case OP_FNEG: 1080 case OP_NEG: { 1081 LLVMValueRef src, target; 1082 char target_name[64]; 1083 1084 src = pseudo_to_value(fn, insn->type, insn->src); 1085 1086 pseudo_name(insn->target, target_name); 1087 1088 if (insn->opcode == OP_FNEG) 1089 target = LLVMBuildFNeg(fn->builder, src, target_name); 1090 else 1091 target = LLVMBuildNeg(fn->builder, src, target_name); 1092 1093 insn->target->priv = target; 1094 break; 1095 } 1096 case OP_CONTEXT: 1097 assert(0); 1098 break; 1099 case OP_RANGE: 1100 assert(0); 1101 break; 1102 case OP_NOP: 1103 assert(0); 1104 break; 1105 case OP_DEATHNOTE: 1106 break; 1107 case OP_ASM: 1108 assert(0); 1109 break; 1110 case OP_COPY: 1111 assert(0); 1112 break; 1113 default: 1114 break; 1115 } 1116 } 1117 1118 static void output_bb(struct function *fn, struct basic_block *bb) 1119 { 1120 struct instruction *insn; 1121 1122 FOR_EACH_PTR(bb->insns, insn) { 1123 if (!insn->bb) 1124 continue; 1125 1126 output_insn(fn, insn); 1127 } 1128 END_FOR_EACH_PTR(insn); 1129 } 1130 1131 #define MAX_ARGS 64 1132 1133 static void output_fn(LLVMModuleRef module, struct entrypoint *ep) 1134 { 1135 struct symbol *sym = ep->name; 1136 struct symbol *base_type = sym->ctype.base_type; 1137 struct function function = { .module = module }; 1138 struct basic_block *bb; 1139 int nr_args = 0; 1140 int i; 1141 1142 function.fn = get_sym_value(module, sym); 1143 LLVMSetFunctionCallConv(function.fn, LLVMCCallConv); 1144 LLVMSetLinkage(function.fn, function_linkage(sym)); 1145 1146 function.builder = LLVMCreateBuilder(); 1147 1148 /* give a name to each argument */ 1149 nr_args = symbol_list_size(base_type->arguments); 1150 for (i = 0; i < nr_args; i++) { 1151 char name[MAX_PSEUDO_NAME]; 1152 LLVMValueRef arg; 1153 1154 arg = LLVMGetParam(function.fn, i); 1155 snprintf(name, sizeof(name), "ARG%d.", i+1); 1156 LLVMSetValueName(arg, name); 1157 } 1158 1159 /* create the BBs */ 1160 FOR_EACH_PTR(ep->bbs, bb) { 1161 static int nr_bb; 1162 LLVMBasicBlockRef bbr; 1163 char bbname[32]; 1164 struct instruction *insn; 1165 1166 sprintf(bbname, "L%d", nr_bb++); 1167 bbr = LLVMAppendBasicBlock(function.fn, bbname); 1168 1169 bb->priv = bbr; 1170 1171 /* allocate alloca for each phi */ 1172 FOR_EACH_PTR(bb->insns, insn) { 1173 LLVMBasicBlockRef entrybbr; 1174 LLVMTypeRef phi_type; 1175 LLVMValueRef ptr; 1176 1177 if (!insn->bb || insn->opcode != OP_PHI) 1178 continue; 1179 /* insert alloca into entry block */ 1180 entrybbr = LLVMGetEntryBasicBlock(function.fn); 1181 LLVMPositionBuilderAtEnd(function.builder, entrybbr); 1182 phi_type = insn_symbol_type(insn); 1183 ptr = LLVMBuildAlloca(function.builder, phi_type, ""); 1184 /* emit forward load for phi */ 1185 LLVMClearInsertionPosition(function.builder); 1186 insn->target->priv = LLVMBuildLoad(function.builder, ptr, "phi"); 1187 } END_FOR_EACH_PTR(insn); 1188 } 1189 END_FOR_EACH_PTR(bb); 1190 1191 FOR_EACH_PTR(ep->bbs, bb) { 1192 LLVMPositionBuilderAtEnd(function.builder, bb->priv); 1193 1194 output_bb(&function, bb); 1195 } 1196 END_FOR_EACH_PTR(bb); 1197 } 1198 1199 static LLVMValueRef output_data(LLVMModuleRef module, struct symbol *sym) 1200 { 1201 struct expression *initializer = sym->initializer; 1202 LLVMValueRef initial_value; 1203 LLVMValueRef data; 1204 const char *name; 1205 1206 if (initializer) { 1207 switch (initializer->type) { 1208 case EXPR_VALUE: 1209 initial_value = LLVMConstInt(symbol_type(sym), initializer->value, 1); 1210 break; 1211 case EXPR_FVALUE: 1212 initial_value = LLVMConstReal(symbol_type(sym), initializer->fvalue); 1213 break; 1214 case EXPR_SYMBOL: { 1215 struct symbol *sym = initializer->symbol; 1216 1217 initial_value = LLVMGetNamedGlobal(module, show_ident(sym->ident)); 1218 if (!initial_value) 1219 initial_value = output_data(module, sym); 1220 break; 1221 } 1222 case EXPR_STRING: { 1223 const char *s = initializer->string->data; 1224 1225 initial_value = LLVMConstString(strdup(s), strlen(s) + 1, true); 1226 break; 1227 } 1228 default: 1229 warning(initializer->pos, "can't initialize type: %s", show_typename(sym)); 1230 initial_value = NULL; 1231 break; 1232 } 1233 } else { 1234 LLVMTypeRef type = symbol_type(sym); 1235 1236 initial_value = LLVMConstNull(type); 1237 } 1238 1239 if (!initial_value) 1240 return NULL; 1241 1242 name = sym->ident ? show_ident(sym->ident) : "" ; 1243 1244 data = LLVMAddGlobal(module, LLVMTypeOf(initial_value), name); 1245 1246 LLVMSetLinkage(data, data_linkage(sym)); 1247 if (sym->ctype.modifiers & MOD_CONST) 1248 LLVMSetGlobalConstant(data, 1); 1249 if (sym->ctype.modifiers & MOD_TLS) 1250 LLVMSetThreadLocal(data, 1); 1251 if (sym->ctype.alignment) 1252 LLVMSetAlignment(data, sym->ctype.alignment); 1253 1254 if (!(sym->ctype.modifiers & MOD_EXTERN)) 1255 LLVMSetInitializer(data, initial_value); 1256 1257 return data; 1258 } 1259 1260 static int is_prototype(struct symbol *sym) 1261 { 1262 if (sym->type == SYM_NODE) 1263 sym = sym->ctype.base_type; 1264 return sym && sym->type == SYM_FN && !sym->stmt; 1265 } 1266 1267 static int compile(LLVMModuleRef module, struct symbol_list *list) 1268 { 1269 struct symbol *sym; 1270 1271 FOR_EACH_PTR(list, sym) { 1272 struct entrypoint *ep; 1273 expand_symbol(sym); 1274 1275 if (is_prototype(sym)) { 1276 // this will do the LLVMAddFunction() we want 1277 get_sym_value(module, sym); 1278 continue; 1279 } 1280 1281 ep = linearize_symbol(sym); 1282 if (ep) 1283 output_fn(module, ep); 1284 else 1285 output_data(module, sym); 1286 } 1287 END_FOR_EACH_PTR(sym); 1288 1289 return 0; 1290 } 1291 1292 #ifndef LLVM_DEFAULT_TARGET_TRIPLE 1293 #define LLVM_DEFAULT_TARGET_TRIPLE LLVM_HOSTTRIPLE 1294 #endif 1295 1296 #define X86_LINUX_LAYOUT \ 1297 "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-" \ 1298 "i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-" \ 1299 "a0:0:64-f80:32:32-n8:16:32-S128" 1300 1301 #define X86_64_LINUX_LAYOUT \ 1302 "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-" \ 1303 "i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-" \ 1304 "a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" 1305 1306 static void set_target(LLVMModuleRef module) 1307 { 1308 char target[] = LLVM_DEFAULT_TARGET_TRIPLE; 1309 const char *arch, *vendor, *os, *env, *layout = NULL; 1310 char triple[256]; 1311 1312 arch = strtok(target, "-"); 1313 vendor = strtok(NULL, "-"); 1314 os = strtok(NULL, "-"); 1315 env = strtok(NULL, "-"); 1316 1317 if (!os) 1318 return; 1319 if (!env) 1320 env = "unknown"; 1321 1322 if (!strcmp(arch, "x86_64") && !strcmp(os, "linux")) { 1323 if (arch_m64) { 1324 layout = X86_64_LINUX_LAYOUT; 1325 } else { 1326 arch = "i386"; 1327 layout = X86_LINUX_LAYOUT; 1328 } 1329 } 1330 1331 /* unsupported target */ 1332 if (!layout) 1333 return; 1334 1335 snprintf(triple, sizeof(triple), "%s-%s-%s-%s", arch, vendor, os, env); 1336 LLVMSetTarget(module, triple); 1337 LLVMSetDataLayout(module, layout); 1338 } 1339 1340 int main(int argc, char **argv) 1341 { 1342 struct string_list *filelist = NULL; 1343 struct symbol_list *symlist; 1344 LLVMModuleRef module; 1345 char *file; 1346 1347 symlist = sparse_initialize(argc, argv, &filelist); 1348 1349 module = LLVMModuleCreateWithName("sparse"); 1350 set_target(module); 1351 1352 compile(module, symlist); 1353 1354 /* need ->phi_users */ 1355 dbg_dead = 1; 1356 FOR_EACH_PTR(filelist, file) { 1357 symlist = sparse(file); 1358 if (die_if_error) 1359 return 1; 1360 compile(module, symlist); 1361 } END_FOR_EACH_PTR(file); 1362 1363 LLVMVerifyModule(module, LLVMPrintMessageAction, NULL); 1364 1365 LLVMWriteBitcodeToFD(module, STDOUT_FILENO, 0, 0); 1366 1367 LLVMDisposeModule(module); 1368 1369 report_stats(); 1370 return 0; 1371 }