1 #!/usr/bin/python
   2 
   3 # Copyright (C) 2013 Oracle.
   4 #
   5 # Licensed under the Open Software License version 1.1
   6 
   7 import sqlite3
   8 import sys
   9 import re
  10 
  11 try:
  12     con = sqlite3.connect('smatch_db.sqlite')
  13 except sqlite3.Error, e:
  14     print "Error %s:" % e.args[0]
  15     sys.exit(1)
  16 
  17 def usage():
  18     print "%s" %(sys.argv[0])
  19     print "<function> - how a function is called"
  20     print "info <type> - how a function is called, filtered by type"
  21     print "return_states <function> - what a function returns"
  22     print "call_tree <function> - show the call tree"
  23     print "where <struct_type> <member> - where a struct member is set"
  24     print "type_size <struct_type> <member> - how a struct member is allocated"
  25     print "data_info <struct_type> <member> - information about a given data type"
  26     print "function_ptr <function> - which function pointers point to this"
  27     print "trace_param <function> <param> - trace where a parameter came from"
  28     print "locals <file> - print the local values in a file."
  29     sys.exit(1)
  30 
  31 function_ptrs = []
  32 searched_ptrs = []
  33 def get_function_pointers_helper(func):
  34     cur = con.cursor()
  35     cur.execute("select distinct ptr from function_ptr where function = '%s';" %(func))
  36     for row in cur:
  37         ptr = row[0]
  38         if ptr in function_ptrs:
  39             continue
  40         function_ptrs.append(ptr)
  41         if not ptr in searched_ptrs:
  42             searched_ptrs.append(ptr)
  43             get_function_pointers_helper(ptr)
  44 
  45 def get_function_pointers(func):
  46     global function_ptrs
  47     global searched_ptrs
  48     function_ptrs = [func]
  49     searched_ptrs = [func]
  50     get_function_pointers_helper(func)
  51     return function_ptrs
  52 
  53 db_types = {   0: "INTERNAL",
  54              101: "PARAM_CLEARED",
  55              103: "PARAM_LIMIT",
  56              104: "PARAM_FILTER",
  57             1001: "PARAM_VALUE",
  58             1002: "BUF_SIZE",
  59             1004: "CAPPED_DATA",
  60             1005: "RETURN_VALUE",
  61             1006: "DEREFERENCE",
  62             1007: "RANGE_CAP",
  63             1008: "LOCK_HELD",
  64             1009: "LOCK_RELEASED",
  65             1010: "ABSOLUTE_LIMITS",
  66             1012: "PARAM_ADD",
  67             1013: "PARAM_FREED",
  68             1014: "DATA_SOURCE",
  69             1015: "FUZZY_MAX",
  70             1016: "STR_LEN",
  71             1017: "ARRAY_LEN",
  72             1018: "CAPABLE",
  73             1019: "NS_CAPABLE",
  74             1022: "TYPE_LINK",
  75             1023: "UNTRACKED_PARAM",
  76             1024: "CULL_PATH",
  77             1025: "PARAM_SET",
  78             1026: "PARAM_USED",
  79             1027: "BYTE_UNITS",
  80             1028: "COMPARE_LIMIT",
  81             1029: "PARAM_COMPARE",
  82             1030: "EXPECTS_TYPE",
  83             1031: "CONSTRAINT",
  84             1032: "PASSES_TYPE",
  85             1033: "CONSTRAINT_REQUIRED",
  86             1034: "BIT_INFO",
  87             1035: "NOSPEC",
  88             1036: "NOSPEC_WB",
  89             1037: "STMT_CNT",
  90             1038: "TERMINATED",
  91             1039: "SLEEP",
  92             1040: "NO_SLEEP_CNT",
  93             1041: "SMALLISH",
  94             1042: "FRESH_MTAG",
  95 
  96             8017: "USER_DATA",
  97             9017: "USER_DATA_SET",
  98             8018: "NO_OVERFLOW",
  99             8019: "NO_OVERFLOW_SIMPLE",
 100             8020: "LOCKED",
 101             8021: "UNLOCKED",
 102             8023: "ATOMIC_INC",
 103             8024: "ATOMIC_DEC",
 104 };
 105 
 106 def add_range(rl, min_val, max_val):
 107     check_next = 0
 108     done = 0
 109     ret = []
 110     idx = 0
 111 
 112     if len(rl) == 0:
 113         return [[min_val, max_val]]
 114 
 115     for idx in range(len(rl)):
 116         cur_min = rl[idx][0]
 117         cur_max = rl[idx][1]
 118 
 119         # we already merged the new range but we might need to change later
 120         # ranges if they over lap with more than one
 121         if check_next:
 122             # join with added range
 123             if max_val + 1 == cur_min:
 124                 ret[len(ret) - 1][1] = cur_max
 125                 done = 1
 126                 break
 127             # don't overlap
 128             if max_val < cur_min:
 129                 ret.append([cur_min, cur_max])
 130                 done = 1
 131                 break
 132             # partially overlap
 133             if max_val < cur_max:
 134                 ret[len(ret) - 1][1] = cur_max
 135                 done = 1
 136                 break
 137             # completely overlap
 138             continue
 139 
 140         # join 2 ranges into one
 141         if max_val + 1 == cur_min:
 142             ret.append([min_val, cur_max])
 143             done = 1
 144             break
 145         # range is entirely below
 146         if max_val < cur_min:
 147             ret.append([min_val, max_val])
 148             ret.append([cur_min, cur_max])
 149             done = 1
 150             break
 151         # range is partially below
 152         if min_val < cur_min:
 153             if max_val <= cur_max:
 154                 ret.append([min_val, cur_max])
 155                 done = 1
 156                 break
 157             else:
 158                 ret.append([min_val, max_val])
 159                 check_next = 1
 160                 continue
 161         # range already included
 162         if max_val <= cur_max:
 163             ret.append([cur_min, cur_max])
 164             done = 1
 165             break;
 166         # range partially above
 167         if min_val <= cur_max:
 168             ret.append([cur_min, max_val])
 169             check_next = 1
 170             continue
 171         # join 2 ranges on the other side
 172         if min_val - 1 == cur_max:
 173             ret.append([cur_min, max_val])
 174             check_next = 1
 175             continue
 176         # range is above
 177         ret.append([cur_min, cur_max])
 178 
 179     if idx + 1 < len(rl):          # we hit a break statement
 180         ret = ret + rl[idx + 1:]
 181     elif done:                     # we hit a break on the last iteration
 182         pass
 183     elif not check_next:           # it's past the end of the rl
 184         ret.append([min_val, max_val])
 185 
 186     return ret;
 187 
 188 def rl_union(rl1, rl2):
 189     ret = []
 190     for r in rl1:
 191         ret = add_range(ret, r[0], r[1])
 192     for r in rl2:
 193         ret = add_range(ret, r[0], r[1])
 194 
 195     if (rl1 or rl2) and not ret:
 196         print "bug: merging %s + %s gives empty" %(rl1, rl2)
 197 
 198     return ret
 199 
 200 def txt_to_val(txt):
 201     if txt == "s64min":
 202         return -(2**63)
 203     elif txt == "s32min":
 204         return -(2**31)
 205     elif txt == "s16min":
 206         return -(2**15)
 207     elif txt == "s64max":
 208         return 2**63 - 1
 209     elif txt == "s32max":
 210         return 2**31 - 1
 211     elif txt == "s16max":
 212         return 2**15 - 1
 213     elif txt == "u64max":
 214         return 2**64 - 1
 215     elif txt == "ptr_max":
 216         return 2**64 - 1
 217     elif txt == "u32max":
 218         return 2**32 - 1
 219     elif txt == "u16max":
 220         return 2**16 - 1
 221     else:
 222         try:
 223             return int(txt)
 224         except ValueError:
 225             return 0
 226 
 227 def val_to_txt(val):
 228     if val == -(2**63):
 229         return "s64min"
 230     elif val == -(2**31):
 231         return "s32min"
 232     elif val == -(2**15):
 233         return "s16min"
 234     elif val == 2**63 - 1:
 235         return "s64max"
 236     elif val == 2**31 - 1:
 237         return "s32max"
 238     elif val == 2**15 - 1:
 239         return "s16max"
 240     elif val == 2**64 - 1:
 241         return "u64max"
 242     elif val == 2**32 - 1:
 243         return "u32max"
 244     elif val == 2**16 - 1:
 245         return "u16max"
 246     elif val < 0:
 247         return "(%d)" %(val)
 248     else:
 249         return "%d" %(val)
 250 
 251 def get_next_str(txt):
 252     val = ""
 253     parsed = 0
 254 
 255     if txt[0] == '(':
 256         parsed += 1
 257         for char in txt[1:]:
 258             if char == ')':
 259                 break
 260             parsed += 1
 261         val = txt[1:parsed]
 262         parsed += 1
 263     elif txt[0] == 's' or txt[0] == 'u':
 264         parsed += 6
 265         val = txt[:parsed]
 266     else:
 267         if txt[0] == '-':
 268             parsed += 1
 269         for char in txt[parsed:]:
 270             if char == '-':
 271                 break
 272             parsed += 1
 273         val = txt[:parsed]
 274     return [parsed, val]
 275 
 276 def txt_to_rl(txt):
 277     if len(txt) == 0:
 278         return []
 279 
 280     ret = []
 281     pairs = txt.split(",")
 282     for pair in pairs:
 283         cnt, min_str = get_next_str(pair)
 284         if cnt == len(pair):
 285             max_str = min_str
 286         else:
 287             cnt, max_str = get_next_str(pair[cnt + 1:])
 288         min_val = txt_to_val(min_str)
 289         max_val = txt_to_val(max_str)
 290         ret.append([min_val, max_val])
 291 
 292 #    Hm...  Smatch won't call INT_MAX s32max if the variable is unsigned.
 293 #    if txt != rl_to_txt(ret):
 294 #        print "bug: converting: text = %s rl = %s internal = %s" %(txt, rl_to_txt(ret), ret)
 295 
 296     return ret
 297 
 298 def rl_to_txt(rl):
 299     ret = ""
 300     for idx in range(len(rl)):
 301         cur_min = rl[idx][0]
 302         cur_max = rl[idx][1]
 303 
 304         if idx != 0:
 305             ret += ","
 306 
 307         if cur_min == cur_max:
 308             ret += val_to_txt(cur_min)
 309         else:
 310             ret += val_to_txt(cur_min)
 311             ret += "-"
 312             ret += val_to_txt(cur_max)
 313     return ret
 314 
 315 def type_to_str(type_int):
 316 
 317     t = int(type_int)
 318     if db_types.has_key(t):
 319         return db_types[t]
 320     return type_int
 321 
 322 def type_to_int(type_string):
 323     for k in db_types.keys():
 324         if db_types[k] == type_string:
 325             return k
 326     return -1
 327 
 328 def display_caller_info(printed, cur, param_names):
 329     for txt in cur:
 330         if not printed:
 331             print "file | caller | function | type | parameter | key | value |"
 332         printed = 1
 333 
 334         parameter = int(txt[6])
 335         key = txt[7]
 336         if len(param_names) and parameter in param_names:
 337             key = key.replace("$", param_names[parameter])
 338 
 339         print "%20s | %20s | %20s |" %(txt[0], txt[1], txt[2]),
 340         print " %10s |" %(type_to_str(txt[5])),
 341         print " %d | %s | %s" %(parameter, key, txt[8])
 342     return printed
 343 
 344 def get_caller_info(filename, ptrs, my_type):
 345     cur = con.cursor()
 346     param_names = get_param_names(filename, func)
 347     printed = 0
 348     type_filter = ""
 349     if my_type != "":
 350         type_filter = "and type = %d" %(type_to_int(my_type))
 351     for ptr in ptrs:
 352         cur.execute("select * from caller_info where function = '%s' %s;" %(ptr, type_filter))
 353         printed = display_caller_info(printed, cur, param_names)
 354 
 355 def print_caller_info(filename, func, my_type = ""):
 356     ptrs = get_function_pointers(func)
 357     get_caller_info(filename, ptrs, my_type)
 358 
 359 def merge_values(param_names, vals, cur):
 360     for txt in cur:
 361         parameter = int(txt[0])
 362         name = txt[1]
 363         rl = txt_to_rl(txt[2])
 364         if parameter in param_names:
 365             name = name.replace("$", param_names[parameter])
 366 
 367         if not parameter in vals:
 368             vals[parameter] = {}
 369 
 370         # the first item on the list is the number of rows.  it's incremented
 371         # every time we call merge_values().
 372         if name in vals[parameter]:
 373             vals[parameter][name] = [vals[parameter][name][0] + 1, rl_union(vals[parameter][name][1], rl)]
 374         else:
 375             vals[parameter][name] = [1, rl]
 376 
 377 def get_param_names(filename, func):
 378     cur = con.cursor()
 379     param_names = {}
 380     cur.execute("select parameter, value from parameter_name where file = '%s' and function = '%s';" %(filename, func))
 381     for txt in cur:
 382         parameter = int(txt[0])
 383         name = txt[1]
 384         param_names[parameter] = name
 385     if len(param_names):
 386         return param_names
 387 
 388     cur.execute("select parameter, value from parameter_name where function = '%s';" %(func))
 389     for txt in cur:
 390         parameter = int(txt[0])
 391         name = txt[1]
 392         param_names[parameter] = name
 393     return param_names
 394 
 395 def get_caller_count(ptrs):
 396     cur = con.cursor()
 397     count = 0
 398     for ptr in ptrs:
 399         cur.execute("select count(distinct(call_id)) from caller_info where function = '%s';" %(ptr))
 400         for txt in cur:
 401             count += int(txt[0])
 402     return count
 403 
 404 def print_merged_caller_values(filename, func, ptrs, param_names, call_cnt):
 405     cur = con.cursor()
 406     vals = {}
 407     for ptr in ptrs:
 408         cur.execute("select parameter, key, value from caller_info where function = '%s' and type = %d;" %(ptr, type_to_int("PARAM_VALUE")))
 409         merge_values(param_names, vals, cur);
 410 
 411     for param in sorted(vals):
 412         for name in sorted(vals[param]):
 413             if vals[param][name][0] != call_cnt:
 414                 continue
 415             print "%d %s -> %s" %(param, name, rl_to_txt(vals[param][name][1]))
 416 
 417 
 418 def print_unmerged_caller_values(filename, func, ptrs, param_names):
 419     cur = con.cursor()
 420     for ptr in ptrs:
 421         prev = -1
 422         cur.execute("select file, caller, call_id, parameter, key, value from caller_info where function = '%s' and type = %d;" %(ptr, type_to_int("PARAM_VALUE")))
 423         for filename, caller, call_id, parameter, name, value in cur:
 424             if prev != int(call_id):
 425                 prev = int(call_id)
 426 
 427             parameter = int(parameter)
 428             if parameter < len(param_names):
 429                 name = name.replace("$", param_names[parameter])
 430             else:
 431                 name = name.replace("$", "$%d" %(parameter))
 432 
 433             print "%s | %s | %s | %s" %(filename, caller, name, value)
 434         print "=========================="
 435 
 436 def print_caller_values(filename, func, ptrs):
 437     param_names = get_param_names(filename, func)
 438     call_cnt = get_caller_count(ptrs)
 439 
 440     print_merged_caller_values(filename, func, ptrs, param_names, call_cnt)
 441     print "=========================="
 442     print_unmerged_caller_values(filename, func, ptrs, param_names)
 443 
 444 def caller_info_values(filename, func):
 445     ptrs = get_function_pointers(func)
 446     print_caller_values(filename, func, ptrs)
 447 
 448 def print_return_states(func):
 449     cur = con.cursor()
 450     cur.execute("select * from return_states where function = '%s';" %(func))
 451     count = 0
 452     for txt in cur:
 453         printed = 1
 454         if count == 0:
 455             print "file | function | return_id | return_value | type | param | key | value |"
 456         count += 1
 457         print "%s | %s | %2s | %13s" %(txt[0], txt[1], txt[3], txt[4]),
 458         print "| %13s |" %(type_to_str(txt[6])),
 459         print " %2d | %20s | %20s |" %(txt[7], txt[8], txt[9])
 460 
 461 def print_return_implies(func):
 462     cur = con.cursor()
 463     cur.execute("select * from return_implies where function = '%s';" %(func))
 464     count = 0
 465     for txt in cur:
 466         if not count:
 467             print "file | function | type | param | key | value |"
 468         count += 1
 469         print "%15s | %15s" %(txt[0], txt[1]),
 470         print "| %15s" %(type_to_str(txt[4])),
 471         print "| %3d | %s | %15s |" %(txt[5], txt[6], txt[7])
 472 
 473 def print_type_size(struct_type, member):
 474     cur = con.cursor()
 475     cur.execute("select * from type_size where type like '(struct %s)->%s';" %(struct_type, member))
 476     print "type | size"
 477     for txt in cur:
 478         print "%-15s | %s" %(txt[0], txt[1])
 479 
 480     cur.execute("select * from function_type_size where type like '(struct %s)->%s';" %(struct_type, member))
 481     print "file | function | type | size"
 482     for txt in cur:
 483         print "%-15s | %-15s | %-15s | %s" %(txt[0], txt[1], txt[2], txt[3])
 484 
 485 def print_data_info(struct_type, member):
 486     cur = con.cursor()
 487     cur.execute("select * from data_info where data like '(struct %s)->%s';" %(struct_type, member))
 488     print "file | data | type | value"
 489     for txt in cur:
 490         print "%-15s | %-15s | %-15s | %s" %(txt[0], txt[1], type_to_str(txt[2]), txt[3])
 491 
 492 def print_fn_ptrs(func):
 493     ptrs = get_function_pointers(func)
 494     if not ptrs:
 495         return
 496     print "%s = " %(func),
 497     print(ptrs)
 498 
 499 def print_functions(member):
 500     cur = con.cursor()
 501     cur.execute("select * from function_ptr where ptr like '%%->%s';" %(member))
 502     print "File | Pointer | Function | Static"
 503     for txt in cur:
 504         print "%-15s | %-15s | %-15s | %s" %(txt[0], txt[2], txt[1], txt[3])
 505 
 506 def get_callers(func):
 507     ret = []
 508     cur = con.cursor()
 509     ptrs = get_function_pointers(func)
 510     for ptr in ptrs:
 511         cur.execute("select distinct caller from caller_info where function = '%s';" %(ptr))
 512         for row in cur:
 513             ret.append(row[0])
 514     return ret
 515 
 516 printed_funcs = []
 517 def call_tree_helper(func, indent = 0):
 518     global printed_funcs
 519     if func in printed_funcs:
 520         return
 521     print "%s%s()" %(" " * indent, func)
 522     if func == "too common":
 523         return
 524     if indent > 6:
 525         return
 526     printed_funcs.append(func)
 527     callers = get_callers(func)
 528     if len(callers) >= 20:
 529         print "Over 20 callers for %s()" %(func)
 530         return
 531     for caller in callers:
 532         call_tree_helper(caller, indent + 2)
 533 
 534 def print_call_tree(func):
 535     global printed_funcs
 536     printed_funcs = []
 537     call_tree_helper(func)
 538 
 539 def function_type_value(struct_type, member):
 540     cur = con.cursor()
 541     cur.execute("select * from function_type_value where type like '(struct %s)->%s';" %(struct_type, member))
 542     for txt in cur:
 543         print "%-30s | %-30s | %s | %s" %(txt[0], txt[1], txt[2], txt[3])
 544 
 545 def trace_callers(func, param):
 546     sources = []
 547     prev_type = 0
 548 
 549     cur = con.cursor()
 550     ptrs = get_function_pointers(func)
 551     for ptr in ptrs:
 552         cur.execute("select type, caller, value from caller_info where function = '%s' and (type = 0 or type = 1014 or type = 1028) and (parameter = -1 or parameter = %d);" %(ptr, param))
 553         for row in cur:
 554             data_type = int(row[0])
 555             if data_type == 1014:
 556                 sources.append((row[1], row[2]))
 557             elif data_type == 1028:
 558                 sources.append(("%", row[2])) # hack...
 559             elif data_type == 0 and prev_type == 0:
 560                 sources.append((row[1], ""))
 561             prev_type = data_type
 562     return sources
 563 
 564 def trace_param_helper(func, param, indent = 0):
 565     global printed_funcs
 566     if func in printed_funcs:
 567         return
 568     print "%s%s(param %d)" %(" " * indent, func, param)
 569     if func == "too common":
 570         return
 571     if indent > 20:
 572         return
 573     printed_funcs.append(func)
 574     sources = trace_callers(func, param)
 575     for path in sources:
 576 
 577         if len(path[1]) and path[1][0] == 'p' and path[1][1] == ' ':
 578             p = int(path[1][2:])
 579             trace_param_helper(path[0], p, indent + 2)
 580         elif len(path[0]) and path[0][0] == '%':
 581             print "  %s%s" %(" " * indent, path[1])
 582         else:
 583             print "* %s%s %s" %(" " * (indent - 1), path[0], path[1])
 584 
 585 def trace_param(func, param):
 586     global printed_funcs
 587     printed_funcs = []
 588     print "tracing %s %d" %(func, param)
 589     trace_param_helper(func, param)
 590 
 591 def print_locals(filename):
 592     cur = con.cursor()
 593     cur.execute("select file,data,value from data_info where file = '%s' and type = 8029 and value != 0;" %(filename))
 594     for txt in cur:
 595         print "%s | %s | %s" %(txt[0], txt[1], txt[2])
 596 
 597 def constraint(struct_type, member):
 598     cur = con.cursor()
 599     cur.execute("select * from constraints_required where data like '(struct %s)->%s' or bound like '(struct %s)->%s';" %(struct_type, member, struct_type, member))
 600     for txt in cur:
 601         print "%-30s | %-30s | %s | %s" %(txt[0], txt[1], txt[2], txt[3])
 602 
 603 if len(sys.argv) < 2:
 604     usage()
 605 
 606 if len(sys.argv) == 2:
 607     func = sys.argv[1]
 608     print_caller_info("", func)
 609 elif sys.argv[1] == "info":
 610     my_type = ""
 611     if len(sys.argv) == 4:
 612         my_type = sys.argv[3]
 613     func = sys.argv[2]
 614     print_caller_info("", func, my_type)
 615 elif sys.argv[1] == "call_info":
 616     if len(sys.argv) != 4:
 617         usage()
 618     filename = sys.argv[2]
 619     func = sys.argv[3]
 620     caller_info_values(filename, func)
 621     print_caller_info(filename, func)
 622 elif sys.argv[1] == "function_ptr" or sys.argv[1] == "fn_ptr":
 623     func = sys.argv[2]
 624     print_fn_ptrs(func)
 625 elif sys.argv[1] == "return_states":
 626     func = sys.argv[2]
 627     print_return_states(func)
 628     print "================================================"
 629     print_return_implies(func)
 630 elif sys.argv[1] == "return_implies":
 631     func = sys.argv[2]
 632     print_return_implies(func)
 633 elif sys.argv[1] == "type_size" or sys.argv[1] == "buf_size":
 634     struct_type = sys.argv[2]
 635     member = sys.argv[3]
 636     print_type_size(struct_type, member)
 637 elif sys.argv[1] == "data_info":
 638     struct_type = sys.argv[2]
 639     member = sys.argv[3]
 640     print_data_info(struct_type, member)
 641 elif sys.argv[1] == "call_tree":
 642     func = sys.argv[2]
 643     print_call_tree(func)
 644 elif sys.argv[1] == "where":
 645     if len(sys.argv) == 3:
 646         struct_type = "%"
 647         member = sys.argv[2]
 648     elif len(sys.argv) == 4:
 649         struct_type = sys.argv[2]
 650         member = sys.argv[3]
 651     function_type_value(struct_type, member)
 652 elif sys.argv[1] == "local":
 653     filename = sys.argv[2]
 654     variable = ""
 655     if len(sys.argv) == 4:
 656         variable = sys.argv[3]
 657     local_values(filename, variable)
 658 elif sys.argv[1] == "functions":
 659     member = sys.argv[2]
 660     print_functions(member)
 661 elif sys.argv[1] == "trace_param":
 662     if len(sys.argv) != 4:
 663         usage()
 664     func = sys.argv[2]
 665     param = int(sys.argv[3])
 666     trace_param(func, param)
 667 elif sys.argv[1] == "locals":
 668     if len(sys.argv) != 3:
 669         usage()
 670     filename = sys.argv[2]
 671     print_locals(filename);
 672 elif sys.argv[1] == "constraint":
 673     if len(sys.argv) == 3:
 674         struct_type = "%"
 675         member = sys.argv[2]
 676     elif len(sys.argv) == 4:
 677         struct_type = sys.argv[2]
 678         member = sys.argv[3]
 679     constraint(struct_type, member)
 680 elif sys.argv[1] == "test":
 681     filename = sys.argv[2]
 682     func = sys.argv[3]
 683     caller_info_values(filename, func)
 684 else:
 685     usage()