1 from collections import defaultdict
   2 import copy
   3 import json
   4 import sys
   5 import pprint
   6 
   7 from constants import (
   8     GLOBAL_BLACKLIST,
   9     IMPL_DEP_FILE_STR,
  10     OUTPUT_FILE_STR,
  11     SYSCALL_PREFIXES,
  12     ListType,
  13     hardcode_syscall_read_fields,
  14     hardcode_syscall_write_fields,
  15 )
  16 
  17 class Parser(object):
  18     def __init__(
  19         self,
  20         impl_dep_file_str=IMPL_DEP_FILE_STR,
  21         output_file_str=OUTPUT_FILE_STR,
  22         verbose=False,
  23         pretty=False
  24     ):
  25         try:
  26             self.impl_dep_file = file(impl_dep_file_str, 'r')
  27             self.output_file = file(output_file_str + '.json', 'w+')
  28             if verbose:
  29                 self.output_file_verbose = file(output_file_str + '_verbose.json', 'w+')
  30             if pretty:
  31                 self.pretty_output_file = file(output_file_str + '.pretty', 'w+')
  32                 self.pretty_output_file_verbose = file(output_file_str + '_verbose.pretty', 'w+')
  33         except IOError:
  34             sys.stderr.write("ERROR: Cannot open files %s %s.\n" % (impl_dep_file_str, output_file_str))
  35             sys.exit(1)
  36         self.verbose = verbose
  37         self.pretty = pretty
  38         self.syscall_read_fields = defaultdict(set)
  39         self.syscall_write_fields = defaultdict(set)
  40         self.implicit_dependencies = defaultdict(set)
  41         self.verbose_impl_dep = defaultdict(list)
  42         self.deref_counter = defaultdict(int)  # count which struct->members are most common
  43 
  44         for syscall,fields in hardcode_syscall_read_fields.iteritems():
  45             self.syscall_read_fields[syscall].update(set(fields))
  46 
  47         for syscall,fields in hardcode_syscall_write_fields.iteritems():
  48             self.syscall_write_fields[syscall].update(set(fields))
  49 
  50     def _sanitize_syscall(self, syscall):
  51         for prefix in SYSCALL_PREFIXES:
  52             if syscall.startswith(prefix):
  53                 return syscall[len(prefix):]
  54         return syscall
  55 
  56     def _deref_to_tuple(self, deref):
  57         """ (struct a)->b ==> (a,b) """
  58         struct, member = deref.split('->')
  59         struct = struct[1:-1]  # strip parens
  60         struct = struct.split(' ')[1]  # drop struct keyword
  61         return (struct, member)
  62 
  63     def _split_field(self, field):
  64         field = field.strip()
  65         field = field[1: -1]  # strip square brackets
  66         derefs = [struct.strip() for struct in field.strip().split(',') if struct]
  67         return map(
  68             lambda deref: self._deref_to_tuple(deref),
  69             derefs
  70         )
  71 
  72     def _sanitize_line(self, line):
  73         syscall_and_listtype, field = line.split(':')
  74         syscall, list_type = syscall_and_listtype.split(' ')
  75         syscall = self._sanitize_syscall(syscall)
  76         derefs = self._split_field(field)
  77         return syscall, list_type, derefs
  78 
  79     def _add_fields(self, syscall, list_type, derefs):
  80         if list_type == ListType.READ:
  81             d = self.syscall_read_fields
  82         elif list_type == ListType.WRITE:
  83             d = self.syscall_write_fields
  84         for deref in derefs:
  85             if deref in GLOBAL_BLACKLIST:  # ignore spammy structs
  86                 continue
  87             d[syscall].add(deref)
  88 
  89     def _construct_implicit_deps(self):
  90         """ just do a naive O(n^2) loop to see intersections between write_list and read_list """
  91         for this_call,read_fields in self.syscall_read_fields.iteritems():
  92             for that_call,write_fields in self.syscall_write_fields.iteritems():
  93                 if that_call == this_call:  # calls are obviously dependent on themselves. ignore.
  94                     continue
  95                 intersection = read_fields & write_fields
  96                 if intersection:
  97                     self.implicit_dependencies[this_call].add(that_call)
  98                 if intersection and self.verbose:
  99                     self.verbose_impl_dep[this_call].append({
 100                         'call': that_call,
 101                         'reason': intersection,
 102                     })
 103                     for deref in intersection:
 104                         self.deref_counter[deref] += 1
 105 
 106     def parse(self):
 107         for line in self.impl_dep_file:
 108             syscall, list_type, derefs = self._sanitize_line(line)
 109             self._add_fields(syscall, list_type, derefs)
 110         # pprint.pprint(dict(self.syscall_write_fields))
 111         # pprint.pprint(dict(self.syscall_read_fields))
 112         self._construct_implicit_deps()
 113         # pprint.pprint(dict(self.implicit_dependencies))
 114         # pprint.pprint(dict(self.verbose_impl_dep))
 115 
 116     def _listify_verbose_reason(self, reason):
 117         r = copy.deepcopy(reason)
 118         r['reason'] = list(r['reason'])
 119         r['reason'] = map(
 120             lambda (struct,field): struct + '->' + field,
 121             r['reason']
 122         )
 123         return r
 124 
 125     def _get_json_dependencies(self):
 126         implicit_dependencies = {}
 127         verbose_impl_dep = {}
 128         for call, dep_set in self.implicit_dependencies.iteritems():
 129             implicit_dependencies[call] = list(dep_set)
 130         for call, call_reasons in self.verbose_impl_dep.iteritems():
 131             verbose_impl_dep[call] = map(
 132                 lambda reason: self._listify_verbose_reason(reason),
 133                 call_reasons,
 134             )
 135         return implicit_dependencies, verbose_impl_dep
 136 
 137     def write(self):
 138         implicit_dependencies, verbose_impl_dep = self._get_json_dependencies()
 139         json.dump(implicit_dependencies, self.output_file)
 140         if self.verbose:
 141             json.dump(verbose_impl_dep, self.output_file_verbose)
 142         if self.pretty:
 143             pprint.pprint(dict(self.implicit_dependencies), self.pretty_output_file)
 144             pprint.pprint(dict(self.verbose_impl_dep), self.pretty_output_file_verbose)
 145         for deref, count in sorted(self.deref_counter.iteritems(), key=lambda (k,v): (v,k)):
 146             print "%s: %d" % (deref, count)
 147 
 148     def close(self):
 149         self.output_file.close()
 150         self.impl_dep_file.close()
 151         if self.verbose:
 152             self.output_file_verbose.close()