1 #
   2 # CDDL HEADER START
   3 #
   4 # The contents of this file are subject to the terms of the
   5 # Common Development and Distribution License (the "License").
   6 # You may not use this file except in compliance with the License.
   7 #
   8 # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9 # or http://www.opensolaris.org/os/licensing.
  10 # See the License for the specific language governing permissions
  11 # and limitations under the License.
  12 #
  13 # When distributing Covered Code, include this CDDL HEADER in each
  14 # file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15 # If applicable, add the following below this CDDL HEADER, with the
  16 # fields enclosed by brackets "[]" replaced with your own identifying
  17 # information: Portions Copyright [yyyy] [name of copyright owner]
  18 #
  19 # CDDL HEADER END
  20 #
  21 
  22 #
  23 # Copyright 2016 Joyent, Inc.
  24 #
  25 
  26 import re, sys
  27 
  28 spellMsg = '%s: Line %d contains "%s", a common misspelling of "%s"\n'
  29 altMsg = '%s: Line %d contains "%s"; please use "%s" instead for consistency with other documentation\n'
  30 
  31 misspellings = {
  32         'absense': 'absence',
  33         'accessable': 'accessible',
  34         'accomodate': 'accommodate',
  35         'accomodation': 'accommodation',
  36         'accross': 'across',
  37         'acheive': 'achieve',
  38         'addional': 'additional',
  39         'addres': 'address',
  40         'admininistrative': 'administrative',
  41         'adminstered': 'administered',
  42         'adminstrate': 'administrate',
  43         'adminstration': 'administration',
  44         'adminstrative': 'administrative',
  45         'adminstrator': 'administrator',
  46         'admissability': 'admissibility',
  47         'adress': 'address',
  48         'adressable': 'addressable',
  49         'adressed': 'addressed',
  50         'adressing': 'addressing, dressing',
  51         'aginst': 'against',
  52         'agression': 'aggression',
  53         'agressive': 'aggressive',
  54         'alot': 'a lot, allot',
  55         'and and': 'and',
  56         'apparantly': 'apparently',
  57         'appearence': 'appearance',
  58         'arguement': 'argument',
  59         'assasination': 'assassination',
  60         'auxilliary': 'auxiliary',
  61         'basicly': 'basically',
  62         'begining': 'beginning',
  63         'belive': 'believe',
  64         'beteen': 'between',
  65         'betwen': 'between',
  66         'beween': 'between',
  67         'bewteen': 'between',
  68         'bizzare': 'bizarre',
  69         'buisness': 'business',
  70         'calender': 'calendar',
  71         'cemetary': 'cemetery',
  72         'chauffer': 'chauffeur',
  73         'collegue': 'colleague',
  74         'comming': 'coming',
  75         'commited': 'committed',
  76         'commitee': 'committee',
  77         'commiting': 'committing',
  78         'comparision': 'comparison',
  79         'comparisions': 'comparisons',
  80         'compatability': 'compatibility',
  81         'compatable': 'compatible',
  82         'compatablity': 'compatibility',
  83         'compatiable': 'compatible',
  84         'compatiblity': 'compatibility',
  85         'completly': 'completely',
  86         'concious': 'conscious',
  87         'condidtion': 'condition',
  88         'conected': 'connected',
  89         'conjuction': 'conjunction',
  90         'continous': 'continuous',
  91         'curiousity': 'curiosity',
  92         'deamon': 'daemon',
  93         'definately': 'definitely',
  94         'desireable': 'desirable',
  95         'diffrent': 'different',
  96         'dilemna': 'dilemma',
  97         'dissapear': 'disappear',
  98         'dissapoint': 'disappoint',
  99         'ecstacy': 'ecstasy',
 100         'embarass': 'embarrass',
 101         'enviroment': 'environment',
 102         'exept': 'except',
 103         'existance': 'existence',
 104         'familar': 'familiar',
 105         'finaly': 'finally',
 106         'folowing': 'following',
 107         'foriegn': 'foreign',
 108         'forseeable': 'foreseeable',
 109         'fourty': 'forty',
 110         'foward': 'forward',
 111         'freind': 'friend',
 112         'futher': 'further',
 113         'gaurd': 'guard',
 114         'glamourous': 'glamorous',
 115         'goverment': 'government',
 116         'happend': 'happened',
 117         'harrassment': 'harassment',
 118         'hierachical': 'hierarchical',
 119         'hierachies': 'hierarchies',
 120         'hierachy': 'hierarchy',
 121         'hierarcical': 'hierarchical',
 122         'hierarcy': 'hierarchy',
 123         'honourary': 'honorary',
 124         'humourous': 'humorous',
 125         'idiosyncracy': 'idiosyncrasy',
 126         'immediatly': 'immediately',
 127         'inaccessable': 'inaccessible',
 128         'inbetween': 'between',
 129         'incidently': 'incidentally',
 130         'independant': 'independent',
 131         'infomation': 'information',
 132         'interupt': 'interrupt',
 133         'intial': 'initial',
 134         'intially': 'initially',
 135         'irresistable': 'irresistible',
 136         'jist': 'gist',
 137         'knowlege': 'knowledge',
 138         'lenght': 'length',
 139         'liase': 'liaise',
 140         'liason': 'liaison',
 141         'libary': 'library',
 142         'maching': 'machine, marching, matching',
 143         'millenia': 'millennia',
 144         'millenium': 'millennium',
 145         'neccessary': 'necessary',
 146         'negotation': 'negotiation',
 147         'nontheless': 'nonetheless',
 148         'noticable': 'noticeable',
 149         'occassion': 'occasion',
 150         'occassional': 'occasional',
 151         'occassionally': 'occasionally',
 152         'occurance': 'occurrence',
 153         'occured': 'occurred',
 154         'occurence': 'occurrence',
 155         'occuring': 'occurring',
 156         'ommision': 'omission',
 157         'orginal': 'original',
 158         'orginally': 'originally',
 159         'pavillion': 'pavilion',
 160         'peice': 'piece',
 161         'persistant': 'persistent',
 162         'politican': 'politician',
 163         'posession': 'possession',
 164         'possiblity': 'possibility',
 165         'preceed': 'precede',
 166         'preceeded': 'preceded',
 167         'preceeding': 'preceding',
 168         'preceeds': 'precedes',
 169         'prefered': 'preferred',
 170         'prefering': 'preferring',
 171         'presense': 'presence',
 172         'proces': 'process',
 173         'propoganda': 'propaganda',
 174         'psuedo': 'pseudo',
 175         'publically': 'publicly',
 176         'realy': 'really',
 177         'reciept': 'receipt',
 178         'recieve': 'receive',
 179         'recieved': 'received',
 180         'reciever': 'receiver',
 181         'recievers': 'receivers',
 182         'recieves': 'receives',
 183         'recieving': 'receiving',
 184         'recomend': 'recommend',
 185         'recomended': 'recommended',
 186         'recomending': 'recommending',
 187         'recomends': 'recommends',
 188         'recurse': 'recur',
 189         'recurses': 'recurs',
 190         'recursing': 'recurring',
 191         'refered': 'referred',
 192         'refering': 'referring',
 193         'religous': 'religious',
 194         'rember': 'remember',
 195         'remeber': 'remember',
 196         'repetion': 'repetition',
 197         'reponsible': 'responsible',
 198         'resistence': 'resistance',
 199         'retreive': 'retrieve',
 200         'seige': 'siege',
 201         'sence': 'since',
 202         'seperate': 'separate',
 203         'seperated': 'separated',
 204         'seperately': 'separately',
 205         'seperates': 'separates',
 206         'similiar': 'similar',
 207         'somwhere': 'somewhere',
 208         'sould': 'could, should, sold, soul',
 209         'sturcture': 'structure',
 210         'succesful': 'successful',
 211         'succesfully': 'successfully',
 212         'successfull': 'successful',
 213         'sucessful': 'successful',
 214         'supercede': 'supersede',
 215         'supress': 'suppress',
 216         'supressed': 'suppressed',
 217         'suprise': 'surprise',
 218         'suprisingly': 'surprisingly',
 219         'sytem': 'system',
 220         'tendancy': 'tendency',
 221         'the the': 'the',
 222         'the these': 'these',
 223         'therefor': 'therefore',
 224         'threshhold': 'threshold',
 225         'tolerence': 'tolerance',
 226         'tommorow': 'tomorrow',
 227         'tommorrow': 'tomorrow',
 228         'tounge': 'tongue',
 229         'tranformed': 'transformed',
 230         'transfered': 'transferred',
 231         'truely': 'truly',
 232         'trustworthyness': 'trustworthiness',
 233         'uncommited': 'uncommitted',
 234         'unforseen': 'unforeseen',
 235         'unfortunatly': 'unfortunately',
 236         'unsuccessfull': 'unsuccessful',
 237         'untill': 'until',
 238         'upto': 'up to',
 239         'whereever': 'wherever',
 240         'wich': 'which',
 241         'wierd': 'weird',
 242         'wtih': 'with',
 243 }
 244 
 245 alternates = {
 246         'parseable': 'parsable',
 247         'sub-command': 'subcommand',
 248         'sub-commands': 'subcommands',
 249         'writeable': 'writable'
 250 }
 251 
 252 misspellingREs = []
 253 alternateREs = []
 254 
 255 for misspelling, correct in misspellings.iteritems():
 256         regex = re.compile(r'\b%s\b' % (misspelling), re.IGNORECASE)
 257         entry = (regex, misspelling, correct)
 258         misspellingREs.append(entry)
 259 
 260 for alternate, correct in alternates.iteritems():
 261         regex = re.compile(r'\b%s\b' % (alternate), re.IGNORECASE)
 262         entry = (regex, alternate, correct)
 263         alternateREs.append(entry)
 264 
 265 def check(errmsg, output, filename, line, lineno, entry):
 266         if entry[0].search(line):
 267                 output.write(errmsg % (filename, lineno, entry[1], entry[2]))
 268                 return 1
 269         else:
 270                 return 0
 271 
 272 def spellcheck(fh, filename=None, output=sys.stderr, **opts):
 273         lineno = 1
 274         ret = 0
 275 
 276         if not filename:
 277                 filename = fh.name
 278 
 279         fh.seek(0)
 280         for line in fh:
 281                 for entry in misspellingREs:
 282                         ret |= check(spellMsg, output, filename, line,
 283                             lineno, entry)
 284                 for entry in alternateREs:
 285                         ret |= check(altMsg, output, filename, line,
 286                             lineno, entry)
 287                 lineno += 1
 288 
 289         return ret