#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright 2016 Joyent, Inc.
#

"""Line-oriented checker for common misspellings and discouraged spellings.

The module compiles two lookup tables into case-insensitive, word-bounded
regular expressions at import time.  spellcheck() then scans a file object
line by line and writes one message per matching table entry per line.
"""

import re
import sys

# Message templates; the fields are (filename, line number, text that was
# found, suggested replacement).
spellMsg = '%s: Line %d contains "%s", a common misspelling of "%s"\n'
altMsg = '%s: Line %d contains "%s"; please use "%s" instead for consistency with other documentation\n'

# Maps a misspelling (or an accidentally repeated phrase) to the suggested
# correction(s).  Keys are interpolated directly into a regular expression,
# so they must not contain regex metacharacters.
misspellings = {
    'absense': 'absence',
    'accessable': 'accessible',
    'accomodate': 'accommodate',
    'accomodation': 'accommodation',
    'accross': 'across',
    'acheive': 'achieve',
    'addional': 'additional',
    'addres': 'address',
    'admininistrative': 'administrative',
    'adminstered': 'administered',
    'adminstrate': 'administrate',
    'adminstration': 'administration',
    'adminstrative': 'administrative',
    'adminstrator': 'administrator',
    'admissability': 'admissibility',
    'adress': 'address',
    'adressable': 'addressable',
    'adressed': 'addressed',
    'adressing': 'addressing, dressing',
    'aginst': 'against',
    'agression': 'aggression',
    'agressive': 'aggressive',
    'alot': 'a lot, allot',
    'and and': 'and',
    'apparantly': 'apparently',
    'appearence': 'appearance',
    'arguement': 'argument',
    'assasination': 'assassination',
    'auxilliary': 'auxiliary',
    'basicly': 'basically',
    'begining': 'beginning',
    'belive': 'believe',
    'beteen': 'between',
    'betwen': 'between',
    'beween': 'between',
    'bewteen': 'between',
    'bizzare': 'bizarre',
    'buisness': 'business',
    'calender': 'calendar',
    'cemetary': 'cemetery',
    'chauffer': 'chauffeur',
    'collegue': 'colleague',
    'comming': 'coming',
    'commited': 'committed',
    'commitee': 'committee',
    'commiting': 'committing',
    'comparision': 'comparison',
    'comparisions': 'comparisons',
    'compatability': 'compatibility',
    'compatable': 'compatible',
    'compatablity': 'compatibility',
    'compatiable': 'compatible',
    'compatiblity': 'compatibility',
    'completly': 'completely',
    'concious': 'conscious',
    'condidtion': 'condition',
    'conected': 'connected',
    'conjuction': 'conjunction',
    'continous': 'continuous',
    'curiousity': 'curiosity',
    'deamon': 'daemon',
    'definately': 'definitely',
    'desireable': 'desirable',
    'diffrent': 'different',
    'dilemna': 'dilemma',
    'dissapear': 'disappear',
    'dissapoint': 'disappoint',
    'ecstacy': 'ecstasy',
    'embarass': 'embarrass',
    'enviroment': 'environment',
    'exept': 'except',
    'existance': 'existence',
    'familar': 'familiar',
    'finaly': 'finally',
    'folowing': 'following',
    'foriegn': 'foreign',
    'forseeable': 'foreseeable',
    'fourty': 'forty',
    'foward': 'forward',
    'freind': 'friend',
    'futher': 'further',
    'gaurd': 'guard',
    'glamourous': 'glamorous',
    'goverment': 'government',
    'happend': 'happened',
    'harrassment': 'harassment',
    'hierachical': 'hierarchical',
    'hierachies': 'hierarchies',
    'hierachy': 'hierarchy',
    'hierarcical': 'hierarchical',
    'hierarcy': 'hierarchy',
    'honourary': 'honorary',
    'humourous': 'humorous',
    'idiosyncracy': 'idiosyncrasy',
    'immediatly': 'immediately',
    'inaccessable': 'inaccessible',
    'inbetween': 'between',
    'incidently': 'incidentally',
    'independant': 'independent',
    'infomation': 'information',
    'interupt': 'interrupt',
    'intial': 'initial',
    'intially': 'initially',
    'irresistable': 'irresistible',
    'jist': 'gist',
    'knowlege': 'knowledge',
    'lenght': 'length',
    'liase': 'liaise',
    'liason': 'liaison',
    'libary': 'library',
    'maching': 'machine, marching, matching',
    'millenia': 'millennia',
    'millenium': 'millennium',
    'neccessary': 'necessary',
    'negotation': 'negotiation',
    'nontheless': 'nonetheless',
    'noticable': 'noticeable',
    'occassion': 'occasion',
    'occassional': 'occasional',
    'occassionally': 'occasionally',
    'occurance': 'occurrence',
    'occured': 'occurred',
    'occurence': 'occurrence',
    'occuring': 'occurring',
    'ommision': 'omission',
    'orginal': 'original',
    'orginally': 'originally',
    'ouput': 'output',
    'overriden': 'overridden',
    'particuliar': 'particular',
    'pavillion': 'pavilion',
    'peice': 'piece',
    'persistant': 'persistent',
    'politican': 'politician',
    'posession': 'possession',
    'possiblity': 'possibility',
    'preceed': 'precede',
    'preceeded': 'preceded',
    'preceeding': 'preceding',
    'preceeds': 'precedes',
    'prefered': 'preferred',
    'prefering': 'preferring',
    'presense': 'presence',
    'proces': 'process',
    'propoganda': 'propaganda',
    'psuedo': 'pseudo',
    'publically': 'publicly',
    'realy': 'really',
    'reciept': 'receipt',
    'recieve': 'receive',
    'recieved': 'received',
    'reciever': 'receiver',
    'recievers': 'receivers',
    'recieves': 'receives',
    'recieving': 'receiving',
    'recomend': 'recommend',
    'recomended': 'recommended',
    'recomending': 'recommending',
    'recomends': 'recommends',
    'recurse': 'recur',
    'recurses': 'recurs',
    'recursing': 'recurring',
    'refered': 'referred',
    'refering': 'referring',
    'religous': 'religious',
    'rember': 'remember',
    'remeber': 'remember',
    'repetion': 'repetition',
    'reponsible': 'responsible',
    'resistence': 'resistance',
    'retreive': 'retrieve',
    'seige': 'siege',
    'sence': 'since',
    'seperate': 'separate',
    'seperated': 'separated',
    'seperately': 'separately',
    'seperates': 'separates',
    'similiar': 'similar',
    'somwhere': 'somewhere',
    'sould': 'could, should, sold, soul',
    'sturcture': 'structure',
    'succesful': 'successful',
    'succesfully': 'successfully',
    'successfull': 'successful',
    'sucessful': 'successful',
    'supercede': 'supersede',
    'supress': 'suppress',
    'supressed': 'suppressed',
    'suprise': 'surprise',
    'suprisingly': 'surprisingly',
    'sytem': 'system',
    'tendancy': 'tendency',
    'the the': 'the',
    'the these': 'these',
    'therefor': 'therefore',
    'threshhold': 'threshold',
    'tolerence': 'tolerance',
    'tommorow': 'tomorrow',
    'tommorrow': 'tomorrow',
    'tounge': 'tongue',
    'tranformed': 'transformed',
    'transfered': 'transferred',
    'truely': 'truly',
    'trustworthyness': 'trustworthiness',
    'uncommited': 'uncommitted',
    'unforseen': 'unforeseen',
    'unfortunatly': 'unfortunately',
    'unsuccessfull': 'unsuccessful',
    'untill': 'until',
    'upto': 'up to',
    'whereever': 'wherever',
    'wich': 'which',
    'wierd': 'weird',
    'wtih': 'with',
}

# Maps a spelling that should be avoided to the form the messages ask for.
alternates = {
    'parseable': 'parsable',
    'sub-command': 'subcommand',
    'sub-commands': 'subcommands',
    'writeable': 'writable'
}


def _compile_table(table):
    """Turn a {flagged: suggestion} dict into a list of check() entries.

    Each entry is a (compiled regex, flagged text, suggestion) tuple.  The
    regex matches the flagged text between word boundaries, ignoring case.
    """
    # dict.items() exists on both Python 2 and Python 3; iteritems() is
    # Python 2 only and would make the module fail to import on Python 3.
    return [
        (re.compile(r'\b%s\b' % (flagged), re.IGNORECASE), flagged, suggestion)
        for flagged, suggestion in table.items()
    ]


# Compiled once at import time so spellcheck() does no per-line compilation.
misspellingREs = _compile_table(misspellings)
alternateREs = _compile_table(alternates)


def check(errmsg, output, filename, line, lineno, entry):
    """Report `line` if it matches the pattern in `entry`.

    errmsg   -- format string taking (filename, lineno, flagged, suggestion)
    output   -- object whose write() method receives the formatted message
    filename -- name to show in the message
    line     -- text of the line being examined
    lineno   -- line number to show in the message
    entry    -- (compiled regex, flagged text, suggestion) tuple

    Returns 1 if a message was written and 0 otherwise.  Only one message
    is written per call, even if the pattern occurs several times in `line`.
    """
    pattern, flagged, suggestion = entry[0], entry[1], entry[2]
    if not pattern.search(line):
        return 0
    output.write(errmsg % (filename, lineno, flagged, suggestion))
    return 1


def spellcheck(fh, filename=None, output=sys.stderr, **opts):
    """Scan every line of `fh` for known misspellings and alternates.

    fh       -- file object; it is rewound with seek(0) and then iterated
    filename -- name to show in messages; when false, fh.name is used
    output   -- object whose write() method receives the messages
    opts     -- extra keyword arguments are accepted and ignored

    Returns 1 if at least one message was written and 0 otherwise.
    """
    ret = 0

    if not filename:
        filename = fh.name

    # Always start from the top, regardless of the current file position.
    fh.seek(0)
    for lineno, line in enumerate(fh, 1):
        for entry in misspellingREs:
            ret |= check(spellMsg, output, filename, line, lineno, entry)
        for entry in alternateREs:
            ret |= check(altMsg, output, filename, line, lineno, entry)

    return ret