1 #
   2 # CDDL HEADER START
   3 #
   4 # The contents of this file are subject to the terms of the
   5 # Common Development and Distribution License (the "License").
   6 # You may not use this file except in compliance with the License.
   7 #
   8 # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9 # or http://www.opensolaris.org/os/licensing.
  10 # See the License for the specific language governing permissions
  11 # and limitations under the License.
  12 #
  13 # When distributing Covered Code, include this CDDL HEADER in each
  14 # file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15 # If applicable, add the following below this CDDL HEADER, with the
  16 # fields enclosed by brackets "[]" replaced with your own identifying
  17 # information: Portions Copyright [yyyy] [name of copyright owner]
  18 #
  19 # CDDL HEADER END
  20 #
  21 
  22 #
  23 # Copyright 2016 Joyent, Inc.
  24 #
  25 
  26 import re, sys
  27 
  28 spellMsg = '%s: Line %d contains "%s", a common misspelling of "%s"\n'
  29 altMsg = '%s: Line %d contains "%s"; please use "%s" instead for consistency with other documentation\n'
  30 
  31 misspellings = {
  32         'absense': 'absence',
  33         'accessable': 'accessible',
  34         'accomodate': 'accommodate',
  35         'accomodation': 'accommodation',
  36         'accross': 'across',
  37         'acheive': 'achieve',
  38         'addional': 'additional',
  39         'addres': 'address',
  40         'admininistrative': 'administrative',
  41         'adminstered': 'administered',
  42         'adminstrate': 'administrate',
  43         'adminstration': 'administration',
  44         'adminstrative': 'administrative',
  45         'adminstrator': 'administrator',
  46         'admissability': 'admissibility',
  47         'adress': 'address',
  48         'adressable': 'addressable',
  49         'adressed': 'addressed',
  50         'adressing': 'addressing, dressing',
  51         'aginst': 'against',
  52         'agression': 'aggression',
  53         'agressive': 'aggressive',
  54         'alot': 'a lot, allot',
  55         'and and': 'and',
  56         'apparantly': 'apparently',
  57         'appearence': 'appearance',
  58         'arguement': 'argument',
  59         'assasination': 'assassination',
  60         'auxilliary': 'auxiliary',
  61         'basicly': 'basically',
  62         'begining': 'beginning',
  63         'belive': 'believe',
  64         'beteen': 'between',
  65         'betwen': 'between',
  66         'beween': 'between',
  67         'bewteen': 'between',
  68         'bizzare': 'bizarre',
  69         'buisness': 'business',
  70         'calender': 'calendar',
  71         'cemetary': 'cemetery',
  72         'chauffer': 'chauffeur',
  73         'collegue': 'colleague',
  74         'comming': 'coming',
  75         'commited': 'committed',
  76         'commitee': 'committee',
  77         'commiting': 'committing',
  78         'comparision': 'comparison',
  79         'comparisions': 'comparisons',
  80         'compatability': 'compatibility',
  81         'compatable': 'compatible',
  82         'compatablity': 'compatibility',
  83         'compatiable': 'compatible',
  84         'compatiblity': 'compatibility',
  85         'completly': 'completely',
  86         'concious': 'conscious',
  87         'condidtion': 'condition',
  88         'conected': 'connected',
  89         'conjuction': 'conjunction',
  90         'continous': 'continuous',
  91         'curiousity': 'curiosity',
  92         'deamon': 'daemon',
  93         'definately': 'definitely',
  94         'desireable': 'desirable',
  95         'diffrent': 'different',
  96         'dilemna': 'dilemma',
  97         'dissapear': 'disappear',
  98         'dissapoint': 'disappoint',
  99         'ecstacy': 'ecstasy',
 100         'embarass': 'embarrass',
 101         'enviroment': 'environment',
 102         'exept': 'except',
 103         'existance': 'existence',
 104         'familar': 'familiar',
 105         'finaly': 'finally',
 106         'folowing': 'following',
 107         'foriegn': 'foreign',
 108         'forseeable': 'foreseeable',
 109         'fourty': 'forty',
 110         'foward': 'forward',
 111         'freind': 'friend',
 112         'futher': 'further',
 113         'gaurd': 'guard',
 114         'glamourous': 'glamorous',
 115         'goverment': 'government',
 116         'happend': 'happened',
 117         'harrassment': 'harassment',
 118         'hierachical': 'hierarchical',
 119         'hierachies': 'hierarchies',
 120         'hierachy': 'hierarchy',
 121         'hierarcical': 'hierarchical',
 122         'hierarcy': 'hierarchy',
 123         'honourary': 'honorary',
 124         'humourous': 'humorous',
 125         'idiosyncracy': 'idiosyncrasy',
 126         'immediatly': 'immediately',
 127         'inaccessable': 'inaccessible',
 128         'inbetween': 'between',
 129         'incidently': 'incidentally',
 130         'independant': 'independent',
 131         'infomation': 'information',
 132         'interupt': 'interrupt',
 133         'intial': 'initial',
 134         'intially': 'initially',
 135         'irresistable': 'irresistible',
 136         'jist': 'gist',
 137         'knowlege': 'knowledge',
 138         'lenght': 'length',
 139         'liase': 'liaise',
 140         'liason': 'liaison',
 141         'libary': 'library',
 142         'maching': 'machine, marching, matching',
 143         'millenia': 'millennia',
 144         'millenium': 'millennium',
 145         'neccessary': 'necessary',
 146         'negotation': 'negotiation',
 147         'nontheless': 'nonetheless',
 148         'noticable': 'noticeable',
 149         'occassion': 'occasion',
 150         'occassional': 'occasional',
 151         'occassionally': 'occasionally',
 152         'occurance': 'occurrence',
 153         'occured': 'occurred',
 154         'occurence': 'occurrence',
 155         'occuring': 'occurring',
 156         'ommision': 'omission',
 157         'orginal': 'original',
 158         'orginally': 'originally',
 159         'ouput': 'output',
 160         'overriden': 'overridden',
 161         'particuliar': 'particular',
 162         'pavillion': 'pavilion',
 163         'peice': 'piece',
 164         'persistant': 'persistent',
 165         'politican': 'politician',
 166         'posession': 'possession',
 167         'possiblity': 'possibility',
 168         'preceed': 'precede',
 169         'preceeded': 'preceded',
 170         'preceeding': 'preceding',
 171         'preceeds': 'precedes',
 172         'prefered': 'preferred',
 173         'prefering': 'preferring',
 174         'presense': 'presence',
 175         'proces': 'process',
 176         'propoganda': 'propaganda',
 177         'psuedo': 'pseudo',
 178         'publically': 'publicly',
 179         'realy': 'really',
 180         'reciept': 'receipt',
 181         'recieve': 'receive',
 182         'recieved': 'received',
 183         'reciever': 'receiver',
 184         'recievers': 'receivers',
 185         'recieves': 'receives',
 186         'recieving': 'receiving',
 187         'recomend': 'recommend',
 188         'recomended': 'recommended',
 189         'recomending': 'recommending',
 190         'recomends': 'recommends',
 191         'recurse': 'recur',
 192         'recurses': 'recurs',
 193         'recursing': 'recurring',
 194         'refered': 'referred',
 195         'refering': 'referring',
 196         'religous': 'religious',
 197         'rember': 'remember',
 198         'remeber': 'remember',
 199         'repetion': 'repetition',
 200         'reponsible': 'responsible',
 201         'resistence': 'resistance',
 202         'retreive': 'retrieve',
 203         'seige': 'siege',
 204         'sence': 'since',
 205         'seperate': 'separate',
 206         'seperated': 'separated',
 207         'seperately': 'separately',
 208         'seperates': 'separates',
 209         'similiar': 'similar',
 210         'somwhere': 'somewhere',
 211         'sould': 'could, should, sold, soul',
 212         'sturcture': 'structure',
 213         'succesful': 'successful',
 214         'succesfully': 'successfully',
 215         'successfull': 'successful',
 216         'sucessful': 'successful',
 217         'supercede': 'supersede',
 218         'supress': 'suppress',
 219         'supressed': 'suppressed',
 220         'suprise': 'surprise',
 221         'suprisingly': 'surprisingly',
 222         'sytem': 'system',
 223         'tendancy': 'tendency',
 224         'the the': 'the',
 225         'the these': 'these',
 226         'therefor': 'therefore',
 227         'threshhold': 'threshold',
 228         'tolerence': 'tolerance',
 229         'tommorow': 'tomorrow',
 230         'tommorrow': 'tomorrow',
 231         'tounge': 'tongue',
 232         'tranformed': 'transformed',
 233         'transfered': 'transferred',
 234         'truely': 'truly',
 235         'trustworthyness': 'trustworthiness',
 236         'uncommited': 'uncommitted',
 237         'unforseen': 'unforeseen',
 238         'unfortunatly': 'unfortunately',
 239         'unsuccessfull': 'unsuccessful',
 240         'untill': 'until',
 241         'upto': 'up to',
 242         'whereever': 'wherever',
 243         'wich': 'which',
 244         'wierd': 'weird',
 245         'wtih': 'with',
 246 }
 247 
 248 alternates = {
 249         'parseable': 'parsable',
 250         'sub-command': 'subcommand',
 251         'sub-commands': 'subcommands',
 252         'writeable': 'writable'
 253 }
 254 
 255 misspellingREs = []
 256 alternateREs = []
 257 
 258 for misspelling, correct in misspellings.iteritems():
 259         regex = re.compile(r'\b%s\b' % (misspelling), re.IGNORECASE)
 260         entry = (regex, misspelling, correct)
 261         misspellingREs.append(entry)
 262 
 263 for alternate, correct in alternates.iteritems():
 264         regex = re.compile(r'\b%s\b' % (alternate), re.IGNORECASE)
 265         entry = (regex, alternate, correct)
 266         alternateREs.append(entry)
 267 
 268 def check(errmsg, output, filename, line, lineno, entry):
 269         if entry[0].search(line):
 270                 output.write(errmsg % (filename, lineno, entry[1], entry[2]))
 271                 return 1
 272         else:
 273                 return 0
 274 
 275 def spellcheck(fh, filename=None, output=sys.stderr, **opts):
 276         lineno = 1
 277         ret = 0
 278 
 279         if not filename:
 280                 filename = fh.name
 281 
 282         fh.seek(0)
 283         for line in fh:
 284                 for entry in misspellingREs:
 285                         ret |= check(spellMsg, output, filename, line,
 286                             lineno, entry)
 287                 for entry in alternateREs:
 288                         ret |= check(altMsg, output, filename, line,
 289                             lineno, entry)
 290                 lineno += 1
 291 
 292         return ret