10057 Man page misspellings ouput particuliar overriden
Reviewed by: Gergő Mihály Doma <domag02@gmail.com>
1 #
2 # CDDL HEADER START
3 #
4 # The contents of this file are subject to the terms of the
5 # Common Development and Distribution License (the "License").
6 # You may not use this file except in compliance with the License.
7 #
8 # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 # or http://www.opensolaris.org/os/licensing.
10 # See the License for the specific language governing permissions
11 # and limitations under the License.
12 #
13 # When distributing Covered Code, include this CDDL HEADER in each
14 # file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 # If applicable, add the following below this CDDL HEADER, with the
16 # fields enclosed by brackets "[]" replaced with your own identifying
17 # information: Portions Copyright [yyyy] [name of copyright owner]
18 #
19 # CDDL HEADER END
20 #
21
22 #
23 # Copyright 2016 Joyent, Inc.
24 #
25
26 import re, sys
27
# Report templates.  Each is filled with
# (filename, line number, word found, suggested replacement).
spellMsg = ('%s: Line %d contains "%s", '
            'a common misspelling of "%s"\n')
altMsg = ('%s: Line %d contains "%s"; '
          'please use "%s" instead for consistency with other documentation\n')
30
# Known misspellings (and doubled-word slips such as 'the the'), each mapped
# to the suggested replacement text shown in the report.  Where several
# corrections are plausible they are listed comma-separated in one string.
misspellings = {
    'absense': 'absence',
    'accessable': 'accessible',
    'accomodate': 'accommodate',
    'accomodation': 'accommodation',
    'accross': 'across',
    'acheive': 'achieve',
    'addional': 'additional',
    'addres': 'address',
    'admininistrative': 'administrative',
    'adminstered': 'administered',
    'adminstrate': 'administrate',
    'adminstration': 'administration',
    'adminstrative': 'administrative',
    'adminstrator': 'administrator',
    'admissability': 'admissibility',
    'adress': 'address',
    'adressable': 'addressable',
    'adressed': 'addressed',
    'adressing': 'addressing, dressing',
    'aginst': 'against',
    'agression': 'aggression',
    'agressive': 'aggressive',
    'alot': 'a lot, allot',
    'and and': 'and',
    'apparantly': 'apparently',
    'appearence': 'appearance',
    'arguement': 'argument',
    'assasination': 'assassination',
    'auxilliary': 'auxiliary',
    'basicly': 'basically',
    'begining': 'beginning',
    'belive': 'believe',
    'beteen': 'between',
    'betwen': 'between',
    'beween': 'between',
    'bewteen': 'between',
    'bizzare': 'bizarre',
    'buisness': 'business',
    'calender': 'calendar',
    'cemetary': 'cemetery',
    'chauffer': 'chauffeur',
    'collegue': 'colleague',
    'comming': 'coming',
    'commited': 'committed',
    'commitee': 'committee',
    'commiting': 'committing',
    'comparision': 'comparison',
    'comparisions': 'comparisons',
    'compatability': 'compatibility',
    'compatable': 'compatible',
    'compatablity': 'compatibility',
    'compatiable': 'compatible',
    'compatiblity': 'compatibility',
    'completly': 'completely',
    'concious': 'conscious',
    'condidtion': 'condition',
    'conected': 'connected',
    'conjuction': 'conjunction',
    'continous': 'continuous',
    'curiousity': 'curiosity',
    'deamon': 'daemon',
    'definately': 'definitely',
    'desireable': 'desirable',
    'diffrent': 'different',
    'dilemna': 'dilemma',
    'dissapear': 'disappear',
    'dissapoint': 'disappoint',
    'ecstacy': 'ecstasy',
    'embarass': 'embarrass',
    'enviroment': 'environment',
    'exept': 'except',
    'existance': 'existence',
    'familar': 'familiar',
    'finaly': 'finally',
    'folowing': 'following',
    'foriegn': 'foreign',
    'forseeable': 'foreseeable',
    'fourty': 'forty',
    'foward': 'forward',
    'freind': 'friend',
    'futher': 'further',
    'gaurd': 'guard',
    'glamourous': 'glamorous',
    'goverment': 'government',
    'happend': 'happened',
    'harrassment': 'harassment',
    'hierachical': 'hierarchical',
    'hierachies': 'hierarchies',
    'hierachy': 'hierarchy',
    'hierarcical': 'hierarchical',
    'hierarcy': 'hierarchy',
    'honourary': 'honorary',
    'humourous': 'humorous',
    'idiosyncracy': 'idiosyncrasy',
    'immediatly': 'immediately',
    'inaccessable': 'inaccessible',
    'inbetween': 'between',
    'incidently': 'incidentally',
    'independant': 'independent',
    'infomation': 'information',
    'interupt': 'interrupt',
    'intial': 'initial',
    'intially': 'initially',
    'irresistable': 'irresistible',
    'jist': 'gist',
    'knowlege': 'knowledge',
    'lenght': 'length',
    'liase': 'liaise',
    'liason': 'liaison',
    'libary': 'library',
    'maching': 'machine, marching, matching',
    'millenia': 'millennia',
    'millenium': 'millennium',
    'neccessary': 'necessary',
    'negotation': 'negotiation',
    'nontheless': 'nonetheless',
    'noticable': 'noticeable',
    'occassion': 'occasion',
    'occassional': 'occasional',
    'occassionally': 'occasionally',
    'occurance': 'occurrence',
    'occured': 'occurred',
    'occurence': 'occurrence',
    'occuring': 'occurring',
    'ommision': 'omission',
    'orginal': 'original',
    'orginally': 'originally',
    'ouput': 'output',
    'overriden': 'overridden',
    'particuliar': 'particular',
    'pavillion': 'pavilion',
    'peice': 'piece',
    'persistant': 'persistent',
    'politican': 'politician',
    'posession': 'possession',
    'possiblity': 'possibility',
    'preceed': 'precede',
    'preceeded': 'preceded',
    'preceeding': 'preceding',
    'preceeds': 'precedes',
    'prefered': 'preferred',
    'prefering': 'preferring',
    'presense': 'presence',
    'proces': 'process',
    'propoganda': 'propaganda',
    'psuedo': 'pseudo',
    'publically': 'publicly',
    'realy': 'really',
    'reciept': 'receipt',
    'recieve': 'receive',
    'recieved': 'received',
    'reciever': 'receiver',
    'recievers': 'receivers',
    'recieves': 'receives',
    'recieving': 'receiving',
    'recomend': 'recommend',
    'recomended': 'recommended',
    'recomending': 'recommending',
    'recomends': 'recommends',
    'recurse': 'recur',
    'recurses': 'recurs',
    'recursing': 'recurring',
    'refered': 'referred',
    'refering': 'referring',
    'religous': 'religious',
    'rember': 'remember',
    'remeber': 'remember',
    'repetion': 'repetition',
    'reponsible': 'responsible',
    'resistence': 'resistance',
    'retreive': 'retrieve',
    'seige': 'siege',
    'sence': 'since',
    'seperate': 'separate',
    'seperated': 'separated',
    'seperately': 'separately',
    'seperates': 'separates',
    'similiar': 'similar',
    'somwhere': 'somewhere',
    'sould': 'could, should, sold, soul',
    'sturcture': 'structure',
    'succesful': 'successful',
    'succesfully': 'successfully',
    'successfull': 'successful',
    'sucessful': 'successful',
    'supercede': 'supersede',
    'supress': 'suppress',
    'supressed': 'suppressed',
    'suprise': 'surprise',
    'suprisingly': 'surprisingly',
    'sytem': 'system',
    'tendancy': 'tendency',
    'the the': 'the',
    'the these': 'these',
    'therefor': 'therefore',
    'threshhold': 'threshold',
    'tolerence': 'tolerance',
    'tommorow': 'tomorrow',
    'tommorrow': 'tomorrow',
    'tounge': 'tongue',
    'tranformed': 'transformed',
    'transfered': 'transferred',
    'truely': 'truly',
    'trustworthyness': 'trustworthiness',
    'uncommited': 'uncommitted',
    'unforseen': 'unforeseen',
    'unfortunatly': 'unfortunately',
    'unsuccessfull': 'unsuccessful',
    'untill': 'until',
    'upto': 'up to',
    'whereever': 'wherever',
    'wich': 'which',
    'wierd': 'weird',
    'wtih': 'with',
}
247
# Spellings that are not wrong but are discouraged; each maps to the form
# to use instead, for consistency with other documentation.
alternates = {
    'parseable': 'parsable',
    'sub-command': 'subcommand',
    'sub-commands': 'subcommands',
    'writeable': 'writable',
}
254
def _compile_entries(words):
    """Return a list of (regex, word, replacement) tuples built from *words*.

    *words* maps each flagged word to its suggested replacement.  Every
    regex matches its word case-insensitively and only between word
    boundaries.  The word is passed through re.escape() so it is always
    matched literally, even if an entry contains a regex metacharacter.
    """
    # dict.items() exists on both Python 2 and Python 3; iteritems() was
    # removed in Python 3 and would raise AttributeError at import time.
    return [
        (re.compile(r'\b%s\b' % re.escape(word), re.IGNORECASE), word, fix)
        for word, fix in words.items()
    ]


# Compiled once at import time; each entry is (regex, word, replacement).
misspellingREs = _compile_entries(misspellings)
alternateREs = _compile_entries(alternates)
267
def check(errmsg, output, filename, line, lineno, entry):
    """Report *line* if it matches *entry*; return 1 on a match, else 0.

    entry is a sequence whose first item is a compiled regex, followed by
    the flagged word and its suggested replacement.  On a match, errmsg is
    formatted with (filename, lineno, word, replacement) and written to
    output.
    """
    if not entry[0].search(line):
        return 0

    # Only read the remaining fields once a match is known.
    found, preferred = entry[1], entry[2]
    output.write(errmsg % (filename, lineno, found, preferred))
    return 1
274
def spellcheck(fh, filename=None, output=sys.stderr, **opts):
    """Scan every line of *fh* for known misspellings and discouraged forms.

    fh is rewound with seek(0) and read line by line; line numbers in the
    report start at 1.  If filename is not given, fh.name is used.  Each
    hit is written to output.  Extra keyword arguments are accepted and
    ignored.

    Returns 1 if at least one line was reported, otherwise 0.
    """
    if not filename:
        filename = fh.name

    # Misspellings are checked before alternates on every line.
    passes = ((spellMsg, misspellingREs), (altMsg, alternateREs))
    found = 0

    fh.seek(0)
    for lineno, line in enumerate(fh, 1):
        for errmsg, entries in passes:
            for entry in entries:
                found |= check(errmsg, output, filename, line,
                               lineno, entry)

    return found
--- EOF ---