Print this page
10907 hot_patch_kernel_text() has no respect for boundaries
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/os/subr.c
+++ new/usr/src/uts/common/os/subr.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
↓ open down ↓ |
15 lines elided |
↑ open up ↑ |
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25
26 +/*
27 + * Copyright 2019 Joyent, Inc.
28 + */
29 +
26 30 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
27 -/* All Rights Reserved */
31 +/* All Rights Reserved */
28 32
29 33 #include <sys/types.h>
30 34 #include <sys/sysmacros.h>
31 35 #include <sys/param.h>
32 36 #include <sys/vmparam.h>
33 37 #include <sys/systm.h>
34 38 #include <sys/cred.h>
35 39 #include <sys/user.h>
36 40 #include <sys/proc.h>
37 41 #include <sys/conf.h>
38 42 #include <sys/tuneable.h>
39 43 #include <sys/cpuvar.h>
40 44 #include <sys/archsystm.h>
41 45 #include <sys/vmem.h>
42 46 #include <vm/seg_kmem.h>
43 47 #include <sys/errno.h>
44 48 #include <sys/cmn_err.h>
45 49 #include <sys/debug.h>
46 50 #include <sys/atomic.h>
47 51 #include <sys/model.h>
48 52 #include <sys/kmem.h>
49 53 #include <sys/memlist.h>
50 54 #include <sys/autoconf.h>
51 55 #include <sys/ontrap.h>
52 56 #include <sys/utsname.h>
53 57 #include <sys/zone.h>
54 58
55 59 #ifdef __sparc
56 60 #include <sys/membar.h>
57 61 #endif
58 62
59 63 /*
60 64 * Routine which sets a user error; placed in
61 65 * illegal entries in the bdevsw and cdevsw tables.
62 66 */
63 67
64 68 int
65 69 nodev()
66 70 {
67 71 return (curthread->t_lwp ?
68 72 ttolwp(curthread)->lwp_error = ENXIO : ENXIO);
69 73 }
70 74
71 75 /*
72 76 * Null routine; placed in insignificant entries
73 77 * in the bdevsw and cdevsw tables.
74 78 */
75 79
76 80 int
77 81 nulldev()
78 82 {
79 83 return (0);
80 84 }
81 85
82 86 static kmutex_t udevlock;
83 87
84 88 /*
85 89 * Generate an unused major device number.
86 90 */
87 91 major_t
88 92 getudev()
89 93 {
90 94 static major_t next = 0;
91 95 major_t ret;
92 96
93 97 /*
94 98 * Ensure that we start allocating major numbers above the 'devcnt'
95 99 * count. The only limit we place on the number is that it should be a
96 100 * legal 32-bit SVR4 major number and be greater than or equal to devcnt
97 101 * in the current system.
98 102 */
99 103 mutex_enter(&udevlock);
100 104 if (next == 0)
101 105 next = devcnt;
102 106 if (next <= L_MAXMAJ32 && next >= devcnt)
103 107 ret = next++;
104 108 else {
105 109 /*
106 110 * If we fail to allocate a major number because devcnt has
107 111 * reached L_MAXMAJ32, we may be the victim of a sparsely
108 112 * populated devnames array. We scan the array backwards
109 113 * looking for an empty slot; if we find one, mark it as
110 114 * DN_GETUDEV so it doesn't get taken by subsequent consumers
111 115 * users of the devnames array, and issue a warning.
112 116 * It is vital for this routine to take drastic measures to
113 117 * succeed, since the kernel really needs it to boot.
114 118 */
115 119 int i;
116 120 for (i = devcnt - 1; i >= 0; i--) {
117 121 LOCK_DEV_OPS(&devnamesp[i].dn_lock);
118 122 if (devnamesp[i].dn_name == NULL &&
119 123 ((devnamesp[i].dn_flags & DN_TAKEN_GETUDEV) == 0))
120 124 break;
121 125 UNLOCK_DEV_OPS(&devnamesp[i].dn_lock);
122 126 }
123 127 if (i != -1) {
124 128 cmn_err(CE_WARN, "Reusing device major number %d.", i);
125 129 ASSERT(i >= 0 && i < devcnt);
126 130 devnamesp[i].dn_flags |= DN_TAKEN_GETUDEV;
127 131 UNLOCK_DEV_OPS(&devnamesp[i].dn_lock);
128 132 ret = (major_t)i;
129 133 } else {
130 134 ret = DDI_MAJOR_T_NONE;
131 135 }
132 136 }
133 137 mutex_exit(&udevlock);
134 138 return (ret);
135 139 }
136 140
137 141
138 142 /*
139 143 * Compress 'long' device number encoding to 32-bit device number
140 144 * encoding. If it won't fit, we return failure, but set the
141 145 * device number to 32-bit NODEV for the sake of our callers.
142 146 */
143 147 int
144 148 cmpldev(dev32_t *dst, dev_t dev)
145 149 {
146 150 #if defined(_LP64)
147 151 if (dev == NODEV) {
148 152 *dst = NODEV32;
149 153 } else {
150 154 major_t major = dev >> L_BITSMINOR;
151 155 minor_t minor = dev & L_MAXMIN;
152 156
153 157 if (major > L_MAXMAJ32 || minor > L_MAXMIN32) {
154 158 *dst = NODEV32;
155 159 return (0);
156 160 }
157 161
158 162 *dst = (dev32_t)((major << L_BITSMINOR32) | minor);
159 163 }
160 164 #else
161 165 *dst = (dev32_t)dev;
162 166 #endif
163 167 return (1);
164 168 }
165 169
166 170 /*
167 171 * Expand 32-bit dev_t's to long dev_t's. Expansion always "fits"
168 172 * into the return type, but we're careful to expand NODEV explicitly.
169 173 */
170 174 dev_t
171 175 expldev(dev32_t dev32)
172 176 {
173 177 #ifdef _LP64
174 178 if (dev32 == NODEV32)
175 179 return (NODEV);
176 180 return (makedevice((dev32 >> L_BITSMINOR32) & L_MAXMAJ32,
177 181 dev32 & L_MAXMIN32));
178 182 #else
179 183 return ((dev_t)dev32);
180 184 #endif
181 185 }
182 186
183 187 #ifndef _LP64
184 188 /*
185 189 * Keep these entry points for 32-bit systems but enforce the use
186 190 * of MIN/MAX macros on 64-bit systems. The DDI header files already
187 191 * define min/max as macros so drivers shouldn't need these functions.
188 192 */
189 193
190 194 int
191 195 min(int a, int b)
192 196 {
193 197 return (a < b ? a : b);
194 198 }
195 199
196 200 int
197 201 max(int a, int b)
198 202 {
199 203 return (a > b ? a : b);
200 204 }
201 205
202 206 uint_t
203 207 umin(uint_t a, uint_t b)
204 208 {
205 209 return (a < b ? a : b);
206 210 }
207 211
208 212 uint_t
209 213 umax(uint_t a, uint_t b)
210 214 {
211 215 return (a > b ? a : b);
212 216 }
213 217
214 218 #endif /* !_LP64 */
215 219
216 220 /*
217 221 * Parse suboptions from a string.
218 222 * Same as getsubopt(3C).
219 223 */
220 224 int
221 225 getsubopt(char **optionsp, char * const *tokens, char **valuep)
222 226 {
223 227 char *s = *optionsp, *p;
224 228 int i;
225 229 size_t optlen;
226 230
227 231 *valuep = NULL;
228 232 if (*s == '\0')
229 233 return (-1);
230 234 p = strchr(s, ','); /* find next option */
231 235 if (p == NULL) {
232 236 p = s + strlen(s);
233 237 } else {
234 238 *p++ = '\0'; /* mark end and point to next */
235 239 }
236 240 *optionsp = p; /* point to next option */
237 241 p = strchr(s, '='); /* find value */
238 242 if (p == NULL) {
239 243 optlen = strlen(s);
240 244 *valuep = NULL;
241 245 } else {
242 246 optlen = p - s;
243 247 *valuep = ++p;
244 248 }
245 249 for (i = 0; tokens[i] != NULL; i++) {
246 250 if ((optlen == strlen(tokens[i])) &&
247 251 (strncmp(s, tokens[i], optlen) == 0))
248 252 return (i);
249 253 }
250 254 /* no match, point value at option and return error */
251 255 *valuep = s;
252 256 return (-1);
253 257 }
254 258
255 259 /*
256 260 * Append the suboption string 'opt' starting at the position 'str'
257 261 * within the buffer defined by 'buf' and 'len'. If 'buf' is not empty,
258 262 * a comma is appended first.
259 263 * Return a pointer to the end of the resulting string (the null byte).
260 264 * Return NULL if there isn't enough space left to append 'opt'.
261 265 */
262 266 char *
263 267 append_subopt(const char *buf, size_t len, char *str, const char *opt)
264 268 {
265 269 size_t l = strlen(opt);
266 270
267 271 /*
268 272 * Include a ',' if this is not the first option.
269 273 * Include space for the null byte.
270 274 */
271 275 if (strlen(buf) + (buf[0] != '\0') + l + 1 > len)
272 276 return (NULL);
273 277
274 278 if (buf[0] != '\0')
275 279 *str++ = ',';
276 280 (void) strcpy(str, opt);
277 281 return (str + l);
278 282 }
279 283
280 284 /*
281 285 * Tables to convert a single byte to/from binary-coded decimal (BCD).
282 286 */
283 287 uchar_t byte_to_bcd[256] = {
284 288 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
285 289 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,
286 290 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29,
287 291 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
288 292 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
289 293 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
290 294 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
291 295 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
292 296 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
293 297 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
294 298 };
295 299
296 300 uchar_t bcd_to_byte[256] = { /* CSTYLED */
297 301 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0,
298 302 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 0, 0, 0, 0, 0, 0,
299 303 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 0, 0, 0, 0, 0, 0,
300 304 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 0, 0, 0, 0, 0, 0,
↓ open down ↓ |
263 lines elided |
↑ open up ↑ |
301 305 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 0, 0, 0, 0, 0, 0,
302 306 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 0, 0, 0, 0, 0, 0,
303 307 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 0, 0, 0, 0, 0,
304 308 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 0, 0, 0, 0, 0, 0,
305 309 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 0, 0, 0, 0, 0, 0,
306 310 90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
307 311 };
308 312
309 313 /*
310 314 * Hot-patch a single instruction in the kernel's text.
311 - * If you want to patch multiple instructions you must
312 - * arrange to do it so that all intermediate stages are
313 - * sane -- we don't stop other cpus while doing this.
315 + *
316 + * If you want to patch multiple instructions you must arrange to do it so that
317 + * all intermediate stages are sane -- we don't stop other cpus while doing
318 + * this.
319 + *
314 320 * Size must be 1, 2, or 4 bytes with iaddr aligned accordingly.
321 + *
322 + * The instruction itself might straddle a page boundary, so we have to account
323 + * for that.
315 324 */
316 325 void
317 326 hot_patch_kernel_text(caddr_t iaddr, uint32_t new_instr, uint_t size)
318 327 {
328 + const uintptr_t pageoff = (uintptr_t)iaddr & PAGEOFFSET;
329 + const boolean_t straddles = (pageoff + size > PAGESIZE);
330 + const size_t mapsize = straddles ? PAGESIZE * 2 : PAGESIZE;
331 + caddr_t ipageaddr = iaddr - pageoff;
319 332 caddr_t vaddr;
320 333 page_t **ppp;
321 - uintptr_t off = (uintptr_t)iaddr & PAGEOFFSET;
322 334
323 - vaddr = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
335 + vaddr = vmem_alloc(heap_arena, mapsize, VM_SLEEP);
324 336
325 - (void) as_pagelock(&kas, &ppp, iaddr - off, PAGESIZE, S_WRITE);
337 + (void) as_pagelock(&kas, &ppp, ipageaddr, mapsize, S_WRITE);
326 338
327 339 hat_devload(kas.a_hat, vaddr, PAGESIZE,
328 - hat_getpfnum(kas.a_hat, iaddr - off),
329 - PROT_READ | PROT_WRITE, HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST);
340 + hat_getpfnum(kas.a_hat, ipageaddr), PROT_READ | PROT_WRITE,
341 + HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST);
330 342
343 + if (straddles) {
344 + hat_devload(kas.a_hat, vaddr + PAGESIZE, PAGESIZE,
345 + hat_getpfnum(kas.a_hat, ipageaddr + PAGESIZE),
346 + PROT_READ | PROT_WRITE, HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST);
347 + }
348 +
331 349 switch (size) {
332 350 case 1:
333 - *(uint8_t *)(vaddr + off) = new_instr;
351 + *(uint8_t *)(vaddr + pageoff) = new_instr;
334 352 break;
335 353 case 2:
336 - *(uint16_t *)(vaddr + off) = new_instr;
354 + *(uint16_t *)(vaddr + pageoff) = new_instr;
337 355 break;
338 356 case 4:
339 - *(uint32_t *)(vaddr + off) = new_instr;
357 + *(uint32_t *)(vaddr + pageoff) = new_instr;
340 358 break;
341 359 default:
342 360 panic("illegal hot-patch");
343 361 }
344 362
345 363 membar_enter();
346 - sync_icache(vaddr + off, size);
364 + sync_icache(vaddr + pageoff, size);
347 365 sync_icache(iaddr, size);
348 - as_pageunlock(&kas, ppp, iaddr - off, PAGESIZE, S_WRITE);
349 - hat_unload(kas.a_hat, vaddr, PAGESIZE, HAT_UNLOAD_UNLOCK);
350 - vmem_free(heap_arena, vaddr, PAGESIZE);
366 + as_pageunlock(&kas, ppp, ipageaddr, mapsize, S_WRITE);
367 + hat_unload(kas.a_hat, vaddr, mapsize, HAT_UNLOAD_UNLOCK);
368 + vmem_free(heap_arena, vaddr, mapsize);
351 369 }
352 370
353 371 /*
354 372 * Routine to report an attempt to execute non-executable data. If the
355 373 * address executed lies in the stack, explicitly say so.
356 374 */
357 375 void
358 376 report_stack_exec(proc_t *p, caddr_t addr)
359 377 {
360 378 if (!noexec_user_stack_log)
361 379 return;
362 380
363 381 if (addr < p->p_usrstack && addr >= (p->p_usrstack - p->p_stksize)) {
364 382 cmn_err(CE_NOTE, "%s[%d] attempt to execute code "
365 383 "on stack by uid %d", p->p_user.u_comm,
366 384 p->p_pid, crgetruid(p->p_cred));
367 385 } else {
368 386 cmn_err(CE_NOTE, "%s[%d] attempt to execute non-executable "
369 387 "data at 0x%p by uid %d", p->p_user.u_comm,
370 388 p->p_pid, (void *) addr, crgetruid(p->p_cred));
371 389 }
372 390
373 391 delay(hz / 50);
374 392 }
375 393
376 394 /*
377 395 * Determine whether the address range [addr, addr + len) lies entirely within a single entry of memlist mp.
378 396 */
379 397 int
380 398 address_in_memlist(struct memlist *mp, uint64_t addr, size_t len)
381 399 {
382 400 while (mp != 0) {
383 401 if ((addr >= mp->ml_address) &&
384 402 (addr + len <= mp->ml_address + mp->ml_size))
385 403 return (1); /* TRUE */
386 404 mp = mp->ml_next;
387 405 }
388 406 return (0); /* FALSE */
389 407 }
390 408
391 409 /*
392 410 * Pop the topmost element from the t_ontrap stack, removing the current set of
393 411 * on_trap() protections. Refer to <sys/ontrap.h> for more info. If the
394 412 * stack is already empty, no_trap() just returns.
395 413 */
396 414 void
397 415 no_trap(void)
398 416 {
399 417 if (curthread->t_ontrap != NULL) {
400 418 #ifdef __sparc
401 419 membar_sync(); /* deferred error barrier (see sparcv9_subr.s) */
402 420 #endif
403 421 curthread->t_ontrap = curthread->t_ontrap->ot_prev;
404 422 }
405 423 }
406 424
407 425 /*
408 426 * Return the current process's zone nodename, or utsname.nodename if curproc is NULL.
409 427 */
410 428 char *
411 429 uts_nodename(void)
412 430 {
413 431 if (curproc == NULL)
414 432 return (utsname.nodename);
415 433 return (curproc->p_zone->zone_nodename);
416 434 }
↓ open down ↓ |
56 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX