1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 27 /* All Rights Reserved */ 28 29 #include <sys/types.h> 30 #include <sys/sysmacros.h> 31 #include <sys/param.h> 32 #include <sys/vmparam.h> 33 #include <sys/systm.h> 34 #include <sys/cred.h> 35 #include <sys/user.h> 36 #include <sys/proc.h> 37 #include <sys/conf.h> 38 #include <sys/tuneable.h> 39 #include <sys/cpuvar.h> 40 #include <sys/archsystm.h> 41 #include <sys/vmem.h> 42 #include <vm/seg_kmem.h> 43 #include <sys/errno.h> 44 #include <sys/cmn_err.h> 45 #include <sys/debug.h> 46 #include <sys/atomic.h> 47 #include <sys/model.h> 48 #include <sys/kmem.h> 49 #include <sys/memlist.h> 50 #include <sys/autoconf.h> 51 #include <sys/ontrap.h> 52 #include <sys/utsname.h> 53 #include <sys/zone.h> 54 55 #ifdef __sparc 56 #include <sys/membar.h> 57 #endif 58 59 /* 60 * Routine which sets a user error; placed in 61 * illegal entries in the bdevsw and cdevsw tables. 62 */ 63 64 int 65 nodev() 66 { 67 return (curthread->t_lwp ? 68 ttolwp(curthread)->lwp_error = ENXIO : ENXIO); 69 } 70 71 /* 72 * Null routine; placed in insignificant entries 73 * in the bdevsw and cdevsw tables. 74 */ 75 76 int 77 nulldev() 78 { 79 return (0); 80 } 81 82 static kmutex_t udevlock; 83 84 /* 85 * Generate an unused major device number. 86 */ 87 major_t 88 getudev() 89 { 90 static major_t next = 0; 91 major_t ret; 92 93 /* 94 * Ensure that we start allocating major numbers above the 'devcnt' 95 * count. The only limit we place on the number is that it should be a 96 * legal 32-bit SVR4 major number and be greater than or equal to devcnt 97 * in the current system). 98 */ 99 mutex_enter(&udevlock); 100 if (next == 0) 101 next = devcnt; 102 if (next <= L_MAXMAJ32 && next >= devcnt) 103 ret = next++; 104 else { 105 /* 106 * If we fail to allocate a major number because devcnt has 107 * reached L_MAXMAJ32, we may be the victim of a sparsely 108 * populated devnames array. We scan the array backwards 109 * looking for an empty slot; if we find one, mark it as 110 * DN_GETUDEV so it doesn't get taken by subsequent consumers 111 * users of the devnames array, and issue a warning. 112 * It is vital for this routine to take drastic measures to 113 * succeed, since the kernel really needs it to boot. 114 */ 115 int i; 116 for (i = devcnt - 1; i >= 0; i--) { 117 LOCK_DEV_OPS(&devnamesp[i].dn_lock); 118 if (devnamesp[i].dn_name == NULL && 119 ((devnamesp[i].dn_flags & DN_TAKEN_GETUDEV) == 0)) 120 break; 121 UNLOCK_DEV_OPS(&devnamesp[i].dn_lock); 122 } 123 if (i != -1) { 124 cmn_err(CE_WARN, "Reusing device major number %d.", i); 125 ASSERT(i >= 0 && i < devcnt); 126 devnamesp[i].dn_flags |= DN_TAKEN_GETUDEV; 127 UNLOCK_DEV_OPS(&devnamesp[i].dn_lock); 128 ret = (major_t)i; 129 } else { 130 ret = DDI_MAJOR_T_NONE; 131 } 132 } 133 mutex_exit(&udevlock); 134 return (ret); 135 } 136 137 138 /* 139 * Compress 'long' device number encoding to 32-bit device number 140 * encoding. If it won't fit, we return failure, but set the 141 * device number to 32-bit NODEV for the sake of our callers. 142 */ 143 int 144 cmpldev(dev32_t *dst, dev_t dev) 145 { 146 #if defined(_LP64) 147 if (dev == NODEV) { 148 *dst = NODEV32; 149 } else { 150 major_t major = dev >> L_BITSMINOR; 151 minor_t minor = dev & L_MAXMIN; 152 153 if (major > L_MAXMAJ32 || minor > L_MAXMIN32) { 154 *dst = NODEV32; 155 return (0); 156 } 157 158 *dst = (dev32_t)((major << L_BITSMINOR32) | minor); 159 } 160 #else 161 *dst = (dev32_t)dev; 162 #endif 163 return (1); 164 } 165 166 /* 167 * Expand 32-bit dev_t's to long dev_t's. Expansion always "fits" 168 * into the return type, but we're careful to expand NODEV explicitly. 169 */ 170 dev_t 171 expldev(dev32_t dev32) 172 { 173 #ifdef _LP64 174 if (dev32 == NODEV32) 175 return (NODEV); 176 return (makedevice((dev32 >> L_BITSMINOR32) & L_MAXMAJ32, 177 dev32 & L_MAXMIN32)); 178 #else 179 return ((dev_t)dev32); 180 #endif 181 } 182 183 #ifndef _LP64 184 /* 185 * Keep these entry points for 32-bit systems but enforce the use 186 * of MIN/MAX macros on 64-bit systems. The DDI header files already 187 * define min/max as macros so drivers shouldn't need these functions. 188 */ 189 190 int 191 min(int a, int b) 192 { 193 return (a < b ? a : b); 194 } 195 196 int 197 max(int a, int b) 198 { 199 return (a > b ? a : b); 200 } 201 202 uint_t 203 umin(uint_t a, uint_t b) 204 { 205 return (a < b ? a : b); 206 } 207 208 uint_t 209 umax(uint_t a, uint_t b) 210 { 211 return (a > b ? a : b); 212 } 213 214 #endif /* !_LP64 */ 215 216 /* 217 * Parse suboptions from a string. 218 * Same as getsubopt(3C). 219 */ 220 int 221 getsubopt(char **optionsp, char * const *tokens, char **valuep) 222 { 223 char *s = *optionsp, *p; 224 int i; 225 size_t optlen; 226 227 *valuep = NULL; 228 if (*s == '\0') 229 return (-1); 230 p = strchr(s, ','); /* find next option */ 231 if (p == NULL) { 232 p = s + strlen(s); 233 } else { 234 *p++ = '\0'; /* mark end and point to next */ 235 } 236 *optionsp = p; /* point to next option */ 237 p = strchr(s, '='); /* find value */ 238 if (p == NULL) { 239 optlen = strlen(s); 240 *valuep = NULL; 241 } else { 242 optlen = p - s; 243 *valuep = ++p; 244 } 245 for (i = 0; tokens[i] != NULL; i++) { 246 if ((optlen == strlen(tokens[i])) && 247 (strncmp(s, tokens[i], optlen) == 0)) 248 return (i); 249 } 250 /* no match, point value at option and return error */ 251 *valuep = s; 252 return (-1); 253 } 254 255 /* 256 * Append the suboption string 'opt' starting at the position 'str' 257 * within the buffer defined by 'buf' and 'len'. If 'buf' is not null, 258 * a comma is appended first. 259 * Return a pointer to the end of the resulting string (the null byte). 260 * Return NULL if there isn't enough space left to append 'opt'. 261 */ 262 char * 263 append_subopt(const char *buf, size_t len, char *str, const char *opt) 264 { 265 size_t l = strlen(opt); 266 267 /* 268 * Include a ',' if this is not the first option. 269 * Include space for the null byte. 270 */ 271 if (strlen(buf) + (buf[0] != '\0') + l + 1 > len) 272 return (NULL); 273 274 if (buf[0] != '\0') 275 *str++ = ','; 276 (void) strcpy(str, opt); 277 return (str + l); 278 } 279 280 /* 281 * Tables to convert a single byte to/from binary-coded decimal (BCD). 282 */ 283 uchar_t byte_to_bcd[256] = { 284 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 285 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 286 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 287 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 288 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 289 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 290 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 291 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 292 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 293 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 294 }; 295 296 uchar_t bcd_to_byte[256] = { /* CSTYLED */ 297 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 298 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 0, 0, 0, 0, 0, 0, 299 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 0, 0, 0, 0, 0, 0, 300 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 0, 0, 0, 0, 0, 0, 301 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 0, 0, 0, 0, 0, 0, 302 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 0, 0, 0, 0, 0, 0, 303 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 0, 0, 0, 0, 0, 304 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 0, 0, 0, 0, 0, 0, 305 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 0, 0, 0, 0, 0, 0, 306 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 307 }; 308 309 /* 310 * Hot-patch a single instruction in the kernel's text. 311 * If you want to patch multiple instructions you must 312 * arrange to do it so that all intermediate stages are 313 * sane -- we don't stop other cpus while doing this. 314 * Size must be 1, 2, or 4 bytes with iaddr aligned accordingly. 315 */ 316 void 317 hot_patch_kernel_text(caddr_t iaddr, uint32_t new_instr, uint_t size) 318 { 319 caddr_t vaddr; 320 page_t **ppp; 321 uintptr_t off = (uintptr_t)iaddr & PAGEOFFSET; 322 323 vaddr = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP); 324 325 (void) as_pagelock(&kas, &ppp, iaddr - off, PAGESIZE, S_WRITE); 326 327 hat_devload(kas.a_hat, vaddr, PAGESIZE, 328 hat_getpfnum(kas.a_hat, iaddr - off), 329 PROT_READ | PROT_WRITE, HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST); 330 331 switch (size) { 332 case 1: 333 *(uint8_t *)(vaddr + off) = new_instr; 334 break; 335 case 2: 336 *(uint16_t *)(vaddr + off) = new_instr; 337 break; 338 case 4: 339 *(uint32_t *)(vaddr + off) = new_instr; 340 break; 341 default: 342 panic("illegal hot-patch"); 343 } 344 345 membar_enter(); 346 sync_icache(vaddr + off, size); 347 sync_icache(iaddr, size); 348 as_pageunlock(&kas, ppp, iaddr - off, PAGESIZE, S_WRITE); 349 hat_unload(kas.a_hat, vaddr, PAGESIZE, HAT_UNLOAD_UNLOCK); 350 vmem_free(heap_arena, vaddr, PAGESIZE); 351 } 352 353 /* 354 * Routine to report an attempt to execute non-executable data. If the 355 * address executed lies in the stack, explicitly say so. 356 */ 357 void 358 report_stack_exec(proc_t *p, caddr_t addr) 359 { 360 if (!noexec_user_stack_log) 361 return; 362 363 if (addr < p->p_usrstack && addr >= (p->p_usrstack - p->p_stksize)) { 364 cmn_err(CE_NOTE, "%s[%d] attempt to execute code " 365 "on stack by uid %d", p->p_user.u_comm, 366 p->p_pid, crgetruid(p->p_cred)); 367 } else { 368 cmn_err(CE_NOTE, "%s[%d] attempt to execute non-executable " 369 "data at 0x%p by uid %d", p->p_user.u_comm, 370 p->p_pid, (void *) addr, crgetruid(p->p_cred)); 371 } 372 373 delay(hz / 50); 374 } 375 376 /* 377 * Determine whether the address range [addr, addr + len) is in memlist mp. 378 */ 379 int 380 address_in_memlist(struct memlist *mp, uint64_t addr, size_t len) 381 { 382 while (mp != 0) { 383 if ((addr >= mp->ml_address) && 384 (addr + len <= mp->ml_address + mp->ml_size)) 385 return (1); /* TRUE */ 386 mp = mp->ml_next; 387 } 388 return (0); /* FALSE */ 389 } 390 391 /* 392 * Pop the topmost element from the t_ontrap stack, removing the current set of 393 * on_trap() protections. Refer to <sys/ontrap.h> for more info. If the 394 * stack is already empty, no_trap() just returns. 395 */ 396 void 397 no_trap(void) 398 { 399 if (curthread->t_ontrap != NULL) { 400 #ifdef __sparc 401 membar_sync(); /* deferred error barrier (see sparcv9_subr.s) */ 402 #endif 403 curthread->t_ontrap = curthread->t_ontrap->ot_prev; 404 } 405 } 406 407 /* 408 * Return utsname.nodename outside a zone, or the zone name within. 409 */ 410 char * 411 uts_nodename(void) 412 { 413 if (curproc == NULL) 414 return (utsname.nodename); 415 return (curproc->p_zone->zone_nodename); 416 }