1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T     */
  27 /*        All Rights Reserved   */
  28 
  29 #include <sys/types.h>
  30 #include <sys/sysmacros.h>
  31 #include <sys/param.h>
  32 #include <sys/vmparam.h>
  33 #include <sys/systm.h>
  34 #include <sys/cred.h>
  35 #include <sys/user.h>
  36 #include <sys/proc.h>
  37 #include <sys/conf.h>
  38 #include <sys/tuneable.h>
  39 #include <sys/cpuvar.h>
  40 #include <sys/archsystm.h>
  41 #include <sys/vmem.h>
  42 #include <vm/seg_kmem.h>
  43 #include <sys/errno.h>
  44 #include <sys/cmn_err.h>
  45 #include <sys/debug.h>
  46 #include <sys/atomic.h>
  47 #include <sys/model.h>
  48 #include <sys/kmem.h>
  49 #include <sys/memlist.h>
  50 #include <sys/autoconf.h>
  51 #include <sys/ontrap.h>
  52 #include <sys/utsname.h>
  53 #include <sys/zone.h>
  54 
  55 #ifdef __sparc
  56 #include <sys/membar.h>
  57 #endif
  58 
  59 /*
  60  * Routine which sets a user error; placed in
  61  * illegal entries in the bdevsw and cdevsw tables.
  62  */
  63 
  64 int
  65 nodev()
  66 {
  67         return (curthread->t_lwp ?
  68             ttolwp(curthread)->lwp_error = ENXIO : ENXIO);
  69 }
  70 
  71 /*
  72  * Null routine; placed in insignificant entries
  73  * in the bdevsw and cdevsw tables.
  74  */
  75 
  76 int
  77 nulldev()
  78 {
  79         return (0);
  80 }
  81 
  82 static kmutex_t udevlock;
  83 
  84 /*
  85  * Generate an unused major device number.
  86  */
  87 major_t
  88 getudev()
  89 {
  90         static major_t next = 0;
  91         major_t ret;
  92 
  93         /*
  94          * Ensure that we start allocating major numbers above the 'devcnt'
  95          * count.  The only limit we place on the number is that it should be a
  96          * legal 32-bit SVR4 major number and be greater than or equal to devcnt
  97          * in the current system).
  98          */
  99         mutex_enter(&udevlock);
 100         if (next == 0)
 101                 next = devcnt;
 102         if (next <= L_MAXMAJ32 && next >= devcnt)
 103                 ret = next++;
 104         else {
 105                 /*
 106                  * If we fail to allocate a major number because devcnt has
 107                  * reached L_MAXMAJ32, we may be the victim of a sparsely
 108                  * populated devnames array.  We scan the array backwards
 109                  * looking for an empty slot;  if we find one, mark it as
 110                  * DN_GETUDEV so it doesn't get taken by subsequent consumers
 111                  * users of the devnames array, and issue a warning.
 112                  * It is vital for this routine to take drastic measures to
 113                  * succeed, since the kernel really needs it to boot.
 114                  */
 115                 int i;
 116                 for (i = devcnt - 1; i >= 0; i--) {
 117                         LOCK_DEV_OPS(&devnamesp[i].dn_lock);
 118                         if (devnamesp[i].dn_name == NULL &&
 119                             ((devnamesp[i].dn_flags & DN_TAKEN_GETUDEV) == 0))
 120                                 break;
 121                         UNLOCK_DEV_OPS(&devnamesp[i].dn_lock);
 122                 }
 123                 if (i != -1) {
 124                         cmn_err(CE_WARN, "Reusing device major number %d.", i);
 125                         ASSERT(i >= 0 && i < devcnt);
 126                         devnamesp[i].dn_flags |= DN_TAKEN_GETUDEV;
 127                         UNLOCK_DEV_OPS(&devnamesp[i].dn_lock);
 128                         ret = (major_t)i;
 129                 } else {
 130                         ret = DDI_MAJOR_T_NONE;
 131                 }
 132         }
 133         mutex_exit(&udevlock);
 134         return (ret);
 135 }
 136 
 137 
 138 /*
 139  * Compress 'long' device number encoding to 32-bit device number
 140  * encoding.  If it won't fit, we return failure, but set the
 141  * device number to 32-bit NODEV for the sake of our callers.
 142  */
 143 int
 144 cmpldev(dev32_t *dst, dev_t dev)
 145 {
 146 #if defined(_LP64)
 147         if (dev == NODEV) {
 148                 *dst = NODEV32;
 149         } else {
 150                 major_t major = dev >> L_BITSMINOR;
 151                 minor_t minor = dev & L_MAXMIN;
 152 
 153                 if (major > L_MAXMAJ32 || minor > L_MAXMIN32) {
 154                         *dst = NODEV32;
 155                         return (0);
 156                 }
 157 
 158                 *dst = (dev32_t)((major << L_BITSMINOR32) | minor);
 159         }
 160 #else
 161         *dst = (dev32_t)dev;
 162 #endif
 163         return (1);
 164 }
 165 
 166 /*
 167  * Expand 32-bit dev_t's to long dev_t's.  Expansion always "fits"
 168  * into the return type, but we're careful to expand NODEV explicitly.
 169  */
 170 dev_t
 171 expldev(dev32_t dev32)
 172 {
 173 #ifdef _LP64
 174         if (dev32 == NODEV32)
 175                 return (NODEV);
 176         return (makedevice((dev32 >> L_BITSMINOR32) & L_MAXMAJ32,
 177             dev32 & L_MAXMIN32));
 178 #else
 179         return ((dev_t)dev32);
 180 #endif
 181 }
 182 
 183 #ifndef _LP64
 184 /*
 185  * Keep these entry points for 32-bit systems but enforce the use
 186  * of MIN/MAX macros on 64-bit systems.  The DDI header files already
 187  * define min/max as macros so drivers shouldn't need these functions.
 188  */
 189 
 190 int
 191 min(int a, int b)
 192 {
 193         return (a < b ? a : b);
 194 }
 195 
 196 int
 197 max(int a, int b)
 198 {
 199         return (a > b ? a : b);
 200 }
 201 
 202 uint_t
 203 umin(uint_t a, uint_t b)
 204 {
 205         return (a < b ? a : b);
 206 }
 207 
 208 uint_t
 209 umax(uint_t a, uint_t b)
 210 {
 211         return (a > b ? a : b);
 212 }
 213 
 214 #endif /* !_LP64 */
 215 
 216 /*
 217  * Parse suboptions from a string.
 218  * Same as getsubopt(3C).
 219  */
 220 int
 221 getsubopt(char **optionsp, char * const *tokens, char **valuep)
 222 {
 223         char *s = *optionsp, *p;
 224         int i;
 225         size_t optlen;
 226 
 227         *valuep = NULL;
 228         if (*s == '\0')
 229                 return (-1);
 230         p = strchr(s, ',');             /* find next option */
 231         if (p == NULL) {
 232                 p = s + strlen(s);
 233         } else {
 234                 *p++ = '\0';            /* mark end and point to next */
 235         }
 236         *optionsp = p;                  /* point to next option */
 237         p = strchr(s, '=');             /* find value */
 238         if (p == NULL) {
 239                 optlen = strlen(s);
 240                 *valuep = NULL;
 241         } else {
 242                 optlen = p - s;
 243                 *valuep = ++p;
 244         }
 245         for (i = 0; tokens[i] != NULL; i++) {
 246                 if ((optlen == strlen(tokens[i])) &&
 247                     (strncmp(s, tokens[i], optlen) == 0))
 248                         return (i);
 249         }
 250         /* no match, point value at option and return error */
 251         *valuep = s;
 252         return (-1);
 253 }
 254 
 255 /*
 256  * Append the suboption string 'opt' starting at the position 'str'
 257  * within the buffer defined by 'buf' and 'len'. If 'buf' is not null,
 258  * a comma is appended first.
 259  * Return a pointer to the end of the resulting string (the null byte).
 260  * Return NULL if there isn't enough space left to append 'opt'.
 261  */
 262 char *
 263 append_subopt(const char *buf, size_t len, char *str, const char *opt)
 264 {
 265         size_t l = strlen(opt);
 266 
 267         /*
 268          * Include a ',' if this is not the first option.
 269          * Include space for the null byte.
 270          */
 271         if (strlen(buf) + (buf[0] != '\0') + l + 1 > len)
 272                 return (NULL);
 273 
 274         if (buf[0] != '\0')
 275                 *str++ = ',';
 276         (void) strcpy(str, opt);
 277         return (str + l);
 278 }
 279 
/*
 * Tables to convert a single byte to/from binary-coded decimal (BCD).
 *
 * byte_to_bcd[] is indexed by a binary value: entry n (0 <= n <= 99)
 * holds the packed BCD form of n, with the tens digit in the high nibble
 * and the ones digit in the low nibble (e.g. byte_to_bcd[42] == 0x42).
 * Only the first 100 of the 256 entries have initializers; the rest are
 * implicitly zero, so an index above 99 yields 0x00.
 */
uchar_t byte_to_bcd[256] = {
        0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
        0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,
        0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29,
        0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
        0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
        0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
        0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
        0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
        0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
        0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
};
 295 
/*
 * bcd_to_byte[] is indexed by a packed BCD byte (tens digit in the high
 * nibble, ones digit in the low nibble) and holds the binary value it
 * encodes, e.g. bcd_to_byte[0x42] == 42.  Each row below covers one value
 * of the high nibble.  Indices whose low nibble is 0xA-0xF are not valid
 * BCD and are explicitly 0; indices above 0x99 have no initializer and
 * are therefore implicitly 0 as well.
 */
uchar_t bcd_to_byte[256] = {            /* CSTYLED */
         0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  0,  0,  0,  0,  0,  0,
        10, 11, 12, 13, 14, 15, 16, 17, 18, 19,  0,  0,  0,  0,  0,  0,
        20, 21, 22, 23, 24, 25, 26, 27, 28, 29,  0,  0,  0,  0,  0,  0,
        30, 31, 32, 33, 34, 35, 36, 37, 38, 39,  0,  0,  0,  0,  0,  0,
        40, 41, 42, 43, 44, 45, 46, 47, 48, 49,  0,  0,  0,  0,  0,  0,
        50, 51, 52, 53, 54, 55, 56, 57, 58, 59,  0,  0,  0,  0,  0,  0,
        60, 61, 62, 63, 64, 65, 66, 67, 68, 69,  0,  0,  0,  0,  0,  0,
        70, 71, 72, 73, 74, 75, 76, 77, 78, 79,  0,  0,  0,  0,  0,  0,
        80, 81, 82, 83, 84, 85, 86, 87, 88, 89,  0,  0,  0,  0,  0,  0,
        90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
};
 308 
/*
 * Hot-patch a single instruction in the kernel's text.
 * If you want to patch multiple instructions you must
 * arrange to do it so that all intermediate stages are
 * sane -- we don't stop other cpus while doing this.
 * Size must be 1, 2, or 4 bytes with iaddr aligned accordingly.
 *
 *      iaddr           address of the instruction to overwrite
 *      new_instr       replacement value; stored through a uint8_t,
 *                      uint16_t or uint32_t pointer according to 'size'
 *      size            width of the store in bytes; any value other
 *                      than 1, 2 or 4 panics with "illegal hot-patch"
 *
 * The store is not made through iaddr itself.  The page frame that
 * hat_getpfnum() reports for iaddr's page is loaded a second time,
 * read/write, at a temporary address taken from heap_arena, and the
 * write goes through that alias at the same page offset.  The temporary
 * mapping and address are released again before returning.
 */
void
hot_patch_kernel_text(caddr_t iaddr, uint32_t new_instr, uint_t size)
{
        caddr_t vaddr;
        page_t **ppp;
        /* Offset of iaddr within its page; iaddr - off is the page base. */
        uintptr_t off = (uintptr_t)iaddr & PAGEOFFSET;

        /* Temporary PAGESIZE address range used for the writable alias. */
        vaddr = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);

        /*
         * Lock the page containing iaddr for writing.  The return value
         * is explicitly discarded.
         * NOTE(review): a failure here would go unnoticed -- confirm that
         * as_pagelock() cannot fail for this kas range.
         */
        (void) as_pagelock(&kas, &ppp, iaddr - off, PAGESIZE, S_WRITE);

        /* Load the page frame behind iaddr at vaddr with read/write access. */
        hat_devload(kas.a_hat, vaddr, PAGESIZE,
            hat_getpfnum(kas.a_hat, iaddr - off),
            PROT_READ | PROT_WRITE, HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST);

        /*
         * Perform the patch as one store of the requested width through
         * the alias.  For sizes 1 and 2 the assignment keeps only the
         * low-order bits of new_instr.
         */
        switch (size) {
        case 1:
                *(uint8_t *)(vaddr + off) = new_instr;
                break;
        case 2:
                *(uint16_t *)(vaddr + off) = new_instr;
                break;
        case 4:
                *(uint32_t *)(vaddr + off) = new_instr;
                break;
        default:
                panic("illegal hot-patch");
        }

        /*
         * Issue a memory barrier after the store, then sync_icache() both
         * the alias and the original address -- presumably so that the
         * new instruction, not a stale copy, is what gets fetched.
         */
        membar_enter();
        sync_icache(vaddr + off, size);
        sync_icache(iaddr, size);
        /* Undo the setup: unlock the page, drop the alias, free vaddr. */
        as_pageunlock(&kas, ppp, iaddr - off, PAGESIZE, S_WRITE);
        hat_unload(kas.a_hat, vaddr, PAGESIZE, HAT_UNLOAD_UNLOCK);
        vmem_free(heap_arena, vaddr, PAGESIZE);
}
 352 
 353 /*
 354  * Routine to report an attempt to execute non-executable data.  If the
 355  * address executed lies in the stack, explicitly say so.
 356  */
 357 void
 358 report_stack_exec(proc_t *p, caddr_t addr)
 359 {
 360         if (!noexec_user_stack_log)
 361                 return;
 362 
 363         if (addr < p->p_usrstack && addr >= (p->p_usrstack - p->p_stksize)) {
 364                 cmn_err(CE_NOTE, "%s[%d] attempt to execute code "
 365                     "on stack by uid %d", p->p_user.u_comm,
 366                     p->p_pid, crgetruid(p->p_cred));
 367         } else {
 368                 cmn_err(CE_NOTE, "%s[%d] attempt to execute non-executable "
 369                     "data at 0x%p by uid %d", p->p_user.u_comm,
 370                     p->p_pid, (void *) addr, crgetruid(p->p_cred));
 371         }
 372 
 373         delay(hz / 50);
 374 }
 375 
 376 /*
 377  * Determine whether the address range [addr, addr + len) is in memlist mp.
 378  */
 379 int
 380 address_in_memlist(struct memlist *mp, uint64_t addr, size_t len)
 381 {
 382         while (mp != 0)  {
 383                 if ((addr >= mp->ml_address) &&
 384                     (addr + len <= mp->ml_address + mp->ml_size))
 385                         return (1);      /* TRUE */
 386                 mp = mp->ml_next;
 387         }
 388         return (0);     /* FALSE */
 389 }
 390 
 391 /*
 392  * Pop the topmost element from the t_ontrap stack, removing the current set of
 393  * on_trap() protections.  Refer to <sys/ontrap.h> for more info.  If the
 394  * stack is already empty, no_trap() just returns.
 395  */
 396 void
 397 no_trap(void)
 398 {
 399         if (curthread->t_ontrap != NULL) {
 400 #ifdef __sparc
 401                 membar_sync(); /* deferred error barrier (see sparcv9_subr.s) */
 402 #endif
 403                 curthread->t_ontrap = curthread->t_ontrap->ot_prev;
 404         }
 405 }
 406 
 407 /*
 408  * Return utsname.nodename outside a zone, or the zone name within.
 409  */
 410 char *
 411 uts_nodename(void)
 412 {
 413         if (curproc == NULL)
 414                 return (utsname.nodename);
 415         return (curproc->p_zone->zone_nodename);
 416 }