1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /*
  27  * Copyright 2019 Joyent, Inc.
  28  */
  29 
  30 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T     */
  31 /*        All Rights Reserved   */
  32 
  33 #include <sys/types.h>
  34 #include <sys/sysmacros.h>
  35 #include <sys/param.h>
  36 #include <sys/vmparam.h>
  37 #include <sys/systm.h>
  38 #include <sys/cred.h>
  39 #include <sys/user.h>
  40 #include <sys/proc.h>
  41 #include <sys/conf.h>
  42 #include <sys/tuneable.h>
  43 #include <sys/cpuvar.h>
  44 #include <sys/archsystm.h>
  45 #include <sys/vmem.h>
  46 #include <vm/seg_kmem.h>
  47 #include <sys/errno.h>
  48 #include <sys/cmn_err.h>
  49 #include <sys/debug.h>
  50 #include <sys/atomic.h>
  51 #include <sys/model.h>
  52 #include <sys/kmem.h>
  53 #include <sys/memlist.h>
  54 #include <sys/autoconf.h>
  55 #include <sys/ontrap.h>
  56 #include <sys/utsname.h>
  57 #include <sys/zone.h>
  58 
  59 #ifdef __sparc
  60 #include <sys/membar.h>
  61 #endif
  62 
  63 /*
  64  * Routine which sets a user error; placed in
  65  * illegal entries in the bdevsw and cdevsw tables.
  66  */
  67 
  68 int
  69 nodev()
  70 {
  71         return (curthread->t_lwp ?
  72             ttolwp(curthread)->lwp_error = ENXIO : ENXIO);
  73 }
  74 
  75 /*
  76  * Null routine; placed in insignificant entries
  77  * in the bdevsw and cdevsw tables.
  78  */
  79 
  80 int
  81 nulldev()
  82 {
  83         return (0);
  84 }
  85 
  86 static kmutex_t udevlock;
  87 
  88 /*
  89  * Generate an unused major device number.
  90  */
  91 major_t
  92 getudev()
  93 {
  94         static major_t next = 0;
  95         major_t ret;
  96 
  97         /*
  98          * Ensure that we start allocating major numbers above the 'devcnt'
  99          * count.  The only limit we place on the number is that it should be a
 100          * legal 32-bit SVR4 major number and be greater than or equal to devcnt
 101          * in the current system).
 102          */
 103         mutex_enter(&udevlock);
 104         if (next == 0)
 105                 next = devcnt;
 106         if (next <= L_MAXMAJ32 && next >= devcnt)
 107                 ret = next++;
 108         else {
 109                 /*
 110                  * If we fail to allocate a major number because devcnt has
 111                  * reached L_MAXMAJ32, we may be the victim of a sparsely
 112                  * populated devnames array.  We scan the array backwards
 113                  * looking for an empty slot;  if we find one, mark it as
 114                  * DN_GETUDEV so it doesn't get taken by subsequent consumers
 115                  * users of the devnames array, and issue a warning.
 116                  * It is vital for this routine to take drastic measures to
 117                  * succeed, since the kernel really needs it to boot.
 118                  */
 119                 int i;
 120                 for (i = devcnt - 1; i >= 0; i--) {
 121                         LOCK_DEV_OPS(&devnamesp[i].dn_lock);
 122                         if (devnamesp[i].dn_name == NULL &&
 123                             ((devnamesp[i].dn_flags & DN_TAKEN_GETUDEV) == 0))
 124                                 break;
 125                         UNLOCK_DEV_OPS(&devnamesp[i].dn_lock);
 126                 }
 127                 if (i != -1) {
 128                         cmn_err(CE_WARN, "Reusing device major number %d.", i);
 129                         ASSERT(i >= 0 && i < devcnt);
 130                         devnamesp[i].dn_flags |= DN_TAKEN_GETUDEV;
 131                         UNLOCK_DEV_OPS(&devnamesp[i].dn_lock);
 132                         ret = (major_t)i;
 133                 } else {
 134                         ret = DDI_MAJOR_T_NONE;
 135                 }
 136         }
 137         mutex_exit(&udevlock);
 138         return (ret);
 139 }
 140 
 141 
 142 /*
 143  * Compress 'long' device number encoding to 32-bit device number
 144  * encoding.  If it won't fit, we return failure, but set the
 145  * device number to 32-bit NODEV for the sake of our callers.
 146  */
 147 int
 148 cmpldev(dev32_t *dst, dev_t dev)
 149 {
 150 #if defined(_LP64)
 151         if (dev == NODEV) {
 152                 *dst = NODEV32;
 153         } else {
 154                 major_t major = dev >> L_BITSMINOR;
 155                 minor_t minor = dev & L_MAXMIN;
 156 
 157                 if (major > L_MAXMAJ32 || minor > L_MAXMIN32) {
 158                         *dst = NODEV32;
 159                         return (0);
 160                 }
 161 
 162                 *dst = (dev32_t)((major << L_BITSMINOR32) | minor);
 163         }
 164 #else
 165         *dst = (dev32_t)dev;
 166 #endif
 167         return (1);
 168 }
 169 
 170 /*
 171  * Expand 32-bit dev_t's to long dev_t's.  Expansion always "fits"
 172  * into the return type, but we're careful to expand NODEV explicitly.
 173  */
 174 dev_t
 175 expldev(dev32_t dev32)
 176 {
 177 #ifdef _LP64
 178         if (dev32 == NODEV32)
 179                 return (NODEV);
 180         return (makedevice((dev32 >> L_BITSMINOR32) & L_MAXMAJ32,
 181             dev32 & L_MAXMIN32));
 182 #else
 183         return ((dev_t)dev32);
 184 #endif
 185 }
 186 
 187 #ifndef _LP64
 188 /*
 189  * Keep these entry points for 32-bit systems but enforce the use
 190  * of MIN/MAX macros on 64-bit systems.  The DDI header files already
 191  * define min/max as macros so drivers shouldn't need these functions.
 192  */
 193 
 194 int
 195 min(int a, int b)
 196 {
 197         return (a < b ? a : b);
 198 }
 199 
 200 int
 201 max(int a, int b)
 202 {
 203         return (a > b ? a : b);
 204 }
 205 
 206 uint_t
 207 umin(uint_t a, uint_t b)
 208 {
 209         return (a < b ? a : b);
 210 }
 211 
 212 uint_t
 213 umax(uint_t a, uint_t b)
 214 {
 215         return (a > b ? a : b);
 216 }
 217 
 218 #endif /* !_LP64 */
 219 
 220 /*
 221  * Parse suboptions from a string.
 222  * Same as getsubopt(3C).
 223  */
 224 int
 225 getsubopt(char **optionsp, char * const *tokens, char **valuep)
 226 {
 227         char *s = *optionsp, *p;
 228         int i;
 229         size_t optlen;
 230 
 231         *valuep = NULL;
 232         if (*s == '\0')
 233                 return (-1);
 234         p = strchr(s, ',');             /* find next option */
 235         if (p == NULL) {
 236                 p = s + strlen(s);
 237         } else {
 238                 *p++ = '\0';            /* mark end and point to next */
 239         }
 240         *optionsp = p;                  /* point to next option */
 241         p = strchr(s, '=');             /* find value */
 242         if (p == NULL) {
 243                 optlen = strlen(s);
 244                 *valuep = NULL;
 245         } else {
 246                 optlen = p - s;
 247                 *valuep = ++p;
 248         }
 249         for (i = 0; tokens[i] != NULL; i++) {
 250                 if ((optlen == strlen(tokens[i])) &&
 251                     (strncmp(s, tokens[i], optlen) == 0))
 252                         return (i);
 253         }
 254         /* no match, point value at option and return error */
 255         *valuep = s;
 256         return (-1);
 257 }
 258 
 259 /*
 260  * Append the suboption string 'opt' starting at the position 'str'
 261  * within the buffer defined by 'buf' and 'len'. If 'buf' is not null,
 262  * a comma is appended first.
 263  * Return a pointer to the end of the resulting string (the null byte).
 264  * Return NULL if there isn't enough space left to append 'opt'.
 265  */
 266 char *
 267 append_subopt(const char *buf, size_t len, char *str, const char *opt)
 268 {
 269         size_t l = strlen(opt);
 270 
 271         /*
 272          * Include a ',' if this is not the first option.
 273          * Include space for the null byte.
 274          */
 275         if (strlen(buf) + (buf[0] != '\0') + l + 1 > len)
 276                 return (NULL);
 277 
 278         if (buf[0] != '\0')
 279                 *str++ = ',';
 280         (void) strcpy(str, opt);
 281         return (str + l);
 282 }
 283 
/*
 * Tables to convert a single byte to/from binary-coded decimal (BCD).
 *
 * byte_to_bcd is indexed by a binary value.  Entries 0 through 99 hold the
 * packed BCD encoding of the index: the tens digit in the high nibble and
 * the ones digit in the low nibble (e.g. index 42 yields 0x42).  Only 100
 * initializers are supplied, so entries 100 through 255 are implicitly
 * zero.
 */
uchar_t byte_to_bcd[256] = {
        0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
        0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,
        0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29,
        0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
        0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
        0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
        0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
        0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
        0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
        0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
};
 299 
/*
 * bcd_to_byte is indexed by a packed BCD byte (tens digit in the high
 * nibble, ones digit in the low nibble) and yields the binary value, so
 * index 0x42 yields 42.  Each row below covers one value of the high
 * nibble.  Indices whose low nibble is 0xA through 0xF are not valid BCD
 * and map to zero, as do all indices above 0x99, for which no initializer
 * is supplied.
 */
uchar_t bcd_to_byte[256] = {            /* CSTYLED */
         0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  0,  0,  0,  0,  0,  0,
        10, 11, 12, 13, 14, 15, 16, 17, 18, 19,  0,  0,  0,  0,  0,  0,
        20, 21, 22, 23, 24, 25, 26, 27, 28, 29,  0,  0,  0,  0,  0,  0,
        30, 31, 32, 33, 34, 35, 36, 37, 38, 39,  0,  0,  0,  0,  0,  0,
        40, 41, 42, 43, 44, 45, 46, 47, 48, 49,  0,  0,  0,  0,  0,  0,
        50, 51, 52, 53, 54, 55, 56, 57, 58, 59,  0,  0,  0,  0,  0,  0,
        60, 61, 62, 63, 64, 65, 66, 67, 68, 69,  0,  0,  0,  0,  0,  0,
        70, 71, 72, 73, 74, 75, 76, 77, 78, 79,  0,  0,  0,  0,  0,  0,
        80, 81, 82, 83, 84, 85, 86, 87, 88, 89,  0,  0,  0,  0,  0,  0,
        90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
};
 312 
/*
 * Hot-patch a single instruction in the kernel's text.
 *
 * If you want to patch multiple instructions you must arrange to do it so that
 * all intermediate stages are sane -- we don't stop other cpus while doing
 * this.
 *
 * Size must be 1, 2, or 4 bytes with iaddr aligned accordingly.
 *
 * The instruction itself might straddle a page boundary, so we have to account
 * for that.
 *
 * Arguments:
 *   iaddr     - kernel address of the instruction to overwrite
 *   new_instr - replacement value; only the low-order 'size' bytes are stored
 *   size      - number of bytes to store; any value other than 1, 2 or 4
 *               panics
 *
 * The store is made through a temporary read/write mapping of the page(s)
 * containing iaddr rather than through iaddr itself.
 * NOTE(review): presumably because kernel text is not mapped writable --
 * confirm.
 */
void
hot_patch_kernel_text(caddr_t iaddr, uint32_t new_instr, uint_t size)
{
        /* Offset of iaddr within its page. */
        const uintptr_t pageoff = (uintptr_t)iaddr & PAGEOFFSET;
        /* True when the patched bytes run past the end of that page. */
        const boolean_t straddles = (pageoff + size > PAGESIZE);
        /* Map two pages when straddling, otherwise one. */
        const size_t mapsize = straddles ? PAGESIZE * 2 : PAGESIZE;
        /* Start of the page containing iaddr. */
        caddr_t ipageaddr = iaddr - pageoff;
        caddr_t vaddr;
        page_t **ppp;

        /* Reserve kernel virtual address space for the temporary alias. */
        vaddr = vmem_alloc(heap_arena, mapsize, VM_SLEEP);

        /* The result of as_pagelock() is deliberately ignored. */
        (void) as_pagelock(&kas, &ppp, ipageaddr, mapsize, S_WRITE);

        /* Map the frame behind the first page at vaddr, read/write. */
        hat_devload(kas.a_hat, vaddr, PAGESIZE,
            hat_getpfnum(kas.a_hat, ipageaddr), PROT_READ | PROT_WRITE,
            HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST);

        /* Likewise for the following page, directly after the first. */
        if (straddles) {
                hat_devload(kas.a_hat, vaddr + PAGESIZE, PAGESIZE,
                    hat_getpfnum(kas.a_hat, ipageaddr + PAGESIZE),
                    PROT_READ | PROT_WRITE, HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST);
        }

        /*
         * Store the new instruction through the alias with a single
         * assignment of the requested width.  Note that an illegal size is
         * only detected here, after the mappings have been established.
         */
        switch (size) {
        case 1:
                *(uint8_t *)(vaddr + pageoff) = new_instr;
                break;
        case 2:
                *(uint16_t *)(vaddr + pageoff) = new_instr;
                break;
        case 4:
                *(uint32_t *)(vaddr + pageoff) = new_instr;
                break;
        default:
                panic("illegal hot-patch");
        }

        /*
         * Issue a memory barrier, then sync the instruction cache for both
         * the alias and the original address.
         * NOTE(review): the exact ordering guarantees depend on the platform
         * implementations of membar_enter() and sync_icache() -- confirm.
         */
        membar_enter();
        sync_icache(vaddr + pageoff, size);
        sync_icache(iaddr, size);
        /* Tear down in reverse: unlock pages, unmap alias, release the VA. */
        as_pageunlock(&kas, ppp, ipageaddr, mapsize, S_WRITE);
        hat_unload(kas.a_hat, vaddr, mapsize, HAT_UNLOAD_UNLOCK);
        vmem_free(heap_arena, vaddr, mapsize);
}
 370 
 371 /*
 372  * Routine to report an attempt to execute non-executable data.  If the
 373  * address executed lies in the stack, explicitly say so.
 374  */
 375 void
 376 report_stack_exec(proc_t *p, caddr_t addr)
 377 {
 378         if (!noexec_user_stack_log)
 379                 return;
 380 
 381         if (addr < p->p_usrstack && addr >= (p->p_usrstack - p->p_stksize)) {
 382                 cmn_err(CE_NOTE, "%s[%d] attempt to execute code "
 383                     "on stack by uid %d", p->p_user.u_comm,
 384                     p->p_pid, crgetruid(p->p_cred));
 385         } else {
 386                 cmn_err(CE_NOTE, "%s[%d] attempt to execute non-executable "
 387                     "data at 0x%p by uid %d", p->p_user.u_comm,
 388                     p->p_pid, (void *) addr, crgetruid(p->p_cred));
 389         }
 390 
 391         delay(hz / 50);
 392 }
 393 
 394 /*
 395  * Determine whether the address range [addr, addr + len) is in memlist mp.
 396  */
 397 int
 398 address_in_memlist(struct memlist *mp, uint64_t addr, size_t len)
 399 {
 400         while (mp != 0)  {
 401                 if ((addr >= mp->ml_address) &&
 402                     (addr + len <= mp->ml_address + mp->ml_size))
 403                         return (1);      /* TRUE */
 404                 mp = mp->ml_next;
 405         }
 406         return (0);     /* FALSE */
 407 }
 408 
 409 /*
 410  * Pop the topmost element from the t_ontrap stack, removing the current set of
 411  * on_trap() protections.  Refer to <sys/ontrap.h> for more info.  If the
 412  * stack is already empty, no_trap() just returns.
 413  */
 414 void
 415 no_trap(void)
 416 {
 417         if (curthread->t_ontrap != NULL) {
 418 #ifdef __sparc
 419                 membar_sync(); /* deferred error barrier (see sparcv9_subr.s) */
 420 #endif
 421                 curthread->t_ontrap = curthread->t_ontrap->ot_prev;
 422         }
 423 }
 424 
 425 /*
 426  * Return utsname.nodename outside a zone, or the zone name within.
 427  */
 428 char *
 429 uts_nodename(void)
 430 {
 431         if (curproc == NULL)
 432                 return (utsname.nodename);
 433         return (curproc->p_zone->zone_nodename);
 434 }