1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
  24  * Copyright (c) 2013, Joyent, Inc.  All rights reserved.
  25  * Copyright 2017 RackTop Systems.
  26  */
  27 
  28 #include <assert.h>
  29 #include <fcntl.h>
  30 #include <poll.h>
  31 #include <stdio.h>
  32 #include <stdlib.h>
  33 #include <string.h>
  34 #include <zlib.h>
  35 #include <libgen.h>
  36 #include <sys/spa.h>
  37 #include <sys/stat.h>
  38 #include <sys/processor.h>
  39 #include <sys/zfs_context.h>
  40 #include <sys/rrwlock.h>
  41 #include <sys/zmod.h>
  42 #include <sys/utsname.h>
  43 #include <sys/systeminfo.h>
  44 
/* Defined outside this file; called from kernel_init() and kernel_fini(). */
extern void system_taskq_init(void);
extern void system_taskq_fini(void);

/*
 * Emulation of kernel services in userland.
 */

/* Set from sysconf(_SC_PHYS_PAGES) in kernel_init(). */
pgcnt_t physmem;
/*
 * Placeholder root vnode.  Within this file it is only passed to, and
 * compared by address in, vn_openat().
 */
vnode_t *rootdir = (vnode_t *)0xabcd1234;
/* Filled in by kernel_init() with a decimal string ("%ld"). */
char hw_serial[HW_HOSTID_LEN];
/* Initialized with mutex_init() in kernel_init(). */
kmutex_t cpu_lock;
vmem_t *zio_arena = NULL;

/* If set, all blocks read will be copied to the specified directory. */
char *vn_dumpdir = NULL;

/* Fixed identification strings for the userland build. */
struct utsname utsname = {
        "userland", "libzpool", "1", "1", "na"
};
  64 
  65 /*
  66  * =========================================================================
  67  * vnode operations
  68  * =========================================================================
  69  */
  70 /*
  71  * Note: for the xxxat() versions of these functions, we assume that the
  72  * starting vp is always rootdir (which is true for spa_directory.c, the only
  73  * ZFS consumer of these interfaces).  We assert this is true, and then emulate
  74  * them by adding '/' in front of the path.
  75  */
  76 
  77 /*ARGSUSED*/
  78 int
  79 vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3)
  80 {
  81         int fd;
  82         int dump_fd;
  83         vnode_t *vp;
  84         int old_umask;
  85         char realpath[MAXPATHLEN];
  86         struct stat64 st;
  87 
  88         /*
  89          * If we're accessing a real disk from userland, we need to use
  90          * the character interface to avoid caching.  This is particularly
  91          * important if we're trying to look at a real in-kernel storage
  92          * pool from userland, e.g. via zdb, because otherwise we won't
  93          * see the changes occurring under the segmap cache.
  94          * On the other hand, the stupid character device returns zero
  95          * for its size.  So -- gag -- we open the block device to get
  96          * its size, and remember it for subsequent VOP_GETATTR().
  97          */
  98         if (strncmp(path, "/dev/", 5) == 0) {
  99                 char *dsk;
 100                 fd = open64(path, O_RDONLY);
 101                 if (fd == -1)
 102                         return (errno);
 103                 if (fstat64(fd, &st) == -1) {
 104                         close(fd);
 105                         return (errno);
 106                 }
 107                 close(fd);
 108                 (void) sprintf(realpath, "%s", path);
 109                 dsk = strstr(path, "/dsk/");
 110                 if (dsk != NULL)
 111                         (void) sprintf(realpath + (dsk - path) + 1, "r%s",
 112                             dsk + 1);
 113         } else {
 114                 (void) sprintf(realpath, "%s", path);
 115                 if (!(flags & FCREAT) && stat64(realpath, &st) == -1)
 116                         return (errno);
 117         }
 118 
 119         if (flags & FCREAT)
 120                 old_umask = umask(0);
 121 
 122         /*
 123          * The construct 'flags - FREAD' conveniently maps combinations of
 124          * FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR.
 125          */
 126         fd = open64(realpath, flags - FREAD, mode);
 127 
 128         if (flags & FCREAT)
 129                 (void) umask(old_umask);
 130 
 131         if (vn_dumpdir != NULL) {
 132                 char dumppath[MAXPATHLEN];
 133                 (void) snprintf(dumppath, sizeof (dumppath),
 134                     "%s/%s", vn_dumpdir, basename(realpath));
 135                 dump_fd = open64(dumppath, O_CREAT | O_WRONLY, 0666);
 136                 if (dump_fd == -1)
 137                         return (errno);
 138         } else {
 139                 dump_fd = -1;
 140         }
 141 
 142         if (fd == -1)
 143                 return (errno);
 144 
 145         if (fstat64(fd, &st) == -1) {
 146                 close(fd);
 147                 return (errno);
 148         }
 149 
 150         (void) fcntl(fd, F_SETFD, FD_CLOEXEC);
 151 
 152         *vpp = vp = umem_zalloc(sizeof (vnode_t), UMEM_NOFAIL);
 153 
 154         vp->v_fd = fd;
 155         vp->v_size = st.st_size;
 156         vp->v_path = spa_strdup(path);
 157         vp->v_dump_fd = dump_fd;
 158 
 159         return (0);
 160 }
 161 
 162 /*ARGSUSED*/
 163 int
 164 vn_openat(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2,
 165     int x3, vnode_t *startvp, int fd)
 166 {
 167         char *realpath = umem_alloc(strlen(path) + 2, UMEM_NOFAIL);
 168         int ret;
 169 
 170         ASSERT(startvp == rootdir);
 171         (void) sprintf(realpath, "/%s", path);
 172 
 173         /* fd ignored for now, need if want to simulate nbmand support */
 174         ret = vn_open(realpath, x1, flags, mode, vpp, x2, x3);
 175 
 176         umem_free(realpath, strlen(path) + 2);
 177 
 178         return (ret);
 179 }
 180 
 181 /*ARGSUSED*/
 182 int
 183 vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset,
 184     int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp)
 185 {
 186         ssize_t iolen, split;
 187 
 188         if (uio == UIO_READ) {
 189                 iolen = pread64(vp->v_fd, addr, len, offset);
 190                 if (vp->v_dump_fd != -1) {
 191                         int status =
 192                             pwrite64(vp->v_dump_fd, addr, iolen, offset);
 193                         ASSERT(status != -1);
 194                 }
 195         } else {
 196                 /*
 197                  * To simulate partial disk writes, we split writes into two
 198                  * system calls so that the process can be killed in between.
 199                  */
 200                 int sectors = len >> SPA_MINBLOCKSHIFT;
 201                 split = (sectors > 0 ? rand() % sectors : 0) <<
 202                     SPA_MINBLOCKSHIFT;
 203                 iolen = pwrite64(vp->v_fd, addr, split, offset);
 204                 iolen += pwrite64(vp->v_fd, (char *)addr + split,
 205                     len - split, offset + split);
 206         }
 207 
 208         if (iolen == -1)
 209                 return (errno);
 210         if (residp)
 211                 *residp = len - iolen;
 212         else if (iolen != len)
 213                 return (EIO);
 214         return (0);
 215 }
 216 
 217 void
 218 vn_close(vnode_t *vp)
 219 {
 220         close(vp->v_fd);
 221         if (vp->v_dump_fd != -1)
 222                 close(vp->v_dump_fd);
 223         spa_strfree(vp->v_path);
 224         umem_free(vp, sizeof (vnode_t));
 225 }
 226 
 227 /*
 228  * At a minimum we need to update the size since vdev_reopen()
 229  * will no longer call vn_openat().
 230  */
 231 int
 232 fop_getattr(vnode_t *vp, vattr_t *vap)
 233 {
 234         struct stat64 st;
 235 
 236         if (fstat64(vp->v_fd, &st) == -1) {
 237                 close(vp->v_fd);
 238                 return (errno);
 239         }
 240 
 241         vap->va_size = st.st_size;
 242         return (0);
 243 }
 244 
 245 #ifdef ZFS_DEBUG
 246 
 247 /*
 248  * =========================================================================
 249  * Figure out which debugging statements to print
 250  * =========================================================================
 251  */
 252 
 253 static char *dprintf_string;
 254 static int dprintf_print_all;
 255 
 256 int
 257 dprintf_find_string(const char *string)
 258 {
 259         char *tmp_str = dprintf_string;
 260         int len = strlen(string);
 261 
 262         /*
 263          * Find out if this is a string we want to print.
 264          * String format: file1.c,function_name1,file2.c,file3.c
 265          */
 266 
 267         while (tmp_str != NULL) {
 268                 if (strncmp(tmp_str, string, len) == 0 &&
 269                     (tmp_str[len] == ',' || tmp_str[len] == '\0'))
 270                         return (1);
 271                 tmp_str = strchr(tmp_str, ',');
 272                 if (tmp_str != NULL)
 273                         tmp_str++; /* Get rid of , */
 274         }
 275         return (0);
 276 }
 277 
 278 void
 279 dprintf_setup(int *argc, char **argv)
 280 {
 281         int i, j;
 282 
 283         /*
 284          * Debugging can be specified two ways: by setting the
 285          * environment variable ZFS_DEBUG, or by including a
 286          * "debug=..."  argument on the command line.  The command
 287          * line setting overrides the environment variable.
 288          */
 289 
 290         for (i = 1; i < *argc; i++) {
 291                 int len = strlen("debug=");
 292                 /* First look for a command line argument */
 293                 if (strncmp("debug=", argv[i], len) == 0) {
 294                         dprintf_string = argv[i] + len;
 295                         /* Remove from args */
 296                         for (j = i; j < *argc; j++)
 297                                 argv[j] = argv[j+1];
 298                         argv[j] = NULL;
 299                         (*argc)--;
 300                 }
 301         }
 302 
 303         if (dprintf_string == NULL) {
 304                 /* Look for ZFS_DEBUG environment variable */
 305                 dprintf_string = getenv("ZFS_DEBUG");
 306         }
 307 
 308         /*
 309          * Are we just turning on all debugging?
 310          */
 311         if (dprintf_find_string("on"))
 312                 dprintf_print_all = 1;
 313 
 314         if (dprintf_string != NULL)
 315                 zfs_flags |= ZFS_DEBUG_DPRINTF;
 316 }
 317 
 318 /*
 319  * =========================================================================
 320  * debug printfs
 321  * =========================================================================
 322  */
 323 void
 324 __dprintf(const char *file, const char *func, int line, const char *fmt, ...)
 325 {
 326         const char *newfile;
 327         va_list adx;
 328 
 329         /*
 330          * Get rid of annoying "../common/" prefix to filename.
 331          */
 332         newfile = strrchr(file, '/');
 333         if (newfile != NULL) {
 334                 newfile = newfile + 1; /* Get rid of leading / */
 335         } else {
 336                 newfile = file;
 337         }
 338 
 339         if (dprintf_print_all ||
 340             dprintf_find_string(newfile) ||
 341             dprintf_find_string(func)) {
 342                 /* Print out just the function name if requested */
 343                 flockfile(stdout);
 344                 if (dprintf_find_string("pid"))
 345                         (void) printf("%d ", getpid());
 346                 if (dprintf_find_string("tid"))
 347                         (void) printf("%u ", thr_self());
 348                 if (dprintf_find_string("cpu"))
 349                         (void) printf("%u ", getcpuid());
 350                 if (dprintf_find_string("time"))
 351                         (void) printf("%llu ", gethrtime());
 352                 if (dprintf_find_string("long"))
 353                         (void) printf("%s, line %d: ", newfile, line);
 354                 (void) printf("%s: ", func);
 355                 va_start(adx, fmt);
 356                 (void) vprintf(fmt, adx);
 357                 va_end(adx);
 358                 funlockfile(stdout);
 359         }
 360 }
 361 
 362 #endif /* ZFS_DEBUG */
 363 
 364 /*
 365  * =========================================================================
 366  * kobj interfaces
 367  * =========================================================================
 368  */
 369 struct _buf *
 370 kobj_open_file(char *name)
 371 {
 372         struct _buf *file;
 373         vnode_t *vp;
 374 
 375         /* set vp as the _fd field of the file */
 376         if (vn_openat(name, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0, rootdir,
 377             -1) != 0)
 378                 return ((void *)-1UL);
 379 
 380         file = umem_zalloc(sizeof (struct _buf), UMEM_NOFAIL);
 381         file->_fd = (intptr_t)vp;
 382         return (file);
 383 }
 384 
 385 int
 386 kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off)
 387 {
 388         ssize_t resid;
 389 
 390         vn_rdwr(UIO_READ, (vnode_t *)file->_fd, buf, size, (offset_t)off,
 391             UIO_SYSSPACE, 0, 0, 0, &resid);
 392 
 393         return (size - resid);
 394 }
 395 
 396 void
 397 kobj_close_file(struct _buf *file)
 398 {
 399         vn_close((vnode_t *)file->_fd);
 400         umem_free(file, sizeof (struct _buf));
 401 }
 402 
 403 int
 404 kobj_get_filesize(struct _buf *file, uint64_t *size)
 405 {
 406         struct stat64 st;
 407         vnode_t *vp = (vnode_t *)file->_fd;
 408 
 409         if (fstat64(vp->v_fd, &st) == -1) {
 410                 vn_close(vp);
 411                 return (errno);
 412         }
 413         *size = st.st_size;
 414         return (0);
 415 }
 416 
 417 /*
 418  * =========================================================================
 419  * kernel emulation setup & teardown
 420  * =========================================================================
 421  */
 422 static int
 423 umem_out_of_memory(void)
 424 {
 425         char errmsg[] = "out of memory -- generating core dump\n";
 426 
 427         write(fileno(stderr), errmsg, sizeof (errmsg));
 428         abort();
 429         return (0);
 430 }
 431 
 432 void
 433 kernel_init(int mode)
 434 {
 435         extern uint_t rrw_tsd_key;
 436 
 437         umem_nofail_callback(umem_out_of_memory);
 438 
 439         physmem = sysconf(_SC_PHYS_PAGES);
 440 
 441         dprintf("physmem = %llu pages (%.2f GB)\n", physmem,
 442             (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30));
 443 
 444         (void) snprintf(hw_serial, sizeof (hw_serial), "%ld",
 445             (mode & FWRITE) ? gethostid() : 0);
 446 
 447         system_taskq_init();
 448 
 449         mutex_init(&cpu_lock, NULL, MUTEX_DEFAULT, NULL);
 450 
 451         spa_init(mode);
 452 
 453         tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
 454 }
 455 
/*
 * Tear down the userland kernel emulation: calls spa_fini() and then
 * system_taskq_fini().
 */
void
kernel_fini(void)
{
        spa_fini();

        system_taskq_fini();
}
 463 
 464 int
 465 z_uncompress(void *dst, size_t *dstlen, const void *src, size_t srclen)
 466 {
 467         int ret;
 468         uLongf len = *dstlen;
 469 
 470         if ((ret = uncompress(dst, &len, src, srclen)) == Z_OK)
 471                 *dstlen = (size_t)len;
 472 
 473         return (ret);
 474 }
 475 
 476 int
 477 z_compress_level(void *dst, size_t *dstlen, const void *src, size_t srclen,
 478     int level)
 479 {
 480         int ret;
 481         uLongf len = *dstlen;
 482 
 483         if ((ret = compress2(dst, &len, src, srclen, level)) == Z_OK)
 484                 *dstlen = (size_t)len;
 485 
 486         return (ret);
 487 }
 488 
/*
 * Userland stub: performs no check and always returns 0.
 * NOTE(review): presumably 0 means "permitted" -- confirm against callers.
 */
int
zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
{
        return (0);
}

/*
 * Userland stub: performs no check and always returns 0.
 */
int
zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
{
        return (0);
}

/*
 * Userland stub: performs no check and always returns 0.
 */
int
zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
{
        return (0);
}
 506 
/*
 * Userland stub: ignores 'fd', sets *minorp to 0 and returns 0.
 */
/* ARGSUSED */
int
zfs_onexit_fd_hold(int fd, minor_t *minorp)
{
        *minorp = 0;
        return (0);
}

/*
 * Userland stub: does nothing.
 */
/* ARGSUSED */
void
zfs_onexit_fd_rele(int fd)
{
}

/*
 * Userland stub: registers nothing and returns 0.  Note that
 * *action_handle is not written.
 */
/* ARGSUSED */
int
zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data,
    uint64_t *action_handle)
{
        return (0);
}

/*
 * Userland stub: does nothing and returns 0.
 */
/* ARGSUSED */
int
zfs_onexit_del_cb(minor_t minor, uint64_t action_handle, boolean_t fire)
{
        return (0);
}

/*
 * Userland stub: returns 0 without writing *data.
 * NOTE(review): callers that read *data after a 0 return would see whatever
 * they initialized it to -- confirm none depend on it here.
 */
/* ARGSUSED */
int
zfs_onexit_cb_data(minor_t minor, uint64_t action_handle, void **data)
{
        return (0);
}