1 /*
   2  * This file and its contents are supplied under the terms of the
   3  * Common Development and Distribution License ("CDDL"), version 1.0.
   4  * You may only use this file in accordance with the terms of version
   5  * 1.0 of the CDDL.
   6  *
   7  * A full copy of the text of the CDDL should have accompanied this
   8  * source.  A copy of the CDDL is also available via the Internet at
   9  * http://www.illumos.org/license/CDDL.
  10  */
  11 
  12 /*
  13  * Copyright (c) 2015 Joyent, Inc.  All rights reserved.
  14  */
  15 
  16 /*
  17  * bootfs vnode operations
  18  */
  19 
  20 #include <sys/types.h>
  21 #include <sys/uio.h>
  22 #include <sys/sunddi.h>
  23 #include <sys/errno.h>
  24 #include <sys/vfs_opreg.h>
  25 #include <sys/vnode.h>
  26 #include <sys/mman.h>
  27 #include <fs/fs_subr.h>
  28 #include <sys/policy.h>
  29 #include <sys/sysmacros.h>
  30 #include <sys/dirent.h>
  31 #include <sys/uio.h>
  32 #include <vm/pvn.h>
  33 #include <vm/hat.h>
  34 #include <vm/seg_map.h>
  35 #include <vm/seg_vn.h>
  36 #include <sys/vmsystm.h>
  37 
  38 #include <sys/fs/bootfs_impl.h>
  39 
/*
 * Vnode operations vector for bootfs. It is only declared here; presumably it
 * is populated elsewhere from bootfs_vnodeops_template -- TODO confirm.
 */
struct vnodeops *bootfs_vnodeops;
  41 
  42 /*ARGSUSED*/
  43 static int
  44 bootfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
  45 {
  46         return (0);
  47 }
  48 
  49 /*ARGSUSED*/
  50 static int
  51 bootfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
  52     caller_context_t *ct)
  53 {
  54         return (0);
  55 }
  56 
  57 /*ARGSUSED*/
  58 static int
  59 bootfs_read(vnode_t *vp, struct uio *uiop, int ioflag, cred_t *cr,
  60     caller_context_t *ct)
  61 {
  62         int err;
  63         ssize_t sres = uiop->uio_resid;
  64         bootfs_node_t *bnp = vp->v_data;
  65 
  66         if (vp->v_type == VDIR)
  67                 return (EISDIR);
  68 
  69         if (vp->v_type != VREG)
  70                 return (EINVAL);
  71 
  72         if (uiop->uio_loffset < 0)
  73                 return (EINVAL);
  74 
  75         if (uiop->uio_loffset >= bnp->bvn_size)
  76                 return (0);
  77 
  78         err = 0;
  79         while (uiop->uio_resid != 0) {
  80                 caddr_t base;
  81                 long offset, frem;
  82                 ulong_t poff, segoff;
  83                 size_t bytes;
  84                 int relerr;
  85 
  86                 offset = uiop->uio_loffset;
  87                 poff = offset & PAGEOFFSET;
  88                 bytes = MIN(PAGESIZE - poff, uiop->uio_resid);
  89 
  90                 frem = bnp->bvn_size - offset;
  91                 if (frem <= 0) {
  92                         err = 0;
  93                         break;
  94                 }
  95 
  96                 /* Don't read past EOF */
  97                 bytes = MIN(bytes, frem);
  98 
  99                 /*
 100                  * Segmaps are likely larger than our page size, so make sure we
 101                  * have the proper offfset into the resulting segmap data.
 102                  */
 103                 segoff = (offset & PAGEMASK) & MAXBOFFSET;
 104 
 105                 base = segmap_getmapflt(segkmap, vp, offset & MAXBMASK, bytes,
 106                     1, S_READ);
 107 
 108                 err = uiomove(base + segoff + poff, bytes, UIO_READ, uiop);
 109                 relerr = segmap_release(segkmap, base, 0);
 110 
 111                 if (err == 0)
 112                         err = relerr;
 113 
 114                 if (err != 0)
 115                         break;
 116         }
 117 
 118         /* Even if we had an error in a partial read, return success */
 119         if (uiop->uio_resid > sres)
 120                 err = 0;
 121 
 122         gethrestime(&bnp->bvn_attr.va_atime);
 123 
 124         return (err);
 125 }
 126 
 127 /*ARGSUSED*/
 128 static int
 129 bootfs_ioctl(vnode_t *vp, int cmd, intptr_t data, int flag,
 130     cred_t *cr, int *rvalp, caller_context_t *ct)
 131 {
 132         return (ENOTTY);
 133 }
 134 
 135 /*ARGSUSED*/
 136 static int
 137 bootfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
 138     caller_context_t *ct)
 139 {
 140         uint32_t mask;
 141         bootfs_node_t *bpn = (bootfs_node_t *)vp->v_data;
 142 
 143         mask = vap->va_mask;
 144         bcopy(&bpn->bvn_attr, vap, sizeof (vattr_t));
 145         vap->va_mask = mask;
 146         return (0);
 147 }
 148 
 149 /*ARGSUSED*/
 150 static int
 151 bootfs_access(vnode_t *vp, int mode, int flags, cred_t *cr,
 152     caller_context_t *ct)
 153 {
 154         int shift = 0;
 155         bootfs_node_t *bpn = (bootfs_node_t *)vp->v_data;
 156 
 157         if (crgetuid(cr) != bpn->bvn_attr.va_uid) {
 158                 shift += 3;
 159                 if (groupmember(bpn->bvn_attr.va_gid, cr) == 0)
 160                         shift += 3;
 161         }
 162 
 163         return (secpolicy_vnode_access2(cr, vp, bpn->bvn_attr.va_uid,
 164             bpn->bvn_attr.va_mode << shift, mode));
 165 }
 166 
 167 /*ARGSUSED*/
 168 static int
 169 bootfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp,
 170     int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
 171     int *direntflags, pathname_t *realpnp)
 172 {
 173         avl_index_t where;
 174         bootfs_node_t sn, *bnp;
 175         bootfs_node_t *bpp = (bootfs_node_t *)dvp->v_data;
 176 
 177         if (flags & LOOKUP_XATTR)
 178                 return (EINVAL);
 179 
 180         if (bpp->bvn_attr.va_type != VDIR)
 181                 return (ENOTDIR);
 182 
 183         if (*nm == '\0' || strcmp(nm, ".") == 0) {
 184                 VN_HOLD(dvp);
 185                 *vpp = dvp;
 186                 return (0);
 187         }
 188 
 189         if (strcmp(nm, "..") == 0) {
 190                 VN_HOLD(bpp->bvn_parent->bvn_vnp);
 191                 *vpp = bpp->bvn_parent->bvn_vnp;
 192                 return (0);
 193         }
 194 
 195         sn.bvn_name = nm;
 196         bnp = avl_find(&bpp->bvn_dir, &sn, &where);
 197         if (bnp == NULL)
 198                 return (ENOENT);
 199 
 200         VN_HOLD(bnp->bvn_vnp);
 201         *vpp = bnp->bvn_vnp;
 202         return (0);
 203 }
 204 
 205 /*ARGSUSED*/
 206 static int
 207 bootfs_readdir(vnode_t *vp, struct uio *uiop, cred_t *cr, int *eofp,
 208     caller_context_t *ct, int flags)
 209 {
 210         bootfs_node_t *bnp = (bootfs_node_t *)vp->v_data;
 211         dirent64_t *dp;
 212         void *buf;
 213         ulong_t bsize, brem;
 214         offset_t coff, roff;
 215         int dlen, ret;
 216         bootfs_node_t *dnp;
 217         boolean_t first = B_TRUE;
 218 
 219         if (uiop->uio_loffset >= MAXOFF_T) {
 220                 if (eofp != NULL)
 221                         *eofp = 1;
 222                 return (0);
 223         }
 224 
 225         if (uiop->uio_iovcnt != 1)
 226                 return (EINVAL);
 227 
 228         if (!(uiop->uio_iov->iov_len > 0))
 229                 return (EINVAL);
 230 
 231         if (vp->v_type != VDIR)
 232                 return (ENOTDIR);
 233 
 234         roff = uiop->uio_loffset;
 235         coff = 0;
 236         brem = bsize = uiop->uio_iov->iov_len;
 237         buf = kmem_alloc(bsize, KM_SLEEP);
 238         dp = buf;
 239 
 240         /*
 241          * Recall that offsets here are done based on the name of the dirent
 242          * excluding the null terminator. Therefore `.` is always at 0, `..` is
 243          * always at 1, and then the first real dirent is at 3. This offset is
 244          * what's actually stored when we update the offset in the structure.
 245          */
 246         if (roff == 0) {
 247                 dlen = DIRENT64_RECLEN(1);
 248                 if (first == B_TRUE) {
 249                         if (dlen > brem) {
 250                                 kmem_free(buf, bsize);
 251                                 return (EINVAL);
 252                         }
 253                         first = B_FALSE;
 254                 }
 255                 dp->d_ino = (ino64_t)bnp->bvn_attr.va_nodeid;
 256                 dp->d_off = 0;
 257                 dp->d_reclen = (ushort_t)dlen;
 258                 (void) strncpy(dp->d_name, ".", DIRENT64_NAMELEN(dlen));
 259                 dp = (struct dirent64 *)((uintptr_t)dp + dp->d_reclen);
 260                 brem -= dlen;
 261         }
 262 
 263         if (roff <= 1) {
 264                 dlen = DIRENT64_RECLEN(2);
 265                 if (first == B_TRUE) {
 266                         if (dlen > brem) {
 267                                 kmem_free(buf, bsize);
 268                                 return (EINVAL);
 269                         }
 270                         first = B_FALSE;
 271                 }
 272                 dp->d_ino = (ino64_t)bnp->bvn_parent->bvn_attr.va_nodeid;
 273                 dp->d_off = 1;
 274                 dp->d_reclen = (ushort_t)dlen;
 275                 (void) strncpy(dp->d_name, "..", DIRENT64_NAMELEN(dlen));
 276                 dp = (struct dirent64 *)((uintptr_t)dp + dp->d_reclen);
 277                 brem -= dlen;
 278         }
 279 
 280         coff = 3;
 281         for (dnp = avl_first(&bnp->bvn_dir); dnp != NULL;
 282             dnp = AVL_NEXT(&bnp->bvn_dir, dnp)) {
 283                 size_t nlen = strlen(dnp->bvn_name);
 284 
 285                 if (roff > coff) {
 286                         coff += nlen;
 287                         continue;
 288                 }
 289 
 290                 dlen = DIRENT64_RECLEN(nlen);
 291                 if (dlen > brem) {
 292                         if (first == B_TRUE) {
 293                                 kmem_free(buf, bsize);
 294                                 return (EINVAL);
 295                         }
 296                         break;
 297                 }
 298                 first = B_FALSE;
 299 
 300                 dp->d_ino = (ino64_t)dnp->bvn_attr.va_nodeid;
 301                 dp->d_off = coff;
 302                 dp->d_reclen = (ushort_t)dlen;
 303                 (void) strncpy(dp->d_name, dnp->bvn_name,
 304                     DIRENT64_NAMELEN(dlen));
 305                 dp = (struct dirent64 *)((uintptr_t)dp + dp->d_reclen);
 306                 brem -= dlen;
 307                 coff += nlen;
 308         }
 309 
 310         ret = uiomove(buf, (bsize - brem), UIO_READ, uiop);
 311 
 312         if (ret == 0) {
 313                 if (dnp == NULL) {
 314                         coff++;
 315                         if (eofp != NULL)
 316                                 *eofp = 1;
 317                 } else if (eofp != NULL) {
 318                         *eofp = 0;
 319                 }
 320                 uiop->uio_loffset = coff;
 321         }
 322         gethrestime(&bnp->bvn_attr.va_atime);
 323         kmem_free(buf, bsize);
 324         return (ret);
 325 }
 326 
 327 /*ARGSUSED*/
 328 static void
 329 bootfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
 330 {
 331 }
 332 
 333 /*ARGSUSED*/
 334 static int
 335 bootfs_rwlock(vnode_t *vp, int write_lock, caller_context_t *ct)
 336 {
 337         if (write_lock != 0)
 338                 return (EINVAL);
 339         return (0);
 340 }
 341 
 342 /*ARGSUSED*/
 343 static void
 344 bootfs_rwunlock(vnode_t *vp, int write_lock, caller_context_t *ct)
 345 {
 346 }
 347 
 348 /*ARGSUSED*/
 349 static int
 350 bootfs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp,
 351     caller_context_t *ct)
 352 {
 353         bootfs_node_t *bnp = (bootfs_node_t *)vp->v_data;
 354         if (vp->v_type == VDIR)
 355                 return (0);
 356         return ((*noffp < 0 || *noffp > bnp->bvn_size ? EINVAL : 0));
 357 }
 358 
 359 /*
 360  * We need to fill in a single page of a vnode's memory based on the actual data
 361  * from the kernel. We'll use this node's sliding window into physical memory
 362  * and update one page at a time.
 363  */
 364 /*ARGSUSED*/
 365 static int
 366 bootfs_getapage(vnode_t *vp, u_offset_t off, size_t len, uint_t *protp,
 367     page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, enum seg_rw rw,
 368     cred_t *cr)
 369 {
 370         bootfs_node_t *bnp = vp->v_data;
 371         page_t *pp, *fpp;
 372         pfn_t pfn;
 373 
 374         for (;;) {
 375                 /* Easy case where the page exists */
 376                 pp = page_lookup(vp, off, rw == S_CREATE ? SE_EXCL : SE_SHARED);
 377                 if (pp != NULL) {
 378                         if (pl != NULL) {
 379                                 pl[0] = pp;
 380                                 pl[1] = NULL;
 381                         } else {
 382                                 page_unlock(pp);
 383                         }
 384                         return (0);
 385                 }
 386 
 387                 pp = page_create_va(vp, off, PAGESIZE, PG_EXCL | PG_WAIT, seg,
 388                     addr);
 389 
 390                 /*
 391                  * If we didn't get the page, that means someone else beat us to
 392                  * creating this so we need to try again.
 393                  */
 394                 if (pp != NULL)
 395                         break;
 396         }
 397 
 398         pfn = btop((bnp->bvn_addr + off) & PAGEMASK);
 399         fpp = page_numtopp_nolock(pfn);
 400 
 401         if (ppcopy(fpp, pp) == 0) {
 402                 pvn_read_done(pp, B_ERROR);
 403                 return (EIO);
 404         }
 405 
 406         if (pl != NULL) {
 407                 pvn_plist_init(pp, pl, plsz, off, PAGESIZE, rw);
 408         } else {
 409                 pvn_io_done(pp);
 410         }
 411 
 412         return (0);
 413 }
 414 
 415 /*ARGSUSED*/
 416 static int
 417 bootfs_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp,
 418     page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, enum seg_rw rw,
 419     cred_t *cr, caller_context_t *ct)
 420 {
 421         int err;
 422         bootfs_node_t *bnp = vp->v_data;
 423 
 424         if (off + len > bnp->bvn_size + PAGEOFFSET)
 425                 return (EFAULT);
 426 
 427         if (protp != NULL)
 428                 *protp = PROT_ALL;
 429 
 430         if (len <= PAGESIZE)
 431                 err = bootfs_getapage(vp, (u_offset_t)off, len, protp, pl,
 432                     plsz, seg, addr, rw, cr);
 433         else
 434                 err = pvn_getpages(bootfs_getapage, vp, (u_offset_t)off, len,
 435                     protp, pl, plsz, seg, addr, rw, cr);
 436 
 437         return (err);
 438 }
 439 
 440 /*ARGSUSED*/
 441 static int
 442 bootfs_map(vnode_t *vp, offset_t off, struct as *as, caddr_t *addrp,
 443     size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr,
 444     caller_context_t *ct)
 445 {
 446         int ret;
 447         segvn_crargs_t vn_a;
 448 
 449 #ifdef  _ILP32
 450         if (len > MAXOFF_T)
 451                 return (ENOMEM);
 452 #endif
 453 
 454         if (vp->v_flag & VNOMAP)
 455                 return (ENOSYS);
 456 
 457         if (off < 0 || off > MAXOFFSET_T - off)
 458                 return (ENXIO);
 459 
 460         if (vp->v_type != VREG)
 461                 return (ENODEV);
 462 
 463         if ((prot & PROT_WRITE) && (flags & MAP_SHARED))
 464                 return (ENOTSUP);
 465 
 466         as_rangelock(as);
 467         ret = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);
 468         if (ret != 0) {
 469                 as_rangeunlock(as);
 470                 return (ret);
 471         }
 472 
 473         vn_a.vp = vp;
 474         vn_a.offset = (u_offset_t)off;
 475         vn_a.type = flags & MAP_TYPE;
 476         vn_a.prot = prot;
 477         vn_a.maxprot = maxprot;
 478         vn_a.cred = cr;
 479         vn_a.amp = NULL;
 480         vn_a.flags = flags & ~MAP_TYPE;
 481         vn_a.szc = 0;
 482         vn_a.lgrp_mem_policy_flags = 0;
 483 
 484         ret = as_map(as, *addrp, len, segvn_create, &vn_a);
 485 
 486         as_rangeunlock(as);
 487         return (ret);
 488 
 489 }
 490 
 491 /*ARGSUSED*/
 492 static int
 493 bootfs_addmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr,
 494     size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr,
 495     caller_context_t *ct)
 496 {
 497         return (0);
 498 }
 499 
 500 /*ARGSUSED*/
 501 static int
 502 bootfs_delmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr,
 503     size_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cr,
 504     caller_context_t *ct)
 505 {
 506         return (0);
 507 }
 508 
 509 static int
 510 bootfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
 511     caller_context_t *ct)
 512 {
 513         int ret;
 514 
 515         switch (cmd) {
 516         case _PC_TIMESTAMP_RESOLUTION:
 517                 *valp = 1L;
 518                 ret = 0;
 519                 break;
 520         default:
 521                 ret = fs_pathconf(vp, cmd, valp, cr, ct);
 522         }
 523 
 524         return (ret);
 525 }
 526 
 527 const fs_operation_def_t bootfs_vnodeops_template[] = {
 528         VOPNAME_OPEN,           { .vop_open = bootfs_open },
 529         VOPNAME_CLOSE,          { .vop_close = bootfs_close },
 530         VOPNAME_READ,           { .vop_read = bootfs_read },
 531         VOPNAME_IOCTL,          { .vop_ioctl = bootfs_ioctl },
 532         VOPNAME_GETATTR,        { .vop_getattr = bootfs_getattr },
 533         VOPNAME_ACCESS,         { .vop_access = bootfs_access },
 534         VOPNAME_LOOKUP,         { .vop_lookup = bootfs_lookup },
 535         VOPNAME_READDIR,        { .vop_readdir = bootfs_readdir },
 536         VOPNAME_INACTIVE,       { .vop_inactive = bootfs_inactive },
 537         VOPNAME_RWLOCK,         { .vop_rwlock = bootfs_rwlock },
 538         VOPNAME_RWUNLOCK,       { .vop_rwunlock = bootfs_rwunlock },
 539         VOPNAME_SEEK,           { .vop_seek = bootfs_seek },
 540         VOPNAME_GETPAGE,        { .vop_getpage = bootfs_getpage },
 541         VOPNAME_MAP,            { .vop_map = bootfs_map },
 542         VOPNAME_ADDMAP,         { .vop_addmap = bootfs_addmap },
 543         VOPNAME_DELMAP,         { .vop_delmap = bootfs_delmap },
 544         VOPNAME_PATHCONF,       { .vop_pathconf = bootfs_pathconf },
 545         VOPNAME_VNEVENT,        { .vop_vnevent = fs_vnevent_nosupport },
 546         NULL,                   NULL
 547 };