1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright (c) 2015 Joyent, Inc. All rights reserved. 14 */ 15 16 /* 17 * bootfs vnode operations 18 */ 19 20 #include <sys/types.h> 21 #include <sys/uio.h> 22 #include <sys/sunddi.h> 23 #include <sys/errno.h> 24 #include <sys/vfs_opreg.h> 25 #include <sys/vnode.h> 26 #include <sys/mman.h> 27 #include <fs/fs_subr.h> 28 #include <sys/policy.h> 29 #include <sys/sysmacros.h> 30 #include <sys/dirent.h> 31 #include <sys/uio.h> 32 #include <vm/pvn.h> 33 #include <vm/hat.h> 34 #include <vm/seg_map.h> 35 #include <vm/seg_vn.h> 36 #include <sys/vmsystm.h> 37 38 #include <sys/fs/bootfs_impl.h> 39 40 struct vnodeops *bootfs_vnodeops; 41 42 /*ARGSUSED*/ 43 static int 44 bootfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) 45 { 46 return (0); 47 } 48 49 /*ARGSUSED*/ 50 static int 51 bootfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr, 52 caller_context_t *ct) 53 { 54 return (0); 55 } 56 57 /*ARGSUSED*/ 58 static int 59 bootfs_read(vnode_t *vp, struct uio *uiop, int ioflag, cred_t *cr, 60 caller_context_t *ct) 61 { 62 int err; 63 ssize_t sres = uiop->uio_resid; 64 bootfs_node_t *bnp = vp->v_data; 65 66 if (vp->v_type == VDIR) 67 return (EISDIR); 68 69 if (vp->v_type != VREG) 70 return (EINVAL); 71 72 if (uiop->uio_loffset < 0) 73 return (EINVAL); 74 75 if (uiop->uio_loffset >= bnp->bvn_size) 76 return (0); 77 78 err = 0; 79 while (uiop->uio_resid != 0) { 80 caddr_t base; 81 long offset, frem; 82 ulong_t poff, segoff; 83 size_t bytes; 84 int relerr; 85 86 offset = uiop->uio_loffset; 87 poff = offset & PAGEOFFSET; 88 bytes = MIN(PAGESIZE - poff, uiop->uio_resid); 89 90 frem = bnp->bvn_size - offset; 91 if (frem <= 0) { 92 err = 0; 93 break; 94 } 95 96 /* Don't read past EOF */ 97 bytes = MIN(bytes, frem); 98 99 /* 100 * Segmaps are likely larger than our page size, so make sure we 101 * have the proper offfset into the resulting segmap data. 102 */ 103 segoff = (offset & PAGEMASK) & MAXBOFFSET; 104 105 base = segmap_getmapflt(segkmap, vp, offset & MAXBMASK, bytes, 106 1, S_READ); 107 108 err = uiomove(base + segoff + poff, bytes, UIO_READ, uiop); 109 relerr = segmap_release(segkmap, base, 0); 110 111 if (err == 0) 112 err = relerr; 113 114 if (err != 0) 115 break; 116 } 117 118 /* Even if we had an error in a partial read, return success */ 119 if (uiop->uio_resid > sres) 120 err = 0; 121 122 gethrestime(&bnp->bvn_attr.va_atime); 123 124 return (err); 125 } 126 127 /*ARGSUSED*/ 128 static int 129 bootfs_ioctl(vnode_t *vp, int cmd, intptr_t data, int flag, 130 cred_t *cr, int *rvalp, caller_context_t *ct) 131 { 132 return (ENOTTY); 133 } 134 135 /*ARGSUSED*/ 136 static int 137 bootfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 138 caller_context_t *ct) 139 { 140 uint32_t mask; 141 bootfs_node_t *bpn = (bootfs_node_t *)vp->v_data; 142 143 mask = vap->va_mask; 144 bcopy(&bpn->bvn_attr, vap, sizeof (vattr_t)); 145 vap->va_mask = mask; 146 return (0); 147 } 148 149 /*ARGSUSED*/ 150 static int 151 bootfs_access(vnode_t *vp, int mode, int flags, cred_t *cr, 152 caller_context_t *ct) 153 { 154 int shift = 0; 155 bootfs_node_t *bpn = (bootfs_node_t *)vp->v_data; 156 157 if (crgetuid(cr) != bpn->bvn_attr.va_uid) { 158 shift += 3; 159 if (groupmember(bpn->bvn_attr.va_gid, cr) == 0) 160 shift += 3; 161 } 162 163 return (secpolicy_vnode_access2(cr, vp, bpn->bvn_attr.va_uid, 164 bpn->bvn_attr.va_mode << shift, mode)); 165 } 166 167 /*ARGSUSED*/ 168 static int 169 bootfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp, 170 int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct, 171 int *direntflags, pathname_t *realpnp) 172 { 173 avl_index_t where; 174 bootfs_node_t sn, *bnp; 175 bootfs_node_t *bpp = (bootfs_node_t *)dvp->v_data; 176 177 if (flags & LOOKUP_XATTR) 178 return (EINVAL); 179 180 if (bpp->bvn_attr.va_type != VDIR) 181 return (ENOTDIR); 182 183 if (*nm == '\0' || strcmp(nm, ".") == 0) { 184 VN_HOLD(dvp); 185 *vpp = dvp; 186 return (0); 187 } 188 189 if (strcmp(nm, "..") == 0) { 190 VN_HOLD(bpp->bvn_parent->bvn_vnp); 191 *vpp = bpp->bvn_parent->bvn_vnp; 192 return (0); 193 } 194 195 sn.bvn_name = nm; 196 bnp = avl_find(&bpp->bvn_dir, &sn, &where); 197 if (bnp == NULL) 198 return (ENOENT); 199 200 VN_HOLD(bnp->bvn_vnp); 201 *vpp = bnp->bvn_vnp; 202 return (0); 203 } 204 205 /*ARGSUSED*/ 206 static int 207 bootfs_readdir(vnode_t *vp, struct uio *uiop, cred_t *cr, int *eofp, 208 caller_context_t *ct, int flags) 209 { 210 bootfs_node_t *bnp = (bootfs_node_t *)vp->v_data; 211 dirent64_t *dp; 212 void *buf; 213 ulong_t bsize, brem; 214 offset_t coff, roff; 215 int dlen, ret; 216 bootfs_node_t *dnp; 217 boolean_t first = B_TRUE; 218 219 if (uiop->uio_loffset >= MAXOFF_T) { 220 if (eofp != NULL) 221 *eofp = 1; 222 return (0); 223 } 224 225 if (uiop->uio_iovcnt != 1) 226 return (EINVAL); 227 228 if (!(uiop->uio_iov->iov_len > 0)) 229 return (EINVAL); 230 231 if (vp->v_type != VDIR) 232 return (ENOTDIR); 233 234 roff = uiop->uio_loffset; 235 coff = 0; 236 brem = bsize = uiop->uio_iov->iov_len; 237 buf = kmem_alloc(bsize, KM_SLEEP); 238 dp = buf; 239 240 /* 241 * Recall that offsets here are done based on the name of the dirent 242 * excluding the null terminator. Therefore `.` is always at 0, `..` is 243 * always at 1, and then the first real dirent is at 3. This offset is 244 * what's actually stored when we update the offset in the structure. 245 */ 246 if (roff == 0) { 247 dlen = DIRENT64_RECLEN(1); 248 if (first == B_TRUE) { 249 if (dlen > brem) { 250 kmem_free(buf, bsize); 251 return (EINVAL); 252 } 253 first = B_FALSE; 254 } 255 dp->d_ino = (ino64_t)bnp->bvn_attr.va_nodeid; 256 dp->d_off = 0; 257 dp->d_reclen = (ushort_t)dlen; 258 (void) strncpy(dp->d_name, ".", DIRENT64_NAMELEN(dlen)); 259 dp = (struct dirent64 *)((uintptr_t)dp + dp->d_reclen); 260 brem -= dlen; 261 } 262 263 if (roff <= 1) { 264 dlen = DIRENT64_RECLEN(2); 265 if (first == B_TRUE) { 266 if (dlen > brem) { 267 kmem_free(buf, bsize); 268 return (EINVAL); 269 } 270 first = B_FALSE; 271 } 272 dp->d_ino = (ino64_t)bnp->bvn_parent->bvn_attr.va_nodeid; 273 dp->d_off = 1; 274 dp->d_reclen = (ushort_t)dlen; 275 (void) strncpy(dp->d_name, "..", DIRENT64_NAMELEN(dlen)); 276 dp = (struct dirent64 *)((uintptr_t)dp + dp->d_reclen); 277 brem -= dlen; 278 } 279 280 coff = 3; 281 for (dnp = avl_first(&bnp->bvn_dir); dnp != NULL; 282 dnp = AVL_NEXT(&bnp->bvn_dir, dnp)) { 283 size_t nlen = strlen(dnp->bvn_name); 284 285 if (roff > coff) { 286 coff += nlen; 287 continue; 288 } 289 290 dlen = DIRENT64_RECLEN(nlen); 291 if (dlen > brem) { 292 if (first == B_TRUE) { 293 kmem_free(buf, bsize); 294 return (EINVAL); 295 } 296 break; 297 } 298 first = B_FALSE; 299 300 dp->d_ino = (ino64_t)dnp->bvn_attr.va_nodeid; 301 dp->d_off = coff; 302 dp->d_reclen = (ushort_t)dlen; 303 (void) strncpy(dp->d_name, dnp->bvn_name, 304 DIRENT64_NAMELEN(dlen)); 305 dp = (struct dirent64 *)((uintptr_t)dp + dp->d_reclen); 306 brem -= dlen; 307 coff += nlen; 308 } 309 310 ret = uiomove(buf, (bsize - brem), UIO_READ, uiop); 311 312 if (ret == 0) { 313 if (dnp == NULL) { 314 coff++; 315 if (eofp != NULL) 316 *eofp = 1; 317 } else if (eofp != NULL) { 318 *eofp = 0; 319 } 320 uiop->uio_loffset = coff; 321 } 322 gethrestime(&bnp->bvn_attr.va_atime); 323 kmem_free(buf, bsize); 324 return (ret); 325 } 326 327 /*ARGSUSED*/ 328 static void 329 bootfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) 330 { 331 } 332 333 /*ARGSUSED*/ 334 static int 335 bootfs_rwlock(vnode_t *vp, int write_lock, caller_context_t *ct) 336 { 337 if (write_lock != 0) 338 return (EINVAL); 339 return (0); 340 } 341 342 /*ARGSUSED*/ 343 static void 344 bootfs_rwunlock(vnode_t *vp, int write_lock, caller_context_t *ct) 345 { 346 } 347 348 /*ARGSUSED*/ 349 static int 350 bootfs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, 351 caller_context_t *ct) 352 { 353 bootfs_node_t *bnp = (bootfs_node_t *)vp->v_data; 354 if (vp->v_type == VDIR) 355 return (0); 356 return ((*noffp < 0 || *noffp > bnp->bvn_size ? EINVAL : 0)); 357 } 358 359 /* 360 * We need to fill in a single page of a vnode's memory based on the actual data 361 * from the kernel. We'll use this node's sliding window into physical memory 362 * and update one page at a time. 363 */ 364 /*ARGSUSED*/ 365 static int 366 bootfs_getapage(vnode_t *vp, u_offset_t off, size_t len, uint_t *protp, 367 page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, enum seg_rw rw, 368 cred_t *cr) 369 { 370 bootfs_node_t *bnp = vp->v_data; 371 page_t *pp, *fpp; 372 pfn_t pfn; 373 374 for (;;) { 375 /* Easy case where the page exists */ 376 pp = page_lookup(vp, off, rw == S_CREATE ? SE_EXCL : SE_SHARED); 377 if (pp != NULL) { 378 if (pl != NULL) { 379 pl[0] = pp; 380 pl[1] = NULL; 381 } else { 382 page_unlock(pp); 383 } 384 return (0); 385 } 386 387 pp = page_create_va(vp, off, PAGESIZE, PG_EXCL | PG_WAIT, seg, 388 addr); 389 390 /* 391 * If we didn't get the page, that means someone else beat us to 392 * creating this so we need to try again. 393 */ 394 if (pp != NULL) 395 break; 396 } 397 398 pfn = btop((bnp->bvn_addr + off) & PAGEMASK); 399 fpp = page_numtopp_nolock(pfn); 400 401 if (ppcopy(fpp, pp) == 0) { 402 pvn_read_done(pp, B_ERROR); 403 return (EIO); 404 } 405 406 if (pl != NULL) { 407 pvn_plist_init(pp, pl, plsz, off, PAGESIZE, rw); 408 } else { 409 pvn_io_done(pp); 410 } 411 412 return (0); 413 } 414 415 /*ARGSUSED*/ 416 static int 417 bootfs_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp, 418 page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, enum seg_rw rw, 419 cred_t *cr, caller_context_t *ct) 420 { 421 int err; 422 bootfs_node_t *bnp = vp->v_data; 423 424 if (off + len > bnp->bvn_size + PAGEOFFSET) 425 return (EFAULT); 426 427 if (protp != NULL) 428 *protp = PROT_ALL; 429 430 if (len <= PAGESIZE) 431 err = bootfs_getapage(vp, (u_offset_t)off, len, protp, pl, 432 plsz, seg, addr, rw, cr); 433 else 434 err = pvn_getpages(bootfs_getapage, vp, (u_offset_t)off, len, 435 protp, pl, plsz, seg, addr, rw, cr); 436 437 return (err); 438 } 439 440 /*ARGSUSED*/ 441 static int 442 bootfs_map(vnode_t *vp, offset_t off, struct as *as, caddr_t *addrp, 443 size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr, 444 caller_context_t *ct) 445 { 446 int ret; 447 segvn_crargs_t vn_a; 448 449 #ifdef _ILP32 450 if (len > MAXOFF_T) 451 return (ENOMEM); 452 #endif 453 454 if (vp->v_flag & VNOMAP) 455 return (ENOSYS); 456 457 if (off < 0 || off > MAXOFFSET_T - off) 458 return (ENXIO); 459 460 if (vp->v_type != VREG) 461 return (ENODEV); 462 463 if ((prot & PROT_WRITE) && (flags & MAP_SHARED)) 464 return (ENOTSUP); 465 466 as_rangelock(as); 467 ret = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags); 468 if (ret != 0) { 469 as_rangeunlock(as); 470 return (ret); 471 } 472 473 vn_a.vp = vp; 474 vn_a.offset = (u_offset_t)off; 475 vn_a.type = flags & MAP_TYPE; 476 vn_a.prot = prot; 477 vn_a.maxprot = maxprot; 478 vn_a.cred = cr; 479 vn_a.amp = NULL; 480 vn_a.flags = flags & ~MAP_TYPE; 481 vn_a.szc = 0; 482 vn_a.lgrp_mem_policy_flags = 0; 483 484 ret = as_map(as, *addrp, len, segvn_create, &vn_a); 485 486 as_rangeunlock(as); 487 return (ret); 488 489 } 490 491 /*ARGSUSED*/ 492 static int 493 bootfs_addmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr, 494 size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr, 495 caller_context_t *ct) 496 { 497 return (0); 498 } 499 500 /*ARGSUSED*/ 501 static int 502 bootfs_delmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr, 503 size_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cr, 504 caller_context_t *ct) 505 { 506 return (0); 507 } 508 509 static int 510 bootfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, 511 caller_context_t *ct) 512 { 513 int ret; 514 515 switch (cmd) { 516 case _PC_TIMESTAMP_RESOLUTION: 517 *valp = 1L; 518 ret = 0; 519 break; 520 default: 521 ret = fs_pathconf(vp, cmd, valp, cr, ct); 522 } 523 524 return (ret); 525 } 526 527 const fs_operation_def_t bootfs_vnodeops_template[] = { 528 { VOPNAME_OPEN, { .vop_open = bootfs_open } }, 529 { VOPNAME_CLOSE, { .vop_close = bootfs_close } }, 530 { VOPNAME_READ, { .vop_read = bootfs_read } }, 531 { VOPNAME_IOCTL, { .vop_ioctl = bootfs_ioctl } }, 532 { VOPNAME_GETATTR, { .vop_getattr = bootfs_getattr } }, 533 { VOPNAME_ACCESS, { .vop_access = bootfs_access } }, 534 { VOPNAME_LOOKUP, { .vop_lookup = bootfs_lookup } }, 535 { VOPNAME_READDIR, { .vop_readdir = bootfs_readdir } }, 536 { VOPNAME_INACTIVE, { .vop_inactive = bootfs_inactive } }, 537 { VOPNAME_RWLOCK, { .vop_rwlock = bootfs_rwlock } }, 538 { VOPNAME_RWUNLOCK, { .vop_rwunlock = bootfs_rwunlock } }, 539 { VOPNAME_SEEK, { .vop_seek = bootfs_seek } }, 540 { VOPNAME_GETPAGE, { .vop_getpage = bootfs_getpage } }, 541 { VOPNAME_MAP, { .vop_map = bootfs_map } }, 542 { VOPNAME_ADDMAP, { .vop_addmap = bootfs_addmap } }, 543 { VOPNAME_DELMAP, { .vop_delmap = bootfs_delmap } }, 544 { VOPNAME_PATHCONF, { .vop_pathconf = bootfs_pathconf } }, 545 { VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_nosupport } }, 546 { NULL, { NULL } } 547 };