1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2012, Joyent, Inc. All rights reserved. 23 */ 24 25 #include <sys/types.h> 26 #include <sys/param.h> 27 #include <sys/sysmacros.h> 28 #include <sys/systm.h> 29 #include <sys/time.h> 30 #include <sys/vfs.h> 31 #include <sys/vnode.h> 32 #include <sys/errno.h> 33 #include <sys/cmn_err.h> 34 #include <sys/cred.h> 35 #include <sys/stat.h> 36 #include <sys/policy.h> 37 #include <sys/fs/hyprlofs_info.h> 38 39 static int hldir_make_hlnode(hlnode_t *, hlfsmount_t *, vattr_t *, enum de_op, 40 vnode_t *, hlnode_t **, cred_t *); 41 static int hldiraddentry(hlnode_t *, hlnode_t *, char *); 42 43 44 #define HL_HASH_SIZE 8192 /* must be power of 2 */ 45 #define HL_MUTEX_SIZE 64 46 47 static hldirent_t *hl_hashtable[HL_HASH_SIZE]; 48 static kmutex_t hl_hashmutex[HL_MUTEX_SIZE]; 49 50 #define HL_HASH_INDEX(a) ((a) & (HL_HASH_SIZE-1)) 51 #define HL_MUTEX_INDEX(a) ((a) & (HL_MUTEX_SIZE-1)) 52 53 #define HYPRLOFS_HASH(tp, name, hash) \ 54 { \ 55 char Xc, *Xcp; \ 56 hash = (uint_t)(uintptr_t)(tp) >> 8; \ 57 for (Xcp = (name); (Xc = *Xcp) != 0; Xcp++) \ 58 hash = (hash << 4) + hash + (uint_t)Xc; \ 59 } 60 61 void 62 hyprlofs_hash_init(void) 63 { 64 int ix; 65 66 for (ix = 0; ix < HL_MUTEX_SIZE; ix++) 67 mutex_init(&hl_hashmutex[ix], NULL, MUTEX_DEFAULT, NULL); 68 } 69 70 static void 71 hyprlofs_hash_in(hldirent_t *h) 72 { 73 uint_t hash; 74 hldirent_t **prevpp; 75 kmutex_t *hmtx; 76 77 HYPRLOFS_HASH(h->hld_parent, h->hld_name, hash); 78 h->hld_hash = hash; 79 prevpp = &hl_hashtable[HL_HASH_INDEX(hash)]; 80 hmtx = &hl_hashmutex[HL_MUTEX_INDEX(hash)]; 81 mutex_enter(hmtx); 82 h->hld_link = *prevpp; 83 *prevpp = h; 84 mutex_exit(hmtx); 85 } 86 87 /* Remove hldirent *h from the hash list. */ 88 static void 89 hyprlofs_hash_out(hldirent_t *h) 90 { 91 uint_t hash; 92 hldirent_t **prevpp; 93 kmutex_t *hmtx; 94 95 hash = h->hld_hash; 96 prevpp = &hl_hashtable[HL_HASH_INDEX(hash)]; 97 hmtx = &hl_hashmutex[HL_MUTEX_INDEX(hash)]; 98 mutex_enter(hmtx); 99 while (*prevpp != h) 100 prevpp = &(*prevpp)->hld_link; 101 *prevpp = h->hld_link; 102 mutex_exit(hmtx); 103 } 104 105 static hldirent_t * 106 hyprlofs_hash_lookup(char *name, hlnode_t *parent, uint_t hold, 107 hlnode_t **found) 108 { 109 hldirent_t *l; 110 uint_t hash; 111 kmutex_t *hmtx; 112 hlnode_t *hnp; 113 114 HYPRLOFS_HASH(parent, name, hash); 115 hmtx = &hl_hashmutex[HL_MUTEX_INDEX(hash)]; 116 mutex_enter(hmtx); 117 l = hl_hashtable[HL_HASH_INDEX(hash)]; 118 while (l) { 119 if (l->hld_hash == hash && l->hld_parent == parent && 120 strcmp(l->hld_name, name) == 0) { 121 /* 122 * Ensure that the hlnode that we put a hold on is the 123 * same one that we pass back. Thus the temp. var 124 * hnp is necessary. 125 */ 126 hnp = l->hld_hlnode; 127 if (hold) { 128 ASSERT(hnp); 129 hlnode_hold(hnp); 130 } 131 if (found) 132 *found = hnp; 133 mutex_exit(hmtx); 134 return (l); 135 } else { 136 l = l->hld_link; 137 } 138 } 139 mutex_exit(hmtx); 140 return (NULL); 141 } 142 143 /* 144 * Search directory 'parent' for entry 'name'. 145 * 146 * The calling thread can't hold the write version of the rwlock for the 147 * directory being searched 148 * 149 * On success *foundtp points to the found hlnode with its vnode held. 150 */ 151 int 152 hyprlofs_dirlookup(hlnode_t *parent, char *name, hlnode_t **foundtp, cred_t *cr) 153 { 154 int error; 155 156 *foundtp = NULL; 157 if (parent->hln_type != VDIR) 158 return (ENOTDIR); 159 160 if ((error = hyprlofs_taccess(parent, VEXEC, cr))) 161 return (error); 162 163 if (*name == '\0') { 164 hlnode_hold(parent); 165 *foundtp = parent; 166 return (0); 167 } 168 169 /* 170 * Search the directory for the matching name. We need the lock 171 * protecting the hln_dir list so that it doesn't change out from 172 * underneath us. hyprlofs_hash_lookup() will pass back the hlnode 173 * with a hold on it. 174 */ 175 if (hyprlofs_hash_lookup(name, parent, 1, foundtp) != NULL) { 176 ASSERT(*foundtp); 177 return (0); 178 } 179 180 return (ENOENT); 181 } 182 183 /* 184 * Enter a directory entry (either a file or subdir, depending on op) for 185 * 'name' and 'hp' into directory 'dir' 186 */ 187 int 188 hyprlofs_direnter( 189 hlfsmount_t *hm, 190 hlnode_t *dir, /* target directory to make entry in */ 191 char *name, /* name of entry */ 192 enum de_op op, /* entry operation */ 193 vnode_t *realvp, /* real vnode */ 194 vattr_t *va, 195 hlnode_t **hpp, /* return hlnode */ 196 cred_t *cr) 197 { 198 hldirent_t *hdp; 199 hlnode_t *found = NULL; 200 hlnode_t *hp; 201 int error = 0; 202 char *s; 203 204 /* hln_rwlock is held to serialize direnter and dirdeletes */ 205 ASSERT(RW_WRITE_HELD(&dir->hln_rwlock)); 206 ASSERT(dir->hln_type == VDIR); 207 208 /* Don't allow '/' characters in pathname component */ 209 for (s = name; *s; s++) 210 if (*s == '/') 211 return (EACCES); 212 213 if (name[0] == '\0') 214 panic("hyprlofs_direnter: NULL name"); 215 216 /* 217 * This might be a "dangling detached directory". It could have been 218 * removed, but a reference to it kept in u_cwd. Don't bother searching 219 * it, and with any luck the user will get tired of dealing with us and 220 * cd to some absolute pathway. This is in ufs, too. 221 */ 222 if (dir->hln_nlink == 0) { 223 return (ENOENT); 224 } 225 226 /* Search for the entry. Return "found" if it exists. */ 227 hdp = hyprlofs_hash_lookup(name, dir, 1, &found); 228 229 if (hdp) { 230 ASSERT(found); 231 switch (op) { 232 case DE_CREATE: 233 case DE_MKDIR: 234 if (hpp) { 235 *hpp = found; 236 error = EEXIST; 237 } else { 238 hlnode_rele(found); 239 } 240 break; 241 } 242 } else { 243 244 /* 245 * The entry does not exist. Check write perms in dir to see if 246 * entry can be created. 247 */ 248 if ((error = hyprlofs_taccess(dir, VWRITE, cr))) 249 return (error); 250 251 /* Make new hlnode and directory entry as required. */ 252 if ((error = hldir_make_hlnode(dir, hm, va, op, realvp, &hp, 253 cr))) 254 return (error); 255 256 if ((error = hldiraddentry(dir, hp, name))) { 257 /* Unmake the inode we just made. */ 258 rw_enter(&hp->hln_rwlock, RW_WRITER); 259 if ((hp->hln_type) == VDIR) { 260 ASSERT(hdp == NULL); 261 /* cleanup allocs made by hyprlofs_dirinit() */ 262 hyprlofs_dirtrunc(hp); 263 } 264 mutex_enter(&hp->hln_tlock); 265 hp->hln_nlink = 0; 266 mutex_exit(&hp->hln_tlock); 267 gethrestime(&hp->hln_ctime); 268 rw_exit(&hp->hln_rwlock); 269 hlnode_rele(hp); 270 hp = NULL; 271 } else if (hpp) { 272 *hpp = hp; 273 } else { 274 hlnode_rele(hp); 275 } 276 } 277 278 return (error); 279 } 280 281 /* 282 * Delete entry hp of name "nm" from dir. Free dir entry space and decrement 283 * link count on hlnode(s). 284 */ 285 int 286 hyprlofs_dirdelete(hlnode_t *dir, hlnode_t *hp, char *nm, enum dr_op op, 287 cred_t *cr) 288 { 289 hldirent_t *hpdp; 290 int error; 291 size_t namelen; 292 hlnode_t *hnp; 293 timestruc_t now; 294 295 ASSERT(RW_WRITE_HELD(&dir->hln_rwlock)); 296 ASSERT(RW_WRITE_HELD(&hp->hln_rwlock)); 297 ASSERT(dir->hln_type == VDIR); 298 299 if (nm[0] == '\0') 300 panic("hyprlofs_dirdelete: NULL name for %p", (void *)hp); 301 302 /* return error if removing . or .. */ 303 if (nm[0] == '.') { 304 if (nm[1] == '\0') 305 return (EINVAL); 306 if (nm[1] == '.' && nm[2] == '\0') 307 return (EEXIST); /* thus in ufs */ 308 } 309 310 if (error = hyprlofs_taccess(dir, VEXEC|VWRITE, cr)) 311 return (error); 312 313 if (dir->hln_dir == NULL) 314 return (ENOENT); 315 316 hpdp = hyprlofs_hash_lookup(nm, dir, 0, &hnp); 317 if (hpdp == NULL) { 318 /* 319 * If it is gone, some other thread got here first! 320 * Return error ENOENT. 321 */ 322 return (ENOENT); 323 } 324 325 /* 326 * If the hlnode in the hldirent changed (shouldn't happen since we 327 * don't support rename) then original is gone, so return that status 328 * (same as UFS). 329 */ 330 if (hp != hnp) 331 return (ENOENT); 332 333 hyprlofs_hash_out(hpdp); 334 335 /* Take hpdp out of the directory list. */ 336 ASSERT(hpdp->hld_next != hpdp); 337 ASSERT(hpdp->hld_prev != hpdp); 338 if (hpdp->hld_prev) { 339 hpdp->hld_prev->hld_next = hpdp->hld_next; 340 } 341 if (hpdp->hld_next) { 342 hpdp->hld_next->hld_prev = hpdp->hld_prev; 343 } 344 345 /* 346 * If the roving slot pointer happens to match hpdp, point it at the 347 * previous dirent. 348 */ 349 if (dir->hln_dir->hld_prev == hpdp) { 350 dir->hln_dir->hld_prev = hpdp->hld_prev; 351 } 352 ASSERT(hpdp->hld_next != hpdp); 353 ASSERT(hpdp->hld_prev != hpdp); 354 355 /* hpdp points to the correct directory entry */ 356 namelen = strlen(hpdp->hld_name) + 1; 357 358 hyprlofs_memfree(hpdp, sizeof (hldirent_t) + namelen); 359 dir->hln_size -= (sizeof (hldirent_t) + namelen); 360 dir->hln_dirents--; 361 362 gethrestime(&now); 363 dir->hln_mtime = now; 364 dir->hln_ctime = now; 365 hp->hln_ctime = now; 366 367 ASSERT(hp->hln_nlink > 0); 368 DECR_COUNT(&hp->hln_nlink, &hp->hln_tlock); 369 if (op == DR_RMDIR && hp->hln_type == VDIR) { 370 hyprlofs_dirtrunc(hp); 371 ASSERT(hp->hln_nlink == 0); 372 } 373 return (0); 374 } 375 376 /* 377 * hyprlofs_dirinit initializes a dir with '.' and '..' entries without 378 * checking perms and locking 379 */ 380 void 381 hyprlofs_dirinit( 382 hlnode_t *parent, /* parent of directory to initialize */ 383 hlnode_t *dir) /* the new directory */ 384 { 385 hldirent_t *dot, *dotdot; 386 timestruc_t now; 387 388 ASSERT(RW_WRITE_HELD(&parent->hln_rwlock)); 389 ASSERT(dir->hln_type == VDIR); 390 391 dot = hyprlofs_memalloc(sizeof (hldirent_t) + 2, HL_MUSTHAVE); 392 dotdot = hyprlofs_memalloc(sizeof (hldirent_t) + 3, HL_MUSTHAVE); 393 394 /* Initialize the entries */ 395 dot->hld_hlnode = dir; 396 dot->hld_offset = 0; 397 dot->hld_name = (char *)dot + sizeof (hldirent_t); 398 dot->hld_name[0] = '.'; 399 dot->hld_parent = dir; 400 hyprlofs_hash_in(dot); 401 402 dotdot->hld_hlnode = parent; 403 dotdot->hld_offset = 1; 404 dotdot->hld_name = (char *)dotdot + sizeof (hldirent_t); 405 dotdot->hld_name[0] = '.'; 406 dotdot->hld_name[1] = '.'; 407 dotdot->hld_parent = dir; 408 hyprlofs_hash_in(dotdot); 409 410 /* Initialize directory entry list. */ 411 dot->hld_next = dotdot; 412 dot->hld_prev = dotdot; 413 dotdot->hld_next = NULL; 414 dotdot->hld_prev = dot; 415 416 gethrestime(&now); 417 dir->hln_mtime = now; 418 dir->hln_ctime = now; 419 420 /* 421 * Since hyprlofs_dirinit is called with both dir and parent being the 422 * same for the root vnode, we need to increment this before we set 423 * hln_nlink = 2 below. 424 */ 425 INCR_COUNT(&parent->hln_nlink, &parent->hln_tlock); 426 parent->hln_ctime = now; 427 428 dir->hln_dir = dot; 429 dir->hln_size = 2 * sizeof (hldirent_t) + 5; /* dot and dotdot */ 430 dir->hln_dirents = 2; 431 dir->hln_nlink = 2; 432 } 433 434 435 /* 436 * hyprlofs_dirtrunc removes all dir entries under this dir. 437 */ 438 void 439 hyprlofs_dirtrunc(hlnode_t *dir) 440 { 441 hldirent_t *hdp; 442 hlnode_t *tp; 443 size_t namelen; 444 timestruc_t now; 445 446 ASSERT(RW_WRITE_HELD(&dir->hln_rwlock)); 447 ASSERT(dir->hln_type == VDIR); 448 449 if (dir->hln_looped) 450 return; 451 452 for (hdp = dir->hln_dir; hdp; hdp = dir->hln_dir) { 453 ASSERT(hdp->hld_next != hdp); 454 ASSERT(hdp->hld_prev != hdp); 455 ASSERT(hdp->hld_hlnode); 456 457 dir->hln_dir = hdp->hld_next; 458 namelen = strlen(hdp->hld_name) + 1; 459 460 /* 461 * Adjust the link counts to account for this dir entry removal. 462 */ 463 tp = hdp->hld_hlnode; 464 465 ASSERT(tp->hln_nlink > 0); 466 DECR_COUNT(&tp->hln_nlink, &tp->hln_tlock); 467 468 hyprlofs_hash_out(hdp); 469 470 hyprlofs_memfree(hdp, sizeof (hldirent_t) + namelen); 471 dir->hln_size -= (sizeof (hldirent_t) + namelen); 472 dir->hln_dirents--; 473 } 474 475 gethrestime(&now); 476 dir->hln_mtime = now; 477 dir->hln_ctime = now; 478 479 ASSERT(dir->hln_dir == NULL); 480 ASSERT(dir->hln_size == 0); 481 ASSERT(dir->hln_dirents == 0); 482 } 483 484 static int 485 hldiraddentry( 486 hlnode_t *dir, /* target directory to make entry in */ 487 hlnode_t *hp, /* new hlnode */ 488 char *name) 489 { 490 hldirent_t *hdp, *hpdp; 491 size_t namelen, alloc_size; 492 timestruc_t now; 493 494 /* 495 * Make sure the parent dir wasn't removed from underneath the caller. 496 */ 497 if (dir->hln_dir == NULL) 498 return (ENOENT); 499 500 /* Check that everything is on the same FS. */ 501 if (hp->hln_vnode->v_vfsp != dir->hln_vnode->v_vfsp) 502 return (EXDEV); 503 504 /* Alloc and init dir entry */ 505 namelen = strlen(name) + 1; 506 alloc_size = namelen + sizeof (hldirent_t); 507 hdp = hyprlofs_memalloc(alloc_size, 0); 508 if (hdp == NULL) 509 return (ENOSPC); 510 511 dir->hln_size += alloc_size; 512 dir->hln_dirents++; 513 hdp->hld_hlnode = hp; 514 hdp->hld_parent = dir; 515 516 /* The dir entry and its name were allocated sequentially. */ 517 hdp->hld_name = (char *)hdp + sizeof (hldirent_t); 518 (void) strcpy(hdp->hld_name, name); 519 520 hyprlofs_hash_in(hdp); 521 522 /* 523 * Some utilities expect the size of a directory to remain fairly 524 * static. For example, a routine which unlinks files between calls to 525 * readdir(); the size of the dir changes from underneath it and so the 526 * real dir offset in bytes is invalid. To circumvent this problem, we 527 * initialize a dir entry with a phony offset, and use this offset to 528 * determine end of file in hyprlofs_readdir. 529 */ 530 hpdp = dir->hln_dir->hld_prev; 531 /* 532 * Install at first empty "slot" in directory list. 533 */ 534 while (hpdp->hld_next != NULL && (hpdp->hld_next->hld_offset - 535 hpdp->hld_offset) <= 1) { 536 ASSERT(hpdp->hld_next != hpdp); 537 ASSERT(hpdp->hld_prev != hpdp); 538 ASSERT(hpdp->hld_next->hld_offset > hpdp->hld_offset); 539 hpdp = hpdp->hld_next; 540 } 541 hdp->hld_offset = hpdp->hld_offset + 1; 542 543 /* 544 * If we're at the end of the dirent list and the offset (which is 545 * necessarily the largest offset in this dir) is more than twice the 546 * number of dirents, that means the dir is 50% holes. At this point 547 * we reset the slot pointer back to the beginning of the dir so we 548 * start using the holes. The idea is that if there are N dirents, 549 * there must also be N holes, so we can satisfy the next N creates by 550 * walking at most 2N entries; thus the average cost of a create is 551 * constant. Note that we use the first dirent's hld_prev as the roving 552 * slot pointer. This saves a word in every dirent. 553 */ 554 if (hpdp->hld_next == NULL && hpdp->hld_offset > 2 * dir->hln_dirents) 555 dir->hln_dir->hld_prev = dir->hln_dir->hld_next; 556 else 557 dir->hln_dir->hld_prev = hdp; 558 559 ASSERT(hpdp->hld_next != hpdp); 560 ASSERT(hpdp->hld_prev != hpdp); 561 562 hdp->hld_next = hpdp->hld_next; 563 if (hdp->hld_next) { 564 hdp->hld_next->hld_prev = hdp; 565 } 566 hdp->hld_prev = hpdp; 567 hpdp->hld_next = hdp; 568 569 ASSERT(hdp->hld_next != hdp); 570 ASSERT(hdp->hld_prev != hdp); 571 ASSERT(hpdp->hld_next != hpdp); 572 ASSERT(hpdp->hld_prev != hpdp); 573 574 gethrestime(&now); 575 dir->hln_mtime = now; 576 dir->hln_ctime = now; 577 578 return (0); 579 } 580 581 static int 582 hldir_make_hlnode(hlnode_t *dir, hlfsmount_t *hm, vattr_t *va, enum de_op op, 583 vnode_t *realvp, hlnode_t **newnode, cred_t *cr) 584 { 585 hlnode_t *hp; 586 enum vtype type; 587 588 ASSERT(va != NULL); 589 ASSERT(op == DE_CREATE || op == DE_MKDIR); 590 if (((va->va_mask & AT_ATIME) && TIMESPEC_OVERFLOW(&va->va_atime)) || 591 ((va->va_mask & AT_MTIME) && TIMESPEC_OVERFLOW(&va->va_mtime))) 592 return (EOVERFLOW); 593 type = va->va_type; 594 hp = hyprlofs_memalloc(sizeof (hlnode_t), HL_MUSTHAVE); 595 hyprlofs_node_init(hm, hp, va, cr); 596 597 hp->hln_vnode->v_rdev = hp->hln_rdev = NODEV; 598 hp->hln_vnode->v_type = type; 599 hp->hln_uid = crgetuid(cr); 600 601 /* 602 * To determine the gid of the created file: 603 * If the directory's set-gid bit is set, set the gid to the gid 604 * of the parent dir, otherwise, use the process's gid. 605 */ 606 if (dir->hln_mode & VSGID) 607 hp->hln_gid = dir->hln_gid; 608 else 609 hp->hln_gid = crgetgid(cr); 610 611 /* 612 * If we're creating a dir and the parent dir has the set-GID bit set, 613 * set it on the new dir. Otherwise, if the user is neither privileged 614 * nor a member of the file's new group, clear the file's set-GID bit. 615 */ 616 if (dir->hln_mode & VSGID && type == VDIR) 617 hp->hln_mode |= VSGID; 618 else { 619 if ((hp->hln_mode & VSGID) && 620 secpolicy_vnode_setids_setgids(cr, hp->hln_gid) != 0) 621 hp->hln_mode &= ~VSGID; 622 } 623 624 if (va->va_mask & AT_ATIME) 625 hp->hln_atime = va->va_atime; 626 if (va->va_mask & AT_MTIME) 627 hp->hln_mtime = va->va_mtime; 628 629 if (op == DE_MKDIR) { 630 hyprlofs_dirinit(dir, hp); 631 hp->hln_looped = 0; 632 } else { 633 hp->hln_realvp = realvp; 634 hp->hln_size = va->va_size; 635 hp->hln_looped = 1; 636 } 637 638 *newnode = hp; 639 return (0); 640 }