1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2012, Joyent, Inc. All rights reserved.
23 */
24
25 #include <sys/types.h>
26 #include <sys/param.h>
27 #include <sys/sysmacros.h>
28 #include <sys/systm.h>
29 #include <sys/time.h>
30 #include <sys/vfs.h>
31 #include <sys/vnode.h>
32 #include <sys/errno.h>
33 #include <sys/cmn_err.h>
34 #include <sys/cred.h>
35 #include <sys/stat.h>
36 #include <sys/policy.h>
37 #include <sys/fs/hyprlofs_info.h>
38
/* Forward declarations for the static helpers defined later in this file. */
static int hldir_make_hlnode(hlnode_t *, hlfsmount_t *, vattr_t *, enum de_op,
	vnode_t *, hlnode_t **, cred_t *);
static int hldiraddentry(hlnode_t *, hlnode_t *, char *);


/*
 * A single file-wide hash table of directory entries, keyed on the
 * (parent hlnode, name) hash produced by HYPRLOFS_HASH. Bucket chains are
 * linked through hld_link.
 *
 * There are fewer mutexes than buckets. Both indices are low-order bit masks
 * of the same hash value and HL_MUTEX_SIZE divides HL_HASH_SIZE, so every
 * entry on a given bucket chain maps to the same mutex.
 */
#define	HL_HASH_SIZE	8192		/* must be power of 2 */
#define	HL_MUTEX_SIZE	64		/* also masked with (size - 1) below */

static hldirent_t	*hl_hashtable[HL_HASH_SIZE];	/* bucket heads */
static kmutex_t		hl_hashmutex[HL_MUTEX_SIZE];	/* bucket locks */

/* Map a hash value to its bucket index and to its mutex index. */
#define	HL_HASH_INDEX(a)	((a) & (HL_HASH_SIZE-1))
#define	HL_MUTEX_INDEX(a)	((a) & (HL_MUTEX_SIZE-1))
52
/*
 * Compute the hash of directory entry 'name' within parent 'tp' and store it
 * in 'hash', which must be a modifiable uint_t lvalue.
 *
 * The seed is the parent pointer truncated to uint_t and shifted right by 8;
 * each byte of the name is then folded in as hash = hash * 17 + byte. The
 * byte is read through a plain 'char', so its conversion to uint_t follows
 * the platform's char signedness.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used safely as the body of an unbraced
 * if/else. Callers supply the trailing semicolon.
 */
#define	HYPRLOFS_HASH(tp, name, hash)					\
	do {								\
		char Xc, *Xcp;						\
		(hash) = (uint_t)(uintptr_t)(tp) >> 8;			\
		for (Xcp = (name); (Xc = *Xcp) != 0; Xcp++)		\
			(hash) = ((hash) << 4) + (hash) + (uint_t)Xc;	\
	} while (0)
60
61 void
62 hyprlofs_hash_init(void)
63 {
64 int ix;
65
66 for (ix = 0; ix < HL_MUTEX_SIZE; ix++)
67 mutex_init(&hl_hashmutex[ix], NULL, MUTEX_DEFAULT, NULL);
68 }
69
70 static void
71 hyprlofs_hash_in(hldirent_t *h)
72 {
73 uint_t hash;
74 hldirent_t **prevpp;
75 kmutex_t *hmtx;
76
77 HYPRLOFS_HASH(h->hld_parent, h->hld_name, hash);
78 h->hld_hash = hash;
79 prevpp = &hl_hashtable[HL_HASH_INDEX(hash)];
80 hmtx = &hl_hashmutex[HL_MUTEX_INDEX(hash)];
81 mutex_enter(hmtx);
82 h->hld_link = *prevpp;
83 *prevpp = h;
84 mutex_exit(hmtx);
85 }
86
87 /* Remove hldirent *h from the hash list. */
88 static void
89 hyprlofs_hash_out(hldirent_t *h)
90 {
91 uint_t hash;
92 hldirent_t **prevpp;
93 kmutex_t *hmtx;
94
95 hash = h->hld_hash;
96 prevpp = &hl_hashtable[HL_HASH_INDEX(hash)];
97 hmtx = &hl_hashmutex[HL_MUTEX_INDEX(hash)];
98 mutex_enter(hmtx);
99 while (*prevpp != h)
100 prevpp = &(*prevpp)->hld_link;
101 *prevpp = h->hld_link;
102 mutex_exit(hmtx);
103 }
104
105 static hldirent_t *
106 hyprlofs_hash_lookup(char *name, hlnode_t *parent, uint_t hold,
107 hlnode_t **found)
108 {
109 hldirent_t *l;
110 uint_t hash;
111 kmutex_t *hmtx;
112 hlnode_t *hnp;
113
114 HYPRLOFS_HASH(parent, name, hash);
115 hmtx = &hl_hashmutex[HL_MUTEX_INDEX(hash)];
116 mutex_enter(hmtx);
117 l = hl_hashtable[HL_HASH_INDEX(hash)];
118 while (l) {
119 if (l->hld_hash == hash && l->hld_parent == parent &&
120 strcmp(l->hld_name, name) == 0) {
121 /*
122 * Ensure that the hlnode that we put a hold on is the
123 * same one that we pass back. Thus the temp. var
124 * hnp is necessary.
125 */
126 hnp = l->hld_hlnode;
127 if (hold) {
128 ASSERT(hnp);
129 hlnode_hold(hnp);
130 }
131 if (found)
132 *found = hnp;
133 mutex_exit(hmtx);
134 return (l);
135 } else {
136 l = l->hld_link;
137 }
138 }
139 mutex_exit(hmtx);
140 return (NULL);
141 }
142
143 /*
144 * Search directory 'parent' for entry 'name'.
145 *
146 * The calling thread can't hold the write version of the rwlock for the
147 * directory being searched
148 *
149 * On success *foundtp points to the found hlnode with its vnode held.
150 */
151 int
152 hyprlofs_dirlookup(hlnode_t *parent, char *name, hlnode_t **foundtp, cred_t *cr)
153 {
154 int error;
155
156 *foundtp = NULL;
157 if (parent->hln_type != VDIR)
158 return (ENOTDIR);
159
160 if ((error = hyprlofs_taccess(parent, VEXEC, cr)))
161 return (error);
162
163 if (*name == '\0') {
164 hlnode_hold(parent);
165 *foundtp = parent;
166 return (0);
167 }
168
169 /*
170 * Search the directory for the matching name. We need the lock
171 * protecting the hln_dir list so that it doesn't change out from
172 * underneath us. hyprlofs_hash_lookup() will pass back the hlnode
173 * with a hold on it.
174 */
175 if (hyprlofs_hash_lookup(name, parent, 1, foundtp) != NULL) {
176 ASSERT(*foundtp);
177 return (0);
178 }
179
180 return (ENOENT);
181 }
182
183 /*
184 * Enter a directory entry (either a file or subdir, depending on op) for
185 * 'name' and 'hp' into directory 'dir'
186 */
187 int
188 hyprlofs_direnter(
189 hlfsmount_t *hm,
190 hlnode_t *dir, /* target directory to make entry in */
191 char *name, /* name of entry */
192 enum de_op op, /* entry operation */
193 vnode_t *realvp, /* real vnode */
194 vattr_t *va,
195 hlnode_t **hpp, /* return hlnode */
196 cred_t *cr)
197 {
198 hldirent_t *hdp;
199 hlnode_t *found = NULL;
200 hlnode_t *hp;
201 int error = 0;
202 char *s;
203
204 /* hln_rwlock is held to serialize direnter and dirdeletes */
205 ASSERT(RW_WRITE_HELD(&dir->hln_rwlock));
206 ASSERT(dir->hln_type == VDIR);
207
208 /* Don't allow '/' characters in pathname component */
209 for (s = name; *s; s++)
210 if (*s == '/')
211 return (EACCES);
212
213 if (name[0] == '\0')
214 panic("hyprlofs_direnter: NULL name");
215
216 /*
217 * This might be a "dangling detached directory". It could have been
218 * removed, but a reference to it kept in u_cwd. Don't bother searching
219 * it, and with any luck the user will get tired of dealing with us and
220 * cd to some absolute pathway. This is in ufs, too.
221 */
222 if (dir->hln_nlink == 0) {
223 return (ENOENT);
224 }
225
226 /* Search for the entry. Return "found" if it exists. */
227 hdp = hyprlofs_hash_lookup(name, dir, 1, &found);
228
229 if (hdp) {
230 ASSERT(found);
231 switch (op) {
232 case DE_CREATE:
233 case DE_MKDIR:
234 if (hpp) {
235 *hpp = found;
236 error = EEXIST;
237 } else {
238 hlnode_rele(found);
239 }
240 break;
241 }
242 } else {
243
244 /*
245 * The entry does not exist. Check write perms in dir to see if
246 * entry can be created.
247 */
248 if ((error = hyprlofs_taccess(dir, VWRITE, cr)))
249 return (error);
250
251 /* Make new hlnode and directory entry as required. */
252 if ((error = hldir_make_hlnode(dir, hm, va, op, realvp, &hp,
253 cr)))
254 return (error);
255
256 if ((error = hldiraddentry(dir, hp, name))) {
257 /* Unmake the inode we just made. */
258 rw_enter(&hp->hln_rwlock, RW_WRITER);
259 if ((hp->hln_type) == VDIR) {
260 ASSERT(hdp == NULL);
261 /* cleanup allocs made by hyprlofs_dirinit() */
262 hyprlofs_dirtrunc(hp);
263 }
264 mutex_enter(&hp->hln_tlock);
265 hp->hln_nlink = 0;
266 mutex_exit(&hp->hln_tlock);
267 gethrestime(&hp->hln_ctime);
268 rw_exit(&hp->hln_rwlock);
269 hlnode_rele(hp);
270 hp = NULL;
271 } else if (hpp) {
272 *hpp = hp;
273 } else {
274 hlnode_rele(hp);
275 }
276 }
277
278 return (error);
279 }
280
281 /*
282 * Delete entry hp of name "nm" from dir. Free dir entry space and decrement
283 * link count on hlnode(s).
284 */
285 int
286 hyprlofs_dirdelete(hlnode_t *dir, hlnode_t *hp, char *nm, enum dr_op op,
287 cred_t *cr)
288 {
289 hldirent_t *hpdp;
290 int error;
291 size_t namelen;
292 hlnode_t *hnp;
293 timestruc_t now;
294
295 ASSERT(RW_WRITE_HELD(&dir->hln_rwlock));
296 ASSERT(RW_WRITE_HELD(&hp->hln_rwlock));
297 ASSERT(dir->hln_type == VDIR);
298
299 if (nm[0] == '\0')
300 panic("hyprlofs_dirdelete: NULL name for %p", (void *)hp);
301
302 /* return error if removing . or .. */
303 if (nm[0] == '.') {
304 if (nm[1] == '\0')
305 return (EINVAL);
306 if (nm[1] == '.' && nm[2] == '\0')
307 return (EEXIST); /* thus in ufs */
308 }
309
310 if ((error = hyprlofs_taccess(dir, VEXEC|VWRITE, cr)) != 0)
311 return (error);
312
313 if (dir->hln_dir == NULL)
314 return (ENOENT);
315
316 hpdp = hyprlofs_hash_lookup(nm, dir, 0, &hnp);
317 if (hpdp == NULL) {
318 /*
319 * If it is gone, some other thread got here first!
320 * Return error ENOENT.
321 */
322 return (ENOENT);
323 }
324
325 /*
326 * If the hlnode in the hldirent changed (shouldn't happen since we
327 * don't support rename) then original is gone, so return that status
328 * (same as UFS).
329 */
330 if (hp != hnp)
331 return (ENOENT);
332
333 hyprlofs_hash_out(hpdp);
334
335 /* Take hpdp out of the directory list. */
336 ASSERT(hpdp->hld_next != hpdp);
337 ASSERT(hpdp->hld_prev != hpdp);
338 if (hpdp->hld_prev) {
339 hpdp->hld_prev->hld_next = hpdp->hld_next;
340 }
341 if (hpdp->hld_next) {
342 hpdp->hld_next->hld_prev = hpdp->hld_prev;
343 }
344
345 /*
346 * If the roving slot pointer happens to match hpdp, point it at the
347 * previous dirent.
348 */
349 if (dir->hln_dir->hld_prev == hpdp) {
350 dir->hln_dir->hld_prev = hpdp->hld_prev;
351 }
352 ASSERT(hpdp->hld_next != hpdp);
353 ASSERT(hpdp->hld_prev != hpdp);
354
355 /* hpdp points to the correct directory entry */
356 namelen = strlen(hpdp->hld_name) + 1;
357
358 hyprlofs_memfree(hpdp, sizeof (hldirent_t) + namelen);
359 dir->hln_size -= (sizeof (hldirent_t) + namelen);
360 dir->hln_dirents--;
361
362 gethrestime(&now);
363 dir->hln_mtime = now;
364 dir->hln_ctime = now;
365 hp->hln_ctime = now;
366
367 ASSERT(hp->hln_nlink > 0);
368 DECR_COUNT(&hp->hln_nlink, &hp->hln_tlock);
369 if (op == DR_RMDIR && hp->hln_type == VDIR) {
370 hyprlofs_dirtrunc(hp);
371 ASSERT(hp->hln_nlink == 0);
372 }
373 return (0);
374 }
375
376 /*
377 * hyprlofs_dirinit initializes a dir with '.' and '..' entries without
378 * checking perms and locking
379 */
380 void
381 hyprlofs_dirinit(
382 hlnode_t *parent, /* parent of directory to initialize */
383 hlnode_t *dir) /* the new directory */
384 {
385 hldirent_t *dot, *dotdot;
386 timestruc_t now;
387
388 ASSERT(RW_WRITE_HELD(&parent->hln_rwlock));
389 ASSERT(dir->hln_type == VDIR);
390
391 dot = hyprlofs_memalloc(sizeof (hldirent_t) + 2, HL_MUSTHAVE);
392 dotdot = hyprlofs_memalloc(sizeof (hldirent_t) + 3, HL_MUSTHAVE);
393
394 /* Initialize the entries */
395 dot->hld_hlnode = dir;
396 dot->hld_offset = 0;
397 dot->hld_name = (char *)dot + sizeof (hldirent_t);
398 dot->hld_name[0] = '.';
399 dot->hld_parent = dir;
400 hyprlofs_hash_in(dot);
401
402 dotdot->hld_hlnode = parent;
403 dotdot->hld_offset = 1;
404 dotdot->hld_name = (char *)dotdot + sizeof (hldirent_t);
405 dotdot->hld_name[0] = '.';
406 dotdot->hld_name[1] = '.';
407 dotdot->hld_parent = dir;
408 hyprlofs_hash_in(dotdot);
409
410 /* Initialize directory entry list. */
411 dot->hld_next = dotdot;
412 dot->hld_prev = dotdot;
413 dotdot->hld_next = NULL;
414 dotdot->hld_prev = dot;
415
416 gethrestime(&now);
417 dir->hln_mtime = now;
418 dir->hln_ctime = now;
419
420 /*
421 * Since hyprlofs_dirinit is called with both dir and parent being the
422 * same for the root vnode, we need to increment this before we set
423 * hln_nlink = 2 below.
424 */
425 INCR_COUNT(&parent->hln_nlink, &parent->hln_tlock);
426 parent->hln_ctime = now;
427
428 dir->hln_dir = dot;
429 dir->hln_size = 2 * sizeof (hldirent_t) + 5; /* dot and dotdot */
430 dir->hln_dirents = 2;
431 dir->hln_nlink = 2;
432 }
433
434
435 /*
436 * hyprlofs_dirtrunc removes all dir entries under this dir.
437 */
438 void
439 hyprlofs_dirtrunc(hlnode_t *dir)
440 {
441 hldirent_t *hdp;
442 hlnode_t *tp;
443 size_t namelen;
444 timestruc_t now;
445
446 ASSERT(RW_WRITE_HELD(&dir->hln_rwlock));
447 ASSERT(dir->hln_type == VDIR);
448
449 if (dir->hln_looped)
450 return;
451
452 for (hdp = dir->hln_dir; hdp; hdp = dir->hln_dir) {
453 ASSERT(hdp->hld_next != hdp);
454 ASSERT(hdp->hld_prev != hdp);
455 ASSERT(hdp->hld_hlnode);
456
457 dir->hln_dir = hdp->hld_next;
458 namelen = strlen(hdp->hld_name) + 1;
459
460 /*
461 * Adjust the link counts to account for this dir entry removal.
462 */
463 tp = hdp->hld_hlnode;
464
465 ASSERT(tp->hln_nlink > 0);
466 DECR_COUNT(&tp->hln_nlink, &tp->hln_tlock);
467
468 hyprlofs_hash_out(hdp);
469
470 hyprlofs_memfree(hdp, sizeof (hldirent_t) + namelen);
471 dir->hln_size -= (sizeof (hldirent_t) + namelen);
472 dir->hln_dirents--;
473 }
474
475 gethrestime(&now);
476 dir->hln_mtime = now;
477 dir->hln_ctime = now;
478
479 ASSERT(dir->hln_dir == NULL);
480 ASSERT(dir->hln_size == 0);
481 ASSERT(dir->hln_dirents == 0);
482 }
483
/*
 * Allocate a directory entry called 'name' that refers to hlnode 'hp', and
 * link it into directory 'dir' (both the hash table and dir's entry list).
 *
 * Returns 0 on success, ENOENT if dir has no entry list, EXDEV if hp and dir
 * are on different vfs's, or ENOSPC if the entry cannot be allocated. On
 * failure nothing has been modified.
 */
static int
hldiraddentry(
	hlnode_t *dir,	/* target directory to make entry in */
	hlnode_t *hp,	/* new hlnode */
	char *name)
{
	hldirent_t *hdp, *hpdp;	/* new entry; entry it is inserted after */
	size_t namelen, alloc_size;
	timestruc_t now;

	/*
	 * Make sure the parent dir wasn't removed from underneath the caller.
	 */
	if (dir->hln_dir == NULL)
		return (ENOENT);

	/* Check that everything is on the same FS. */
	if (hp->hln_vnode->v_vfsp != dir->hln_vnode->v_vfsp)
		return (EXDEV);

	/* Alloc and init dir entry; namelen includes the terminating NUL. */
	namelen = strlen(name) + 1;
	alloc_size = namelen + sizeof (hldirent_t);
	hdp = hyprlofs_memalloc(alloc_size, 0);
	if (hdp == NULL)
		return (ENOSPC);

	dir->hln_size += alloc_size;
	dir->hln_dirents++;
	hdp->hld_hlnode = hp;
	hdp->hld_parent = dir;

	/* The dir entry and its name were allocated sequentially. */
	hdp->hld_name = (char *)hdp + sizeof (hldirent_t);
	(void) strcpy(hdp->hld_name, name);

	/* Parent and name are now set, which is all the hash needs. */
	hyprlofs_hash_in(hdp);

	/*
	 * Some utilities expect the size of a directory to remain fairly
	 * static.  For example, a routine which unlinks files between calls to
	 * readdir(); the size of the dir changes from underneath it and so the
	 * real dir offset in bytes is invalid.  To circumvent this problem, we
	 * initialize a dir entry with a phony offset, and use this offset to
	 * determine end of file in hyprlofs_readdir.
	 */
	/* Start the search at the roving slot pointer (see below). */
	hpdp = dir->hln_dir->hld_prev;
	/*
	 * Install at first empty "slot" in directory list: advance while the
	 * next entry's offset is at most one greater than this one's, i.e.
	 * while there is no unused offset between them.
	 */
	while (hpdp->hld_next != NULL && (hpdp->hld_next->hld_offset -
	    hpdp->hld_offset) <= 1) {
		ASSERT(hpdp->hld_next != hpdp);
		ASSERT(hpdp->hld_prev != hpdp);
		ASSERT(hpdp->hld_next->hld_offset > hpdp->hld_offset);
		hpdp = hpdp->hld_next;
	}
	hdp->hld_offset = hpdp->hld_offset + 1;

	/*
	 * If we're at the end of the dirent list and the offset (which is
	 * necessarily the largest offset in this dir) is more than twice the
	 * number of dirents, that means the dir is 50% holes.  At this point
	 * we reset the slot pointer back to the beginning of the dir so we
	 * start using the holes. The idea is that if there are N dirents,
	 * there must also be N holes, so we can satisfy the next N creates by
	 * walking at most 2N entries; thus the average cost of a create is
	 * constant. Note that we use the first dirent's hld_prev as the roving
	 * slot pointer. This saves a word in every dirent.
	 */
	if (hpdp->hld_next == NULL && hpdp->hld_offset > 2 * dir->hln_dirents)
		dir->hln_dir->hld_prev = dir->hln_dir->hld_next;
	else
		dir->hln_dir->hld_prev = hdp;

	ASSERT(hpdp->hld_next != hpdp);
	ASSERT(hpdp->hld_prev != hpdp);

	/* Splice hdp into the list immediately after hpdp. */
	hdp->hld_next = hpdp->hld_next;
	if (hdp->hld_next) {
		hdp->hld_next->hld_prev = hdp;
	}
	hdp->hld_prev = hpdp;
	hpdp->hld_next = hdp;

	ASSERT(hdp->hld_next != hdp);
	ASSERT(hdp->hld_prev != hdp);
	ASSERT(hpdp->hld_next != hpdp);
	ASSERT(hpdp->hld_prev != hpdp);

	/* Adding an entry changes the directory's mtime and ctime. */
	gethrestime(&now);
	dir->hln_mtime = now;
	dir->hln_ctime = now;

	return (0);
}
580
581 static int
582 hldir_make_hlnode(hlnode_t *dir, hlfsmount_t *hm, vattr_t *va, enum de_op op,
583 vnode_t *realvp, hlnode_t **newnode, cred_t *cr)
584 {
585 hlnode_t *hp;
586 enum vtype type;
587
588 ASSERT(va != NULL);
589 ASSERT(op == DE_CREATE || op == DE_MKDIR);
590 if (((va->va_mask & AT_ATIME) && TIMESPEC_OVERFLOW(&va->va_atime)) ||
591 ((va->va_mask & AT_MTIME) && TIMESPEC_OVERFLOW(&va->va_mtime)))
592 return (EOVERFLOW);
593 type = va->va_type;
594 hp = hyprlofs_memalloc(sizeof (hlnode_t), HL_MUSTHAVE);
595 hyprlofs_node_init(hm, hp, va, cr);
596
597 hp->hln_vnode->v_rdev = hp->hln_rdev = NODEV;
598 hp->hln_vnode->v_type = type;
599 hp->hln_uid = crgetuid(cr);
600
601 /*
602 * To determine the gid of the created file:
603 * If the directory's set-gid bit is set, set the gid to the gid
604 * of the parent dir, otherwise, use the process's gid.
605 */
606 if (dir->hln_mode & VSGID)
607 hp->hln_gid = dir->hln_gid;
608 else
609 hp->hln_gid = crgetgid(cr);
610
611 /*
612 * If we're creating a dir and the parent dir has the set-GID bit set,
613 * set it on the new dir. Otherwise, if the user is neither privileged
614 * nor a member of the file's new group, clear the file's set-GID bit.
615 */
616 if (dir->hln_mode & VSGID && type == VDIR)
617 hp->hln_mode |= VSGID;
618 else {
619 if ((hp->hln_mode & VSGID) &&
620 secpolicy_vnode_setids_setgids(cr, hp->hln_gid) != 0)
621 hp->hln_mode &= ~VSGID;
622 }
623
624 if (va->va_mask & AT_ATIME)
625 hp->hln_atime = va->va_atime;
626 if (va->va_mask & AT_MTIME)
627 hp->hln_mtime = va->va_mtime;
628
629 if (op == DE_MKDIR) {
630 hyprlofs_dirinit(dir, hp);
631 hp->hln_looped = 0;
632 } else {
633 hp->hln_realvp = realvp;
634 hp->hln_size = va->va_size;
635 hp->hln_looped = 1;
636 }
637
638 *newnode = hp;
639 return (0);
640 }