1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 *
25 * Copyright 2016 RackTop Systems.
26 */
27
28 #include <sys/types.h>
29 #include <sys/param.h>
30 #include <sys/sysmacros.h>
31 #include <sys/systm.h>
32 #include <sys/time.h>
33 #include <sys/vfs.h>
34 #include <sys/vnode.h>
35 #include <sys/errno.h>
36 #include <sys/cmn_err.h>
37 #include <sys/cred.h>
38 #include <sys/stat.h>
39 #include <sys/debug.h>
40 #include <sys/policy.h>
41 #include <sys/fs/tmpnode.h>
42 #include <sys/fs/tmp.h>
43 #include <sys/vtrace.h>
44
/*
 * Forward declarations for the file-local helpers that are defined
 * further down in this file but used by the routines above them.
 */
static int tdircheckpath(struct tmpnode *, struct tmpnode *, struct cred *);
static int tdirrename(struct tmpnode *, struct tmpnode *, struct tmpnode *,
	char *, struct tmpnode *, struct tdirent *, struct cred *);
static void tdirfixdotdot(struct tmpnode *, struct tmpnode *, struct tmpnode *);
static int tdirmaketnode(struct tmpnode *, struct tmount *, struct vattr *,
	enum de_op, struct tmpnode **, struct cred *);
static int tdiraddentry(struct tmpnode *, struct tmpnode *, char *,
	enum de_op, struct tmpnode *);
53
54
/*
 * Directory entry name hash.
 *
 * Directory entries are chained (through td_link) off a single file-static
 * table of T_HASH_SIZE buckets.  The chains are protected by a smaller
 * array of T_MUTEX_SIZE mutexes.  A hash value is reduced to a bucket or
 * mutex index by masking with (size - 1), so both sizes must be powers of
 * two.  Because T_MUTEX_SIZE does not exceed T_HASH_SIZE, two entries that
 * land in the same bucket always select the same mutex.
 */
#define	T_HASH_SIZE	8192		/* must be power of 2 */
#define	T_MUTEX_SIZE	64		/* masked the same way, so also 2^n */

static struct tdirent	*t_hashtable[T_HASH_SIZE];	/* bucket heads */
static kmutex_t		t_hashmutex[T_MUTEX_SIZE];	/* bucket locks */

/* Map a hash value onto a bucket index / mutex index. */
#define	T_HASH_INDEX(a)		((a) & (T_HASH_SIZE-1))
#define	T_MUTEX_INDEX(a)	((a) & (T_MUTEX_SIZE-1))
63
/*
 * Compute the hash of a directory entry from its parent tmpnode pointer
 * 'tp' and its NUL-terminated 'name', storing the result in the lvalue
 * 'hash'.
 *
 * The seed is the parent pointer truncated to uint_t and shifted right by
 * eight bits; every byte of the name is then folded in as
 * hash = hash * 17 + byte.  The byte is widened from plain char, so where
 * char is signed, bytes >= 0x80 are sign-extended before being added.
 *
 * The body is wrapped in do { } while (0) so that an invocation followed
 * by ';' forms exactly one statement and is safe in an unbraced if/else.
 */
#define	TMPFS_HASH(tp, name, hash)					\
	do {								\
		char Xc, *Xcp;						\
		(hash) = (uint_t)(uintptr_t)(tp) >> 8;			\
		for (Xcp = (name); (Xc = *Xcp) != 0; Xcp++)		\
			(hash) = ((hash) << 4) + (hash) + (uint_t)Xc;	\
	} while (0)
71
72 void
73 tmpfs_hash_init(void)
74 {
75 int ix;
76
77 for (ix = 0; ix < T_MUTEX_SIZE; ix++)
78 mutex_init(&t_hashmutex[ix], NULL, MUTEX_DEFAULT, NULL);
79 }
80
81 /*
82 * This routine is where the rubber meets the road for identities.
83 */
84 static void
85 tmpfs_hash_in(struct tdirent *t)
86 {
87 uint_t hash;
88 struct tdirent **prevpp;
89 kmutex_t *t_hmtx;
90
91 TMPFS_HASH(t->td_parent, t->td_name, hash);
92 t->td_hash = hash;
93 prevpp = &t_hashtable[T_HASH_INDEX(hash)];
94 t_hmtx = &t_hashmutex[T_MUTEX_INDEX(hash)];
95 mutex_enter(t_hmtx);
96 t->td_link = *prevpp;
97 *prevpp = t;
98 mutex_exit(t_hmtx);
99 }
100
101 /*
102 * Remove tdirent *t from the hash list.
103 */
104 static void
105 tmpfs_hash_out(struct tdirent *t)
106 {
107 uint_t hash;
108 struct tdirent **prevpp;
109 kmutex_t *t_hmtx;
110
111 hash = t->td_hash;
112 prevpp = &t_hashtable[T_HASH_INDEX(hash)];
113 t_hmtx = &t_hashmutex[T_MUTEX_INDEX(hash)];
114 mutex_enter(t_hmtx);
115 while (*prevpp != t)
116 prevpp = &(*prevpp)->td_link;
117 *prevpp = t->td_link;
118 mutex_exit(t_hmtx);
119 }
120
121 /*
122 * Currently called by tdirrename() only.
123 * rename operation needs to be done with lock held, to ensure that
124 * no other operations can access the tmpnode at the same instance.
125 */
126 static void
127 tmpfs_hash_change(struct tdirent *tdp, struct tmpnode *fromtp)
128 {
129 uint_t hash;
130 kmutex_t *t_hmtx;
131
132 hash = tdp->td_hash;
133 t_hmtx = &t_hashmutex[T_MUTEX_INDEX(hash)];
134 mutex_enter(t_hmtx);
135 tdp->td_tmpnode = fromtp;
136 mutex_exit(t_hmtx);
137 }
138
139 static struct tdirent *
140 tmpfs_hash_lookup(char *name, struct tmpnode *parent, uint_t hold,
141 struct tmpnode **found)
142 {
143 struct tdirent *l;
144 uint_t hash;
145 kmutex_t *t_hmtx;
146 struct tmpnode *tnp;
147
148 TMPFS_HASH(parent, name, hash);
149 t_hmtx = &t_hashmutex[T_MUTEX_INDEX(hash)];
150 mutex_enter(t_hmtx);
151 l = t_hashtable[T_HASH_INDEX(hash)];
152 while (l) {
153 if ((l->td_hash == hash) &&
154 (l->td_parent == parent) &&
155 (strcmp(l->td_name, name) == 0)) {
156 /*
157 * We need to make sure that the tmpnode that
158 * we put a hold on is the same one that we pass back.
159 * Hence, temporary variable tnp is necessary.
160 */
161 tnp = l->td_tmpnode;
162 if (hold) {
163 ASSERT(tnp);
164 tmpnode_hold(tnp);
165 }
166 if (found)
167 *found = tnp;
168 mutex_exit(t_hmtx);
169 return (l);
170 } else {
171 l = l->td_link;
172 }
173 }
174 mutex_exit(t_hmtx);
175 return (NULL);
176 }
177
178 /*
179 * Search directory 'parent' for entry 'name'.
180 *
181 * The calling thread can't hold the write version
182 * of the rwlock for the directory being searched
183 *
184 * 0 is returned on success and *foundtp points
185 * to the found tmpnode with its vnode held.
186 */
187 int
188 tdirlookup(
189 struct tmpnode *parent,
190 char *name,
191 struct tmpnode **foundtp,
192 struct cred *cred)
193 {
194 int error;
195
196 *foundtp = NULL;
197 if (parent->tn_type != VDIR)
198 return (ENOTDIR);
199
200 if ((error = tmp_taccess(parent, VEXEC, cred)))
201 return (error);
202
203 if (*name == '\0') {
204 tmpnode_hold(parent);
205 *foundtp = parent;
206 return (0);
207 }
208
209 /*
210 * Search the directory for the matching name
211 * We need the lock protecting the tn_dir list
212 * so that it doesn't change out from underneath us.
213 * tmpfs_hash_lookup() will pass back the tmpnode
214 * with a hold on it.
215 */
216
217 if (tmpfs_hash_lookup(name, parent, 1, foundtp) != NULL) {
218 ASSERT(*foundtp);
219 return (0);
220 }
221
222 return (ENOENT);
223 }
224
/*
 * Enter a directory entry for 'name' and 'tp' into directory 'dir'
 *
 * The caller must hold dir->tn_rwlock as writer.  'op' selects the
 * operation:
 *
 *   DE_CREATE/DE_MKDIR	a new tmpnode is made from 'va' and entered.
 *   DE_LINK		the existing tmpnode 'tp' is entered under 'name'.
 *   DE_RENAME		'tp' (currently in 'fromparent') is entered under
 *			'name', replacing an existing entry if there is one.
 *
 * For DE_LINK and DE_RENAME the link count of 'tp' is incremented up
 * front and decremented again at "out" if the operation fails.
 *
 * Returns 0 on success.  Errors produced here: EACCES if 'name' contains
 * a '/', ENOENT if 'tp' or 'dir' has a zero link count, EMLINK if 'tp'
 * is already at MAXLINK, EINVAL for a rename of 'dir' onto itself, and
 * EEXIST if the name already exists (for DE_LINK, and for
 * DE_CREATE/DE_MKDIR when 'tpp' is supplied, in which case *tpp is the
 * existing, held tmpnode).  Errors from tdircheckpath(), tdirrename(),
 * tmp_taccess(), tdirmaketnode() and tdiraddentry() are passed through.
 * An empty 'name' panics.
 */
int
tdirenter(
	struct tmount	*tm,
	struct tmpnode	*dir,		/* target directory to make entry in */
	char		*name,		/* name of entry */
	enum de_op	op,		/* entry operation */
	struct tmpnode	*fromparent,	/* source directory if rename */
	struct tmpnode	*tp,		/* source tmpnode, if link/rename */
	struct vattr	*va,
	struct tmpnode	**tpp,		/* return tmpnode, if create/mkdir */
	struct cred	*cred,
	caller_context_t *ctp)
{
	struct tdirent *tdp;
	struct tmpnode *found = NULL;
	int error = 0;
	char *s;

	/*
	 * tn_rwlock is held to serialize direnter and dirdeletes
	 */
	ASSERT(RW_WRITE_HELD(&dir->tn_rwlock));
	ASSERT(dir->tn_type == VDIR);

	/*
	 * Don't allow '/' characters in pathname component
	 * (thus in ufs_direnter()).
	 */
	for (s = name; *s; s++)
		if (*s == '/')
			return (EACCES);

	if (name[0] == '\0')
		panic("tdirenter: NULL name");

	/*
	 * For link and rename lock the source entry and check the link count
	 * to see if it has been removed while it was unlocked.
	 * tp's rwlock is only taken when tp is not dir, because the caller
	 * already holds dir's rwlock as writer.
	 */
	if (op == DE_LINK || op == DE_RENAME) {
		if (tp != dir)
			rw_enter(&tp->tn_rwlock, RW_WRITER);
		mutex_enter(&tp->tn_tlock);
		if (tp->tn_nlink == 0) {
			mutex_exit(&tp->tn_tlock);
			if (tp != dir)
				rw_exit(&tp->tn_rwlock);
			return (ENOENT);
		}

		if (tp->tn_nlink == MAXLINK) {
			mutex_exit(&tp->tn_tlock);
			if (tp != dir)
				rw_exit(&tp->tn_rwlock);
			return (EMLINK);
		}
		/*
		 * Account for the new name now; every failure from here on
		 * goes through "out", which undoes this increment.
		 */
		tp->tn_nlink++;
		gethrestime(&tp->tn_ctime);
		mutex_exit(&tp->tn_tlock);
		if (tp != dir)
			rw_exit(&tp->tn_rwlock);
	}

	/*
	 * This might be a "dangling detached directory".
	 * it could have been removed, but a reference
	 * to it kept in u_cwd.  don't bother searching
	 * it, and with any luck the user will get tired
	 * of dealing with us and cd to some absolute
	 * pathway.  *sigh*, thus in ufs, too.
	 */
	if (dir->tn_nlink == 0) {
		error = ENOENT;
		goto out;
	}

	/*
	 * If this is a rename of a directory and the parent is
	 * different (".." must be changed), then the source
	 * directory must not be in the directory hierarchy
	 * above the target, as this would orphan everything
	 * below the source directory.
	 */
	if (op == DE_RENAME) {
		if (tp == dir) {
			error = EINVAL;
			goto out;
		}
		if (tp->tn_type == VDIR) {
			if ((fromparent != dir) &&
			    (error = tdircheckpath(tp, dir, cred))) {
				goto out;
			}
		}
	}

	/*
	 * Search for the entry.  Return "found" if it exists.
	 * The lookup places a hold on "found"; each case below either
	 * hands that hold to the caller through *tpp or releases it.
	 */
	tdp = tmpfs_hash_lookup(name, dir, 1, &found);

	if (tdp) {
		ASSERT(found);
		switch (op) {
		case DE_CREATE:
		case DE_MKDIR:
			/*
			 * NOTE(review): when tpp is NULL the existing node
			 * is released and error stays 0, so the caller is
			 * not told that the name already exists.
			 */
			if (tpp) {
				*tpp = found;
				error = EEXIST;
			} else {
				tmpnode_rele(found);
			}
			break;

		case DE_RENAME:
			/*
			 * Replace the existing entry.  tdirrename() may
			 * return ESAME when source and target are the
			 * same tmpnode; like any other non-zero value
			 * that is treated as an error at "out".
			 */
			error = tdirrename(fromparent, tp,
			    dir, name, found, tdp, cred);
			if (error == 0) {
				if (found != NULL) {
					vnevent_rename_dest(TNTOV(found),
					    TNTOV(dir), name, ctp);
				}
			}

			tmpnode_rele(found);
			break;

		case DE_LINK:
			/*
			 * Can't link to an existing file.
			 */
			error = EEXIST;
			tmpnode_rele(found);
			break;
		}
	} else {

		/*
		 * The entry does not exist. Check write permission in
		 * directory to see if entry can be created.
		 */
		if (error = tmp_taccess(dir, VWRITE, cred))
			goto out;
		if (op == DE_CREATE || op == DE_MKDIR) {
			/*
			 * Make new tmpnode and directory entry as required.
			 * From here on tp refers to the new tmpnode.
			 */
			error = tdirmaketnode(dir, tm, va, op, &tp, cred);
			if (error)
				goto out;
		}
		if (error = tdiraddentry(dir, tp, name, op, fromparent)) {
			if (op == DE_CREATE || op == DE_MKDIR) {
				/*
				 * Unmake the inode we just made.
				 */
				rw_enter(&tp->tn_rwlock, RW_WRITER);
				if ((tp->tn_type) == VDIR) {
					ASSERT(tdp == NULL);
					/*
					 * cleanup allocs made by tdirinit()
					 */
					tdirtrunc(tp);
				}
				mutex_enter(&tp->tn_tlock);
				tp->tn_nlink = 0;
				mutex_exit(&tp->tn_tlock);
				gethrestime(&tp->tn_ctime);
				rw_exit(&tp->tn_rwlock);
				tmpnode_rele(tp);
				tp = NULL;
			}
		} else if (tpp) {
			/* Success: hand the tmpnode back to the caller. */
			*tpp = tp;
		} else if (op == DE_CREATE || op == DE_MKDIR) {
			/* Nobody wants the new tmpnode back; release it. */
			tmpnode_rele(tp);
		}
	}

out:
	if (error && (op == DE_LINK || op == DE_RENAME)) {
		/*
		 * Undo bumped link count.
		 */
		DECR_COUNT(&tp->tn_nlink, &tp->tn_tlock);
		gethrestime(&tp->tn_ctime);
	}
	return (error);
}
419
/*
 * Delete entry tp of name "nm" from dir.
 * Free dir entry space and decrement link count on tmpnode(s).
 *
 * The caller must hold both dir->tn_rwlock and tp->tn_rwlock as writer.
 * 'op' is DR_REMOVE or DR_RMDIR; it decides whether tp's own entries are
 * truncated once its link has been dropped (see the end of the function).
 *
 * Return 0 on success.  Errors produced here: EINVAL for ".", EEXIST for
 * "..", and ENOENT if dir has no entry list, the name is not found, or
 * the entry no longer refers to tp.  Errors from tmp_taccess() and
 * tmp_sticky_remove_access() are passed through.  An empty "nm" panics.
 */
int
tdirdelete(
	struct tmpnode *dir,
	struct tmpnode *tp,
	char *nm,
	enum dr_op op,
	struct cred *cred)
{
	struct tdirent *tpdp;
	int error;
	size_t namelen;
	struct tmpnode *tnp;
	timestruc_t now;

	ASSERT(RW_WRITE_HELD(&dir->tn_rwlock));
	ASSERT(RW_WRITE_HELD(&tp->tn_rwlock));
	ASSERT(dir->tn_type == VDIR);

	if (nm[0] == '\0')
		panic("tdirdelete: NULL name for %p", (void *)tp);

	/*
	 * return error when removing . and ..
	 */
	if (nm[0] == '.') {
		if (nm[1] == '\0')
			return (EINVAL);
		if (nm[1] == '.' && nm[2] == '\0')
			return (EEXIST); /* thus in ufs */
	}

	if (error = tmp_taccess(dir, VEXEC|VWRITE, cred))
		return (error);

	/*
	 * If the parent directory is "sticky", then the user must
	 * own the parent directory or the file in it, or else must
	 * have permission to write the file.  Otherwise it may not
	 * be deleted (except by privileged users).
	 * Same as ufs_dirremove.
	 */
	if ((error = tmp_sticky_remove_access(dir, tp, cred)) != 0)
		return (error);

	/* A directory whose entry list is gone has nothing to delete. */
	if (dir->tn_dir == NULL)
		return (ENOENT);

	/* Look the entry up without taking a hold on its tmpnode. */
	tpdp = tmpfs_hash_lookup(nm, dir, 0, &tnp);
	if (tpdp == NULL) {
		/*
		 * If it is gone, some other thread got here first!
		 * Return error ENOENT.
		 */
		return (ENOENT);
	}

	/*
	 * If the tmpnode in the tdirent changed, we were probably
	 * the victim of a concurrent rename operation.  The original
	 * is gone, so return that status (same as UFS).
	 */
	if (tp != tnp)
		return (ENOENT);

	tmpfs_hash_out(tpdp);

	/*
	 * Take tpdp out of the directory list.
	 */
	ASSERT(tpdp->td_next != tpdp);
	ASSERT(tpdp->td_prev != tpdp);
	if (tpdp->td_prev) {
		tpdp->td_prev->td_next = tpdp->td_next;
	}
	if (tpdp->td_next) {
		tpdp->td_next->td_prev = tpdp->td_prev;
	}

	/*
	 * If the roving slot pointer happens to match tpdp,
	 * point it at the previous dirent.
	 * (The slot pointer is kept in the td_prev of the first dirent.)
	 */
	if (dir->tn_dir->td_prev == tpdp) {
		dir->tn_dir->td_prev = tpdp->td_prev;
	}
	ASSERT(tpdp->td_next != tpdp);
	ASSERT(tpdp->td_prev != tpdp);

	/*
	 * tpdp points to the correct directory entry
	 * The name length must be taken before the entry is freed; the
	 * size released is that of the entry plus its name and NUL.
	 */
	namelen = strlen(tpdp->td_name) + 1;

	tmp_memfree(tpdp, sizeof (struct tdirent) + namelen);
	dir->tn_size -= (sizeof (struct tdirent) + namelen);
	dir->tn_dirents--;

	gethrestime(&now);
	dir->tn_mtime = now;
	dir->tn_ctime = now;
	tp->tn_ctime = now;

	/*
	 * If this is a _REMOVE (unlink) operation there may
	 * be other links to the directory entry.
	 *
	 * For a directory (rmdir, or unlink of a directory), its entries
	 * are truncated only once this was the last name for it, i.e. the
	 * remaining link count is at most 1; more than that is only
	 * expected for DR_REMOVE.
	 */
	ASSERT(tp->tn_nlink > 0);
	DECR_COUNT(&tp->tn_nlink, &tp->tn_tlock);
	if (op == DR_RMDIR || (op == DR_REMOVE && tp->tn_type == VDIR)) {
		if (tp->tn_nlink > 1) {
			ASSERT(op == DR_REMOVE);
		} else {
			tdirtrunc(tp);
			ASSERT(tp->tn_nlink == 0);
		}
	}
	return (0);
}
544
545 /*
546 * tdirinit is used internally to initialize a directory (dir)
547 * with '.' and '..' entries without checking permissions and locking
548 */
549 void
550 tdirinit(
551 struct tmpnode *parent, /* parent of directory to initialize */
552 struct tmpnode *dir) /* the new directory */
553 {
554 struct tdirent *dot, *dotdot;
555 timestruc_t now;
556
557 ASSERT(RW_WRITE_HELD(&parent->tn_rwlock));
558 ASSERT(dir->tn_type == VDIR);
559
560 dot = tmp_memalloc(sizeof (struct tdirent) + 2, TMP_MUSTHAVE);
561 dotdot = tmp_memalloc(sizeof (struct tdirent) + 3, TMP_MUSTHAVE);
562
563 /*
564 * Initialize the entries
565 */
566 dot->td_tmpnode = dir;
567 dot->td_offset = 0;
568 dot->td_name = (char *)dot + sizeof (struct tdirent);
569 dot->td_name[0] = '.';
570 dot->td_parent = dir;
571 tmpfs_hash_in(dot);
572
573 dotdot->td_tmpnode = parent;
574 dotdot->td_offset = 1;
575 dotdot->td_name = (char *)dotdot + sizeof (struct tdirent);
576 dotdot->td_name[0] = '.';
577 dotdot->td_name[1] = '.';
578 dotdot->td_parent = dir;
579 tmpfs_hash_in(dotdot);
580
581 /*
582 * Initialize directory entry list.
583 */
584 dot->td_next = dotdot;
585 dot->td_prev = dotdot; /* dot's td_prev holds roving slot pointer */
586 dotdot->td_next = NULL;
587 dotdot->td_prev = dot;
588
589 gethrestime(&now);
590 dir->tn_mtime = now;
591 dir->tn_ctime = now;
592
593 /*
594 * Link counts are special for the hidden attribute directory.
595 * The only explicit reference in the name space is "." and
596 * the reference through ".." is not counted on the parent
597 * file. The attrdir is created as a side effect to lookup,
598 * so don't change the ctime of the parent.
599 * Since tdirinit is called with both dir and parent being the
600 * same for the root vnode, we need to increment this before we set
601 * tn_nlink = 2 below.
602 */
603 if (!(dir->tn_vnode->v_flag & V_XATTRDIR)) {
604 INCR_COUNT(&parent->tn_nlink, &parent->tn_tlock);
605 parent->tn_ctime = now;
606 }
607
608 dir->tn_dir = dot;
609 dir->tn_size = 2 * sizeof (struct tdirent) + 5; /* dot and dotdot */
610 dir->tn_dirents = 2;
611 dir->tn_nlink = 2;
612 }
613
614
/*
 * tdirtrunc is called to remove all directory entries under this directory.
 *
 * The caller must hold dir->tn_rwlock as writer.  Every entry, including
 * "." and "..", is taken off the entry list and out of the name hash and
 * freed, and the link count of the tmpnode it referred to is decremented
 * (except for the ".." of an attribute directory).  Child directories
 * that are left with no other links are truncated recursively.  On return
 * dir has no entries and its tn_size and tn_dirents are zero.
 */
void
tdirtrunc(struct tmpnode *dir)
{
	struct tdirent *tdp;
	struct tmpnode *tp;
	size_t namelen;
	timestruc_t now;
	int isvattrdir, isdotdot, skip_decr;
	int lock_held;

	ASSERT(RW_WRITE_HELD(&dir->tn_rwlock));
	ASSERT(dir->tn_type == VDIR);

	isvattrdir = (dir->tn_vnode->v_flag & V_XATTRDIR) ? 1 : 0;
	/* Always work on the head of the list; it is popped just below. */
	for (tdp = dir->tn_dir; tdp; tdp = dir->tn_dir) {
		ASSERT(tdp->td_next != tdp);
		ASSERT(tdp->td_prev != tdp);
		ASSERT(tdp->td_tmpnode);

		dir->tn_dir = tdp->td_next;
		namelen = strlen(tdp->td_name) + 1;

		/*
		 * Adjust the link counts to account for this directory
		 * entry removal. Hidden attribute directories may
		 * not be empty as they may be truncated as a side-
		 * effect of removing the parent. We do hold/rele
		 * operations to free up these tmpnodes.
		 *
		 * Skip the link count adjustment for parents of
		 * attribute directories as those link counts
		 * do not include the ".." reference in the hidden
		 * directories.
		 */
		tp = tdp->td_tmpnode;
		isdotdot = (strcmp("..", tdp->td_name) == 0);
		skip_decr = (isvattrdir && isdotdot);
		if (!skip_decr) {
			ASSERT(tp->tn_nlink > 0);
			DECR_COUNT(&tp->tn_nlink, &tp->tn_tlock);
		}

		tmpfs_hash_out(tdp);

		tmp_memfree(tdp, sizeof (struct tdirent) + namelen);
		dir->tn_size -= (sizeof (struct tdirent) + namelen);
		dir->tn_dirents--;

		/*
		 * This directory entry may itself be a directory with
		 * entries and removing it may have created orphans.
		 * On a normal filesystem like UFS this wouldn't be
		 * a huge problem because fsck can reclaim them.  For
		 * TMPFS which resides in RAM however, it means we
		 * end up leaking memory.
		 *
		 * To avoid this we also truncate child directories,
		 * but only if they have no other links to them.
		 * "." (tp == dir) and ".." are not descended into.
		 */
		if (!isdotdot && tp->tn_type == VDIR && tp != dir) {
			if (tp->tn_nlink > 1)
				continue;
			/*
			 * Take the child's rwlock only if this thread does
			 * not already hold it as writer, and drop it again
			 * only if it was taken here.
			 */
			lock_held = RW_WRITE_HELD(&tp->tn_rwlock);
			if (!lock_held)
				rw_enter(&tp->tn_rwlock, RW_WRITER);
			tdirtrunc(tp);
			if (!lock_held)
				rw_exit(&tp->tn_rwlock);
			ASSERT(tp->tn_nlink == 0);
		}
	}

	gethrestime(&now);
	dir->tn_mtime = now;
	dir->tn_ctime = now;

	ASSERT(dir->tn_dir == NULL);
	ASSERT(dir->tn_size == 0);
	ASSERT(dir->tn_dirents == 0);
}
698
699 /*
700 * Check if the source directory is in the path of the target directory.
701 * The target directory is locked by the caller.
702 *
703 * XXX - The source and target's should be different upon entry.
704 */
705 static int
706 tdircheckpath(
707 struct tmpnode *fromtp,
708 struct tmpnode *toparent,
709 struct cred *cred)
710 {
711 int error = 0;
712 struct tmpnode *dir, *dotdot;
713 struct tdirent *tdp;
714
715 ASSERT(RW_WRITE_HELD(&toparent->tn_rwlock));
716
717 tdp = tmpfs_hash_lookup("..", toparent, 1, &dotdot);
718 if (tdp == NULL)
719 return (ENOENT);
720
721 ASSERT(dotdot);
722
723 if (dotdot == toparent) {
724 /* root of fs. search trivially satisfied. */
725 tmpnode_rele(dotdot);
726 return (0);
727 }
728 for (;;) {
729 /*
730 * Return error for cases like "mv c c/d",
731 * "mv c c/d/e" and so on.
732 */
733 if (dotdot == fromtp) {
734 tmpnode_rele(dotdot);
735 error = EINVAL;
736 break;
737 }
738 dir = dotdot;
739 error = tdirlookup(dir, "..", &dotdot, cred);
740 if (error) {
741 tmpnode_rele(dir);
742 break;
743 }
744 /*
745 * We're okay if we traverse the directory tree up to
746 * the root directory and don't run into the
747 * parent directory.
748 */
749 if (dir == dotdot) {
750 tmpnode_rele(dir);
751 tmpnode_rele(dotdot);
752 break;
753 }
754 tmpnode_rele(dir);
755 }
756 return (error);
757 }
758
/*
 * Rename over an existing entry: make the directory entry 'where' in
 * 'toparent', which currently refers to tmpnode 'to', refer to 'fromtp'
 * instead, and drop the link that entry held on 'to'.
 *
 * The caller holds toparent->tn_rwlock as writer.  The rwlocks of
 * 'fromtp' and 'to' are taken here and released before returning.
 *
 * Returns 0 on success; ESAME if 'fromtp' and 'to' are the same tmpnode;
 * EXDEV if the nodes are not all on the same vfs; EISDIR or ENOTDIR if
 * exactly one of source and target is a directory; EBUSY if a target
 * directory cannot be vfs-locked or is mounted on; EEXIST if a target
 * directory is not empty or has extra links; or the error from
 * tmp_taccess() / tmp_sticky_remove_access().
 */
static int
tdirrename(
	struct tmpnode *fromparent,	/* parent directory of source */
	struct tmpnode *fromtp,		/* source tmpnode */
	struct tmpnode *toparent,	/* parent directory of target */
	char *nm,			/* entry we are trying to change */
	struct tmpnode *to,		/* target tmpnode */
	struct tdirent *where,		/* target tmpnode directory entry */
	struct cred *cred)		/* credentials */
{
	int error = 0;
	int doingdirectory;
	timestruc_t now;

	/* 'nm' is otherwise unused; this keeps lint quiet about it. */
#if defined(lint)
	nm = nm;
#endif
	ASSERT(RW_WRITE_HELD(&toparent->tn_rwlock));

	/*
	 * Short circuit rename of something to itself.
	 */
	if (fromtp == to)
		return (ESAME);		/* special KLUDGE error code */

	rw_enter(&fromtp->tn_rwlock, RW_READER);
	rw_enter(&to->tn_rwlock, RW_READER);

	/*
	 * Check that everything is on the same filesystem.
	 */
	if (to->tn_vnode->v_vfsp != toparent->tn_vnode->v_vfsp ||
	    to->tn_vnode->v_vfsp != fromtp->tn_vnode->v_vfsp) {
		error = EXDEV;
		goto out;
	}

	/*
	 * Must have write permission to rewrite target entry.
	 * Check for stickyness.
	 */
	if ((error = tmp_taccess(toparent, VWRITE, cred)) != 0 ||
	    (error = tmp_sticky_remove_access(toparent, to, cred)) != 0)
		goto out;

	/*
	 * Ensure source and target are compatible (both directories
	 * or both not directories).  If target is a directory it must
	 * be empty and have no links to it; in addition it must not
	 * be a mount point, and both the source and target must be
	 * writable.
	 */
	doingdirectory = (fromtp->tn_type == VDIR);
	if (to->tn_type == VDIR) {
		if (!doingdirectory) {
			error = EISDIR;
			goto out;
		}
		/*
		 * vn_vfswlock will prevent mounts from using the directory
		 * until we are done.
		 */
		if (vn_vfswlock(TNTOV(to))) {
			error = EBUSY;
			goto out;
		}
		if (vn_mountedvfs(TNTOV(to)) != NULL) {
			vn_vfsunlock(TNTOV(to));
			error = EBUSY;
			goto out;
		}

		mutex_enter(&to->tn_tlock);
		if (to->tn_dirents > 2 || to->tn_nlink > 2) {
			mutex_exit(&to->tn_tlock);
			vn_vfsunlock(TNTOV(to));
			error = EEXIST;	/* SIGH should be ENOTEMPTY */
			/*
			 * Update atime because checking tn_dirents is
			 * logically equivalent to reading the directory
			 */
			gethrestime(&to->tn_atime);
			goto out;
		}
		mutex_exit(&to->tn_tlock);
	} else if (doingdirectory) {
		error = ENOTDIR;
		goto out;
	}

	/*
	 * All checks passed.  From here on 'doingdirectory' also implies
	 * that 'to' is a directory and that its vfs lock is held.
	 * Switch the directory entry over to the source tmpnode.
	 */
	tmpfs_hash_change(where, fromtp);
	gethrestime(&now);
	toparent->tn_mtime = now;
	toparent->tn_ctime = now;

	/*
	 * Upgrade to write lock on "to" (i.e., the target tmpnode).
	 * The reader lock is dropped and the writer lock then acquired,
	 * so the lock is not held in between.
	 */
	rw_exit(&to->tn_rwlock);
	rw_enter(&to->tn_rwlock, RW_WRITER);

	/*
	 * Decrement the link count of the target tmpnode.
	 */
	DECR_COUNT(&to->tn_nlink, &to->tn_tlock);
	to->tn_ctime = now;

	if (doingdirectory) {
		/*
		 * The entry for "to" no longer exists so release the vfslock.
		 */
		vn_vfsunlock(TNTOV(to));

		/*
		 * Decrement the target link count and delete all entries.
		 */
		tdirtrunc(to);
		ASSERT(to->tn_nlink == 0);

		/*
		 * Renaming a directory with the parent different
		 * requires that ".." be rewritten.  The window is
		 * still there for ".." to be inconsistent, but this
		 * is unavoidable, and a lot shorter than when it was
		 * done in a user process.
		 */
		if (fromparent != toparent)
			tdirfixdotdot(fromtp, fromparent, toparent);
	}
out:
	rw_exit(&to->tn_rwlock);
	rw_exit(&fromtp->tn_rwlock);
	return (error);
}
893
894 static void
895 tdirfixdotdot(
896 struct tmpnode *fromtp, /* child directory */
897 struct tmpnode *fromparent, /* old parent directory */
898 struct tmpnode *toparent) /* new parent directory */
899 {
900 struct tdirent *dotdot;
901
902 ASSERT(RW_LOCK_HELD(&toparent->tn_rwlock));
903
904 /*
905 * Increment the link count in the new parent tmpnode
906 */
907 INCR_COUNT(&toparent->tn_nlink, &toparent->tn_tlock);
908 gethrestime(&toparent->tn_ctime);
909
910 dotdot = tmpfs_hash_lookup("..", fromtp, 0, NULL);
911
912 ASSERT(dotdot->td_tmpnode == fromparent);
913 dotdot->td_tmpnode = toparent;
914
915 /*
916 * Decrement the link count of the old parent tmpnode.
917 * If fromparent is NULL, then this is a new directory link;
918 * it has no parent, so we need not do anything.
919 */
920 if (fromparent != NULL) {
921 mutex_enter(&fromparent->tn_tlock);
922 if (fromparent->tn_nlink != 0) {
923 fromparent->tn_nlink--;
924 gethrestime(&fromparent->tn_ctime);
925 }
926 mutex_exit(&fromparent->tn_tlock);
927 }
928 }
929
/*
 * Allocate a directory entry called 'name' for tmpnode 'tp' and insert
 * it into directory 'dir': into the name hash and into dir's entry
 * list, which is kept ordered by td_offset.
 *
 * 'fromtp' is only used for a DE_RENAME of a directory, where it is
 * passed to tdirfixdotdot() as the old parent directory (tdirenter()
 * passes its 'fromparent' here).
 *
 * Returns 0 on success, ENOENT if dir no longer has an entry list,
 * EXDEV if tp and dir are on different filesystems, or ENOSPC if the
 * entry cannot be allocated.
 */
static int
tdiraddentry(
	struct tmpnode *dir,	/* target directory to make entry in */
	struct tmpnode *tp,	/* new tmpnode */
	char *name,
	enum de_op op,
	struct tmpnode *fromtp)
{
	struct tdirent *tdp, *tpdp;
	size_t namelen, alloc_size;
	timestruc_t now;

	/*
	 * Make sure the parent directory wasn't removed from
	 * underneath the caller.
	 */
	if (dir->tn_dir == NULL)
		return (ENOENT);

	/*
	 * Check that everything is on the same filesystem.
	 */
	if (tp->tn_vnode->v_vfsp != dir->tn_vnode->v_vfsp)
		return (EXDEV);

	/*
	 * Allocate and initialize directory entry
	 * (one allocation holds the tdirent followed by the name and NUL).
	 */
	namelen = strlen(name) + 1;
	alloc_size = namelen + sizeof (struct tdirent);
	tdp = tmp_memalloc(alloc_size, 0);
	if (tdp == NULL)
		return (ENOSPC);

	/*
	 * The allocation was the last step that can fail, so it is now
	 * safe to repoint ".." of a renamed directory at its new parent.
	 */
	if ((op == DE_RENAME) && (tp->tn_type == VDIR))
		tdirfixdotdot(tp, fromtp, dir);

	dir->tn_size += alloc_size;
	dir->tn_dirents++;
	tdp->td_tmpnode = tp;
	tdp->td_parent = dir;

	/*
	 * The directory entry and its name were allocated sequentially.
	 */
	tdp->td_name = (char *)tdp + sizeof (struct tdirent);
	(void) strcpy(tdp->td_name, name);

	tmpfs_hash_in(tdp);

	/*
	 * Some utilities expect the size of a directory to remain
	 * somewhat static.  For example, a routine which unlinks
	 * files between calls to readdir(); the size of the
	 * directory changes from underneath it and so the real
	 * directory offset in bytes is invalid.  To circumvent
	 * this problem, we initialize a directory entry with an
	 * phony offset, and use this offset to determine end of
	 * file in tmp_readdir.
	 */
	tpdp = dir->tn_dir->td_prev;
	/*
	 * Install at first empty "slot" in directory list.
	 * Starting from the roving slot pointer, skip forward while the
	 * next entry's offset directly follows the current one; the walk
	 * stops at the first gap or at the end of the list.
	 */
	while (tpdp->td_next != NULL && (tpdp->td_next->td_offset -
	    tpdp->td_offset) <= 1) {
		ASSERT(tpdp->td_next != tpdp);
		ASSERT(tpdp->td_prev != tpdp);
		ASSERT(tpdp->td_next->td_offset > tpdp->td_offset);
		tpdp = tpdp->td_next;
	}
	tdp->td_offset = tpdp->td_offset + 1;

	/*
	 * If we're at the end of the dirent list and the offset (which
	 * is necessarily the largest offset in this directory) is more
	 * than twice the number of dirents, that means the directory is
	 * 50% holes.  At this point we reset the slot pointer back to
	 * the beginning of the directory so we start using the holes.
	 * The idea is that if there are N dirents, there must also be
	 * N holes, so we can satisfy the next N creates by walking at
	 * most 2N entries; thus the average cost of a create is constant.
	 * Note that we use the first dirent's td_prev as the roving
	 * slot pointer; it's ugly, but it saves a word in every dirent.
	 */
	if (tpdp->td_next == NULL && tpdp->td_offset > 2 * dir->tn_dirents)
		dir->tn_dir->td_prev = dir->tn_dir->td_next;
	else
		dir->tn_dir->td_prev = tdp;

	ASSERT(tpdp->td_next != tpdp);
	ASSERT(tpdp->td_prev != tpdp);

	/* Splice the new entry into the list directly after tpdp. */
	tdp->td_next = tpdp->td_next;
	if (tdp->td_next) {
		tdp->td_next->td_prev = tdp;
	}
	tdp->td_prev = tpdp;
	tpdp->td_next = tdp;

	ASSERT(tdp->td_next != tdp);
	ASSERT(tdp->td_prev != tdp);
	ASSERT(tpdp->td_next != tpdp);
	ASSERT(tpdp->td_prev != tpdp);

	gethrestime(&now);
	dir->tn_mtime = now;
	dir->tn_ctime = now;

	return (0);
}
1041
1042 static int
1043 tdirmaketnode(
1044 struct tmpnode *dir,
1045 struct tmount *tm,
1046 struct vattr *va,
1047 enum de_op op,
1048 struct tmpnode **newnode,
1049 struct cred *cred)
1050 {
1051 struct tmpnode *tp;
1052 enum vtype type;
1053
1054 ASSERT(va != NULL);
1055 ASSERT(op == DE_CREATE || op == DE_MKDIR);
1056 if (((va->va_mask & AT_ATIME) && TIMESPEC_OVERFLOW(&va->va_atime)) ||
1057 ((va->va_mask & AT_MTIME) && TIMESPEC_OVERFLOW(&va->va_mtime)))
1058 return (EOVERFLOW);
1059 type = va->va_type;
1060 tp = tmp_memalloc(sizeof (struct tmpnode), TMP_MUSTHAVE);
1061 tmpnode_init(tm, tp, va, cred);
1062
1063 /* setup normal file/dir's extended attribute directory */
1064 if (dir->tn_flags & ISXATTR) {
1065 /* parent dir is , mark file as xattr */
1066 tp->tn_flags |= ISXATTR;
1067 }
1068
1069
1070 if (type == VBLK || type == VCHR) {
1071 tp->tn_vnode->v_rdev = tp->tn_rdev = va->va_rdev;
1072 } else {
1073 tp->tn_vnode->v_rdev = tp->tn_rdev = NODEV;
1074 }
1075 tp->tn_vnode->v_type = type;
1076 tp->tn_uid = crgetuid(cred);
1077
1078 /*
1079 * To determine the group-id of the created file:
1080 * 1) If the gid is set in the attribute list (non-Sun & pre-4.0
1081 * clients are not likely to set the gid), then use it if
1082 * the process is privileged, belongs to the target group,
1083 * or the group is the same as the parent directory.
1084 * 2) If the filesystem was not mounted with the Old-BSD-compatible
1085 * GRPID option, and the directory's set-gid bit is clear,
1086 * then use the process's gid.
1087 * 3) Otherwise, set the group-id to the gid of the parent directory.
1088 */
1089 if ((va->va_mask & AT_GID) &&
1090 ((va->va_gid == dir->tn_gid) || groupmember(va->va_gid, cred) ||
1091 secpolicy_vnode_create_gid(cred) == 0)) {
1092 /*
1093 * XXX - is this only the case when a 4.0 NFS client, or a
1094 * client derived from that code, makes a call over the wire?
1095 */
1096 tp->tn_gid = va->va_gid;
1097 } else {
1098 if (dir->tn_mode & VSGID)
1099 tp->tn_gid = dir->tn_gid;
1100 else
1101 tp->tn_gid = crgetgid(cred);
1102 }
1103 /*
1104 * If we're creating a directory, and the parent directory has the
1105 * set-GID bit set, set it on the new directory.
1106 * Otherwise, if the user is neither privileged nor a member of the
1107 * file's new group, clear the file's set-GID bit.
1108 */
1109 if (dir->tn_mode & VSGID && type == VDIR)
1110 tp->tn_mode |= VSGID;
1111 else {
1112 if ((tp->tn_mode & VSGID) &&
1113 secpolicy_vnode_setids_setgids(cred, tp->tn_gid) != 0)
1114 tp->tn_mode &= ~VSGID;
1115 }
1116
1117 if (va->va_mask & AT_ATIME)
1118 tp->tn_atime = va->va_atime;
1119 if (va->va_mask & AT_MTIME)
1120 tp->tn_mtime = va->va_mtime;
1121
1122 if (op == DE_MKDIR)
1123 tdirinit(dir, tp);
1124
1125 *newnode = tp;
1126 return (0);
1127 }