Print this page
7656 unlinking directory on tmpfs can cause kernel panic
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/fs/tmpfs/tmp_dir.c
+++ new/usr/src/uts/common/fs/tmpfs/tmp_dir.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
↓ open down ↓ |
13 lines elided |
↑ open up ↑ |
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 + *
25 + * Copyright 2016 RackTop Systems.
24 26 */
25 27
26 -#pragma ident "%Z%%M% %I% %E% SMI"
27 -
28 28 #include <sys/types.h>
29 29 #include <sys/param.h>
30 30 #include <sys/sysmacros.h>
31 31 #include <sys/systm.h>
32 32 #include <sys/time.h>
33 33 #include <sys/vfs.h>
34 34 #include <sys/vnode.h>
35 35 #include <sys/errno.h>
36 36 #include <sys/cmn_err.h>
37 37 #include <sys/cred.h>
38 38 #include <sys/stat.h>
39 39 #include <sys/debug.h>
40 40 #include <sys/policy.h>
41 41 #include <sys/fs/tmpnode.h>
42 42 #include <sys/fs/tmp.h>
43 43 #include <sys/vtrace.h>
44 44
45 45 static int tdircheckpath(struct tmpnode *, struct tmpnode *, struct cred *);
46 46 static int tdirrename(struct tmpnode *, struct tmpnode *, struct tmpnode *,
47 47 char *, struct tmpnode *, struct tdirent *, struct cred *);
48 48 static void tdirfixdotdot(struct tmpnode *, struct tmpnode *, struct tmpnode *);
49 49 static int tdirmaketnode(struct tmpnode *, struct tmount *, struct vattr *,
50 50 enum de_op, struct tmpnode **, struct cred *);
51 51 static int tdiraddentry(struct tmpnode *, struct tmpnode *, char *,
52 52 enum de_op, struct tmpnode *);
53 53
54 54
55 55 #define T_HASH_SIZE 8192 /* must be power of 2 */
56 56 #define T_MUTEX_SIZE 64
57 57
58 58 static struct tdirent *t_hashtable[T_HASH_SIZE];
59 59 static kmutex_t t_hashmutex[T_MUTEX_SIZE];
60 60
61 61 #define T_HASH_INDEX(a) ((a) & (T_HASH_SIZE-1))
62 62 #define T_MUTEX_INDEX(a) ((a) & (T_MUTEX_SIZE-1))
63 63
64 64 #define TMPFS_HASH(tp, name, hash) \
65 65 { \
66 66 char Xc, *Xcp; \
67 67 hash = (uint_t)(uintptr_t)(tp) >> 8; \
68 68 for (Xcp = (name); (Xc = *Xcp) != 0; Xcp++) \
69 69 hash = (hash << 4) + hash + (uint_t)Xc; \
70 70 }
71 71
72 72 void
73 73 tmpfs_hash_init(void)
74 74 {
75 75 int ix;
76 76
77 77 for (ix = 0; ix < T_MUTEX_SIZE; ix++)
78 78 mutex_init(&t_hashmutex[ix], NULL, MUTEX_DEFAULT, NULL);
79 79 }
80 80
81 81 /*
82 82 * Insert tdirent *t into the hash list, keyed on its parent and name.
83 83 */
84 84 static void
85 85 tmpfs_hash_in(struct tdirent *t)
86 86 {
87 87 uint_t hash;
88 88 struct tdirent **prevpp;
89 89 kmutex_t *t_hmtx;
90 90
91 91 TMPFS_HASH(t->td_parent, t->td_name, hash);
92 92 t->td_hash = hash;
93 93 prevpp = &t_hashtable[T_HASH_INDEX(hash)];
94 94 t_hmtx = &t_hashmutex[T_MUTEX_INDEX(hash)];
95 95 mutex_enter(t_hmtx);
96 96 t->td_link = *prevpp;
97 97 *prevpp = t;
98 98 mutex_exit(t_hmtx);
99 99 }
100 100
101 101 /*
102 102 * Remove tdirent *t from the hash list.
103 103 */
104 104 static void
105 105 tmpfs_hash_out(struct tdirent *t)
106 106 {
107 107 uint_t hash;
108 108 struct tdirent **prevpp;
109 109 kmutex_t *t_hmtx;
110 110
111 111 hash = t->td_hash;
112 112 prevpp = &t_hashtable[T_HASH_INDEX(hash)];
113 113 t_hmtx = &t_hashmutex[T_MUTEX_INDEX(hash)];
114 114 mutex_enter(t_hmtx);
115 115 while (*prevpp != t)
116 116 prevpp = &(*prevpp)->td_link;
117 117 *prevpp = t->td_link;
118 118 mutex_exit(t_hmtx);
119 119 }
120 120
121 121 /*
122 122 * Currently called by tdirrename() only.
123 123 * rename operation needs to be done with lock held, to ensure that
124 124 * no other operations can access the tmpnode at the same time.
125 125 */
126 126 static void
127 127 tmpfs_hash_change(struct tdirent *tdp, struct tmpnode *fromtp)
128 128 {
129 129 uint_t hash;
130 130 kmutex_t *t_hmtx;
131 131
132 132 hash = tdp->td_hash;
133 133 t_hmtx = &t_hashmutex[T_MUTEX_INDEX(hash)];
134 134 mutex_enter(t_hmtx);
135 135 tdp->td_tmpnode = fromtp;
136 136 mutex_exit(t_hmtx);
137 137 }
138 138
139 139 static struct tdirent *
140 140 tmpfs_hash_lookup(char *name, struct tmpnode *parent, uint_t hold,
141 141 struct tmpnode **found)
142 142 {
143 143 struct tdirent *l;
144 144 uint_t hash;
145 145 kmutex_t *t_hmtx;
146 146 struct tmpnode *tnp;
147 147
148 148 TMPFS_HASH(parent, name, hash);
149 149 t_hmtx = &t_hashmutex[T_MUTEX_INDEX(hash)];
150 150 mutex_enter(t_hmtx);
151 151 l = t_hashtable[T_HASH_INDEX(hash)];
152 152 while (l) {
153 153 if ((l->td_hash == hash) &&
154 154 (l->td_parent == parent) &&
155 155 (strcmp(l->td_name, name) == 0)) {
156 156 /*
157 157 * We need to make sure that the tmpnode that
158 158 * we put a hold on is the same one that we pass back.
159 159 * Hence, temporary variable tnp is necessary.
160 160 */
161 161 tnp = l->td_tmpnode;
162 162 if (hold) {
163 163 ASSERT(tnp);
164 164 tmpnode_hold(tnp);
165 165 }
166 166 if (found)
167 167 *found = tnp;
168 168 mutex_exit(t_hmtx);
169 169 return (l);
170 170 } else {
171 171 l = l->td_link;
172 172 }
173 173 }
174 174 mutex_exit(t_hmtx);
175 175 return (NULL);
176 176 }
177 177
178 178 /*
179 179 * Search directory 'parent' for entry 'name'.
180 180 *
181 181 * The calling thread can't hold the write version
182 182 * of the rwlock for the directory being searched
183 183 *
184 184 * 0 is returned on success and *foundtp points
185 185 * to the found tmpnode with its vnode held.
186 186 */
187 187 int
188 188 tdirlookup(
189 189 struct tmpnode *parent,
190 190 char *name,
191 191 struct tmpnode **foundtp,
192 192 struct cred *cred)
193 193 {
194 194 int error;
195 195
196 196 *foundtp = NULL;
197 197 if (parent->tn_type != VDIR)
198 198 return (ENOTDIR);
199 199
200 200 if ((error = tmp_taccess(parent, VEXEC, cred)))
201 201 return (error);
202 202
203 203 if (*name == '\0') {
204 204 tmpnode_hold(parent);
205 205 *foundtp = parent;
206 206 return (0);
207 207 }
208 208
209 209 /*
210 210 * Search the directory for the matching name.
211 211 * We need the lock protecting the tn_dir list
212 212 * so that it doesn't change out from underneath us.
213 213 * tmpfs_hash_lookup() will pass back the tmpnode
214 214 * with a hold on it.
215 215 */
216 216
217 217 if (tmpfs_hash_lookup(name, parent, 1, foundtp) != NULL) {
218 218 ASSERT(*foundtp);
219 219 return (0);
220 220 }
221 221
222 222 return (ENOENT);
223 223 }
224 224
225 225 /*
226 226 * Enter a directory entry for 'name' and 'tp' into directory 'dir'
227 227 *
228 228 * Returns 0 on success.
229 229 */
230 230 int
231 231 tdirenter(
232 232 struct tmount *tm,
233 233 struct tmpnode *dir, /* target directory to make entry in */
234 234 char *name, /* name of entry */
235 235 enum de_op op, /* entry operation */
236 236 struct tmpnode *fromparent, /* source directory if rename */
237 237 struct tmpnode *tp, /* source tmpnode, if link/rename */
238 238 struct vattr *va,
239 239 struct tmpnode **tpp, /* return tmpnode, if create/mkdir */
240 240 struct cred *cred,
241 241 caller_context_t *ctp)
242 242 {
243 243 struct tdirent *tdp;
244 244 struct tmpnode *found = NULL;
245 245 int error = 0;
246 246 char *s;
247 247
248 248 /*
249 249 * tn_rwlock is held to serialize direnter and dirdeletes
250 250 */
251 251 ASSERT(RW_WRITE_HELD(&dir->tn_rwlock));
252 252 ASSERT(dir->tn_type == VDIR);
253 253
254 254 /*
255 255 * Don't allow '/' characters in pathname component
256 256 * (thus in ufs_direnter()).
257 257 */
258 258 for (s = name; *s; s++)
259 259 if (*s == '/')
260 260 return (EACCES);
261 261
262 262 if (name[0] == '\0')
263 263 panic("tdirenter: NULL name");
264 264
265 265 /*
266 266 * For link and rename lock the source entry and check the link count
267 267 * to see if it has been removed while it was unlocked.
268 268 */
269 269 if (op == DE_LINK || op == DE_RENAME) {
270 270 if (tp != dir)
271 271 rw_enter(&tp->tn_rwlock, RW_WRITER);
272 272 mutex_enter(&tp->tn_tlock);
273 273 if (tp->tn_nlink == 0) {
274 274 mutex_exit(&tp->tn_tlock);
275 275 if (tp != dir)
276 276 rw_exit(&tp->tn_rwlock);
277 277 return (ENOENT);
278 278 }
279 279
280 280 if (tp->tn_nlink == MAXLINK) {
281 281 mutex_exit(&tp->tn_tlock);
282 282 if (tp != dir)
283 283 rw_exit(&tp->tn_rwlock);
284 284 return (EMLINK);
285 285 }
286 286 tp->tn_nlink++;
287 287 gethrestime(&tp->tn_ctime);
288 288 mutex_exit(&tp->tn_tlock);
289 289 if (tp != dir)
290 290 rw_exit(&tp->tn_rwlock);
291 291 }
292 292
293 293 /*
294 294 * This might be a "dangling detached directory".
295 295 * it could have been removed, but a reference
296 296 * to it kept in u_cwd. don't bother searching
297 297 * it, and with any luck the user will get tired
298 298 * of dealing with us and cd to some absolute
299 299 * pathway. *sigh*, thus in ufs, too.
300 300 */
301 301 if (dir->tn_nlink == 0) {
302 302 error = ENOENT;
303 303 goto out;
304 304 }
305 305
306 306 /*
307 307 * If this is a rename of a directory and the parent is
308 308 * different (".." must be changed), then the source
309 309 * directory must not be in the directory hierarchy
310 310 * above the target, as this would orphan everything
311 311 * below the source directory.
312 312 */
313 313 if (op == DE_RENAME) {
314 314 if (tp == dir) {
315 315 error = EINVAL;
316 316 goto out;
317 317 }
318 318 if (tp->tn_type == VDIR) {
319 319 if ((fromparent != dir) &&
320 320 (error = tdircheckpath(tp, dir, cred))) {
321 321 goto out;
322 322 }
323 323 }
324 324 }
325 325
326 326 /*
327 327 * Search for the entry. Return "found" if it exists.
328 328 */
329 329 tdp = tmpfs_hash_lookup(name, dir, 1, &found);
330 330
331 331 if (tdp) {
332 332 ASSERT(found);
333 333 switch (op) {
334 334 case DE_CREATE:
335 335 case DE_MKDIR:
336 336 if (tpp) {
337 337 *tpp = found;
338 338 error = EEXIST;
339 339 } else {
340 340 tmpnode_rele(found);
341 341 }
342 342 break;
343 343
344 344 case DE_RENAME:
345 345 error = tdirrename(fromparent, tp,
346 346 dir, name, found, tdp, cred);
347 347 if (error == 0) {
348 348 if (found != NULL) {
349 349 vnevent_rename_dest(TNTOV(found),
350 350 TNTOV(dir), name, ctp);
351 351 }
352 352 }
353 353
354 354 tmpnode_rele(found);
355 355 break;
356 356
357 357 case DE_LINK:
358 358 /*
359 359 * Can't link to an existing file.
360 360 */
361 361 error = EEXIST;
362 362 tmpnode_rele(found);
363 363 break;
364 364 }
365 365 } else {
366 366
367 367 /*
368 368 * The entry does not exist. Check write permission in
369 369 * directory to see if entry can be created.
370 370 */
371 371 if (error = tmp_taccess(dir, VWRITE, cred))
372 372 goto out;
373 373 if (op == DE_CREATE || op == DE_MKDIR) {
374 374 /*
375 375 * Make new tmpnode and directory entry as required.
376 376 */
377 377 error = tdirmaketnode(dir, tm, va, op, &tp, cred);
378 378 if (error)
379 379 goto out;
380 380 }
381 381 if (error = tdiraddentry(dir, tp, name, op, fromparent)) {
382 382 if (op == DE_CREATE || op == DE_MKDIR) {
383 383 /*
384 384 * Unmake the inode we just made.
385 385 */
386 386 rw_enter(&tp->tn_rwlock, RW_WRITER);
387 387 if ((tp->tn_type) == VDIR) {
388 388 ASSERT(tdp == NULL);
389 389 /*
390 390 * cleanup allocs made by tdirinit()
391 391 */
392 392 tdirtrunc(tp);
393 393 }
394 394 mutex_enter(&tp->tn_tlock);
395 395 tp->tn_nlink = 0;
396 396 mutex_exit(&tp->tn_tlock);
397 397 gethrestime(&tp->tn_ctime);
398 398 rw_exit(&tp->tn_rwlock);
399 399 tmpnode_rele(tp);
400 400 tp = NULL;
401 401 }
402 402 } else if (tpp) {
403 403 *tpp = tp;
404 404 } else if (op == DE_CREATE || op == DE_MKDIR) {
405 405 tmpnode_rele(tp);
406 406 }
407 407 }
408 408
409 409 out:
410 410 if (error && (op == DE_LINK || op == DE_RENAME)) {
411 411 /*
412 412 * Undo bumped link count.
413 413 */
414 414 DECR_COUNT(&tp->tn_nlink, &tp->tn_tlock);
415 415 gethrestime(&tp->tn_ctime);
416 416 }
417 417 return (error);
418 418 }
419 419
420 420 /*
421 421 * Delete entry tp of name "nm" from dir.
422 422 * Free dir entry space and decrement link count on tmpnode(s).
423 423 *
424 424 * Return 0 on success.
425 425 */
426 426 int
427 427 tdirdelete(
428 428 struct tmpnode *dir,
429 429 struct tmpnode *tp,
430 430 char *nm,
431 431 enum dr_op op,
432 432 struct cred *cred)
433 433 {
434 434 struct tdirent *tpdp;
435 435 int error;
436 436 size_t namelen;
437 437 struct tmpnode *tnp;
438 438 timestruc_t now;
439 439
440 440 ASSERT(RW_WRITE_HELD(&dir->tn_rwlock));
441 441 ASSERT(RW_WRITE_HELD(&tp->tn_rwlock));
442 442 ASSERT(dir->tn_type == VDIR);
443 443
444 444 if (nm[0] == '\0')
445 445 panic("tdirdelete: NULL name for %p", (void *)tp);
446 446
447 447 /*
448 448 * return error when removing . and ..
449 449 */
450 450 if (nm[0] == '.') {
451 451 if (nm[1] == '\0')
452 452 return (EINVAL);
453 453 if (nm[1] == '.' && nm[2] == '\0')
454 454 return (EEXIST); /* thus in ufs */
455 455 }
456 456
457 457 if (error = tmp_taccess(dir, VEXEC|VWRITE, cred))
458 458 return (error);
459 459
460 460 /*
461 461 * If the parent directory is "sticky", then the user must
462 462 * own the parent directory or the file in it, or else must
463 463 * have permission to write the file. Otherwise it may not
464 464 * be deleted (except by privileged users).
465 465 * Same as ufs_dirremove.
466 466 */
467 467 if ((error = tmp_sticky_remove_access(dir, tp, cred)) != 0)
468 468 return (error);
469 469
470 470 if (dir->tn_dir == NULL)
471 471 return (ENOENT);
472 472
473 473 tpdp = tmpfs_hash_lookup(nm, dir, 0, &tnp);
474 474 if (tpdp == NULL) {
475 475 /*
476 476 * If it is gone, some other thread got here first!
477 477 * Return error ENOENT.
478 478 */
479 479 return (ENOENT);
480 480 }
481 481
482 482 /*
483 483 * If the tmpnode in the tdirent changed, we were probably
484 484 * the victim of a concurrent rename operation. The original
485 485 * is gone, so return that status (same as UFS).
486 486 */
487 487 if (tp != tnp)
488 488 return (ENOENT);
489 489
490 490 tmpfs_hash_out(tpdp);
491 491
492 492 /*
493 493 * Take tpdp out of the directory list.
494 494 */
495 495 ASSERT(tpdp->td_next != tpdp);
496 496 ASSERT(tpdp->td_prev != tpdp);
497 497 if (tpdp->td_prev) {
498 498 tpdp->td_prev->td_next = tpdp->td_next;
499 499 }
500 500 if (tpdp->td_next) {
501 501 tpdp->td_next->td_prev = tpdp->td_prev;
502 502 }
503 503
504 504 /*
505 505 * If the roving slot pointer happens to match tpdp,
506 506 * point it at the previous dirent.
507 507 */
508 508 if (dir->tn_dir->td_prev == tpdp) {
509 509 dir->tn_dir->td_prev = tpdp->td_prev;
510 510 }
511 511 ASSERT(tpdp->td_next != tpdp);
512 512 ASSERT(tpdp->td_prev != tpdp);
513 513
514 514 /*
515 515 * tpdp points to the correct directory entry
516 516 */
517 517 namelen = strlen(tpdp->td_name) + 1;
↓ open down ↓ |
480 lines elided |
↑ open up ↑ |
518 518
519 519 tmp_memfree(tpdp, sizeof (struct tdirent) + namelen);
520 520 dir->tn_size -= (sizeof (struct tdirent) + namelen);
521 521 dir->tn_dirents--;
522 522
523 523 gethrestime(&now);
524 524 dir->tn_mtime = now;
525 525 dir->tn_ctime = now;
526 526 tp->tn_ctime = now;
527 527
528 + /*
529 + * If this is a DR_REMOVE (unlink) operation there may
530 + * be other links to the directory entry.
531 + */
528 532 ASSERT(tp->tn_nlink > 0);
529 533 DECR_COUNT(&tp->tn_nlink, &tp->tn_tlock);
530 - if (op == DR_RMDIR && tp->tn_type == VDIR) {
531 - tdirtrunc(tp);
532 - ASSERT(tp->tn_nlink == 0);
534 + if (op == DR_RMDIR || (op == DR_REMOVE && tp->tn_type == VDIR)) {
535 + if (tp->tn_nlink > 1) {
536 + ASSERT(op == DR_REMOVE);
537 + } else {
538 + tdirtrunc(tp);
539 + ASSERT(tp->tn_nlink == 0);
540 + }
533 541 }
534 542 return (0);
535 543 }
536 544
537 545 /*
538 546 * tdirinit is used internally to initialize a directory (dir)
539 547 * with '.' and '..' entries without checking permissions and locking
540 548 */
541 549 void
542 550 tdirinit(
543 551 struct tmpnode *parent, /* parent of directory to initialize */
544 552 struct tmpnode *dir) /* the new directory */
545 553 {
546 554 struct tdirent *dot, *dotdot;
547 555 timestruc_t now;
548 556
549 557 ASSERT(RW_WRITE_HELD(&parent->tn_rwlock));
550 558 ASSERT(dir->tn_type == VDIR);
551 559
552 560 dot = tmp_memalloc(sizeof (struct tdirent) + 2, TMP_MUSTHAVE);
553 561 dotdot = tmp_memalloc(sizeof (struct tdirent) + 3, TMP_MUSTHAVE);
554 562
555 563 /*
556 564 * Initialize the entries
557 565 */
558 566 dot->td_tmpnode = dir;
559 567 dot->td_offset = 0;
560 568 dot->td_name = (char *)dot + sizeof (struct tdirent);
561 569 dot->td_name[0] = '.';
562 570 dot->td_parent = dir;
563 571 tmpfs_hash_in(dot);
564 572
565 573 dotdot->td_tmpnode = parent;
566 574 dotdot->td_offset = 1;
567 575 dotdot->td_name = (char *)dotdot + sizeof (struct tdirent);
568 576 dotdot->td_name[0] = '.';
569 577 dotdot->td_name[1] = '.';
570 578 dotdot->td_parent = dir;
571 579 tmpfs_hash_in(dotdot);
572 580
573 581 /*
574 582 * Initialize directory entry list.
575 583 */
576 584 dot->td_next = dotdot;
577 585 dot->td_prev = dotdot; /* dot's td_prev holds roving slot pointer */
578 586 dotdot->td_next = NULL;
579 587 dotdot->td_prev = dot;
580 588
581 589 gethrestime(&now);
582 590 dir->tn_mtime = now;
583 591 dir->tn_ctime = now;
584 592
585 593 /*
586 594 * Link counts are special for the hidden attribute directory.
587 595 * The only explicit reference in the name space is "." and
588 596 * the reference through ".." is not counted on the parent
589 597 * file. The attrdir is created as a side effect to lookup,
590 598 * so don't change the ctime of the parent.
591 599 * Since tdirinit is called with both dir and parent being the
592 600 * same for the root vnode, we need to increment this before we set
593 601 * tn_nlink = 2 below.
594 602 */
595 603 if (!(dir->tn_vnode->v_flag & V_XATTRDIR)) {
596 604 INCR_COUNT(&parent->tn_nlink, &parent->tn_tlock);
597 605 parent->tn_ctime = now;
598 606 }
599 607
600 608 dir->tn_dir = dot;
601 609 dir->tn_size = 2 * sizeof (struct tdirent) + 5; /* dot and dotdot */
602 610 dir->tn_dirents = 2;
603 611 dir->tn_nlink = 2;
604 612 }
605 613
606 614
607 615 /*
↓ open down ↓ |
65 lines elided |
↑ open up ↑ |
608 616 * tdirtrunc is called to remove all directory entries under this directory.
609 617 */
610 618 void
611 619 tdirtrunc(struct tmpnode *dir)
612 620 {
613 621 struct tdirent *tdp;
614 622 struct tmpnode *tp;
615 623 size_t namelen;
616 624 timestruc_t now;
617 625 int isvattrdir, isdotdot, skip_decr;
626 + int lock_held;
618 627
619 628 ASSERT(RW_WRITE_HELD(&dir->tn_rwlock));
620 629 ASSERT(dir->tn_type == VDIR);
621 630
622 631 isvattrdir = (dir->tn_vnode->v_flag & V_XATTRDIR) ? 1 : 0;
623 632 for (tdp = dir->tn_dir; tdp; tdp = dir->tn_dir) {
624 633 ASSERT(tdp->td_next != tdp);
625 634 ASSERT(tdp->td_prev != tdp);
626 635 ASSERT(tdp->td_tmpnode);
627 636
628 637 dir->tn_dir = tdp->td_next;
629 638 namelen = strlen(tdp->td_name) + 1;
630 639
631 640 /*
632 641 * Adjust the link counts to account for this directory
633 642 * entry removal. Hidden attribute directories may
634 643 * not be empty as they may be truncated as a side-
635 644 * effect of removing the parent. We do hold/rele
636 645 * operations to free up these tmpnodes.
637 646 *
638 647 * Skip the link count adjustment for parents of
639 648 * attribute directories as those link counts
640 649 * do not include the ".." reference in the hidden
641 650 * directories.
642 651 */
643 652 tp = tdp->td_tmpnode;
644 653 isdotdot = (strcmp("..", tdp->td_name) == 0);
645 654 skip_decr = (isvattrdir && isdotdot);
↓ open down ↓ |
18 lines elided |
↑ open up ↑ |
646 655 if (!skip_decr) {
647 656 ASSERT(tp->tn_nlink > 0);
648 657 DECR_COUNT(&tp->tn_nlink, &tp->tn_tlock);
649 658 }
650 659
651 660 tmpfs_hash_out(tdp);
652 661
653 662 tmp_memfree(tdp, sizeof (struct tdirent) + namelen);
654 663 dir->tn_size -= (sizeof (struct tdirent) + namelen);
655 664 dir->tn_dirents--;
665 +
666 + /*
667 + * This directory entry may itself be a directory with
668 + * entries and removing it may have created orphans.
669 + * On a normal filesystem like UFS this wouldn't be
670 + * a huge problem because fsck can reclaim them. For
671 + * TMPFS which resides in RAM however, it means we
672 + * end up leaking memory.
673 + *
674 + * To avoid this we also truncate child directories,
675 + * but only if they have no other links to them.
676 + */
677 + if (!isdotdot && tp->tn_type == VDIR && tp != dir) {
678 + if (tp->tn_nlink > 1)
679 + continue;
680 + lock_held = RW_WRITE_HELD(&tp->tn_rwlock);
681 + if (!lock_held)
682 + rw_enter(&tp->tn_rwlock, RW_WRITER);
683 + tdirtrunc(tp);
684 + if (!lock_held)
685 + rw_exit(&tp->tn_rwlock);
686 + ASSERT(tp->tn_nlink == 0);
687 + }
656 688 }
657 689
658 690 gethrestime(&now);
659 691 dir->tn_mtime = now;
660 692 dir->tn_ctime = now;
661 693
662 694 ASSERT(dir->tn_dir == NULL);
663 695 ASSERT(dir->tn_size == 0);
664 696 ASSERT(dir->tn_dirents == 0);
665 697 }
666 698
667 699 /*
668 700 * Check if the source directory is in the path of the target directory.
669 701 * The target directory is locked by the caller.
670 702 *
671 703 * XXX - The source and target should be different upon entry.
672 704 */
673 705 static int
674 706 tdircheckpath(
675 707 struct tmpnode *fromtp,
676 708 struct tmpnode *toparent,
677 709 struct cred *cred)
678 710 {
679 711 int error = 0;
680 712 struct tmpnode *dir, *dotdot;
681 713 struct tdirent *tdp;
682 714
683 715 ASSERT(RW_WRITE_HELD(&toparent->tn_rwlock));
684 716
685 717 tdp = tmpfs_hash_lookup("..", toparent, 1, &dotdot);
686 718 if (tdp == NULL)
687 719 return (ENOENT);
688 720
689 721 ASSERT(dotdot);
690 722
691 723 if (dotdot == toparent) {
692 724 /* root of fs. search trivially satisfied. */
693 725 tmpnode_rele(dotdot);
694 726 return (0);
695 727 }
696 728 for (;;) {
697 729 /*
698 730 * Return error for cases like "mv c c/d",
699 731 * "mv c c/d/e" and so on.
700 732 */
701 733 if (dotdot == fromtp) {
702 734 tmpnode_rele(dotdot);
703 735 error = EINVAL;
704 736 break;
705 737 }
706 738 dir = dotdot;
707 739 error = tdirlookup(dir, "..", &dotdot, cred);
708 740 if (error) {
709 741 tmpnode_rele(dir);
710 742 break;
711 743 }
712 744 /*
713 745 * We're okay if we traverse the directory tree up to
714 746 * the root directory and don't run into the
715 747 * parent directory.
716 748 */
717 749 if (dir == dotdot) {
718 750 tmpnode_rele(dir);
719 751 tmpnode_rele(dotdot);
720 752 break;
721 753 }
722 754 tmpnode_rele(dir);
723 755 }
724 756 return (error);
725 757 }
726 758
727 759 static int
728 760 tdirrename(
729 761 struct tmpnode *fromparent, /* parent directory of source */
730 762 struct tmpnode *fromtp, /* source tmpnode */
731 763 struct tmpnode *toparent, /* parent directory of target */
732 764 char *nm, /* entry we are trying to change */
733 765 struct tmpnode *to, /* target tmpnode */
734 766 struct tdirent *where, /* target tmpnode directory entry */
735 767 struct cred *cred) /* credentials */
736 768 {
737 769 int error = 0;
738 770 int doingdirectory;
739 771 timestruc_t now;
740 772
741 773 #if defined(lint)
742 774 nm = nm;
743 775 #endif
744 776 ASSERT(RW_WRITE_HELD(&toparent->tn_rwlock));
745 777
746 778 /*
747 779 * Short circuit rename of something to itself.
748 780 */
749 781 if (fromtp == to)
750 782 return (ESAME); /* special KLUDGE error code */
751 783
752 784 rw_enter(&fromtp->tn_rwlock, RW_READER);
753 785 rw_enter(&to->tn_rwlock, RW_READER);
754 786
755 787 /*
756 788 * Check that everything is on the same filesystem.
757 789 */
758 790 if (to->tn_vnode->v_vfsp != toparent->tn_vnode->v_vfsp ||
759 791 to->tn_vnode->v_vfsp != fromtp->tn_vnode->v_vfsp) {
760 792 error = EXDEV;
761 793 goto out;
762 794 }
763 795
764 796 /*
765 797 * Must have write permission to rewrite target entry.
766 798 * Check for stickiness.
767 799 */
768 800 if ((error = tmp_taccess(toparent, VWRITE, cred)) != 0 ||
769 801 (error = tmp_sticky_remove_access(toparent, to, cred)) != 0)
770 802 goto out;
771 803
772 804 /*
773 805 * Ensure source and target are compatible (both directories
774 806 * or both not directories). If target is a directory it must
775 807 * be empty and have no links to it; in addition it must not
776 808 * be a mount point, and both the source and target must be
777 809 * writable.
778 810 */
779 811 doingdirectory = (fromtp->tn_type == VDIR);
780 812 if (to->tn_type == VDIR) {
781 813 if (!doingdirectory) {
782 814 error = EISDIR;
783 815 goto out;
784 816 }
785 817 /*
786 818 * vn_vfswlock will prevent mounts from using the directory
787 819 * until we are done.
788 820 */
789 821 if (vn_vfswlock(TNTOV(to))) {
790 822 error = EBUSY;
791 823 goto out;
792 824 }
793 825 if (vn_mountedvfs(TNTOV(to)) != NULL) {
794 826 vn_vfsunlock(TNTOV(to));
795 827 error = EBUSY;
796 828 goto out;
797 829 }
798 830
799 831 mutex_enter(&to->tn_tlock);
800 832 if (to->tn_dirents > 2 || to->tn_nlink > 2) {
801 833 mutex_exit(&to->tn_tlock);
802 834 vn_vfsunlock(TNTOV(to));
803 835 error = EEXIST; /* SIGH should be ENOTEMPTY */
804 836 /*
805 837 * Update atime because checking tn_dirents is
806 838 * logically equivalent to reading the directory
807 839 */
808 840 gethrestime(&to->tn_atime);
809 841 goto out;
810 842 }
811 843 mutex_exit(&to->tn_tlock);
812 844 } else if (doingdirectory) {
813 845 error = ENOTDIR;
814 846 goto out;
815 847 }
816 848
817 849 tmpfs_hash_change(where, fromtp);
818 850 gethrestime(&now);
819 851 toparent->tn_mtime = now;
820 852 toparent->tn_ctime = now;
821 853
822 854 /*
823 855 * Upgrade to write lock on "to" (i.e., the target tmpnode).
824 856 */
825 857 rw_exit(&to->tn_rwlock);
826 858 rw_enter(&to->tn_rwlock, RW_WRITER);
827 859
828 860 /*
829 861 * Decrement the link count of the target tmpnode.
830 862 */
831 863 DECR_COUNT(&to->tn_nlink, &to->tn_tlock);
832 864 to->tn_ctime = now;
833 865
834 866 if (doingdirectory) {
835 867 /*
836 868 * The entry for "to" no longer exists so release the vfslock.
837 869 */
838 870 vn_vfsunlock(TNTOV(to));
839 871
840 872 /*
841 873 * Decrement the target link count and delete all entires.
842 874 */
843 875 tdirtrunc(to);
844 876 ASSERT(to->tn_nlink == 0);
845 877
846 878 /*
847 879 * Renaming a directory with the parent different
848 880 * requires that ".." be rewritten. The window is
849 881 * still there for ".." to be inconsistent, but this
850 882 * is unavoidable, and a lot shorter than when it was
851 883 * done in a user process.
852 884 */
853 885 if (fromparent != toparent)
854 886 tdirfixdotdot(fromtp, fromparent, toparent);
855 887 }
856 888 out:
857 889 rw_exit(&to->tn_rwlock);
858 890 rw_exit(&fromtp->tn_rwlock);
859 891 return (error);
860 892 }
861 893
862 894 static void
863 895 tdirfixdotdot(
864 896 struct tmpnode *fromtp, /* child directory */
865 897 struct tmpnode *fromparent, /* old parent directory */
866 898 struct tmpnode *toparent) /* new parent directory */
867 899 {
868 900 struct tdirent *dotdot;
869 901
870 902 ASSERT(RW_LOCK_HELD(&toparent->tn_rwlock));
871 903
872 904 /*
873 905 * Increment the link count in the new parent tmpnode
874 906 */
875 907 INCR_COUNT(&toparent->tn_nlink, &toparent->tn_tlock);
876 908 gethrestime(&toparent->tn_ctime);
877 909
878 910 dotdot = tmpfs_hash_lookup("..", fromtp, 0, NULL);
879 911
880 912 ASSERT(dotdot->td_tmpnode == fromparent);
881 913 dotdot->td_tmpnode = toparent;
882 914
883 915 /*
884 916 * Decrement the link count of the old parent tmpnode.
885 917 * If fromparent is NULL, then this is a new directory link;
886 918 * it has no parent, so we need not do anything.
887 919 */
888 920 if (fromparent != NULL) {
889 921 mutex_enter(&fromparent->tn_tlock);
890 922 if (fromparent->tn_nlink != 0) {
891 923 fromparent->tn_nlink--;
892 924 gethrestime(&fromparent->tn_ctime);
893 925 }
894 926 mutex_exit(&fromparent->tn_tlock);
895 927 }
896 928 }
897 929
898 930 static int
899 931 tdiraddentry(
900 932 struct tmpnode *dir, /* target directory to make entry in */
901 933 struct tmpnode *tp, /* new tmpnode */
902 934 char *name,
903 935 enum de_op op,
904 936 struct tmpnode *fromtp)
905 937 {
906 938 struct tdirent *tdp, *tpdp;
907 939 size_t namelen, alloc_size;
908 940 timestruc_t now;
909 941
910 942 /*
911 943 * Make sure the parent directory wasn't removed from
912 944 * underneath the caller.
913 945 */
914 946 if (dir->tn_dir == NULL)
915 947 return (ENOENT);
916 948
917 949 /*
918 950 * Check that everything is on the same filesystem.
919 951 */
920 952 if (tp->tn_vnode->v_vfsp != dir->tn_vnode->v_vfsp)
921 953 return (EXDEV);
922 954
923 955 /*
924 956 * Allocate and initialize directory entry
925 957 */
926 958 namelen = strlen(name) + 1;
927 959 alloc_size = namelen + sizeof (struct tdirent);
928 960 tdp = tmp_memalloc(alloc_size, 0);
929 961 if (tdp == NULL)
930 962 return (ENOSPC);
931 963
932 964 if ((op == DE_RENAME) && (tp->tn_type == VDIR))
933 965 tdirfixdotdot(tp, fromtp, dir);
934 966
935 967 dir->tn_size += alloc_size;
936 968 dir->tn_dirents++;
937 969 tdp->td_tmpnode = tp;
938 970 tdp->td_parent = dir;
939 971
940 972 /*
941 973 * The directory entry and its name were allocated sequentially.
942 974 */
943 975 tdp->td_name = (char *)tdp + sizeof (struct tdirent);
944 976 (void) strcpy(tdp->td_name, name);
945 977
946 978 tmpfs_hash_in(tdp);
947 979
948 980 /*
949 981 * Some utilities expect the size of a directory to remain
950 982 * somewhat static. For example, a routine which unlinks
951 983 * files between calls to readdir(); the size of the
952 984 * directory changes from underneath it and so the real
953 985 * directory offset in bytes is invalid. To circumvent
954 986 * this problem, we initialize a directory entry with a
955 987 * phony offset, and use this offset to determine end of
956 988 * file in tmp_readdir.
957 989 */
958 990 tpdp = dir->tn_dir->td_prev;
959 991 /*
960 992 * Install at first empty "slot" in directory list.
961 993 */
962 994 while (tpdp->td_next != NULL && (tpdp->td_next->td_offset -
963 995 tpdp->td_offset) <= 1) {
964 996 ASSERT(tpdp->td_next != tpdp);
965 997 ASSERT(tpdp->td_prev != tpdp);
966 998 ASSERT(tpdp->td_next->td_offset > tpdp->td_offset);
967 999 tpdp = tpdp->td_next;
968 1000 }
969 1001 tdp->td_offset = tpdp->td_offset + 1;
970 1002
971 1003 /*
972 1004 * If we're at the end of the dirent list and the offset (which
973 1005 * is necessarily the largest offset in this directory) is more
974 1006 * than twice the number of dirents, that means the directory is
975 1007 * 50% holes. At this point we reset the slot pointer back to
976 1008 * the beginning of the directory so we start using the holes.
977 1009 * The idea is that if there are N dirents, there must also be
978 1010 * N holes, so we can satisfy the next N creates by walking at
979 1011 * most 2N entries; thus the average cost of a create is constant.
980 1012 * Note that we use the first dirent's td_prev as the roving
981 1013 * slot pointer; it's ugly, but it saves a word in every dirent.
982 1014 */
983 1015 if (tpdp->td_next == NULL && tpdp->td_offset > 2 * dir->tn_dirents)
984 1016 dir->tn_dir->td_prev = dir->tn_dir->td_next;
985 1017 else
986 1018 dir->tn_dir->td_prev = tdp;
987 1019
988 1020 ASSERT(tpdp->td_next != tpdp);
989 1021 ASSERT(tpdp->td_prev != tpdp);
990 1022
991 1023 tdp->td_next = tpdp->td_next;
992 1024 if (tdp->td_next) {
993 1025 tdp->td_next->td_prev = tdp;
994 1026 }
995 1027 tdp->td_prev = tpdp;
996 1028 tpdp->td_next = tdp;
997 1029
998 1030 ASSERT(tdp->td_next != tdp);
999 1031 ASSERT(tdp->td_prev != tdp);
1000 1032 ASSERT(tpdp->td_next != tpdp);
1001 1033 ASSERT(tpdp->td_prev != tpdp);
1002 1034
1003 1035 gethrestime(&now);
1004 1036 dir->tn_mtime = now;
1005 1037 dir->tn_ctime = now;
1006 1038
1007 1039 return (0);
1008 1040 }
1009 1041
1010 1042 static int
1011 1043 tdirmaketnode(
1012 1044 struct tmpnode *dir,
1013 1045 struct tmount *tm,
1014 1046 struct vattr *va,
1015 1047 enum de_op op,
1016 1048 struct tmpnode **newnode,
1017 1049 struct cred *cred)
1018 1050 {
1019 1051 struct tmpnode *tp;
1020 1052 enum vtype type;
1021 1053
1022 1054 ASSERT(va != NULL);
1023 1055 ASSERT(op == DE_CREATE || op == DE_MKDIR);
1024 1056 if (((va->va_mask & AT_ATIME) && TIMESPEC_OVERFLOW(&va->va_atime)) ||
1025 1057 ((va->va_mask & AT_MTIME) && TIMESPEC_OVERFLOW(&va->va_mtime)))
1026 1058 return (EOVERFLOW);
1027 1059 type = va->va_type;
1028 1060 tp = tmp_memalloc(sizeof (struct tmpnode), TMP_MUSTHAVE);
1029 1061 tmpnode_init(tm, tp, va, cred);
1030 1062
1031 1063 /* setup normal file/dir's extended attribute directory */
1032 1064 if (dir->tn_flags & ISXATTR) {
1033 1065 /* parent dir is an xattr dir, mark file as xattr */
1034 1066 tp->tn_flags |= ISXATTR;
1035 1067 }
1036 1068
1037 1069
1038 1070 if (type == VBLK || type == VCHR) {
1039 1071 tp->tn_vnode->v_rdev = tp->tn_rdev = va->va_rdev;
1040 1072 } else {
1041 1073 tp->tn_vnode->v_rdev = tp->tn_rdev = NODEV;
1042 1074 }
1043 1075 tp->tn_vnode->v_type = type;
1044 1076 tp->tn_uid = crgetuid(cred);
1045 1077
1046 1078 /*
1047 1079 * To determine the group-id of the created file:
1048 1080 * 1) If the gid is set in the attribute list (non-Sun & pre-4.0
1049 1081 * clients are not likely to set the gid), then use it if
1050 1082 * the process is privileged, belongs to the target group,
1051 1083 * or the group is the same as the parent directory.
1052 1084 * 2) If the filesystem was not mounted with the Old-BSD-compatible
1053 1085 * GRPID option, and the directory's set-gid bit is clear,
1054 1086 * then use the process's gid.
1055 1087 * 3) Otherwise, set the group-id to the gid of the parent directory.
1056 1088 */
1057 1089 if ((va->va_mask & AT_GID) &&
1058 1090 ((va->va_gid == dir->tn_gid) || groupmember(va->va_gid, cred) ||
1059 1091 secpolicy_vnode_create_gid(cred) == 0)) {
1060 1092 /*
1061 1093 * XXX - is this only the case when a 4.0 NFS client, or a
1062 1094 * client derived from that code, makes a call over the wire?
1063 1095 */
1064 1096 tp->tn_gid = va->va_gid;
1065 1097 } else {
1066 1098 if (dir->tn_mode & VSGID)
1067 1099 tp->tn_gid = dir->tn_gid;
1068 1100 else
1069 1101 tp->tn_gid = crgetgid(cred);
1070 1102 }
1071 1103 /*
1072 1104 * If we're creating a directory, and the parent directory has the
1073 1105 * set-GID bit set, set it on the new directory.
1074 1106 * Otherwise, if the user is neither privileged nor a member of the
1075 1107 * file's new group, clear the file's set-GID bit.
1076 1108 */
1077 1109 if (dir->tn_mode & VSGID && type == VDIR)
1078 1110 tp->tn_mode |= VSGID;
1079 1111 else {
1080 1112 if ((tp->tn_mode & VSGID) &&
1081 1113 secpolicy_vnode_setids_setgids(cred, tp->tn_gid) != 0)
1082 1114 tp->tn_mode &= ~VSGID;
1083 1115 }
1084 1116
1085 1117 if (va->va_mask & AT_ATIME)
1086 1118 tp->tn_atime = va->va_atime;
1087 1119 if (va->va_mask & AT_MTIME)
1088 1120 tp->tn_mtime = va->va_mtime;
1089 1121
1090 1122 if (op == DE_MKDIR)
1091 1123 tdirinit(dir, tp);
1092 1124
1093 1125 *newnode = tp;
1094 1126 return (0);
1095 1127 }
↓ open down ↓ |
430 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX