Print this page
7122 fix negative timestamps with nsec == 0 in nfs4_time_ntov()
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/fs/nfs/nfs4_subr.c
+++ new/usr/src/uts/common/fs/nfs/nfs4_subr.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25 /*
26 26 * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
27 27 */
28 28
29 29 /*
30 30 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
31 31 * All Rights Reserved
32 32 */
33 33
34 34 #include <sys/param.h>
35 35 #include <sys/types.h>
36 36 #include <sys/systm.h>
37 37 #include <sys/cmn_err.h>
38 38 #include <sys/vtrace.h>
39 39 #include <sys/session.h>
40 40 #include <sys/thread.h>
41 41 #include <sys/dnlc.h>
42 42 #include <sys/cred.h>
43 43 #include <sys/priv.h>
44 44 #include <sys/list.h>
45 45 #include <sys/sdt.h>
46 46 #include <sys/policy.h>
47 47
48 48 #include <rpc/types.h>
49 49 #include <rpc/xdr.h>
50 50
51 51 #include <nfs/nfs.h>
52 52
53 53 #include <nfs/nfs_clnt.h>
54 54
55 55 #include <nfs/nfs4.h>
56 56 #include <nfs/rnode4.h>
57 57 #include <nfs/nfs4_clnt.h>
58 58
59 59 /*
60 60 * client side statistics
61 61 */
/*
 * clstat4_tmpl is a read-only template of named kstat entries for the
 * NFSv4 client counters.  Every entry is declared as a 64-bit unsigned
 * value (KSTAT_DATA_UINT64).  The last four entries ("clalloc",
 * "noresponse", "failover", "remap") exist only in DEBUG builds.
 */
62 62 static const struct clstat4 clstat4_tmpl = {
63 63 { "calls", KSTAT_DATA_UINT64 },
64 64 { "badcalls", KSTAT_DATA_UINT64 },
65 65 { "referrals", KSTAT_DATA_UINT64 },
66 66 { "referlinks", KSTAT_DATA_UINT64 },
67 67 { "clgets", KSTAT_DATA_UINT64 },
68 68 { "cltoomany", KSTAT_DATA_UINT64 },
69 69 #ifdef DEBUG
70 70 { "clalloc", KSTAT_DATA_UINT64 },
71 71 { "noresponse", KSTAT_DATA_UINT64 },
72 72 { "failover", KSTAT_DATA_UINT64 },
73 73 { "remap", KSTAT_DATA_UINT64 },
74 74 #endif
75 75 };
76 76
/*
 * DEBUG-only named kstat entries, all 64-bit unsigned.  Unlike
 * clstat4_tmpl this is a single writable, non-static object; code in this
 * file bumps its members directly (e.g. clstat4_debug.clreclaim).
 */
77 77 #ifdef DEBUG
78 78 struct clstat4_debug clstat4_debug = {
79 79 { "nrnode", KSTAT_DATA_UINT64 },
80 80 { "access", KSTAT_DATA_UINT64 },
81 81 { "dirent", KSTAT_DATA_UINT64 },
82 82 { "dirents", KSTAT_DATA_UINT64 },
83 83 { "reclaim", KSTAT_DATA_UINT64 },
84 84 { "clreclaim", KSTAT_DATA_UINT64 },
85 85 { "f_reclaim", KSTAT_DATA_UINT64 },
86 86 { "a_reclaim", KSTAT_DATA_UINT64 },
87 87 { "r_reclaim", KSTAT_DATA_UINT64 },
88 88 { "r_path", KSTAT_DATA_UINT64 },
89 89 };
90 90 #endif
91 91
92 92 /*
93 93 * We keep a global list of per-zone client data, so we can clean up all zones
94 94 * if we get low on memory.
95 95 */
/*
 * NOTE(review): nfs4_clnt_list_lock presumably protects nfs4_clnt_list, and
 * nfs4clnt_zone_key is presumably the zone key used to look up the per-zone
 * data; neither use is visible in this part of the file -- confirm.
 */
96 96 static list_t nfs4_clnt_list;
97 97 static kmutex_t nfs4_clnt_list_lock;
98 98 zone_key_t nfs4clnt_zone_key;
99 99
/* kmem cache from which clget4() allocates struct chtab entries. */
100 100 static struct kmem_cache *chtab4_cache;
101 101
/*
 * DEBUG-only tunables.  nfs4_utf8_debug, when non-zero, makes utf8_to_fn()
 * log the offending name when it rejects a filename containing '/'.
 */
102 102 #ifdef DEBUG
103 103 static int nfs4_rfscall_debug;
104 104 static int nfs4_try_failover_any;
105 105 int nfs4_utf8_debug = 0;
106 106 #endif
107 107
108 108 /*
109 109 * NFSv4 readdir cache implementation
110 110 */
/*
 * rddir4_cache_impl wraps the rddir4_cache element with the bookkeeping
 * this file keeps privately: a reference count, the mutex guarding that
 * count, and the linkage used to place the element in an AVL tree.
 */
111 111 typedef struct rddir4_cache_impl {
112 112 rddir4_cache rc; /* readdir cache element */
113 113 kmutex_t lock; /* lock protects count */
114 114 uint_t count; /* reference count */
115 115 avl_node_t tree; /* AVL tree link */
116 116 } rddir4_cache_impl;
117 117
/*
 * Forward declarations for file-local helpers whose definitions appear
 * later in the file.
 */
118 118 static int rddir4_cache_compar(const void *, const void *);
119 119 static void rddir4_cache_free(rddir4_cache_impl *);
120 120 static rddir4_cache *rddir4_cache_alloc(int);
121 121 static void rddir4_cache_hold(rddir4_cache *);
122 122 static int try_failover(enum clnt_stat);
123 123
/*
 * Readdir cache statistics.  NOTE(review): presumably bumped by the readdir
 * cache lookup code on a hit, a wait and a miss respectively; the updates
 * are not visible in this part of the file -- confirm.
 */
124 124 static int nfs4_readdir_cache_hits = 0;
125 125 static int nfs4_readdir_cache_waits = 0;
126 126 static int nfs4_readdir_cache_misses = 0;
127 127
128 128 /*
129 129 * Shared nfs4 functions
130 130 */
131 131
132 132 /*
133 133 * Copy an nfs_fh4. The destination storage (to->nfs_fh4_val) must already
134 134 * be allocated.
135 135 */
136 136
137 137 void
138 138 nfs_fh4_copy(nfs_fh4 *from, nfs_fh4 *to)
139 139 {
140 140 to->nfs_fh4_len = from->nfs_fh4_len;
141 141 bcopy(from->nfs_fh4_val, to->nfs_fh4_val, to->nfs_fh4_len);
142 142 }
143 143
144 144 /*
145 145 * nfs4cmpfh - compare 2 filehandles.
146 146 * Returns 0 if the two nfsv4 filehandles are the same, -1 if the first is
147 147 * "less" than the second, +1 if the first is "greater" than the second.
148 148 */
149 149
150 150 int
151 151 nfs4cmpfh(const nfs_fh4 *fh4p1, const nfs_fh4 *fh4p2)
152 152 {
153 153 const char *c1, *c2;
154 154
155 155 if (fh4p1->nfs_fh4_len < fh4p2->nfs_fh4_len)
156 156 return (-1);
157 157 if (fh4p1->nfs_fh4_len > fh4p2->nfs_fh4_len)
158 158 return (1);
159 159 for (c1 = fh4p1->nfs_fh4_val, c2 = fh4p2->nfs_fh4_val;
160 160 c1 < fh4p1->nfs_fh4_val + fh4p1->nfs_fh4_len;
161 161 c1++, c2++) {
162 162 if (*c1 < *c2)
163 163 return (-1);
164 164 if (*c1 > *c2)
165 165 return (1);
166 166 }
167 167
168 168 return (0);
169 169 }
170 170
171 171 /*
172 172 * Compare two v4 filehandles. Return zero if they're the same, non-zero
173 173 * if they're not. Like nfs4cmpfh(), but different filehandle
174 174 * representation, and doesn't provide information about greater than or
175 175 * less than.
176 176 */
177 177
178 178 int
179 179 nfs4cmpfhandle(nfs4_fhandle_t *fh1, nfs4_fhandle_t *fh2)
180 180 {
181 181 if (fh1->fh_len == fh2->fh_len)
182 182 return (bcmp(fh1->fh_buf, fh2->fh_buf, fh1->fh_len));
183 183
184 184 return (1);
185 185 }
186 186
187 187 int
188 188 stateid4_cmp(stateid4 *s1, stateid4 *s2)
189 189 {
190 190 if (bcmp(s1, s2, sizeof (stateid4)) == 0)
191 191 return (1);
192 192 else
193 193 return (0);
194 194 }
195 195
196 196 nfsstat4
197 197 puterrno4(int error)
198 198 {
199 199 switch (error) {
200 200 case 0:
201 201 return (NFS4_OK);
202 202 case EPERM:
203 203 return (NFS4ERR_PERM);
204 204 case ENOENT:
205 205 return (NFS4ERR_NOENT);
206 206 case EINTR:
207 207 return (NFS4ERR_IO);
208 208 case EIO:
209 209 return (NFS4ERR_IO);
210 210 case ENXIO:
211 211 return (NFS4ERR_NXIO);
212 212 case ENOMEM:
213 213 return (NFS4ERR_RESOURCE);
214 214 case EACCES:
215 215 return (NFS4ERR_ACCESS);
216 216 case EBUSY:
217 217 return (NFS4ERR_IO);
218 218 case EEXIST:
219 219 return (NFS4ERR_EXIST);
220 220 case EXDEV:
221 221 return (NFS4ERR_XDEV);
222 222 case ENODEV:
223 223 return (NFS4ERR_IO);
224 224 case ENOTDIR:
225 225 return (NFS4ERR_NOTDIR);
226 226 case EISDIR:
227 227 return (NFS4ERR_ISDIR);
228 228 case EINVAL:
229 229 return (NFS4ERR_INVAL);
230 230 case EMFILE:
231 231 return (NFS4ERR_RESOURCE);
232 232 case EFBIG:
233 233 return (NFS4ERR_FBIG);
234 234 case ENOSPC:
235 235 return (NFS4ERR_NOSPC);
236 236 case EROFS:
237 237 return (NFS4ERR_ROFS);
238 238 case EMLINK:
239 239 return (NFS4ERR_MLINK);
240 240 case EDEADLK:
241 241 return (NFS4ERR_DEADLOCK);
242 242 case ENOLCK:
243 243 return (NFS4ERR_DENIED);
244 244 case EREMOTE:
245 245 return (NFS4ERR_SERVERFAULT);
246 246 case ENOTSUP:
247 247 return (NFS4ERR_NOTSUPP);
248 248 case EDQUOT:
249 249 return (NFS4ERR_DQUOT);
250 250 case ENAMETOOLONG:
251 251 return (NFS4ERR_NAMETOOLONG);
252 252 case EOVERFLOW:
253 253 return (NFS4ERR_INVAL);
254 254 case ENOSYS:
255 255 return (NFS4ERR_NOTSUPP);
256 256 case ENOTEMPTY:
257 257 return (NFS4ERR_NOTEMPTY);
258 258 case EOPNOTSUPP:
259 259 return (NFS4ERR_NOTSUPP);
260 260 case ESTALE:
261 261 return (NFS4ERR_STALE);
262 262 case EAGAIN:
263 263 if (curthread->t_flag & T_WOULDBLOCK) {
264 264 curthread->t_flag &= ~T_WOULDBLOCK;
265 265 return (NFS4ERR_DELAY);
266 266 }
267 267 return (NFS4ERR_LOCKED);
268 268 default:
269 269 return ((enum nfsstat4)error);
270 270 }
271 271 }
272 272
273 273 int
274 274 geterrno4(enum nfsstat4 status)
275 275 {
276 276 switch (status) {
277 277 case NFS4_OK:
278 278 return (0);
279 279 case NFS4ERR_PERM:
280 280 return (EPERM);
281 281 case NFS4ERR_NOENT:
282 282 return (ENOENT);
283 283 case NFS4ERR_IO:
284 284 return (EIO);
285 285 case NFS4ERR_NXIO:
286 286 return (ENXIO);
287 287 case NFS4ERR_ACCESS:
288 288 return (EACCES);
289 289 case NFS4ERR_EXIST:
290 290 return (EEXIST);
291 291 case NFS4ERR_XDEV:
292 292 return (EXDEV);
293 293 case NFS4ERR_NOTDIR:
294 294 return (ENOTDIR);
295 295 case NFS4ERR_ISDIR:
296 296 return (EISDIR);
297 297 case NFS4ERR_INVAL:
298 298 return (EINVAL);
299 299 case NFS4ERR_FBIG:
300 300 return (EFBIG);
301 301 case NFS4ERR_NOSPC:
302 302 return (ENOSPC);
303 303 case NFS4ERR_ROFS:
304 304 return (EROFS);
305 305 case NFS4ERR_MLINK:
306 306 return (EMLINK);
307 307 case NFS4ERR_NAMETOOLONG:
308 308 return (ENAMETOOLONG);
309 309 case NFS4ERR_NOTEMPTY:
310 310 return (ENOTEMPTY);
311 311 case NFS4ERR_DQUOT:
312 312 return (EDQUOT);
313 313 case NFS4ERR_STALE:
314 314 return (ESTALE);
315 315 case NFS4ERR_BADHANDLE:
316 316 return (ESTALE);
317 317 case NFS4ERR_BAD_COOKIE:
318 318 return (EINVAL);
319 319 case NFS4ERR_NOTSUPP:
320 320 return (EOPNOTSUPP);
321 321 case NFS4ERR_TOOSMALL:
322 322 return (EINVAL);
323 323 case NFS4ERR_SERVERFAULT:
324 324 return (EIO);
325 325 case NFS4ERR_BADTYPE:
326 326 return (EINVAL);
327 327 case NFS4ERR_DELAY:
328 328 return (ENXIO);
329 329 case NFS4ERR_SAME:
330 330 return (EPROTO);
331 331 case NFS4ERR_DENIED:
332 332 return (ENOLCK);
333 333 case NFS4ERR_EXPIRED:
334 334 return (EPROTO);
335 335 case NFS4ERR_LOCKED:
336 336 return (EACCES);
337 337 case NFS4ERR_GRACE:
338 338 return (EAGAIN);
339 339 case NFS4ERR_FHEXPIRED: /* if got here, failed to get a new fh */
340 340 return (ESTALE);
341 341 case NFS4ERR_SHARE_DENIED:
342 342 return (EACCES);
343 343 case NFS4ERR_WRONGSEC:
344 344 return (EPERM);
345 345 case NFS4ERR_CLID_INUSE:
346 346 return (EAGAIN);
347 347 case NFS4ERR_RESOURCE:
348 348 return (EAGAIN);
349 349 case NFS4ERR_MOVED:
350 350 return (EPROTO);
351 351 case NFS4ERR_NOFILEHANDLE:
352 352 return (EIO);
353 353 case NFS4ERR_MINOR_VERS_MISMATCH:
354 354 return (ENOTSUP);
355 355 case NFS4ERR_STALE_CLIENTID:
356 356 return (EIO);
357 357 case NFS4ERR_STALE_STATEID:
358 358 return (EIO);
359 359 case NFS4ERR_OLD_STATEID:
360 360 return (EIO);
361 361 case NFS4ERR_BAD_STATEID:
362 362 return (EIO);
363 363 case NFS4ERR_BAD_SEQID:
364 364 return (EIO);
365 365 case NFS4ERR_NOT_SAME:
366 366 return (EPROTO);
367 367 case NFS4ERR_LOCK_RANGE:
368 368 return (EPROTO);
369 369 case NFS4ERR_SYMLINK:
370 370 return (EPROTO);
371 371 case NFS4ERR_RESTOREFH:
372 372 return (EPROTO);
373 373 case NFS4ERR_LEASE_MOVED:
374 374 return (EPROTO);
375 375 case NFS4ERR_ATTRNOTSUPP:
376 376 return (ENOTSUP);
377 377 case NFS4ERR_NO_GRACE:
378 378 return (EPROTO);
379 379 case NFS4ERR_RECLAIM_BAD:
380 380 return (EPROTO);
381 381 case NFS4ERR_RECLAIM_CONFLICT:
382 382 return (EPROTO);
383 383 case NFS4ERR_BADXDR:
384 384 return (EINVAL);
385 385 case NFS4ERR_LOCKS_HELD:
386 386 return (EIO);
387 387 case NFS4ERR_OPENMODE:
388 388 return (EACCES);
389 389 case NFS4ERR_BADOWNER:
390 390 /*
391 391 * Client and server are in different DNS domains
392 392 * and the NFSMAPID_DOMAIN in /etc/default/nfs
393 393 * doesn't match. No good answer here. Return
394 394 * EACCESS, which translates to "permission denied".
395 395 */
396 396 return (EACCES);
397 397 case NFS4ERR_BADCHAR:
398 398 return (EINVAL);
399 399 case NFS4ERR_BADNAME:
400 400 return (EINVAL);
401 401 case NFS4ERR_BAD_RANGE:
402 402 return (EIO);
403 403 case NFS4ERR_LOCK_NOTSUPP:
404 404 return (ENOTSUP);
405 405 case NFS4ERR_OP_ILLEGAL:
406 406 return (EINVAL);
407 407 case NFS4ERR_DEADLOCK:
408 408 return (EDEADLK);
409 409 case NFS4ERR_FILE_OPEN:
410 410 return (EACCES);
411 411 case NFS4ERR_ADMIN_REVOKED:
412 412 return (EPROTO);
413 413 case NFS4ERR_CB_PATH_DOWN:
414 414 return (EPROTO);
415 415 default:
416 416 #ifdef DEBUG
417 417 zcmn_err(getzoneid(), CE_WARN, "geterrno4: got status %d",
418 418 status);
419 419 #endif
420 420 return ((int)status);
421 421 }
422 422 }
423 423
/*
 * nfs4_log_badowner - report an NFS4ERR_BADOWNER condition at most once.
 *
 * Two independent "already reported" flags are maintained:
 *   - N4S_BADOWNER_DEBUG on the nfs4_server_t: guards the console warning
 *     about a mismatched NFSMAPID_DOMAIN.
 *   - MI4_BADOWNER_DEBUG on the mntinfo4_t: guards queueing of the
 *     RF_BADOWNER fact for operation 'op'.
 *
 * The function returns silently, without logging, if the mount has already
 * been flagged, if mi_recovlock cannot be entered as reader
 * (nfs_rw_enter_sig() returns non-zero), or if no nfs4_server_t is found
 * for the mount.
 */
424 424 void
425 425 nfs4_log_badowner(mntinfo4_t *mi, nfs_opnum4 op)
426 426 {
427 427 nfs4_server_t *server;
428 428
429 429 /*
430 430 * Return if already printed/queued a msg
431 431 * for this mount point.
432 432 */
/* Unlocked fast-path check; the flag is re-tested under mi_lock below. */
433 433 if (mi->mi_flags & MI4_BADOWNER_DEBUG)
434 434 return;
435 435 /*
436 436 * Happens once per client <-> server pair.
437 437 */
438 438 if (nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER,
439 439 mi->mi_flags & MI4_INT))
440 440 return;
441 441
442 442 server = find_nfs4_server(mi);
443 443 if (server == NULL) {
444 444 nfs_rw_exit(&mi->mi_recovlock);
445 445 return;
446 446 }
447 447
448 448 if (!(server->s_flags & N4S_BADOWNER_DEBUG)) {
449 449 zcmn_err(mi->mi_zone->zone_id, CE_WARN,
450 450 "!NFSMAPID_DOMAIN does not match"
451 451 " the server: %s domain.\n"
452 452 "Please check configuration",
453 453 mi->mi_curr_serv->sv_hostname);
454 454 server->s_flags |= N4S_BADOWNER_DEBUG;
455 455 }
/*
 * s_lock is released here without being taken in this function, so
 * find_nfs4_server() presumably returns with server->s_lock held and a
 * reference that nfs4_server_rele() drops -- confirm against its definition.
 */
456 456 mutex_exit(&server->s_lock);
457 457 nfs4_server_rele(server);
458 458 nfs_rw_exit(&mi->mi_recovlock);
459 459
460 460 /*
461 461 * Happens once per mntinfo4_t.
462 462 * This error is deemed as one of the recovery facts "RF_BADOWNER",
463 463 * queue this in the mesg queue for this mount_info. This message
464 464 * is not printed, meaning it's absent from id_to_dump_solo_fact()
465 465 * but it's there for inspection if the queue is ever dumped/inspected.
466 466 */
467 467 mutex_enter(&mi->mi_lock);
468 468 if (!(mi->mi_flags & MI4_BADOWNER_DEBUG)) {
469 469 nfs4_queue_fact(RF_BADOWNER, mi, NFS4ERR_BADOWNER, 0, op,
470 470 FALSE, NULL, 0, NULL);
471 471 mi->mi_flags |= MI4_BADOWNER_DEBUG;
472 472 }
473 473 mutex_exit(&mi->mi_lock);
474 474 }
475 475
476 476 int
477 477 nfs4_time_ntov(nfstime4 *ntime, timestruc_t *vatime)
478 478 {
479 479 int64_t sec;
480 480 int32_t nsec;
481 481
482 482 /*
483 483 * Here check that the nfsv4 time is valid for the system.
484 484 * nfsv4 time value is a signed 64-bit, and the system time
485 485 * may be either int64_t or int32_t (depends on the kernel),
486 486 * so if the kernel is 32-bit, the nfsv4 time value may not fit.
487 487 */
↓ open down ↓ |
487 lines elided |
↑ open up ↑ |
488 488 #ifndef _LP64
489 489 if (! NFS4_TIME_OK(ntime->seconds)) {
490 490 return (EOVERFLOW);
491 491 }
492 492 #endif
493 493
494 494 /* Invalid to specify 1 billion (or more) nsecs */
495 495 if (ntime->nseconds >= 1000000000)
496 496 return (EINVAL);
497 497
498 - if (ntime->seconds < 0) {
498 + if (ntime->seconds < 0 && ntime->nseconds != 0) {
499 499 sec = ntime->seconds + 1;
500 500 nsec = -1000000000 + ntime->nseconds;
501 501 } else {
502 502 sec = ntime->seconds;
503 503 nsec = ntime->nseconds;
504 504 }
505 505
506 506 vatime->tv_sec = sec;
507 507 vatime->tv_nsec = nsec;
508 508
509 509 return (0);
510 510 }
511 511
512 512 int
513 513 nfs4_time_vton(timestruc_t *vatime, nfstime4 *ntime)
514 514 {
515 515 int64_t sec;
516 516 uint32_t nsec;
517 517
518 518 /*
519 519 * nfsv4 time value is a signed 64-bit, and the system time
520 520 * may be either int64_t or int32_t (depends on the kernel),
521 521 * so all system time values will fit.
522 522 */
523 523 if (vatime->tv_nsec >= 0) {
524 524 sec = vatime->tv_sec;
525 525 nsec = vatime->tv_nsec;
526 526 } else {
527 527 sec = vatime->tv_sec - 1;
528 528 nsec = 1000000000 + vatime->tv_nsec;
529 529 }
530 530 ntime->seconds = sec;
531 531 ntime->nseconds = nsec;
532 532
533 533 return (0);
534 534 }
535 535
536 536 /*
537 537 * Converts a utf8 string to a valid null terminated filename string.
538 538 *
539 539 * XXX - Not actually translating the UTF-8 string as per RFC 2279.
540 540 * For now, just validate that the UTF-8 string off the wire
541 541 * does not have characters that will freak out UFS, and leave
542 542 * it at that.
543 543 */
544 544 char *
545 545 utf8_to_fn(utf8string *u8s, uint_t *lenp, char *s)
546 546 {
547 547 ASSERT(lenp != NULL);
548 548
549 549 if (u8s == NULL || u8s->utf8string_len <= 0 ||
550 550 u8s->utf8string_val == NULL)
551 551 return (NULL);
552 552
553 553 /*
554 554 * Check for obvious illegal filename chars
555 555 */
556 556 if (utf8_strchr(u8s, '/') != NULL) {
557 557 #ifdef DEBUG
558 558 if (nfs4_utf8_debug) {
559 559 char *path;
560 560 int len = u8s->utf8string_len;
561 561
562 562 path = kmem_alloc(len + 1, KM_SLEEP);
563 563 bcopy(u8s->utf8string_val, path, len);
564 564 path[len] = '\0';
565 565
566 566 zcmn_err(getzoneid(), CE_WARN,
567 567 "Invalid UTF-8 filename: %s", path);
568 568
569 569 kmem_free(path, len + 1);
570 570 }
571 571 #endif
572 572 return (NULL);
573 573 }
574 574
575 575 return (utf8_to_str(u8s, lenp, s));
576 576 }
577 577
578 578 /*
579 579 * Converts a utf8 string to a C string.
580 580 * kmem_allocs a new string if not supplied
581 581 */
582 582 char *
583 583 utf8_to_str(utf8string *str, uint_t *lenp, char *s)
584 584 {
585 585 char *sp;
586 586 char *u8p;
587 587 int len;
588 588 int i;
589 589
590 590 ASSERT(lenp != NULL);
591 591
592 592 if (str == NULL)
593 593 return (NULL);
594 594
595 595 u8p = str->utf8string_val;
596 596 len = str->utf8string_len;
597 597 if (len <= 0 || u8p == NULL) {
598 598 if (s)
599 599 *s = '\0';
600 600 return (NULL);
601 601 }
602 602
603 603 sp = s;
604 604 if (sp == NULL)
605 605 sp = kmem_alloc(len + 1, KM_SLEEP);
606 606
607 607 /*
608 608 * At least check for embedded nulls
609 609 */
610 610 for (i = 0; i < len; i++) {
611 611 sp[i] = u8p[i];
612 612 if (u8p[i] == '\0') {
613 613 #ifdef DEBUG
614 614 zcmn_err(getzoneid(), CE_WARN,
615 615 "Embedded NULL in UTF-8 string");
616 616 #endif
617 617 if (s == NULL)
618 618 kmem_free(sp, len + 1);
619 619 return (NULL);
620 620 }
621 621 }
622 622 sp[len] = '\0';
623 623 *lenp = len + 1;
624 624
625 625 return (sp);
626 626 }
627 627
628 628 /*
629 629 * str_to_utf8 - converts a null-terminated C string to a utf8 string
630 630 */
631 631 utf8string *
632 632 str_to_utf8(char *nm, utf8string *str)
633 633 {
634 634 int len;
635 635
636 636 if (str == NULL)
637 637 return (NULL);
638 638
639 639 if (nm == NULL || *nm == '\0') {
640 640 str->utf8string_len = 0;
641 641 str->utf8string_val = NULL;
642 642 }
643 643
644 644 len = strlen(nm);
645 645
646 646 str->utf8string_val = kmem_alloc(len, KM_SLEEP);
647 647 str->utf8string_len = len;
648 648 bcopy(nm, str->utf8string_val, len);
649 649
650 650 return (str);
651 651 }
652 652
653 653 utf8string *
654 654 utf8_copy(utf8string *src, utf8string *dest)
655 655 {
656 656 if (src == NULL)
657 657 return (NULL);
658 658 if (dest == NULL)
659 659 return (NULL);
660 660
661 661 if (src->utf8string_len > 0) {
662 662 dest->utf8string_val = kmem_alloc(src->utf8string_len,
663 663 KM_SLEEP);
664 664 bcopy(src->utf8string_val, dest->utf8string_val,
665 665 src->utf8string_len);
666 666 dest->utf8string_len = src->utf8string_len;
667 667 } else {
668 668 dest->utf8string_val = NULL;
669 669 dest->utf8string_len = 0;
670 670 }
671 671
672 672 return (dest);
673 673 }
674 674
675 675 int
676 676 utf8_compare(const utf8string *a, const utf8string *b)
677 677 {
678 678 int mlen, cmp;
679 679 int alen, blen;
680 680 char *aval, *bval;
681 681
682 682 if ((a == NULL) && (b == NULL))
683 683 return (0);
684 684 else if (a == NULL)
685 685 return (-1);
686 686 else if (b == NULL)
687 687 return (1);
688 688
689 689 alen = a->utf8string_len;
690 690 blen = b->utf8string_len;
691 691 aval = a->utf8string_val;
692 692 bval = b->utf8string_val;
693 693
694 694 if (((alen == 0) || (aval == NULL)) &&
695 695 ((blen == 0) || (bval == NULL)))
696 696 return (0);
697 697 else if ((alen == 0) || (aval == NULL))
698 698 return (-1);
699 699 else if ((blen == 0) || (bval == NULL))
700 700 return (1);
701 701
702 702 mlen = MIN(alen, blen);
703 703 cmp = strncmp(aval, bval, mlen);
704 704
705 705 if ((cmp == 0) && (alen == blen))
706 706 return (0);
707 707 else if ((cmp == 0) && (alen < blen))
708 708 return (-1);
709 709 else if (cmp == 0)
710 710 return (1);
711 711 else if (cmp < 0)
712 712 return (-1);
713 713 return (1);
714 714 }
715 715
716 716 /*
717 717 * utf8_dir_verify - checks that the utf8 string is valid
718 718 */
719 719 nfsstat4
720 720 utf8_dir_verify(utf8string *str)
721 721 {
722 722 char *nm;
723 723 int len;
724 724
725 725 if (str == NULL)
726 726 return (NFS4ERR_INVAL);
727 727
728 728 nm = str->utf8string_val;
729 729 len = str->utf8string_len;
730 730 if (nm == NULL || len == 0) {
731 731 return (NFS4ERR_INVAL);
732 732 }
733 733
734 734 if (len == 1 && nm[0] == '.')
735 735 return (NFS4ERR_BADNAME);
736 736 if (len == 2 && nm[0] == '.' && nm[1] == '.')
737 737 return (NFS4ERR_BADNAME);
738 738
739 739 if (utf8_strchr(str, '/') != NULL)
740 740 return (NFS4ERR_BADNAME);
741 741
742 742 if (utf8_strchr(str, '\0') != NULL)
743 743 return (NFS4ERR_BADNAME);
744 744
745 745 return (NFS4_OK);
746 746 }
747 747
748 748 /*
749 749 * from rpcsec module (common/rpcsec)
750 750 */
/*
 * As used in this file: sec_clnt_geth() is called by authget() to obtain an
 * AUTH handle for a client from a sec_data description and a credential,
 * storing it through the final argument and returning 0 on success;
 * sec_clnt_freeh() is called by clfree4() to release such a handle.
 * sec_clnt_freeinfo() is not called in the visible part of this file.
 */
751 751 extern int sec_clnt_geth(CLIENT *, struct sec_data *, cred_t *, AUTH **);
752 752 extern void sec_clnt_freeh(AUTH *);
753 753 extern void sec_clnt_freeinfo(struct sec_data *);
754 754
755 755 /*
756 756 * authget() gets an auth handle based on the security
757 757 * information from the servinfo in mountinfo.
758 758 * The auth handle is stored in ch_client->cl_auth.
759 759 *
760 760 * First security flavor of choice is to use sv_secdata
761 761 * which is initiated by the client. If that fails, get
762 762 * secinfo from the server and then select one from the
763 763 * server secinfo list .
764 764 *
765 765 * For RPCSEC_GSS flavor, upon success, a secure context is
766 766 * established between client and server.
767 767 */
768 768 int
769 769 authget(servinfo4_t *svp, CLIENT *ch_client, cred_t *cr)
770 770 {
771 771 int error, i;
772 772
773 773 /*
774 774 * SV4_TRYSECINFO indicates to try the secinfo list from
775 775 * sv_secinfo until a successful one is reached. Point
776 776 * sv_currsec to the selected security mechanism for
777 777 * later sessions.
778 778 */
779 779 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0);
780 780 if ((svp->sv_flags & SV4_TRYSECINFO) && svp->sv_secinfo) {
781 781 for (i = svp->sv_secinfo->index; i < svp->sv_secinfo->count;
782 782 i++) {
783 783 if (!(error = sec_clnt_geth(ch_client,
784 784 &svp->sv_secinfo->sdata[i],
785 785 cr, &ch_client->cl_auth))) {
786 786
787 787 svp->sv_currsec = &svp->sv_secinfo->sdata[i];
788 788 svp->sv_secinfo->index = i;
789 789 /* done */
790 790 svp->sv_flags &= ~SV4_TRYSECINFO;
791 791 break;
792 792 }
793 793
794 794 /*
795 795 * Allow the caller retry with the security flavor
796 796 * pointed by svp->sv_secinfo->index when
797 797 * ETIMEDOUT/ECONNRESET occurs.
798 798 */
799 799 if (error == ETIMEDOUT || error == ECONNRESET) {
800 800 svp->sv_secinfo->index = i;
801 801 break;
802 802 }
803 803 }
804 804 } else {
805 805 /* sv_currsec points to one of the entries in sv_secinfo */
806 806 if (svp->sv_currsec) {
807 807 error = sec_clnt_geth(ch_client, svp->sv_currsec, cr,
808 808 &ch_client->cl_auth);
809 809 } else {
810 810 /* If it's null, use sv_secdata. */
811 811 error = sec_clnt_geth(ch_client, svp->sv_secdata, cr,
812 812 &ch_client->cl_auth);
813 813 }
814 814 }
815 815 nfs_rw_exit(&svp->sv_lock);
816 816
817 817 return (error);
818 818 }
819 819
820 820 /*
821 821 * Common handle get program for NFS, NFS ACL, and NFS AUTH client.
822 822 */
/*
 * clget4 - obtain an RPC client handle for the given server, reusing a
 * cached one when possible.
 *
 * The per-zone cache (nfscl->nfscl_chtable4, protected by
 * nfscl_chtable4_lock) is a list of chhead entries keyed by RPC program,
 * version, transport device and protocol family; each chhead carries a
 * list of free chtab entries.
 *
 *   ci     program/version/read size/retransmit count/flags to use
 *   svp    server to talk to (transport config, address, security info)
 *   cr     credential passed to the RPC and security layers
 *   newcl  out: the client handle
 *   chp    out: the chtab entry that owns the handle
 *   nfscl  per-zone client state; dereferenced without a NULL check
 *
 * Returns 0 on success with *newcl and *chp set.  Returns EINVAL if newcl,
 * chp or ci is NULL.  Otherwise returns the error from clnt_tli_kcreate()
 * or authget(), or EINTR if authget() reported success but left cl_auth
 * NULL.  On every failure after the argument check *newcl and *chp are
 * NULL and the handle being set up has been destroyed.
 */
823 823 int
824 824 clget4(clinfo_t *ci, servinfo4_t *svp, cred_t *cr, CLIENT **newcl,
825 825 struct chtab **chp, struct nfs4_clnt *nfscl)
826 826 {
827 827 struct chhead *ch, *newch;
828 828 struct chhead **plistp;
829 829 struct chtab *cp;
830 830 int error;
831 831 k_sigset_t smask;
832 832
833 833 if (newcl == NULL || chp == NULL || ci == NULL)
834 834 return (EINVAL);
835 835
836 836 *newcl = NULL;
837 837 *chp = NULL;
838 838
839 839 /*
840 840 * Find an unused handle or create one
841 841 */
842 842 newch = NULL;
843 843 nfscl->nfscl_stat.clgets.value.ui64++;
844 844 top:
845 845 /*
846 846 * Find the correct entry in the cache to check for free
847 847 * client handles. The search is based on the RPC program
848 848 * number, program version number, dev_t for the transport
849 849 * device, and the protocol family.
850 850 */
851 851 mutex_enter(&nfscl->nfscl_chtable4_lock);
/* plistp trails the scan: it addresses the link that points at 'ch'. */
852 852 plistp = &nfscl->nfscl_chtable4;
853 853 for (ch = nfscl->nfscl_chtable4; ch != NULL; ch = ch->ch_next) {
854 854 if (ch->ch_prog == ci->cl_prog &&
855 855 ch->ch_vers == ci->cl_vers &&
856 856 ch->ch_dev == svp->sv_knconf->knc_rdev &&
857 857 (strcmp(ch->ch_protofmly,
858 858 svp->sv_knconf->knc_protofmly) == 0))
859 859 break;
860 860 plistp = &ch->ch_next;
861 861 }
862 862
863 863 /*
864 864 * If we didn't find a cache entry for this quadruple, then
865 865 * create one. If we don't have one already preallocated,
866 866 * then drop the cache lock, create one, and then start over.
867 867 * If we did have a preallocated entry, then just add it to
868 868 * the front of the list.
869 869 */
870 870 if (ch == NULL) {
871 871 if (newch == NULL) {
/* The lock is dropped before the sleeping (KM_SLEEP) allocations. */
872 872 mutex_exit(&nfscl->nfscl_chtable4_lock);
873 873 newch = kmem_alloc(sizeof (*newch), KM_SLEEP);
874 874 newch->ch_timesused = 0;
875 875 newch->ch_prog = ci->cl_prog;
876 876 newch->ch_vers = ci->cl_vers;
877 877 newch->ch_dev = svp->sv_knconf->knc_rdev;
878 878 newch->ch_protofmly = kmem_alloc(
879 879 strlen(svp->sv_knconf->knc_protofmly) + 1,
880 880 KM_SLEEP);
881 881 (void) strcpy(newch->ch_protofmly,
882 882 svp->sv_knconf->knc_protofmly);
883 883 newch->ch_list = NULL;
884 884 goto top;
885 885 }
886 886 ch = newch;
887 887 newch = NULL;
888 888 ch->ch_next = nfscl->nfscl_chtable4;
889 889 nfscl->nfscl_chtable4 = ch;
890 890 /*
891 891 * We found a cache entry, but if it isn't on the front of the
892 892 * list, then move it to the front of the list to try to take
893 893 * advantage of locality of operations.
894 894 */
895 895 } else if (ch != nfscl->nfscl_chtable4) {
896 896 *plistp = ch->ch_next;
897 897 ch->ch_next = nfscl->nfscl_chtable4;
898 898 nfscl->nfscl_chtable4 = ch;
899 899 }
900 900
901 901 /*
902 902 * If there was a free client handle cached, then remove it
903 903 * from the list, init it, and use it.
904 904 */
905 905 if (ch->ch_list != NULL) {
906 906 cp = ch->ch_list;
907 907 ch->ch_list = cp->ch_list;
908 908 mutex_exit(&nfscl->nfscl_chtable4_lock);
/*
 * A preallocated chhead can still be pending here if the matching entry
 * appeared while the lock was dropped; discard it.
 */
909 909 if (newch != NULL) {
910 910 kmem_free(newch->ch_protofmly,
911 911 strlen(newch->ch_protofmly) + 1);
912 912 kmem_free(newch, sizeof (*newch));
913 913 }
/* Re-initialize the cached handle; the return value is ignored. */
914 914 (void) clnt_tli_kinit(cp->ch_client, svp->sv_knconf,
915 915 &svp->sv_addr, ci->cl_readsize, ci->cl_retrans, cr);
916 916
917 917 /*
918 918 * Get an auth handle.
919 919 */
920 920 error = authget(svp, cp->ch_client, cr);
921 921 if (error || cp->ch_client->cl_auth == NULL) {
922 922 CLNT_DESTROY(cp->ch_client);
923 923 kmem_cache_free(chtab4_cache, cp);
924 924 return ((error != 0) ? error : EINTR);
925 925 }
926 926 ch->ch_timesused++;
927 927 *newcl = cp->ch_client;
928 928 *chp = cp;
929 929 return (0);
930 930 }
931 931
932 932 /*
933 933 * There weren't any free client handles which fit, so allocate
934 934 * a new one and use that.
935 935 */
936 936 #ifdef DEBUG
937 937 atomic_inc_64(&nfscl->nfscl_stat.clalloc.value.ui64);
938 938 #endif
939 939 mutex_exit(&nfscl->nfscl_chtable4_lock);
940 940
/* Counted every time a brand-new handle has to be created. */
941 941 nfscl->nfscl_stat.cltoomany.value.ui64++;
942 942 if (newch != NULL) {
943 943 kmem_free(newch->ch_protofmly, strlen(newch->ch_protofmly) + 1);
944 944 kmem_free(newch, sizeof (*newch));
945 945 }
946 946
947 947 cp = kmem_cache_alloc(chtab4_cache, KM_SLEEP);
948 948 cp->ch_head = ch;
949 949
/*
 * The handle is created between sigintr() and sigunintr(), with the
 * mount's MI4_INT flag bit passed to sigintr().
 */
950 950 sigintr(&smask, (int)ci->cl_flags & MI4_INT);
951 951 error = clnt_tli_kcreate(svp->sv_knconf, &svp->sv_addr, ci->cl_prog,
952 952 ci->cl_vers, ci->cl_readsize, ci->cl_retrans, cr, &cp->ch_client);
953 953 sigunintr(&smask);
954 954
955 955 if (error != 0) {
956 956 kmem_cache_free(chtab4_cache, cp);
957 957 #ifdef DEBUG
958 958 atomic_dec_64(&nfscl->nfscl_stat.clalloc.value.ui64);
959 959 #endif
960 960 /*
961 961 * Warning is unnecessary if error is EINTR.
962 962 */
963 963 if (error != EINTR) {
964 964 nfs_cmn_err(error, CE_WARN,
965 965 "clget: couldn't create handle: %m\n");
966 966 }
967 967 return (error);
968 968 }
969 969 (void) CLNT_CONTROL(cp->ch_client, CLSET_PROGRESS, NULL);
/* Drop the auth handle the new client came with; authget() installs ours. */
970 970 auth_destroy(cp->ch_client->cl_auth);
971 971
972 972 /*
973 973 * Get an auth handle.
974 974 */
975 975 error = authget(svp, cp->ch_client, cr);
976 976 if (error || cp->ch_client->cl_auth == NULL) {
977 977 CLNT_DESTROY(cp->ch_client);
978 978 kmem_cache_free(chtab4_cache, cp);
979 979 #ifdef DEBUG
980 980 atomic_dec_64(&nfscl->nfscl_stat.clalloc.value.ui64);
981 981 #endif
982 982 return ((error != 0) ? error : EINTR);
983 983 }
984 984 ch->ch_timesused++;
985 985 *newcl = cp->ch_client;
986 986 ASSERT(cp->ch_client->cl_nosignal == FALSE);
987 987 *chp = cp;
988 988 return (0);
989 989 }
990 990
991 991 static int
992 992 nfs_clget4(mntinfo4_t *mi, servinfo4_t *svp, cred_t *cr, CLIENT **newcl,
993 993 struct chtab **chp, struct nfs4_clnt *nfscl)
994 994 {
995 995 clinfo_t ci;
996 996 bool_t is_recov;
997 997 int firstcall, error = 0;
998 998
999 999 /*
1000 1000 * Set read buffer size to rsize
1001 1001 * and add room for RPC headers.
1002 1002 */
1003 1003 ci.cl_readsize = mi->mi_tsize;
1004 1004 if (ci.cl_readsize != 0)
1005 1005 ci.cl_readsize += (RPC_MAXDATASIZE - NFS_MAXDATA);
1006 1006
1007 1007 /*
1008 1008 * If soft mount and server is down just try once.
1009 1009 * meaning: do not retransmit.
1010 1010 */
1011 1011 if (!(mi->mi_flags & MI4_HARD) && (mi->mi_flags & MI4_DOWN))
1012 1012 ci.cl_retrans = 0;
1013 1013 else
1014 1014 ci.cl_retrans = mi->mi_retrans;
1015 1015
1016 1016 ci.cl_prog = mi->mi_prog;
1017 1017 ci.cl_vers = mi->mi_vers;
1018 1018 ci.cl_flags = mi->mi_flags;
1019 1019
1020 1020 /*
1021 1021 * clget4 calls authget() to get an auth handle. For RPCSEC_GSS
1022 1022 * security flavor, the client tries to establish a security context
1023 1023 * by contacting the server. If the connection is timed out or reset,
1024 1024 * e.g. server reboot, we will try again.
1025 1025 */
1026 1026 is_recov = (curthread == mi->mi_recovthread);
1027 1027 firstcall = 1;
1028 1028
1029 1029 do {
1030 1030 error = clget4(&ci, svp, cr, newcl, chp, nfscl);
1031 1031
1032 1032 if (error == 0)
1033 1033 break;
1034 1034
1035 1035 /*
1036 1036 * For forced unmount and zone shutdown, bail out but
1037 1037 * let the recovery thread do one more transmission.
1038 1038 */
1039 1039 if ((FS_OR_ZONE_GONE4(mi->mi_vfsp)) &&
1040 1040 (!is_recov || !firstcall)) {
1041 1041 error = EIO;
1042 1042 break;
1043 1043 }
1044 1044
1045 1045 /* do not retry for soft mount */
1046 1046 if (!(mi->mi_flags & MI4_HARD))
1047 1047 break;
1048 1048
1049 1049 /* let the caller deal with the failover case */
1050 1050 if (FAILOVER_MOUNT4(mi))
1051 1051 break;
1052 1052
1053 1053 firstcall = 0;
1054 1054
1055 1055 } while (error == ETIMEDOUT || error == ECONNRESET);
1056 1056
1057 1057 return (error);
1058 1058 }
1059 1059
1060 1060 void
1061 1061 clfree4(CLIENT *cl, struct chtab *cp, struct nfs4_clnt *nfscl)
1062 1062 {
1063 1063 if (cl->cl_auth != NULL) {
1064 1064 sec_clnt_freeh(cl->cl_auth);
1065 1065 cl->cl_auth = NULL;
1066 1066 }
1067 1067
1068 1068 /*
1069 1069 * Timestamp this cache entry so that we know when it was last
1070 1070 * used.
1071 1071 */
1072 1072 cp->ch_freed = gethrestime_sec();
1073 1073
1074 1074 /*
1075 1075 * Add the free client handle to the front of the list.
1076 1076 * This way, the list will be sorted in youngest to oldest
1077 1077 * order.
1078 1078 */
1079 1079 mutex_enter(&nfscl->nfscl_chtable4_lock);
1080 1080 cp->ch_list = cp->ch_head->ch_list;
1081 1081 cp->ch_head->ch_list = cp;
1082 1082 mutex_exit(&nfscl->nfscl_chtable4_lock);
1083 1083 }
1084 1084
1085 1085 #define CL_HOLDTIME 60 /* time to hold client handles */
1086 1086
1087 1087 static void
1088 1088 clreclaim4_zone(struct nfs4_clnt *nfscl, uint_t cl_holdtime)
1089 1089 {
1090 1090 struct chhead *ch;
1091 1091 struct chtab *cp; /* list of objects that can be reclaimed */
1092 1092 struct chtab *cpe;
1093 1093 struct chtab *cpl;
1094 1094 struct chtab **cpp;
1095 1095 #ifdef DEBUG
1096 1096 int n = 0;
1097 1097 clstat4_debug.clreclaim.value.ui64++;
1098 1098 #endif
1099 1099
1100 1100 /*
1101 1101 * Need to reclaim some memory, so step through the cache
1102 1102 * looking through the lists for entries which can be freed.
1103 1103 */
1104 1104 cp = NULL;
1105 1105
1106 1106 mutex_enter(&nfscl->nfscl_chtable4_lock);
1107 1107
1108 1108 /*
1109 1109 * Here we step through each non-NULL quadruple and start to
1110 1110 * construct the reclaim list pointed to by cp. Note that
1111 1111 * cp will contain all eligible chtab entries. When this traversal
1112 1112 * completes, chtab entries from the last quadruple will be at the
1113 1113 * front of cp and entries from previously inspected quadruples have
1114 1114 * been appended to the rear of cp.
1115 1115 */
1116 1116 for (ch = nfscl->nfscl_chtable4; ch != NULL; ch = ch->ch_next) {
1117 1117 if (ch->ch_list == NULL)
1118 1118 continue;
1119 1119 /*
1120 1120 * Search each list for entries older then
1121 1121 * cl_holdtime seconds. The lists are maintained
1122 1122 * in youngest to oldest order so that when the
1123 1123 * first entry is found which is old enough, then
1124 1124 * all of the rest of the entries on the list will
1125 1125 * be old enough as well.
1126 1126 */
1127 1127 cpl = ch->ch_list;
1128 1128 cpp = &ch->ch_list;
1129 1129 while (cpl != NULL &&
1130 1130 cpl->ch_freed + cl_holdtime > gethrestime_sec()) {
1131 1131 cpp = &cpl->ch_list;
1132 1132 cpl = cpl->ch_list;
1133 1133 }
1134 1134 if (cpl != NULL) {
1135 1135 *cpp = NULL;
1136 1136 if (cp != NULL) {
1137 1137 cpe = cpl;
1138 1138 while (cpe->ch_list != NULL)
1139 1139 cpe = cpe->ch_list;
1140 1140 cpe->ch_list = cp;
1141 1141 }
1142 1142 cp = cpl;
1143 1143 }
1144 1144 }
1145 1145
1146 1146 mutex_exit(&nfscl->nfscl_chtable4_lock);
1147 1147
1148 1148 /*
1149 1149 * If cp is empty, then there is nothing to reclaim here.
1150 1150 */
1151 1151 if (cp == NULL)
1152 1152 return;
1153 1153
1154 1154 /*
1155 1155 * Step through the list of entries to free, destroying each client
1156 1156 * handle and kmem_free'ing the memory for each entry.
1157 1157 */
1158 1158 while (cp != NULL) {
1159 1159 #ifdef DEBUG
1160 1160 n++;
1161 1161 #endif
1162 1162 CLNT_DESTROY(cp->ch_client);
1163 1163 cpl = cp->ch_list;
1164 1164 kmem_cache_free(chtab4_cache, cp);
1165 1165 cp = cpl;
1166 1166 }
1167 1167
1168 1168 #ifdef DEBUG
1169 1169 /*
1170 1170 * Update clalloc so that nfsstat shows the current number
1171 1171 * of allocated client handles.
1172 1172 */
1173 1173 atomic_add_64(&nfscl->nfscl_stat.clalloc.value.ui64, -n);
1174 1174 #endif
1175 1175 }
1176 1176
1177 1177 /* ARGSUSED */
1178 1178 static void
1179 1179 clreclaim4(void *all)
1180 1180 {
1181 1181 struct nfs4_clnt *nfscl;
1182 1182
1183 1183 /*
1184 1184 * The system is low on memory; go through and try to reclaim some from
1185 1185 * every zone on the system.
1186 1186 */
1187 1187 mutex_enter(&nfs4_clnt_list_lock);
1188 1188 nfscl = list_head(&nfs4_clnt_list);
1189 1189 for (; nfscl != NULL; nfscl = list_next(&nfs4_clnt_list, nfscl))
1190 1190 clreclaim4_zone(nfscl, CL_HOLDTIME);
1191 1191 mutex_exit(&nfs4_clnt_list_lock);
1192 1192 }
1193 1193
/*
 * Minimum time-out values indexed by call type.
 * These units are in "eighths" of a second to avoid multiplies.
 */
static unsigned int minimum_timeo[] = {
	6, 7, 10
};

/*
 * Upper bound applied to mi_timeo by nfs4_rfscall() when the calling
 * process's zone is shutting down.
 */
#define	SHORTWAIT	(NFS_COTS_TIMEO / 10)

/*
 * Back off for retransmission timeout.  MAXTIMO is expressed in the same
 * units as the timeout it is compared with (multiples of hz).
 *
 * backoff(tim) leaves tim alone once it has reached MAXTIMO; below that
 * it yields dobackoff(tim), which is tim doubled and capped at MAXTIMO.
 *
 * Both macros expand their argument more than once, so only pass
 * expressions without side effects.
 */
#define	MAXTIMO	(20*hz)
#define	backoff(tim)	(((tim) < MAXTIMO) ? dobackoff(tim) : (tim))
#define	dobackoff(tim)	((((tim) << 1) > MAXTIMO) ? MAXTIMO : ((tim) << 1))
1210 1210
1211 1211 static int
1212 1212 nfs4_rfscall(mntinfo4_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp,
1213 1213 xdrproc_t xdrres, caddr_t resp, cred_t *icr, int *doqueue,
1214 1214 enum clnt_stat *rpc_statusp, int flags, struct nfs4_clnt *nfscl)
1215 1215 {
1216 1216 CLIENT *client;
1217 1217 struct chtab *ch;
1218 1218 cred_t *cr = icr;
1219 1219 struct rpc_err rpcerr, rpcerr_tmp;
1220 1220 enum clnt_stat status;
1221 1221 int error;
1222 1222 struct timeval wait;
1223 1223 int timeo; /* in units of hz */
1224 1224 bool_t tryagain, is_recov;
1225 1225 bool_t cred_cloned = FALSE;
1226 1226 k_sigset_t smask;
1227 1227 servinfo4_t *svp;
1228 1228 #ifdef DEBUG
1229 1229 char *bufp;
1230 1230 #endif
1231 1231 int firstcall;
1232 1232
1233 1233 rpcerr.re_status = RPC_SUCCESS;
1234 1234
1235 1235 /*
1236 1236 * If we know that we are rebooting then let's
1237 1237 * not bother with doing any over the wireness.
1238 1238 */
1239 1239 mutex_enter(&mi->mi_lock);
1240 1240 if (mi->mi_flags & MI4_SHUTDOWN) {
1241 1241 mutex_exit(&mi->mi_lock);
1242 1242 return (EIO);
1243 1243 }
1244 1244 mutex_exit(&mi->mi_lock);
1245 1245
1246 1246 /* For TSOL, use a new cred which has net_mac_aware flag */
1247 1247 if (!cred_cloned && is_system_labeled()) {
1248 1248 cred_cloned = TRUE;
1249 1249 cr = crdup(icr);
1250 1250 (void) setpflags(NET_MAC_AWARE, 1, cr);
1251 1251 }
1252 1252
1253 1253 /*
1254 1254 * clget() calls clnt_tli_kinit() which clears the xid, so we
1255 1255 * are guaranteed to reprocess the retry as a new request.
1256 1256 */
1257 1257 svp = mi->mi_curr_serv;
1258 1258 rpcerr.re_errno = nfs_clget4(mi, svp, cr, &client, &ch, nfscl);
1259 1259 if (rpcerr.re_errno != 0)
1260 1260 return (rpcerr.re_errno);
1261 1261
1262 1262 timeo = (mi->mi_timeo * hz) / 10;
1263 1263
1264 1264 /*
1265 1265 * If hard mounted fs, retry call forever unless hard error
1266 1266 * occurs.
1267 1267 *
1268 1268 * For forced unmount, let the recovery thread through but return
1269 1269 * an error for all others. This is so that user processes can
1270 1270 * exit quickly. The recovery thread bails out after one
1271 1271 * transmission so that it can tell if it needs to continue.
1272 1272 *
1273 1273 * For zone shutdown, behave as above to encourage quick
1274 1274 * process exit, but also fail quickly when servers have
1275 1275 * timed out before and reduce the timeouts.
1276 1276 */
1277 1277 is_recov = (curthread == mi->mi_recovthread);
1278 1278 firstcall = 1;
1279 1279 do {
1280 1280 tryagain = FALSE;
1281 1281
1282 1282 NFS4_DEBUG(nfs4_rfscall_debug, (CE_NOTE,
1283 1283 "nfs4_rfscall: vfs_flag=0x%x, %s",
1284 1284 mi->mi_vfsp->vfs_flag,
1285 1285 is_recov ? "recov thread" : "not recov thread"));
1286 1286
1287 1287 /*
1288 1288 * It's possible while we're retrying the admin
1289 1289 * decided to reboot.
1290 1290 */
1291 1291 mutex_enter(&mi->mi_lock);
1292 1292 if (mi->mi_flags & MI4_SHUTDOWN) {
1293 1293 mutex_exit(&mi->mi_lock);
1294 1294 clfree4(client, ch, nfscl);
1295 1295 if (cred_cloned)
1296 1296 crfree(cr);
1297 1297 return (EIO);
1298 1298 }
1299 1299 mutex_exit(&mi->mi_lock);
1300 1300
1301 1301 if ((mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED) &&
1302 1302 (!is_recov || !firstcall)) {
1303 1303 clfree4(client, ch, nfscl);
1304 1304 if (cred_cloned)
1305 1305 crfree(cr);
1306 1306 return (EIO);
1307 1307 }
1308 1308
1309 1309 if (zone_status_get(curproc->p_zone) >= ZONE_IS_SHUTTING_DOWN) {
1310 1310 mutex_enter(&mi->mi_lock);
1311 1311 if ((mi->mi_flags & MI4_TIMEDOUT) ||
1312 1312 !is_recov || !firstcall) {
1313 1313 mutex_exit(&mi->mi_lock);
1314 1314 clfree4(client, ch, nfscl);
1315 1315 if (cred_cloned)
1316 1316 crfree(cr);
1317 1317 return (EIO);
1318 1318 }
1319 1319 mutex_exit(&mi->mi_lock);
1320 1320 timeo = (MIN(mi->mi_timeo, SHORTWAIT) * hz) / 10;
1321 1321 }
1322 1322
1323 1323 firstcall = 0;
1324 1324 TICK_TO_TIMEVAL(timeo, &wait);
1325 1325
1326 1326 /*
1327 1327 * Mask out all signals except SIGHUP, SIGINT, SIGQUIT
1328 1328 * and SIGTERM. (Preserving the existing masks).
1329 1329 * Mask out SIGINT if mount option nointr is specified.
1330 1330 */
1331 1331 sigintr(&smask, (int)mi->mi_flags & MI4_INT);
1332 1332 if (!(mi->mi_flags & MI4_INT))
1333 1333 client->cl_nosignal = TRUE;
1334 1334
1335 1335 /*
1336 1336 * If there is a current signal, then don't bother
1337 1337 * even trying to send out the request because we
1338 1338 * won't be able to block waiting for the response.
1339 1339 * Simply assume RPC_INTR and get on with it.
1340 1340 */
1341 1341 if (ttolwp(curthread) != NULL && ISSIG(curthread, JUSTLOOKING))
1342 1342 status = RPC_INTR;
1343 1343 else {
1344 1344 status = CLNT_CALL(client, which, xdrargs, argsp,
1345 1345 xdrres, resp, wait);
1346 1346 }
1347 1347
1348 1348 if (!(mi->mi_flags & MI4_INT))
1349 1349 client->cl_nosignal = FALSE;
1350 1350 /*
1351 1351 * restore original signal mask
1352 1352 */
1353 1353 sigunintr(&smask);
1354 1354
1355 1355 switch (status) {
1356 1356 case RPC_SUCCESS:
1357 1357 break;
1358 1358
1359 1359 case RPC_INTR:
1360 1360 /*
1361 1361 * There is no way to recover from this error,
1362 1362 * even if mount option nointr is specified.
1363 1363 * SIGKILL, for example, cannot be blocked.
1364 1364 */
1365 1365 rpcerr.re_status = RPC_INTR;
1366 1366 rpcerr.re_errno = EINTR;
1367 1367 break;
1368 1368
1369 1369 case RPC_UDERROR:
1370 1370 /*
1371 1371 * If the NFS server is local (vold) and
1372 1372 * it goes away then we get RPC_UDERROR.
1373 1373 * This is a retryable error, so we would
1374 1374 * loop, so check to see if the specific
1375 1375 * error was ECONNRESET, indicating that
1376 1376 * target did not exist at all. If so,
1377 1377 * return with RPC_PROGUNAVAIL and
1378 1378 * ECONNRESET to indicate why.
1379 1379 */
1380 1380 CLNT_GETERR(client, &rpcerr);
1381 1381 if (rpcerr.re_errno == ECONNRESET) {
1382 1382 rpcerr.re_status = RPC_PROGUNAVAIL;
1383 1383 rpcerr.re_errno = ECONNRESET;
1384 1384 break;
1385 1385 }
1386 1386 /*FALLTHROUGH*/
1387 1387
1388 1388 default: /* probably RPC_TIMEDOUT */
1389 1389
1390 1390 if (IS_UNRECOVERABLE_RPC(status))
1391 1391 break;
1392 1392
1393 1393 /*
1394 1394 * increment server not responding count
1395 1395 */
1396 1396 mutex_enter(&mi->mi_lock);
1397 1397 mi->mi_noresponse++;
1398 1398 mutex_exit(&mi->mi_lock);
1399 1399 #ifdef DEBUG
1400 1400 nfscl->nfscl_stat.noresponse.value.ui64++;
1401 1401 #endif
1402 1402 /*
1403 1403 * On zone shutdown, mark server dead and move on.
1404 1404 */
1405 1405 if (zone_status_get(curproc->p_zone) >=
1406 1406 ZONE_IS_SHUTTING_DOWN) {
1407 1407 mutex_enter(&mi->mi_lock);
1408 1408 mi->mi_flags |= MI4_TIMEDOUT;
1409 1409 mutex_exit(&mi->mi_lock);
1410 1410 clfree4(client, ch, nfscl);
1411 1411 if (cred_cloned)
1412 1412 crfree(cr);
1413 1413 return (EIO);
1414 1414 }
1415 1415
1416 1416 /*
1417 1417 * NFS client failover support:
1418 1418 * return and let the caller take care of
1419 1419 * failover. We only return for failover mounts
1420 1420 * because otherwise we want the "not responding"
1421 1421 * message, the timer updates, etc.
1422 1422 */
1423 1423 if (mi->mi_vers == 4 && FAILOVER_MOUNT4(mi) &&
1424 1424 (error = try_failover(status)) != 0) {
1425 1425 clfree4(client, ch, nfscl);
1426 1426 if (cred_cloned)
1427 1427 crfree(cr);
1428 1428 *rpc_statusp = status;
1429 1429 return (error);
1430 1430 }
1431 1431
1432 1432 if (flags & RFSCALL_SOFT)
1433 1433 break;
1434 1434
1435 1435 tryagain = TRUE;
1436 1436
1437 1437 /*
1438 1438 * The call is in progress (over COTS).
1439 1439 * Try the CLNT_CALL again, but don't
1440 1440 * print a noisy error message.
1441 1441 */
1442 1442 if (status == RPC_INPROGRESS)
1443 1443 break;
1444 1444
1445 1445 timeo = backoff(timeo);
1446 1446 CLNT_GETERR(client, &rpcerr_tmp);
1447 1447
1448 1448 mutex_enter(&mi->mi_lock);
1449 1449 if (!(mi->mi_flags & MI4_PRINTED)) {
1450 1450 mi->mi_flags |= MI4_PRINTED;
1451 1451 mutex_exit(&mi->mi_lock);
1452 1452 if ((status == RPC_CANTSEND) &&
1453 1453 (rpcerr_tmp.re_errno == ENOBUFS))
1454 1454 nfs4_queue_fact(RF_SENDQ_FULL, mi, 0,
1455 1455 0, 0, FALSE, NULL, 0, NULL);
1456 1456 else
1457 1457 nfs4_queue_fact(RF_SRV_NOT_RESPOND, mi,
1458 1458 0, 0, 0, FALSE, NULL, 0, NULL);
1459 1459 } else
1460 1460 mutex_exit(&mi->mi_lock);
1461 1461
1462 1462 if (*doqueue && nfs_has_ctty()) {
1463 1463 *doqueue = 0;
1464 1464 if (!(mi->mi_flags & MI4_NOPRINT)) {
1465 1465 if ((status == RPC_CANTSEND) &&
1466 1466 (rpcerr_tmp.re_errno == ENOBUFS))
1467 1467 nfs4_queue_fact(RF_SENDQ_FULL,
1468 1468 mi, 0, 0, 0, FALSE, NULL,
1469 1469 0, NULL);
1470 1470 else
1471 1471 nfs4_queue_fact(
1472 1472 RF_SRV_NOT_RESPOND, mi, 0,
1473 1473 0, 0, FALSE, NULL, 0, NULL);
1474 1474 }
1475 1475 }
1476 1476 }
1477 1477 } while (tryagain);
1478 1478
1479 1479 DTRACE_PROBE2(nfs4__rfscall_debug, enum clnt_stat, status,
1480 1480 int, rpcerr.re_errno);
1481 1481
1482 1482 if (status != RPC_SUCCESS) {
1483 1483 zoneid_t zoneid = mi->mi_zone->zone_id;
1484 1484
1485 1485 /*
1486 1486 * Let soft mounts use the timed out message.
1487 1487 */
1488 1488 if (status == RPC_INPROGRESS)
1489 1489 status = RPC_TIMEDOUT;
1490 1490 nfscl->nfscl_stat.badcalls.value.ui64++;
1491 1491 if (status != RPC_INTR) {
1492 1492 mutex_enter(&mi->mi_lock);
1493 1493 mi->mi_flags |= MI4_DOWN;
1494 1494 mutex_exit(&mi->mi_lock);
1495 1495 CLNT_GETERR(client, &rpcerr);
1496 1496 #ifdef DEBUG
1497 1497 bufp = clnt_sperror(client, svp->sv_hostname);
1498 1498 zprintf(zoneid, "NFS%d %s failed for %s\n",
1499 1499 mi->mi_vers, mi->mi_rfsnames[which], bufp);
1500 1500 if (nfs_has_ctty()) {
1501 1501 if (!(mi->mi_flags & MI4_NOPRINT)) {
1502 1502 uprintf("NFS%d %s failed for %s\n",
1503 1503 mi->mi_vers, mi->mi_rfsnames[which],
1504 1504 bufp);
1505 1505 }
1506 1506 }
1507 1507 kmem_free(bufp, MAXPATHLEN);
1508 1508 #else
1509 1509 zprintf(zoneid,
1510 1510 "NFS %s failed for server %s: error %d (%s)\n",
1511 1511 mi->mi_rfsnames[which], svp->sv_hostname,
1512 1512 status, clnt_sperrno(status));
1513 1513 if (nfs_has_ctty()) {
1514 1514 if (!(mi->mi_flags & MI4_NOPRINT)) {
1515 1515 uprintf(
1516 1516 "NFS %s failed for server %s: error %d (%s)\n",
1517 1517 mi->mi_rfsnames[which],
1518 1518 svp->sv_hostname, status,
1519 1519 clnt_sperrno(status));
1520 1520 }
1521 1521 }
1522 1522 #endif
1523 1523 /*
1524 1524 * when CLNT_CALL() fails with RPC_AUTHERROR,
1525 1525 * re_errno is set appropriately depending on
1526 1526 * the authentication error
1527 1527 */
1528 1528 if (status == RPC_VERSMISMATCH ||
1529 1529 status == RPC_PROGVERSMISMATCH)
1530 1530 rpcerr.re_errno = EIO;
1531 1531 }
1532 1532 } else {
1533 1533 /*
1534 1534 * Test the value of mi_down and mi_printed without
1535 1535 * holding the mi_lock mutex. If they are both zero,
1536 1536 * then it is okay to skip the down and printed
1537 1537 * processing. This saves on a mutex_enter and
1538 1538 * mutex_exit pair for a normal, successful RPC.
1539 1539 * This was just complete overhead.
1540 1540 */
1541 1541 if (mi->mi_flags & (MI4_DOWN | MI4_PRINTED)) {
1542 1542 mutex_enter(&mi->mi_lock);
1543 1543 mi->mi_flags &= ~MI4_DOWN;
1544 1544 if (mi->mi_flags & MI4_PRINTED) {
1545 1545 mi->mi_flags &= ~MI4_PRINTED;
1546 1546 mutex_exit(&mi->mi_lock);
1547 1547 if (!(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED))
1548 1548 nfs4_queue_fact(RF_SRV_OK, mi, 0, 0,
1549 1549 0, FALSE, NULL, 0, NULL);
1550 1550 } else
1551 1551 mutex_exit(&mi->mi_lock);
1552 1552 }
1553 1553
1554 1554 if (*doqueue == 0) {
1555 1555 if (!(mi->mi_flags & MI4_NOPRINT) &&
1556 1556 !(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED))
1557 1557 nfs4_queue_fact(RF_SRV_OK, mi, 0, 0, 0,
1558 1558 FALSE, NULL, 0, NULL);
1559 1559
1560 1560 *doqueue = 1;
1561 1561 }
1562 1562 }
1563 1563
1564 1564 clfree4(client, ch, nfscl);
1565 1565 if (cred_cloned)
1566 1566 crfree(cr);
1567 1567
1568 1568 ASSERT(rpcerr.re_status == RPC_SUCCESS || rpcerr.re_errno != 0);
1569 1569
1570 1570 TRACE_1(TR_FAC_NFS, TR_RFSCALL_END, "nfs4_rfscall_end:errno %d",
1571 1571 rpcerr.re_errno);
1572 1572
1573 1573 *rpc_statusp = status;
1574 1574 return (rpcerr.re_errno);
1575 1575 }
1576 1576
1577 1577 /*
1578 1578 * rfs4call - general wrapper for RPC calls initiated by the client
1579 1579 */
1580 1580 void
1581 1581 rfs4call(mntinfo4_t *mi, COMPOUND4args_clnt *argsp, COMPOUND4res_clnt *resp,
1582 1582 cred_t *cr, int *doqueue, int flags, nfs4_error_t *ep)
1583 1583 {
1584 1584 int i, error;
1585 1585 enum clnt_stat rpc_status = NFS4_OK;
1586 1586 int num_resops;
1587 1587 struct nfs4_clnt *nfscl;
1588 1588
1589 1589 ASSERT(nfs_zone() == mi->mi_zone);
1590 1590 nfscl = zone_getspecific(nfs4clnt_zone_key, nfs_zone());
1591 1591 ASSERT(nfscl != NULL);
1592 1592
1593 1593 nfscl->nfscl_stat.calls.value.ui64++;
1594 1594 mi->mi_reqs[NFSPROC4_COMPOUND].value.ui64++;
1595 1595
1596 1596 /* Set up the results struct for XDR usage */
1597 1597 resp->argsp = argsp;
1598 1598 resp->array = NULL;
1599 1599 resp->status = 0;
1600 1600 resp->decode_len = 0;
1601 1601
1602 1602 error = nfs4_rfscall(mi, NFSPROC4_COMPOUND,
1603 1603 xdr_COMPOUND4args_clnt, (caddr_t)argsp,
1604 1604 xdr_COMPOUND4res_clnt, (caddr_t)resp, cr,
1605 1605 doqueue, &rpc_status, flags, nfscl);
1606 1606
1607 1607 /* Return now if it was an RPC error */
1608 1608 if (error) {
1609 1609 ep->error = error;
1610 1610 ep->stat = resp->status;
1611 1611 ep->rpc_status = rpc_status;
1612 1612 return;
1613 1613 }
1614 1614
1615 1615 /* else we'll count the processed operations */
1616 1616 num_resops = resp->decode_len;
1617 1617 for (i = 0; i < num_resops; i++) {
1618 1618 /*
1619 1619 * Count the individual operations
1620 1620 * processed by the server.
1621 1621 */
1622 1622 if (resp->array[i].resop >= NFSPROC4_NULL &&
1623 1623 resp->array[i].resop <= OP_WRITE)
1624 1624 mi->mi_reqs[resp->array[i].resop].value.ui64++;
1625 1625 }
1626 1626
1627 1627 ep->error = 0;
1628 1628 ep->stat = resp->status;
1629 1629 ep->rpc_status = rpc_status;
1630 1630 }
1631 1631
1632 1632 /*
1633 1633 * nfs4rename_update - updates stored state after a rename. Currently this
1634 1634 * is the path of the object and anything under it, and the filehandle of
1635 1635 * the renamed object.
1636 1636 */
1637 1637 void
1638 1638 nfs4rename_update(vnode_t *renvp, vnode_t *ndvp, nfs_fh4 *nfh4p, char *nnm)
1639 1639 {
1640 1640 sfh4_update(VTOR4(renvp)->r_fh, nfh4p);
1641 1641 fn_move(VTOSV(renvp)->sv_name, VTOSV(ndvp)->sv_name, nnm);
1642 1642 }
1643 1643
1644 1644 /*
1645 1645 * Routine to look up the filehandle for the given path and rootvp.
1646 1646 *
1647 1647 * Return values:
1648 1648 * - success: returns zero and *statp is set to NFS4_OK, and *fhp is
1649 1649 * updated.
1650 1650 * - error: return value (errno value) and/or *statp is set appropriately.
1651 1651 */
1652 1652 #define RML_ORDINARY 1
1653 1653 #define RML_NAMED_ATTR 2
1654 1654 #define RML_ATTRDIR 3
1655 1655
1656 1656 static void
1657 1657 remap_lookup(nfs4_fname_t *fname, vnode_t *rootvp,
1658 1658 int filetype, cred_t *cr,
1659 1659 nfs_fh4 *fhp, nfs4_ga_res_t *garp, /* fh, attrs for object */
1660 1660 nfs_fh4 *pfhp, nfs4_ga_res_t *pgarp, /* fh, attrs for parent */
1661 1661 nfs4_error_t *ep)
1662 1662 {
1663 1663 COMPOUND4args_clnt args;
1664 1664 COMPOUND4res_clnt res;
1665 1665 nfs_argop4 *argop;
1666 1666 nfs_resop4 *resop;
1667 1667 int num_argops;
1668 1668 lookup4_param_t lookuparg;
1669 1669 nfs_fh4 *tmpfhp;
1670 1670 int doqueue = 1;
1671 1671 char *path;
1672 1672 mntinfo4_t *mi;
1673 1673
1674 1674 ASSERT(fname != NULL);
1675 1675 ASSERT(rootvp->v_type == VDIR);
1676 1676
1677 1677 mi = VTOMI4(rootvp);
1678 1678 path = fn_path(fname);
1679 1679 switch (filetype) {
1680 1680 case RML_NAMED_ATTR:
1681 1681 lookuparg.l4_getattrs = LKP4_LAST_NAMED_ATTR;
1682 1682 args.ctag = TAG_REMAP_LOOKUP_NA;
1683 1683 break;
1684 1684 case RML_ATTRDIR:
1685 1685 lookuparg.l4_getattrs = LKP4_LAST_ATTRDIR;
1686 1686 args.ctag = TAG_REMAP_LOOKUP_AD;
1687 1687 break;
1688 1688 case RML_ORDINARY:
1689 1689 lookuparg.l4_getattrs = LKP4_ALL_ATTRIBUTES;
1690 1690 args.ctag = TAG_REMAP_LOOKUP;
1691 1691 break;
1692 1692 default:
1693 1693 ep->error = EINVAL;
1694 1694 return;
1695 1695 }
1696 1696 lookuparg.argsp = &args;
1697 1697 lookuparg.resp = &res;
1698 1698 lookuparg.header_len = 1; /* Putfh */
1699 1699 lookuparg.trailer_len = 0;
1700 1700 lookuparg.ga_bits = NFS4_VATTR_MASK;
1701 1701 lookuparg.mi = VTOMI4(rootvp);
1702 1702
1703 1703 (void) nfs4lookup_setup(path, &lookuparg, 1);
1704 1704
1705 1705 /* 0: putfh directory */
1706 1706 argop = args.array;
1707 1707 argop[0].argop = OP_CPUTFH;
1708 1708 argop[0].nfs_argop4_u.opcputfh.sfh = VTOR4(rootvp)->r_fh;
1709 1709
1710 1710 num_argops = args.array_len;
1711 1711
1712 1712 rfs4call(mi, &args, &res, cr, &doqueue, RFSCALL_SOFT, ep);
1713 1713
1714 1714 if (ep->error || res.status != NFS4_OK)
1715 1715 goto exit;
1716 1716
1717 1717 /* get the object filehandle */
1718 1718 resop = &res.array[res.array_len - 2];
1719 1719 if (resop->resop != OP_GETFH) {
1720 1720 nfs4_queue_event(RE_FAIL_REMAP_OP, mi, NULL,
1721 1721 0, NULL, NULL, 0, NULL, 0, TAG_NONE, TAG_NONE, 0, 0);
1722 1722 ep->stat = NFS4ERR_SERVERFAULT;
1723 1723 goto exit;
1724 1724 }
1725 1725 tmpfhp = &resop->nfs_resop4_u.opgetfh.object;
1726 1726 if (tmpfhp->nfs_fh4_len > NFS4_FHSIZE) {
1727 1727 nfs4_queue_event(RE_FAIL_REMAP_LEN, mi, NULL,
1728 1728 tmpfhp->nfs_fh4_len, NULL, NULL, 0, NULL, 0, TAG_NONE,
1729 1729 TAG_NONE, 0, 0);
1730 1730 ep->stat = NFS4ERR_SERVERFAULT;
1731 1731 goto exit;
1732 1732 }
1733 1733 fhp->nfs_fh4_val = kmem_alloc(tmpfhp->nfs_fh4_len, KM_SLEEP);
1734 1734 nfs_fh4_copy(tmpfhp, fhp);
1735 1735
1736 1736 /* get the object attributes */
1737 1737 resop = &res.array[res.array_len - 1];
1738 1738 if (garp && resop->resop == OP_GETATTR)
1739 1739 *garp = resop->nfs_resop4_u.opgetattr.ga_res;
1740 1740
1741 1741 /* See if there are enough fields in the response for parent info */
1742 1742 if ((int)res.array_len - 5 <= 0)
1743 1743 goto exit;
1744 1744
1745 1745 /* get the parent filehandle */
1746 1746 resop = &res.array[res.array_len - 5];
1747 1747 if (resop->resop != OP_GETFH) {
1748 1748 nfs4_queue_event(RE_FAIL_REMAP_OP, mi, NULL,
1749 1749 0, NULL, NULL, 0, NULL, 0, TAG_NONE, TAG_NONE, 0, 0);
1750 1750 ep->stat = NFS4ERR_SERVERFAULT;
1751 1751 goto exit;
1752 1752 }
1753 1753 tmpfhp = &resop->nfs_resop4_u.opgetfh.object;
1754 1754 if (tmpfhp->nfs_fh4_len > NFS4_FHSIZE) {
1755 1755 nfs4_queue_event(RE_FAIL_REMAP_LEN, mi, NULL,
1756 1756 tmpfhp->nfs_fh4_len, NULL, NULL, 0, NULL, 0, TAG_NONE,
1757 1757 TAG_NONE, 0, 0);
1758 1758 ep->stat = NFS4ERR_SERVERFAULT;
1759 1759 goto exit;
1760 1760 }
1761 1761 pfhp->nfs_fh4_val = kmem_alloc(tmpfhp->nfs_fh4_len, KM_SLEEP);
1762 1762 nfs_fh4_copy(tmpfhp, pfhp);
1763 1763
1764 1764 /* get the parent attributes */
1765 1765 resop = &res.array[res.array_len - 4];
1766 1766 if (pgarp && resop->resop == OP_GETATTR)
1767 1767 *pgarp = resop->nfs_resop4_u.opgetattr.ga_res;
1768 1768
1769 1769 exit:
1770 1770 /*
1771 1771 * It is too hard to remember where all the OP_LOOKUPs are
1772 1772 */
1773 1773 nfs4args_lookup_free(argop, num_argops);
1774 1774 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4));
1775 1775
1776 1776 if (!ep->error)
1777 1777 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
1778 1778 kmem_free(path, strlen(path)+1);
1779 1779 }
1780 1780
1781 1781 /*
1782 1782 * NFS client failover / volatile filehandle support
1783 1783 *
1784 1784 * Recover the filehandle for the given rnode.
1785 1785 *
1786 1786 * Errors are returned via the nfs4_error_t parameter.
1787 1787 */
1788 1788
1789 1789 void
1790 1790 nfs4_remap_file(mntinfo4_t *mi, vnode_t *vp, int flags, nfs4_error_t *ep)
1791 1791 {
1792 1792 int is_stub;
1793 1793 rnode4_t *rp = VTOR4(vp);
1794 1794 vnode_t *rootvp = NULL;
1795 1795 vnode_t *dvp = NULL;
1796 1796 cred_t *cr, *cred_otw;
1797 1797 nfs4_ga_res_t gar, pgar;
1798 1798 nfs_fh4 newfh = {0, NULL}, newpfh = {0, NULL};
1799 1799 int filetype = RML_ORDINARY;
1800 1800 nfs4_recov_state_t recov = {NULL, 0, 0};
1801 1801 int badfhcount = 0;
1802 1802 nfs4_open_stream_t *osp = NULL;
1803 1803 bool_t first_time = TRUE; /* first time getting OTW cred */
1804 1804 bool_t last_time = FALSE; /* last time getting OTW cred */
1805 1805
1806 1806 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
1807 1807 "nfs4_remap_file: remapping %s", rnode4info(rp)));
1808 1808 ASSERT(nfs4_consistent_type(vp));
1809 1809
1810 1810 if (vp->v_flag & VROOT) {
1811 1811 nfs4_remap_root(mi, ep, flags);
1812 1812 return;
1813 1813 }
1814 1814
1815 1815 /*
1816 1816 * Given the root fh, use the path stored in
1817 1817 * the rnode to find the fh for the new server.
1818 1818 */
1819 1819 ep->error = VFS_ROOT(mi->mi_vfsp, &rootvp);
1820 1820 if (ep->error != 0)
1821 1821 return;
1822 1822
1823 1823 cr = curthread->t_cred;
1824 1824 ASSERT(cr != NULL);
1825 1825 get_remap_cred:
1826 1826 /*
1827 1827 * Releases the osp, if it is provided.
1828 1828 * Puts a hold on the cred_otw and the new osp (if found).
1829 1829 */
1830 1830 cred_otw = nfs4_get_otw_cred_by_osp(rp, cr, &osp,
1831 1831 &first_time, &last_time);
1832 1832 ASSERT(cred_otw != NULL);
1833 1833
1834 1834 if (rp->r_flags & R4ISXATTR) {
1835 1835 filetype = RML_NAMED_ATTR;
1836 1836 (void) vtodv(vp, &dvp, cred_otw, FALSE);
1837 1837 }
1838 1838
1839 1839 if (vp->v_flag & V_XATTRDIR) {
1840 1840 filetype = RML_ATTRDIR;
1841 1841 }
1842 1842
1843 1843 if (filetype == RML_ORDINARY && rootvp->v_type == VREG) {
1844 1844 /* file mount, doesn't need a remap */
1845 1845 goto done;
1846 1846 }
1847 1847
1848 1848 again:
1849 1849 remap_lookup(rp->r_svnode.sv_name, rootvp, filetype, cred_otw,
1850 1850 &newfh, &gar, &newpfh, &pgar, ep);
1851 1851
1852 1852 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
1853 1853 "nfs4_remap_file: remap_lookup returned %d/%d",
1854 1854 ep->error, ep->stat));
1855 1855
1856 1856 if (last_time == FALSE && ep->error == EACCES) {
1857 1857 crfree(cred_otw);
1858 1858 if (dvp != NULL)
1859 1859 VN_RELE(dvp);
1860 1860 goto get_remap_cred;
1861 1861 }
1862 1862 if (ep->error != 0)
1863 1863 goto done;
1864 1864
1865 1865 switch (ep->stat) {
1866 1866 case NFS4_OK:
1867 1867 badfhcount = 0;
1868 1868 if (recov.rs_flags & NFS4_RS_DELAY_MSG) {
1869 1869 mutex_enter(&rp->r_statelock);
1870 1870 rp->r_delay_interval = 0;
1871 1871 mutex_exit(&rp->r_statelock);
1872 1872 uprintf("NFS File Available..\n");
1873 1873 }
1874 1874 break;
1875 1875 case NFS4ERR_FHEXPIRED:
1876 1876 case NFS4ERR_BADHANDLE:
1877 1877 case NFS4ERR_STALE:
1878 1878 /*
1879 1879 * If we ran into filehandle problems, we should try to
1880 1880 * remap the root vnode first and hope life gets better.
1881 1881 * But we need to avoid loops.
1882 1882 */
1883 1883 if (badfhcount++ > 0)
1884 1884 goto done;
1885 1885 if (newfh.nfs_fh4_len != 0) {
1886 1886 kmem_free(newfh.nfs_fh4_val, newfh.nfs_fh4_len);
1887 1887 newfh.nfs_fh4_len = 0;
1888 1888 }
1889 1889 if (newpfh.nfs_fh4_len != 0) {
1890 1890 kmem_free(newpfh.nfs_fh4_val, newpfh.nfs_fh4_len);
1891 1891 newpfh.nfs_fh4_len = 0;
1892 1892 }
1893 1893 /* relative path - remap rootvp then retry */
1894 1894 VN_RELE(rootvp);
1895 1895 rootvp = NULL;
1896 1896 nfs4_remap_root(mi, ep, flags);
1897 1897 if (ep->error != 0 || ep->stat != NFS4_OK)
1898 1898 goto done;
1899 1899 ep->error = VFS_ROOT(mi->mi_vfsp, &rootvp);
1900 1900 if (ep->error != 0)
1901 1901 goto done;
1902 1902 goto again;
1903 1903 case NFS4ERR_DELAY:
1904 1904 badfhcount = 0;
1905 1905 nfs4_set_delay_wait(vp);
1906 1906 ep->error = nfs4_wait_for_delay(vp, &recov);
1907 1907 if (ep->error != 0)
1908 1908 goto done;
1909 1909 goto again;
1910 1910 case NFS4ERR_ACCESS:
1911 1911 /* get new cred, try again */
1912 1912 if (last_time == TRUE)
1913 1913 goto done;
1914 1914 if (dvp != NULL)
1915 1915 VN_RELE(dvp);
1916 1916 crfree(cred_otw);
1917 1917 goto get_remap_cred;
1918 1918 default:
1919 1919 goto done;
1920 1920 }
1921 1921
1922 1922 /*
1923 1923 * Check on the new and old rnodes before updating;
1924 1924 * if the vnode type or size changes, issue a warning
1925 1925 * and mark the file dead.
1926 1926 */
1927 1927 mutex_enter(&rp->r_statelock);
1928 1928 if (flags & NFS4_REMAP_CKATTRS) {
1929 1929 if (vp->v_type != gar.n4g_va.va_type ||
1930 1930 (vp->v_type != VDIR &&
1931 1931 rp->r_size != gar.n4g_va.va_size)) {
1932 1932 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
1933 1933 "nfs4_remap_file: size %d vs. %d, type %d vs. %d",
1934 1934 (int)rp->r_size, (int)gar.n4g_va.va_size,
1935 1935 vp->v_type, gar.n4g_va.va_type));
1936 1936 mutex_exit(&rp->r_statelock);
1937 1937 nfs4_queue_event(RE_FILE_DIFF, mi,
1938 1938 rp->r_server->sv_hostname, 0, vp, NULL, 0, NULL, 0,
1939 1939 TAG_NONE, TAG_NONE, 0, 0);
1940 1940 nfs4_fail_recov(vp, NULL, 0, NFS4_OK);
1941 1941 goto done;
1942 1942 }
1943 1943 }
1944 1944 ASSERT(gar.n4g_va.va_type != VNON);
1945 1945 rp->r_server = mi->mi_curr_serv;
1946 1946
1947 1947 /*
1948 1948 * Turn this object into a "stub" object if we
1949 1949 * crossed an underlying server fs boundary.
1950 1950 *
1951 1951 * This stub will be for a mirror-mount.
1952 1952 * A referral would look like a boundary crossing
1953 1953 * as well, but would not be the same type of object,
1954 1954 * so we would expect to mark the object dead.
1955 1955 *
1956 1956 * See comment in r4_do_attrcache() for more details.
1957 1957 */
1958 1958 is_stub = 0;
1959 1959 if (gar.n4g_fsid_valid) {
1960 1960 (void) nfs_rw_enter_sig(&rp->r_server->sv_lock, RW_READER, 0);
1961 1961 rp->r_srv_fsid = gar.n4g_fsid;
1962 1962 if (!FATTR4_FSID_EQ(&gar.n4g_fsid, &rp->r_server->sv_fsid))
1963 1963 is_stub = 1;
1964 1964 nfs_rw_exit(&rp->r_server->sv_lock);
1965 1965 #ifdef DEBUG
1966 1966 } else {
1967 1967 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
1968 1968 "remap_file: fsid attr not provided by server. rp=%p",
1969 1969 (void *)rp));
1970 1970 #endif
1971 1971 }
1972 1972 if (is_stub)
1973 1973 r4_stub_mirrormount(rp);
1974 1974 else
1975 1975 r4_stub_none(rp);
1976 1976 mutex_exit(&rp->r_statelock);
1977 1977 nfs4_attrcache_noinval(vp, &gar, gethrtime()); /* force update */
1978 1978 sfh4_update(rp->r_fh, &newfh);
1979 1979 ASSERT(nfs4_consistent_type(vp));
1980 1980
1981 1981 /*
1982 1982 * If we got parent info, use it to update the parent
1983 1983 */
1984 1984 if (newpfh.nfs_fh4_len != 0) {
1985 1985 if (rp->r_svnode.sv_dfh != NULL)
1986 1986 sfh4_update(rp->r_svnode.sv_dfh, &newpfh);
1987 1987 if (dvp != NULL) {
1988 1988 /* force update of attrs */
1989 1989 nfs4_attrcache_noinval(dvp, &pgar, gethrtime());
1990 1990 }
1991 1991 }
1992 1992 done:
1993 1993 if (newfh.nfs_fh4_len != 0)
1994 1994 kmem_free(newfh.nfs_fh4_val, newfh.nfs_fh4_len);
1995 1995 if (newpfh.nfs_fh4_len != 0)
1996 1996 kmem_free(newpfh.nfs_fh4_val, newpfh.nfs_fh4_len);
1997 1997 if (cred_otw != NULL)
1998 1998 crfree(cred_otw);
1999 1999 if (rootvp != NULL)
2000 2000 VN_RELE(rootvp);
2001 2001 if (dvp != NULL)
2002 2002 VN_RELE(dvp);
2003 2003 if (osp != NULL)
2004 2004 open_stream_rele(osp, rp);
2005 2005 }
2006 2006
2007 2007 /*
2008 2008 * Client-side failover support: remap the filehandle for vp if it appears
2009 2009 * necessary. errors are returned via the nfs4_error_t parameter; though,
2010 2010 * if there is a problem, we will just try again later.
2011 2011 */
2012 2012
2013 2013 void
2014 2014 nfs4_check_remap(mntinfo4_t *mi, vnode_t *vp, int flags, nfs4_error_t *ep)
2015 2015 {
2016 2016 if (vp == NULL)
2017 2017 return;
2018 2018
2019 2019 if (!(vp->v_vfsp->vfs_flag & VFS_RDONLY))
2020 2020 return;
2021 2021
2022 2022 if (VTOR4(vp)->r_server == mi->mi_curr_serv)
2023 2023 return;
2024 2024
2025 2025 nfs4_remap_file(mi, vp, flags, ep);
2026 2026 }
2027 2027
2028 2028 /*
2029 2029 * nfs4_make_dotdot() - find or create a parent vnode of a non-root node.
2030 2030 *
2031 2031 * Our caller has a filehandle for ".." relative to a particular
2032 2032 * directory object. We want to find or create a parent vnode
2033 2033 * with that filehandle and return it. We can of course create
2034 2034 * a vnode from this filehandle, but we need to also make sure
2035 2035 * that if ".." is a regular file (i.e. dvp is a V_XATTRDIR)
2036 2036 * that we have a parent FH for future reopens as well. If
2037 2037 * we have a remap failure, we won't be able to reopen this
2038 2038 * file, but we won't treat that as fatal because a reopen
2039 2039 * is at least unlikely. Someday nfs4_reopen() should look
2040 2040 * for a missing parent FH and try a remap to recover from it.
2041 2041 *
2042 2042 * need_start_op argument indicates whether this function should
2043 2043 * do a start_op before calling remap_lookup(). This should
2044 2044 * be FALSE, if you are the recovery thread or in an op; otherwise,
2045 2045 * set it to TRUE.
2046 2046 */
2047 2047 int
2048 2048 nfs4_make_dotdot(nfs4_sharedfh_t *fhp, hrtime_t t, vnode_t *dvp,
2049 2049 cred_t *cr, vnode_t **vpp, int need_start_op)
2050 2050 {
2051 2051 mntinfo4_t *mi = VTOMI4(dvp);
2052 2052 nfs4_fname_t *np = NULL, *pnp = NULL;
2053 2053 vnode_t *vp = NULL, *rootvp = NULL;
2054 2054 rnode4_t *rp;
2055 2055 nfs_fh4 newfh = {0, NULL}, newpfh = {0, NULL};
2056 2056 nfs4_ga_res_t gar, pgar;
2057 2057 vattr_t va, pva;
2058 2058 nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS };
2059 2059 nfs4_sharedfh_t *sfh = NULL, *psfh = NULL;
2060 2060 nfs4_recov_state_t recov_state;
2061 2061
2062 2062 #ifdef DEBUG
2063 2063 /*
2064 2064 * ensure need_start_op is correct
2065 2065 */
2066 2066 {
2067 2067 int no_need_start_op = (tsd_get(nfs4_tsd_key) ||
2068 2068 (curthread == mi->mi_recovthread));
2069 2069 /* C needs a ^^ operator! */
2070 2070 ASSERT(((need_start_op) && (!no_need_start_op)) ||
2071 2071 ((! need_start_op) && (no_need_start_op)));
2072 2072 }
2073 2073 #endif
2074 2074 ASSERT(VTOMI4(dvp)->mi_zone == nfs_zone());
2075 2075
2076 2076 NFS4_DEBUG(nfs4_client_shadow_debug, (CE_NOTE,
2077 2077 "nfs4_make_dotdot: called with fhp %p, dvp %s", (void *)fhp,
2078 2078 rnode4info(VTOR4(dvp))));
2079 2079
2080 2080 /*
2081 2081 * rootvp might be needed eventually. Holding it now will
2082 2082 * ensure that r4find_unlocked() will find it, if ".." is the root.
2083 2083 */
2084 2084 e.error = VFS_ROOT(mi->mi_vfsp, &rootvp);
2085 2085 if (e.error != 0)
2086 2086 goto out;
2087 2087 rp = r4find_unlocked(fhp, mi->mi_vfsp);
2088 2088 if (rp != NULL) {
2089 2089 *vpp = RTOV4(rp);
2090 2090 VN_RELE(rootvp);
2091 2091 return (0);
2092 2092 }
2093 2093
2094 2094 /*
2095 2095 * Since we don't have the rnode, we have to go over the wire.
2096 2096 * remap_lookup() can get all of the filehandles and attributes
2097 2097 * we need in one operation.
2098 2098 */
2099 2099 np = fn_parent(VTOSV(dvp)->sv_name);
2100 2100 /* if a parent was not found return an error */
2101 2101 if (np == NULL) {
2102 2102 e.error = ENOENT;
2103 2103 goto out;
2104 2104 }
2105 2105
2106 2106 recov_state.rs_flags = 0;
2107 2107 recov_state.rs_num_retry_despite_err = 0;
2108 2108 recov_retry:
2109 2109 if (need_start_op) {
2110 2110 e.error = nfs4_start_fop(mi, rootvp, NULL, OH_LOOKUP,
2111 2111 &recov_state, NULL);
2112 2112 if (e.error != 0) {
2113 2113 goto out;
2114 2114 }
2115 2115 }
2116 2116
2117 2117 pgar.n4g_va.va_type = VNON;
2118 2118 gar.n4g_va.va_type = VNON;
2119 2119
2120 2120 remap_lookup(np, rootvp, RML_ORDINARY, cr,
2121 2121 &newfh, &gar, &newpfh, &pgar, &e);
2122 2122 if (nfs4_needs_recovery(&e, FALSE, mi->mi_vfsp)) {
2123 2123 if (need_start_op) {
2124 2124 bool_t abort;
2125 2125
2126 2126 abort = nfs4_start_recovery(&e, mi,
2127 2127 rootvp, NULL, NULL, NULL, OP_LOOKUP, NULL, NULL,
2128 2128 NULL);
2129 2129 if (abort) {
2130 2130 nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP,
2131 2131 &recov_state, FALSE);
2132 2132 if (e.error == 0)
2133 2133 e.error = EIO;
2134 2134 goto out;
2135 2135 }
2136 2136 nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP,
2137 2137 &recov_state, TRUE);
2138 2138 goto recov_retry;
2139 2139 }
2140 2140 if (e.error == 0)
2141 2141 e.error = EIO;
2142 2142 goto out;
2143 2143 }
2144 2144
2145 2145 va = gar.n4g_va;
2146 2146 pva = pgar.n4g_va;
2147 2147
2148 2148 if ((e.error != 0) ||
2149 2149 (va.va_type != VDIR)) {
2150 2150 if (need_start_op)
2151 2151 nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP,
2152 2152 &recov_state, FALSE);
2153 2153 if (e.error == 0)
2154 2154 e.error = EIO;
2155 2155 goto out;
2156 2156 }
2157 2157
2158 2158 if (e.stat != NFS4_OK) {
2159 2159 if (need_start_op)
2160 2160 nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP,
2161 2161 &recov_state, FALSE);
2162 2162 e.error = EIO;
2163 2163 goto out;
2164 2164 }
2165 2165
2166 2166 /*
2167 2167 * It is possible for remap_lookup() to return with no error,
2168 2168 * but without providing the parent filehandle and attrs.
2169 2169 */
2170 2170 if (pva.va_type != VDIR) {
2171 2171 /*
2172 2172 * Call remap_lookup() again, this time with the
2173 2173 * newpfh and pgar args in the first position.
2174 2174 */
2175 2175 pnp = fn_parent(np);
2176 2176 if (pnp != NULL) {
2177 2177 remap_lookup(pnp, rootvp, RML_ORDINARY, cr,
2178 2178 &newpfh, &pgar, NULL, NULL, &e);
2179 2179 /*
2180 2180 * This remap_lookup call modifies pgar. The following
2181 2181 * line prevents trouble when checking the va_type of
2182 2182 * pva later in this code.
2183 2183 */
2184 2184 pva = pgar.n4g_va;
2185 2185
2186 2186 if (nfs4_needs_recovery(&e, FALSE,
2187 2187 mi->mi_vfsp)) {
2188 2188 if (need_start_op) {
2189 2189 bool_t abort;
2190 2190
2191 2191 abort = nfs4_start_recovery(&e, mi,
2192 2192 rootvp, NULL, NULL, NULL,
2193 2193 OP_LOOKUP, NULL, NULL, NULL);
2194 2194 if (abort) {
2195 2195 nfs4_end_fop(mi, rootvp, NULL,
2196 2196 OH_LOOKUP, &recov_state,
2197 2197 FALSE);
2198 2198 if (e.error == 0)
2199 2199 e.error = EIO;
2200 2200 goto out;
2201 2201 }
2202 2202 nfs4_end_fop(mi, rootvp, NULL,
2203 2203 OH_LOOKUP, &recov_state, TRUE);
2204 2204 goto recov_retry;
2205 2205 }
2206 2206 if (e.error == 0)
2207 2207 e.error = EIO;
2208 2208 goto out;
2209 2209 }
2210 2210
2211 2211 if (e.stat != NFS4_OK) {
2212 2212 if (need_start_op)
2213 2213 nfs4_end_fop(mi, rootvp, NULL,
2214 2214 OH_LOOKUP, &recov_state, FALSE);
2215 2215 e.error = EIO;
2216 2216 goto out;
2217 2217 }
2218 2218 }
2219 2219 if ((pnp == NULL) ||
2220 2220 (e.error != 0) ||
2221 2221 (pva.va_type == VNON)) {
2222 2222 if (need_start_op)
2223 2223 nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP,
2224 2224 &recov_state, FALSE);
2225 2225 if (e.error == 0)
2226 2226 e.error = EIO;
2227 2227 goto out;
2228 2228 }
2229 2229 }
2230 2230 ASSERT(newpfh.nfs_fh4_len != 0);
2231 2231 if (need_start_op)
2232 2232 nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP, &recov_state, FALSE);
2233 2233 psfh = sfh4_get(&newpfh, mi);
2234 2234
2235 2235 sfh = sfh4_get(&newfh, mi);
2236 2236 vp = makenfs4node_by_fh(sfh, psfh, &np, &gar, mi, cr, t);
2237 2237
2238 2238 out:
2239 2239 if (np != NULL)
2240 2240 fn_rele(&np);
2241 2241 if (pnp != NULL)
2242 2242 fn_rele(&pnp);
2243 2243 if (newfh.nfs_fh4_len != 0)
2244 2244 kmem_free(newfh.nfs_fh4_val, newfh.nfs_fh4_len);
2245 2245 if (newpfh.nfs_fh4_len != 0)
2246 2246 kmem_free(newpfh.nfs_fh4_val, newpfh.nfs_fh4_len);
2247 2247 if (sfh != NULL)
2248 2248 sfh4_rele(&sfh);
2249 2249 if (psfh != NULL)
2250 2250 sfh4_rele(&psfh);
2251 2251 if (rootvp != NULL)
2252 2252 VN_RELE(rootvp);
2253 2253 *vpp = vp;
2254 2254 return (e.error);
2255 2255 }
2256 2256
#ifdef DEBUG
/*
 * DEBUG-only global, initialized to zero.
 * NOTE(review): presumably accounts for memory used by rnode path
 * strings; it is not referenced in this part of the file, so confirm
 * against its users.
 */
size_t r_path_memuse = 0;
#endif
2260 2260
2261 2261 /*
2262 2262 * NFS client failover support
2263 2263 *
2264 2264 * sv4_free() frees the malloc'd portion of a "servinfo_t".
2265 2265 */
2266 2266 void
2267 2267 sv4_free(servinfo4_t *svp)
2268 2268 {
2269 2269 servinfo4_t *next;
2270 2270 struct knetconfig *knconf;
2271 2271
2272 2272 while (svp != NULL) {
2273 2273 next = svp->sv_next;
2274 2274 if (svp->sv_dhsec)
2275 2275 sec_clnt_freeinfo(svp->sv_dhsec);
2276 2276 if (svp->sv_secdata)
2277 2277 sec_clnt_freeinfo(svp->sv_secdata);
2278 2278 if (svp->sv_save_secinfo &&
2279 2279 svp->sv_save_secinfo != svp->sv_secinfo)
2280 2280 secinfo_free(svp->sv_save_secinfo);
2281 2281 if (svp->sv_secinfo)
2282 2282 secinfo_free(svp->sv_secinfo);
2283 2283 if (svp->sv_hostname && svp->sv_hostnamelen > 0)
2284 2284 kmem_free(svp->sv_hostname, svp->sv_hostnamelen);
2285 2285 knconf = svp->sv_knconf;
2286 2286 if (knconf != NULL) {
2287 2287 if (knconf->knc_protofmly != NULL)
2288 2288 kmem_free(knconf->knc_protofmly, KNC_STRSIZE);
2289 2289 if (knconf->knc_proto != NULL)
2290 2290 kmem_free(knconf->knc_proto, KNC_STRSIZE);
2291 2291 kmem_free(knconf, sizeof (*knconf));
2292 2292 }
2293 2293 knconf = svp->sv_origknconf;
2294 2294 if (knconf != NULL) {
2295 2295 if (knconf->knc_protofmly != NULL)
2296 2296 kmem_free(knconf->knc_protofmly, KNC_STRSIZE);
2297 2297 if (knconf->knc_proto != NULL)
2298 2298 kmem_free(knconf->knc_proto, KNC_STRSIZE);
2299 2299 kmem_free(knconf, sizeof (*knconf));
2300 2300 }
2301 2301 if (svp->sv_addr.buf != NULL && svp->sv_addr.maxlen != 0)
2302 2302 kmem_free(svp->sv_addr.buf, svp->sv_addr.maxlen);
2303 2303 if (svp->sv_path != NULL) {
2304 2304 kmem_free(svp->sv_path, svp->sv_pathlen);
2305 2305 }
2306 2306 nfs_rw_destroy(&svp->sv_lock);
2307 2307 kmem_free(svp, sizeof (*svp));
2308 2308 svp = next;
2309 2309 }
2310 2310 }
2311 2311
2312 2312 void
2313 2313 nfs4_printfhandle(nfs4_fhandle_t *fhp)
2314 2314 {
2315 2315 int *ip;
2316 2316 char *buf;
2317 2317 size_t bufsize;
2318 2318 char *cp;
2319 2319
2320 2320 /*
2321 2321 * 13 == "(file handle:"
2322 2322 * maximum of NFS_FHANDLE / sizeof (*ip) elements in fh_buf times
2323 2323 * 1 == ' '
2324 2324 * 8 == maximum strlen of "%x"
2325 2325 * 3 == ")\n\0"
2326 2326 */
2327 2327 bufsize = 13 + ((NFS_FHANDLE_LEN / sizeof (*ip)) * (1 + 8)) + 3;
2328 2328 buf = kmem_alloc(bufsize, KM_NOSLEEP);
2329 2329 if (buf == NULL)
2330 2330 return;
2331 2331
2332 2332 cp = buf;
2333 2333 (void) strcpy(cp, "(file handle:");
2334 2334 while (*cp != '\0')
2335 2335 cp++;
2336 2336 for (ip = (int *)fhp->fh_buf;
2337 2337 ip < (int *)&fhp->fh_buf[fhp->fh_len];
2338 2338 ip++) {
2339 2339 (void) sprintf(cp, " %x", *ip);
2340 2340 while (*cp != '\0')
2341 2341 cp++;
2342 2342 }
2343 2343 (void) strcpy(cp, ")\n");
2344 2344
2345 2345 zcmn_err(getzoneid(), CE_CONT, "%s", buf);
2346 2346
2347 2347 kmem_free(buf, bufsize);
2348 2348 }
2349 2349
2350 2350 /*
2351 2351 * The NFSv4 readdir cache subsystem.
2352 2352 *
2353 2353 * We provide a set of interfaces to allow the rest of the system to utilize
2354 2354 * a caching mechanism while encapsulating the details of the actual
2355 2355 * implementation. This should allow for better maintainability and
2356 2356 * extensibility by consolidating the implementation details in one location.
2357 2357 */
2358 2358
2359 2359 /*
2360 2360 * Comparator used by AVL routines.
2361 2361 */
2362 2362 static int
2363 2363 rddir4_cache_compar(const void *x, const void *y)
2364 2364 {
2365 2365 rddir4_cache_impl *ai = (rddir4_cache_impl *)x;
2366 2366 rddir4_cache_impl *bi = (rddir4_cache_impl *)y;
2367 2367 rddir4_cache *a = &ai->rc;
2368 2368 rddir4_cache *b = &bi->rc;
2369 2369
2370 2370 if (a->nfs4_cookie == b->nfs4_cookie) {
2371 2371 if (a->buflen == b->buflen)
2372 2372 return (0);
2373 2373 if (a->buflen < b->buflen)
2374 2374 return (-1);
2375 2375 return (1);
2376 2376 }
2377 2377
2378 2378 if (a->nfs4_cookie < b->nfs4_cookie)
2379 2379 return (-1);
2380 2380
2381 2381 return (1);
2382 2382 }
2383 2383
2384 2384 /*
2385 2385 * Allocate an opaque handle for the readdir cache.
2386 2386 */
2387 2387 void
2388 2388 rddir4_cache_create(rnode4_t *rp)
2389 2389 {
2390 2390 ASSERT(rp->r_dir == NULL);
2391 2391
2392 2392 rp->r_dir = kmem_alloc(sizeof (avl_tree_t), KM_SLEEP);
2393 2393
2394 2394 avl_create(rp->r_dir, rddir4_cache_compar, sizeof (rddir4_cache_impl),
2395 2395 offsetof(rddir4_cache_impl, tree));
2396 2396 }
2397 2397
2398 2398 /*
2399 2399 * Purge the cache of all cached readdir responses.
2400 2400 */
2401 2401 void
2402 2402 rddir4_cache_purge(rnode4_t *rp)
2403 2403 {
2404 2404 rddir4_cache_impl *rdip;
2405 2405 rddir4_cache_impl *nrdip;
2406 2406
2407 2407 ASSERT(MUTEX_HELD(&rp->r_statelock));
2408 2408
2409 2409 if (rp->r_dir == NULL)
2410 2410 return;
2411 2411
2412 2412 rdip = avl_first(rp->r_dir);
2413 2413
2414 2414 while (rdip != NULL) {
2415 2415 nrdip = AVL_NEXT(rp->r_dir, rdip);
2416 2416 avl_remove(rp->r_dir, rdip);
2417 2417 rdip->rc.flags &= ~RDDIRCACHED;
2418 2418 rddir4_cache_rele(rp, &rdip->rc);
2419 2419 rdip = nrdip;
2420 2420 }
2421 2421 ASSERT(avl_numnodes(rp->r_dir) == 0);
2422 2422 }
2423 2423
2424 2424 /*
2425 2425 * Destroy the readdir cache.
2426 2426 */
2427 2427 void
2428 2428 rddir4_cache_destroy(rnode4_t *rp)
2429 2429 {
2430 2430 ASSERT(MUTEX_HELD(&rp->r_statelock));
2431 2431 if (rp->r_dir == NULL)
2432 2432 return;
2433 2433
2434 2434 rddir4_cache_purge(rp);
2435 2435 avl_destroy(rp->r_dir);
2436 2436 kmem_free(rp->r_dir, sizeof (avl_tree_t));
2437 2437 rp->r_dir = NULL;
2438 2438 }
2439 2439
2440 2440 /*
2441 2441 * Locate a readdir response from the readdir cache.
2442 2442 *
2443 2443 * Return values:
2444 2444 *
2445 2445 * NULL - If there is an unrecoverable situation like the operation may have
2446 2446 * been interrupted.
2447 2447 *
2448 2448 * rddir4_cache * - A pointer to a rddir4_cache is returned to the caller.
2449 2449 * The flags are set approprately, such that the caller knows
2450 2450 * what state the entry is in.
2451 2451 */
2452 2452 rddir4_cache *
2453 2453 rddir4_cache_lookup(rnode4_t *rp, offset_t cookie, int count)
2454 2454 {
2455 2455 rddir4_cache_impl *rdip = NULL;
2456 2456 rddir4_cache_impl srdip;
2457 2457 rddir4_cache *srdc;
2458 2458 rddir4_cache *rdc = NULL;
2459 2459 rddir4_cache *nrdc = NULL;
2460 2460 avl_index_t where;
2461 2461
2462 2462 top:
2463 2463 ASSERT(nfs_rw_lock_held(&rp->r_rwlock, RW_READER));
2464 2464 ASSERT(MUTEX_HELD(&rp->r_statelock));
2465 2465 /*
2466 2466 * Check to see if the readdir cache has been disabled. If so, then
2467 2467 * simply allocate an rddir4_cache entry and return it, since caching
2468 2468 * operations do not apply.
2469 2469 */
2470 2470 if (rp->r_dir == NULL) {
2471 2471 if (nrdc == NULL) {
2472 2472 /*
2473 2473 * Drop the lock because we are doing a sleeping
2474 2474 * allocation.
2475 2475 */
2476 2476 mutex_exit(&rp->r_statelock);
2477 2477 rdc = rddir4_cache_alloc(KM_SLEEP);
2478 2478 rdc->nfs4_cookie = cookie;
2479 2479 rdc->buflen = count;
2480 2480 mutex_enter(&rp->r_statelock);
2481 2481 return (rdc);
2482 2482 }
2483 2483 return (nrdc);
2484 2484 }
2485 2485
2486 2486 srdc = &srdip.rc;
2487 2487 srdc->nfs4_cookie = cookie;
2488 2488 srdc->buflen = count;
2489 2489
2490 2490 rdip = avl_find(rp->r_dir, &srdip, &where);
2491 2491
2492 2492 /*
2493 2493 * If we didn't find an entry then create one and insert it
2494 2494 * into the cache.
2495 2495 */
2496 2496 if (rdip == NULL) {
2497 2497 /*
2498 2498 * Check for the case where we have made a second pass through
2499 2499 * the cache due to a lockless allocation. If we find that no
2500 2500 * thread has already inserted this entry, do the insert now
2501 2501 * and return.
2502 2502 */
2503 2503 if (nrdc != NULL) {
2504 2504 avl_insert(rp->r_dir, nrdc->data, where);
2505 2505 nrdc->flags |= RDDIRCACHED;
2506 2506 rddir4_cache_hold(nrdc);
2507 2507 return (nrdc);
2508 2508 }
2509 2509
2510 2510 #ifdef DEBUG
2511 2511 nfs4_readdir_cache_misses++;
2512 2512 #endif
2513 2513 /*
2514 2514 * First, try to allocate an entry without sleeping. If that
2515 2515 * fails then drop the lock and do a sleeping allocation.
2516 2516 */
2517 2517 nrdc = rddir4_cache_alloc(KM_NOSLEEP);
2518 2518 if (nrdc != NULL) {
2519 2519 nrdc->nfs4_cookie = cookie;
2520 2520 nrdc->buflen = count;
2521 2521 avl_insert(rp->r_dir, nrdc->data, where);
2522 2522 nrdc->flags |= RDDIRCACHED;
2523 2523 rddir4_cache_hold(nrdc);
2524 2524 return (nrdc);
2525 2525 }
2526 2526
2527 2527 /*
2528 2528 * Drop the lock and do a sleeping allocation. We incur
2529 2529 * additional overhead by having to search the cache again,
2530 2530 * but this case should be rare.
2531 2531 */
2532 2532 mutex_exit(&rp->r_statelock);
2533 2533 nrdc = rddir4_cache_alloc(KM_SLEEP);
2534 2534 nrdc->nfs4_cookie = cookie;
2535 2535 nrdc->buflen = count;
2536 2536 mutex_enter(&rp->r_statelock);
2537 2537 /*
2538 2538 * We need to take another pass through the cache
2539 2539 * since we dropped our lock to perform the alloc.
2540 2540 * Another thread may have come by and inserted the
2541 2541 * entry we are interested in.
2542 2542 */
2543 2543 goto top;
2544 2544 }
2545 2545
2546 2546 /*
2547 2547 * Check to see if we need to free our entry. This can happen if
2548 2548 * another thread came along beat us to the insert. We can
2549 2549 * safely call rddir4_cache_free directly because no other thread
2550 2550 * would have a reference to this entry.
2551 2551 */
2552 2552 if (nrdc != NULL)
2553 2553 rddir4_cache_free((rddir4_cache_impl *)nrdc->data);
2554 2554
2555 2555 #ifdef DEBUG
2556 2556 nfs4_readdir_cache_hits++;
2557 2557 #endif
2558 2558 /*
2559 2559 * Found something. Make sure it's ready to return.
2560 2560 */
2561 2561 rdc = &rdip->rc;
2562 2562 rddir4_cache_hold(rdc);
2563 2563 /*
2564 2564 * If the cache entry is in the process of being filled in, wait
2565 2565 * until this completes. The RDDIRWAIT bit is set to indicate that
2566 2566 * someone is waiting and when the thread currently filling the entry
2567 2567 * is done, it should do a cv_broadcast to wakeup all of the threads
2568 2568 * waiting for it to finish. If the thread wakes up to find that
2569 2569 * someone new is now trying to complete the the entry, go back
2570 2570 * to sleep.
2571 2571 */
2572 2572 while (rdc->flags & RDDIR) {
2573 2573 /*
2574 2574 * The entry is not complete.
2575 2575 */
2576 2576 nfs_rw_exit(&rp->r_rwlock);
2577 2577 rdc->flags |= RDDIRWAIT;
2578 2578 #ifdef DEBUG
2579 2579 nfs4_readdir_cache_waits++;
2580 2580 #endif
2581 2581 while (rdc->flags & RDDIRWAIT) {
2582 2582 if (!cv_wait_sig(&rdc->cv, &rp->r_statelock)) {
2583 2583 /*
2584 2584 * We got interrupted, probably the user
2585 2585 * typed ^C or an alarm fired. We free the
2586 2586 * new entry if we allocated one.
2587 2587 */
2588 2588 rddir4_cache_rele(rp, rdc);
2589 2589 mutex_exit(&rp->r_statelock);
2590 2590 (void) nfs_rw_enter_sig(&rp->r_rwlock,
2591 2591 RW_READER, FALSE);
2592 2592 mutex_enter(&rp->r_statelock);
2593 2593 return (NULL);
2594 2594 }
2595 2595 }
2596 2596 mutex_exit(&rp->r_statelock);
2597 2597 (void) nfs_rw_enter_sig(&rp->r_rwlock,
2598 2598 RW_READER, FALSE);
2599 2599 mutex_enter(&rp->r_statelock);
2600 2600 }
2601 2601
2602 2602 /*
2603 2603 * The entry we were waiting on may have been purged from
2604 2604 * the cache and should no longer be used, release it and
2605 2605 * start over.
2606 2606 */
2607 2607 if (!(rdc->flags & RDDIRCACHED)) {
2608 2608 rddir4_cache_rele(rp, rdc);
2609 2609 goto top;
2610 2610 }
2611 2611
2612 2612 /*
2613 2613 * The entry is completed. Return it.
2614 2614 */
2615 2615 return (rdc);
2616 2616 }
2617 2617
2618 2618 /*
2619 2619 * Allocate a cache element and return it. Can return NULL if memory is
2620 2620 * low.
2621 2621 */
2622 2622 static rddir4_cache *
2623 2623 rddir4_cache_alloc(int flags)
2624 2624 {
2625 2625 rddir4_cache_impl *rdip = NULL;
2626 2626 rddir4_cache *rc = NULL;
2627 2627
2628 2628 rdip = kmem_alloc(sizeof (rddir4_cache_impl), flags);
2629 2629
2630 2630 if (rdip != NULL) {
2631 2631 rc = &rdip->rc;
2632 2632 rc->data = (void *)rdip;
2633 2633 rc->nfs4_cookie = 0;
2634 2634 rc->nfs4_ncookie = 0;
2635 2635 rc->entries = NULL;
2636 2636 rc->eof = 0;
2637 2637 rc->entlen = 0;
2638 2638 rc->buflen = 0;
2639 2639 rc->actlen = 0;
2640 2640 /*
2641 2641 * A readdir is required so set the flag.
2642 2642 */
2643 2643 rc->flags = RDDIRREQ;
2644 2644 cv_init(&rc->cv, NULL, CV_DEFAULT, NULL);
2645 2645 rc->error = 0;
2646 2646 mutex_init(&rdip->lock, NULL, MUTEX_DEFAULT, NULL);
2647 2647 rdip->count = 1;
2648 2648 #ifdef DEBUG
2649 2649 atomic_inc_64(&clstat4_debug.dirent.value.ui64);
2650 2650 #endif
2651 2651 }
2652 2652 return (rc);
2653 2653 }
2654 2654
2655 2655 /*
2656 2656 * Increment the reference count to this cache element.
2657 2657 */
2658 2658 static void
2659 2659 rddir4_cache_hold(rddir4_cache *rc)
2660 2660 {
2661 2661 rddir4_cache_impl *rdip = (rddir4_cache_impl *)rc->data;
2662 2662
2663 2663 mutex_enter(&rdip->lock);
2664 2664 rdip->count++;
2665 2665 mutex_exit(&rdip->lock);
2666 2666 }
2667 2667
2668 2668 /*
2669 2669 * Release a reference to this cache element. If the count is zero then
2670 2670 * free the element.
2671 2671 */
2672 2672 void
2673 2673 rddir4_cache_rele(rnode4_t *rp, rddir4_cache *rdc)
2674 2674 {
2675 2675 rddir4_cache_impl *rdip = (rddir4_cache_impl *)rdc->data;
2676 2676
2677 2677 ASSERT(MUTEX_HELD(&rp->r_statelock));
2678 2678
2679 2679 /*
2680 2680 * Check to see if we have any waiters. If so, we can wake them
2681 2681 * so that they can proceed.
2682 2682 */
2683 2683 if (rdc->flags & RDDIRWAIT) {
2684 2684 rdc->flags &= ~RDDIRWAIT;
2685 2685 cv_broadcast(&rdc->cv);
2686 2686 }
2687 2687
2688 2688 mutex_enter(&rdip->lock);
2689 2689 ASSERT(rdip->count > 0);
2690 2690 if (--rdip->count == 0) {
2691 2691 mutex_exit(&rdip->lock);
2692 2692 rddir4_cache_free(rdip);
2693 2693 } else
2694 2694 mutex_exit(&rdip->lock);
2695 2695 }
2696 2696
2697 2697 /*
2698 2698 * Free a cache element.
2699 2699 */
2700 2700 static void
2701 2701 rddir4_cache_free(rddir4_cache_impl *rdip)
2702 2702 {
2703 2703 rddir4_cache *rc = &rdip->rc;
2704 2704
2705 2705 #ifdef DEBUG
2706 2706 atomic_dec_64(&clstat4_debug.dirent.value.ui64);
2707 2707 #endif
2708 2708 if (rc->entries != NULL)
2709 2709 kmem_free(rc->entries, rc->buflen);
2710 2710 cv_destroy(&rc->cv);
2711 2711 mutex_destroy(&rdip->lock);
2712 2712 kmem_free(rdip, sizeof (*rdip));
2713 2713 }
2714 2714
2715 2715 /*
2716 2716 * Snapshot callback for nfs:0:nfs4_client as registered with the kstat
2717 2717 * framework.
2718 2718 */
2719 2719 static int
2720 2720 cl4_snapshot(kstat_t *ksp, void *buf, int rw)
2721 2721 {
2722 2722 ksp->ks_snaptime = gethrtime();
2723 2723 if (rw == KSTAT_WRITE) {
2724 2724 bcopy(buf, ksp->ks_private, sizeof (clstat4_tmpl));
2725 2725 #ifdef DEBUG
2726 2726 /*
2727 2727 * Currently only the global zone can write to kstats, but we
2728 2728 * add the check just for paranoia.
2729 2729 */
2730 2730 if (INGLOBALZONE(curproc))
2731 2731 bcopy((char *)buf + sizeof (clstat4_tmpl),
2732 2732 &clstat4_debug, sizeof (clstat4_debug));
2733 2733 #endif
2734 2734 } else {
2735 2735 bcopy(ksp->ks_private, buf, sizeof (clstat4_tmpl));
2736 2736 #ifdef DEBUG
2737 2737 /*
2738 2738 * If we're displaying the "global" debug kstat values, we
2739 2739 * display them as-is to all zones since in fact they apply to
2740 2740 * the system as a whole.
2741 2741 */
2742 2742 bcopy(&clstat4_debug, (char *)buf + sizeof (clstat4_tmpl),
2743 2743 sizeof (clstat4_debug));
2744 2744 #endif
2745 2745 }
2746 2746 return (0);
2747 2747 }
2748 2748
2749 2749
2750 2750
2751 2751 /*
2752 2752 * Zone support
2753 2753 */
2754 2754 static void *
2755 2755 clinit4_zone(zoneid_t zoneid)
2756 2756 {
2757 2757 kstat_t *nfs4_client_kstat;
2758 2758 struct nfs4_clnt *nfscl;
2759 2759 uint_t ndata;
2760 2760
2761 2761 nfscl = kmem_alloc(sizeof (*nfscl), KM_SLEEP);
2762 2762 mutex_init(&nfscl->nfscl_chtable4_lock, NULL, MUTEX_DEFAULT, NULL);
2763 2763 nfscl->nfscl_chtable4 = NULL;
2764 2764 nfscl->nfscl_zoneid = zoneid;
2765 2765
2766 2766 bcopy(&clstat4_tmpl, &nfscl->nfscl_stat, sizeof (clstat4_tmpl));
2767 2767 ndata = sizeof (clstat4_tmpl) / sizeof (kstat_named_t);
2768 2768 #ifdef DEBUG
2769 2769 ndata += sizeof (clstat4_debug) / sizeof (kstat_named_t);
2770 2770 #endif
2771 2771 if ((nfs4_client_kstat = kstat_create_zone("nfs", 0, "nfs4_client",
2772 2772 "misc", KSTAT_TYPE_NAMED, ndata,
2773 2773 KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE, zoneid)) != NULL) {
2774 2774 nfs4_client_kstat->ks_private = &nfscl->nfscl_stat;
2775 2775 nfs4_client_kstat->ks_snapshot = cl4_snapshot;
2776 2776 kstat_install(nfs4_client_kstat);
2777 2777 }
2778 2778 mutex_enter(&nfs4_clnt_list_lock);
2779 2779 list_insert_head(&nfs4_clnt_list, nfscl);
2780 2780 mutex_exit(&nfs4_clnt_list_lock);
2781 2781
2782 2782 return (nfscl);
2783 2783 }
2784 2784
2785 2785 /*ARGSUSED*/
2786 2786 static void
2787 2787 clfini4_zone(zoneid_t zoneid, void *arg)
2788 2788 {
2789 2789 struct nfs4_clnt *nfscl = arg;
2790 2790 chhead_t *chp, *next;
2791 2791
2792 2792 if (nfscl == NULL)
2793 2793 return;
2794 2794 mutex_enter(&nfs4_clnt_list_lock);
2795 2795 list_remove(&nfs4_clnt_list, nfscl);
2796 2796 mutex_exit(&nfs4_clnt_list_lock);
2797 2797 clreclaim4_zone(nfscl, 0);
2798 2798 for (chp = nfscl->nfscl_chtable4; chp != NULL; chp = next) {
2799 2799 ASSERT(chp->ch_list == NULL);
2800 2800 kmem_free(chp->ch_protofmly, strlen(chp->ch_protofmly) + 1);
2801 2801 next = chp->ch_next;
2802 2802 kmem_free(chp, sizeof (*chp));
2803 2803 }
2804 2804 kstat_delete_byname_zone("nfs", 0, "nfs4_client", zoneid);
2805 2805 mutex_destroy(&nfscl->nfscl_chtable4_lock);
2806 2806 kmem_free(nfscl, sizeof (*nfscl));
2807 2807 }
2808 2808
2809 2809 /*
2810 2810 * Called by endpnt_destructor to make sure the client handles are
2811 2811 * cleaned up before the RPC endpoints. This becomes a no-op if
2812 2812 * clfini_zone (above) is called first. This function is needed
2813 2813 * (rather than relying on clfini_zone to clean up) because the ZSD
2814 2814 * callbacks have no ordering mechanism, so we have no way to ensure
2815 2815 * that clfini_zone is called before endpnt_destructor.
2816 2816 */
2817 2817 void
2818 2818 clcleanup4_zone(zoneid_t zoneid)
2819 2819 {
2820 2820 struct nfs4_clnt *nfscl;
2821 2821
2822 2822 mutex_enter(&nfs4_clnt_list_lock);
2823 2823 nfscl = list_head(&nfs4_clnt_list);
2824 2824 for (; nfscl != NULL; nfscl = list_next(&nfs4_clnt_list, nfscl)) {
2825 2825 if (nfscl->nfscl_zoneid == zoneid) {
2826 2826 clreclaim4_zone(nfscl, 0);
2827 2827 break;
2828 2828 }
2829 2829 }
2830 2830 mutex_exit(&nfs4_clnt_list_lock);
2831 2831 }
2832 2832
2833 2833 int
2834 2834 nfs4_subr_init(void)
2835 2835 {
2836 2836 /*
2837 2837 * Allocate and initialize the client handle cache
2838 2838 */
2839 2839 chtab4_cache = kmem_cache_create("client_handle4_cache",
2840 2840 sizeof (struct chtab), 0, NULL, NULL, clreclaim4, NULL,
2841 2841 NULL, 0);
2842 2842
2843 2843 /*
2844 2844 * Initialize the list of per-zone client handles (and associated data).
2845 2845 * This needs to be done before we call zone_key_create().
2846 2846 */
2847 2847 list_create(&nfs4_clnt_list, sizeof (struct nfs4_clnt),
2848 2848 offsetof(struct nfs4_clnt, nfscl_node));
2849 2849
2850 2850 /*
2851 2851 * Initialize the zone_key for per-zone client handle lists.
2852 2852 */
2853 2853 zone_key_create(&nfs4clnt_zone_key, clinit4_zone, NULL, clfini4_zone);
2854 2854
2855 2855 if (nfs4err_delay_time == 0)
2856 2856 nfs4err_delay_time = NFS4ERR_DELAY_TIME;
2857 2857
2858 2858 return (0);
2859 2859 }
2860 2860
2861 2861 int
2862 2862 nfs4_subr_fini(void)
2863 2863 {
2864 2864 /*
2865 2865 * Deallocate the client handle cache
2866 2866 */
2867 2867 kmem_cache_destroy(chtab4_cache);
2868 2868
2869 2869 /*
2870 2870 * Destroy the zone_key
2871 2871 */
2872 2872 (void) zone_key_delete(nfs4clnt_zone_key);
2873 2873
2874 2874 return (0);
2875 2875 }
2876 2876 /*
2877 2877 * Set or Clear direct I/O flag
2878 2878 * VOP_RWLOCK() is held for write access to prevent a race condition
2879 2879 * which would occur if a process is in the middle of a write when
2880 2880 * directio flag gets set. It is possible that all pages may not get flushed.
2881 2881 *
2882 2882 * This is a copy of nfs_directio, changes here may need to be made
2883 2883 * there and vice versa.
2884 2884 */
2885 2885
2886 2886 int
2887 2887 nfs4_directio(vnode_t *vp, int cmd, cred_t *cr)
2888 2888 {
2889 2889 int error = 0;
2890 2890 rnode4_t *rp;
2891 2891
2892 2892 rp = VTOR4(vp);
2893 2893
2894 2894 if (cmd == DIRECTIO_ON) {
2895 2895
2896 2896 if (rp->r_flags & R4DIRECTIO)
2897 2897 return (0);
2898 2898
2899 2899 /*
2900 2900 * Flush the page cache.
2901 2901 */
2902 2902
2903 2903 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
2904 2904
2905 2905 if (rp->r_flags & R4DIRECTIO) {
2906 2906 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
2907 2907 return (0);
2908 2908 }
2909 2909
2910 2910 if (nfs4_has_pages(vp) &&
2911 2911 ((rp->r_flags & R4DIRTY) || rp->r_awcount > 0)) {
2912 2912 error = VOP_PUTPAGE(vp, (offset_t)0, (uint_t)0,
2913 2913 B_INVAL, cr, NULL);
2914 2914 if (error) {
2915 2915 if (error == ENOSPC || error == EDQUOT) {
2916 2916 mutex_enter(&rp->r_statelock);
2917 2917 if (!rp->r_error)
2918 2918 rp->r_error = error;
2919 2919 mutex_exit(&rp->r_statelock);
2920 2920 }
2921 2921 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
2922 2922 return (error);
2923 2923 }
2924 2924 }
2925 2925
2926 2926 mutex_enter(&rp->r_statelock);
2927 2927 rp->r_flags |= R4DIRECTIO;
2928 2928 mutex_exit(&rp->r_statelock);
2929 2929 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
2930 2930 return (0);
2931 2931 }
2932 2932
2933 2933 if (cmd == DIRECTIO_OFF) {
2934 2934 mutex_enter(&rp->r_statelock);
2935 2935 rp->r_flags &= ~R4DIRECTIO; /* disable direct mode */
2936 2936 mutex_exit(&rp->r_statelock);
2937 2937 return (0);
2938 2938 }
2939 2939
2940 2940 return (EINVAL);
2941 2941 }
2942 2942
2943 2943 /*
2944 2944 * Return TRUE if the file has any pages. Always go back to
2945 2945 * the master vnode to check v_pages since none of the shadows
2946 2946 * can have pages.
2947 2947 */
2948 2948
2949 2949 bool_t
2950 2950 nfs4_has_pages(vnode_t *vp)
2951 2951 {
2952 2952 rnode4_t *rp;
2953 2953
2954 2954 rp = VTOR4(vp);
2955 2955 if (IS_SHADOW(vp, rp))
2956 2956 vp = RTOV4(rp); /* RTOV4 always gives the master */
2957 2957
2958 2958 return (vn_has_cached_data(vp));
2959 2959 }
2960 2960
/*
 * Failover decision table, indexed by the clnt_stat value returned by
 * CLNT_CALL.  A non-zero error in an entry is the error to be returned
 * AND the signal that failover should be attempted; a zero error means
 * no failover for that status.
 *
 * Special note: If the RPC_ values change, then direct indexing of the
 * table is no longer valid, but having the RPC_ values in the table
 * allows the functions to detect the change and issue a warning.
 * In this case, the code will always attempt failover as a defensive
 * measure.
 */

static struct try_failover_tab {
	enum clnt_stat	cstat;
	int		error;
} try_failover_table [] = {
	{ RPC_SUCCESS,			0 },
	{ RPC_CANTENCODEARGS,		0 },
	{ RPC_CANTDECODERES,		0 },
	{ RPC_CANTSEND,			ECOMM },
	{ RPC_CANTRECV,			ECOMM },
	{ RPC_TIMEDOUT,			ETIMEDOUT },
	{ RPC_VERSMISMATCH,		0 },
	{ RPC_AUTHERROR,		0 },
	{ RPC_PROGUNAVAIL,		0 },
	{ RPC_PROGVERSMISMATCH,		0 },
	{ RPC_PROCUNAVAIL,		0 },
	{ RPC_CANTDECODEARGS,		0 },
	{ RPC_SYSTEMERROR,		ENOSR },
	{ RPC_UNKNOWNHOST,		EHOSTUNREACH },
	{ RPC_RPCBFAILURE,		ENETUNREACH },
	{ RPC_PROGNOTREGISTERED,	ECONNREFUSED },
	{ RPC_FAILED,			ETIMEDOUT },
	{ RPC_UNKNOWNPROTO,		EHOSTUNREACH },
	{ RPC_INTR,			0 },
	{ RPC_UNKNOWNADDR,		EHOSTUNREACH },
	{ RPC_TLIERROR,			0 },
	{ RPC_NOBROADCAST,		EHOSTUNREACH },
	{ RPC_N2AXLATEFAILURE,		ECONNREFUSED },
	{ RPC_UDERROR,			0 },
	{ RPC_INPROGRESS,		0 },
	{ RPC_STALERACHANDLE,		EINVAL },
	{ RPC_CANTCONNECT,		ECONNREFUSED },
	{ RPC_XPRTFAILED,		ECONNABORTED },
	{ RPC_CANTCREATESTREAM,		ECONNREFUSED },
	{ RPC_CANTSTORE,		ENOBUFS }
};
3012 3012
3013 3013 /*
3014 3014 * nfs4_try_failover - determine whether the client should
3015 3015 * attempt failover based on the values stored in the nfs4_error_t.
3016 3016 */
3017 3017 int
3018 3018 nfs4_try_failover(nfs4_error_t *ep)
3019 3019 {
3020 3020 if (ep->error == ETIMEDOUT || ep->stat == NFS4ERR_RESOURCE)
3021 3021 return (TRUE);
3022 3022
3023 3023 if (ep->error && ep->rpc_status != RPC_SUCCESS)
3024 3024 return (try_failover(ep->rpc_status) != 0 ? TRUE : FALSE);
3025 3025
3026 3026 return (FALSE);
3027 3027 }
3028 3028
3029 3029 /*
3030 3030 * try_failover - internal version of nfs4_try_failover, called
3031 3031 * only by rfscall and aclcall. Determine if failover is warranted
3032 3032 * based on the clnt_stat and return the error number if it is.
3033 3033 */
3034 3034 static int
3035 3035 try_failover(enum clnt_stat rpc_status)
3036 3036 {
3037 3037 int err = 0;
3038 3038
3039 3039 if (rpc_status == RPC_SUCCESS)
3040 3040 return (0);
3041 3041
3042 3042 #ifdef DEBUG
3043 3043 if (rpc_status != 0 && nfs4_try_failover_any) {
3044 3044 err = ETIMEDOUT;
3045 3045 goto done;
3046 3046 }
3047 3047 #endif
3048 3048 /*
3049 3049 * The rpc status is used as an index into the table.
3050 3050 * If the rpc status is outside of the range of the
3051 3051 * table or if the rpc error numbers have been changed
3052 3052 * since the table was constructed, then print a warning
3053 3053 * (DEBUG only) and try failover anyway. Otherwise, just
3054 3054 * grab the resulting error number out of the table.
3055 3055 */
3056 3056 if (rpc_status < RPC_SUCCESS || rpc_status >=
3057 3057 sizeof (try_failover_table)/sizeof (try_failover_table[0]) ||
3058 3058 try_failover_table[rpc_status].cstat != rpc_status) {
3059 3059
3060 3060 err = ETIMEDOUT;
3061 3061 #ifdef DEBUG
3062 3062 cmn_err(CE_NOTE, "try_failover: unexpected rpc error %d",
3063 3063 rpc_status);
3064 3064 #endif
3065 3065 } else
3066 3066 err = try_failover_table[rpc_status].error;
3067 3067
3068 3068 done:
3069 3069 if (rpc_status)
3070 3070 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
3071 3071 "nfs4_try_failover: %strying failover on error %d",
3072 3072 err ? "" : "NOT ", rpc_status));
3073 3073
3074 3074 return (err);
3075 3075 }
3076 3076
3077 3077 void
3078 3078 nfs4_error_zinit(nfs4_error_t *ep)
3079 3079 {
3080 3080 ep->error = 0;
3081 3081 ep->stat = NFS4_OK;
3082 3082 ep->rpc_status = RPC_SUCCESS;
3083 3083 }
3084 3084
3085 3085 void
3086 3086 nfs4_error_init(nfs4_error_t *ep, int error)
3087 3087 {
3088 3088 ep->error = error;
3089 3089 ep->stat = NFS4_OK;
3090 3090 ep->rpc_status = RPC_SUCCESS;
3091 3091 }
3092 3092
3093 3093
3094 3094 #ifdef DEBUG
3095 3095
/*
 * Return a 16-bit hash for filehandle, stateid, clientid, owner.
 * Use the same algorithm as for NFS v3: every whole 32-bit word is
 * folded in as (word >> 16) ^ word, and the result is masked to the
 * low 16 bits.
 *
 * p	- start of the buffer to hash
 * len	- buffer length in bytes; a non-positive length hashes to 0
 *
 * When len is not a multiple of four, the one to three bytes that
 * follow the last whole word are XORed into the key one at a time.
 *
 * NOTE(review): whole words are read through an unsigned int pointer,
 * so p is presumably expected to be word aligned - confirm with the
 * callers.
 */
int
hash16(void *p, int len)
{
	unsigned int *wp = (unsigned int *)p;
	unsigned char *tail;
	unsigned int key = 0;
	int i, rem;

	/* nothing to hash; also keeps a negative length from being used */
	if (len <= 0)
		return (0);

	/* split the length into whole words plus 0-3 left-over bytes */
	rem = len & 3;
	len &= ~3;

	for (i = 0; i < len; i += 4, wp++)
		key ^= (*wp >> 16) ^ *wp;

	/*
	 * Hash the left-over bytes.  They sit immediately after the
	 * last whole word, which is where wp now points - not at the
	 * start of the buffer.
	 */
	tail = (unsigned char *)wp;
	for (i = 0; i < rem; i++)
		key ^= tail[i];

	return (key & 0xffff);
}
3122 3122
3123 3123 /*
3124 3124 * rnode4info - return filehandle and path information for an rnode.
3125 3125 * XXX MT issues: uses a single static buffer, no locking of path.
3126 3126 */
3127 3127 char *
3128 3128 rnode4info(rnode4_t *rp)
3129 3129 {
3130 3130 static char buf[80];
3131 3131 nfs4_fhandle_t fhandle;
3132 3132 char *path;
3133 3133 char *type;
3134 3134
3135 3135 if (rp == NULL)
3136 3136 return ("null");
3137 3137 if (rp->r_flags & R4ISXATTR)
3138 3138 type = "attr";
3139 3139 else if (RTOV4(rp)->v_flag & V_XATTRDIR)
3140 3140 type = "attrdir";
3141 3141 else if (RTOV4(rp)->v_flag & VROOT)
3142 3142 type = "root";
3143 3143 else if (RTOV4(rp)->v_type == VDIR)
3144 3144 type = "dir";
3145 3145 else if (RTOV4(rp)->v_type == VREG)
3146 3146 type = "file";
3147 3147 else
3148 3148 type = "other";
3149 3149 sfh4_copyval(rp->r_fh, &fhandle);
3150 3150 path = fn_path(rp->r_svnode.sv_name);
3151 3151 (void) snprintf(buf, 80, "$%p[%s], type=%s, flags=%04X, FH=%04X\n",
3152 3152 (void *)rp, path, type, rp->r_flags,
3153 3153 hash16((void *)&fhandle.fh_buf, fhandle.fh_len));
3154 3154 kmem_free(path, strlen(path)+1);
3155 3155 return (buf);
3156 3156 }
3157 3157 #endif
↓ open down ↓ |
2649 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX