Print this page
We've seen a situation where the NFS4-client tries endlessly to return a
delegation. This happened after a server reboot. The server responds with
STALE_STATEID to the DELEGRETURN. This case is not directly handled by
nfs4_do_delegreturn. Instead, it triggers a recovery of the clientid, which
in turn triggers a reclaim of all open files for this server.
To find the open files, rtable4 is enumerated for each mount to the server.
This is supposed to reopen the file, so that the next DELEGRETURN will
succeed.
In this case, the rnode is not in the rtable-hash, so it never gets
recovered. This leads to an endless delegreturn-loop, iterated once per
second.
This fix tests for NFS4_STALE_STATEID in combination with the rnode not
being in the hash. In this case, it treats the error as fatal and just
discards the delegation.
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/fs/nfs/nfs4_callback.c
+++ new/usr/src/uts/common/fs/nfs/nfs4_callback.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25
26 26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
27 27 /* All Rights Reserved */
28 28
29 29 #include <sys/param.h>
30 30 #include <sys/types.h>
31 31 #include <sys/systm.h>
32 32 #include <sys/cred.h>
33 33 #include <sys/vfs.h>
34 34 #include <sys/vnode.h>
35 35 #include <sys/pathname.h>
36 36 #include <sys/sysmacros.h>
37 37 #include <sys/kmem.h>
38 38 #include <sys/kstat.h>
39 39 #include <sys/mkdev.h>
40 40 #include <sys/mount.h>
41 41 #include <sys/statvfs.h>
42 42 #include <sys/errno.h>
43 43 #include <sys/debug.h>
44 44 #include <sys/cmn_err.h>
45 45 #include <sys/utsname.h>
46 46 #include <sys/bootconf.h>
47 47 #include <sys/modctl.h>
48 48 #include <sys/acl.h>
49 49 #include <sys/flock.h>
50 50 #include <sys/kstr.h>
51 51 #include <sys/stropts.h>
52 52 #include <sys/strsubr.h>
53 53 #include <sys/atomic.h>
54 54 #include <sys/disp.h>
55 55 #include <sys/policy.h>
56 56 #include <sys/list.h>
57 57 #include <sys/zone.h>
58 58
59 59 #include <rpc/types.h>
60 60 #include <rpc/auth.h>
61 61 #include <rpc/rpcsec_gss.h>
62 62 #include <rpc/clnt.h>
63 63 #include <rpc/xdr.h>
64 64
65 65 #include <nfs/nfs.h>
66 66 #include <nfs/nfs_clnt.h>
67 67 #include <nfs/mount.h>
68 68 #include <nfs/nfs_acl.h>
69 69
70 70 #include <fs/fs_subr.h>
71 71
72 72 #include <nfs/nfs4.h>
73 73 #include <nfs/rnode4.h>
74 74 #include <nfs/nfs4_clnt.h>
75 75 #include <nfs/nfssys.h>
76 76
77 77 #ifdef DEBUG
78 78 /*
79 79 * These are "special" state IDs and file handles that
80 80 * match any delegation state ID or file handle. This
81 81 * is for testing purposes only.
82 82 */
83 83
84 84 stateid4 nfs4_deleg_any = { 0x7FFFFFF0 };
85 85 char nfs4_deleg_fh[] = "\0377\0376\0375\0374";
86 86 nfs_fh4 nfs4_deleg_anyfh = { sizeof (nfs4_deleg_fh)-1, nfs4_deleg_fh };
87 87 nfsstat4 cb4_getattr_fail = NFS4_OK;
88 88 nfsstat4 cb4_recall_fail = NFS4_OK;
89 89
90 90 int nfs4_callback_debug;
91 91 int nfs4_recall_debug;
92 92 int nfs4_drat_debug;
93 93
94 94 #endif
95 95
96 96 #define CB_NOTE(x) NFS4_DEBUG(nfs4_callback_debug, (CE_NOTE, x))
97 97 #define CB_WARN(x) NFS4_DEBUG(nfs4_callback_debug, (CE_WARN, x))
98 98 #define CB_WARN1(x, y) NFS4_DEBUG(nfs4_callback_debug, (CE_WARN, x, y))
99 99
100 100 enum nfs4_delegreturn_policy nfs4_delegreturn_policy = INACTIVE;
101 101
102 102 static zone_key_t nfs4_callback_zone_key;
103 103
104 104 /*
105 105 * NFS4_MAPSIZE is the number of bytes we are willing to consume
106 106 * for the block allocation map when the server grants a NFS_LIMIT_BLOCK
107 107 * style delegation.
108 108 */
109 109
110 110 #define NFS4_MAPSIZE 8192
111 111 #define NFS4_MAPWORDS NFS4_MAPSIZE/sizeof (uint_t)
112 112 #define NbPW (NBBY*sizeof (uint_t))
113 113
114 114 static int nfs4_num_prognums = 1024;
115 115 static SVC_CALLOUT_TABLE nfs4_cb_sct;
116 116
117 117 struct nfs4_dnode {
118 118 list_node_t linkage;
119 119 rnode4_t *rnodep;
120 120 int flags; /* Flags for nfs4delegreturn_impl() */
121 121 };
122 122
123 123 static const struct nfs4_callback_stats nfs4_callback_stats_tmpl = {
124 124 { "delegations", KSTAT_DATA_UINT64 },
125 125 { "cb_getattr", KSTAT_DATA_UINT64 },
126 126 { "cb_recall", KSTAT_DATA_UINT64 },
127 127 { "cb_null", KSTAT_DATA_UINT64 },
128 128 { "cb_dispatch", KSTAT_DATA_UINT64 },
129 129 { "delegaccept_r", KSTAT_DATA_UINT64 },
130 130 { "delegaccept_rw", KSTAT_DATA_UINT64 },
131 131 { "delegreturn", KSTAT_DATA_UINT64 },
132 132 { "callbacks", KSTAT_DATA_UINT64 },
133 133 { "claim_cur", KSTAT_DATA_UINT64 },
134 134 { "claim_cur_ok", KSTAT_DATA_UINT64 },
135 135 { "recall_trunc", KSTAT_DATA_UINT64 },
136 136 { "recall_failed", KSTAT_DATA_UINT64 },
137 137 { "return_limit_write", KSTAT_DATA_UINT64 },
138 138 { "return_limit_addmap", KSTAT_DATA_UINT64 },
139 139 { "deleg_recover", KSTAT_DATA_UINT64 },
140 140 { "cb_illegal", KSTAT_DATA_UINT64 }
141 141 };
142 142
143 143 struct nfs4_cb_port {
144 144 list_node_t linkage; /* linkage into per-zone port list */
145 145 char netid[KNC_STRSIZE];
146 146 char uaddr[KNC_STRSIZE];
147 147 char protofmly[KNC_STRSIZE];
148 148 char proto[KNC_STRSIZE];
149 149 };
150 150
151 151 static int cb_getattr_bytes;
152 152
153 153 struct cb_recall_pass {
154 154 rnode4_t *rp;
155 155 int flags; /* Flags for nfs4delegreturn_impl() */
156 156 bool_t truncate;
157 157 };
158 158
159 159 static nfs4_open_stream_t *get_next_deleg_stream(rnode4_t *, int);
160 160 static void nfs4delegreturn_thread(struct cb_recall_pass *);
161 161 static int deleg_reopen(vnode_t *, bool_t *, struct nfs4_callback_globals *,
162 162 int);
163 163 static void nfs4_dlistadd(rnode4_t *, struct nfs4_callback_globals *, int);
164 164 static void nfs4_dlistclean_impl(struct nfs4_callback_globals *, int);
165 165 static int nfs4delegreturn_impl(rnode4_t *, int,
166 166 struct nfs4_callback_globals *);
167 167 static void nfs4delegreturn_cleanup_impl(rnode4_t *, nfs4_server_t *,
168 168 struct nfs4_callback_globals *);
169 169
170 170 static void
171 171 cb_getattr(nfs_cb_argop4 *argop, nfs_cb_resop4 *resop, struct svc_req *req,
172 172 struct compound_state *cs, struct nfs4_callback_globals *ncg)
173 173 {
174 174 CB_GETATTR4args *args = &argop->nfs_cb_argop4_u.opcbgetattr;
175 175 CB_GETATTR4res *resp = &resop->nfs_cb_resop4_u.opcbgetattr;
176 176 rnode4_t *rp;
177 177 vnode_t *vp;
178 178 bool_t found = FALSE;
179 179 struct nfs4_server *sp;
180 180 struct fattr4 *fap;
181 181 rpc_inline_t *fdata;
182 182 long mapcnt;
183 183 fattr4_change change;
184 184 fattr4_size size;
185 185 uint_t rflag;
186 186
187 187 ncg->nfs4_callback_stats.cb_getattr.value.ui64++;
188 188
189 189 #ifdef DEBUG
190 190 /*
191 191 * error injection hook: set cb_getattr_fail global to
192 192 * NFS4 pcol error to be returned
193 193 */
194 194 if (cb4_getattr_fail != NFS4_OK) {
195 195 *cs->statusp = resp->status = cb4_getattr_fail;
196 196 return;
197 197 }
198 198 #endif
199 199
200 200 resp->obj_attributes.attrmask = 0;
201 201
202 202 mutex_enter(&ncg->nfs4_cb_lock);
203 203 sp = ncg->nfs4prog2server[req->rq_prog - NFS4_CALLBACK];
204 204 mutex_exit(&ncg->nfs4_cb_lock);
205 205
206 206 if (nfs4_server_vlock(sp, 0) == FALSE) {
207 207
208 208 CB_WARN("cb_getattr: cannot find server\n");
209 209
210 210 *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
211 211 return;
212 212 }
213 213
214 214 /*
215 215 * In cb_compound, callback_ident was validated against rq_prog,
216 216 * but we couldn't verify that it was set to the value we provided
217 217 * at setclientid time (because we didn't have server struct yet).
218 218 * Now we have the server struct, but don't have callback_ident
219 219 * handy. So, validate server struct program number against req
220 220 * RPC's prog number. At this point, we know the RPC prog num
221 221 * is valid (else we wouldn't be here); however, we don't know
222 222 * that it was the prog number we supplied to this server at
223 223 * setclientid time. If the prog numbers aren't equivalent, then
224 224 * log the problem and fail the request because either cbserv
225 225 * and/or cbclient are confused. This will probably never happen.
226 226 */
227 227 if (sp->s_program != req->rq_prog) {
228 228 #ifdef DEBUG
229 229 zcmn_err(getzoneid(), CE_WARN,
230 230 "cb_getattr: wrong server program number srv=%d req=%d\n",
231 231 sp->s_program, req->rq_prog);
232 232 #else
233 233 zcmn_err(getzoneid(), CE_WARN,
234 234 "cb_getattr: wrong server program number\n");
235 235 #endif
236 236 mutex_exit(&sp->s_lock);
237 237 nfs4_server_rele(sp);
238 238 *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
239 239 return;
240 240 }
241 241
242 242 /*
243 243 * Search the delegation list for a matching file handle;
244 244 * mutex on sp prevents the list from changing.
245 245 */
246 246
247 247 rp = list_head(&sp->s_deleg_list);
248 248 for (; rp != NULL; rp = list_next(&sp->s_deleg_list, rp)) {
249 249 nfs4_fhandle_t fhandle;
250 250
251 251 sfh4_copyval(rp->r_fh, &fhandle);
252 252
253 253 if ((fhandle.fh_len == args->fh.nfs_fh4_len &&
254 254 bcmp(fhandle.fh_buf, args->fh.nfs_fh4_val,
255 255 fhandle.fh_len) == 0)) {
256 256
257 257 found = TRUE;
258 258 break;
259 259 }
260 260 #ifdef DEBUG
261 261 if (nfs4_deleg_anyfh.nfs_fh4_len == args->fh.nfs_fh4_len &&
262 262 bcmp(nfs4_deleg_anyfh.nfs_fh4_val, args->fh.nfs_fh4_val,
263 263 args->fh.nfs_fh4_len) == 0) {
264 264
265 265 found = TRUE;
266 266 break;
267 267 }
268 268 #endif
269 269 }
270 270
271 271 /*
272 272 * VN_HOLD the vnode before releasing s_lock to guarantee
273 273 * we have a valid vnode reference.
274 274 */
275 275 if (found == TRUE) {
276 276 vp = RTOV4(rp);
277 277 VN_HOLD(vp);
278 278 }
279 279
280 280 mutex_exit(&sp->s_lock);
281 281 nfs4_server_rele(sp);
282 282
283 283 if (found == FALSE) {
284 284
285 285 CB_WARN("cb_getattr: bad fhandle\n");
286 286
287 287 *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
288 288 return;
289 289 }
290 290
291 291 /*
292 292 * Figure out which attributes the server wants. We only
293 293 * offer FATTR4_CHANGE & FATTR4_SIZE; ignore the rest.
294 294 */
295 295 fdata = kmem_alloc(cb_getattr_bytes, KM_SLEEP);
296 296
297 297 /*
298 298 * Don't actually need to create XDR to encode these
299 299 * simple data structures.
300 300 * xdrmem_create(&xdr, fdata, cb_getattr_bytes, XDR_ENCODE);
301 301 */
302 302 fap = &resp->obj_attributes;
303 303
304 304 fap->attrmask = 0;
305 305 /* attrlist4_len starts at 0 and increases as attrs are processed */
306 306 fap->attrlist4 = (char *)fdata;
307 307 fap->attrlist4_len = 0;
308 308
309 309 /* don't supply attrs if request was zero */
310 310 if (args->attr_request != 0) {
311 311 if (args->attr_request & FATTR4_CHANGE_MASK) {
312 312 /*
313 313 * If the file is mmapped, then increment the change
314 314 * attribute and return it. This will guarantee that
315 315 * the server will perceive that the file has changed
316 316 * if there is any chance that the client application
317 317 * has changed it. Otherwise, just return the change
318 318 * attribute as it has been updated by nfs4write_deleg.
319 319 */
320 320
321 321 mutex_enter(&rp->r_statelock);
322 322 mapcnt = rp->r_mapcnt;
323 323 rflag = rp->r_flags;
324 324 mutex_exit(&rp->r_statelock);
325 325
326 326 mutex_enter(&rp->r_statev4_lock);
327 327 /*
328 328 * If object mapped, then always return new change.
329 329 * Otherwise, return change if object has dirty
330 330 * pages. If object doesn't have any dirty pages,
331 331 * then all changes have been pushed to server, so
332 332 * reset change to grant change.
333 333 */
334 334 if (mapcnt)
335 335 rp->r_deleg_change++;
336 336 else if (! (rflag & R4DIRTY))
337 337 rp->r_deleg_change = rp->r_deleg_change_grant;
338 338 change = rp->r_deleg_change;
339 339 mutex_exit(&rp->r_statev4_lock);
340 340
341 341 /*
342 342 * Use inline XDR code directly, we know that we
343 343 * are going to a memory buffer and it has enough
344 344 * space so it cannot fail.
345 345 */
346 346 IXDR_PUT_U_HYPER(fdata, change);
347 347 fap->attrlist4_len += 2 * BYTES_PER_XDR_UNIT;
348 348 fap->attrmask |= FATTR4_CHANGE_MASK;
349 349 }
350 350
351 351 if (args->attr_request & FATTR4_SIZE_MASK) {
352 352 /*
353 353 * Use an atomic add of 0 to fetch a consistent view
354 354 * of r_size; this avoids having to take rw_lock
355 355 * which could cause a deadlock.
356 356 */
357 357 size = atomic_add_64_nv((uint64_t *)&rp->r_size, 0);
358 358
359 359 /*
360 360 * Use inline XDR code directly, we know that we
361 361 * going to a memory buffer and it has enough
362 362 * space so it cannot fail.
363 363 */
364 364 IXDR_PUT_U_HYPER(fdata, size);
365 365 fap->attrlist4_len += 2 * BYTES_PER_XDR_UNIT;
366 366 fap->attrmask |= FATTR4_SIZE_MASK;
367 367 }
368 368 }
369 369
370 370 VN_RELE(vp);
371 371
372 372 *cs->statusp = resp->status = NFS4_OK;
373 373 }
374 374
375 375 static void
376 376 cb_getattr_free(nfs_cb_resop4 *resop)
377 377 {
378 378 if (resop->nfs_cb_resop4_u.opcbgetattr.obj_attributes.attrlist4)
379 379 kmem_free(resop->nfs_cb_resop4_u.opcbgetattr.
380 380 obj_attributes.attrlist4, cb_getattr_bytes);
381 381 }
382 382
383 383 static void
384 384 cb_recall(nfs_cb_argop4 *argop, nfs_cb_resop4 *resop, struct svc_req *req,
385 385 struct compound_state *cs, struct nfs4_callback_globals *ncg)
386 386 {
387 387 CB_RECALL4args * args = &argop->nfs_cb_argop4_u.opcbrecall;
388 388 CB_RECALL4res *resp = &resop->nfs_cb_resop4_u.opcbrecall;
389 389 rnode4_t *rp;
390 390 vnode_t *vp;
391 391 struct nfs4_server *sp;
392 392 bool_t found = FALSE;
393 393
394 394 ncg->nfs4_callback_stats.cb_recall.value.ui64++;
395 395
396 396 ASSERT(req->rq_prog >= NFS4_CALLBACK);
397 397 ASSERT(req->rq_prog < NFS4_CALLBACK+nfs4_num_prognums);
398 398
399 399 #ifdef DEBUG
400 400 /*
401 401 * error injection hook: set cb_recall_fail global to
402 402 * NFS4 pcol error to be returned
403 403 */
404 404 if (cb4_recall_fail != NFS4_OK) {
405 405 *cs->statusp = resp->status = cb4_recall_fail;
406 406 return;
407 407 }
408 408 #endif
409 409
410 410 mutex_enter(&ncg->nfs4_cb_lock);
411 411 sp = ncg->nfs4prog2server[req->rq_prog - NFS4_CALLBACK];
412 412 mutex_exit(&ncg->nfs4_cb_lock);
413 413
414 414 if (nfs4_server_vlock(sp, 0) == FALSE) {
415 415
416 416 CB_WARN("cb_recall: cannot find server\n");
417 417
418 418 *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
419 419 return;
420 420 }
421 421
422 422 /*
423 423 * Search the delegation list for a matching file handle
424 424 * AND stateid; mutex on sp prevents the list from changing.
425 425 */
426 426
427 427 rp = list_head(&sp->s_deleg_list);
428 428 for (; rp != NULL; rp = list_next(&sp->s_deleg_list, rp)) {
429 429 mutex_enter(&rp->r_statev4_lock);
430 430
431 431 /* check both state id and file handle! */
432 432
433 433 if ((bcmp(&rp->r_deleg_stateid, &args->stateid,
434 434 sizeof (stateid4)) == 0)) {
435 435 nfs4_fhandle_t fhandle;
436 436
437 437 sfh4_copyval(rp->r_fh, &fhandle);
438 438 if ((fhandle.fh_len == args->fh.nfs_fh4_len &&
439 439 bcmp(fhandle.fh_buf, args->fh.nfs_fh4_val,
440 440 fhandle.fh_len) == 0)) {
441 441
442 442 found = TRUE;
443 443 break;
444 444 } else {
445 445 #ifdef DEBUG
446 446 CB_WARN("cb_recall: stateid OK, bad fh");
447 447 #endif
448 448 }
449 449 }
450 450 #ifdef DEBUG
451 451 if (bcmp(&args->stateid, &nfs4_deleg_any,
452 452 sizeof (stateid4)) == 0) {
453 453
454 454 found = TRUE;
455 455 break;
456 456 }
457 457 #endif
458 458 mutex_exit(&rp->r_statev4_lock);
459 459 }
460 460
461 461 /*
462 462 * VN_HOLD the vnode before releasing s_lock to guarantee
463 463 * we have a valid vnode reference. The async thread will
464 464 * release the hold when it's done.
465 465 */
466 466 if (found == TRUE) {
467 467 mutex_exit(&rp->r_statev4_lock);
468 468 vp = RTOV4(rp);
469 469 VN_HOLD(vp);
470 470 }
471 471 mutex_exit(&sp->s_lock);
472 472 nfs4_server_rele(sp);
473 473
474 474 if (found == FALSE) {
475 475
476 476 CB_WARN("cb_recall: bad stateid\n");
477 477
478 478 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
479 479 return;
480 480 }
481 481
482 482 /* Fire up a thread to do the delegreturn */
483 483 nfs4delegreturn_async(rp, NFS4_DR_RECALL|NFS4_DR_REOPEN,
484 484 args->truncate);
485 485
486 486 *cs->statusp = resp->status = 0;
487 487 }
488 488
489 489 /* ARGSUSED */
490 490 static void
491 491 cb_recall_free(nfs_cb_resop4 *resop)
492 492 {
493 493 /* nothing to do here, cb_recall doesn't kmem_alloc */
494 494 }
495 495
496 496 /*
497 497 * This function handles the CB_NULL proc call from an NFSv4 Server.
498 498 *
499 499 * We take note that the server has sent a CB_NULL for later processing
500 500 * in the recovery logic. It is noted so we may pause slightly after the
501 501 * setclientid and before reopening files. The pause is to allow the
502 502 * NFSv4 Server time to receive the CB_NULL reply and adjust any of
503 503 * its internal structures such that it has the opportunity to grant
504 504 * delegations to reopened files.
505 505 *
506 506 */
507 507
508 508 /* ARGSUSED */
509 509 static void
510 510 cb_null(CB_COMPOUND4args *args, CB_COMPOUND4res *resp, struct svc_req *req,
511 511 struct nfs4_callback_globals *ncg)
512 512 {
513 513 struct nfs4_server *sp;
514 514
515 515 ncg->nfs4_callback_stats.cb_null.value.ui64++;
516 516
517 517 ASSERT(req->rq_prog >= NFS4_CALLBACK);
518 518 ASSERT(req->rq_prog < NFS4_CALLBACK+nfs4_num_prognums);
519 519
520 520 mutex_enter(&ncg->nfs4_cb_lock);
521 521 sp = ncg->nfs4prog2server[req->rq_prog - NFS4_CALLBACK];
522 522 mutex_exit(&ncg->nfs4_cb_lock);
523 523
524 524 if (nfs4_server_vlock(sp, 0) != FALSE) {
525 525 sp->s_flags |= N4S_CB_PINGED;
526 526 cv_broadcast(&sp->wait_cb_null);
527 527 mutex_exit(&sp->s_lock);
528 528 nfs4_server_rele(sp);
529 529 }
530 530 }
531 531
532 532 /*
533 533 * cb_illegal args: void
534 534 * res : status (NFS4ERR_OP_CB_ILLEGAL)
535 535 */
536 536 /* ARGSUSED */
537 537 static void
538 538 cb_illegal(nfs_cb_argop4 *argop, nfs_cb_resop4 *resop, struct svc_req *req,
539 539 struct compound_state *cs, struct nfs4_callback_globals *ncg)
540 540 {
541 541 CB_ILLEGAL4res *resp = &resop->nfs_cb_resop4_u.opcbillegal;
542 542
543 543 ncg->nfs4_callback_stats.cb_illegal.value.ui64++;
544 544 resop->resop = OP_CB_ILLEGAL;
545 545 *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
546 546 }
547 547
548 548 static void
549 549 cb_compound(CB_COMPOUND4args *args, CB_COMPOUND4res *resp, struct svc_req *req,
550 550 struct nfs4_callback_globals *ncg)
551 551 {
552 552 uint_t i;
553 553 struct compound_state cs;
554 554 nfs_cb_argop4 *argop;
555 555 nfs_cb_resop4 *resop, *new_res;
556 556 uint_t op;
557 557
558 558 bzero(&cs, sizeof (cs));
559 559 cs.statusp = &resp->status;
560 560 cs.cont = TRUE;
561 561
562 562 /*
563 563 * Form a reply tag by copying over the request tag.
564 564 */
565 565 resp->tag.utf8string_len = args->tag.utf8string_len;
566 566 resp->tag.utf8string_val = kmem_alloc(resp->tag.utf8string_len,
567 567 KM_SLEEP);
568 568 bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
569 569 args->tag.utf8string_len);
570 570
571 571 /*
572 572 * XXX for now, minorversion should be zero
573 573 */
574 574 if (args->minorversion != CB4_MINORVERSION) {
575 575 resp->array_len = 0;
576 576 resp->array = NULL;
577 577 resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
578 578 return;
579 579 }
580 580
581 581 #ifdef DEBUG
582 582 /*
583 583 * Verify callback_ident. It doesn't really matter if it's wrong
584 584 * because we don't really use callback_ident -- we use prog number
585 585 * of the RPC request instead. In this case, just print a DEBUG
586 586 * console message to reveal brokenness of cbclient (at bkoff/cthon).
587 587 */
588 588 if (args->callback_ident != req->rq_prog)
589 589 zcmn_err(getzoneid(), CE_WARN,
590 590 "cb_compound: cb_client using wrong "
591 591 "callback_ident(%d), should be %d",
592 592 args->callback_ident, req->rq_prog);
593 593 #endif
594 594
595 595 resp->array_len = args->array_len;
596 596 resp->array = kmem_zalloc(args->array_len * sizeof (nfs_cb_resop4),
597 597 KM_SLEEP);
598 598
599 599 for (i = 0; i < args->array_len && cs.cont; i++) {
600 600
601 601 argop = &args->array[i];
602 602 resop = &resp->array[i];
603 603 resop->resop = argop->argop;
604 604 op = (uint_t)resop->resop;
605 605
606 606 switch (op) {
607 607
608 608 case OP_CB_GETATTR:
609 609
610 610 cb_getattr(argop, resop, req, &cs, ncg);
611 611 break;
612 612
613 613 case OP_CB_RECALL:
614 614
615 615 cb_recall(argop, resop, req, &cs, ncg);
616 616 break;
617 617
618 618 case OP_CB_ILLEGAL:
619 619
620 620 /* fall through */
621 621
622 622 default:
623 623 /*
624 624 * Handle OP_CB_ILLEGAL and any undefined opcode.
625 625 * Currently, the XDR code will return BADXDR
626 626 * if cb op doesn't decode to legal value, so
627 627 * it really only handles OP_CB_ILLEGAL.
628 628 */
629 629 op = OP_CB_ILLEGAL;
630 630 cb_illegal(argop, resop, req, &cs, ncg);
631 631 }
632 632
633 633 if (*cs.statusp != NFS4_OK)
634 634 cs.cont = FALSE;
635 635
636 636 /*
637 637 * If not at last op, and if we are to stop, then
638 638 * compact the results array.
639 639 */
640 640 if ((i + 1) < args->array_len && !cs.cont) {
641 641
642 642 new_res = kmem_alloc(
643 643 (i+1) * sizeof (nfs_cb_resop4), KM_SLEEP);
644 644 bcopy(resp->array,
645 645 new_res, (i+1) * sizeof (nfs_cb_resop4));
646 646 kmem_free(resp->array,
647 647 args->array_len * sizeof (nfs_cb_resop4));
648 648
649 649 resp->array_len = i + 1;
650 650 resp->array = new_res;
651 651 }
652 652 }
653 653
654 654 }
655 655
656 656 static void
657 657 cb_compound_free(CB_COMPOUND4res *resp)
658 658 {
659 659 uint_t i, op;
660 660 nfs_cb_resop4 *resop;
661 661
662 662 if (resp->tag.utf8string_val) {
663 663 UTF8STRING_FREE(resp->tag)
664 664 }
665 665
666 666 for (i = 0; i < resp->array_len; i++) {
667 667
668 668 resop = &resp->array[i];
669 669 op = (uint_t)resop->resop;
670 670
671 671 switch (op) {
672 672
673 673 case OP_CB_GETATTR:
674 674
675 675 cb_getattr_free(resop);
676 676 break;
677 677
678 678 case OP_CB_RECALL:
679 679
680 680 cb_recall_free(resop);
681 681 break;
682 682
683 683 default:
684 684 break;
685 685 }
686 686 }
687 687
688 688 if (resp->array != NULL) {
689 689 kmem_free(resp->array,
690 690 resp->array_len * sizeof (nfs_cb_resop4));
691 691 }
692 692 }
693 693
694 694 static void
695 695 cb_dispatch(struct svc_req *req, SVCXPRT *xprt)
696 696 {
697 697 CB_COMPOUND4args args;
698 698 CB_COMPOUND4res res;
699 699 struct nfs4_callback_globals *ncg;
700 700
701 701 bool_t (*xdr_args)(), (*xdr_res)();
702 702 void (*proc)(CB_COMPOUND4args *, CB_COMPOUND4res *, struct svc_req *,
703 703 struct nfs4_callback_globals *);
704 704 void (*freeproc)(CB_COMPOUND4res *);
705 705
706 706 ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
707 707 ASSERT(ncg != NULL);
708 708
709 709 ncg->nfs4_callback_stats.cb_dispatch.value.ui64++;
710 710
711 711 switch (req->rq_proc) {
712 712 case CB_NULL:
713 713 xdr_args = xdr_void;
714 714 xdr_res = xdr_void;
715 715 proc = cb_null;
716 716 freeproc = NULL;
717 717 break;
718 718
719 719 case CB_COMPOUND:
720 720 xdr_args = xdr_CB_COMPOUND4args_clnt;
721 721 xdr_res = xdr_CB_COMPOUND4res;
722 722 proc = cb_compound;
723 723 freeproc = cb_compound_free;
724 724 break;
725 725
726 726 default:
727 727 CB_WARN("cb_dispatch: no proc\n");
728 728 svcerr_noproc(xprt);
729 729 return;
730 730 }
731 731
732 732 args.tag.utf8string_val = NULL;
733 733 args.array = NULL;
734 734
735 735 if (!SVC_GETARGS(xprt, xdr_args, (caddr_t)&args)) {
736 736
737 737 CB_WARN("cb_dispatch: cannot getargs\n");
738 738 svcerr_decode(xprt);
739 739 return;
740 740 }
741 741
742 742 (*proc)(&args, &res, req, ncg);
743 743
744 744 if (svc_sendreply(xprt, xdr_res, (caddr_t)&res) == FALSE) {
745 745
746 746 CB_WARN("cb_dispatch: bad sendreply\n");
747 747 svcerr_systemerr(xprt);
748 748 }
749 749
750 750 if (freeproc)
751 751 (*freeproc)(&res);
752 752
753 753 if (!SVC_FREEARGS(xprt, xdr_args, (caddr_t)&args)) {
754 754
755 755 CB_WARN("cb_dispatch: bad freeargs\n");
756 756 }
757 757 }
758 758
759 759 static rpcprog_t
760 760 nfs4_getnextprogram(struct nfs4_callback_globals *ncg)
761 761 {
762 762 int i, j;
763 763
764 764 j = ncg->nfs4_program_hint;
765 765 for (i = 0; i < nfs4_num_prognums; i++, j++) {
766 766
767 767 if (j >= nfs4_num_prognums)
768 768 j = 0;
769 769
770 770 if (ncg->nfs4prog2server[j] == NULL) {
771 771 ncg->nfs4_program_hint = j+1;
772 772 return (j+NFS4_CALLBACK);
773 773 }
774 774 }
775 775
776 776 return (0);
777 777 }
778 778
779 779 void
780 780 nfs4callback_destroy(nfs4_server_t *np)
781 781 {
782 782 struct nfs4_callback_globals *ncg;
783 783 int i;
784 784
785 785 if (np->s_program == 0)
786 786 return;
787 787
788 788 ncg = np->zone_globals;
789 789 i = np->s_program - NFS4_CALLBACK;
790 790
791 791 mutex_enter(&ncg->nfs4_cb_lock);
792 792
793 793 ASSERT(ncg->nfs4prog2server[i] == np);
794 794
795 795 ncg->nfs4prog2server[i] = NULL;
796 796
797 797 if (i < ncg->nfs4_program_hint)
798 798 ncg->nfs4_program_hint = i;
799 799
800 800 mutex_exit(&ncg->nfs4_cb_lock);
801 801 }
802 802
803 803 /*
804 804 * nfs4_setport - This function saves a netid and universal address for
805 805 * the callback program. These values will be used during setclientid.
806 806 */
807 807 static void
808 808 nfs4_setport(char *netid, char *uaddr, char *protofmly, char *proto,
809 809 struct nfs4_callback_globals *ncg)
810 810 {
811 811 struct nfs4_cb_port *p;
812 812 bool_t found = FALSE;
813 813
814 814 ASSERT(MUTEX_HELD(&ncg->nfs4_cb_lock));
815 815
816 816 p = list_head(&ncg->nfs4_cb_ports);
817 817 for (; p != NULL; p = list_next(&ncg->nfs4_cb_ports, p)) {
818 818 if (strcmp(p->netid, netid) == 0) {
819 819 found = TRUE;
820 820 break;
821 821 }
822 822 }
823 823 if (found == TRUE)
824 824 (void) strcpy(p->uaddr, uaddr);
825 825 else {
826 826 p = kmem_alloc(sizeof (*p), KM_SLEEP);
827 827
828 828 (void) strcpy(p->uaddr, uaddr);
829 829 (void) strcpy(p->netid, netid);
830 830 (void) strcpy(p->protofmly, protofmly);
831 831 (void) strcpy(p->proto, proto);
832 832 list_insert_head(&ncg->nfs4_cb_ports, p);
833 833 }
834 834 }
835 835
836 836 /*
837 837 * nfs4_cb_args - This function is used to construct the callback
838 838 * portion of the arguments needed for setclientid.
839 839 */
840 840
841 841 void
842 842 nfs4_cb_args(nfs4_server_t *np, struct knetconfig *knc, SETCLIENTID4args *args)
843 843 {
844 844 struct nfs4_cb_port *p;
845 845 bool_t found = FALSE;
846 846 rpcprog_t pgm;
847 847 struct nfs4_callback_globals *ncg = np->zone_globals;
848 848
849 849 /*
850 850 * This server structure may already have a program number
851 851 * assigned to it. This happens when the client has to
852 852 * re-issue SETCLIENTID. Just re-use the information.
853 853 */
854 854 if (np->s_program >= NFS4_CALLBACK &&
855 855 np->s_program < NFS4_CALLBACK + nfs4_num_prognums)
856 856 nfs4callback_destroy(np);
857 857
858 858 mutex_enter(&ncg->nfs4_cb_lock);
859 859
860 860 p = list_head(&ncg->nfs4_cb_ports);
861 861 for (; p != NULL; p = list_next(&ncg->nfs4_cb_ports, p)) {
862 862 if (strcmp(p->protofmly, knc->knc_protofmly) == 0 &&
863 863 strcmp(p->proto, knc->knc_proto) == 0) {
864 864 found = TRUE;
865 865 break;
866 866 }
867 867 }
868 868
869 869 if (found == FALSE) {
870 870
871 871 NFS4_DEBUG(nfs4_callback_debug,
872 872 (CE_WARN, "nfs4_cb_args: could not find netid for %s/%s\n",
873 873 knc->knc_protofmly, knc->knc_proto));
874 874
875 875 args->callback.cb_program = 0;
876 876 args->callback.cb_location.r_netid = NULL;
877 877 args->callback.cb_location.r_addr = NULL;
878 878 args->callback_ident = 0;
879 879 mutex_exit(&ncg->nfs4_cb_lock);
880 880 return;
881 881 }
882 882
883 883 if ((pgm = nfs4_getnextprogram(ncg)) == 0) {
884 884 CB_WARN("nfs4_cb_args: out of program numbers\n");
885 885
886 886 args->callback.cb_program = 0;
887 887 args->callback.cb_location.r_netid = NULL;
888 888 args->callback.cb_location.r_addr = NULL;
889 889 args->callback_ident = 0;
890 890 mutex_exit(&ncg->nfs4_cb_lock);
891 891 return;
892 892 }
893 893
894 894 ncg->nfs4prog2server[pgm-NFS4_CALLBACK] = np;
895 895 args->callback.cb_program = pgm;
896 896 args->callback.cb_location.r_netid = p->netid;
897 897 args->callback.cb_location.r_addr = p->uaddr;
898 898 args->callback_ident = pgm;
899 899
900 900 np->s_program = pgm;
901 901
902 902 mutex_exit(&ncg->nfs4_cb_lock);
903 903 }
904 904
905 905 static int
906 906 nfs4_dquery(struct nfs4_svc_args *arg, model_t model)
907 907 {
908 908 file_t *fp;
909 909 vnode_t *vp;
910 910 rnode4_t *rp;
911 911 int error;
912 912 STRUCT_HANDLE(nfs4_svc_args, uap);
913 913
914 914 STRUCT_SET_HANDLE(uap, model, arg);
915 915
916 916 if ((fp = getf(STRUCT_FGET(uap, fd))) == NULL)
917 917 return (EBADF);
918 918
919 919 vp = fp->f_vnode;
920 920
921 921 if (vp == NULL || vp->v_type != VREG ||
922 922 !vn_matchops(vp, nfs4_vnodeops)) {
923 923 releasef(STRUCT_FGET(uap, fd));
924 924 return (EBADF);
925 925 }
926 926
927 927 rp = VTOR4(vp);
928 928
929 929 /*
930 930 * I can't convince myself that we need locking here. The
931 931 * rnode cannot disappear and the value returned is instantly
932 932 * stale anyway, so why bother?
933 933 */
934 934
935 935 error = suword32(STRUCT_FGETP(uap, netid), rp->r_deleg_type);
936 936 releasef(STRUCT_FGET(uap, fd));
937 937 return (error);
938 938 }
939 939
940 940
/*
 * NFS4 client system call. This service does the
 * necessary initialization for the callback program.
 * This is fashioned after the server side interaction
 * between nfsd and the kernel. On the client, the
 * mount command forks and the child process does the
 * necessary interaction with the kernel.
 *
 * uap->fd is the fd of an open transport provider
 *
 * Returns 0 on success or an errno value (EPERM, EBADF,
 * copyin/copyinstr errors, or svc_tli_kcreate errors).
 */
int
nfs4_svc(struct nfs4_svc_args *arg, model_t model)
{
	file_t *fp;
	int error;
	int readsize;
	char buf[KNC_STRSIZE], uaddr[KNC_STRSIZE];
	char protofmly[KNC_STRSIZE], proto[KNC_STRSIZE];
	size_t len;
	STRUCT_HANDLE(nfs4_svc_args, uap);
	struct netbuf addrmask;
	int cmd;
	SVCMASTERXPRT *cb_xprt;
	struct nfs4_callback_globals *ncg;

#ifdef lint
	model = model;		/* STRUCT macros don't always refer to it */
#endif

	STRUCT_SET_HANDLE(uap, model, arg);

	/* NFS4_DQUERY is handled entirely by nfs4_dquery(). */
	if (STRUCT_FGET(uap, cmd) == NFS4_DQUERY)
		return (nfs4_dquery(arg, model));

	if (secpolicy_nfs(CRED()) != 0)
		return (EPERM);

	if ((fp = getf(STRUCT_FGET(uap, fd))) == NULL)
		return (EBADF);

	/*
	 * Set read buffer size to rsize
	 * and add room for RPC headers.
	 */
	readsize = nfs3tsize() + (RPC_MAXDATASIZE - NFS_MAXDATA);
	if (readsize < RPC_MAXDATASIZE)
		readsize = RPC_MAXDATASIZE;

	error = copyinstr((const char *)STRUCT_FGETP(uap, netid), buf,
	    KNC_STRSIZE, &len);
	if (error) {
		releasef(STRUCT_FGET(uap, fd));
		return (error);
	}

	cmd = STRUCT_FGET(uap, cmd);

	/*
	 * For NFS4_KRPC_START, copy in the address mask used by the
	 * transport.  addrmask.buf is freed on every error path below,
	 * and handed to svc_tli_kcreate() on success.
	 */
	if (cmd & NFS4_KRPC_START) {
		addrmask.len = STRUCT_FGET(uap, addrmask.len);
		addrmask.maxlen = STRUCT_FGET(uap, addrmask.maxlen);
		addrmask.buf = kmem_alloc(addrmask.maxlen, KM_SLEEP);
		error = copyin(STRUCT_FGETP(uap, addrmask.buf), addrmask.buf,
		    addrmask.len);
		if (error) {
			releasef(STRUCT_FGET(uap, fd));
			kmem_free(addrmask.buf, addrmask.maxlen);
			return (error);
		}
	}
	else
		addrmask.buf = NULL;

	error = copyinstr((const char *)STRUCT_FGETP(uap, addr), uaddr,
	    sizeof (uaddr), &len);
	if (error) {
		releasef(STRUCT_FGET(uap, fd));
		if (addrmask.buf)
			kmem_free(addrmask.buf, addrmask.maxlen);
		return (error);
	}

	error = copyinstr((const char *)STRUCT_FGETP(uap, protofmly), protofmly,
	    sizeof (protofmly), &len);
	if (error) {
		releasef(STRUCT_FGET(uap, fd));
		if (addrmask.buf)
			kmem_free(addrmask.buf, addrmask.maxlen);
		return (error);
	}

	error = copyinstr((const char *)STRUCT_FGETP(uap, proto), proto,
	    sizeof (proto), &len);
	if (error) {
		releasef(STRUCT_FGET(uap, fd));
		if (addrmask.buf)
			kmem_free(addrmask.buf, addrmask.maxlen);
		return (error);
	}

	ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
	ASSERT(ncg != NULL);

	/* nfs4_cb_lock serializes port registration and KRPC start. */
	mutex_enter(&ncg->nfs4_cb_lock);
	if (cmd & NFS4_SETPORT)
		nfs4_setport(buf, uaddr, protofmly, proto, ncg);

	if (cmd & NFS4_KRPC_START) {
		error = svc_tli_kcreate(fp, readsize, buf, &addrmask, &cb_xprt,
		    &nfs4_cb_sct, NULL, NFS_CB_SVCPOOL_ID, FALSE);
		if (error) {
			CB_WARN1("nfs4_svc: svc_tli_kcreate failed %d\n",
			    error);
			kmem_free(addrmask.buf, addrmask.maxlen);
		}
	}

	mutex_exit(&ncg->nfs4_cb_lock);
	releasef(STRUCT_FGET(uap, fd));
	return (error);
}
1061 1061
1062 1062 struct nfs4_callback_globals *
1063 1063 nfs4_get_callback_globals(void)
1064 1064 {
1065 1065 return (zone_getspecific(nfs4_callback_zone_key, nfs_zone()));
1066 1066 }
1067 1067
1068 1068 static void *
1069 1069 nfs4_callback_init_zone(zoneid_t zoneid)
1070 1070 {
1071 1071 kstat_t *nfs4_callback_kstat;
1072 1072 struct nfs4_callback_globals *ncg;
1073 1073
1074 1074 ncg = kmem_zalloc(sizeof (*ncg), KM_SLEEP);
1075 1075
1076 1076 ncg->nfs4prog2server = kmem_zalloc(nfs4_num_prognums *
1077 1077 sizeof (struct nfs4_server *), KM_SLEEP);
1078 1078
1079 1079 /* initialize the dlist */
1080 1080 mutex_init(&ncg->nfs4_dlist_lock, NULL, MUTEX_DEFAULT, NULL);
1081 1081 list_create(&ncg->nfs4_dlist, sizeof (struct nfs4_dnode),
1082 1082 offsetof(struct nfs4_dnode, linkage));
1083 1083
1084 1084 /* initialize cb_port list */
1085 1085 mutex_init(&ncg->nfs4_cb_lock, NULL, MUTEX_DEFAULT, NULL);
1086 1086 list_create(&ncg->nfs4_cb_ports, sizeof (struct nfs4_cb_port),
1087 1087 offsetof(struct nfs4_cb_port, linkage));
1088 1088
1089 1089 /* get our own copy of the kstats */
1090 1090 bcopy(&nfs4_callback_stats_tmpl, &ncg->nfs4_callback_stats,
1091 1091 sizeof (nfs4_callback_stats_tmpl));
1092 1092 /* register "nfs:0:nfs4_callback_stats" for this zone */
1093 1093 if ((nfs4_callback_kstat =
1094 1094 kstat_create_zone("nfs", 0, "nfs4_callback_stats", "misc",
1095 1095 KSTAT_TYPE_NAMED,
1096 1096 sizeof (ncg->nfs4_callback_stats) / sizeof (kstat_named_t),
1097 1097 KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE,
1098 1098 zoneid)) != NULL) {
1099 1099 nfs4_callback_kstat->ks_data = &ncg->nfs4_callback_stats;
1100 1100 kstat_install(nfs4_callback_kstat);
1101 1101 }
1102 1102 return (ncg);
1103 1103 }
1104 1104
/*
 * Discard every delegation held from any server registered in this
 * zone's callback globals.  Used at zone shutdown/fini.  Delegations
 * are torn down locally via nfs4delegreturn_cleanup_impl(); no
 * DELEGRETURN is sent from here.
 */
static void
nfs4_discard_delegations(struct nfs4_callback_globals *ncg)
{
	nfs4_server_t *sp;
	int i, num_removed;

	/*
	 * It's OK here to just run through the registered "programs", as
	 * servers without programs won't have any delegations to handle.
	 */
	for (i = 0; i < nfs4_num_prognums; i++) {
		rnode4_t *rp;

		mutex_enter(&ncg->nfs4_cb_lock);
		sp = ncg->nfs4prog2server[i];
		mutex_exit(&ncg->nfs4_cb_lock);

		/* vlock validates sp and returns with s_lock held */
		if (nfs4_server_vlock(sp, 1) == FALSE)
			continue;
		num_removed = 0;
		while ((rp = list_head(&sp->s_deleg_list)) != NULL) {
			mutex_enter(&rp->r_statev4_lock);
			if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
				/*
				 * We need to take matters into our own hands,
				 * as nfs4delegreturn_cleanup_impl() won't
				 * remove this from the list.
				 */
				list_remove(&sp->s_deleg_list, rp);
				mutex_exit(&rp->r_statev4_lock);
				nfs4_dec_state_ref_count_nolock(sp,
				    VTOMI4(RTOV4(rp)));
				num_removed++;
				continue;
			}
			mutex_exit(&rp->r_statev4_lock);
			/* hold the vnode so rp can't vanish while unlocked */
			VN_HOLD(RTOV4(rp));
			mutex_exit(&sp->s_lock);
			/*
			 * The following will remove the node from the list.
			 */
			nfs4delegreturn_cleanup_impl(rp, sp, ncg);
			VN_RELE(RTOV4(rp));
			mutex_enter(&sp->s_lock);
		}
		mutex_exit(&sp->s_lock);
		/* each removed list node reles a reference */
		while (num_removed-- > 0)
			nfs4_server_rele(sp);
		/* remove our reference for nfs4_server_vlock */
		nfs4_server_rele(sp);
	}
}
1158 1158
1159 1159 /* ARGSUSED */
1160 1160 static void
1161 1161 nfs4_callback_shutdown_zone(zoneid_t zoneid, void *data)
1162 1162 {
1163 1163 struct nfs4_callback_globals *ncg = data;
1164 1164
1165 1165 /*
1166 1166 * Clean pending delegation return list.
1167 1167 */
1168 1168 nfs4_dlistclean_impl(ncg, NFS4_DR_DISCARD);
1169 1169
1170 1170 /*
1171 1171 * Discard all delegations.
1172 1172 */
1173 1173 nfs4_discard_delegations(ncg);
1174 1174 }
1175 1175
/*
 * Zone-destroy hook: final teardown of this zone's callback state.
 * Removes the zone's kstat, discards any remaining delegations, drops
 * the last reference on every nfs4_server_t belonging to the zone,
 * and frees the callback globals.
 */
static void
nfs4_callback_fini_zone(zoneid_t zoneid, void *data)
{
	struct nfs4_callback_globals *ncg = data;
	struct nfs4_cb_port *p;
	nfs4_server_t *sp, *next;
	nfs4_server_t freelist;
	int i;

	kstat_delete_byname_zone("nfs", 0, "nfs4_callback_stats", zoneid);

	/*
	 * Discard all delegations that may have crept in since we did the
	 * _shutdown.
	 */
	nfs4_discard_delegations(ncg);
	/*
	 * We're completely done with this zone and all associated
	 * nfs4_server_t's. Any remaining nfs4_server_ts should only have one
	 * more reference outstanding -- the reference we didn't release in
	 * nfs4_renew_lease_thread().
	 *
	 * Here we need to run through the global nfs4_server_lst as we need to
	 * deal with nfs4_server_ts without programs, as they also have threads
	 * created for them, and so have outstanding references that we need to
	 * release.
	 */
	/* freelist is a local, circularly-linked staging list */
	freelist.forw = &freelist;
	freelist.back = &freelist;
	mutex_enter(&nfs4_server_lst_lock);
	sp = nfs4_server_lst.forw;
	while (sp != &nfs4_server_lst) {
		next = sp->forw;
		if (sp->zoneid == zoneid) {
			remque(sp);
			insque(sp, &freelist);
		}
		sp = next;
	}
	mutex_exit(&nfs4_server_lst_lock);

	/* release references outside of nfs4_server_lst_lock */
	sp = freelist.forw;
	while (sp != &freelist) {
		next = sp->forw;
		nfs4_server_rele(sp);	/* free the list's reference */
		sp = next;
	}

#ifdef DEBUG
	for (i = 0; i < nfs4_num_prognums; i++) {
		ASSERT(ncg->nfs4prog2server[i] == NULL);
	}
#endif
	kmem_free(ncg->nfs4prog2server, nfs4_num_prognums *
	    sizeof (struct nfs4_server *));

	/*
	 * NOTE(review): nfs4_cb_lock appears to still be held when
	 * mutex_destroy() is called below -- confirm whether an
	 * intervening mutex_exit() is required here.
	 */
	mutex_enter(&ncg->nfs4_cb_lock);
	while ((p = list_head(&ncg->nfs4_cb_ports)) != NULL) {
		list_remove(&ncg->nfs4_cb_ports, p);
		kmem_free(p, sizeof (*p));
	}
	list_destroy(&ncg->nfs4_cb_ports);
	mutex_destroy(&ncg->nfs4_cb_lock);
	list_destroy(&ncg->nfs4_dlist);
	mutex_destroy(&ncg->nfs4_dlist_lock);
	kmem_free(ncg, sizeof (*ncg));
}
1243 1243
1244 1244 void
1245 1245 nfs4_callback_init(void)
1246 1246 {
1247 1247 int i;
1248 1248 SVC_CALLOUT *nfs4_cb_sc;
1249 1249
1250 1250 /* initialize the callback table */
1251 1251 nfs4_cb_sc = kmem_alloc(nfs4_num_prognums *
1252 1252 sizeof (SVC_CALLOUT), KM_SLEEP);
1253 1253
1254 1254 for (i = 0; i < nfs4_num_prognums; i++) {
1255 1255 nfs4_cb_sc[i].sc_prog = NFS4_CALLBACK+i;
1256 1256 nfs4_cb_sc[i].sc_versmin = NFS_CB;
1257 1257 nfs4_cb_sc[i].sc_versmax = NFS_CB;
1258 1258 nfs4_cb_sc[i].sc_dispatch = cb_dispatch;
1259 1259 }
1260 1260
1261 1261 nfs4_cb_sct.sct_size = nfs4_num_prognums;
1262 1262 nfs4_cb_sct.sct_free = FALSE;
1263 1263 nfs4_cb_sct.sct_sc = nfs4_cb_sc;
1264 1264
1265 1265 /*
1266 1266 * Compute max bytes required for dyamically allocated parts
1267 1267 * of cb_getattr reply. Only size and change are supported now.
1268 1268 * If CB_GETATTR is changed to reply with additional attrs,
1269 1269 * additional sizes must be added below.
1270 1270 *
1271 1271 * fattr4_change + fattr4_size == uint64_t + uint64_t
1272 1272 */
1273 1273 cb_getattr_bytes = 2 * BYTES_PER_XDR_UNIT + 2 * BYTES_PER_XDR_UNIT;
1274 1274
1275 1275 zone_key_create(&nfs4_callback_zone_key, nfs4_callback_init_zone,
1276 1276 nfs4_callback_shutdown_zone, nfs4_callback_fini_zone);
1277 1277 }
1278 1278
/*
 * Counterpart of nfs4_callback_init().  Intentionally empty: no
 * module-level teardown is performed here.
 */
void
nfs4_callback_fini(void)
{
}
1283 1283
/*
 * Tear down the local state for rp's delegation: free the delegation
 * cred, clear the delegation fields, remove rp from the server's
 * s_deleg_list and drop the associated references.  No over-the-wire
 * DELEGRETURN is performed here.
 *
 * NB: This function can be called from the *wrong* zone (ie, the zone that
 * 'rp' belongs to and the caller's zone may not be the same). This can happen
 * if the zone is going away and we get called from nfs4_async_inactive(). In
 * this case the globals will be NULL and we won't update the counters, which
 * doesn't matter as the zone is going away anyhow.
 */
static void
nfs4delegreturn_cleanup_impl(rnode4_t *rp, nfs4_server_t *np,
    struct nfs4_callback_globals *ncg)
{
	mntinfo4_t *mi = VTOMI4(RTOV4(rp));
	boolean_t need_rele = B_FALSE;

	/*
	 * Caller must be holding mi_recovlock in read mode
	 * to call here. This is provided by start_op.
	 * Delegation management requires to grab s_lock
	 * first and then r_statev4_lock.
	 */

	/*
	 * If no server was passed in, look it up (the lookup returns
	 * with s_lock held); otherwise take s_lock ourselves.
	 */
	if (np == NULL) {
		np = find_nfs4_server_all(mi, 1);
		if (np == NULL)
			return;
		need_rele = B_TRUE;
	} else {
		mutex_enter(&np->s_lock);
	}

	mutex_enter(&rp->r_statev4_lock);

	/* nothing to do if the delegation is already gone */
	if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
		mutex_exit(&rp->r_statev4_lock);
		mutex_exit(&np->s_lock);
		if (need_rele)
			nfs4_server_rele(np);
		return;
	}

	/*
	 * Free the cred originally held when
	 * the delegation was granted. Caller must
	 * hold this cred if it wants to use it after
	 * this call.
	 */
	crfree(rp->r_deleg_cred);
	rp->r_deleg_cred = NULL;
	rp->r_deleg_type = OPEN_DELEGATE_NONE;
	rp->r_deleg_needs_recovery = OPEN_DELEGATE_NONE;
	rp->r_deleg_needs_recall = FALSE;
	rp->r_deleg_return_pending = FALSE;

	/*
	 * Remove the rnode from the server's list and
	 * update the ref counts.
	 */
	list_remove(&np->s_deleg_list, rp);
	mutex_exit(&rp->r_statev4_lock);
	nfs4_dec_state_ref_count_nolock(np, mi);
	mutex_exit(&np->s_lock);
	/* removed list node removes a reference */
	nfs4_server_rele(np);
	if (need_rele)
		nfs4_server_rele(np);
	if (ncg != NULL)
		ncg->nfs4_callback_stats.delegations.value.ui64--;
}
1352 1352
1353 1353 void
1354 1354 nfs4delegreturn_cleanup(rnode4_t *rp, nfs4_server_t *np)
1355 1355 {
1356 1356 struct nfs4_callback_globals *ncg;
1357 1357
1358 1358 if (np != NULL) {
1359 1359 ncg = np->zone_globals;
1360 1360 } else if (nfs_zone() == VTOMI4(RTOV4(rp))->mi_zone) {
1361 1361 ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
1362 1362 ASSERT(ncg != NULL);
1363 1363 } else {
1364 1364 /*
1365 1365 * Request coming from the wrong zone.
1366 1366 */
1367 1367 ASSERT(getzoneid() == GLOBAL_ZONEID);
1368 1368 ncg = NULL;
1369 1369 }
1370 1370
1371 1371 nfs4delegreturn_cleanup_impl(rp, np, ncg);
1372 1372 }
1373 1373
1374 1374 static void
1375 1375 nfs4delegreturn_save_lost_rqst(int error, nfs4_lost_rqst_t *lost_rqstp,
1376 1376 cred_t *cr, vnode_t *vp)
1377 1377 {
1378 1378 if (error != ETIMEDOUT && error != EINTR &&
1379 1379 !NFS4_FRC_UNMT_ERR(error, vp->v_vfsp)) {
1380 1380 lost_rqstp->lr_op = 0;
1381 1381 return;
1382 1382 }
1383 1383
1384 1384 NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
1385 1385 "nfs4close_save_lost_rqst: error %d", error));
1386 1386
1387 1387 lost_rqstp->lr_op = OP_DELEGRETURN;
1388 1388 /*
1389 1389 * The vp is held and rele'd via the recovery code.
1390 1390 * See nfs4_save_lost_rqst.
1391 1391 */
1392 1392 lost_rqstp->lr_vp = vp;
1393 1393 lost_rqstp->lr_dvp = NULL;
1394 1394 lost_rqstp->lr_oop = NULL;
1395 1395 lost_rqstp->lr_osp = NULL;
1396 1396 lost_rqstp->lr_lop = NULL;
1397 1397 lost_rqstp->lr_cr = cr;
1398 1398 lost_rqstp->lr_flk = NULL;
1399 1399 lost_rqstp->lr_putfirst = FALSE;
1400 1400 }
1401 1401
/*
 * Issue the over-the-wire DELEGRETURN compound for rp:
 * PUTFH / GETATTR / DELEGRETURN.  On NFS4_OK the GETATTR result is
 * folded into the attribute cache.  RPC and NFS status are returned
 * through ep.
 */
static void
nfs4delegreturn_otw(rnode4_t *rp, cred_t *cr, nfs4_error_t *ep)
{
	COMPOUND4args_clnt args;
	COMPOUND4res_clnt res;
	nfs_argop4 argops[3];
	nfs4_ga_res_t *garp = NULL;
	hrtime_t t;
	int numops;
	int doqueue = 1;

	args.ctag = TAG_DELEGRETURN;

	numops = 3;	/* PUTFH, GETATTR, DELEGRETURN */

	args.array = argops;
	args.array_len = numops;

	argops[0].argop = OP_CPUTFH;
	argops[0].nfs_argop4_u.opcputfh.sfh = rp->r_fh;

	argops[1].argop = OP_GETATTR;
	argops[1].nfs_argop4_u.opgetattr.attr_request = NFS4_VATTR_MASK;
	argops[1].nfs_argop4_u.opgetattr.mi = VTOMI4(RTOV4(rp));

	argops[2].argop = OP_DELEGRETURN;
	argops[2].nfs_argop4_u.opdelegreturn.deleg_stateid =
	    rp->r_deleg_stateid;

	/* timestamp taken before the call, for attr cache consistency */
	t = gethrtime();
	rfs4call(VTOMI4(RTOV4(rp)), &args, &res, cr, &doqueue, 0, ep);

	/* RPC-level failure: no results to decode or free */
	if (ep->error)
		return;

	if (res.status == NFS4_OK) {
		garp = &res.array[1].nfs_resop4_u.opgetattr.ga_res;
		nfs4_attr_cache(RTOV4(rp), garp, t, cr, TRUE, NULL);

	}
	(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
}
1444 1444
1445 1445 int
1446 1446 nfs4_do_delegreturn(rnode4_t *rp, int flags, cred_t *cr,
1447 1447 struct nfs4_callback_globals *ncg)
1448 1448 {
1449 1449 vnode_t *vp = RTOV4(rp);
1450 1450 mntinfo4_t *mi = VTOMI4(vp);
1451 1451 nfs4_lost_rqst_t lost_rqst;
1452 1452 nfs4_recov_state_t recov_state;
1453 1453 bool_t needrecov = FALSE, recovonly, done = FALSE;
1454 1454 nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS };
1455 1455
1456 1456 ncg->nfs4_callback_stats.delegreturn.value.ui64++;
1457 1457
1458 1458 while (!done) {
1459 1459 e.error = nfs4_start_fop(mi, vp, NULL, OH_DELEGRETURN,
1460 1460 &recov_state, &recovonly);
1461 1461
1462 1462 if (e.error) {
1463 1463 if (flags & NFS4_DR_FORCE) {
1464 1464 (void) nfs_rw_enter_sig(&mi->mi_recovlock,
1465 1465 RW_READER, 0);
1466 1466 nfs4delegreturn_cleanup_impl(rp, NULL, ncg);
1467 1467 nfs_rw_exit(&mi->mi_recovlock);
1468 1468 }
1469 1469 break;
1470 1470 }
1471 1471
1472 1472 /*
1473 1473 * Check to see if the delegation has already been
1474 1474 * returned by the recovery thread. The state of
1475 1475 * the delegation cannot change at this point due
1476 1476 * to start_fop and the r_deleg_recall_lock.
1477 1477 */
1478 1478 if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
1479 1479 e.error = 0;
1480 1480 nfs4_end_op(mi, vp, NULL, &recov_state, needrecov);
1481 1481 break;
1482 1482 }
1483 1483
1484 1484 if (recovonly) {
1485 1485 /*
1486 1486 * Delegation will be returned via the
1487 1487 * recovery framework. Build a lost request
1488 1488 * structure, start recovery and get out.
1489 1489 */
1490 1490 nfs4_error_init(&e, EINTR);
1491 1491 nfs4delegreturn_save_lost_rqst(e.error, &lost_rqst,
1492 1492 cr, vp);
1493 1493 (void) nfs4_start_recovery(&e, mi, vp,
1494 1494 NULL, &rp->r_deleg_stateid,
1495 1495 lost_rqst.lr_op == OP_DELEGRETURN ?
1496 1496 &lost_rqst : NULL, OP_DELEGRETURN, NULL,
1497 1497 NULL, NULL);
1498 1498 nfs4_end_op(mi, vp, NULL, &recov_state, needrecov);
1499 1499 break;
↓ open down ↓ |
1499 lines elided |
↑ open up ↑ |
1500 1500 }
1501 1501
1502 1502 nfs4delegreturn_otw(rp, cr, &e);
1503 1503
1504 1504 /*
1505 1505 * Ignore some errors on delegreturn; no point in marking
1506 1506 * the file dead on a state destroying operation.
1507 1507 */
1508 1508 if (e.error == 0 && (nfs4_recov_marks_dead(e.stat) ||
1509 1509 e.stat == NFS4ERR_BADHANDLE ||
1510 - e.stat == NFS4ERR_STALE))
1510 + e.stat == NFS4ERR_STALE ||
1511 + (e.stat == NFS4ERR_STALE_STATEID &&
1512 + !(rp->r_flags & R4HASHED))))
1511 1513 needrecov = FALSE;
1512 1514 else
1513 1515 needrecov = nfs4_needs_recovery(&e, TRUE, vp->v_vfsp);
1514 1516
1515 1517 if (needrecov) {
1516 1518 nfs4delegreturn_save_lost_rqst(e.error, &lost_rqst,
1517 1519 cr, vp);
1518 1520 (void) nfs4_start_recovery(&e, mi, vp,
1519 1521 NULL, &rp->r_deleg_stateid,
1520 1522 lost_rqst.lr_op == OP_DELEGRETURN ?
1521 1523 &lost_rqst : NULL, OP_DELEGRETURN, NULL,
1522 1524 NULL, NULL);
1523 1525 } else {
1524 1526 nfs4delegreturn_cleanup_impl(rp, NULL, ncg);
1525 1527 done = TRUE;
1526 1528 }
1527 1529
1528 1530 nfs4_end_op(mi, vp, NULL, &recov_state, needrecov);
1529 1531 }
1530 1532 return (e.error);
1531 1533 }
1532 1534
/*
 * nfs4_resend_delegreturn - used to drive the delegreturn
 * operation via the recovery thread.
 *
 * Re-issues the DELEGRETURN described by the lost request 'lorp'.
 * On further recoverable failure the error/status is left in ep for
 * the recovery thread; otherwise local delegation state is cleaned
 * up and ep is zeroed.
 */
void
nfs4_resend_delegreturn(nfs4_lost_rqst_t *lorp, nfs4_error_t *ep,
    nfs4_server_t *np)
{
	rnode4_t *rp = VTOR4(lorp->lr_vp);

	/* If the file failed recovery, just quit. */
	mutex_enter(&rp->r_statelock);
	if (rp->r_flags & R4RECOVERR) {
		ep->error = EIO;
	}
	mutex_exit(&rp->r_statelock);

	if (!ep->error)
		nfs4delegreturn_otw(rp, lorp->lr_cr, ep);

	/*
	 * If recovery is now needed, then return the error
	 * and status and let the recovery thread handle it,
	 * including re-driving another delegreturn. Otherwise,
	 * just give up and clean up the delegation.
	 */
	if (nfs4_needs_recovery(ep, TRUE, lorp->lr_vp->v_vfsp))
		return;

	if (rp->r_deleg_type != OPEN_DELEGATE_NONE)
		nfs4delegreturn_cleanup(rp, np);

	nfs4_error_zinit(ep);
}
1567 1569
/*
 * nfs4delegreturn - general function to return a delegation.
 *
 * NFS4_DR_FORCE - return the delegation even if start_op fails
 * NFS4_DR_PUSH - push modified data back to the server via VOP_PUTPAGE
 * NFS4_DR_DISCARD - discard the delegation w/o delegreturn
 * NFS4_DR_DID_OP - calling function already did nfs4_start_op
 * NFS4_DR_RECALL - delegreturned initiated via CB_RECALL
 * NFS4_DR_REOPEN - do file reopens, if applicable
 *
 * Returns 0 or an errno value.
 */
static int
nfs4delegreturn_impl(rnode4_t *rp, int flags, struct nfs4_callback_globals *ncg)
{
	int error = 0;
	cred_t *cr = NULL;
	vnode_t *vp;
	bool_t needrecov = FALSE;
	bool_t rw_entered = FALSE;
	bool_t do_reopen;

	vp = RTOV4(rp);

	/*
	 * If NFS4_DR_DISCARD is set by itself, take a short-cut and
	 * discard without doing an otw DELEGRETURN. This may only be used
	 * by the recovery thread because it bypasses the synchronization
	 * with r_deleg_recall_lock and mi->mi_recovlock.
	 */
	if (flags == NFS4_DR_DISCARD) {
		nfs4delegreturn_cleanup_impl(rp, NULL, ncg);
		return (0);
	}

	if (flags & NFS4_DR_DID_OP) {
		/*
		 * Caller had already done start_op, which means the
		 * r_deleg_recall_lock is already held in READ mode
		 * so we cannot take it in write mode. Return the
		 * delegation asynchronously.
		 *
		 * Remove the NFS4_DR_DID_OP flag so we don't
		 * get stuck looping through here.
		 */
		VN_HOLD(vp);
		nfs4delegreturn_async(rp, (flags & ~NFS4_DR_DID_OP), FALSE);
		return (0);
	}

	/*
	 * Verify we still have a delegation and crhold the credential.
	 */
	mutex_enter(&rp->r_statev4_lock);
	if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
		mutex_exit(&rp->r_statev4_lock);
		goto out;
	}
	cr = rp->r_deleg_cred;
	ASSERT(cr != NULL);
	crhold(cr);
	mutex_exit(&rp->r_statev4_lock);

	/*
	 * Push the modified data back to the server synchronously
	 * before doing DELEGRETURN.
	 */
	if (flags & NFS4_DR_PUSH)
		(void) VOP_PUTPAGE(vp, 0, 0, 0, cr, NULL);

	/*
	 * Take r_deleg_recall_lock in WRITE mode, this will prevent
	 * nfs4_is_otw_open_necessary from trying to use the delegation
	 * while the DELEGRETURN is in progress.
	 */
	(void) nfs_rw_enter_sig(&rp->r_deleg_recall_lock, RW_WRITER, FALSE);

	rw_entered = TRUE;

	/* delegation may have been returned while we waited for the lock */
	if (rp->r_deleg_type == OPEN_DELEGATE_NONE)
		goto out;

	if (flags & NFS4_DR_REOPEN) {
		/*
		 * If R4RECOVERRP is already set, then skip re-opening
		 * the delegation open streams and go straight to doing
		 * delegreturn. (XXX if the file has failed recovery, then the
		 * delegreturn attempt is likely to be futile.)
		 */
		mutex_enter(&rp->r_statelock);
		do_reopen = !(rp->r_flags & R4RECOVERRP);
		mutex_exit(&rp->r_statelock);

		if (do_reopen) {
			error = deleg_reopen(vp, &needrecov, ncg, flags);
			if (error != 0) {
				/* only FORCE/RECALL callers continue on */
				if ((flags & (NFS4_DR_FORCE | NFS4_DR_RECALL))
				    == 0)
					goto out;
			} else if (needrecov) {
				if ((flags & NFS4_DR_FORCE) == 0)
					goto out;
			}
		}
	}

	if (flags & NFS4_DR_DISCARD) {
		mntinfo4_t *mi = VTOMI4(RTOV4(rp));

		mutex_enter(&rp->r_statelock);
		/*
		 * deleg_return_pending is cleared inside of delegation_accept
		 * when a delegation is accepted. if this flag has been
		 * cleared, then a new delegation has overwritten the one we
		 * were about to throw away.
		 */
		if (!rp->r_deleg_return_pending) {
			mutex_exit(&rp->r_statelock);
			goto out;
		}
		mutex_exit(&rp->r_statelock);
		(void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, FALSE);
		nfs4delegreturn_cleanup_impl(rp, NULL, ncg);
		nfs_rw_exit(&mi->mi_recovlock);
	} else {
		error = nfs4_do_delegreturn(rp, flags, cr, ncg);
	}

out:
	if (cr)
		crfree(cr);
	if (rw_entered)
		nfs_rw_exit(&rp->r_deleg_recall_lock);
	return (error);
}
1701 1703
1702 1704 int
1703 1705 nfs4delegreturn(rnode4_t *rp, int flags)
1704 1706 {
1705 1707 struct nfs4_callback_globals *ncg;
1706 1708
1707 1709 ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
1708 1710 ASSERT(ncg != NULL);
1709 1711
1710 1712 return (nfs4delegreturn_impl(rp, flags, ncg));
1711 1713 }
1712 1714
1713 1715 void
1714 1716 nfs4delegreturn_async(rnode4_t *rp, int flags, bool_t trunc)
1715 1717 {
1716 1718 struct cb_recall_pass *pp;
1717 1719
1718 1720 pp = kmem_alloc(sizeof (struct cb_recall_pass), KM_SLEEP);
1719 1721 pp->rp = rp;
1720 1722 pp->flags = flags;
1721 1723 pp->truncate = trunc;
1722 1724
1723 1725 /*
1724 1726 * Fire up a thread to do the actual delegreturn
1725 1727 * Caller must guarantee that the rnode doesn't
1726 1728 * vanish (by calling VN_HOLD).
1727 1729 */
1728 1730
1729 1731 (void) zthread_create(NULL, 0, nfs4delegreturn_thread, pp, 0,
1730 1732 minclsyspri);
1731 1733 }
1732 1734
1733 1735 static void
1734 1736 delegreturn_all_thread(rpcprog_t *pp)
1735 1737 {
1736 1738 nfs4_server_t *np;
1737 1739 bool_t found = FALSE;
1738 1740 rpcprog_t prog;
1739 1741 rnode4_t *rp;
1740 1742 vnode_t *vp;
1741 1743 zoneid_t zoneid = getzoneid();
1742 1744 struct nfs4_callback_globals *ncg;
1743 1745
1744 1746 NFS4_DEBUG(nfs4_drat_debug,
1745 1747 (CE_NOTE, "delereturn_all_thread: prog %d\n", *pp));
1746 1748
1747 1749 prog = *pp;
1748 1750 kmem_free(pp, sizeof (*pp));
1749 1751 pp = NULL;
1750 1752
1751 1753 mutex_enter(&nfs4_server_lst_lock);
1752 1754 for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) {
1753 1755 if (np->zoneid == zoneid && np->s_program == prog) {
1754 1756 mutex_enter(&np->s_lock);
1755 1757 found = TRUE;
1756 1758 break;
1757 1759 }
1758 1760 }
1759 1761 mutex_exit(&nfs4_server_lst_lock);
1760 1762
1761 1763 /*
1762 1764 * It's possible that the nfs4_server which was using this
1763 1765 * program number has vanished since this thread is async.
1764 1766 * If so, just return. Your work here is finished, my friend.
1765 1767 */
1766 1768 if (!found)
1767 1769 goto out;
1768 1770
1769 1771 ncg = np->zone_globals;
1770 1772 while ((rp = list_head(&np->s_deleg_list)) != NULL) {
1771 1773 vp = RTOV4(rp);
1772 1774 VN_HOLD(vp);
1773 1775 mutex_exit(&np->s_lock);
1774 1776 (void) nfs4delegreturn_impl(rp, NFS4_DR_PUSH|NFS4_DR_REOPEN,
1775 1777 ncg);
1776 1778 VN_RELE(vp);
1777 1779
1778 1780 /* retake the s_lock for next trip through the loop */
1779 1781 mutex_enter(&np->s_lock);
1780 1782 }
1781 1783 mutex_exit(&np->s_lock);
1782 1784 out:
1783 1785 NFS4_DEBUG(nfs4_drat_debug,
1784 1786 (CE_NOTE, "delereturn_all_thread: complete\n"));
1785 1787 zthread_exit();
1786 1788 }
1787 1789
1788 1790 void
1789 1791 nfs4_delegreturn_all(nfs4_server_t *sp)
1790 1792 {
1791 1793 rpcprog_t pro, *pp;
1792 1794
1793 1795 mutex_enter(&sp->s_lock);
1794 1796
1795 1797 /* Check to see if the delegation list is empty */
1796 1798
1797 1799 if (list_head(&sp->s_deleg_list) == NULL) {
1798 1800 mutex_exit(&sp->s_lock);
1799 1801 return;
1800 1802 }
1801 1803 /*
1802 1804 * Grab the program number; the async thread will use this
1803 1805 * to find the nfs4_server.
1804 1806 */
1805 1807 pro = sp->s_program;
1806 1808 mutex_exit(&sp->s_lock);
1807 1809 pp = kmem_alloc(sizeof (rpcprog_t), KM_SLEEP);
1808 1810 *pp = pro;
1809 1811 (void) zthread_create(NULL, 0, delegreturn_all_thread, pp, 0,
1810 1812 minclsyspri);
1811 1813 }
1812 1814
1813 1815
1814 1816 /*
1815 1817 * Discard any delegations
1816 1818 *
1817 1819 * Iterate over the servers s_deleg_list and
1818 1820 * for matching mount-point rnodes discard
1819 1821 * the delegation.
1820 1822 */
1821 1823 void
1822 1824 nfs4_deleg_discard(mntinfo4_t *mi, nfs4_server_t *sp)
1823 1825 {
1824 1826 rnode4_t *rp, *next;
1825 1827 mntinfo4_t *r_mi;
1826 1828 struct nfs4_callback_globals *ncg;
1827 1829
1828 1830 ASSERT(mutex_owned(&sp->s_lock));
1829 1831 ncg = sp->zone_globals;
1830 1832
1831 1833 for (rp = list_head(&sp->s_deleg_list); rp != NULL; rp = next) {
1832 1834 r_mi = VTOMI4(RTOV4(rp));
1833 1835 next = list_next(&sp->s_deleg_list, rp);
1834 1836
1835 1837 if (r_mi != mi) {
1836 1838 /*
1837 1839 * Skip if this rnode is in not on the
1838 1840 * same mount-point
1839 1841 */
1840 1842 continue;
1841 1843 }
1842 1844
1843 1845 ASSERT(rp->r_deleg_type == OPEN_DELEGATE_READ);
1844 1846
1845 1847 #ifdef DEBUG
1846 1848 if (nfs4_client_recov_debug) {
1847 1849 zprintf(getzoneid(),
1848 1850 "nfs4_deleg_discard: matched rnode %p "
1849 1851 "-- discarding delegation\n", (void *)rp);
1850 1852 }
1851 1853 #endif
1852 1854 mutex_enter(&rp->r_statev4_lock);
1853 1855 /*
1854 1856 * Free the cred originally held when the delegation
1855 1857 * was granted. Also need to decrement the refcnt
1856 1858 * on this server for each delegation we discard
1857 1859 */
1858 1860 if (rp->r_deleg_cred)
1859 1861 crfree(rp->r_deleg_cred);
1860 1862 rp->r_deleg_cred = NULL;
1861 1863 rp->r_deleg_type = OPEN_DELEGATE_NONE;
1862 1864 rp->r_deleg_needs_recovery = OPEN_DELEGATE_NONE;
1863 1865 rp->r_deleg_needs_recall = FALSE;
1864 1866 ASSERT(sp->s_refcnt > 1);
1865 1867 sp->s_refcnt--;
1866 1868 list_remove(&sp->s_deleg_list, rp);
1867 1869 mutex_exit(&rp->r_statev4_lock);
1868 1870 nfs4_dec_state_ref_count_nolock(sp, mi);
1869 1871 ncg->nfs4_callback_stats.delegations.value.ui64--;
1870 1872 }
1871 1873 }
1872 1874
/*
 * Reopen any open streams that were covered by the given file's
 * delegation.
 * Returns zero or an errno value. If there was no error, *recovp
 * indicates whether recovery was initiated.
 */

static int
deleg_reopen(vnode_t *vp, bool_t *recovp, struct nfs4_callback_globals *ncg,
    int flags)
{
	nfs4_open_stream_t *osp;
	nfs4_recov_state_t recov_state;
	bool_t needrecov = FALSE;
	mntinfo4_t *mi;
	rnode4_t *rp;
	nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS };
	int claimnull;

	mi = VTOMI4(vp);
	rp = VTOR4(vp);

	recov_state.rs_flags = 0;
	recov_state.rs_num_retry_despite_err = 0;

retry:
	if ((e.error = nfs4_start_op(mi, vp, NULL, &recov_state)) != 0) {
		return (e.error);
	}

	/*
	 * if we mean to discard the delegation, it must be BAD, so don't
	 * use it when doing the reopen or it will fail too.
	 */
	claimnull = (flags & NFS4_DR_DISCARD);
	/*
	 * Loop through the open streams for this rnode to find
	 * all of the ones created using the delegation state ID.
	 * Each of these needs to be re-opened.
	 */

	while ((osp = get_next_deleg_stream(rp, claimnull)) != NULL) {

		if (claimnull) {
			nfs4_reopen(vp, osp, &e, CLAIM_NULL, FALSE, FALSE);
		} else {
			ncg->nfs4_callback_stats.claim_cur.value.ui64++;

			nfs4_reopen(vp, osp, &e, CLAIM_DELEGATE_CUR, FALSE,
			    FALSE);
			if (e.error == 0 && e.stat == NFS4_OK)
				ncg->nfs4_callback_stats.
				    claim_cur_ok.value.ui64++;
		}

		/* EAGAIN: end the op and retry from the top */
		if (e.error == EAGAIN) {
			nfs4_end_op(mi, vp, NULL, &recov_state, TRUE);
			goto retry;
		}

		/*
		 * if error is EINTR, ETIMEDOUT, or NFS4_FRC_UNMT_ERR, then
		 * recovery has already been started inside of nfs4_reopen.
		 */
		if (e.error == EINTR || e.error == ETIMEDOUT ||
		    NFS4_FRC_UNMT_ERR(e.error, vp->v_vfsp)) {
			open_stream_rele(osp, rp);
			break;
		}

		needrecov = nfs4_needs_recovery(&e, TRUE, vp->v_vfsp);

		if (e.error != 0 && !needrecov) {
			/*
			 * Recovery is not possible, but don't give up yet;
			 * we'd still like to do delegreturn after
			 * reopening as many streams as possible.
			 * Continue processing the open streams.
			 */

			ncg->nfs4_callback_stats.recall_failed.value.ui64++;

		} else if (needrecov) {
			/*
			 * Start recovery and bail out. The recovery
			 * thread will take it from here.
			 */
			(void) nfs4_start_recovery(&e, mi, vp, NULL, NULL,
			    NULL, OP_OPEN, NULL, NULL, NULL);
			open_stream_rele(osp, rp);
			*recovp = TRUE;
			break;
		}

		open_stream_rele(osp, rp);
	}

	nfs4_end_op(mi, vp, NULL, &recov_state, needrecov);

	return (e.error);
}
1974 1976
/*
 * get_next_deleg_stream - returns the next open stream which
 * represents a delegation for this rnode.  In order to assure
 * forward progress, the caller must guarantee that each open
 * stream returned is changed so that a future call won't return
 * it again.
 *
 * There are several ways for the open stream to change.  If the open
 * stream is !os_delegation, then we aren't interested in it.  Also, if
 * either os_failed_reopen or !os_valid, then don't return the osp.
 *
 * If claimnull is false (doing reopen CLAIM_DELEGATE_CUR) then return
 * the osp if it is an os_delegation open stream.  Also, if the rnode still
 * has r_deleg_return_pending, then return the os_delegation osp.  Lastly,
 * if the rnode's r_deleg_stateid is different from the osp's open_stateid,
 * then return the osp.
 *
 * We have already taken the 'r_deleg_recall_lock' as WRITER, which
 * prevents new OPENs from going OTW (as start_fop takes this
 * lock in READ mode); thus, no new open streams can be created
 * (which inherently means no new delegation open streams are
 * being created).
 *
 * Returns the stream with an additional os_ref_count hold (taken while
 * os_sync_lock is held); the caller must open_stream_rele() it.
 * Returns NULL when no qualifying stream remains.
 */

static nfs4_open_stream_t *
get_next_deleg_stream(rnode4_t *rp, int claimnull)
{
	nfs4_open_stream_t *osp;

	/* Caller must hold off new OPENs; see block comment above. */
	ASSERT(nfs_rw_lock_held(&rp->r_deleg_recall_lock, RW_WRITER));

	/*
	 * Search through the list of open streams looking for
	 * one that was created while holding the delegation.
	 */
	mutex_enter(&rp->r_os_lock);
	for (osp = list_head(&rp->r_open_streams); osp != NULL;
	    osp = list_next(&rp->r_open_streams, osp)) {
		mutex_enter(&osp->os_sync_lock);
		/* Skip non-delegation, failed-reopen, and invalid streams. */
		if (!osp->os_delegation || osp->os_failed_reopen ||
		    !osp->os_valid) {
			mutex_exit(&osp->os_sync_lock);
			continue;
		}
		if (!claimnull || rp->r_deleg_return_pending ||
		    !stateid4_cmp(&osp->open_stateid, &rp->r_deleg_stateid)) {
			/* Hold the stream for the caller before unlocking. */
			osp->os_ref_count++;
			mutex_exit(&osp->os_sync_lock);
			mutex_exit(&rp->r_os_lock);
			return (osp);
		}
		mutex_exit(&osp->os_sync_lock);
	}
	mutex_exit(&rp->r_os_lock);

	return (NULL);
}
2032 2034
/*
 * nfs4delegreturn_thread - kernel thread that performs an asynchronous
 * delegation return for the rnode passed in via 'args'.  Depending on
 * args->truncate, args->flags and the delegation type, it invalidates or
 * pushes dirty pages, then calls nfs4delegreturn_impl() to do the actual
 * return.  On exit it frees 'args' and releases the vnode hold that the
 * caller (e.g. nfs4delegabandon) placed for this thread.
 */
static void
nfs4delegreturn_thread(struct cb_recall_pass *args)
{
	rnode4_t *rp;
	vnode_t *vp;
	cred_t *cr;
	int dtype, error, flags;
	bool_t rdirty, rip;
	kmutex_t cpr_lock;
	callb_cpr_t cpr_info;
	struct nfs4_callback_globals *ncg;

	ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
	ASSERT(ncg != NULL);

	/* Register with the CPR (suspend/resume) framework. */
	mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL);

	CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr,
	    "nfsv4delegRtn");

	rp = args->rp;
	vp = RTOV4(rp);

	/* Bail out early if the delegation has already gone away. */
	mutex_enter(&rp->r_statev4_lock);
	if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
		mutex_exit(&rp->r_statev4_lock);
		goto out;
	}
	mutex_exit(&rp->r_statev4_lock);

	/*
	 * Take the read-write lock in read mode to prevent other
	 * threads from modifying the data during the recall.  This
	 * doesn't affect mmappers.
	 */
	(void) nfs_rw_enter_sig(&rp->r_rwlock, RW_READER, FALSE);

	/* Proceed with delegreturn */

	/*
	 * Re-check now that r_rwlock is held; the delegation may have
	 * been returned while we waited for the lock.
	 */
	mutex_enter(&rp->r_statev4_lock);
	if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
		mutex_exit(&rp->r_statev4_lock);
		nfs_rw_exit(&rp->r_rwlock);
		goto out;
	}
	dtype = rp->r_deleg_type;
	cr = rp->r_deleg_cred;
	ASSERT(cr != NULL);
	crhold(cr);
	mutex_exit(&rp->r_statev4_lock);

	flags = args->flags;

	/*
	 * If the file is being truncated at the server, then throw
	 * away all of the pages, it doesn't matter what flavor of
	 * delegation we have.
	 */

	if (args->truncate) {
		ncg->nfs4_callback_stats.recall_trunc.value.ui64++;
		nfs4_invalidate_pages(vp, 0, cr);
	} else if (dtype == OPEN_DELEGATE_WRITE) {

		mutex_enter(&rp->r_statelock);
		rdirty = rp->r_flags & R4DIRTY;
		mutex_exit(&rp->r_statelock);

		if (rdirty) {
			error = VOP_PUTPAGE(vp, 0, 0, 0, cr, NULL);

			if (error)
				CB_WARN1("nfs4delegreturn_thread:"
				" VOP_PUTPAGE: %d\n", error);
		}
		/* turn off NFS4_DR_PUSH because we just did that above. */
		flags &= ~NFS4_DR_PUSH;
	}

	mutex_enter(&rp->r_statelock);
	rip = rp->r_flags & R4RECOVERRP;
	mutex_exit(&rp->r_statelock);

	/* If a failed recovery is indicated, discard the pages */

	if (rip) {

		error = VOP_PUTPAGE(vp, 0, 0, B_INVAL, cr, NULL);

		if (error)
			CB_WARN1("nfs4delegreturn_thread: VOP_PUTPAGE: %d\n",
			    error);
	}

	/*
	 * Pass the flags to nfs4delegreturn_impl, but be sure not to pass
	 * NFS4_DR_DID_OP, which just calls nfs4delegreturn_async again.
	 */
	flags &= ~NFS4_DR_DID_OP;

	(void) nfs4delegreturn_impl(rp, flags, ncg);

	nfs_rw_exit(&rp->r_rwlock);
	crfree(cr);
out:
	/* Free the argument block and drop the caller's vnode hold. */
	kmem_free(args, sizeof (struct cb_recall_pass));
	VN_RELE(vp);
	mutex_enter(&cpr_lock);
	CALLB_CPR_EXIT(&cpr_info);
	mutex_destroy(&cpr_lock);
	zthread_exit();
}
2145 2147
/*
 * nfs4_delegation_accept - accept (or decline post-processing of) a
 * delegation granted by the server in an OPEN reply.
 *
 *   rp    - rnode for the opened file
 *   claim - the OPEN claim type used (CLAIM_PREVIOUS affects recall flags)
 *   res   - the OPEN4 result containing the delegation, stateid and perms
 *   garp  - post-op attributes; used for the write-delegation change
 *           attribute (may be NULL / invalid)
 *   cr    - credentials to associate with the delegation
 *
 * This function has one assumption that the caller of this function is
 * either doing recovery (therefore cannot call nfs4_start_op) or has
 * already called nfs4_start_op().
 */
void
nfs4_delegation_accept(rnode4_t *rp, open_claim_type4 claim, OPEN4res *res,
    nfs4_ga_res_t *garp, cred_t *cr)
{
	open_read_delegation4 *orp;
	open_write_delegation4 *owp;
	nfs4_server_t *np;
	bool_t already = FALSE;		/* rnode already holds a delegation */
	bool_t recall = FALSE;		/* server asked for immediate recall */
	bool_t valid_garp = TRUE;	/* change attribute usable */
	bool_t delegation_granted = FALSE;
	bool_t dr_needed = FALSE;	/* delegreturn required */
	bool_t recov;			/* recovery currently active */
	int dr_flags = 0;
	long mapcnt;
	uint_t rflag;
	mntinfo4_t *mi;
	struct nfs4_callback_globals *ncg;
	open_delegation_type4 odt;

	ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
	ASSERT(ncg != NULL);

	mi = VTOMI4(RTOV4(rp));

	/*
	 * Accept a delegation granted to the client via an OPEN.
	 * Set the delegation fields in the rnode and insert the
	 * rnode onto the list anchored in the nfs4_server_t.  The
	 * proper locking order requires the nfs4_server_t first,
	 * even though it may not be needed in all cases.
	 *
	 * NB: find_nfs4_server returns with s_lock held.
	 */

	if ((np = find_nfs4_server(mi)) == NULL)
		return;

	/* grab the statelock too, for examining r_mapcnt */
	mutex_enter(&rp->r_statelock);
	mutex_enter(&rp->r_statev4_lock);

	if (rp->r_deleg_type == OPEN_DELEGATE_READ ||
	    rp->r_deleg_type == OPEN_DELEGATE_WRITE)
		already = TRUE;

	odt = res->delegation.delegation_type;

	if (odt == OPEN_DELEGATE_READ) {

		/* Record the read delegation's stateid and permissions. */
		rp->r_deleg_type = res->delegation.delegation_type;
		orp = &res->delegation.open_delegation4_u.read;
		rp->r_deleg_stateid = orp->stateid;
		rp->r_deleg_perms = orp->permissions;
		if (claim == CLAIM_PREVIOUS)
			if ((recall = orp->recall) != 0)
				dr_needed = TRUE;

		delegation_granted = TRUE;

		ncg->nfs4_callback_stats.delegations.value.ui64++;
		ncg->nfs4_callback_stats.delegaccept_r.value.ui64++;

	} else if (odt == OPEN_DELEGATE_WRITE) {

		/* Record the write delegation's stateid, perms and limit. */
		rp->r_deleg_type = res->delegation.delegation_type;
		owp = &res->delegation.open_delegation4_u.write;
		rp->r_deleg_stateid = owp->stateid;
		rp->r_deleg_perms = owp->permissions;
		rp->r_deleg_limit = owp->space_limit;
		if (claim == CLAIM_PREVIOUS)
			if ((recall = owp->recall) != 0)
				dr_needed = TRUE;

		delegation_granted = TRUE;

		if (garp == NULL || !garp->n4g_change_valid) {
			valid_garp = FALSE;
			rp->r_deleg_change = 0;
			rp->r_deleg_change_grant = 0;
		} else {
			rp->r_deleg_change = garp->n4g_change;
			rp->r_deleg_change_grant = garp->n4g_change;
		}
		mapcnt = rp->r_mapcnt;
		rflag = rp->r_flags;

		/*
		 * Update the delegation change attribute if
		 * there are mappers or the file is dirty.  This
		 * might be the case during recovery after server
		 * reboot.
		 */
		if (mapcnt > 0 || rflag & R4DIRTY)
			rp->r_deleg_change++;

		NFS4_DEBUG(nfs4_callback_debug, (CE_NOTE,
		    "nfs4_delegation_accept: r_deleg_change: 0x%x\n",
		    (int)(rp->r_deleg_change >> 32)));
		NFS4_DEBUG(nfs4_callback_debug, (CE_NOTE,
		    "nfs4_delegation_accept: r_delg_change_grant: 0x%x\n",
		    (int)(rp->r_deleg_change_grant >> 32)));


		ncg->nfs4_callback_stats.delegations.value.ui64++;
		ncg->nfs4_callback_stats.delegaccept_rw.value.ui64++;
	} else if (already) {
		/*
		 * No delegation granted.  If the rnode currently
		 * has one, then consider it tainted and return it.
		 */
		dr_needed = TRUE;
	}

	if (delegation_granted) {
		/* Add the rnode to the list. */
		if (!already) {
			crhold(cr);
			rp->r_deleg_cred = cr;

			ASSERT(mutex_owned(&np->s_lock));
			list_insert_head(&np->s_deleg_list, rp);
			/* added list node gets a reference */
			np->s_refcnt++;
			nfs4_inc_state_ref_count_nolock(np, mi);
		}
		rp->r_deleg_needs_recovery = OPEN_DELEGATE_NONE;
	}

	/*
	 * We've now safely accepted the delegation, if any.  Drop the
	 * locks and figure out what post-processing is needed.  We'd
	 * like to retain r_statev4_lock, but nfs4_server_rele takes
	 * s_lock which would be a lock ordering violation.
	 */
	mutex_exit(&rp->r_statev4_lock);
	mutex_exit(&rp->r_statelock);
	mutex_exit(&np->s_lock);
	nfs4_server_rele(np);

	/*
	 * Check to see if we are in recovery.  Remember that
	 * this function is protected by start_op, so a recovery
	 * cannot begin until we are out of here.
	 */
	mutex_enter(&mi->mi_lock);
	recov = mi->mi_recovflags & MI4_RECOV_ACTIV;
	mutex_exit(&mi->mi_lock);

	mutex_enter(&rp->r_statev4_lock);

	if (nfs4_delegreturn_policy == IMMEDIATE || !valid_garp)
		dr_needed = TRUE;

	if (dr_needed && rp->r_deleg_return_pending == FALSE) {
		if (recov) {
			/*
			 * We cannot call delegreturn from inside
			 * of recovery or VOP_PUTPAGE will hang
			 * due to nfs4_start_fop call in
			 * nfs4write.  Use dlistadd to add the
			 * rnode to the list of rnodes needing
			 * cleaning.  We do not need to do reopen
			 * here because recov_openfiles will do it.
			 * In the non-recall case, just discard the
			 * delegation as it is no longer valid.
			 */
			if (recall)
				dr_flags = NFS4_DR_PUSH;
			else
				dr_flags = NFS4_DR_PUSH|NFS4_DR_DISCARD;

			nfs4_dlistadd(rp, ncg, dr_flags);
			dr_flags = 0;
		} else {
			/*
			 * Push the modified data back to the server,
			 * reopen any delegation open streams, and return
			 * the delegation.  Drop the statev4_lock first!
			 */
			dr_flags = NFS4_DR_PUSH|NFS4_DR_DID_OP|NFS4_DR_REOPEN;
		}
	}
	mutex_exit(&rp->r_statev4_lock);
	if (dr_flags)
		(void) nfs4delegreturn_impl(rp, dr_flags, ncg);
}
2338 2340
2339 2341 /*
2340 2342 * nfs4delegabandon - Abandon the delegation on an rnode4. This code
2341 2343 * is called when the client receives EXPIRED, BAD_STATEID, OLD_STATEID
2342 2344 * or BADSEQID and the recovery code is unable to recover. Push any
2343 2345 * dirty data back to the server and return the delegation (if any).
2344 2346 */
2345 2347
2346 2348 void
2347 2349 nfs4delegabandon(rnode4_t *rp)
2348 2350 {
2349 2351 vnode_t *vp;
2350 2352 struct cb_recall_pass *pp;
2351 2353 open_delegation_type4 dt;
2352 2354
2353 2355 mutex_enter(&rp->r_statev4_lock);
2354 2356 dt = rp->r_deleg_type;
2355 2357 mutex_exit(&rp->r_statev4_lock);
2356 2358
2357 2359 if (dt == OPEN_DELEGATE_NONE)
2358 2360 return;
2359 2361
2360 2362 vp = RTOV4(rp);
2361 2363 VN_HOLD(vp);
2362 2364
2363 2365 pp = kmem_alloc(sizeof (struct cb_recall_pass), KM_SLEEP);
2364 2366 pp->rp = rp;
2365 2367 /*
2366 2368 * Recovery on the file has failed and we want to return
2367 2369 * the delegation. We don't want to reopen files and
2368 2370 * nfs4delegreturn_thread() figures out what to do about
2369 2371 * the data. The only thing to do is attempt to return
2370 2372 * the delegation.
2371 2373 */
2372 2374 pp->flags = 0;
2373 2375 pp->truncate = FALSE;
2374 2376
2375 2377 /*
2376 2378 * Fire up a thread to do the delegreturn; this is
2377 2379 * necessary because we could be inside a GETPAGE or
2378 2380 * PUTPAGE and we cannot do another one.
2379 2381 */
2380 2382
2381 2383 (void) zthread_create(NULL, 0, nfs4delegreturn_thread, pp, 0,
2382 2384 minclsyspri);
2383 2385 }
2384 2386
2385 2387 static int
2386 2388 wait_for_recall1(vnode_t *vp, nfs4_op_hint_t op, nfs4_recov_state_t *rsp,
2387 2389 int flg)
2388 2390 {
2389 2391 rnode4_t *rp;
2390 2392 int error = 0;
2391 2393
2392 2394 #ifdef lint
2393 2395 op = op;
2394 2396 #endif
2395 2397
2396 2398 if (vp && vp->v_type == VREG) {
2397 2399 rp = VTOR4(vp);
2398 2400
2399 2401 /*
2400 2402 * Take r_deleg_recall_lock in read mode to synchronize
2401 2403 * with delegreturn.
2402 2404 */
2403 2405 error = nfs_rw_enter_sig(&rp->r_deleg_recall_lock,
2404 2406 RW_READER, INTR4(vp));
2405 2407
2406 2408 if (error == 0)
2407 2409 rsp->rs_flags |= flg;
2408 2410
2409 2411 }
2410 2412 return (error);
2411 2413 }
2412 2414
2413 2415 void
2414 2416 nfs4_end_op_recall(vnode_t *vp1, vnode_t *vp2, nfs4_recov_state_t *rsp)
2415 2417 {
2416 2418 NFS4_DEBUG(nfs4_recall_debug,
2417 2419 (CE_NOTE, "nfs4_end_op_recall: 0x%p, 0x%p\n",
2418 2420 (void *)vp1, (void *)vp2));
2419 2421
2420 2422 if (vp2 && rsp->rs_flags & NFS4_RS_RECALL_HELD2)
2421 2423 nfs_rw_exit(&VTOR4(vp2)->r_deleg_recall_lock);
2422 2424 if (vp1 && rsp->rs_flags & NFS4_RS_RECALL_HELD1)
2423 2425 nfs_rw_exit(&VTOR4(vp1)->r_deleg_recall_lock);
2424 2426 }
2425 2427
2426 2428 int
2427 2429 wait_for_recall(vnode_t *vp1, vnode_t *vp2, nfs4_op_hint_t op,
2428 2430 nfs4_recov_state_t *rsp)
2429 2431 {
2430 2432 int error;
2431 2433
2432 2434 NFS4_DEBUG(nfs4_recall_debug,
2433 2435 (CE_NOTE, "wait_for_recall: 0x%p, 0x%p\n",
2434 2436 (void *)vp1, (void *) vp2));
2435 2437
2436 2438 rsp->rs_flags &= ~(NFS4_RS_RECALL_HELD1|NFS4_RS_RECALL_HELD2);
2437 2439
2438 2440 if ((error = wait_for_recall1(vp1, op, rsp, NFS4_RS_RECALL_HELD1)) != 0)
2439 2441 return (error);
2440 2442
2441 2443 if ((error = wait_for_recall1(vp2, op, rsp, NFS4_RS_RECALL_HELD2))
2442 2444 != 0) {
2443 2445 if (rsp->rs_flags & NFS4_RS_RECALL_HELD1) {
2444 2446 nfs_rw_exit(&VTOR4(vp1)->r_deleg_recall_lock);
2445 2447 rsp->rs_flags &= ~NFS4_RS_RECALL_HELD1;
2446 2448 }
2447 2449
2448 2450 return (error);
2449 2451 }
2450 2452
2451 2453 return (0);
2452 2454 }
2453 2455
2454 2456 /*
2455 2457 * nfs4_dlistadd - Add this rnode to a list of rnodes to be
2456 2458 * DELEGRETURN'd at the end of recovery.
2457 2459 */
2458 2460
2459 2461 static void
2460 2462 nfs4_dlistadd(rnode4_t *rp, struct nfs4_callback_globals *ncg, int flags)
2461 2463 {
2462 2464 struct nfs4_dnode *dp;
2463 2465
2464 2466 ASSERT(mutex_owned(&rp->r_statev4_lock));
2465 2467 /*
2466 2468 * Mark the delegation as having a return pending.
2467 2469 * This will prevent the use of the delegation stateID
2468 2470 * by read, write, setattr and open.
2469 2471 */
2470 2472 rp->r_deleg_return_pending = TRUE;
2471 2473 dp = kmem_alloc(sizeof (*dp), KM_SLEEP);
2472 2474 VN_HOLD(RTOV4(rp));
2473 2475 dp->rnodep = rp;
2474 2476 dp->flags = flags;
2475 2477 mutex_enter(&ncg->nfs4_dlist_lock);
2476 2478 list_insert_head(&ncg->nfs4_dlist, dp);
2477 2479 #ifdef DEBUG
2478 2480 ncg->nfs4_dlistadd_c++;
2479 2481 #endif
2480 2482 mutex_exit(&ncg->nfs4_dlist_lock);
2481 2483 }
2482 2484
/*
 * nfs4_dlistclean_impl - Do DELEGRETURN for each rnode on the list
 * of files awaiting cleaning.  If the override_flags are non-zero
 * then use them rather than the flags that were set when the rnode
 * was added to the dlist.
 */
static void
nfs4_dlistclean_impl(struct nfs4_callback_globals *ncg, int override_flags)
{
	rnode4_t *rp;
	struct nfs4_dnode *dp;
	int flags;

	/* Only a blanket discard is a legal override. */
	ASSERT(override_flags == 0 || override_flags == NFS4_DR_DISCARD);

	mutex_enter(&ncg->nfs4_dlist_lock);
	while ((dp = list_head(&ncg->nfs4_dlist)) != NULL) {
#ifdef DEBUG
		ncg->nfs4_dlistclean_c++;
#endif
		/*
		 * Remove the node first, then drop the list lock across
		 * the (potentially blocking) delegreturn; the node is no
		 * longer reachable from the list, so this is safe.
		 */
		list_remove(&ncg->nfs4_dlist, dp);
		mutex_exit(&ncg->nfs4_dlist_lock);
		rp = dp->rnodep;
		flags = (override_flags != 0) ? override_flags : dp->flags;
		kmem_free(dp, sizeof (*dp));
		(void) nfs4delegreturn_impl(rp, flags, ncg);
		/* Release the hold taken by nfs4_dlistadd(). */
		VN_RELE(RTOV4(rp));
		mutex_enter(&ncg->nfs4_dlist_lock);
	}
	mutex_exit(&ncg->nfs4_dlist_lock);
}
2514 2516
2515 2517 void
2516 2518 nfs4_dlistclean(void)
2517 2519 {
2518 2520 struct nfs4_callback_globals *ncg;
2519 2521
2520 2522 ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
2521 2523 ASSERT(ncg != NULL);
2522 2524
2523 2525 nfs4_dlistclean_impl(ncg, 0);
2524 2526 }
↓ open down ↓ |
1004 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX