1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
24 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
25 * Copyright (c) 2012 by Delphix. All rights reserved.
26 */
27
28 /*
29 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
30 * All Rights Reserved
31 */
32
33 #include <sys/param.h>
34 #include <sys/types.h>
35 #include <sys/systm.h>
36 #include <sys/cred.h>
37 #include <sys/buf.h>
38 #include <sys/vfs.h>
39 #include <sys/vfs_opreg.h>
40 #include <sys/vnode.h>
41 #include <sys/uio.h>
42 #include <sys/errno.h>
43 #include <sys/sysmacros.h>
44 #include <sys/statvfs.h>
45 #include <sys/kmem.h>
46 #include <sys/dirent.h>
47 #include <sys/cmn_err.h>
48 #include <sys/debug.h>
49 #include <sys/systeminfo.h>
50 #include <sys/flock.h>
51 #include <sys/pathname.h>
52 #include <sys/nbmlock.h>
53 #include <sys/share.h>
54 #include <sys/atomic.h>
55 #include <sys/policy.h>
56 #include <sys/fem.h>
57 #include <sys/sdt.h>
58 #include <sys/ddi.h>
59 #include <sys/zone.h>
60
61 #include <fs/fs_reparse.h>
62
63 #include <rpc/types.h>
64 #include <rpc/auth.h>
65 #include <rpc/rpcsec_gss.h>
66 #include <rpc/svc.h>
67
68 #include <nfs/nfs.h>
69 #include <nfs/export.h>
70 #include <nfs/nfs_cmd.h>
71 #include <nfs/lm.h>
72 #include <nfs/nfs4.h>
73
74 #include <sys/strsubr.h>
75 #include <sys/strsun.h>
76
77 #include <inet/common.h>
78 #include <inet/ip.h>
79 #include <inet/ip6.h>
80
81 #include <sys/tsol/label.h>
82 #include <sys/tsol/tndb.h>
83
/*
 * Tunables.
 *
 * rfs4_maxlock_tries and rfs4_lock_delay are seeded from the compile-time
 * defaults below (a retry count and a delay in milliseconds).  Their
 * consumers are not in this part of the file.
 */
#define	RFS4_MAXLOCK_TRIES 4	/* Try to get the lock this many times */
static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
#define	RFS4_LOCK_DELAY 10	/* Milliseconds */
static clock_t rfs4_lock_delay = RFS4_LOCK_DELAY;
extern struct svc_ops rdma_svc_ops;	/* defined outside this file */
extern int nfs_loaned_buffers;		/* defined outside this file */
/* End of Tunables */

/* Forward declaration; the definition is not in this part of the file. */
static int rdma_setup_read_data4(READ4args *, READ4res *);
93
/*
 * Used to bump the stateid4.seqid value and show changes in the stateid.
 *
 * Pre-increments (sp)->bits.chgseq and evaluates to the new value, so the
 * argument is modified in place.
 */
#define	next_stateid(sp) (++(sp)->bits.chgseq)
98
99 /*
100 * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
101 * This is used to return NFS4ERR_TOOSMALL when clients specify
102 * maxcount that isn't large enough to hold the smallest possible
103 * XDR encoded dirent.
104 *
105 * sizeof cookie (8 bytes) +
106 * sizeof name_len (4 bytes) +
107 * sizeof smallest (padded) name (4 bytes) +
108 * sizeof bitmap4_len (12 bytes) + NOTE: we always encode len=2 bm4
109 * sizeof attrlist4_len (4 bytes) +
110 * sizeof next boolean (4 bytes)
111 *
112 * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
113 * the smallest possible entry4 (assumes no attrs requested).
114 * sizeof nfsstat4 (4 bytes) +
115 * sizeof verifier4 (8 bytes) +
116 * sizeof entry4list bool (4 bytes) +
117 * sizeof entry4 (36 bytes) +
118 * sizeof eof bool (4 bytes)
119 *
120 * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
121 * VOP_READDIR. Its value is the size of the maximum possible dirent
122 * for solaris. The DIRENT64_RECLEN macro returns the size of dirent
123 * required for a given name length. MAXNAMELEN is the maximum
124 * filename length allowed in Solaris. The first two DIRENT64_RECLEN()
125 * macros are to allow for . and .. entries -- just a minor tweak to try
126 * and guarantee that buffer we give to VOP_READDIR will be large enough
127 * to hold ., .., and the largest possible solaris dirent64.
128 */
/* Smallest XDR-encoded entry4: 8 + 4 + 4 + 12 + 4 + 4 bytes. */
#define	RFS4_MINLEN_ENTRY4 36
/* status + verifier + entry-list bool + one minimal entry4 + eof bool. */
#define	RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
/* Room for ".", ".." and one dirent64 carrying a MAXNAMELEN-sized name. */
#define	RFS4_MINLEN_RDDIR_BUF \
	(DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
133
134 /*
135 * It would be better to pad to 4 bytes since that's what XDR would do,
136 * but the dirents UFS gives us are already padded to 8, so just take
137 * what we're given. Dircount is only a hint anyway. Currently the
138 * solaris kernel is ASCII only, so there's no point in calling the
139 * UTF8 functions.
140 *
 * dirent64: name padded to provide 8 byte struct alignment
142 * d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
143 *
144 * cookie: uint64_t + utf8namelen: uint_t + utf8name padded to 8 bytes
145 *
146 */
/*
 * Per-entry contribution to the READDIR dircount estimate: three XDR units
 * (the cookie and the name length) plus the name length that
 * DIRENT64_NAMELEN() derives from the dirent's d_reclen.
 */
#define	DIRENT64_TO_DIRCOUNT(dp) \
	(3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
149
/*
 * NOTE(review): the original comment said "Initialized in rfs4_srvrinit",
 * but rfs4_srvrinit() as it appears in this file never assigns this
 * variable; presumably it is set elsewhere -- confirm.
 */
time_t rfs4_start_time;

/*
 * Dummy sysid for all LOCKT calls; allocated with lm_alloc_sysidt() in
 * rfs4_srvrinit() and released in rfs4_srvrfini().
 */
static sysid_t lockt_sysid;

/* Caller id obtained from fs_new_caller_id() in rfs4_srvrinit(). */
u_longlong_t nfs4_srv_caller_id;
/* Key handed to vsd_create() in rfs4_srvrinit(). */
uint_t nfs4_srv_vkey = 0;

/* Write verifier; built from the hostid and/or time in rfs4_srvrinit(). */
verifier4 Write4verf;
/* Readdir verifier; not assigned in this part of the file. */
verifier4 Readdir4verf;

void rfs4_init_compound_state(struct compound_state *);
161
/*
 * Forward declarations.
 *
 * Every rfs4_op_*() operation handler shares one signature: the decoded
 * argument op, the result op to fill in, the RPC request and the compound
 * state.  The *_free() routines take only the result op and are the
 * dis_resfree members of rfsv4disptab[] below.
 */
static void	nullfree(caddr_t);
static void	rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void	rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void	rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void	rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void	rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void	rfs4_op_create_free(nfs_resop4 *resop);
static void	rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
    struct svc_req *, struct compound_state *);
static void	rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
    struct svc_req *, struct compound_state *);
static void	rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void	rfs4_op_getattr_free(nfs_resop4 *);
static void	rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void	rfs4_op_getfh_free(nfs_resop4 *);
static void	rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void	rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void	rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void	lock_denied_free(nfs_resop4 *);
static void	rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void	rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void	rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void	rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void	rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
    struct svc_req *req, struct compound_state *cs);
static void	rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void	rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void	rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
    struct svc_req *, struct compound_state *);
static void	rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
    struct svc_req *, struct compound_state *);
static void	rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void	rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void	rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void	rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void	rfs4_op_read_free(nfs_resop4 *);
static void	rfs4_op_readdir_free(nfs_resop4 *resop);
static void	rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void	rfs4_op_readlink_free(nfs_resop4 *);
static void	rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
    struct svc_req *, struct compound_state *);
static void	rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void	rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void	rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void	rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void	rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void	rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void	rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void	rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void	rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
    struct svc_req *, struct compound_state *);
static void	rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
    struct svc_req *req, struct compound_state *);
static void	rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
    struct compound_state *);
static void	rfs4_op_secinfo_free(nfs_resop4 *);

/* Helpers whose definitions are not in this part of the file. */
static nfsstat4 check_open_access(uint32_t,
    struct compound_state *, struct svc_req *);
nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *);
void rfs4_ss_clid(rfs4_client_t *);
252
/*
 * translation table for attrs
 *
 * Set up by nfs4_ntov_table_init() and torn down by nfs4_ntov_table_free().
 * NOTE(review): the field descriptions below are inferred from names and
 * types only; the code that fills them is not in this part of the file.
 */
struct nfs4_ntov_table {
	union nfs4_attr_u *na;		/* presumably array of attr values */
	uint8_t amap[NFS4_MAXNUM_ATTRS]; /* presumably per-entry attr index */
	int attrcnt;			/* presumably entries in use */
	bool_t vfsstat;			/* presumably vfs stats were needed */
};
262
/* nfs4_ntov_table helpers; definitions are not in this part of the file. */
static void nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
static void nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
    struct nfs4_svgetit_arg *sargp);

static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
    struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
    struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);

/*
 * FEM monitors for read and write delegations; created from
 * nfs4_rd_deleg_tmpl / nfs4_wr_deleg_tmpl in rfs4_srvrinit().
 */
fem_t *deleg_rdops;
fem_t *deleg_wrops;

/*
 * Server instances form a list linked through ->prev/->next, with
 * rfs4_cur_servinst as the newest entry (see rfs4_servinst_create()).
 */
rfs4_servinst_t *rfs4_cur_servinst = NULL;	/* current server instance */
kmutex_t rfs4_servinst_lock;	/* protects linked list */
int rfs4_seen_first_compound;	/* set first time we see one */
277
/*
 * NFS4 op dispatch table
 *
 * One rfsv4disp entry describes a single NFSv4 operation: the handler to
 * run, the routine that releases whatever the handler allocated in the
 * result, and a set of RPC_* flag bits.
 */

struct rfsv4disp {
	void	(*dis_proc)();		/* proc to call */
	void	(*dis_resfree)();	/* frees space allocated by proc */
	int	dis_flags;		/* RPC_IDEMPOTENT, etc... */
};
287
/*
 * Dispatch table, ordered by NFSv4 operation number (the number is noted
 * above each entry).  Slots 0-2 have no operation assigned and are routed
 * to rfs4_op_illegal.  Operations that allocate nothing in their result
 * use nullfree as the release routine.
 */
static struct rfsv4disp rfsv4disptab[] = {
	/*
	 * NFS VERSION 4
	 */

	/* RFS_NULL = 0 */
	{rfs4_op_illegal, nullfree, 0},

	/* UNUSED = 1 */
	{rfs4_op_illegal, nullfree, 0},

	/* UNUSED = 2 */
	{rfs4_op_illegal, nullfree, 0},

	/* OP_ACCESS = 3 */
	{rfs4_op_access, nullfree, RPC_IDEMPOTENT},

	/* OP_CLOSE = 4 */
	{rfs4_op_close, nullfree, 0},

	/* OP_COMMIT = 5 */
	{rfs4_op_commit, nullfree, RPC_IDEMPOTENT},

	/* OP_CREATE = 6 */
	{rfs4_op_create, nullfree, 0},

	/* OP_DELEGPURGE = 7 */
	{rfs4_op_delegpurge, nullfree, 0},

	/* OP_DELEGRETURN = 8 */
	{rfs4_op_delegreturn, nullfree, 0},

	/* OP_GETATTR = 9 */
	{rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT},

	/* OP_GETFH = 10 */
	{rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL},

	/* OP_LINK = 11 */
	{rfs4_op_link, nullfree, 0},

	/* OP_LOCK = 12 */
	{rfs4_op_lock, lock_denied_free, 0},

	/* OP_LOCKT = 13 */
	{rfs4_op_lockt, lock_denied_free, 0},

	/* OP_LOCKU = 14 */
	{rfs4_op_locku, nullfree, 0},

	/* OP_LOOKUP = 15 */
	{rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},

	/* OP_LOOKUPP = 16 */
	{rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},

	/* OP_NVERIFY = 17 */
	{rfs4_op_nverify, nullfree, RPC_IDEMPOTENT},

	/* OP_OPEN = 18 */
	{rfs4_op_open, rfs4_free_reply, 0},

	/* OP_OPENATTR = 19 */
	{rfs4_op_openattr, nullfree, 0},

	/* OP_OPEN_CONFIRM = 20 */
	{rfs4_op_open_confirm, nullfree, 0},

	/* OP_OPEN_DOWNGRADE = 21 */
	{rfs4_op_open_downgrade, nullfree, 0},

	/* OP_PUTFH = 22 */
	{rfs4_op_putfh, nullfree, RPC_ALL},

	/* OP_PUTPUBFH = 23 */
	{rfs4_op_putpubfh, nullfree, RPC_ALL},

	/* OP_PUTROOTFH = 24 */
	{rfs4_op_putrootfh, nullfree, RPC_ALL},

	/* OP_READ = 25 */
	{rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT},

	/* OP_READDIR = 26 */
	{rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT},

	/* OP_READLINK = 27 */
	{rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT},

	/* OP_REMOVE = 28 */
	{rfs4_op_remove, nullfree, 0},

	/* OP_RENAME = 29 */
	{rfs4_op_rename, nullfree, 0},

	/* OP_RENEW = 30 */
	{rfs4_op_renew, nullfree, 0},

	/* OP_RESTOREFH = 31 */
	{rfs4_op_restorefh, nullfree, RPC_ALL},

	/* OP_SAVEFH = 32 */
	{rfs4_op_savefh, nullfree, RPC_ALL},

	/* OP_SECINFO = 33 */
	{rfs4_op_secinfo, rfs4_op_secinfo_free, 0},

	/* OP_SETATTR = 34 */
	{rfs4_op_setattr, nullfree, 0},

	/* OP_SETCLIENTID = 35 */
	{rfs4_op_setclientid, nullfree, 0},

	/* OP_SETCLIENTID_CONFIRM = 36 */
	{rfs4_op_setclientid_confirm, nullfree, 0},

	/* OP_VERIFY = 37 */
	{rfs4_op_verify, nullfree, RPC_IDEMPOTENT},

	/* OP_WRITE = 38 */
	{rfs4_op_write, nullfree, 0},

	/* OP_RELEASE_LOCKOWNER = 39 */
	{rfs4_op_release_lockowner, nullfree, 0},
};
413
/* Number of entries in rfsv4disptab[]. */
static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);

/*
 * One past the last valid rfsv4disptab[] index, so it must not be used to
 * index that table.  It does line up with the final "rfs4_op_illegal"
 * entry of the DEBUG-only rfs4_op_string[] array.
 */
#define	OP_ILLEGAL_IDX (rfsv4disp_cnt)
417
#ifdef DEBUG

/* Debug knobs; their consumers are not in this part of the file. */
int		rfs4_fillone_debug = 0;
int		rfs4_no_stub_access = 1;
int		rfs4_rddir_debug = 0;

/*
 * Printable operation names in the same order as rfsv4disptab[], followed
 * by one extra trailing entry for illegal operations.
 *
 * NOTE(review): "rfs4_op_setclient_confirm" does not match the handler
 * name rfs4_op_setclientid_confirm; left unchanged because it is a runtime
 * string that debug tooling may match on.
 */
static char    *rfs4_op_string[] = {
	"rfs4_op_null",
	"rfs4_op_1 unused",
	"rfs4_op_2 unused",
	"rfs4_op_access",
	"rfs4_op_close",
	"rfs4_op_commit",
	"rfs4_op_create",
	"rfs4_op_delegpurge",
	"rfs4_op_delegreturn",
	"rfs4_op_getattr",
	"rfs4_op_getfh",
	"rfs4_op_link",
	"rfs4_op_lock",
	"rfs4_op_lockt",
	"rfs4_op_locku",
	"rfs4_op_lookup",
	"rfs4_op_lookupp",
	"rfs4_op_nverify",
	"rfs4_op_open",
	"rfs4_op_openattr",
	"rfs4_op_open_confirm",
	"rfs4_op_open_downgrade",
	"rfs4_op_putfh",
	"rfs4_op_putpubfh",
	"rfs4_op_putrootfh",
	"rfs4_op_read",
	"rfs4_op_readdir",
	"rfs4_op_readlink",
	"rfs4_op_remove",
	"rfs4_op_rename",
	"rfs4_op_renew",
	"rfs4_op_restorefh",
	"rfs4_op_savefh",
	"rfs4_op_secinfo",
	"rfs4_op_setattr",
	"rfs4_op_setclientid",
	"rfs4_op_setclient_confirm",
	"rfs4_op_verify",
	"rfs4_op_write",
	"rfs4_op_release_lockowner",
	"rfs4_op_illegal"
};
#endif
468
/* Declarations for routines defined outside this part of the file. */
void	rfs4_ss_chkclid(rfs4_client_t *);

extern size_t   strlcpy(char *dst, const char *src, size_t dstsize);

extern void	rfs4_free_fs_locations4(fs_locations4 *);

/*
 * Step to the dirent64 that follows dp in a packed buffer by advancing
 * d_reclen bytes.  Any earlier definition of nextdp is discarded first.
 */
#ifdef	nextdp
#undef nextdp
#endif
#define	nextdp(dp)	((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
479
/*
 * FEM template for read delegations: the vnode operations intercepted and
 * the deleg_rd_*() monitor run for each.  Terminated by a NULL pair and
 * handed to fem_create() in rfs4_srvrinit().
 */
static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
	VOPNAME_OPEN,		{ .femop_open = deleg_rd_open },
	VOPNAME_WRITE,		{ .femop_write = deleg_rd_write },
	VOPNAME_SETATTR,	{ .femop_setattr = deleg_rd_setattr },
	VOPNAME_RWLOCK,		{ .femop_rwlock = deleg_rd_rwlock },
	VOPNAME_SPACE,		{ .femop_space = deleg_rd_space },
	VOPNAME_SETSECATTR,	{ .femop_setsecattr = deleg_rd_setsecattr },
	VOPNAME_VNEVENT,	{ .femop_vnevent = deleg_rd_vnevent },
	NULL,			NULL
};
/*
 * FEM template for write delegations.  Same layout as the read template,
 * with VOPNAME_READ intercepted as well.
 */
static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
	VOPNAME_OPEN,		{ .femop_open = deleg_wr_open },
	VOPNAME_READ,		{ .femop_read = deleg_wr_read },
	VOPNAME_WRITE,		{ .femop_write = deleg_wr_write },
	VOPNAME_SETATTR,	{ .femop_setattr = deleg_wr_setattr },
	VOPNAME_RWLOCK,		{ .femop_rwlock = deleg_wr_rwlock },
	VOPNAME_SPACE,		{ .femop_space = deleg_wr_space },
	VOPNAME_SETSECATTR,	{ .femop_setsecattr = deleg_wr_setsecattr },
	VOPNAME_VNEVENT,	{ .femop_vnevent = deleg_wr_vnevent },
	NULL,			NULL
};
501
502 int
503 rfs4_srvrinit(void)
504 {
505 timespec32_t verf;
506 int error;
507 extern void rfs4_attr_init();
508 extern krwlock_t rfs4_deleg_policy_lock;
509
510 /*
511 * The following algorithm attempts to find a unique verifier
512 * to be used as the write verifier returned from the server
513 * to the client. It is important that this verifier change
514 * whenever the server reboots. Of secondary importance, it
515 * is important for the verifier to be unique between two
516 * different servers.
517 *
518 * Thus, an attempt is made to use the system hostid and the
519 * current time in seconds when the nfssrv kernel module is
520 * loaded. It is assumed that an NFS server will not be able
521 * to boot and then to reboot in less than a second. If the
522 * hostid has not been set, then the current high resolution
523 * time is used. This will ensure different verifiers each
524 * time the server reboots and minimize the chances that two
525 * different servers will have the same verifier.
526 * XXX - this is broken on LP64 kernels.
527 */
528 verf.tv_sec = (time_t)zone_get_hostid(NULL);
529 if (verf.tv_sec != 0) {
530 verf.tv_nsec = gethrestime_sec();
531 } else {
532 timespec_t tverf;
533
534 gethrestime(&tverf);
535 verf.tv_sec = (time_t)tverf.tv_sec;
536 verf.tv_nsec = tverf.tv_nsec;
537 }
538
539 Write4verf = *(uint64_t *)&verf;
540
541 rfs4_attr_init();
542 mutex_init(&rfs4_deleg_lock, NULL, MUTEX_DEFAULT, NULL);
543
544 /* Used to manage create/destroy of server state */
545 mutex_init(&rfs4_state_lock, NULL, MUTEX_DEFAULT, NULL);
546
547 /* Used to manage access to server instance linked list */
548 mutex_init(&rfs4_servinst_lock, NULL, MUTEX_DEFAULT, NULL);
549
550 /* Used to manage access to rfs4_deleg_policy */
551 rw_init(&rfs4_deleg_policy_lock, NULL, RW_DEFAULT, NULL);
552
553 error = fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops);
554 if (error != 0) {
555 rfs4_disable_delegation();
556 } else {
557 error = fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
558 &deleg_wrops);
559 if (error != 0) {
560 rfs4_disable_delegation();
561 fem_free(deleg_rdops);
562 }
563 }
564
565 nfs4_srv_caller_id = fs_new_caller_id();
566
567 lockt_sysid = lm_alloc_sysidt();
568
569 vsd_create(&nfs4_srv_vkey, NULL);
570
571 return (0);
572 }
573
574 void
575 rfs4_srvrfini(void)
576 {
577 extern krwlock_t rfs4_deleg_policy_lock;
578
579 if (lockt_sysid != LM_NOSYSID) {
580 lm_free_sysidt(lockt_sysid);
581 lockt_sysid = LM_NOSYSID;
582 }
583
584 mutex_destroy(&rfs4_deleg_lock);
585 mutex_destroy(&rfs4_state_lock);
586 rw_destroy(&rfs4_deleg_policy_lock);
587
588 fem_free(deleg_rdops);
589 fem_free(deleg_wrops);
590 }
591
592 void
593 rfs4_init_compound_state(struct compound_state *cs)
594 {
595 bzero(cs, sizeof (*cs));
596 cs->cont = TRUE;
597 cs->access = CS_ACCESS_DENIED;
598 cs->deleg = FALSE;
599 cs->mandlock = FALSE;
600 cs->fh.nfs_fh4_val = cs->fhbuf;
601 }
602
603 void
604 rfs4_grace_start(rfs4_servinst_t *sip)
605 {
606 rw_enter(&sip->rwlock, RW_WRITER);
607 sip->start_time = (time_t)TICK_TO_SEC(ddi_get_lbolt());
608 sip->grace_period = rfs4_grace_period;
609 rw_exit(&sip->rwlock);
610 }
611
612 /*
613 * returns true if the instance's grace period has never been started
614 */
615 int
616 rfs4_servinst_grace_new(rfs4_servinst_t *sip)
617 {
618 time_t start_time;
619
620 rw_enter(&sip->rwlock, RW_READER);
621 start_time = sip->start_time;
622 rw_exit(&sip->rwlock);
623
624 return (start_time == 0);
625 }
626
627 /*
628 * Indicates if server instance is within the
629 * grace period.
630 */
631 int
632 rfs4_servinst_in_grace(rfs4_servinst_t *sip)
633 {
634 time_t grace_expiry;
635
636 rw_enter(&sip->rwlock, RW_READER);
637 grace_expiry = sip->start_time + sip->grace_period;
638 rw_exit(&sip->rwlock);
639
640 return (((time_t)TICK_TO_SEC(ddi_get_lbolt())) < grace_expiry);
641 }
642
643 int
644 rfs4_clnt_in_grace(rfs4_client_t *cp)
645 {
646 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
647
648 return (rfs4_servinst_in_grace(cp->rc_server_instance));
649 }
650
651 /*
652 * reset all currently active grace periods
653 */
654 void
655 rfs4_grace_reset_all(void)
656 {
657 rfs4_servinst_t *sip;
658
659 mutex_enter(&rfs4_servinst_lock);
660 for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev)
661 if (rfs4_servinst_in_grace(sip))
662 rfs4_grace_start(sip);
663 mutex_exit(&rfs4_servinst_lock);
664 }
665
666 /*
667 * start any new instances' grace periods
668 */
669 void
670 rfs4_grace_start_new(void)
671 {
672 rfs4_servinst_t *sip;
673
674 mutex_enter(&rfs4_servinst_lock);
675 for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev)
676 if (rfs4_servinst_grace_new(sip))
677 rfs4_grace_start(sip);
678 mutex_exit(&rfs4_servinst_lock);
679 }
680
681 static rfs4_dss_path_t *
682 rfs4_dss_newpath(rfs4_servinst_t *sip, char *path, unsigned index)
683 {
684 size_t len;
685 rfs4_dss_path_t *dss_path;
686
687 dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
688
689 /*
690 * Take a copy of the string, since the original may be overwritten.
691 * Sadly, no strdup() in the kernel.
692 */
693 /* allow for NUL */
694 len = strlen(path) + 1;
695 dss_path->path = kmem_alloc(len, KM_SLEEP);
696 (void) strlcpy(dss_path->path, path, len);
697
698 /* associate with servinst */
699 dss_path->sip = sip;
700 dss_path->index = index;
701
702 /*
703 * Add to list of served paths.
704 * No locking required, as we're only ever called at startup.
705 */
706 if (rfs4_dss_pathlist == NULL) {
707 /* this is the first dss_path_t */
708
709 /* needed for insque/remque */
710 dss_path->next = dss_path->prev = dss_path;
711
712 rfs4_dss_pathlist = dss_path;
713 } else {
714 insque(dss_path, rfs4_dss_pathlist);
715 }
716
717 return (dss_path);
718 }
719
720 /*
721 * Create a new server instance, and make it the currently active instance.
722 * Note that starting the grace period too early will reduce the clients'
723 * recovery window.
724 */
725 void
726 rfs4_servinst_create(int start_grace, int dss_npaths, char **dss_paths)
727 {
728 unsigned i;
729 rfs4_servinst_t *sip;
730 rfs4_oldstate_t *oldstate;
731
732 sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
733 rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
734
735 sip->start_time = (time_t)0;
736 sip->grace_period = (time_t)0;
737 sip->next = NULL;
738 sip->prev = NULL;
739
740 rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
741 /*
742 * This initial dummy entry is required to setup for insque/remque.
743 * It must be skipped over whenever the list is traversed.
744 */
745 oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
746 /* insque/remque require initial list entry to be self-terminated */
747 oldstate->next = oldstate;
748 oldstate->prev = oldstate;
749 sip->oldstate = oldstate;
750
751
752 sip->dss_npaths = dss_npaths;
753 sip->dss_paths = kmem_alloc(dss_npaths *
754 sizeof (rfs4_dss_path_t *), KM_SLEEP);
755
756 for (i = 0; i < dss_npaths; i++) {
757 sip->dss_paths[i] = rfs4_dss_newpath(sip, dss_paths[i], i);
758 }
759
760 mutex_enter(&rfs4_servinst_lock);
761 if (rfs4_cur_servinst != NULL) {
762 /* add to linked list */
763 sip->prev = rfs4_cur_servinst;
764 rfs4_cur_servinst->next = sip;
765 }
766 if (start_grace)
767 rfs4_grace_start(sip);
768 /* make the new instance "current" */
769 rfs4_cur_servinst = sip;
770
771 mutex_exit(&rfs4_servinst_lock);
772 }
773
774 /*
775 * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
776 * all instances directly.
777 */
778 void
779 rfs4_servinst_destroy_all(void)
780 {
781 rfs4_servinst_t *sip, *prev, *current;
782 #ifdef DEBUG
783 int n = 0;
784 #endif
785
786 mutex_enter(&rfs4_servinst_lock);
787 ASSERT(rfs4_cur_servinst != NULL);
788 current = rfs4_cur_servinst;
789 rfs4_cur_servinst = NULL;
790 for (sip = current; sip != NULL; sip = prev) {
791 prev = sip->prev;
792 rw_destroy(&sip->rwlock);
793 if (sip->oldstate)
794 kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
795 if (sip->dss_paths)
796 kmem_free(sip->dss_paths,
797 sip->dss_npaths * sizeof (rfs4_dss_path_t *));
798 kmem_free(sip, sizeof (rfs4_servinst_t));
799 #ifdef DEBUG
800 n++;
801 #endif
802 }
803 mutex_exit(&rfs4_servinst_lock);
804 }
805
806 /*
807 * Assign the current server instance to a client_t.
808 * Should be called with cp->rc_dbe held.
809 */
810 void
811 rfs4_servinst_assign(rfs4_client_t *cp, rfs4_servinst_t *sip)
812 {
813 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
814
815 /*
816 * The lock ensures that if the current instance is in the process
817 * of changing, we will see the new one.
818 */
819 mutex_enter(&rfs4_servinst_lock);
820 cp->rc_server_instance = sip;
821 mutex_exit(&rfs4_servinst_lock);
822 }
823
824 rfs4_servinst_t *
825 rfs4_servinst(rfs4_client_t *cp)
826 {
827 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
828
829 return (cp->rc_server_instance);
830 }
831
/*
 * Result-free routine for operations that allocate nothing in their
 * result; deliberately does nothing.  Used as the dis_resfree member of
 * most rfsv4disptab[] entries.
 */
/* ARGSUSED */
static void
nullfree(caddr_t resop)
{
}
837
838 /*
839 * This is a fall-through for invalid or not implemented (yet) ops
840 */
841 /* ARGSUSED */
842 static void
843 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
844 struct compound_state *cs)
845 {
846 *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
847 }
848
849 /*
850 * Check if the security flavor, nfsnum, is in the flavor_list.
851 */
852 bool_t
853 in_flavor_list(int nfsnum, int *flavor_list, int count)
854 {
855 int i;
856
857 for (i = 0; i < count; i++) {
858 if (nfsnum == flavor_list[i])
859 return (TRUE);
860 }
861 return (FALSE);
862 }
863
/*
 * Used by rfs4_op_secinfo to get the security information from the
 * export structure associated with the component.
 *
 * cs	- compound state; supplies the current directory vnode (cs->vp),
 *	  its export (cs->exi), the request, credentials and the flavor.
 * nm	- NUL-terminated component name to look up in cs->vp.
 * resp	- on success, SECINFO4resok_len and SECINFO4resok_val describe a
 *	  kmem_alloc()ed array of secinfo4 entries.  For RPCSEC_GSS entries
 *	  the OID bytes are allocated separately.  Nothing in this function
 *	  frees the array; presumably rfs4_op_secinfo_free() does.
 *
 * Returns NFS4_OK on success.  Otherwise returns the puterrno4()
 * translation of the failure: ENOENT for ".." at the system root or for a
 * component that is not visible, the VOP_LOOKUP / traverse / fid error, or
 * EACCES when no export covers the component.
 *
 * The vnode obtained from VOP_LOOKUP is released on every return path
 * that follows the lookup.  The caller must hold exported_lock (asserted
 * below).
 */
/* ARGSUSED */
static nfsstat4
do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
{
	int error, different_export = 0;
	vnode_t *dvp, *vp;
	struct exportinfo *exi = NULL;
	fid_t fid;
	uint_t count, i;
	secinfo4 *resok_val;
	struct secinfo *secp;
	seconfig_t *si;
	bool_t did_traverse = FALSE;
	int dotdot, walk;	/* walk is written by nfs_vptoexi, never read */

	dvp = cs->vp;
	dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');

	/*
	 * If dotdotting, then need to check whether it's above the
	 * root of a filesystem, or above an export point.
	 */
	if (dotdot) {

		/*
		 * If dotdotting at the root of a filesystem, then
		 * need to traverse back to the mounted-on filesystem
		 * and do the dotdot lookup there.
		 */
		if (cs->vp->v_flag & VROOT) {

			/*
			 * If at the system root, then can
			 * go up no further.
			 */
			if (VN_CMP(dvp, rootdir))
				return (puterrno4(ENOENT));

			/*
			 * Traverse back to the mounted-on filesystem
			 *
			 * NOTE(review): dvp is never released in this
			 * function; whether untraverse() returns a held
			 * vnode (or consumes a hold on cs->vp) is not
			 * visible here -- confirm the reference counting.
			 */
			dvp = untraverse(cs->vp);

			/*
			 * Set the different_export flag so we remember
			 * to pick up a new exportinfo entry for
			 * this new filesystem.
			 */
			different_export = 1;
		} else {

			/*
			 * If dotdotting above an export point then set
			 * the different_export to get new export info.
			 */
			different_export = nfs_exported(cs->exi, cs->vp);
		}
	}

	/*
	 * Get the vnode for the component "nm".
	 */
	error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
	    NULL, NULL, NULL);
	if (error)
		return (puterrno4(error));

	/*
	 * If the vnode is in a pseudo filesystem, or if the security flavor
	 * used in the request is valid but not an explicitly shared flavor,
	 * or the access bit indicates that this is a limited access,
	 * check whether this vnode is visible.
	 */
	if (!different_export &&
	    (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
	    cs->access & CS_ACCESS_LIMITED)) {
		/* nfs_visible() may also set different_export */
		if (! nfs_visible(cs->exi, vp, &different_export)) {
			VN_RELE(vp);
			return (puterrno4(ENOENT));
		}
	}

	/*
	 * If it's a mountpoint, then traverse it.
	 */
	if (vn_ismntpt(vp)) {
		if ((error = traverse(&vp)) != 0) {
			VN_RELE(vp);
			return (puterrno4(error));
		}
		/* remember that we had to traverse mountpoint */
		did_traverse = TRUE;
		different_export = 1;
	} else if (vp->v_vfsp != dvp->v_vfsp) {
		/*
		 * If vp isn't a mountpoint and the vfs ptrs aren't the same,
		 * then vp is probably an LOFS object.  We don't need the
		 * realvp, we just need to know that we might have crossed
		 * a server fs boundary and need to call checkexport4.
		 * (LOFS lookup hides server fs mountpoints, and actually calls
		 * traverse)
		 */
		different_export = 1;
	}

	/*
	 * Get the export information for it.
	 */
	if (different_export) {

		bzero(&fid, sizeof (fid));
		fid.fid_len = MAXFIDSZ;
		error = vop_fid_pseudo(vp, &fid);
		if (error) {
			VN_RELE(vp);
			return (puterrno4(error));
		}

		/* ".." uses nfs_vptoexi(); anything else checkexport4() */
		if (dotdot)
			exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
		else
			exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);

		if (exi == NULL) {
			if (did_traverse == TRUE) {
				/*
				 * If this vnode is a mounted-on vnode,
				 * but the mounted-on file system is not
				 * exported, send back the secinfo for
				 * the exported node that the mounted-on
				 * vnode lives in.
				 */
				exi = cs->exi;
			} else {
				VN_RELE(vp);
				return (puterrno4(EACCES));
			}
		}
	} else {
		exi = cs->exi;
	}
	ASSERT(exi != NULL);


	/*
	 * Create the secinfo result based on the security information
	 * from the exportinfo structure (exi).
	 *
	 * Return all flavors for a pseudo node.
	 * For a real export node, return the flavor that the client
	 * has access with.
	 */
	ASSERT(RW_LOCK_HELD(&exported_lock));
	if (PSEUDO(exi)) {
		count = exi->exi_export.ex_seccnt; /* total sec count */
		resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
		secp = exi->exi_export.ex_secinfo;

		for (i = 0; i < count; i++) {
			si = &secp[i].s_secinfo;
			resok_val[i].flavor = si->sc_rpcnum;
			/* flavor_info is filled in for RPCSEC_GSS only */
			if (resok_val[i].flavor == RPCSEC_GSS) {
				rpcsec_gss_info *info;

				info = &resok_val[i].flavor_info;
				info->qop = si->sc_qop;
				info->service = (rpc_gss_svc_t)si->sc_service;

				/* get oid opaque data */
				info->oid.sec_oid4_len =
				    si->sc_gss_mech_type->length;
				info->oid.sec_oid4_val = kmem_alloc(
				    si->sc_gss_mech_type->length, KM_SLEEP);
				bcopy(
				    si->sc_gss_mech_type->elements,
				    info->oid.sec_oid4_val,
				    info->oid.sec_oid4_len);
			}
		}
		resp->SECINFO4resok_len = count;
		resp->SECINFO4resok_val = resok_val;
	} else {
		int ret_cnt = 0, k = 0;
		int *flavor_list;

		count = exi->exi_export.ex_seccnt; /* total sec count */
		secp = exi->exi_export.ex_secinfo;

		/* scratch list of accepted flavors; freed before returning */
		flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
		/* find out which flavors to return */
		for (i = 0; i < count; i ++) {
			int access, flavor, perm;

			flavor = secp[i].s_secinfo.sc_nfsnum;
			perm = secp[i].s_flags;

			access = nfsauth4_secinfo_access(exi, cs->req,
			    flavor, perm, cs->basecr);

			/* keep flavors neither denied nor wrong-sec */
			if (! (access & NFSAUTH_DENIED) &&
			    ! (access & NFSAUTH_WRONGSEC)) {
				flavor_list[ret_cnt] = flavor;
				ret_cnt++;
			}
		}

		/* Create the returning SECINFO value */
		resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);

		for (i = 0; i < count; i++) {
			/*
			 * If the flavor is in the flavor list,
			 * fill in resok_val.
			 */
			si = &secp[i].s_secinfo;
			if (in_flavor_list(si->sc_nfsnum,
			    flavor_list, ret_cnt)) {
				resok_val[k].flavor = si->sc_rpcnum;
				if (resok_val[k].flavor == RPCSEC_GSS) {
					rpcsec_gss_info *info;

					info = &resok_val[k].flavor_info;
					info->qop = si->sc_qop;
					info->service = (rpc_gss_svc_t)
					    si->sc_service;

					/* get oid opaque data */
					info->oid.sec_oid4_len =
					    si->sc_gss_mech_type->length;
					info->oid.sec_oid4_val = kmem_alloc(
					    si->sc_gss_mech_type->length,
					    KM_SLEEP);
					bcopy(si->sc_gss_mech_type->elements,
					    info->oid.sec_oid4_val,
					    info->oid.sec_oid4_len);
				}
				k++;
			}
			/* stop once resok_val is full (k never exceeds ret_cnt) */
			if (k >= ret_cnt)
				break;
		}
		resp->SECINFO4resok_len = ret_cnt;
		resp->SECINFO4resok_val = resok_val;
		kmem_free(flavor_list, count * sizeof (int));
	}

	VN_RELE(vp);
	return (NFS4_OK);
}
1117
1118 /*
1119 * SECINFO (Operation 33): Obtain required security information on
1120 * the component name in the format of (security-mechanism-oid, qop, service)
1121 * triplets.
1122 */
1123 /* ARGSUSED */
1124 static void
1125 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1126 struct compound_state *cs)
1127 {
1128 SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo;
1129 SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1130 utf8string *utfnm = &args->name;
1131 uint_t len;
1132 char *nm;
1133 struct sockaddr *ca;
1134 char *name = NULL;
1135 nfsstat4 status = NFS4_OK;
1136
1137 DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
1138 SECINFO4args *, args);
1139
1140 /*
1141 * Current file handle (cfh) should have been set before getting
1142 * into this function. If not, return error.
1143 */
1144 if (cs->vp == NULL) {
1145 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1146 goto out;
1147 }
1148
1149 if (cs->vp->v_type != VDIR) {
1150 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1151 goto out;
1152 }
1153
1154 /*
1155 * Verify the component name. If failed, error out, but
1156 * do not error out if the component name is a "..".
1157 * SECINFO will return its parents secinfo data for SECINFO "..".
1158 */
1159 status = utf8_dir_verify(utfnm);
1160 if (status != NFS4_OK) {
1161 if (utfnm->utf8string_len != 2 ||
1162 utfnm->utf8string_val[0] != '.' ||
1163 utfnm->utf8string_val[1] != '.') {
1164 *cs->statusp = resp->status = status;
1165 goto out;
1166 }
1167 }
1168
1169 nm = utf8_to_str(utfnm, &len, NULL);
1170 if (nm == NULL) {
1171 *cs->statusp = resp->status = NFS4ERR_INVAL;
1172 goto out;
1173 }
1174
1175 if (len > MAXNAMELEN) {
1176 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1177 kmem_free(nm, len);
1178 goto out;
1179 }
1180
1181 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1182 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1183 MAXPATHLEN + 1);
1184
1185 if (name == NULL) {
1186 *cs->statusp = resp->status = NFS4ERR_INVAL;
1187 kmem_free(nm, len);
1188 goto out;
1189 }
1190
1191
1192 *cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp);
1193
1194 if (name != nm)
1195 kmem_free(name, MAXPATHLEN + 1);
1196 kmem_free(nm, len);
1197
1198 out:
1199 DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs,
1200 SECINFO4res *, resp);
1201 }
1202
1203 /*
1204 * Free SECINFO result.
1205 */
1206 /* ARGSUSED */
1207 static void
1208 rfs4_op_secinfo_free(nfs_resop4 *resop)
1209 {
1210 SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1211 int count, i;
1212 secinfo4 *resok_val;
1213
1214 /* If this is not an Ok result, nothing to free. */
1215 if (resp->status != NFS4_OK) {
1216 return;
1217 }
1218
1219 count = resp->SECINFO4resok_len;
1220 resok_val = resp->SECINFO4resok_val;
1221
1222 for (i = 0; i < count; i++) {
1223 if (resok_val[i].flavor == RPCSEC_GSS) {
1224 rpcsec_gss_info *info;
1225
1226 info = &resok_val[i].flavor_info;
1227 kmem_free(info->oid.sec_oid4_val,
1228 info->oid.sec_oid4_len);
1229 }
1230 }
1231 kmem_free(resok_val, count * sizeof (secinfo4));
1232 resp->SECINFO4resok_len = 0;
1233 resp->SECINFO4resok_val = NULL;
1234 }
1235
1236 /* ARGSUSED */
1237 static void
1238 rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1239 struct compound_state *cs)
1240 {
1241 ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
1242 ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
1243 int error;
1244 vnode_t *vp;
1245 struct vattr va;
1246 int checkwriteperm;
1247 cred_t *cr = cs->cr;
1248 bslabel_t *clabel, *slabel;
1249 ts_label_t *tslabel;
1250 boolean_t admin_low_client;
1251
1252 DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs,
1253 ACCESS4args *, args);
1254
1255 #if 0 /* XXX allow access even if !cs->access. Eventually only pseudo fs */
1256 if (cs->access == CS_ACCESS_DENIED) {
1257 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1258 goto out;
1259 }
1260 #endif
1261 if (cs->vp == NULL) {
1262 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1263 goto out;
1264 }
1265
1266 ASSERT(cr != NULL);
1267
1268 vp = cs->vp;
1269
1270 /*
1271 * If the file system is exported read only, it is not appropriate
1272 * to check write permissions for regular files and directories.
1273 * Special files are interpreted by the client, so the underlying
1274 * permissions are sent back to the client for interpretation.
1275 */
1276 if (rdonly4(req, cs) &&
1277 (vp->v_type == VREG || vp->v_type == VDIR))
1278 checkwriteperm = 0;
1279 else
1280 checkwriteperm = 1;
1281
1282 /*
1283 * XXX
1284 * We need the mode so that we can correctly determine access
1285 * permissions relative to a mandatory lock file. Access to
1286 * mandatory lock files is denied on the server, so it might
1287 * as well be reflected to the server during the open.
1288 */
1289 va.va_mask = AT_MODE;
1290 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1291 if (error) {
1292 *cs->statusp = resp->status = puterrno4(error);
1293 goto out;
1294 }
1295 resp->access = 0;
1296 resp->supported = 0;
1297
1298 if (is_system_labeled()) {
1299 ASSERT(req->rq_label != NULL);
1300 clabel = req->rq_label;
1301 DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *,
1302 "got client label from request(1)",
1303 struct svc_req *, req);
1304 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1305 if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) {
1306 *cs->statusp = resp->status = puterrno4(EACCES);
1307 goto out;
1308 }
1309 slabel = label2bslabel(tslabel);
1310 DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel,
1311 char *, "got server label(1) for vp(2)",
1312 bslabel_t *, slabel, vnode_t *, vp);
1313
1314 admin_low_client = B_FALSE;
1315 } else
1316 admin_low_client = B_TRUE;
1317 }
1318
1319 if (args->access & ACCESS4_READ) {
1320 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
1321 if (!error && !MANDLOCK(vp, va.va_mode) &&
1322 (!is_system_labeled() || admin_low_client ||
1323 bldominates(clabel, slabel)))
1324 resp->access |= ACCESS4_READ;
1325 resp->supported |= ACCESS4_READ;
1326 }
1327 if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
1328 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1329 if (!error && (!is_system_labeled() || admin_low_client ||
1330 bldominates(clabel, slabel)))
1331 resp->access |= ACCESS4_LOOKUP;
1332 resp->supported |= ACCESS4_LOOKUP;
1333 }
1334 if (checkwriteperm &&
1335 (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
1336 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1337 if (!error && !MANDLOCK(vp, va.va_mode) &&
1338 (!is_system_labeled() || admin_low_client ||
1339 blequal(clabel, slabel)))
1340 resp->access |=
1341 (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND));
1342 resp->supported |=
1343 resp->access & (ACCESS4_MODIFY | ACCESS4_EXTEND);
1344 }
1345
1346 if (checkwriteperm &&
1347 (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
1348 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1349 if (!error && (!is_system_labeled() || admin_low_client ||
1350 blequal(clabel, slabel)))
1351 resp->access |= ACCESS4_DELETE;
1352 resp->supported |= ACCESS4_DELETE;
1353 }
1354 if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
1355 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1356 if (!error && !MANDLOCK(vp, va.va_mode) &&
1357 (!is_system_labeled() || admin_low_client ||
1358 bldominates(clabel, slabel)))
1359 resp->access |= ACCESS4_EXECUTE;
1360 resp->supported |= ACCESS4_EXECUTE;
1361 }
1362
1363 if (is_system_labeled() && !admin_low_client)
1364 label_rele(tslabel);
1365
1366 *cs->statusp = resp->status = NFS4_OK;
1367 out:
1368 DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
1369 ACCESS4res *, resp);
1370 }
1371
1372 /* ARGSUSED */
1373 static void
1374 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1375 struct compound_state *cs)
1376 {
1377 COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1378 COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1379 int error;
1380 vnode_t *vp = cs->vp;
1381 cred_t *cr = cs->cr;
1382 vattr_t va;
1383
1384 DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1385 COMMIT4args *, args);
1386
1387 if (vp == NULL) {
1388 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1389 goto out;
1390 }
1391 if (cs->access == CS_ACCESS_DENIED) {
1392 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1393 goto out;
1394 }
1395
1396 if (args->offset + args->count < args->offset) {
1397 *cs->statusp = resp->status = NFS4ERR_INVAL;
1398 goto out;
1399 }
1400
1401 va.va_mask = AT_UID;
1402 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1403
1404 /*
1405 * If we can't get the attributes, then we can't do the
1406 * right access checking. So, we'll fail the request.
1407 */
1408 if (error) {
1409 *cs->statusp = resp->status = puterrno4(error);
1410 goto out;
1411 }
1412 if (rdonly4(req, cs)) {
1413 *cs->statusp = resp->status = NFS4ERR_ROFS;
1414 goto out;
1415 }
1416
1417 if (vp->v_type != VREG) {
1418 if (vp->v_type == VDIR)
1419 resp->status = NFS4ERR_ISDIR;
1420 else
1421 resp->status = NFS4ERR_INVAL;
1422 *cs->statusp = resp->status;
1423 goto out;
1424 }
1425
1426 if (crgetuid(cr) != va.va_uid &&
1427 (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
1428 *cs->statusp = resp->status = puterrno4(error);
1429 goto out;
1430 }
1431
1432 error = VOP_FSYNC(vp, FSYNC, cr, NULL);
1433
1434 if (error) {
1435 *cs->statusp = resp->status = puterrno4(error);
1436 goto out;
1437 }
1438
1439 *cs->statusp = resp->status = NFS4_OK;
1440 resp->writeverf = Write4verf;
1441 out:
1442 DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1443 COMMIT4res *, resp);
1444 }
1445
1446 /*
1447 * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1448 * was completed. It does the nfsv4 create for special files.
1449 */
1450 /* ARGSUSED */
1451 static vnode_t *
1452 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1453 struct compound_state *cs, vattr_t *vap, char *nm)
1454 {
1455 int error;
1456 cred_t *cr = cs->cr;
1457 vnode_t *dvp = cs->vp;
1458 vnode_t *vp = NULL;
1459 int mode;
1460 enum vcexcl excl;
1461
1462 switch (args->type) {
1463 case NF4CHR:
1464 case NF4BLK:
1465 if (secpolicy_sys_devices(cr) != 0) {
1466 *cs->statusp = resp->status = NFS4ERR_PERM;
1467 return (NULL);
1468 }
1469 if (args->type == NF4CHR)
1470 vap->va_type = VCHR;
1471 else
1472 vap->va_type = VBLK;
1473 vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1474 args->ftype4_u.devdata.specdata2);
1475 vap->va_mask |= AT_RDEV;
1476 break;
1477 case NF4SOCK:
1478 vap->va_type = VSOCK;
1479 break;
1480 case NF4FIFO:
1481 vap->va_type = VFIFO;
1482 break;
1483 default:
1484 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1485 return (NULL);
1486 }
1487
1488 /*
1489 * Must specify the mode.
1490 */
1491 if (!(vap->va_mask & AT_MODE)) {
1492 *cs->statusp = resp->status = NFS4ERR_INVAL;
1493 return (NULL);
1494 }
1495
1496 excl = EXCL;
1497
1498 mode = 0;
1499
1500 error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL);
1501 if (error) {
1502 *cs->statusp = resp->status = puterrno4(error);
1503 return (NULL);
1504 }
1505 return (vp);
1506 }
1507
/*
 * nfsv4 create is used to create non-regular files. For regular files,
 * use nfsv4 open.
 *
 * Creates a directory (VOP_MKDIR), a symbolic link (VOP_SYMLINK followed
 * by VOP_LOOKUP to obtain the vnode) or a special file (via
 * do_rfs4_op_mknod) named args->objname in the current directory cs->vp.
 *
 * On success the new object becomes the current filehandle (cs->fh and
 * cs->vp are replaced), resp->cinfo describes the change to the parent
 * directory and resp->attrset lists the createattrs that were applied.
 * The status is stored in both resp->status and *cs->statusp.
 *
 * Resources that every exit path after their acquisition must release:
 *   nm    - decoded object name, "len" bytes
 *   name  - converted object name; a separate MAXPATHLEN + 1 byte buffer
 *           only when it differs from nm
 *   ntov  - attribute table set up by nfs4_ntov_table_init()
 *   lnm / lname - the same pair for the symlink target (NF4LNK only)
 */
/* ARGSUSED */
static void
rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
    struct compound_state *cs)
{
	CREATE4args *args = &argop->nfs_argop4_u.opcreate;
	CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
	int error;
	/*
	 * bva: parent attrs before the create; iva: parent sequence right
	 * after the create; iva2: re-check after VOP_LOOKUP (symlink only);
	 * ava: parent attrs after the parent has been synced.
	 */
	struct vattr bva, iva, iva2, ava, *vap;
	cred_t *cr = cs->cr;
	vnode_t *dvp = cs->vp;
	vnode_t *vp = NULL;
	vnode_t *realvp;
	char *nm, *lnm;
	uint_t len, llen;
	int syncval = 0;
	struct nfs4_svgetit_arg sarg;
	struct nfs4_ntov_table ntov;
	struct statvfs64 sb;
	nfsstat4 status;
	struct sockaddr *ca;
	char *name = NULL;
	char *lname = NULL;

	DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs,
	    CREATE4args *, args);

	resp->attrset = 0;

	if (dvp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto out;
	}

	/*
	 * If there is an unshared filesystem mounted on this vnode,
	 * do not allow to create an object in this directory.
	 */
	if (vn_ismntpt(dvp)) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}

	/* Verify that type is correct */
	switch (args->type) {
	case NF4LNK:
	case NF4BLK:
	case NF4CHR:
	case NF4SOCK:
	case NF4FIFO:
	case NF4DIR:
		break;
	default:
		*cs->statusp = resp->status = NFS4ERR_BADTYPE;
		goto out;
	};

	if (cs->access == CS_ACCESS_DENIED) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}
	if (dvp->v_type != VDIR) {
		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
		goto out;
	}
	status = utf8_dir_verify(&args->objname);
	if (status != NFS4_OK) {
		*cs->statusp = resp->status = status;
		goto out;
	}

	if (rdonly4(req, cs)) {
		*cs->statusp = resp->status = NFS4ERR_ROFS;
		goto out;
	}

	/*
	 * Name of newly created object
	 */
	nm = utf8_to_fn(&args->objname, &len, NULL);
	if (nm == NULL) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		goto out;
	}

	if (len > MAXNAMELEN) {
		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
		kmem_free(nm, len);
		goto out;
	}

	/* Apply the inbound name conversion for this client and export. */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
	    MAXPATHLEN + 1);

	if (name == NULL) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		kmem_free(nm, len);
		goto out;
	}

	resp->attrset = 0;

	sarg.sbp = &sb;
	sarg.is_referral = B_FALSE;
	nfs4_ntov_table_init(&ntov);

	/* Decode createattrs into sarg.vap; ntov must be freed from here on */
	status = do_rfs4_set_attrs(&resp->attrset,
	    &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT);

	/* A create request that sets no vattr attribute at all is invalid */
	if (sarg.vap->va_mask == 0 && status == NFS4_OK)
		status = NFS4ERR_INVAL;

	if (status != NFS4_OK) {
		*cs->statusp = resp->status = status;
		if (name != nm)
			kmem_free(name, MAXPATHLEN + 1);
		kmem_free(nm, len);
		nfs4_ntov_table_free(&ntov, &sarg);
		resp->attrset = 0;
		goto out;
	}

	/* Get "before" change value */
	bva.va_mask = AT_CTIME|AT_SEQ|AT_MODE;
	error = VOP_GETATTR(dvp, &bva, 0, cr, NULL);
	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
		if (name != nm)
			kmem_free(name, MAXPATHLEN + 1);
		kmem_free(nm, len);
		nfs4_ntov_table_free(&ntov, &sarg);
		resp->attrset = 0;
		goto out;
	}
	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)

	vap = sarg.vap;

	/*
	 * Set the default initial values for attributes when the parent
	 * directory does not have the VSUID/VSGID bit set and they have
	 * not been specified in createattrs.
	 */
	if (!(bva.va_mode & VSUID) && (vap->va_mask & AT_UID) == 0) {
		vap->va_uid = crgetuid(cr);
		vap->va_mask |= AT_UID;
	}
	if (!(bva.va_mode & VSGID) && (vap->va_mask & AT_GID) == 0) {
		vap->va_gid = crgetgid(cr);
		vap->va_mask |= AT_GID;
	}

	vap->va_mask |= AT_TYPE;
	switch (args->type) {
	case NF4DIR:
		vap->va_type = VDIR;
		if ((vap->va_mask & AT_MODE) == 0) {
			vap->va_mode = 0700;	/* default: owner rwx only */
			vap->va_mask |= AT_MODE;
		}
		error = VOP_MKDIR(dvp, name, vap, &vp, cr, NULL, 0, NULL);
		if (error)
			break;

		/*
		 * Get the initial "after" sequence number, if it fails,
		 * set to zero
		 */
		iva.va_mask = AT_SEQ;
		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
			iva.va_seq = 0;
		break;
	case NF4LNK:
		vap->va_type = VLNK;
		if ((vap->va_mask & AT_MODE) == 0) {
			vap->va_mode = 0700;	/* default: owner rwx only */
			vap->va_mask |= AT_MODE;
		}

		/*
		 * symlink names must be treated as data
		 */
		lnm = utf8_to_str((utf8string *)&args->ftype4_u.linkdata,
		    &llen, NULL);

		if (lnm == NULL) {
			*cs->statusp = resp->status = NFS4ERR_INVAL;
			if (name != nm)
				kmem_free(name, MAXPATHLEN + 1);
			kmem_free(nm, len);
			nfs4_ntov_table_free(&ntov, &sarg);
			resp->attrset = 0;
			goto out;
		}

		if (llen > MAXPATHLEN) {
			*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
			if (name != nm)
				kmem_free(name, MAXPATHLEN + 1);
			kmem_free(nm, len);
			kmem_free(lnm, llen);
			nfs4_ntov_table_free(&ntov, &sarg);
			resp->attrset = 0;
			goto out;
		}

		/* The link target goes through the same name conversion */
		lname = nfscmd_convname(ca, cs->exi, lnm,
		    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);

		if (lname == NULL) {
			*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
			if (name != nm)
				kmem_free(name, MAXPATHLEN + 1);
			kmem_free(nm, len);
			kmem_free(lnm, llen);
			nfs4_ntov_table_free(&ntov, &sarg);
			resp->attrset = 0;
			goto out;
		}

		error = VOP_SYMLINK(dvp, name, vap, lname, cr, NULL, 0);
		/* The target strings are not needed past this point */
		if (lname != lnm)
			kmem_free(lname, MAXPATHLEN + 1);
		kmem_free(lnm, llen);
		if (error)
			break;

		/*
		 * Get the initial "after" sequence number, if it fails,
		 * set to zero
		 */
		iva.va_mask = AT_SEQ;
		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
			iva.va_seq = 0;

		/* VOP_SYMLINK returns no vnode here; look the new link up */
		error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
		    NULL, NULL, NULL);
		if (error)
			break;

		/*
		 * va_seq is not safe over VOP calls, check it again
		 * if it has changed zero out iva to force atomic = FALSE.
		 */
		iva2.va_mask = AT_SEQ;
		if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, NULL) ||
		    iva2.va_seq != iva.va_seq)
			iva.va_seq = 0;
		break;
	default:
		/*
		 * probably a special file.
		 */
		if ((vap->va_mask & AT_MODE) == 0) {
			vap->va_mode = 0600;	/* default: owner rw only */
			vap->va_mask |= AT_MODE;
		}
		syncval = FNODSYNC;
		/*
		 * We know this will only generate one VOP call
		 */
		vp = do_rfs4_op_mknod(args, resp, req, cs, vap, name);

		/* do_rfs4_op_mknod has already stored the failure status */
		if (vp == NULL) {
			if (name != nm)
				kmem_free(name, MAXPATHLEN + 1);
			kmem_free(nm, len);
			nfs4_ntov_table_free(&ntov, &sarg);
			resp->attrset = 0;
			goto out;
		}

		/*
		 * Get the initial "after" sequence number, if it fails,
		 * set to zero
		 */
		iva.va_mask = AT_SEQ;
		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
			iva.va_seq = 0;

		break;
	}
	/* The object name is no longer needed, whatever the outcome */
	if (name != nm)
		kmem_free(name, MAXPATHLEN + 1);
	kmem_free(nm, len);

	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
	}

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(dvp, 0, cr, NULL);

	if (resp->status != NFS4_OK) {
		if (vp != NULL)
			VN_RELE(vp);
		nfs4_ntov_table_free(&ntov, &sarg);
		resp->attrset = 0;
		goto out;
	}

	/*
	 * Finish setup of cinfo response, "before" value already set.
	 * Get "after" change value, if it fails, simply return the
	 * before value.
	 */
	ava.va_mask = AT_CTIME|AT_SEQ;
	if (VOP_GETATTR(dvp, &ava, 0, cr, NULL)) {
		ava.va_ctime = bva.va_ctime;
		ava.va_seq = 0;
	}
	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);

	/*
	 * True verification that object was created with correct
	 * attrs is impossible.  The attrs could have been changed
	 * immediately after object creation.  If attributes did
	 * not verify, the only recourse for the server is to
	 * destroy the object.  Maybe if some attrs (like gid)
	 * are set incorrectly, the object should be destroyed;
	 * however, seems bad as a default policy.  Do we really
	 * want to destroy an object over one of the times not
	 * verifying correctly?  For these reasons, the server
	 * currently sets bits in attrset for createattrs
	 * that were set; however, no verification is done.
	 *
	 * vmask_to_nmask accounts for vattr bits set on create
	 *	[do_rfs4_set_attrs() only sets resp bits for
	 *	 non-vattr/vfs bits.]
	 * Mask off any bits set by default so as not to return
	 * more attrset bits than were requested in createattrs
	 */
	nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
	resp->attrset &= args->createattrs.attrmask;
	nfs4_ntov_table_free(&ntov, &sarg);

	/* Build the filehandle of the new object into the compound state */
	error = makefh4(&cs->fh, vp, cs->exi);
	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
	}

	/*
	 * The cinfo.atomic = TRUE only if we got no errors, we have
	 * non-zero va_seq's, and it has incremented by exactly one
	 * during the creation and it didn't change during the VOP_LOOKUP
	 * or VOP_FSYNC.
	 */
	if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
	    iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
		resp->cinfo.atomic = TRUE;
	else
		resp->cinfo.atomic = FALSE;

	/*
	 * Force modified metadata out to stable storage.
	 *
	 * if a underlying vp exists, pass it to VOP_FSYNC
	 */
	if (VOP_REALVP(vp, &realvp, NULL) == 0)
		(void) VOP_FSYNC(realvp, syncval, cr, NULL);
	else
		(void) VOP_FSYNC(vp, syncval, cr, NULL);

	/* Only a makefh4() failure can reach this test with an error */
	if (resp->status != NFS4_OK) {
		VN_RELE(vp);
		goto out;
	}
	/* Swap the current vnode: drop the directory, keep the new object */
	if (cs->vp)
		VN_RELE(cs->vp);

	cs->vp = vp;
	*cs->statusp = resp->status = NFS4_OK;
out:
	DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs,
	    CREATE4res *, resp);
}
1891
1892 /*ARGSUSED*/
1893 static void
1894 rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1895 struct compound_state *cs)
1896 {
1897 DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs,
1898 DELEGPURGE4args *, &argop->nfs_argop4_u.opdelegpurge);
1899
1900 rfs4_op_inval(argop, resop, req, cs);
1901
1902 DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs,
1903 DELEGPURGE4res *, &resop->nfs_resop4_u.opdelegpurge);
1904 }
1905
1906 /*ARGSUSED*/
1907 static void
1908 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1909 struct compound_state *cs)
1910 {
1911 DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
1912 DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
1913 rfs4_deleg_state_t *dsp;
1914 nfsstat4 status;
1915
1916 DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs,
1917 DELEGRETURN4args *, args);
1918
1919 status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
1920 resp->status = *cs->statusp = status;
1921 if (status != NFS4_OK)
1922 goto out;
1923
1924 /* Ensure specified filehandle matches */
1925 if (cs->vp != dsp->rds_finfo->rf_vp) {
1926 resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
1927 } else
1928 rfs4_return_deleg(dsp, FALSE);
1929
1930 rfs4_update_lease(dsp->rds_client);
1931
1932 rfs4_deleg_state_rele(dsp);
1933 out:
1934 DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs,
1935 DELEGRETURN4res *, resp);
1936 }
1937
1938 /*
1939 * Check to see if a given "flavor" is an explicitly shared flavor.
1940 * The assumption of this routine is the "flavor" is already a valid
1941 * flavor in the secinfo list of "exi".
1942 *
1943 * e.g.
1944 * # share -o sec=flavor1 /export
1945 * # share -o sec=flavor2 /export/home
1946 *
1947 * flavor2 is not an explicitly shared flavor for /export,
1948 * however it is in the secinfo list for /export thru the
1949 * server namespace setup.
1950 */
1951 int
1952 is_exported_sec(int flavor, struct exportinfo *exi)
1953 {
1954 int i;
1955 struct secinfo *sp;
1956
1957 sp = exi->exi_export.ex_secinfo;
1958 for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
1959 if (flavor == sp[i].s_secinfo.sc_nfsnum ||
1960 sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
1961 return (SEC_REF_EXPORTED(&sp[i]));
1962 }
1963 }
1964
1965 /* Should not reach this point based on the assumption */
1966 return (0);
1967 }
1968
1969 /*
1970 * Check if the security flavor used in the request matches what is
1971 * required at the export point or at the root pseudo node (exi_root).
1972 *
1973 * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
1974 *
1975 */
1976 static int
1977 secinfo_match_or_authnone(struct compound_state *cs)
1978 {
1979 int i;
1980 struct secinfo *sp;
1981
1982 /*
1983 * Check cs->nfsflavor (from the request) against
1984 * the current export data in cs->exi.
1985 */
1986 sp = cs->exi->exi_export.ex_secinfo;
1987 for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
1988 if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
1989 sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
1990 return (1);
1991 }
1992
1993 return (0);
1994 }
1995
1996 /*
1997 * Check the access authority for the client and return the correct error.
1998 */
1999 nfsstat4
2000 call_checkauth4(struct compound_state *cs, struct svc_req *req)
2001 {
2002 int authres;
2003
2004 /*
2005 * First, check if the security flavor used in the request
2006 * are among the flavors set in the server namespace.
2007 */
2008 if (!secinfo_match_or_authnone(cs)) {
2009 *cs->statusp = NFS4ERR_WRONGSEC;
2010 return (*cs->statusp);
2011 }
2012
2013 authres = checkauth4(cs, req);
2014
2015 if (authres > 0) {
2016 *cs->statusp = NFS4_OK;
2017 if (! (cs->access & CS_ACCESS_LIMITED))
2018 cs->access = CS_ACCESS_OK;
2019 } else if (authres == 0) {
2020 *cs->statusp = NFS4ERR_ACCESS;
2021 } else if (authres == -2) {
2022 *cs->statusp = NFS4ERR_WRONGSEC;
2023 } else {
2024 *cs->statusp = NFS4ERR_DELAY;
2025 }
2026 return (*cs->statusp);
2027 }
2028
2029 /*
2030 * bitmap4_to_attrmask is called by getattr and readdir.
2031 * It sets up the vattr mask and determines whether vfsstat call is needed
2032 * based on the input bitmap.
2033 * Returns nfsv4 status.
2034 */
2035 static nfsstat4
2036 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
2037 {
2038 int i;
2039 uint_t va_mask;
2040 struct statvfs64 *sbp = sargp->sbp;
2041
2042 sargp->sbp = NULL;
2043 sargp->flag = 0;
2044 sargp->rdattr_error = NFS4_OK;
2045 sargp->mntdfid_set = FALSE;
2046 if (sargp->cs->vp)
2047 sargp->xattr = get_fh4_flag(&sargp->cs->fh,
2048 FH4_ATTRDIR | FH4_NAMEDATTR);
2049 else
2050 sargp->xattr = 0;
2051
2052 /*
2053 * Set rdattr_error_req to true if return error per
2054 * failed entry rather than fail the readdir.
2055 */
2056 if (breq & FATTR4_RDATTR_ERROR_MASK)
2057 sargp->rdattr_error_req = 1;
2058 else
2059 sargp->rdattr_error_req = 0;
2060
2061 /*
2062 * generate the va_mask
2063 * Handle the easy cases first
2064 */
2065 switch (breq) {
2066 case NFS4_NTOV_ATTR_MASK:
2067 sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
2068 return (NFS4_OK);
2069
2070 case NFS4_FS_ATTR_MASK:
2071 sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
2072 sargp->sbp = sbp;
2073 return (NFS4_OK);
2074
2075 case NFS4_NTOV_ATTR_CACHE_MASK:
2076 sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
2077 return (NFS4_OK);
2078
2079 case FATTR4_LEASE_TIME_MASK:
2080 sargp->vap->va_mask = 0;
2081 return (NFS4_OK);
2082
2083 default:
2084 va_mask = 0;
2085 for (i = 0; i < nfs4_ntov_map_size; i++) {
2086 if ((breq & nfs4_ntov_map[i].fbit) &&
2087 nfs4_ntov_map[i].vbit)
2088 va_mask |= nfs4_ntov_map[i].vbit;
2089 }
2090
2091 /*
2092 * Check is vfsstat is needed
2093 */
2094 if (breq & NFS4_FS_ATTR_MASK)
2095 sargp->sbp = sbp;
2096
2097 sargp->vap->va_mask = va_mask;
2098 return (NFS4_OK);
2099 }
2100 /* NOTREACHED */
2101 }
2102
2103 /*
2104 * bitmap4_get_sysattrs is called by getattr and readdir.
2105 * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs.
2106 * Returns nfsv4 status.
2107 */
2108 static nfsstat4
2109 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
2110 {
2111 int error;
2112 struct compound_state *cs = sargp->cs;
2113 vnode_t *vp = cs->vp;
2114
2115 if (sargp->sbp != NULL) {
2116 if (error = VFS_STATVFS(vp->v_vfsp, sargp->sbp)) {
2117 sargp->sbp = NULL; /* to identify error */
2118 return (puterrno4(error));
2119 }
2120 }
2121
2122 return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
2123 }
2124
2125 static void
2126 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
2127 {
2128 ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
2129 KM_SLEEP);
2130 ntovp->attrcnt = 0;
2131 ntovp->vfsstat = FALSE;
2132 }
2133
2134 static void
2135 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
2136 struct nfs4_svgetit_arg *sargp)
2137 {
2138 int i;
2139 union nfs4_attr_u *na;
2140 uint8_t *amap;
2141
2142 /*
2143 * XXX Should do the same checks for whether the bit is set
2144 */
2145 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2146 i < ntovp->attrcnt; i++, na++, amap++) {
2147 (void) (*nfs4_ntov_map[*amap].sv_getit)(
2148 NFS4ATTR_FREEIT, sargp, na);
2149 }
2150 if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
2151 /*
2152 * xdr_free for getattr will be done later
2153 */
2154 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2155 i < ntovp->attrcnt; i++, na++, amap++) {
2156 xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
2157 }
2158 }
2159 kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
2160 }
2161
/*
 * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
 *
 * breq is the bitmap of requested attributes, fattrp receives the
 * resulting attribute mask and the XDR-encoded attribute list, and
 * sargp is the per-request state passed to every sv_getit handler.
 *
 * Returns NFS4_OK on success (including the "nothing requested" case),
 * NFS4ERR_INVAL when a write-only attribute is requested,
 * NFS4ERR_SERVERFAULT when an attribute fails to encode, or the
 * puterrno4() translation of a handler error when
 * sargp->rdattr_error_req is not set.
 *
 * NOTE(review): on the early "goto done" path fattrp->attrlist4 and
 * fattrp->attrlist4_len are not assigned here -- confirm callers do not
 * rely on them after a failure.
 */
static nfsstat4
do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
    struct nfs4_svgetit_arg *sargp)
{
	int error = 0;
	int i, k;
	struct nfs4_ntov_table ntov;
	XDR xdr;
	ulong_t xdr_size;
	char *xdr_attrs;
	nfsstat4 status = NFS4_OK;
	/* remembered so we can tell whether rdattr_error changed below */
	nfsstat4 prev_rdattr_error = sargp->rdattr_error;
	union nfs4_attr_u *na;
	uint8_t *amap;

	sargp->op = NFS4ATTR_GETIT;
	sargp->flag = 0;

	fattrp->attrmask = 0;
	/* if no bits requested, then return empty fattr4 */
	if (breq == 0) {
		fattrp->attrlist4_len = 0;
		fattrp->attrlist4 = NULL;
		return (NFS4_OK);
	}

	/*
	 * return NFS4ERR_INVAL when client requests write-only attrs
	 */
	if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
		return (NFS4ERR_INVAL);

	nfs4_ntov_table_init(&ntov);
	na = ntov.na;
	amap = ntov.amap;

	/*
	 * Now loop to get or verify the attrs.  na/amap always point at the
	 * next free slot in the table; they only advance when an attribute
	 * was fetched successfully.
	 */
	for (i = 0; i < nfs4_ntov_map_size; i++) {
		if (breq & nfs4_ntov_map[i].fbit) {
			if ((*nfs4_ntov_map[i].sv_getit)(
			    NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {

				error = (*nfs4_ntov_map[i].sv_getit)(
				    NFS4ATTR_GETIT, sargp, na);

				/*
				 * Possible error values:
				 * >0 if sv_getit failed to
				 * get the attr; 0 if succeeded;
				 * <0 if rdattr_error and the
				 * attribute cannot be returned.
				 */
				if (error && !(sargp->rdattr_error_req))
					goto done;
				/*
				 * On success record the attribute in the
				 * reply mask and the table.  On a positive
				 * error only the first one is latched into
				 * rdattr_error; the attribute is skipped.
				 */
				if (error == 0) {
					fattrp->attrmask |=
					    nfs4_ntov_map[i].fbit;
					*amap++ =
					    (uint8_t)nfs4_ntov_map[i].nval;
					na++;
					(ntov.attrcnt)++;
				} else if ((error > 0) &&
				    (sargp->rdattr_error == NFS4_OK)) {
					sargp->rdattr_error = puterrno4(error);
				}
				/* the error has been absorbed; keep going */
				error = 0;
			}
		}
	}

	/*
	 * If rdattr_error was set after the return value for it was assigned,
	 * update it.  The scan skips entries whose attribute number is below
	 * FATTR4_RDATTR_ERROR and stops at the first entry that is not.
	 */
	if (prev_rdattr_error != sargp->rdattr_error) {
		na = ntov.na;
		amap = ntov.amap;
		for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
			k = *amap;
			if (k < FATTR4_RDATTR_ERROR) {
				continue;
			}
			if ((k == FATTR4_RDATTR_ERROR) &&
			    ((*nfs4_ntov_map[k].sv_getit)(
			    NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {

				(void) (*nfs4_ntov_map[k].sv_getit)(
				    NFS4ATTR_GETIT, sargp, na);
			}
			break;
		}
	}

	/* First pass: total encoded size of all collected attributes. */
	xdr_size = 0;
	na = ntov.na;
	amap = ntov.amap;
	for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
		xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
	}

	fattrp->attrlist4_len = xdr_size;
	if (xdr_size) {
		/* freed by rfs4_op_getattr_free() */
		fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);

		xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);

		/* Second pass: encode each attribute into the buffer. */
		na = ntov.na;
		amap = ntov.amap;
		for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
			if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
				DTRACE_PROBE1(nfss__e__getattr4_encfail,
				    int, *amap);
				status = NFS4ERR_SERVERFAULT;
				break;
			}
		}
		/* xdrmem_destroy(&xdrs); */ /* NO-OP */
	} else {
		fattrp->attrlist4 = NULL;
	}
done:

	nfs4_ntov_table_free(&ntov, sargp);

	/* only non-zero when we bailed out of the fetch loop above */
	if (error != 0)
		status = puterrno4(error);

	return (status);
}
2300
2301 /* ARGSUSED */
2302 static void
2303 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2304 struct compound_state *cs)
2305 {
2306 GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2307 GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2308 struct nfs4_svgetit_arg sarg;
2309 struct statvfs64 sb;
2310 nfsstat4 status;
2311
2312 DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs,
2313 GETATTR4args *, args);
2314
2315 if (cs->vp == NULL) {
2316 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2317 goto out;
2318 }
2319
2320 if (cs->access == CS_ACCESS_DENIED) {
2321 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2322 goto out;
2323 }
2324
2325 sarg.sbp = &sb;
2326 sarg.cs = cs;
2327 sarg.is_referral = B_FALSE;
2328
2329 status = bitmap4_to_attrmask(args->attr_request, &sarg);
2330 if (status == NFS4_OK) {
2331
2332 status = bitmap4_get_sysattrs(&sarg);
2333 if (status == NFS4_OK) {
2334
2335 /* Is this a referral? */
2336 if (vn_is_nfs_reparse(cs->vp, cs->cr)) {
2337 /* Older V4 Solaris client sees a link */
2338 if (client_is_downrev(req))
2339 sarg.vap->va_type = VLNK;
2340 else
2341 sarg.is_referral = B_TRUE;
2342 }
2343
2344 status = do_rfs4_op_getattr(args->attr_request,
2345 &resp->obj_attributes, &sarg);
2346 }
2347 }
2348 *cs->statusp = resp->status = status;
2349 out:
2350 DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs,
2351 GETATTR4res *, resp);
2352 }
2353
2354 static void
2355 rfs4_op_getattr_free(nfs_resop4 *resop)
2356 {
2357 GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2358
2359 nfs4_fattr4_free(&resp->obj_attributes);
2360 }
2361
2362 /* ARGSUSED */
2363 static void
2364 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2365 struct compound_state *cs)
2366 {
2367 GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2368
2369 DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs);
2370
2371 if (cs->vp == NULL) {
2372 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2373 goto out;
2374 }
2375 if (cs->access == CS_ACCESS_DENIED) {
2376 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2377 goto out;
2378 }
2379
2380 /* check for reparse point at the share point */
2381 if (cs->exi->exi_moved || vn_is_nfs_reparse(cs->exi->exi_vp, cs->cr)) {
2382 /* it's all bad */
2383 cs->exi->exi_moved = 1;
2384 *cs->statusp = resp->status = NFS4ERR_MOVED;
2385 DTRACE_PROBE2(nfs4serv__func__referral__shared__moved,
2386 vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2387 return;
2388 }
2389
2390 /* check for reparse point at vp */
2391 if (vn_is_nfs_reparse(cs->vp, cs->cr) && !client_is_downrev(req)) {
2392 /* it's not all bad */
2393 *cs->statusp = resp->status = NFS4ERR_MOVED;
2394 DTRACE_PROBE2(nfs4serv__func__referral__moved,
2395 vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2396 return;
2397 }
2398
2399 resp->object.nfs_fh4_val =
2400 kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2401 nfs_fh4_copy(&cs->fh, &resp->object);
2402 *cs->statusp = resp->status = NFS4_OK;
2403 out:
2404 DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs,
2405 GETFH4res *, resp);
2406 }
2407
2408 static void
2409 rfs4_op_getfh_free(nfs_resop4 *resop)
2410 {
2411 GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2412
2413 if (resp->status == NFS4_OK &&
2414 resp->object.nfs_fh4_val != NULL) {
2415 kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2416 resp->object.nfs_fh4_val = NULL;
2417 resp->object.nfs_fh4_len = 0;
2418 }
2419 }
2420
2421 /*
2422 * illegal: args: void
2423 * res : status (NFS4ERR_OP_ILLEGAL)
2424 */
2425 /* ARGSUSED */
2426 static void
2427 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2428 struct svc_req *req, struct compound_state *cs)
2429 {
2430 ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2431
2432 resop->resop = OP_ILLEGAL;
2433 *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2434 }
2435
/*
 * link: args: SAVED_FH: file, CURRENT_FH: target directory
 *	 res: status. If success - CURRENT_FH unchanged, return change_info
 *
 * The saved vnode is the object being linked and the current vnode is
 * the directory that receives the new name.  The "before" and "after"
 * change values of the directory are reported in resp->cinfo, and
 * cinfo.atomic is set from the directory's va_seq values sampled
 * before the link, right after it, and after the fsync.
 */
/* ARGSUSED */
static void
rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
    struct compound_state *cs)
{
	LINK4args *args = &argop->nfs_argop4_u.oplink;
	LINK4res *resp = &resop->nfs_resop4_u.oplink;
	int error;
	vnode_t *vp;
	vnode_t *dvp;
	/* directory attrs: before link, immediately after, after fsync */
	struct vattr bdva, idva, adva;
	char *nm;
	uint_t len;
	struct sockaddr *ca;
	char *name = NULL;
	nfsstat4 status;

	DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs,
	    LINK4args *, args);

	/* SAVED_FH: source object */
	vp = cs->saved_vp;
	if (vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto out;
	}

	/* CURRENT_FH: target directory */
	dvp = cs->vp;
	if (dvp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto out;
	}

	/*
	 * If there is a non-shared filesystem mounted on this vnode,
	 * do not allow to link any file in this directory.
	 */
	if (vn_ismntpt(dvp)) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}

	if (cs->access == CS_ACCESS_DENIED) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}

	/* Check source object's type validity */
	if (vp->v_type == VDIR) {
		*cs->statusp = resp->status = NFS4ERR_ISDIR;
		goto out;
	}

	/* Check target directory's type */
	if (dvp->v_type != VDIR) {
		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
		goto out;
	}

	/* Source and target must belong to the same export. */
	if (cs->saved_exi != cs->exi) {
		*cs->statusp = resp->status = NFS4ERR_XDEV;
		goto out;
	}

	status = utf8_dir_verify(&args->newname);
	if (status != NFS4_OK) {
		*cs->statusp = resp->status = status;
		goto out;
	}

	/* nm is allocated here; every later exit must kmem_free(nm, len) */
	nm = utf8_to_fn(&args->newname, &len, NULL);
	if (nm == NULL) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		goto out;
	}

	if (len > MAXNAMELEN) {
		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
		kmem_free(nm, len);
		goto out;
	}

	if (rdonly4(req, cs)) {
		*cs->statusp = resp->status = NFS4ERR_ROFS;
		kmem_free(nm, len);
		goto out;
	}

	/* Get "before" change value */
	bdva.va_mask = AT_CTIME|AT_SEQ;
	error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
		kmem_free(nm, len);
		goto out;
	}

	/*
	 * Convert the client-supplied name for this caller/export.  The
	 * result may be nm itself or a separate MAXPATHLEN + 1 buffer
	 * (see the "nm != name" check below).
	 */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
	    MAXPATHLEN + 1);

	if (name == NULL) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		kmem_free(nm, len);
		goto out;
	}

	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)

	error = VOP_LINK(dvp, vp, name, cs->cr, NULL, 0);

	/* Both name buffers are done with, whether or not the link worked. */
	if (nm != name)
		kmem_free(name, MAXPATHLEN + 1);
	kmem_free(nm, len);

	/*
	 * Get the initial "after" sequence number, if it fails, set to zero
	 */
	idva.va_mask = AT_SEQ;
	if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
		idva.va_seq = 0;

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
	(void) VOP_FSYNC(dvp, 0, cs->cr, NULL);

	/* The VOP_LINK result is only acted on after the fsyncs above. */
	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
		goto out;
	}

	/*
	 * Get "after" change value, if it fails, simply return the
	 * before value.
	 */
	adva.va_mask = AT_CTIME|AT_SEQ;
	if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
		adva.va_ctime = bdva.va_ctime;
		adva.va_seq = 0;
	}

	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)

	/*
	 * The cinfo.atomic = TRUE only if we have
	 * non-zero va_seq's, and it has incremented by exactly one
	 * during the VOP_LINK and it didn't change during the VOP_FSYNC.
	 */
	if (bdva.va_seq && idva.va_seq && adva.va_seq &&
	    idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
		resp->cinfo.atomic = TRUE;
	else
		resp->cinfo.atomic = FALSE;

	*cs->statusp = resp->status = NFS4_OK;
out:
	DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs,
	    LINK4res *, resp);
}
2602
/*
 * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
 *
 * Looks up the component nm in the current directory (cs->vp).  On
 * success cs->vp, cs->fh and, when an export boundary was crossed,
 * cs->exi and cs->cr are updated to describe the object found, and
 * NFS4_OK is returned.  On failure an nfsstat4 error is returned and
 * every vnode hold taken here is released again.
 */

/* ARGSUSED */
static nfsstat4
do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs)
{
	int error;
	int different_export = 0;
	vnode_t *vp, *pre_tvp = NULL, *oldvp = NULL;
	struct exportinfo *exi = NULL, *pre_exi = NULL;
	nfsstat4 stat;
	fid_t fid;
	int attrdir, dotdot, walk;
	/* TRUE once cs->vp has been switched to the looked-up vnode */
	bool_t is_newvp = FALSE;

	/* The vnode's xattr-dir flag and the fh flag must agree. */
	if (cs->vp->v_flag & V_XATTRDIR) {
		attrdir = 1;
		ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
	} else {
		attrdir = 0;
		ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
	}

	dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');

	/*
	 * If dotdotting, then need to check whether it's
	 * above the root of a filesystem, or above an
	 * export point.
	 */
	if (dotdot) {

		/*
		 * If dotdotting at the root of a filesystem, then
		 * need to traverse back to the mounted-on filesystem
		 * and do the dotdot lookup there.
		 */
		if (cs->vp->v_flag & VROOT) {

			/*
			 * If at the system root, then can
			 * go up no further.
			 */
			if (VN_CMP(cs->vp, rootdir))
				return (puterrno4(ENOENT));

			/*
			 * Traverse back to the mounted-on filesystem
			 */
			cs->vp = untraverse(cs->vp);

			/*
			 * Set the different_export flag so we remember
			 * to pick up a new exportinfo entry for
			 * this new filesystem.
			 */
			different_export = 1;
		} else {

			/*
			 * If dotdotting above an export point then set
			 * the different_export to get new export info.
			 */
			different_export = nfs_exported(cs->exi, cs->vp);
		}
	}

	error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
	    NULL, NULL, NULL);
	if (error)
		return (puterrno4(error));

	/*
	 * If the vnode is in a pseudo filesystem, check whether it is visible.
	 *
	 * XXX if the vnode is a symlink and it is not visible in
	 * a pseudo filesystem, return ENOENT (not following symlink).
	 * V4 client can not mount such symlink. This is a regression
	 * from V2/V3.
	 *
	 * In the same exported filesystem, if the security flavor used
	 * is not an explicitly shared flavor, limit the view to the visible
	 * list entries only. This is not a WRONGSEC case because it's already
	 * checked via PUTROOTFH/PUTPUBFH or PUTFH.
	 */
	if (!different_export &&
	    (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
	    cs->access & CS_ACCESS_LIMITED)) {
		if (! nfs_visible(cs->exi, vp, &different_export)) {
			VN_RELE(vp);
			return (puterrno4(ENOENT));
		}
	}

	/*
	 * If it's a mountpoint, then traverse it.
	 */
	if (vn_ismntpt(vp)) {
		pre_exi = cs->exi; /* save pre-traversed exportinfo */
		pre_tvp = vp; /* save pre-traversed vnode */

		/*
		 * hold pre_tvp to counteract rele by traverse. We will
		 * need pre_tvp below if checkexport4 fails
		 */
		VN_HOLD(pre_tvp);
		if ((error = traverse(&vp)) != 0) {
			VN_RELE(vp);
			VN_RELE(pre_tvp);
			return (puterrno4(error));
		}
		different_export = 1;
	} else if (vp->v_vfsp != cs->vp->v_vfsp) {
		/*
		 * The vfsp comparison is to handle the case where
		 * a LOFS mount is shared. lo_lookup traverses mount points,
		 * and NFS is unaware of local fs transitions because
		 * v_vfsmountedhere isn't set. For this special LOFS case,
		 * the dir and the obj returned by lookup will have different
		 * vfs ptrs.
		 */
		different_export = 1;
	}

	if (different_export) {

		/* Build a fid for vp so its export can be looked up. */
		bzero(&fid, sizeof (fid));
		fid.fid_len = MAXFIDSZ;
		error = vop_fid_pseudo(vp, &fid);
		if (error) {
			VN_RELE(vp);
			if (pre_tvp)
				VN_RELE(pre_tvp);
			return (puterrno4(error));
		}

		if (dotdot)
			exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
		else
			exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);

		if (exi == NULL) {
			if (pre_tvp) {
				/*
				 * If this vnode is a mounted-on vnode,
				 * but the mounted-on file system is not
				 * exported, send back the filehandle for
				 * the mounted-on vnode, not the root of
				 * the mounted-on file system.
				 */
				VN_RELE(vp);
				vp = pre_tvp;
				exi = pre_exi;
			} else {
				VN_RELE(vp);
				return (puterrno4(EACCES));
			}
		} else if (pre_tvp) {
			/* we're done with pre_tvp now. release extra hold */
			VN_RELE(pre_tvp);
		}

		cs->exi = exi;

		/*
		 * Now we do a checkauth4. The reason is that
		 * this client/user may not have access to the new
		 * exported file system, and if he does,
		 * the client/user may be mapped to a different uid.
		 *
		 * We start with a new cr, because the checkauth4 done
		 * in the PUT*FH operation over wrote the cred's uid,
		 * gid, etc, and we want the real thing before calling
		 * checkauth4()
		 */
		crfree(cs->cr);
		cs->cr = crdup(cs->basecr);

		/* checkauth4 looks at cs->vp, so switch it over first */
		oldvp = cs->vp;
		cs->vp = vp;
		is_newvp = TRUE;

		stat = call_checkauth4(cs, req);
		if (stat != NFS4_OK) {
			VN_RELE(cs->vp);
			cs->vp = oldvp;
			return (stat);
		}
	}

	/*
	 * After various NFS checks, do a label check on the path
	 * component. The label on this path should either be the
	 * global zone's label or a zone's label. We are only
	 * interested in the zone's label because exported files
	 * in global zone is accessible (though read-only) to
	 * clients. The exportability/visibility check is already
	 * done before reaching this code.
	 */
	if (is_system_labeled()) {
		bslabel_t *clabel;

		ASSERT(req->rq_label != NULL);
		clabel = req->rq_label;
		DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *,
		    "got client label from request(1)", struct svc_req *, req);

		if (!blequal(&l_admin_low->tsl_label, clabel)) {
			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
			    cs->exi)) {
				error = EACCES;
				goto err_out;
			}
		} else {
			/*
			 * We grant access to admin_low label clients
			 * only if the client is trusted, i.e. also
			 * running Solaris Trusted Extension.
			 */
			struct sockaddr *ca;
			int addr_type;
			void *ipaddr;
			tsol_tpc_t *tp;

			/*
			 * NOTE(review): addr_type and ipaddr are left
			 * unset when sa_family is neither AF_INET nor
			 * AF_INET6, yet are still passed to find_tpc()
			 * -- confirm no other family can reach here.
			 */
			ca = (struct sockaddr *)svc_getrpccaller(
			    req->rq_xprt)->buf;
			if (ca->sa_family == AF_INET) {
				addr_type = IPV4_VERSION;
				ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
			} else if (ca->sa_family == AF_INET6) {
				addr_type = IPV6_VERSION;
				ipaddr = &((struct sockaddr_in6 *)
				    ca)->sin6_addr;
			}
			tp = find_tpc(ipaddr, addr_type, B_FALSE);
			if (tp == NULL || tp->tpc_tp.tp_doi !=
			    l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
			    SUN_CIPSO) {
				if (tp != NULL)
					TPC_RELE(tp);
				error = EACCES;
				goto err_out;
			}
			TPC_RELE(tp);
		}
	}

	error = makefh4(&cs->fh, vp, cs->exi);

err_out:
	if (error) {
		/*
		 * Undo: if cs->vp was already switched, drop the new
		 * vnode and restore the old one; otherwise just drop vp.
		 */
		if (is_newvp) {
			VN_RELE(cs->vp);
			cs->vp = oldvp;
		} else
			VN_RELE(vp);
		return (puterrno4(error));
	}

	/* Success: release the previous current vnode and install vp. */
	if (!is_newvp) {
		if (cs->vp)
			VN_RELE(cs->vp);
		cs->vp = vp;
	} else if (oldvp)
		VN_RELE(oldvp);

	/*
	 * if did lookup on attrdir and didn't lookup .., set named
	 * attr fh flag
	 */
	if (attrdir && ! dotdot)
		set_fh4_flag(&cs->fh, FH4_NAMEDATTR);

	/* Assume false for now, open proc will set this */
	cs->mandlock = FALSE;

	return (NFS4_OK);
}
2883
2884 /* ARGSUSED */
2885 static void
2886 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2887 struct compound_state *cs)
2888 {
2889 LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
2890 LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
2891 char *nm;
2892 uint_t len;
2893 struct sockaddr *ca;
2894 char *name = NULL;
2895 nfsstat4 status;
2896
2897 DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs,
2898 LOOKUP4args *, args);
2899
2900 if (cs->vp == NULL) {
2901 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2902 goto out;
2903 }
2904
2905 if (cs->vp->v_type == VLNK) {
2906 *cs->statusp = resp->status = NFS4ERR_SYMLINK;
2907 goto out;
2908 }
2909
2910 if (cs->vp->v_type != VDIR) {
2911 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2912 goto out;
2913 }
2914
2915 status = utf8_dir_verify(&args->objname);
2916 if (status != NFS4_OK) {
2917 *cs->statusp = resp->status = status;
2918 goto out;
2919 }
2920
2921 nm = utf8_to_str(&args->objname, &len, NULL);
2922 if (nm == NULL) {
2923 *cs->statusp = resp->status = NFS4ERR_INVAL;
2924 goto out;
2925 }
2926
2927 if (len > MAXNAMELEN) {
2928 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2929 kmem_free(nm, len);
2930 goto out;
2931 }
2932
2933 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2934 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2935 MAXPATHLEN + 1);
2936
2937 if (name == NULL) {
2938 *cs->statusp = resp->status = NFS4ERR_INVAL;
2939 kmem_free(nm, len);
2940 goto out;
2941 }
2942
2943 *cs->statusp = resp->status = do_rfs4_op_lookup(name, req, cs);
2944
2945 if (name != nm)
2946 kmem_free(name, MAXPATHLEN + 1);
2947 kmem_free(nm, len);
2948
2949 out:
2950 DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs,
2951 LOOKUP4res *, resp);
2952 }
2953
2954 /* ARGSUSED */
2955 static void
2956 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
2957 struct compound_state *cs)
2958 {
2959 LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
2960
2961 DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs);
2962
2963 if (cs->vp == NULL) {
2964 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2965 goto out;
2966 }
2967
2968 if (cs->vp->v_type != VDIR) {
2969 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2970 goto out;
2971 }
2972
2973 *cs->statusp = resp->status = do_rfs4_op_lookup("..", req, cs);
2974
2975 /*
2976 * From NFSV4 Specification, LOOKUPP should not check for
2977 * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
2978 */
2979 if (resp->status == NFS4ERR_WRONGSEC) {
2980 *cs->statusp = resp->status = NFS4_OK;
2981 }
2982
2983 out:
2984 DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
2985 LOOKUPP4res *, resp);
2986 }
2987
2988
2989 /*ARGSUSED2*/
2990 static void
2991 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2992 struct compound_state *cs)
2993 {
2994 OPENATTR4args *args = &argop->nfs_argop4_u.opopenattr;
2995 OPENATTR4res *resp = &resop->nfs_resop4_u.opopenattr;
2996 vnode_t *avp = NULL;
2997 int lookup_flags = LOOKUP_XATTR, error;
2998 int exp_ro = 0;
2999
3000 DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
3001 OPENATTR4args *, args);
3002
3003 if (cs->vp == NULL) {
3004 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3005 goto out;
3006 }
3007
3008 if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 &&
3009 !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) {
3010 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3011 goto out;
3012 }
3013
3014 /*
3015 * If file system supports passing ACE mask to VOP_ACCESS then
3016 * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks
3017 */
3018
3019 if (vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS))
3020 error = VOP_ACCESS(cs->vp, ACE_READ_NAMED_ATTRS,
3021 V_ACE_MASK, cs->cr, NULL);
3022 else
3023 error = ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, NULL) != 0) &&
3024 (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) &&
3025 (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, NULL) != 0));
3026
3027 if (error) {
3028 *cs->statusp = resp->status = puterrno4(EACCES);
3029 goto out;
3030 }
3031
3032 /*
3033 * The CREATE_XATTR_DIR VOP flag cannot be specified if
3034 * the file system is exported read-only -- regardless of
3035 * createdir flag. Otherwise the attrdir would be created
3036 * (assuming server fs isn't mounted readonly locally). If
3037 * VOP_LOOKUP returns ENOENT in this case, the error will
3038 * be translated into EROFS. ENOSYS is mapped to ENOTSUP
3039 * because specfs has no VOP_LOOKUP op, so the macro would
3040 * return ENOSYS. EINVAL is returned by all (current)
3041 * Solaris file system implementations when any of their
3042 * restrictions are violated (xattr(dir) can't have xattrdir).
3043 * Returning NOTSUPP is more appropriate in this case
3044 * because the object will never be able to have an attrdir.
3045 */
3046 if (args->createdir && ! (exp_ro = rdonly4(req, cs)))
3047 lookup_flags |= CREATE_XATTR_DIR;
3048
3049 error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr,
3050 NULL, NULL, NULL);
3051
3052 if (error) {
3053 if (error == ENOENT && args->createdir && exp_ro)
3054 *cs->statusp = resp->status = puterrno4(EROFS);
3055 else if (error == EINVAL || error == ENOSYS)
3056 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3057 else
3058 *cs->statusp = resp->status = puterrno4(error);
3059 goto out;
3060 }
3061
3062 ASSERT(avp->v_flag & V_XATTRDIR);
3063
3064 error = makefh4(&cs->fh, avp, cs->exi);
3065
3066 if (error) {
3067 VN_RELE(avp);
3068 *cs->statusp = resp->status = puterrno4(error);
3069 goto out;
3070 }
3071
3072 VN_RELE(cs->vp);
3073 cs->vp = avp;
3074
3075 /*
3076 * There is no requirement for an attrdir fh flag
3077 * because the attrdir has a vnode flag to distinguish
3078 * it from regular (non-xattr) directories. The
3079 * FH4_ATTRDIR flag is set for future sanity checks.
3080 */
3081 set_fh4_flag(&cs->fh, FH4_ATTRDIR);
3082 *cs->statusp = resp->status = NFS4_OK;
3083
3084 out:
3085 DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
3086 OPENATTR4res *, resp);
3087 }
3088
3089 static int
3090 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
3091 caller_context_t *ct)
3092 {
3093 int error;
3094 int i;
3095 clock_t delaytime;
3096
3097 delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
3098
3099 /*
3100 * Don't block on mandatory locks. If this routine returns
3101 * EAGAIN, the caller should return NFS4ERR_LOCKED.
3102 */
3103 uio->uio_fmode = FNONBLOCK;
3104
3105 for (i = 0; i < rfs4_maxlock_tries; i++) {
3106
3107
3108 if (direction == FREAD) {
3109 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct);
3110 error = VOP_READ(vp, uio, ioflag, cred, ct);
3111 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct);
3112 } else {
3113 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct);
3114 error = VOP_WRITE(vp, uio, ioflag, cred, ct);
3115 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct);
3116 }
3117
3118 if (error != EAGAIN)
3119 break;
3120
3121 if (i < rfs4_maxlock_tries - 1) {
3122 delay(delaytime);
3123 delaytime *= 2;
3124 }
3125 }
3126
3127 return (error);
3128 }
3129
/*
 * READ operation handler.
 *
 * Validates the current filehandle, stateid, share/lock state and
 * access, then reads up to args->count bytes at args->offset from the
 * current (regular) file.  Data is returned through one of three paths:
 * an RDMA write chunk (args->wlist set), loaned zero-copy buffers
 * (nfs_loaned_buffers set and no RDMA), or a freshly allocated mblk.
 * The status is stored in both resp->status and *cs->statusp.
 */
/* ARGSUSED */
static void
rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
    struct compound_state *cs)
{
	READ4args *args = &argop->nfs_argop4_u.opread;
	READ4res *resp = &resop->nfs_resop4_u.opread;
	int error;
	int verror;
	vnode_t *vp;
	struct vattr va;
	struct iovec iov, *iovp = NULL;
	int iovcnt;
	struct uio uio;
	u_offset_t offset;
	bool_t *deleg = &cs->deleg;
	nfsstat4 stat;
	int in_crit = 0;
	mblk_t *mp = NULL;
	int alloc_err = 0;
	int rdma_used = 0;
	int loaned_buffers;
	caller_context_t ct;
	/* points at either the loaned xuio or the local uio below */
	struct uio *uiop;

	DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs,
	    READ4args, args);

	vp = cs->vp;
	if (vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto out;
	}
	if (cs->access == CS_ACCESS_DENIED) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}

	if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
	    deleg, TRUE, &ct)) != NFS4_OK) {
		*cs->statusp = resp->status = stat;
		goto out;
	}

	/*
	 * Enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with write requests.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
		    &ct)) {
			*cs->statusp = resp->status = NFS4ERR_LOCKED;
			goto out;
		}
	}

	/* An RDMA write list must be able to hold the requested count. */
	if (args->wlist) {
		if (args->count > clist_len(args->wlist)) {
			*cs->statusp = resp->status = NFS4ERR_INVAL;
			goto out;
		}
		rdma_used = 1;
	}

	/* use loaned buffers for TCP */
	loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;

	va.va_mask = AT_MODE|AT_SIZE|AT_UID;
	verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);

	/*
	 * If we can't get the attributes, then we can't do the
	 * right access checking. So, we'll fail the request.
	 */
	if (verror) {
		*cs->statusp = resp->status = puterrno4(verror);
		goto out;
	}

	if (vp->v_type != VREG) {
		*cs->statusp = resp->status =
		    ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
		goto out;
	}

	/*
	 * The file's owner skips the access check.  Anyone else needs
	 * either read or execute permission; the error reported is the
	 * one from the execute check.
	 */
	if (crgetuid(cs->cr) != va.va_uid &&
	    (error = VOP_ACCESS(vp, VREAD, 0, cs->cr, &ct)) &&
	    (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr, &ct))) {
		*cs->statusp = resp->status = puterrno4(error);
		goto out;
	}

	if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}

	/* Reading at or beyond EOF: succeed with no data and eof set. */
	offset = args->offset;
	if (offset >= va.va_size) {
		*cs->statusp = resp->status = NFS4_OK;
		resp->eof = TRUE;
		resp->data_len = 0;
		resp->data_val = NULL;
		resp->mblk = NULL;
		/* RDMA */
		resp->wlist = args->wlist;
		resp->wlist_len = resp->data_len;
		*cs->statusp = resp->status = NFS4_OK;
		if (resp->wlist)
			clist_zero_len(resp->wlist);
		goto out;
	}

	/* Zero-length read: succeed with no data, eof clear. */
	if (args->count == 0) {
		*cs->statusp = resp->status = NFS4_OK;
		resp->eof = FALSE;
		resp->data_len = 0;
		resp->data_val = NULL;
		resp->mblk = NULL;
		/* RDMA */
		resp->wlist = args->wlist;
		resp->wlist_len = resp->data_len;
		if (resp->wlist)
			clist_zero_len(resp->wlist);
		goto out;
	}

	/*
	 * Do not allocate memory more than maximum allowed
	 * transfer size
	 */
	if (args->count > rfs4_tsize(req))
		args->count = rfs4_tsize(req);

	if (loaned_buffers) {
		uiop = (uio_t *)rfs_setup_xuio(vp);
		ASSERT(uiop != NULL);
		uiop->uio_segflg = UIO_SYSSPACE;
		uiop->uio_loffset = args->offset;
		uiop->uio_resid = args->count;

		/* Jump to do the read if successful */
		if (!VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cs->cr, &ct)) {
			/*
			 * Need to hold the vnode until after VOP_RETZCBUF()
			 * is called.
			 */
			VN_HOLD(vp);
			goto doio_read;
		}

		DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
		    uiop->uio_loffset, int, uiop->uio_resid);

		uiop->uio_extflg = 0;

		/* failure to setup for zero copy */
		rfs_free_xuio((void *)uiop);
		loaned_buffers = 0;
	}

	/*
	 * If returning data via RDMA Write, then grab the chunk list. If we
	 * aren't returning READ data w/RDMA_WRITE, then grab a mblk.
	 */
	if (rdma_used) {
		mp = NULL;
		(void) rdma_get_wchunk(req, &iov, args->wlist);
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
	} else {
		/*
		 * mp will contain the data to be sent out in the read reply.
		 * It will be freed after the reply has been sent.
		 */
		mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
		ASSERT(mp != NULL);
		ASSERT(alloc_err == 0);
		uio.uio_iov = iovp;
		uio.uio_iovcnt = iovcnt;
	}

	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_extflg = UIO_COPY_CACHED;
	uio.uio_loffset = args->offset;
	uio.uio_resid = args->count;
	uiop = &uio;

	/* The loaned-buffer path jumps here with uiop already set up. */
doio_read:
	error = do_io(FREAD, vp, uiop, 0, cs->cr, &ct);

	/* Re-read the size after the I/O; it feeds the eof test below. */
	va.va_mask = AT_SIZE;
	verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);

	if (error) {
		if (mp)
			freemsg(mp);
		*cs->statusp = resp->status = puterrno4(error);
		goto out;
	}

	/* make mblk using zc buffers */
	if (loaned_buffers) {
		mp = uio_to_mblk(uiop);
		ASSERT(mp != NULL);
	}

	*cs->statusp = resp->status = NFS4_OK;

	/* Bytes actually read = requested minus what is left in the uio. */
	ASSERT(uiop->uio_resid >= 0);
	resp->data_len = args->count - uiop->uio_resid;
	if (mp) {
		resp->data_val = (char *)mp->b_datap->db_base;
		rfs_rndup_mblks(mp, resp->data_len, loaned_buffers);
	} else {
		resp->data_val = (caddr_t)iov.iov_base;
	}

	resp->mblk = mp;

	/* eof only if the size is known and the read ended exactly there */
	if (!verror && offset + resp->data_len == va.va_size)
		resp->eof = TRUE;
	else
		resp->eof = FALSE;

	if (rdma_used) {
		if (!rdma_setup_read_data4(args, resp)) {
			*cs->statusp = resp->status = NFS4ERR_INVAL;
		}
	} else {
		resp->wlist = NULL;
	}

out:
	if (in_crit)
		nbl_end_crit(vp);

	/* iovp is only non-NULL when rfs_read_alloc() filled it in */
	if (iovp != NULL)
		kmem_free(iovp, iovcnt * sizeof (struct iovec));

	DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs,
	    READ4res *, resp);
}
3375
3376 static void
3377 rfs4_op_read_free(nfs_resop4 *resop)
3378 {
3379 READ4res *resp = &resop->nfs_resop4_u.opread;
3380
3381 if (resp->status == NFS4_OK && resp->mblk != NULL) {
3382 freemsg(resp->mblk);
3383 resp->mblk = NULL;
3384 resp->data_val = NULL;
3385 resp->data_len = 0;
3386 }
3387 }
3388
3389 static void
3390 rfs4_op_readdir_free(nfs_resop4 * resop)
3391 {
3392 READDIR4res *resp = &resop->nfs_resop4_u.opreaddir;
3393
3394 if (resp->status == NFS4_OK && resp->mblk != NULL) {
3395 freeb(resp->mblk);
3396 resp->mblk = NULL;
3397 resp->data_len = 0;
3398 }
3399 }
3400
3401
3402 /* ARGSUSED */
3403 static void
3404 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3405 struct compound_state *cs)
3406 {
3407 PUTPUBFH4res *resp = &resop->nfs_resop4_u.opputpubfh;
3408 int error;
3409 vnode_t *vp;
3410 struct exportinfo *exi, *sav_exi;
3411 nfs_fh4_fmt_t *fh_fmtp;
3412
3413 DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3414
3415 if (cs->vp) {
3416 VN_RELE(cs->vp);
3417 cs->vp = NULL;
3418 }
3419
3420 if (cs->cr)
3421 crfree(cs->cr);
3422
3423 cs->cr = crdup(cs->basecr);
3424
3425 vp = exi_public->exi_vp;
3426 if (vp == NULL) {
3427 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3428 goto out;
3429 }
3430
3431 error = makefh4(&cs->fh, vp, exi_public);
3432 if (error != 0) {
3433 *cs->statusp = resp->status = puterrno4(error);
3434 goto out;
3435 }
3436 sav_exi = cs->exi;
3437 if (exi_public == exi_root) {
3438 /*
3439 * No filesystem is actually shared public, so we default
3440 * to exi_root. In this case, we must check whether root
3441 * is exported.
3442 */
3443 fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3444
3445 /*
3446 * if root filesystem is exported, the exportinfo struct that we
3447 * should use is what checkexport4 returns, because root_exi is
3448 * actually a mostly empty struct.
3449 */
3450 exi = checkexport4(&fh_fmtp->fh4_fsid,
3451 (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3452 cs->exi = ((exi != NULL) ? exi : exi_public);
3453 } else {
3454 /*
3455 * it's a properly shared filesystem
3456 */
3457 cs->exi = exi_public;
3458 }
3459
3460 if (is_system_labeled()) {
3461 bslabel_t *clabel;
3462
3463 ASSERT(req->rq_label != NULL);
3464 clabel = req->rq_label;
3465 DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
3466 "got client label from request(1)",
3467 struct svc_req *, req);
3468 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3469 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3470 cs->exi)) {
3471 *cs->statusp = resp->status =
3472 NFS4ERR_SERVERFAULT;
3473 goto out;
3474 }
3475 }
3476 }
3477
3478 VN_HOLD(vp);
3479 cs->vp = vp;
3480
3481 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3482 VN_RELE(cs->vp);
3483 cs->vp = NULL;
3484 cs->exi = sav_exi;
3485 goto out;
3486 }
3487
3488 *cs->statusp = resp->status = NFS4_OK;
3489 out:
3490 DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs,
3491 PUTPUBFH4res *, resp);
3492 }
3493
3494 /*
3495 * XXX - issue with put*fh operations. Suppose /export/home is exported.
3496 * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
3497 * or joe have restrictive search permissions, then we shouldn't let
3498 * the client get a file handle. This is easy to enforce. However, we
3499 * don't know what security flavor should be used until we resolve the
3500 * path name. Another complication is uid mapping. If root is
3501 * the user, then it will be mapped to the anonymous user by default,
3502 * but we won't know that till we've resolved the path name. And we won't
3503 * know what the anonymous user is.
3504 * Luckily, SECINFO is specified to take a full filename.
 * So what we will have to do in rfs4_op_lookup is check that flavor of
3506 * the target object matches that of the request, and if root was the
3507 * caller, check for the root= and anon= options, and if necessary,
3508 * repeat the lookup using the right cred_t. But that's not done yet.
3509 */
3510 /* ARGSUSED */
3511 static void
3512 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3513 struct compound_state *cs)
3514 {
3515 PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3516 PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3517 nfs_fh4_fmt_t *fh_fmtp;
3518
3519 DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs,
3520 PUTFH4args *, args);
3521
3522 if (cs->vp) {
3523 VN_RELE(cs->vp);
3524 cs->vp = NULL;
3525 }
3526
3527 if (cs->cr) {
3528 crfree(cs->cr);
3529 cs->cr = NULL;
3530 }
3531
3532
3533 if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3534 *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3535 goto out;
3536 }
3537
3538 fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3539 cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3540 NULL);
3541
3542 if (cs->exi == NULL) {
3543 *cs->statusp = resp->status = NFS4ERR_STALE;
3544 goto out;
3545 }
3546
3547 cs->cr = crdup(cs->basecr);
3548
3549 ASSERT(cs->cr != NULL);
3550
3551 if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3552 *cs->statusp = resp->status;
3553 goto out;
3554 }
3555
3556 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3557 VN_RELE(cs->vp);
3558 cs->vp = NULL;
3559 goto out;
3560 }
3561
3562 nfs_fh4_copy(&args->object, &cs->fh);
3563 *cs->statusp = resp->status = NFS4_OK;
3564 cs->deleg = FALSE;
3565
3566 out:
3567 DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs,
3568 PUTFH4res *, resp);
3569 }
3570
3571 /* ARGSUSED */
3572 static void
3573 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3574 struct compound_state *cs)
3575 {
3576 PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
3577 int error;
3578 fid_t fid;
3579 struct exportinfo *exi, *sav_exi;
3580
3581 DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
3582
3583 if (cs->vp) {
3584 VN_RELE(cs->vp);
3585 cs->vp = NULL;
3586 }
3587
3588 if (cs->cr)
3589 crfree(cs->cr);
3590
3591 cs->cr = crdup(cs->basecr);
3592
3593 /*
3594 * Using rootdir, the system root vnode,
3595 * get its fid.
3596 */
3597 bzero(&fid, sizeof (fid));
3598 fid.fid_len = MAXFIDSZ;
3599 error = vop_fid_pseudo(rootdir, &fid);
3600 if (error != 0) {
3601 *cs->statusp = resp->status = puterrno4(error);
3602 goto out;
3603 }
3604
3605 /*
3606 * Then use the root fsid & fid it to find out if it's exported
3607 *
3608 * If the server root isn't exported directly, then
3609 * it should at least be a pseudo export based on
3610 * one or more exports further down in the server's
3611 * file tree.
3612 */
3613 exi = checkexport4(&rootdir->v_vfsp->vfs_fsid, &fid, NULL);
3614 if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3615 NFS4_DEBUG(rfs4_debug,
3616 (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3617 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3618 goto out;
3619 }
3620
3621 /*
3622 * Now make a filehandle based on the root
3623 * export and root vnode.
3624 */
3625 error = makefh4(&cs->fh, rootdir, exi);
3626 if (error != 0) {
3627 *cs->statusp = resp->status = puterrno4(error);
3628 goto out;
3629 }
3630
3631 sav_exi = cs->exi;
3632 cs->exi = exi;
3633
3634 VN_HOLD(rootdir);
3635 cs->vp = rootdir;
3636
3637 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3638 VN_RELE(rootdir);
3639 cs->vp = NULL;
3640 cs->exi = sav_exi;
3641 goto out;
3642 }
3643
3644 *cs->statusp = resp->status = NFS4_OK;
3645 cs->deleg = FALSE;
3646 out:
3647 DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3648 PUTROOTFH4res *, resp);
3649 }
3650
3651 /*
3652 * set_rdattr_params sets up the variables used to manage what information
3653 * to get for each directory entry.
3654 */
3655 static nfsstat4
3656 set_rdattr_params(struct nfs4_svgetit_arg *sargp,
3657 bitmap4 attrs, bool_t *need_to_lookup)
3658 {
3659 uint_t va_mask;
3660 nfsstat4 status;
3661 bitmap4 objbits;
3662
3663 status = bitmap4_to_attrmask(attrs, sargp);
3664 if (status != NFS4_OK) {
3665 /*
3666 * could not even figure attr mask
3667 */
3668 return (status);
3669 }
3670 va_mask = sargp->vap->va_mask;
3671
3672 /*
3673 * dirent's d_ino is always correct value for mounted_on_fileid.
3674 * mntdfid_set is set once here, but mounted_on_fileid is
3675 * set in main dirent processing loop for each dirent.
3676 * The mntdfid_set is a simple optimization that lets the
3677 * server attr code avoid work when caller is readdir.
3678 */
3679 sargp->mntdfid_set = TRUE;
3680
3681 /*
3682 * Lookup entry only if client asked for any of the following:
3683 * a) vattr attrs
3684 * b) vfs attrs
3685 * c) attrs w/per-object scope requested (change, filehandle, etc)
3686 * other than mounted_on_fileid (which we can take from dirent)
3687 */
3688 objbits = attrs ? attrs & NFS4_VP_ATTR_MASK : 0;
3689
3690 if (va_mask || sargp->sbp || (objbits & ~FATTR4_MOUNTED_ON_FILEID_MASK))
3691 *need_to_lookup = TRUE;
3692 else
3693 *need_to_lookup = FALSE;
3694
3695 if (sargp->sbp == NULL)
3696 return (NFS4_OK);
3697
3698 /*
3699 * If filesystem attrs are requested, get them now from the
3700 * directory vp, as most entries will have same filesystem. The only
3701 * exception are mounted over entries but we handle
3702 * those as we go (XXX mounted over detection not yet implemented).
3703 */
3704 sargp->vap->va_mask = 0; /* to avoid VOP_GETATTR */
3705 status = bitmap4_get_sysattrs(sargp);
3706 sargp->vap->va_mask = va_mask;
3707
3708 if ((status != NFS4_OK) && sargp->rdattr_error_req) {
3709 /*
3710 * Failed to get filesystem attributes.
3711 * Return a rdattr_error for each entry, but don't fail.
3712 * However, don't get any obj-dependent attrs.
3713 */
3714 sargp->rdattr_error = status; /* for rdattr_error */
3715 *need_to_lookup = FALSE;
3716 /*
3717 * At least get fileid for regular readdir output
3718 */
3719 sargp->vap->va_mask &= AT_NODEID;
3720 status = NFS4_OK;
3721 }
3722
3723 return (status);
3724 }
3725
3726 /*
3727 * readlink: args: CURRENT_FH.
3728 * res: status. If success - CURRENT_FH unchanged, return linktext.
3729 */
3730
3731 /* ARGSUSED */
3732 static void
3733 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3734 struct compound_state *cs)
3735 {
3736 READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3737 int error;
3738 vnode_t *vp;
3739 struct iovec iov;
3740 struct vattr va;
3741 struct uio uio;
3742 char *data;
3743 struct sockaddr *ca;
3744 char *name = NULL;
3745 int is_referral;
3746
3747 DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs);
3748
3749 /* CURRENT_FH: directory */
3750 vp = cs->vp;
3751 if (vp == NULL) {
3752 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3753 goto out;
3754 }
3755
3756 if (cs->access == CS_ACCESS_DENIED) {
3757 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3758 goto out;
3759 }
3760
3761 /* Is it a referral? */
3762 if (vn_is_nfs_reparse(vp, cs->cr) && client_is_downrev(req)) {
3763
3764 is_referral = 1;
3765
3766 } else {
3767
3768 is_referral = 0;
3769
3770 if (vp->v_type == VDIR) {
3771 *cs->statusp = resp->status = NFS4ERR_ISDIR;
3772 goto out;
3773 }
3774
3775 if (vp->v_type != VLNK) {
3776 *cs->statusp = resp->status = NFS4ERR_INVAL;
3777 goto out;
3778 }
3779
3780 }
3781
3782 va.va_mask = AT_MODE;
3783 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
3784 if (error) {
3785 *cs->statusp = resp->status = puterrno4(error);
3786 goto out;
3787 }
3788
3789 if (MANDLOCK(vp, va.va_mode)) {
3790 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3791 goto out;
3792 }
3793
3794 data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3795
3796 if (is_referral) {
3797 char *s;
3798 size_t strsz;
3799
3800 /* Get an artificial symlink based on a referral */
3801 s = build_symlink(vp, cs->cr, &strsz);
3802 global_svstat_ptr[4][NFS_REFERLINKS].value.ui64++;
3803 DTRACE_PROBE2(nfs4serv__func__referral__reflink,
3804 vnode_t *, vp, char *, s);
3805 if (s == NULL)
3806 error = EINVAL;
3807 else {
3808 error = 0;
3809 (void) strlcpy(data, s, MAXPATHLEN + 1);
3810 kmem_free(s, strsz);
3811 }
3812
3813 } else {
3814
3815 iov.iov_base = data;
3816 iov.iov_len = MAXPATHLEN;
3817 uio.uio_iov = &iov;
3818 uio.uio_iovcnt = 1;
3819 uio.uio_segflg = UIO_SYSSPACE;
3820 uio.uio_extflg = UIO_COPY_CACHED;
3821 uio.uio_loffset = 0;
3822 uio.uio_resid = MAXPATHLEN;
3823
3824 error = VOP_READLINK(vp, &uio, cs->cr, NULL);
3825
3826 if (!error)
3827 *(data + MAXPATHLEN - uio.uio_resid) = '\0';
3828 }
3829
3830 if (error) {
3831 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3832 *cs->statusp = resp->status = puterrno4(error);
3833 goto out;
3834 }
3835
3836 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3837 name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND,
3838 MAXPATHLEN + 1);
3839
3840 if (name == NULL) {
3841 /*
3842 * Even though the conversion failed, we return
3843 * something. We just don't translate it.
3844 */
3845 name = data;
3846 }
3847
3848 /*
3849 * treat link name as data
3850 */
3851 (void) str_to_utf8(name, (utf8string *)&resp->link);
3852
3853 if (name != data)
3854 kmem_free(name, MAXPATHLEN + 1);
3855 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3856 *cs->statusp = resp->status = NFS4_OK;
3857
3858 out:
3859 DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs,
3860 READLINK4res *, resp);
3861 }
3862
3863 static void
3864 rfs4_op_readlink_free(nfs_resop4 *resop)
3865 {
3866 READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3867 utf8string *symlink = (utf8string *)&resp->link;
3868
3869 if (symlink->utf8string_val) {
3870 UTF8STRING_FREE(*symlink)
3871 }
3872 }
3873
3874 /*
3875 * release_lockowner:
3876 * Release any state associated with the supplied
3877 * lockowner. Note if any lo_state is holding locks we will not
3878 * rele that lo_state and thus the lockowner will not be destroyed.
3879 * A client using lock after the lock owner stateid has been released
3880 * will suffer the consequence of NFS4ERR_BAD_STATEID and would have
3881 * to reissue the lock with new_lock_owner set to TRUE.
3882 * args: lock_owner
3883 * res: status
3884 */
3885 /* ARGSUSED */
3886 static void
3887 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
3888 struct svc_req *req, struct compound_state *cs)
3889 {
3890 RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
3891 RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
3892 rfs4_lockowner_t *lo;
3893 rfs4_openowner_t *oo;
3894 rfs4_state_t *sp;
3895 rfs4_lo_state_t *lsp;
3896 rfs4_client_t *cp;
3897 bool_t create = FALSE;
3898 locklist_t *llist;
3899 sysid_t sysid;
3900
3901 DTRACE_NFSV4_2(op__release__lockowner__start, struct compound_state *,
3902 cs, RELEASE_LOCKOWNER4args *, ap);
3903
3904 /* Make sure there is a clientid around for this request */
3905 cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
3906
3907 if (cp == NULL) {
3908 *cs->statusp = resp->status =
3909 rfs4_check_clientid(&ap->lock_owner.clientid, 0);
3910 goto out;
3911 }
3912 rfs4_client_rele(cp);
3913
3914 lo = rfs4_findlockowner(&ap->lock_owner, &create);
3915 if (lo == NULL) {
3916 *cs->statusp = resp->status = NFS4_OK;
3917 goto out;
3918 }
3919 ASSERT(lo->rl_client != NULL);
3920
3921 /*
3922 * Check for EXPIRED client. If so will reap state with in a lease
3923 * period or on next set_clientid_confirm step
3924 */
3925 if (rfs4_lease_expired(lo->rl_client)) {
3926 rfs4_lockowner_rele(lo);
3927 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
3928 goto out;
3929 }
3930
3931 /*
3932 * If no sysid has been assigned, then no locks exist; just return.
3933 */
3934 rfs4_dbe_lock(lo->rl_client->rc_dbe);
3935 if (lo->rl_client->rc_sysidt == LM_NOSYSID) {
3936 rfs4_lockowner_rele(lo);
3937 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3938 goto out;
3939 }
3940
3941 sysid = lo->rl_client->rc_sysidt;
3942 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3943
3944 /*
3945 * Mark the lockowner invalid.
3946 */
3947 rfs4_dbe_hide(lo->rl_dbe);
3948
3949 /*
3950 * sysid-pid pair should now not be used since the lockowner is
3951 * invalid. If the client were to instantiate the lockowner again
3952 * it would be assigned a new pid. Thus we can get the list of
3953 * current locks.
3954 */
3955
3956 llist = flk_get_active_locks(sysid, lo->rl_pid);
3957 /* If we are still holding locks fail */
3958 if (llist != NULL) {
3959
3960 *cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
3961
3962 flk_free_locklist(llist);
3963 /*
3964 * We need to unhide the lockowner so the client can
3965 * try it again. The bad thing here is if the client
3966 * has a logic error that took it here in the first place
3967 * he probably has lost accounting of the locks that it
3968 * is holding. So we may have dangling state until the
3969 * open owner state is reaped via close. One scenario
3970 * that could possibly occur is that the client has
3971 * sent the unlock request(s) in separate threads
3972 * and has not waited for the replies before sending the
3973 * RELEASE_LOCKOWNER request. Presumably, it would expect
3974 * and deal appropriately with NFS4ERR_LOCKS_HELD, by
3975 * reissuing the request.
3976 */
3977 rfs4_dbe_unhide(lo->rl_dbe);
3978 rfs4_lockowner_rele(lo);
3979 goto out;
3980 }
3981
3982 /*
3983 * For the corresponding client we need to check each open
3984 * owner for any opens that have lockowner state associated
3985 * with this lockowner.
3986 */
3987
3988 rfs4_dbe_lock(lo->rl_client->rc_dbe);
3989 for (oo = list_head(&lo->rl_client->rc_openownerlist); oo != NULL;
3990 oo = list_next(&lo->rl_client->rc_openownerlist, oo)) {
3991
3992 rfs4_dbe_lock(oo->ro_dbe);
3993 for (sp = list_head(&oo->ro_statelist); sp != NULL;
3994 sp = list_next(&oo->ro_statelist, sp)) {
3995
3996 rfs4_dbe_lock(sp->rs_dbe);
3997 for (lsp = list_head(&sp->rs_lostatelist);
3998 lsp != NULL;
3999 lsp = list_next(&sp->rs_lostatelist, lsp)) {
4000 if (lsp->rls_locker == lo) {
4001 rfs4_dbe_lock(lsp->rls_dbe);
4002 rfs4_dbe_invalidate(lsp->rls_dbe);
4003 rfs4_dbe_unlock(lsp->rls_dbe);
4004 }
4005 }
4006 rfs4_dbe_unlock(sp->rs_dbe);
4007 }
4008 rfs4_dbe_unlock(oo->ro_dbe);
4009 }
4010 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4011
4012 rfs4_lockowner_rele(lo);
4013
4014 *cs->statusp = resp->status = NFS4_OK;
4015
4016 out:
4017 DTRACE_NFSV4_2(op__release__lockowner__done, struct compound_state *,
4018 cs, RELEASE_LOCKOWNER4res *, resp);
4019 }
4020
4021 /*
4022 * short utility function to lookup a file and recall the delegation
4023 */
4024 static rfs4_file_t *
4025 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
4026 int *lkup_error, cred_t *cr)
4027 {
4028 vnode_t *vp;
4029 rfs4_file_t *fp = NULL;
4030 bool_t fcreate = FALSE;
4031 int error;
4032
4033 if (vpp)
4034 *vpp = NULL;
4035
4036 if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, NULL, NULL,
4037 NULL)) == 0) {
4038 if (vp->v_type == VREG)
4039 fp = rfs4_findfile(vp, NULL, &fcreate);
4040 if (vpp)
4041 *vpp = vp;
4042 else
4043 VN_RELE(vp);
4044 }
4045
4046 if (lkup_error)
4047 *lkup_error = error;
4048
4049 return (fp);
4050 }
4051
4052 /*
4053 * remove: args: CURRENT_FH: directory; name.
4054 * res: status. If success - CURRENT_FH unchanged, return change_info
4055 * for directory.
4056 */
4057 /* ARGSUSED */
4058 static void
4059 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4060 struct compound_state *cs)
4061 {
4062 REMOVE4args *args = &argop->nfs_argop4_u.opremove;
4063 REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
4064 int error;
4065 vnode_t *dvp, *vp;
4066 struct vattr bdva, idva, adva;
4067 char *nm;
4068 uint_t len;
4069 rfs4_file_t *fp;
4070 int in_crit = 0;
4071 bslabel_t *clabel;
4072 struct sockaddr *ca;
4073 char *name = NULL;
4074 nfsstat4 status;
4075
4076 DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs,
4077 REMOVE4args *, args);
4078
4079 /* CURRENT_FH: directory */
4080 dvp = cs->vp;
4081 if (dvp == NULL) {
4082 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4083 goto out;
4084 }
4085
4086 if (cs->access == CS_ACCESS_DENIED) {
4087 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4088 goto out;
4089 }
4090
4091 /*
4092 * If there is an unshared filesystem mounted on this vnode,
4093 * Do not allow to remove anything in this directory.
4094 */
4095 if (vn_ismntpt(dvp)) {
4096 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4097 goto out;
4098 }
4099
4100 if (dvp->v_type != VDIR) {
4101 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4102 goto out;
4103 }
4104
4105 status = utf8_dir_verify(&args->target);
4106 if (status != NFS4_OK) {
4107 *cs->statusp = resp->status = status;
4108 goto out;
4109 }
4110
4111 /*
4112 * Lookup the file so that we can check if it's a directory
4113 */
4114 nm = utf8_to_fn(&args->target, &len, NULL);
4115 if (nm == NULL) {
4116 *cs->statusp = resp->status = NFS4ERR_INVAL;
4117 goto out;
4118 }
4119
4120 if (len > MAXNAMELEN) {
4121 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4122 kmem_free(nm, len);
4123 goto out;
4124 }
4125
4126 if (rdonly4(req, cs)) {
4127 *cs->statusp = resp->status = NFS4ERR_ROFS;
4128 kmem_free(nm, len);
4129 goto out;
4130 }
4131
4132 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4133 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
4134 MAXPATHLEN + 1);
4135
4136 if (name == NULL) {
4137 *cs->statusp = resp->status = NFS4ERR_INVAL;
4138 kmem_free(nm, len);
4139 goto out;
4140 }
4141
4142 /*
4143 * Lookup the file to determine type and while we are see if
4144 * there is a file struct around and check for delegation.
4145 * We don't need to acquire va_seq before this lookup, if
4146 * it causes an update, cinfo.before will not match, which will
4147 * trigger a cache flush even if atomic is TRUE.
4148 */
4149 if (fp = rfs4_lookup_and_findfile(dvp, name, &vp, &error, cs->cr)) {
4150 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4151 NULL)) {
4152 VN_RELE(vp);
4153 rfs4_file_rele(fp);
4154 *cs->statusp = resp->status = NFS4ERR_DELAY;
4155 if (nm != name)
4156 kmem_free(name, MAXPATHLEN + 1);
4157 kmem_free(nm, len);
4158 goto out;
4159 }
4160 }
4161
4162 /* Didn't find anything to remove */
4163 if (vp == NULL) {
4164 *cs->statusp = resp->status = error;
4165 if (nm != name)
4166 kmem_free(name, MAXPATHLEN + 1);
4167 kmem_free(nm, len);
4168 goto out;
4169 }
4170
4171 if (nbl_need_check(vp)) {
4172 nbl_start_crit(vp, RW_READER);
4173 in_crit = 1;
4174 if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
4175 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4176 if (nm != name)
4177 kmem_free(name, MAXPATHLEN + 1);
4178 kmem_free(nm, len);
4179 nbl_end_crit(vp);
4180 VN_RELE(vp);
4181 if (fp) {
4182 rfs4_clear_dont_grant(fp);
4183 rfs4_file_rele(fp);
4184 }
4185 goto out;
4186 }
4187 }
4188
4189 /* check label before allowing removal */
4190 if (is_system_labeled()) {
4191 ASSERT(req->rq_label != NULL);
4192 clabel = req->rq_label;
4193 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
4194 "got client label from request(1)",
4195 struct svc_req *, req);
4196 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4197 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4198 cs->exi)) {
4199 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4200 if (name != nm)
4201 kmem_free(name, MAXPATHLEN + 1);
4202 kmem_free(nm, len);
4203 if (in_crit)
4204 nbl_end_crit(vp);
4205 VN_RELE(vp);
4206 if (fp) {
4207 rfs4_clear_dont_grant(fp);
4208 rfs4_file_rele(fp);
4209 }
4210 goto out;
4211 }
4212 }
4213 }
4214
4215 /* Get dir "before" change value */
4216 bdva.va_mask = AT_CTIME|AT_SEQ;
4217 error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
4218 if (error) {
4219 *cs->statusp = resp->status = puterrno4(error);
4220 if (nm != name)
4221 kmem_free(name, MAXPATHLEN + 1);
4222 kmem_free(nm, len);
4223 if (in_crit)
4224 nbl_end_crit(vp);
4225 VN_RELE(vp);
4226 if (fp) {
4227 rfs4_clear_dont_grant(fp);
4228 rfs4_file_rele(fp);
4229 }
4230 goto out;
4231 }
4232 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
4233
4234 /* Actually do the REMOVE operation */
4235 if (vp->v_type == VDIR) {
4236 /*
4237 * Can't remove a directory that has a mounted-on filesystem.
4238 */
4239 if (vn_ismntpt(vp)) {
4240 error = EACCES;
4241 } else {
4242 /*
4243 * System V defines rmdir to return EEXIST,
4244 * not ENOTEMPTY, if the directory is not
4245 * empty. A System V NFS server needs to map
4246 * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4247 * transmit over the wire.
4248 */
4249 if ((error = VOP_RMDIR(dvp, name, rootdir, cs->cr,
4250 NULL, 0)) == EEXIST)
4251 error = ENOTEMPTY;
4252 }
4253 } else {
4254 if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
4255 fp != NULL) {
4256 struct vattr va;
4257 vnode_t *tvp;
4258
4259 rfs4_dbe_lock(fp->rf_dbe);
4260 tvp = fp->rf_vp;
4261 if (tvp)
4262 VN_HOLD(tvp);
4263 rfs4_dbe_unlock(fp->rf_dbe);
4264
4265 if (tvp) {
4266 /*
4267 * This is va_seq safe because we are not
4268 * manipulating dvp.
4269 */
4270 va.va_mask = AT_NLINK;
4271 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4272 va.va_nlink == 0) {
4273 /* Remove state on file remove */
4274 if (in_crit) {
4275 nbl_end_crit(vp);
4276 in_crit = 0;
4277 }
4278 rfs4_close_all_state(fp);
4279 }
4280 VN_RELE(tvp);
4281 }
4282 }
4283 }
4284
4285 if (in_crit)
4286 nbl_end_crit(vp);
4287 VN_RELE(vp);
4288
4289 if (fp) {
4290 rfs4_clear_dont_grant(fp);
4291 rfs4_file_rele(fp);
4292 }
4293 if (nm != name)
4294 kmem_free(name, MAXPATHLEN + 1);
4295 kmem_free(nm, len);
4296
4297 if (error) {
4298 *cs->statusp = resp->status = puterrno4(error);
4299 goto out;
4300 }
4301
4302 /*
4303 * Get the initial "after" sequence number, if it fails, set to zero
4304 */
4305 idva.va_mask = AT_SEQ;
4306 if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
4307 idva.va_seq = 0;
4308
4309 /*
4310 * Force modified data and metadata out to stable storage.
4311 */
4312 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
4313
4314 /*
4315 * Get "after" change value, if it fails, simply return the
4316 * before value.
4317 */
4318 adva.va_mask = AT_CTIME|AT_SEQ;
4319 if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
4320 adva.va_ctime = bdva.va_ctime;
4321 adva.va_seq = 0;
4322 }
4323
4324 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
4325
4326 /*
4327 * The cinfo.atomic = TRUE only if we have
4328 * non-zero va_seq's, and it has incremented by exactly one
4329 * during the VOP_REMOVE/RMDIR and it didn't change during
4330 * the VOP_FSYNC.
4331 */
4332 if (bdva.va_seq && idva.va_seq && adva.va_seq &&
4333 idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
4334 resp->cinfo.atomic = TRUE;
4335 else
4336 resp->cinfo.atomic = FALSE;
4337
4338 *cs->statusp = resp->status = NFS4_OK;
4339
4340 out:
4341 DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
4342 REMOVE4res *, resp);
4343 }
4344
4345 /*
4346 * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
4347 * oldname and newname.
4348 * res: status. If success - CURRENT_FH unchanged, return change_info
4349 * for both from and target directories.
4350 */
4351 /* ARGSUSED */
4352 static void
4353 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4354 struct compound_state *cs)
4355 {
4356 RENAME4args *args = &argop->nfs_argop4_u.oprename;
4357 RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4358 int error;
4359 vnode_t *odvp;
4360 vnode_t *ndvp;
4361 vnode_t *srcvp, *targvp;
4362 struct vattr obdva, oidva, oadva;
4363 struct vattr nbdva, nidva, nadva;
4364 char *onm, *nnm;
4365 uint_t olen, nlen;
4366 rfs4_file_t *fp, *sfp;
4367 int in_crit_src, in_crit_targ;
4368 int fp_rele_grant_hold, sfp_rele_grant_hold;
4369 bslabel_t *clabel;
4370 struct sockaddr *ca;
4371 char *converted_onm = NULL;
4372 char *converted_nnm = NULL;
4373 nfsstat4 status;
4374
4375 DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4376 RENAME4args *, args);
4377
4378 fp = sfp = NULL;
4379 srcvp = targvp = NULL;
4380 in_crit_src = in_crit_targ = 0;
4381 fp_rele_grant_hold = sfp_rele_grant_hold = 0;
4382
4383 /* CURRENT_FH: target directory */
4384 ndvp = cs->vp;
4385 if (ndvp == NULL) {
4386 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4387 goto out;
4388 }
4389
4390 /* SAVED_FH: from directory */
4391 odvp = cs->saved_vp;
4392 if (odvp == NULL) {
4393 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4394 goto out;
4395 }
4396
4397 if (cs->access == CS_ACCESS_DENIED) {
4398 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4399 goto out;
4400 }
4401
4402 /*
4403 * If there is an unshared filesystem mounted on this vnode,
4404 * do not allow to rename objects in this directory.
4405 */
4406 if (vn_ismntpt(odvp)) {
4407 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4408 goto out;
4409 }
4410
4411 /*
4412 * If there is an unshared filesystem mounted on this vnode,
4413 * do not allow to rename to this directory.
4414 */
4415 if (vn_ismntpt(ndvp)) {
4416 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4417 goto out;
4418 }
4419
4420 if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
4421 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4422 goto out;
4423 }
4424
4425 if (cs->saved_exi != cs->exi) {
4426 *cs->statusp = resp->status = NFS4ERR_XDEV;
4427 goto out;
4428 }
4429
4430 status = utf8_dir_verify(&args->oldname);
4431 if (status != NFS4_OK) {
4432 *cs->statusp = resp->status = status;
4433 goto out;
4434 }
4435
4436 status = utf8_dir_verify(&args->newname);
4437 if (status != NFS4_OK) {
4438 *cs->statusp = resp->status = status;
4439 goto out;
4440 }
4441
4442 onm = utf8_to_fn(&args->oldname, &olen, NULL);
4443 if (onm == NULL) {
4444 *cs->statusp = resp->status = NFS4ERR_INVAL;
4445 goto out;
4446 }
4447 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4448 nlen = MAXPATHLEN + 1;
4449 converted_onm = nfscmd_convname(ca, cs->exi, onm, NFSCMD_CONV_INBOUND,
4450 nlen);
4451
4452 if (converted_onm == NULL) {
4453 *cs->statusp = resp->status = NFS4ERR_INVAL;
4454 kmem_free(onm, olen);
4455 goto out;
4456 }
4457
4458 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4459 if (nnm == NULL) {
4460 *cs->statusp = resp->status = NFS4ERR_INVAL;
4461 if (onm != converted_onm)
4462 kmem_free(converted_onm, MAXPATHLEN + 1);
4463 kmem_free(onm, olen);
4464 goto out;
4465 }
4466 converted_nnm = nfscmd_convname(ca, cs->exi, nnm, NFSCMD_CONV_INBOUND,
4467 MAXPATHLEN + 1);
4468
4469 if (converted_nnm == NULL) {
4470 *cs->statusp = resp->status = NFS4ERR_INVAL;
4471 kmem_free(nnm, nlen);
4472 nnm = NULL;
4473 if (onm != converted_onm)
4474 kmem_free(converted_onm, MAXPATHLEN + 1);
4475 kmem_free(onm, olen);
4476 goto out;
4477 }
4478
4479
4480 if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
4481 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4482 kmem_free(onm, olen);
4483 kmem_free(nnm, nlen);
4484 goto out;
4485 }
4486
4487
4488 if (rdonly4(req, cs)) {
4489 *cs->statusp = resp->status = NFS4ERR_ROFS;
4490 if (onm != converted_onm)
4491 kmem_free(converted_onm, MAXPATHLEN + 1);
4492 kmem_free(onm, olen);
4493 if (nnm != converted_nnm)
4494 kmem_free(converted_nnm, MAXPATHLEN + 1);
4495 kmem_free(nnm, nlen);
4496 goto out;
4497 }
4498
4499 /* check label of the target dir */
4500 if (is_system_labeled()) {
4501 ASSERT(req->rq_label != NULL);
4502 clabel = req->rq_label;
4503 DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *,
4504 "got client label from request(1)",
4505 struct svc_req *, req);
4506 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4507 if (!do_rfs_label_check(clabel, ndvp,
4508 EQUALITY_CHECK, cs->exi)) {
4509 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4510 goto err_out;
4511 }
4512 }
4513 }
4514
4515 /*
4516 * Is the source a file and have a delegation?
4517 * We don't need to acquire va_seq before these lookups, if
4518 * it causes an update, cinfo.before will not match, which will
4519 * trigger a cache flush even if atomic is TRUE.
4520 */
4521 if (sfp = rfs4_lookup_and_findfile(odvp, converted_onm, &srcvp,
4522 &error, cs->cr)) {
4523 if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
4524 NULL)) {
4525 *cs->statusp = resp->status = NFS4ERR_DELAY;
4526 goto err_out;
4527 }
4528 }
4529
4530 if (srcvp == NULL) {
4531 *cs->statusp = resp->status = puterrno4(error);
4532 if (onm != converted_onm)
4533 kmem_free(converted_onm, MAXPATHLEN + 1);
4534 kmem_free(onm, olen);
4535 if (nnm != converted_nnm)
4536 kmem_free(converted_nnm, MAXPATHLEN + 1);
4537 kmem_free(nnm, nlen);
4538 goto out;
4539 }
4540
4541 sfp_rele_grant_hold = 1;
4542
4543 /* Does the destination exist and a file and have a delegation? */
4544 if (fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp,
4545 NULL, cs->cr)) {
4546 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4547 NULL)) {
4548 *cs->statusp = resp->status = NFS4ERR_DELAY;
4549 goto err_out;
4550 }
4551 }
4552 fp_rele_grant_hold = 1;
4553
4554
4555 /* Check for NBMAND lock on both source and target */
4556 if (nbl_need_check(srcvp)) {
4557 nbl_start_crit(srcvp, RW_READER);
4558 in_crit_src = 1;
4559 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4560 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4561 goto err_out;
4562 }
4563 }
4564
4565 if (targvp && nbl_need_check(targvp)) {
4566 nbl_start_crit(targvp, RW_READER);
4567 in_crit_targ = 1;
4568 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
4569 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4570 goto err_out;
4571 }
4572 }
4573
4574 /* Get source "before" change value */
4575 obdva.va_mask = AT_CTIME|AT_SEQ;
4576 error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL);
4577 if (!error) {
4578 nbdva.va_mask = AT_CTIME|AT_SEQ;
4579 error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
4580 }
4581 if (error) {
4582 *cs->statusp = resp->status = puterrno4(error);
4583 goto err_out;
4584 }
4585
4586 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4587 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4588
4589 if ((error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm,
4590 cs->cr, NULL, 0)) == 0 && fp != NULL) {
4591 struct vattr va;
4592 vnode_t *tvp;
4593
4594 rfs4_dbe_lock(fp->rf_dbe);
4595 tvp = fp->rf_vp;
4596 if (tvp)
4597 VN_HOLD(tvp);
4598 rfs4_dbe_unlock(fp->rf_dbe);
4599
4600 if (tvp) {
4601 va.va_mask = AT_NLINK;
4602 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4603 va.va_nlink == 0) {
4604 /* The file is gone and so should the state */
4605 if (in_crit_targ) {
4606 nbl_end_crit(targvp);
4607 in_crit_targ = 0;
4608 }
4609 rfs4_close_all_state(fp);
4610 }
4611 VN_RELE(tvp);
4612 }
4613 }
4614 if (error == 0)
4615 vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4616
4617 if (in_crit_src)
4618 nbl_end_crit(srcvp);
4619 if (srcvp)
4620 VN_RELE(srcvp);
4621 if (in_crit_targ)
4622 nbl_end_crit(targvp);
4623 if (targvp)
4624 VN_RELE(targvp);
4625
4626 if (sfp) {
4627 rfs4_clear_dont_grant(sfp);
4628 rfs4_file_rele(sfp);
4629 }
4630 if (fp) {
4631 rfs4_clear_dont_grant(fp);
4632 rfs4_file_rele(fp);
4633 }
4634
4635 if (converted_onm != onm)
4636 kmem_free(converted_onm, MAXPATHLEN + 1);
4637 kmem_free(onm, olen);
4638 if (converted_nnm != nnm)
4639 kmem_free(converted_nnm, MAXPATHLEN + 1);
4640 kmem_free(nnm, nlen);
4641
4642 /*
4643 * Get the initial "after" sequence number, if it fails, set to zero
4644 */
4645 oidva.va_mask = AT_SEQ;
4646 if (VOP_GETATTR(odvp, &oidva, 0, cs->cr, NULL))
4647 oidva.va_seq = 0;
4648
4649 nidva.va_mask = AT_SEQ;
4650 if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr, NULL))
4651 nidva.va_seq = 0;
4652
4653 /*
4654 * Force modified data and metadata out to stable storage.
4655 */
4656 (void) VOP_FSYNC(odvp, 0, cs->cr, NULL);
4657 (void) VOP_FSYNC(ndvp, 0, cs->cr, NULL);
4658
4659 if (error) {
4660 *cs->statusp = resp->status = puterrno4(error);
4661 goto out;
4662 }
4663
4664 /*
4665 * Get "after" change values, if it fails, simply return the
4666 * before value.
4667 */
4668 oadva.va_mask = AT_CTIME|AT_SEQ;
4669 if (VOP_GETATTR(odvp, &oadva, 0, cs->cr, NULL)) {
4670 oadva.va_ctime = obdva.va_ctime;
4671 oadva.va_seq = 0;
4672 }
4673
4674 nadva.va_mask = AT_CTIME|AT_SEQ;
4675 if (VOP_GETATTR(odvp, &nadva, 0, cs->cr, NULL)) {
4676 nadva.va_ctime = nbdva.va_ctime;
4677 nadva.va_seq = 0;
4678 }
4679
4680 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
4681 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
4682
4683 /*
4684 * The cinfo.atomic = TRUE only if we have
4685 * non-zero va_seq's, and it has incremented by exactly one
4686 * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
4687 */
4688 if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
4689 oidva.va_seq == (obdva.va_seq + 1) && oidva.va_seq == oadva.va_seq)
4690 resp->source_cinfo.atomic = TRUE;
4691 else
4692 resp->source_cinfo.atomic = FALSE;
4693
4694 if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
4695 nidva.va_seq == (nbdva.va_seq + 1) && nidva.va_seq == nadva.va_seq)
4696 resp->target_cinfo.atomic = TRUE;
4697 else
4698 resp->target_cinfo.atomic = FALSE;
4699
4700 #ifdef VOLATILE_FH_TEST
4701 {
4702 extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
4703
4704 /*
4705 * Add the renamed file handle to the volatile rename list
4706 */
4707 if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
4708 /* file handles may expire on rename */
4709 vnode_t *vp;
4710
4711 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4712 /*
4713 * Already know that nnm will be a valid string
4714 */
4715 error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr,
4716 NULL, NULL, NULL);
4717 kmem_free(nnm, nlen);
4718 if (!error) {
4719 add_volrnm_fh(cs->exi, vp);
4720 VN_RELE(vp);
4721 }
4722 }
4723 }
4724 #endif /* VOLATILE_FH_TEST */
4725
4726 *cs->statusp = resp->status = NFS4_OK;
4727 out:
4728 DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4729 RENAME4res *, resp);
4730 return;
4731
4732 err_out:
4733 if (onm != converted_onm)
4734 kmem_free(converted_onm, MAXPATHLEN + 1);
4735 if (onm != NULL)
4736 kmem_free(onm, olen);
4737 if (nnm != converted_nnm)
4738 kmem_free(converted_nnm, MAXPATHLEN + 1);
4739 if (nnm != NULL)
4740 kmem_free(nnm, nlen);
4741
4742 if (in_crit_src) nbl_end_crit(srcvp);
4743 if (in_crit_targ) nbl_end_crit(targvp);
4744 if (targvp) VN_RELE(targvp);
4745 if (srcvp) VN_RELE(srcvp);
4746 if (sfp) {
4747 if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
4748 rfs4_file_rele(sfp);
4749 }
4750 if (fp) {
4751 if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
4752 rfs4_file_rele(fp);
4753 }
4754
4755 DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4756 RENAME4res *, resp);
4757 }
4758
4759 /* ARGSUSED */
4760 static void
4761 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4762 struct compound_state *cs)
4763 {
4764 RENEW4args *args = &argop->nfs_argop4_u.oprenew;
4765 RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
4766 rfs4_client_t *cp;
4767
4768 DTRACE_NFSV4_2(op__renew__start, struct compound_state *, cs,
4769 RENEW4args *, args);
4770
4771 if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
4772 *cs->statusp = resp->status =
4773 rfs4_check_clientid(&args->clientid, 0);
4774 goto out;
4775 }
4776
4777 if (rfs4_lease_expired(cp)) {
4778 rfs4_client_rele(cp);
4779 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
4780 goto out;
4781 }
4782
4783 rfs4_update_lease(cp);
4784
4785 mutex_enter(cp->rc_cbinfo.cb_lock);
4786 if (cp->rc_cbinfo.cb_notified_of_cb_path_down == FALSE) {
4787 cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
4788 *cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
4789 } else {
4790 *cs->statusp = resp->status = NFS4_OK;
4791 }
4792 mutex_exit(cp->rc_cbinfo.cb_lock);
4793
4794 rfs4_client_rele(cp);
4795
4796 out:
4797 DTRACE_NFSV4_2(op__renew__done, struct compound_state *, cs,
4798 RENEW4res *, resp);
4799 }
4800
4801 /* ARGSUSED */
4802 static void
4803 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
4804 struct compound_state *cs)
4805 {
4806 RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
4807
4808 DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs);
4809
4810 /* No need to check cs->access - we are not accessing any object */
4811 if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
4812 *cs->statusp = resp->status = NFS4ERR_RESTOREFH;
4813 goto out;
4814 }
4815 if (cs->vp != NULL) {
4816 VN_RELE(cs->vp);
4817 }
4818 cs->vp = cs->saved_vp;
4819 cs->saved_vp = NULL;
4820 cs->exi = cs->saved_exi;
4821 nfs_fh4_copy(&cs->saved_fh, &cs->fh);
4822 *cs->statusp = resp->status = NFS4_OK;
4823 cs->deleg = FALSE;
4824
4825 out:
4826 DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs,
4827 RESTOREFH4res *, resp);
4828 }
4829
4830 /* ARGSUSED */
4831 static void
4832 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4833 struct compound_state *cs)
4834 {
4835 SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
4836
4837 DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs);
4838
4839 /* No need to check cs->access - we are not accessing any object */
4840 if (cs->vp == NULL) {
4841 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4842 goto out;
4843 }
4844 if (cs->saved_vp != NULL) {
4845 VN_RELE(cs->saved_vp);
4846 }
4847 cs->saved_vp = cs->vp;
4848 VN_HOLD(cs->saved_vp);
4849 cs->saved_exi = cs->exi;
4850 /*
4851 * since SAVEFH is fairly rare, don't alloc space for its fh
4852 * unless necessary.
4853 */
4854 if (cs->saved_fh.nfs_fh4_val == NULL) {
4855 cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
4856 }
4857 nfs_fh4_copy(&cs->fh, &cs->saved_fh);
4858 *cs->statusp = resp->status = NFS4_OK;
4859
4860 out:
4861 DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs,
4862 SAVEFH4res *, resp);
4863 }
4864
4865 /*
4866 * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
4867 * return the bitmap of attrs that were set successfully. It is also
4868 * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
4869 * always be called only after rfs4_do_set_attrs().
4870 *
4871 * Verify that the attributes are same as the expected ones. sargp->vap
4872 * and sargp->sbp contain the input attributes as translated from fattr4.
4873 *
4874 * This function verifies only the attrs that correspond to a vattr or
4875 * vfsstat struct. That is because of the extra step needed to get the
4876 * corresponding system structs. Other attributes have already been set or
4877 * verified by do_rfs4_set_attrs.
4878 *
4879 * Return 0 if all attrs match, -1 if some don't, error if error processing.
4880 */
4881 static int
4882 rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
4883 bitmap4 *resp, struct nfs4_ntov_table *ntovp)
4884 {
4885 int error, ret_error = 0;
4886 int i, k;
4887 uint_t sva_mask = sargp->vap->va_mask;
4888 uint_t vbit;
4889 union nfs4_attr_u *na;
4890 uint8_t *amap;
4891 bool_t getsb = ntovp->vfsstat;
4892
4893 if (sva_mask != 0) {
4894 /*
4895 * Okay to overwrite sargp->vap because we verify based
4896 * on the incoming values.
4897 */
4898 ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
4899 sargp->cs->cr, NULL);
4900 if (ret_error) {
4901 if (resp == NULL)
4902 return (ret_error);
4903 /*
4904 * Must return bitmap of successful attrs
4905 */
4906 sva_mask = 0; /* to prevent checking vap later */
4907 } else {
4908 /*
4909 * Some file systems clobber va_mask. it is probably
4910 * wrong of them to do so, nonethless we practice
4911 * defensive coding.
4912 * See bug id 4276830.
4913 */
4914 sargp->vap->va_mask = sva_mask;
4915 }
4916 }
4917
4918 if (getsb) {
4919 /*
4920 * Now get the superblock and loop on the bitmap, as there is
4921 * no simple way of translating from superblock to bitmap4.
4922 */
4923 ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
4924 if (ret_error) {
4925 if (resp == NULL)
4926 goto errout;
4927 getsb = FALSE;
4928 }
4929 }
4930
4931 /*
4932 * Now loop and verify each attribute which getattr returned
4933 * whether it's the same as the input.
4934 */
4935 if (resp == NULL && !getsb && (sva_mask == 0))
4936 goto errout;
4937
4938 na = ntovp->na;
4939 amap = ntovp->amap;
4940 k = 0;
4941 for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
4942 k = *amap;
4943 ASSERT(nfs4_ntov_map[k].nval == k);
4944 vbit = nfs4_ntov_map[k].vbit;
4945
4946 /*
4947 * If vattr attribute but VOP_GETATTR failed, or it's
4948 * superblock attribute but VFS_STATVFS failed, skip
4949 */
4950 if (vbit) {
4951 if ((vbit & sva_mask) == 0)
4952 continue;
4953 } else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
4954 continue;
4955 }
4956 error = (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_VERIT, sargp, na);
4957 if (resp != NULL) {
4958 if (error)
4959 ret_error = -1; /* not all match */
4960 else /* update response bitmap */
4961 *resp |= nfs4_ntov_map[k].fbit;
4962 continue;
4963 }
4964 if (error) {
4965 ret_error = -1; /* not all match */
4966 break;
4967 }
4968 }
4969 errout:
4970 return (ret_error);
4971 }
4972
4973 /*
4974 * Decode the attribute to be set/verified. If the attr requires a sys op
4975 * (VOP_GETATTR, VFS_VFSSTAT), and the request is to verify, then don't
4976 * call the sv_getit function for it, because the sys op hasn't yet been done.
4977 * Return 0 for success, error code if failed.
4978 *
4979 * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
4980 */
4981 static int
4982 decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
4983 int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
4984 {
4985 int error = 0;
4986 bool_t set_later;
4987
4988 sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
4989
4990 if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
4991 set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
4992 /*
4993 * don't verify yet if a vattr or sb dependent attr,
4994 * because we don't have their sys values yet.
4995 * Will be done later.
4996 */
4997 if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
4998 /*
4999 * ACLs are a special case, since setting the MODE
5000 * conflicts with setting the ACL. We delay setting
5001 * the ACL until all other attributes have been set.
5002 * The ACL gets set in do_rfs4_op_setattr().
5003 */
5004 if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
5005 error = (*nfs4_ntov_map[k].sv_getit)(cmd,
5006 sargp, nap);
5007 if (error) {
5008 xdr_free(nfs4_ntov_map[k].xfunc,
5009 (caddr_t)nap);
5010 }
5011 }
5012 }
5013 } else {
5014 #ifdef DEBUG
5015 cmn_err(CE_NOTE, "decode_fattr4_attr: error "
5016 "decoding attribute %d\n", k);
5017 #endif
5018 error = EINVAL;
5019 }
5020 if (!error && resp_bval && !set_later) {
5021 *resp_bval |= nfs4_ntov_map[k].fbit;
5022 }
5023
5024 return (error);
5025 }
5026
5027 /*
5028 * Set vattr based on incoming fattr4 attrs - used by setattr.
5029 * Set response mask. Ignore any values that are not writable vattr attrs.
5030 */
5031 static nfsstat4
5032 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5033 struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
5034 nfs4_attr_cmd_t cmd)
5035 {
5036 int error = 0;
5037 int i;
5038 char *attrs = fattrp->attrlist4;
5039 uint32_t attrslen = fattrp->attrlist4_len;
5040 XDR xdr;
5041 nfsstat4 status = NFS4_OK;
5042 vnode_t *vp = cs->vp;
5043 union nfs4_attr_u *na;
5044 uint8_t *amap;
5045
5046 #ifndef lint
5047 /*
5048 * Make sure that maximum attribute number can be expressed as an
5049 * 8 bit quantity.
5050 */
5051 ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
5052 #endif
5053
5054 if (vp == NULL) {
5055 if (resp)
5056 *resp = 0;
5057 return (NFS4ERR_NOFILEHANDLE);
5058 }
5059 if (cs->access == CS_ACCESS_DENIED) {
5060 if (resp)
5061 *resp = 0;
5062 return (NFS4ERR_ACCESS);
5063 }
5064
5065 sargp->op = cmd;
5066 sargp->cs = cs;
5067 sargp->flag = 0; /* may be set later */
5068 sargp->vap->va_mask = 0;
5069 sargp->rdattr_error = NFS4_OK;
5070 sargp->rdattr_error_req = FALSE;
5071 /* sargp->sbp is set by the caller */
5072
5073 xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
5074
5075 na = ntovp->na;
5076 amap = ntovp->amap;
5077
5078 /*
5079 * The following loop iterates on the nfs4_ntov_map checking
5080 * if the fbit is set in the requested bitmap.
5081 * If set then we process the arguments using the
5082 * rfs4_fattr4 conversion functions to populate the setattr
5083 * vattr and va_mask. Any settable attrs that are not using vattr
5084 * will be set in this loop.
5085 */
5086 for (i = 0; i < nfs4_ntov_map_size; i++) {
5087 if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
5088 continue;
5089 }
5090 /*
5091 * If setattr, must be a writable attr.
5092 * If verify/nverify, must be a readable attr.
5093 */
5094 if ((error = (*nfs4_ntov_map[i].sv_getit)(
5095 NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
5096 /*
5097 * Client tries to set/verify an
5098 * unsupported attribute, tries to set
5099 * a read only attr or verify a write
5100 * only one - error!
5101 */
5102 break;
5103 }
5104 /*
5105 * Decode the attribute to set/verify
5106 */
5107 error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
5108 &xdr, resp ? resp : NULL, na);
5109 if (error)
5110 break;
5111 *amap++ = (uint8_t)nfs4_ntov_map[i].nval;
5112 na++;
5113 (ntovp->attrcnt)++;
5114 if (nfs4_ntov_map[i].vfsstat)
5115 ntovp->vfsstat = TRUE;
5116 }
5117
5118 if (error != 0)
5119 status = (error == ENOTSUP ? NFS4ERR_ATTRNOTSUPP :
5120 puterrno4(error));
5121 /* xdrmem_destroy(&xdrs); */ /* NO-OP */
5122 return (status);
5123 }
5124
/*
 * Worker for the SETATTR operation: decode the client's fattr4, apply the
 * attributes to cs->vp, and report in *resp which attributes were set.
 *
 * Steps, in the order performed below:
 *  - do_rfs4_set_attrs() decodes fattrp into sarg/ntov (NFS4ATTR_SETIT).
 *  - When both ACL and MODE are supplied, the new mode may differ from
 *    the current one only in VSUID/VSGID/VSVTX, otherwise NFS4ERR_INVAL.
 *  - When AT_SIZE is being set, the stateid is checked with
 *    rfs4_check_stateid() (which is handed &ct); otherwise the caller
 *    context ct is filled in by hand.
 *  - For a regular file with AT_SIZE, NBMAND conflicts are checked and,
 *    if the caller owns the file, the size is changed through VOP_SPACE
 *    and AT_SIZE is removed from the mask passed to VOP_SETATTR.
 *  - Remaining vattr attrs are applied with VOP_SETATTR.
 *  - The ACL, if requested, is set last.
 *  - On error the response bitmap is rebuilt with rfs4_verify_attr();
 *    on success metadata is synced and the bitmap is derived from
 *    va_mask.
 *
 * resp:    out; bitmap of attributes set, always masked with
 *          fattrp->attrmask before returning.
 * fattrp:  attributes requested by the client.
 * cs:      compound state; supplies the vnode and credentials.
 * stateid: stateid from the request, only examined for size changes.
 *
 * Returns the NFSv4 status for the operation.
 */
static nfsstat4
do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
    stateid4 *stateid)
{
	int error = 0;
	struct nfs4_svgetit_arg sarg;
	bool_t trunc;

	nfsstat4 status = NFS4_OK;
	cred_t *cr = cs->cr;
	vnode_t *vp = cs->vp;
	struct nfs4_ntov_table ntov;
	struct statvfs64 sb;
	struct vattr bva;
	struct flock64 bf;
	int in_crit = 0;	/* set once nbl_start_crit(vp) was called */
	uint_t saved_mask = 0;	/* va_mask snapshot taken before size handling */
	caller_context_t ct;

	*resp = 0;
	sarg.sbp = &sb;
	sarg.is_referral = B_FALSE;
	nfs4_ntov_table_init(&ntov);
	status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
	    NFS4ATTR_SETIT);
	if (status != NFS4_OK) {
		/*
		 * failed set attrs
		 */
		goto done;
	}
	if ((sarg.vap->va_mask == 0) &&
	    (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
		/*
		 * no further work to be done
		 */
		goto done;
	}

	/*
	 * If we got a request to set the ACL and the MODE, only
	 * allow changing VSUID, VSGID, and VSVTX.  Attempting
	 * to change any other bits, along with setting an ACL,
	 * gives NFS4ERR_INVAL.
	 */
	if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
	    (fattrp->attrmask & FATTR4_MODE_MASK)) {
		vattr_t va;

		va.va_mask = AT_MODE;
		error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
		if (error) {
			status = puterrno4(error);
			goto done;
		}
		/* XOR leaves exactly the mode bits that would change */
		if ((sarg.vap->va_mode ^ va.va_mode) &
		    ~(VSUID | VSGID | VSVTX)) {
			status = NFS4ERR_INVAL;
			goto done;
		}
	}

	/* Check stateid only if size has been set */
	if (sarg.vap->va_mask & AT_SIZE) {
		trunc = (sarg.vap->va_size == 0);
		status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
		    trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE, &ct);
		if (status != NFS4_OK)
			goto done;
	} else {
		/* No stateid check, so build the caller context here */
		ct.cc_sysid = 0;
		ct.cc_pid = 0;
		ct.cc_caller_id = nfs4_srv_caller_id;
		ct.cc_flags = CC_DONTBLOCK;
	}

	/* XXX start of possible race with delegations */

	/*
	 * We need to specially handle size changes because it is
	 * possible for the client to create a file with read-only
	 * modes, but with the file opened for writing. If the client
	 * then tries to set the file size, e.g. ftruncate(3C),
	 * fcntl(F_FREESP), the normal access checking done in
	 * VOP_SETATTR would prevent the client from doing it even though
	 * it should be allowed to do so.  To get around this, we do the
	 * access checking for ourselves and use VOP_SPACE which doesn't
	 * do the access checking.
	 * Also the client should not be allowed to change the file
	 * size if there is a conflicting non-blocking mandatory lock in
	 * the region of the change.
	 */
	if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
		u_offset_t offset;
		ssize_t length;

		/*
		 * ufs_setattr clears AT_SIZE from vap->va_mask, but
		 * before returning, sarg.vap->va_mask is used to
		 * generate the setattr reply bitmap.  We also clear
		 * AT_SIZE below before calling VOP_SPACE.  For both
		 * of these cases, the va_mask needs to be saved here
		 * and restored after calling VOP_SETATTR.
		 */
		saved_mask = sarg.vap->va_mask;

		/*
		 * Check any possible conflict due to NBMAND locks.
		 * Get into critical region before VOP_GETATTR, so the
		 * size attribute is valid when checking conflicts.
		 */
		if (nbl_need_check(vp)) {
			nbl_start_crit(vp, RW_READER);
			in_crit = 1;
		}

		bva.va_mask = AT_UID|AT_SIZE;
		if (error = VOP_GETATTR(vp, &bva, 0, cr, &ct)) {
			status = puterrno4(error);
			goto done;
		}

		if (in_crit) {
			/*
			 * The region checked for conflicts is the span
			 * between the current size and the requested size.
			 */
			if (sarg.vap->va_size < bva.va_size) {
				offset = sarg.vap->va_size;
				length = bva.va_size - sarg.vap->va_size;
			} else {
				offset = bva.va_size;
				length = sarg.vap->va_size - bva.va_size;
			}
			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
			    &ct)) {
				status = NFS4ERR_LOCKED;
				goto done;
			}
		}

		/*
		 * Only the file's owner takes the VOP_SPACE path; for
		 * anyone else AT_SIZE stays in the mask for VOP_SETATTR.
		 */
		if (crgetuid(cr) == bva.va_uid) {
			sarg.vap->va_mask &= ~AT_SIZE;
			bf.l_type = F_WRLCK;
			bf.l_whence = 0;
			bf.l_start = (off64_t)sarg.vap->va_size;
			bf.l_len = 0;
			bf.l_sysid = 0;
			bf.l_pid = 0;
			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
			    (offset_t)sarg.vap->va_size, cr, &ct);
		}
	}

	/* Apply whatever vattr attrs remain, unless VOP_SPACE failed */
	if (!error && sarg.vap->va_mask != 0)
		error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);

	/* restore va_mask -- ufs_setattr clears AT_SIZE */
	if (saved_mask & AT_SIZE)
		sarg.vap->va_mask |= AT_SIZE;

	/*
	 * If an ACL was being set, it has been delayed until now,
	 * in order to set the mode (via the VOP_SETATTR() above) first.
	 */
	if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
		int i;

		/* Locate the decoded ACL argument in the ntov table */
		for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
			if (ntov.amap[i] == FATTR4_ACL)
				break;
		if (i < NFS4_MAXNUM_ATTRS) {
			error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
			    NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
			if (error == 0) {
				*resp |= FATTR4_ACL_MASK;
			} else if (error == ENOTSUP) {
				(void) rfs4_verify_attr(&sarg, resp, &ntov);
				status = NFS4ERR_ATTRNOTSUPP;
				goto done;
			}
		} else {
			NFS4_DEBUG(rfs4_debug,
			    (CE_NOTE, "do_rfs4_op_setattr: "
			    "unable to find ACL in fattr4"));
			error = EINVAL;
		}
	}

	if (error) {
		/* check if a monitor detected a delegation conflict */
		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
			status = NFS4ERR_DELAY;
		else
			status = puterrno4(error);

		/*
		 * Set the response bitmap when setattr failed.
		 * If VOP_SETATTR partially succeeded, test by doing a
		 * VOP_GETATTR on the object and comparing the data
		 * to the setattr arguments.
		 */
		(void) rfs4_verify_attr(&sarg, resp, &ntov);
	} else {
		/*
		 * Force modified metadata out to stable storage.
		 */
		(void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
		/*
		 * Set response bitmap
		 */
		nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
	}

	/* Return early and already have a NFSv4 error */
done:
	/*
	 * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
	 * conversion sets both readable and writeable NFS4 attrs
	 * for AT_MTIME and AT_ATIME.  The line below masks out
	 * unrequested attrs from the setattr result bitmap.  This
	 * is placed after the done: label to catch the ATTRNOTSUP
	 * case.
	 */
	*resp &= fattrp->attrmask;

	/* Leave the NBMAND critical region if the size path entered it */
	if (in_crit)
		nbl_end_crit(vp);

	nfs4_ntov_table_free(&ntov, &sarg);

	return (status);
}
5354
5355 /* ARGSUSED */
5356 static void
5357 rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5358 struct compound_state *cs)
5359 {
5360 SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
5361 SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
5362 bslabel_t *clabel;
5363
5364 DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs,
5365 SETATTR4args *, args);
5366
5367 if (cs->vp == NULL) {
5368 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5369 goto out;
5370 }
5371
5372 /*
5373 * If there is an unshared filesystem mounted on this vnode,
5374 * do not allow to setattr on this vnode.
5375 */
5376 if (vn_ismntpt(cs->vp)) {
5377 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5378 goto out;
5379 }
5380
5381 resp->attrsset = 0;
5382
5383 if (rdonly4(req, cs)) {
5384 *cs->statusp = resp->status = NFS4ERR_ROFS;
5385 goto out;
5386 }
5387
5388 /* check label before setting attributes */
5389 if (is_system_labeled()) {
5390 ASSERT(req->rq_label != NULL);
5391 clabel = req->rq_label;
5392 DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *,
5393 "got client label from request(1)",
5394 struct svc_req *, req);
5395 if (!blequal(&l_admin_low->tsl_label, clabel)) {
5396 if (!do_rfs_label_check(clabel, cs->vp,
5397 EQUALITY_CHECK, cs->exi)) {
5398 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5399 goto out;
5400 }
5401 }
5402 }
5403
5404 *cs->statusp = resp->status =
5405 do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
5406 &args->stateid);
5407
5408 out:
5409 DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs,
5410 SETATTR4res *, resp);
5411 }
5412
5413 /* ARGSUSED */
5414 static void
5415 rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5416 struct compound_state *cs)
5417 {
5418 /*
5419 * verify and nverify are exactly the same, except that nverify
5420 * succeeds when some argument changed, and verify succeeds when
5421 * when none changed.
5422 */
5423
5424 VERIFY4args *args = &argop->nfs_argop4_u.opverify;
5425 VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
5426
5427 int error;
5428 struct nfs4_svgetit_arg sarg;
5429 struct statvfs64 sb;
5430 struct nfs4_ntov_table ntov;
5431
5432 DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs,
5433 VERIFY4args *, args);
5434
5435 if (cs->vp == NULL) {
5436 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5437 goto out;
5438 }
5439
5440 sarg.sbp = &sb;
5441 sarg.is_referral = B_FALSE;
5442 nfs4_ntov_table_init(&ntov);
5443 resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5444 &sarg, &ntov, NFS4ATTR_VERIT);
5445 if (resp->status != NFS4_OK) {
5446 /*
5447 * do_rfs4_set_attrs will try to verify systemwide attrs,
5448 * so could return -1 for "no match".
5449 */
5450 if (resp->status == -1)
5451 resp->status = NFS4ERR_NOT_SAME;
5452 goto done;
5453 }
5454 error = rfs4_verify_attr(&sarg, NULL, &ntov);
5455 switch (error) {
5456 case 0:
5457 resp->status = NFS4_OK;
5458 break;
5459 case -1:
5460 resp->status = NFS4ERR_NOT_SAME;
5461 break;
5462 default:
5463 resp->status = puterrno4(error);
5464 break;
5465 }
5466 done:
5467 *cs->statusp = resp->status;
5468 nfs4_ntov_table_free(&ntov, &sarg);
5469 out:
5470 DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs,
5471 VERIFY4res *, resp);
5472 }
5473
5474 /* ARGSUSED */
5475 static void
5476 rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5477 struct compound_state *cs)
5478 {
5479 /*
5480 * verify and nverify are exactly the same, except that nverify
5481 * succeeds when some argument changed, and verify succeeds when
5482 * when none changed.
5483 */
5484
5485 NVERIFY4args *args = &argop->nfs_argop4_u.opnverify;
5486 NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
5487
5488 int error;
5489 struct nfs4_svgetit_arg sarg;
5490 struct statvfs64 sb;
5491 struct nfs4_ntov_table ntov;
5492
5493 DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs,
5494 NVERIFY4args *, args);
5495
5496 if (cs->vp == NULL) {
5497 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5498 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5499 NVERIFY4res *, resp);
5500 return;
5501 }
5502 sarg.sbp = &sb;
5503 sarg.is_referral = B_FALSE;
5504 nfs4_ntov_table_init(&ntov);
5505 resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5506 &sarg, &ntov, NFS4ATTR_VERIT);
5507 if (resp->status != NFS4_OK) {
5508 /*
5509 * do_rfs4_set_attrs will try to verify systemwide attrs,
5510 * so could return -1 for "no match".
5511 */
5512 if (resp->status == -1)
5513 resp->status = NFS4_OK;
5514 goto done;
5515 }
5516 error = rfs4_verify_attr(&sarg, NULL, &ntov);
5517 switch (error) {
5518 case 0:
5519 resp->status = NFS4ERR_SAME;
5520 break;
5521 case -1:
5522 resp->status = NFS4_OK;
5523 break;
5524 default:
5525 resp->status = puterrno4(error);
5526 break;
5527 }
5528 done:
5529 *cs->statusp = resp->status;
5530 nfs4_ntov_table_free(&ntov, &sarg);
5531
5532 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5533 NVERIFY4res *, resp);
5534 }
5535
5536 /*
5537 * XXX - This should live in an NFS header file.
5538 */
5539 #define MAX_IOVECS 12
5540
5541 /* ARGSUSED */
5542 static void
5543 rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5544 struct compound_state *cs)
5545 {
5546 WRITE4args *args = &argop->nfs_argop4_u.opwrite;
5547 WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
5548 int error;
5549 vnode_t *vp;
5550 struct vattr bva;
5551 u_offset_t rlimit;
5552 struct uio uio;
5553 struct iovec iov[MAX_IOVECS];
5554 struct iovec *iovp;
5555 int iovcnt;
5556 int ioflag;
5557 cred_t *savecred, *cr;
5558 bool_t *deleg = &cs->deleg;
5559 nfsstat4 stat;
5560 int in_crit = 0;
5561 caller_context_t ct;
5562
5563 DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
5564 WRITE4args *, args);
5565
5566 vp = cs->vp;
5567 if (vp == NULL) {
5568 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5569 goto out;
5570 }
5571 if (cs->access == CS_ACCESS_DENIED) {
5572 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5573 goto out;
5574 }
5575
5576 cr = cs->cr;
5577
5578 if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
5579 deleg, TRUE, &ct)) != NFS4_OK) {
5580 *cs->statusp = resp->status = stat;
5581 goto out;
5582 }
5583
5584 /*
5585 * We have to enter the critical region before calling VOP_RWLOCK
5586 * to avoid a deadlock with ufs.
5587 */
5588 if (nbl_need_check(vp)) {
5589 nbl_start_crit(vp, RW_READER);
5590 in_crit = 1;
5591 if (nbl_conflict(vp, NBL_WRITE,
5592 args->offset, args->data_len, 0, &ct)) {
5593 *cs->statusp = resp->status = NFS4ERR_LOCKED;
5594 goto out;
5595 }
5596 }
5597
5598 bva.va_mask = AT_MODE | AT_UID;
5599 error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
5600
5601 /*
5602 * If we can't get the attributes, then we can't do the
5603 * right access checking. So, we'll fail the request.
5604 */
5605 if (error) {
5606 *cs->statusp = resp->status = puterrno4(error);
5607 goto out;
5608 }
5609
5610 if (rdonly4(req, cs)) {
5611 *cs->statusp = resp->status = NFS4ERR_ROFS;
5612 goto out;
5613 }
5614
5615 if (vp->v_type != VREG) {
5616 *cs->statusp = resp->status =
5617 ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
5618 goto out;
5619 }
5620
5621 if (crgetuid(cr) != bva.va_uid &&
5622 (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
5623 *cs->statusp = resp->status = puterrno4(error);
5624 goto out;
5625 }
5626
5627 if (MANDLOCK(vp, bva.va_mode)) {
5628 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5629 goto out;
5630 }
5631
5632 if (args->data_len == 0) {
5633 *cs->statusp = resp->status = NFS4_OK;
5634 resp->count = 0;
5635 resp->committed = args->stable;
5636 resp->writeverf = Write4verf;
5637 goto out;
5638 }
5639
5640 if (args->mblk != NULL) {
5641 mblk_t *m;
5642 uint_t bytes, round_len;
5643
5644 iovcnt = 0;
5645 bytes = 0;
5646 round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
5647 for (m = args->mblk;
5648 m != NULL && bytes < round_len;
5649 m = m->b_cont) {
5650 iovcnt++;
5651 bytes += MBLKL(m);
5652 }
5653 #ifdef DEBUG
5654 /* should have ended on an mblk boundary */
5655 if (bytes != round_len) {
5656 printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
5657 bytes, round_len, args->data_len);
5658 printf("args=%p, args->mblk=%p, m=%p", (void *)args,
5659 (void *)args->mblk, (void *)m);
5660 ASSERT(bytes == round_len);
5661 }
5662 #endif
5663 if (iovcnt <= MAX_IOVECS) {
5664 iovp = iov;
5665 } else {
5666 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
5667 }
5668 mblk_to_iov(args->mblk, iovcnt, iovp);
5669 } else if (args->rlist != NULL) {
5670 iovcnt = 1;
5671 iovp = iov;
5672 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
5673 iovp->iov_len = args->data_len;
5674 } else {
5675 iovcnt = 1;
5676 iovp = iov;
5677 iovp->iov_base = args->data_val;
5678 iovp->iov_len = args->data_len;
5679 }
5680
5681 uio.uio_iov = iovp;
5682 uio.uio_iovcnt = iovcnt;
5683
5684 uio.uio_segflg = UIO_SYSSPACE;
5685 uio.uio_extflg = UIO_COPY_DEFAULT;
5686 uio.uio_loffset = args->offset;
5687 uio.uio_resid = args->data_len;
5688 uio.uio_llimit = curproc->p_fsz_ctl;
5689 rlimit = uio.uio_llimit - args->offset;
5690 if (rlimit < (u_offset_t)uio.uio_resid)
5691 uio.uio_resid = (int)rlimit;
5692
5693 if (args->stable == UNSTABLE4)
5694 ioflag = 0;
5695 else if (args->stable == FILE_SYNC4)
5696 ioflag = FSYNC;
5697 else if (args->stable == DATA_SYNC4)
5698 ioflag = FDSYNC;
5699 else {
5700 if (iovp != iov)
5701 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5702 *cs->statusp = resp->status = NFS4ERR_INVAL;
5703 goto out;
5704 }
5705
5706 /*
5707 * We're changing creds because VM may fault and we need
5708 * the cred of the current thread to be used if quota
5709 * checking is enabled.
5710 */
5711 savecred = curthread->t_cred;
5712 curthread->t_cred = cr;
5713 error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
5714 curthread->t_cred = savecred;
5715
5716 if (iovp != iov)
5717 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5718
5719 if (error) {
5720 *cs->statusp = resp->status = puterrno4(error);
5721 goto out;
5722 }
5723
5724 *cs->statusp = resp->status = NFS4_OK;
5725 resp->count = args->data_len - uio.uio_resid;
5726
5727 if (ioflag == 0)
5728 resp->committed = UNSTABLE4;
5729 else
5730 resp->committed = FILE_SYNC4;
5731
5732 resp->writeverf = Write4verf;
5733
5734 out:
5735 if (in_crit)
5736 nbl_end_crit(vp);
5737
5738 DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
5739 WRITE4res *, resp);
5740 }
5741
5742
5743 /* XXX put in a header file */
5744 extern int sec_svc_getcred(struct svc_req *, cred_t *, caddr_t *, int *);
5745
/*
 * Process one NFSv4 COMPOUND request.
 *
 * The request tag is copied into the reply, the minor version is checked
 * against NFS4_MINORVERSION, the caller's credentials are extracted with
 * sec_svc_getcred(), and then every operation in args->array is dispatched
 * through rfsv4disptab[] while exported_lock is held as reader.  Dispatch
 * stops at the first operation that leaves a status other than NFS4_OK in
 * resp->status; the result array is then shrunk to the operations that
 * were actually executed.
 *
 * args  - decoded COMPOUND arguments
 * resp  - reply to fill in; resp->tag and resp->array are allocated here
 * exi   - asserted to be NULL once real work starts
 * req   - the RPC request; req->rq_label is freed before returning from
 *         the normal path
 * cr    - asserted to be NULL; the credential is allocated with crget()
 * rv    - optional; set to 0 on entry and to 1 only when credential
 *         extraction fails (svcerr_badcred() has then been called)
 */
void
rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, int *rv)
{
	uint_t i;
	struct compound_state cs;

	if (rv != NULL)
		*rv = 0;
	rfs4_init_compound_state(&cs);
	/*
	 * Form a reply tag by copying over the request tag.
	 */
	resp->tag.utf8string_val =
	    kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
	resp->tag.utf8string_len = args->tag.utf8string_len;
	bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
	    resp->tag.utf8string_len);

	cs.statusp = &resp->status;
	cs.req = req;
	resp->array = NULL;
	resp->array_len = 0;

	/*
	 * XXX for now, minorversion should be zero
	 */
	if (args->minorversion != NFS4_MINORVERSION) {
		/* Fire both probes so start/done stay paired for tracing. */
		DTRACE_NFSV4_2(compound__start, struct compound_state *,
		    &cs, COMPOUND4args *, args);
		resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
		DTRACE_NFSV4_2(compound__done, struct compound_state *,
		    &cs, COMPOUND4res *, resp);
		return;
	}

	/* An empty compound succeeds trivially; no probes are fired here. */
	if (args->array_len == 0) {
		resp->status = NFS4_OK;
		return;
	}

	ASSERT(exi == NULL);
	ASSERT(cr == NULL);

	cr = crget();
	ASSERT(cr != NULL);

	if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
		DTRACE_NFSV4_2(compound__start, struct compound_state *,
		    &cs, COMPOUND4args *, args);
		/* cs.basecr has not been set yet, so free cr directly. */
		crfree(cr);
		DTRACE_NFSV4_2(compound__done, struct compound_state *,
		    &cs, COMPOUND4res *, resp);
		svcerr_badcred(req->rq_xprt);
		if (rv != NULL)
			*rv = 1;
		return;
	}
	/* One zeroed result slot per requested operation. */
	resp->array_len = args->array_len;
	resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
	    KM_SLEEP);

	/* From here on the credential is owned by cs and freed at the end. */
	cs.basecr = cr;

	DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs,
	    COMPOUND4args *, args);

	/*
	 * For now, NFS4 compound processing must be protected by
	 * exported_lock because it can access more than one exportinfo
	 * per compound and share/unshare can now change multiple
	 * exinfo structs.  The NFS2/3 code only refs 1 exportinfo
	 * per proc (excluding public exinfo), and exi_count design
	 * is sufficient to protect concurrent execution of NFS2/3
	 * ops along with unexport.  This lock will be removed as
	 * part of the NFSv4 phase 2 namespace redesign work.
	 */
	rw_enter(&exported_lock, RW_READER);

	/*
	 * If this is the first compound we've seen, we need to start all
	 * new instances' grace periods.
	 */
	if (rfs4_seen_first_compound == 0) {
		rfs4_grace_start_new();
		/*
		 * This must be set after rfs4_grace_start_new(), otherwise
		 * another thread could proceed past here before the former
		 * is finished.
		 */
		rfs4_seen_first_compound = 1;
	}

	/* Run the operations in order until one fails (cs.cont is cleared). */
	for (i = 0; i < args->array_len && cs.cont; i++) {
		nfs_argop4 *argop;
		nfs_resop4 *resop;
		uint_t op;

		argop = &args->array[i];
		resop = &resp->array[i];
		resop->resop = argop->argop;
		op = (uint_t)resop->resop;

		if (op < rfsv4disp_cnt) {
			/*
			 * Count the individual ops here; NULL and COMPOUND
			 * are counted in common_dispatch()
			 */
			rfsproccnt_v4_ptr[op].value.ui64++;

			NFS4_DEBUG(rfs4_debug > 1,
			    (CE_NOTE, "Executing %s", rfs4_op_string[op]));
			(*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
			NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
			    rfs4_op_string[op], *cs.statusp));
			if (*cs.statusp != NFS4_OK)
				cs.cont = FALSE;
		} else {
			/*
			 * This is effectively dead code since XDR code
			 * will have already returned BADXDR if op doesn't
			 * decode to legal value.  This only done for a
			 * day when XDR code doesn't verify v4 opcodes.
			 */
			op = OP_ILLEGAL;
			rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;

			rfs4_op_illegal(argop, resop, req, &cs);
			cs.cont = FALSE;
		}

		/*
		 * If not at last op, and if we are to stop, then
		 * compact the results array.
		 */
		if ((i + 1) < args->array_len && !cs.cont) {
			nfs_resop4 *new_res = kmem_alloc(
			    (i+1) * sizeof (nfs_resop4), KM_SLEEP);
			bcopy(resp->array,
			    new_res, (i+1) * sizeof (nfs_resop4));
			/* The old array was sized for the full request. */
			kmem_free(resp->array,
			    args->array_len * sizeof (nfs_resop4));

			resp->array_len = i + 1;
			resp->array = new_res;
		}
	}

	rw_exit(&exported_lock);

	DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
	    COMPOUND4res *, resp);

	/* Drop whatever the operations left behind in the compound state. */
	if (cs.vp)
		VN_RELE(cs.vp);
	if (cs.saved_vp)
		VN_RELE(cs.saved_vp);
	if (cs.saved_fh.nfs_fh4_val)
		kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);

	if (cs.basecr)
		crfree(cs.basecr);
	if (cs.cr)
		crfree(cs.cr);
	/*
	 * done with this compound request, free the label
	 */

	if (req->rq_label != NULL) {
		kmem_free(req->rq_label, sizeof (bslabel_t));
		req->rq_label = NULL;
	}
}
5919
5920 /*
5921 * XXX because of what appears to be duplicate calls to rfs4_compound_free
5922 * XXX zero out the tag and array values. Need to investigate why the
5923 * XXX calls occur, but at least prevent the panic for now.
5924 */
5925 void
5926 rfs4_compound_free(COMPOUND4res *resp)
5927 {
5928 uint_t i;
5929
5930 if (resp->tag.utf8string_val) {
5931 UTF8STRING_FREE(resp->tag)
5932 }
5933
5934 for (i = 0; i < resp->array_len; i++) {
5935 nfs_resop4 *resop;
5936 uint_t op;
5937
5938 resop = &resp->array[i];
5939 op = (uint_t)resop->resop;
5940 if (op < rfsv4disp_cnt) {
5941 (*rfsv4disptab[op].dis_resfree)(resop);
5942 }
5943 }
5944 if (resp->array != NULL) {
5945 kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
5946 }
5947 }
5948
5949 /*
5950 * Process the value of the compound request rpc flags, as a bit-AND
5951 * of the individual per-op flags (idempotent, allowork, publicfh_ok)
5952 */
5953 void
5954 rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
5955 {
5956 int i;
5957 int flag = RPC_ALL;
5958
5959 for (i = 0; flag && i < args->array_len; i++) {
5960 uint_t op;
5961
5962 op = (uint_t)args->array[i].argop;
5963
5964 if (op < rfsv4disp_cnt)
5965 flag &= rfsv4disptab[op].dis_flags;
5966 else
5967 flag = 0;
5968 }
5969 *flagp = flag;
5970 }
5971
5972 nfsstat4
5973 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
5974 {
5975 nfsstat4 e;
5976
5977 rfs4_dbe_lock(cp->rc_dbe);
5978
5979 if (cp->rc_sysidt != LM_NOSYSID) {
5980 *sp = cp->rc_sysidt;
5981 e = NFS4_OK;
5982
5983 } else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
5984 *sp = cp->rc_sysidt;
5985 e = NFS4_OK;
5986
5987 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
5988 "rfs4_client_sysid: allocated 0x%x\n", *sp));
5989 } else
5990 e = NFS4ERR_DELAY;
5991
5992 rfs4_dbe_unlock(cp->rc_dbe);
5993 return (e);
5994 }
5995
#if defined(DEBUG) && ! defined(lint)
/*
 * Debug aid: log a one-line description of a record-lock request.
 *
 * str       - caller-supplied prefix for the message
 * operation - the fcntl-style command (F_GETLK, F_SETLK, F_SETLK_NBMAND)
 * flk       - the lock being described; l_whence is asserted to be 0
 *
 * A zero l_len is printed as all ones.
 */
static void
lock_print(char *str, int operation, struct flock64 *flk)
{
	char *opname;
	char *lktype;
	longlong_t span;

	if (operation == F_GETLK)
		opname = "F_GETLK";
	else if (operation == F_SETLK)
		opname = "F_SETLK";
	else if (operation == F_SETLK_NBMAND)
		opname = "F_SETLK_NBMAND";
	else
		opname = "F_UNKNOWN";

	if (flk->l_type == F_UNLCK)
		lktype = "F_UNLCK";
	else if (flk->l_type == F_RDLCK)
		lktype = "F_RDLCK";
	else if (flk->l_type == F_WRLCK)
		lktype = "F_WRLCK";
	else
		lktype = "F_UNKNOWN";

	ASSERT(flk->l_whence == 0);

	if (flk->l_len)
		span = (longlong_t)flk->l_len;
	else
		span = ~0LL;

	cmn_err(CE_NOTE, "%s: %s, type = %s, off = %llx len = %llx pid = %d",
	    str, opname, lktype, (longlong_t)flk->l_start, span, flk->l_pid);
}

#define	LOCK_PRINT(d, s, t, f) if (d) lock_print(s, t, f)
#else
#define	LOCK_PRINT(d, s, t, f)
#endif
6032
6033 /*ARGSUSED*/
6034 static bool_t
6035 creds_ok(cred_set_t cr_set, struct svc_req *req, struct compound_state *cs)
6036 {
6037 return (TRUE);
6038 }
6039
6040 /*
6041 * Look up the pathname using the vp in cs as the directory vnode.
6042 * cs->vp will be the vnode for the file on success
6043 */
6044
6045 static nfsstat4
6046 rfs4_lookup(component4 *component, struct svc_req *req,
6047 struct compound_state *cs)
6048 {
6049 char *nm;
6050 uint32_t len;
6051 nfsstat4 status;
6052 struct sockaddr *ca;
6053 char *name;
6054
6055 if (cs->vp == NULL) {
6056 return (NFS4ERR_NOFILEHANDLE);
6057 }
6058 if (cs->vp->v_type != VDIR) {
6059 return (NFS4ERR_NOTDIR);
6060 }
6061
6062 status = utf8_dir_verify(component);
6063 if (status != NFS4_OK)
6064 return (status);
6065
6066 nm = utf8_to_fn(component, &len, NULL);
6067 if (nm == NULL) {
6068 return (NFS4ERR_INVAL);
6069 }
6070
6071 if (len > MAXNAMELEN) {
6072 kmem_free(nm, len);
6073 return (NFS4ERR_NAMETOOLONG);
6074 }
6075
6076 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6077 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6078 MAXPATHLEN + 1);
6079
6080 if (name == NULL) {
6081 kmem_free(nm, len);
6082 return (NFS4ERR_INVAL);
6083 }
6084
6085 status = do_rfs4_op_lookup(name, req, cs);
6086
6087 if (name != nm)
6088 kmem_free(name, MAXPATHLEN + 1);
6089
6090 kmem_free(nm, len);
6091
6092 return (status);
6093 }
6094
6095 static nfsstat4
6096 rfs4_lookupfile(component4 *component, struct svc_req *req,
6097 struct compound_state *cs, uint32_t access, change_info4 *cinfo)
6098 {
6099 nfsstat4 status;
6100 vnode_t *dvp = cs->vp;
6101 vattr_t bva, ava, fva;
6102 int error;
6103
6104 /* Get "before" change value */
6105 bva.va_mask = AT_CTIME|AT_SEQ;
6106 error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6107 if (error)
6108 return (puterrno4(error));
6109
6110 /* rfs4_lookup may VN_RELE directory */
6111 VN_HOLD(dvp);
6112
6113 status = rfs4_lookup(component, req, cs);
6114 if (status != NFS4_OK) {
6115 VN_RELE(dvp);
6116 return (status);
6117 }
6118
6119 /*
6120 * Get "after" change value, if it fails, simply return the
6121 * before value.
6122 */
6123 ava.va_mask = AT_CTIME|AT_SEQ;
6124 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6125 ava.va_ctime = bva.va_ctime;
6126 ava.va_seq = 0;
6127 }
6128 VN_RELE(dvp);
6129
6130 /*
6131 * Validate the file is a file
6132 */
6133 fva.va_mask = AT_TYPE|AT_MODE;
6134 error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr, NULL);
6135 if (error)
6136 return (puterrno4(error));
6137
6138 if (fva.va_type != VREG) {
6139 if (fva.va_type == VDIR)
6140 return (NFS4ERR_ISDIR);
6141 if (fva.va_type == VLNK)
6142 return (NFS4ERR_SYMLINK);
6143 return (NFS4ERR_INVAL);
6144 }
6145
6146 NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
6147 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6148
6149 /*
6150 * It is undefined if VOP_LOOKUP will change va_seq, so
6151 * cinfo.atomic = TRUE only if we have
6152 * non-zero va_seq's, and they have not changed.
6153 */
6154 if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
6155 cinfo->atomic = TRUE;
6156 else
6157 cinfo->atomic = FALSE;
6158
6159 /* Check for mandatory locking */
6160 cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
6161 return (check_open_access(access, cs, req));
6162 }
6163
6164 static nfsstat4
6165 create_vnode(vnode_t *dvp, char *nm, vattr_t *vap, createmode4 mode,
6166 timespec32_t *mtime, cred_t *cr, vnode_t **vpp, bool_t *created)
6167 {
6168 int error;
6169 nfsstat4 status = NFS4_OK;
6170 vattr_t va;
6171
6172 tryagain:
6173
6174 /*
6175 * The file open mode used is VWRITE. If the client needs
6176 * some other semantic, then it should do the access checking
6177 * itself. It would have been nice to have the file open mode
6178 * passed as part of the arguments.
6179 */
6180
6181 *created = TRUE;
6182 error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0, NULL, NULL);
6183
6184 if (error) {
6185 *created = FALSE;
6186
6187 /*
6188 * If we got something other than file already exists
6189 * then just return this error. Otherwise, we got
6190 * EEXIST. If we were doing a GUARDED create, then
6191 * just return this error. Otherwise, we need to
6192 * make sure that this wasn't a duplicate of an
6193 * exclusive create request.
6194 *
6195 * The assumption is made that a non-exclusive create
6196 * request will never return EEXIST.
6197 */
6198
6199 if (error != EEXIST || mode == GUARDED4) {
6200 status = puterrno4(error);
6201 return (status);
6202 }
6203 error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr,
6204 NULL, NULL, NULL);
6205
6206 if (error) {
6207 /*
6208 * We couldn't find the file that we thought that
6209 * we just created. So, we'll just try creating
6210 * it again.
6211 */
6212 if (error == ENOENT)
6213 goto tryagain;
6214
6215 status = puterrno4(error);
6216 return (status);
6217 }
6218
6219 if (mode == UNCHECKED4) {
6220 /* existing object must be regular file */
6221 if ((*vpp)->v_type != VREG) {
6222 if ((*vpp)->v_type == VDIR)
6223 status = NFS4ERR_ISDIR;
6224 else if ((*vpp)->v_type == VLNK)
6225 status = NFS4ERR_SYMLINK;
6226 else
6227 status = NFS4ERR_INVAL;
6228 VN_RELE(*vpp);
6229 return (status);
6230 }
6231
6232 return (NFS4_OK);
6233 }
6234
6235 /* Check for duplicate request */
6236 ASSERT(mtime != 0);
6237 va.va_mask = AT_MTIME;
6238 error = VOP_GETATTR(*vpp, &va, 0, cr, NULL);
6239 if (!error) {
6240 /* We found the file */
6241 if (va.va_mtime.tv_sec != mtime->tv_sec ||
6242 va.va_mtime.tv_nsec != mtime->tv_nsec) {
6243 /* but its not our creation */
6244 VN_RELE(*vpp);
6245 return (NFS4ERR_EXIST);
6246 }
6247 *created = TRUE; /* retrans of create == created */
6248 return (NFS4_OK);
6249 }
6250 VN_RELE(*vpp);
6251 return (NFS4ERR_EXIST);
6252 }
6253
6254 return (NFS4_OK);
6255 }
6256
6257 static nfsstat4
6258 check_open_access(uint32_t access, struct compound_state *cs,
6259 struct svc_req *req)
6260 {
6261 int error;
6262 vnode_t *vp;
6263 bool_t readonly;
6264 cred_t *cr = cs->cr;
6265
6266 /* For now we don't allow mandatory locking as per V2/V3 */
6267 if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
6268 return (NFS4ERR_ACCESS);
6269 }
6270
6271 vp = cs->vp;
6272 ASSERT(cr != NULL && vp->v_type == VREG);
6273
6274 /*
6275 * If the file system is exported read only and we are trying
6276 * to open for write, then return NFS4ERR_ROFS
6277 */
6278
6279 readonly = rdonly4(req, cs);
6280
6281 if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
6282 return (NFS4ERR_ROFS);
6283
6284 if (access & OPEN4_SHARE_ACCESS_READ) {
6285 if ((VOP_ACCESS(vp, VREAD, 0, cr, NULL) != 0) &&
6286 (VOP_ACCESS(vp, VEXEC, 0, cr, NULL) != 0)) {
6287 return (NFS4ERR_ACCESS);
6288 }
6289 }
6290
6291 if (access & OPEN4_SHARE_ACCESS_WRITE) {
6292 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
6293 if (error)
6294 return (NFS4ERR_ACCESS);
6295 }
6296
6297 return (NFS4_OK);
6298 }
6299
6300 static nfsstat4
6301 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
6302 change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
6303 {
6304 struct nfs4_svgetit_arg sarg;
6305 struct nfs4_ntov_table ntov;
6306
6307 bool_t ntov_table_init = FALSE;
6308 struct statvfs64 sb;
6309 nfsstat4 status;
6310 vnode_t *vp;
6311 vattr_t bva, ava, iva, cva, *vap;
6312 vnode_t *dvp;
6313 timespec32_t *mtime;
6314 char *nm = NULL;
6315 uint_t buflen;
6316 bool_t created;
6317 bool_t setsize = FALSE;
6318 len_t reqsize;
6319 int error;
6320 bool_t trunc;
6321 caller_context_t ct;
6322 component4 *component;
6323 bslabel_t *clabel;
6324 struct sockaddr *ca;
6325 char *name = NULL;
6326
6327 sarg.sbp = &sb;
6328 sarg.is_referral = B_FALSE;
6329
6330 dvp = cs->vp;
6331
6332 /* Check if the file system is read only */
6333 if (rdonly4(req, cs))
6334 return (NFS4ERR_ROFS);
6335
6336 /* check the label of including directory */
6337 if (is_system_labeled()) {
6338 ASSERT(req->rq_label != NULL);
6339 clabel = req->rq_label;
6340 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
6341 "got client label from request(1)",
6342 struct svc_req *, req);
6343 if (!blequal(&l_admin_low->tsl_label, clabel)) {
6344 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
6345 cs->exi)) {
6346 return (NFS4ERR_ACCESS);
6347 }
6348 }
6349 }
6350
6351 /*
6352 * Get the last component of path name in nm. cs will reference
6353 * the including directory on success.
6354 */
6355 component = &args->open_claim4_u.file;
6356 status = utf8_dir_verify(component);
6357 if (status != NFS4_OK)
6358 return (status);
6359
6360 nm = utf8_to_fn(component, &buflen, NULL);
6361
6362 if (nm == NULL)
6363 return (NFS4ERR_RESOURCE);
6364
6365 if (buflen > MAXNAMELEN) {
6366 kmem_free(nm, buflen);
6367 return (NFS4ERR_NAMETOOLONG);
6368 }
6369
6370 bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
6371 error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6372 if (error) {
6373 kmem_free(nm, buflen);
6374 return (puterrno4(error));
6375 }
6376
6377 if (bva.va_type != VDIR) {
6378 kmem_free(nm, buflen);
6379 return (NFS4ERR_NOTDIR);
6380 }
6381
6382 NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
6383
6384 switch (args->mode) {
6385 case GUARDED4:
6386 /*FALLTHROUGH*/
6387 case UNCHECKED4:
6388 nfs4_ntov_table_init(&ntov);
6389 ntov_table_init = TRUE;
6390
6391 *attrset = 0;
6392 status = do_rfs4_set_attrs(attrset,
6393 &args->createhow4_u.createattrs,
6394 cs, &sarg, &ntov, NFS4ATTR_SETIT);
6395
6396 if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
6397 sarg.vap->va_type != VREG) {
6398 if (sarg.vap->va_type == VDIR)
6399 status = NFS4ERR_ISDIR;
6400 else if (sarg.vap->va_type == VLNK)
6401 status = NFS4ERR_SYMLINK;
6402 else
6403 status = NFS4ERR_INVAL;
6404 }
6405
6406 if (status != NFS4_OK) {
6407 kmem_free(nm, buflen);
6408 nfs4_ntov_table_free(&ntov, &sarg);
6409 *attrset = 0;
6410 return (status);
6411 }
6412
6413 vap = sarg.vap;
6414 vap->va_type = VREG;
6415 vap->va_mask |= AT_TYPE;
6416
6417 if ((vap->va_mask & AT_MODE) == 0) {
6418 vap->va_mask |= AT_MODE;
6419 vap->va_mode = (mode_t)0600;
6420 }
6421
6422 if (vap->va_mask & AT_SIZE) {
6423
6424 /* Disallow create with a non-zero size */
6425
6426 if ((reqsize = sarg.vap->va_size) != 0) {
6427 kmem_free(nm, buflen);
6428 nfs4_ntov_table_free(&ntov, &sarg);
6429 *attrset = 0;
6430 return (NFS4ERR_INVAL);
6431 }
6432 setsize = TRUE;
6433 }
6434 break;
6435
6436 case EXCLUSIVE4:
6437 /* prohibit EXCL create of named attributes */
6438 if (dvp->v_flag & V_XATTRDIR) {
6439 kmem_free(nm, buflen);
6440 *attrset = 0;
6441 return (NFS4ERR_INVAL);
6442 }
6443
6444 cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
6445 cva.va_type = VREG;
6446 /*
6447 * Ensure no time overflows. Assumes underlying
6448 * filesystem supports at least 32 bits.
6449 * Truncate nsec to usec resolution to allow valid
6450 * compares even if the underlying filesystem truncates.
6451 */
6452 mtime = (timespec32_t *)&args->createhow4_u.createverf;
6453 cva.va_mtime.tv_sec = mtime->tv_sec % TIME32_MAX;
6454 cva.va_mtime.tv_nsec = (mtime->tv_nsec / 1000) * 1000;
6455 cva.va_mode = (mode_t)0;
6456 vap = &cva;
6457
6458 /*
6459 * For EXCL create, attrset is set to the server attr
6460 * used to cache the client's verifier.
6461 */
6462 *attrset = FATTR4_TIME_MODIFY_MASK;
6463 break;
6464 }
6465
6466 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6467 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6468 MAXPATHLEN + 1);
6469
6470 if (name == NULL) {
6471 kmem_free(nm, buflen);
6472 return (NFS4ERR_SERVERFAULT);
6473 }
6474
6475 status = create_vnode(dvp, name, vap, args->mode, mtime,
6476 cs->cr, &vp, &created);
6477 if (nm != name)
6478 kmem_free(name, MAXPATHLEN + 1);
6479 kmem_free(nm, buflen);
6480
6481 if (status != NFS4_OK) {
6482 if (ntov_table_init)
6483 nfs4_ntov_table_free(&ntov, &sarg);
6484 *attrset = 0;
6485 return (status);
6486 }
6487
6488 trunc = (setsize && !created);
6489
6490 if (args->mode != EXCLUSIVE4) {
6491 bitmap4 createmask = args->createhow4_u.createattrs.attrmask;
6492
6493 /*
6494 * True verification that object was created with correct
6495 * attrs is impossible. The attrs could have been changed
6496 * immediately after object creation. If attributes did
6497 * not verify, the only recourse for the server is to
6498 * destroy the object. Maybe if some attrs (like gid)
6499 * are set incorrectly, the object should be destroyed;
6500 * however, seems bad as a default policy. Do we really
6501 * want to destroy an object over one of the times not
6502 * verifying correctly? For these reasons, the server
6503 * currently sets bits in attrset for createattrs
6504 * that were set; however, no verification is done.
6505 *
6506 * vmask_to_nmask accounts for vattr bits set on create
6507 * [do_rfs4_set_attrs() only sets resp bits for
6508 * non-vattr/vfs bits.]
6509 * Mask off any bits we set by default so as not to return
6510 * more attrset bits than were requested in createattrs
6511 */
6512 if (created) {
6513 nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
6514 *attrset &= createmask;
6515 } else {
6516 /*
6517 * We did not create the vnode (we tried but it
6518 * already existed). In this case, the only createattr
6519 * that the spec allows the server to set is size,
6520 * and even then, it can only be set if it is 0.
6521 */
6522 *attrset = 0;
6523 if (trunc)
6524 *attrset = FATTR4_SIZE_MASK;
6525 }
6526 }
6527 if (ntov_table_init)
6528 nfs4_ntov_table_free(&ntov, &sarg);
6529
6530 /*
6531 * Get the initial "after" sequence number, if it fails,
6532 * set to zero, time to before.
6533 */
6534 iva.va_mask = AT_CTIME|AT_SEQ;
6535 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) {
6536 iva.va_seq = 0;
6537 iva.va_ctime = bva.va_ctime;
6538 }
6539
6540 /*
6541 * create_vnode attempts to create the file exclusive,
6542 * if it already exists the VOP_CREATE will fail and
6543 * may not increase va_seq. It is atomic if
6544 * we haven't changed the directory, but if it has changed
6545 * we don't know what changed it.
6546 */
6547 if (!created) {
6548 if (bva.va_seq && iva.va_seq &&
6549 bva.va_seq == iva.va_seq)
6550 cinfo->atomic = TRUE;
6551 else
6552 cinfo->atomic = FALSE;
6553 NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
6554 } else {
6555 /*
6556 * The entry was created, we need to sync the
6557 * directory metadata.
6558 */
6559 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
6560
6561 /*
6562 * Get "after" change value, if it fails, simply return the
6563 * before value.
6564 */
6565 ava.va_mask = AT_CTIME|AT_SEQ;
6566 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6567 ava.va_ctime = bva.va_ctime;
6568 ava.va_seq = 0;
6569 }
6570
6571 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6572
6573 /*
6574 * The cinfo->atomic = TRUE only if we have
6575 * non-zero va_seq's, and it has incremented by exactly one
6576 * during the create_vnode and it didn't
6577 * change during the VOP_FSYNC.
6578 */
6579 if (bva.va_seq && iva.va_seq && ava.va_seq &&
6580 iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
6581 cinfo->atomic = TRUE;
6582 else
6583 cinfo->atomic = FALSE;
6584 }
6585
6586 /* Check for mandatory locking and that the size gets set. */
6587 cva.va_mask = AT_MODE;
6588 if (setsize)
6589 cva.va_mask |= AT_SIZE;
6590
6591 /* Assume the worst */
6592 cs->mandlock = TRUE;
6593
6594 if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
6595 cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6596
6597 /*
6598 * Truncate the file if necessary; this would be
6599 * the case for create over an existing file.
6600 */
6601
6602 if (trunc) {
6603 int in_crit = 0;
6604 rfs4_file_t *fp;
6605 bool_t create = FALSE;
6606
6607 /*
6608 * We are writing over an existing file.
6609 * Check to see if we need to recall a delegation.
6610 */
6611 rfs4_hold_deleg_policy();
6612 if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6613 if (rfs4_check_delegated_byfp(FWRITE, fp,
6614 (reqsize == 0), FALSE, FALSE, &clientid)) {
6615 rfs4_file_rele(fp);
6616 rfs4_rele_deleg_policy();
6617 VN_RELE(vp);
6618 *attrset = 0;
6619 return (NFS4ERR_DELAY);
6620 }
6621 rfs4_file_rele(fp);
6622 }
6623 rfs4_rele_deleg_policy();
6624
6625 if (nbl_need_check(vp)) {
6626 in_crit = 1;
6627
6628 ASSERT(reqsize == 0);
6629
6630 nbl_start_crit(vp, RW_READER);
6631 if (nbl_conflict(vp, NBL_WRITE, 0,
6632 cva.va_size, 0, NULL)) {
6633 in_crit = 0;
6634 nbl_end_crit(vp);
6635 VN_RELE(vp);
6636 *attrset = 0;
6637 return (NFS4ERR_ACCESS);
6638 }
6639 }
6640 ct.cc_sysid = 0;
6641 ct.cc_pid = 0;
6642 ct.cc_caller_id = nfs4_srv_caller_id;
6643 ct.cc_flags = CC_DONTBLOCK;
6644
6645 cva.va_mask = AT_SIZE;
6646 cva.va_size = reqsize;
6647 (void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
6648 if (in_crit)
6649 nbl_end_crit(vp);
6650 }
6651 }
6652
6653 error = makefh4(&cs->fh, vp, cs->exi);
6654
6655 /*
6656 * Force modified data and metadata out to stable storage.
6657 */
6658 (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
6659
6660 if (error) {
6661 VN_RELE(vp);
6662 *attrset = 0;
6663 return (puterrno4(error));
6664 }
6665
6666 /* if parent dir is attrdir, set namedattr fh flag */
6667 if (dvp->v_flag & V_XATTRDIR)
6668 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
6669
6670 if (cs->vp)
6671 VN_RELE(cs->vp);
6672
6673 cs->vp = vp;
6674
6675 /*
6676 * if we did not create the file, we will need to check
6677 * the access bits on the file
6678 */
6679
6680 if (!created) {
6681 if (setsize)
6682 args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
6683 status = check_open_access(args->share_access, cs, req);
6684 if (status != NFS4_OK)
6685 *attrset = 0;
6686 }
6687 return (status);
6688 }
6689
6690 /*ARGSUSED*/
6691 static void
6692 rfs4_do_open(struct compound_state *cs, struct svc_req *req,
6693 rfs4_openowner_t *oo, delegreq_t deleg,
6694 uint32_t access, uint32_t deny,
6695 OPEN4res *resp, int deleg_cur)
6696 {
6697 /* XXX Currently not using req */
6698 rfs4_state_t *sp;
6699 rfs4_file_t *fp;
6700 bool_t screate = TRUE;
6701 bool_t fcreate = TRUE;
6702 uint32_t open_a, share_a;
6703 uint32_t open_d, share_d;
6704 rfs4_deleg_state_t *dsp;
6705 sysid_t sysid;
6706 nfsstat4 status;
6707 caller_context_t ct;
6708 int fflags = 0;
6709 int recall = 0;
6710 int err;
6711 int first_open;
6712
6713 /* get the file struct and hold a lock on it during initial open */
6714 fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
6715 if (fp == NULL) {
6716 resp->status = NFS4ERR_RESOURCE;
6717 DTRACE_PROBE1(nfss__e__do__open1, nfsstat4, resp->status);
6718 return;
6719 }
6720
6721 sp = rfs4_findstate_by_owner_file(oo, fp, &screate);
6722 if (sp == NULL) {
6723 resp->status = NFS4ERR_RESOURCE;
6724 DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status);
6725 /* No need to keep any reference */
6726 rw_exit(&fp->rf_file_rwlock);
6727 rfs4_file_rele(fp);
6728 return;
6729 }
6730
6731 /* try to get the sysid before continuing */
6732 if ((status = rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) {
6733 resp->status = status;
6734 rfs4_file_rele(fp);
6735 /* Not a fully formed open; "close" it */
6736 if (screate == TRUE)
6737 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6738 rfs4_state_rele(sp);
6739 return;
6740 }
6741
6742 /* Calculate the fflags for this OPEN. */
6743 if (access & OPEN4_SHARE_ACCESS_READ)
6744 fflags |= FREAD;
6745 if (access & OPEN4_SHARE_ACCESS_WRITE)
6746 fflags |= FWRITE;
6747
6748 rfs4_dbe_lock(sp->rs_dbe);
6749
6750 /*
6751 * Calculate the new deny and access mode that this open is adding to
6752 * the file for this open owner;
6753 */
6754 open_d = (deny & ~sp->rs_open_deny);
6755 open_a = (access & ~sp->rs_open_access);
6756
6757 /*
6758 * Calculate the new share access and share deny modes that this open
6759 * is adding to the file for this open owner;
6760 */
6761 share_a = (access & ~sp->rs_share_access);
6762 share_d = (deny & ~sp->rs_share_deny);
6763
6764 first_open = (sp->rs_open_access & OPEN4_SHARE_ACCESS_BOTH) == 0;
6765
6766 /*
6767 * Check to see the client has already sent an open for this
6768 * open owner on this file with the same share/deny modes.
6769 * If so, we don't need to check for a conflict and we don't
6770 * need to add another shrlock. If not, then we need to
6771 * check for conflicts in deny and access before checking for
6772 * conflicts in delegation. We don't want to recall a
6773 * delegation based on an open that will eventually fail based
6774 * on shares modes.
6775 */
6776
6777 if (share_a || share_d) {
6778 if ((err = rfs4_share(sp, access, deny)) != 0) {
6779 rfs4_dbe_unlock(sp->rs_dbe);
6780 resp->status = err;
6781
6782 rfs4_file_rele(fp);
6783 /* Not a fully formed open; "close" it */
6784 if (screate == TRUE)
6785 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6786 rfs4_state_rele(sp);
6787 return;
6788 }
6789 }
6790
6791 rfs4_dbe_lock(fp->rf_dbe);
6792
6793 /*
6794 * Check to see if this file is delegated and if so, if a
6795 * recall needs to be done.
6796 */
6797 if (rfs4_check_recall(sp, access)) {
6798 rfs4_dbe_unlock(fp->rf_dbe);
6799 rfs4_dbe_unlock(sp->rs_dbe);
6800 rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client);
6801 delay(NFS4_DELEGATION_CONFLICT_DELAY);
6802 rfs4_dbe_lock(sp->rs_dbe);
6803
6804 /* if state closed while lock was dropped */
6805 if (sp->rs_closed) {
6806 if (share_a || share_d)
6807 (void) rfs4_unshare(sp);
6808 rfs4_dbe_unlock(sp->rs_dbe);
6809 rfs4_file_rele(fp);
6810 /* Not a fully formed open; "close" it */
6811 if (screate == TRUE)
6812 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6813 rfs4_state_rele(sp);
6814 resp->status = NFS4ERR_OLD_STATEID;
6815 return;
6816 }
6817
6818 rfs4_dbe_lock(fp->rf_dbe);
6819 /* Let's see if the delegation was returned */
6820 if (rfs4_check_recall(sp, access)) {
6821 rfs4_dbe_unlock(fp->rf_dbe);
6822 if (share_a || share_d)
6823 (void) rfs4_unshare(sp);
6824 rfs4_dbe_unlock(sp->rs_dbe);
6825 rfs4_file_rele(fp);
6826 rfs4_update_lease(sp->rs_owner->ro_client);
6827
6828 /* Not a fully formed open; "close" it */
6829 if (screate == TRUE)
6830 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6831 rfs4_state_rele(sp);
6832 resp->status = NFS4ERR_DELAY;
6833 return;
6834 }
6835 }
6836 /*
6837 * the share check passed and any delegation conflict has been
6838 * taken care of, now call vop_open.
6839 * if this is the first open then call vop_open with fflags.
6840 * if not, call vn_open_upgrade with just the upgrade flags.
6841 *
6842 * if the file has been opened already, it will have the current
6843 * access mode in the state struct. if it has no share access, then
6844 * this is a new open.
6845 *
6846 * However, if this is open with CLAIM_DLEGATE_CUR, then don't
6847 * call VOP_OPEN(), just do the open upgrade.
6848 */
6849 if (first_open && !deleg_cur) {
6850 ct.cc_sysid = sysid;
6851 ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
6852 ct.cc_caller_id = nfs4_srv_caller_id;
6853 ct.cc_flags = CC_DONTBLOCK;
6854 err = VOP_OPEN(&cs->vp, fflags, cs->cr, &ct);
6855 if (err) {
6856 rfs4_dbe_unlock(fp->rf_dbe);
6857 if (share_a || share_d)
6858 (void) rfs4_unshare(sp);
6859 rfs4_dbe_unlock(sp->rs_dbe);
6860 rfs4_file_rele(fp);
6861
6862 /* Not a fully formed open; "close" it */
6863 if (screate == TRUE)
6864 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6865 rfs4_state_rele(sp);
6866 /* check if a monitor detected a delegation conflict */
6867 if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
6868 resp->status = NFS4ERR_DELAY;
6869 else
6870 resp->status = NFS4ERR_SERVERFAULT;
6871 return;
6872 }
6873 } else { /* open upgrade */
6874 /*
6875 * calculate the fflags for the new mode that is being added
6876 * by this upgrade.
6877 */
6878 fflags = 0;
6879 if (open_a & OPEN4_SHARE_ACCESS_READ)
6880 fflags |= FREAD;
6881 if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6882 fflags |= FWRITE;
6883 vn_open_upgrade(cs->vp, fflags);
6884 }
6885 sp->rs_open_access |= access;
6886 sp->rs_open_deny |= deny;
6887
6888 if (open_d & OPEN4_SHARE_DENY_READ)
6889 fp->rf_deny_read++;
6890 if (open_d & OPEN4_SHARE_DENY_WRITE)
6891 fp->rf_deny_write++;
6892 fp->rf_share_deny |= deny;
6893
6894 if (open_a & OPEN4_SHARE_ACCESS_READ)
6895 fp->rf_access_read++;
6896 if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6897 fp->rf_access_write++;
6898 fp->rf_share_access |= access;
6899
6900 /*
6901 * Check for delegation here. if the deleg argument is not
6902 * DELEG_ANY, then this is a reclaim from a client and
6903 * we must honor the delegation requested. If necessary we can
6904 * set the recall flag.
6905 */
6906
6907 dsp = rfs4_grant_delegation(deleg, sp, &recall);
6908
6909 cs->deleg = (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE);
6910
6911 next_stateid(&sp->rs_stateid);
6912
6913 resp->stateid = sp->rs_stateid.stateid;
6914
6915 rfs4_dbe_unlock(fp->rf_dbe);
6916 rfs4_dbe_unlock(sp->rs_dbe);
6917
6918 if (dsp) {
6919 rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
6920 rfs4_deleg_state_rele(dsp);
6921 }
6922
6923 rfs4_file_rele(fp);
6924 rfs4_state_rele(sp);
6925
6926 resp->status = NFS4_OK;
6927 }
6928
6929 /*ARGSUSED*/
6930 static void
6931 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
6932 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6933 {
6934 change_info4 *cinfo = &resp->cinfo;
6935 bitmap4 *attrset = &resp->attrset;
6936
6937 if (args->opentype == OPEN4_NOCREATE)
6938 resp->status = rfs4_lookupfile(&args->open_claim4_u.file,
6939 req, cs, args->share_access, cinfo);
6940 else {
6941 /* inhibit delegation grants during exclusive create */
6942
6943 if (args->mode == EXCLUSIVE4)
6944 rfs4_disable_delegation();
6945
6946 resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
6947 oo->ro_client->rc_clientid);
6948 }
6949
6950 if (resp->status == NFS4_OK) {
6951
6952 /* cs->vp cs->fh now reference the desired file */
6953
6954 rfs4_do_open(cs, req, oo,
6955 oo->ro_need_confirm ? DELEG_NONE : DELEG_ANY,
6956 args->share_access, args->share_deny, resp, 0);
6957
6958 /*
6959 * If rfs4_createfile set attrset, we must
6960 * clear this attrset before the response is copied.
6961 */
6962 if (resp->status != NFS4_OK && resp->attrset) {
6963 resp->attrset = 0;
6964 }
6965 }
6966 else
6967 *cs->statusp = resp->status;
6968
6969 if (args->mode == EXCLUSIVE4)
6970 rfs4_enable_delegation();
6971 }
6972
6973 /*ARGSUSED*/
6974 static void
6975 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
6976 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6977 {
6978 change_info4 *cinfo = &resp->cinfo;
6979 vattr_t va;
6980 vtype_t v_type = cs->vp->v_type;
6981 int error = 0;
6982
6983 /* Verify that we have a regular file */
6984 if (v_type != VREG) {
6985 if (v_type == VDIR)
6986 resp->status = NFS4ERR_ISDIR;
6987 else if (v_type == VLNK)
6988 resp->status = NFS4ERR_SYMLINK;
6989 else
6990 resp->status = NFS4ERR_INVAL;
6991 return;
6992 }
6993
6994 va.va_mask = AT_MODE|AT_UID;
6995 error = VOP_GETATTR(cs->vp, &va, 0, cs->cr, NULL);
6996 if (error) {
6997 resp->status = puterrno4(error);
6998 return;
6999 }
7000
7001 cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
7002
7003 /*
7004 * Check if we have access to the file, Note the the file
7005 * could have originally been open UNCHECKED or GUARDED
7006 * with mode bits that will now fail, but there is nothing
7007 * we can really do about that except in the case that the
7008 * owner of the file is the one requesting the open.
7009 */
7010 if (crgetuid(cs->cr) != va.va_uid) {
7011 resp->status = check_open_access(args->share_access, cs, req);
7012 if (resp->status != NFS4_OK) {
7013 return;
7014 }
7015 }
7016
7017 /*
7018 * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
7019 */
7020 cinfo->before = 0;
7021 cinfo->after = 0;
7022 cinfo->atomic = FALSE;
7023
7024 rfs4_do_open(cs, req, oo,
7025 NFS4_DELEG4TYPE2REQTYPE(args->open_claim4_u.delegate_type),
7026 args->share_access, args->share_deny, resp, 0);
7027 }
7028
7029 static void
7030 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
7031 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7032 {
7033 int error;
7034 nfsstat4 status;
7035 stateid4 stateid =
7036 args->open_claim4_u.delegate_cur_info.delegate_stateid;
7037 rfs4_deleg_state_t *dsp;
7038
7039 /*
7040 * Find the state info from the stateid and confirm that the
7041 * file is delegated. If the state openowner is the same as
7042 * the supplied openowner we're done. If not, get the file
7043 * info from the found state info. Use that file info to
7044 * create the state for this lock owner. Note solaris doen't
7045 * really need the pathname to find the file. We may want to
7046 * lookup the pathname and make sure that the vp exist and
7047 * matches the vp in the file structure. However it is
7048 * possible that the pathname nolonger exists (local process
7049 * unlinks the file), so this may not be that useful.
7050 */
7051
7052 status = rfs4_get_deleg_state(&stateid, &dsp);
7053 if (status != NFS4_OK) {
7054 resp->status = status;
7055 return;
7056 }
7057
7058 ASSERT(dsp->rds_finfo->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE);
7059
7060 /*
7061 * New lock owner, create state. Since this was probably called
7062 * in response to a CB_RECALL we set deleg to DELEG_NONE
7063 */
7064
7065 ASSERT(cs->vp != NULL);
7066 VN_RELE(cs->vp);
7067 VN_HOLD(dsp->rds_finfo->rf_vp);
7068 cs->vp = dsp->rds_finfo->rf_vp;
7069
7070 if (error = makefh4(&cs->fh, cs->vp, cs->exi)) {
7071 rfs4_deleg_state_rele(dsp);
7072 *cs->statusp = resp->status = puterrno4(error);
7073 return;
7074 }
7075
7076 /* Mark progress for delegation returns */
7077 dsp->rds_finfo->rf_dinfo.rd_time_lastwrite = gethrestime_sec();
7078 rfs4_deleg_state_rele(dsp);
7079 rfs4_do_open(cs, req, oo, DELEG_NONE,
7080 args->share_access, args->share_deny, resp, 1);
7081 }
7082
7083 /*ARGSUSED*/
7084 static void
7085 rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
7086 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7087 {
7088 /*
7089 * Lookup the pathname, it must already exist since this file
7090 * was delegated.
7091 *
7092 * Find the file and state info for this vp and open owner pair.
7093 * check that they are in fact delegated.
7094 * check that the state access and deny modes are the same.
7095 *
7096 * Return the delgation possibly seting the recall flag.
7097 */
7098 rfs4_file_t *fp;
7099 rfs4_state_t *sp;
7100 bool_t create = FALSE;
7101 bool_t dcreate = FALSE;
7102 rfs4_deleg_state_t *dsp;
7103 nfsace4 *ace;
7104
7105 /* Note we ignore oflags */
7106 resp->status = rfs4_lookupfile(&args->open_claim4_u.file_delegate_prev,
7107 req, cs, args->share_access, &resp->cinfo);
7108
7109 if (resp->status != NFS4_OK) {
7110 return;
7111 }
7112
7113 /* get the file struct and hold a lock on it during initial open */
7114 fp = rfs4_findfile_withlock(cs->vp, NULL, &create);
7115 if (fp == NULL) {
7116 resp->status = NFS4ERR_RESOURCE;
7117 DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status);
7118 return;
7119 }
7120
7121 sp = rfs4_findstate_by_owner_file(oo, fp, &create);
7122 if (sp == NULL) {
7123 resp->status = NFS4ERR_SERVERFAULT;
7124 DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status);
7125 rw_exit(&fp->rf_file_rwlock);
7126 rfs4_file_rele(fp);
7127 return;
7128 }
7129
7130 rfs4_dbe_lock(sp->rs_dbe);
7131 rfs4_dbe_lock(fp->rf_dbe);
7132 if (args->share_access != sp->rs_share_access ||
7133 args->share_deny != sp->rs_share_deny ||
7134 sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
7135 NFS4_DEBUG(rfs4_debug,
7136 (CE_NOTE, "rfs4_do_opendelprev: state mixup"));
7137 rfs4_dbe_unlock(fp->rf_dbe);
7138 rfs4_dbe_unlock(sp->rs_dbe);
7139 rfs4_file_rele(fp);
7140 rfs4_state_rele(sp);
7141 resp->status = NFS4ERR_SERVERFAULT;
7142 return;
7143 }
7144 rfs4_dbe_unlock(fp->rf_dbe);
7145 rfs4_dbe_unlock(sp->rs_dbe);
7146
7147 dsp = rfs4_finddeleg(sp, &dcreate);
7148 if (dsp == NULL) {
7149 rfs4_state_rele(sp);
7150 rfs4_file_rele(fp);
7151 resp->status = NFS4ERR_SERVERFAULT;
7152 return;
7153 }
7154
7155 next_stateid(&sp->rs_stateid);
7156
7157 resp->stateid = sp->rs_stateid.stateid;
7158
7159 resp->delegation.delegation_type = dsp->rds_dtype;
7160
7161 if (dsp->rds_dtype == OPEN_DELEGATE_READ) {
7162 open_read_delegation4 *rv =
7163 &resp->delegation.open_delegation4_u.read;
7164
7165 rv->stateid = dsp->rds_delegid.stateid;
7166 rv->recall = FALSE; /* no policy in place to set to TRUE */
7167 ace = &rv->permissions;
7168 } else {
7169 open_write_delegation4 *rv =
7170 &resp->delegation.open_delegation4_u.write;
7171
7172 rv->stateid = dsp->rds_delegid.stateid;
7173 rv->recall = FALSE; /* no policy in place to set to TRUE */
7174 ace = &rv->permissions;
7175 rv->space_limit.limitby = NFS_LIMIT_SIZE;
7176 rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
7177 }
7178
7179 /* XXX For now */
7180 ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
7181 ace->flag = 0;
7182 ace->access_mask = 0;
7183 ace->who.utf8string_len = 0;
7184 ace->who.utf8string_val = 0;
7185
7186 rfs4_deleg_state_rele(dsp);
7187 rfs4_state_rele(sp);
7188 rfs4_file_rele(fp);
7189 }
7190
/*
 * Result of a sequence id check, as returned by rfs4_check_seqid() and
 * its open-owner / lock-owner wrappers.
 */
typedef enum {
	NFS4_CHKSEQ_OKAY = 0,	/* request carries the next expected seqid */
	NFS4_CHKSEQ_REPLAY = 1,	/* same seqid and same op as the last request */
	NFS4_CHKSEQ_BAD = 2	/* any other seqid, or same seqid, other op */
} rfs4_chkseq_t;
7196
7197 /*
7198 * Generic function for sequence number checks.
7199 */
7200 static rfs4_chkseq_t
7201 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
7202 seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
7203 {
7204 /* Same sequence ids and matching operations? */
7205 if (seqid == rqst_seq && resop->resop == lastop->resop) {
7206 if (copyres == TRUE) {
7207 rfs4_free_reply(resop);
7208 rfs4_copy_reply(resop, lastop);
7209 }
7210 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
7211 "Replayed SEQID %d\n", seqid));
7212 return (NFS4_CHKSEQ_REPLAY);
7213 }
7214
7215 /* If the incoming sequence is not the next expected then it is bad */
7216 if (rqst_seq != seqid + 1) {
7217 if (rqst_seq == seqid) {
7218 NFS4_DEBUG(rfs4_debug,
7219 (CE_NOTE, "BAD SEQID: Replayed sequence id "
7220 "but last op was %d current op is %d\n",
7221 lastop->resop, resop->resop));
7222 return (NFS4_CHKSEQ_BAD);
7223 }
7224 NFS4_DEBUG(rfs4_debug,
7225 (CE_NOTE, "BAD SEQID: got %u expecting %u\n",
7226 rqst_seq, seqid));
7227 return (NFS4_CHKSEQ_BAD);
7228 }
7229
7230 /* Everything okay -- next expected */
7231 return (NFS4_CHKSEQ_OKAY);
7232 }
7233
7234
7235 static rfs4_chkseq_t
7236 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7237 {
7238 rfs4_chkseq_t rc;
7239
7240 rfs4_dbe_lock(op->ro_dbe);
7241 rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, seqid, resop,
7242 TRUE);
7243 rfs4_dbe_unlock(op->ro_dbe);
7244
7245 if (rc == NFS4_CHKSEQ_OKAY)
7246 rfs4_update_lease(op->ro_client);
7247
7248 return (rc);
7249 }
7250
7251 static rfs4_chkseq_t
7252 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7253 {
7254 rfs4_chkseq_t rc;
7255
7256 rfs4_dbe_lock(op->ro_dbe);
7257 rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply,
7258 olo_seqid, resop, FALSE);
7259 rfs4_dbe_unlock(op->ro_dbe);
7260
7261 return (rc);
7262 }
7263
7264 static rfs4_chkseq_t
7265 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lsp, nfs_resop4 *resop)
7266 {
7267 rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
7268
7269 rfs4_dbe_lock(lsp->rls_dbe);
7270 if (!lsp->rls_skip_seqid_check)
7271 rc = rfs4_check_seqid(lsp->rls_seqid, &lsp->rls_reply, seqid,
7272 resop, TRUE);
7273 rfs4_dbe_unlock(lsp->rls_dbe);
7274
7275 return (rc);
7276 }
7277
/*
 * OPEN operation handler.
 *
 * Outline:
 *   1. Require a current filehandle, then find the client named in the
 *	open owner and make sure its lease has not expired.
 *   2. Find (or create) the open owner and serialize on its ro_sw.
 *   3. For an open owner that already existed, check the request seqid:
 *	a bad seqid fails the request, a replay returns the saved reply,
 *	and an open owner still awaiting confirmation has its opens freed
 *	and is looked up again ("retry").
 *   4. Apply the grace-period rules for the claim type, set the result
 *	flags, reject mount points and invalid share access/deny bits.
 *   5. Dispatch on the claim type to the rfs4_do_open*() helper.
 *   6. Depending on the final status, renew the lease, save the reply,
 *	restore the filehandle for a replay and advance the open seqid.
 *
 * The final status is returned in both resp->status and *cs->statusp.
 */
static void
rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
    struct svc_req *req, struct compound_state *cs)
{
	OPEN4args *args = &argop->nfs_argop4_u.opopen;
	OPEN4res *resp = &resop->nfs_resop4_u.opopen;
	open_owner4 *owner = &args->owner;
	open_claim_type4 claim = args->claim;
	rfs4_client_t *cp;
	rfs4_openowner_t *oo;
	bool_t create;
	bool_t replay = FALSE;
	int can_reclaim;

	DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs,
	    OPEN4args *, args);

	if (cs->vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto end;
	}

	/*
	 * Need to check clientid and lease expiration first based on
	 * error ordering and incrementing sequence id.
	 */
	cp = rfs4_findclient_by_id(owner->clientid, FALSE);
	if (cp == NULL) {
		*cs->statusp = resp->status =
		    rfs4_check_clientid(&owner->clientid, 0);
		goto end;
	}

	if (rfs4_lease_expired(cp)) {
		rfs4_client_close(cp);
		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
		goto end;
	}
	can_reclaim = cp->rc_can_reclaim;

	/*
	 * Find the open_owner for use from this point forward.  Take
	 * care in updating the sequence id based on the type of error
	 * being returned.
	 */
retry:
	/* create is in/out: it is tested below to see if oo already existed */
	create = TRUE;
	oo = rfs4_findopenowner(owner, &create, args->seqid);
	if (oo == NULL) {
		*cs->statusp = resp->status = NFS4ERR_RESOURCE;
		rfs4_client_rele(cp);
		goto end;
	}

	/* Hold off access to the sequence space while the open is done */
	rfs4_sw_enter(&oo->ro_sw);

	/*
	 * If the open_owner existed before at the server, then check
	 * the sequence id.
	 */
	if (!create && !oo->ro_postpone_confirm) {
		switch (rfs4_check_open_seqid(args->seqid, oo, resop)) {
		case NFS4_CHKSEQ_BAD:
			/*
			 * A seqid ahead of ours on an unconfirmed open
			 * owner: discard its opens and start over.
			 */
			if ((args->seqid > oo->ro_open_seqid) &&
			    oo->ro_need_confirm) {
				rfs4_free_opens(oo, TRUE, FALSE);
				rfs4_sw_exit(&oo->ro_sw);
				rfs4_openowner_rele(oo);
				goto retry;
			}
			resp->status = NFS4ERR_BAD_SEQID;
			goto out;
		case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
			/* resop now holds a copy of the saved reply */
			replay = TRUE;
			goto out;
		default:
			break;
		}

		/*
		 * Sequence was ok and open owner exists
		 * check to see if we have yet to see an
		 * open_confirm.
		 */
		if (oo->ro_need_confirm) {
			rfs4_free_opens(oo, TRUE, FALSE);
			rfs4_sw_exit(&oo->ro_sw);
			rfs4_openowner_rele(oo);
			goto retry;
		}
	}
	/* Grace only applies to regular-type OPENs */
	if (rfs4_clnt_in_grace(cp) &&
	    (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR)) {
		*cs->statusp = resp->status = NFS4ERR_GRACE;
		goto out;
	}

	/*
	 * If previous state at the server existed then can_reclaim
	 * will be set.  If not reply NFS4ERR_NO_GRACE to the
	 * client.
	 */
	if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
		*cs->statusp = resp->status = NFS4ERR_NO_GRACE;
		goto out;
	}


	/*
	 * Reject the open if the client has missed the grace period
	 */
	if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
		*cs->statusp = resp->status = NFS4ERR_NO_GRACE;
		goto out;
	}

	/* Couple of up-front bookkeeping items */
	if (oo->ro_need_confirm) {
		/*
		 * If this is a reclaim OPEN then we should not ask
		 * for a confirmation of the open_owner per the
		 * protocol specification.
		 */
		if (claim == CLAIM_PREVIOUS)
			oo->ro_need_confirm = FALSE;
		else
			resp->rflags |= OPEN4_RESULT_CONFIRM;
	}
	resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;

	/*
	 * If there is an unshared filesystem mounted on this vnode,
	 * do not allow to open/create in this directory.
	 */
	if (vn_ismntpt(cs->vp)) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}

	/*
	 * access must be READ, WRITE, or BOTH.  No access is invalid.
	 * deny can be READ, WRITE, BOTH, or NONE.
	 * bits not defined for access/deny are invalid.
	 */
	if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
	    (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
	    (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		goto out;
	}


	/*
	 * make sure attrset is zero before response is built.
	 */
	resp->attrset = 0;

	/* Each helper leaves its outcome in resp->status */
	switch (claim) {
	case CLAIM_NULL:
		rfs4_do_opennull(cs, req, args, oo, resp);
		break;
	case CLAIM_PREVIOUS:
		rfs4_do_openprev(cs, req, args, oo, resp);
		break;
	case CLAIM_DELEGATE_CUR:
		rfs4_do_opendelcur(cs, req, args, oo, resp);
		break;
	case CLAIM_DELEGATE_PREV:
		rfs4_do_opendelprev(cs, req, args, oo, resp);
		break;
	default:
		resp->status = NFS4ERR_INVAL;
		break;
	}

out:
	rfs4_client_rele(cp);

	/* Catch sequence id handling here to make it a little easier */
	switch (resp->status) {
	case NFS4ERR_BADXDR:
	case NFS4ERR_BAD_SEQID:
	case NFS4ERR_BAD_STATEID:
	case NFS4ERR_NOFILEHANDLE:
	case NFS4ERR_RESOURCE:
	case NFS4ERR_STALE_CLIENTID:
	case NFS4ERR_STALE_STATEID:
		/*
		 * The protocol states that if any of these errors are
		 * being returned, the sequence id should not be
		 * incremented.  Any other return requires an
		 * increment.
		 */
		break;
	default:
		/* Always update the lease in this case */
		rfs4_update_lease(oo->ro_client);

		/* Regular response - copy the result */
		if (!replay)
			rfs4_update_open_resp(oo, resop, &cs->fh);

		/*
		 * REPLAY case: Only if the previous response was OK
		 * do we copy the filehandle.  If not OK, no
		 * filehandle to copy.
		 */
		if (replay == TRUE &&
		    resp->status == NFS4_OK &&
		    oo->ro_reply_fh.nfs_fh4_val) {
			/*
			 * If this is a replay, we must restore the
			 * current filehandle/vp to that of what was
			 * returned originally.  Try our best to do
			 * it.
			 */
			nfs_fh4_fmt_t *fh_fmtp =
			    (nfs_fh4_fmt_t *)oo->ro_reply_fh.nfs_fh4_val;

			cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
			    (fid_t *)&fh_fmtp->fh4_xlen, NULL);

			if (cs->exi == NULL) {
				resp->status = NFS4ERR_STALE;
				goto finish;
			}

			VN_RELE(cs->vp);

			/* on failure nfs4_fhtovp() is passed resp->status */
			cs->vp = nfs4_fhtovp(&oo->ro_reply_fh, cs->exi,
			    &resp->status);

			if (cs->vp == NULL)
				goto finish;

			nfs_fh4_copy(&oo->ro_reply_fh, &cs->fh);
		}

		/*
		 * If this was a replay, no need to update the
		 * sequence id.  If the open_owner was not created on
		 * this pass, then update.  The first use of an
		 * open_owner will not bump the sequence id.
		 */
		if (replay == FALSE && !create)
			rfs4_update_open_sequence(oo);
		/*
		 * If the client is receiving an error and the
		 * open_owner needs to be confirmed, there is no way
		 * to notify the client of this fact ignoring the fact
		 * that the server has no method of returning a
		 * stateid to confirm.  Therefore, the server needs to
		 * mark this open_owner in a way as to avoid the
		 * sequence id checking the next time the client uses
		 * this open_owner.
		 */
		if (resp->status != NFS4_OK && oo->ro_need_confirm)
			oo->ro_postpone_confirm = TRUE;
		/*
		 * If OK response then clear the postpone flag and
		 * reset the sequence id to keep in sync with the
		 * client.
		 */
		if (resp->status == NFS4_OK && oo->ro_postpone_confirm) {
			oo->ro_postpone_confirm = FALSE;
			oo->ro_open_seqid = args->seqid;
		}
		break;
	}

finish:
	*cs->statusp = resp->status;

	rfs4_sw_exit(&oo->ro_sw);
	rfs4_openowner_rele(oo);

end:
	DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs,
	    OPEN4res *, resp);
}
7560
7561 /*ARGSUSED*/
7562 void
7563 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7564 struct svc_req *req, struct compound_state *cs)
7565 {
7566 OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
7567 OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
7568 rfs4_state_t *sp;
7569 nfsstat4 status;
7570
7571 DTRACE_NFSV4_2(op__open__confirm__start, struct compound_state *, cs,
7572 OPEN_CONFIRM4args *, args);
7573
7574 if (cs->vp == NULL) {
7575 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7576 goto out;
7577 }
7578
7579 if (cs->vp->v_type != VREG) {
7580 *cs->statusp = resp->status =
7581 cs->vp->v_type == VDIR ? NFS4ERR_ISDIR : NFS4ERR_INVAL;
7582 return;
7583 }
7584
7585 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7586 if (status != NFS4_OK) {
7587 *cs->statusp = resp->status = status;
7588 goto out;
7589 }
7590
7591 /* Ensure specified filehandle matches */
7592 if (cs->vp != sp->rs_finfo->rf_vp) {
7593 rfs4_state_rele(sp);
7594 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7595 goto out;
7596 }
7597
7598 /* hold off other access to open_owner while we tinker */
7599 rfs4_sw_enter(&sp->rs_owner->ro_sw);
7600
7601 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7602 case NFS4_CHECK_STATEID_OKAY:
7603 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7604 resop) != 0) {
7605 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7606 break;
7607 }
7608 /*
7609 * If it is the appropriate stateid and determined to
7610 * be "OKAY" then this means that the stateid does not
7611 * need to be confirmed and the client is in error for
7612 * sending an OPEN_CONFIRM.
7613 */
7614 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7615 break;
7616 case NFS4_CHECK_STATEID_OLD:
7617 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7618 break;
7619 case NFS4_CHECK_STATEID_BAD:
7620 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7621 break;
7622 case NFS4_CHECK_STATEID_EXPIRED:
7623 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7624 break;
7625 case NFS4_CHECK_STATEID_CLOSED:
7626 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7627 break;
7628 case NFS4_CHECK_STATEID_REPLAY:
7629 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7630 resop)) {
7631 case NFS4_CHKSEQ_OKAY:
7632 /*
7633 * This is replayed stateid; if seqid matches
7634 * next expected, then client is using wrong seqid.
7635 */
7636 /* fall through */
7637 case NFS4_CHKSEQ_BAD:
7638 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7639 break;
7640 case NFS4_CHKSEQ_REPLAY:
7641 /*
7642 * Note this case is the duplicate case so
7643 * resp->status is already set.
7644 */
7645 *cs->statusp = resp->status;
7646 rfs4_update_lease(sp->rs_owner->ro_client);
7647 break;
7648 }
7649 break;
7650 case NFS4_CHECK_STATEID_UNCONFIRMED:
7651 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7652 resop) != NFS4_CHKSEQ_OKAY) {
7653 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7654 break;
7655 }
7656 *cs->statusp = resp->status = NFS4_OK;
7657
7658 next_stateid(&sp->rs_stateid);
7659 resp->open_stateid = sp->rs_stateid.stateid;
7660 sp->rs_owner->ro_need_confirm = FALSE;
7661 rfs4_update_lease(sp->rs_owner->ro_client);
7662 rfs4_update_open_sequence(sp->rs_owner);
7663 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7664 break;
7665 default:
7666 ASSERT(FALSE);
7667 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7668 break;
7669 }
7670 rfs4_sw_exit(&sp->rs_owner->ro_sw);
7671 rfs4_state_rele(sp);
7672
7673 out:
7674 DTRACE_NFSV4_2(op__open__confirm__done, struct compound_state *, cs,
7675 OPEN_CONFIRM4res *, resp);
7676 }
7677
7678 /*ARGSUSED*/
7679 void
7680 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
7681 struct svc_req *req, struct compound_state *cs)
7682 {
7683 OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
7684 OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
7685 uint32_t access = args->share_access;
7686 uint32_t deny = args->share_deny;
7687 nfsstat4 status;
7688 rfs4_state_t *sp;
7689 rfs4_file_t *fp;
7690 int fflags = 0;
7691
7692 DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs,
7693 OPEN_DOWNGRADE4args *, args);
7694
7695 if (cs->vp == NULL) {
7696 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7697 goto out;
7698 }
7699
7700 if (cs->vp->v_type != VREG) {
7701 *cs->statusp = resp->status = NFS4ERR_INVAL;
7702 return;
7703 }
7704
7705 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7706 if (status != NFS4_OK) {
7707 *cs->statusp = resp->status = status;
7708 goto out;
7709 }
7710
7711 /* Ensure specified filehandle matches */
7712 if (cs->vp != sp->rs_finfo->rf_vp) {
7713 rfs4_state_rele(sp);
7714 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7715 goto out;
7716 }
7717
7718 /* hold off other access to open_owner while we tinker */
7719 rfs4_sw_enter(&sp->rs_owner->ro_sw);
7720
7721 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7722 case NFS4_CHECK_STATEID_OKAY:
7723 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7724 resop) != NFS4_CHKSEQ_OKAY) {
7725 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7726 goto end;
7727 }
7728 break;
7729 case NFS4_CHECK_STATEID_OLD:
7730 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7731 goto end;
7732 case NFS4_CHECK_STATEID_BAD:
7733 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7734 goto end;
7735 case NFS4_CHECK_STATEID_EXPIRED:
7736 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7737 goto end;
7738 case NFS4_CHECK_STATEID_CLOSED:
7739 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7740 goto end;
7741 case NFS4_CHECK_STATEID_UNCONFIRMED:
7742 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7743 goto end;
7744 case NFS4_CHECK_STATEID_REPLAY:
7745 /* Check the sequence id for the open owner */
7746 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7747 resop)) {
7748 case NFS4_CHKSEQ_OKAY:
7749 /*
7750 * This is replayed stateid; if seqid matches
7751 * next expected, then client is using wrong seqid.
7752 */
7753 /* fall through */
7754 case NFS4_CHKSEQ_BAD:
7755 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7756 goto end;
7757 case NFS4_CHKSEQ_REPLAY:
7758 /*
7759 * Note this case is the duplicate case so
7760 * resp->status is already set.
7761 */
7762 *cs->statusp = resp->status;
7763 rfs4_update_lease(sp->rs_owner->ro_client);
7764 goto end;
7765 }
7766 break;
7767 default:
7768 ASSERT(FALSE);
7769 break;
7770 }
7771
7772 rfs4_dbe_lock(sp->rs_dbe);
7773 /*
7774 * Check that the new access modes and deny modes are valid.
7775 * Check that no invalid bits are set.
7776 */
7777 if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
7778 (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) {
7779 *cs->statusp = resp->status = NFS4ERR_INVAL;
7780 rfs4_update_open_sequence(sp->rs_owner);
7781 rfs4_dbe_unlock(sp->rs_dbe);
7782 goto end;
7783 }
7784
7785 /*
7786 * The new modes must be a subset of the current modes and
7787 * the access must specify at least one mode. To test that
7788 * the new mode is a subset of the current modes we bitwise
7789 * AND them together and check that the result equals the new
7790 * mode. For example:
7791 * New mode, access == R and current mode, sp->rs_open_access == RW
7792 * access & sp->rs_open_access == R == access, so the new access mode
7793 * is valid. Consider access == RW, sp->rs_open_access = R
7794 * access & sp->rs_open_access == R != access, so the new access mode
7795 * is invalid.
7796 */
7797 if ((access & sp->rs_open_access) != access ||
7798 (deny & sp->rs_open_deny) != deny ||
7799 (access &
7800 (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
7801 *cs->statusp = resp->status = NFS4ERR_INVAL;
7802 rfs4_update_open_sequence(sp->rs_owner);
7803 rfs4_dbe_unlock(sp->rs_dbe);
7804 goto end;
7805 }
7806
7807 /*
7808 * Release any share locks associated with this stateID.
7809 * Strictly speaking, this violates the spec because the
7810 * spec effectively requires that open downgrade be atomic.
7811 * At present, fs_shrlock does not have this capability.
7812 */
7813 (void) rfs4_unshare(sp);
7814
7815 status = rfs4_share(sp, access, deny);
7816 if (status != NFS4_OK) {
7817 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7818 rfs4_update_open_sequence(sp->rs_owner);
7819 rfs4_dbe_unlock(sp->rs_dbe);
7820 goto end;
7821 }
7822
7823 fp = sp->rs_finfo;
7824 rfs4_dbe_lock(fp->rf_dbe);
7825
7826 /*
7827 * If the current mode has deny read and the new mode
7828 * does not, decrement the number of deny read mode bits
7829 * and if it goes to zero turn off the deny read bit
7830 * on the file.
7831 */
7832 if ((sp->rs_open_deny & OPEN4_SHARE_DENY_READ) &&
7833 (deny & OPEN4_SHARE_DENY_READ) == 0) {
7834 fp->rf_deny_read--;
7835 if (fp->rf_deny_read == 0)
7836 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
7837 }
7838
7839 /*
7840 * If the current mode has deny write and the new mode
7841 * does not, decrement the number of deny write mode bits
7842 * and if it goes to zero turn off the deny write bit
7843 * on the file.
7844 */
7845 if ((sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) &&
7846 (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
7847 fp->rf_deny_write--;
7848 if (fp->rf_deny_write == 0)
7849 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
7850 }
7851
7852 /*
7853 * If the current mode has access read and the new mode
7854 * does not, decrement the number of access read mode bits
7855 * and if it goes to zero turn off the access read bit
7856 * on the file. set fflags to FREAD for the call to
7857 * vn_open_downgrade().
7858 */
7859 if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) &&
7860 (access & OPEN4_SHARE_ACCESS_READ) == 0) {
7861 fp->rf_access_read--;
7862 if (fp->rf_access_read == 0)
7863 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
7864 fflags |= FREAD;
7865 }
7866
7867 /*
7868 * If the current mode has access write and the new mode
7869 * does not, decrement the number of access write mode bits
7870 * and if it goes to zero turn off the access write bit
7871 * on the file. set fflags to FWRITE for the call to
7872 * vn_open_downgrade().
7873 */
7874 if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) &&
7875 (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
7876 fp->rf_access_write--;
7877 if (fp->rf_access_write == 0)
7878 fp->rf_share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
7879 fflags |= FWRITE;
7880 }
7881
7882 /* Check that the file is still accessible */
7883 ASSERT(fp->rf_share_access);
7884
7885 rfs4_dbe_unlock(fp->rf_dbe);
7886
7887 /* now set the new open access and deny modes */
7888 sp->rs_open_access = access;
7889 sp->rs_open_deny = deny;
7890
7891 /*
7892 * we successfully downgraded the share lock, now we need to downgrade
7893 * the open. it is possible that the downgrade was only for a deny
7894 * mode and we have nothing else to do.
7895 */
7896 if ((fflags & (FREAD|FWRITE)) != 0)
7897 vn_open_downgrade(cs->vp, fflags);
7898
7899 /* Update the stateid */
7900 next_stateid(&sp->rs_stateid);
7901 resp->open_stateid = sp->rs_stateid.stateid;
7902
7903 rfs4_dbe_unlock(sp->rs_dbe);
7904
7905 *cs->statusp = resp->status = NFS4_OK;
7906 /* Update the lease */
7907 rfs4_update_lease(sp->rs_owner->ro_client);
7908 /* And the sequence */
7909 rfs4_update_open_sequence(sp->rs_owner);
7910 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7911
7912 end:
7913 rfs4_sw_exit(&sp->rs_owner->ro_sw);
7914 rfs4_state_rele(sp);
7915 out:
7916 DTRACE_NFSV4_2(op__open__downgrade__done, struct compound_state *, cs,
7917 OPEN_DOWNGRADE4res *, resp);
7918 }
7919
/*
 * memstr: find the first occurrence of the NUL-terminated string s2 within
 * the n bytes starting at s1.  The region at s1 is compared as raw bytes and
 * does not need to be NUL-terminated.
 *
 * Returns a pointer to the first byte of the match, or NULL when s2 does not
 * fit entirely inside the n-byte region at any offset.
 */
static void *
memstr(const void *s1, const char *s2, size_t n)
{
	size_t needle_len = strlen(s2);
	char *cursor = (char *)s1;
	size_t remaining;

	/* Give up as soon as fewer bytes remain than the needle requires. */
	for (remaining = n; remaining >= needle_len; remaining--, cursor++) {
		if (bcmp(cursor, s2, needle_len) == 0)
			return (cursor);
	}

	return (NULL);
}
7935
7936 /*
7937 * The logic behind this function is detailed in the NFSv4 RFC in the
7938 * SETCLIENTID operation description under IMPLEMENTATION. Refer to
7939 * that section for explicit guidance to server behavior for
7940 * SETCLIENTID.
7941 */
7942 void
7943 rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop,
7944 struct svc_req *req, struct compound_state *cs)
7945 {
7946 SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid;
7947 SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid;
7948 rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed;
7949 rfs4_clntip_t *ci;
7950 bool_t create;
7951 char *addr, *netid;
7952 int len;
7953
7954 DTRACE_NFSV4_2(op__setclientid__start, struct compound_state *, cs,
7955 SETCLIENTID4args *, args);
7956 retry:
7957 newcp = cp_confirmed = cp_unconfirmed = NULL;
7958
7959 /*
7960 * Save the caller's IP address
7961 */
7962 args->client.cl_addr =
7963 (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
7964
7965 /*
7966 * Record if it is a Solaris client that cannot handle referrals.
7967 */
7968 if (memstr(args->client.id_val, "Solaris", args->client.id_len) &&
7969 !memstr(args->client.id_val, "+referrals", args->client.id_len)) {
7970 /* Add a "yes, it's downrev" record */
7971 create = TRUE;
7972 ci = rfs4_find_clntip(args->client.cl_addr, &create);
7973 ASSERT(ci != NULL);
7974 rfs4_dbe_rele(ci->ri_dbe);
7975 } else {
7976 /* Remove any previous record */
7977 rfs4_invalidate_clntip(args->client.cl_addr);
7978 }
7979
7980 /*
7981 * In search of an EXISTING client matching the incoming
7982 * request to establish a new client identifier at the server
7983 */
7984 create = TRUE;
7985 cp = rfs4_findclient(&args->client, &create, NULL);
7986
7987 /* Should never happen */
7988 ASSERT(cp != NULL);
7989
7990 if (cp == NULL) {
7991 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
7992 goto out;
7993 }
7994
7995 /*
7996 * Easiest case. Client identifier is newly created and is
7997 * unconfirmed. Also note that for this case, no other
7998 * entries exist for the client identifier. Nothing else to
7999 * check. Just setup the response and respond.
8000 */
8001 if (create) {
8002 *cs->statusp = res->status = NFS4_OK;
8003 res->SETCLIENTID4res_u.resok4.clientid = cp->rc_clientid;
8004 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8005 cp->rc_confirm_verf;
8006 /* Setup callback information; CB_NULL confirmation later */
8007 rfs4_client_setcb(cp, &args->callback, args->callback_ident);
8008
8009 rfs4_client_rele(cp);
8010 goto out;
8011 }
8012
8013 /*
8014 * An existing, confirmed client may exist but it may not have
8015 * been active for at least one lease period. If so, then
8016 * "close" the client and create a new client identifier
8017 */
8018 if (rfs4_lease_expired(cp)) {
8019 rfs4_client_close(cp);
8020 goto retry;
8021 }
8022
8023 if (cp->rc_need_confirm == TRUE)
8024 cp_unconfirmed = cp;
8025 else
8026 cp_confirmed = cp;
8027
8028 cp = NULL;
8029
8030 /*
8031 * We have a confirmed client, now check for an
8032 * unconfimred entry
8033 */
8034 if (cp_confirmed) {
8035 /* If creds don't match then client identifier is inuse */
8036 if (!creds_ok(cp_confirmed->rc_cr_set, req, cs)) {
8037 rfs4_cbinfo_t *cbp;
8038 /*
8039 * Some one else has established this client
8040 * id. Try and say * who they are. We will use
8041 * the call back address supplied by * the
8042 * first client.
8043 */
8044 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8045
8046 addr = netid = NULL;
8047
8048 cbp = &cp_confirmed->rc_cbinfo;
8049 if (cbp->cb_callback.cb_location.r_addr &&
8050 cbp->cb_callback.cb_location.r_netid) {
8051 cb_client4 *cbcp = &cbp->cb_callback;
8052
8053 len = strlen(cbcp->cb_location.r_addr)+1;
8054 addr = kmem_alloc(len, KM_SLEEP);
8055 bcopy(cbcp->cb_location.r_addr, addr, len);
8056 len = strlen(cbcp->cb_location.r_netid)+1;
8057 netid = kmem_alloc(len, KM_SLEEP);
8058 bcopy(cbcp->cb_location.r_netid, netid, len);
8059 }
8060
8061 res->SETCLIENTID4res_u.client_using.r_addr = addr;
8062 res->SETCLIENTID4res_u.client_using.r_netid = netid;
8063
8064 rfs4_client_rele(cp_confirmed);
8065 }
8066
8067 /*
8068 * Confirmed, creds match, and verifier matches; must
8069 * be an update of the callback info
8070 */
8071 if (cp_confirmed->rc_nfs_client.verifier ==
8072 args->client.verifier) {
8073 /* Setup callback information */
8074 rfs4_client_setcb(cp_confirmed, &args->callback,
8075 args->callback_ident);
8076
8077 /* everything okay -- move ahead */
8078 *cs->statusp = res->status = NFS4_OK;
8079 res->SETCLIENTID4res_u.resok4.clientid =
8080 cp_confirmed->rc_clientid;
8081
8082 /* update the confirm_verifier and return it */
8083 rfs4_client_scv_next(cp_confirmed);
8084 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8085 cp_confirmed->rc_confirm_verf;
8086
8087 rfs4_client_rele(cp_confirmed);
8088 goto out;
8089 }
8090
8091 /*
8092 * Creds match but the verifier doesn't. Must search
8093 * for an unconfirmed client that would be replaced by
8094 * this request.
8095 */
8096 create = FALSE;
8097 cp_unconfirmed = rfs4_findclient(&args->client, &create,
8098 cp_confirmed);
8099 }
8100
8101 /*
8102 * At this point, we have taken care of the brand new client
8103 * struct, INUSE case, update of an existing, and confirmed
8104 * client struct.
8105 */
8106
8107 /*
8108 * check to see if things have changed while we originally
8109 * picked up the client struct. If they have, then return and
8110 * retry the processing of this SETCLIENTID request.
8111 */
8112 if (cp_unconfirmed) {
8113 rfs4_dbe_lock(cp_unconfirmed->rc_dbe);
8114 if (!cp_unconfirmed->rc_need_confirm) {
8115 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8116 rfs4_client_rele(cp_unconfirmed);
8117 if (cp_confirmed)
8118 rfs4_client_rele(cp_confirmed);
8119 goto retry;
8120 }
8121 /* do away with the old unconfirmed one */
8122 rfs4_dbe_invalidate(cp_unconfirmed->rc_dbe);
8123 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8124 rfs4_client_rele(cp_unconfirmed);
8125 cp_unconfirmed = NULL;
8126 }
8127
8128 /*
8129 * This search will temporarily hide the confirmed client
8130 * struct while a new client struct is created as the
8131 * unconfirmed one.
8132 */
8133 create = TRUE;
8134 newcp = rfs4_findclient(&args->client, &create, cp_confirmed);
8135
8136 ASSERT(newcp != NULL);
8137
8138 if (newcp == NULL) {
8139 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8140 rfs4_client_rele(cp_confirmed);
8141 goto out;
8142 }
8143
8144 /*
8145 * If one was not created, then a similar request must be in
8146 * process so release and start over with this one
8147 */
8148 if (create != TRUE) {
8149 rfs4_client_rele(newcp);
8150 if (cp_confirmed)
8151 rfs4_client_rele(cp_confirmed);
8152 goto retry;
8153 }
8154
8155 *cs->statusp = res->status = NFS4_OK;
8156 res->SETCLIENTID4res_u.resok4.clientid = newcp->rc_clientid;
8157 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8158 newcp->rc_confirm_verf;
8159 /* Setup callback information; CB_NULL confirmation later */
8160 rfs4_client_setcb(newcp, &args->callback, args->callback_ident);
8161
8162 newcp->rc_cp_confirmed = cp_confirmed;
8163
8164 rfs4_client_rele(newcp);
8165
8166 out:
8167 DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
8168 SETCLIENTID4res *, res);
8169 }
8170
8171 /*ARGSUSED*/
8172 void
8173 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
8174 struct svc_req *req, struct compound_state *cs)
8175 {
8176 SETCLIENTID_CONFIRM4args *args =
8177 &argop->nfs_argop4_u.opsetclientid_confirm;
8178 SETCLIENTID_CONFIRM4res *res =
8179 &resop->nfs_resop4_u.opsetclientid_confirm;
8180 rfs4_client_t *cp, *cptoclose = NULL;
8181
8182 DTRACE_NFSV4_2(op__setclientid__confirm__start,
8183 struct compound_state *, cs,
8184 SETCLIENTID_CONFIRM4args *, args);
8185
8186 *cs->statusp = res->status = NFS4_OK;
8187
8188 cp = rfs4_findclient_by_id(args->clientid, TRUE);
8189
8190 if (cp == NULL) {
8191 *cs->statusp = res->status =
8192 rfs4_check_clientid(&args->clientid, 1);
8193 goto out;
8194 }
8195
8196 if (!creds_ok(cp, req, cs)) {
8197 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8198 rfs4_client_rele(cp);
8199 goto out;
8200 }
8201
8202 /* If the verifier doesn't match, the record doesn't match */
8203 if (cp->rc_confirm_verf != args->setclientid_confirm) {
8204 *cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
8205 rfs4_client_rele(cp);
8206 goto out;
8207 }
8208
8209 rfs4_dbe_lock(cp->rc_dbe);
8210 cp->rc_need_confirm = FALSE;
8211 if (cp->rc_cp_confirmed) {
8212 cptoclose = cp->rc_cp_confirmed;
8213 cptoclose->rc_ss_remove = 1;
8214 cp->rc_cp_confirmed = NULL;
8215 }
8216
8217 /*
8218 * Update the client's associated server instance, if it's changed
8219 * since the client was created.
8220 */
8221 if (rfs4_servinst(cp) != rfs4_cur_servinst)
8222 rfs4_servinst_assign(cp, rfs4_cur_servinst);
8223
8224 /*
8225 * Record clientid in stable storage.
8226 * Must be done after server instance has been assigned.
8227 */
8228 rfs4_ss_clid(cp);
8229
8230 rfs4_dbe_unlock(cp->rc_dbe);
8231
8232 if (cptoclose)
8233 /* don't need to rele, client_close does it */
8234 rfs4_client_close(cptoclose);
8235
8236 /* If needed, initiate CB_NULL call for callback path */
8237 rfs4_deleg_cb_check(cp);
8238 rfs4_update_lease(cp);
8239
8240 /*
8241 * Check to see if client can perform reclaims
8242 */
8243 rfs4_ss_chkclid(cp);
8244
8245 rfs4_client_rele(cp);
8246
8247 out:
8248 DTRACE_NFSV4_2(op__setclientid__confirm__done,
8249 struct compound_state *, cs,
8250 SETCLIENTID_CONFIRM4 *, res);
8251 }
8252
8253
8254 /*ARGSUSED*/
8255 void
8256 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
8257 struct svc_req *req, struct compound_state *cs)
8258 {
8259 CLOSE4args *args = &argop->nfs_argop4_u.opclose;
8260 CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
8261 rfs4_state_t *sp;
8262 nfsstat4 status;
8263
8264 DTRACE_NFSV4_2(op__close__start, struct compound_state *, cs,
8265 CLOSE4args *, args);
8266
8267 if (cs->vp == NULL) {
8268 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8269 goto out;
8270 }
8271
8272 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID);
8273 if (status != NFS4_OK) {
8274 *cs->statusp = resp->status = status;
8275 goto out;
8276 }
8277
8278 /* Ensure specified filehandle matches */
8279 if (cs->vp != sp->rs_finfo->rf_vp) {
8280 rfs4_state_rele(sp);
8281 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8282 goto out;
8283 }
8284
8285 /* hold off other access to open_owner while we tinker */
8286 rfs4_sw_enter(&sp->rs_owner->ro_sw);
8287
8288 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
8289 case NFS4_CHECK_STATEID_OKAY:
8290 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8291 resop) != NFS4_CHKSEQ_OKAY) {
8292 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8293 goto end;
8294 }
8295 break;
8296 case NFS4_CHECK_STATEID_OLD:
8297 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8298 goto end;
8299 case NFS4_CHECK_STATEID_BAD:
8300 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8301 goto end;
8302 case NFS4_CHECK_STATEID_EXPIRED:
8303 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8304 goto end;
8305 case NFS4_CHECK_STATEID_CLOSED:
8306 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8307 goto end;
8308 case NFS4_CHECK_STATEID_UNCONFIRMED:
8309 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8310 goto end;
8311 case NFS4_CHECK_STATEID_REPLAY:
8312 /* Check the sequence id for the open owner */
8313 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8314 resop)) {
8315 case NFS4_CHKSEQ_OKAY:
8316 /*
8317 * This is replayed stateid; if seqid matches
8318 * next expected, then client is using wrong seqid.
8319 */
8320 /* FALL THROUGH */
8321 case NFS4_CHKSEQ_BAD:
8322 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8323 goto end;
8324 case NFS4_CHKSEQ_REPLAY:
8325 /*
8326 * Note this case is the duplicate case so
8327 * resp->status is already set.
8328 */
8329 *cs->statusp = resp->status;
8330 rfs4_update_lease(sp->rs_owner->ro_client);
8331 goto end;
8332 }
8333 break;
8334 default:
8335 ASSERT(FALSE);
8336 break;
8337 }
8338
8339 rfs4_dbe_lock(sp->rs_dbe);
8340
8341 /* Update the stateid. */
8342 next_stateid(&sp->rs_stateid);
8343 resp->open_stateid = sp->rs_stateid.stateid;
8344
8345 rfs4_dbe_unlock(sp->rs_dbe);
8346
8347 rfs4_update_lease(sp->rs_owner->ro_client);
8348 rfs4_update_open_sequence(sp->rs_owner);
8349 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8350
8351 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
8352
8353 *cs->statusp = resp->status = status;
8354
8355 end:
8356 rfs4_sw_exit(&sp->rs_owner->ro_sw);
8357 rfs4_state_rele(sp);
8358 out:
8359 DTRACE_NFSV4_2(op__close__done, struct compound_state *, cs,
8360 CLOSE4res *, resp);
8361 }
8362
8363 /*
8364 * Manage the counts on the file struct and close all file locks
8365 */
8366 /*ARGSUSED*/
8367 void
8368 rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
8369 bool_t close_of_client)
8370 {
8371 rfs4_file_t *fp = sp->rs_finfo;
8372 rfs4_lo_state_t *lsp;
8373 int fflags = 0;
8374
8375 /*
8376 * If this call is part of the larger closing down of client
8377 * state then it is just easier to release all locks
8378 * associated with this client instead of going through each
8379 * individual file and cleaning locks there.
8380 */
8381 if (close_of_client) {
8382 if (sp->rs_owner->ro_client->rc_unlksys_completed == FALSE &&
8383 !list_is_empty(&sp->rs_lostatelist) &&
8384 sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) {
8385 /* Is the PxFS kernel module loaded? */
8386 if (lm_remove_file_locks != NULL) {
8387 int new_sysid;
8388
8389 /* Encode the cluster nodeid in new sysid */
8390 new_sysid = sp->rs_owner->ro_client->rc_sysidt;
8391 lm_set_nlmid_flk(&new_sysid);
8392
8393 /*
8394 * This PxFS routine removes file locks for a
8395 * client over all nodes of a cluster.
8396 */
8397 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
8398 "lm_remove_file_locks(sysid=0x%x)\n",
8399 new_sysid));
8400 (*lm_remove_file_locks)(new_sysid);
8401 } else {
8402 struct flock64 flk;
8403
8404 /* Release all locks for this client */
8405 flk.l_type = F_UNLKSYS;
8406 flk.l_whence = 0;
8407 flk.l_start = 0;
8408 flk.l_len = 0;
8409 flk.l_sysid =
8410 sp->rs_owner->ro_client->rc_sysidt;
8411 flk.l_pid = 0;
8412 (void) VOP_FRLOCK(sp->rs_finfo->rf_vp, F_SETLK,
8413 &flk, F_REMOTELOCK | FREAD | FWRITE,
8414 (u_offset_t)0, NULL, CRED(), NULL);
8415 }
8416
8417 sp->rs_owner->ro_client->rc_unlksys_completed = TRUE;
8418 }
8419 }
8420
8421 /*
8422 * Release all locks on this file by this lock owner or at
8423 * least mark the locks as having been released
8424 */
8425 for (lsp = list_head(&sp->rs_lostatelist); lsp != NULL;
8426 lsp = list_next(&sp->rs_lostatelist, lsp)) {
8427 lsp->rls_locks_cleaned = TRUE;
8428
8429 /* Was this already taken care of above? */
8430 if (!close_of_client &&
8431 sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8432 (void) cleanlocks(sp->rs_finfo->rf_vp,
8433 lsp->rls_locker->rl_pid,
8434 lsp->rls_locker->rl_client->rc_sysidt);
8435 }
8436
8437 /*
8438 * Release any shrlocks associated with this open state ID.
8439 * This must be done before the rfs4_state gets marked closed.
8440 */
8441 if (sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8442 (void) rfs4_unshare(sp);
8443
8444 if (sp->rs_open_access) {
8445 rfs4_dbe_lock(fp->rf_dbe);
8446
8447 /*
8448 * Decrement the count for each access and deny bit that this
8449 * state has contributed to the file.
8450 * If the file counts go to zero
8451 * clear the appropriate bit in the appropriate mask.
8452 */
8453 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) {
8454 fp->rf_access_read--;
8455 fflags |= FREAD;
8456 if (fp->rf_access_read == 0)
8457 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8458 }
8459 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) {
8460 fp->rf_access_write--;
8461 fflags |= FWRITE;
8462 if (fp->rf_access_write == 0)
8463 fp->rf_share_access &=
8464 ~OPEN4_SHARE_ACCESS_WRITE;
8465 }
8466 if (sp->rs_open_deny & OPEN4_SHARE_DENY_READ) {
8467 fp->rf_deny_read--;
8468 if (fp->rf_deny_read == 0)
8469 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8470 }
8471 if (sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) {
8472 fp->rf_deny_write--;
8473 if (fp->rf_deny_write == 0)
8474 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8475 }
8476
8477 (void) VOP_CLOSE(fp->rf_vp, fflags, 1, (offset_t)0, cr, NULL);
8478
8479 rfs4_dbe_unlock(fp->rf_dbe);
8480
8481 sp->rs_open_access = 0;
8482 sp->rs_open_deny = 0;
8483 }
8484 }
8485
8486 /*
8487 * lock_denied: Fill in a LOCK4deneid structure given an flock64 structure.
8488 */
8489 static nfsstat4
8490 lock_denied(LOCK4denied *dp, struct flock64 *flk)
8491 {
8492 rfs4_lockowner_t *lo;
8493 rfs4_client_t *cp;
8494 uint32_t len;
8495
8496 lo = rfs4_findlockowner_by_pid(flk->l_pid);
8497 if (lo != NULL) {
8498 cp = lo->rl_client;
8499 if (rfs4_lease_expired(cp)) {
8500 rfs4_lockowner_rele(lo);
8501 rfs4_dbe_hold(cp->rc_dbe);
8502 rfs4_client_close(cp);
8503 return (NFS4ERR_EXPIRED);
8504 }
8505 dp->owner.clientid = lo->rl_owner.clientid;
8506 len = lo->rl_owner.owner_len;
8507 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8508 bcopy(lo->rl_owner.owner_val, dp->owner.owner_val, len);
8509 dp->owner.owner_len = len;
8510 rfs4_lockowner_rele(lo);
8511 goto finish;
8512 }
8513
8514 /*
8515 * Its not a NFS4 lock. We take advantage that the upper 32 bits
8516 * of the client id contain the boot time for a NFS4 lock. So we
8517 * fabricate and identity by setting clientid to the sysid, and
8518 * the lock owner to the pid.
8519 */
8520 dp->owner.clientid = flk->l_sysid;
8521 len = sizeof (pid_t);
8522 dp->owner.owner_len = len;
8523 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8524 bcopy(&flk->l_pid, dp->owner.owner_val, len);
8525 finish:
8526 dp->offset = flk->l_start;
8527 dp->length = flk->l_len;
8528
8529 if (flk->l_type == F_RDLCK)
8530 dp->locktype = READ_LT;
8531 else if (flk->l_type == F_WRLCK)
8532 dp->locktype = WRITE_LT;
8533 else
8534 return (NFS4ERR_INVAL); /* no mapping from POSIX ltype to v4 */
8535
8536 return (NFS4_OK);
8537 }
8538
8539 /*
8540 * The NFSv4.0 LOCK operation does not support the blocking lock (at the
8541 * NFSv4.0 protocol level) so the client needs to resend the LOCK request in a
8542 * case the lock is denied by the NFSv4.0 server. NFSv4.0 clients are prepared
8543 * for that (obviously); they are sending the LOCK requests with some delays
8544 * between the attempts. See nfs4frlock() and nfs4_block_and_wait() for the
8545 * locking and delay implementation at the client side.
8546 *
8547 * To make the life of the clients easier, the NFSv4.0 server tries to do some
8548 * fast retries on its own (the for loop below) in a hope the lock will be
8549 * available soon. And if not, the client won't need to resend the LOCK
8550 * requests so fast to check the lock availability. This basically saves some
8551 * network traffic and tries to make sure the client gets the lock ASAP.
8552 */
8553 static int
8554 setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred)
8555 {
8556 int error;
8557 struct flock64 flk;
8558 int i;
8559 clock_t delaytime;
8560 int cmd;
8561 int spin_cnt = 0;
8562
8563 cmd = nbl_need_check(vp) ? F_SETLK_NBMAND : F_SETLK;
8564 retry:
8565 delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
8566
8567 for (i = 0; i < rfs4_maxlock_tries; i++) {
8568 LOCK_PRINT(rfs4_debug, "setlock", cmd, flock);
8569 error = VOP_FRLOCK(vp, cmd,
8570 flock, flag, (u_offset_t)0, NULL, cred, NULL);
8571
8572 if (error != EAGAIN && error != EACCES)
8573 break;
8574
8575 if (i < rfs4_maxlock_tries - 1) {
8576 delay(delaytime);
8577 delaytime *= 2;
8578 }
8579 }
8580
8581 if (error == EAGAIN || error == EACCES) {
8582 /* Get the owner of the lock */
8583 flk = *flock;
8584 LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk);
8585 if (VOP_FRLOCK(vp, F_GETLK, &flk, flag, 0, NULL, cred,
8586 NULL) == 0) {
8587 /*
8588 * There's a race inherent in the current VOP_FRLOCK
8589 * design where:
8590 * a: "other guy" takes a lock that conflicts with a
8591 * lock we want
8592 * b: we attempt to take our lock (non-blocking) and
8593 * the attempt fails.
8594 * c: "other guy" releases the conflicting lock
8595 * d: we ask what lock conflicts with the lock we want,
8596 * getting F_UNLCK (no lock blocks us)
8597 *
8598 * If we retry the non-blocking lock attempt in this
8599 * case (restart at step 'b') there's some possibility
8600 * that many such attempts might fail. However a test
8601 * designed to actually provoke this race shows that
8602 * the vast majority of cases require no retry, and
8603 * only a few took as many as three retries. Here's
8604 * the test outcome:
8605 *
8606 * number of retries how many times we needed
8607 * that many retries
8608 * 0 79461
8609 * 1 862
8610 * 2 49
8611 * 3 5
8612 *
8613 * Given those empirical results, we arbitrarily limit
8614 * the retry count to ten.
8615 *
8616 * If we actually make to ten retries and give up,
8617 * nothing catastrophic happens, but we're unable to
8618 * return the information about the conflicting lock to
8619 * the NFS client. That's an acceptable trade off vs.
8620 * letting this retry loop run forever.
8621 */
8622 if (flk.l_type == F_UNLCK) {
8623 if (spin_cnt++ < 10) {
8624 /* No longer locked, retry */
8625 goto retry;
8626 }
8627 } else {
8628 *flock = flk;
8629 LOCK_PRINT(rfs4_debug, "setlock(blocking lock)",
8630 F_GETLK, &flk);
8631 }
8632 }
8633 }
8634
8635 return (error);
8636 }
8637
8638 /*ARGSUSED*/
8639 static nfsstat4
8640 rfs4_do_lock(rfs4_lo_state_t *lsp, nfs_lock_type4 locktype,
8641 offset4 offset, length4 length, cred_t *cred, nfs_resop4 *resop)
8642 {
8643 nfsstat4 status;
8644 rfs4_lockowner_t *lo = lsp->rls_locker;
8645 rfs4_state_t *sp = lsp->rls_state;
8646 struct flock64 flock;
8647 int16_t ltype;
8648 int flag;
8649 int error;
8650 sysid_t sysid;
8651 LOCK4res *lres;
8652 vnode_t *vp;
8653
8654 if (rfs4_lease_expired(lo->rl_client)) {
8655 return (NFS4ERR_EXPIRED);
8656 }
8657
8658 if ((status = rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
8659 return (status);
8660
8661 /* Check for zero length. To lock to end of file use all ones for V4 */
8662 if (length == 0)
8663 return (NFS4ERR_INVAL);
8664 else if (length == (length4)(~0))
8665 length = 0; /* Posix to end of file */
8666
8667 retry:
8668 rfs4_dbe_lock(sp->rs_dbe);
8669 if (sp->rs_closed == TRUE) {
8670 rfs4_dbe_unlock(sp->rs_dbe);
8671 return (NFS4ERR_OLD_STATEID);
8672 }
8673
8674 if (resop->resop != OP_LOCKU) {
8675 switch (locktype) {
8676 case READ_LT:
8677 case READW_LT:
8678 if ((sp->rs_share_access
8679 & OPEN4_SHARE_ACCESS_READ) == 0) {
8680 rfs4_dbe_unlock(sp->rs_dbe);
8681
8682 return (NFS4ERR_OPENMODE);
8683 }
8684 ltype = F_RDLCK;
8685 break;
8686 case WRITE_LT:
8687 case WRITEW_LT:
8688 if ((sp->rs_share_access
8689 & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8690 rfs4_dbe_unlock(sp->rs_dbe);
8691
8692 return (NFS4ERR_OPENMODE);
8693 }
8694 ltype = F_WRLCK;
8695 break;
8696 }
8697 } else
8698 ltype = F_UNLCK;
8699
8700 flock.l_type = ltype;
8701 flock.l_whence = 0; /* SEEK_SET */
8702 flock.l_start = offset;
8703 flock.l_len = length;
8704 flock.l_sysid = sysid;
8705 flock.l_pid = lsp->rls_locker->rl_pid;
8706
8707 /* Note that length4 is uint64_t but l_len and l_start are off64_t */
8708 if (flock.l_len < 0 || flock.l_start < 0) {
8709 rfs4_dbe_unlock(sp->rs_dbe);
8710 return (NFS4ERR_INVAL);
8711 }
8712
8713 /*
8714 * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and
8715 * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE.
8716 */
8717 flag = (int)sp->rs_share_access | F_REMOTELOCK;
8718
8719 vp = sp->rs_finfo->rf_vp;
8720 VN_HOLD(vp);
8721
8722 /*
8723 * We need to unlock sp before we call the underlying filesystem to
8724 * acquire the file lock.
8725 */
8726 rfs4_dbe_unlock(sp->rs_dbe);
8727
8728 error = setlock(vp, &flock, flag, cred);
8729
8730 /*
8731 * Make sure the file is still open. In a case the file was closed in
8732 * the meantime, clean the lock we acquired using the setlock() call
8733 * above, and return the appropriate error.
8734 */
8735 rfs4_dbe_lock(sp->rs_dbe);
8736 if (sp->rs_closed == TRUE) {
8737 cleanlocks(vp, lsp->rls_locker->rl_pid, sysid);
8738 rfs4_dbe_unlock(sp->rs_dbe);
8739
8740 VN_RELE(vp);
8741
8742 return (NFS4ERR_OLD_STATEID);
8743 }
8744 rfs4_dbe_unlock(sp->rs_dbe);
8745
8746 VN_RELE(vp);
8747
8748 if (error == 0) {
8749 rfs4_dbe_lock(lsp->rls_dbe);
8750 next_stateid(&lsp->rls_lockid);
8751 rfs4_dbe_unlock(lsp->rls_dbe);
8752 }
8753
8754 /*
8755 * N.B. We map error values to nfsv4 errors. This is differrent
8756 * than puterrno4 routine.
8757 */
8758 switch (error) {
8759 case 0:
8760 status = NFS4_OK;
8761 break;
8762 case EAGAIN:
8763 case EACCES: /* Old value */
8764 /* Can only get here if op is OP_LOCK */
8765 ASSERT(resop->resop == OP_LOCK);
8766 lres = &resop->nfs_resop4_u.oplock;
8767 status = NFS4ERR_DENIED;
8768 if (lock_denied(&lres->LOCK4res_u.denied, &flock)
8769 == NFS4ERR_EXPIRED)
8770 goto retry;
8771 break;
8772 case ENOLCK:
8773 status = NFS4ERR_DELAY;
8774 break;
8775 case EOVERFLOW:
8776 status = NFS4ERR_INVAL;
8777 break;
8778 case EINVAL:
8779 status = NFS4ERR_NOTSUPP;
8780 break;
8781 default:
8782 status = NFS4ERR_SERVERFAULT;
8783 break;
8784 }
8785
8786 return (status);
8787 }
8788
8789 /*ARGSUSED*/
8790 void
8791 rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
8792 struct svc_req *req, struct compound_state *cs)
8793 {
8794 LOCK4args *args = &argop->nfs_argop4_u.oplock;
8795 LOCK4res *resp = &resop->nfs_resop4_u.oplock;
8796 nfsstat4 status;
8797 stateid4 *stateid;
8798 rfs4_lockowner_t *lo;
8799 rfs4_client_t *cp;
8800 rfs4_state_t *sp = NULL;
8801 rfs4_lo_state_t *lsp = NULL;
8802 bool_t ls_sw_held = FALSE;
8803 bool_t create = TRUE;
8804 bool_t lcreate = TRUE;
8805 bool_t dup_lock = FALSE;
8806 int rc;
8807
8808 DTRACE_NFSV4_2(op__lock__start, struct compound_state *, cs,
8809 LOCK4args *, args);
8810
8811 if (cs->vp == NULL) {
8812 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8813 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8814 cs, LOCK4res *, resp);
8815 return;
8816 }
8817
8818 if (args->locker.new_lock_owner) {
8819 /* Create a new lockowner for this instance */
8820 open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;
8821
8822 NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner"));
8823
8824 stateid = &olo->open_stateid;
8825 status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID);
8826 if (status != NFS4_OK) {
8827 NFS4_DEBUG(rfs4_debug,
8828 (CE_NOTE, "Get state failed in lock %d", status));
8829 *cs->statusp = resp->status = status;
8830 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8831 cs, LOCK4res *, resp);
8832 return;
8833 }
8834
8835 /* Ensure specified filehandle matches */
8836 if (cs->vp != sp->rs_finfo->rf_vp) {
8837 rfs4_state_rele(sp);
8838 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8839 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8840 cs, LOCK4res *, resp);
8841 return;
8842 }
8843
8844 /* hold off other access to open_owner while we tinker */
8845 rfs4_sw_enter(&sp->rs_owner->ro_sw);
8846
8847 switch (rc = rfs4_check_stateid_seqid(sp, stateid)) {
8848 case NFS4_CHECK_STATEID_OLD:
8849 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8850 goto end;
8851 case NFS4_CHECK_STATEID_BAD:
8852 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8853 goto end;
8854 case NFS4_CHECK_STATEID_EXPIRED:
8855 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8856 goto end;
8857 case NFS4_CHECK_STATEID_UNCONFIRMED:
8858 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8859 goto end;
8860 case NFS4_CHECK_STATEID_CLOSED:
8861 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8862 goto end;
8863 case NFS4_CHECK_STATEID_OKAY:
8864 case NFS4_CHECK_STATEID_REPLAY:
8865 switch (rfs4_check_olo_seqid(olo->open_seqid,
8866 sp->rs_owner, resop)) {
8867 case NFS4_CHKSEQ_OKAY:
8868 if (rc == NFS4_CHECK_STATEID_OKAY)
8869 break;
8870 /*
8871 * This is replayed stateid; if seqid
8872 * matches next expected, then client
8873 * is using wrong seqid.
8874 */
8875 /* FALLTHROUGH */
8876 case NFS4_CHKSEQ_BAD:
8877 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8878 goto end;
8879 case NFS4_CHKSEQ_REPLAY:
8880 /* This is a duplicate LOCK request */
8881 dup_lock = TRUE;
8882
8883 /*
8884 * For a duplicate we do not want to
8885 * create a new lockowner as it should
8886 * already exist.
8887 * Turn off the lockowner create flag.
8888 */
8889 lcreate = FALSE;
8890 }
8891 break;
8892 }
8893
8894 lo = rfs4_findlockowner(&olo->lock_owner, &lcreate);
8895 if (lo == NULL) {
8896 NFS4_DEBUG(rfs4_debug,
8897 (CE_NOTE, "rfs4_op_lock: no lock owner"));
8898 *cs->statusp = resp->status = NFS4ERR_RESOURCE;
8899 goto end;
8900 }
8901
8902 lsp = rfs4_findlo_state_by_owner(lo, sp, &create);
8903 if (lsp == NULL) {
8904 rfs4_update_lease(sp->rs_owner->ro_client);
8905 /*
8906 * Only update theh open_seqid if this is not
8907 * a duplicate request
8908 */
8909 if (dup_lock == FALSE) {
8910 rfs4_update_open_sequence(sp->rs_owner);
8911 }
8912
8913 NFS4_DEBUG(rfs4_debug,
8914 (CE_NOTE, "rfs4_op_lock: no state"));
8915 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
8916 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8917 rfs4_lockowner_rele(lo);
8918 goto end;
8919 }
8920
8921 /*
8922 * This is the new_lock_owner branch and the client is
8923 * supposed to be associating a new lock_owner with
8924 * the open file at this point. If we find that a
8925 * lock_owner/state association already exists and a
8926 * successful LOCK request was returned to the client,
8927 * an error is returned to the client since this is
8928 * not appropriate. The client should be using the
8929 * existing lock_owner branch.
8930 */
8931 if (dup_lock == FALSE && create == FALSE) {
8932 if (lsp->rls_lock_completed == TRUE) {
8933 *cs->statusp =
8934 resp->status = NFS4ERR_BAD_SEQID;
8935 rfs4_lockowner_rele(lo);
8936 goto end;
8937 }
8938 }
8939
8940 rfs4_update_lease(sp->rs_owner->ro_client);
8941
8942 /*
8943 * Only update theh open_seqid if this is not
8944 * a duplicate request
8945 */
8946 if (dup_lock == FALSE) {
8947 rfs4_update_open_sequence(sp->rs_owner);
8948 }
8949
8950 /*
8951 * If this is a duplicate lock request, just copy the
8952 * previously saved reply and return.
8953 */
8954 if (dup_lock == TRUE) {
8955 /* verify that lock_seqid's match */
8956 if (lsp->rls_seqid != olo->lock_seqid) {
8957 NFS4_DEBUG(rfs4_debug,
8958 (CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad"
8959 "lsp->seqid=%d old->seqid=%d",
8960 lsp->rls_seqid, olo->lock_seqid));
8961 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8962 } else {
8963 rfs4_copy_reply(resop, &lsp->rls_reply);
8964 /*
8965 * Make sure to copy the just
8966 * retrieved reply status into the
8967 * overall compound status
8968 */
8969 *cs->statusp = resp->status;
8970 }
8971 rfs4_lockowner_rele(lo);
8972 goto end;
8973 }
8974
8975 rfs4_dbe_lock(lsp->rls_dbe);
8976
8977 /* Make sure to update the lock sequence id */
8978 lsp->rls_seqid = olo->lock_seqid;
8979
8980 NFS4_DEBUG(rfs4_debug,
8981 (CE_NOTE, "Lock seqid established as %d", lsp->rls_seqid));
8982
8983 /*
8984 * This is used to signify the newly created lockowner
8985 * stateid and its sequence number. The checks for
8986 * sequence number and increment don't occur on the
8987 * very first lock request for a lockowner.
8988 */
8989 lsp->rls_skip_seqid_check = TRUE;
8990
8991 /* hold off other access to lsp while we tinker */
8992 rfs4_sw_enter(&lsp->rls_sw);
8993 ls_sw_held = TRUE;
8994
8995 rfs4_dbe_unlock(lsp->rls_dbe);
8996
8997 rfs4_lockowner_rele(lo);
8998 } else {
8999 stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
9000 /* get lsp and hold the lock on the underlying file struct */
9001 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE))
9002 != NFS4_OK) {
9003 *cs->statusp = resp->status = status;
9004 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9005 cs, LOCK4res *, resp);
9006 return;
9007 }
9008 create = FALSE; /* We didn't create lsp */
9009
9010 /* Ensure specified filehandle matches */
9011 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9012 rfs4_lo_state_rele(lsp, TRUE);
9013 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9014 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9015 cs, LOCK4res *, resp);
9016 return;
9017 }
9018
9019 /* hold off other access to lsp while we tinker */
9020 rfs4_sw_enter(&lsp->rls_sw);
9021 ls_sw_held = TRUE;
9022
9023 switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9024 /*
9025 * The stateid looks like it was okay (expected to be
9026 * the next one)
9027 */
9028 case NFS4_CHECK_STATEID_OKAY:
9029 /*
9030 * The sequence id is now checked. Determine
9031 * if this is a replay or if it is in the
9032 * expected (next) sequence. In the case of a
9033 * replay, there are two replay conditions
9034 * that may occur. The first is the normal
9035 * condition where a LOCK is done with a
9036 * NFS4_OK response and the stateid is
9037 * updated. That case is handled below when
9038 * the stateid is identified as a REPLAY. The
9039 * second is the case where an error is
9040 * returned, like NFS4ERR_DENIED, and the
9041 * sequence number is updated but the stateid
9042 * is not updated. This second case is dealt
9043 * with here. So it may seem odd that the
9044 * stateid is okay but the sequence id is a
9045 * replay but it is okay.
9046 */
9047 switch (rfs4_check_lock_seqid(
9048 args->locker.locker4_u.lock_owner.lock_seqid,
9049 lsp, resop)) {
9050 case NFS4_CHKSEQ_REPLAY:
9051 if (resp->status != NFS4_OK) {
9052 /*
9053 * Here is our replay and need
9054 * to verify that the last
9055 * response was an error.
9056 */
9057 *cs->statusp = resp->status;
9058 goto end;
9059 }
9060 /*
9061 * This is done since the sequence id
9062 * looked like a replay but it didn't
9063 * pass our check so a BAD_SEQID is
9064 * returned as a result.
9065 */
9066 /*FALLTHROUGH*/
9067 case NFS4_CHKSEQ_BAD:
9068 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9069 goto end;
9070 case NFS4_CHKSEQ_OKAY:
9071 /* Everything looks okay move ahead */
9072 break;
9073 }
9074 break;
9075 case NFS4_CHECK_STATEID_OLD:
9076 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9077 goto end;
9078 case NFS4_CHECK_STATEID_BAD:
9079 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9080 goto end;
9081 case NFS4_CHECK_STATEID_EXPIRED:
9082 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9083 goto end;
9084 case NFS4_CHECK_STATEID_CLOSED:
9085 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9086 goto end;
9087 case NFS4_CHECK_STATEID_REPLAY:
9088 switch (rfs4_check_lock_seqid(
9089 args->locker.locker4_u.lock_owner.lock_seqid,
9090 lsp, resop)) {
9091 case NFS4_CHKSEQ_OKAY:
9092 /*
9093 * This is a replayed stateid; if
9094 * seqid matches the next expected,
9095 * then client is using wrong seqid.
9096 */
9097 case NFS4_CHKSEQ_BAD:
9098 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9099 goto end;
9100 case NFS4_CHKSEQ_REPLAY:
9101 rfs4_update_lease(lsp->rls_locker->rl_client);
9102 *cs->statusp = status = resp->status;
9103 goto end;
9104 }
9105 break;
9106 default:
9107 ASSERT(FALSE);
9108 break;
9109 }
9110
9111 rfs4_update_lock_sequence(lsp);
9112 rfs4_update_lease(lsp->rls_locker->rl_client);
9113 }
9114
9115 /*
9116 * NFS4 only allows locking on regular files, so
9117 * verify type of object.
9118 */
9119 if (cs->vp->v_type != VREG) {
9120 if (cs->vp->v_type == VDIR)
9121 status = NFS4ERR_ISDIR;
9122 else
9123 status = NFS4ERR_INVAL;
9124 goto out;
9125 }
9126
9127 cp = lsp->rls_state->rs_owner->ro_client;
9128
9129 if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
9130 status = NFS4ERR_GRACE;
9131 goto out;
9132 }
9133
9134 if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->rc_can_reclaim) {
9135 status = NFS4ERR_NO_GRACE;
9136 goto out;
9137 }
9138
9139 if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
9140 status = NFS4ERR_NO_GRACE;
9141 goto out;
9142 }
9143
9144 if (lsp->rls_state->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE)
9145 cs->deleg = TRUE;
9146
9147 status = rfs4_do_lock(lsp, args->locktype,
9148 args->offset, args->length, cs->cr, resop);
9149
9150 out:
9151 lsp->rls_skip_seqid_check = FALSE;
9152
9153 *cs->statusp = resp->status = status;
9154
9155 if (status == NFS4_OK) {
9156 resp->LOCK4res_u.lock_stateid = lsp->rls_lockid.stateid;
9157 lsp->rls_lock_completed = TRUE;
9158 }
9159 /*
9160 * Only update the "OPEN" response here if this was a new
9161 * lock_owner
9162 */
9163 if (sp)
9164 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
9165
9166 rfs4_update_lock_resp(lsp, resop);
9167
9168 end:
9169 if (lsp) {
9170 if (ls_sw_held)
9171 rfs4_sw_exit(&lsp->rls_sw);
9172 /*
9173 * If an sp obtained, then the lsp does not represent
9174 * a lock on the file struct.
9175 */
9176 if (sp != NULL)
9177 rfs4_lo_state_rele(lsp, FALSE);
9178 else
9179 rfs4_lo_state_rele(lsp, TRUE);
9180 }
9181 if (sp) {
9182 rfs4_sw_exit(&sp->rs_owner->ro_sw);
9183 rfs4_state_rele(sp);
9184 }
9185
9186 DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs,
9187 LOCK4res *, resp);
9188 }
9189
9190 /* free function for LOCK/LOCKT */
9191 static void
9192 lock_denied_free(nfs_resop4 *resop)
9193 {
9194 LOCK4denied *dp = NULL;
9195
9196 switch (resop->resop) {
9197 case OP_LOCK:
9198 if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED)
9199 dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied;
9200 break;
9201 case OP_LOCKT:
9202 if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED)
9203 dp = &resop->nfs_resop4_u.oplockt.denied;
9204 break;
9205 default:
9206 break;
9207 }
9208
9209 if (dp)
9210 kmem_free(dp->owner.owner_val, dp->owner.owner_len);
9211 }
9212
9213 /*ARGSUSED*/
9214 void
9215 rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
9216 struct svc_req *req, struct compound_state *cs)
9217 {
9218 LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
9219 LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
9220 nfsstat4 status;
9221 stateid4 *stateid = &args->lock_stateid;
9222 rfs4_lo_state_t *lsp;
9223
9224 DTRACE_NFSV4_2(op__locku__start, struct compound_state *, cs,
9225 LOCKU4args *, args);
9226
9227 if (cs->vp == NULL) {
9228 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9229 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9230 LOCKU4res *, resp);
9231 return;
9232 }
9233
9234 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) {
9235 *cs->statusp = resp->status = status;
9236 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9237 LOCKU4res *, resp);
9238 return;
9239 }
9240
9241 /* Ensure specified filehandle matches */
9242 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9243 rfs4_lo_state_rele(lsp, TRUE);
9244 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9245 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9246 LOCKU4res *, resp);
9247 return;
9248 }
9249
9250 /* hold off other access to lsp while we tinker */
9251 rfs4_sw_enter(&lsp->rls_sw);
9252
9253 switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9254 case NFS4_CHECK_STATEID_OKAY:
9255 if (rfs4_check_lock_seqid(args->seqid, lsp, resop)
9256 != NFS4_CHKSEQ_OKAY) {
9257 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9258 goto end;
9259 }
9260 break;
9261 case NFS4_CHECK_STATEID_OLD:
9262 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9263 goto end;
9264 case NFS4_CHECK_STATEID_BAD:
9265 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9266 goto end;
9267 case NFS4_CHECK_STATEID_EXPIRED:
9268 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9269 goto end;
9270 case NFS4_CHECK_STATEID_CLOSED:
9271 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9272 goto end;
9273 case NFS4_CHECK_STATEID_REPLAY:
9274 switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) {
9275 case NFS4_CHKSEQ_OKAY:
9276 /*
9277 * This is a replayed stateid; if
9278 * seqid matches the next expected,
9279 * then client is using wrong seqid.
9280 */
9281 case NFS4_CHKSEQ_BAD:
9282 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9283 goto end;
9284 case NFS4_CHKSEQ_REPLAY:
9285 rfs4_update_lease(lsp->rls_locker->rl_client);
9286 *cs->statusp = status = resp->status;
9287 goto end;
9288 }
9289 break;
9290 default:
9291 ASSERT(FALSE);
9292 break;
9293 }
9294
9295 rfs4_update_lock_sequence(lsp);
9296 rfs4_update_lease(lsp->rls_locker->rl_client);
9297
9298 /*
9299 * NFS4 only allows locking on regular files, so
9300 * verify type of object.
9301 */
9302 if (cs->vp->v_type != VREG) {
9303 if (cs->vp->v_type == VDIR)
9304 status = NFS4ERR_ISDIR;
9305 else
9306 status = NFS4ERR_INVAL;
9307 goto out;
9308 }
9309
9310 if (rfs4_clnt_in_grace(lsp->rls_state->rs_owner->ro_client)) {
9311 status = NFS4ERR_GRACE;
9312 goto out;
9313 }
9314
9315 status = rfs4_do_lock(lsp, args->locktype,
9316 args->offset, args->length, cs->cr, resop);
9317
9318 out:
9319 *cs->statusp = resp->status = status;
9320
9321 if (status == NFS4_OK)
9322 resp->lock_stateid = lsp->rls_lockid.stateid;
9323
9324 rfs4_update_lock_resp(lsp, resop);
9325
9326 end:
9327 rfs4_sw_exit(&lsp->rls_sw);
9328 rfs4_lo_state_rele(lsp, TRUE);
9329
9330 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9331 LOCKU4res *, resp);
9332 }
9333
9334 /*
9335 * LOCKT is a best effort routine, the client can not be guaranteed that
9336 * the status return is still in effect by the time the reply is received.
9337 * They are numerous race conditions in this routine, but we are not required
9338 * and can not be accurate.
9339 */
9340 /*ARGSUSED*/
9341 void
9342 rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
9343 struct svc_req *req, struct compound_state *cs)
9344 {
9345 LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
9346 LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
9347 rfs4_lockowner_t *lo;
9348 rfs4_client_t *cp;
9349 bool_t create = FALSE;
9350 struct flock64 flk;
9351 int error;
9352 int flag = FREAD | FWRITE;
9353 int ltype;
9354 length4 posix_length;
9355 sysid_t sysid;
9356 pid_t pid;
9357
9358 DTRACE_NFSV4_2(op__lockt__start, struct compound_state *, cs,
9359 LOCKT4args *, args);
9360
9361 if (cs->vp == NULL) {
9362 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9363 goto out;
9364 }
9365
9366 /*
9367 * NFS4 only allows locking on regular files, so
9368 * verify type of object.
9369 */
9370 if (cs->vp->v_type != VREG) {
9371 if (cs->vp->v_type == VDIR)
9372 *cs->statusp = resp->status = NFS4ERR_ISDIR;
9373 else
9374 *cs->statusp = resp->status = NFS4ERR_INVAL;
9375 goto out;
9376 }
9377
9378 /*
9379 * Check out the clientid to ensure the server knows about it
9380 * so that we correctly inform the client of a server reboot.
9381 */
9382 if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE))
9383 == NULL) {
9384 *cs->statusp = resp->status =
9385 rfs4_check_clientid(&args->owner.clientid, 0);
9386 goto out;
9387 }
9388 if (rfs4_lease_expired(cp)) {
9389 rfs4_client_close(cp);
9390 /*
9391 * Protocol doesn't allow returning NFS4ERR_STALE as
9392 * other operations do on this check so STALE_CLIENTID
9393 * is returned instead
9394 */
9395 *cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
9396 goto out;
9397 }
9398
9399 if (rfs4_clnt_in_grace(cp) && !(cp->rc_can_reclaim)) {
9400 *cs->statusp = resp->status = NFS4ERR_GRACE;
9401 rfs4_client_rele(cp);
9402 goto out;
9403 }
9404 rfs4_client_rele(cp);
9405
9406 resp->status = NFS4_OK;
9407
9408 switch (args->locktype) {
9409 case READ_LT:
9410 case READW_LT:
9411 ltype = F_RDLCK;
9412 break;
9413 case WRITE_LT:
9414 case WRITEW_LT:
9415 ltype = F_WRLCK;
9416 break;
9417 }
9418
9419 posix_length = args->length;
9420 /* Check for zero length. To lock to end of file use all ones for V4 */
9421 if (posix_length == 0) {
9422 *cs->statusp = resp->status = NFS4ERR_INVAL;
9423 goto out;
9424 } else if (posix_length == (length4)(~0)) {
9425 posix_length = 0; /* Posix to end of file */
9426 }
9427
9428 /* Find or create a lockowner */
9429 lo = rfs4_findlockowner(&args->owner, &create);
9430
9431 if (lo) {
9432 pid = lo->rl_pid;
9433 if ((resp->status =
9434 rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
9435 goto err;
9436 } else {
9437 pid = 0;
9438 sysid = lockt_sysid;
9439 }
9440 retry:
9441 flk.l_type = ltype;
9442 flk.l_whence = 0; /* SEEK_SET */
9443 flk.l_start = args->offset;
9444 flk.l_len = posix_length;
9445 flk.l_sysid = sysid;
9446 flk.l_pid = pid;
9447 flag |= F_REMOTELOCK;
9448
9449 LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk);
9450
9451 /* Note that length4 is uint64_t but l_len and l_start are off64_t */
9452 if (flk.l_len < 0 || flk.l_start < 0) {
9453 resp->status = NFS4ERR_INVAL;
9454 goto err;
9455 }
9456 error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0,
9457 NULL, cs->cr, NULL);
9458
9459 /*
9460 * N.B. We map error values to nfsv4 errors. This is differrent
9461 * than puterrno4 routine.
9462 */
9463 switch (error) {
9464 case 0:
9465 if (flk.l_type == F_UNLCK)
9466 resp->status = NFS4_OK;
9467 else {
9468 if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED)
9469 goto retry;
9470 resp->status = NFS4ERR_DENIED;
9471 }
9472 break;
9473 case EOVERFLOW:
9474 resp->status = NFS4ERR_INVAL;
9475 break;
9476 case EINVAL:
9477 resp->status = NFS4ERR_NOTSUPP;
9478 break;
9479 default:
9480 cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
9481 error);
9482 resp->status = NFS4ERR_SERVERFAULT;
9483 break;
9484 }
9485
9486 err:
9487 if (lo)
9488 rfs4_lockowner_rele(lo);
9489 *cs->statusp = resp->status;
9490 out:
9491 DTRACE_NFSV4_2(op__lockt__done, struct compound_state *, cs,
9492 LOCKT4res *, resp);
9493 }
9494
9495 int
9496 rfs4_share(rfs4_state_t *sp, uint32_t access, uint32_t deny)
9497 {
9498 int err;
9499 int cmd;
9500 vnode_t *vp;
9501 struct shrlock shr;
9502 struct shr_locowner shr_loco;
9503 int fflags = 0;
9504
9505 ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9506 ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9507
9508 if (sp->rs_closed)
9509 return (NFS4ERR_OLD_STATEID);
9510
9511 vp = sp->rs_finfo->rf_vp;
9512 ASSERT(vp);
9513
9514 shr.s_access = shr.s_deny = 0;
9515
9516 if (access & OPEN4_SHARE_ACCESS_READ) {
9517 fflags |= FREAD;
9518 shr.s_access |= F_RDACC;
9519 }
9520 if (access & OPEN4_SHARE_ACCESS_WRITE) {
9521 fflags |= FWRITE;
9522 shr.s_access |= F_WRACC;
9523 }
9524 ASSERT(shr.s_access);
9525
9526 if (deny & OPEN4_SHARE_DENY_READ)
9527 shr.s_deny |= F_RDDNY;
9528 if (deny & OPEN4_SHARE_DENY_WRITE)
9529 shr.s_deny |= F_WRDNY;
9530
9531 shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9532 shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9533 shr_loco.sl_pid = shr.s_pid;
9534 shr_loco.sl_id = shr.s_sysid;
9535 shr.s_owner = (caddr_t)&shr_loco;
9536 shr.s_own_len = sizeof (shr_loco);
9537
9538 cmd = nbl_need_check(vp) ? F_SHARE_NBMAND : F_SHARE;
9539
9540 err = VOP_SHRLOCK(vp, cmd, &shr, fflags, CRED(), NULL);
9541 if (err != 0) {
9542 if (err == EAGAIN)
9543 err = NFS4ERR_SHARE_DENIED;
9544 else
9545 err = puterrno4(err);
9546 return (err);
9547 }
9548
9549 sp->rs_share_access |= access;
9550 sp->rs_share_deny |= deny;
9551
9552 return (0);
9553 }
9554
9555 int
9556 rfs4_unshare(rfs4_state_t *sp)
9557 {
9558 int err;
9559 struct shrlock shr;
9560 struct shr_locowner shr_loco;
9561
9562 ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9563
9564 if (sp->rs_closed || sp->rs_share_access == 0)
9565 return (0);
9566
9567 ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9568 ASSERT(sp->rs_finfo->rf_vp);
9569
9570 shr.s_access = shr.s_deny = 0;
9571 shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9572 shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9573 shr_loco.sl_pid = shr.s_pid;
9574 shr_loco.sl_id = shr.s_sysid;
9575 shr.s_owner = (caddr_t)&shr_loco;
9576 shr.s_own_len = sizeof (shr_loco);
9577
9578 err = VOP_SHRLOCK(sp->rs_finfo->rf_vp, F_UNSHARE, &shr, 0, CRED(),
9579 NULL);
9580 if (err != 0) {
9581 err = puterrno4(err);
9582 return (err);
9583 }
9584
9585 sp->rs_share_access = 0;
9586 sp->rs_share_deny = 0;
9587
9588 return (0);
9589
9590 }
9591
9592 static int
9593 rdma_setup_read_data4(READ4args *args, READ4res *rok)
9594 {
9595 struct clist *wcl;
9596 count4 count = rok->data_len;
9597 int wlist_len;
9598
9599 wcl = args->wlist;
9600 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
9601 return (FALSE);
9602 }
9603 wcl = args->wlist;
9604 rok->wlist_len = wlist_len;
9605 rok->wlist = wcl;
9606 return (TRUE);
9607 }
9608
/*
 * Tunable to disable server referrals: when set to a non-zero value,
 * vn_is_nfs_reparse() reports B_FALSE without examining the vnode.
 */
int rfs4_no_referrals = 0;
9611
9612 /*
9613 * Find an NFS record in reparse point data.
9614 * Returns 0 for success and <0 or an errno value on failure.
9615 */
9616 int
9617 vn_find_nfs_record(vnode_t *vp, nvlist_t **nvlp, char **svcp, char **datap)
9618 {
9619 int err;
9620 char *stype, *val;
9621 nvlist_t *nvl;
9622 nvpair_t *curr;
9623
9624 if ((nvl = reparse_init()) == NULL)
9625 return (-1);
9626
9627 if ((err = reparse_vnode_parse(vp, nvl)) != 0) {
9628 reparse_free(nvl);
9629 return (err);
9630 }
9631
9632 curr = NULL;
9633 while ((curr = nvlist_next_nvpair(nvl, curr)) != NULL) {
9634 if ((stype = nvpair_name(curr)) == NULL) {
9635 reparse_free(nvl);
9636 return (-2);
9637 }
9638 if (strncasecmp(stype, "NFS", 3) == 0)
9639 break;
9640 }
9641
9642 if ((curr == NULL) ||
9643 (nvpair_value_string(curr, &val))) {
9644 reparse_free(nvl);
9645 return (-3);
9646 }
9647 *nvlp = nvl;
9648 *svcp = stype;
9649 *datap = val;
9650 return (0);
9651 }
9652
9653 int
9654 vn_is_nfs_reparse(vnode_t *vp, cred_t *cr)
9655 {
9656 nvlist_t *nvl;
9657 char *s, *d;
9658
9659 if (rfs4_no_referrals != 0)
9660 return (B_FALSE);
9661
9662 if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9663 return (B_FALSE);
9664
9665 if (vn_find_nfs_record(vp, &nvl, &s, &d) != 0)
9666 return (B_FALSE);
9667
9668 reparse_free(nvl);
9669
9670 return (B_TRUE);
9671 }
9672
9673 /*
9674 * There is a user-level copy of this routine in ref_subr.c.
9675 * Changes should be kept in sync.
9676 */
9677 static int
9678 nfs4_create_components(char *path, component4 *comp4)
9679 {
9680 int slen, plen, ncomp;
9681 char *ori_path, *nxtc, buf[MAXNAMELEN];
9682
9683 if (path == NULL)
9684 return (0);
9685
9686 plen = strlen(path) + 1; /* include the terminator */
9687 ori_path = path;
9688 ncomp = 0;
9689
9690 /* count number of components in the path */
9691 for (nxtc = path; nxtc < ori_path + plen; nxtc++) {
9692 if (*nxtc == '/' || *nxtc == '\0' || *nxtc == '\n') {
9693 if ((slen = nxtc - path) == 0) {
9694 path = nxtc + 1;
9695 continue;
9696 }
9697
9698 if (comp4 != NULL) {
9699 bcopy(path, buf, slen);
9700 buf[slen] = '\0';
9701 (void) str_to_utf8(buf, &comp4[ncomp]);
9702 }
9703
9704 ncomp++; /* 1 valid component */
9705 path = nxtc + 1;
9706 }
9707 if (*nxtc == '\0' || *nxtc == '\n')
9708 break;
9709 }
9710
9711 return (ncomp);
9712 }
9713
9714 /*
9715 * There is a user-level copy of this routine in ref_subr.c.
9716 * Changes should be kept in sync.
9717 */
9718 static int
9719 make_pathname4(char *path, pathname4 *pathname)
9720 {
9721 int ncomp;
9722 component4 *comp4;
9723
9724 if (pathname == NULL)
9725 return (0);
9726
9727 if (path == NULL) {
9728 pathname->pathname4_val = NULL;
9729 pathname->pathname4_len = 0;
9730 return (0);
9731 }
9732
9733 /* count number of components to alloc buffer */
9734 if ((ncomp = nfs4_create_components(path, NULL)) == 0) {
9735 pathname->pathname4_val = NULL;
9736 pathname->pathname4_len = 0;
9737 return (0);
9738 }
9739 comp4 = kmem_zalloc(ncomp * sizeof (component4), KM_SLEEP);
9740
9741 /* copy components into allocated buffer */
9742 ncomp = nfs4_create_components(path, comp4);
9743
9744 pathname->pathname4_val = comp4;
9745 pathname->pathname4_len = ncomp;
9746
9747 return (ncomp);
9748 }
9749
9750 #define xdr_fs_locations4 xdr_fattr4_fs_locations
9751
9752 fs_locations4 *
9753 fetch_referral(vnode_t *vp, cred_t *cr)
9754 {
9755 nvlist_t *nvl;
9756 char *stype, *sdata;
9757 fs_locations4 *result;
9758 char buf[1024];
9759 size_t bufsize;
9760 XDR xdr;
9761 int err;
9762
9763 /*
9764 * Check attrs to ensure it's a reparse point
9765 */
9766 if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9767 return (NULL);
9768
9769 /*
9770 * Look for an NFS record and get the type and data
9771 */
9772 if (vn_find_nfs_record(vp, &nvl, &stype, &sdata) != 0)
9773 return (NULL);
9774
9775 /*
9776 * With the type and data, upcall to get the referral
9777 */
9778 bufsize = sizeof (buf);
9779 bzero(buf, sizeof (buf));
9780 err = reparse_kderef((const char *)stype, (const char *)sdata,
9781 buf, &bufsize);
9782 reparse_free(nvl);
9783
9784 DTRACE_PROBE4(nfs4serv__func__referral__upcall,
9785 char *, stype, char *, sdata, char *, buf, int, err);
9786 if (err) {
9787 cmn_err(CE_NOTE,
9788 "reparsed daemon not running: unable to get referral (%d)",
9789 err);
9790 return (NULL);
9791 }
9792
9793 /*
9794 * We get an XDR'ed record back from the kderef call
9795 */
9796 xdrmem_create(&xdr, buf, bufsize, XDR_DECODE);
9797 result = kmem_alloc(sizeof (fs_locations4), KM_SLEEP);
9798 err = xdr_fs_locations4(&xdr, result);
9799 XDR_DESTROY(&xdr);
9800 if (err != TRUE) {
9801 DTRACE_PROBE1(nfs4serv__func__referral__upcall__xdrfail,
9802 int, err);
9803 return (NULL);
9804 }
9805
9806 /*
9807 * Look at path to recover fs_root, ignoring the leading '/'
9808 */
9809 (void) make_pathname4(vp->v_path, &result->fs_root);
9810
9811 return (result);
9812 }
9813
9814 char *
9815 build_symlink(vnode_t *vp, cred_t *cr, size_t *strsz)
9816 {
9817 fs_locations4 *fsl;
9818 fs_location4 *fs;
9819 char *server, *path, *symbuf;
9820 static char *prefix = "/net/";
9821 int i, size, npaths;
9822 uint_t len;
9823
9824 /* Get the referral */
9825 if ((fsl = fetch_referral(vp, cr)) == NULL)
9826 return (NULL);
9827
9828 /* Deal with only the first location and first server */
9829 fs = &fsl->locations_val[0];
9830 server = utf8_to_str(&fs->server_val[0], &len, NULL);
9831 if (server == NULL) {
9832 rfs4_free_fs_locations4(fsl);
9833 kmem_free(fsl, sizeof (fs_locations4));
9834 return (NULL);
9835 }
9836
9837 /* Figure out size for "/net/" + host + /path/path/path + NULL */
9838 size = strlen(prefix) + len;
9839 for (i = 0; i < fs->rootpath.pathname4_len; i++)
9840 size += fs->rootpath.pathname4_val[i].utf8string_len + 1;
9841
9842 /* Allocate the symlink buffer and fill it */
9843 symbuf = kmem_zalloc(size, KM_SLEEP);
9844 (void) strcat(symbuf, prefix);
9845 (void) strcat(symbuf, server);
9846 kmem_free(server, len);
9847
9848 npaths = 0;
9849 for (i = 0; i < fs->rootpath.pathname4_len; i++) {
9850 path = utf8_to_str(&fs->rootpath.pathname4_val[i], &len, NULL);
9851 if (path == NULL)
9852 continue;
9853 (void) strcat(symbuf, "/");
9854 (void) strcat(symbuf, path);
9855 npaths++;
9856 kmem_free(path, len);
9857 }
9858
9859 rfs4_free_fs_locations4(fsl);
9860 kmem_free(fsl, sizeof (fs_locations4));
9861
9862 if (strsz != NULL)
9863 *strsz = size;
9864 return (symbuf);
9865 }
9866
9867 /*
9868 * Check to see if we have a downrev Solaris client, so that we
9869 * can send it a symlink instead of a referral.
9870 */
9871 int
9872 client_is_downrev(struct svc_req *req)
9873 {
9874 struct sockaddr *ca;
9875 rfs4_clntip_t *ci;
9876 bool_t create = FALSE;
9877 int is_downrev;
9878
9879 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
9880 ASSERT(ca);
9881 ci = rfs4_find_clntip(ca, &create);
9882 if (ci == NULL)
9883 return (0);
9884 is_downrev = ci->ri_no_referrals;
9885 rfs4_dbe_rele(ci->ri_dbe);
9886 return (is_downrev);
9887 }