Print this page
cstyle sort of updates
7127 remove -Wno-missing-braces from Makefile.uts
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/fs/nfs/nfs4_srv.c
+++ new/usr/src/uts/common/fs/nfs/nfs4_srv.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
24 24 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
25 25 * Copyright (c) 2012 by Delphix. All rights reserved.
26 26 */
27 27
28 28 /*
29 29 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
30 30 * All Rights Reserved
31 31 */
32 32
33 33 #include <sys/param.h>
34 34 #include <sys/types.h>
35 35 #include <sys/systm.h>
36 36 #include <sys/cred.h>
37 37 #include <sys/buf.h>
38 38 #include <sys/vfs.h>
39 39 #include <sys/vfs_opreg.h>
40 40 #include <sys/vnode.h>
41 41 #include <sys/uio.h>
42 42 #include <sys/errno.h>
43 43 #include <sys/sysmacros.h>
44 44 #include <sys/statvfs.h>
45 45 #include <sys/kmem.h>
46 46 #include <sys/dirent.h>
47 47 #include <sys/cmn_err.h>
48 48 #include <sys/debug.h>
49 49 #include <sys/systeminfo.h>
50 50 #include <sys/flock.h>
51 51 #include <sys/pathname.h>
52 52 #include <sys/nbmlock.h>
53 53 #include <sys/share.h>
54 54 #include <sys/atomic.h>
55 55 #include <sys/policy.h>
56 56 #include <sys/fem.h>
57 57 #include <sys/sdt.h>
58 58 #include <sys/ddi.h>
59 59 #include <sys/zone.h>
60 60 #include <sys/kstat.h>
61 61
62 62 #include <fs/fs_reparse.h>
63 63
64 64 #include <rpc/types.h>
65 65 #include <rpc/auth.h>
66 66 #include <rpc/rpcsec_gss.h>
67 67 #include <rpc/svc.h>
68 68
69 69 #include <nfs/nfs.h>
70 70 #include <nfs/export.h>
71 71 #include <nfs/nfs_cmd.h>
72 72 #include <nfs/lm.h>
73 73 #include <nfs/nfs4.h>
74 74
75 75 #include <sys/strsubr.h>
76 76 #include <sys/strsun.h>
77 77
78 78 #include <inet/common.h>
79 79 #include <inet/ip.h>
80 80 #include <inet/ip6.h>
81 81
82 82 #include <sys/tsol/label.h>
83 83 #include <sys/tsol/tndb.h>
84 84
85 85 #define RFS4_MAXLOCK_TRIES 4 /* Try to get the lock this many times */
86 86 static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
87 87 #define RFS4_LOCK_DELAY 10 /* Milliseconds */
88 88 static clock_t rfs4_lock_delay = RFS4_LOCK_DELAY;
89 89 extern struct svc_ops rdma_svc_ops;
90 90 extern int nfs_loaned_buffers;
91 91 /* End of Tunables */
92 92
93 93 static int rdma_setup_read_data4(READ4args *, READ4res *);
94 94
95 95 /*
96 96 * Used to bump the stateid4.seqid value and show changes in the stateid
97 97 */
98 98 #define next_stateid(sp) (++(sp)->bits.chgseq)
99 99
100 100 /*
101 101 * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
102 102 * This is used to return NFS4ERR_TOOSMALL when clients specify
103 103 * maxcount that isn't large enough to hold the smallest possible
104 104 * XDR encoded dirent.
105 105 *
106 106 * sizeof cookie (8 bytes) +
107 107 * sizeof name_len (4 bytes) +
108 108 * sizeof smallest (padded) name (4 bytes) +
109 109 * sizeof bitmap4_len (12 bytes) + NOTE: we always encode len=2 bm4
110 110 * sizeof attrlist4_len (4 bytes) +
111 111 * sizeof next boolean (4 bytes)
112 112 *
113 113 * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
114 114 * the smallest possible entry4 (assumes no attrs requested).
115 115 * sizeof nfsstat4 (4 bytes) +
116 116 * sizeof verifier4 (8 bytes) +
117 117 * sizeof entry4list bool (4 bytes) +
118 118 * sizeof entry4 (36 bytes) +
119 119 * sizeof eof bool (4 bytes)
120 120 *
121 121 * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
122 122 * VOP_READDIR. Its value is the size of the maximum possible dirent
123 123 * for solaris. The DIRENT64_RECLEN macro returns the size of dirent
124 124 * required for a given name length. MAXNAMELEN is the maximum
125 125 * filename length allowed in Solaris. The first two DIRENT64_RECLEN()
126 126 * macros are to allow for . and .. entries -- just a minor tweak to try
127 127 * and guarantee that buffer we give to VOP_READDIR will be large enough
128 128 * to hold ., .., and the largest possible solaris dirent64.
129 129 */
130 130 #define RFS4_MINLEN_ENTRY4 36
131 131 #define RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
132 132 #define RFS4_MINLEN_RDDIR_BUF \
133 133 (DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
134 134
135 135 /*
136 136 * It would be better to pad to 4 bytes since that's what XDR would do,
137 137 * but the dirents UFS gives us are already padded to 8, so just take
138 138 * what we're given. Dircount is only a hint anyway. Currently the
139 139 * solaris kernel is ASCII only, so there's no point in calling the
140 140 * UTF8 functions.
141 141 *
142 142 * dirent64: name padded to provide 8 byte struct alignment
143 143 * d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
144 144 *
145 145 * cookie: uint64_t + utf8namelen: uint_t + utf8name padded to 8 bytes
146 146 *
147 147 */
148 148 #define DIRENT64_TO_DIRCOUNT(dp) \
149 149 (3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
150 150
151 151 time_t rfs4_start_time; /* Initialized in rfs4_srvrinit */
152 152
153 153 static sysid_t lockt_sysid; /* dummy sysid for all LOCKT calls */
154 154
155 155 u_longlong_t nfs4_srv_caller_id;
156 156 uint_t nfs4_srv_vkey = 0;
157 157
158 158 verifier4 Write4verf;
159 159 verifier4 Readdir4verf;
160 160
161 161 void rfs4_init_compound_state(struct compound_state *);
162 162
163 163 static void nullfree(caddr_t);
164 164 static void rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
165 165 struct compound_state *);
166 166 static void rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
167 167 struct compound_state *);
168 168 static void rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
169 169 struct compound_state *);
170 170 static void rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
171 171 struct compound_state *);
172 172 static void rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
173 173 struct compound_state *);
174 174 static void rfs4_op_create_free(nfs_resop4 *resop);
175 175 static void rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
176 176 struct svc_req *, struct compound_state *);
177 177 static void rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
178 178 struct svc_req *, struct compound_state *);
179 179 static void rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
180 180 struct compound_state *);
181 181 static void rfs4_op_getattr_free(nfs_resop4 *);
182 182 static void rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
183 183 struct compound_state *);
184 184 static void rfs4_op_getfh_free(nfs_resop4 *);
185 185 static void rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
186 186 struct compound_state *);
187 187 static void rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
188 188 struct compound_state *);
189 189 static void rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
190 190 struct compound_state *);
191 191 static void lock_denied_free(nfs_resop4 *);
192 192 static void rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
193 193 struct compound_state *);
194 194 static void rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
195 195 struct compound_state *);
196 196 static void rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
197 197 struct compound_state *);
198 198 static void rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
199 199 struct compound_state *);
200 200 static void rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
201 201 struct svc_req *req, struct compound_state *cs);
202 202 static void rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
203 203 struct compound_state *);
204 204 static void rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
205 205 struct compound_state *);
206 206 static void rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
207 207 struct svc_req *, struct compound_state *);
208 208 static void rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
209 209 struct svc_req *, struct compound_state *);
210 210 static void rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
211 211 struct compound_state *);
212 212 static void rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
213 213 struct compound_state *);
214 214 static void rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
215 215 struct compound_state *);
216 216 static void rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
217 217 struct compound_state *);
218 218 static void rfs4_op_read_free(nfs_resop4 *);
219 219 static void rfs4_op_readdir_free(nfs_resop4 *resop);
220 220 static void rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
221 221 struct compound_state *);
222 222 static void rfs4_op_readlink_free(nfs_resop4 *);
223 223 static void rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
224 224 struct svc_req *, struct compound_state *);
225 225 static void rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
226 226 struct compound_state *);
227 227 static void rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
228 228 struct compound_state *);
229 229 static void rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
230 230 struct compound_state *);
231 231 static void rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
232 232 struct compound_state *);
233 233 static void rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
234 234 struct compound_state *);
235 235 static void rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
236 236 struct compound_state *);
237 237 static void rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
238 238 struct compound_state *);
239 239 static void rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
240 240 struct compound_state *);
241 241 static void rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
242 242 struct svc_req *, struct compound_state *);
243 243 static void rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
244 244 struct svc_req *req, struct compound_state *);
245 245 static void rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
246 246 struct compound_state *);
247 247 static void rfs4_op_secinfo_free(nfs_resop4 *);
248 248
249 249 static nfsstat4 check_open_access(uint32_t,
250 250 struct compound_state *, struct svc_req *);
251 251 nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *);
252 252 void rfs4_ss_clid(rfs4_client_t *);
253 253
254 254 /*
255 255 * translation table for attrs
256 256 */
257 257 struct nfs4_ntov_table {
258 258 union nfs4_attr_u *na;
259 259 uint8_t amap[NFS4_MAXNUM_ATTRS];
260 260 int attrcnt;
261 261 bool_t vfsstat;
262 262 };
263 263
264 264 static void nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
265 265 static void nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
266 266 struct nfs4_svgetit_arg *sargp);
267 267
268 268 static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
269 269 struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
270 270 struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
271 271
272 272 fem_t *deleg_rdops;
273 273 fem_t *deleg_wrops;
274 274
275 275 rfs4_servinst_t *rfs4_cur_servinst = NULL; /* current server instance */
276 276 kmutex_t rfs4_servinst_lock; /* protects linked list */
277 277 int rfs4_seen_first_compound; /* set first time we see one */
278 278
279 279 /*
280 280 * NFS4 op dispatch table
281 281 */
282 282
283 283 struct rfsv4disp {
284 284 void (*dis_proc)(); /* proc to call */
285 285 void (*dis_resfree)(); /* frees space allocated by proc */
286 286 int dis_flags; /* RPC_IDEMPOTENT, etc... */
287 287 int op_type; /* operation type, see below */
288 288 };
289 289
290 290 /*
291 291 * operation types; used primarily for the per-exportinfo kstat implementation
292 292 */
293 293 #define NFS4_OP_NOFH 0 /* The operation does not operate with any */
294 294 /* particular filehandle; we cannot associate */
295 295 /* it with any exportinfo. */
296 296
297 297 #define NFS4_OP_CFH 1 /* The operation works with the current */
298 298 /* filehandle; we associate the operation */
299 299 /* with the exportinfo related to the current */
300 300 /* filehandle (as set before the operation is */
301 301 /* executed). */
302 302
303 303 #define NFS4_OP_SFH 2 /* The operation works with the saved */
304 304 /* filehandle; we associate the operation */
305 305 /* with the exportinfo related to the saved */
306 306 /* filehandle (as set before the operation is */
307 307 /* executed). */
308 308
309 309 #define NFS4_OP_POSTCFH 3 /* The operation ignores the current */
310 310 /* filehandle, but sets the new current */
311 311 /* filehandle instead; we associate the */
312 312 /* operation with the exportinfo related to */
313 313 /* the current filehandle as set after the */
314 314 /* operation is successfully executed. Since */
315 315 /* we do not know the particular exportinfo */
316 316 /* (and thus the kstat) before the operation */
317 317 /* is done, there is no simple way how to */
318 318 /* update some I/O kstat statistics related */
319 319 /* to kstat_queue(9F). */
320 320
321 321 static struct rfsv4disp rfsv4disptab[] = {
322 322 /*
323 323 * NFS VERSION 4
324 324 */
325 325
326 326 /* RFS_NULL = 0 */
327 327 {rfs4_op_illegal, nullfree, 0, NFS4_OP_NOFH},
328 328
329 329 /* UNUSED = 1 */
330 330 {rfs4_op_illegal, nullfree, 0, NFS4_OP_NOFH},
331 331
332 332 /* UNUSED = 2 */
333 333 {rfs4_op_illegal, nullfree, 0, NFS4_OP_NOFH},
334 334
335 335 /* OP_ACCESS = 3 */
336 336 {rfs4_op_access, nullfree, RPC_IDEMPOTENT, NFS4_OP_CFH},
337 337
338 338 /* OP_CLOSE = 4 */
339 339 {rfs4_op_close, nullfree, 0, NFS4_OP_CFH},
340 340
341 341 /* OP_COMMIT = 5 */
342 342 {rfs4_op_commit, nullfree, RPC_IDEMPOTENT, NFS4_OP_CFH},
343 343
344 344 /* OP_CREATE = 6 */
345 345 {rfs4_op_create, nullfree, 0, NFS4_OP_CFH},
346 346
347 347 /* OP_DELEGPURGE = 7 */
348 348 {rfs4_op_delegpurge, nullfree, 0, NFS4_OP_NOFH},
349 349
350 350 /* OP_DELEGRETURN = 8 */
351 351 {rfs4_op_delegreturn, nullfree, 0, NFS4_OP_CFH},
352 352
353 353 /* OP_GETATTR = 9 */
354 354 {rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT, NFS4_OP_CFH},
355 355
356 356 /* OP_GETFH = 10 */
357 357 {rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL, NFS4_OP_CFH},
358 358
359 359 /* OP_LINK = 11 */
360 360 {rfs4_op_link, nullfree, 0, NFS4_OP_CFH},
361 361
362 362 /* OP_LOCK = 12 */
363 363 {rfs4_op_lock, lock_denied_free, 0, NFS4_OP_CFH},
364 364
365 365 /* OP_LOCKT = 13 */
366 366 {rfs4_op_lockt, lock_denied_free, 0, NFS4_OP_CFH},
367 367
368 368 /* OP_LOCKU = 14 */
369 369 {rfs4_op_locku, nullfree, 0, NFS4_OP_CFH},
370 370
371 371 /* OP_LOOKUP = 15 */
372 372 {rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK),
373 373 NFS4_OP_CFH},
374 374
375 375 /* OP_LOOKUPP = 16 */
376 376 {rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK),
377 377 NFS4_OP_CFH},
378 378
379 379 /* OP_NVERIFY = 17 */
380 380 {rfs4_op_nverify, nullfree, RPC_IDEMPOTENT, NFS4_OP_CFH},
381 381
382 382 /* OP_OPEN = 18 */
383 383 {rfs4_op_open, rfs4_free_reply, 0, NFS4_OP_CFH},
384 384
385 385 /* OP_OPENATTR = 19 */
386 386 {rfs4_op_openattr, nullfree, 0, NFS4_OP_CFH},
387 387
388 388 /* OP_OPEN_CONFIRM = 20 */
389 389 {rfs4_op_open_confirm, nullfree, 0, NFS4_OP_CFH},
390 390
391 391 /* OP_OPEN_DOWNGRADE = 21 */
392 392 {rfs4_op_open_downgrade, nullfree, 0, NFS4_OP_CFH},
393 393
394 394 /* OP_PUTFH = 22 */
395 395 {rfs4_op_putfh, nullfree, RPC_ALL, NFS4_OP_POSTCFH},
396 396
397 397 /* OP_PUTPUBFH = 23 */
398 398 {rfs4_op_putpubfh, nullfree, RPC_ALL, NFS4_OP_POSTCFH},
399 399
400 400 /* OP_PUTROOTFH = 24 */
401 401 {rfs4_op_putrootfh, nullfree, RPC_ALL, NFS4_OP_POSTCFH},
402 402
403 403 /* OP_READ = 25 */
404 404 {rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT, NFS4_OP_CFH},
405 405
406 406 /* OP_READDIR = 26 */
407 407 {rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT, NFS4_OP_CFH},
408 408
409 409 /* OP_READLINK = 27 */
410 410 {rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT, NFS4_OP_CFH},
411 411
412 412 /* OP_REMOVE = 28 */
413 413 {rfs4_op_remove, nullfree, 0, NFS4_OP_CFH},
414 414
415 415 /* OP_RENAME = 29 */
416 416 {rfs4_op_rename, nullfree, 0, NFS4_OP_CFH},
417 417
418 418 /* OP_RENEW = 30 */
419 419 {rfs4_op_renew, nullfree, 0, NFS4_OP_NOFH},
420 420
421 421 /* OP_RESTOREFH = 31 */
422 422 {rfs4_op_restorefh, nullfree, RPC_ALL, NFS4_OP_SFH},
423 423
424 424 /* OP_SAVEFH = 32 */
425 425 {rfs4_op_savefh, nullfree, RPC_ALL, NFS4_OP_CFH},
426 426
427 427 /* OP_SECINFO = 33 */
428 428 {rfs4_op_secinfo, rfs4_op_secinfo_free, 0, NFS4_OP_CFH},
429 429
430 430 /* OP_SETATTR = 34 */
431 431 {rfs4_op_setattr, nullfree, 0, NFS4_OP_CFH},
432 432
433 433 /* OP_SETCLIENTID = 35 */
434 434 {rfs4_op_setclientid, nullfree, 0, NFS4_OP_NOFH},
435 435
436 436 /* OP_SETCLIENTID_CONFIRM = 36 */
437 437 {rfs4_op_setclientid_confirm, nullfree, 0, NFS4_OP_NOFH},
438 438
439 439 /* OP_VERIFY = 37 */
440 440 {rfs4_op_verify, nullfree, RPC_IDEMPOTENT, NFS4_OP_CFH},
441 441
442 442 /* OP_WRITE = 38 */
443 443 {rfs4_op_write, nullfree, 0, NFS4_OP_CFH},
444 444
445 445 /* OP_RELEASE_LOCKOWNER = 39 */
446 446 {rfs4_op_release_lockowner, nullfree, 0, NFS4_OP_NOFH},
447 447 };
448 448
449 449 static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);
450 450
451 451 #define OP_ILLEGAL_IDX (rfsv4disp_cnt)
452 452
453 453 #ifdef DEBUG
454 454
455 455 int rfs4_fillone_debug = 0;
456 456 int rfs4_no_stub_access = 1;
457 457 int rfs4_rddir_debug = 0;
458 458
/*
 * Printable names of the NFSv4 operation handlers (DEBUG builds only).
 * Entries are in the same order as rfsv4disptab[], followed by one extra
 * trailing entry for the illegal operation.
 */
static char *rfs4_op_string[] = {
	"rfs4_op_null",
	"rfs4_op_1 unused",
	"rfs4_op_2 unused",
	"rfs4_op_access",
	"rfs4_op_close",
	"rfs4_op_commit",
	"rfs4_op_create",
	"rfs4_op_delegpurge",
	"rfs4_op_delegreturn",
	"rfs4_op_getattr",
	"rfs4_op_getfh",
	"rfs4_op_link",
	"rfs4_op_lock",
	"rfs4_op_lockt",
	"rfs4_op_locku",
	"rfs4_op_lookup",
	"rfs4_op_lookupp",
	"rfs4_op_nverify",
	"rfs4_op_open",
	"rfs4_op_openattr",
	"rfs4_op_open_confirm",
	"rfs4_op_open_downgrade",
	"rfs4_op_putfh",
	"rfs4_op_putpubfh",
	"rfs4_op_putrootfh",
	"rfs4_op_read",
	"rfs4_op_readdir",
	"rfs4_op_readlink",
	"rfs4_op_remove",
	"rfs4_op_rename",
	"rfs4_op_renew",
	"rfs4_op_restorefh",
	"rfs4_op_savefh",
	"rfs4_op_secinfo",
	"rfs4_op_setattr",
	"rfs4_op_setclientid",
	/* was "rfs4_op_setclient_confirm"; match the handler's real name */
	"rfs4_op_setclientid_confirm",
	"rfs4_op_verify",
	"rfs4_op_write",
	"rfs4_op_release_lockowner",
	"rfs4_op_illegal"
};
502 502 #endif
503 503
504 504 void rfs4_ss_chkclid(rfs4_client_t *);
505 505
↓ open down ↓ |
505 lines elided |
↑ open up ↑ |
506 506 extern size_t strlcpy(char *dst, const char *src, size_t dstsize);
507 507
508 508 extern void rfs4_free_fs_locations4(fs_locations4 *);
509 509
510 510 #ifdef nextdp
511 511 #undef nextdp
512 512 #endif
513 513 #define nextdp(dp) ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
514 514
515 515 static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
516 - VOPNAME_OPEN, { .femop_open = deleg_rd_open },
517 - VOPNAME_WRITE, { .femop_write = deleg_rd_write },
518 - VOPNAME_SETATTR, { .femop_setattr = deleg_rd_setattr },
519 - VOPNAME_RWLOCK, { .femop_rwlock = deleg_rd_rwlock },
520 - VOPNAME_SPACE, { .femop_space = deleg_rd_space },
521 - VOPNAME_SETSECATTR, { .femop_setsecattr = deleg_rd_setsecattr },
522 - VOPNAME_VNEVENT, { .femop_vnevent = deleg_rd_vnevent },
523 - NULL, NULL
516 + { VOPNAME_OPEN, { .femop_open = deleg_rd_open } },
517 + { VOPNAME_WRITE, { .femop_write = deleg_rd_write } },
518 + { VOPNAME_SETATTR, { .femop_setattr = deleg_rd_setattr } },
519 + { VOPNAME_RWLOCK, { .femop_rwlock = deleg_rd_rwlock } },
520 + { VOPNAME_SPACE, { .femop_space = deleg_rd_space } },
521 + { VOPNAME_SETSECATTR, { .femop_setsecattr = deleg_rd_setsecattr } },
522 + { VOPNAME_VNEVENT, { .femop_vnevent = deleg_rd_vnevent } },
523 + { NULL, { NULL } }
524 524 };
525 525 static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
526 - VOPNAME_OPEN, { .femop_open = deleg_wr_open },
527 - VOPNAME_READ, { .femop_read = deleg_wr_read },
528 - VOPNAME_WRITE, { .femop_write = deleg_wr_write },
529 - VOPNAME_SETATTR, { .femop_setattr = deleg_wr_setattr },
530 - VOPNAME_RWLOCK, { .femop_rwlock = deleg_wr_rwlock },
531 - VOPNAME_SPACE, { .femop_space = deleg_wr_space },
532 - VOPNAME_SETSECATTR, { .femop_setsecattr = deleg_wr_setsecattr },
533 - VOPNAME_VNEVENT, { .femop_vnevent = deleg_wr_vnevent },
534 - NULL, NULL
526 + { VOPNAME_OPEN, { .femop_open = deleg_wr_open } },
527 + { VOPNAME_READ, { .femop_read = deleg_wr_read } },
528 + { VOPNAME_WRITE, { .femop_write = deleg_wr_write } },
529 + { VOPNAME_SETATTR, { .femop_setattr = deleg_wr_setattr } },
530 + { VOPNAME_RWLOCK, { .femop_rwlock = deleg_wr_rwlock } },
531 + { VOPNAME_SPACE, { .femop_space = deleg_wr_space } },
532 + { VOPNAME_SETSECATTR, { .femop_setsecattr = deleg_wr_setsecattr } },
533 + { VOPNAME_VNEVENT, { .femop_vnevent = deleg_wr_vnevent } },
534 + { NULL, { NULL } }
535 535 };
536 536
537 537 int
538 538 rfs4_srvrinit(void)
539 539 {
540 540 timespec32_t verf;
541 541 int error;
542 542 extern void rfs4_attr_init();
543 543 extern krwlock_t rfs4_deleg_policy_lock;
544 544
545 545 /*
546 546 * The following algorithm attempts to find a unique verifier
547 547 * to be used as the write verifier returned from the server
548 548 * to the client. It is important that this verifier change
549 549 * whenever the server reboots. Of secondary importance, it
550 550 * is important for the verifier to be unique between two
551 551 * different servers.
552 552 *
553 553 * Thus, an attempt is made to use the system hostid and the
554 554 * current time in seconds when the nfssrv kernel module is
555 555 * loaded. It is assumed that an NFS server will not be able
556 556 * to boot and then to reboot in less than a second. If the
557 557 * hostid has not been set, then the current high resolution
558 558 * time is used. This will ensure different verifiers each
559 559 * time the server reboots and minimize the chances that two
560 560 * different servers will have the same verifier.
561 561 * XXX - this is broken on LP64 kernels.
562 562 */
563 563 verf.tv_sec = (time_t)zone_get_hostid(NULL);
564 564 if (verf.tv_sec != 0) {
565 565 verf.tv_nsec = gethrestime_sec();
566 566 } else {
567 567 timespec_t tverf;
568 568
569 569 gethrestime(&tverf);
570 570 verf.tv_sec = (time_t)tverf.tv_sec;
571 571 verf.tv_nsec = tverf.tv_nsec;
572 572 }
573 573
574 574 Write4verf = *(uint64_t *)&verf;
575 575
576 576 rfs4_attr_init();
577 577 mutex_init(&rfs4_deleg_lock, NULL, MUTEX_DEFAULT, NULL);
578 578
579 579 /* Used to manage create/destroy of server state */
580 580 mutex_init(&rfs4_state_lock, NULL, MUTEX_DEFAULT, NULL);
581 581
582 582 /* Used to manage access to server instance linked list */
583 583 mutex_init(&rfs4_servinst_lock, NULL, MUTEX_DEFAULT, NULL);
584 584
585 585 /* Used to manage access to rfs4_deleg_policy */
586 586 rw_init(&rfs4_deleg_policy_lock, NULL, RW_DEFAULT, NULL);
587 587
588 588 error = fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops);
589 589 if (error != 0) {
590 590 rfs4_disable_delegation();
591 591 } else {
592 592 error = fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
593 593 &deleg_wrops);
594 594 if (error != 0) {
595 595 rfs4_disable_delegation();
596 596 fem_free(deleg_rdops);
597 597 }
598 598 }
599 599
600 600 nfs4_srv_caller_id = fs_new_caller_id();
601 601
602 602 lockt_sysid = lm_alloc_sysidt();
603 603
604 604 vsd_create(&nfs4_srv_vkey, NULL);
605 605
606 606 return (0);
607 607 }
608 608
609 609 void
610 610 rfs4_srvrfini(void)
611 611 {
612 612 extern krwlock_t rfs4_deleg_policy_lock;
613 613
614 614 if (lockt_sysid != LM_NOSYSID) {
615 615 lm_free_sysidt(lockt_sysid);
616 616 lockt_sysid = LM_NOSYSID;
617 617 }
618 618
619 619 mutex_destroy(&rfs4_deleg_lock);
620 620 mutex_destroy(&rfs4_state_lock);
621 621 rw_destroy(&rfs4_deleg_policy_lock);
622 622
623 623 fem_free(deleg_rdops);
624 624 fem_free(deleg_wrops);
625 625 }
626 626
627 627 void
628 628 rfs4_init_compound_state(struct compound_state *cs)
629 629 {
630 630 bzero(cs, sizeof (*cs));
631 631 cs->cont = TRUE;
632 632 cs->access = CS_ACCESS_DENIED;
633 633 cs->deleg = FALSE;
634 634 cs->mandlock = FALSE;
635 635 cs->fh.nfs_fh4_val = cs->fhbuf;
636 636 }
637 637
638 638 void
639 639 rfs4_grace_start(rfs4_servinst_t *sip)
640 640 {
641 641 rw_enter(&sip->rwlock, RW_WRITER);
642 642 sip->start_time = (time_t)TICK_TO_SEC(ddi_get_lbolt());
643 643 sip->grace_period = rfs4_grace_period;
644 644 rw_exit(&sip->rwlock);
645 645 }
646 646
647 647 /*
648 648 * returns true if the instance's grace period has never been started
649 649 */
650 650 int
651 651 rfs4_servinst_grace_new(rfs4_servinst_t *sip)
652 652 {
653 653 time_t start_time;
654 654
655 655 rw_enter(&sip->rwlock, RW_READER);
656 656 start_time = sip->start_time;
657 657 rw_exit(&sip->rwlock);
658 658
659 659 return (start_time == 0);
660 660 }
661 661
662 662 /*
663 663 * Indicates if server instance is within the
664 664 * grace period.
665 665 */
666 666 int
667 667 rfs4_servinst_in_grace(rfs4_servinst_t *sip)
668 668 {
669 669 time_t grace_expiry;
670 670
671 671 rw_enter(&sip->rwlock, RW_READER);
672 672 grace_expiry = sip->start_time + sip->grace_period;
673 673 rw_exit(&sip->rwlock);
674 674
675 675 return (((time_t)TICK_TO_SEC(ddi_get_lbolt())) < grace_expiry);
676 676 }
677 677
678 678 int
679 679 rfs4_clnt_in_grace(rfs4_client_t *cp)
680 680 {
681 681 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
682 682
683 683 return (rfs4_servinst_in_grace(cp->rc_server_instance));
684 684 }
685 685
686 686 /*
687 687 * reset all currently active grace periods
688 688 */
689 689 void
690 690 rfs4_grace_reset_all(void)
691 691 {
692 692 rfs4_servinst_t *sip;
693 693
694 694 mutex_enter(&rfs4_servinst_lock);
695 695 for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev)
696 696 if (rfs4_servinst_in_grace(sip))
697 697 rfs4_grace_start(sip);
698 698 mutex_exit(&rfs4_servinst_lock);
699 699 }
700 700
701 701 /*
702 702 * start any new instances' grace periods
703 703 */
704 704 void
705 705 rfs4_grace_start_new(void)
706 706 {
707 707 rfs4_servinst_t *sip;
708 708
709 709 mutex_enter(&rfs4_servinst_lock);
710 710 for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev)
711 711 if (rfs4_servinst_grace_new(sip))
712 712 rfs4_grace_start(sip);
713 713 mutex_exit(&rfs4_servinst_lock);
714 714 }
715 715
716 716 static rfs4_dss_path_t *
717 717 rfs4_dss_newpath(rfs4_servinst_t *sip, char *path, unsigned index)
718 718 {
719 719 size_t len;
720 720 rfs4_dss_path_t *dss_path;
721 721
722 722 dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
723 723
724 724 /*
725 725 * Take a copy of the string, since the original may be overwritten.
726 726 * Sadly, no strdup() in the kernel.
727 727 */
728 728 /* allow for NUL */
729 729 len = strlen(path) + 1;
730 730 dss_path->path = kmem_alloc(len, KM_SLEEP);
731 731 (void) strlcpy(dss_path->path, path, len);
732 732
733 733 /* associate with servinst */
734 734 dss_path->sip = sip;
735 735 dss_path->index = index;
736 736
737 737 /*
738 738 * Add to list of served paths.
739 739 * No locking required, as we're only ever called at startup.
740 740 */
741 741 if (rfs4_dss_pathlist == NULL) {
742 742 /* this is the first dss_path_t */
743 743
744 744 /* needed for insque/remque */
745 745 dss_path->next = dss_path->prev = dss_path;
746 746
747 747 rfs4_dss_pathlist = dss_path;
748 748 } else {
749 749 insque(dss_path, rfs4_dss_pathlist);
750 750 }
751 751
752 752 return (dss_path);
753 753 }
754 754
755 755 /*
756 756 * Create a new server instance, and make it the currently active instance.
757 757 * Note that starting the grace period too early will reduce the clients'
758 758 * recovery window.
759 759 */
760 760 void
761 761 rfs4_servinst_create(int start_grace, int dss_npaths, char **dss_paths)
762 762 {
763 763 unsigned i;
764 764 rfs4_servinst_t *sip;
765 765 rfs4_oldstate_t *oldstate;
766 766
767 767 sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
768 768 rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
769 769
770 770 sip->start_time = (time_t)0;
771 771 sip->grace_period = (time_t)0;
772 772 sip->next = NULL;
773 773 sip->prev = NULL;
774 774
775 775 rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
776 776 /*
777 777 * This initial dummy entry is required to setup for insque/remque.
778 778 * It must be skipped over whenever the list is traversed.
779 779 */
780 780 oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
781 781 /* insque/remque require initial list entry to be self-terminated */
782 782 oldstate->next = oldstate;
783 783 oldstate->prev = oldstate;
784 784 sip->oldstate = oldstate;
785 785
786 786
787 787 sip->dss_npaths = dss_npaths;
788 788 sip->dss_paths = kmem_alloc(dss_npaths *
789 789 sizeof (rfs4_dss_path_t *), KM_SLEEP);
790 790
791 791 for (i = 0; i < dss_npaths; i++) {
792 792 sip->dss_paths[i] = rfs4_dss_newpath(sip, dss_paths[i], i);
793 793 }
794 794
795 795 mutex_enter(&rfs4_servinst_lock);
796 796 if (rfs4_cur_servinst != NULL) {
797 797 /* add to linked list */
798 798 sip->prev = rfs4_cur_servinst;
799 799 rfs4_cur_servinst->next = sip;
800 800 }
801 801 if (start_grace)
802 802 rfs4_grace_start(sip);
803 803 /* make the new instance "current" */
804 804 rfs4_cur_servinst = sip;
805 805
806 806 mutex_exit(&rfs4_servinst_lock);
807 807 }
808 808
809 809 /*
810 810 * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
811 811 * all instances directly.
812 812 */
813 813 void
814 814 rfs4_servinst_destroy_all(void)
815 815 {
816 816 rfs4_servinst_t *sip, *prev, *current;
817 817 #ifdef DEBUG
818 818 int n = 0;
819 819 #endif
820 820
821 821 mutex_enter(&rfs4_servinst_lock);
822 822 ASSERT(rfs4_cur_servinst != NULL);
823 823 current = rfs4_cur_servinst;
824 824 rfs4_cur_servinst = NULL;
825 825 for (sip = current; sip != NULL; sip = prev) {
826 826 prev = sip->prev;
827 827 rw_destroy(&sip->rwlock);
828 828 if (sip->oldstate)
829 829 kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
830 830 if (sip->dss_paths)
831 831 kmem_free(sip->dss_paths,
832 832 sip->dss_npaths * sizeof (rfs4_dss_path_t *));
833 833 kmem_free(sip, sizeof (rfs4_servinst_t));
834 834 #ifdef DEBUG
835 835 n++;
836 836 #endif
837 837 }
838 838 mutex_exit(&rfs4_servinst_lock);
839 839 }
840 840
841 841 /*
842 842 * Assign the current server instance to a client_t.
843 843 * Should be called with cp->rc_dbe held.
844 844 */
845 845 void
846 846 rfs4_servinst_assign(rfs4_client_t *cp, rfs4_servinst_t *sip)
847 847 {
848 848 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
849 849
850 850 /*
851 851 * The lock ensures that if the current instance is in the process
852 852 * of changing, we will see the new one.
853 853 */
854 854 mutex_enter(&rfs4_servinst_lock);
855 855 cp->rc_server_instance = sip;
856 856 mutex_exit(&rfs4_servinst_lock);
857 857 }
858 858
859 859 rfs4_servinst_t *
860 860 rfs4_servinst(rfs4_client_t *cp)
861 861 {
862 862 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
863 863
864 864 return (cp->rc_server_instance);
865 865 }
866 866
867 867 /* ARGSUSED */
868 868 static void
869 869 nullfree(caddr_t resop)
870 870 {
871 871 }
872 872
873 873 /*
874 874 * This is a fall-through for invalid or not implemented (yet) ops
875 875 */
876 876 /* ARGSUSED */
877 877 static void
878 878 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
879 879 struct compound_state *cs)
880 880 {
881 881 *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
882 882 }
883 883
884 884 /*
885 885 * Check if the security flavor, nfsnum, is in the flavor_list.
886 886 */
887 887 bool_t
888 888 in_flavor_list(int nfsnum, int *flavor_list, int count)
889 889 {
890 890 int i;
891 891
892 892 for (i = 0; i < count; i++) {
893 893 if (nfsnum == flavor_list[i])
894 894 return (TRUE);
895 895 }
896 896 return (FALSE);
897 897 }
898 898
899 899 /*
900 900 * Used by rfs4_op_secinfo to get the security information from the
901 901 * export structure associated with the component.
902 902 */
903 903 /* ARGSUSED */
904 904 static nfsstat4
905 905 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
906 906 {
907 907 int error, different_export = 0;
908 908 vnode_t *dvp, *vp;
909 909 struct exportinfo *exi = NULL;
910 910 fid_t fid;
911 911 uint_t count, i;
912 912 secinfo4 *resok_val;
913 913 struct secinfo *secp;
914 914 seconfig_t *si;
915 915 bool_t did_traverse = FALSE;
916 916 int dotdot, walk;
917 917
918 918 dvp = cs->vp;
919 919 dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
920 920
921 921 /*
922 922 * If dotdotting, then need to check whether it's above the
923 923 * root of a filesystem, or above an export point.
924 924 */
925 925 if (dotdot) {
926 926
927 927 /*
928 928 * If dotdotting at the root of a filesystem, then
929 929 * need to traverse back to the mounted-on filesystem
930 930 * and do the dotdot lookup there.
931 931 */
932 932 if (cs->vp->v_flag & VROOT) {
933 933
934 934 /*
935 935 * If at the system root, then can
936 936 * go up no further.
937 937 */
938 938 if (VN_CMP(dvp, rootdir))
939 939 return (puterrno4(ENOENT));
940 940
941 941 /*
942 942 * Traverse back to the mounted-on filesystem
943 943 */
944 944 dvp = untraverse(cs->vp);
945 945
946 946 /*
947 947 * Set the different_export flag so we remember
948 948 * to pick up a new exportinfo entry for
949 949 * this new filesystem.
950 950 */
951 951 different_export = 1;
952 952 } else {
953 953
954 954 /*
955 955 * If dotdotting above an export point then set
956 956 * the different_export to get new export info.
957 957 */
958 958 different_export = nfs_exported(cs->exi, cs->vp);
959 959 }
960 960 }
961 961
962 962 /*
963 963 * Get the vnode for the component "nm".
964 964 */
965 965 error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
966 966 NULL, NULL, NULL);
967 967 if (error)
968 968 return (puterrno4(error));
969 969
970 970 /*
971 971 * If the vnode is in a pseudo filesystem, or if the security flavor
972 972 * used in the request is valid but not an explicitly shared flavor,
973 973 * or the access bit indicates that this is a limited access,
974 974 * check whether this vnode is visible.
975 975 */
976 976 if (!different_export &&
977 977 (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
978 978 cs->access & CS_ACCESS_LIMITED)) {
979 979 if (! nfs_visible(cs->exi, vp, &different_export)) {
980 980 VN_RELE(vp);
981 981 return (puterrno4(ENOENT));
982 982 }
983 983 }
984 984
985 985 /*
986 986 * If it's a mountpoint, then traverse it.
987 987 */
988 988 if (vn_ismntpt(vp)) {
989 989 if ((error = traverse(&vp)) != 0) {
990 990 VN_RELE(vp);
991 991 return (puterrno4(error));
992 992 }
993 993 /* remember that we had to traverse mountpoint */
994 994 did_traverse = TRUE;
995 995 different_export = 1;
996 996 } else if (vp->v_vfsp != dvp->v_vfsp) {
997 997 /*
998 998 * If vp isn't a mountpoint and the vfs ptrs aren't the same,
999 999 * then vp is probably an LOFS object. We don't need the
1000 1000 * realvp, we just need to know that we might have crossed
1001 1001 * a server fs boundary and need to call checkexport4.
1002 1002 * (LOFS lookup hides server fs mountpoints, and actually calls
1003 1003 * traverse)
1004 1004 */
1005 1005 different_export = 1;
1006 1006 }
1007 1007
1008 1008 /*
1009 1009 * Get the export information for it.
1010 1010 */
1011 1011 if (different_export) {
1012 1012
1013 1013 bzero(&fid, sizeof (fid));
1014 1014 fid.fid_len = MAXFIDSZ;
1015 1015 error = vop_fid_pseudo(vp, &fid);
1016 1016 if (error) {
1017 1017 VN_RELE(vp);
1018 1018 return (puterrno4(error));
1019 1019 }
1020 1020
1021 1021 if (dotdot)
1022 1022 exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
1023 1023 else
1024 1024 exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
1025 1025
1026 1026 if (exi == NULL) {
1027 1027 if (did_traverse == TRUE) {
1028 1028 /*
1029 1029 * If this vnode is a mounted-on vnode,
1030 1030 * but the mounted-on file system is not
1031 1031 * exported, send back the secinfo for
1032 1032 * the exported node that the mounted-on
1033 1033 * vnode lives in.
1034 1034 */
1035 1035 exi = cs->exi;
1036 1036 } else {
1037 1037 VN_RELE(vp);
1038 1038 return (puterrno4(EACCES));
1039 1039 }
1040 1040 }
1041 1041 } else {
1042 1042 exi = cs->exi;
1043 1043 }
1044 1044 ASSERT(exi != NULL);
1045 1045
1046 1046
1047 1047 /*
1048 1048 * Create the secinfo result based on the security information
1049 1049 * from the exportinfo structure (exi).
1050 1050 *
1051 1051 * Return all flavors for a pseudo node.
1052 1052 * For a real export node, return the flavor that the client
1053 1053 * has access with.
1054 1054 */
1055 1055 ASSERT(RW_LOCK_HELD(&exported_lock));
1056 1056 if (PSEUDO(exi)) {
1057 1057 count = exi->exi_export.ex_seccnt; /* total sec count */
1058 1058 resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
1059 1059 secp = exi->exi_export.ex_secinfo;
1060 1060
1061 1061 for (i = 0; i < count; i++) {
1062 1062 si = &secp[i].s_secinfo;
1063 1063 resok_val[i].flavor = si->sc_rpcnum;
1064 1064 if (resok_val[i].flavor == RPCSEC_GSS) {
1065 1065 rpcsec_gss_info *info;
1066 1066
1067 1067 info = &resok_val[i].flavor_info;
1068 1068 info->qop = si->sc_qop;
1069 1069 info->service = (rpc_gss_svc_t)si->sc_service;
1070 1070
1071 1071 /* get oid opaque data */
1072 1072 info->oid.sec_oid4_len =
1073 1073 si->sc_gss_mech_type->length;
1074 1074 info->oid.sec_oid4_val = kmem_alloc(
1075 1075 si->sc_gss_mech_type->length, KM_SLEEP);
1076 1076 bcopy(
1077 1077 si->sc_gss_mech_type->elements,
1078 1078 info->oid.sec_oid4_val,
1079 1079 info->oid.sec_oid4_len);
1080 1080 }
1081 1081 }
1082 1082 resp->SECINFO4resok_len = count;
1083 1083 resp->SECINFO4resok_val = resok_val;
1084 1084 } else {
1085 1085 int ret_cnt = 0, k = 0;
1086 1086 int *flavor_list;
1087 1087
1088 1088 count = exi->exi_export.ex_seccnt; /* total sec count */
1089 1089 secp = exi->exi_export.ex_secinfo;
1090 1090
1091 1091 flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
1092 1092 /* find out which flavors to return */
1093 1093 for (i = 0; i < count; i ++) {
1094 1094 int access, flavor, perm;
1095 1095
1096 1096 flavor = secp[i].s_secinfo.sc_nfsnum;
1097 1097 perm = secp[i].s_flags;
1098 1098
1099 1099 access = nfsauth4_secinfo_access(exi, cs->req,
1100 1100 flavor, perm, cs->basecr);
1101 1101
1102 1102 if (! (access & NFSAUTH_DENIED) &&
1103 1103 ! (access & NFSAUTH_WRONGSEC)) {
1104 1104 flavor_list[ret_cnt] = flavor;
1105 1105 ret_cnt++;
1106 1106 }
1107 1107 }
1108 1108
1109 1109 /* Create the returning SECINFO value */
1110 1110 resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);
1111 1111
1112 1112 for (i = 0; i < count; i++) {
1113 1113 /*
1114 1114 * If the flavor is in the flavor list,
1115 1115 * fill in resok_val.
1116 1116 */
1117 1117 si = &secp[i].s_secinfo;
1118 1118 if (in_flavor_list(si->sc_nfsnum,
1119 1119 flavor_list, ret_cnt)) {
1120 1120 resok_val[k].flavor = si->sc_rpcnum;
1121 1121 if (resok_val[k].flavor == RPCSEC_GSS) {
1122 1122 rpcsec_gss_info *info;
1123 1123
1124 1124 info = &resok_val[k].flavor_info;
1125 1125 info->qop = si->sc_qop;
1126 1126 info->service = (rpc_gss_svc_t)
1127 1127 si->sc_service;
1128 1128
1129 1129 /* get oid opaque data */
1130 1130 info->oid.sec_oid4_len =
1131 1131 si->sc_gss_mech_type->length;
1132 1132 info->oid.sec_oid4_val = kmem_alloc(
1133 1133 si->sc_gss_mech_type->length,
1134 1134 KM_SLEEP);
1135 1135 bcopy(si->sc_gss_mech_type->elements,
1136 1136 info->oid.sec_oid4_val,
1137 1137 info->oid.sec_oid4_len);
1138 1138 }
1139 1139 k++;
1140 1140 }
1141 1141 if (k >= ret_cnt)
1142 1142 break;
1143 1143 }
1144 1144 resp->SECINFO4resok_len = ret_cnt;
1145 1145 resp->SECINFO4resok_val = resok_val;
1146 1146 kmem_free(flavor_list, count * sizeof (int));
1147 1147 }
1148 1148
1149 1149 VN_RELE(vp);
1150 1150 return (NFS4_OK);
1151 1151 }
1152 1152
1153 1153 /*
1154 1154 * SECINFO (Operation 33): Obtain required security information on
1155 1155 * the component name in the format of (security-mechanism-oid, qop, service)
1156 1156 * triplets.
1157 1157 */
1158 1158 /* ARGSUSED */
1159 1159 static void
1160 1160 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1161 1161 struct compound_state *cs)
1162 1162 {
1163 1163 SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo;
1164 1164 SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1165 1165 utf8string *utfnm = &args->name;
1166 1166 uint_t len;
1167 1167 char *nm;
1168 1168 struct sockaddr *ca;
1169 1169 char *name = NULL;
1170 1170 nfsstat4 status = NFS4_OK;
1171 1171
1172 1172 DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
1173 1173 SECINFO4args *, args);
1174 1174
1175 1175 /*
1176 1176 * Current file handle (cfh) should have been set before getting
1177 1177 * into this function. If not, return error.
1178 1178 */
1179 1179 if (cs->vp == NULL) {
1180 1180 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1181 1181 goto out;
1182 1182 }
1183 1183
1184 1184 if (cs->vp->v_type != VDIR) {
1185 1185 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1186 1186 goto out;
1187 1187 }
1188 1188
1189 1189 /*
1190 1190 * Verify the component name. If failed, error out, but
1191 1191 * do not error out if the component name is a "..".
1192 1192 * SECINFO will return its parents secinfo data for SECINFO "..".
1193 1193 */
1194 1194 status = utf8_dir_verify(utfnm);
1195 1195 if (status != NFS4_OK) {
1196 1196 if (utfnm->utf8string_len != 2 ||
1197 1197 utfnm->utf8string_val[0] != '.' ||
1198 1198 utfnm->utf8string_val[1] != '.') {
1199 1199 *cs->statusp = resp->status = status;
1200 1200 goto out;
1201 1201 }
1202 1202 }
1203 1203
1204 1204 nm = utf8_to_str(utfnm, &len, NULL);
1205 1205 if (nm == NULL) {
1206 1206 *cs->statusp = resp->status = NFS4ERR_INVAL;
1207 1207 goto out;
1208 1208 }
1209 1209
1210 1210 if (len > MAXNAMELEN) {
1211 1211 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1212 1212 kmem_free(nm, len);
1213 1213 goto out;
1214 1214 }
1215 1215
1216 1216 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1217 1217 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1218 1218 MAXPATHLEN + 1);
1219 1219
1220 1220 if (name == NULL) {
1221 1221 *cs->statusp = resp->status = NFS4ERR_INVAL;
1222 1222 kmem_free(nm, len);
1223 1223 goto out;
1224 1224 }
1225 1225
1226 1226
1227 1227 *cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp);
1228 1228
1229 1229 if (name != nm)
1230 1230 kmem_free(name, MAXPATHLEN + 1);
1231 1231 kmem_free(nm, len);
1232 1232
1233 1233 out:
1234 1234 DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs,
1235 1235 SECINFO4res *, resp);
1236 1236 }
1237 1237
1238 1238 /*
1239 1239 * Free SECINFO result.
1240 1240 */
1241 1241 /* ARGSUSED */
1242 1242 static void
1243 1243 rfs4_op_secinfo_free(nfs_resop4 *resop)
1244 1244 {
1245 1245 SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1246 1246 int count, i;
1247 1247 secinfo4 *resok_val;
1248 1248
1249 1249 /* If this is not an Ok result, nothing to free. */
1250 1250 if (resp->status != NFS4_OK) {
1251 1251 return;
1252 1252 }
1253 1253
1254 1254 count = resp->SECINFO4resok_len;
1255 1255 resok_val = resp->SECINFO4resok_val;
1256 1256
1257 1257 for (i = 0; i < count; i++) {
1258 1258 if (resok_val[i].flavor == RPCSEC_GSS) {
1259 1259 rpcsec_gss_info *info;
1260 1260
1261 1261 info = &resok_val[i].flavor_info;
1262 1262 kmem_free(info->oid.sec_oid4_val,
1263 1263 info->oid.sec_oid4_len);
1264 1264 }
1265 1265 }
1266 1266 kmem_free(resok_val, count * sizeof (secinfo4));
1267 1267 resp->SECINFO4resok_len = 0;
1268 1268 resp->SECINFO4resok_val = NULL;
1269 1269 }
1270 1270
1271 1271 /* ARGSUSED */
1272 1272 static void
1273 1273 rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1274 1274 struct compound_state *cs)
1275 1275 {
1276 1276 ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
1277 1277 ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
1278 1278 int error;
1279 1279 vnode_t *vp;
1280 1280 struct vattr va;
1281 1281 int checkwriteperm;
1282 1282 cred_t *cr = cs->cr;
1283 1283 bslabel_t *clabel, *slabel;
1284 1284 ts_label_t *tslabel;
1285 1285 boolean_t admin_low_client;
1286 1286
1287 1287 DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs,
1288 1288 ACCESS4args *, args);
1289 1289
1290 1290 #if 0 /* XXX allow access even if !cs->access. Eventually only pseudo fs */
1291 1291 if (cs->access == CS_ACCESS_DENIED) {
1292 1292 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1293 1293 goto out;
1294 1294 }
1295 1295 #endif
1296 1296 if (cs->vp == NULL) {
1297 1297 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1298 1298 goto out;
1299 1299 }
1300 1300
1301 1301 ASSERT(cr != NULL);
1302 1302
1303 1303 vp = cs->vp;
1304 1304
1305 1305 /*
1306 1306 * If the file system is exported read only, it is not appropriate
1307 1307 * to check write permissions for regular files and directories.
1308 1308 * Special files are interpreted by the client, so the underlying
1309 1309 * permissions are sent back to the client for interpretation.
1310 1310 */
1311 1311 if (rdonly4(req, cs) &&
1312 1312 (vp->v_type == VREG || vp->v_type == VDIR))
1313 1313 checkwriteperm = 0;
1314 1314 else
1315 1315 checkwriteperm = 1;
1316 1316
1317 1317 /*
1318 1318 * XXX
1319 1319 * We need the mode so that we can correctly determine access
1320 1320 * permissions relative to a mandatory lock file. Access to
1321 1321 * mandatory lock files is denied on the server, so it might
1322 1322 * as well be reflected to the server during the open.
1323 1323 */
1324 1324 va.va_mask = AT_MODE;
1325 1325 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1326 1326 if (error) {
1327 1327 *cs->statusp = resp->status = puterrno4(error);
1328 1328 goto out;
1329 1329 }
1330 1330 resp->access = 0;
1331 1331 resp->supported = 0;
1332 1332
1333 1333 if (is_system_labeled()) {
1334 1334 ASSERT(req->rq_label != NULL);
1335 1335 clabel = req->rq_label;
1336 1336 DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *,
1337 1337 "got client label from request(1)",
1338 1338 struct svc_req *, req);
1339 1339 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1340 1340 if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) {
1341 1341 *cs->statusp = resp->status = puterrno4(EACCES);
1342 1342 goto out;
1343 1343 }
1344 1344 slabel = label2bslabel(tslabel);
1345 1345 DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel,
1346 1346 char *, "got server label(1) for vp(2)",
1347 1347 bslabel_t *, slabel, vnode_t *, vp);
1348 1348
1349 1349 admin_low_client = B_FALSE;
1350 1350 } else
1351 1351 admin_low_client = B_TRUE;
1352 1352 }
1353 1353
1354 1354 if (args->access & ACCESS4_READ) {
1355 1355 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
1356 1356 if (!error && !MANDLOCK(vp, va.va_mode) &&
1357 1357 (!is_system_labeled() || admin_low_client ||
1358 1358 bldominates(clabel, slabel)))
1359 1359 resp->access |= ACCESS4_READ;
1360 1360 resp->supported |= ACCESS4_READ;
1361 1361 }
1362 1362 if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
1363 1363 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1364 1364 if (!error && (!is_system_labeled() || admin_low_client ||
1365 1365 bldominates(clabel, slabel)))
1366 1366 resp->access |= ACCESS4_LOOKUP;
1367 1367 resp->supported |= ACCESS4_LOOKUP;
1368 1368 }
1369 1369 if (checkwriteperm &&
1370 1370 (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
1371 1371 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1372 1372 if (!error && !MANDLOCK(vp, va.va_mode) &&
1373 1373 (!is_system_labeled() || admin_low_client ||
1374 1374 blequal(clabel, slabel)))
1375 1375 resp->access |=
1376 1376 (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND));
1377 1377 resp->supported |=
1378 1378 resp->access & (ACCESS4_MODIFY | ACCESS4_EXTEND);
1379 1379 }
1380 1380
1381 1381 if (checkwriteperm &&
1382 1382 (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
1383 1383 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1384 1384 if (!error && (!is_system_labeled() || admin_low_client ||
1385 1385 blequal(clabel, slabel)))
1386 1386 resp->access |= ACCESS4_DELETE;
1387 1387 resp->supported |= ACCESS4_DELETE;
1388 1388 }
1389 1389 if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
1390 1390 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1391 1391 if (!error && !MANDLOCK(vp, va.va_mode) &&
1392 1392 (!is_system_labeled() || admin_low_client ||
1393 1393 bldominates(clabel, slabel)))
1394 1394 resp->access |= ACCESS4_EXECUTE;
1395 1395 resp->supported |= ACCESS4_EXECUTE;
1396 1396 }
1397 1397
1398 1398 if (is_system_labeled() && !admin_low_client)
1399 1399 label_rele(tslabel);
1400 1400
1401 1401 *cs->statusp = resp->status = NFS4_OK;
1402 1402 out:
1403 1403 DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
1404 1404 ACCESS4res *, resp);
1405 1405 }
1406 1406
1407 1407 /* ARGSUSED */
1408 1408 static void
1409 1409 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1410 1410 struct compound_state *cs)
1411 1411 {
1412 1412 COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1413 1413 COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1414 1414 int error;
1415 1415 vnode_t *vp = cs->vp;
1416 1416 cred_t *cr = cs->cr;
1417 1417 vattr_t va;
1418 1418
1419 1419 DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1420 1420 COMMIT4args *, args);
1421 1421
1422 1422 if (vp == NULL) {
1423 1423 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1424 1424 goto out;
1425 1425 }
1426 1426 if (cs->access == CS_ACCESS_DENIED) {
1427 1427 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1428 1428 goto out;
1429 1429 }
1430 1430
1431 1431 if (args->offset + args->count < args->offset) {
1432 1432 *cs->statusp = resp->status = NFS4ERR_INVAL;
1433 1433 goto out;
1434 1434 }
1435 1435
1436 1436 va.va_mask = AT_UID;
1437 1437 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1438 1438
1439 1439 /*
1440 1440 * If we can't get the attributes, then we can't do the
1441 1441 * right access checking. So, we'll fail the request.
1442 1442 */
1443 1443 if (error) {
1444 1444 *cs->statusp = resp->status = puterrno4(error);
1445 1445 goto out;
1446 1446 }
1447 1447 if (rdonly4(req, cs)) {
1448 1448 *cs->statusp = resp->status = NFS4ERR_ROFS;
1449 1449 goto out;
1450 1450 }
1451 1451
1452 1452 if (vp->v_type != VREG) {
1453 1453 if (vp->v_type == VDIR)
1454 1454 resp->status = NFS4ERR_ISDIR;
1455 1455 else
1456 1456 resp->status = NFS4ERR_INVAL;
1457 1457 *cs->statusp = resp->status;
1458 1458 goto out;
1459 1459 }
1460 1460
1461 1461 if (crgetuid(cr) != va.va_uid &&
1462 1462 (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
1463 1463 *cs->statusp = resp->status = puterrno4(error);
1464 1464 goto out;
1465 1465 }
1466 1466
1467 1467 error = VOP_FSYNC(vp, FSYNC, cr, NULL);
1468 1468
1469 1469 if (error) {
1470 1470 *cs->statusp = resp->status = puterrno4(error);
1471 1471 goto out;
1472 1472 }
1473 1473
1474 1474 *cs->statusp = resp->status = NFS4_OK;
1475 1475 resp->writeverf = Write4verf;
1476 1476 out:
1477 1477 DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1478 1478 COMMIT4res *, resp);
1479 1479 }
1480 1480
1481 1481 /*
1482 1482 * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1483 1483 * was completed. It does the nfsv4 create for special files.
1484 1484 */
1485 1485 /* ARGSUSED */
1486 1486 static vnode_t *
1487 1487 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1488 1488 struct compound_state *cs, vattr_t *vap, char *nm)
1489 1489 {
1490 1490 int error;
1491 1491 cred_t *cr = cs->cr;
1492 1492 vnode_t *dvp = cs->vp;
1493 1493 vnode_t *vp = NULL;
1494 1494 int mode;
1495 1495 enum vcexcl excl;
1496 1496
1497 1497 switch (args->type) {
1498 1498 case NF4CHR:
1499 1499 case NF4BLK:
1500 1500 if (secpolicy_sys_devices(cr) != 0) {
1501 1501 *cs->statusp = resp->status = NFS4ERR_PERM;
1502 1502 return (NULL);
1503 1503 }
1504 1504 if (args->type == NF4CHR)
1505 1505 vap->va_type = VCHR;
1506 1506 else
1507 1507 vap->va_type = VBLK;
1508 1508 vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1509 1509 args->ftype4_u.devdata.specdata2);
1510 1510 vap->va_mask |= AT_RDEV;
1511 1511 break;
1512 1512 case NF4SOCK:
1513 1513 vap->va_type = VSOCK;
1514 1514 break;
1515 1515 case NF4FIFO:
1516 1516 vap->va_type = VFIFO;
1517 1517 break;
1518 1518 default:
1519 1519 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1520 1520 return (NULL);
1521 1521 }
1522 1522
1523 1523 /*
1524 1524 * Must specify the mode.
1525 1525 */
1526 1526 if (!(vap->va_mask & AT_MODE)) {
1527 1527 *cs->statusp = resp->status = NFS4ERR_INVAL;
1528 1528 return (NULL);
1529 1529 }
1530 1530
1531 1531 excl = EXCL;
1532 1532
1533 1533 mode = 0;
1534 1534
1535 1535 error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL);
1536 1536 if (error) {
1537 1537 *cs->statusp = resp->status = puterrno4(error);
1538 1538 return (NULL);
1539 1539 }
1540 1540 return (vp);
1541 1541 }
1542 1542
1543 1543 /*
1544 1544 * nfsv4 create is used to create non-regular files. For regular files,
1545 1545 * use nfsv4 open.
1546 1546 */
1547 1547 /* ARGSUSED */
1548 1548 static void
1549 1549 rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1550 1550 struct compound_state *cs)
1551 1551 {
1552 1552 CREATE4args *args = &argop->nfs_argop4_u.opcreate;
1553 1553 CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
1554 1554 int error;
1555 1555 struct vattr bva, iva, iva2, ava, *vap;
1556 1556 cred_t *cr = cs->cr;
1557 1557 vnode_t *dvp = cs->vp;
1558 1558 vnode_t *vp = NULL;
1559 1559 vnode_t *realvp;
1560 1560 char *nm, *lnm;
1561 1561 uint_t len, llen;
1562 1562 int syncval = 0;
1563 1563 struct nfs4_svgetit_arg sarg;
1564 1564 struct nfs4_ntov_table ntov;
1565 1565 struct statvfs64 sb;
1566 1566 nfsstat4 status;
1567 1567 struct sockaddr *ca;
1568 1568 char *name = NULL;
1569 1569 char *lname = NULL;
1570 1570
1571 1571 DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs,
1572 1572 CREATE4args *, args);
1573 1573
1574 1574 resp->attrset = 0;
1575 1575
1576 1576 if (dvp == NULL) {
1577 1577 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1578 1578 goto out;
1579 1579 }
1580 1580
1581 1581 /*
1582 1582 * If there is an unshared filesystem mounted on this vnode,
1583 1583 * do not allow to create an object in this directory.
1584 1584 */
1585 1585 if (vn_ismntpt(dvp)) {
1586 1586 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1587 1587 goto out;
1588 1588 }
1589 1589
1590 1590 /* Verify that type is correct */
1591 1591 switch (args->type) {
1592 1592 case NF4LNK:
1593 1593 case NF4BLK:
1594 1594 case NF4CHR:
1595 1595 case NF4SOCK:
1596 1596 case NF4FIFO:
1597 1597 case NF4DIR:
1598 1598 break;
1599 1599 default:
1600 1600 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1601 1601 goto out;
1602 1602 };
1603 1603
1604 1604 if (cs->access == CS_ACCESS_DENIED) {
1605 1605 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1606 1606 goto out;
1607 1607 }
1608 1608 if (dvp->v_type != VDIR) {
1609 1609 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1610 1610 goto out;
1611 1611 }
1612 1612 status = utf8_dir_verify(&args->objname);
1613 1613 if (status != NFS4_OK) {
1614 1614 *cs->statusp = resp->status = status;
1615 1615 goto out;
1616 1616 }
1617 1617
1618 1618 if (rdonly4(req, cs)) {
1619 1619 *cs->statusp = resp->status = NFS4ERR_ROFS;
1620 1620 goto out;
1621 1621 }
1622 1622
1623 1623 /*
1624 1624 * Name of newly created object
1625 1625 */
1626 1626 nm = utf8_to_fn(&args->objname, &len, NULL);
1627 1627 if (nm == NULL) {
1628 1628 *cs->statusp = resp->status = NFS4ERR_INVAL;
1629 1629 goto out;
1630 1630 }
1631 1631
1632 1632 if (len > MAXNAMELEN) {
1633 1633 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1634 1634 kmem_free(nm, len);
1635 1635 goto out;
1636 1636 }
1637 1637
1638 1638 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1639 1639 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1640 1640 MAXPATHLEN + 1);
1641 1641
1642 1642 if (name == NULL) {
1643 1643 *cs->statusp = resp->status = NFS4ERR_INVAL;
1644 1644 kmem_free(nm, len);
1645 1645 goto out;
1646 1646 }
1647 1647
1648 1648 resp->attrset = 0;
1649 1649
1650 1650 sarg.sbp = &sb;
1651 1651 sarg.is_referral = B_FALSE;
1652 1652 nfs4_ntov_table_init(&ntov);
1653 1653
1654 1654 status = do_rfs4_set_attrs(&resp->attrset,
1655 1655 &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT);
1656 1656
1657 1657 if (sarg.vap->va_mask == 0 && status == NFS4_OK)
1658 1658 status = NFS4ERR_INVAL;
1659 1659
1660 1660 if (status != NFS4_OK) {
1661 1661 *cs->statusp = resp->status = status;
1662 1662 if (name != nm)
1663 1663 kmem_free(name, MAXPATHLEN + 1);
1664 1664 kmem_free(nm, len);
1665 1665 nfs4_ntov_table_free(&ntov, &sarg);
1666 1666 resp->attrset = 0;
1667 1667 goto out;
1668 1668 }
1669 1669
1670 1670 /* Get "before" change value */
1671 1671 bva.va_mask = AT_CTIME|AT_SEQ|AT_MODE;
1672 1672 error = VOP_GETATTR(dvp, &bva, 0, cr, NULL);
1673 1673 if (error) {
1674 1674 *cs->statusp = resp->status = puterrno4(error);
1675 1675 if (name != nm)
1676 1676 kmem_free(name, MAXPATHLEN + 1);
1677 1677 kmem_free(nm, len);
1678 1678 nfs4_ntov_table_free(&ntov, &sarg);
1679 1679 resp->attrset = 0;
1680 1680 goto out;
1681 1681 }
1682 1682 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)
1683 1683
1684 1684 vap = sarg.vap;
1685 1685
1686 1686 /*
1687 1687 * Set the default initial values for attributes when the parent
1688 1688 * directory does not have the VSUID/VSGID bit set and they have
1689 1689 * not been specified in createattrs.
1690 1690 */
1691 1691 if (!(bva.va_mode & VSUID) && (vap->va_mask & AT_UID) == 0) {
1692 1692 vap->va_uid = crgetuid(cr);
1693 1693 vap->va_mask |= AT_UID;
1694 1694 }
1695 1695 if (!(bva.va_mode & VSGID) && (vap->va_mask & AT_GID) == 0) {
1696 1696 vap->va_gid = crgetgid(cr);
1697 1697 vap->va_mask |= AT_GID;
1698 1698 }
1699 1699
1700 1700 vap->va_mask |= AT_TYPE;
1701 1701 switch (args->type) {
1702 1702 case NF4DIR:
1703 1703 vap->va_type = VDIR;
1704 1704 if ((vap->va_mask & AT_MODE) == 0) {
1705 1705 vap->va_mode = 0700; /* default: owner rwx only */
1706 1706 vap->va_mask |= AT_MODE;
1707 1707 }
1708 1708 error = VOP_MKDIR(dvp, name, vap, &vp, cr, NULL, 0, NULL);
1709 1709 if (error)
1710 1710 break;
1711 1711
1712 1712 /*
1713 1713 * Get the initial "after" sequence number, if it fails,
1714 1714 * set to zero
1715 1715 */
1716 1716 iva.va_mask = AT_SEQ;
1717 1717 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1718 1718 iva.va_seq = 0;
1719 1719 break;
1720 1720 case NF4LNK:
1721 1721 vap->va_type = VLNK;
1722 1722 if ((vap->va_mask & AT_MODE) == 0) {
1723 1723 vap->va_mode = 0700; /* default: owner rwx only */
1724 1724 vap->va_mask |= AT_MODE;
1725 1725 }
1726 1726
1727 1727 /*
1728 1728 * symlink names must be treated as data
1729 1729 */
1730 1730 lnm = utf8_to_str((utf8string *)&args->ftype4_u.linkdata,
1731 1731 &llen, NULL);
1732 1732
1733 1733 if (lnm == NULL) {
1734 1734 *cs->statusp = resp->status = NFS4ERR_INVAL;
1735 1735 if (name != nm)
1736 1736 kmem_free(name, MAXPATHLEN + 1);
1737 1737 kmem_free(nm, len);
1738 1738 nfs4_ntov_table_free(&ntov, &sarg);
1739 1739 resp->attrset = 0;
1740 1740 goto out;
1741 1741 }
1742 1742
1743 1743 if (llen > MAXPATHLEN) {
1744 1744 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1745 1745 if (name != nm)
1746 1746 kmem_free(name, MAXPATHLEN + 1);
1747 1747 kmem_free(nm, len);
1748 1748 kmem_free(lnm, llen);
1749 1749 nfs4_ntov_table_free(&ntov, &sarg);
1750 1750 resp->attrset = 0;
1751 1751 goto out;
1752 1752 }
1753 1753
1754 1754 lname = nfscmd_convname(ca, cs->exi, lnm,
1755 1755 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1756 1756
1757 1757 if (lname == NULL) {
1758 1758 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
1759 1759 if (name != nm)
1760 1760 kmem_free(name, MAXPATHLEN + 1);
1761 1761 kmem_free(nm, len);
1762 1762 kmem_free(lnm, llen);
1763 1763 nfs4_ntov_table_free(&ntov, &sarg);
1764 1764 resp->attrset = 0;
1765 1765 goto out;
1766 1766 }
1767 1767
1768 1768 error = VOP_SYMLINK(dvp, name, vap, lname, cr, NULL, 0);
1769 1769 if (lname != lnm)
1770 1770 kmem_free(lname, MAXPATHLEN + 1);
1771 1771 kmem_free(lnm, llen);
1772 1772 if (error)
1773 1773 break;
1774 1774
1775 1775 /*
1776 1776 * Get the initial "after" sequence number, if it fails,
1777 1777 * set to zero
1778 1778 */
1779 1779 iva.va_mask = AT_SEQ;
1780 1780 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1781 1781 iva.va_seq = 0;
1782 1782
1783 1783 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
1784 1784 NULL, NULL, NULL);
1785 1785 if (error)
1786 1786 break;
1787 1787
1788 1788 /*
1789 1789 * va_seq is not safe over VOP calls, check it again
1790 1790 * if it has changed zero out iva to force atomic = FALSE.
1791 1791 */
1792 1792 iva2.va_mask = AT_SEQ;
1793 1793 if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, NULL) ||
1794 1794 iva2.va_seq != iva.va_seq)
1795 1795 iva.va_seq = 0;
1796 1796 break;
1797 1797 default:
1798 1798 /*
1799 1799 * probably a special file.
1800 1800 */
1801 1801 if ((vap->va_mask & AT_MODE) == 0) {
1802 1802 vap->va_mode = 0600; /* default: owner rw only */
1803 1803 vap->va_mask |= AT_MODE;
1804 1804 }
1805 1805 syncval = FNODSYNC;
1806 1806 /*
1807 1807 * We know this will only generate one VOP call
1808 1808 */
1809 1809 vp = do_rfs4_op_mknod(args, resp, req, cs, vap, name);
1810 1810
1811 1811 if (vp == NULL) {
1812 1812 if (name != nm)
1813 1813 kmem_free(name, MAXPATHLEN + 1);
1814 1814 kmem_free(nm, len);
1815 1815 nfs4_ntov_table_free(&ntov, &sarg);
1816 1816 resp->attrset = 0;
1817 1817 goto out;
1818 1818 }
1819 1819
1820 1820 /*
1821 1821 * Get the initial "after" sequence number, if it fails,
1822 1822 * set to zero
1823 1823 */
1824 1824 iva.va_mask = AT_SEQ;
1825 1825 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1826 1826 iva.va_seq = 0;
1827 1827
1828 1828 break;
1829 1829 }
1830 1830 if (name != nm)
1831 1831 kmem_free(name, MAXPATHLEN + 1);
1832 1832 kmem_free(nm, len);
1833 1833
1834 1834 if (error) {
1835 1835 *cs->statusp = resp->status = puterrno4(error);
1836 1836 }
1837 1837
1838 1838 /*
1839 1839 * Force modified data and metadata out to stable storage.
1840 1840 */
1841 1841 (void) VOP_FSYNC(dvp, 0, cr, NULL);
1842 1842
1843 1843 if (resp->status != NFS4_OK) {
1844 1844 if (vp != NULL)
1845 1845 VN_RELE(vp);
1846 1846 nfs4_ntov_table_free(&ntov, &sarg);
1847 1847 resp->attrset = 0;
1848 1848 goto out;
1849 1849 }
1850 1850
1851 1851 /*
1852 1852 * Finish setup of cinfo response, "before" value already set.
1853 1853 * Get "after" change value, if it fails, simply return the
1854 1854 * before value.
1855 1855 */
1856 1856 ava.va_mask = AT_CTIME|AT_SEQ;
1857 1857 if (VOP_GETATTR(dvp, &ava, 0, cr, NULL)) {
1858 1858 ava.va_ctime = bva.va_ctime;
1859 1859 ava.va_seq = 0;
1860 1860 }
1861 1861 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);
1862 1862
1863 1863 /*
1864 1864 * True verification that object was created with correct
1865 1865 * attrs is impossible. The attrs could have been changed
1866 1866 * immediately after object creation. If attributes did
1867 1867 * not verify, the only recourse for the server is to
1868 1868 * destroy the object. Maybe if some attrs (like gid)
1869 1869 * are set incorrectly, the object should be destroyed;
1870 1870 * however, seems bad as a default policy. Do we really
1871 1871 * want to destroy an object over one of the times not
1872 1872 * verifying correctly? For these reasons, the server
1873 1873 * currently sets bits in attrset for createattrs
1874 1874 * that were set; however, no verification is done.
1875 1875 *
1876 1876 * vmask_to_nmask accounts for vattr bits set on create
1877 1877 * [do_rfs4_set_attrs() only sets resp bits for
1878 1878 * non-vattr/vfs bits.]
1879 1879 * Mask off any bits set by default so as not to return
1880 1880 * more attrset bits than were requested in createattrs
1881 1881 */
1882 1882 nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
1883 1883 resp->attrset &= args->createattrs.attrmask;
1884 1884 nfs4_ntov_table_free(&ntov, &sarg);
1885 1885
1886 1886 error = makefh4(&cs->fh, vp, cs->exi);
1887 1887 if (error) {
1888 1888 *cs->statusp = resp->status = puterrno4(error);
1889 1889 }
1890 1890
1891 1891 /*
1892 1892 * The cinfo.atomic = TRUE only if we got no errors, we have
1893 1893 * non-zero va_seq's, and it has incremented by exactly one
1894 1894 * during the creation and it didn't change during the VOP_LOOKUP
1895 1895 * or VOP_FSYNC.
1896 1896 */
1897 1897 if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
1898 1898 iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
1899 1899 resp->cinfo.atomic = TRUE;
1900 1900 else
1901 1901 resp->cinfo.atomic = FALSE;
1902 1902
1903 1903 /*
1904 1904 * Force modified metadata out to stable storage.
1905 1905 *
1906 1906 * if a underlying vp exists, pass it to VOP_FSYNC
1907 1907 */
1908 1908 if (VOP_REALVP(vp, &realvp, NULL) == 0)
1909 1909 (void) VOP_FSYNC(realvp, syncval, cr, NULL);
1910 1910 else
1911 1911 (void) VOP_FSYNC(vp, syncval, cr, NULL);
1912 1912
1913 1913 if (resp->status != NFS4_OK) {
1914 1914 VN_RELE(vp);
1915 1915 goto out;
1916 1916 }
1917 1917 if (cs->vp)
1918 1918 VN_RELE(cs->vp);
1919 1919
1920 1920 cs->vp = vp;
1921 1921 *cs->statusp = resp->status = NFS4_OK;
1922 1922 out:
1923 1923 DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs,
1924 1924 CREATE4res *, resp);
1925 1925 }
1926 1926
1927 1927 /*ARGSUSED*/
1928 1928 static void
1929 1929 rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1930 1930 struct compound_state *cs)
1931 1931 {
1932 1932 DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs,
1933 1933 DELEGPURGE4args *, &argop->nfs_argop4_u.opdelegpurge);
1934 1934
1935 1935 rfs4_op_inval(argop, resop, req, cs);
1936 1936
1937 1937 DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs,
1938 1938 DELEGPURGE4res *, &resop->nfs_resop4_u.opdelegpurge);
1939 1939 }
1940 1940
1941 1941 /*ARGSUSED*/
1942 1942 static void
1943 1943 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1944 1944 struct compound_state *cs)
1945 1945 {
1946 1946 DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
1947 1947 DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
1948 1948 rfs4_deleg_state_t *dsp;
1949 1949 nfsstat4 status;
1950 1950
1951 1951 DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs,
1952 1952 DELEGRETURN4args *, args);
1953 1953
1954 1954 status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
1955 1955 resp->status = *cs->statusp = status;
1956 1956 if (status != NFS4_OK)
1957 1957 goto out;
1958 1958
1959 1959 /* Ensure specified filehandle matches */
1960 1960 if (cs->vp != dsp->rds_finfo->rf_vp) {
1961 1961 resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
1962 1962 } else
1963 1963 rfs4_return_deleg(dsp, FALSE);
1964 1964
1965 1965 rfs4_update_lease(dsp->rds_client);
1966 1966
1967 1967 rfs4_deleg_state_rele(dsp);
1968 1968 out:
1969 1969 DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs,
1970 1970 DELEGRETURN4res *, resp);
1971 1971 }
1972 1972
1973 1973 /*
1974 1974 * Check to see if a given "flavor" is an explicitly shared flavor.
1975 1975 * The assumption of this routine is the "flavor" is already a valid
1976 1976 * flavor in the secinfo list of "exi".
1977 1977 *
1978 1978 * e.g.
1979 1979 * # share -o sec=flavor1 /export
1980 1980 * # share -o sec=flavor2 /export/home
1981 1981 *
1982 1982 * flavor2 is not an explicitly shared flavor for /export,
1983 1983 * however it is in the secinfo list for /export thru the
1984 1984 * server namespace setup.
1985 1985 */
1986 1986 int
1987 1987 is_exported_sec(int flavor, struct exportinfo *exi)
1988 1988 {
1989 1989 int i;
1990 1990 struct secinfo *sp;
1991 1991
1992 1992 sp = exi->exi_export.ex_secinfo;
1993 1993 for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
1994 1994 if (flavor == sp[i].s_secinfo.sc_nfsnum ||
1995 1995 sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
1996 1996 return (SEC_REF_EXPORTED(&sp[i]));
1997 1997 }
1998 1998 }
1999 1999
2000 2000 /* Should not reach this point based on the assumption */
2001 2001 return (0);
2002 2002 }
2003 2003
2004 2004 /*
2005 2005 * Check if the security flavor used in the request matches what is
2006 2006 * required at the export point or at the root pseudo node (exi_root).
2007 2007 *
2008 2008 * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
2009 2009 *
2010 2010 */
2011 2011 static int
2012 2012 secinfo_match_or_authnone(struct compound_state *cs)
2013 2013 {
2014 2014 int i;
2015 2015 struct secinfo *sp;
2016 2016
2017 2017 /*
2018 2018 * Check cs->nfsflavor (from the request) against
2019 2019 * the current export data in cs->exi.
2020 2020 */
2021 2021 sp = cs->exi->exi_export.ex_secinfo;
2022 2022 for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
2023 2023 if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
2024 2024 sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
2025 2025 return (1);
2026 2026 }
2027 2027
2028 2028 return (0);
2029 2029 }
2030 2030
2031 2031 /*
2032 2032 * Check the access authority for the client and return the correct error.
2033 2033 */
2034 2034 nfsstat4
2035 2035 call_checkauth4(struct compound_state *cs, struct svc_req *req)
2036 2036 {
2037 2037 int authres;
2038 2038
2039 2039 /*
2040 2040 * First, check if the security flavor used in the request
2041 2041 * are among the flavors set in the server namespace.
2042 2042 */
2043 2043 if (!secinfo_match_or_authnone(cs)) {
2044 2044 *cs->statusp = NFS4ERR_WRONGSEC;
2045 2045 return (*cs->statusp);
2046 2046 }
2047 2047
2048 2048 authres = checkauth4(cs, req);
2049 2049
2050 2050 if (authres > 0) {
2051 2051 *cs->statusp = NFS4_OK;
2052 2052 if (! (cs->access & CS_ACCESS_LIMITED))
2053 2053 cs->access = CS_ACCESS_OK;
2054 2054 } else if (authres == 0) {
2055 2055 *cs->statusp = NFS4ERR_ACCESS;
2056 2056 } else if (authres == -2) {
2057 2057 *cs->statusp = NFS4ERR_WRONGSEC;
2058 2058 } else {
2059 2059 *cs->statusp = NFS4ERR_DELAY;
2060 2060 }
2061 2061 return (*cs->statusp);
2062 2062 }
2063 2063
2064 2064 /*
2065 2065 * bitmap4_to_attrmask is called by getattr and readdir.
2066 2066 * It sets up the vattr mask and determines whether vfsstat call is needed
2067 2067 * based on the input bitmap.
2068 2068 * Returns nfsv4 status.
2069 2069 */
2070 2070 static nfsstat4
2071 2071 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
2072 2072 {
2073 2073 int i;
2074 2074 uint_t va_mask;
2075 2075 struct statvfs64 *sbp = sargp->sbp;
2076 2076
2077 2077 sargp->sbp = NULL;
2078 2078 sargp->flag = 0;
2079 2079 sargp->rdattr_error = NFS4_OK;
2080 2080 sargp->mntdfid_set = FALSE;
2081 2081 if (sargp->cs->vp)
2082 2082 sargp->xattr = get_fh4_flag(&sargp->cs->fh,
2083 2083 FH4_ATTRDIR | FH4_NAMEDATTR);
2084 2084 else
2085 2085 sargp->xattr = 0;
2086 2086
2087 2087 /*
2088 2088 * Set rdattr_error_req to true if return error per
2089 2089 * failed entry rather than fail the readdir.
2090 2090 */
2091 2091 if (breq & FATTR4_RDATTR_ERROR_MASK)
2092 2092 sargp->rdattr_error_req = 1;
2093 2093 else
2094 2094 sargp->rdattr_error_req = 0;
2095 2095
2096 2096 /*
2097 2097 * generate the va_mask
2098 2098 * Handle the easy cases first
2099 2099 */
2100 2100 switch (breq) {
2101 2101 case NFS4_NTOV_ATTR_MASK:
2102 2102 sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
2103 2103 return (NFS4_OK);
2104 2104
2105 2105 case NFS4_FS_ATTR_MASK:
2106 2106 sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
2107 2107 sargp->sbp = sbp;
2108 2108 return (NFS4_OK);
2109 2109
2110 2110 case NFS4_NTOV_ATTR_CACHE_MASK:
2111 2111 sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
2112 2112 return (NFS4_OK);
2113 2113
2114 2114 case FATTR4_LEASE_TIME_MASK:
2115 2115 sargp->vap->va_mask = 0;
2116 2116 return (NFS4_OK);
2117 2117
2118 2118 default:
2119 2119 va_mask = 0;
2120 2120 for (i = 0; i < nfs4_ntov_map_size; i++) {
2121 2121 if ((breq & nfs4_ntov_map[i].fbit) &&
2122 2122 nfs4_ntov_map[i].vbit)
2123 2123 va_mask |= nfs4_ntov_map[i].vbit;
2124 2124 }
2125 2125
2126 2126 /*
2127 2127 * Check is vfsstat is needed
2128 2128 */
2129 2129 if (breq & NFS4_FS_ATTR_MASK)
2130 2130 sargp->sbp = sbp;
2131 2131
2132 2132 sargp->vap->va_mask = va_mask;
2133 2133 return (NFS4_OK);
2134 2134 }
2135 2135 /* NOTREACHED */
2136 2136 }
2137 2137
2138 2138 /*
2139 2139 * bitmap4_get_sysattrs is called by getattr and readdir.
2140 2140 * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs.
2141 2141 * Returns nfsv4 status.
2142 2142 */
2143 2143 static nfsstat4
2144 2144 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
2145 2145 {
2146 2146 int error;
2147 2147 struct compound_state *cs = sargp->cs;
2148 2148 vnode_t *vp = cs->vp;
2149 2149
2150 2150 if (sargp->sbp != NULL) {
2151 2151 if (error = VFS_STATVFS(vp->v_vfsp, sargp->sbp)) {
2152 2152 sargp->sbp = NULL; /* to identify error */
2153 2153 return (puterrno4(error));
2154 2154 }
2155 2155 }
2156 2156
2157 2157 return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
2158 2158 }
2159 2159
2160 2160 static void
2161 2161 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
2162 2162 {
2163 2163 ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
2164 2164 KM_SLEEP);
2165 2165 ntovp->attrcnt = 0;
2166 2166 ntovp->vfsstat = FALSE;
2167 2167 }
2168 2168
2169 2169 static void
2170 2170 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
2171 2171 struct nfs4_svgetit_arg *sargp)
2172 2172 {
2173 2173 int i;
2174 2174 union nfs4_attr_u *na;
2175 2175 uint8_t *amap;
2176 2176
2177 2177 /*
2178 2178 * XXX Should do the same checks for whether the bit is set
2179 2179 */
2180 2180 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2181 2181 i < ntovp->attrcnt; i++, na++, amap++) {
2182 2182 (void) (*nfs4_ntov_map[*amap].sv_getit)(
2183 2183 NFS4ATTR_FREEIT, sargp, na);
2184 2184 }
2185 2185 if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
2186 2186 /*
2187 2187 * xdr_free for getattr will be done later
2188 2188 */
2189 2189 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2190 2190 i < ntovp->attrcnt; i++, na++, amap++) {
2191 2191 xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
2192 2192 }
2193 2193 }
2194 2194 kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
2195 2195 }
2196 2196
2197 2197 /*
2198 2198 * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
2199 2199 */
2200 2200 static nfsstat4
2201 2201 do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
2202 2202 struct nfs4_svgetit_arg *sargp)
2203 2203 {
2204 2204 int error = 0;
2205 2205 int i, k;
2206 2206 struct nfs4_ntov_table ntov;
2207 2207 XDR xdr;
2208 2208 ulong_t xdr_size;
2209 2209 char *xdr_attrs;
2210 2210 nfsstat4 status = NFS4_OK;
2211 2211 nfsstat4 prev_rdattr_error = sargp->rdattr_error;
2212 2212 union nfs4_attr_u *na;
2213 2213 uint8_t *amap;
2214 2214
2215 2215 sargp->op = NFS4ATTR_GETIT;
2216 2216 sargp->flag = 0;
2217 2217
2218 2218 fattrp->attrmask = 0;
2219 2219 /* if no bits requested, then return empty fattr4 */
2220 2220 if (breq == 0) {
2221 2221 fattrp->attrlist4_len = 0;
2222 2222 fattrp->attrlist4 = NULL;
2223 2223 return (NFS4_OK);
2224 2224 }
2225 2225
2226 2226 /*
2227 2227 * return NFS4ERR_INVAL when client requests write-only attrs
2228 2228 */
2229 2229 if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
2230 2230 return (NFS4ERR_INVAL);
2231 2231
2232 2232 nfs4_ntov_table_init(&ntov);
2233 2233 na = ntov.na;
2234 2234 amap = ntov.amap;
2235 2235
2236 2236 /*
2237 2237 * Now loop to get or verify the attrs
2238 2238 */
2239 2239 for (i = 0; i < nfs4_ntov_map_size; i++) {
2240 2240 if (breq & nfs4_ntov_map[i].fbit) {
2241 2241 if ((*nfs4_ntov_map[i].sv_getit)(
2242 2242 NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {
2243 2243
2244 2244 error = (*nfs4_ntov_map[i].sv_getit)(
2245 2245 NFS4ATTR_GETIT, sargp, na);
2246 2246
2247 2247 /*
2248 2248 * Possible error values:
2249 2249 * >0 if sv_getit failed to
2250 2250 * get the attr; 0 if succeeded;
2251 2251 * <0 if rdattr_error and the
2252 2252 * attribute cannot be returned.
2253 2253 */
2254 2254 if (error && !(sargp->rdattr_error_req))
2255 2255 goto done;
2256 2256 /*
2257 2257 * If error then just for entry
2258 2258 */
2259 2259 if (error == 0) {
2260 2260 fattrp->attrmask |=
2261 2261 nfs4_ntov_map[i].fbit;
2262 2262 *amap++ =
2263 2263 (uint8_t)nfs4_ntov_map[i].nval;
2264 2264 na++;
2265 2265 (ntov.attrcnt)++;
2266 2266 } else if ((error > 0) &&
2267 2267 (sargp->rdattr_error == NFS4_OK)) {
2268 2268 sargp->rdattr_error = puterrno4(error);
2269 2269 }
2270 2270 error = 0;
2271 2271 }
2272 2272 }
2273 2273 }
2274 2274
2275 2275 /*
2276 2276 * If rdattr_error was set after the return value for it was assigned,
2277 2277 * update it.
2278 2278 */
2279 2279 if (prev_rdattr_error != sargp->rdattr_error) {
2280 2280 na = ntov.na;
2281 2281 amap = ntov.amap;
2282 2282 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2283 2283 k = *amap;
2284 2284 if (k < FATTR4_RDATTR_ERROR) {
2285 2285 continue;
2286 2286 }
2287 2287 if ((k == FATTR4_RDATTR_ERROR) &&
2288 2288 ((*nfs4_ntov_map[k].sv_getit)(
2289 2289 NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {
2290 2290
2291 2291 (void) (*nfs4_ntov_map[k].sv_getit)(
2292 2292 NFS4ATTR_GETIT, sargp, na);
2293 2293 }
2294 2294 break;
2295 2295 }
2296 2296 }
2297 2297
2298 2298 xdr_size = 0;
2299 2299 na = ntov.na;
2300 2300 amap = ntov.amap;
2301 2301 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2302 2302 xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
2303 2303 }
2304 2304
2305 2305 fattrp->attrlist4_len = xdr_size;
2306 2306 if (xdr_size) {
2307 2307 /* freed by rfs4_op_getattr_free() */
2308 2308 fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);
2309 2309
2310 2310 xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);
2311 2311
2312 2312 na = ntov.na;
2313 2313 amap = ntov.amap;
2314 2314 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2315 2315 if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
2316 2316 DTRACE_PROBE1(nfss__e__getattr4_encfail,
2317 2317 int, *amap);
2318 2318 status = NFS4ERR_SERVERFAULT;
2319 2319 break;
2320 2320 }
2321 2321 }
2322 2322 /* xdrmem_destroy(&xdrs); */ /* NO-OP */
2323 2323 } else {
2324 2324 fattrp->attrlist4 = NULL;
2325 2325 }
2326 2326 done:
2327 2327
2328 2328 nfs4_ntov_table_free(&ntov, sargp);
2329 2329
2330 2330 if (error != 0)
2331 2331 status = puterrno4(error);
2332 2332
2333 2333 return (status);
2334 2334 }
2335 2335
2336 2336 /* ARGSUSED */
2337 2337 static void
2338 2338 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2339 2339 struct compound_state *cs)
2340 2340 {
2341 2341 GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2342 2342 GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2343 2343 struct nfs4_svgetit_arg sarg;
2344 2344 struct statvfs64 sb;
2345 2345 nfsstat4 status;
2346 2346
2347 2347 DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs,
2348 2348 GETATTR4args *, args);
2349 2349
2350 2350 if (cs->vp == NULL) {
2351 2351 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2352 2352 goto out;
2353 2353 }
2354 2354
2355 2355 if (cs->access == CS_ACCESS_DENIED) {
2356 2356 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2357 2357 goto out;
2358 2358 }
2359 2359
2360 2360 sarg.sbp = &sb;
2361 2361 sarg.cs = cs;
2362 2362 sarg.is_referral = B_FALSE;
2363 2363
2364 2364 status = bitmap4_to_attrmask(args->attr_request, &sarg);
2365 2365 if (status == NFS4_OK) {
2366 2366
2367 2367 status = bitmap4_get_sysattrs(&sarg);
2368 2368 if (status == NFS4_OK) {
2369 2369
2370 2370 /* Is this a referral? */
2371 2371 if (vn_is_nfs_reparse(cs->vp, cs->cr)) {
2372 2372 /* Older V4 Solaris client sees a link */
2373 2373 if (client_is_downrev(req))
2374 2374 sarg.vap->va_type = VLNK;
2375 2375 else
2376 2376 sarg.is_referral = B_TRUE;
2377 2377 }
2378 2378
2379 2379 status = do_rfs4_op_getattr(args->attr_request,
2380 2380 &resp->obj_attributes, &sarg);
2381 2381 }
2382 2382 }
2383 2383 *cs->statusp = resp->status = status;
2384 2384 out:
2385 2385 DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs,
2386 2386 GETATTR4res *, resp);
2387 2387 }
2388 2388
2389 2389 static void
2390 2390 rfs4_op_getattr_free(nfs_resop4 *resop)
2391 2391 {
2392 2392 GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2393 2393
2394 2394 nfs4_fattr4_free(&resp->obj_attributes);
2395 2395 }
2396 2396
2397 2397 /* ARGSUSED */
2398 2398 static void
2399 2399 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2400 2400 struct compound_state *cs)
2401 2401 {
2402 2402 GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2403 2403
2404 2404 DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs);
2405 2405
2406 2406 if (cs->vp == NULL) {
2407 2407 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2408 2408 goto out;
2409 2409 }
2410 2410 if (cs->access == CS_ACCESS_DENIED) {
2411 2411 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2412 2412 goto out;
2413 2413 }
2414 2414
2415 2415 /* check for reparse point at the share point */
2416 2416 if (cs->exi->exi_moved || vn_is_nfs_reparse(cs->exi->exi_vp, cs->cr)) {
2417 2417 /* it's all bad */
2418 2418 cs->exi->exi_moved = 1;
2419 2419 *cs->statusp = resp->status = NFS4ERR_MOVED;
2420 2420 DTRACE_PROBE2(nfs4serv__func__referral__shared__moved,
2421 2421 vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2422 2422 return;
2423 2423 }
2424 2424
2425 2425 /* check for reparse point at vp */
2426 2426 if (vn_is_nfs_reparse(cs->vp, cs->cr) && !client_is_downrev(req)) {
2427 2427 /* it's not all bad */
2428 2428 *cs->statusp = resp->status = NFS4ERR_MOVED;
2429 2429 DTRACE_PROBE2(nfs4serv__func__referral__moved,
2430 2430 vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2431 2431 return;
2432 2432 }
2433 2433
2434 2434 resp->object.nfs_fh4_val =
2435 2435 kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2436 2436 nfs_fh4_copy(&cs->fh, &resp->object);
2437 2437 *cs->statusp = resp->status = NFS4_OK;
2438 2438 out:
2439 2439 DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs,
2440 2440 GETFH4res *, resp);
2441 2441 }
2442 2442
2443 2443 static void
2444 2444 rfs4_op_getfh_free(nfs_resop4 *resop)
2445 2445 {
2446 2446 GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2447 2447
2448 2448 if (resp->status == NFS4_OK &&
2449 2449 resp->object.nfs_fh4_val != NULL) {
2450 2450 kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2451 2451 resp->object.nfs_fh4_val = NULL;
2452 2452 resp->object.nfs_fh4_len = 0;
2453 2453 }
2454 2454 }
2455 2455
2456 2456 /*
2457 2457 * illegal: args: void
2458 2458 * res : status (NFS4ERR_OP_ILLEGAL)
2459 2459 */
2460 2460 /* ARGSUSED */
2461 2461 static void
2462 2462 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2463 2463 struct svc_req *req, struct compound_state *cs)
2464 2464 {
2465 2465 ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2466 2466
2467 2467 resop->resop = OP_ILLEGAL;
2468 2468 *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2469 2469 }
2470 2470
2471 2471 /*
2472 2472 * link: args: SAVED_FH: file, CURRENT_FH: target directory
2473 2473 * res: status. If success - CURRENT_FH unchanged, return change_info
2474 2474 */
2475 2475 /* ARGSUSED */
2476 2476 static void
2477 2477 rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2478 2478 struct compound_state *cs)
2479 2479 {
2480 2480 LINK4args *args = &argop->nfs_argop4_u.oplink;
2481 2481 LINK4res *resp = &resop->nfs_resop4_u.oplink;
2482 2482 int error;
2483 2483 vnode_t *vp;
2484 2484 vnode_t *dvp;
2485 2485 struct vattr bdva, idva, adva;
2486 2486 char *nm;
2487 2487 uint_t len;
2488 2488 struct sockaddr *ca;
2489 2489 char *name = NULL;
2490 2490 nfsstat4 status;
2491 2491
2492 2492 DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs,
2493 2493 LINK4args *, args);
2494 2494
2495 2495 /* SAVED_FH: source object */
2496 2496 vp = cs->saved_vp;
2497 2497 if (vp == NULL) {
2498 2498 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2499 2499 goto out;
2500 2500 }
2501 2501
2502 2502 /* CURRENT_FH: target directory */
2503 2503 dvp = cs->vp;
2504 2504 if (dvp == NULL) {
2505 2505 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2506 2506 goto out;
2507 2507 }
2508 2508
2509 2509 /*
2510 2510 * If there is a non-shared filesystem mounted on this vnode,
2511 2511 * do not allow to link any file in this directory.
2512 2512 */
2513 2513 if (vn_ismntpt(dvp)) {
2514 2514 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2515 2515 goto out;
2516 2516 }
2517 2517
2518 2518 if (cs->access == CS_ACCESS_DENIED) {
2519 2519 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2520 2520 goto out;
2521 2521 }
2522 2522
2523 2523 /* Check source object's type validity */
2524 2524 if (vp->v_type == VDIR) {
2525 2525 *cs->statusp = resp->status = NFS4ERR_ISDIR;
2526 2526 goto out;
2527 2527 }
2528 2528
2529 2529 /* Check target directory's type */
2530 2530 if (dvp->v_type != VDIR) {
2531 2531 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2532 2532 goto out;
2533 2533 }
2534 2534
2535 2535 if (cs->saved_exi != cs->exi) {
2536 2536 *cs->statusp = resp->status = NFS4ERR_XDEV;
2537 2537 goto out;
2538 2538 }
2539 2539
2540 2540 status = utf8_dir_verify(&args->newname);
2541 2541 if (status != NFS4_OK) {
2542 2542 *cs->statusp = resp->status = status;
2543 2543 goto out;
2544 2544 }
2545 2545
2546 2546 nm = utf8_to_fn(&args->newname, &len, NULL);
2547 2547 if (nm == NULL) {
2548 2548 *cs->statusp = resp->status = NFS4ERR_INVAL;
2549 2549 goto out;
2550 2550 }
2551 2551
2552 2552 if (len > MAXNAMELEN) {
2553 2553 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2554 2554 kmem_free(nm, len);
2555 2555 goto out;
2556 2556 }
2557 2557
2558 2558 if (rdonly4(req, cs)) {
2559 2559 *cs->statusp = resp->status = NFS4ERR_ROFS;
2560 2560 kmem_free(nm, len);
2561 2561 goto out;
2562 2562 }
2563 2563
2564 2564 /* Get "before" change value */
2565 2565 bdva.va_mask = AT_CTIME|AT_SEQ;
2566 2566 error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
2567 2567 if (error) {
2568 2568 *cs->statusp = resp->status = puterrno4(error);
2569 2569 kmem_free(nm, len);
2570 2570 goto out;
2571 2571 }
2572 2572
2573 2573 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2574 2574 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2575 2575 MAXPATHLEN + 1);
2576 2576
2577 2577 if (name == NULL) {
2578 2578 *cs->statusp = resp->status = NFS4ERR_INVAL;
2579 2579 kmem_free(nm, len);
2580 2580 goto out;
2581 2581 }
2582 2582
2583 2583 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
2584 2584
2585 2585 error = VOP_LINK(dvp, vp, name, cs->cr, NULL, 0);
2586 2586
2587 2587 if (nm != name)
2588 2588 kmem_free(name, MAXPATHLEN + 1);
2589 2589 kmem_free(nm, len);
2590 2590
2591 2591 /*
2592 2592 * Get the initial "after" sequence number, if it fails, set to zero
2593 2593 */
2594 2594 idva.va_mask = AT_SEQ;
2595 2595 if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
2596 2596 idva.va_seq = 0;
2597 2597
2598 2598 /*
2599 2599 * Force modified data and metadata out to stable storage.
2600 2600 */
2601 2601 (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
2602 2602 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
2603 2603
2604 2604 if (error) {
2605 2605 *cs->statusp = resp->status = puterrno4(error);
2606 2606 goto out;
2607 2607 }
2608 2608
2609 2609 /*
2610 2610 * Get "after" change value, if it fails, simply return the
2611 2611 * before value.
2612 2612 */
2613 2613 adva.va_mask = AT_CTIME|AT_SEQ;
2614 2614 if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
2615 2615 adva.va_ctime = bdva.va_ctime;
2616 2616 adva.va_seq = 0;
2617 2617 }
2618 2618
2619 2619 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
2620 2620
2621 2621 /*
2622 2622 * The cinfo.atomic = TRUE only if we have
2623 2623 * non-zero va_seq's, and it has incremented by exactly one
2624 2624 * during the VOP_LINK and it didn't change during the VOP_FSYNC.
2625 2625 */
2626 2626 if (bdva.va_seq && idva.va_seq && adva.va_seq &&
2627 2627 idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
2628 2628 resp->cinfo.atomic = TRUE;
2629 2629 else
2630 2630 resp->cinfo.atomic = FALSE;
2631 2631
2632 2632 *cs->statusp = resp->status = NFS4_OK;
2633 2633 out:
2634 2634 DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs,
2635 2635 LINK4res *, resp);
2636 2636 }
2637 2637
2638 2638 /*
2639 2639 * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
2640 2640 */
2641 2641
2642 2642 /* ARGSUSED */
2643 2643 static nfsstat4
2644 2644 do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs)
2645 2645 {
2646 2646 int error;
2647 2647 int different_export = 0;
2648 2648 vnode_t *vp, *pre_tvp = NULL, *oldvp = NULL;
2649 2649 struct exportinfo *exi = NULL, *pre_exi = NULL;
2650 2650 nfsstat4 stat;
2651 2651 fid_t fid;
2652 2652 int attrdir, dotdot, walk;
2653 2653 bool_t is_newvp = FALSE;
2654 2654
2655 2655 if (cs->vp->v_flag & V_XATTRDIR) {
2656 2656 attrdir = 1;
2657 2657 ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2658 2658 } else {
2659 2659 attrdir = 0;
2660 2660 ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2661 2661 }
2662 2662
2663 2663 dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2664 2664
2665 2665 /*
2666 2666 * If dotdotting, then need to check whether it's
2667 2667 * above the root of a filesystem, or above an
2668 2668 * export point.
2669 2669 */
2670 2670 if (dotdot) {
2671 2671
2672 2672 /*
2673 2673 * If dotdotting at the root of a filesystem, then
2674 2674 * need to traverse back to the mounted-on filesystem
2675 2675 * and do the dotdot lookup there.
2676 2676 */
2677 2677 if (cs->vp->v_flag & VROOT) {
2678 2678
2679 2679 /*
2680 2680 * If at the system root, then can
2681 2681 * go up no further.
2682 2682 */
2683 2683 if (VN_CMP(cs->vp, rootdir))
2684 2684 return (puterrno4(ENOENT));
2685 2685
2686 2686 /*
2687 2687 * Traverse back to the mounted-on filesystem
2688 2688 */
2689 2689 cs->vp = untraverse(cs->vp);
2690 2690
2691 2691 /*
2692 2692 * Set the different_export flag so we remember
2693 2693 * to pick up a new exportinfo entry for
2694 2694 * this new filesystem.
2695 2695 */
2696 2696 different_export = 1;
2697 2697 } else {
2698 2698
2699 2699 /*
2700 2700 * If dotdotting above an export point then set
2701 2701 * the different_export to get new export info.
2702 2702 */
2703 2703 different_export = nfs_exported(cs->exi, cs->vp);
2704 2704 }
2705 2705 }
2706 2706
2707 2707 error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
2708 2708 NULL, NULL, NULL);
2709 2709 if (error)
2710 2710 return (puterrno4(error));
2711 2711
2712 2712 /*
2713 2713 * If the vnode is in a pseudo filesystem, check whether it is visible.
2714 2714 *
2715 2715 * XXX if the vnode is a symlink and it is not visible in
2716 2716 * a pseudo filesystem, return ENOENT (not following symlink).
2717 2717 * V4 client can not mount such symlink. This is a regression
2718 2718 * from V2/V3.
2719 2719 *
2720 2720 * In the same exported filesystem, if the security flavor used
2721 2721 * is not an explicitly shared flavor, limit the view to the visible
2722 2722 * list entries only. This is not a WRONGSEC case because it's already
2723 2723 * checked via PUTROOTFH/PUTPUBFH or PUTFH.
2724 2724 */
2725 2725 if (!different_export &&
2726 2726 (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
2727 2727 cs->access & CS_ACCESS_LIMITED)) {
2728 2728 if (! nfs_visible(cs->exi, vp, &different_export)) {
2729 2729 VN_RELE(vp);
2730 2730 return (puterrno4(ENOENT));
2731 2731 }
2732 2732 }
2733 2733
2734 2734 /*
2735 2735 * If it's a mountpoint, then traverse it.
2736 2736 */
2737 2737 if (vn_ismntpt(vp)) {
2738 2738 pre_exi = cs->exi; /* save pre-traversed exportinfo */
2739 2739 pre_tvp = vp; /* save pre-traversed vnode */
2740 2740
2741 2741 /*
2742 2742 * hold pre_tvp to counteract rele by traverse. We will
2743 2743 * need pre_tvp below if checkexport4 fails
2744 2744 */
2745 2745 VN_HOLD(pre_tvp);
2746 2746 if ((error = traverse(&vp)) != 0) {
2747 2747 VN_RELE(vp);
2748 2748 VN_RELE(pre_tvp);
2749 2749 return (puterrno4(error));
2750 2750 }
2751 2751 different_export = 1;
2752 2752 } else if (vp->v_vfsp != cs->vp->v_vfsp) {
2753 2753 /*
2754 2754 * The vfsp comparison is to handle the case where
2755 2755 * a LOFS mount is shared. lo_lookup traverses mount points,
2756 2756 * and NFS is unaware of local fs transitions because
2757 2757 * v_vfsmountedhere isn't set. For this special LOFS case,
2758 2758 * the dir and the obj returned by lookup will have different
2759 2759 * vfs ptrs.
2760 2760 */
2761 2761 different_export = 1;
2762 2762 }
2763 2763
2764 2764 if (different_export) {
2765 2765
2766 2766 bzero(&fid, sizeof (fid));
2767 2767 fid.fid_len = MAXFIDSZ;
2768 2768 error = vop_fid_pseudo(vp, &fid);
2769 2769 if (error) {
2770 2770 VN_RELE(vp);
2771 2771 if (pre_tvp)
2772 2772 VN_RELE(pre_tvp);
2773 2773 return (puterrno4(error));
2774 2774 }
2775 2775
2776 2776 if (dotdot)
2777 2777 exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
2778 2778 else
2779 2779 exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
2780 2780
2781 2781 if (exi == NULL) {
2782 2782 if (pre_tvp) {
2783 2783 /*
2784 2784 * If this vnode is a mounted-on vnode,
2785 2785 * but the mounted-on file system is not
2786 2786 * exported, send back the filehandle for
2787 2787 * the mounted-on vnode, not the root of
2788 2788 * the mounted-on file system.
2789 2789 */
2790 2790 VN_RELE(vp);
2791 2791 vp = pre_tvp;
2792 2792 exi = pre_exi;
2793 2793 } else {
2794 2794 VN_RELE(vp);
2795 2795 return (puterrno4(EACCES));
2796 2796 }
2797 2797 } else if (pre_tvp) {
2798 2798 /* we're done with pre_tvp now. release extra hold */
2799 2799 VN_RELE(pre_tvp);
2800 2800 }
2801 2801
2802 2802 cs->exi = exi;
2803 2803
2804 2804 /*
2805 2805 * Now we do a checkauth4. The reason is that
2806 2806 * this client/user may not have access to the new
2807 2807 * exported file system, and if he does,
2808 2808 * the client/user may be mapped to a different uid.
2809 2809 *
2810 2810 * We start with a new cr, because the checkauth4 done
2811 2811 * in the PUT*FH operation over wrote the cred's uid,
2812 2812 * gid, etc, and we want the real thing before calling
2813 2813 * checkauth4()
2814 2814 */
2815 2815 crfree(cs->cr);
2816 2816 cs->cr = crdup(cs->basecr);
2817 2817
2818 2818 oldvp = cs->vp;
2819 2819 cs->vp = vp;
2820 2820 is_newvp = TRUE;
2821 2821
2822 2822 stat = call_checkauth4(cs, req);
2823 2823 if (stat != NFS4_OK) {
2824 2824 VN_RELE(cs->vp);
2825 2825 cs->vp = oldvp;
2826 2826 return (stat);
2827 2827 }
2828 2828 }
2829 2829
2830 2830 /*
2831 2831 * After various NFS checks, do a label check on the path
2832 2832 * component. The label on this path should either be the
2833 2833 * global zone's label or a zone's label. We are only
2834 2834 * interested in the zone's label because exported files
2835 2835 * in global zone is accessible (though read-only) to
2836 2836 * clients. The exportability/visibility check is already
2837 2837 * done before reaching this code.
2838 2838 */
2839 2839 if (is_system_labeled()) {
2840 2840 bslabel_t *clabel;
2841 2841
2842 2842 ASSERT(req->rq_label != NULL);
2843 2843 clabel = req->rq_label;
2844 2844 DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *,
2845 2845 "got client label from request(1)", struct svc_req *, req);
2846 2846
2847 2847 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2848 2848 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
2849 2849 cs->exi)) {
2850 2850 error = EACCES;
2851 2851 goto err_out;
2852 2852 }
2853 2853 } else {
2854 2854 /*
2855 2855 * We grant access to admin_low label clients
2856 2856 * only if the client is trusted, i.e. also
2857 2857 * running Solaris Trusted Extension.
2858 2858 */
2859 2859 struct sockaddr *ca;
2860 2860 int addr_type;
2861 2861 void *ipaddr;
2862 2862 tsol_tpc_t *tp;
2863 2863
2864 2864 ca = (struct sockaddr *)svc_getrpccaller(
2865 2865 req->rq_xprt)->buf;
2866 2866 if (ca->sa_family == AF_INET) {
2867 2867 addr_type = IPV4_VERSION;
2868 2868 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
2869 2869 } else if (ca->sa_family == AF_INET6) {
2870 2870 addr_type = IPV6_VERSION;
2871 2871 ipaddr = &((struct sockaddr_in6 *)
2872 2872 ca)->sin6_addr;
2873 2873 }
2874 2874 tp = find_tpc(ipaddr, addr_type, B_FALSE);
2875 2875 if (tp == NULL || tp->tpc_tp.tp_doi !=
2876 2876 l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
2877 2877 SUN_CIPSO) {
2878 2878 if (tp != NULL)
2879 2879 TPC_RELE(tp);
2880 2880 error = EACCES;
2881 2881 goto err_out;
2882 2882 }
2883 2883 TPC_RELE(tp);
2884 2884 }
2885 2885 }
2886 2886
2887 2887 error = makefh4(&cs->fh, vp, cs->exi);
2888 2888
2889 2889 err_out:
2890 2890 if (error) {
2891 2891 if (is_newvp) {
2892 2892 VN_RELE(cs->vp);
2893 2893 cs->vp = oldvp;
2894 2894 } else
2895 2895 VN_RELE(vp);
2896 2896 return (puterrno4(error));
2897 2897 }
2898 2898
2899 2899 if (!is_newvp) {
2900 2900 if (cs->vp)
2901 2901 VN_RELE(cs->vp);
2902 2902 cs->vp = vp;
2903 2903 } else if (oldvp)
2904 2904 VN_RELE(oldvp);
2905 2905
2906 2906 /*
2907 2907 * if did lookup on attrdir and didn't lookup .., set named
2908 2908 * attr fh flag
2909 2909 */
2910 2910 if (attrdir && ! dotdot)
2911 2911 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
2912 2912
2913 2913 /* Assume false for now, open proc will set this */
2914 2914 cs->mandlock = FALSE;
2915 2915
2916 2916 return (NFS4_OK);
2917 2917 }
2918 2918
2919 2919 /* ARGSUSED */
2920 2920 static void
2921 2921 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2922 2922 struct compound_state *cs)
2923 2923 {
2924 2924 LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
2925 2925 LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
2926 2926 char *nm;
2927 2927 uint_t len;
2928 2928 struct sockaddr *ca;
2929 2929 char *name = NULL;
2930 2930 nfsstat4 status;
2931 2931
2932 2932 DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs,
2933 2933 LOOKUP4args *, args);
2934 2934
2935 2935 if (cs->vp == NULL) {
2936 2936 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2937 2937 goto out;
2938 2938 }
2939 2939
2940 2940 if (cs->vp->v_type == VLNK) {
2941 2941 *cs->statusp = resp->status = NFS4ERR_SYMLINK;
2942 2942 goto out;
2943 2943 }
2944 2944
2945 2945 if (cs->vp->v_type != VDIR) {
2946 2946 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2947 2947 goto out;
2948 2948 }
2949 2949
2950 2950 status = utf8_dir_verify(&args->objname);
2951 2951 if (status != NFS4_OK) {
2952 2952 *cs->statusp = resp->status = status;
2953 2953 goto out;
2954 2954 }
2955 2955
2956 2956 nm = utf8_to_str(&args->objname, &len, NULL);
2957 2957 if (nm == NULL) {
2958 2958 *cs->statusp = resp->status = NFS4ERR_INVAL;
2959 2959 goto out;
2960 2960 }
2961 2961
2962 2962 if (len > MAXNAMELEN) {
2963 2963 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2964 2964 kmem_free(nm, len);
2965 2965 goto out;
2966 2966 }
2967 2967
2968 2968 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2969 2969 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2970 2970 MAXPATHLEN + 1);
2971 2971
2972 2972 if (name == NULL) {
2973 2973 *cs->statusp = resp->status = NFS4ERR_INVAL;
2974 2974 kmem_free(nm, len);
2975 2975 goto out;
2976 2976 }
2977 2977
2978 2978 *cs->statusp = resp->status = do_rfs4_op_lookup(name, req, cs);
2979 2979
2980 2980 if (name != nm)
2981 2981 kmem_free(name, MAXPATHLEN + 1);
2982 2982 kmem_free(nm, len);
2983 2983
2984 2984 out:
2985 2985 DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs,
2986 2986 LOOKUP4res *, resp);
2987 2987 }
2988 2988
2989 2989 /* ARGSUSED */
2990 2990 static void
2991 2991 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
2992 2992 struct compound_state *cs)
2993 2993 {
2994 2994 LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
2995 2995
2996 2996 DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs);
2997 2997
2998 2998 if (cs->vp == NULL) {
2999 2999 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3000 3000 goto out;
3001 3001 }
3002 3002
3003 3003 if (cs->vp->v_type != VDIR) {
3004 3004 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
3005 3005 goto out;
3006 3006 }
3007 3007
3008 3008 *cs->statusp = resp->status = do_rfs4_op_lookup("..", req, cs);
3009 3009
3010 3010 /*
3011 3011 * From NFSV4 Specification, LOOKUPP should not check for
3012 3012 * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
3013 3013 */
3014 3014 if (resp->status == NFS4ERR_WRONGSEC) {
3015 3015 *cs->statusp = resp->status = NFS4_OK;
3016 3016 }
3017 3017
3018 3018 out:
3019 3019 DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
3020 3020 LOOKUPP4res *, resp);
3021 3021 }
3022 3022
3023 3023
3024 3024 /*ARGSUSED2*/
3025 3025 static void
3026 3026 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3027 3027 struct compound_state *cs)
3028 3028 {
3029 3029 OPENATTR4args *args = &argop->nfs_argop4_u.opopenattr;
3030 3030 OPENATTR4res *resp = &resop->nfs_resop4_u.opopenattr;
3031 3031 vnode_t *avp = NULL;
3032 3032 int lookup_flags = LOOKUP_XATTR, error;
3033 3033 int exp_ro = 0;
3034 3034
3035 3035 DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
3036 3036 OPENATTR4args *, args);
3037 3037
3038 3038 if (cs->vp == NULL) {
3039 3039 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3040 3040 goto out;
3041 3041 }
3042 3042
3043 3043 if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 &&
3044 3044 !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) {
3045 3045 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3046 3046 goto out;
3047 3047 }
3048 3048
3049 3049 /*
3050 3050 * If file system supports passing ACE mask to VOP_ACCESS then
3051 3051 * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks
3052 3052 */
3053 3053
3054 3054 if (vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS))
3055 3055 error = VOP_ACCESS(cs->vp, ACE_READ_NAMED_ATTRS,
3056 3056 V_ACE_MASK, cs->cr, NULL);
3057 3057 else
3058 3058 error = ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, NULL) != 0) &&
3059 3059 (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) &&
3060 3060 (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, NULL) != 0));
3061 3061
3062 3062 if (error) {
3063 3063 *cs->statusp = resp->status = puterrno4(EACCES);
3064 3064 goto out;
3065 3065 }
3066 3066
3067 3067 /*
3068 3068 * The CREATE_XATTR_DIR VOP flag cannot be specified if
3069 3069 * the file system is exported read-only -- regardless of
3070 3070 * createdir flag. Otherwise the attrdir would be created
3071 3071 * (assuming server fs isn't mounted readonly locally). If
3072 3072 * VOP_LOOKUP returns ENOENT in this case, the error will
3073 3073 * be translated into EROFS. ENOSYS is mapped to ENOTSUP
3074 3074 * because specfs has no VOP_LOOKUP op, so the macro would
3075 3075 * return ENOSYS. EINVAL is returned by all (current)
3076 3076 * Solaris file system implementations when any of their
3077 3077 * restrictions are violated (xattr(dir) can't have xattrdir).
3078 3078 * Returning NOTSUPP is more appropriate in this case
3079 3079 * because the object will never be able to have an attrdir.
3080 3080 */
3081 3081 if (args->createdir && ! (exp_ro = rdonly4(req, cs)))
3082 3082 lookup_flags |= CREATE_XATTR_DIR;
3083 3083
3084 3084 error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr,
3085 3085 NULL, NULL, NULL);
3086 3086
3087 3087 if (error) {
3088 3088 if (error == ENOENT && args->createdir && exp_ro)
3089 3089 *cs->statusp = resp->status = puterrno4(EROFS);
3090 3090 else if (error == EINVAL || error == ENOSYS)
3091 3091 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3092 3092 else
3093 3093 *cs->statusp = resp->status = puterrno4(error);
3094 3094 goto out;
3095 3095 }
3096 3096
3097 3097 ASSERT(avp->v_flag & V_XATTRDIR);
3098 3098
3099 3099 error = makefh4(&cs->fh, avp, cs->exi);
3100 3100
3101 3101 if (error) {
3102 3102 VN_RELE(avp);
3103 3103 *cs->statusp = resp->status = puterrno4(error);
3104 3104 goto out;
3105 3105 }
3106 3106
3107 3107 VN_RELE(cs->vp);
3108 3108 cs->vp = avp;
3109 3109
3110 3110 /*
3111 3111 * There is no requirement for an attrdir fh flag
3112 3112 * because the attrdir has a vnode flag to distinguish
3113 3113 * it from regular (non-xattr) directories. The
3114 3114 * FH4_ATTRDIR flag is set for future sanity checks.
3115 3115 */
3116 3116 set_fh4_flag(&cs->fh, FH4_ATTRDIR);
3117 3117 *cs->statusp = resp->status = NFS4_OK;
3118 3118
3119 3119 out:
3120 3120 DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
3121 3121 OPENATTR4res *, resp);
3122 3122 }
3123 3123
3124 3124 static int
3125 3125 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
3126 3126 caller_context_t *ct)
3127 3127 {
3128 3128 int error;
3129 3129 int i;
3130 3130 clock_t delaytime;
3131 3131
3132 3132 delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
3133 3133
3134 3134 /*
3135 3135 * Don't block on mandatory locks. If this routine returns
3136 3136 * EAGAIN, the caller should return NFS4ERR_LOCKED.
3137 3137 */
3138 3138 uio->uio_fmode = FNONBLOCK;
3139 3139
3140 3140 for (i = 0; i < rfs4_maxlock_tries; i++) {
3141 3141
3142 3142
3143 3143 if (direction == FREAD) {
3144 3144 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct);
3145 3145 error = VOP_READ(vp, uio, ioflag, cred, ct);
3146 3146 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct);
3147 3147 } else {
3148 3148 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct);
3149 3149 error = VOP_WRITE(vp, uio, ioflag, cred, ct);
3150 3150 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct);
3151 3151 }
3152 3152
3153 3153 if (error != EAGAIN)
3154 3154 break;
3155 3155
3156 3156 if (i < rfs4_maxlock_tries - 1) {
3157 3157 delay(delaytime);
3158 3158 delaytime *= 2;
3159 3159 }
3160 3160 }
3161 3161
3162 3162 return (error);
3163 3163 }
3164 3164
/*
 * READ: read up to args->count bytes starting at args->offset from the
 * current filehandle (cs->vp).
 *
 * The data is returned in one of three ways:
 *  - through the client's RDMA write list when args->wlist is set;
 *  - in an mblk built from buffers set up via rfs_setup_xuio() and
 *    VOP_REQZCBUF() when nfs_loaned_buffers is set and RDMA is not used;
 *  - otherwise in an mblk chain obtained from rfs_read_alloc().
 *
 * The outcome is stored in both resp->status and *cs->statusp.
 */
3165 3165 /* ARGSUSED */
3166 3166 static void
3167 3167 rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3168 3168 struct compound_state *cs)
3169 3169 {
3170 3170 READ4args *args = &argop->nfs_argop4_u.opread;
3171 3171 READ4res *resp = &resop->nfs_resop4_u.opread;
3172 3172 int error;
3173 3173 int verror;
3174 3174 vnode_t *vp;
3175 3175 struct vattr va;
3176 3176 struct iovec iov, *iovp = NULL;
3177 3177 int iovcnt;
3178 3178 struct uio uio;
3179 3179 u_offset_t offset;
3180 3180 bool_t *deleg = &cs->deleg;
3181 3181 nfsstat4 stat;
3182 3182 int in_crit = 0;
3183 3183 mblk_t *mp = NULL;
/* alloc_err is never assigned after this point; it is only ASSERTed. */
3184 3184 int alloc_err = 0;
3185 3185 int rdma_used = 0;
3186 3186 int loaned_buffers;
3187 3187 caller_context_t ct;
3188 3188 struct uio *uiop;
3189 3189
3190 3190 DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs,
3191 3191 READ4args, args);
3192 3192
3193 3193 vp = cs->vp;
3194 3194 if (vp == NULL) {
3195 3195 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3196 3196 goto out;
3197 3197 }
3198 3198 if (cs->access == CS_ACCESS_DENIED) {
3199 3199 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3200 3200 goto out;
3201 3201 }
3202 3202
3203 3203 if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
3204 3204 deleg, TRUE, &ct)) != NFS4_OK) {
3205 3205 *cs->statusp = resp->status = stat;
3206 3206 goto out;
3207 3207 }
3208 3208
3209 3209 /*
3210 3210 * Enter the critical region before calling VOP_RWLOCK
3211 3211 * to avoid a deadlock with write requests.
3212 3212 */
3213 3213 if (nbl_need_check(vp)) {
3214 3214 nbl_start_crit(vp, RW_READER);
3215 3215 in_crit = 1;
3216 3216 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
3217 3217 &ct)) {
3218 3218 *cs->statusp = resp->status = NFS4ERR_LOCKED;
3219 3219 goto out;
3220 3220 }
3221 3221 }
3222 3222
/*
 * A write list means the reply data goes out via RDMA; the list must
 * be able to hold the whole requested count.
 */
3223 3223 if (args->wlist) {
3224 3224 if (args->count > clist_len(args->wlist)) {
3225 3225 *cs->statusp = resp->status = NFS4ERR_INVAL;
3226 3226 goto out;
3227 3227 }
3228 3228 rdma_used = 1;
3229 3229 }
3230 3230
3231 3231 /* use loaned buffers for TCP */
3232 3232 loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
3233 3233
3234 3234 va.va_mask = AT_MODE|AT_SIZE|AT_UID;
3235 3235 verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3236 3236
3237 3237 /*
3238 3238 * If we can't get the attributes, then we can't do the
3239 3239 * right access checking. So, we'll fail the request.
3240 3240 */
3241 3241 if (verror) {
3242 3242 *cs->statusp = resp->status = puterrno4(verror);
3243 3243 goto out;
3244 3244 }
3245 3245
3246 3246 if (vp->v_type != VREG) {
3247 3247 *cs->statusp = resp->status =
3248 3248 ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
3249 3249 goto out;
3250 3250 }
3251 3251
/*
 * The access checks are skipped when the caller's uid matches the
 * file's owner.  Anyone else is refused only if both the VREAD and
 * the VEXEC checks fail.
 */
3252 3252 if (crgetuid(cs->cr) != va.va_uid &&
3253 3253 (error = VOP_ACCESS(vp, VREAD, 0, cs->cr, &ct)) &&
3254 3254 (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr, &ct))) {
3255 3255 *cs->statusp = resp->status = puterrno4(error);
3256 3256 goto out;
3257 3257 }
3258 3258
3259 3259 if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
3260 3260 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3261 3261 goto out;
3262 3262 }
3263 3263
/* Read at or beyond end of file: succeed with no data and eof set. */
3264 3264 offset = args->offset;
3265 3265 if (offset >= va.va_size) {
3266 3266 *cs->statusp = resp->status = NFS4_OK;
3267 3267 resp->eof = TRUE;
3268 3268 resp->data_len = 0;
3269 3269 resp->data_val = NULL;
3270 3270 resp->mblk = NULL;
3271 3271 /* RDMA */
3272 3272 resp->wlist = args->wlist;
3273 3273 resp->wlist_len = resp->data_len;
3274 3274 *cs->statusp = resp->status = NFS4_OK;
3275 3275 if (resp->wlist)
3276 3276 clist_zero_len(resp->wlist);
3277 3277 goto out;
3278 3278 }
3279 3279
/* Zero-length read inside the file: succeed with no data, eof clear. */
3280 3280 if (args->count == 0) {
3281 3281 *cs->statusp = resp->status = NFS4_OK;
3282 3282 resp->eof = FALSE;
3283 3283 resp->data_len = 0;
3284 3284 resp->data_val = NULL;
3285 3285 resp->mblk = NULL;
3286 3286 /* RDMA */
3287 3287 resp->wlist = args->wlist;
3288 3288 resp->wlist_len = resp->data_len;
3289 3289 if (resp->wlist)
3290 3290 clist_zero_len(resp->wlist);
3291 3291 goto out;
3292 3292 }
3293 3293
3294 3294 /*
3295 3295 * Do not allocate memory more than maximum allowed
3296 3296 * transfer size
3297 3297 */
3298 3298 if (args->count > rfs4_tsize(req))
3299 3299 args->count = rfs4_tsize(req);
3300 3300
/*
 * Try the loaned-buffer path first.  If VOP_REQZCBUF() fails, the
 * xuio is freed and the code falls through to the RDMA/mblk setup
 * below with loaned_buffers cleared.
 */
3301 3301 if (loaned_buffers) {
3302 3302 uiop = (uio_t *)rfs_setup_xuio(vp);
3303 3303 ASSERT(uiop != NULL);
3304 3304 uiop->uio_segflg = UIO_SYSSPACE;
3305 3305 uiop->uio_loffset = args->offset;
3306 3306 uiop->uio_resid = args->count;
3307 3307
3308 3308 /* Jump to do the read if successful */
3309 3309 if (!VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cs->cr, &ct)) {
3310 3310 /*
3311 3311 * Need to hold the vnode until after VOP_RETZCBUF()
3312 3312 * is called.
3313 3313 */
3314 3314 VN_HOLD(vp);
3315 3315 goto doio_read;
3316 3316 }
3317 3317
3318 3318 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
3319 3319 uiop->uio_loffset, int, uiop->uio_resid);
3320 3320
3321 3321 uiop->uio_extflg = 0;
3322 3322
3323 3323 /* failure to setup for zero copy */
3324 3324 rfs_free_xuio((void *)uiop);
3325 3325 loaned_buffers = 0;
3326 3326 }
3327 3327
3328 3328 /*
3329 3329 * If returning data via RDMA Write, then grab the chunk list. If we
3330 3330 * aren't returning READ data w/RDMA_WRITE, then grab a mblk.
3331 3331 */
3332 3332 if (rdma_used) {
3333 3333 mp = NULL;
3334 3334 (void) rdma_get_wchunk(req, &iov, args->wlist);
3335 3335 uio.uio_iov = &iov;
3336 3336 uio.uio_iovcnt = 1;
3337 3337 } else {
3338 3338 /*
3339 3339 * mp will contain the data to be sent out in the read reply.
3340 3340 * It will be freed after the reply has been sent.
3341 3341 */
3342 3342 mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
3343 3343 ASSERT(mp != NULL);
3344 3344 ASSERT(alloc_err == 0);
3345 3345 uio.uio_iov = iovp;
3346 3346 uio.uio_iovcnt = iovcnt;
3347 3347 }
3348 3348
3349 3349 uio.uio_segflg = UIO_SYSSPACE;
3350 3350 uio.uio_extflg = UIO_COPY_CACHED;
3351 3351 uio.uio_loffset = args->offset;
3352 3352 uio.uio_resid = args->count;
3353 3353 uiop = &uio;
3354 3354
/*
 * Reached either by falling through with uiop pointing at the local
 * uio, or by the goto above with uiop pointing at the xuio.
 */
3355 3355 doio_read:
3356 3356 error = do_io(FREAD, vp, uiop, 0, cs->cr, &ct);
3357 3357
/* Re-fetch the size after the read; it is used for the eof decision. */
3358 3358 va.va_mask = AT_SIZE;
3359 3359 verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3360 3360
/*
 * NOTE(review): when loaned_buffers is still set here, this error path
 * neither calls rfs_free_xuio() nor releases the VN_HOLD() taken before
 * the jump to doio_read -- confirm whether the xuio and the vnode hold
 * are cleaned up elsewhere or leaked.
 */
3361 3361 if (error) {
3362 3362 if (mp)
3363 3363 freemsg(mp);
3364 3364 *cs->statusp = resp->status = puterrno4(error);
3365 3365 goto out;
3366 3366 }
3367 3367
3368 3368 /* make mblk using zc buffers */
3369 3369 if (loaned_buffers) {
3370 3370 mp = uio_to_mblk(uiop);
3371 3371 ASSERT(mp != NULL);
3372 3372 }
3373 3373
3374 3374 *cs->statusp = resp->status = NFS4_OK;
3375 3375
/* Bytes actually read = requested count minus what is left in the uio. */
3376 3376 ASSERT(uiop->uio_resid >= 0);
3377 3377 resp->data_len = args->count - uiop->uio_resid;
3378 3378 if (mp) {
3379 3379 resp->data_val = (char *)mp->b_datap->db_base;
3380 3380 rfs_rndup_mblks(mp, resp->data_len, loaned_buffers);
3381 3381 } else {
3382 3382 resp->data_val = (caddr_t)iov.iov_base;
3383 3383 }
3384 3384
3385 3385 resp->mblk = mp;
3386 3386
/*
 * eof is reported only when the second getattr succeeded and the read
 * ended exactly at the file size it returned.
 */
3387 3387 if (!verror && offset + resp->data_len == va.va_size)
3388 3388 resp->eof = TRUE;
3389 3389 else
3390 3390 resp->eof = FALSE;
3391 3391
3392 3392 if (rdma_used) {
3393 3393 if (!rdma_setup_read_data4(args, resp)) {
3394 3394 *cs->statusp = resp->status = NFS4ERR_INVAL;
3395 3395 }
3396 3396 } else {
3397 3397 resp->wlist = NULL;
3398 3398 }
3399 3399
3400 3400 out:
3401 3401 if (in_crit)
3402 3402 nbl_end_crit(vp);
3403 3403
/* iovp is non-NULL only if rfs_read_alloc() filled it in above. */
3404 3404 if (iovp != NULL)
3405 3405 kmem_free(iovp, iovcnt * sizeof (struct iovec));
3406 3406
3407 3407 DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs,
3408 3408 READ4res *, resp);
3409 3409 }
3410 3410
3411 3411 static void
3412 3412 rfs4_op_read_free(nfs_resop4 *resop)
3413 3413 {
3414 3414 READ4res *resp = &resop->nfs_resop4_u.opread;
3415 3415
3416 3416 if (resp->status == NFS4_OK && resp->mblk != NULL) {
3417 3417 freemsg(resp->mblk);
3418 3418 resp->mblk = NULL;
3419 3419 resp->data_val = NULL;
3420 3420 resp->data_len = 0;
3421 3421 }
3422 3422 }
3423 3423
3424 3424 static void
3425 3425 rfs4_op_readdir_free(nfs_resop4 * resop)
3426 3426 {
3427 3427 READDIR4res *resp = &resop->nfs_resop4_u.opreaddir;
3428 3428
3429 3429 if (resp->status == NFS4_OK && resp->mblk != NULL) {
3430 3430 freeb(resp->mblk);
3431 3431 resp->mblk = NULL;
3432 3432 resp->data_len = 0;
3433 3433 }
3434 3434 }
3435 3435
3436 3436
3437 3437 /* ARGSUSED */
3438 3438 static void
3439 3439 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3440 3440 struct compound_state *cs)
3441 3441 {
3442 3442 PUTPUBFH4res *resp = &resop->nfs_resop4_u.opputpubfh;
3443 3443 int error;
3444 3444 vnode_t *vp;
3445 3445 struct exportinfo *exi, *sav_exi;
3446 3446 nfs_fh4_fmt_t *fh_fmtp;
3447 3447
3448 3448 DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3449 3449
3450 3450 if (cs->vp) {
3451 3451 VN_RELE(cs->vp);
3452 3452 cs->vp = NULL;
3453 3453 }
3454 3454
3455 3455 if (cs->cr)
3456 3456 crfree(cs->cr);
3457 3457
3458 3458 cs->cr = crdup(cs->basecr);
3459 3459
3460 3460 vp = exi_public->exi_vp;
3461 3461 if (vp == NULL) {
3462 3462 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3463 3463 goto out;
3464 3464 }
3465 3465
3466 3466 error = makefh4(&cs->fh, vp, exi_public);
3467 3467 if (error != 0) {
3468 3468 *cs->statusp = resp->status = puterrno4(error);
3469 3469 goto out;
3470 3470 }
3471 3471 sav_exi = cs->exi;
3472 3472 if (exi_public == exi_root) {
3473 3473 /*
3474 3474 * No filesystem is actually shared public, so we default
3475 3475 * to exi_root. In this case, we must check whether root
3476 3476 * is exported.
3477 3477 */
3478 3478 fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3479 3479
3480 3480 /*
3481 3481 * if root filesystem is exported, the exportinfo struct that we
3482 3482 * should use is what checkexport4 returns, because root_exi is
3483 3483 * actually a mostly empty struct.
3484 3484 */
3485 3485 exi = checkexport4(&fh_fmtp->fh4_fsid,
3486 3486 (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3487 3487 cs->exi = ((exi != NULL) ? exi : exi_public);
3488 3488 } else {
3489 3489 /*
3490 3490 * it's a properly shared filesystem
3491 3491 */
3492 3492 cs->exi = exi_public;
3493 3493 }
3494 3494
3495 3495 if (is_system_labeled()) {
3496 3496 bslabel_t *clabel;
3497 3497
3498 3498 ASSERT(req->rq_label != NULL);
3499 3499 clabel = req->rq_label;
3500 3500 DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
3501 3501 "got client label from request(1)",
3502 3502 struct svc_req *, req);
3503 3503 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3504 3504 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3505 3505 cs->exi)) {
3506 3506 *cs->statusp = resp->status =
3507 3507 NFS4ERR_SERVERFAULT;
3508 3508 goto out;
3509 3509 }
3510 3510 }
3511 3511 }
3512 3512
3513 3513 VN_HOLD(vp);
3514 3514 cs->vp = vp;
3515 3515
3516 3516 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3517 3517 VN_RELE(cs->vp);
3518 3518 cs->vp = NULL;
3519 3519 cs->exi = sav_exi;
3520 3520 goto out;
3521 3521 }
3522 3522
3523 3523 *cs->statusp = resp->status = NFS4_OK;
3524 3524 out:
3525 3525 DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs,
3526 3526 PUTPUBFH4res *, resp);
3527 3527 }
3528 3528
3529 3529 /*
3530 3530 * XXX - issue with put*fh operations. Suppose /export/home is exported.
3531 3531 * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
3532 3532 * or joe have restrictive search permissions, then we shouldn't let
3533 3533 * the client get a file handle. This is easy to enforce. However, we
3534 3534 * don't know what security flavor should be used until we resolve the
3535 3535 * path name. Another complication is uid mapping. If root is
3536 3536 * the user, then it will be mapped to the anonymous user by default,
3537 3537 * but we won't know that till we've resolved the path name. And we won't
3538 3538 * know what the anonymous user is.
3539 3539 * Luckily, SECINFO is specified to take a full filename.
3540 3540 * So what we will have to do in rfs4_op_lookup is check that flavor of
3541 3541 * the target object matches that of the request, and if root was the
3542 3542 * caller, check for the root= and anon= options, and if necessary,
3543 3543 * repeat the lookup using the right cred_t. But that's not done yet.
3544 3544 */
3545 3545 /* ARGSUSED */
3546 3546 static void
3547 3547 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3548 3548 struct compound_state *cs)
3549 3549 {
3550 3550 PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3551 3551 PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3552 3552 nfs_fh4_fmt_t *fh_fmtp;
3553 3553
3554 3554 DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs,
3555 3555 PUTFH4args *, args);
3556 3556
3557 3557 if (cs->vp) {
3558 3558 VN_RELE(cs->vp);
3559 3559 cs->vp = NULL;
3560 3560 }
3561 3561
3562 3562 if (cs->cr) {
3563 3563 crfree(cs->cr);
3564 3564 cs->cr = NULL;
3565 3565 }
3566 3566
3567 3567 if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3568 3568 *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3569 3569 goto out;
3570 3570 }
3571 3571
3572 3572 fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3573 3573 cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3574 3574 NULL);
3575 3575
3576 3576 if (cs->exi == NULL) {
3577 3577 *cs->statusp = resp->status = NFS4ERR_STALE;
3578 3578 goto out;
3579 3579 }
3580 3580
3581 3581 cs->cr = crdup(cs->basecr);
3582 3582
3583 3583 ASSERT(cs->cr != NULL);
3584 3584
3585 3585 if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3586 3586 *cs->statusp = resp->status;
3587 3587 goto out;
3588 3588 }
3589 3589
3590 3590 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3591 3591 VN_RELE(cs->vp);
3592 3592 cs->vp = NULL;
3593 3593 goto out;
3594 3594 }
3595 3595
3596 3596 nfs_fh4_copy(&args->object, &cs->fh);
3597 3597 *cs->statusp = resp->status = NFS4_OK;
3598 3598 cs->deleg = FALSE;
3599 3599
3600 3600 out:
3601 3601 DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs,
3602 3602 PUTFH4res *, resp);
3603 3603 }
3604 3604
3605 3605 /* ARGSUSED */
3606 3606 static void
3607 3607 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3608 3608 struct compound_state *cs)
3609 3609 {
3610 3610 PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
3611 3611 int error;
3612 3612 fid_t fid;
3613 3613 struct exportinfo *exi, *sav_exi;
3614 3614
3615 3615 DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
3616 3616
3617 3617 if (cs->vp) {
3618 3618 VN_RELE(cs->vp);
3619 3619 cs->vp = NULL;
3620 3620 }
3621 3621
3622 3622 if (cs->cr)
3623 3623 crfree(cs->cr);
3624 3624
3625 3625 cs->cr = crdup(cs->basecr);
3626 3626
3627 3627 /*
3628 3628 * Using rootdir, the system root vnode,
3629 3629 * get its fid.
3630 3630 */
3631 3631 bzero(&fid, sizeof (fid));
3632 3632 fid.fid_len = MAXFIDSZ;
3633 3633 error = vop_fid_pseudo(rootdir, &fid);
3634 3634 if (error != 0) {
3635 3635 *cs->statusp = resp->status = puterrno4(error);
3636 3636 goto out;
3637 3637 }
3638 3638
3639 3639 /*
3640 3640 * Then use the root fsid & fid it to find out if it's exported
3641 3641 *
3642 3642 * If the server root isn't exported directly, then
3643 3643 * it should at least be a pseudo export based on
3644 3644 * one or more exports further down in the server's
3645 3645 * file tree.
3646 3646 */
3647 3647 exi = checkexport4(&rootdir->v_vfsp->vfs_fsid, &fid, NULL);
3648 3648 if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3649 3649 NFS4_DEBUG(rfs4_debug,
3650 3650 (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3651 3651 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3652 3652 goto out;
3653 3653 }
3654 3654
3655 3655 /*
3656 3656 * Now make a filehandle based on the root
3657 3657 * export and root vnode.
3658 3658 */
3659 3659 error = makefh4(&cs->fh, rootdir, exi);
3660 3660 if (error != 0) {
3661 3661 *cs->statusp = resp->status = puterrno4(error);
3662 3662 goto out;
3663 3663 }
3664 3664
3665 3665 sav_exi = cs->exi;
3666 3666 cs->exi = exi;
3667 3667
3668 3668 VN_HOLD(rootdir);
3669 3669 cs->vp = rootdir;
3670 3670
3671 3671 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3672 3672 VN_RELE(rootdir);
3673 3673 cs->vp = NULL;
3674 3674 cs->exi = sav_exi;
3675 3675 goto out;
3676 3676 }
3677 3677
3678 3678 *cs->statusp = resp->status = NFS4_OK;
3679 3679 cs->deleg = FALSE;
3680 3680 out:
3681 3681 DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3682 3682 PUTROOTFH4res *, resp);
3683 3683 }
3684 3684
3685 3685 /*
3686 3686 * set_rdattr_params sets up the variables used to manage what information
3687 3687 * to get for each directory entry.
3688 3688 */
3689 3689 static nfsstat4
3690 3690 set_rdattr_params(struct nfs4_svgetit_arg *sargp,
3691 3691 bitmap4 attrs, bool_t *need_to_lookup)
3692 3692 {
3693 3693 uint_t va_mask;
3694 3694 nfsstat4 status;
3695 3695 bitmap4 objbits;
3696 3696
3697 3697 status = bitmap4_to_attrmask(attrs, sargp);
3698 3698 if (status != NFS4_OK) {
3699 3699 /*
3700 3700 * could not even figure attr mask
3701 3701 */
3702 3702 return (status);
3703 3703 }
3704 3704 va_mask = sargp->vap->va_mask;
3705 3705
3706 3706 /*
3707 3707 * dirent's d_ino is always correct value for mounted_on_fileid.
3708 3708 * mntdfid_set is set once here, but mounted_on_fileid is
3709 3709 * set in main dirent processing loop for each dirent.
3710 3710 * The mntdfid_set is a simple optimization that lets the
3711 3711 * server attr code avoid work when caller is readdir.
3712 3712 */
3713 3713 sargp->mntdfid_set = TRUE;
3714 3714
3715 3715 /*
3716 3716 * Lookup entry only if client asked for any of the following:
3717 3717 * a) vattr attrs
3718 3718 * b) vfs attrs
3719 3719 * c) attrs w/per-object scope requested (change, filehandle, etc)
3720 3720 * other than mounted_on_fileid (which we can take from dirent)
3721 3721 */
3722 3722 objbits = attrs ? attrs & NFS4_VP_ATTR_MASK : 0;
3723 3723
3724 3724 if (va_mask || sargp->sbp || (objbits & ~FATTR4_MOUNTED_ON_FILEID_MASK))
3725 3725 *need_to_lookup = TRUE;
3726 3726 else
3727 3727 *need_to_lookup = FALSE;
3728 3728
3729 3729 if (sargp->sbp == NULL)
3730 3730 return (NFS4_OK);
3731 3731
3732 3732 /*
3733 3733 * If filesystem attrs are requested, get them now from the
3734 3734 * directory vp, as most entries will have same filesystem. The only
3735 3735 * exception are mounted over entries but we handle
3736 3736 * those as we go (XXX mounted over detection not yet implemented).
3737 3737 */
3738 3738 sargp->vap->va_mask = 0; /* to avoid VOP_GETATTR */
3739 3739 status = bitmap4_get_sysattrs(sargp);
3740 3740 sargp->vap->va_mask = va_mask;
3741 3741
3742 3742 if ((status != NFS4_OK) && sargp->rdattr_error_req) {
3743 3743 /*
3744 3744 * Failed to get filesystem attributes.
3745 3745 * Return a rdattr_error for each entry, but don't fail.
3746 3746 * However, don't get any obj-dependent attrs.
3747 3747 */
3748 3748 sargp->rdattr_error = status; /* for rdattr_error */
3749 3749 *need_to_lookup = FALSE;
3750 3750 /*
3751 3751 * At least get fileid for regular readdir output
3752 3752 */
3753 3753 sargp->vap->va_mask &= AT_NODEID;
3754 3754 status = NFS4_OK;
3755 3755 }
3756 3756
3757 3757 return (status);
3758 3758 }
3759 3759
3760 3760 /*
3761 3761 * readlink: args: CURRENT_FH.
3762 3762 * res: status. If success - CURRENT_FH unchanged, return linktext.
3763 3763 */
3764 3764
3765 3765 /* ARGSUSED */
3766 3766 static void
3767 3767 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3768 3768 struct compound_state *cs)
3769 3769 {
3770 3770 READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3771 3771 int error;
3772 3772 vnode_t *vp;
3773 3773 struct iovec iov;
3774 3774 struct vattr va;
3775 3775 struct uio uio;
3776 3776 char *data;
3777 3777 struct sockaddr *ca;
3778 3778 char *name = NULL;
3779 3779 int is_referral;
3780 3780
3781 3781 DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs);
3782 3782
3783 3783 /* CURRENT_FH: directory */
3784 3784 vp = cs->vp;
3785 3785 if (vp == NULL) {
3786 3786 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3787 3787 goto out;
3788 3788 }
3789 3789
3790 3790 if (cs->access == CS_ACCESS_DENIED) {
3791 3791 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3792 3792 goto out;
3793 3793 }
3794 3794
3795 3795 /* Is it a referral? */
3796 3796 if (vn_is_nfs_reparse(vp, cs->cr) && client_is_downrev(req)) {
3797 3797
3798 3798 is_referral = 1;
3799 3799
3800 3800 } else {
3801 3801
3802 3802 is_referral = 0;
3803 3803
3804 3804 if (vp->v_type == VDIR) {
3805 3805 *cs->statusp = resp->status = NFS4ERR_ISDIR;
3806 3806 goto out;
3807 3807 }
3808 3808
3809 3809 if (vp->v_type != VLNK) {
3810 3810 *cs->statusp = resp->status = NFS4ERR_INVAL;
3811 3811 goto out;
3812 3812 }
3813 3813
3814 3814 }
3815 3815
3816 3816 va.va_mask = AT_MODE;
3817 3817 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
3818 3818 if (error) {
3819 3819 *cs->statusp = resp->status = puterrno4(error);
3820 3820 goto out;
3821 3821 }
3822 3822
3823 3823 if (MANDLOCK(vp, va.va_mode)) {
3824 3824 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3825 3825 goto out;
3826 3826 }
3827 3827
3828 3828 data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3829 3829
3830 3830 if (is_referral) {
3831 3831 char *s;
3832 3832 size_t strsz;
3833 3833
3834 3834 /* Get an artificial symlink based on a referral */
3835 3835 s = build_symlink(vp, cs->cr, &strsz);
3836 3836 global_svstat_ptr[4][NFS_REFERLINKS].value.ui64++;
3837 3837 DTRACE_PROBE2(nfs4serv__func__referral__reflink,
3838 3838 vnode_t *, vp, char *, s);
3839 3839 if (s == NULL)
3840 3840 error = EINVAL;
3841 3841 else {
3842 3842 error = 0;
3843 3843 (void) strlcpy(data, s, MAXPATHLEN + 1);
3844 3844 kmem_free(s, strsz);
3845 3845 }
3846 3846
3847 3847 } else {
3848 3848
3849 3849 iov.iov_base = data;
3850 3850 iov.iov_len = MAXPATHLEN;
3851 3851 uio.uio_iov = &iov;
3852 3852 uio.uio_iovcnt = 1;
3853 3853 uio.uio_segflg = UIO_SYSSPACE;
3854 3854 uio.uio_extflg = UIO_COPY_CACHED;
3855 3855 uio.uio_loffset = 0;
3856 3856 uio.uio_resid = MAXPATHLEN;
3857 3857
3858 3858 error = VOP_READLINK(vp, &uio, cs->cr, NULL);
3859 3859
3860 3860 if (!error)
3861 3861 *(data + MAXPATHLEN - uio.uio_resid) = '\0';
3862 3862 }
3863 3863
3864 3864 if (error) {
3865 3865 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3866 3866 *cs->statusp = resp->status = puterrno4(error);
3867 3867 goto out;
3868 3868 }
3869 3869
3870 3870 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3871 3871 name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND,
3872 3872 MAXPATHLEN + 1);
3873 3873
3874 3874 if (name == NULL) {
3875 3875 /*
3876 3876 * Even though the conversion failed, we return
3877 3877 * something. We just don't translate it.
3878 3878 */
3879 3879 name = data;
3880 3880 }
3881 3881
3882 3882 /*
3883 3883 * treat link name as data
3884 3884 */
3885 3885 (void) str_to_utf8(name, (utf8string *)&resp->link);
3886 3886
3887 3887 if (name != data)
3888 3888 kmem_free(name, MAXPATHLEN + 1);
3889 3889 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3890 3890 *cs->statusp = resp->status = NFS4_OK;
3891 3891
3892 3892 out:
3893 3893 DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs,
3894 3894 READLINK4res *, resp);
3895 3895 }
3896 3896
3897 3897 static void
3898 3898 rfs4_op_readlink_free(nfs_resop4 *resop)
3899 3899 {
3900 3900 READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3901 3901 utf8string *symlink = (utf8string *)&resp->link;
3902 3902
3903 3903 if (symlink->utf8string_val) {
3904 3904 UTF8STRING_FREE(*symlink)
3905 3905 }
3906 3906 }
3907 3907
3908 3908 /*
3909 3909 * release_lockowner:
3910 3910 * Release any state associated with the supplied
3911 3911 * lockowner. Note if any lo_state is holding locks we will not
3912 3912 * rele that lo_state and thus the lockowner will not be destroyed.
3913 3913 * A client using lock after the lock owner stateid has been released
3914 3914 * will suffer the consequence of NFS4ERR_BAD_STATEID and would have
3915 3915 * to reissue the lock with new_lock_owner set to TRUE.
3916 3916 * args: lock_owner
3917 3917 * res: status
3918 3918 */
3919 3919 /* ARGSUSED */
3920 3920 static void
3921 3921 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
3922 3922 struct svc_req *req, struct compound_state *cs)
3923 3923 {
3924 3924 RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
3925 3925 RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
3926 3926 rfs4_lockowner_t *lo;
3927 3927 rfs4_openowner_t *oo;
3928 3928 rfs4_state_t *sp;
3929 3929 rfs4_lo_state_t *lsp;
3930 3930 rfs4_client_t *cp;
3931 3931 bool_t create = FALSE;
3932 3932 locklist_t *llist;
3933 3933 sysid_t sysid;
3934 3934
3935 3935 DTRACE_NFSV4_2(op__release__lockowner__start, struct compound_state *,
3936 3936 cs, RELEASE_LOCKOWNER4args *, ap);
3937 3937
3938 3938 /* Make sure there is a clientid around for this request */
3939 3939 cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
3940 3940
3941 3941 if (cp == NULL) {
3942 3942 *cs->statusp = resp->status =
3943 3943 rfs4_check_clientid(&ap->lock_owner.clientid, 0);
3944 3944 goto out;
3945 3945 }
3946 3946 rfs4_client_rele(cp);
3947 3947
3948 3948 lo = rfs4_findlockowner(&ap->lock_owner, &create);
3949 3949 if (lo == NULL) {
3950 3950 *cs->statusp = resp->status = NFS4_OK;
3951 3951 goto out;
3952 3952 }
3953 3953 ASSERT(lo->rl_client != NULL);
3954 3954
3955 3955 /*
3956 3956 * Check for EXPIRED client. If so will reap state with in a lease
3957 3957 * period or on next set_clientid_confirm step
3958 3958 */
3959 3959 if (rfs4_lease_expired(lo->rl_client)) {
3960 3960 rfs4_lockowner_rele(lo);
3961 3961 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
3962 3962 goto out;
3963 3963 }
3964 3964
3965 3965 /*
3966 3966 * If no sysid has been assigned, then no locks exist; just return.
3967 3967 */
3968 3968 rfs4_dbe_lock(lo->rl_client->rc_dbe);
3969 3969 if (lo->rl_client->rc_sysidt == LM_NOSYSID) {
3970 3970 rfs4_lockowner_rele(lo);
3971 3971 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3972 3972 goto out;
3973 3973 }
3974 3974
3975 3975 sysid = lo->rl_client->rc_sysidt;
3976 3976 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3977 3977
3978 3978 /*
3979 3979 * Mark the lockowner invalid.
3980 3980 */
3981 3981 rfs4_dbe_hide(lo->rl_dbe);
3982 3982
3983 3983 /*
3984 3984 * sysid-pid pair should now not be used since the lockowner is
3985 3985 * invalid. If the client were to instantiate the lockowner again
3986 3986 * it would be assigned a new pid. Thus we can get the list of
3987 3987 * current locks.
3988 3988 */
3989 3989
3990 3990 llist = flk_get_active_locks(sysid, lo->rl_pid);
3991 3991 /* If we are still holding locks fail */
3992 3992 if (llist != NULL) {
3993 3993
3994 3994 *cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
3995 3995
3996 3996 flk_free_locklist(llist);
3997 3997 /*
3998 3998 * We need to unhide the lockowner so the client can
3999 3999 * try it again. The bad thing here is if the client
4000 4000 * has a logic error that took it here in the first place
4001 4001 * he probably has lost accounting of the locks that it
4002 4002 * is holding. So we may have dangling state until the
4003 4003 * open owner state is reaped via close. One scenario
4004 4004 * that could possibly occur is that the client has
4005 4005 * sent the unlock request(s) in separate threads
4006 4006 * and has not waited for the replies before sending the
4007 4007 * RELEASE_LOCKOWNER request. Presumably, it would expect
4008 4008 * and deal appropriately with NFS4ERR_LOCKS_HELD, by
4009 4009 * reissuing the request.
4010 4010 */
4011 4011 rfs4_dbe_unhide(lo->rl_dbe);
4012 4012 rfs4_lockowner_rele(lo);
4013 4013 goto out;
4014 4014 }
4015 4015
4016 4016 /*
4017 4017 * For the corresponding client we need to check each open
4018 4018 * owner for any opens that have lockowner state associated
4019 4019 * with this lockowner.
4020 4020 */
4021 4021
4022 4022 rfs4_dbe_lock(lo->rl_client->rc_dbe);
4023 4023 for (oo = list_head(&lo->rl_client->rc_openownerlist); oo != NULL;
4024 4024 oo = list_next(&lo->rl_client->rc_openownerlist, oo)) {
4025 4025
4026 4026 rfs4_dbe_lock(oo->ro_dbe);
4027 4027 for (sp = list_head(&oo->ro_statelist); sp != NULL;
4028 4028 sp = list_next(&oo->ro_statelist, sp)) {
4029 4029
4030 4030 rfs4_dbe_lock(sp->rs_dbe);
4031 4031 for (lsp = list_head(&sp->rs_lostatelist);
4032 4032 lsp != NULL;
4033 4033 lsp = list_next(&sp->rs_lostatelist, lsp)) {
4034 4034 if (lsp->rls_locker == lo) {
4035 4035 rfs4_dbe_lock(lsp->rls_dbe);
4036 4036 rfs4_dbe_invalidate(lsp->rls_dbe);
4037 4037 rfs4_dbe_unlock(lsp->rls_dbe);
4038 4038 }
4039 4039 }
4040 4040 rfs4_dbe_unlock(sp->rs_dbe);
4041 4041 }
4042 4042 rfs4_dbe_unlock(oo->ro_dbe);
4043 4043 }
4044 4044 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4045 4045
4046 4046 rfs4_lockowner_rele(lo);
4047 4047
4048 4048 *cs->statusp = resp->status = NFS4_OK;
4049 4049
4050 4050 out:
4051 4051 DTRACE_NFSV4_2(op__release__lockowner__done, struct compound_state *,
4052 4052 cs, RELEASE_LOCKOWNER4res *, resp);
4053 4053 }
4054 4054
4055 4055 /*
4056 4056 * short utility function to lookup a file and recall the delegation
4057 4057 */
4058 4058 static rfs4_file_t *
4059 4059 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
4060 4060 int *lkup_error, cred_t *cr)
4061 4061 {
4062 4062 vnode_t *vp;
4063 4063 rfs4_file_t *fp = NULL;
4064 4064 bool_t fcreate = FALSE;
4065 4065 int error;
4066 4066
4067 4067 if (vpp)
4068 4068 *vpp = NULL;
4069 4069
4070 4070 if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, NULL, NULL,
4071 4071 NULL)) == 0) {
4072 4072 if (vp->v_type == VREG)
4073 4073 fp = rfs4_findfile(vp, NULL, &fcreate);
4074 4074 if (vpp)
4075 4075 *vpp = vp;
4076 4076 else
4077 4077 VN_RELE(vp);
4078 4078 }
4079 4079
4080 4080 if (lkup_error)
4081 4081 *lkup_error = error;
4082 4082
4083 4083 return (fp);
4084 4084 }
4085 4085
4086 4086 /*
4087 4087 * remove: args: CURRENT_FH: directory; name.
4088 4088 * res: status. If success - CURRENT_FH unchanged, return change_info
4089 4089 * for directory.
4090 4090 */
4091 4091 /* ARGSUSED */
4092 4092 static void
4093 4093 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4094 4094 struct compound_state *cs)
4095 4095 {
4096 4096 REMOVE4args *args = &argop->nfs_argop4_u.opremove;
4097 4097 REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
4098 4098 int error;
4099 4099 vnode_t *dvp, *vp;
4100 4100 struct vattr bdva, idva, adva;
4101 4101 char *nm;
4102 4102 uint_t len;
4103 4103 rfs4_file_t *fp;
4104 4104 int in_crit = 0;
4105 4105 bslabel_t *clabel;
4106 4106 struct sockaddr *ca;
4107 4107 char *name = NULL;
4108 4108 nfsstat4 status;
4109 4109
4110 4110 DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs,
4111 4111 REMOVE4args *, args);
4112 4112
4113 4113 /* CURRENT_FH: directory */
4114 4114 dvp = cs->vp;
4115 4115 if (dvp == NULL) {
4116 4116 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4117 4117 goto out;
4118 4118 }
4119 4119
4120 4120 if (cs->access == CS_ACCESS_DENIED) {
4121 4121 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4122 4122 goto out;
4123 4123 }
4124 4124
4125 4125 /*
4126 4126 * If there is an unshared filesystem mounted on this vnode,
4127 4127 * Do not allow to remove anything in this directory.
4128 4128 */
4129 4129 if (vn_ismntpt(dvp)) {
4130 4130 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4131 4131 goto out;
4132 4132 }
4133 4133
4134 4134 if (dvp->v_type != VDIR) {
4135 4135 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4136 4136 goto out;
4137 4137 }
4138 4138
4139 4139 status = utf8_dir_verify(&args->target);
4140 4140 if (status != NFS4_OK) {
4141 4141 *cs->statusp = resp->status = status;
4142 4142 goto out;
4143 4143 }
4144 4144
4145 4145 /*
4146 4146 * Lookup the file so that we can check if it's a directory
4147 4147 */
4148 4148 nm = utf8_to_fn(&args->target, &len, NULL);
4149 4149 if (nm == NULL) {
4150 4150 *cs->statusp = resp->status = NFS4ERR_INVAL;
4151 4151 goto out;
4152 4152 }
4153 4153
4154 4154 if (len > MAXNAMELEN) {
4155 4155 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4156 4156 kmem_free(nm, len);
4157 4157 goto out;
4158 4158 }
4159 4159
4160 4160 if (rdonly4(req, cs)) {
4161 4161 *cs->statusp = resp->status = NFS4ERR_ROFS;
4162 4162 kmem_free(nm, len);
4163 4163 goto out;
4164 4164 }
4165 4165
4166 4166 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4167 4167 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
4168 4168 MAXPATHLEN + 1);
4169 4169
4170 4170 if (name == NULL) {
4171 4171 *cs->statusp = resp->status = NFS4ERR_INVAL;
4172 4172 kmem_free(nm, len);
4173 4173 goto out;
4174 4174 }
4175 4175
4176 4176 /*
4177 4177 * Lookup the file to determine type and while we are see if
4178 4178 * there is a file struct around and check for delegation.
4179 4179 * We don't need to acquire va_seq before this lookup, if
4180 4180 * it causes an update, cinfo.before will not match, which will
4181 4181 * trigger a cache flush even if atomic is TRUE.
4182 4182 */
4183 4183 if (fp = rfs4_lookup_and_findfile(dvp, name, &vp, &error, cs->cr)) {
4184 4184 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4185 4185 NULL)) {
4186 4186 VN_RELE(vp);
4187 4187 rfs4_file_rele(fp);
4188 4188 *cs->statusp = resp->status = NFS4ERR_DELAY;
4189 4189 if (nm != name)
4190 4190 kmem_free(name, MAXPATHLEN + 1);
4191 4191 kmem_free(nm, len);
4192 4192 goto out;
4193 4193 }
4194 4194 }
4195 4195
4196 4196 /* Didn't find anything to remove */
4197 4197 if (vp == NULL) {
4198 4198 *cs->statusp = resp->status = error;
4199 4199 if (nm != name)
4200 4200 kmem_free(name, MAXPATHLEN + 1);
4201 4201 kmem_free(nm, len);
4202 4202 goto out;
4203 4203 }
4204 4204
4205 4205 if (nbl_need_check(vp)) {
4206 4206 nbl_start_crit(vp, RW_READER);
4207 4207 in_crit = 1;
4208 4208 if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
4209 4209 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4210 4210 if (nm != name)
4211 4211 kmem_free(name, MAXPATHLEN + 1);
4212 4212 kmem_free(nm, len);
4213 4213 nbl_end_crit(vp);
4214 4214 VN_RELE(vp);
4215 4215 if (fp) {
4216 4216 rfs4_clear_dont_grant(fp);
4217 4217 rfs4_file_rele(fp);
4218 4218 }
4219 4219 goto out;
4220 4220 }
4221 4221 }
4222 4222
4223 4223 /* check label before allowing removal */
4224 4224 if (is_system_labeled()) {
4225 4225 ASSERT(req->rq_label != NULL);
4226 4226 clabel = req->rq_label;
4227 4227 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
4228 4228 "got client label from request(1)",
4229 4229 struct svc_req *, req);
4230 4230 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4231 4231 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4232 4232 cs->exi)) {
4233 4233 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4234 4234 if (name != nm)
4235 4235 kmem_free(name, MAXPATHLEN + 1);
4236 4236 kmem_free(nm, len);
4237 4237 if (in_crit)
4238 4238 nbl_end_crit(vp);
4239 4239 VN_RELE(vp);
4240 4240 if (fp) {
4241 4241 rfs4_clear_dont_grant(fp);
4242 4242 rfs4_file_rele(fp);
4243 4243 }
4244 4244 goto out;
4245 4245 }
4246 4246 }
4247 4247 }
4248 4248
4249 4249 /* Get dir "before" change value */
4250 4250 bdva.va_mask = AT_CTIME|AT_SEQ;
4251 4251 error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
4252 4252 if (error) {
4253 4253 *cs->statusp = resp->status = puterrno4(error);
4254 4254 if (nm != name)
4255 4255 kmem_free(name, MAXPATHLEN + 1);
4256 4256 kmem_free(nm, len);
4257 4257 if (in_crit)
4258 4258 nbl_end_crit(vp);
4259 4259 VN_RELE(vp);
4260 4260 if (fp) {
4261 4261 rfs4_clear_dont_grant(fp);
4262 4262 rfs4_file_rele(fp);
4263 4263 }
4264 4264 goto out;
4265 4265 }
4266 4266 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
4267 4267
4268 4268 /* Actually do the REMOVE operation */
4269 4269 if (vp->v_type == VDIR) {
4270 4270 /*
4271 4271 * Can't remove a directory that has a mounted-on filesystem.
4272 4272 */
4273 4273 if (vn_ismntpt(vp)) {
4274 4274 error = EACCES;
4275 4275 } else {
4276 4276 /*
4277 4277 * System V defines rmdir to return EEXIST,
4278 4278 * not ENOTEMPTY, if the directory is not
4279 4279 * empty. A System V NFS server needs to map
4280 4280 * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4281 4281 * transmit over the wire.
4282 4282 */
4283 4283 if ((error = VOP_RMDIR(dvp, name, rootdir, cs->cr,
4284 4284 NULL, 0)) == EEXIST)
4285 4285 error = ENOTEMPTY;
4286 4286 }
4287 4287 } else {
4288 4288 if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
4289 4289 fp != NULL) {
4290 4290 struct vattr va;
4291 4291 vnode_t *tvp;
4292 4292
4293 4293 rfs4_dbe_lock(fp->rf_dbe);
4294 4294 tvp = fp->rf_vp;
4295 4295 if (tvp)
4296 4296 VN_HOLD(tvp);
4297 4297 rfs4_dbe_unlock(fp->rf_dbe);
4298 4298
4299 4299 if (tvp) {
4300 4300 /*
4301 4301 * This is va_seq safe because we are not
4302 4302 * manipulating dvp.
4303 4303 */
4304 4304 va.va_mask = AT_NLINK;
4305 4305 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4306 4306 va.va_nlink == 0) {
4307 4307 /* Remove state on file remove */
4308 4308 if (in_crit) {
4309 4309 nbl_end_crit(vp);
4310 4310 in_crit = 0;
4311 4311 }
4312 4312 rfs4_close_all_state(fp);
4313 4313 }
4314 4314 VN_RELE(tvp);
4315 4315 }
4316 4316 }
4317 4317 }
4318 4318
4319 4319 if (in_crit)
4320 4320 nbl_end_crit(vp);
4321 4321 VN_RELE(vp);
4322 4322
4323 4323 if (fp) {
4324 4324 rfs4_clear_dont_grant(fp);
4325 4325 rfs4_file_rele(fp);
4326 4326 }
4327 4327 if (nm != name)
4328 4328 kmem_free(name, MAXPATHLEN + 1);
4329 4329 kmem_free(nm, len);
4330 4330
4331 4331 if (error) {
4332 4332 *cs->statusp = resp->status = puterrno4(error);
4333 4333 goto out;
4334 4334 }
4335 4335
4336 4336 /*
4337 4337 * Get the initial "after" sequence number, if it fails, set to zero
4338 4338 */
4339 4339 idva.va_mask = AT_SEQ;
4340 4340 if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
4341 4341 idva.va_seq = 0;
4342 4342
4343 4343 /*
4344 4344 * Force modified data and metadata out to stable storage.
4345 4345 */
4346 4346 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
4347 4347
4348 4348 /*
4349 4349 * Get "after" change value, if it fails, simply return the
4350 4350 * before value.
4351 4351 */
4352 4352 adva.va_mask = AT_CTIME|AT_SEQ;
4353 4353 if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
4354 4354 adva.va_ctime = bdva.va_ctime;
4355 4355 adva.va_seq = 0;
4356 4356 }
4357 4357
4358 4358 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
4359 4359
4360 4360 /*
4361 4361 * The cinfo.atomic = TRUE only if we have
4362 4362 * non-zero va_seq's, and it has incremented by exactly one
4363 4363 * during the VOP_REMOVE/RMDIR and it didn't change during
4364 4364 * the VOP_FSYNC.
4365 4365 */
4366 4366 if (bdva.va_seq && idva.va_seq && adva.va_seq &&
4367 4367 idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
4368 4368 resp->cinfo.atomic = TRUE;
4369 4369 else
4370 4370 resp->cinfo.atomic = FALSE;
4371 4371
4372 4372 *cs->statusp = resp->status = NFS4_OK;
4373 4373
4374 4374 out:
4375 4375 DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
4376 4376 REMOVE4res *, resp);
4377 4377 }
4378 4378
4379 4379 /*
4380 4380 * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
4381 4381 * oldname and newname.
4382 4382 * res: status. If success - CURRENT_FH unchanged, return change_info
4383 4383 * for both from and target directories.
4384 4384 */
4385 4385 /* ARGSUSED */
4386 4386 static void
4387 4387 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4388 4388 struct compound_state *cs)
4389 4389 {
4390 4390 RENAME4args *args = &argop->nfs_argop4_u.oprename;
4391 4391 RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4392 4392 int error;
4393 4393 vnode_t *odvp;
4394 4394 vnode_t *ndvp;
4395 4395 vnode_t *srcvp, *targvp;
4396 4396 struct vattr obdva, oidva, oadva;
4397 4397 struct vattr nbdva, nidva, nadva;
4398 4398 char *onm, *nnm;
4399 4399 uint_t olen, nlen;
4400 4400 rfs4_file_t *fp, *sfp;
4401 4401 int in_crit_src, in_crit_targ;
4402 4402 int fp_rele_grant_hold, sfp_rele_grant_hold;
4403 4403 bslabel_t *clabel;
4404 4404 struct sockaddr *ca;
4405 4405 char *converted_onm = NULL;
4406 4406 char *converted_nnm = NULL;
4407 4407 nfsstat4 status;
4408 4408
4409 4409 DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4410 4410 RENAME4args *, args);
4411 4411
4412 4412 fp = sfp = NULL;
4413 4413 srcvp = targvp = NULL;
4414 4414 in_crit_src = in_crit_targ = 0;
4415 4415 fp_rele_grant_hold = sfp_rele_grant_hold = 0;
4416 4416
4417 4417 /* CURRENT_FH: target directory */
4418 4418 ndvp = cs->vp;
4419 4419 if (ndvp == NULL) {
4420 4420 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4421 4421 goto out;
4422 4422 }
4423 4423
4424 4424 /* SAVED_FH: from directory */
4425 4425 odvp = cs->saved_vp;
4426 4426 if (odvp == NULL) {
4427 4427 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4428 4428 goto out;
4429 4429 }
4430 4430
4431 4431 if (cs->access == CS_ACCESS_DENIED) {
4432 4432 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4433 4433 goto out;
4434 4434 }
4435 4435
4436 4436 /*
4437 4437 * If there is an unshared filesystem mounted on this vnode,
4438 4438 * do not allow to rename objects in this directory.
4439 4439 */
4440 4440 if (vn_ismntpt(odvp)) {
4441 4441 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4442 4442 goto out;
4443 4443 }
4444 4444
4445 4445 /*
4446 4446 * If there is an unshared filesystem mounted on this vnode,
4447 4447 * do not allow to rename to this directory.
4448 4448 */
4449 4449 if (vn_ismntpt(ndvp)) {
4450 4450 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4451 4451 goto out;
4452 4452 }
4453 4453
4454 4454 if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
4455 4455 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4456 4456 goto out;
4457 4457 }
4458 4458
4459 4459 if (cs->saved_exi != cs->exi) {
4460 4460 *cs->statusp = resp->status = NFS4ERR_XDEV;
4461 4461 goto out;
4462 4462 }
4463 4463
4464 4464 status = utf8_dir_verify(&args->oldname);
4465 4465 if (status != NFS4_OK) {
4466 4466 *cs->statusp = resp->status = status;
4467 4467 goto out;
4468 4468 }
4469 4469
4470 4470 status = utf8_dir_verify(&args->newname);
4471 4471 if (status != NFS4_OK) {
4472 4472 *cs->statusp = resp->status = status;
4473 4473 goto out;
4474 4474 }
4475 4475
4476 4476 onm = utf8_to_fn(&args->oldname, &olen, NULL);
4477 4477 if (onm == NULL) {
4478 4478 *cs->statusp = resp->status = NFS4ERR_INVAL;
4479 4479 goto out;
4480 4480 }
4481 4481 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4482 4482 nlen = MAXPATHLEN + 1;
4483 4483 converted_onm = nfscmd_convname(ca, cs->exi, onm, NFSCMD_CONV_INBOUND,
4484 4484 nlen);
4485 4485
4486 4486 if (converted_onm == NULL) {
4487 4487 *cs->statusp = resp->status = NFS4ERR_INVAL;
4488 4488 kmem_free(onm, olen);
4489 4489 goto out;
4490 4490 }
4491 4491
4492 4492 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4493 4493 if (nnm == NULL) {
4494 4494 *cs->statusp = resp->status = NFS4ERR_INVAL;
4495 4495 if (onm != converted_onm)
4496 4496 kmem_free(converted_onm, MAXPATHLEN + 1);
4497 4497 kmem_free(onm, olen);
4498 4498 goto out;
4499 4499 }
4500 4500 converted_nnm = nfscmd_convname(ca, cs->exi, nnm, NFSCMD_CONV_INBOUND,
4501 4501 MAXPATHLEN + 1);
4502 4502
4503 4503 if (converted_nnm == NULL) {
4504 4504 *cs->statusp = resp->status = NFS4ERR_INVAL;
4505 4505 kmem_free(nnm, nlen);
4506 4506 nnm = NULL;
4507 4507 if (onm != converted_onm)
4508 4508 kmem_free(converted_onm, MAXPATHLEN + 1);
4509 4509 kmem_free(onm, olen);
4510 4510 goto out;
4511 4511 }
4512 4512
4513 4513
4514 4514 if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
4515 4515 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4516 4516 kmem_free(onm, olen);
4517 4517 kmem_free(nnm, nlen);
4518 4518 goto out;
4519 4519 }
4520 4520
4521 4521
4522 4522 if (rdonly4(req, cs)) {
4523 4523 *cs->statusp = resp->status = NFS4ERR_ROFS;
4524 4524 if (onm != converted_onm)
4525 4525 kmem_free(converted_onm, MAXPATHLEN + 1);
4526 4526 kmem_free(onm, olen);
4527 4527 if (nnm != converted_nnm)
4528 4528 kmem_free(converted_nnm, MAXPATHLEN + 1);
4529 4529 kmem_free(nnm, nlen);
4530 4530 goto out;
4531 4531 }
4532 4532
4533 4533 /* check label of the target dir */
4534 4534 if (is_system_labeled()) {
4535 4535 ASSERT(req->rq_label != NULL);
4536 4536 clabel = req->rq_label;
4537 4537 DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *,
4538 4538 "got client label from request(1)",
4539 4539 struct svc_req *, req);
4540 4540 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4541 4541 if (!do_rfs_label_check(clabel, ndvp,
4542 4542 EQUALITY_CHECK, cs->exi)) {
4543 4543 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4544 4544 goto err_out;
4545 4545 }
4546 4546 }
4547 4547 }
4548 4548
4549 4549 /*
4550 4550 * Is the source a file and have a delegation?
4551 4551 * We don't need to acquire va_seq before these lookups, if
4552 4552 * it causes an update, cinfo.before will not match, which will
4553 4553 * trigger a cache flush even if atomic is TRUE.
4554 4554 */
4555 4555 if (sfp = rfs4_lookup_and_findfile(odvp, converted_onm, &srcvp,
4556 4556 &error, cs->cr)) {
4557 4557 if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
4558 4558 NULL)) {
4559 4559 *cs->statusp = resp->status = NFS4ERR_DELAY;
4560 4560 goto err_out;
4561 4561 }
4562 4562 }
4563 4563
4564 4564 if (srcvp == NULL) {
4565 4565 *cs->statusp = resp->status = puterrno4(error);
4566 4566 if (onm != converted_onm)
4567 4567 kmem_free(converted_onm, MAXPATHLEN + 1);
4568 4568 kmem_free(onm, olen);
4569 4569 if (nnm != converted_nnm)
4570 4570 kmem_free(converted_nnm, MAXPATHLEN + 1);
4571 4571 kmem_free(nnm, nlen);
4572 4572 goto out;
4573 4573 }
4574 4574
4575 4575 sfp_rele_grant_hold = 1;
4576 4576
4577 4577 /* Does the destination exist and a file and have a delegation? */
4578 4578 if (fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp,
4579 4579 NULL, cs->cr)) {
4580 4580 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4581 4581 NULL)) {
4582 4582 *cs->statusp = resp->status = NFS4ERR_DELAY;
4583 4583 goto err_out;
4584 4584 }
4585 4585 }
4586 4586 fp_rele_grant_hold = 1;
4587 4587
4588 4588
4589 4589 /* Check for NBMAND lock on both source and target */
4590 4590 if (nbl_need_check(srcvp)) {
4591 4591 nbl_start_crit(srcvp, RW_READER);
4592 4592 in_crit_src = 1;
4593 4593 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4594 4594 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4595 4595 goto err_out;
4596 4596 }
4597 4597 }
4598 4598
4599 4599 if (targvp && nbl_need_check(targvp)) {
4600 4600 nbl_start_crit(targvp, RW_READER);
4601 4601 in_crit_targ = 1;
4602 4602 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
4603 4603 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4604 4604 goto err_out;
4605 4605 }
4606 4606 }
4607 4607
4608 4608 /* Get source "before" change value */
4609 4609 obdva.va_mask = AT_CTIME|AT_SEQ;
4610 4610 error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL);
4611 4611 if (!error) {
4612 4612 nbdva.va_mask = AT_CTIME|AT_SEQ;
4613 4613 error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
4614 4614 }
4615 4615 if (error) {
4616 4616 *cs->statusp = resp->status = puterrno4(error);
4617 4617 goto err_out;
4618 4618 }
4619 4619
4620 4620 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4621 4621 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4622 4622
4623 4623 if ((error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm,
4624 4624 cs->cr, NULL, 0)) == 0 && fp != NULL) {
4625 4625 struct vattr va;
4626 4626 vnode_t *tvp;
4627 4627
4628 4628 rfs4_dbe_lock(fp->rf_dbe);
4629 4629 tvp = fp->rf_vp;
4630 4630 if (tvp)
4631 4631 VN_HOLD(tvp);
4632 4632 rfs4_dbe_unlock(fp->rf_dbe);
4633 4633
4634 4634 if (tvp) {
4635 4635 va.va_mask = AT_NLINK;
4636 4636 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4637 4637 va.va_nlink == 0) {
4638 4638 /* The file is gone and so should the state */
4639 4639 if (in_crit_targ) {
4640 4640 nbl_end_crit(targvp);
4641 4641 in_crit_targ = 0;
4642 4642 }
4643 4643 rfs4_close_all_state(fp);
4644 4644 }
4645 4645 VN_RELE(tvp);
4646 4646 }
4647 4647 }
4648 4648 if (error == 0)
4649 4649 vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4650 4650
4651 4651 if (in_crit_src)
4652 4652 nbl_end_crit(srcvp);
4653 4653 if (srcvp)
4654 4654 VN_RELE(srcvp);
4655 4655 if (in_crit_targ)
4656 4656 nbl_end_crit(targvp);
4657 4657 if (targvp)
4658 4658 VN_RELE(targvp);
4659 4659
4660 4660 if (sfp) {
4661 4661 rfs4_clear_dont_grant(sfp);
4662 4662 rfs4_file_rele(sfp);
4663 4663 }
4664 4664 if (fp) {
4665 4665 rfs4_clear_dont_grant(fp);
4666 4666 rfs4_file_rele(fp);
4667 4667 }
4668 4668
4669 4669 if (converted_onm != onm)
4670 4670 kmem_free(converted_onm, MAXPATHLEN + 1);
4671 4671 kmem_free(onm, olen);
4672 4672 if (converted_nnm != nnm)
4673 4673 kmem_free(converted_nnm, MAXPATHLEN + 1);
4674 4674 kmem_free(nnm, nlen);
4675 4675
4676 4676 /*
4677 4677 * Get the initial "after" sequence number, if it fails, set to zero
4678 4678 */
4679 4679 oidva.va_mask = AT_SEQ;
4680 4680 if (VOP_GETATTR(odvp, &oidva, 0, cs->cr, NULL))
4681 4681 oidva.va_seq = 0;
4682 4682
4683 4683 nidva.va_mask = AT_SEQ;
4684 4684 if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr, NULL))
4685 4685 nidva.va_seq = 0;
4686 4686
4687 4687 /*
4688 4688 * Force modified data and metadata out to stable storage.
4689 4689 */
4690 4690 (void) VOP_FSYNC(odvp, 0, cs->cr, NULL);
4691 4691 (void) VOP_FSYNC(ndvp, 0, cs->cr, NULL);
4692 4692
4693 4693 if (error) {
4694 4694 *cs->statusp = resp->status = puterrno4(error);
4695 4695 goto out;
4696 4696 }
4697 4697
4698 4698 /*
4699 4699 * Get "after" change values, if it fails, simply return the
4700 4700 * before value.
4701 4701 */
4702 4702 oadva.va_mask = AT_CTIME|AT_SEQ;
4703 4703 if (VOP_GETATTR(odvp, &oadva, 0, cs->cr, NULL)) {
4704 4704 oadva.va_ctime = obdva.va_ctime;
4705 4705 oadva.va_seq = 0;
4706 4706 }
4707 4707
4708 4708 nadva.va_mask = AT_CTIME|AT_SEQ;
4709 4709 if (VOP_GETATTR(odvp, &nadva, 0, cs->cr, NULL)) {
4710 4710 nadva.va_ctime = nbdva.va_ctime;
4711 4711 nadva.va_seq = 0;
4712 4712 }
4713 4713
4714 4714 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
4715 4715 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
4716 4716
4717 4717 /*
4718 4718 * The cinfo.atomic = TRUE only if we have
4719 4719 * non-zero va_seq's, and it has incremented by exactly one
4720 4720 * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
4721 4721 */
4722 4722 if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
4723 4723 oidva.va_seq == (obdva.va_seq + 1) && oidva.va_seq == oadva.va_seq)
4724 4724 resp->source_cinfo.atomic = TRUE;
4725 4725 else
4726 4726 resp->source_cinfo.atomic = FALSE;
4727 4727
4728 4728 if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
4729 4729 nidva.va_seq == (nbdva.va_seq + 1) && nidva.va_seq == nadva.va_seq)
4730 4730 resp->target_cinfo.atomic = TRUE;
4731 4731 else
4732 4732 resp->target_cinfo.atomic = FALSE;
4733 4733
4734 4734 #ifdef VOLATILE_FH_TEST
4735 4735 {
4736 4736 extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
4737 4737
4738 4738 /*
4739 4739 * Add the renamed file handle to the volatile rename list
4740 4740 */
4741 4741 if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
4742 4742 /* file handles may expire on rename */
4743 4743 vnode_t *vp;
4744 4744
4745 4745 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4746 4746 /*
4747 4747 * Already know that nnm will be a valid string
4748 4748 */
4749 4749 error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr,
4750 4750 NULL, NULL, NULL);
4751 4751 kmem_free(nnm, nlen);
4752 4752 if (!error) {
4753 4753 add_volrnm_fh(cs->exi, vp);
4754 4754 VN_RELE(vp);
4755 4755 }
4756 4756 }
4757 4757 }
4758 4758 #endif /* VOLATILE_FH_TEST */
4759 4759
4760 4760 *cs->statusp = resp->status = NFS4_OK;
4761 4761 out:
4762 4762 DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4763 4763 RENAME4res *, resp);
4764 4764 return;
4765 4765
4766 4766 err_out:
4767 4767 if (onm != converted_onm)
4768 4768 kmem_free(converted_onm, MAXPATHLEN + 1);
4769 4769 if (onm != NULL)
4770 4770 kmem_free(onm, olen);
4771 4771 if (nnm != converted_nnm)
4772 4772 kmem_free(converted_nnm, MAXPATHLEN + 1);
4773 4773 if (nnm != NULL)
4774 4774 kmem_free(nnm, nlen);
4775 4775
4776 4776 if (in_crit_src) nbl_end_crit(srcvp);
4777 4777 if (in_crit_targ) nbl_end_crit(targvp);
4778 4778 if (targvp) VN_RELE(targvp);
4779 4779 if (srcvp) VN_RELE(srcvp);
4780 4780 if (sfp) {
4781 4781 if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
4782 4782 rfs4_file_rele(sfp);
4783 4783 }
4784 4784 if (fp) {
4785 4785 if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
4786 4786 rfs4_file_rele(fp);
4787 4787 }
4788 4788
4789 4789 DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4790 4790 RENAME4res *, resp);
4791 4791 }
4792 4792
4793 4793 /* ARGSUSED */
4794 4794 static void
4795 4795 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4796 4796 struct compound_state *cs)
4797 4797 {
4798 4798 RENEW4args *args = &argop->nfs_argop4_u.oprenew;
4799 4799 RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
4800 4800 rfs4_client_t *cp;
4801 4801
4802 4802 DTRACE_NFSV4_2(op__renew__start, struct compound_state *, cs,
4803 4803 RENEW4args *, args);
4804 4804
4805 4805 if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
4806 4806 *cs->statusp = resp->status =
4807 4807 rfs4_check_clientid(&args->clientid, 0);
4808 4808 goto out;
4809 4809 }
4810 4810
4811 4811 if (rfs4_lease_expired(cp)) {
4812 4812 rfs4_client_rele(cp);
4813 4813 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
4814 4814 goto out;
4815 4815 }
4816 4816
4817 4817 rfs4_update_lease(cp);
4818 4818
4819 4819 mutex_enter(cp->rc_cbinfo.cb_lock);
4820 4820 if (cp->rc_cbinfo.cb_notified_of_cb_path_down == FALSE) {
4821 4821 cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
4822 4822 *cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
4823 4823 } else {
4824 4824 *cs->statusp = resp->status = NFS4_OK;
4825 4825 }
4826 4826 mutex_exit(cp->rc_cbinfo.cb_lock);
4827 4827
4828 4828 rfs4_client_rele(cp);
4829 4829
4830 4830 out:
4831 4831 DTRACE_NFSV4_2(op__renew__done, struct compound_state *, cs,
4832 4832 RENEW4res *, resp);
4833 4833 }
4834 4834
4835 4835 /* ARGSUSED */
4836 4836 static void
4837 4837 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
4838 4838 struct compound_state *cs)
4839 4839 {
4840 4840 RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
4841 4841
4842 4842 DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs);
4843 4843
4844 4844 /* No need to check cs->access - we are not accessing any object */
4845 4845 if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
4846 4846 *cs->statusp = resp->status = NFS4ERR_RESTOREFH;
4847 4847 goto out;
4848 4848 }
4849 4849 if (cs->vp != NULL) {
4850 4850 VN_RELE(cs->vp);
4851 4851 }
4852 4852 cs->vp = cs->saved_vp;
4853 4853 cs->saved_vp = NULL;
4854 4854 cs->exi = cs->saved_exi;
4855 4855 nfs_fh4_copy(&cs->saved_fh, &cs->fh);
4856 4856 *cs->statusp = resp->status = NFS4_OK;
4857 4857 cs->deleg = FALSE;
4858 4858
4859 4859 out:
4860 4860 DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs,
4861 4861 RESTOREFH4res *, resp);
4862 4862 }
4863 4863
4864 4864 /* ARGSUSED */
4865 4865 static void
4866 4866 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4867 4867 struct compound_state *cs)
4868 4868 {
4869 4869 SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
4870 4870
4871 4871 DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs);
4872 4872
4873 4873 /* No need to check cs->access - we are not accessing any object */
4874 4874 if (cs->vp == NULL) {
4875 4875 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4876 4876 goto out;
4877 4877 }
4878 4878 if (cs->saved_vp != NULL) {
4879 4879 VN_RELE(cs->saved_vp);
4880 4880 }
4881 4881 cs->saved_vp = cs->vp;
4882 4882 VN_HOLD(cs->saved_vp);
4883 4883 cs->saved_exi = cs->exi;
4884 4884 /*
4885 4885 * since SAVEFH is fairly rare, don't alloc space for its fh
4886 4886 * unless necessary.
4887 4887 */
4888 4888 if (cs->saved_fh.nfs_fh4_val == NULL) {
4889 4889 cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
4890 4890 }
4891 4891 nfs_fh4_copy(&cs->fh, &cs->saved_fh);
4892 4892 *cs->statusp = resp->status = NFS4_OK;
4893 4893
4894 4894 out:
4895 4895 DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs,
4896 4896 SAVEFH4res *, resp);
4897 4897 }
4898 4898
4899 4899 /*
4900 4900 * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
4901 4901 * return the bitmap of attrs that were set successfully. It is also
4902 4902 * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
4903 4903 * always be called only after rfs4_do_set_attrs().
4904 4904 *
4905 4905 * Verify that the attributes are same as the expected ones. sargp->vap
4906 4906 * and sargp->sbp contain the input attributes as translated from fattr4.
4907 4907 *
4908 4908 * This function verifies only the attrs that correspond to a vattr or
4909 4909 * vfsstat struct. That is because of the extra step needed to get the
4910 4910 * corresponding system structs. Other attributes have already been set or
4911 4911 * verified by do_rfs4_set_attrs.
4912 4912 *
4913 4913 * Return 0 if all attrs match, -1 if some don't, error if error processing.
4914 4914 */
4915 4915 static int
4916 4916 rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
4917 4917 bitmap4 *resp, struct nfs4_ntov_table *ntovp)
4918 4918 {
4919 4919 int error, ret_error = 0;
4920 4920 int i, k;
4921 4921 uint_t sva_mask = sargp->vap->va_mask;
4922 4922 uint_t vbit;
4923 4923 union nfs4_attr_u *na;
4924 4924 uint8_t *amap;
4925 4925 bool_t getsb = ntovp->vfsstat;
4926 4926
4927 4927 if (sva_mask != 0) {
4928 4928 /*
4929 4929 * Okay to overwrite sargp->vap because we verify based
4930 4930 * on the incoming values.
4931 4931 */
4932 4932 ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
4933 4933 sargp->cs->cr, NULL);
4934 4934 if (ret_error) {
4935 4935 if (resp == NULL)
4936 4936 return (ret_error);
4937 4937 /*
4938 4938 * Must return bitmap of successful attrs
4939 4939 */
4940 4940 sva_mask = 0; /* to prevent checking vap later */
4941 4941 } else {
4942 4942 /*
4943 4943 * Some file systems clobber va_mask. it is probably
4944 4944 * wrong of them to do so, nonethless we practice
4945 4945 * defensive coding.
4946 4946 * See bug id 4276830.
4947 4947 */
4948 4948 sargp->vap->va_mask = sva_mask;
4949 4949 }
4950 4950 }
4951 4951
4952 4952 if (getsb) {
4953 4953 /*
4954 4954 * Now get the superblock and loop on the bitmap, as there is
4955 4955 * no simple way of translating from superblock to bitmap4.
4956 4956 */
4957 4957 ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
4958 4958 if (ret_error) {
4959 4959 if (resp == NULL)
4960 4960 goto errout;
4961 4961 getsb = FALSE;
4962 4962 }
4963 4963 }
4964 4964
4965 4965 /*
4966 4966 * Now loop and verify each attribute which getattr returned
4967 4967 * whether it's the same as the input.
4968 4968 */
4969 4969 if (resp == NULL && !getsb && (sva_mask == 0))
4970 4970 goto errout;
4971 4971
4972 4972 na = ntovp->na;
4973 4973 amap = ntovp->amap;
4974 4974 k = 0;
4975 4975 for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
4976 4976 k = *amap;
4977 4977 ASSERT(nfs4_ntov_map[k].nval == k);
4978 4978 vbit = nfs4_ntov_map[k].vbit;
4979 4979
4980 4980 /*
4981 4981 * If vattr attribute but VOP_GETATTR failed, or it's
4982 4982 * superblock attribute but VFS_STATVFS failed, skip
4983 4983 */
4984 4984 if (vbit) {
4985 4985 if ((vbit & sva_mask) == 0)
4986 4986 continue;
4987 4987 } else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
4988 4988 continue;
4989 4989 }
4990 4990 error = (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_VERIT, sargp, na);
4991 4991 if (resp != NULL) {
4992 4992 if (error)
4993 4993 ret_error = -1; /* not all match */
4994 4994 else /* update response bitmap */
4995 4995 *resp |= nfs4_ntov_map[k].fbit;
4996 4996 continue;
4997 4997 }
4998 4998 if (error) {
4999 4999 ret_error = -1; /* not all match */
5000 5000 break;
5001 5001 }
5002 5002 }
5003 5003 errout:
5004 5004 return (ret_error);
5005 5005 }
5006 5006
5007 5007 /*
5008 5008 * Decode the attribute to be set/verified. If the attr requires a sys op
5009 5009 * (VOP_GETATTR, VFS_VFSSTAT), and the request is to verify, then don't
5010 5010 * call the sv_getit function for it, because the sys op hasn't yet been done.
5011 5011 * Return 0 for success, error code if failed.
5012 5012 *
5013 5013 * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
5014 5014 */
5015 5015 static int
5016 5016 decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
5017 5017 int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
5018 5018 {
5019 5019 int error = 0;
5020 5020 bool_t set_later;
5021 5021
5022 5022 sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
5023 5023
5024 5024 if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
5025 5025 set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
5026 5026 /*
5027 5027 * don't verify yet if a vattr or sb dependent attr,
5028 5028 * because we don't have their sys values yet.
5029 5029 * Will be done later.
5030 5030 */
5031 5031 if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
5032 5032 /*
5033 5033 * ACLs are a special case, since setting the MODE
5034 5034 * conflicts with setting the ACL. We delay setting
5035 5035 * the ACL until all other attributes have been set.
5036 5036 * The ACL gets set in do_rfs4_op_setattr().
5037 5037 */
5038 5038 if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
5039 5039 error = (*nfs4_ntov_map[k].sv_getit)(cmd,
5040 5040 sargp, nap);
5041 5041 if (error) {
5042 5042 xdr_free(nfs4_ntov_map[k].xfunc,
5043 5043 (caddr_t)nap);
5044 5044 }
5045 5045 }
5046 5046 }
5047 5047 } else {
5048 5048 #ifdef DEBUG
5049 5049 cmn_err(CE_NOTE, "decode_fattr4_attr: error "
5050 5050 "decoding attribute %d\n", k);
5051 5051 #endif
5052 5052 error = EINVAL;
5053 5053 }
5054 5054 if (!error && resp_bval && !set_later) {
5055 5055 *resp_bval |= nfs4_ntov_map[k].fbit;
5056 5056 }
5057 5057
5058 5058 return (error);
5059 5059 }
5060 5060
5061 5061 /*
5062 5062 * Set vattr based on incoming fattr4 attrs - used by setattr.
5063 5063 * Set response mask. Ignore any values that are not writable vattr attrs.
5064 5064 */
5065 5065 static nfsstat4
5066 5066 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5067 5067 struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
5068 5068 nfs4_attr_cmd_t cmd)
5069 5069 {
5070 5070 int error = 0;
5071 5071 int i;
5072 5072 char *attrs = fattrp->attrlist4;
5073 5073 uint32_t attrslen = fattrp->attrlist4_len;
5074 5074 XDR xdr;
5075 5075 nfsstat4 status = NFS4_OK;
5076 5076 vnode_t *vp = cs->vp;
5077 5077 union nfs4_attr_u *na;
5078 5078 uint8_t *amap;
5079 5079
5080 5080 #ifndef lint
5081 5081 /*
5082 5082 * Make sure that maximum attribute number can be expressed as an
5083 5083 * 8 bit quantity.
5084 5084 */
5085 5085 ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
5086 5086 #endif
5087 5087
5088 5088 if (vp == NULL) {
5089 5089 if (resp)
5090 5090 *resp = 0;
5091 5091 return (NFS4ERR_NOFILEHANDLE);
5092 5092 }
5093 5093 if (cs->access == CS_ACCESS_DENIED) {
5094 5094 if (resp)
5095 5095 *resp = 0;
5096 5096 return (NFS4ERR_ACCESS);
5097 5097 }
5098 5098
5099 5099 sargp->op = cmd;
5100 5100 sargp->cs = cs;
5101 5101 sargp->flag = 0; /* may be set later */
5102 5102 sargp->vap->va_mask = 0;
5103 5103 sargp->rdattr_error = NFS4_OK;
5104 5104 sargp->rdattr_error_req = FALSE;
5105 5105 /* sargp->sbp is set by the caller */
5106 5106
5107 5107 xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
5108 5108
5109 5109 na = ntovp->na;
5110 5110 amap = ntovp->amap;
5111 5111
5112 5112 /*
5113 5113 * The following loop iterates on the nfs4_ntov_map checking
5114 5114 * if the fbit is set in the requested bitmap.
5115 5115 * If set then we process the arguments using the
5116 5116 * rfs4_fattr4 conversion functions to populate the setattr
5117 5117 * vattr and va_mask. Any settable attrs that are not using vattr
5118 5118 * will be set in this loop.
5119 5119 */
5120 5120 for (i = 0; i < nfs4_ntov_map_size; i++) {
5121 5121 if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
5122 5122 continue;
5123 5123 }
5124 5124 /*
5125 5125 * If setattr, must be a writable attr.
5126 5126 * If verify/nverify, must be a readable attr.
5127 5127 */
5128 5128 if ((error = (*nfs4_ntov_map[i].sv_getit)(
5129 5129 NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
5130 5130 /*
5131 5131 * Client tries to set/verify an
5132 5132 * unsupported attribute, tries to set
5133 5133 * a read only attr or verify a write
5134 5134 * only one - error!
5135 5135 */
5136 5136 break;
5137 5137 }
5138 5138 /*
5139 5139 * Decode the attribute to set/verify
5140 5140 */
5141 5141 error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
5142 5142 &xdr, resp ? resp : NULL, na);
5143 5143 if (error)
5144 5144 break;
5145 5145 *amap++ = (uint8_t)nfs4_ntov_map[i].nval;
5146 5146 na++;
5147 5147 (ntovp->attrcnt)++;
5148 5148 if (nfs4_ntov_map[i].vfsstat)
5149 5149 ntovp->vfsstat = TRUE;
5150 5150 }
5151 5151
5152 5152 if (error != 0)
5153 5153 status = (error == ENOTSUP ? NFS4ERR_ATTRNOTSUPP :
5154 5154 puterrno4(error));
5155 5155 /* xdrmem_destroy(&xdrs); */ /* NO-OP */
5156 5156 return (status);
5157 5157 }
5158 5158
5159 5159 static nfsstat4
5160 5160 do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5161 5161 stateid4 *stateid)
5162 5162 {
5163 5163 int error = 0;
5164 5164 struct nfs4_svgetit_arg sarg;
5165 5165 bool_t trunc;
5166 5166
5167 5167 nfsstat4 status = NFS4_OK;
5168 5168 cred_t *cr = cs->cr;
5169 5169 vnode_t *vp = cs->vp;
5170 5170 struct nfs4_ntov_table ntov;
5171 5171 struct statvfs64 sb;
5172 5172 struct vattr bva;
5173 5173 struct flock64 bf;
5174 5174 int in_crit = 0;
5175 5175 uint_t saved_mask = 0;
5176 5176 caller_context_t ct;
5177 5177
5178 5178 *resp = 0;
5179 5179 sarg.sbp = &sb;
5180 5180 sarg.is_referral = B_FALSE;
5181 5181 nfs4_ntov_table_init(&ntov);
5182 5182 status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
5183 5183 NFS4ATTR_SETIT);
5184 5184 if (status != NFS4_OK) {
5185 5185 /*
5186 5186 * failed set attrs
5187 5187 */
5188 5188 goto done;
5189 5189 }
5190 5190 if ((sarg.vap->va_mask == 0) &&
5191 5191 (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
5192 5192 /*
5193 5193 * no further work to be done
5194 5194 */
5195 5195 goto done;
5196 5196 }
5197 5197
5198 5198 /*
5199 5199 * If we got a request to set the ACL and the MODE, only
5200 5200 * allow changing VSUID, VSGID, and VSVTX. Attempting
5201 5201 * to change any other bits, along with setting an ACL,
5202 5202 * gives NFS4ERR_INVAL.
5203 5203 */
5204 5204 if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
5205 5205 (fattrp->attrmask & FATTR4_MODE_MASK)) {
5206 5206 vattr_t va;
5207 5207
5208 5208 va.va_mask = AT_MODE;
5209 5209 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
5210 5210 if (error) {
5211 5211 status = puterrno4(error);
5212 5212 goto done;
5213 5213 }
5214 5214 if ((sarg.vap->va_mode ^ va.va_mode) &
5215 5215 ~(VSUID | VSGID | VSVTX)) {
5216 5216 status = NFS4ERR_INVAL;
5217 5217 goto done;
5218 5218 }
5219 5219 }
5220 5220
5221 5221 /* Check stateid only if size has been set */
5222 5222 if (sarg.vap->va_mask & AT_SIZE) {
5223 5223 trunc = (sarg.vap->va_size == 0);
5224 5224 status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
5225 5225 trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE, &ct);
5226 5226 if (status != NFS4_OK)
5227 5227 goto done;
5228 5228 } else {
5229 5229 ct.cc_sysid = 0;
5230 5230 ct.cc_pid = 0;
5231 5231 ct.cc_caller_id = nfs4_srv_caller_id;
5232 5232 ct.cc_flags = CC_DONTBLOCK;
5233 5233 }
5234 5234
5235 5235 /* XXX start of possible race with delegations */
5236 5236
5237 5237 /*
5238 5238 * We need to specially handle size changes because it is
5239 5239 * possible for the client to create a file with read-only
5240 5240 * modes, but with the file opened for writing. If the client
5241 5241 * then tries to set the file size, e.g. ftruncate(3C),
5242 5242 * fcntl(F_FREESP), the normal access checking done in
5243 5243 * VOP_SETATTR would prevent the client from doing it even though
5244 5244 * it should be allowed to do so. To get around this, we do the
5245 5245 * access checking for ourselves and use VOP_SPACE which doesn't
5246 5246 * do the access checking.
5247 5247 * Also the client should not be allowed to change the file
5248 5248 * size if there is a conflicting non-blocking mandatory lock in
5249 5249 * the region of the change.
5250 5250 */
5251 5251 if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
5252 5252 u_offset_t offset;
5253 5253 ssize_t length;
5254 5254
5255 5255 /*
5256 5256 * ufs_setattr clears AT_SIZE from vap->va_mask, but
5257 5257 * before returning, sarg.vap->va_mask is used to
5258 5258 * generate the setattr reply bitmap. We also clear
5259 5259 * AT_SIZE below before calling VOP_SPACE. For both
5260 5260 * of these cases, the va_mask needs to be saved here
5261 5261 * and restored after calling VOP_SETATTR.
5262 5262 */
5263 5263 saved_mask = sarg.vap->va_mask;
5264 5264
5265 5265 /*
5266 5266 * Check any possible conflict due to NBMAND locks.
5267 5267 * Get into critical region before VOP_GETATTR, so the
5268 5268 * size attribute is valid when checking conflicts.
5269 5269 */
5270 5270 if (nbl_need_check(vp)) {
5271 5271 nbl_start_crit(vp, RW_READER);
5272 5272 in_crit = 1;
5273 5273 }
5274 5274
5275 5275 bva.va_mask = AT_UID|AT_SIZE;
5276 5276 if (error = VOP_GETATTR(vp, &bva, 0, cr, &ct)) {
5277 5277 status = puterrno4(error);
5278 5278 goto done;
5279 5279 }
5280 5280
5281 5281 if (in_crit) {
5282 5282 if (sarg.vap->va_size < bva.va_size) {
5283 5283 offset = sarg.vap->va_size;
5284 5284 length = bva.va_size - sarg.vap->va_size;
5285 5285 } else {
5286 5286 offset = bva.va_size;
5287 5287 length = sarg.vap->va_size - bva.va_size;
5288 5288 }
5289 5289 if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
5290 5290 &ct)) {
5291 5291 status = NFS4ERR_LOCKED;
5292 5292 goto done;
5293 5293 }
5294 5294 }
5295 5295
5296 5296 if (crgetuid(cr) == bva.va_uid) {
5297 5297 sarg.vap->va_mask &= ~AT_SIZE;
5298 5298 bf.l_type = F_WRLCK;
5299 5299 bf.l_whence = 0;
5300 5300 bf.l_start = (off64_t)sarg.vap->va_size;
5301 5301 bf.l_len = 0;
5302 5302 bf.l_sysid = 0;
5303 5303 bf.l_pid = 0;
5304 5304 error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
5305 5305 (offset_t)sarg.vap->va_size, cr, &ct);
5306 5306 }
5307 5307 }
5308 5308
5309 5309 if (!error && sarg.vap->va_mask != 0)
5310 5310 error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);
5311 5311
5312 5312 /* restore va_mask -- ufs_setattr clears AT_SIZE */
5313 5313 if (saved_mask & AT_SIZE)
5314 5314 sarg.vap->va_mask |= AT_SIZE;
5315 5315
5316 5316 /*
5317 5317 * If an ACL was being set, it has been delayed until now,
5318 5318 * in order to set the mode (via the VOP_SETATTR() above) first.
5319 5319 */
5320 5320 if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
5321 5321 int i;
5322 5322
5323 5323 for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
5324 5324 if (ntov.amap[i] == FATTR4_ACL)
5325 5325 break;
5326 5326 if (i < NFS4_MAXNUM_ATTRS) {
5327 5327 error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
5328 5328 NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
5329 5329 if (error == 0) {
5330 5330 *resp |= FATTR4_ACL_MASK;
5331 5331 } else if (error == ENOTSUP) {
5332 5332 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5333 5333 status = NFS4ERR_ATTRNOTSUPP;
5334 5334 goto done;
5335 5335 }
5336 5336 } else {
5337 5337 NFS4_DEBUG(rfs4_debug,
5338 5338 (CE_NOTE, "do_rfs4_op_setattr: "
5339 5339 "unable to find ACL in fattr4"));
5340 5340 error = EINVAL;
5341 5341 }
5342 5342 }
5343 5343
5344 5344 if (error) {
5345 5345 /* check if a monitor detected a delegation conflict */
5346 5346 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
5347 5347 status = NFS4ERR_DELAY;
5348 5348 else
5349 5349 status = puterrno4(error);
5350 5350
5351 5351 /*
5352 5352 * Set the response bitmap when setattr failed.
5353 5353 * If VOP_SETATTR partially succeeded, test by doing a
5354 5354 * VOP_GETATTR on the object and comparing the data
5355 5355 * to the setattr arguments.
5356 5356 */
5357 5357 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5358 5358 } else {
5359 5359 /*
5360 5360 * Force modified metadata out to stable storage.
5361 5361 */
5362 5362 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
5363 5363 /*
5364 5364 * Set response bitmap
5365 5365 */
5366 5366 nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
5367 5367 }
5368 5368
5369 5369 /* Return early and already have a NFSv4 error */
5370 5370 done:
5371 5371 /*
5372 5372 * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
5373 5373 * conversion sets both readable and writeable NFS4 attrs
5374 5374 * for AT_MTIME and AT_ATIME. The line below masks out
5375 5375 * unrequested attrs from the setattr result bitmap. This
5376 5376 * is placed after the done: label to catch the ATTRNOTSUP
5377 5377 * case.
5378 5378 */
5379 5379 *resp &= fattrp->attrmask;
5380 5380
5381 5381 if (in_crit)
5382 5382 nbl_end_crit(vp);
5383 5383
5384 5384 nfs4_ntov_table_free(&ntov, &sarg);
5385 5385
5386 5386 return (status);
5387 5387 }
5388 5388
5389 5389 /* ARGSUSED */
5390 5390 static void
5391 5391 rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5392 5392 struct compound_state *cs)
5393 5393 {
5394 5394 SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
5395 5395 SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
5396 5396 bslabel_t *clabel;
5397 5397
5398 5398 DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs,
5399 5399 SETATTR4args *, args);
5400 5400
5401 5401 if (cs->vp == NULL) {
5402 5402 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5403 5403 goto out;
5404 5404 }
5405 5405
5406 5406 /*
5407 5407 * If there is an unshared filesystem mounted on this vnode,
5408 5408 * do not allow to setattr on this vnode.
5409 5409 */
5410 5410 if (vn_ismntpt(cs->vp)) {
5411 5411 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5412 5412 goto out;
5413 5413 }
5414 5414
5415 5415 resp->attrsset = 0;
5416 5416
5417 5417 if (rdonly4(req, cs)) {
5418 5418 *cs->statusp = resp->status = NFS4ERR_ROFS;
5419 5419 goto out;
5420 5420 }
5421 5421
5422 5422 /* check label before setting attributes */
5423 5423 if (is_system_labeled()) {
5424 5424 ASSERT(req->rq_label != NULL);
5425 5425 clabel = req->rq_label;
5426 5426 DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *,
5427 5427 "got client label from request(1)",
5428 5428 struct svc_req *, req);
5429 5429 if (!blequal(&l_admin_low->tsl_label, clabel)) {
5430 5430 if (!do_rfs_label_check(clabel, cs->vp,
5431 5431 EQUALITY_CHECK, cs->exi)) {
5432 5432 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5433 5433 goto out;
5434 5434 }
5435 5435 }
5436 5436 }
5437 5437
5438 5438 *cs->statusp = resp->status =
5439 5439 do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
5440 5440 &args->stateid);
5441 5441
5442 5442 out:
5443 5443 DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs,
5444 5444 SETATTR4res *, resp);
5445 5445 }
5446 5446
5447 5447 /* ARGSUSED */
5448 5448 static void
5449 5449 rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5450 5450 struct compound_state *cs)
5451 5451 {
5452 5452 /*
5453 5453 * verify and nverify are exactly the same, except that nverify
5454 5454 * succeeds when some argument changed, and verify succeeds when
5455 5455 * when none changed.
5456 5456 */
5457 5457
5458 5458 VERIFY4args *args = &argop->nfs_argop4_u.opverify;
5459 5459 VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
5460 5460
5461 5461 int error;
5462 5462 struct nfs4_svgetit_arg sarg;
5463 5463 struct statvfs64 sb;
5464 5464 struct nfs4_ntov_table ntov;
5465 5465
5466 5466 DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs,
5467 5467 VERIFY4args *, args);
5468 5468
5469 5469 if (cs->vp == NULL) {
5470 5470 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5471 5471 goto out;
5472 5472 }
5473 5473
5474 5474 sarg.sbp = &sb;
5475 5475 sarg.is_referral = B_FALSE;
5476 5476 nfs4_ntov_table_init(&ntov);
5477 5477 resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5478 5478 &sarg, &ntov, NFS4ATTR_VERIT);
5479 5479 if (resp->status != NFS4_OK) {
5480 5480 /*
5481 5481 * do_rfs4_set_attrs will try to verify systemwide attrs,
5482 5482 * so could return -1 for "no match".
5483 5483 */
5484 5484 if (resp->status == -1)
5485 5485 resp->status = NFS4ERR_NOT_SAME;
5486 5486 goto done;
5487 5487 }
5488 5488 error = rfs4_verify_attr(&sarg, NULL, &ntov);
5489 5489 switch (error) {
5490 5490 case 0:
5491 5491 resp->status = NFS4_OK;
5492 5492 break;
5493 5493 case -1:
5494 5494 resp->status = NFS4ERR_NOT_SAME;
5495 5495 break;
5496 5496 default:
5497 5497 resp->status = puterrno4(error);
5498 5498 break;
5499 5499 }
5500 5500 done:
5501 5501 *cs->statusp = resp->status;
5502 5502 nfs4_ntov_table_free(&ntov, &sarg);
5503 5503 out:
5504 5504 DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs,
5505 5505 VERIFY4res *, resp);
5506 5506 }
5507 5507
5508 5508 /* ARGSUSED */
5509 5509 static void
5510 5510 rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5511 5511 struct compound_state *cs)
5512 5512 {
5513 5513 /*
5514 5514 * verify and nverify are exactly the same, except that nverify
5515 5515 * succeeds when some argument changed, and verify succeeds when
5516 5516 * when none changed.
5517 5517 */
5518 5518
5519 5519 NVERIFY4args *args = &argop->nfs_argop4_u.opnverify;
5520 5520 NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
5521 5521
5522 5522 int error;
5523 5523 struct nfs4_svgetit_arg sarg;
5524 5524 struct statvfs64 sb;
5525 5525 struct nfs4_ntov_table ntov;
5526 5526
5527 5527 DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs,
5528 5528 NVERIFY4args *, args);
5529 5529
5530 5530 if (cs->vp == NULL) {
5531 5531 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5532 5532 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5533 5533 NVERIFY4res *, resp);
5534 5534 return;
5535 5535 }
5536 5536 sarg.sbp = &sb;
5537 5537 sarg.is_referral = B_FALSE;
5538 5538 nfs4_ntov_table_init(&ntov);
5539 5539 resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5540 5540 &sarg, &ntov, NFS4ATTR_VERIT);
5541 5541 if (resp->status != NFS4_OK) {
5542 5542 /*
5543 5543 * do_rfs4_set_attrs will try to verify systemwide attrs,
5544 5544 * so could return -1 for "no match".
5545 5545 */
5546 5546 if (resp->status == -1)
5547 5547 resp->status = NFS4_OK;
5548 5548 goto done;
5549 5549 }
5550 5550 error = rfs4_verify_attr(&sarg, NULL, &ntov);
5551 5551 switch (error) {
5552 5552 case 0:
5553 5553 resp->status = NFS4ERR_SAME;
5554 5554 break;
5555 5555 case -1:
5556 5556 resp->status = NFS4_OK;
5557 5557 break;
5558 5558 default:
5559 5559 resp->status = puterrno4(error);
5560 5560 break;
5561 5561 }
5562 5562 done:
5563 5563 *cs->statusp = resp->status;
5564 5564 nfs4_ntov_table_free(&ntov, &sarg);
5565 5565
5566 5566 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5567 5567 NVERIFY4res *, resp);
5568 5568 }
5569 5569
5570 5570 /*
5571 5571 * XXX - This should live in an NFS header file.
5572 5572 */
5573 5573 #define MAX_IOVECS 12
5574 5574
5575 5575 /* ARGSUSED */
5576 5576 static void
5577 5577 rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5578 5578 struct compound_state *cs)
5579 5579 {
5580 5580 WRITE4args *args = &argop->nfs_argop4_u.opwrite;
5581 5581 WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
5582 5582 int error;
5583 5583 vnode_t *vp;
5584 5584 struct vattr bva;
5585 5585 u_offset_t rlimit;
5586 5586 struct uio uio;
5587 5587 struct iovec iov[MAX_IOVECS];
5588 5588 struct iovec *iovp;
5589 5589 int iovcnt;
5590 5590 int ioflag;
5591 5591 cred_t *savecred, *cr;
5592 5592 bool_t *deleg = &cs->deleg;
5593 5593 nfsstat4 stat;
5594 5594 int in_crit = 0;
5595 5595 caller_context_t ct;
5596 5596
5597 5597 DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
5598 5598 WRITE4args *, args);
5599 5599
5600 5600 vp = cs->vp;
5601 5601 if (vp == NULL) {
5602 5602 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5603 5603 goto out;
5604 5604 }
5605 5605 if (cs->access == CS_ACCESS_DENIED) {
5606 5606 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5607 5607 goto out;
5608 5608 }
5609 5609
5610 5610 cr = cs->cr;
5611 5611
5612 5612 if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
5613 5613 deleg, TRUE, &ct)) != NFS4_OK) {
5614 5614 *cs->statusp = resp->status = stat;
5615 5615 goto out;
5616 5616 }
5617 5617
5618 5618 /*
5619 5619 * We have to enter the critical region before calling VOP_RWLOCK
5620 5620 * to avoid a deadlock with ufs.
5621 5621 */
5622 5622 if (nbl_need_check(vp)) {
5623 5623 nbl_start_crit(vp, RW_READER);
5624 5624 in_crit = 1;
5625 5625 if (nbl_conflict(vp, NBL_WRITE,
5626 5626 args->offset, args->data_len, 0, &ct)) {
5627 5627 *cs->statusp = resp->status = NFS4ERR_LOCKED;
5628 5628 goto out;
5629 5629 }
5630 5630 }
5631 5631
5632 5632 bva.va_mask = AT_MODE | AT_UID;
5633 5633 error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
5634 5634
5635 5635 /*
5636 5636 * If we can't get the attributes, then we can't do the
5637 5637 * right access checking. So, we'll fail the request.
5638 5638 */
5639 5639 if (error) {
5640 5640 *cs->statusp = resp->status = puterrno4(error);
5641 5641 goto out;
5642 5642 }
5643 5643
5644 5644 if (rdonly4(req, cs)) {
5645 5645 *cs->statusp = resp->status = NFS4ERR_ROFS;
5646 5646 goto out;
5647 5647 }
5648 5648
5649 5649 if (vp->v_type != VREG) {
5650 5650 *cs->statusp = resp->status =
5651 5651 ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
5652 5652 goto out;
5653 5653 }
5654 5654
5655 5655 if (crgetuid(cr) != bva.va_uid &&
5656 5656 (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
5657 5657 *cs->statusp = resp->status = puterrno4(error);
5658 5658 goto out;
5659 5659 }
5660 5660
5661 5661 if (MANDLOCK(vp, bva.va_mode)) {
5662 5662 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5663 5663 goto out;
5664 5664 }
5665 5665
5666 5666 if (args->data_len == 0) {
5667 5667 *cs->statusp = resp->status = NFS4_OK;
5668 5668 resp->count = 0;
5669 5669 resp->committed = args->stable;
5670 5670 resp->writeverf = Write4verf;
5671 5671 goto out;
5672 5672 }
5673 5673
5674 5674 if (args->mblk != NULL) {
5675 5675 mblk_t *m;
5676 5676 uint_t bytes, round_len;
5677 5677
5678 5678 iovcnt = 0;
5679 5679 bytes = 0;
5680 5680 round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
5681 5681 for (m = args->mblk;
5682 5682 m != NULL && bytes < round_len;
5683 5683 m = m->b_cont) {
5684 5684 iovcnt++;
5685 5685 bytes += MBLKL(m);
5686 5686 }
5687 5687 #ifdef DEBUG
5688 5688 /* should have ended on an mblk boundary */
5689 5689 if (bytes != round_len) {
5690 5690 printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
5691 5691 bytes, round_len, args->data_len);
5692 5692 printf("args=%p, args->mblk=%p, m=%p", (void *)args,
5693 5693 (void *)args->mblk, (void *)m);
5694 5694 ASSERT(bytes == round_len);
5695 5695 }
5696 5696 #endif
5697 5697 if (iovcnt <= MAX_IOVECS) {
5698 5698 iovp = iov;
5699 5699 } else {
5700 5700 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
5701 5701 }
5702 5702 mblk_to_iov(args->mblk, iovcnt, iovp);
5703 5703 } else if (args->rlist != NULL) {
5704 5704 iovcnt = 1;
5705 5705 iovp = iov;
5706 5706 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
5707 5707 iovp->iov_len = args->data_len;
5708 5708 } else {
5709 5709 iovcnt = 1;
5710 5710 iovp = iov;
5711 5711 iovp->iov_base = args->data_val;
5712 5712 iovp->iov_len = args->data_len;
5713 5713 }
5714 5714
5715 5715 uio.uio_iov = iovp;
5716 5716 uio.uio_iovcnt = iovcnt;
5717 5717
5718 5718 uio.uio_segflg = UIO_SYSSPACE;
5719 5719 uio.uio_extflg = UIO_COPY_DEFAULT;
5720 5720 uio.uio_loffset = args->offset;
5721 5721 uio.uio_resid = args->data_len;
5722 5722 uio.uio_llimit = curproc->p_fsz_ctl;
5723 5723 rlimit = uio.uio_llimit - args->offset;
5724 5724 if (rlimit < (u_offset_t)uio.uio_resid)
5725 5725 uio.uio_resid = (int)rlimit;
5726 5726
5727 5727 if (args->stable == UNSTABLE4)
5728 5728 ioflag = 0;
5729 5729 else if (args->stable == FILE_SYNC4)
5730 5730 ioflag = FSYNC;
5731 5731 else if (args->stable == DATA_SYNC4)
5732 5732 ioflag = FDSYNC;
5733 5733 else {
5734 5734 if (iovp != iov)
5735 5735 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5736 5736 *cs->statusp = resp->status = NFS4ERR_INVAL;
5737 5737 goto out;
5738 5738 }
5739 5739
5740 5740 /*
5741 5741 * We're changing creds because VM may fault and we need
5742 5742 * the cred of the current thread to be used if quota
5743 5743 * checking is enabled.
5744 5744 */
5745 5745 savecred = curthread->t_cred;
5746 5746 curthread->t_cred = cr;
5747 5747 error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
5748 5748 curthread->t_cred = savecred;
5749 5749
5750 5750 if (iovp != iov)
5751 5751 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5752 5752
5753 5753 if (error) {
5754 5754 *cs->statusp = resp->status = puterrno4(error);
5755 5755 goto out;
5756 5756 }
5757 5757
5758 5758 *cs->statusp = resp->status = NFS4_OK;
5759 5759 resp->count = args->data_len - uio.uio_resid;
5760 5760
5761 5761 if (ioflag == 0)
5762 5762 resp->committed = UNSTABLE4;
5763 5763 else
5764 5764 resp->committed = FILE_SYNC4;
5765 5765
5766 5766 resp->writeverf = Write4verf;
5767 5767
5768 5768 out:
5769 5769 if (in_crit)
5770 5770 nbl_end_crit(vp);
5771 5771
5772 5772 DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
5773 5773 WRITE4res *, resp);
5774 5774 }
5775 5775
5776 5776
5777 5777 /* XXX put in a header file */
5778 5778 extern int sec_svc_getcred(struct svc_req *, cred_t *, caddr_t *, int *);
5779 5779
5780 5780 void
5781 5781 rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
5782 5782 struct svc_req *req, cred_t *cr, int *rv)
5783 5783 {
5784 5784 uint_t i;
5785 5785 struct compound_state cs;
5786 5786
5787 5787 if (rv != NULL)
5788 5788 *rv = 0;
5789 5789 rfs4_init_compound_state(&cs);
5790 5790 /*
5791 5791 * Form a reply tag by copying over the reqeuest tag.
5792 5792 */
5793 5793 resp->tag.utf8string_val =
5794 5794 kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
5795 5795 resp->tag.utf8string_len = args->tag.utf8string_len;
5796 5796 bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
5797 5797 resp->tag.utf8string_len);
5798 5798
5799 5799 cs.statusp = &resp->status;
5800 5800 cs.req = req;
5801 5801 resp->array = NULL;
5802 5802 resp->array_len = 0;
5803 5803
5804 5804 /*
5805 5805 * XXX for now, minorversion should be zero
5806 5806 */
5807 5807 if (args->minorversion != NFS4_MINORVERSION) {
5808 5808 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5809 5809 &cs, COMPOUND4args *, args);
5810 5810 resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
5811 5811 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5812 5812 &cs, COMPOUND4res *, resp);
5813 5813 return;
5814 5814 }
5815 5815
5816 5816 if (args->array_len == 0) {
5817 5817 resp->status = NFS4_OK;
5818 5818 return;
5819 5819 }
5820 5820
5821 5821 ASSERT(exi == NULL);
5822 5822 ASSERT(cr == NULL);
5823 5823
5824 5824 cr = crget();
5825 5825 ASSERT(cr != NULL);
5826 5826
5827 5827 if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
5828 5828 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5829 5829 &cs, COMPOUND4args *, args);
5830 5830 crfree(cr);
5831 5831 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5832 5832 &cs, COMPOUND4res *, resp);
5833 5833 svcerr_badcred(req->rq_xprt);
5834 5834 if (rv != NULL)
5835 5835 *rv = 1;
5836 5836 return;
5837 5837 }
5838 5838 resp->array_len = args->array_len;
5839 5839 resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
5840 5840 KM_SLEEP);
5841 5841
5842 5842 cs.basecr = cr;
5843 5843
5844 5844 DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs,
5845 5845 COMPOUND4args *, args);
5846 5846
5847 5847 /*
5848 5848 * For now, NFS4 compound processing must be protected by
5849 5849 * exported_lock because it can access more than one exportinfo
5850 5850 * per compound and share/unshare can now change multiple
5851 5851 * exinfo structs. The NFS2/3 code only refs 1 exportinfo
5852 5852 * per proc (excluding public exinfo), and exi_count design
5853 5853 * is sufficient to protect concurrent execution of NFS2/3
5854 5854 * ops along with unexport. This lock will be removed as
5855 5855 * part of the NFSv4 phase 2 namespace redesign work.
5856 5856 */
5857 5857 rw_enter(&exported_lock, RW_READER);
5858 5858
5859 5859 /*
5860 5860 * If this is the first compound we've seen, we need to start all
5861 5861 * new instances' grace periods.
5862 5862 */
5863 5863 if (rfs4_seen_first_compound == 0) {
5864 5864 rfs4_grace_start_new();
5865 5865 /*
5866 5866 * This must be set after rfs4_grace_start_new(), otherwise
5867 5867 * another thread could proceed past here before the former
5868 5868 * is finished.
5869 5869 */
5870 5870 rfs4_seen_first_compound = 1;
5871 5871 }
5872 5872
5873 5873 for (i = 0; i < args->array_len && cs.cont; i++) {
5874 5874 nfs_argop4 *argop;
5875 5875 nfs_resop4 *resop;
5876 5876 uint_t op;
5877 5877
5878 5878 argop = &args->array[i];
5879 5879 resop = &resp->array[i];
5880 5880 resop->resop = argop->argop;
5881 5881 op = (uint_t)resop->resop;
5882 5882
5883 5883 if (op < rfsv4disp_cnt) {
5884 5884 kstat_t *ksp = rfsprocio_v4_ptr[op];
5885 5885 kstat_t *exi_ksp = NULL;
5886 5886
5887 5887 /*
5888 5888 * Count the individual ops here; NULL and COMPOUND
5889 5889 * are counted in common_dispatch()
5890 5890 */
5891 5891 rfsproccnt_v4_ptr[op].value.ui64++;
5892 5892
5893 5893 if (ksp != NULL) {
5894 5894 mutex_enter(ksp->ks_lock);
5895 5895 kstat_runq_enter(KSTAT_IO_PTR(ksp));
5896 5896 mutex_exit(ksp->ks_lock);
5897 5897 }
5898 5898
5899 5899 switch (rfsv4disptab[op].op_type) {
5900 5900 case NFS4_OP_CFH:
5901 5901 resop->exi = cs.exi;
5902 5902 break;
5903 5903 case NFS4_OP_SFH:
5904 5904 resop->exi = cs.saved_exi;
5905 5905 break;
5906 5906 default:
5907 5907 ASSERT(resop->exi == NULL);
5908 5908 break;
5909 5909 }
5910 5910
5911 5911 if (resop->exi != NULL) {
5912 5912 exi_ksp = resop->exi->exi_kstats->
5913 5913 rfsprocio_v4_ptr[op];
5914 5914 if (exi_ksp != NULL) {
5915 5915 mutex_enter(exi_ksp->ks_lock);
5916 5916 kstat_runq_enter(KSTAT_IO_PTR(exi_ksp));
5917 5917 mutex_exit(exi_ksp->ks_lock);
5918 5918 }
5919 5919 }
5920 5920
5921 5921 NFS4_DEBUG(rfs4_debug > 1,
5922 5922 (CE_NOTE, "Executing %s", rfs4_op_string[op]));
5923 5923 (*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
5924 5924 NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
5925 5925 rfs4_op_string[op], *cs.statusp));
5926 5926 if (*cs.statusp != NFS4_OK)
5927 5927 cs.cont = FALSE;
5928 5928
5929 5929 if (rfsv4disptab[op].op_type == NFS4_OP_POSTCFH &&
5930 5930 *cs.statusp == NFS4_OK &&
5931 5931 (resop->exi = cs.exi) != NULL) {
5932 5932 exi_ksp = resop->exi->exi_kstats->
5933 5933 rfsprocio_v4_ptr[op];
5934 5934 }
5935 5935
5936 5936 if (exi_ksp != NULL) {
5937 5937 mutex_enter(exi_ksp->ks_lock);
5938 5938 KSTAT_IO_PTR(exi_ksp)->nwritten +=
5939 5939 argop->opsize;
5940 5940 KSTAT_IO_PTR(exi_ksp)->writes++;
5941 5941 if (rfsv4disptab[op].op_type != NFS4_OP_POSTCFH)
5942 5942 kstat_runq_exit(KSTAT_IO_PTR(exi_ksp));
5943 5943 mutex_exit(exi_ksp->ks_lock);
5944 5944
5945 5945 exi_hold(resop->exi);
5946 5946 } else {
5947 5947 resop->exi = NULL;
5948 5948 }
5949 5949
5950 5950 if (ksp != NULL) {
5951 5951 mutex_enter(ksp->ks_lock);
5952 5952 kstat_runq_exit(KSTAT_IO_PTR(ksp));
5953 5953 mutex_exit(ksp->ks_lock);
5954 5954 }
5955 5955 } else {
5956 5956 /*
5957 5957 * This is effectively dead code since XDR code
5958 5958 * will have already returned BADXDR if op doesn't
5959 5959 * decode to legal value. This only done for a
5960 5960 * day when XDR code doesn't verify v4 opcodes.
5961 5961 */
5962 5962 op = OP_ILLEGAL;
5963 5963 rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;
5964 5964
5965 5965 rfs4_op_illegal(argop, resop, req, &cs);
5966 5966 cs.cont = FALSE;
5967 5967 }
5968 5968
5969 5969 /*
5970 5970 * If not at last op, and if we are to stop, then
5971 5971 * compact the results array.
5972 5972 */
5973 5973 if ((i + 1) < args->array_len && !cs.cont) {
5974 5974 nfs_resop4 *new_res = kmem_alloc(
5975 5975 (i + 1) * sizeof (nfs_resop4), KM_SLEEP);
5976 5976 bcopy(resp->array,
5977 5977 new_res, (i + 1) * sizeof (nfs_resop4));
5978 5978 kmem_free(resp->array,
5979 5979 args->array_len * sizeof (nfs_resop4));
5980 5980
5981 5981 resp->array_len = i + 1;
5982 5982 resp->array = new_res;
5983 5983 }
5984 5984 }
5985 5985
5986 5986 rw_exit(&exported_lock);
5987 5987
5988 5988 DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
5989 5989 COMPOUND4res *, resp);
5990 5990
5991 5991 if (cs.vp)
5992 5992 VN_RELE(cs.vp);
5993 5993 if (cs.saved_vp)
5994 5994 VN_RELE(cs.saved_vp);
5995 5995 if (cs.saved_fh.nfs_fh4_val)
5996 5996 kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);
5997 5997
5998 5998 if (cs.basecr)
5999 5999 crfree(cs.basecr);
6000 6000 if (cs.cr)
6001 6001 crfree(cs.cr);
6002 6002 /*
6003 6003 * done with this compound request, free the label
6004 6004 */
6005 6005
6006 6006 if (req->rq_label != NULL) {
6007 6007 kmem_free(req->rq_label, sizeof (bslabel_t));
6008 6008 req->rq_label = NULL;
6009 6009 }
6010 6010 }
6011 6011
6012 6012 /*
6013 6013 * XXX because of what appears to be duplicate calls to rfs4_compound_free
6014 6014 * XXX zero out the tag and array values. Need to investigate why the
6015 6015 * XXX calls occur, but at least prevent the panic for now.
6016 6016 */
6017 6017 void
6018 6018 rfs4_compound_free(COMPOUND4res *resp)
6019 6019 {
6020 6020 uint_t i;
6021 6021
6022 6022 if (resp->tag.utf8string_val) {
6023 6023 UTF8STRING_FREE(resp->tag)
6024 6024 }
6025 6025
6026 6026 for (i = 0; i < resp->array_len; i++) {
6027 6027 nfs_resop4 *resop;
6028 6028 uint_t op;
6029 6029
6030 6030 resop = &resp->array[i];
6031 6031 op = (uint_t)resop->resop;
6032 6032 if (op < rfsv4disp_cnt) {
6033 6033 (*rfsv4disptab[op].dis_resfree)(resop);
6034 6034 }
6035 6035 }
6036 6036 if (resp->array != NULL) {
6037 6037 kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
6038 6038 }
6039 6039 }
6040 6040
6041 6041 /*
6042 6042 * Process the value of the compound request rpc flags, as a bit-AND
6043 6043 * of the individual per-op flags (idempotent, allowork, publicfh_ok)
6044 6044 */
6045 6045 void
6046 6046 rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
6047 6047 {
6048 6048 int i;
6049 6049 int flag = RPC_ALL;
6050 6050
6051 6051 for (i = 0; flag && i < args->array_len; i++) {
6052 6052 uint_t op;
6053 6053
6054 6054 op = (uint_t)args->array[i].argop;
6055 6055
6056 6056 if (op < rfsv4disp_cnt)
6057 6057 flag &= rfsv4disptab[op].dis_flags;
6058 6058 else
6059 6059 flag = 0;
6060 6060 }
6061 6061 *flagp = flag;
6062 6062 }
6063 6063
6064 6064 void
6065 6065 rfs4_compound_kstat_args(COMPOUND4args *args)
6066 6066 {
6067 6067 int i;
6068 6068
6069 6069 for (i = 0; i < args->array_len; i++) {
6070 6070 uint_t op = (uint_t)args->array[i].argop;
6071 6071
6072 6072 if (op < rfsv4disp_cnt) {
6073 6073 kstat_t *ksp = rfsprocio_v4_ptr[op];
6074 6074
6075 6075 if (ksp != NULL) {
6076 6076 mutex_enter(ksp->ks_lock);
6077 6077 KSTAT_IO_PTR(ksp)->nwritten +=
6078 6078 args->array[i].opsize;
6079 6079 KSTAT_IO_PTR(ksp)->writes++;
6080 6080 mutex_exit(ksp->ks_lock);
6081 6081 }
6082 6082 }
6083 6083 }
6084 6084 }
6085 6085
6086 6086 void
6087 6087 rfs4_compound_kstat_res(COMPOUND4res *res)
6088 6088 {
6089 6089 int i;
6090 6090
6091 6091 for (i = 0; i < res->array_len; i++) {
6092 6092 uint_t op = (uint_t)res->array[i].resop;
6093 6093
6094 6094 if (op < rfsv4disp_cnt) {
6095 6095 kstat_t *ksp = rfsprocio_v4_ptr[op];
6096 6096 struct exportinfo *exi = res->array[i].exi;
6097 6097
6098 6098 if (ksp != NULL) {
6099 6099 mutex_enter(ksp->ks_lock);
6100 6100 KSTAT_IO_PTR(ksp)->nread +=
6101 6101 res->array[i].opsize;
6102 6102 KSTAT_IO_PTR(ksp)->reads++;
6103 6103 mutex_exit(ksp->ks_lock);
6104 6104 }
6105 6105
6106 6106 if (exi != NULL) {
6107 6107 kstat_t *exi_ksp;
6108 6108
6109 6109 rw_enter(&exported_lock, RW_READER);
6110 6110
6111 6111 exi_ksp = exi->exi_kstats->rfsprocio_v4_ptr[op];
6112 6112 if (exi_ksp != NULL) {
6113 6113 mutex_enter(exi_ksp->ks_lock);
6114 6114 KSTAT_IO_PTR(exi_ksp)->nread +=
6115 6115 res->array[i].opsize;
6116 6116 KSTAT_IO_PTR(exi_ksp)->reads++;
6117 6117 mutex_exit(exi_ksp->ks_lock);
6118 6118 }
6119 6119
6120 6120 rw_exit(&exported_lock);
6121 6121
6122 6122 exi_rele(exi);
6123 6123 }
6124 6124 }
6125 6125 }
6126 6126 }
6127 6127
6128 6128 nfsstat4
6129 6129 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
6130 6130 {
6131 6131 nfsstat4 e;
6132 6132
6133 6133 rfs4_dbe_lock(cp->rc_dbe);
6134 6134
6135 6135 if (cp->rc_sysidt != LM_NOSYSID) {
6136 6136 *sp = cp->rc_sysidt;
6137 6137 e = NFS4_OK;
6138 6138
6139 6139 } else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
6140 6140 *sp = cp->rc_sysidt;
6141 6141 e = NFS4_OK;
6142 6142
6143 6143 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
6144 6144 "rfs4_client_sysid: allocated 0x%x\n", *sp));
6145 6145 } else
6146 6146 e = NFS4ERR_DELAY;
6147 6147
6148 6148 rfs4_dbe_unlock(cp->rc_dbe);
6149 6149 return (e);
6150 6150 }
6151 6151
#if defined(DEBUG) && ! defined(lint)
/*
 * Debug-only helper: log one CE_NOTE line describing a lock request.
 *
 *	str		caller-supplied prefix for the message
 *	operation	fcntl command; anything other than F_GETLK, F_SETLK
 *			or F_SETLK_NBMAND is printed as F_UNKNOWN
 *	flk		the lock being described; l_whence must be 0
 *
 * A zero l_len is printed as all ones.
 */
static void
lock_print(char *str, int operation, struct flock64 *flk)
{
	char *op = "F_UNKNOWN";
	char *type = "F_UNKNOWN";
	longlong_t len;

	if (operation == F_GETLK)
		op = "F_GETLK";
	else if (operation == F_SETLK)
		op = "F_SETLK";
	else if (operation == F_SETLK_NBMAND)
		op = "F_SETLK_NBMAND";

	if (flk->l_type == F_UNLCK)
		type = "F_UNLCK";
	else if (flk->l_type == F_RDLCK)
		type = "F_RDLCK";
	else if (flk->l_type == F_WRLCK)
		type = "F_WRLCK";

	ASSERT(flk->l_whence == 0);

	len = flk->l_len ? (longlong_t)flk->l_len : ~0LL;
	cmn_err(CE_NOTE, "%s: %s, type = %s, off = %llx len = %llx pid = %d",
	    str, op, type, (longlong_t)flk->l_start, len, flk->l_pid);
}

#define	LOCK_PRINT(d, s, t, f) if (d) lock_print(s, t, f)
#else
#define	LOCK_PRINT(d, s, t, f)
#endif
6188 6188
6189 6189 /*ARGSUSED*/
6190 6190 static bool_t
6191 6191 creds_ok(cred_set_t cr_set, struct svc_req *req, struct compound_state *cs)
6192 6192 {
6193 6193 return (TRUE);
6194 6194 }
6195 6195
6196 6196 /*
6197 6197 * Look up the pathname using the vp in cs as the directory vnode.
6198 6198 * cs->vp will be the vnode for the file on success
6199 6199 */
6200 6200
6201 6201 static nfsstat4
6202 6202 rfs4_lookup(component4 *component, struct svc_req *req,
6203 6203 struct compound_state *cs)
6204 6204 {
6205 6205 char *nm;
6206 6206 uint32_t len;
6207 6207 nfsstat4 status;
6208 6208 struct sockaddr *ca;
6209 6209 char *name;
6210 6210
6211 6211 if (cs->vp == NULL) {
6212 6212 return (NFS4ERR_NOFILEHANDLE);
6213 6213 }
6214 6214 if (cs->vp->v_type != VDIR) {
6215 6215 return (NFS4ERR_NOTDIR);
6216 6216 }
6217 6217
6218 6218 status = utf8_dir_verify(component);
6219 6219 if (status != NFS4_OK)
6220 6220 return (status);
6221 6221
6222 6222 nm = utf8_to_fn(component, &len, NULL);
6223 6223 if (nm == NULL) {
6224 6224 return (NFS4ERR_INVAL);
6225 6225 }
6226 6226
6227 6227 if (len > MAXNAMELEN) {
6228 6228 kmem_free(nm, len);
6229 6229 return (NFS4ERR_NAMETOOLONG);
6230 6230 }
6231 6231
6232 6232 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6233 6233 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6234 6234 MAXPATHLEN + 1);
6235 6235
6236 6236 if (name == NULL) {
6237 6237 kmem_free(nm, len);
6238 6238 return (NFS4ERR_INVAL);
6239 6239 }
6240 6240
6241 6241 status = do_rfs4_op_lookup(name, req, cs);
6242 6242
6243 6243 if (name != nm)
6244 6244 kmem_free(name, MAXPATHLEN + 1);
6245 6245
6246 6246 kmem_free(nm, len);
6247 6247
6248 6248 return (status);
6249 6249 }
6250 6250
6251 6251 static nfsstat4
6252 6252 rfs4_lookupfile(component4 *component, struct svc_req *req,
6253 6253 struct compound_state *cs, uint32_t access, change_info4 *cinfo)
6254 6254 {
6255 6255 nfsstat4 status;
6256 6256 vnode_t *dvp = cs->vp;
6257 6257 vattr_t bva, ava, fva;
6258 6258 int error;
6259 6259
6260 6260 /* Get "before" change value */
6261 6261 bva.va_mask = AT_CTIME|AT_SEQ;
6262 6262 error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6263 6263 if (error)
6264 6264 return (puterrno4(error));
6265 6265
6266 6266 /* rfs4_lookup may VN_RELE directory */
6267 6267 VN_HOLD(dvp);
6268 6268
6269 6269 status = rfs4_lookup(component, req, cs);
6270 6270 if (status != NFS4_OK) {
6271 6271 VN_RELE(dvp);
6272 6272 return (status);
6273 6273 }
6274 6274
6275 6275 /*
6276 6276 * Get "after" change value, if it fails, simply return the
6277 6277 * before value.
6278 6278 */
6279 6279 ava.va_mask = AT_CTIME|AT_SEQ;
6280 6280 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6281 6281 ava.va_ctime = bva.va_ctime;
6282 6282 ava.va_seq = 0;
6283 6283 }
6284 6284 VN_RELE(dvp);
6285 6285
6286 6286 /*
6287 6287 * Validate the file is a file
6288 6288 */
6289 6289 fva.va_mask = AT_TYPE|AT_MODE;
6290 6290 error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr, NULL);
6291 6291 if (error)
6292 6292 return (puterrno4(error));
6293 6293
6294 6294 if (fva.va_type != VREG) {
6295 6295 if (fva.va_type == VDIR)
6296 6296 return (NFS4ERR_ISDIR);
6297 6297 if (fva.va_type == VLNK)
6298 6298 return (NFS4ERR_SYMLINK);
6299 6299 return (NFS4ERR_INVAL);
6300 6300 }
6301 6301
6302 6302 NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
6303 6303 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6304 6304
6305 6305 /*
6306 6306 * It is undefined if VOP_LOOKUP will change va_seq, so
6307 6307 * cinfo.atomic = TRUE only if we have
6308 6308 * non-zero va_seq's, and they have not changed.
6309 6309 */
6310 6310 if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
6311 6311 cinfo->atomic = TRUE;
6312 6312 else
6313 6313 cinfo->atomic = FALSE;
6314 6314
6315 6315 /* Check for mandatory locking */
6316 6316 cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
6317 6317 return (check_open_access(access, cs, req));
6318 6318 }
6319 6319
6320 6320 static nfsstat4
6321 6321 create_vnode(vnode_t *dvp, char *nm, vattr_t *vap, createmode4 mode,
6322 6322 timespec32_t *mtime, cred_t *cr, vnode_t **vpp, bool_t *created)
6323 6323 {
6324 6324 int error;
6325 6325 nfsstat4 status = NFS4_OK;
6326 6326 vattr_t va;
6327 6327
6328 6328 tryagain:
6329 6329
6330 6330 /*
6331 6331 * The file open mode used is VWRITE. If the client needs
6332 6332 * some other semantic, then it should do the access checking
6333 6333 * itself. It would have been nice to have the file open mode
6334 6334 * passed as part of the arguments.
6335 6335 */
6336 6336
6337 6337 *created = TRUE;
6338 6338 error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0, NULL, NULL);
6339 6339
6340 6340 if (error) {
6341 6341 *created = FALSE;
6342 6342
6343 6343 /*
6344 6344 * If we got something other than file already exists
6345 6345 * then just return this error. Otherwise, we got
6346 6346 * EEXIST. If we were doing a GUARDED create, then
6347 6347 * just return this error. Otherwise, we need to
6348 6348 * make sure that this wasn't a duplicate of an
6349 6349 * exclusive create request.
6350 6350 *
6351 6351 * The assumption is made that a non-exclusive create
6352 6352 * request will never return EEXIST.
6353 6353 */
6354 6354
6355 6355 if (error != EEXIST || mode == GUARDED4) {
6356 6356 status = puterrno4(error);
6357 6357 return (status);
6358 6358 }
6359 6359 error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr,
6360 6360 NULL, NULL, NULL);
6361 6361
6362 6362 if (error) {
6363 6363 /*
6364 6364 * We couldn't find the file that we thought that
6365 6365 * we just created. So, we'll just try creating
6366 6366 * it again.
6367 6367 */
6368 6368 if (error == ENOENT)
6369 6369 goto tryagain;
6370 6370
6371 6371 status = puterrno4(error);
6372 6372 return (status);
6373 6373 }
6374 6374
6375 6375 if (mode == UNCHECKED4) {
6376 6376 /* existing object must be regular file */
6377 6377 if ((*vpp)->v_type != VREG) {
6378 6378 if ((*vpp)->v_type == VDIR)
6379 6379 status = NFS4ERR_ISDIR;
6380 6380 else if ((*vpp)->v_type == VLNK)
6381 6381 status = NFS4ERR_SYMLINK;
6382 6382 else
6383 6383 status = NFS4ERR_INVAL;
6384 6384 VN_RELE(*vpp);
6385 6385 return (status);
6386 6386 }
6387 6387
6388 6388 return (NFS4_OK);
6389 6389 }
6390 6390
6391 6391 /* Check for duplicate request */
6392 6392 ASSERT(mtime != 0);
6393 6393 va.va_mask = AT_MTIME;
6394 6394 error = VOP_GETATTR(*vpp, &va, 0, cr, NULL);
6395 6395 if (!error) {
6396 6396 /* We found the file */
6397 6397 if (va.va_mtime.tv_sec != mtime->tv_sec ||
6398 6398 va.va_mtime.tv_nsec != mtime->tv_nsec) {
6399 6399 /* but its not our creation */
6400 6400 VN_RELE(*vpp);
6401 6401 return (NFS4ERR_EXIST);
6402 6402 }
6403 6403 *created = TRUE; /* retrans of create == created */
6404 6404 return (NFS4_OK);
6405 6405 }
6406 6406 VN_RELE(*vpp);
6407 6407 return (NFS4ERR_EXIST);
6408 6408 }
6409 6409
6410 6410 return (NFS4_OK);
6411 6411 }
6412 6412
6413 6413 static nfsstat4
6414 6414 check_open_access(uint32_t access, struct compound_state *cs,
6415 6415 struct svc_req *req)
6416 6416 {
6417 6417 int error;
6418 6418 vnode_t *vp;
6419 6419 bool_t readonly;
6420 6420 cred_t *cr = cs->cr;
6421 6421
6422 6422 /* For now we don't allow mandatory locking as per V2/V3 */
6423 6423 if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
6424 6424 return (NFS4ERR_ACCESS);
6425 6425 }
6426 6426
6427 6427 vp = cs->vp;
6428 6428 ASSERT(cr != NULL && vp->v_type == VREG);
6429 6429
6430 6430 /*
6431 6431 * If the file system is exported read only and we are trying
6432 6432 * to open for write, then return NFS4ERR_ROFS
6433 6433 */
6434 6434
6435 6435 readonly = rdonly4(req, cs);
6436 6436
6437 6437 if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
6438 6438 return (NFS4ERR_ROFS);
6439 6439
6440 6440 if (access & OPEN4_SHARE_ACCESS_READ) {
6441 6441 if ((VOP_ACCESS(vp, VREAD, 0, cr, NULL) != 0) &&
6442 6442 (VOP_ACCESS(vp, VEXEC, 0, cr, NULL) != 0)) {
6443 6443 return (NFS4ERR_ACCESS);
6444 6444 }
6445 6445 }
6446 6446
6447 6447 if (access & OPEN4_SHARE_ACCESS_WRITE) {
6448 6448 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
6449 6449 if (error)
6450 6450 return (NFS4ERR_ACCESS);
6451 6451 }
6452 6452
6453 6453 return (NFS4_OK);
6454 6454 }
6455 6455
6456 6456 static nfsstat4
6457 6457 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
6458 6458 change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
6459 6459 {
6460 6460 struct nfs4_svgetit_arg sarg;
6461 6461 struct nfs4_ntov_table ntov;
6462 6462
6463 6463 bool_t ntov_table_init = FALSE;
6464 6464 struct statvfs64 sb;
6465 6465 nfsstat4 status;
6466 6466 vnode_t *vp;
6467 6467 vattr_t bva, ava, iva, cva, *vap;
6468 6468 vnode_t *dvp;
6469 6469 timespec32_t *mtime;
6470 6470 char *nm = NULL;
6471 6471 uint_t buflen;
6472 6472 bool_t created;
6473 6473 bool_t setsize = FALSE;
6474 6474 len_t reqsize;
6475 6475 int error;
6476 6476 bool_t trunc;
6477 6477 caller_context_t ct;
6478 6478 component4 *component;
6479 6479 bslabel_t *clabel;
6480 6480 struct sockaddr *ca;
6481 6481 char *name = NULL;
6482 6482
6483 6483 sarg.sbp = &sb;
6484 6484 sarg.is_referral = B_FALSE;
6485 6485
6486 6486 dvp = cs->vp;
6487 6487
6488 6488 /* Check if the file system is read only */
6489 6489 if (rdonly4(req, cs))
6490 6490 return (NFS4ERR_ROFS);
6491 6491
6492 6492 /* check the label of including directory */
6493 6493 if (is_system_labeled()) {
6494 6494 ASSERT(req->rq_label != NULL);
6495 6495 clabel = req->rq_label;
6496 6496 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
6497 6497 "got client label from request(1)",
6498 6498 struct svc_req *, req);
6499 6499 if (!blequal(&l_admin_low->tsl_label, clabel)) {
6500 6500 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
6501 6501 cs->exi)) {
6502 6502 return (NFS4ERR_ACCESS);
6503 6503 }
6504 6504 }
6505 6505 }
6506 6506
6507 6507 /*
6508 6508 * Get the last component of path name in nm. cs will reference
6509 6509 * the including directory on success.
6510 6510 */
6511 6511 component = &args->open_claim4_u.file;
6512 6512 status = utf8_dir_verify(component);
6513 6513 if (status != NFS4_OK)
6514 6514 return (status);
6515 6515
6516 6516 nm = utf8_to_fn(component, &buflen, NULL);
6517 6517
6518 6518 if (nm == NULL)
6519 6519 return (NFS4ERR_RESOURCE);
6520 6520
6521 6521 if (buflen > MAXNAMELEN) {
6522 6522 kmem_free(nm, buflen);
6523 6523 return (NFS4ERR_NAMETOOLONG);
6524 6524 }
6525 6525
6526 6526 bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
6527 6527 error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6528 6528 if (error) {
6529 6529 kmem_free(nm, buflen);
6530 6530 return (puterrno4(error));
6531 6531 }
6532 6532
6533 6533 if (bva.va_type != VDIR) {
6534 6534 kmem_free(nm, buflen);
6535 6535 return (NFS4ERR_NOTDIR);
6536 6536 }
6537 6537
6538 6538 NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
6539 6539
6540 6540 switch (args->mode) {
6541 6541 case GUARDED4:
6542 6542 /*FALLTHROUGH*/
6543 6543 case UNCHECKED4:
6544 6544 nfs4_ntov_table_init(&ntov);
6545 6545 ntov_table_init = TRUE;
6546 6546
6547 6547 *attrset = 0;
6548 6548 status = do_rfs4_set_attrs(attrset,
6549 6549 &args->createhow4_u.createattrs,
6550 6550 cs, &sarg, &ntov, NFS4ATTR_SETIT);
6551 6551
6552 6552 if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
6553 6553 sarg.vap->va_type != VREG) {
6554 6554 if (sarg.vap->va_type == VDIR)
6555 6555 status = NFS4ERR_ISDIR;
6556 6556 else if (sarg.vap->va_type == VLNK)
6557 6557 status = NFS4ERR_SYMLINK;
6558 6558 else
6559 6559 status = NFS4ERR_INVAL;
6560 6560 }
6561 6561
6562 6562 if (status != NFS4_OK) {
6563 6563 kmem_free(nm, buflen);
6564 6564 nfs4_ntov_table_free(&ntov, &sarg);
6565 6565 *attrset = 0;
6566 6566 return (status);
6567 6567 }
6568 6568
6569 6569 vap = sarg.vap;
6570 6570 vap->va_type = VREG;
6571 6571 vap->va_mask |= AT_TYPE;
6572 6572
6573 6573 if ((vap->va_mask & AT_MODE) == 0) {
6574 6574 vap->va_mask |= AT_MODE;
6575 6575 vap->va_mode = (mode_t)0600;
6576 6576 }
6577 6577
6578 6578 if (vap->va_mask & AT_SIZE) {
6579 6579
6580 6580 /* Disallow create with a non-zero size */
6581 6581
6582 6582 if ((reqsize = sarg.vap->va_size) != 0) {
6583 6583 kmem_free(nm, buflen);
6584 6584 nfs4_ntov_table_free(&ntov, &sarg);
6585 6585 *attrset = 0;
6586 6586 return (NFS4ERR_INVAL);
6587 6587 }
6588 6588 setsize = TRUE;
6589 6589 }
6590 6590 break;
6591 6591
6592 6592 case EXCLUSIVE4:
6593 6593 /* prohibit EXCL create of named attributes */
6594 6594 if (dvp->v_flag & V_XATTRDIR) {
6595 6595 kmem_free(nm, buflen);
6596 6596 *attrset = 0;
6597 6597 return (NFS4ERR_INVAL);
6598 6598 }
6599 6599
6600 6600 cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
6601 6601 cva.va_type = VREG;
6602 6602 /*
6603 6603 * Ensure no time overflows. Assumes underlying
6604 6604 * filesystem supports at least 32 bits.
6605 6605 * Truncate nsec to usec resolution to allow valid
6606 6606 * compares even if the underlying filesystem truncates.
6607 6607 */
6608 6608 mtime = (timespec32_t *)&args->createhow4_u.createverf;
6609 6609 cva.va_mtime.tv_sec = mtime->tv_sec % TIME32_MAX;
6610 6610 cva.va_mtime.tv_nsec = (mtime->tv_nsec / 1000) * 1000;
6611 6611 cva.va_mode = (mode_t)0;
6612 6612 vap = &cva;
6613 6613
6614 6614 /*
6615 6615 * For EXCL create, attrset is set to the server attr
6616 6616 * used to cache the client's verifier.
6617 6617 */
6618 6618 *attrset = FATTR4_TIME_MODIFY_MASK;
6619 6619 break;
6620 6620 }
6621 6621
6622 6622 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6623 6623 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6624 6624 MAXPATHLEN + 1);
6625 6625
6626 6626 if (name == NULL) {
6627 6627 kmem_free(nm, buflen);
6628 6628 return (NFS4ERR_SERVERFAULT);
6629 6629 }
6630 6630
6631 6631 status = create_vnode(dvp, name, vap, args->mode, mtime,
6632 6632 cs->cr, &vp, &created);
6633 6633 if (nm != name)
6634 6634 kmem_free(name, MAXPATHLEN + 1);
6635 6635 kmem_free(nm, buflen);
6636 6636
6637 6637 if (status != NFS4_OK) {
6638 6638 if (ntov_table_init)
6639 6639 nfs4_ntov_table_free(&ntov, &sarg);
6640 6640 *attrset = 0;
6641 6641 return (status);
6642 6642 }
6643 6643
6644 6644 trunc = (setsize && !created);
6645 6645
6646 6646 if (args->mode != EXCLUSIVE4) {
6647 6647 bitmap4 createmask = args->createhow4_u.createattrs.attrmask;
6648 6648
6649 6649 /*
6650 6650 * True verification that object was created with correct
6651 6651 * attrs is impossible. The attrs could have been changed
6652 6652 * immediately after object creation. If attributes did
6653 6653 * not verify, the only recourse for the server is to
6654 6654 * destroy the object. Maybe if some attrs (like gid)
6655 6655 * are set incorrectly, the object should be destroyed;
6656 6656 * however, seems bad as a default policy. Do we really
6657 6657 * want to destroy an object over one of the times not
6658 6658 * verifying correctly? For these reasons, the server
6659 6659 * currently sets bits in attrset for createattrs
6660 6660 * that were set; however, no verification is done.
6661 6661 *
6662 6662 * vmask_to_nmask accounts for vattr bits set on create
6663 6663 * [do_rfs4_set_attrs() only sets resp bits for
6664 6664 * non-vattr/vfs bits.]
6665 6665 * Mask off any bits we set by default so as not to return
6666 6666 * more attrset bits than were requested in createattrs
6667 6667 */
6668 6668 if (created) {
6669 6669 nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
6670 6670 *attrset &= createmask;
6671 6671 } else {
6672 6672 /*
6673 6673 * We did not create the vnode (we tried but it
6674 6674 * already existed). In this case, the only createattr
6675 6675 * that the spec allows the server to set is size,
6676 6676 * and even then, it can only be set if it is 0.
6677 6677 */
6678 6678 *attrset = 0;
6679 6679 if (trunc)
6680 6680 *attrset = FATTR4_SIZE_MASK;
6681 6681 }
6682 6682 }
6683 6683 if (ntov_table_init)
6684 6684 nfs4_ntov_table_free(&ntov, &sarg);
6685 6685
6686 6686 /*
6687 6687 * Get the initial "after" sequence number, if it fails,
6688 6688 * set to zero, time to before.
6689 6689 */
6690 6690 iva.va_mask = AT_CTIME|AT_SEQ;
6691 6691 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) {
6692 6692 iva.va_seq = 0;
6693 6693 iva.va_ctime = bva.va_ctime;
6694 6694 }
6695 6695
6696 6696 /*
6697 6697 * create_vnode attempts to create the file exclusive,
6698 6698 * if it already exists the VOP_CREATE will fail and
6699 6699 * may not increase va_seq. It is atomic if
6700 6700 * we haven't changed the directory, but if it has changed
6701 6701 * we don't know what changed it.
6702 6702 */
6703 6703 if (!created) {
6704 6704 if (bva.va_seq && iva.va_seq &&
6705 6705 bva.va_seq == iva.va_seq)
6706 6706 cinfo->atomic = TRUE;
6707 6707 else
6708 6708 cinfo->atomic = FALSE;
6709 6709 NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
6710 6710 } else {
6711 6711 /*
6712 6712 * The entry was created, we need to sync the
6713 6713 * directory metadata.
6714 6714 */
6715 6715 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
6716 6716
6717 6717 /*
6718 6718 * Get "after" change value, if it fails, simply return the
6719 6719 * before value.
6720 6720 */
6721 6721 ava.va_mask = AT_CTIME|AT_SEQ;
6722 6722 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6723 6723 ava.va_ctime = bva.va_ctime;
6724 6724 ava.va_seq = 0;
6725 6725 }
6726 6726
6727 6727 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6728 6728
6729 6729 /*
6730 6730 * The cinfo->atomic = TRUE only if we have
6731 6731 * non-zero va_seq's, and it has incremented by exactly one
6732 6732 * during the create_vnode and it didn't
6733 6733 * change during the VOP_FSYNC.
6734 6734 */
6735 6735 if (bva.va_seq && iva.va_seq && ava.va_seq &&
6736 6736 iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
6737 6737 cinfo->atomic = TRUE;
6738 6738 else
6739 6739 cinfo->atomic = FALSE;
6740 6740 }
6741 6741
6742 6742 /* Check for mandatory locking and that the size gets set. */
6743 6743 cva.va_mask = AT_MODE;
6744 6744 if (setsize)
6745 6745 cva.va_mask |= AT_SIZE;
6746 6746
6747 6747 /* Assume the worst */
6748 6748 cs->mandlock = TRUE;
6749 6749
6750 6750 if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
6751 6751 cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6752 6752
6753 6753 /*
6754 6754 * Truncate the file if necessary; this would be
6755 6755 * the case for create over an existing file.
6756 6756 */
6757 6757
6758 6758 if (trunc) {
6759 6759 int in_crit = 0;
6760 6760 rfs4_file_t *fp;
6761 6761 bool_t create = FALSE;
6762 6762
6763 6763 /*
6764 6764 * We are writing over an existing file.
6765 6765 * Check to see if we need to recall a delegation.
6766 6766 */
6767 6767 rfs4_hold_deleg_policy();
6768 6768 if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6769 6769 if (rfs4_check_delegated_byfp(FWRITE, fp,
6770 6770 (reqsize == 0), FALSE, FALSE, &clientid)) {
6771 6771 rfs4_file_rele(fp);
6772 6772 rfs4_rele_deleg_policy();
6773 6773 VN_RELE(vp);
6774 6774 *attrset = 0;
6775 6775 return (NFS4ERR_DELAY);
6776 6776 }
6777 6777 rfs4_file_rele(fp);
6778 6778 }
6779 6779 rfs4_rele_deleg_policy();
6780 6780
6781 6781 if (nbl_need_check(vp)) {
6782 6782 in_crit = 1;
6783 6783
6784 6784 ASSERT(reqsize == 0);
6785 6785
6786 6786 nbl_start_crit(vp, RW_READER);
6787 6787 if (nbl_conflict(vp, NBL_WRITE, 0,
6788 6788 cva.va_size, 0, NULL)) {
6789 6789 in_crit = 0;
6790 6790 nbl_end_crit(vp);
6791 6791 VN_RELE(vp);
6792 6792 *attrset = 0;
6793 6793 return (NFS4ERR_ACCESS);
6794 6794 }
6795 6795 }
6796 6796 ct.cc_sysid = 0;
6797 6797 ct.cc_pid = 0;
6798 6798 ct.cc_caller_id = nfs4_srv_caller_id;
6799 6799 ct.cc_flags = CC_DONTBLOCK;
6800 6800
6801 6801 cva.va_mask = AT_SIZE;
6802 6802 cva.va_size = reqsize;
6803 6803 (void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
6804 6804 if (in_crit)
6805 6805 nbl_end_crit(vp);
6806 6806 }
6807 6807 }
6808 6808
6809 6809 error = makefh4(&cs->fh, vp, cs->exi);
6810 6810
6811 6811 /*
6812 6812 * Force modified data and metadata out to stable storage.
6813 6813 */
6814 6814 (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
6815 6815
6816 6816 if (error) {
6817 6817 VN_RELE(vp);
6818 6818 *attrset = 0;
6819 6819 return (puterrno4(error));
6820 6820 }
6821 6821
6822 6822 /* if parent dir is attrdir, set namedattr fh flag */
6823 6823 if (dvp->v_flag & V_XATTRDIR)
6824 6824 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
6825 6825
6826 6826 if (cs->vp)
6827 6827 VN_RELE(cs->vp);
6828 6828
6829 6829 cs->vp = vp;
6830 6830
6831 6831 /*
6832 6832 * if we did not create the file, we will need to check
6833 6833 * the access bits on the file
6834 6834 */
6835 6835
6836 6836 if (!created) {
6837 6837 if (setsize)
6838 6838 args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
6839 6839 status = check_open_access(args->share_access, cs, req);
6840 6840 if (status != NFS4_OK)
6841 6841 *attrset = 0;
6842 6842 }
6843 6843 return (status);
6844 6844 }
6845 6845
/*
 * rfs4_do_open: common back end for the OPEN claim handlers.
 *
 * Looks up the file struct for cs->vp and the state struct for the
 * (open owner, file) pair, then in order:
 *   - fetches the client's sysid,
 *   - applies the share reservation when this open adds access/deny bits,
 *   - checks whether a delegation must be recalled (recall, wait once,
 *     re-check),
 *   - calls VOP_OPEN() for a first open, or vn_open_upgrade() otherwise,
 *   - updates the access/deny bookkeeping on the state and file structs,
 *   - asks rfs4_grant_delegation() for a delegation and bumps the stateid.
 *
 * The outcome is reported only through resp->status (and, on success,
 * resp->stateid and resp->delegation).  On every failure after the state
 * struct was obtained, a state created by this call (screate == TRUE) is
 * closed again before being released.
 *
 * deleg_cur is non-zero for CLAIM_DELEGATE_CUR opens, which skip VOP_OPEN().
 */
6846 6846 /*ARGSUSED*/
6847 6847 static void
6848 6848 rfs4_do_open(struct compound_state *cs, struct svc_req *req,
6849 6849 rfs4_openowner_t *oo, delegreq_t deleg,
6850 6850 uint32_t access, uint32_t deny,
6851 6851 OPEN4res *resp, int deleg_cur)
6852 6852 {
6853 6853 /* XXX Currently not using req */
6854 6854 rfs4_state_t *sp;
6855 6855 rfs4_file_t *fp;
6856 6856 bool_t screate = TRUE;
6857 6857 bool_t fcreate = TRUE;
6858 6858 uint32_t open_a, share_a;
6859 6859 uint32_t open_d, share_d;
6860 6860 rfs4_deleg_state_t *dsp;
6861 6861 sysid_t sysid;
6862 6862 nfsstat4 status;
6863 6863 caller_context_t ct;
6864 6864 int fflags = 0;
6865 6865 int recall = 0;
6866 6866 int err;
6867 6867 int first_open;
6868 6868
6869 6869 /* get the file struct and hold a lock on it during initial open */
6870 6870 fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
6871 6871 if (fp == NULL) {
6872 6872 resp->status = NFS4ERR_RESOURCE;
6873 6873 DTRACE_PROBE1(nfss__e__do__open1, nfsstat4, resp->status);
6874 6874 return;
6875 6875 }
6876 6876
/*
 * NOTE(review): rf_file_rwlock is dropped explicitly only on the failure
 * path below; presumably a successful rfs4_findstate_by_owner_file()
 * releases it -- confirm against nfs4_state.c.
 */
6877 6877 sp = rfs4_findstate_by_owner_file(oo, fp, &screate);
6878 6878 if (sp == NULL) {
6879 6879 resp->status = NFS4ERR_RESOURCE;
6880 6880 DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status);
6881 6881 /* No need to keep any reference */
6882 6882 rw_exit(&fp->rf_file_rwlock);
6883 6883 rfs4_file_rele(fp);
6884 6884 return;
6885 6885 }
6886 6886
6887 6887 /* try to get the sysid before continuing */
6888 6888 if ((status = rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) {
6889 6889 resp->status = status;
6890 6890 rfs4_file_rele(fp);
6891 6891 /* Not a fully formed open; "close" it */
6892 6892 if (screate == TRUE)
6893 6893 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6894 6894 rfs4_state_rele(sp);
6895 6895 return;
6896 6896 }
6897 6897
6898 6898 /* Calculate the fflags for this OPEN. */
6899 6899 if (access & OPEN4_SHARE_ACCESS_READ)
6900 6900 fflags |= FREAD;
6901 6901 if (access & OPEN4_SHARE_ACCESS_WRITE)
6902 6902 fflags |= FWRITE;
6903 6903
6904 6904 rfs4_dbe_lock(sp->rs_dbe);
6905 6905
6906 6906 /*
6907 6907 * Calculate the new deny and access mode that this open is adding to
6908 6908 * the file for this open owner;
6909 6909 */
6910 6910 open_d = (deny & ~sp->rs_open_deny);
6911 6911 open_a = (access & ~sp->rs_open_access);
6912 6912
6913 6913 /*
6914 6914 * Calculate the new share access and share deny modes that this open
6915 6915 * is adding to the file for this open owner;
6916 6916 */
6917 6917 share_a = (access & ~sp->rs_share_access);
6918 6918 share_d = (deny & ~sp->rs_share_deny);
6919 6919
/* A first open is one where the state holds no READ/WRITE open access yet */
6920 6920 first_open = (sp->rs_open_access & OPEN4_SHARE_ACCESS_BOTH) == 0;
6921 6921
6922 6922 /*
6923 6923 * Check to see the client has already sent an open for this
6924 6924 * open owner on this file with the same share/deny modes.
6925 6925 * If so, we don't need to check for a conflict and we don't
6926 6926 * need to add another shrlock. If not, then we need to
6927 6927 * check for conflicts in deny and access before checking for
6928 6928 * conflicts in delegation. We don't want to recall a
6929 6929 * delegation based on an open that will eventually fail based
6930 6930 * on shares modes.
6931 6931 */
6932 6932
6933 6933 if (share_a || share_d) {
6934 6934 if ((err = rfs4_share(sp, access, deny)) != 0) {
6935 6935 rfs4_dbe_unlock(sp->rs_dbe);
6936 6936 resp->status = err;
6937 6937
6938 6938 rfs4_file_rele(fp);
6939 6939 /* Not a fully formed open; "close" it */
6940 6940 if (screate == TRUE)
6941 6941 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6942 6942 rfs4_state_rele(sp);
6943 6943 return;
6944 6944 }
6945 6945 }
6946 6946
/* Lock order from here on: sp->rs_dbe first, then fp->rf_dbe */
6947 6947 rfs4_dbe_lock(fp->rf_dbe);
6948 6948
6949 6949 /*
6950 6950 * Check to see if this file is delegated and if so, if a
6951 6951 * recall needs to be done.
6952 6952 */
6953 6953 if (rfs4_check_recall(sp, access)) {
/* Both dbe locks are dropped across the recall and the delay */
6954 6954 rfs4_dbe_unlock(fp->rf_dbe);
6955 6955 rfs4_dbe_unlock(sp->rs_dbe);
6956 6956 rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client);
6957 6957 delay(NFS4_DELEGATION_CONFLICT_DELAY);
6958 6958 rfs4_dbe_lock(sp->rs_dbe);
6959 6959
6960 6960 /* if state closed while lock was dropped */
6961 6961 if (sp->rs_closed) {
6962 6962 if (share_a || share_d)
6963 6963 (void) rfs4_unshare(sp);
6964 6964 rfs4_dbe_unlock(sp->rs_dbe);
6965 6965 rfs4_file_rele(fp);
6966 6966 /* Not a fully formed open; "close" it */
6967 6967 if (screate == TRUE)
6968 6968 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6969 6969 rfs4_state_rele(sp);
6970 6970 resp->status = NFS4ERR_OLD_STATEID;
6971 6971 return;
6972 6972 }
6973 6973
6974 6974 rfs4_dbe_lock(fp->rf_dbe);
6975 6975 /* Let's see if the delegation was returned */
6976 6976 if (rfs4_check_recall(sp, access)) {
/* Still conflicting: undo the share and have the client retry later */
6977 6977 rfs4_dbe_unlock(fp->rf_dbe);
6978 6978 if (share_a || share_d)
6979 6979 (void) rfs4_unshare(sp);
6980 6980 rfs4_dbe_unlock(sp->rs_dbe);
6981 6981 rfs4_file_rele(fp);
6982 6982 rfs4_update_lease(sp->rs_owner->ro_client);
6983 6983
6984 6984 /* Not a fully formed open; "close" it */
6985 6985 if (screate == TRUE)
6986 6986 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6987 6987 rfs4_state_rele(sp);
6988 6988 resp->status = NFS4ERR_DELAY;
6989 6989 return;
6990 6990 }
6991 6991 }
6992 6992 /*
6993 6993 * the share check passed and any delegation conflict has been
6994 6994 * taken care of, now call vop_open.
6995 6995 * if this is the first open then call vop_open with fflags.
6996 6996 * if not, call vn_open_upgrade with just the upgrade flags.
6997 6997 *
6998 6998 * if the file has been opened already, it will have the current
6999 6999 * access mode in the state struct. if it has no share access, then
7000 7000 * this is a new open.
7001 7001 *
7002 7002 * However, if this is open with CLAIM_DELEGATE_CUR, then don't
7003 7003 * call VOP_OPEN(), just do the open upgrade.
7004 7004 */
7005 7005 if (first_open && !deleg_cur) {
7006 7006 ct.cc_sysid = sysid;
7007 7007 ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
7008 7008 ct.cc_caller_id = nfs4_srv_caller_id;
7009 7009 ct.cc_flags = CC_DONTBLOCK;
7010 7010 err = VOP_OPEN(&cs->vp, fflags, cs->cr, &ct);
7011 7011 if (err) {
7012 7012 rfs4_dbe_unlock(fp->rf_dbe);
7013 7013 if (share_a || share_d)
7014 7014 (void) rfs4_unshare(sp);
7015 7015 rfs4_dbe_unlock(sp->rs_dbe);
7016 7016 rfs4_file_rele(fp);
7017 7017
7018 7018 /* Not a fully formed open; "close" it */
7019 7019 if (screate == TRUE)
7020 7020 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
7021 7021 rfs4_state_rele(sp);
7022 7022 /* check if a monitor detected a delegation conflict */
7023 7023 if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
7024 7024 resp->status = NFS4ERR_DELAY;
7025 7025 else
7026 7026 resp->status = NFS4ERR_SERVERFAULT;
7027 7027 return;
7028 7028 }
7029 7029 } else { /* open upgrade */
7030 7030 /*
7031 7031 * calculate the fflags for the new mode that is being added
7032 7032 * by this upgrade.
7033 7033 */
7034 7034 fflags = 0;
7035 7035 if (open_a & OPEN4_SHARE_ACCESS_READ)
7036 7036 fflags |= FREAD;
7037 7037 if (open_a & OPEN4_SHARE_ACCESS_WRITE)
7038 7038 fflags |= FWRITE;
7039 7039 vn_open_upgrade(cs->vp, fflags);
7040 7040 }
/* Record the granted modes on the state, then count only the new bits on fp */
7041 7041 sp->rs_open_access |= access;
7042 7042 sp->rs_open_deny |= deny;
7043 7043
7044 7044 if (open_d & OPEN4_SHARE_DENY_READ)
7045 7045 fp->rf_deny_read++;
7046 7046 if (open_d & OPEN4_SHARE_DENY_WRITE)
7047 7047 fp->rf_deny_write++;
7048 7048 fp->rf_share_deny |= deny;
7049 7049
7050 7050 if (open_a & OPEN4_SHARE_ACCESS_READ)
7051 7051 fp->rf_access_read++;
7052 7052 if (open_a & OPEN4_SHARE_ACCESS_WRITE)
7053 7053 fp->rf_access_write++;
7054 7054 fp->rf_share_access |= access;
7055 7055
7056 7056 /*
7057 7057 * Check for delegation here. if the deleg argument is not
7058 7058 * DELEG_ANY, then this is a reclaim from a client and
7059 7059 * we must honor the delegation requested. If necessary we can
7060 7060 * set the recall flag.
7061 7061 */
7062 7062
7063 7063 dsp = rfs4_grant_delegation(deleg, sp, &recall);
7064 7064
7065 7065 cs->deleg = (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE);
7066 7066
7067 7067 next_stateid(&sp->rs_stateid);
7068 7068
7069 7069 resp->stateid = sp->rs_stateid.stateid;
7070 7070
7071 7071 rfs4_dbe_unlock(fp->rf_dbe);
7072 7072 rfs4_dbe_unlock(sp->rs_dbe);
7073 7073
/* A non-NULL dsp means a delegation was granted; describe it in the reply */
7074 7074 if (dsp) {
7075 7075 rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
7076 7076 rfs4_deleg_state_rele(dsp);
7077 7077 }
7078 7078
7079 7079 rfs4_file_rele(fp);
7080 7080 rfs4_state_rele(sp);
7081 7081
7082 7082 resp->status = NFS4_OK;
7083 7083 }
7084 7084
7085 7085 /*ARGSUSED*/
7086 7086 static void
7087 7087 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
7088 7088 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7089 7089 {
7090 7090 change_info4 *cinfo = &resp->cinfo;
7091 7091 bitmap4 *attrset = &resp->attrset;
7092 7092
7093 7093 if (args->opentype == OPEN4_NOCREATE)
7094 7094 resp->status = rfs4_lookupfile(&args->open_claim4_u.file,
7095 7095 req, cs, args->share_access, cinfo);
7096 7096 else {
7097 7097 /* inhibit delegation grants during exclusive create */
7098 7098
7099 7099 if (args->mode == EXCLUSIVE4)
7100 7100 rfs4_disable_delegation();
7101 7101
7102 7102 resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
7103 7103 oo->ro_client->rc_clientid);
7104 7104 }
7105 7105
7106 7106 if (resp->status == NFS4_OK) {
7107 7107
7108 7108 /* cs->vp cs->fh now reference the desired file */
7109 7109
7110 7110 rfs4_do_open(cs, req, oo,
7111 7111 oo->ro_need_confirm ? DELEG_NONE : DELEG_ANY,
7112 7112 args->share_access, args->share_deny, resp, 0);
7113 7113
7114 7114 /*
7115 7115 * If rfs4_createfile set attrset, we must
7116 7116 * clear this attrset before the response is copied.
7117 7117 */
7118 7118 if (resp->status != NFS4_OK && resp->attrset) {
7119 7119 resp->attrset = 0;
7120 7120 }
7121 7121 }
7122 7122 else
7123 7123 *cs->statusp = resp->status;
7124 7124
7125 7125 if (args->mode == EXCLUSIVE4)
7126 7126 rfs4_enable_delegation();
7127 7127 }
7128 7128
7129 7129 /*ARGSUSED*/
7130 7130 static void
7131 7131 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
7132 7132 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7133 7133 {
7134 7134 change_info4 *cinfo = &resp->cinfo;
7135 7135 vattr_t va;
7136 7136 vtype_t v_type = cs->vp->v_type;
7137 7137 int error = 0;
7138 7138
7139 7139 /* Verify that we have a regular file */
7140 7140 if (v_type != VREG) {
7141 7141 if (v_type == VDIR)
7142 7142 resp->status = NFS4ERR_ISDIR;
7143 7143 else if (v_type == VLNK)
7144 7144 resp->status = NFS4ERR_SYMLINK;
7145 7145 else
7146 7146 resp->status = NFS4ERR_INVAL;
7147 7147 return;
7148 7148 }
7149 7149
7150 7150 va.va_mask = AT_MODE|AT_UID;
7151 7151 error = VOP_GETATTR(cs->vp, &va, 0, cs->cr, NULL);
7152 7152 if (error) {
7153 7153 resp->status = puterrno4(error);
7154 7154 return;
7155 7155 }
7156 7156
7157 7157 cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
7158 7158
7159 7159 /*
7160 7160 * Check if we have access to the file, Note the the file
7161 7161 * could have originally been open UNCHECKED or GUARDED
7162 7162 * with mode bits that will now fail, but there is nothing
7163 7163 * we can really do about that except in the case that the
7164 7164 * owner of the file is the one requesting the open.
7165 7165 */
7166 7166 if (crgetuid(cs->cr) != va.va_uid) {
7167 7167 resp->status = check_open_access(args->share_access, cs, req);
7168 7168 if (resp->status != NFS4_OK) {
7169 7169 return;
7170 7170 }
7171 7171 }
7172 7172
7173 7173 /*
7174 7174 * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
7175 7175 */
7176 7176 cinfo->before = 0;
7177 7177 cinfo->after = 0;
7178 7178 cinfo->atomic = FALSE;
7179 7179
7180 7180 rfs4_do_open(cs, req, oo,
7181 7181 NFS4_DELEG4TYPE2REQTYPE(args->open_claim4_u.delegate_type),
7182 7182 args->share_access, args->share_deny, resp, 0);
7183 7183 }
7184 7184
7185 7185 static void
7186 7186 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
7187 7187 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7188 7188 {
7189 7189 int error;
7190 7190 nfsstat4 status;
7191 7191 stateid4 stateid =
7192 7192 args->open_claim4_u.delegate_cur_info.delegate_stateid;
7193 7193 rfs4_deleg_state_t *dsp;
7194 7194
7195 7195 /*
7196 7196 * Find the state info from the stateid and confirm that the
7197 7197 * file is delegated. If the state openowner is the same as
7198 7198 * the supplied openowner we're done. If not, get the file
7199 7199 * info from the found state info. Use that file info to
7200 7200 * create the state for this lock owner. Note solaris doen't
7201 7201 * really need the pathname to find the file. We may want to
7202 7202 * lookup the pathname and make sure that the vp exist and
7203 7203 * matches the vp in the file structure. However it is
7204 7204 * possible that the pathname nolonger exists (local process
7205 7205 * unlinks the file), so this may not be that useful.
7206 7206 */
7207 7207
7208 7208 status = rfs4_get_deleg_state(&stateid, &dsp);
7209 7209 if (status != NFS4_OK) {
7210 7210 resp->status = status;
7211 7211 return;
7212 7212 }
7213 7213
7214 7214 ASSERT(dsp->rds_finfo->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE);
7215 7215
7216 7216 /*
7217 7217 * New lock owner, create state. Since this was probably called
7218 7218 * in response to a CB_RECALL we set deleg to DELEG_NONE
7219 7219 */
7220 7220
7221 7221 ASSERT(cs->vp != NULL);
7222 7222 VN_RELE(cs->vp);
7223 7223 VN_HOLD(dsp->rds_finfo->rf_vp);
7224 7224 cs->vp = dsp->rds_finfo->rf_vp;
7225 7225
7226 7226 if (error = makefh4(&cs->fh, cs->vp, cs->exi)) {
7227 7227 rfs4_deleg_state_rele(dsp);
7228 7228 *cs->statusp = resp->status = puterrno4(error);
7229 7229 return;
7230 7230 }
7231 7231
7232 7232 /* Mark progress for delegation returns */
7233 7233 dsp->rds_finfo->rf_dinfo.rd_time_lastwrite = gethrestime_sec();
7234 7234 rfs4_deleg_state_rele(dsp);
7235 7235 rfs4_do_open(cs, req, oo, DELEG_NONE,
7236 7236 args->share_access, args->share_deny, resp, 1);
7237 7237 }
7238 7238
7239 7239 /*ARGSUSED*/
7240 7240 static void
7241 7241 rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
7242 7242 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7243 7243 {
7244 7244 /*
7245 7245 * Lookup the pathname, it must already exist since this file
7246 7246 * was delegated.
7247 7247 *
7248 7248 * Find the file and state info for this vp and open owner pair.
7249 7249 * check that they are in fact delegated.
7250 7250 * check that the state access and deny modes are the same.
7251 7251 *
7252 7252 * Return the delgation possibly seting the recall flag.
7253 7253 */
7254 7254 rfs4_file_t *fp;
7255 7255 rfs4_state_t *sp;
7256 7256 bool_t create = FALSE;
7257 7257 bool_t dcreate = FALSE;
7258 7258 rfs4_deleg_state_t *dsp;
7259 7259 nfsace4 *ace;
7260 7260
7261 7261 /* Note we ignore oflags */
7262 7262 resp->status = rfs4_lookupfile(&args->open_claim4_u.file_delegate_prev,
7263 7263 req, cs, args->share_access, &resp->cinfo);
7264 7264
7265 7265 if (resp->status != NFS4_OK) {
7266 7266 return;
7267 7267 }
7268 7268
7269 7269 /* get the file struct and hold a lock on it during initial open */
7270 7270 fp = rfs4_findfile_withlock(cs->vp, NULL, &create);
7271 7271 if (fp == NULL) {
7272 7272 resp->status = NFS4ERR_RESOURCE;
7273 7273 DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status);
7274 7274 return;
7275 7275 }
7276 7276
7277 7277 sp = rfs4_findstate_by_owner_file(oo, fp, &create);
7278 7278 if (sp == NULL) {
7279 7279 resp->status = NFS4ERR_SERVERFAULT;
7280 7280 DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status);
7281 7281 rw_exit(&fp->rf_file_rwlock);
7282 7282 rfs4_file_rele(fp);
7283 7283 return;
7284 7284 }
7285 7285
7286 7286 rfs4_dbe_lock(sp->rs_dbe);
7287 7287 rfs4_dbe_lock(fp->rf_dbe);
7288 7288 if (args->share_access != sp->rs_share_access ||
7289 7289 args->share_deny != sp->rs_share_deny ||
7290 7290 sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
7291 7291 NFS4_DEBUG(rfs4_debug,
7292 7292 (CE_NOTE, "rfs4_do_opendelprev: state mixup"));
7293 7293 rfs4_dbe_unlock(fp->rf_dbe);
7294 7294 rfs4_dbe_unlock(sp->rs_dbe);
7295 7295 rfs4_file_rele(fp);
7296 7296 rfs4_state_rele(sp);
7297 7297 resp->status = NFS4ERR_SERVERFAULT;
7298 7298 return;
7299 7299 }
7300 7300 rfs4_dbe_unlock(fp->rf_dbe);
7301 7301 rfs4_dbe_unlock(sp->rs_dbe);
7302 7302
7303 7303 dsp = rfs4_finddeleg(sp, &dcreate);
7304 7304 if (dsp == NULL) {
7305 7305 rfs4_state_rele(sp);
7306 7306 rfs4_file_rele(fp);
7307 7307 resp->status = NFS4ERR_SERVERFAULT;
7308 7308 return;
7309 7309 }
7310 7310
7311 7311 next_stateid(&sp->rs_stateid);
7312 7312
7313 7313 resp->stateid = sp->rs_stateid.stateid;
7314 7314
7315 7315 resp->delegation.delegation_type = dsp->rds_dtype;
7316 7316
7317 7317 if (dsp->rds_dtype == OPEN_DELEGATE_READ) {
7318 7318 open_read_delegation4 *rv =
7319 7319 &resp->delegation.open_delegation4_u.read;
7320 7320
7321 7321 rv->stateid = dsp->rds_delegid.stateid;
7322 7322 rv->recall = FALSE; /* no policy in place to set to TRUE */
7323 7323 ace = &rv->permissions;
7324 7324 } else {
7325 7325 open_write_delegation4 *rv =
7326 7326 &resp->delegation.open_delegation4_u.write;
7327 7327
7328 7328 rv->stateid = dsp->rds_delegid.stateid;
7329 7329 rv->recall = FALSE; /* no policy in place to set to TRUE */
7330 7330 ace = &rv->permissions;
7331 7331 rv->space_limit.limitby = NFS_LIMIT_SIZE;
7332 7332 rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
7333 7333 }
7334 7334
7335 7335 /* XXX For now */
7336 7336 ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
7337 7337 ace->flag = 0;
7338 7338 ace->access_mask = 0;
7339 7339 ace->who.utf8string_len = 0;
7340 7340 ace->who.utf8string_val = 0;
7341 7341
7342 7342 rfs4_deleg_state_rele(dsp);
7343 7343 rfs4_state_rele(sp);
7344 7344 rfs4_file_rele(fp);
7345 7345 }
7346 7346
/*
 * Outcome of a sequence id check, as returned by rfs4_check_seqid() and
 * its open/lock owner wrappers.
 */
7347 7347 typedef enum {
7348 7348 NFS4_CHKSEQ_OKAY = 0, /* request carries the next expected seqid */
7349 7349 NFS4_CHKSEQ_REPLAY = 1, /* same seqid and same op as the saved reply */
7350 7350 NFS4_CHKSEQ_BAD = 2 /* any other seqid, or same seqid with a different op */
7351 7351 } rfs4_chkseq_t;
7352 7352
7353 7353 /*
7354 7354 * Generic function for sequence number checks.
7355 7355 */
7356 7356 static rfs4_chkseq_t
7357 7357 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
7358 7358 seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
7359 7359 {
7360 7360 /* Same sequence ids and matching operations? */
7361 7361 if (seqid == rqst_seq && resop->resop == lastop->resop) {
7362 7362 if (copyres == TRUE) {
7363 7363 rfs4_free_reply(resop);
7364 7364 rfs4_copy_reply(resop, lastop);
7365 7365 }
7366 7366 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
7367 7367 "Replayed SEQID %d\n", seqid));
7368 7368 return (NFS4_CHKSEQ_REPLAY);
7369 7369 }
7370 7370
7371 7371 /* If the incoming sequence is not the next expected then it is bad */
7372 7372 if (rqst_seq != seqid + 1) {
7373 7373 if (rqst_seq == seqid) {
7374 7374 NFS4_DEBUG(rfs4_debug,
7375 7375 (CE_NOTE, "BAD SEQID: Replayed sequence id "
7376 7376 "but last op was %d current op is %d\n",
7377 7377 lastop->resop, resop->resop));
7378 7378 return (NFS4_CHKSEQ_BAD);
7379 7379 }
7380 7380 NFS4_DEBUG(rfs4_debug,
7381 7381 (CE_NOTE, "BAD SEQID: got %u expecting %u\n",
7382 7382 rqst_seq, seqid));
7383 7383 return (NFS4_CHKSEQ_BAD);
7384 7384 }
7385 7385
7386 7386 /* Everything okay -- next expected */
7387 7387 return (NFS4_CHKSEQ_OKAY);
7388 7388 }
7389 7389
7390 7390
7391 7391 static rfs4_chkseq_t
7392 7392 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7393 7393 {
7394 7394 rfs4_chkseq_t rc;
7395 7395
7396 7396 rfs4_dbe_lock(op->ro_dbe);
7397 7397 rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, seqid, resop,
7398 7398 TRUE);
7399 7399 rfs4_dbe_unlock(op->ro_dbe);
7400 7400
7401 7401 if (rc == NFS4_CHKSEQ_OKAY)
7402 7402 rfs4_update_lease(op->ro_client);
7403 7403
7404 7404 return (rc);
7405 7405 }
7406 7406
7407 7407 static rfs4_chkseq_t
7408 7408 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7409 7409 {
7410 7410 rfs4_chkseq_t rc;
7411 7411
7412 7412 rfs4_dbe_lock(op->ro_dbe);
7413 7413 rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply,
7414 7414 olo_seqid, resop, FALSE);
7415 7415 rfs4_dbe_unlock(op->ro_dbe);
7416 7416
7417 7417 return (rc);
7418 7418 }
7419 7419
7420 7420 static rfs4_chkseq_t
7421 7421 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lsp, nfs_resop4 *resop)
7422 7422 {
7423 7423 rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
7424 7424
7425 7425 rfs4_dbe_lock(lsp->rls_dbe);
7426 7426 if (!lsp->rls_skip_seqid_check)
7427 7427 rc = rfs4_check_seqid(lsp->rls_seqid, &lsp->rls_reply, seqid,
7428 7428 resop, TRUE);
7429 7429 rfs4_dbe_unlock(lsp->rls_dbe);
7430 7430
7431 7431 return (rc);
7432 7432 }
7433 7433
/*
 * rfs4_op_open: server side of the NFSv4 OPEN operation.
 *
 * Outline:
 *   1. Require a current filehandle, find the client named by the open
 *      owner's clientid and reject an expired lease.
 *   2. Find the open owner (create is passed in as TRUE and is FALSE
 *      afterwards if the owner already existed) and serialize use of its
 *      sequence space with rfs4_sw_enter().
 *   3. For a pre-existing owner, validate the request's seqid; an owner
 *      that still needs confirmation has its opens freed and the lookup
 *      is retried.
 *   4. Apply the grace-period rules, validate share access/deny bits and
 *      dispatch on the claim type.
 *   5. At "out", release the client and do the sequence id / saved reply
 *      bookkeeping according to the status being returned.
 *
 * Exit labels: "out" needs cp and oo released, "finish" needs only oo
 * released, "end" holds nothing.  The final status is always in
 * resp->status and *cs->statusp.
 */
7434 7434 static void
7435 7435 rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
7436 7436 struct svc_req *req, struct compound_state *cs)
7437 7437 {
7438 7438 OPEN4args *args = &argop->nfs_argop4_u.opopen;
7439 7439 OPEN4res *resp = &resop->nfs_resop4_u.opopen;
7440 7440 open_owner4 *owner = &args->owner;
7441 7441 open_claim_type4 claim = args->claim;
7442 7442 rfs4_client_t *cp;
7443 7443 rfs4_openowner_t *oo;
7444 7444 bool_t create;
7445 7445 bool_t replay = FALSE;
7446 7446 int can_reclaim;
7447 7447
7448 7448 DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs,
7449 7449 OPEN4args *, args);
7450 7450
7451 7451 if (cs->vp == NULL) {
7452 7452 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7453 7453 goto end;
7454 7454 }
7455 7455
7456 7456 /*
7457 7457 * Need to check clientid and lease expiration first based on
7458 7458 * error ordering and incrementing sequence id.
7459 7459 */
7460 7460 cp = rfs4_findclient_by_id(owner->clientid, FALSE);
7461 7461 if (cp == NULL) {
7462 7462 *cs->statusp = resp->status =
7463 7463 rfs4_check_clientid(&owner->clientid, 0);
7464 7464 goto end;
7465 7465 }
7466 7466
7467 7467 if (rfs4_lease_expired(cp)) {
/* NOTE(review): no rfs4_client_rele() here; presumably rfs4_client_close() consumes the hold -- confirm */
7468 7468 rfs4_client_close(cp);
7469 7469 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7470 7470 goto end;
7471 7471 }
7472 7472 can_reclaim = cp->rc_can_reclaim;
7473 7473
7474 7474 /*
7475 7475 * Find the open_owner for use from this point forward. Take
7476 7476 * care in updating the sequence id based on the type of error
7477 7477 * being returned.
7478 7478 */
7479 7479 retry:
7480 7480 create = TRUE;
7481 7481 oo = rfs4_findopenowner(owner, &create, args->seqid);
7482 7482 if (oo == NULL) {
7483 7483 *cs->statusp = resp->status = NFS4ERR_RESOURCE;
7484 7484 rfs4_client_rele(cp);
7485 7485 goto end;
7486 7486 }
7487 7487
7488 7488 /* Hold off access to the sequence space while the open is done */
7489 7489 rfs4_sw_enter(&oo->ro_sw);
7490 7490
7491 7491 /*
7492 7492 * If the open_owner existed before at the server, then check
7493 7493 * the sequence id.
7494 7494 */
7495 7495 if (!create && !oo->ro_postpone_confirm) {
7496 7496 switch (rfs4_check_open_seqid(args->seqid, oo, resop)) {
7497 7497 case NFS4_CHKSEQ_BAD:
/*
 * A seqid ahead of ours on a still-unconfirmed owner: free the owner's
 * opens and look the owner up again.
 */
7498 7498 if ((args->seqid > oo->ro_open_seqid) &&
7499 7499 oo->ro_need_confirm) {
7500 7500 rfs4_free_opens(oo, TRUE, FALSE);
7501 7501 rfs4_sw_exit(&oo->ro_sw);
7502 7502 rfs4_openowner_rele(oo);
7503 7503 goto retry;
7504 7504 }
7505 7505 resp->status = NFS4ERR_BAD_SEQID;
7506 7506 goto out;
7507 7507 case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
7508 7508 replay = TRUE;
7509 7509 goto out;
7510 7510 default:
7511 7511 break;
7512 7512 }
7513 7513
7514 7514 /*
7515 7515 * Sequence was ok and open owner exists
7516 7516 * check to see if we have yet to see an
7517 7517 * open_confirm.
7518 7518 */
7519 7519 if (oo->ro_need_confirm) {
7520 7520 rfs4_free_opens(oo, TRUE, FALSE);
7521 7521 rfs4_sw_exit(&oo->ro_sw);
7522 7522 rfs4_openowner_rele(oo);
7523 7523 goto retry;
7524 7524 }
7525 7525 }
7526 7526 /* Grace only applies to regular-type OPENs */
7527 7527 if (rfs4_clnt_in_grace(cp) &&
7528 7528 (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR)) {
7529 7529 *cs->statusp = resp->status = NFS4ERR_GRACE;
7530 7530 goto out;
7531 7531 }
7532 7532
7533 7533 /*
7534 7534 * If previous state at the server existed then can_reclaim
7535 7535 * will be set. If not reply NFS4ERR_NO_GRACE to the
7536 7536 * client.
7537 7537 */
7538 7538 if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
7539 7539 *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7540 7540 goto out;
7541 7541 }
7542 7542
7543 7543
7544 7544 /*
7545 7545 * Reject the open if the client has missed the grace period
7546 7546 */
7547 7547 if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
7548 7548 *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7549 7549 goto out;
7550 7550 }
7551 7551
7552 7552 /* Couple of up-front bookkeeping items */
7553 7553 if (oo->ro_need_confirm) {
7554 7554 /*
7555 7555 * If this is a reclaim OPEN then we should not ask
7556 7556 * for a confirmation of the open_owner per the
7557 7557 * protocol specification.
7558 7558 */
7559 7559 if (claim == CLAIM_PREVIOUS)
7560 7560 oo->ro_need_confirm = FALSE;
7561 7561 else
7562 7562 resp->rflags |= OPEN4_RESULT_CONFIRM;
7563 7563 }
7564 7564 resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;
7565 7565
7566 7566 /*
7567 7567 * If there is an unshared filesystem mounted on this vnode,
7568 7568 * do not allow to open/create in this directory.
7569 7569 */
7570 7570 if (vn_ismntpt(cs->vp)) {
7571 7571 *cs->statusp = resp->status = NFS4ERR_ACCESS;
7572 7572 goto out;
7573 7573 }
7574 7574
7575 7575 /*
7576 7576 * access must be READ, WRITE, or BOTH. No access is invalid.
7577 7577 * deny can be READ, WRITE, BOTH, or NONE.
7578 7578 * bits not defined for access/deny are invalid.
7579 7579 */
7580 7580 if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
7581 7581 (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
7582 7582 (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
7583 7583 *cs->statusp = resp->status = NFS4ERR_INVAL;
7584 7584 goto out;
7585 7585 }
7586 7586
7587 7587
7588 7588 /*
7589 7589 * make sure attrset is zero before response is built.
7590 7590 */
7591 7591 resp->attrset = 0;
7592 7592
/* Each claim handler reports its result through resp->status */
7593 7593 switch (claim) {
7594 7594 case CLAIM_NULL:
7595 7595 rfs4_do_opennull(cs, req, args, oo, resp);
7596 7596 break;
7597 7597 case CLAIM_PREVIOUS:
7598 7598 rfs4_do_openprev(cs, req, args, oo, resp);
7599 7599 break;
7600 7600 case CLAIM_DELEGATE_CUR:
7601 7601 rfs4_do_opendelcur(cs, req, args, oo, resp);
7602 7602 break;
7603 7603 case CLAIM_DELEGATE_PREV:
7604 7604 rfs4_do_opendelprev(cs, req, args, oo, resp);
7605 7605 break;
7606 7606 default:
7607 7607 resp->status = NFS4ERR_INVAL;
7608 7608 break;
7609 7609 }
7610 7610
7611 7611 out:
7612 7612 rfs4_client_rele(cp);
7613 7613
7614 7614 /* Catch sequence id handling here to make it a little easier */
7615 7615 switch (resp->status) {
7616 7616 case NFS4ERR_BADXDR:
7617 7617 case NFS4ERR_BAD_SEQID:
7618 7618 case NFS4ERR_BAD_STATEID:
7619 7619 case NFS4ERR_NOFILEHANDLE:
7620 7620 case NFS4ERR_RESOURCE:
7621 7621 case NFS4ERR_STALE_CLIENTID:
7622 7622 case NFS4ERR_STALE_STATEID:
7623 7623 /*
7624 7624 * The protocol states that if any of these errors are
7625 7625 * being returned, the sequence id should not be
7626 7626 * incremented. Any other return requires an
7627 7627 * increment.
7628 7628 */
7629 7629 break;
7630 7630 default:
7631 7631 /* Always update the lease in this case */
7632 7632 rfs4_update_lease(oo->ro_client);
7633 7633
7634 7634 /* Regular response - copy the result */
7635 7635 if (!replay)
7636 7636 rfs4_update_open_resp(oo, resop, &cs->fh);
7637 7637
7638 7638 /*
7639 7639 * REPLAY case: Only if the previous response was OK
7640 7640 * do we copy the filehandle. If not OK, no
7641 7641 * filehandle to copy.
7642 7642 */
7643 7643 if (replay == TRUE &&
7644 7644 resp->status == NFS4_OK &&
7645 7645 oo->ro_reply_fh.nfs_fh4_val) {
7646 7646 /*
7647 7647 * If this is a replay, we must restore the
7648 7648 * current filehandle/vp to that of what was
7649 7649 * returned originally. Try our best to do
7650 7650 * it.
7651 7651 */
7652 7652 nfs_fh4_fmt_t *fh_fmtp =
7653 7653 (nfs_fh4_fmt_t *)oo->ro_reply_fh.nfs_fh4_val;
7654 7654
7655 7655 cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
7656 7656 (fid_t *)&fh_fmtp->fh4_xlen, NULL);
7657 7657
7658 7658 if (cs->exi == NULL) {
7659 7659 resp->status = NFS4ERR_STALE;
7660 7660 goto finish;
7661 7661 }
7662 7662
7663 7663 VN_RELE(cs->vp);
7664 7664
/* On failure nfs4_fhtovp() returns NULL with the error left in resp->status */
7665 7665 cs->vp = nfs4_fhtovp(&oo->ro_reply_fh, cs->exi,
7666 7666 &resp->status);
7667 7667
7668 7668 if (cs->vp == NULL)
7669 7669 goto finish;
7670 7670
7671 7671 nfs_fh4_copy(&oo->ro_reply_fh, &cs->fh);
7672 7672 }
7673 7673
7674 7674 /*
7675 7675 * If this was a replay, no need to update the
7676 7676 * sequence id. If the open_owner was not created on
7677 7677 * this pass, then update. The first use of an
7678 7678 * open_owner will not bump the sequence id.
7679 7679 */
7680 7680 if (replay == FALSE && !create)
7681 7681 rfs4_update_open_sequence(oo);
7682 7682 /*
7683 7683 * If the client is receiving an error and the
7684 7684 * open_owner needs to be confirmed, there is no way
7685 7685 * to notify the client of this fact ignoring the fact
7686 7686 * that the server has no method of returning a
7687 7687 * stateid to confirm. Therefore, the server needs to
7688 7688 * mark this open_owner in a way as to avoid the
7689 7689 * sequence id checking the next time the client uses
7690 7690 * this open_owner.
7691 7691 */
7692 7692 if (resp->status != NFS4_OK && oo->ro_need_confirm)
7693 7693 oo->ro_postpone_confirm = TRUE;
7694 7694 /*
7695 7695 * If OK response then clear the postpone flag and
7696 7696 * reset the sequence id to keep in sync with the
7697 7697 * client.
7698 7698 */
7699 7699 if (resp->status == NFS4_OK && oo->ro_postpone_confirm) {
7700 7700 oo->ro_postpone_confirm = FALSE;
7701 7701 oo->ro_open_seqid = args->seqid;
7702 7702 }
7703 7703 break;
7704 7704 }
7705 7705
7706 7706 finish:
7707 7707 *cs->statusp = resp->status;
7708 7708
7709 7709 rfs4_sw_exit(&oo->ro_sw);
7710 7710 rfs4_openowner_rele(oo);
7711 7711
7712 7712 end:
7713 7713 DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs,
7714 7714 OPEN4res *, resp);
7715 7715 }
7716 7716
7717 7717 /*ARGSUSED*/
7718 7718 void
7719 7719 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7720 7720 struct svc_req *req, struct compound_state *cs)
7721 7721 {
7722 7722 OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
7723 7723 OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
7724 7724 rfs4_state_t *sp;
7725 7725 nfsstat4 status;
7726 7726
7727 7727 DTRACE_NFSV4_2(op__open__confirm__start, struct compound_state *, cs,
7728 7728 OPEN_CONFIRM4args *, args);
7729 7729
7730 7730 if (cs->vp == NULL) {
7731 7731 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7732 7732 goto out;
7733 7733 }
7734 7734
7735 7735 if (cs->vp->v_type != VREG) {
7736 7736 *cs->statusp = resp->status =
7737 7737 cs->vp->v_type == VDIR ? NFS4ERR_ISDIR : NFS4ERR_INVAL;
7738 7738 return;
7739 7739 }
7740 7740
7741 7741 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7742 7742 if (status != NFS4_OK) {
7743 7743 *cs->statusp = resp->status = status;
7744 7744 goto out;
7745 7745 }
7746 7746
7747 7747 /* Ensure specified filehandle matches */
7748 7748 if (cs->vp != sp->rs_finfo->rf_vp) {
7749 7749 rfs4_state_rele(sp);
7750 7750 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7751 7751 goto out;
7752 7752 }
7753 7753
7754 7754 /* hold off other access to open_owner while we tinker */
7755 7755 rfs4_sw_enter(&sp->rs_owner->ro_sw);
7756 7756
7757 7757 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7758 7758 case NFS4_CHECK_STATEID_OKAY:
7759 7759 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7760 7760 resop) != 0) {
7761 7761 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7762 7762 break;
7763 7763 }
7764 7764 /*
7765 7765 * If it is the appropriate stateid and determined to
7766 7766 * be "OKAY" then this means that the stateid does not
7767 7767 * need to be confirmed and the client is in error for
7768 7768 * sending an OPEN_CONFIRM.
7769 7769 */
7770 7770 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7771 7771 break;
7772 7772 case NFS4_CHECK_STATEID_OLD:
7773 7773 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7774 7774 break;
7775 7775 case NFS4_CHECK_STATEID_BAD:
7776 7776 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7777 7777 break;
7778 7778 case NFS4_CHECK_STATEID_EXPIRED:
7779 7779 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7780 7780 break;
7781 7781 case NFS4_CHECK_STATEID_CLOSED:
7782 7782 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7783 7783 break;
7784 7784 case NFS4_CHECK_STATEID_REPLAY:
7785 7785 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7786 7786 resop)) {
7787 7787 case NFS4_CHKSEQ_OKAY:
7788 7788 /*
7789 7789 * This is replayed stateid; if seqid matches
7790 7790 * next expected, then client is using wrong seqid.
7791 7791 */
7792 7792 /* fall through */
7793 7793 case NFS4_CHKSEQ_BAD:
7794 7794 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7795 7795 break;
7796 7796 case NFS4_CHKSEQ_REPLAY:
7797 7797 /*
7798 7798 * Note this case is the duplicate case so
7799 7799 * resp->status is already set.
7800 7800 */
7801 7801 *cs->statusp = resp->status;
7802 7802 rfs4_update_lease(sp->rs_owner->ro_client);
7803 7803 break;
7804 7804 }
7805 7805 break;
7806 7806 case NFS4_CHECK_STATEID_UNCONFIRMED:
7807 7807 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7808 7808 resop) != NFS4_CHKSEQ_OKAY) {
7809 7809 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7810 7810 break;
7811 7811 }
7812 7812 *cs->statusp = resp->status = NFS4_OK;
7813 7813
7814 7814 next_stateid(&sp->rs_stateid);
7815 7815 resp->open_stateid = sp->rs_stateid.stateid;
7816 7816 sp->rs_owner->ro_need_confirm = FALSE;
7817 7817 rfs4_update_lease(sp->rs_owner->ro_client);
7818 7818 rfs4_update_open_sequence(sp->rs_owner);
7819 7819 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7820 7820 break;
7821 7821 default:
7822 7822 ASSERT(FALSE);
7823 7823 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7824 7824 break;
7825 7825 }
7826 7826 rfs4_sw_exit(&sp->rs_owner->ro_sw);
7827 7827 rfs4_state_rele(sp);
7828 7828
7829 7829 out:
7830 7830 DTRACE_NFSV4_2(op__open__confirm__done, struct compound_state *, cs,
7831 7831 OPEN_CONFIRM4res *, resp);
7832 7832 }
7833 7833
7834 7834 /*ARGSUSED*/
7835 7835 void
7836 7836 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
7837 7837 struct svc_req *req, struct compound_state *cs)
7838 7838 {
7839 7839 OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
7840 7840 OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
7841 7841 uint32_t access = args->share_access;
7842 7842 uint32_t deny = args->share_deny;
7843 7843 nfsstat4 status;
7844 7844 rfs4_state_t *sp;
7845 7845 rfs4_file_t *fp;
7846 7846 int fflags = 0;
7847 7847
7848 7848 DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs,
7849 7849 OPEN_DOWNGRADE4args *, args);
7850 7850
7851 7851 if (cs->vp == NULL) {
7852 7852 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7853 7853 goto out;
7854 7854 }
7855 7855
7856 7856 if (cs->vp->v_type != VREG) {
7857 7857 *cs->statusp = resp->status = NFS4ERR_INVAL;
7858 7858 return;
7859 7859 }
7860 7860
7861 7861 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7862 7862 if (status != NFS4_OK) {
7863 7863 *cs->statusp = resp->status = status;
7864 7864 goto out;
7865 7865 }
7866 7866
7867 7867 /* Ensure specified filehandle matches */
7868 7868 if (cs->vp != sp->rs_finfo->rf_vp) {
7869 7869 rfs4_state_rele(sp);
7870 7870 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7871 7871 goto out;
7872 7872 }
7873 7873
7874 7874 /* hold off other access to open_owner while we tinker */
7875 7875 rfs4_sw_enter(&sp->rs_owner->ro_sw);
7876 7876
7877 7877 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7878 7878 case NFS4_CHECK_STATEID_OKAY:
7879 7879 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7880 7880 resop) != NFS4_CHKSEQ_OKAY) {
7881 7881 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7882 7882 goto end;
7883 7883 }
7884 7884 break;
7885 7885 case NFS4_CHECK_STATEID_OLD:
7886 7886 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7887 7887 goto end;
7888 7888 case NFS4_CHECK_STATEID_BAD:
7889 7889 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7890 7890 goto end;
7891 7891 case NFS4_CHECK_STATEID_EXPIRED:
7892 7892 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7893 7893 goto end;
7894 7894 case NFS4_CHECK_STATEID_CLOSED:
7895 7895 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7896 7896 goto end;
7897 7897 case NFS4_CHECK_STATEID_UNCONFIRMED:
7898 7898 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7899 7899 goto end;
7900 7900 case NFS4_CHECK_STATEID_REPLAY:
7901 7901 /* Check the sequence id for the open owner */
7902 7902 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7903 7903 resop)) {
7904 7904 case NFS4_CHKSEQ_OKAY:
7905 7905 /*
7906 7906 * This is replayed stateid; if seqid matches
7907 7907 * next expected, then client is using wrong seqid.
7908 7908 */
7909 7909 /* fall through */
7910 7910 case NFS4_CHKSEQ_BAD:
7911 7911 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7912 7912 goto end;
7913 7913 case NFS4_CHKSEQ_REPLAY:
7914 7914 /*
7915 7915 * Note this case is the duplicate case so
7916 7916 * resp->status is already set.
7917 7917 */
7918 7918 *cs->statusp = resp->status;
7919 7919 rfs4_update_lease(sp->rs_owner->ro_client);
7920 7920 goto end;
7921 7921 }
7922 7922 break;
7923 7923 default:
7924 7924 ASSERT(FALSE);
7925 7925 break;
7926 7926 }
7927 7927
7928 7928 rfs4_dbe_lock(sp->rs_dbe);
7929 7929 /*
7930 7930 * Check that the new access modes and deny modes are valid.
7931 7931 * Check that no invalid bits are set.
7932 7932 */
7933 7933 if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
7934 7934 (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) {
7935 7935 *cs->statusp = resp->status = NFS4ERR_INVAL;
7936 7936 rfs4_update_open_sequence(sp->rs_owner);
7937 7937 rfs4_dbe_unlock(sp->rs_dbe);
7938 7938 goto end;
7939 7939 }
7940 7940
7941 7941 /*
7942 7942 * The new modes must be a subset of the current modes and
7943 7943 * the access must specify at least one mode. To test that
7944 7944 * the new mode is a subset of the current modes we bitwise
7945 7945 * AND them together and check that the result equals the new
7946 7946 * mode. For example:
7947 7947 * New mode, access == R and current mode, sp->rs_open_access == RW
7948 7948 * access & sp->rs_open_access == R == access, so the new access mode
7949 7949 * is valid. Consider access == RW, sp->rs_open_access = R
7950 7950 * access & sp->rs_open_access == R != access, so the new access mode
7951 7951 * is invalid.
7952 7952 */
7953 7953 if ((access & sp->rs_open_access) != access ||
7954 7954 (deny & sp->rs_open_deny) != deny ||
7955 7955 (access &
7956 7956 (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
7957 7957 *cs->statusp = resp->status = NFS4ERR_INVAL;
7958 7958 rfs4_update_open_sequence(sp->rs_owner);
7959 7959 rfs4_dbe_unlock(sp->rs_dbe);
7960 7960 goto end;
7961 7961 }
7962 7962
7963 7963 /*
7964 7964 * Release any share locks associated with this stateID.
7965 7965 * Strictly speaking, this violates the spec because the
7966 7966 * spec effectively requires that open downgrade be atomic.
7967 7967 * At present, fs_shrlock does not have this capability.
7968 7968 */
7969 7969 (void) rfs4_unshare(sp);
7970 7970
7971 7971 status = rfs4_share(sp, access, deny);
7972 7972 if (status != NFS4_OK) {
7973 7973 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7974 7974 rfs4_update_open_sequence(sp->rs_owner);
7975 7975 rfs4_dbe_unlock(sp->rs_dbe);
7976 7976 goto end;
7977 7977 }
7978 7978
7979 7979 fp = sp->rs_finfo;
7980 7980 rfs4_dbe_lock(fp->rf_dbe);
7981 7981
7982 7982 /*
7983 7983 * If the current mode has deny read and the new mode
7984 7984 * does not, decrement the number of deny read mode bits
7985 7985 * and if it goes to zero turn off the deny read bit
7986 7986 * on the file.
7987 7987 */
7988 7988 if ((sp->rs_open_deny & OPEN4_SHARE_DENY_READ) &&
7989 7989 (deny & OPEN4_SHARE_DENY_READ) == 0) {
7990 7990 fp->rf_deny_read--;
7991 7991 if (fp->rf_deny_read == 0)
7992 7992 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
7993 7993 }
7994 7994
7995 7995 /*
7996 7996 * If the current mode has deny write and the new mode
7997 7997 * does not, decrement the number of deny write mode bits
7998 7998 * and if it goes to zero turn off the deny write bit
7999 7999 * on the file.
8000 8000 */
8001 8001 if ((sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) &&
8002 8002 (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
8003 8003 fp->rf_deny_write--;
8004 8004 if (fp->rf_deny_write == 0)
8005 8005 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8006 8006 }
8007 8007
8008 8008 /*
8009 8009 * If the current mode has access read and the new mode
8010 8010 * does not, decrement the number of access read mode bits
8011 8011 * and if it goes to zero turn off the access read bit
8012 8012 * on the file. set fflags to FREAD for the call to
8013 8013 * vn_open_downgrade().
8014 8014 */
8015 8015 if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) &&
8016 8016 (access & OPEN4_SHARE_ACCESS_READ) == 0) {
8017 8017 fp->rf_access_read--;
8018 8018 if (fp->rf_access_read == 0)
8019 8019 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8020 8020 fflags |= FREAD;
8021 8021 }
8022 8022
8023 8023 /*
8024 8024 * If the current mode has access write and the new mode
8025 8025 * does not, decrement the number of access write mode bits
8026 8026 * and if it goes to zero turn off the access write bit
8027 8027 * on the file. set fflags to FWRITE for the call to
8028 8028 * vn_open_downgrade().
8029 8029 */
8030 8030 if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) &&
8031 8031 (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8032 8032 fp->rf_access_write--;
8033 8033 if (fp->rf_access_write == 0)
8034 8034 fp->rf_share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
8035 8035 fflags |= FWRITE;
8036 8036 }
8037 8037
8038 8038 /* Check that the file is still accessible */
8039 8039 ASSERT(fp->rf_share_access);
8040 8040
8041 8041 rfs4_dbe_unlock(fp->rf_dbe);
8042 8042
8043 8043 /* now set the new open access and deny modes */
8044 8044 sp->rs_open_access = access;
8045 8045 sp->rs_open_deny = deny;
8046 8046
8047 8047 /*
8048 8048 * we successfully downgraded the share lock, now we need to downgrade
8049 8049 * the open. it is possible that the downgrade was only for a deny
8050 8050 * mode and we have nothing else to do.
8051 8051 */
8052 8052 if ((fflags & (FREAD|FWRITE)) != 0)
8053 8053 vn_open_downgrade(cs->vp, fflags);
8054 8054
8055 8055 /* Update the stateid */
8056 8056 next_stateid(&sp->rs_stateid);
8057 8057 resp->open_stateid = sp->rs_stateid.stateid;
8058 8058
8059 8059 rfs4_dbe_unlock(sp->rs_dbe);
8060 8060
8061 8061 *cs->statusp = resp->status = NFS4_OK;
8062 8062 /* Update the lease */
8063 8063 rfs4_update_lease(sp->rs_owner->ro_client);
8064 8064 /* And the sequence */
8065 8065 rfs4_update_open_sequence(sp->rs_owner);
8066 8066 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8067 8067
8068 8068 end:
8069 8069 rfs4_sw_exit(&sp->rs_owner->ro_sw);
8070 8070 rfs4_state_rele(sp);
8071 8071 out:
8072 8072 DTRACE_NFSV4_2(op__open__downgrade__done, struct compound_state *, cs,
8073 8073 OPEN_DOWNGRADE4res *, resp);
8074 8074 }
8075 8075
/*
 * Search the first n bytes of the buffer s1 for the NUL-terminated
 * string s2 (its terminator is not part of the match).  The buffer is
 * treated as raw bytes and need not be NUL-terminated.
 *
 * Returns a pointer to the first match inside s1, or NULL when s2 does
 * not occur within those n bytes.
 */
static void *
memstr(const void *s1, const char *s2, size_t n)
{
	char *cur = (char *)s1;
	size_t patlen = strlen(s2);
	size_t remaining;

	/* Stop once fewer bytes remain than the pattern needs */
	for (remaining = n; remaining >= patlen; remaining--, cur++) {
		if (bcmp(cur, s2, patlen) == 0)
			return (cur);
	}

	return (NULL);
}
8091 8091
8092 8092 /*
8093 8093 * The logic behind this function is detailed in the NFSv4 RFC in the
8094 8094 * SETCLIENTID operation description under IMPLEMENTATION. Refer to
8095 8095 * that section for explicit guidance to server behavior for
8096 8096 * SETCLIENTID.
8097 8097 */
8098 8098 void
8099 8099 rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop,
8100 8100 struct svc_req *req, struct compound_state *cs)
8101 8101 {
8102 8102 SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid;
8103 8103 SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid;
8104 8104 rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed;
8105 8105 rfs4_clntip_t *ci;
8106 8106 bool_t create;
8107 8107 char *addr, *netid;
8108 8108 int len;
8109 8109
8110 8110 DTRACE_NFSV4_2(op__setclientid__start, struct compound_state *, cs,
8111 8111 SETCLIENTID4args *, args);
8112 8112 retry:
8113 8113 newcp = cp_confirmed = cp_unconfirmed = NULL;
8114 8114
8115 8115 /*
8116 8116 * Save the caller's IP address
8117 8117 */
8118 8118 args->client.cl_addr =
8119 8119 (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
8120 8120
8121 8121 /*
8122 8122 * Record if it is a Solaris client that cannot handle referrals.
8123 8123 */
8124 8124 if (memstr(args->client.id_val, "Solaris", args->client.id_len) &&
8125 8125 !memstr(args->client.id_val, "+referrals", args->client.id_len)) {
8126 8126 /* Add a "yes, it's downrev" record */
8127 8127 create = TRUE;
8128 8128 ci = rfs4_find_clntip(args->client.cl_addr, &create);
8129 8129 ASSERT(ci != NULL);
8130 8130 rfs4_dbe_rele(ci->ri_dbe);
8131 8131 } else {
8132 8132 /* Remove any previous record */
8133 8133 rfs4_invalidate_clntip(args->client.cl_addr);
8134 8134 }
8135 8135
8136 8136 /*
8137 8137 * In search of an EXISTING client matching the incoming
8138 8138 * request to establish a new client identifier at the server
8139 8139 */
8140 8140 create = TRUE;
8141 8141 cp = rfs4_findclient(&args->client, &create, NULL);
8142 8142
8143 8143 /* Should never happen */
8144 8144 ASSERT(cp != NULL);
8145 8145
8146 8146 if (cp == NULL) {
8147 8147 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8148 8148 goto out;
8149 8149 }
8150 8150
8151 8151 /*
8152 8152 * Easiest case. Client identifier is newly created and is
8153 8153 * unconfirmed. Also note that for this case, no other
8154 8154 * entries exist for the client identifier. Nothing else to
8155 8155 * check. Just setup the response and respond.
8156 8156 */
8157 8157 if (create) {
8158 8158 *cs->statusp = res->status = NFS4_OK;
8159 8159 res->SETCLIENTID4res_u.resok4.clientid = cp->rc_clientid;
8160 8160 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8161 8161 cp->rc_confirm_verf;
8162 8162 /* Setup callback information; CB_NULL confirmation later */
8163 8163 rfs4_client_setcb(cp, &args->callback, args->callback_ident);
8164 8164
8165 8165 rfs4_client_rele(cp);
8166 8166 goto out;
8167 8167 }
8168 8168
8169 8169 /*
8170 8170 * An existing, confirmed client may exist but it may not have
8171 8171 * been active for at least one lease period. If so, then
8172 8172 * "close" the client and create a new client identifier
8173 8173 */
8174 8174 if (rfs4_lease_expired(cp)) {
8175 8175 rfs4_client_close(cp);
8176 8176 goto retry;
8177 8177 }
8178 8178
8179 8179 if (cp->rc_need_confirm == TRUE)
8180 8180 cp_unconfirmed = cp;
8181 8181 else
8182 8182 cp_confirmed = cp;
8183 8183
8184 8184 cp = NULL;
8185 8185
8186 8186 /*
8187 8187 * We have a confirmed client, now check for an
8188 8188 * unconfimred entry
8189 8189 */
8190 8190 if (cp_confirmed) {
8191 8191 /* If creds don't match then client identifier is inuse */
8192 8192 if (!creds_ok(cp_confirmed->rc_cr_set, req, cs)) {
8193 8193 rfs4_cbinfo_t *cbp;
8194 8194 /*
8195 8195 * Some one else has established this client
8196 8196 * id. Try and say * who they are. We will use
8197 8197 * the call back address supplied by * the
8198 8198 * first client.
8199 8199 */
8200 8200 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8201 8201
8202 8202 addr = netid = NULL;
8203 8203
8204 8204 cbp = &cp_confirmed->rc_cbinfo;
8205 8205 if (cbp->cb_callback.cb_location.r_addr &&
8206 8206 cbp->cb_callback.cb_location.r_netid) {
8207 8207 cb_client4 *cbcp = &cbp->cb_callback;
8208 8208
8209 8209 len = strlen(cbcp->cb_location.r_addr)+1;
8210 8210 addr = kmem_alloc(len, KM_SLEEP);
8211 8211 bcopy(cbcp->cb_location.r_addr, addr, len);
8212 8212 len = strlen(cbcp->cb_location.r_netid)+1;
8213 8213 netid = kmem_alloc(len, KM_SLEEP);
8214 8214 bcopy(cbcp->cb_location.r_netid, netid, len);
8215 8215 }
8216 8216
8217 8217 res->SETCLIENTID4res_u.client_using.r_addr = addr;
8218 8218 res->SETCLIENTID4res_u.client_using.r_netid = netid;
8219 8219
8220 8220 rfs4_client_rele(cp_confirmed);
8221 8221 }
8222 8222
8223 8223 /*
8224 8224 * Confirmed, creds match, and verifier matches; must
8225 8225 * be an update of the callback info
8226 8226 */
8227 8227 if (cp_confirmed->rc_nfs_client.verifier ==
8228 8228 args->client.verifier) {
8229 8229 /* Setup callback information */
8230 8230 rfs4_client_setcb(cp_confirmed, &args->callback,
8231 8231 args->callback_ident);
8232 8232
8233 8233 /* everything okay -- move ahead */
8234 8234 *cs->statusp = res->status = NFS4_OK;
8235 8235 res->SETCLIENTID4res_u.resok4.clientid =
8236 8236 cp_confirmed->rc_clientid;
8237 8237
8238 8238 /* update the confirm_verifier and return it */
8239 8239 rfs4_client_scv_next(cp_confirmed);
8240 8240 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8241 8241 cp_confirmed->rc_confirm_verf;
8242 8242
8243 8243 rfs4_client_rele(cp_confirmed);
8244 8244 goto out;
8245 8245 }
8246 8246
8247 8247 /*
8248 8248 * Creds match but the verifier doesn't. Must search
8249 8249 * for an unconfirmed client that would be replaced by
8250 8250 * this request.
8251 8251 */
8252 8252 create = FALSE;
8253 8253 cp_unconfirmed = rfs4_findclient(&args->client, &create,
8254 8254 cp_confirmed);
8255 8255 }
8256 8256
8257 8257 /*
8258 8258 * At this point, we have taken care of the brand new client
8259 8259 * struct, INUSE case, update of an existing, and confirmed
8260 8260 * client struct.
8261 8261 */
8262 8262
8263 8263 /*
8264 8264 * check to see if things have changed while we originally
8265 8265 * picked up the client struct. If they have, then return and
8266 8266 * retry the processing of this SETCLIENTID request.
8267 8267 */
8268 8268 if (cp_unconfirmed) {
8269 8269 rfs4_dbe_lock(cp_unconfirmed->rc_dbe);
8270 8270 if (!cp_unconfirmed->rc_need_confirm) {
8271 8271 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8272 8272 rfs4_client_rele(cp_unconfirmed);
8273 8273 if (cp_confirmed)
8274 8274 rfs4_client_rele(cp_confirmed);
8275 8275 goto retry;
8276 8276 }
8277 8277 /* do away with the old unconfirmed one */
8278 8278 rfs4_dbe_invalidate(cp_unconfirmed->rc_dbe);
8279 8279 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8280 8280 rfs4_client_rele(cp_unconfirmed);
8281 8281 cp_unconfirmed = NULL;
8282 8282 }
8283 8283
8284 8284 /*
8285 8285 * This search will temporarily hide the confirmed client
8286 8286 * struct while a new client struct is created as the
8287 8287 * unconfirmed one.
8288 8288 */
8289 8289 create = TRUE;
8290 8290 newcp = rfs4_findclient(&args->client, &create, cp_confirmed);
8291 8291
8292 8292 ASSERT(newcp != NULL);
8293 8293
8294 8294 if (newcp == NULL) {
8295 8295 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8296 8296 rfs4_client_rele(cp_confirmed);
8297 8297 goto out;
8298 8298 }
8299 8299
8300 8300 /*
8301 8301 * If one was not created, then a similar request must be in
8302 8302 * process so release and start over with this one
8303 8303 */
8304 8304 if (create != TRUE) {
8305 8305 rfs4_client_rele(newcp);
8306 8306 if (cp_confirmed)
8307 8307 rfs4_client_rele(cp_confirmed);
8308 8308 goto retry;
8309 8309 }
8310 8310
8311 8311 *cs->statusp = res->status = NFS4_OK;
8312 8312 res->SETCLIENTID4res_u.resok4.clientid = newcp->rc_clientid;
8313 8313 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8314 8314 newcp->rc_confirm_verf;
8315 8315 /* Setup callback information; CB_NULL confirmation later */
8316 8316 rfs4_client_setcb(newcp, &args->callback, args->callback_ident);
8317 8317
8318 8318 newcp->rc_cp_confirmed = cp_confirmed;
8319 8319
8320 8320 rfs4_client_rele(newcp);
8321 8321
8322 8322 out:
8323 8323 DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
8324 8324 SETCLIENTID4res *, res);
8325 8325 }
8326 8326
8327 8327 /*ARGSUSED*/
8328 8328 void
8329 8329 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
8330 8330 struct svc_req *req, struct compound_state *cs)
8331 8331 {
8332 8332 SETCLIENTID_CONFIRM4args *args =
8333 8333 &argop->nfs_argop4_u.opsetclientid_confirm;
8334 8334 SETCLIENTID_CONFIRM4res *res =
8335 8335 &resop->nfs_resop4_u.opsetclientid_confirm;
8336 8336 rfs4_client_t *cp, *cptoclose = NULL;
8337 8337
8338 8338 DTRACE_NFSV4_2(op__setclientid__confirm__start,
8339 8339 struct compound_state *, cs,
8340 8340 SETCLIENTID_CONFIRM4args *, args);
8341 8341
8342 8342 *cs->statusp = res->status = NFS4_OK;
8343 8343
8344 8344 cp = rfs4_findclient_by_id(args->clientid, TRUE);
8345 8345
8346 8346 if (cp == NULL) {
8347 8347 *cs->statusp = res->status =
8348 8348 rfs4_check_clientid(&args->clientid, 1);
8349 8349 goto out;
8350 8350 }
8351 8351
8352 8352 if (!creds_ok(cp, req, cs)) {
8353 8353 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8354 8354 rfs4_client_rele(cp);
8355 8355 goto out;
8356 8356 }
8357 8357
8358 8358 /* If the verifier doesn't match, the record doesn't match */
8359 8359 if (cp->rc_confirm_verf != args->setclientid_confirm) {
8360 8360 *cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
8361 8361 rfs4_client_rele(cp);
8362 8362 goto out;
8363 8363 }
8364 8364
8365 8365 rfs4_dbe_lock(cp->rc_dbe);
8366 8366 cp->rc_need_confirm = FALSE;
8367 8367 if (cp->rc_cp_confirmed) {
8368 8368 cptoclose = cp->rc_cp_confirmed;
8369 8369 cptoclose->rc_ss_remove = 1;
8370 8370 cp->rc_cp_confirmed = NULL;
8371 8371 }
8372 8372
8373 8373 /*
8374 8374 * Update the client's associated server instance, if it's changed
8375 8375 * since the client was created.
8376 8376 */
8377 8377 if (rfs4_servinst(cp) != rfs4_cur_servinst)
8378 8378 rfs4_servinst_assign(cp, rfs4_cur_servinst);
8379 8379
8380 8380 /*
8381 8381 * Record clientid in stable storage.
8382 8382 * Must be done after server instance has been assigned.
8383 8383 */
8384 8384 rfs4_ss_clid(cp);
8385 8385
8386 8386 rfs4_dbe_unlock(cp->rc_dbe);
8387 8387
8388 8388 if (cptoclose)
8389 8389 /* don't need to rele, client_close does it */
8390 8390 rfs4_client_close(cptoclose);
8391 8391
8392 8392 /* If needed, initiate CB_NULL call for callback path */
8393 8393 rfs4_deleg_cb_check(cp);
8394 8394 rfs4_update_lease(cp);
8395 8395
8396 8396 /*
8397 8397 * Check to see if client can perform reclaims
8398 8398 */
8399 8399 rfs4_ss_chkclid(cp);
8400 8400
8401 8401 rfs4_client_rele(cp);
8402 8402
8403 8403 out:
8404 8404 DTRACE_NFSV4_2(op__setclientid__confirm__done,
8405 8405 struct compound_state *, cs,
8406 8406 SETCLIENTID_CONFIRM4 *, res);
8407 8407 }
8408 8408
8409 8409
8410 8410 /*ARGSUSED*/
8411 8411 void
8412 8412 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
8413 8413 struct svc_req *req, struct compound_state *cs)
8414 8414 {
8415 8415 CLOSE4args *args = &argop->nfs_argop4_u.opclose;
8416 8416 CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
8417 8417 rfs4_state_t *sp;
8418 8418 nfsstat4 status;
8419 8419
8420 8420 DTRACE_NFSV4_2(op__close__start, struct compound_state *, cs,
8421 8421 CLOSE4args *, args);
8422 8422
8423 8423 if (cs->vp == NULL) {
8424 8424 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8425 8425 goto out;
8426 8426 }
8427 8427
8428 8428 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID);
8429 8429 if (status != NFS4_OK) {
8430 8430 *cs->statusp = resp->status = status;
8431 8431 goto out;
8432 8432 }
8433 8433
8434 8434 /* Ensure specified filehandle matches */
8435 8435 if (cs->vp != sp->rs_finfo->rf_vp) {
8436 8436 rfs4_state_rele(sp);
8437 8437 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8438 8438 goto out;
8439 8439 }
8440 8440
8441 8441 /* hold off other access to open_owner while we tinker */
8442 8442 rfs4_sw_enter(&sp->rs_owner->ro_sw);
8443 8443
8444 8444 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
8445 8445 case NFS4_CHECK_STATEID_OKAY:
8446 8446 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8447 8447 resop) != NFS4_CHKSEQ_OKAY) {
8448 8448 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8449 8449 goto end;
8450 8450 }
8451 8451 break;
8452 8452 case NFS4_CHECK_STATEID_OLD:
8453 8453 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8454 8454 goto end;
8455 8455 case NFS4_CHECK_STATEID_BAD:
8456 8456 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8457 8457 goto end;
8458 8458 case NFS4_CHECK_STATEID_EXPIRED:
8459 8459 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8460 8460 goto end;
8461 8461 case NFS4_CHECK_STATEID_CLOSED:
8462 8462 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8463 8463 goto end;
8464 8464 case NFS4_CHECK_STATEID_UNCONFIRMED:
8465 8465 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8466 8466 goto end;
8467 8467 case NFS4_CHECK_STATEID_REPLAY:
8468 8468 /* Check the sequence id for the open owner */
8469 8469 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8470 8470 resop)) {
8471 8471 case NFS4_CHKSEQ_OKAY:
8472 8472 /*
8473 8473 * This is replayed stateid; if seqid matches
8474 8474 * next expected, then client is using wrong seqid.
8475 8475 */
8476 8476 /* FALL THROUGH */
8477 8477 case NFS4_CHKSEQ_BAD:
8478 8478 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8479 8479 goto end;
8480 8480 case NFS4_CHKSEQ_REPLAY:
8481 8481 /*
8482 8482 * Note this case is the duplicate case so
8483 8483 * resp->status is already set.
8484 8484 */
8485 8485 *cs->statusp = resp->status;
8486 8486 rfs4_update_lease(sp->rs_owner->ro_client);
8487 8487 goto end;
8488 8488 }
8489 8489 break;
8490 8490 default:
8491 8491 ASSERT(FALSE);
8492 8492 break;
8493 8493 }
8494 8494
8495 8495 rfs4_dbe_lock(sp->rs_dbe);
8496 8496
8497 8497 /* Update the stateid. */
8498 8498 next_stateid(&sp->rs_stateid);
8499 8499 resp->open_stateid = sp->rs_stateid.stateid;
8500 8500
8501 8501 rfs4_dbe_unlock(sp->rs_dbe);
8502 8502
8503 8503 rfs4_update_lease(sp->rs_owner->ro_client);
8504 8504 rfs4_update_open_sequence(sp->rs_owner);
8505 8505 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8506 8506
8507 8507 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
8508 8508
8509 8509 *cs->statusp = resp->status = status;
8510 8510
8511 8511 end:
8512 8512 rfs4_sw_exit(&sp->rs_owner->ro_sw);
8513 8513 rfs4_state_rele(sp);
8514 8514 out:
8515 8515 DTRACE_NFSV4_2(op__close__done, struct compound_state *, cs,
8516 8516 CLOSE4res *, resp);
8517 8517 }
8518 8518
8519 8519 /*
8520 8520 * Manage the counts on the file struct and close all file locks
 *
 * sp - the open state whose locks and share reservations are released
 * cr - credentials handed to VOP_CLOSE()
 * close_of_client - TRUE when this call is part of closing down the whole
 * client; in that case all of the client's locks are dropped with a single
 * request (at most once per client, tracked by rc_unlksys_completed)
 * rather than per lock owner.
 *
 * Order of operations: release the byte-range locks, release the share
 * reservations (rfs4_unshare), then, if this state still holds open access,
 * drop its contribution to the per-file access/deny counters, VOP_CLOSE()
 * the vnode and clear rs_open_access/rs_open_deny.
8521 8521 */
8522 8522 /*ARGSUSED*/
8523 8523 void
8524 8524 rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
8525 8525 bool_t close_of_client)
8526 8526 {
8527 8527 rfs4_file_t *fp = sp->rs_finfo;
8528 8528 rfs4_lo_state_t *lsp;
8529 8529 int fflags = 0;
8530 8530
8531 8531 /*
8532 8532 * If this call is part of the larger closing down of client
8533 8533 * state then it is just easier to release all locks
8534 8534 * associated with this client instead of going through each
8535 8535 * individual file and cleaning locks there.
8536 8536 */
8537 8537 if (close_of_client) {
8538 8538 if (sp->rs_owner->ro_client->rc_unlksys_completed == FALSE &&
8539 8539 !list_is_empty(&sp->rs_lostatelist) &&
8540 8540 sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) {
8541 8541 /* Is the PxFS kernel module loaded? */
8542 8542 if (lm_remove_file_locks != NULL) {
8543 8543 int new_sysid;
8544 8544
8545 8545 /* Encode the cluster nodeid in new sysid */
8546 8546 new_sysid = sp->rs_owner->ro_client->rc_sysidt;
8547 8547 lm_set_nlmid_flk(&new_sysid);
8548 8548
8549 8549 /*
8550 8550 * This PxFS routine removes file locks for a
8551 8551 * client over all nodes of a cluster.
8552 8552 */
8553 8553 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
8554 8554 "lm_remove_file_locks(sysid=0x%x)\n",
8555 8555 new_sysid));
8556 8556 (*lm_remove_file_locks)(new_sysid);
8557 8557 } else {
8558 8558 struct flock64 flk;
8559 8559
8560 8560 /* Release all locks for this client */
8561 8561 flk.l_type = F_UNLKSYS;
8562 8562 flk.l_whence = 0;
8563 8563 flk.l_start = 0;
8564 8564 flk.l_len = 0;
8565 8565 flk.l_sysid =
8566 8566 sp->rs_owner->ro_client->rc_sysidt;
8567 8567 flk.l_pid = 0;
8568 8568 (void) VOP_FRLOCK(sp->rs_finfo->rf_vp, F_SETLK,
8569 8569 &flk, F_REMOTELOCK | FREAD | FWRITE,
8570 8570 (u_offset_t)0, NULL, CRED(), NULL);
8571 8571 }
8572 8572
/* Remember that the client-wide unlock was done so it is not repeated */
8573 8573 sp->rs_owner->ro_client->rc_unlksys_completed = TRUE;
8574 8574 }
8575 8575 }
8576 8576
8577 8577 /*
8578 8578 * Release all locks on this file by this lock owner or at
8579 8579 * least mark the locks as having been released
8580 8580 */
8581 8581 for (lsp = list_head(&sp->rs_lostatelist); lsp != NULL;
8582 8582 lsp = list_next(&sp->rs_lostatelist, lsp)) {
8583 8583 lsp->rls_locks_cleaned = TRUE;
8584 8584
8585 8585 /* Was this already taken care of above? */
8586 8586 if (!close_of_client &&
8587 8587 sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8588 8588 (void) cleanlocks(sp->rs_finfo->rf_vp,
8589 8589 lsp->rls_locker->rl_pid,
8590 8590 lsp->rls_locker->rl_client->rc_sysidt);
8591 8591 }
8592 8592
8593 8593 /*
8594 8594 * Release any shrlocks associated with this open state ID.
8595 8595 * This must be done before the rfs4_state gets marked closed.
8596 8596 */
8597 8597 if (sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8598 8598 (void) rfs4_unshare(sp);
8599 8599
8600 8600 if (sp->rs_open_access) {
8601 8601 rfs4_dbe_lock(fp->rf_dbe);
8602 8602
8603 8603 /*
8604 8604 * Decrement the count for each access and deny bit that this
8605 8605 * state has contributed to the file.
8606 8606 * If the file counts go to zero
8607 8607 * clear the appropriate bit in the appropriate mask.
 * fflags collects FREAD/FWRITE for the access bits held so
 * that VOP_CLOSE() below is called with the matching mode.
8608 8608 */
8609 8609 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) {
8610 8610 fp->rf_access_read--;
8611 8611 fflags |= FREAD;
8612 8612 if (fp->rf_access_read == 0)
8613 8613 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8614 8614 }
8615 8615 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) {
8616 8616 fp->rf_access_write--;
8617 8617 fflags |= FWRITE;
8618 8618 if (fp->rf_access_write == 0)
8619 8619 fp->rf_share_access &=
8620 8620 ~OPEN4_SHARE_ACCESS_WRITE;
8621 8621 }
8622 8622 if (sp->rs_open_deny & OPEN4_SHARE_DENY_READ) {
8623 8623 fp->rf_deny_read--;
8624 8624 if (fp->rf_deny_read == 0)
8625 8625 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8626 8626 }
8627 8627 if (sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) {
8628 8628 fp->rf_deny_write--;
8629 8629 if (fp->rf_deny_write == 0)
8630 8630 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8631 8631 }
8632 8632
8633 8633 (void) VOP_CLOSE(fp->rf_vp, fflags, 1, (offset_t)0, cr, NULL);
8634 8634
8635 8635 rfs4_dbe_unlock(fp->rf_dbe);
8636 8636
/* This state no longer contributes any access/deny bits */
8637 8637 sp->rs_open_access = 0;
8638 8638 sp->rs_open_deny = 0;
8639 8639 }
8640 8640 }
8641 8641
8642 8642 /*
8643 8643 * lock_denied: Fill in a LOCK4denied structure given an flock64 structure.
 *
 * dp - result structure to fill in (owner, offset, length, locktype)
 * flk - description of the conflicting lock
 *
 * dp->owner.owner_val is allocated here with kmem_alloc(); it is released
 * by lock_denied_free() using dp->owner.owner_len.
 *
 * Returns:
 * NFS4ERR_EXPIRED - the lock owner found for flk->l_pid belongs to a client
 * whose lease has expired; that client is closed and dp is not filled in
 * NFS4ERR_INVAL - flk->l_type is neither F_RDLCK nor F_WRLCK (the owner,
 * offset and length fields have already been filled in at that point)
 * NFS4_OK - dp is completely filled in
8644 8644 */
8645 8645 static nfsstat4
8646 8646 lock_denied(LOCK4denied *dp, struct flock64 *flk)
8647 8647 {
8648 8648 rfs4_lockowner_t *lo;
8649 8649 rfs4_client_t *cp;
8650 8650 uint32_t len;
8651 8651
8652 8652 lo = rfs4_findlockowner_by_pid(flk->l_pid);
8653 8653 if (lo != NULL) {
8654 8654 cp = lo->rl_client;
8655 8655 if (rfs4_lease_expired(cp)) {
8656 8656 rfs4_lockowner_rele(lo);
8657 8657 rfs4_dbe_hold(cp->rc_dbe);
8658 8658 rfs4_client_close(cp);
8659 8659 return (NFS4ERR_EXPIRED);
8660 8660 }
/* Known NFSv4 lock owner: hand back a copy of its owner identity */
8661 8661 dp->owner.clientid = lo->rl_owner.clientid;
8662 8662 len = lo->rl_owner.owner_len;
8663 8663 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8664 8664 bcopy(lo->rl_owner.owner_val, dp->owner.owner_val, len);
8665 8665 dp->owner.owner_len = len;
8666 8666 rfs4_lockowner_rele(lo);
8667 8667 goto finish;
8668 8668 }
8669 8669
8670 8670 /*
8671 8671 * It's not an NFS4 lock. We take advantage of the fact that the upper
8672 8672 * 32 bits of the client id contain the boot time for an NFS4 lock. So
8673 8673 * we fabricate an identity by setting clientid to the sysid, and
8674 8674 * the lock owner to the pid.
8675 8675 */
8676 8676 dp->owner.clientid = flk->l_sysid;
8677 8677 len = sizeof (pid_t);
8678 8678 dp->owner.owner_len = len;
8679 8679 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8680 8680 bcopy(&flk->l_pid, dp->owner.owner_val, len);
8681 8681 finish:
8682 8682 dp->offset = flk->l_start;
8683 8683 dp->length = flk->l_len;
8684 8684
8685 8685 if (flk->l_type == F_RDLCK)
8686 8686 dp->locktype = READ_LT;
8687 8687 else if (flk->l_type == F_WRLCK)
8688 8688 dp->locktype = WRITE_LT;
8689 8689 else
8690 8690 return (NFS4ERR_INVAL); /* no mapping from POSIX ltype to v4 */
8691 8691
8692 8692 return (NFS4_OK);
8693 8693 }
8694 8694
8695 8695 /*
8696 8696 * The NFSv4.0 LOCK operation does not support the blocking lock (at the
8697 8697 * NFSv4.0 protocol level) so the client needs to resend the LOCK request in a
8698 8698 * case the lock is denied by the NFSv4.0 server. NFSv4.0 clients are prepared
8699 8699 * for that (obviously); they are sending the LOCK requests with some delays
8700 8700 * between the attempts. See nfs4frlock() and nfs4_block_and_wait() for the
8701 8701 * locking and delay implementation at the client side.
8702 8702 *
8703 8703 * To make the life of the clients easier, the NFSv4.0 server tries to do some
8704 8704 * fast retries on its own (the for loop below) in a hope the lock will be
8705 8705 * available soon. And if not, the client won't need to resend the LOCK
8706 8706 * requests so fast to check the lock availability. This basically saves some
8707 8707 * network traffic and tries to make sure the client gets the lock ASAP.
 *
 * vp - vnode to lock
 * flock - lock request; overwritten with the conflicting lock when the
 * request is denied and F_GETLK reports a conflicting lock
 * flag - flags passed through to VOP_FRLOCK()
 * cred - credentials passed through to VOP_FRLOCK()
 *
 * The request is tried up to rfs4_maxlock_tries times, starting with a delay
 * of rfs4_lock_delay milliseconds that doubles after each failed attempt.
 *
 * Returns the error from the last set-lock VOP_FRLOCK() call (0 on success).
8708 8708 */
8709 8709 static int
8710 8710 setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred)
8711 8711 {
8712 8712 int error;
8713 8713 struct flock64 flk;
8714 8714 int i;
8715 8715 clock_t delaytime;
8716 8716 int cmd;
8717 8717 int spin_cnt = 0;
8718 8718
8719 8719 cmd = nbl_need_check(vp) ? F_SETLK_NBMAND : F_SETLK;
8720 8720 retry:
/* Each pass through "retry" starts again from the initial delay */
8721 8721 delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
8722 8722
8723 8723 for (i = 0; i < rfs4_maxlock_tries; i++) {
8724 8724 LOCK_PRINT(rfs4_debug, "setlock", cmd, flock);
8725 8725 error = VOP_FRLOCK(vp, cmd,
8726 8726 flock, flag, (u_offset_t)0, NULL, cred, NULL);
8727 8727
8728 8728 if (error != EAGAIN && error != EACCES)
8729 8729 break;
8730 8730
/* No point in sleeping after the final attempt */
8731 8731 if (i < rfs4_maxlock_tries - 1) {
8732 8732 delay(delaytime);
8733 8733 delaytime *= 2;
8734 8734 }
8735 8735 }
8736 8736
8737 8737 if (error == EAGAIN || error == EACCES) {
8738 8738 /* Get the owner of the lock */
8739 8739 flk = *flock;
8740 8740 LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk);
8741 8741 if (VOP_FRLOCK(vp, F_GETLK, &flk, flag, 0, NULL, cred,
8742 8742 NULL) == 0) {
8743 8743 /*
8744 8744 * There's a race inherent in the current VOP_FRLOCK
8745 8745 * design where:
8746 8746 * a: "other guy" takes a lock that conflicts with a
8747 8747 * lock we want
8748 8748 * b: we attempt to take our lock (non-blocking) and
8749 8749 * the attempt fails.
8750 8750 * c: "other guy" releases the conflicting lock
8751 8751 * d: we ask what lock conflicts with the lock we want,
8752 8752 * getting F_UNLCK (no lock blocks us)
8753 8753 *
8754 8754 * If we retry the non-blocking lock attempt in this
8755 8755 * case (restart at step 'b') there's some possibility
8756 8756 * that many such attempts might fail. However a test
8757 8757 * designed to actually provoke this race shows that
8758 8758 * the vast majority of cases require no retry, and
8759 8759 * only a few took as many as three retries. Here's
8760 8760 * the test outcome:
8761 8761 *
8762 8762 * number of retries how many times we needed
8763 8763 * that many retries
8764 8764 * 0 79461
8765 8765 * 1 862
8766 8766 * 2 49
8767 8767 * 3 5
8768 8768 *
8769 8769 * Given those empirical results, we arbitrarily limit
8770 8770 * the retry count to ten.
8771 8771 *
8772 8772 * If we actually make it to ten retries and give up,
8773 8773 * nothing catastrophic happens, but we're unable to
8774 8774 * return the information about the conflicting lock to
8775 8775 * the NFS client. That's an acceptable trade off vs.
8776 8776 * letting this retry loop run forever.
8777 8777 */
8778 8778 if (flk.l_type == F_UNLCK) {
8779 8779 if (spin_cnt++ < 10) {
8780 8780 /* No longer locked, retry */
8781 8781 goto retry;
8782 8782 }
8783 8783 } else {
8784 8784 *flock = flk;
8785 8785 LOCK_PRINT(rfs4_debug, "setlock(blocking lock)",
8786 8786 F_GETLK, &flk);
8787 8787 }
8788 8788 }
8789 8789 }
8790 8790
8791 8791 return (error);
8792 8792 }
8793 8793
/*
 * rfs4_do_lock: common worker for the LOCK and LOCKU operations; it is
 * called by both rfs4_op_lock() and rfs4_op_locku().
 *
 * lsp - lock-owner state the request is made for
 * locktype - NFSv4 lock type; only consulted when the op is not OP_LOCKU
 * offset, length - byte range; length 0 is rejected and a length of all
 * ones means "to end of file" (converted to the POSIX l_len of 0)
 * cred - credentials handed to setlock()
 * resop - result op; resop->resop distinguishes LOCK from LOCKU and the
 * LOCK4res denied information is filled in on a conflict
 *
 * The open state (sp) is locked only while it is examined; the lock is
 * dropped around setlock() and the "closed" check is repeated afterwards.
 * On success the lock stateid is advanced with next_stateid().
 *
 * Returns an nfsstat4; errno values from setlock() are mapped by the switch
 * at the end of the function.
 */
8794 8794 /*ARGSUSED*/
8795 8795 static nfsstat4
8796 8796 rfs4_do_lock(rfs4_lo_state_t *lsp, nfs_lock_type4 locktype,
8797 8797 offset4 offset, length4 length, cred_t *cred, nfs_resop4 *resop)
8798 8798 {
8799 8799 nfsstat4 status;
8800 8800 rfs4_lockowner_t *lo = lsp->rls_locker;
8801 8801 rfs4_state_t *sp = lsp->rls_state;
8802 8802 struct flock64 flock;
8803 8803 int16_t ltype;
8804 8804 int flag;
8805 8805 int error;
8806 8806 sysid_t sysid;
8807 8807 LOCK4res *lres;
8808 8808 vnode_t *vp;
8809 8809
8810 8810 if (rfs4_lease_expired(lo->rl_client)) {
8811 8811 return (NFS4ERR_EXPIRED);
8812 8812 }
8813 8813
8814 8814 if ((status = rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
8815 8815 return (status);
8816 8816
8817 8817 /* Check for zero length. To lock to end of file use all ones for V4 */
8818 8818 if (length == 0)
8819 8819 return (NFS4ERR_INVAL);
8820 8820 else if (length == (length4)(~0))
8821 8821 length = 0; /* Posix to end of file */
8822 8822
8823 8823 retry:
8824 8824 rfs4_dbe_lock(sp->rs_dbe);
8825 8825 if (sp->rs_closed == TRUE) {
8826 8826 rfs4_dbe_unlock(sp->rs_dbe);
8827 8827 return (NFS4ERR_OLD_STATEID);
8828 8828 }
8829 8829
/*
 * For LOCK, the requested lock type must be permitted by the share
 * access of the open state. LOCKU always maps to F_UNLCK.
 *
 * NOTE(review): the switch below has no default case, so ltype is not
 * assigned here for a locktype outside the four listed values - confirm
 * that such values are rejected before this function is reached.
 */
8830 8830 if (resop->resop != OP_LOCKU) {
8831 8831 switch (locktype) {
8832 8832 case READ_LT:
8833 8833 case READW_LT:
8834 8834 if ((sp->rs_share_access
8835 8835 & OPEN4_SHARE_ACCESS_READ) == 0) {
8836 8836 rfs4_dbe_unlock(sp->rs_dbe);
8837 8837
8838 8838 return (NFS4ERR_OPENMODE);
8839 8839 }
8840 8840 ltype = F_RDLCK;
8841 8841 break;
8842 8842 case WRITE_LT:
8843 8843 case WRITEW_LT:
8844 8844 if ((sp->rs_share_access
8845 8845 & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8846 8846 rfs4_dbe_unlock(sp->rs_dbe);
8847 8847
8848 8848 return (NFS4ERR_OPENMODE);
8849 8849 }
8850 8850 ltype = F_WRLCK;
8851 8851 break;
8852 8852 }
8853 8853 } else
8854 8854 ltype = F_UNLCK;
8855 8855
8856 8856 flock.l_type = ltype;
8857 8857 flock.l_whence = 0; /* SEEK_SET */
8858 8858 flock.l_start = offset;
8859 8859 flock.l_len = length;
8860 8860 flock.l_sysid = sysid;
8861 8861 flock.l_pid = lsp->rls_locker->rl_pid;
8862 8862
8863 8863 /* Note that length4 is uint64_t but l_len and l_start are off64_t */
8864 8864 if (flock.l_len < 0 || flock.l_start < 0) {
8865 8865 rfs4_dbe_unlock(sp->rs_dbe);
8866 8866 return (NFS4ERR_INVAL);
8867 8867 }
8868 8868
8869 8869 /*
8870 8870 * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and
8871 8871 * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE.
8872 8872 */
8873 8873 flag = (int)sp->rs_share_access | F_REMOTELOCK;
8874 8874
/* Hold the vnode so it can be used after the state lock is dropped */
8875 8875 vp = sp->rs_finfo->rf_vp;
8876 8876 VN_HOLD(vp);
8877 8877
8878 8878 /*
8879 8879 * We need to unlock sp before we call the underlying filesystem to
8880 8880 * acquire the file lock.
8881 8881 */
8882 8882 rfs4_dbe_unlock(sp->rs_dbe);
8883 8883
8884 8884 error = setlock(vp, &flock, flag, cred);
8885 8885
8886 8886 /*
8887 8887 * Make sure the file is still open. In a case the file was closed in
8888 8888 * the meantime, clean the lock we acquired using the setlock() call
8889 8889 * above, and return the appropriate error.
8890 8890 */
8891 8891 rfs4_dbe_lock(sp->rs_dbe);
8892 8892 if (sp->rs_closed == TRUE) {
8893 8893 cleanlocks(vp, lsp->rls_locker->rl_pid, sysid);
8894 8894 rfs4_dbe_unlock(sp->rs_dbe);
8895 8895
8896 8896 VN_RELE(vp);
8897 8897
8898 8898 return (NFS4ERR_OLD_STATEID);
8899 8899 }
8900 8900 rfs4_dbe_unlock(sp->rs_dbe);
8901 8901
8902 8902 VN_RELE(vp);
8903 8903
/* A successful lock/unlock advances the lock stateid */
8904 8904 if (error == 0) {
8905 8905 rfs4_dbe_lock(lsp->rls_dbe);
8906 8906 next_stateid(&lsp->rls_lockid);
8907 8907 rfs4_dbe_unlock(lsp->rls_dbe);
8908 8908 }
8909 8909
8910 8910 /*
8911 8911 * N.B. We map error values to nfsv4 errors. This is different
8912 8912 * than puterrno4 routine.
8913 8913 */
8914 8914 switch (error) {
8915 8915 case 0:
8916 8916 status = NFS4_OK;
8917 8917 break;
8918 8918 case EAGAIN:
8919 8919 case EACCES: /* Old value */
8920 8920 /* Can only get here if op is OP_LOCK */
8921 8921 ASSERT(resop->resop == OP_LOCK);
8922 8922 lres = &resop->nfs_resop4_u.oplock;
8923 8923 status = NFS4ERR_DENIED;
/*
 * lock_denied() returns NFS4ERR_EXPIRED after closing a conflicting
 * owner's client whose lease has expired, so the request is retried.
 */
8924 8924 if (lock_denied(&lres->LOCK4res_u.denied, &flock)
8925 8925 == NFS4ERR_EXPIRED)
8926 8926 goto retry;
8927 8927 break;
8928 8928 case ENOLCK:
8929 8929 status = NFS4ERR_DELAY;
8930 8930 break;
8931 8931 case EOVERFLOW:
8932 8932 status = NFS4ERR_INVAL;
8933 8933 break;
8934 8934 case EINVAL:
8935 8935 status = NFS4ERR_NOTSUPP;
8936 8936 break;
8937 8937 default:
8938 8938 status = NFS4ERR_SERVERFAULT;
8939 8939 break;
8940 8940 }
8941 8941
8942 8942 return (status);
8943 8943 }
8944 8944
/*
 * rfs4_op_lock: handler for the NFSv4 LOCK operation.
 *
 * argop/resop carry the LOCK4args and LOCK4res; cs is the compound state
 * (current vnode, credentials, status pointer).
 *
 * Two paths are taken depending on args->locker.new_lock_owner:
 * - new lock owner: the open stateid and open_seqid are validated, the
 * lock owner and lock-owner state are looked up or created, and a
 * duplicate (replayed) request is answered from the saved reply;
 * - existing lock owner: the lock stateid and lock_seqid are validated and
 * replays are answered from the saved reply.
 *
 * After validation the object type and grace/reclaim rules are checked and
 * rfs4_do_lock() performs the lock request. The result is recorded for
 * replay detection at "out", and the holds and serialization taken here are
 * dropped at "end".
 */
8945 8945 /*ARGSUSED*/
8946 8946 void
8947 8947 rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
8948 8948 struct svc_req *req, struct compound_state *cs)
8949 8949 {
8950 8950 LOCK4args *args = &argop->nfs_argop4_u.oplock;
8951 8951 LOCK4res *resp = &resop->nfs_resop4_u.oplock;
8952 8952 nfsstat4 status;
8953 8953 stateid4 *stateid;
8954 8954 rfs4_lockowner_t *lo;
8955 8955 rfs4_client_t *cp;
8956 8956 rfs4_state_t *sp = NULL;
8957 8957 rfs4_lo_state_t *lsp = NULL;
8958 8958 bool_t ls_sw_held = FALSE;
8959 8959 bool_t create = TRUE;
8960 8960 bool_t lcreate = TRUE;
8961 8961 bool_t dup_lock = FALSE;
8962 8962 int rc;
8963 8963
8964 8964 DTRACE_NFSV4_2(op__lock__start, struct compound_state *, cs,
8965 8965 LOCK4args *, args);
8966 8966
8967 8967 if (cs->vp == NULL) {
8968 8968 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8969 8969 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8970 8970 cs, LOCK4res *, resp);
8971 8971 return;
8972 8972 }
8973 8973
8974 8974 if (args->locker.new_lock_owner) {
8975 8975 /* Create a new lockowner for this instance */
8976 8976 open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;
8977 8977
8978 8978 NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner"));
8979 8979
8980 8980 stateid = &olo->open_stateid;
8981 8981 status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID);
8982 8982 if (status != NFS4_OK) {
8983 8983 NFS4_DEBUG(rfs4_debug,
8984 8984 (CE_NOTE, "Get state failed in lock %d", status));
8985 8985 *cs->statusp = resp->status = status;
8986 8986 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8987 8987 cs, LOCK4res *, resp);
8988 8988 return;
8989 8989 }
8990 8990
8991 8991 /* Ensure specified filehandle matches */
8992 8992 if (cs->vp != sp->rs_finfo->rf_vp) {
8993 8993 rfs4_state_rele(sp);
8994 8994 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8995 8995 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8996 8996 cs, LOCK4res *, resp);
8997 8997 return;
8998 8998 }
8999 8999
9000 9000 /* hold off other access to open_owner while we tinker */
9001 9001 rfs4_sw_enter(&sp->rs_owner->ro_sw);
9002 9002
9003 9003 switch (rc = rfs4_check_stateid_seqid(sp, stateid)) {
9004 9004 case NFS4_CHECK_STATEID_OLD:
9005 9005 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9006 9006 goto end;
9007 9007 case NFS4_CHECK_STATEID_BAD:
9008 9008 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9009 9009 goto end;
9010 9010 case NFS4_CHECK_STATEID_EXPIRED:
9011 9011 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9012 9012 goto end;
9013 9013 case NFS4_CHECK_STATEID_UNCONFIRMED:
9014 9014 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9015 9015 goto end;
9016 9016 case NFS4_CHECK_STATEID_CLOSED:
9017 9017 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9018 9018 goto end;
9019 9019 case NFS4_CHECK_STATEID_OKAY:
9020 9020 case NFS4_CHECK_STATEID_REPLAY:
9021 9021 switch (rfs4_check_olo_seqid(olo->open_seqid,
9022 9022 sp->rs_owner, resop)) {
9023 9023 case NFS4_CHKSEQ_OKAY:
9024 9024 if (rc == NFS4_CHECK_STATEID_OKAY)
9025 9025 break;
9026 9026 /*
9027 9027 * This is replayed stateid; if seqid
9028 9028 * matches next expected, then client
9029 9029 * is using wrong seqid.
9030 9030 */
9031 9031 /* FALLTHROUGH */
9032 9032 case NFS4_CHKSEQ_BAD:
9033 9033 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9034 9034 goto end;
9035 9035 case NFS4_CHKSEQ_REPLAY:
9036 9036 /* This is a duplicate LOCK request */
9037 9037 dup_lock = TRUE;
9038 9038
9039 9039 /*
9040 9040 * For a duplicate we do not want to
9041 9041 * create a new lockowner as it should
9042 9042 * already exist.
9043 9043 * Turn off the lockowner create flag.
9044 9044 */
9045 9045 lcreate = FALSE;
9046 9046 }
9047 9047 break;
9048 9048 }
9049 9049
9050 9050 lo = rfs4_findlockowner(&olo->lock_owner, &lcreate);
9051 9051 if (lo == NULL) {
9052 9052 NFS4_DEBUG(rfs4_debug,
9053 9053 (CE_NOTE, "rfs4_op_lock: no lock owner"));
9054 9054 *cs->statusp = resp->status = NFS4ERR_RESOURCE;
9055 9055 goto end;
9056 9056 }
9057 9057
9058 9058 lsp = rfs4_findlo_state_by_owner(lo, sp, &create);
9059 9059 if (lsp == NULL) {
9060 9060 rfs4_update_lease(sp->rs_owner->ro_client);
9061 9061 /*
9062 9062 * Only update the open_seqid if this is not
9063 9063 * a duplicate request
9064 9064 */
9065 9065 if (dup_lock == FALSE) {
9066 9066 rfs4_update_open_sequence(sp->rs_owner);
9067 9067 }
9068 9068
9069 9069 NFS4_DEBUG(rfs4_debug,
9070 9070 (CE_NOTE, "rfs4_op_lock: no state"));
9071 9071 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
9072 9072 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
9073 9073 rfs4_lockowner_rele(lo);
9074 9074 goto end;
9075 9075 }
9076 9076
9077 9077 /*
9078 9078 * This is the new_lock_owner branch and the client is
9079 9079 * supposed to be associating a new lock_owner with
9080 9080 * the open file at this point. If we find that a
9081 9081 * lock_owner/state association already exists and a
9082 9082 * successful LOCK request was returned to the client,
9083 9083 * an error is returned to the client since this is
9084 9084 * not appropriate. The client should be using the
9085 9085 * existing lock_owner branch.
9086 9086 */
9087 9087 if (dup_lock == FALSE && create == FALSE) {
9088 9088 if (lsp->rls_lock_completed == TRUE) {
9089 9089 *cs->statusp =
9090 9090 resp->status = NFS4ERR_BAD_SEQID;
9091 9091 rfs4_lockowner_rele(lo);
9092 9092 goto end;
9093 9093 }
9094 9094 }
9095 9095
9096 9096 rfs4_update_lease(sp->rs_owner->ro_client);
9097 9097
9098 9098 /*
9099 9099 * Only update the open_seqid if this is not
9100 9100 * a duplicate request
9101 9101 */
9102 9102 if (dup_lock == FALSE) {
9103 9103 rfs4_update_open_sequence(sp->rs_owner);
9104 9104 }
9105 9105
9106 9106 /*
9107 9107 * If this is a duplicate lock request, just copy the
9108 9108 * previously saved reply and return.
9109 9109 */
9110 9110 if (dup_lock == TRUE) {
9111 9111 /* verify that lock_seqid's match */
9112 9112 if (lsp->rls_seqid != olo->lock_seqid) {
9113 9113 NFS4_DEBUG(rfs4_debug,
9114 9114 (CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad"
9115 9115 "lsp->seqid=%d old->seqid=%d",
9116 9116 lsp->rls_seqid, olo->lock_seqid));
9117 9117 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9118 9118 } else {
9119 9119 rfs4_copy_reply(resop, &lsp->rls_reply);
9120 9120 /*
9121 9121 * Make sure to copy the just
9122 9122 * retrieved reply status into the
9123 9123 * overall compound status
9124 9124 */
9125 9125 *cs->statusp = resp->status;
9126 9126 }
9127 9127 rfs4_lockowner_rele(lo);
9128 9128 goto end;
9129 9129 }
9130 9130
9131 9131 rfs4_dbe_lock(lsp->rls_dbe);
9132 9132
9133 9133 /* Make sure to update the lock sequence id */
9134 9134 lsp->rls_seqid = olo->lock_seqid;
9135 9135
9136 9136 NFS4_DEBUG(rfs4_debug,
9137 9137 (CE_NOTE, "Lock seqid established as %d", lsp->rls_seqid));
9138 9138
9139 9139 /*
9140 9140 * This is used to signify the newly created lockowner
9141 9141 * stateid and its sequence number. The checks for
9142 9142 * sequence number and increment don't occur on the
9143 9143 * very first lock request for a lockowner.
9144 9144 */
9145 9145 lsp->rls_skip_seqid_check = TRUE;
9146 9146
9147 9147 /* hold off other access to lsp while we tinker */
9148 9148 rfs4_sw_enter(&lsp->rls_sw);
9149 9149 ls_sw_held = TRUE;
9150 9150
9151 9151 rfs4_dbe_unlock(lsp->rls_dbe);
9152 9152
9153 9153 rfs4_lockowner_rele(lo);
9154 9154 } else {
9155 9155 stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
9156 9156 /* get lsp and hold the lock on the underlying file struct */
9157 9157 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE))
9158 9158 != NFS4_OK) {
9159 9159 *cs->statusp = resp->status = status;
9160 9160 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9161 9161 cs, LOCK4res *, resp);
9162 9162 return;
9163 9163 }
9164 9164 create = FALSE; /* We didn't create lsp */
9165 9165
9166 9166 /* Ensure specified filehandle matches */
9167 9167 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9168 9168 rfs4_lo_state_rele(lsp, TRUE);
9169 9169 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9170 9170 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9171 9171 cs, LOCK4res *, resp);
9172 9172 return;
9173 9173 }
9174 9174
9175 9175 /* hold off other access to lsp while we tinker */
9176 9176 rfs4_sw_enter(&lsp->rls_sw);
9177 9177 ls_sw_held = TRUE;
9178 9178
9179 9179 switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9180 9180 /*
9181 9181 * The stateid looks like it was okay (expected to be
9182 9182 * the next one)
9183 9183 */
9184 9184 case NFS4_CHECK_STATEID_OKAY:
9185 9185 /*
9186 9186 * The sequence id is now checked. Determine
9187 9187 * if this is a replay or if it is in the
9188 9188 * expected (next) sequence. In the case of a
9189 9189 * replay, there are two replay conditions
9190 9190 * that may occur. The first is the normal
9191 9191 * condition where a LOCK is done with a
9192 9192 * NFS4_OK response and the stateid is
9193 9193 * updated. That case is handled below when
9194 9194 * the stateid is identified as a REPLAY. The
9195 9195 * second is the case where an error is
9196 9196 * returned, like NFS4ERR_DENIED, and the
9197 9197 * sequence number is updated but the stateid
9198 9198 * is not updated. This second case is dealt
9199 9199 * with here. So it may seem odd that the
9200 9200 * stateid is okay but the sequence id is a
9201 9201 * replay but it is okay.
9202 9202 */
9203 9203 switch (rfs4_check_lock_seqid(
9204 9204 args->locker.locker4_u.lock_owner.lock_seqid,
9205 9205 lsp, resop)) {
9206 9206 case NFS4_CHKSEQ_REPLAY:
9207 9207 if (resp->status != NFS4_OK) {
9208 9208 /*
9209 9209 * Here is our replay and need
9210 9210 * to verify that the last
9211 9211 * response was an error.
9212 9212 */
9213 9213 *cs->statusp = resp->status;
9214 9214 goto end;
9215 9215 }
9216 9216 /*
9217 9217 * This is done since the sequence id
9218 9218 * looked like a replay but it didn't
9219 9219 * pass our check so a BAD_SEQID is
9220 9220 * returned as a result.
9221 9221 */
9222 9222 /*FALLTHROUGH*/
9223 9223 case NFS4_CHKSEQ_BAD:
9224 9224 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9225 9225 goto end;
9226 9226 case NFS4_CHKSEQ_OKAY:
9227 9227 /* Everything looks okay move ahead */
9228 9228 break;
9229 9229 }
9230 9230 break;
9231 9231 case NFS4_CHECK_STATEID_OLD:
9232 9232 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9233 9233 goto end;
9234 9234 case NFS4_CHECK_STATEID_BAD:
9235 9235 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9236 9236 goto end;
9237 9237 case NFS4_CHECK_STATEID_EXPIRED:
9238 9238 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9239 9239 goto end;
9240 9240 case NFS4_CHECK_STATEID_CLOSED:
9241 9241 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9242 9242 goto end;
9243 9243 case NFS4_CHECK_STATEID_REPLAY:
9244 9244 switch (rfs4_check_lock_seqid(
9245 9245 args->locker.locker4_u.lock_owner.lock_seqid,
9246 9246 lsp, resop)) {
9247 9247 case NFS4_CHKSEQ_OKAY:
9248 9248 /*
9249 9249 * This is a replayed stateid; if
9250 9250 * seqid matches the next expected,
9251 9251 * then client is using wrong seqid.
9252 9252 */
9253 9253 case NFS4_CHKSEQ_BAD:
9254 9254 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9255 9255 goto end;
9256 9256 case NFS4_CHKSEQ_REPLAY:
9257 9257 rfs4_update_lease(lsp->rls_locker->rl_client);
9258 9258 *cs->statusp = status = resp->status;
9259 9259 goto end;
9260 9260 }
9261 9261 break;
9262 9262 default:
9263 9263 ASSERT(FALSE);
9264 9264 break;
9265 9265 }
9266 9266
9267 9267 rfs4_update_lock_sequence(lsp);
9268 9268 rfs4_update_lease(lsp->rls_locker->rl_client);
9269 9269 }
9270 9270
9271 9271 /*
9272 9272 * NFS4 only allows locking on regular files, so
9273 9273 * verify type of object.
9274 9274 */
9275 9275 if (cs->vp->v_type != VREG) {
9276 9276 if (cs->vp->v_type == VDIR)
9277 9277 status = NFS4ERR_ISDIR;
9278 9278 else
9279 9279 status = NFS4ERR_INVAL;
9280 9280 goto out;
9281 9281 }
9282 9282
9283 9283 cp = lsp->rls_state->rs_owner->ro_client;
9284 9284
/*
 * Grace handling: a non-reclaim request during grace gets NFS4ERR_GRACE;
 * a reclaim request gets NFS4ERR_NO_GRACE when the client is in grace but
 * may not reclaim, or when the client is not in grace at all.
 */
9285 9285 if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
9286 9286 status = NFS4ERR_GRACE;
9287 9287 goto out;
9288 9288 }
9289 9289
9290 9290 if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->rc_can_reclaim) {
9291 9291 status = NFS4ERR_NO_GRACE;
9292 9292 goto out;
9293 9293 }
9294 9294
9295 9295 if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
9296 9296 status = NFS4ERR_NO_GRACE;
9297 9297 goto out;
9298 9298 }
9299 9299
9300 9300 if (lsp->rls_state->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE)
9301 9301 cs->deleg = TRUE;
9302 9302
9303 9303 status = rfs4_do_lock(lsp, args->locktype,
9304 9304 args->offset, args->length, cs->cr, resop);
9305 9305
/* out: record the result and save the reply for replay detection */
9306 9306 out:
9307 9307 lsp->rls_skip_seqid_check = FALSE;
9308 9308
9309 9309 *cs->statusp = resp->status = status;
9310 9310
9311 9311 if (status == NFS4_OK) {
9312 9312 resp->LOCK4res_u.lock_stateid = lsp->rls_lockid.stateid;
9313 9313 lsp->rls_lock_completed = TRUE;
9314 9314 }
9315 9315 /*
9316 9316 * Only update the "OPEN" response here if this was a new
9317 9317 * lock_owner
9318 9318 */
9319 9319 if (sp)
9320 9320 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
9321 9321
9322 9322 rfs4_update_lock_resp(lsp, resop);
9323 9323
/* end: drop the serialization and the holds taken above */
9324 9324 end:
9325 9325 if (lsp) {
9326 9326 if (ls_sw_held)
9327 9327 rfs4_sw_exit(&lsp->rls_sw);
9328 9328 /*
9329 9329 * If an sp obtained, then the lsp does not represent
9330 9330 * a lock on the file struct.
9331 9331 */
9332 9332 if (sp != NULL)
9333 9333 rfs4_lo_state_rele(lsp, FALSE);
9334 9334 else
9335 9335 rfs4_lo_state_rele(lsp, TRUE);
9336 9336 }
9337 9337 if (sp) {
9338 9338 rfs4_sw_exit(&sp->rs_owner->ro_sw);
9339 9339 rfs4_state_rele(sp);
9340 9340 }
9341 9341
9342 9342 DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs,
9343 9343 LOCK4res *, resp);
9344 9344 }
9345 9345
9346 9346 /* free function for LOCK/LOCKT */
/*
 * Releases the owner_val buffer that lock_denied() allocated, using the
 * recorded owner_len. Nothing is freed unless the op is OP_LOCK or OP_LOCKT
 * and its status is NFS4ERR_DENIED, since only then is the denied arm of
 * the result examined here.
 */
9347 9347 static void
9348 9348 lock_denied_free(nfs_resop4 *resop)
9349 9349 {
9350 9350 LOCK4denied *dp = NULL;
9351 9351
9352 9352 switch (resop->resop) {
9353 9353 case OP_LOCK:
9354 9354 if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED)
9355 9355 dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied;
9356 9356 break;
9357 9357 case OP_LOCKT:
9358 9358 if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED)
9359 9359 dp = &resop->nfs_resop4_u.oplockt.denied;
9360 9360 break;
9361 9361 default:
9362 9362 break;
9363 9363 }
9364 9364
9365 9365 if (dp)
9366 9366 kmem_free(dp->owner.owner_val, dp->owner.owner_len);
9367 9367 }
9368 9368
/*
 * rfs4_op_locku: handler for the NFSv4 LOCKU (unlock) operation.
 *
 * argop/resop carry the LOCKU4args and LOCKU4res; cs is the compound state
 * (current vnode, credentials, status pointer).
 *
 * The lock stateid is looked up and checked against the current filehandle,
 * then the stateid and seqid are validated (a replay is answered with the
 * status already present in the response). The object must be a regular
 * file and the client must not be in its grace period. rfs4_do_lock()
 * performs the unlock, and on success the updated lock stateid is returned.
 */
9369 9369 /*ARGSUSED*/
9370 9370 void
9371 9371 rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
9372 9372 struct svc_req *req, struct compound_state *cs)
9373 9373 {
9374 9374 LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
9375 9375 LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
9376 9376 nfsstat4 status;
9377 9377 stateid4 *stateid = &args->lock_stateid;
9378 9378 rfs4_lo_state_t *lsp;
9379 9379
9380 9380 DTRACE_NFSV4_2(op__locku__start, struct compound_state *, cs,
9381 9381 LOCKU4args *, args);
9382 9382
9383 9383 if (cs->vp == NULL) {
9384 9384 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9385 9385 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9386 9386 LOCKU4res *, resp);
9387 9387 return;
9388 9388 }
9389 9389
9390 9390 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) {
9391 9391 *cs->statusp = resp->status = status;
9392 9392 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9393 9393 LOCKU4res *, resp);
9394 9394 return;
9395 9395 }
9396 9396
9397 9397 /* Ensure specified filehandle matches */
9398 9398 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9399 9399 rfs4_lo_state_rele(lsp, TRUE);
9400 9400 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9401 9401 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9402 9402 LOCKU4res *, resp);
9403 9403 return;
9404 9404 }
9405 9405
9406 9406 /* hold off other access to lsp while we tinker */
9407 9407 rfs4_sw_enter(&lsp->rls_sw);
9408 9408
9409 9409 switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9410 9410 case NFS4_CHECK_STATEID_OKAY:
9411 9411 if (rfs4_check_lock_seqid(args->seqid, lsp, resop)
9412 9412 != NFS4_CHKSEQ_OKAY) {
9413 9413 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9414 9414 goto end;
9415 9415 }
9416 9416 break;
9417 9417 case NFS4_CHECK_STATEID_OLD:
9418 9418 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9419 9419 goto end;
9420 9420 case NFS4_CHECK_STATEID_BAD:
9421 9421 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9422 9422 goto end;
9423 9423 case NFS4_CHECK_STATEID_EXPIRED:
9424 9424 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9425 9425 goto end;
9426 9426 case NFS4_CHECK_STATEID_CLOSED:
9427 9427 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9428 9428 goto end;
9429 9429 case NFS4_CHECK_STATEID_REPLAY:
9430 9430 switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) {
9431 9431 case NFS4_CHKSEQ_OKAY:
9432 9432 /*
9433 9433 * This is a replayed stateid; if
9434 9434 * seqid matches the next expected,
9435 9435 * then client is using wrong seqid.
9436 9436 */
9437 9437 case NFS4_CHKSEQ_BAD:
9438 9438 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9439 9439 goto end;
9440 9440 case NFS4_CHKSEQ_REPLAY:
9441 9441 rfs4_update_lease(lsp->rls_locker->rl_client);
9442 9442 *cs->statusp = status = resp->status;
9443 9443 goto end;
9444 9444 }
9445 9445 break;
9446 9446 default:
9447 9447 ASSERT(FALSE);
9448 9448 break;
9449 9449 }
9450 9450
9451 9451 rfs4_update_lock_sequence(lsp);
9452 9452 rfs4_update_lease(lsp->rls_locker->rl_client);
9453 9453
9454 9454 /*
9455 9455 * NFS4 only allows locking on regular files, so
9456 9456 * verify type of object.
9457 9457 */
9458 9458 if (cs->vp->v_type != VREG) {
9459 9459 if (cs->vp->v_type == VDIR)
9460 9460 status = NFS4ERR_ISDIR;
9461 9461 else
9462 9462 status = NFS4ERR_INVAL;
9463 9463 goto out;
9464 9464 }
9465 9465
9466 9466 if (rfs4_clnt_in_grace(lsp->rls_state->rs_owner->ro_client)) {
9467 9467 status = NFS4ERR_GRACE;
9468 9468 goto out;
9469 9469 }
9470 9470
9471 9471 status = rfs4_do_lock(lsp, args->locktype,
9472 9472 args->offset, args->length, cs->cr, resop);
9473 9473
/* out: record the result and save the reply for replay detection */
9474 9474 out:
9475 9475 *cs->statusp = resp->status = status;
9476 9476
9477 9477 if (status == NFS4_OK)
9478 9478 resp->lock_stateid = lsp->rls_lockid.stateid;
9479 9479
9480 9480 rfs4_update_lock_resp(lsp, resop);
9481 9481
/* end: drop the serialization and the hold on lsp taken above */
9482 9482 end:
9483 9483 rfs4_sw_exit(&lsp->rls_sw);
9484 9484 rfs4_lo_state_rele(lsp, TRUE);
9485 9485
9486 9486 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9487 9487 LOCKU4res *, resp);
9488 9488 }
9489 9489
9490 9490 /*
9491 9491 * LOCKT is a best effort routine, the client can not be guaranteed that
9492 9492 * the status return is still in effect by the time the reply is received.
9493 9493 * There are numerous race conditions in this routine, but we are not required
9494 9494 * to be, and cannot be, accurate.
9495 9495 */
9496 9496 /*ARGSUSED*/
9497 9497 void
9498 9498 rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
9499 9499 struct svc_req *req, struct compound_state *cs)
9500 9500 {
9501 9501 LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
9502 9502 LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
9503 9503 rfs4_lockowner_t *lo;
9504 9504 rfs4_client_t *cp;
9505 9505 bool_t create = FALSE;
9506 9506 struct flock64 flk;
9507 9507 int error;
9508 9508 int flag = FREAD | FWRITE;
9509 9509 int ltype;
9510 9510 length4 posix_length;
9511 9511 sysid_t sysid;
9512 9512 pid_t pid;
9513 9513
9514 9514 DTRACE_NFSV4_2(op__lockt__start, struct compound_state *, cs,
9515 9515 LOCKT4args *, args);
9516 9516
9517 9517 if (cs->vp == NULL) {
9518 9518 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9519 9519 goto out;
9520 9520 }
9521 9521
9522 9522 /*
9523 9523 * NFS4 only allows locking on regular files, so
9524 9524 * verify type of object.
9525 9525 */
9526 9526 if (cs->vp->v_type != VREG) {
9527 9527 if (cs->vp->v_type == VDIR)
9528 9528 *cs->statusp = resp->status = NFS4ERR_ISDIR;
9529 9529 else
9530 9530 *cs->statusp = resp->status = NFS4ERR_INVAL;
9531 9531 goto out;
9532 9532 }
9533 9533
9534 9534 /*
9535 9535 * Check out the clientid to ensure the server knows about it
9536 9536 * so that we correctly inform the client of a server reboot.
9537 9537 */
9538 9538 if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE))
9539 9539 == NULL) {
9540 9540 *cs->statusp = resp->status =
9541 9541 rfs4_check_clientid(&args->owner.clientid, 0);
9542 9542 goto out;
9543 9543 }
9544 9544 if (rfs4_lease_expired(cp)) {
9545 9545 rfs4_client_close(cp);
9546 9546 /*
9547 9547 * Protocol doesn't allow returning NFS4ERR_STALE as
9548 9548 * other operations do on this check so STALE_CLIENTID
9549 9549 * is returned instead
9550 9550 */
9551 9551 *cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
9552 9552 goto out;
9553 9553 }
9554 9554
9555 9555 if (rfs4_clnt_in_grace(cp) && !(cp->rc_can_reclaim)) {
9556 9556 *cs->statusp = resp->status = NFS4ERR_GRACE;
9557 9557 rfs4_client_rele(cp);
9558 9558 goto out;
9559 9559 }
9560 9560 rfs4_client_rele(cp);
9561 9561
9562 9562 resp->status = NFS4_OK;
9563 9563
9564 9564 switch (args->locktype) {
9565 9565 case READ_LT:
9566 9566 case READW_LT:
9567 9567 ltype = F_RDLCK;
9568 9568 break;
9569 9569 case WRITE_LT:
9570 9570 case WRITEW_LT:
9571 9571 ltype = F_WRLCK;
9572 9572 break;
9573 9573 }
9574 9574
9575 9575 posix_length = args->length;
9576 9576 /* Check for zero length. To lock to end of file use all ones for V4 */
9577 9577 if (posix_length == 0) {
9578 9578 *cs->statusp = resp->status = NFS4ERR_INVAL;
9579 9579 goto out;
9580 9580 } else if (posix_length == (length4)(~0)) {
9581 9581 posix_length = 0; /* Posix to end of file */
9582 9582 }
9583 9583
9584 9584 /* Find or create a lockowner */
9585 9585 lo = rfs4_findlockowner(&args->owner, &create);
9586 9586
9587 9587 if (lo) {
9588 9588 pid = lo->rl_pid;
9589 9589 if ((resp->status =
9590 9590 rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
9591 9591 goto err;
9592 9592 } else {
9593 9593 pid = 0;
9594 9594 sysid = lockt_sysid;
9595 9595 }
9596 9596 retry:
9597 9597 flk.l_type = ltype;
9598 9598 flk.l_whence = 0; /* SEEK_SET */
9599 9599 flk.l_start = args->offset;
9600 9600 flk.l_len = posix_length;
9601 9601 flk.l_sysid = sysid;
9602 9602 flk.l_pid = pid;
9603 9603 flag |= F_REMOTELOCK;
9604 9604
9605 9605 LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk);
9606 9606
9607 9607 /* Note that length4 is uint64_t but l_len and l_start are off64_t */
9608 9608 if (flk.l_len < 0 || flk.l_start < 0) {
9609 9609 resp->status = NFS4ERR_INVAL;
9610 9610 goto err;
9611 9611 }
9612 9612 error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0,
9613 9613 NULL, cs->cr, NULL);
9614 9614
9615 9615 /*
9616 9616 * N.B. We map error values to nfsv4 errors. This is differrent
9617 9617 * than puterrno4 routine.
9618 9618 */
9619 9619 switch (error) {
9620 9620 case 0:
9621 9621 if (flk.l_type == F_UNLCK)
9622 9622 resp->status = NFS4_OK;
9623 9623 else {
9624 9624 if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED)
9625 9625 goto retry;
9626 9626 resp->status = NFS4ERR_DENIED;
9627 9627 }
9628 9628 break;
9629 9629 case EOVERFLOW:
9630 9630 resp->status = NFS4ERR_INVAL;
9631 9631 break;
9632 9632 case EINVAL:
9633 9633 resp->status = NFS4ERR_NOTSUPP;
9634 9634 break;
9635 9635 default:
9636 9636 cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
9637 9637 error);
9638 9638 resp->status = NFS4ERR_SERVERFAULT;
9639 9639 break;
9640 9640 }
9641 9641
9642 9642 err:
9643 9643 if (lo)
9644 9644 rfs4_lockowner_rele(lo);
9645 9645 *cs->statusp = resp->status;
9646 9646 out:
9647 9647 DTRACE_NFSV4_2(op__lockt__done, struct compound_state *, cs,
9648 9648 LOCKT4res *, resp);
9649 9649 }
9650 9650
9651 9651 int
9652 9652 rfs4_share(rfs4_state_t *sp, uint32_t access, uint32_t deny)
9653 9653 {
9654 9654 int err;
9655 9655 int cmd;
9656 9656 vnode_t *vp;
9657 9657 struct shrlock shr;
9658 9658 struct shr_locowner shr_loco;
9659 9659 int fflags = 0;
9660 9660
9661 9661 ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9662 9662 ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9663 9663
9664 9664 if (sp->rs_closed)
9665 9665 return (NFS4ERR_OLD_STATEID);
9666 9666
9667 9667 vp = sp->rs_finfo->rf_vp;
9668 9668 ASSERT(vp);
9669 9669
9670 9670 shr.s_access = shr.s_deny = 0;
9671 9671
9672 9672 if (access & OPEN4_SHARE_ACCESS_READ) {
9673 9673 fflags |= FREAD;
9674 9674 shr.s_access |= F_RDACC;
9675 9675 }
9676 9676 if (access & OPEN4_SHARE_ACCESS_WRITE) {
9677 9677 fflags |= FWRITE;
9678 9678 shr.s_access |= F_WRACC;
9679 9679 }
9680 9680 ASSERT(shr.s_access);
9681 9681
9682 9682 if (deny & OPEN4_SHARE_DENY_READ)
9683 9683 shr.s_deny |= F_RDDNY;
9684 9684 if (deny & OPEN4_SHARE_DENY_WRITE)
9685 9685 shr.s_deny |= F_WRDNY;
9686 9686
9687 9687 shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9688 9688 shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9689 9689 shr_loco.sl_pid = shr.s_pid;
9690 9690 shr_loco.sl_id = shr.s_sysid;
9691 9691 shr.s_owner = (caddr_t)&shr_loco;
9692 9692 shr.s_own_len = sizeof (shr_loco);
9693 9693
9694 9694 cmd = nbl_need_check(vp) ? F_SHARE_NBMAND : F_SHARE;
9695 9695
9696 9696 err = VOP_SHRLOCK(vp, cmd, &shr, fflags, CRED(), NULL);
9697 9697 if (err != 0) {
9698 9698 if (err == EAGAIN)
9699 9699 err = NFS4ERR_SHARE_DENIED;
9700 9700 else
9701 9701 err = puterrno4(err);
9702 9702 return (err);
9703 9703 }
9704 9704
9705 9705 sp->rs_share_access |= access;
9706 9706 sp->rs_share_deny |= deny;
9707 9707
9708 9708 return (0);
9709 9709 }
9710 9710
9711 9711 int
9712 9712 rfs4_unshare(rfs4_state_t *sp)
9713 9713 {
9714 9714 int err;
9715 9715 struct shrlock shr;
9716 9716 struct shr_locowner shr_loco;
9717 9717
9718 9718 ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9719 9719
9720 9720 if (sp->rs_closed || sp->rs_share_access == 0)
9721 9721 return (0);
9722 9722
9723 9723 ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9724 9724 ASSERT(sp->rs_finfo->rf_vp);
9725 9725
9726 9726 shr.s_access = shr.s_deny = 0;
9727 9727 shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9728 9728 shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9729 9729 shr_loco.sl_pid = shr.s_pid;
9730 9730 shr_loco.sl_id = shr.s_sysid;
9731 9731 shr.s_owner = (caddr_t)&shr_loco;
9732 9732 shr.s_own_len = sizeof (shr_loco);
9733 9733
9734 9734 err = VOP_SHRLOCK(sp->rs_finfo->rf_vp, F_UNSHARE, &shr, 0, CRED(),
9735 9735 NULL);
9736 9736 if (err != 0) {
9737 9737 err = puterrno4(err);
9738 9738 return (err);
9739 9739 }
9740 9740
9741 9741 sp->rs_share_access = 0;
9742 9742 sp->rs_share_deny = 0;
9743 9743
9744 9744 return (0);
9745 9745
9746 9746 }
9747 9747
9748 9748 static int
9749 9749 rdma_setup_read_data4(READ4args *args, READ4res *rok)
9750 9750 {
9751 9751 struct clist *wcl;
9752 9752 count4 count = rok->data_len;
9753 9753 int wlist_len;
9754 9754
9755 9755 wcl = args->wlist;
9756 9756 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
9757 9757 return (FALSE);
9758 9758 }
9759 9759 wcl = args->wlist;
9760 9760 rok->wlist_len = wlist_len;
9761 9761 rok->wlist = wcl;
9762 9762 return (TRUE);
9763 9763 }
9764 9764
/*
 * Tunable to disable server referrals.  When set to a non-zero value,
 * vn_is_nfs_reparse() reports B_FALSE without examining the vnode.
 */
int rfs4_no_referrals = 0;
9767 9767
9768 9768 /*
9769 9769 * Find an NFS record in reparse point data.
9770 9770 * Returns 0 for success and <0 or an errno value on failure.
9771 9771 */
9772 9772 int
9773 9773 vn_find_nfs_record(vnode_t *vp, nvlist_t **nvlp, char **svcp, char **datap)
9774 9774 {
9775 9775 int err;
9776 9776 char *stype, *val;
9777 9777 nvlist_t *nvl;
9778 9778 nvpair_t *curr;
9779 9779
9780 9780 if ((nvl = reparse_init()) == NULL)
9781 9781 return (-1);
9782 9782
9783 9783 if ((err = reparse_vnode_parse(vp, nvl)) != 0) {
9784 9784 reparse_free(nvl);
9785 9785 return (err);
9786 9786 }
9787 9787
9788 9788 curr = NULL;
9789 9789 while ((curr = nvlist_next_nvpair(nvl, curr)) != NULL) {
9790 9790 if ((stype = nvpair_name(curr)) == NULL) {
9791 9791 reparse_free(nvl);
9792 9792 return (-2);
9793 9793 }
9794 9794 if (strncasecmp(stype, "NFS", 3) == 0)
9795 9795 break;
9796 9796 }
9797 9797
9798 9798 if ((curr == NULL) ||
9799 9799 (nvpair_value_string(curr, &val))) {
9800 9800 reparse_free(nvl);
9801 9801 return (-3);
9802 9802 }
9803 9803 *nvlp = nvl;
9804 9804 *svcp = stype;
9805 9805 *datap = val;
9806 9806 return (0);
9807 9807 }
9808 9808
9809 9809 int
9810 9810 vn_is_nfs_reparse(vnode_t *vp, cred_t *cr)
9811 9811 {
9812 9812 nvlist_t *nvl;
9813 9813 char *s, *d;
9814 9814
9815 9815 if (rfs4_no_referrals != 0)
9816 9816 return (B_FALSE);
9817 9817
9818 9818 if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9819 9819 return (B_FALSE);
9820 9820
9821 9821 if (vn_find_nfs_record(vp, &nvl, &s, &d) != 0)
9822 9822 return (B_FALSE);
9823 9823
9824 9824 reparse_free(nvl);
9825 9825
9826 9826 return (B_TRUE);
9827 9827 }
9828 9828
9829 9829 /*
9830 9830 * There is a user-level copy of this routine in ref_subr.c.
9831 9831 * Changes should be kept in sync.
9832 9832 */
9833 9833 static int
9834 9834 nfs4_create_components(char *path, component4 *comp4)
9835 9835 {
9836 9836 int slen, plen, ncomp;
9837 9837 char *ori_path, *nxtc, buf[MAXNAMELEN];
9838 9838
9839 9839 if (path == NULL)
9840 9840 return (0);
9841 9841
9842 9842 plen = strlen(path) + 1; /* include the terminator */
9843 9843 ori_path = path;
9844 9844 ncomp = 0;
9845 9845
9846 9846 /* count number of components in the path */
9847 9847 for (nxtc = path; nxtc < ori_path + plen; nxtc++) {
9848 9848 if (*nxtc == '/' || *nxtc == '\0' || *nxtc == '\n') {
9849 9849 if ((slen = nxtc - path) == 0) {
9850 9850 path = nxtc + 1;
9851 9851 continue;
9852 9852 }
9853 9853
9854 9854 if (comp4 != NULL) {
9855 9855 bcopy(path, buf, slen);
9856 9856 buf[slen] = '\0';
9857 9857 (void) str_to_utf8(buf, &comp4[ncomp]);
9858 9858 }
9859 9859
9860 9860 ncomp++; /* 1 valid component */
9861 9861 path = nxtc + 1;
9862 9862 }
9863 9863 if (*nxtc == '\0' || *nxtc == '\n')
9864 9864 break;
9865 9865 }
9866 9866
9867 9867 return (ncomp);
9868 9868 }
9869 9869
9870 9870 /*
9871 9871 * There is a user-level copy of this routine in ref_subr.c.
9872 9872 * Changes should be kept in sync.
9873 9873 */
9874 9874 static int
9875 9875 make_pathname4(char *path, pathname4 *pathname)
9876 9876 {
9877 9877 int ncomp;
9878 9878 component4 *comp4;
9879 9879
9880 9880 if (pathname == NULL)
9881 9881 return (0);
9882 9882
9883 9883 if (path == NULL) {
9884 9884 pathname->pathname4_val = NULL;
9885 9885 pathname->pathname4_len = 0;
9886 9886 return (0);
9887 9887 }
9888 9888
9889 9889 /* count number of components to alloc buffer */
9890 9890 if ((ncomp = nfs4_create_components(path, NULL)) == 0) {
9891 9891 pathname->pathname4_val = NULL;
9892 9892 pathname->pathname4_len = 0;
9893 9893 return (0);
9894 9894 }
9895 9895 comp4 = kmem_zalloc(ncomp * sizeof (component4), KM_SLEEP);
9896 9896
9897 9897 /* copy components into allocated buffer */
9898 9898 ncomp = nfs4_create_components(path, comp4);
9899 9899
9900 9900 pathname->pathname4_val = comp4;
9901 9901 pathname->pathname4_len = ncomp;
9902 9902
9903 9903 return (ncomp);
9904 9904 }
9905 9905
9906 9906 #define xdr_fs_locations4 xdr_fattr4_fs_locations
9907 9907
9908 9908 fs_locations4 *
9909 9909 fetch_referral(vnode_t *vp, cred_t *cr)
9910 9910 {
9911 9911 nvlist_t *nvl;
9912 9912 char *stype, *sdata;
9913 9913 fs_locations4 *result;
9914 9914 char buf[1024];
9915 9915 size_t bufsize;
9916 9916 XDR xdr;
9917 9917 int err;
9918 9918
9919 9919 /*
9920 9920 * Check attrs to ensure it's a reparse point
9921 9921 */
9922 9922 if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9923 9923 return (NULL);
9924 9924
9925 9925 /*
9926 9926 * Look for an NFS record and get the type and data
9927 9927 */
9928 9928 if (vn_find_nfs_record(vp, &nvl, &stype, &sdata) != 0)
9929 9929 return (NULL);
9930 9930
9931 9931 /*
9932 9932 * With the type and data, upcall to get the referral
9933 9933 */
9934 9934 bufsize = sizeof (buf);
9935 9935 bzero(buf, sizeof (buf));
9936 9936 err = reparse_kderef((const char *)stype, (const char *)sdata,
9937 9937 buf, &bufsize);
9938 9938 reparse_free(nvl);
9939 9939
9940 9940 DTRACE_PROBE4(nfs4serv__func__referral__upcall,
9941 9941 char *, stype, char *, sdata, char *, buf, int, err);
9942 9942 if (err) {
9943 9943 cmn_err(CE_NOTE,
9944 9944 "reparsed daemon not running: unable to get referral (%d)",
9945 9945 err);
9946 9946 return (NULL);
9947 9947 }
9948 9948
9949 9949 /*
9950 9950 * We get an XDR'ed record back from the kderef call
9951 9951 */
9952 9952 xdrmem_create(&xdr, buf, bufsize, XDR_DECODE);
9953 9953 result = kmem_alloc(sizeof (fs_locations4), KM_SLEEP);
9954 9954 err = xdr_fs_locations4(&xdr, result);
9955 9955 XDR_DESTROY(&xdr);
9956 9956 if (err != TRUE) {
9957 9957 DTRACE_PROBE1(nfs4serv__func__referral__upcall__xdrfail,
9958 9958 int, err);
9959 9959 return (NULL);
9960 9960 }
9961 9961
9962 9962 /*
9963 9963 * Look at path to recover fs_root, ignoring the leading '/'
9964 9964 */
9965 9965 (void) make_pathname4(vp->v_path, &result->fs_root);
9966 9966
9967 9967 return (result);
9968 9968 }
9969 9969
9970 9970 char *
9971 9971 build_symlink(vnode_t *vp, cred_t *cr, size_t *strsz)
9972 9972 {
9973 9973 fs_locations4 *fsl;
9974 9974 fs_location4 *fs;
9975 9975 char *server, *path, *symbuf;
9976 9976 static char *prefix = "/net/";
9977 9977 int i, size, npaths;
9978 9978 uint_t len;
9979 9979
9980 9980 /* Get the referral */
9981 9981 if ((fsl = fetch_referral(vp, cr)) == NULL)
9982 9982 return (NULL);
9983 9983
9984 9984 /* Deal with only the first location and first server */
9985 9985 fs = &fsl->locations_val[0];
9986 9986 server = utf8_to_str(&fs->server_val[0], &len, NULL);
9987 9987 if (server == NULL) {
9988 9988 rfs4_free_fs_locations4(fsl);
9989 9989 kmem_free(fsl, sizeof (fs_locations4));
9990 9990 return (NULL);
9991 9991 }
9992 9992
9993 9993 /* Figure out size for "/net/" + host + /path/path/path + NULL */
9994 9994 size = strlen(prefix) + len;
9995 9995 for (i = 0; i < fs->rootpath.pathname4_len; i++)
9996 9996 size += fs->rootpath.pathname4_val[i].utf8string_len + 1;
9997 9997
9998 9998 /* Allocate the symlink buffer and fill it */
9999 9999 symbuf = kmem_zalloc(size, KM_SLEEP);
10000 10000 (void) strcat(symbuf, prefix);
10001 10001 (void) strcat(symbuf, server);
10002 10002 kmem_free(server, len);
10003 10003
10004 10004 npaths = 0;
10005 10005 for (i = 0; i < fs->rootpath.pathname4_len; i++) {
10006 10006 path = utf8_to_str(&fs->rootpath.pathname4_val[i], &len, NULL);
10007 10007 if (path == NULL)
10008 10008 continue;
10009 10009 (void) strcat(symbuf, "/");
10010 10010 (void) strcat(symbuf, path);
10011 10011 npaths++;
10012 10012 kmem_free(path, len);
10013 10013 }
10014 10014
10015 10015 rfs4_free_fs_locations4(fsl);
10016 10016 kmem_free(fsl, sizeof (fs_locations4));
10017 10017
10018 10018 if (strsz != NULL)
10019 10019 *strsz = size;
10020 10020 return (symbuf);
10021 10021 }
10022 10022
10023 10023 /*
10024 10024 * Check to see if we have a downrev Solaris client, so that we
10025 10025 * can send it a symlink instead of a referral.
10026 10026 */
10027 10027 int
10028 10028 client_is_downrev(struct svc_req *req)
10029 10029 {
10030 10030 struct sockaddr *ca;
10031 10031 rfs4_clntip_t *ci;
10032 10032 bool_t create = FALSE;
10033 10033 int is_downrev;
10034 10034
10035 10035 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
10036 10036 ASSERT(ca);
10037 10037 ci = rfs4_find_clntip(ca, &create);
10038 10038 if (ci == NULL)
10039 10039 return (0);
10040 10040 is_downrev = ci->ri_no_referrals;
10041 10041 rfs4_dbe_rele(ci->ri_dbe);
10042 10042 return (is_downrev);
10043 10043 }
↓ open down ↓ |
9499 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX