Print this page
*** NO COMMENTS ***
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/fs/nfs/nfs_srv.c
+++ new/usr/src/uts/common/fs/nfs/nfs_srv.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 */
24 24
25 25 /*
26 26 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
27 27 * All rights reserved.
28 28 */
29 29
30 30 #include <sys/param.h>
31 31 #include <sys/types.h>
32 32 #include <sys/systm.h>
33 33 #include <sys/cred.h>
34 34 #include <sys/buf.h>
35 35 #include <sys/vfs.h>
36 36 #include <sys/vnode.h>
37 37 #include <sys/uio.h>
38 38 #include <sys/stat.h>
39 39 #include <sys/errno.h>
40 40 #include <sys/sysmacros.h>
41 41 #include <sys/statvfs.h>
42 42 #include <sys/kmem.h>
43 43 #include <sys/kstat.h>
44 44 #include <sys/dirent.h>
45 45 #include <sys/cmn_err.h>
46 46 #include <sys/debug.h>
47 47 #include <sys/vtrace.h>
48 48 #include <sys/mode.h>
49 49 #include <sys/acl.h>
50 50 #include <sys/nbmlock.h>
51 51 #include <sys/policy.h>
52 52 #include <sys/sdt.h>
53 53
54 54 #include <rpc/types.h>
55 55 #include <rpc/auth.h>
56 56 #include <rpc/svc.h>
57 57
58 58 #include <nfs/nfs.h>
59 59 #include <nfs/export.h>
60 60 #include <nfs/nfs_cmd.h>
61 61
62 62 #include <vm/hat.h>
63 63 #include <vm/as.h>
64 64 #include <vm/seg.h>
65 65 #include <vm/seg_map.h>
66 66 #include <vm/seg_kmem.h>
67 67
68 68 #include <sys/strsubr.h>
69 69
70 70 /*
71 71 * These are the interface routines for the server side of the
72 72 * Network File System. See the NFS version 2 protocol specification
73 73 * for a description of this interface.
74 74 */
75 75
76 76 static int sattr_to_vattr(struct nfssattr *, struct vattr *);
77 77 static void acl_perm(struct vnode *, struct exportinfo *, struct vattr *,
78 78 cred_t *);
79 79
80 80 /*
81 81 * Some "over the wire" UNIX file types. These are encoded
82 82 * into the mode. This needs to be fixed in the next rev.
83 83 */
84 84 #define IFMT 0170000 /* type of file */
85 85 #define IFCHR 0020000 /* character special */
86 86 #define IFBLK 0060000 /* block special */
87 87 #define IFSOCK 0140000 /* socket */
88 88
89 89 u_longlong_t nfs2_srv_caller_id;
90 90
91 91 /*
92 92 * Get file attributes.
93 93 * Returns the current attributes of the file with the given fhandle.
94 94 */
95 95 /* ARGSUSED */
96 96 void
97 97 rfs_getattr(fhandle_t *fhp, struct nfsattrstat *ns, struct exportinfo *exi,
98 98 struct svc_req *req, cred_t *cr)
99 99 {
100 100 int error;
101 101 vnode_t *vp;
102 102 struct vattr va;
103 103
104 104 vp = nfs_fhtovp(fhp, exi);
105 105 if (vp == NULL) {
106 106 ns->ns_status = NFSERR_STALE;
107 107 return;
108 108 }
109 109
110 110 /*
111 111 * Do the getattr.
112 112 */
113 113 va.va_mask = AT_ALL; /* we want all the attributes */
114 114
115 115 error = rfs4_delegated_getattr(vp, &va, 0, cr);
116 116
117 117 /* check for overflows */
118 118 if (!error) {
119 119 /* Lie about the object type for a referral */
120 120 if (vn_is_nfs_reparse(vp, cr))
121 121 va.va_type = VLNK;
122 122
123 123 acl_perm(vp, exi, &va, cr);
124 124 error = vattr_to_nattr(&va, &ns->ns_attr);
125 125 }
126 126
127 127 VN_RELE(vp);
128 128
129 129 ns->ns_status = puterrno(error);
130 130 }
131 131 void *
132 132 rfs_getattr_getfh(fhandle_t *fhp)
133 133 {
134 134 return (fhp);
135 135 }
136 136
137 137 /*
138 138 * Set file attributes.
139 139 * Sets the attributes of the file with the given fhandle. Returns
140 140 * the new attributes.
141 141 */
142 142 void
143 143 rfs_setattr(struct nfssaargs *args, struct nfsattrstat *ns,
144 144 struct exportinfo *exi, struct svc_req *req, cred_t *cr)
145 145 {
146 146 int error;
147 147 int flag;
148 148 int in_crit = 0;
149 149 vnode_t *vp;
150 150 struct vattr va;
151 151 struct vattr bva;
152 152 struct flock64 bf;
153 153 caller_context_t ct;
154 154
155 155
156 156 vp = nfs_fhtovp(&args->saa_fh, exi);
157 157 if (vp == NULL) {
158 158 ns->ns_status = NFSERR_STALE;
159 159 return;
160 160 }
161 161
162 162 if (rdonly(exi, req) || vn_is_readonly(vp)) {
163 163 VN_RELE(vp);
164 164 ns->ns_status = NFSERR_ROFS;
165 165 return;
166 166 }
167 167
168 168 error = sattr_to_vattr(&args->saa_sa, &va);
169 169 if (error) {
170 170 VN_RELE(vp);
171 171 ns->ns_status = puterrno(error);
172 172 return;
173 173 }
174 174
175 175 /*
176 176 * If the client is requesting a change to the mtime,
177 177 * but the nanosecond field is set to 1 billion, then
178 178 * this is a flag to the server that it should set the
179 179 * atime and mtime fields to the server's current time.
180 180 * The 1 billion number actually came from the client
181 181 * as 1 million, but the units in the over the wire
182 182 * request are microseconds instead of nanoseconds.
183 183 *
184 184 * This is an overload of the protocol and should be
185 185 * documented in the NFS Version 2 protocol specification.
186 186 */
187 187 if (va.va_mask & AT_MTIME) {
188 188 if (va.va_mtime.tv_nsec == 1000000000) {
189 189 gethrestime(&va.va_mtime);
190 190 va.va_atime = va.va_mtime;
191 191 va.va_mask |= AT_ATIME;
192 192 flag = 0;
193 193 } else
194 194 flag = ATTR_UTIME;
195 195 } else
196 196 flag = 0;
197 197
198 198 /*
199 199 * If the filesystem is exported with nosuid, then mask off
200 200 * the setuid and setgid bits.
201 201 */
202 202 if ((va.va_mask & AT_MODE) && vp->v_type == VREG &&
203 203 (exi->exi_export.ex_flags & EX_NOSUID))
204 204 va.va_mode &= ~(VSUID | VSGID);
205 205
206 206 ct.cc_sysid = 0;
207 207 ct.cc_pid = 0;
208 208 ct.cc_caller_id = nfs2_srv_caller_id;
209 209 ct.cc_flags = CC_DONTBLOCK;
210 210
211 211 /*
212 212 * We need to specially handle size changes because it is
213 213 * possible for the client to create a file with modes
214 214 * which indicate read-only, but with the file opened for
215 215 * writing. If the client then tries to set the size of
216 216 * the file, then the normal access checking done in
217 217 * VOP_SETATTR would prevent the client from doing so,
218 218 * although it should be legal for it to do so. To get
219 219 * around this, we do the access checking for ourselves
220 220 * and then use VOP_SPACE which doesn't do the access
221 221 * checking which VOP_SETATTR does. VOP_SPACE can only
222 222 * operate on VREG files, let VOP_SETATTR handle the other
223 223 * extremely rare cases.
224 224 * Also the client should not be allowed to change the
225 225 * size of the file if there is a conflicting non-blocking
226 226 * mandatory lock in the region of change.
227 227 */
228 228 if (vp->v_type == VREG && va.va_mask & AT_SIZE) {
229 229 if (nbl_need_check(vp)) {
230 230 nbl_start_crit(vp, RW_READER);
231 231 in_crit = 1;
232 232 }
233 233
234 234 bva.va_mask = AT_UID | AT_SIZE;
235 235
236 236 error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
237 237
238 238 if (error) {
239 239 if (in_crit)
240 240 nbl_end_crit(vp);
241 241 VN_RELE(vp);
242 242 ns->ns_status = puterrno(error);
243 243 return;
244 244 }
245 245
246 246 if (in_crit) {
247 247 u_offset_t offset;
248 248 ssize_t length;
249 249
250 250 if (va.va_size < bva.va_size) {
251 251 offset = va.va_size;
252 252 length = bva.va_size - va.va_size;
253 253 } else {
254 254 offset = bva.va_size;
255 255 length = va.va_size - bva.va_size;
256 256 }
257 257 if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
258 258 NULL)) {
259 259 error = EACCES;
260 260 }
261 261 }
262 262
263 263 if (crgetuid(cr) == bva.va_uid && !error &&
264 264 va.va_size != bva.va_size) {
265 265 va.va_mask &= ~AT_SIZE;
266 266 bf.l_type = F_WRLCK;
267 267 bf.l_whence = 0;
268 268 bf.l_start = (off64_t)va.va_size;
269 269 bf.l_len = 0;
270 270 bf.l_sysid = 0;
271 271 bf.l_pid = 0;
272 272
273 273 error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
274 274 (offset_t)va.va_size, cr, &ct);
275 275 }
276 276 if (in_crit)
277 277 nbl_end_crit(vp);
278 278 } else
279 279 error = 0;
280 280
281 281 /*
282 282 * Do the setattr.
283 283 */
284 284 if (!error && va.va_mask) {
285 285 error = VOP_SETATTR(vp, &va, flag, cr, &ct);
286 286 }
287 287
288 288 /*
289 289 * check if the monitor on either vop_space or vop_setattr detected
290 290 * a delegation conflict and if so, mark the thread flag as
291 291 * wouldblock so that the response is dropped and the client will
292 292 * try again.
293 293 */
294 294 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
295 295 VN_RELE(vp);
296 296 curthread->t_flag |= T_WOULDBLOCK;
297 297 return;
298 298 }
299 299
300 300 if (!error) {
301 301 va.va_mask = AT_ALL; /* get everything */
302 302
303 303 error = rfs4_delegated_getattr(vp, &va, 0, cr);
304 304
305 305 /* check for overflows */
306 306 if (!error) {
307 307 acl_perm(vp, exi, &va, cr);
308 308 error = vattr_to_nattr(&va, &ns->ns_attr);
309 309 }
310 310 }
311 311
312 312 ct.cc_flags = 0;
313 313
314 314 /*
315 315 * Force modified metadata out to stable storage.
316 316 */
317 317 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
318 318
↓ open down ↓ |
318 lines elided |
↑ open up ↑ |
319 319 VN_RELE(vp);
320 320
321 321 ns->ns_status = puterrno(error);
322 322 }
323 323 void *
324 324 rfs_setattr_getfh(struct nfssaargs *args)
325 325 {
326 326 return (&args->saa_fh);
327 327 }
328 328
329 +/* Change and release @exip and @vpp only on success */
330 +int
331 +rfs_cross_mnt(vnode_t **vpp, struct exportinfo **exip)
332 +{
333 + struct exportinfo *exi;
334 + vnode_t *vp;
335 + fid_t fid;
336 + int error;
337 +
338 + vp = *vpp;
339 +
340 +	/* traverse() releases its vnode argument on success */
341 + VN_HOLD(*vpp);
342 +
343 + if ((error = traverse(&vp)) != 0) {
344 + VN_RELE(*vpp);
345 + return (error);
346 + }
347 +
348 + bzero(&fid, sizeof (fid));
349 + fid.fid_len = MAXFIDSZ;
350 + error = VOP_FID(vp, &fid, NULL);
351 + if (error) {
352 + VN_RELE(vp);
353 + return (error);
354 + }
355 +
356 + exi = checkexport(&vp->v_vfsp->vfs_fsid, &fid);
357 + if (exi == NULL ||
358 + (exi->exi_export.ex_flags & EX_NOHIDE) == 0) {
359 +		/* Not an error: the subdir is just not exported,
360 +		 * or "nohide" is not set
361 + */
362 + VN_RELE(vp);
363 + } else {
364 + /* go to submount */
365 + exi_rele(*exip);
366 + *exip = exi;
367 +
368 + VN_RELE(*vpp);
369 + *vpp = vp;
370 + }
371 + return (0);
372 +}
373 +
329 374 /*
330 375 * Directory lookup.
331 376 * Returns an fhandle and file attributes for file name in a directory.
332 377 */
333 378 /* ARGSUSED */
334 379 void
335 380 rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr,
336 381 struct exportinfo *exi, struct svc_req *req, cred_t *cr)
337 382 {
338 383 int error;
339 384 vnode_t *dvp;
340 385 vnode_t *vp;
341 386 struct vattr va;
342 387 fhandle_t *fhp = da->da_fhandle;
343 388 struct sec_ol sec = {0, 0};
344 389 bool_t publicfh_flag = FALSE, auth_weak = FALSE;
345 390 char *name;
346 391 struct sockaddr *ca;
347 392
348 393 /*
349 394 * Trusted Extension doesn't support NFSv2. MOUNT
350 395 * will reject v2 clients. Need to prevent v2 client
351 396 * access via WebNFS here.
352 397 */
353 398 if (is_system_labeled() && req->rq_vers == 2) {
354 399 dr->dr_status = NFSERR_ACCES;
355 400 return;
356 401 }
357 402
358 403 /*
359 404 * Disallow NULL paths
360 405 */
361 406 if (da->da_name == NULL || *da->da_name == '\0') {
362 407 dr->dr_status = NFSERR_ACCES;
363 408 return;
364 409 }
365 410
366 411 /*
367 412 * Allow lookups from the root - the default
368 413 * location of the public filehandle.
369 414 */
370 415 if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
371 416 dvp = rootdir;
372 417 VN_HOLD(dvp);
373 418 } else {
374 419 dvp = nfs_fhtovp(fhp, exi);
375 420 if (dvp == NULL) {
376 421 dr->dr_status = NFSERR_STALE;
377 422 return;
378 423 }
379 424 }
380 425
381 426 /*
382 427 * Not allow lookup beyond root.
383 428 * If the filehandle matches a filehandle of the exi,
384 429 * then the ".." refers beyond the root of an exported filesystem.
385 430 */
386 431 if (strcmp(da->da_name, "..") == 0 &&
387 432 EQFID(&exi->exi_fid, (fid_t *)&fhp->fh_len)) {
388 433 VN_RELE(dvp);
389 434 dr->dr_status = NFSERR_NOENT;
390 435 return;
391 436 }
↓ open down ↓ |
53 lines elided |
↑ open up ↑ |
392 437
393 438 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
394 439 name = nfscmd_convname(ca, exi, da->da_name, NFSCMD_CONV_INBOUND,
395 440 MAXPATHLEN);
396 441
397 442 if (name == NULL) {
398 443 dr->dr_status = NFSERR_ACCES;
399 444 return;
400 445 }
401 446
447 + exi_hold(exi);
448 +
402 449 /*
403 450 * If the public filehandle is used then allow
404 451 * a multi-component lookup, i.e. evaluate
405 452 * a pathname and follow symbolic links if
406 453 * necessary.
407 454 *
408 455 * This may result in a vnode in another filesystem
409 456 * which is OK as long as the filesystem is exported.
410 457 */
411 458 if (PUBLIC_FH2(fhp)) {
459 + struct exportinfo *new;
460 +
412 461 publicfh_flag = TRUE;
413 - error = rfs_publicfh_mclookup(name, dvp, cr, &vp, &exi,
462 + error = rfs_publicfh_mclookup(name, dvp, cr, &vp, &new,
414 463 &sec);
464 +
465 + if (error == 0) {
466 + exi_rele(exi);
467 + exi = new;
468 + }
415 469 } else {
416 470 /*
417 471 * Do a normal single component lookup.
418 472 */
419 473 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
420 474 NULL, NULL, NULL);
421 475 }
422 476
423 477 if (name != da->da_name)
424 478 kmem_free(name, MAXPATHLEN);
425 479
480 + if (error == 0 && vn_ismntpt(vp)) {
481 + error = rfs_cross_mnt(&vp, &exi);
482 + if (error)
483 + VN_RELE(vp);
484 + }
426 485
427 486 if (!error) {
428 487 va.va_mask = AT_ALL; /* we want everything */
429 488
430 489 error = rfs4_delegated_getattr(vp, &va, 0, cr);
431 490
432 491 /* check for overflows */
433 492 if (!error) {
434 493 acl_perm(vp, exi, &va, cr);
435 494 error = vattr_to_nattr(&va, &dr->dr_attr);
436 495 if (!error) {
437 496 if (sec.sec_flags & SEC_QUERY)
438 497 error = makefh_ol(&dr->dr_fhandle, exi,
439 498 sec.sec_index);
440 499 else {
441 500 error = makefh(&dr->dr_fhandle, vp,
442 501 exi);
443 502 if (!error && publicfh_flag &&
↓ open down ↓ |
8 lines elided |
↑ open up ↑ |
444 503 !chk_clnt_sec(exi, req))
445 504 auth_weak = TRUE;
446 505 }
447 506 }
448 507 }
449 508 VN_RELE(vp);
450 509 }
451 510
452 511 VN_RELE(dvp);
453 512
454 - /*
455 - * If publicfh_flag is true then we have called rfs_publicfh_mclookup
456 - * and have obtained a new exportinfo in exi which needs to be
457 - * released. Note the the original exportinfo pointed to by exi
458 - * will be released by the caller, comon_dispatch.
513 +	/* The passed exportinfo argument is released by the
514 +	 * caller, common_dispatch
459 515 */
460 - if (publicfh_flag && exi != NULL)
461 - exi_rele(exi);
516 + exi_rele(exi);
462 517
463 518 /*
464 519 * If it's public fh, no 0x81, and client's flavor is
465 520 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
466 521 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
467 522 */
468 523 if (auth_weak)
469 524 dr->dr_status = (enum nfsstat)WNFSERR_CLNT_FLAVOR;
470 525 else
471 526 dr->dr_status = puterrno(error);
472 527 }
473 528 void *
474 529 rfs_lookup_getfh(struct nfsdiropargs *da)
475 530 {
476 531 return (da->da_fhandle);
477 532 }
478 533
479 534 /*
480 535 * Read symbolic link.
481 536 * Returns the string in the symbolic link at the given fhandle.
482 537 */
483 538 /* ARGSUSED */
484 539 void
485 540 rfs_readlink(fhandle_t *fhp, struct nfsrdlnres *rl, struct exportinfo *exi,
486 541 struct svc_req *req, cred_t *cr)
487 542 {
488 543 int error;
489 544 struct iovec iov;
490 545 struct uio uio;
491 546 vnode_t *vp;
492 547 struct vattr va;
493 548 struct sockaddr *ca;
494 549 char *name = NULL;
495 550 int is_referral = 0;
496 551
497 552 vp = nfs_fhtovp(fhp, exi);
498 553 if (vp == NULL) {
499 554 rl->rl_data = NULL;
500 555 rl->rl_status = NFSERR_STALE;
501 556 return;
502 557 }
503 558
504 559 va.va_mask = AT_MODE;
505 560
506 561 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
507 562
508 563 if (error) {
509 564 VN_RELE(vp);
510 565 rl->rl_data = NULL;
511 566 rl->rl_status = puterrno(error);
512 567 return;
513 568 }
514 569
515 570 if (MANDLOCK(vp, va.va_mode)) {
516 571 VN_RELE(vp);
517 572 rl->rl_data = NULL;
518 573 rl->rl_status = NFSERR_ACCES;
519 574 return;
520 575 }
521 576
522 577 /* We lied about the object type for a referral */
523 578 if (vn_is_nfs_reparse(vp, cr))
524 579 is_referral = 1;
525 580
526 581 /*
527 582 * XNFS and RFC1094 require us to return ENXIO if argument
528 583 * is not a link. BUGID 1138002.
529 584 */
530 585 if (vp->v_type != VLNK && !is_referral) {
531 586 VN_RELE(vp);
532 587 rl->rl_data = NULL;
533 588 rl->rl_status = NFSERR_NXIO;
534 589 return;
535 590 }
536 591
537 592 /*
538 593 * Allocate data for pathname. This will be freed by rfs_rlfree.
539 594 */
540 595 rl->rl_data = kmem_alloc(NFS_MAXPATHLEN, KM_SLEEP);
541 596
542 597 if (is_referral) {
543 598 char *s;
544 599 size_t strsz;
545 600
546 601 /* Get an artificial symlink based on a referral */
547 602 s = build_symlink(vp, cr, &strsz);
548 603 global_svstat_ptr[2][NFS_REFERLINKS].value.ui64++;
549 604 DTRACE_PROBE2(nfs2serv__func__referral__reflink,
550 605 vnode_t *, vp, char *, s);
551 606 if (s == NULL)
552 607 error = EINVAL;
553 608 else {
554 609 error = 0;
555 610 (void) strlcpy(rl->rl_data, s, NFS_MAXPATHLEN);
556 611 rl->rl_count = (uint32_t)MIN(strsz, NFS_MAXPATHLEN);
557 612 kmem_free(s, strsz);
558 613 }
559 614
560 615 } else {
561 616
562 617 /*
563 618 * Set up io vector to read sym link data
564 619 */
565 620 iov.iov_base = rl->rl_data;
566 621 iov.iov_len = NFS_MAXPATHLEN;
567 622 uio.uio_iov = &iov;
568 623 uio.uio_iovcnt = 1;
569 624 uio.uio_segflg = UIO_SYSSPACE;
570 625 uio.uio_extflg = UIO_COPY_CACHED;
571 626 uio.uio_loffset = (offset_t)0;
572 627 uio.uio_resid = NFS_MAXPATHLEN;
573 628
574 629 /*
575 630 * Do the readlink.
576 631 */
577 632 error = VOP_READLINK(vp, &uio, cr, NULL);
578 633
579 634 rl->rl_count = (uint32_t)(NFS_MAXPATHLEN - uio.uio_resid);
580 635
581 636 if (!error)
582 637 rl->rl_data[rl->rl_count] = '\0';
583 638
584 639 }
585 640
586 641
587 642 VN_RELE(vp);
588 643
589 644 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
590 645 name = nfscmd_convname(ca, exi, rl->rl_data,
591 646 NFSCMD_CONV_OUTBOUND, MAXPATHLEN);
592 647
593 648 if (name != NULL && name != rl->rl_data) {
594 649 kmem_free(rl->rl_data, NFS_MAXPATHLEN);
595 650 rl->rl_data = name;
596 651 }
597 652
598 653 /*
599 654 * XNFS and RFC1094 require us to return ENXIO if argument
600 655 * is not a link. UFS returns EINVAL if this is the case,
601 656 * so we do the mapping here. BUGID 1138002.
602 657 */
603 658 if (error == EINVAL)
604 659 rl->rl_status = NFSERR_NXIO;
605 660 else
606 661 rl->rl_status = puterrno(error);
607 662
608 663 }
609 664 void *
610 665 rfs_readlink_getfh(fhandle_t *fhp)
611 666 {
612 667 return (fhp);
613 668 }
614 669 /*
615 670 * Free data allocated by rfs_readlink
616 671 */
617 672 void
618 673 rfs_rlfree(struct nfsrdlnres *rl)
619 674 {
620 675 if (rl->rl_data != NULL)
621 676 kmem_free(rl->rl_data, NFS_MAXPATHLEN);
622 677 }
623 678
624 679 static int rdma_setup_read_data2(struct nfsreadargs *, struct nfsrdresult *);
625 680
626 681 /*
627 682 * Read data.
628 683 * Returns some data read from the file at the given fhandle.
629 684 */
630 685 /* ARGSUSED */
631 686 void
632 687 rfs_read(struct nfsreadargs *ra, struct nfsrdresult *rr,
633 688 struct exportinfo *exi, struct svc_req *req, cred_t *cr)
634 689 {
635 690 vnode_t *vp;
636 691 int error;
637 692 struct vattr va;
638 693 struct iovec iov;
639 694 struct uio uio;
640 695 mblk_t *mp;
641 696 int alloc_err = 0;
642 697 int in_crit = 0;
643 698 caller_context_t ct;
644 699
645 700 vp = nfs_fhtovp(&ra->ra_fhandle, exi);
646 701 if (vp == NULL) {
647 702 rr->rr_data = NULL;
648 703 rr->rr_status = NFSERR_STALE;
649 704 return;
650 705 }
651 706
652 707 if (vp->v_type != VREG) {
653 708 VN_RELE(vp);
654 709 rr->rr_data = NULL;
655 710 rr->rr_status = NFSERR_ISDIR;
656 711 return;
657 712 }
658 713
659 714 ct.cc_sysid = 0;
660 715 ct.cc_pid = 0;
661 716 ct.cc_caller_id = nfs2_srv_caller_id;
662 717 ct.cc_flags = CC_DONTBLOCK;
663 718
664 719 /*
665 720 * Enter the critical region before calling VOP_RWLOCK
666 721 * to avoid a deadlock with write requests.
667 722 */
668 723 if (nbl_need_check(vp)) {
669 724 nbl_start_crit(vp, RW_READER);
670 725 if (nbl_conflict(vp, NBL_READ, ra->ra_offset, ra->ra_count,
671 726 0, NULL)) {
672 727 nbl_end_crit(vp);
673 728 VN_RELE(vp);
674 729 rr->rr_data = NULL;
675 730 rr->rr_status = NFSERR_ACCES;
676 731 return;
677 732 }
678 733 in_crit = 1;
679 734 }
680 735
681 736 error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
682 737
683 738 /* check if a monitor detected a delegation conflict */
684 739 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
685 740 VN_RELE(vp);
686 741 /* mark as wouldblock so response is dropped */
687 742 curthread->t_flag |= T_WOULDBLOCK;
688 743
689 744 rr->rr_data = NULL;
690 745 return;
691 746 }
692 747
693 748 va.va_mask = AT_ALL;
694 749
695 750 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
696 751
697 752 if (error) {
698 753 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
699 754 if (in_crit)
700 755 nbl_end_crit(vp);
701 756
702 757 VN_RELE(vp);
703 758 rr->rr_data = NULL;
704 759 rr->rr_status = puterrno(error);
705 760
706 761 return;
707 762 }
708 763
709 764 /*
710 765 * This is a kludge to allow reading of files created
711 766 * with no read permission. The owner of the file
712 767 * is always allowed to read it.
713 768 */
714 769 if (crgetuid(cr) != va.va_uid) {
715 770 error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);
716 771
717 772 if (error) {
718 773 /*
719 774 * Exec is the same as read over the net because
720 775 * of demand loading.
721 776 */
722 777 error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
723 778 }
724 779 if (error) {
725 780 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
726 781 if (in_crit)
727 782 nbl_end_crit(vp);
728 783 VN_RELE(vp);
729 784 rr->rr_data = NULL;
730 785 rr->rr_status = puterrno(error);
731 786
732 787 return;
733 788 }
734 789 }
735 790
736 791 if (MANDLOCK(vp, va.va_mode)) {
737 792 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
738 793 if (in_crit)
739 794 nbl_end_crit(vp);
740 795
741 796 VN_RELE(vp);
742 797 rr->rr_data = NULL;
743 798 rr->rr_status = NFSERR_ACCES;
744 799
745 800 return;
746 801 }
747 802
748 803 rr->rr_ok.rrok_wlist_len = 0;
749 804 rr->rr_ok.rrok_wlist = NULL;
750 805
751 806 if ((u_offset_t)ra->ra_offset >= va.va_size) {
752 807 rr->rr_count = 0;
753 808 rr->rr_data = NULL;
754 809 /*
755 810 * In this case, status is NFS_OK, but there is no data
756 811 * to encode. So set rr_mp to NULL.
757 812 */
758 813 rr->rr_mp = NULL;
759 814 rr->rr_ok.rrok_wlist = ra->ra_wlist;
760 815 if (rr->rr_ok.rrok_wlist)
761 816 clist_zero_len(rr->rr_ok.rrok_wlist);
762 817 goto done;
763 818 }
764 819
765 820 if (ra->ra_wlist) {
766 821 mp = NULL;
767 822 rr->rr_mp = NULL;
768 823 (void) rdma_get_wchunk(req, &iov, ra->ra_wlist);
769 824 if (ra->ra_count > iov.iov_len) {
770 825 rr->rr_data = NULL;
771 826 rr->rr_status = NFSERR_INVAL;
772 827 goto done;
773 828 }
774 829 } else {
775 830 /*
776 831 * mp will contain the data to be sent out in the read reply.
777 832 * This will be freed after the reply has been sent out (by the
778 833 * driver).
779 834 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so
780 835 * that the call to xdrmblk_putmblk() never fails.
781 836 */
782 837 mp = allocb_wait(RNDUP(ra->ra_count), BPRI_MED, STR_NOSIG,
783 838 &alloc_err);
784 839 ASSERT(mp != NULL);
785 840 ASSERT(alloc_err == 0);
786 841
787 842 rr->rr_mp = mp;
788 843
789 844 /*
790 845 * Set up io vector
791 846 */
792 847 iov.iov_base = (caddr_t)mp->b_datap->db_base;
793 848 iov.iov_len = ra->ra_count;
794 849 }
795 850
796 851 uio.uio_iov = &iov;
797 852 uio.uio_iovcnt = 1;
798 853 uio.uio_segflg = UIO_SYSSPACE;
799 854 uio.uio_extflg = UIO_COPY_CACHED;
800 855 uio.uio_loffset = (offset_t)ra->ra_offset;
801 856 uio.uio_resid = ra->ra_count;
802 857
803 858 error = VOP_READ(vp, &uio, 0, cr, &ct);
804 859
805 860 if (error) {
806 861 if (mp)
807 862 freeb(mp);
808 863
809 864 /*
810 865 * check if a monitor detected a delegation conflict and
811 866 * mark as wouldblock so response is dropped
812 867 */
813 868 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
814 869 curthread->t_flag |= T_WOULDBLOCK;
815 870 else
816 871 rr->rr_status = puterrno(error);
817 872
818 873 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
819 874 if (in_crit)
820 875 nbl_end_crit(vp);
821 876
822 877 VN_RELE(vp);
823 878 rr->rr_data = NULL;
824 879
825 880 return;
826 881 }
827 882
828 883 /*
829 884 * Get attributes again so we can send the latest access
830 885 * time to the client side for his cache.
831 886 */
832 887 va.va_mask = AT_ALL;
833 888
834 889 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
835 890
836 891 if (error) {
837 892 if (mp)
838 893 freeb(mp);
839 894
840 895 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
841 896 if (in_crit)
842 897 nbl_end_crit(vp);
843 898
844 899 VN_RELE(vp);
845 900 rr->rr_data = NULL;
846 901 rr->rr_status = puterrno(error);
847 902
848 903 return;
849 904 }
850 905
851 906 rr->rr_count = (uint32_t)(ra->ra_count - uio.uio_resid);
852 907
853 908 if (mp) {
854 909 rr->rr_data = (char *)mp->b_datap->db_base;
855 910 } else {
856 911 if (ra->ra_wlist) {
857 912 rr->rr_data = (caddr_t)iov.iov_base;
858 913 if (!rdma_setup_read_data2(ra, rr)) {
859 914 rr->rr_data = NULL;
860 915 rr->rr_status = puterrno(NFSERR_INVAL);
861 916 }
862 917 }
863 918 }
864 919 done:
865 920 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
866 921 if (in_crit)
867 922 nbl_end_crit(vp);
868 923
869 924 acl_perm(vp, exi, &va, cr);
870 925
871 926 /* check for overflows */
872 927 error = vattr_to_nattr(&va, &rr->rr_attr);
873 928
874 929 VN_RELE(vp);
875 930
876 931 rr->rr_status = puterrno(error);
877 932 }
878 933
879 934 /*
880 935 * Free data allocated by rfs_read
881 936 */
882 937 void
883 938 rfs_rdfree(struct nfsrdresult *rr)
884 939 {
885 940 mblk_t *mp;
886 941
887 942 if (rr->rr_status == NFS_OK) {
888 943 mp = rr->rr_mp;
889 944 if (mp != NULL)
890 945 freeb(mp);
891 946 }
892 947 }
893 948
894 949 void *
895 950 rfs_read_getfh(struct nfsreadargs *ra)
896 951 {
897 952 return (&ra->ra_fhandle);
898 953 }
899 954
900 955 #define MAX_IOVECS 12
901 956
902 957 #ifdef DEBUG
903 958 static int rfs_write_sync_hits = 0;
904 959 static int rfs_write_sync_misses = 0;
905 960 #endif
906 961
907 962 /*
908 963 * Write data to file.
909 964 * Returns attributes of a file after writing some data to it.
910 965 *
911 966 * Any changes made here, especially in error handling might have
912 967 * to also be done in rfs_write (which clusters write requests).
913 968 */
914 969 void
915 970 rfs_write_sync(struct nfswriteargs *wa, struct nfsattrstat *ns,
916 971 struct exportinfo *exi, struct svc_req *req, cred_t *cr)
917 972 {
918 973 int error;
919 974 vnode_t *vp;
920 975 rlim64_t rlimit;
921 976 struct vattr va;
922 977 struct uio uio;
923 978 struct iovec iov[MAX_IOVECS];
924 979 mblk_t *m;
925 980 struct iovec *iovp;
926 981 int iovcnt;
927 982 cred_t *savecred;
928 983 int in_crit = 0;
929 984 caller_context_t ct;
930 985
931 986 vp = nfs_fhtovp(&wa->wa_fhandle, exi);
932 987 if (vp == NULL) {
933 988 ns->ns_status = NFSERR_STALE;
934 989 return;
935 990 }
936 991
937 992 if (rdonly(exi, req)) {
938 993 VN_RELE(vp);
939 994 ns->ns_status = NFSERR_ROFS;
940 995 return;
941 996 }
942 997
943 998 if (vp->v_type != VREG) {
944 999 VN_RELE(vp);
945 1000 ns->ns_status = NFSERR_ISDIR;
946 1001 return;
947 1002 }
948 1003
949 1004 ct.cc_sysid = 0;
950 1005 ct.cc_pid = 0;
951 1006 ct.cc_caller_id = nfs2_srv_caller_id;
952 1007 ct.cc_flags = CC_DONTBLOCK;
953 1008
954 1009 va.va_mask = AT_UID|AT_MODE;
955 1010
956 1011 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
957 1012
958 1013 if (error) {
959 1014 VN_RELE(vp);
960 1015 ns->ns_status = puterrno(error);
961 1016
962 1017 return;
963 1018 }
964 1019
965 1020 if (crgetuid(cr) != va.va_uid) {
966 1021 /*
967 1022 * This is a kludge to allow writes of files created
968 1023 * with read only permission. The owner of the file
969 1024 * is always allowed to write it.
970 1025 */
971 1026 error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct);
972 1027
973 1028 if (error) {
974 1029 VN_RELE(vp);
975 1030 ns->ns_status = puterrno(error);
976 1031 return;
977 1032 }
978 1033 }
979 1034
980 1035 /*
981 1036 * Can't access a mandatory lock file. This might cause
982 1037 * the NFS service thread to block forever waiting for a
983 1038 * lock to be released that will never be released.
984 1039 */
985 1040 if (MANDLOCK(vp, va.va_mode)) {
986 1041 VN_RELE(vp);
987 1042 ns->ns_status = NFSERR_ACCES;
988 1043 return;
989 1044 }
990 1045
991 1046 /*
992 1047 * We have to enter the critical region before calling VOP_RWLOCK
993 1048 * to avoid a deadlock with ufs.
994 1049 */
995 1050 if (nbl_need_check(vp)) {
996 1051 nbl_start_crit(vp, RW_READER);
997 1052 in_crit = 1;
998 1053 if (nbl_conflict(vp, NBL_WRITE, wa->wa_offset,
999 1054 wa->wa_count, 0, NULL)) {
1000 1055 error = EACCES;
1001 1056 goto out;
1002 1057 }
1003 1058 }
1004 1059
1005 1060 error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1006 1061
1007 1062 /* check if a monitor detected a delegation conflict */
1008 1063 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1009 1064 VN_RELE(vp);
1010 1065 /* mark as wouldblock so response is dropped */
1011 1066 curthread->t_flag |= T_WOULDBLOCK;
1012 1067 return;
1013 1068 }
1014 1069
1015 1070 if (wa->wa_data || wa->wa_rlist) {
1016 1071 /* Do the RDMA thing if necessary */
1017 1072 if (wa->wa_rlist) {
1018 1073 iov[0].iov_base = (char *)((wa->wa_rlist)->u.c_daddr3);
1019 1074 iov[0].iov_len = wa->wa_count;
1020 1075 } else {
1021 1076 iov[0].iov_base = wa->wa_data;
1022 1077 iov[0].iov_len = wa->wa_count;
1023 1078 }
1024 1079 uio.uio_iov = iov;
1025 1080 uio.uio_iovcnt = 1;
1026 1081 uio.uio_segflg = UIO_SYSSPACE;
1027 1082 uio.uio_extflg = UIO_COPY_DEFAULT;
1028 1083 uio.uio_loffset = (offset_t)wa->wa_offset;
1029 1084 uio.uio_resid = wa->wa_count;
1030 1085 /*
1031 1086 * The limit is checked on the client. We
1032 1087 * should allow any size writes here.
1033 1088 */
1034 1089 uio.uio_llimit = curproc->p_fsz_ctl;
1035 1090 rlimit = uio.uio_llimit - wa->wa_offset;
1036 1091 if (rlimit < (rlim64_t)uio.uio_resid)
1037 1092 uio.uio_resid = (uint_t)rlimit;
1038 1093
1039 1094 /*
1040 1095 * for now we assume no append mode
1041 1096 */
1042 1097 /*
1043 1098 * We're changing creds because VM may fault and we need
1044 1099 * the cred of the current thread to be used if quota
1045 1100 * checking is enabled.
1046 1101 */
1047 1102 savecred = curthread->t_cred;
1048 1103 curthread->t_cred = cr;
1049 1104 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
1050 1105 curthread->t_cred = savecred;
1051 1106 } else {
1052 1107 iovcnt = 0;
1053 1108 for (m = wa->wa_mblk; m != NULL; m = m->b_cont)
1054 1109 iovcnt++;
1055 1110 if (iovcnt <= MAX_IOVECS) {
1056 1111 #ifdef DEBUG
1057 1112 rfs_write_sync_hits++;
1058 1113 #endif
1059 1114 iovp = iov;
1060 1115 } else {
1061 1116 #ifdef DEBUG
1062 1117 rfs_write_sync_misses++;
1063 1118 #endif
1064 1119 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1065 1120 }
1066 1121 mblk_to_iov(wa->wa_mblk, iovcnt, iovp);
1067 1122 uio.uio_iov = iovp;
1068 1123 uio.uio_iovcnt = iovcnt;
1069 1124 uio.uio_segflg = UIO_SYSSPACE;
1070 1125 uio.uio_extflg = UIO_COPY_DEFAULT;
1071 1126 uio.uio_loffset = (offset_t)wa->wa_offset;
1072 1127 uio.uio_resid = wa->wa_count;
1073 1128 /*
1074 1129 * The limit is checked on the client. We
1075 1130 * should allow any size writes here.
1076 1131 */
1077 1132 uio.uio_llimit = curproc->p_fsz_ctl;
1078 1133 rlimit = uio.uio_llimit - wa->wa_offset;
1079 1134 if (rlimit < (rlim64_t)uio.uio_resid)
1080 1135 uio.uio_resid = (uint_t)rlimit;
1081 1136
1082 1137 /*
1083 1138 * For now we assume no append mode.
1084 1139 */
1085 1140 /*
1086 1141 * We're changing creds because VM may fault and we need
1087 1142 * the cred of the current thread to be used if quota
1088 1143 * checking is enabled.
1089 1144 */
1090 1145 savecred = curthread->t_cred;
1091 1146 curthread->t_cred = cr;
1092 1147 error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
1093 1148 curthread->t_cred = savecred;
1094 1149
1095 1150 if (iovp != iov)
1096 1151 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1097 1152 }
1098 1153
1099 1154 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1100 1155
1101 1156 if (!error) {
1102 1157 /*
1103 1158 * Get attributes again so we send the latest mod
1104 1159 * time to the client side for his cache.
1105 1160 */
1106 1161 va.va_mask = AT_ALL; /* now we want everything */
1107 1162
1108 1163 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1109 1164
1110 1165 /* check for overflows */
1111 1166 if (!error) {
1112 1167 acl_perm(vp, exi, &va, cr);
1113 1168 error = vattr_to_nattr(&va, &ns->ns_attr);
1114 1169 }
1115 1170 }
1116 1171
1117 1172 out:
1118 1173 if (in_crit)
1119 1174 nbl_end_crit(vp);
1120 1175 VN_RELE(vp);
1121 1176
1122 1177 /* check if a monitor detected a delegation conflict */
1123 1178 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
1124 1179 /* mark as wouldblock so response is dropped */
1125 1180 curthread->t_flag |= T_WOULDBLOCK;
1126 1181 else
1127 1182 ns->ns_status = puterrno(error);
1128 1183
1129 1184 }
1130 1185
/*
 * One queued NFSv2 WRITE request, waiting to be processed as part of a
 * write "cluster".  Each instance lives on the stack of the service
 * thread that owns the request (see nrpsp in rfs_write()).
 */
struct rfs_async_write {
	struct nfswriteargs *wa;	/* decoded WRITE arguments */
	struct nfsattrstat *ns;		/* response to be filled in */
	struct svc_req *req;		/* RPC request handle */
	cred_t *cr;			/* credentials of the requester */
	kthread_t *thread;		/* service thread blocked on this entry */
	struct rfs_async_write *list;	/* next request in this cluster, by offset */
};
1139 1194
/*
 * A write cluster: the set of pending WRITE requests against a single
 * file handle.  Clusters are linked on rfs_async_write_head and are
 * protected by rfs_async_write_lock.
 */
struct rfs_async_write_list {
	fhandle_t *fhp;			/* file handle shared by all requests */
	kcondvar_t cv;			/* broadcast when the cluster completes */
	struct rfs_async_write *list;	/* requests, sorted by starting offset */
	struct rfs_async_write_list *next;	/* next active cluster */
};
1146 1201
/*
 * Global write-clustering state: the list of active clusters (one per
 * file handle) and the mutex protecting both the list and the per-
 * request status fields while threads wait on a cluster.
 */
static struct rfs_async_write_list *rfs_async_write_head = NULL;
static kmutex_t rfs_async_write_lock;
static int rfs_write_async = 1;	/* enables write clustering if == 1 */

/* Max iovecs held in rfs_write()'s on-stack array before kmem_alloc is used. */
#define	MAXCLIOVECS	42
/* Sentinel meaning "request not yet processed"; 0 would read as NFS_OK. */
#define	RFSWRITE_INITVAL	(enum nfsstat) -1

#ifdef DEBUG
/* Counts of clusters that fit (hits) or did not fit (misses) on the stack. */
static int rfs_write_hits = 0;
static int rfs_write_misses = 0;
#endif
1158 1213
/*
 * Write data to file.
 * Returns attributes of a file after writing some data to it.
 *
 * This is the write-clustering entry point: concurrent WRITE requests
 * against the same file handle are gathered into a "cluster" while the
 * first request holds (or waits for) the file's write lock, and
 * contiguous requests in a cluster are then issued with a single
 * VOP_WRITE.  If clustering is disabled (rfs_write_async == 0) the
 * request is handed straight to rfs_write_sync().
 */
void
rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns,
	struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	int error;
	vnode_t *vp;
	rlim64_t rlimit;
	struct vattr va;
	struct uio uio;
	struct rfs_async_write_list *lp;
	struct rfs_async_write_list *nlp;
	struct rfs_async_write *rp;
	struct rfs_async_write *nrp;
	struct rfs_async_write *trp;
	struct rfs_async_write *lrp;
	int data_written;
	int iovcnt;
	mblk_t *m;
	struct iovec *iovp;
	struct iovec *niovp;
	struct iovec iov[MAXCLIOVECS];
	int count;
	int rcount;
	uint_t off;
	uint_t len;
	struct rfs_async_write nrpsp;		/* this request's cluster entry */
	struct rfs_async_write_list nlpsp;	/* cluster header, if we start one */
	ushort_t t_flag;
	cred_t *savecred;
	int in_crit = 0;
	caller_context_t ct;

	if (!rfs_write_async) {
		rfs_write_sync(wa, ns, exi, req, cr);
		return;
	}

	/*
	 * Initialize status to RFSWRITE_INITVAL instead of 0, since value of 0
	 * is considered an OK.
	 */
	ns->ns_status = RFSWRITE_INITVAL;

	nrp = &nrpsp;
	nrp->wa = wa;
	nrp->ns = ns;
	nrp->req = req;
	nrp->cr = cr;
	nrp->thread = curthread;

	/*
	 * The cluster entries live on thread stacks; those stacks must not
	 * be swapped out while other threads hold pointers into them.
	 */
	ASSERT(curthread->t_schedflag & TS_DONT_SWAP);

	/*
	 * Look to see if there is already a cluster started
	 * for this file.
	 */
	mutex_enter(&rfs_async_write_lock);
	for (lp = rfs_async_write_head; lp != NULL; lp = lp->next) {
		if (bcmp(&wa->wa_fhandle, lp->fhp,
		    sizeof (fhandle_t)) == 0)
			break;
	}

	/*
	 * If lp is non-NULL, then there is already a cluster
	 * started.  We need to place ourselves in the cluster
	 * list in the right place as determined by starting
	 * offset.  Conflicts with non-blocking mandatory locked
	 * regions will be checked when the cluster is processed.
	 */
	if (lp != NULL) {
		rp = lp->list;
		trp = NULL;
		/* Insertion sort by starting offset. */
		while (rp != NULL && rp->wa->wa_offset < wa->wa_offset) {
			trp = rp;
			rp = rp->list;
		}
		nrp->list = rp;
		if (trp == NULL)
			lp->list = nrp;
		else
			trp->list = nrp;
		/*
		 * Sleep until the thread processing the cluster fills in
		 * our status; it broadcasts on lp->cv when done.
		 */
		while (nrp->ns->ns_status == RFSWRITE_INITVAL)
			cv_wait(&lp->cv, &rfs_async_write_lock);
		mutex_exit(&rfs_async_write_lock);

		return;
	}

	/*
	 * No cluster started yet, start one and add ourselves
	 * to the list of clusters.
	 */
	nrp->list = NULL;

	nlp = &nlpsp;
	nlp->fhp = &wa->wa_fhandle;
	cv_init(&nlp->cv, NULL, CV_DEFAULT, NULL);
	nlp->list = nrp;
	nlp->next = NULL;

	if (rfs_async_write_head == NULL) {
		rfs_async_write_head = nlp;
	} else {
		lp = rfs_async_write_head;
		while (lp->next != NULL)
			lp = lp->next;
		lp->next = nlp;
	}
	mutex_exit(&rfs_async_write_lock);

	/*
	 * Convert the file handle common to all of the requests
	 * in this cluster to a vnode.
	 */
	vp = nfs_fhtovp(&wa->wa_fhandle, exi);
	if (vp == NULL) {
		/*
		 * Stale handle: unlink our cluster from the global list and
		 * fail every request that joined it with NFSERR_STALE.
		 */
		mutex_enter(&rfs_async_write_lock);
		if (rfs_async_write_head == nlp)
			rfs_async_write_head = nlp->next;
		else {
			lp = rfs_async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			rp->ns->ns_status = NFSERR_STALE;
			rp->thread->t_flag |= t_flag;
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&rfs_async_write_lock);

		return;
	}

	/*
	 * Can only write regular files.  Attempts to write any
	 * other file types fail with EISDIR.
	 */
	if (vp->v_type != VREG) {
		VN_RELE(vp);
		/* Unlink the cluster and fail all queued requests. */
		mutex_enter(&rfs_async_write_lock);
		if (rfs_async_write_head == nlp)
			rfs_async_write_head = nlp->next;
		else {
			lp = rfs_async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			rp->ns->ns_status = NFSERR_ISDIR;
			rp->thread->t_flag |= t_flag;
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&rfs_async_write_lock);

		return;
	}

	/*
	 * Enter the critical region before calling VOP_RWLOCK, to avoid a
	 * deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
	}

	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * Lock the file for writing.  This operation provides
	 * the delay which allows clusters to grow.
	 */
	error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		if (in_crit)
			nbl_end_crit(vp);
		VN_RELE(vp);
		/* mark as wouldblock so response is dropped */
		curthread->t_flag |= T_WOULDBLOCK;
		mutex_enter(&rfs_async_write_lock);
		if (rfs_async_write_head == nlp)
			rfs_async_write_head = nlp->next;
		else {
			lp = rfs_async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		/*
		 * Only fail requests still awaiting processing; any
		 * request already given a status keeps it.
		 */
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			if (rp->ns->ns_status == RFSWRITE_INITVAL) {
				rp->ns->ns_status = puterrno(error);
				rp->thread->t_flag |= T_WOULDBLOCK;
			}
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&rfs_async_write_lock);

		return;
	}

	/*
	 * Disconnect this cluster from the list of clusters.
	 * The cluster that is being dealt with must be fixed
	 * in size after this point, so there is no reason
	 * to leave it on the list so that new requests can
	 * find it.
	 *
	 * The algorithm is that the first write request will
	 * create a cluster, convert the file handle to a
	 * vnode pointer, and then lock the file for writing.
	 * This request is not likely to be clustered with
	 * any others.  However, the next request will create
	 * a new cluster and be blocked in VOP_RWLOCK while
	 * the first request is being processed.  This delay
	 * will allow more requests to be clustered in this
	 * second cluster.
	 */
	mutex_enter(&rfs_async_write_lock);
	if (rfs_async_write_head == nlp)
		rfs_async_write_head = nlp->next;
	else {
		lp = rfs_async_write_head;
		while (lp->next != nlp)
			lp = lp->next;
		lp->next = nlp->next;
	}
	mutex_exit(&rfs_async_write_lock);

	/*
	 * Step through the list of requests in this cluster.
	 * We need to check permissions to make sure that all
	 * of the requests have sufficient permission to write
	 * the file.  A cluster can be composed of requests
	 * from different clients and different users on each
	 * client.
	 *
	 * As a side effect, we also calculate the size of the
	 * byte range that this cluster encompasses.
	 */
	rp = nlp->list;
	off = rp->wa->wa_offset;
	len = (uint_t)0;
	do {
		if (rdonly(exi, rp->req)) {
			rp->ns->ns_status = NFSERR_ROFS;
			t_flag = curthread->t_flag & T_WOULDBLOCK;
			rp->thread->t_flag |= t_flag;
			/* continue advances rp via the while condition */
			continue;
		}

		va.va_mask = AT_UID|AT_MODE;

		error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);

		if (!error) {
			if (crgetuid(rp->cr) != va.va_uid) {
				/*
				 * This is a kludge to allow writes of files
				 * created with read only permission.  The
				 * owner of the file is always allowed to
				 * write it.
				 */
				error = VOP_ACCESS(vp, VWRITE, 0, rp->cr, &ct);
			}
			if (!error && MANDLOCK(vp, va.va_mode))
				error = EACCES;
		}

		/*
		 * Check for a conflict with a nbmand-locked region.
		 */
		if (in_crit && nbl_conflict(vp, NBL_WRITE, rp->wa->wa_offset,
		    rp->wa->wa_count, 0, NULL)) {
			error = EACCES;
		}

		if (error) {
			rp->ns->ns_status = puterrno(error);
			t_flag = curthread->t_flag & T_WOULDBLOCK;
			rp->thread->t_flag |= t_flag;
			continue;
		}
		/* Grow the cluster's byte range to cover this request. */
		if (len < rp->wa->wa_offset + rp->wa->wa_count - off)
			len = rp->wa->wa_offset + rp->wa->wa_count - off;
	} while ((rp = rp->list) != NULL);

	/*
	 * Step through the cluster attempting to gather as many
	 * requests which are contiguous as possible.  These
	 * contiguous requests are handled via one call to VOP_WRITE
	 * instead of different calls to VOP_WRITE.  We also keep
	 * track of the fact that any data was written.
	 */
	rp = nlp->list;
	data_written = 0;
	do {
		/*
		 * Skip any requests which are already marked as having an
		 * error.
		 */
		if (rp->ns->ns_status != RFSWRITE_INITVAL) {
			rp = rp->list;
			continue;
		}

		/*
		 * Count the number of iovec's which are required
		 * to handle this set of requests.  One iovec is
		 * needed for each data buffer, whether addressed
		 * by wa_data or by the b_rptr pointers in the
		 * mblk chains.
		 */
		iovcnt = 0;
		lrp = rp;
		for (;;) {
			if (lrp->wa->wa_data || lrp->wa->wa_rlist)
				iovcnt++;
			else {
				m = lrp->wa->wa_mblk;
				while (m != NULL) {
					iovcnt++;
					m = m->b_cont;
				}
			}
			/*
			 * Stop extending the run when the next request is
			 * missing, already errored, or not byte-contiguous
			 * with this one.  lrp is left pointing one past the
			 * last request in the run.
			 */
			if (lrp->list == NULL ||
			    lrp->list->ns->ns_status != RFSWRITE_INITVAL ||
			    lrp->wa->wa_offset + lrp->wa->wa_count !=
			    lrp->list->wa->wa_offset) {
				lrp = lrp->list;
				break;
			}
			lrp = lrp->list;
		}

		if (iovcnt <= MAXCLIOVECS) {
#ifdef DEBUG
			rfs_write_hits++;
#endif
			niovp = iov;
		} else {
#ifdef DEBUG
			rfs_write_misses++;
#endif
			niovp = kmem_alloc(sizeof (*niovp) * iovcnt, KM_SLEEP);
		}
		/*
		 * Put together the scatter/gather iovecs.
		 */
		iovp = niovp;
		trp = rp;
		count = 0;
		do {
			if (trp->wa->wa_data || trp->wa->wa_rlist) {
				/* Do the RDMA thing if necessary */
				if (trp->wa->wa_rlist) {
					iovp->iov_base =
					    (char *)((trp->wa->wa_rlist)->
					    u.c_daddr3);
					iovp->iov_len = trp->wa->wa_count;
				} else {
					iovp->iov_base = trp->wa->wa_data;
					iovp->iov_len = trp->wa->wa_count;
				}
				iovp++;
			} else {
				/*
				 * Walk the mblk chain, clamping the total
				 * to wa_count in case the chain carries
				 * more bytes than the request claims.
				 */
				m = trp->wa->wa_mblk;
				rcount = trp->wa->wa_count;
				while (m != NULL) {
					iovp->iov_base = (caddr_t)m->b_rptr;
					iovp->iov_len = (m->b_wptr - m->b_rptr);
					rcount -= iovp->iov_len;
					if (rcount < 0)
						iovp->iov_len += rcount;
					iovp++;
					if (rcount <= 0)
						break;
					m = m->b_cont;
				}
			}
			count += trp->wa->wa_count;
			trp = trp->list;
		} while (trp != lrp);

		uio.uio_iov = niovp;
		uio.uio_iovcnt = iovcnt;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_extflg = UIO_COPY_DEFAULT;
		uio.uio_loffset = (offset_t)rp->wa->wa_offset;
		uio.uio_resid = count;
		/*
		 * The limit is checked on the client. We
		 * should allow any size writes here.
		 */
		uio.uio_llimit = curproc->p_fsz_ctl;
		rlimit = uio.uio_llimit - rp->wa->wa_offset;
		if (rlimit < (rlim64_t)uio.uio_resid)
			uio.uio_resid = (uint_t)rlimit;

		/*
		 * For now we assume no append mode.
		 */

		/*
		 * We're changing creds because VM may fault
		 * and we need the cred of the current
		 * thread to be used if quota checking is
		 * enabled.
		 */
		savecred = curthread->t_cred;
		curthread->t_cred = cr;
		error = VOP_WRITE(vp, &uio, 0, rp->cr, &ct);
		curthread->t_cred = savecred;

		/* check if a monitor detected a delegation conflict */
		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
			/* mark as wouldblock so response is dropped */
			curthread->t_flag |= T_WOULDBLOCK;

		if (niovp != iov)
			kmem_free(niovp, sizeof (*niovp) * iovcnt);

		if (!error) {
			data_written = 1;
			/*
			 * Get attributes again so we send the latest mod
			 * time to the client side for his cache.
			 */
			va.va_mask = AT_ALL;	/* now we want everything */

			error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);

			if (!error)
				acl_perm(vp, exi, &va, rp->cr);
		}

		/*
		 * Fill in the status responses for each request
		 * which was just handled.  Also, copy the latest
		 * attributes in to the attribute responses if
		 * appropriate.
		 */
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		do {
			rp->thread->t_flag |= t_flag;
			/* check for overflows */
			if (!error) {
				error = vattr_to_nattr(&va, &rp->ns->ns_attr);
			}
			rp->ns->ns_status = puterrno(error);
			rp = rp->list;
		} while (rp != lrp);
	} while (rp != NULL);

	/*
	 * If any data was written at all, then we need to flush
	 * the data and metadata to stable storage.
	 */
	if (data_written) {
		error = VOP_PUTPAGE(vp, (u_offset_t)off, len, 0, cr, &ct);

		if (!error) {
			error = VOP_FSYNC(vp, FNODSYNC, cr, &ct);
		}
	}

	VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);

	if (in_crit)
		nbl_end_crit(vp);
	VN_RELE(vp);

	/*
	 * Any request still unprocessed (e.g. skipped by the clustering
	 * loop) picks up the final error status; then wake all waiters.
	 */
	t_flag = curthread->t_flag & T_WOULDBLOCK;
	mutex_enter(&rfs_async_write_lock);
	for (rp = nlp->list; rp != NULL; rp = rp->list) {
		if (rp->ns->ns_status == RFSWRITE_INITVAL) {
			rp->ns->ns_status = puterrno(error);
			rp->thread->t_flag |= t_flag;
		}
	}
	cv_broadcast(&nlp->cv);
	mutex_exit(&rfs_async_write_lock);

}
1656 1711
1657 1712 void *
1658 1713 rfs_write_getfh(struct nfswriteargs *wa)
1659 1714 {
1660 1715 return (&wa->wa_fhandle);
1661 1716 }
1662 1717
1663 1718 /*
1664 1719 * Create a file.
1665 1720 * Creates a file with given attributes and returns those attributes
1666 1721 * and an fhandle for the new file.
1667 1722 */
1668 1723 void
1669 1724 rfs_create(struct nfscreatargs *args, struct nfsdiropres *dr,
1670 1725 struct exportinfo *exi, struct svc_req *req, cred_t *cr)
1671 1726 {
1672 1727 int error;
1673 1728 int lookuperr;
1674 1729 int in_crit = 0;
1675 1730 struct vattr va;
1676 1731 vnode_t *vp;
1677 1732 vnode_t *realvp;
1678 1733 vnode_t *dvp;
1679 1734 char *name = args->ca_da.da_name;
1680 1735 vnode_t *tvp = NULL;
1681 1736 int mode;
1682 1737 int lookup_ok;
1683 1738 bool_t trunc;
1684 1739 struct sockaddr *ca;
1685 1740
1686 1741 /*
1687 1742 * Disallow NULL paths
1688 1743 */
1689 1744 if (name == NULL || *name == '\0') {
1690 1745 dr->dr_status = NFSERR_ACCES;
1691 1746 return;
1692 1747 }
1693 1748
1694 1749 dvp = nfs_fhtovp(args->ca_da.da_fhandle, exi);
1695 1750 if (dvp == NULL) {
1696 1751 dr->dr_status = NFSERR_STALE;
1697 1752 return;
1698 1753 }
1699 1754
1700 1755 error = sattr_to_vattr(args->ca_sa, &va);
1701 1756 if (error) {
1702 1757 dr->dr_status = puterrno(error);
1703 1758 return;
1704 1759 }
1705 1760
1706 1761 /*
1707 1762 * Must specify the mode.
1708 1763 */
1709 1764 if (!(va.va_mask & AT_MODE)) {
1710 1765 VN_RELE(dvp);
1711 1766 dr->dr_status = NFSERR_INVAL;
1712 1767 return;
1713 1768 }
1714 1769
1715 1770 /*
1716 1771 * This is a completely gross hack to make mknod
1717 1772 * work over the wire until we can wack the protocol
1718 1773 */
1719 1774 if ((va.va_mode & IFMT) == IFCHR) {
1720 1775 if (args->ca_sa->sa_size == (uint_t)NFS_FIFO_DEV)
1721 1776 va.va_type = VFIFO; /* xtra kludge for named pipe */
1722 1777 else {
1723 1778 va.va_type = VCHR;
1724 1779 /*
1725 1780 * uncompress the received dev_t
1726 1781 * if the top half is zero indicating a request
1727 1782 * from an `older style' OS.
1728 1783 */
1729 1784 if ((va.va_size & 0xffff0000) == 0)
1730 1785 va.va_rdev = nfsv2_expdev(va.va_size);
1731 1786 else
1732 1787 va.va_rdev = (dev_t)va.va_size;
1733 1788 }
1734 1789 va.va_mask &= ~AT_SIZE;
1735 1790 } else if ((va.va_mode & IFMT) == IFBLK) {
1736 1791 va.va_type = VBLK;
1737 1792 /*
1738 1793 * uncompress the received dev_t
1739 1794 * if the top half is zero indicating a request
1740 1795 * from an `older style' OS.
1741 1796 */
1742 1797 if ((va.va_size & 0xffff0000) == 0)
1743 1798 va.va_rdev = nfsv2_expdev(va.va_size);
1744 1799 else
1745 1800 va.va_rdev = (dev_t)va.va_size;
1746 1801 va.va_mask &= ~AT_SIZE;
1747 1802 } else if ((va.va_mode & IFMT) == IFSOCK) {
1748 1803 va.va_type = VSOCK;
1749 1804 } else {
1750 1805 va.va_type = VREG;
1751 1806 }
1752 1807 va.va_mode &= ~IFMT;
1753 1808 va.va_mask |= AT_TYPE;
1754 1809
1755 1810 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1756 1811 name = nfscmd_convname(ca, exi, name, NFSCMD_CONV_INBOUND,
1757 1812 MAXPATHLEN);
1758 1813 if (name == NULL) {
1759 1814 dr->dr_status = puterrno(EINVAL);
1760 1815 return;
1761 1816 }
1762 1817
1763 1818 /*
1764 1819 * Why was the choice made to use VWRITE as the mode to the
1765 1820 * call to VOP_CREATE ? This results in a bug. When a client
1766 1821 * opens a file that already exists and is RDONLY, the second
1767 1822 * open fails with an EACESS because of the mode.
1768 1823 * bug ID 1054648.
1769 1824 */
1770 1825 lookup_ok = 0;
1771 1826 mode = VWRITE;
1772 1827 if (!(va.va_mask & AT_SIZE) || va.va_type != VREG) {
1773 1828 error = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr,
1774 1829 NULL, NULL, NULL);
1775 1830 if (!error) {
1776 1831 struct vattr at;
1777 1832
1778 1833 lookup_ok = 1;
1779 1834 at.va_mask = AT_MODE;
1780 1835 error = VOP_GETATTR(tvp, &at, 0, cr, NULL);
1781 1836 if (!error)
1782 1837 mode = (at.va_mode & S_IWUSR) ? VWRITE : VREAD;
1783 1838 VN_RELE(tvp);
1784 1839 tvp = NULL;
1785 1840 }
1786 1841 }
1787 1842
1788 1843 if (!lookup_ok) {
1789 1844 if (rdonly(exi, req)) {
1790 1845 error = EROFS;
1791 1846 } else if (va.va_type != VREG && va.va_type != VFIFO &&
1792 1847 va.va_type != VSOCK && secpolicy_sys_devices(cr) != 0) {
1793 1848 error = EPERM;
1794 1849 } else {
1795 1850 error = 0;
1796 1851 }
1797 1852 }
1798 1853
1799 1854 /*
1800 1855 * If file size is being modified on an already existing file
1801 1856 * make sure that there are no conflicting non-blocking mandatory
1802 1857 * locks in the region being manipulated. Return EACCES if there
1803 1858 * are conflicting locks.
1804 1859 */
1805 1860 if (!error && (va.va_type == VREG) && (va.va_mask & AT_SIZE)) {
1806 1861 lookuperr = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr,
1807 1862 NULL, NULL, NULL);
1808 1863
1809 1864 if (!lookuperr &&
1810 1865 rfs4_check_delegated(FWRITE, tvp, va.va_size == 0)) {
1811 1866 VN_RELE(tvp);
1812 1867 curthread->t_flag |= T_WOULDBLOCK;
1813 1868 goto out;
1814 1869 }
1815 1870
1816 1871 if (!lookuperr && nbl_need_check(tvp)) {
1817 1872 /*
1818 1873 * The file exists. Now check if it has any
1819 1874 * conflicting non-blocking mandatory locks
1820 1875 * in the region being changed.
1821 1876 */
1822 1877 struct vattr bva;
1823 1878 u_offset_t offset;
1824 1879 ssize_t length;
1825 1880
1826 1881 nbl_start_crit(tvp, RW_READER);
1827 1882 in_crit = 1;
1828 1883
1829 1884 bva.va_mask = AT_SIZE;
1830 1885 error = VOP_GETATTR(tvp, &bva, 0, cr, NULL);
1831 1886 if (!error) {
1832 1887 if (va.va_size < bva.va_size) {
1833 1888 offset = va.va_size;
1834 1889 length = bva.va_size - va.va_size;
1835 1890 } else {
1836 1891 offset = bva.va_size;
1837 1892 length = va.va_size - bva.va_size;
1838 1893 }
1839 1894 if (length) {
1840 1895 if (nbl_conflict(tvp, NBL_WRITE,
1841 1896 offset, length, 0, NULL)) {
1842 1897 error = EACCES;
1843 1898 }
1844 1899 }
1845 1900 }
1846 1901 if (error) {
1847 1902 nbl_end_crit(tvp);
1848 1903 VN_RELE(tvp);
1849 1904 in_crit = 0;
1850 1905 }
1851 1906 } else if (tvp != NULL) {
1852 1907 VN_RELE(tvp);
1853 1908 }
1854 1909 }
1855 1910
1856 1911 if (!error) {
1857 1912 /*
1858 1913 * If filesystem is shared with nosuid the remove any
1859 1914 * setuid/setgid bits on create.
1860 1915 */
1861 1916 if (va.va_type == VREG &&
1862 1917 exi->exi_export.ex_flags & EX_NOSUID)
1863 1918 va.va_mode &= ~(VSUID | VSGID);
1864 1919
1865 1920 error = VOP_CREATE(dvp, name, &va, NONEXCL, mode, &vp, cr, 0,
1866 1921 NULL, NULL);
1867 1922
1868 1923 if (!error) {
1869 1924
1870 1925 if ((va.va_mask & AT_SIZE) && (va.va_size == 0))
1871 1926 trunc = TRUE;
1872 1927 else
1873 1928 trunc = FALSE;
1874 1929
1875 1930 if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1876 1931 VN_RELE(vp);
1877 1932 curthread->t_flag |= T_WOULDBLOCK;
1878 1933 goto out;
1879 1934 }
1880 1935 va.va_mask = AT_ALL;
1881 1936
1882 1937 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1883 1938
1884 1939 /* check for overflows */
1885 1940 if (!error) {
1886 1941 acl_perm(vp, exi, &va, cr);
1887 1942 error = vattr_to_nattr(&va, &dr->dr_attr);
1888 1943 if (!error) {
1889 1944 error = makefh(&dr->dr_fhandle, vp,
1890 1945 exi);
1891 1946 }
1892 1947 }
1893 1948 /*
1894 1949 * Force modified metadata out to stable storage.
1895 1950 *
1896 1951 * if a underlying vp exists, pass it to VOP_FSYNC
1897 1952 */
1898 1953 if (VOP_REALVP(vp, &realvp, NULL) == 0)
1899 1954 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
1900 1955 else
1901 1956 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
1902 1957 VN_RELE(vp);
1903 1958 }
1904 1959
1905 1960 if (in_crit) {
1906 1961 nbl_end_crit(tvp);
1907 1962 VN_RELE(tvp);
1908 1963 }
1909 1964 }
1910 1965
1911 1966 /*
1912 1967 * Force modified data and metadata out to stable storage.
1913 1968 */
1914 1969 (void) VOP_FSYNC(dvp, 0, cr, NULL);
1915 1970
1916 1971 out:
1917 1972
1918 1973 VN_RELE(dvp);
1919 1974
1920 1975 dr->dr_status = puterrno(error);
1921 1976
1922 1977 if (name != args->ca_da.da_name)
1923 1978 kmem_free(name, MAXPATHLEN);
1924 1979 }
1925 1980 void *
1926 1981 rfs_create_getfh(struct nfscreatargs *args)
1927 1982 {
1928 1983 return (args->ca_da.da_fhandle);
1929 1984 }
1930 1985
1931 1986 /*
1932 1987 * Remove a file.
1933 1988 * Remove named file from parent directory.
1934 1989 */
void
rfs_remove(struct nfsdiropargs *da, enum nfsstat *status,
	struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	int error = 0;
	vnode_t *vp;		/* directory containing the entry */
	vnode_t *targvp;	/* the entry being removed */
	int in_crit = 0;

	/*
	 * Disallow NULL paths
	 */
	if (da->da_name == NULL || *da->da_name == '\0') {
		*status = NFSERR_ACCES;
		return;
	}

	vp = nfs_fhtovp(da->da_fhandle, exi);
	if (vp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	if (rdonly(exi, req)) {
		VN_RELE(vp);
		*status = NFSERR_ROFS;
		return;
	}

	/*
	 * Check for a conflict with a non-blocking mandatory share reservation.
	 * Look up the target first so we can inspect it below.
	 */
	error = VOP_LOOKUP(vp, da->da_name, &targvp, NULL, 0,
	    NULL, cr, NULL, NULL, NULL);
	if (error != 0) {
		VN_RELE(vp);
		*status = puterrno(error);
		return;
	}

	/*
	 * If the file is delegated to an v4 client, then initiate
	 * recall and drop this request (by setting T_WOULDBLOCK).
	 * The client will eventually re-transmit the request and
	 * (hopefully), by then, the v4 client will have returned
	 * the delegation.
	 */

	if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
		VN_RELE(vp);
		VN_RELE(targvp);
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	/* Hold the target in a critical region while checking nbmand locks. */
	if (nbl_need_check(targvp)) {
		nbl_start_crit(targvp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	error = VOP_REMOVE(vp, da->da_name, cr, NULL, 0);

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

out:
	if (in_crit)
		nbl_end_crit(targvp);
	VN_RELE(targvp);
	VN_RELE(vp);

	*status = puterrno(error);

}
2015 2070
2016 2071 void *
2017 2072 rfs_remove_getfh(struct nfsdiropargs *da)
2018 2073 {
2019 2074 return (da->da_fhandle);
2020 2075 }
2021 2076
2022 2077 /*
2023 2078 * rename a file
2024 2079 * Give a file (from) a new name (to).
2025 2080 */
void
rfs_rename(struct nfsrnmargs *args, enum nfsstat *status,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	int error = 0;
	vnode_t *fromvp;	/* source directory */
	vnode_t *tovp;		/* target directory */
	struct exportinfo *to_exi;
	fhandle_t *fh;
	vnode_t *srcvp;		/* the file being renamed */
	vnode_t *targvp;	/* existing file being renamed over, if any */
	int in_crit = 0;	/* nonzero once inside srcvp's nbmand crit region */

	fromvp = nfs_fhtovp(args->rna_from.da_fhandle, exi);
	if (fromvp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	/*
	 * The target handle must belong to the same export this request
	 * was authenticated against: renames may not cross filesystem
	 * (export) boundaries.
	 */
	fh = args->rna_to.da_fhandle;
	to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
	if (to_exi == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}
	exi_rele(to_exi);

	/* Only the pointer value is compared after the release above. */
	if (to_exi != exi) {
		VN_RELE(fromvp);
		*status = NFSERR_XDEV;
		return;
	}

	tovp = nfs_fhtovp(args->rna_to.da_fhandle, exi);
	if (tovp == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_STALE;
		return;
	}

	/* Both handles must name directories. */
	if (fromvp->v_type != VDIR || tovp->v_type != VDIR) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_NOTDIR;
		return;
	}

	/*
	 * Disallow NULL paths
	 */
	if (args->rna_from.da_name == NULL || *args->rna_from.da_name == '\0' ||
	    args->rna_to.da_name == NULL || *args->rna_to.da_name == '\0') {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}

	if (rdonly(exi, req)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ROFS;
		return;
	}

	/*
	 * Check for a conflict with a non-blocking mandatory share reservation.
	 */
	error = VOP_LOOKUP(fromvp, args->rna_from.da_name, &srcvp, NULL, 0,
	    NULL, cr, NULL, NULL, NULL);
	if (error != 0) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = puterrno(error);
		return;
	}

	/* Check for delegations on the source file */

	if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		VN_RELE(srcvp);
		/*
		 * An NFSv4 delegation is outstanding.  *status is left
		 * unset; T_WOULDBLOCK presumably tells the dispatcher to
		 * drop the reply so the client retries after the recall
		 * -- confirm against the common dispatch code.
		 */
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	/* Check for delegation on the file being renamed over, if it exists */

	if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
	    VOP_LOOKUP(tovp, args->rna_to.da_name, &targvp, NULL, 0, NULL, cr,
	    NULL, NULL, NULL) == 0) {

		if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
			VN_RELE(tovp);
			VN_RELE(fromvp);
			VN_RELE(srcvp);
			VN_RELE(targvp);
			curthread->t_flag |= T_WOULDBLOCK;
			return;
		}
		VN_RELE(targvp);
	}

	/*
	 * Serialize against non-blocking mandatory locking activity on
	 * the source vnode for the duration of the rename.
	 */
	if (nbl_need_check(srcvp)) {
		nbl_start_crit(srcvp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	error = VOP_RENAME(fromvp, args->rna_from.da_name,
	    tovp, args->rna_to.da_name, cr, NULL, 0);

	/* On success, update the renamed vnode's cached pathname. */
	if (error == 0)
		vn_renamepath(tovp, srcvp, args->rna_to.da_name,
		    strlen(args->rna_to.da_name));

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(tovp, 0, cr, NULL);
	(void) VOP_FSYNC(fromvp, 0, cr, NULL);

out:
	if (in_crit)
		nbl_end_crit(srcvp);
	VN_RELE(srcvp);
	VN_RELE(tovp);
	VN_RELE(fromvp);

	*status = puterrno(error);

}
2164 2219 void *
2165 2220 rfs_rename_getfh(struct nfsrnmargs *args)
2166 2221 {
2167 2222 return (args->rna_from.da_fhandle);
2168 2223 }
2169 2224
2170 2225 /*
2171 2226 * Link to a file.
2172 2227 * Create a file (to) which is a hard link to the given file (from).
2173 2228 */
void
rfs_link(struct nfslinkargs *args, enum nfsstat *status,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	int error;
	vnode_t *fromvp;	/* the existing file being linked to */
	vnode_t *tovp;		/* directory that will contain the new name */
	struct exportinfo *to_exi;
	fhandle_t *fh;

	fromvp = nfs_fhtovp(args->la_from, exi);
	if (fromvp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	/*
	 * The target directory must be in the same export the request
	 * was authenticated against; hard links may not cross
	 * filesystem (export) boundaries.
	 */
	fh = args->la_to.da_fhandle;
	to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
	if (to_exi == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}
	exi_rele(to_exi);

	/* Only the pointer value is compared after the release above. */
	if (to_exi != exi) {
		VN_RELE(fromvp);
		*status = NFSERR_XDEV;
		return;
	}

	tovp = nfs_fhtovp(args->la_to.da_fhandle, exi);
	if (tovp == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_STALE;
		return;
	}

	if (tovp->v_type != VDIR) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_NOTDIR;
		return;
	}
	/*
	 * Disallow NULL paths
	 */
	if (args->la_to.da_name == NULL || *args->la_to.da_name == '\0') {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}

	if (rdonly(exi, req)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ROFS;
		return;
	}

	error = VOP_LINK(tovp, fromvp, args->la_to.da_name, cr, NULL, 0);

	/*
	 * Force modified data and metadata out to stable storage.
	 * FNODSYNC on the source: only its data, not its attribute
	 * times, needs to be committed here.
	 */
	(void) VOP_FSYNC(tovp, 0, cr, NULL);
	(void) VOP_FSYNC(fromvp, FNODSYNC, cr, NULL);

	VN_RELE(tovp);
	VN_RELE(fromvp);

	*status = puterrno(error);

}
2249 2304 void *
2250 2305 rfs_link_getfh(struct nfslinkargs *args)
2251 2306 {
2252 2307 return (args->la_from);
2253 2308 }
2254 2309
2255 2310 /*
2256 2311 * Symbolicly link to a file.
2257 2312 * Create a file (to) with the given attributes which is a symbolic link
2258 2313 * to the given path name (to).
2259 2314 */
2260 2315 void
2261 2316 rfs_symlink(struct nfsslargs *args, enum nfsstat *status,
2262 2317 struct exportinfo *exi, struct svc_req *req, cred_t *cr)
2263 2318 {
2264 2319 int error;
2265 2320 struct vattr va;
2266 2321 vnode_t *vp;
2267 2322 vnode_t *svp;
2268 2323 int lerror;
2269 2324 struct sockaddr *ca;
2270 2325 char *name = NULL;
2271 2326
2272 2327 /*
2273 2328 * Disallow NULL paths
2274 2329 */
2275 2330 if (args->sla_from.da_name == NULL || *args->sla_from.da_name == '\0') {
2276 2331 *status = NFSERR_ACCES;
2277 2332 return;
2278 2333 }
2279 2334
2280 2335 vp = nfs_fhtovp(args->sla_from.da_fhandle, exi);
2281 2336 if (vp == NULL) {
2282 2337 *status = NFSERR_STALE;
2283 2338 return;
2284 2339 }
2285 2340
2286 2341 if (rdonly(exi, req)) {
2287 2342 VN_RELE(vp);
2288 2343 *status = NFSERR_ROFS;
2289 2344 return;
2290 2345 }
2291 2346
2292 2347 error = sattr_to_vattr(args->sla_sa, &va);
2293 2348 if (error) {
2294 2349 VN_RELE(vp);
2295 2350 *status = puterrno(error);
2296 2351 return;
2297 2352 }
2298 2353
2299 2354 if (!(va.va_mask & AT_MODE)) {
2300 2355 VN_RELE(vp);
2301 2356 *status = NFSERR_INVAL;
2302 2357 return;
2303 2358 }
2304 2359
2305 2360 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2306 2361 name = nfscmd_convname(ca, exi, args->sla_tnm,
2307 2362 NFSCMD_CONV_INBOUND, MAXPATHLEN);
2308 2363
2309 2364 if (name == NULL) {
2310 2365 *status = NFSERR_ACCES;
2311 2366 return;
2312 2367 }
2313 2368
2314 2369 va.va_type = VLNK;
2315 2370 va.va_mask |= AT_TYPE;
2316 2371
2317 2372 error = VOP_SYMLINK(vp, args->sla_from.da_name, &va, name, cr, NULL, 0);
2318 2373
2319 2374 /*
2320 2375 * Force new data and metadata out to stable storage.
2321 2376 */
2322 2377 lerror = VOP_LOOKUP(vp, args->sla_from.da_name, &svp, NULL, 0,
2323 2378 NULL, cr, NULL, NULL, NULL);
2324 2379
2325 2380 if (!lerror) {
2326 2381 (void) VOP_FSYNC(svp, 0, cr, NULL);
2327 2382 VN_RELE(svp);
2328 2383 }
2329 2384
2330 2385 /*
2331 2386 * Force modified data and metadata out to stable storage.
2332 2387 */
2333 2388 (void) VOP_FSYNC(vp, 0, cr, NULL);
2334 2389
2335 2390 VN_RELE(vp);
2336 2391
2337 2392 *status = puterrno(error);
2338 2393 if (name != args->sla_tnm)
2339 2394 kmem_free(name, MAXPATHLEN);
2340 2395
2341 2396 }
2342 2397 void *
2343 2398 rfs_symlink_getfh(struct nfsslargs *args)
2344 2399 {
2345 2400 return (args->sla_from.da_fhandle);
2346 2401 }
2347 2402
2348 2403 /*
2349 2404 * Make a directory.
2350 2405 * Create a directory with the given name, parent directory, and attributes.
2351 2406 * Returns a file handle and attributes for the new directory.
2352 2407 */
void
rfs_mkdir(struct nfscreatargs *args, struct nfsdiropres *dr,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	int error;
	struct vattr va;
	vnode_t *dvp = NULL;	/* the newly created directory */
	vnode_t *vp;		/* the parent directory */
	char *name = args->ca_da.da_name;

	/*
	 * Disallow NULL paths
	 */
	if (name == NULL || *name == '\0') {
		dr->dr_status = NFSERR_ACCES;
		return;
	}

	vp = nfs_fhtovp(args->ca_da.da_fhandle, exi);
	if (vp == NULL) {
		dr->dr_status = NFSERR_STALE;
		return;
	}

	if (rdonly(exi, req)) {
		VN_RELE(vp);
		dr->dr_status = NFSERR_ROFS;
		return;
	}

	error = sattr_to_vattr(args->ca_sa, &va);
	if (error) {
		VN_RELE(vp);
		dr->dr_status = puterrno(error);
		return;
	}

	/* The client must supply a mode for the new directory. */
	if (!(va.va_mask & AT_MODE)) {
		VN_RELE(vp);
		dr->dr_status = NFSERR_INVAL;
		return;
	}

	va.va_type = VDIR;
	va.va_mask |= AT_TYPE;

	error = VOP_MKDIR(vp, name, &va, &dvp, cr, NULL, 0, NULL);

	if (!error) {
		/*
		 * Attribtutes of the newly created directory should
		 * be returned to the client.
		 */
		va.va_mask = AT_ALL; /* We want everything */
		error = VOP_GETATTR(dvp, &va, 0, cr, NULL);

		/* check for overflows */
		if (!error) {
			/*
			 * NOTE(review): acl_perm() is handed the parent
			 * directory (vp) while the attributes being
			 * adjusted (va) are those of the new directory
			 * (dvp) fetched just above -- confirm that the
			 * parent, not dvp, is intended here.
			 */
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &dr->dr_attr);
			if (!error) {
				error = makefh(&dr->dr_fhandle, dvp, exi);
			}
		}
		/*
		 * Force new data and metadata out to stable storage.
		 */
		(void) VOP_FSYNC(dvp, 0, cr, NULL);
		VN_RELE(dvp);
	}

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

	VN_RELE(vp);

	dr->dr_status = puterrno(error);

}
2434 2489 void *
2435 2490 rfs_mkdir_getfh(struct nfscreatargs *args)
2436 2491 {
2437 2492 return (args->ca_da.da_fhandle);
2438 2493 }
2439 2494
2440 2495 /*
2441 2496 * Remove a directory.
2442 2497 * Remove the given directory name from the given parent directory.
2443 2498 */
void
rfs_rmdir(struct nfsdiropargs *da, enum nfsstat *status,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	int error;
	vnode_t *vp;	/* the parent directory */


	/*
	 * Disallow NULL paths
	 */
	if (da->da_name == NULL || *da->da_name == '\0') {
		*status = NFSERR_ACCES;
		return;
	}

	vp = nfs_fhtovp(da->da_fhandle, exi);
	if (vp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	if (rdonly(exi, req)) {
		VN_RELE(vp);
		*status = NFSERR_ROFS;
		return;
	}

	/*
	 * VOP_RMDIR now takes a new third argument (the current
	 * directory of the process).  That's because someone
	 * wants to return EINVAL if one tries to remove ".".
	 * Of course, NFS servers have no idea what their
	 * clients' current directories are.  We fake it by
	 * supplying a vnode known to exist and illegal to
	 * remove.
	 */
	error = VOP_RMDIR(vp, da->da_name, rootdir, cr, NULL, 0);

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

	VN_RELE(vp);

	/*
	 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
	 * if the directory is not empty.  A System V NFS server
	 * needs to map NFSERR_EXIST to NFSERR_NOTEMPTY to transmit
	 * over the wire.
	 */
	if (error == EEXIST)
		*status = NFSERR_NOTEMPTY;
	else
		*status = puterrno(error);

}
2502 2557 void *
2503 2558 rfs_rmdir_getfh(struct nfsdiropargs *da)
2504 2559 {
2505 2560 return (da->da_fhandle);
2506 2561 }
2507 2562
2508 2563 /* ARGSUSED */
void
rfs_readdir(struct nfsrddirargs *rda, struct nfsrddirres *rd,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	int error;
	int iseof;		/* set by VOP_READDIR when end of dir hit */
	struct iovec iov;
	struct uio uio;
	vnode_t *vp;
	char *ndata = NULL;	/* character-set converted entries, if any */
	struct sockaddr *ca;
	size_t nents;
	int ret;

	vp = nfs_fhtovp(&rda->rda_fh, exi);
	if (vp == NULL) {
		rd->rd_entries = NULL;
		rd->rd_status = NFSERR_STALE;
		return;
	}

	if (vp->v_type != VDIR) {
		VN_RELE(vp);
		rd->rd_entries = NULL;
		rd->rd_status = NFSERR_NOTDIR;
		return;
	}

	/* Hold the directory as a reader while it is scanned. */
	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);

	error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);

	if (error) {
		rd->rd_entries = NULL;
		goto bad;
	}

	/* A zero-byte request yields an empty, non-EOF reply. */
	if (rda->rda_count == 0) {
		rd->rd_entries = NULL;
		rd->rd_size = 0;
		rd->rd_eof = FALSE;
		goto bad;
	}

	/* Never return more than the protocol maximum in one reply. */
	rda->rda_count = MIN(rda->rda_count, NFS_MAXDATA);

	/*
	 * Allocate data for entries.  This will be freed by rfs_rddirfree.
	 */
	rd->rd_bufsize = (uint_t)rda->rda_count;
	rd->rd_entries = kmem_alloc(rd->rd_bufsize, KM_SLEEP);

	/*
	 * Set up io vector to read directory data
	 */
	iov.iov_base = (caddr_t)rd->rd_entries;
	iov.iov_len = rda->rda_count;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_extflg = UIO_COPY_CACHED;
	uio.uio_loffset = (offset_t)rda->rda_offset;
	uio.uio_resid = rda->rda_count;

	/*
	 * read directory
	 */
	error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);

	/*
	 * Clean up
	 */
	if (!error) {
		/*
		 * set size and eof
		 */
		if (uio.uio_resid == rda->rda_count) {
			/* nothing was read: report EOF */
			rd->rd_size = 0;
			rd->rd_eof = TRUE;
		} else {
			rd->rd_size = (uint32_t)(rda->rda_count -
			    uio.uio_resid);
			rd->rd_eof = iseof ? TRUE : FALSE;
		}
	}

	/*
	 * Convert entry names to the client's character set where the
	 * export requires it.  NOTE(review): if VOP_READDIR failed,
	 * rd_size is consulted below without having been assigned on
	 * this path -- presumably *rd arrives zero-initialized from
	 * the dispatcher; confirm.
	 */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	nents = nfscmd_countents((char *)rd->rd_entries, rd->rd_size);
	ret = nfscmd_convdirplus(ca, exi, (char *)rd->rd_entries, nents,
	    rda->rda_count, &ndata);

	if (ret != 0) {
		size_t dropbytes;
		/*
		 * We had to drop one or more entries in order to fit
		 * during the character conversion.  We need to patch
		 * up the size and eof info.
		 */
		if (rd->rd_eof)
			rd->rd_eof = FALSE;
		dropbytes = nfscmd_dropped_entrysize(
		    (struct dirent64 *)rd->rd_entries, nents, ret);
		rd->rd_size -= dropbytes;
	}
	/* Swap in the converted buffer, if conversion produced one. */
	if (ndata == NULL) {
		ndata = (char *)rd->rd_entries;
	} else if (ndata != (char *)rd->rd_entries) {
		kmem_free(rd->rd_entries, rd->rd_bufsize);
		rd->rd_entries = (void *)ndata;
		rd->rd_bufsize = rda->rda_count;
	}

bad:
	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);

#if 0 /* notyet */
	/*
	 * Don't do this.  It causes local disk writes when just
	 * reading the file and the overhead is deemed larger
	 * than the benefit.
	 */
	/*
	 * Force modified metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
#endif

	VN_RELE(vp);

	rd->rd_status = puterrno(error);

}
2641 2696 void *
2642 2697 rfs_readdir_getfh(struct nfsrddirargs *rda)
2643 2698 {
2644 2699 return (&rda->rda_fh);
2645 2700 }
2646 2701 void
2647 2702 rfs_rddirfree(struct nfsrddirres *rd)
2648 2703 {
2649 2704 if (rd->rd_entries != NULL)
2650 2705 kmem_free(rd->rd_entries, rd->rd_bufsize);
2651 2706 }
2652 2707
2653 2708 /* ARGSUSED */
2654 2709 void
2655 2710 rfs_statfs(fhandle_t *fh, struct nfsstatfs *fs, struct exportinfo *exi,
2656 2711 struct svc_req *req, cred_t *cr)
2657 2712 {
2658 2713 int error;
2659 2714 struct statvfs64 sb;
2660 2715 vnode_t *vp;
2661 2716
2662 2717 vp = nfs_fhtovp(fh, exi);
2663 2718 if (vp == NULL) {
2664 2719 fs->fs_status = NFSERR_STALE;
2665 2720 return;
2666 2721 }
2667 2722
2668 2723 error = VFS_STATVFS(vp->v_vfsp, &sb);
2669 2724
2670 2725 if (!error) {
2671 2726 fs->fs_tsize = nfstsize();
2672 2727 fs->fs_bsize = sb.f_frsize;
2673 2728 fs->fs_blocks = sb.f_blocks;
2674 2729 fs->fs_bfree = sb.f_bfree;
2675 2730 fs->fs_bavail = sb.f_bavail;
2676 2731 }
2677 2732
2678 2733 VN_RELE(vp);
2679 2734
2680 2735 fs->fs_status = puterrno(error);
2681 2736
2682 2737 }
2683 2738 void *
2684 2739 rfs_statfs_getfh(fhandle_t *fh)
2685 2740 {
2686 2741 return (fh);
2687 2742 }
2688 2743
/*
 * Convert NFSv2 over-the-wire settable attributes (nfssattr) into a
 * vattr for the VOP layer.  An all-ones field is the wire encoding for
 * "do not set"; every other value is copied into *vap and the matching
 * AT_* bit is raised in va_mask.  Returns 0 on success, or EOVERFLOW
 * (32-bit kernels only) when a time value cannot be represented.
 */
static int
sattr_to_vattr(struct nfssattr *sa, struct vattr *vap)
{
	vap->va_mask = 0;

	/*
	 * There was a sign extension bug in some VFS based systems
	 * which stored the mode as a short.  When it would get
	 * assigned to a u_long, no sign extension would occur.
	 * It needed to, but this wasn't noticed because sa_mode
	 * would then get assigned back to the short, thus ignoring
	 * the upper 16 bits of sa_mode.
	 *
	 * To make this implementation work for both broken
	 * clients and good clients, we check for both versions
	 * of the mode.
	 */
	if (sa->sa_mode != (uint32_t)((ushort_t)-1) &&
	    sa->sa_mode != (uint32_t)-1) {
		vap->va_mask |= AT_MODE;
		vap->va_mode = sa->sa_mode;
	}
	if (sa->sa_uid != (uint32_t)-1) {
		vap->va_mask |= AT_UID;
		vap->va_uid = sa->sa_uid;
	}
	if (sa->sa_gid != (uint32_t)-1) {
		vap->va_mask |= AT_GID;
		vap->va_gid = sa->sa_gid;
	}
	if (sa->sa_size != (uint32_t)-1) {
		vap->va_mask |= AT_SIZE;
		vap->va_size = sa->sa_size;
	}
	/* Both halves of a time must be set for the time to count. */
	if (sa->sa_atime.tv_sec != (int32_t)-1 &&
	    sa->sa_atime.tv_usec != (int32_t)-1) {
#ifndef _LP64
		/* return error if time overflow */
		if (!NFS2_TIME_OK(sa->sa_atime.tv_sec))
			return (EOVERFLOW);
#endif
		vap->va_mask |= AT_ATIME;
		/*
		 * nfs protocol defines times as unsigned so don't extend sign,
		 * unless sysadmin set nfs_allow_preepoch_time.
		 */
		NFS_TIME_T_CONVERT(vap->va_atime.tv_sec, sa->sa_atime.tv_sec);
		/* microseconds on the wire, nanoseconds in the vattr */
		vap->va_atime.tv_nsec = (uint32_t)(sa->sa_atime.tv_usec * 1000);
	}
	if (sa->sa_mtime.tv_sec != (int32_t)-1 &&
	    sa->sa_mtime.tv_usec != (int32_t)-1) {
#ifndef _LP64
		/* return error if time overflow */
		if (!NFS2_TIME_OK(sa->sa_mtime.tv_sec))
			return (EOVERFLOW);
#endif
		vap->va_mask |= AT_MTIME;
		/*
		 * nfs protocol defines times as unsigned so don't extend sign,
		 * unless sysadmin set nfs_allow_preepoch_time.
		 */
		NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec, sa->sa_mtime.tv_sec);
		vap->va_mtime.tv_nsec = (uint32_t)(sa->sa_mtime.tv_usec * 1000);
	}
	return (0);
}
2755 2810
/*
 * Map vnode types (vtype_t) to NFSv2 over-the-wire file types; indexed
 * by vap->va_type in vattr_to_nattr().  Types with no NFSv2 equivalent
 * map to 0.  (VFIFO is special-cased via NA_SETFIFO() in
 * vattr_to_nattr() rather than here.)
 */
static enum nfsftype vt_to_nf[] = {
	0, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, 0, 0, 0, NFSOC, 0
};
2759 2814
2760 2815 /*
2761 2816 * check the following fields for overflow: nodeid, size, and time.
2762 2817 * There could be a problem when converting 64-bit LP64 fields
2763 2818 * into 32-bit ones. Return an error if there is an overflow.
2764 2819 */
int
vattr_to_nattr(struct vattr *vap, struct nfsfattr *na)
{
	ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
	na->na_type = vt_to_nf[vap->va_type];

	/* (unsigned short)-1 means "unknown" -- pass through as all-ones */
	if (vap->va_mode == (unsigned short) -1)
		na->na_mode = (uint32_t)-1;
	else
		na->na_mode = VTTOIF(vap->va_type) | vap->va_mode;

	if (vap->va_uid == (unsigned short)(-1))
		na->na_uid = (uint32_t)(-1);
	else if (vap->va_uid == UID_NOBODY)
		na->na_uid = (uint32_t)NFS_UID_NOBODY;
	else
		na->na_uid = vap->va_uid;

	if (vap->va_gid == (unsigned short)(-1))
		na->na_gid = (uint32_t)-1;
	else if (vap->va_gid == GID_NOBODY)
		na->na_gid = (uint32_t)NFS_GID_NOBODY;
	else
		na->na_gid = vap->va_gid;

	/*
	 * Do we need to check fsid for overflow?  It is 64-bit in the
	 * vattr, but are bigger than 32 bit values supported?
	 */
	na->na_fsid = vap->va_fsid;

	na->na_nodeid = vap->va_nodeid;

	/*
	 * Check to make sure that the nodeid is representable over the
	 * wire without losing bits.
	 */
	if (vap->va_nodeid != (u_longlong_t)na->na_nodeid)
		return (EFBIG);
	na->na_nlink = vap->va_nlink;

	/*
	 * Check for big files here, instead of at the caller.  See
	 * comments in cstat for large special file explanation.
	 */
	if (vap->va_size > (u_longlong_t)MAXOFF32_T) {
		if ((vap->va_type == VREG) || (vap->va_type == VDIR))
			return (EFBIG);
		if ((vap->va_type == VBLK) || (vap->va_type == VCHR)) {
			/* UNKNOWN_SIZE | OVERFLOW */
			na->na_size = MAXOFF32_T;
		} else
			na->na_size = vap->va_size;
	} else
		na->na_size = vap->va_size;

	/*
	 * If the vnode times overflow the 32-bit times that NFS2
	 * uses on the wire then return an error.
	 */
	if (!NFS_VAP_TIME_OK(vap)) {
		return (EOVERFLOW);
	}
	/* nanoseconds in the vattr, microseconds on the wire */
	na->na_atime.tv_sec = vap->va_atime.tv_sec;
	na->na_atime.tv_usec = vap->va_atime.tv_nsec / 1000;

	na->na_mtime.tv_sec = vap->va_mtime.tv_sec;
	na->na_mtime.tv_usec = vap->va_mtime.tv_nsec / 1000;

	na->na_ctime.tv_sec = vap->va_ctime.tv_sec;
	na->na_ctime.tv_usec = vap->va_ctime.tv_nsec / 1000;

	/*
	 * If the dev_t will fit into 16 bits then compress
	 * it, otherwise leave it alone.  See comments in
	 * nfs_client.c.
	 */
	if (getminor(vap->va_rdev) <= SO4_MAXMIN &&
	    getmajor(vap->va_rdev) <= SO4_MAXMAJ)
		na->na_rdev = nfsv2_cmpdev(vap->va_rdev);
	else
		(void) cmpldev(&na->na_rdev, vap->va_rdev);

	na->na_blocks = vap->va_nblocks;
	na->na_blocksize = vap->va_blksize;

	/*
	 * This bit of ugliness is a *TEMPORARY* hack to preserve the
	 * over-the-wire protocols for named-pipe vnodes.  It remaps the
	 * VFIFO type to the special over-the-wire type. (see note in nfs.h)
	 *
	 * BUYER BEWARE:
	 *  If you are porting the NFS to a non-Sun server, you probably
	 *  don't want to include the following block of code.  The
	 *  over-the-wire special file types will be changing with the
	 *  NFS Protocol Revision.
	 */
	if (vap->va_type == VFIFO)
		NA_SETFIFO(na);
	return (0);
}
2866 2921
2867 2922 /*
2868 2923 * acl v2 support: returns approximate permission.
2869 2924 * default: returns minimal permission (more restrictive)
2870 2925 * aclok: returns maximal permission (less restrictive)
2871 2926 * This routine changes the permissions that are alaredy in *va.
2872 2927 * If a file has minimal ACL, i.e. aclcnt == MIN_ACL_ENTRIES,
2873 2928 * CLASS_OBJ is always the same as GROUP_OBJ entry.
2874 2929 */
static void
acl_perm(struct vnode *vp, struct exportinfo *exi, struct vattr *va, cred_t *cr)
{
	vsecattr_t vsa;
	int aclcnt;
	aclent_t *aclentp;
	mode_t mask_perm;	/* CLASS_OBJ (mask) permission bits */
	mode_t grp_perm;	/* synthesized group permission bits */
	mode_t other_perm;	/* synthesized other permission bits */
	mode_t other_orig;	/* OTHER_OBJ bits as found in the ACL */
	int error;

	/* dont care default acl */
	vsa.vsa_mask = (VSA_ACL | VSA_ACLCNT);
	error = VOP_GETSECATTR(vp, &vsa, 0, cr, NULL);

	/* On error, *va is left untouched. */
	if (!error) {
		aclcnt = vsa.vsa_aclcnt;
		if (aclcnt > MIN_ACL_ENTRIES) {
			/* non-trivial ACL */
			aclentp = vsa.vsa_aclentp;
			if (exi->exi_export.ex_flags & EX_ACLOK) {
				/* maximal permissions */
				grp_perm = 0;
				other_perm = 0;
				/*
				 * NOTE(review): mask_perm and other_orig are
				 * only assigned when CLASS_OBJ and OTHER_OBJ
				 * entries are encountered.  A well-formed
				 * Solaris ACL always contains both, but
				 * nothing here enforces that -- confirm.
				 */
				for (; aclcnt > 0; aclcnt--, aclentp++) {
					switch (aclentp->a_type) {
					case USER_OBJ:
						break;
					case USER:
						grp_perm |=
						    aclentp->a_perm << 3;
						other_perm |= aclentp->a_perm;
						break;
					case GROUP_OBJ:
						grp_perm |=
						    aclentp->a_perm << 3;
						break;
					case GROUP:
						other_perm |= aclentp->a_perm;
						break;
					case OTHER_OBJ:
						other_orig = aclentp->a_perm;
						break;
					case CLASS_OBJ:
						mask_perm = aclentp->a_perm;
						break;
					default:
						break;
					}
				}
				/* apply the mask, then restore other bits */
				grp_perm &= mask_perm << 3;
				other_perm &= mask_perm;
				other_perm |= other_orig;

			} else {
				/* minimal permissions */
				grp_perm = 070;
				other_perm = 07;
				for (; aclcnt > 0; aclcnt--, aclentp++) {
					switch (aclentp->a_type) {
					case USER_OBJ:
						break;
					case USER:
					case CLASS_OBJ:
						grp_perm &=
						    aclentp->a_perm << 3;
						other_perm &=
						    aclentp->a_perm;
						break;
					case GROUP_OBJ:
						grp_perm &=
						    aclentp->a_perm << 3;
						break;
					case GROUP:
						other_perm &=
						    aclentp->a_perm;
						break;
					case OTHER_OBJ:
						other_perm &=
						    aclentp->a_perm;
						break;
					default:
						break;
					}
				}
			}
			/* copy to va */
			va->va_mode &= ~077;
			va->va_mode |= grp_perm | other_perm;
		}
		/* VOP_GETSECATTR allocated the entry array; free it */
		if (vsa.vsa_aclcnt)
			kmem_free(vsa.vsa_aclentp,
			    vsa.vsa_aclcnt * sizeof (aclent_t));
	}
}
2971 3026
2972 3027 void
2973 3028 rfs_srvrinit(void)
2974 3029 {
2975 3030 mutex_init(&rfs_async_write_lock, NULL, MUTEX_DEFAULT, NULL);
2976 3031 nfs2_srv_caller_id = fs_new_caller_id();
2977 3032 }
2978 3033
2979 3034 void
2980 3035 rfs_srvrfini(void)
2981 3036 {
2982 3037 mutex_destroy(&rfs_async_write_lock);
2983 3038 }
2984 3039
2985 3040 static int
2986 3041 rdma_setup_read_data2(struct nfsreadargs *ra, struct nfsrdresult *rr)
2987 3042 {
2988 3043 struct clist *wcl;
2989 3044 int wlist_len;
2990 3045 uint32_t count = rr->rr_count;
2991 3046
2992 3047 wcl = ra->ra_wlist;
2993 3048
2994 3049 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
2995 3050 return (FALSE);
2996 3051 }
2997 3052
2998 3053 wcl = ra->ra_wlist;
2999 3054 rr->rr_ok.rrok_wlist_len = wlist_len;
3000 3055 rr->rr_ok.rrok_wlist = wcl;
3001 3056
3002 3057 return (TRUE);
3003 3058 }
↓ open down ↓ |
2532 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX