1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 28 */ 29 30 #include <sys/types.h> 31 #include <sys/sysmacros.h> 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/fcntl.h> 35 #include <sys/vfs.h> 36 #include <sys/vnode.h> 37 #include <sys/share.h> 38 #include <sys/cmn_err.h> 39 #include <sys/kmem.h> 40 #include <sys/debug.h> 41 #include <sys/t_lock.h> 42 #include <sys/errno.h> 43 #include <sys/nbmlock.h> 44 45 int share_debug = 0; 46 47 #ifdef DEBUG 48 static void print_shares(struct vnode *); 49 static void print_share(struct shrlock *); 50 #endif 51 52 static int isreadonly(struct vnode *); 53 static void do_cleanshares(struct vnode *, pid_t, int32_t); 54 55 56 /* 57 * Add the share reservation shr to vp. 58 */ 59 int 60 add_share(struct vnode *vp, struct shrlock *shr) 61 { 62 struct shrlocklist *shrl; 63 64 /* 65 * An access of zero is not legal, however some older clients 66 * generate it anyways. Allow the request only if it is 67 * coming from a remote system. Be generous in what you 68 * accept and strict in what you send. 69 */ 70 if ((shr->s_access == 0) && (GETSYSID(shr->s_sysid) == 0)) { 71 return (EINVAL); 72 } 73 74 /* 75 * Sanity check to make sure we have valid options. 76 * There is known overlap but it doesn't hurt to be careful. 77 */ 78 if (shr->s_access & ~(F_RDACC|F_WRACC|F_RWACC|F_RMACC|F_MDACC)) { 79 return (EINVAL); 80 } 81 if (shr->s_deny & ~(F_NODNY|F_RDDNY|F_WRDNY|F_RWDNY|F_COMPAT| 82 F_MANDDNY|F_RMDNY)) { 83 return (EINVAL); 84 } 85 86 mutex_enter(&vp->v_lock); 87 for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) { 88 /* 89 * If the share owner matches previous request 90 * do special handling. 91 */ 92 if ((shrl->shr->s_sysid == shr->s_sysid) && 93 (shrl->shr->s_pid == shr->s_pid) && 94 (shrl->shr->s_own_len == shr->s_own_len) && 95 bcmp(shrl->shr->s_owner, shr->s_owner, 96 shr->s_own_len) == 0) { 97 98 /* 99 * If the existing request is F_COMPAT and 100 * is the first share then allow any F_COMPAT 101 * from the same process. Trick: If the existing 102 * F_COMPAT is write access then it must have 103 * the same owner as the first. 104 */ 105 if ((shrl->shr->s_deny & F_COMPAT) && 106 (shr->s_deny & F_COMPAT) && 107 ((shrl->next == NULL) || 108 (shrl->shr->s_access & F_WRACC))) 109 break; 110 } 111 112 /* 113 * If a first share has been done in compatibility mode 114 * handle the special cases. 115 */ 116 if ((shrl->shr->s_deny & F_COMPAT) && (shrl->next == NULL)) { 117 118 if (!(shr->s_deny & F_COMPAT)) { 119 /* 120 * If not compat and want write access or 121 * want to deny read or 122 * write exists, fails 123 */ 124 if ((shr->s_access & F_WRACC) || 125 (shr->s_deny & F_RDDNY) || 126 (shrl->shr->s_access & F_WRACC)) { 127 mutex_exit(&vp->v_lock); 128 return (EAGAIN); 129 } 130 /* 131 * If read only file allow, this may allow 132 * a deny write but that is meaningless on 133 * a read only file. 134 */ 135 if (isreadonly(vp)) 136 break; 137 mutex_exit(&vp->v_lock); 138 return (EAGAIN); 139 } 140 /* 141 * This is a compat request and read access 142 * and the first was also read access 143 * we always allow it, otherwise we reject because 144 * we have handled the only valid write case above. 145 */ 146 if ((shr->s_access == F_RDACC) && 147 (shrl->shr->s_access == F_RDACC)) 148 break; 149 mutex_exit(&vp->v_lock); 150 return (EAGAIN); 151 } 152 153 /* 154 * If we are trying to share in compatibility mode 155 * and the current share is compat (and not the first) 156 * we don't know enough. 157 */ 158 if ((shrl->shr->s_deny & F_COMPAT) && (shr->s_deny & F_COMPAT)) 159 continue; 160 161 /* 162 * If this is a compat we check for what can't succeed. 163 */ 164 if (shr->s_deny & F_COMPAT) { 165 /* 166 * If we want write access or 167 * if anyone is denying read or 168 * if anyone has write access we fail 169 */ 170 if ((shr->s_access & F_WRACC) || 171 (shrl->shr->s_deny & F_RDDNY) || 172 (shrl->shr->s_access & F_WRACC)) { 173 mutex_exit(&vp->v_lock); 174 return (EAGAIN); 175 } 176 /* 177 * If the first was opened with only read access 178 * and is a read only file we allow. 179 */ 180 if (shrl->next == NULL) { 181 if ((shrl->shr->s_access == F_RDACC) && 182 isreadonly(vp)) { 183 break; 184 } 185 mutex_exit(&vp->v_lock); 186 return (EAGAIN); 187 } 188 /* 189 * We still can't determine our fate so continue 190 */ 191 continue; 192 } 193 194 /* 195 * Simple bitwise test, if we are trying to access what 196 * someone else is denying or we are trying to deny 197 * what someone else is accessing we fail. 198 */ 199 if ((shr->s_access & shrl->shr->s_deny) || 200 (shr->s_deny & shrl->shr->s_access)) { 201 mutex_exit(&vp->v_lock); 202 return (EAGAIN); 203 } 204 } 205 206 shrl = kmem_alloc(sizeof (struct shrlocklist), KM_SLEEP); 207 shrl->shr = kmem_alloc(sizeof (struct shrlock), KM_SLEEP); 208 shrl->shr->s_access = shr->s_access; 209 shrl->shr->s_deny = shr->s_deny; 210 211 /* 212 * Make sure no other deny modes are also set with F_COMPAT 213 */ 214 if (shrl->shr->s_deny & F_COMPAT) 215 shrl->shr->s_deny = F_COMPAT; 216 shrl->shr->s_sysid = shr->s_sysid; /* XXX ref cnt? */ 217 shrl->shr->s_pid = shr->s_pid; 218 shrl->shr->s_own_len = shr->s_own_len; 219 shrl->shr->s_owner = kmem_alloc(shr->s_own_len, KM_SLEEP); 220 bcopy(shr->s_owner, shrl->shr->s_owner, shr->s_own_len); 221 shrl->next = vp->v_shrlocks; 222 vp->v_shrlocks = shrl; 223 #ifdef DEBUG 224 if (share_debug) 225 print_shares(vp); 226 #endif 227 228 mutex_exit(&vp->v_lock); 229 230 return (0); 231 } 232 233 /* 234 * nlmid sysid pid 235 * ===== ===== === 236 * !=0 !=0 =0 in cluster; NLM lock 237 * !=0 =0 =0 in cluster; special case for NLM lock 238 * !=0 =0 !=0 in cluster; PXFS local lock 239 * !=0 !=0 !=0 cannot happen 240 * =0 !=0 =0 not in cluster; NLM lock 241 * =0 =0 !=0 not in cluster; local lock 242 * =0 =0 =0 cannot happen 243 * =0 !=0 !=0 cannot happen 244 */ 245 static int 246 is_match_for_del(struct shrlock *shr, struct shrlock *element) 247 { 248 int nlmid1, nlmid2; 249 int result = 0; 250 251 nlmid1 = GETNLMID(shr->s_sysid); 252 nlmid2 = GETNLMID(element->s_sysid); 253 254 if (nlmid1 != 0) { /* in a cluster */ 255 if (GETSYSID(shr->s_sysid) != 0 && shr->s_pid == 0) { 256 /* 257 * Lock obtained through nlm server. Just need to 258 * compare whole sysids. pid will always = 0. 259 */ 260 result = shr->s_sysid == element->s_sysid; 261 } else if (GETSYSID(shr->s_sysid) == 0 && shr->s_pid == 0) { 262 /* 263 * This is a special case. The NLM server wishes to 264 * delete all share locks obtained through nlmid1. 265 */ 266 result = (nlmid1 == nlmid2); 267 } else if (GETSYSID(shr->s_sysid) == 0 && shr->s_pid != 0) { 268 /* 269 * Lock obtained locally through PXFS. Match nlmids 270 * and pids. 271 */ 272 result = (nlmid1 == nlmid2 && 273 shr->s_pid == element->s_pid); 274 } 275 } else { /* not in a cluster */ 276 result = ((shr->s_sysid == 0 && 277 shr->s_pid == element->s_pid) || 278 (shr->s_sysid != 0 && 279 shr->s_sysid == element->s_sysid)); 280 } 281 return (result); 282 } 283 284 /* 285 * Delete the given share reservation. Returns 0 if okay, EINVAL if the 286 * share could not be found. If the share reservation is an NBMAND share 287 * reservation, signal anyone waiting for the share to go away (e.g., 288 * blocking lock requests). 289 */ 290 291 int 292 del_share(struct vnode *vp, struct shrlock *shr) 293 { 294 struct shrlocklist *shrl; 295 struct shrlocklist **shrlp; 296 int found = 0; 297 int is_nbmand = 0; 298 299 mutex_enter(&vp->v_lock); 300 /* 301 * Delete the shares with the matching sysid and owner 302 * But if own_len == 0 and sysid == 0 delete all with matching pid 303 * But if own_len == 0 delete all with matching sysid. 304 */ 305 shrlp = &vp->v_shrlocks; 306 while (*shrlp) { 307 if ((shr->s_own_len == (*shrlp)->shr->s_own_len && 308 (bcmp(shr->s_owner, (*shrlp)->shr->s_owner, 309 shr->s_own_len) == 0)) || 310 311 (shr->s_own_len == 0 && 312 is_match_for_del(shr, (*shrlp)->shr))) { 313 314 shrl = *shrlp; 315 *shrlp = shrl->next; 316 317 if (shrl->shr->s_deny & F_MANDDNY) 318 is_nbmand = 1; 319 320 /* XXX deref sysid */ 321 kmem_free(shrl->shr->s_owner, shrl->shr->s_own_len); 322 kmem_free(shrl->shr, sizeof (struct shrlock)); 323 kmem_free(shrl, sizeof (struct shrlocklist)); 324 found++; 325 continue; 326 } 327 shrlp = &(*shrlp)->next; 328 } 329 330 if (is_nbmand) 331 cv_broadcast(&vp->v_cv); 332 333 mutex_exit(&vp->v_lock); 334 return (found ? 0 : EINVAL); 335 } 336 337 /* 338 * Clean up all local share reservations that the given process has with 339 * the given file. 340 */ 341 void 342 cleanshares(struct vnode *vp, pid_t pid) 343 { 344 do_cleanshares(vp, pid, 0); 345 } 346 347 /* 348 * Cleanup all remote share reservations that 349 * were made by the given sysid on given vnode. 350 */ 351 void 352 cleanshares_by_sysid(struct vnode *vp, int32_t sysid) 353 { 354 if (sysid == 0) 355 return; 356 357 do_cleanshares(vp, 0, sysid); 358 } 359 360 /* 361 * Cleanup share reservations on given vnode made 362 * by the either given pid or sysid. 363 * If sysid is 0, remove all shares made by given pid, 364 * otherwise all shares made by the given sysid will 365 * be removed. 366 */ 367 static void 368 do_cleanshares(struct vnode *vp, pid_t pid, int32_t sysid) 369 { 370 struct shrlock shr; 371 372 if (vp->v_shrlocks == NULL) 373 return; 374 375 shr.s_access = 0; 376 shr.s_deny = 0; 377 shr.s_pid = pid; 378 shr.s_sysid = sysid; 379 shr.s_own_len = 0; 380 shr.s_owner = NULL; 381 382 (void) del_share(vp, &shr); 383 } 384 385 static int 386 is_match_for_has_remote(int32_t sysid1, int32_t sysid2) 387 { 388 int result = 0; 389 390 if (GETNLMID(sysid1) != 0) { /* in a cluster */ 391 if (GETSYSID(sysid1) != 0) { 392 /* 393 * Lock obtained through nlm server. Just need to 394 * compare whole sysids. 395 */ 396 result = (sysid1 == sysid2); 397 } else if (GETSYSID(sysid1) == 0) { 398 /* 399 * This is a special case. The NLM server identified 400 * by nlmid1 wishes to find out if it has obtained 401 * any share locks on the vnode. 402 */ 403 result = (GETNLMID(sysid1) == GETNLMID(sysid2)); 404 } 405 } else { /* not in a cluster */ 406 result = ((sysid1 != 0 && sysid1 == sysid2) || 407 (sysid1 == 0 && sysid2 != 0)); 408 } 409 return (result); 410 } 411 412 413 /* 414 * Determine whether there are any shares for the given vnode 415 * with a remote sysid. Returns zero if not, non-zero if there are. 416 * If sysid is non-zero then determine if this sysid has a share. 417 * 418 * Note that the return value from this function is potentially invalid 419 * once it has been returned. The caller is responsible for providing its 420 * own synchronization mechanism to ensure that the return value is useful. 421 */ 422 int 423 shr_has_remote_shares(vnode_t *vp, int32_t sysid) 424 { 425 struct shrlocklist *shrl; 426 int result = 0; 427 428 mutex_enter(&vp->v_lock); 429 shrl = vp->v_shrlocks; 430 while (shrl) { 431 if (is_match_for_has_remote(sysid, shrl->shr->s_sysid)) { 432 433 result = 1; 434 break; 435 } 436 shrl = shrl->next; 437 } 438 mutex_exit(&vp->v_lock); 439 return (result); 440 } 441 442 static int 443 isreadonly(struct vnode *vp) 444 { 445 return (vp->v_type != VCHR && vp->v_type != VBLK && 446 vp->v_type != VFIFO && vn_is_readonly(vp)); 447 } 448 449 #ifdef DEBUG 450 static void 451 print_shares(struct vnode *vp) 452 { 453 struct shrlocklist *shrl; 454 455 if (vp->v_shrlocks == NULL) { 456 printf("<NULL>\n"); 457 return; 458 } 459 460 shrl = vp->v_shrlocks; 461 while (shrl) { 462 print_share(shrl->shr); 463 shrl = shrl->next; 464 } 465 } 466 467 static void 468 print_share(struct shrlock *shr) 469 { 470 int i; 471 472 if (shr == NULL) { 473 printf("<NULL>\n"); 474 return; 475 } 476 477 printf(" access(%d): ", shr->s_access); 478 if (shr->s_access & F_RDACC) 479 printf("R"); 480 if (shr->s_access & F_WRACC) 481 printf("W"); 482 if ((shr->s_access & (F_RDACC|F_WRACC)) == 0) 483 printf("N"); 484 printf("\n"); 485 printf(" deny: "); 486 if (shr->s_deny & F_COMPAT) 487 printf("C"); 488 if (shr->s_deny & F_RDDNY) 489 printf("R"); 490 if (shr->s_deny & F_WRDNY) 491 printf("W"); 492 if (shr->s_deny == F_NODNY) 493 printf("N"); 494 printf("\n"); 495 printf(" sysid: %d\n", shr->s_sysid); 496 printf(" pid: %d\n", shr->s_pid); 497 printf(" owner: [%d]", shr->s_own_len); 498 printf("'"); 499 for (i = 0; i < shr->s_own_len; i++) 500 printf("%02x", (unsigned)shr->s_owner[i]); 501 printf("'\n"); 502 } 503 #endif 504 505 /* 506 * Return non-zero if the given I/O request conflicts with a registered 507 * share reservation. 508 * 509 * A process is identified by the tuple (sysid, pid). When the caller 510 * context is passed to nbl_share_conflict, the sysid and pid in the 511 * caller context are used. Otherwise the sysid is zero, and the pid is 512 * taken from the current process. 513 * 514 * Conflict Algorithm: 515 * 1. An op request of NBL_READ will fail if a different 516 * process has a mandatory share reservation with deny read. 517 * 518 * 2. An op request of NBL_WRITE will fail if a different 519 * process has a mandatory share reservation with deny write. 520 * 521 * 3. An op request of NBL_READWRITE will fail if a different 522 * process has a mandatory share reservation with deny read 523 * or deny write. 524 * 525 * 4. An op request of NBL_REMOVE will fail if there is 526 * a mandatory share reservation with an access of read, 527 * write, or remove. (Anything other than meta data access). 528 * 529 * 5. An op request of NBL_RENAME will fail if there is 530 * a mandatory share reservation with: 531 * a) access write or access remove 532 * or 533 * b) access read and deny remove 534 * 535 * Otherwise there is no conflict and the op request succeeds. 536 * 537 * This behavior is required for interoperability between 538 * the nfs server, cifs server, and local access. 539 * This behavior can result in non-posix semantics. 540 * 541 * When mandatory share reservations are enabled, a process 542 * should call nbl_share_conflict to determine if the 543 * desired operation would conflict with an existing share 544 * reservation. 545 * 546 * The call to nbl_share_conflict may be skipped if the 547 * process has an existing share reservation and the operation 548 * is being performed in the context of that existing share 549 * reservation. 550 */ 551 int 552 nbl_share_conflict(vnode_t *vp, nbl_op_t op, caller_context_t *ct) 553 { 554 struct shrlocklist *shrl; 555 int conflict = 0; 556 pid_t pid; 557 int sysid; 558 559 ASSERT(nbl_in_crit(vp)); 560 561 if (ct == NULL) { 562 pid = curproc->p_pid; 563 sysid = 0; 564 } else { 565 pid = ct->cc_pid; 566 sysid = ct->cc_sysid; 567 } 568 569 mutex_enter(&vp->v_lock); 570 for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) { 571 if (!(shrl->shr->s_deny & F_MANDDNY)) 572 continue; 573 /* 574 * NBL_READ, NBL_WRITE, and NBL_READWRITE need to 575 * check if the share reservation being examined 576 * belongs to the current process. 577 * NBL_REMOVE and NBL_RENAME do not. 578 * This behavior is required by the conflict 579 * algorithm described above. 580 */ 581 switch (op) { 582 case NBL_READ: 583 if ((shrl->shr->s_deny & F_RDDNY) && 584 (shrl->shr->s_sysid != sysid || 585 shrl->shr->s_pid != pid)) 586 conflict = 1; 587 break; 588 case NBL_WRITE: 589 if ((shrl->shr->s_deny & F_WRDNY) && 590 (shrl->shr->s_sysid != sysid || 591 shrl->shr->s_pid != pid)) 592 conflict = 1; 593 break; 594 case NBL_READWRITE: 595 if ((shrl->shr->s_deny & F_RWDNY) && 596 (shrl->shr->s_sysid != sysid || 597 shrl->shr->s_pid != pid)) 598 conflict = 1; 599 break; 600 case NBL_REMOVE: 601 if (shrl->shr->s_access & (F_RWACC|F_RMACC)) 602 conflict = 1; 603 break; 604 case NBL_RENAME: 605 if (shrl->shr->s_access & (F_WRACC|F_RMACC)) 606 conflict = 1; 607 608 else if ((shrl->shr->s_access & F_RDACC) && 609 (shrl->shr->s_deny & F_RMDNY)) 610 conflict = 1; 611 break; 612 #ifdef DEBUG 613 default: 614 cmn_err(CE_PANIC, 615 "nbl_share_conflict: bogus op (%d)", 616 op); 617 break; 618 #endif 619 } 620 if (conflict) 621 break; 622 } 623 624 mutex_exit(&vp->v_lock); 625 return (conflict); 626 } 627 628 /* 629 * Determine if the given process has a NBMAND share reservation on the 630 * given vnode. Returns 1 if the process has such a share reservation, 631 * returns 0 otherwise. 632 */ 633 int 634 proc_has_nbmand_share_on_vp(vnode_t *vp, pid_t pid) 635 { 636 struct shrlocklist *shrl; 637 638 /* 639 * Any NBMAND share reservation on the vp for this process? 640 */ 641 mutex_enter(&vp->v_lock); 642 for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) { 643 if (shrl->shr->s_sysid == 0 && 644 (shrl->shr->s_deny & F_MANDDNY) && 645 (shrl->shr->s_pid == pid)) { 646 mutex_exit(&vp->v_lock); 647 return (1); 648 } 649 } 650 mutex_exit(&vp->v_lock); 651 652 return (0); 653 }