1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/sysmacros.h> 28 #include <sys/param.h> 29 #include <sys/systm.h> 30 #include <sys/fcntl.h> 31 #include <sys/vfs.h> 32 #include <sys/vnode.h> 33 #include <sys/share.h> 34 #include <sys/cmn_err.h> 35 #include <sys/kmem.h> 36 #include <sys/debug.h> 37 #include <sys/t_lock.h> 38 #include <sys/errno.h> 39 #include <sys/nbmlock.h> 40 41 int share_debug = 0; 42 43 #ifdef DEBUG 44 static void print_shares(struct vnode *); 45 static void print_share(struct shrlock *); 46 #endif 47 48 static int isreadonly(struct vnode *); 49 50 /* 51 * Add the share reservation shr to vp. 52 */ 53 int 54 add_share(struct vnode *vp, struct shrlock *shr) 55 { 56 struct shrlocklist *shrl; 57 58 /* 59 * An access of zero is not legal, however some older clients 60 * generate it anyways. Allow the request only if it is 61 * coming from a remote system. Be generous in what you 62 * accept and strict in what you send. 63 */ 64 if ((shr->s_access == 0) && (GETSYSID(shr->s_sysid) == 0)) { 65 return (EINVAL); 66 } 67 68 /* 69 * Sanity check to make sure we have valid options. 70 * There is known overlap but it doesn't hurt to be careful. 71 */ 72 if (shr->s_access & ~(F_RDACC|F_WRACC|F_RWACC|F_RMACC|F_MDACC)) { 73 return (EINVAL); 74 } 75 if (shr->s_deny & ~(F_NODNY|F_RDDNY|F_WRDNY|F_RWDNY|F_COMPAT| 76 F_MANDDNY|F_RMDNY)) { 77 return (EINVAL); 78 } 79 80 mutex_enter(&vp->v_lock); 81 for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) { 82 /* 83 * If the share owner matches previous request 84 * do special handling. 85 */ 86 if ((shrl->shr->s_sysid == shr->s_sysid) && 87 (shrl->shr->s_pid == shr->s_pid) && 88 (shrl->shr->s_own_len == shr->s_own_len) && 89 bcmp(shrl->shr->s_owner, shr->s_owner, 90 shr->s_own_len) == 0) { 91 92 /* 93 * If the existing request is F_COMPAT and 94 * is the first share then allow any F_COMPAT 95 * from the same process. Trick: If the existing 96 * F_COMPAT is write access then it must have 97 * the same owner as the first. 98 */ 99 if ((shrl->shr->s_deny & F_COMPAT) && 100 (shr->s_deny & F_COMPAT) && 101 ((shrl->next == NULL) || 102 (shrl->shr->s_access & F_WRACC))) 103 break; 104 } 105 106 /* 107 * If a first share has been done in compatibility mode 108 * handle the special cases. 109 */ 110 if ((shrl->shr->s_deny & F_COMPAT) && (shrl->next == NULL)) { 111 112 if (!(shr->s_deny & F_COMPAT)) { 113 /* 114 * If not compat and want write access or 115 * want to deny read or 116 * write exists, fails 117 */ 118 if ((shr->s_access & F_WRACC) || 119 (shr->s_deny & F_RDDNY) || 120 (shrl->shr->s_access & F_WRACC)) { 121 mutex_exit(&vp->v_lock); 122 return (EAGAIN); 123 } 124 /* 125 * If read only file allow, this may allow 126 * a deny write but that is meaningless on 127 * a read only file. 128 */ 129 if (isreadonly(vp)) 130 break; 131 mutex_exit(&vp->v_lock); 132 return (EAGAIN); 133 } 134 /* 135 * This is a compat request and read access 136 * and the first was also read access 137 * we always allow it, otherwise we reject because 138 * we have handled the only valid write case above. 139 */ 140 if ((shr->s_access == F_RDACC) && 141 (shrl->shr->s_access == F_RDACC)) 142 break; 143 mutex_exit(&vp->v_lock); 144 return (EAGAIN); 145 } 146 147 /* 148 * If we are trying to share in compatibility mode 149 * and the current share is compat (and not the first) 150 * we don't know enough. 151 */ 152 if ((shrl->shr->s_deny & F_COMPAT) && (shr->s_deny & F_COMPAT)) 153 continue; 154 155 /* 156 * If this is a compat we check for what can't succeed. 157 */ 158 if (shr->s_deny & F_COMPAT) { 159 /* 160 * If we want write access or 161 * if anyone is denying read or 162 * if anyone has write access we fail 163 */ 164 if ((shr->s_access & F_WRACC) || 165 (shrl->shr->s_deny & F_RDDNY) || 166 (shrl->shr->s_access & F_WRACC)) { 167 mutex_exit(&vp->v_lock); 168 return (EAGAIN); 169 } 170 /* 171 * If the first was opened with only read access 172 * and is a read only file we allow. 173 */ 174 if (shrl->next == NULL) { 175 if ((shrl->shr->s_access == F_RDACC) && 176 isreadonly(vp)) { 177 break; 178 } 179 mutex_exit(&vp->v_lock); 180 return (EAGAIN); 181 } 182 /* 183 * We still can't determine our fate so continue 184 */ 185 continue; 186 } 187 188 /* 189 * Simple bitwise test, if we are trying to access what 190 * someone else is denying or we are trying to deny 191 * what someone else is accessing we fail. 192 */ 193 if ((shr->s_access & shrl->shr->s_deny) || 194 (shr->s_deny & shrl->shr->s_access)) { 195 mutex_exit(&vp->v_lock); 196 return (EAGAIN); 197 } 198 } 199 200 shrl = kmem_alloc(sizeof (struct shrlocklist), KM_SLEEP); 201 shrl->shr = kmem_alloc(sizeof (struct shrlock), KM_SLEEP); 202 shrl->shr->s_access = shr->s_access; 203 shrl->shr->s_deny = shr->s_deny; 204 205 /* 206 * Make sure no other deny modes are also set with F_COMPAT 207 */ 208 if (shrl->shr->s_deny & F_COMPAT) 209 shrl->shr->s_deny = F_COMPAT; 210 shrl->shr->s_sysid = shr->s_sysid; /* XXX ref cnt? */ 211 shrl->shr->s_pid = shr->s_pid; 212 shrl->shr->s_own_len = shr->s_own_len; 213 shrl->shr->s_owner = kmem_alloc(shr->s_own_len, KM_SLEEP); 214 bcopy(shr->s_owner, shrl->shr->s_owner, shr->s_own_len); 215 shrl->next = vp->v_shrlocks; 216 vp->v_shrlocks = shrl; 217 #ifdef DEBUG 218 if (share_debug) 219 print_shares(vp); 220 #endif 221 222 mutex_exit(&vp->v_lock); 223 224 return (0); 225 } 226 227 /* 228 * nlmid sysid pid 229 * ===== ===== === 230 * !=0 !=0 =0 in cluster; NLM lock 231 * !=0 =0 =0 in cluster; special case for NLM lock 232 * !=0 =0 !=0 in cluster; PXFS local lock 233 * !=0 !=0 !=0 cannot happen 234 * =0 !=0 =0 not in cluster; NLM lock 235 * =0 =0 !=0 not in cluster; local lock 236 * =0 =0 =0 cannot happen 237 * =0 !=0 !=0 cannot happen 238 */ 239 static int 240 is_match_for_del(struct shrlock *shr, struct shrlock *element) 241 { 242 int nlmid1, nlmid2; 243 int result = 0; 244 245 nlmid1 = GETNLMID(shr->s_sysid); 246 nlmid2 = GETNLMID(element->s_sysid); 247 248 if (nlmid1 != 0) { /* in a cluster */ 249 if (GETSYSID(shr->s_sysid) != 0 && shr->s_pid == 0) { 250 /* 251 * Lock obtained through nlm server. Just need to 252 * compare whole sysids. pid will always = 0. 253 */ 254 result = shr->s_sysid == element->s_sysid; 255 } else if (GETSYSID(shr->s_sysid) == 0 && shr->s_pid == 0) { 256 /* 257 * This is a special case. The NLM server wishes to 258 * delete all share locks obtained through nlmid1. 259 */ 260 result = (nlmid1 == nlmid2); 261 } else if (GETSYSID(shr->s_sysid) == 0 && shr->s_pid != 0) { 262 /* 263 * Lock obtained locally through PXFS. Match nlmids 264 * and pids. 265 */ 266 result = (nlmid1 == nlmid2 && 267 shr->s_pid == element->s_pid); 268 } 269 } else { /* not in a cluster */ 270 result = ((shr->s_sysid == 0 && 271 shr->s_pid == element->s_pid) || 272 (shr->s_sysid != 0 && 273 shr->s_sysid == element->s_sysid)); 274 } 275 return (result); 276 } 277 278 /* 279 * Delete the given share reservation. Returns 0 if okay, EINVAL if the 280 * share could not be found. If the share reservation is an NBMAND share 281 * reservation, signal anyone waiting for the share to go away (e.g., 282 * blocking lock requests). 283 */ 284 285 int 286 del_share(struct vnode *vp, struct shrlock *shr) 287 { 288 struct shrlocklist *shrl; 289 struct shrlocklist **shrlp; 290 int found = 0; 291 int is_nbmand = 0; 292 293 mutex_enter(&vp->v_lock); 294 /* 295 * Delete the shares with the matching sysid and owner 296 * But if own_len == 0 and sysid == 0 delete all with matching pid 297 * But if own_len == 0 delete all with matching sysid. 298 */ 299 shrlp = &vp->v_shrlocks; 300 while (*shrlp) { 301 if ((shr->s_own_len == (*shrlp)->shr->s_own_len && 302 (bcmp(shr->s_owner, (*shrlp)->shr->s_owner, 303 shr->s_own_len) == 0)) || 304 305 (shr->s_own_len == 0 && 306 is_match_for_del(shr, (*shrlp)->shr))) { 307 308 shrl = *shrlp; 309 *shrlp = shrl->next; 310 311 if (shrl->shr->s_deny & F_MANDDNY) 312 is_nbmand = 1; 313 314 /* XXX deref sysid */ 315 kmem_free(shrl->shr->s_owner, shrl->shr->s_own_len); 316 kmem_free(shrl->shr, sizeof (struct shrlock)); 317 kmem_free(shrl, sizeof (struct shrlocklist)); 318 found++; 319 continue; 320 } 321 shrlp = &(*shrlp)->next; 322 } 323 324 if (is_nbmand) 325 cv_broadcast(&vp->v_cv); 326 327 mutex_exit(&vp->v_lock); 328 return (found ? 0 : EINVAL); 329 } 330 331 /* 332 * Clean up all local share reservations that the given process has with 333 * the given file. 334 */ 335 void 336 cleanshares(struct vnode *vp, pid_t pid) 337 { 338 struct shrlock shr; 339 340 if (vp->v_shrlocks == NULL) 341 return; 342 343 shr.s_access = 0; 344 shr.s_deny = 0; 345 shr.s_pid = pid; 346 shr.s_sysid = 0; 347 shr.s_own_len = 0; 348 shr.s_owner = NULL; 349 350 (void) del_share(vp, &shr); 351 } 352 353 static int 354 is_match_for_has_remote(int32_t sysid1, int32_t sysid2) 355 { 356 int result = 0; 357 358 if (GETNLMID(sysid1) != 0) { /* in a cluster */ 359 if (GETSYSID(sysid1) != 0) { 360 /* 361 * Lock obtained through nlm server. Just need to 362 * compare whole sysids. 363 */ 364 result = (sysid1 == sysid2); 365 } else if (GETSYSID(sysid1) == 0) { 366 /* 367 * This is a special case. The NLM server identified 368 * by nlmid1 wishes to find out if it has obtained 369 * any share locks on the vnode. 370 */ 371 result = (GETNLMID(sysid1) == GETNLMID(sysid2)); 372 } 373 } else { /* not in a cluster */ 374 result = ((sysid1 != 0 && sysid1 == sysid2) || 375 (sysid1 == 0 && sysid2 != 0)); 376 } 377 return (result); 378 } 379 380 381 /* 382 * Determine whether there are any shares for the given vnode 383 * with a remote sysid. Returns zero if not, non-zero if there are. 384 * If sysid is non-zero then determine if this sysid has a share. 385 * 386 * Note that the return value from this function is potentially invalid 387 * once it has been returned. The caller is responsible for providing its 388 * own synchronization mechanism to ensure that the return value is useful. 389 */ 390 int 391 shr_has_remote_shares(vnode_t *vp, int32_t sysid) 392 { 393 struct shrlocklist *shrl; 394 int result = 0; 395 396 mutex_enter(&vp->v_lock); 397 shrl = vp->v_shrlocks; 398 while (shrl) { 399 if (is_match_for_has_remote(sysid, shrl->shr->s_sysid)) { 400 401 result = 1; 402 break; 403 } 404 shrl = shrl->next; 405 } 406 mutex_exit(&vp->v_lock); 407 return (result); 408 } 409 410 static int 411 isreadonly(struct vnode *vp) 412 { 413 return (vp->v_type != VCHR && vp->v_type != VBLK && 414 vp->v_type != VFIFO && vn_is_readonly(vp)); 415 } 416 417 #ifdef DEBUG 418 static void 419 print_shares(struct vnode *vp) 420 { 421 struct shrlocklist *shrl; 422 423 if (vp->v_shrlocks == NULL) { 424 printf("<NULL>\n"); 425 return; 426 } 427 428 shrl = vp->v_shrlocks; 429 while (shrl) { 430 print_share(shrl->shr); 431 shrl = shrl->next; 432 } 433 } 434 435 static void 436 print_share(struct shrlock *shr) 437 { 438 int i; 439 440 if (shr == NULL) { 441 printf("<NULL>\n"); 442 return; 443 } 444 445 printf(" access(%d): ", shr->s_access); 446 if (shr->s_access & F_RDACC) 447 printf("R"); 448 if (shr->s_access & F_WRACC) 449 printf("W"); 450 if ((shr->s_access & (F_RDACC|F_WRACC)) == 0) 451 printf("N"); 452 printf("\n"); 453 printf(" deny: "); 454 if (shr->s_deny & F_COMPAT) 455 printf("C"); 456 if (shr->s_deny & F_RDDNY) 457 printf("R"); 458 if (shr->s_deny & F_WRDNY) 459 printf("W"); 460 if (shr->s_deny == F_NODNY) 461 printf("N"); 462 printf("\n"); 463 printf(" sysid: %d\n", shr->s_sysid); 464 printf(" pid: %d\n", shr->s_pid); 465 printf(" owner: [%d]", shr->s_own_len); 466 printf("'"); 467 for (i = 0; i < shr->s_own_len; i++) 468 printf("%02x", (unsigned)shr->s_owner[i]); 469 printf("'\n"); 470 } 471 #endif 472 473 /* 474 * Return non-zero if the given I/O request conflicts with a registered 475 * share reservation. 476 * 477 * A process is identified by the tuple (sysid, pid). When the caller 478 * context is passed to nbl_share_conflict, the sysid and pid in the 479 * caller context are used. Otherwise the sysid is zero, and the pid is 480 * taken from the current process. 481 * 482 * Conflict Algorithm: 483 * 1. An op request of NBL_READ will fail if a different 484 * process has a mandatory share reservation with deny read. 485 * 486 * 2. An op request of NBL_WRITE will fail if a different 487 * process has a mandatory share reservation with deny write. 488 * 489 * 3. An op request of NBL_READWRITE will fail if a different 490 * process has a mandatory share reservation with deny read 491 * or deny write. 492 * 493 * 4. An op request of NBL_REMOVE will fail if there is 494 * a mandatory share reservation with an access of read, 495 * write, or remove. (Anything other than meta data access). 496 * 497 * 5. An op request of NBL_RENAME will fail if there is 498 * a mandatory share reservation with: 499 * a) access write or access remove 500 * or 501 * b) access read and deny remove 502 * 503 * Otherwise there is no conflict and the op request succeeds. 504 * 505 * This behavior is required for interoperability between 506 * the nfs server, cifs server, and local access. 507 * This behavior can result in non-posix semantics. 508 * 509 * When mandatory share reservations are enabled, a process 510 * should call nbl_share_conflict to determine if the 511 * desired operation would conflict with an existing share 512 * reservation. 513 * 514 * The call to nbl_share_conflict may be skipped if the 515 * process has an existing share reservation and the operation 516 * is being performed in the context of that existing share 517 * reservation. 518 */ 519 int 520 nbl_share_conflict(vnode_t *vp, nbl_op_t op, caller_context_t *ct) 521 { 522 struct shrlocklist *shrl; 523 int conflict = 0; 524 pid_t pid; 525 int sysid; 526 527 ASSERT(nbl_in_crit(vp)); 528 529 if (ct == NULL) { 530 pid = curproc->p_pid; 531 sysid = 0; 532 } else { 533 pid = ct->cc_pid; 534 sysid = ct->cc_sysid; 535 } 536 537 mutex_enter(&vp->v_lock); 538 for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) { 539 if (!(shrl->shr->s_deny & F_MANDDNY)) 540 continue; 541 /* 542 * NBL_READ, NBL_WRITE, and NBL_READWRITE need to 543 * check if the share reservation being examined 544 * belongs to the current process. 545 * NBL_REMOVE and NBL_RENAME do not. 546 * This behavior is required by the conflict 547 * algorithm described above. 548 */ 549 switch (op) { 550 case NBL_READ: 551 if ((shrl->shr->s_deny & F_RDDNY) && 552 (shrl->shr->s_sysid != sysid || 553 shrl->shr->s_pid != pid)) 554 conflict = 1; 555 break; 556 case NBL_WRITE: 557 if ((shrl->shr->s_deny & F_WRDNY) && 558 (shrl->shr->s_sysid != sysid || 559 shrl->shr->s_pid != pid)) 560 conflict = 1; 561 break; 562 case NBL_READWRITE: 563 if ((shrl->shr->s_deny & F_RWDNY) && 564 (shrl->shr->s_sysid != sysid || 565 shrl->shr->s_pid != pid)) 566 conflict = 1; 567 break; 568 case NBL_REMOVE: 569 if (shrl->shr->s_access & (F_RWACC|F_RMACC)) 570 conflict = 1; 571 break; 572 case NBL_RENAME: 573 if (shrl->shr->s_access & (F_WRACC|F_RMACC)) 574 conflict = 1; 575 576 else if ((shrl->shr->s_access & F_RDACC) && 577 (shrl->shr->s_deny & F_RMDNY)) 578 conflict = 1; 579 break; 580 #ifdef DEBUG 581 default: 582 cmn_err(CE_PANIC, 583 "nbl_share_conflict: bogus op (%d)", 584 op); 585 break; 586 #endif 587 } 588 if (conflict) 589 break; 590 } 591 592 mutex_exit(&vp->v_lock); 593 return (conflict); 594 } 595 596 /* 597 * Determine if the given process has a NBMAND share reservation on the 598 * given vnode. Returns 1 if the process has such a share reservation, 599 * returns 0 otherwise. 600 */ 601 int 602 proc_has_nbmand_share_on_vp(vnode_t *vp, pid_t pid) 603 { 604 struct shrlocklist *shrl; 605 606 /* 607 * Any NBMAND share reservation on the vp for this process? 608 */ 609 mutex_enter(&vp->v_lock); 610 for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) { 611 if (shrl->shr->s_sysid == 0 && 612 (shrl->shr->s_deny & F_MANDDNY) && 613 (shrl->shr->s_pid == pid)) { 614 mutex_exit(&vp->v_lock); 615 return (1); 616 } 617 } 618 mutex_exit(&vp->v_lock); 619 620 return (0); 621 }