1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 #include <sys/types.h>
  27 #include <sys/sysmacros.h>
  28 #include <sys/param.h>
  29 #include <sys/systm.h>
  30 #include <sys/fcntl.h>
  31 #include <sys/vfs.h>
  32 #include <sys/vnode.h>
  33 #include <sys/share.h>
  34 #include <sys/cmn_err.h>
  35 #include <sys/kmem.h>
  36 #include <sys/debug.h>
  37 #include <sys/t_lock.h>
  38 #include <sys/errno.h>
  39 #include <sys/nbmlock.h>
  40 
/*
 * Debug switch.  In DEBUG builds, add_share() dumps the vnode's whole
 * share list via print_shares() after every successful insertion when
 * this is non-zero.
 */
int share_debug = 0;
  42 
  43 #ifdef DEBUG
  44 static void print_shares(struct vnode *);
  45 static void print_share(struct shrlock *);
  46 #endif
  47 
  48 static int isreadonly(struct vnode *);
  49 
  50 /*
  51  * Add the share reservation shr to vp.
  52  */
  53 int
  54 add_share(struct vnode *vp, struct shrlock *shr)
  55 {
  56         struct shrlocklist *shrl;
  57 
  58         /*
  59          * An access of zero is not legal, however some older clients
  60          * generate it anyways.  Allow the request only if it is
  61          * coming from a remote system.  Be generous in what you
  62          * accept and strict in what you send.
  63          */
  64         if ((shr->s_access == 0) && (GETSYSID(shr->s_sysid) == 0)) {
  65                 return (EINVAL);
  66         }
  67 
  68         /*
  69          * Sanity check to make sure we have valid options.
  70          * There is known overlap but it doesn't hurt to be careful.
  71          */
  72         if (shr->s_access & ~(F_RDACC|F_WRACC|F_RWACC|F_RMACC|F_MDACC)) {
  73                 return (EINVAL);
  74         }
  75         if (shr->s_deny & ~(F_NODNY|F_RDDNY|F_WRDNY|F_RWDNY|F_COMPAT|
  76             F_MANDDNY|F_RMDNY)) {
  77                 return (EINVAL);
  78         }
  79 
  80         mutex_enter(&vp->v_lock);
  81         for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) {
  82                 /*
  83                  * If the share owner matches previous request
  84                  * do special handling.
  85                  */
  86                 if ((shrl->shr->s_sysid == shr->s_sysid) &&
  87                     (shrl->shr->s_pid == shr->s_pid) &&
  88                     (shrl->shr->s_own_len == shr->s_own_len) &&
  89                     bcmp(shrl->shr->s_owner, shr->s_owner,
  90                     shr->s_own_len) == 0) {
  91 
  92                         /*
  93                          * If the existing request is F_COMPAT and
  94                          * is the first share then allow any F_COMPAT
  95                          * from the same process.  Trick:  If the existing
  96                          * F_COMPAT is write access then it must have
  97                          * the same owner as the first.
  98                          */
  99                         if ((shrl->shr->s_deny & F_COMPAT) &&
 100                             (shr->s_deny & F_COMPAT) &&
 101                             ((shrl->next == NULL) ||
 102                             (shrl->shr->s_access & F_WRACC)))
 103                                 break;
 104                 }
 105 
 106                 /*
 107                  * If a first share has been done in compatibility mode
 108                  * handle the special cases.
 109                  */
 110                 if ((shrl->shr->s_deny & F_COMPAT) && (shrl->next == NULL)) {
 111 
 112                         if (!(shr->s_deny & F_COMPAT)) {
 113                                 /*
 114                                  * If not compat and want write access or
 115                                  * want to deny read or
 116                                  * write exists, fails
 117                                  */
 118                                 if ((shr->s_access & F_WRACC) ||
 119                                     (shr->s_deny & F_RDDNY) ||
 120                                     (shrl->shr->s_access & F_WRACC)) {
 121                                         mutex_exit(&vp->v_lock);
 122                                         return (EAGAIN);
 123                                 }
 124                                 /*
 125                                  * If read only file allow, this may allow
 126                                  * a deny write but that is meaningless on
 127                                  * a read only file.
 128                                  */
 129                                 if (isreadonly(vp))
 130                                         break;
 131                                 mutex_exit(&vp->v_lock);
 132                                 return (EAGAIN);
 133                         }
 134                         /*
 135                          * This is a compat request and read access
 136                          * and the first was also read access
 137                          * we always allow it, otherwise we reject because
 138                          * we have handled the only valid write case above.
 139                          */
 140                         if ((shr->s_access == F_RDACC) &&
 141                             (shrl->shr->s_access == F_RDACC))
 142                                 break;
 143                         mutex_exit(&vp->v_lock);
 144                         return (EAGAIN);
 145                 }
 146 
 147                 /*
 148                  * If we are trying to share in compatibility mode
 149                  * and the current share is compat (and not the first)
 150                  * we don't know enough.
 151                  */
 152                 if ((shrl->shr->s_deny & F_COMPAT) && (shr->s_deny & F_COMPAT))
 153                         continue;
 154 
 155                 /*
 156                  * If this is a compat we check for what can't succeed.
 157                  */
 158                 if (shr->s_deny & F_COMPAT) {
 159                         /*
 160                          * If we want write access or
 161                          * if anyone is denying read or
 162                          * if anyone has write access we fail
 163                          */
 164                         if ((shr->s_access & F_WRACC) ||
 165                             (shrl->shr->s_deny & F_RDDNY) ||
 166                             (shrl->shr->s_access & F_WRACC)) {
 167                                 mutex_exit(&vp->v_lock);
 168                                 return (EAGAIN);
 169                         }
 170                         /*
 171                          * If the first was opened with only read access
 172                          * and is a read only file we allow.
 173                          */
 174                         if (shrl->next == NULL) {
 175                                 if ((shrl->shr->s_access == F_RDACC) &&
 176                                     isreadonly(vp)) {
 177                                         break;
 178                                 }
 179                                 mutex_exit(&vp->v_lock);
 180                                 return (EAGAIN);
 181                         }
 182                         /*
 183                          * We still can't determine our fate so continue
 184                          */
 185                         continue;
 186                 }
 187 
 188                 /*
 189                  * Simple bitwise test, if we are trying to access what
 190                  * someone else is denying or we are trying to deny
 191                  * what someone else is accessing we fail.
 192                  */
 193                 if ((shr->s_access & shrl->shr->s_deny) ||
 194                     (shr->s_deny & shrl->shr->s_access)) {
 195                         mutex_exit(&vp->v_lock);
 196                         return (EAGAIN);
 197                 }
 198         }
 199 
 200         shrl = kmem_alloc(sizeof (struct shrlocklist), KM_SLEEP);
 201         shrl->shr = kmem_alloc(sizeof (struct shrlock), KM_SLEEP);
 202         shrl->shr->s_access = shr->s_access;
 203         shrl->shr->s_deny = shr->s_deny;
 204 
 205         /*
 206          * Make sure no other deny modes are also set with F_COMPAT
 207          */
 208         if (shrl->shr->s_deny & F_COMPAT)
 209                 shrl->shr->s_deny = F_COMPAT;
 210         shrl->shr->s_sysid = shr->s_sysid;             /* XXX ref cnt? */
 211         shrl->shr->s_pid = shr->s_pid;
 212         shrl->shr->s_own_len = shr->s_own_len;
 213         shrl->shr->s_owner = kmem_alloc(shr->s_own_len, KM_SLEEP);
 214         bcopy(shr->s_owner, shrl->shr->s_owner, shr->s_own_len);
 215         shrl->next = vp->v_shrlocks;
 216         vp->v_shrlocks = shrl;
 217 #ifdef DEBUG
 218         if (share_debug)
 219                 print_shares(vp);
 220 #endif
 221 
 222         mutex_exit(&vp->v_lock);
 223 
 224         return (0);
 225 }
 226 
 227 /*
 228  *      nlmid   sysid   pid
 229  *      =====   =====   ===
 230  *      !=0     !=0     =0      in cluster; NLM lock
 231  *      !=0     =0      =0      in cluster; special case for NLM lock
 232  *      !=0     =0      !=0     in cluster; PXFS local lock
 233  *      !=0     !=0     !=0     cannot happen
 234  *      =0      !=0     =0      not in cluster; NLM lock
 235  *      =0      =0      !=0     not in cluster; local lock
 236  *      =0      =0      =0      cannot happen
 237  *      =0      !=0     !=0     cannot happen
 238  */
 239 static int
 240 is_match_for_del(struct shrlock *shr, struct shrlock *element)
 241 {
 242         int nlmid1, nlmid2;
 243         int result = 0;
 244 
 245         nlmid1 = GETNLMID(shr->s_sysid);
 246         nlmid2 = GETNLMID(element->s_sysid);
 247 
 248         if (nlmid1 != 0) {              /* in a cluster */
 249                 if (GETSYSID(shr->s_sysid) != 0 && shr->s_pid == 0) {
 250                         /*
 251                          * Lock obtained through nlm server.  Just need to
 252                          * compare whole sysids.  pid will always = 0.
 253                          */
 254                         result = shr->s_sysid == element->s_sysid;
 255                 } else if (GETSYSID(shr->s_sysid) == 0 && shr->s_pid == 0) {
 256                         /*
 257                          * This is a special case.  The NLM server wishes to
 258                          * delete all share locks obtained through nlmid1.
 259                          */
 260                         result = (nlmid1 == nlmid2);
 261                 } else if (GETSYSID(shr->s_sysid) == 0 && shr->s_pid != 0) {
 262                         /*
 263                          * Lock obtained locally through PXFS.  Match nlmids
 264                          * and pids.
 265                          */
 266                         result = (nlmid1 == nlmid2 &&
 267                             shr->s_pid == element->s_pid);
 268                 }
 269         } else {                        /* not in a cluster */
 270                 result = ((shr->s_sysid == 0 &&
 271                     shr->s_pid == element->s_pid) ||
 272                     (shr->s_sysid != 0 &&
 273                     shr->s_sysid == element->s_sysid));
 274         }
 275         return (result);
 276 }
 277 
 278 /*
 279  * Delete the given share reservation.  Returns 0 if okay, EINVAL if the
 280  * share could not be found.  If the share reservation is an NBMAND share
 281  * reservation, signal anyone waiting for the share to go away (e.g.,
 282  * blocking lock requests).
 283  */
 284 
 285 int
 286 del_share(struct vnode *vp, struct shrlock *shr)
 287 {
 288         struct shrlocklist *shrl;
 289         struct shrlocklist **shrlp;
 290         int found = 0;
 291         int is_nbmand = 0;
 292 
 293         mutex_enter(&vp->v_lock);
 294         /*
 295          * Delete the shares with the matching sysid and owner
 296          * But if own_len == 0 and sysid == 0 delete all with matching pid
 297          * But if own_len == 0 delete all with matching sysid.
 298          */
 299         shrlp = &vp->v_shrlocks;
 300         while (*shrlp) {
 301                 if ((shr->s_own_len == (*shrlp)->shr->s_own_len &&
 302                     (bcmp(shr->s_owner, (*shrlp)->shr->s_owner,
 303                     shr->s_own_len) == 0)) ||
 304 
 305                     (shr->s_own_len == 0 &&
 306                     is_match_for_del(shr, (*shrlp)->shr))) {
 307 
 308                         shrl = *shrlp;
 309                         *shrlp = shrl->next;
 310 
 311                         if (shrl->shr->s_deny & F_MANDDNY)
 312                                 is_nbmand = 1;
 313 
 314                         /* XXX deref sysid */
 315                         kmem_free(shrl->shr->s_owner, shrl->shr->s_own_len);
 316                         kmem_free(shrl->shr, sizeof (struct shrlock));
 317                         kmem_free(shrl, sizeof (struct shrlocklist));
 318                         found++;
 319                         continue;
 320                 }
 321                 shrlp = &(*shrlp)->next;
 322         }
 323 
 324         if (is_nbmand)
 325                 cv_broadcast(&vp->v_cv);
 326 
 327         mutex_exit(&vp->v_lock);
 328         return (found ? 0 : EINVAL);
 329 }
 330 
 331 /*
 332  * Clean up all local share reservations that the given process has with
 333  * the given file.
 334  */
 335 void
 336 cleanshares(struct vnode *vp, pid_t pid)
 337 {
 338         struct shrlock shr;
 339 
 340         if (vp->v_shrlocks == NULL)
 341                 return;
 342 
 343         shr.s_access = 0;
 344         shr.s_deny = 0;
 345         shr.s_pid = pid;
 346         shr.s_sysid = 0;
 347         shr.s_own_len = 0;
 348         shr.s_owner = NULL;
 349 
 350         (void) del_share(vp, &shr);
 351 }
 352 
 353 static int
 354 is_match_for_has_remote(int32_t sysid1, int32_t sysid2)
 355 {
 356         int result = 0;
 357 
 358         if (GETNLMID(sysid1) != 0) { /* in a cluster */
 359                 if (GETSYSID(sysid1) != 0) {
 360                         /*
 361                          * Lock obtained through nlm server.  Just need to
 362                          * compare whole sysids.
 363                          */
 364                         result = (sysid1 == sysid2);
 365                 } else if (GETSYSID(sysid1) == 0) {
 366                         /*
 367                          * This is a special case.  The NLM server identified
 368                          * by nlmid1 wishes to find out if it has obtained
 369                          * any share locks on the vnode.
 370                          */
 371                         result = (GETNLMID(sysid1) == GETNLMID(sysid2));
 372                 }
 373         } else {                        /* not in a cluster */
 374                 result = ((sysid1 != 0 && sysid1 == sysid2) ||
 375                     (sysid1 == 0 && sysid2 != 0));
 376         }
 377         return (result);
 378 }
 379 
 380 
 381 /*
 382  * Determine whether there are any shares for the given vnode
 383  * with a remote sysid. Returns zero if not, non-zero if there are.
 384  * If sysid is non-zero then determine if this sysid has a share.
 385  *
 386  * Note that the return value from this function is potentially invalid
 387  * once it has been returned.  The caller is responsible for providing its
 388  * own synchronization mechanism to ensure that the return value is useful.
 389  */
 390 int
 391 shr_has_remote_shares(vnode_t *vp, int32_t sysid)
 392 {
 393         struct shrlocklist *shrl;
 394         int result = 0;
 395 
 396         mutex_enter(&vp->v_lock);
 397         shrl = vp->v_shrlocks;
 398         while (shrl) {
 399                 if (is_match_for_has_remote(sysid, shrl->shr->s_sysid)) {
 400 
 401                         result = 1;
 402                         break;
 403                 }
 404                 shrl = shrl->next;
 405         }
 406         mutex_exit(&vp->v_lock);
 407         return (result);
 408 }
 409 
 410 static int
 411 isreadonly(struct vnode *vp)
 412 {
 413         return (vp->v_type != VCHR && vp->v_type != VBLK &&
 414             vp->v_type != VFIFO && vn_is_readonly(vp));
 415 }
 416 
 417 #ifdef DEBUG
 418 static void
 419 print_shares(struct vnode *vp)
 420 {
 421         struct shrlocklist *shrl;
 422 
 423         if (vp->v_shrlocks == NULL) {
 424                 printf("<NULL>\n");
 425                 return;
 426         }
 427 
 428         shrl = vp->v_shrlocks;
 429         while (shrl) {
 430                 print_share(shrl->shr);
 431                 shrl = shrl->next;
 432         }
 433 }
 434 
 435 static void
 436 print_share(struct shrlock *shr)
 437 {
 438         int i;
 439 
 440         if (shr == NULL) {
 441                 printf("<NULL>\n");
 442                 return;
 443         }
 444 
 445         printf("    access(%d): ", shr->s_access);
 446         if (shr->s_access & F_RDACC)
 447                 printf("R");
 448         if (shr->s_access & F_WRACC)
 449                 printf("W");
 450         if ((shr->s_access & (F_RDACC|F_WRACC)) == 0)
 451                 printf("N");
 452         printf("\n");
 453         printf("    deny:       ");
 454         if (shr->s_deny & F_COMPAT)
 455                 printf("C");
 456         if (shr->s_deny & F_RDDNY)
 457                 printf("R");
 458         if (shr->s_deny & F_WRDNY)
 459                 printf("W");
 460         if (shr->s_deny == F_NODNY)
 461                 printf("N");
 462         printf("\n");
 463         printf("    sysid:      %d\n", shr->s_sysid);
 464         printf("    pid:        %d\n", shr->s_pid);
 465         printf("    owner:      [%d]", shr->s_own_len);
 466         printf("'");
 467         for (i = 0; i < shr->s_own_len; i++)
 468                 printf("%02x", (unsigned)shr->s_owner[i]);
 469         printf("'\n");
 470 }
 471 #endif
 472 
 473 /*
 474  * Return non-zero if the given I/O request conflicts with a registered
 475  * share reservation.
 476  *
 477  * A process is identified by the tuple (sysid, pid). When the caller
 478  * context is passed to nbl_share_conflict, the sysid and pid in the
 479  * caller context are used. Otherwise the sysid is zero, and the pid is
 480  * taken from the current process.
 481  *
 482  * Conflict Algorithm:
 483  *   1. An op request of NBL_READ will fail if a different
 484  *      process has a mandatory share reservation with deny read.
 485  *
 486  *   2. An op request of NBL_WRITE will fail if a different
 487  *      process has a mandatory share reservation with deny write.
 488  *
 489  *   3. An op request of NBL_READWRITE will fail if a different
 490  *      process has a mandatory share reservation with deny read
 491  *      or deny write.
 492  *
 493  *   4. An op request of NBL_REMOVE will fail if there is
 494  *      a mandatory share reservation with an access of read,
 495  *      write, or remove. (Anything other than meta data access).
 496  *
 497  *   5. An op request of NBL_RENAME will fail if there is
 498  *      a mandatory share reservation with:
 499  *        a) access write or access remove
 500  *      or
 501  *        b) access read and deny remove
 502  *
 503  *   Otherwise there is no conflict and the op request succeeds.
 504  *
 505  * This behavior is required for interoperability between
 506  * the nfs server, cifs server, and local access.
 507  * This behavior can result in non-posix semantics.
 508  *
 509  * When mandatory share reservations are enabled, a process
 510  * should call nbl_share_conflict to determine if the
 511  * desired operation would conflict with an existing share
 512  * reservation.
 513  *
 514  * The call to nbl_share_conflict may be skipped if the
 515  * process has an existing share reservation and the operation
 516  * is being performed in the context of that existing share
 517  * reservation.
 518  */
 519 int
 520 nbl_share_conflict(vnode_t *vp, nbl_op_t op, caller_context_t *ct)
 521 {
 522         struct shrlocklist *shrl;
 523         int conflict = 0;
 524         pid_t pid;
 525         int sysid;
 526 
 527         ASSERT(nbl_in_crit(vp));
 528 
 529         if (ct == NULL) {
 530                 pid = curproc->p_pid;
 531                 sysid = 0;
 532         } else {
 533                 pid = ct->cc_pid;
 534                 sysid = ct->cc_sysid;
 535         }
 536 
 537         mutex_enter(&vp->v_lock);
 538         for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) {
 539                 if (!(shrl->shr->s_deny & F_MANDDNY))
 540                         continue;
 541                 /*
 542                  * NBL_READ, NBL_WRITE, and NBL_READWRITE need to
 543                  * check if the share reservation being examined
 544                  * belongs to the current process.
 545                  * NBL_REMOVE and NBL_RENAME do not.
 546                  * This behavior is required by the conflict
 547                  * algorithm described above.
 548                  */
 549                 switch (op) {
 550                 case NBL_READ:
 551                         if ((shrl->shr->s_deny & F_RDDNY) &&
 552                             (shrl->shr->s_sysid != sysid ||
 553                             shrl->shr->s_pid != pid))
 554                                 conflict = 1;
 555                         break;
 556                 case NBL_WRITE:
 557                         if ((shrl->shr->s_deny & F_WRDNY) &&
 558                             (shrl->shr->s_sysid != sysid ||
 559                             shrl->shr->s_pid != pid))
 560                                 conflict = 1;
 561                         break;
 562                 case NBL_READWRITE:
 563                         if ((shrl->shr->s_deny & F_RWDNY) &&
 564                             (shrl->shr->s_sysid != sysid ||
 565                             shrl->shr->s_pid != pid))
 566                                 conflict = 1;
 567                         break;
 568                 case NBL_REMOVE:
 569                         if (shrl->shr->s_access & (F_RWACC|F_RMACC))
 570                                 conflict = 1;
 571                         break;
 572                 case NBL_RENAME:
 573                         if (shrl->shr->s_access & (F_WRACC|F_RMACC))
 574                                 conflict = 1;
 575 
 576                         else if ((shrl->shr->s_access & F_RDACC) &&
 577                             (shrl->shr->s_deny & F_RMDNY))
 578                                 conflict = 1;
 579                         break;
 580 #ifdef DEBUG
 581                 default:
 582                         cmn_err(CE_PANIC,
 583                             "nbl_share_conflict: bogus op (%d)",
 584                             op);
 585                         break;
 586 #endif
 587                 }
 588                 if (conflict)
 589                         break;
 590         }
 591 
 592         mutex_exit(&vp->v_lock);
 593         return (conflict);
 594 }
 595 
 596 /*
 597  * Determine if the given process has a NBMAND share reservation on the
 598  * given vnode. Returns 1 if the process has such a share reservation,
 599  * returns 0 otherwise.
 600  */
 601 int
 602 proc_has_nbmand_share_on_vp(vnode_t *vp, pid_t pid)
 603 {
 604         struct shrlocklist *shrl;
 605 
 606         /*
 607          * Any NBMAND share reservation on the vp for this process?
 608          */
 609         mutex_enter(&vp->v_lock);
 610         for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) {
 611                 if (shrl->shr->s_sysid == 0 &&
 612                     (shrl->shr->s_deny & F_MANDDNY) &&
 613                     (shrl->shr->s_pid == pid)) {
 614                         mutex_exit(&vp->v_lock);
 615                         return (1);
 616                 }
 617         }
 618         mutex_exit(&vp->v_lock);
 619 
 620         return (0);
 621 }