1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /*
  27  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  28  */
  29 
  30 #include <sys/types.h>
  31 #include <sys/sysmacros.h>
  32 #include <sys/param.h>
  33 #include <sys/systm.h>
  34 #include <sys/fcntl.h>
  35 #include <sys/vfs.h>
  36 #include <sys/vnode.h>
  37 #include <sys/share.h>
  38 #include <sys/cmn_err.h>
  39 #include <sys/kmem.h>
  40 #include <sys/debug.h>
  41 #include <sys/t_lock.h>
  42 #include <sys/errno.h>
  43 #include <sys/nbmlock.h>
  44 
/*
 * Debug switch.  On a DEBUG build, add_share() prints the vnode's whole
 * share list after each successful insertion when this is non-zero.
 */
int share_debug = 0;

#ifdef DEBUG
/* Debug-only printers for a vnode's share list and for a single share. */
static void print_shares(struct vnode *);
static void print_share(struct shrlock *);
#endif

/* Forward declarations of file-local helpers defined later in this file. */
static int isreadonly(struct vnode *);
static void do_cleanshares(struct vnode *, pid_t, int32_t);
  54 
  55 
  56 /*
  57  * Add the share reservation shr to vp.
  58  */
  59 int
  60 add_share(struct vnode *vp, struct shrlock *shr)
  61 {
  62         struct shrlocklist *shrl;
  63 
  64         /*
  65          * An access of zero is not legal, however some older clients
  66          * generate it anyways.  Allow the request only if it is
  67          * coming from a remote system.  Be generous in what you
  68          * accept and strict in what you send.
  69          */
  70         if ((shr->s_access == 0) && (GETSYSID(shr->s_sysid) == 0)) {
  71                 return (EINVAL);
  72         }
  73 
  74         /*
  75          * Sanity check to make sure we have valid options.
  76          * There is known overlap but it doesn't hurt to be careful.
  77          */
  78         if (shr->s_access & ~(F_RDACC|F_WRACC|F_RWACC|F_RMACC|F_MDACC)) {
  79                 return (EINVAL);
  80         }
  81         if (shr->s_deny & ~(F_NODNY|F_RDDNY|F_WRDNY|F_RWDNY|F_COMPAT|
  82             F_MANDDNY|F_RMDNY)) {
  83                 return (EINVAL);
  84         }
  85 
  86         mutex_enter(&vp->v_lock);
  87         for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) {
  88                 /*
  89                  * If the share owner matches previous request
  90                  * do special handling.
  91                  */
  92                 if ((shrl->shr->s_sysid == shr->s_sysid) &&
  93                     (shrl->shr->s_pid == shr->s_pid) &&
  94                     (shrl->shr->s_own_len == shr->s_own_len) &&
  95                     bcmp(shrl->shr->s_owner, shr->s_owner,
  96                     shr->s_own_len) == 0) {
  97 
  98                         /*
  99                          * If the existing request is F_COMPAT and
 100                          * is the first share then allow any F_COMPAT
 101                          * from the same process.  Trick:  If the existing
 102                          * F_COMPAT is write access then it must have
 103                          * the same owner as the first.
 104                          */
 105                         if ((shrl->shr->s_deny & F_COMPAT) &&
 106                             (shr->s_deny & F_COMPAT) &&
 107                             ((shrl->next == NULL) ||
 108                             (shrl->shr->s_access & F_WRACC)))
 109                                 break;
 110                 }
 111 
 112                 /*
 113                  * If a first share has been done in compatibility mode
 114                  * handle the special cases.
 115                  */
 116                 if ((shrl->shr->s_deny & F_COMPAT) && (shrl->next == NULL)) {
 117 
 118                         if (!(shr->s_deny & F_COMPAT)) {
 119                                 /*
 120                                  * If not compat and want write access or
 121                                  * want to deny read or
 122                                  * write exists, fails
 123                                  */
 124                                 if ((shr->s_access & F_WRACC) ||
 125                                     (shr->s_deny & F_RDDNY) ||
 126                                     (shrl->shr->s_access & F_WRACC)) {
 127                                         mutex_exit(&vp->v_lock);
 128                                         return (EAGAIN);
 129                                 }
 130                                 /*
 131                                  * If read only file allow, this may allow
 132                                  * a deny write but that is meaningless on
 133                                  * a read only file.
 134                                  */
 135                                 if (isreadonly(vp))
 136                                         break;
 137                                 mutex_exit(&vp->v_lock);
 138                                 return (EAGAIN);
 139                         }
 140                         /*
 141                          * This is a compat request and read access
 142                          * and the first was also read access
 143                          * we always allow it, otherwise we reject because
 144                          * we have handled the only valid write case above.
 145                          */
 146                         if ((shr->s_access == F_RDACC) &&
 147                             (shrl->shr->s_access == F_RDACC))
 148                                 break;
 149                         mutex_exit(&vp->v_lock);
 150                         return (EAGAIN);
 151                 }
 152 
 153                 /*
 154                  * If we are trying to share in compatibility mode
 155                  * and the current share is compat (and not the first)
 156                  * we don't know enough.
 157                  */
 158                 if ((shrl->shr->s_deny & F_COMPAT) && (shr->s_deny & F_COMPAT))
 159                         continue;
 160 
 161                 /*
 162                  * If this is a compat we check for what can't succeed.
 163                  */
 164                 if (shr->s_deny & F_COMPAT) {
 165                         /*
 166                          * If we want write access or
 167                          * if anyone is denying read or
 168                          * if anyone has write access we fail
 169                          */
 170                         if ((shr->s_access & F_WRACC) ||
 171                             (shrl->shr->s_deny & F_RDDNY) ||
 172                             (shrl->shr->s_access & F_WRACC)) {
 173                                 mutex_exit(&vp->v_lock);
 174                                 return (EAGAIN);
 175                         }
 176                         /*
 177                          * If the first was opened with only read access
 178                          * and is a read only file we allow.
 179                          */
 180                         if (shrl->next == NULL) {
 181                                 if ((shrl->shr->s_access == F_RDACC) &&
 182                                     isreadonly(vp)) {
 183                                         break;
 184                                 }
 185                                 mutex_exit(&vp->v_lock);
 186                                 return (EAGAIN);
 187                         }
 188                         /*
 189                          * We still can't determine our fate so continue
 190                          */
 191                         continue;
 192                 }
 193 
 194                 /*
 195                  * Simple bitwise test, if we are trying to access what
 196                  * someone else is denying or we are trying to deny
 197                  * what someone else is accessing we fail.
 198                  */
 199                 if ((shr->s_access & shrl->shr->s_deny) ||
 200                     (shr->s_deny & shrl->shr->s_access)) {
 201                         mutex_exit(&vp->v_lock);
 202                         return (EAGAIN);
 203                 }
 204         }
 205 
 206         shrl = kmem_alloc(sizeof (struct shrlocklist), KM_SLEEP);
 207         shrl->shr = kmem_alloc(sizeof (struct shrlock), KM_SLEEP);
 208         shrl->shr->s_access = shr->s_access;
 209         shrl->shr->s_deny = shr->s_deny;
 210 
 211         /*
 212          * Make sure no other deny modes are also set with F_COMPAT
 213          */
 214         if (shrl->shr->s_deny & F_COMPAT)
 215                 shrl->shr->s_deny = F_COMPAT;
 216         shrl->shr->s_sysid = shr->s_sysid;             /* XXX ref cnt? */
 217         shrl->shr->s_pid = shr->s_pid;
 218         shrl->shr->s_own_len = shr->s_own_len;
 219         shrl->shr->s_owner = kmem_alloc(shr->s_own_len, KM_SLEEP);
 220         bcopy(shr->s_owner, shrl->shr->s_owner, shr->s_own_len);
 221         shrl->next = vp->v_shrlocks;
 222         vp->v_shrlocks = shrl;
 223 #ifdef DEBUG
 224         if (share_debug)
 225                 print_shares(vp);
 226 #endif
 227 
 228         mutex_exit(&vp->v_lock);
 229 
 230         return (0);
 231 }
 232 
 233 /*
 234  *      nlmid   sysid   pid
 235  *      =====   =====   ===
 236  *      !=0     !=0     =0      in cluster; NLM lock
 237  *      !=0     =0      =0      in cluster; special case for NLM lock
 238  *      !=0     =0      !=0     in cluster; PXFS local lock
 239  *      !=0     !=0     !=0     cannot happen
 240  *      =0      !=0     =0      not in cluster; NLM lock
 241  *      =0      =0      !=0     not in cluster; local lock
 242  *      =0      =0      =0      cannot happen
 243  *      =0      !=0     !=0     cannot happen
 244  */
 245 static int
 246 is_match_for_del(struct shrlock *shr, struct shrlock *element)
 247 {
 248         int nlmid1, nlmid2;
 249         int result = 0;
 250 
 251         nlmid1 = GETNLMID(shr->s_sysid);
 252         nlmid2 = GETNLMID(element->s_sysid);
 253 
 254         if (nlmid1 != 0) {              /* in a cluster */
 255                 if (GETSYSID(shr->s_sysid) != 0 && shr->s_pid == 0) {
 256                         /*
 257                          * Lock obtained through nlm server.  Just need to
 258                          * compare whole sysids.  pid will always = 0.
 259                          */
 260                         result = shr->s_sysid == element->s_sysid;
 261                 } else if (GETSYSID(shr->s_sysid) == 0 && shr->s_pid == 0) {
 262                         /*
 263                          * This is a special case.  The NLM server wishes to
 264                          * delete all share locks obtained through nlmid1.
 265                          */
 266                         result = (nlmid1 == nlmid2);
 267                 } else if (GETSYSID(shr->s_sysid) == 0 && shr->s_pid != 0) {
 268                         /*
 269                          * Lock obtained locally through PXFS.  Match nlmids
 270                          * and pids.
 271                          */
 272                         result = (nlmid1 == nlmid2 &&
 273                             shr->s_pid == element->s_pid);
 274                 }
 275         } else {                        /* not in a cluster */
 276                 result = ((shr->s_sysid == 0 &&
 277                     shr->s_pid == element->s_pid) ||
 278                     (shr->s_sysid != 0 &&
 279                     shr->s_sysid == element->s_sysid));
 280         }
 281         return (result);
 282 }
 283 
 284 /*
 285  * Delete the given share reservation.  Returns 0 if okay, EINVAL if the
 286  * share could not be found.  If the share reservation is an NBMAND share
 287  * reservation, signal anyone waiting for the share to go away (e.g.,
 288  * blocking lock requests).
 289  */
 290 
 291 int
 292 del_share(struct vnode *vp, struct shrlock *shr)
 293 {
 294         struct shrlocklist *shrl;
 295         struct shrlocklist **shrlp;
 296         int found = 0;
 297         int is_nbmand = 0;
 298 
 299         mutex_enter(&vp->v_lock);
 300         /*
 301          * Delete the shares with the matching sysid and owner
 302          * But if own_len == 0 and sysid == 0 delete all with matching pid
 303          * But if own_len == 0 delete all with matching sysid.
 304          */
 305         shrlp = &vp->v_shrlocks;
 306         while (*shrlp) {
 307                 if ((shr->s_own_len == (*shrlp)->shr->s_own_len &&
 308                     (bcmp(shr->s_owner, (*shrlp)->shr->s_owner,
 309                     shr->s_own_len) == 0)) ||
 310 
 311                     (shr->s_own_len == 0 &&
 312                     is_match_for_del(shr, (*shrlp)->shr))) {
 313 
 314                         shrl = *shrlp;
 315                         *shrlp = shrl->next;
 316 
 317                         if (shrl->shr->s_deny & F_MANDDNY)
 318                                 is_nbmand = 1;
 319 
 320                         /* XXX deref sysid */
 321                         kmem_free(shrl->shr->s_owner, shrl->shr->s_own_len);
 322                         kmem_free(shrl->shr, sizeof (struct shrlock));
 323                         kmem_free(shrl, sizeof (struct shrlocklist));
 324                         found++;
 325                         continue;
 326                 }
 327                 shrlp = &(*shrlp)->next;
 328         }
 329 
 330         if (is_nbmand)
 331                 cv_broadcast(&vp->v_cv);
 332 
 333         mutex_exit(&vp->v_lock);
 334         return (found ? 0 : EINVAL);
 335 }
 336 
 337 /*
 338  * Clean up all local share reservations that the given process has with
 339  * the given file.
 340  */
 341 void
 342 cleanshares(struct vnode *vp, pid_t pid)
 343 {
 344         do_cleanshares(vp, pid, 0);
 345 }
 346 
 347 /*
 348  * Cleanup all remote share reservations that
 349  * were made by the given sysid on given vnode.
 350  */
 351 void
 352 cleanshares_by_sysid(struct vnode *vp, int32_t sysid)
 353 {
 354         if (sysid == 0)
 355                 return;
 356 
 357         do_cleanshares(vp, 0, sysid);
 358 }
 359 
 360 /*
 361  * Cleanup share reservations on given vnode made
 362  * by the either given pid or sysid.
 363  * If sysid is 0, remove all shares made by given pid,
 364  * otherwise all shares made by the given sysid will
 365  * be removed.
 366  */
 367 static void
 368 do_cleanshares(struct vnode *vp, pid_t pid, int32_t sysid)
 369 {
 370         struct shrlock shr;
 371 
 372         if (vp->v_shrlocks == NULL)
 373                 return;
 374 
 375         shr.s_access = 0;
 376         shr.s_deny = 0;
 377         shr.s_pid = pid;
 378         shr.s_sysid = sysid;
 379         shr.s_own_len = 0;
 380         shr.s_owner = NULL;
 381 
 382         (void) del_share(vp, &shr);
 383 }
 384 
 385 static int
 386 is_match_for_has_remote(int32_t sysid1, int32_t sysid2)
 387 {
 388         int result = 0;
 389 
 390         if (GETNLMID(sysid1) != 0) { /* in a cluster */
 391                 if (GETSYSID(sysid1) != 0) {
 392                         /*
 393                          * Lock obtained through nlm server.  Just need to
 394                          * compare whole sysids.
 395                          */
 396                         result = (sysid1 == sysid2);
 397                 } else if (GETSYSID(sysid1) == 0) {
 398                         /*
 399                          * This is a special case.  The NLM server identified
 400                          * by nlmid1 wishes to find out if it has obtained
 401                          * any share locks on the vnode.
 402                          */
 403                         result = (GETNLMID(sysid1) == GETNLMID(sysid2));
 404                 }
 405         } else {                        /* not in a cluster */
 406                 result = ((sysid1 != 0 && sysid1 == sysid2) ||
 407                     (sysid1 == 0 && sysid2 != 0));
 408         }
 409         return (result);
 410 }
 411 
 412 
 413 /*
 414  * Determine whether there are any shares for the given vnode
 415  * with a remote sysid. Returns zero if not, non-zero if there are.
 416  * If sysid is non-zero then determine if this sysid has a share.
 417  *
 418  * Note that the return value from this function is potentially invalid
 419  * once it has been returned.  The caller is responsible for providing its
 420  * own synchronization mechanism to ensure that the return value is useful.
 421  */
 422 int
 423 shr_has_remote_shares(vnode_t *vp, int32_t sysid)
 424 {
 425         struct shrlocklist *shrl;
 426         int result = 0;
 427 
 428         mutex_enter(&vp->v_lock);
 429         shrl = vp->v_shrlocks;
 430         while (shrl) {
 431                 if (is_match_for_has_remote(sysid, shrl->shr->s_sysid)) {
 432 
 433                         result = 1;
 434                         break;
 435                 }
 436                 shrl = shrl->next;
 437         }
 438         mutex_exit(&vp->v_lock);
 439         return (result);
 440 }
 441 
 442 static int
 443 isreadonly(struct vnode *vp)
 444 {
 445         return (vp->v_type != VCHR && vp->v_type != VBLK &&
 446             vp->v_type != VFIFO && vn_is_readonly(vp));
 447 }
 448 
 449 #ifdef DEBUG
 450 static void
 451 print_shares(struct vnode *vp)
 452 {
 453         struct shrlocklist *shrl;
 454 
 455         if (vp->v_shrlocks == NULL) {
 456                 printf("<NULL>\n");
 457                 return;
 458         }
 459 
 460         shrl = vp->v_shrlocks;
 461         while (shrl) {
 462                 print_share(shrl->shr);
 463                 shrl = shrl->next;
 464         }
 465 }
 466 
 467 static void
 468 print_share(struct shrlock *shr)
 469 {
 470         int i;
 471 
 472         if (shr == NULL) {
 473                 printf("<NULL>\n");
 474                 return;
 475         }
 476 
 477         printf("    access(%d): ", shr->s_access);
 478         if (shr->s_access & F_RDACC)
 479                 printf("R");
 480         if (shr->s_access & F_WRACC)
 481                 printf("W");
 482         if ((shr->s_access & (F_RDACC|F_WRACC)) == 0)
 483                 printf("N");
 484         printf("\n");
 485         printf("    deny:       ");
 486         if (shr->s_deny & F_COMPAT)
 487                 printf("C");
 488         if (shr->s_deny & F_RDDNY)
 489                 printf("R");
 490         if (shr->s_deny & F_WRDNY)
 491                 printf("W");
 492         if (shr->s_deny == F_NODNY)
 493                 printf("N");
 494         printf("\n");
 495         printf("    sysid:      %d\n", shr->s_sysid);
 496         printf("    pid:        %d\n", shr->s_pid);
 497         printf("    owner:      [%d]", shr->s_own_len);
 498         printf("'");
 499         for (i = 0; i < shr->s_own_len; i++)
 500                 printf("%02x", (unsigned)shr->s_owner[i]);
 501         printf("'\n");
 502 }
 503 #endif
 504 
 505 /*
 506  * Return non-zero if the given I/O request conflicts with a registered
 507  * share reservation.
 508  *
 509  * A process is identified by the tuple (sysid, pid). When the caller
 510  * context is passed to nbl_share_conflict, the sysid and pid in the
 511  * caller context are used. Otherwise the sysid is zero, and the pid is
 512  * taken from the current process.
 513  *
 514  * Conflict Algorithm:
 515  *   1. An op request of NBL_READ will fail if a different
 516  *      process has a mandatory share reservation with deny read.
 517  *
 518  *   2. An op request of NBL_WRITE will fail if a different
 519  *      process has a mandatory share reservation with deny write.
 520  *
 521  *   3. An op request of NBL_READWRITE will fail if a different
 522  *      process has a mandatory share reservation with deny read
 523  *      or deny write.
 524  *
 525  *   4. An op request of NBL_REMOVE will fail if there is
 526  *      a mandatory share reservation with an access of read,
 527  *      write, or remove. (Anything other than meta data access).
 528  *
 529  *   5. An op request of NBL_RENAME will fail if there is
 530  *      a mandatory share reservation with:
 531  *        a) access write or access remove
 532  *      or
 533  *        b) access read and deny remove
 534  *
 535  *   Otherwise there is no conflict and the op request succeeds.
 536  *
 537  * This behavior is required for interoperability between
 538  * the nfs server, cifs server, and local access.
 539  * This behavior can result in non-posix semantics.
 540  *
 541  * When mandatory share reservations are enabled, a process
 542  * should call nbl_share_conflict to determine if the
 543  * desired operation would conflict with an existing share
 544  * reservation.
 545  *
 546  * The call to nbl_share_conflict may be skipped if the
 547  * process has an existing share reservation and the operation
 548  * is being performed in the context of that existing share
 549  * reservation.
 550  */
 551 int
 552 nbl_share_conflict(vnode_t *vp, nbl_op_t op, caller_context_t *ct)
 553 {
 554         struct shrlocklist *shrl;
 555         int conflict = 0;
 556         pid_t pid;
 557         int sysid;
 558 
 559         ASSERT(nbl_in_crit(vp));
 560 
 561         if (ct == NULL) {
 562                 pid = curproc->p_pid;
 563                 sysid = 0;
 564         } else {
 565                 pid = ct->cc_pid;
 566                 sysid = ct->cc_sysid;
 567         }
 568 
 569         mutex_enter(&vp->v_lock);
 570         for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) {
 571                 if (!(shrl->shr->s_deny & F_MANDDNY))
 572                         continue;
 573                 /*
 574                  * NBL_READ, NBL_WRITE, and NBL_READWRITE need to
 575                  * check if the share reservation being examined
 576                  * belongs to the current process.
 577                  * NBL_REMOVE and NBL_RENAME do not.
 578                  * This behavior is required by the conflict
 579                  * algorithm described above.
 580                  */
 581                 switch (op) {
 582                 case NBL_READ:
 583                         if ((shrl->shr->s_deny & F_RDDNY) &&
 584                             (shrl->shr->s_sysid != sysid ||
 585                             shrl->shr->s_pid != pid))
 586                                 conflict = 1;
 587                         break;
 588                 case NBL_WRITE:
 589                         if ((shrl->shr->s_deny & F_WRDNY) &&
 590                             (shrl->shr->s_sysid != sysid ||
 591                             shrl->shr->s_pid != pid))
 592                                 conflict = 1;
 593                         break;
 594                 case NBL_READWRITE:
 595                         if ((shrl->shr->s_deny & F_RWDNY) &&
 596                             (shrl->shr->s_sysid != sysid ||
 597                             shrl->shr->s_pid != pid))
 598                                 conflict = 1;
 599                         break;
 600                 case NBL_REMOVE:
 601                         if (shrl->shr->s_access & (F_RWACC|F_RMACC))
 602                                 conflict = 1;
 603                         break;
 604                 case NBL_RENAME:
 605                         if (shrl->shr->s_access & (F_WRACC|F_RMACC))
 606                                 conflict = 1;
 607 
 608                         else if ((shrl->shr->s_access & F_RDACC) &&
 609                             (shrl->shr->s_deny & F_RMDNY))
 610                                 conflict = 1;
 611                         break;
 612 #ifdef DEBUG
 613                 default:
 614                         cmn_err(CE_PANIC,
 615                             "nbl_share_conflict: bogus op (%d)",
 616                             op);
 617                         break;
 618 #endif
 619                 }
 620                 if (conflict)
 621                         break;
 622         }
 623 
 624         mutex_exit(&vp->v_lock);
 625         return (conflict);
 626 }
 627 
 628 /*
 629  * Determine if the given process has a NBMAND share reservation on the
 630  * given vnode. Returns 1 if the process has such a share reservation,
 631  * returns 0 otherwise.
 632  */
 633 int
 634 proc_has_nbmand_share_on_vp(vnode_t *vp, pid_t pid)
 635 {
 636         struct shrlocklist *shrl;
 637 
 638         /*
 639          * Any NBMAND share reservation on the vp for this process?
 640          */
 641         mutex_enter(&vp->v_lock);
 642         for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) {
 643                 if (shrl->shr->s_sysid == 0 &&
 644                     (shrl->shr->s_deny & F_MANDDNY) &&
 645                     (shrl->shr->s_pid == pid)) {
 646                         mutex_exit(&vp->v_lock);
 647                         return (1);
 648                 }
 649         }
 650         mutex_exit(&vp->v_lock);
 651 
 652         return (0);
 653 }