1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  23  */
  24 
  25 #include <sys/param.h>
  26 #include <sys/isa_defs.h>
  27 #include <sys/types.h>
  28 #include <sys/sysmacros.h>
  29 #include <sys/cred.h>
  30 #include <sys/systm.h>
  31 #include <sys/errno.h>
  32 #include <sys/fcntl.h>
  33 #include <sys/pathname.h>
  34 #include <sys/stat.h>
  35 #include <sys/vfs.h>
  36 #include <sys/acl.h>
  37 #include <sys/file.h>
  38 #include <sys/sunddi.h>
  39 #include <sys/debug.h>
  40 #include <sys/cmn_err.h>
  41 #include <sys/vnode.h>
  42 #include <sys/mode.h>
  43 #include <sys/nvpair.h>
  44 #include <sys/attr.h>
  45 #include <sys/gfs.h>
  46 #include <sys/mutex.h>
  47 #include <fs/fs_subr.h>
  48 #include <sys/kidmap.h>
  49 
/*
 * Private data hung off v_data of a sysattr view file vnode: the GFS
 * file state plus the view (read-only or read-write) this file presents.
 */
typedef struct {
        gfs_file_t      xattr_gfs_private;      /* GFS per-file state */
        xattr_view_t    xattr_view;     /* which attribute view this is */
} xattr_file_t;
  54 
/*
 * Private data for the GFS sysattr directory: the GFS directory state
 * plus a vnode pointer for the real xattr directory.
 */
typedef struct {
        gfs_dir_t       xattr_gfs_private;      /* GFS per-directory state */
        vnode_t         *xattr_realvp;  /* Only used for VOP_REALVP */
} xattr_dir_t;
  59 
/*
 * xattr_realvp is used only by VOP_REALVP.  This avoids keeping an
 * unnecessary hold on the *real* xattr dir unless we have no other
 * choice.
 */
  65 
  66 /* ARGSUSED */
  67 static int
  68 xattr_file_open(vnode_t **vpp, int flags, cred_t *cr, caller_context_t *ct)
  69 {
  70         xattr_file_t *np = (*vpp)->v_data;
  71 
  72         if ((np->xattr_view == XATTR_VIEW_READONLY) && (flags & FWRITE))
  73                 return (EACCES);
  74 
  75         return (0);
  76 }
  77 
  78 /* ARGSUSED */
  79 static int
  80 xattr_file_access(vnode_t *vp, int mode, int flags, cred_t *cr,
  81     caller_context_t *ct)
  82 {
  83         xattr_file_t *np = vp->v_data;
  84 
  85         if ((np->xattr_view == XATTR_VIEW_READONLY) && (mode & VWRITE))
  86                 return (EACCES);
  87 
  88         return (0);
  89 }
  90 
  91 /* ARGSUSED */
  92 static int
  93 xattr_file_close(vnode_t *vp, int flags, int count, offset_t off,
  94     cred_t *cr, caller_context_t *ct)
  95 {
  96         cleanlocks(vp, ddi_get_pid(), 0);
  97         cleanshares(vp, ddi_get_pid());
  98         return (0);
  99 }
 100 
 101 static int
 102 xattr_common_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
 103 {
 104         xattr_fid_t     *xfidp;
 105         vnode_t         *pvp, *savevp;
 106         int             error;
 107         uint16_t        orig_len;
 108 
 109         if (fidp->fid_len < XATTR_FIDSZ) {
 110                 fidp->fid_len = XATTR_FIDSZ;
 111                 return (ENOSPC);
 112         }
 113 
 114         savevp = pvp = gfs_file_parent(vp);
 115         mutex_enter(&savevp->v_lock);
 116         if (pvp->v_flag & V_XATTRDIR) {
 117                 pvp = gfs_file_parent(pvp);
 118         }
 119         mutex_exit(&savevp->v_lock);
 120 
 121         xfidp = (xattr_fid_t *)fidp;
 122         orig_len = fidp->fid_len;
 123         fidp->fid_len = sizeof (xfidp->parent_fid);
 124 
 125         error = VOP_FID(pvp, fidp, ct);
 126         if (error) {
 127                 fidp->fid_len = orig_len;
 128                 return (error);
 129         }
 130 
 131         xfidp->parent_len = fidp->fid_len;
 132         fidp->fid_len = XATTR_FIDSZ;
 133         xfidp->dir_offset = gfs_file_inode(vp);
 134 
 135         return (0);
 136 }
 137 
 138 /* ARGSUSED */
 139 static int
 140 xattr_fill_nvlist(vnode_t *vp, xattr_view_t xattr_view, nvlist_t *nvlp,
 141     cred_t *cr, caller_context_t *ct)
 142 {
 143         int error;
 144         f_attr_t attr;
 145         uint64_t fsid;
 146         xvattr_t xvattr;
 147         xoptattr_t *xoap;       /* Pointer to optional attributes */
 148         vnode_t *ppvp;
 149         const char *domain;
 150         uint32_t rid;
 151 
 152         xva_init(&xvattr);
 153 
 154         if ((xoap = xva_getxoptattr(&xvattr)) == NULL)
 155                 return (EINVAL);
 156 
 157         /*
 158          * For detecting ephemeral uid/gid
 159          */
 160         xvattr.xva_vattr.va_mask |= (AT_UID|AT_GID);
 161 
 162         /*
 163          * We need to access the real fs object.
 164          * vp points to a GFS file; ppvp points to the real object.
 165          */
 166         ppvp = gfs_file_parent(gfs_file_parent(vp));
 167 
 168         /*
 169          * Iterate through the attrs associated with this view
 170          */
 171 
 172         for (attr = 0; attr < F_ATTR_ALL; attr++) {
 173                 if (xattr_view != attr_to_xattr_view(attr)) {
 174                         continue;
 175                 }
 176 
 177                 switch (attr) {
 178                 case F_SYSTEM:
 179                         XVA_SET_REQ(&xvattr, XAT_SYSTEM);
 180                         break;
 181                 case F_READONLY:
 182                         XVA_SET_REQ(&xvattr, XAT_READONLY);
 183                         break;
 184                 case F_HIDDEN:
 185                         XVA_SET_REQ(&xvattr, XAT_HIDDEN);
 186                         break;
 187                 case F_ARCHIVE:
 188                         XVA_SET_REQ(&xvattr, XAT_ARCHIVE);
 189                         break;
 190                 case F_IMMUTABLE:
 191                         XVA_SET_REQ(&xvattr, XAT_IMMUTABLE);
 192                         break;
 193                 case F_APPENDONLY:
 194                         XVA_SET_REQ(&xvattr, XAT_APPENDONLY);
 195                         break;
 196                 case F_NOUNLINK:
 197                         XVA_SET_REQ(&xvattr, XAT_NOUNLINK);
 198                         break;
 199                 case F_OPAQUE:
 200                         XVA_SET_REQ(&xvattr, XAT_OPAQUE);
 201                         break;
 202                 case F_NODUMP:
 203                         XVA_SET_REQ(&xvattr, XAT_NODUMP);
 204                         break;
 205                 case F_AV_QUARANTINED:
 206                         XVA_SET_REQ(&xvattr, XAT_AV_QUARANTINED);
 207                         break;
 208                 case F_AV_MODIFIED:
 209                         XVA_SET_REQ(&xvattr, XAT_AV_MODIFIED);
 210                         break;
 211                 case F_AV_SCANSTAMP:
 212                         if (ppvp->v_type == VREG)
 213                                 XVA_SET_REQ(&xvattr, XAT_AV_SCANSTAMP);
 214                         break;
 215                 case F_CRTIME:
 216                         XVA_SET_REQ(&xvattr, XAT_CREATETIME);
 217                         break;
 218                 case F_FSID:
 219                         fsid = (((uint64_t)vp->v_vfsp->vfs_fsid.val[0] << 32) |
 220                             (uint64_t)(vp->v_vfsp->vfs_fsid.val[1] &
 221                             0xffffffff));
 222                         VERIFY(nvlist_add_uint64(nvlp, attr_to_name(attr),
 223                             fsid) == 0);
 224                         break;
 225                 case F_REPARSE:
 226                         XVA_SET_REQ(&xvattr, XAT_REPARSE);
 227                         break;
 228                 case F_GEN:
 229                         XVA_SET_REQ(&xvattr, XAT_GEN);
 230                         break;
 231                 case F_OFFLINE:
 232                         XVA_SET_REQ(&xvattr, XAT_OFFLINE);
 233                         break;
 234                 case F_SPARSE:
 235                         XVA_SET_REQ(&xvattr, XAT_SPARSE);
 236                         break;
 237                 default:
 238                         break;
 239                 }
 240         }
 241 
 242         error = VOP_GETATTR(ppvp, &xvattr.xva_vattr, 0, cr, ct);
 243         if (error)
 244                 return (error);
 245 
 246         /*
 247          * Process all the optional attributes together here.  Notice that
 248          * xoap was set when the optional attribute bits were set above.
 249          */
 250         if ((xvattr.xva_vattr.va_mask & AT_XVATTR) && xoap) {
 251                 if (XVA_ISSET_RTN(&xvattr, XAT_READONLY)) {
 252                         VERIFY(nvlist_add_boolean_value(nvlp,
 253                             attr_to_name(F_READONLY),
 254                             xoap->xoa_readonly) == 0);
 255                 }
 256                 if (XVA_ISSET_RTN(&xvattr, XAT_HIDDEN)) {
 257                         VERIFY(nvlist_add_boolean_value(nvlp,
 258                             attr_to_name(F_HIDDEN),
 259                             xoap->xoa_hidden) == 0);
 260                 }
 261                 if (XVA_ISSET_RTN(&xvattr, XAT_SYSTEM)) {
 262                         VERIFY(nvlist_add_boolean_value(nvlp,
 263                             attr_to_name(F_SYSTEM),
 264                             xoap->xoa_system) == 0);
 265                 }
 266                 if (XVA_ISSET_RTN(&xvattr, XAT_ARCHIVE)) {
 267                         VERIFY(nvlist_add_boolean_value(nvlp,
 268                             attr_to_name(F_ARCHIVE),
 269                             xoap->xoa_archive) == 0);
 270                 }
 271                 if (XVA_ISSET_RTN(&xvattr, XAT_IMMUTABLE)) {
 272                         VERIFY(nvlist_add_boolean_value(nvlp,
 273                             attr_to_name(F_IMMUTABLE),
 274                             xoap->xoa_immutable) == 0);
 275                 }
 276                 if (XVA_ISSET_RTN(&xvattr, XAT_NOUNLINK)) {
 277                         VERIFY(nvlist_add_boolean_value(nvlp,
 278                             attr_to_name(F_NOUNLINK),
 279                             xoap->xoa_nounlink) == 0);
 280                 }
 281                 if (XVA_ISSET_RTN(&xvattr, XAT_APPENDONLY)) {
 282                         VERIFY(nvlist_add_boolean_value(nvlp,
 283                             attr_to_name(F_APPENDONLY),
 284                             xoap->xoa_appendonly) == 0);
 285                 }
 286                 if (XVA_ISSET_RTN(&xvattr, XAT_NODUMP)) {
 287                         VERIFY(nvlist_add_boolean_value(nvlp,
 288                             attr_to_name(F_NODUMP),
 289                             xoap->xoa_nodump) == 0);
 290                 }
 291                 if (XVA_ISSET_RTN(&xvattr, XAT_OPAQUE)) {
 292                         VERIFY(nvlist_add_boolean_value(nvlp,
 293                             attr_to_name(F_OPAQUE),
 294                             xoap->xoa_opaque) == 0);
 295                 }
 296                 if (XVA_ISSET_RTN(&xvattr, XAT_AV_QUARANTINED)) {
 297                         VERIFY(nvlist_add_boolean_value(nvlp,
 298                             attr_to_name(F_AV_QUARANTINED),
 299                             xoap->xoa_av_quarantined) == 0);
 300                 }
 301                 if (XVA_ISSET_RTN(&xvattr, XAT_AV_MODIFIED)) {
 302                         VERIFY(nvlist_add_boolean_value(nvlp,
 303                             attr_to_name(F_AV_MODIFIED),
 304                             xoap->xoa_av_modified) == 0);
 305                 }
 306                 if (XVA_ISSET_RTN(&xvattr, XAT_AV_SCANSTAMP)) {
 307                         VERIFY(nvlist_add_uint8_array(nvlp,
 308                             attr_to_name(F_AV_SCANSTAMP),
 309                             xoap->xoa_av_scanstamp,
 310                             sizeof (xoap->xoa_av_scanstamp)) == 0);
 311                 }
 312                 if (XVA_ISSET_RTN(&xvattr, XAT_CREATETIME)) {
 313                         VERIFY(nvlist_add_uint64_array(nvlp,
 314                             attr_to_name(F_CRTIME),
 315                             (uint64_t *)&(xoap->xoa_createtime),
 316                             sizeof (xoap->xoa_createtime) /
 317                             sizeof (uint64_t)) == 0);
 318                 }
 319                 if (XVA_ISSET_RTN(&xvattr, XAT_REPARSE)) {
 320                         VERIFY(nvlist_add_boolean_value(nvlp,
 321                             attr_to_name(F_REPARSE),
 322                             xoap->xoa_reparse) == 0);
 323                 }
 324                 if (XVA_ISSET_RTN(&xvattr, XAT_GEN)) {
 325                         VERIFY(nvlist_add_uint64(nvlp,
 326                             attr_to_name(F_GEN),
 327                             xoap->xoa_generation) == 0);
 328                 }
 329                 if (XVA_ISSET_RTN(&xvattr, XAT_OFFLINE)) {
 330                         VERIFY(nvlist_add_boolean_value(nvlp,
 331                             attr_to_name(F_OFFLINE),
 332                             xoap->xoa_offline) == 0);
 333                 }
 334                 if (XVA_ISSET_RTN(&xvattr, XAT_SPARSE)) {
 335                         VERIFY(nvlist_add_boolean_value(nvlp,
 336                             attr_to_name(F_SPARSE),
 337                             xoap->xoa_sparse) == 0);
 338                 }
 339         }
 340         /*
 341          * Check for optional ownersid/groupsid
 342          */
 343 
 344         if (xvattr.xva_vattr.va_uid > MAXUID) {
 345                 nvlist_t *nvl_sid;
 346 
 347                 if (nvlist_alloc(&nvl_sid, NV_UNIQUE_NAME, KM_SLEEP))
 348                         return (ENOMEM);
 349 
 350                 if (kidmap_getsidbyuid(crgetzone(cr), xvattr.xva_vattr.va_uid,
 351                     &domain, &rid) == 0) {
 352                         VERIFY(nvlist_add_string(nvl_sid,
 353                             SID_DOMAIN, domain) == 0);
 354                         VERIFY(nvlist_add_uint32(nvl_sid, SID_RID, rid) == 0);
 355                         VERIFY(nvlist_add_nvlist(nvlp, attr_to_name(F_OWNERSID),
 356                             nvl_sid) == 0);
 357                 }
 358                 nvlist_free(nvl_sid);
 359         }
 360         if (xvattr.xva_vattr.va_gid > MAXUID) {
 361                 nvlist_t *nvl_sid;
 362 
 363                 if (nvlist_alloc(&nvl_sid, NV_UNIQUE_NAME, KM_SLEEP))
 364                         return (ENOMEM);
 365 
 366                 if (kidmap_getsidbygid(crgetzone(cr), xvattr.xva_vattr.va_gid,
 367                     &domain, &rid) == 0) {
 368                         VERIFY(nvlist_add_string(nvl_sid,
 369                             SID_DOMAIN, domain) == 0);
 370                         VERIFY(nvlist_add_uint32(nvl_sid, SID_RID, rid) == 0);
 371                         VERIFY(nvlist_add_nvlist(nvlp, attr_to_name(F_GROUPSID),
 372                             nvl_sid) == 0);
 373                 }
 374                 nvlist_free(nvl_sid);
 375         }
 376 
 377         return (0);
 378 }
 379 
 380 /*
 381  * The size of a sysattr file is the size of the nvlist that will be
 382  * returned by xattr_file_read().  A call to xattr_file_write() could
 383  * change the size of that nvlist.  That size is not stored persistently
 384  * so xattr_fill_nvlist() calls VOP_GETATTR so that it can be calculated.
 385  */
 386 static int
 387 xattr_file_size(vnode_t *vp, xattr_view_t xattr_view, size_t *size,
 388     cred_t *cr, caller_context_t *ct)
 389 {
 390         nvlist_t *nvl;
 391 
 392         if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) {
 393                 return (ENOMEM);
 394         }
 395 
 396         if (xattr_fill_nvlist(vp, xattr_view, nvl, cr, ct)) {
 397                 nvlist_free(nvl);
 398                 return (EFAULT);
 399         }
 400 
 401         VERIFY(nvlist_size(nvl, size, NV_ENCODE_XDR) == 0);
 402         nvlist_free(nvl);
 403         return (0);
 404 }
 405 
 406 /* ARGSUSED */
 407 static int
 408 xattr_file_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
 409     caller_context_t *ct)
 410 {
 411         xattr_file_t *np = vp->v_data;
 412         timestruc_t now;
 413         size_t size;
 414         int error;
 415         vnode_t *pvp;
 416         vattr_t pvattr;
 417 
 418         vap->va_type = VREG;
 419         vap->va_mode = MAKEIMODE(vap->va_type,
 420             (np->xattr_view == XATTR_VIEW_READONLY ? 0444 : 0644));
 421         vap->va_nodeid = gfs_file_inode(vp);
 422         vap->va_nlink = 1;
 423         pvp = gfs_file_parent(vp);
 424         (void) memset(&pvattr, 0, sizeof (pvattr));
 425         pvattr.va_mask = AT_CTIME|AT_MTIME;
 426         error = VOP_GETATTR(pvp, &pvattr, flags, cr, ct);
 427         if (error) {
 428                 return (error);
 429         }
 430         vap->va_ctime = pvattr.va_ctime;
 431         vap->va_mtime = pvattr.va_mtime;
 432         gethrestime(&now);
 433         vap->va_atime = now;
 434         vap->va_uid = 0;
 435         vap->va_gid = 0;
 436         vap->va_rdev = 0;
 437         vap->va_blksize = DEV_BSIZE;
 438         vap->va_seq = 0;
 439         vap->va_fsid = vp->v_vfsp->vfs_dev;
 440         error = xattr_file_size(vp, np->xattr_view, &size, cr, ct);
 441         vap->va_size = size;
 442         vap->va_nblocks = howmany(vap->va_size, vap->va_blksize);
 443         return (error);
 444 }
 445 
 446 /* ARGSUSED */
 447 static int
 448 xattr_file_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
 449     caller_context_t *ct)
 450 {
 451         xattr_file_t *np = vp->v_data;
 452         xattr_view_t xattr_view = np->xattr_view;
 453         char *buf;
 454         size_t filesize;
 455         nvlist_t *nvl;
 456         int error;
 457 
 458         /*
 459          * Validate file offset and fasttrack empty reads
 460          */
 461         if (uiop->uio_loffset < (offset_t)0)
 462                 return (EINVAL);
 463 
 464         if (uiop->uio_resid == 0)
 465                 return (0);
 466 
 467         if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP))
 468                 return (ENOMEM);
 469 
 470         if (xattr_fill_nvlist(vp, xattr_view, nvl, cr, ct)) {
 471                 nvlist_free(nvl);
 472                 return (EFAULT);
 473         }
 474 
 475         VERIFY(nvlist_size(nvl, &filesize, NV_ENCODE_XDR) == 0);
 476 
 477         if (uiop->uio_loffset >= filesize) {
 478                 nvlist_free(nvl);
 479                 return (0);
 480         }
 481 
 482         buf = kmem_alloc(filesize, KM_SLEEP);
 483         VERIFY(nvlist_pack(nvl, &buf, &filesize, NV_ENCODE_XDR,
 484             KM_SLEEP) == 0);
 485 
 486         error = uiomove((caddr_t)buf, filesize, UIO_READ, uiop);
 487         kmem_free(buf, filesize);
 488         nvlist_free(nvl);
 489         return (error);
 490 }
 491 
 492 /* ARGSUSED */
 493 static int
 494 xattr_file_write(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
 495     caller_context_t *ct)
 496 {
 497         int error = 0;
 498         char *buf;
 499         char *domain;
 500         uint32_t rid;
 501         ssize_t size = uiop->uio_resid;
 502         nvlist_t *nvp;
 503         nvpair_t *pair = NULL;
 504         vnode_t *ppvp;
 505         xvattr_t xvattr;
 506         xoptattr_t *xoap = NULL;        /* Pointer to optional attributes */
 507 
 508         if (vfs_has_feature(vp->v_vfsp, VFSFT_XVATTR) == 0)
 509                 return (EINVAL);
 510 
 511         /*
 512          * Validate file offset and size.
 513          */
 514         if (uiop->uio_loffset < (offset_t)0)
 515                 return (EINVAL);
 516 
 517         if (size == 0)
 518                 return (EINVAL);
 519 
 520         xva_init(&xvattr);
 521 
 522         if ((xoap = xva_getxoptattr(&xvattr)) == NULL) {
 523                 return (EINVAL);
 524         }
 525 
 526         /*
 527          * Copy and unpack the nvlist
 528          */
 529         buf = kmem_alloc(size, KM_SLEEP);
 530         if (uiomove((caddr_t)buf, size, UIO_WRITE, uiop)) {
 531                 return (EFAULT);
 532         }
 533 
 534         if (nvlist_unpack(buf, size, &nvp, KM_SLEEP) != 0) {
 535                 kmem_free(buf, size);
 536                 uiop->uio_resid = size;
 537                 return (EINVAL);
 538         }
 539         kmem_free(buf, size);
 540 
 541         /*
 542          * Fasttrack empty writes (nvlist with no nvpairs)
 543          */
 544         if (nvlist_next_nvpair(nvp, NULL) == 0)
 545                 return (0);
 546 
 547         ppvp = gfs_file_parent(gfs_file_parent(vp));
 548 
 549         while (pair = nvlist_next_nvpair(nvp, pair)) {
 550                 data_type_t type;
 551                 f_attr_t attr;
 552                 boolean_t value;
 553                 uint64_t *time, *times;
 554                 uint_t elem, nelems;
 555                 nvlist_t *nvp_sid;
 556                 uint8_t *scanstamp;
 557 
 558                 /*
 559                  * Validate the name and type of each attribute.
 560                  * Log any unknown names and continue.  This will
 561                  * help if additional attributes are added later.
 562                  */
 563                 type = nvpair_type(pair);
 564                 if ((attr = name_to_attr(nvpair_name(pair))) == F_ATTR_INVAL) {
 565                         cmn_err(CE_WARN, "Unknown attribute %s",
 566                             nvpair_name(pair));
 567                         continue;
 568                 }
 569 
 570                 /*
 571                  * Verify nvlist type matches required type and view is OK
 572                  */
 573 
 574                 if (type != attr_to_data_type(attr) ||
 575                     (attr_to_xattr_view(attr) == XATTR_VIEW_READONLY)) {
 576                         nvlist_free(nvp);
 577                         return (EINVAL);
 578                 }
 579 
 580                 /*
 581                  * For OWNERSID/GROUPSID make sure the target
 582                  * file system support ephemeral ID's
 583                  */
 584                 if ((attr == F_OWNERSID || attr == F_GROUPSID) &&
 585                     (!(vp->v_vfsp->vfs_flag & VFS_XID))) {
 586                         nvlist_free(nvp);
 587                         return (EINVAL);
 588                 }
 589 
 590                 /*
 591                  * Retrieve data from nvpair
 592                  */
 593                 switch (type) {
 594                 case DATA_TYPE_BOOLEAN_VALUE:
 595                         if (nvpair_value_boolean_value(pair, &value)) {
 596                                 nvlist_free(nvp);
 597                                 return (EINVAL);
 598                         }
 599                         break;
 600                 case DATA_TYPE_UINT64_ARRAY:
 601                         if (nvpair_value_uint64_array(pair, &times, &nelems)) {
 602                                 nvlist_free(nvp);
 603                                 return (EINVAL);
 604                         }
 605                         break;
 606                 case DATA_TYPE_NVLIST:
 607                         if (nvpair_value_nvlist(pair, &nvp_sid)) {
 608                                 nvlist_free(nvp);
 609                                 return (EINVAL);
 610                         }
 611                         break;
 612                 case DATA_TYPE_UINT8_ARRAY:
 613                         if (nvpair_value_uint8_array(pair,
 614                             &scanstamp, &nelems)) {
 615                                 nvlist_free(nvp);
 616                                 return (EINVAL);
 617                         }
 618                         break;
 619                 default:
 620                         nvlist_free(nvp);
 621                         return (EINVAL);
 622                 }
 623 
 624                 switch (attr) {
 625                 /*
 626                  * If we have several similar optional attributes to
 627                  * process then we should do it all together here so that
 628                  * xoap and the requested bitmap can be set in one place.
 629                  */
 630                 case F_READONLY:
 631                         XVA_SET_REQ(&xvattr, XAT_READONLY);
 632                         xoap->xoa_readonly = value;
 633                         break;
 634                 case F_HIDDEN:
 635                         XVA_SET_REQ(&xvattr, XAT_HIDDEN);
 636                         xoap->xoa_hidden = value;
 637                         break;
 638                 case F_SYSTEM:
 639                         XVA_SET_REQ(&xvattr, XAT_SYSTEM);
 640                         xoap->xoa_system = value;
 641                         break;
 642                 case F_ARCHIVE:
 643                         XVA_SET_REQ(&xvattr, XAT_ARCHIVE);
 644                         xoap->xoa_archive = value;
 645                         break;
 646                 case F_IMMUTABLE:
 647                         XVA_SET_REQ(&xvattr, XAT_IMMUTABLE);
 648                         xoap->xoa_immutable = value;
 649                         break;
 650                 case F_NOUNLINK:
 651                         XVA_SET_REQ(&xvattr, XAT_NOUNLINK);
 652                         xoap->xoa_nounlink = value;
 653                         break;
 654                 case F_APPENDONLY:
 655                         XVA_SET_REQ(&xvattr, XAT_APPENDONLY);
 656                         xoap->xoa_appendonly = value;
 657                         break;
 658                 case F_NODUMP:
 659                         XVA_SET_REQ(&xvattr, XAT_NODUMP);
 660                         xoap->xoa_nodump = value;
 661                         break;
 662                 case F_AV_QUARANTINED:
 663                         XVA_SET_REQ(&xvattr, XAT_AV_QUARANTINED);
 664                         xoap->xoa_av_quarantined = value;
 665                         break;
 666                 case F_AV_MODIFIED:
 667                         XVA_SET_REQ(&xvattr, XAT_AV_MODIFIED);
 668                         xoap->xoa_av_modified = value;
 669                         break;
 670                 case F_CRTIME:
 671                         XVA_SET_REQ(&xvattr, XAT_CREATETIME);
 672                         time = (uint64_t *)&(xoap->xoa_createtime);
 673                         for (elem = 0; elem < nelems; elem++)
 674                                 *time++ = times[elem];
 675                         break;
 676                 case F_OWNERSID:
 677                 case F_GROUPSID:
 678                         if (nvlist_lookup_string(nvp_sid, SID_DOMAIN,
 679                             &domain) || nvlist_lookup_uint32(nvp_sid, SID_RID,
 680                             &rid)) {
 681                                 nvlist_free(nvp);
 682                                 return (EINVAL);
 683                         }
 684 
 685                         /*
 686                          * Now map domain+rid to ephemeral id's
 687                          *
 688                          * If mapping fails, then the uid/gid will
 689                          * be set to UID_NOBODY by Winchester.
 690                          */
 691 
 692                         if (attr == F_OWNERSID) {
 693                                 (void) kidmap_getuidbysid(crgetzone(cr), domain,
 694                                     rid, &xvattr.xva_vattr.va_uid);
 695                                 xvattr.xva_vattr.va_mask |= AT_UID;
 696                         } else {
 697                                 (void) kidmap_getgidbysid(crgetzone(cr), domain,
 698                                     rid, &xvattr.xva_vattr.va_gid);
 699                                 xvattr.xva_vattr.va_mask |= AT_GID;
 700                         }
 701                         break;
 702                 case F_AV_SCANSTAMP:
 703                         if (ppvp->v_type == VREG) {
 704                                 XVA_SET_REQ(&xvattr, XAT_AV_SCANSTAMP);
 705                                 (void) memcpy(xoap->xoa_av_scanstamp,
 706                                     scanstamp, nelems);
 707                         } else {
 708                                 nvlist_free(nvp);
 709                                 return (EINVAL);
 710                         }
 711                         break;
 712                 case F_REPARSE:
 713                         XVA_SET_REQ(&xvattr, XAT_REPARSE);
 714                         xoap->xoa_reparse = value;
 715                         break;
 716                 case F_OFFLINE:
 717                         XVA_SET_REQ(&xvattr, XAT_OFFLINE);
 718                         xoap->xoa_offline = value;
 719                         break;
 720                 case F_SPARSE:
 721                         XVA_SET_REQ(&xvattr, XAT_SPARSE);
 722                         xoap->xoa_sparse = value;
 723                         break;
 724                 default:
 725                         break;
 726                 }
 727         }
 728 
 729         ppvp = gfs_file_parent(gfs_file_parent(vp));
 730         error = VOP_SETATTR(ppvp, &xvattr.xva_vattr, 0, cr, ct);
 731         if (error)
 732                 uiop->uio_resid = size;
 733 
 734         nvlist_free(nvp);
 735         return (error);
 736 }
 737 
 738 static int
 739 xattr_file_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
 740     caller_context_t *ct)
 741 {
 742         switch (cmd) {
 743         case _PC_XATTR_EXISTS:
 744         case _PC_SATTR_ENABLED:
 745         case _PC_SATTR_EXISTS:
 746                 *valp = 0;
 747                 return (0);
 748         default:
 749                 return (fs_pathconf(vp, cmd, valp, cr, ct));
 750         }
 751 }
 752 
/*
 * Vnode operations vector for sysattr view files.  It is handed to
 * gfs_file_create() when a view file is made; where it gets populated
 * from the template below is not visible in this part of the file.
 */
vnodeops_t *xattr_file_ops;

/*
 * Operation template for sysattr view files: maps each VOPNAME_* entry
 * to its handler.  Operations implemented in this file use the typed
 * .vop_* members; the generic fs_* routines for ioctl, readdir, putpage
 * and fsync are installed through the .error member.  The list is
 * terminated by a NULL name.
 */
static const fs_operation_def_t xattr_file_tops[] = {
        { VOPNAME_OPEN,         { .vop_open = xattr_file_open }         },
        { VOPNAME_CLOSE,        { .vop_close = xattr_file_close }       },
        { VOPNAME_READ,         { .vop_read = xattr_file_read }         },
        { VOPNAME_WRITE,        { .vop_write = xattr_file_write }       },
        { VOPNAME_IOCTL,        { .error = fs_ioctl }                   },
        { VOPNAME_GETATTR,      { .vop_getattr = xattr_file_getattr }   },
        { VOPNAME_ACCESS,       { .vop_access = xattr_file_access }     },
        { VOPNAME_READDIR,      { .error = fs_notdir }                  },
        { VOPNAME_SEEK,         { .vop_seek = fs_seek }                 },
        { VOPNAME_INACTIVE,     { .vop_inactive = gfs_vop_inactive }    },
        { VOPNAME_FID,          { .vop_fid = xattr_common_fid }         },
        { VOPNAME_PATHCONF,     { .vop_pathconf = xattr_file_pathconf } },
        { VOPNAME_PUTPAGE,      { .error = fs_putpage }                 },
        { VOPNAME_FSYNC,        { .error = fs_fsync }                   },
        { NULL }
};
 772 
 773 vnode_t *
 774 xattr_mkfile(vnode_t *pvp, xattr_view_t xattr_view)
 775 {
 776         vnode_t *vp;
 777         xattr_file_t *np;
 778 
 779         vp = gfs_file_create(sizeof (xattr_file_t), pvp, xattr_file_ops);
 780         np = vp->v_data;
 781         np->xattr_view = xattr_view;
 782         vp->v_flag |= V_SYSATTR;
 783         return (vp);
 784 }
 785 
 786 vnode_t *
 787 xattr_mkfile_ro(vnode_t *pvp)
 788 {
 789         return (xattr_mkfile(pvp, XATTR_VIEW_READONLY));
 790 }
 791 
 792 vnode_t *
 793 xattr_mkfile_rw(vnode_t *pvp)
 794 {
 795         return (xattr_mkfile(pvp, XATTR_VIEW_READWRITE));
 796 }
 797 
/*
 * Vnode operations for the GFS xattr directory.  The pointer is handed to
 * gfs_dir_create() by xattr_dir_lookup() and is listed in xattr_opsvec
 * next to the xattr_dir_tops template.
 */
vnodeops_t *xattr_dir_ops;

/*
 * Static entries of the GFS xattr directory: the read-only and read-write
 * system attribute views, each paired with the routine that builds its
 * vnode.  The trailing NULL entry is not counted by XATTRDIR_NENTS.
 */
static gfs_dirent_t xattr_dirents[] = {
        { VIEW_READONLY, xattr_mkfile_ro, GFS_CACHE_VNODE, },
        { VIEW_READWRITE, xattr_mkfile_rw, GFS_CACHE_VNODE, },
        { NULL },
};

/* Number of entries in xattr_dirents, not counting the trailing NULL one. */
#define XATTRDIR_NENTS  ((sizeof (xattr_dirents) / sizeof (gfs_dirent_t)) - 1)
 807 
 808 static int
 809 is_sattr_name(char *s)
 810 {
 811         int i;
 812 
 813         for (i = 0; i < XATTRDIR_NENTS; ++i) {
 814                 if (strcmp(s, xattr_dirents[i].gfse_name) == 0) {
 815                         return (1);
 816                 }
 817         }
 818         return (0);
 819 }
 820 
 821 /*
 822  * Given the name of an extended attribute file, determine if there is a
 823  * normalization conflict with a sysattr view name.
 824  */
 825 int
 826 xattr_sysattr_casechk(char *s)
 827 {
 828         int i;
 829 
 830         for (i = 0; i < XATTRDIR_NENTS; ++i) {
 831                 if (strcasecmp(s, xattr_dirents[i].gfse_name) == 0)
 832                         return (1);
 833         }
 834         return (0);
 835 }
 836 
 837 static int
 838 xattr_copy(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm,
 839     cred_t *cr, caller_context_t *ct)
 840 {
 841         xvattr_t xvattr;
 842         vnode_t *pdvp;
 843         int error;
 844 
 845         /*
 846          * Only copy system attrs if the views are the same
 847          */
 848         if (strcmp(snm, tnm) != 0)
 849                 return (EINVAL);
 850 
 851         xva_init(&xvattr);
 852 
 853         XVA_SET_REQ(&xvattr, XAT_SYSTEM);
 854         XVA_SET_REQ(&xvattr, XAT_READONLY);
 855         XVA_SET_REQ(&xvattr, XAT_HIDDEN);
 856         XVA_SET_REQ(&xvattr, XAT_ARCHIVE);
 857         XVA_SET_REQ(&xvattr, XAT_APPENDONLY);
 858         XVA_SET_REQ(&xvattr, XAT_NOUNLINK);
 859         XVA_SET_REQ(&xvattr, XAT_IMMUTABLE);
 860         XVA_SET_REQ(&xvattr, XAT_NODUMP);
 861         XVA_SET_REQ(&xvattr, XAT_AV_MODIFIED);
 862         XVA_SET_REQ(&xvattr, XAT_AV_QUARANTINED);
 863         XVA_SET_REQ(&xvattr, XAT_CREATETIME);
 864         XVA_SET_REQ(&xvattr, XAT_REPARSE);
 865         XVA_SET_REQ(&xvattr, XAT_OFFLINE);
 866         XVA_SET_REQ(&xvattr, XAT_SPARSE);
 867 
 868         pdvp = gfs_file_parent(sdvp);
 869         error = VOP_GETATTR(pdvp, &xvattr.xva_vattr, 0, cr, ct);
 870         if (error)
 871                 return (error);
 872 
 873         pdvp = gfs_file_parent(tdvp);
 874         error = VOP_SETATTR(pdvp, &xvattr.xva_vattr, 0, cr, ct);
 875         return (error);
 876 }
 877 
 878 static int
 879 xattr_dir_realdir(vnode_t *dvp, vnode_t **realdvp, int lookup_flags,
 880     cred_t *cr, caller_context_t *ct)
 881 {
 882         vnode_t *pvp;
 883         int error;
 884         struct pathname pn;
 885         char *startnm = "";
 886 
 887         *realdvp = NULL;
 888 
 889         pvp = gfs_file_parent(dvp);
 890 
 891         error = pn_get(startnm, UIO_SYSSPACE, &pn);
 892         if (error) {
 893                 VN_RELE(pvp);
 894                 return (error);
 895         }
 896 
 897         /*
 898          * Set the LOOKUP_HAVE_SYSATTR_DIR flag so that we don't get into an
 899          * infinite loop with fop_lookup calling back to xattr_dir_lookup.
 900          */
 901         lookup_flags |= LOOKUP_HAVE_SYSATTR_DIR;
 902         error = VOP_LOOKUP(pvp, startnm, realdvp, &pn, lookup_flags,
 903             rootvp, cr, ct, NULL, NULL);
 904         pn_free(&pn);
 905 
 906         return (error);
 907 }
 908 
 909 /* ARGSUSED */
 910 static int
 911 xattr_dir_open(vnode_t **vpp, int flags, cred_t *cr, caller_context_t *ct)
 912 {
 913         if (flags & FWRITE) {
 914                 return (EACCES);
 915         }
 916 
 917         return (0);
 918 }
 919 
 920 /* ARGSUSED */
 921 static int
 922 xattr_dir_close(vnode_t *vpp, int flags, int count, offset_t off, cred_t *cr,
 923     caller_context_t *ct)
 924 {
 925         return (0);
 926 }
 927 
 928 /*
 929  * Retrieve the attributes on an xattr directory.  If there is a "real"
 930  * xattr directory, use that.  Otherwise, get the attributes (represented
 931  * by PARENT_ATTRMASK) from the "parent" node and fill in the rest.  Note
 932  * that VOP_GETATTR() could turn off bits in the va_mask.
 933  */
 934 
 935 #define PARENT_ATTRMASK (AT_UID|AT_GID|AT_RDEV|AT_CTIME|AT_MTIME)
 936 
 937 /* ARGSUSED */
 938 static int
 939 xattr_dir_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
 940     caller_context_t *ct)
 941 {
 942         timestruc_t now;
 943         vnode_t *pvp;
 944         int error;
 945 
 946         error = xattr_dir_realdir(vp, &pvp, LOOKUP_XATTR, cr, ct);
 947         if (error == 0) {
 948                 error = VOP_GETATTR(pvp, vap, 0, cr, ct);
 949                 VN_RELE(pvp);
 950                 if (error) {
 951                         return (error);
 952                 }
 953                 vap->va_nlink += XATTRDIR_NENTS;
 954                 vap->va_size += XATTRDIR_NENTS;
 955                 return (0);
 956         }
 957 
 958         /*
 959          * There is no real xattr directory.  Cobble together
 960          * an entry using info from the parent object (if needed)
 961          * plus information common to all xattrs.
 962          */
 963         if (vap->va_mask & PARENT_ATTRMASK) {
 964                 vattr_t pvattr;
 965                 uint_t  off_bits;
 966 
 967                 pvp = gfs_file_parent(vp);
 968                 (void) memset(&pvattr, 0, sizeof (pvattr));
 969                 pvattr.va_mask = PARENT_ATTRMASK;
 970                 error = VOP_GETATTR(pvp, &pvattr, 0, cr, ct);
 971                 if (error) {
 972                         return (error);
 973                 }
 974 
 975                 /*
 976                  * VOP_GETATTR() might have turned off some bits in
 977                  * pvattr.va_mask.  This means that the underlying
 978                  * file system couldn't process those attributes.
 979                  * We need to make sure those bits get turned off
 980                  * in the vattr_t structure that gets passed back
 981                  * to the caller.  Figure out which bits were turned
 982                  * off (if any) then set pvattr.va_mask before it
 983                  * gets copied to the vattr_t that the caller sees.
 984                  */
 985                 off_bits = (pvattr.va_mask ^ PARENT_ATTRMASK) & PARENT_ATTRMASK;
 986                 pvattr.va_mask = vap->va_mask & ~off_bits;
 987                 *vap = pvattr;
 988         }
 989 
 990         vap->va_type = VDIR;
 991         vap->va_mode = MAKEIMODE(vap->va_type, S_ISVTX | 0777);
 992         vap->va_fsid = vp->v_vfsp->vfs_dev;
 993         vap->va_nodeid = gfs_file_inode(vp);
 994         vap->va_nlink = XATTRDIR_NENTS+2;
 995         vap->va_size = vap->va_nlink;
 996         gethrestime(&now);
 997         vap->va_atime = now;
 998         vap->va_blksize = 0;
 999         vap->va_nblocks = 0;
1000         vap->va_seq = 0;
1001         return (0);
1002 }
1003 
1004 static int
1005 xattr_dir_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
1006     caller_context_t *ct)
1007 {
1008         vnode_t *realvp;
1009         int error;
1010 
1011         /*
1012          * If there is a real xattr directory, do the setattr there.
1013          * Otherwise, just return success.  The GFS directory is transient,
1014          * and any setattr changes can disappear anyway.
1015          */
1016         error = xattr_dir_realdir(vp, &realvp, LOOKUP_XATTR, cr, ct);
1017         if (error == 0) {
1018                 error = VOP_SETATTR(realvp, vap, flags, cr, ct);
1019                 VN_RELE(realvp);
1020         }
1021         if (error == ENOENT) {
1022                 error = 0;
1023         }
1024         return (error);
1025 }
1026 
1027 /* ARGSUSED */
1028 static int
1029 xattr_dir_access(vnode_t *vp, int mode, int flags, cred_t *cr,
1030     caller_context_t *ct)
1031 {
1032         int error;
1033         vnode_t *realvp = NULL;
1034 
1035         if (mode & VWRITE) {
1036                 return (EACCES);
1037         }
1038 
1039         error = xattr_dir_realdir(vp, &realvp, LOOKUP_XATTR, cr, ct);
1040 
1041         if (realvp)
1042                 VN_RELE(realvp);
1043 
1044         /*
1045          * No real xattr dir isn't an error
1046          * an error of EINVAL indicates attributes on attributes
1047          * are not supported.  In that case just allow access to the
1048          * transient directory.
1049          */
1050         return ((error == ENOENT || error == EINVAL) ? 0 : error);
1051 }
1052 
1053 static int
1054 xattr_dir_create(vnode_t *dvp, char *name, vattr_t *vap, vcexcl_t excl,
1055     int mode, vnode_t **vpp, cred_t *cr, int flag, caller_context_t *ct,
1056     vsecattr_t *vsecp)
1057 {
1058         vnode_t *pvp;
1059         int error;
1060 
1061         *vpp = NULL;
1062 
1063         /*
1064          * Don't allow creation of extended attributes with sysattr names.
1065          */
1066         if (is_sattr_name(name)) {
1067                 return (gfs_dir_lookup(dvp, name, vpp, cr, 0, NULL, NULL));
1068         }
1069 
1070         error = xattr_dir_realdir(dvp, &pvp, LOOKUP_XATTR|CREATE_XATTR_DIR,
1071             cr, ct);
1072         if (error == 0) {
1073                 error = VOP_CREATE(pvp, name, vap, excl, mode, vpp, cr, flag,
1074                     ct, vsecp);
1075                 VN_RELE(pvp);
1076         }
1077         return (error);
1078 }
1079 
1080 static int
1081 xattr_dir_remove(vnode_t *dvp, char *name, cred_t *cr, caller_context_t *ct,
1082     int flags)
1083 {
1084         vnode_t *pvp;
1085         int error;
1086 
1087         if (is_sattr_name(name)) {
1088                 return (EACCES);
1089         }
1090 
1091         error = xattr_dir_realdir(dvp, &pvp, LOOKUP_XATTR, cr, ct);
1092         if (error == 0) {
1093                 error = VOP_REMOVE(pvp, name, cr, ct, flags);
1094                 VN_RELE(pvp);
1095         }
1096         return (error);
1097 }
1098 
1099 static int
1100 xattr_dir_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr,
1101     caller_context_t *ct, int flags)
1102 {
1103         vnode_t *pvp;
1104         int error;
1105 
1106         if (svp->v_flag & V_SYSATTR) {
1107                 return (EINVAL);
1108         }
1109 
1110         error = xattr_dir_realdir(tdvp, &pvp, LOOKUP_XATTR, cr, ct);
1111         if (error == 0) {
1112                 error = VOP_LINK(pvp, svp, name, cr, ct, flags);
1113                 VN_RELE(pvp);
1114         }
1115         return (error);
1116 }
1117 
1118 static int
1119 xattr_dir_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm,
1120     cred_t *cr, caller_context_t *ct, int flags)
1121 {
1122         vnode_t *spvp, *tpvp;
1123         int error;
1124         int held_tgt;
1125 
1126         if (is_sattr_name(snm) || is_sattr_name(tnm))
1127                 return (xattr_copy(sdvp, snm, tdvp, tnm, cr, ct));
1128         /*
1129          * We know that sdvp is a GFS dir, or we wouldn't be here.
1130          * Get the real unnamed directory.
1131          */
1132         error = xattr_dir_realdir(sdvp, &spvp, LOOKUP_XATTR, cr, ct);
1133         if (error) {
1134                 return (error);
1135         }
1136 
1137         if (sdvp == tdvp) {
1138                 /*
1139                  * If the source and target are the same GFS directory, the
1140                  * underlying unnamed source and target dir will be the same.
1141                  */
1142                 tpvp = spvp;
1143                 VN_HOLD(tpvp);
1144                 held_tgt = 1;
1145         } else if (tdvp->v_flag & V_SYSATTR) {
1146                 /*
1147                  * If the target dir is a different GFS directory,
1148                  * find its underlying unnamed dir.
1149                  */
1150                 error = xattr_dir_realdir(tdvp, &tpvp, LOOKUP_XATTR, cr, ct);
1151                 if (error) {
1152                         VN_RELE(spvp);
1153                         return (error);
1154                 }
1155                 held_tgt = 1;
1156         } else {
1157                 /*
1158                  * Target dir is outside of GFS, pass it on through.
1159                  */
1160                 tpvp = tdvp;
1161                 held_tgt = 0;
1162         }
1163 
1164         error = VOP_RENAME(spvp, snm, tpvp, tnm, cr, ct, flags);
1165 
1166         if (held_tgt) {
1167                 VN_RELE(tpvp);
1168         }
1169         VN_RELE(spvp);
1170 
1171         return (error);
1172 }
1173 
1174 /*
1175  * readdir_xattr_casecmp: given a system attribute name, see if there
1176  * is a real xattr with the same normalized name.
1177  */
1178 static int
1179 readdir_xattr_casecmp(vnode_t *dvp, char *nm, cred_t *cr, caller_context_t *ct,
1180     int *eflags)
1181 {
1182         int error;
1183         vnode_t *vp;
1184         struct pathname pn;
1185 
1186         *eflags = 0;
1187 
1188         error = pn_get(nm, UIO_SYSSPACE, &pn);
1189         if (error == 0) {
1190                 error = VOP_LOOKUP(dvp, nm, &vp, &pn,
1191                     FIGNORECASE, rootvp, cr, ct, NULL, NULL);
1192                 if (error == 0) {
1193                         *eflags = ED_CASE_CONFLICT;
1194                         VN_RELE(vp);
1195                 } else if (error == ENOENT) {
1196                         error = 0;
1197                 }
1198                 pn_free(&pn);
1199         }
1200 
1201         return (error);
1202 }
1203 
1204 static int
1205 xattr_dir_readdir(vnode_t *dvp, uio_t *uiop, cred_t *cr, int *eofp,
1206     caller_context_t *ct, int flags)
1207 {
1208         vnode_t *pvp;
1209         int error;
1210         int local_eof;
1211         int reset_off = 0;
1212         int has_xattrs = 0;
1213 
1214         if (eofp == NULL) {
1215                 eofp = &local_eof;
1216         }
1217         *eofp = 0;
1218 
1219         /*
1220          * See if there is a real extended attribute directory.
1221          */
1222         error = xattr_dir_realdir(dvp, &pvp, LOOKUP_XATTR, cr, ct);
1223         if (error == 0) {
1224                 has_xattrs = 1;
1225         }
1226 
1227         /*
1228          * Start by reading up the static entries.
1229          */
1230         if (uiop->uio_loffset == 0) {
1231                 ino64_t pino, ino;
1232                 offset_t off;
1233                 gfs_dir_t *dp = dvp->v_data;
1234                 gfs_readdir_state_t gstate;
1235 
1236                 if (has_xattrs) {
1237                         /*
1238                          * If there is a real xattr dir, skip . and ..
1239                          * in the GFS dir.  We'll pick them up below
1240                          * when we call into the underlying fs.
1241                          */
1242                         uiop->uio_loffset = GFS_STATIC_ENTRY_OFFSET;
1243                 }
1244                 error = gfs_get_parent_ino(dvp, cr, ct, &pino, &ino);
1245                 if (error == 0) {
1246                         error = gfs_readdir_init(&gstate, dp->gfsd_maxlen, 1,
1247                             uiop, pino, ino, flags);
1248                 }
1249                 if (error) {
1250                         if (has_xattrs)
1251                                 VN_RELE(pvp);
1252                         return (error);
1253                 }
1254 
1255                 while ((error = gfs_readdir_pred(&gstate, uiop, &off)) == 0 &&
1256                     !*eofp) {
1257                         if (off >= 0 && off < dp->gfsd_nstatic) {
1258                                 int eflags;
1259 
1260                                 /*
1261                                  * Check to see if this sysattr set name has a
1262                                  * case-insensitive conflict with a real xattr
1263                                  * name.
1264                                  */
1265                                 eflags = 0;
1266                                 if ((flags & V_RDDIR_ENTFLAGS) && has_xattrs) {
1267                                         error = readdir_xattr_casecmp(pvp,
1268                                             dp->gfsd_static[off].gfse_name,
1269                                             cr, ct, &eflags);
1270                                         if (error)
1271                                                 break;
1272                                 }
1273                                 ino = dp->gfsd_inode(dvp, off);
1274 
1275                                 error = gfs_readdir_emit(&gstate, uiop, off,
1276                                     ino, dp->gfsd_static[off].gfse_name,
1277                                     eflags);
1278                                 if (error)
1279                                         break;
1280                         } else {
1281                                 *eofp = 1;
1282                         }
1283                 }
1284 
1285                 error = gfs_readdir_fini(&gstate, error, eofp, *eofp);
1286                 if (error) {
1287                         if (has_xattrs)
1288                                 VN_RELE(pvp);
1289                         return (error);
1290                 }
1291 
1292                 /*
1293                  * We must read all of the static entries in the first
1294                  * call.  Otherwise we won't know if uio_loffset in a
1295                  * subsequent call refers to the static entries or to those
1296                  * in an underlying fs.
1297                  */
1298                 if (*eofp == 0)
1299                         return (EINVAL);
1300                 reset_off = 1;
1301         }
1302 
1303         if (!has_xattrs) {
1304                 *eofp = 1;
1305                 return (0);
1306         }
1307 
1308         *eofp = 0;
1309         if (reset_off) {
1310                 uiop->uio_loffset = 0;
1311         }
1312         (void) VOP_RWLOCK(pvp, V_WRITELOCK_FALSE, NULL);
1313         error = VOP_READDIR(pvp, uiop, cr, eofp, ct, flags);
1314         VOP_RWUNLOCK(pvp, V_WRITELOCK_FALSE, NULL);
1315         VN_RELE(pvp);
1316 
1317         return (error);
1318 }
1319 
1320 /* ARGSUSED */
1321 static void
1322 xattr_dir_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
1323 {
1324         gfs_file_t *fp;
1325         xattr_dir_t *xattr_dir;
1326 
1327         mutex_enter(&vp->v_lock);
1328         xattr_dir = vp->v_data;
1329         if (xattr_dir->xattr_realvp) {
1330                 VN_RELE(xattr_dir->xattr_realvp);
1331                 xattr_dir->xattr_realvp = NULL;
1332         }
1333         mutex_exit(&vp->v_lock);
1334         fp = gfs_dir_inactive(vp);
1335         if (fp != NULL) {
1336                 kmem_free(fp, fp->gfs_size);
1337         }
1338 }
1339 
1340 static int
1341 xattr_dir_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
1342     caller_context_t *ct)
1343 {
1344         switch (cmd) {
1345         case _PC_XATTR_EXISTS:
1346         case _PC_SATTR_ENABLED:
1347         case _PC_SATTR_EXISTS:
1348                 *valp = 0;
1349                 return (0);
1350         default:
1351                 return (fs_pathconf(vp, cmd, valp, cr, ct));
1352         }
1353 }
1354 
1355 /* ARGSUSED */
1356 static int
1357 xattr_dir_realvp(vnode_t *vp, vnode_t **realvp, caller_context_t *ct)
1358 {
1359         xattr_dir_t *xattr_dir;
1360 
1361         mutex_enter(&vp->v_lock);
1362         xattr_dir = vp->v_data;
1363         if (xattr_dir->xattr_realvp) {
1364                 *realvp = xattr_dir->xattr_realvp;
1365                 mutex_exit(&vp->v_lock);
1366                 return (0);
1367         } else {
1368                 vnode_t *xdvp;
1369                 int error;
1370 
1371                 mutex_exit(&vp->v_lock);
1372                 if ((error = xattr_dir_realdir(vp, &xdvp,
1373                     LOOKUP_XATTR, kcred, NULL)) == 0) {
1374                         /*
1375                          * verify we aren't racing with another thread
1376                          * to find the xattr_realvp
1377                          */
1378                         mutex_enter(&vp->v_lock);
1379                         if (xattr_dir->xattr_realvp == NULL) {
1380                                 xattr_dir->xattr_realvp = xdvp;
1381                                 *realvp = xdvp;
1382                                 mutex_exit(&vp->v_lock);
1383                         } else {
1384                                 *realvp = xattr_dir->xattr_realvp;
1385                                 mutex_exit(&vp->v_lock);
1386                                 VN_RELE(xdvp);
1387                         }
1388                 }
1389                 return (error);
1390         }
1391 }
1392 
/*
 * Operation template for the GFS xattr directory.  IOCTL and MKDIR are
 * bound through the .error member to fs_inval; LOOKUP uses the generic
 * gfs_vop_lookup; the remaining entries name xattr_dir_*, xattr_common_*
 * or fs_* handlers through their typed members.
 */
static const fs_operation_def_t xattr_dir_tops[] = {
        { VOPNAME_OPEN,         { .vop_open = xattr_dir_open }          },
        { VOPNAME_CLOSE,        { .vop_close = xattr_dir_close }        },
        { VOPNAME_IOCTL,        { .error = fs_inval }                   },
        { VOPNAME_GETATTR,      { .vop_getattr = xattr_dir_getattr }    },
        { VOPNAME_SETATTR,      { .vop_setattr = xattr_dir_setattr }    },
        { VOPNAME_ACCESS,       { .vop_access = xattr_dir_access }      },
        { VOPNAME_READDIR,      { .vop_readdir = xattr_dir_readdir }    },
        { VOPNAME_LOOKUP,       { .vop_lookup = gfs_vop_lookup }        },
        { VOPNAME_CREATE,       { .vop_create = xattr_dir_create }      },
        { VOPNAME_REMOVE,       { .vop_remove = xattr_dir_remove }      },
        { VOPNAME_LINK,         { .vop_link = xattr_dir_link }          },
        { VOPNAME_RENAME,       { .vop_rename = xattr_dir_rename }      },
        { VOPNAME_MKDIR,        { .error = fs_inval }                   },
        { VOPNAME_SEEK,         { .vop_seek = fs_seek }                 },
        { VOPNAME_INACTIVE,     { .vop_inactive = xattr_dir_inactive }  },
        { VOPNAME_FID,          { .vop_fid = xattr_common_fid }         },
        { VOPNAME_PATHCONF,     { .vop_pathconf = xattr_dir_pathconf }  },
        { VOPNAME_REALVP,       { .vop_realvp = xattr_dir_realvp } },
        /* NULL-name entry closes the table */
        { NULL,                 { NULL } }
};
1414 
/*
 * Argument for gfs_make_opsvec() in xattr_init(): each row gives a
 * descriptive name, an operation template, and the address of the
 * vnodeops_t pointer associated with that template.  The all-NULL row
 * ends the list.
 */
static gfs_opsvec_t xattr_opsvec[] = {
        { "xattr dir", xattr_dir_tops, &xattr_dir_ops },
        { "system attributes", xattr_file_tops, &xattr_file_ops },
        { NULL, NULL, NULL }
};
1420 
1421 static int
1422 xattr_lookup_cb(vnode_t *vp, const char *nm, vnode_t **vpp, ino64_t *inop,
1423     cred_t *cr, int flags, int *deflags, pathname_t *rpnp)
1424 {
1425         vnode_t *pvp;
1426         struct pathname pn;
1427         int error;
1428 
1429         *vpp = NULL;
1430         *inop = 0;
1431 
1432         error = xattr_dir_realdir(vp, &pvp, LOOKUP_XATTR|CREATE_XATTR_DIR,
1433             cr, NULL);
1434 
1435         /*
1436          * Return ENOENT for EACCES requests during lookup.  Once an
1437          * attribute create is attempted EACCES will be returned.
1438          */
1439         if (error) {
1440                 if (error == EACCES)
1441                         return (ENOENT);
1442                 return (error);
1443         }
1444 
1445         error = pn_get((char *)nm, UIO_SYSSPACE, &pn);
1446         if (error == 0) {
1447                 error = VOP_LOOKUP(pvp, (char *)nm, vpp, &pn, flags, rootvp,
1448                     cr, NULL, deflags, rpnp);
1449                 pn_free(&pn);
1450         }
1451         VN_RELE(pvp);
1452 
1453         return (error);
1454 }
1455 
1456 /* ARGSUSED */
1457 static ino64_t
1458 xattrdir_do_ino(vnode_t *vp, int index)
1459 {
1460         /*
1461          * We use index 0 for the directory fid.  Start
1462          * the file numbering at 1.
1463          */
1464         return ((ino64_t)index+1);
1465 }
1466 
/*
 * Initialize the xattr/sysattr vnode operation vectors by passing
 * xattr_opsvec to gfs_make_opsvec().  A non-zero return from
 * gfs_make_opsvec() trips the VERIFY().
 */
void
xattr_init(void)
{
        VERIFY(gfs_make_opsvec(xattr_opsvec) == 0);
}
1472 
/*
 * Return, in *vpp, the extended attribute directory for 'dvp'.
 *
 *  - dvp must be a VDIR or VREG vnode that is not itself flagged
 *    V_SYSATTR; otherwise EINVAL is returned.
 *  - If dvp->v_xattrdir is already set, it is returned with a new hold.
 *  - Otherwise VOP_PATHCONF(_PC_SATTR_ENABLED) decides whether sysattrs
 *    are allowed on dvp.  If neither xattrs (VFS_XATTR on the vfs) nor
 *    sysattrs are allowed, EINVAL is returned.  If only xattrs are
 *    allowed, the result of a VOP_LOOKUP() of the empty name on dvp
 *    (with LOOKUP_HAVE_SYSATTR_DIR added to 'flags') is returned.
 *  - Otherwise a GFS directory is created and recorded in
 *    dvp->v_xattrdir, unless another thread recorded one while v_lock
 *    was dropped, in which case the new one is torn down and the
 *    existing one is returned with a hold.
 *
 * *vpp is set to NULL on entry.
 */
int
xattr_dir_lookup(vnode_t *dvp, vnode_t **vpp, int flags, cred_t *cr)
{
        int error = 0;

        *vpp = NULL;

        if (dvp->v_type != VDIR && dvp->v_type != VREG)
                return (EINVAL);

        mutex_enter(&dvp->v_lock);

        /*
         * If we're already in sysattr space, don't allow creation
         * of another level of sysattrs.
         */
        if (dvp->v_flag & V_SYSATTR) {
                mutex_exit(&dvp->v_lock);
                return (EINVAL);
        }

        if (dvp->v_xattrdir != NULL) {
                *vpp = dvp->v_xattrdir;
                VN_HOLD(*vpp);
        } else {
                ulong_t val;
                int xattrs_allowed = dvp->v_vfsp->vfs_flag & VFS_XATTR;
                int sysattrs_allowed = 1;

                /*
                 * We have to drop the lock on dvp.  gfs_dir_create will
                 * grab it for a VN_HOLD.
                 */
                mutex_exit(&dvp->v_lock);

                /*
                 * If dvp allows xattr creation, but not sysattr
                 * creation, return the real xattr dir vp. We can't
                 * use the vfs feature mask here because _PC_SATTR_ENABLED
                 * has vnode-level granularity (e.g. .zfs).
                 */
                error = VOP_PATHCONF(dvp, _PC_SATTR_ENABLED, &val, cr, NULL);
                if (error != 0 || val == 0)
                        sysattrs_allowed = 0;

                /* v_lock is not held here, so a plain return is fine. */
                if (!xattrs_allowed && !sysattrs_allowed)
                        return (EINVAL);

                if (!sysattrs_allowed) {
                        struct pathname pn;
                        char *nm = "";

                        error = pn_get(nm, UIO_SYSSPACE, &pn);
                        if (error)
                                return (error);
                        error = VOP_LOOKUP(dvp, nm, vpp, &pn,
                            flags|LOOKUP_HAVE_SYSATTR_DIR, rootvp, cr, NULL,
                            NULL, NULL);
                        pn_free(&pn);
                        return (error);
                }

                /*
                 * Note that we act as if we were given CREATE_XATTR_DIR,
                 * but only for creation of the GFS directory.
                 */
                *vpp = gfs_dir_create(
                    sizeof (xattr_dir_t), dvp, xattr_dir_ops, xattr_dirents,
                    xattrdir_do_ino, MAXNAMELEN, NULL, xattr_lookup_cb);
                mutex_enter(&dvp->v_lock);
                if (dvp->v_xattrdir != NULL) {
                        /*
                         * We lost the race to create the xattr dir.
                         * Destroy this one, use the winner.  We can't
                         * just call VN_RELE(*vpp), because the vnode
                         * is only partially initialized.
                         */
                        gfs_dir_t *dp = (*vpp)->v_data;

                        ASSERT((*vpp)->v_count == 1);
                        vn_free(*vpp);

                        /* dp was saved above, before the vnode was freed. */
                        mutex_destroy(&dp->gfsd_lock);
                        kmem_free(dp->gfsd_static,
                            dp->gfsd_nstatic * sizeof (gfs_dirent_t));
                        kmem_free(dp, dp->gfsd_file.gfs_size);

                        /*
                         * There is an implied VN_HOLD(dvp) here.  We should
                         * be doing a VN_RELE(dvp) to clean up the reference
                         * from *vpp, and then a VN_HOLD(dvp) for the new
                         * reference.  Instead, we just leave the count alone.
                         */

                        *vpp = dvp->v_xattrdir;
                        VN_HOLD(*vpp);
                } else {
                        (*vpp)->v_flag |= (V_XATTRDIR|V_SYSATTR);
                        dvp->v_xattrdir = *vpp;
                }
        }
        mutex_exit(&dvp->v_lock);

        return (error);
}
1578 
1579 int
1580 xattr_dir_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp)
1581 {
1582         int error;
1583         vnode_t *pvp, *dvp;
1584         xattr_fid_t *xfidp;
1585         struct pathname pn;
1586         char *nm;
1587         uint16_t orig_len;
1588 
1589         *vpp = NULL;
1590 
1591         if (fidp->fid_len < XATTR_FIDSZ)
1592                 return (EINVAL);
1593 
1594         xfidp = (xattr_fid_t *)fidp;
1595         orig_len = fidp->fid_len;
1596         fidp->fid_len = xfidp->parent_len;
1597 
1598         error = VFS_VGET(vfsp, &pvp, fidp);
1599         fidp->fid_len = orig_len;
1600         if (error)
1601                 return (error);
1602 
1603         /*
1604          * Start by getting the GFS sysattr directory.  We might need
1605          * to recreate it during the VOP_LOOKUP.
1606          */
1607         nm = "";
1608         error = pn_get(nm, UIO_SYSSPACE, &pn);
1609         if (error) {
1610                 VN_RELE(pvp);
1611                 return (EINVAL);
1612         }
1613 
1614         error = VOP_LOOKUP(pvp, nm, &dvp, &pn, LOOKUP_XATTR|CREATE_XATTR_DIR,
1615             rootvp, CRED(), NULL, NULL, NULL);
1616         pn_free(&pn);
1617         VN_RELE(pvp);
1618         if (error)
1619                 return (error);
1620 
1621         if (xfidp->dir_offset == 0) {
1622                 /*
1623                  * If we were looking for the directory, we're done.
1624                  */
1625                 *vpp = dvp;
1626                 return (0);
1627         }
1628 
1629         if (xfidp->dir_offset > XATTRDIR_NENTS) {
1630                 VN_RELE(dvp);
1631                 return (EINVAL);
1632         }
1633 
1634         nm = xattr_dirents[xfidp->dir_offset - 1].gfse_name;
1635 
1636         error = pn_get(nm, UIO_SYSSPACE, &pn);
1637         if (error) {
1638                 VN_RELE(dvp);
1639                 return (EINVAL);
1640         }
1641 
1642         error = VOP_LOOKUP(dvp, nm, vpp, &pn, 0, rootvp, CRED(), NULL,
1643             NULL, NULL);
1644 
1645         pn_free(&pn);
1646         VN_RELE(dvp);
1647 
1648         return (error);
1649 }