1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 #include <sys/types.h>
  28 #include <sys/t_lock.h>
  29 #include <sys/param.h>
  30 #include <sys/systm.h>
  31 #include <sys/bitmap.h>
  32 #include <sys/debug.h>
  33 #include <sys/errno.h>
  34 #include <sys/strsubr.h>
  35 #include <sys/cmn_err.h>
  36 #include <sys/sysmacros.h>
  37 #include <sys/filio.h>
  38 #include <sys/flock.h>
  39 #include <sys/stat.h>
  40 #include <sys/share.h>
  41 
  42 #include <sys/vfs.h>
  43 #include <sys/vfs_opreg.h>
  44 
  45 #include <sys/sockio.h>
  46 #include <sys/socket.h>
  47 #include <sys/socketvar.h>
  48 #include <sys/strsun.h>
  49 
  50 #include <fs/sockfs/sockcommon.h>
  51 #include <fs/sockfs/socktpi.h>
  52 
  53 /*
  54  * Generic vnode ops
  55  */
  56 static int      socket_vop_open(struct vnode **, int, struct cred *,
  57                     caller_context_t *);
  58 static int      socket_vop_close(struct vnode *, int, int, offset_t,
  59                     struct cred *, caller_context_t *);
  60 static int      socket_vop_read(struct vnode *, struct uio *, int,
  61                     struct cred *, caller_context_t *);
  62 static int      socket_vop_write(struct vnode *, struct uio *, int,
  63                     struct cred *, caller_context_t *);
  64 static int      socket_vop_ioctl(struct vnode *, int, intptr_t, int,
  65                     struct cred *, int32_t *, caller_context_t *);
  66 static int      socket_vop_setfl(struct vnode *, int, int, cred_t *,
  67                     caller_context_t *);
  68 static int      socket_vop_getattr(struct vnode *, struct vattr *, int,
  69                     struct cred *, caller_context_t *);
  70 static int      socket_vop_setattr(struct vnode *, struct vattr *, int,
  71                     struct cred *, caller_context_t *);
  72 static int      socket_vop_access(struct vnode *, int, int, struct cred *,
  73                     caller_context_t *);
  74 static int      socket_vop_fsync(struct vnode *, int, struct cred *,
  75                     caller_context_t *);
  76 static void     socket_vop_inactive(struct vnode *, struct cred *,
  77                     caller_context_t *);
  78 static int      socket_vop_fid(struct vnode *, struct fid *,
  79                     caller_context_t *);
  80 static int      socket_vop_seek(struct vnode *, offset_t, offset_t *,
  81                     caller_context_t *);
  82 static int      socket_vop_poll(struct vnode *, short, int, short *,
  83                     struct pollhead **, caller_context_t *);
  84 
  85 extern int      socket_close_internal(struct sonode *, int, cred_t *);
  86 extern void     socket_destroy_internal(struct sonode *, cred_t *);
  87 
  88 struct vnodeops *socket_vnodeops;
  89 const fs_operation_def_t socket_vnodeops_template[] = {
  90         VOPNAME_OPEN,           { .vop_open = socket_vop_open },
  91         VOPNAME_CLOSE,          { .vop_close = socket_vop_close },
  92         VOPNAME_READ,           { .vop_read = socket_vop_read },
  93         VOPNAME_WRITE,          { .vop_write = socket_vop_write },
  94         VOPNAME_IOCTL,          { .vop_ioctl = socket_vop_ioctl },
  95         VOPNAME_SETFL,          { .vop_setfl = socket_vop_setfl },
  96         VOPNAME_GETATTR,        { .vop_getattr = socket_vop_getattr },
  97         VOPNAME_SETATTR,        { .vop_setattr = socket_vop_setattr },
  98         VOPNAME_ACCESS,         { .vop_access = socket_vop_access },
  99         VOPNAME_FSYNC,          { .vop_fsync = socket_vop_fsync },
 100         VOPNAME_INACTIVE,       { .vop_inactive = socket_vop_inactive },
 101         VOPNAME_FID,            { .vop_fid = socket_vop_fid },
 102         VOPNAME_SEEK,           { .vop_seek = socket_vop_seek },
 103         VOPNAME_POLL,           { .vop_poll = socket_vop_poll },
 104         VOPNAME_DISPOSE,        { .error = fs_error },
 105         NULL,                   NULL
 106 };
 107 
 108 
 109 /*
 110  * generic vnode ops
 111  */
 112 
 113 /*ARGSUSED*/
 114 static int
 115 socket_vop_open(struct vnode **vpp, int flag, struct cred *cr,
 116     caller_context_t *ct)
 117 {
 118         struct vnode *vp = *vpp;
 119         struct sonode *so = VTOSO(vp);
 120 
 121         flag &= ~FCREAT;            /* paranoia */
 122         mutex_enter(&so->so_lock);
 123         so->so_count++;
 124         mutex_exit(&so->so_lock);
 125 
 126         ASSERT(so->so_count != 0);   /* wraparound */
 127         ASSERT(vp->v_type == VSOCK);
 128 
 129         return (0);
 130 }
 131 
 132 /*ARGSUSED*/
 133 static int
 134 socket_vop_close(struct vnode *vp, int flag, int count, offset_t offset,
 135     struct cred *cr, caller_context_t *ct)
 136 {
 137         struct sonode *so;
 138         int error = 0;
 139 
 140         so = VTOSO(vp);
 141         ASSERT(vp->v_type == VSOCK);
 142 
 143         cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
 144         cleanshares(vp, ttoproc(curthread)->p_pid);
 145 
 146         if (vp->v_stream)
 147                 strclean(vp);
 148 
 149         if (count > 1) {
 150                 dprint(2, ("socket_vop_close: count %d\n", count));
 151                 return (0);
 152         }
 153 
 154         mutex_enter(&so->so_lock);
 155         if (--so->so_count == 0) {
 156                 /*
 157                  * Initiate connection shutdown.
 158                  */
 159                 mutex_exit(&so->so_lock);
 160                 error = socket_close_internal(so, flag, cr);
 161         } else {
 162                 mutex_exit(&so->so_lock);
 163         }
 164 
 165         return (error);
 166 }
 167 
 168 /*ARGSUSED2*/
 169 static int
 170 socket_vop_read(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cr,
 171     caller_context_t *ct)
 172 {
 173         struct sonode *so = VTOSO(vp);
 174         struct nmsghdr lmsg;
 175 
 176         ASSERT(vp->v_type == VSOCK);
 177         bzero((void *)&lmsg, sizeof (lmsg));
 178 
 179         return (socket_recvmsg(so, &lmsg, uiop, cr));
 180 }
 181 
 182 /*ARGSUSED2*/
 183 static int
 184 socket_vop_write(struct vnode *vp, struct uio *uiop, int ioflag,
 185     struct cred *cr, caller_context_t *ct)
 186 {
 187         struct sonode *so = VTOSO(vp);
 188         struct nmsghdr lmsg;
 189 
 190         ASSERT(vp->v_type == VSOCK);
 191         bzero((void *)&lmsg, sizeof (lmsg));
 192 
 193         if (!(so->so_mode & SM_BYTESTREAM)) {
 194                 /*
 195                  * If the socket is not byte stream set MSG_EOR
 196                  */
 197                 lmsg.msg_flags = MSG_EOR;
 198         }
 199 
 200         return (socket_sendmsg(so, &lmsg, uiop, cr));
 201 }
 202 
 203 /*ARGSUSED4*/
 204 static int
 205 socket_vop_ioctl(struct vnode *vp, int cmd, intptr_t arg, int mode,
 206     struct cred *cr, int32_t *rvalp, caller_context_t *ct)
 207 {
 208         struct sonode *so = VTOSO(vp);
 209 
 210         ASSERT(vp->v_type == VSOCK);
 211 
 212         return (socket_ioctl(so, cmd, arg, mode, cr, rvalp));
 213 }
 214 
 215 /*
 216  * Allow any flags. Record FNDELAY and FNONBLOCK so that they can be inherited
 217  * from listener to acceptor.
 218  */
 219 /* ARGSUSED */
 220 static int
 221 socket_vop_setfl(vnode_t *vp, int oflags, int nflags, cred_t *cr,
 222     caller_context_t *ct)
 223 {
 224         struct sonode *so = VTOSO(vp);
 225         int error = 0;
 226 
 227         ASSERT(vp->v_type == VSOCK);
 228 
 229         mutex_enter(&so->so_lock);
 230         if (nflags & FNDELAY)
 231                 so->so_state |= SS_NDELAY;
 232         else
 233                 so->so_state &= ~SS_NDELAY;
 234         if (nflags & FNONBLOCK)
 235                 so->so_state |= SS_NONBLOCK;
 236         else
 237                 so->so_state &= ~SS_NONBLOCK;
 238         mutex_exit(&so->so_lock);
 239 
 240         if (so->so_state & SS_ASYNC)
 241                 oflags |= FASYNC;
 242         /*
 243          * Sets/clears the SS_ASYNC flag based on the presence/absence
 244          * of the FASYNC flag passed to fcntl(F_SETFL).
 245          * This exists solely for BSD fcntl() FASYNC compatibility.
 246          */
 247         if ((oflags ^ nflags) & FASYNC && so->so_version != SOV_STREAM) {
 248                 int async = nflags & FASYNC;
 249                 int32_t rv;
 250 
 251                 /*
 252                  * For non-TPI sockets all we have to do is set/remove the
 253                  * SS_ASYNC bit, but for TPI it is more involved. For that
 254                  * reason we delegate the job to the protocol's ioctl handler.
 255                  */
 256                 error = socket_ioctl(so, FIOASYNC, (intptr_t)&async, FKIOCTL,
 257                     cr, &rv);
 258         }
 259         return (error);
 260 }
 261 
 262 
 263 /*
 264  * Get the made up attributes for the vnode.
 265  * 4.3BSD returns the current time for all the timestamps.
 266  * 4.4BSD returns 0 for all the timestamps.
 267  * Here we use the access and modified times recorded in the sonode.
 268  *
 269  * Just like in BSD there is not effect on the underlying file system node
 270  * bound to an AF_UNIX pathname.
 271  *
 272  * When sockmod has been popped this will act just like a stream. Since
 273  * a socket is always a clone there is no need to inspect the attributes
 274  * of the "realvp".
 275  */
 276 /* ARGSUSED */
 277 int
 278 socket_vop_getattr(struct vnode *vp, struct vattr *vap, int flags,
 279     struct cred *cr, caller_context_t *ct)
 280 {
 281         dev_t           fsid;
 282         struct sonode   *so;
 283         static int      sonode_shift = 0;
 284 
 285         /*
 286          * Calculate the amount of bitshift to a sonode pointer which will
 287          * still keep it unique.  See below.
 288          */
 289         if (sonode_shift == 0)
 290                 sonode_shift = highbit(sizeof (struct sonode));
 291         ASSERT(sonode_shift > 0);
 292 
 293         so = VTOSO(vp);
 294         fsid = sockdev;
 295 
 296         if (so->so_version == SOV_STREAM) {
 297                 /*
 298                  * The imaginary "sockmod" has been popped - act
 299                  * as a stream
 300                  */
 301                 vap->va_type = VCHR;
 302                 vap->va_mode = 0;
 303         } else {
 304                 vap->va_type = vp->v_type;
 305                 vap->va_mode = S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|
 306                     S_IROTH|S_IWOTH;
 307         }
 308         vap->va_uid = vap->va_gid = 0;
 309         vap->va_fsid = fsid;
 310         /*
 311          * If the va_nodeid is > MAX_USHORT, then i386 stats might fail.
 312          * So we shift down the sonode pointer to try and get the most
 313          * uniqueness into 16-bits.
 314          */
 315         vap->va_nodeid = ((ino_t)so >> sonode_shift) & 0xFFFF;
 316         vap->va_nlink = 0;
 317         vap->va_size = 0;
 318 
 319         /*
 320          * We need to zero out the va_rdev to avoid some fstats getting
 321          * EOVERFLOW.  This also mimics SunOS 4.x and BSD behavior.
 322          */
 323         vap->va_rdev = (dev_t)0;
 324         vap->va_blksize = MAXBSIZE;
 325         vap->va_nblocks = btod(vap->va_size);
 326 
 327         if (!SOCK_IS_NONSTR(so)) {
 328                 sotpi_info_t *sti = SOTOTPI(so);
 329 
 330                 mutex_enter(&so->so_lock);
 331                 vap->va_atime.tv_sec = sti->sti_atime;
 332                 vap->va_mtime.tv_sec = sti->sti_mtime;
 333                 vap->va_ctime.tv_sec = sti->sti_ctime;
 334                 mutex_exit(&so->so_lock);
 335         } else {
 336                 vap->va_atime.tv_sec = 0;
 337                 vap->va_mtime.tv_sec = 0;
 338                 vap->va_ctime.tv_sec = 0;
 339         }
 340 
 341         vap->va_atime.tv_nsec = 0;
 342         vap->va_mtime.tv_nsec = 0;
 343         vap->va_ctime.tv_nsec = 0;
 344         vap->va_seq = 0;
 345 
 346         return (0);
 347 }
 348 
 349 /*
 350  * Set attributes.
 351  * Just like in BSD there is not effect on the underlying file system node
 352  * bound to an AF_UNIX pathname.
 353  *
 354  * When sockmod has been popped this will act just like a stream. Since
 355  * a socket is always a clone there is no need to modify the attributes
 356  * of the "realvp".
 357  */
 358 /* ARGSUSED */
 359 int
 360 socket_vop_setattr(struct vnode *vp, struct vattr *vap, int flags,
 361     struct cred *cr, caller_context_t *ct)
 362 {
 363         struct sonode *so = VTOSO(vp);
 364 
 365         /*
 366          * If times were changed, and we have a STREAMS socket, then update
 367          * the sonode.
 368          */
 369         if (!SOCK_IS_NONSTR(so)) {
 370                 sotpi_info_t *sti = SOTOTPI(so);
 371 
 372                 mutex_enter(&so->so_lock);
 373                 if (vap->va_mask & AT_ATIME)
 374                         sti->sti_atime = vap->va_atime.tv_sec;
 375                 if (vap->va_mask & AT_MTIME) {
 376                         sti->sti_mtime = vap->va_mtime.tv_sec;
 377                         sti->sti_ctime = gethrestime_sec();
 378                 }
 379                 mutex_exit(&so->so_lock);
 380         }
 381 
 382         return (0);
 383 }
 384 
 385 /*
 386  * Check if user is allowed to access vp. For non-STREAMS based sockets,
 387  * there might not be a device attached to the file system. So for those
 388  * types of sockets there are no permissions to check.
 389  *
 390  * XXX Should there be some other mechanism to check access rights?
 391  */
 392 /*ARGSUSED*/
 393 int
 394 socket_vop_access(struct vnode *vp, int mode, int flags, struct cred *cr,
 395     caller_context_t *ct)
 396 {
 397         struct sonode *so = VTOSO(vp);
 398 
 399         if (!SOCK_IS_NONSTR(so)) {
 400                 ASSERT(so->so_sockparams->sp_sdev_info.sd_vnode != NULL);
 401                 return (VOP_ACCESS(so->so_sockparams->sp_sdev_info.sd_vnode,
 402                     mode, flags, cr, NULL));
 403         }
 404         return (0);
 405 }
 406 
 407 /*
 408  * 4.3BSD and 4.4BSD fail a fsync on a socket with EINVAL.
 409  * This code does the same to be compatible and also to not give an
 410  * application the impression that the data has actually been "synced"
 411  * to the other end of the connection.
 412  */
 413 /* ARGSUSED */
 414 int
 415 socket_vop_fsync(struct vnode *vp, int syncflag, struct cred *cr,
 416     caller_context_t *ct)
 417 {
 418         return (EINVAL);
 419 }
 420 
 421 /*ARGSUSED*/
 422 static void
 423 socket_vop_inactive(struct vnode *vp, struct cred *cr, caller_context_t *ct)
 424 {
 425         struct sonode *so = VTOSO(vp);
 426 
 427         ASSERT(vp->v_type == VSOCK);
 428 
 429         mutex_enter(&vp->v_lock);
 430         /*
 431          * If no one has reclaimed the vnode, remove from the
 432          * cache now.
 433          */
 434         if (vp->v_count < 1)
 435                 cmn_err(CE_PANIC, "socket_inactive: Bad v_count");
 436 
 437         /*
 438          * Drop the temporary hold by vn_rele now
 439          */
 440         if (--vp->v_count != 0) {
 441                 mutex_exit(&vp->v_lock);
 442                 return;
 443         }
 444         mutex_exit(&vp->v_lock);
 445 
 446 
 447         ASSERT(!vn_has_cached_data(vp));
 448 
 449         /* socket specfic clean-up */
 450         socket_destroy_internal(so, cr);
 451 }
 452 
 453 /* ARGSUSED */
 454 int
 455 socket_vop_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct)
 456 {
 457         return (EINVAL);
 458 }
 459 
 460 /*
 461  * Sockets are not seekable.
 462  * (and there is a bug to fix STREAMS to make them fail this as well).
 463  */
 464 /*ARGSUSED*/
 465 int
 466 socket_vop_seek(struct vnode *vp, offset_t ooff, offset_t *noffp,
 467     caller_context_t *ct)
 468 {
 469         return (ESPIPE);
 470 }
 471 
 472 /*ARGSUSED*/
 473 static int
 474 socket_vop_poll(struct vnode *vp, short events, int anyyet, short *reventsp,
 475     struct pollhead **phpp, caller_context_t *ct)
 476 {
 477         struct sonode *so = VTOSO(vp);
 478 
 479         ASSERT(vp->v_type == VSOCK);
 480 
 481         return (socket_poll(so, events, anyyet, reventsp, phpp));
 482 }