1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 #include <sys/types.h>
  28 #include <sys/t_lock.h>
  29 #include <sys/param.h>
  30 #include <sys/systm.h>
  31 #include <sys/bitmap.h>
  32 #include <sys/debug.h>
  33 #include <sys/errno.h>
  34 #include <sys/strsubr.h>
  35 #include <sys/cmn_err.h>
  36 #include <sys/sysmacros.h>
  37 #include <sys/filio.h>
  38 #include <sys/flock.h>
  39 #include <sys/stat.h>
  40 #include <sys/share.h>
  41 
  42 #include <sys/vfs.h>
  43 #include <sys/vfs_opreg.h>
  44 
  45 #include <sys/sockio.h>
  46 #include <sys/socket.h>
  47 #include <sys/socketvar.h>
  48 #include <sys/strsun.h>
  49 
  50 #include <fs/sockfs/sockcommon.h>
  51 #include <fs/sockfs/socktpi.h>
  52 
  53 /*
  54  * Generic vnode ops
  55  */
  56 static int      socket_vop_open(struct vnode **, int, struct cred *,
  57                     caller_context_t *);
  58 static int      socket_vop_close(struct vnode *, int, int, offset_t,
  59                     struct cred *, caller_context_t *);
  60 static int      socket_vop_read(struct vnode *, struct uio *, int,
  61                     struct cred *, caller_context_t *);
  62 static int      socket_vop_write(struct vnode *, struct uio *, int,
  63                     struct cred *, caller_context_t *);
  64 static int      socket_vop_ioctl(struct vnode *, int, intptr_t, int,
  65                     struct cred *, int32_t *, caller_context_t *);
  66 static int      socket_vop_setfl(struct vnode *, int, int, cred_t *,
  67                     caller_context_t *);
  68 static int      socket_vop_getattr(struct vnode *, struct vattr *, int,
  69                     struct cred *, caller_context_t *);
  70 static int      socket_vop_setattr(struct vnode *, struct vattr *, int,
  71                     struct cred *, caller_context_t *);
  72 static int      socket_vop_access(struct vnode *, int, int, struct cred *,
  73                     caller_context_t *);
  74 static int      socket_vop_fsync(struct vnode *, int, struct cred *,
  75                     caller_context_t *);
  76 static void     socket_vop_inactive(struct vnode *, struct cred *,
  77                     caller_context_t *);
  78 static int      socket_vop_fid(struct vnode *, struct fid *,
  79                     caller_context_t *);
  80 static int      socket_vop_seek(struct vnode *, offset_t, offset_t *,
  81                     caller_context_t *);
  82 static int      socket_vop_poll(struct vnode *, short, int, short *,
  83                     struct pollhead **, caller_context_t *);
  84 
  85 extern int      socket_close_internal(struct sonode *, int, cred_t *);
  86 extern void     socket_destroy_internal(struct sonode *, cred_t *);
  87 
  88 struct vnodeops *socket_vnodeops;
  89 const fs_operation_def_t socket_vnodeops_template[] = {
  90         VOPNAME_OPEN,           { .vop_open = socket_vop_open },
  91         VOPNAME_CLOSE,          { .vop_close = socket_vop_close },
  92         VOPNAME_READ,           { .vop_read = socket_vop_read },
  93         VOPNAME_WRITE,          { .vop_write = socket_vop_write },
  94         VOPNAME_IOCTL,          { .vop_ioctl = socket_vop_ioctl },
  95         VOPNAME_SETFL,          { .vop_setfl = socket_vop_setfl },
  96         VOPNAME_GETATTR,        { .vop_getattr = socket_vop_getattr },
  97         VOPNAME_SETATTR,        { .vop_setattr = socket_vop_setattr },
  98         VOPNAME_ACCESS,         { .vop_access = socket_vop_access },
  99         VOPNAME_FSYNC,          { .vop_fsync = socket_vop_fsync },
 100         VOPNAME_INACTIVE,       { .vop_inactive = socket_vop_inactive },
 101         VOPNAME_FID,            { .vop_fid = socket_vop_fid },
 102         VOPNAME_SEEK,           { .vop_seek = socket_vop_seek },
 103         VOPNAME_POLL,           { .vop_poll = socket_vop_poll },
 104         VOPNAME_DISPOSE,        { .error = fs_error },
 105         NULL,                   NULL
 106 };
 107 
 108 
 109 /*
 110  * generic vnode ops
 111  */
 112 
 113 /*ARGSUSED*/
 114 static int
 115 socket_vop_open(struct vnode **vpp, int flag, struct cred *cr,
 116     caller_context_t *ct)
 117 {
 118         struct vnode *vp = *vpp;
 119         struct sonode *so = VTOSO(vp);
 120 
 121         flag &= ~FCREAT;            /* paranoia */
 122         mutex_enter(&so->so_lock);
 123         so->so_count++;
 124         mutex_exit(&so->so_lock);
 125 
 126         if (!(curproc->p_flag & SSYS))
 127                 sonode_insert_pid(so, curproc->p_pidp->pid_id);
 128 
 129         ASSERT(so->so_count != 0);   /* wraparound */
 130         ASSERT(vp->v_type == VSOCK);
 131 
 132         return (0);
 133 }
 134 
 135 /*ARGSUSED*/
 136 static int
 137 socket_vop_close(struct vnode *vp, int flag, int count, offset_t offset,
 138     struct cred *cr, caller_context_t *ct)
 139 {
 140         struct sonode *so;
 141         int error = 0;
 142 
 143         so = VTOSO(vp);
 144         ASSERT(vp->v_type == VSOCK);
 145 
 146         cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
 147         cleanshares(vp, ttoproc(curthread)->p_pid);
 148 
 149         if (vp->v_stream)
 150                 strclean(vp);
 151 
 152         if (count > 1) {
 153                 dprint(2, ("socket_vop_close: count %d\n", count));
 154                 return (0);
 155         }
 156 
 157         mutex_enter(&so->so_lock);
 158         if (--so->so_count == 0) {
 159                 /*
 160                  * Initiate connection shutdown.
 161                  */
 162                 mutex_exit(&so->so_lock);
 163                 error = socket_close_internal(so, flag, cr);
 164         } else {
 165                 mutex_exit(&so->so_lock);
 166         }
 167 
 168         return (error);
 169 }
 170 
 171 /*ARGSUSED2*/
 172 static int
 173 socket_vop_read(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cr,
 174     caller_context_t *ct)
 175 {
 176         struct sonode *so = VTOSO(vp);
 177         struct nmsghdr lmsg;
 178 
 179         ASSERT(vp->v_type == VSOCK);
 180         bzero((void *)&lmsg, sizeof (lmsg));
 181 
 182         return (socket_recvmsg(so, &lmsg, uiop, cr));
 183 }
 184 
 185 /*ARGSUSED2*/
 186 static int
 187 socket_vop_write(struct vnode *vp, struct uio *uiop, int ioflag,
 188     struct cred *cr, caller_context_t *ct)
 189 {
 190         struct sonode *so = VTOSO(vp);
 191         struct nmsghdr lmsg;
 192 
 193         ASSERT(vp->v_type == VSOCK);
 194         bzero((void *)&lmsg, sizeof (lmsg));
 195 
 196         if (!(so->so_mode & SM_BYTESTREAM)) {
 197                 /*
 198                  * If the socket is not byte stream set MSG_EOR
 199                  */
 200                 lmsg.msg_flags = MSG_EOR;
 201         }
 202 
 203         return (socket_sendmsg(so, &lmsg, uiop, cr));
 204 }
 205 
 206 /*ARGSUSED4*/
 207 static int
 208 socket_vop_ioctl(struct vnode *vp, int cmd, intptr_t arg, int mode,
 209     struct cred *cr, int32_t *rvalp, caller_context_t *ct)
 210 {
 211         struct sonode *so = VTOSO(vp);
 212 
 213         ASSERT(vp->v_type == VSOCK);
 214 
 215         switch (cmd) {
 216         case F_ASSOCI_PID:
 217                 if (cr != kcred)
 218                         return (EPERM);
 219                 if (!(curproc->p_flag & SSYS))
 220                         sonode_insert_pid(so, (pid_t)arg);
 221                 return (0);
 222 
 223         case F_DASSOC_PID:
 224                 if (cr != kcred)
 225                         return (EPERM);
 226                 if (!(curproc->p_flag & SSYS))
 227                         sonode_remove_pid(so, (pid_t)arg);
 228                 return (0);
 229         }
 230 
 231         return (socket_ioctl(so, cmd, arg, mode, cr, rvalp));
 232 }
 233 
 234 /*
 235  * Allow any flags. Record FNDELAY and FNONBLOCK so that they can be inherited
 236  * from listener to acceptor.
 237  */
 238 /* ARGSUSED */
 239 static int
 240 socket_vop_setfl(vnode_t *vp, int oflags, int nflags, cred_t *cr,
 241     caller_context_t *ct)
 242 {
 243         struct sonode *so = VTOSO(vp);
 244         int error = 0;
 245 
 246         ASSERT(vp->v_type == VSOCK);
 247 
 248         mutex_enter(&so->so_lock);
 249         if (nflags & FNDELAY)
 250                 so->so_state |= SS_NDELAY;
 251         else
 252                 so->so_state &= ~SS_NDELAY;
 253         if (nflags & FNONBLOCK)
 254                 so->so_state |= SS_NONBLOCK;
 255         else
 256                 so->so_state &= ~SS_NONBLOCK;
 257         mutex_exit(&so->so_lock);
 258 
 259         if (so->so_state & SS_ASYNC)
 260                 oflags |= FASYNC;
 261         /*
 262          * Sets/clears the SS_ASYNC flag based on the presence/absence
 263          * of the FASYNC flag passed to fcntl(F_SETFL).
 264          * This exists solely for BSD fcntl() FASYNC compatibility.
 265          */
 266         if ((oflags ^ nflags) & FASYNC && so->so_version != SOV_STREAM) {
 267                 int async = nflags & FASYNC;
 268                 int32_t rv;
 269 
 270                 /*
 271                  * For non-TPI sockets all we have to do is set/remove the
 272                  * SS_ASYNC bit, but for TPI it is more involved. For that
 273                  * reason we delegate the job to the protocol's ioctl handler.
 274                  */
 275                 error = socket_ioctl(so, FIOASYNC, (intptr_t)&async, FKIOCTL,
 276                     cr, &rv);
 277         }
 278         return (error);
 279 }
 280 
 281 
 282 /*
 283  * Get the made up attributes for the vnode.
 284  * 4.3BSD returns the current time for all the timestamps.
 285  * 4.4BSD returns 0 for all the timestamps.
 286  * Here we use the access and modified times recorded in the sonode.
 287  *
 288  * Just like in BSD there is not effect on the underlying file system node
 289  * bound to an AF_UNIX pathname.
 290  *
 291  * When sockmod has been popped this will act just like a stream. Since
 292  * a socket is always a clone there is no need to inspect the attributes
 293  * of the "realvp".
 294  */
 295 /* ARGSUSED */
 296 int
 297 socket_vop_getattr(struct vnode *vp, struct vattr *vap, int flags,
 298     struct cred *cr, caller_context_t *ct)
 299 {
 300         dev_t           fsid;
 301         struct sonode   *so;
 302         static int      sonode_shift = 0;
 303 
 304         /*
 305          * Calculate the amount of bitshift to a sonode pointer which will
 306          * still keep it unique.  See below.
 307          */
 308         if (sonode_shift == 0)
 309                 sonode_shift = highbit(sizeof (struct sonode));
 310         ASSERT(sonode_shift > 0);
 311 
 312         so = VTOSO(vp);
 313         fsid = sockdev;
 314 
 315         if (so->so_version == SOV_STREAM) {
 316                 /*
 317                  * The imaginary "sockmod" has been popped - act
 318                  * as a stream
 319                  */
 320                 vap->va_type = VCHR;
 321                 vap->va_mode = 0;
 322         } else {
 323                 vap->va_type = vp->v_type;
 324                 vap->va_mode = S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|
 325                     S_IROTH|S_IWOTH;
 326         }
 327         vap->va_uid = vap->va_gid = 0;
 328         vap->va_fsid = fsid;
 329         /*
 330          * If the va_nodeid is > MAX_USHORT, then i386 stats might fail.
 331          * So we shift down the sonode pointer to try and get the most
 332          * uniqueness into 16-bits.
 333          */
 334         vap->va_nodeid = ((ino_t)so >> sonode_shift) & 0xFFFF;
 335         vap->va_nlink = 0;
 336         vap->va_size = 0;
 337 
 338         /*
 339          * We need to zero out the va_rdev to avoid some fstats getting
 340          * EOVERFLOW.  This also mimics SunOS 4.x and BSD behavior.
 341          */
 342         vap->va_rdev = (dev_t)0;
 343         vap->va_blksize = MAXBSIZE;
 344         vap->va_nblocks = btod(vap->va_size);
 345 
 346         if (!SOCK_IS_NONSTR(so)) {
 347                 sotpi_info_t *sti = SOTOTPI(so);
 348 
 349                 mutex_enter(&so->so_lock);
 350                 vap->va_atime.tv_sec = sti->sti_atime;
 351                 vap->va_mtime.tv_sec = sti->sti_mtime;
 352                 vap->va_ctime.tv_sec = sti->sti_ctime;
 353                 mutex_exit(&so->so_lock);
 354         } else {
 355                 vap->va_atime.tv_sec = 0;
 356                 vap->va_mtime.tv_sec = 0;
 357                 vap->va_ctime.tv_sec = 0;
 358         }
 359 
 360         vap->va_atime.tv_nsec = 0;
 361         vap->va_mtime.tv_nsec = 0;
 362         vap->va_ctime.tv_nsec = 0;
 363         vap->va_seq = 0;
 364 
 365         return (0);
 366 }
 367 
 368 /*
 369  * Set attributes.
 370  * Just like in BSD there is not effect on the underlying file system node
 371  * bound to an AF_UNIX pathname.
 372  *
 373  * When sockmod has been popped this will act just like a stream. Since
 374  * a socket is always a clone there is no need to modify the attributes
 375  * of the "realvp".
 376  */
 377 /* ARGSUSED */
 378 int
 379 socket_vop_setattr(struct vnode *vp, struct vattr *vap, int flags,
 380     struct cred *cr, caller_context_t *ct)
 381 {
 382         struct sonode *so = VTOSO(vp);
 383 
 384         /*
 385          * If times were changed, and we have a STREAMS socket, then update
 386          * the sonode.
 387          */
 388         if (!SOCK_IS_NONSTR(so)) {
 389                 sotpi_info_t *sti = SOTOTPI(so);
 390 
 391                 mutex_enter(&so->so_lock);
 392                 if (vap->va_mask & AT_ATIME)
 393                         sti->sti_atime = vap->va_atime.tv_sec;
 394                 if (vap->va_mask & AT_MTIME) {
 395                         sti->sti_mtime = vap->va_mtime.tv_sec;
 396                         sti->sti_ctime = gethrestime_sec();
 397                 }
 398                 mutex_exit(&so->so_lock);
 399         }
 400 
 401         return (0);
 402 }
 403 
 404 /*
 405  * Check if user is allowed to access vp. For non-STREAMS based sockets,
 406  * there might not be a device attached to the file system. So for those
 407  * types of sockets there are no permissions to check.
 408  *
 409  * XXX Should there be some other mechanism to check access rights?
 410  */
 411 /*ARGSUSED*/
 412 int
 413 socket_vop_access(struct vnode *vp, int mode, int flags, struct cred *cr,
 414     caller_context_t *ct)
 415 {
 416         struct sonode *so = VTOSO(vp);
 417 
 418         if (!SOCK_IS_NONSTR(so)) {
 419                 ASSERT(so->so_sockparams->sp_sdev_info.sd_vnode != NULL);
 420                 return (VOP_ACCESS(so->so_sockparams->sp_sdev_info.sd_vnode,
 421                     mode, flags, cr, NULL));
 422         }
 423         return (0);
 424 }
 425 
 426 /*
 427  * 4.3BSD and 4.4BSD fail a fsync on a socket with EINVAL.
 428  * This code does the same to be compatible and also to not give an
 429  * application the impression that the data has actually been "synced"
 430  * to the other end of the connection.
 431  */
 432 /* ARGSUSED */
 433 int
 434 socket_vop_fsync(struct vnode *vp, int syncflag, struct cred *cr,
 435     caller_context_t *ct)
 436 {
 437         return (EINVAL);
 438 }
 439 
 440 /*ARGSUSED*/
 441 static void
 442 socket_vop_inactive(struct vnode *vp, struct cred *cr, caller_context_t *ct)
 443 {
 444         struct sonode *so = VTOSO(vp);
 445 
 446         ASSERT(vp->v_type == VSOCK);
 447 
 448         mutex_enter(&vp->v_lock);
 449         /*
 450          * If no one has reclaimed the vnode, remove from the
 451          * cache now.
 452          */
 453         if (vp->v_count < 1)
 454                 cmn_err(CE_PANIC, "socket_inactive: Bad v_count");
 455 
 456         /*
 457          * Drop the temporary hold by vn_rele now
 458          */
 459         if (--vp->v_count != 0) {
 460                 mutex_exit(&vp->v_lock);
 461                 return;
 462         }
 463         mutex_exit(&vp->v_lock);
 464 
 465 
 466         ASSERT(!vn_has_cached_data(vp));
 467 
 468         /* socket specfic clean-up */
 469         socket_destroy_internal(so, cr);
 470 }
 471 
 472 /* ARGSUSED */
 473 int
 474 socket_vop_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct)
 475 {
 476         return (EINVAL);
 477 }
 478 
 479 /*
 480  * Sockets are not seekable.
 481  * (and there is a bug to fix STREAMS to make them fail this as well).
 482  */
 483 /*ARGSUSED*/
 484 int
 485 socket_vop_seek(struct vnode *vp, offset_t ooff, offset_t *noffp,
 486     caller_context_t *ct)
 487 {
 488         return (ESPIPE);
 489 }
 490 
 491 /*ARGSUSED*/
 492 static int
 493 socket_vop_poll(struct vnode *vp, short events, int anyyet, short *reventsp,
 494     struct pollhead **phpp, caller_context_t *ct)
 495 {
 496         struct sonode *so = VTOSO(vp);
 497 
 498         ASSERT(vp->v_type == VSOCK);
 499 
 500         return (socket_poll(so, events, anyyet, reventsp, phpp));
 501 }