1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 #include <sys/types.h>
  28 #include <sys/t_lock.h>
  29 #include <sys/param.h>
  30 #include <sys/systm.h>
  31 #include <sys/bitmap.h>
  32 #include <sys/debug.h>
  33 #include <sys/errno.h>
  34 #include <sys/strsubr.h>
  35 #include <sys/cmn_err.h>
  36 #include <sys/sysmacros.h>
  37 #include <sys/filio.h>
  38 #include <sys/flock.h>
  39 #include <sys/stat.h>
  40 #include <sys/share.h>
  41 
  42 #include <sys/vfs.h>
  43 #include <sys/vfs_opreg.h>
  44 
  45 #include <sys/sockio.h>
  46 #include <sys/socket.h>
  47 #include <sys/socketvar.h>
  48 #include <sys/strsun.h>
  49 
  50 #include <fs/sockfs/sockcommon.h>
  51 #include <fs/sockfs/socktpi.h>
  52 
  53 /*
  54  * Generic vnode ops
  55  */
  56 static int      socket_vop_open(struct vnode **, int, struct cred *,
  57                     caller_context_t *);
  58 static int      socket_vop_close(struct vnode *, int, int, offset_t,
  59                     struct cred *, caller_context_t *);
  60 static int      socket_vop_read(struct vnode *, struct uio *, int,
  61                     struct cred *, caller_context_t *);
  62 static int      socket_vop_write(struct vnode *, struct uio *, int,
  63                     struct cred *, caller_context_t *);
  64 static int      socket_vop_ioctl(struct vnode *, int, intptr_t, int,
  65                     struct cred *, int32_t *, caller_context_t *);
  66 static int      socket_vop_setfl(struct vnode *, int, int, cred_t *,
  67                     caller_context_t *);
  68 static int      socket_vop_getattr(struct vnode *, struct vattr *, int,
  69                     struct cred *, caller_context_t *);
  70 static int      socket_vop_setattr(struct vnode *, struct vattr *, int,
  71                     struct cred *, caller_context_t *);
  72 static int      socket_vop_access(struct vnode *, int, int, struct cred *,
  73                     caller_context_t *);
  74 static int      socket_vop_fsync(struct vnode *, int, struct cred *,
  75                     caller_context_t *);
  76 static void     socket_vop_inactive(struct vnode *, struct cred *,
  77                     caller_context_t *);
  78 static int      socket_vop_fid(struct vnode *, struct fid *,
  79                     caller_context_t *);
  80 static int      socket_vop_seek(struct vnode *, offset_t, offset_t *,
  81                     caller_context_t *);
  82 static int      socket_vop_poll(struct vnode *, short, int, short *,
  83                     struct pollhead **, caller_context_t *);
  84 
  85 extern int      socket_close_internal(struct sonode *, int, cred_t *);
  86 extern void     socket_destroy_internal(struct sonode *, cred_t *);
  87 
  88 struct vnodeops *socket_vnodeops;
  89 const fs_operation_def_t socket_vnodeops_template[] = {
  90         VOPNAME_OPEN,           { .vop_open = socket_vop_open },
  91         VOPNAME_CLOSE,          { .vop_close = socket_vop_close },
  92         VOPNAME_READ,           { .vop_read = socket_vop_read },
  93         VOPNAME_WRITE,          { .vop_write = socket_vop_write },
  94         VOPNAME_IOCTL,          { .vop_ioctl = socket_vop_ioctl },
  95         VOPNAME_SETFL,          { .vop_setfl = socket_vop_setfl },
  96         VOPNAME_GETATTR,        { .vop_getattr = socket_vop_getattr },
  97         VOPNAME_SETATTR,        { .vop_setattr = socket_vop_setattr },
  98         VOPNAME_ACCESS,         { .vop_access = socket_vop_access },
  99         VOPNAME_FSYNC,          { .vop_fsync = socket_vop_fsync },
 100         VOPNAME_INACTIVE,       { .vop_inactive = socket_vop_inactive },
 101         VOPNAME_FID,            { .vop_fid = socket_vop_fid },
 102         VOPNAME_SEEK,           { .vop_seek = socket_vop_seek },
 103         VOPNAME_POLL,           { .vop_poll = socket_vop_poll },
 104         VOPNAME_DISPOSE,        { .error = fs_error },
 105         NULL,                   NULL
 106 };
 107 
 108 
 109 /*
 110  * generic vnode ops
 111  */
 112 
 113 /*ARGSUSED*/
 114 static int
 115 socket_vop_open(struct vnode **vpp, int flag, struct cred *cr,
 116     caller_context_t *ct)
 117 {
 118         struct vnode *vp = *vpp;
 119         struct sonode *so = VTOSO(vp);
 120 
 121         flag &= ~FCREAT;            /* paranoia */
 122         mutex_enter(&so->so_lock);
 123         so->so_count++;
 124         mutex_exit(&so->so_lock);
 125 
 126         sonode_insert_pid(so, curproc);
 127 
 128         ASSERT(so->so_count != 0);   /* wraparound */
 129         ASSERT(vp->v_type == VSOCK);
 130 
 131         return (0);
 132 }
 133 
 134 /*ARGSUSED*/
 135 static int
 136 socket_vop_close(struct vnode *vp, int flag, int count, offset_t offset,
 137     struct cred *cr, caller_context_t *ct)
 138 {
 139         struct sonode *so;
 140         int error = 0;
 141 
 142         so = VTOSO(vp);
 143         ASSERT(vp->v_type == VSOCK);
 144 
 145         cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
 146         cleanshares(vp, ttoproc(curthread)->p_pid);
 147 
 148         if (vp->v_stream)
 149                 strclean(vp);
 150 
 151         if (count > 1) {
 152                 dprint(2, ("socket_vop_close: count %d\n", count));
 153                 return (0);
 154         }
 155 
 156         mutex_enter(&so->so_lock);
 157         if (--so->so_count == 0) {
 158                 /*
 159                  * Initiate connection shutdown.
 160                  */
 161                 mutex_exit(&so->so_lock);
 162                 error = socket_close_internal(so, flag, cr);
 163         } else {
 164                 mutex_exit(&so->so_lock);
 165         }
 166 
 167         return (error);
 168 }
 169 
 170 /*ARGSUSED2*/
 171 static int
 172 socket_vop_read(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cr,
 173     caller_context_t *ct)
 174 {
 175         struct sonode *so = VTOSO(vp);
 176         struct nmsghdr lmsg;
 177 
 178         ASSERT(vp->v_type == VSOCK);
 179         bzero((void *)&lmsg, sizeof (lmsg));
 180 
 181         return (socket_recvmsg(so, &lmsg, uiop, cr));
 182 }
 183 
 184 /*ARGSUSED2*/
 185 static int
 186 socket_vop_write(struct vnode *vp, struct uio *uiop, int ioflag,
 187     struct cred *cr, caller_context_t *ct)
 188 {
 189         struct sonode *so = VTOSO(vp);
 190         struct nmsghdr lmsg;
 191 
 192         ASSERT(vp->v_type == VSOCK);
 193         bzero((void *)&lmsg, sizeof (lmsg));
 194 
 195         if (!(so->so_mode & SM_BYTESTREAM)) {
 196                 /*
 197                  * If the socket is not byte stream set MSG_EOR
 198                  */
 199                 lmsg.msg_flags = MSG_EOR;
 200         }
 201 
 202         return (socket_sendmsg(so, &lmsg, uiop, cr));
 203 }
 204 
 205 /*ARGSUSED4*/
 206 static int
 207 socket_vop_ioctl(struct vnode *vp, int cmd, intptr_t arg, int mode,
 208     struct cred *cr, int32_t *rvalp, caller_context_t *ct)
 209 {
 210         struct sonode *so = VTOSO(vp);
 211 
 212         ASSERT(vp->v_type == VSOCK);
 213 
 214         switch (cmd) {
 215                 case F_FORKED: {
 216                         if (cr != kcred)
 217                                 return (-1);
 218                         sonode_insert_pid(so, (proc_t *)arg);
 219                         return (0);
 220                 }
 221 
 222                 case F_CLOSED: {
 223                         if (cr != kcred)
 224                                 return (-1);
 225                         sonode_remove_pid(so, (proc_t *)arg);
 226                         return (0);
 227                 }
 228         }
 229 
 230         return (socket_ioctl(so, cmd, arg, mode, cr, rvalp));
 231 }
 232 
 233 /*
 234  * Allow any flags. Record FNDELAY and FNONBLOCK so that they can be inherited
 235  * from listener to acceptor.
 236  */
 237 /* ARGSUSED */
 238 static int
 239 socket_vop_setfl(vnode_t *vp, int oflags, int nflags, cred_t *cr,
 240     caller_context_t *ct)
 241 {
 242         struct sonode *so = VTOSO(vp);
 243         int error = 0;
 244 
 245         ASSERT(vp->v_type == VSOCK);
 246 
 247         mutex_enter(&so->so_lock);
 248         if (nflags & FNDELAY)
 249                 so->so_state |= SS_NDELAY;
 250         else
 251                 so->so_state &= ~SS_NDELAY;
 252         if (nflags & FNONBLOCK)
 253                 so->so_state |= SS_NONBLOCK;
 254         else
 255                 so->so_state &= ~SS_NONBLOCK;
 256         mutex_exit(&so->so_lock);
 257 
 258         if (so->so_state & SS_ASYNC)
 259                 oflags |= FASYNC;
 260         /*
 261          * Sets/clears the SS_ASYNC flag based on the presence/absence
 262          * of the FASYNC flag passed to fcntl(F_SETFL).
 263          * This exists solely for BSD fcntl() FASYNC compatibility.
 264          */
 265         if ((oflags ^ nflags) & FASYNC && so->so_version != SOV_STREAM) {
 266                 int async = nflags & FASYNC;
 267                 int32_t rv;
 268 
 269                 /*
 270                  * For non-TPI sockets all we have to do is set/remove the
 271                  * SS_ASYNC bit, but for TPI it is more involved. For that
 272                  * reason we delegate the job to the protocol's ioctl handler.
 273                  */
 274                 error = socket_ioctl(so, FIOASYNC, (intptr_t)&async, FKIOCTL,
 275                     cr, &rv);
 276         }
 277         return (error);
 278 }
 279 
 280 
 281 /*
 282  * Get the made up attributes for the vnode.
 283  * 4.3BSD returns the current time for all the timestamps.
 284  * 4.4BSD returns 0 for all the timestamps.
 285  * Here we use the access and modified times recorded in the sonode.
 286  *
 287  * Just like in BSD there is not effect on the underlying file system node
 288  * bound to an AF_UNIX pathname.
 289  *
 290  * When sockmod has been popped this will act just like a stream. Since
 291  * a socket is always a clone there is no need to inspect the attributes
 292  * of the "realvp".
 293  */
 294 /* ARGSUSED */
 295 int
 296 socket_vop_getattr(struct vnode *vp, struct vattr *vap, int flags,
 297     struct cred *cr, caller_context_t *ct)
 298 {
 299         dev_t           fsid;
 300         struct sonode   *so;
 301         static int      sonode_shift = 0;
 302 
 303         /*
 304          * Calculate the amount of bitshift to a sonode pointer which will
 305          * still keep it unique.  See below.
 306          */
 307         if (sonode_shift == 0)
 308                 sonode_shift = highbit(sizeof (struct sonode));
 309         ASSERT(sonode_shift > 0);
 310 
 311         so = VTOSO(vp);
 312         fsid = sockdev;
 313 
 314         if (so->so_version == SOV_STREAM) {
 315                 /*
 316                  * The imaginary "sockmod" has been popped - act
 317                  * as a stream
 318                  */
 319                 vap->va_type = VCHR;
 320                 vap->va_mode = 0;
 321         } else {
 322                 vap->va_type = vp->v_type;
 323                 vap->va_mode = S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|
 324                     S_IROTH|S_IWOTH;
 325         }
 326         vap->va_uid = vap->va_gid = 0;
 327         vap->va_fsid = fsid;
 328         /*
 329          * If the va_nodeid is > MAX_USHORT, then i386 stats might fail.
 330          * So we shift down the sonode pointer to try and get the most
 331          * uniqueness into 16-bits.
 332          */
 333         vap->va_nodeid = ((ino_t)so >> sonode_shift) & 0xFFFF;
 334         vap->va_nlink = 0;
 335         vap->va_size = 0;
 336 
 337         /*
 338          * We need to zero out the va_rdev to avoid some fstats getting
 339          * EOVERFLOW.  This also mimics SunOS 4.x and BSD behavior.
 340          */
 341         vap->va_rdev = (dev_t)0;
 342         vap->va_blksize = MAXBSIZE;
 343         vap->va_nblocks = btod(vap->va_size);
 344 
 345         if (!SOCK_IS_NONSTR(so)) {
 346                 sotpi_info_t *sti = SOTOTPI(so);
 347 
 348                 mutex_enter(&so->so_lock);
 349                 vap->va_atime.tv_sec = sti->sti_atime;
 350                 vap->va_mtime.tv_sec = sti->sti_mtime;
 351                 vap->va_ctime.tv_sec = sti->sti_ctime;
 352                 mutex_exit(&so->so_lock);
 353         } else {
 354                 vap->va_atime.tv_sec = 0;
 355                 vap->va_mtime.tv_sec = 0;
 356                 vap->va_ctime.tv_sec = 0;
 357         }
 358 
 359         vap->va_atime.tv_nsec = 0;
 360         vap->va_mtime.tv_nsec = 0;
 361         vap->va_ctime.tv_nsec = 0;
 362         vap->va_seq = 0;
 363 
 364         return (0);
 365 }
 366 
 367 /*
 368  * Set attributes.
 369  * Just like in BSD there is not effect on the underlying file system node
 370  * bound to an AF_UNIX pathname.
 371  *
 372  * When sockmod has been popped this will act just like a stream. Since
 373  * a socket is always a clone there is no need to modify the attributes
 374  * of the "realvp".
 375  */
 376 /* ARGSUSED */
 377 int
 378 socket_vop_setattr(struct vnode *vp, struct vattr *vap, int flags,
 379     struct cred *cr, caller_context_t *ct)
 380 {
 381         struct sonode *so = VTOSO(vp);
 382 
 383         /*
 384          * If times were changed, and we have a STREAMS socket, then update
 385          * the sonode.
 386          */
 387         if (!SOCK_IS_NONSTR(so)) {
 388                 sotpi_info_t *sti = SOTOTPI(so);
 389 
 390                 mutex_enter(&so->so_lock);
 391                 if (vap->va_mask & AT_ATIME)
 392                         sti->sti_atime = vap->va_atime.tv_sec;
 393                 if (vap->va_mask & AT_MTIME) {
 394                         sti->sti_mtime = vap->va_mtime.tv_sec;
 395                         sti->sti_ctime = gethrestime_sec();
 396                 }
 397                 mutex_exit(&so->so_lock);
 398         }
 399 
 400         return (0);
 401 }
 402 
 403 /*
 404  * Check if user is allowed to access vp. For non-STREAMS based sockets,
 405  * there might not be a device attached to the file system. So for those
 406  * types of sockets there are no permissions to check.
 407  *
 408  * XXX Should there be some other mechanism to check access rights?
 409  */
 410 /*ARGSUSED*/
 411 int
 412 socket_vop_access(struct vnode *vp, int mode, int flags, struct cred *cr,
 413     caller_context_t *ct)
 414 {
 415         struct sonode *so = VTOSO(vp);
 416 
 417         if (!SOCK_IS_NONSTR(so)) {
 418                 ASSERT(so->so_sockparams->sp_sdev_info.sd_vnode != NULL);
 419                 return (VOP_ACCESS(so->so_sockparams->sp_sdev_info.sd_vnode,
 420                     mode, flags, cr, NULL));
 421         }
 422         return (0);
 423 }
 424 
 425 /*
 426  * 4.3BSD and 4.4BSD fail a fsync on a socket with EINVAL.
 427  * This code does the same to be compatible and also to not give an
 428  * application the impression that the data has actually been "synced"
 429  * to the other end of the connection.
 430  */
 431 /* ARGSUSED */
 432 int
 433 socket_vop_fsync(struct vnode *vp, int syncflag, struct cred *cr,
 434     caller_context_t *ct)
 435 {
 436         return (EINVAL);
 437 }
 438 
 439 /*ARGSUSED*/
 440 static void
 441 socket_vop_inactive(struct vnode *vp, struct cred *cr, caller_context_t *ct)
 442 {
 443         struct sonode *so = VTOSO(vp);
 444 
 445         ASSERT(vp->v_type == VSOCK);
 446 
 447         mutex_enter(&vp->v_lock);
 448         /*
 449          * If no one has reclaimed the vnode, remove from the
 450          * cache now.
 451          */
 452         if (vp->v_count < 1)
 453                 cmn_err(CE_PANIC, "socket_inactive: Bad v_count");
 454 
 455         /*
 456          * Drop the temporary hold by vn_rele now
 457          */
 458         if (--vp->v_count != 0) {
 459                 mutex_exit(&vp->v_lock);
 460                 return;
 461         }
 462         mutex_exit(&vp->v_lock);
 463 
 464 
 465         ASSERT(!vn_has_cached_data(vp));
 466 
 467         /* socket specfic clean-up */
 468         socket_destroy_internal(so, cr);
 469 }
 470 
 471 /* ARGSUSED */
 472 int
 473 socket_vop_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct)
 474 {
 475         return (EINVAL);
 476 }
 477 
 478 /*
 479  * Sockets are not seekable.
 480  * (and there is a bug to fix STREAMS to make them fail this as well).
 481  */
 482 /*ARGSUSED*/
 483 int
 484 socket_vop_seek(struct vnode *vp, offset_t ooff, offset_t *noffp,
 485     caller_context_t *ct)
 486 {
 487         return (ESPIPE);
 488 }
 489 
 490 /*ARGSUSED*/
 491 static int
 492 socket_vop_poll(struct vnode *vp, short events, int anyyet, short *reventsp,
 493     struct pollhead **phpp, caller_context_t *ct)
 494 {
 495         struct sonode *so = VTOSO(vp);
 496 
 497         ASSERT(vp->v_type == VSOCK);
 498 
 499         return (socket_poll(so, events, anyyet, reventsp, phpp));
 500 }