1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/t_lock.h> 29 #include <sys/param.h> 30 #include <sys/systm.h> 31 #include <sys/bitmap.h> 32 #include <sys/debug.h> 33 #include <sys/errno.h> 34 #include <sys/strsubr.h> 35 #include <sys/cmn_err.h> 36 #include <sys/sysmacros.h> 37 #include <sys/filio.h> 38 #include <sys/flock.h> 39 #include <sys/stat.h> 40 #include <sys/share.h> 41 42 #include <sys/vfs.h> 43 #include <sys/vfs_opreg.h> 44 45 #include <sys/sockio.h> 46 #include <sys/socket.h> 47 #include <sys/socketvar.h> 48 #include <sys/strsun.h> 49 50 #include <fs/sockfs/sockcommon.h> 51 #include <fs/sockfs/socktpi.h> 52 53 /* 54 * Generic vnode ops 55 */ 56 static int socket_vop_open(struct vnode **, int, struct cred *, 57 caller_context_t *); 58 static int socket_vop_close(struct vnode *, int, int, offset_t, 59 struct cred *, caller_context_t *); 60 static int socket_vop_read(struct vnode *, struct uio *, int, 61 struct cred *, caller_context_t *); 62 static int socket_vop_write(struct vnode *, struct uio *, int, 63 struct cred *, caller_context_t *); 64 static int socket_vop_ioctl(struct vnode *, int, intptr_t, int, 65 struct cred *, int32_t *, caller_context_t *); 66 static int socket_vop_setfl(struct vnode *, int, int, cred_t *, 67 caller_context_t *); 68 static int socket_vop_getattr(struct vnode *, struct vattr *, int, 69 struct cred *, caller_context_t *); 70 static int socket_vop_setattr(struct vnode *, struct vattr *, int, 71 struct cred *, caller_context_t *); 72 static int socket_vop_access(struct vnode *, int, int, struct cred *, 73 caller_context_t *); 74 static int socket_vop_fsync(struct vnode *, int, struct cred *, 75 caller_context_t *); 76 static void socket_vop_inactive(struct vnode *, struct cred *, 77 caller_context_t *); 78 static int socket_vop_fid(struct vnode *, struct fid *, 79 caller_context_t *); 80 static int socket_vop_seek(struct vnode *, offset_t, offset_t *, 81 caller_context_t *); 82 static int socket_vop_poll(struct vnode *, short, int, short *, 83 struct pollhead **, caller_context_t *); 84 85 extern int socket_close_internal(struct sonode *, int, cred_t *); 86 extern void socket_destroy_internal(struct sonode *, cred_t *); 87 88 struct vnodeops *socket_vnodeops; 89 const fs_operation_def_t socket_vnodeops_template[] = { 90 { VOPNAME_OPEN, { .vop_open = socket_vop_open } }, 91 { VOPNAME_CLOSE, { .vop_close = socket_vop_close } }, 92 { VOPNAME_READ, { .vop_read = socket_vop_read } }, 93 { VOPNAME_WRITE, { .vop_write = socket_vop_write } }, 94 { VOPNAME_IOCTL, { .vop_ioctl = socket_vop_ioctl } }, 95 { VOPNAME_SETFL, { .vop_setfl = socket_vop_setfl } }, 96 { VOPNAME_GETATTR, { .vop_getattr = socket_vop_getattr } }, 97 { VOPNAME_SETATTR, { .vop_setattr = socket_vop_setattr } }, 98 { VOPNAME_ACCESS, { .vop_access = socket_vop_access } }, 99 { VOPNAME_FSYNC, { .vop_fsync = socket_vop_fsync } }, 100 { VOPNAME_INACTIVE, { .vop_inactive = socket_vop_inactive } }, 101 { VOPNAME_FID, { .vop_fid = socket_vop_fid } }, 102 { VOPNAME_SEEK, { .vop_seek = socket_vop_seek } }, 103 { VOPNAME_POLL, { .vop_poll = socket_vop_poll } }, 104 { VOPNAME_DISPOSE, { .error = fs_error } }, 105 { NULL, { NULL } } 106 }; 107 108 109 /* 110 * generic vnode ops 111 */ 112 113 /*ARGSUSED*/ 114 static int 115 socket_vop_open(struct vnode **vpp, int flag, struct cred *cr, 116 caller_context_t *ct) 117 { 118 struct vnode *vp = *vpp; 119 struct sonode *so = VTOSO(vp); 120 121 flag &= ~FCREAT; /* paranoia */ 122 mutex_enter(&so->so_lock); 123 so->so_count++; 124 mutex_exit(&so->so_lock); 125 126 ASSERT(so->so_count != 0); /* wraparound */ 127 ASSERT(vp->v_type == VSOCK); 128 129 return (0); 130 } 131 132 /*ARGSUSED*/ 133 static int 134 socket_vop_close(struct vnode *vp, int flag, int count, offset_t offset, 135 struct cred *cr, caller_context_t *ct) 136 { 137 struct sonode *so; 138 int error = 0; 139 140 so = VTOSO(vp); 141 ASSERT(vp->v_type == VSOCK); 142 143 cleanlocks(vp, ttoproc(curthread)->p_pid, 0); 144 cleanshares(vp, ttoproc(curthread)->p_pid); 145 146 if (vp->v_stream) 147 strclean(vp); 148 149 if (count > 1) { 150 dprint(2, ("socket_vop_close: count %d\n", count)); 151 return (0); 152 } 153 154 mutex_enter(&so->so_lock); 155 if (--so->so_count == 0) { 156 /* 157 * Initiate connection shutdown. 158 */ 159 mutex_exit(&so->so_lock); 160 error = socket_close_internal(so, flag, cr); 161 } else { 162 mutex_exit(&so->so_lock); 163 } 164 165 return (error); 166 } 167 168 /*ARGSUSED2*/ 169 static int 170 socket_vop_read(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cr, 171 caller_context_t *ct) 172 { 173 struct sonode *so = VTOSO(vp); 174 struct nmsghdr lmsg; 175 176 ASSERT(vp->v_type == VSOCK); 177 bzero((void *)&lmsg, sizeof (lmsg)); 178 179 return (socket_recvmsg(so, &lmsg, uiop, cr)); 180 } 181 182 /*ARGSUSED2*/ 183 static int 184 socket_vop_write(struct vnode *vp, struct uio *uiop, int ioflag, 185 struct cred *cr, caller_context_t *ct) 186 { 187 struct sonode *so = VTOSO(vp); 188 struct nmsghdr lmsg; 189 190 ASSERT(vp->v_type == VSOCK); 191 bzero((void *)&lmsg, sizeof (lmsg)); 192 193 if (!(so->so_mode & SM_BYTESTREAM)) { 194 /* 195 * If the socket is not byte stream set MSG_EOR 196 */ 197 lmsg.msg_flags = MSG_EOR; 198 } 199 200 return (socket_sendmsg(so, &lmsg, uiop, cr)); 201 } 202 203 /*ARGSUSED4*/ 204 static int 205 socket_vop_ioctl(struct vnode *vp, int cmd, intptr_t arg, int mode, 206 struct cred *cr, int32_t *rvalp, caller_context_t *ct) 207 { 208 struct sonode *so = VTOSO(vp); 209 210 ASSERT(vp->v_type == VSOCK); 211 212 return (socket_ioctl(so, cmd, arg, mode, cr, rvalp)); 213 } 214 215 /* 216 * Allow any flags. Record FNDELAY and FNONBLOCK so that they can be inherited 217 * from listener to acceptor. 218 */ 219 /* ARGSUSED */ 220 static int 221 socket_vop_setfl(vnode_t *vp, int oflags, int nflags, cred_t *cr, 222 caller_context_t *ct) 223 { 224 struct sonode *so = VTOSO(vp); 225 int error = 0; 226 227 ASSERT(vp->v_type == VSOCK); 228 229 mutex_enter(&so->so_lock); 230 if (nflags & FNDELAY) 231 so->so_state |= SS_NDELAY; 232 else 233 so->so_state &= ~SS_NDELAY; 234 if (nflags & FNONBLOCK) 235 so->so_state |= SS_NONBLOCK; 236 else 237 so->so_state &= ~SS_NONBLOCK; 238 mutex_exit(&so->so_lock); 239 240 if (so->so_state & SS_ASYNC) 241 oflags |= FASYNC; 242 /* 243 * Sets/clears the SS_ASYNC flag based on the presence/absence 244 * of the FASYNC flag passed to fcntl(F_SETFL). 245 * This exists solely for BSD fcntl() FASYNC compatibility. 246 */ 247 if ((oflags ^ nflags) & FASYNC && so->so_version != SOV_STREAM) { 248 int async = nflags & FASYNC; 249 int32_t rv; 250 251 /* 252 * For non-TPI sockets all we have to do is set/remove the 253 * SS_ASYNC bit, but for TPI it is more involved. For that 254 * reason we delegate the job to the protocol's ioctl handler. 255 */ 256 error = socket_ioctl(so, FIOASYNC, (intptr_t)&async, FKIOCTL, 257 cr, &rv); 258 } 259 return (error); 260 } 261 262 263 /* 264 * Get the made up attributes for the vnode. 265 * 4.3BSD returns the current time for all the timestamps. 266 * 4.4BSD returns 0 for all the timestamps. 267 * Here we use the access and modified times recorded in the sonode. 268 * 269 * Just like in BSD there is not effect on the underlying file system node 270 * bound to an AF_UNIX pathname. 271 * 272 * When sockmod has been popped this will act just like a stream. Since 273 * a socket is always a clone there is no need to inspect the attributes 274 * of the "realvp". 275 */ 276 /* ARGSUSED */ 277 int 278 socket_vop_getattr(struct vnode *vp, struct vattr *vap, int flags, 279 struct cred *cr, caller_context_t *ct) 280 { 281 dev_t fsid; 282 struct sonode *so; 283 static int sonode_shift = 0; 284 285 /* 286 * Calculate the amount of bitshift to a sonode pointer which will 287 * still keep it unique. See below. 288 */ 289 if (sonode_shift == 0) 290 sonode_shift = highbit(sizeof (struct sonode)); 291 ASSERT(sonode_shift > 0); 292 293 so = VTOSO(vp); 294 fsid = sockdev; 295 296 if (so->so_version == SOV_STREAM) { 297 /* 298 * The imaginary "sockmod" has been popped - act 299 * as a stream 300 */ 301 vap->va_type = VCHR; 302 vap->va_mode = 0; 303 } else { 304 vap->va_type = vp->v_type; 305 vap->va_mode = S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP| 306 S_IROTH|S_IWOTH; 307 } 308 vap->va_uid = vap->va_gid = 0; 309 vap->va_fsid = fsid; 310 /* 311 * If the va_nodeid is > MAX_USHORT, then i386 stats might fail. 312 * So we shift down the sonode pointer to try and get the most 313 * uniqueness into 16-bits. 314 */ 315 vap->va_nodeid = ((ino_t)so >> sonode_shift) & 0xFFFF; 316 vap->va_nlink = 0; 317 vap->va_size = 0; 318 319 /* 320 * We need to zero out the va_rdev to avoid some fstats getting 321 * EOVERFLOW. This also mimics SunOS 4.x and BSD behavior. 322 */ 323 vap->va_rdev = (dev_t)0; 324 vap->va_blksize = MAXBSIZE; 325 vap->va_nblocks = btod(vap->va_size); 326 327 if (!SOCK_IS_NONSTR(so)) { 328 sotpi_info_t *sti = SOTOTPI(so); 329 330 mutex_enter(&so->so_lock); 331 vap->va_atime.tv_sec = sti->sti_atime; 332 vap->va_mtime.tv_sec = sti->sti_mtime; 333 vap->va_ctime.tv_sec = sti->sti_ctime; 334 mutex_exit(&so->so_lock); 335 } else { 336 vap->va_atime.tv_sec = 0; 337 vap->va_mtime.tv_sec = 0; 338 vap->va_ctime.tv_sec = 0; 339 } 340 341 vap->va_atime.tv_nsec = 0; 342 vap->va_mtime.tv_nsec = 0; 343 vap->va_ctime.tv_nsec = 0; 344 vap->va_seq = 0; 345 346 return (0); 347 } 348 349 /* 350 * Set attributes. 351 * Just like in BSD there is not effect on the underlying file system node 352 * bound to an AF_UNIX pathname. 353 * 354 * When sockmod has been popped this will act just like a stream. Since 355 * a socket is always a clone there is no need to modify the attributes 356 * of the "realvp". 357 */ 358 /* ARGSUSED */ 359 int 360 socket_vop_setattr(struct vnode *vp, struct vattr *vap, int flags, 361 struct cred *cr, caller_context_t *ct) 362 { 363 struct sonode *so = VTOSO(vp); 364 365 /* 366 * If times were changed, and we have a STREAMS socket, then update 367 * the sonode. 368 */ 369 if (!SOCK_IS_NONSTR(so)) { 370 sotpi_info_t *sti = SOTOTPI(so); 371 372 mutex_enter(&so->so_lock); 373 if (vap->va_mask & AT_ATIME) 374 sti->sti_atime = vap->va_atime.tv_sec; 375 if (vap->va_mask & AT_MTIME) { 376 sti->sti_mtime = vap->va_mtime.tv_sec; 377 sti->sti_ctime = gethrestime_sec(); 378 } 379 mutex_exit(&so->so_lock); 380 } 381 382 return (0); 383 } 384 385 /* 386 * Check if user is allowed to access vp. For non-STREAMS based sockets, 387 * there might not be a device attached to the file system. So for those 388 * types of sockets there are no permissions to check. 389 * 390 * XXX Should there be some other mechanism to check access rights? 391 */ 392 /*ARGSUSED*/ 393 int 394 socket_vop_access(struct vnode *vp, int mode, int flags, struct cred *cr, 395 caller_context_t *ct) 396 { 397 struct sonode *so = VTOSO(vp); 398 399 if (!SOCK_IS_NONSTR(so)) { 400 ASSERT(so->so_sockparams->sp_sdev_info.sd_vnode != NULL); 401 return (VOP_ACCESS(so->so_sockparams->sp_sdev_info.sd_vnode, 402 mode, flags, cr, NULL)); 403 } 404 return (0); 405 } 406 407 /* 408 * 4.3BSD and 4.4BSD fail a fsync on a socket with EINVAL. 409 * This code does the same to be compatible and also to not give an 410 * application the impression that the data has actually been "synced" 411 * to the other end of the connection. 412 */ 413 /* ARGSUSED */ 414 int 415 socket_vop_fsync(struct vnode *vp, int syncflag, struct cred *cr, 416 caller_context_t *ct) 417 { 418 return (EINVAL); 419 } 420 421 /*ARGSUSED*/ 422 static void 423 socket_vop_inactive(struct vnode *vp, struct cred *cr, caller_context_t *ct) 424 { 425 struct sonode *so = VTOSO(vp); 426 427 ASSERT(vp->v_type == VSOCK); 428 429 mutex_enter(&vp->v_lock); 430 /* 431 * If no one has reclaimed the vnode, remove from the 432 * cache now. 433 */ 434 if (vp->v_count < 1) 435 cmn_err(CE_PANIC, "socket_inactive: Bad v_count"); 436 437 /* 438 * Drop the temporary hold by vn_rele now 439 */ 440 if (--vp->v_count != 0) { 441 mutex_exit(&vp->v_lock); 442 return; 443 } 444 mutex_exit(&vp->v_lock); 445 446 447 ASSERT(!vn_has_cached_data(vp)); 448 449 /* socket specfic clean-up */ 450 socket_destroy_internal(so, cr); 451 } 452 453 /* ARGSUSED */ 454 int 455 socket_vop_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct) 456 { 457 return (EINVAL); 458 } 459 460 /* 461 * Sockets are not seekable. 462 * (and there is a bug to fix STREAMS to make them fail this as well). 463 */ 464 /*ARGSUSED*/ 465 int 466 socket_vop_seek(struct vnode *vp, offset_t ooff, offset_t *noffp, 467 caller_context_t *ct) 468 { 469 return (ESPIPE); 470 } 471 472 /*ARGSUSED*/ 473 static int 474 socket_vop_poll(struct vnode *vp, short events, int anyyet, short *reventsp, 475 struct pollhead **phpp, caller_context_t *ct) 476 { 477 struct sonode *so = VTOSO(vp); 478 479 ASSERT(vp->v_type == VSOCK); 480 481 return (socket_poll(so, events, anyyet, reventsp, phpp)); 482 }