1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/t_lock.h> 29 #include <sys/param.h> 30 #include <sys/systm.h> 31 #include <sys/bitmap.h> 32 #include <sys/debug.h> 33 #include <sys/errno.h> 34 #include <sys/strsubr.h> 35 #include <sys/cmn_err.h> 36 #include <sys/sysmacros.h> 37 #include <sys/filio.h> 38 #include <sys/flock.h> 39 #include <sys/stat.h> 40 #include <sys/share.h> 41 42 #include <sys/vfs.h> 43 #include <sys/vfs_opreg.h> 44 45 #include <sys/sockio.h> 46 #include <sys/socket.h> 47 #include <sys/socketvar.h> 48 #include <sys/strsun.h> 49 50 #include <fs/sockfs/sockcommon.h> 51 #include <fs/sockfs/socktpi.h> 52 53 /* 54 * Generic vnode ops 55 */ 56 static int socket_vop_open(struct vnode **, int, struct cred *, 57 caller_context_t *); 58 static int socket_vop_close(struct vnode *, int, int, offset_t, 59 struct cred *, caller_context_t *); 60 static int socket_vop_read(struct vnode *, struct uio *, int, 61 struct cred *, caller_context_t *); 62 static int socket_vop_write(struct vnode *, struct uio *, int, 63 struct cred *, caller_context_t *); 64 static int socket_vop_ioctl(struct vnode *, int, intptr_t, int, 65 struct cred *, int32_t *, caller_context_t *); 66 static int socket_vop_setfl(struct vnode *, int, int, cred_t *, 67 caller_context_t *); 68 static int socket_vop_getattr(struct vnode *, struct vattr *, int, 69 struct cred *, caller_context_t *); 70 static int socket_vop_setattr(struct vnode *, struct vattr *, int, 71 struct cred *, caller_context_t *); 72 static int socket_vop_access(struct vnode *, int, int, struct cred *, 73 caller_context_t *); 74 static int socket_vop_fsync(struct vnode *, int, struct cred *, 75 caller_context_t *); 76 static void socket_vop_inactive(struct vnode *, struct cred *, 77 caller_context_t *); 78 static int socket_vop_fid(struct vnode *, struct fid *, 79 caller_context_t *); 80 static int socket_vop_seek(struct vnode *, offset_t, offset_t *, 81 caller_context_t *); 82 static int socket_vop_poll(struct vnode *, short, int, short *, 83 struct pollhead **, caller_context_t *); 84 85 extern int socket_close_internal(struct sonode *, int, cred_t *); 86 extern void socket_destroy_internal(struct sonode *, cred_t *); 87 88 struct vnodeops *socket_vnodeops; 89 const fs_operation_def_t socket_vnodeops_template[] = { 90 VOPNAME_OPEN, { .vop_open = socket_vop_open }, 91 VOPNAME_CLOSE, { .vop_close = socket_vop_close }, 92 VOPNAME_READ, { .vop_read = socket_vop_read }, 93 VOPNAME_WRITE, { .vop_write = socket_vop_write }, 94 VOPNAME_IOCTL, { .vop_ioctl = socket_vop_ioctl }, 95 VOPNAME_SETFL, { .vop_setfl = socket_vop_setfl }, 96 VOPNAME_GETATTR, { .vop_getattr = socket_vop_getattr }, 97 VOPNAME_SETATTR, { .vop_setattr = socket_vop_setattr }, 98 VOPNAME_ACCESS, { .vop_access = socket_vop_access }, 99 VOPNAME_FSYNC, { .vop_fsync = socket_vop_fsync }, 100 VOPNAME_INACTIVE, { .vop_inactive = socket_vop_inactive }, 101 VOPNAME_FID, { .vop_fid = socket_vop_fid }, 102 VOPNAME_SEEK, { .vop_seek = socket_vop_seek }, 103 VOPNAME_POLL, { .vop_poll = socket_vop_poll }, 104 VOPNAME_DISPOSE, { .error = fs_error }, 105 NULL, NULL 106 }; 107 108 109 /* 110 * generic vnode ops 111 */ 112 113 /*ARGSUSED*/ 114 static int 115 socket_vop_open(struct vnode **vpp, int flag, struct cred *cr, 116 caller_context_t *ct) 117 { 118 struct vnode *vp = *vpp; 119 struct sonode *so = VTOSO(vp); 120 121 flag &= ~FCREAT; /* paranoia */ 122 mutex_enter(&so->so_lock); 123 so->so_count++; 124 mutex_exit(&so->so_lock); 125 126 sonode_insert_pid(so, curproc); 127 128 ASSERT(so->so_count != 0); /* wraparound */ 129 ASSERT(vp->v_type == VSOCK); 130 131 return (0); 132 } 133 134 /*ARGSUSED*/ 135 static int 136 socket_vop_close(struct vnode *vp, int flag, int count, offset_t offset, 137 struct cred *cr, caller_context_t *ct) 138 { 139 struct sonode *so; 140 int error = 0; 141 142 so = VTOSO(vp); 143 ASSERT(vp->v_type == VSOCK); 144 145 cleanlocks(vp, ttoproc(curthread)->p_pid, 0); 146 cleanshares(vp, ttoproc(curthread)->p_pid); 147 148 if (vp->v_stream) 149 strclean(vp); 150 151 if (count > 1) { 152 dprint(2, ("socket_vop_close: count %d\n", count)); 153 return (0); 154 } 155 156 mutex_enter(&so->so_lock); 157 if (--so->so_count == 0) { 158 /* 159 * Initiate connection shutdown. 160 */ 161 mutex_exit(&so->so_lock); 162 error = socket_close_internal(so, flag, cr); 163 } else { 164 mutex_exit(&so->so_lock); 165 } 166 167 return (error); 168 } 169 170 /*ARGSUSED2*/ 171 static int 172 socket_vop_read(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cr, 173 caller_context_t *ct) 174 { 175 struct sonode *so = VTOSO(vp); 176 struct nmsghdr lmsg; 177 178 ASSERT(vp->v_type == VSOCK); 179 bzero((void *)&lmsg, sizeof (lmsg)); 180 181 return (socket_recvmsg(so, &lmsg, uiop, cr)); 182 } 183 184 /*ARGSUSED2*/ 185 static int 186 socket_vop_write(struct vnode *vp, struct uio *uiop, int ioflag, 187 struct cred *cr, caller_context_t *ct) 188 { 189 struct sonode *so = VTOSO(vp); 190 struct nmsghdr lmsg; 191 192 ASSERT(vp->v_type == VSOCK); 193 bzero((void *)&lmsg, sizeof (lmsg)); 194 195 if (!(so->so_mode & SM_BYTESTREAM)) { 196 /* 197 * If the socket is not byte stream set MSG_EOR 198 */ 199 lmsg.msg_flags = MSG_EOR; 200 } 201 202 return (socket_sendmsg(so, &lmsg, uiop, cr)); 203 } 204 205 /*ARGSUSED4*/ 206 static int 207 socket_vop_ioctl(struct vnode *vp, int cmd, intptr_t arg, int mode, 208 struct cred *cr, int32_t *rvalp, caller_context_t *ct) 209 { 210 struct sonode *so = VTOSO(vp); 211 212 ASSERT(vp->v_type == VSOCK); 213 214 switch (cmd) { 215 case F_FORKED: { 216 if (cr != kcred) 217 return (-1); 218 sonode_insert_pid(so, (proc_t *)arg); 219 return (0); 220 } 221 222 case F_CLOSED: { 223 if (cr != kcred) 224 return (-1); 225 sonode_remove_pid(so, (proc_t *)arg); 226 return (0); 227 } 228 } 229 230 return (socket_ioctl(so, cmd, arg, mode, cr, rvalp)); 231 } 232 233 /* 234 * Allow any flags. Record FNDELAY and FNONBLOCK so that they can be inherited 235 * from listener to acceptor. 236 */ 237 /* ARGSUSED */ 238 static int 239 socket_vop_setfl(vnode_t *vp, int oflags, int nflags, cred_t *cr, 240 caller_context_t *ct) 241 { 242 struct sonode *so = VTOSO(vp); 243 int error = 0; 244 245 ASSERT(vp->v_type == VSOCK); 246 247 mutex_enter(&so->so_lock); 248 if (nflags & FNDELAY) 249 so->so_state |= SS_NDELAY; 250 else 251 so->so_state &= ~SS_NDELAY; 252 if (nflags & FNONBLOCK) 253 so->so_state |= SS_NONBLOCK; 254 else 255 so->so_state &= ~SS_NONBLOCK; 256 mutex_exit(&so->so_lock); 257 258 if (so->so_state & SS_ASYNC) 259 oflags |= FASYNC; 260 /* 261 * Sets/clears the SS_ASYNC flag based on the presence/absence 262 * of the FASYNC flag passed to fcntl(F_SETFL). 263 * This exists solely for BSD fcntl() FASYNC compatibility. 264 */ 265 if ((oflags ^ nflags) & FASYNC && so->so_version != SOV_STREAM) { 266 int async = nflags & FASYNC; 267 int32_t rv; 268 269 /* 270 * For non-TPI sockets all we have to do is set/remove the 271 * SS_ASYNC bit, but for TPI it is more involved. For that 272 * reason we delegate the job to the protocol's ioctl handler. 273 */ 274 error = socket_ioctl(so, FIOASYNC, (intptr_t)&async, FKIOCTL, 275 cr, &rv); 276 } 277 return (error); 278 } 279 280 281 /* 282 * Get the made up attributes for the vnode. 283 * 4.3BSD returns the current time for all the timestamps. 284 * 4.4BSD returns 0 for all the timestamps. 285 * Here we use the access and modified times recorded in the sonode. 286 * 287 * Just like in BSD there is not effect on the underlying file system node 288 * bound to an AF_UNIX pathname. 289 * 290 * When sockmod has been popped this will act just like a stream. Since 291 * a socket is always a clone there is no need to inspect the attributes 292 * of the "realvp". 293 */ 294 /* ARGSUSED */ 295 int 296 socket_vop_getattr(struct vnode *vp, struct vattr *vap, int flags, 297 struct cred *cr, caller_context_t *ct) 298 { 299 dev_t fsid; 300 struct sonode *so; 301 static int sonode_shift = 0; 302 303 /* 304 * Calculate the amount of bitshift to a sonode pointer which will 305 * still keep it unique. See below. 306 */ 307 if (sonode_shift == 0) 308 sonode_shift = highbit(sizeof (struct sonode)); 309 ASSERT(sonode_shift > 0); 310 311 so = VTOSO(vp); 312 fsid = sockdev; 313 314 if (so->so_version == SOV_STREAM) { 315 /* 316 * The imaginary "sockmod" has been popped - act 317 * as a stream 318 */ 319 vap->va_type = VCHR; 320 vap->va_mode = 0; 321 } else { 322 vap->va_type = vp->v_type; 323 vap->va_mode = S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP| 324 S_IROTH|S_IWOTH; 325 } 326 vap->va_uid = vap->va_gid = 0; 327 vap->va_fsid = fsid; 328 /* 329 * If the va_nodeid is > MAX_USHORT, then i386 stats might fail. 330 * So we shift down the sonode pointer to try and get the most 331 * uniqueness into 16-bits. 332 */ 333 vap->va_nodeid = ((ino_t)so >> sonode_shift) & 0xFFFF; 334 vap->va_nlink = 0; 335 vap->va_size = 0; 336 337 /* 338 * We need to zero out the va_rdev to avoid some fstats getting 339 * EOVERFLOW. This also mimics SunOS 4.x and BSD behavior. 340 */ 341 vap->va_rdev = (dev_t)0; 342 vap->va_blksize = MAXBSIZE; 343 vap->va_nblocks = btod(vap->va_size); 344 345 if (!SOCK_IS_NONSTR(so)) { 346 sotpi_info_t *sti = SOTOTPI(so); 347 348 mutex_enter(&so->so_lock); 349 vap->va_atime.tv_sec = sti->sti_atime; 350 vap->va_mtime.tv_sec = sti->sti_mtime; 351 vap->va_ctime.tv_sec = sti->sti_ctime; 352 mutex_exit(&so->so_lock); 353 } else { 354 vap->va_atime.tv_sec = 0; 355 vap->va_mtime.tv_sec = 0; 356 vap->va_ctime.tv_sec = 0; 357 } 358 359 vap->va_atime.tv_nsec = 0; 360 vap->va_mtime.tv_nsec = 0; 361 vap->va_ctime.tv_nsec = 0; 362 vap->va_seq = 0; 363 364 return (0); 365 } 366 367 /* 368 * Set attributes. 369 * Just like in BSD there is not effect on the underlying file system node 370 * bound to an AF_UNIX pathname. 371 * 372 * When sockmod has been popped this will act just like a stream. Since 373 * a socket is always a clone there is no need to modify the attributes 374 * of the "realvp". 375 */ 376 /* ARGSUSED */ 377 int 378 socket_vop_setattr(struct vnode *vp, struct vattr *vap, int flags, 379 struct cred *cr, caller_context_t *ct) 380 { 381 struct sonode *so = VTOSO(vp); 382 383 /* 384 * If times were changed, and we have a STREAMS socket, then update 385 * the sonode. 386 */ 387 if (!SOCK_IS_NONSTR(so)) { 388 sotpi_info_t *sti = SOTOTPI(so); 389 390 mutex_enter(&so->so_lock); 391 if (vap->va_mask & AT_ATIME) 392 sti->sti_atime = vap->va_atime.tv_sec; 393 if (vap->va_mask & AT_MTIME) { 394 sti->sti_mtime = vap->va_mtime.tv_sec; 395 sti->sti_ctime = gethrestime_sec(); 396 } 397 mutex_exit(&so->so_lock); 398 } 399 400 return (0); 401 } 402 403 /* 404 * Check if user is allowed to access vp. For non-STREAMS based sockets, 405 * there might not be a device attached to the file system. So for those 406 * types of sockets there are no permissions to check. 407 * 408 * XXX Should there be some other mechanism to check access rights? 409 */ 410 /*ARGSUSED*/ 411 int 412 socket_vop_access(struct vnode *vp, int mode, int flags, struct cred *cr, 413 caller_context_t *ct) 414 { 415 struct sonode *so = VTOSO(vp); 416 417 if (!SOCK_IS_NONSTR(so)) { 418 ASSERT(so->so_sockparams->sp_sdev_info.sd_vnode != NULL); 419 return (VOP_ACCESS(so->so_sockparams->sp_sdev_info.sd_vnode, 420 mode, flags, cr, NULL)); 421 } 422 return (0); 423 } 424 425 /* 426 * 4.3BSD and 4.4BSD fail a fsync on a socket with EINVAL. 427 * This code does the same to be compatible and also to not give an 428 * application the impression that the data has actually been "synced" 429 * to the other end of the connection. 430 */ 431 /* ARGSUSED */ 432 int 433 socket_vop_fsync(struct vnode *vp, int syncflag, struct cred *cr, 434 caller_context_t *ct) 435 { 436 return (EINVAL); 437 } 438 439 /*ARGSUSED*/ 440 static void 441 socket_vop_inactive(struct vnode *vp, struct cred *cr, caller_context_t *ct) 442 { 443 struct sonode *so = VTOSO(vp); 444 445 ASSERT(vp->v_type == VSOCK); 446 447 mutex_enter(&vp->v_lock); 448 /* 449 * If no one has reclaimed the vnode, remove from the 450 * cache now. 451 */ 452 if (vp->v_count < 1) 453 cmn_err(CE_PANIC, "socket_inactive: Bad v_count"); 454 455 /* 456 * Drop the temporary hold by vn_rele now 457 */ 458 if (--vp->v_count != 0) { 459 mutex_exit(&vp->v_lock); 460 return; 461 } 462 mutex_exit(&vp->v_lock); 463 464 465 ASSERT(!vn_has_cached_data(vp)); 466 467 /* socket specfic clean-up */ 468 socket_destroy_internal(so, cr); 469 } 470 471 /* ARGSUSED */ 472 int 473 socket_vop_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct) 474 { 475 return (EINVAL); 476 } 477 478 /* 479 * Sockets are not seekable. 480 * (and there is a bug to fix STREAMS to make them fail this as well). 481 */ 482 /*ARGSUSED*/ 483 int 484 socket_vop_seek(struct vnode *vp, offset_t ooff, offset_t *noffp, 485 caller_context_t *ct) 486 { 487 return (ESPIPE); 488 } 489 490 /*ARGSUSED*/ 491 static int 492 socket_vop_poll(struct vnode *vp, short events, int anyyet, short *reventsp, 493 struct pollhead **phpp, caller_context_t *ct) 494 { 495 struct sonode *so = VTOSO(vp); 496 497 ASSERT(vp->v_type == VSOCK); 498 499 return (socket_poll(so, events, anyyet, reventsp, phpp)); 500 }