1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/t_lock.h> 29 #include <sys/param.h> 30 #include <sys/systm.h> 31 #include <sys/bitmap.h> 32 #include <sys/debug.h> 33 #include <sys/errno.h> 34 #include <sys/strsubr.h> 35 #include <sys/cmn_err.h> 36 #include <sys/sysmacros.h> 37 #include <sys/filio.h> 38 #include <sys/flock.h> 39 #include <sys/stat.h> 40 #include <sys/share.h> 41 42 #include <sys/vfs.h> 43 #include <sys/vfs_opreg.h> 44 45 #include <sys/sockio.h> 46 #include <sys/socket.h> 47 #include <sys/socketvar.h> 48 #include <sys/strsun.h> 49 50 #include <fs/sockfs/sockcommon.h> 51 #include <fs/sockfs/socktpi.h> 52 53 /* 54 * Generic vnode ops 55 */ 56 static int socket_vop_open(struct vnode **, int, struct cred *, 57 caller_context_t *); 58 static int socket_vop_close(struct vnode *, int, int, offset_t, 59 struct cred *, caller_context_t *); 60 static int socket_vop_read(struct vnode *, struct uio *, int, 61 struct cred *, caller_context_t *); 62 static int socket_vop_write(struct vnode *, struct uio *, int, 63 struct cred *, caller_context_t *); 64 static int socket_vop_ioctl(struct vnode *, int, intptr_t, int, 65 struct cred *, int32_t *, caller_context_t *); 66 static int socket_vop_setfl(struct vnode *, int, int, cred_t *, 67 caller_context_t *); 68 static int socket_vop_getattr(struct vnode *, struct vattr *, int, 69 struct cred *, caller_context_t *); 70 static int socket_vop_setattr(struct vnode *, struct vattr *, int, 71 struct cred *, caller_context_t *); 72 static int socket_vop_access(struct vnode *, int, int, struct cred *, 73 caller_context_t *); 74 static int socket_vop_fsync(struct vnode *, int, struct cred *, 75 caller_context_t *); 76 static void socket_vop_inactive(struct vnode *, struct cred *, 77 caller_context_t *); 78 static int socket_vop_fid(struct vnode *, struct fid *, 79 caller_context_t *); 80 static int socket_vop_seek(struct vnode *, offset_t, offset_t *, 81 caller_context_t *); 82 static int socket_vop_poll(struct vnode *, short, int, short *, 83 struct pollhead **, caller_context_t *); 84 85 extern int socket_close_internal(struct sonode *, int, cred_t *); 86 extern void socket_destroy_internal(struct sonode *, cred_t *); 87 88 struct vnodeops *socket_vnodeops; 89 const fs_operation_def_t socket_vnodeops_template[] = { 90 VOPNAME_OPEN, { .vop_open = socket_vop_open }, 91 VOPNAME_CLOSE, { .vop_close = socket_vop_close }, 92 VOPNAME_READ, { .vop_read = socket_vop_read }, 93 VOPNAME_WRITE, { .vop_write = socket_vop_write }, 94 VOPNAME_IOCTL, { .vop_ioctl = socket_vop_ioctl }, 95 VOPNAME_SETFL, { .vop_setfl = socket_vop_setfl }, 96 VOPNAME_GETATTR, { .vop_getattr = socket_vop_getattr }, 97 VOPNAME_SETATTR, { .vop_setattr = socket_vop_setattr }, 98 VOPNAME_ACCESS, { .vop_access = socket_vop_access }, 99 VOPNAME_FSYNC, { .vop_fsync = socket_vop_fsync }, 100 VOPNAME_INACTIVE, { .vop_inactive = socket_vop_inactive }, 101 VOPNAME_FID, { .vop_fid = socket_vop_fid }, 102 VOPNAME_SEEK, { .vop_seek = socket_vop_seek }, 103 VOPNAME_POLL, { .vop_poll = socket_vop_poll }, 104 VOPNAME_DISPOSE, { .error = fs_error }, 105 NULL, NULL 106 }; 107 108 109 /* 110 * generic vnode ops 111 */ 112 113 /*ARGSUSED*/ 114 static int 115 socket_vop_open(struct vnode **vpp, int flag, struct cred *cr, 116 caller_context_t *ct) 117 { 118 struct vnode *vp = *vpp; 119 struct sonode *so = VTOSO(vp); 120 121 flag &= ~FCREAT; /* paranoia */ 122 mutex_enter(&so->so_lock); 123 so->so_count++; 124 mutex_exit(&so->so_lock); 125 126 if (!(curproc->p_flag & SSYS)) 127 sonode_insert_pid(so, curproc->p_pidp->pid_id); 128 129 ASSERT(so->so_count != 0); /* wraparound */ 130 ASSERT(vp->v_type == VSOCK); 131 132 return (0); 133 } 134 135 /*ARGSUSED*/ 136 static int 137 socket_vop_close(struct vnode *vp, int flag, int count, offset_t offset, 138 struct cred *cr, caller_context_t *ct) 139 { 140 struct sonode *so; 141 int error = 0; 142 143 so = VTOSO(vp); 144 ASSERT(vp->v_type == VSOCK); 145 146 cleanlocks(vp, ttoproc(curthread)->p_pid, 0); 147 cleanshares(vp, ttoproc(curthread)->p_pid); 148 149 if (vp->v_stream) 150 strclean(vp); 151 152 if (count > 1) { 153 dprint(2, ("socket_vop_close: count %d\n", count)); 154 return (0); 155 } 156 157 mutex_enter(&so->so_lock); 158 if (--so->so_count == 0) { 159 /* 160 * Initiate connection shutdown. 161 */ 162 mutex_exit(&so->so_lock); 163 error = socket_close_internal(so, flag, cr); 164 } else { 165 mutex_exit(&so->so_lock); 166 } 167 168 return (error); 169 } 170 171 /*ARGSUSED2*/ 172 static int 173 socket_vop_read(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cr, 174 caller_context_t *ct) 175 { 176 struct sonode *so = VTOSO(vp); 177 struct nmsghdr lmsg; 178 179 ASSERT(vp->v_type == VSOCK); 180 bzero((void *)&lmsg, sizeof (lmsg)); 181 182 return (socket_recvmsg(so, &lmsg, uiop, cr)); 183 } 184 185 /*ARGSUSED2*/ 186 static int 187 socket_vop_write(struct vnode *vp, struct uio *uiop, int ioflag, 188 struct cred *cr, caller_context_t *ct) 189 { 190 struct sonode *so = VTOSO(vp); 191 struct nmsghdr lmsg; 192 193 ASSERT(vp->v_type == VSOCK); 194 bzero((void *)&lmsg, sizeof (lmsg)); 195 196 if (!(so->so_mode & SM_BYTESTREAM)) { 197 /* 198 * If the socket is not byte stream set MSG_EOR 199 */ 200 lmsg.msg_flags = MSG_EOR; 201 } 202 203 return (socket_sendmsg(so, &lmsg, uiop, cr)); 204 } 205 206 /*ARGSUSED4*/ 207 static int 208 socket_vop_ioctl(struct vnode *vp, int cmd, intptr_t arg, int mode, 209 struct cred *cr, int32_t *rvalp, caller_context_t *ct) 210 { 211 struct sonode *so = VTOSO(vp); 212 213 ASSERT(vp->v_type == VSOCK); 214 215 switch (cmd) { 216 case F_ASSOCI_PID: 217 if (cr != kcred) 218 return (EPERM); 219 if (!(curproc->p_flag & SSYS)) 220 sonode_insert_pid(so, (pid_t)arg); 221 return (0); 222 223 case F_DASSOC_PID: 224 if (cr != kcred) 225 return (EPERM); 226 if (!(curproc->p_flag & SSYS)) 227 sonode_remove_pid(so, (pid_t)arg); 228 return (0); 229 } 230 231 return (socket_ioctl(so, cmd, arg, mode, cr, rvalp)); 232 } 233 234 /* 235 * Allow any flags. Record FNDELAY and FNONBLOCK so that they can be inherited 236 * from listener to acceptor. 237 */ 238 /* ARGSUSED */ 239 static int 240 socket_vop_setfl(vnode_t *vp, int oflags, int nflags, cred_t *cr, 241 caller_context_t *ct) 242 { 243 struct sonode *so = VTOSO(vp); 244 int error = 0; 245 246 ASSERT(vp->v_type == VSOCK); 247 248 mutex_enter(&so->so_lock); 249 if (nflags & FNDELAY) 250 so->so_state |= SS_NDELAY; 251 else 252 so->so_state &= ~SS_NDELAY; 253 if (nflags & FNONBLOCK) 254 so->so_state |= SS_NONBLOCK; 255 else 256 so->so_state &= ~SS_NONBLOCK; 257 mutex_exit(&so->so_lock); 258 259 if (so->so_state & SS_ASYNC) 260 oflags |= FASYNC; 261 /* 262 * Sets/clears the SS_ASYNC flag based on the presence/absence 263 * of the FASYNC flag passed to fcntl(F_SETFL). 264 * This exists solely for BSD fcntl() FASYNC compatibility. 265 */ 266 if ((oflags ^ nflags) & FASYNC && so->so_version != SOV_STREAM) { 267 int async = nflags & FASYNC; 268 int32_t rv; 269 270 /* 271 * For non-TPI sockets all we have to do is set/remove the 272 * SS_ASYNC bit, but for TPI it is more involved. For that 273 * reason we delegate the job to the protocol's ioctl handler. 274 */ 275 error = socket_ioctl(so, FIOASYNC, (intptr_t)&async, FKIOCTL, 276 cr, &rv); 277 } 278 return (error); 279 } 280 281 282 /* 283 * Get the made up attributes for the vnode. 284 * 4.3BSD returns the current time for all the timestamps. 285 * 4.4BSD returns 0 for all the timestamps. 286 * Here we use the access and modified times recorded in the sonode. 287 * 288 * Just like in BSD there is not effect on the underlying file system node 289 * bound to an AF_UNIX pathname. 290 * 291 * When sockmod has been popped this will act just like a stream. Since 292 * a socket is always a clone there is no need to inspect the attributes 293 * of the "realvp". 294 */ 295 /* ARGSUSED */ 296 int 297 socket_vop_getattr(struct vnode *vp, struct vattr *vap, int flags, 298 struct cred *cr, caller_context_t *ct) 299 { 300 dev_t fsid; 301 struct sonode *so; 302 static int sonode_shift = 0; 303 304 /* 305 * Calculate the amount of bitshift to a sonode pointer which will 306 * still keep it unique. See below. 307 */ 308 if (sonode_shift == 0) 309 sonode_shift = highbit(sizeof (struct sonode)); 310 ASSERT(sonode_shift > 0); 311 312 so = VTOSO(vp); 313 fsid = sockdev; 314 315 if (so->so_version == SOV_STREAM) { 316 /* 317 * The imaginary "sockmod" has been popped - act 318 * as a stream 319 */ 320 vap->va_type = VCHR; 321 vap->va_mode = 0; 322 } else { 323 vap->va_type = vp->v_type; 324 vap->va_mode = S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP| 325 S_IROTH|S_IWOTH; 326 } 327 vap->va_uid = vap->va_gid = 0; 328 vap->va_fsid = fsid; 329 /* 330 * If the va_nodeid is > MAX_USHORT, then i386 stats might fail. 331 * So we shift down the sonode pointer to try and get the most 332 * uniqueness into 16-bits. 333 */ 334 vap->va_nodeid = ((ino_t)so >> sonode_shift) & 0xFFFF; 335 vap->va_nlink = 0; 336 vap->va_size = 0; 337 338 /* 339 * We need to zero out the va_rdev to avoid some fstats getting 340 * EOVERFLOW. This also mimics SunOS 4.x and BSD behavior. 341 */ 342 vap->va_rdev = (dev_t)0; 343 vap->va_blksize = MAXBSIZE; 344 vap->va_nblocks = btod(vap->va_size); 345 346 if (!SOCK_IS_NONSTR(so)) { 347 sotpi_info_t *sti = SOTOTPI(so); 348 349 mutex_enter(&so->so_lock); 350 vap->va_atime.tv_sec = sti->sti_atime; 351 vap->va_mtime.tv_sec = sti->sti_mtime; 352 vap->va_ctime.tv_sec = sti->sti_ctime; 353 mutex_exit(&so->so_lock); 354 } else { 355 vap->va_atime.tv_sec = 0; 356 vap->va_mtime.tv_sec = 0; 357 vap->va_ctime.tv_sec = 0; 358 } 359 360 vap->va_atime.tv_nsec = 0; 361 vap->va_mtime.tv_nsec = 0; 362 vap->va_ctime.tv_nsec = 0; 363 vap->va_seq = 0; 364 365 return (0); 366 } 367 368 /* 369 * Set attributes. 370 * Just like in BSD there is not effect on the underlying file system node 371 * bound to an AF_UNIX pathname. 372 * 373 * When sockmod has been popped this will act just like a stream. Since 374 * a socket is always a clone there is no need to modify the attributes 375 * of the "realvp". 376 */ 377 /* ARGSUSED */ 378 int 379 socket_vop_setattr(struct vnode *vp, struct vattr *vap, int flags, 380 struct cred *cr, caller_context_t *ct) 381 { 382 struct sonode *so = VTOSO(vp); 383 384 /* 385 * If times were changed, and we have a STREAMS socket, then update 386 * the sonode. 387 */ 388 if (!SOCK_IS_NONSTR(so)) { 389 sotpi_info_t *sti = SOTOTPI(so); 390 391 mutex_enter(&so->so_lock); 392 if (vap->va_mask & AT_ATIME) 393 sti->sti_atime = vap->va_atime.tv_sec; 394 if (vap->va_mask & AT_MTIME) { 395 sti->sti_mtime = vap->va_mtime.tv_sec; 396 sti->sti_ctime = gethrestime_sec(); 397 } 398 mutex_exit(&so->so_lock); 399 } 400 401 return (0); 402 } 403 404 /* 405 * Check if user is allowed to access vp. For non-STREAMS based sockets, 406 * there might not be a device attached to the file system. So for those 407 * types of sockets there are no permissions to check. 408 * 409 * XXX Should there be some other mechanism to check access rights? 410 */ 411 /*ARGSUSED*/ 412 int 413 socket_vop_access(struct vnode *vp, int mode, int flags, struct cred *cr, 414 caller_context_t *ct) 415 { 416 struct sonode *so = VTOSO(vp); 417 418 if (!SOCK_IS_NONSTR(so)) { 419 ASSERT(so->so_sockparams->sp_sdev_info.sd_vnode != NULL); 420 return (VOP_ACCESS(so->so_sockparams->sp_sdev_info.sd_vnode, 421 mode, flags, cr, NULL)); 422 } 423 return (0); 424 } 425 426 /* 427 * 4.3BSD and 4.4BSD fail a fsync on a socket with EINVAL. 428 * This code does the same to be compatible and also to not give an 429 * application the impression that the data has actually been "synced" 430 * to the other end of the connection. 431 */ 432 /* ARGSUSED */ 433 int 434 socket_vop_fsync(struct vnode *vp, int syncflag, struct cred *cr, 435 caller_context_t *ct) 436 { 437 return (EINVAL); 438 } 439 440 /*ARGSUSED*/ 441 static void 442 socket_vop_inactive(struct vnode *vp, struct cred *cr, caller_context_t *ct) 443 { 444 struct sonode *so = VTOSO(vp); 445 446 ASSERT(vp->v_type == VSOCK); 447 448 mutex_enter(&vp->v_lock); 449 /* 450 * If no one has reclaimed the vnode, remove from the 451 * cache now. 452 */ 453 if (vp->v_count < 1) 454 cmn_err(CE_PANIC, "socket_inactive: Bad v_count"); 455 456 /* 457 * Drop the temporary hold by vn_rele now 458 */ 459 if (--vp->v_count != 0) { 460 mutex_exit(&vp->v_lock); 461 return; 462 } 463 mutex_exit(&vp->v_lock); 464 465 466 ASSERT(!vn_has_cached_data(vp)); 467 468 /* socket specfic clean-up */ 469 socket_destroy_internal(so, cr); 470 } 471 472 /* ARGSUSED */ 473 int 474 socket_vop_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct) 475 { 476 return (EINVAL); 477 } 478 479 /* 480 * Sockets are not seekable. 481 * (and there is a bug to fix STREAMS to make them fail this as well). 482 */ 483 /*ARGSUSED*/ 484 int 485 socket_vop_seek(struct vnode *vp, offset_t ooff, offset_t *noffp, 486 caller_context_t *ct) 487 { 488 return (ESPIPE); 489 } 490 491 /*ARGSUSED*/ 492 static int 493 socket_vop_poll(struct vnode *vp, short events, int anyyet, short *reventsp, 494 struct pollhead **phpp, caller_context_t *ct) 495 { 496 struct sonode *so = VTOSO(vp); 497 498 ASSERT(vp->v_type == VSOCK); 499 500 return (socket_poll(so, events, anyyet, reventsp, phpp)); 501 }