1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #include <sys/types.h>
28 #include <sys/t_lock.h>
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/bitmap.h>
32 #include <sys/debug.h>
33 #include <sys/errno.h>
34 #include <sys/strsubr.h>
35 #include <sys/cmn_err.h>
36 #include <sys/sysmacros.h>
37 #include <sys/filio.h>
38 #include <sys/flock.h>
39 #include <sys/stat.h>
40 #include <sys/share.h>
41
42 #include <sys/vfs.h>
43 #include <sys/vfs_opreg.h>
44
45 #include <sys/sockio.h>
46 #include <sys/socket.h>
47 #include <sys/socketvar.h>
48 #include <sys/strsun.h>
49
50 #include <fs/sockfs/sockcommon.h>
51 #include <fs/sockfs/socktpi.h>
52
53 /*
54 * Generic vnode ops
55 */
56 static int socket_vop_open(struct vnode **, int, struct cred *,
57 caller_context_t *);
58 static int socket_vop_close(struct vnode *, int, int, offset_t,
59 struct cred *, caller_context_t *);
60 static int socket_vop_read(struct vnode *, struct uio *, int,
61 struct cred *, caller_context_t *);
62 static int socket_vop_write(struct vnode *, struct uio *, int,
63 struct cred *, caller_context_t *);
64 static int socket_vop_ioctl(struct vnode *, int, intptr_t, int,
65 struct cred *, int32_t *, caller_context_t *);
66 static int socket_vop_setfl(struct vnode *, int, int, cred_t *,
67 caller_context_t *);
68 static int socket_vop_getattr(struct vnode *, struct vattr *, int,
69 struct cred *, caller_context_t *);
70 static int socket_vop_setattr(struct vnode *, struct vattr *, int,
71 struct cred *, caller_context_t *);
72 static int socket_vop_access(struct vnode *, int, int, struct cred *,
73 caller_context_t *);
74 static int socket_vop_fsync(struct vnode *, int, struct cred *,
75 caller_context_t *);
76 static void socket_vop_inactive(struct vnode *, struct cred *,
77 caller_context_t *);
78 static int socket_vop_fid(struct vnode *, struct fid *,
79 caller_context_t *);
80 static int socket_vop_seek(struct vnode *, offset_t, offset_t *,
81 caller_context_t *);
82 static int socket_vop_poll(struct vnode *, short, int, short *,
83 struct pollhead **, caller_context_t *);
84
85 extern int socket_close_internal(struct sonode *, int, cred_t *);
86 extern void socket_destroy_internal(struct sonode *, cred_t *);
87
88 struct vnodeops *socket_vnodeops;
89 const fs_operation_def_t socket_vnodeops_template[] = {
90 VOPNAME_OPEN, { .vop_open = socket_vop_open },
91 VOPNAME_CLOSE, { .vop_close = socket_vop_close },
92 VOPNAME_READ, { .vop_read = socket_vop_read },
93 VOPNAME_WRITE, { .vop_write = socket_vop_write },
94 VOPNAME_IOCTL, { .vop_ioctl = socket_vop_ioctl },
95 VOPNAME_SETFL, { .vop_setfl = socket_vop_setfl },
96 VOPNAME_GETATTR, { .vop_getattr = socket_vop_getattr },
97 VOPNAME_SETATTR, { .vop_setattr = socket_vop_setattr },
98 VOPNAME_ACCESS, { .vop_access = socket_vop_access },
99 VOPNAME_FSYNC, { .vop_fsync = socket_vop_fsync },
100 VOPNAME_INACTIVE, { .vop_inactive = socket_vop_inactive },
101 VOPNAME_FID, { .vop_fid = socket_vop_fid },
102 VOPNAME_SEEK, { .vop_seek = socket_vop_seek },
103 VOPNAME_POLL, { .vop_poll = socket_vop_poll },
104 VOPNAME_DISPOSE, { .error = fs_error },
105 NULL, NULL
106 };
107
108
109 /*
110 * generic vnode ops
111 */
112
113 /*ARGSUSED*/
114 static int
115 socket_vop_open(struct vnode **vpp, int flag, struct cred *cr,
116 caller_context_t *ct)
117 {
118 struct vnode *vp = *vpp;
119 struct sonode *so = VTOSO(vp);
120
121 flag &= ~FCREAT; /* paranoia */
122 mutex_enter(&so->so_lock);
123 so->so_count++;
124 mutex_exit(&so->so_lock);
125
126 if (!(curproc->p_flag & SSYS))
127 sonode_insert_pid(so, curproc->p_pidp->pid_id);
128
129 ASSERT(so->so_count != 0); /* wraparound */
130 ASSERT(vp->v_type == VSOCK);
131
132 return (0);
133 }
134
135 /*ARGSUSED*/
136 static int
137 socket_vop_close(struct vnode *vp, int flag, int count, offset_t offset,
138 struct cred *cr, caller_context_t *ct)
139 {
140 struct sonode *so;
141 int error = 0;
142
143 so = VTOSO(vp);
144 ASSERT(vp->v_type == VSOCK);
145
146 cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
147 cleanshares(vp, ttoproc(curthread)->p_pid);
148
149 if (vp->v_stream)
150 strclean(vp);
151
152 if (count > 1) {
153 dprint(2, ("socket_vop_close: count %d\n", count));
154 return (0);
155 }
156
157 mutex_enter(&so->so_lock);
158 if (--so->so_count == 0) {
159 /*
160 * Initiate connection shutdown.
161 */
162 mutex_exit(&so->so_lock);
163 error = socket_close_internal(so, flag, cr);
164 } else {
165 mutex_exit(&so->so_lock);
166 }
167
168 return (error);
169 }
170
171 /*ARGSUSED2*/
172 static int
173 socket_vop_read(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cr,
174 caller_context_t *ct)
175 {
176 struct sonode *so = VTOSO(vp);
177 struct nmsghdr lmsg;
178
179 ASSERT(vp->v_type == VSOCK);
180 bzero((void *)&lmsg, sizeof (lmsg));
181
182 return (socket_recvmsg(so, &lmsg, uiop, cr));
183 }
184
185 /*ARGSUSED2*/
186 static int
187 socket_vop_write(struct vnode *vp, struct uio *uiop, int ioflag,
188 struct cred *cr, caller_context_t *ct)
189 {
190 struct sonode *so = VTOSO(vp);
191 struct nmsghdr lmsg;
192
193 ASSERT(vp->v_type == VSOCK);
194 bzero((void *)&lmsg, sizeof (lmsg));
195
196 if (!(so->so_mode & SM_BYTESTREAM)) {
197 /*
198 * If the socket is not byte stream set MSG_EOR
199 */
200 lmsg.msg_flags = MSG_EOR;
201 }
202
203 return (socket_sendmsg(so, &lmsg, uiop, cr));
204 }
205
206 /*ARGSUSED4*/
207 static int
208 socket_vop_ioctl(struct vnode *vp, int cmd, intptr_t arg, int mode,
209 struct cred *cr, int32_t *rvalp, caller_context_t *ct)
210 {
211 struct sonode *so = VTOSO(vp);
212
213 ASSERT(vp->v_type == VSOCK);
214
215 switch (cmd) {
216 case F_ASSOCI_PID:
217 if (cr != kcred)
218 return (EPERM);
219 if (!(curproc->p_flag & SSYS))
220 sonode_insert_pid(so, (pid_t)arg);
221 return (0);
222
223 case F_DASSOC_PID:
224 if (cr != kcred)
225 return (EPERM);
226 if (!(curproc->p_flag & SSYS))
227 sonode_remove_pid(so, (pid_t)arg);
228 return (0);
229 }
230
231 return (socket_ioctl(so, cmd, arg, mode, cr, rvalp));
232 }
233
234 /*
235 * Allow any flags. Record FNDELAY and FNONBLOCK so that they can be inherited
236 * from listener to acceptor.
237 */
238 /* ARGSUSED */
239 static int
240 socket_vop_setfl(vnode_t *vp, int oflags, int nflags, cred_t *cr,
241 caller_context_t *ct)
242 {
243 struct sonode *so = VTOSO(vp);
244 int error = 0;
245
246 ASSERT(vp->v_type == VSOCK);
247
248 mutex_enter(&so->so_lock);
249 if (nflags & FNDELAY)
250 so->so_state |= SS_NDELAY;
251 else
252 so->so_state &= ~SS_NDELAY;
253 if (nflags & FNONBLOCK)
254 so->so_state |= SS_NONBLOCK;
255 else
256 so->so_state &= ~SS_NONBLOCK;
257 mutex_exit(&so->so_lock);
258
259 if (so->so_state & SS_ASYNC)
260 oflags |= FASYNC;
261 /*
262 * Sets/clears the SS_ASYNC flag based on the presence/absence
263 * of the FASYNC flag passed to fcntl(F_SETFL).
264 * This exists solely for BSD fcntl() FASYNC compatibility.
265 */
266 if ((oflags ^ nflags) & FASYNC && so->so_version != SOV_STREAM) {
267 int async = nflags & FASYNC;
268 int32_t rv;
269
270 /*
271 * For non-TPI sockets all we have to do is set/remove the
272 * SS_ASYNC bit, but for TPI it is more involved. For that
273 * reason we delegate the job to the protocol's ioctl handler.
274 */
275 error = socket_ioctl(so, FIOASYNC, (intptr_t)&async, FKIOCTL,
276 cr, &rv);
277 }
278 return (error);
279 }
280
281
282 /*
283 * Get the made up attributes for the vnode.
284 * 4.3BSD returns the current time for all the timestamps.
285 * 4.4BSD returns 0 for all the timestamps.
286 * Here we use the access and modified times recorded in the sonode.
287 *
288 * Just like in BSD there is not effect on the underlying file system node
289 * bound to an AF_UNIX pathname.
290 *
291 * When sockmod has been popped this will act just like a stream. Since
292 * a socket is always a clone there is no need to inspect the attributes
293 * of the "realvp".
294 */
295 /* ARGSUSED */
296 int
297 socket_vop_getattr(struct vnode *vp, struct vattr *vap, int flags,
298 struct cred *cr, caller_context_t *ct)
299 {
300 dev_t fsid;
301 struct sonode *so;
302 static int sonode_shift = 0;
303
304 /*
305 * Calculate the amount of bitshift to a sonode pointer which will
306 * still keep it unique. See below.
307 */
308 if (sonode_shift == 0)
309 sonode_shift = highbit(sizeof (struct sonode));
310 ASSERT(sonode_shift > 0);
311
312 so = VTOSO(vp);
313 fsid = sockdev;
314
315 if (so->so_version == SOV_STREAM) {
316 /*
317 * The imaginary "sockmod" has been popped - act
318 * as a stream
319 */
320 vap->va_type = VCHR;
321 vap->va_mode = 0;
322 } else {
323 vap->va_type = vp->v_type;
324 vap->va_mode = S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|
325 S_IROTH|S_IWOTH;
326 }
327 vap->va_uid = vap->va_gid = 0;
328 vap->va_fsid = fsid;
329 /*
330 * If the va_nodeid is > MAX_USHORT, then i386 stats might fail.
331 * So we shift down the sonode pointer to try and get the most
332 * uniqueness into 16-bits.
333 */
334 vap->va_nodeid = ((ino_t)so >> sonode_shift) & 0xFFFF;
335 vap->va_nlink = 0;
336 vap->va_size = 0;
337
338 /*
339 * We need to zero out the va_rdev to avoid some fstats getting
340 * EOVERFLOW. This also mimics SunOS 4.x and BSD behavior.
341 */
342 vap->va_rdev = (dev_t)0;
343 vap->va_blksize = MAXBSIZE;
344 vap->va_nblocks = btod(vap->va_size);
345
346 if (!SOCK_IS_NONSTR(so)) {
347 sotpi_info_t *sti = SOTOTPI(so);
348
349 mutex_enter(&so->so_lock);
350 vap->va_atime.tv_sec = sti->sti_atime;
351 vap->va_mtime.tv_sec = sti->sti_mtime;
352 vap->va_ctime.tv_sec = sti->sti_ctime;
353 mutex_exit(&so->so_lock);
354 } else {
355 vap->va_atime.tv_sec = 0;
356 vap->va_mtime.tv_sec = 0;
357 vap->va_ctime.tv_sec = 0;
358 }
359
360 vap->va_atime.tv_nsec = 0;
361 vap->va_mtime.tv_nsec = 0;
362 vap->va_ctime.tv_nsec = 0;
363 vap->va_seq = 0;
364
365 return (0);
366 }
367
368 /*
369 * Set attributes.
370 * Just like in BSD there is not effect on the underlying file system node
371 * bound to an AF_UNIX pathname.
372 *
373 * When sockmod has been popped this will act just like a stream. Since
374 * a socket is always a clone there is no need to modify the attributes
375 * of the "realvp".
376 */
377 /* ARGSUSED */
378 int
379 socket_vop_setattr(struct vnode *vp, struct vattr *vap, int flags,
380 struct cred *cr, caller_context_t *ct)
381 {
382 struct sonode *so = VTOSO(vp);
383
384 /*
385 * If times were changed, and we have a STREAMS socket, then update
386 * the sonode.
387 */
388 if (!SOCK_IS_NONSTR(so)) {
389 sotpi_info_t *sti = SOTOTPI(so);
390
391 mutex_enter(&so->so_lock);
392 if (vap->va_mask & AT_ATIME)
393 sti->sti_atime = vap->va_atime.tv_sec;
394 if (vap->va_mask & AT_MTIME) {
395 sti->sti_mtime = vap->va_mtime.tv_sec;
396 sti->sti_ctime = gethrestime_sec();
397 }
398 mutex_exit(&so->so_lock);
399 }
400
401 return (0);
402 }
403
404 /*
405 * Check if user is allowed to access vp. For non-STREAMS based sockets,
406 * there might not be a device attached to the file system. So for those
407 * types of sockets there are no permissions to check.
408 *
409 * XXX Should there be some other mechanism to check access rights?
410 */
411 /*ARGSUSED*/
412 int
413 socket_vop_access(struct vnode *vp, int mode, int flags, struct cred *cr,
414 caller_context_t *ct)
415 {
416 struct sonode *so = VTOSO(vp);
417
418 if (!SOCK_IS_NONSTR(so)) {
419 ASSERT(so->so_sockparams->sp_sdev_info.sd_vnode != NULL);
420 return (VOP_ACCESS(so->so_sockparams->sp_sdev_info.sd_vnode,
421 mode, flags, cr, NULL));
422 }
423 return (0);
424 }
425
426 /*
427 * 4.3BSD and 4.4BSD fail a fsync on a socket with EINVAL.
428 * This code does the same to be compatible and also to not give an
429 * application the impression that the data has actually been "synced"
430 * to the other end of the connection.
431 */
432 /* ARGSUSED */
433 int
434 socket_vop_fsync(struct vnode *vp, int syncflag, struct cred *cr,
435 caller_context_t *ct)
436 {
437 return (EINVAL);
438 }
439
440 /*ARGSUSED*/
441 static void
442 socket_vop_inactive(struct vnode *vp, struct cred *cr, caller_context_t *ct)
443 {
444 struct sonode *so = VTOSO(vp);
445
446 ASSERT(vp->v_type == VSOCK);
447
448 mutex_enter(&vp->v_lock);
449 /*
450 * If no one has reclaimed the vnode, remove from the
451 * cache now.
452 */
453 if (vp->v_count < 1)
454 cmn_err(CE_PANIC, "socket_inactive: Bad v_count");
455
456 /*
457 * Drop the temporary hold by vn_rele now
458 */
459 if (--vp->v_count != 0) {
460 mutex_exit(&vp->v_lock);
461 return;
462 }
463 mutex_exit(&vp->v_lock);
464
465
466 ASSERT(!vn_has_cached_data(vp));
467
468 /* socket specfic clean-up */
469 socket_destroy_internal(so, cr);
470 }
471
472 /* ARGSUSED */
473 int
474 socket_vop_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct)
475 {
476 return (EINVAL);
477 }
478
479 /*
480 * Sockets are not seekable.
481 * (and there is a bug to fix STREAMS to make them fail this as well).
482 */
483 /*ARGSUSED*/
484 int
485 socket_vop_seek(struct vnode *vp, offset_t ooff, offset_t *noffp,
486 caller_context_t *ct)
487 {
488 return (ESPIPE);
489 }
490
491 /*ARGSUSED*/
492 static int
493 socket_vop_poll(struct vnode *vp, short events, int anyyet, short *reventsp,
494 struct pollhead **phpp, caller_context_t *ct)
495 {
496 struct sonode *so = VTOSO(vp);
497
498 ASSERT(vp->v_type == VSOCK);
499
500 return (socket_poll(so, events, anyyet, reventsp, phpp));
501 }