1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #include <sys/types.h>
28 #include <sys/t_lock.h>
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/bitmap.h>
32 #include <sys/debug.h>
33 #include <sys/errno.h>
34 #include <sys/strsubr.h>
35 #include <sys/cmn_err.h>
36 #include <sys/sysmacros.h>
37 #include <sys/filio.h>
38 #include <sys/flock.h>
39 #include <sys/stat.h>
40 #include <sys/share.h>
41
42 #include <sys/vfs.h>
43 #include <sys/vfs_opreg.h>
44
45 #include <sys/sockio.h>
46 #include <sys/socket.h>
47 #include <sys/socketvar.h>
48 #include <sys/strsun.h>
49
50 #include <fs/sockfs/sockcommon.h>
51 #include <fs/sockfs/socktpi.h>
52
53 /*
54 * Generic vnode ops
55 */
56 static int socket_vop_open(struct vnode **, int, struct cred *,
57 caller_context_t *);
58 static int socket_vop_close(struct vnode *, int, int, offset_t,
59 struct cred *, caller_context_t *);
60 static int socket_vop_read(struct vnode *, struct uio *, int,
61 struct cred *, caller_context_t *);
62 static int socket_vop_write(struct vnode *, struct uio *, int,
63 struct cred *, caller_context_t *);
64 static int socket_vop_ioctl(struct vnode *, int, intptr_t, int,
65 struct cred *, int32_t *, caller_context_t *);
66 static int socket_vop_setfl(struct vnode *, int, int, cred_t *,
67 caller_context_t *);
68 static int socket_vop_getattr(struct vnode *, struct vattr *, int,
69 struct cred *, caller_context_t *);
70 static int socket_vop_setattr(struct vnode *, struct vattr *, int,
71 struct cred *, caller_context_t *);
72 static int socket_vop_access(struct vnode *, int, int, struct cred *,
73 caller_context_t *);
74 static int socket_vop_fsync(struct vnode *, int, struct cred *,
75 caller_context_t *);
76 static void socket_vop_inactive(struct vnode *, struct cred *,
77 caller_context_t *);
78 static int socket_vop_fid(struct vnode *, struct fid *,
79 caller_context_t *);
80 static int socket_vop_seek(struct vnode *, offset_t, offset_t *,
81 caller_context_t *);
82 static int socket_vop_poll(struct vnode *, short, int, short *,
83 struct pollhead **, caller_context_t *);
84
85 extern int socket_close_internal(struct sonode *, int, cred_t *);
86 extern void socket_destroy_internal(struct sonode *, cred_t *);
87
88 struct vnodeops *socket_vnodeops;
89 const fs_operation_def_t socket_vnodeops_template[] = {
90 VOPNAME_OPEN, { .vop_open = socket_vop_open },
91 VOPNAME_CLOSE, { .vop_close = socket_vop_close },
92 VOPNAME_READ, { .vop_read = socket_vop_read },
93 VOPNAME_WRITE, { .vop_write = socket_vop_write },
94 VOPNAME_IOCTL, { .vop_ioctl = socket_vop_ioctl },
95 VOPNAME_SETFL, { .vop_setfl = socket_vop_setfl },
96 VOPNAME_GETATTR, { .vop_getattr = socket_vop_getattr },
97 VOPNAME_SETATTR, { .vop_setattr = socket_vop_setattr },
98 VOPNAME_ACCESS, { .vop_access = socket_vop_access },
99 VOPNAME_FSYNC, { .vop_fsync = socket_vop_fsync },
100 VOPNAME_INACTIVE, { .vop_inactive = socket_vop_inactive },
101 VOPNAME_FID, { .vop_fid = socket_vop_fid },
102 VOPNAME_SEEK, { .vop_seek = socket_vop_seek },
103 VOPNAME_POLL, { .vop_poll = socket_vop_poll },
104 VOPNAME_DISPOSE, { .error = fs_error },
105 NULL, NULL
106 };
107
108
109 /*
110 * generic vnode ops
111 */
112
113 /*ARGSUSED*/
114 static int
115 socket_vop_open(struct vnode **vpp, int flag, struct cred *cr,
116 caller_context_t *ct)
117 {
118 struct vnode *vp = *vpp;
119 struct sonode *so = VTOSO(vp);
120
121 flag &= ~FCREAT; /* paranoia */
122 mutex_enter(&so->so_lock);
123 so->so_count++;
124 mutex_exit(&so->so_lock);
125
126 sonode_insert_pid(so, curproc);
127
128 ASSERT(so->so_count != 0); /* wraparound */
129 ASSERT(vp->v_type == VSOCK);
130
131 return (0);
132 }
133
134 /*ARGSUSED*/
135 static int
136 socket_vop_close(struct vnode *vp, int flag, int count, offset_t offset,
137 struct cred *cr, caller_context_t *ct)
138 {
139 struct sonode *so;
140 int error = 0;
141
142 so = VTOSO(vp);
143 ASSERT(vp->v_type == VSOCK);
144
145 cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
146 cleanshares(vp, ttoproc(curthread)->p_pid);
147
148 if (vp->v_stream)
149 strclean(vp);
150
151 if (count > 1) {
152 dprint(2, ("socket_vop_close: count %d\n", count));
153 return (0);
154 }
155
156 mutex_enter(&so->so_lock);
157 if (--so->so_count == 0) {
158 /*
159 * Initiate connection shutdown.
160 */
161 mutex_exit(&so->so_lock);
162 error = socket_close_internal(so, flag, cr);
163 } else {
164 mutex_exit(&so->so_lock);
165 }
166
167 return (error);
168 }
169
170 /*ARGSUSED2*/
171 static int
172 socket_vop_read(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cr,
173 caller_context_t *ct)
174 {
175 struct sonode *so = VTOSO(vp);
176 struct nmsghdr lmsg;
177
178 ASSERT(vp->v_type == VSOCK);
179 bzero((void *)&lmsg, sizeof (lmsg));
180
181 return (socket_recvmsg(so, &lmsg, uiop, cr));
182 }
183
184 /*ARGSUSED2*/
185 static int
186 socket_vop_write(struct vnode *vp, struct uio *uiop, int ioflag,
187 struct cred *cr, caller_context_t *ct)
188 {
189 struct sonode *so = VTOSO(vp);
190 struct nmsghdr lmsg;
191
192 ASSERT(vp->v_type == VSOCK);
193 bzero((void *)&lmsg, sizeof (lmsg));
194
195 if (!(so->so_mode & SM_BYTESTREAM)) {
196 /*
197 * If the socket is not byte stream set MSG_EOR
198 */
199 lmsg.msg_flags = MSG_EOR;
200 }
201
202 return (socket_sendmsg(so, &lmsg, uiop, cr));
203 }
204
205 /*ARGSUSED4*/
206 static int
207 socket_vop_ioctl(struct vnode *vp, int cmd, intptr_t arg, int mode,
208 struct cred *cr, int32_t *rvalp, caller_context_t *ct)
209 {
210 struct sonode *so = VTOSO(vp);
211
212 ASSERT(vp->v_type == VSOCK);
213
214 switch (cmd) {
215 case F_FORKED: {
216 if (cr != kcred)
217 return (-1);
218 sonode_insert_pid(so, (proc_t *)arg);
219 return (0);
220 }
221
222 case F_CLOSED: {
223 if (cr != kcred)
224 return (-1);
225 sonode_remove_pid(so, (proc_t *)arg);
226 return (0);
227 }
228 }
229
230 return (socket_ioctl(so, cmd, arg, mode, cr, rvalp));
231 }
232
233 /*
234 * Allow any flags. Record FNDELAY and FNONBLOCK so that they can be inherited
235 * from listener to acceptor.
236 */
237 /* ARGSUSED */
238 static int
239 socket_vop_setfl(vnode_t *vp, int oflags, int nflags, cred_t *cr,
240 caller_context_t *ct)
241 {
242 struct sonode *so = VTOSO(vp);
243 int error = 0;
244
245 ASSERT(vp->v_type == VSOCK);
246
247 mutex_enter(&so->so_lock);
248 if (nflags & FNDELAY)
249 so->so_state |= SS_NDELAY;
250 else
251 so->so_state &= ~SS_NDELAY;
252 if (nflags & FNONBLOCK)
253 so->so_state |= SS_NONBLOCK;
254 else
255 so->so_state &= ~SS_NONBLOCK;
256 mutex_exit(&so->so_lock);
257
258 if (so->so_state & SS_ASYNC)
259 oflags |= FASYNC;
260 /*
261 * Sets/clears the SS_ASYNC flag based on the presence/absence
262 * of the FASYNC flag passed to fcntl(F_SETFL).
263 * This exists solely for BSD fcntl() FASYNC compatibility.
264 */
265 if ((oflags ^ nflags) & FASYNC && so->so_version != SOV_STREAM) {
266 int async = nflags & FASYNC;
267 int32_t rv;
268
269 /*
270 * For non-TPI sockets all we have to do is set/remove the
271 * SS_ASYNC bit, but for TPI it is more involved. For that
272 * reason we delegate the job to the protocol's ioctl handler.
273 */
274 error = socket_ioctl(so, FIOASYNC, (intptr_t)&async, FKIOCTL,
275 cr, &rv);
276 }
277 return (error);
278 }
279
280
281 /*
282 * Get the made up attributes for the vnode.
283 * 4.3BSD returns the current time for all the timestamps.
284 * 4.4BSD returns 0 for all the timestamps.
285 * Here we use the access and modified times recorded in the sonode.
286 *
287 * Just like in BSD there is not effect on the underlying file system node
288 * bound to an AF_UNIX pathname.
289 *
290 * When sockmod has been popped this will act just like a stream. Since
291 * a socket is always a clone there is no need to inspect the attributes
292 * of the "realvp".
293 */
294 /* ARGSUSED */
295 int
296 socket_vop_getattr(struct vnode *vp, struct vattr *vap, int flags,
297 struct cred *cr, caller_context_t *ct)
298 {
299 dev_t fsid;
300 struct sonode *so;
301 static int sonode_shift = 0;
302
303 /*
304 * Calculate the amount of bitshift to a sonode pointer which will
305 * still keep it unique. See below.
306 */
307 if (sonode_shift == 0)
308 sonode_shift = highbit(sizeof (struct sonode));
309 ASSERT(sonode_shift > 0);
310
311 so = VTOSO(vp);
312 fsid = sockdev;
313
314 if (so->so_version == SOV_STREAM) {
315 /*
316 * The imaginary "sockmod" has been popped - act
317 * as a stream
318 */
319 vap->va_type = VCHR;
320 vap->va_mode = 0;
321 } else {
322 vap->va_type = vp->v_type;
323 vap->va_mode = S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|
324 S_IROTH|S_IWOTH;
325 }
326 vap->va_uid = vap->va_gid = 0;
327 vap->va_fsid = fsid;
328 /*
329 * If the va_nodeid is > MAX_USHORT, then i386 stats might fail.
330 * So we shift down the sonode pointer to try and get the most
331 * uniqueness into 16-bits.
332 */
333 vap->va_nodeid = ((ino_t)so >> sonode_shift) & 0xFFFF;
334 vap->va_nlink = 0;
335 vap->va_size = 0;
336
337 /*
338 * We need to zero out the va_rdev to avoid some fstats getting
339 * EOVERFLOW. This also mimics SunOS 4.x and BSD behavior.
340 */
341 vap->va_rdev = (dev_t)0;
342 vap->va_blksize = MAXBSIZE;
343 vap->va_nblocks = btod(vap->va_size);
344
345 if (!SOCK_IS_NONSTR(so)) {
346 sotpi_info_t *sti = SOTOTPI(so);
347
348 mutex_enter(&so->so_lock);
349 vap->va_atime.tv_sec = sti->sti_atime;
350 vap->va_mtime.tv_sec = sti->sti_mtime;
351 vap->va_ctime.tv_sec = sti->sti_ctime;
352 mutex_exit(&so->so_lock);
353 } else {
354 vap->va_atime.tv_sec = 0;
355 vap->va_mtime.tv_sec = 0;
356 vap->va_ctime.tv_sec = 0;
357 }
358
359 vap->va_atime.tv_nsec = 0;
360 vap->va_mtime.tv_nsec = 0;
361 vap->va_ctime.tv_nsec = 0;
362 vap->va_seq = 0;
363
364 return (0);
365 }
366
367 /*
368 * Set attributes.
369 * Just like in BSD there is not effect on the underlying file system node
370 * bound to an AF_UNIX pathname.
371 *
372 * When sockmod has been popped this will act just like a stream. Since
373 * a socket is always a clone there is no need to modify the attributes
374 * of the "realvp".
375 */
376 /* ARGSUSED */
377 int
378 socket_vop_setattr(struct vnode *vp, struct vattr *vap, int flags,
379 struct cred *cr, caller_context_t *ct)
380 {
381 struct sonode *so = VTOSO(vp);
382
383 /*
384 * If times were changed, and we have a STREAMS socket, then update
385 * the sonode.
386 */
387 if (!SOCK_IS_NONSTR(so)) {
388 sotpi_info_t *sti = SOTOTPI(so);
389
390 mutex_enter(&so->so_lock);
391 if (vap->va_mask & AT_ATIME)
392 sti->sti_atime = vap->va_atime.tv_sec;
393 if (vap->va_mask & AT_MTIME) {
394 sti->sti_mtime = vap->va_mtime.tv_sec;
395 sti->sti_ctime = gethrestime_sec();
396 }
397 mutex_exit(&so->so_lock);
398 }
399
400 return (0);
401 }
402
403 /*
404 * Check if user is allowed to access vp. For non-STREAMS based sockets,
405 * there might not be a device attached to the file system. So for those
406 * types of sockets there are no permissions to check.
407 *
408 * XXX Should there be some other mechanism to check access rights?
409 */
410 /*ARGSUSED*/
411 int
412 socket_vop_access(struct vnode *vp, int mode, int flags, struct cred *cr,
413 caller_context_t *ct)
414 {
415 struct sonode *so = VTOSO(vp);
416
417 if (!SOCK_IS_NONSTR(so)) {
418 ASSERT(so->so_sockparams->sp_sdev_info.sd_vnode != NULL);
419 return (VOP_ACCESS(so->so_sockparams->sp_sdev_info.sd_vnode,
420 mode, flags, cr, NULL));
421 }
422 return (0);
423 }
424
425 /*
426 * 4.3BSD and 4.4BSD fail a fsync on a socket with EINVAL.
427 * This code does the same to be compatible and also to not give an
428 * application the impression that the data has actually been "synced"
429 * to the other end of the connection.
430 */
431 /* ARGSUSED */
432 int
433 socket_vop_fsync(struct vnode *vp, int syncflag, struct cred *cr,
434 caller_context_t *ct)
435 {
436 return (EINVAL);
437 }
438
439 /*ARGSUSED*/
440 static void
441 socket_vop_inactive(struct vnode *vp, struct cred *cr, caller_context_t *ct)
442 {
443 struct sonode *so = VTOSO(vp);
444
445 ASSERT(vp->v_type == VSOCK);
446
447 mutex_enter(&vp->v_lock);
448 /*
449 * If no one has reclaimed the vnode, remove from the
450 * cache now.
451 */
452 if (vp->v_count < 1)
453 cmn_err(CE_PANIC, "socket_inactive: Bad v_count");
454
455 /*
456 * Drop the temporary hold by vn_rele now
457 */
458 if (--vp->v_count != 0) {
459 mutex_exit(&vp->v_lock);
460 return;
461 }
462 mutex_exit(&vp->v_lock);
463
464
465 ASSERT(!vn_has_cached_data(vp));
466
467 /* socket specfic clean-up */
468 socket_destroy_internal(so, cr);
469 }
470
471 /* ARGSUSED */
472 int
473 socket_vop_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct)
474 {
475 return (EINVAL);
476 }
477
478 /*
479 * Sockets are not seekable.
480 * (and there is a bug to fix STREAMS to make them fail this as well).
481 */
482 /*ARGSUSED*/
483 int
484 socket_vop_seek(struct vnode *vp, offset_t ooff, offset_t *noffp,
485 caller_context_t *ct)
486 {
487 return (ESPIPE);
488 }
489
490 /*ARGSUSED*/
491 static int
492 socket_vop_poll(struct vnode *vp, short events, int anyyet, short *reventsp,
493 struct pollhead **phpp, caller_context_t *ct)
494 {
495 struct sonode *so = VTOSO(vp);
496
497 ASSERT(vp->v_type == VSOCK);
498
499 return (socket_poll(so, events, anyyet, reventsp, phpp));
500 }