1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /*
27 * STREAMS Packet Filter Module
28 *
29 * This module applies a filter to messages arriving on its read
 * queue, passing on messages that the filter accepts and discarding
31 * the others. It supports ioctls for setting the filter.
32 *
33 * On the write side, the module simply passes everything through
34 * unchanged.
35 *
36 * Based on SunOS 4.x version. This version has minor changes:
37 * - general SVR4 porting stuff
38 * - change name and prefixes from "nit" buffer to streams buffer
39 * - multithreading assumes configured as D_MTQPAIR
40 */
41
42 #include <sys/types.h>
43 #include <sys/sysmacros.h>
44 #include <sys/errno.h>
45 #include <sys/debug.h>
46 #include <sys/time.h>
47 #include <sys/stropts.h>
48 #include <sys/stream.h>
49 #include <sys/conf.h>
50 #include <sys/ddi.h>
51 #include <sys/sunddi.h>
52 #include <sys/kmem.h>
53 #include <sys/strsun.h>
54 #include <sys/pfmod.h>
55 #include <sys/modctl.h>
56 #include <netinet/in.h>
57
58 /*
59 * Expanded version of the Packetfilt structure that includes
60 * some additional fields that aid filter execution efficiency.
61 */
62 struct epacketfilt {
63 struct Pf_ext_packetfilt pf;
64 #define pf_Priority pf.Pf_Priority
65 #define pf_FilterLen pf.Pf_FilterLen
66 #define pf_Filter pf.Pf_Filter
67 /* pointer to word immediately past end of filter */
68 ushort_t *pf_FilterEnd;
69 /* length in bytes of packet prefix the filter examines */
70 ushort_t pf_PByteLen;
71 };
72
/*
 * (Internal) packet descriptor for FilterPacket
 *
 * Describes a packet as two regions, each addressed as an array of
 * ushort_t and measured in shorts rather than bytes.  pfrput fills
 * one in from the first non-M_PROTO mblk (the header) and the mblk
 * chained after it (the body); when there is no such second mblk,
 * pd_body is NULL and pd_bodylen is 0.
 */
struct packdesc {
	ushort_t	*pd_hdr;	/* header starting address */
	uint_t		pd_hdrlen;	/* header length in shorts */
	ushort_t	*pd_body;	/* body starting address */
	uint_t		pd_bodylen;	/* body length in shorts */
};
82
83
/*
 * Function prototypes.
 *
 * pfopen/pfclose are the module open and close routines, pfioctl
 * handles M_IOCTL messages seen by the write-side put procedure, and
 * FilterPacket evaluates the installed filter against one packet.
 */
static	int	pfopen(queue_t *, dev_t *, int, int, cred_t *);
static	int	pfclose(queue_t *);
static void	pfioctl(queue_t *wq, mblk_t *mp);
static	int	FilterPacket(struct packdesc *, struct epacketfilt *);
/*
 * To save instructions, since STREAMS ignores the return value
 * from these functions, they are defined as void here. Kind of icky, but...
 *
 * (The qinit tables below cast them to int (*)() to match the type
 * of the put-procedure slot.)
 */
static void	pfwput(queue_t *, mblk_t *);
static void	pfrput(queue_t *, mblk_t *);
97
/*
 * Module information shared by the read and write queues (both
 * qinit tables below point at it).
 */
static struct module_info pf_minfo = {
	22,		/* mi_idnum */
	"pfmod",	/* mi_idname */
	0,		/* mi_minpsz */
	INFPSZ,		/* mi_maxpsz */
	0,		/* mi_hiwat */
	0		/* mi_lowat */
};
106
/*
 * Read-side queue initialization: pfrput is the put procedure, there
 * is no service procedure, and the open/close routines are attached
 * here.
 */
static struct qinit pf_rinit = {
	(int (*)())pfrput,	/* qi_putp */
	NULL,			/* qi_srvp */
	pfopen,			/* qi_qopen */
	pfclose,		/* qi_qclose */
	NULL,			/* qi_qadmin */
	&pf_minfo,		/* qi_minfo */
	NULL			/* qi_mstat */
};
116
/*
 * Write-side queue initialization: pfwput is the put procedure and
 * there is no service procedure.  Open and close are left NULL here;
 * they are supplied by the read-side table.
 */
static struct qinit pf_winit = {
	(int (*)())pfwput,	/* qi_putp */
	NULL,			/* qi_srvp */
	NULL,			/* qi_qopen */
	NULL,			/* qi_qclose */
	NULL,			/* qi_qadmin */
	&pf_minfo,		/* qi_minfo */
	NULL			/* qi_mstat */
};
126
/*
 * Stream table tying the read and write qinit structures together.
 * The multiplexor entries are unused and left NULL.
 */
static struct streamtab pf_info = {
	&pf_rinit,	/* st_rdinit */
	&pf_winit,	/* st_wrinit */
	NULL,		/* st_muxrinit */
	NULL		/* st_muxwinit */
};
133
/*
 * Module switch entry: module name, its streamtab, and its flags.
 * The D_MTQPAIR flag matches the multithreading assumption stated in
 * the comment at the top of this file.
 */
static struct fmodsw fsw = {
	"pfmod",
	&pf_info,
	D_MTQPAIR | D_MP
};
139
/*
 * STREAMS-module linkage: the STREAMS module ops vector, a
 * human-readable description, and the module switch entry above.
 */
static struct modlstrmod modlstrmod = {
	&mod_strmodops, "streams packet filter module", &fsw
};
143
/*
 * Top-level module linkage handed to mod_install(), mod_remove() and
 * mod_info() by _init(), _fini() and _info() respectively.
 */
static struct modlinkage modlinkage = {
	MODREV_1, &modlstrmod, NULL
};
147
148 int
149 _init(void)
150 {
151 return (mod_install(&modlinkage));
152 }
153
154 int
155 _fini(void)
156 {
157 return (mod_remove(&modlinkage));
158 }
159
160 int
161 _info(struct modinfo *modinfop)
162 {
163 return (mod_info(&modlinkage, modinfop));
164 }
165
166 /*ARGSUSED*/
167 static int
168 pfopen(queue_t *rq, dev_t *dev, int oflag, int sflag, cred_t *crp)
169 {
170 struct epacketfilt *pfp;
171
172 ASSERT(rq);
173
174 if (sflag != MODOPEN)
175 return (EINVAL);
176
177 if (rq->q_ptr)
178 return (0);
179
180 /*
181 * Allocate and initialize per-Stream structure.
182 */
183 pfp = kmem_alloc(sizeof (struct epacketfilt), KM_SLEEP);
184 rq->q_ptr = WR(rq)->q_ptr = (char *)pfp;
185
186 qprocson(rq);
187
188 return (0);
189 }
190
191 static int
192 pfclose(queue_t *rq)
193 {
194 struct epacketfilt *pfp = (struct epacketfilt *)rq->q_ptr;
195
196 ASSERT(pfp);
197
198 qprocsoff(rq);
199
200 kmem_free(pfp, sizeof (struct epacketfilt));
201 rq->q_ptr = WR(rq)->q_ptr = NULL;
202
203 return (0);
204 }
205
206 /*
207 * Write-side put procedure. Its main task is to detect ioctls.
208 * Other message types are passed on through.
209 */
210 static void
211 pfwput(queue_t *wq, mblk_t *mp)
212 {
213 switch (mp->b_datap->db_type) {
214 case M_IOCTL:
215 pfioctl(wq, mp);
216 break;
217
218 default:
219 putnext(wq, mp);
220 break;
221 }
222 }
223
224 /*
225 * Read-side put procedure. It's responsible for applying the
226 * packet filter and passing upstream message on or discarding it
227 * depending upon the results.
228 *
229 * Upstream messages can start with zero or more M_PROTO mblks
230 * which are skipped over before executing the packet filter
231 * on any remaining M_DATA mblks.
232 */
233 static void
234 pfrput(queue_t *rq, mblk_t *mp)
235 {
236 struct epacketfilt *pfp = (struct epacketfilt *)rq->q_ptr;
237 mblk_t *mbp, *mpp;
238 struct packdesc pd;
239 int need;
240
241 ASSERT(pfp);
242
243 switch (DB_TYPE(mp)) {
244 case M_PROTO:
245 case M_DATA:
246 /*
247 * Skip over protocol information and find the start
248 * of the message body, saving the overall message
249 * start in mpp.
250 */
251 for (mpp = mp; mp && (DB_TYPE(mp) == M_PROTO); mp = mp->b_cont)
252 ;
253
254 /*
255 * Null body (exclusive of M_PROTO blocks) ==> accept.
256 * Note that a null body is not the same as an empty body.
257 */
258 if (mp == NULL) {
259 putnext(rq, mpp);
260 break;
261 }
262
263 /*
264 * Pull the packet up to the length required by
265 * the filter. Note that doing so destroys sharing
266 * relationships, which is unfortunate, since the
267 * results of pulling up here are likely to be useful
268 * for shared messages applied to a filter on a sibling
269 * stream.
270 *
271 * Most packet sources will provide the packet in two
272 * logical pieces: an initial header in a single mblk,
273 * and a body in a sequence of mblks hooked to the
274 * header. We're prepared to deal with variant forms,
275 * but in any case, the pullup applies only to the body
276 * part.
277 */
278 mbp = mp->b_cont;
279 need = pfp->pf_PByteLen;
280 if (mbp && (MBLKL(mbp) < need)) {
281 int len = msgdsize(mbp);
282
283 /* XXX discard silently on pullupmsg failure */
284 if (pullupmsg(mbp, MIN(need, len)) == 0) {
285 freemsg(mpp);
286 break;
287 }
288 }
289
290 /*
291 * Misalignment (not on short boundary) ==> reject.
292 */
293 if (((uintptr_t)mp->b_rptr & (sizeof (ushort_t) - 1)) ||
294 (mbp != NULL &&
295 ((uintptr_t)mbp->b_rptr & (sizeof (ushort_t) - 1)))) {
296 freemsg(mpp);
297 break;
298 }
299
300 /*
301 * These assignments are distasteful, but necessary,
302 * since the packet filter wants to work in terms of
303 * shorts. Odd bytes at the end of header or data can't
304 * participate in the filtering operation.
305 */
306 pd.pd_hdr = (ushort_t *)mp->b_rptr;
307 pd.pd_hdrlen = (mp->b_wptr - mp->b_rptr) / sizeof (ushort_t);
308 if (mbp) {
309 pd.pd_body = (ushort_t *)mbp->b_rptr;
310 pd.pd_bodylen = (mbp->b_wptr - mbp->b_rptr) /
311 sizeof (ushort_t);
312 } else {
313 pd.pd_body = NULL;
314 pd.pd_bodylen = 0;
315 }
316
317 /*
318 * Apply the filter.
319 */
320 if (FilterPacket(&pd, pfp))
321 putnext(rq, mpp);
322 else
323 freemsg(mpp);
324
325 break;
326
327 default:
328 putnext(rq, mp);
329 break;
330 }
331
332 }
333
334 /*
335 * Handle write-side M_IOCTL messages.
336 */
337 static void
338 pfioctl(queue_t *wq, mblk_t *mp)
339 {
340 struct epacketfilt *pfp = (struct epacketfilt *)wq->q_ptr;
341 struct Pf_ext_packetfilt *upfp;
342 struct packetfilt *opfp;
343 ushort_t *fwp;
344 int arg;
345 int maxoff = 0;
346 int maxoffreg = 0;
347 struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
348 int error;
349
350 switch (iocp->ioc_cmd) {
351 case PFIOCSETF:
352 /*
353 * Verify argument length. Since the size of packet filter
354 * got increased (ENMAXFILTERS was bumped up to 2047), to
355 * maintain backwards binary compatibility, we need to
356 * check for both possible sizes.
357 */
358 switch (iocp->ioc_count) {
359 case sizeof (struct Pf_ext_packetfilt):
360 error = miocpullup(mp,
361 sizeof (struct Pf_ext_packetfilt));
362 if (error != 0) {
363 miocnak(wq, mp, 0, error);
364 return;
365 }
366 upfp = (struct Pf_ext_packetfilt *)mp->b_cont->b_rptr;
367 if (upfp->Pf_FilterLen > PF_MAXFILTERS) {
368 miocnak(wq, mp, 0, EINVAL);
369 return;
370 }
371
372 bcopy(upfp, pfp, sizeof (struct Pf_ext_packetfilt));
373 pfp->pf_FilterEnd = &pfp->pf_Filter[pfp->pf_FilterLen];
374 break;
375
376 case sizeof (struct packetfilt):
377 error = miocpullup(mp, sizeof (struct packetfilt));
378 if (error != 0) {
379 miocnak(wq, mp, 0, error);
380 return;
381 }
382 opfp = (struct packetfilt *)mp->b_cont->b_rptr;
383 /* this strange comparison keeps gcc from complaining */
384 if (opfp->Pf_FilterLen - 1 >= ENMAXFILTERS) {
385 miocnak(wq, mp, 0, EINVAL);
386 return;
387 }
388
389 pfp->pf.Pf_Priority = opfp->Pf_Priority;
390 pfp->pf.Pf_FilterLen = (unsigned int)opfp->Pf_FilterLen;
391
392 bcopy(opfp->Pf_Filter, pfp->pf.Pf_Filter,
393 sizeof (opfp->Pf_Filter));
394 pfp->pf_FilterEnd = &pfp->pf_Filter[pfp->pf_FilterLen];
395 break;
396
397 default:
398 miocnak(wq, mp, 0, EINVAL);
399 return;
400 }
401
402 /*
403 * Find and record maximum byte offset that the
404 * filter users. We use this when executing the
405 * filter to determine how much of the packet
406 * body to pull up. This code depends on the
407 * filter encoding.
408 */
409 for (fwp = pfp->pf_Filter; fwp < pfp->pf_FilterEnd; fwp++) {
410 arg = *fwp & ((1 << ENF_NBPA) - 1);
411 switch (arg) {
412 default:
413 if ((arg -= ENF_PUSHWORD) > maxoff)
414 maxoff = arg;
415 break;
416
417 case ENF_LOAD_OFFSET:
418 /* Point to the offset */
419 fwp++;
420 if (*fwp > maxoffreg)
421 maxoffreg = *fwp;
422 break;
423
424 case ENF_PUSHLIT:
425 case ENF_BRTR:
426 case ENF_BRFL:
427 /* Skip over the literal. */
428 fwp++;
429 break;
430
431 case ENF_PUSHZERO:
432 case ENF_PUSHONE:
433 case ENF_PUSHFFFF:
434 case ENF_PUSHFF00:
435 case ENF_PUSH00FF:
436 case ENF_PUSHFF00_N:
437 case ENF_PUSH00FF_N:
438 case ENF_NOPUSH:
439 case ENF_POP:
440 break;
441 }
442 }
443
444 /*
445 * Convert word offset to length in bytes.
446 */
447 pfp->pf_PByteLen = (maxoff + maxoffreg + 1) * sizeof (ushort_t);
448 miocack(wq, mp, 0, 0);
449 break;
450
451 default:
452 putnext(wq, mp);
453 break;
454 }
455 }
456
/* #define	DEBUG 1 */
/* #define	INNERDEBUG 1 */

/*
 * Debug tracing for the filter interpreter.  enprintf() takes a
 * single, already-parenthesized printf argument list -- hence the
 * doubled parentheses at every call site -- and expands to nothing
 * unless INNERDEBUG is defined.
 */
#ifdef	INNERDEBUG
#define	enprintf(a)	printf a
#else
#define	enprintf(a)
#endif
465
466 /*
467 * Apply the packet filter given by pfp to the packet given by
468 * pp. Return nonzero iff the filter accepts the packet.
469 *
470 * The packet comes in two pieces, a header and a body, since
471 * that's the most convenient form for our caller. The header
472 * is in contiguous memory, whereas the body is in a mbuf.
473 * Our caller will have adjusted the mbuf chain so that its first
474 * min(MLEN, length(body)) bytes are guaranteed contiguous. For
475 * the sake of efficiency (and some laziness) the filter is prepared
476 * to examine only these two contiguous pieces. Furthermore, it
477 * assumes that the header length is even, so that there's no need
478 * to glue the last byte of header to the first byte of data.
479 */
480
481 #define opx(i) ((i) >> ENF_NBPA)
482
483 static int
484 FilterPacket(struct packdesc *pp, struct epacketfilt *pfp)
485 {
486 int maxhdr = pp->pd_hdrlen;
487 int maxword = maxhdr + pp->pd_bodylen;
488 ushort_t *sp;
489 ushort_t *fp;
490 ushort_t *fpe;
491 unsigned op;
492 unsigned arg;
493 unsigned offreg = 0;
494 ushort_t stack[ENMAXFILTERS+1];
495
496 fp = &pfp->pf_Filter[0];
497 fpe = pfp->pf_FilterEnd;
498
499 enprintf(("FilterPacket(%p, %p, %p, %p):\n", pp, pfp, fp, fpe));
500
501 /*
502 * Push TRUE on stack to start. The stack size is chosen such
503 * that overflow can't occur -- each operation can push at most
504 * one item on the stack, and the stack size equals the maximum
505 * program length.
506 */
507 sp = &stack[ENMAXFILTERS];
508 *sp = 1;
509
510 while (fp < fpe) {
511 op = *fp >> ENF_NBPA;
512 arg = *fp & ((1 << ENF_NBPA) - 1);
513 fp++;
514
515 switch (arg) {
516 default:
517 arg -= ENF_PUSHWORD;
518 /*
519 * Since arg is unsigned,
520 * if it were less than ENF_PUSHWORD before,
521 * it would now be huge.
522 */
523 if (arg + offreg < maxhdr)
524 *--sp = pp->pd_hdr[arg + offreg];
525 else if (arg + offreg < maxword)
526 *--sp = pp->pd_body[arg - maxhdr + offreg];
527 else {
528 enprintf(("=>0(len)\n"));
529 return (0);
530 }
531 break;
532 case ENF_PUSHLIT:
533 *--sp = *fp++;
534 break;
535 case ENF_PUSHZERO:
536 *--sp = 0;
537 break;
538 case ENF_PUSHONE:
539 *--sp = 1;
540 break;
541 case ENF_PUSHFFFF:
542 *--sp = 0xffff;
543 break;
544 case ENF_PUSHFF00:
545 *--sp = 0xff00;
546 break;
547 case ENF_PUSH00FF:
548 *--sp = 0x00ff;
549 break;
550 case ENF_PUSHFF00_N:
551 *--sp = htons(0xff00);
552 break;
553 case ENF_PUSH00FF_N:
554 *--sp = htons(0x00ff);
555 break;
556 case ENF_LOAD_OFFSET:
557 offreg = *fp++;
558 break;
559 case ENF_BRTR:
560 if (*sp != 0)
561 fp += *fp;
562 else
563 fp++;
564 if (fp >= fpe) {
565 enprintf(("BRTR: fp>=fpe\n"));
566 return (0);
567 }
568 break;
569 case ENF_BRFL:
570 if (*sp == 0)
571 fp += *fp;
572 else
573 fp++;
574 if (fp >= fpe) {
575 enprintf(("BRFL: fp>=fpe\n"));
576 return (0);
577 }
578 break;
579 case ENF_POP:
580 ++sp;
581 if (sp > &stack[ENMAXFILTERS]) {
582 enprintf(("stack underflow\n"));
583 return (0);
584 }
585 break;
586 case ENF_NOPUSH:
587 break;
588 }
589
590 if (sp < &stack[2]) { /* check stack overflow: small yellow zone */
591 enprintf(("=>0(--sp)\n"));
592 return (0);
593 }
594
595 if (op == ENF_NOP)
596 continue;
597
598 /*
599 * all non-NOP operators binary, must have at least two operands
600 * on stack to evaluate.
601 */
602 if (sp > &stack[ENMAXFILTERS-2]) {
603 enprintf(("=>0(sp++)\n"));
604 return (0);
605 }
606
607 arg = *sp++;
608 switch (op) {
609 default:
610 enprintf(("=>0(def)\n"));
611 return (0);
612 case opx(ENF_AND):
613 *sp &= arg;
614 break;
615 case opx(ENF_OR):
616 *sp |= arg;
617 break;
618 case opx(ENF_XOR):
619 *sp ^= arg;
620 break;
621 case opx(ENF_EQ):
622 *sp = (*sp == arg);
623 break;
624 case opx(ENF_NEQ):
625 *sp = (*sp != arg);
626 break;
627 case opx(ENF_LT):
628 *sp = (*sp < arg);
629 break;
630 case opx(ENF_LE):
631 *sp = (*sp <= arg);
632 break;
633 case opx(ENF_GT):
634 *sp = (*sp > arg);
635 break;
636 case opx(ENF_GE):
637 *sp = (*sp >= arg);
638 break;
639
640 /* short-circuit operators */
641
642 case opx(ENF_COR):
643 if (*sp++ == arg) {
644 enprintf(("=>COR %x\n", *sp));
645 return (1);
646 }
647 break;
648 case opx(ENF_CAND):
649 if (*sp++ != arg) {
650 enprintf(("=>CAND %x\n", *sp));
651 return (0);
652 }
653 break;
654 case opx(ENF_CNOR):
655 if (*sp++ == arg) {
656 enprintf(("=>COR %x\n", *sp));
657 return (0);
658 }
659 break;
660 case opx(ENF_CNAND):
661 if (*sp++ != arg) {
662 enprintf(("=>CNAND %x\n", *sp));
663 return (1);
664 }
665 break;
666 }
667 }
668 enprintf(("=>%x\n", *sp));
669 return (*sp);
670 }