1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 #pragma ident   "%Z%%M% %I%     %E% SMI"
  27 
  28 /*
  29  * STREAMS Packet Filter Module
  30  *
  31  * This module applies a filter to messages arriving on its read
 * queue, passing on messages that the filter accepts and discarding
  33  * the others.  It supports ioctls for setting the filter.
  34  *
  35  * On the write side, the module simply passes everything through
  36  * unchanged.
  37  *
  38  * Based on SunOS 4.x version.  This version has minor changes:
  39  *      - general SVR4 porting stuff
  40  *      - change name and prefixes from "nit" buffer to streams buffer
  41  *      - multithreading assumes configured as D_MTQPAIR
  42  */
  43 
  44 #include <sys/types.h>
  45 #include <sys/sysmacros.h>
  46 #include <sys/errno.h>
  47 #include <sys/debug.h>
  48 #include <sys/time.h>
  49 #include <sys/stropts.h>
  50 #include <sys/stream.h>
  51 #include <sys/conf.h>
  52 #include <sys/ddi.h>
  53 #include <sys/sunddi.h>
  54 #include <sys/kmem.h>
  55 #include <sys/strsun.h>
  56 #include <sys/pfmod.h>
  57 #include <sys/modctl.h>
  58 
  59 /*
  60  * Expanded version of the Packetfilt structure that includes
  61  * some additional fields that aid filter execution efficiency.
  62  */
struct epacketfilt {
        /* the installed filter, filled in by pfioctl on PFIOCSETF */
        struct Pf_ext_packetfilt        pf;
        /* shorthand so the embedded fields read like direct members */
#define pf_Priority     pf.Pf_Priority
#define pf_FilterLen    pf.Pf_FilterLen
#define pf_Filter       pf.Pf_Filter
        /* pointer to word immediately past end of filter */
        ushort_t                *pf_FilterEnd;
        /*
         * length in bytes of packet prefix the filter examines;
         * pfrput uses it as the amount of packet body to pull up
         */
        ushort_t                pf_PByteLen;
};
  73 
  74 /*
  75  * (Internal) packet descriptor for FilterPacket
  76  */
struct packdesc {
        /* header piece: the first non-M_PROTO mblk of the message */
        ushort_t        *pd_hdr;        /* header starting address */
        uint_t          pd_hdrlen;      /* header length in shorts */
        /* body piece: the mblk linked after the header; NULL/0 if none */
        ushort_t        *pd_body;       /* body starting address */
        uint_t          pd_bodylen;     /* body length in shorts */
};
  83 
  84 
  85 /*
  86  * Function prototypes.
  87  */
static  int     pfopen(queue_t *, dev_t *, int, int, cred_t *);
static  int     pfclose(queue_t *);
/* M_IOCTL handling; called from pfwput */
static void     pfioctl(queue_t *wq, mblk_t *mp);
/* filter interpreter; a nonzero return means the packet is accepted */
static  int     FilterPacket(struct packdesc *, struct epacketfilt *);
/*
 * To save instructions, since STREAMS ignores the return value
 * from these functions, they are defined as void here.  The qinit
 * tables below cast them to the int-returning function type.
 */
static void     pfwput(queue_t *, mblk_t *);
static void     pfrput(queue_t *, mblk_t *);
  98 
/* Module information shared by the read-side and write-side qinit tables. */
static struct module_info pf_minfo = {
        22,             /* mi_idnum */
        "pfmod",        /* mi_idname */
        0,              /* mi_minpsz */
        INFPSZ,         /* mi_maxpsz */
        0,              /* mi_hiwat */
        0               /* mi_lowat */
};
 107 
/*
 * Read-side queue initialization.  The open and close routines are
 * registered here; pfrput is void, hence the cast on the put procedure.
 */
static struct qinit pf_rinit = {
        (int (*)())pfrput,      /* qi_putp */
        NULL,                   /* qi_srvp */
        pfopen,                 /* qi_qopen */
        pfclose,                /* qi_qclose */
        NULL,                   /* qi_qadmin */
        &pf_minfo,          /* qi_minfo */
        NULL                    /* qi_mstat */
};
 117 
/*
 * Write-side queue initialization.  Only a put procedure is supplied;
 * pfwput is void, hence the cast.
 */
static struct qinit pf_winit = {
        (int (*)())pfwput,      /* qi_putp */
        NULL,                   /* qi_srvp */
        NULL,                   /* qi_qopen */
        NULL,                   /* qi_qclose */
        NULL,                   /* qi_qadmin */
        &pf_minfo,          /* qi_minfo */
        NULL                    /* qi_mstat */
};
 127 
/* Ties the read and write qinit tables together; no multiplexor queues. */
static struct streamtab pf_info = {
        &pf_rinit,  /* st_rdinit */
        &pf_winit,  /* st_wrinit */
        NULL,           /* st_muxrinit */
        NULL            /* st_muxwinit */
};
 134 
/* Module switch entry: name, streamtab, and multithreading flags. */
static struct fmodsw fsw = {
        "pfmod",
        &pf_info,
        D_MTQPAIR | D_MP
};
 140 
/* Linkage record for a STREAMS module: ops vector, description, fmodsw. */
static struct modlstrmod modlstrmod = {
        &mod_strmodops, "streams packet filter module", &fsw
};
 144 
/* Top-level linkage handed to mod_install, mod_remove and mod_info. */
static struct modlinkage modlinkage = {
        MODREV_1, &modlstrmod, NULL
};
 148 
 149 int
 150 _init(void)
 151 {
 152         return (mod_install(&modlinkage));
 153 }
 154 
 155 int
 156 _fini(void)
 157 {
 158         return (mod_remove(&modlinkage));
 159 }
 160 
 161 int
 162 _info(struct modinfo *modinfop)
 163 {
 164         return (mod_info(&modlinkage, modinfop));
 165 }
 166 
 167 /*ARGSUSED*/
 168 static int
 169 pfopen(queue_t *rq, dev_t *dev, int oflag, int sflag, cred_t *crp)
 170 {
 171         struct epacketfilt      *pfp;
 172 
 173         ASSERT(rq);
 174 
 175         if (sflag != MODOPEN)
 176                 return (EINVAL);
 177 
 178         if (rq->q_ptr)
 179                 return (0);
 180 
 181         /*
 182          * Allocate and initialize per-Stream structure.
 183          */
 184         pfp = kmem_alloc(sizeof (struct epacketfilt), KM_SLEEP);
 185         rq->q_ptr = WR(rq)->q_ptr = (char *)pfp;
 186 
 187         qprocson(rq);
 188 
 189         return (0);
 190 }
 191 
 192 static int
 193 pfclose(queue_t *rq)
 194 {
 195         struct  epacketfilt     *pfp = (struct epacketfilt *)rq->q_ptr;
 196 
 197         ASSERT(pfp);
 198 
 199         qprocsoff(rq);
 200 
 201         kmem_free(pfp, sizeof (struct epacketfilt));
 202         rq->q_ptr = WR(rq)->q_ptr = NULL;
 203 
 204         return (0);
 205 }
 206 
 207 /*
 208  * Write-side put procedure.  Its main task is to detect ioctls.
 209  * Other message types are passed on through.
 210  */
 211 static void
 212 pfwput(queue_t *wq, mblk_t *mp)
 213 {
 214         switch (mp->b_datap->db_type) {
 215         case M_IOCTL:
 216                 pfioctl(wq, mp);
 217                 break;
 218 
 219         default:
 220                 putnext(wq, mp);
 221                 break;
 222         }
 223 }
 224 
 225 /*
 226  * Read-side put procedure.  It's responsible for applying the
 227  * packet filter and passing upstream message on or discarding it
 228  * depending upon the results.
 229  *
 230  * Upstream messages can start with zero or more M_PROTO mblks
 231  * which are skipped over before executing the packet filter
 232  * on any remaining M_DATA mblks.
 233  */
 234 static void
 235 pfrput(queue_t *rq, mblk_t *mp)
 236 {
 237         struct  epacketfilt     *pfp = (struct epacketfilt *)rq->q_ptr;
 238         mblk_t  *mbp, *mpp;
 239         struct  packdesc        pd;
 240         int     need;
 241 
 242         ASSERT(pfp);
 243 
 244         switch (DB_TYPE(mp)) {
 245         case M_PROTO:
 246         case M_DATA:
 247                 /*
 248                  * Skip over protocol information and find the start
 249                  * of the message body, saving the overall message
 250                  * start in mpp.
 251                  */
 252                 for (mpp = mp; mp && (DB_TYPE(mp) == M_PROTO); mp = mp->b_cont)
 253                         ;
 254 
 255                 /*
 256                  * Null body (exclusive of M_PROTO blocks) ==> accept.
 257                  * Note that a null body is not the same as an empty body.
 258                  */
 259                 if (mp == NULL) {
 260                         putnext(rq, mpp);
 261                         break;
 262                 }
 263 
 264                 /*
 265                  * Pull the packet up to the length required by
 266                  * the filter.  Note that doing so destroys sharing
 267                  * relationships, which is unfortunate, since the
 268                  * results of pulling up here are likely to be useful
 269                  * for shared messages applied to a filter on a sibling
 270                  * stream.
 271                  *
 272                  * Most packet sources will provide the packet in two
 273                  * logical pieces: an initial header in a single mblk,
 274                  * and a body in a sequence of mblks hooked to the
 275                  * header.  We're prepared to deal with variant forms,
 276                  * but in any case, the pullup applies only to the body
 277                  * part.
 278                  */
 279                 mbp = mp->b_cont;
 280                 need = pfp->pf_PByteLen;
 281                 if (mbp && (MBLKL(mbp) < need)) {
 282                         int len = msgdsize(mbp);
 283 
 284                         /* XXX discard silently on pullupmsg failure */
 285                         if (pullupmsg(mbp, MIN(need, len)) == 0) {
 286                                 freemsg(mpp);
 287                                 break;
 288                         }
 289                 }
 290 
 291                 /*
 292                  * Misalignment (not on short boundary) ==> reject.
 293                  */
 294                 if (((uintptr_t)mp->b_rptr & (sizeof (ushort_t) - 1)) ||
 295                     (mbp != NULL &&
 296                     ((uintptr_t)mbp->b_rptr & (sizeof (ushort_t) - 1)))) {
 297                         freemsg(mpp);
 298                         break;
 299                 }
 300 
 301                 /*
 302                  * These assignments are distasteful, but necessary,
 303                  * since the packet filter wants to work in terms of
 304                  * shorts.  Odd bytes at the end of header or data can't
 305                  * participate in the filtering operation.
 306                  */
 307                 pd.pd_hdr = (ushort_t *)mp->b_rptr;
 308                 pd.pd_hdrlen = (mp->b_wptr - mp->b_rptr) / sizeof (ushort_t);
 309                 if (mbp) {
 310                         pd.pd_body = (ushort_t *)mbp->b_rptr;
 311                         pd.pd_bodylen = (mbp->b_wptr - mbp->b_rptr) /
 312                                                         sizeof (ushort_t);
 313                 } else {
 314                         pd.pd_body = NULL;
 315                         pd.pd_bodylen = 0;
 316                 }
 317 
 318                 /*
 319                  * Apply the filter.
 320                  */
 321                 if (FilterPacket(&pd, pfp))
 322                         putnext(rq, mpp);
 323                 else
 324                         freemsg(mpp);
 325 
 326                 break;
 327 
 328         default:
 329                 putnext(rq, mp);
 330                 break;
 331         }
 332 
 333 }
 334 
 335 /*
 336  * Handle write-side M_IOCTL messages.
 337  */
 338 static void
 339 pfioctl(queue_t *wq, mblk_t *mp)
 340 {
 341         struct  epacketfilt     *pfp = (struct epacketfilt *)wq->q_ptr;
 342         struct  Pf_ext_packetfilt       *upfp;
 343         struct  packetfilt      *opfp;
 344         ushort_t        *fwp;
 345         int     arg;
 346         int     maxoff = 0;
 347         int     maxoffreg = 0;
 348         struct iocblk   *iocp = (struct iocblk *)mp->b_rptr;
 349         int     error;
 350 
 351         switch (iocp->ioc_cmd) {
 352         case PFIOCSETF:
 353                 /*
 354                  * Verify argument length. Since the size of packet filter
 355                  * got increased (ENMAXFILTERS was bumped up to 2047), to
 356                  * maintain backwards binary compatibility, we need to
 357                  * check for both possible sizes.
 358                  */
 359                 switch (iocp->ioc_count) {
 360                 case sizeof (struct Pf_ext_packetfilt):
 361                         error = miocpullup(mp,
 362                             sizeof (struct Pf_ext_packetfilt));
 363                         if (error != 0) {
 364                                 miocnak(wq, mp, 0, error);
 365                                 return;
 366                         }
 367                         upfp = (struct Pf_ext_packetfilt *)mp->b_cont->b_rptr;
 368                         if (upfp->Pf_FilterLen > PF_MAXFILTERS) {
 369                                 miocnak(wq, mp, 0, EINVAL);
 370                                 return;
 371                         }
 372 
 373                         bcopy(upfp, pfp, sizeof (struct Pf_ext_packetfilt));
 374                         pfp->pf_FilterEnd = &pfp->pf_Filter[pfp->pf_FilterLen];
 375                         break;
 376 
 377                 case sizeof (struct packetfilt):
 378                         error = miocpullup(mp, sizeof (struct packetfilt));
 379                         if (error != 0) {
 380                                 miocnak(wq, mp, 0, error);
 381                                 return;
 382                         }
 383                         opfp = (struct packetfilt *)mp->b_cont->b_rptr;
 384                         /* this strange comparison keeps gcc from complaining */
 385                         if (opfp->Pf_FilterLen - 1 >= ENMAXFILTERS) {
 386                                 miocnak(wq, mp, 0, EINVAL);
 387                                 return;
 388                         }
 389 
 390                         pfp->pf.Pf_Priority = opfp->Pf_Priority;
 391                         pfp->pf.Pf_FilterLen = (unsigned int)opfp->Pf_FilterLen;
 392 
 393                         bcopy(opfp->Pf_Filter, pfp->pf.Pf_Filter,
 394                             sizeof (opfp->Pf_Filter));
 395                         pfp->pf_FilterEnd = &pfp->pf_Filter[pfp->pf_FilterLen];
 396                         break;
 397 
 398                 default:
 399                         miocnak(wq, mp, 0, EINVAL);
 400                         return;
 401                 }
 402 
 403                 /*
 404                  * Find and record maximum byte offset that the
 405                  * filter users.  We use this when executing the
 406                  * filter to determine how much of the packet
 407                  * body to pull up.  This code depends on the
 408                  * filter encoding.
 409                  */
 410                 for (fwp = pfp->pf_Filter; fwp < pfp->pf_FilterEnd; fwp++) {
 411                         arg = *fwp & ((1 << ENF_NBPA) - 1);
 412                         switch (arg) {
 413                         default:
 414                                 if ((arg -= ENF_PUSHWORD) > maxoff)
 415                                         maxoff = arg;
 416                                 break;
 417 
 418                         case ENF_LOAD_OFFSET:
 419                                 /* Point to the offset */
 420                                 fwp++;
 421                                 if (*fwp > maxoffreg)
 422                                         maxoffreg = *fwp;
 423                                 break;
 424 
 425                         case ENF_PUSHLIT:
 426                         case ENF_BRTR:
 427                         case ENF_BRFL:
 428                                 /* Skip over the literal. */
 429                                 fwp++;
 430                                 break;
 431 
 432                         case ENF_PUSHZERO:
 433                         case ENF_PUSHONE:
 434                         case ENF_PUSHFFFF:
 435                         case ENF_PUSHFF00:
 436                         case ENF_PUSH00FF:
 437                         case ENF_NOPUSH:
 438                         case ENF_POP:
 439                                 break;
 440                         }
 441                 }
 442 
 443                 /*
 444                  * Convert word offset to length in bytes.
 445                  */
 446                 pfp->pf_PByteLen = (maxoff + maxoffreg + 1) * sizeof (ushort_t);
 447                 miocack(wq, mp, 0, 0);
 448                 break;
 449 
 450         default:
 451                 putnext(wq, mp);
 452                 break;
 453         }
 454 }
 455 
 456 /* #define      DEBUG   1 */
 457 /* #define      INNERDEBUG      1 */
 458 
 459 #ifdef  INNERDEBUG
 460 #define enprintf(a)     printf a
 461 #else
 462 #define enprintf(a)
 463 #endif
 464 
 465 /*
 466  * Apply the packet filter given by pfp to the packet given by
 467  * pp.  Return nonzero iff the filter accepts the packet.
 468  *
 469  * The packet comes in two pieces, a header and a body, since
 470  * that's the most convenient form for our caller.  The header
 471  * is in contiguous memory, whereas the body is in a mbuf.
 472  * Our caller will have adjusted the mbuf chain so that its first
 473  * min(MLEN, length(body)) bytes are guaranteed contiguous.  For
 474  * the sake of efficiency (and some laziness) the filter is prepared
 475  * to examine only these two contiguous pieces.  Furthermore, it
 476  * assumes that the header length is even, so that there's no need
 477  * to glue the last byte of header to the first byte of data.
 478  */
 479 
 480 #define opx(i)  ((i) >> ENF_NBPA)
 481 
 482 static int
 483 FilterPacket(struct packdesc *pp, struct epacketfilt *pfp)
 484 {
 485         int             maxhdr = pp->pd_hdrlen;
 486         int             maxword = maxhdr + pp->pd_bodylen;
 487         ushort_t        *sp;
 488         ushort_t        *fp;
 489         ushort_t        *fpe;
 490         unsigned        op;
 491         unsigned        arg;
 492         unsigned        offreg = 0;
 493         ushort_t        stack[ENMAXFILTERS+1];
 494 
 495         fp = &pfp->pf_Filter[0];
 496         fpe = pfp->pf_FilterEnd;
 497 
 498         enprintf(("FilterPacket(%p, %p, %p, %p):\n", pp, pfp, fp, fpe));
 499 
 500         /*
 501          * Push TRUE on stack to start.  The stack size is chosen such
 502          * that overflow can't occur -- each operation can push at most
 503          * one item on the stack, and the stack size equals the maximum
 504          * program length.
 505          */
 506         sp = &stack[ENMAXFILTERS];
 507         *sp = 1;
 508 
 509         while (fp < fpe) {
 510         op = *fp >> ENF_NBPA;
 511         arg = *fp & ((1 << ENF_NBPA) - 1);
 512         fp++;
 513 
 514         switch (arg) {
 515         default:
 516                 arg -= ENF_PUSHWORD;
 517                 /*
 518                  * Since arg is unsigned,
 519                  * if it were less than ENF_PUSHWORD before,
 520                  * it would now be huge.
 521                  */
 522                 if (arg + offreg < maxhdr)
 523                         *--sp = pp->pd_hdr[arg + offreg];
 524                 else if (arg + offreg < maxword)
 525                         *--sp = pp->pd_body[arg - maxhdr + offreg];
 526                 else {
 527                         enprintf(("=>0(len)\n"));
 528                         return (0);
 529                 }
 530                 break;
 531         case ENF_PUSHLIT:
 532                 *--sp = *fp++;
 533                 break;
 534         case ENF_PUSHZERO:
 535                 *--sp = 0;
 536                 break;
 537         case ENF_PUSHONE:
 538                 *--sp = 1;
 539                 break;
 540         case ENF_PUSHFFFF:
 541                 *--sp = 0xffff;
 542                 break;
 543         case ENF_PUSHFF00:
 544                 *--sp = 0xff00;
 545                 break;
 546         case ENF_PUSH00FF:
 547                 *--sp = 0x00ff;
 548                 break;
 549         case ENF_LOAD_OFFSET:
 550                 offreg = *fp++;
 551                 break;
 552         case ENF_BRTR:
 553                 if (*sp != 0)
 554                         fp += *fp;
 555                 else
 556                         fp++;
 557                 if (fp >= fpe) {
 558                         enprintf(("BRTR: fp>=fpe\n"));
 559                         return (0);
 560                 }
 561                 break;
 562         case ENF_BRFL:
 563                 if (*sp == 0)
 564                         fp += *fp;
 565                 else
 566                         fp++;
 567                 if (fp >= fpe) {
 568                         enprintf(("BRFL: fp>=fpe\n"));
 569                         return (0);
 570                 }
 571                 break;
 572         case ENF_POP:
 573                 ++sp;
 574                 if (sp > &stack[ENMAXFILTERS]) {
 575                         enprintf(("stack underflow\n"));
 576                         return (0);
 577                 }
 578                 break;
 579         case ENF_NOPUSH:
 580                 break;
 581         }
 582 
 583         if (sp < &stack[2]) {    /* check stack overflow: small yellow zone */
 584                 enprintf(("=>0(--sp)\n"));
 585                 return (0);
 586         }
 587 
 588         if (op == ENF_NOP)
 589                 continue;
 590 
 591         /*
 592          * all non-NOP operators binary, must have at least two operands
 593          * on stack to evaluate.
 594          */
 595         if (sp > &stack[ENMAXFILTERS-2]) {
 596                 enprintf(("=>0(sp++)\n"));
 597                 return (0);
 598         }
 599 
 600         arg = *sp++;
 601         switch (op) {
 602         default:
 603                 enprintf(("=>0(def)\n"));
 604                 return (0);
 605         case opx(ENF_AND):
 606                 *sp &= arg;
 607                 break;
 608         case opx(ENF_OR):
 609                 *sp |= arg;
 610                 break;
 611         case opx(ENF_XOR):
 612                 *sp ^= arg;
 613                 break;
 614         case opx(ENF_EQ):
 615                 *sp = (*sp == arg);
 616                 break;
 617         case opx(ENF_NEQ):
 618                 *sp = (*sp != arg);
 619                 break;
 620         case opx(ENF_LT):
 621                 *sp = (*sp < arg);
 622                 break;
 623         case opx(ENF_LE):
 624                 *sp = (*sp <= arg);
 625                 break;
 626         case opx(ENF_GT):
 627                 *sp = (*sp > arg);
 628                 break;
 629         case opx(ENF_GE):
 630                 *sp = (*sp >= arg);
 631                 break;
 632 
 633         /* short-circuit operators */
 634 
 635         case opx(ENF_COR):
 636                 if (*sp++ == arg) {
 637                         enprintf(("=>COR %x\n", *sp));
 638                         return (1);
 639                 }
 640                 break;
 641         case opx(ENF_CAND):
 642                 if (*sp++ != arg) {
 643                         enprintf(("=>CAND %x\n", *sp));
 644                         return (0);
 645                 }
 646                 break;
 647         case opx(ENF_CNOR):
 648                 if (*sp++ == arg) {
 649                         enprintf(("=>COR %x\n", *sp));
 650                         return (0);
 651                 }
 652                 break;
 653         case opx(ENF_CNAND):
 654                 if (*sp++ != arg) {
 655                         enprintf(("=>CNAND %x\n", *sp));
 656                         return (1);
 657                 }
 658                 break;
 659         }
 660         }
 661         enprintf(("=>%x\n", *sp));
 662         return (*sp);
 663 }