1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /*
  27  * STREAMS Packet Filter Module
  28  *
  29  * This module applies a filter to messages arriving on its read
 * queue, passing on messages that the filter accepts and discarding
  31  * the others.  It supports ioctls for setting the filter.
  32  *
  33  * On the write side, the module simply passes everything through
  34  * unchanged.
  35  *
  36  * Based on SunOS 4.x version.  This version has minor changes:
  37  *      - general SVR4 porting stuff
  38  *      - change name and prefixes from "nit" buffer to streams buffer
  39  *      - multithreading assumes configured as D_MTQPAIR
  40  */
  41 
  42 #include <sys/types.h>
  43 #include <sys/sysmacros.h>
  44 #include <sys/errno.h>
  45 #include <sys/debug.h>
  46 #include <sys/time.h>
  47 #include <sys/stropts.h>
  48 #include <sys/stream.h>
  49 #include <sys/conf.h>
  50 #include <sys/ddi.h>
  51 #include <sys/sunddi.h>
  52 #include <sys/kmem.h>
  53 #include <sys/strsun.h>
  54 #include <sys/pfmod.h>
  55 #include <sys/modctl.h>
  56 #include <netinet/in.h>
  57 
  58 /*
  59  * Expanded version of the Packetfilt structure that includes
  60  * some additional fields that aid filter execution efficiency.
  61  */
  62 struct epacketfilt {
  63         struct Pf_ext_packetfilt        pf;
  64 #define pf_Priority     pf.Pf_Priority
  65 #define pf_FilterLen    pf.Pf_FilterLen
  66 #define pf_Filter       pf.Pf_Filter
  67         /* pointer to word immediately past end of filter */
  68         ushort_t                *pf_FilterEnd;
  69         /* length in bytes of packet prefix the filter examines */
  70         ushort_t                pf_PByteLen;
  71 };
  72 
  73 /*
  74  * (Internal) packet descriptor for FilterPacket
  75  */
  76 struct packdesc {
  77         ushort_t        *pd_hdr;        /* header starting address */
  78         uint_t          pd_hdrlen;      /* header length in shorts */
  79         ushort_t        *pd_body;       /* body starting address */
  80         uint_t          pd_bodylen;     /* body length in shorts */
  81 };
  82 
  83 
  84 /*
  85  * Function prototypes.
  86  */
  87 static  int     pfopen(queue_t *, dev_t *, int, int, cred_t *);
  88 static  int     pfclose(queue_t *);
  89 static void     pfioctl(queue_t *wq, mblk_t *mp);
  90 static  int     FilterPacket(struct packdesc *, struct epacketfilt *);
  91 /*
  92  * To save instructions, since STREAMS ignores the return value
  93  * from these functions, they are defined as void here. Kind of icky, but...
  94  */
  95 static void     pfwput(queue_t *, mblk_t *);
  96 static void     pfrput(queue_t *, mblk_t *);
  97 
/*
 * Module information.  Both the read-side and the write-side qinit
 * structures below point at this single instance.
 */
static struct module_info pf_minfo = {
        22,             /* mi_idnum */
        "pfmod",        /* mi_idname */
        0,              /* mi_minpsz */
        INFPSZ,         /* mi_maxpsz */
        0,              /* mi_hiwat */
        0               /* mi_lowat */
};
 106 
/*
 * Read-side queue initialization: pfrput is the put procedure, there is
 * no service procedure, and the module's open and close routines are
 * registered here.  pfrput returns void, hence the cast on its entry.
 */
static struct qinit pf_rinit = {
        (int (*)())pfrput,      /* qi_putp */
        NULL,                   /* qi_srvp */
        pfopen,                 /* qi_qopen */
        pfclose,                /* qi_qclose */
        NULL,                   /* qi_qadmin */
        &pf_minfo,          /* qi_minfo */
        NULL                    /* qi_mstat */
};
 116 
/*
 * Write-side queue initialization: pfwput is the put procedure and there
 * is no service procedure.  The open and close entries are NULL here; they
 * are supplied by the read-side qinit.  pfwput returns void, hence the cast.
 */
static struct qinit pf_winit = {
        (int (*)())pfwput,      /* qi_putp */
        NULL,                   /* qi_srvp */
        NULL,                   /* qi_qopen */
        NULL,                   /* qi_qclose */
        NULL,                   /* qi_qadmin */
        &pf_minfo,          /* qi_minfo */
        NULL                    /* qi_mstat */
};
 126 
/*
 * Stream table tying together the read-side and write-side qinit
 * structures.  The multiplexor entries are unused and left NULL.
 */
static struct streamtab pf_info = {
        &pf_rinit,  /* st_rdinit */
        &pf_winit,  /* st_wrinit */
        NULL,           /* st_muxrinit */
        NULL            /* st_muxwinit */
};
 133 
/*
 * STREAMS module switch entry: the module name (the same string as
 * mi_idname in pf_minfo), its streamtab, and its flags.  The D_MTQPAIR
 * flag matches the multithreading assumption stated in the comment at
 * the top of this file.
 */
static struct fmodsw fsw = {
        "pfmod",
        &pf_info,
        D_MTQPAIR | D_MP
};
 139 
/*
 * Module linkage information for a STREAMS module: the STREAMS module
 * operations vector (mod_strmodops), a descriptive string, and the
 * fmodsw entry defined above.
 */
static struct modlstrmod modlstrmod = {
        &mod_strmodops, "streams packet filter module", &fsw
};
 143 
/*
 * Top-level linkage structure passed to mod_install(), mod_remove() and
 * mod_info() by _init(), _fini() and _info() below.  It lists the single
 * modlstrmod linkage element, followed by a NULL terminator.
 */
static struct modlinkage modlinkage = {
        MODREV_1, &modlstrmod, NULL
};
 147 
 148 int
 149 _init(void)
 150 {
 151         return (mod_install(&modlinkage));
 152 }
 153 
 154 int
 155 _fini(void)
 156 {
 157         return (mod_remove(&modlinkage));
 158 }
 159 
 160 int
 161 _info(struct modinfo *modinfop)
 162 {
 163         return (mod_info(&modlinkage, modinfop));
 164 }
 165 
 166 /*ARGSUSED*/
 167 static int
 168 pfopen(queue_t *rq, dev_t *dev, int oflag, int sflag, cred_t *crp)
 169 {
 170         struct epacketfilt      *pfp;
 171 
 172         ASSERT(rq);
 173 
 174         if (sflag != MODOPEN)
 175                 return (EINVAL);
 176 
 177         if (rq->q_ptr)
 178                 return (0);
 179 
 180         /*
 181          * Allocate and initialize per-Stream structure.
 182          */
 183         pfp = kmem_alloc(sizeof (struct epacketfilt), KM_SLEEP);
 184         rq->q_ptr = WR(rq)->q_ptr = (char *)pfp;
 185 
 186         qprocson(rq);
 187 
 188         return (0);
 189 }
 190 
 191 static int
 192 pfclose(queue_t *rq)
 193 {
 194         struct  epacketfilt     *pfp = (struct epacketfilt *)rq->q_ptr;
 195 
 196         ASSERT(pfp);
 197 
 198         qprocsoff(rq);
 199 
 200         kmem_free(pfp, sizeof (struct epacketfilt));
 201         rq->q_ptr = WR(rq)->q_ptr = NULL;
 202 
 203         return (0);
 204 }
 205 
 206 /*
 207  * Write-side put procedure.  Its main task is to detect ioctls.
 208  * Other message types are passed on through.
 209  */
 210 static void
 211 pfwput(queue_t *wq, mblk_t *mp)
 212 {
 213         switch (mp->b_datap->db_type) {
 214         case M_IOCTL:
 215                 pfioctl(wq, mp);
 216                 break;
 217 
 218         default:
 219                 putnext(wq, mp);
 220                 break;
 221         }
 222 }
 223 
 224 /*
 225  * Read-side put procedure.  It's responsible for applying the
 226  * packet filter and passing upstream message on or discarding it
 227  * depending upon the results.
 228  *
 229  * Upstream messages can start with zero or more M_PROTO mblks
 230  * which are skipped over before executing the packet filter
 231  * on any remaining M_DATA mblks.
 232  */
 233 static void
 234 pfrput(queue_t *rq, mblk_t *mp)
 235 {
 236         struct  epacketfilt     *pfp = (struct epacketfilt *)rq->q_ptr;
 237         mblk_t  *mbp, *mpp;
 238         struct  packdesc        pd;
 239         int     need;
 240 
 241         ASSERT(pfp);
 242 
 243         switch (DB_TYPE(mp)) {
 244         case M_PROTO:
 245         case M_DATA:
 246                 /*
 247                  * Skip over protocol information and find the start
 248                  * of the message body, saving the overall message
 249                  * start in mpp.
 250                  */
 251                 for (mpp = mp; mp && (DB_TYPE(mp) == M_PROTO); mp = mp->b_cont)
 252                         ;
 253 
 254                 /*
 255                  * Null body (exclusive of M_PROTO blocks) ==> accept.
 256                  * Note that a null body is not the same as an empty body.
 257                  */
 258                 if (mp == NULL) {
 259                         putnext(rq, mpp);
 260                         break;
 261                 }
 262 
 263                 /*
 264                  * Pull the packet up to the length required by
 265                  * the filter.  Note that doing so destroys sharing
 266                  * relationships, which is unfortunate, since the
 267                  * results of pulling up here are likely to be useful
 268                  * for shared messages applied to a filter on a sibling
 269                  * stream.
 270                  *
 271                  * Most packet sources will provide the packet in two
 272                  * logical pieces: an initial header in a single mblk,
 273                  * and a body in a sequence of mblks hooked to the
 274                  * header.  We're prepared to deal with variant forms,
 275                  * but in any case, the pullup applies only to the body
 276                  * part.
 277                  */
 278                 mbp = mp->b_cont;
 279                 need = pfp->pf_PByteLen;
 280                 if (mbp && (MBLKL(mbp) < need)) {
 281                         int len = msgdsize(mbp);
 282 
 283                         /* XXX discard silently on pullupmsg failure */
 284                         if (pullupmsg(mbp, MIN(need, len)) == 0) {
 285                                 freemsg(mpp);
 286                                 break;
 287                         }
 288                 }
 289 
 290                 /*
 291                  * Misalignment (not on short boundary) ==> reject.
 292                  */
 293                 if (((uintptr_t)mp->b_rptr & (sizeof (ushort_t) - 1)) ||
 294                     (mbp != NULL &&
 295                     ((uintptr_t)mbp->b_rptr & (sizeof (ushort_t) - 1)))) {
 296                         freemsg(mpp);
 297                         break;
 298                 }
 299 
 300                 /*
 301                  * These assignments are distasteful, but necessary,
 302                  * since the packet filter wants to work in terms of
 303                  * shorts.  Odd bytes at the end of header or data can't
 304                  * participate in the filtering operation.
 305                  */
 306                 pd.pd_hdr = (ushort_t *)mp->b_rptr;
 307                 pd.pd_hdrlen = (mp->b_wptr - mp->b_rptr) / sizeof (ushort_t);
 308                 if (mbp) {
 309                         pd.pd_body = (ushort_t *)mbp->b_rptr;
 310                         pd.pd_bodylen = (mbp->b_wptr - mbp->b_rptr) /
 311                             sizeof (ushort_t);
 312                 } else {
 313                         pd.pd_body = NULL;
 314                         pd.pd_bodylen = 0;
 315                 }
 316 
 317                 /*
 318                  * Apply the filter.
 319                  */
 320                 if (FilterPacket(&pd, pfp))
 321                         putnext(rq, mpp);
 322                 else
 323                         freemsg(mpp);
 324 
 325                 break;
 326 
 327         default:
 328                 putnext(rq, mp);
 329                 break;
 330         }
 331 
 332 }
 333 
 334 /*
 335  * Handle write-side M_IOCTL messages.
 336  */
 337 static void
 338 pfioctl(queue_t *wq, mblk_t *mp)
 339 {
 340         struct  epacketfilt     *pfp = (struct epacketfilt *)wq->q_ptr;
 341         struct  Pf_ext_packetfilt       *upfp;
 342         struct  packetfilt      *opfp;
 343         ushort_t        *fwp;
 344         int     arg;
 345         int     maxoff = 0;
 346         int     maxoffreg = 0;
 347         struct iocblk   *iocp = (struct iocblk *)mp->b_rptr;
 348         int     error;
 349 
 350         switch (iocp->ioc_cmd) {
 351         case PFIOCSETF:
 352                 /*
 353                  * Verify argument length. Since the size of packet filter
 354                  * got increased (ENMAXFILTERS was bumped up to 2047), to
 355                  * maintain backwards binary compatibility, we need to
 356                  * check for both possible sizes.
 357                  */
 358                 switch (iocp->ioc_count) {
 359                 case sizeof (struct Pf_ext_packetfilt):
 360                         error = miocpullup(mp,
 361                             sizeof (struct Pf_ext_packetfilt));
 362                         if (error != 0) {
 363                                 miocnak(wq, mp, 0, error);
 364                                 return;
 365                         }
 366                         upfp = (struct Pf_ext_packetfilt *)mp->b_cont->b_rptr;
 367                         if (upfp->Pf_FilterLen > PF_MAXFILTERS) {
 368                                 miocnak(wq, mp, 0, EINVAL);
 369                                 return;
 370                         }
 371 
 372                         bcopy(upfp, pfp, sizeof (struct Pf_ext_packetfilt));
 373                         pfp->pf_FilterEnd = &pfp->pf_Filter[pfp->pf_FilterLen];
 374                         break;
 375 
 376                 case sizeof (struct packetfilt):
 377                         error = miocpullup(mp, sizeof (struct packetfilt));
 378                         if (error != 0) {
 379                                 miocnak(wq, mp, 0, error);
 380                                 return;
 381                         }
 382                         opfp = (struct packetfilt *)mp->b_cont->b_rptr;
 383                         /* this strange comparison keeps gcc from complaining */
 384                         if (opfp->Pf_FilterLen - 1 >= ENMAXFILTERS) {
 385                                 miocnak(wq, mp, 0, EINVAL);
 386                                 return;
 387                         }
 388 
 389                         pfp->pf.Pf_Priority = opfp->Pf_Priority;
 390                         pfp->pf.Pf_FilterLen = (unsigned int)opfp->Pf_FilterLen;
 391 
 392                         bcopy(opfp->Pf_Filter, pfp->pf.Pf_Filter,
 393                             sizeof (opfp->Pf_Filter));
 394                         pfp->pf_FilterEnd = &pfp->pf_Filter[pfp->pf_FilterLen];
 395                         break;
 396 
 397                 default:
 398                         miocnak(wq, mp, 0, EINVAL);
 399                         return;
 400                 }
 401 
 402                 /*
 403                  * Find and record maximum byte offset that the
 404                  * filter users.  We use this when executing the
 405                  * filter to determine how much of the packet
 406                  * body to pull up.  This code depends on the
 407                  * filter encoding.
 408                  */
 409                 for (fwp = pfp->pf_Filter; fwp < pfp->pf_FilterEnd; fwp++) {
 410                         arg = *fwp & ((1 << ENF_NBPA) - 1);
 411                         switch (arg) {
 412                         default:
 413                                 if ((arg -= ENF_PUSHWORD) > maxoff)
 414                                         maxoff = arg;
 415                                 break;
 416 
 417                         case ENF_LOAD_OFFSET:
 418                                 /* Point to the offset */
 419                                 fwp++;
 420                                 if (*fwp > maxoffreg)
 421                                         maxoffreg = *fwp;
 422                                 break;
 423 
 424                         case ENF_PUSHLIT:
 425                         case ENF_BRTR:
 426                         case ENF_BRFL:
 427                                 /* Skip over the literal. */
 428                                 fwp++;
 429                                 break;
 430 
 431                         case ENF_PUSHZERO:
 432                         case ENF_PUSHONE:
 433                         case ENF_PUSHFFFF:
 434                         case ENF_PUSHFF00:
 435                         case ENF_PUSH00FF:
 436                         case ENF_PUSHFF00_N:
 437                         case ENF_PUSH00FF_N:
 438                         case ENF_NOPUSH:
 439                         case ENF_POP:
 440                                 break;
 441                         }
 442                 }
 443 
 444                 /*
 445                  * Convert word offset to length in bytes.
 446                  */
 447                 pfp->pf_PByteLen = (maxoff + maxoffreg + 1) * sizeof (ushort_t);
 448                 miocack(wq, mp, 0, 0);
 449                 break;
 450 
 451         default:
 452                 putnext(wq, mp);
 453                 break;
 454         }
 455 }
 456 
/*
 * Debug tracing used by FilterPacket.  When INNERDEBUG is defined (for
 * example by uncommenting the line below), enprintf(a) expands to
 * "printf a"; otherwise it expands to nothing.  Because the macro takes a
 * single argument, callers wrap the whole printf argument list in an
 * extra pair of parentheses: enprintf(("fmt %x\n", value));
 */
/* #define      DEBUG   1 */
/* #define      INNERDEBUG      1 */

#ifdef  INNERDEBUG
#define enprintf(a)     printf a
#else
#define enprintf(a)
#endif
 465 
 466 /*
 467  * Apply the packet filter given by pfp to the packet given by
 468  * pp.  Return nonzero iff the filter accepts the packet.
 469  *
 470  * The packet comes in two pieces, a header and a body, since
 471  * that's the most convenient form for our caller.  The header
 472  * is in contiguous memory, whereas the body is in a mbuf.
 473  * Our caller will have adjusted the mbuf chain so that its first
 474  * min(MLEN, length(body)) bytes are guaranteed contiguous.  For
 475  * the sake of efficiency (and some laziness) the filter is prepared
 476  * to examine only these two contiguous pieces.  Furthermore, it
 477  * assumes that the header length is even, so that there's no need
 478  * to glue the last byte of header to the first byte of data.
 479  */
 480 
 481 #define opx(i)  ((i) >> ENF_NBPA)
 482 
 483 static int
 484 FilterPacket(struct packdesc *pp, struct epacketfilt *pfp)
 485 {
 486         int             maxhdr = pp->pd_hdrlen;
 487         int             maxword = maxhdr + pp->pd_bodylen;
 488         ushort_t        *sp;
 489         ushort_t        *fp;
 490         ushort_t        *fpe;
 491         unsigned        op;
 492         unsigned        arg;
 493         unsigned        offreg = 0;
 494         ushort_t        stack[ENMAXFILTERS+1];
 495 
 496         fp = &pfp->pf_Filter[0];
 497         fpe = pfp->pf_FilterEnd;
 498 
 499         enprintf(("FilterPacket(%p, %p, %p, %p):\n", pp, pfp, fp, fpe));
 500 
 501         /*
 502          * Push TRUE on stack to start.  The stack size is chosen such
 503          * that overflow can't occur -- each operation can push at most
 504          * one item on the stack, and the stack size equals the maximum
 505          * program length.
 506          */
 507         sp = &stack[ENMAXFILTERS];
 508         *sp = 1;
 509 
 510         while (fp < fpe) {
 511         op = *fp >> ENF_NBPA;
 512         arg = *fp & ((1 << ENF_NBPA) - 1);
 513         fp++;
 514 
 515         switch (arg) {
 516         default:
 517                 arg -= ENF_PUSHWORD;
 518                 /*
 519                  * Since arg is unsigned,
 520                  * if it were less than ENF_PUSHWORD before,
 521                  * it would now be huge.
 522                  */
 523                 if (arg + offreg < maxhdr)
 524                         *--sp = pp->pd_hdr[arg + offreg];
 525                 else if (arg + offreg < maxword)
 526                         *--sp = pp->pd_body[arg - maxhdr + offreg];
 527                 else {
 528                         enprintf(("=>0(len)\n"));
 529                         return (0);
 530                 }
 531                 break;
 532         case ENF_PUSHLIT:
 533                 *--sp = *fp++;
 534                 break;
 535         case ENF_PUSHZERO:
 536                 *--sp = 0;
 537                 break;
 538         case ENF_PUSHONE:
 539                 *--sp = 1;
 540                 break;
 541         case ENF_PUSHFFFF:
 542                 *--sp = 0xffff;
 543                 break;
 544         case ENF_PUSHFF00:
 545                 *--sp = 0xff00;
 546                 break;
 547         case ENF_PUSH00FF:
 548                 *--sp = 0x00ff;
 549                 break;
 550         case ENF_PUSHFF00_N:
 551                 *--sp = htons(0xff00);
 552                 break;
 553         case ENF_PUSH00FF_N:
 554                 *--sp = htons(0x00ff);
 555                 break;
 556         case ENF_LOAD_OFFSET:
 557                 offreg = *fp++;
 558                 break;
 559         case ENF_BRTR:
 560                 if (*sp != 0)
 561                         fp += *fp;
 562                 else
 563                         fp++;
 564                 if (fp >= fpe) {
 565                         enprintf(("BRTR: fp>=fpe\n"));
 566                         return (0);
 567                 }
 568                 break;
 569         case ENF_BRFL:
 570                 if (*sp == 0)
 571                         fp += *fp;
 572                 else
 573                         fp++;
 574                 if (fp >= fpe) {
 575                         enprintf(("BRFL: fp>=fpe\n"));
 576                         return (0);
 577                 }
 578                 break;
 579         case ENF_POP:
 580                 ++sp;
 581                 if (sp > &stack[ENMAXFILTERS]) {
 582                         enprintf(("stack underflow\n"));
 583                         return (0);
 584                 }
 585                 break;
 586         case ENF_NOPUSH:
 587                 break;
 588         }
 589 
 590         if (sp < &stack[2]) {    /* check stack overflow: small yellow zone */
 591                 enprintf(("=>0(--sp)\n"));
 592                 return (0);
 593         }
 594 
 595         if (op == ENF_NOP)
 596                 continue;
 597 
 598         /*
 599          * all non-NOP operators binary, must have at least two operands
 600          * on stack to evaluate.
 601          */
 602         if (sp > &stack[ENMAXFILTERS-2]) {
 603                 enprintf(("=>0(sp++)\n"));
 604                 return (0);
 605         }
 606 
 607         arg = *sp++;
 608         switch (op) {
 609         default:
 610                 enprintf(("=>0(def)\n"));
 611                 return (0);
 612         case opx(ENF_AND):
 613                 *sp &= arg;
 614                 break;
 615         case opx(ENF_OR):
 616                 *sp |= arg;
 617                 break;
 618         case opx(ENF_XOR):
 619                 *sp ^= arg;
 620                 break;
 621         case opx(ENF_EQ):
 622                 *sp = (*sp == arg);
 623                 break;
 624         case opx(ENF_NEQ):
 625                 *sp = (*sp != arg);
 626                 break;
 627         case opx(ENF_LT):
 628                 *sp = (*sp < arg);
 629                 break;
 630         case opx(ENF_LE):
 631                 *sp = (*sp <= arg);
 632                 break;
 633         case opx(ENF_GT):
 634                 *sp = (*sp > arg);
 635                 break;
 636         case opx(ENF_GE):
 637                 *sp = (*sp >= arg);
 638                 break;
 639 
 640         /* short-circuit operators */
 641 
 642         case opx(ENF_COR):
 643                 if (*sp++ == arg) {
 644                         enprintf(("=>COR %x\n", *sp));
 645                         return (1);
 646                 }
 647                 break;
 648         case opx(ENF_CAND):
 649                 if (*sp++ != arg) {
 650                         enprintf(("=>CAND %x\n", *sp));
 651                         return (0);
 652                 }
 653                 break;
 654         case opx(ENF_CNOR):
 655                 if (*sp++ == arg) {
 656                         enprintf(("=>COR %x\n", *sp));
 657                         return (0);
 658                 }
 659                 break;
 660         case opx(ENF_CNAND):
 661                 if (*sp++ != arg) {
 662                         enprintf(("=>CNAND %x\n", *sp));
 663                         return (1);
 664                 }
 665                 break;
 666         }
 667         }
 668         enprintf(("=>%x\n", *sp));
 669         return (*sp);
 670 }