1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /*
  27  * STREAMS Packet Filter Module
  28  *
  29  * This module applies a filter to messages arriving on its read
 * queue, passing on messages that the filter accepts and discarding
  31  * the others.  It supports ioctls for setting the filter.
  32  *
  33  * On the write side, the module simply passes everything through
  34  * unchanged.
  35  *
  36  * Based on SunOS 4.x version.  This version has minor changes:
  37  *      - general SVR4 porting stuff
  38  *      - change name and prefixes from "nit" buffer to streams buffer
  39  *      - multithreading assumes configured as D_MTQPAIR
  40  */
  41 
  42 #include <sys/types.h>
  43 #include <sys/sysmacros.h>
  44 #include <sys/errno.h>
  45 #include <sys/debug.h>
  46 #include <sys/time.h>
  47 #include <sys/stropts.h>
  48 #include <sys/stream.h>
  49 #include <sys/conf.h>
  50 #include <sys/ddi.h>
  51 #include <sys/sunddi.h>
  52 #include <sys/kmem.h>
  53 #include <sys/strsun.h>
  54 #include <sys/pfmod.h>
  55 #include <sys/modctl.h>
  56 
  57 /*
  58  * Expanded version of the Packetfilt structure that includes
  59  * some additional fields that aid filter execution efficiency.
  60  */
  61 struct epacketfilt {
  62         struct Pf_ext_packetfilt        pf;
  63 #define pf_Priority     pf.Pf_Priority
  64 #define pf_FilterLen    pf.Pf_FilterLen
  65 #define pf_Filter       pf.Pf_Filter
  66         /* pointer to word immediately past end of filter */
  67         ushort_t                *pf_FilterEnd;
  68         /* length in bytes of packet prefix the filter examines */
  69         ushort_t                pf_PByteLen;
  70 };
  71 
  72 /*
  73  * (Internal) packet descriptor for FilterPacket
  74  */
  75 struct packdesc {
  76         ushort_t        *pd_hdr;        /* header starting address */
  77         uint_t          pd_hdrlen;      /* header length in shorts */
  78         ushort_t        *pd_body;       /* body starting address */
  79         uint_t          pd_bodylen;     /* body length in shorts */
  80 };
  81 
  82 
  83 /*
  84  * Function prototypes.
  85  */
  86 static  int     pfopen(queue_t *, dev_t *, int, int, cred_t *);
  87 static  int     pfclose(queue_t *);
  88 static void     pfioctl(queue_t *wq, mblk_t *mp);
  89 static  int     FilterPacket(struct packdesc *, struct epacketfilt *);
  90 /*
  91  * To save instructions, since STREAMS ignores the return value
  92  * from these functions, they are defined as void here. Kind of icky, but...
  93  */
  94 static void     pfwput(queue_t *, mblk_t *);
  95 static void     pfrput(queue_t *, mblk_t *);
  96 
  97 static struct module_info pf_minfo = {
  98         22,             /* mi_idnum */
  99         "pfmod",        /* mi_idname */
 100         0,              /* mi_minpsz */
 101         INFPSZ,         /* mi_maxpsz */
 102         0,              /* mi_hiwat */
 103         0               /* mi_lowat */
 104 };
 105 
 106 static struct qinit pf_rinit = {
 107         (int (*)())pfrput,      /* qi_putp */
 108         NULL,
 109         pfopen,                 /* qi_qopen */
 110         pfclose,                /* qi_qclose */
 111         NULL,                   /* qi_qadmin */
 112         &pf_minfo,          /* qi_minfo */
 113         NULL                    /* qi_mstat */
 114 };
 115 
 116 static struct qinit pf_winit = {
 117         (int (*)())pfwput,      /* qi_putp */
 118         NULL,                   /* qi_srvp */
 119         NULL,                   /* qi_qopen */
 120         NULL,                   /* qi_qclose */
 121         NULL,                   /* qi_qadmin */
 122         &pf_minfo,          /* qi_minfo */
 123         NULL                    /* qi_mstat */
 124 };
 125 
 126 static struct streamtab pf_info = {
 127         &pf_rinit,  /* st_rdinit */
 128         &pf_winit,  /* st_wrinit */
 129         NULL,           /* st_muxrinit */
 130         NULL            /* st_muxwinit */
 131 };
 132 
 133 static struct fmodsw fsw = {
 134         "pfmod",
 135         &pf_info,
 136         D_MTQPAIR | D_MP
 137 };
 138 
 139 static struct modlstrmod modlstrmod = {
 140         &mod_strmodops, "streams packet filter module", &fsw
 141 };
 142 
 143 static struct modlinkage modlinkage = {
 144         MODREV_1, { &modlstrmod, NULL }
 145 };
 146 
 147 int
 148 _init(void)
 149 {
 150         return (mod_install(&modlinkage));
 151 }
 152 
 153 int
 154 _fini(void)
 155 {
 156         return (mod_remove(&modlinkage));
 157 }
 158 
 159 int
 160 _info(struct modinfo *modinfop)
 161 {
 162         return (mod_info(&modlinkage, modinfop));
 163 }
 164 
 165 /*ARGSUSED*/
 166 static int
 167 pfopen(queue_t *rq, dev_t *dev, int oflag, int sflag, cred_t *crp)
 168 {
 169         struct epacketfilt      *pfp;
 170 
 171         ASSERT(rq);
 172 
 173         if (sflag != MODOPEN)
 174                 return (EINVAL);
 175 
 176         if (rq->q_ptr)
 177                 return (0);
 178 
 179         /*
 180          * Allocate and initialize per-Stream structure.
 181          */
 182         pfp = kmem_alloc(sizeof (struct epacketfilt), KM_SLEEP);
 183         rq->q_ptr = WR(rq)->q_ptr = (char *)pfp;
 184 
 185         qprocson(rq);
 186 
 187         return (0);
 188 }
 189 
 190 static int
 191 pfclose(queue_t *rq)
 192 {
 193         struct  epacketfilt     *pfp = (struct epacketfilt *)rq->q_ptr;
 194 
 195         ASSERT(pfp);
 196 
 197         qprocsoff(rq);
 198 
 199         kmem_free(pfp, sizeof (struct epacketfilt));
 200         rq->q_ptr = WR(rq)->q_ptr = NULL;
 201 
 202         return (0);
 203 }
 204 
 205 /*
 206  * Write-side put procedure.  Its main task is to detect ioctls.
 207  * Other message types are passed on through.
 208  */
 209 static void
 210 pfwput(queue_t *wq, mblk_t *mp)
 211 {
 212         switch (mp->b_datap->db_type) {
 213         case M_IOCTL:
 214                 pfioctl(wq, mp);
 215                 break;
 216 
 217         default:
 218                 putnext(wq, mp);
 219                 break;
 220         }
 221 }
 222 
 223 /*
 224  * Read-side put procedure.  It's responsible for applying the
 225  * packet filter and passing upstream message on or discarding it
 226  * depending upon the results.
 227  *
 228  * Upstream messages can start with zero or more M_PROTO mblks
 229  * which are skipped over before executing the packet filter
 230  * on any remaining M_DATA mblks.
 231  */
 232 static void
 233 pfrput(queue_t *rq, mblk_t *mp)
 234 {
 235         struct  epacketfilt     *pfp = (struct epacketfilt *)rq->q_ptr;
 236         mblk_t  *mbp, *mpp;
 237         struct  packdesc        pd;
 238         int     need;
 239 
 240         ASSERT(pfp);
 241 
 242         switch (DB_TYPE(mp)) {
 243         case M_PROTO:
 244         case M_DATA:
 245                 /*
 246                  * Skip over protocol information and find the start
 247                  * of the message body, saving the overall message
 248                  * start in mpp.
 249                  */
 250                 for (mpp = mp; mp && (DB_TYPE(mp) == M_PROTO); mp = mp->b_cont)
 251                         ;
 252 
 253                 /*
 254                  * Null body (exclusive of M_PROTO blocks) ==> accept.
 255                  * Note that a null body is not the same as an empty body.
 256                  */
 257                 if (mp == NULL) {
 258                         putnext(rq, mpp);
 259                         break;
 260                 }
 261 
 262                 /*
 263                  * Pull the packet up to the length required by
 264                  * the filter.  Note that doing so destroys sharing
 265                  * relationships, which is unfortunate, since the
 266                  * results of pulling up here are likely to be useful
 267                  * for shared messages applied to a filter on a sibling
 268                  * stream.
 269                  *
 270                  * Most packet sources will provide the packet in two
 271                  * logical pieces: an initial header in a single mblk,
 272                  * and a body in a sequence of mblks hooked to the
 273                  * header.  We're prepared to deal with variant forms,
 274                  * but in any case, the pullup applies only to the body
 275                  * part.
 276                  */
 277                 mbp = mp->b_cont;
 278                 need = pfp->pf_PByteLen;
 279                 if (mbp && (MBLKL(mbp) < need)) {
 280                         int len = msgdsize(mbp);
 281 
 282                         /* XXX discard silently on pullupmsg failure */
 283                         if (pullupmsg(mbp, MIN(need, len)) == 0) {
 284                                 freemsg(mpp);
 285                                 break;
 286                         }
 287                 }
 288 
 289                 /*
 290                  * Misalignment (not on short boundary) ==> reject.
 291                  */
 292                 if (((uintptr_t)mp->b_rptr & (sizeof (ushort_t) - 1)) ||
 293                     (mbp != NULL &&
 294                     ((uintptr_t)mbp->b_rptr & (sizeof (ushort_t) - 1)))) {
 295                         freemsg(mpp);
 296                         break;
 297                 }
 298 
 299                 /*
 300                  * These assignments are distasteful, but necessary,
 301                  * since the packet filter wants to work in terms of
 302                  * shorts.  Odd bytes at the end of header or data can't
 303                  * participate in the filtering operation.
 304                  */
 305                 pd.pd_hdr = (ushort_t *)mp->b_rptr;
 306                 pd.pd_hdrlen = (mp->b_wptr - mp->b_rptr) / sizeof (ushort_t);
 307                 if (mbp) {
 308                         pd.pd_body = (ushort_t *)mbp->b_rptr;
 309                         pd.pd_bodylen = (mbp->b_wptr - mbp->b_rptr) /
 310                                                         sizeof (ushort_t);
 311                 } else {
 312                         pd.pd_body = NULL;
 313                         pd.pd_bodylen = 0;
 314                 }
 315 
 316                 /*
 317                  * Apply the filter.
 318                  */
 319                 if (FilterPacket(&pd, pfp))
 320                         putnext(rq, mpp);
 321                 else
 322                         freemsg(mpp);
 323 
 324                 break;
 325 
 326         default:
 327                 putnext(rq, mp);
 328                 break;
 329         }
 330 
 331 }
 332 
 333 /*
 334  * Handle write-side M_IOCTL messages.
 335  */
 336 static void
 337 pfioctl(queue_t *wq, mblk_t *mp)
 338 {
 339         struct  epacketfilt     *pfp = (struct epacketfilt *)wq->q_ptr;
 340         struct  Pf_ext_packetfilt       *upfp;
 341         struct  packetfilt      *opfp;
 342         ushort_t        *fwp;
 343         int     arg;
 344         int     maxoff = 0;
 345         int     maxoffreg = 0;
 346         struct iocblk   *iocp = (struct iocblk *)mp->b_rptr;
 347         int     error;
 348 
 349         switch (iocp->ioc_cmd) {
 350         case PFIOCSETF:
 351                 /*
 352                  * Verify argument length. Since the size of packet filter
 353                  * got increased (ENMAXFILTERS was bumped up to 2047), to
 354                  * maintain backwards binary compatibility, we need to
 355                  * check for both possible sizes.
 356                  */
 357                 switch (iocp->ioc_count) {
 358                 case sizeof (struct Pf_ext_packetfilt):
 359                         error = miocpullup(mp,
 360                             sizeof (struct Pf_ext_packetfilt));
 361                         if (error != 0) {
 362                                 miocnak(wq, mp, 0, error);
 363                                 return;
 364                         }
 365                         upfp = (struct Pf_ext_packetfilt *)mp->b_cont->b_rptr;
 366                         if (upfp->Pf_FilterLen > PF_MAXFILTERS) {
 367                                 miocnak(wq, mp, 0, EINVAL);
 368                                 return;
 369                         }
 370 
 371                         bcopy(upfp, pfp, sizeof (struct Pf_ext_packetfilt));
 372                         pfp->pf_FilterEnd = &pfp->pf_Filter[pfp->pf_FilterLen];
 373                         break;
 374 
 375                 case sizeof (struct packetfilt):
 376                         error = miocpullup(mp, sizeof (struct packetfilt));
 377                         if (error != 0) {
 378                                 miocnak(wq, mp, 0, error);
 379                                 return;
 380                         }
 381                         opfp = (struct packetfilt *)mp->b_cont->b_rptr;
 382                         /* this strange comparison keeps gcc from complaining */
 383                         if (opfp->Pf_FilterLen - 1 >= ENMAXFILTERS) {
 384                                 miocnak(wq, mp, 0, EINVAL);
 385                                 return;
 386                         }
 387 
 388                         pfp->pf.Pf_Priority = opfp->Pf_Priority;
 389                         pfp->pf.Pf_FilterLen = (unsigned int)opfp->Pf_FilterLen;
 390 
 391                         bcopy(opfp->Pf_Filter, pfp->pf.Pf_Filter,
 392                             sizeof (opfp->Pf_Filter));
 393                         pfp->pf_FilterEnd = &pfp->pf_Filter[pfp->pf_FilterLen];
 394                         break;
 395 
 396                 default:
 397                         miocnak(wq, mp, 0, EINVAL);
 398                         return;
 399                 }
 400 
 401                 /*
 402                  * Find and record maximum byte offset that the
 403                  * filter users.  We use this when executing the
 404                  * filter to determine how much of the packet
 405                  * body to pull up.  This code depends on the
 406                  * filter encoding.
 407                  */
 408                 for (fwp = pfp->pf_Filter; fwp < pfp->pf_FilterEnd; fwp++) {
 409                         arg = *fwp & ((1 << ENF_NBPA) - 1);
 410                         switch (arg) {
 411                         default:
 412                                 if ((arg -= ENF_PUSHWORD) > maxoff)
 413                                         maxoff = arg;
 414                                 break;
 415 
 416                         case ENF_LOAD_OFFSET:
 417                                 /* Point to the offset */
 418                                 fwp++;
 419                                 if (*fwp > maxoffreg)
 420                                         maxoffreg = *fwp;
 421                                 break;
 422 
 423                         case ENF_PUSHLIT:
 424                         case ENF_BRTR:
 425                         case ENF_BRFL:
 426                                 /* Skip over the literal. */
 427                                 fwp++;
 428                                 break;
 429 
 430                         case ENF_PUSHZERO:
 431                         case ENF_PUSHONE:
 432                         case ENF_PUSHFFFF:
 433                         case ENF_PUSHFF00:
 434                         case ENF_PUSH00FF:
 435                         case ENF_NOPUSH:
 436                         case ENF_POP:
 437                                 break;
 438                         }
 439                 }
 440 
 441                 /*
 442                  * Convert word offset to length in bytes.
 443                  */
 444                 pfp->pf_PByteLen = (maxoff + maxoffreg + 1) * sizeof (ushort_t);
 445                 miocack(wq, mp, 0, 0);
 446                 break;
 447 
 448         default:
 449                 putnext(wq, mp);
 450                 break;
 451         }
 452 }
 453 
 454 /* #define      DEBUG   1 */
 455 /* #define      INNERDEBUG      1 */
 456 
 457 #ifdef  INNERDEBUG
 458 #define enprintf(a)     printf a
 459 #else
 460 #define enprintf(a)
 461 #endif
 462 
 463 /*
 464  * Apply the packet filter given by pfp to the packet given by
 465  * pp.  Return nonzero iff the filter accepts the packet.
 466  *
 467  * The packet comes in two pieces, a header and a body, since
 468  * that's the most convenient form for our caller.  The header
 469  * is in contiguous memory, whereas the body is in a mbuf.
 470  * Our caller will have adjusted the mbuf chain so that its first
 471  * min(MLEN, length(body)) bytes are guaranteed contiguous.  For
 472  * the sake of efficiency (and some laziness) the filter is prepared
 473  * to examine only these two contiguous pieces.  Furthermore, it
 474  * assumes that the header length is even, so that there's no need
 475  * to glue the last byte of header to the first byte of data.
 476  */
 477 
 478 #define opx(i)  ((i) >> ENF_NBPA)
 479 
 480 static int
 481 FilterPacket(struct packdesc *pp, struct epacketfilt *pfp)
 482 {
 483         int             maxhdr = pp->pd_hdrlen;
 484         int             maxword = maxhdr + pp->pd_bodylen;
 485         ushort_t        *sp;
 486         ushort_t        *fp;
 487         ushort_t        *fpe;
 488         unsigned        op;
 489         unsigned        arg;
 490         unsigned        offreg = 0;
 491         ushort_t        stack[ENMAXFILTERS+1];
 492 
 493         fp = &pfp->pf_Filter[0];
 494         fpe = pfp->pf_FilterEnd;
 495 
 496         enprintf(("FilterPacket(%p, %p, %p, %p):\n", pp, pfp, fp, fpe));
 497 
 498         /*
 499          * Push TRUE on stack to start.  The stack size is chosen such
 500          * that overflow can't occur -- each operation can push at most
 501          * one item on the stack, and the stack size equals the maximum
 502          * program length.
 503          */
 504         sp = &stack[ENMAXFILTERS];
 505         *sp = 1;
 506 
 507         while (fp < fpe) {
 508         op = *fp >> ENF_NBPA;
 509         arg = *fp & ((1 << ENF_NBPA) - 1);
 510         fp++;
 511 
 512         switch (arg) {
 513         default:
 514                 arg -= ENF_PUSHWORD;
 515                 /*
 516                  * Since arg is unsigned,
 517                  * if it were less than ENF_PUSHWORD before,
 518                  * it would now be huge.
 519                  */
 520                 if (arg + offreg < maxhdr)
 521                         *--sp = pp->pd_hdr[arg + offreg];
 522                 else if (arg + offreg < maxword)
 523                         *--sp = pp->pd_body[arg - maxhdr + offreg];
 524                 else {
 525                         enprintf(("=>0(len)\n"));
 526                         return (0);
 527                 }
 528                 break;
 529         case ENF_PUSHLIT:
 530                 *--sp = *fp++;
 531                 break;
 532         case ENF_PUSHZERO:
 533                 *--sp = 0;
 534                 break;
 535         case ENF_PUSHONE:
 536                 *--sp = 1;
 537                 break;
 538         case ENF_PUSHFFFF:
 539                 *--sp = 0xffff;
 540                 break;
 541         case ENF_PUSHFF00:
 542                 *--sp = 0xff00;
 543                 break;
 544         case ENF_PUSH00FF:
 545                 *--sp = 0x00ff;
 546                 break;
 547         case ENF_LOAD_OFFSET:
 548                 offreg = *fp++;
 549                 break;
 550         case ENF_BRTR:
 551                 if (*sp != 0)
 552                         fp += *fp;
 553                 else
 554                         fp++;
 555                 if (fp >= fpe) {
 556                         enprintf(("BRTR: fp>=fpe\n"));
 557                         return (0);
 558                 }
 559                 break;
 560         case ENF_BRFL:
 561                 if (*sp == 0)
 562                         fp += *fp;
 563                 else
 564                         fp++;
 565                 if (fp >= fpe) {
 566                         enprintf(("BRFL: fp>=fpe\n"));
 567                         return (0);
 568                 }
 569                 break;
 570         case ENF_POP:
 571                 ++sp;
 572                 if (sp > &stack[ENMAXFILTERS]) {
 573                         enprintf(("stack underflow\n"));
 574                         return (0);
 575                 }
 576                 break;
 577         case ENF_NOPUSH:
 578                 break;
 579         }
 580 
 581         if (sp < &stack[2]) {    /* check stack overflow: small yellow zone */
 582                 enprintf(("=>0(--sp)\n"));
 583                 return (0);
 584         }
 585 
 586         if (op == ENF_NOP)
 587                 continue;
 588 
 589         /*
 590          * all non-NOP operators binary, must have at least two operands
 591          * on stack to evaluate.
 592          */
 593         if (sp > &stack[ENMAXFILTERS-2]) {
 594                 enprintf(("=>0(sp++)\n"));
 595                 return (0);
 596         }
 597 
 598         arg = *sp++;
 599         switch (op) {
 600         default:
 601                 enprintf(("=>0(def)\n"));
 602                 return (0);
 603         case opx(ENF_AND):
 604                 *sp &= arg;
 605                 break;
 606         case opx(ENF_OR):
 607                 *sp |= arg;
 608                 break;
 609         case opx(ENF_XOR):
 610                 *sp ^= arg;
 611                 break;
 612         case opx(ENF_EQ):
 613                 *sp = (*sp == arg);
 614                 break;
 615         case opx(ENF_NEQ):
 616                 *sp = (*sp != arg);
 617                 break;
 618         case opx(ENF_LT):
 619                 *sp = (*sp < arg);
 620                 break;
 621         case opx(ENF_LE):
 622                 *sp = (*sp <= arg);
 623                 break;
 624         case opx(ENF_GT):
 625                 *sp = (*sp > arg);
 626                 break;
 627         case opx(ENF_GE):
 628                 *sp = (*sp >= arg);
 629                 break;
 630 
 631         /* short-circuit operators */
 632 
 633         case opx(ENF_COR):
 634                 if (*sp++ == arg) {
 635                         enprintf(("=>COR %x\n", *sp));
 636                         return (1);
 637                 }
 638                 break;
 639         case opx(ENF_CAND):
 640                 if (*sp++ != arg) {
 641                         enprintf(("=>CAND %x\n", *sp));
 642                         return (0);
 643                 }
 644                 break;
 645         case opx(ENF_CNOR):
 646                 if (*sp++ == arg) {
 647                         enprintf(("=>COR %x\n", *sp));
 648                         return (0);
 649                 }
 650                 break;
 651         case opx(ENF_CNAND):
 652                 if (*sp++ != arg) {
 653                         enprintf(("=>CNAND %x\n", *sp));
 654                         return (1);
 655                 }
 656                 break;
 657         }
 658         }
 659         enprintf(("=>%x\n", *sp));
 660         return (*sp);
 661 }