1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 
  27 /*
  28  * Description: logindmux.c
  29  *
  30  * The logindmux driver is used with login modules (like telmod/rlmod).
  31  * This is a 1x1 cloning mux and two of these muxes are used. The lower link
  32  * of one of the muxes receives input from net and the lower link of the
  33  * other mux receives input from pseudo terminal subsystem.
  34  *
  35  * The logdmux_qexch_lock mutex manages the race between LOGDMX_IOC_QEXCHANGE,
  36  * logdmuxunlink() and logdmuxclose(), so that the instance selected as a peer
  37  * in LOGDMX_IOC_QEXCHANGE cannot be unlinked or closed until the qexchange
  38  * is complete; see the inline comments in the code for details.
  39  *
  40  * The logdmux_peerq_lock mutex manages the race between logdmuxlwsrv() and
  41  * logdmuxlrput() (when null'ing tmxp->peerq during LOGDMUX_UNLINK_REQ
  42  * processing).
  43  *
  44  * The logdmux_minor_lock mutex serializes the growth of logdmux_minor_arena
  45  * (the arena is grown gradually rather than allocated all at once so that
  46  * minor numbers are recycled sooner; for simplicity it is never shrunk).
  47  *
  48  * The unlink operation is implemented using protocol messages that flow
  49  * between the two logindmux peer instances. The instance processing the
  50  * I_UNLINK ioctl will send a LOGDMUX_UNLINK_REQ protocol message to its
  51  * peer to indicate that it wishes to unlink; the peer will process this
  52  * message in its lrput, null its tmxp->peerq and then send a
  53  * LOGDMUX_UNLINK_RESP protocol message in reply to indicate that the
  54  * unlink can proceed; having received the reply in its lrput, the
  55  * instance processing the I_UNLINK can then continue. To ensure that only
  56  * one of the peer instances will be actively processing an I_UNLINK at
  57  * any one time, a single structure (an unlinkinfo_t containing a mutex,
  58  * state variable and pointer to an M_CTL mblk) is allocated during
  59  * the processing of the LOGDMX_IOC_QEXCHANGE ioctl. The two instances, if
  60  * trying to unlink simultaneously, will race to get control of this
  61  * structure which contains the resources necessary to process the
  62  * I_UNLINK. The instance that wins this race will be able to continue
  63  * with the unlink whilst the other instance will be obliged to wait.
  64  */
  65 
  66 #include <sys/types.h>
  67 #include <sys/param.h>
  68 #include <sys/errno.h>
  69 #include <sys/debug.h>
  70 #include <sys/stropts.h>
  71 #include <sys/stream.h>
  72 #include <sys/logindmux.h>
  73 #include <sys/logindmux_impl.h>
  74 #include <sys/stat.h>
  75 #include <sys/kmem.h>
  76 #include <sys/vmem.h>
  77 #include <sys/strsun.h>
  78 #include <sys/sysmacros.h>
  79 #include <sys/mkdev.h>
  80 #include <sys/ddi.h>
  81 #include <sys/sunddi.h>
  82 #include <sys/modctl.h>
  83 #include <sys/termios.h>
  84 #include <sys/cmn_err.h>
  85 
/*
 * STREAMS open/close routines and the put/service procedures that are
 * referenced from the qinit tables in this file, plus the allocator for
 * the unlinkinfo structure used by LOGDMX_IOC_QEXCHANGE.
 */
static int logdmuxopen(queue_t *, dev_t *, int, int, cred_t *);
static int logdmuxclose(queue_t *, int, cred_t *);
static int logdmuxursrv(queue_t *);
static int logdmuxuwput(queue_t *, mblk_t *);
static int logdmuxlrput(queue_t *, mblk_t *);
static int logdmuxlrsrv(queue_t *);
static int logdmuxlwsrv(queue_t *);
static int logdmuxuwsrv(queue_t *);
static int logdmux_alloc_unlinkinfo(struct tmx *, struct tmx *);

/*
 * Internal helpers: I_LINK/I_UNLINK processing, the unlink timer callback,
 * recovery after a failed allocb(), and a data-only queue flush.
 */
static void logdmuxlink(queue_t *, mblk_t *);
static void logdmuxunlink(queue_t *, mblk_t *);
static void logdmux_finish_unlink(queue_t *, mblk_t *);
static void logdmux_unlink_timer(void *arg);
static void recover(queue_t *, mblk_t *, size_t);
static void flushq_dataonly(queue_t *);
 102 
/*
 * Driver-wide state.  The roles of the three mutexes are described in the
 * block comment at the top of this file.
 */
static kmutex_t logdmux_qexch_lock;	/* QEXCHANGE vs. unlink/close */
static kmutex_t logdmux_peerq_lock;	/* guards clearing of tmxp->peerq */
static kmutex_t logdmux_minor_lock;	/* serializes minor arena growth */
static minor_t  logdmux_maxminor = 256; /* grown as necessary */
static vmem_t   *logdmux_minor_arena;	/* arena that minor numbers come from */
static void     *logdmux_statep;	/* soft-state anchor, one tmx per minor */
 109 
/*
 * Module information shared by all four queues of the mux.  Field order
 * follows module_info(9S).
 */
static struct module_info logdmuxm_info = {
        LOGDMX_ID,		/* module id number */
        "logindmux",		/* module name */
        0,			/* minimum packet size */
        256,			/* maximum packet size */
        512,			/* high-water mark */
        256			/* low-water mark */
};
 118 
/*
 * Upper read-side queue (first slot of logdmuxinfo): no put procedure,
 * logdmuxursrv() as service procedure, plus the open and close routines.
 * Field order follows qinit(9S).
 */
static struct qinit logdmuxurinit = {
        NULL,			/* put */
        logdmuxursrv,		/* service */
        logdmuxopen,		/* open */
        logdmuxclose,		/* close */
        NULL,			/* admin (unused) */
        &logdmuxm_info		/* module info */
};
 127 
/*
 * Upper write-side queue (second slot of logdmuxinfo): messages from the
 * daemon enter through logdmuxuwput(); deferred ones are drained by
 * logdmuxuwsrv().
 */
static struct qinit logdmuxuwinit = {
        logdmuxuwput,		/* put */
        logdmuxuwsrv,		/* service */
        NULL,			/* open */
        NULL,			/* close */
        NULL,			/* admin (unused) */
        &logdmuxm_info		/* module info */
};
 136 
/*
 * Lower read-side queue (third slot of logdmuxinfo): messages arriving
 * from the linked stream enter through logdmuxlrput(); deferred ones are
 * drained by logdmuxlrsrv().
 */
static struct qinit logdmuxlrinit = {
        logdmuxlrput,		/* put */
        logdmuxlrsrv,		/* service */
        NULL,			/* open */
        NULL,			/* close */
        NULL,			/* admin (unused) */
        &logdmuxm_info		/* module info */
};
 145 
/*
 * Lower write-side queue (fourth slot of logdmuxinfo): no put procedure,
 * only the logdmuxlwsrv() service procedure.
 */
static struct qinit logdmuxlwinit = {
        NULL,			/* put */
        logdmuxlwsrv,		/* service */
        NULL,			/* open */
        NULL,			/* close */
        NULL,			/* admin (unused) */
        &logdmuxm_info		/* module info */
};
 154 
/*
 * Stream table handed to the framework through logdmux_ops: upper
 * read/write queues followed by the lower (mux) read/write queues.
 */
struct streamtab logdmuxinfo = {
        &logdmuxurinit,		/* upper read */
        &logdmuxuwinit,		/* upper write */
        &logdmuxlrinit,		/* lower read */
        &logdmuxlwinit		/* lower write */
};
 161 
/* DDI autoconfiguration entry points, defined further down in this file. */
static int logdmux_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int logdmux_attach(dev_info_t *, ddi_attach_cmd_t);
static int logdmux_detach(dev_info_t *, ddi_detach_cmd_t);
/* devinfo node recorded by logdmux_attach() and reported by logdmux_info() */
static dev_info_t *logdmux_dip;

/*
 * Device operations for a STREAMS driver: only attach, detach and getinfo
 * are supplied; the remaining slots are nulldev.  D_MP | D_MTPERQ are the
 * STREAMS MT flags and logdmuxinfo is the stream table.
 */
DDI_DEFINE_STREAM_OPS(logdmux_ops, nulldev, nulldev, logdmux_attach,
    logdmux_detach, nulldev, logdmux_info, D_MP | D_MTPERQ, &logdmuxinfo,
    ddi_quiesce_not_needed);

/* Loadable-driver linkage: describes this module as a device driver. */
static struct modldrv modldrv = {
        &mod_driverops,
        "logindmux driver",
        &logdmux_ops
};

/* Module linkage passed to mod_install(), mod_remove() and mod_info(). */
static struct modlinkage modlinkage = {
        MODREV_1, &modldrv, NULL
};
 180 
 181 int
 182 _init(void)
 183 {
 184         int     ret;
 185 
 186         mutex_init(&logdmux_peerq_lock, NULL, MUTEX_DRIVER, NULL);
 187         mutex_init(&logdmux_qexch_lock, NULL, MUTEX_DRIVER, NULL);
 188 
 189         if ((ret = mod_install(&modlinkage)) != 0) {
 190                 mutex_destroy(&logdmux_peerq_lock);
 191                 mutex_destroy(&logdmux_qexch_lock);
 192                 return (ret);
 193         }
 194 
 195         logdmux_minor_arena = vmem_create("logdmux_minor", (void *)1,
 196             logdmux_maxminor, 1, NULL, NULL, NULL, 0,
 197             VM_SLEEP | VMC_IDENTIFIER);
 198         (void) ddi_soft_state_init(&logdmux_statep, sizeof (struct tmx), 1);
 199 
 200         return (0);
 201 }
 202 
 203 int
 204 _fini(void)
 205 {
 206         int     ret;
 207 
 208         if ((ret = mod_remove(&modlinkage)) == 0) {
 209                 mutex_destroy(&logdmux_peerq_lock);
 210                 mutex_destroy(&logdmux_qexch_lock);
 211                 ddi_soft_state_fini(&logdmux_statep);
 212                 vmem_destroy(logdmux_minor_arena);
 213                 logdmux_minor_arena = NULL;
 214         }
 215 
 216         return (ret);
 217 }
 218 
 219 int
 220 _info(struct modinfo *modinfop)
 221 {
 222         return (mod_info(&modlinkage, modinfop));
 223 }
 224 
 225 static int
 226 logdmux_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
 227 {
 228         if (cmd != DDI_ATTACH)
 229                 return (DDI_FAILURE);
 230 
 231         if (ddi_create_minor_node(devi, "logindmux", S_IFCHR, 0, DDI_PSEUDO,
 232             CLONE_DEV) == DDI_FAILURE)
 233                 return (DDI_FAILURE);
 234 
 235         logdmux_dip = devi;
 236         return (DDI_SUCCESS);
 237 }
 238 
 239 static int
 240 logdmux_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
 241 {
 242         if (cmd != DDI_DETACH)
 243                 return (DDI_FAILURE);
 244 
 245         ddi_remove_minor_node(devi, NULL);
 246         return (DDI_SUCCESS);
 247 }
 248 
 249 /* ARGSUSED */
 250 static int
 251 logdmux_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
 252 {
 253         int error;
 254 
 255         switch (infocmd) {
 256         case DDI_INFO_DEVT2DEVINFO:
 257                 if (logdmux_dip == NULL) {
 258                         error = DDI_FAILURE;
 259                 } else {
 260                         *result = logdmux_dip;
 261                         error = DDI_SUCCESS;
 262                 }
 263                 break;
 264         case DDI_INFO_DEVT2INSTANCE:
 265                 *result = (void *)0;
 266                 error = DDI_SUCCESS;
 267                 break;
 268         default:
 269                 error = DDI_FAILURE;
 270         }
 271         return (error);
 272 }
 273 
 274 /*
 275  * Logindmux open routine
 276  */
 277 /*ARGSUSED*/
 278 static int
 279 logdmuxopen(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *crp)
 280 {
 281         struct  tmx *tmxp;
 282         minor_t minor, omaxminor;
 283 
 284         if (sflag != CLONEOPEN)
 285                 return (EINVAL);
 286 
 287         mutex_enter(&logdmux_minor_lock);
 288         if (vmem_size(logdmux_minor_arena, VMEM_FREE) == 0) {
 289                 /*
 290                  * The arena has been exhausted; grow by powers of two
 291                  * up to MAXMIN; bail if we've run out of minors.
 292                  */
 293                 if (logdmux_maxminor == MAXMIN) {
 294                         mutex_exit(&logdmux_minor_lock);
 295                         return (ENOMEM);
 296                 }
 297 
 298                 omaxminor = logdmux_maxminor;
 299                 logdmux_maxminor = MIN(logdmux_maxminor << 1, MAXMIN);
 300 
 301                 (void) vmem_add(logdmux_minor_arena,
 302                     (void *)(uintptr_t)(omaxminor + 1),
 303                     logdmux_maxminor - omaxminor, VM_SLEEP);
 304         }
 305         minor = (minor_t)(uintptr_t)
 306             vmem_alloc(logdmux_minor_arena, 1, VM_SLEEP);
 307         mutex_exit(&logdmux_minor_lock);
 308 
 309         if (ddi_soft_state_zalloc(logdmux_statep, minor) == DDI_FAILURE) {
 310                 vmem_free(logdmux_minor_arena, (void *)(uintptr_t)minor, 1);
 311                 return (ENOMEM);
 312         }
 313 
 314         tmxp = ddi_get_soft_state(logdmux_statep, minor);
 315         tmxp->rdq = q;
 316         tmxp->muxq = NULL;
 317         tmxp->peerq = NULL;
 318         tmxp->unlinkinfop = NULL;
 319         tmxp->dev0 = minor;
 320 
 321         *devp = makedevice(getmajor(*devp), tmxp->dev0);
 322         q->q_ptr = tmxp;
 323         WR(q)->q_ptr = tmxp;
 324 
 325         qprocson(q);
 326         return (0);
 327 }
 328 
 329 /*
 330  * Logindmux close routine gets called when telnet connection is closed
 331  */
 332 /*ARGSUSED*/
 333 static int
 334 logdmuxclose(queue_t *q, int flag, cred_t *crp)
 335 {
 336         struct tmx      *tmxp = q->q_ptr;
 337         minor_t         minor = tmxp->dev0;
 338 
 339         ASSERT(tmxp->muxq == NULL);
 340         ASSERT(tmxp->peerq == NULL);
 341 
 342         qprocsoff(q);
 343         if (tmxp->wbufcid != 0) {
 344                 qunbufcall(q, tmxp->wbufcid);
 345                 tmxp->wbufcid = 0;
 346         }
 347         if (tmxp->rbufcid != 0) {
 348                 qunbufcall(q, tmxp->rbufcid);
 349                 tmxp->rbufcid = 0;
 350         }
 351         if (tmxp->rtimoutid != 0) {
 352                 (void) quntimeout(q, tmxp->rtimoutid);
 353                 tmxp->rtimoutid = 0;
 354         }
 355         if (tmxp->wtimoutid != 0) {
 356                 (void) quntimeout(q, tmxp->wtimoutid);
 357                 tmxp->wtimoutid = 0;
 358         }
 359         if (tmxp->utimoutid != 0) {
 360                 (void) quntimeout(q, tmxp->utimoutid);
 361                 tmxp->utimoutid = 0;
 362         }
 363 
 364         /*
 365          * Hold logdmux_qexch_lock to prevent another thread that might be
 366          * in LOGDMX_IOC_QEXCHANGE from looking up our state while we're
 367          * disposing of it.
 368          */
 369         mutex_enter(&logdmux_qexch_lock);
 370         ddi_soft_state_free(logdmux_statep, minor);
 371         vmem_free(logdmux_minor_arena, (void *)(uintptr_t)minor, 1);
 372         mutex_exit(&logdmux_qexch_lock);
 373 
 374         q->q_ptr = NULL;
 375         WR(q)->q_ptr = NULL;
 376 
 377         return (0);
 378 }
 379 
 380 /*
 381  * Upper read service routine
 382  */
 383 static int
 384 logdmuxursrv(queue_t *q)
 385 {
 386         struct tmx *tmxp = q->q_ptr;
 387 
 388         if (tmxp->muxq != NULL)
 389                 qenable(RD(tmxp->muxq));
 390         return (0);
 391 }
 392 
/*
 * Upper write put procedure.  This routine gets called when the telnet
 * daemon sends data or ioctl messages to the upper mux queue.
 *
 *   M_IOCTL	LOGDMX_IOC_QEXCHANGE, I_LINK and I_UNLINK are handled here
 *		(the latter two by logdmuxlink()/logdmuxunlink()); any
 *		other ioctl is passed to the lower stream, or NAKed with
 *		EINVAL if nothing is linked.
 *   M_DATA	On an instance that is not marked isptm, the data is first
 *		wrapped in a one-byte M_CTL carrying M_CTL_MAGIC_NUMBER;
 *		it is then treated like M_PROTO.
 *   M_PROTO,
 *   M_PCPROTO	Passed to the lower stream, subject to flow control for
 *		ordinary-priority messages.
 *   M_FLUSH	Flushes our write queue on FLUSHW and is passed down if a
 *		lower stream exists, otherwise turned around for FLUSHR.
 *
 * Anything else is logged and freed.  The message is always consumed and
 * the routine always returns 0.
 */
static int
logdmuxuwput(queue_t *q, mblk_t *mp)
{
        queue_t         *qp;
        mblk_t          *newmp;
        struct iocblk   *ioc;
        minor_t         minor;
        STRUCT_HANDLE(protocol_arg, protoh);
        struct tmx      *tmxp, *tmxpeerp;
        int             error;

        tmxp = q->q_ptr;

        switch (mp->b_datap->db_type) {

        case M_IOCTL:
                ASSERT(MBLKL(mp) == sizeof (struct iocblk));

                ioc = (struct iocblk *)mp->b_rptr;
                switch (ioc->ioc_cmd) {
                /*
                 * This is a special ioctl which exchanges q info
                 * of the two peers, connected to netf and ptmx.
                 */
                case LOGDMX_IOC_QEXCHANGE:
                        /*
                         * Make sure the whole protocol_arg (sized for the
                         * caller's data model) is present in b_cont.
                         */
                        error = miocpullup(mp,
                            SIZEOF_STRUCT(protocol_arg, ioc->ioc_flag));
                        if (error != 0) {
                                miocnak(q, mp, 0, error);
                                break;
                        }
                        STRUCT_SET_HANDLE(protoh, ioc->ioc_flag,
                            (struct protocol_arg *)mp->b_cont->b_rptr);
#ifdef _SYSCALL32_IMPL
                        /* A 32-bit caller passes a compressed dev32_t. */
                        if ((ioc->ioc_flag & DATAMODEL_MASK) ==
                            DATAMODEL_ILP32) {
                                minor = getminor(expldev(
                                    STRUCT_FGET(protoh, dev)));
                        } else
#endif
                        {
                                minor = getminor(STRUCT_FGET(protoh, dev));
                        }

                        /*
                         * The second argument to ddi_get_soft_state() is
                         * interpreted as an `int', so prohibit negative
                         * values.
                         */
                        if ((int)minor < 0) {
                                miocnak(q, mp, 0, EINVAL);
                                break;
                        }

                        /*
                         * We must hold logdmux_qexch_lock while looking up
                         * the proposed peer to prevent another thread from
                         * simultaneously I_UNLINKing or closing it.
                         */
                        mutex_enter(&logdmux_qexch_lock);

                        /*
                         * For LOGDMX_IOC_QEXCHANGE to succeed, our peer must
                         * exist (and not be us), and both we and our peer
                         * must be I_LINKed (i.e., muxq must not be NULL) and
                         * not already have a peer.
                         */
                        tmxpeerp = ddi_get_soft_state(logdmux_statep, minor);
                        if (tmxpeerp == NULL || tmxpeerp == tmxp ||
                            tmxpeerp->muxq == NULL || tmxpeerp->peerq != NULL ||
                            tmxp->muxq == NULL || tmxp->peerq != NULL) {
                                mutex_exit(&logdmux_qexch_lock);
                                miocnak(q, mp, 0, EINVAL);
                                break;
                        }

                        /*
                         * If `flag' is set then exchange queues and assume
                         * tmxp refers to the ptmx stream.
                         */
                        if (STRUCT_FGET(protoh, flag)) {
                                /*
                                 * Allocate and populate the structure we
                                 * need when processing an I_UNLINK ioctl.
                                 * Give both logindmux instances a pointer
                                 * to it from their tmx structure.
                                 */
                                if ((error = logdmux_alloc_unlinkinfo(
                                    tmxp, tmxpeerp)) != 0) {
                                        mutex_exit(&logdmux_qexch_lock);
                                        miocnak(q, mp, 0, error);
                                        break;
                                }
                                /* Each side's peerq is the other's muxq. */
                                tmxp->peerq = tmxpeerp->muxq;
                                tmxpeerp->peerq = tmxp->muxq;
                                tmxp->isptm = B_TRUE;
                        }
                        /* With `flag' clear the ioctl is ACKed unchanged. */
                        mutex_exit(&logdmux_qexch_lock);
                        miocack(q, mp, 0, 0);
                        break;

                case I_LINK:
                        ASSERT(MBLKL(mp->b_cont) == sizeof (struct linkblk));
                        logdmuxlink(q, mp);
                        break;

                case I_UNLINK:
                        ASSERT(MBLKL(mp->b_cont) == sizeof (struct linkblk));
                        logdmuxunlink(q, mp);
                        break;

                default:
                        /* Other ioctls need a lower stream to go to. */
                        if (tmxp->muxq == NULL) {
                                miocnak(q, mp, 0, EINVAL);
                                return (0);
                        }
                        putnext(tmxp->muxq, mp);
                        break;
                }

                break;

        case M_DATA:
                if (!tmxp->isptm) {
                        /*
                         * Prepend a one-byte M_CTL header holding
                         * M_CTL_MAGIC_NUMBER; if no buffer is available,
                         * leave the message to recover().
                         */
                        if ((newmp = allocb(sizeof (char), BPRI_MED)) == NULL) {
                                recover(q, mp, sizeof (char));
                                return (0);
                        }
                        newmp->b_datap->db_type = M_CTL;
                        *newmp->b_wptr++ = M_CTL_MAGIC_NUMBER;
                        newmp->b_cont = mp;
                        mp = newmp;
                }
                /* FALLTHRU */

        case M_PROTO:
        case M_PCPROTO:
                qp = tmxp->muxq;
                if (qp == NULL) {
                        merror(q, mp, EINVAL);
                        return (0);
                }

                /*
                 * Ordinary-priority messages are queued for logdmuxuwsrv()
                 * if others are already waiting or the lower stream is
                 * flow controlled; high-priority ones go straight through.
                 */
                if (queclass(mp) < QPCTL) {
                        if (q->q_first != NULL || !canputnext(qp)) {
                                (void) putq(q, mp);
                                return (0);
                        }
                }
                putnext(qp, mp);
                break;

        case M_FLUSH:
                if (*mp->b_rptr & FLUSHW)
                        flushq(q, FLUSHALL);

                if (tmxp->muxq != NULL) {
                        putnext(tmxp->muxq, mp);
                        return (0);
                }

                /* Nothing linked below: turn the flush around ourselves. */
                *mp->b_rptr &= ~FLUSHW;
                if (*mp->b_rptr & FLUSHR)
                        qreply(q, mp);
                else
                        freemsg(mp);
                break;

        default:
                cmn_err(CE_NOTE, "logdmuxuwput: received unexpected message"
                    " of type 0x%x", mp->b_datap->db_type);
                freemsg(mp);
        }
        return (0);
}
 572 
 573 /*
 574  * Upper write service routine
 575  */
 576 static int
 577 logdmuxuwsrv(queue_t *q)
 578 {
 579         mblk_t          *mp, *newmp;
 580         queue_t         *qp;
 581         struct tmx      *tmxp = q->q_ptr;
 582 
 583         while ((mp = getq(q)) != NULL) {
 584                 switch (mp->b_datap->db_type) {
 585                 case M_DATA:
 586                         if (!tmxp->isptm) {
 587                                 if ((newmp = allocb(sizeof (char), BPRI_MED)) ==
 588                                     NULL) {
 589                                         recover(q, mp, sizeof (char));
 590                                         return (0);
 591                                 }
 592                                 newmp->b_datap->db_type = M_CTL;
 593                                 *newmp->b_wptr++ = M_CTL_MAGIC_NUMBER;
 594                                 newmp->b_cont = mp;
 595                                 mp = newmp;
 596                         }
 597                         /* FALLTHRU */
 598 
 599                 case M_CTL:
 600                 case M_PROTO:
 601                         if (tmxp->muxq == NULL) {
 602                                 merror(q, mp, EIO);
 603                                 break;
 604                         }
 605                         qp = tmxp->muxq;
 606                         if (!canputnext(qp)) {
 607                                 (void) putbq(q, mp);
 608                                 return (0);
 609                         }
 610                         putnext(qp, mp);
 611                         break;
 612 
 613 
 614                 default:
 615                         cmn_err(CE_NOTE, "logdmuxuwsrv: received unexpected"
 616                             " message of type 0x%x", mp->b_datap->db_type);
 617                         freemsg(mp);
 618                 }
 619         }
 620         return (0);
 621 }
 622 
/*
 * Logindmux lower put routine detects from which of the two lower queues
 * the data needs to be read from and writes it out to its peer queue.
 * For protocol, it detects M_CTL and sends its data to the daemon. Also,
 * for ioctl and other types of messages, it lets the daemon handle it.
 *
 * The switch below picks a destination queue `qp' for the message: either
 * the peer's lower queue (tmxp->peerq) or our own upper read queue
 * (tmxp->rdq).  Cases that fully deal with the message return directly;
 * the others fall out of the switch into the common tail, which applies
 * flow control to ordinary-priority messages and then forwards.
 *
 * Messages are dropped when the instance is not both linked and paired.
 * The message is always consumed and the routine always returns 0.
 */
static int
logdmuxlrput(queue_t *q, mblk_t *mp)
{
        mblk_t          *savemp;
        queue_t         *qp;
        struct iocblk   *ioc;
        struct tmx      *tmxp = q->q_ptr;
        uchar_t         flush;
        uint_t          *messagep;
        unlinkinfo_t    *unlinkinfop = tmxp->unlinkinfop;

        /* Without a lower stream and a peer there is nowhere to send it. */
        if (tmxp->muxq == NULL || tmxp->peerq == NULL) {
                freemsg(mp);
                return (0);
        }

        /*
         * If there's already a message on our queue and the incoming
         * message is not of a high-priority, enqueue the message --
         * but not if it's a logindmux protocol message.
         */
        if ((q->q_first != NULL) && (queclass(mp) < QPCTL) &&
            (!LOGDMUX_PROTO_MBLK(mp))) {
                (void) putq(q, mp);
                return (0);
        }

        switch (mp->b_datap->db_type) {

        case M_IOCTL:
                ioc = (struct iocblk *)mp->b_rptr;
                switch (ioc->ioc_cmd) {

                /* Terminal ioctls in this list are forwarded to the peer. */
                case TIOCSWINSZ:
                case TCSETAF:
                case TCSETSF:
                case TCSETA:
                case TCSETAW:
                case TCSETS:
                case TCSETSW:
                case TCSBRK:
                case TIOCSTI:
                        qp = tmxp->peerq;
                        break;

                default:
                        cmn_err(CE_NOTE, "logdmuxlrput: received unexpected"
                            " request for ioctl 0x%x", ioc->ioc_cmd);

                        /* NAK unrecognized ioctl's. */
                        miocnak(q, mp, 0, 0);
                        return (0);
                }
                break;

        case M_DATA:
        case M_HANGUP:
                qp = tmxp->peerq;
                break;

        case M_CTL:
                /*
                 * The protocol messages that flow between the peers
                 * to implement the unlink functionality are M_CTLs
                 * which have the M_IOCTL/I_UNLINK mblk of the ioctl
                 * attached via b_cont.  LOGDMUX_PROTO_MBLK() uses
                 * this to determine whether a particular M_CTL is a
                 * peer protocol message.
                 */
                if (LOGDMUX_PROTO_MBLK(mp)) {
                        messagep = (uint_t *)mp->b_rptr;

                        switch (*messagep) {

                        case LOGDMUX_UNLINK_REQ:
                                /*
                                 * We've received a message from our
                                 * peer indicating that it wants to
                                 * unlink.
                                 */
                                *messagep = LOGDMUX_UNLINK_RESP;
                                /* Save peerq before clearing it below. */
                                qp = tmxp->peerq;

                                mutex_enter(&logdmux_peerq_lock);
                                tmxp->peerq = NULL;
                                mutex_exit(&logdmux_peerq_lock);

                                /* Reply via the peer's lower read queue. */
                                put(RD(qp), mp);
                                return (0);

                        case LOGDMUX_UNLINK_RESP:
                                /*
                                 * We've received a positive response
                                 * from our peer to an earlier
                                 * LOGDMUX_UNLINK_REQ that we sent.
                                 * We can now carry on with the unlink.
                                 */
                                qp = tmxp->rdq;
                                mutex_enter(&unlinkinfop->state_lock);
                                ASSERT(unlinkinfop->state ==
                                    LOGDMUX_UNLINK_PENDING);
                                unlinkinfop->state = LOGDMUX_UNLINKED;
                                mutex_exit(&unlinkinfop->state_lock);
                                /* b_cont is the original I_UNLINK ioctl. */
                                logdmux_finish_unlink(WR(qp), mp->b_cont);
                                return (0);
                        }
                }

                /*
                 * Any other M_CTL goes up to the daemon, subject to flow
                 * control.  A one-byte M_CTL_MAGIC_NUMBER header is
                 * stripped so that only the attached message is sent up.
                 */
                qp = tmxp->rdq;
                if (q->q_first != NULL || !canputnext(qp)) {
                        (void) putq(q, mp);
                        return (0);
                }
                if ((MBLKL(mp) == 1) && (*mp->b_rptr == M_CTL_MAGIC_NUMBER)) {
                        savemp = mp->b_cont;
                        freeb(mp);
                        mp = savemp;
                }
                putnext(qp, mp);
                return (0);

        /* These types are passed up to the daemon. */
        case M_IOCACK:
        case M_IOCNAK:
        case M_PROTO:
        case M_PCPROTO:
        case M_PCSIG:
        case M_SETOPTS:
                qp = tmxp->rdq;
                break;

        case M_ERROR:
                if (tmxp->isptm) {
                        /*
                         * This error is from ptm.  We could tell TCP to
                         * shutdown the connection, but it's easier to just
                         * wait for the daemon to get SIGCHLD and close from
                         * above.
                         */
                        freemsg(mp);
                        return (0);
                }
                /*
                 * This is from TCP.  Don't really know why we'd
                 * get this, but we have a pretty good idea what
                 * to do:  Send M_HANGUP to the pty.
                 */
                mp->b_datap->db_type = M_HANGUP;
                /* Discard the payload; the M_HANGUP is sent empty. */
                mp->b_wptr = mp->b_rptr;
                qp = tmxp->peerq;
                break;

        case M_FLUSH:
                if (*mp->b_rptr & FLUSHR)
                        flushq_dataonly(q);

                if (mp->b_flag & MSGMARK) {
                        /*
                         * This M_FLUSH has been marked by the module
                         * below as intended for the upper queue,
                         * not the peer queue.
                         */
                        qp = tmxp->rdq;
                        mp->b_flag &= ~MSGMARK;
                } else {
                        /*
                         * Wrap this M_FLUSH through the mux.
                         * The FLUSHR and FLUSHW bits must be
                         * reversed.
                         */
                        qp = tmxp->peerq;
                        flush = *mp->b_rptr;
                        *mp->b_rptr &= ~(FLUSHR | FLUSHW);
                        if (flush & FLUSHW)
                                *mp->b_rptr |= FLUSHR;
                        if (flush & FLUSHR)
                                *mp->b_rptr |= FLUSHW;
                }
                break;

        /* Start/stop messages are dropped. */
        case M_START:
        case M_STOP:
        case M_STARTI:
        case M_STOPI:
                freemsg(mp);
                return (0);

        default:
                cmn_err(CE_NOTE, "logdmuxlrput: received unexpected "
                    "message of type 0x%x", mp->b_datap->db_type);
                freemsg(mp);
                return (0);
        }
        /*
         * Common tail: ordinary-priority messages are queued for the
         * service routine if others are waiting or the destination is
         * flow controlled; high-priority messages are always forwarded.
         */
        if (queclass(mp) < QPCTL) {
                if (q->q_first != NULL || !canputnext(qp)) {
                        (void) putq(q, mp);
                        return (0);
                }
        }
        putnext(qp, mp);
        return (0);
}
 830 
 831 /*
 832  * Lower read service routine
 833  */
 834 static int
 835 logdmuxlrsrv(queue_t *q)
 836 {
 837         mblk_t          *mp, *savemp;
 838         queue_t         *qp;
 839         struct iocblk   *ioc;
 840         struct tmx      *tmxp = q->q_ptr;
 841 
 842         while ((mp = getq(q)) != NULL) {
 843                 if (tmxp->muxq == NULL || tmxp->peerq == NULL) {
 844                         freemsg(mp);
 845                         continue;
 846                 }
 847 
 848                 switch (mp->b_datap->db_type) {
 849 
 850                 case M_IOCTL:
 851                         ioc = (struct iocblk *)mp->b_rptr;
 852 
 853                         switch (ioc->ioc_cmd) {
 854 
 855                         case TIOCSWINSZ:
 856                         case TCSETAF:
 857                         case TCSETSF:
 858                         case TCSETA:
 859                         case TCSETAW:
 860                         case TCSETS:
 861                         case TCSETSW:
 862                         case TCSBRK:
 863                         case TIOCSTI:
 864                                 qp = tmxp->peerq;
 865                                 break;
 866 
 867                         default:
 868                                 cmn_err(CE_NOTE, "logdmuxlrsrv: received "
 869                                     "unexpected request for ioctl 0x%x",
 870                                     ioc->ioc_cmd);
 871 
 872                                 /* NAK unrecognized ioctl's. */
 873                                 miocnak(q, mp, 0, 0);
 874                                 continue;
 875                         }
 876                         break;
 877 
 878                 case M_DATA:
 879                 case M_HANGUP:
 880                         qp = tmxp->peerq;
 881                         break;
 882 
 883                 case M_CTL:
 884                         qp = tmxp->rdq;
 885                         if (!canputnext(qp)) {
 886                                 (void) putbq(q, mp);
 887                                 return (0);
 888                         }
 889                         if (MBLKL(mp) == 1 &&
 890                             (*mp->b_rptr == M_CTL_MAGIC_NUMBER)) {
 891                                 savemp = mp->b_cont;
 892                                 freeb(mp);
 893                                 mp = savemp;
 894                         }
 895                         putnext(qp, mp);
 896                         continue;
 897 
 898                 case M_PROTO:
 899                 case M_SETOPTS:
 900                         qp = tmxp->rdq;
 901                         break;
 902 
 903                 default:
 904                         cmn_err(CE_NOTE, "logdmuxlrsrv: received unexpected "
 905                             "message of type 0x%x", mp->b_datap->db_type);
 906                         freemsg(mp);
 907                         continue;
 908                 }
 909                 ASSERT(queclass(mp) < QPCTL);
 910                 if (!canputnext(qp)) {
 911                         (void) putbq(q, mp);
 912                         return (0);
 913                 }
 914                 putnext(qp, mp);
 915         }
 916         return (0);
 917 }
 918 
 919 /*
 920  * Lower side write service procedure.  No messages are ever placed on
 921  * the write queue here, this just back-enables all of the upper side
 922  * write service procedures.
 923  */
 924 static int
 925 logdmuxlwsrv(queue_t *q)
 926 {
 927         struct tmx *tmxp = q->q_ptr;
 928 
 929         /*
 930          * Qenable upper write queue and find out which lower
 931          * queue needs to be restarted with flow control.
 932          * Qenable the peer queue so canputnext will
 933          * succeed on next call to logdmuxlrput.
 934          */
 935         qenable(WR(tmxp->rdq));
 936 
 937         mutex_enter(&logdmux_peerq_lock);
 938         if (tmxp->peerq != NULL)
 939                 qenable(RD(tmxp->peerq));
 940         mutex_exit(&logdmux_peerq_lock);
 941 
 942         return (0);
 943 }
 944 
 945 /*
 946  * This routine does I_LINK operation.
 947  */
 948 static void
 949 logdmuxlink(queue_t *q, mblk_t *mp)
 950 {
 951         struct tmx      *tmxp = q->q_ptr;
 952         struct linkblk  *lp = (struct linkblk *)mp->b_cont->b_rptr;
 953 
 954         /*
 955          * Fail if we're already linked.
 956          */
 957         if (tmxp->muxq != NULL) {
 958                 miocnak(q, mp, 0, EINVAL);
 959                 return;
 960         }
 961 
 962         tmxp->muxq = lp->l_qbot;
 963         tmxp->muxq->q_ptr = tmxp;
 964         RD(tmxp->muxq)->q_ptr = tmxp;
 965 
 966         miocack(q, mp, 0, 0);
 967 }
 968 
 969 /*
 970  * logdmuxunlink() is called from logdmuxuwput() and is the first of two
 971  * functions which process an I_UNLINK ioctl. logdmuxunlink() will determine
 972  * the state of logindmux peer linkage and, based on this, control when the
 973  * second function, logdmux_finish_unlink(), is called.  It's
 974  * logdmux_finish_unlink() that's sending the M_IOCACK upstream and
 975  * resetting the link state.
 976  */
 977 static void
 978 logdmuxunlink(queue_t *q, mblk_t *mp)
 979 {
 980         struct tmx      *tmxp = q->q_ptr;
 981         unlinkinfo_t    *unlinkinfop;
 982 
 983         /*
 984          * If we don't have a peer, just unlink.  Note that this check needs
 985          * to be done under logdmux_qexch_lock to prevent racing with
 986          * LOGDMX_IOC_QEXCHANGE, and we *must* set muxq to NULL prior to
 987          * releasing the lock so that LOGDMX_IOC_QEXCHANGE will not consider
 988          * us as a possible peer anymore (if it already considers us to be a
 989          * peer, then unlinkinfop will not be NULL) -- NULLing muxq precludes
 990          * use of logdmux_finish_unlink() here.
 991          */
 992         mutex_enter(&logdmux_qexch_lock);
 993         unlinkinfop = tmxp->unlinkinfop;
 994         if (unlinkinfop == NULL) {
 995                 ASSERT(tmxp->peerq == NULL);
 996                 tmxp->muxq = NULL;
 997                 mutex_exit(&logdmux_qexch_lock);
 998                 miocack(q, mp, 0, 0);
 999                 return;
1000         }
1001         mutex_exit(&logdmux_qexch_lock);
1002 
1003         mutex_enter(&unlinkinfop->state_lock);
1004 
1005         switch (unlinkinfop->state) {
1006 
1007         case LOGDMUX_LINKED:
1008                 /*
1009                  * We're the first instance to process an I_UNLINK --
1010                  * ie, the peer instance is still there. We'll change
1011                  * the state so that only one instance is executing an
1012                  * I_UNLINK at any one time.
1013                  */
1014                 unlinkinfop->state = LOGDMUX_UNLINK_PENDING;
1015                 mutex_exit(&unlinkinfop->state_lock);
1016                 /*
1017                  * Attach the original M_IOCTL message to a
1018                  * LOGDMUX_UNLINK_REQ message and send it to our peer to
1019                  * tell it to unlink from us. When it has completed the
1020                  * task, it will send us a LOGDMUX_UNLINK_RESP message
1021                  * with the original M_IOCTL still attached, which will be
1022                  * processed in our logdmuxlrput(). At that point, we will
1023                  * call logdmux_finish_unlink() to complete the unlink
1024                  * operation using the attached M_IOCTL.
1025                  */
1026                 unlinkinfop->prot_mp->b_cont = mp;
1027                 /*
1028                  * Put the M_CTL directly to the peer's lower RQ.
1029                  */
1030                 put(RD(tmxp->peerq), unlinkinfop->prot_mp);
1031                 break;
1032 
1033         case LOGDMUX_UNLINK_PENDING:
1034                 mutex_exit(&unlinkinfop->state_lock);
1035                 /*
1036                  * Our peer is actively processing an I_UNLINK itself.
1037                  * We have to wait for the peer to complete and we use
1038                  * qtimeout as a way to poll for its completion.
1039                  * We save a reference to our mblk so that we can send
1040                  * it upstream once our peer is done.
1041                  */
1042                 tmxp->unlink_mp = mp;
1043                 tmxp->utimoutid = qtimeout(q, logdmux_unlink_timer, q,
1044                     drv_usectohz(LOGDMUX_POLL_WAIT));
1045                 break;
1046 
1047         case LOGDMUX_UNLINKED:
1048                 /*
1049                  * Our peer is no longer linked so we can proceed.
1050                  */
1051                 mutex_exit(&unlinkinfop->state_lock);
1052                 mutex_destroy(&unlinkinfop->state_lock);
1053                 freeb(unlinkinfop->prot_mp);
1054                 kmem_free(unlinkinfop, sizeof (unlinkinfo_t));
1055                 logdmux_finish_unlink(q, mp);
1056                 break;
1057 
1058         default:
1059                 mutex_exit(&unlinkinfop->state_lock);
1060                 cmn_err(CE_PANIC,
1061                     "logdmuxunlink: peer linkage is in an unrecognized state");
1062                 break;
1063         }
1064 }
1065 
1066 /*
1067  * Finish the unlink operation.  Note that no locks should be held since
1068  * this routine calls into other queues.
1069  */
1070 static void
1071 logdmux_finish_unlink(queue_t *q, mblk_t *unlink_mp)
1072 {
1073         struct tmx *tmxp = q->q_ptr;
1074         mblk_t *mp;
1075 
1076         /*
1077          * Flush any write side data downstream.
1078          */
1079         while ((mp = getq(WR(q))) != NULL)
1080                 putnext(tmxp->muxq, mp);
1081 
1082         /*
1083          * Note that we do not NULL out q_ptr since another thread (e.g., a
1084          * STREAMS service thread) might call logdmuxlrput() between the time
1085          * we exit the logindmux perimeter and the time the STREAMS framework
1086          * resets q_ptr to stdata (since muxq is set to NULL, any messages
1087          * will just be discarded).
1088          */
1089         tmxp->muxq = NULL;
1090         tmxp->unlinkinfop = NULL;
1091         tmxp->peerq = NULL;
1092         miocack(q, unlink_mp, 0, 0);
1093 }
1094 
1095 /*
1096  * logdmux_unlink_timer() is executed by qtimeout(). This function will
1097  * check unlinkinfop->state to determine whether the peer has completed
1098  * its I_UNLINK. If it hasn't, we use qtimeout() to initiate another poll.
1099  */
1100 static void
1101 logdmux_unlink_timer(void *arg)
1102 {
1103         queue_t         *q = arg;
1104         struct  tmx     *tmxp = q->q_ptr;
1105         unlinkinfo_t    *unlinkinfop = tmxp->unlinkinfop;
1106 
1107         tmxp->utimoutid = 0;
1108 
1109         mutex_enter(&unlinkinfop->state_lock);
1110 
1111         if (unlinkinfop->state != LOGDMUX_UNLINKED) {
1112                 ASSERT(unlinkinfop->state == LOGDMUX_UNLINK_PENDING);
1113                 mutex_exit(&unlinkinfop->state_lock);
1114                 /*
1115                  * We need to wait longer for our peer to complete.
1116                  */
1117                 tmxp->utimoutid = qtimeout(q, logdmux_unlink_timer, q,
1118                     drv_usectohz(LOGDMUX_POLL_WAIT));
1119         } else {
1120                 /*
1121                  * Our peer is no longer linked so we can proceed with
1122                  * the cleanup.
1123                  */
1124                 mutex_exit(&unlinkinfop->state_lock);
1125                 mutex_destroy(&unlinkinfop->state_lock);
1126                 freeb(unlinkinfop->prot_mp);
1127                 kmem_free(unlinkinfop, sizeof (unlinkinfo_t));
1128                 logdmux_finish_unlink(q, tmxp->unlink_mp);
1129         }
1130 }
1131 
1132 static void
1133 logdmux_timer(void *arg)
1134 {
1135         queue_t         *q = arg;
1136         struct tmx      *tmxp = q->q_ptr;
1137 
1138         ASSERT(tmxp != NULL);
1139 
1140         if (q->q_flag & QREADR) {
1141                 ASSERT(tmxp->rtimoutid != 0);
1142                 tmxp->rtimoutid = 0;
1143         } else {
1144                 ASSERT(tmxp->wtimoutid != 0);
1145                 tmxp->wtimoutid = 0;
1146         }
1147         enableok(q);
1148         qenable(q);
1149 }
1150 
1151 static void
1152 logdmux_buffer(void *arg)
1153 {
1154         queue_t         *q = arg;
1155         struct tmx      *tmxp = q->q_ptr;
1156 
1157         ASSERT(tmxp != NULL);
1158 
1159         if (q->q_flag & QREADR) {
1160                 ASSERT(tmxp->rbufcid != 0);
1161                 tmxp->rbufcid = 0;
1162         } else {
1163                 ASSERT(tmxp->wbufcid != 0);
1164                 tmxp->wbufcid = 0;
1165         }
1166         enableok(q);
1167         qenable(q);
1168 }
1169 
1170 static void
1171 recover(queue_t *q, mblk_t *mp, size_t size)
1172 {
1173         timeout_id_t    tid;
1174         bufcall_id_t    bid;
1175         struct  tmx     *tmxp = q->q_ptr;
1176 
1177         /*
1178          * Avoid re-enabling the queue.
1179          */
1180         ASSERT(queclass(mp) < QPCTL);
1181         ASSERT(WR(q)->q_next == NULL); /* Called from upper queue only */
1182         noenable(q);
1183         (void) putbq(q, mp);
1184 
1185         /*
1186          * Make sure there is at most one outstanding request per queue.
1187          */
1188         if (q->q_flag & QREADR) {
1189                 if (tmxp->rtimoutid != 0 || tmxp->rbufcid != 0)
1190                         return;
1191         } else {
1192                 if (tmxp->wtimoutid != 0 || tmxp->wbufcid != 0)
1193                         return;
1194         }
1195         if (!(bid = qbufcall(RD(q), size, BPRI_MED, logdmux_buffer, q))) {
1196                 tid = qtimeout(RD(q), logdmux_timer, q, drv_usectohz(SIMWAIT));
1197                 if (q->q_flag & QREADR)
1198                         tmxp->rtimoutid = tid;
1199                 else
1200                         tmxp->wtimoutid = tid;
1201         } else  {
1202                 if (q->q_flag & QREADR)
1203                         tmxp->rbufcid = bid;
1204                 else
1205                         tmxp->wbufcid = bid;
1206         }
1207 }
1208 
1209 static void
1210 flushq_dataonly(queue_t *q)
1211 {
1212         mblk_t *mp, *nmp;
1213 
1214         /*
1215          * Since we are already in the perimeter, and we are not a put-shared
1216          * perimeter, we don't need to freeze the stream or anything to
1217          * be ensured of exclusivity.
1218          */
1219         mp = q->q_first;
1220         while (mp != NULL) {
1221                 if (mp->b_datap->db_type == M_DATA) {
1222                         nmp = mp->b_next;
1223                         rmvq(q, mp);
1224                         freemsg(mp);
1225                         mp = nmp;
1226                 } else {
1227                         mp = mp->b_next;
1228                 }
1229         }
1230 }
1231 
1232 /*
1233  * logdmux_alloc_unlinkinfo() is called from logdmuxuwput() during the
1234  * processing of a LOGDMX_IOC_QEXCHANGE ioctl() to allocate the
1235  * unlinkinfo_t which is needed during the processing of an I_UNLINK.
1236  */
1237 static int
1238 logdmux_alloc_unlinkinfo(struct tmx *t0, struct tmx *t1)
1239 {
1240         unlinkinfo_t    *p;
1241         uint_t          *messagep;
1242 
1243         if ((p = kmem_zalloc(sizeof (unlinkinfo_t), KM_NOSLEEP)) == NULL)
1244                 return (ENOSR);
1245 
1246         if ((p->prot_mp = allocb(sizeof (uint_t), BPRI_MED)) == NULL) {
1247                 kmem_free(p, sizeof (unlinkinfo_t));
1248                 return (ENOSR);
1249         }
1250 
1251         DB_TYPE(p->prot_mp) = M_CTL;
1252         messagep = (uint_t *)p->prot_mp->b_wptr;
1253         *messagep = LOGDMUX_UNLINK_REQ;
1254         p->prot_mp->b_wptr += sizeof (*messagep);
1255         p->state = LOGDMUX_LINKED;
1256         mutex_init(&p->state_lock, NULL, MUTEX_DRIVER, NULL);
1257 
1258         t0->unlinkinfop = t1->unlinkinfop = p;
1259 
1260         return (0);
1261 }