1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /*
  27  * Standard module for handling DLPI Style 2 attach/detach
  28  */
  29 
  30 #include <sys/types.h>
  31 #include <sys/conf.h>
  32 #include <sys/modctl.h>
  33 #include <sys/cmn_err.h>
  34 #include <sys/sunddi.h>
  35 #include <sys/esunddi.h>
  36 #include <sys/strsubr.h>
  37 #include <sys/ddi.h>
  38 #include <sys/dlpi.h>
  39 #include <sys/strsun.h>
  40 #include <sys/policy.h>
  41 
static struct streamtab drstab;

/*
 * STREAMS module switch entry for this module.  The streamtab is
 * only forward-declared here; it is defined at the bottom of the file.
 */
static struct fmodsw fsw = {
        DRMODNAME,      /* f_name: module name pushed via I_PUSH/stropen */
        &drstab,        /* f_str: read/write qinit table (defined below) */
        D_MP            /* f_flag: module is fully MT-safe */
};
  49 
  50 
/*
 * Module linkage information for the kernel.
 */

/* Linkage for a loadable STREAMS module (not a driver). */
static struct modlstrmod modlstrmod = {
        &mod_strmodops, "dr compatibility for DLPI style 2 drivers", &fsw
};


/* Single-element linkage list handed to mod_install()/mod_remove(). */
static struct modlinkage modlinkage = {
        MODREV_1, &modlstrmod, NULL
};
  63 
  64 
  65 int
  66 _init(void)
  67 {
  68         return (mod_install(&modlinkage));
  69 }
  70 
  71 int
  72 _fini(void)
  73 {
  74         return (mod_remove(&modlinkage));
  75 }
  76 
  77 int
  78 _info(struct modinfo *modinfop)
  79 {
  80         return (mod_info(&modlinkage, modinfop));
  81 }
  82 
  83 
static int      dropen(queue_t *, dev_t *, int, int, cred_t *);
static int      drclose(queue_t *, int, cred_t *);
static int      drrput(queue_t *, mblk_t *);
static int      drwput(queue_t *, mblk_t *);

/*
 * Module info: no flow control of our own (put procedures only, no
 * service procedures), so the water marks are effectively unused.
 */
static struct module_info drinfo = {
        0,              /* mi_idnum: module id */
        DRMODNAME,      /* mi_idname: module name */
        0,              /* mi_minpsz: minimum packet size */
        INFPSZ,         /* mi_maxpsz: maximum packet size (unlimited) */
        1,              /* mi_hiwat: high water mark */
        0               /* mi_lowat: low water mark */
};

/* Read side: intercepts DLPI acks in drrput; open/close live here. */
static struct qinit drrinit = {
        (int (*)())drrput,      /* qi_putp */
        NULL,                   /* qi_srvp: no service procedure */
        dropen,                 /* qi_qopen */
        drclose,                /* qi_qclose */
        NULL,                   /* qi_qadmin */
        &drinfo                 /* qi_minfo */
};

/* Write side: intercepts DL_ATTACH_REQ/DL_DETACH_REQ in drwput. */
static struct qinit drwinit = {
        (int (*)())drwput,      /* qi_putp */
        NULL,                   /* qi_srvp: no service procedure */
        NULL,                   /* qi_qopen: open handled on read side */
        NULL,                   /* qi_qclose */
        NULL,                   /* qi_qadmin */
        &drinfo                 /* qi_minfo */
};

/* streamtab referenced by fsw at the top of the file. */
static struct streamtab drstab = {
        &drrinit,       /* st_rdinit */
        &drwinit,       /* st_wrinit */
        NULL,           /* st_muxrinit: not a multiplexor */
        NULL            /* st_muxwinit */
};
 122 
 123 /*
 124  * This module is pushed directly on top of the bottom driver
 125  * in a DLPI style-2 stream by stropen(). It intercepts
 126  * DL_ATTACH_REQ/DL_DETACH_REQ messages on the write side
 127  * and acks on the read side, calls qassociate where needed.
 128  * The primary purpose is to workaround a DR race condition
 129  * affecting non-DDI compliant DLPI style 2 drivers, which may
 130  * cause the system to panic.
 131  *
 132  * The following action is taken:
 133  * Write side (drwput):
 134  *      attach request: hold driver instance assuming ppa == instance.
 135  *              This way, the instance cannot be detached while the
 136  *              driver is processing DL_ATTACH_REQ.
 137  *
 *              On a successful hold, store the dip in a ring buffer
 *              to be processed later by the read side.
 140  *              If hold fails (most likely ppa != instance), we store
 141  *              NULL in the ring buffer and read side won't take
 142  *              any action on ack.
 143  *
 144  * Read side (drrput):
 145  *      attach success: if (dip held on write side) associate queue with dip
 146  *      attach failure: if (dip held on write side) release hold on dip
 147  *      detach success: associate queue with NULL
 148  *      detach failure: do nothing
 149  *
 150  * The module assumes that incoming DL_ATTACH_REQ/DL_DETACH_REQ
 151  * messages are ordered (non-concurrent) and the bottom
 152  * driver processes them and sends acknowledgements in the same
 153  * order. This assumption is reasonable because concurrent
 154  * association results in non-deterministic queue behavior.
 155  * The module is coded carefully such that unordered messages
 156  * do not result in a system panic.
 157  *
 158  * The module handles multiple outstanding messages queued
 159  * in the bottom driver. Messages processed on the write side
 160  * but not yet arrived at read side are placed in the ring buffer
 161  * dr_dip[], between dr_nfirst and dr_nlast. The write side is
 162  * producer and the read side is the consumer. The buffer is full
 163  * when dr_nfirst == dr_nlast.
 164  *
 165  * The current size of the ring buffer is 64 (MAX_DLREQS) per stream.
 166  * During normal testing, we have not seen outstanding messages
 167  * above 10.
 168  */
 169 
 170 #define MAX_DLREQS      64
 171 #define INCR(x)         {(x)++; if ((x) >= MAX_DLREQS) (x) = 0; }
 172 
 173 struct drstate {
 174         kmutex_t dr_lock;
 175         major_t dr_major;
 176         int dr_nfirst;
 177         int dr_nlast;
 178         dev_info_t *dr_dip[MAX_DLREQS];
 179 };
 180 
 181 /* ARGSUSED1 */
 182 static int
 183 dropen(queue_t *q, dev_t *devp, int oflag, int sflag, cred_t *crp)
 184 {
 185         struct drstate *dsp;
 186 
 187         if (sflag != MODOPEN) { /* must be a pushed module */
 188                 return (EINVAL);
 189         }
 190 
 191         if (secpolicy_net_rawaccess(crp) != 0) {
 192                 return (EPERM);
 193         }
 194 
 195         if (q->q_ptr != NULL) {
 196                 return (0);     /* already open */
 197         }
 198 
 199         dsp = kmem_zalloc(sizeof (*dsp), KM_SLEEP);
 200         dsp->dr_major = getmajor(*devp);
 201         mutex_init(&dsp->dr_lock, NULL, MUTEX_DEFAULT, NULL);
 202         q->q_ptr = OTHERQ(q)->q_ptr = dsp;
 203         qprocson(q);
 204         ddi_assoc_queue_with_devi(q, NULL);
 205         return (0);
 206 }
 207 
 208 /* ARGSUSED1 */
 209 static int
 210 drclose(queue_t *q, int cflag, cred_t *crp)
 211 {
 212         struct drstate *dsp = q->q_ptr;
 213 
 214         ASSERT(dsp);
 215         ddi_assoc_queue_with_devi(q, NULL);
 216         qprocsoff(q);
 217 
 218         mutex_destroy(&dsp->dr_lock);
 219         kmem_free(dsp, sizeof (*dsp));
 220         q->q_ptr = NULL;
 221 
 222         return (0);
 223 }
 224 
 225 static int
 226 drrput(queue_t *q, mblk_t *mp)
 227 {
 228         struct drstate *dsp;
 229         union DL_primitives *dlp;
 230         dev_info_t *dip;
 231 
 232         switch (DB_TYPE(mp)) {
 233         case M_PROTO:
 234         case M_PCPROTO:
 235                 break;
 236         default:
 237                 putnext(q, mp);
 238                 return (0);
 239         }
 240 
 241         /* make sure size is sufficient for dl_primitive */
 242         if (MBLKL(mp) < sizeof (t_uscalar_t)) {
 243                 putnext(q, mp);
 244                 return (0);
 245         }
 246 
 247         dlp = (union DL_primitives *)mp->b_rptr;
 248         switch (dlp->dl_primitive) {
 249         case DL_OK_ACK: {
 250                 /* check for proper size, let upper layer deal with error */
 251                 if (MBLKL(mp) < DL_OK_ACK_SIZE) {
 252                         putnext(q, mp);
 253                         return (0);
 254                 }
 255 
 256                 dsp = q->q_ptr;
 257                 switch (dlp->ok_ack.dl_correct_primitive) {
 258                 case DL_ATTACH_REQ:
 259                         /*
 260                          * ddi_assoc_queue_with_devi() will hold dip,
 261                          * so release after association.
 262                          *
 263                          * dip is NULL means we didn't hold dip on read side.
 264                          * (unlikely, but possible), so we do nothing.
 265                          */
 266                         mutex_enter(&dsp->dr_lock);
 267                         dip = dsp->dr_dip[dsp->dr_nlast];
 268                         dsp->dr_dip[dsp->dr_nlast] = NULL;
 269                         INCR(dsp->dr_nlast);
 270                         mutex_exit(&dsp->dr_lock);
 271                         if (dip) {
 272                                 ddi_assoc_queue_with_devi(q, dip);
 273                                 ddi_release_devi(dip);
 274                         }
 275                         break;
 276 
 277                 case DL_DETACH_REQ:
 278                         ddi_assoc_queue_with_devi(q, NULL);
 279                         break;
 280                 default:
 281                         break;
 282                 }
 283                 break;
 284         }
 285         case DL_ERROR_ACK:
 286                 if (dlp->error_ack.dl_error_primitive != DL_ATTACH_REQ)
 287                         break;
 288 
 289                 dsp = q->q_ptr;
 290                 mutex_enter(&dsp->dr_lock);
 291                 dip = dsp->dr_dip[dsp->dr_nlast];
 292                 dsp->dr_dip[dsp->dr_nlast] = NULL;
 293                 INCR(dsp->dr_nlast);
 294                 mutex_exit(&dsp->dr_lock);
 295                 /*
 296                  * Release dip on attach failure
 297                  */
 298                 if (dip) {
 299                         ddi_release_devi(dip);
 300                 }
 301                 break;
 302         default:
 303                 break;
 304         }
 305 
 306         putnext(q, mp);
 307         return (0);
 308 }
 309 
 310 /*
 311  * Detect dl attach, hold the dip to prevent it from detaching
 312  */
 313 static int
 314 drwput(queue_t *q, mblk_t *mp)
 315 {
 316         struct drstate *dsp;
 317         union DL_primitives *dlp;
 318         dev_info_t *dip;
 319 
 320         switch (DB_TYPE(mp)) {
 321         case M_PROTO:
 322         case M_PCPROTO:
 323                 break;
 324         default:
 325                 putnext(q, mp);
 326                 return (0);
 327         }
 328 
 329         /* make sure size is sufficient for dl_primitive */
 330         if (MBLKL(mp) < sizeof (t_uscalar_t)) {
 331                 putnext(q, mp);
 332                 return (0);
 333         }
 334 
 335         dlp = (union DL_primitives *)mp->b_rptr;
 336         switch (dlp->dl_primitive) {
 337         case DL_ATTACH_REQ:
 338                 /*
 339                  * Check for proper size of the message.
 340                  *
 341                  * If size is correct, get the ppa and attempt to
 342                  * hold the device assuming ppa is instance.
 343                  *
 344                  * If size is wrong, we can't get the ppa, but
 345                  * still increment dr_nfirst because the read side
 346                  * will get a error ack on DL_ATTACH_REQ.
 347                  */
 348                 dip = NULL;
 349                 dsp = q->q_ptr;
 350                 if (MBLKL(mp) >= DL_OK_ACK_SIZE) {
 351                         dip = ddi_hold_devi_by_instance(dsp->dr_major,
 352                             dlp->attach_req.dl_ppa, E_DDI_HOLD_DEVI_NOATTACH);
 353                 }
 354 
 355                 mutex_enter(&dsp->dr_lock);
 356                 dsp->dr_dip[dsp->dr_nfirst] = dip;
 357                 INCR(dsp->dr_nfirst);
 358                 /*
 359                  * Check if ring buffer is full. If so, assert in debug
 360                  * kernel and produce a warning in non-debug kernel.
 361                  */
 362                 ASSERT(dsp->dr_nfirst != dsp->dr_nlast);
 363                 if (dsp->dr_nfirst == dsp->dr_nlast) {
 364                         cmn_err(CE_WARN, "drcompat: internal buffer full");
 365                 }
 366                 mutex_exit(&dsp->dr_lock);
 367                 break;
 368         default:
 369                 break;
 370         }
 371 
 372         putnext(q, mp);
 373         return (0);
 374 }