1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 
  27 #include <sys/types.h>
  28 #include <sys/stat.h>
  29 #include <sys/conf.h>
  30 #include <sys/ddi.h>
  31 #include <sys/sunddi.h>
  32 #include <sys/modctl.h>
  33 #include <inet/ip.h>
  34 #include <sys/ib/clients/rds/rdsib_ib.h>
  35 #include <sys/ib/clients/rds/rdsib_buf.h>
  36 #include <sys/ib/clients/rds/rdsib_cm.h>
  37 #include <sys/ib/clients/rds/rdsib_protocol.h>
  38 #include <sys/ib/clients/rds/rds_transport.h>
  39 #include <sys/ib/clients/rds/rds_kstat.h>
  40 
/*
 * Global Configuration Variables
 * As defined in RDS proposal
 *
 * Apart from RdsPktSize and NDataRX, each variable below is loaded by
 * rds_read_config_values() during attach from the driver property that
 * carries the same name.  The static initializers match the default values
 * passed to those property lookups.
 *
 * NDataRX is not a property: rdsib_attach() derives it from MaxNodes and
 * MaxDataRecvBuffers.  RdsPktSize is not assigned anywhere in this file.
 * PendingRxPktsHWM is used as a percentage of NDataRX (it is divided by
 * 100 in rdsib_attach()).
 */
uint_t          MaxNodes                = RDS_MAX_NODES;
uint_t          RdsPktSize;
uint_t          NDataRX;
uint_t          MaxDataSendBuffers      = RDS_MAX_DATA_SEND_BUFFERS;
uint_t          MaxDataRecvBuffers      = RDS_MAX_DATA_RECV_BUFFERS;
uint_t          MaxCtrlSendBuffers      = RDS_MAX_CTRL_SEND_BUFFERS;
uint_t          MaxCtrlRecvBuffers      = RDS_MAX_CTRL_RECV_BUFFERS;
uint_t          DataRecvBufferLWM       = RDS_DATA_RECV_BUFFER_LWM;
uint_t          CtrlRecvBufferLWM       = RDS_CTRL_RECV_BUFFER_LWM;
uint_t          PendingRxPktsHWM        = RDS_PENDING_RX_PKTS_HWM;
uint_t          MinRnrRetry             = RDS_IB_RNR_RETRY;
uint8_t         IBPathRetryCount        = RDS_IB_PATH_RETRY;
uint8_t         IBPktLifeTime           = RDS_IB_PKT_LT;
  58 
  59 extern int rdsib_open_ib();
  60 extern void rdsib_close_ib();
  61 extern void rds_resume_port(in_port_t port);
  62 extern int rds_sendmsg(uio_t *uiop, ipaddr_t sendip, ipaddr_t recvip,
  63     in_port_t sendport, in_port_t recvport, zoneid_t zoneid);
  64 extern boolean_t rds_if_lookup_by_name(char *devname);
  65 
/*
 * IB transport operations vector.  rdsib_init() stores the address of this
 * table in rds_transport_ops and rdsib_fini() resets that pointer to NULL.
 * The initializer is positional, so the entries must stay in the member
 * order of rds_transport_ops_t.
 */
rds_transport_ops_t rds_ib_transport_ops = {
        rdsib_open_ib,
        rdsib_close_ib,
        rds_sendmsg,
        rds_resume_port,
        rds_if_lookup_by_name
};
  73 
/* global */

/* Driver state; allocated in rdsib_init() and freed in rdsib_fini(). */
rds_state_t     *rdsib_statep = NULL;

/*
 * Loopback port map and its rwlock.  rdsib_init() initializes the lock and
 * zeroes the map; the lock presumably guards the map (users are outside
 * this file).
 */
krwlock_t       rds_loopback_portmap_lock;
uint8_t         rds_loopback_portmap[RDS_PORT_MAP_SIZE];

/* Task queue created in rdsib_attach() and destroyed in rdsib_detach(). */
ddi_taskq_t     *rds_taskq = NULL;

/*
 * devinfo node of the attached instance.  Non-NULL while an instance is
 * attached; rdsib_attach() uses that to refuse a second instance.
 */
dev_info_t      *rdsib_dev_info = NULL;

/* PendingRxPktsHWM percent of NDataRX; computed in rdsib_attach(). */
uint_t          rds_rx_pkts_pending_hwm;

/*
 * Debug log level.  The compile-time default differs between DEBUG and
 * non-DEBUG builds; rds_read_config_values() may replace it from the
 * "rdsdbglvl" driver property during attach.
 */
#ifdef DEBUG
uint32_t        rdsdbglvl = RDS_LOG_L3;
#else
uint32_t        rdsdbglvl = RDS_LOG_L2;
#endif

/* Thread count passed to ddi_taskq_create() for rds_taskq. */
#define         RDS_NUM_TASKQ_THREADS   4
  89 
/*
 * Forward declarations for the static routines defined later in this file.
 * The first three are referenced by the rdsib_ops table; the last is
 * called from rdsib_attach().
 */
static int rdsib_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
static int rdsib_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
static int rdsib_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
    void **result);
static void rds_read_config_values(dev_info_t *dip);
  95 
/*
 * Driver entry points
 *
 * Positional cb_ops initializer.  open and close are nulldev, prop_op is
 * the generic ddi_prop_op, and every other entry point is nodev (nochpoll
 * for poll), so the minor node supports no read/write/ioctl.
 */
static struct cb_ops    rdsib_cb_ops = {
        nulldev,                /* open */
        nulldev,                /* close */
        nodev,                  /* strategy */
        nodev,                  /* print */
        nodev,                  /* dump */
        nodev,                  /* read */
        nodev,                  /* write */
        nodev,                  /* ioctl */
        nodev,                  /* devmap */
        nodev,                  /* mmap */
        nodev,                  /* segmap */
        nochpoll,               /* poll */
        ddi_prop_op,            /* prop_op */
        NULL,                   /* stream */
        D_MP,                   /* cb_flag */
        CB_REV,                 /* rev */
        nodev,                  /* int (*cb_aread)() */
        nodev,                  /* int (*cb_awrite)() */
};
 117 
/*
 * Device options
 *
 * Positional dev_ops initializer.  rdsib_info, rdsib_attach and
 * rdsib_detach are implemented in this file; identify and probe are
 * nulldev, reset is nodev, there are no bus or power operations, and the
 * quiesce slot is filled with ddi_quiesce_not_needed.
 */
static struct dev_ops rdsib_ops = {
        DEVO_REV,               /* devo_rev, */
        0,                      /* refcnt  */
        rdsib_info,             /* info */
        nulldev,                /* identify */
        nulldev,                /* probe */
        rdsib_attach,           /* attach */
        rdsib_detach,           /* detach */
        nodev,                  /* reset */
        &rdsib_cb_ops,              /* driver ops - devctl interfaces */
        NULL,                   /* bus operations */
        NULL,                   /* power */
        ddi_quiesce_not_needed, /* devo_quiesce */
};
 133 
/*
 * Module linkage information.
 *
 * rdsib_modldrv ties the description string and rdsib_ops to the driver
 * module type.  rdsib_modlinkage is the structure that _init(), _fini()
 * and _info() pass to mod_install(), mod_remove() and mod_info().
 */
#define RDS_DEVDESC     "RDS IB driver"
static struct modldrv rdsib_modldrv = {
        &mod_driverops,             /* Driver module */
        RDS_DEVDESC,            /* Driver name and version */
        &rdsib_ops,         /* Driver ops */
};

static struct modlinkage rdsib_modlinkage = {
        MODREV_1,
        { (void *)&rdsib_modldrv, NULL }
};
 148 
 149 /* Called from _init */
 150 int
 151 rdsib_init()
 152 {
 153         /* RDS supports only one instance */
 154         rdsib_statep = kmem_zalloc(sizeof (rds_state_t), KM_SLEEP);
 155 
 156         rw_init(&rdsib_statep->rds_sessionlock, NULL, RW_DRIVER, NULL);
 157         rw_init(&rdsib_statep->rds_hca_lock, NULL, RW_DRIVER, NULL);
 158 
 159         rw_init(&rds_loopback_portmap_lock, NULL, RW_DRIVER, NULL);
 160         bzero(rds_loopback_portmap, RDS_PORT_MAP_SIZE);
 161 
 162         mutex_init(&rds_dpool.pool_lock, NULL, MUTEX_DRIVER, NULL);
 163         cv_init(&rds_dpool.pool_cv, NULL, CV_DRIVER, NULL);
 164         mutex_init(&rds_cpool.pool_lock, NULL, MUTEX_DRIVER, NULL);
 165         cv_init(&rds_cpool.pool_cv, NULL, CV_DRIVER, NULL);
 166 
 167         /* Initialize logging */
 168         rds_logging_initialization();
 169 
 170         RDS_SET_NPORT(1); /* this should never be 0 */
 171 
 172         ASSERT(rds_transport_ops == NULL);
 173         rds_transport_ops = &rds_ib_transport_ops;
 174 
 175         return (0);
 176 }
 177 
 178 /* Called from _fini */
 179 void
 180 rdsib_fini()
 181 {
 182         /* Stop logging */
 183         rds_logging_destroy();
 184 
 185         cv_destroy(&rds_dpool.pool_cv);
 186         mutex_destroy(&rds_dpool.pool_lock);
 187         cv_destroy(&rds_cpool.pool_cv);
 188         mutex_destroy(&rds_cpool.pool_lock);
 189 
 190         rw_destroy(&rds_loopback_portmap_lock);
 191 
 192         rw_destroy(&rdsib_statep->rds_hca_lock);
 193         rw_destroy(&rdsib_statep->rds_sessionlock);
 194         kmem_free(rdsib_statep, sizeof (rds_state_t));
 195 
 196         rds_transport_ops = NULL;
 197 }
 198 
 199 int
 200 _init(void)
 201 {
 202         int     ret;
 203 
 204         if (ibt_hw_is_present() == 0) {
 205                 return (ENODEV);
 206         }
 207 
 208         ret = rdsib_init();
 209         if (ret != 0) {
 210                 return (ret);
 211         }
 212 
 213         ret = mod_install(&rdsib_modlinkage);
 214         if (ret != 0) {
 215                 /*
 216                  * Could not load module
 217                  */
 218                 rdsib_fini();
 219                 return (ret);
 220         }
 221 
 222         return (0);
 223 }
 224 
 225 int
 226 _fini()
 227 {
 228         int     ret;
 229 
 230         /*
 231          * Remove module
 232          */
 233         if ((ret = mod_remove(&rdsib_modlinkage)) != 0) {
 234                 return (ret);
 235         }
 236 
 237         rdsib_fini();
 238 
 239         return (0);
 240 }
 241 
 242 int
 243 _info(struct modinfo *modinfop)
 244 {
 245         return (mod_info(&rdsib_modlinkage, modinfop));
 246 }
 247 
 248 static int
 249 rdsib_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
 250 {
 251         int     ret;
 252 
 253         RDS_DPRINTF2("rdsib_attach", "enter");
 254 
 255         if (cmd != DDI_ATTACH)
 256                 return (DDI_FAILURE);
 257 
 258         if (rdsib_dev_info != NULL) {
 259                 RDS_DPRINTF1("rdsib_attach", "Multiple RDS instances are"
 260                     " not supported (rds_dev_info: 0x%p)", rdsib_dev_info);
 261                 return (DDI_FAILURE);
 262         }
 263 
 264         rdsib_dev_info = dip;
 265         rds_read_config_values(dip);
 266 
 267         rds_taskq = ddi_taskq_create(dip, "rds_taskq", RDS_NUM_TASKQ_THREADS,
 268             TASKQ_DEFAULTPRI, 0);
 269         if (rds_taskq == NULL) {
 270                 RDS_DPRINTF1("rdsib_attach",
 271                     "ddi_taskq_create failed for rds_taskq");
 272                 rdsib_dev_info = NULL;
 273                 return (DDI_FAILURE);
 274         }
 275 
 276         ret = ddi_create_minor_node(dip, "rdsib", S_IFCHR, 0, DDI_PSEUDO, 0);
 277         if (ret != DDI_SUCCESS) {
 278                 RDS_DPRINTF1("rdsib_attach",
 279                     "ddi_create_minor_node failed: %d", ret);
 280                 ddi_taskq_destroy(rds_taskq);
 281                 rds_taskq = NULL;
 282                 rdsib_dev_info = NULL;
 283                 return (DDI_FAILURE);
 284         }
 285 
 286         /* Max number of receive buffers on the system */
 287         NDataRX = (MaxNodes - 1) * MaxDataRecvBuffers * 2;
 288 
 289         /*
 290          * High water mark for the receive buffers in the system. If the
 291          * number of buffers used crosses this mark then all sockets in
 292          * would be stalled. The port quota for the sockets is set based
 293          * on this limit.
 294          */
 295         rds_rx_pkts_pending_hwm = (PendingRxPktsHWM * NDataRX)/100;
 296 
 297         ret = rdsib_initialize_ib();
 298         if (ret != 0) {
 299                 RDS_DPRINTF1("rdsib_attach",
 300                     "rdsib_initialize_ib failed: %d", ret);
 301                 ddi_taskq_destroy(rds_taskq);
 302                 rds_taskq = NULL;
 303                 rdsib_dev_info = NULL;
 304                 return (DDI_FAILURE);
 305         }
 306 
 307         RDS_DPRINTF2("rdsib_attach", "return");
 308 
 309         return (DDI_SUCCESS);
 310 }
 311 
 312 static int
 313 rdsib_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
 314 {
 315         RDS_DPRINTF2("rdsib_detach", "enter");
 316 
 317         if (cmd != DDI_DETACH)
 318                 return (DDI_FAILURE);
 319 
 320         rdsib_deinitialize_ib();
 321 
 322         ddi_remove_minor_node(dip, "rdsib");
 323 
 324         /* destroy taskq */
 325         if (rds_taskq != NULL) {
 326                 ddi_taskq_destroy(rds_taskq);
 327                 rds_taskq = NULL;
 328         }
 329 
 330         rdsib_dev_info = NULL;
 331 
 332         RDS_DPRINTF2("rdsib_detach", "return");
 333 
 334         return (DDI_SUCCESS);
 335 }
 336 
 337 /* ARGSUSED */
 338 static int
 339 rdsib_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
 340 {
 341         int ret = DDI_FAILURE;
 342 
 343         switch (cmd) {
 344         case DDI_INFO_DEVT2DEVINFO:
 345                 if (rdsib_dev_info != NULL) {
 346                         *result = (void *)rdsib_dev_info;
 347                         ret = DDI_SUCCESS;
 348                 }
 349                 break;
 350 
 351         case DDI_INFO_DEVT2INSTANCE:
 352                 *result = NULL;
 353                 ret = DDI_SUCCESS;
 354                 break;
 355 
 356         default:
 357                 break;
 358         }
 359 
 360         return (ret);
 361 }
 362 
 363 static void
 364 rds_read_config_values(dev_info_t *dip)
 365 {
 366         MaxNodes = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
 367             "MaxNodes", RDS_MAX_NODES);
 368 
 369         UserBufferSize = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
 370             DDI_PROP_DONTPASS, "UserBufferSize", RDS_USER_DATA_BUFFER_SIZE);
 371 
 372         MaxDataSendBuffers = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
 373             DDI_PROP_DONTPASS, "MaxDataSendBuffers", RDS_MAX_DATA_SEND_BUFFERS);
 374 
 375         MaxDataRecvBuffers = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
 376             DDI_PROP_DONTPASS, "MaxDataRecvBuffers", RDS_MAX_DATA_RECV_BUFFERS);
 377 
 378         MaxCtrlSendBuffers = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
 379             DDI_PROP_DONTPASS, "MaxCtrlSendBuffers", RDS_MAX_CTRL_SEND_BUFFERS);
 380 
 381         MaxCtrlRecvBuffers = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
 382             DDI_PROP_DONTPASS, "MaxCtrlRecvBuffers", RDS_MAX_CTRL_RECV_BUFFERS);
 383 
 384         DataRecvBufferLWM = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
 385             DDI_PROP_DONTPASS, "DataRecvBufferLWM", RDS_DATA_RECV_BUFFER_LWM);
 386 
 387         CtrlRecvBufferLWM = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
 388             DDI_PROP_DONTPASS, "CtrlRecvBufferLWM", RDS_CTRL_RECV_BUFFER_LWM);
 389 
 390         PendingRxPktsHWM = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
 391             DDI_PROP_DONTPASS, "PendingRxPktsHWM", RDS_PENDING_RX_PKTS_HWM);
 392 
 393         MinRnrRetry = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
 394             "MinRnrRetry", RDS_IB_RNR_RETRY);
 395 
 396         IBPathRetryCount = (uint8_t)ddi_prop_get_int(DDI_DEV_T_ANY, dip,
 397             DDI_PROP_DONTPASS, "IBPathRetryCount", RDS_IB_PATH_RETRY);
 398 
 399         IBPktLifeTime = (uint8_t)ddi_prop_get_int(DDI_DEV_T_ANY, dip,
 400             DDI_PROP_DONTPASS, "IBPktLifeTime", RDS_IB_PKT_LT);
 401 
 402         rdsdbglvl = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
 403             "rdsdbglvl", RDS_LOG_L2);
 404 
 405         if (MaxNodes < 2) {
 406                 cmn_err(CE_WARN, "MaxNodes is set to less than 2");
 407                 MaxNodes = 2;
 408         }
 409 }