1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 
  27 #include <sys/types.h>
  28 #include <sys/stat.h>
  29 #include <sys/conf.h>
  30 #include <sys/ddi.h>
  31 #include <sys/sunddi.h>
  32 #include <sys/modctl.h>
  33 #include <inet/ip.h>
  34 #include <sys/ib/clients/rds/rdsib_ib.h>
  35 #include <sys/ib/clients/rds/rdsib_buf.h>
  36 #include <sys/ib/clients/rds/rdsib_cm.h>
  37 #include <sys/ib/clients/rds/rdsib_protocol.h>
  38 #include <sys/ib/clients/rds/rds_transport.h>
  39 #include <sys/ib/clients/rds/rds_kstat.h>
  40 
/*
 * Global Configuration Variables
 * As defined in RDS proposal
 *
 * Every variable below except RdsPktSize and NDataRX is reloaded by
 * rds_read_config_values() from the driver property of the same name;
 * the initializers here are the same defaults that function passes to
 * ddi_prop_get_int().
 *
 * NOTE(review): rds_read_config_values() also assigns UserBufferSize,
 * which is not defined in this list -- confirm it is defined elsewhere.
 */
/* Forced up to 2 by rds_read_config_values() if configured lower. */
uint_t          MaxNodes                = RDS_MAX_NODES;
/* Not assigned anywhere in this file. */
uint_t          RdsPktSize;
/* Computed in rdsib_attach(): (MaxNodes - 1) * MaxDataRecvBuffers * 2. */
uint_t          NDataRX;
uint_t          MaxDataSendBuffers      = RDS_MAX_DATA_SEND_BUFFERS;
uint_t          MaxDataRecvBuffers      = RDS_MAX_DATA_RECV_BUFFERS;
uint_t          MaxCtrlSendBuffers      = RDS_MAX_CTRL_SEND_BUFFERS;
uint_t          MaxCtrlRecvBuffers      = RDS_MAX_CTRL_RECV_BUFFERS;
uint_t          DataRecvBufferLWM       = RDS_DATA_RECV_BUFFER_LWM;
uint_t          CtrlRecvBufferLWM       = RDS_CTRL_RECV_BUFFER_LWM;
/* Used as a percentage of NDataRX in rdsib_attach(). */
uint_t          PendingRxPktsHWM        = RDS_PENDING_RX_PKTS_HWM;
uint_t          MinRnrRetry             = RDS_IB_RNR_RETRY;
/* The two uint8_t tunables are narrowed from int when read from properties. */
uint8_t         IBPathRetryCount        = RDS_IB_PATH_RETRY;
uint8_t         IBPktLifeTime           = RDS_IB_PKT_LT;
  58 
/*
 * Routines implemented outside this file. All five are installed, in
 * this order apart from rds_sendmsg/rds_resume_port being swapped, in
 * the rds_ib_transport_ops table below.
 */
extern int rdsib_open_ib();
extern void rdsib_close_ib();
extern void rds_resume_port(in_port_t port);
extern int rds_sendmsg(uio_t *uiop, ipaddr_t sendip, ipaddr_t recvip,
    in_port_t sendport, in_port_t recvport, zoneid_t zoneid);
extern boolean_t rds_if_lookup_by_name(char *devname);
  65 
/*
 * Transport operations table for the IB transport. rdsib_init() publishes
 * its address through rds_transport_ops and rdsib_fini() resets that
 * pointer to NULL. Entries are positional; their meaning is defined by
 * rds_transport_ops_t, which is declared outside this file.
 */
rds_transport_ops_t rds_ib_transport_ops = {
        rdsib_open_ib,
        rdsib_close_ib,
        rds_sendmsg,
        rds_resume_port,
        rds_if_lookup_by_name
};
  73 
/* global */
/* Single driver state structure; allocated in rdsib_init(), freed in rdsib_fini(). */
rds_state_t     *rdsib_statep = NULL;
/*
 * Initialized in rdsib_init() and destroyed in rdsib_fini().
 * Presumably guards rds_loopback_portmap -- confirm against its users.
 */
krwlock_t       rds_loopback_portmap_lock;
/* Zeroed in rdsib_init(). */
uint8_t         rds_loopback_portmap[RDS_PORT_MAP_SIZE];
/* Created in rdsib_attach(), destroyed in rdsib_detach(). */
ddi_taskq_t     *rds_taskq = NULL;
/* dev_info of the one attached instance; NULL while nothing is attached. */
dev_info_t      *rdsib_dev_info = NULL;
/* Set in rdsib_attach() to PendingRxPktsHWM percent of NDataRX. */
uint_t          rds_rx_pkts_pending_hwm;
  81 
/*
 * Debug message level. The build-time default depends on DEBUG;
 * rds_read_config_values() reloads the value from the "rdsdbglvl"
 * driver property at attach time.
 */
#ifdef DEBUG
uint32_t        rdsdbglvl = RDS_LOG_L3;
#else
uint32_t        rdsdbglvl = RDS_LOG_L2;
#endif

/* Thread count passed to ddi_taskq_create() for rds_taskq in rdsib_attach(). */
#define         RDS_NUM_TASKQ_THREADS   4
  89 
/*
 * Forward declarations: the first three are referenced by the rdsib_ops
 * table before their definitions; rds_read_config_values() is called
 * from rdsib_attach().
 */
static int rdsib_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
static int rdsib_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
static int rdsib_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
    void **result);
static void rds_read_config_values(dev_info_t *dip);
  95 
  96 /* Driver entry points */
  97 static struct cb_ops    rdsib_cb_ops = {
  98         nulldev,                /* open */
  99         nulldev,                /* close */
 100         nodev,                  /* strategy */
 101         nodev,                  /* print */
 102         nodev,                  /* dump */
 103         nodev,                  /* read */
 104         nodev,                  /* write */
 105         nodev,                  /* ioctl */
 106         nodev,                  /* devmap */
 107         nodev,                  /* mmap */
 108         nodev,                  /* segmap */
 109         nochpoll,               /* poll */
 110         ddi_prop_op,            /* prop_op */
 111         NULL,                   /* stream */
 112         D_MP,                   /* cb_flag */
 113         CB_REV,                 /* rev */
 114         nodev,                  /* int (*cb_aread)() */
 115         nodev,                  /* int (*cb_awrite)() */
 116 };
 117 
 118 /* Device options */
 119 static struct dev_ops rdsib_ops = {
 120         DEVO_REV,               /* devo_rev, */
 121         0,                      /* refcnt  */
 122         rdsib_info,             /* info */
 123         nulldev,                /* identify */
 124         nulldev,                /* probe */
 125         rdsib_attach,           /* attach */
 126         rdsib_detach,           /* detach */
 127         nodev,                  /* reset */
 128         &rdsib_cb_ops,              /* driver ops - devctl interfaces */
 129         NULL,                   /* bus operations */
 130         NULL,                   /* power */
 131         ddi_quiesce_not_needed, /* devo_quiesce */
 132 };
 133 
 134 /*
 135  * Module linkage information.
 136  */
 137 #define RDS_DEVDESC     "RDS IB driver"
 138 static struct modldrv rdsib_modldrv = {
 139         &mod_driverops,             /* Driver module */
 140         RDS_DEVDESC,            /* Driver name and version */
 141         &rdsib_ops,         /* Driver ops */
 142 };
 143 
 144 static struct modlinkage rdsib_modlinkage = {
 145         MODREV_1,
 146         (void *)&rdsib_modldrv,
 147         NULL
 148 };
 149 
 150 /* Called from _init */
 151 int
 152 rdsib_init()
 153 {
 154         /* RDS supports only one instance */
 155         rdsib_statep = kmem_zalloc(sizeof (rds_state_t), KM_SLEEP);
 156 
 157         rw_init(&rdsib_statep->rds_sessionlock, NULL, RW_DRIVER, NULL);
 158         rw_init(&rdsib_statep->rds_hca_lock, NULL, RW_DRIVER, NULL);
 159 
 160         rw_init(&rds_loopback_portmap_lock, NULL, RW_DRIVER, NULL);
 161         bzero(rds_loopback_portmap, RDS_PORT_MAP_SIZE);
 162 
 163         mutex_init(&rds_dpool.pool_lock, NULL, MUTEX_DRIVER, NULL);
 164         cv_init(&rds_dpool.pool_cv, NULL, CV_DRIVER, NULL);
 165         mutex_init(&rds_cpool.pool_lock, NULL, MUTEX_DRIVER, NULL);
 166         cv_init(&rds_cpool.pool_cv, NULL, CV_DRIVER, NULL);
 167 
 168         /* Initialize logging */
 169         rds_logging_initialization();
 170 
 171         RDS_SET_NPORT(1); /* this should never be 0 */
 172 
 173         ASSERT(rds_transport_ops == NULL);
 174         rds_transport_ops = &rds_ib_transport_ops;
 175 
 176         return (0);
 177 }
 178 
 179 /* Called from _fini */
 180 void
 181 rdsib_fini()
 182 {
 183         /* Stop logging */
 184         rds_logging_destroy();
 185 
 186         cv_destroy(&rds_dpool.pool_cv);
 187         mutex_destroy(&rds_dpool.pool_lock);
 188         cv_destroy(&rds_cpool.pool_cv);
 189         mutex_destroy(&rds_cpool.pool_lock);
 190 
 191         rw_destroy(&rds_loopback_portmap_lock);
 192 
 193         rw_destroy(&rdsib_statep->rds_hca_lock);
 194         rw_destroy(&rdsib_statep->rds_sessionlock);
 195         kmem_free(rdsib_statep, sizeof (rds_state_t));
 196 
 197         rds_transport_ops = NULL;
 198 }
 199 
 200 int
 201 _init(void)
 202 {
 203         int     ret;
 204 
 205         if (ibt_hw_is_present() == 0) {
 206                 return (ENODEV);
 207         }
 208 
 209         ret = rdsib_init();
 210         if (ret != 0) {
 211                 return (ret);
 212         }
 213 
 214         ret = mod_install(&rdsib_modlinkage);
 215         if (ret != 0) {
 216                 /*
 217                  * Could not load module
 218                  */
 219                 rdsib_fini();
 220                 return (ret);
 221         }
 222 
 223         return (0);
 224 }
 225 
 226 int
 227 _fini()
 228 {
 229         int     ret;
 230 
 231         /*
 232          * Remove module
 233          */
 234         if ((ret = mod_remove(&rdsib_modlinkage)) != 0) {
 235                 return (ret);
 236         }
 237 
 238         rdsib_fini();
 239 
 240         return (0);
 241 }
 242 
 243 int
 244 _info(struct modinfo *modinfop)
 245 {
 246         return (mod_info(&rdsib_modlinkage, modinfop));
 247 }
 248 
 249 static int
 250 rdsib_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
 251 {
 252         int     ret;
 253 
 254         RDS_DPRINTF2("rdsib_attach", "enter");
 255 
 256         if (cmd != DDI_ATTACH)
 257                 return (DDI_FAILURE);
 258 
 259         if (rdsib_dev_info != NULL) {
 260                 RDS_DPRINTF1("rdsib_attach", "Multiple RDS instances are"
 261                     " not supported (rds_dev_info: 0x%p)", rdsib_dev_info);
 262                 return (DDI_FAILURE);
 263         }
 264 
 265         rdsib_dev_info = dip;
 266         rds_read_config_values(dip);
 267 
 268         rds_taskq = ddi_taskq_create(dip, "rds_taskq", RDS_NUM_TASKQ_THREADS,
 269             TASKQ_DEFAULTPRI, 0);
 270         if (rds_taskq == NULL) {
 271                 RDS_DPRINTF1("rdsib_attach",
 272                     "ddi_taskq_create failed for rds_taskq");
 273                 rdsib_dev_info = NULL;
 274                 return (DDI_FAILURE);
 275         }
 276 
 277         ret = ddi_create_minor_node(dip, "rdsib", S_IFCHR, 0, DDI_PSEUDO, 0);
 278         if (ret != DDI_SUCCESS) {
 279                 RDS_DPRINTF1("rdsib_attach",
 280                     "ddi_create_minor_node failed: %d", ret);
 281                 ddi_taskq_destroy(rds_taskq);
 282                 rds_taskq = NULL;
 283                 rdsib_dev_info = NULL;
 284                 return (DDI_FAILURE);
 285         }
 286 
 287         /* Max number of receive buffers on the system */
 288         NDataRX = (MaxNodes - 1) * MaxDataRecvBuffers * 2;
 289 
 290         /*
 291          * High water mark for the receive buffers in the system. If the
 292          * number of buffers used crosses this mark then all sockets in
 293          * would be stalled. The port quota for the sockets is set based
 294          * on this limit.
 295          */
 296         rds_rx_pkts_pending_hwm = (PendingRxPktsHWM * NDataRX)/100;
 297 
 298         ret = rdsib_initialize_ib();
 299         if (ret != 0) {
 300                 RDS_DPRINTF1("rdsib_attach",
 301                     "rdsib_initialize_ib failed: %d", ret);
 302                 ddi_taskq_destroy(rds_taskq);
 303                 rds_taskq = NULL;
 304                 rdsib_dev_info = NULL;
 305                 return (DDI_FAILURE);
 306         }
 307 
 308         RDS_DPRINTF2("rdsib_attach", "return");
 309 
 310         return (DDI_SUCCESS);
 311 }
 312 
 313 static int
 314 rdsib_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
 315 {
 316         RDS_DPRINTF2("rdsib_detach", "enter");
 317 
 318         if (cmd != DDI_DETACH)
 319                 return (DDI_FAILURE);
 320 
 321         rdsib_deinitialize_ib();
 322 
 323         ddi_remove_minor_node(dip, "rdsib");
 324 
 325         /* destroy taskq */
 326         if (rds_taskq != NULL) {
 327                 ddi_taskq_destroy(rds_taskq);
 328                 rds_taskq = NULL;
 329         }
 330 
 331         rdsib_dev_info = NULL;
 332 
 333         RDS_DPRINTF2("rdsib_detach", "return");
 334 
 335         return (DDI_SUCCESS);
 336 }
 337 
 338 /* ARGSUSED */
 339 static int
 340 rdsib_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
 341 {
 342         int ret = DDI_FAILURE;
 343 
 344         switch (cmd) {
 345         case DDI_INFO_DEVT2DEVINFO:
 346                 if (rdsib_dev_info != NULL) {
 347                         *result = (void *)rdsib_dev_info;
 348                         ret = DDI_SUCCESS;
 349                 }
 350                 break;
 351 
 352         case DDI_INFO_DEVT2INSTANCE:
 353                 *result = NULL;
 354                 ret = DDI_SUCCESS;
 355                 break;
 356 
 357         default:
 358                 break;
 359         }
 360 
 361         return (ret);
 362 }
 363 
 364 static void
 365 rds_read_config_values(dev_info_t *dip)
 366 {
 367         MaxNodes = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
 368             "MaxNodes", RDS_MAX_NODES);
 369 
 370         UserBufferSize = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
 371             DDI_PROP_DONTPASS, "UserBufferSize", RDS_USER_DATA_BUFFER_SIZE);
 372 
 373         MaxDataSendBuffers = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
 374             DDI_PROP_DONTPASS, "MaxDataSendBuffers", RDS_MAX_DATA_SEND_BUFFERS);
 375 
 376         MaxDataRecvBuffers = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
 377             DDI_PROP_DONTPASS, "MaxDataRecvBuffers", RDS_MAX_DATA_RECV_BUFFERS);
 378 
 379         MaxCtrlSendBuffers = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
 380             DDI_PROP_DONTPASS, "MaxCtrlSendBuffers", RDS_MAX_CTRL_SEND_BUFFERS);
 381 
 382         MaxCtrlRecvBuffers = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
 383             DDI_PROP_DONTPASS, "MaxCtrlRecvBuffers", RDS_MAX_CTRL_RECV_BUFFERS);
 384 
 385         DataRecvBufferLWM = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
 386             DDI_PROP_DONTPASS, "DataRecvBufferLWM", RDS_DATA_RECV_BUFFER_LWM);
 387 
 388         CtrlRecvBufferLWM = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
 389             DDI_PROP_DONTPASS, "CtrlRecvBufferLWM", RDS_CTRL_RECV_BUFFER_LWM);
 390 
 391         PendingRxPktsHWM = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
 392             DDI_PROP_DONTPASS, "PendingRxPktsHWM", RDS_PENDING_RX_PKTS_HWM);
 393 
 394         MinRnrRetry = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
 395             "MinRnrRetry", RDS_IB_RNR_RETRY);
 396 
 397         IBPathRetryCount = (uint8_t)ddi_prop_get_int(DDI_DEV_T_ANY, dip,
 398             DDI_PROP_DONTPASS, "IBPathRetryCount", RDS_IB_PATH_RETRY);
 399 
 400         IBPktLifeTime = (uint8_t)ddi_prop_get_int(DDI_DEV_T_ANY, dip,
 401             DDI_PROP_DONTPASS, "IBPktLifeTime", RDS_IB_PKT_LT);
 402 
 403         rdsdbglvl = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
 404             "rdsdbglvl", RDS_LOG_L2);
 405 
 406         if (MaxNodes < 2) {
 407                 cmn_err(CE_WARN, "MaxNodes is set to less than 2");
 408                 MaxNodes = 2;
 409         }
 410 }