1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26
27 #include <sys/types.h>
28 #include <sys/stat.h>
29 #include <sys/conf.h>
30 #include <sys/ddi.h>
31 #include <sys/sunddi.h>
32 #include <sys/modctl.h>
33 #include <inet/ip.h>
34 #include <sys/ib/clients/rds/rdsib_ib.h>
35 #include <sys/ib/clients/rds/rdsib_buf.h>
36 #include <sys/ib/clients/rds/rdsib_cm.h>
37 #include <sys/ib/clients/rds/rdsib_protocol.h>
38 #include <sys/ib/clients/rds/rds_transport.h>
39 #include <sys/ib/clients/rds/rds_kstat.h>
40
/*
 * Global Configuration Variables
 * As defined in RDS proposal
 *
 * Every variable that carries an initializer below starts at its
 * compile-time default and is re-read from a driver property of the same
 * name by rds_read_config_values() during attach.
 *
 * RdsPktSize is only defined here; it is not assigned anywhere in this
 * file. NDataRX is computed in rdsib_attach() from MaxNodes and
 * MaxDataRecvBuffers.
 */
uint_t		MaxNodes		= RDS_MAX_NODES;
uint_t		RdsPktSize;
uint_t		NDataRX;
uint_t		MaxDataSendBuffers	= RDS_MAX_DATA_SEND_BUFFERS;
uint_t		MaxDataRecvBuffers	= RDS_MAX_DATA_RECV_BUFFERS;
uint_t		MaxCtrlSendBuffers	= RDS_MAX_CTRL_SEND_BUFFERS;
uint_t		MaxCtrlRecvBuffers	= RDS_MAX_CTRL_RECV_BUFFERS;
uint_t		DataRecvBufferLWM	= RDS_DATA_RECV_BUFFER_LWM;
uint_t		CtrlRecvBufferLWM	= RDS_CTRL_RECV_BUFFER_LWM;
/* Applied as a percentage of NDataRX in rdsib_attach(). */
uint_t		PendingRxPktsHWM	= RDS_PENDING_RX_PKTS_HWM;
uint_t		MinRnrRetry		= RDS_IB_RNR_RETRY;
uint8_t		IBPathRetryCount	= RDS_IB_PATH_RETRY;
uint8_t		IBPktLifeTime		= RDS_IB_PKT_LT;
58
/*
 * Transport entry points implemented outside this file. They are the
 * members of rds_ib_transport_ops below, in the same order.
 */
extern int rdsib_open_ib();
extern void rdsib_close_ib();
extern void rds_resume_port(in_port_t port);
extern int rds_sendmsg(uio_t *uiop, ipaddr_t sendip, ipaddr_t recvip,
    in_port_t sendport, in_port_t recvport, zoneid_t zoneid);
extern boolean_t rds_if_lookup_by_name(char *devname);
65
/*
 * Operations vector for the IB transport. rdsib_init() publishes it by
 * storing its address in rds_transport_ops and rdsib_fini() withdraws it
 * again. The initializers are positional, so their order has to match the
 * member order of rds_transport_ops_t (declared elsewhere).
 */
rds_transport_ops_t rds_ib_transport_ops = {
	rdsib_open_ib,
	rdsib_close_ib,
	rds_sendmsg,
	rds_resume_port,
	rds_if_lookup_by_name
};
73
/* global */

/* Single driver state; allocated in rdsib_init(), freed in rdsib_fini(). */
rds_state_t	*rdsib_statep = NULL;

/* Loopback port map and its lock; both set up in rdsib_init(). */
krwlock_t	rds_loopback_portmap_lock;
uint8_t		rds_loopback_portmap[RDS_PORT_MAP_SIZE];

/* Created in rdsib_attach(), destroyed in rdsib_detach(). */
ddi_taskq_t	*rds_taskq = NULL;

/*
 * dev_info of the attached instance. Non-NULL means an instance is
 * already attached; rdsib_attach() refuses a second one.
 */
dev_info_t	*rdsib_dev_info = NULL;

/* Computed in rdsib_attach() as PendingRxPktsHWM percent of NDataRX. */
uint_t		rds_rx_pkts_pending_hwm;

/*
 * Logging level. The value chosen here is in effect from module load
 * until attach, where rds_read_config_values() reassigns it from the
 * "rdsdbglvl" driver property.
 */
#ifdef DEBUG
uint32_t	rdsdbglvl = RDS_LOG_L3;
#else
uint32_t	rdsdbglvl = RDS_LOG_L2;
#endif

/* Thread count handed to ddi_taskq_create() for rds_taskq. */
#define	RDS_NUM_TASKQ_THREADS	4
89
/*
 * Forward declarations: the dev_ops table below references the DDI entry
 * points before their definitions, and rdsib_attach() calls
 * rds_read_config_values().
 */
static int rdsib_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
static int rdsib_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
static int rdsib_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
    void **result);
static void rds_read_config_values(dev_info_t *dip);
95
/*
 * Driver entry points
 *
 * Only open and close are wired to nulldev; every data-path entry is
 * nodev, poll is nochpoll and prop_op is the stock ddi_prop_op. The
 * initializers are positional and must stay in struct cb_ops order.
 */
static struct cb_ops	rdsib_cb_ops = {
	nulldev,		/* open */
	nulldev,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	nodev,			/* read */
	nodev,			/* write */
	nodev,			/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	ddi_prop_op,		/* prop_op */
	NULL,			/* stream */
	D_MP,			/* cb_flag */
	CB_REV,			/* rev */
	nodev,			/* int (*cb_aread)() */
	nodev,			/* int (*cb_awrite)() */
};
117
/*
 * Device operations
 *
 * Hooks the attach/detach/getinfo routines defined in this file, plus the
 * cb_ops table above, into the module linkage. The initializers are
 * positional and must stay in struct dev_ops order.
 */
static struct dev_ops rdsib_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt */
	rdsib_info,		/* info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	rdsib_attach,		/* attach */
	rdsib_detach,		/* detach */
	nodev,			/* reset */
	&rdsib_cb_ops,		/* driver ops - devctl interfaces */
	NULL,			/* bus operations */
	NULL,			/* power */
	ddi_quiesce_not_needed,	/* devo_quiesce */
};
133
/*
 * Module linkage information.
 *
 * rdsib_modlinkage is the object handed to mod_install(), mod_remove()
 * and mod_info() by _init(), _fini() and _info(). It contains the single
 * driver linkage rdsib_modldrv.
 */
#define	RDS_DEVDESC	"RDS IB driver"
static struct modldrv rdsib_modldrv = {
	&mod_driverops,		/* Driver module */
	RDS_DEVDESC,		/* Driver name and version */
	&rdsib_ops,		/* Driver ops */
};

static struct modlinkage rdsib_modlinkage = {
	MODREV_1,
	{ (void *)&rdsib_modldrv, NULL }	/* NULL-terminated list */
};
148
149 /* Called from _init */
150 int
151 rdsib_init()
152 {
153 /* RDS supports only one instance */
154 rdsib_statep = kmem_zalloc(sizeof (rds_state_t), KM_SLEEP);
155
156 rw_init(&rdsib_statep->rds_sessionlock, NULL, RW_DRIVER, NULL);
157 rw_init(&rdsib_statep->rds_hca_lock, NULL, RW_DRIVER, NULL);
158
159 rw_init(&rds_loopback_portmap_lock, NULL, RW_DRIVER, NULL);
160 bzero(rds_loopback_portmap, RDS_PORT_MAP_SIZE);
161
162 mutex_init(&rds_dpool.pool_lock, NULL, MUTEX_DRIVER, NULL);
163 cv_init(&rds_dpool.pool_cv, NULL, CV_DRIVER, NULL);
164 mutex_init(&rds_cpool.pool_lock, NULL, MUTEX_DRIVER, NULL);
165 cv_init(&rds_cpool.pool_cv, NULL, CV_DRIVER, NULL);
166
167 /* Initialize logging */
168 rds_logging_initialization();
169
170 RDS_SET_NPORT(1); /* this should never be 0 */
171
172 ASSERT(rds_transport_ops == NULL);
173 rds_transport_ops = &rds_ib_transport_ops;
174
175 return (0);
176 }
177
178 /* Called from _fini */
179 void
180 rdsib_fini()
181 {
182 /* Stop logging */
183 rds_logging_destroy();
184
185 cv_destroy(&rds_dpool.pool_cv);
186 mutex_destroy(&rds_dpool.pool_lock);
187 cv_destroy(&rds_cpool.pool_cv);
188 mutex_destroy(&rds_cpool.pool_lock);
189
190 rw_destroy(&rds_loopback_portmap_lock);
191
192 rw_destroy(&rdsib_statep->rds_hca_lock);
193 rw_destroy(&rdsib_statep->rds_sessionlock);
194 kmem_free(rdsib_statep, sizeof (rds_state_t));
195
196 rds_transport_ops = NULL;
197 }
198
199 int
200 _init(void)
201 {
202 int ret;
203
204 if (ibt_hw_is_present() == 0) {
205 return (ENODEV);
206 }
207
208 ret = rdsib_init();
209 if (ret != 0) {
210 return (ret);
211 }
212
213 ret = mod_install(&rdsib_modlinkage);
214 if (ret != 0) {
215 /*
216 * Could not load module
217 */
218 rdsib_fini();
219 return (ret);
220 }
221
222 return (0);
223 }
224
225 int
226 _fini()
227 {
228 int ret;
229
230 /*
231 * Remove module
232 */
233 if ((ret = mod_remove(&rdsib_modlinkage)) != 0) {
234 return (ret);
235 }
236
237 rdsib_fini();
238
239 return (0);
240 }
241
242 int
243 _info(struct modinfo *modinfop)
244 {
245 return (mod_info(&rdsib_modlinkage, modinfop));
246 }
247
248 static int
249 rdsib_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
250 {
251 int ret;
252
253 RDS_DPRINTF2("rdsib_attach", "enter");
254
255 if (cmd != DDI_ATTACH)
256 return (DDI_FAILURE);
257
258 if (rdsib_dev_info != NULL) {
259 RDS_DPRINTF1("rdsib_attach", "Multiple RDS instances are"
260 " not supported (rds_dev_info: 0x%p)", rdsib_dev_info);
261 return (DDI_FAILURE);
262 }
263
264 rdsib_dev_info = dip;
265 rds_read_config_values(dip);
266
267 rds_taskq = ddi_taskq_create(dip, "rds_taskq", RDS_NUM_TASKQ_THREADS,
268 TASKQ_DEFAULTPRI, 0);
269 if (rds_taskq == NULL) {
270 RDS_DPRINTF1("rdsib_attach",
271 "ddi_taskq_create failed for rds_taskq");
272 rdsib_dev_info = NULL;
273 return (DDI_FAILURE);
274 }
275
276 ret = ddi_create_minor_node(dip, "rdsib", S_IFCHR, 0, DDI_PSEUDO, 0);
277 if (ret != DDI_SUCCESS) {
278 RDS_DPRINTF1("rdsib_attach",
279 "ddi_create_minor_node failed: %d", ret);
280 ddi_taskq_destroy(rds_taskq);
281 rds_taskq = NULL;
282 rdsib_dev_info = NULL;
283 return (DDI_FAILURE);
284 }
285
286 /* Max number of receive buffers on the system */
287 NDataRX = (MaxNodes - 1) * MaxDataRecvBuffers * 2;
288
289 /*
290 * High water mark for the receive buffers in the system. If the
291 * number of buffers used crosses this mark then all sockets in
292 * would be stalled. The port quota for the sockets is set based
293 * on this limit.
294 */
295 rds_rx_pkts_pending_hwm = (PendingRxPktsHWM * NDataRX)/100;
296
297 ret = rdsib_initialize_ib();
298 if (ret != 0) {
299 RDS_DPRINTF1("rdsib_attach",
300 "rdsib_initialize_ib failed: %d", ret);
301 ddi_taskq_destroy(rds_taskq);
302 rds_taskq = NULL;
303 rdsib_dev_info = NULL;
304 return (DDI_FAILURE);
305 }
306
307 RDS_DPRINTF2("rdsib_attach", "return");
308
309 return (DDI_SUCCESS);
310 }
311
312 static int
313 rdsib_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
314 {
315 RDS_DPRINTF2("rdsib_detach", "enter");
316
317 if (cmd != DDI_DETACH)
318 return (DDI_FAILURE);
319
320 rdsib_deinitialize_ib();
321
322 ddi_remove_minor_node(dip, "rdsib");
323
324 /* destroy taskq */
325 if (rds_taskq != NULL) {
326 ddi_taskq_destroy(rds_taskq);
327 rds_taskq = NULL;
328 }
329
330 rdsib_dev_info = NULL;
331
332 RDS_DPRINTF2("rdsib_detach", "return");
333
334 return (DDI_SUCCESS);
335 }
336
337 /* ARGSUSED */
338 static int
339 rdsib_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
340 {
341 int ret = DDI_FAILURE;
342
343 switch (cmd) {
344 case DDI_INFO_DEVT2DEVINFO:
345 if (rdsib_dev_info != NULL) {
346 *result = (void *)rdsib_dev_info;
347 ret = DDI_SUCCESS;
348 }
349 break;
350
351 case DDI_INFO_DEVT2INSTANCE:
352 *result = NULL;
353 ret = DDI_SUCCESS;
354 break;
355
356 default:
357 break;
358 }
359
360 return (ret);
361 }
362
363 static void
364 rds_read_config_values(dev_info_t *dip)
365 {
366 MaxNodes = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
367 "MaxNodes", RDS_MAX_NODES);
368
369 UserBufferSize = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
370 DDI_PROP_DONTPASS, "UserBufferSize", RDS_USER_DATA_BUFFER_SIZE);
371
372 MaxDataSendBuffers = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
373 DDI_PROP_DONTPASS, "MaxDataSendBuffers", RDS_MAX_DATA_SEND_BUFFERS);
374
375 MaxDataRecvBuffers = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
376 DDI_PROP_DONTPASS, "MaxDataRecvBuffers", RDS_MAX_DATA_RECV_BUFFERS);
377
378 MaxCtrlSendBuffers = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
379 DDI_PROP_DONTPASS, "MaxCtrlSendBuffers", RDS_MAX_CTRL_SEND_BUFFERS);
380
381 MaxCtrlRecvBuffers = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
382 DDI_PROP_DONTPASS, "MaxCtrlRecvBuffers", RDS_MAX_CTRL_RECV_BUFFERS);
383
384 DataRecvBufferLWM = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
385 DDI_PROP_DONTPASS, "DataRecvBufferLWM", RDS_DATA_RECV_BUFFER_LWM);
386
387 CtrlRecvBufferLWM = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
388 DDI_PROP_DONTPASS, "CtrlRecvBufferLWM", RDS_CTRL_RECV_BUFFER_LWM);
389
390 PendingRxPktsHWM = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
391 DDI_PROP_DONTPASS, "PendingRxPktsHWM", RDS_PENDING_RX_PKTS_HWM);
392
393 MinRnrRetry = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
394 "MinRnrRetry", RDS_IB_RNR_RETRY);
395
396 IBPathRetryCount = (uint8_t)ddi_prop_get_int(DDI_DEV_T_ANY, dip,
397 DDI_PROP_DONTPASS, "IBPathRetryCount", RDS_IB_PATH_RETRY);
398
399 IBPktLifeTime = (uint8_t)ddi_prop_get_int(DDI_DEV_T_ANY, dip,
400 DDI_PROP_DONTPASS, "IBPktLifeTime", RDS_IB_PKT_LT);
401
402 rdsdbglvl = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
403 "rdsdbglvl", RDS_LOG_L2);
404
405 if (MaxNodes < 2) {
406 cmn_err(CE_WARN, "MaxNodes is set to less than 2");
407 MaxNodes = 2;
408 }
409 }