Print this page
8368 remove warlock leftovers from usr/src/uts
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/io/ib/clients/rdsv3/ib.c
+++ new/usr/src/uts/common/io/ib/clients/rdsv3/ib.c
1 1 /*
2 2 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
3 3 */
4 4
5 5 /*
6 6 * This file contains code imported from the OFED rds source file ib.c
7 7 * Oracle elects to have and use the contents of ib.c under and governed
8 8 * by the OpenIB.org BSD license (see below for full license text). However,
9 9 * the following notice accompanied the original version of this file:
10 10 */
11 11
12 12 /*
13 13 * Copyright (c) 2006 Oracle. All rights reserved.
14 14 *
15 15 * This software is available to you under a choice of one of two
16 16 * licenses. You may choose to be licensed under the terms of the GNU
17 17 * General Public License (GPL) Version 2, available from the file
18 18 * COPYING in the main directory of this source tree, or the
19 19 * OpenIB.org BSD license below:
20 20 *
21 21 * Redistribution and use in source and binary forms, with or
22 22 * without modification, are permitted provided that the following
23 23 * conditions are met:
24 24 *
25 25 * - Redistributions of source code must retain the above
26 26 * copyright notice, this list of conditions and the following
27 27 * disclaimer.
28 28 *
29 29 * - Redistributions in binary form must reproduce the above
30 30 * copyright notice, this list of conditions and the following
31 31 * disclaimer in the documentation and/or other materials
32 32 * provided with the distribution.
33 33 *
34 34 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
35 35 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
36 36 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
37 37 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
38 38 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
39 39 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
40 40 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
41 41 * SOFTWARE.
42 42 *
43 43 */
44 44 #include <sys/sysmacros.h>
45 45 #include <sys/rds.h>
46 46
47 47 #include <sys/ib/ibtl/ibti.h>
48 48 #include <sys/ib/clients/rdsv3/rdsv3.h>
49 49 #include <sys/ib/clients/rdsv3/ib.h>
50 50 #include <sys/ib/clients/rdsv3/rdsv3_debug.h>
51 51
52 52 unsigned int rdsv3_ib_retry_count = RDSV3_IB_DEFAULT_RETRY_COUNT;
53 53
54 54 struct list rdsv3_ib_devices;
55 55
56 56 /* NOTE: if also grabbing ibdev lock, grab this first */
57 57 kmutex_t ib_nodev_conns_lock;
58 58 list_t ib_nodev_conns;
59 59
60 60 extern int rdsv3_ib_frag_constructor(void *buf, void *arg, int kmflags);
61 61 extern void rdsv3_ib_frag_destructor(void *buf, void *arg);
62 62
63 63 void
64 64 rdsv3_ib_add_one(ib_device_t *device)
65 65 {
66 66 struct rdsv3_ib_device *rds_ibdev;
67 67 ibt_hca_attr_t *dev_attr;
68 68 char name[64];
69 69
70 70 RDSV3_DPRINTF2("rdsv3_ib_add_one", "device: %p", device);
71 71
72 72 /* Only handle IB (no iWARP) devices */
73 73 if (device->node_type != RDMA_NODE_IB_CA)
74 74 return;
75 75
76 76 dev_attr = (ibt_hca_attr_t *)kmem_alloc(sizeof (*dev_attr),
77 77 KM_NOSLEEP);
78 78 if (!dev_attr)
79 79 return;
80 80
81 81 if (ibt_query_hca(ib_get_ibt_hca_hdl(device), dev_attr)) {
82 82 RDSV3_DPRINTF2("rdsv3_ib_add_one",
83 83 "Query device failed for %s", device->name);
84 84 goto free_attr;
85 85 }
86 86
87 87 /* We depend on Reserved Lkey */
88 88 if (!(dev_attr->hca_flags2 & IBT_HCA2_RES_LKEY)) {
89 89 RDSV3_DPRINTF2("rdsv3_ib_add_one",
90 90 "Reserved Lkey support is required: %s",
91 91 device->name);
92 92 goto free_attr;
93 93 }
94 94
95 95 rds_ibdev = kmem_zalloc(sizeof (*rds_ibdev), KM_NOSLEEP);
96 96 if (!rds_ibdev)
97 97 goto free_attr;
98 98
99 99 rds_ibdev->ibt_hca_hdl = ib_get_ibt_hca_hdl(device);
100 100 rds_ibdev->hca_attr = *dev_attr;
101 101
102 102 rw_init(&rds_ibdev->rwlock, NULL, RW_DRIVER, NULL);
103 103 mutex_init(&rds_ibdev->spinlock, NULL, MUTEX_DRIVER, NULL);
104 104
105 105 rds_ibdev->max_wrs = dev_attr->hca_max_chan_sz;
106 106 rds_ibdev->max_sge = min(dev_attr->hca_max_sgl, RDSV3_IB_MAX_SGE);
107 107
108 108 rds_ibdev->max_initiator_depth = (uint_t)dev_attr->hca_max_rdma_in_qp;
109 109 rds_ibdev->max_responder_resources =
110 110 (uint_t)dev_attr->hca_max_rdma_in_qp;
111 111
112 112 rds_ibdev->dev = device;
113 113 rds_ibdev->pd = ib_alloc_pd(device);
114 114 if (IS_ERR(rds_ibdev->pd))
115 115 goto free_dev;
116 116
117 117 if (rdsv3_ib_create_mr_pool(rds_ibdev) != 0) {
118 118 goto free_dev;
119 119 }
120 120
121 121 if (rdsv3_ib_create_inc_pool(rds_ibdev) != 0) {
122 122 rdsv3_ib_destroy_mr_pool(rds_ibdev);
123 123 goto free_dev;
124 124 }
125 125
126 126 (void) snprintf(name, 64, "RDSV3_IB_FRAG_%llx",
127 127 (longlong_t)htonll(dev_attr->hca_node_guid));
128 128 rds_ibdev->ib_frag_slab = kmem_cache_create(name,
129 129 sizeof (struct rdsv3_page_frag), 0, rdsv3_ib_frag_constructor,
130 130 rdsv3_ib_frag_destructor, NULL, (void *)rds_ibdev, NULL, 0);
131 131 if (rds_ibdev->ib_frag_slab == NULL) {
132 132 RDSV3_DPRINTF2("rdsv3_ib_add_one",
133 133 "kmem_cache_create for ib_frag_slab failed for device: %s",
134 134 device->name);
135 135 rdsv3_ib_destroy_mr_pool(rds_ibdev);
136 136 rdsv3_ib_destroy_inc_pool(rds_ibdev);
137 137 goto free_dev;
138 138 }
139 139
140 140 rds_ibdev->aft_hcagp = rdsv3_af_grp_create(rds_ibdev->ibt_hca_hdl,
141 141 (uint64_t)rds_ibdev->hca_attr.hca_node_guid);
142 142 if (rds_ibdev->aft_hcagp == NULL) {
143 143 rdsv3_ib_destroy_mr_pool(rds_ibdev);
144 144 rdsv3_ib_destroy_inc_pool(rds_ibdev);
145 145 kmem_cache_destroy(rds_ibdev->ib_frag_slab);
146 146 goto free_dev;
147 147 }
148 148 rds_ibdev->fmr_soft_cq = rdsv3_af_thr_create(rdsv3_ib_drain_mrlist_fn,
149 149 (void *)rds_ibdev->fmr_pool, SCQ_HCA_BIND_CPU,
150 150 rds_ibdev->aft_hcagp);
151 151 if (rds_ibdev->fmr_soft_cq == NULL) {
152 152 rdsv3_af_grp_destroy(rds_ibdev->aft_hcagp);
153 153 rdsv3_ib_destroy_mr_pool(rds_ibdev);
154 154 rdsv3_ib_destroy_inc_pool(rds_ibdev);
155 155 kmem_cache_destroy(rds_ibdev->ib_frag_slab);
156 156 goto free_dev;
157 157 }
158 158
159 159 rds_ibdev->inc_soft_cq = rdsv3_af_thr_create(rdsv3_ib_drain_inclist,
160 160 (void *)rds_ibdev->inc_pool, SCQ_HCA_BIND_CPU,
161 161 rds_ibdev->aft_hcagp);
162 162 if (rds_ibdev->inc_soft_cq == NULL) {
163 163 rdsv3_af_thr_destroy(rds_ibdev->fmr_soft_cq);
164 164 rdsv3_af_grp_destroy(rds_ibdev->aft_hcagp);
165 165 rdsv3_ib_destroy_mr_pool(rds_ibdev);
166 166 rdsv3_ib_destroy_inc_pool(rds_ibdev);
167 167 kmem_cache_destroy(rds_ibdev->ib_frag_slab);
168 168 goto free_dev;
169 169 }
170 170
171 171 list_create(&rds_ibdev->ipaddr_list, sizeof (struct rdsv3_ib_ipaddr),
172 172 offsetof(struct rdsv3_ib_ipaddr, list));
173 173 list_create(&rds_ibdev->conn_list, sizeof (struct rdsv3_ib_connection),
174 174 offsetof(struct rdsv3_ib_connection, ib_node));
175 175
176 176 list_insert_tail(&rdsv3_ib_devices, rds_ibdev);
177 177
178 178 ib_set_client_data(device, &rdsv3_ib_client, rds_ibdev);
179 179
180 180 RDSV3_DPRINTF2("rdsv3_ib_add_one", "Return: device: %p", device);
181 181
182 182 goto free_attr;
183 183
184 184 err_pd:
185 185 (void) ib_dealloc_pd(rds_ibdev->pd);
186 186 free_dev:
187 187 mutex_destroy(&rds_ibdev->spinlock);
188 188 rw_destroy(&rds_ibdev->rwlock);
189 189 kmem_free(rds_ibdev, sizeof (*rds_ibdev));
190 190 free_attr:
191 191 kmem_free(dev_attr, sizeof (*dev_attr));
192 192 }
193 193
194 194 void
195 195 rdsv3_ib_remove_one(struct ib_device *device)
196 196 {
197 197 struct rdsv3_ib_device *rds_ibdev;
198 198 struct rdsv3_ib_ipaddr *i_ipaddr, *i_next;
199 199
200 200 RDSV3_DPRINTF2("rdsv3_ib_remove_one", "device: %p", device);
201 201
202 202 rds_ibdev = ib_get_client_data(device, &rdsv3_ib_client);
203 203 if (!rds_ibdev)
204 204 return;
205 205
206 206 RDSV3_FOR_EACH_LIST_NODE_SAFE(i_ipaddr, i_next, &rds_ibdev->ipaddr_list,
207 207 list) {
208 208 list_remove_node(&i_ipaddr->list);
209 209 kmem_free(i_ipaddr, sizeof (*i_ipaddr));
210 210 }
211 211
212 212 rdsv3_ib_destroy_conns(rds_ibdev);
213 213
214 214 if (rds_ibdev->fmr_soft_cq)
215 215 rdsv3_af_thr_destroy(rds_ibdev->fmr_soft_cq);
216 216 if (rds_ibdev->inc_soft_cq)
217 217 rdsv3_af_thr_destroy(rds_ibdev->inc_soft_cq);
↓ open down ↓ |
217 lines elided |
↑ open up ↑ |
218 218
219 219 rdsv3_ib_destroy_mr_pool(rds_ibdev);
220 220 rdsv3_ib_destroy_inc_pool(rds_ibdev);
221 221
222 222 kmem_cache_destroy(rds_ibdev->ib_frag_slab);
223 223
224 224 rdsv3_af_grp_destroy(rds_ibdev->aft_hcagp);
225 225
226 226 #if 0
227 227 while (ib_dealloc_pd(rds_ibdev->pd)) {
228 -#ifndef __lock_lint
229 228 RDSV3_DPRINTF5("rdsv3_ib_remove_one",
230 229 "%s-%d Failed to dealloc pd %p",
231 230 __func__, __LINE__, rds_ibdev->pd);
232 -#endif
233 231 delay(drv_usectohz(1000));
234 232 }
235 233 #else
236 234 if (ib_dealloc_pd(rds_ibdev->pd)) {
237 -#ifndef __lock_lint
238 235 RDSV3_DPRINTF2("rdsv3_ib_remove_one",
239 236 "Failed to dealloc pd %p\n", rds_ibdev->pd);
240 -#endif
241 237 }
242 238 #endif
243 239
244 240 list_destroy(&rds_ibdev->ipaddr_list);
245 241 list_destroy(&rds_ibdev->conn_list);
246 242 list_remove_node(&rds_ibdev->list);
247 243 mutex_destroy(&rds_ibdev->spinlock);
248 244 rw_destroy(&rds_ibdev->rwlock);
249 245 kmem_free(rds_ibdev, sizeof (*rds_ibdev));
250 246
251 247 RDSV3_DPRINTF2("rdsv3_ib_remove_one", "Return: device: %p", device);
252 248 }
253 249
/*
 * IBTF client registration block for RDSv3.  The add/remove callbacks
 * are invoked as HCAs attach and detach; clnt_hdl and state are
 * managed by the IB framework after ib_register_client() in
 * rdsv3_ib_init().
 */
struct ib_client rdsv3_ib_client = {
	.name = "rdsv3_ib",
	.add = rdsv3_ib_add_one,
	.remove = rdsv3_ib_remove_one,
	.clnt_hdl = NULL,
	.state = IB_CLNT_UNINITIALIZED
};
272 257
273 258 static int
274 259 rds_ib_conn_info_visitor(struct rdsv3_connection *conn,
275 260 void *buffer)
276 261 {
277 262 struct rds_info_rdma_connection *iinfo = buffer;
278 263 struct rdsv3_ib_connection *ic;
279 264
280 265 RDSV3_DPRINTF4("rds_ib_conn_info_visitor", "conn: %p buffer: %p",
281 266 conn, buffer);
282 267
283 268 /* We will only ever look at IB transports */
284 269 if (conn->c_trans != &rdsv3_ib_transport)
285 270 return (0);
286 271
287 272 iinfo->src_addr = conn->c_laddr;
288 273 iinfo->dst_addr = conn->c_faddr;
289 274
290 275 (void) memset(&iinfo->src_gid, 0, sizeof (iinfo->src_gid));
291 276 (void) memset(&iinfo->dst_gid, 0, sizeof (iinfo->dst_gid));
292 277 if (rdsv3_conn_state(conn) == RDSV3_CONN_UP) {
293 278 struct rdsv3_ib_device *rds_ibdev;
294 279 struct rdma_dev_addr *dev_addr;
295 280
296 281 ic = conn->c_transport_data;
297 282 dev_addr = &ic->i_cm_id->route.addr.dev_addr;
298 283
299 284 ib_addr_get_sgid(dev_addr, (union ib_gid *)&iinfo->src_gid);
300 285 ib_addr_get_dgid(dev_addr, (union ib_gid *)&iinfo->dst_gid);
301 286
302 287 rds_ibdev = ib_get_client_data(ic->i_cm_id->device,
303 288 &rdsv3_ib_client);
304 289 iinfo->max_send_wr = ic->i_send_ring.w_nr;
305 290 iinfo->max_recv_wr = ic->i_recv_ring.w_nr;
306 291 iinfo->max_send_sge = rds_ibdev->max_sge;
307 292 }
308 293
309 294 RDSV3_DPRINTF4("rds_ib_conn_info_visitor", "conn: %p buffer: %p",
310 295 conn, buffer);
311 296 return (1);
312 297 }
313 298
/*
 * RDS_INFO_IB_CONNECTIONS handler: iterate over every RDS connection
 * and emit one rds_info_rdma_connection record per IB connection via
 * rds_ib_conn_info_visitor().
 */
static void
rds_ib_ic_info(struct rsock *sock, unsigned int len,
    struct rdsv3_info_iterator *iter,
    struct rdsv3_info_lengths *lens)
{
	RDSV3_DPRINTF4("rds_ib_ic_info", "sk: %p iter: %p, lens: %p, len: %d",
	    sock, iter, lens, len);

	rdsv3_for_each_conn_info(sock, len, iter, lens,
	    rds_ib_conn_info_visitor,
	    sizeof (struct rds_info_rdma_connection));
}
326 311
327 312 /*
328 313 * Early RDS/IB was built to only bind to an address if there is an IPoIB
329 314 * device with that address set.
330 315 *
331 316 * If it were me, I'd advocate for something more flexible. Sending and
332 317 * receiving should be device-agnostic. Transports would try and maintain
333 318 * connections between peers who have messages queued. Userspace would be
334 319 * allowed to influence which paths have priority. We could call userspace
335 320 * asserting this policy "routing".
336 321 */
337 322 static int
338 323 rds_ib_laddr_check(uint32_be_t addr)
339 324 {
340 325 int ret;
341 326 struct rdma_cm_id *cm_id;
342 327 struct sockaddr_in sin;
343 328
344 329 RDSV3_DPRINTF4("rds_ib_laddr_check", "addr: %x", ntohl(addr));
345 330
346 331 /*
347 332 * Create a CMA ID and try to bind it. This catches both
348 333 * IB and iWARP capable NICs.
349 334 */
350 335 cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP);
351 336 if (!cm_id)
352 337 return (-EADDRNOTAVAIL);
353 338
354 339 (void) memset(&sin, 0, sizeof (sin));
355 340 sin.sin_family = AF_INET;
356 341 sin.sin_addr.s_addr = rdsv3_scaddr_to_ibaddr(addr);
357 342
358 343 /* rdma_bind_addr will only succeed for IB & iWARP devices */
359 344 ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin);
360 345 /*
361 346 * due to this, we will claim to support iWARP devices unless we
362 347 * check node_type.
363 348 */
364 349 if (ret || cm_id->device->node_type != RDMA_NODE_IB_CA)
365 350 ret = -EADDRNOTAVAIL;
366 351
367 352 RDSV3_DPRINTF5("rds_ib_laddr_check",
368 353 "addr %u.%u.%u.%u ret %d node type %d",
369 354 NIPQUAD(addr), ret,
370 355 cm_id->device ? cm_id->device->node_type : -1);
371 356
372 357 rdma_destroy_id(cm_id);
373 358
374 359 return (ret);
375 360 }
376 361
/*
 * Module unload path: undo everything rdsv3_ib_init() set up, in
 * reverse order of initialization.  The call order is significant --
 * info queries are deregistered and per-device/nodev connections torn
 * down before the IB client and transport are unregistered.
 */
void
rdsv3_ib_exit(void)
{
	RDSV3_DPRINTF4("rds_ib_exit", "Enter");

	rdsv3_info_deregister_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info);
	rdsv3_ib_destroy_nodev_conns();
	ib_unregister_client(&rdsv3_ib_client);
	rdsv3_ib_sysctl_exit();
	rdsv3_ib_recv_exit();
	rdsv3_trans_unregister(&rdsv3_ib_transport);
	/* Matches the per-CPU stats allocation in rdsv3_ib_init(). */
	kmem_free(rdsv3_ib_stats,
	    nr_cpus * sizeof (struct rdsv3_ib_statistics));
	mutex_destroy(&ib_nodev_conns_lock);
	list_destroy(&ib_nodev_conns);
	list_destroy(&rdsv3_ib_devices);

	RDSV3_DPRINTF4("rds_ib_exit", "Return");
}
396 381
/*
 * RDSv3 transport operations vector for InfiniBand.  Registered with
 * the RDS core via rdsv3_trans_register() in rdsv3_ib_init(); the core
 * dispatches send/receive/connection-management work through these
 * entry points.  xmit_cong_map is intentionally NULL (no special
 * congestion-map transmit path for IB).
 */
struct rdsv3_transport rdsv3_ib_transport = {
	.laddr_check = rds_ib_laddr_check,
	.xmit_complete = rdsv3_ib_xmit_complete,
	.xmit = rdsv3_ib_xmit,
	.xmit_cong_map = NULL,
	.xmit_rdma = rdsv3_ib_xmit_rdma,
	.recv = rdsv3_ib_recv,
	.conn_alloc = rdsv3_ib_conn_alloc,
	.conn_free = rdsv3_ib_conn_free,
	.conn_connect = rdsv3_ib_conn_connect,
	.conn_shutdown = rdsv3_ib_conn_shutdown,
	.inc_copy_to_user = rdsv3_ib_inc_copy_to_user,
	.inc_free = rdsv3_ib_inc_free,
	.cm_initiate_connect = rdsv3_ib_cm_initiate_connect,
	.cm_handle_connect = rdsv3_ib_cm_handle_connect,
	.cm_connect_complete = rdsv3_ib_cm_connect_complete,
	.stats_info_copy = rdsv3_ib_stats_info_copy,
	.exit = rdsv3_ib_exit,
	.get_mr = rdsv3_ib_get_mr,
	.sync_mr = rdsv3_ib_sync_mr,
	.free_mr = rdsv3_ib_free_mr,
	.flush_mrs = rdsv3_ib_flush_mrs,
	.t_name = "infiniband",
	.t_type = RDS_TRANS_IB
};
426 407
/*
 * Module init for the RDSv3 IB transport: create the global device and
 * no-device connection lists, allocate per-CPU statistics, register
 * with the IB framework, and register the transport with the RDS core.
 *
 * Returns 0 on success or a nonzero error from one of the registration
 * steps; on failure, everything set up so far is torn down via the
 * goto-cleanup ladder (each label undoes one earlier step).
 */
int
rdsv3_ib_init(void)
{
	int ret;

	RDSV3_DPRINTF4("rds_ib_init", "Enter");

	list_create(&rdsv3_ib_devices, sizeof (struct rdsv3_ib_device),
	    offsetof(struct rdsv3_ib_device, list));
	list_create(&ib_nodev_conns, sizeof (struct rdsv3_ib_connection),
	    offsetof(struct rdsv3_ib_connection, ib_node));
	mutex_init(&ib_nodev_conns_lock, NULL, MUTEX_DRIVER, NULL);

	/* allocate space for ib statistics */
	ASSERT(rdsv3_ib_stats == NULL);
	rdsv3_ib_stats = kmem_zalloc(nr_cpus *
	    sizeof (struct rdsv3_ib_statistics), KM_SLEEP);

	rdsv3_ib_client.dip = rdsv3_dev_info;
	ret = ib_register_client(&rdsv3_ib_client);
	if (ret)
		goto out;

	ret = rdsv3_ib_sysctl_init();
	if (ret)
		goto out_ibreg;

	ret = rdsv3_ib_recv_init();
	if (ret)
		goto out_sysctl;

	ret = rdsv3_trans_register(&rdsv3_ib_transport);
	if (ret)
		goto out_recv;

	/* Last step: expose connection info once everything is live. */
	rdsv3_info_register_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info);

	RDSV3_DPRINTF4("rds_ib_init", "Return");

	return (0);

out_recv:
	rdsv3_ib_recv_exit();
out_sysctl:
	rdsv3_ib_sysctl_exit();
out_ibreg:
	ib_unregister_client(&rdsv3_ib_client);
out:
	kmem_free(rdsv3_ib_stats,
	    nr_cpus * sizeof (struct rdsv3_ib_statistics));
	mutex_destroy(&ib_nodev_conns_lock);
	list_destroy(&ib_nodev_conns);
	list_destroy(&rdsv3_ib_devices);
	return (ret);
}
↓ open down ↓ |
46 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX