1 /*
2 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
3 */
4
5 /*
6 * This file contains code imported from the OFED rds source file message.c
7 * Oracle elects to have and use the contents of message.c under and governed
8 * by the OpenIB.org BSD license (see below for full license text). However,
9 * the following notice accompanied the original version of this file:
10 */
11
12 /*
13 * Copyright (c) 2006 Oracle. All rights reserved.
14 *
15 * This software is available to you under a choice of one of two
16 * licenses. You may choose to be licensed under the terms of the GNU
17 * General Public License (GPL) Version 2, available from the file
18 * COPYING in the main directory of this source tree, or the
19 * OpenIB.org BSD license below:
20 *
21 * Redistribution and use in source and binary forms, with or
22 * without modification, are permitted provided that the following
23 * conditions are met:
24 *
25 * - Redistributions of source code must retain the above
26 * copyright notice, this list of conditions and the following
27 * disclaimer.
28 *
29 * - Redistributions in binary form must reproduce the above
30 * copyright notice, this list of conditions and the following
31 * disclaimer in the documentation and/or other materials
32 * provided with the distribution.
33 *
34 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
35 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
36 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
37 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
38 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
39 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
40 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
41 * SOFTWARE.
42 *
43 */
44 #include <sys/rds.h>
45
46 #include <sys/ib/clients/rdsv3/rdsv3.h>
47 #include <sys/ib/clients/rdsv3/rdma.h>
48 #include <sys/ib/clients/rdsv3/rdsv3_debug.h>
49
50 static unsigned int rdsv3_exthdr_size[__RDSV3_EXTHDR_MAX] = {
51 [RDSV3_EXTHDR_NONE] = 0,
52 [RDSV3_EXTHDR_VERSION] = sizeof (struct rdsv3_ext_header_version),
53 [RDSV3_EXTHDR_RDMA] = sizeof (struct rdsv3_ext_header_rdma),
54 [RDSV3_EXTHDR_RDMA_DEST] = sizeof (struct rdsv3_ext_header_rdma_dest),
55 };
56
57 void
58 rdsv3_message_addref(struct rdsv3_message *rm)
59 {
60 RDSV3_DPRINTF5("rdsv3_message_addref", "addref rm %p ref %d",
61 rm, atomic_get(&rm->m_refcount));
62 atomic_inc_32(&rm->m_refcount);
63 }
64
65 /*
66 * This relies on dma_map_sg() not touching sg[].page during merging.
67 */
68 static void
69 rdsv3_message_purge(struct rdsv3_message *rm)
70 {
71 unsigned long i;
72
73 RDSV3_DPRINTF4("rdsv3_message_purge", "Enter(rm: %p)", rm);
74
75 if (test_bit(RDSV3_MSG_PAGEVEC, &rm->m_flags))
76 return;
77
78 for (i = 0; i < rm->m_nents; i++) {
79 RDSV3_DPRINTF5("rdsv3_message_purge", "putting data page %p\n",
80 (void *)rdsv3_sg_page(&rm->m_sg[i]));
81 /* XXX will have to put_page for page refs */
82 kmem_free(rdsv3_sg_page(&rm->m_sg[i]),
83 rdsv3_sg_len(&rm->m_sg[i]));
84 }
85
86 if (rm->m_rdma_op)
87 rdsv3_rdma_free_op(rm->m_rdma_op);
88 if (rm->m_rdma_mr) {
89 struct rdsv3_mr *mr = rm->m_rdma_mr;
90 if (mr->r_refcount == 0) {
91 RDSV3_DPRINTF4("rdsv3_message_purge ASSERT 0",
92 "rm %p mr %p", rm, mr);
93 return;
94 }
95 if (mr->r_refcount == 0xdeadbeef) {
96 RDSV3_DPRINTF4("rdsv3_message_purge ASSERT deadbeef",
97 "rm %p mr %p", rm, mr);
98 return;
99 }
100 if (atomic_dec_and_test(&mr->r_refcount)) {
101 rm->m_rdma_mr = NULL;
102 __rdsv3_put_mr_final(mr);
103 }
104 }
105
106 RDSV3_DPRINTF4("rdsv3_message_purge", "Return(rm: %p)", rm);
107
108 }
109
110 void
111 rdsv3_message_put(struct rdsv3_message *rm)
112 {
113 RDSV3_DPRINTF5("rdsv3_message_put",
114 "put rm %p ref %d\n", rm, atomic_get(&rm->m_refcount));
115
116 if (atomic_dec_and_test(&rm->m_refcount)) {
117 ASSERT(!list_link_active(&rm->m_sock_item));
118 ASSERT(!list_link_active(&rm->m_conn_item));
119 rdsv3_message_purge(rm);
120
121 kmem_free(rm, sizeof (struct rdsv3_message) +
122 (rm->m_nents * sizeof (struct rdsv3_scatterlist)));
123 }
124 }
125
126 void
127 rdsv3_message_inc_free(struct rdsv3_incoming *inc)
128 {
129 struct rdsv3_message *rm =
130 container_of(inc, struct rdsv3_message, m_inc);
131 rdsv3_message_put(rm);
132 }
133
134 void
135 rdsv3_message_populate_header(struct rdsv3_header *hdr, uint16_be_t sport,
136 uint16_be_t dport, uint64_t seq)
137 {
138 hdr->h_flags = 0;
139 hdr->h_sport = sport;
140 hdr->h_dport = dport;
141 hdr->h_sequence = htonll(seq);
142 hdr->h_exthdr[0] = RDSV3_EXTHDR_NONE;
143 }
144
145 int
146 rdsv3_message_add_extension(struct rdsv3_header *hdr,
147 unsigned int type, const void *data, unsigned int len)
148 {
149 unsigned int ext_len = sizeof (uint8_t) + len;
150 unsigned char *dst;
151
152 RDSV3_DPRINTF4("rdsv3_message_add_extension", "Enter");
153
154 /* For now, refuse to add more than one extension header */
155 if (hdr->h_exthdr[0] != RDSV3_EXTHDR_NONE)
156 return (0);
157
158 if (type >= __RDSV3_EXTHDR_MAX ||
159 len != rdsv3_exthdr_size[type])
160 return (0);
161
162 if (ext_len >= RDSV3_HEADER_EXT_SPACE)
163 return (0);
164 dst = hdr->h_exthdr;
165
166 *dst++ = type;
167 (void) memcpy(dst, data, len);
168
169 dst[len] = RDSV3_EXTHDR_NONE;
170
171 RDSV3_DPRINTF4("rdsv3_message_add_extension", "Return");
172 return (1);
173 }
174
175 /*
176 * If a message has extension headers, retrieve them here.
177 * Call like this:
178 *
179 * unsigned int pos = 0;
180 *
181 * while (1) {
182 * buflen = sizeof(buffer);
183 * type = rdsv3_message_next_extension(hdr, &pos, buffer, &buflen);
184 * if (type == RDSV3_EXTHDR_NONE)
185 * break;
186 * ...
187 * }
188 */
189 int
190 rdsv3_message_next_extension(struct rdsv3_header *hdr,
191 unsigned int *pos, void *buf, unsigned int *buflen)
192 {
193 unsigned int offset, ext_type, ext_len;
194 uint8_t *src = hdr->h_exthdr;
195
196 RDSV3_DPRINTF4("rdsv3_message_next_extension", "Enter");
197
198 offset = *pos;
199 if (offset >= RDSV3_HEADER_EXT_SPACE)
200 goto none;
201
202 /*
203 * Get the extension type and length. For now, the
204 * length is implied by the extension type.
205 */
206 ext_type = src[offset++];
207
208 if (ext_type == RDSV3_EXTHDR_NONE || ext_type >= __RDSV3_EXTHDR_MAX)
209 goto none;
210 ext_len = rdsv3_exthdr_size[ext_type];
211 if (offset + ext_len > RDSV3_HEADER_EXT_SPACE)
212 goto none;
213
214 *pos = offset + ext_len;
215 if (ext_len < *buflen)
216 *buflen = ext_len;
217 (void) memcpy(buf, src + offset, *buflen);
218 return (ext_type);
219
220 none:
221 *pos = RDSV3_HEADER_EXT_SPACE;
222 *buflen = 0;
223 return (RDSV3_EXTHDR_NONE);
224 }
225
226 int
227 rdsv3_message_add_version_extension(struct rdsv3_header *hdr,
228 unsigned int version)
229 {
230 struct rdsv3_ext_header_version ext_hdr;
231
232 ext_hdr.h_version = htonl(version);
233 return (rdsv3_message_add_extension(hdr, RDSV3_EXTHDR_VERSION,
234 &ext_hdr, sizeof (ext_hdr)));
235 }
236
237 int
238 rdsv3_message_get_version_extension(struct rdsv3_header *hdr,
239 unsigned int *version)
240 {
241 struct rdsv3_ext_header_version ext_hdr;
242 unsigned int pos = 0, len = sizeof (ext_hdr);
243
244 RDSV3_DPRINTF4("rdsv3_message_get_version_extension", "Enter");
245
246 /*
247 * We assume the version extension is the only one present
248 */
249 if (rdsv3_message_next_extension(hdr, &pos, &ext_hdr, &len) !=
250 RDSV3_EXTHDR_VERSION)
251 return (0);
252 *version = ntohl(ext_hdr.h_version);
253 return (1);
254 }
255
256 int
257 rdsv3_message_add_rdma_dest_extension(struct rdsv3_header *hdr, uint32_t r_key,
258 uint32_t offset)
259 {
260 struct rdsv3_ext_header_rdma_dest ext_hdr;
261
262 ext_hdr.h_rdma_rkey = htonl(r_key);
263 ext_hdr.h_rdma_offset = htonl(offset);
264 return (rdsv3_message_add_extension(hdr, RDSV3_EXTHDR_RDMA_DEST,
265 &ext_hdr, sizeof (ext_hdr)));
266 }
267
268 struct rdsv3_message *
269 rdsv3_message_alloc(unsigned int nents, int gfp)
270 {
271 struct rdsv3_message *rm;
272
273 RDSV3_DPRINTF4("rdsv3_message_alloc", "Enter(nents: %d)", nents);
274
275 rm = kmem_zalloc(sizeof (struct rdsv3_message) +
276 (nents * sizeof (struct rdsv3_scatterlist)), gfp);
277 if (!rm)
278 goto out;
279
280 rm->m_refcount = 1;
281 list_link_init(&rm->m_sock_item);
282 list_link_init(&rm->m_conn_item);
283 mutex_init(&rm->m_rs_lock, NULL, MUTEX_DRIVER, NULL);
284 rdsv3_init_waitqueue(&rm->m_flush_wait);
285
286 RDSV3_DPRINTF4("rdsv3_message_alloc", "Return(rm: %p)", rm);
287 out:
288 return (rm);
289 }
290
291 struct rdsv3_message *
292 rdsv3_message_map_pages(unsigned long *page_addrs, unsigned int total_len)
293 {
294 struct rdsv3_message *rm;
295 unsigned int i;
296
297 RDSV3_DPRINTF4("rdsv3_message_map_pages", "Enter(len: %d)", total_len);
298
299 rm = rdsv3_message_alloc(ceil(total_len, PAGE_SIZE), KM_NOSLEEP);
300 if (rm == NULL)
301 return (ERR_PTR(-ENOMEM));
302
303 set_bit(RDSV3_MSG_PAGEVEC, &rm->m_flags);
304 rm->m_inc.i_hdr.h_len = htonl(total_len);
305 rm->m_nents = ceil(total_len, PAGE_SIZE);
306 for (i = 0; i < rm->m_nents; ++i) {
307 rdsv3_sg_set_page(&rm->m_sg[i],
308 page_addrs[i],
309 PAGE_SIZE, 0);
310 }
311
312 return (rm);
313 }
314
315 struct rdsv3_message *
316 rdsv3_message_copy_from_user(struct uio *uiop,
317 size_t total_len)
318 {
319 struct rdsv3_message *rm;
320 struct rdsv3_scatterlist *sg;
321 int ret;
322
323 RDSV3_DPRINTF4("rdsv3_message_copy_from_user", "Enter: %d", total_len);
324
325 rm = rdsv3_message_alloc(ceil(total_len, PAGE_SIZE), KM_NOSLEEP);
326 if (rm == NULL) {
327 ret = -ENOMEM;
328 goto out;
329 }
330
331 rm->m_inc.i_hdr.h_len = htonl(total_len);
332
333 /*
334 * now allocate and copy in the data payload.
335 */
336 sg = rm->m_sg;
337
338 while (total_len) {
339 if (rdsv3_sg_page(sg) == NULL) {
340 ret = rdsv3_page_remainder_alloc(sg, total_len, 0);
341 if (ret)
342 goto out;
343 rm->m_nents++;
344 }
345
346 ret = uiomove(rdsv3_sg_page(sg), rdsv3_sg_len(sg), UIO_WRITE,
347 uiop);
348 if (ret) {
349 RDSV3_DPRINTF2("rdsv3_message_copy_from_user",
350 "uiomove failed");
351 ret = -ret;
352 goto out;
353 }
354
355 total_len -= rdsv3_sg_len(sg);
356 sg++;
357 }
358 ret = 0;
359 out:
360 if (ret) {
361 if (rm)
362 rdsv3_message_put(rm);
363 rm = ERR_PTR(ret);
364 }
365 return (rm);
366 }
367
368 int
369 rdsv3_message_inc_copy_to_user(struct rdsv3_incoming *inc,
370 uio_t *uiop, size_t size)
371 {
372 struct rdsv3_message *rm;
373 struct rdsv3_scatterlist *sg;
374 unsigned long to_copy;
375 unsigned long vec_off;
376 int copied;
377 int ret;
378 uint32_t len;
379
380 rm = container_of(inc, struct rdsv3_message, m_inc);
381 len = ntohl(rm->m_inc.i_hdr.h_len);
382
383 RDSV3_DPRINTF4("rdsv3_message_inc_copy_to_user",
384 "Enter(rm: %p, len: %d)", rm, len);
385
386 sg = rm->m_sg;
387 vec_off = 0;
388 copied = 0;
389
390 while (copied < size && copied < len) {
391
392 to_copy = min(len - copied, sg->length - vec_off);
393 to_copy = min(size - copied, to_copy);
394
395 RDSV3_DPRINTF5("rdsv3_message_inc_copy_to_user",
396 "copying %lu bytes to user iov %p from sg [%p, %u] + %lu\n",
397 to_copy, uiop,
398 rdsv3_sg_page(sg), sg->length, vec_off);
399
400 ret = uiomove(rdsv3_sg_page(sg), to_copy, UIO_READ, uiop);
401 if (ret)
402 break;
403
404 vec_off += to_copy;
405 copied += to_copy;
406
407 if (vec_off == sg->length) {
408 vec_off = 0;
409 sg++;
410 }
411 }
412
413 return (copied);
414 }
415
416 /*
417 * If the message is still on the send queue, wait until the transport
418 * is done with it. This is particularly important for RDMA operations.
419 */
420 /* ARGSUSED */
421 void
422 rdsv3_message_wait(struct rdsv3_message *rm)
423 {
424 rdsv3_wait_event(&rm->m_flush_wait,
425 !test_bit(RDSV3_MSG_MAPPED, &rm->m_flags));
426 }
427
428 void
429 rdsv3_message_unmapped(struct rdsv3_message *rm)
430 {
431 clear_bit(RDSV3_MSG_MAPPED, &rm->m_flags);
432 rdsv3_wake_up_all(&rm->m_flush_wait);
433 }