Print this page
3866 panic in idm module
3867 stmfCreateLu failed: GUID_IN_USE
3868 iscsi target not accepting any new connections
Reviewed by: Sebastien Roy <sebastien.roy@delphix.com>
Reviewed by: Jeremy Jones <jeremy@delphix.com>
Reviewed by: Eric Diven <eric.diven@delphix.com>
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/io/idm/idm_so.c
+++ new/usr/src/uts/common/io/idm/idm_so.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25 /*
26 26 * Copyright (c) 2013 by Delphix. All rights reserved.
27 27 */
28 28
29 29 #include <sys/conf.h>
30 30 #include <sys/stat.h>
31 31 #include <sys/file.h>
32 32 #include <sys/ddi.h>
33 33 #include <sys/sunddi.h>
34 34 #include <sys/modctl.h>
35 35 #include <sys/priv.h>
36 36 #include <sys/cpuvar.h>
37 37 #include <sys/socket.h>
38 38 #include <sys/strsubr.h>
39 39 #include <sys/sysmacros.h>
40 40 #include <sys/sdt.h>
41 41 #include <netinet/tcp.h>
42 42 #include <inet/tcp.h>
43 43 #include <sys/socketvar.h>
44 44 #include <sys/pathname.h>
45 45 #include <sys/fs/snode.h>
46 46 #include <sys/fs/dv_node.h>
47 47 #include <sys/vnode.h>
48 48 #include <netinet/in.h>
49 49 #include <net/if.h>
50 50 #include <sys/sockio.h>
51 51 #include <sys/ksocket.h>
52 52 #include <sys/filio.h> /* FIONBIO */
53 53 #include <sys/iscsi_protocol.h>
54 54 #include <sys/idm/idm.h>
55 55 #include <sys/idm/idm_so.h>
56 56 #include <sys/idm/idm_text.h>
57 57
58 58 #define IN_PROGRESS_DELAY 1
59 59
60 60 /*
61 61 * in6addr_any is currently all zeroes, but use the macro in case this
62 62 * ever changes.
63 63 */
64 64 static const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
65 65
66 66 static void idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
67 67 static void idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
68 68 static void idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
69 69
70 70 static idm_status_t idm_so_conn_create_common(idm_conn_t *ic, ksocket_t new_so);
71 71 static void idm_so_conn_destroy_common(idm_conn_t *ic);
72 72 static void idm_so_conn_connect_common(idm_conn_t *ic);
73 73
74 74 static void idm_set_ini_preconnect_options(idm_so_conn_t *sc,
75 75 boolean_t boot_conn);
76 76 static void idm_set_postconnect_options(ksocket_t so);
77 77 static idm_status_t idm_i_so_tx(idm_pdu_t *pdu);
78 78
79 79 static idm_status_t idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu);
80 80 static void idm_so_send_rtt_data(idm_conn_t *ic, idm_task_t *idt,
81 81 idm_buf_t *idb, uint32_t offset, uint32_t length);
82 82 static void idm_so_send_rtt_data_done(idm_task_t *idt, idm_buf_t *idb);
83 83 static idm_status_t idm_so_send_buf_region(idm_task_t *idt,
84 84 idm_buf_t *idb, uint32_t buf_region_offset, uint32_t buf_region_length);
85 85
86 86 static uint32_t idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb,
87 87 uint32_t ro, uint32_t dlength);
88 88
89 89 static idm_status_t idm_so_handle_digest(idm_conn_t *it,
90 90 nvpair_t *digest_choice, const idm_kv_xlate_t *ikvx);
91 91
92 92 static void idm_so_socket_set_nonblock(struct sonode *node);
93 93 static void idm_so_socket_set_block(struct sonode *node);
94 94
95 95 /*
96 96 * Transport ops prototypes
97 97 */
98 98 static void idm_so_tx(idm_conn_t *ic, idm_pdu_t *pdu);
99 99 static idm_status_t idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb);
100 100 static idm_status_t idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb);
101 101 static void idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu);
102 102 static void idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu);
103 103 static void idm_so_rx_dataout(idm_conn_t *ic, idm_pdu_t *pdu);
104 104 static idm_status_t idm_so_free_task_rsrc(idm_task_t *idt);
105 105 static kv_status_t idm_so_negotiate_key_values(idm_conn_t *it,
106 106 nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl);
107 107 static void idm_so_notice_key_values(idm_conn_t *it,
108 108 nvlist_t *negotiated_nvl);
109 109 static kv_status_t idm_so_declare_key_values(idm_conn_t *it,
110 110 nvlist_t *config_nvl, nvlist_t *outgoing_nvl);
111 111 static boolean_t idm_so_conn_is_capable(idm_conn_req_t *ic,
112 112 idm_transport_caps_t *caps);
113 113 static idm_status_t idm_so_buf_alloc(idm_buf_t *idb, uint64_t buflen);
114 114 static void idm_so_buf_free(idm_buf_t *idb);
115 115 static idm_status_t idm_so_buf_setup(idm_buf_t *idb);
116 116 static void idm_so_buf_teardown(idm_buf_t *idb);
117 117 static idm_status_t idm_so_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is);
118 118 static void idm_so_tgt_svc_destroy(idm_svc_t *is);
119 119 static idm_status_t idm_so_tgt_svc_online(idm_svc_t *is);
120 120 static void idm_so_tgt_svc_offline(idm_svc_t *is);
121 121 static void idm_so_tgt_conn_destroy(idm_conn_t *ic);
122 122 static idm_status_t idm_so_tgt_conn_connect(idm_conn_t *ic);
123 123 static void idm_so_conn_disconnect(idm_conn_t *ic);
124 124 static idm_status_t idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic);
125 125 static void idm_so_ini_conn_destroy(idm_conn_t *ic);
126 126 static idm_status_t idm_so_ini_conn_connect(idm_conn_t *ic);
127 127
128 128 /*
129 129 * IDM Native Sockets transport operations
130 130 */
/*
 * The initializers below are positional; the trailing comment on each line
 * names the idm_transport_ops_t slot it is meant to fill, so the order here
 * has to track the structure declaration.  NULL marks a hook this transport
 * does not supply.  idm_so_conn_disconnect is shared by the target and
 * initiator disconnect slots.
 */
131 131 static
132 132 idm_transport_ops_t idm_so_transport_ops = {
133 133 idm_so_tx, /* it_tx_pdu */
134 134 idm_so_buf_tx_to_ini, /* it_buf_tx_to_ini */
135 135 idm_so_buf_rx_from_ini, /* it_buf_rx_from_ini */
136 136 idm_so_rx_datain, /* it_rx_datain */
137 137 idm_so_rx_rtt, /* it_rx_rtt */
138 138 idm_so_rx_dataout, /* it_rx_dataout */
139 139 NULL, /* it_alloc_conn_rsrc */
140 140 NULL, /* it_free_conn_rsrc */
141 141 NULL, /* it_tgt_enable_datamover */
142 142 NULL, /* it_ini_enable_datamover */
143 143 NULL, /* it_conn_terminate */
144 144 idm_so_free_task_rsrc, /* it_free_task_rsrc */
145 145 idm_so_negotiate_key_values, /* it_negotiate_key_values */
146 146 idm_so_notice_key_values, /* it_notice_key_values */
147 147 idm_so_conn_is_capable, /* it_conn_is_capable */
148 148 idm_so_buf_alloc, /* it_buf_alloc */
149 149 idm_so_buf_free, /* it_buf_free */
150 150 idm_so_buf_setup, /* it_buf_setup */
151 151 idm_so_buf_teardown, /* it_buf_teardown */
152 152 idm_so_tgt_svc_create, /* it_tgt_svc_create */
153 153 idm_so_tgt_svc_destroy, /* it_tgt_svc_destroy */
154 154 idm_so_tgt_svc_online, /* it_tgt_svc_online */
155 155 idm_so_tgt_svc_offline, /* it_tgt_svc_offline */
156 156 idm_so_tgt_conn_destroy, /* it_tgt_conn_destroy */
157 157 idm_so_tgt_conn_connect, /* it_tgt_conn_connect */
158 158 idm_so_conn_disconnect, /* it_tgt_conn_disconnect */
159 159 idm_so_ini_conn_create, /* it_ini_conn_create */
160 160 idm_so_ini_conn_destroy, /* it_ini_conn_destroy */
161 161 idm_so_ini_conn_connect, /* it_ini_conn_connect */
162 162 idm_so_conn_disconnect, /* it_ini_conn_disconnect */
163 163 idm_so_declare_key_values /* it_declare_key_values */
164 164 };
165 165
166 166 kmutex_t idm_so_timed_socket_mutex;
167 167
168 168 int32_t idm_so_sndbuf = IDM_SNDBUF_SIZE;
169 169 int32_t idm_so_rcvbuf = IDM_RCVBUF_SIZE;
170 170
171 171 /*
172 172 * idm_so_init()
173 173 * Sockets transport initialization
174 174 */
175 175 void
176 176 idm_so_init(idm_transport_t *it)
177 177 {
178 178 /* Cache for IDM Data and R2T Transmit PDU's */
179 179 idm.idm_sotx_pdu_cache = kmem_cache_create("idm_tx_pdu_cache",
180 180 sizeof (idm_pdu_t) + sizeof (iscsi_hdr_t), 8,
181 181 &idm_sotx_pdu_constructor, NULL, NULL, NULL, NULL, KM_SLEEP);
182 182
183 183 /* Cache for IDM Receive PDU's */
184 184 idm.idm_sorx_pdu_cache = kmem_cache_create("idm_rx_pdu_cache",
185 185 sizeof (idm_pdu_t) + IDM_SORX_CACHE_HDRLEN, 8,
186 186 &idm_sorx_pdu_constructor, NULL, NULL, NULL, NULL, KM_SLEEP);
187 187
188 188 /* 128k buffer cache */
189 189 idm.idm_so_128k_buf_cache = kmem_cache_create("idm_128k_buf_cache",
190 190 IDM_SO_BUF_CACHE_UB, 8, NULL, NULL, NULL, NULL, NULL, KM_SLEEP);
191 191
192 192 /* Set the sockets transport ops */
193 193 it->it_ops = &idm_so_transport_ops;
194 194
195 195 mutex_init(&idm_so_timed_socket_mutex, NULL, MUTEX_DEFAULT, NULL);
196 196
197 197 }
198 198
199 199 /*
200 200 * idm_so_fini()
201 201 * Sockets transport teardown
202 202 */
203 203 void
204 204 idm_so_fini(void)
205 205 {
206 206 kmem_cache_destroy(idm.idm_so_128k_buf_cache);
207 207 kmem_cache_destroy(idm.idm_sotx_pdu_cache);
208 208 kmem_cache_destroy(idm.idm_sorx_pdu_cache);
209 209 mutex_destroy(&idm_so_timed_socket_mutex);
210 210 }
211 211
212 212 ksocket_t
213 213 idm_socreate(int domain, int type, int protocol)
214 214 {
215 215 ksocket_t ks;
216 216
217 217 if (!ksocket_socket(&ks, domain, type, protocol, KSOCKET_NOSLEEP,
218 218 CRED())) {
219 219 return (ks);
220 220 } else {
221 221 return (NULL);
222 222 }
223 223 }
224 224
225 225 /*
226 226 * idm_soshutdown will disconnect the socket and prevent subsequent PDU
227 227 * reception and transmission. The sonode still exists but its state
228 228 * gets modified to indicate it is no longer connected. Calls to
229 229 * idm_sorecv/idm_iov_sorecv will return so idm_soshutdown can be used
230 230 * regain control of a thread stuck in idm_sorecv.
231 231 */
232 232 void
233 233 idm_soshutdown(ksocket_t so)
234 234 {
235 235 (void) ksocket_shutdown(so, SHUT_RDWR, CRED());
236 236 }
237 237
238 238 /*
239 239 * idm_sodestroy releases all resources associated with a socket previously
240 240 * created with idm_socreate. The socket must be shutdown using
241 241 * idm_soshutdown before the socket is destroyed with idm_sodestroy,
242 242 * otherwise undefined behavior will result.
243 243 */
244 244 void
245 245 idm_sodestroy(ksocket_t ks)
246 246 {
247 247 (void) ksocket_close(ks, CRED());
248 248 }
249 249
250 250 /*
251 251 * Function to compare two addresses in sockaddr_storage format
252 252 */
253 253
254 254 int
255 255 idm_ss_compare(const struct sockaddr_storage *cmp_ss1,
256 256 const struct sockaddr_storage *cmp_ss2,
257 257 boolean_t v4_mapped_as_v4,
258 258 boolean_t compare_ports)
259 259 {
260 260 struct sockaddr_storage mapped_v4_ss1, mapped_v4_ss2;
261 261 const struct sockaddr_storage *ss1, *ss2;
262 262 struct in_addr *in1, *in2;
263 263 struct in6_addr *in61, *in62;
264 264 int i;
265 265
266 266 /*
267 267 * Normalize V4-mapped IPv6 addresses into V4 format if
268 268 * v4_mapped_as_v4 is B_TRUE.
269 269 */
270 270 ss1 = cmp_ss1;
271 271 ss2 = cmp_ss2;
272 272 if (v4_mapped_as_v4 && (ss1->ss_family == AF_INET6)) {
273 273 in61 = &((struct sockaddr_in6 *)ss1)->sin6_addr;
274 274 if (IN6_IS_ADDR_V4MAPPED(in61)) {
275 275 bzero(&mapped_v4_ss1, sizeof (mapped_v4_ss1));
276 276 mapped_v4_ss1.ss_family = AF_INET;
277 277 ((struct sockaddr_in *)&mapped_v4_ss1)->sin_port =
278 278 ((struct sockaddr_in *)ss1)->sin_port;
279 279 IN6_V4MAPPED_TO_INADDR(in61,
280 280 &((struct sockaddr_in *)&mapped_v4_ss1)->sin_addr);
281 281 ss1 = &mapped_v4_ss1;
282 282 }
283 283 }
284 284 ss2 = cmp_ss2;
285 285 if (v4_mapped_as_v4 && (ss2->ss_family == AF_INET6)) {
286 286 in62 = &((struct sockaddr_in6 *)ss2)->sin6_addr;
287 287 if (IN6_IS_ADDR_V4MAPPED(in62)) {
288 288 bzero(&mapped_v4_ss2, sizeof (mapped_v4_ss2));
289 289 mapped_v4_ss2.ss_family = AF_INET;
290 290 ((struct sockaddr_in *)&mapped_v4_ss2)->sin_port =
291 291 ((struct sockaddr_in *)ss2)->sin_port;
292 292 IN6_V4MAPPED_TO_INADDR(in62,
293 293 &((struct sockaddr_in *)&mapped_v4_ss2)->sin_addr);
294 294 ss2 = &mapped_v4_ss2;
295 295 }
296 296 }
297 297
298 298 /*
299 299 * Compare ports, then address family, then ip address
300 300 */
301 301 if (compare_ports &&
302 302 (((struct sockaddr_in *)ss1)->sin_port !=
303 303 ((struct sockaddr_in *)ss2)->sin_port)) {
304 304 if (((struct sockaddr_in *)ss1)->sin_port >
305 305 ((struct sockaddr_in *)ss2)->sin_port)
306 306 return (1);
307 307 else
308 308 return (-1);
309 309 }
310 310
311 311 /*
312 312 * ports are the same
313 313 */
314 314 if (ss1->ss_family != ss2->ss_family) {
315 315 if (ss1->ss_family == AF_INET)
316 316 return (1);
317 317 else
318 318 return (-1);
319 319 }
320 320
321 321 /*
322 322 * address families are the same
323 323 */
324 324 if (ss1->ss_family == AF_INET) {
325 325 in1 = &((struct sockaddr_in *)ss1)->sin_addr;
326 326 in2 = &((struct sockaddr_in *)ss2)->sin_addr;
327 327
328 328 if (in1->s_addr > in2->s_addr)
329 329 return (1);
330 330 else if (in1->s_addr < in2->s_addr)
331 331 return (-1);
332 332 else
333 333 return (0);
334 334 } else if (ss1->ss_family == AF_INET6) {
335 335 in61 = &((struct sockaddr_in6 *)ss1)->sin6_addr;
336 336 in62 = &((struct sockaddr_in6 *)ss2)->sin6_addr;
337 337
338 338 for (i = 0; i < 4; i++) {
339 339 if (in61->s6_addr32[i] > in62->s6_addr32[i])
340 340 return (1);
341 341 else if (in61->s6_addr32[i] < in62->s6_addr32[i])
342 342 return (-1);
343 343 }
344 344 return (0);
345 345 }
346 346
347 347 return (1);
348 348 }
349 349
350 350 /*
351 351 * IP address filter functions to flag addresses that should not
352 352 * go out to initiators through discovery.
353 353 */
354 354 static boolean_t
355 355 idm_v4_addr_okay(struct in_addr *in_addr)
356 356 {
357 357 in_addr_t addr = ntohl(in_addr->s_addr);
358 358
359 359 if ((INADDR_NONE == addr) ||
360 360 (IN_MULTICAST(addr)) ||
361 361 ((addr >> IN_CLASSA_NSHIFT) == 0) ||
362 362 ((addr >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)) {
363 363 return (B_FALSE);
364 364 }
365 365 return (B_TRUE);
366 366 }
367 367
368 368 static boolean_t
369 369 idm_v6_addr_okay(struct in6_addr *addr6)
370 370 {
371 371
372 372 if ((IN6_IS_ADDR_UNSPECIFIED(addr6)) ||
373 373 (IN6_IS_ADDR_LOOPBACK(addr6)) ||
374 374 (IN6_IS_ADDR_MULTICAST(addr6)) ||
375 375 (IN6_IS_ADDR_V4MAPPED(addr6)) ||
376 376 (IN6_IS_ADDR_V4COMPAT(addr6)) ||
377 377 (IN6_IS_ADDR_LINKLOCAL(addr6))) {
378 378 return (B_FALSE);
379 379 }
380 380 return (B_TRUE);
381 381 }
382 382
383 383 /*
384 384 * idm_get_ipaddr will retrieve a list of IP Addresses which the host is
385 385 * configured with by sending down a sequence of kernel ioctl to IP STREAMS.
386 386 */
387 387 int
388 388 idm_get_ipaddr(idm_addr_list_t **ipaddr_p)
389 389 {
390 390 ksocket_t so4, so6;
391 391 struct lifnum lifn;
392 392 struct lifconf lifc;
393 393 struct lifreq *lp;
394 394 int rval;
395 395 int numifs;
396 396 int bufsize;
397 397 void *buf;
398 398 int i, j, n, rc;
399 399 struct sockaddr_storage ss;
400 400 struct sockaddr_in *sin;
401 401 struct sockaddr_in6 *sin6;
402 402 idm_addr_t *ip;
403 403 idm_addr_list_t *ipaddr = NULL;
404 404 int size_ipaddr;
405 405
406 406 *ipaddr_p = NULL;
407 407 size_ipaddr = 0;
408 408 buf = NULL;
409 409
410 410 /* create an ipv4 and ipv6 UDP socket */
411 411 if ((so6 = idm_socreate(PF_INET6, SOCK_DGRAM, 0)) == NULL)
412 412 return (0);
413 413 if ((so4 = idm_socreate(PF_INET, SOCK_DGRAM, 0)) == NULL) {
414 414 idm_sodestroy(so6);
415 415 return (0);
416 416 }
417 417
418 418
419 419 retry_count:
420 420 /* snapshot the current number of interfaces */
421 421 lifn.lifn_family = PF_UNSPEC;
422 422 lifn.lifn_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
423 423 lifn.lifn_count = 0;
424 424 /* use vp6 for ioctls with unspecified families by default */
425 425 if (ksocket_ioctl(so6, SIOCGLIFNUM, (intptr_t)&lifn, &rval, CRED())
426 426 != 0) {
427 427 goto cleanup;
428 428 }
429 429
430 430 numifs = lifn.lifn_count;
431 431 if (numifs <= 0) {
432 432 goto cleanup;
433 433 }
434 434
435 435 /* allocate extra room in case more interfaces appear */
436 436 numifs += 10;
437 437
438 438 /* get the interface names and ip addresses */
439 439 bufsize = numifs * sizeof (struct lifreq);
440 440 buf = kmem_alloc(bufsize, KM_SLEEP);
441 441
442 442 lifc.lifc_family = AF_UNSPEC;
443 443 lifc.lifc_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
444 444 lifc.lifc_len = bufsize;
445 445 lifc.lifc_buf = buf;
446 446 rc = ksocket_ioctl(so6, SIOCGLIFCONF, (intptr_t)&lifc, &rval, CRED());
447 447 if (rc != 0) {
448 448 goto cleanup;
449 449 }
450 450 /* if our extra room is used up, try again */
451 451 if (bufsize <= lifc.lifc_len) {
452 452 kmem_free(buf, bufsize);
453 453 buf = NULL;
454 454 goto retry_count;
455 455 }
456 456 /* calc actual number of ifconfs */
457 457 n = lifc.lifc_len / sizeof (struct lifreq);
458 458
459 459 /* get ip address */
460 460 if (n > 0) {
461 461 size_ipaddr = sizeof (idm_addr_list_t) +
462 462 (n - 1) * sizeof (idm_addr_t);
463 463 ipaddr = kmem_zalloc(size_ipaddr, KM_SLEEP);
464 464 } else {
465 465 goto cleanup;
466 466 }
467 467
468 468 /*
469 469 * Examine the array of interfaces and filter uninteresting ones
470 470 */
471 471 for (i = 0, j = 0, lp = lifc.lifc_req; i < n; i++, lp++) {
472 472
473 473 /*
474 474 * Copy the address as the SIOCGLIFFLAGS ioctl is destructive
475 475 */
476 476 ss = lp->lifr_addr;
477 477 /*
478 478 * fetch the flags using the socket of the correct family
479 479 */
480 480 switch (ss.ss_family) {
481 481 case AF_INET:
482 482 rc = ksocket_ioctl(so4, SIOCGLIFFLAGS, (intptr_t)lp,
483 483 &rval, CRED());
484 484 break;
485 485 case AF_INET6:
486 486 rc = ksocket_ioctl(so6, SIOCGLIFFLAGS, (intptr_t)lp,
487 487 &rval, CRED());
488 488 break;
489 489 default:
490 490 continue;
491 491 }
492 492 if (rc == 0) {
493 493 /*
494 494 * If we got the flags, skip uninteresting
495 495 * interfaces based on flags
496 496 */
497 497 if ((lp->lifr_flags & IFF_UP) != IFF_UP)
498 498 continue;
499 499 if (lp->lifr_flags &
500 500 (IFF_ANYCAST|IFF_NOLOCAL|IFF_DEPRECATED))
501 501 continue;
502 502 }
503 503
504 504 /* save ip address */
505 505 ip = &ipaddr->al_addrs[j];
506 506 switch (ss.ss_family) {
507 507 case AF_INET:
508 508 sin = (struct sockaddr_in *)&ss;
509 509 if (!idm_v4_addr_okay(&sin->sin_addr))
510 510 continue;
511 511 ip->a_addr.i_addr.in4 = sin->sin_addr;
512 512 ip->a_addr.i_insize = sizeof (struct in_addr);
513 513 break;
514 514 case AF_INET6:
515 515 sin6 = (struct sockaddr_in6 *)&ss;
516 516 if (!idm_v6_addr_okay(&sin6->sin6_addr))
517 517 continue;
518 518 ip->a_addr.i_addr.in6 = sin6->sin6_addr;
519 519 ip->a_addr.i_insize = sizeof (struct in6_addr);
520 520 break;
521 521 default:
522 522 continue;
523 523 }
524 524 j++;
525 525 }
526 526
527 527 if (j == 0) {
528 528 /* no valid ifaddr */
529 529 kmem_free(ipaddr, size_ipaddr);
530 530 size_ipaddr = 0;
531 531 ipaddr = NULL;
532 532 } else {
533 533 ipaddr->al_out_cnt = j;
534 534 }
535 535
536 536
537 537 cleanup:
538 538 idm_sodestroy(so6);
539 539 idm_sodestroy(so4);
540 540
541 541 if (buf != NULL)
542 542 kmem_free(buf, bufsize);
543 543
544 544 *ipaddr_p = ipaddr;
545 545 return (size_ipaddr);
546 546 }
547 547
548 548 int
549 549 idm_sorecv(ksocket_t so, void *msg, size_t len)
550 550 {
551 551 iovec_t iov;
552 552
553 553 ASSERT(so != NULL);
554 554 ASSERT(len != 0);
555 555
556 556 /*
557 557 * Fill in iovec and receive data
558 558 */
559 559 iov.iov_base = msg;
560 560 iov.iov_len = len;
561 561
562 562 return (idm_iov_sorecv(so, &iov, 1, len));
563 563 }
564 564
565 565 /*
566 566 * idm_sosendto - Sends a buffered data on a non-connected socket.
567 567 *
568 568 * This function puts the data provided on the wire by calling sosendmsg.
569 569 * It will return only when all the data has been sent or if an error
570 570 * occurs.
571 571 *
572 572 * Returns 0 for success, the socket errno value if sosendmsg fails, and
573 573 * -1 if sosendmsg returns success but uio_resid != 0
574 574 */
575 575 int
576 576 idm_sosendto(ksocket_t so, void *buff, size_t len,
577 577 struct sockaddr *name, socklen_t namelen)
578 578 {
579 579 struct msghdr msg;
580 580 struct iovec iov[1];
581 581 int error;
582 582 size_t sent = 0;
583 583
584 584 iov[0].iov_base = buff;
585 585 iov[0].iov_len = len;
586 586
587 587 /* Initialization of the message header. */
588 588 bzero(&msg, sizeof (msg));
589 589 msg.msg_iov = iov;
590 590 msg.msg_iovlen = 1;
591 591 msg.msg_name = name;
592 592 msg.msg_namelen = namelen;
593 593
594 594 if ((error = ksocket_sendmsg(so, &msg, 0, &sent, CRED())) == 0) {
595 595 /* Data sent */
596 596 if (sent == len) {
597 597 /* All data sent. Success. */
598 598 return (0);
599 599 } else {
600 600 /* Not all data was sent. Failure */
601 601 return (-1);
602 602 }
603 603 }
604 604
605 605 /* Send failed */
606 606 return (error);
607 607 }
608 608
609 609 /*
610 610 * idm_iov_sosend - Sends an iovec on a connection.
611 611 *
612 612 * This function puts the data provided on the wire by calling sosendmsg.
613 613 * It will return only when all the data has been sent or if an error
614 614 * occurs.
615 615 *
616 616 * Returns 0 for success, the socket errno value if sosendmsg fails, and
617 617 * -1 if sosendmsg returns success but uio_resid != 0
618 618 */
619 619 int
620 620 idm_iov_sosend(ksocket_t so, iovec_t *iop, int iovlen, size_t total_len)
621 621 {
622 622 struct msghdr msg;
623 623 int error;
624 624 size_t sent = 0;
625 625
626 626 ASSERT(iop != NULL);
627 627
628 628 /* Initialization of the message header. */
629 629 bzero(&msg, sizeof (msg));
630 630 msg.msg_iov = iop;
631 631 msg.msg_iovlen = iovlen;
632 632
633 633 if ((error = ksocket_sendmsg(so, &msg, 0, &sent, CRED()))
634 634 == 0) {
635 635 /* Data sent */
636 636 if (sent == total_len) {
637 637 /* All data sent. Success. */
638 638 return (0);
639 639 } else {
640 640 /* Not all data was sent. Failure */
641 641 return (-1);
642 642 }
643 643 }
644 644
645 645 /* Send failed */
646 646 return (error);
647 647 }
648 648
649 649 /*
650 650 * idm_iov_sorecv - Receives an iovec from a connection
651 651 *
652 652 * This function gets the data asked for from the socket. It will return
653 653 * only when all the requested data has been retrieved or if an error
654 654 * occurs.
655 655 *
656 656 * Returns 0 for success, the socket errno value if sorecvmsg fails, and
657 657 * -1 if sorecvmsg returns success but uio_resid != 0
658 658 */
659 659 int
660 660 idm_iov_sorecv(ksocket_t so, iovec_t *iop, int iovlen, size_t total_len)
661 661 {
662 662 struct msghdr msg;
663 663 int error;
664 664 size_t recv;
665 665 int flags;
666 666
667 667 ASSERT(iop != NULL);
668 668
669 669 /* Initialization of the message header. */
670 670 bzero(&msg, sizeof (msg));
671 671 msg.msg_iov = iop;
672 672 msg.msg_iovlen = iovlen;
673 673 flags = MSG_WAITALL;
674 674
675 675 if ((error = ksocket_recvmsg(so, &msg, flags, &recv, CRED()))
676 676 == 0) {
677 677 /* Received data */
678 678 if (recv == total_len) {
679 679 /* All requested data received. Success */
680 680 return (0);
681 681 } else {
682 682 /*
683 683 * Not all data was received. The connection has
684 684 * probably failed.
685 685 */
686 686 return (-1);
687 687 }
688 688 }
689 689
690 690 /* Receive failed */
691 691 return (error);
692 692 }
693 693
694 694 static void
695 695 idm_set_ini_preconnect_options(idm_so_conn_t *sc, boolean_t boot_conn)
696 696 {
697 697 int conn_abort = 10000;
698 698 int conn_notify = 2000;
699 699 int abort = 30000;
700 700
701 701 /* Pre-connect socket options */
702 702 (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
703 703 TCP_CONN_NOTIFY_THRESHOLD, (char *)&conn_notify, sizeof (int),
704 704 CRED());
705 705 if (boot_conn == B_FALSE) {
706 706 (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
707 707 TCP_CONN_ABORT_THRESHOLD, (char *)&conn_abort, sizeof (int),
708 708 CRED());
709 709 (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
710 710 TCP_ABORT_THRESHOLD,
711 711 (char *)&abort, sizeof (int), CRED());
712 712 }
713 713 }
714 714
715 715 static void
716 716 idm_set_postconnect_options(ksocket_t ks)
717 717 {
718 718 const int on = 1;
719 719
720 720 /* Set connect options */
721 721 (void) ksocket_setsockopt(ks, SOL_SOCKET, SO_RCVBUF,
722 722 (char *)&idm_so_rcvbuf, sizeof (int), CRED());
723 723 (void) ksocket_setsockopt(ks, SOL_SOCKET, SO_SNDBUF,
724 724 (char *)&idm_so_sndbuf, sizeof (int), CRED());
725 725 (void) ksocket_setsockopt(ks, IPPROTO_TCP, TCP_NODELAY,
726 726 (char *)&on, sizeof (on), CRED());
727 727 }
728 728
729 729 static uint32_t
730 730 n2h24(const uchar_t *ptr)
731 731 {
732 732 return ((ptr[0] << 16) | (ptr[1] << 8) | ptr[2]);
733 733 }
734 734
735 735
736 736 static idm_status_t
737 737 idm_sorecvhdr(idm_conn_t *ic, idm_pdu_t *pdu)
738 738 {
739 739 iscsi_hdr_t *bhs;
740 740 uint32_t hdr_digest_crc;
741 741 uint32_t crc_calculated;
742 742 void *new_hdr;
743 743 int ahslen = 0;
744 744 int total_len = 0;
745 745 int iovlen = 0;
746 746 struct iovec iov[2];
747 747 idm_so_conn_t *so_conn;
748 748 int rc;
749 749
750 750 so_conn = ic->ic_transport_private;
751 751
752 752 /*
753 753 * Read BHS
754 754 */
755 755 bhs = pdu->isp_hdr;
756 756 rc = idm_sorecv(so_conn->ic_so, pdu->isp_hdr, sizeof (iscsi_hdr_t));
757 757 if (rc != IDM_STATUS_SUCCESS) {
758 758 return (IDM_STATUS_FAIL);
759 759 }
760 760
761 761 /*
762 762 * Check actual AHS length against the amount available in the buffer
763 763 */
764 764 pdu->isp_hdrlen = sizeof (iscsi_hdr_t) +
765 765 (bhs->hlength * sizeof (uint32_t));
766 766 pdu->isp_datalen = n2h24(bhs->dlength);
767 767 if (ic->ic_conn_type == CONN_TYPE_TGT &&
768 768 pdu->isp_datalen > ic->ic_conn_params.max_recv_dataseglen) {
769 769 IDM_CONN_LOG(CE_WARN,
770 770 "idm_sorecvhdr: exceeded the max data segment length");
771 771 return (IDM_STATUS_FAIL);
772 772 }
773 773 if (bhs->hlength > IDM_SORX_CACHE_AHSLEN) {
774 774 /* Allocate a new header segment and change the callback */
775 775 new_hdr = kmem_alloc(pdu->isp_hdrlen, KM_SLEEP);
776 776 bcopy(pdu->isp_hdr, new_hdr, sizeof (iscsi_hdr_t));
777 777 pdu->isp_hdr = new_hdr;
778 778 pdu->isp_flags |= IDM_PDU_ADDL_HDR;
779 779
780 780 /*
781 781 * This callback will restore the expected values after
782 782 * the RX PDU has been processed.
783 783 */
784 784 pdu->isp_callback = idm_sorx_addl_pdu_cb;
785 785 }
786 786
787 787 /*
788 788 * Setup receipt of additional header and header digest (if enabled).
789 789 */
790 790 if (bhs->hlength > 0) {
791 791 iov[iovlen].iov_base = (caddr_t)(pdu->isp_hdr + 1);
792 792 ahslen = pdu->isp_hdrlen - sizeof (iscsi_hdr_t);
793 793 iov[iovlen].iov_len = ahslen;
794 794 total_len += iov[iovlen].iov_len;
795 795 iovlen++;
796 796 }
797 797
798 798 if (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST) {
799 799 iov[iovlen].iov_base = (caddr_t)&hdr_digest_crc;
800 800 iov[iovlen].iov_len = sizeof (hdr_digest_crc);
801 801 total_len += iov[iovlen].iov_len;
802 802 iovlen++;
803 803 }
804 804
805 805 if ((iovlen != 0) &&
806 806 (idm_iov_sorecv(so_conn->ic_so, &iov[0], iovlen,
807 807 total_len) != 0)) {
808 808 return (IDM_STATUS_FAIL);
809 809 }
810 810
811 811 /*
812 812 * Validate header digest if enabled
813 813 */
814 814 if (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST) {
815 815 crc_calculated = idm_crc32c(pdu->isp_hdr,
816 816 sizeof (iscsi_hdr_t) + ahslen);
817 817 if (crc_calculated != hdr_digest_crc) {
818 818 /* Invalid Header Digest */
819 819 return (IDM_STATUS_HEADER_DIGEST);
820 820 }
821 821 }
822 822
823 823 return (0);
824 824 }
825 825
826 826 /*
827 827 * idm_so_ini_conn_create()
828 828 * Allocate the sockets transport connection resources.
829 829 */
830 830 static idm_status_t
831 831 idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic)
832 832 {
833 833 ksocket_t so;
834 834 idm_so_conn_t *so_conn;
835 835 idm_status_t idmrc;
836 836
837 837 so = idm_socreate(cr->cr_domain, cr->cr_type,
838 838 cr->cr_protocol);
839 839 if (so == NULL) {
840 840 return (IDM_STATUS_FAIL);
841 841 }
842 842
843 843 /* Bind the socket if configured to do so */
844 844 if (cr->cr_bound) {
845 845 if (ksocket_bind(so, &cr->cr_bound_addr.sin,
846 846 SIZEOF_SOCKADDR(&cr->cr_bound_addr.sin), CRED()) != 0) {
847 847 idm_sodestroy(so);
848 848 return (IDM_STATUS_FAIL);
849 849 }
850 850 }
851 851
852 852 idmrc = idm_so_conn_create_common(ic, so);
853 853 if (idmrc != IDM_STATUS_SUCCESS) {
854 854 idm_soshutdown(so);
855 855 idm_sodestroy(so);
856 856 return (IDM_STATUS_FAIL);
857 857 }
858 858
859 859 so_conn = ic->ic_transport_private;
860 860 /* Set up socket options */
861 861 idm_set_ini_preconnect_options(so_conn, cr->cr_boot_conn);
862 862
863 863 return (IDM_STATUS_SUCCESS);
864 864 }
865 865
866 866 /*
867 867 * idm_so_ini_conn_destroy()
868 868 * Tear down the sockets transport connection resources.
869 869 */
870 870 static void
871 871 idm_so_ini_conn_destroy(idm_conn_t *ic)
872 872 {
873 873 idm_so_conn_destroy_common(ic);
874 874 }
875 875
876 876 /*
877 877 * idm_so_ini_conn_connect()
878 878 * Establish the connection referred to by the handle previously allocated via
879 879 * idm_so_ini_conn_create().
880 880 */
881 881 static idm_status_t
882 882 idm_so_ini_conn_connect(idm_conn_t *ic)
883 883 {
884 884 idm_so_conn_t *so_conn;
885 885 struct sonode *node = NULL;
886 886 int rc;
887 887 clock_t lbolt, conn_login_max, conn_login_interval;
888 888 boolean_t nonblock;
889 889
890 890 so_conn = ic->ic_transport_private;
891 891 nonblock = ic->ic_conn_params.nonblock_socket;
892 892 conn_login_max = ic->ic_conn_params.conn_login_max;
893 893 conn_login_interval = ddi_get_lbolt() +
894 894 SEC_TO_TICK(ic->ic_conn_params.conn_login_interval);
895 895
896 896 if (nonblock == B_TRUE) {
897 897 node = ((struct sonode *)(so_conn->ic_so));
898 898 /* Set to none block socket mode */
899 899 idm_so_socket_set_nonblock(node);
900 900 do {
901 901 rc = ksocket_connect(so_conn->ic_so,
902 902 &ic->ic_ini_dst_addr.sin,
903 903 (SIZEOF_SOCKADDR(&ic->ic_ini_dst_addr.sin)),
904 904 CRED());
905 905 if (rc == 0 || rc == EISCONN) {
906 906 /* socket success or already success */
907 907 rc = IDM_STATUS_SUCCESS;
908 908 break;
909 909 }
910 910 if ((rc == ETIMEDOUT) || (rc == ECONNREFUSED) ||
911 911 (rc == ECONNRESET)) {
912 912 /* socket connection timeout or refuse */
913 913 break;
914 914 }
915 915 lbolt = ddi_get_lbolt();
916 916 if (lbolt > conn_login_max) {
917 917 /*
918 918 * Connection retry timeout,
919 919 * failed connect to target.
920 920 */
921 921 break;
922 922 }
923 923 if (lbolt < conn_login_interval) {
924 924 if ((rc == EINPROGRESS) || (rc == EALREADY)) {
925 925 /* TCP connect still in progress */
926 926 delay(SEC_TO_TICK(IN_PROGRESS_DELAY));
927 927 continue;
928 928 } else {
929 929 delay(conn_login_interval - lbolt);
930 930 }
931 931 }
932 932 conn_login_interval = ddi_get_lbolt() +
933 933 SEC_TO_TICK(ic->ic_conn_params.conn_login_interval);
934 934 } while (rc != 0);
935 935 /* resume to nonblock mode */
936 936 if (rc == IDM_STATUS_SUCCESS) {
937 937 idm_so_socket_set_block(node);
938 938 }
939 939 } else {
940 940 rc = ksocket_connect(so_conn->ic_so, &ic->ic_ini_dst_addr.sin,
941 941 (SIZEOF_SOCKADDR(&ic->ic_ini_dst_addr.sin)), CRED());
942 942 }
943 943
944 944 if (rc != 0) {
945 945 idm_soshutdown(so_conn->ic_so);
946 946 return (IDM_STATUS_FAIL);
947 947 }
948 948
949 949 idm_so_conn_connect_common(ic);
950 950
951 951 idm_set_postconnect_options(so_conn->ic_so);
952 952
953 953 return (IDM_STATUS_SUCCESS);
954 954 }
955 955
956 956 idm_status_t
957 957 idm_so_tgt_conn_create(idm_conn_t *ic, ksocket_t new_so)
958 958 {
959 959 idm_status_t idmrc;
960 960
961 961 idm_set_postconnect_options(new_so);
962 962 idmrc = idm_so_conn_create_common(ic, new_so);
963 963
964 964 return (idmrc);
965 965 }
966 966
967 967 static void
968 968 idm_so_tgt_conn_destroy(idm_conn_t *ic)
969 969 {
970 970 idm_so_conn_destroy_common(ic);
971 971 }
972 972
973 973 /*
974 974 * idm_so_tgt_conn_connect()
975 975 * Establish the connection in ic, passed from idm_tgt_conn_finish(), which
976 976 * is invoked from the SM as a result of an inbound connection request.
977 977 */
978 978 static idm_status_t
979 979 idm_so_tgt_conn_connect(idm_conn_t *ic)
980 980 {
981 981 idm_so_conn_connect_common(ic);
982 982
983 983 return (IDM_STATUS_SUCCESS);
984 984 }
985 985
986 986 static idm_status_t
987 987 idm_so_conn_create_common(idm_conn_t *ic, ksocket_t new_so)
988 988 {
989 989 idm_so_conn_t *so_conn;
990 990
991 991 so_conn = kmem_zalloc(sizeof (idm_so_conn_t), KM_SLEEP);
992 992 so_conn->ic_so = new_so;
993 993
994 994 ic->ic_transport_private = so_conn;
995 995 ic->ic_transport_hdrlen = 0;
996 996
997 997 /* Set the scoreboarding flag on this connection */
998 998 ic->ic_conn_flags |= IDM_CONN_USE_SCOREBOARD;
999 999 ic->ic_conn_params.max_recv_dataseglen =
1000 1000 ISCSI_DEFAULT_MAX_RECV_SEG_LEN;
1001 1001 ic->ic_conn_params.max_xmit_dataseglen =
1002 1002 ISCSI_DEFAULT_MAX_XMIT_SEG_LEN;
1003 1003
1004 1004 /*
1005 1005 * Initialize tx thread mutex and list
1006 1006 */
1007 1007 mutex_init(&so_conn->ic_tx_mutex, NULL, MUTEX_DEFAULT, NULL);
1008 1008 cv_init(&so_conn->ic_tx_cv, NULL, CV_DEFAULT, NULL);
1009 1009 list_create(&so_conn->ic_tx_list, sizeof (idm_pdu_t),
1010 1010 offsetof(idm_pdu_t, idm_tx_link));
1011 1011
1012 1012 return (IDM_STATUS_SUCCESS);
1013 1013 }
1014 1014
1015 1015 static void
1016 1016 idm_so_conn_destroy_common(idm_conn_t *ic)
1017 1017 {
1018 1018 idm_so_conn_t *so_conn = ic->ic_transport_private;
1019 1019
1020 1020 ic->ic_transport_private = NULL;
1021 1021 idm_sodestroy(so_conn->ic_so);
1022 1022 list_destroy(&so_conn->ic_tx_list);
1023 1023 mutex_destroy(&so_conn->ic_tx_mutex);
1024 1024 cv_destroy(&so_conn->ic_tx_cv);
1025 1025
1026 1026 kmem_free(so_conn, sizeof (idm_so_conn_t));
1027 1027 }
1028 1028
1029 1029 static void
1030 1030 idm_so_conn_connect_common(idm_conn_t *ic)
1031 1031 {
1032 1032 idm_so_conn_t *so_conn;
1033 1033 struct sockaddr_in6 t_addr;
1034 1034 socklen_t t_addrlen = 0;
1035 1035
1036 1036 so_conn = ic->ic_transport_private;
1037 1037 bzero(&t_addr, sizeof (struct sockaddr_in6));
1038 1038 t_addrlen = sizeof (struct sockaddr_in6);
1039 1039
1040 1040 /* Set the local and remote addresses in the idm conn handle */
1041 1041 (void) ksocket_getsockname(so_conn->ic_so, (struct sockaddr *)&t_addr,
1042 1042 &t_addrlen, CRED());
1043 1043 bcopy(&t_addr, &ic->ic_laddr, t_addrlen);
1044 1044 (void) ksocket_getpeername(so_conn->ic_so, (struct sockaddr *)&t_addr,
1045 1045 &t_addrlen, CRED());
1046 1046 bcopy(&t_addr, &ic->ic_raddr, t_addrlen);
1047 1047
1048 1048 mutex_enter(&ic->ic_mutex);
1049 1049 so_conn->ic_tx_thread = thread_create(NULL, 0, idm_sotx_thread, ic, 0,
1050 1050 &p0, TS_RUN, minclsyspri);
1051 1051 so_conn->ic_rx_thread = thread_create(NULL, 0, idm_sorx_thread, ic, 0,
1052 1052 &p0, TS_RUN, minclsyspri);
1053 1053
1054 1054 while (so_conn->ic_rx_thread_did == 0 ||
1055 1055 so_conn->ic_tx_thread_did == 0)
1056 1056 cv_wait(&ic->ic_cv, &ic->ic_mutex);
1057 1057 mutex_exit(&ic->ic_mutex);
1058 1058 }
1059 1059
1060 1060 /*
1061 1061 * idm_so_conn_disconnect()
1062 1062 * Shutdown the socket connection and stop the thread
1063 1063 */
1064 1064 static void
1065 1065 idm_so_conn_disconnect(idm_conn_t *ic)
1066 1066 {
1067 1067 idm_so_conn_t *so_conn;
1068 1068
1069 1069 so_conn = ic->ic_transport_private;
1070 1070
1071 1071 mutex_enter(&ic->ic_mutex);
1072 1072 so_conn->ic_rx_thread_running = B_FALSE;
1073 1073 so_conn->ic_tx_thread_running = B_FALSE;
1074 1074 /* We need to wakeup the TX thread */
1075 1075 mutex_enter(&so_conn->ic_tx_mutex);
1076 1076 cv_signal(&so_conn->ic_tx_cv);
1077 1077 mutex_exit(&so_conn->ic_tx_mutex);
1078 1078 mutex_exit(&ic->ic_mutex);
1079 1079
1080 1080 /* This should wakeup the RX thread if it is sleeping */
1081 1081 idm_soshutdown(so_conn->ic_so);
1082 1082
1083 1083 thread_join(so_conn->ic_tx_thread_did);
1084 1084 thread_join(so_conn->ic_rx_thread_did);
1085 1085 }
1086 1086
1087 1087 /*
1088 1088 * idm_so_tgt_svc_create()
1089 1089 * Establish a service on an IP address and port. idm_svc_req_t contains
1090 1090 * the service parameters.
1091 1091 */
1092 1092 /*ARGSUSED*/
1093 1093 static idm_status_t
1094 1094 idm_so_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is)
1095 1095 {
1096 1096 idm_so_svc_t *so_svc;
1097 1097
1098 1098 so_svc = kmem_zalloc(sizeof (idm_so_svc_t), KM_SLEEP);
1099 1099
1100 1100 /* Set the new sockets service in svc handle */
1101 1101 is->is_so_svc = (void *)so_svc;
1102 1102
1103 1103 return (IDM_STATUS_SUCCESS);
1104 1104 }
1105 1105
1106 1106 /*
1107 1107 * idm_so_tgt_svc_destroy()
1108 1108 * Teardown sockets resources allocated in idm_so_tgt_svc_create()
1109 1109 */
1110 1110 static void
1111 1111 idm_so_tgt_svc_destroy(idm_svc_t *is)
1112 1112 {
1113 1113 /* the socket will have been torn down; free the service */
1114 1114 kmem_free(is->is_so_svc, sizeof (idm_so_svc_t));
1115 1115 }
1116 1116
1117 1117 /*
1118 1118 * idm_so_tgt_svc_online()
1119 1119 * Launch a watch thread on the svc allocated in idm_so_tgt_svc_create()
1120 1120 */
1121 1121
1122 1122 static idm_status_t
1123 1123 idm_so_tgt_svc_online(idm_svc_t *is)
1124 1124 {
1125 1125 idm_so_svc_t *so_svc;
1126 1126 idm_svc_req_t *sr = &is->is_svc_req;
1127 1127 struct sockaddr_in6 sin6_ip;
1128 1128 const uint32_t on = 1;
1129 1129 const uint32_t off = 0;
1130 1130
1131 1131 mutex_enter(&is->is_mutex);
1132 1132 so_svc = (idm_so_svc_t *)is->is_so_svc;
1133 1133
1134 1134 /*
1135 1135 * Try creating an IPv6 socket first
1136 1136 */
1137 1137 if ((so_svc->is_so = idm_socreate(PF_INET6, SOCK_STREAM, 0)) == NULL) {
1138 1138 mutex_exit(&is->is_mutex);
1139 1139 return (IDM_STATUS_FAIL);
1140 1140 } else {
1141 1141 bzero(&sin6_ip, sizeof (sin6_ip));
1142 1142 sin6_ip.sin6_family = AF_INET6;
1143 1143 sin6_ip.sin6_port = htons(sr->sr_port);
1144 1144 sin6_ip.sin6_addr = in6addr_any;
1145 1145
1146 1146 (void) ksocket_setsockopt(so_svc->is_so, SOL_SOCKET,
1147 1147 SO_REUSEADDR, (char *)&on, sizeof (on), CRED());
1148 1148 /*
1149 1149 * Turn off SO_MAC_EXEMPT so future sobinds succeed
1150 1150 */
1151 1151 (void) ksocket_setsockopt(so_svc->is_so, SOL_SOCKET,
1152 1152 SO_MAC_EXEMPT, (char *)&off, sizeof (off), CRED());
1153 1153
1154 1154 if (ksocket_bind(so_svc->is_so, (struct sockaddr *)&sin6_ip,
1155 1155 sizeof (sin6_ip), CRED()) != 0) {
1156 1156 mutex_exit(&is->is_mutex);
1157 1157 idm_sodestroy(so_svc->is_so);
1158 1158 return (IDM_STATUS_FAIL);
1159 1159 }
1160 1160 }
1161 1161
1162 1162 idm_set_postconnect_options(so_svc->is_so);
1163 1163
1164 1164 if (ksocket_listen(so_svc->is_so, 5, CRED()) != 0) {
1165 1165 mutex_exit(&is->is_mutex);
1166 1166 idm_soshutdown(so_svc->is_so);
1167 1167 idm_sodestroy(so_svc->is_so);
1168 1168 return (IDM_STATUS_FAIL);
1169 1169 }
1170 1170
1171 1171 /* Launch a watch thread */
1172 1172 so_svc->is_thread = thread_create(NULL, 0, idm_so_svc_port_watcher,
1173 1173 is, 0, &p0, TS_RUN, minclsyspri);
1174 1174
1175 1175 if (so_svc->is_thread == NULL) {
1176 1176 /* Failure to launch; teardown the socket */
1177 1177 mutex_exit(&is->is_mutex);
1178 1178 idm_soshutdown(so_svc->is_so);
1179 1179 idm_sodestroy(so_svc->is_so);
1180 1180 return (IDM_STATUS_FAIL);
1181 1181 }
1182 1182 ksocket_hold(so_svc->is_so);
1183 1183 /* Wait for the port watcher thread to start */
1184 1184 while (!so_svc->is_thread_running)
1185 1185 cv_wait(&is->is_cv, &is->is_mutex);
1186 1186 mutex_exit(&is->is_mutex);
1187 1187
1188 1188 return (IDM_STATUS_SUCCESS);
1189 1189 }
1190 1190
1191 1191 /*
1192 1192 * idm_so_tgt_svc_offline
1193 1193 *
1194 1194 * Stop listening on the IP address and port identified by idm_svc_t.
1195 1195 */
1196 1196 static void
1197 1197 idm_so_tgt_svc_offline(idm_svc_t *is)
1198 1198 {
1199 1199 idm_so_svc_t *so_svc;
1200 1200 mutex_enter(&is->is_mutex);
1201 1201 so_svc = (idm_so_svc_t *)is->is_so_svc;
1202 1202 so_svc->is_thread_running = B_FALSE;
1203 1203 mutex_exit(&is->is_mutex);
1204 1204
1205 1205 /*
1206 1206 * Teardown socket
1207 1207 */
1208 1208 idm_sodestroy(so_svc->is_so);
1209 1209
1210 1210 /*
1211 1211 * Now we expect the port watcher thread to terminate
1212 1212 */
1213 1213 thread_join(so_svc->is_thread_did);
1214 1214 }
1215 1215
1216 1216 /*
1217 1217 * Watch thread for target service connection establishment.
 *
 * arg is the idm_svc_t of the service.  The thread marks itself running,
 * records its thread id and signals is_cv, then accepts connections on the
 * service socket for as long as is_thread_running stays set.  is_mutex is
 * dropped around the accept and the per-connection setup and is re-taken
 * before the loop condition is evaluated again.  On the way out the thread
 * releases its hold on the listening socket and calls thread_exit().
1218 1218 */
1219 1219 void
1220 1220 idm_so_svc_port_watcher(void *arg)
1221 1221 {
1222 1222 idm_svc_t *svc = arg;
1223 1223 ksocket_t new_so;
1224 1224 idm_conn_t *ic;
1225 1225 idm_status_t idmrc;
1226 1226 idm_so_svc_t *so_svc;
1227 1227 int rc;
1228 1228 const uint32_t off = 0;
1229 1229 struct sockaddr_in6 t_addr;
1230 1230 socklen_t t_addrlen;
1231 1231
1232 1232 bzero(&t_addr, sizeof (struct sockaddr_in6));
1233 1233 t_addrlen = sizeof (struct sockaddr_in6);
1234 1234 mutex_enter(&svc->is_mutex);
1235 1235
/* Publish the thread state that idm_so_tgt_svc_online() waits for */
1236 1236 so_svc = svc->is_so_svc;
1237 1237 so_svc->is_thread_running = B_TRUE;
1238 1238 so_svc->is_thread_did = so_svc->is_thread->t_did;
1239 1239
1240 1240 cv_signal(&svc->is_cv);
1241 1241
↓ open down ↓ |
1241 lines elided |
↑ open up ↑ |
1242 1242 IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) online", (void *)svc,
1243 1243 svc->is_svc_req.sr_port);
1244 1244
/* Every path back to the loop condition re-acquires is_mutex first */
1245 1245 while (so_svc->is_thread_running) {
1246 1246 mutex_exit(&svc->is_mutex);
1247 1247
1248 1248 if ((rc = ksocket_accept(so_svc->is_so,
1249 1249 (struct sockaddr *)&t_addr, &t_addrlen,
1250 1250 &new_so, CRED())) != 0) {
1251 1251 mutex_enter(&svc->is_mutex);
1252 - if (rc == ECONNABORTED)
1253 - continue;
1254 - /* Connection problem */
1255 - break;
1252 + if (rc != ECONNABORTED && rc != EINTR) {
1253 + IDM_SVC_LOG(CE_NOTE, "idm_so_svc_port_watcher:"
1254 + " ksocket_accept failed %d", rc);
1255 + }
1256 + /*
1257 + * Unclean shutdown of this thread is not handled
1258 + * wait for !is_thread_running.
1259 + */
1260 + continue;
1256 1261 }
1257 1262 /*
1258 1263 * Turn off SO_MAC_EXEMPT so future sobinds succeed
1259 1264 */
1260 1265 (void) ksocket_setsockopt(new_so, SOL_SOCKET, SO_MAC_EXEMPT,
1261 1266 (char *)&off, sizeof (off), CRED());
1262 1267
/* Create the idm connection object for the accepted socket */
1263 1268 idmrc = idm_svc_conn_create(svc, IDM_TRANSPORT_TYPE_SOCKETS,
1264 1269 &ic);
1265 1270 if (idmrc != IDM_STATUS_SUCCESS) {
1266 1271 /* Drop connection */
1267 1272 idm_soshutdown(new_so);
1268 1273 idm_sodestroy(new_so);
1269 1274 mutex_enter(&svc->is_mutex);
1270 1275 continue;
1271 1276 }
1272 1277
/* Attach the socket to the connection; undo both on failure */
1273 1278 idmrc = idm_so_tgt_conn_create(ic, new_so);
1274 1279 if (idmrc != IDM_STATUS_SUCCESS) {
1275 1280 idm_svc_conn_destroy(ic);
1276 1281 idm_soshutdown(new_so);
1277 1282 idm_sodestroy(new_so);
1278 1283 mutex_enter(&svc->is_mutex);
1279 1284 continue;
1280 1285 }
1281 1286
1282 1287 /*
1283 1288 * Kick the state machine. At CS_S3_XPT_UP the state machine
1284 1289 * will notify the client (target) about the new connection.
1285 1290 */
1286 1291 idm_conn_event(ic, CE_CONNECT_ACCEPT, NULL);
1287 1292
1288 1293 mutex_enter(&svc->is_mutex);
1289 1294 }
/* Drop the socket hold taken in idm_so_tgt_svc_online() */
1290 1295 ksocket_rele(so_svc->is_so);
1291 1296 so_svc->is_thread_running = B_FALSE;
1292 1297 mutex_exit(&svc->is_mutex);
1293 1298
1294 1299 IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) offline", (void *)svc,
1295 1300 svc->is_svc_req.sr_port);
1296 1301
1297 1302 thread_exit();
1298 1303 }
1299 1304
1300 1305 /*
1301 1306 * idm_so_free_task_rsrc() stops any ongoing processing of the task and
1302 1307 * frees resources associated with the task.
1303 1308 *
1304 1309 * It's not clear that this should return idm_status_t. What do we do
1305 1310 * if it fails?
1306 1311 */
1307 1312 static idm_status_t
1308 1313 idm_so_free_task_rsrc(idm_task_t *idt)
1309 1314 {
1310 1315 idm_buf_t *idb, *next_idb;
1311 1316
1312 1317 /*
1313 1318 * There is nothing to cleanup on initiator connections
1314 1319 */
1315 1320 if (IDM_CONN_ISINI(idt->idt_ic))
1316 1321 return (IDM_STATUS_SUCCESS);
1317 1322
1318 1323 /*
1319 1324 * If this is a target connection, call idm_buf_rx_from_ini_done for
1320 1325 * any buffer on the "outbufv" list with idb->idb_in_transport==B_TRUE.
1321 1326 *
1322 1327 * In addition, remove any buffers associated with this task from
1323 1328 * the ic_tx_list. We'll do this by walking the idt_inbufv list, but
1324 1329 * items don't actually get removed from that list (and completion
1325 1330 * routines called) until idm_task_cleanup.
1326 1331 */
1327 1332 mutex_enter(&idt->idt_mutex);
1328 1333
1329 1334 for (idb = list_head(&idt->idt_outbufv); idb != NULL; idb = next_idb) {
1330 1335 next_idb = list_next(&idt->idt_outbufv, idb);
1331 1336 if (idb->idb_in_transport) {
1332 1337 /*
1333 1338 * idm_buf_rx_from_ini_done releases idt->idt_mutex
1334 1339 */
1335 1340 DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
1336 1341 uintptr_t, idb->idb_buf,
1337 1342 uint32_t, idb->idb_bufoffset,
1338 1343 uint64_t, 0, uint32_t, 0, uint32_t, 0,
1339 1344 uint32_t, idb->idb_xfer_len,
1340 1345 int, XFER_BUF_RX_FROM_INI);
1341 1346 idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_ABORTED);
1342 1347 mutex_enter(&idt->idt_mutex);
1343 1348 }
1344 1349 }
1345 1350
1346 1351 for (idb = list_head(&idt->idt_inbufv); idb != NULL; idb = next_idb) {
1347 1352 next_idb = list_next(&idt->idt_inbufv, idb);
1348 1353 /*
1349 1354 * We want to remove these items from the tx_list as well,
1350 1355 * but knowing it's in the idt_inbufv list is not a guarantee
1351 1356 * that it's in the tx_list. If it's on the tx list then
1352 1357 * let idm_sotx_thread() clean it up.
1353 1358 */
1354 1359 if (idb->idb_in_transport && !idb->idb_tx_thread) {
1355 1360 /*
1356 1361 * idm_buf_tx_to_ini_done releases idt->idt_mutex
1357 1362 */
1358 1363 DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
1359 1364 uintptr_t, idb->idb_buf,
1360 1365 uint32_t, idb->idb_bufoffset,
1361 1366 uint64_t, 0, uint32_t, 0, uint32_t, 0,
1362 1367 uint32_t, idb->idb_xfer_len,
1363 1368 int, XFER_BUF_TX_TO_INI);
1364 1369 idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
1365 1370 mutex_enter(&idt->idt_mutex);
1366 1371 }
1367 1372 }
1368 1373
1369 1374 mutex_exit(&idt->idt_mutex);
1370 1375
1371 1376 return (IDM_STATUS_SUCCESS);
1372 1377 }
1373 1378
1374 1379 /*
1375 1380 * idm_so_negotiate_key_values() validates the key values for this connection
1376 1381 */
1377 1382 /* ARGSUSED */
1378 1383 static kv_status_t
1379 1384 idm_so_negotiate_key_values(idm_conn_t *it, nvlist_t *request_nvl,
1380 1385 nvlist_t *response_nvl, nvlist_t *negotiated_nvl)
1381 1386 {
1382 1387 /* All parameters are negotiated at the iscsit level */
1383 1388 return (KV_HANDLED);
1384 1389 }
1385 1390
1386 1391 /*
1387 1392 * idm_so_notice_key_values() activates the negotiated key values for
1388 1393 * this connection.
1389 1394 */
1390 1395 static void
1391 1396 idm_so_notice_key_values(idm_conn_t *it, nvlist_t *negotiated_nvl)
1392 1397 {
1393 1398 char *nvp_name;
1394 1399 nvpair_t *nvp;
1395 1400 nvpair_t *next_nvp;
1396 1401 int nvrc;
1397 1402 idm_status_t idm_status;
1398 1403 const idm_kv_xlate_t *ikvx;
1399 1404 uint64_t num_val;
1400 1405
1401 1406 for (nvp = nvlist_next_nvpair(negotiated_nvl, NULL);
1402 1407 nvp != NULL; nvp = next_nvp) {
1403 1408 next_nvp = nvlist_next_nvpair(negotiated_nvl, nvp);
1404 1409 nvp_name = nvpair_name(nvp);
1405 1410
1406 1411 ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name));
1407 1412 switch (ikvx->ik_key_id) {
1408 1413 case KI_HEADER_DIGEST:
1409 1414 case KI_DATA_DIGEST:
1410 1415 idm_status = idm_so_handle_digest(it, nvp, ikvx);
1411 1416 ASSERT(idm_status == 0);
1412 1417
1413 1418 /* Remove processed item from negotiated_nvl list */
1414 1419 nvrc = nvlist_remove_all(
1415 1420 negotiated_nvl, ikvx->ik_key_name);
1416 1421 ASSERT(nvrc == 0);
1417 1422 break;
1418 1423 case KI_MAX_RECV_DATA_SEGMENT_LENGTH:
1419 1424 /*
1420 1425 * Just pass the value down to idm layer.
1421 1426 * No need to remove it from negotiated_nvl list here.
1422 1427 */
1423 1428 nvrc = nvpair_value_uint64(nvp, &num_val);
1424 1429 ASSERT(nvrc == 0);
1425 1430 it->ic_conn_params.max_xmit_dataseglen =
1426 1431 (uint32_t)num_val;
1427 1432 break;
1428 1433 default:
1429 1434 break;
1430 1435 }
1431 1436 }
1432 1437 }
1433 1438
1434 1439 /*
1435 1440 * idm_so_declare_key_values() declares the key values for this connection
1436 1441 */
1437 1442 /* ARGSUSED */
1438 1443 static kv_status_t
1439 1444 idm_so_declare_key_values(idm_conn_t *it, nvlist_t *config_nvl,
1440 1445 nvlist_t *outgoing_nvl)
1441 1446 {
1442 1447 char *nvp_name;
1443 1448 nvpair_t *nvp;
1444 1449 nvpair_t *next_nvp;
1445 1450 kv_status_t kvrc;
1446 1451 int nvrc = 0;
1447 1452 const idm_kv_xlate_t *ikvx;
1448 1453 uint64_t num_val;
1449 1454
1450 1455 for (nvp = nvlist_next_nvpair(config_nvl, NULL);
1451 1456 nvp != NULL && nvrc == 0; nvp = next_nvp) {
1452 1457 next_nvp = nvlist_next_nvpair(config_nvl, nvp);
1453 1458 nvp_name = nvpair_name(nvp);
1454 1459
1455 1460 ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name));
1456 1461 switch (ikvx->ik_key_id) {
1457 1462 case KI_MAX_RECV_DATA_SEGMENT_LENGTH:
1458 1463 if ((nvrc = nvpair_value_uint64(nvp, &num_val)) != 0) {
1459 1464 break;
1460 1465 }
1461 1466 if (outgoing_nvl &&
1462 1467 (nvrc = nvlist_add_uint64(outgoing_nvl,
1463 1468 nvp_name, num_val)) != 0) {
1464 1469 break;
1465 1470 }
1466 1471 it->ic_conn_params.max_recv_dataseglen =
1467 1472 (uint32_t)num_val;
1468 1473 break;
1469 1474 default:
1470 1475 break;
1471 1476 }
1472 1477 }
1473 1478 kvrc = idm_nvstat_to_kvstat(nvrc);
1474 1479 return (kvrc);
1475 1480 }
1476 1481
1477 1482 static idm_status_t
1478 1483 idm_so_handle_digest(idm_conn_t *it, nvpair_t *digest_choice,
1479 1484 const idm_kv_xlate_t *ikvx)
1480 1485 {
1481 1486 int nvrc;
1482 1487 char *digest_choice_string;
1483 1488
1484 1489 nvrc = nvpair_value_string(digest_choice,
1485 1490 &digest_choice_string);
1486 1491 ASSERT(nvrc == 0);
1487 1492 if (strcasecmp(digest_choice_string, "crc32c") == 0) {
1488 1493 switch (ikvx->ik_key_id) {
1489 1494 case KI_HEADER_DIGEST:
1490 1495 it->ic_conn_flags |= IDM_CONN_HEADER_DIGEST;
1491 1496 break;
1492 1497 case KI_DATA_DIGEST:
1493 1498 it->ic_conn_flags |= IDM_CONN_DATA_DIGEST;
1494 1499 break;
1495 1500 default:
1496 1501 ASSERT(0);
1497 1502 break;
1498 1503 }
1499 1504 } else if (strcasecmp(digest_choice_string, "none") == 0) {
1500 1505 switch (ikvx->ik_key_id) {
1501 1506 case KI_HEADER_DIGEST:
1502 1507 it->ic_conn_flags &= ~IDM_CONN_HEADER_DIGEST;
1503 1508 break;
1504 1509 case KI_DATA_DIGEST:
1505 1510 it->ic_conn_flags &= ~IDM_CONN_DATA_DIGEST;
1506 1511 break;
1507 1512 default:
1508 1513 ASSERT(0);
1509 1514 break;
1510 1515 }
1511 1516 } else {
1512 1517 ASSERT(0);
1513 1518 }
1514 1519
1515 1520 return (IDM_STATUS_SUCCESS);
1516 1521 }
1517 1522
1518 1523
1519 1524 /*
1520 1525 * idm_so_conn_is_capable() verifies that the passed connection is provided
1521 1526 * for by the sockets interface.
1522 1527 */
1523 1528 /* ARGSUSED */
1524 1529 static boolean_t
1525 1530 idm_so_conn_is_capable(idm_conn_req_t *ic, idm_transport_caps_t *caps)
1526 1531 {
1527 1532 return (B_TRUE);
1528 1533 }
1529 1534
1530 1535 /*
1531 1536 * idm_so_rx_datain() validates the Data Sequence number of the PDU. The
1532 1537 * idm_sorecv_scsidata() function invoked earlier actually reads the data
1533 1538 * off the socket into the appropriate buffers.
1534 1539 */
1535 1540 static void
1536 1541 idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu)
1537 1542 {
1538 1543 iscsi_data_hdr_t *bhs;
1539 1544 idm_task_t *idt;
1540 1545 idm_buf_t *idb;
1541 1546 uint32_t datasn;
1542 1547 size_t offset;
1543 1548 iscsi_hdr_t *ihp = (iscsi_hdr_t *)pdu->isp_hdr;
1544 1549 iscsi_data_rsp_hdr_t *idrhp = (iscsi_data_rsp_hdr_t *)ihp;
1545 1550
1546 1551 ASSERT(ic != NULL);
1547 1552 ASSERT(pdu != NULL);
1548 1553
1549 1554 bhs = (iscsi_data_hdr_t *)pdu->isp_hdr;
1550 1555 datasn = ntohl(bhs->datasn);
1551 1556 offset = ntohl(bhs->offset);
1552 1557
1553 1558 ASSERT(bhs->opcode == ISCSI_OP_SCSI_DATA_RSP);
1554 1559
1555 1560 /*
1556 1561 * Look up the task corresponding to the initiator task tag
1557 1562 * to get the buffers affiliated with the task.
1558 1563 */
1559 1564 idt = idm_task_find(ic, bhs->itt, bhs->ttt);
1560 1565 if (idt == NULL) {
1561 1566 IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: failed to find task");
1562 1567 idm_pdu_rx_protocol_error(ic, pdu);
1563 1568 return;
1564 1569 }
1565 1570
1566 1571 idb = pdu->isp_sorx_buf;
1567 1572 if (idb == NULL) {
1568 1573 IDM_CONN_LOG(CE_WARN,
1569 1574 "idm_so_rx_datain: failed to find buffer");
1570 1575 idm_task_rele(idt);
1571 1576 idm_pdu_rx_protocol_error(ic, pdu);
1572 1577 return;
1573 1578 }
1574 1579
1575 1580 /*
1576 1581 * DataSN values should be sequential and should not have any gaps or
1577 1582 * repetitions. Check the DataSN with the one stored in the task.
1578 1583 */
1579 1584 if (datasn == idt->idt_exp_datasn) {
1580 1585 idt->idt_exp_datasn++; /* keep track of DataSN received */
1581 1586 } else {
1582 1587 IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: datasn out of order");
1583 1588 idm_task_rele(idt);
1584 1589 idm_pdu_rx_protocol_error(ic, pdu);
1585 1590 return;
1586 1591 }
1587 1592
1588 1593 /*
1589 1594 * PDUs in a sequence should be in continuously increasing
1590 1595 * address offset
1591 1596 */
1592 1597 if (offset != idb->idb_exp_offset) {
1593 1598 IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: unexpected offset");
1594 1599 idm_task_rele(idt);
1595 1600 idm_pdu_rx_protocol_error(ic, pdu);
1596 1601 return;
1597 1602 }
1598 1603 /* Expected next relative buffer offset */
1599 1604 idb->idb_exp_offset += n2h24(bhs->dlength);
1600 1605 idt->idt_rx_bytes += n2h24(bhs->dlength);
1601 1606
1602 1607 idm_task_rele(idt);
1603 1608
1604 1609 /*
1605 1610 * For now call scsi_rsp which will process the data rsp
1606 1611 * Revisit, need to provide an explicit client entry point for
1607 1612 * phase collapse completions.
1608 1613 */
1609 1614 if (((ihp->opcode & ISCSI_OPCODE_MASK) == ISCSI_OP_SCSI_DATA_RSP) &&
1610 1615 (idrhp->flags & ISCSI_FLAG_DATA_STATUS)) {
1611 1616 (*ic->ic_conn_ops.icb_rx_scsi_rsp)(ic, pdu);
1612 1617 }
1613 1618
1614 1619 idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1615 1620 }
1616 1621
1617 1622 /*
1618 1623 * The idm_so_rx_dataout() function is used by the iSCSI target to read
1619 1624 * data from the Data-Out PDU sent by the iSCSI initiator.
1620 1625 *
1621 1626 * This function gets the Initiator Task Tag from the PDU BHS and looks up the
1622 1627 * task to get the buffers associated with the PDU. A PDU might span buffers.
1623 1628 * The data is then read into the respective buffer.
1624 1629 */
1625 1630 static void
1626 1631 idm_so_rx_dataout(idm_conn_t *ic, idm_pdu_t *pdu)
1627 1632 {
1628 1633
1629 1634 iscsi_data_hdr_t *bhs;
1630 1635 idm_task_t *idt;
1631 1636 idm_buf_t *idb;
1632 1637 size_t offset;
1633 1638
1634 1639 ASSERT(ic != NULL);
1635 1640 ASSERT(pdu != NULL);
1636 1641
1637 1642 bhs = (iscsi_data_hdr_t *)pdu->isp_hdr;
1638 1643 offset = ntohl(bhs->offset);
1639 1644 ASSERT(bhs->opcode == ISCSI_OP_SCSI_DATA);
1640 1645
1641 1646 /*
1642 1647 * Look up the task corresponding to the initiator task tag
1643 1648 * to get the buffers affiliated with the task.
1644 1649 */
1645 1650 idt = idm_task_find(ic, bhs->itt, bhs->ttt);
1646 1651 if (idt == NULL) {
1647 1652 IDM_CONN_LOG(CE_WARN,
1648 1653 "idm_so_rx_dataout: failed to find task");
1649 1654 idm_pdu_rx_protocol_error(ic, pdu);
1650 1655 return;
1651 1656 }
1652 1657
1653 1658 idb = pdu->isp_sorx_buf;
1654 1659 if (idb == NULL) {
1655 1660 IDM_CONN_LOG(CE_WARN,
1656 1661 "idm_so_rx_dataout: failed to find buffer");
1657 1662 idm_task_rele(idt);
1658 1663 idm_pdu_rx_protocol_error(ic, pdu);
1659 1664 return;
1660 1665 }
1661 1666
1662 1667 /* Keep track of data transferred - check data offsets */
1663 1668 if (offset != idb->idb_exp_offset) {
1664 1669 IDM_CONN_LOG(CE_NOTE, "idm_so_rx_dataout: offset out of seq: "
1665 1670 "%ld, %d", offset, idb->idb_exp_offset);
1666 1671 idm_task_rele(idt);
1667 1672 idm_pdu_rx_protocol_error(ic, pdu);
1668 1673 return;
1669 1674 }
1670 1675 /* Expected next relative offset */
1671 1676 idb->idb_exp_offset += ntoh24(bhs->dlength);
1672 1677 idt->idt_rx_bytes += n2h24(bhs->dlength);
1673 1678
1674 1679 /*
1675 1680 * Call the buffer callback when the transfer is complete
1676 1681 *
1677 1682 * The connection state machine should only abort tasks after
1678 1683 * shutting down the connection so we are assured that there
1679 1684 * won't be a simultaneous attempt to abort this task at the
1680 1685 * same time as we are processing this PDU (due to a connection
1681 1686 * state change).
1682 1687 */
1683 1688 if (bhs->flags & ISCSI_FLAG_FINAL) {
1684 1689 /*
1685 1690 * We only want to call idm_buf_rx_from_ini_done once
1686 1691 * per transfer. It's possible that this task has
1687 1692 * already been aborted in which case
1688 1693 * idm_so_free_task_rsrc will call idm_buf_rx_from_ini_done
1689 1694 * for each buffer with idb_in_transport==B_TRUE. To
1690 1695 * close this window and ensure that this doesn't happen,
1691 1696 * we'll clear idb->idb_in_transport now while holding
1692 1697 * the task mutex. This is only really an issue for
1693 1698 * SCSI task abort -- if tasks were being aborted because
1694 1699 * of a connection state change the state machine would
1695 1700 * have already stopped the receive thread.
1696 1701 */
1697 1702 mutex_enter(&idt->idt_mutex);
1698 1703
1699 1704 /*
1700 1705 * Release the task hold here (obtained in idm_task_find)
1701 1706 * because the task may complete synchronously during
1702 1707 * idm_buf_rx_from_ini_done. Since we still have an active
1703 1708 * buffer we know there is at least one additional hold on idt.
1704 1709 */
1705 1710 idm_task_rele(idt);
1706 1711
1707 1712 /*
1708 1713 * idm_buf_rx_from_ini_done releases idt->idt_mutex
1709 1714 */
1710 1715 DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
1711 1716 uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
1712 1717 uint64_t, 0, uint32_t, 0, uint32_t, 0,
1713 1718 uint32_t, idb->idb_xfer_len,
1714 1719 int, XFER_BUF_RX_FROM_INI);
1715 1720 idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_SUCCESS);
1716 1721 idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1717 1722 return;
1718 1723 }
1719 1724
1720 1725 idm_task_rele(idt);
1721 1726 idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1722 1727 }
1723 1728
1724 1729 /*
1725 1730 * The idm_so_rx_rtt() function is used by the iSCSI initiator to handle
1726 1731 * the R2T PDU sent by the iSCSI target indicating that it is ready to
1727 1732 * accept data. This gets the Initiator Task Tag (itt) from the PDU BHS
1728 1733 * and looks up the task in the task tree using the itt to get the output
1729 1734 * buffers associated the task. The R2T PDU contains the offset of the
1730 1735 * requested data and the data length. This function then constructs a
1731 1736 * sequence of iSCSI PDUs and outputs the requested data. Each Data-Out
1732 1737 * PDU is associated with the R2T by the Target Transfer Tag (ttt).
1733 1738 */
1734 1739
1735 1740 static void
1736 1741 idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu)
1737 1742 {
1738 1743 idm_task_t *idt;
1739 1744 idm_buf_t *idb;
1740 1745 iscsi_rtt_hdr_t *rtt_hdr;
1741 1746 uint32_t data_offset;
1742 1747 uint32_t data_length;
1743 1748
1744 1749 ASSERT(ic != NULL);
1745 1750 ASSERT(pdu != NULL);
1746 1751
1747 1752 rtt_hdr = (iscsi_rtt_hdr_t *)pdu->isp_hdr;
1748 1753 data_offset = ntohl(rtt_hdr->data_offset);
1749 1754 data_length = ntohl(rtt_hdr->data_length);
1750 1755 idt = idm_task_find(ic, rtt_hdr->itt, rtt_hdr->ttt);
1751 1756
1752 1757 if (idt == NULL) {
1753 1758 IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: could not find task");
1754 1759 idm_pdu_rx_protocol_error(ic, pdu);
1755 1760 return;
1756 1761 }
1757 1762
1758 1763 /* Find the buffer bound to the task by the iSCSI initiator */
1759 1764 mutex_enter(&idt->idt_mutex);
1760 1765 idb = idm_buf_find(&idt->idt_outbufv, data_offset);
1761 1766 if (idb == NULL) {
1762 1767 mutex_exit(&idt->idt_mutex);
1763 1768 idm_task_rele(idt);
1764 1769 IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: could not find buffer");
1765 1770 idm_pdu_rx_protocol_error(ic, pdu);
1766 1771 return;
1767 1772 }
1768 1773
1769 1774 /* return buffer contains this data */
1770 1775 if (data_offset + data_length > idb->idb_buflen) {
1771 1776 /* Overflow */
1772 1777 mutex_exit(&idt->idt_mutex);
1773 1778 idm_task_rele(idt);
1774 1779 IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: read from outside "
1775 1780 "buffer");
1776 1781 idm_pdu_rx_protocol_error(ic, pdu);
1777 1782 return;
1778 1783 }
1779 1784
1780 1785 idt->idt_r2t_ttt = rtt_hdr->ttt;
1781 1786 idt->idt_exp_datasn = 0;
1782 1787
1783 1788 idm_so_send_rtt_data(ic, idt, idb, data_offset,
1784 1789 ntohl(rtt_hdr->data_length));
1785 1790 /*
1786 1791 * the idt_mutex is released in idm_so_send_rtt_data
1787 1792 */
1788 1793
1789 1794 idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1790 1795 idm_task_rele(idt);
1791 1796
1792 1797 }
1793 1798
1794 1799 idm_status_t
1795 1800 idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu)
1796 1801 {
1797 1802 uint8_t pad[ISCSI_PAD_WORD_LEN];
1798 1803 int pad_len;
1799 1804 uint32_t data_digest_crc;
1800 1805 uint32_t crc_calculated;
1801 1806 int total_len;
1802 1807 idm_so_conn_t *so_conn;
1803 1808
1804 1809 so_conn = ic->ic_transport_private;
1805 1810
1806 1811 pad_len = ((ISCSI_PAD_WORD_LEN -
1807 1812 (pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) &
1808 1813 (ISCSI_PAD_WORD_LEN - 1));
1809 1814
1810 1815 ASSERT(pdu->isp_iovlen < (PDU_MAX_IOVLEN - 2)); /* pad + data digest */
1811 1816
1812 1817 total_len = pdu->isp_datalen;
1813 1818
1814 1819 if (pad_len) {
1815 1820 pdu->isp_iov[pdu->isp_iovlen].iov_base = (char *)&pad;
1816 1821 pdu->isp_iov[pdu->isp_iovlen].iov_len = pad_len;
1817 1822 total_len += pad_len;
1818 1823 pdu->isp_iovlen++;
1819 1824 }
1820 1825
1821 1826 /* setup data digest */
1822 1827 if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) {
1823 1828 pdu->isp_iov[pdu->isp_iovlen].iov_base =
1824 1829 (char *)&data_digest_crc;
1825 1830 pdu->isp_iov[pdu->isp_iovlen].iov_len =
1826 1831 sizeof (data_digest_crc);
1827 1832 total_len += sizeof (data_digest_crc);
1828 1833 pdu->isp_iovlen++;
1829 1834 }
1830 1835
1831 1836 pdu->isp_data = (uint8_t *)(uintptr_t)pdu->isp_iov[0].iov_base;
1832 1837
1833 1838 if (idm_iov_sorecv(so_conn->ic_so, &pdu->isp_iov[0],
1834 1839 pdu->isp_iovlen, total_len) != 0) {
1835 1840 return (IDM_STATUS_IO);
1836 1841 }
1837 1842
1838 1843 if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) {
1839 1844 crc_calculated = idm_crc32c(pdu->isp_data,
1840 1845 pdu->isp_datalen);
1841 1846 if (pad_len) {
1842 1847 crc_calculated = idm_crc32c_continued((char *)&pad,
1843 1848 pad_len, crc_calculated);
1844 1849 }
1845 1850 if (crc_calculated != data_digest_crc) {
1846 1851 IDM_CONN_LOG(CE_WARN,
1847 1852 "idm_sorecvdata: "
1848 1853 "CRC error: actual 0x%x, calc 0x%x",
1849 1854 data_digest_crc, crc_calculated);
1850 1855
1851 1856 /* Invalid Data Digest */
1852 1857 return (IDM_STATUS_DATA_DIGEST);
1853 1858 }
1854 1859 }
1855 1860
1856 1861 return (IDM_STATUS_SUCCESS);
1857 1862 }
1858 1863
1859 1864 /*
1860 1865 * idm_sorecv_scsidata() is used to receive scsi data from the socket. The
1861 1866 * Data-type PDU header must be read into the idm_pdu_t structure prior to
1862 1867 * calling this function.
1863 1868 */
1864 1869 idm_status_t
1865 1870 idm_sorecv_scsidata(idm_conn_t *ic, idm_pdu_t *pdu)
1866 1871 {
1867 1872 iscsi_data_hdr_t *bhs;
1868 1873 idm_task_t *task;
1869 1874 uint32_t offset;
1870 1875 uint8_t opcode;
1871 1876 uint32_t dlength;
1872 1877 list_t *buflst;
1873 1878 uint32_t xfer_bytes;
1874 1879 idm_status_t status;
1875 1880
1876 1881 ASSERT(ic != NULL);
1877 1882 ASSERT(pdu != NULL);
1878 1883
1879 1884 bhs = (iscsi_data_hdr_t *)pdu->isp_hdr;
1880 1885
1881 1886 offset = ntohl(bhs->offset);
1882 1887 opcode = bhs->opcode;
1883 1888 dlength = n2h24(bhs->dlength);
1884 1889
1885 1890 ASSERT((opcode == ISCSI_OP_SCSI_DATA_RSP) ||
1886 1891 (opcode == ISCSI_OP_SCSI_DATA));
1887 1892
1888 1893 /*
1889 1894 * Successful lookup implicitly gets a "hold" on the task. This
1890 1895 * hold must be released before leaving this function. At one
1891 1896 * point we were caching this task context and retaining the hold
1892 1897 * but it turned out to be very difficult to release the hold properly.
1893 1898 * The task can be aborted and the connection shutdown between this
1894 1899 * call and the subsequent expected call to idm_so_rx_datain/
1895 1900 * idm_so_rx_dataout (in which case those functions are not called).
1896 1901 * Releasing the hold in the PDU callback doesn't work well either
1897 1902 * because the whole task may be completed by then at which point
1898 1903 * it is too late to release the hold -- for better or worse this
1899 1904 * code doesn't wait on the refcnts during normal operation.
1900 1905 * idm_task_find() is very fast and it is not a huge burden if we
1901 1906 * have to do it twice.
1902 1907 */
1903 1908 task = idm_task_find(ic, bhs->itt, bhs->ttt);
1904 1909 if (task == NULL) {
1905 1910 IDM_CONN_LOG(CE_WARN,
1906 1911 "idm_sorecv_scsidata: could not find task");
1907 1912 return (IDM_STATUS_FAIL);
1908 1913 }
1909 1914
1910 1915 mutex_enter(&task->idt_mutex);
1911 1916 buflst = (opcode == ISCSI_OP_SCSI_DATA_RSP) ?
1912 1917 &task->idt_inbufv : &task->idt_outbufv;
1913 1918 pdu->isp_sorx_buf = idm_buf_find(buflst, offset);
1914 1919 mutex_exit(&task->idt_mutex);
1915 1920
1916 1921 if (pdu->isp_sorx_buf == NULL) {
1917 1922 idm_task_rele(task);
1918 1923 IDM_CONN_LOG(CE_WARN, "idm_sorecv_scsidata: could not find "
1919 1924 "buffer for offset %x opcode=%x",
1920 1925 offset, opcode);
1921 1926 return (IDM_STATUS_FAIL);
1922 1927 }
1923 1928
1924 1929 xfer_bytes = idm_fill_iov(pdu, pdu->isp_sorx_buf, offset, dlength);
1925 1930 ASSERT(xfer_bytes != 0);
1926 1931 if (xfer_bytes != dlength) {
1927 1932 idm_task_rele(task);
1928 1933 /*
1929 1934 * Buffer overflow, connection error. The PDU data is still
1930 1935 * sitting in the socket so we can't use the connection
1931 1936 * again until that data is drained.
1932 1937 */
1933 1938 return (IDM_STATUS_FAIL);
1934 1939 }
1935 1940
1936 1941 status = idm_sorecvdata(ic, pdu);
1937 1942
1938 1943 idm_task_rele(task);
1939 1944
1940 1945 return (status);
1941 1946 }
1942 1947
1943 1948 static uint32_t
1944 1949 idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb, uint32_t ro, uint32_t dlength)
1945 1950 {
1946 1951 uint32_t buf_ro = ro - idb->idb_bufoffset;
1947 1952 uint32_t xfer_len = min(dlength, idb->idb_buflen - buf_ro);
1948 1953
1949 1954 ASSERT(ro >= idb->idb_bufoffset);
1950 1955
1951 1956 pdu->isp_iov[pdu->isp_iovlen].iov_base =
1952 1957 (caddr_t)idb->idb_buf + buf_ro;
1953 1958 pdu->isp_iov[pdu->isp_iovlen].iov_len = xfer_len;
1954 1959 pdu->isp_iovlen++;
1955 1960
1956 1961 return (xfer_len);
1957 1962 }
1958 1963
1959 1964 int
1960 1965 idm_sorecv_nonscsidata(idm_conn_t *ic, idm_pdu_t *pdu)
1961 1966 {
1962 1967 pdu->isp_data = kmem_alloc(pdu->isp_datalen, KM_SLEEP);
1963 1968 ASSERT(pdu->isp_data != NULL);
1964 1969
1965 1970 pdu->isp_databuflen = pdu->isp_datalen;
1966 1971 pdu->isp_iov[0].iov_base = (caddr_t)pdu->isp_data;
1967 1972 pdu->isp_iov[0].iov_len = pdu->isp_datalen;
1968 1973 pdu->isp_iovlen = 1;
1969 1974 /*
1970 1975 * Since we are associating a new data buffer with this received
1971 1976 * PDU we need to set a specific callback to free the data
1972 1977 * after the PDU is processed.
1973 1978 */
1974 1979 pdu->isp_flags |= IDM_PDU_ADDL_DATA;
1975 1980 pdu->isp_callback = idm_sorx_addl_pdu_cb;
1976 1981
1977 1982 return (idm_sorecvdata(ic, pdu));
1978 1983 }
1979 1984
1980 1985 void
1981 1986 idm_sorx_thread(void *arg)
1982 1987 {
1983 1988 boolean_t conn_failure = B_FALSE;
1984 1989 idm_conn_t *ic = (idm_conn_t *)arg;
1985 1990 idm_so_conn_t *so_conn;
1986 1991 idm_pdu_t *pdu;
1987 1992 idm_status_t rc;
1988 1993
1989 1994 idm_conn_hold(ic);
1990 1995
1991 1996 mutex_enter(&ic->ic_mutex);
1992 1997
1993 1998 so_conn = ic->ic_transport_private;
1994 1999 so_conn->ic_rx_thread_running = B_TRUE;
1995 2000 so_conn->ic_rx_thread_did = so_conn->ic_rx_thread->t_did;
1996 2001 cv_signal(&ic->ic_cv);
1997 2002
1998 2003 while (so_conn->ic_rx_thread_running) {
1999 2004 mutex_exit(&ic->ic_mutex);
2000 2005
2001 2006 /*
2002 2007 * Get PDU with default header size (large enough for
2003 2008 * BHS plus any anticipated AHS). PDU from
2004 2009 * the cache will have all values set correctly
2005 2010 * for sockets RX including callback.
2006 2011 */
2007 2012 pdu = kmem_cache_alloc(idm.idm_sorx_pdu_cache, KM_SLEEP);
2008 2013 pdu->isp_ic = ic;
2009 2014 pdu->isp_flags = 0;
2010 2015 pdu->isp_transport_hdrlen = 0;
2011 2016
2012 2017 if ((rc = idm_sorecvhdr(ic, pdu)) != 0) {
2013 2018 /*
2014 2019 * Call idm_pdu_complete so that we call the callback
2015 2020 * and ensure any memory allocated in idm_sorecvhdr
2016 2021 * gets freed up.
2017 2022 */
2018 2023 idm_pdu_complete(pdu, IDM_STATUS_FAIL);
2019 2024
2020 2025 /*
2021 2026 * If ic_rx_thread_running is still set then
2022 2027 * this is some kind of connection problem
2023 2028 * on the socket. In this case we want to
2024 2029 * generate an event. Otherwise some other
2025 2030 * thread closed the socket due to another
2026 2031 * issue in which case we don't need to
2027 2032 * generate an event.
2028 2033 */
2029 2034 mutex_enter(&ic->ic_mutex);
2030 2035 if (so_conn->ic_rx_thread_running) {
2031 2036 conn_failure = B_TRUE;
2032 2037 so_conn->ic_rx_thread_running = B_FALSE;
2033 2038 }
2034 2039
2035 2040 continue;
2036 2041 }
2037 2042
2038 2043 /*
2039 2044 * Header has been read and validated. Now we need
2040 2045 * to read the PDU data payload (if present). SCSI data
2041 2046 * need to be transferred from the socket directly into
2042 2047 * the associated transfer buffer for the SCSI task.
2043 2048 */
2044 2049 if (pdu->isp_datalen != 0) {
2045 2050 if ((IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA) ||
2046 2051 (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP)) {
2047 2052 rc = idm_sorecv_scsidata(ic, pdu);
2048 2053 /*
2049 2054 * All SCSI errors are fatal to the
2050 2055 * connection right now since we have no
2051 2056 * place to put the data. What we need
2052 2057 * is some kind of sink to dispose of unwanted
2053 2058 * SCSI data. For example an invalid task tag
2054 2059 * should not kill the connection (although
2055 2060 * we may want to drop the connection).
2056 2061 */
2057 2062 } else {
2058 2063 /*
2059 2064 * Not data PDUs so allocate a buffer for the
2060 2065 * data segment and read the remaining data.
2061 2066 */
2062 2067 rc = idm_sorecv_nonscsidata(ic, pdu);
2063 2068 }
2064 2069 if (rc != 0) {
2065 2070 /*
2066 2071 * Call idm_pdu_complete so that we call the
2067 2072 * callback and ensure any memory allocated
2068 2073 * in idm_sorecvhdr gets freed up.
2069 2074 */
2070 2075 idm_pdu_complete(pdu, IDM_STATUS_FAIL);
2071 2076
2072 2077 /*
2073 2078 * If ic_rx_thread_running is still set then
2074 2079 * this is some kind of connection problem
2075 2080 * on the socket. In this case we want to
2076 2081 * generate an event. Otherwise some other
2077 2082 * thread closed the socket due to another
2078 2083 * issue in which case we don't need to
2079 2084 * generate an event.
2080 2085 */
2081 2086 mutex_enter(&ic->ic_mutex);
2082 2087 if (so_conn->ic_rx_thread_running) {
2083 2088 conn_failure = B_TRUE;
2084 2089 so_conn->ic_rx_thread_running = B_FALSE;
2085 2090 }
2086 2091 continue;
2087 2092 }
2088 2093 }
2089 2094
2090 2095 /*
2091 2096 * Process RX PDU
2092 2097 */
2093 2098 idm_pdu_rx(ic, pdu);
2094 2099
2095 2100 mutex_enter(&ic->ic_mutex);
2096 2101 }
2097 2102
2098 2103 mutex_exit(&ic->ic_mutex);
2099 2104
2100 2105 /*
2101 2106 * If we dropped out of the RX processing loop because of
2102 2107 * a socket problem or other connection failure (including
2103 2108 * digest errors) then we need to generate a state machine
2104 2109 * event to shut the connection down.
2105 2110 * If the state machine is already in, for example, INIT_ERROR, this
2106 2111 * event will get dropped, and the TX thread will never be notified
2107 2112 * to shut down. To be safe, we'll just notify it here.
2108 2113 */
2109 2114 if (conn_failure) {
2110 2115 if (so_conn->ic_tx_thread_running) {
2111 2116 so_conn->ic_tx_thread_running = B_FALSE;
2112 2117 mutex_enter(&so_conn->ic_tx_mutex);
2113 2118 cv_signal(&so_conn->ic_tx_cv);
2114 2119 mutex_exit(&so_conn->ic_tx_mutex);
2115 2120 }
2116 2121
2117 2122 idm_conn_event(ic, CE_TRANSPORT_FAIL, rc);
2118 2123 }
2119 2124
2120 2125 idm_conn_rele(ic);
2121 2126
2122 2127 thread_exit();
2123 2128 }
2124 2129
2125 2130 /*
2126 2131 * idm_so_tx
2127 2132 *
2128 2133 * This is the implementation of idm_transport_ops_t's it_tx_pdu entry
2129 2134 * point. By definition, it is supposed to be fast. So, simply queue
2130 2135 * the entry and return. The real work is done by idm_i_so_tx() via
2131 2136 * idm_sotx_thread().
2132 2137 */
2133 2138
2134 2139 static void
2135 2140 idm_so_tx(idm_conn_t *ic, idm_pdu_t *pdu)
2136 2141 {
2137 2142 idm_so_conn_t *so_conn = ic->ic_transport_private;
2138 2143
2139 2144 ASSERT(pdu->isp_ic == ic);
2140 2145 mutex_enter(&so_conn->ic_tx_mutex);
2141 2146
2142 2147 if (!so_conn->ic_tx_thread_running) {
2143 2148 mutex_exit(&so_conn->ic_tx_mutex);
2144 2149 idm_pdu_complete(pdu, IDM_STATUS_ABORTED);
2145 2150 return;
2146 2151 }
2147 2152
2148 2153 list_insert_tail(&so_conn->ic_tx_list, (void *)pdu);
2149 2154 cv_signal(&so_conn->ic_tx_cv);
2150 2155 mutex_exit(&so_conn->ic_tx_mutex);
2151 2156 }
2152 2157
2153 2158 static idm_status_t
2154 2159 idm_i_so_tx(idm_pdu_t *pdu)
2155 2160 {
2156 2161 idm_conn_t *ic = pdu->isp_ic;
2157 2162 idm_status_t status = IDM_STATUS_SUCCESS;
2158 2163 uint8_t pad[ISCSI_PAD_WORD_LEN];
2159 2164 int pad_len;
2160 2165 uint32_t hdr_digest_crc;
2161 2166 uint32_t data_digest_crc = 0;
2162 2167 int total_len = 0;
2163 2168 int iovlen = 0;
2164 2169 struct iovec iov[6];
2165 2170 idm_so_conn_t *so_conn;
2166 2171
2167 2172 so_conn = ic->ic_transport_private;
2168 2173
2169 2174 /* Setup BHS */
2170 2175 iov[iovlen].iov_base = (caddr_t)pdu->isp_hdr;
2171 2176 iov[iovlen].iov_len = pdu->isp_hdrlen;
2172 2177 total_len += iov[iovlen].iov_len;
2173 2178 iovlen++;
2174 2179
2175 2180 /* Setup header digest */
2176 2181 if (((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) &&
2177 2182 (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST)) {
2178 2183 hdr_digest_crc = idm_crc32c(pdu->isp_hdr, pdu->isp_hdrlen);
2179 2184
2180 2185 iov[iovlen].iov_base = (caddr_t)&hdr_digest_crc;
2181 2186 iov[iovlen].iov_len = sizeof (hdr_digest_crc);
2182 2187 total_len += iov[iovlen].iov_len;
2183 2188 iovlen++;
2184 2189 }
2185 2190
2186 2191 /* Setup the data */
2187 2192 if (pdu->isp_datalen) {
2188 2193 idm_task_t *idt;
2189 2194 idm_buf_t *idb;
2190 2195 iscsi_data_hdr_t *ihp;
2191 2196 ihp = (iscsi_data_hdr_t *)pdu->isp_hdr;
2192 2197 /* Write of immediate data */
2193 2198 if (ic->ic_ffp &&
2194 2199 (ihp->opcode == ISCSI_OP_SCSI_CMD ||
2195 2200 ihp->opcode == ISCSI_OP_SCSI_DATA)) {
2196 2201 idt = idm_task_find(ic, ihp->itt, ihp->ttt);
2197 2202 if (idt) {
2198 2203 mutex_enter(&idt->idt_mutex);
2199 2204 idb = idm_buf_find(&idt->idt_outbufv, 0);
2200 2205 mutex_exit(&idt->idt_mutex);
2201 2206 /*
2202 2207 * If the initiator call to idm_buf_alloc
2203 2208 * failed then we can get to this point
2204 2209 * without a bound buffer. The associated
2205 2210 * connection failure will clean things up
2206 2211 * later. It would be nice to come up with
2207 2212 * a cleaner way to handle this. In
2208 2213 * particular it seems absurd to look up
2209 2214 * the task and the buffer just to update
2210 2215 * this counter.
2211 2216 */
2212 2217 if (idb)
2213 2218 idb->idb_xfer_len += pdu->isp_datalen;
2214 2219 idm_task_rele(idt);
2215 2220 }
2216 2221 }
2217 2222
2218 2223 iov[iovlen].iov_base = (caddr_t)pdu->isp_data;
2219 2224 iov[iovlen].iov_len = pdu->isp_datalen;
2220 2225 total_len += iov[iovlen].iov_len;
2221 2226 iovlen++;
2222 2227 }
2223 2228
2224 2229 /* Setup the data pad if necessary */
2225 2230 pad_len = ((ISCSI_PAD_WORD_LEN -
2226 2231 (pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) &
2227 2232 (ISCSI_PAD_WORD_LEN - 1));
2228 2233
2229 2234 if (pad_len) {
2230 2235 bzero(pad, sizeof (pad));
2231 2236 iov[iovlen].iov_base = (void *)&pad;
2232 2237 iov[iovlen].iov_len = pad_len;
2233 2238 total_len += iov[iovlen].iov_len;
2234 2239 iovlen++;
2235 2240 }
2236 2241
2237 2242 /*
2238 2243 * Setup the data digest if enabled. Data-digest is not sent
2239 2244 * for login-phase PDUs.
2240 2245 */
2241 2246 if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) &&
2242 2247 ((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) &&
2243 2248 (pdu->isp_datalen || pad_len)) {
2244 2249 /*
2245 2250 * RFC3720/10.2.3: A zero-length Data Segment also
2246 2251 * implies a zero-length data digest.
2247 2252 */
2248 2253 if (pdu->isp_datalen) {
2249 2254 data_digest_crc = idm_crc32c(pdu->isp_data,
2250 2255 pdu->isp_datalen);
2251 2256 }
2252 2257 if (pad_len) {
2253 2258 data_digest_crc = idm_crc32c_continued(&pad,
2254 2259 pad_len, data_digest_crc);
2255 2260 }
2256 2261
2257 2262 iov[iovlen].iov_base = (caddr_t)&data_digest_crc;
2258 2263 iov[iovlen].iov_len = sizeof (data_digest_crc);
2259 2264 total_len += iov[iovlen].iov_len;
2260 2265 iovlen++;
2261 2266 }
2262 2267
2263 2268 /* Transmit the PDU */
2264 2269 if (idm_iov_sosend(so_conn->ic_so, &iov[0], iovlen,
2265 2270 total_len) != 0) {
2266 2271 /* Set error status */
2267 2272 IDM_CONN_LOG(CE_WARN,
2268 2273 "idm_so_tx: failed to transmit the PDU, so: %p ic: %p "
2269 2274 "data: %p", (void *) so_conn->ic_so, (void *) ic,
2270 2275 (void *) pdu->isp_data);
2271 2276 status = IDM_STATUS_IO;
2272 2277 }
2273 2278
2274 2279 /*
2275 2280 * Success does not mean that the PDU actually reached the
2276 2281 * remote node since it could get dropped along the way.
2277 2282 */
2278 2283 idm_pdu_complete(pdu, status);
2279 2284
2280 2285 return (status);
2281 2286 }
2282 2287
2283 2288 /*
2284 2289 * The idm_so_buf_tx_to_ini() is used by the target iSCSI layer to transmit the
2285 2290 * Data-In PDUs using sockets. Based on the negotiated MaxRecvDataSegmentLength,
2286 2291 * the buffer is segmented into a sequence of Data-In PDUs, ordered by DataSN.
2287 2292 * A target can invoke this function multiple times for a single read command
2288 2293 * (identified by the same ITT) to split the input into several sequences.
2289 2294 *
2290 2295 * DataSN starts with 0 for the first data PDU of an input command and advances
2291 2296 * by 1 for each subsequent data PDU. Each sequence will have its own F bit,
2292 2297 * which is set to 1 for the last data PDU of a sequence.
2293 2298 * If the initiator supports phase collapse, the status bit must be set along
2294 2299 * with the F bit to indicate that the status is shipped together with the last
2295 2300 * Data-In PDU.
2296 2301 *
2297 2302 * The data PDUs within a sequence will be sent in order with the buffer offset
2298 2303 * in increasing order. i.e. initiator and target must have negotiated the
2299 2304 * "DataPDUInOrder" to "Yes". The order between sequences is not enforced.
2300 2305 *
2301 2306 * Caller holds idt->idt_mutex
2302 2307 */
2303 2308 static idm_status_t
2304 2309 idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb)
2305 2310 {
2306 2311 idm_so_conn_t *so_conn = idb->idb_ic->ic_transport_private;
2307 2312 idm_pdu_t tmppdu;
2308 2313
2309 2314 ASSERT(mutex_owned(&idt->idt_mutex));
2310 2315
2311 2316 /*
2312 2317 * Put the idm_buf_t on the tx queue. It will be transmitted by
2313 2318 * idm_sotx_thread.
2314 2319 */
2315 2320 mutex_enter(&so_conn->ic_tx_mutex);
2316 2321
2317 2322 DTRACE_ISCSI_8(xfer__start, idm_conn_t *, idt->idt_ic,
2318 2323 uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
2319 2324 uint64_t, 0, uint32_t, 0, uint32_t, 0,
2320 2325 uint32_t, idb->idb_xfer_len, int, XFER_BUF_TX_TO_INI);
2321 2326
2322 2327 if (!so_conn->ic_tx_thread_running) {
2323 2328 mutex_exit(&so_conn->ic_tx_mutex);
2324 2329 /*
2325 2330 * Don't release idt->idt_mutex since we're supposed to hold
2326 2331 * in when calling idm_buf_tx_to_ini_done
2327 2332 */
2328 2333 DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
2329 2334 uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
2330 2335 uint64_t, 0, uint32_t, 0, uint32_t, 0,
2331 2336 uint32_t, idb->idb_xfer_len,
2332 2337 int, XFER_BUF_TX_TO_INI);
2333 2338 idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
2334 2339 return (IDM_STATUS_FAIL);
2335 2340 }
2336 2341
2337 2342 /*
2338 2343 * Build a template for the data PDU headers we will use so that
2339 2344 * the SN values will stay consistent with other PDU's we are
2340 2345 * transmitting like R2T and SCSI status.
2341 2346 */
2342 2347 bzero(&idb->idb_data_hdr_tmpl, sizeof (iscsi_hdr_t));
2343 2348 tmppdu.isp_hdr = &idb->idb_data_hdr_tmpl;
2344 2349 (*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, &tmppdu,
2345 2350 ISCSI_OP_SCSI_DATA_RSP);
2346 2351 idb->idb_tx_thread = B_TRUE;
2347 2352 list_insert_tail(&so_conn->ic_tx_list, (void *)idb);
2348 2353 cv_signal(&so_conn->ic_tx_cv);
2349 2354 mutex_exit(&so_conn->ic_tx_mutex);
2350 2355 mutex_exit(&idt->idt_mutex);
2351 2356
2352 2357 /*
2353 2358 * Returning success here indicates the transfer was successfully
2354 2359 * dispatched -- it does not mean that the transfer completed
2355 2360 * successfully.
2356 2361 */
2357 2362 return (IDM_STATUS_SUCCESS);
2358 2363 }
2359 2364
2360 2365 /*
2361 2366 * The idm_so_buf_rx_from_ini() is used by the target iSCSI layer to specify the
2362 2367 * data blocks it is ready to receive from the initiator in response to a WRITE
2363 2368 * SCSI command. The target iSCSI layer passes the information about the desired
2364 2369 * data blocks to the initiator in one R2T PDU. The receiving buffer, the buffer
2365 2370 * offset and datalen are passed via the 'idb' argument.
2366 2371 *
2367 2372 * Scope for Prototype build:
2368 2373 * R2Ts are required for any Data-Out PDU, i.e. initiator and target must have
2369 2374 * negotiated the "InitialR2T" to "Yes".
2370 2375 *
2371 2376 * Caller holds idt->idt_mutex
2372 2377 */
2373 2378 static idm_status_t
2374 2379 idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb)
2375 2380 {
2376 2381 idm_pdu_t *pdu;
2377 2382 iscsi_rtt_hdr_t *rtt;
2378 2383
2379 2384 ASSERT(mutex_owned(&idt->idt_mutex));
2380 2385
2381 2386 DTRACE_ISCSI_8(xfer__start, idm_conn_t *, idt->idt_ic,
2382 2387 uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
2383 2388 uint64_t, 0, uint32_t, 0, uint32_t, 0,
2384 2389 uint32_t, idb->idb_xfer_len, int, XFER_BUF_RX_FROM_INI);
2385 2390
2386 2391 pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP);
2387 2392 pdu->isp_ic = idt->idt_ic;
2388 2393 pdu->isp_flags = IDM_PDU_SET_STATSN;
2389 2394 bzero(pdu->isp_hdr, sizeof (iscsi_rtt_hdr_t));
2390 2395
2391 2396 /* iSCSI layer fills the TTT, ITT, ExpCmdSN, MaxCmdSN */
2392 2397 (*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, pdu, ISCSI_OP_RTT_RSP);
2393 2398
2394 2399 /* set the rttsn, rtt.flags, rtt.data_offset and rtt.data_length */
2395 2400 rtt = (iscsi_rtt_hdr_t *)(pdu->isp_hdr);
2396 2401
2397 2402 rtt->opcode = ISCSI_OP_RTT_RSP;
2398 2403 rtt->flags = ISCSI_FLAG_FINAL;
2399 2404 rtt->data_offset = htonl(idb->idb_bufoffset);
2400 2405 rtt->data_length = htonl(idb->idb_xfer_len);
2401 2406 rtt->rttsn = htonl(idt->idt_exp_rttsn++);
2402 2407
2403 2408 /* Keep track of buffer offsets */
2404 2409 idb->idb_exp_offset = idb->idb_bufoffset;
2405 2410 mutex_exit(&idt->idt_mutex);
2406 2411
2407 2412 /*
2408 2413 * Transmit the PDU.
2409 2414 */
2410 2415 idm_pdu_tx(pdu);
2411 2416
2412 2417 return (IDM_STATUS_SUCCESS);
2413 2418 }
2414 2419
2415 2420 static idm_status_t
2416 2421 idm_so_buf_alloc(idm_buf_t *idb, uint64_t buflen)
2417 2422 {
2418 2423 if ((buflen > IDM_SO_BUF_CACHE_LB) && (buflen <= IDM_SO_BUF_CACHE_UB)) {
2419 2424 idb->idb_buf = kmem_cache_alloc(idm.idm_so_128k_buf_cache,
2420 2425 KM_NOSLEEP);
2421 2426 idb->idb_buf_private = idm.idm_so_128k_buf_cache;
2422 2427 } else {
2423 2428 idb->idb_buf = kmem_alloc(buflen, KM_NOSLEEP);
2424 2429 idb->idb_buf_private = NULL;
2425 2430 }
2426 2431
2427 2432 if (idb->idb_buf == NULL) {
2428 2433 IDM_CONN_LOG(CE_NOTE,
2429 2434 "idm_so_buf_alloc: failed buffer allocation");
2430 2435 return (IDM_STATUS_FAIL);
2431 2436 }
2432 2437
2433 2438 return (IDM_STATUS_SUCCESS);
2434 2439 }
2435 2440
2436 2441 /* ARGSUSED */
2437 2442 static idm_status_t
2438 2443 idm_so_buf_setup(idm_buf_t *idb)
2439 2444 {
2440 2445 /* Ensure bufalloc'd flag is unset */
2441 2446 idb->idb_bufalloc = B_FALSE;
2442 2447
2443 2448 return (IDM_STATUS_SUCCESS);
2444 2449 }
2445 2450
2446 2451 /* ARGSUSED */
2447 2452 static void
2448 2453 idm_so_buf_teardown(idm_buf_t *idb)
2449 2454 {
2450 2455 /* nothing to do here */
2451 2456 }
2452 2457
2453 2458 static void
2454 2459 idm_so_buf_free(idm_buf_t *idb)
2455 2460 {
2456 2461 if (idb->idb_buf_private == NULL) {
2457 2462 kmem_free(idb->idb_buf, idb->idb_buflen);
2458 2463 } else {
2459 2464 kmem_cache_free(idb->idb_buf_private, idb->idb_buf);
2460 2465 }
2461 2466 }
2462 2467
2463 2468 static void
2464 2469 idm_so_send_rtt_data(idm_conn_t *ic, idm_task_t *idt, idm_buf_t *idb,
2465 2470 uint32_t offset, uint32_t length)
2466 2471 {
2467 2472 idm_so_conn_t *so_conn = ic->ic_transport_private;
2468 2473 idm_pdu_t tmppdu;
2469 2474 idm_buf_t *rtt_buf;
2470 2475
2471 2476 ASSERT(mutex_owned(&idt->idt_mutex));
2472 2477
2473 2478 /*
2474 2479 * Allocate a buffer to represent the RTT transfer. We could further
2475 2480 * optimize this by allocating the buffers internally from an rtt
2476 2481 * specific buffer cache since this is socket-specific code but for
2477 2482 * now we will keep it simple.
2478 2483 */
2479 2484 rtt_buf = idm_buf_alloc(ic, (uint8_t *)idb->idb_buf + offset, length);
2480 2485 if (rtt_buf == NULL) {
2481 2486 /*
2482 2487 * If we're in FFP then the failure was likely a resource
2483 2488 * allocation issue and we should close the connection by
2484 2489 * sending a CE_TRANSPORT_FAIL event.
2485 2490 *
2486 2491 * If we're not in FFP then idm_buf_alloc will always
2487 2492 * fail and the state is transitioning to "complete" anyway
2488 2493 * so we won't bother to send an event.
2489 2494 */
2490 2495 mutex_enter(&ic->ic_state_mutex);
2491 2496 if (ic->ic_ffp)
2492 2497 idm_conn_event_locked(ic, CE_TRANSPORT_FAIL,
2493 2498 NULL, CT_NONE);
2494 2499 mutex_exit(&ic->ic_state_mutex);
2495 2500 mutex_exit(&idt->idt_mutex);
2496 2501 return;
2497 2502 }
2498 2503
2499 2504 rtt_buf->idb_buf_cb = NULL;
2500 2505 rtt_buf->idb_cb_arg = NULL;
2501 2506 rtt_buf->idb_bufoffset = offset;
2502 2507 rtt_buf->idb_xfer_len = length;
2503 2508 rtt_buf->idb_ic = idt->idt_ic;
2504 2509 rtt_buf->idb_task_binding = idt;
2505 2510
2506 2511 /*
2507 2512 * The new buffer (if any) represents an additional
2508 2513 * reference on the task
2509 2514 */
2510 2515 idm_task_hold(idt);
2511 2516 mutex_exit(&idt->idt_mutex);
2512 2517
2513 2518 /*
2514 2519 * Put the idm_buf_t on the tx queue. It will be transmitted by
2515 2520 * idm_sotx_thread.
2516 2521 */
2517 2522 mutex_enter(&so_conn->ic_tx_mutex);
2518 2523
2519 2524 if (!so_conn->ic_tx_thread_running) {
2520 2525 idm_buf_free(rtt_buf);
2521 2526 mutex_exit(&so_conn->ic_tx_mutex);
2522 2527 idm_task_rele(idt);
2523 2528 return;
2524 2529 }
2525 2530
2526 2531 /*
2527 2532 * Build a template for the data PDU headers we will use so that
2528 2533 * the SN values will stay consistent with other PDU's we are
2529 2534 * transmitting like R2T and SCSI status.
2530 2535 */
2531 2536 bzero(&rtt_buf->idb_data_hdr_tmpl, sizeof (iscsi_hdr_t));
2532 2537 tmppdu.isp_hdr = &rtt_buf->idb_data_hdr_tmpl;
2533 2538 (*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, &tmppdu,
2534 2539 ISCSI_OP_SCSI_DATA);
2535 2540 rtt_buf->idb_tx_thread = B_TRUE;
2536 2541 rtt_buf->idb_in_transport = B_TRUE;
2537 2542 list_insert_tail(&so_conn->ic_tx_list, (void *)rtt_buf);
2538 2543 cv_signal(&so_conn->ic_tx_cv);
2539 2544 mutex_exit(&so_conn->ic_tx_mutex);
2540 2545 }
2541 2546
2542 2547 static void
2543 2548 idm_so_send_rtt_data_done(idm_task_t *idt, idm_buf_t *idb)
2544 2549 {
2545 2550 /*
2546 2551 * Don't worry about status -- we assume any error handling
2547 2552 * is performed by the caller (idm_sotx_thread).
2548 2553 */
2549 2554 idb->idb_in_transport = B_FALSE;
2550 2555 idm_task_rele(idt);
2551 2556 idm_buf_free(idb);
2552 2557 }
2553 2558
2554 2559 static idm_status_t
2555 2560 idm_so_send_buf_region(idm_task_t *idt, idm_buf_t *idb,
2556 2561 uint32_t buf_region_offset, uint32_t buf_region_length)
2557 2562 {
2558 2563 idm_conn_t *ic;
2559 2564 uint32_t max_dataseglen;
2560 2565 size_t remainder, chunk;
2561 2566 uint32_t data_offset = buf_region_offset;
2562 2567 iscsi_data_hdr_t *bhs;
2563 2568 idm_pdu_t *pdu;
2564 2569 idm_status_t tx_status;
2565 2570
2566 2571 ASSERT(mutex_owned(&idt->idt_mutex));
2567 2572
2568 2573 ic = idt->idt_ic;
2569 2574
2570 2575 max_dataseglen = ic->ic_conn_params.max_xmit_dataseglen;
2571 2576 remainder = buf_region_length;
2572 2577
2573 2578 while (remainder) {
2574 2579 if (idt->idt_state != TASK_ACTIVE) {
2575 2580 ASSERT((idt->idt_state != TASK_IDLE) &&
2576 2581 (idt->idt_state != TASK_COMPLETE));
2577 2582 return (IDM_STATUS_ABORTED);
2578 2583 }
2579 2584
2580 2585 /* check to see if we need to chunk the data */
2581 2586 if (remainder > max_dataseglen) {
2582 2587 chunk = max_dataseglen;
2583 2588 } else {
2584 2589 chunk = remainder;
2585 2590 }
2586 2591
2587 2592 /* Data PDU headers will always be sizeof (iscsi_hdr_t) */
2588 2593 pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP);
2589 2594 pdu->isp_ic = ic;
2590 2595 pdu->isp_flags = 0; /* initialize isp_flags */
2591 2596
2592 2597 /*
2593 2598 * We've already built a build a header template
2594 2599 * to use during the transfer. Use this template so that
2595 2600 * the SN values stay consistent with any unrelated PDU's
2596 2601 * being transmitted.
2597 2602 */
2598 2603 bcopy(&idb->idb_data_hdr_tmpl, pdu->isp_hdr,
2599 2604 sizeof (iscsi_hdr_t));
2600 2605
2601 2606 /*
2602 2607 * Set DataSN, data offset, and flags in BHS
2603 2608 * For the prototype build, A = 0, S = 0, U = 0
2604 2609 */
2605 2610 bhs = (iscsi_data_hdr_t *)(pdu->isp_hdr);
2606 2611
2607 2612 bhs->datasn = htonl(idt->idt_exp_datasn++);
2608 2613
2609 2614 hton24(bhs->dlength, chunk);
2610 2615 bhs->offset = htonl(idb->idb_bufoffset + data_offset);
2611 2616
2612 2617 /* setup data */
2613 2618 pdu->isp_data = (uint8_t *)idb->idb_buf + data_offset;
2614 2619 pdu->isp_datalen = (uint_t)chunk;
2615 2620
2616 2621 if (chunk == remainder) {
2617 2622 bhs->flags = ISCSI_FLAG_FINAL; /* F bit set to 1 */
2618 2623 /* Piggyback the status with the last data PDU */
2619 2624 if (idt->idt_flags & IDM_TASK_PHASECOLLAPSE_REQ) {
2620 2625 pdu->isp_flags |= IDM_PDU_SET_STATSN |
2621 2626 IDM_PDU_ADVANCE_STATSN;
2622 2627 (*idt->idt_ic->ic_conn_ops.icb_update_statsn)
2623 2628 (idt, pdu);
2624 2629 idt->idt_flags |=
2625 2630 IDM_TASK_PHASECOLLAPSE_SUCCESS;
2626 2631
2627 2632 }
2628 2633 }
2629 2634
2630 2635 remainder -= chunk;
2631 2636 data_offset += chunk;
2632 2637
2633 2638 /* Instrument the data-send DTrace probe. */
2634 2639 if (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP) {
2635 2640 DTRACE_ISCSI_2(data__send,
2636 2641 idm_conn_t *, idt->idt_ic,
2637 2642 iscsi_data_rsp_hdr_t *,
2638 2643 (iscsi_data_rsp_hdr_t *)pdu->isp_hdr);
2639 2644 }
2640 2645
2641 2646 /*
2642 2647 * Now that we're done working with idt_exp_datasn,
2643 2648 * idt->idt_state and idb->idb_bufoffset we can release
2644 2649 * the task lock -- don't want to hold it across the
2645 2650 * call to idm_i_so_tx since we could block.
2646 2651 */
2647 2652 mutex_exit(&idt->idt_mutex);
2648 2653
2649 2654 /*
2650 2655 * Transmit the PDU. Call the internal routine directly
2651 2656 * as there is already implicit ordering.
2652 2657 */
2653 2658 if ((tx_status = idm_i_so_tx(pdu)) != IDM_STATUS_SUCCESS) {
2654 2659 mutex_enter(&idt->idt_mutex);
2655 2660 return (tx_status);
2656 2661 }
2657 2662
2658 2663 mutex_enter(&idt->idt_mutex);
2659 2664 idt->idt_tx_bytes += chunk;
2660 2665 }
2661 2666
2662 2667 return (IDM_STATUS_SUCCESS);
2663 2668 }
2664 2669
2665 2670 /*
2666 2671 * TX PDU cache
2667 2672 */
2668 2673 /* ARGSUSED */
2669 2674 int
2670 2675 idm_sotx_pdu_constructor(void *hdl, void *arg, int flags)
2671 2676 {
2672 2677 idm_pdu_t *pdu = hdl;
2673 2678
2674 2679 bzero(pdu, sizeof (idm_pdu_t));
2675 2680 pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); /* Ptr arithmetic */
2676 2681 pdu->isp_hdrlen = sizeof (iscsi_hdr_t);
2677 2682 pdu->isp_callback = idm_sotx_cache_pdu_cb;
2678 2683 pdu->isp_magic = IDM_PDU_MAGIC;
2679 2684 bzero(pdu->isp_hdr, sizeof (iscsi_hdr_t));
2680 2685
2681 2686 return (0);
2682 2687 }
2683 2688
2684 2689 /* ARGSUSED */
2685 2690 void
2686 2691 idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
2687 2692 {
2688 2693 /* reset values between use */
2689 2694 pdu->isp_datalen = 0;
2690 2695
2691 2696 kmem_cache_free(idm.idm_sotx_pdu_cache, pdu);
2692 2697 }
2693 2698
2694 2699 /*
2695 2700 * RX PDU cache
2696 2701 */
2697 2702 /* ARGSUSED */
2698 2703 int
2699 2704 idm_sorx_pdu_constructor(void *hdl, void *arg, int flags)
2700 2705 {
2701 2706 idm_pdu_t *pdu = hdl;
2702 2707
2703 2708 bzero(pdu, sizeof (idm_pdu_t));
2704 2709 pdu->isp_magic = IDM_PDU_MAGIC;
2705 2710 pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); /* Ptr arithmetic */
2706 2711 pdu->isp_callback = idm_sorx_cache_pdu_cb;
2707 2712
2708 2713 return (0);
2709 2714 }
2710 2715
2711 2716 /* ARGSUSED */
2712 2717 static void
2713 2718 idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
2714 2719 {
2715 2720 pdu->isp_iovlen = 0;
2716 2721 pdu->isp_sorx_buf = 0;
2717 2722 kmem_cache_free(idm.idm_sorx_pdu_cache, pdu);
2718 2723 }
2719 2724
2720 2725 static void
2721 2726 idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
2722 2727 {
2723 2728 /*
2724 2729 * We had to modify our cached RX PDU with a longer header buffer
2725 2730 * and/or a longer data buffer. Release the new buffers and fix
2726 2731 * the fields back to what we would expect for a cached RX PDU.
2727 2732 */
2728 2733 if (pdu->isp_flags & IDM_PDU_ADDL_HDR) {
2729 2734 kmem_free(pdu->isp_hdr, pdu->isp_hdrlen);
2730 2735 }
2731 2736 if (pdu->isp_flags & IDM_PDU_ADDL_DATA) {
2732 2737 kmem_free(pdu->isp_data, pdu->isp_datalen);
2733 2738 }
2734 2739 pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1);
2735 2740 pdu->isp_hdrlen = sizeof (iscsi_hdr_t);
2736 2741 pdu->isp_data = NULL;
2737 2742 pdu->isp_datalen = 0;
2738 2743 pdu->isp_sorx_buf = 0;
2739 2744 pdu->isp_callback = idm_sorx_cache_pdu_cb;
2740 2745 idm_sorx_cache_pdu_cb(pdu, status);
2741 2746 }
2742 2747
2743 2748 /*
2744 2749 * This thread is only active when I/O is queued for transmit
2745 2750 * because the socket is busy.
2746 2751 */
2747 2752 void
2748 2753 idm_sotx_thread(void *arg)
2749 2754 {
2750 2755 idm_conn_t *ic = arg;
2751 2756 idm_tx_obj_t *object, *next;
2752 2757 idm_so_conn_t *so_conn;
2753 2758 idm_status_t status = IDM_STATUS_SUCCESS;
2754 2759
2755 2760 idm_conn_hold(ic);
2756 2761
2757 2762 mutex_enter(&ic->ic_mutex);
2758 2763 so_conn = ic->ic_transport_private;
2759 2764 so_conn->ic_tx_thread_running = B_TRUE;
2760 2765 so_conn->ic_tx_thread_did = so_conn->ic_tx_thread->t_did;
2761 2766 cv_signal(&ic->ic_cv);
2762 2767 mutex_exit(&ic->ic_mutex);
2763 2768
2764 2769 mutex_enter(&so_conn->ic_tx_mutex);
2765 2770
2766 2771 while (so_conn->ic_tx_thread_running) {
2767 2772 while (list_is_empty(&so_conn->ic_tx_list)) {
2768 2773 DTRACE_PROBE1(soconn__tx__sleep, idm_conn_t *, ic);
2769 2774 cv_wait(&so_conn->ic_tx_cv, &so_conn->ic_tx_mutex);
2770 2775 DTRACE_PROBE1(soconn__tx__wakeup, idm_conn_t *, ic);
2771 2776
2772 2777 if (!so_conn->ic_tx_thread_running) {
2773 2778 goto tx_bail;
2774 2779 }
2775 2780 }
2776 2781
2777 2782 object = (idm_tx_obj_t *)list_head(&so_conn->ic_tx_list);
2778 2783 list_remove(&so_conn->ic_tx_list, object);
2779 2784 mutex_exit(&so_conn->ic_tx_mutex);
2780 2785
2781 2786 switch (object->idm_tx_obj_magic) {
2782 2787 case IDM_PDU_MAGIC: {
2783 2788 idm_pdu_t *pdu = (idm_pdu_t *)object;
2784 2789 DTRACE_PROBE2(soconn__tx__pdu, idm_conn_t *, ic,
2785 2790 idm_pdu_t *, (idm_pdu_t *)object);
2786 2791
2787 2792 if (pdu->isp_flags & IDM_PDU_SET_STATSN) {
2788 2793 /* No IDM task */
2789 2794 (ic->ic_conn_ops.icb_update_statsn)(NULL, pdu);
2790 2795 }
2791 2796 status = idm_i_so_tx((idm_pdu_t *)object);
2792 2797 break;
2793 2798 }
2794 2799 case IDM_BUF_MAGIC: {
2795 2800 idm_buf_t *idb = (idm_buf_t *)object;
2796 2801 idm_task_t *idt = idb->idb_task_binding;
2797 2802
2798 2803 DTRACE_PROBE2(soconn__tx__buf, idm_conn_t *, ic,
2799 2804 idm_buf_t *, idb);
2800 2805
2801 2806 mutex_enter(&idt->idt_mutex);
2802 2807 status = idm_so_send_buf_region(idt,
2803 2808 idb, 0, idb->idb_xfer_len);
2804 2809
2805 2810 /*
2806 2811 * TX thread owns the buffer so we expect it to
2807 2812 * be "in transport"
2808 2813 */
2809 2814 ASSERT(idb->idb_in_transport);
2810 2815 if (IDM_CONN_ISTGT(ic)) {
2811 2816 /*
2812 2817 * idm_buf_tx_to_ini_done releases
2813 2818 * idt->idt_mutex
2814 2819 */
2815 2820 DTRACE_ISCSI_8(xfer__done,
2816 2821 idm_conn_t *, idt->idt_ic,
2817 2822 uintptr_t, idb->idb_buf,
2818 2823 uint32_t, idb->idb_bufoffset,
2819 2824 uint64_t, 0, uint32_t, 0, uint32_t, 0,
2820 2825 uint32_t, idb->idb_xfer_len,
2821 2826 int, XFER_BUF_TX_TO_INI);
2822 2827 idm_buf_tx_to_ini_done(idt, idb, status);
2823 2828 } else {
2824 2829 idm_so_send_rtt_data_done(idt, idb);
2825 2830 mutex_exit(&idt->idt_mutex);
2826 2831 }
2827 2832 break;
2828 2833 }
2829 2834
2830 2835 default:
2831 2836 IDM_CONN_LOG(CE_WARN, "idm_sotx_thread: Unknown magic "
2832 2837 "(0x%08x)", object->idm_tx_obj_magic);
2833 2838 status = IDM_STATUS_FAIL;
2834 2839 }
2835 2840
2836 2841 mutex_enter(&so_conn->ic_tx_mutex);
2837 2842
2838 2843 if (status != IDM_STATUS_SUCCESS) {
2839 2844 so_conn->ic_tx_thread_running = B_FALSE;
2840 2845 idm_conn_event(ic, CE_TRANSPORT_FAIL, status);
2841 2846 }
2842 2847 }
2843 2848
2844 2849 /*
2845 2850 * Before we leave, we need to abort every item remaining in the
2846 2851 * TX list.
2847 2852 */
2848 2853
2849 2854 tx_bail:
2850 2855 object = (idm_tx_obj_t *)list_head(&so_conn->ic_tx_list);
2851 2856
2852 2857 while (object != NULL) {
2853 2858 next = list_next(&so_conn->ic_tx_list, object);
2854 2859
2855 2860 list_remove(&so_conn->ic_tx_list, object);
2856 2861 switch (object->idm_tx_obj_magic) {
2857 2862 case IDM_PDU_MAGIC:
2858 2863 idm_pdu_complete((idm_pdu_t *)object,
2859 2864 IDM_STATUS_ABORTED);
2860 2865 break;
2861 2866
2862 2867 case IDM_BUF_MAGIC: {
2863 2868 idm_buf_t *idb = (idm_buf_t *)object;
2864 2869 idm_task_t *idt = idb->idb_task_binding;
2865 2870 mutex_exit(&so_conn->ic_tx_mutex);
2866 2871 mutex_enter(&idt->idt_mutex);
2867 2872 /*
2868 2873 * TX thread owns the buffer so we expect it to
2869 2874 * be "in transport"
2870 2875 */
2871 2876 ASSERT(idb->idb_in_transport);
2872 2877 if (IDM_CONN_ISTGT(ic)) {
2873 2878 /*
2874 2879 * idm_buf_tx_to_ini_done releases
2875 2880 * idt->idt_mutex
2876 2881 */
2877 2882 DTRACE_ISCSI_8(xfer__done,
2878 2883 idm_conn_t *, idt->idt_ic,
2879 2884 uintptr_t, idb->idb_buf,
2880 2885 uint32_t, idb->idb_bufoffset,
2881 2886 uint64_t, 0, uint32_t, 0, uint32_t, 0,
2882 2887 uint32_t, idb->idb_xfer_len,
2883 2888 int, XFER_BUF_TX_TO_INI);
2884 2889 idm_buf_tx_to_ini_done(idt, idb,
2885 2890 IDM_STATUS_ABORTED);
2886 2891 } else {
2887 2892 idm_so_send_rtt_data_done(idt, idb);
2888 2893 mutex_exit(&idt->idt_mutex);
2889 2894 }
2890 2895 mutex_enter(&so_conn->ic_tx_mutex);
2891 2896 break;
2892 2897 }
2893 2898 default:
2894 2899 IDM_CONN_LOG(CE_WARN,
2895 2900 "idm_sotx_thread: Unexpected magic "
2896 2901 "(0x%08x)", object->idm_tx_obj_magic);
2897 2902 }
2898 2903
2899 2904 object = next;
2900 2905 }
2901 2906
2902 2907 mutex_exit(&so_conn->ic_tx_mutex);
2903 2908 idm_conn_rele(ic);
2904 2909 thread_exit();
2905 2910 /*NOTREACHED*/
2906 2911 }
2907 2912
2908 2913 static void
2909 2914 idm_so_socket_set_nonblock(struct sonode *node)
2910 2915 {
2911 2916 (void) VOP_SETFL(node->so_vnode, node->so_flag,
2912 2917 (node->so_state | FNONBLOCK), CRED(), NULL);
2913 2918 }
2914 2919
2915 2920 static void
2916 2921 idm_so_socket_set_block(struct sonode *node)
2917 2922 {
2918 2923 (void) VOP_SETFL(node->so_vnode, node->so_flag,
2919 2924 (node->so_state & (~FNONBLOCK)), CRED(), NULL);
2920 2925 }
2921 2926
2922 2927
2923 2928 /*
2924 2929 * Called by kernel sockets when the connection has been accepted or
2925 2930 * rejected. In early volo, a "disconnect" callback was sent instead of
2926 2931 * "connectfailed", so we check for both.
2927 2932 */
2928 2933 /* ARGSUSED */
2929 2934 void
2930 2935 idm_so_timed_socket_connect_cb(ksocket_t ks,
2931 2936 ksocket_callback_event_t ev, void *arg, uintptr_t info)
2932 2937 {
2933 2938 idm_so_timed_socket_t *itp = arg;
2934 2939 ASSERT(itp != NULL);
2935 2940 ASSERT(ev == KSOCKET_EV_CONNECTED ||
2936 2941 ev == KSOCKET_EV_CONNECTFAILED ||
2937 2942 ev == KSOCKET_EV_DISCONNECTED);
2938 2943
2939 2944 mutex_enter(&idm_so_timed_socket_mutex);
2940 2945 itp->it_callback_called = B_TRUE;
2941 2946 if (ev == KSOCKET_EV_CONNECTED) {
2942 2947 itp->it_socket_error_code = 0;
2943 2948 } else {
2944 2949 /* Make sure the error code is non-zero on error */
2945 2950 if (info == 0)
2946 2951 info = ECONNRESET;
2947 2952 itp->it_socket_error_code = (int)info;
2948 2953 }
2949 2954 cv_signal(&itp->it_cv);
2950 2955 mutex_exit(&idm_so_timed_socket_mutex);
2951 2956 }
2952 2957
2953 2958 int
2954 2959 idm_so_timed_socket_connect(ksocket_t ks,
2955 2960 struct sockaddr_storage *sa, int sa_sz, int login_max_usec)
2956 2961 {
2957 2962 clock_t conn_login_max;
2958 2963 int rc, nonblocking, rval;
2959 2964 idm_so_timed_socket_t it;
2960 2965 ksocket_callbacks_t ks_cb;
2961 2966
2962 2967 conn_login_max = ddi_get_lbolt() + drv_usectohz(login_max_usec);
2963 2968
2964 2969 /*
2965 2970 * Set to non-block socket mode, with callback on connect
2966 2971 * Early volo used "disconnected" instead of "connectfailed",
2967 2972 * so set callback to look for both.
2968 2973 */
2969 2974 bzero(&it, sizeof (it));
2970 2975 ks_cb.ksock_cb_flags = KSOCKET_CB_CONNECTED |
2971 2976 KSOCKET_CB_CONNECTFAILED | KSOCKET_CB_DISCONNECTED;
2972 2977 ks_cb.ksock_cb_connected = idm_so_timed_socket_connect_cb;
2973 2978 ks_cb.ksock_cb_connectfailed = idm_so_timed_socket_connect_cb;
2974 2979 ks_cb.ksock_cb_disconnected = idm_so_timed_socket_connect_cb;
2975 2980 cv_init(&it.it_cv, NULL, CV_DEFAULT, NULL);
2976 2981 rc = ksocket_setcallbacks(ks, &ks_cb, &it, CRED());
2977 2982 if (rc != 0)
2978 2983 return (rc);
2979 2984
2980 2985 /* Set to non-blocking mode */
2981 2986 nonblocking = 1;
2982 2987 rc = ksocket_ioctl(ks, FIONBIO, (intptr_t)&nonblocking, &rval,
2983 2988 CRED());
2984 2989 if (rc != 0)
2985 2990 goto cleanup;
2986 2991
2987 2992 bzero(&it, sizeof (it));
2988 2993 for (;;) {
2989 2994 /*
2990 2995 * Warning -- in a loopback scenario, the call to
2991 2996 * the connect_cb can occur inside the call to
2992 2997 * ksocket_connect. Do not hold the mutex around the
2993 2998 * call to ksocket_connect.
2994 2999 */
2995 3000 rc = ksocket_connect(ks, (struct sockaddr *)sa, sa_sz, CRED());
2996 3001 if (rc == 0 || rc == EISCONN) {
2997 3002 /* socket success or already success */
2998 3003 rc = 0;
2999 3004 break;
3000 3005 }
3001 3006 if ((rc != EINPROGRESS) && (rc != EALREADY)) {
3002 3007 break;
3003 3008 }
3004 3009
3005 3010 /* TCP connect still in progress. See if out of time. */
3006 3011 if (ddi_get_lbolt() > conn_login_max) {
3007 3012 /*
3008 3013 * Connection retry timeout,
3009 3014 * failed connect to target.
3010 3015 */
3011 3016 rc = ETIMEDOUT;
3012 3017 break;
3013 3018 }
3014 3019
3015 3020 /*
3016 3021 * TCP connect still in progress. Sleep until callback.
3017 3022 * Do NOT go to sleep if the callback already occurred!
3018 3023 */
3019 3024 mutex_enter(&idm_so_timed_socket_mutex);
3020 3025 if (!it.it_callback_called) {
3021 3026 (void) cv_timedwait(&it.it_cv,
3022 3027 &idm_so_timed_socket_mutex, conn_login_max);
3023 3028 }
3024 3029 if (it.it_callback_called) {
3025 3030 rc = it.it_socket_error_code;
3026 3031 mutex_exit(&idm_so_timed_socket_mutex);
3027 3032 break;
3028 3033 }
3029 3034 /* If timer expires, go call ksocket_connect one last time. */
3030 3035 mutex_exit(&idm_so_timed_socket_mutex);
3031 3036 }
3032 3037
3033 3038 /* resume blocking mode */
3034 3039 nonblocking = 0;
3035 3040 (void) ksocket_ioctl(ks, FIONBIO, (intptr_t)&nonblocking, &rval,
3036 3041 CRED());
3037 3042 cleanup:
3038 3043 (void) ksocket_setcallbacks(ks, NULL, NULL, CRED());
3039 3044 cv_destroy(&it.it_cv);
3040 3045 if (rc != 0) {
3041 3046 idm_soshutdown(ks);
3042 3047 }
3043 3048 return (rc);
3044 3049 }
3045 3050
3046 3051
3047 3052 void
3048 3053 idm_addr_to_sa(idm_addr_t *dportal, struct sockaddr_storage *sa)
3049 3054 {
3050 3055 int dp_addr_size;
3051 3056 struct sockaddr_in *sin;
3052 3057 struct sockaddr_in6 *sin6;
3053 3058
3054 3059 /* Build sockaddr_storage for this portal (idm_addr_t) */
3055 3060 bzero(sa, sizeof (*sa));
3056 3061 dp_addr_size = dportal->a_addr.i_insize;
3057 3062 if (dp_addr_size == sizeof (struct in_addr)) {
3058 3063 /* IPv4 */
3059 3064 sa->ss_family = AF_INET;
3060 3065 sin = (struct sockaddr_in *)sa;
3061 3066 sin->sin_port = htons(dportal->a_port);
3062 3067 bcopy(&dportal->a_addr.i_addr.in4,
3063 3068 &sin->sin_addr, sizeof (struct in_addr));
3064 3069 } else if (dp_addr_size == sizeof (struct in6_addr)) {
3065 3070 /* IPv6 */
3066 3071 sa->ss_family = AF_INET6;
3067 3072 sin6 = (struct sockaddr_in6 *)sa;
3068 3073 sin6->sin6_port = htons(dportal->a_port);
3069 3074 bcopy(&dportal->a_addr.i_addr.in6,
3070 3075 &sin6->sin6_addr, sizeof (struct in6_addr));
3071 3076 } else {
3072 3077 ASSERT(0);
3073 3078 }
3074 3079 }
3075 3080
3076 3081
/*
 * Return a human-readable form of a sockaddr_storage, written into buf
 * (capacity size) as "[ip-address].port".  This is used in calls to
 * logging functions.  If several calls to idm_sa_ntop are made within the
 * same invocation of a logging function, then each one needs its own buf.
 *
 * If the family is neither AF_INET nor AF_INET6, the address cannot be
 * converted, or buf is too small for the address plus the widest possible
 * "[].65535" decoration, the placeholder "[0].-1" is written instead.
 * The return value is always buf.
 */
const char *
idm_sa_ntop(const struct sockaddr_storage *sa,
    char *buf, size_t size)
{
	static const char bogus_ip[] = "[0].-1";
	char		addrstr[INET6_ADDRSTRLEN];
	const void	*rawaddr = NULL;
	in_port_t	netport = 0;
	int		af = 0;

	/* Pick out the family-specific address and port fields. */
	if (sa->ss_family == AF_INET6) {
		const struct sockaddr_in6 *v6 =
		    (const struct sockaddr_in6 *)sa;

		af = v6->sin6_family;
		rawaddr = &v6->sin6_addr;
		netport = v6->sin6_port;
	} else if (sa->ss_family == AF_INET) {
		const struct sockaddr_in *v4 =
		    (const struct sockaddr_in *)sa;

		af = v4->sin_family;
		rawaddr = &v4->sin_addr;
		netport = v4->sin_port;
	}

	/*
	 * Format only when the address converts and the worst-case
	 * decorated string (including its terminator) fits in buf.
	 */
	if (rawaddr != NULL &&
	    inet_ntop(af, rawaddr, addrstr, sizeof (addrstr)) != NULL &&
	    strlen(addrstr) + sizeof ("[].65535") <= size) {
		(void) snprintf(buf, size, "[%s].%u", addrstr,
		    ntohs(netport));
		return (buf);
	}

	(void) snprintf(buf, size, "%s", bogus_ip);
	return (buf);
}
↓ open down ↓ |
1865 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX