7127 remove -Wno-missing-braces from Makefile.uts
--- old/usr/src/uts/common/io/rsm/rsm.c
+++ new/usr/src/uts/common/io/rsm/rsm.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 * Copyright 2012 Milan Jurik. All rights reserved.
25 25 */
26 26
27 27
28 28 /*
29 29 * Overview of the RSM Kernel Agent:
30 30 * ---------------------------------
31 31 *
32 32 * rsm.c constitutes the implementation of the RSM kernel agent. The RSM
33 33 * kernel agent is a pseudo device driver which makes use of the RSMPI
34 34 * interface on behalf of the RSMAPI user library.
35 35 *
36 36 * The kernel agent functionality can be categorized into the following
37 37 * components:
38 38 * 1. Driver Infrastructure
39 39 * 2. Export/Import Segment Management
40 40 * 3. Internal resource allocation/deallocation
41 41 *
42 42 * The driver infrastructure includes the basic module loading entry points
43 43 * like _init, _info, _fini to load, unload and report information about
44 44 * the driver module. The driver infrastructure also includes the
45 45 * autoconfiguration entry points, namely attach, detach and getinfo for
46 46 * the device autoconfiguration.
47 47 *
48 48 * The kernel agent is a pseudo character device driver and exports
49 49 * a cb_ops structure which defines the driver entry points for character
50 50 * device access. This includes the open and close entry points. The
51 51 * other entry points provided include ioctl, devmap, segmap and chpoll.
52 52 * The read and write entry points are not used since the device is memory
53 53 * mapped. Also ddi_prop_op is used for the prop_op entry point.
54 54 *
55 55 * The ioctl entry point supports a number of commands, which are used by
56 56 * the RSMAPI library in order to export and import segments. These
57 57 * commands include commands for binding and rebinding the physical pages
58 58 * allocated to the virtual address range, publishing the export segment,
59 59 * unpublishing and republishing an export segment, creating an
60 60 * import segment and a virtual connection from this import segment to
61 61 * an export segment, performing scatter-gather data transfers, and
62 62 * barrier operations.
63 63 *
64 64 *
65 65 * Export and Import segments:
66 66 * ---------------------------
67 67 *
68 68 * In order to create an RSM export segment a process allocates a range in its
69 69 * virtual address space for the segment using standard Solaris interfaces.
70 70 * The process then calls RSMAPI, which in turn makes an ioctl call to the
71 71 * RSM kernel agent for an allocation of physical memory pages and for
72 72 * creation of the export segment by binding these pages to the virtual
73 73 * address range. These pages are locked in memory so that remote accesses
74 74 * are always applied to the correct page. Then the RSM segment is published,
75 75 * again via RSMAPI making an ioctl to the RSM kernel agent, and a segment id
76 76 * is assigned to it.
77 77 *
78 78 * In order to import a published RSM segment, RSMAPI creates an import
79 79 * segment and forms a virtual connection across the interconnect to the
80 80 * export segment, via an ioctl into the kernel agent with the connect
81 81 * command. The import segment setup is completed by mapping the
82 82 * local device memory into the importer's virtual address space. The
83 83 * mapping of the import segment is handled by the segmap/devmap
84 84 * infrastructure described as follows.
85 85 *
86 86 * Segmap and Devmap interfaces:
87 87 *
88 88 * The RSM kernel agent allows device memory to be directly accessed by user
89 89 * threads via memory mapping. In order to do so, the RSM kernel agent
90 90 * supports the devmap and segmap entry points.
91 91 *
92 92 * The segmap entry point(rsm_segmap) is responsible for setting up a memory
93 93 * mapping as requested by mmap. The devmap entry point(rsm_devmap) is
94 94 * responsible for exporting the device memory to the user applications.
95 95 * rsm_segmap calls RSMPI rsm_map to allocate device memory. Then the
96 96 * control is transferred to the devmap_setup call which calls rsm_devmap.
97 97 *
98 98 * rsm_devmap validates the user mapping to the device or kernel memory
99 99 * and passes the information to the system for setting up the mapping. The
100 100 * actual setting up of the mapping is done by devmap_devmem_setup(for
101 101 * device memory) or devmap_umem_setup(for kernel memory). Callbacks are
102 102 * registered for device context management via the devmap_devmem_setup
103 103 * or devmap_umem_setup calls. The callbacks are rsmmap_map, rsmmap_unmap,
104 104 * rsmmap_access, rsmmap_dup. The callbacks are called when a new mapping
105 105 * is created, a mapping is freed, a mapping is accessed or an existing
106 106 * mapping is duplicated respectively. These callbacks allow the RSM kernel
107 107 * agent to maintain state information associated with the mappings.
108 108 * The state information is mainly in the form of a cookie list for the import
109 109 * segment for which mapping has been done.
110 110 *
111 111 * Forced disconnect of import segments:
112 112 *
113 113 * When an exported segment is unpublished, the exporter sends a forced
114 114 * disconnect message to all its importers. The importer segments are
115 115 * unloaded and disconnected. This involves unloading the original
116 116 * mappings and remapping to a preallocated kernel trash page. This is
117 117 * done by devmap_umem_remap. The trash/dummy page is a kernel page,
118 118 * preallocated by the kernel agent during attach using ddi_umem_alloc with
119 119 * the DDI_UMEM_TRASH flag set. This avoids a core dump in the application
120 120 * due to unloading of the original mappings.
121 121 *
122 122 * Additionally every segment has a mapping generation number associated
123 123 * with it. This is an entry in the barrier generation page, created
124 124 * during attach time. This mapping generation number for the import
125 125 * segments is incremented on a force disconnect to notify the application
126 126 * of the force disconnect. On this notification, the application needs
127 127 * to reconnect the segment to establish a new legitimate mapping.
128 128 *
129 129 *
130 130 * Locks used in the kernel agent:
131 131 * -------------------------------
132 132 *
133 133 * The kernel agent uses a variety of mutexes and condition variables for
134 134 * mutual exclusion of the shared data structures and for synchronization
135 135 * between the various threads. Some of the locks are described as follows.
136 136 *
137 137 * Each resource structure, which represents either an export/import segment
138 138 * has a lock associated with it. The lock is the resource mutex, rsmrc_lock.
139 139 * This is used directly by RSMRC_LOCK and RSMRC_UNLOCK macros and in the
140 140 * rsmseglock_acquire and rsmseglock_release macros. An additional
141 141 * lock called the rsmsi_lock is used for the shared import data structure
142 142 * that is relevant for resources representing import segments. There is
143 143 * also a condition variable associated with the resource called s_cv. This
144 144 * is used to wait for events like the segment state change etc.
145 145 *
146 146 * The resource structures are allocated from a pool of resource structures,
147 147 * called rsm_resource. This pool is protected via a reader-writer lock,
148 148 * called rsmrc_lock.
149 149 *
150 150 * There are two separate hash tables, one for the export segments and
151 151 * one for the import segments. The export segments are inserted into the
152 152 * export segment hash table only after they have been published and the
153 153 * import segments are inserted in the import segments list only after they
154 154 * have successfully connected to an exported segment. These tables are
155 155 * protected via reader-writer locks.
156 156 *
157 157 * Debug Support in the kernel agent:
158 158 * ----------------------------------
159 159 *
160 160 * Debugging support in the kernel agent is provided by the following
161 161 * macros.
162 162 *
163 163 * DBG_PRINTF((category, level, message)) is a macro which logs a debug
164 164 * message to the kernel agent's debug buffer, rsmka_dbg. This debug buffer
165 165 * can be viewed in kmdb as *rsmka_dbg/s. The message is logged based
166 166 * on the definition of the category and level. All messages that belong to
167 167 * the specified category(rsmdbg_category) and are of an equal or greater
168 168 * severity than the specified level(rsmdbg_level) are logged. The message
169 169 * is a string which uses the same formatting rules as the strings used in
170 170 * printf.
171 171 *
172 172 * The category defines which component of the kernel agent has logged this
173 173 * message. There are a number of categories that have been defined such as
174 174 * RSM_KERNEL_AGENT, RSM_OPS, RSM_IMPORT, RSM_EXPORT etc. A macro,
175 175 * DBG_ADDCATEGORY, is used to add another category to the currently
176 176 * specified category value so that the component using this new category
177 177 * can also effectively log debug messages. Thus, the category of a specific
178 178 * message is some combination of the available categories and we can define
179 179 * sub-categories if we want a finer level of granularity.
180 180 *
181 181 * The level defines the severity of the message. Different level values are
182 182 * defined, with RSM_ERR being the most severe and RSM_DEBUG_VERBOSE being
183 183 * the least severe(debug level is 0).
184 184 *
185 185 * DBG_DEFINE and DBG_DEFINE_STR are macros provided to declare a debug
186 186 * variable or a string respectively.
187 187 *
188 188 *
189 189 * NOTES:
190 190 *
191 191 * Special Fork and Exec Handling:
192 192 * -------------------------------
193 193 *
194 194 * The backing physical pages of an exported segment are always locked down.
195 195 * Thus, there are two cases in which a process having exported segments
196 196 * will cause a cpu to hang: (1) the process invokes exec; (2) a process
197 197 * forks and invokes exit before the duped file descriptors for the export
198 198 * segments are closed in the child process. The hang is caused because the
199 199 * address space release algorithm in the Solaris VM subsystem is based on a
200 200 * non-blocking loop which does not terminate while segments are locked
201 201 * down. In addition to this, the Solaris VM subsystem lacks a callback
202 202 * mechanism to the rsm kernel agent to allow unlocking these export
203 203 * segment pages.
204 204 *
205 205 * In order to circumvent this problem, the kernel agent does the following.
206 206 * The Solaris VM subsystem keeps memory segments in increasing order of
207 207 * virtual addresses. Thus a special page(special_exit_offset) is allocated
208 208 * by the kernel agent and is mmapped into the heap area of the process address
209 209 * space(the mmap is done by the RSMAPI library). During the mmap processing
210 210 * of this special page by the devmap infrastructure, a callback(the same
211 211 * devmap context management callbacks discussed above) is registered for an
212 212 * unmap.
213 213 *
214 214 * As discussed above, this page is processed by the Solaris address space
215 215 * release code before any of the exported segments' pages(which are allocated
216 216 * from high memory). It is during this processing that the unmap callback gets
217 217 * called and this callback is responsible for force destroying the exported
218 218 * segments and thus eliminating the problem of locked pages.
219 219 *
220 220 * Flow-control:
221 221 * ------------
222 222 *
223 223 * A credit based flow control algorithm is used for messages whose
224 224 * processing cannot be done in the interrupt context because it might
225 225 * involve invoking rsmpi calls, or might take a long time to complete
226 226 * or might need to allocate resources. The algorithm operates on a
227 227 * per-path basis. To send a message the pathend needs a credit and it
228 228 * consumes one for every message that is flow controlled. On the
229 229 * receiving pathend the message is put on a msgbuf_queue and a task is
230 230 * dispatched on the worker thread, recv_taskq, where it is processed.
231 231 * After processing a message, the receiving pathend dequeues it and,
232 232 * once it has processed more than RSMIPC_LOTSFREE_MSGBUFS messages,
233 233 * sends credits back to the sending pathend (sketch after this comment).
234 234 *
235 235 * RSM_DRTEST:
236 236 * -----------
237 237 *
238 238 * This is used to enable DR testing using a test driver on test
239 239 * platforms which do not support DR.
240 240 *
241 241 */
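
/*
 * Editor's sketch of the credit-based flow control described above. The
 * toy_* names are hypothetical stand-ins, not the driver's actual types
 * or routines (the real machinery uses path_t, msgbuf queues and
 * recv_taskq); only RSMIPC_LOTSFREE_MSGBUFS corresponds to a real
 * constant. A sender consumes one credit per flow-controlled message,
 * and a receiver returns credits in bulk once it has drained more than
 * a low-water count of messages.
 */
#define	TOY_LOTSFREE_MSGBUFS	8	/* stands in for RSMIPC_LOTSFREE_MSGBUFS */

typedef struct toy_pathend {
	int	pe_credits;	/* send credits remaining on this path */
	int	pe_drained;	/* messages processed since last credit return */
} toy_pathend_t;

/* Sender side: refuse to send when no credit is available. */
static int
toy_flowctl_send(toy_pathend_t *pe /* , the message ... */)
{
	if (pe->pe_credits == 0)
		return (-1);	/* caller must queue the message and retry */
	pe->pe_credits--;
	/* ... hand the message to the transport here ... */
	return (0);
}

/* Receiver side: called after a message is dequeued and processed. */
static void
toy_flowctl_drained(toy_pathend_t *local, toy_pathend_t *remote)
{
	if (++local->pe_drained > TOY_LOTSFREE_MSGBUFS) {
		/* a single control message returns all accumulated credits */
		remote->pe_credits += local->pe_drained;
		local->pe_drained = 0;
	}
}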
242 242
243 243 #include <sys/types.h>
244 244 #include <sys/param.h>
245 245 #include <sys/user.h>
246 246 #include <sys/buf.h>
247 247 #include <sys/systm.h>
248 248 #include <sys/cred.h>
249 249 #include <sys/vm.h>
250 250 #include <sys/uio.h>
251 251 #include <vm/seg.h>
252 252 #include <vm/page.h>
253 253 #include <sys/stat.h>
254 254
255 255 #include <sys/time.h>
256 256 #include <sys/errno.h>
257 257
258 258 #include <sys/file.h>
259 259 #include <sys/uio.h>
260 260 #include <sys/proc.h>
261 261 #include <sys/mman.h>
262 262 #include <sys/open.h>
263 263 #include <sys/atomic.h>
264 264 #include <sys/mem_config.h>
265 265
266 266
267 267 #include <sys/ddi.h>
268 268 #include <sys/devops.h>
269 269 #include <sys/ddidevmap.h>
270 270 #include <sys/sunddi.h>
271 271 #include <sys/esunddi.h>
272 272 #include <sys/ddi_impldefs.h>
273 273
274 274 #include <sys/kmem.h>
275 275 #include <sys/conf.h>
276 276 #include <sys/devops.h>
277 277 #include <sys/ddi_impldefs.h>
278 278
279 279 #include <sys/modctl.h>
280 280
281 281 #include <sys/policy.h>
282 282 #include <sys/types.h>
283 283 #include <sys/conf.h>
284 284 #include <sys/param.h>
285 285
286 286 #include <sys/taskq.h>
287 287
288 288 #include <sys/rsm/rsm_common.h>
289 289 #include <sys/rsm/rsmapi_common.h>
290 290 #include <sys/rsm/rsm.h>
291 291 #include <rsm_in.h>
292 292 #include <sys/rsm/rsmka_path_int.h>
293 293 #include <sys/rsm/rsmpi.h>
294 294
295 295 #include <sys/modctl.h>
296 296 #include <sys/debug.h>
297 297
298 298 #include <sys/tuneable.h>
299 299
300 300 #ifdef RSM_DRTEST
301 301 extern int rsm_kphysm_setup_func_register(kphysm_setup_vector_t *vec,
302 302 void *arg);
303 303 extern void rsm_kphysm_setup_func_unregister(kphysm_setup_vector_t *vec,
304 304 void *arg);
305 305 #endif
306 306
307 307 extern void dbg_printf(int category, int level, char *fmt, ...);
308 308 extern void rsmka_pathmanager_init();
309 309 extern void rsmka_pathmanager_cleanup();
310 310 extern void rele_sendq_token(sendq_token_t *);
311 311 extern rsm_addr_t get_remote_hwaddr(adapter_t *, rsm_node_id_t);
312 312 extern rsm_node_id_t get_remote_nodeid(adapter_t *, rsm_addr_t);
313 313 extern int rsmka_topology_ioctl(caddr_t, int, int);
314 314
315 315 extern pri_t maxclsyspri;
316 316 extern work_queue_t work_queue;
317 317 extern kmutex_t ipc_info_lock;
318 318 extern kmutex_t ipc_info_cvlock;
319 319 extern kcondvar_t ipc_info_cv;
320 320 extern kmutex_t path_hold_cvlock;
321 321 extern kcondvar_t path_hold_cv;
322 322
323 323 extern kmutex_t rsmka_buf_lock;
324 324
325 325 extern path_t *rsm_find_path(char *, int, rsm_addr_t);
326 326 extern adapter_t *rsmka_lookup_adapter(char *, int);
327 327 extern sendq_token_t *rsmka_get_sendq_token(rsm_node_id_t, sendq_token_t *);
328 328 extern boolean_t rsmka_do_path_active(path_t *, int);
329 329 extern boolean_t rsmka_check_node_alive(rsm_node_id_t);
330 330 extern void rsmka_release_adapter(adapter_t *);
331 331 extern void rsmka_enqueue_msgbuf(path_t *path, void *data);
332 332 extern void rsmka_dequeue_msgbuf(path_t *path);
333 333 extern msgbuf_elem_t *rsmka_gethead_msgbuf(path_t *path);
334 334 /* lint -w2 */
335 335
336 336 static int rsm_open(dev_t *, int, int, cred_t *);
337 337 static int rsm_close(dev_t, int, int, cred_t *);
338 338 static int rsm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
339 339 cred_t *credp, int *rvalp);
340 340 static int rsm_devmap(dev_t, devmap_cookie_t, offset_t, size_t, size_t *,
341 341 uint_t);
342 342 static int rsm_segmap(dev_t, off_t, struct as *, caddr_t *, off_t, uint_t,
343 343 uint_t, uint_t, cred_t *);
344 344 static int rsm_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
345 345 struct pollhead **phpp);
346 346
347 347 static int rsm_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
348 348 static int rsm_attach(dev_info_t *, ddi_attach_cmd_t);
349 349 static int rsm_detach(dev_info_t *, ddi_detach_cmd_t);
350 350
351 351 static int rsmipc_send(rsm_node_id_t, rsmipc_request_t *, rsmipc_reply_t *);
352 352 static void rsm_force_unload(rsm_node_id_t, rsm_memseg_id_t, boolean_t);
353 353 static void rsm_send_importer_disconnects(rsm_memseg_id_t, rsm_node_id_t);
354 354 static void rsm_send_republish(rsm_memseg_id_t, rsmapi_access_entry_t *, int,
355 355 rsm_permission_t);
356 356 static void rsm_export_force_destroy(ddi_umem_cookie_t *);
357 357 static void rsmacl_free(rsmapi_access_entry_t *, int);
358 358 static void rsmpiacl_free(rsm_access_entry_t *, int);
359 359
360 360 static int rsm_inc_pgcnt(pgcnt_t);
361 361 static void rsm_dec_pgcnt(pgcnt_t);
362 362 static void rsm_free_mapinfo(rsm_mapinfo_t *mapinfop);
363 363 static rsm_mapinfo_t *rsm_get_mapinfo(rsmseg_t *, off_t, size_t, off_t *,
364 364 size_t *);
365 365 static void exporter_quiesce();
366 366 static void rsmseg_suspend(rsmseg_t *, int *);
367 367 static void rsmsegshare_suspend(rsmseg_t *);
368 368 static int rsmseg_resume(rsmseg_t *, void **);
369 369 static int rsmsegshare_resume(rsmseg_t *);
370 370
371 371 static struct cb_ops rsm_cb_ops = {
372 372 rsm_open, /* open */
373 373 rsm_close, /* close */
374 374 nodev, /* strategy */
375 375 nodev, /* print */
376 376 nodev, /* dump */
377 377 nodev, /* read */
378 378 nodev, /* write */
379 379 rsm_ioctl, /* ioctl */
380 380 rsm_devmap, /* devmap */
381 381 NULL, /* mmap */
382 382 rsm_segmap, /* segmap */
383 383 rsm_chpoll, /* poll */
384 384 ddi_prop_op, /* cb_prop_op */
385 385 0, /* streamtab */
386 386 D_NEW|D_MP|D_DEVMAP, /* Driver compatibility flag */
387 387 0,
388 388 0,
389 389 0
390 390 };
391 391
392 392 static struct dev_ops rsm_ops = {
393 393 DEVO_REV, /* devo_rev, */
394 394 0, /* refcnt */
395 395 rsm_info, /* get_dev_info */
396 396 nulldev, /* identify */
397 397 nulldev, /* probe */
398 398 rsm_attach, /* attach */
399 399 rsm_detach, /* detach */
400 400 nodev, /* reset */
401 401 &rsm_cb_ops, /* driver operations */
402 402 (struct bus_ops *)0, /* bus operations */
403 403 0,
404 404 ddi_quiesce_not_needed, /* quiesce */
405 405 };
406 406
407 407 /*
408 408 * Module linkage information for the kernel.
409 409 */
410 410
411 411 static struct modldrv modldrv = {
412 412 &mod_driverops, /* Type of module. This one is a pseudo driver */
413 413 "Remote Shared Memory Driver",
414 414 &rsm_ops, /* driver ops */
415 415 };
416 416
417 417 static struct modlinkage modlinkage = {
418 418 MODREV_1,
419 - (void *)&modldrv,
420 - 0,
421 - 0,
422 - 0
419 + { (void *)&modldrv, NULL }
423 420 };
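
/*
 * Editor's illustration (not part of this file) of why the hunk above
 * adds braces: ml_linkage is an array member of struct modlinkage, so
 * its initializer needs its own brace level to satisfy GCC's
 * -Wmissing-braces once the -Wno-missing-braces override is dropped
 * from Makefile.uts. The struct below is a hypothetical stand-in.
 */
struct toy_linkage {
	int	tl_rev;
	void	*tl_linkage[4];		/* array sub-object */
};

/* flat form: -Wmissing-braces fires on the tl_linkage sub-object */
static struct toy_linkage toy_flat = { 1, 0, 0, 0, 0 };

/* nested form, as in the hunk above: compiles cleanly */
static struct toy_linkage toy_nested = { 1, { 0, 0, 0, 0 } };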
424 421
425 422 static void rsm_dr_callback_post_add(void *arg, pgcnt_t delta);
426 423 static int rsm_dr_callback_pre_del(void *arg, pgcnt_t delta);
427 424 static void rsm_dr_callback_post_del(void *arg, pgcnt_t delta, int cancelled);
428 425
429 426 static kphysm_setup_vector_t rsm_dr_callback_vec = {
430 427 KPHYSM_SETUP_VECTOR_VERSION,
431 428 rsm_dr_callback_post_add,
432 429 rsm_dr_callback_pre_del,
433 430 rsm_dr_callback_post_del
434 431 };
435 432
436 433 /* This flag can be changed to 0 to help with PIT testing */
437 434 int rsmka_modunloadok = 1;
438 435 int no_reply_cnt = 0;
439 436
440 437 uint64_t rsm_ctrlmsg_errcnt = 0;
441 438 uint64_t rsm_ipcsend_errcnt = 0;
442 439
443 440 #define MAX_NODES 64
444 441
445 442 static struct rsm_driver_data rsm_drv_data;
446 443 static struct rsmresource_table rsm_resource;
447 444
448 445 static void rsmresource_insert(minor_t, rsmresource_t *, rsm_resource_type_t);
449 446 static void rsmresource_destroy(void);
450 447 static int rsmresource_alloc(minor_t *);
451 448 static rsmresource_t *rsmresource_free(minor_t rnum);
452 449 static int rsm_closeconnection(rsmseg_t *seg, void **cookie);
453 450 static int rsm_unpublish(rsmseg_t *seg, int mode);
454 451 static int rsm_unbind(rsmseg_t *seg);
455 452 static uint_t rsmhash(rsm_memseg_id_t key);
456 453 static void rsmhash_alloc(rsmhash_table_t *rhash, int size);
457 454 static void rsmhash_free(rsmhash_table_t *rhash, int size);
458 455 static void *rsmhash_getbkt(rsmhash_table_t *rhash, uint_t hashval);
459 456 static void **rsmhash_bktaddr(rsmhash_table_t *rhash, uint_t hashval);
460 457 static int rsm_send_notimporting(rsm_node_id_t dest, rsm_memseg_id_t segid,
461 458 void *cookie);
462 459 int rsm_disconnect(rsmseg_t *seg);
463 460 void rsmseg_unload(rsmseg_t *);
464 461 void rsm_suspend_complete(rsm_node_id_t src_node, int flag);
465 462
466 463 rsm_intr_hand_ret_t rsm_srv_func(rsm_controller_object_t *chd,
467 464 rsm_intr_q_op_t opcode, rsm_addr_t src,
468 465 void *data, size_t size, rsm_intr_hand_arg_t arg);
469 466
470 467 static void rsm_intr_callback(void *, rsm_addr_t, rsm_intr_hand_arg_t);
471 468
472 469 rsm_node_id_t my_nodeid;
473 470
474 471 /* cookie, va, offsets and length for the barrier */
475 472 static rsm_gnum_t *bar_va;
476 473 static ddi_umem_cookie_t bar_cookie;
477 474 static off_t barrier_offset;
478 475 static size_t barrier_size;
479 476 static int max_segs;
480 477
481 478 /* cookie for the trash memory */
482 479 static ddi_umem_cookie_t remap_cookie;
483 480
484 481 static rsm_memseg_id_t rsm_nextavail_segmentid;
485 482
486 483 extern taskq_t *work_taskq;
487 484 extern char *taskq_name;
488 485
489 486 static dev_info_t *rsm_dip; /* private copy of devinfo pointer */
490 487
491 488 static rsmhash_table_t rsm_export_segs; /* list of exported segs */
492 489 rsmhash_table_t rsm_import_segs; /* list of imported segs */
493 490 static rsmhash_table_t rsm_event_queues; /* list of event queues */
494 491
495 492 static rsm_ipc_t rsm_ipc; /* ipc info */
496 493
497 494 /* list of nodes to which RSMIPC_MSG_SUSPEND has been sent */
498 495 static list_head_t rsm_suspend_list;
499 496
500 497 /* list of descriptors for remote importers */
501 498 static importers_table_t importer_list;
502 499
503 500 kmutex_t rsm_suspend_cvlock;
504 501 kcondvar_t rsm_suspend_cv;
505 502
506 503 static kmutex_t rsm_lock;
507 504
508 505 adapter_t loopback_adapter;
509 506 rsm_controller_attr_t loopback_attr;
510 507
511 508 int rsmipc_send_controlmsg(path_t *path, int msgtype);
512 509
513 510 void rsmka_init_loopback();
514 511
515 512 int rsmka_null_seg_create(
516 513 rsm_controller_handle_t,
517 514 rsm_memseg_export_handle_t *,
518 515 size_t,
519 516 uint_t,
520 517 rsm_memory_local_t *,
521 518 rsm_resource_callback_t,
522 519 rsm_resource_callback_arg_t);
523 520
524 521 int rsmka_null_seg_destroy(
525 522 rsm_memseg_export_handle_t);
526 523
527 524 int rsmka_null_bind(
528 525 rsm_memseg_export_handle_t,
529 526 off_t,
530 527 rsm_memory_local_t *,
531 528 rsm_resource_callback_t,
532 529 rsm_resource_callback_arg_t);
533 530
534 531 int rsmka_null_unbind(
535 532 rsm_memseg_export_handle_t,
536 533 off_t,
537 534 size_t);
538 535
539 536 int rsmka_null_rebind(
540 537 rsm_memseg_export_handle_t,
541 538 off_t,
542 539 rsm_memory_local_t *,
543 540 rsm_resource_callback_t,
544 541 rsm_resource_callback_arg_t);
545 542
546 543 int rsmka_null_publish(
547 544 rsm_memseg_export_handle_t,
548 545 rsm_access_entry_t [],
549 546 uint_t,
550 547 rsm_memseg_id_t,
551 548 rsm_resource_callback_t,
552 549 rsm_resource_callback_arg_t);
553 550
554 551
555 552 int rsmka_null_republish(
556 553 rsm_memseg_export_handle_t,
557 554 rsm_access_entry_t [],
558 555 uint_t,
559 556 rsm_resource_callback_t,
560 557 rsm_resource_callback_arg_t);
561 558
562 559 int rsmka_null_unpublish(
563 560 rsm_memseg_export_handle_t);
564 561
565 562 rsm_ops_t null_rsmpi_ops;
566 563
567 564 /*
568 565 * data and locks to keep track of total amount of exported memory
569 566 */
570 567 static pgcnt_t rsm_pgcnt;
571 568 static pgcnt_t rsm_pgcnt_max; /* max allowed */
572 569 static kmutex_t rsm_pgcnt_lock;
573 570
574 571 static int rsm_enable_dr;
575 572
576 573 static char loopback_str[] = "loopback";
577 574
578 575 int rsm_hash_size;
579 576
580 577 /*
581 578 * The locking model is as follows:
582 579 *
583 580 * Local operations:
584 581 * find resource - grab reader lock on resource list
585 582 * insert rc - grab writer lock
586 583 * delete rc - grab writer lock and resource mutex
587 584 * read/write - no lock
588 585 *
589 586 * Remote invocations:
590 587 * find resource - grab read lock and resource mutex
591 588 *
592 589 * State:
593 590 * resource state - grab resource mutex
594 591 */
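
/*
 * Editor's sketch of the local "find resource" path under this model
 * (rsmresource_lookup() further below is the real implementation):
 * hold the table's reader lock while locating the entry, take the
 * per-resource mutex before dropping the reader lock, and return with
 * only the resource mutex held.
 */
#if 0	/* illustrative only */
static rsmresource_t *
find_resource_sketch(minor_t rnum)
{
	rsmresource_t *p;

	rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
	p = NULL;	/* index rsm_resource.rsmrc_root by rnum here */
	if (p != NULL)
		mutex_enter(&p->rsmrc_lock);
	rw_exit(&rsm_resource.rsmrc_lock);
	return (p);	/* caller releases p->rsmrc_lock */
}
#endif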
595 592
596 593 int
597 594 _init(void)
598 595 {
599 596 int e;
600 597
601 598 e = mod_install(&modlinkage);
602 599 if (e != 0) {
603 600 return (e);
604 601 }
605 602
606 603 mutex_init(&rsm_lock, NULL, MUTEX_DRIVER, NULL);
607 604
608 605 mutex_init(&rsmka_buf_lock, NULL, MUTEX_DEFAULT, NULL);
609 606
610 607
611 608 rw_init(&rsm_resource.rsmrc_lock, NULL, RW_DRIVER, NULL);
612 609
613 610 rsm_hash_size = RSM_HASHSZ;
614 611
615 612 rw_init(&rsm_export_segs.rsmhash_rw, NULL, RW_DRIVER, NULL);
616 613
617 614 rw_init(&rsm_import_segs.rsmhash_rw, NULL, RW_DRIVER, NULL);
618 615
619 616 mutex_init(&importer_list.lock, NULL, MUTEX_DRIVER, NULL);
620 617
621 618 mutex_init(&rsm_ipc.lock, NULL, MUTEX_DRIVER, NULL);
622 619 cv_init(&rsm_ipc.cv, NULL, CV_DRIVER, 0);
623 620
624 621 mutex_init(&rsm_suspend_cvlock, NULL, MUTEX_DRIVER, NULL);
625 622 cv_init(&rsm_suspend_cv, NULL, CV_DRIVER, 0);
626 623
627 624 mutex_init(&rsm_drv_data.drv_lock, NULL, MUTEX_DRIVER, NULL);
628 625 cv_init(&rsm_drv_data.drv_cv, NULL, CV_DRIVER, 0);
629 626
630 627 rsm_ipc.count = RSMIPC_SZ;
631 628 rsm_ipc.wanted = 0;
632 629 rsm_ipc.sequence = 0;
633 630
634 631 (void) mutex_init(&rsm_pgcnt_lock, NULL, MUTEX_DRIVER, NULL);
635 632
636 633 for (e = 0; e < RSMIPC_SZ; e++) {
637 634 rsmipc_slot_t *slot = &rsm_ipc.slots[e];
638 635
639 636 RSMIPC_SET(slot, RSMIPC_FREE);
640 637 mutex_init(&slot->rsmipc_lock, NULL, MUTEX_DRIVER, NULL);
641 638 cv_init(&slot->rsmipc_cv, NULL, CV_DRIVER, 0);
642 639 }
643 640
644 641 /*
645 642 * Initialize the suspend message list
646 643 */
647 644 rsm_suspend_list.list_head = NULL;
648 645 mutex_init(&rsm_suspend_list.list_lock, NULL, MUTEX_DRIVER, NULL);
649 646
650 647 /*
651 648 * It is assumed here that configuration data is available
652 649 * during system boot since _init may be called at that time.
653 650 */
654 651
655 652 rsmka_pathmanager_init();
656 653
657 654 DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE,
658 655 "rsm: _init done\n"));
659 656
660 657 return (DDI_SUCCESS);
661 658
662 659 }
663 660
664 661 int
665 662 _info(struct modinfo *modinfop)
666 663 {
667 664
668 665 return (mod_info(&modlinkage, modinfop));
669 666 }
670 667
671 668 int
672 669 _fini(void)
673 670 {
674 671 int e;
675 672
676 673 DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE,
677 674 "rsm: _fini enter\n"));
678 675
679 676 /*
680 677 * The rsmka_modunloadok flag is simply used to help with
681 678 * the PIT testing. Make this flag 0 to disallow modunload.
682 679 */
683 680 if (rsmka_modunloadok == 0)
684 681 return (EBUSY);
685 682
686 683 /* rsm_detach will be called as a result of mod_remove */
687 684 e = mod_remove(&modlinkage);
688 685 if (e) {
689 686 DBG_PRINTF((RSM_KERNEL_AGENT, RSM_ERR,
690 687 "Unable to fini RSM %x\n", e));
691 688 return (e);
692 689 }
693 690
694 691 rsmka_pathmanager_cleanup();
695 692
696 693 rw_destroy(&rsm_resource.rsmrc_lock);
697 694
698 695 rw_destroy(&rsm_export_segs.rsmhash_rw);
699 696 rw_destroy(&rsm_import_segs.rsmhash_rw);
700 697 rw_destroy(&rsm_event_queues.rsmhash_rw);
701 698
702 699 mutex_destroy(&importer_list.lock);
703 700
704 701 mutex_destroy(&rsm_ipc.lock);
705 702 cv_destroy(&rsm_ipc.cv);
706 703
707 704 (void) mutex_destroy(&rsm_suspend_list.list_lock);
708 705
709 706 (void) mutex_destroy(&rsm_pgcnt_lock);
710 707
711 708 DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE, "_fini done\n"));
712 709
713 710 return (DDI_SUCCESS);
714 711
715 712 }
716 713
717 714 /*ARGSUSED1*/
718 715 static int
719 716 rsm_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
720 717 {
721 718 minor_t rnum;
722 719 int percent;
723 720 int ret;
724 721 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);
725 722
726 723 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_attach enter\n"));
727 724
728 725 switch (cmd) {
729 726 case DDI_ATTACH:
730 727 break;
731 728 case DDI_RESUME:
732 729 default:
733 730 DBG_PRINTF((category, RSM_ERR,
734 731 "rsm:rsm_attach - cmd not supported\n"));
735 732 return (DDI_FAILURE);
736 733 }
737 734
738 735 if (rsm_dip != NULL) {
739 736 DBG_PRINTF((category, RSM_ERR,
740 737 "rsm:rsm_attach - supports only "
741 738 "one instance\n"));
742 739 return (DDI_FAILURE);
743 740 }
744 741
745 742 rsm_enable_dr = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
746 743 DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
747 744 "enable-dynamic-reconfiguration", 1);
748 745
749 746 mutex_enter(&rsm_drv_data.drv_lock);
750 747 rsm_drv_data.drv_state = RSM_DRV_REG_PROCESSING;
751 748 mutex_exit(&rsm_drv_data.drv_lock);
752 749
753 750 if (rsm_enable_dr) {
754 751 #ifdef RSM_DRTEST
755 752 ret = rsm_kphysm_setup_func_register(&rsm_dr_callback_vec,
756 753 (void *)NULL);
757 754 #else
758 755 ret = kphysm_setup_func_register(&rsm_dr_callback_vec,
759 756 (void *)NULL);
760 757 #endif
761 758 if (ret != 0) {
762 759 /* drv_lock is not held at this point; no mutex_exit needed */
763 760 cmn_err(CE_CONT, "rsm:rsm_attach - Dynamic "
764 761 "reconfiguration setup failed\n");
765 762 return (DDI_FAILURE);
766 763 }
767 764 }
768 765
769 766 mutex_enter(&rsm_drv_data.drv_lock);
770 767 ASSERT(rsm_drv_data.drv_state == RSM_DRV_REG_PROCESSING);
771 768 rsm_drv_data.drv_state = RSM_DRV_OK;
772 769 cv_broadcast(&rsm_drv_data.drv_cv);
773 770 mutex_exit(&rsm_drv_data.drv_lock);
774 771
775 772 /*
776 773 * page_list_read_lock();
777 774 * xx_setup();
778 775 * page_list_read_unlock();
779 776 */
780 777
781 778 rsm_hash_size = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
782 779 DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
783 780 "segment-hashtable-size", RSM_HASHSZ);
784 781 if (rsm_hash_size == 0) {
785 782 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
786 783 "rsm: segment-hashtable-size in rsm.conf "
787 784 "must be greater than 0, defaulting to 128\n"));
788 785 rsm_hash_size = RSM_HASHSZ;
789 786 }
790 787
791 788 DBG_PRINTF((category, RSM_DEBUG, "rsm_attach rsm_hash_size: %d\n",
792 789 rsm_hash_size));
793 790
794 791 rsm_pgcnt = 0;
795 792
796 793 percent = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
797 794 DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
798 795 "max-exported-memory", 0);
799 796 if (percent < 0) {
800 797 DBG_PRINTF((category, RSM_ERR,
801 798 "rsm:rsm_attach not enough memory available to "
802 799 "export, or max-exported-memory set incorrectly.\n"));
803 800 return (DDI_FAILURE);
804 801 }
805 802 /* 0 indicates no fixed upper limit. maxmem is the max */
806 803 /* available pageable physical mem */
807 804 rsm_pgcnt_max = (percent*maxmem)/100;
808 805
809 806 if (rsm_pgcnt_max > 0) {
810 807 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
811 808 "rsm: Available physical memory = %lu pages, "
812 809 "Max exportable memory = %lu pages",
813 810 maxmem, rsm_pgcnt_max));
814 811 }
815 812
816 813 /*
817 814 * Create minor number
818 815 */
819 816 if (rsmresource_alloc(&rnum) != RSM_SUCCESS) {
820 817 DBG_PRINTF((category, RSM_ERR,
821 818 "rsm: rsm_attach - Unable to get "
822 819 "minor number\n"));
823 820 return (DDI_FAILURE);
824 821 }
825 822
826 823 ASSERT(rnum == RSM_DRIVER_MINOR);
827 824
828 825 if (ddi_create_minor_node(devi, DRIVER_NAME, S_IFCHR,
829 826 rnum, DDI_PSEUDO, NULL) == DDI_FAILURE) {
830 827 DBG_PRINTF((category, RSM_ERR,
831 828 "rsm: rsm_attach - unable to allocate "
832 829 "minor #\n"));
833 830 return (DDI_FAILURE);
834 831 }
835 832
836 833 rsm_dip = devi;
837 834 /*
838 835 * Allocate the hashtables
839 836 */
840 837 rsmhash_alloc(&rsm_export_segs, rsm_hash_size);
841 838 rsmhash_alloc(&rsm_import_segs, rsm_hash_size);
842 839
843 840 importer_list.bucket = (importing_token_t **)
844 841 kmem_zalloc(rsm_hash_size * sizeof (importing_token_t *), KM_SLEEP);
845 842
846 843 /*
847 844 * Allocate a resource struct
848 845 */
849 846 {
850 847 rsmresource_t *p;
851 848
852 849 p = (rsmresource_t *)kmem_zalloc(sizeof (*p), KM_SLEEP);
853 850
854 851 mutex_init(&p->rsmrc_lock, NULL, MUTEX_DRIVER, (void *) NULL);
855 852
856 853 rsmresource_insert(rnum, p, RSM_RESOURCE_BAR);
857 854 }
858 855
859 856 /*
860 857 * Based on the rsm.conf property max-segments, determine the maximum
861 858 * number of segments that can be exported/imported. This is then used
862 859 * to determine the size for barrier failure pages.
863 860 */
864 861
865 862 /* First get the max number of segments from the rsm.conf file */
866 863 max_segs = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
867 864 DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
868 865 "max-segments", 0);
869 866 if (max_segs == 0) {
870 867 /* Use default number of segments */
871 868 max_segs = RSM_MAX_NUM_SEG;
872 869 }
873 870
874 871 /*
875 872 * Based on the max number of segments allowed, determine the barrier
876 873 * page size. add 1 to max_segs since the barrier page itself uses
877 874 * a slot
878 875 */
879 876 barrier_size = roundup((max_segs + 1) * sizeof (rsm_gnum_t),
880 877 PAGESIZE);
881 878
882 879 /*
883 880 * allocation of the barrier failure page
884 881 */
885 882 bar_va = (rsm_gnum_t *)ddi_umem_alloc(barrier_size,
886 883 DDI_UMEM_SLEEP, &bar_cookie);
887 884
888 885 /*
889 886 * Set the barrier_offset
890 887 */
891 888 barrier_offset = 0;
892 889
893 890 /*
894 891 * Allocate a trash memory and get a cookie for it. This will be used
895 892 * when remapping segments during force disconnects. Allocate the
896 893 * trash memory with a large size which is page aligned.
897 894 */
898 895 (void) ddi_umem_alloc((size_t)TRASHSIZE,
899 896 DDI_UMEM_TRASH, &remap_cookie);
900 897
901 898 /* initialize user segment id allocation variable */
902 899 rsm_nextavail_segmentid = (rsm_memseg_id_t)RSM_USER_APP_ID_BASE;
903 900
904 901 /*
905 902 * initialize the null_rsmpi_ops vector and the loopback adapter
906 903 */
907 904 rsmka_init_loopback();
908 905
909 906
910 907 ddi_report_dev(devi);
911 908
912 909 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_attach done\n"));
913 910
914 911 return (DDI_SUCCESS);
915 912 }
916 913
917 914 /*
918 915 * The call to mod_remove in the _fini routine will cause the system
919 916 * to call rsm_detach.
920 917 */
921 918 /*ARGSUSED*/
922 919 static int
923 920 rsm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
924 921 {
925 922 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);
926 923
927 924 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_detach enter\n"));
928 925
929 926 switch (cmd) {
930 927 case DDI_DETACH:
931 928 break;
932 929 default:
933 930 DBG_PRINTF((category, RSM_ERR,
934 931 "rsm:rsm_detach - cmd %x not supported\n",
935 932 cmd));
936 933 return (DDI_FAILURE);
937 934 }
938 935
939 936 mutex_enter(&rsm_drv_data.drv_lock);
940 937 while (rsm_drv_data.drv_state != RSM_DRV_OK)
941 938 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
942 939 rsm_drv_data.drv_state = RSM_DRV_UNREG_PROCESSING;
943 940 mutex_exit(&rsm_drv_data.drv_lock);
944 941
945 942 /*
946 943 * Unregister the DR callback functions
947 944 */
948 945 if (rsm_enable_dr) {
949 946 #ifdef RSM_DRTEST
950 947 rsm_kphysm_setup_func_unregister(&rsm_dr_callback_vec,
951 948 (void *)NULL);
952 949 #else
953 950 kphysm_setup_func_unregister(&rsm_dr_callback_vec,
954 951 (void *)NULL);
955 952 #endif
956 953 }
957 954
958 955 mutex_enter(&rsm_drv_data.drv_lock);
959 956 ASSERT(rsm_drv_data.drv_state == RSM_DRV_UNREG_PROCESSING);
960 957 rsm_drv_data.drv_state = RSM_DRV_NEW;
961 958 mutex_exit(&rsm_drv_data.drv_lock);
962 959
963 960 ASSERT(rsm_suspend_list.list_head == NULL);
964 961
965 962 /*
966 963 * Release all resources, seglist, controller, ...
967 964 */
968 965
969 966 /* remove intersend queues */
970 967 /* remove registered services */
971 968
972 969
973 970 ddi_remove_minor_node(dip, DRIVER_NAME);
974 971 rsm_dip = NULL;
975 972
976 973 /*
977 974 * Free minor zero resource
978 975 */
979 976 {
980 977 rsmresource_t *p;
981 978
982 979 p = rsmresource_free(RSM_DRIVER_MINOR);
983 980 if (p) {
984 981 mutex_destroy(&p->rsmrc_lock);
985 982 kmem_free((void *)p, sizeof (*p));
986 983 }
987 984 }
988 985
989 986 /*
990 987 * Free resource table
991 988 */
992 989
993 990 rsmresource_destroy();
994 991
995 992 /*
996 993 * Free the hash tables
997 994 */
998 995 rsmhash_free(&rsm_export_segs, rsm_hash_size);
999 996 rsmhash_free(&rsm_import_segs, rsm_hash_size);
1000 997
1001 998 kmem_free((void *)importer_list.bucket,
1002 999 rsm_hash_size * sizeof (importing_token_t *));
1003 1000 importer_list.bucket = NULL;
1004 1001
1005 1002
1006 1003 /* free barrier page */
1007 1004 if (bar_cookie != NULL) {
1008 1005 ddi_umem_free(bar_cookie);
1009 1006 }
1010 1007 bar_va = NULL;
1011 1008 bar_cookie = NULL;
1012 1009
1013 1010 /*
1014 1011 * Free the memory allocated for the trash
1015 1012 */
1016 1013 if (remap_cookie != NULL) {
1017 1014 ddi_umem_free(remap_cookie);
1018 1015 }
1019 1016 remap_cookie = NULL;
1020 1017
1021 1018 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_detach done\n"));
1022 1019
1023 1020 return (DDI_SUCCESS);
1024 1021 }
1025 1022
1026 1023 /*ARGSUSED*/
1027 1024 static int
1028 1025 rsm_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1029 1026 {
1030 1027 register int error;
1031 1028 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);
1032 1029
1033 1030 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_info enter\n"));
1034 1031
1035 1032 switch (infocmd) {
1036 1033 case DDI_INFO_DEVT2DEVINFO:
1037 1034 if (rsm_dip == NULL)
1038 1035 error = DDI_FAILURE;
1039 1036 else {
1040 1037 *result = (void *)rsm_dip;
1041 1038 error = DDI_SUCCESS;
1042 1039 }
1043 1040 break;
1044 1041 case DDI_INFO_DEVT2INSTANCE:
1045 1042 *result = (void *)0;
1046 1043 error = DDI_SUCCESS;
1047 1044 break;
1048 1045 default:
1049 1046 error = DDI_FAILURE;
1050 1047 }
1051 1048
1052 1049 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_info done\n"));
1053 1050 return (error);
1054 1051 }
1055 1052
1056 1053 adapter_t *
1057 1054 rsm_getadapter(rsm_ioctlmsg_t *msg, int mode)
1058 1055 {
1059 1056 adapter_t *adapter;
1060 1057 char adapter_devname[MAXNAMELEN];
1061 1058 int instance;
1062 1059 DBG_DEFINE(category,
1063 1060 RSM_KERNEL_AGENT | RSM_IMPORT | RSM_EXPORT | RSM_IOCTL);
1064 1061
1065 1062 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_getadapter enter\n"));
1066 1063
1067 1064 instance = msg->cnum;
1068 1065
1069 1066 if ((msg->cname_len <= 0) || (msg->cname_len > MAXNAMELEN)) {
1070 1067 return (NULL);
1071 1068 }
1072 1069
1073 1070 if (ddi_copyin(msg->cname, adapter_devname, msg->cname_len, mode))
1074 1071 return (NULL);
1075 1072
1076 1073 if (strcmp(adapter_devname, "loopback") == 0)
1077 1074 return (&loopback_adapter);
1078 1075
1079 1076 adapter = rsmka_lookup_adapter(adapter_devname, instance);
1080 1077
1081 1078 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_getadapter done\n"));
1082 1079
1083 1080 return (adapter);
1084 1081 }
1085 1082
1086 1083
1087 1084 /*
1088 1085 * *********************** Resource Number Management ********************
1089 1086 * All resources are stored in a simple hash table. The table is an array
1090 1087 * of pointers to resource blks. Each blk contains:
1091 1088 * base - base number of this blk
1092 1089 * used - number of used slots in this blk.
1093 1090 * blks - array of pointers to resource items.
1094 1091 * An entry in a resource blk is empty if it's NULL.
1095 1092 *
1096 1093 * We start with no resource array. Each time we run out of slots, we
1097 1094 * reallocate a larger array, copy the existing blk pointers into it,
1098 1095 * and allocate a new resource blk which is added to the table.
1099 1096 *
1100 1097 * The resource control block contains:
1101 1098 * root - array of pointer of resource blks
1102 1099 * sz - current size of array.
1103 1100 * len - last valid entry in array.
1104 1101 *
1105 1102 * A search operation based on a resource number is as follows:
1106 1103 * index = rnum / RESOURCE_BLKSZ;
1107 1104 * ASSERT(index < resource_block.len);
1108 1105 * ASSERT(index < resource_block.sz);
1109 1106 * offset = rnum % RESOURCE_BLKSZ;
1110 1107 * ASSERT(rnum >= resource_block.root[index]->base);
1111 1108 * ASSERT(rnum < resource_block.root[index]->base + RESOURCE_BLKSZ);
1112 1109 * return resource_block.root[index]->blks[offset];
1113 1110 *
1114 1111 * A resource blk is freed when its used count reaches zero.
1115 1112 */
1116 1113 static int
1117 1114 rsmresource_alloc(minor_t *rnum)
1118 1115 {
1119 1116
1120 1117 /* search for available resource slot */
1121 1118 int i, j, empty = -1;
1122 1119 rsmresource_blk_t *blk;
1123 1120
1124 1121 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1125 1122 "rsmresource_alloc enter\n"));
1126 1123
1127 1124 rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER);
1128 1125
1129 1126 /* Try to find an empty slot */
1130 1127 for (i = 0; i < rsm_resource.rsmrc_len; i++) {
1131 1128 blk = rsm_resource.rsmrc_root[i];
1132 1129 if (blk != NULL && blk->rsmrcblk_avail > 0) {
1133 1130 /* found an empty slot in this blk */
1134 1131 for (j = 0; j < RSMRC_BLKSZ; j++) {
1135 1132 if (blk->rsmrcblk_blks[j] == NULL) {
1136 1133 *rnum = (minor_t)
1137 1134 (j + (i * RSMRC_BLKSZ));
1138 1135 /*
1139 1136 * obey gen page limits
1140 1137 */
1141 1138 if (*rnum >= max_segs + 1) {
1142 1139 if (empty < 0) {
1143 1140 rw_exit(&rsm_resource.
1144 1141 rsmrc_lock);
1145 1142 DBG_PRINTF((
1146 1143 RSM_KERNEL_ALL,
1147 1144 RSM_ERR,
1148 1145 "rsmresource"
1149 1146 "_alloc failed:"
1150 1147 "not enough res"
1151 1148 "%d\n", *rnum));
1152 1149 return (RSMERR_INSUFFICIENT_RESOURCES);
1153 1150 } else {
1154 1151 /* use empty slot */
1155 1152 break;
1156 1153 }
1157 1154
1158 1155 }
1159 1156
1160 1157 blk->rsmrcblk_blks[j] = RSMRC_RESERVED;
1161 1158 blk->rsmrcblk_avail--;
1162 1159 rw_exit(&rsm_resource.rsmrc_lock);
1163 1160 DBG_PRINTF((RSM_KERNEL_ALL,
1164 1161 RSM_DEBUG_VERBOSE,
1165 1162 "rsmresource_alloc done\n"));
1166 1163 return (RSM_SUCCESS);
1167 1164 }
1168 1165 }
1169 1166 } else if (blk == NULL && empty < 0) {
1170 1167 /* remember first empty slot */
1171 1168 empty = i;
1172 1169 }
1173 1170 }
1174 1171
1175 1172 /* Couldn't find anything, allocate a new blk */
1176 1173 /*
1177 1174 * Do we need to reallocate the root array
1178 1175 */
1179 1176 if (empty < 0) {
1180 1177 if (rsm_resource.rsmrc_len == rsm_resource.rsmrc_sz) {
1181 1178 /*
1182 1179 * Allocate new array and copy current stuff into it
1183 1180 */
1184 1181 rsmresource_blk_t **p;
1185 1182 uint_t newsz = (uint_t)rsm_resource.rsmrc_sz +
1186 1183 RSMRC_BLKSZ;
1187 1184 /*
1188 1185 * Don't allocate more than the max valid rnum
1189 1186 */
1190 1187 if (rsm_resource.rsmrc_len*RSMRC_BLKSZ >=
1191 1188 max_segs + 1) {
1192 1189 rw_exit(&rsm_resource.rsmrc_lock);
1193 1190 return (RSMERR_INSUFFICIENT_RESOURCES);
1194 1191 }
1195 1192
1196 1193 p = (rsmresource_blk_t **)kmem_zalloc(
1197 1194 newsz * sizeof (*p),
1198 1195 KM_SLEEP);
1199 1196
1200 1197 if (rsm_resource.rsmrc_root) {
1201 1198 uint_t oldsz;
1202 1199
1203 1200 oldsz = (uint_t)(rsm_resource.rsmrc_sz *
1204 1201 (int)sizeof (*p));
1205 1202
1206 1203 /*
1207 1204 * Copy old data into new space and
1208 1205 * free old stuff
1209 1206 */
1210 1207 bcopy(rsm_resource.rsmrc_root, p, oldsz);
1211 1208 kmem_free(rsm_resource.rsmrc_root, oldsz);
1212 1209 }
1213 1210
1214 1211 rsm_resource.rsmrc_root = p;
1215 1212 rsm_resource.rsmrc_sz = (int)newsz;
1216 1213 }
1217 1214
1218 1215 empty = rsm_resource.rsmrc_len;
1219 1216 rsm_resource.rsmrc_len++;
1220 1217 }
1221 1218
1222 1219 /*
1223 1220 * Allocate a new blk
1224 1221 */
1225 1222 blk = (rsmresource_blk_t *)kmem_zalloc(sizeof (*blk), KM_SLEEP);
1226 1223 ASSERT(rsm_resource.rsmrc_root[empty] == NULL);
1227 1224 rsm_resource.rsmrc_root[empty] = blk;
1228 1225 blk->rsmrcblk_avail = RSMRC_BLKSZ - 1;
1229 1226
1230 1227 /*
1231 1228 * Allocate slot
1232 1229 */
1233 1230
1234 1231 *rnum = (minor_t)(empty * RSMRC_BLKSZ);
1235 1232
1236 1233 /*
1237 1234 * watch out not to exceed bounds of barrier page
1238 1235 */
1239 1236 if (*rnum >= max_segs + 1) {
1240 1237 rw_exit(&rsm_resource.rsmrc_lock);
1241 1238 DBG_PRINTF((RSM_KERNEL_ALL, RSM_ERR,
1242 1239 "rsmresource_alloc failed %d\n", *rnum));
1243 1240
1244 1241 return (RSMERR_INSUFFICIENT_RESOURCES);
1245 1242 }
1246 1243 blk->rsmrcblk_blks[0] = RSMRC_RESERVED;
1247 1244
1248 1245
1249 1246 rw_exit(&rsm_resource.rsmrc_lock);
1250 1247
1251 1248 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1252 1249 "rsmresource_alloc done\n"));
1253 1250
1254 1251 return (RSM_SUCCESS);
1255 1252 }
1256 1253
1257 1254 static rsmresource_t *
1258 1255 rsmresource_free(minor_t rnum)
1259 1256 {
1260 1257
1261 1258 /* search for available resource slot */
1262 1259 int i, j;
1263 1260 rsmresource_blk_t *blk;
1264 1261 rsmresource_t *p;
1265 1262
1266 1263 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1267 1264 "rsmresource_free enter\n"));
1268 1265
1269 1266 i = (int)(rnum / RSMRC_BLKSZ);
1270 1267 j = (int)(rnum % RSMRC_BLKSZ);
1271 1268
1272 1269 if (i >= rsm_resource.rsmrc_len) {
1273 1270 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1274 1271 "rsmresource_free done\n"));
1275 1272 return (NULL);
1276 1273 }
1277 1274
1278 1275 rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER);
1279 1276
1280 1277 ASSERT(rsm_resource.rsmrc_root);
1281 1278 ASSERT(i < rsm_resource.rsmrc_len);
1282 1279 ASSERT(i < rsm_resource.rsmrc_sz);
1283 1280 blk = rsm_resource.rsmrc_root[i];
1284 1281 if (blk == NULL) {
1285 1282 rw_exit(&rsm_resource.rsmrc_lock);
1286 1283 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1287 1284 "rsmresource_free done\n"));
1288 1285 return (NULL);
1289 1286 }
1290 1287
1291 1288 ASSERT(blk->rsmrcblk_blks[j]); /* reserved or full */
1292 1289
1293 1290 p = blk->rsmrcblk_blks[j];
1294 1291 if (p == RSMRC_RESERVED) {
1295 1292 p = NULL;
1296 1293 }
1297 1294
1298 1295 blk->rsmrcblk_blks[j] = NULL;
1299 1296 blk->rsmrcblk_avail++;
1300 1297 if (blk->rsmrcblk_avail == RSMRC_BLKSZ) {
1301 1298 /* free this blk */
1302 1299 kmem_free(blk, sizeof (*blk));
1303 1300 rsm_resource.rsmrc_root[i] = NULL;
1304 1301 }
1305 1302
1306 1303 rw_exit(&rsm_resource.rsmrc_lock);
1307 1304
1308 1305 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1309 1306 "rsmresource_free done\n"));
1310 1307
1311 1308 return (p);
1312 1309 }
1313 1310
1314 1311 static rsmresource_t *
1315 1312 rsmresource_lookup(minor_t rnum, int lock)
1316 1313 {
1317 1314 int i, j;
1318 1315 rsmresource_blk_t *blk;
1319 1316 rsmresource_t *p;
1320 1317
1321 1318 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1322 1319 "rsmresource_lookup enter\n"));
1323 1320
1324 1321 /* Find resource and lock it in READER mode */
1325 1322 /* search for available resource slot */
1326 1323
1327 1324 i = (int)(rnum / RSMRC_BLKSZ);
1328 1325 j = (int)(rnum % RSMRC_BLKSZ);
1329 1326
1330 1327 if (i >= rsm_resource.rsmrc_len) {
1331 1328 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1332 1329 "rsmresource_lookup done\n"));
1333 1330 return (NULL);
1334 1331 }
1335 1332
1336 1333 rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
1337 1334
1338 1335 blk = rsm_resource.rsmrc_root[i];
1339 1336 if (blk != NULL) {
1340 1337 ASSERT(i < rsm_resource.rsmrc_len);
1341 1338 ASSERT(i < rsm_resource.rsmrc_sz);
1342 1339
1343 1340 p = blk->rsmrcblk_blks[j];
1344 1341 if (lock == RSM_LOCK) {
1345 1342 if (p != RSMRC_RESERVED) {
1346 1343 mutex_enter(&p->rsmrc_lock);
1347 1344 } else {
1348 1345 p = NULL;
1349 1346 }
1350 1347 }
1351 1348 } else {
1352 1349 p = NULL;
1353 1350 }
1354 1351 rw_exit(&rsm_resource.rsmrc_lock);
1355 1352
1356 1353 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1357 1354 "rsmresource_lookup done\n"));
1358 1355
1359 1356 return (p);
1360 1357 }
1361 1358
1362 1359 static void
1363 1360 rsmresource_insert(minor_t rnum, rsmresource_t *p, rsm_resource_type_t type)
1364 1361 {
1365 1362 /* Find resource and lock it in READER mode */
1366 1363 /* Caller can upgrade if need be */
1367 1364 /* search for available resource slot */
1368 1365 int i, j;
1369 1366 rsmresource_blk_t *blk;
1370 1367
1371 1368 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1372 1369 "rsmresource_insert enter\n"));
1373 1370
1374 1371 i = (int)(rnum / RSMRC_BLKSZ);
1375 1372 j = (int)(rnum % RSMRC_BLKSZ);
1376 1373
1377 1374 p->rsmrc_type = type;
1378 1375 p->rsmrc_num = rnum;
1379 1376
1380 1377 rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
1381 1378
1382 1379 ASSERT(rsm_resource.rsmrc_root);
1383 1380 ASSERT(i < rsm_resource.rsmrc_len);
1384 1381 ASSERT(i < rsm_resource.rsmrc_sz);
1385 1382
1386 1383 blk = rsm_resource.rsmrc_root[i];
1387 1384 ASSERT(blk);
1388 1385
1389 1386 ASSERT(blk->rsmrcblk_blks[j] == RSMRC_RESERVED);
1390 1387
1391 1388 blk->rsmrcblk_blks[j] = p;
1392 1389
1393 1390 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1394 1391 "rsmresource_insert done\n"));
1395 1392
1396 1393 rw_exit(&rsm_resource.rsmrc_lock);
1397 1394 }
1398 1395
1399 1396 static void
1400 1397 rsmresource_destroy()
1401 1398 {
1402 1399 int i, j;
1403 1400
1404 1401 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1405 1402 "rsmresource_destroy enter\n"));
1406 1403
1407 1404 rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER);
1408 1405
1409 1406 for (i = 0; i < rsm_resource.rsmrc_len; i++) {
1410 1407 rsmresource_blk_t *blk;
1411 1408
1412 1409 blk = rsm_resource.rsmrc_root[i];
1413 1410 if (blk == NULL) {
1414 1411 continue;
1415 1412 }
1416 1413 for (j = 0; j < RSMRC_BLKSZ; j++) {
1417 1414 if (blk->rsmrcblk_blks[j] != NULL) {
1418 1415 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1419 1416 "Not null slot %d, %lx\n", j,
1420 1417 (size_t)blk->rsmrcblk_blks[j]));
1421 1418 }
1422 1419 }
1423 1420 kmem_free(blk, sizeof (*blk));
1424 1421 rsm_resource.rsmrc_root[i] = NULL;
1425 1422 }
1426 1423 if (rsm_resource.rsmrc_root) {
1427 1424 i = rsm_resource.rsmrc_sz * (int)sizeof (rsmresource_blk_t *);
1428 1425 kmem_free(rsm_resource.rsmrc_root, (uint_t)i);
1429 1426 rsm_resource.rsmrc_root = NULL;
1430 1427 rsm_resource.rsmrc_len = 0;
1431 1428 rsm_resource.rsmrc_sz = 0;
1432 1429 }
1433 1430
1434 1431 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1435 1432 "rsmresource_destroy done\n"));
1436 1433
1437 1434 rw_exit(&rsm_resource.rsmrc_lock);
1438 1435 }
1439 1436
1440 1437
1441 1438 /* ******************** Generic Key Hash Table Management ********* */
1442 1439 static rsmresource_t *
1443 1440 rsmhash_lookup(rsmhash_table_t *rhash, rsm_memseg_id_t key,
1444 1441 rsm_resource_state_t state)
1445 1442 {
1446 1443 rsmresource_t *p;
1447 1444 uint_t hashval;
1448 1445 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1449 1446
1450 1447 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_lookup enter\n"));
1451 1448
1452 1449 hashval = rsmhash(key);
1453 1450
1454 1451 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_lookup %u=%d\n",
1455 1452 key, hashval));
1456 1453
1457 1454 rw_enter(&rhash->rsmhash_rw, RW_READER);
1458 1455
1459 1456 p = (rsmresource_t *)rsmhash_getbkt(rhash, hashval);
1460 1457
1461 1458 for (; p; p = p->rsmrc_next) {
1462 1459 if (p->rsmrc_key == key) {
1463 1460 /* acquire resource lock */
1464 1461 RSMRC_LOCK(p);
1465 1462 break;
1466 1463 }
1467 1464 }
1468 1465
1469 1466 rw_exit(&rhash->rsmhash_rw);
1470 1467
1471 1468 if (p != NULL && p->rsmrc_state != state) {
1472 1469 /* state changed, release lock and return null */
1473 1470 RSMRC_UNLOCK(p);
1474 1471 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
1475 1472 "rsmhash_lookup done: state changed\n"));
1476 1473 return (NULL);
1477 1474 }
1478 1475
1479 1476 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_lookup done\n"));
1480 1477
1481 1478 return (p);
1482 1479 }
1483 1480
1484 1481 static void
1485 1482 rsmhash_rm(rsmhash_table_t *rhash, rsmresource_t *rcelm)
1486 1483 {
1487 1484 rsmresource_t *p, **back;
1488 1485 uint_t hashval;
1489 1486 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1490 1487
1491 1488 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_rm enter\n"));
1492 1489
1493 1490 hashval = rsmhash(rcelm->rsmrc_key);
1494 1491
1495 1492 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_rm %u=%d\n",
1496 1493 rcelm->rsmrc_key, hashval));
1497 1494
1498 1495 /*
1499 1496 * It's ok not to find the segment.
1500 1497 */
1501 1498 rw_enter(&rhash->rsmhash_rw, RW_WRITER);
1502 1499
1503 1500 back = (rsmresource_t **)rsmhash_bktaddr(rhash, hashval);
1504 1501
1505 1502 for (; (p = *back) != NULL; back = &p->rsmrc_next) {
1506 1503 if (p == rcelm) {
1507 1504 *back = rcelm->rsmrc_next;
1508 1505 break;
1509 1506 }
1510 1507 }
1511 1508
1512 1509 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_rm done\n"));
1513 1510
1514 1511 rw_exit(&rhash->rsmhash_rw);
1515 1512 }
1516 1513
1517 1514 static int
1518 1515 rsmhash_add(rsmhash_table_t *rhash, rsmresource_t *new, rsm_memseg_id_t key,
1519 1516 int dup_check, rsm_resource_state_t state)
1520 1517 {
1521 1518 rsmresource_t *p = NULL, **bktp;
1522 1519 uint_t hashval;
1523 1520 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1524 1521
1525 1522 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_add enter\n"));
1526 1523
1527 1524 /* lock table */
1528 1525 rw_enter(&rhash->rsmhash_rw, RW_WRITER);
1529 1526
1530 1527 /*
1531 1528 * If the current resource state is other than the state passed in
1532 1529 * then the resource is (probably) already on the list. e.g. for an
1533 1530 * import segment if the state is not RSM_STATE_NEW then it's on the
1534 1531 * list already.
1535 1532 */
1536 1533 RSMRC_LOCK(new);
1537 1534 if (new->rsmrc_state != state) {
1538 1535 RSMRC_UNLOCK(new);
1539 1536 rw_exit(&rhash->rsmhash_rw);
1540 1537 return (RSMERR_BAD_SEG_HNDL);
1541 1538 }
1542 1539
1543 1540 hashval = rsmhash(key);
1544 1541 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_add %d\n", hashval));
1545 1542
1546 1543 if (dup_check) {
1547 1544 /*
1548 1545 * Used for checking export segments; don't want to have
1549 1546 * the same key used for multiple segments.
1550 1547 */
1551 1548
1552 1549 p = (rsmresource_t *)rsmhash_getbkt(rhash, hashval);
1553 1550
1554 1551 for (; p; p = p->rsmrc_next) {
1555 1552 if (p->rsmrc_key == key) {
1556 1553 RSMRC_UNLOCK(new);
1557 1554 break;
1558 1555 }
1559 1556 }
1560 1557 }
1561 1558
1562 1559 if (p == NULL) {
1563 1560 /* Key doesn't exist, add it */
1564 1561
1565 1562 bktp = (rsmresource_t **)rsmhash_bktaddr(rhash, hashval);
1566 1563
1567 1564 new->rsmrc_key = key;
1568 1565 new->rsmrc_next = *bktp;
1569 1566 *bktp = new;
1570 1567 }
1571 1568
1572 1569 rw_exit(&rhash->rsmhash_rw);
1573 1570
1574 1571 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_add done\n"));
1575 1572
1576 1573 return (p == NULL ? RSM_SUCCESS : RSMERR_SEGID_IN_USE);
1577 1574 }
1578 1575
1579 1576 /*
1580 1577 * XOR each byte of the key.
1581 1578 */
1582 1579 static uint_t
1583 1580 rsmhash(rsm_memseg_id_t key)
1584 1581 {
1585 1582 uint_t hash = key;
1586 1583
1587 1584 hash ^= (key >> 8);
1588 1585 hash ^= (key >> 16);
1589 1586 hash ^= (key >> 24);
1590 1587
1591 1588 return (hash % rsm_hash_size);
1592 1589
1593 1590 }
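
/*
 * Editorial sketch, not driver code: a stand-alone model of the fold
 * above. The name example_rsmhash and the 128-bucket table size are
 * hypothetical; the driver's actual table size is rsm_hash_size.
 *
 *	static unsigned int
 *	example_rsmhash(unsigned int key, unsigned int nbuckets)
 *	{
 *		unsigned int hash = key;
 *
 *		hash ^= (key >> 8);
 *		hash ^= (key >> 16);
 *		hash ^= (key >> 24);
 *		return (hash % nbuckets);
 *	}
 *
 * For key 0x12345678 the three XORs yield 0x12267008, so with 128
 * buckets the key lands in bucket 0x12267008 % 128 == 8.
 */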
1594 1591
1595 1592 /*
1596 1593 * generic function to get a specific bucket
1597 1594 */
1598 1595 static void *
1599 1596 rsmhash_getbkt(rsmhash_table_t *rhash, uint_t hashval)
1600 1597 {
1601 1598
1602 1599 if (rhash->bucket == NULL)
1603 1600 return (NULL);
1604 1601 else
1605 1602 return ((void *)rhash->bucket[hashval]);
1606 1603 }
1607 1604
1608 1605 /*
1609 1606 * generic function to get a specific bucket's address
1610 1607 */
1611 1608 static void **
1612 1609 rsmhash_bktaddr(rsmhash_table_t *rhash, uint_t hashval)
1613 1610 {
1614 1611 if (rhash->bucket == NULL)
1615 1612 return (NULL);
1616 1613 else
1617 1614 return ((void **)&(rhash->bucket[hashval]));
1618 1615 }
1619 1616
1620 1617 /*
1621 1618 * generic function to alloc a hash table
1622 1619 */
1623 1620 static void
1624 1621 rsmhash_alloc(rsmhash_table_t *rhash, int size)
1625 1622 {
1626 1623 rhash->bucket = (rsmresource_t **)
1627 1624 kmem_zalloc(size * sizeof (rsmresource_t *), KM_SLEEP);
1628 1625 }
1629 1626
1630 1627 /*
1631 1628 * generic function to free a hash table
1632 1629 */
1633 1630 static void
1634 1631 rsmhash_free(rsmhash_table_t *rhash, int size)
1635 1632 {
1636 1633
1637 1634 kmem_free((void *)rhash->bucket, size * sizeof (caddr_t));
1638 1635 rhash->bucket = NULL;
1639 1636
1640 1637 }
1641 1638 /* *********************** Exported Segment Key Management ************ */
1642 1639
1643 1640 #define rsmexport_add(new, key) \
1644 1641 rsmhash_add(&rsm_export_segs, (rsmresource_t *)new, key, 1, \
1645 1642 RSM_STATE_BIND)
1646 1643
1647 1644 #define rsmexport_rm(arg) \
1648 1645 rsmhash_rm(&rsm_export_segs, (rsmresource_t *)(arg))
1649 1646
1650 1647 #define rsmexport_lookup(key) \
1651 1648 (rsmseg_t *)rsmhash_lookup(&rsm_export_segs, key, RSM_STATE_EXPORT)
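
/*
 * Editorial sketch, not driver code: how the export-key wrappers
 * above are used across a publish/unpublish cycle (error handling
 * and surrounding logic elided).
 *
 *	e = rsmexport_add(seg, segment_id);
 *		dup_check rejects a key already in use with
 *		RSMERR_SEGID_IN_USE; the segment lock is held on success
 *
 *	seg = rsmexport_lookup(key);
 *		NULL if not published; otherwise the segment is
 *		returned with its lock held
 *
 *	rsmexport_rm(seg);
 *		on unpublish; tolerant of a segment already removed
 */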
1652 1649
1653 1650 /* ************************** Import Segment List Management ********** */
1654 1651
1655 1652 /*
1656 1653 * Add segment to import list. This will be useful for paging and loopback
1657 1654 * segment unloading.
1658 1655 */
1659 1656 #define rsmimport_add(arg, key) \
1660 1657 rsmhash_add(&rsm_import_segs, (rsmresource_t *)(arg), (key), 0, \
1661 1658 RSM_STATE_NEW)
1662 1659
1663 1660 #define rsmimport_rm(arg) \
1664 1661 rsmhash_rm(&rsm_import_segs, (rsmresource_t *)(arg))
1665 1662
1666 1663 /*
1667 1664 * #define rsmimport_lookup(key) \
1668 1665 * (rsmseg_t *)rsmhash_lookup(&rsm_import_segs, (key), RSM_STATE_CONNECT)
1669 1666 */
1670 1667
1671 1668 /*
1672 1669  * Increase the ref count and make the import segment point to the
1673 1670  * shared data structure. Return a pointer to the shared data struct,
1674 1671  * which is locked upon return.
1675 1672 */
1676 1673 static rsm_import_share_t *
1677 1674 rsmshare_get(rsm_memseg_id_t key, rsm_node_id_t node, adapter_t *adapter,
1678 1675 rsmseg_t *segp)
1679 1676 {
1680 1677 uint_t hash;
1681 1678 rsmresource_t *p;
1682 1679 rsm_import_share_t *shdatap;
1683 1680 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1684 1681
1685 1682 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmshare_get enter\n"));
1686 1683
1687 1684 hash = rsmhash(key);
1688 1685 /* lock table */
1689 1686 rw_enter(&rsm_import_segs.rsmhash_rw, RW_WRITER);
1690 1687 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmshare_get:key=%u, hash=%d\n",
1691 1688 key, hash));
1692 1689
1693 1690 p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hash);
1694 1691
1695 1692 for (; p; p = p->rsmrc_next) {
1696 1693 /*
1697 1694 * Look for an entry that is importing the same exporter
1698 1695 * with the share data structure allocated.
1699 1696 */
1700 1697 if ((p->rsmrc_key == key) &&
1701 1698 (p->rsmrc_node == node) &&
1702 1699 (p->rsmrc_adapter == adapter) &&
1703 1700 (((rsmseg_t *)p)->s_share != NULL)) {
1704 1701 shdatap = ((rsmseg_t *)p)->s_share;
1705 1702 break;
1706 1703 }
1707 1704 }
1708 1705
1709 1706 if (p == NULL) {
1710 1707 /* we are the first importer, create the shared data struct */
1711 1708 shdatap = kmem_zalloc(sizeof (rsm_import_share_t), KM_SLEEP);
1712 1709 shdatap->rsmsi_state = RSMSI_STATE_NEW;
1713 1710 shdatap->rsmsi_segid = key;
1714 1711 shdatap->rsmsi_node = node;
1715 1712 mutex_init(&shdatap->rsmsi_lock, NULL, MUTEX_DRIVER, NULL);
1716 1713 cv_init(&shdatap->rsmsi_cv, NULL, CV_DRIVER, 0);
1717 1714 }
1718 1715
1719 1716 rsmseglock_acquire(segp);
1720 1717
1721 1718 /* we grab the shared lock before returning from this function */
1722 1719 mutex_enter(&shdatap->rsmsi_lock);
1723 1720
1724 1721 shdatap->rsmsi_refcnt++;
1725 1722 segp->s_share = shdatap;
1726 1723
1727 1724 rsmseglock_release(segp);
1728 1725
1729 1726 rw_exit(&rsm_import_segs.rsmhash_rw);
1730 1727
1731 1728 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmshare_get done\n"));
1732 1729
1733 1730 return (shdatap);
1734 1731 }
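
/*
 * Editorial sketch, not driver code: every successful rsmshare_get()
 * is balanced by the refcount drop in rsmseg_free(). A simplified
 * importer outline (the real connect path does more):
 *
 *	shdatap = rsmshare_get(key, node, adapter, seg);
 *		rsmsi_refcnt is now at least 1 and rsmsi_lock is held
 *
 *	mutex_exit(&shdatap->rsmsi_lock);
 *	...
 *	rsmseg_free(seg);
 *		drops rsmsi_refcnt and frees the shared data structure
 *		when the count reaches zero
 */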
1735 1732
1736 1733 /*
1737 1734  * The shared data structure must be locked before calling
1738 1735 * rsmsharecv_signal().
1739 1736 * Change the state and signal any waiting segments.
1740 1737 */
1741 1738 void
1742 1739 rsmsharecv_signal(rsmseg_t *seg, int oldstate, int newstate)
1743 1740 {
1744 1741 ASSERT(rsmsharelock_held(seg));
1745 1742
1746 1743 if (seg->s_share->rsmsi_state == oldstate) {
1747 1744 seg->s_share->rsmsi_state = newstate;
1748 1745 cv_broadcast(&seg->s_share->rsmsi_cv);
1749 1746 }
1750 1747 }
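
/*
 * Editorial sketch, not driver code: the wait/signal pattern the
 * comment above implies. The <BUSY>/<READY> state names are
 * placeholders, not the driver's RSMSI_STATE_* values.
 *
 *	waiter, holding the shared lock:
 *		while (seg->s_share->rsmsi_state == <BUSY>)
 *			cv_wait(&seg->s_share->rsmsi_cv,
 *			    &seg->s_share->rsmsi_lock);
 *
 *	signaller, holding the shared lock:
 *		rsmsharecv_signal(seg, <BUSY>, <READY>);
 *
 * Because the state is changed only when it still equals oldstate,
 * a stale signaller cannot clobber a newer state transition.
 */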
1751 1748
1752 1749 /*
1753 1750  * Add an importing node's token to the importer list hash table.
1754 1751 */
1755 1752 static void
1756 1753 importer_list_add(rsm_node_id_t node, rsm_memseg_id_t key, rsm_addr_t hwaddr,
1757 1754 void *cookie)
1758 1755 {
1759 1756
1760 1757 importing_token_t *head;
1761 1758 importing_token_t *new_token;
1762 1759 int index;
1763 1760
1764 1761 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1765 1762
1766 1763 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_add enter\n"));
1767 1764
1768 1765 new_token = kmem_zalloc(sizeof (importing_token_t), KM_SLEEP);
1769 1766 new_token->importing_node = node;
1770 1767 new_token->key = key;
1771 1768 new_token->import_segment_cookie = cookie;
1772 1769 new_token->importing_adapter_hwaddr = hwaddr;
1773 1770
1774 1771 index = rsmhash(key);
1775 1772
1776 1773 mutex_enter(&importer_list.lock);
1777 1774
1778 1775 head = importer_list.bucket[index];
1779 1776 importer_list.bucket[index] = new_token;
1780 1777 new_token->next = head;
1781 1778 mutex_exit(&importer_list.lock);
1782 1779
1783 1780 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_add done\n"));
1784 1781 }
1785 1782
1786 1783 static void
1787 1784 importer_list_rm(rsm_node_id_t node, rsm_memseg_id_t key, void *cookie)
1788 1785 {
1789 1786
1790 1787 importing_token_t *prev, *token = NULL;
1791 1788 int index;
1792 1789 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1793 1790
1794 1791 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_rm enter\n"));
1795 1792
1796 1793 index = rsmhash(key);
1797 1794
1798 1795 mutex_enter(&importer_list.lock);
1799 1796
1800 1797 token = importer_list.bucket[index];
1801 1798
1802 1799 prev = token;
1803 1800 while (token != NULL) {
1804 1801 if (token->importing_node == node &&
1805 1802 token->import_segment_cookie == cookie) {
1806 1803 if (prev == token)
1807 1804 importer_list.bucket[index] = token->next;
1808 1805 else
1809 1806 prev->next = token->next;
1810 1807 kmem_free((void *)token, sizeof (*token));
1811 1808 break;
1812 1809 } else {
1813 1810 prev = token;
1814 1811 token = token->next;
1815 1812 }
1816 1813 }
1817 1814
1818 1815 mutex_exit(&importer_list.lock);
1819 1816
1820 1817 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_rm done\n"));
1821 1818
1822 1819
1823 1820 }
1824 1821
1825 1822 /* ************************** Segment Structure Management ************* */
1826 1823
1827 1824 /*
1828 1825 * Free segment structure
1829 1826 */
1830 1827 static void
1831 1828 rsmseg_free(rsmseg_t *seg)
1832 1829 {
1833 1830
1834 1831 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1835 1832
1836 1833 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_free enter\n"));
1837 1834
1838 1835 /* need to take seglock here to avoid race with rsmmap_unmap() */
1839 1836 rsmseglock_acquire(seg);
1840 1837 if (seg->s_ckl != NULL) {
1841 1838 /* Segment is still busy */
1842 1839 seg->s_state = RSM_STATE_END;
1843 1840 rsmseglock_release(seg);
1844 1841 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
1845 1842 "rsmseg_free done\n"));
1846 1843 return;
1847 1844 }
1848 1845
1849 1846 rsmseglock_release(seg);
1850 1847
1851 1848 ASSERT(seg->s_state == RSM_STATE_END || seg->s_state == RSM_STATE_NEW);
1852 1849
1853 1850 /*
1854 1851 	 * If it's an importer, decrement the refcount and, if it's down
1855 1852 	 * to zero, free the shared data structure. This is also where
1856 1853 	 * the refcount taken by a failed rsm_connect() is dropped.
1857 1854 */
1858 1855 if (seg->s_share != NULL) {
1859 1856
1860 1857 ASSERT(seg->s_type == RSM_RESOURCE_IMPORT_SEGMENT);
1861 1858
1862 1859 rsmsharelock_acquire(seg);
1863 1860
1864 1861 ASSERT(seg->s_share->rsmsi_refcnt > 0);
1865 1862
1866 1863 seg->s_share->rsmsi_refcnt--;
1867 1864
1868 1865 if (seg->s_share->rsmsi_refcnt == 0) {
1869 1866 rsmsharelock_release(seg);
1870 1867 mutex_destroy(&seg->s_share->rsmsi_lock);
1871 1868 cv_destroy(&seg->s_share->rsmsi_cv);
1872 1869 kmem_free((void *)(seg->s_share),
1873 1870 sizeof (rsm_import_share_t));
1874 1871 } else {
1875 1872 rsmsharelock_release(seg);
1876 1873 }
1877 1874 /*
1878 1875 * The following needs to be done after any
1879 1876 * rsmsharelock calls which use seg->s_share.
1880 1877 */
1881 1878 seg->s_share = NULL;
1882 1879 }
1883 1880
1884 1881 cv_destroy(&seg->s_cv);
1885 1882 mutex_destroy(&seg->s_lock);
1886 1883 rsmacl_free(seg->s_acl, seg->s_acl_len);
1887 1884 rsmpiacl_free(seg->s_acl_in, seg->s_acl_len);
1888 1885 if (seg->s_adapter)
1889 1886 rsmka_release_adapter(seg->s_adapter);
1890 1887
1891 1888 kmem_free((void *)seg, sizeof (*seg));
1892 1889
1893 1890 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_free done\n"));
1894 1891
1895 1892 }
1896 1893
1897 1894
1898 1895 static rsmseg_t *
1899 1896 rsmseg_alloc(minor_t num, struct cred *cred)
1900 1897 {
1901 1898 rsmseg_t *new;
1902 1899 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1903 1900
1904 1901 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_alloc enter\n"));
1905 1902 /*
1906 1903 * allocate memory for new segment. This should be a segkmem cache.
1907 1904 */
1908 1905 new = (rsmseg_t *)kmem_zalloc(sizeof (*new), KM_SLEEP);
1909 1906
1910 1907 new->s_state = RSM_STATE_NEW;
1911 1908 new->s_minor = num;
1912 1909 new->s_acl_len = 0;
1913 1910 new->s_cookie = NULL;
1914 1911 new->s_adapter = NULL;
1915 1912
1916 1913 new->s_mode = 0777 & ~PTOU((ttoproc(curthread)))->u_cmask;
1917 1914 /* we don't have a key yet, will set at export/connect */
1918 1915 new->s_uid = crgetuid(cred);
1919 1916 new->s_gid = crgetgid(cred);
1920 1917
1921 1918 mutex_init(&new->s_lock, NULL, MUTEX_DRIVER, (void *)NULL);
1922 1919 cv_init(&new->s_cv, NULL, CV_DRIVER, 0);
1923 1920
1924 1921 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_alloc done\n"));
1925 1922
1926 1923 return (new);
1927 1924 }
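
/*
 * Editorial note, not driver code: a worked example of the s_mode
 * computation above. With a typical process umask of 022,
 *
 *	0777 & ~022 == 0755
 *
 * so the owner keeps full access while group/other are restricted
 * exactly as ordinary file creation would be; a umask of 077 yields
 * a private 0700 segment mode.
 */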
1928 1925
1929 1926 /* ******************************** Driver Open/Close/Poll *************** */
1930 1927
1931 1928 /*ARGSUSED1*/
1932 1929 static int
1933 1930 rsm_open(dev_t *devp, int flag, int otyp, struct cred *cred)
1934 1931 {
1935 1932 minor_t rnum;
1936 1933 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI);
1937 1934
1938 1935 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_open enter\n"));
1939 1936 /*
1940 1937 * Char only
1941 1938 */
1942 1939 if (otyp != OTYP_CHR) {
1943 1940 DBG_PRINTF((category, RSM_ERR, "rsm_open: bad otyp\n"));
1944 1941 return (EINVAL);
1945 1942 }
1946 1943
1947 1944 /*
1948 1945 	 * Only minor number zero can be opened; clones are used for resources.
1949 1946 */
1950 1947 if (getminor(*devp) != RSM_DRIVER_MINOR) {
1951 1948 DBG_PRINTF((category, RSM_ERR,
1952 1949 "rsm_open: bad minor %d\n", getminor(*devp)));
1953 1950 return (ENODEV);
1954 1951 }
1955 1952
1956 1953 if ((flag & FEXCL) != 0 && secpolicy_excl_open(cred) != 0) {
1957 1954 DBG_PRINTF((category, RSM_ERR, "rsm_open: bad perm\n"));
1958 1955 return (EPERM);
1959 1956 }
1960 1957
1961 1958 if (!(flag & FWRITE)) {
1962 1959 /*
1963 1960 * The library function _rsm_librsm_init calls open for
1964 1961 * /dev/rsm with flag set to O_RDONLY. We want a valid
1965 1962 * file descriptor to be returned for minor device zero.
1966 1963 */
1967 1964
1968 1965 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
1969 1966 "rsm_open RDONLY done\n"));
1970 1967 return (DDI_SUCCESS);
1971 1968 }
1972 1969
1973 1970 /*
1974 1971 	 * - allocate new minor number and segment
1975 1972 	 * - add segment to list of all segments
1976 1973 	 * - set minordev data to segment
1977 1974 	 * - update devp argument to new device
1978 1975 	 * - update s_cred to cred; make sure you do crhold(cred)
1979 1976 */
1980 1977
1981 1978 /* allocate a new resource number */
1982 1979 if (rsmresource_alloc(&rnum) == RSM_SUCCESS) {
1983 1980 /*
1984 1981 		 * We will bind this minor to a specific resource in the
1985 1982 		 * first ioctl
1986 1983 */
1987 1984 *devp = makedevice(getmajor(*devp), rnum);
1988 1985 } else {
1989 1986 return (EAGAIN);
1990 1987 }
1991 1988
1992 1989 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_open done\n"));
1993 1990 return (DDI_SUCCESS);
1994 1991 }
1995 1992
1996 1993 static void
1997 1994 rsmseg_close(rsmseg_t *seg, int force_flag)
1998 1995 {
1999 1996 int e = RSM_SUCCESS;
2000 1997
2001 1998 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI);
2002 1999
2003 2000 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_close enter\n"));
2004 2001
2005 2002 rsmseglock_acquire(seg);
2006 2003 if (!force_flag && (seg->s_hdr.rsmrc_type ==
2007 2004 RSM_RESOURCE_EXPORT_SEGMENT)) {
2008 2005 /*
2009 2006 		 * If we are processing rsm_close, wait for any force_destroy
2010 2007 		 * processing to complete, since it must finish before we can
2011 2008 		 * free the segment. force_destroy applies only to export
2012 2009 		 * segments.
2013 2010 */
2014 2011 while (seg->s_flags & RSM_FORCE_DESTROY_WAIT) {
2015 2012 cv_wait(&seg->s_cv, &seg->s_lock);
2016 2013 }
2017 2014 }
2018 2015 rsmseglock_release(seg);
2019 2016
2020 2017 /* It's ok to read the state without a lock */
2021 2018 switch (seg->s_state) {
2022 2019 case RSM_STATE_EXPORT:
2023 2020 case RSM_STATE_EXPORT_QUIESCING:
2024 2021 case RSM_STATE_EXPORT_QUIESCED:
2025 2022 e = rsm_unpublish(seg, 1);
2026 2023 /* FALLTHRU */
2027 2024 case RSM_STATE_BIND_QUIESCED:
2028 2025 /* FALLTHRU */
2029 2026 case RSM_STATE_BIND:
2030 2027 e = rsm_unbind(seg);
2031 2028 if (e != RSM_SUCCESS && force_flag == 1)
2032 2029 return;
2033 2030 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT);
2034 2031 /* FALLTHRU */
2035 2032 case RSM_STATE_NEW_QUIESCED:
2036 2033 rsmseglock_acquire(seg);
2037 2034 seg->s_state = RSM_STATE_NEW;
2038 2035 cv_broadcast(&seg->s_cv);
2039 2036 rsmseglock_release(seg);
2040 2037 break;
2041 2038 case RSM_STATE_NEW:
2042 2039 break;
2043 2040 case RSM_STATE_ZOMBIE:
2044 2041 /*
2045 2042 		 * Segments in this state have been removed from the
2046 2043 		 * exported segments list and have been unpublished
2047 2044 		 * and unbound. These segments were removed during a
2048 2045 		 * callback to rsm_export_force_destroy, which is
2049 2046 		 * called for the purpose of unlocking exported
2050 2047 		 * memory segments when a process exits but leaves
2051 2048 		 * the segments locked down, since rsm_close is not
2052 2049 		 * called for the segments. This can happen when a
2053 2050 		 * process calls fork or exec and then exits.
2054 2051 		 * Once the segments are in the ZOMBIE state, all that
2055 2052 		 * remains is to destroy them when rsm_close is called.
2056 2053 		 * This is done here. Thus, for such segments the
2057 2054 		 * state is changed to NEW so that later in this
2058 2055 		 * function rsmseg_free is called.
2059 2056 */
2060 2057 rsmseglock_acquire(seg);
2061 2058 seg->s_state = RSM_STATE_NEW;
2062 2059 rsmseglock_release(seg);
2063 2060 break;
2064 2061 case RSM_STATE_MAP_QUIESCE:
2065 2062 case RSM_STATE_ACTIVE:
2066 2063 /* Disconnect will handle the unmap */
2067 2064 case RSM_STATE_CONN_QUIESCE:
2068 2065 case RSM_STATE_CONNECT:
2069 2066 case RSM_STATE_DISCONNECT:
2070 2067 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
2071 2068 (void) rsm_disconnect(seg);
2072 2069 break;
2073 2070 case RSM_STATE_MAPPING:
2074 2071 /*FALLTHRU*/
2075 2072 case RSM_STATE_END:
2076 2073 DBG_PRINTF((category, RSM_ERR,
2077 2074 "Invalid segment state %d in rsm_close\n", seg->s_state));
2078 2075 break;
2079 2076 default:
2080 2077 DBG_PRINTF((category, RSM_ERR,
2081 2078 "Invalid segment state %d in rsm_close\n", seg->s_state));
2082 2079 break;
2083 2080 }
2084 2081
2085 2082 /*
2086 2083 * check state.
2087 2084 * - make sure you do crfree(s_cred);
2088 2085 * release segment and minor number
2089 2086 */
2090 2087 ASSERT(seg->s_state == RSM_STATE_NEW);
2091 2088
2092 2089 /*
2093 2090 	 * The export_force_destroy callback exists to unlock the
2094 2091 	 * exported segments of a process when the process does a fork
2095 2092 	 * or exec and then exits. It calls this function with the force
2096 2093 	 * flag set to 1, which indicates that the segment state must be
2097 2094 	 * converted to ZOMBIE. This state means that the segments still
2098 2095 	 * exist and have been unlocked and, most importantly, that the
2099 2096 	 * only operation allowed is to destroy them on an rsm_close.
2100 2097 */
2101 2098 if (force_flag) {
2102 2099 rsmseglock_acquire(seg);
2103 2100 seg->s_state = RSM_STATE_ZOMBIE;
2104 2101 rsmseglock_release(seg);
2105 2102 } else {
2106 2103 rsmseg_free(seg);
2107 2104 }
2108 2105
2109 2106 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_close done\n"));
2110 2107 }
2111 2108
2112 2109 static int
2113 2110 rsm_close(dev_t dev, int flag, int otyp, cred_t *cred)
2114 2111 {
2115 2112 minor_t rnum = getminor(dev);
2116 2113 rsmresource_t *res;
2117 2114 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI);
2118 2115
2119 2116 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close enter\n"));
2120 2117
2121 2118 flag = flag; cred = cred;
2122 2119
2123 2120 if (otyp != OTYP_CHR)
2124 2121 return (EINVAL);
2125 2122
2126 2123 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rnum = %d\n", rnum));
2127 2124
2128 2125 /*
2129 2126 * At this point we are the last reference to the resource.
2130 2127 * Free resource number from resource table.
2131 2128 * It's ok to remove number before we free the segment.
2132 2129 * We need to lock the resource to protect against remote calls.
2133 2130 */
2134 2131 if (rnum == RSM_DRIVER_MINOR ||
2135 2132 (res = rsmresource_free(rnum)) == NULL) {
2136 2133 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close done\n"));
2137 2134 return (DDI_SUCCESS);
2138 2135 }
2139 2136
2140 2137 switch (res->rsmrc_type) {
2141 2138 case RSM_RESOURCE_EXPORT_SEGMENT:
2142 2139 case RSM_RESOURCE_IMPORT_SEGMENT:
2143 2140 rsmseg_close((rsmseg_t *)res, 0);
2144 2141 break;
2145 2142 case RSM_RESOURCE_BAR:
2146 2143 DBG_PRINTF((category, RSM_ERR, "bad resource in rsm_close\n"));
2147 2144 break;
2148 2145 default:
2149 2146 break;
2150 2147 }
2151 2148
2152 2149 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close done\n"));
2153 2150
2154 2151 return (DDI_SUCCESS);
2155 2152 }
2156 2153
2157 2154 /*
2158 2155 * rsm_inc_pgcnt
2159 2156 *
2160 2157 * Description: increment rsm page counter.
2161 2158 *
2162 2159 * Parameters: pgcnt_t pnum; number of pages to be used
2163 2160 *
2164 2161 * Returns: RSM_SUCCESS if memory limit not exceeded
2165 2162  *		RSMERR_INSUFFICIENT_MEM if memory limit exceeded. In
2166 2163  *		this case, the page counter remains unchanged.
2167 2164 *
2168 2165 */
2169 2166 static int
2170 2167 rsm_inc_pgcnt(pgcnt_t pnum)
2171 2168 {
2172 2169 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2173 2170 if (rsm_pgcnt_max == 0) { /* no upper limit has been set */
2174 2171 return (RSM_SUCCESS);
2175 2172 }
2176 2173
2177 2174 mutex_enter(&rsm_pgcnt_lock);
2178 2175
2179 2176 if (rsm_pgcnt + pnum > rsm_pgcnt_max) {
2180 2177 /* ensure that limits have not been exceeded */
2181 2178 mutex_exit(&rsm_pgcnt_lock);
2182 2179 return (RSMERR_INSUFFICIENT_MEM);
2183 2180 }
2184 2181
2185 2182 rsm_pgcnt += pnum;
2186 2183 DBG_PRINTF((category, RSM_DEBUG, "rsm_pgcnt incr to %d.\n",
2187 2184 rsm_pgcnt));
2188 2185 mutex_exit(&rsm_pgcnt_lock);
2189 2186
2190 2187 return (RSM_SUCCESS);
2191 2188 }
2192 2189
2193 2190 /*
2194 2191 * rsm_dec_pgcnt
2195 2192 *
2196 2193 * Description: decrement rsm page counter.
2197 2194 *
2198 2195 * Parameters: pgcnt_t pnum; number of pages freed
2199 2196 *
2200 2197 */
2201 2198 static void
2202 2199 rsm_dec_pgcnt(pgcnt_t pnum)
2203 2200 {
2204 2201 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2205 2202
2206 2203 if (rsm_pgcnt_max == 0) { /* no upper limit has been set */
2207 2204 return;
2208 2205 }
2209 2206
2210 2207 mutex_enter(&rsm_pgcnt_lock);
2211 2208 ASSERT(rsm_pgcnt >= pnum);
2212 2209 rsm_pgcnt -= pnum;
2213 2210 DBG_PRINTF((category, RSM_DEBUG, "rsm_pgcnt decr to %d.\n",
2214 2211 rsm_pgcnt));
2215 2212 mutex_exit(&rsm_pgcnt_lock);
2216 2213 }
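
/*
 * Editorial sketch, not driver code: how the two counters above
 * bracket page locking, as rsm_bind_pages()/rsm_unbind_pages() below
 * do (error handling elided):
 *
 *	if (rsm_inc_pgcnt(btopr(len)) != RSM_SUCCESS)
 *		return (RSMERR_INSUFFICIENT_MEM);
 *	...lock the pages; if locking fails:
 *		rsm_dec_pgcnt(btopr(len));
 *	...and on unbind:
 *		rsm_dec_pgcnt(btopr(seg->s_len));
 *
 * When rsm_pgcnt_max is 0 (no upper limit set) both routines return
 * immediately, so no accounting is enforced.
 */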
2217 2214
2218 2215 static struct umem_callback_ops rsm_as_ops = {
2219 2216 UMEM_CALLBACK_VERSION, /* version number */
2220 2217 rsm_export_force_destroy,
2221 2218 };
2222 2219
2223 2220 static int
2224 2221 rsm_bind_pages(ddi_umem_cookie_t *cookie, caddr_t vaddr, size_t len,
2225 2222 proc_t *procp)
2226 2223 {
2227 2224 int error = RSM_SUCCESS;
2228 2225 ulong_t pnum;
2229 2226 struct umem_callback_ops *callbackops = &rsm_as_ops;
2230 2227
2231 2228 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2232 2229
2233 2230 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind_pages enter\n"));
2234 2231
2235 2232 /*
2236 2233 * Make sure vaddr and len are aligned on a page boundary
2237 2234 */
2238 2235 if ((uintptr_t)vaddr & (PAGESIZE - 1)) {
2239 2236 return (RSMERR_BAD_ADDR);
2240 2237 }
2241 2238
2242 2239 if (len & (PAGESIZE - 1)) {
2243 2240 return (RSMERR_BAD_LENGTH);
2244 2241 }
2245 2242
2246 2243 /*
2247 2244 * Find number of pages
2248 2245 */
2249 2246 pnum = btopr(len);
2250 2247 error = rsm_inc_pgcnt(pnum);
2251 2248 if (error != RSM_SUCCESS) {
2252 2249 DBG_PRINTF((category, RSM_ERR,
2253 2250 "rsm_bind_pages:mem limit exceeded\n"));
2254 2251 return (RSMERR_INSUFFICIENT_MEM);
2255 2252 }
2256 2253
2257 2254 error = umem_lockmemory(vaddr, len,
2258 2255 DDI_UMEMLOCK_WRITE|DDI_UMEMLOCK_READ|DDI_UMEMLOCK_LONGTERM,
2259 2256 cookie,
2260 2257 callbackops, procp);
2261 2258
2262 2259 if (error) {
2263 2260 rsm_dec_pgcnt(pnum);
2264 2261 DBG_PRINTF((category, RSM_ERR,
2265 2262 "rsm_bind_pages:ddi_umem_lock failed\n"));
2266 2263 /*
2267 2264 		 * umem_lockmemory, in the case of failure, returns one of
2268 2265 * the following three errors. These are translated into
2269 2266 * the RSMERR namespace and returned.
2270 2267 */
2271 2268 if (error == EFAULT)
2272 2269 return (RSMERR_BAD_ADDR);
2273 2270 else if (error == EACCES)
2274 2271 return (RSMERR_PERM_DENIED);
2275 2272 else
2276 2273 return (RSMERR_INSUFFICIENT_MEM);
2277 2274 }
2278 2275
2279 2276 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind_pages done\n"));
2280 2277
2281 2278 return (error);
2282 2279
2283 2280 }
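
/*
 * Editorial note, not driver code: a worked example of the alignment
 * tests at the top of rsm_bind_pages(), assuming hypothetical 8K
 * pages (PAGESIZE == 0x2000):
 *
 *	vaddr 0x10002000:  0x10002000 & 0x1fff == 0     accepted
 *	vaddr 0x10002010:  0x10002010 & 0x1fff == 0x10  RSMERR_BAD_ADDR
 *	len   0x4000:      page aligned, btopr(0x4000) == 2 pages
 *	len   0x4800:      0x4800 & 0x1fff == 0x800     RSMERR_BAD_LENGTH
 */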
2284 2281
2285 2282 static int
2286 2283 rsm_unbind_pages(rsmseg_t *seg)
2287 2284 {
2288 2285 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2289 2286
2290 2287 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind_pages enter\n"));
2291 2288
2292 2289 ASSERT(rsmseglock_held(seg));
2293 2290
2294 2291 if (seg->s_cookie != NULL) {
2295 2292 /* unlock address range */
2296 2293 ddi_umem_unlock(seg->s_cookie);
2297 2294 rsm_dec_pgcnt(btopr(seg->s_len));
2298 2295 seg->s_cookie = NULL;
2299 2296 }
2300 2297
2301 2298 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind_pages done\n"));
2302 2299
2303 2300 return (RSM_SUCCESS);
2304 2301 }
2305 2302
2306 2303
2307 2304 static int
2308 2305 rsm_bind(rsmseg_t *seg, rsm_ioctlmsg_t *msg, intptr_t dataptr, int mode)
2309 2306 {
2310 2307 int e;
2311 2308 adapter_t *adapter;
2312 2309 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2313 2310
2314 2311 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind enter\n"));
2315 2312
2316 2313 adapter = rsm_getadapter(msg, mode);
2317 2314 if (adapter == NULL) {
2318 2315 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2319 2316 "rsm_bind done:no adapter\n"));
2320 2317 return (RSMERR_CTLR_NOT_PRESENT);
2321 2318 }
2322 2319
2323 2320 /* lock address range */
2324 2321 if (msg->vaddr == NULL) {
2325 2322 rsmka_release_adapter(adapter);
2326 2323 DBG_PRINTF((category, RSM_ERR,
2327 2324 "rsm: rsm_bind done: invalid vaddr\n"));
2328 2325 return (RSMERR_BAD_ADDR);
2329 2326 }
2330 2327 if (msg->len <= 0) {
2331 2328 rsmka_release_adapter(adapter);
2332 2329 DBG_PRINTF((category, RSM_ERR,
2333 2330 "rsm_bind: invalid length\n"));
2334 2331 return (RSMERR_BAD_LENGTH);
2335 2332 }
2336 2333
2337 2334 /* Lock segment */
2338 2335 rsmseglock_acquire(seg);
2339 2336
2340 2337 while (seg->s_state == RSM_STATE_NEW_QUIESCED) {
2341 2338 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
2342 2339 DBG_PRINTF((category, RSM_DEBUG,
2343 2340 "rsm_bind done: cv_wait INTERRUPTED"));
2344 2341 rsmka_release_adapter(adapter);
2345 2342 rsmseglock_release(seg);
2346 2343 return (RSMERR_INTERRUPTED);
2347 2344 }
2348 2345 }
2349 2346
2350 2347 ASSERT(seg->s_state == RSM_STATE_NEW);
2351 2348
2352 2349 ASSERT(seg->s_cookie == NULL);
2353 2350
2354 2351 e = rsm_bind_pages(&seg->s_cookie, msg->vaddr, msg->len, curproc);
2355 2352 if (e == RSM_SUCCESS) {
2356 2353 seg->s_flags |= RSM_USER_MEMORY;
2357 2354 if (msg->perm & RSM_ALLOW_REBIND) {
2358 2355 seg->s_flags |= RSMKA_ALLOW_UNBIND_REBIND;
2359 2356 }
2360 2357 if (msg->perm & RSM_CREATE_SEG_DONTWAIT) {
2361 2358 seg->s_flags |= RSMKA_SET_RESOURCE_DONTWAIT;
2362 2359 }
2363 2360 seg->s_region.r_vaddr = msg->vaddr;
2364 2361 /*
2365 2362 * Set the s_pid value in the segment structure. This is used
2366 2363 * to identify exported segments belonging to a particular
2367 2364 * process so that when the process exits, these segments can
2368 2365 * be unlocked forcefully even if rsm_close is not called on
2369 2366 		 * process exit since there may be other processes referencing
2370 2367 * them (for example on a fork or exec).
2371 2368 * The s_pid value is also used to authenticate the process
2372 2369 * doing a publish or unpublish on the export segment. Only
2373 2370 * the creator of the export segment has a right to do a
2374 2371 * publish or unpublish and unbind on the segment.
2375 2372 */
2376 2373 seg->s_pid = ddi_get_pid();
2377 2374 seg->s_len = msg->len;
2378 2375 seg->s_state = RSM_STATE_BIND;
2379 2376 seg->s_adapter = adapter;
2380 2377 seg->s_proc = curproc;
2381 2378 } else {
2382 2379 rsmka_release_adapter(adapter);
2383 2380 DBG_PRINTF((category, RSM_WARNING,
2384 2381 "unable to lock down pages\n"));
2385 2382 }
2386 2383
2387 2384 msg->rnum = seg->s_minor;
2388 2385 /* Unlock segment */
2389 2386 rsmseglock_release(seg);
2390 2387
2391 2388 if (e == RSM_SUCCESS) {
2392 2389 /* copyout the resource number */
2393 2390 #ifdef _MULTI_DATAMODEL
2394 2391 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
2395 2392 rsm_ioctlmsg32_t msg32;
2396 2393
2397 2394 msg32.rnum = msg->rnum;
2398 2395 if (ddi_copyout((caddr_t)&msg32.rnum,
2399 2396 (caddr_t)&((rsm_ioctlmsg32_t *)dataptr)->rnum,
2400 2397 sizeof (minor_t), mode)) {
2401 2398 rsmka_release_adapter(adapter);
2402 2399 e = RSMERR_BAD_ADDR;
2403 2400 }
2404 2401 }
2405 2402 #endif
2406 2403 if (ddi_copyout((caddr_t)&msg->rnum,
2407 2404 (caddr_t)&((rsm_ioctlmsg_t *)dataptr)->rnum,
2408 2405 sizeof (minor_t), mode)) {
2409 2406 rsmka_release_adapter(adapter);
2410 2407 e = RSMERR_BAD_ADDR;
2411 2408 }
2412 2409 }
2413 2410
2414 2411 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind done\n"));
2415 2412
2416 2413 return (e);
2417 2414 }
2418 2415
2419 2416 static void
2420 2417 rsm_remap_local_importers(rsm_node_id_t src_nodeid,
2421 2418 rsm_memseg_id_t ex_segid,
2422 2419 ddi_umem_cookie_t cookie)
2423 2420
2424 2421 {
2425 2422 rsmresource_t *p = NULL;
2426 2423 rsmhash_table_t *rhash = &rsm_import_segs;
2427 2424 uint_t index;
2428 2425
2429 2426 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_FUNC_ALL, RSM_DEBUG_VERBOSE,
2430 2427 "rsm_remap_local_importers enter\n"));
2431 2428
2432 2429 index = rsmhash(ex_segid);
2433 2430
2434 2431 rw_enter(&rhash->rsmhash_rw, RW_READER);
2435 2432
2436 2433 p = rsmhash_getbkt(rhash, index);
2437 2434
2438 2435 for (; p; p = p->rsmrc_next) {
2439 2436 rsmseg_t *seg = (rsmseg_t *)p;
2440 2437 rsmseglock_acquire(seg);
2441 2438 /*
2442 2439 * Change the s_cookie value of only the local importers
2443 2440 * which have been mapped (in state RSM_STATE_ACTIVE).
2444 2441 * Note that there is no need to change the s_cookie value
2445 2442 * if the imported segment is in RSM_STATE_MAPPING since
2446 2443 * eventually the s_cookie will be updated via the mapping
2447 2444 * functionality.
2448 2445 */
2449 2446 if ((seg->s_segid == ex_segid) && (seg->s_node == src_nodeid) &&
2450 2447 (seg->s_state == RSM_STATE_ACTIVE)) {
2451 2448 seg->s_cookie = cookie;
2452 2449 }
2453 2450 rsmseglock_release(seg);
2454 2451 }
2455 2452 rw_exit(&rhash->rsmhash_rw);
2456 2453
2457 2454 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_FUNC_ALL, RSM_DEBUG_VERBOSE,
2458 2455 "rsm_remap_local_importers done\n"));
2459 2456 }
2460 2457
2461 2458 static int
2462 2459 rsm_rebind(rsmseg_t *seg, rsm_ioctlmsg_t *msg)
2463 2460 {
2464 2461 int e;
2465 2462 adapter_t *adapter;
2466 2463 ddi_umem_cookie_t cookie;
2467 2464 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2468 2465
2469 2466 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind enter\n"));
2470 2467
2471 2468 /* Check for permissions to rebind */
2472 2469 if (!(seg->s_flags & RSMKA_ALLOW_UNBIND_REBIND)) {
2473 2470 return (RSMERR_REBIND_NOT_ALLOWED);
2474 2471 }
2475 2472
2476 2473 if (seg->s_pid != ddi_get_pid() &&
2477 2474 ddi_get_pid() != 0) {
2478 2475 DBG_PRINTF((category, RSM_ERR, "rsm_rebind: Not owner\n"));
2479 2476 return (RSMERR_NOT_CREATOR);
2480 2477 }
2481 2478
2482 2479 /*
2483 2480 	 * Partial rebind is not allowed, hence the length passed in
2484 2481 	 * must be the same as the segment length.
2485 2482 */
2486 2483 if (msg->vaddr == NULL) {
2487 2484 DBG_PRINTF((category, RSM_ERR,
2488 2485 "rsm_rebind done: null msg->vaddr\n"));
2489 2486 return (RSMERR_BAD_ADDR);
2490 2487 }
2491 2488 if (msg->len != seg->s_len) {
2492 2489 DBG_PRINTF((category, RSM_ERR,
2493 2490 "rsm_rebind: invalid length\n"));
2494 2491 return (RSMERR_BAD_LENGTH);
2495 2492 }
2496 2493
2497 2494 /* Lock segment */
2498 2495 rsmseglock_acquire(seg);
2499 2496
2500 2497 while ((seg->s_state == RSM_STATE_BIND_QUIESCED) ||
2501 2498 (seg->s_state == RSM_STATE_EXPORT_QUIESCING) ||
2502 2499 (seg->s_state == RSM_STATE_EXPORT_QUIESCED)) {
2503 2500 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
2504 2501 rsmseglock_release(seg);
2505 2502 DBG_PRINTF((category, RSM_DEBUG,
2506 2503 "rsm_rebind done: cv_wait INTERRUPTED"));
2507 2504 return (RSMERR_INTERRUPTED);
2508 2505 }
2509 2506 }
2510 2507
2511 2508 /* verify segment state */
2512 2509 if ((seg->s_state != RSM_STATE_BIND) &&
2513 2510 (seg->s_state != RSM_STATE_EXPORT)) {
2514 2511 /* Unlock segment */
2515 2512 rsmseglock_release(seg);
2516 2513 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2517 2514 "rsm_rebind done: invalid state\n"));
2518 2515 return (RSMERR_BAD_SEG_HNDL);
2519 2516 }
2520 2517
2521 2518 ASSERT(seg->s_cookie != NULL);
2522 2519
2523 2520 if (msg->vaddr == seg->s_region.r_vaddr) {
2524 2521 rsmseglock_release(seg);
2525 2522 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind done\n"));
2526 2523 return (RSM_SUCCESS);
2527 2524 }
2528 2525
2529 2526 e = rsm_bind_pages(&cookie, msg->vaddr, msg->len, curproc);
2530 2527 if (e == RSM_SUCCESS) {
2531 2528 struct buf *xbuf;
2532 2529 dev_t sdev = 0;
2533 2530 rsm_memory_local_t mem;
2534 2531
2535 2532 xbuf = ddi_umem_iosetup(cookie, 0, msg->len, B_WRITE,
2536 2533 sdev, 0, NULL, DDI_UMEM_SLEEP);
2537 2534 ASSERT(xbuf != NULL);
2538 2535
2539 2536 mem.ms_type = RSM_MEM_BUF;
2540 2537 mem.ms_bp = xbuf;
2541 2538
2542 2539 adapter = seg->s_adapter;
2543 2540 e = adapter->rsmpi_ops->rsm_rebind(
2544 2541 seg->s_handle.out, 0, &mem,
2545 2542 RSM_RESOURCE_DONTWAIT, NULL);
2546 2543
2547 2544 if (e == RSM_SUCCESS) {
2548 2545 /*
2549 2546 * unbind the older pages, and unload local importers;
2550 2547 * but don't disconnect importers
2551 2548 */
2552 2549 (void) rsm_unbind_pages(seg);
2553 2550 seg->s_cookie = cookie;
2554 2551 seg->s_region.r_vaddr = msg->vaddr;
2555 2552 rsm_remap_local_importers(my_nodeid, seg->s_segid,
2556 2553 cookie);
2557 2554 } else {
2558 2555 /*
2559 2556 * Unbind the pages associated with "cookie" by the
2560 2557 			 * rsm_bind_pages call prior to this. This is
2561 2558 * similar to what is done in the rsm_unbind_pages
2562 2559 * routine for the seg->s_cookie.
2563 2560 */
2564 2561 ddi_umem_unlock(cookie);
2565 2562 rsm_dec_pgcnt(btopr(msg->len));
2566 2563 DBG_PRINTF((category, RSM_ERR,
2567 2564 "rsm_rebind failed with %d\n", e));
2568 2565 }
2569 2566 /*
2570 2567 * At present there is no dependency on the existence of xbuf.
2571 2568 * So we can free it here. If in the future this changes, it can
2572 2569 * be freed sometime during the segment destroy.
2573 2570 */
2574 2571 freerbuf(xbuf);
2575 2572 }
2576 2573
2577 2574 /* Unlock segment */
2578 2575 rsmseglock_release(seg);
2579 2576
2580 2577 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind done\n"));
2581 2578
2582 2579 return (e);
2583 2580 }
2584 2581
2585 2582 static int
2586 2583 rsm_unbind(rsmseg_t *seg)
2587 2584 {
2588 2585 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2589 2586
2590 2587 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind enter\n"));
2591 2588
2592 2589 rsmseglock_acquire(seg);
2593 2590
2594 2591 /* verify segment state */
2595 2592 if ((seg->s_state != RSM_STATE_BIND) &&
2596 2593 (seg->s_state != RSM_STATE_BIND_QUIESCED)) {
2597 2594 rsmseglock_release(seg);
2598 2595 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2599 2596 "rsm_unbind: invalid state\n"));
2600 2597 return (RSMERR_BAD_SEG_HNDL);
2601 2598 }
2602 2599
2603 2600 /* unlock current range */
2604 2601 (void) rsm_unbind_pages(seg);
2605 2602
2606 2603 if (seg->s_state == RSM_STATE_BIND) {
2607 2604 seg->s_state = RSM_STATE_NEW;
2608 2605 } else if (seg->s_state == RSM_STATE_BIND_QUIESCED) {
2609 2606 seg->s_state = RSM_STATE_NEW_QUIESCED;
2610 2607 }
2611 2608
2612 2609 rsmseglock_release(seg);
2613 2610
2614 2611 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind done\n"));
2615 2612
2616 2613 return (RSM_SUCCESS);
2617 2614 }
2618 2615
2619 2616 /* **************************** Exporter Access List Management ******* */
2620 2617 static void
2621 2618 rsmacl_free(rsmapi_access_entry_t *acl, int acl_len)
2622 2619 {
2623 2620 int acl_sz;
2624 2621 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2625 2622
2626 2623 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_free enter\n"));
2627 2624
2628 2625 /* acl could be NULL */
2629 2626
2630 2627 if (acl != NULL && acl_len > 0) {
2631 2628 acl_sz = acl_len * sizeof (rsmapi_access_entry_t);
2632 2629 kmem_free((void *)acl, acl_sz);
2633 2630 }
2634 2631
2635 2632 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_free done\n"));
2636 2633 }
2637 2634
2638 2635 static void
2639 2636 rsmpiacl_free(rsm_access_entry_t *acl, int acl_len)
2640 2637 {
2641 2638 int acl_sz;
2642 2639 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2643 2640
2644 2641 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_free enter\n"));
2645 2642
2646 2643 if (acl != NULL && acl_len > 0) {
2647 2644 acl_sz = acl_len * sizeof (rsm_access_entry_t);
2648 2645 kmem_free((void *)acl, acl_sz);
2649 2646 }
2650 2647
2651 2648 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_free done\n"));
2652 2649
2653 2650 }
2654 2651
2655 2652 static int
2656 2653 rsmacl_build(rsm_ioctlmsg_t *msg, int mode,
2657 2654 rsmapi_access_entry_t **list, int *len, int loopback)
2658 2655 {
2659 2656 rsmapi_access_entry_t *acl;
2660 2657 int acl_len;
2661 2658 int i;
2662 2659 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2663 2660
2664 2661 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_build enter\n"));
2665 2662
2666 2663 *len = 0;
2667 2664 *list = NULL;
2668 2665
2669 2666 acl_len = msg->acl_len;
2670 2667 if ((loopback && acl_len > 1) || (acl_len < 0) ||
2671 2668 (acl_len > MAX_NODES)) {
2672 2669 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2673 2670 "rsmacl_build done: acl invalid\n"));
2674 2671 return (RSMERR_BAD_ACL);
2675 2672 }
2676 2673
2677 2674 if (acl_len > 0 && acl_len <= MAX_NODES) {
2678 2675 size_t acl_size = acl_len * sizeof (rsmapi_access_entry_t);
2679 2676
2680 2677 acl = kmem_alloc(acl_size, KM_SLEEP);
2681 2678
2682 2679 if (ddi_copyin((caddr_t)msg->acl, (caddr_t)acl,
2683 2680 acl_size, mode)) {
2684 2681 kmem_free((void *) acl, acl_size);
2685 2682 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2686 2683 "rsmacl_build done: BAD_ADDR\n"));
2687 2684 return (RSMERR_BAD_ADDR);
2688 2685 }
2689 2686
2690 2687 /*
2691 2688 * Verify access list
2692 2689 */
2693 2690 for (i = 0; i < acl_len; i++) {
2694 2691 if (acl[i].ae_node > MAX_NODES ||
2695 2692 (loopback && (acl[i].ae_node != my_nodeid)) ||
2696 2693 acl[i].ae_permission > RSM_ACCESS_TRUSTED) {
2697 2694 /* invalid entry */
2698 2695 kmem_free((void *) acl, acl_size);
2699 2696 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2700 2697 "rsmacl_build done: EINVAL\n"));
2701 2698 return (RSMERR_BAD_ACL);
2702 2699 }
2703 2700 }
2704 2701
2705 2702 *len = acl_len;
2706 2703 *list = acl;
2707 2704 }
2708 2705
2709 2706 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_build done\n"));
2710 2707
2711 2708 return (DDI_SUCCESS);
2712 2709 }
2713 2710
2714 2711 static int
2715 2712 rsmpiacl_create(rsmapi_access_entry_t *src, rsm_access_entry_t **dest,
2716 2713 int acl_len, adapter_t *adapter)
2717 2714 {
2718 2715 rsm_access_entry_t *acl;
2719 2716 rsm_addr_t hwaddr;
2720 2717 int i;
2721 2718 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2722 2719
2723 2720 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_create enter\n"));
2724 2721
2725 2722 if (src != NULL) {
2726 2723 size_t acl_size = acl_len * sizeof (rsm_access_entry_t);
2727 2724 acl = kmem_alloc(acl_size, KM_SLEEP);
2728 2725
2729 2726 /*
2730 2727 * translate access list
2731 2728 */
2732 2729 for (i = 0; i < acl_len; i++) {
2733 2730 if (src[i].ae_node == my_nodeid) {
2734 2731 acl[i].ae_addr = adapter->hwaddr;
2735 2732 } else {
2736 2733 hwaddr = get_remote_hwaddr(adapter,
2737 2734 src[i].ae_node);
2738 2735 if ((int64_t)hwaddr < 0) {
2739 2736 /* invalid hwaddr */
2740 2737 kmem_free((void *) acl, acl_size);
2741 2738 DBG_PRINTF((category,
2742 2739 RSM_DEBUG_VERBOSE,
2743 2740 "rsmpiacl_create done:"
2744 2741 "EINVAL hwaddr\n"));
2745 2742 return (RSMERR_INTERNAL_ERROR);
2746 2743 }
2747 2744 acl[i].ae_addr = hwaddr;
2748 2745 }
2749 2746 /* rsmpi understands only RSM_PERM_XXXX */
2750 2747 acl[i].ae_permission =
2751 2748 src[i].ae_permission & RSM_PERM_RDWR;
2752 2749 }
2753 2750 *dest = acl;
2754 2751 } else {
2755 2752 *dest = NULL;
2756 2753 }
2757 2754
2758 2755 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_create done\n"));
2759 2756
2760 2757 return (RSM_SUCCESS);
2761 2758 }
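
/*
 * Editorial sketch, not driver data: the shape of the translation
 * above. Node ids become adapter hardware addresses and only the
 * RSM_PERM_RDWR bits survive; the values here are made up.
 *
 *	in:  rsmapi_access_entry_t { ae_node = 3, ae_permission = 0624 }
 *	out: rsm_access_entry_t   { ae_addr = get_remote_hwaddr(adapter, 3),
 *				    ae_permission = 0624 & RSM_PERM_RDWR }
 *
 * For the local node (ae_node == my_nodeid) the adapter's own hwaddr
 * is used instead of a remote lookup.
 */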
2762 2759
2763 2760 static int
2764 2761 rsmsegacl_validate(rsmipc_request_t *req, rsm_node_id_t rnode,
2765 2762 rsmipc_reply_t *reply)
2766 2763 {
2767 2764
2768 2765 int i;
2769 2766 rsmseg_t *seg;
2770 2767 rsm_memseg_id_t key = req->rsmipc_key;
2771 2768 rsm_permission_t perm = req->rsmipc_perm;
2772 2769 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2773 2770
2774 2771 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2775 2772 "rsmsegacl_validate enter\n"));
2776 2773
2777 2774 /*
2778 2775 * Find segment and grab its lock. The reason why we grab the segment
2779 2776 	 * lock inside the search is to avoid the race when the segment is
2780 2777 * being deleted and we already have a pointer to it.
2781 2778 */
2782 2779 seg = rsmexport_lookup(key);
2783 2780 if (!seg) {
2784 2781 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2785 2782 "rsmsegacl_validate done: %u ENXIO\n", key));
2786 2783 return (RSMERR_SEG_NOT_PUBLISHED);
2787 2784 }
2788 2785
2789 2786 ASSERT(rsmseglock_held(seg));
2790 2787 ASSERT(seg->s_state == RSM_STATE_EXPORT);
2791 2788
2792 2789 /*
2793 2790 * We implement a 2-level protection scheme.
2794 2791 * First, we check if local/remote host has access rights.
2795 2792 * Second, we check if the user has access rights.
2796 2793 *
2797 2794 * This routine only validates the rnode access_list
2798 2795 */
2799 2796 if (seg->s_acl_len > 0) {
2800 2797 /*
2801 2798 * Check host access list
2802 2799 */
2803 2800 ASSERT(seg->s_acl != NULL);
2804 2801 for (i = 0; i < seg->s_acl_len; i++) {
2805 2802 if (seg->s_acl[i].ae_node == rnode) {
2806 2803 perm &= seg->s_acl[i].ae_permission;
2807 2804 goto found;
2808 2805 }
2809 2806 }
2810 2807 /* rnode is not found in the list */
2811 2808 rsmseglock_release(seg);
2812 2809 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2813 2810 "rsmsegacl_validate done: EPERM\n"));
2814 2811 return (RSMERR_SEG_NOT_PUBLISHED_TO_NODE);
2815 2812 } else {
2816 2813 /* use default owner creation umask */
2817 2814 perm &= seg->s_mode;
2818 2815 }
2819 2816
2820 2817 found:
2821 2818 /* update perm for this node */
2822 2819 reply->rsmipc_mode = perm;
2823 2820 reply->rsmipc_uid = seg->s_uid;
2824 2821 reply->rsmipc_gid = seg->s_gid;
2825 2822 reply->rsmipc_segid = seg->s_segid;
2826 2823 reply->rsmipc_seglen = seg->s_len;
2827 2824
2828 2825 /*
2829 2826 * Perm of requesting node is valid; source will validate user
2830 2827 */
2831 2828 rsmseglock_release(seg);
2832 2829
2833 2830 /*
2834 2831 	 * Add the importer to the list right away; if connect fails,
2835 2832 * the importer will ask the exporter to remove it.
2836 2833 */
2837 2834 importer_list_add(rnode, key, req->rsmipc_adapter_hwaddr,
2838 2835 req->rsmipc_segment_cookie);
2839 2836
2840 2837 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegacl_validate done\n"));
2841 2838
2842 2839 return (RSM_SUCCESS);
2843 2840 }
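
/*
 * Editorial note, not driver code: a worked example of the node-level
 * check above, for an importer requesting rsmipc_perm == 0600:
 *
 *	ACL entry { ae_node == rnode, ae_permission = 0400 }
 *		perm &= 0400, so the importer is granted read-only
 *	non-empty ACL that does not list rnode
 *		RSMERR_SEG_NOT_PUBLISHED_TO_NODE
 *	empty ACL (s_acl_len == 0)
 *		perm &= seg->s_mode, the owner's creation-umask default
 *
 * The second level, the user check, is left to the importing side,
 * which receives the uid/gid/mode in the reply.
 */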
2844 2841
2845 2842
2846 2843 /* ************************** Exporter Calls ************************* */
2847 2844
2848 2845 static int
2849 2846 rsm_publish(rsmseg_t *seg, rsm_ioctlmsg_t *msg, intptr_t dataptr, int mode)
2850 2847 {
2851 2848 int e;
2852 2849 int acl_len;
2853 2850 rsmapi_access_entry_t *acl;
2854 2851 rsm_access_entry_t *rsmpi_acl;
2855 2852 rsm_memory_local_t mem;
2856 2853 struct buf *xbuf;
2857 2854 dev_t sdev = 0;
2858 2855 adapter_t *adapter;
2859 2856 rsm_memseg_id_t segment_id = 0;
2860 2857 int loopback_flag = 0;
2861 2858 int create_flags = 0;
2862 2859 rsm_resource_callback_t callback_flag;
2863 2860 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2864 2861
2865 2862 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_publish enter\n"));
2866 2863
2867 2864 if (seg->s_adapter == &loopback_adapter)
2868 2865 loopback_flag = 1;
2869 2866
2870 2867 if (seg->s_pid != ddi_get_pid() &&
2871 2868 ddi_get_pid() != 0) {
2872 2869 DBG_PRINTF((category, RSM_ERR,
2873 2870 "rsm_publish: Not creator\n"));
2874 2871 return (RSMERR_NOT_CREATOR);
2875 2872 }
2876 2873
2877 2874 /*
2878 2875 * Get per node access list
2879 2876 */
2880 2877 e = rsmacl_build(msg, mode, &acl, &acl_len, loopback_flag);
2881 2878 if (e != DDI_SUCCESS) {
2882 2879 DBG_PRINTF((category, RSM_ERR,
2883 2880 "rsm_publish done: rsmacl_build failed\n"));
2884 2881 return (e);
2885 2882 }
2886 2883
2887 2884 /*
2888 2885 	 * The application-provided msg->key is used for resolving a
2889 2886 * segment id according to the following:
2890 2887 * key = 0 Kernel Agent selects the segment id
2891 2888 * key <= RSM_DLPI_ID_END Reserved for system usage except
2892 2889 * RSMLIB range
2893 2890 * key < RSM_USER_APP_ID_BASE segment id = key
2894 2891 * key >= RSM_USER_APP_ID_BASE Reserved for KA selections
2895 2892 *
2896 2893 * rsm_nextavail_segmentid is initialized to 0x80000000 and
2897 2894 * overflows to zero after 0x80000000 allocations.
2898 2895 * An algorithm is needed which allows reinitialization and provides
2899 2896 	 * for reallocation after overflow. For now,
2900 2897 	 * RSMERR_INSUFFICIENT_RESOURCES is returned once overflow occurs.
2901 2898 */
2902 2899 if (msg->key == 0) {
2903 2900 mutex_enter(&rsm_lock);
2904 2901 segment_id = rsm_nextavail_segmentid;
2905 2902 if (segment_id != 0) {
2906 2903 rsm_nextavail_segmentid++;
2907 2904 mutex_exit(&rsm_lock);
2908 2905 } else {
2909 2906 mutex_exit(&rsm_lock);
2910 2907 DBG_PRINTF((category, RSM_ERR,
2911 2908 "rsm_publish done: no more keys avlbl\n"));
2912 2909 return (RSMERR_INSUFFICIENT_RESOURCES);
2913 2910 }
2914 2911 } else if BETWEEN(msg->key, RSM_RSMLIB_ID_BASE, RSM_RSMLIB_ID_END)
2915 2912 /* range reserved for internal use by base/ndi libraries */
2916 2913 segment_id = msg->key;
2917 2914 else if (msg->key <= RSM_DLPI_ID_END)
2918 2915 return (RSMERR_RESERVED_SEGID);
2919 2916 else if (msg->key <= (uint_t)RSM_USER_APP_ID_BASE -1)
2920 2917 segment_id = msg->key;
2921 2918 else {
2922 2919 DBG_PRINTF((category, RSM_ERR,
2923 2920 "rsm_publish done: invalid key %u\n", msg->key));
2924 2921 return (RSMERR_RESERVED_SEGID);
2925 2922 }
2926 2923
2927 2924 /* Add key to exportlist; The segment lock is held on success */
2928 2925 e = rsmexport_add(seg, segment_id);
2929 2926 if (e) {
2930 2927 rsmacl_free(acl, acl_len);
2931 2928 DBG_PRINTF((category, RSM_ERR,
2932 2929 "rsm_publish done: export_add failed: %d\n", e));
2933 2930 return (e);
2934 2931 }
2935 2932
2936 2933 seg->s_segid = segment_id;
2937 2934
2938 2935 if ((seg->s_state != RSM_STATE_BIND) &&
2939 2936 (seg->s_state != RSM_STATE_BIND_QUIESCED)) {
2940 2937 /* state changed since then, free acl and return */
2941 2938 rsmseglock_release(seg);
2942 2939 rsmexport_rm(seg);
2943 2940 rsmacl_free(acl, acl_len);
2944 2941 DBG_PRINTF((category, RSM_ERR,
2945 2942 "rsm_publish done: segment in wrong state: %d\n",
2946 2943 seg->s_state));
2947 2944 return (RSMERR_BAD_SEG_HNDL);
2948 2945 }
2949 2946
2950 2947 /*
2951 2948 * If this is for a local memory handle and permissions are zero,
2952 2949 * then the surrogate segment is very large and we want to skip
2953 2950 * allocation of DVMA space.
2954 2951 *
2955 2952 * Careful! If the user didn't use an ACL list, acl will be a NULL
2956 2953 * pointer. Check that before dereferencing it.
2957 2954 */
2958 2955 if (acl != (rsmapi_access_entry_t *)NULL) {
2959 2956 if (acl[0].ae_node == my_nodeid && acl[0].ae_permission == 0)
2960 2957 goto skipdriver;
2961 2958 }
2962 2959
2963 2960 /* create segment */
2964 2961 xbuf = ddi_umem_iosetup(seg->s_cookie, 0, seg->s_len, B_WRITE,
2965 2962 sdev, 0, NULL, DDI_UMEM_SLEEP);
2966 2963 ASSERT(xbuf != NULL);
2967 2964
2968 2965 mem.ms_type = RSM_MEM_BUF;
2969 2966 mem.ms_bp = xbuf;
2970 2967
2971 2968 	/* This call includes a bind operation */
2972 2969
2973 2970 adapter = seg->s_adapter;
2974 2971 /*
2975 2972 	 * create an ACL list with hwaddrs for the RSMPI publish
2976 2973 */
2977 2974 e = rsmpiacl_create(acl, &rsmpi_acl, acl_len, adapter);
2978 2975
2979 2976 if (e != RSM_SUCCESS) {
2980 2977 rsmseglock_release(seg);
2981 2978 rsmexport_rm(seg);
2982 2979 rsmacl_free(acl, acl_len);
2983 2980 freerbuf(xbuf);
2984 2981 DBG_PRINTF((category, RSM_ERR,
2985 2982 "rsm_publish done: rsmpiacl_create failed: %d\n", e));
2986 2983 return (e);
2987 2984 }
2988 2985
2989 2986 if (seg->s_state == RSM_STATE_BIND) {
2990 2987 /* create segment */
2991 2988
2992 2989 		/* This call includes a bind operation */
2993 2990
2994 2991 if (seg->s_flags & RSMKA_ALLOW_UNBIND_REBIND) {
2995 2992 create_flags = RSM_ALLOW_UNBIND_REBIND;
2996 2993 }
2997 2994
2998 2995 if (seg->s_flags & RSMKA_SET_RESOURCE_DONTWAIT) {
2999 2996 callback_flag = RSM_RESOURCE_DONTWAIT;
3000 2997 } else {
3001 2998 callback_flag = RSM_RESOURCE_SLEEP;
3002 2999 }
3003 3000
3004 3001 e = adapter->rsmpi_ops->rsm_seg_create(
3005 3002 adapter->rsmpi_handle,
3006 3003 &seg->s_handle.out, seg->s_len,
3007 3004 create_flags, &mem,
3008 3005 callback_flag, NULL);
3009 3006 /*
3010 3007 * At present there is no dependency on the existence of xbuf.
3011 3008 * So we can free it here. If in the future this changes, it can
3012 3009 * be freed sometime during the segment destroy.
3013 3010 */
3014 3011 freerbuf(xbuf);
3015 3012
3016 3013 if (e != RSM_SUCCESS) {
3017 3014 rsmseglock_release(seg);
3018 3015 rsmexport_rm(seg);
3019 3016 rsmacl_free(acl, acl_len);
3020 3017 rsmpiacl_free(rsmpi_acl, acl_len);
3021 3018 DBG_PRINTF((category, RSM_ERR,
3022 3019 "rsm_publish done: export_create failed: %d\n", e));
3023 3020 /*
3024 3021 * The following assertion ensures that the two errors
3025 3022 * related to the length and its alignment do not occur
3026 3023 * since they have been checked during export_create
3027 3024 */
3028 3025 ASSERT(e != RSMERR_BAD_MEM_ALIGNMENT &&
3029 3026 e != RSMERR_BAD_LENGTH);
3030 3027 if (e == RSMERR_NOT_MEM)
3031 3028 e = RSMERR_INSUFFICIENT_MEM;
3032 3029
3033 3030 return (e);
3034 3031 }
3035 3032 /* export segment, this should create an IMMU mapping */
3036 3033 e = adapter->rsmpi_ops->rsm_publish(
3037 3034 seg->s_handle.out,
3038 3035 rsmpi_acl, acl_len,
3039 3036 seg->s_segid,
3040 3037 RSM_RESOURCE_DONTWAIT, NULL);
3041 3038
3042 3039 if (e != RSM_SUCCESS) {
3043 3040 adapter->rsmpi_ops->rsm_seg_destroy(seg->s_handle.out);
3044 3041 rsmseglock_release(seg);
3045 3042 rsmexport_rm(seg);
3046 3043 rsmacl_free(acl, acl_len);
3047 3044 rsmpiacl_free(rsmpi_acl, acl_len);
3048 3045 DBG_PRINTF((category, RSM_ERR,
3049 3046 "rsm_publish done: export_publish failed: %d\n",
3050 3047 e));
3051 3048 return (e);
3052 3049 }
3053 3050 }
3054 3051
3055 3052 seg->s_acl_in = rsmpi_acl;
3056 3053
3057 3054 skipdriver:
3058 3055 /* defer s_acl/s_acl_len -> avoid crash in rsmseg_free */
3059 3056 seg->s_acl_len = acl_len;
3060 3057 seg->s_acl = acl;
3061 3058
3062 3059 if (seg->s_state == RSM_STATE_BIND) {
3063 3060 seg->s_state = RSM_STATE_EXPORT;
3064 3061 } else if (seg->s_state == RSM_STATE_BIND_QUIESCED) {
3065 3062 seg->s_state = RSM_STATE_EXPORT_QUIESCED;
3066 3063 cv_broadcast(&seg->s_cv);
3067 3064 }
3068 3065
3069 3066 rsmseglock_release(seg);
3070 3067
3071 3068 /*
3072 3069 	 * If the kernel agent was asked to select the segment id
3073 3070 	 * (msg->key == 0), return the chosen id in the original message.
3074 3071 */
3075 3072 if (msg->key == 0) {
3076 3073 msg->key = segment_id;
3077 3074 #ifdef _MULTI_DATAMODEL
3078 3075 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
3079 3076 rsm_ioctlmsg32_t msg32;
3080 3077
3081 3078 msg32.key = msg->key;
3082 3079 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3083 3080 "rsm_publish done\n"));
3084 3081 return (ddi_copyout((caddr_t)&msg32,
3085 3082 (caddr_t)dataptr, sizeof (msg32), mode));
3086 3083 }
3087 3084 #endif
3088 3085 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3089 3086 "rsm_publish done\n"));
3090 3087 return (ddi_copyout((caddr_t)msg,
3091 3088 (caddr_t)dataptr, sizeof (*msg), mode));
3092 3089 }
3093 3090
3094 3091 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_publish done\n"));
3095 3092 return (DDI_SUCCESS);
3096 3093 }
3097 3094
3098 3095 /*
3099 3096 * This function modifies the access control list of an already published
3100 3097 * segment. There is no effect on import segments which are already
3101 3098 * connected.
3102 3099 */
3103 3100 static int
3104 3101 rsm_republish(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int mode)
3105 3102 {
3106 3103 rsmapi_access_entry_t *new_acl, *old_acl, *tmp_acl;
3107 3104 rsm_access_entry_t *rsmpi_new_acl, *rsmpi_old_acl;
3108 3105 int new_acl_len, old_acl_len, tmp_acl_len;
3109 3106 int e, i;
3110 3107 adapter_t *adapter;
3111 3108 int loopback_flag = 0;
3112 3109 rsm_memseg_id_t key;
3113 3110 rsm_permission_t permission;
3114 3111 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
3115 3112
3116 3113 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_republish enter\n"));
3117 3114
3118 3115 if ((seg->s_state != RSM_STATE_EXPORT) &&
3119 3116 (seg->s_state != RSM_STATE_EXPORT_QUIESCED) &&
3120 3117 (seg->s_state != RSM_STATE_EXPORT_QUIESCING))
3121 3118 return (RSMERR_SEG_NOT_PUBLISHED);
3122 3119
3123 3120 if (seg->s_pid != ddi_get_pid() &&
3124 3121 ddi_get_pid() != 0) {
3125 3122 DBG_PRINTF((category, RSM_ERR,
3126 3123 "rsm_republish: Not owner\n"));
3127 3124 return (RSMERR_NOT_CREATOR);
3128 3125 }
3129 3126
3130 3127 if (seg->s_adapter == &loopback_adapter)
3131 3128 loopback_flag = 1;
3132 3129
3133 3130 /*
3134 3131 * Build new list first
3135 3132 */
3136 3133 e = rsmacl_build(msg, mode, &new_acl, &new_acl_len, loopback_flag);
3137 3134 if (e) {
3138 3135 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3139 3136 "rsm_republish done: rsmacl_build failed %d", e));
3140 3137 return (e);
3141 3138 }
3142 3139
3143 3140 /* Lock segment */
3144 3141 rsmseglock_acquire(seg);
3145 3142 /*
3146 3143 	 * If a republish is in progress (a REPUBLISH message is being
3147 3144 	 * sent to the importers), wait for it to complete, or wait
3148 3145 	 * until DR completes.
3149 3146 */
3150 3147 while (((seg->s_state == RSM_STATE_EXPORT) &&
3151 3148 (seg->s_flags & RSM_REPUBLISH_WAIT)) ||
3152 3149 (seg->s_state == RSM_STATE_EXPORT_QUIESCED) ||
3153 3150 (seg->s_state == RSM_STATE_EXPORT_QUIESCING)) {
3154 3151 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
3155 3152 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3156 3153 "rsm_republish done: cv_wait INTERRUPTED"));
3157 3154 rsmseglock_release(seg);
3158 3155 rsmacl_free(new_acl, new_acl_len);
3159 3156 return (RSMERR_INTERRUPTED);
3160 3157 }
3161 3158 }
3162 3159
3163 3160 /* recheck if state is valid */
3164 3161 if (seg->s_state != RSM_STATE_EXPORT) {
3165 3162 rsmseglock_release(seg);
3166 3163 rsmacl_free(new_acl, new_acl_len);
3167 3164 return (RSMERR_SEG_NOT_PUBLISHED);
3168 3165 }
3169 3166
3170 3167 key = seg->s_key;
3171 3168 old_acl = seg->s_acl;
3172 3169 old_acl_len = seg->s_acl_len;
3173 3170
3174 3171 seg->s_acl = new_acl;
3175 3172 seg->s_acl_len = new_acl_len;
3176 3173
3177 3174 /*
3178 3175 * This call will only be meaningful if and when the interconnect
3179 3176 * layer makes use of the access list
3180 3177 */
3181 3178 adapter = seg->s_adapter;
3182 3179 /*
3183 3180 	 * create an ACL list with hwaddrs for the RSMPI publish
3184 3181 */
3185 3182 e = rsmpiacl_create(new_acl, &rsmpi_new_acl, new_acl_len, adapter);
3186 3183
3187 3184 if (e != RSM_SUCCESS) {
3188 3185 seg->s_acl = old_acl;
3189 3186 seg->s_acl_len = old_acl_len;
3190 3187 rsmseglock_release(seg);
3191 3188 rsmacl_free(new_acl, new_acl_len);
3192 3189 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3193 3190 "rsm_republish done: rsmpiacl_create failed %d", e));
3194 3191 return (e);
3195 3192 }
3196 3193 rsmpi_old_acl = seg->s_acl_in;
3197 3194 seg->s_acl_in = rsmpi_new_acl;
3198 3195
3199 3196 e = adapter->rsmpi_ops->rsm_republish(seg->s_handle.out,
3200 3197 seg->s_acl_in, seg->s_acl_len,
3201 3198 RSM_RESOURCE_DONTWAIT, NULL);
3202 3199
3203 3200 if (e != RSM_SUCCESS) {
3204 3201 seg->s_acl = old_acl;
3205 3202 seg->s_acl_in = rsmpi_old_acl;
3206 3203 seg->s_acl_len = old_acl_len;
3207 3204 rsmseglock_release(seg);
3208 3205 rsmacl_free(new_acl, new_acl_len);
3209 3206 rsmpiacl_free(rsmpi_new_acl, new_acl_len);
3210 3207
3211 3208 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3212 3209 "rsm_republish done: rsmpi republish failed %d\n", e));
3213 3210 return (e);
3214 3211 }
3215 3212
3216 3213 /* create a tmp copy of the new acl */
3217 3214 tmp_acl_len = new_acl_len;
3218 3215 if (tmp_acl_len > 0) {
3219 3216 tmp_acl = kmem_zalloc(new_acl_len*sizeof (*tmp_acl), KM_SLEEP);
3220 3217 for (i = 0; i < tmp_acl_len; i++) {
3221 3218 tmp_acl[i].ae_node = new_acl[i].ae_node;
3222 3219 tmp_acl[i].ae_permission = new_acl[i].ae_permission;
3223 3220 }
3224 3221 /*
3225 3222 * The default permission of a node which was in the old
3226 3223 		 * ACL but not in the new ACL is 0, i.e. no access.
3227 3224 */
3228 3225 permission = 0;
3229 3226 } else {
3230 3227 /*
3231 3228 * NULL acl means all importers can connect and
3232 3229 * default permission will be owner creation umask
3233 3230 */
3234 3231 tmp_acl = NULL;
3235 3232 permission = seg->s_mode;
3236 3233 }
3237 3234
3238 3235 	/* make other republishers wait for this republish to complete */
3239 3236 seg->s_flags |= RSM_REPUBLISH_WAIT;
3240 3237
3241 3238 rsmseglock_release(seg);
3242 3239
3243 3240 /* send the new perms to the importing nodes */
3244 3241 rsm_send_republish(key, tmp_acl, tmp_acl_len, permission);
3245 3242
3246 3243 rsmseglock_acquire(seg);
3247 3244 seg->s_flags &= ~RSM_REPUBLISH_WAIT;
3248 3245	/* wake up anyone waiting for the republish to complete */
3249 3246 cv_broadcast(&seg->s_cv);
3250 3247 rsmseglock_release(seg);
3251 3248
3252 3249 rsmacl_free(tmp_acl, tmp_acl_len);
3253 3250 rsmacl_free(old_acl, old_acl_len);
3254 3251 rsmpiacl_free(rsmpi_old_acl, old_acl_len);
3255 3252
3256 3253 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_republish done\n"));
3257 3254 return (DDI_SUCCESS);
3258 3255 }
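/*
 * For illustration: the ACL swap in rsm_republish above follows a
 * save/install/rollback pattern.  A minimal standalone sketch of the
 * same idea, assuming hypothetical types obj_t/acl_t and helpers
 * commit() and acl_free():
 *
 *        int
 *        acl_swap(obj_t *o, acl_t *new_acl)
 *        {
 *                acl_t *old_acl = o->acl;        // save the current list
 *                o->acl = new_acl;               // install tentatively
 *                if (commit(o) != 0) {
 *                        o->acl = old_acl;       // roll back on failure
 *                        return (-1);
 *                }
 *                acl_free(old_acl);              // success: retire old copy
 *                return (0);
 *        }
 */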
3259 3256
3260 3257 static int
3261 3258 rsm_unpublish(rsmseg_t *seg, int mode)
3262 3259 {
3263 3260 rsmapi_access_entry_t *acl;
3264 3261 rsm_access_entry_t *rsmpi_acl;
3265 3262 int acl_len;
3266 3263 int e;
3267 3264 adapter_t *adapter;
3268 3265 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
3269 3266
3270 3267 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unpublish enter\n"));
3271 3268
3272 3269 if (seg->s_pid != ddi_get_pid() &&
3273 3270 ddi_get_pid() != 0) {
3274 3271 DBG_PRINTF((category, RSM_ERR,
3275 3272 "rsm_unpublish: Not creator\n"));
3276 3273 return (RSMERR_NOT_CREATOR);
3277 3274 }
3278 3275
3279 3276 rsmseglock_acquire(seg);
3280 3277 /*
3281 3278	 * Wait for QUIESCING to complete here before rsmexport_rm is called
3282 3279	 * because the SUSPEND_COMPLETE message, which changes the segment
3283 3280	 * state from EXPORT_QUIESCING to EXPORT_QUIESCED and signals the
3284 3281	 * cv_wait, needs to find the segment in the hashtable.
3285 3282 */
3286 3283 while ((seg->s_state == RSM_STATE_EXPORT_QUIESCING) ||
3287 3284 ((seg->s_state == RSM_STATE_EXPORT) && (seg->s_rdmacnt > 0))) {
3288 3285 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
3289 3286 rsmseglock_release(seg);
3290 3287 DBG_PRINTF((category, RSM_ERR,
3291 3288			    "rsm_unpublish done: cv_wait INTR qscing "
3292 3289			    "getv/putv in progress"));
3293 3290 return (RSMERR_INTERRUPTED);
3294 3291 }
3295 3292 }
3296 3293
3297 3294 /* verify segment state */
3298 3295 if ((seg->s_state != RSM_STATE_EXPORT) &&
3299 3296 (seg->s_state != RSM_STATE_EXPORT_QUIESCED)) {
3300 3297 rsmseglock_release(seg);
3301 3298 DBG_PRINTF((category, RSM_ERR,
3302 3299 "rsm_unpublish done: bad state %x\n", seg->s_state));
3303 3300 return (RSMERR_SEG_NOT_PUBLISHED);
3304 3301 }
3305 3302
3306 3303 rsmseglock_release(seg);
3307 3304
3308 3305 rsmexport_rm(seg);
3309 3306
3310 3307 rsm_send_importer_disconnects(seg->s_segid, my_nodeid);
3311 3308
3312 3309 rsmseglock_acquire(seg);
3313 3310 /*
3314 3311 * wait for republish to complete
3315 3312 */
3316 3313 while ((seg->s_state == RSM_STATE_EXPORT) &&
3317 3314 (seg->s_flags & RSM_REPUBLISH_WAIT)) {
3318 3315 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
3319 3316 DBG_PRINTF((category, RSM_ERR,
3320 3317 "rsm_unpublish done: cv_wait INTR repubing"));
3321 3318 rsmseglock_release(seg);
3322 3319 return (RSMERR_INTERRUPTED);
3323 3320 }
3324 3321 }
3325 3322
3326 3323 if ((seg->s_state != RSM_STATE_EXPORT) &&
3327 3324 (seg->s_state != RSM_STATE_EXPORT_QUIESCED)) {
3328 3325 DBG_PRINTF((category, RSM_ERR,
3329 3326 "rsm_unpublish done: invalid state"));
3330 3327 rsmseglock_release(seg);
3331 3328 return (RSMERR_SEG_NOT_PUBLISHED);
3332 3329 }
3333 3330
3334 3331 /*
3335 3332	 * Check for a putv/getv surrogate segment that was not published
3336 3333	 * to the driver.
3337 3334 *
3338 3335 * Be certain to see if there is an ACL first! If this segment was
3339 3336 * not published with an ACL, acl will be a null pointer. Check
3340 3337 * that before dereferencing it.
3341 3338 */
3342 3339 acl = seg->s_acl;
3343 3340 if (acl != (rsmapi_access_entry_t *)NULL) {
3344 3341 if (acl[0].ae_node == my_nodeid && acl[0].ae_permission == 0)
3345 3342 goto bypass;
3346 3343 }
3347 3344
3348 3345 /* The RSMPI unpublish/destroy has been done if seg is QUIESCED */
3349 3346 if (seg->s_state == RSM_STATE_EXPORT_QUIESCED)
3350 3347 goto bypass;
3351 3348
3352 3349 adapter = seg->s_adapter;
3353 3350 for (;;) {
3354 3351 if (seg->s_state != RSM_STATE_EXPORT) {
3355 3352 rsmseglock_release(seg);
3356 3353 DBG_PRINTF((category, RSM_ERR,
3357 3354 "rsm_unpublish done: bad state %x\n",
3358 3355 seg->s_state));
3359 3356 return (RSMERR_SEG_NOT_PUBLISHED);
3360 3357 }
3361 3358
3362 3359 /* unpublish from adapter */
3363 3360 e = adapter->rsmpi_ops->rsm_unpublish(seg->s_handle.out);
3364 3361
3365 3362 if (e == RSM_SUCCESS) {
3366 3363 break;
3367 3364 }
3368 3365
3369 3366 if (e == RSMERR_SEG_IN_USE && mode == 1) {
3370 3367 /*
3371 3368			 * wait for the unpublish to succeed; it's busy.
3372 3369 */
3373 3370 seg->s_flags |= RSM_EXPORT_WAIT;
3374 3371
3375 3372			/* wait for a max of 1 ms - this is an empirical */
3376 3373			/* value found by some minimal testing and it can */
3377 3374			/* be fine-tuned when we have better numbers. */
3378 3375			/* A long-term fix would be to send a cv_signal */
3379 3376			/* from the intr callback routine; currently */
3380 3377			/* nobody signals this wait. */
3381 3378 (void) cv_reltimedwait(&seg->s_cv, &seg->s_lock,
3382 3379 drv_usectohz(1000), TR_CLOCK_TICK);
3383 3380
3384 3381 DBG_PRINTF((category, RSM_ERR,
3385 3382 "rsm_unpublish: SEG_IN_USE\n"));
3386 3383
3387 3384 seg->s_flags &= ~RSM_EXPORT_WAIT;
3388 3385 } else {
3389 3386 if (mode == 1) {
3390 3387 DBG_PRINTF((category, RSM_ERR,
3391 3388 "rsm:rsmpi unpublish err %x\n", e));
3392 3389 seg->s_state = RSM_STATE_BIND;
3393 3390 }
3394 3391 rsmseglock_release(seg);
3395 3392 return (e);
3396 3393 }
3397 3394 }
3398 3395
3399 3396 /* Free segment */
3400 3397 e = adapter->rsmpi_ops->rsm_seg_destroy(seg->s_handle.out);
3401 3398
3402 3399 if (e != RSM_SUCCESS) {
3403 3400 DBG_PRINTF((category, RSM_ERR,
3404 3401 "rsm_unpublish: rsmpi destroy key=%x failed %x\n",
3405 3402 seg->s_key, e));
3406 3403 }
3407 3404
3408 3405 bypass:
3409 3406 acl = seg->s_acl;
3410 3407 rsmpi_acl = seg->s_acl_in;
3411 3408 acl_len = seg->s_acl_len;
3412 3409
3413 3410 seg->s_acl = NULL;
3414 3411 seg->s_acl_in = NULL;
3415 3412 seg->s_acl_len = 0;
3416 3413
3417 3414 if (seg->s_state == RSM_STATE_EXPORT) {
3418 3415 seg->s_state = RSM_STATE_BIND;
3419 3416 } else if (seg->s_state == RSM_STATE_EXPORT_QUIESCED) {
3420 3417 seg->s_state = RSM_STATE_BIND_QUIESCED;
3421 3418 cv_broadcast(&seg->s_cv);
3422 3419 }
3423 3420
3424 3421 rsmseglock_release(seg);
3425 3422
3426 3423 rsmacl_free(acl, acl_len);
3427 3424 rsmpiacl_free(rsmpi_acl, acl_len);
3428 3425
3429 3426 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unpublish done\n"));
3430 3427
3431 3428 return (DDI_SUCCESS);
3432 3429 }
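/*
 * For illustration: the SEG_IN_USE handling in rsm_unpublish above is a
 * poll-with-timeout loop, since nothing signals s_cv for this case
 * today.  The underlying pattern, sketched with an assumed mutex lk,
 * condvar cv and busy-test busy():
 *
 *        mutex_enter(&lk);
 *        while (busy()) {
 *                // wake up after ~1 ms and retest; a cv_signal from the
 *                // interrupt callback would end the wait sooner
 *                (void) cv_reltimedwait(&cv, &lk, drv_usectohz(1000),
 *                    TR_CLOCK_TICK);
 *        }
 *        mutex_exit(&lk);
 */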
3433 3430
3434 3431 /*
3435 3432 * Called from rsm_unpublish to force an unload and disconnection of all
3436 3433 * importers of the unpublished segment.
3437 3434 *
3438 3435 * First build the list of segments requiring a force disconnect, then
3439 3436 * send a request for each.
3440 3437 */
3441 3438 static void
3442 3439 rsm_send_importer_disconnects(rsm_memseg_id_t ex_segid,
3443 3440 rsm_node_id_t ex_nodeid)
3444 3441 {
3445 3442 rsmipc_request_t request;
3446 3443 importing_token_t *prev_token, *token, *tmp_token, *tokp;
3447 3444 importing_token_t *force_disconnect_list = NULL;
3448 3445 int index;
3449 3446
3450 3447 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3451 3448 "rsm_send_importer_disconnects enter\n"));
3452 3449
3453 3450 index = rsmhash(ex_segid);
3454 3451
3455 3452 mutex_enter(&importer_list.lock);
3456 3453
3457 3454 prev_token = NULL;
3458 3455 token = importer_list.bucket[index];
3459 3456
3460 3457 while (token != NULL) {
3461 3458 if (token->key == ex_segid) {
3462 3459 /*
3463 3460 * take it off the importer list and add it
3464 3461 * to the force disconnect list.
3465 3462 */
3466 3463 if (prev_token == NULL)
3467 3464 importer_list.bucket[index] = token->next;
3468 3465 else
3469 3466 prev_token->next = token->next;
3470 3467 tmp_token = token;
3471 3468 token = token->next;
3472 3469 if (force_disconnect_list == NULL) {
3473 3470 force_disconnect_list = tmp_token;
3474 3471 tmp_token->next = NULL;
3475 3472 } else {
3476 3473 tokp = force_disconnect_list;
3477 3474 /*
3478 3475 * make sure that the tmp_token's node
3479 3476 * is not already on the force disconnect
3480 3477 * list.
3481 3478 */
3482 3479 while (tokp != NULL) {
3483 3480 if (tokp->importing_node ==
3484 3481 tmp_token->importing_node) {
3485 3482 break;
3486 3483 }
3487 3484 tokp = tokp->next;
3488 3485 }
3489 3486 if (tokp == NULL) {
3490 3487 tmp_token->next =
3491 3488 force_disconnect_list;
3492 3489 force_disconnect_list = tmp_token;
3493 3490 } else {
3494 3491 kmem_free((void *)tmp_token,
3495 3492 sizeof (*token));
3496 3493 }
3497 3494 }
3498 3495
3499 3496 } else {
3500 3497 prev_token = token;
3501 3498 token = token->next;
3502 3499 }
3503 3500 }
3504 3501 mutex_exit(&importer_list.lock);
3505 3502
3506 3503 token = force_disconnect_list;
3507 3504 while (token != NULL) {
3508 3505 if (token->importing_node == my_nodeid) {
3509 3506 rsm_force_unload(ex_nodeid, ex_segid,
3510 3507 DISCONNECT);
3511 3508 } else {
3512 3509 request.rsmipc_hdr.rsmipc_type =
3513 3510 RSMIPC_MSG_DISCONNECT;
3514 3511 request.rsmipc_key = token->key;
3515 3512 for (;;) {
3516 3513 if (rsmipc_send(token->importing_node,
3517 3514 &request,
3518 3515 RSM_NO_REPLY) == RSM_SUCCESS) {
3519 3516 break;
3520 3517 } else {
3521 3518 delay(drv_usectohz(10000));
3522 3519 }
3523 3520 }
3524 3521 }
3525 3522 tmp_token = token;
3526 3523 token = token->next;
3527 3524 kmem_free((void *)tmp_token, sizeof (*token));
3528 3525 }
3529 3526
3530 3527 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3531 3528 "rsm_send_importer_disconnects done\n"));
3532 3529 }
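/*
 * For illustration: the bucket walk above is the classic prev-pointer
 * unlink from a singly linked list, collecting the matches on a second
 * list.  A minimal standalone sketch (without the per-node
 * deduplication the driver adds):
 *
 *        struct node { int key; struct node *next; };
 *
 *        struct node *
 *        unlink_matches(struct node **headp, int key)
 *        {
 *                struct node *prev = NULL, *cur = *headp, *out = NULL;
 *
 *                while (cur != NULL) {
 *                        struct node *next = cur->next;
 *                        if (cur->key == key) {
 *                                if (prev == NULL)
 *                                        *headp = next;  // unlink head
 *                                else
 *                                        prev->next = next;
 *                                cur->next = out;        // push onto result
 *                                out = cur;
 *                        } else {
 *                                prev = cur;
 *                        }
 *                        cur = next;
 *                }
 *                return (out);
 *        }
 */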
3533 3530
3534 3531 /*
3535 3532 * This function is used as a callback for unlocking the pages locked
3536 3533 * down by a process which then does a fork or an exec.
3537 3534	 * It marks the export segment corresponding to the umem cookie given
3538 3535	 * by *arg as being in a ZOMBIE state (by calling rsmseg_close); the
3539 3536	 * segment is destroyed later, when an rsm_close occurs.
3540 3537 */
3541 3538 static void
3542 3539 rsm_export_force_destroy(ddi_umem_cookie_t *ck)
3543 3540 {
3544 3541 rsmresource_blk_t *blk;
3545 3542 rsmresource_t *p;
3546 3543 rsmseg_t *eseg = NULL;
3547 3544 int i, j;
3548 3545 int found = 0;
3549 3546
3550 3547 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3551 3548 "rsm_export_force_destroy enter\n"));
3552 3549
3553 3550 /*
3554 3551 * Walk the resource list and locate the export segment (either
3555 3552 * in the BIND or the EXPORT state) which corresponds to the
3556 3553 * ddi_umem_cookie_t being freed up, and call rsmseg_close.
3557 3554 * Change the state to ZOMBIE by calling rsmseg_close with the
3558 3555 * force_flag argument (the second argument) set to 1. Also,
3559 3556 * unpublish and unbind the segment, but don't free it. Free it
3560 3557	 * only on an rsm_close call for the segment.
3561 3558 */
3562 3559 rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
3563 3560
3564 3561 for (i = 0; i < rsm_resource.rsmrc_len; i++) {
3565 3562 blk = rsm_resource.rsmrc_root[i];
3566 3563 if (blk == NULL) {
3567 3564 continue;
3568 3565 }
3569 3566
3570 3567 for (j = 0; j < RSMRC_BLKSZ; j++) {
3571 3568 p = blk->rsmrcblk_blks[j];
3572 3569 if ((p != NULL) && (p != RSMRC_RESERVED) &&
3573 3570 (p->rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT)) {
3574 3571 eseg = (rsmseg_t *)p;
3575 3572 if (eseg->s_cookie != ck)
3576 3573 continue; /* continue searching */
3577 3574 /*
3578 3575 * Found the segment, set flag to indicate
3579 3576 * force destroy processing is in progress
3580 3577 */
3581 3578 rsmseglock_acquire(eseg);
3582 3579 eseg->s_flags |= RSM_FORCE_DESTROY_WAIT;
3583 3580 rsmseglock_release(eseg);
3584 3581 found = 1;
3585 3582 break;
3586 3583 }
3587 3584 }
3588 3585
3589 3586 if (found)
3590 3587 break;
3591 3588 }
3592 3589
3593 3590 rw_exit(&rsm_resource.rsmrc_lock);
3594 3591
3595 3592 if (found) {
3596 3593 ASSERT(eseg != NULL);
3597 3594 /* call rsmseg_close with force flag set to 1 */
3598 3595 rsmseg_close(eseg, 1);
3599 3596 /*
3600 3597 * force destroy processing done, clear flag and signal any
3601 3598 * thread waiting in rsmseg_close.
3602 3599 */
3603 3600 rsmseglock_acquire(eseg);
3604 3601 eseg->s_flags &= ~RSM_FORCE_DESTROY_WAIT;
3605 3602 cv_broadcast(&eseg->s_cv);
3606 3603 rsmseglock_release(eseg);
3607 3604 }
3608 3605
3609 3606 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3610 3607 "rsm_export_force_destroy done\n"));
3611 3608 }
3612 3609
3613 3610 /* ******************************* Remote Calls *********************** */
3614 3611 static void
3615 3612 rsm_intr_segconnect(rsm_node_id_t src, rsmipc_request_t *req)
3616 3613 {
3617 3614 rsmipc_reply_t reply;
3618 3615 DBG_DEFINE(category,
3619 3616 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3620 3617
3621 3618 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3622 3619 "rsm_intr_segconnect enter\n"));
3623 3620
3624 3621 reply.rsmipc_status = (short)rsmsegacl_validate(req, src, &reply);
3625 3622
3626 3623 reply.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPLY;
3627 3624 reply.rsmipc_hdr.rsmipc_cookie = req->rsmipc_hdr.rsmipc_cookie;
3628 3625
3629 3626 (void) rsmipc_send(src, NULL, &reply);
3630 3627
3631 3628 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3632 3629 "rsm_intr_segconnect done\n"));
3633 3630 }
3634 3631
3635 3632
3636 3633 /*
3637 3634 * When an exported segment is unpublished the exporter sends an ipc
3638 3635 * message (RSMIPC_MSG_DISCONNECT) to all importers. The recv ipc dispatcher
3639 3636 * calls this function. The import list is scanned; segments which match the
3640 3637 * exported segment id are unloaded and disconnected.
3641 3638 *
3642 3639 * Will also be called from rsm_rebind with disconnect_flag FALSE.
3643 3640 *
3644 3641 */
3645 3642 static void
3646 3643 rsm_force_unload(rsm_node_id_t src_nodeid,
3647 3644 rsm_memseg_id_t ex_segid,
3648 3645 boolean_t disconnect_flag)
3649 3646
3650 3647 {
3651 3648 rsmresource_t *p = NULL;
3652 3649 rsmhash_table_t *rhash = &rsm_import_segs;
3653 3650 uint_t index;
3654 3651 DBG_DEFINE(category,
3655 3652 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3656 3653
3657 3654 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_force_unload enter\n"));
3658 3655
3659 3656 index = rsmhash(ex_segid);
3660 3657
3661 3658 rw_enter(&rhash->rsmhash_rw, RW_READER);
3662 3659
3663 3660 p = rsmhash_getbkt(rhash, index);
3664 3661
3665 3662 for (; p; p = p->rsmrc_next) {
3666 3663 rsmseg_t *seg = (rsmseg_t *)p;
3667 3664 if ((seg->s_segid == ex_segid) && (seg->s_node == src_nodeid)) {
3668 3665 /*
3669 3666 * In order to make rsmseg_unload and rsm_force_unload
3670 3667 * thread safe, acquire the segment lock here.
3671 3668 * rsmseg_unload is responsible for releasing the lock.
3672 3669 * rsmseg_unload releases the lock just before a call
3673 3670 * to rsmipc_send or in case of an early exit which
3674 3671 * occurs if the segment was in the state
3675 3672 * RSM_STATE_CONNECTING or RSM_STATE_NEW.
3676 3673 */
3677 3674 rsmseglock_acquire(seg);
3678 3675 if (disconnect_flag)
3679 3676 seg->s_flags |= RSM_FORCE_DISCONNECT;
3680 3677 rsmseg_unload(seg);
3681 3678 }
3682 3679 }
3683 3680 rw_exit(&rhash->rsmhash_rw);
3684 3681
3685 3682 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_force_unload done\n"));
3686 3683 }
3687 3684
3688 3685 static void
3689 3686 rsm_intr_reply(rsmipc_msghdr_t *msg)
3690 3687 {
3691 3688 /*
3692 3689	 * Find the slot for the cookie in the reply.
3693 3690	 * Match the sequence number with the sequence in the cookie;
3694 3691	 * if there is no match, return.
3695 3692	 * Otherwise grab the slot lock and, if the slot is still
3696 3693	 * pending, copy the data into the reply slot area
3697 3694	 * and signal the waiter.
3698 3695 */
3699 3696 rsmipc_slot_t *slot;
3700 3697 rsmipc_cookie_t *cookie;
3701 3698 void *data = (void *) msg;
3702 3699 size_t size = sizeof (rsmipc_reply_t);
3703 3700 DBG_DEFINE(category,
3704 3701 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3705 3702
3706 3703 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_reply enter\n"));
3707 3704
3708 3705 cookie = &msg->rsmipc_cookie;
3709 3706 if (cookie->ic.index >= RSMIPC_SZ) {
3710 3707 DBG_PRINTF((category, RSM_ERR,
3711 3708 "rsm: rsm_intr_reply bad cookie %d\n", cookie->ic.index));
3712 3709 return;
3713 3710 }
3714 3711
3715 3712 ASSERT(cookie->ic.index < RSMIPC_SZ);
3716 3713 slot = &rsm_ipc.slots[cookie->ic.index];
3717 3714 mutex_enter(&slot->rsmipc_lock);
3718 3715 if (slot->rsmipc_cookie.value == cookie->value) {
3719 3716 /* found a match */
3720 3717 if (RSMIPC_GET(slot, RSMIPC_PENDING)) {
3721 3718 bcopy(data, slot->rsmipc_data, size);
3722 3719 RSMIPC_CLEAR(slot, RSMIPC_PENDING);
3723 3720 cv_signal(&slot->rsmipc_cv);
3724 3721 }
3725 3722 } else {
3726 3723 DBG_PRINTF((category, RSM_DEBUG,
3727 3724 "rsm: rsm_intr_reply mismatched reply %d\n",
3728 3725 cookie->ic.index));
3729 3726 }
3730 3727 mutex_exit(&slot->rsmipc_lock);
3731 3728 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_reply done\n"));
3732 3729 }
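/*
 * For illustration: a reply cookie is an (index, sequence) pair; the
 * index selects the slot and the full cookie value rejects replies to
 * an earlier request that reused the slot.  The pattern, sketched with
 * assumed types slot_t/cookie_t and slot array slots[NSLOTS]:
 *
 *        if (ck.ic.index >= NSLOTS)
 *                return;                         // malformed cookie
 *        slot_t *s = &slots[ck.ic.index];
 *        mutex_enter(&s->lock);
 *        if (s->cookie.value == ck.value && s->pending) {
 *                bcopy(reply, s->data, sizeof (*reply));
 *                s->pending = 0;
 *                cv_signal(&s->cv);              // wake the waiting sender
 *        }
 *        mutex_exit(&s->lock);
 */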
3733 3730
3734 3731 /*
3735 3732 * This function gets dispatched on the worker thread when we receive
3736 3733	 * the SQREADY message; it sends the SQREADY_ACK message.
3737 3734 */
3738 3735 static void
3739 3736 rsm_sqready_ack_deferred(void *arg)
3740 3737 {
3741 3738 path_t *path = (path_t *)arg;
3742 3739 DBG_DEFINE(category,
3743 3740 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3744 3741
3745 3742 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3746 3743 "rsm_sqready_ack_deferred enter\n"));
3747 3744
3748 3745 mutex_enter(&path->mutex);
3749 3746
3750 3747 /*
3751 3748	 * If the path is not active there is no point in sending the ACK,
3752 3749	 * because the whole SQREADY protocol will start again
3753 3750 * when the path becomes active.
3754 3751 */
3755 3752 if (path->state != RSMKA_PATH_ACTIVE) {
3756 3753 /*
3757 3754 * decrement the path refcnt incremented in rsm_proc_sqready
3758 3755 */
3759 3756 PATH_RELE_NOLOCK(path);
3760 3757 mutex_exit(&path->mutex);
3761 3758 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3762 3759 "rsm_sqready_ack_deferred done:!ACTIVE\n"));
3763 3760 return;
3764 3761 }
3765 3762
3766 3763 /* send an SQREADY_ACK message */
3767 3764 (void) rsmipc_send_controlmsg(path, RSMIPC_MSG_SQREADY_ACK);
3768 3765
3769 3766 /* initialize credits to the max level */
3770 3767 path->sendq_token.msgbuf_avail = RSMIPC_MAX_MESSAGES;
3771 3768
3772 3769 /* wake up any send that is waiting for credits */
3773 3770 cv_broadcast(&path->sendq_token.sendq_cv);
3774 3771
3775 3772 /*
3776 3773 * decrement the path refcnt since we incremented it in
3777 3774 * rsm_proc_sqready
3778 3775 */
3779 3776 PATH_RELE_NOLOCK(path);
3780 3777
3781 3778 mutex_exit(&path->mutex);
3782 3779
3783 3780 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3784 3781 "rsm_sqready_ack_deferred done\n"));
3785 3782 }
3786 3783
3787 3784 /*
3788 3785 * Process the SQREADY message
3789 3786 */
3790 3787 static void
3791 3788 rsm_proc_sqready(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr,
3792 3789 rsm_intr_hand_arg_t arg)
3793 3790 {
3794 3791 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)msg;
3795 3792 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg;
3796 3793 path_t *path;
3797 3794 DBG_DEFINE(category,
3798 3795 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3799 3796
3800 3797 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_proc_sqready enter\n"));
3801 3798
3802 3799 /* look up the path - incr the path refcnt */
3803 3800 path = rsm_find_path(hdlr_argp->adapter_name,
3804 3801 hdlr_argp->adapter_instance, src_hwaddr);
3805 3802
3806 3803 /*
3807 3804 * No path exists or path is not active - drop the message
3808 3805 */
3809 3806 if (path == NULL) {
3810 3807 DBG_PRINTF((category, RSM_DEBUG,
3811 3808 "rsm_proc_sqready done: msg dropped no path\n"));
3812 3809 return;
3813 3810 }
3814 3811
3815 3812 mutex_exit(&path->mutex);
3816 3813
3817 3814 /* drain any tasks from the previous incarnation */
3818 3815 taskq_wait(path->recv_taskq);
3819 3816
3820 3817 mutex_enter(&path->mutex);
3821 3818 /*
3822 3819	 * If we had sent an SQREADY message and were waiting for an
3823 3820	 * SQREADY_ACK when this SQREADY message arrived, blindly reset
3824 3821	 * the WAIT_FOR_SQACK flag because we'll just send an SQREADY_ACK
3825 3822 * and forget about the SQREADY that we sent.
3826 3823 */
3827 3824 path->flags &= ~RSMKA_WAIT_FOR_SQACK;
3828 3825
3829 3826 if (path->state != RSMKA_PATH_ACTIVE) {
3830 3827 /* decr refcnt and drop the mutex */
3831 3828 PATH_RELE_NOLOCK(path);
3832 3829 mutex_exit(&path->mutex);
3833 3830 DBG_PRINTF((category, RSM_DEBUG,
3834 3831 "rsm_proc_sqready done: msg dropped path !ACTIVE\n"));
3835 3832 return;
3836 3833 }
3837 3834
3838 3835 DBG_PRINTF((category, RSM_DEBUG, "rsm_proc_sqready:path=%lx "
3839 3836 " src=%lx:%llx\n", path, msghdr->rsmipc_src, src_hwaddr));
3840 3837
3841 3838 /*
3842 3839 * The sender's local incarnation number is our remote incarnation
3843 3840	 * number; save it in the path data structure.
3844 3841 */
3845 3842 path->remote_incn = msg->rsmipc_local_incn;
3846 3843 path->sendq_token.msgbuf_avail = 0;
3847 3844 path->procmsg_cnt = 0;
3848 3845
3849 3846 /*
3850 3847 * path is active - dispatch task to send SQREADY_ACK - remember
3851 3848 * RSMPI calls can't be done in interrupt context
3852 3849 *
3853 3850 * We can use the recv_taskq to send because the remote endpoint
3854 3851	 * cannot start sending messages till it receives SQREADY_ACK; hence
3855 3852 * at this point there are no tasks on recv_taskq.
3856 3853 *
3857 3854 * The path refcnt will be decremented in rsm_sqready_ack_deferred.
3858 3855 */
3859 3856 (void) taskq_dispatch(path->recv_taskq,
3860 3857 rsm_sqready_ack_deferred, path, KM_NOSLEEP);
3861 3858
3862 3859 mutex_exit(&path->mutex);
3863 3860
3864 3861
3865 3862 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_proc_sqready done\n"));
3866 3863 }
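/*
 * For illustration, the sendq handshake between nodes A and B is, in
 * outline:
 *
 *        A -> B : SQREADY(local_incn = a)   // A (re)created its sendq
 *        B      : remote_incn = a; msgbuf_avail = 0; procmsg_cnt = 0
 *        B -> A : SQREADY_ACK(incn = a)     // sent from taskq context
 *        A      : msgbuf_avail = RSMIPC_MAX_MESSAGES; wake blocked senders
 *
 * An SQREADY_ACK carrying a stale incarnation (incn != local_incn) is
 * simply dropped, as rsm_proc_sqready_ack below shows.
 */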
3867 3864
3868 3865 /*
3869 3866 * Process the SQREADY_ACK message
3870 3867 */
3871 3868 static void
3872 3869 rsm_proc_sqready_ack(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr,
3873 3870 rsm_intr_hand_arg_t arg)
3874 3871 {
3875 3872 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)msg;
3876 3873 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg;
3877 3874 path_t *path;
3878 3875 DBG_DEFINE(category,
3879 3876 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3880 3877
3881 3878 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3882 3879 "rsm_proc_sqready_ack enter\n"));
3883 3880
3884 3881 /* look up the path - incr the path refcnt */
3885 3882 path = rsm_find_path(hdlr_argp->adapter_name,
3886 3883 hdlr_argp->adapter_instance, src_hwaddr);
3887 3884
3888 3885 /*
3889 3886	 * Drop the message if no path exists, the path is not active,
3890 3887	 * or it is not waiting for an SQREADY_ACK message.
3891 3888 */
3892 3889 if (path == NULL) {
3893 3890 DBG_PRINTF((category, RSM_DEBUG,
3894 3891 "rsm_proc_sqready_ack done: msg dropped no path\n"));
3895 3892 return;
3896 3893 }
3897 3894
3898 3895 if ((path->state != RSMKA_PATH_ACTIVE) ||
3899 3896 !(path->flags & RSMKA_WAIT_FOR_SQACK)) {
3900 3897 /* decrement the refcnt */
3901 3898 PATH_RELE_NOLOCK(path);
3902 3899 mutex_exit(&path->mutex);
3903 3900 DBG_PRINTF((category, RSM_DEBUG,
3904 3901 "rsm_proc_sqready_ack done: msg dropped\n"));
3905 3902 return;
3906 3903 }
3907 3904
3908 3905 /*
3909 3906 * Check if this message is in response to the last RSMIPC_MSG_SQREADY
3910 3907	 * sent; if not, drop it.
3911 3908 */
3912 3909 if (path->local_incn != msghdr->rsmipc_incn) {
3913 3910 /* decrement the refcnt */
3914 3911 PATH_RELE_NOLOCK(path);
3915 3912 mutex_exit(&path->mutex);
3916 3913 DBG_PRINTF((category, RSM_DEBUG,
3917 3914 "rsm_proc_sqready_ack done: msg old incn %lld\n",
3918 3915 msghdr->rsmipc_incn));
3919 3916 return;
3920 3917 }
3921 3918
3922 3919 DBG_PRINTF((category, RSM_DEBUG, "rsm_proc_sqready_ack:path=%lx "
3923 3920 " src=%lx:%llx\n", path, msghdr->rsmipc_src, src_hwaddr));
3924 3921
3925 3922 /*
3926 3923 * clear the WAIT_FOR_SQACK flag since we have recvd the ack
3927 3924 */
3928 3925 path->flags &= ~RSMKA_WAIT_FOR_SQACK;
3929 3926
3930 3927 /* save the remote sendq incn number */
3931 3928 path->remote_incn = msg->rsmipc_local_incn;
3932 3929
3933 3930 /* initialize credits to the max level */
3934 3931 path->sendq_token.msgbuf_avail = RSMIPC_MAX_MESSAGES;
3935 3932
3936 3933 /* wake up any send that is waiting for credits */
3937 3934 cv_broadcast(&path->sendq_token.sendq_cv);
3938 3935
3939 3936 /* decrement the refcnt */
3940 3937 PATH_RELE_NOLOCK(path);
3941 3938
3942 3939 mutex_exit(&path->mutex);
3943 3940
3944 3941 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3945 3942 "rsm_proc_sqready_ack done\n"));
3946 3943 }
3947 3944
3948 3945 /*
3949 3946 * process the RSMIPC_MSG_CREDIT message
3950 3947 */
3951 3948 static void
3952 3949 rsm_add_credits(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr,
3953 3950 rsm_intr_hand_arg_t arg)
3954 3951 {
3955 3952 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)msg;
3956 3953 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg;
3957 3954 path_t *path;
3958 3955 DBG_DEFINE(category,
3959 3956 RSM_KERNEL_AGENT | RSM_FUNC_ALL |
3960 3957 RSM_INTR_CALLBACK | RSM_FLOWCONTROL);
3961 3958
3962 3959 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_add_credits enter\n"));
3963 3960
3964 3961 /* look up the path - incr the path refcnt */
3965 3962 path = rsm_find_path(hdlr_argp->adapter_name,
3966 3963 hdlr_argp->adapter_instance, src_hwaddr);
3967 3964
3968 3965 if (path == NULL) {
3969 3966 DBG_PRINTF((category, RSM_DEBUG,
3970 3967 "rsm_add_credits enter: path not found\n"));
3971 3968 return;
3972 3969 }
3973 3970
3974 3971 /* the path is not active - discard credits */
3975 3972 if (path->state != RSMKA_PATH_ACTIVE) {
3976 3973 PATH_RELE_NOLOCK(path);
3977 3974 mutex_exit(&path->mutex);
3978 3975 DBG_PRINTF((category, RSM_DEBUG,
3979 3976 "rsm_add_credits enter:path=%lx !ACTIVE\n", path));
3980 3977 return;
3981 3978 }
3982 3979
3983 3980 /*
3984 3981	 * Check if these credits are for the current incarnation of the path.
3985 3982 */
3986 3983 if (path->local_incn != msghdr->rsmipc_incn) {
3987 3984 /* decrement the refcnt */
3988 3985 PATH_RELE_NOLOCK(path);
3989 3986 mutex_exit(&path->mutex);
3990 3987 DBG_PRINTF((category, RSM_DEBUG,
3991 3988 "rsm_add_credits enter: old incn %lld\n",
3992 3989 msghdr->rsmipc_incn));
3993 3990 return;
3994 3991 }
3995 3992
3996 3993 DBG_PRINTF((category, RSM_DEBUG,
3997 3994 "rsm_add_credits:path=%lx new-creds=%d "
3998 3995 "curr credits=%d src=%lx:%llx\n", path, msg->rsmipc_credits,
3999 3996 path->sendq_token.msgbuf_avail, msghdr->rsmipc_src,
4000 3997 src_hwaddr));
4001 3998
4002 3999
4003 4000 /* add credits to the path's sendq */
4004 4001 path->sendq_token.msgbuf_avail += msg->rsmipc_credits;
4005 4002
4006 4003 ASSERT(path->sendq_token.msgbuf_avail <= RSMIPC_MAX_MESSAGES);
4007 4004
4008 4005 /* wake up any send that is waiting for credits */
4009 4006 cv_broadcast(&path->sendq_token.sendq_cv);
4010 4007
4011 4008 /* decrement the refcnt */
4012 4009 PATH_RELE_NOLOCK(path);
4013 4010
4014 4011 mutex_exit(&path->mutex);
4015 4012
4016 4013 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_add_credits done\n"));
4017 4014 }
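/*
 * For illustration: the credit scheme is plain producer/consumer
 * accounting.  Sketched with an assumed structure p holding avail
 * credits guarded by lock/cv, a sender consumes one credit per message
 * and a CREDIT message replenishes the pool:
 *
 *        // sender side: block until a credit is available
 *        mutex_enter(&p->lock);
 *        while (p->avail == 0)
 *                cv_wait(&p->cv, &p->lock);
 *        p->avail--;
 *        mutex_exit(&p->lock);
 *
 *        // receiver side: grant credits back in batches
 *        mutex_enter(&p->lock);
 *        p->avail += msg->credits;
 *        cv_broadcast(&p->cv);                   // wake any blocked senders
 *        mutex_exit(&p->lock);
 */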
4018 4015
4019 4016 static void
4020 4017 rsm_intr_event(rsmipc_request_t *msg)
4021 4018 {
4022 4019 rsmseg_t *seg;
4023 4020 rsmresource_t *p;
4024 4021 rsm_node_id_t src_node;
4025 4022 DBG_DEFINE(category,
4026 4023 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4027 4024
4028 4025 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_event enter\n"));
4029 4026
4030 4027 src_node = msg->rsmipc_hdr.rsmipc_src;
4031 4028
4032 4029 if ((seg = msg->rsmipc_segment_cookie) != NULL) {
4033 4030 /* This is for an import segment */
4034 4031 uint_t hashval = rsmhash(msg->rsmipc_key);
4035 4032
4036 4033 rw_enter(&rsm_import_segs.rsmhash_rw, RW_READER);
4037 4034
4038 4035 p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hashval);
4039 4036
4040 4037 for (; p; p = p->rsmrc_next) {
4041 4038 if ((p->rsmrc_key == msg->rsmipc_key) &&
4042 4039 (p->rsmrc_node == src_node)) {
4043 4040 seg = (rsmseg_t *)p;
4044 4041 rsmseglock_acquire(seg);
4045 4042
4046 4043 atomic_inc_32(&seg->s_pollevent);
4047 4044
4048 4045 if (seg->s_pollflag & RSM_SEGMENT_POLL)
4049 4046 pollwakeup(&seg->s_poll, POLLRDNORM);
4050 4047
4051 4048 rsmseglock_release(seg);
4052 4049 }
4053 4050 }
4054 4051
4055 4052 rw_exit(&rsm_import_segs.rsmhash_rw);
4056 4053 } else {
4057 4054 /* This is for an export segment */
4058 4055 seg = rsmexport_lookup(msg->rsmipc_key);
4059 4056 if (!seg) {
4060 4057 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4061 4058 "rsm_intr_event done: exp seg not found\n"));
4062 4059 return;
4063 4060 }
4064 4061
4065 4062 ASSERT(rsmseglock_held(seg));
4066 4063
4067 4064 atomic_inc_32(&seg->s_pollevent);
4068 4065
4069 4066 /*
4070 4067 * We must hold the segment lock here, or else the segment
4071 4068 * can be freed while pollwakeup is using it. This implies
4072 4069 * that we MUST NOT grab the segment lock during rsm_chpoll,
4073 4070		 * as outlined in the chpoll(9E) man page.
4074 4071 */
4075 4072 if (seg->s_pollflag & RSM_SEGMENT_POLL)
4076 4073 pollwakeup(&seg->s_poll, POLLRDNORM);
4077 4074
4078 4075 rsmseglock_release(seg);
4079 4076 }
4080 4077
4081 4078 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_event done\n"));
4082 4079 }
4083 4080
4084 4081 /*
4085 4082 * The exporter did a republish and changed the ACL - this change is only
4086 4083 * visible to new importers.
4087 4084 */
4088 4085 static void
4089 4086 importer_update(rsm_node_id_t src_node, rsm_memseg_id_t key,
4090 4087 rsm_permission_t perm)
4091 4088 {
4092 4089
4093 4090 rsmresource_t *p;
4094 4091 rsmseg_t *seg;
4095 4092 uint_t hashval = rsmhash(key);
4096 4093 DBG_DEFINE(category,
4097 4094 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4098 4095
4099 4096 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_update enter\n"));
4100 4097
4101 4098 rw_enter(&rsm_import_segs.rsmhash_rw, RW_READER);
4102 4099
4103 4100 p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hashval);
4104 4101
4105 4102 for (; p; p = p->rsmrc_next) {
4106 4103 /*
4107 4104 * find the importer and update the permission in the shared
4108 4105 * data structure. Any new importers will use the new perms
4109 4106 */
4110 4107 if ((p->rsmrc_key == key) && (p->rsmrc_node == src_node)) {
4111 4108 seg = (rsmseg_t *)p;
4112 4109
4113 4110 rsmseglock_acquire(seg);
4114 4111 rsmsharelock_acquire(seg);
4115 4112 seg->s_share->rsmsi_mode = perm;
4116 4113 rsmsharelock_release(seg);
4117 4114 rsmseglock_release(seg);
4118 4115
4119 4116 break;
4120 4117 }
4121 4118 }
4122 4119
4123 4120 rw_exit(&rsm_import_segs.rsmhash_rw);
4124 4121
4125 4122 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_update done\n"));
4126 4123 }
4127 4124
4128 4125 void
4129 4126 rsm_suspend_complete(rsm_node_id_t src_node, int flag)
4130 4127 {
4131 4128 int done = 1; /* indicate all SUSPENDS have been acked */
4132 4129 list_element_t *elem;
4133 4130 DBG_DEFINE(category,
4134 4131 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4135 4132
4136 4133 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4137 4134 "rsm_suspend_complete enter\n"));
4138 4135
4139 4136 mutex_enter(&rsm_suspend_list.list_lock);
4140 4137
4141 4138 if (rsm_suspend_list.list_head == NULL) {
4142 4139 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4143 4140 "rsm_suspend_complete done: suspend_list is empty\n"));
4144 4141 mutex_exit(&rsm_suspend_list.list_lock);
4145 4142 return;
4146 4143 }
4147 4144
4148 4145 elem = rsm_suspend_list.list_head;
4149 4146 while (elem != NULL) {
4150 4147 if (elem->nodeid == src_node) {
4151 4148 /* clear the pending flag for the node */
4152 4149 elem->flags &= ~RSM_SUSPEND_ACKPENDING;
4153 4150 elem->flags |= flag;
4154 4151 }
4155 4152
4156 4153 if (done && (elem->flags & RSM_SUSPEND_ACKPENDING))
4157 4154 done = 0; /* still some nodes have not yet ACKED */
4158 4155
4159 4156 elem = elem->next;
4160 4157 }
4161 4158
4162 4159 mutex_exit(&rsm_suspend_list.list_lock);
4163 4160
4164 4161 if (!done) {
4165 4162 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4166 4163 "rsm_suspend_complete done: acks pending\n"));
4167 4164 return;
4168 4165 }
4169 4166 /*
4170 4167	 * Now that we are done suspending all the remote importers, it is
4171 4168	 * time to quiesce the local exporters.
4172 4169 */
4173 4170 exporter_quiesce();
4174 4171
4175 4172 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4176 4173 "rsm_suspend_complete done\n"));
4177 4174 }
4178 4175
4179 4176 static void
4180 4177 exporter_quiesce()
4181 4178 {
4182 4179 int i, e;
4183 4180 rsmresource_t *current;
4184 4181 rsmseg_t *seg;
4185 4182 adapter_t *adapter;
4186 4183 DBG_DEFINE(category,
4187 4184 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4188 4185
4189 4186 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exporter_quiesce enter\n"));
4190 4187 /*
4191 4188	 * The importers have sent a SUSPEND_COMPLETE to the exporter node.
4192 4189	 * Unpublish and unbind the export segments, and move them
4193 4190	 * to the EXPORT_QUIESCED state.
4194 4191 */
4195 4192
4196 4193 rw_enter(&rsm_export_segs.rsmhash_rw, RW_READER);
4197 4194
4198 4195 for (i = 0; i < rsm_hash_size; i++) {
4199 4196 current = rsm_export_segs.bucket[i];
4200 4197 while (current != NULL) {
4201 4198 seg = (rsmseg_t *)current;
4202 4199 rsmseglock_acquire(seg);
4203 4200 if (current->rsmrc_state ==
4204 4201 RSM_STATE_EXPORT_QUIESCING) {
4205 4202 adapter = seg->s_adapter;
4206 4203 /*
4207 4204				 * Some local memory handles are not published;
4208 4205				 * check whether this one was published.
4209 4206 */
4210 4207 if ((seg->s_acl == NULL) ||
4211 4208 (seg->s_acl[0].ae_node != my_nodeid) ||
4212 4209 (seg->s_acl[0].ae_permission != 0)) {
4213 4210
4214 4211 e = adapter->rsmpi_ops->rsm_unpublish(
4215 4212 seg->s_handle.out);
4216 4213 DBG_PRINTF((category, RSM_DEBUG,
4217 4214 "exporter_quiesce:unpub %d\n", e));
4218 4215
4219 4216 e = adapter->rsmpi_ops->rsm_seg_destroy(
4220 4217 seg->s_handle.out);
4221 4218
4222 4219 DBG_PRINTF((category, RSM_DEBUG,
4223 4220 "exporter_quiesce:destroy %d\n",
4224 4221 e));
4225 4222 }
4226 4223
4227 4224 (void) rsm_unbind_pages(seg);
4228 4225 seg->s_state = RSM_STATE_EXPORT_QUIESCED;
4229 4226 cv_broadcast(&seg->s_cv);
4230 4227 }
4231 4228 rsmseglock_release(seg);
4232 4229 current = current->rsmrc_next;
4233 4230 }
4234 4231 }
4235 4232 rw_exit(&rsm_export_segs.rsmhash_rw);
4236 4233
4237 4234 /*
4238 4235	 * We are done with the pre-del processing of all the local
4239 4236	 * segments - time to move to PREDEL_COMPLETED.
4240 4237 */
4241 4238
4242 4239 mutex_enter(&rsm_drv_data.drv_lock);
4243 4240
4244 4241 ASSERT(rsm_drv_data.drv_state == RSM_DRV_PREDEL_STARTED);
4245 4242
4246 4243 rsm_drv_data.drv_state = RSM_DRV_PREDEL_COMPLETED;
4247 4244
4248 4245 cv_broadcast(&rsm_drv_data.drv_cv);
4249 4246
4250 4247 mutex_exit(&rsm_drv_data.drv_lock);
4251 4248
4252 4249 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exporter_quiesce done\n"));
4253 4250 }
4254 4251
4255 4252 static void
4256 4253 importer_suspend(rsm_node_id_t src_node)
4257 4254 {
4258 4255 int i;
4259 4256 int susp_flg; /* true means already suspended */
4260 4257 int num_importers;
4261 4258 rsmresource_t *p = NULL, *curp;
4262 4259 rsmhash_table_t *rhash = &rsm_import_segs;
4263 4260 rsmseg_t *seg;
4264 4261 rsmipc_request_t request;
4265 4262 DBG_DEFINE(category,
4266 4263 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4267 4264
4268 4265 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_suspend enter\n"));
4269 4266
4270 4267 rw_enter(&rhash->rsmhash_rw, RW_READER);
4271 4268 for (i = 0; i < rsm_hash_size; i++) {
4272 4269 p = rhash->bucket[i];
4273 4270
4274 4271 /*
4275 4272		 * Suspend all importers with the same <node, key> pair.
4276 4273		 * After the last of the shared importers has been
4277 4274		 * suspended, suspend the shared mappings/connection.
4278 4275 */
4279 4276 for (; p; p = p->rsmrc_next) {
4280 4277 rsmseg_t *first = (rsmseg_t *)p;
4281 4278 if ((first->s_node != src_node) ||
4282 4279 (first->s_state == RSM_STATE_DISCONNECT))
4283 4280 continue; /* go to next entry */
4284 4281 /*
4285 4282			 * Search the rest of the bucket for
4286 4283			 * other siblings (importers with the same key)
4287 4284			 * of "first" and suspend them.
4288 4285			 * All importers with the same key fall in
4289 4286			 * the same bucket.
4290 4287 */
4291 4288 num_importers = 0;
4292 4289 for (curp = p; curp; curp = curp->rsmrc_next) {
4293 4290 seg = (rsmseg_t *)curp;
4294 4291
4295 4292 rsmseglock_acquire(seg);
4296 4293
4297 4294 if ((seg->s_node != first->s_node) ||
4298 4295 (seg->s_key != first->s_key) ||
4299 4296 (seg->s_state == RSM_STATE_DISCONNECT)) {
4300 4297 /*
4301 4298				 * Either not a peer segment or it's a
4302 4299				 * disconnected segment - skip it.
4303 4300 */
4304 4301 rsmseglock_release(seg);
4305 4302 continue;
4306 4303 }
4307 4304
4308 4305 rsmseg_suspend(seg, &susp_flg);
4309 4306
4310 4307 if (susp_flg) { /* seg already suspended */
4311 4308 rsmseglock_release(seg);
4312 4309 break; /* the inner for loop */
4313 4310 }
4314 4311
4315 4312 num_importers++;
4316 4313 rsmsharelock_acquire(seg);
4317 4314 /*
4318 4315 * we've processed all importers that are
4319 4316 * siblings of "first"
4320 4317 */
4321 4318 if (num_importers ==
4322 4319 seg->s_share->rsmsi_refcnt) {
4323 4320 rsmsharelock_release(seg);
4324 4321 rsmseglock_release(seg);
4325 4322 break;
4326 4323 }
4327 4324 rsmsharelock_release(seg);
4328 4325 rsmseglock_release(seg);
4329 4326 }
4330 4327
4331 4328 /*
4332 4329 * All the importers with the same key and
4333 4330 * nodeid as "first" have been suspended.
4334 4331 * Now suspend the shared connect/mapping.
4335 4332 * This is done only once.
4336 4333 */
4337 4334 if (!susp_flg) {
4338 4335 rsmsegshare_suspend(seg);
4339 4336 }
4340 4337 }
4341 4338 }
4342 4339
4343 4340 rw_exit(&rhash->rsmhash_rw);
4344 4341
4345 4342 /* send an ACK for SUSPEND message */
4346 4343 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SUSPEND_DONE;
4347 4344 (void) rsmipc_send(src_node, &request, RSM_NO_REPLY);
4348 4345
4349 4346
4350 4347 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_suspend done\n"));
4351 4348
4352 4349 }
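/*
 * For illustration: the sibling scan in importer_suspend stops as soon
 * as every importer sharing a connection has been seen.  Sketched with
 * assumed helpers same_share()/suspend_one() and a per-share reference
 * count refcnt:
 *
 *        int n = 0;
 *        for (cur = bucket; cur != NULL; cur = cur->next) {
 *                if (!same_share(cur, first))
 *                        continue;               // not a sibling
 *                suspend_one(cur);
 *                if (++n == cur->share->refcnt)
 *                        break;                  // all siblings seen
 *        }
 */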
4353 4350
4354 4351 static void
4355 4352 rsmseg_suspend(rsmseg_t *seg, int *susp_flg)
4356 4353 {
4357 4354 int recheck_state;
4358 4355 rsmcookie_t *hdl;
4359 4356 DBG_DEFINE(category,
4360 4357 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4361 4358
4362 4359 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4363 4360 "rsmseg_suspend enter: key=%u\n", seg->s_key));
4364 4361
4365 4362 *susp_flg = 0;
4366 4363
4367 4364 ASSERT(rsmseglock_held(seg));
4368 4365 /* wait if putv/getv is in progress */
4369 4366 while (seg->s_rdmacnt > 0)
4370 4367 cv_wait(&seg->s_cv, &seg->s_lock);
4371 4368
4372 4369 do {
4373 4370 recheck_state = 0;
4374 4371
4375 4372 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4376 4373 "rsmseg_suspend:segment %x state=%d\n",
4377 4374 seg->s_key, seg->s_state));
4378 4375
4379 4376 switch (seg->s_state) {
4380 4377 case RSM_STATE_NEW:
4381 4378 /* not a valid state */
4382 4379 break;
4383 4380 case RSM_STATE_CONNECTING:
4384 4381 seg->s_state = RSM_STATE_ABORT_CONNECT;
4385 4382 break;
4386 4383 case RSM_STATE_ABORT_CONNECT:
4387 4384 break;
4388 4385 case RSM_STATE_CONNECT:
4389 4386 seg->s_handle.in = NULL;
4390 4387 seg->s_state = RSM_STATE_CONN_QUIESCE;
4391 4388 break;
4392 4389 case RSM_STATE_MAPPING:
4393 4390 /* wait until segment leaves the mapping state */
4394 4391 while (seg->s_state == RSM_STATE_MAPPING)
4395 4392 cv_wait(&seg->s_cv, &seg->s_lock);
4396 4393 recheck_state = 1;
4397 4394 break;
4398 4395 case RSM_STATE_ACTIVE:
4399 4396 /* unload the mappings */
4400 4397 if (seg->s_ckl != NULL) {
4401 4398 hdl = seg->s_ckl;
4402 4399 for (; hdl != NULL; hdl = hdl->c_next) {
4403 4400 (void) devmap_unload(hdl->c_dhp,
4404 4401 hdl->c_off, hdl->c_len);
4405 4402 }
4406 4403 }
4407 4404 seg->s_mapinfo = NULL;
4408 4405 seg->s_state = RSM_STATE_MAP_QUIESCE;
4409 4406 break;
4410 4407 case RSM_STATE_CONN_QUIESCE:
4411 4408 /* FALLTHRU */
4412 4409 case RSM_STATE_MAP_QUIESCE:
4413 4410 /* rsmseg_suspend already done for seg */
4414 4411 *susp_flg = 1;
4415 4412 break;
4416 4413 case RSM_STATE_DISCONNECT:
4417 4414 break;
4418 4415 default:
4419 4416 ASSERT(0); /* invalid state */
4420 4417 }
4421 4418 } while (recheck_state);
4422 4419
4423 4420 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_suspend done\n"));
4424 4421 }
4425 4422
4426 4423 static void
4427 4424 rsmsegshare_suspend(rsmseg_t *seg)
4428 4425 {
4429 4426 int e;
4430 4427 adapter_t *adapter;
4431 4428 rsm_import_share_t *sharedp;
4432 4429 DBG_DEFINE(category,
4433 4430 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4434 4431
4435 4432 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4436 4433 "rsmsegshare_suspend enter\n"));
4437 4434
4438 4435 rsmseglock_acquire(seg);
4439 4436 rsmsharelock_acquire(seg);
4440 4437
4441 4438 sharedp = seg->s_share;
4442 4439 adapter = seg->s_adapter;
4443 4440 switch (sharedp->rsmsi_state) {
4444 4441 case RSMSI_STATE_NEW:
4445 4442 break;
4446 4443 case RSMSI_STATE_CONNECTING:
4447 4444 sharedp->rsmsi_state = RSMSI_STATE_ABORT_CONNECT;
4448 4445 break;
4449 4446 case RSMSI_STATE_ABORT_CONNECT:
4450 4447 break;
4451 4448 case RSMSI_STATE_CONNECTED:
4452 4449 /* do the rsmpi disconnect */
4453 4450 if (sharedp->rsmsi_node != my_nodeid) {
4454 4451 e = adapter->rsmpi_ops->
4455 4452 rsm_disconnect(sharedp->rsmsi_handle);
4456 4453
4457 4454 DBG_PRINTF((category, RSM_DEBUG,
4458 4455 "rsm:rsmpi disconnect seg=%x:err=%d\n",
4459 4456 sharedp->rsmsi_segid, e));
4460 4457 }
4461 4458
4462 4459 sharedp->rsmsi_handle = NULL;
4463 4460
4464 4461 sharedp->rsmsi_state = RSMSI_STATE_CONN_QUIESCE;
4465 4462 break;
4466 4463 case RSMSI_STATE_CONN_QUIESCE:
4467 4464 break;
4468 4465 case RSMSI_STATE_MAPPED:
4469 4466 /* do the rsmpi unmap and disconnect */
4470 4467 if (sharedp->rsmsi_node != my_nodeid) {
4471 4468 e = adapter->rsmpi_ops->rsm_unmap(seg->s_handle.in);
4472 4469
4473 4470 DBG_PRINTF((category, RSM_DEBUG,
4474 4471 "rsmshare_suspend: rsmpi unmap %d\n", e));
4475 4472
4476 4473 e = adapter->rsmpi_ops->
4477 4474 rsm_disconnect(sharedp->rsmsi_handle);
4478 4475 DBG_PRINTF((category, RSM_DEBUG,
4479 4476 "rsm:rsmpi disconnect seg=%x:err=%d\n",
4480 4477 sharedp->rsmsi_segid, e));
4481 4478 }
4482 4479
4483 4480 sharedp->rsmsi_handle = NULL;
4484 4481
4485 4482 sharedp->rsmsi_state = RSMSI_STATE_MAP_QUIESCE;
4486 4483 break;
4487 4484 case RSMSI_STATE_MAP_QUIESCE:
4488 4485 break;
4489 4486 case RSMSI_STATE_DISCONNECTED:
4490 4487 break;
4491 4488 default:
4492 4489 ASSERT(0); /* invalid state */
4493 4490 }
4494 4491
4495 4492 rsmsharelock_release(seg);
4496 4493 rsmseglock_release(seg);
4497 4494
4498 4495 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4499 4496 "rsmsegshare_suspend done\n"));
4500 4497 }
4501 4498
4502 4499 /*
4503 4500 * This should get called on receiving a RESUME message or from
4504 4501	 * the path manager if the node undergoing DR dies.
4505 4502 */
4506 4503 static void
4507 4504 importer_resume(rsm_node_id_t src_node)
4508 4505 {
4509 4506 int i;
4510 4507 rsmresource_t *p = NULL;
4511 4508 rsmhash_table_t *rhash = &rsm_import_segs;
4512 4509 void *cookie;
4513 4510 DBG_DEFINE(category,
4514 4511 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4515 4512
4516 4513 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_resume enter\n"));
4517 4514
4518 4515 rw_enter(&rhash->rsmhash_rw, RW_READER);
4519 4516
4520 4517 for (i = 0; i < rsm_hash_size; i++) {
4521 4518 p = rhash->bucket[i];
4522 4519
4523 4520 for (; p; p = p->rsmrc_next) {
4524 4521 rsmseg_t *seg = (rsmseg_t *)p;
4525 4522
4526 4523 rsmseglock_acquire(seg);
4527 4524
4528 4525 /* process only importers of node undergoing DR */
4529 4526 if (seg->s_node != src_node) {
4530 4527 rsmseglock_release(seg);
4531 4528 continue;
4532 4529 }
4533 4530
4534 4531 if (rsmseg_resume(seg, &cookie) != RSM_SUCCESS) {
4535 4532 rsmipc_request_t request;
4536 4533 /*
4537 4534 * rsmpi map/connect failed
4538 4535 * inform the exporter so that it can
4539 4536 * remove the importer.
4540 4537 */
4541 4538 request.rsmipc_hdr.rsmipc_type =
4542 4539 RSMIPC_MSG_NOTIMPORTING;
4543 4540 request.rsmipc_key = seg->s_segid;
4544 4541 request.rsmipc_segment_cookie = cookie;
4545 4542 rsmseglock_release(seg);
4546 4543 (void) rsmipc_send(seg->s_node, &request,
4547 4544 RSM_NO_REPLY);
4548 4545 } else {
4549 4546 rsmseglock_release(seg);
4550 4547 }
4551 4548 }
4552 4549 }
4553 4550
4554 4551 rw_exit(&rhash->rsmhash_rw);
4555 4552
4556 4553 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_resume done\n"));
4557 4554 }
4558 4555
4559 4556 static int
4560 4557 rsmseg_resume(rsmseg_t *seg, void **cookie)
4561 4558 {
4562 4559 int e;
4563 4560 int retc;
4564 4561 off_t dev_offset;
4565 4562 size_t maplen;
4566 4563 uint_t maxprot;
4567 4564 rsm_mapinfo_t *p;
4568 4565 rsmcookie_t *hdl;
4569 4566 rsm_import_share_t *sharedp;
4570 4567 DBG_DEFINE(category,
4571 4568 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4572 4569
4573 4570 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4574 4571 "rsmseg_resume enter: key=%u\n", seg->s_key));
4575 4572
4576 4573 *cookie = NULL;
4577 4574
4578 4575 ASSERT(rsmseglock_held(seg));
4579 4576
4580 4577 if ((seg->s_state != RSM_STATE_CONN_QUIESCE) &&
4581 4578 (seg->s_state != RSM_STATE_MAP_QUIESCE)) {
4582 4579 return (RSM_SUCCESS);
4583 4580 }
4584 4581
4585 4582 sharedp = seg->s_share;
4586 4583
4587 4584 rsmsharelock_acquire(seg);
4588 4585
4589 4586 /* resume the shared connection and/or mapping */
4590 4587 retc = rsmsegshare_resume(seg);
4591 4588
4592 4589 if (seg->s_state == RSM_STATE_CONN_QUIESCE) {
4593 4590 /* shared state can either be connected or mapped */
4594 4591 if ((sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) ||
4595 4592 (sharedp->rsmsi_state == RSMSI_STATE_MAPPED)) {
4596 4593 ASSERT(retc == RSM_SUCCESS);
4597 4594 seg->s_handle.in = sharedp->rsmsi_handle;
4598 4595 rsmsharelock_release(seg);
4599 4596 seg->s_state = RSM_STATE_CONNECT;
4600 4597
4601 4598 } else { /* error in rsmpi connect during resume */
4602 4599 seg->s_handle.in = NULL;
4603 4600 seg->s_state = RSM_STATE_DISCONNECT;
4604 4601
4605 4602 sharedp->rsmsi_refcnt--;
4606 4603			*cookie = (void *)sharedp->rsmsi_cookie;
4607 4604
4608 4605 if (sharedp->rsmsi_refcnt == 0) {
4609 4606 ASSERT(sharedp->rsmsi_mapcnt == 0);
4610 4607 rsmsharelock_release(seg);
4611 4608
4612 4609 /* clean up the shared data structure */
4613 4610 mutex_destroy(&sharedp->rsmsi_lock);
4614 4611 cv_destroy(&sharedp->rsmsi_cv);
4615 4612 kmem_free((void *)(sharedp),
4616 4613 sizeof (rsm_import_share_t));
4617 4614
4618 4615 } else {
4619 4616 rsmsharelock_release(seg);
4620 4617 }
4621 4618 /*
4622 4619 * The following needs to be done after any
4623 4620 * rsmsharelock calls which use seg->s_share.
4624 4621 */
4625 4622 seg->s_share = NULL;
4626 4623 }
4627 4624
4628 4625 /* signal any waiting segment */
4629 4626 cv_broadcast(&seg->s_cv);
4630 4627
4631 4628 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4632 4629 "rsmseg_resume done:state=%d\n", seg->s_state));
4633 4630 return (retc);
4634 4631 }
4635 4632
4636 4633 ASSERT(seg->s_state == RSM_STATE_MAP_QUIESCE);
4637 4634
4638 4635 /* Setup protections for remap */
4639 4636 maxprot = PROT_USER;
4640 4637 if (seg->s_mode & RSM_PERM_READ) {
4641 4638 maxprot |= PROT_READ;
4642 4639 }
4643 4640 if (seg->s_mode & RSM_PERM_WRITE) {
4644 4641 maxprot |= PROT_WRITE;
4645 4642 }
4646 4643
4647 4644 if (sharedp->rsmsi_state != RSMSI_STATE_MAPPED) {
4648 4645 /* error in rsmpi connect or map during resume */
4649 4646
4650 4647 /* remap to trash page */
4651 4648 ASSERT(seg->s_ckl != NULL);
4652 4649
4653 4650 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) {
4654 4651 e = devmap_umem_remap(hdl->c_dhp, rsm_dip,
4655 4652 remap_cookie, hdl->c_off, hdl->c_len,
4656 4653 maxprot, 0, NULL);
4657 4654
4658 4655 DBG_PRINTF((category, RSM_ERR,
4659 4656 "rsmseg_resume:remap=%d\n", e));
4660 4657 }
4661 4658
4662 4659 seg->s_handle.in = NULL;
4663 4660 seg->s_state = RSM_STATE_DISCONNECT;
4664 4661
4665 4662 sharedp->rsmsi_refcnt--;
4666 4663
4667 4664 sharedp->rsmsi_mapcnt--;
4668 4665 seg->s_mapinfo = NULL;
4669 4666
4670 4667 if (sharedp->rsmsi_refcnt == 0) {
4671 4668 ASSERT(sharedp->rsmsi_mapcnt == 0);
4672 4669 rsmsharelock_release(seg);
4673 4670
4674 4671 /* clean up the shared data structure */
4675 4672 mutex_destroy(&sharedp->rsmsi_lock);
4676 4673 cv_destroy(&sharedp->rsmsi_cv);
4677 4674 kmem_free((void *)(sharedp),
4678 4675 sizeof (rsm_import_share_t));
4679 4676
4680 4677 } else {
4681 4678 rsmsharelock_release(seg);
4682 4679 }
4683 4680 /*
4684 4681 * The following needs to be done after any
4685 4682 * rsmsharelock calls which use seg->s_share.
4686 4683 */
4687 4684 seg->s_share = NULL;
4688 4685
4689 4686 /* signal any waiting segment */
4690 4687 cv_broadcast(&seg->s_cv);
4691 4688
4692 4689 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4693 4690 "rsmseg_resume done:seg=%x,err=%d\n",
4694 4691 seg->s_key, retc));
4695 4692 return (retc);
4696 4693
4697 4694 }
4698 4695
4699 4696 seg->s_handle.in = sharedp->rsmsi_handle;
4700 4697
4701 4698 if (seg->s_node == my_nodeid) { /* loopback */
4702 4699 ASSERT(seg->s_mapinfo == NULL);
4703 4700
4704 4701 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) {
4705 4702 e = devmap_umem_remap(hdl->c_dhp,
4706 4703 rsm_dip, seg->s_cookie,
4707 4704 hdl->c_off, hdl->c_len,
4708 4705 maxprot, 0, NULL);
4709 4706
4710 4707 DBG_PRINTF((category, RSM_ERR,
4711 4708 "rsmseg_resume:remap=%d\n", e));
4712 4709 }
4713 4710 } else { /* remote exporter */
4714 4711 /* remap to the new rsmpi maps */
4715 4712 seg->s_mapinfo = sharedp->rsmsi_mapinfo;
4716 4713
4717 4714 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) {
4718 4715 p = rsm_get_mapinfo(seg, hdl->c_off, hdl->c_len,
4719 4716 &dev_offset, &maplen);
4720 4717 e = devmap_devmem_remap(hdl->c_dhp,
4721 4718 p->dip, p->dev_register, dev_offset,
4722 4719 maplen, maxprot, 0, NULL);
4723 4720
4724 4721 DBG_PRINTF((category, RSM_ERR,
4725 4722 "rsmseg_resume:remap=%d\n", e));
4726 4723 }
4727 4724 }
4728 4725
4729 4726 rsmsharelock_release(seg);
4730 4727
4731 4728 seg->s_state = RSM_STATE_ACTIVE;
4732 4729 cv_broadcast(&seg->s_cv);
4733 4730
4734 4731 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_resume done\n"));
4735 4732
4736 4733 return (retc);
4737 4734 }
4738 4735
4739 4736 static int
4740 4737 rsmsegshare_resume(rsmseg_t *seg)
4741 4738 {
4742 4739 int e = RSM_SUCCESS;
4743 4740 adapter_t *adapter;
4744 4741 rsm_import_share_t *sharedp;
4745 4742 DBG_DEFINE(category,
4746 4743 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4747 4744
4748 4745 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegshare_resume enter\n"));
4749 4746
4750 4747 ASSERT(rsmseglock_held(seg));
4751 4748 ASSERT(rsmsharelock_held(seg));
4752 4749
4753 4750 sharedp = seg->s_share;
4754 4751
4755 4752 /*
4756 4753	 * If we are not in an xxxx_QUIESCE state, the shared
4757 4754	 * connect/mapping processing has already been done,
4758 4755	 * so return success.
4759 4756 */
4760 4757 if ((sharedp->rsmsi_state != RSMSI_STATE_CONN_QUIESCE) &&
4761 4758 (sharedp->rsmsi_state != RSMSI_STATE_MAP_QUIESCE)) {
4762 4759 return (RSM_SUCCESS);
4763 4760 }
4764 4761
4765 4762 adapter = seg->s_adapter;
4766 4763
4767 4764 if (sharedp->rsmsi_node != my_nodeid) {
4768 4765 rsm_addr_t hwaddr;
4769 4766 hwaddr = get_remote_hwaddr(adapter, sharedp->rsmsi_node);
4770 4767
4771 4768 e = adapter->rsmpi_ops->rsm_connect(
4772 4769 adapter->rsmpi_handle, hwaddr,
4773 4770 sharedp->rsmsi_segid, &sharedp->rsmsi_handle);
4774 4771
4775 4772 DBG_PRINTF((category, RSM_DEBUG,
4776 4773 "rsmsegshare_resume:rsmpi connect seg=%x:err=%d\n",
4777 4774 sharedp->rsmsi_segid, e));
4778 4775
4779 4776 if (e != RSM_SUCCESS) {
4780 4777 /* when do we send the NOT_IMPORTING message */
4781 4778 sharedp->rsmsi_handle = NULL;
4782 4779 sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED;
4783 4780 /* signal any waiting segment */
4784 4781 cv_broadcast(&sharedp->rsmsi_cv);
4785 4782 return (e);
4786 4783 }
4787 4784 }
4788 4785
4789 4786 if (sharedp->rsmsi_state == RSMSI_STATE_CONN_QUIESCE) {
4790 4787 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
4791 4788 /* signal any waiting segment */
4792 4789 cv_broadcast(&sharedp->rsmsi_cv);
4793 4790 return (e);
4794 4791 }
4795 4792
4796 4793 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAP_QUIESCE);
4797 4794
4798 4795 /* do the rsmpi map of the whole segment here */
4799 4796 if (sharedp->rsmsi_node != my_nodeid) {
4800 4797 size_t mapped_len;
4801 4798 rsm_mapinfo_t *p;
4802 4799
4803 4800 /*
4804 4801		 * We need to do the rsmpi maps with <off, len> pairs identical to
4805 4802		 * those in the old mapinfo list, because the segment mapping
4806 4803		 * handles (dhp and such) need the fragmentation of the rsmpi maps
4807 4804		 * to be identical to what it was during the mmap of the segment.
4808 4805 */
4809 4806 p = sharedp->rsmsi_mapinfo;
4810 4807
4811 4808 while (p != NULL) {
4812 4809 mapped_len = 0;
4813 4810
4814 4811 e = adapter->rsmpi_ops->rsm_map(
4815 4812 sharedp->rsmsi_handle, p->start_offset,
4816 4813 p->individual_len, &mapped_len,
4817 4814 &p->dip, &p->dev_register, &p->dev_offset,
4818 4815 NULL, NULL);
4819 4816
4820 4817 if (e != 0) {
4821 4818 DBG_PRINTF((category, RSM_ERR,
4822 4819 "rsmsegshare_resume: rsmpi map err=%d\n",
4823 4820 e));
4824 4821 break;
4825 4822 }
4826 4823
4827 4824 if (mapped_len != p->individual_len) {
4828 4825 DBG_PRINTF((category, RSM_ERR,
4829 4826				    "rsmsegshare_resume: rsmpi maplen "
4830 4827				    "< reqlen=%lx\n", mapped_len));
4831 4828 e = RSMERR_BAD_LENGTH;
4832 4829 break;
4833 4830 }
4834 4831
4835 4832 p = p->next;
4836 4833
4837 4834 }
4838 4835
4839 4836
4840 4837 if (e != RSM_SUCCESS) { /* rsmpi map failed */
4841 4838 int err;
4842 4839			/* unless the failure was on the first rsm_map, undo prior maps */
4843 4840 if (p != sharedp->rsmsi_mapinfo) {
4844 4841 /*
4845 4842 * A single rsm_unmap undoes multiple rsm_maps.
4846 4843 */
4847 4844 (void) seg->s_adapter->rsmpi_ops->
4848 4845 rsm_unmap(sharedp->rsmsi_handle);
4849 4846 }
4850 4847
4851 4848 rsm_free_mapinfo(sharedp->rsmsi_mapinfo);
4852 4849 sharedp->rsmsi_mapinfo = NULL;
4853 4850
4854 4851 err = adapter->rsmpi_ops->
4855 4852 rsm_disconnect(sharedp->rsmsi_handle);
4856 4853
4857 4854 DBG_PRINTF((category, RSM_DEBUG,
4858 4855 "rsmsegshare_resume:disconn seg=%x:err=%d\n",
4859 4856 sharedp->rsmsi_segid, err));
4860 4857
4861 4858 sharedp->rsmsi_handle = NULL;
4862 4859 sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED;
4863 4860
4864 4861 /* signal the waiting segments */
4865 4862 cv_broadcast(&sharedp->rsmsi_cv);
4866 4863 DBG_PRINTF((category, RSM_DEBUG,
4867 4864 "rsmsegshare_resume done: rsmpi map err\n"));
4868 4865 return (e);
4869 4866 }
4870 4867 }
4871 4868
4872 4869 sharedp->rsmsi_state = RSMSI_STATE_MAPPED;
4873 4870
4874 4871 /* signal any waiting segment */
4875 4872 cv_broadcast(&sharedp->rsmsi_cv);
4876 4873
4877 4874 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegshare_resume done\n"));
4878 4875
4879 4876 return (e);
4880 4877 }
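/*
 * For illustration: the fragment mapping loop in rsmsegshare_resume has
 * all-or-nothing semantics.  Sketched with assumed helpers map_one()
 * and a handle-wide unmap_all() playing the role of rsm_unmap:
 *
 *        int e = 0;
 *        for (p = list; p != NULL; p = p->next) {
 *                if ((e = map_one(p)) != 0)
 *                        break;                  // stop at first failure
 *        }
 *        if (e != 0 && p != list)
 *                (void) unmap_all(handle);       // one call undoes them all
 */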
4881 4878
4882 4879 /*
4883 4880	 * This is the routine that gets called by recv_taskq, which is the
4884 4881	 * thread that processes flow-controlled messages.
4885 4882 */
4886 4883 static void
4887 4884 rsm_intr_proc_deferred(void *arg)
4888 4885 {
4889 4886 path_t *path = (path_t *)arg;
4890 4887 rsmipc_request_t *msg;
4891 4888 rsmipc_msghdr_t *msghdr;
4892 4889 rsm_node_id_t src_node;
4893 4890 msgbuf_elem_t *head;
4894 4891 int e;
4895 4892 DBG_DEFINE(category,
4896 4893 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4897 4894
4898 4895 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4899 4896 "rsm_intr_proc_deferred enter\n"));
4900 4897
4901 4898 mutex_enter(&path->mutex);
4902 4899
4903 4900 /* use the head of the msgbuf_queue */
4904 4901 head = rsmka_gethead_msgbuf(path);
4905 4902
4906 4903 mutex_exit(&path->mutex);
4907 4904
4908 4905 msg = (rsmipc_request_t *)&(head->msg);
4909 4906 msghdr = (rsmipc_msghdr_t *)msg;
4910 4907
4911 4908 src_node = msghdr->rsmipc_src;
4912 4909
4913 4910 /*
4914 4911	 * Messages that need to send a reply should check the message version
4915 4912	 * before processing the message, and all messages that need to
4916 4913	 * send a reply should be processed here, by the worker thread.
4917 4914 */
4918 4915 switch (msghdr->rsmipc_type) {
4919 4916 case RSMIPC_MSG_SEGCONNECT:
4920 4917 if (msghdr->rsmipc_version != RSM_VERSION) {
4921 4918 rsmipc_reply_t reply;
4922 4919 reply.rsmipc_status = RSMERR_BAD_DRIVER_VERSION;
4923 4920 reply.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPLY;
4924 4921 reply.rsmipc_hdr.rsmipc_cookie = msghdr->rsmipc_cookie;
4925 4922 (void) rsmipc_send(msghdr->rsmipc_src, NULL, &reply);
4926 4923 } else {
4927 4924 rsm_intr_segconnect(src_node, msg);
4928 4925 }
4929 4926 break;
4930 4927 case RSMIPC_MSG_DISCONNECT:
4931 4928 rsm_force_unload(src_node, msg->rsmipc_key, DISCONNECT);
4932 4929 break;
4933 4930 case RSMIPC_MSG_SUSPEND:
4934 4931 importer_suspend(src_node);
4935 4932 break;
4936 4933 case RSMIPC_MSG_SUSPEND_DONE:
4937 4934 rsm_suspend_complete(src_node, 0);
4938 4935 break;
4939 4936 case RSMIPC_MSG_RESUME:
4940 4937 importer_resume(src_node);
4941 4938 break;
4942 4939 default:
4943 4940 ASSERT(0);
4944 4941 }
4945 4942
4946 4943 mutex_enter(&path->mutex);
4947 4944
4948 4945 rsmka_dequeue_msgbuf(path);
4949 4946
4950 4947 	/* incr procmsg_cnt - it can be at most RSMIPC_MAX_MESSAGES */
4951 4948 if (path->procmsg_cnt < RSMIPC_MAX_MESSAGES)
4952 4949 path->procmsg_cnt++;
4953 4950
4954 4951 ASSERT(path->procmsg_cnt <= RSMIPC_MAX_MESSAGES);
4955 4952
4956 4953 /* No need to send credits if path is going down */
4957 4954 if ((path->state == RSMKA_PATH_ACTIVE) &&
4958 4955 (path->procmsg_cnt >= RSMIPC_LOTSFREE_MSGBUFS)) {
4959 4956 /*
4960 4957 		 * send credits and reset procmsg_cnt on success; otherwise
4961 4958 		 * credits will be sent after processing the next message
4962 4959 */
4963 4960 e = rsmipc_send_controlmsg(path, RSMIPC_MSG_CREDIT);
4964 4961 if (e == 0)
4965 4962 path->procmsg_cnt = 0;
4966 4963 else
4967 4964 DBG_PRINTF((category, RSM_ERR,
4968 4965 "rsm_intr_proc_deferred:send credits err=%d\n", e));
4969 4966 }
4970 4967
4971 4968 /*
4972 4969 * decrement the path refcnt since we incremented it in
4973 4970 * rsm_intr_callback_dispatch
4974 4971 */
4975 4972 PATH_RELE_NOLOCK(path);
4976 4973
4977 4974 mutex_exit(&path->mutex);
4978 4975
4979 4976 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4980 4977 "rsm_intr_proc_deferred done\n"));
4981 4978 }
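/*
 * Editor's note: the following is an illustrative user-land sketch (not
 * part of rsm.c) of the receive-side credit accounting implemented by
 * rsm_intr_proc_deferred above. The receiver counts messages it has
 * finished processing and hands that count back to the sender as credits
 * once it crosses a low-water mark. All names below are hypothetical.
 */
#include <stdio.h>

#define	MAX_MESSAGES		64	/* stands in for RSMIPC_MAX_MESSAGES */
#define	LOTSFREE_MSGBUFS	32	/* stands in for RSMIPC_LOTSFREE_MSGBUFS */

static int procmsg_cnt;		/* messages processed since last credit send */

/* stub for rsmipc_send_controlmsg(path, RSMIPC_MSG_CREDIT); 0 == success */
static int
send_credits(int n)
{
	(void) printf("returning %d credits to the sender\n", n);
	return (0);
}

static void
consume_one_message(void)
{
	/* ...dequeue and process the message here... */
	if (procmsg_cnt < MAX_MESSAGES)
		procmsg_cnt++;
	/* batch the credits; a failed send just defers to the next message */
	if (procmsg_cnt >= LOTSFREE_MSGBUFS && send_credits(procmsg_cnt) == 0)
		procmsg_cnt = 0;
}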
4982 4979
4983 4980 /*
4984 4981 * Flow-controlled messages are enqueued and dispatched onto a taskq here
4985 4982 */
4986 4983 static void
4987 4984 rsm_intr_callback_dispatch(void *data, rsm_addr_t src_hwaddr,
4988 4985 rsm_intr_hand_arg_t arg)
4989 4986 {
4990 4987 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg;
4991 4988 path_t *path;
4992 4989 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)data;
4993 4990 DBG_DEFINE(category,
4994 4991 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4995 4992
4996 4993 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4997 4994 "rsm_intr_callback_dispatch enter\n"));
4998 4995 ASSERT(data && hdlr_argp);
4999 4996
5000 4997 /* look up the path - incr the path refcnt */
5001 4998 path = rsm_find_path(hdlr_argp->adapter_name,
5002 4999 hdlr_argp->adapter_instance, src_hwaddr);
5003 5000
5004 5001 /* the path has been removed - drop this message */
5005 5002 if (path == NULL) {
5006 5003 DBG_PRINTF((category, RSM_DEBUG,
5007 5004 "rsm_intr_callback_dispatch done: msg dropped\n"));
5008 5005 return;
5009 5006 }
5010 5007 /* the path is not active - don't accept new messages */
5011 5008 if (path->state != RSMKA_PATH_ACTIVE) {
5012 5009 PATH_RELE_NOLOCK(path);
5013 5010 mutex_exit(&path->mutex);
5014 5011 DBG_PRINTF((category, RSM_DEBUG,
5015 5012 "rsm_intr_callback_dispatch done: msg dropped"
5016 5013 " path=%lx !ACTIVE\n", path));
5017 5014 return;
5018 5015 }
5019 5016
5020 5017 /*
5021 5018 * Check if this message was sent to an older incarnation
5022 5019 * of the path/sendq.
5023 5020 */
5024 5021 if (path->local_incn != msghdr->rsmipc_incn) {
5025 5022 /* decrement the refcnt */
5026 5023 PATH_RELE_NOLOCK(path);
5027 5024 mutex_exit(&path->mutex);
5028 5025 DBG_PRINTF((category, RSM_DEBUG,
5029 5026 "rsm_intr_callback_dispatch done: old incn %lld\n",
5030 5027 msghdr->rsmipc_incn));
5031 5028 return;
5032 5029 }
5033 5030
5034 5031 /* copy and enqueue msg on the path's msgbuf queue */
5035 5032 rsmka_enqueue_msgbuf(path, data);
5036 5033
5037 5034 /*
5038 5035 	 * schedule task to process messages - ignore retval from
5039 5036 	 * taskq_dispatch because the sender cannot send more than
5040 5037 	 * what the receiver can handle.
5041 5038 */
5042 5039 (void) taskq_dispatch(path->recv_taskq,
5043 5040 rsm_intr_proc_deferred, path, KM_NOSLEEP);
5044 5041
5045 5042 mutex_exit(&path->mutex);
5046 5043
5047 5044 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5048 5045 "rsm_intr_callback_dispatch done\n"));
5049 5046 }
5050 5047
5051 5048 /*
5052 5049  * This procedure is called from rsm_srv_func when a remote node creates
5053 5050 * a send queue. This event is used as a hint that an earlier failed
5054 5051 * attempt to create a send queue to that remote node may now succeed and
5055 5052 * should be retried. Indication of an earlier failed attempt is provided
5056 5053 * by the RSMKA_SQCREATE_PENDING flag.
5057 5054 */
5058 5055 static void
5059 5056 rsm_sqcreateop_callback(rsm_addr_t src_hwaddr, rsm_intr_hand_arg_t arg)
5060 5057 {
5061 5058 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg;
5062 5059 path_t *path;
5063 5060 DBG_DEFINE(category,
5064 5061 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5065 5062
5066 5063 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5067 5064 "rsm_sqcreateop_callback enter\n"));
5068 5065
5069 5066 /* look up the path - incr the path refcnt */
5070 5067 path = rsm_find_path(hdlr_argp->adapter_name,
5071 5068 hdlr_argp->adapter_instance, src_hwaddr);
5072 5069
5073 5070 if (path == NULL) {
5074 5071 DBG_PRINTF((category, RSM_DEBUG,
5075 5072 "rsm_sqcreateop_callback done: no path\n"));
5076 5073 return;
5077 5074 }
5078 5075
5079 5076 if ((path->state == RSMKA_PATH_UP) &&
5080 5077 (path->flags & RSMKA_SQCREATE_PENDING)) {
5081 5078 /*
5082 5079 * previous attempt to create sendq had failed, retry
5083 5080 * it and move to RSMKA_PATH_ACTIVE state if successful.
5084 5081 		 * The refcnt will be decremented in do_deferred_work.
5085 5082 */
5086 5083 (void) rsmka_do_path_active(path, RSMKA_NO_SLEEP);
5087 5084 } else {
5088 5085 /* decrement the refcnt */
5089 5086 PATH_RELE_NOLOCK(path);
5090 5087 }
5091 5088 mutex_exit(&path->mutex);
5092 5089
5093 5090 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5094 5091 "rsm_sqcreateop_callback done\n"));
5095 5092 }
5096 5093
5097 5094 static void
5098 5095 rsm_intr_callback(void *data, rsm_addr_t src_hwaddr, rsm_intr_hand_arg_t arg)
5099 5096 {
5100 5097 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)data;
5101 5098 rsmipc_request_t *msg = (rsmipc_request_t *)data;
5102 5099 rsmipc_controlmsg_t *ctrlmsg = (rsmipc_controlmsg_t *)data;
5103 5100 rsm_node_id_t src_node;
5104 5101 DBG_DEFINE(category,
5105 5102 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5106 5103
5107 5104 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_callback enter:"
5108 5105 "src=%d, type=%d\n", msghdr->rsmipc_src,
5109 5106 msghdr->rsmipc_type));
5110 5107
5111 5108 /*
5112 5109 * Check for the version number in the msg header. If it is not
5113 5110 * RSM_VERSION, drop the message. In the future, we need to manage
5114 5111 	 * incompatible version numbers in some way.
5115 5112 */
5116 5113 if (msghdr->rsmipc_version != RSM_VERSION) {
5117 5114 DBG_PRINTF((category, RSM_ERR, "wrong KA version\n"));
5118 5115 /*
5119 5116 		 * Drop requests that don't have a reply right here.
5120 5117 		 * Requests with replies will send a BAD_VERSION reply
5121 5118 * when they get processed by the worker thread.
5122 5119 */
5123 5120 if (msghdr->rsmipc_type != RSMIPC_MSG_SEGCONNECT) {
5124 5121 return;
5125 5122 }
5126 5123
5127 5124 }
5128 5125
5129 5126 src_node = msghdr->rsmipc_src;
5130 5127
5131 5128 switch (msghdr->rsmipc_type) {
5132 5129 case RSMIPC_MSG_SEGCONNECT:
5133 5130 case RSMIPC_MSG_DISCONNECT:
5134 5131 case RSMIPC_MSG_SUSPEND:
5135 5132 case RSMIPC_MSG_SUSPEND_DONE:
5136 5133 case RSMIPC_MSG_RESUME:
5137 5134 /*
5138 5135 * These message types are handled by a worker thread using
5139 5136 * the flow-control algorithm.
5140 5137 * Any message processing that does one or more of the
5141 5138 * following should be handled in a worker thread.
5142 5139 * - allocates resources and might sleep
5143 5140 * - makes RSMPI calls down to the interconnect driver
5144 5141 	 * this by definition includes requests with replies.
5145 5142 * - takes a long duration of time
5146 5143 */
5147 5144 rsm_intr_callback_dispatch(data, src_hwaddr, arg);
5148 5145 break;
5149 5146 case RSMIPC_MSG_NOTIMPORTING:
5150 5147 importer_list_rm(src_node, msg->rsmipc_key,
5151 5148 msg->rsmipc_segment_cookie);
5152 5149 break;
5153 5150 case RSMIPC_MSG_SQREADY:
5154 5151 rsm_proc_sqready(data, src_hwaddr, arg);
5155 5152 break;
5156 5153 case RSMIPC_MSG_SQREADY_ACK:
5157 5154 rsm_proc_sqready_ack(data, src_hwaddr, arg);
5158 5155 break;
5159 5156 case RSMIPC_MSG_CREDIT:
5160 5157 rsm_add_credits(ctrlmsg, src_hwaddr, arg);
5161 5158 break;
5162 5159 case RSMIPC_MSG_REPLY:
5163 5160 rsm_intr_reply(msghdr);
5164 5161 break;
5165 5162 case RSMIPC_MSG_BELL:
5166 5163 rsm_intr_event(msg);
5167 5164 break;
5168 5165 case RSMIPC_MSG_IMPORTING:
5169 5166 importer_list_add(src_node, msg->rsmipc_key,
5170 5167 msg->rsmipc_adapter_hwaddr,
5171 5168 msg->rsmipc_segment_cookie);
5172 5169 break;
5173 5170 case RSMIPC_MSG_REPUBLISH:
5174 5171 importer_update(src_node, msg->rsmipc_key, msg->rsmipc_perm);
5175 5172 break;
5176 5173 default:
5177 5174 DBG_PRINTF((category, RSM_DEBUG,
5178 5175 "rsm_intr_callback: bad msg %lx type %d data %lx\n",
5179 5176 (size_t)msg, (int)(msghdr->rsmipc_type), (size_t)data));
5180 5177 }
5181 5178
5182 5179 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_callback done\n"));
5183 5180
5184 5181 }
5185 5182
5186 5183 rsm_intr_hand_ret_t rsm_srv_func(rsm_controller_object_t *chd,
5187 5184 rsm_intr_q_op_t opcode, rsm_addr_t src,
5188 5185 void *data, size_t size, rsm_intr_hand_arg_t arg)
5189 5186 {
5190 5187 DBG_DEFINE(category,
5191 5188 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5192 5189
5193 5190 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_srv_func enter\n"));
5194 5191
5195 5192 switch (opcode) {
5196 5193 case RSM_INTR_Q_OP_CREATE:
5197 5194 DBG_PRINTF((category, RSM_DEBUG, "rsm_srv_func:OP_CREATE\n"));
5198 5195 rsm_sqcreateop_callback(src, arg);
5199 5196 break;
5200 5197 case RSM_INTR_Q_OP_DESTROY:
5201 5198 DBG_PRINTF((category, RSM_DEBUG, "rsm_srv_func:OP_DESTROY\n"));
5202 5199 break;
5203 5200 case RSM_INTR_Q_OP_RECEIVE:
5204 5201 rsm_intr_callback(data, src, arg);
5205 5202 break;
5206 5203 default:
5207 5204 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5208 5205 "rsm_srv_func: unknown opcode = %x\n", opcode));
5209 5206 }
5210 5207
5211 5208 chd = chd;
5212 5209 size = size;
5213 5210
5214 5211 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_srv_func done\n"));
5215 5212
5216 5213 return (RSM_INTR_HAND_CLAIMED);
5217 5214 }
5218 5215
5219 5216 /* *************************** IPC slots ************************* */
5220 5217 static rsmipc_slot_t *
5221 5218 rsmipc_alloc()
5222 5219 {
5223 5220 int i;
5224 5221 rsmipc_slot_t *slot;
5225 5222 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
5226 5223
5227 5224 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_alloc enter\n"));
5228 5225
5229 5226 /* try to find a free slot, if not wait */
5230 5227 mutex_enter(&rsm_ipc.lock);
5231 5228
5232 5229 while (rsm_ipc.count == 0) {
5233 5230 rsm_ipc.wanted = 1;
5234 5231 cv_wait(&rsm_ipc.cv, &rsm_ipc.lock);
5235 5232 }
5236 5233
5237 5234 /* An empty slot is available, find it */
5238 5235 slot = &rsm_ipc.slots[0];
5239 5236 for (i = 0; i < RSMIPC_SZ; i++, slot++) {
5240 5237 if (RSMIPC_GET(slot, RSMIPC_FREE)) {
5241 5238 RSMIPC_CLEAR(slot, RSMIPC_FREE);
5242 5239 break;
5243 5240 }
5244 5241 }
5245 5242
5246 5243 ASSERT(i < RSMIPC_SZ);
5247 5244 rsm_ipc.count--; /* one less is available */
5248 5245 rsm_ipc.sequence++; /* new sequence */
5249 5246
5250 5247 slot->rsmipc_cookie.ic.sequence = (uint_t)rsm_ipc.sequence;
5251 5248 slot->rsmipc_cookie.ic.index = (uint_t)i;
5252 5249
5253 5250 mutex_exit(&rsm_ipc.lock);
5254 5251
5255 5252 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_alloc done\n"));
5256 5253
5257 5254 return (slot);
5258 5255 }
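/*
 * Editor's note: an illustrative sketch (not part of rsm.c) of why the
 * slot cookie built above carries both an index and a sequence number.
 * A reply is matched against a slot only if the entire cookie matches,
 * so a reply that arrives after its slot was freed and reallocated is
 * recognized as stale by the sequence mismatch. Names are hypothetical.
 */
#include <stddef.h>

typedef struct {
	unsigned int	sequence;	/* bumped on every allocation */
	unsigned int	index;		/* position in the slot array */
} ipc_cookie_t;

typedef struct {
	ipc_cookie_t	cookie;
	void		*reply_buf;
} ipc_slot_t;

#define	NSLOTS	16
static ipc_slot_t slots[NSLOTS];

static ipc_slot_t *
match_reply(ipc_cookie_t c)
{
	if (c.index >= NSLOTS)
		return (NULL);
	/* a sequence mismatch means the slot has since been reused */
	if (slots[c.index].cookie.sequence != c.sequence)
		return (NULL);
	return (&slots[c.index]);
}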
5259 5256
5260 5257 static void
5261 5258 rsmipc_free(rsmipc_slot_t *slot)
5262 5259 {
5263 5260 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
5264 5261
5265 5262 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_free enter\n"));
5266 5263
5267 5264 ASSERT(MUTEX_HELD(&slot->rsmipc_lock));
5268 5265 ASSERT(&rsm_ipc.slots[slot->rsmipc_cookie.ic.index] == slot);
5269 5266
5270 5267 mutex_enter(&rsm_ipc.lock);
5271 5268
5272 5269 RSMIPC_SET(slot, RSMIPC_FREE);
5273 5270
5274 5271 slot->rsmipc_cookie.ic.sequence = 0;
5275 5272
5276 5273 mutex_exit(&slot->rsmipc_lock);
5277 5274 rsm_ipc.count++;
5278 5275 ASSERT(rsm_ipc.count <= RSMIPC_SZ);
5279 5276 if (rsm_ipc.wanted) {
5280 5277 rsm_ipc.wanted = 0;
5281 5278 cv_broadcast(&rsm_ipc.cv);
5282 5279 }
5283 5280
5284 5281 mutex_exit(&rsm_ipc.lock);
5285 5282
5286 5283 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_free done\n"));
5287 5284 }
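/*
 * Editor's note: sketch (not part of rsm.c) of the "wanted" wakeup used
 * by rsmipc_alloc/rsmipc_free above - waiters set a flag under the lock,
 * and the freeing side broadcasts only when someone is actually waiting.
 * POSIX primitives stand in for the kernel mutex/cv; names are
 * hypothetical.
 */
#include <pthread.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;
static int count = 16;	/* free slots */
static int wanted;

static void
take_slot(void)
{
	(void) pthread_mutex_lock(&lock);
	while (count == 0) {
		wanted = 1;
		(void) pthread_cond_wait(&cv, &lock);
	}
	count--;
	(void) pthread_mutex_unlock(&lock);
}

static void
put_slot(void)
{
	(void) pthread_mutex_lock(&lock);
	count++;
	if (wanted) {
		wanted = 0;
		(void) pthread_cond_broadcast(&cv);
	}
	(void) pthread_mutex_unlock(&lock);
}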
5288 5285
5289 5286 static int
5290 5287 rsmipc_send(rsm_node_id_t dest, rsmipc_request_t *req, rsmipc_reply_t *reply)
5291 5288 {
5292 5289 int e = 0;
5293 5290 int credit_check = 0;
5294 5291 int retry_cnt = 0;
5295 5292 int min_retry_cnt = 10;
5296 5293 rsm_send_t is;
5297 5294 rsmipc_slot_t *rslot;
5298 5295 adapter_t *adapter;
5299 5296 path_t *path;
5300 5297 sendq_token_t *sendq_token;
5301 5298 sendq_token_t *used_sendq_token = NULL;
5302 5299 rsm_send_q_handle_t ipc_handle;
5303 5300 DBG_DEFINE(category,
5304 5301 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5305 5302
5306 5303 	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_send enter:dest=%d\n",
5307 5304 dest));
5308 5305
5309 5306 /*
5310 5307 * Check if this is a local case
5311 5308 */
5312 5309 if (dest == my_nodeid) {
5313 5310 switch (req->rsmipc_hdr.rsmipc_type) {
5314 5311 case RSMIPC_MSG_SEGCONNECT:
5315 5312 reply->rsmipc_status = (short)rsmsegacl_validate(
5316 5313 req, dest, reply);
5317 5314 break;
5318 5315 case RSMIPC_MSG_BELL:
5319 5316 req->rsmipc_hdr.rsmipc_src = dest;
5320 5317 rsm_intr_event(req);
5321 5318 break;
5322 5319 case RSMIPC_MSG_IMPORTING:
5323 5320 importer_list_add(dest, req->rsmipc_key,
5324 5321 req->rsmipc_adapter_hwaddr,
5325 5322 req->rsmipc_segment_cookie);
5326 5323 break;
5327 5324 case RSMIPC_MSG_NOTIMPORTING:
5328 5325 importer_list_rm(dest, req->rsmipc_key,
5329 5326 req->rsmipc_segment_cookie);
5330 5327 break;
5331 5328 case RSMIPC_MSG_REPUBLISH:
5332 5329 importer_update(dest, req->rsmipc_key,
5333 5330 req->rsmipc_perm);
5334 5331 break;
5335 5332 case RSMIPC_MSG_SUSPEND:
5336 5333 importer_suspend(dest);
5337 5334 break;
5338 5335 case RSMIPC_MSG_SUSPEND_DONE:
5339 5336 rsm_suspend_complete(dest, 0);
5340 5337 break;
5341 5338 case RSMIPC_MSG_RESUME:
5342 5339 importer_resume(dest);
5343 5340 break;
5344 5341 default:
5345 5342 ASSERT(0);
5346 5343 }
5347 5344 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5348 5345 "rsmipc_send done\n"));
5349 5346 return (0);
5350 5347 }
5351 5348
5352 5349 if (dest >= MAX_NODES) {
5353 5350 DBG_PRINTF((category, RSM_ERR,
5354 5351 "rsm: rsmipc_send bad node number %x\n", dest));
5355 5352 return (RSMERR_REMOTE_NODE_UNREACHABLE);
5356 5353 }
5357 5354
5358 5355 /*
5359 5356 * Oh boy! we are going remote.
5360 5357 */
5361 5358
5362 5359 /*
5363 5360 * identify if we need to have credits to send this message
5364 5361 * - only selected requests are flow controlled
5365 5362 */
5366 5363 if (req != NULL) {
5367 5364 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5368 5365 "rsmipc_send:request type=%d\n",
5369 5366 req->rsmipc_hdr.rsmipc_type));
5370 5367
5371 5368 switch (req->rsmipc_hdr.rsmipc_type) {
5372 5369 case RSMIPC_MSG_SEGCONNECT:
5373 5370 case RSMIPC_MSG_DISCONNECT:
5374 5371 case RSMIPC_MSG_IMPORTING:
5375 5372 case RSMIPC_MSG_SUSPEND:
5376 5373 case RSMIPC_MSG_SUSPEND_DONE:
5377 5374 case RSMIPC_MSG_RESUME:
5378 5375 credit_check = 1;
5379 5376 break;
5380 5377 default:
5381 5378 credit_check = 0;
5382 5379 }
5383 5380 }
5384 5381
5385 5382 again:
5386 5383 if (retry_cnt++ == min_retry_cnt) {
5387 5384 		/* back off for 10ms before further retries */
5388 5385 delay(drv_usectohz(10000));
5389 5386 retry_cnt = 0; /* reset retry_cnt */
5390 5387 }
5391 5388 sendq_token = rsmka_get_sendq_token(dest, used_sendq_token);
5392 5389 if (sendq_token == NULL) {
5393 5390 DBG_PRINTF((category, RSM_ERR,
5394 5391 "rsm: rsmipc_send no device to reach node %d\n", dest));
5395 5392 return (RSMERR_REMOTE_NODE_UNREACHABLE);
5396 5393 }
5397 5394
5398 5395 if ((sendq_token == used_sendq_token) &&
5399 5396 ((e == RSMERR_CONN_ABORTED) || (e == RSMERR_TIMEOUT) ||
5400 5397 (e == RSMERR_COMM_ERR_MAYBE_DELIVERED))) {
5401 5398 rele_sendq_token(sendq_token);
5402 5399 DBG_PRINTF((category, RSM_DEBUG, "rsmipc_send done=%d\n", e));
5403 5400 return (RSMERR_CONN_ABORTED);
5404 5401 } else
5405 5402 used_sendq_token = sendq_token;
5406 5403
5407 5404 /* lint -save -e413 */
5408 5405 path = SQ_TOKEN_TO_PATH(sendq_token);
5409 5406 adapter = path->local_adapter;
5410 5407 /* lint -restore */
5411 5408 ipc_handle = sendq_token->rsmpi_sendq_handle;
5412 5409
5413 5410 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5414 5411 "rsmipc_send: path=%lx sendq_hdl=%lx\n", path, ipc_handle));
5415 5412
5416 5413 if (reply == NULL) {
5417 5414 /* Send request without ack */
5418 5415 /*
5419 5416 * Set the rsmipc_version number in the msghdr for KA
5420 5417 * communication versioning
5421 5418 */
5422 5419 req->rsmipc_hdr.rsmipc_version = RSM_VERSION;
5423 5420 req->rsmipc_hdr.rsmipc_src = my_nodeid;
5424 5421 /*
5425 5422 		 * remote endpoint's incn should match the value in our
5426 5423 * path's remote_incn field. No need to grab any lock
5427 5424 * since we have refcnted the path in rsmka_get_sendq_token
5428 5425 */
5429 5426 req->rsmipc_hdr.rsmipc_incn = path->remote_incn;
5430 5427
5431 5428 is.is_data = (void *)req;
5432 5429 is.is_size = sizeof (*req);
5433 5430 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
5434 5431 is.is_wait = 0;
5435 5432
5436 5433 if (credit_check) {
5437 5434 mutex_enter(&path->mutex);
5438 5435 /*
5439 5436 			 * wait till we receive credits or the path goes
5440 5437 			 * down. If the path goes down, rsm_send will fail
5441 5438 			 * and we handle the error then.
5442 5439 */
5443 5440 while ((sendq_token->msgbuf_avail == 0) &&
5444 5441 (path->state == RSMKA_PATH_ACTIVE)) {
5445 5442 e = cv_wait_sig(&sendq_token->sendq_cv,
5446 5443 &path->mutex);
5447 5444 if (e == 0) {
5448 5445 mutex_exit(&path->mutex);
5449 5446 no_reply_cnt++;
5450 5447 rele_sendq_token(sendq_token);
5451 5448 DBG_PRINTF((category, RSM_DEBUG,
5452 5449 "rsmipc_send done: "
5453 5450 "cv_wait INTERRUPTED"));
5454 5451 return (RSMERR_INTERRUPTED);
5455 5452 }
5456 5453 }
5457 5454
5458 5455 /*
5459 5456 			 * path is not active, retry on another path.
5460 5457 */
5461 5458 if (path->state != RSMKA_PATH_ACTIVE) {
5462 5459 mutex_exit(&path->mutex);
5463 5460 rele_sendq_token(sendq_token);
5464 5461 e = RSMERR_CONN_ABORTED;
5465 5462 DBG_PRINTF((category, RSM_ERR,
5466 5463 "rsm: rsmipc_send: path !ACTIVE"));
5467 5464 goto again;
5468 5465 }
5469 5466
5470 5467 ASSERT(sendq_token->msgbuf_avail > 0);
5471 5468
5472 5469 /*
5473 5470 * reserve a msgbuf
5474 5471 */
5475 5472 sendq_token->msgbuf_avail--;
5476 5473
5477 5474 mutex_exit(&path->mutex);
5478 5475
5479 5476 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
5480 5477 NULL);
5481 5478
5482 5479 if (e != RSM_SUCCESS) {
5483 5480 mutex_enter(&path->mutex);
5484 5481 /*
5485 5482 * release the reserved msgbuf since
5486 5483 * the send failed
5487 5484 */
5488 5485 sendq_token->msgbuf_avail++;
5489 5486 cv_broadcast(&sendq_token->sendq_cv);
5490 5487 mutex_exit(&path->mutex);
5491 5488 }
5492 5489 } else
5493 5490 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
5494 5491 NULL);
5495 5492
5496 5493 no_reply_cnt++;
5497 5494 rele_sendq_token(sendq_token);
5498 5495 if (e != RSM_SUCCESS) {
5499 5496 DBG_PRINTF((category, RSM_ERR,
5500 5497 "rsm: rsmipc_send no reply send"
5501 5498 " err = %d no reply count = %d\n",
5502 5499 e, no_reply_cnt));
5503 5500 ASSERT(e != RSMERR_QUEUE_FENCE_UP &&
5504 5501 e != RSMERR_BAD_BARRIER_HNDL);
5505 5502 atomic_inc_64(&rsm_ipcsend_errcnt);
5506 5503 goto again;
5507 5504 } else {
5508 5505 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5509 5506 "rsmipc_send done\n"));
5510 5507 return (e);
5511 5508 }
5512 5509
5513 5510 }
5514 5511
5515 5512 if (req == NULL) {
5516 5513 /* Send reply - No flow control is done for reply */
5517 5514 /*
5518 5515 * Set the version in the msg header for KA communication
5519 5516 * versioning
5520 5517 */
5521 5518 reply->rsmipc_hdr.rsmipc_version = RSM_VERSION;
5522 5519 reply->rsmipc_hdr.rsmipc_src = my_nodeid;
5523 5520 /* incn number is not used for reply msgs currently */
5524 5521 reply->rsmipc_hdr.rsmipc_incn = path->remote_incn;
5525 5522
5526 5523 is.is_data = (void *)reply;
5527 5524 is.is_size = sizeof (*reply);
5528 5525 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
5529 5526 is.is_wait = 0;
5530 5527 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, NULL);
5531 5528 rele_sendq_token(sendq_token);
5532 5529 if (e != RSM_SUCCESS) {
5533 5530 DBG_PRINTF((category, RSM_ERR,
5534 5531 "rsm: rsmipc_send reply send"
5535 5532 " err = %d\n", e));
5536 5533 atomic_inc_64(&rsm_ipcsend_errcnt);
5537 5534 goto again;
5538 5535 } else {
5539 5536 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5540 5537 "rsmipc_send done\n"));
5541 5538 return (e);
5542 5539 }
5543 5540 }
5544 5541
5545 5542 /* Reply needed */
5546 5543 rslot = rsmipc_alloc(); /* allocate a new ipc slot */
5547 5544
5548 5545 mutex_enter(&rslot->rsmipc_lock);
5549 5546
5550 5547 rslot->rsmipc_data = (void *)reply;
5551 5548 RSMIPC_SET(rslot, RSMIPC_PENDING);
5552 5549
5553 5550 while (RSMIPC_GET(rslot, RSMIPC_PENDING)) {
5554 5551 /*
5555 5552 * Set the rsmipc_version number in the msghdr for KA
5556 5553 * communication versioning
5557 5554 */
5558 5555 req->rsmipc_hdr.rsmipc_version = RSM_VERSION;
5559 5556 req->rsmipc_hdr.rsmipc_src = my_nodeid;
5560 5557 req->rsmipc_hdr.rsmipc_cookie = rslot->rsmipc_cookie;
5561 5558 /*
5562 5559 		 * remote endpoint's incn should match the value in our
5563 5560 * path's remote_incn field. No need to grab any lock
5564 5561 * since we have refcnted the path in rsmka_get_sendq_token
5565 5562 */
5566 5563 req->rsmipc_hdr.rsmipc_incn = path->remote_incn;
5567 5564
5568 5565 is.is_data = (void *)req;
5569 5566 is.is_size = sizeof (*req);
5570 5567 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
5571 5568 is.is_wait = 0;
5572 5569 if (credit_check) {
5573 5570
5574 5571 mutex_enter(&path->mutex);
5575 5572 /*
5576 5573 			 * wait till we receive credits or the path goes
5577 5574 			 * down. If the path goes down, rsm_send will fail
5578 5575 			 * and we handle the error then.
5579 5576 */
5580 5577 while ((sendq_token->msgbuf_avail == 0) &&
5581 5578 (path->state == RSMKA_PATH_ACTIVE)) {
5582 5579 e = cv_wait_sig(&sendq_token->sendq_cv,
5583 5580 &path->mutex);
5584 5581 if (e == 0) {
5585 5582 mutex_exit(&path->mutex);
5586 5583 RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
5587 5584 rsmipc_free(rslot);
5588 5585 rele_sendq_token(sendq_token);
5589 5586 DBG_PRINTF((category, RSM_DEBUG,
5590 5587 "rsmipc_send done: "
5591 5588 "cv_wait INTERRUPTED"));
5592 5589 return (RSMERR_INTERRUPTED);
5593 5590 }
5594 5591 }
5595 5592
5596 5593 /*
5597 5594 			 * path is not active, retry on another path.
5598 5595 */
5599 5596 if (path->state != RSMKA_PATH_ACTIVE) {
5600 5597 mutex_exit(&path->mutex);
5601 5598 RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
5602 5599 rsmipc_free(rslot);
5603 5600 rele_sendq_token(sendq_token);
5604 5601 e = RSMERR_CONN_ABORTED;
5605 5602 DBG_PRINTF((category, RSM_ERR,
5606 5603 "rsm: rsmipc_send: path !ACTIVE"));
5607 5604 goto again;
5608 5605 }
5609 5606
5610 5607 ASSERT(sendq_token->msgbuf_avail > 0);
5611 5608
5612 5609 /*
5613 5610 * reserve a msgbuf
5614 5611 */
5615 5612 sendq_token->msgbuf_avail--;
5616 5613
5617 5614 mutex_exit(&path->mutex);
5618 5615
5619 5616 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
5620 5617 NULL);
5621 5618
5622 5619 if (e != RSM_SUCCESS) {
5623 5620 mutex_enter(&path->mutex);
5624 5621 /*
5625 5622 * release the reserved msgbuf since
5626 5623 * the send failed
5627 5624 */
5628 5625 sendq_token->msgbuf_avail++;
5629 5626 cv_broadcast(&sendq_token->sendq_cv);
5630 5627 mutex_exit(&path->mutex);
5631 5628 }
5632 5629 } else
5633 5630 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
5634 5631 NULL);
5635 5632
5636 5633 if (e != RSM_SUCCESS) {
5637 5634 DBG_PRINTF((category, RSM_ERR,
5638 5635 "rsm: rsmipc_send rsmpi send err = %d\n", e));
5639 5636 RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
5640 5637 rsmipc_free(rslot);
5641 5638 rele_sendq_token(sendq_token);
5642 5639 atomic_inc_64(&rsm_ipcsend_errcnt);
5643 5640 goto again;
5644 5641 }
5645 5642
5646 5643 /* wait for a reply signal, a SIGINT, or 5 sec. timeout */
5647 5644 e = cv_reltimedwait_sig(&rslot->rsmipc_cv, &rslot->rsmipc_lock,
5648 5645 drv_usectohz(5000000), TR_CLOCK_TICK);
5649 5646 if (e < 0) {
5650 5647 /* timed out - retry */
5651 5648 e = RSMERR_TIMEOUT;
5652 5649 } else if (e == 0) {
5653 5650 /* signalled - return error */
5654 5651 e = RSMERR_INTERRUPTED;
5655 5652 break;
5656 5653 } else {
5657 5654 e = RSM_SUCCESS;
5658 5655 }
5659 5656 }
5660 5657
5661 5658 RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
5662 5659 rsmipc_free(rslot);
5663 5660 rele_sendq_token(sendq_token);
5664 5661
5665 5662 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_send done=%d\n", e));
5666 5663 return (e);
5667 5664 }
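/*
 * Editor's note: a minimal sketch (not part of rsm.c) of the retry shape
 * used throughout rsmipc_send above - retry immediately on failure, but
 * sleep for 10ms after every min_retry_cnt consecutive attempts so a
 * persistently failing path does not spin. attempt_send() is a
 * hypothetical stand-in for the rsm_send path; the driver additionally
 * bounds this loop by giving up when no usable sendq token remains.
 */
#include <unistd.h>

static int
attempt_send(void)
{
	return (0);	/* stub: would perform the actual transport send */
}

static int
send_with_backoff(void)
{
	int retry_cnt = 0;
	const int min_retry_cnt = 10;

	for (;;) {
		if (retry_cnt++ == min_retry_cnt) {
			/* like delay(drv_usectohz(10000)) in the driver */
			(void) usleep(10000);
			retry_cnt = 0;
		}
		if (attempt_send() == 0)
			return (0);
		/* failure: loop back, the "goto again" in rsmipc_send */
	}
}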
5668 5665
5669 5666 static int
5670 5667 rsm_send_notimporting(rsm_node_id_t dest, rsm_memseg_id_t segid, void *cookie)
5671 5668 {
5672 5669 rsmipc_request_t request;
5673 5670
5674 5671 /*
5675 5672 * inform the exporter to delete this importer
5676 5673 */
5677 5674 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_NOTIMPORTING;
5678 5675 request.rsmipc_key = segid;
5679 5676 request.rsmipc_segment_cookie = cookie;
5680 5677 return (rsmipc_send(dest, &request, RSM_NO_REPLY));
5681 5678 }
5682 5679
5683 5680 static void
5684 5681 rsm_send_republish(rsm_memseg_id_t segid, rsmapi_access_entry_t *acl,
5685 5682 int acl_len, rsm_permission_t default_permission)
5686 5683 {
5687 5684 int i;
5688 5685 importing_token_t *token;
5689 5686 rsmipc_request_t request;
5690 5687 republish_token_t *republish_list = NULL;
5691 5688 republish_token_t *rp;
5692 5689 rsm_permission_t permission;
5693 5690 int index;
5694 5691
5695 5692 /*
5696 5693 * send the new access mode to all the nodes that have imported
5697 5694 * this segment.
5698 5695 * If the new acl does not have a node that was present in
5699 5696 	 * the old acl, an access permission of 0 is sent.
5700 5697 */
5701 5698
5702 5699 index = rsmhash(segid);
5703 5700
5704 5701 /*
5705 5702 * create a list of node/permissions to send the republish message
5706 5703 */
5707 5704 mutex_enter(&importer_list.lock);
5708 5705
5709 5706 token = importer_list.bucket[index];
5710 5707 while (token != NULL) {
5711 5708 if (segid == token->key) {
5712 5709 permission = default_permission;
5713 5710
5714 5711 for (i = 0; i < acl_len; i++) {
5715 5712 if (token->importing_node == acl[i].ae_node) {
5716 5713 permission = acl[i].ae_permission;
5717 5714 break;
5718 5715 }
5719 5716 }
5720 5717 rp = kmem_zalloc(sizeof (republish_token_t), KM_SLEEP);
5721 5718
5722 5719 rp->key = segid;
5723 5720 rp->importing_node = token->importing_node;
5724 5721 rp->permission = permission;
5725 5722 rp->next = republish_list;
5726 5723 republish_list = rp;
5727 5724 }
5728 5725 token = token->next;
5729 5726 }
5730 5727
5731 5728 mutex_exit(&importer_list.lock);
5732 5729
5733 5730 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPUBLISH;
5734 5731 request.rsmipc_key = segid;
5735 5732
5736 5733 while (republish_list != NULL) {
5737 5734 request.rsmipc_perm = republish_list->permission;
5738 5735 (void) rsmipc_send(republish_list->importing_node,
5739 5736 &request, RSM_NO_REPLY);
5740 5737 rp = republish_list;
5741 5738 republish_list = republish_list->next;
5742 5739 kmem_free(rp, sizeof (republish_token_t));
5743 5740 }
5744 5741 }
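/*
 * Editor's note: sketch (not part of rsm.c) of the effective-permission
 * lookup rsm_send_republish performs per importing node - a linear scan
 * of the new acl, falling back to the default permission when the node
 * is not listed. A default of 0 is what revokes access for a node that
 * was dropped from the acl. Types and names are hypothetical.
 */
typedef struct {
	int	node;
	int	perm;
} acl_ent_t;

static int
effective_perm(const acl_ent_t *acl, int acl_len, int node, int dflt)
{
	int i;

	for (i = 0; i < acl_len; i++) {
		if (acl[i].node == node)
			return (acl[i].perm);
	}
	return (dflt);
}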
5745 5742
5746 5743 static void
5747 5744 rsm_send_suspend()
5748 5745 {
5749 5746 int i, e;
5750 5747 rsmipc_request_t request;
5751 5748 list_element_t *tokp;
5752 5749 list_element_t *head = NULL;
5753 5750 importing_token_t *token;
5754 5751 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
5755 5752 "rsm_send_suspend enter\n"));
5756 5753
5757 5754 /*
5758 5755 	 * create a list of nodes to send the suspend message to
5759 5756 *
5760 5757 * Currently the whole importer list is scanned and we obtain
5761 5758 * all the nodes - this basically gets all nodes that at least
5762 5759 * import one segment from the local node.
5763 5760 *
5764 5761 * no need to grab the rsm_suspend_list lock here since we are
5765 5762 * single threaded when suspend is called.
5766 5763 */
5767 5764
5768 5765 mutex_enter(&importer_list.lock);
5769 5766 for (i = 0; i < rsm_hash_size; i++) {
5770 5767
5771 5768 token = importer_list.bucket[i];
5772 5769
5773 5770 while (token != NULL) {
5774 5771
5775 5772 tokp = head;
5776 5773
5777 5774 /*
5778 5775 * make sure that the token's node
5779 5776 * is not already on the suspend list
5780 5777 */
5781 5778 while (tokp != NULL) {
5782 5779 if (tokp->nodeid == token->importing_node) {
5783 5780 break;
5784 5781 }
5785 5782 tokp = tokp->next;
5786 5783 }
5787 5784
5788 5785 if (tokp == NULL) { /* not in suspend list */
5789 5786 tokp = kmem_zalloc(sizeof (list_element_t),
5790 5787 KM_SLEEP);
5791 5788 tokp->nodeid = token->importing_node;
5792 5789 tokp->next = head;
5793 5790 head = tokp;
5794 5791 }
5795 5792
5796 5793 token = token->next;
5797 5794 }
5798 5795 }
5799 5796 mutex_exit(&importer_list.lock);
5800 5797
5801 5798 if (head == NULL) { /* no importers so go ahead and quiesce segments */
5802 5799 exporter_quiesce();
5803 5800 return;
5804 5801 }
5805 5802
5806 5803 mutex_enter(&rsm_suspend_list.list_lock);
5807 5804 ASSERT(rsm_suspend_list.list_head == NULL);
5808 5805 /*
5809 5806 	 * update the suspend list right away so that if a node dies the
5810 5807 	 * path manager can set the NODE dead flag
5811 5808 */
5812 5809 rsm_suspend_list.list_head = head;
5813 5810 mutex_exit(&rsm_suspend_list.list_lock);
5814 5811
5815 5812 tokp = head;
5816 5813
5817 5814 while (tokp != NULL) {
5818 5815 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SUSPEND;
5819 5816 e = rsmipc_send(tokp->nodeid, &request, RSM_NO_REPLY);
5820 5817 /*
5821 5818 * Error in rsmipc_send currently happens due to inaccessibility
5822 5819 * of the remote node.
5823 5820 */
5824 5821 		if (e == RSM_SUCCESS) { /* send succeeded - wait for the ack */
5825 5822 tokp->flags |= RSM_SUSPEND_ACKPENDING;
5826 5823 }
5827 5824
5828 5825 tokp = tokp->next;
5829 5826 }
5830 5827
5831 5828 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
5832 5829 "rsm_send_suspend done\n"));
5833 5830
5834 5831 }
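/*
 * Editor's note: sketch (not part of rsm.c) of the duplicate-suppressing
 * list build in rsm_send_suspend above - a node is prepended only if it
 * is not already on the list, so each importing node receives exactly
 * one suspend message no matter how many segments it imports. Names are
 * hypothetical.
 */
#include <stdlib.h>

typedef struct elem {
	int		nodeid;
	struct elem	*next;
} elem_t;

static elem_t *
add_unique(elem_t *head, int nodeid)
{
	elem_t *p;

	for (p = head; p != NULL; p = p->next) {
		if (p->nodeid == nodeid)
			return (head);	/* already listed */
	}

	p = calloc(1, sizeof (elem_t));
	if (p == NULL)
		return (head);		/* sketch: ignore allocation failure */
	p->nodeid = nodeid;
	p->next = head;
	return (p);
}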
5835 5832
5836 5833 static void
5837 5834 rsm_send_resume()
5838 5835 {
5839 5836 rsmipc_request_t request;
5840 5837 list_element_t *elem, *head;
5841 5838
5842 5839 /*
5843 5840 * save the suspend list so that we know where to send
5844 5841 * the resume messages and make the suspend list head
5845 5842 * NULL.
5846 5843 */
5847 5844 mutex_enter(&rsm_suspend_list.list_lock);
5848 5845 head = rsm_suspend_list.list_head;
5849 5846 rsm_suspend_list.list_head = NULL;
5850 5847 mutex_exit(&rsm_suspend_list.list_lock);
5851 5848
5852 5849 while (head != NULL) {
5853 5850 elem = head;
5854 5851 head = head->next;
5855 5852
5856 5853 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_RESUME;
5857 5854
5858 5855 (void) rsmipc_send(elem->nodeid, &request, RSM_NO_REPLY);
5859 5856
5860 5857 kmem_free((void *)elem, sizeof (list_element_t));
5861 5858
5862 5859 }
5863 5860
5864 5861 }
5865 5862
5866 5863 /*
5867 5864  * This function takes a path and sends a message using the sendq
5868 5865 * corresponding to it. The RSMIPC_MSG_SQREADY, RSMIPC_MSG_SQREADY_ACK
5869 5866 * and RSMIPC_MSG_CREDIT are sent using this function.
5870 5867 */
5871 5868 int
5872 5869 rsmipc_send_controlmsg(path_t *path, int msgtype)
5873 5870 {
5874 5871 int e;
5875 5872 int retry_cnt = 0;
5876 5873 int min_retry_cnt = 10;
5877 5874 adapter_t *adapter;
5878 5875 rsm_send_t is;
5879 5876 rsm_send_q_handle_t ipc_handle;
5880 5877 rsmipc_controlmsg_t msg;
5881 5878 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_FLOWCONTROL);
5882 5879
5883 5880 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5884 5881 "rsmipc_send_controlmsg enter\n"));
5885 5882
5886 5883 ASSERT(MUTEX_HELD(&path->mutex));
5887 5884
5888 5885 adapter = path->local_adapter;
5889 5886
5890 5887 DBG_PRINTF((category, RSM_DEBUG, "rsmipc_send_controlmsg:path=%lx "
5891 5888 "msgtype=%d %lx:%llx->%lx:%llx procmsg=%d\n", path, msgtype,
5892 5889 my_nodeid, adapter->hwaddr, path->remote_node,
5893 5890 path->remote_hwaddr, path->procmsg_cnt));
5894 5891
5895 5892 if (path->state != RSMKA_PATH_ACTIVE) {
5896 5893 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5897 5894 "rsmipc_send_controlmsg done: ! RSMKA_PATH_ACTIVE"));
5898 5895 return (1);
5899 5896 }
5900 5897
5901 5898 ipc_handle = path->sendq_token.rsmpi_sendq_handle;
5902 5899
5903 5900 msg.rsmipc_hdr.rsmipc_version = RSM_VERSION;
5904 5901 msg.rsmipc_hdr.rsmipc_src = my_nodeid;
5905 5902 msg.rsmipc_hdr.rsmipc_type = msgtype;
5906 5903 msg.rsmipc_hdr.rsmipc_incn = path->remote_incn;
5907 5904
5908 5905 if (msgtype == RSMIPC_MSG_CREDIT)
5909 5906 msg.rsmipc_credits = path->procmsg_cnt;
5910 5907
5911 5908 msg.rsmipc_local_incn = path->local_incn;
5912 5909
5913 5910 msg.rsmipc_adapter_hwaddr = adapter->hwaddr;
5914 5911 /* incr the sendq, path refcnt */
5915 5912 PATH_HOLD_NOLOCK(path);
5916 5913 SENDQ_TOKEN_HOLD(path);
5917 5914
5918 5915 do {
5919 5916 /* drop the path lock before doing the rsm_send */
5920 5917 mutex_exit(&path->mutex);
5921 5918
5922 5919 is.is_data = (void *)&msg;
5923 5920 is.is_size = sizeof (msg);
5924 5921 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
5925 5922 is.is_wait = 0;
5926 5923
5927 5924 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, NULL);
5928 5925
5929 5926 ASSERT(e != RSMERR_QUEUE_FENCE_UP &&
5930 5927 e != RSMERR_BAD_BARRIER_HNDL);
5931 5928
5932 5929 mutex_enter(&path->mutex);
5933 5930
5934 5931 if (e == RSM_SUCCESS) {
5935 5932 break;
5936 5933 }
5937 5934 /* error counter for statistics */
5938 5935 atomic_inc_64(&rsm_ctrlmsg_errcnt);
5939 5936
5940 5937 DBG_PRINTF((category, RSM_ERR,
5941 5938 "rsmipc_send_controlmsg:rsm_send error=%d", e));
5942 5939
5943 5940 if (++retry_cnt == min_retry_cnt) { /* backoff before retry */
5944 5941 (void) cv_reltimedwait(&path->sendq_token.sendq_cv,
5945 5942 &path->mutex, drv_usectohz(10000), TR_CLOCK_TICK);
5946 5943 retry_cnt = 0;
5947 5944 }
5948 5945 } while (path->state == RSMKA_PATH_ACTIVE);
5949 5946
5950 5947 /* decrement the sendq,path refcnt that we incr before rsm_send */
5951 5948 SENDQ_TOKEN_RELE(path);
5952 5949 PATH_RELE_NOLOCK(path);
5953 5950
5954 5951 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5955 5952 "rsmipc_send_controlmsg done=%d", e));
5956 5953 return (e);
5957 5954 }
5958 5955
5959 5956 /*
5960 5957 * Called from rsm_force_unload and path_importer_disconnect. The memory
5961 5958 * mapping for the imported segment is removed and the segment is
5962 5959 * disconnected at the interconnect layer if disconnect_flag is TRUE.
5963 5960 * rsm_force_unload will get disconnect_flag TRUE from rsm_intr_callback
5964 5961 * and FALSE from rsm_rebind.
5965 5962 *
5966 5963 * When subsequent accesses cause page faulting, the dummy page is mapped
5967 5964 * to resolve the fault, and the mapping generation number is incremented
5968 5965 * so that the application can be notified on a close barrier operation.
5969 5966 *
5970 5967 * It is important to note that the caller of rsmseg_unload is responsible for
5971 5968 * acquiring the segment lock before making a call to rsmseg_unload. This is
5972 5969 * required to make the caller and rsmseg_unload thread safe. The segment lock
5973 5970 * will be released by the rsmseg_unload function.
5974 5971 */
5975 5972 void
5976 5973 rsmseg_unload(rsmseg_t *im_seg)
5977 5974 {
5978 5975 rsmcookie_t *hdl;
5979 5976 void *shared_cookie;
5980 5977 rsmipc_request_t request;
5981 5978 uint_t maxprot;
5982 5979
5983 5980 DBG_DEFINE(category,
5984 5981 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5985 5982
5986 5983 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_unload enter\n"));
5987 5984
5988 5985 ASSERT(im_seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
5989 5986
5990 5987 /* wait until segment leaves the mapping state */
5991 5988 while (im_seg->s_state == RSM_STATE_MAPPING)
5992 5989 cv_wait(&im_seg->s_cv, &im_seg->s_lock);
5993 5990 /*
5994 5991 * An unload is only necessary if the segment is connected. However,
5995 5992 * if the segment was on the import list in state RSM_STATE_CONNECTING
5996 5993 * then a connection was in progress. Change to RSM_STATE_NEW
5997 5994 	 * then a connection was in progress. Change to RSM_STATE_ABORT_CONNECT
5998 5995 */
5999 5996 if (im_seg->s_state == RSM_STATE_NEW) {
6000 5997 rsmseglock_release(im_seg);
6001 5998 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6002 5999 "rsmseg_unload done: RSM_STATE_NEW\n"));
6003 6000 return;
6004 6001 } else if (im_seg->s_state == RSM_STATE_CONNECTING) {
6005 6002 im_seg->s_state = RSM_STATE_ABORT_CONNECT;
6006 6003 rsmsharelock_acquire(im_seg);
6007 6004 im_seg->s_share->rsmsi_state = RSMSI_STATE_ABORT_CONNECT;
6008 6005 rsmsharelock_release(im_seg);
6009 6006 rsmseglock_release(im_seg);
6010 6007 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6011 6008 "rsmseg_unload done: RSM_STATE_CONNECTING\n"));
6012 6009 return;
6013 6010 }
6014 6011
6015 6012 if (im_seg->s_flags & RSM_FORCE_DISCONNECT) {
6016 6013 if (im_seg->s_ckl != NULL) {
6017 6014 int e;
6018 6015 /* Setup protections for remap */
6019 6016 maxprot = PROT_USER;
6020 6017 if (im_seg->s_mode & RSM_PERM_READ) {
6021 6018 maxprot |= PROT_READ;
6022 6019 }
6023 6020 if (im_seg->s_mode & RSM_PERM_WRITE) {
6024 6021 maxprot |= PROT_WRITE;
6025 6022 }
6026 6023 hdl = im_seg->s_ckl;
6027 6024 for (; hdl != NULL; hdl = hdl->c_next) {
6028 6025 e = devmap_umem_remap(hdl->c_dhp, rsm_dip,
6029 6026 remap_cookie,
6030 6027 hdl->c_off, hdl->c_len,
6031 6028 maxprot, 0, NULL);
6032 6029
6033 6030 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6034 6031 "remap returns %d\n", e));
6035 6032 }
6036 6033 }
6037 6034
6038 6035 (void) rsm_closeconnection(im_seg, &shared_cookie);
6039 6036
6040 6037 if (shared_cookie != NULL) {
6041 6038 /*
6042 6039 * inform the exporting node so this import
6043 6040 * can be deleted from the list of importers.
6044 6041 */
6045 6042 request.rsmipc_hdr.rsmipc_type =
6046 6043 RSMIPC_MSG_NOTIMPORTING;
6047 6044 request.rsmipc_key = im_seg->s_segid;
6048 6045 request.rsmipc_segment_cookie = shared_cookie;
6049 6046 rsmseglock_release(im_seg);
6050 6047 (void) rsmipc_send(im_seg->s_node, &request,
6051 6048 RSM_NO_REPLY);
6052 6049 } else {
6053 6050 rsmseglock_release(im_seg);
6054 6051 }
6055 6052 	} else
6057 6054 rsmseglock_release(im_seg);
6058 6055
6059 6056 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_unload done\n"));
6060 6057
6061 6058 }
6062 6059
6063 6060 /* ****************************** Importer Calls ************************ */
6064 6061
6065 6062 static int
6066 6063 rsm_access(uid_t owner, gid_t group, int perm, int mode, const struct cred *cr)
6067 6064 {
6068 6065 int shifts = 0;
6069 6066
6070 6067 if (crgetuid(cr) != owner) {
6071 6068 shifts += 3;
6072 6069 if (!groupmember(group, cr))
6073 6070 shifts += 3;
6074 6071 }
6075 6072
6076 6073 mode &= ~(perm << shifts);
6077 6074
6078 6075 if (mode == 0)
6079 6076 return (0);
6080 6077
6081 6078 return (secpolicy_rsm_access(cr, owner, mode));
6082 6079 }
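/*
 * Editor's note: a worked example (not part of rsm.c) of the shift trick
 * in rsm_access above, using Unix-style octal mode bits. The request is
 * expressed in the owner-class (high) bits, and the class that applies
 * to the caller is shifted up to meet it: 0 for the owner, 3 for a group
 * member, 6 for everyone else. Names are hypothetical.
 */
#include <stdio.h>

static int
check(int perm, int mode, int shifts)	/* mirrors the core of rsm_access */
{
	mode &= ~(perm << shifts);
	return (mode == 0);	/* 1 == allowed without policy override */
}

int
main(void)
{
	/* export mode 0644, caller is a group member (shifts == 3) */
	(void) printf("group read:  %d\n", check(0444 & 0644, 0400, 3)); /* 1 */
	(void) printf("group write: %d\n", check(0222 & 0644, 0200, 3)); /* 0 */
	return (0);
}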
6083 6080
6084 6081
6085 6082 static int
6086 6083 rsm_connect(rsmseg_t *seg, rsm_ioctlmsg_t *msg, cred_t *cred,
6087 6084 intptr_t dataptr, int mode)
6088 6085 {
6089 6086 int e;
6090 6087 int recheck_state = 0;
6091 6088 void *shared_cookie;
6092 6089 rsmipc_request_t request;
6093 6090 rsmipc_reply_t reply;
6094 6091 rsm_permission_t access;
6095 6092 adapter_t *adapter;
6096 6093 rsm_addr_t addr = 0;
6097 6094 rsm_import_share_t *sharedp;
6098 6095 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);
6099 6096
6100 6097 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_connect enter\n"));
6101 6098
6102 6099 adapter = rsm_getadapter(msg, mode);
6103 6100 if (adapter == NULL) {
6104 6101 DBG_PRINTF((category, RSM_ERR,
6105 6102 "rsm_connect done:ENODEV adapter=NULL\n"));
6106 6103 return (RSMERR_CTLR_NOT_PRESENT);
6107 6104 }
6108 6105
6109 6106 if ((adapter == &loopback_adapter) && (msg->nodeid != my_nodeid)) {
6110 6107 rsmka_release_adapter(adapter);
6111 6108 DBG_PRINTF((category, RSM_ERR,
6112 6109 "rsm_connect done:ENODEV loopback\n"));
6113 6110 return (RSMERR_CTLR_NOT_PRESENT);
6114 6111 }
6115 6112
6116 6113
6117 6114 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
6118 6115 ASSERT(seg->s_state == RSM_STATE_NEW);
6119 6116
6120 6117 /*
6121 6118 * Translate perm to access
6122 6119 */
6123 6120 if (msg->perm & ~RSM_PERM_RDWR) {
6124 6121 rsmka_release_adapter(adapter);
6125 6122 DBG_PRINTF((category, RSM_ERR,
6126 6123 "rsm_connect done:EINVAL invalid perms\n"));
6127 6124 return (RSMERR_BAD_PERMS);
6128 6125 }
6129 6126 access = 0;
6130 6127 if (msg->perm & RSM_PERM_READ)
6131 6128 access |= RSM_ACCESS_READ;
6132 6129 if (msg->perm & RSM_PERM_WRITE)
6133 6130 access |= RSM_ACCESS_WRITE;
6134 6131
6135 6132 seg->s_node = msg->nodeid;
6136 6133
6137 6134 /*
6138 6135 * Adding to the import list locks the segment; release the segment
6139 6136 * lock so we can get the reply for the send.
6140 6137 */
6141 6138 e = rsmimport_add(seg, msg->key);
6142 6139 if (e) {
6143 6140 rsmka_release_adapter(adapter);
6144 6141 DBG_PRINTF((category, RSM_ERR,
6145 6142 "rsm_connect done:rsmimport_add failed %d\n", e));
6146 6143 return (e);
6147 6144 }
6148 6145 seg->s_state = RSM_STATE_CONNECTING;
6149 6146
6150 6147 /*
6151 6148 * Set the s_adapter field here so as to have a valid comparison of
6152 6149 * the adapter and the s_adapter value during rsmshare_get. For
6153 6150 * any error, set s_adapter to NULL before doing a release_adapter
6154 6151 */
6155 6152 seg->s_adapter = adapter;
6156 6153
6157 6154 rsmseglock_release(seg);
6158 6155
6159 6156 /*
6160 6157 * get the pointer to the shared data structure; the
6161 6158 * shared data is locked and refcount has been incremented
6162 6159 */
6163 6160 sharedp = rsmshare_get(msg->key, msg->nodeid, adapter, seg);
6164 6161
6165 6162 ASSERT(rsmsharelock_held(seg));
6166 6163
6167 6164 do {
6168 6165 /* flag indicates whether we need to recheck the state */
6169 6166 recheck_state = 0;
6170 6167 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6171 6168 "rsm_connect:RSMSI_STATE=%d\n", sharedp->rsmsi_state));
6172 6169 switch (sharedp->rsmsi_state) {
6173 6170 case RSMSI_STATE_NEW:
6174 6171 sharedp->rsmsi_state = RSMSI_STATE_CONNECTING;
6175 6172 break;
6176 6173 case RSMSI_STATE_CONNECTING:
6177 6174 /* FALLTHRU */
6178 6175 case RSMSI_STATE_CONN_QUIESCE:
6179 6176 /* FALLTHRU */
6180 6177 case RSMSI_STATE_MAP_QUIESCE:
6181 6178 /* wait for the state to change */
6182 6179 while ((sharedp->rsmsi_state ==
6183 6180 RSMSI_STATE_CONNECTING) ||
6184 6181 (sharedp->rsmsi_state ==
6185 6182 RSMSI_STATE_CONN_QUIESCE) ||
6186 6183 (sharedp->rsmsi_state ==
6187 6184 RSMSI_STATE_MAP_QUIESCE)) {
6188 6185 if (cv_wait_sig(&sharedp->rsmsi_cv,
6189 6186 &sharedp->rsmsi_lock) == 0) {
6190 6187 /* signalled - clean up and return */
6191 6188 rsmsharelock_release(seg);
6192 6189 rsmimport_rm(seg);
6193 6190 seg->s_adapter = NULL;
6194 6191 rsmka_release_adapter(adapter);
6195 6192 seg->s_state = RSM_STATE_NEW;
6196 6193 DBG_PRINTF((category, RSM_ERR,
6197 6194 "rsm_connect done: INTERRUPTED\n"));
6198 6195 return (RSMERR_INTERRUPTED);
6199 6196 }
6200 6197 }
6201 6198 /*
6202 6199 * the state changed, loop back and check what it is
6203 6200 */
6204 6201 recheck_state = 1;
6205 6202 break;
6206 6203 case RSMSI_STATE_ABORT_CONNECT:
6207 6204 /* exit the loop and clean up further down */
6208 6205 break;
6209 6206 case RSMSI_STATE_CONNECTED:
6210 6207 /* already connected, good - fall through */
6211 6208 case RSMSI_STATE_MAPPED:
6212 6209 /* already mapped, wow - fall through */
6213 6210 /* access validation etc is done further down */
6214 6211 break;
6215 6212 case RSMSI_STATE_DISCONNECTED:
6216 6213 /* disconnected - so reconnect now */
6217 6214 sharedp->rsmsi_state = RSMSI_STATE_CONNECTING;
6218 6215 break;
6219 6216 default:
6220 6217 ASSERT(0); /* Invalid State */
6221 6218 }
6222 6219 } while (recheck_state);
6223 6220
6224 6221 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) {
6225 6222 /* we are the first to connect */
6226 6223 rsmsharelock_release(seg);
6227 6224
6228 6225 if (msg->nodeid != my_nodeid) {
6229 6226 addr = get_remote_hwaddr(adapter, msg->nodeid);
6230 6227
6231 6228 if ((int64_t)addr < 0) {
6232 6229 rsmsharelock_acquire(seg);
6233 6230 rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING,
6234 6231 RSMSI_STATE_NEW);
6235 6232 rsmsharelock_release(seg);
6236 6233 rsmimport_rm(seg);
6237 6234 seg->s_adapter = NULL;
6238 6235 rsmka_release_adapter(adapter);
6239 6236 seg->s_state = RSM_STATE_NEW;
6240 6237 DBG_PRINTF((category, RSM_ERR,
6241 6238 "rsm_connect done: hwaddr<0\n"));
6242 6239 return (RSMERR_INTERNAL_ERROR);
6243 6240 }
6244 6241 } else {
6245 6242 addr = adapter->hwaddr;
6246 6243 }
6247 6244
6248 6245 /*
6249 6246 * send request to node [src, dest, key, msgid] and get back
6250 6247 * [status, msgid, cookie]
6251 6248 */
6252 6249 request.rsmipc_key = msg->key;
6253 6250 /*
6254 6251 * we need the s_mode of the exporter so pass
6255 6252 * RSM_ACCESS_TRUSTED
6256 6253 */
6257 6254 request.rsmipc_perm = RSM_ACCESS_TRUSTED;
6258 6255 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SEGCONNECT;
6259 6256 request.rsmipc_adapter_hwaddr = addr;
6260 6257 request.rsmipc_segment_cookie = sharedp;
6261 6258
6262 6259 e = (int)rsmipc_send(msg->nodeid, &request, &reply);
6263 6260 if (e) {
6264 6261 rsmsharelock_acquire(seg);
6265 6262 rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING,
6266 6263 RSMSI_STATE_NEW);
6267 6264 rsmsharelock_release(seg);
6268 6265 rsmimport_rm(seg);
6269 6266 seg->s_adapter = NULL;
6270 6267 rsmka_release_adapter(adapter);
6271 6268 seg->s_state = RSM_STATE_NEW;
6272 6269 DBG_PRINTF((category, RSM_ERR,
6273 6270 "rsm_connect done:rsmipc_send failed %d\n", e));
6274 6271 return (e);
6275 6272 }
6276 6273
6277 6274 if (reply.rsmipc_status != RSM_SUCCESS) {
6278 6275 rsmsharelock_acquire(seg);
6279 6276 rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING,
6280 6277 RSMSI_STATE_NEW);
6281 6278 rsmsharelock_release(seg);
6282 6279 rsmimport_rm(seg);
6283 6280 seg->s_adapter = NULL;
6284 6281 rsmka_release_adapter(adapter);
6285 6282 seg->s_state = RSM_STATE_NEW;
6286 6283 DBG_PRINTF((category, RSM_ERR,
6287 6284 "rsm_connect done:rsmipc_send reply err %d\n",
6288 6285 reply.rsmipc_status));
6289 6286 return (reply.rsmipc_status);
6290 6287 }
6291 6288
6292 6289 rsmsharelock_acquire(seg);
6293 6290 /* store the information recvd into the shared data struct */
6294 6291 sharedp->rsmsi_mode = reply.rsmipc_mode;
6295 6292 sharedp->rsmsi_uid = reply.rsmipc_uid;
6296 6293 sharedp->rsmsi_gid = reply.rsmipc_gid;
6297 6294 sharedp->rsmsi_seglen = reply.rsmipc_seglen;
6298 6295 sharedp->rsmsi_cookie = sharedp;
6299 6296 }
6300 6297
6301 6298 rsmsharelock_release(seg);
6302 6299
6303 6300 /*
6304 6301 * Get the segment lock and check for a force disconnect
6305 6302 * from the export side which would have changed the state
6306 6303 * back to RSM_STATE_NEW. Once the segment lock is acquired a
6307 6304 * force disconnect will be held off until the connection
6308 6305 * has completed.
6309 6306 */
6310 6307 rsmseglock_acquire(seg);
6311 6308 rsmsharelock_acquire(seg);
6312 6309 ASSERT(seg->s_state == RSM_STATE_CONNECTING ||
6313 6310 seg->s_state == RSM_STATE_ABORT_CONNECT);
6314 6311
6315 6312 shared_cookie = sharedp->rsmsi_cookie;
6316 6313
6317 6314 if ((seg->s_state == RSM_STATE_ABORT_CONNECT) ||
6318 6315 (sharedp->rsmsi_state == RSMSI_STATE_ABORT_CONNECT)) {
6319 6316 seg->s_state = RSM_STATE_NEW;
6320 6317 seg->s_adapter = NULL;
6321 6318 rsmsharelock_release(seg);
6322 6319 rsmseglock_release(seg);
6323 6320 rsmimport_rm(seg);
6324 6321 rsmka_release_adapter(adapter);
6325 6322
6326 6323 rsmsharelock_acquire(seg);
6327 6324 if (!(sharedp->rsmsi_flags & RSMSI_FLAGS_ABORTDONE)) {
6328 6325 /*
6329 6326 * set a flag indicating abort handling has been
6330 6327 * done
6331 6328 */
6332 6329 sharedp->rsmsi_flags |= RSMSI_FLAGS_ABORTDONE;
6333 6330 rsmsharelock_release(seg);
6334 6331 /* send a message to exporter - only once */
6335 6332 (void) rsm_send_notimporting(msg->nodeid,
6336 6333 msg->key, shared_cookie);
6337 6334 rsmsharelock_acquire(seg);
6338 6335 /*
6339 6336 * wake up any waiting importers and inform that
6340 6337 * connection has been aborted
6341 6338 */
6342 6339 cv_broadcast(&sharedp->rsmsi_cv);
6343 6340 }
6344 6341 rsmsharelock_release(seg);
6345 6342
6346 6343 DBG_PRINTF((category, RSM_ERR,
6347 6344 "rsm_connect done: RSM_STATE_ABORT_CONNECT\n"));
6348 6345 return (RSMERR_INTERRUPTED);
6349 6346 }
6350 6347
6351 6348
6352 6349 /*
6353 6350 * We need to verify that this process has access
6354 6351 */
6355 6352 e = rsm_access(sharedp->rsmsi_uid, sharedp->rsmsi_gid,
6356 6353 access & sharedp->rsmsi_mode,
6357 6354 (int)(msg->perm & RSM_PERM_RDWR), cred);
6358 6355 if (e) {
6359 6356 rsmsharelock_release(seg);
6360 6357 seg->s_state = RSM_STATE_NEW;
6361 6358 seg->s_adapter = NULL;
6362 6359 rsmseglock_release(seg);
6363 6360 rsmimport_rm(seg);
6364 6361 rsmka_release_adapter(adapter);
6365 6362 /*
6366 6363 		 * No need to lock the segment; it has been removed
6367 6364 * from the hash table
6368 6365 */
6369 6366 rsmsharelock_acquire(seg);
6370 6367 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) {
6371 6368 rsmsharelock_release(seg);
6372 6369 /* this is the first importer */
6373 6370
6374 6371 (void) rsm_send_notimporting(msg->nodeid, msg->key,
6375 6372 shared_cookie);
6376 6373 rsmsharelock_acquire(seg);
6377 6374 sharedp->rsmsi_state = RSMSI_STATE_NEW;
6378 6375 cv_broadcast(&sharedp->rsmsi_cv);
6379 6376 }
6380 6377 rsmsharelock_release(seg);
6381 6378
6382 6379 DBG_PRINTF((category, RSM_ERR,
6383 6380 "rsm_connect done: ipcaccess failed\n"));
6384 6381 return (RSMERR_PERM_DENIED);
6385 6382 }
6386 6383
6387 6384 /* update state and cookie */
6388 6385 seg->s_segid = sharedp->rsmsi_segid;
6389 6386 seg->s_len = sharedp->rsmsi_seglen;
6390 6387 seg->s_mode = access & sharedp->rsmsi_mode;
6391 6388 seg->s_pid = ddi_get_pid();
6392 6389 seg->s_mapinfo = NULL;
6393 6390
6394 6391 if (seg->s_node != my_nodeid) {
6395 6392 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) {
6396 6393 e = adapter->rsmpi_ops->rsm_connect(
6397 6394 adapter->rsmpi_handle,
6398 6395 addr, seg->s_segid, &sharedp->rsmsi_handle);
6399 6396
6400 6397 if (e != RSM_SUCCESS) {
6401 6398 seg->s_state = RSM_STATE_NEW;
6402 6399 seg->s_adapter = NULL;
6403 6400 rsmsharelock_release(seg);
6404 6401 rsmseglock_release(seg);
6405 6402 rsmimport_rm(seg);
6406 6403 rsmka_release_adapter(adapter);
6407 6404 /*
6408 6405 * inform the exporter to delete this importer
6409 6406 */
6410 6407 (void) rsm_send_notimporting(msg->nodeid,
6411 6408 msg->key, shared_cookie);
6412 6409
6413 6410 /*
6414 6411 * Now inform any waiting importers to
6415 6412 * retry connect. This needs to be done
6416 6413 * after sending notimporting so that
6417 6414 * the notimporting is sent before a waiting
6418 6415 			 * importer sends a segconnect while retrying.
6419 6416 *
6420 6417 				 * No need to lock the segment; it has been
6421 6418 * from the hash table
6422 6419 */
6423 6420
6424 6421 rsmsharelock_acquire(seg);
6425 6422 sharedp->rsmsi_state = RSMSI_STATE_NEW;
6426 6423 cv_broadcast(&sharedp->rsmsi_cv);
6427 6424 rsmsharelock_release(seg);
6428 6425
6429 6426 DBG_PRINTF((category, RSM_ERR,
6430 6427 "rsm_connect error %d\n", e));
6431 6428 if (e == RSMERR_SEG_NOT_PUBLISHED_TO_RSM_ADDR)
6432 6429 return (
6433 6430 RSMERR_SEG_NOT_PUBLISHED_TO_NODE);
6434 6431 else if ((e == RSMERR_RSM_ADDR_UNREACHABLE) ||
6435 6432 (e == RSMERR_UNKNOWN_RSM_ADDR))
6436 6433 return (RSMERR_REMOTE_NODE_UNREACHABLE);
6437 6434 else
6438 6435 return (e);
6439 6436 }
6440 6437
6441 6438 }
6442 6439 seg->s_handle.in = sharedp->rsmsi_handle;
6443 6440
6444 6441 }
6445 6442
6446 6443 seg->s_state = RSM_STATE_CONNECT;
6447 6444
6448 6445
6449 6446 seg->s_flags &= ~RSM_IMPORT_DUMMY; /* clear dummy flag */
6450 6447 if (bar_va) {
6451 6448 /* increment generation number on barrier page */
6452 6449 atomic_inc_16(bar_va + seg->s_hdr.rsmrc_num);
6453 6450 /* return user off into barrier page where status will be */
6454 6451 msg->off = (int)seg->s_hdr.rsmrc_num;
6455 6452 msg->gnum = bar_va[msg->off]; /* gnum race */
6456 6453 } else {
6457 6454 msg->off = 0;
6458 6455 msg->gnum = 0; /* gnum race */
6459 6456 }
6460 6457
6461 6458 msg->len = (int)sharedp->rsmsi_seglen;
6462 6459 msg->rnum = seg->s_minor;
6463 6460 rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING, RSMSI_STATE_CONNECTED);
6464 6461 rsmsharelock_release(seg);
6465 6462 rsmseglock_release(seg);
6466 6463
6467 6464 /* Return back to user the segment size & perm in case it's needed */
6468 6465
6469 6466 #ifdef _MULTI_DATAMODEL
6470 6467 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
6471 6468 rsm_ioctlmsg32_t msg32;
6472 6469
6473 6470 if (msg->len > UINT_MAX)
6474 6471 msg32.len = RSM_MAXSZ_PAGE_ALIGNED;
6475 6472 else
6476 6473 msg32.len = msg->len;
6477 6474 msg32.off = msg->off;
6478 6475 msg32.perm = msg->perm;
6479 6476 msg32.gnum = msg->gnum;
6480 6477 msg32.rnum = msg->rnum;
6481 6478
6482 6479 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6483 6480 "rsm_connect done\n"));
6484 6481
6485 6482 if (ddi_copyout((caddr_t)&msg32, (caddr_t)dataptr,
6486 6483 sizeof (msg32), mode))
6487 6484 return (RSMERR_BAD_ADDR);
6488 6485 else
6489 6486 return (RSM_SUCCESS);
6490 6487 }
6491 6488 #endif
6492 6489 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_connect done\n"));
6493 6490
6494 6491 if (ddi_copyout((caddr_t)msg, (caddr_t)dataptr, sizeof (*msg),
6495 6492 mode))
6496 6493 return (RSMERR_BAD_ADDR);
6497 6494 else
6498 6495 return (RSM_SUCCESS);
6499 6496 }
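/*
 * Editor's note: sketch (not part of rsm.c) of the _MULTI_DATAMODEL
 * pattern used at the end of rsm_connect - when a 32-bit process calls
 * into a 64-bit kernel, wide fields are clamped and copied into a
 * narrow struct before the copyout. Struct and function names are
 * hypothetical; rsm_connect clamps to a page-aligned maximum rather
 * than UINT_MAX.
 */
#include <limits.h>

struct msg64 { unsigned long len; int off; };	/* native form */
struct msg32 { unsigned int  len; int off; };	/* ILP32 form */

static void
narrow_msg(const struct msg64 *in, struct msg32 *out)
{
	/* clamp rather than silently truncate the wide field */
	out->len = (in->len > UINT_MAX) ? UINT_MAX : (unsigned int)in->len;
	out->off = in->off;
}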
6500 6497
6501 6498 static int
6502 6499 rsm_unmap(rsmseg_t *seg)
6503 6500 {
6504 6501 int err;
6505 6502 adapter_t *adapter;
6506 6503 rsm_import_share_t *sharedp;
6507 6504 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);
6508 6505
6509 6506 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6510 6507 "rsm_unmap enter %u\n", seg->s_segid));
6511 6508
6512 6509 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
6513 6510
6514 6511 /* assert seg is locked */
6515 6512 ASSERT(rsmseglock_held(seg));
6516 6513 ASSERT(seg->s_state != RSM_STATE_MAPPING);
6517 6514
6518 6515 if ((seg->s_state != RSM_STATE_ACTIVE) &&
6519 6516 (seg->s_state != RSM_STATE_MAP_QUIESCE)) {
6520 6517 /* segment unmap has already been done */
6521 6518 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unmap done\n"));
6522 6519 return (RSM_SUCCESS);
6523 6520 }
6524 6521
6525 6522 sharedp = seg->s_share;
6526 6523
6527 6524 rsmsharelock_acquire(seg);
6528 6525
6529 6526 /*
6530 6527 * - shared data struct is in MAPPED or MAP_QUIESCE state
6531 6528 */
6532 6529
6533 6530 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED ||
6534 6531 sharedp->rsmsi_state == RSMSI_STATE_MAP_QUIESCE);
6535 6532
6536 6533 /*
6537 6534 * Unmap pages - previously rsm_memseg_import_unmap was called only if
6538 6535 * the segment cookie list was NULL; but it is always NULL when
6539 6536 * called from rsmmap_unmap and won't be NULL when called for
6540 6537 * a force disconnect - so the check for NULL cookie list was removed
6541 6538 */
6542 6539
6543 6540 ASSERT(sharedp->rsmsi_mapcnt > 0);
6544 6541
6545 6542 sharedp->rsmsi_mapcnt--;
6546 6543
6547 6544 if (sharedp->rsmsi_mapcnt == 0) {
6548 6545 if (sharedp->rsmsi_state == RSMSI_STATE_MAPPED) {
6549 6546 /* unmap the shared RSMPI mapping */
6550 6547 adapter = seg->s_adapter;
6551 6548 if (seg->s_node != my_nodeid) {
6552 6549 ASSERT(sharedp->rsmsi_handle != NULL);
6553 6550 err = adapter->rsmpi_ops->
6554 6551 rsm_unmap(sharedp->rsmsi_handle);
6555 6552 DBG_PRINTF((category, RSM_DEBUG,
6556 6553 "rsm_unmap: rsmpi unmap %d\n", err));
6557 6554 rsm_free_mapinfo(sharedp->rsmsi_mapinfo);
6558 6555 sharedp->rsmsi_mapinfo = NULL;
6559 6556 }
6560 6557 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
6561 6558 } else { /* MAP_QUIESCE --munmap()--> CONN_QUIESCE */
6562 6559 sharedp->rsmsi_state = RSMSI_STATE_CONN_QUIESCE;
6563 6560 }
6564 6561 }
6565 6562
6566 6563 rsmsharelock_release(seg);
6567 6564
6568 6565 /*
6569 6566 * The s_cookie field is used to store the cookie returned from the
6570 6567 * ddi_umem_lock when binding the pages for an export segment. This
6571 6568 * is the primary use of the s_cookie field and does not normally
6572 6569 * pertain to any importing segment except in the loopback case.
6573 6570 * For the loopback case, the import segment and export segment are
6574 6571 * on the same node, the s_cookie field of the segment structure for
6575 6572 * the importer is initialized to the s_cookie field in the exported
6576 6573 * segment during the map operation and is used during the call to
6577 6574 * devmap_umem_setup for the import mapping.
6578 6575 * Thus, during unmap, we simply need to set s_cookie to NULL to
6579 6576 * indicate that the mapping no longer exists.
6580 6577 */
6581 6578 seg->s_cookie = NULL;
6582 6579
6583 6580 seg->s_mapinfo = NULL;
6584 6581
6585 6582 if (seg->s_state == RSM_STATE_ACTIVE)
6586 6583 seg->s_state = RSM_STATE_CONNECT;
6587 6584 else
6588 6585 seg->s_state = RSM_STATE_CONN_QUIESCE;
6589 6586
6590 6587 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unmap done\n"));
6591 6588
6592 6589 return (RSM_SUCCESS);
6593 6590 }
6594 6591
6595 6592 /*
6596 6593  * A non-NULL cookie returned here indicates that the caller is the
6597 6594  * last importer; the cookie can be used in the RSMIPC_NOT_IMPORTING
6598 6595  * message.
6599 6596 */
6600 6597 static int
6601 6598 rsm_closeconnection(rsmseg_t *seg, void **cookie)
6602 6599 {
6603 6600 int e;
6604 6601 adapter_t *adapter;
6605 6602 rsm_import_share_t *sharedp;
6606 6603 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);
6607 6604
6608 6605 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6609 6606 "rsm_closeconnection enter\n"));
6610 6607
6611 6608 *cookie = (void *)NULL;
6612 6609
6613 6610 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
6614 6611
6615 6612 /* assert seg is locked */
6616 6613 ASSERT(rsmseglock_held(seg));
6617 6614
6618 6615 if (seg->s_state == RSM_STATE_DISCONNECT) {
6619 6616 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6620 6617 "rsm_closeconnection done: already disconnected\n"));
6621 6618 return (RSM_SUCCESS);
6622 6619 }
6623 6620
6624 6621 /* wait for all putv/getv ops to get done */
6625 6622 while (seg->s_rdmacnt > 0) {
6626 6623 cv_wait(&seg->s_cv, &seg->s_lock);
6627 6624 }
6628 6625
6629 6626 (void) rsm_unmap(seg);
6630 6627
6631 6628 ASSERT(seg->s_state == RSM_STATE_CONNECT ||
6632 6629 seg->s_state == RSM_STATE_CONN_QUIESCE);
6633 6630
6634 6631 adapter = seg->s_adapter;
6635 6632 sharedp = seg->s_share;
6636 6633
6637 6634 ASSERT(sharedp != NULL);
6638 6635
6639 6636 rsmsharelock_acquire(seg);
6640 6637
6641 6638 /*
6642 6639 * Disconnect on adapter
6643 6640 *
6644 6641 	 * The current algorithm is stateless; an importer does not have to
6645 6642 	 * contact the server when it goes away, since the server only grants
6646 6643 	 * permissions. The adapters will, of course, talk to terminate the connect.
6647 6644 *
6648 6645 * disconnect is needed only if we are CONNECTED not in CONN_QUIESCE
6649 6646 */
6650 6647 if ((sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) &&
6651 6648 (sharedp->rsmsi_node != my_nodeid)) {
6652 6649
6653 6650 if (sharedp->rsmsi_refcnt == 1) {
6654 6651 /* this is the last importer */
6655 6652 ASSERT(sharedp->rsmsi_mapcnt == 0);
6656 6653
6657 6654 e = adapter->rsmpi_ops->
6658 6655 rsm_disconnect(sharedp->rsmsi_handle);
6659 6656 if (e != RSM_SUCCESS) {
6660 6657 DBG_PRINTF((category, RSM_DEBUG,
6661 6658 "rsm:disconnect failed seg=%x:err=%d\n",
6662 6659 seg->s_key, e));
6663 6660 }
6664 6661 }
6665 6662 }
6666 6663
6667 6664 seg->s_handle.in = NULL;
6668 6665
6669 6666 sharedp->rsmsi_refcnt--;
6670 6667
6671 6668 if (sharedp->rsmsi_refcnt == 0) {
6672 6669 *cookie = (void *)sharedp->rsmsi_cookie;
6673 6670 sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED;
6674 6671 sharedp->rsmsi_handle = NULL;
6675 6672 rsmsharelock_release(seg);
6676 6673
6677 6674 /* clean up the shared data structure */
6678 6675 mutex_destroy(&sharedp->rsmsi_lock);
6679 6676 cv_destroy(&sharedp->rsmsi_cv);
6680 6677 kmem_free((void *)(sharedp), sizeof (rsm_import_share_t));
6681 6678
6682 6679 } else {
6683 6680 rsmsharelock_release(seg);
6684 6681 }
6685 6682
6686 6683 /* increment generation number on barrier page */
6687 6684 if (bar_va) {
6688 6685 atomic_inc_16(bar_va + seg->s_hdr.rsmrc_num);
6689 6686 }
6690 6687
6691 6688 /*
6692 6689 * The following needs to be done after any
6693 6690 * rsmsharelock calls which use seg->s_share.
6694 6691 */
6695 6692 seg->s_share = NULL;
6696 6693
6697 6694 seg->s_state = RSM_STATE_DISCONNECT;
6698 6695 /* signal anyone waiting in the CONN_QUIESCE state */
6699 6696 cv_broadcast(&seg->s_cv);
6700 6697
6701 6698 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6702 6699 "rsm_closeconnection done\n"));
6703 6700
6704 6701 return (RSM_SUCCESS);
6705 6702 }
6706 6703
6707 6704 int
6708 6705 rsm_disconnect(rsmseg_t *seg)
6709 6706 {
6710 6707 rsmipc_request_t request;
6711 6708 void *shared_cookie;
6712 6709 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);
6713 6710
6714 6711 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_disconnect enter\n"));
6715 6712
6716 6713 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
6717 6714
6718 6715 /* assert seg isn't locked */
6719 6716 ASSERT(!rsmseglock_held(seg));
6720 6717
6721 6718
6722 6719 /* Remove segment from imported list */
6723 6720 rsmimport_rm(seg);
6724 6721
6725 6722 /* acquire the segment */
6726 6723 rsmseglock_acquire(seg);
6727 6724
6728 6725 /* wait until segment leaves the mapping state */
6729 6726 while (seg->s_state == RSM_STATE_MAPPING)
6730 6727 cv_wait(&seg->s_cv, &seg->s_lock);
6731 6728
6732 6729 if (seg->s_state == RSM_STATE_DISCONNECT) {
6733 6730 seg->s_state = RSM_STATE_NEW;
6734 6731 rsmseglock_release(seg);
6735 6732 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6736 6733 "rsm_disconnect done: already disconnected\n"));
6737 6734 return (RSM_SUCCESS);
6738 6735 }
6739 6736
6740 6737 (void) rsm_closeconnection(seg, &shared_cookie);
6741 6738
6742 6739 /* update state */
6743 6740 seg->s_state = RSM_STATE_NEW;
6744 6741
6745 6742 if (shared_cookie != NULL) {
6746 6743 /*
6747 6744 * This is the last importer so inform the exporting node
6748 6745 * so this import can be deleted from the list of importers.
6749 6746 */
6750 6747 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_NOTIMPORTING;
6751 6748 request.rsmipc_key = seg->s_segid;
6752 6749 request.rsmipc_segment_cookie = shared_cookie;
6753 6750 rsmseglock_release(seg);
6754 6751 (void) rsmipc_send(seg->s_node, &request, RSM_NO_REPLY);
6755 6752 } else {
6756 6753 rsmseglock_release(seg);
6757 6754 }
6758 6755
6759 6756 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_disconnect done\n"));
6760 6757
6761 6758 return (DDI_SUCCESS);
6762 6759 }
6763 6760
6764 6761 /*ARGSUSED*/
6765 6762 static int
6766 6763 rsm_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
6767 6764 struct pollhead **phpp)
6768 6765 {
6769 6766 minor_t rnum;
6770 6767 rsmresource_t *res;
6771 6768 rsmseg_t *seg;
6772 6769 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
6773 6770
6774 6771 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_chpoll enter\n"));
6775 6772
6776 6773 /* find minor, no lock */
6777 6774 rnum = getminor(dev);
6778 6775 res = rsmresource_lookup(rnum, RSM_NOLOCK);
6779 6776
6780 6777 /* poll is supported only for export/import segments */
6781 6778 if ((res == NULL) || (res == RSMRC_RESERVED) ||
6782 6779 (res->rsmrc_type == RSM_RESOURCE_BAR)) {
6783 6780 return (ENXIO);
6784 6781 }
6785 6782
6786 6783 *reventsp = 0;
6787 6784
6788 6785 /*
6789 6786 * An exported segment must be in state RSM_STATE_EXPORT; an
6790 6787 * imported segment must be in state RSM_STATE_ACTIVE.
6791 6788 */
6792 6789 seg = (rsmseg_t *)res;
6793 6790
6794 6791 if (seg->s_pollevent) {
6795 6792 *reventsp = POLLRDNORM;
6796 6793 } else if (!anyyet) {
6797 6794 /* cannot take segment lock here */
6798 6795 *phpp = &seg->s_poll;
6799 6796 seg->s_pollflag |= RSM_SEGMENT_POLL;
6800 6797 }
6801 6798 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_chpoll done\n"));
6802 6799 return (0);
6803 6800 }
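/*
 * Illustrative sketch (not driver code): how a user-level client
 * might block on the chpoll entry point above. rsm_chpoll() reports
 * POLLRDNORM once s_pollevent is nonzero; the pending event is then
 * consumed through RSM_IOCTL_CONSUMEEVENT (see below). The assumption
 * that segfd is a descriptor for the segment's minor device follows
 * from the getminor() lookup above; real clients normally go through
 * the RSMAPI library rather than polling the device directly.
 */
#include <poll.h>

static int
wait_for_segment_event(int segfd)
{
	struct pollfd pfd;

	pfd.fd = segfd;
	pfd.events = POLLRDNORM;
	pfd.revents = 0;

	/* block until the driver posts an event on this segment */
	if (poll(&pfd, 1, -1) < 0)
		return (-1);
	return ((pfd.revents & POLLRDNORM) ? 0 : -1);
}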
6804 6801
6805 6802
6806 6803
6807 6804 /* ************************* IOCTL Commands ********************* */
6808 6805
6809 6806 static rsmseg_t *
6810 6807 rsmresource_seg(rsmresource_t *res, minor_t rnum, cred_t *credp,
6811 6808 rsm_resource_type_t type)
6812 6809 {
6813 6810 /* get segment from resource handle */
6814 6811 rsmseg_t *seg;
6815 6812 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL);
6816 6813
6817 6814 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmresource_seg enter\n"));
6818 6815
6819 6816
6820 6817 if (res != RSMRC_RESERVED) {
6821 6818 seg = (rsmseg_t *)res;
6822 6819 } else {
6823 6820 /* Allocate segment now and bind it */
6824 6821 seg = rsmseg_alloc(rnum, credp);
6825 6822
6826 6823 /*
6827 6824 * if DR pre-processing is going on or DR is in progress
6828 6825 * then the new export segments should be in the NEW_QSCD state
6829 6826 */
6830 6827 if (type == RSM_RESOURCE_EXPORT_SEGMENT) {
6831 6828 mutex_enter(&rsm_drv_data.drv_lock);
6832 6829 if ((rsm_drv_data.drv_state ==
6833 6830 RSM_DRV_PREDEL_STARTED) ||
6834 6831 (rsm_drv_data.drv_state ==
6835 6832 RSM_DRV_PREDEL_COMPLETED) ||
6836 6833 (rsm_drv_data.drv_state ==
6837 6834 RSM_DRV_DR_IN_PROGRESS)) {
6838 6835 seg->s_state = RSM_STATE_NEW_QUIESCED;
6839 6836 }
6840 6837 mutex_exit(&rsm_drv_data.drv_lock);
6841 6838 }
6842 6839
6843 6840 rsmresource_insert(rnum, (rsmresource_t *)seg, type);
6844 6841 }
6845 6842
6846 6843 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmresource_seg done\n"));
6847 6844
6848 6845 return (seg);
6849 6846 }
6850 6847
6851 6848 static int
6852 6849 rsmexport_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg,
6853 6850 int mode, cred_t *credp)
6854 6851 {
6855 6852 int error;
6856 6853 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT | RSM_IOCTL);
6857 6854
6858 6855 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmexport_ioctl enter\n"));
6859 6856
6860 6857 arg = arg;
6861 6858 credp = credp;
6862 6859
6863 6860 ASSERT(seg != NULL);
6864 6861
6865 6862 switch (cmd) {
6866 6863 case RSM_IOCTL_BIND:
6867 6864 error = rsm_bind(seg, msg, arg, mode);
6868 6865 break;
6869 6866 case RSM_IOCTL_REBIND:
6870 6867 error = rsm_rebind(seg, msg);
6871 6868 break;
6872 6869 case RSM_IOCTL_UNBIND:
6873 6870 error = ENOTSUP;
6874 6871 break;
6875 6872 case RSM_IOCTL_PUBLISH:
6876 6873 error = rsm_publish(seg, msg, arg, mode);
6877 6874 break;
6878 6875 case RSM_IOCTL_REPUBLISH:
6879 6876 error = rsm_republish(seg, msg, mode);
6880 6877 break;
6881 6878 case RSM_IOCTL_UNPUBLISH:
6882 6879 error = rsm_unpublish(seg, 1);
6883 6880 break;
6884 6881 default:
6885 6882 error = EINVAL;
6886 6883 break;
6887 6884 }
6888 6885
6889 6886 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmexport_ioctl done: %d\n",
6890 6887 error));
6891 6888
6892 6889 return (error);
6893 6890 }
6894 6891 static int
6895 6892 rsmimport_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg,
6896 6893 int mode, cred_t *credp)
6897 6894 {
6898 6895 int error;
6899 6896 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
6900 6897
6901 6898 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmimport_ioctl enter\n"));
6902 6899
6903 6900 ASSERT(seg);
6904 6901
6905 6902 switch (cmd) {
6906 6903 case RSM_IOCTL_CONNECT:
6907 6904 error = rsm_connect(seg, msg, credp, arg, mode);
6908 6905 break;
6909 6906 default:
6910 6907 error = EINVAL;
6911 6908 break;
6912 6909 }
6913 6910
6914 6911 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmimport_ioctl done: %d\n",
6915 6912 error));
6916 6913 return (error);
6917 6914 }
6918 6915
6919 6916 static int
6920 6917 rsmbar_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg,
6921 6918 int mode)
6922 6919 {
6923 6920 int e;
6924 6921 adapter_t *adapter;
6925 6922 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
6926 6923
6927 6924 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmbar_ioctl enter\n"));
6928 6925
6929 6926
6930 6927 if ((seg->s_flags & RSM_IMPORT_DUMMY) != 0) {
6931 6928 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6932 6929 "rsmbar_ioctl done: RSM_IMPORT_DUMMY\n"));
6933 6930 return (RSMERR_CONN_ABORTED);
6934 6931 } else if (seg->s_node == my_nodeid) {
6935 6932 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6936 6933 "rsmbar_ioctl done: loopback\n"));
6937 6934 return (RSM_SUCCESS);
6938 6935 }
6939 6936
6940 6937 adapter = seg->s_adapter;
6941 6938
6942 6939 switch (cmd) {
6943 6940 case RSM_IOCTL_BAR_CHECK:
6944 6941 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6945 6942 "rsmbar_ioctl done: RSM_BAR_CHECK %d\n", bar_va));
6946 6943 return (bar_va ? RSM_SUCCESS : EINVAL);
6947 6944 case RSM_IOCTL_BAR_OPEN:
6948 6945 e = adapter->rsmpi_ops->
6949 6946 rsm_open_barrier_ctrl(adapter->rsmpi_handle, &msg->bar);
6950 6947 break;
6951 6948 case RSM_IOCTL_BAR_ORDER:
6952 6949 e = adapter->rsmpi_ops->rsm_order_barrier(&msg->bar);
6953 6950 break;
6954 6951 case RSM_IOCTL_BAR_CLOSE:
6955 6952 e = adapter->rsmpi_ops->rsm_close_barrier(&msg->bar);
6956 6953 break;
6957 6954 default:
6958 6955 e = EINVAL;
6959 6956 break;
6960 6957 }
6961 6958
6962 6959 if (e == RSM_SUCCESS) {
6963 6960 #ifdef _MULTI_DATAMODEL
6964 6961 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
6965 6962 rsm_ioctlmsg32_t msg32;
6966 6963 int i;
6967 6964
6968 6965 for (i = 0; i < 4; i++) {
6969 6966 msg32.bar.comp[i].u64 = msg->bar.comp[i].u64;
6970 6967 }
6971 6968
6972 6969 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6973 6970 "rsmbar_ioctl done\n"));
6974 6971 if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg,
6975 6972 sizeof (msg32), mode))
6976 6973 return (RSMERR_BAD_ADDR);
6977 6974 else
6978 6975 return (RSM_SUCCESS);
6979 6976 }
6980 6977 #endif
6981 6978 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6982 6979 "rsmbar_ioctl done\n"));
6983 6980 if (ddi_copyout((caddr_t)&msg->bar, (caddr_t)arg,
6984 6981 sizeof (*msg), mode))
6985 6982 return (RSMERR_BAD_ADDR);
6986 6983 else
6987 6984 return (RSM_SUCCESS);
6988 6985 }
6989 6986
6990 6987 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6991 6988 "rsmbar_ioctl done: error=%d\n", e));
6992 6989
6993 6990 return (e);
6994 6991 }
6995 6992
6996 6993 /*
6997 6994 * Ring the doorbell of the export segment to which this segment is
6998 6995 * connected.
6999 6996 */
7000 6997 static int
7001 6998 exportbell_ioctl(rsmseg_t *seg, int cmd /*ARGSUSED*/)
7002 6999 {
7003 7000 int e = 0;
7004 7001 rsmipc_request_t request;
7005 7002
7006 7003 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7007 7004
7008 7005 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exportbell_ioctl enter\n"));
7009 7006
7010 7007 request.rsmipc_key = seg->s_segid;
7011 7008 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL;
7012 7009 request.rsmipc_segment_cookie = NULL;
7013 7010 e = rsmipc_send(seg->s_node, &request, RSM_NO_REPLY);
7014 7011
7015 7012 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7016 7013 "exportbell_ioctl done: %d\n", e));
7017 7014
7018 7015 return (e);
7019 7016 }
7020 7017
7021 7018 /*
7022 7019 * Ring the doorbells of all segments importing this segment
7023 7020 */
7024 7021 static int
7025 7022 importbell_ioctl(rsmseg_t *seg, int cmd /*ARGSUSED*/)
7026 7023 {
7027 7024 importing_token_t *token = NULL;
7028 7025 rsmipc_request_t request;
7029 7026 int index;
7030 7027
7031 7028 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT | RSM_IOCTL);
7032 7029
7033 7030 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importbell_ioctl enter\n"));
7034 7031
7035 7032 ASSERT(seg->s_state != RSM_STATE_NEW &&
7036 7033 seg->s_state != RSM_STATE_NEW_QUIESCED);
7037 7034
7038 7035 request.rsmipc_key = seg->s_segid;
7039 7036 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL;
7040 7037
7041 7038 index = rsmhash(seg->s_segid);
7042 7039
7043 7040 token = importer_list.bucket[index];
7044 7041
7045 7042 while (token != NULL) {
7046 7043 if (seg->s_key == token->key) {
7047 7044 request.rsmipc_segment_cookie =
7048 7045 token->import_segment_cookie;
7049 7046 (void) rsmipc_send(token->importing_node,
7050 7047 &request, RSM_NO_REPLY);
7051 7048 }
7052 7049 token = token->next;
7053 7050 }
7054 7051
7055 7052 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7056 7053 "importbell_ioctl done\n"));
7057 7054 return (RSM_SUCCESS);
7058 7055 }
7059 7056
7060 7057 static int
7061 7058 rsm_consumeevent_copyin(caddr_t arg, rsm_consume_event_msg_t *msgp,
7062 7059 rsm_poll_event_t **eventspp, int mode)
7063 7060 {
7064 7061 rsm_poll_event_t *evlist = NULL;
7065 7062 size_t evlistsz;
7066 7063 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL);
7067 7064
7068 7065 #ifdef _MULTI_DATAMODEL
7069 7066 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7070 7067 int i;
7071 7068 rsm_consume_event_msg32_t cemsg32 = {0};
7072 7069 rsm_poll_event32_t event32[RSM_MAX_POLLFDS];
7073 7070 rsm_poll_event32_t *evlist32;
7074 7071 size_t evlistsz32;
7075 7072
7076 7073 /* copyin the ioctl message */
7077 7074 if (ddi_copyin(arg, (caddr_t)&cemsg32,
7078 7075 sizeof (rsm_consume_event_msg32_t), mode)) {
7079 7076 DBG_PRINTF((category, RSM_ERR,
7080 7077 "consumeevent_copyin msgp: RSMERR_BAD_ADDR\n"));
7081 7078 return (RSMERR_BAD_ADDR);
7082 7079 }
7083 7080 msgp->seglist = (caddr_t)(uintptr_t)cemsg32.seglist;
7084 7081 msgp->numents = (int)cemsg32.numents;
7085 7082
7086 7083 evlistsz32 = sizeof (rsm_poll_event32_t) * msgp->numents;
7087 7084 /*
7088 7085 		 * If numents is large, alloc the events list on the heap;
7089 7086 		 * otherwise use the address of the array that was passed in.
7090 7087 */
7091 7088 if (msgp->numents > RSM_MAX_POLLFDS) {
7092 7089 if (msgp->numents > max_segs) { /* validate numents */
7093 7090 DBG_PRINTF((category, RSM_ERR,
7094 7091 "consumeevent_copyin: "
7095 7092 "RSMERR_BAD_ARGS_ERRORS\n"));
7096 7093 return (RSMERR_BAD_ARGS_ERRORS);
7097 7094 }
7098 7095 evlist32 = kmem_zalloc(evlistsz32, KM_SLEEP);
7099 7096 } else {
7100 7097 evlist32 = event32;
7101 7098 }
7102 7099
7103 7100 /* copyin the seglist into the rsm_poll_event32_t array */
7104 7101 if (ddi_copyin((caddr_t)msgp->seglist, (caddr_t)evlist32,
7105 7102 evlistsz32, mode)) {
7106 7103 if ((msgp->numents > RSM_MAX_POLLFDS) && evlist32) {
7107 7104 kmem_free(evlist32, evlistsz32);
7108 7105 }
7109 7106 DBG_PRINTF((category, RSM_ERR,
7110 7107 "consumeevent_copyin evlist: RSMERR_BAD_ADDR\n"));
7111 7108 return (RSMERR_BAD_ADDR);
7112 7109 }
7113 7110
7114 7111 /* evlist and evlistsz are based on rsm_poll_event_t type */
7115 7112 		evlistsz = sizeof (rsm_poll_event_t) * msgp->numents;
7116 7113
7117 7114 if (msgp->numents > RSM_MAX_POLLFDS) {
7118 7115 evlist = kmem_zalloc(evlistsz, KM_SLEEP);
7119 7116 *eventspp = evlist;
7120 7117 } else {
7121 7118 evlist = *eventspp;
7122 7119 }
7123 7120 /*
7124 7121 * copy the rsm_poll_event32_t array to the rsm_poll_event_t
7125 7122 * array
7126 7123 */
7127 7124 for (i = 0; i < msgp->numents; i++) {
7128 7125 evlist[i].rnum = evlist32[i].rnum;
7129 7126 evlist[i].fdsidx = evlist32[i].fdsidx;
7130 7127 evlist[i].revent = evlist32[i].revent;
7131 7128 }
7132 7129 /* free the temp 32-bit event list */
7133 7130 if ((msgp->numents > RSM_MAX_POLLFDS) && evlist32) {
7134 7131 kmem_free(evlist32, evlistsz32);
7135 7132 }
7136 7133
7137 7134 return (RSM_SUCCESS);
7138 7135 }
7139 7136 #endif
7140 7137 /* copyin the ioctl message */
7141 7138 if (ddi_copyin(arg, (caddr_t)msgp, sizeof (rsm_consume_event_msg_t),
7142 7139 mode)) {
7143 7140 DBG_PRINTF((category, RSM_ERR,
7144 7141 "consumeevent_copyin msgp: RSMERR_BAD_ADDR\n"));
7145 7142 return (RSMERR_BAD_ADDR);
7146 7143 }
7147 7144 /*
7148 7145 	 * If numents is large, alloc the events list on the heap;
7149 7146 	 * otherwise use the address of the array that was passed in.
7150 7147 */
7151 7148 if (msgp->numents > RSM_MAX_POLLFDS) {
7152 7149 if (msgp->numents > max_segs) { /* validate numents */
7153 7150 DBG_PRINTF((category, RSM_ERR,
7154 7151 "consumeevent_copyin: RSMERR_BAD_ARGS_ERRORS\n"));
7155 7152 return (RSMERR_BAD_ARGS_ERRORS);
7156 7153 }
7157 7154 evlistsz = sizeof (rsm_poll_event_t)*msgp->numents;
7158 7155 evlist = kmem_zalloc(evlistsz, KM_SLEEP);
7159 7156 *eventspp = evlist;
7160 7157 }
7161 7158
7162 7159 /* copyin the seglist */
7163 7160 if (ddi_copyin((caddr_t)msgp->seglist, (caddr_t)(*eventspp),
7164 7161 sizeof (rsm_poll_event_t)*msgp->numents, mode)) {
7165 7162 if (evlist) {
7166 7163 kmem_free(evlist, evlistsz);
7167 7164 *eventspp = NULL;
7168 7165 }
7169 7166 DBG_PRINTF((category, RSM_ERR,
7170 7167 "consumeevent_copyin evlist: RSMERR_BAD_ADDR\n"));
7171 7168 return (RSMERR_BAD_ADDR);
7172 7169 }
7173 7170
7174 7171 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7175 7172 "consumeevent_copyin done\n"));
7176 7173 return (RSM_SUCCESS);
7177 7174 }
7178 7175
7179 7176 static int
7180 7177 rsm_consumeevent_copyout(rsm_consume_event_msg_t *msgp,
7181 7178 rsm_poll_event_t *eventsp, int mode)
7182 7179 {
7183 7180 size_t evlistsz;
7184 7181 int err = RSM_SUCCESS;
7185 7182 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL);
7186 7183
7187 7184 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7188 7185 "consumeevent_copyout enter: numents(%d) eventsp(%p)\n",
7189 7186 msgp->numents, eventsp));
7190 7187
7191 7188 #ifdef _MULTI_DATAMODEL
7192 7189 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7193 7190 int i;
7194 7191 rsm_poll_event32_t event32[RSM_MAX_POLLFDS];
7195 7192 rsm_poll_event32_t *evlist32;
7196 7193 size_t evlistsz32;
7197 7194
7198 7195 evlistsz32 = sizeof (rsm_poll_event32_t)*msgp->numents;
7199 7196 if (msgp->numents > RSM_MAX_POLLFDS) {
7200 7197 evlist32 = kmem_zalloc(evlistsz32, KM_SLEEP);
7201 7198 } else {
7202 7199 evlist32 = event32;
7203 7200 }
7204 7201
7205 7202 /*
7206 7203 * copy the rsm_poll_event_t array to the rsm_poll_event32_t
7207 7204 * array
7208 7205 */
7209 7206 for (i = 0; i < msgp->numents; i++) {
7210 7207 evlist32[i].rnum = eventsp[i].rnum;
7211 7208 evlist32[i].fdsidx = eventsp[i].fdsidx;
7212 7209 evlist32[i].revent = eventsp[i].revent;
7213 7210 }
7214 7211
7215 7212 if (ddi_copyout((caddr_t)evlist32, (caddr_t)msgp->seglist,
7216 7213 evlistsz32, mode)) {
7217 7214 err = RSMERR_BAD_ADDR;
7218 7215 }
7219 7216
7220 7217 if (msgp->numents > RSM_MAX_POLLFDS) {
7221 7218 if (evlist32) { /* free the temp 32-bit event list */
7222 7219 kmem_free(evlist32, evlistsz32);
7223 7220 }
7224 7221 /*
7225 7222 * eventsp and evlistsz are based on rsm_poll_event_t
7226 7223 * type
7227 7224 */
7228 7225 evlistsz = sizeof (rsm_poll_event_t)*msgp->numents;
7229 7226 /* event list on the heap and needs to be freed here */
7230 7227 if (eventsp) {
7231 7228 kmem_free(eventsp, evlistsz);
7232 7229 }
7233 7230 }
7234 7231
7235 7232 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7236 7233 "consumeevent_copyout done: err=%d\n", err));
7237 7234 return (err);
7238 7235 }
7239 7236 #endif
7240 7237 evlistsz = sizeof (rsm_poll_event_t)*msgp->numents;
7241 7238
7242 7239 if (ddi_copyout((caddr_t)eventsp, (caddr_t)msgp->seglist, evlistsz,
7243 7240 mode)) {
7244 7241 err = RSMERR_BAD_ADDR;
7245 7242 }
7246 7243
7247 7244 if ((msgp->numents > RSM_MAX_POLLFDS) && eventsp) {
7248 7245 /* event list on the heap and needs to be freed here */
7249 7246 kmem_free(eventsp, evlistsz);
7250 7247 }
7251 7248
7252 7249 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7253 7250 "consumeevent_copyout done: err=%d\n", err));
7254 7251 return (err);
7255 7252 }
7256 7253
7257 7254 static int
7258 7255 rsm_consumeevent_ioctl(caddr_t arg, int mode)
7259 7256 {
7260 7257 int rc;
7261 7258 int i;
7262 7259 minor_t rnum;
7263 7260 rsm_consume_event_msg_t msg = {0};
7264 7261 rsmseg_t *seg;
7265 7262 rsm_poll_event_t *event_list;
7266 7263 rsm_poll_event_t events[RSM_MAX_POLLFDS];
7267 7264 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL);
7268 7265
7269 7266 event_list = events;
7270 7267
7271 7268 if ((rc = rsm_consumeevent_copyin(arg, &msg, &event_list, mode)) !=
7272 7269 RSM_SUCCESS) {
7273 7270 return (rc);
7274 7271 }
7275 7272
7276 7273 for (i = 0; i < msg.numents; i++) {
7277 7274 rnum = event_list[i].rnum;
7278 7275 event_list[i].revent = 0;
7279 7276 /* get the segment structure */
7280 7277 seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_LOCK);
7281 7278 if (seg) {
7282 7279 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7283 7280 "consumeevent_ioctl: rnum(%d) seg(%p)\n", rnum,
7284 7281 seg));
7285 7282 if (seg->s_pollevent) {
7286 7283 /* consume the event */
7287 7284 atomic_dec_32(&seg->s_pollevent);
7288 7285 event_list[i].revent = POLLRDNORM;
7289 7286 }
7290 7287 rsmseglock_release(seg);
7291 7288 }
7292 7289 }
7293 7290
7294 7291 if ((rc = rsm_consumeevent_copyout(&msg, event_list, mode)) !=
7295 7292 RSM_SUCCESS) {
7296 7293 return (rc);
7297 7294 }
7298 7295
7299 7296 return (RSM_SUCCESS);
7300 7297 }
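/*
 * Illustrative sketch (not driver code): the shape of the
 * RSM_IOCTL_CONSUMEEVENT round trip that rsm_consumeevent_copyin()/
 * _copyout() above expect. The structs below are hypothetical mirrors
 * of rsm_consume_event_msg_t and rsm_poll_event_t, reduced to the
 * fields the driver code uses (seglist, numents; rnum, fdsidx,
 * revent); the real definitions and the ioctl value live in the RSM
 * headers, so the command code is taken as a parameter here.
 */
#include <string.h>
#include <sys/ioctl.h>

typedef struct {		/* hypothetical mirror of rsm_poll_event_t */
	int	rnum;		/* minor number of the segment */
	int	fdsidx;		/* caller's index into its own fd table */
	short	revent;		/* out: POLLRDNORM if an event was consumed */
} sketch_poll_event_t;

typedef struct {		/* hypothetical mirror of rsm_consume_event_msg_t */
	void	*seglist;	/* user address of the event array */
	int	numents;	/* number of entries in seglist */
} sketch_consume_event_msg_t;

static int
consume_events(int rsmfd, int consumeevent_cmd,
    sketch_poll_event_t *events, int numents)
{
	sketch_consume_event_msg_t msg;

	(void) memset(&msg, 0, sizeof (msg));
	msg.seglist = events;	/* copied in, updated, and copied back out */
	msg.numents = numents;

	return (ioctl(rsmfd, consumeevent_cmd, &msg));
}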
7301 7298
7302 7299 static int
7303 7300 iovec_copyin(caddr_t user_vec, rsmka_iovec_t *iovec, int count, int mode)
7304 7301 {
7305 7302 int size;
7306 7303 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7307 7304
7308 7305 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "iovec_copyin enter\n"));
7309 7306
7310 7307 #ifdef _MULTI_DATAMODEL
7311 7308 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7312 7309 rsmka_iovec32_t *iovec32, *iovec32_base;
7313 7310 int i;
7314 7311
7315 7312 size = count * sizeof (rsmka_iovec32_t);
7316 7313 iovec32_base = iovec32 = kmem_zalloc(size, KM_SLEEP);
7317 7314 if (ddi_copyin((caddr_t)user_vec,
7318 7315 (caddr_t)iovec32, size, mode)) {
7319 7316 kmem_free(iovec32, size);
7320 7317 DBG_PRINTF((category, RSM_DEBUG,
7321 7318 "iovec_copyin: returning RSMERR_BAD_ADDR\n"));
7322 7319 return (RSMERR_BAD_ADDR);
7323 7320 }
7324 7321
7325 7322 for (i = 0; i < count; i++, iovec++, iovec32++) {
7326 7323 iovec->io_type = (int)iovec32->io_type;
7327 7324 if (iovec->io_type == RSM_HANDLE_TYPE)
7328 7325 iovec->local.segid = (rsm_memseg_id_t)
7329 7326 iovec32->local;
7330 7327 else
7331 7328 iovec->local.vaddr =
7332 7329 (caddr_t)(uintptr_t)iovec32->local;
7333 7330 iovec->local_offset = (size_t)iovec32->local_offset;
7334 7331 iovec->remote_offset = (size_t)iovec32->remote_offset;
7335 7332 iovec->transfer_len = (size_t)iovec32->transfer_len;
7336 7333
7337 7334 }
7338 7335 kmem_free(iovec32_base, size);
7339 7336 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7340 7337 "iovec_copyin done\n"));
7341 7338 return (DDI_SUCCESS);
7342 7339 }
7343 7340 #endif
7344 7341
7345 7342 size = count * sizeof (rsmka_iovec_t);
7346 7343 if (ddi_copyin((caddr_t)user_vec, (caddr_t)iovec, size, mode)) {
7347 7344 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7348 7345 "iovec_copyin done: RSMERR_BAD_ADDR\n"));
7349 7346 return (RSMERR_BAD_ADDR);
7350 7347 }
7351 7348
7352 7349 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "iovec_copyin done\n"));
7353 7350
7354 7351 return (DDI_SUCCESS);
7355 7352 }
7356 7353
7357 7354
7358 7355 static int
7359 7356 sgio_copyin(caddr_t arg, rsmka_scat_gath_t *sg_io, int mode)
7360 7357 {
7361 7358 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7362 7359
7363 7360 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_copyin enter\n"));
7364 7361
7365 7362 #ifdef _MULTI_DATAMODEL
7366 7363 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7367 7364 rsmka_scat_gath32_t sg_io32;
7368 7365
7369 7366 if (ddi_copyin(arg, (caddr_t)&sg_io32, sizeof (sg_io32),
7370 7367 mode)) {
7371 7368 DBG_PRINTF((category, RSM_DEBUG,
7372 7369 "sgio_copyin done: returning EFAULT\n"));
7373 7370 return (RSMERR_BAD_ADDR);
7374 7371 }
7375 7372 sg_io->local_nodeid = (rsm_node_id_t)sg_io32.local_nodeid;
7376 7373 sg_io->io_request_count = (size_t)sg_io32.io_request_count;
7377 7374 sg_io->io_residual_count = (size_t)sg_io32.io_residual_count;
7378 7375 sg_io->flags = (size_t)sg_io32.flags;
7379 7376 sg_io->remote_handle = (rsm_memseg_import_handle_t)
7380 7377 (uintptr_t)sg_io32.remote_handle;
7381 7378 sg_io->iovec = (rsmka_iovec_t *)(uintptr_t)sg_io32.iovec;
7382 7379 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7383 7380 "sgio_copyin done\n"));
7384 7381 return (DDI_SUCCESS);
7385 7382 }
7386 7383 #endif
7387 7384 if (ddi_copyin(arg, (caddr_t)sg_io, sizeof (rsmka_scat_gath_t),
7388 7385 mode)) {
7389 7386 DBG_PRINTF((category, RSM_DEBUG,
7390 7387 "sgio_copyin done: returning EFAULT\n"));
7391 7388 return (RSMERR_BAD_ADDR);
7392 7389 }
7393 7390 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_copyin done\n"));
7394 7391 return (DDI_SUCCESS);
7395 7392 }
7396 7393
7397 7394 static int
7398 7395 sgio_resid_copyout(caddr_t arg, rsmka_scat_gath_t *sg_io, int mode)
7399 7396 {
7400 7397 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7401 7398
7402 7399 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7403 7400 "sgio_resid_copyout enter\n"));
7404 7401
7405 7402 #ifdef _MULTI_DATAMODEL
7406 7403 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7407 7404 rsmka_scat_gath32_t sg_io32;
7408 7405
7409 7406 sg_io32.io_residual_count = sg_io->io_residual_count;
7410 7407 sg_io32.flags = sg_io->flags;
7411 7408
7412 7409 if (ddi_copyout((caddr_t)&sg_io32.io_residual_count,
7413 7410 (caddr_t)&((rsmka_scat_gath32_t *)arg)->io_residual_count,
7414 7411 sizeof (uint32_t), mode)) {
7415 7412
7416 7413 DBG_PRINTF((category, RSM_ERR,
7417 7414 "sgio_resid_copyout error: rescnt\n"));
7418 7415 return (RSMERR_BAD_ADDR);
7419 7416 }
7420 7417
7421 7418 if (ddi_copyout((caddr_t)&sg_io32.flags,
7422 7419 (caddr_t)&((rsmka_scat_gath32_t *)arg)->flags,
7423 7420 sizeof (uint32_t), mode)) {
7424 7421
7425 7422 DBG_PRINTF((category, RSM_ERR,
7426 7423 "sgio_resid_copyout error: flags\n"));
7427 7424 return (RSMERR_BAD_ADDR);
7428 7425 }
7429 7426 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7430 7427 "sgio_resid_copyout done\n"));
7431 7428 return (DDI_SUCCESS);
7432 7429 }
7433 7430 #endif
7434 7431 if (ddi_copyout((caddr_t)&sg_io->io_residual_count,
7435 7432 (caddr_t)&((rsmka_scat_gath_t *)arg)->io_residual_count,
7436 7433 sizeof (ulong_t), mode)) {
7437 7434
7438 7435 DBG_PRINTF((category, RSM_ERR,
7439 7436 "sgio_resid_copyout error:rescnt\n"));
7440 7437 return (RSMERR_BAD_ADDR);
7441 7438 }
7442 7439
7443 7440 if (ddi_copyout((caddr_t)&sg_io->flags,
7444 7441 (caddr_t)&((rsmka_scat_gath_t *)arg)->flags,
7445 7442 sizeof (uint_t), mode)) {
7446 7443
7447 7444 DBG_PRINTF((category, RSM_ERR,
7448 7445 "sgio_resid_copyout error:flags\n"));
7449 7446 return (RSMERR_BAD_ADDR);
7450 7447 }
7451 7448
7452 7449 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_resid_copyout done\n"));
7453 7450 return (DDI_SUCCESS);
7454 7451 }
7455 7452
7456 7453
7457 7454 static int
7458 7455 rsm_iovec_ioctl(dev_t dev, caddr_t arg, int cmd, int mode, cred_t *credp)
7459 7456 {
7460 7457 rsmka_scat_gath_t sg_io;
7461 7458 rsmka_iovec_t ka_iovec_arr[RSM_MAX_IOVLEN];
7462 7459 rsmka_iovec_t *ka_iovec;
7463 7460 rsmka_iovec_t *ka_iovec_start;
7464 7461 rsmpi_scat_gath_t rsmpi_sg_io;
7465 7462 rsmpi_iovec_t iovec_arr[RSM_MAX_IOVLEN];
7466 7463 rsmpi_iovec_t *iovec;
7467 7464 rsmpi_iovec_t *iovec_start = NULL;
7468 7465 rsmapi_access_entry_t *acl;
7469 7466 rsmresource_t *res;
7470 7467 minor_t rnum;
7471 7468 rsmseg_t *im_seg, *ex_seg;
7472 7469 int e;
7473 7470 int error = 0;
7474 7471 uint_t i;
7475 7472 uint_t iov_proc = 0; /* num of iovecs processed */
7476 7473 size_t size = 0;
7477 7474 size_t ka_size;
7478 7475
7479 7476 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7480 7477
7481 7478 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_iovec_ioctl enter\n"));
7482 7479
7483 7480 credp = credp;
7484 7481
7485 7482 /*
7486 7483 * Copyin the scatter/gather structure and build new structure
7487 7484 * for rsmpi.
7488 7485 */
7489 7486 e = sgio_copyin(arg, &sg_io, mode);
7490 7487 if (e != DDI_SUCCESS) {
7491 7488 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7492 7489 "rsm_iovec_ioctl done: sgio_copyin %d\n", e));
7493 7490 return (e);
7494 7491 }
7495 7492
7496 7493 if (sg_io.io_request_count > RSM_MAX_SGIOREQS) {
7497 7494 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7498 7495 "rsm_iovec_ioctl done: request_count(%d) too large\n",
7499 7496 sg_io.io_request_count));
7500 7497 return (RSMERR_BAD_SGIO);
7501 7498 }
7502 7499
7503 7500 rsmpi_sg_io.io_request_count = sg_io.io_request_count;
7504 7501 rsmpi_sg_io.io_residual_count = sg_io.io_request_count;
7505 7502 rsmpi_sg_io.io_segflg = 0;
7506 7503
7507 7504 /* Allocate memory and copyin io vector array */
7508 7505 if (sg_io.io_request_count > RSM_MAX_IOVLEN) {
7509 7506 ka_size = sg_io.io_request_count * sizeof (rsmka_iovec_t);
7510 7507 ka_iovec_start = ka_iovec = kmem_zalloc(ka_size, KM_SLEEP);
7511 7508 } else {
7512 7509 ka_iovec_start = ka_iovec = ka_iovec_arr;
7513 7510 }
7514 7511 e = iovec_copyin((caddr_t)sg_io.iovec, ka_iovec,
7515 7512 sg_io.io_request_count, mode);
7516 7513 if (e != DDI_SUCCESS) {
7517 7514 if (sg_io.io_request_count > RSM_MAX_IOVLEN)
7518 7515 kmem_free(ka_iovec, ka_size);
7519 7516 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7520 7517 "rsm_iovec_ioctl done: iovec_copyin %d\n", e));
7521 7518 return (e);
7522 7519 }
7523 7520
7524 7521 /* get the import segment descriptor */
7525 7522 rnum = getminor(dev);
7526 7523 res = rsmresource_lookup(rnum, RSM_LOCK);
7527 7524
7528 7525 /*
7529 7526 * The following sequence of locking may (or MAY NOT) cause a
7530 7527 * deadlock but this is currently not addressed here since the
7531 7528 * implementation will be changed to incorporate the use of
7532 7529 * reference counting for both the import and the export segments.
7533 7530 */
7534 7531
7535 7532 /* rsmseglock_acquire(im_seg) done in rsmresource_lookup */
7536 7533
7537 7534 im_seg = (rsmseg_t *)res;
7538 7535
7539 7536 if (im_seg == NULL) {
7540 7537 if (sg_io.io_request_count > RSM_MAX_IOVLEN)
7541 7538 kmem_free(ka_iovec, ka_size);
7542 7539 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7543 7540 "rsm_iovec_ioctl done: rsmresource_lookup failed\n"));
7544 7541 return (EINVAL);
7545 7542 }
7546 7543 /* putv/getv supported is supported only on import segments */
7547 7544 if (im_seg->s_type != RSM_RESOURCE_IMPORT_SEGMENT) {
7548 7545 rsmseglock_release(im_seg);
7549 7546 if (sg_io.io_request_count > RSM_MAX_IOVLEN)
7550 7547 kmem_free(ka_iovec, ka_size);
7551 7548 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7552 7549 "rsm_iovec_ioctl done: not an import segment\n"));
7553 7550 return (EINVAL);
7554 7551 }
7555 7552
7556 7553 /*
7557 7554 	 * wait for a remote DR to complete, i.e., for segments to get
7558 7555 	 * UNQUIESCED, as well as for a local DR to complete.
7559 7556 */
7560 7557 while ((im_seg->s_state == RSM_STATE_CONN_QUIESCE) ||
7561 7558 (im_seg->s_state == RSM_STATE_MAP_QUIESCE) ||
7562 7559 (im_seg->s_flags & RSM_DR_INPROGRESS)) {
7563 7560 if (cv_wait_sig(&im_seg->s_cv, &im_seg->s_lock) == 0) {
7564 7561 DBG_PRINTF((category, RSM_DEBUG,
7565 7562 "rsm_iovec_ioctl done: cv_wait INTR"));
7566 7563 rsmseglock_release(im_seg);
7567 7564 return (RSMERR_INTERRUPTED);
7568 7565 }
7569 7566 }
7570 7567
7571 7568 if ((im_seg->s_state != RSM_STATE_CONNECT) &&
7572 7569 (im_seg->s_state != RSM_STATE_ACTIVE)) {
7573 7570
7574 7571 ASSERT(im_seg->s_state == RSM_STATE_DISCONNECT ||
7575 7572 im_seg->s_state == RSM_STATE_NEW);
7576 7573
7577 7574 DBG_PRINTF((category, RSM_DEBUG,
7578 7575 "rsm_iovec_ioctl done: im_seg not conn/map"));
7579 7576 rsmseglock_release(im_seg);
7580 7577 e = RSMERR_BAD_SGIO;
7581 7578 goto out;
7582 7579 }
7583 7580
7584 7581 im_seg->s_rdmacnt++;
7585 7582 rsmseglock_release(im_seg);
7586 7583
7587 7584 /*
7588 7585 * Allocate and set up the io vector for rsmpi
7589 7586 */
7590 7587 if (sg_io.io_request_count > RSM_MAX_IOVLEN) {
7591 7588 size = sg_io.io_request_count * sizeof (rsmpi_iovec_t);
7592 7589 iovec_start = iovec = kmem_zalloc(size, KM_SLEEP);
7593 7590 } else {
7594 7591 iovec_start = iovec = iovec_arr;
7595 7592 }
7596 7593
7597 7594 rsmpi_sg_io.iovec = iovec;
7598 7595 for (iov_proc = 0; iov_proc < sg_io.io_request_count; iov_proc++) {
7599 7596 if (ka_iovec->io_type == RSM_HANDLE_TYPE) {
7600 7597 ex_seg = rsmexport_lookup(ka_iovec->local.segid);
7601 7598
7602 7599 if (ex_seg == NULL) {
7603 7600 e = RSMERR_BAD_SGIO;
7604 7601 break;
7605 7602 }
7606 7603 ASSERT(ex_seg->s_state == RSM_STATE_EXPORT);
7607 7604
7608 7605 acl = ex_seg->s_acl;
7609 7606 if (acl[0].ae_permission == 0) {
7610 7607 struct buf *xbuf;
7611 7608 dev_t sdev = 0;
7612 7609
7613 7610 xbuf = ddi_umem_iosetup(ex_seg->s_cookie,
7614 7611 0, ex_seg->s_len, B_WRITE,
7615 7612 sdev, 0, NULL, DDI_UMEM_SLEEP);
7616 7613
7617 7614 ASSERT(xbuf != NULL);
7618 7615
7619 7616 iovec->local_mem.ms_type = RSM_MEM_BUF;
7620 7617 iovec->local_mem.ms_memory.bp = xbuf;
7621 7618 } else {
7622 7619 iovec->local_mem.ms_type = RSM_MEM_HANDLE;
7623 7620 iovec->local_mem.ms_memory.handle =
7624 7621 ex_seg->s_handle.out;
7625 7622 }
7626 7623 ex_seg->s_rdmacnt++; /* refcnt the handle */
7627 7624 rsmseglock_release(ex_seg);
7628 7625 } else {
7629 7626 iovec->local_mem.ms_type = RSM_MEM_VADDR;
7630 7627 iovec->local_mem.ms_memory.vr.vaddr =
7631 7628 ka_iovec->local.vaddr;
7632 7629 }
7633 7630
7634 7631 iovec->local_offset = ka_iovec->local_offset;
7635 7632 iovec->remote_handle = im_seg->s_handle.in;
7636 7633 iovec->remote_offset = ka_iovec->remote_offset;
7637 7634 iovec->transfer_length = ka_iovec->transfer_len;
7638 7635 iovec++;
7639 7636 ka_iovec++;
7640 7637 }
7641 7638
7642 7639 if (iov_proc < sg_io.io_request_count) {
7643 7640 /* error while processing handle */
7644 7641 rsmseglock_acquire(im_seg);
7645 7642 im_seg->s_rdmacnt--; /* decrement the refcnt for importseg */
7646 7643 if (im_seg->s_rdmacnt == 0) {
7647 7644 cv_broadcast(&im_seg->s_cv);
7648 7645 }
7649 7646 rsmseglock_release(im_seg);
7650 7647 goto out;
7651 7648 }
7652 7649
7653 7650 /* call rsmpi */
7654 7651 if (cmd == RSM_IOCTL_PUTV)
7655 7652 e = im_seg->s_adapter->rsmpi_ops->rsm_memseg_import_putv(
7656 7653 im_seg->s_adapter->rsmpi_handle,
7657 7654 &rsmpi_sg_io);
7658 7655 else if (cmd == RSM_IOCTL_GETV)
7659 7656 e = im_seg->s_adapter->rsmpi_ops->rsm_memseg_import_getv(
7660 7657 im_seg->s_adapter->rsmpi_handle,
7661 7658 &rsmpi_sg_io);
7662 7659 else {
7663 7660 e = EINVAL;
7664 7661 DBG_PRINTF((category, RSM_DEBUG,
7665 7662 "iovec_ioctl: bad command = %x\n", cmd));
7666 7663 }
7667 7664
7668 7665
7669 7666 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7670 7667 "rsm_iovec_ioctl RSMPI oper done %d\n", e));
7671 7668
7672 7669 sg_io.io_residual_count = rsmpi_sg_io.io_residual_count;
7673 7670
7674 7671 /*
7675 7672 * Check for implicit signal post flag and do the signal
7676 7673 * post if needed
7677 7674 */
7678 7675 if (sg_io.flags & RSM_IMPLICIT_SIGPOST &&
7679 7676 e == RSM_SUCCESS) {
7680 7677 rsmipc_request_t request;
7681 7678
7682 7679 request.rsmipc_key = im_seg->s_segid;
7683 7680 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL;
7684 7681 request.rsmipc_segment_cookie = NULL;
7685 7682 e = rsmipc_send(im_seg->s_node, &request, RSM_NO_REPLY);
7686 7683 /*
7687 7684 * Reset the implicit signal post flag to 0 to indicate
7688 7685 * that the signal post has been done and need not be
7689 7686 * done in the RSMAPI library
7690 7687 */
7691 7688 sg_io.flags &= ~RSM_IMPLICIT_SIGPOST;
7692 7689 }
7693 7690
7694 7691 rsmseglock_acquire(im_seg);
7695 7692 im_seg->s_rdmacnt--;
7696 7693 if (im_seg->s_rdmacnt == 0) {
7697 7694 cv_broadcast(&im_seg->s_cv);
7698 7695 }
7699 7696 rsmseglock_release(im_seg);
7700 7697 error = sgio_resid_copyout(arg, &sg_io, mode);
7701 7698 out:
7702 7699 iovec = iovec_start;
7703 7700 ka_iovec = ka_iovec_start;
7704 7701 for (i = 0; i < iov_proc; i++) {
7705 7702 if (ka_iovec->io_type == RSM_HANDLE_TYPE) {
7706 7703 ex_seg = rsmexport_lookup(ka_iovec->local.segid);
7707 7704
7708 7705 ASSERT(ex_seg != NULL);
7709 7706 ASSERT(ex_seg->s_state == RSM_STATE_EXPORT);
7710 7707
7711 7708 ex_seg->s_rdmacnt--; /* unrefcnt the handle */
7712 7709 if (ex_seg->s_rdmacnt == 0) {
7713 7710 cv_broadcast(&ex_seg->s_cv);
7714 7711 }
7715 7712 rsmseglock_release(ex_seg);
7716 7713 }
7717 7714
7718 7715 ASSERT(iovec != NULL); /* true if iov_proc > 0 */
7719 7716
7720 7717 /*
7721 7718 * At present there is no dependency on the existence of xbufs
7722 7719 * created by ddi_umem_iosetup for each of the iovecs. So we
7723 7720 		 * can free these xbufs here.
7724 7721 */
7725 7722 if (iovec->local_mem.ms_type == RSM_MEM_BUF) {
7726 7723 freerbuf(iovec->local_mem.ms_memory.bp);
7727 7724 }
7728 7725
7729 7726 iovec++;
7730 7727 ka_iovec++;
7731 7728 }
7732 7729
7733 7730 if (sg_io.io_request_count > RSM_MAX_IOVLEN) {
7734 7731 if (iovec_start)
7735 7732 kmem_free(iovec_start, size);
7736 7733 kmem_free(ka_iovec_start, ka_size);
7737 7734 }
7738 7735
7739 7736 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7740 7737 "rsm_iovec_ioctl done %d\n", e));
7741 7738 /* if RSMPI call fails return that else return copyout's retval */
7742 7739 return ((e != RSM_SUCCESS) ? e : error);
7743 7740
7744 7741 }
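/*
 * Illustrative sketch (not driver code): a single-entry putv request
 * from the caller's side, matching how rsm_iovec_ioctl() above
 * consumes it (sgio_copyin(), iovec_copyin(), then the RSMPI putv).
 * The structs are hypothetical mirrors of rsmka_scat_gath_t and
 * rsmka_iovec_t reduced to the fields used above; the real layouts
 * and the RSM_IOCTL_PUTV value live in the RSM headers, so the
 * command code is taken as a parameter, and io_type 0 simply means
 * "not RSM_HANDLE_TYPE" for this sketch. Real clients call
 * rsm_memseg_import_putv() in the RSMAPI library instead.
 */
#include <stddef.h>
#include <string.h>
#include <sys/ioctl.h>

typedef struct {		/* hypothetical mirror of rsmka_iovec_t */
	int	io_type;		/* RSM_HANDLE_TYPE or a vaddr request */
	union {
		unsigned int	segid;	/* local export segment id, or ... */
		void		*vaddr;	/* ... a local virtual address */
	} local;
	size_t	local_offset;
	size_t	remote_offset;
	size_t	transfer_len;
} sketch_iovec_t;

typedef struct {		/* hypothetical mirror of rsmka_scat_gath_t */
	unsigned int	local_nodeid;
	size_t		io_request_count;	/* entries in iovec[] */
	size_t		io_residual_count;	/* out: bytes not transferred */
	unsigned int	flags;			/* e.g. implicit signal post */
	void		*remote_handle;
	sketch_iovec_t	*iovec;
} sketch_scat_gath_t;

static int
putv_one(int segfd, int putv_cmd, void *src, size_t remote_off, size_t len)
{
	sketch_iovec_t iov;
	sketch_scat_gath_t sg;

	(void) memset(&iov, 0, sizeof (iov));
	iov.io_type = 0;		/* local side is a vaddr, not a handle */
	iov.local.vaddr = src;
	iov.remote_offset = remote_off;
	iov.transfer_len = len;

	(void) memset(&sg, 0, sizeof (sg));
	sg.io_request_count = 1;	/* driver caps this at RSM_MAX_SGIOREQS */
	sg.iovec = &iov;

	return (ioctl(segfd, putv_cmd, &sg));
}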
7745 7742
7746 7743
7747 7744 static int
7748 7745 rsmaddr_ioctl(int cmd, rsm_ioctlmsg_t *msg, int mode)
7749 7746 {
7750 7747 adapter_t *adapter;
7751 7748 rsm_addr_t addr;
7752 7749 rsm_node_id_t node;
7753 7750 int rval = DDI_SUCCESS;
7754 7751 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL);
7755 7752
7756 7753 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmaddr_ioctl enter\n"));
7757 7754
7758 7755 adapter = rsm_getadapter(msg, mode);
7759 7756 if (adapter == NULL) {
7760 7757 DBG_PRINTF((category, RSM_DEBUG,
7761 7758 "rsmaddr_ioctl done: adapter not found\n"));
7762 7759 return (RSMERR_CTLR_NOT_PRESENT);
7763 7760 }
7764 7761
7765 7762 switch (cmd) {
7766 7763 case RSM_IOCTL_MAP_TO_ADDR: /* nodeid to hwaddr mapping */
7767 7764 /* returns the hwaddr in msg->hwaddr */
7768 7765 if (msg->nodeid == my_nodeid) {
7769 7766 msg->hwaddr = adapter->hwaddr;
7770 7767 } else {
7771 7768 addr = get_remote_hwaddr(adapter, msg->nodeid);
7772 7769 if ((int64_t)addr < 0) {
7773 7770 rval = RSMERR_INTERNAL_ERROR;
7774 7771 } else {
7775 7772 msg->hwaddr = addr;
7776 7773 }
7777 7774 }
7778 7775 break;
7779 7776 case RSM_IOCTL_MAP_TO_NODEID: /* hwaddr to nodeid mapping */
7780 7777 /* returns the nodeid in msg->nodeid */
7781 7778 if (msg->hwaddr == adapter->hwaddr) {
7782 7779 msg->nodeid = my_nodeid;
7783 7780 } else {
7784 7781 node = get_remote_nodeid(adapter, msg->hwaddr);
7785 7782 if ((int)node < 0) {
7786 7783 rval = RSMERR_INTERNAL_ERROR;
7787 7784 } else {
7788 7785 msg->nodeid = (rsm_node_id_t)node;
7789 7786 }
7790 7787 }
7791 7788 break;
7792 7789 default:
7793 7790 rval = EINVAL;
7794 7791 break;
7795 7792 }
7796 7793
7797 7794 rsmka_release_adapter(adapter);
7798 7795 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7799 7796 "rsmaddr_ioctl done: %d\n", rval));
7800 7797 return (rval);
7801 7798 }
7802 7799
7803 7800 static int
7804 7801 rsm_ddi_copyin(caddr_t arg, rsm_ioctlmsg_t *msg, int mode)
7805 7802 {
7806 7803 DBG_DEFINE(category,
7807 7804 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL | RSM_DDI);
7808 7805
7809 7806 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ddi_copyin enter\n"));
7810 7807
7811 7808 #ifdef _MULTI_DATAMODEL
7812 7809
7813 7810 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7814 7811 rsm_ioctlmsg32_t msg32;
7815 7812 int i;
7816 7813
7817 7814 if (ddi_copyin(arg, (caddr_t)&msg32, sizeof (msg32), mode)) {
7818 7815 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7819 7816 "rsm_ddi_copyin done: EFAULT\n"));
7820 7817 return (RSMERR_BAD_ADDR);
7821 7818 }
7822 7819 msg->len = msg32.len;
7823 7820 msg->vaddr = (caddr_t)(uintptr_t)msg32.vaddr;
7824 7821 msg->arg = (caddr_t)(uintptr_t)msg32.arg;
7825 7822 msg->key = msg32.key;
7826 7823 msg->acl_len = msg32.acl_len;
7827 7824 msg->acl = (rsmapi_access_entry_t *)(uintptr_t)msg32.acl;
7828 7825 msg->cnum = msg32.cnum;
7829 7826 msg->cname = (caddr_t)(uintptr_t)msg32.cname;
7830 7827 msg->cname_len = msg32.cname_len;
7831 7828 msg->nodeid = msg32.nodeid;
7832 7829 msg->hwaddr = msg32.hwaddr;
7833 7830 msg->perm = msg32.perm;
7834 7831 for (i = 0; i < 4; i++) {
7835 7832 msg->bar.comp[i].u64 = msg32.bar.comp[i].u64;
7836 7833 }
7837 7834 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7838 7835 "rsm_ddi_copyin done\n"));
7839 7836 return (RSM_SUCCESS);
7840 7837 }
7841 7838 #endif
7842 7839 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ddi_copyin done\n"));
7843 7840 if (ddi_copyin(arg, (caddr_t)msg, sizeof (*msg), mode))
7844 7841 return (RSMERR_BAD_ADDR);
7845 7842 else
7846 7843 return (RSM_SUCCESS);
7847 7844 }
7848 7845
7849 7846 static int
7850 7847 rsmattr_ddi_copyout(adapter_t *adapter, caddr_t arg, int mode)
7851 7848 {
7852 7849 rsmka_int_controller_attr_t rsm_cattr;
7853 7850 DBG_DEFINE(category,
7854 7851 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL | RSM_DDI);
7855 7852
7856 7853 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7857 7854 "rsmattr_ddi_copyout enter\n"));
7858 7855 /*
7859 7856 * need to copy appropriate data from rsm_controller_attr_t
7860 7857 * to rsmka_int_controller_attr_t
7861 7858 */
7862 7859 #ifdef _MULTI_DATAMODEL
7863 7860 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7864 7861 rsmka_int_controller_attr32_t rsm_cattr32;
7865 7862
7866 7863 rsm_cattr32.attr_direct_access_sizes =
7867 7864 adapter->rsm_attr.attr_direct_access_sizes;
7868 7865 rsm_cattr32.attr_atomic_sizes =
7869 7866 adapter->rsm_attr.attr_atomic_sizes;
7870 7867 rsm_cattr32.attr_page_size =
7871 7868 adapter->rsm_attr.attr_page_size;
7872 7869 if (adapter->rsm_attr.attr_max_export_segment_size >
7873 7870 UINT_MAX)
7874 7871 rsm_cattr32.attr_max_export_segment_size =
7875 7872 RSM_MAXSZ_PAGE_ALIGNED;
7876 7873 else
7877 7874 rsm_cattr32.attr_max_export_segment_size =
7878 7875 adapter->rsm_attr.attr_max_export_segment_size;
7879 7876 if (adapter->rsm_attr.attr_tot_export_segment_size >
7880 7877 UINT_MAX)
7881 7878 rsm_cattr32.attr_tot_export_segment_size =
7882 7879 RSM_MAXSZ_PAGE_ALIGNED;
7883 7880 else
7884 7881 rsm_cattr32.attr_tot_export_segment_size =
7885 7882 adapter->rsm_attr.attr_tot_export_segment_size;
7886 7883 if (adapter->rsm_attr.attr_max_export_segments >
7887 7884 UINT_MAX)
7888 7885 rsm_cattr32.attr_max_export_segments =
7889 7886 UINT_MAX;
7890 7887 else
7891 7888 rsm_cattr32.attr_max_export_segments =
7892 7889 adapter->rsm_attr.attr_max_export_segments;
7893 7890 if (adapter->rsm_attr.attr_max_import_map_size >
7894 7891 UINT_MAX)
7895 7892 rsm_cattr32.attr_max_import_map_size =
7896 7893 RSM_MAXSZ_PAGE_ALIGNED;
7897 7894 else
7898 7895 rsm_cattr32.attr_max_import_map_size =
7899 7896 adapter->rsm_attr.attr_max_import_map_size;
7900 7897 if (adapter->rsm_attr.attr_tot_import_map_size >
7901 7898 UINT_MAX)
7902 7899 rsm_cattr32.attr_tot_import_map_size =
7903 7900 RSM_MAXSZ_PAGE_ALIGNED;
7904 7901 else
7905 7902 rsm_cattr32.attr_tot_import_map_size =
7906 7903 adapter->rsm_attr.attr_tot_import_map_size;
7907 7904 if (adapter->rsm_attr.attr_max_import_segments >
7908 7905 UINT_MAX)
7909 7906 rsm_cattr32.attr_max_import_segments =
7910 7907 UINT_MAX;
7911 7908 else
7912 7909 rsm_cattr32.attr_max_import_segments =
7913 7910 adapter->rsm_attr.attr_max_import_segments;
7914 7911 rsm_cattr32.attr_controller_addr =
7915 7912 adapter->rsm_attr.attr_controller_addr;
7916 7913
7917 7914 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7918 7915 "rsmattr_ddi_copyout done\n"));
7919 7916 if (ddi_copyout((caddr_t)&rsm_cattr32, arg,
7920 7917 sizeof (rsmka_int_controller_attr32_t), mode)) {
7921 7918 return (RSMERR_BAD_ADDR);
7922 7919 }
7923 7920 else
7924 7921 return (RSM_SUCCESS);
7925 7922 }
7926 7923 #endif
7927 7924 rsm_cattr.attr_direct_access_sizes =
7928 7925 adapter->rsm_attr.attr_direct_access_sizes;
7929 7926 rsm_cattr.attr_atomic_sizes =
7930 7927 adapter->rsm_attr.attr_atomic_sizes;
7931 7928 rsm_cattr.attr_page_size =
7932 7929 adapter->rsm_attr.attr_page_size;
7933 7930 rsm_cattr.attr_max_export_segment_size =
7934 7931 adapter->rsm_attr.attr_max_export_segment_size;
7935 7932 rsm_cattr.attr_tot_export_segment_size =
7936 7933 adapter->rsm_attr.attr_tot_export_segment_size;
7937 7934 rsm_cattr.attr_max_export_segments =
7938 7935 adapter->rsm_attr.attr_max_export_segments;
7939 7936 rsm_cattr.attr_max_import_map_size =
7940 7937 adapter->rsm_attr.attr_max_import_map_size;
7941 7938 rsm_cattr.attr_tot_import_map_size =
7942 7939 adapter->rsm_attr.attr_tot_import_map_size;
7943 7940 rsm_cattr.attr_max_import_segments =
7944 7941 adapter->rsm_attr.attr_max_import_segments;
7945 7942 rsm_cattr.attr_controller_addr =
7946 7943 adapter->rsm_attr.attr_controller_addr;
7947 7944 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7948 7945 "rsmattr_ddi_copyout done\n"));
7949 7946 if (ddi_copyout((caddr_t)&rsm_cattr, arg,
7950 7947 sizeof (rsmka_int_controller_attr_t), mode)) {
7951 7948 return (RSMERR_BAD_ADDR);
7952 7949 }
7953 7950 else
7954 7951 return (RSM_SUCCESS);
7955 7952 }
7956 7953
7957 7954 /*ARGSUSED*/
7958 7955 static int
7959 7956 rsm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
7960 7957 int *rvalp)
7961 7958 {
7962 7959 rsmseg_t *seg;
7963 7960 rsmresource_t *res;
7964 7961 minor_t rnum;
7965 7962 rsm_ioctlmsg_t msg = {0};
7966 7963 int error;
7967 7964 adapter_t *adapter;
7968 7965 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL);
7969 7966
7970 7967 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ioctl enter\n"));
7971 7968
7972 7969 if (cmd == RSM_IOCTL_CONSUMEEVENT) {
7973 7970 error = rsm_consumeevent_ioctl((caddr_t)arg, mode);
7974 7971 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7975 7972 "rsm_ioctl RSM_IOCTL_CONSUMEEVENT done: %d\n", error));
7976 7973 return (error);
7977 7974 }
7978 7975
7979 7976 /* topology cmd does not use the arg common to other cmds */
7980 7977 if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_TOPOLOGY) {
7981 7978 error = rsmka_topology_ioctl((caddr_t)arg, cmd, mode);
7982 7979 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7983 7980 "rsm_ioctl done: %d\n", error));
7984 7981 return (error);
7985 7982 }
7986 7983
7987 7984 if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_IOVEC) {
7988 7985 error = rsm_iovec_ioctl(dev, (caddr_t)arg, cmd, mode, credp);
7989 7986 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7990 7987 "rsm_ioctl done: %d\n", error));
7991 7988 return (error);
7992 7989 }
7993 7990
7994 7991 /*
7995 7992 * try to load arguments
7996 7993 */
7997 7994 if (cmd != RSM_IOCTL_RING_BELL &&
7998 7995 rsm_ddi_copyin((caddr_t)arg, &msg, mode)) {
7999 7996 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8000 7997 "rsm_ioctl done: EFAULT\n"));
8001 7998 return (RSMERR_BAD_ADDR);
8002 7999 }
8003 8000
8004 8001 if (cmd == RSM_IOCTL_ATTR) {
8005 8002 adapter = rsm_getadapter(&msg, mode);
8006 8003 if (adapter == NULL) {
8007 8004 DBG_PRINTF((category, RSM_DEBUG,
8008 8005 "rsm_ioctl done: ENODEV\n"));
8009 8006 return (RSMERR_CTLR_NOT_PRESENT);
8010 8007 }
8011 8008 error = rsmattr_ddi_copyout(adapter, msg.arg, mode);
8012 8009 rsmka_release_adapter(adapter);
8013 8010 DBG_PRINTF((category, RSM_DEBUG,
8014 8011 "rsm_ioctl:after copyout %d\n", error));
8015 8012 return (error);
8016 8013 }
8017 8014
8018 8015 if (cmd == RSM_IOCTL_BAR_INFO) {
8019 8016 /* Return library off,len of barrier page */
8020 8017 msg.off = barrier_offset;
8021 8018 msg.len = (int)barrier_size;
8022 8019 #ifdef _MULTI_DATAMODEL
8023 8020 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
8024 8021 rsm_ioctlmsg32_t msg32;
8025 8022
8026 8023 if (msg.len > UINT_MAX)
8027 8024 				msg32.len = RSM_MAXSZ_PAGE_ALIGNED;
8028 8025 else
8029 8026 msg32.len = (int32_t)msg.len;
8030 8027 msg32.off = (int32_t)msg.off;
8031 8028 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8032 8029 "rsm_ioctl done\n"));
8033 8030 if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg,
8034 8031 sizeof (msg32), mode))
8035 8032 return (RSMERR_BAD_ADDR);
8036 8033 else
8037 8034 return (RSM_SUCCESS);
8038 8035 }
8039 8036 #endif
8040 8037 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8041 8038 "rsm_ioctl done\n"));
8042 8039 if (ddi_copyout((caddr_t)&msg, (caddr_t)arg,
8043 8040 sizeof (msg), mode))
8044 8041 return (RSMERR_BAD_ADDR);
8045 8042 else
8046 8043 return (RSM_SUCCESS);
8047 8044 }
8048 8045
8049 8046 if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_MAP_ADDR) {
8050 8047 /* map the nodeid or hwaddr */
8051 8048 error = rsmaddr_ioctl(cmd, &msg, mode);
8052 8049 if (error == RSM_SUCCESS) {
8053 8050 #ifdef _MULTI_DATAMODEL
8054 8051 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
8055 8052 rsm_ioctlmsg32_t msg32;
8056 8053
8057 8054 msg32.hwaddr = (uint64_t)msg.hwaddr;
8058 8055 msg32.nodeid = (uint32_t)msg.nodeid;
8059 8056
8060 8057 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8061 8058 "rsm_ioctl done\n"));
8062 8059 if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg,
8063 8060 sizeof (msg32), mode))
8064 8061 return (RSMERR_BAD_ADDR);
8065 8062 else
8066 8063 return (RSM_SUCCESS);
8067 8064 }
8068 8065 #endif
8069 8066 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8070 8067 "rsm_ioctl done\n"));
8071 8068 if (ddi_copyout((caddr_t)&msg, (caddr_t)arg,
8072 8069 sizeof (msg), mode))
8073 8070 return (RSMERR_BAD_ADDR);
8074 8071 else
8075 8072 return (RSM_SUCCESS);
8076 8073 }
8077 8074 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8078 8075 "rsm_ioctl done: %d\n", error));
8079 8076 return (error);
8080 8077 }
8081 8078
8082 8079 	/* Find the resource and lock it in read mode */
8083 8080 rnum = getminor(dev);
8084 8081 res = rsmresource_lookup(rnum, RSM_NOLOCK);
8085 8082 ASSERT(res != NULL);
8086 8083
8087 8084 /*
8088 8085 * Find command group
8089 8086 */
8090 8087 switch (RSM_IOCTL_CMDGRP(cmd)) {
8091 8088 case RSM_IOCTL_EXPORT_SEG:
8092 8089 /*
8093 8090 * Export list is searched during publish, loopback and
8094 8091 * remote lookup call.
8095 8092 */
8096 8093 seg = rsmresource_seg(res, rnum, credp,
8097 8094 RSM_RESOURCE_EXPORT_SEGMENT);
8098 8095 if (seg->s_type == RSM_RESOURCE_EXPORT_SEGMENT) {
8099 8096 error = rsmexport_ioctl(seg, &msg, cmd, arg, mode,
8100 8097 credp);
8101 8098 } else { /* export ioctl on an import/barrier resource */
8102 8099 error = RSMERR_BAD_SEG_HNDL;
8103 8100 }
8104 8101 break;
8105 8102 case RSM_IOCTL_IMPORT_SEG:
8106 8103 /* Import list is searched during remote unmap call. */
8107 8104 seg = rsmresource_seg(res, rnum, credp,
8108 8105 RSM_RESOURCE_IMPORT_SEGMENT);
8109 8106 if (seg->s_type == RSM_RESOURCE_IMPORT_SEGMENT) {
8110 8107 error = rsmimport_ioctl(seg, &msg, cmd, arg, mode,
8111 8108 credp);
8112 8109 } else { /* import ioctl on an export/barrier resource */
8113 8110 error = RSMERR_BAD_SEG_HNDL;
8114 8111 }
8115 8112 break;
8116 8113 case RSM_IOCTL_BAR:
8117 8114 if (res != RSMRC_RESERVED &&
8118 8115 res->rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT) {
8119 8116 error = rsmbar_ioctl((rsmseg_t *)res, &msg, cmd, arg,
8120 8117 mode);
8121 8118 } else { /* invalid res value */
8122 8119 error = RSMERR_BAD_SEG_HNDL;
8123 8120 }
8124 8121 break;
8125 8122 case RSM_IOCTL_BELL:
8126 8123 if (res != RSMRC_RESERVED) {
8127 8124 if (res->rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT)
8128 8125 error = exportbell_ioctl((rsmseg_t *)res, cmd);
8129 8126 else if (res->rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT)
8130 8127 error = importbell_ioctl((rsmseg_t *)res, cmd);
8131 8128 else /* RSM_RESOURCE_BAR */
8132 8129 error = RSMERR_BAD_SEG_HNDL;
8133 8130 } else { /* invalid res value */
8134 8131 error = RSMERR_BAD_SEG_HNDL;
8135 8132 }
8136 8133 break;
8137 8134 default:
8138 8135 error = EINVAL;
8139 8136 }
8140 8137
8141 8138 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ioctl done: %d\n",
8142 8139 error));
8143 8140 return (error);
8144 8141 }
8145 8142
8146 8143
8147 8144 /* **************************** Segment Mapping Operations ********* */
8148 8145 static rsm_mapinfo_t *
8149 8146 rsm_get_mapinfo(rsmseg_t *seg, off_t off, size_t len, off_t *dev_offset,
8150 8147 size_t *map_len)
8151 8148 {
8152 8149 rsm_mapinfo_t *p;
8153 8150 /*
8154 8151 * Find the correct mapinfo structure to use during the mapping
8155 8152 * from the seg->s_mapinfo list.
8156 8153 * The seg->s_mapinfo list contains in reverse order the mappings
8157 8154 * as returned by the RSMPI rsm_map. In rsm_devmap, we need to
8158 8155 * access the correct entry within this list for the mapping
8159 8156 * requested.
8160 8157 *
8161 8158 * The algorithm for selecting a list entry is as follows:
8162 8159 *
8163 8160 * When start_offset of an entry <= off we have found the entry
8164 8161 * we were looking for. Adjust the dev_offset and map_len (needs
8165 8162 * to be PAGESIZE aligned).
8166 8163 */
8167 8164 p = seg->s_mapinfo;
8168 8165 for (; p; p = p->next) {
8169 8166 if (p->start_offset <= off) {
8170 8167 *dev_offset = p->dev_offset + off - p->start_offset;
8171 8168 *map_len = (len > p->individual_len) ?
8172 8169 p->individual_len : ptob(btopr(len));
8173 8170 return (p);
8174 8171 }
8176 8173 }
8177 8174
8178 8175 return (NULL);
8179 8176 }
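/*
 * An illustrative sketch, not part of the driver: a worked example of
 * the offset translation in rsm_get_mapinfo above, using a
 * hypothetical two-entry mapinfo list (all values invented, 4K pages
 * assumed):
 *
 *	entry B: start_offset = 0x2000, dev_offset = 0xb000,
 *		 individual_len = 0x2000
 *	entry A: start_offset = 0x0000, dev_offset = 0xa000,
 *		 individual_len = 0x2000
 *
 * The list is kept in reverse order, so entry B (the higher offset) is
 * visited first. A request for off = 0x3000, len = 0x1000 matches
 * entry B since 0x2000 <= 0x3000, yielding
 *
 *	*dev_offset = 0xb000 + (0x3000 - 0x2000) = 0xc000
 *	*map_len    = ptob(btopr(0x1000)) = 0x1000
 *
 * A request for off = 0x1000 fails the test for entry B
 * (0x2000 > 0x1000) and matches entry A, yielding
 * *dev_offset = 0xa000 + 0x1000 = 0xb000.
 */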
8180 8177
8181 8178 static void
8182 8179 rsm_free_mapinfo(rsm_mapinfo_t *mapinfo)
8183 8180 {
8184 8181 rsm_mapinfo_t *p;
8185 8182
8186 8183 while (mapinfo != NULL) {
8187 8184 p = mapinfo;
8188 8185 mapinfo = mapinfo->next;
8189 8186 kmem_free(p, sizeof (*p));
8190 8187 }
8191 8188 }
8192 8189
8193 8190 static int
8194 8191 rsmmap_map(devmap_cookie_t dhp, dev_t dev, uint_t flags, offset_t off,
8195 8192 size_t len, void **pvtp)
8196 8193 {
8197 8194 rsmcookie_t *p;
8198 8195 rsmresource_t *res;
8199 8196 rsmseg_t *seg;
8200 8197 minor_t rnum;
8201 8198 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8202 8199
8203 8200 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_map enter\n"));
8204 8201
8205 8202 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8206 8203 "rsmmap_map: dhp = %x\n", dhp));
8207 8204
8208 8205 flags = flags;
8209 8206
8210 8207 rnum = getminor(dev);
8211 8208 res = (rsmresource_t *)rsmresource_lookup(rnum, RSM_NOLOCK);
8212 8209 ASSERT(res != NULL);
8213 8210
8214 8211 seg = (rsmseg_t *)res;
8215 8212
8216 8213 rsmseglock_acquire(seg);
8217 8214
8218 8215 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8219 8216
8220 8217 /*
8221 8218 * Allocate structure and add cookie to segment list
8222 8219 */
8223 8220 p = kmem_alloc(sizeof (*p), KM_SLEEP);
8224 8221
8225 8222 p->c_dhp = dhp;
8226 8223 p->c_off = off;
8227 8224 p->c_len = len;
8228 8225 p->c_next = seg->s_ckl;
8229 8226 seg->s_ckl = p;
8230 8227
8231 8228 *pvtp = (void *)seg;
8232 8229
8233 8230 rsmseglock_release(seg);
8234 8231
8235 8232 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_map done\n"));
8236 8233 return (DDI_SUCCESS);
8237 8234 }
8238 8235
8239 8236 /*
8240 8237 * Page fault handling is done here. The prerequisite mapping setup
8241 8238 * has been done in rsm_devmap with calls to ddi_devmem_setup or
8242 8239 * ddi_umem_setup
8243 8240 */
8244 8241 static int
8245 8242 rsmmap_access(devmap_cookie_t dhp, void *pvt, offset_t offset, size_t len,
8246 8243 uint_t type, uint_t rw)
8247 8244 {
8248 8245 int e;
8249 8246 rsmseg_t *seg = (rsmseg_t *)pvt;
8250 8247 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8251 8248
8252 8249 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_access enter\n"));
8253 8250
8254 8251 rsmseglock_acquire(seg);
8255 8252
8256 8253 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8257 8254
8258 8255 while (seg->s_state == RSM_STATE_MAP_QUIESCE) {
8259 8256 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
8260 8257 DBG_PRINTF((category, RSM_DEBUG,
8261 8258 "rsmmap_access done: cv_wait INTR"));
8262 8259 rsmseglock_release(seg);
8263 8260 return (RSMERR_INTERRUPTED);
8264 8261 }
8265 8262 }
8266 8263
8267 8264 ASSERT(seg->s_state == RSM_STATE_DISCONNECT ||
8268 8265 seg->s_state == RSM_STATE_ACTIVE);
8269 8266
8270 8267 if (seg->s_state == RSM_STATE_DISCONNECT)
8271 8268 seg->s_flags |= RSM_IMPORT_DUMMY;
8272 8269
8273 8270 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8274 8271 "rsmmap_access: dhp = %x\n", dhp));
8275 8272
8276 8273 rsmseglock_release(seg);
8277 8274
8278 8275 if (e = devmap_load(dhp, offset, len, type, rw)) {
8279 8276 DBG_PRINTF((category, RSM_ERR, "devmap_load failed\n"));
8280 8277 }
8281 8278
8282 8279
8283 8280 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_access done\n"));
8284 8281
8285 8282 return (e);
8286 8283 }
8287 8284
8288 8285 static int
8289 8286 rsmmap_dup(devmap_cookie_t dhp, void *oldpvt, devmap_cookie_t new_dhp,
8290 8287 void **newpvt)
8291 8288 {
8292 8289 rsmseg_t *seg = (rsmseg_t *)oldpvt;
8293 8290 rsmcookie_t *p, *old;
8294 8291 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8295 8292
8296 8293 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_dup enter\n"));
8297 8294
8298 8295 /*
8299 8296 * Same as map, create an entry to hold cookie and add it to
8300 8297 * connect segment list. The oldpvt is a pointer to segment.
8301 8298 * Return segment pointer in newpvt.
8302 8299 */
8303 8300 rsmseglock_acquire(seg);
8304 8301
8305 8302 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8306 8303
8307 8304 /*
8308 8305 * Find old cookie
8309 8306 */
8310 8307 for (old = seg->s_ckl; old != NULL; old = old->c_next) {
8311 8308 if (old->c_dhp == dhp) {
8312 8309 break;
8313 8310 }
8314 8311 }
8315 8312 if (old == NULL) {
8316 8313 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8317 8314 "rsmmap_dup done: EINVAL\n"));
8318 8315 rsmseglock_release(seg);
8319 8316 return (EINVAL);
8320 8317 }
8321 8318
8322 8319 p = kmem_alloc(sizeof (*p), KM_SLEEP);
8323 8320
8324 8321 p->c_dhp = new_dhp;
8325 8322 p->c_off = old->c_off;
8326 8323 p->c_len = old->c_len;
8327 8324 p->c_next = seg->s_ckl;
8328 8325 seg->s_ckl = p;
8329 8326
8330 8327 *newpvt = (void *)seg;
8331 8328
8332 8329 rsmseglock_release(seg);
8333 8330
8334 8331 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_dup done\n"));
8335 8332
8336 8333 return (DDI_SUCCESS);
8337 8334 }
8338 8335
8339 8336 static void
8340 8337 rsmmap_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off, size_t len,
8341 8338 devmap_cookie_t new_dhp1, void **pvtp1,
8342 8339 devmap_cookie_t new_dhp2, void **pvtp2)
8343 8340 {
8344 8341 /*
8345 8342 * Remove pvtp structure from segment list.
8346 8343 */
8347 8344 rsmseg_t *seg = (rsmseg_t *)pvtp;
8348 8345 int freeflag;
8349 8346
8350 8347 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8351 8348
8352 8349 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_unmap enter\n"));
8353 8350
8354 8351 off = off; len = len;
8355 8352 pvtp1 = pvtp1; pvtp2 = pvtp2;
8356 8353
8357 8354 rsmseglock_acquire(seg);
8358 8355
8359 8356 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8360 8357
8361 8358 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8362 8359 "rsmmap_unmap: dhp = %x\n", dhp));
8363 8360 /*
8364 8361 * We can go ahead and remove the dhps even if we are in
8365 8362 * the MAPPING state because the dhps being removed here
8366 8363 * belong to a different mmap and we are holding the segment
8367 8364 * lock.
8368 8365 */
8369 8366 if (new_dhp1 == NULL && new_dhp2 == NULL) {
8370 8367 /* find and remove dhp handle */
8371 8368 rsmcookie_t *tmp, **back = &seg->s_ckl;
8372 8369
8373 8370 while (*back != NULL) {
8374 8371 tmp = *back;
8375 8372 if (tmp->c_dhp == dhp) {
8376 8373 *back = tmp->c_next;
8377 8374 kmem_free(tmp, sizeof (*tmp));
8378 8375 break;
8379 8376 }
8380 8377 back = &tmp->c_next;
8381 8378 }
8382 8379 } else {
8383 8380 DBG_PRINTF((category, RSM_DEBUG_LVL2,
8384 8381 "rsmmap_unmap:parital unmap"
8385 8382 "new_dhp1 %lx, new_dhp2 %lx\n",
8386 8383 (size_t)new_dhp1, (size_t)new_dhp2));
8387 8384 }
8388 8385
8389 8386 /*
8390 8387 * rsmmap_unmap is called for each mapping cookie on the list.
8391 8388 * When the list becomes empty and we are not in the MAPPING
8392 8389 * state then unmap in the rsmpi driver.
8393 8390 */
8394 8391 if ((seg->s_ckl == NULL) && (seg->s_state != RSM_STATE_MAPPING))
8395 8392 (void) rsm_unmap(seg);
8396 8393
8397 8394 if (seg->s_state == RSM_STATE_END && seg->s_ckl == NULL) {
8398 8395 freeflag = 1;
8399 8396 } else {
8400 8397 freeflag = 0;
8401 8398 }
8402 8399
8403 8400 rsmseglock_release(seg);
8404 8401
8405 8402 if (freeflag) {
8406 8403 /* Free the segment structure */
8407 8404 rsmseg_free(seg);
8408 8405 }
8409 8406 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_unmap done\n"));
8410 8407
8411 8408 }
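/*
 * An illustrative sketch, not part of the driver: the single-pass
 * removal in rsmmap_unmap above uses the classic pointer-to-pointer
 * unlink idiom, which needs no special case for the list head. The
 * node type below is hypothetical, invented for the example.
 */
#if 0	/* example only, never compiled */
struct node {
	struct node	*next;
	int		key;
};

static void
unlink_node(struct node **head, int key)
{
	struct node	**back = head;	/* slot holding the current link */

	while (*back != NULL) {
		struct node *tmp = *back;

		if (tmp->key == key) {
			*back = tmp->next;	/* bypass the matched node */
			kmem_free(tmp, sizeof (*tmp));
			break;
		}
		back = &tmp->next;	/* advance to the next link slot */
	}
}
#endif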
8412 8409
8413 8410 static struct devmap_callback_ctl rsmmap_ops = {
8414 8411 DEVMAP_OPS_REV, /* devmap_ops version number */
8415 8412 rsmmap_map, /* devmap_ops map routine */
8416 8413 rsmmap_access, /* devmap_ops access routine */
8417 8414 rsmmap_dup, /* devmap_ops dup routine */
8418 8415 rsmmap_unmap, /* devmap_ops unmap routine */
8419 8416 };
8420 8417
8421 8418 static int
8422 8419 rsm_devmap(dev_t dev, devmap_cookie_t dhc, offset_t off, size_t len,
8423 8420 size_t *maplen, uint_t model /*ARGSUSED*/)
8424 8421 {
8425 8422 struct devmap_callback_ctl *callbackops = &rsmmap_ops;
8426 8423 int err;
8427 8424 uint_t maxprot;
8428 8425 minor_t rnum;
8429 8426 rsmseg_t *seg;
8430 8427 off_t dev_offset;
8431 8428 size_t cur_len;
8432 8429 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8433 8430
8434 8431 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_devmap enter\n"));
8435 8432
8436 8433 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8437 8434 "rsm_devmap: off = %lx, len = %lx\n", off, len));
8438 8435 rnum = getminor(dev);
8439 8436 seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_NOLOCK);
8440 8437 ASSERT(seg != NULL);
8441 8438
8442 8439 if (seg->s_hdr.rsmrc_type == RSM_RESOURCE_BAR) {
8443 8440 if ((off == barrier_offset) &&
8444 8441 (len == barrier_size)) {
8445 8442
8446 8443 ASSERT(bar_va != NULL && bar_cookie != NULL);
8447 8444
8448 8445 /*
8449 8446 * The offset argument in devmap_umem_setup represents
8450 8447 * the offset within the kernel memory defined by the
8451 8448 * cookie. We use this offset as barrier_offset.
8452 8449 */
8453 8450 err = devmap_umem_setup(dhc, rsm_dip, NULL, bar_cookie,
8454 8451 barrier_offset, len, PROT_USER|PROT_READ,
8455 8452 DEVMAP_DEFAULTS, 0);
8456 8453
8457 8454 if (err != 0) {
8458 8455 DBG_PRINTF((category, RSM_ERR,
8459 8456 "rsm_devmap done: %d\n", err));
8460 8457 return (RSMERR_MAP_FAILED);
8461 8458 }
8462 8459 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8463 8460 "rsm_devmap done: %d\n", err));
8464 8461
8465 8462 *maplen = barrier_size;
8466 8463
8467 8464 return (err);
8468 8465 } else {
8469 8466 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8470 8467 "rsm_devmap done: %d\n", err));
8471 8468 return (RSMERR_MAP_FAILED);
8472 8469 }
8473 8470 }
8474 8471
8475 8472 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8476 8473 ASSERT(seg->s_state == RSM_STATE_MAPPING);
8477 8474
8478 8475 /*
8479 8476 * Make sure we still have permission for the map operation.
8480 8477 */
8481 8478 maxprot = PROT_USER;
8482 8479 if (seg->s_mode & RSM_PERM_READ) {
8483 8480 maxprot |= PROT_READ;
8484 8481 }
8485 8482
8486 8483 if (seg->s_mode & RSM_PERM_WRITE) {
8487 8484 maxprot |= PROT_WRITE;
8488 8485 }
8489 8486
8490 8487 /*
8491 8488 * For each devmap call, rsmmap_map is called. This maintains driver
8492 8489 * private information for the mapping. Thus, if there are multiple
8493 8490 * devmap calls there will be multiple rsmmap_map calls and for each
8494 8491 * call, the mapping information will be stored.
8495 8492 * In case of an error during the processing of the devmap call, error
8496 8493 * will be returned. This error return causes the caller of rsm_devmap
8497 8494 * to undo all the mappings by calling rsmmap_unmap for each one.
8498 8495 * rsmmap_unmap will free up the private information for the requested
8499 8496 * mapping.
8500 8497 */
8501 8498 if (seg->s_node != my_nodeid) {
8502 8499 rsm_mapinfo_t *p;
8503 8500
8504 8501 p = rsm_get_mapinfo(seg, off, len, &dev_offset, &cur_len);
8505 8502 if (p == NULL) {
8506 8503 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8507 8504 "rsm_devmap: incorrect mapping info\n"));
8508 8505 return (RSMERR_MAP_FAILED);
8509 8506 }
8510 8507 err = devmap_devmem_setup(dhc, p->dip,
8511 8508 callbackops, p->dev_register,
8512 8509 dev_offset, cur_len, maxprot,
8513 8510 DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS, 0);
8514 8511
8515 8512 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8516 8513 "rsm_devmap: dip=%lx,dreg=%lu,doff=%lx,"
8517 8514 "off=%lx,len=%lx\n",
8518 8515 p->dip, p->dev_register, dev_offset, off, cur_len));
8519 8516
8520 8517 if (err != 0) {
8521 8518 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8522 8519 "rsm_devmap: devmap_devmem_setup failed %d\n",
8523 8520 err));
8524 8521 return (RSMERR_MAP_FAILED);
8525 8522 }
8526 8523 		/* cur_len is always an integral multiple of PAGESIZE */
8527 8524 ASSERT((cur_len & (PAGESIZE-1)) == 0);
8528 8525 *maplen = cur_len;
8529 8526 return (err);
8530 8527
8531 8528 } else {
8532 8529 err = devmap_umem_setup(dhc, rsm_dip, callbackops,
8533 8530 seg->s_cookie, off, len, maxprot,
8534 8531 DEVMAP_ALLOW_REMAP|DEVMAP_DEFAULTS, 0);
8535 8532 if (err != 0) {
8536 8533 DBG_PRINTF((category, RSM_DEBUG,
8537 8534 "rsm_devmap: devmap_umem_setup failed %d\n",
8538 8535 err));
8539 8536 return (RSMERR_MAP_FAILED);
8540 8537 }
8541 8538 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8542 8539 "rsm_devmap: loopback done\n"));
8543 8540
8544 8541 *maplen = ptob(btopr(len));
8545 8542
8546 8543 return (err);
8547 8544 }
8548 8545 }
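/*
 * An informal summary, for review convenience, of the decision flow in
 * rsm_devmap above:
 *
 *	if (resource is the barrier page)
 *		devmap_umem_setup(bar_cookie)	 map kernel barrier memory
 *	else if (seg->s_node != my_nodeid)	 remote import segment
 *		devmap_devmem_setup(mapinfo entry covering off)
 *	else					 loopback import segment
 *		devmap_umem_setup(seg->s_cookie) share the exporter's memory
 *
 * Each successful setup reports the PAGESIZE-aligned length actually
 * mapped via *maplen; the devmap framework invokes rsm_devmap again
 * for any remainder of the requested range.
 */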
8549 8546
8550 8547 /*
8551 8548 * We can use the devmap framework for mapping device memory to user space by
8552 8549 * specifying this routine in the rsm_cb_ops structure. The kernel mmap
8553 8550 * processing calls this entry point and devmap_setup is called within this
8554 8551 * function, which eventually calls rsm_devmap
8555 8552 */
8556 8553 static int
8557 8554 rsm_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len,
8558 8555 uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred)
8559 8556 {
8560 8557 int error = 0;
8561 8558 int old_state;
8562 8559 minor_t rnum;
8563 8560 rsmseg_t *seg, *eseg;
8564 8561 adapter_t *adapter;
8565 8562 rsm_import_share_t *sharedp;
8566 8563 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8567 8564
8568 8565 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_segmap enter\n"));
8569 8566
8570 8567 /*
8571 8568 * find segment
8572 8569 */
8573 8570 rnum = getminor(dev);
8574 8571 seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_LOCK);
8575 8572
8576 8573 if (seg == NULL) {
8577 8574 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8578 8575 "rsm_segmap done: invalid segment\n"));
8579 8576 return (EINVAL);
8580 8577 }
8581 8578
8582 8579 /*
8583 8580 * the user is trying to map a resource that has not been
8584 8581 * defined yet. The library uses this to map in the
8585 8582 * barrier page.
8586 8583 */
8587 8584 if (seg->s_hdr.rsmrc_type == RSM_RESOURCE_BAR) {
8588 8585 rsmseglock_release(seg);
8589 8586
8590 8587 /*
8591 8588 * The mapping for the barrier page is identified
8592 8589 * by the special offset barrier_offset
8593 8590 */
8594 8591
8595 8592 if (off == (off_t)barrier_offset ||
8596 8593 len == (off_t)barrier_size) {
8597 8594 if (bar_cookie == NULL || bar_va == NULL) {
8598 8595 DBG_PRINTF((category, RSM_DEBUG,
8599 8596 "rsm_segmap: bar cookie/va is NULL\n"));
8600 8597 return (EINVAL);
8601 8598 }
8602 8599
8603 8600 error = devmap_setup(dev, (offset_t)off, as, addrp,
8604 8601 (size_t)len, prot, maxprot, flags, cred);
8605 8602
8606 8603 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8607 8604 "rsm_segmap done: %d\n", error));
8608 8605 return (error);
8609 8606 } else {
8610 8607 DBG_PRINTF((category, RSM_DEBUG,
8611 8608 "rsm_segmap: bad offset/length\n"));
8612 8609 return (EINVAL);
8613 8610 }
8614 8611 }
8615 8612
8616 8613 /* Make sure you can only map imported segments */
8617 8614 if (seg->s_hdr.rsmrc_type != RSM_RESOURCE_IMPORT_SEGMENT) {
8618 8615 rsmseglock_release(seg);
8619 8616 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8620 8617 "rsm_segmap done: not an import segment\n"));
8621 8618 return (EINVAL);
8622 8619 }
8623 8620 	/* if this check fails, the library is broken */
8624 8621 ASSERT(seg->s_hdr.rsmrc_num == rnum);
8625 8622
8626 8623 /* wait for the segment to become unquiesced */
8627 8624 while (seg->s_state == RSM_STATE_CONN_QUIESCE) {
8628 8625 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
8629 8626 rsmseglock_release(seg);
8630 8627 DBG_PRINTF((category, RSM_DEBUG,
8631 8628 "rsm_segmap done: cv_wait INTR"));
8632 8629 return (ENODEV);
8633 8630 }
8634 8631 }
8635 8632
8636 8633 /* wait until segment leaves the mapping state */
8637 8634 while (seg->s_state == RSM_STATE_MAPPING)
8638 8635 cv_wait(&seg->s_cv, &seg->s_lock);
8639 8636
8640 8637 /*
8641 8638 * we allow multiple maps of the same segment in the KA
8642 8639 * and it works because we do an rsmpi map of the whole
8643 8640 * segment during the first map and all the device mapping
8644 8641 * information needed in rsm_devmap is in the mapinfo list.
8645 8642 */
8646 8643 if ((seg->s_state != RSM_STATE_CONNECT) &&
8647 8644 (seg->s_state != RSM_STATE_ACTIVE)) {
8648 8645 rsmseglock_release(seg);
8649 8646 DBG_PRINTF((category, RSM_DEBUG,
8650 8647 "rsm_segmap done: segment not connected\n"));
8651 8648 return (ENODEV);
8652 8649 }
8653 8650
8654 8651 /*
8655 8652 * Make sure we are not mapping a larger segment than what's
8656 8653 * exported
8657 8654 */
8658 8655 if ((size_t)off + ptob(btopr(len)) > seg->s_len) {
8659 8656 rsmseglock_release(seg);
8660 8657 DBG_PRINTF((category, RSM_DEBUG,
8661 8658 "rsm_segmap done: off+len>seg size\n"));
8662 8659 return (ENXIO);
8663 8660 }
8664 8661
8665 8662 /*
8666 8663 * Make sure we still have permission for the map operation.
8667 8664 */
8668 8665 maxprot = PROT_USER;
8669 8666 if (seg->s_mode & RSM_PERM_READ) {
8670 8667 maxprot |= PROT_READ;
8671 8668 }
8672 8669
8673 8670 if (seg->s_mode & RSM_PERM_WRITE) {
8674 8671 maxprot |= PROT_WRITE;
8675 8672 }
8676 8673
8677 8674 if ((prot & maxprot) != prot) {
8678 8675 /* No permission */
8679 8676 rsmseglock_release(seg);
8680 8677 DBG_PRINTF((category, RSM_DEBUG,
8681 8678 "rsm_segmap done: no permission\n"));
8682 8679 return (EACCES);
8683 8680 }
8684 8681
8685 8682 old_state = seg->s_state;
8686 8683
8687 8684 ASSERT(seg->s_share != NULL);
8688 8685
8689 8686 rsmsharelock_acquire(seg);
8690 8687
8691 8688 sharedp = seg->s_share;
8692 8689
8693 8690 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8694 8691 "rsm_segmap:RSMSI_STATE=%d\n", sharedp->rsmsi_state));
8695 8692
8696 8693 if ((sharedp->rsmsi_state != RSMSI_STATE_CONNECTED) &&
8697 8694 (sharedp->rsmsi_state != RSMSI_STATE_MAPPED)) {
8698 8695 rsmsharelock_release(seg);
8699 8696 rsmseglock_release(seg);
8700 8697 DBG_PRINTF((category, RSM_DEBUG,
8701 8698 "rsm_segmap done:RSMSI_STATE %d invalid\n",
8702 8699 sharedp->rsmsi_state));
8703 8700 return (ENODEV);
8704 8701 }
8705 8702
8706 8703 /*
8707 8704 * Do the map - since we want importers to share mappings
8708 8705 * we do the rsmpi map for the whole segment
8709 8706 */
8710 8707 if (seg->s_node != my_nodeid) {
8711 8708 uint_t dev_register;
8712 8709 off_t dev_offset;
8713 8710 dev_info_t *dip;
8714 8711 size_t tmp_len;
8715 8712 size_t total_length_mapped = 0;
8716 8713 size_t length_to_map = seg->s_len;
8717 8714 off_t tmp_off = 0;
8718 8715 rsm_mapinfo_t *p;
8719 8716
8720 8717 /*
8721 8718 * length_to_map = seg->s_len is always an integral
8722 8719 * multiple of PAGESIZE. Length mapped in each entry in mapinfo
8723 8720 * list is a multiple of PAGESIZE - RSMPI map ensures this
8724 8721 */
8725 8722
8726 8723 adapter = seg->s_adapter;
8727 8724 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_CONNECTED ||
8728 8725 sharedp->rsmsi_state == RSMSI_STATE_MAPPED);
8729 8726
8730 8727 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) {
8731 8728 error = 0;
8732 8729 /* map the whole segment */
8733 8730 while (total_length_mapped < seg->s_len) {
8734 8731 tmp_len = 0;
8735 8732
8736 8733 error = adapter->rsmpi_ops->rsm_map(
8737 8734 seg->s_handle.in, tmp_off,
8738 8735 length_to_map, &tmp_len,
8739 8736 &dip, &dev_register, &dev_offset,
8740 8737 NULL, NULL);
8741 8738
8742 8739 if (error != 0)
8743 8740 break;
8744 8741
8745 8742 /*
8746 8743 * Store the mapping info obtained from rsm_map
8747 8744 */
8748 8745 p = kmem_alloc(sizeof (*p), KM_SLEEP);
8749 8746 p->dev_register = dev_register;
8750 8747 p->dev_offset = dev_offset;
8751 8748 p->dip = dip;
8752 8749 p->individual_len = tmp_len;
8753 8750 p->start_offset = tmp_off;
8754 8751 p->next = sharedp->rsmsi_mapinfo;
8755 8752 sharedp->rsmsi_mapinfo = p;
8756 8753
8757 8754 total_length_mapped += tmp_len;
8758 8755 length_to_map -= tmp_len;
8759 8756 tmp_off += tmp_len;
8760 8757 }
8761 8758 seg->s_mapinfo = sharedp->rsmsi_mapinfo;
8762 8759
8763 8760 if (error != RSM_SUCCESS) {
8764 8761 			/* Check if this is the first rsm_map */
8765 8762 if (sharedp->rsmsi_mapinfo != NULL) {
8766 8763 /*
8767 8764 * A single rsm_unmap undoes
8768 8765 * multiple rsm_maps.
8769 8766 */
8770 8767 (void) seg->s_adapter->rsmpi_ops->
8771 8768 rsm_unmap(sharedp->rsmsi_handle);
8772 8769 rsm_free_mapinfo(sharedp->
8773 8770 rsmsi_mapinfo);
8774 8771 }
8775 8772 sharedp->rsmsi_mapinfo = NULL;
8776 8773 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
8777 8774 rsmsharelock_release(seg);
8778 8775 rsmseglock_release(seg);
8779 8776 DBG_PRINTF((category, RSM_DEBUG,
8780 8777 "rsm_segmap done: rsmpi map err %d\n",
8781 8778 error));
8782 8779 ASSERT(error != RSMERR_BAD_LENGTH &&
8783 8780 error != RSMERR_BAD_MEM_ALIGNMENT &&
8784 8781 error != RSMERR_BAD_SEG_HNDL);
8785 8782 if (error == RSMERR_UNSUPPORTED_OPERATION)
8786 8783 return (ENOTSUP);
8787 8784 else if (error == RSMERR_INSUFFICIENT_RESOURCES)
8788 8785 return (EAGAIN);
8789 8786 else if (error == RSMERR_CONN_ABORTED)
8790 8787 return (ENODEV);
8791 8788 else
8792 8789 return (error);
8793 8790 } else {
8794 8791 sharedp->rsmsi_state = RSMSI_STATE_MAPPED;
8795 8792 }
8796 8793 } else {
8797 8794 seg->s_mapinfo = sharedp->rsmsi_mapinfo;
8798 8795 }
8799 8796
8800 8797 sharedp->rsmsi_mapcnt++;
8801 8798
8802 8799 rsmsharelock_release(seg);
8803 8800
8804 8801 /* move to an intermediate mapping state */
8805 8802 seg->s_state = RSM_STATE_MAPPING;
8806 8803 rsmseglock_release(seg);
8807 8804
8808 8805 error = devmap_setup(dev, (offset_t)off, as, addrp,
8809 8806 len, prot, maxprot, flags, cred);
8810 8807
8811 8808 rsmseglock_acquire(seg);
8812 8809 ASSERT(seg->s_state == RSM_STATE_MAPPING);
8813 8810
8814 8811 if (error == DDI_SUCCESS) {
8815 8812 seg->s_state = RSM_STATE_ACTIVE;
8816 8813 } else {
8817 8814 rsmsharelock_acquire(seg);
8818 8815
8819 8816 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED);
8820 8817
8821 8818 sharedp->rsmsi_mapcnt--;
8822 8819 if (sharedp->rsmsi_mapcnt == 0) {
8823 8820 /* unmap the shared RSMPI mapping */
8824 8821 ASSERT(sharedp->rsmsi_handle != NULL);
8825 8822 (void) adapter->rsmpi_ops->
8826 8823 rsm_unmap(sharedp->rsmsi_handle);
8827 8824 rsm_free_mapinfo(sharedp->rsmsi_mapinfo);
8828 8825 sharedp->rsmsi_mapinfo = NULL;
8829 8826 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
8830 8827 }
8831 8828
8832 8829 rsmsharelock_release(seg);
8833 8830 seg->s_state = old_state;
8834 8831 DBG_PRINTF((category, RSM_ERR,
8835 8832 "rsm: devmap_setup failed %d\n", error));
8836 8833 }
8837 8834 cv_broadcast(&seg->s_cv);
8838 8835 rsmseglock_release(seg);
8839 8836 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsm_segmap done: %d\n",
8840 8837 error));
8841 8838 return (error);
8842 8839 } else {
8843 8840 /*
8844 8841 * For loopback, the export segment mapping cookie (s_cookie)
8845 8842 * is also used as the s_cookie value for its import segments
8846 8843 * during mapping.
8847 8844 * Note that reference counting for s_cookie of the export
8848 8845 * segment is not required due to the following:
8849 8846 * We never have a case of the export segment being destroyed,
8850 8847 * leaving the import segments with a stale value for the
8851 8848 * s_cookie field, since a force disconnect is done prior to a
8852 8849 * destroy of an export segment. The force disconnect causes
8853 8850 * the s_cookie value to be reset to NULL. Also for the
8854 8851 * rsm_rebind operation, we change the s_cookie value of the
8855 8852 * export segment as well as of all its local (loopback)
8856 8853 * importers.
8857 8854 */
8858 8855 DBG_ADDCATEGORY(category, RSM_LOOPBACK);
8859 8856
8860 8857 rsmsharelock_release(seg);
8861 8858 /*
8862 8859 * In order to maintain the lock ordering between the export
8863 8860 * and import segment locks, we need to acquire the export
8864 8861 * segment lock first and only then acquire the import
8865 8862 * segment lock.
8866 8863 * The above is necessary to avoid any deadlock scenarios
8867 8864 * with rsm_rebind which also acquires both the export
8868 8865 * and import segment locks in the above mentioned order.
8869 8866 * Based on code inspection, there seem to be no other
8870 8867 * situations in which both the export and import segment
8871 8868 * locks are acquired either in the same or opposite order
8872 8869 * as mentioned above.
8873 8870 * Thus in order to conform to the above lock order, we
8874 8871 * need to change the state of the import segment to
8875 8872 * RSM_STATE_MAPPING, release the lock. Once this is done we
8876 8873 * can now safely acquire the export segment lock first
8877 8874 * followed by the import segment lock which is as per
8878 8875 * the lock order mentioned above.
8879 8876 */
8880 8877 /* move to an intermediate mapping state */
8881 8878 seg->s_state = RSM_STATE_MAPPING;
8882 8879 rsmseglock_release(seg);
8883 8880
8884 8881 eseg = rsmexport_lookup(seg->s_key);
8885 8882
8886 8883 if (eseg == NULL) {
8887 8884 rsmseglock_acquire(seg);
8888 8885 /*
8889 8886 * Revert to old_state and signal any waiters
8890 8887 * The shared state is not changed
8891 8888 */
8892 8889
8893 8890 seg->s_state = old_state;
8894 8891 cv_broadcast(&seg->s_cv);
8895 8892 rsmseglock_release(seg);
8896 8893 DBG_PRINTF((category, RSM_DEBUG,
8897 8894 "rsm_segmap done: key %d not found\n", seg->s_key));
8898 8895 return (ENODEV);
8899 8896 }
8900 8897
8901 8898 rsmsharelock_acquire(seg);
8902 8899 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_CONNECTED ||
8903 8900 sharedp->rsmsi_state == RSMSI_STATE_MAPPED);
8904 8901
8905 8902 sharedp->rsmsi_mapcnt++;
8906 8903 sharedp->rsmsi_state = RSMSI_STATE_MAPPED;
8907 8904 rsmsharelock_release(seg);
8908 8905
8909 8906 ASSERT(eseg->s_cookie != NULL);
8910 8907
8911 8908 /*
8912 8909 * It is not required or necessary to acquire the import
8913 8910 * segment lock here to change the value of s_cookie since
8914 8911 * no one will touch the import segment as long as it is
8915 8912 * in the RSM_STATE_MAPPING state.
8916 8913 */
8917 8914 seg->s_cookie = eseg->s_cookie;
8918 8915
8919 8916 rsmseglock_release(eseg);
8920 8917
8921 8918 error = devmap_setup(dev, (offset_t)off, as, addrp, (size_t)len,
8922 8919 prot, maxprot, flags, cred);
8923 8920
8924 8921 rsmseglock_acquire(seg);
8925 8922 ASSERT(seg->s_state == RSM_STATE_MAPPING);
8926 8923 if (error == 0) {
8927 8924 seg->s_state = RSM_STATE_ACTIVE;
8928 8925 } else {
8929 8926 rsmsharelock_acquire(seg);
8930 8927
8931 8928 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED);
8932 8929
8933 8930 sharedp->rsmsi_mapcnt--;
8934 8931 if (sharedp->rsmsi_mapcnt == 0) {
8935 8932 sharedp->rsmsi_mapinfo = NULL;
8936 8933 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
8937 8934 }
8938 8935 rsmsharelock_release(seg);
8939 8936 seg->s_state = old_state;
8940 8937 seg->s_cookie = NULL;
8941 8938 }
8942 8939 cv_broadcast(&seg->s_cv);
8943 8940 rsmseglock_release(seg);
8944 8941 DBG_PRINTF((category, RSM_DEBUG_LVL2,
8945 8942 "rsm_segmap done: %d\n", error));
8946 8943 return (error);
8947 8944 }
8948 8945 }
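/*
 * An illustrative sketch, not part of the driver: the remote-map loop
 * in rsm_segmap above accumulates mapinfo entries until the whole
 * segment is covered. For a hypothetical 24K (0x6000) segment that the
 * RSMPI driver chooses to map in 8K pieces, the iterations would be
 * (all values invented):
 *
 *	iter 1: tmp_off = 0x0000, tmp_len = 0x2000 -> entry pushed
 *	iter 2: tmp_off = 0x2000, tmp_len = 0x2000 -> entry pushed
 *	iter 3: tmp_off = 0x4000, tmp_len = 0x2000 -> entry pushed
 *
 * total_length_mapped then equals seg->s_len and the loop exits. Since
 * each entry is pushed at the head of rsmsi_mapinfo, the list ends up
 * in reverse order, which is exactly the layout rsm_get_mapinfo
 * expects.
 */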
8949 8946
8950 8947 int
8951 8948 rsmka_null_seg_create(
8952 8949 rsm_controller_handle_t argcp,
8953 8950 rsm_memseg_export_handle_t *handle,
8954 8951 size_t size,
8955 8952 uint_t flags,
8956 8953 rsm_memory_local_t *memory,
8957 8954 rsm_resource_callback_t callback,
8958 8955 rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/)
8959 8956 {
8960 8957 return (RSM_SUCCESS);
8961 8958 }
8962 8959
8963 8960
8964 8961 int
8965 8962 rsmka_null_seg_destroy(
8966 8963 rsm_memseg_export_handle_t argmemseg /*ARGSUSED*/)
8967 8964 {
8968 8965 return (RSM_SUCCESS);
8969 8966 }
8970 8967
8971 8968
8972 8969 int
8973 8970 rsmka_null_bind(
8974 8971 rsm_memseg_export_handle_t argmemseg,
8975 8972 off_t offset,
8976 8973 rsm_memory_local_t *argmemory,
8977 8974 rsm_resource_callback_t callback,
8978 8975 rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/)
8979 8976 {
8980 8977 return (RSM_SUCCESS);
8981 8978 }
8982 8979
8983 8980
8984 8981 int
8985 8982 rsmka_null_unbind(
8986 8983 rsm_memseg_export_handle_t argmemseg,
8987 8984 off_t offset,
8988 8985 size_t length /*ARGSUSED*/)
8989 8986 {
8990 8987 return (DDI_SUCCESS);
8991 8988 }
8992 8989
8993 8990 int
8994 8991 rsmka_null_rebind(
8995 8992 rsm_memseg_export_handle_t argmemseg,
8996 8993 off_t offset,
8997 8994 rsm_memory_local_t *memory,
8998 8995 rsm_resource_callback_t callback,
8999 8996 rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/)
9000 8997 {
9001 8998 return (RSM_SUCCESS);
9002 8999 }
9003 9000
9004 9001 int
9005 9002 rsmka_null_publish(
9006 9003 rsm_memseg_export_handle_t argmemseg,
9007 9004 rsm_access_entry_t access_list[],
9008 9005 uint_t access_list_length,
9009 9006 rsm_memseg_id_t segment_id,
9010 9007 rsm_resource_callback_t callback,
9011 9008 rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/)
9012 9009 {
9013 9010 return (RSM_SUCCESS);
9014 9011 }
9015 9012
9016 9013
9017 9014 int
9018 9015 rsmka_null_republish(
9019 9016 rsm_memseg_export_handle_t memseg,
9020 9017 rsm_access_entry_t access_list[],
9021 9018 uint_t access_list_length,
9022 9019 rsm_resource_callback_t callback,
9023 9020 rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/)
9024 9021 {
9025 9022 return (RSM_SUCCESS);
9026 9023 }
9027 9024
9028 9025 int
9029 9026 rsmka_null_unpublish(
9030 9027 rsm_memseg_export_handle_t argmemseg /*ARGSUSED*/)
9031 9028 {
9032 9029 return (RSM_SUCCESS);
9033 9030 }
9034 9031
9035 9032
9036 9033 void
9037 9034 rsmka_init_loopback()
9038 9035 {
9039 9036 rsm_ops_t *ops = &null_rsmpi_ops;
9040 9037 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_LOOPBACK);
9041 9038
9042 9039 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9043 9040 "rsmka_init_loopback enter\n"));
9044 9041
9045 9042 /* initialize null ops vector */
9046 9043 ops->rsm_seg_create = rsmka_null_seg_create;
9047 9044 ops->rsm_seg_destroy = rsmka_null_seg_destroy;
9048 9045 ops->rsm_bind = rsmka_null_bind;
9049 9046 ops->rsm_unbind = rsmka_null_unbind;
9050 9047 ops->rsm_rebind = rsmka_null_rebind;
9051 9048 ops->rsm_publish = rsmka_null_publish;
9052 9049 ops->rsm_unpublish = rsmka_null_unpublish;
9053 9050 ops->rsm_republish = rsmka_null_republish;
9054 9051
9055 9052 /* initialize attributes for loopback adapter */
9056 9053 loopback_attr.attr_name = loopback_str;
9057 9054 loopback_attr.attr_page_size = 0x8; /* 8K */
9058 9055
9059 9056 /* initialize loopback adapter */
9060 9057 loopback_adapter.rsm_attr = loopback_attr;
9061 9058 loopback_adapter.rsmpi_ops = &null_rsmpi_ops;
9062 9059 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9063 9060 "rsmka_init_loopback done\n"));
9064 9061 }
9065 9062
9066 9063 /* ************** DR functions ********************************** */
9067 9064 static void
9068 9065 rsm_quiesce_exp_seg(rsmresource_t *resp)
9069 9066 {
9070 9067 int recheck_state;
9071 9068 rsmseg_t *segp = (rsmseg_t *)resp;
9072 9069 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9073 9070 	DBG_DEFINE_STR(function, "rsm_quiesce_exp_seg");
9074 9071
9075 9072 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9076 9073 "%s enter: key=%u\n", function, segp->s_key));
9077 9074
9078 9075 rsmseglock_acquire(segp);
9079 9076 do {
9080 9077 recheck_state = 0;
9081 9078 if ((segp->s_state == RSM_STATE_NEW_QUIESCED) ||
9082 9079 (segp->s_state == RSM_STATE_BIND_QUIESCED) ||
9083 9080 (segp->s_state == RSM_STATE_EXPORT_QUIESCING) ||
9084 9081 (segp->s_state == RSM_STATE_EXPORT_QUIESCED)) {
9085 9082 rsmseglock_release(segp);
9086 9083 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9087 9084 "%s done:state =%d\n", function,
9088 9085 segp->s_state));
9089 9086 return;
9090 9087 }
9091 9088
9092 9089 if (segp->s_state == RSM_STATE_NEW) {
9093 9090 segp->s_state = RSM_STATE_NEW_QUIESCED;
9094 9091 rsmseglock_release(segp);
9095 9092 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9096 9093 "%s done:state =%d\n", function,
9097 9094 segp->s_state));
9098 9095 return;
9099 9096 }
9100 9097
9101 9098 if (segp->s_state == RSM_STATE_BIND) {
9102 9099 /* unbind */
9103 9100 (void) rsm_unbind_pages(segp);
9104 9101 segp->s_state = RSM_STATE_BIND_QUIESCED;
9105 9102 rsmseglock_release(segp);
9106 9103 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9107 9104 "%s done:state =%d\n", function,
9108 9105 segp->s_state));
9109 9106 return;
9110 9107 }
9111 9108
9112 9109 if (segp->s_state == RSM_STATE_EXPORT) {
9113 9110 /*
9114 9111 * wait for putv/getv to complete if the segp is
9115 9112 * a local memory handle
9116 9113 */
9117 9114 while ((segp->s_state == RSM_STATE_EXPORT) &&
9118 9115 (segp->s_rdmacnt != 0)) {
9119 9116 cv_wait(&segp->s_cv, &segp->s_lock);
9120 9117 }
9121 9118
9122 9119 if (segp->s_state != RSM_STATE_EXPORT) {
9123 9120 /*
9124 9121 * state changed need to see what it
9125 9122 * should be changed to.
9126 9123 */
9127 9124 recheck_state = 1;
9128 9125 continue;
9129 9126 }
9130 9127
9131 9128 segp->s_state = RSM_STATE_EXPORT_QUIESCING;
9132 9129 rsmseglock_release(segp);
9133 9130 /*
9134 9131 * send SUSPEND messages - currently it will be
9135 9132 * done at the end
9136 9133 */
9137 9134 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9138 9135 "%s done:state =%d\n", function,
9139 9136 segp->s_state));
9140 9137 return;
9141 9138 }
9142 9139 } while (recheck_state);
9143 9140
9144 9141 rsmseglock_release(segp);
9145 9142 }
9146 9143
9147 9144 static void
9148 9145 rsm_unquiesce_exp_seg(rsmresource_t *resp)
9149 9146 {
9150 9147 int ret;
9151 9148 rsmseg_t *segp = (rsmseg_t *)resp;
9152 9149 rsmapi_access_entry_t *acl;
9153 9150 rsm_access_entry_t *rsmpi_acl;
9154 9151 int acl_len;
9155 9152 int create_flags = 0;
9156 9153 struct buf *xbuf;
9157 9154 rsm_memory_local_t mem;
9158 9155 adapter_t *adapter;
9159 9156 dev_t sdev = 0;
9160 9157 rsm_resource_callback_t callback_flag;
9161 9158 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9162 9159 DBG_DEFINE_STR(function, "rsm_unquiesce_exp_seg");
9163 9160
9164 9161 rsmseglock_acquire(segp);
9165 9162
9166 9163 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9167 9164 "%s enter: key=%u, state=%d\n", function, segp->s_key,
9168 9165 segp->s_state));
9169 9166
9170 9167 if ((segp->s_state == RSM_STATE_NEW) ||
9171 9168 (segp->s_state == RSM_STATE_BIND) ||
9172 9169 (segp->s_state == RSM_STATE_EXPORT)) {
9173 9170 rsmseglock_release(segp);
9174 9171 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done:state=%d\n",
9175 9172 function, segp->s_state));
9176 9173 return;
9177 9174 }
9178 9175
9179 9176 if (segp->s_state == RSM_STATE_NEW_QUIESCED) {
9180 9177 segp->s_state = RSM_STATE_NEW;
9181 9178 cv_broadcast(&segp->s_cv);
9182 9179 rsmseglock_release(segp);
9183 9180 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done:state=%d\n",
9184 9181 function, segp->s_state));
9185 9182 return;
9186 9183 }
9187 9184
9188 9185 if (segp->s_state == RSM_STATE_BIND_QUIESCED) {
9189 9186 /* bind the segment */
9190 9187 ret = rsm_bind_pages(&segp->s_cookie, segp->s_region.r_vaddr,
9191 9188 segp->s_len, segp->s_proc);
9192 9189 if (ret == RSM_SUCCESS) { /* bind successful */
9193 9190 segp->s_state = RSM_STATE_BIND;
9194 9191 } else { /* bind failed - resource unavailable */
9195 9192 segp->s_state = RSM_STATE_NEW;
9196 9193 }
9197 9194 cv_broadcast(&segp->s_cv);
9198 9195 rsmseglock_release(segp);
9199 9196 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9200 9197 "%s done: bind_qscd bind = %d\n", function, ret));
9201 9198 return;
9202 9199 }
9203 9200
9204 9201 while (segp->s_state == RSM_STATE_EXPORT_QUIESCING) {
9205 9202 /* wait for the segment to move to EXPORT_QUIESCED state */
9206 9203 cv_wait(&segp->s_cv, &segp->s_lock);
9207 9204 }
9208 9205
9209 9206 if (segp->s_state == RSM_STATE_EXPORT_QUIESCED) {
9210 9207 /* bind the segment */
9211 9208 ret = rsm_bind_pages(&segp->s_cookie, segp->s_region.r_vaddr,
9212 9209 segp->s_len, segp->s_proc);
9213 9210
9214 9211 if (ret != RSM_SUCCESS) {
9215 9212 /* bind failed - resource unavailable */
9216 9213 acl_len = segp->s_acl_len;
9217 9214 acl = segp->s_acl;
9218 9215 rsmpi_acl = segp->s_acl_in;
9219 9216 segp->s_acl_len = 0;
9220 9217 segp->s_acl = NULL;
9221 9218 segp->s_acl_in = NULL;
9222 9219 rsmseglock_release(segp);
9223 9220
9224 9221 rsmexport_rm(segp);
9225 9222 rsmacl_free(acl, acl_len);
9226 9223 rsmpiacl_free(rsmpi_acl, acl_len);
9227 9224
9228 9225 rsmseglock_acquire(segp);
9229 9226 segp->s_state = RSM_STATE_NEW;
9230 9227 cv_broadcast(&segp->s_cv);
9231 9228 rsmseglock_release(segp);
9232 9229 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9233 9230 "%s done: exp_qscd bind failed = %d\n",
9234 9231 function, ret));
9235 9232 return;
9236 9233 }
9237 9234 /*
9238 9235 * publish the segment
9239 9236 * if successful
9240 9237 * segp->s_state = RSM_STATE_EXPORT;
9241 9238 * else failed
9242 9239 * segp->s_state = RSM_STATE_BIND;
9243 9240 */
9244 9241
9245 9242 /* check whether it is a local_memory_handle */
9246 9243 if (segp->s_acl != (rsmapi_access_entry_t *)NULL) {
9247 9244 if ((segp->s_acl[0].ae_node == my_nodeid) &&
9248 9245 (segp->s_acl[0].ae_permission == 0)) {
9249 9246 segp->s_state = RSM_STATE_EXPORT;
9250 9247 cv_broadcast(&segp->s_cv);
9251 9248 rsmseglock_release(segp);
9252 9249 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9253 9250 "%s done:exp_qscd\n", function));
9254 9251 return;
9255 9252 }
9256 9253 }
9257 9254 xbuf = ddi_umem_iosetup(segp->s_cookie, 0, segp->s_len, B_WRITE,
9258 9255 sdev, 0, NULL, DDI_UMEM_SLEEP);
9259 9256 ASSERT(xbuf != NULL);
9260 9257
9261 9258 mem.ms_type = RSM_MEM_BUF;
9262 9259 mem.ms_bp = xbuf;
9263 9260
9264 9261 adapter = segp->s_adapter;
9265 9262
9266 9263 if (segp->s_flags & RSMKA_ALLOW_UNBIND_REBIND) {
9267 9264 create_flags = RSM_ALLOW_UNBIND_REBIND;
9268 9265 }
9269 9266
9270 9267 if (segp->s_flags & RSMKA_SET_RESOURCE_DONTWAIT) {
9271 9268 callback_flag = RSM_RESOURCE_DONTWAIT;
9272 9269 } else {
9273 9270 callback_flag = RSM_RESOURCE_SLEEP;
9274 9271 }
9275 9272
9276 9273 ret = adapter->rsmpi_ops->rsm_seg_create(
9277 9274 adapter->rsmpi_handle, &segp->s_handle.out,
9278 9275 segp->s_len, create_flags, &mem,
9279 9276 callback_flag, NULL);
9280 9277
9281 9278 if (ret != RSM_SUCCESS) {
9282 9279 acl_len = segp->s_acl_len;
9283 9280 acl = segp->s_acl;
9284 9281 rsmpi_acl = segp->s_acl_in;
9285 9282 segp->s_acl_len = 0;
9286 9283 segp->s_acl = NULL;
9287 9284 segp->s_acl_in = NULL;
9288 9285 rsmseglock_release(segp);
9289 9286
9290 9287 rsmexport_rm(segp);
9291 9288 rsmacl_free(acl, acl_len);
9292 9289 rsmpiacl_free(rsmpi_acl, acl_len);
9293 9290
9294 9291 rsmseglock_acquire(segp);
9295 9292 segp->s_state = RSM_STATE_BIND;
9296 9293 cv_broadcast(&segp->s_cv);
9297 9294 rsmseglock_release(segp);
9298 9295 DBG_PRINTF((category, RSM_ERR,
9299 9296 "%s done: exp_qscd create failed = %d\n",
9300 9297 function, ret));
9301 9298 return;
9302 9299 }
9303 9300
9304 9301 ret = adapter->rsmpi_ops->rsm_publish(
9305 9302 segp->s_handle.out, segp->s_acl_in, segp->s_acl_len,
9306 9303 segp->s_segid, RSM_RESOURCE_DONTWAIT, NULL);
9307 9304
9308 9305 if (ret != RSM_SUCCESS) {
9309 9306 acl_len = segp->s_acl_len;
9310 9307 acl = segp->s_acl;
9311 9308 rsmpi_acl = segp->s_acl_in;
9312 9309 segp->s_acl_len = 0;
9313 9310 segp->s_acl = NULL;
9314 9311 segp->s_acl_in = NULL;
9315 9312 adapter->rsmpi_ops->rsm_seg_destroy(segp->s_handle.out);
9316 9313 rsmseglock_release(segp);
9317 9314
9318 9315 rsmexport_rm(segp);
9319 9316 rsmacl_free(acl, acl_len);
9320 9317 rsmpiacl_free(rsmpi_acl, acl_len);
9321 9318
9322 9319 rsmseglock_acquire(segp);
9323 9320 segp->s_state = RSM_STATE_BIND;
9324 9321 cv_broadcast(&segp->s_cv);
9325 9322 rsmseglock_release(segp);
9326 9323 DBG_PRINTF((category, RSM_ERR,
9327 9324 "%s done: exp_qscd publish failed = %d\n",
9328 9325 function, ret));
9329 9326 return;
9330 9327 }
9331 9328
9332 9329 segp->s_state = RSM_STATE_EXPORT;
9333 9330 cv_broadcast(&segp->s_cv);
9334 9331 rsmseglock_release(segp);
9335 9332 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done: exp_qscd\n",
9336 9333 function));
9337 9334 return;
9338 9335 }
9339 9336
9340 9337 rsmseglock_release(segp);
9341 9338
9342 9339 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function));
9343 9340 }
9344 9341
9345 9342 static void
9346 9343 rsm_quiesce_imp_seg(rsmresource_t *resp)
9347 9344 {
9348 9345 rsmseg_t *segp = (rsmseg_t *)resp;
9349 9346 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9350 9347 DBG_DEFINE_STR(function, "rsm_quiesce_imp_seg");
9351 9348
9352 9349 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9353 9350 "%s enter: key=%u\n", function, segp->s_key));
9354 9351
9355 9352 rsmseglock_acquire(segp);
9356 9353 segp->s_flags |= RSM_DR_INPROGRESS;
9357 9354
9358 9355 while (segp->s_rdmacnt != 0) {
9359 9356 /* wait for the RDMA to complete */
9360 9357 cv_wait(&segp->s_cv, &segp->s_lock);
9361 9358 }
9362 9359
9363 9360 rsmseglock_release(segp);
9364 9361
9365 9362 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function));
9366 9363
9367 9364 }
9368 9365
9369 9366 static void
9370 9367 rsm_unquiesce_imp_seg(rsmresource_t *resp)
9371 9368 {
9372 9369 rsmseg_t *segp = (rsmseg_t *)resp;
9373 9370 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9374 9371 DBG_DEFINE_STR(function, "rsm_unquiesce_imp_seg");
9375 9372
9376 9373 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9377 9374 "%s enter: key=%u\n", function, segp->s_key));
9378 9375
9379 9376 rsmseglock_acquire(segp);
9380 9377
9381 9378 segp->s_flags &= ~RSM_DR_INPROGRESS;
9382 9379 /* wake up any waiting putv/getv ops */
9383 9380 cv_broadcast(&segp->s_cv);
9384 9381
9385 9382 rsmseglock_release(segp);
9386 9383
9387 9384 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function));
9388 9385
9389 9386
9390 9387 }
9391 9388
9392 9389 static void
9393 9390 rsm_process_exp_seg(rsmresource_t *resp, int event)
9394 9391 {
9395 9392 if (event == RSM_DR_QUIESCE)
9396 9393 rsm_quiesce_exp_seg(resp);
9397 9394 else /* UNQUIESCE */
9398 9395 rsm_unquiesce_exp_seg(resp);
9399 9396 }
9400 9397
9401 9398 static void
9402 9399 rsm_process_imp_seg(rsmresource_t *resp, int event)
9403 9400 {
9404 9401 if (event == RSM_DR_QUIESCE)
9405 9402 rsm_quiesce_imp_seg(resp);
9406 9403 else /* UNQUIESCE */
9407 9404 rsm_unquiesce_imp_seg(resp);
9408 9405 }
9409 9406
9410 9407 static void
9411 9408 rsm_dr_process_local_segments(int event)
9412 9409 {
9413 9410
9414 9411 int i, j;
9415 9412 rsmresource_blk_t *blk;
9416 9413 rsmresource_t *p;
9417 9414 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9418 9415
9419 9416 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9420 9417 "rsm_dr_process_local_segments enter\n"));
9421 9418
9422 9419 /* iterate through the resource structure */
9423 9420
9424 9421 rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
9425 9422
9426 9423 for (i = 0; i < rsm_resource.rsmrc_len; i++) {
9427 9424 blk = rsm_resource.rsmrc_root[i];
9428 9425 if (blk != NULL) {
9429 9426 for (j = 0; j < RSMRC_BLKSZ; j++) {
9430 9427 p = blk->rsmrcblk_blks[j];
9431 9428 if ((p != NULL) && (p != RSMRC_RESERVED)) {
9432 9429 /* valid resource */
9433 9430 if (p->rsmrc_type ==
9434 9431 RSM_RESOURCE_EXPORT_SEGMENT)
9435 9432 rsm_process_exp_seg(p, event);
9436 9433 else if (p->rsmrc_type ==
9437 9434 RSM_RESOURCE_IMPORT_SEGMENT)
9438 9435 rsm_process_imp_seg(p, event);
9439 9436 }
9440 9437 }
9441 9438 }
9442 9439 }
9443 9440
9444 9441 rw_exit(&rsm_resource.rsmrc_lock);
9445 9442
9446 9443 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9447 9444 "rsm_dr_process_local_segments done\n"));
9448 9445 }
9449 9446
9450 9447 /* *************** DR callback functions ************ */
9451 9448 static void
9452 9449 rsm_dr_callback_post_add(void *arg, pgcnt_t delta /* ARGSUSED */)
9453 9450 {
9454 9451 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9455 9452 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9456 9453 "rsm_dr_callback_post_add is a no-op\n"));
9457 9454 /* Noop */
9458 9455 }
9459 9456
9460 9457 static int
9461 9458 rsm_dr_callback_pre_del(void *arg, pgcnt_t delta /* ARGSUSED */)
9462 9459 {
9463 9460 int recheck_state = 0;
9464 9461 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9465 9462
9466 9463 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9467 9464 "rsm_dr_callback_pre_del enter\n"));
9468 9465
9469 9466 mutex_enter(&rsm_drv_data.drv_lock);
9470 9467
9471 9468 do {
9472 9469 recheck_state = 0;
9473 9470 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9474 9471 "rsm_dr_callback_pre_del:state=%d\n",
9475 9472 rsm_drv_data.drv_state));
9476 9473
9477 9474 switch (rsm_drv_data.drv_state) {
9478 9475 case RSM_DRV_NEW:
9479 9476 /*
9480 9477 * The state should usually never be RSM_DRV_NEW
9481 9478 * since in this state the callbacks have not yet
9482 9479 * been registered. So, ASSERT.
9483 9480 */
9484 9481 ASSERT(0);
9485 9482 return (0);
9486 9483 case RSM_DRV_REG_PROCESSING:
9487 9484 /*
9488 9485 * The driver is in the process of registering
9489 9486 * with the DR framework. So, wait till the
9490 9487 * registration process is complete.
9491 9488 */
9492 9489 recheck_state = 1;
9493 9490 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9494 9491 break;
9495 9492 case RSM_DRV_UNREG_PROCESSING:
9496 9493 /*
9497 9494 * If the state is RSM_DRV_UNREG_PROCESSING, the
9498 9495 * module is in the process of detaching and
9499 9496 * unregistering the callbacks from the DR
9500 9497 * framework. So, simply return.
9501 9498 */
9502 9499 mutex_exit(&rsm_drv_data.drv_lock);
9503 9500 DBG_PRINTF((category, RSM_DEBUG,
9504 9501 "rsm_dr_callback_pre_del:"
9505 9502 "pre-del on NEW/UNREG\n"));
9506 9503 return (0);
9507 9504 case RSM_DRV_OK:
9508 9505 rsm_drv_data.drv_state = RSM_DRV_PREDEL_STARTED;
9509 9506 break;
9510 9507 case RSM_DRV_PREDEL_STARTED:
9511 9508 /* FALLTHRU */
9512 9509 case RSM_DRV_PREDEL_COMPLETED:
9513 9510 /* FALLTHRU */
9514 9511 case RSM_DRV_POSTDEL_IN_PROGRESS:
9515 9512 recheck_state = 1;
9516 9513 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9517 9514 break;
9518 9515 case RSM_DRV_DR_IN_PROGRESS:
9519 9516 rsm_drv_data.drv_memdel_cnt++;
9520 9517 mutex_exit(&rsm_drv_data.drv_lock);
9521 9518 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9522 9519 "rsm_dr_callback_pre_del done\n"));
9523 9520 return (0);
9524 9521 /* break; */
9525 9522 default:
9526 9523 ASSERT(0);
9527 9524 break;
9528 9525 }
9529 9526
9530 9527 } while (recheck_state);
9531 9528
9532 9529 rsm_drv_data.drv_memdel_cnt++;
9533 9530
9534 9531 mutex_exit(&rsm_drv_data.drv_lock);
9535 9532
9536 9533 /* Do all the quiescing stuff here */
9537 9534 DBG_PRINTF((category, RSM_DEBUG,
9538 9535 "rsm_dr_callback_pre_del: quiesce things now\n"));
9539 9536
9540 9537 rsm_dr_process_local_segments(RSM_DR_QUIESCE);
9541 9538
9542 9539 /*
9543 9540 * now that all local segments have been quiesced lets inform
9544 9541 * the importers
9545 9542 */
9546 9543 rsm_send_suspend();
9547 9544
9548 9545 /*
9549 9546 * In response to the suspend message the remote node(s) will process
9550 9547 * the segments and send a suspend_complete message. Till all
9551 9548 * the nodes send the suspend_complete message we wait in the
9552 9549 * RSM_DRV_PREDEL_STARTED state. In the exporter_quiesce
9553 9550 * function we transition to the RSM_DRV_PREDEL_COMPLETED state.
9554 9551 */
9555 9552 mutex_enter(&rsm_drv_data.drv_lock);
9556 9553
9557 9554 while (rsm_drv_data.drv_state == RSM_DRV_PREDEL_STARTED) {
9558 9555 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9559 9556 }
9560 9557
9561 9558 ASSERT(rsm_drv_data.drv_state == RSM_DRV_PREDEL_COMPLETED);
9562 9559
9563 9560 rsm_drv_data.drv_state = RSM_DRV_DR_IN_PROGRESS;
9564 9561 cv_broadcast(&rsm_drv_data.drv_cv);
9565 9562
9566 9563 mutex_exit(&rsm_drv_data.drv_lock);
9567 9564
9568 9565 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9569 9566 "rsm_dr_callback_pre_del done\n"));
9570 9567
9571 9568 return (0);
9572 9569 }
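/*
 * An informal summary, for review convenience, of the driver-state
 * transitions driven by the DR callbacks above and below:
 *
 *	RSM_DRV_OK
 *	  -> RSM_DRV_PREDEL_STARTED	  pre-del: quiesce local segments,
 *					  send SUSPEND to importers
 *	  -> RSM_DRV_PREDEL_COMPLETED	  all suspend_complete messages in
 *					  (set by the exporter quiesce path)
 *	  -> RSM_DRV_DR_IN_PROGRESS	  pre-del returns; further pre-dels
 *					  only bump drv_memdel_cnt
 *	  -> RSM_DRV_POSTDEL_IN_PROGRESS  last post-del: unquiesce local
 *					  segments, send resume
 *	  -> RSM_DRV_OK
 */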
9573 9570
9574 9571 static void
9575 9572 rsm_dr_callback_post_del(void *arg, pgcnt_t delta, int cancelled /* ARGSUSED */)
9576 9573 {
9577 9574 int recheck_state = 0;
9578 9575 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9579 9576
9580 9577 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9581 9578 "rsm_dr_callback_post_del enter\n"));
9582 9579
9583 9580 mutex_enter(&rsm_drv_data.drv_lock);
9584 9581
9585 9582 do {
9586 9583 recheck_state = 0;
9587 9584 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9588 9585 "rsm_dr_callback_post_del:state=%d\n",
9589 9586 rsm_drv_data.drv_state));
9590 9587
9591 9588 switch (rsm_drv_data.drv_state) {
9592 9589 case RSM_DRV_NEW:
9593 9590 /*
9594 9591 			 * The driver state cannot be RSM_DRV_NEW
9595 9592 * since in this state the callbacks have not
9596 9593 * yet been registered.
9597 9594 */
9598 9595 ASSERT(0);
9599 9596 return;
9600 9597 case RSM_DRV_REG_PROCESSING:
9601 9598 /*
9602 9599 * The driver is in the process of registering with
9603 9600 * the DR framework. Wait till the registration is
9604 9601 * complete.
9605 9602 */
9606 9603 recheck_state = 1;
9607 9604 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9608 9605 break;
9609 9606 case RSM_DRV_UNREG_PROCESSING:
9610 9607 /*
9611 9608 * RSM_DRV_UNREG_PROCESSING state means the module
9612 9609 * is detaching and unregistering the callbacks
9613 9610 * from the DR framework. So simply return.
9614 9611 */
9615 9612 /* FALLTHRU */
9616 9613 case RSM_DRV_OK:
9617 9614 /*
9618 9615 * RSM_DRV_OK means we missed the pre-del
9619 9616 			 * corresponding to this post-del because we had not
9620 9617 * registered yet, so simply return.
9621 9618 */
9622 9619 mutex_exit(&rsm_drv_data.drv_lock);
9623 9620 DBG_PRINTF((category, RSM_DEBUG,
9624 9621 "rsm_dr_callback_post_del:"
9625 9622 "post-del on OK/UNREG\n"));
9626 9623 return;
9627 9624 /* break; */
9628 9625 case RSM_DRV_PREDEL_STARTED:
9629 9626 /* FALLTHRU */
9630 9627 case RSM_DRV_PREDEL_COMPLETED:
9631 9628 /* FALLTHRU */
9632 9629 case RSM_DRV_POSTDEL_IN_PROGRESS:
9633 9630 recheck_state = 1;
9634 9631 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9635 9632 break;
9636 9633 case RSM_DRV_DR_IN_PROGRESS:
9637 9634 rsm_drv_data.drv_memdel_cnt--;
9638 9635 if (rsm_drv_data.drv_memdel_cnt > 0) {
9639 9636 mutex_exit(&rsm_drv_data.drv_lock);
9640 9637 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9641 9638 "rsm_dr_callback_post_del done:\n"));
9642 9639 return;
9643 9640 }
9644 9641 rsm_drv_data.drv_state = RSM_DRV_POSTDEL_IN_PROGRESS;
9645 9642 break;
9646 9643 default:
9647 9644 ASSERT(0);
9648 9645 return;
9649 9646 /* break; */
9650 9647 }
9651 9648 } while (recheck_state);
9652 9649
9653 9650 mutex_exit(&rsm_drv_data.drv_lock);
9654 9651
9655 9652 /* Do all the unquiescing stuff here */
9656 9653 DBG_PRINTF((category, RSM_DEBUG,
9657 9654 "rsm_dr_callback_post_del: unquiesce things now\n"));
9658 9655
9659 9656 rsm_dr_process_local_segments(RSM_DR_UNQUIESCE);
9660 9657
9661 9658 /*
9662 9659 * now that all local segments have been unquiesced lets inform
9663 9660 * the importers
9664 9661 */
9665 9662 rsm_send_resume();
9666 9663
9667 9664 mutex_enter(&rsm_drv_data.drv_lock);
9668 9665
9669 9666 rsm_drv_data.drv_state = RSM_DRV_OK;
9670 9667
9671 9668 cv_broadcast(&rsm_drv_data.drv_cv);
9672 9669
9673 9670 mutex_exit(&rsm_drv_data.drv_lock);
9674 9671
9675 9672 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9676 9673 "rsm_dr_callback_post_del done\n"));
9677 9674
9678 9675 return;
9679 9676
9680 9677 }
9248 lines elided