1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 * Copyright 2012 Milan Jurik. All rights reserved.
25 */
26
27
28 /*
29 * Overview of the RSM Kernel Agent:
30 * ---------------------------------
31 *
32 * rsm.c constitutes the implementation of the RSM kernel agent. The RSM
33 * kernel agent is a pseudo device driver which makes use of the RSMPI
34 * interface on behalf of the RSMAPI user library.
35 *
36 * The kernel agent functionality can be categorized into the following
37 * components:
38 * 1. Driver Infrastructure
39 * 2. Export/Import Segment Management
40 * 3. Internal resource allocation/deallocation
41 *
42 * The driver infrastructure includes the basic module loading entry points
43 * like _init, _info, _fini to load, unload and report information about
44 * the driver module. The driver infrastructure also includes the
45 * autoconfiguration entry points namely, attach, detach and getinfo for
46 * the device autoconfiguration.
47 *
48 * The kernel agent is a pseudo character device driver and exports
49 * a cb_ops structure which defines the driver entry points for character
50 * device access. This includes the open and close entry points. The
 * other entry points provided include ioctl, devmap, segmap and chpoll.
 * The read and write entry points are not used since the device is
 * memory mapped. Also, ddi_prop_op is used for the prop_op entry point.
54 *
55 * The ioctl entry point supports a number of commands, which are used by
56 * the RSMAPI library in order to export and import segments. These
57 * commands include commands for binding and rebinding the physical pages
58 * allocated to the virtual address range, publishing the export segment,
59 * unpublishing and republishing an export segment, creating an
60 * import segment and a virtual connection from this import segment to
 * an export segment, performing scatter-gather data transfers, and
 * executing barrier operations.
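 *
 * As a rough sketch of the export half of this flow (the ioctl command
 * names below are abbreviated for illustration; the exact RSM_IOCTL_*
 * definitions live in the RSM headers):
 *
 *	fd = open("/dev/rsm", O_RDWR);
 *	ioctl(fd, RSM_IOCTL_BIND, &msg);	bind pages to the va range
 *	ioctl(fd, RSM_IOCTL_PUBLISH, &msg);	assign a segment id, ACL
 *	...
 *	ioctl(fd, RSM_IOCTL_UNPUBLISH, &msg);
 *	ioctl(fd, RSM_IOCTL_UNBIND, &msg);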
63 *
64 *
65 * Export and Import segments:
66 * ---------------------------
67 *
68 * In order to create an RSM export segment a process allocates a range in its
69 * virtual address space for the segment using standard Solaris interfaces.
70 * The process then calls RSMAPI, which in turn makes an ioctl call to the
71 * RSM kernel agent for an allocation of physical memory pages and for
72 * creation of the export segment by binding these pages to the virtual
73 * address range. These pages are locked in memory so that remote accesses
74 * are always applied to the correct page. Then the RSM segment is published,
75 * again via RSMAPI making an ioctl to the RSM kernel agent, and a segment id
76 * is assigned to it.
77 *
78 * In order to import a published RSM segment, RSMAPI creates an import
79 * segment and forms a virtual connection across the interconnect to the
80 * export segment, via an ioctl into the kernel agent with the connect
81 * command. The import segment setup is completed by mapping the
 * local device memory into the importer's virtual address space. The
83 * mapping of the import segment is handled by the segmap/devmap
84 * infrastructure described as follows.
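 *
 * Under the same naming assumptions as the export sketch above, the
 * import half looks roughly like:
 *
 *	ioctl(fd, RSM_IOCTL_CONNECT, &msg);	connect to the exporter
 *	addr = mmap(NULL, len, prot, MAP_SHARED, fd, off);
 *	... access the segment through addr ...
 *	munmap(addr, len);
 *	ioctl(fd, RSM_IOCTL_DISCONNECT, &msg);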
85 *
86 * Segmap and Devmap interfaces:
87 *
88 * The RSM kernel agent allows device memory to be directly accessed by user
89 * threads via memory mapping. In order to do so, the RSM kernel agent
90 * supports the devmap and segmap entry points.
91 *
 * The segmap entry point (rsm_segmap) is responsible for setting up a
 * memory mapping as requested by mmap. The devmap entry point (rsm_devmap)
 * is responsible for exporting the device memory to the user applications.
 * rsm_segmap calls RSMPI rsm_map to allocate device memory. Then
 * control is transferred to the devmap_setup call, which calls rsm_devmap.
97 *
98 * rsm_devmap validates the user mapping to the device or kernel memory
99 * and passes the information to the system for setting up the mapping. The
 * actual setting up of the mapping is done by devmap_devmem_setup (for
 * device memory) or devmap_umem_setup (for kernel memory). Callbacks are
102 * registered for device context management via the devmap_devmem_setup
103 * or devmap_umem_setup calls. The callbacks are rsmmap_map, rsmmap_unmap,
104 * rsmmap_access, rsmmap_dup. The callbacks are called when a new mapping
105 * is created, a mapping is freed, a mapping is accessed or an existing
106 * mapping is duplicated respectively. These callbacks allow the RSM kernel
107 * agent to maintain state information associated with the mappings.
108 * The state information is mainly in the form of a cookie list for the import
109 * segment for which mapping has been done.
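 *
 * A minimal sketch of how the callbacks are wired up through the DDI
 * (devmap_callback_ctl is the standard DDI structure; the rsmmap_*
 * routines are this driver's own):
 *
 *	static struct devmap_callback_ctl rsmmap_ops = {
 *		DEVMAP_OPS_REV,
 *		rsmmap_map,	map routine
 *		rsmmap_access,	access routine
 *		rsmmap_dup,	dup routine
 *		rsmmap_unmap	unmap routine
 *	};
 *
 *	(void) devmap_devmem_setup(dhp, dip, &rsmmap_ops, rnumber,
 *	    off, len, maxprot, flags, &accattr);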
110 *
111 * Forced disconnect of import segments:
112 *
113 * When an exported segment is unpublished, the exporter sends a forced
114 * disconnect message to all its importers. The importer segments are
115 * unloaded and disconnected. This involves unloading the original
116 * mappings and remapping to a preallocated kernel trash page. This is
117 * done by devmap_umem_remap. The trash/dummy page is a kernel page,
118 * preallocated by the kernel agent during attach using ddi_umem_alloc with
119 * the DDI_UMEM_TRASH flag set. This avoids a core dump in the application
120 * due to unloading of the original mappings.
121 *
122 * Additionally every segment has a mapping generation number associated
123 * with it. This is an entry in the barrier generation page, created
124 * during attach time. This mapping generation number for the import
125 * segments is incremented on a force disconnect to notify the application
126 * of the force disconnect. On this notification, the application needs
127 * to reconnect the segment to establish a new legitimate mapping.
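 *
 * A minimal sketch of the remap step, assuming dhp is the mapping's
 * devmap cookie and gen_slot is the segment's slot in the barrier
 * generation page (both names illustrative):
 *
 *	(void) devmap_umem_remap(dhp, rsm_dip, remap_cookie, 0, len,
 *	    maxprot, 0, NULL);
 *	bar_va[gen_slot]++;	bump the mapping generation number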
128 *
129 *
130 * Locks used in the kernel agent:
131 * -------------------------------
132 *
133 * The kernel agent uses a variety of mutexes and condition variables for
134 * mutual exclusion of the shared data structures and for synchronization
135 * between the various threads. Some of the locks are described as follows.
136 *
 * Each resource structure, which represents either an export or an import
 * segment, has a lock associated with it, the resource mutex, rsmrc_lock.
139 * This is used directly by RSMRC_LOCK and RSMRC_UNLOCK macros and in the
140 * rsmseglock_acquire and rsmseglock_release macros. An additional
141 * lock called the rsmsi_lock is used for the shared import data structure
142 * that is relevant for resources representing import segments. There is
143 * also a condition variable associated with the resource called s_cv. This
144 * is used to wait for events like the segment state change etc.
145 *
146 * The resource structures are allocated from a pool of resource structures,
147 * called rsm_resource. This pool is protected via a reader-writer lock,
148 * called rsmrc_lock.
149 *
150 * There are two separate hash tables, one for the export segments and
151 * one for the import segments. The export segments are inserted into the
152 * export segment hash table only after they have been published and the
153 * import segments are inserted in the import segments list only after they
154 * have successfully connected to an exported segment. These tables are
155 * protected via reader-writer locks.
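 *
 * The typical lookup pattern (see rsmhash_lookup below) nests the
 * per-resource mutex inside the table's reader-writer lock:
 *
 *	rw_enter(&rhash->rsmhash_rw, RW_READER);
 *	... find the segment in its hash bucket ...
 *	RSMRC_LOCK(seg);
 *	rw_exit(&rhash->rsmhash_rw);
 *	... operate on seg, then RSMRC_UNLOCK(seg) ...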
156 *
157 * Debug Support in the kernel agent:
158 * ----------------------------------
159 *
160 * Debugging support in the kernel agent is provided by the following
161 * macros.
162 *
163 * DBG_PRINTF((category, level, message)) is a macro which logs a debug
 * message to the kernel agent's debug buffer, rsmka_dbg. This debug buffer
165 * can be viewed in kmdb as *rsmka_dbg/s. The message is logged based
166 * on the definition of the category and level. All messages that belong to
 * the specified category (rsmdbg_category) and are of an equal or greater
 * severity than the specified level (rsmdbg_level) are logged. The message
169 * is a string which uses the same formatting rules as the strings used in
170 * printf.
171 *
172 * The category defines which component of the kernel agent has logged this
173 * message. There are a number of categories that have been defined such as
 * RSM_KERNEL_AGENT, RSM_OPS, RSM_IMPORT, RSM_EXPORT, etc. A macro,
 * DBG_ADDCATEGORY, is used to add another category to the currently
176 * specified category value so that the component using this new category
177 * can also effectively log debug messages. Thus, the category of a specific
178 * message is some combination of the available categories and we can define
179 * sub-categories if we want a finer level of granularity.
180 *
181 * The level defines the severity of the message. Different level values are
182 * defined, with RSM_ERR being the most severe and RSM_DEBUG_VERBOSE being
 * the least severe (debug level is 0).
184 *
185 * DBG_DEFINE and DBG_DEFINE_STR are macros provided to declare a debug
186 * variable or a string respectively.
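 *
 * For example, following the pattern used throughout this file:
 *
 *	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);
 *	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_attach enter\n"));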
187 *
188 *
189 * NOTES:
190 *
191 * Special Fork and Exec Handling:
192 * -------------------------------
193 *
194 * The backing physical pages of an exported segment are always locked down.
195 * Thus, there are two cases in which a process having exported segments
 * will cause a CPU to hang: (1) the process invokes exec; (2) a process
 * forks and invokes exit before the duped file descriptors for the export
 * segments are closed in the child process. The hang is caused because the
 * address space release algorithm in the Solaris VM subsystem is based on
 * a non-blocking loop which does not terminate while segments are locked
 * down. In addition to this, the Solaris VM subsystem lacks a callback
 * mechanism to the rsm kernel agent to allow unlocking these export
203 * segment pages.
204 *
205 * In order to circumvent this problem, the kernel agent does the following.
 * The Solaris VM subsystem keeps memory segments in increasing order of
 * virtual addresses. Thus a special page (special_exit_offset) is allocated
 * by the kernel agent and is mmapped into the heap area of the process
 * address space (the mmap is done by the RSMAPI library). During the mmap
 * processing of this special page by the devmap infrastructure, a callback
 * (the same devmap context management callbacks discussed above) is
 * registered for an unmap.
213 *
214 * As discussed above, this page is processed by the Solaris address space
 * release code before any of the exported segment pages (which are
 * allocated from high memory). It is during this processing that the
 * unmap callback gets
217 * called and this callback is responsible for force destroying the exported
218 * segments and thus eliminating the problem of locked pages.
219 *
220 * Flow-control:
 * -------------
222 *
223 * A credit based flow control algorithm is used for messages whose
224 * processing cannot be done in the interrupt context because it might
225 * involve invoking rsmpi calls, or might take a long time to complete
226 * or might need to allocate resources. The algorithm operates on a per
227 * path basis. To send a message the pathend needs to have a credit and
228 * it consumes one for every message that is flow controlled. On the
229 * receiving pathend the message is put on a msgbuf_queue and a task is
230 * dispatched on the worker thread - recv_taskq where it is processed.
231 * After processing the message, the receiving pathend dequeues the message,
 * and once it has processed more than RSMIPC_LOTSFREE_MSGBUFS messages,
 * it sends credits back to the sending pathend.
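 *
 * In pseudocode (a paraphrase of the send and receive paths, not a
 * literal excerpt):
 *
 *	sender:
 *		if (path has credits)
 *			consume one credit and send the message
 *		else
 *			defer the message until credits arrive
 *	receiver:
 *		enqueue the message on msgbuf_queue
 *		dispatch a task on recv_taskq to process it
 *		after processing, dequeue the message
 *		if (messages processed > RSMIPC_LOTSFREE_MSGBUFS)
 *			send credits back to the sender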
234 *
235 * RSM_DRTEST:
236 * -----------
237 *
 * This is used to enable DR testing using a test driver on test
 * platforms which do not support DR.
240 *
241 */
242
243 #include <sys/types.h>
244 #include <sys/param.h>
245 #include <sys/user.h>
246 #include <sys/buf.h>
247 #include <sys/systm.h>
248 #include <sys/cred.h>
249 #include <sys/vm.h>
250 #include <sys/uio.h>
251 #include <vm/seg.h>
252 #include <vm/page.h>
253 #include <sys/stat.h>
254
255 #include <sys/time.h>
256 #include <sys/errno.h>
257
258 #include <sys/file.h>
260 #include <sys/proc.h>
261 #include <sys/mman.h>
262 #include <sys/open.h>
263 #include <sys/atomic.h>
264 #include <sys/mem_config.h>
265
266
267 #include <sys/ddi.h>
268 #include <sys/devops.h>
269 #include <sys/ddidevmap.h>
270 #include <sys/sunddi.h>
271 #include <sys/esunddi.h>
272 #include <sys/ddi_impldefs.h>
273
274 #include <sys/kmem.h>
275 #include <sys/conf.h>
278
279 #include <sys/modctl.h>
280
281 #include <sys/policy.h>
285
286 #include <sys/taskq.h>
287
288 #include <sys/rsm/rsm_common.h>
289 #include <sys/rsm/rsmapi_common.h>
290 #include <sys/rsm/rsm.h>
291 #include <rsm_in.h>
292 #include <sys/rsm/rsmka_path_int.h>
293 #include <sys/rsm/rsmpi.h>
294
296 #include <sys/debug.h>
297
298 #include <sys/tuneable.h>
299
300 #ifdef RSM_DRTEST
301 extern int rsm_kphysm_setup_func_register(kphysm_setup_vector_t *vec,
302 void *arg);
303 extern void rsm_kphysm_setup_func_unregister(kphysm_setup_vector_t *vec,
304 void *arg);
305 #endif
306
307 extern void dbg_printf(int category, int level, char *fmt, ...);
308 extern void rsmka_pathmanager_init();
309 extern void rsmka_pathmanager_cleanup();
310 extern void rele_sendq_token(sendq_token_t *);
311 extern rsm_addr_t get_remote_hwaddr(adapter_t *, rsm_node_id_t);
312 extern rsm_node_id_t get_remote_nodeid(adapter_t *, rsm_addr_t);
313 extern int rsmka_topology_ioctl(caddr_t, int, int);
314
315 extern pri_t maxclsyspri;
316 extern work_queue_t work_queue;
317 extern kmutex_t ipc_info_lock;
318 extern kmutex_t ipc_info_cvlock;
319 extern kcondvar_t ipc_info_cv;
320 extern kmutex_t path_hold_cvlock;
321 extern kcondvar_t path_hold_cv;
322
323 extern kmutex_t rsmka_buf_lock;
324
325 extern path_t *rsm_find_path(char *, int, rsm_addr_t);
326 extern adapter_t *rsmka_lookup_adapter(char *, int);
327 extern sendq_token_t *rsmka_get_sendq_token(rsm_node_id_t, sendq_token_t *);
328 extern boolean_t rsmka_do_path_active(path_t *, int);
329 extern boolean_t rsmka_check_node_alive(rsm_node_id_t);
330 extern void rsmka_release_adapter(adapter_t *);
331 extern void rsmka_enqueue_msgbuf(path_t *path, void *data);
332 extern void rsmka_dequeue_msgbuf(path_t *path);
333 extern msgbuf_elem_t *rsmka_gethead_msgbuf(path_t *path);
334 /* lint -w2 */
335
336 static int rsm_open(dev_t *, int, int, cred_t *);
337 static int rsm_close(dev_t, int, int, cred_t *);
338 static int rsm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
339 cred_t *credp, int *rvalp);
340 static int rsm_devmap(dev_t, devmap_cookie_t, offset_t, size_t, size_t *,
341 uint_t);
342 static int rsm_segmap(dev_t, off_t, struct as *, caddr_t *, off_t, uint_t,
343 uint_t, uint_t, cred_t *);
344 static int rsm_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
345 struct pollhead **phpp);
346
347 static int rsm_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
348 static int rsm_attach(dev_info_t *, ddi_attach_cmd_t);
349 static int rsm_detach(dev_info_t *, ddi_detach_cmd_t);
350
351 static int rsmipc_send(rsm_node_id_t, rsmipc_request_t *, rsmipc_reply_t *);
352 static void rsm_force_unload(rsm_node_id_t, rsm_memseg_id_t, boolean_t);
353 static void rsm_send_importer_disconnects(rsm_memseg_id_t, rsm_node_id_t);
354 static void rsm_send_republish(rsm_memseg_id_t, rsmapi_access_entry_t *, int,
355 rsm_permission_t);
356 static void rsm_export_force_destroy(ddi_umem_cookie_t *);
357 static void rsmacl_free(rsmapi_access_entry_t *, int);
358 static void rsmpiacl_free(rsm_access_entry_t *, int);
359
360 static int rsm_inc_pgcnt(pgcnt_t);
361 static void rsm_dec_pgcnt(pgcnt_t);
362 static void rsm_free_mapinfo(rsm_mapinfo_t *mapinfop);
363 static rsm_mapinfo_t *rsm_get_mapinfo(rsmseg_t *, off_t, size_t, off_t *,
364 size_t *);
static void exporter_quiesce(void);
366 static void rsmseg_suspend(rsmseg_t *, int *);
367 static void rsmsegshare_suspend(rsmseg_t *);
368 static int rsmseg_resume(rsmseg_t *, void **);
369 static int rsmsegshare_resume(rsmseg_t *);
370
371 static struct cb_ops rsm_cb_ops = {
372 rsm_open, /* open */
373 rsm_close, /* close */
374 nodev, /* strategy */
375 nodev, /* print */
376 nodev, /* dump */
377 nodev, /* read */
378 nodev, /* write */
379 rsm_ioctl, /* ioctl */
380 rsm_devmap, /* devmap */
381 NULL, /* mmap */
382 rsm_segmap, /* segmap */
383 rsm_chpoll, /* poll */
384 ddi_prop_op, /* cb_prop_op */
385 0, /* streamtab */
386 D_NEW|D_MP|D_DEVMAP, /* Driver compatibility flag */
387 0,
388 0,
389 0
390 };
391
392 static struct dev_ops rsm_ops = {
393 DEVO_REV, /* devo_rev, */
394 0, /* refcnt */
395 rsm_info, /* get_dev_info */
396 nulldev, /* identify */
397 nulldev, /* probe */
398 rsm_attach, /* attach */
399 rsm_detach, /* detach */
400 nodev, /* reset */
401 &rsm_cb_ops, /* driver operations */
402 (struct bus_ops *)0, /* bus operations */
403 0,
404 ddi_quiesce_not_needed, /* quiesce */
405 };
406
407 /*
408 * Module linkage information for the kernel.
409 */
410
411 static struct modldrv modldrv = {
412 &mod_driverops, /* Type of module. This one is a pseudo driver */
413 "Remote Shared Memory Driver",
414 &rsm_ops, /* driver ops */
415 };
416
417 static struct modlinkage modlinkage = {
418 MODREV_1,
419 { (void *)&modldrv, NULL }
420 };
421
422 static void rsm_dr_callback_post_add(void *arg, pgcnt_t delta);
423 static int rsm_dr_callback_pre_del(void *arg, pgcnt_t delta);
424 static void rsm_dr_callback_post_del(void *arg, pgcnt_t delta, int cancelled);
425
426 static kphysm_setup_vector_t rsm_dr_callback_vec = {
427 KPHYSM_SETUP_VECTOR_VERSION,
428 rsm_dr_callback_post_add,
429 rsm_dr_callback_pre_del,
430 rsm_dr_callback_post_del
431 };
432
433 /* This flag can be changed to 0 to help with PIT testing */
434 int rsmka_modunloadok = 1;
435 int no_reply_cnt = 0;
436
437 uint64_t rsm_ctrlmsg_errcnt = 0;
438 uint64_t rsm_ipcsend_errcnt = 0;
439
440 #define MAX_NODES 64
441
442 static struct rsm_driver_data rsm_drv_data;
443 static struct rsmresource_table rsm_resource;
444
445 static void rsmresource_insert(minor_t, rsmresource_t *, rsm_resource_type_t);
446 static void rsmresource_destroy(void);
447 static int rsmresource_alloc(minor_t *);
448 static rsmresource_t *rsmresource_free(minor_t rnum);
449 static int rsm_closeconnection(rsmseg_t *seg, void **cookie);
450 static int rsm_unpublish(rsmseg_t *seg, int mode);
451 static int rsm_unbind(rsmseg_t *seg);
452 static uint_t rsmhash(rsm_memseg_id_t key);
453 static void rsmhash_alloc(rsmhash_table_t *rhash, int size);
454 static void rsmhash_free(rsmhash_table_t *rhash, int size);
455 static void *rsmhash_getbkt(rsmhash_table_t *rhash, uint_t hashval);
456 static void **rsmhash_bktaddr(rsmhash_table_t *rhash, uint_t hashval);
457 static int rsm_send_notimporting(rsm_node_id_t dest, rsm_memseg_id_t segid,
458 void *cookie);
459 int rsm_disconnect(rsmseg_t *seg);
460 void rsmseg_unload(rsmseg_t *);
461 void rsm_suspend_complete(rsm_node_id_t src_node, int flag);
462
463 rsm_intr_hand_ret_t rsm_srv_func(rsm_controller_object_t *chd,
464 rsm_intr_q_op_t opcode, rsm_addr_t src,
465 void *data, size_t size, rsm_intr_hand_arg_t arg);
466
467 static void rsm_intr_callback(void *, rsm_addr_t, rsm_intr_hand_arg_t);
468
469 rsm_node_id_t my_nodeid;
470
471 /* cookie, va, offsets and length for the barrier */
472 static rsm_gnum_t *bar_va;
473 static ddi_umem_cookie_t bar_cookie;
474 static off_t barrier_offset;
475 static size_t barrier_size;
476 static int max_segs;
477
478 /* cookie for the trash memory */
479 static ddi_umem_cookie_t remap_cookie;
480
481 static rsm_memseg_id_t rsm_nextavail_segmentid;
482
483 extern taskq_t *work_taskq;
484 extern char *taskq_name;
485
486 static dev_info_t *rsm_dip; /* private copy of devinfo pointer */
487
488 static rsmhash_table_t rsm_export_segs; /* list of exported segs */
489 rsmhash_table_t rsm_import_segs; /* list of imported segs */
490 static rsmhash_table_t rsm_event_queues; /* list of event queues */
491
492 static rsm_ipc_t rsm_ipc; /* ipc info */
493
494 /* list of nodes to which RSMIPC_MSG_SUSPEND has been sent */
495 static list_head_t rsm_suspend_list;
496
497 /* list of descriptors for remote importers */
498 static importers_table_t importer_list;
499
500 kmutex_t rsm_suspend_cvlock;
501 kcondvar_t rsm_suspend_cv;
502
503 static kmutex_t rsm_lock;
504
505 adapter_t loopback_adapter;
506 rsm_controller_attr_t loopback_attr;
507
508 int rsmipc_send_controlmsg(path_t *path, int msgtype);
509
void rsmka_init_loopback(void);
511
512 int rsmka_null_seg_create(
513 rsm_controller_handle_t,
514 rsm_memseg_export_handle_t *,
515 size_t,
516 uint_t,
517 rsm_memory_local_t *,
518 rsm_resource_callback_t,
519 rsm_resource_callback_arg_t);
520
521 int rsmka_null_seg_destroy(
522 rsm_memseg_export_handle_t);
523
524 int rsmka_null_bind(
525 rsm_memseg_export_handle_t,
526 off_t,
527 rsm_memory_local_t *,
528 rsm_resource_callback_t,
529 rsm_resource_callback_arg_t);
530
531 int rsmka_null_unbind(
532 rsm_memseg_export_handle_t,
533 off_t,
534 size_t);
535
536 int rsmka_null_rebind(
537 rsm_memseg_export_handle_t,
538 off_t,
539 rsm_memory_local_t *,
540 rsm_resource_callback_t,
541 rsm_resource_callback_arg_t);
542
543 int rsmka_null_publish(
544 rsm_memseg_export_handle_t,
545 rsm_access_entry_t [],
546 uint_t,
547 rsm_memseg_id_t,
548 rsm_resource_callback_t,
549 rsm_resource_callback_arg_t);
550
551
552 int rsmka_null_republish(
553 rsm_memseg_export_handle_t,
554 rsm_access_entry_t [],
555 uint_t,
556 rsm_resource_callback_t,
557 rsm_resource_callback_arg_t);
558
559 int rsmka_null_unpublish(
560 rsm_memseg_export_handle_t);
561
562 rsm_ops_t null_rsmpi_ops;
563
564 /*
565 * data and locks to keep track of total amount of exported memory
566 */
567 static pgcnt_t rsm_pgcnt;
568 static pgcnt_t rsm_pgcnt_max; /* max allowed */
569 static kmutex_t rsm_pgcnt_lock;
570
571 static int rsm_enable_dr;
572
573 static char loopback_str[] = "loopback";
574
575 int rsm_hash_size;
576
577 /*
578 * The locking model is as follows:
579 *
580 * Local operations:
 *	find resource - grab reader lock on resource list
582 * insert rc - grab writer lock
583 * delete rc - grab writer lock and resource mutex
584 * read/write - no lock
585 *
586 * Remote invocations:
587 * find resource - grab read lock and resource mutex
588 *
589 * State:
590 * resource state - grab resource mutex
591 */
592
593 int
594 _init(void)
595 {
596 int e;
597
598 e = mod_install(&modlinkage);
599 if (e != 0) {
600 return (e);
601 }
602
603 mutex_init(&rsm_lock, NULL, MUTEX_DRIVER, NULL);
604
605 mutex_init(&rsmka_buf_lock, NULL, MUTEX_DEFAULT, NULL);
606
607
608 rw_init(&rsm_resource.rsmrc_lock, NULL, RW_DRIVER, NULL);
609
610 rsm_hash_size = RSM_HASHSZ;
611
612 rw_init(&rsm_export_segs.rsmhash_rw, NULL, RW_DRIVER, NULL);
613
614 rw_init(&rsm_import_segs.rsmhash_rw, NULL, RW_DRIVER, NULL);
615
616 mutex_init(&importer_list.lock, NULL, MUTEX_DRIVER, NULL);
617
618 mutex_init(&rsm_ipc.lock, NULL, MUTEX_DRIVER, NULL);
619 cv_init(&rsm_ipc.cv, NULL, CV_DRIVER, 0);
620
621 mutex_init(&rsm_suspend_cvlock, NULL, MUTEX_DRIVER, NULL);
622 cv_init(&rsm_suspend_cv, NULL, CV_DRIVER, 0);
623
624 mutex_init(&rsm_drv_data.drv_lock, NULL, MUTEX_DRIVER, NULL);
625 cv_init(&rsm_drv_data.drv_cv, NULL, CV_DRIVER, 0);
626
627 rsm_ipc.count = RSMIPC_SZ;
628 rsm_ipc.wanted = 0;
629 rsm_ipc.sequence = 0;
630
631 (void) mutex_init(&rsm_pgcnt_lock, NULL, MUTEX_DRIVER, NULL);
632
633 for (e = 0; e < RSMIPC_SZ; e++) {
634 rsmipc_slot_t *slot = &rsm_ipc.slots[e];
635
636 RSMIPC_SET(slot, RSMIPC_FREE);
637 mutex_init(&slot->rsmipc_lock, NULL, MUTEX_DRIVER, NULL);
638 cv_init(&slot->rsmipc_cv, NULL, CV_DRIVER, 0);
639 }
640
641 /*
642 * Initialize the suspend message list
643 */
644 rsm_suspend_list.list_head = NULL;
645 mutex_init(&rsm_suspend_list.list_lock, NULL, MUTEX_DRIVER, NULL);
646
647 /*
648 * It is assumed here that configuration data is available
649 * during system boot since _init may be called at that time.
650 */
651
652 rsmka_pathmanager_init();
653
654 DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE,
655 "rsm: _init done\n"));
656
657 return (DDI_SUCCESS);
658
659 }
660
661 int
662 _info(struct modinfo *modinfop)
663 {
664
665 return (mod_info(&modlinkage, modinfop));
666 }
667
668 int
669 _fini(void)
670 {
671 int e;
672
673 DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE,
674 "rsm: _fini enter\n"));
675
676 /*
677 * The rsmka_modunloadok flag is simply used to help with
678 * the PIT testing. Make this flag 0 to disallow modunload.
679 */
680 if (rsmka_modunloadok == 0)
681 return (EBUSY);
682
683 /* rsm_detach will be called as a result of mod_remove */
684 e = mod_remove(&modlinkage);
685 if (e) {
686 DBG_PRINTF((RSM_KERNEL_AGENT, RSM_ERR,
687 "Unable to fini RSM %x\n", e));
688 return (e);
689 }
690
691 rsmka_pathmanager_cleanup();
692
693 rw_destroy(&rsm_resource.rsmrc_lock);
694
695 rw_destroy(&rsm_export_segs.rsmhash_rw);
696 rw_destroy(&rsm_import_segs.rsmhash_rw);
697 rw_destroy(&rsm_event_queues.rsmhash_rw);
698
699 mutex_destroy(&importer_list.lock);
700
701 mutex_destroy(&rsm_ipc.lock);
702 cv_destroy(&rsm_ipc.cv);
703
704 (void) mutex_destroy(&rsm_suspend_list.list_lock);
705
706 (void) mutex_destroy(&rsm_pgcnt_lock);
707
708 DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE, "_fini done\n"));
709
710 return (DDI_SUCCESS);
711
712 }
713
714 /*ARGSUSED1*/
715 static int
716 rsm_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
717 {
718 minor_t rnum;
719 int percent;
720 int ret;
721 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);
722
723 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_attach enter\n"));
724
725 switch (cmd) {
726 case DDI_ATTACH:
727 break;
728 case DDI_RESUME:
729 default:
730 DBG_PRINTF((category, RSM_ERR,
731 "rsm:rsm_attach - cmd not supported\n"));
732 return (DDI_FAILURE);
733 }
734
735 if (rsm_dip != NULL) {
736 DBG_PRINTF((category, RSM_ERR,
737 "rsm:rsm_attach - supports only "
738 "one instance\n"));
739 return (DDI_FAILURE);
740 }
741
742 rsm_enable_dr = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
743 DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
744 "enable-dynamic-reconfiguration", 1);
745
746 mutex_enter(&rsm_drv_data.drv_lock);
747 rsm_drv_data.drv_state = RSM_DRV_REG_PROCESSING;
748 mutex_exit(&rsm_drv_data.drv_lock);
749
750 if (rsm_enable_dr) {
751 #ifdef RSM_DRTEST
752 ret = rsm_kphysm_setup_func_register(&rsm_dr_callback_vec,
753 (void *)NULL);
754 #else
755 ret = kphysm_setup_func_register(&rsm_dr_callback_vec,
756 (void *)NULL);
757 #endif
758 if (ret != 0) {
			/* note: drv_lock is not held at this point */
760 cmn_err(CE_CONT, "rsm:rsm_attach - Dynamic "
761 "reconfiguration setup failed\n");
762 return (DDI_FAILURE);
763 }
764 }
765
766 mutex_enter(&rsm_drv_data.drv_lock);
767 ASSERT(rsm_drv_data.drv_state == RSM_DRV_REG_PROCESSING);
768 rsm_drv_data.drv_state = RSM_DRV_OK;
769 cv_broadcast(&rsm_drv_data.drv_cv);
770 mutex_exit(&rsm_drv_data.drv_lock);
771
772 /*
773 * page_list_read_lock();
774 * xx_setup();
775 * page_list_read_unlock();
776 */
777
778 rsm_hash_size = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
779 DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
780 "segment-hashtable-size", RSM_HASHSZ);
781 if (rsm_hash_size == 0) {
782 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
783 "rsm: segment-hashtable-size in rsm.conf "
784 "must be greater than 0, defaulting to 128\n"));
785 rsm_hash_size = RSM_HASHSZ;
786 }
787
788 DBG_PRINTF((category, RSM_DEBUG, "rsm_attach rsm_hash_size: %d\n",
789 rsm_hash_size));
790
791 rsm_pgcnt = 0;
792
793 percent = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
794 DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
795 "max-exported-memory", 0);
796 if (percent < 0) {
797 DBG_PRINTF((category, RSM_ERR,
798 "rsm:rsm_attach not enough memory available to "
799 "export, or max-exported-memory set incorrectly.\n"));
800 return (DDI_FAILURE);
801 }
802 /* 0 indicates no fixed upper limit. maxmem is the max */
803 /* available pageable physical mem */
804 rsm_pgcnt_max = (percent*maxmem)/100;
805
806 if (rsm_pgcnt_max > 0) {
807 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
808 "rsm: Available physical memory = %lu pages, "
809 "Max exportable memory = %lu pages",
810 maxmem, rsm_pgcnt_max));
811 }
812
813 /*
814 * Create minor number
815 */
816 if (rsmresource_alloc(&rnum) != RSM_SUCCESS) {
817 DBG_PRINTF((category, RSM_ERR,
818 "rsm: rsm_attach - Unable to get "
819 "minor number\n"));
820 return (DDI_FAILURE);
821 }
822
823 ASSERT(rnum == RSM_DRIVER_MINOR);
824
825 if (ddi_create_minor_node(devi, DRIVER_NAME, S_IFCHR,
826 rnum, DDI_PSEUDO, NULL) == DDI_FAILURE) {
827 DBG_PRINTF((category, RSM_ERR,
828 "rsm: rsm_attach - unable to allocate "
829 "minor #\n"));
830 return (DDI_FAILURE);
831 }
832
833 rsm_dip = devi;
834 /*
835 * Allocate the hashtables
836 */
837 rsmhash_alloc(&rsm_export_segs, rsm_hash_size);
838 rsmhash_alloc(&rsm_import_segs, rsm_hash_size);
839
840 importer_list.bucket = (importing_token_t **)
841 kmem_zalloc(rsm_hash_size * sizeof (importing_token_t *), KM_SLEEP);
842
843 /*
844 * Allocate a resource struct
845 */
846 {
847 rsmresource_t *p;
848
849 p = (rsmresource_t *)kmem_zalloc(sizeof (*p), KM_SLEEP);
850
851 mutex_init(&p->rsmrc_lock, NULL, MUTEX_DRIVER, (void *) NULL);
852
853 rsmresource_insert(rnum, p, RSM_RESOURCE_BAR);
854 }
855
856 /*
857 * Based on the rsm.conf property max-segments, determine the maximum
858 * number of segments that can be exported/imported. This is then used
859 * to determine the size for barrier failure pages.
860 */
861
862 /* First get the max number of segments from the rsm.conf file */
863 max_segs = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
864 DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
865 "max-segments", 0);
866 if (max_segs == 0) {
867 /* Use default number of segments */
868 max_segs = RSM_MAX_NUM_SEG;
869 }
870
871 /*
872 * Based on the max number of segments allowed, determine the barrier
873 * page size. add 1 to max_segs since the barrier page itself uses
874 * a slot
875 */
876 barrier_size = roundup((max_segs + 1) * sizeof (rsm_gnum_t),
877 PAGESIZE);
878
879 /*
880 * allocation of the barrier failure page
881 */
882 bar_va = (rsm_gnum_t *)ddi_umem_alloc(barrier_size,
883 DDI_UMEM_SLEEP, &bar_cookie);
884
885 /*
886 * Set the barrier_offset
887 */
888 barrier_offset = 0;
889
890 /*
 * Allocate trash memory and get a cookie for it. This will be used
892 * when remapping segments during force disconnects. Allocate the
893 * trash memory with a large size which is page aligned.
894 */
895 (void) ddi_umem_alloc((size_t)TRASHSIZE,
896 DDI_UMEM_TRASH, &remap_cookie);
897
898 /* initialize user segment id allocation variable */
899 rsm_nextavail_segmentid = (rsm_memseg_id_t)RSM_USER_APP_ID_BASE;
900
901 /*
902 * initialize the null_rsmpi_ops vector and the loopback adapter
903 */
904 rsmka_init_loopback();
905
906
907 ddi_report_dev(devi);
908
909 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_attach done\n"));
910
911 return (DDI_SUCCESS);
912 }
913
914 /*
 * The call to mod_remove in the _fini routine will cause the system
916 * to call rsm_detach
917 */
918 /*ARGSUSED*/
919 static int
920 rsm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
921 {
922 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);
923
924 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_detach enter\n"));
925
926 switch (cmd) {
927 case DDI_DETACH:
928 break;
929 default:
930 DBG_PRINTF((category, RSM_ERR,
931 "rsm:rsm_detach - cmd %x not supported\n",
932 cmd));
933 return (DDI_FAILURE);
934 }
935
936 mutex_enter(&rsm_drv_data.drv_lock);
937 while (rsm_drv_data.drv_state != RSM_DRV_OK)
938 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
939 rsm_drv_data.drv_state = RSM_DRV_UNREG_PROCESSING;
940 mutex_exit(&rsm_drv_data.drv_lock);
941
942 /*
943 * Unregister the DR callback functions
944 */
945 if (rsm_enable_dr) {
946 #ifdef RSM_DRTEST
947 rsm_kphysm_setup_func_unregister(&rsm_dr_callback_vec,
948 (void *)NULL);
949 #else
950 kphysm_setup_func_unregister(&rsm_dr_callback_vec,
951 (void *)NULL);
952 #endif
953 }
954
955 mutex_enter(&rsm_drv_data.drv_lock);
956 ASSERT(rsm_drv_data.drv_state == RSM_DRV_UNREG_PROCESSING);
957 rsm_drv_data.drv_state = RSM_DRV_NEW;
958 mutex_exit(&rsm_drv_data.drv_lock);
959
960 ASSERT(rsm_suspend_list.list_head == NULL);
961
962 /*
963 * Release all resources, seglist, controller, ...
964 */
965
966 /* remove intersend queues */
967 /* remove registered services */
968
969
970 ddi_remove_minor_node(dip, DRIVER_NAME);
971 rsm_dip = NULL;
972
973 /*
974 * Free minor zero resource
975 */
976 {
977 rsmresource_t *p;
978
979 p = rsmresource_free(RSM_DRIVER_MINOR);
980 if (p) {
981 mutex_destroy(&p->rsmrc_lock);
982 kmem_free((void *)p, sizeof (*p));
983 }
984 }
985
986 /*
987 * Free resource table
988 */
989
990 rsmresource_destroy();
991
992 /*
993 * Free the hash tables
994 */
995 rsmhash_free(&rsm_export_segs, rsm_hash_size);
996 rsmhash_free(&rsm_import_segs, rsm_hash_size);
997
998 kmem_free((void *)importer_list.bucket,
999 rsm_hash_size * sizeof (importing_token_t *));
1000 importer_list.bucket = NULL;
1001
1002
1003 /* free barrier page */
1004 if (bar_cookie != NULL) {
1005 ddi_umem_free(bar_cookie);
1006 }
1007 bar_va = NULL;
1008 bar_cookie = NULL;
1009
1010 /*
1011 * Free the memory allocated for the trash
1012 */
1013 if (remap_cookie != NULL) {
1014 ddi_umem_free(remap_cookie);
1015 }
1016 remap_cookie = NULL;
1017
1018 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_detach done\n"));
1019
1020 return (DDI_SUCCESS);
1021 }
1022
1023 /*ARGSUSED*/
1024 static int
1025 rsm_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1026 {
1027 register int error;
1028 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);
1029
1030 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_info enter\n"));
1031
1032 switch (infocmd) {
1033 case DDI_INFO_DEVT2DEVINFO:
1034 if (rsm_dip == NULL)
1035 error = DDI_FAILURE;
1036 else {
1037 *result = (void *)rsm_dip;
1038 error = DDI_SUCCESS;
1039 }
1040 break;
1041 case DDI_INFO_DEVT2INSTANCE:
1042 *result = (void *)0;
1043 error = DDI_SUCCESS;
1044 break;
1045 default:
1046 error = DDI_FAILURE;
1047 }
1048
1049 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_info done\n"));
1050 return (error);
1051 }
1052
1053 adapter_t *
1054 rsm_getadapter(rsm_ioctlmsg_t *msg, int mode)
1055 {
1056 adapter_t *adapter;
1057 char adapter_devname[MAXNAMELEN];
1058 int instance;
1059 DBG_DEFINE(category,
1060 RSM_KERNEL_AGENT | RSM_IMPORT | RSM_EXPORT | RSM_IOCTL);
1061
1062 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_getadapter enter\n"));
1063
1064 instance = msg->cnum;
1065
1066 if ((msg->cname_len <= 0) || (msg->cname_len > MAXNAMELEN)) {
1067 return (NULL);
1068 }
1069
1070 if (ddi_copyin(msg->cname, adapter_devname, msg->cname_len, mode))
1071 return (NULL);
1072
1073 if (strcmp(adapter_devname, "loopback") == 0)
1074 return (&loopback_adapter);
1075
1076 adapter = rsmka_lookup_adapter(adapter_devname, instance);
1077
1078 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_getadapter done\n"));
1079
1080 return (adapter);
1081 }
1082
1083
1084 /*
1085 * *********************** Resource Number Management ********************
1086 * All resources are stored in a simple hash table. The table is an array
1087 * of pointers to resource blks. Each blk contains:
1088 * base - base number of this blk
1089 * used - number of used slots in this blk.
1090 * blks - array of pointers to resource items.
1091 * An entry in a resource blk is empty if it's NULL.
1092 *
1093 * We start with no resource array. Each time we run out of slots, we
 * reallocate a larger array, copy the existing pointers into it, and
 * allocate a new resource blk which is added to the table.
1096 *
1097 * The resource control block contains:
1098 * root - array of pointer of resource blks
1099 * sz - current size of array.
1100 * len - last valid entry in array.
1101 *
1102 * A search operation based on a resource number is as follows:
1103 * index = rnum / RESOURCE_BLKSZ;
1104 * ASSERT(index < resource_block.len);
1105 * ASSERT(index < resource_block.sz);
1106 * offset = rnum % RESOURCE_BLKSZ;
 *	ASSERT(rnum >= resource_block.root[index]->base);
 *	ASSERT(rnum < resource_block.root[index]->base + RESOURCE_BLKSZ);
1109 * return resource_block.root[index]->blks[offset];
1110 *
 * A resource blk is freed when its used count reaches zero.
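 *
 * Allocation (a sketch of rsmresource_alloc below): scan the existing
 * blks for a NULL slot; if none is found, grow the root array when it
 * is full, then kmem_zalloc a new blk and reserve its first slot:
 *
 *	*rnum = empty * RSMRC_BLKSZ;
 *	blk->rsmrcblk_blks[0] = RSMRC_RESERVED;
 *	blk->rsmrcblk_avail = RSMRC_BLKSZ - 1;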
1112 */
1113 static int
1114 rsmresource_alloc(minor_t *rnum)
1115 {
1116
1117 /* search for available resource slot */
1118 int i, j, empty = -1;
1119 rsmresource_blk_t *blk;
1120
1121 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1122 "rsmresource_alloc enter\n"));
1123
1124 rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER);
1125
1126 /* Try to find an empty slot */
1127 for (i = 0; i < rsm_resource.rsmrc_len; i++) {
1128 blk = rsm_resource.rsmrc_root[i];
1129 if (blk != NULL && blk->rsmrcblk_avail > 0) {
1130 /* found an empty slot in this blk */
1131 for (j = 0; j < RSMRC_BLKSZ; j++) {
1132 if (blk->rsmrcblk_blks[j] == NULL) {
1133 *rnum = (minor_t)
1134 (j + (i * RSMRC_BLKSZ));
1135 /*
1136 * obey gen page limits
1137 */
1138 if (*rnum >= max_segs + 1) {
1139 if (empty < 0) {
1140 rw_exit(&rsm_resource.
1141 rsmrc_lock);
1142 DBG_PRINTF((
1143 RSM_KERNEL_ALL,
1144 RSM_ERR,
1145 "rsmresource"
1146 "_alloc failed:"
1147 "not enough res"
1148 "%d\n", *rnum));
1149 return (RSMERR_INSUFFICIENT_RESOURCES);
1150 } else {
1151 /* use empty slot */
1152 break;
1153 }
1154
1155 }
1156
1157 blk->rsmrcblk_blks[j] = RSMRC_RESERVED;
1158 blk->rsmrcblk_avail--;
1159 rw_exit(&rsm_resource.rsmrc_lock);
1160 DBG_PRINTF((RSM_KERNEL_ALL,
1161 RSM_DEBUG_VERBOSE,
1162 "rsmresource_alloc done\n"));
1163 return (RSM_SUCCESS);
1164 }
1165 }
1166 } else if (blk == NULL && empty < 0) {
1167 /* remember first empty slot */
1168 empty = i;
1169 }
1170 }
1171
1172 /* Couldn't find anything, allocate a new blk */
1173 /*
1174 * Do we need to reallocate the root array
1175 */
1176 if (empty < 0) {
1177 if (rsm_resource.rsmrc_len == rsm_resource.rsmrc_sz) {
1178 /*
1179 * Allocate new array and copy current stuff into it
1180 */
1181 rsmresource_blk_t **p;
1182 uint_t newsz = (uint_t)rsm_resource.rsmrc_sz +
1183 RSMRC_BLKSZ;
1184 /*
			 * Don't allocate more than the max valid rnum
1186 */
1187 if (rsm_resource.rsmrc_len*RSMRC_BLKSZ >=
1188 max_segs + 1) {
1189 rw_exit(&rsm_resource.rsmrc_lock);
1190 return (RSMERR_INSUFFICIENT_RESOURCES);
1191 }
1192
1193 p = (rsmresource_blk_t **)kmem_zalloc(
1194 newsz * sizeof (*p),
1195 KM_SLEEP);
1196
1197 if (rsm_resource.rsmrc_root) {
1198 uint_t oldsz;
1199
1200 oldsz = (uint_t)(rsm_resource.rsmrc_sz *
1201 (int)sizeof (*p));
1202
1203 /*
1204 * Copy old data into new space and
1205 * free old stuff
1206 */
1207 bcopy(rsm_resource.rsmrc_root, p, oldsz);
1208 kmem_free(rsm_resource.rsmrc_root, oldsz);
1209 }
1210
1211 rsm_resource.rsmrc_root = p;
1212 rsm_resource.rsmrc_sz = (int)newsz;
1213 }
1214
1215 empty = rsm_resource.rsmrc_len;
1216 rsm_resource.rsmrc_len++;
1217 }
1218
1219 /*
1220 * Allocate a new blk
1221 */
1222 blk = (rsmresource_blk_t *)kmem_zalloc(sizeof (*blk), KM_SLEEP);
1223 ASSERT(rsm_resource.rsmrc_root[empty] == NULL);
1224 rsm_resource.rsmrc_root[empty] = blk;
1225 blk->rsmrcblk_avail = RSMRC_BLKSZ - 1;
1226
1227 /*
1228 * Allocate slot
1229 */
1230
1231 *rnum = (minor_t)(empty * RSMRC_BLKSZ);
1232
1233 /*
1234 * watch out not to exceed bounds of barrier page
1235 */
1236 if (*rnum >= max_segs + 1) {
1237 rw_exit(&rsm_resource.rsmrc_lock);
1238 DBG_PRINTF((RSM_KERNEL_ALL, RSM_ERR,
1239 "rsmresource_alloc failed %d\n", *rnum));
1240
1241 return (RSMERR_INSUFFICIENT_RESOURCES);
1242 }
1243 blk->rsmrcblk_blks[0] = RSMRC_RESERVED;
1244
1245
1246 rw_exit(&rsm_resource.rsmrc_lock);
1247
1248 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1249 "rsmresource_alloc done\n"));
1250
1251 return (RSM_SUCCESS);
1252 }
1253
1254 static rsmresource_t *
1255 rsmresource_free(minor_t rnum)
1256 {
1257
	/* compute the block index and offset for this resource number */
1259 int i, j;
1260 rsmresource_blk_t *blk;
1261 rsmresource_t *p;
1262
1263 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1264 "rsmresource_free enter\n"));
1265
1266 i = (int)(rnum / RSMRC_BLKSZ);
1267 j = (int)(rnum % RSMRC_BLKSZ);
1268
1269 if (i >= rsm_resource.rsmrc_len) {
1270 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1271 "rsmresource_free done\n"));
1272 return (NULL);
1273 }
1274
1275 rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER);
1276
1277 ASSERT(rsm_resource.rsmrc_root);
1278 ASSERT(i < rsm_resource.rsmrc_len);
1279 ASSERT(i < rsm_resource.rsmrc_sz);
1280 blk = rsm_resource.rsmrc_root[i];
1281 if (blk == NULL) {
1282 rw_exit(&rsm_resource.rsmrc_lock);
1283 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1284 "rsmresource_free done\n"));
1285 return (NULL);
1286 }
1287
1288 ASSERT(blk->rsmrcblk_blks[j]); /* reserved or full */
1289
1290 p = blk->rsmrcblk_blks[j];
1291 if (p == RSMRC_RESERVED) {
1292 p = NULL;
1293 }
1294
1295 blk->rsmrcblk_blks[j] = NULL;
1296 blk->rsmrcblk_avail++;
1297 if (blk->rsmrcblk_avail == RSMRC_BLKSZ) {
1298 /* free this blk */
1299 kmem_free(blk, sizeof (*blk));
1300 rsm_resource.rsmrc_root[i] = NULL;
1301 }
1302
1303 rw_exit(&rsm_resource.rsmrc_lock);
1304
1305 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1306 "rsmresource_free done\n"));
1307
1308 return (p);
1309 }
1310
1311 static rsmresource_t *
1312 rsmresource_lookup(minor_t rnum, int lock)
1313 {
1314 int i, j;
1315 rsmresource_blk_t *blk;
1316 rsmresource_t *p;
1317
1318 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1319 "rsmresource_lookup enter\n"));
1320
	/* find the resource, taking the table lock in READER mode */
1323
1324 i = (int)(rnum / RSMRC_BLKSZ);
1325 j = (int)(rnum % RSMRC_BLKSZ);
1326
1327 if (i >= rsm_resource.rsmrc_len) {
1328 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1329 "rsmresource_lookup done\n"));
1330 return (NULL);
1331 }
1332
1333 rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
1334
1335 blk = rsm_resource.rsmrc_root[i];
1336 if (blk != NULL) {
1337 ASSERT(i < rsm_resource.rsmrc_len);
1338 ASSERT(i < rsm_resource.rsmrc_sz);
1339
1340 p = blk->rsmrcblk_blks[j];
1341 if (lock == RSM_LOCK) {
1342 if (p != RSMRC_RESERVED) {
1343 mutex_enter(&p->rsmrc_lock);
1344 } else {
1345 p = NULL;
1346 }
1347 }
1348 } else {
1349 p = NULL;
1350 }
1351 rw_exit(&rsm_resource.rsmrc_lock);
1352
1353 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1354 "rsmresource_lookup done\n"));
1355
1356 return (p);
1357 }
1358
1359 static void
1360 rsmresource_insert(minor_t rnum, rsmresource_t *p, rsm_resource_type_t type)
1361 {
	/*
	 * Fill in the slot previously reserved (RSMRC_RESERVED) by
	 * rsmresource_alloc; the table lock is taken in READER mode
	 * since the slot already belongs to the caller.
	 */
1365 int i, j;
1366 rsmresource_blk_t *blk;
1367
1368 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1369 "rsmresource_insert enter\n"));
1370
1371 i = (int)(rnum / RSMRC_BLKSZ);
1372 j = (int)(rnum % RSMRC_BLKSZ);
1373
1374 p->rsmrc_type = type;
1375 p->rsmrc_num = rnum;
1376
1377 rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
1378
1379 ASSERT(rsm_resource.rsmrc_root);
1380 ASSERT(i < rsm_resource.rsmrc_len);
1381 ASSERT(i < rsm_resource.rsmrc_sz);
1382
1383 blk = rsm_resource.rsmrc_root[i];
1384 ASSERT(blk);
1385
1386 ASSERT(blk->rsmrcblk_blks[j] == RSMRC_RESERVED);
1387
1388 blk->rsmrcblk_blks[j] = p;
1389
1390 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1391 "rsmresource_insert done\n"));
1392
1393 rw_exit(&rsm_resource.rsmrc_lock);
1394 }
1395
1396 static void
1397 rsmresource_destroy()
1398 {
1399 int i, j;
1400
1401 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1402 "rsmresource_destroy enter\n"));
1403
1404 rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER);
1405
1406 for (i = 0; i < rsm_resource.rsmrc_len; i++) {
1407 rsmresource_blk_t *blk;
1408
1409 blk = rsm_resource.rsmrc_root[i];
1410 if (blk == NULL) {
1411 continue;
1412 }
1413 for (j = 0; j < RSMRC_BLKSZ; j++) {
1414 if (blk->rsmrcblk_blks[j] != NULL) {
1415 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1416 "Not null slot %d, %lx\n", j,
1417 (size_t)blk->rsmrcblk_blks[j]));
1418 }
1419 }
1420 kmem_free(blk, sizeof (*blk));
1421 rsm_resource.rsmrc_root[i] = NULL;
1422 }
1423 if (rsm_resource.rsmrc_root) {
1424 i = rsm_resource.rsmrc_sz * (int)sizeof (rsmresource_blk_t *);
1425 kmem_free(rsm_resource.rsmrc_root, (uint_t)i);
1426 rsm_resource.rsmrc_root = NULL;
1427 rsm_resource.rsmrc_len = 0;
1428 rsm_resource.rsmrc_sz = 0;
1429 }
1430
1431 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1432 "rsmresource_destroy done\n"));
1433
1434 rw_exit(&rsm_resource.rsmrc_lock);
1435 }
1436
1437
1438 /* ******************** Generic Key Hash Table Management ********* */
1439 static rsmresource_t *
1440 rsmhash_lookup(rsmhash_table_t *rhash, rsm_memseg_id_t key,
1441 rsm_resource_state_t state)
1442 {
1443 rsmresource_t *p;
1444 uint_t hashval;
1445 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1446
1447 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_lookup enter\n"));
1448
1449 hashval = rsmhash(key);
1450
1451 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_lookup %u=%d\n",
1452 key, hashval));
1453
1454 rw_enter(&rhash->rsmhash_rw, RW_READER);
1455
1456 p = (rsmresource_t *)rsmhash_getbkt(rhash, hashval);
1457
1458 for (; p; p = p->rsmrc_next) {
1459 if (p->rsmrc_key == key) {
1460 /* acquire resource lock */
1461 RSMRC_LOCK(p);
1462 break;
1463 }
1464 }
1465
1466 rw_exit(&rhash->rsmhash_rw);
1467
1468 if (p != NULL && p->rsmrc_state != state) {
1469 /* state changed, release lock and return null */
1470 RSMRC_UNLOCK(p);
1471 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
1472 "rsmhash_lookup done: state changed\n"));
1473 return (NULL);
1474 }
1475
1476 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_lookup done\n"));
1477
1478 return (p);
1479 }
1480
1481 static void
1482 rsmhash_rm(rsmhash_table_t *rhash, rsmresource_t *rcelm)
1483 {
1484 rsmresource_t *p, **back;
1485 uint_t hashval;
1486 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1487
1488 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_rm enter\n"));
1489
1490 hashval = rsmhash(rcelm->rsmrc_key);
1491
1492 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_rm %u=%d\n",
1493 rcelm->rsmrc_key, hashval));
1494
1495 /*
1496 * It's ok not to find the segment.
1497 */
1498 rw_enter(&rhash->rsmhash_rw, RW_WRITER);
1499
1500 back = (rsmresource_t **)rsmhash_bktaddr(rhash, hashval);
1501
1502 for (; (p = *back) != NULL; back = &p->rsmrc_next) {
1503 if (p == rcelm) {
1504 *back = rcelm->rsmrc_next;
1505 break;
1506 }
1507 }
1508
1509 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_rm done\n"));
1510
1511 rw_exit(&rhash->rsmhash_rw);
1512 }
1513
1514 static int
1515 rsmhash_add(rsmhash_table_t *rhash, rsmresource_t *new, rsm_memseg_id_t key,
1516 int dup_check, rsm_resource_state_t state)
1517 {
1518 rsmresource_t *p = NULL, **bktp;
1519 uint_t hashval;
1520 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1521
1522 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_add enter\n"));
1523
1524 /* lock table */
1525 rw_enter(&rhash->rsmhash_rw, RW_WRITER);
1526
1527 /*
1528 * If the current resource state is other than the state passed in
1529 * then the resource is (probably) already on the list. eg. for an
1530 * import segment if the state is not RSM_STATE_NEW then it's on the
1531 * list already.
1532 */
1533 RSMRC_LOCK(new);
1534 if (new->rsmrc_state != state) {
1535 RSMRC_UNLOCK(new);
1536 rw_exit(&rhash->rsmhash_rw);
1537 return (RSMERR_BAD_SEG_HNDL);
1538 }
1539
1540 hashval = rsmhash(key);
1541 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_add %d\n", hashval));
1542
1543 if (dup_check) {
1544 /*
1545 * Used for checking export segments; don't want to have
1546 * the same key used for multiple segments.
1547 */
1548
1549 p = (rsmresource_t *)rsmhash_getbkt(rhash, hashval);
1550
1551 for (; p; p = p->rsmrc_next) {
1552 if (p->rsmrc_key == key) {
1553 RSMRC_UNLOCK(new);
1554 break;
1555 }
1556 }
1557 }
1558
1559 if (p == NULL) {
1560 /* Key doesn't exist, add it */
1561
1562 bktp = (rsmresource_t **)rsmhash_bktaddr(rhash, hashval);
1563
1564 new->rsmrc_key = key;
1565 new->rsmrc_next = *bktp;
1566 *bktp = new;
1567 }
1568
1569 rw_exit(&rhash->rsmhash_rw);
1570
1571 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_add done\n"));
1572
1573 return (p == NULL ? RSM_SUCCESS : RSMERR_SEGID_IN_USE);
1574 }
1575
1576 /*
1577 * XOR each byte of the key.
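 * For example, with the default table size of 128 (a power of two),
 * only the low bits matter: key 0x11223344 has byte-XOR
 * 0x11 ^ 0x22 ^ 0x33 ^ 0x44 = 0x44, so it lands in bucket
 * 0x44 % 128 = 68.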
1578 */
1579 static uint_t
1580 rsmhash(rsm_memseg_id_t key)
1581 {
1582 uint_t hash = key;
1583
1584 hash ^= (key >> 8);
1585 hash ^= (key >> 16);
1586 hash ^= (key >> 24);
1587
1588 return (hash % rsm_hash_size);
1589
1590 }
1591
1592 /*
1593 * generic function to get a specific bucket
1594 */
1595 static void *
1596 rsmhash_getbkt(rsmhash_table_t *rhash, uint_t hashval)
1597 {
1598
1599 if (rhash->bucket == NULL)
1600 return (NULL);
1601 else
1602 return ((void *)rhash->bucket[hashval]);
1603 }
1604
1605 /*
1606 * generic function to get a specific bucket's address
1607 */
1608 static void **
1609 rsmhash_bktaddr(rsmhash_table_t *rhash, uint_t hashval)
1610 {
1611 if (rhash->bucket == NULL)
1612 return (NULL);
1613 else
1614 return ((void **)&(rhash->bucket[hashval]));
1615 }
1616
1617 /*
1618 * generic function to alloc a hash table
1619 */
1620 static void
1621 rsmhash_alloc(rsmhash_table_t *rhash, int size)
1622 {
1623 rhash->bucket = (rsmresource_t **)
1624 kmem_zalloc(size * sizeof (rsmresource_t *), KM_SLEEP);
1625 }
1626
1627 /*
1628 * generic function to free a hash table
1629 */
1630 static void
1631 rsmhash_free(rsmhash_table_t *rhash, int size)
1632 {
1633
1634 kmem_free((void *)rhash->bucket, size * sizeof (caddr_t));
1635 rhash->bucket = NULL;
1636
1637 }
1638 /* *********************** Exported Segment Key Management ************ */
1639
1640 #define rsmexport_add(new, key) \
1641 rsmhash_add(&rsm_export_segs, (rsmresource_t *)new, key, 1, \
1642 RSM_STATE_BIND)
1643
1644 #define rsmexport_rm(arg) \
1645 rsmhash_rm(&rsm_export_segs, (rsmresource_t *)(arg))
1646
1647 #define rsmexport_lookup(key) \
1648 (rsmseg_t *)rsmhash_lookup(&rsm_export_segs, key, RSM_STATE_EXPORT)
1649
1650 /* ************************** Import Segment List Management ********** */
1651
1652 /*
1653 * Add segment to import list. This will be useful for paging and loopback
1654 * segment unloading.
1655 */
1656 #define rsmimport_add(arg, key) \
1657 rsmhash_add(&rsm_import_segs, (rsmresource_t *)(arg), (key), 0, \
1658 RSM_STATE_NEW)
1659
1660 #define rsmimport_rm(arg) \
1661 rsmhash_rm(&rsm_import_segs, (rsmresource_t *)(arg))
1662
1663 /*
1664 * #define rsmimport_lookup(key) \
1665 * (rsmseg_t *)rsmhash_lookup(&rsm_import_segs, (key), RSM_STATE_CONNECT)
1666 */
1667
1668 /*
 * Increase the ref count and make the import segment point to the
 * shared data structure. Returns a pointer to the shared data struct,
 * which is locked upon return.
1672 */
1673 static rsm_import_share_t *
1674 rsmshare_get(rsm_memseg_id_t key, rsm_node_id_t node, adapter_t *adapter,
1675 rsmseg_t *segp)
1676 {
1677 uint_t hash;
1678 rsmresource_t *p;
1679 rsm_import_share_t *shdatap;
1680 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1681
1682 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmshare_get enter\n"));
1683
1684 hash = rsmhash(key);
1685 /* lock table */
1686 rw_enter(&rsm_import_segs.rsmhash_rw, RW_WRITER);
1687 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmshare_get:key=%u, hash=%d\n",
1688 key, hash));
1689
1690 p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hash);
1691
1692 for (; p; p = p->rsmrc_next) {
1693 /*
1694 * Look for an entry that is importing the same exporter
1695 * with the share data structure allocated.
1696 */
1697 if ((p->rsmrc_key == key) &&
1698 (p->rsmrc_node == node) &&
1699 (p->rsmrc_adapter == adapter) &&
1700 (((rsmseg_t *)p)->s_share != NULL)) {
1701 shdatap = ((rsmseg_t *)p)->s_share;
1702 break;
1703 }
1704 }
1705
1706 if (p == NULL) {
1707 /* we are the first importer, create the shared data struct */
1708 shdatap = kmem_zalloc(sizeof (rsm_import_share_t), KM_SLEEP);
1709 shdatap->rsmsi_state = RSMSI_STATE_NEW;
1710 shdatap->rsmsi_segid = key;
1711 shdatap->rsmsi_node = node;
1712 mutex_init(&shdatap->rsmsi_lock, NULL, MUTEX_DRIVER, NULL);
1713 cv_init(&shdatap->rsmsi_cv, NULL, CV_DRIVER, 0);
1714 }
1715
1716 rsmseglock_acquire(segp);
1717
1718 /* we grab the shared lock before returning from this function */
1719 mutex_enter(&shdatap->rsmsi_lock);
1720
1721 shdatap->rsmsi_refcnt++;
1722 segp->s_share = shdatap;
1723
1724 rsmseglock_release(segp);
1725
1726 rw_exit(&rsm_import_segs.rsmhash_rw);
1727
1728 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmshare_get done\n"));
1729
1730 return (shdatap);
1731 }
1732
1733 /*
1734 * the shared data structure should be locked before calling
1735 * rsmsharecv_signal().
1736 * Change the state and signal any waiting segments.
1737 */
1738 void
1739 rsmsharecv_signal(rsmseg_t *seg, int oldstate, int newstate)
1740 {
1741 ASSERT(rsmsharelock_held(seg));
1742
1743 if (seg->s_share->rsmsi_state == oldstate) {
1744 seg->s_share->rsmsi_state = newstate;
1745 cv_broadcast(&seg->s_share->rsmsi_cv);
1746 }
1747 }
1748
1749 /*
1750 * Add to the hash table
1751 */
1752 static void
1753 importer_list_add(rsm_node_id_t node, rsm_memseg_id_t key, rsm_addr_t hwaddr,
1754 void *cookie)
1755 {
1756
1757 importing_token_t *head;
1758 importing_token_t *new_token;
1759 int index;
1760
1761 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1762
1763 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_add enter\n"));
1764
1765 new_token = kmem_zalloc(sizeof (importing_token_t), KM_SLEEP);
1766 new_token->importing_node = node;
1767 new_token->key = key;
1768 new_token->import_segment_cookie = cookie;
1769 new_token->importing_adapter_hwaddr = hwaddr;
1770
1771 index = rsmhash(key);
1772
1773 mutex_enter(&importer_list.lock);
1774
1775 head = importer_list.bucket[index];
1776 importer_list.bucket[index] = new_token;
1777 new_token->next = head;
1778 mutex_exit(&importer_list.lock);
1779
1780 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_add done\n"));
1781 }
1782
1783 static void
1784 importer_list_rm(rsm_node_id_t node, rsm_memseg_id_t key, void *cookie)
1785 {
1786
1787 importing_token_t *prev, *token = NULL;
1788 int index;
1789 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1790
1791 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_rm enter\n"));
1792
1793 index = rsmhash(key);
1794
1795 mutex_enter(&importer_list.lock);
1796
1797 token = importer_list.bucket[index];
1798
1799 prev = token;
1800 while (token != NULL) {
1801 if (token->importing_node == node &&
1802 token->import_segment_cookie == cookie) {
1803 if (prev == token)
1804 importer_list.bucket[index] = token->next;
1805 else
1806 prev->next = token->next;
1807 kmem_free((void *)token, sizeof (*token));
1808 break;
1809 } else {
1810 prev = token;
1811 token = token->next;
1812 }
1813 }
1814
1815 mutex_exit(&importer_list.lock);
1816
1817 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_rm done\n"));
1818
1819
1820 }
1821
1822 /* **************************Segment Structure Management ************* */
1823
1824 /*
1825 * Free segment structure
1826 */
1827 static void
1828 rsmseg_free(rsmseg_t *seg)
1829 {
1830
1831 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1832
1833 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_free enter\n"));
1834
1835 /* need to take seglock here to avoid race with rsmmap_unmap() */
1836 rsmseglock_acquire(seg);
1837 if (seg->s_ckl != NULL) {
1838 /* Segment is still busy */
1839 seg->s_state = RSM_STATE_END;
1840 rsmseglock_release(seg);
1841 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
1842 "rsmseg_free done\n"));
1843 return;
1844 }
1845
1846 rsmseglock_release(seg);
1847
1848 ASSERT(seg->s_state == RSM_STATE_END || seg->s_state == RSM_STATE_NEW);
1849
1850 /*
	 * If it is an importer, decrement the refcount and, when it
	 * drops to zero, free the shared data structure. This is also
	 * where refcounts taken by failed rsm_connect() calls are
	 * released.
1854 */
1855 if (seg->s_share != NULL) {
1856
1857 ASSERT(seg->s_type == RSM_RESOURCE_IMPORT_SEGMENT);
1858
1859 rsmsharelock_acquire(seg);
1860
1861 ASSERT(seg->s_share->rsmsi_refcnt > 0);
1862
1863 seg->s_share->rsmsi_refcnt--;
1864
1865 if (seg->s_share->rsmsi_refcnt == 0) {
1866 rsmsharelock_release(seg);
1867 mutex_destroy(&seg->s_share->rsmsi_lock);
1868 cv_destroy(&seg->s_share->rsmsi_cv);
1869 kmem_free((void *)(seg->s_share),
1870 sizeof (rsm_import_share_t));
1871 } else {
1872 rsmsharelock_release(seg);
1873 }
1874 /*
1875 * The following needs to be done after any
1876 * rsmsharelock calls which use seg->s_share.
1877 */
1878 seg->s_share = NULL;
1879 }
1880
1881 cv_destroy(&seg->s_cv);
1882 mutex_destroy(&seg->s_lock);
1883 rsmacl_free(seg->s_acl, seg->s_acl_len);
1884 rsmpiacl_free(seg->s_acl_in, seg->s_acl_len);
1885 if (seg->s_adapter)
1886 rsmka_release_adapter(seg->s_adapter);
1887
1888 kmem_free((void *)seg, sizeof (*seg));
1889
1890 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_free done\n"));
1891
1892 }
1893
1894
1895 static rsmseg_t *
1896 rsmseg_alloc(minor_t num, struct cred *cred)
1897 {
1898 rsmseg_t *new;
1899 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1900
1901 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_alloc enter\n"));
1902 /*
1903 * allocate memory for new segment. This should be a segkmem cache.
1904 */
1905 new = (rsmseg_t *)kmem_zalloc(sizeof (*new), KM_SLEEP);
1906
1907 new->s_state = RSM_STATE_NEW;
1908 new->s_minor = num;
1909 new->s_acl_len = 0;
1910 new->s_cookie = NULL;
1911 new->s_adapter = NULL;
1912
1913 new->s_mode = 0777 & ~PTOU((ttoproc(curthread)))->u_cmask;
1914 /* we don't have a key yet, will set at export/connect */
1915 new->s_uid = crgetuid(cred);
1916 new->s_gid = crgetgid(cred);
1917
1918 mutex_init(&new->s_lock, NULL, MUTEX_DRIVER, (void *)NULL);
1919 cv_init(&new->s_cv, NULL, CV_DRIVER, 0);
1920
1921 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_alloc done\n"));
1922
1923 return (new);
1924 }
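
/*
 * Worked example for the s_mode computation above: the default access
 * mode follows the file-permission convention, 0777 masked by the
 * process's file-creation mask. With a typical umask of 022 this gives
 *
 *	0777 & ~022 == 0755
 *
 * i.e. full access for the creator and read-type access for others,
 * unless overridden later by an explicit ACL at publish time.
 */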
1925
1926 /* ******************************** Driver Open/Close/Poll *************** */
1927
1928 /*ARGSUSED1*/
1929 static int
1930 rsm_open(dev_t *devp, int flag, int otyp, struct cred *cred)
1931 {
1932 minor_t rnum;
1933 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI);
1934
1935 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_open enter\n"));
1936 /*
1937 * Char only
1938 */
1939 if (otyp != OTYP_CHR) {
1940 DBG_PRINTF((category, RSM_ERR, "rsm_open: bad otyp\n"));
1941 return (EINVAL);
1942 }
1943
1944 /*
 * Only minor number zero can be opened; clones are used for resources.
1946 */
1947 if (getminor(*devp) != RSM_DRIVER_MINOR) {
1948 DBG_PRINTF((category, RSM_ERR,
1949 "rsm_open: bad minor %d\n", getminor(*devp)));
1950 return (ENODEV);
1951 }
1952
1953 if ((flag & FEXCL) != 0 && secpolicy_excl_open(cred) != 0) {
1954 DBG_PRINTF((category, RSM_ERR, "rsm_open: bad perm\n"));
1955 return (EPERM);
1956 }
1957
1958 if (!(flag & FWRITE)) {
1959 /*
1960 * The library function _rsm_librsm_init calls open for
1961 * /dev/rsm with flag set to O_RDONLY. We want a valid
1962 * file descriptor to be returned for minor device zero.
1963 */
1964
1965 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
1966 "rsm_open RDONLY done\n"));
1967 return (DDI_SUCCESS);
1968 }
1969
1970 /*
1971 * - allocate new minor number and segment.
1972 * - add segment to list of all segments.
1973 * - set minordev data to segment
1974 * - update devp argument to new device
1975 * - update s_cred to cred; make sure you do crhold(cred);
1976 */
1977
1978 /* allocate a new resource number */
1979 if (rsmresource_alloc(&rnum) == RSM_SUCCESS) {
1980 /*
1981 * We will bind this minor to a specific resource in first
1982 * ioctl
1983 */
1984 *devp = makedevice(getmajor(*devp), rnum);
1985 } else {
1986 return (EAGAIN);
1987 }
1988
1989 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_open done\n"));
1990 return (DDI_SUCCESS);
1991 }
1992
1993 static void
1994 rsmseg_close(rsmseg_t *seg, int force_flag)
1995 {
1996 int e = RSM_SUCCESS;
1997
1998 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI);
1999
2000 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_close enter\n"));
2001
2002 rsmseglock_acquire(seg);
2003 if (!force_flag && (seg->s_hdr.rsmrc_type ==
2004 RSM_RESOURCE_EXPORT_SEGMENT)) {
2005 /*
		 * If we are processing rsm_close, wait for force_destroy
		 * processing to complete, since it needs to finish before
		 * we can free the segment. force_destroy is only for
		 * export segments.
2010 */
2011 while (seg->s_flags & RSM_FORCE_DESTROY_WAIT) {
2012 cv_wait(&seg->s_cv, &seg->s_lock);
2013 }
2014 }
2015 rsmseglock_release(seg);
2016
2017 /* It's ok to read the state without a lock */
2018 switch (seg->s_state) {
2019 case RSM_STATE_EXPORT:
2020 case RSM_STATE_EXPORT_QUIESCING:
2021 case RSM_STATE_EXPORT_QUIESCED:
2022 e = rsm_unpublish(seg, 1);
2023 /* FALLTHRU */
2024 case RSM_STATE_BIND_QUIESCED:
2025 /* FALLTHRU */
2026 case RSM_STATE_BIND:
2027 e = rsm_unbind(seg);
2028 if (e != RSM_SUCCESS && force_flag == 1)
2029 return;
2030 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT);
2031 /* FALLTHRU */
2032 case RSM_STATE_NEW_QUIESCED:
2033 rsmseglock_acquire(seg);
2034 seg->s_state = RSM_STATE_NEW;
2035 cv_broadcast(&seg->s_cv);
2036 rsmseglock_release(seg);
2037 break;
2038 case RSM_STATE_NEW:
2039 break;
2040 case RSM_STATE_ZOMBIE:
2041 /*
		 * Segments in this state have been removed from the
		 * exported segments list and have been unpublished and
		 * unbound. These segments were removed during a callback
		 * to rsm_export_force_destroy, which is called to unlock
		 * exported memory segments when a process exits but
		 * leaves the segments locked down, since rsm_close is
		 * not called for the segments. This can happen when a
		 * process calls fork or exec and then exits. Once the
		 * segments are in the ZOMBIE state, all that remains is
		 * to destroy them when rsm_close is called. This is done
		 * here. Thus, for such segments the state is changed to
		 * NEW so that rsmseg_free is called later in this
		 * function.
2056 */
2057 rsmseglock_acquire(seg);
2058 seg->s_state = RSM_STATE_NEW;
2059 rsmseglock_release(seg);
2060 break;
2061 case RSM_STATE_MAP_QUIESCE:
2062 case RSM_STATE_ACTIVE:
2063 /* Disconnect will handle the unmap */
2064 case RSM_STATE_CONN_QUIESCE:
2065 case RSM_STATE_CONNECT:
2066 case RSM_STATE_DISCONNECT:
2067 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
2068 (void) rsm_disconnect(seg);
2069 break;
2070 case RSM_STATE_MAPPING:
2071 /*FALLTHRU*/
2072 case RSM_STATE_END:
2073 DBG_PRINTF((category, RSM_ERR,
2074 "Invalid segment state %d in rsm_close\n", seg->s_state));
2075 break;
2076 default:
2077 DBG_PRINTF((category, RSM_ERR,
2078 "Invalid segment state %d in rsm_close\n", seg->s_state));
2079 break;
2080 }
2081
2082 /*
2083 * check state.
2084 * - make sure you do crfree(s_cred);
2085 * release segment and minor number
2086 */
2087 ASSERT(seg->s_state == RSM_STATE_NEW);
2088
2089 /*
	 * The export_force_destroy callback is used to unlock the
	 * exported segments of a process when the process does a fork
	 * or exec and then exits. It calls this function with the
	 * force flag set to 1, which indicates that the segment state
	 * must be converted to ZOMBIE. This state means that the
	 * segments still exist and have been unlocked, and, most
	 * importantly, that the only operation allowed on them is to
	 * destroy them on an rsm_close.
2097 */
2098 if (force_flag) {
2099 rsmseglock_acquire(seg);
2100 seg->s_state = RSM_STATE_ZOMBIE;
2101 rsmseglock_release(seg);
2102 } else {
2103 rsmseg_free(seg);
2104 }
2105
2106 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_close done\n"));
2107 }
2108
2109 static int
2110 rsm_close(dev_t dev, int flag, int otyp, cred_t *cred)
2111 {
2112 minor_t rnum = getminor(dev);
2113 rsmresource_t *res;
2114 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI);
2115
2116 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close enter\n"));
2117
2118 flag = flag; cred = cred;
2119
2120 if (otyp != OTYP_CHR)
2121 return (EINVAL);
2122
2123 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rnum = %d\n", rnum));
2124
2125 /*
2126 * At this point we are the last reference to the resource.
2127 * Free resource number from resource table.
2128 * It's ok to remove number before we free the segment.
2129 * We need to lock the resource to protect against remote calls.
2130 */
2131 if (rnum == RSM_DRIVER_MINOR ||
2132 (res = rsmresource_free(rnum)) == NULL) {
2133 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close done\n"));
2134 return (DDI_SUCCESS);
2135 }
2136
2137 switch (res->rsmrc_type) {
2138 case RSM_RESOURCE_EXPORT_SEGMENT:
2139 case RSM_RESOURCE_IMPORT_SEGMENT:
2140 rsmseg_close((rsmseg_t *)res, 0);
2141 break;
2142 case RSM_RESOURCE_BAR:
2143 DBG_PRINTF((category, RSM_ERR, "bad resource in rsm_close\n"));
2144 break;
2145 default:
2146 break;
2147 }
2148
2149 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close done\n"));
2150
2151 return (DDI_SUCCESS);
2152 }
2153
2154 /*
2155 * rsm_inc_pgcnt
2156 *
2157 * Description: increment rsm page counter.
2158 *
2159 * Parameters: pgcnt_t pnum; number of pages to be used
2160 *
 * Returns:	RSM_SUCCESS if memory limit not exceeded
 *		RSMERR_INSUFFICIENT_MEM if memory limit exceeded. In
 *		this case, the page counter remains unchanged.
2164 *
2165 */
2166 static int
2167 rsm_inc_pgcnt(pgcnt_t pnum)
2168 {
2169 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2170 if (rsm_pgcnt_max == 0) { /* no upper limit has been set */
2171 return (RSM_SUCCESS);
2172 }
2173
2174 mutex_enter(&rsm_pgcnt_lock);
2175
2176 if (rsm_pgcnt + pnum > rsm_pgcnt_max) {
2177 /* ensure that limits have not been exceeded */
2178 mutex_exit(&rsm_pgcnt_lock);
2179 return (RSMERR_INSUFFICIENT_MEM);
2180 }
2181
2182 rsm_pgcnt += pnum;
2183 DBG_PRINTF((category, RSM_DEBUG, "rsm_pgcnt incr to %d.\n",
2184 rsm_pgcnt));
2185 mutex_exit(&rsm_pgcnt_lock);
2186
2187 return (RSM_SUCCESS);
2188 }
2189
2190 /*
2191 * rsm_dec_pgcnt
2192 *
2193 * Description: decrement rsm page counter.
2194 *
2195 * Parameters: pgcnt_t pnum; number of pages freed
2196 *
2197 */
2198 static void
2199 rsm_dec_pgcnt(pgcnt_t pnum)
2200 {
2201 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2202
2203 if (rsm_pgcnt_max == 0) { /* no upper limit has been set */
2204 return;
2205 }
2206
2207 mutex_enter(&rsm_pgcnt_lock);
2208 ASSERT(rsm_pgcnt >= pnum);
2209 rsm_pgcnt -= pnum;
2210 DBG_PRINTF((category, RSM_DEBUG, "rsm_pgcnt decr to %d.\n",
2211 rsm_pgcnt));
2212 mutex_exit(&rsm_pgcnt_lock);
2213 }
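
/*
 * Usage sketch (illustrative, not driver code): rsm_inc_pgcnt() and
 * rsm_dec_pgcnt() must stay strictly paired around the locking and
 * unlocking of a range, with the page count derived from the byte
 * length the same way on both sides:
 *
 *	pgcnt_t pnum = btopr(len);
 *
 *	if (rsm_inc_pgcnt(pnum) != RSM_SUCCESS)
 *		return (RSMERR_INSUFFICIENT_MEM);
 *	...
 *	rsm_dec_pgcnt(pnum);	on unlock, and on any failure path
 *
 * rsm_bind_pages() and rsm_unbind_pages() below follow exactly this
 * pattern; an unmatched decrement would trip the ASSERT above.
 */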
2214
2215 static struct umem_callback_ops rsm_as_ops = {
2216 UMEM_CALLBACK_VERSION, /* version number */
2217 rsm_export_force_destroy,
2218 };
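
/*
 * rsm_as_ops is passed to umem_lockmemory() in rsm_bind_pages() below;
 * it registers rsm_export_force_destroy as the callback the umem layer
 * invokes when locked-down pages must be torn down behind the driver's
 * back (for example, when a process forks or execs and then exits
 * without rsm_close being called); see rsm_export_force_destroy.
 */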
2219
2220 static int
2221 rsm_bind_pages(ddi_umem_cookie_t *cookie, caddr_t vaddr, size_t len,
2222 proc_t *procp)
2223 {
2224 int error = RSM_SUCCESS;
2225 ulong_t pnum;
2226 struct umem_callback_ops *callbackops = &rsm_as_ops;
2227
2228 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2229
2230 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind_pages enter\n"));
2231
2232 /*
2233 * Make sure vaddr and len are aligned on a page boundary
2234 */
2235 if ((uintptr_t)vaddr & (PAGESIZE - 1)) {
2236 return (RSMERR_BAD_ADDR);
2237 }
2238
2239 if (len & (PAGESIZE - 1)) {
2240 return (RSMERR_BAD_LENGTH);
2241 }
2242
2243 /*
2244 * Find number of pages
2245 */
2246 pnum = btopr(len);
2247 error = rsm_inc_pgcnt(pnum);
2248 if (error != RSM_SUCCESS) {
2249 DBG_PRINTF((category, RSM_ERR,
2250 "rsm_bind_pages:mem limit exceeded\n"));
2251 return (RSMERR_INSUFFICIENT_MEM);
2252 }
2253
2254 error = umem_lockmemory(vaddr, len,
2255 DDI_UMEMLOCK_WRITE|DDI_UMEMLOCK_READ|DDI_UMEMLOCK_LONGTERM,
2256 cookie,
2257 callbackops, procp);
2258
2259 if (error) {
2260 rsm_dec_pgcnt(pnum);
2261 DBG_PRINTF((category, RSM_ERR,
2262 "rsm_bind_pages:ddi_umem_lock failed\n"));
2263 /*
		 * umem_lockmemory, in the case of failure, returns one of
2265 * the following three errors. These are translated into
2266 * the RSMERR namespace and returned.
2267 */
2268 if (error == EFAULT)
2269 return (RSMERR_BAD_ADDR);
2270 else if (error == EACCES)
2271 return (RSMERR_PERM_DENIED);
2272 else
2273 return (RSMERR_INSUFFICIENT_MEM);
2274 }
2275
2276 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind_pages done\n"));
2277
2278 return (error);
2279
2280 }
2281
2282 static int
2283 rsm_unbind_pages(rsmseg_t *seg)
2284 {
2285 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2286
2287 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind_pages enter\n"));
2288
2289 ASSERT(rsmseglock_held(seg));
2290
2291 if (seg->s_cookie != NULL) {
2292 /* unlock address range */
2293 ddi_umem_unlock(seg->s_cookie);
2294 rsm_dec_pgcnt(btopr(seg->s_len));
2295 seg->s_cookie = NULL;
2296 }
2297
2298 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind_pages done\n"));
2299
2300 return (RSM_SUCCESS);
2301 }
2302
2303
2304 static int
2305 rsm_bind(rsmseg_t *seg, rsm_ioctlmsg_t *msg, intptr_t dataptr, int mode)
2306 {
2307 int e;
2308 adapter_t *adapter;
2309 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2310
2311 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind enter\n"));
2312
2313 adapter = rsm_getadapter(msg, mode);
2314 if (adapter == NULL) {
2315 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2316 "rsm_bind done:no adapter\n"));
2317 return (RSMERR_CTLR_NOT_PRESENT);
2318 }
2319
2320 /* lock address range */
2321 if (msg->vaddr == NULL) {
2322 rsmka_release_adapter(adapter);
2323 DBG_PRINTF((category, RSM_ERR,
2324 "rsm: rsm_bind done: invalid vaddr\n"));
2325 return (RSMERR_BAD_ADDR);
2326 }
2327 if (msg->len <= 0) {
2328 rsmka_release_adapter(adapter);
2329 DBG_PRINTF((category, RSM_ERR,
2330 "rsm_bind: invalid length\n"));
2331 return (RSMERR_BAD_LENGTH);
2332 }
2333
2334 /* Lock segment */
2335 rsmseglock_acquire(seg);
2336
2337 while (seg->s_state == RSM_STATE_NEW_QUIESCED) {
2338 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
2339 DBG_PRINTF((category, RSM_DEBUG,
2340 "rsm_bind done: cv_wait INTERRUPTED"));
2341 rsmka_release_adapter(adapter);
2342 rsmseglock_release(seg);
2343 return (RSMERR_INTERRUPTED);
2344 }
2345 }
2346
2347 ASSERT(seg->s_state == RSM_STATE_NEW);
2348
2349 ASSERT(seg->s_cookie == NULL);
2350
2351 e = rsm_bind_pages(&seg->s_cookie, msg->vaddr, msg->len, curproc);
2352 if (e == RSM_SUCCESS) {
2353 seg->s_flags |= RSM_USER_MEMORY;
2354 if (msg->perm & RSM_ALLOW_REBIND) {
2355 seg->s_flags |= RSMKA_ALLOW_UNBIND_REBIND;
2356 }
2357 if (msg->perm & RSM_CREATE_SEG_DONTWAIT) {
2358 seg->s_flags |= RSMKA_SET_RESOURCE_DONTWAIT;
2359 }
2360 seg->s_region.r_vaddr = msg->vaddr;
2361 /*
2362 * Set the s_pid value in the segment structure. This is used
2363 * to identify exported segments belonging to a particular
2364 * process so that when the process exits, these segments can
2365 * be unlocked forcefully even if rsm_close is not called on
		 * process exit, since there may be other processes referencing
2367 * them (for example on a fork or exec).
2368 * The s_pid value is also used to authenticate the process
2369 * doing a publish or unpublish on the export segment. Only
2370 * the creator of the export segment has a right to do a
2371 * publish or unpublish and unbind on the segment.
2372 */
2373 seg->s_pid = ddi_get_pid();
2374 seg->s_len = msg->len;
2375 seg->s_state = RSM_STATE_BIND;
2376 seg->s_adapter = adapter;
2377 seg->s_proc = curproc;
2378 } else {
2379 rsmka_release_adapter(adapter);
2380 DBG_PRINTF((category, RSM_WARNING,
2381 "unable to lock down pages\n"));
2382 }
2383
2384 msg->rnum = seg->s_minor;
2385 /* Unlock segment */
2386 rsmseglock_release(seg);
2387
2388 if (e == RSM_SUCCESS) {
2389 /* copyout the resource number */
2390 #ifdef _MULTI_DATAMODEL
2391 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
2392 rsm_ioctlmsg32_t msg32;
2393
2394 msg32.rnum = msg->rnum;
2395 if (ddi_copyout((caddr_t)&msg32.rnum,
2396 (caddr_t)&((rsm_ioctlmsg32_t *)dataptr)->rnum,
2397 sizeof (minor_t), mode)) {
2398 rsmka_release_adapter(adapter);
2399 e = RSMERR_BAD_ADDR;
2400 }
		} else
2402 #endif
2403 if (ddi_copyout((caddr_t)&msg->rnum,
2404 (caddr_t)&((rsm_ioctlmsg_t *)dataptr)->rnum,
2405 sizeof (minor_t), mode)) {
2406 rsmka_release_adapter(adapter);
2407 e = RSMERR_BAD_ADDR;
2408 }
2409 }
2410
2411 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind done\n"));
2412
2413 return (e);
2414 }
2415
2416 static void
2417 rsm_remap_local_importers(rsm_node_id_t src_nodeid,
2418 rsm_memseg_id_t ex_segid,
2419 ddi_umem_cookie_t cookie)
2420
2421 {
2422 rsmresource_t *p = NULL;
2423 rsmhash_table_t *rhash = &rsm_import_segs;
2424 uint_t index;
2425
2426 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_FUNC_ALL, RSM_DEBUG_VERBOSE,
2427 "rsm_remap_local_importers enter\n"));
2428
2429 index = rsmhash(ex_segid);
2430
2431 rw_enter(&rhash->rsmhash_rw, RW_READER);
2432
2433 p = rsmhash_getbkt(rhash, index);
2434
2435 for (; p; p = p->rsmrc_next) {
2436 rsmseg_t *seg = (rsmseg_t *)p;
2437 rsmseglock_acquire(seg);
2438 /*
2439 * Change the s_cookie value of only the local importers
2440 * which have been mapped (in state RSM_STATE_ACTIVE).
2441 * Note that there is no need to change the s_cookie value
2442 * if the imported segment is in RSM_STATE_MAPPING since
2443 * eventually the s_cookie will be updated via the mapping
2444 * functionality.
2445 */
2446 if ((seg->s_segid == ex_segid) && (seg->s_node == src_nodeid) &&
2447 (seg->s_state == RSM_STATE_ACTIVE)) {
2448 seg->s_cookie = cookie;
2449 }
2450 rsmseglock_release(seg);
2451 }
2452 rw_exit(&rhash->rsmhash_rw);
2453
2454 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_FUNC_ALL, RSM_DEBUG_VERBOSE,
2455 "rsm_remap_local_importers done\n"));
2456 }
2457
2458 static int
2459 rsm_rebind(rsmseg_t *seg, rsm_ioctlmsg_t *msg)
2460 {
2461 int e;
2462 adapter_t *adapter;
2463 ddi_umem_cookie_t cookie;
2464 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2465
2466 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind enter\n"));
2467
2468 /* Check for permissions to rebind */
2469 if (!(seg->s_flags & RSMKA_ALLOW_UNBIND_REBIND)) {
2470 return (RSMERR_REBIND_NOT_ALLOWED);
2471 }
2472
2473 if (seg->s_pid != ddi_get_pid() &&
2474 ddi_get_pid() != 0) {
2475 DBG_PRINTF((category, RSM_ERR, "rsm_rebind: Not owner\n"));
2476 return (RSMERR_NOT_CREATOR);
2477 }
2478
2479 /*
	 * Partial rebind is not allowed; hence the length passed in
	 * must be the same as the segment length.
2482 */
2483 if (msg->vaddr == NULL) {
2484 DBG_PRINTF((category, RSM_ERR,
2485 "rsm_rebind done: null msg->vaddr\n"));
2486 return (RSMERR_BAD_ADDR);
2487 }
2488 if (msg->len != seg->s_len) {
2489 DBG_PRINTF((category, RSM_ERR,
2490 "rsm_rebind: invalid length\n"));
2491 return (RSMERR_BAD_LENGTH);
2492 }
2493
2494 /* Lock segment */
2495 rsmseglock_acquire(seg);
2496
2497 while ((seg->s_state == RSM_STATE_BIND_QUIESCED) ||
2498 (seg->s_state == RSM_STATE_EXPORT_QUIESCING) ||
2499 (seg->s_state == RSM_STATE_EXPORT_QUIESCED)) {
2500 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
2501 rsmseglock_release(seg);
2502 DBG_PRINTF((category, RSM_DEBUG,
2503 "rsm_rebind done: cv_wait INTERRUPTED"));
2504 return (RSMERR_INTERRUPTED);
2505 }
2506 }
2507
2508 /* verify segment state */
2509 if ((seg->s_state != RSM_STATE_BIND) &&
2510 (seg->s_state != RSM_STATE_EXPORT)) {
2511 /* Unlock segment */
2512 rsmseglock_release(seg);
2513 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2514 "rsm_rebind done: invalid state\n"));
2515 return (RSMERR_BAD_SEG_HNDL);
2516 }
2517
2518 ASSERT(seg->s_cookie != NULL);
2519
2520 if (msg->vaddr == seg->s_region.r_vaddr) {
2521 rsmseglock_release(seg);
2522 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind done\n"));
2523 return (RSM_SUCCESS);
2524 }
2525
2526 e = rsm_bind_pages(&cookie, msg->vaddr, msg->len, curproc);
2527 if (e == RSM_SUCCESS) {
2528 struct buf *xbuf;
2529 dev_t sdev = 0;
2530 rsm_memory_local_t mem;
2531
2532 xbuf = ddi_umem_iosetup(cookie, 0, msg->len, B_WRITE,
2533 sdev, 0, NULL, DDI_UMEM_SLEEP);
2534 ASSERT(xbuf != NULL);
2535
2536 mem.ms_type = RSM_MEM_BUF;
2537 mem.ms_bp = xbuf;
2538
2539 adapter = seg->s_adapter;
2540 e = adapter->rsmpi_ops->rsm_rebind(
2541 seg->s_handle.out, 0, &mem,
2542 RSM_RESOURCE_DONTWAIT, NULL);
2543
2544 if (e == RSM_SUCCESS) {
2545 /*
2546 * unbind the older pages, and unload local importers;
2547 * but don't disconnect importers
2548 */
2549 (void) rsm_unbind_pages(seg);
2550 seg->s_cookie = cookie;
2551 seg->s_region.r_vaddr = msg->vaddr;
2552 rsm_remap_local_importers(my_nodeid, seg->s_segid,
2553 cookie);
2554 } else {
2555 /*
2556 * Unbind the pages associated with "cookie" by the
			 * rsm_bind_pages call prior to this. This is
2558 * similar to what is done in the rsm_unbind_pages
2559 * routine for the seg->s_cookie.
2560 */
2561 ddi_umem_unlock(cookie);
2562 rsm_dec_pgcnt(btopr(msg->len));
2563 DBG_PRINTF((category, RSM_ERR,
2564 "rsm_rebind failed with %d\n", e));
2565 }
2566 /*
2567 * At present there is no dependency on the existence of xbuf.
2568 * So we can free it here. If in the future this changes, it can
2569 * be freed sometime during the segment destroy.
2570 */
2571 freerbuf(xbuf);
2572 }
2573
2574 /* Unlock segment */
2575 rsmseglock_release(seg);
2576
2577 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind done\n"));
2578
2579 return (e);
2580 }
2581
2582 static int
2583 rsm_unbind(rsmseg_t *seg)
2584 {
2585 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2586
2587 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind enter\n"));
2588
2589 rsmseglock_acquire(seg);
2590
2591 /* verify segment state */
2592 if ((seg->s_state != RSM_STATE_BIND) &&
2593 (seg->s_state != RSM_STATE_BIND_QUIESCED)) {
2594 rsmseglock_release(seg);
2595 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2596 "rsm_unbind: invalid state\n"));
2597 return (RSMERR_BAD_SEG_HNDL);
2598 }
2599
2600 /* unlock current range */
2601 (void) rsm_unbind_pages(seg);
2602
2603 if (seg->s_state == RSM_STATE_BIND) {
2604 seg->s_state = RSM_STATE_NEW;
2605 } else if (seg->s_state == RSM_STATE_BIND_QUIESCED) {
2606 seg->s_state = RSM_STATE_NEW_QUIESCED;
2607 }
2608
2609 rsmseglock_release(seg);
2610
2611 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind done\n"));
2612
2613 return (RSM_SUCCESS);
2614 }
2615
2616 /* **************************** Exporter Access List Management ******* */
2617 static void
2618 rsmacl_free(rsmapi_access_entry_t *acl, int acl_len)
2619 {
2620 int acl_sz;
2621 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2622
2623 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_free enter\n"));
2624
2625 /* acl could be NULL */
2626
2627 if (acl != NULL && acl_len > 0) {
2628 acl_sz = acl_len * sizeof (rsmapi_access_entry_t);
2629 kmem_free((void *)acl, acl_sz);
2630 }
2631
2632 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_free done\n"));
2633 }
2634
2635 static void
2636 rsmpiacl_free(rsm_access_entry_t *acl, int acl_len)
2637 {
2638 int acl_sz;
2639 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2640
2641 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_free enter\n"));
2642
2643 if (acl != NULL && acl_len > 0) {
2644 acl_sz = acl_len * sizeof (rsm_access_entry_t);
2645 kmem_free((void *)acl, acl_sz);
2646 }
2647
2648 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_free done\n"));
2649
2650 }
2651
2652 static int
2653 rsmacl_build(rsm_ioctlmsg_t *msg, int mode,
2654 rsmapi_access_entry_t **list, int *len, int loopback)
2655 {
2656 rsmapi_access_entry_t *acl;
2657 int acl_len;
2658 int i;
2659 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2660
2661 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_build enter\n"));
2662
2663 *len = 0;
2664 *list = NULL;
2665
2666 acl_len = msg->acl_len;
2667 if ((loopback && acl_len > 1) || (acl_len < 0) ||
2668 (acl_len > MAX_NODES)) {
2669 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2670 "rsmacl_build done: acl invalid\n"));
2671 return (RSMERR_BAD_ACL);
2672 }
2673
2674 if (acl_len > 0 && acl_len <= MAX_NODES) {
2675 size_t acl_size = acl_len * sizeof (rsmapi_access_entry_t);
2676
2677 acl = kmem_alloc(acl_size, KM_SLEEP);
2678
2679 if (ddi_copyin((caddr_t)msg->acl, (caddr_t)acl,
2680 acl_size, mode)) {
2681 kmem_free((void *) acl, acl_size);
2682 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2683 "rsmacl_build done: BAD_ADDR\n"));
2684 return (RSMERR_BAD_ADDR);
2685 }
2686
2687 /*
2688 * Verify access list
2689 */
2690 for (i = 0; i < acl_len; i++) {
2691 if (acl[i].ae_node > MAX_NODES ||
2692 (loopback && (acl[i].ae_node != my_nodeid)) ||
2693 acl[i].ae_permission > RSM_ACCESS_TRUSTED) {
2694 /* invalid entry */
2695 kmem_free((void *) acl, acl_size);
2696 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2697 "rsmacl_build done: EINVAL\n"));
2698 return (RSMERR_BAD_ACL);
2699 }
2700 }
2701
2702 *len = acl_len;
2703 *list = acl;
2704 }
2705
2706 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_build done\n"));
2707
2708 return (DDI_SUCCESS);
2709 }
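
/*
 * Illustrative sketch (not driver code) of the access list that
 * rsmacl_build() copies in: the user library hands down an array of
 * rsmapi_access_entry_t, one entry per node. The node numbers and the
 * read-only permission name below are assumptions for illustration;
 * RSM_PERM_RDWR is the read/write mask used elsewhere in this file.
 *
 *	rsmapi_access_entry_t acl[2];
 *
 *	acl[0].ae_node = 1;
 *	acl[0].ae_permission = RSM_PERM_RDWR;	read/write for node 1
 *	acl[1].ae_node = 4;
 *	acl[1].ae_permission = RSM_PERM_READ;	read-only for node 4
 *	msg.acl = acl;
 *	msg.acl_len = 2;
 *
 * The validation loop above rejects any ae_node above MAX_NODES and
 * any ae_permission above RSM_ACCESS_TRUSTED.
 */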
2710
2711 static int
2712 rsmpiacl_create(rsmapi_access_entry_t *src, rsm_access_entry_t **dest,
2713 int acl_len, adapter_t *adapter)
2714 {
2715 rsm_access_entry_t *acl;
2716 rsm_addr_t hwaddr;
2717 int i;
2718 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2719
2720 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_create enter\n"));
2721
2722 if (src != NULL) {
2723 size_t acl_size = acl_len * sizeof (rsm_access_entry_t);
2724 acl = kmem_alloc(acl_size, KM_SLEEP);
2725
2726 /*
2727 * translate access list
2728 */
2729 for (i = 0; i < acl_len; i++) {
2730 if (src[i].ae_node == my_nodeid) {
2731 acl[i].ae_addr = adapter->hwaddr;
2732 } else {
2733 hwaddr = get_remote_hwaddr(adapter,
2734 src[i].ae_node);
2735 if ((int64_t)hwaddr < 0) {
2736 /* invalid hwaddr */
2737 kmem_free((void *) acl, acl_size);
2738 DBG_PRINTF((category,
2739 RSM_DEBUG_VERBOSE,
2740 "rsmpiacl_create done:"
2741 "EINVAL hwaddr\n"));
2742 return (RSMERR_INTERNAL_ERROR);
2743 }
2744 acl[i].ae_addr = hwaddr;
2745 }
2746 /* rsmpi understands only RSM_PERM_XXXX */
2747 acl[i].ae_permission =
2748 src[i].ae_permission & RSM_PERM_RDWR;
2749 }
2750 *dest = acl;
2751 } else {
2752 *dest = NULL;
2753 }
2754
2755 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_create done\n"));
2756
2757 return (RSM_SUCCESS);
2758 }
2759
2760 static int
2761 rsmsegacl_validate(rsmipc_request_t *req, rsm_node_id_t rnode,
2762 rsmipc_reply_t *reply)
2763 {
2764
2765 int i;
2766 rsmseg_t *seg;
2767 rsm_memseg_id_t key = req->rsmipc_key;
2768 rsm_permission_t perm = req->rsmipc_perm;
2769 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2770
2771 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2772 "rsmsegacl_validate enter\n"));
2773
2774 /*
2775 * Find segment and grab its lock. The reason why we grab the segment
	 * lock inside the search is to avoid the race when the segment is
2777 * being deleted and we already have a pointer to it.
2778 */
2779 seg = rsmexport_lookup(key);
2780 if (!seg) {
2781 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2782 "rsmsegacl_validate done: %u ENXIO\n", key));
2783 return (RSMERR_SEG_NOT_PUBLISHED);
2784 }
2785
2786 ASSERT(rsmseglock_held(seg));
2787 ASSERT(seg->s_state == RSM_STATE_EXPORT);
2788
2789 /*
2790 * We implement a 2-level protection scheme.
2791 * First, we check if local/remote host has access rights.
2792 * Second, we check if the user has access rights.
2793 *
2794 * This routine only validates the rnode access_list
2795 */
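	/*
	 * Example of the masking below (permission values illustrative):
	 * if the importer requests read/write access but the matching
	 * ACL entry grants only read access, then
	 *
	 *	perm &= seg->s_acl[i].ae_permission;
	 *
	 * leaves only the read bit set, and that reduced permission is
	 * what is returned to the importing node in the reply.
	 */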
2796 if (seg->s_acl_len > 0) {
2797 /*
2798 * Check host access list
2799 */
2800 ASSERT(seg->s_acl != NULL);
2801 for (i = 0; i < seg->s_acl_len; i++) {
2802 if (seg->s_acl[i].ae_node == rnode) {
2803 perm &= seg->s_acl[i].ae_permission;
2804 goto found;
2805 }
2806 }
2807 /* rnode is not found in the list */
2808 rsmseglock_release(seg);
2809 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2810 "rsmsegacl_validate done: EPERM\n"));
2811 return (RSMERR_SEG_NOT_PUBLISHED_TO_NODE);
2812 } else {
2813 /* use default owner creation umask */
2814 perm &= seg->s_mode;
2815 }
2816
2817 found:
2818 /* update perm for this node */
2819 reply->rsmipc_mode = perm;
2820 reply->rsmipc_uid = seg->s_uid;
2821 reply->rsmipc_gid = seg->s_gid;
2822 reply->rsmipc_segid = seg->s_segid;
2823 reply->rsmipc_seglen = seg->s_len;
2824
2825 /*
2826 * Perm of requesting node is valid; source will validate user
2827 */
2828 rsmseglock_release(seg);
2829
2830 /*
2831 * Add the importer to the list right away, if connect fails
2832 * the importer will ask the exporter to remove it.
2833 */
2834 importer_list_add(rnode, key, req->rsmipc_adapter_hwaddr,
2835 req->rsmipc_segment_cookie);
2836
2837 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegacl_validate done\n"));
2838
2839 return (RSM_SUCCESS);
2840 }
2841
2842
2843 /* ************************** Exporter Calls ************************* */
2844
2845 static int
2846 rsm_publish(rsmseg_t *seg, rsm_ioctlmsg_t *msg, intptr_t dataptr, int mode)
2847 {
2848 int e;
2849 int acl_len;
2850 rsmapi_access_entry_t *acl;
2851 rsm_access_entry_t *rsmpi_acl;
2852 rsm_memory_local_t mem;
2853 struct buf *xbuf;
2854 dev_t sdev = 0;
2855 adapter_t *adapter;
2856 rsm_memseg_id_t segment_id = 0;
2857 int loopback_flag = 0;
2858 int create_flags = 0;
2859 rsm_resource_callback_t callback_flag;
2860 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2861
2862 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_publish enter\n"));
2863
2864 if (seg->s_adapter == &loopback_adapter)
2865 loopback_flag = 1;
2866
2867 if (seg->s_pid != ddi_get_pid() &&
2868 ddi_get_pid() != 0) {
2869 DBG_PRINTF((category, RSM_ERR,
2870 "rsm_publish: Not creator\n"));
2871 return (RSMERR_NOT_CREATOR);
2872 }
2873
2874 /*
2875 * Get per node access list
2876 */
2877 e = rsmacl_build(msg, mode, &acl, &acl_len, loopback_flag);
2878 if (e != DDI_SUCCESS) {
2879 DBG_PRINTF((category, RSM_ERR,
2880 "rsm_publish done: rsmacl_build failed\n"));
2881 return (e);
2882 }
2883
2884 /*
2885 * The application provided msg->key is used for resolving a
2886 * segment id according to the following:
2887 * key = 0 Kernel Agent selects the segment id
2888 * key <= RSM_DLPI_ID_END Reserved for system usage except
2889 * RSMLIB range
2890 * key < RSM_USER_APP_ID_BASE segment id = key
2891 * key >= RSM_USER_APP_ID_BASE Reserved for KA selections
2892 *
2893 * rsm_nextavail_segmentid is initialized to 0x80000000 and
2894 * overflows to zero after 0x80000000 allocations.
2895 * An algorithm is needed which allows reinitialization and provides
2896 * for reallocation after overflow. For now, ENOMEM is returned
2897 * once the overflow condition has occurred.
2898 */
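	/*
	 * Worked examples of the resolution above, mirroring the code
	 * that follows (boundary names are symbolic; see the
	 * RSM_*_ID_* definitions for the actual values):
	 *
	 *	key == 0			segment_id is taken from
	 *					rsm_nextavail_segmentid
	 *	RSMLIB range			segment_id = key
	 *	key <= RSM_DLPI_ID_END		RSMERR_RESERVED_SEGID
	 *	key < RSM_USER_APP_ID_BASE	segment_id = key
	 *	key >= RSM_USER_APP_ID_BASE	RSMERR_RESERVED_SEGID
	 */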
2899 if (msg->key == 0) {
2900 mutex_enter(&rsm_lock);
2901 segment_id = rsm_nextavail_segmentid;
2902 if (segment_id != 0) {
2903 rsm_nextavail_segmentid++;
2904 mutex_exit(&rsm_lock);
2905 } else {
2906 mutex_exit(&rsm_lock);
2907 DBG_PRINTF((category, RSM_ERR,
2908 "rsm_publish done: no more keys avlbl\n"));
2909 return (RSMERR_INSUFFICIENT_RESOURCES);
2910 }
2911 } else if BETWEEN(msg->key, RSM_RSMLIB_ID_BASE, RSM_RSMLIB_ID_END)
2912 /* range reserved for internal use by base/ndi libraries */
2913 segment_id = msg->key;
2914 else if (msg->key <= RSM_DLPI_ID_END)
2915 return (RSMERR_RESERVED_SEGID);
	else if (msg->key <= (uint_t)RSM_USER_APP_ID_BASE - 1)
2917 segment_id = msg->key;
2918 else {
2919 DBG_PRINTF((category, RSM_ERR,
2920 "rsm_publish done: invalid key %u\n", msg->key));
2921 return (RSMERR_RESERVED_SEGID);
2922 }
2923
2924 /* Add key to exportlist; The segment lock is held on success */
2925 e = rsmexport_add(seg, segment_id);
2926 if (e) {
2927 rsmacl_free(acl, acl_len);
2928 DBG_PRINTF((category, RSM_ERR,
2929 "rsm_publish done: export_add failed: %d\n", e));
2930 return (e);
2931 }
2932
2933 seg->s_segid = segment_id;
2934
2935 if ((seg->s_state != RSM_STATE_BIND) &&
2936 (seg->s_state != RSM_STATE_BIND_QUIESCED)) {
2937 /* state changed since then, free acl and return */
2938 rsmseglock_release(seg);
2939 rsmexport_rm(seg);
2940 rsmacl_free(acl, acl_len);
2941 DBG_PRINTF((category, RSM_ERR,
2942 "rsm_publish done: segment in wrong state: %d\n",
2943 seg->s_state));
2944 return (RSMERR_BAD_SEG_HNDL);
2945 }
2946
2947 /*
2948 * If this is for a local memory handle and permissions are zero,
2949 * then the surrogate segment is very large and we want to skip
2950 * allocation of DVMA space.
2951 *
2952 * Careful! If the user didn't use an ACL list, acl will be a NULL
2953 * pointer. Check that before dereferencing it.
2954 */
2955 if (acl != (rsmapi_access_entry_t *)NULL) {
2956 if (acl[0].ae_node == my_nodeid && acl[0].ae_permission == 0)
2957 goto skipdriver;
2958 }
2959
2960 /* create segment */
2961 xbuf = ddi_umem_iosetup(seg->s_cookie, 0, seg->s_len, B_WRITE,
2962 sdev, 0, NULL, DDI_UMEM_SLEEP);
2963 ASSERT(xbuf != NULL);
2964
2965 mem.ms_type = RSM_MEM_BUF;
2966 mem.ms_bp = xbuf;
2967
	/* This call includes a bind operation */
2969
2970 adapter = seg->s_adapter;
2971 /*
 * create an acl list with hwaddr for RSMPI publish
2973 */
2974 e = rsmpiacl_create(acl, &rsmpi_acl, acl_len, adapter);
2975
2976 if (e != RSM_SUCCESS) {
2977 rsmseglock_release(seg);
2978 rsmexport_rm(seg);
2979 rsmacl_free(acl, acl_len);
2980 freerbuf(xbuf);
2981 DBG_PRINTF((category, RSM_ERR,
2982 "rsm_publish done: rsmpiacl_create failed: %d\n", e));
2983 return (e);
2984 }
2985
2986 if (seg->s_state == RSM_STATE_BIND) {
2987 /* create segment */
2988
		/* This call includes a bind operation */
2990
2991 if (seg->s_flags & RSMKA_ALLOW_UNBIND_REBIND) {
2992 create_flags = RSM_ALLOW_UNBIND_REBIND;
2993 }
2994
2995 if (seg->s_flags & RSMKA_SET_RESOURCE_DONTWAIT) {
2996 callback_flag = RSM_RESOURCE_DONTWAIT;
2997 } else {
2998 callback_flag = RSM_RESOURCE_SLEEP;
2999 }
3000
3001 e = adapter->rsmpi_ops->rsm_seg_create(
3002 adapter->rsmpi_handle,
3003 &seg->s_handle.out, seg->s_len,
3004 create_flags, &mem,
3005 callback_flag, NULL);
3006 /*
3007 * At present there is no dependency on the existence of xbuf.
3008 * So we can free it here. If in the future this changes, it can
3009 * be freed sometime during the segment destroy.
3010 */
3011 freerbuf(xbuf);
3012
3013 if (e != RSM_SUCCESS) {
3014 rsmseglock_release(seg);
3015 rsmexport_rm(seg);
3016 rsmacl_free(acl, acl_len);
3017 rsmpiacl_free(rsmpi_acl, acl_len);
3018 DBG_PRINTF((category, RSM_ERR,
3019 "rsm_publish done: export_create failed: %d\n", e));
3020 /*
3021 * The following assertion ensures that the two errors
3022 * related to the length and its alignment do not occur
3023 * since they have been checked during export_create
3024 */
3025 ASSERT(e != RSMERR_BAD_MEM_ALIGNMENT &&
3026 e != RSMERR_BAD_LENGTH);
3027 if (e == RSMERR_NOT_MEM)
3028 e = RSMERR_INSUFFICIENT_MEM;
3029
3030 return (e);
3031 }
3032 /* export segment, this should create an IMMU mapping */
3033 e = adapter->rsmpi_ops->rsm_publish(
3034 seg->s_handle.out,
3035 rsmpi_acl, acl_len,
3036 seg->s_segid,
3037 RSM_RESOURCE_DONTWAIT, NULL);
3038
3039 if (e != RSM_SUCCESS) {
3040 adapter->rsmpi_ops->rsm_seg_destroy(seg->s_handle.out);
3041 rsmseglock_release(seg);
3042 rsmexport_rm(seg);
3043 rsmacl_free(acl, acl_len);
3044 rsmpiacl_free(rsmpi_acl, acl_len);
3045 DBG_PRINTF((category, RSM_ERR,
3046 "rsm_publish done: export_publish failed: %d\n",
3047 e));
3048 return (e);
3049 }
3050 }
3051
3052 seg->s_acl_in = rsmpi_acl;
3053
3054 skipdriver:
	/* set s_acl/s_acl_len only now to avoid a crash in rsmseg_free */
3056 seg->s_acl_len = acl_len;
3057 seg->s_acl = acl;
3058
3059 if (seg->s_state == RSM_STATE_BIND) {
3060 seg->s_state = RSM_STATE_EXPORT;
3061 } else if (seg->s_state == RSM_STATE_BIND_QUIESCED) {
3062 seg->s_state = RSM_STATE_EXPORT_QUIESCED;
3063 cv_broadcast(&seg->s_cv);
3064 }
3065
3066 rsmseglock_release(seg);
3067
3068 /*
3069 * If the segment id was solicited, then return it in
3070 * the original incoming message.
3071 */
3072 if (msg->key == 0) {
3073 msg->key = segment_id;
3074 #ifdef _MULTI_DATAMODEL
3075 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
3076 rsm_ioctlmsg32_t msg32;
3077
3078 msg32.key = msg->key;
3079 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3080 "rsm_publish done\n"));
3081 return (ddi_copyout((caddr_t)&msg32,
3082 (caddr_t)dataptr, sizeof (msg32), mode));
3083 }
3084 #endif
3085 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3086 "rsm_publish done\n"));
3087 return (ddi_copyout((caddr_t)msg,
3088 (caddr_t)dataptr, sizeof (*msg), mode));
3089 }
3090
3091 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_publish done\n"));
3092 return (DDI_SUCCESS);
3093 }
3094
3095 /*
3096 * This function modifies the access control list of an already published
3097 * segment. There is no effect on import segments which are already
3098 * connected.
3099 */
3100 static int
3101 rsm_republish(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int mode)
3102 {
3103 rsmapi_access_entry_t *new_acl, *old_acl, *tmp_acl;
3104 rsm_access_entry_t *rsmpi_new_acl, *rsmpi_old_acl;
3105 int new_acl_len, old_acl_len, tmp_acl_len;
3106 int e, i;
3107 adapter_t *adapter;
3108 int loopback_flag = 0;
3109 rsm_memseg_id_t key;
3110 rsm_permission_t permission;
3111 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
3112
3113 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_republish enter\n"));
3114
3115 if ((seg->s_state != RSM_STATE_EXPORT) &&
3116 (seg->s_state != RSM_STATE_EXPORT_QUIESCED) &&
3117 (seg->s_state != RSM_STATE_EXPORT_QUIESCING))
3118 return (RSMERR_SEG_NOT_PUBLISHED);
3119
3120 if (seg->s_pid != ddi_get_pid() &&
3121 ddi_get_pid() != 0) {
3122 DBG_PRINTF((category, RSM_ERR,
3123 "rsm_republish: Not owner\n"));
3124 return (RSMERR_NOT_CREATOR);
3125 }
3126
3127 if (seg->s_adapter == &loopback_adapter)
3128 loopback_flag = 1;
3129
3130 /*
3131 * Build new list first
3132 */
3133 e = rsmacl_build(msg, mode, &new_acl, &new_acl_len, loopback_flag);
3134 if (e) {
3135 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3136 "rsm_republish done: rsmacl_build failed %d", e));
3137 return (e);
3138 }
3139
3140 /* Lock segment */
3141 rsmseglock_acquire(seg);
3142 /*
	 * A republish is in progress - a REPUBLISH message is being
	 * sent to the importers - so wait for it to complete, or
	 * wait till DR completes.
3146 */
3147 while (((seg->s_state == RSM_STATE_EXPORT) &&
3148 (seg->s_flags & RSM_REPUBLISH_WAIT)) ||
3149 (seg->s_state == RSM_STATE_EXPORT_QUIESCED) ||
3150 (seg->s_state == RSM_STATE_EXPORT_QUIESCING)) {
3151 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
3152 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3153 "rsm_republish done: cv_wait INTERRUPTED"));
3154 rsmseglock_release(seg);
3155 rsmacl_free(new_acl, new_acl_len);
3156 return (RSMERR_INTERRUPTED);
3157 }
3158 }
3159
3160 /* recheck if state is valid */
3161 if (seg->s_state != RSM_STATE_EXPORT) {
3162 rsmseglock_release(seg);
3163 rsmacl_free(new_acl, new_acl_len);
3164 return (RSMERR_SEG_NOT_PUBLISHED);
3165 }
3166
3167 key = seg->s_key;
3168 old_acl = seg->s_acl;
3169 old_acl_len = seg->s_acl_len;
3170
3171 seg->s_acl = new_acl;
3172 seg->s_acl_len = new_acl_len;
3173
3174 /*
3175 * This call will only be meaningful if and when the interconnect
3176 * layer makes use of the access list
3177 */
3178 adapter = seg->s_adapter;
3179 /*
	 * create an acl list with hwaddr for RSMPI publish
3181 */
3182 e = rsmpiacl_create(new_acl, &rsmpi_new_acl, new_acl_len, adapter);
3183
3184 if (e != RSM_SUCCESS) {
3185 seg->s_acl = old_acl;
3186 seg->s_acl_len = old_acl_len;
3187 rsmseglock_release(seg);
3188 rsmacl_free(new_acl, new_acl_len);
3189 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3190 "rsm_republish done: rsmpiacl_create failed %d", e));
3191 return (e);
3192 }
3193 rsmpi_old_acl = seg->s_acl_in;
3194 seg->s_acl_in = rsmpi_new_acl;
3195
3196 e = adapter->rsmpi_ops->rsm_republish(seg->s_handle.out,
3197 seg->s_acl_in, seg->s_acl_len,
3198 RSM_RESOURCE_DONTWAIT, NULL);
3199
3200 if (e != RSM_SUCCESS) {
3201 seg->s_acl = old_acl;
3202 seg->s_acl_in = rsmpi_old_acl;
3203 seg->s_acl_len = old_acl_len;
3204 rsmseglock_release(seg);
3205 rsmacl_free(new_acl, new_acl_len);
3206 rsmpiacl_free(rsmpi_new_acl, new_acl_len);
3207
3208 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3209 "rsm_republish done: rsmpi republish failed %d\n", e));
3210 return (e);
3211 }
3212
3213 /* create a tmp copy of the new acl */
3214 tmp_acl_len = new_acl_len;
3215 if (tmp_acl_len > 0) {
3216 tmp_acl = kmem_zalloc(new_acl_len*sizeof (*tmp_acl), KM_SLEEP);
3217 for (i = 0; i < tmp_acl_len; i++) {
3218 tmp_acl[i].ae_node = new_acl[i].ae_node;
3219 tmp_acl[i].ae_permission = new_acl[i].ae_permission;
3220 }
3221 /*
3222 * The default permission of a node which was in the old
3223 * ACL but not in the new ACL is 0 ie no access.
3224 */
3225 permission = 0;
3226 } else {
3227 /*
3228 * NULL acl means all importers can connect and
3229 * default permission will be owner creation umask
3230 */
3231 tmp_acl = NULL;
3232 permission = seg->s_mode;
3233 }
3234
	/* make other republishers wait for the republish to complete */
3236 seg->s_flags |= RSM_REPUBLISH_WAIT;
3237
3238 rsmseglock_release(seg);
3239
3240 /* send the new perms to the importing nodes */
3241 rsm_send_republish(key, tmp_acl, tmp_acl_len, permission);
3242
3243 rsmseglock_acquire(seg);
3244 seg->s_flags &= ~RSM_REPUBLISH_WAIT;
	/* wake up anyone waiting for the republish to complete */
3246 cv_broadcast(&seg->s_cv);
3247 rsmseglock_release(seg);
3248
3249 rsmacl_free(tmp_acl, tmp_acl_len);
3250 rsmacl_free(old_acl, old_acl_len);
3251 rsmpiacl_free(rsmpi_old_acl, old_acl_len);
3252
3253 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_republish done\n"));
3254 return (DDI_SUCCESS);
3255 }
3256
3257 static int
3258 rsm_unpublish(rsmseg_t *seg, int mode)
3259 {
3260 rsmapi_access_entry_t *acl;
3261 rsm_access_entry_t *rsmpi_acl;
3262 int acl_len;
3263 int e;
3264 adapter_t *adapter;
3265 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
3266
3267 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unpublish enter\n"));
3268
3269 if (seg->s_pid != ddi_get_pid() &&
3270 ddi_get_pid() != 0) {
3271 DBG_PRINTF((category, RSM_ERR,
3272 "rsm_unpublish: Not creator\n"));
3273 return (RSMERR_NOT_CREATOR);
3274 }
3275
3276 rsmseglock_acquire(seg);
3277 /*
	 * Wait for QUIESCING to complete here before rsmexport_rm
	 * is called, because the SUSPEND_COMPLETE message, which
	 * changes the seg state from EXPORT_QUIESCING to
	 * EXPORT_QUIESCED and signals the cv_wait, needs to find
	 * the segment in the hashtable.
3282 */
3283 while ((seg->s_state == RSM_STATE_EXPORT_QUIESCING) ||
3284 ((seg->s_state == RSM_STATE_EXPORT) && (seg->s_rdmacnt > 0))) {
3285 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
3286 rsmseglock_release(seg);
3287 DBG_PRINTF((category, RSM_ERR,
3288 "rsm_unpublish done: cv_wait INTR qscing"
3289 "getv/putv in progress"));
3290 return (RSMERR_INTERRUPTED);
3291 }
3292 }
3293
3294 /* verify segment state */
3295 if ((seg->s_state != RSM_STATE_EXPORT) &&
3296 (seg->s_state != RSM_STATE_EXPORT_QUIESCED)) {
3297 rsmseglock_release(seg);
3298 DBG_PRINTF((category, RSM_ERR,
3299 "rsm_unpublish done: bad state %x\n", seg->s_state));
3300 return (RSMERR_SEG_NOT_PUBLISHED);
3301 }
3302
3303 rsmseglock_release(seg);
3304
3305 rsmexport_rm(seg);
3306
3307 rsm_send_importer_disconnects(seg->s_segid, my_nodeid);
3308
3309 rsmseglock_acquire(seg);
3310 /*
3311 * wait for republish to complete
3312 */
3313 while ((seg->s_state == RSM_STATE_EXPORT) &&
3314 (seg->s_flags & RSM_REPUBLISH_WAIT)) {
3315 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
3316 DBG_PRINTF((category, RSM_ERR,
3317 "rsm_unpublish done: cv_wait INTR repubing"));
3318 rsmseglock_release(seg);
3319 return (RSMERR_INTERRUPTED);
3320 }
3321 }
3322
3323 if ((seg->s_state != RSM_STATE_EXPORT) &&
3324 (seg->s_state != RSM_STATE_EXPORT_QUIESCED)) {
3325 DBG_PRINTF((category, RSM_ERR,
3326 "rsm_unpublish done: invalid state"));
3327 rsmseglock_release(seg);
3328 return (RSMERR_SEG_NOT_PUBLISHED);
3329 }
3330
3331 /*
	 * check for a putv/getv surrogate segment which was not published
3333 * to the driver.
3334 *
3335 * Be certain to see if there is an ACL first! If this segment was
3336 * not published with an ACL, acl will be a null pointer. Check
3337 * that before dereferencing it.
3338 */
3339 acl = seg->s_acl;
3340 if (acl != (rsmapi_access_entry_t *)NULL) {
3341 if (acl[0].ae_node == my_nodeid && acl[0].ae_permission == 0)
3342 goto bypass;
3343 }
3344
3345 /* The RSMPI unpublish/destroy has been done if seg is QUIESCED */
3346 if (seg->s_state == RSM_STATE_EXPORT_QUIESCED)
3347 goto bypass;
3348
3349 adapter = seg->s_adapter;
3350 for (;;) {
3351 if (seg->s_state != RSM_STATE_EXPORT) {
3352 rsmseglock_release(seg);
3353 DBG_PRINTF((category, RSM_ERR,
3354 "rsm_unpublish done: bad state %x\n",
3355 seg->s_state));
3356 return (RSMERR_SEG_NOT_PUBLISHED);
3357 }
3358
3359 /* unpublish from adapter */
3360 e = adapter->rsmpi_ops->rsm_unpublish(seg->s_handle.out);
3361
3362 if (e == RSM_SUCCESS) {
3363 break;
3364 }
3365
3366 if (e == RSMERR_SEG_IN_USE && mode == 1) {
3367 /*
3368 * wait for unpublish to succeed, it's busy.
3369 */
3370 seg->s_flags |= RSM_EXPORT_WAIT;
3371
			/*
			 * Wait for a max of 1 ms - this is an empirical
			 * value that was found by some minimal testing and
			 * can be fine tuned when we have better numbers.
			 * A long-term fix would be to send cv_signal from
			 * the intr callback routine; currently nobody
			 * signals this wait.
			 */
3378 (void) cv_reltimedwait(&seg->s_cv, &seg->s_lock,
3379 drv_usectohz(1000), TR_CLOCK_TICK);
3380
3381 DBG_PRINTF((category, RSM_ERR,
3382 "rsm_unpublish: SEG_IN_USE\n"));
3383
3384 seg->s_flags &= ~RSM_EXPORT_WAIT;
3385 } else {
3386 if (mode == 1) {
3387 DBG_PRINTF((category, RSM_ERR,
3388 "rsm:rsmpi unpublish err %x\n", e));
3389 seg->s_state = RSM_STATE_BIND;
3390 }
3391 rsmseglock_release(seg);
3392 return (e);
3393 }
3394 }
3395
3396 /* Free segment */
3397 e = adapter->rsmpi_ops->rsm_seg_destroy(seg->s_handle.out);
3398
3399 if (e != RSM_SUCCESS) {
3400 DBG_PRINTF((category, RSM_ERR,
3401 "rsm_unpublish: rsmpi destroy key=%x failed %x\n",
3402 seg->s_key, e));
3403 }
3404
3405 bypass:
3406 acl = seg->s_acl;
3407 rsmpi_acl = seg->s_acl_in;
3408 acl_len = seg->s_acl_len;
3409
3410 seg->s_acl = NULL;
3411 seg->s_acl_in = NULL;
3412 seg->s_acl_len = 0;
3413
3414 if (seg->s_state == RSM_STATE_EXPORT) {
3415 seg->s_state = RSM_STATE_BIND;
3416 } else if (seg->s_state == RSM_STATE_EXPORT_QUIESCED) {
3417 seg->s_state = RSM_STATE_BIND_QUIESCED;
3418 cv_broadcast(&seg->s_cv);
3419 }
3420
3421 rsmseglock_release(seg);
3422
3423 rsmacl_free(acl, acl_len);
3424 rsmpiacl_free(rsmpi_acl, acl_len);
3425
3426 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unpublish done\n"));
3427
3428 return (DDI_SUCCESS);
3429 }
3430
3431 /*
3432 * Called from rsm_unpublish to force an unload and disconnection of all
3433 * importers of the unpublished segment.
3434 *
3435 * First build the list of segments requiring a force disconnect, then
3436 * send a request for each.
3437 */
3438 static void
3439 rsm_send_importer_disconnects(rsm_memseg_id_t ex_segid,
3440 rsm_node_id_t ex_nodeid)
3441 {
3442 rsmipc_request_t request;
3443 importing_token_t *prev_token, *token, *tmp_token, *tokp;
3444 importing_token_t *force_disconnect_list = NULL;
3445 int index;
3446
3447 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3448 "rsm_send_importer_disconnects enter\n"));
3449
3450 index = rsmhash(ex_segid);
3451
3452 mutex_enter(&importer_list.lock);
3453
3454 prev_token = NULL;
3455 token = importer_list.bucket[index];
3456
3457 while (token != NULL) {
3458 if (token->key == ex_segid) {
3459 /*
3460 * take it off the importer list and add it
3461 * to the force disconnect list.
3462 */
3463 if (prev_token == NULL)
3464 importer_list.bucket[index] = token->next;
3465 else
3466 prev_token->next = token->next;
3467 tmp_token = token;
3468 token = token->next;
3469 if (force_disconnect_list == NULL) {
3470 force_disconnect_list = tmp_token;
3471 tmp_token->next = NULL;
3472 } else {
3473 tokp = force_disconnect_list;
3474 /*
3475 * make sure that the tmp_token's node
3476 * is not already on the force disconnect
3477 * list.
3478 */
3479 while (tokp != NULL) {
3480 if (tokp->importing_node ==
3481 tmp_token->importing_node) {
3482 break;
3483 }
3484 tokp = tokp->next;
3485 }
3486 if (tokp == NULL) {
3487 tmp_token->next =
3488 force_disconnect_list;
3489 force_disconnect_list = tmp_token;
3490 } else {
3491 kmem_free((void *)tmp_token,
3492 sizeof (*token));
3493 }
3494 }
3495
3496 } else {
3497 prev_token = token;
3498 token = token->next;
3499 }
3500 }
3501 mutex_exit(&importer_list.lock);
3502
3503 token = force_disconnect_list;
3504 while (token != NULL) {
3505 if (token->importing_node == my_nodeid) {
3506 rsm_force_unload(ex_nodeid, ex_segid,
3507 DISCONNECT);
3508 } else {
3509 request.rsmipc_hdr.rsmipc_type =
3510 RSMIPC_MSG_DISCONNECT;
3511 request.rsmipc_key = token->key;
3512 for (;;) {
3513 if (rsmipc_send(token->importing_node,
3514 &request,
3515 RSM_NO_REPLY) == RSM_SUCCESS) {
3516 break;
3517 } else {
3518 delay(drv_usectohz(10000));
3519 }
3520 }
3521 }
3522 tmp_token = token;
3523 token = token->next;
3524 kmem_free((void *)tmp_token, sizeof (*token));
3525 }
3526
3527 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3528 "rsm_send_importer_disconnects done\n"));
3529 }
3530
3531 /*
3532 * This function is used as a callback for unlocking the pages locked
3533 * down by a process which then does a fork or an exec.
 * It marks the export segment corresponding to the umem cookie given
 * by the argument to be in a ZOMBIE state (by calling rsmseg_close),
 * to be destroyed later when an rsm_close occurs.
3537 */
3538 static void
3539 rsm_export_force_destroy(ddi_umem_cookie_t *ck)
3540 {
3541 rsmresource_blk_t *blk;
3542 rsmresource_t *p;
3543 rsmseg_t *eseg = NULL;
3544 int i, j;
3545 int found = 0;
3546
3547 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3548 "rsm_export_force_destroy enter\n"));
3549
3550 /*
3551 * Walk the resource list and locate the export segment (either
3552 * in the BIND or the EXPORT state) which corresponds to the
3553 * ddi_umem_cookie_t being freed up, and call rsmseg_close.
3554 * Change the state to ZOMBIE by calling rsmseg_close with the
3555 * force_flag argument (the second argument) set to 1. Also,
3556 * unpublish and unbind the segment, but don't free it. Free it
3557 * only on a rsm_close call for the segment.
3558 */
3559 rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
3560
3561 for (i = 0; i < rsm_resource.rsmrc_len; i++) {
3562 blk = rsm_resource.rsmrc_root[i];
3563 if (blk == NULL) {
3564 continue;
3565 }
3566
3567 for (j = 0; j < RSMRC_BLKSZ; j++) {
3568 p = blk->rsmrcblk_blks[j];
3569 if ((p != NULL) && (p != RSMRC_RESERVED) &&
3570 (p->rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT)) {
3571 eseg = (rsmseg_t *)p;
3572 if (eseg->s_cookie != ck)
3573 continue; /* continue searching */
3574 /*
3575 * Found the segment, set flag to indicate
3576 * force destroy processing is in progress
3577 */
3578 rsmseglock_acquire(eseg);
3579 eseg->s_flags |= RSM_FORCE_DESTROY_WAIT;
3580 rsmseglock_release(eseg);
3581 found = 1;
3582 break;
3583 }
3584 }
3585
3586 if (found)
3587 break;
3588 }
3589
3590 rw_exit(&rsm_resource.rsmrc_lock);
3591
3592 if (found) {
3593 ASSERT(eseg != NULL);
3594 /* call rsmseg_close with force flag set to 1 */
3595 rsmseg_close(eseg, 1);
3596 /*
3597 * force destroy processing done, clear flag and signal any
3598 * thread waiting in rsmseg_close.
3599 */
3600 rsmseglock_acquire(eseg);
3601 eseg->s_flags &= ~RSM_FORCE_DESTROY_WAIT;
3602 cv_broadcast(&eseg->s_cv);
3603 rsmseglock_release(eseg);
3604 }
3605
3606 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3607 "rsm_export_force_destroy done\n"));
3608 }
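
/*
 * Lifecycle summary for the force-destroy path, drawn from the logic
 * above and in rsmseg_close/rsmseg_free:
 *
 *	process forks/execs and exits with pages still locked
 *	 -> umem layer invokes rsm_export_force_destroy(cookie)
 *	 -> rsmseg_close(seg, 1): unpublish/unbind, state -> ZOMBIE
 *	 -> eventual rsm_close() on the minor device
 *	 -> rsmseg_close(seg, 0): ZOMBIE -> NEW, then rsmseg_free()
 */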
3609
3610 /* ******************************* Remote Calls *********************** */
3611 static void
3612 rsm_intr_segconnect(rsm_node_id_t src, rsmipc_request_t *req)
3613 {
3614 rsmipc_reply_t reply;
3615 DBG_DEFINE(category,
3616 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3617
3618 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3619 "rsm_intr_segconnect enter\n"));
3620
3621 reply.rsmipc_status = (short)rsmsegacl_validate(req, src, &reply);
3622
3623 reply.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPLY;
3624 reply.rsmipc_hdr.rsmipc_cookie = req->rsmipc_hdr.rsmipc_cookie;
3625
3626 (void) rsmipc_send(src, NULL, &reply);
3627
3628 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3629 "rsm_intr_segconnect done\n"));
3630 }
3631
3632
3633 /*
3634 * When an exported segment is unpublished the exporter sends an ipc
3635 * message (RSMIPC_MSG_DISCONNECT) to all importers. The recv ipc dispatcher
3636 * calls this function. The import list is scanned; segments which match the
3637 * exported segment id are unloaded and disconnected.
3638 *
3639 * Will also be called from rsm_rebind with disconnect_flag FALSE.
3640 *
3641 */
3642 static void
3643 rsm_force_unload(rsm_node_id_t src_nodeid,
3644 rsm_memseg_id_t ex_segid,
3645 boolean_t disconnect_flag)
3646
3647 {
3648 rsmresource_t *p = NULL;
3649 rsmhash_table_t *rhash = &rsm_import_segs;
3650 uint_t index;
3651 DBG_DEFINE(category,
3652 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3653
3654 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_force_unload enter\n"));
3655
3656 index = rsmhash(ex_segid);
3657
3658 rw_enter(&rhash->rsmhash_rw, RW_READER);
3659
3660 p = rsmhash_getbkt(rhash, index);
3661
3662 for (; p; p = p->rsmrc_next) {
3663 rsmseg_t *seg = (rsmseg_t *)p;
3664 if ((seg->s_segid == ex_segid) && (seg->s_node == src_nodeid)) {
3665 /*
3666 * In order to make rsmseg_unload and rsm_force_unload
3667 * thread safe, acquire the segment lock here.
3668 * rsmseg_unload is responsible for releasing the lock.
3669 * rsmseg_unload releases the lock just before a call
3670 * to rsmipc_send or in case of an early exit which
3671 * occurs if the segment was in the state
3672 * RSM_STATE_CONNECTING or RSM_STATE_NEW.
3673 */
3674 rsmseglock_acquire(seg);
3675 if (disconnect_flag)
3676 seg->s_flags |= RSM_FORCE_DISCONNECT;
3677 rsmseg_unload(seg);
3678 }
3679 }
3680 rw_exit(&rhash->rsmhash_rw);
3681
3682 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_force_unload done\n"));
3683 }
3684
3685 static void
3686 rsm_intr_reply(rsmipc_msghdr_t *msg)
3687 {
3688 /*
3689 * Find slot for cookie in reply.
3690 * Match sequence with sequence in cookie
3691 * If no match; return
	 * Try to grab the lock of the slot, if locked return
3693 * copy data into reply slot area
3694 * signal waiter
3695 */
3696 rsmipc_slot_t *slot;
3697 rsmipc_cookie_t *cookie;
3698 void *data = (void *) msg;
3699 size_t size = sizeof (rsmipc_reply_t);
3700 DBG_DEFINE(category,
3701 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3702
3703 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_reply enter\n"));
3704
3705 cookie = &msg->rsmipc_cookie;
3706 if (cookie->ic.index >= RSMIPC_SZ) {
3707 DBG_PRINTF((category, RSM_ERR,
3708 "rsm: rsm_intr_reply bad cookie %d\n", cookie->ic.index));
3709 return;
3710 }
3711
3712 ASSERT(cookie->ic.index < RSMIPC_SZ);
3713 slot = &rsm_ipc.slots[cookie->ic.index];
3714 mutex_enter(&slot->rsmipc_lock);
3715 if (slot->rsmipc_cookie.value == cookie->value) {
3716 /* found a match */
3717 if (RSMIPC_GET(slot, RSMIPC_PENDING)) {
3718 bcopy(data, slot->rsmipc_data, size);
3719 RSMIPC_CLEAR(slot, RSMIPC_PENDING);
3720 cv_signal(&slot->rsmipc_cv);
3721 }
3722 } else {
3723 DBG_PRINTF((category, RSM_DEBUG,
3724 "rsm: rsm_intr_reply mismatched reply %d\n",
3725 cookie->ic.index));
3726 }
3727 mutex_exit(&slot->rsmipc_lock);
3728 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_reply done\n"));
3729 }
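
/*
 * Illustrative note (not driver code): the ipc cookie carries both a
 * slot index and a sequence number, and a reply is accepted only when
 * the full cookie value still matches what the slot holds. A late
 * reply for an earlier request that reused slot N therefore fails the
 *
 *	slot->rsmipc_cookie.value == cookie->value
 *
 * comparison above, because the sequence part differs, and is dropped
 * rather than being delivered to the wrong waiter.
 */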
3730
3731 /*
3732 * This function gets dispatched on the worker thread when we receive
3733 * the SQREADY message. This function sends the SQREADY_ACK message.
3734 */
3735 static void
3736 rsm_sqready_ack_deferred(void *arg)
3737 {
3738 path_t *path = (path_t *)arg;
3739 DBG_DEFINE(category,
3740 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3741
3742 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3743 "rsm_sqready_ack_deferred enter\n"));
3744
3745 mutex_enter(&path->mutex);
3746
3747 /*
3748 * If path is not active no point in sending the ACK
3749 * because the whole SQREADY protocol will again start
3750 * when the path becomes active.
3751 */
3752 if (path->state != RSMKA_PATH_ACTIVE) {
3753 /*
3754 * decrement the path refcnt incremented in rsm_proc_sqready
3755 */
3756 PATH_RELE_NOLOCK(path);
3757 mutex_exit(&path->mutex);
3758 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3759 "rsm_sqready_ack_deferred done:!ACTIVE\n"));
3760 return;
3761 }
3762
3763 /* send an SQREADY_ACK message */
3764 (void) rsmipc_send_controlmsg(path, RSMIPC_MSG_SQREADY_ACK);
3765
3766 /* initialize credits to the max level */
3767 path->sendq_token.msgbuf_avail = RSMIPC_MAX_MESSAGES;
3768
3769 /* wake up any send that is waiting for credits */
3770 cv_broadcast(&path->sendq_token.sendq_cv);
3771
3772 /*
3773 * decrement the path refcnt since we incremented it in
3774 * rsm_proc_sqready
3775 */
3776 PATH_RELE_NOLOCK(path);
3777
3778 mutex_exit(&path->mutex);
3779
3780 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3781 "rsm_sqready_ack_deferred done\n"));
3782 }
3783
3784 /*
3785 * Process the SQREADY message
3786 */
3787 static void
3788 rsm_proc_sqready(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr,
3789 rsm_intr_hand_arg_t arg)
3790 {
3791 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)msg;
3792 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg;
3793 path_t *path;
3794 DBG_DEFINE(category,
3795 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3796
3797 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_proc_sqready enter\n"));
3798
3799 /* look up the path - incr the path refcnt */
3800 path = rsm_find_path(hdlr_argp->adapter_name,
3801 hdlr_argp->adapter_instance, src_hwaddr);
3802
    /*
     * No path exists - drop the message.
     */
3806 if (path == NULL) {
3807 DBG_PRINTF((category, RSM_DEBUG,
3808 "rsm_proc_sqready done: msg dropped no path\n"));
3809 return;
3810 }
3811
3812 mutex_exit(&path->mutex);
3813
3814 /* drain any tasks from the previous incarnation */
3815 taskq_wait(path->recv_taskq);
3816
3817 mutex_enter(&path->mutex);
    /*
     * If we had sent an SQREADY message and were waiting for an
     * SQREADY_ACK when this SQREADY message arrived, blindly clear
     * the WAIT_FOR_SQACK flag: we simply send an SQREADY_ACK and
     * forget about the SQREADY that we sent.
     */
3824 path->flags &= ~RSMKA_WAIT_FOR_SQACK;
3825
3826 if (path->state != RSMKA_PATH_ACTIVE) {
3827 /* decr refcnt and drop the mutex */
3828 PATH_RELE_NOLOCK(path);
3829 mutex_exit(&path->mutex);
3830 DBG_PRINTF((category, RSM_DEBUG,
3831 "rsm_proc_sqready done: msg dropped path !ACTIVE\n"));
3832 return;
3833 }
3834
3835 DBG_PRINTF((category, RSM_DEBUG, "rsm_proc_sqready:path=%lx "
3836 " src=%lx:%llx\n", path, msghdr->rsmipc_src, src_hwaddr));
3837
    /*
     * The sender's local incarnation number is our remote incarnation
     * number; save it in the path data structure.
     */
3842 path->remote_incn = msg->rsmipc_local_incn;
3843 path->sendq_token.msgbuf_avail = 0;
3844 path->procmsg_cnt = 0;
3845
    /*
     * The path is active - dispatch a task to send the SQREADY_ACK;
     * remember that RSMPI calls can't be made in interrupt context.
     *
     * We can use recv_taskq for the send because the remote endpoint
     * cannot start sending messages until it receives the SQREADY_ACK,
     * hence at this point there are no tasks on recv_taskq.
     *
     * The path refcnt will be decremented in rsm_sqready_ack_deferred.
     */
3856 (void) taskq_dispatch(path->recv_taskq,
3857 rsm_sqready_ack_deferred, path, KM_NOSLEEP);
3858
    mutex_exit(&path->mutex);

3862 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_proc_sqready done\n"));
3863 }
3864
3865 /*
3866 * Process the SQREADY_ACK message
3867 */
3868 static void
3869 rsm_proc_sqready_ack(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr,
3870 rsm_intr_hand_arg_t arg)
3871 {
3872 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)msg;
3873 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg;
3874 path_t *path;
3875 DBG_DEFINE(category,
3876 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3877
3878 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3879 "rsm_proc_sqready_ack enter\n"));
3880
3881 /* look up the path - incr the path refcnt */
3882 path = rsm_find_path(hdlr_argp->adapter_name,
3883 hdlr_argp->adapter_instance, src_hwaddr);
3884
    /*
     * Drop the message if no path exists, the path is not active,
     * or the path is not waiting for an SQREADY_ACK message.
     */
3889 if (path == NULL) {
3890 DBG_PRINTF((category, RSM_DEBUG,
3891 "rsm_proc_sqready_ack done: msg dropped no path\n"));
3892 return;
3893 }
3894
3895 if ((path->state != RSMKA_PATH_ACTIVE) ||
3896 !(path->flags & RSMKA_WAIT_FOR_SQACK)) {
3897 /* decrement the refcnt */
3898 PATH_RELE_NOLOCK(path);
3899 mutex_exit(&path->mutex);
3900 DBG_PRINTF((category, RSM_DEBUG,
3901 "rsm_proc_sqready_ack done: msg dropped\n"));
3902 return;
3903 }
3904
    /*
     * Check whether this message is in response to the last
     * RSMIPC_MSG_SQREADY sent; if not, drop it.
     */
3909 if (path->local_incn != msghdr->rsmipc_incn) {
3910 /* decrement the refcnt */
3911 PATH_RELE_NOLOCK(path);
3912 mutex_exit(&path->mutex);
3913 DBG_PRINTF((category, RSM_DEBUG,
3914 "rsm_proc_sqready_ack done: msg old incn %lld\n",
3915 msghdr->rsmipc_incn));
3916 return;
3917 }
3918
3919 DBG_PRINTF((category, RSM_DEBUG, "rsm_proc_sqready_ack:path=%lx "
3920 " src=%lx:%llx\n", path, msghdr->rsmipc_src, src_hwaddr));
3921
    /*
     * Clear the WAIT_FOR_SQACK flag since we have received the ack.
     */
3925 path->flags &= ~RSMKA_WAIT_FOR_SQACK;
3926
3927 /* save the remote sendq incn number */
3928 path->remote_incn = msg->rsmipc_local_incn;
3929
3930 /* initialize credits to the max level */
3931 path->sendq_token.msgbuf_avail = RSMIPC_MAX_MESSAGES;
3932
3933 /* wake up any send that is waiting for credits */
3934 cv_broadcast(&path->sendq_token.sendq_cv);
3935
3936 /* decrement the refcnt */
3937 PATH_RELE_NOLOCK(path);
3938
3939 mutex_exit(&path->mutex);
3940
3941 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3942 "rsm_proc_sqready_ack done\n"));
3943 }
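
/*
 * The send-queue readiness handshake implemented by rsm_proc_sqready,
 * rsm_sqready_ack_deferred and rsm_proc_sqready_ack, in brief
 * (A initiates, B runs the handlers above):
 *
 *	1. A sends SQREADY carrying A's local_incn, with
 *	   RSMKA_WAIT_FOR_SQACK set on A's path.
 *	2. B (rsm_proc_sqready) saves that incn as remote_incn, zeroes
 *	   msgbuf_avail/procmsg_cnt and dispatches the ACK task.
 *	3. B (rsm_sqready_ack_deferred) sends SQREADY_ACK and raises
 *	   its credits to RSMIPC_MAX_MESSAGES.
 *	4. A (rsm_proc_sqready_ack) clears RSMKA_WAIT_FOR_SQACK, saves
 *	   B's incn and raises its credits to RSMIPC_MAX_MESSAGES.
 *
 * Each handler drops a message whose incarnation number does not match
 * the current path incarnation, discarding traffic that belongs to an
 * earlier life of the path/sendq.
 */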
3944
3945 /*
3946 * process the RSMIPC_MSG_CREDIT message
3947 */
3948 static void
3949 rsm_add_credits(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr,
3950 rsm_intr_hand_arg_t arg)
3951 {
3952 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)msg;
3953 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg;
3954 path_t *path;
3955 DBG_DEFINE(category,
3956 RSM_KERNEL_AGENT | RSM_FUNC_ALL |
3957 RSM_INTR_CALLBACK | RSM_FLOWCONTROL);
3958
3959 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_add_credits enter\n"));
3960
3961 /* look up the path - incr the path refcnt */
3962 path = rsm_find_path(hdlr_argp->adapter_name,
3963 hdlr_argp->adapter_instance, src_hwaddr);
3964
3965 if (path == NULL) {
3966 DBG_PRINTF((category, RSM_DEBUG,
        "rsm_add_credits done: path not found\n"));
3968 return;
3969 }
3970
3971 /* the path is not active - discard credits */
3972 if (path->state != RSMKA_PATH_ACTIVE) {
3973 PATH_RELE_NOLOCK(path);
3974 mutex_exit(&path->mutex);
3975 DBG_PRINTF((category, RSM_DEBUG,
        "rsm_add_credits done: path=%lx !ACTIVE\n", path));
3977 return;
3978 }
3979
3980 /*
3981 * Check if these credits are for current incarnation of the path.
3982 */
3983 if (path->local_incn != msghdr->rsmipc_incn) {
3984 /* decrement the refcnt */
3985 PATH_RELE_NOLOCK(path);
3986 mutex_exit(&path->mutex);
3987 DBG_PRINTF((category, RSM_DEBUG,
        "rsm_add_credits done: old incn %lld\n",
3989 msghdr->rsmipc_incn));
3990 return;
3991 }
3992
3993 DBG_PRINTF((category, RSM_DEBUG,
3994 "rsm_add_credits:path=%lx new-creds=%d "
3995 "curr credits=%d src=%lx:%llx\n", path, msg->rsmipc_credits,
3996 path->sendq_token.msgbuf_avail, msghdr->rsmipc_src,
        src_hwaddr));

4000 /* add credits to the path's sendq */
4001 path->sendq_token.msgbuf_avail += msg->rsmipc_credits;
4002
4003 ASSERT(path->sendq_token.msgbuf_avail <= RSMIPC_MAX_MESSAGES);
4004
4005 /* wake up any send that is waiting for credits */
4006 cv_broadcast(&path->sendq_token.sendq_cv);
4007
4008 /* decrement the refcnt */
4009 PATH_RELE_NOLOCK(path);
4010
4011 mutex_exit(&path->mutex);
4012
4013 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_add_credits done\n"));
4014 }
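
/*
 * Sender-side view of the credits added above - a condensed sketch of
 * the consumer loop in rsmipc_send (later in this file):
 *
 *	mutex_enter(&path->mutex);
 *	while ((sendq_token->msgbuf_avail == 0) &&
 *	    (path->state == RSMKA_PATH_ACTIVE))
 *		cv_wait_sig(&sendq_token->sendq_cv, &path->mutex);
 *	sendq_token->msgbuf_avail--;	(reserve one message buffer)
 *	mutex_exit(&path->mutex);
 *
 * rsm_add_credits is the producer half: it adds rsmipc_credits to
 * msgbuf_avail and broadcasts on sendq_cv to wake any blocked sender.
 */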
4015
4016 static void
4017 rsm_intr_event(rsmipc_request_t *msg)
4018 {
4019 rsmseg_t *seg;
4020 rsmresource_t *p;
4021 rsm_node_id_t src_node;
4022 DBG_DEFINE(category,
4023 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4024
4025 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_event enter\n"));
4026
4027 src_node = msg->rsmipc_hdr.rsmipc_src;
4028
4029 if ((seg = msg->rsmipc_segment_cookie) != NULL) {
4030 /* This is for an import segment */
4031 uint_t hashval = rsmhash(msg->rsmipc_key);
4032
4033 rw_enter(&rsm_import_segs.rsmhash_rw, RW_READER);
4034
4035 p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hashval);
4036
4037 for (; p; p = p->rsmrc_next) {
4038 if ((p->rsmrc_key == msg->rsmipc_key) &&
4039 (p->rsmrc_node == src_node)) {
4040 seg = (rsmseg_t *)p;
4041 rsmseglock_acquire(seg);
4042
4043 atomic_inc_32(&seg->s_pollevent);
4044
4045 if (seg->s_pollflag & RSM_SEGMENT_POLL)
4046 pollwakeup(&seg->s_poll, POLLRDNORM);
4047
4048 rsmseglock_release(seg);
4049 }
4050 }
4051
4052 rw_exit(&rsm_import_segs.rsmhash_rw);
4053 } else {
4054 /* This is for an export segment */
4055 seg = rsmexport_lookup(msg->rsmipc_key);
4056 if (!seg) {
4057 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4058 "rsm_intr_event done: exp seg not found\n"));
4059 return;
4060 }
4061
4062 ASSERT(rsmseglock_held(seg));
4063
4064 atomic_inc_32(&seg->s_pollevent);
4065
        /*
         * We must hold the segment lock here, or else the segment
         * can be freed while pollwakeup is using it. This implies
         * that we MUST NOT grab the segment lock during rsm_chpoll,
         * as outlined in the chpoll(9E) man page.
         */
4072 if (seg->s_pollflag & RSM_SEGMENT_POLL)
4073 pollwakeup(&seg->s_poll, POLLRDNORM);
4074
4075 rsmseglock_release(seg);
4076 }
4077
4078 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_event done\n"));
4079 }
4080
4081 /*
4082 * The exporter did a republish and changed the ACL - this change is only
4083 * visible to new importers.
4084 */
4085 static void
4086 importer_update(rsm_node_id_t src_node, rsm_memseg_id_t key,
4087 rsm_permission_t perm)
4088 {
4089
4090 rsmresource_t *p;
4091 rsmseg_t *seg;
4092 uint_t hashval = rsmhash(key);
4093 DBG_DEFINE(category,
4094 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4095
4096 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_update enter\n"));
4097
4098 rw_enter(&rsm_import_segs.rsmhash_rw, RW_READER);
4099
4100 p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hashval);
4101
4102 for (; p; p = p->rsmrc_next) {
        /*
         * Find the importer and update the permission in the shared
         * data structure. Any new importers will use the new perms.
         */
4107 if ((p->rsmrc_key == key) && (p->rsmrc_node == src_node)) {
4108 seg = (rsmseg_t *)p;
4109
4110 rsmseglock_acquire(seg);
4111 rsmsharelock_acquire(seg);
4112 seg->s_share->rsmsi_mode = perm;
4113 rsmsharelock_release(seg);
4114 rsmseglock_release(seg);
4115
4116 break;
4117 }
4118 }
4119
4120 rw_exit(&rsm_import_segs.rsmhash_rw);
4121
4122 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_update done\n"));
4123 }
4124
4125 void
4126 rsm_suspend_complete(rsm_node_id_t src_node, int flag)
4127 {
4128 int done = 1; /* indicate all SUSPENDS have been acked */
4129 list_element_t *elem;
4130 DBG_DEFINE(category,
4131 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4132
4133 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4134 "rsm_suspend_complete enter\n"));
4135
4136 mutex_enter(&rsm_suspend_list.list_lock);
4137
4138 if (rsm_suspend_list.list_head == NULL) {
4139 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4140 "rsm_suspend_complete done: suspend_list is empty\n"));
4141 mutex_exit(&rsm_suspend_list.list_lock);
4142 return;
4143 }
4144
4145 elem = rsm_suspend_list.list_head;
4146 while (elem != NULL) {
4147 if (elem->nodeid == src_node) {
4148 /* clear the pending flag for the node */
4149 elem->flags &= ~RSM_SUSPEND_ACKPENDING;
4150 elem->flags |= flag;
4151 }
4152
4153 if (done && (elem->flags & RSM_SUSPEND_ACKPENDING))
4154 done = 0; /* still some nodes have not yet ACKED */
4155
4156 elem = elem->next;
4157 }
4158
4159 mutex_exit(&rsm_suspend_list.list_lock);
4160
4161 if (!done) {
4162 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4163 "rsm_suspend_complete done: acks pending\n"));
4164 return;
4165 }
    /*
     * Now that we are done suspending all the remote importers, it is
     * time to quiesce the local exporters.
     */
4170 exporter_quiesce();
4171
4172 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4173 "rsm_suspend_complete done\n"));
4174 }
4175
4176 static void
4177 exporter_quiesce()
4178 {
4179 int i, e;
4180 rsmresource_t *current;
4181 rsmseg_t *seg;
4182 adapter_t *adapter;
4183 DBG_DEFINE(category,
4184 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4185
4186 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exporter_quiesce enter\n"));
    /*
     * The importers have acknowledged the suspend (see
     * rsm_suspend_complete). Unpublish and unbind the export segments
     * and move them to the EXPORT_QUIESCED state.
     */
4192
4193 rw_enter(&rsm_export_segs.rsmhash_rw, RW_READER);
4194
4195 for (i = 0; i < rsm_hash_size; i++) {
4196 current = rsm_export_segs.bucket[i];
4197 while (current != NULL) {
4198 seg = (rsmseg_t *)current;
4199 rsmseglock_acquire(seg);
4200 if (current->rsmrc_state ==
4201 RSM_STATE_EXPORT_QUIESCING) {
4202 adapter = seg->s_adapter;
                /*
                 * Some local memory handles are not published;
                 * the segment was published unless its first ACL
                 * entry is the local node with zero permission.
                 */
4207 if ((seg->s_acl == NULL) ||
4208 (seg->s_acl[0].ae_node != my_nodeid) ||
4209 (seg->s_acl[0].ae_permission != 0)) {
4210
4211 e = adapter->rsmpi_ops->rsm_unpublish(
4212 seg->s_handle.out);
4213 DBG_PRINTF((category, RSM_DEBUG,
4214 "exporter_quiesce:unpub %d\n", e));
4215
4216 e = adapter->rsmpi_ops->rsm_seg_destroy(
4217 seg->s_handle.out);
4218
4219 DBG_PRINTF((category, RSM_DEBUG,
4220 "exporter_quiesce:destroy %d\n",
4221 e));
4222 }
4223
4224 (void) rsm_unbind_pages(seg);
4225 seg->s_state = RSM_STATE_EXPORT_QUIESCED;
4226 cv_broadcast(&seg->s_cv);
4227 }
4228 rsmseglock_release(seg);
4229 current = current->rsmrc_next;
4230 }
4231 }
4232 rw_exit(&rsm_export_segs.rsmhash_rw);
4233
    /*
     * We are done with the pre-delete processing of all the local
     * segments - time to move to PREDEL_COMPLETED.
     */
4238
4239 mutex_enter(&rsm_drv_data.drv_lock);
4240
4241 ASSERT(rsm_drv_data.drv_state == RSM_DRV_PREDEL_STARTED);
4242
4243 rsm_drv_data.drv_state = RSM_DRV_PREDEL_COMPLETED;
4244
4245 cv_broadcast(&rsm_drv_data.drv_cv);
4246
4247 mutex_exit(&rsm_drv_data.drv_lock);
4248
4249 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exporter_quiesce done\n"));
4250 }
4251
4252 static void
4253 importer_suspend(rsm_node_id_t src_node)
4254 {
4255 int i;
4256 int susp_flg; /* true means already suspended */
4257 int num_importers;
4258 rsmresource_t *p = NULL, *curp;
4259 rsmhash_table_t *rhash = &rsm_import_segs;
4260 rsmseg_t *seg;
4261 rsmipc_request_t request;
4262 DBG_DEFINE(category,
4263 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4264
4265 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_suspend enter\n"));
4266
4267 rw_enter(&rhash->rsmhash_rw, RW_READER);
4268 for (i = 0; i < rsm_hash_size; i++) {
4269 p = rhash->bucket[i];
4270
        /*
         * Suspend all importers with the same <node, key> pair.
         * After the last of the shared importers has been
         * suspended, suspend the shared mappings/connection.
         */
4276 for (; p; p = p->rsmrc_next) {
4277 rsmseg_t *first = (rsmseg_t *)p;
4278 if ((first->s_node != src_node) ||
4279 (first->s_state == RSM_STATE_DISCONNECT))
4280 continue; /* go to next entry */
            /*
             * Search the rest of the bucket for other siblings
             * (importers with the same key) of "first" and
             * suspend them. All importers with the same key
             * fall in the same bucket.
             */
4288 num_importers = 0;
4289 for (curp = p; curp; curp = curp->rsmrc_next) {
4290 seg = (rsmseg_t *)curp;
4291
4292 rsmseglock_acquire(seg);
4293
4294 if ((seg->s_node != first->s_node) ||
4295 (seg->s_key != first->s_key) ||
4296 (seg->s_state == RSM_STATE_DISCONNECT)) {
                /*
                 * Either not a peer segment or it is a
                 * disconnected segment - skip it.
                 */
4301 rsmseglock_release(seg);
4302 continue;
4303 }
4304
4305 rsmseg_suspend(seg, &susp_flg);
4306
4307 if (susp_flg) { /* seg already suspended */
4308 rsmseglock_release(seg);
4309 break; /* the inner for loop */
4310 }
4311
4312 num_importers++;
4313 rsmsharelock_acquire(seg);
4314 /*
4315 * we've processed all importers that are
4316 * siblings of "first"
4317 */
4318 if (num_importers ==
4319 seg->s_share->rsmsi_refcnt) {
4320 rsmsharelock_release(seg);
4321 rsmseglock_release(seg);
4322 break;
4323 }
4324 rsmsharelock_release(seg);
4325 rsmseglock_release(seg);
4326 }
4327
4328 /*
4329 * All the importers with the same key and
4330 * nodeid as "first" have been suspended.
4331 * Now suspend the shared connect/mapping.
4332 * This is done only once.
4333 */
4334 if (!susp_flg) {
4335 rsmsegshare_suspend(seg);
4336 }
4337 }
4338 }
4339
4340 rw_exit(&rhash->rsmhash_rw);
4341
4342 /* send an ACK for SUSPEND message */
4343 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SUSPEND_DONE;
    (void) rsmipc_send(src_node, &request, RSM_NO_REPLY);

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_suspend done\n"));
}
4350
4351 static void
4352 rsmseg_suspend(rsmseg_t *seg, int *susp_flg)
4353 {
4354 int recheck_state;
4355 rsmcookie_t *hdl;
4356 DBG_DEFINE(category,
4357 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4358
4359 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4360 "rsmseg_suspend enter: key=%u\n", seg->s_key));
4361
4362 *susp_flg = 0;
4363
4364 ASSERT(rsmseglock_held(seg));
4365 /* wait if putv/getv is in progress */
4366 while (seg->s_rdmacnt > 0)
4367 cv_wait(&seg->s_cv, &seg->s_lock);
4368
4369 do {
4370 recheck_state = 0;
4371
4372 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4373 "rsmseg_suspend:segment %x state=%d\n",
4374 seg->s_key, seg->s_state));
4375
4376 switch (seg->s_state) {
4377 case RSM_STATE_NEW:
4378 /* not a valid state */
4379 break;
4380 case RSM_STATE_CONNECTING:
4381 seg->s_state = RSM_STATE_ABORT_CONNECT;
4382 break;
4383 case RSM_STATE_ABORT_CONNECT:
4384 break;
4385 case RSM_STATE_CONNECT:
4386 seg->s_handle.in = NULL;
4387 seg->s_state = RSM_STATE_CONN_QUIESCE;
4388 break;
4389 case RSM_STATE_MAPPING:
4390 /* wait until segment leaves the mapping state */
4391 while (seg->s_state == RSM_STATE_MAPPING)
4392 cv_wait(&seg->s_cv, &seg->s_lock);
4393 recheck_state = 1;
4394 break;
4395 case RSM_STATE_ACTIVE:
4396 /* unload the mappings */
4397 if (seg->s_ckl != NULL) {
4398 hdl = seg->s_ckl;
4399 for (; hdl != NULL; hdl = hdl->c_next) {
4400 (void) devmap_unload(hdl->c_dhp,
4401 hdl->c_off, hdl->c_len);
4402 }
4403 }
4404 seg->s_mapinfo = NULL;
4405 seg->s_state = RSM_STATE_MAP_QUIESCE;
4406 break;
4407 case RSM_STATE_CONN_QUIESCE:
4408 /* FALLTHRU */
4409 case RSM_STATE_MAP_QUIESCE:
4410 /* rsmseg_suspend already done for seg */
4411 *susp_flg = 1;
4412 break;
4413 case RSM_STATE_DISCONNECT:
4414 break;
4415 default:
4416 ASSERT(0); /* invalid state */
4417 }
4418 } while (recheck_state);
4419
4420 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_suspend done\n"));
4421 }
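
/*
 * The segment state transitions performed by rsmseg_suspend, for
 * reference (taken directly from the switch above):
 *
 *	CONNECTING	-> ABORT_CONNECT
 *	CONNECT		-> CONN_QUIESCE (s_handle.in dropped)
 *	MAPPING		-> wait, then re-evaluate the new state
 *	ACTIVE		-> MAP_QUIESCE (after devmap_unload of all
 *			   the mapping handles)
 *	CONN_QUIESCE and MAP_QUIESCE	already suspended, *susp_flg = 1
 *	NEW, ABORT_CONNECT, DISCONNECT	left unchanged
 */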
4422
4423 static void
4424 rsmsegshare_suspend(rsmseg_t *seg)
4425 {
4426 int e;
4427 adapter_t *adapter;
4428 rsm_import_share_t *sharedp;
4429 DBG_DEFINE(category,
4430 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4431
4432 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4433 "rsmsegshare_suspend enter\n"));
4434
4435 rsmseglock_acquire(seg);
4436 rsmsharelock_acquire(seg);
4437
4438 sharedp = seg->s_share;
4439 adapter = seg->s_adapter;
4440 switch (sharedp->rsmsi_state) {
4441 case RSMSI_STATE_NEW:
4442 break;
4443 case RSMSI_STATE_CONNECTING:
4444 sharedp->rsmsi_state = RSMSI_STATE_ABORT_CONNECT;
4445 break;
4446 case RSMSI_STATE_ABORT_CONNECT:
4447 break;
4448 case RSMSI_STATE_CONNECTED:
4449 /* do the rsmpi disconnect */
4450 if (sharedp->rsmsi_node != my_nodeid) {
4451 e = adapter->rsmpi_ops->
4452 rsm_disconnect(sharedp->rsmsi_handle);
4453
4454 DBG_PRINTF((category, RSM_DEBUG,
4455 "rsm:rsmpi disconnect seg=%x:err=%d\n",
4456 sharedp->rsmsi_segid, e));
4457 }
4458
4459 sharedp->rsmsi_handle = NULL;
4460
4461 sharedp->rsmsi_state = RSMSI_STATE_CONN_QUIESCE;
4462 break;
4463 case RSMSI_STATE_CONN_QUIESCE:
4464 break;
4465 case RSMSI_STATE_MAPPED:
4466 /* do the rsmpi unmap and disconnect */
4467 if (sharedp->rsmsi_node != my_nodeid) {
4468 e = adapter->rsmpi_ops->rsm_unmap(seg->s_handle.in);
4469
4470 DBG_PRINTF((category, RSM_DEBUG,
4471 "rsmshare_suspend: rsmpi unmap %d\n", e));
4472
4473 e = adapter->rsmpi_ops->
4474 rsm_disconnect(sharedp->rsmsi_handle);
4475 DBG_PRINTF((category, RSM_DEBUG,
4476 "rsm:rsmpi disconnect seg=%x:err=%d\n",
4477 sharedp->rsmsi_segid, e));
4478 }
4479
4480 sharedp->rsmsi_handle = NULL;
4481
4482 sharedp->rsmsi_state = RSMSI_STATE_MAP_QUIESCE;
4483 break;
4484 case RSMSI_STATE_MAP_QUIESCE:
4485 break;
4486 case RSMSI_STATE_DISCONNECTED:
4487 break;
4488 default:
4489 ASSERT(0); /* invalid state */
4490 }
4491
4492 rsmsharelock_release(seg);
4493 rsmseglock_release(seg);
4494
4495 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4496 "rsmsegshare_suspend done\n"));
4497 }
4498
/*
 * This should get called on receiving a RESUME message or from
 * the pathmanager if the node undergoing DR dies.
 */
4503 static void
4504 importer_resume(rsm_node_id_t src_node)
4505 {
4506 int i;
4507 rsmresource_t *p = NULL;
4508 rsmhash_table_t *rhash = &rsm_import_segs;
4509 void *cookie;
4510 DBG_DEFINE(category,
4511 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4512
4513 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_resume enter\n"));
4514
4515 rw_enter(&rhash->rsmhash_rw, RW_READER);
4516
4517 for (i = 0; i < rsm_hash_size; i++) {
4518 p = rhash->bucket[i];
4519
4520 for (; p; p = p->rsmrc_next) {
4521 rsmseg_t *seg = (rsmseg_t *)p;
4522
4523 rsmseglock_acquire(seg);
4524
4525 /* process only importers of node undergoing DR */
4526 if (seg->s_node != src_node) {
4527 rsmseglock_release(seg);
4528 continue;
4529 }
4530
4531 if (rsmseg_resume(seg, &cookie) != RSM_SUCCESS) {
4532 rsmipc_request_t request;
                /*
                 * The rsmpi map/connect failed; inform the
                 * exporter so that it can remove the importer.
                 */
4538 request.rsmipc_hdr.rsmipc_type =
4539 RSMIPC_MSG_NOTIMPORTING;
4540 request.rsmipc_key = seg->s_segid;
4541 request.rsmipc_segment_cookie = cookie;
4542 rsmseglock_release(seg);
4543 (void) rsmipc_send(seg->s_node, &request,
4544 RSM_NO_REPLY);
4545 } else {
4546 rsmseglock_release(seg);
4547 }
4548 }
4549 }
4550
4551 rw_exit(&rhash->rsmhash_rw);
4552
4553 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_resume done\n"));
4554 }
4555
4556 static int
4557 rsmseg_resume(rsmseg_t *seg, void **cookie)
4558 {
4559 int e;
4560 int retc;
4561 off_t dev_offset;
4562 size_t maplen;
4563 uint_t maxprot;
4564 rsm_mapinfo_t *p;
4565 rsmcookie_t *hdl;
4566 rsm_import_share_t *sharedp;
4567 DBG_DEFINE(category,
4568 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4569
4570 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4571 "rsmseg_resume enter: key=%u\n", seg->s_key));
4572
4573 *cookie = NULL;
4574
4575 ASSERT(rsmseglock_held(seg));
4576
4577 if ((seg->s_state != RSM_STATE_CONN_QUIESCE) &&
4578 (seg->s_state != RSM_STATE_MAP_QUIESCE)) {
4579 return (RSM_SUCCESS);
4580 }
4581
4582 sharedp = seg->s_share;
4583
4584 rsmsharelock_acquire(seg);
4585
4586 /* resume the shared connection and/or mapping */
4587 retc = rsmsegshare_resume(seg);
4588
4589 if (seg->s_state == RSM_STATE_CONN_QUIESCE) {
4590 /* shared state can either be connected or mapped */
4591 if ((sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) ||
4592 (sharedp->rsmsi_state == RSMSI_STATE_MAPPED)) {
4593 ASSERT(retc == RSM_SUCCESS);
4594 seg->s_handle.in = sharedp->rsmsi_handle;
4595 rsmsharelock_release(seg);
4596 seg->s_state = RSM_STATE_CONNECT;
4597
4598 } else { /* error in rsmpi connect during resume */
4599 seg->s_handle.in = NULL;
4600 seg->s_state = RSM_STATE_DISCONNECT;
4601
4602 sharedp->rsmsi_refcnt--;
            *cookie = (void *)sharedp->rsmsi_cookie;
4604
4605 if (sharedp->rsmsi_refcnt == 0) {
4606 ASSERT(sharedp->rsmsi_mapcnt == 0);
4607 rsmsharelock_release(seg);
4608
4609 /* clean up the shared data structure */
4610 mutex_destroy(&sharedp->rsmsi_lock);
4611 cv_destroy(&sharedp->rsmsi_cv);
4612 kmem_free((void *)(sharedp),
4613 sizeof (rsm_import_share_t));
4614
4615 } else {
4616 rsmsharelock_release(seg);
4617 }
4618 /*
4619 * The following needs to be done after any
4620 * rsmsharelock calls which use seg->s_share.
4621 */
4622 seg->s_share = NULL;
4623 }
4624
4625 /* signal any waiting segment */
4626 cv_broadcast(&seg->s_cv);
4627
4628 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4629 "rsmseg_resume done:state=%d\n", seg->s_state));
4630 return (retc);
4631 }
4632
4633 ASSERT(seg->s_state == RSM_STATE_MAP_QUIESCE);
4634
4635 /* Setup protections for remap */
4636 maxprot = PROT_USER;
4637 if (seg->s_mode & RSM_PERM_READ) {
4638 maxprot |= PROT_READ;
4639 }
4640 if (seg->s_mode & RSM_PERM_WRITE) {
4641 maxprot |= PROT_WRITE;
4642 }
4643
4644 if (sharedp->rsmsi_state != RSMSI_STATE_MAPPED) {
4645 /* error in rsmpi connect or map during resume */
4646
4647 /* remap to trash page */
4648 ASSERT(seg->s_ckl != NULL);
4649
4650 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) {
4651 e = devmap_umem_remap(hdl->c_dhp, rsm_dip,
4652 remap_cookie, hdl->c_off, hdl->c_len,
4653 maxprot, 0, NULL);
4654
4655 DBG_PRINTF((category, RSM_ERR,
4656 "rsmseg_resume:remap=%d\n", e));
4657 }
4658
4659 seg->s_handle.in = NULL;
4660 seg->s_state = RSM_STATE_DISCONNECT;
4661
4662 sharedp->rsmsi_refcnt--;
4663
4664 sharedp->rsmsi_mapcnt--;
4665 seg->s_mapinfo = NULL;
4666
4667 if (sharedp->rsmsi_refcnt == 0) {
4668 ASSERT(sharedp->rsmsi_mapcnt == 0);
4669 rsmsharelock_release(seg);
4670
4671 /* clean up the shared data structure */
4672 mutex_destroy(&sharedp->rsmsi_lock);
4673 cv_destroy(&sharedp->rsmsi_cv);
4674 kmem_free((void *)(sharedp),
4675 sizeof (rsm_import_share_t));
4676
4677 } else {
4678 rsmsharelock_release(seg);
4679 }
4680 /*
4681 * The following needs to be done after any
4682 * rsmsharelock calls which use seg->s_share.
4683 */
4684 seg->s_share = NULL;
4685
4686 /* signal any waiting segment */
4687 cv_broadcast(&seg->s_cv);
4688
4689 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4690 "rsmseg_resume done:seg=%x,err=%d\n",
4691 seg->s_key, retc));
4692 return (retc);
4693
4694 }
4695
4696 seg->s_handle.in = sharedp->rsmsi_handle;
4697
4698 if (seg->s_node == my_nodeid) { /* loopback */
4699 ASSERT(seg->s_mapinfo == NULL);
4700
4701 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) {
4702 e = devmap_umem_remap(hdl->c_dhp,
4703 rsm_dip, seg->s_cookie,
4704 hdl->c_off, hdl->c_len,
4705 maxprot, 0, NULL);
4706
4707 DBG_PRINTF((category, RSM_ERR,
4708 "rsmseg_resume:remap=%d\n", e));
4709 }
4710 } else { /* remote exporter */
4711 /* remap to the new rsmpi maps */
4712 seg->s_mapinfo = sharedp->rsmsi_mapinfo;
4713
4714 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) {
4715 p = rsm_get_mapinfo(seg, hdl->c_off, hdl->c_len,
4716 &dev_offset, &maplen);
4717 e = devmap_devmem_remap(hdl->c_dhp,
4718 p->dip, p->dev_register, dev_offset,
4719 maplen, maxprot, 0, NULL);
4720
4721 DBG_PRINTF((category, RSM_ERR,
4722 "rsmseg_resume:remap=%d\n", e));
4723 }
4724 }
4725
4726 rsmsharelock_release(seg);
4727
4728 seg->s_state = RSM_STATE_ACTIVE;
4729 cv_broadcast(&seg->s_cv);
4730
4731 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_resume done\n"));
4732
4733 return (retc);
4734 }
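
/*
 * The remap targets chosen by rsmseg_resume, in brief:
 *
 *	shared state not MAPPED:	devmap_umem_remap onto the trash
 *					page (remap_cookie) and move the
 *					segment to RSM_STATE_DISCONNECT
 *	loopback (s_node == my_nodeid):	devmap_umem_remap back onto the
 *					exporter's memory (seg->s_cookie)
 *	remote exporter:		devmap_devmem_remap onto the new
 *					RSMPI mappings in rsmsi_mapinfo
 */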
4735
4736 static int
4737 rsmsegshare_resume(rsmseg_t *seg)
4738 {
4739 int e = RSM_SUCCESS;
4740 adapter_t *adapter;
4741 rsm_import_share_t *sharedp;
4742 DBG_DEFINE(category,
4743 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4744
4745 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegshare_resume enter\n"));
4746
4747 ASSERT(rsmseglock_held(seg));
4748 ASSERT(rsmsharelock_held(seg));
4749
4750 sharedp = seg->s_share;
4751
    /*
     * If we are not in a xxxx_QUIESCE state, the shared
     * connect/mapping processing has already been done, so return
     * success.
     */
4757 if ((sharedp->rsmsi_state != RSMSI_STATE_CONN_QUIESCE) &&
4758 (sharedp->rsmsi_state != RSMSI_STATE_MAP_QUIESCE)) {
4759 return (RSM_SUCCESS);
4760 }
4761
4762 adapter = seg->s_adapter;
4763
4764 if (sharedp->rsmsi_node != my_nodeid) {
4765 rsm_addr_t hwaddr;
4766 hwaddr = get_remote_hwaddr(adapter, sharedp->rsmsi_node);
4767
4768 e = adapter->rsmpi_ops->rsm_connect(
4769 adapter->rsmpi_handle, hwaddr,
4770 sharedp->rsmsi_segid, &sharedp->rsmsi_handle);
4771
4772 DBG_PRINTF((category, RSM_DEBUG,
4773 "rsmsegshare_resume:rsmpi connect seg=%x:err=%d\n",
4774 sharedp->rsmsi_segid, e));
4775
4776 if (e != RSM_SUCCESS) {
4777 /* when do we send the NOT_IMPORTING message */
4778 sharedp->rsmsi_handle = NULL;
4779 sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED;
4780 /* signal any waiting segment */
4781 cv_broadcast(&sharedp->rsmsi_cv);
4782 return (e);
4783 }
4784 }
4785
4786 if (sharedp->rsmsi_state == RSMSI_STATE_CONN_QUIESCE) {
4787 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
4788 /* signal any waiting segment */
4789 cv_broadcast(&sharedp->rsmsi_cv);
4790 return (e);
4791 }
4792
4793 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAP_QUIESCE);
4794
4795 /* do the rsmpi map of the whole segment here */
4796 if (sharedp->rsmsi_node != my_nodeid) {
4797 size_t mapped_len;
4798 rsm_mapinfo_t *p;
4799
        /*
         * We need to do the rsmpi maps with <offset, length> pairs
         * identical to those in the old mapinfo list, because the
         * segment mapping handles (dhp and such) need the
         * fragmentation of the rsmpi maps to be identical to what
         * it was when the segment was mmapped.
         */
4806 p = sharedp->rsmsi_mapinfo;
4807
4808 while (p != NULL) {
4809 mapped_len = 0;
4810
4811 e = adapter->rsmpi_ops->rsm_map(
4812 sharedp->rsmsi_handle, p->start_offset,
4813 p->individual_len, &mapped_len,
4814 &p->dip, &p->dev_register, &p->dev_offset,
4815 NULL, NULL);
4816
4817 if (e != 0) {
4818 DBG_PRINTF((category, RSM_ERR,
4819 "rsmsegshare_resume: rsmpi map err=%d\n",
4820 e));
4821 break;
4822 }
4823
4824 if (mapped_len != p->individual_len) {
4825 DBG_PRINTF((category, RSM_ERR,
                    "rsmsegshare_resume: rsmpi maplen "
4827 "< reqlen=%lx\n", mapped_len));
4828 e = RSMERR_BAD_LENGTH;
4829 break;
4830 }
4831
            p = p->next;
        }

4837 if (e != RSM_SUCCESS) { /* rsmpi map failed */
4838 int err;
            /* if any earlier rsm_map succeeded, undo it */
4840 if (p != sharedp->rsmsi_mapinfo) {
4841 /*
4842 * A single rsm_unmap undoes multiple rsm_maps.
4843 */
4844 (void) seg->s_adapter->rsmpi_ops->
4845 rsm_unmap(sharedp->rsmsi_handle);
4846 }
4847
4848 rsm_free_mapinfo(sharedp->rsmsi_mapinfo);
4849 sharedp->rsmsi_mapinfo = NULL;
4850
4851 err = adapter->rsmpi_ops->
4852 rsm_disconnect(sharedp->rsmsi_handle);
4853
4854 DBG_PRINTF((category, RSM_DEBUG,
4855 "rsmsegshare_resume:disconn seg=%x:err=%d\n",
4856 sharedp->rsmsi_segid, err));
4857
4858 sharedp->rsmsi_handle = NULL;
4859 sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED;
4860
4861 /* signal the waiting segments */
4862 cv_broadcast(&sharedp->rsmsi_cv);
4863 DBG_PRINTF((category, RSM_DEBUG,
4864 "rsmsegshare_resume done: rsmpi map err\n"));
4865 return (e);
4866 }
4867 }
4868
4869 sharedp->rsmsi_state = RSMSI_STATE_MAPPED;
4870
4871 /* signal any waiting segment */
4872 cv_broadcast(&sharedp->rsmsi_cv);
4873
4874 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegshare_resume done\n"));
4875
4876 return (e);
4877 }
4878
4879 /*
4880 * this is the routine that gets called by recv_taskq which is the
4881 * thread that processes messages that are flow-controlled.
4882 */
4883 static void
4884 rsm_intr_proc_deferred(void *arg)
4885 {
4886 path_t *path = (path_t *)arg;
4887 rsmipc_request_t *msg;
4888 rsmipc_msghdr_t *msghdr;
4889 rsm_node_id_t src_node;
4890 msgbuf_elem_t *head;
4891 int e;
4892 DBG_DEFINE(category,
4893 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4894
4895 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4896 "rsm_intr_proc_deferred enter\n"));
4897
4898 mutex_enter(&path->mutex);
4899
4900 /* use the head of the msgbuf_queue */
4901 head = rsmka_gethead_msgbuf(path);
4902
4903 mutex_exit(&path->mutex);
4904
4905 msg = (rsmipc_request_t *)&(head->msg);
4906 msghdr = (rsmipc_msghdr_t *)msg;
4907
4908 src_node = msghdr->rsmipc_src;
4909
    /*
     * Messages that need to send a reply must check the message
     * version before being processed, and all such messages are
     * processed here by the worker thread.
     */
4915 switch (msghdr->rsmipc_type) {
4916 case RSMIPC_MSG_SEGCONNECT:
4917 if (msghdr->rsmipc_version != RSM_VERSION) {
4918 rsmipc_reply_t reply;
4919 reply.rsmipc_status = RSMERR_BAD_DRIVER_VERSION;
4920 reply.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPLY;
4921 reply.rsmipc_hdr.rsmipc_cookie = msghdr->rsmipc_cookie;
4922 (void) rsmipc_send(msghdr->rsmipc_src, NULL, &reply);
4923 } else {
4924 rsm_intr_segconnect(src_node, msg);
4925 }
4926 break;
4927 case RSMIPC_MSG_DISCONNECT:
4928 rsm_force_unload(src_node, msg->rsmipc_key, DISCONNECT);
4929 break;
4930 case RSMIPC_MSG_SUSPEND:
4931 importer_suspend(src_node);
4932 break;
4933 case RSMIPC_MSG_SUSPEND_DONE:
4934 rsm_suspend_complete(src_node, 0);
4935 break;
4936 case RSMIPC_MSG_RESUME:
4937 importer_resume(src_node);
4938 break;
4939 default:
4940 ASSERT(0);
4941 }
4942
4943 mutex_enter(&path->mutex);
4944
4945 rsmka_dequeue_msgbuf(path);
4946
    /* increment procmsg_cnt; it can be at most RSMIPC_MAX_MESSAGES */
4948 if (path->procmsg_cnt < RSMIPC_MAX_MESSAGES)
4949 path->procmsg_cnt++;
4950
4951 ASSERT(path->procmsg_cnt <= RSMIPC_MAX_MESSAGES);
4952
4953 /* No need to send credits if path is going down */
4954 if ((path->state == RSMKA_PATH_ACTIVE) &&
4955 (path->procmsg_cnt >= RSMIPC_LOTSFREE_MSGBUFS)) {
        /*
         * Send credits and reset procmsg_cnt on success; otherwise
         * credits will be sent after processing the next message.
         */
4960 e = rsmipc_send_controlmsg(path, RSMIPC_MSG_CREDIT);
4961 if (e == 0)
4962 path->procmsg_cnt = 0;
4963 else
4964 DBG_PRINTF((category, RSM_ERR,
4965 "rsm_intr_proc_deferred:send credits err=%d\n", e));
4966 }
4967
4968 /*
4969 * decrement the path refcnt since we incremented it in
4970 * rsm_intr_callback_dispatch
4971 */
4972 PATH_RELE_NOLOCK(path);
4973
4974 mutex_exit(&path->mutex);
4975
4976 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4977 "rsm_intr_proc_deferred done\n"));
4978 }
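
/*
 * Receiver-side credit bookkeeping in brief: every message processed
 * bumps procmsg_cnt, and once RSMIPC_LOTSFREE_MSGBUFS messages have
 * been handled a single RSMIPC_MSG_CREDIT control message returns the
 * whole batch of buffers to the sender, roughly:
 *
 *	if ((path->state == RSMKA_PATH_ACTIVE) &&
 *	    (path->procmsg_cnt >= RSMIPC_LOTSFREE_MSGBUFS) &&
 *	    (rsmipc_send_controlmsg(path, RSMIPC_MSG_CREDIT) == 0))
 *		path->procmsg_cnt = 0;
 *
 * Batching keeps the credit traffic to a small fraction of the data
 * traffic instead of one control message per message processed.
 */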
4979
4980 /*
4981 * Flow-controlled messages are enqueued and dispatched onto a taskq here
4982 */
4983 static void
4984 rsm_intr_callback_dispatch(void *data, rsm_addr_t src_hwaddr,
4985 rsm_intr_hand_arg_t arg)
4986 {
4987 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg;
4988 path_t *path;
4989 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)data;
4990 DBG_DEFINE(category,
4991 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4992
4993 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4994 "rsm_intr_callback_dispatch enter\n"));
4995 ASSERT(data && hdlr_argp);
4996
4997 /* look up the path - incr the path refcnt */
4998 path = rsm_find_path(hdlr_argp->adapter_name,
4999 hdlr_argp->adapter_instance, src_hwaddr);
5000
5001 /* the path has been removed - drop this message */
5002 if (path == NULL) {
5003 DBG_PRINTF((category, RSM_DEBUG,
5004 "rsm_intr_callback_dispatch done: msg dropped\n"));
5005 return;
5006 }
5007 /* the path is not active - don't accept new messages */
5008 if (path->state != RSMKA_PATH_ACTIVE) {
5009 PATH_RELE_NOLOCK(path);
5010 mutex_exit(&path->mutex);
5011 DBG_PRINTF((category, RSM_DEBUG,
5012 "rsm_intr_callback_dispatch done: msg dropped"
5013 " path=%lx !ACTIVE\n", path));
5014 return;
5015 }
5016
5017 /*
5018 * Check if this message was sent to an older incarnation
5019 * of the path/sendq.
5020 */
5021 if (path->local_incn != msghdr->rsmipc_incn) {
5022 /* decrement the refcnt */
5023 PATH_RELE_NOLOCK(path);
5024 mutex_exit(&path->mutex);
5025 DBG_PRINTF((category, RSM_DEBUG,
5026 "rsm_intr_callback_dispatch done: old incn %lld\n",
5027 msghdr->rsmipc_incn));
5028 return;
5029 }
5030
5031 /* copy and enqueue msg on the path's msgbuf queue */
5032 rsmka_enqueue_msgbuf(path, data);
5033
    /*
     * Schedule a task to process the messages - ignore the retval
     * from taskq_dispatch because the sender cannot send more than
     * what the receiver can handle.
     */
5039 (void) taskq_dispatch(path->recv_taskq,
5040 rsm_intr_proc_deferred, path, KM_NOSLEEP);
5041
5042 mutex_exit(&path->mutex);
5043
5044 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5045 "rsm_intr_callback_dispatch done\n"));
5046 }
5047
5048 /*
 * This procedure is called from rsm_srv_func when a remote node creates
 * a send queue. This event is used as a hint that an earlier failed
5051 * attempt to create a send queue to that remote node may now succeed and
5052 * should be retried. Indication of an earlier failed attempt is provided
5053 * by the RSMKA_SQCREATE_PENDING flag.
5054 */
5055 static void
5056 rsm_sqcreateop_callback(rsm_addr_t src_hwaddr, rsm_intr_hand_arg_t arg)
5057 {
5058 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg;
5059 path_t *path;
5060 DBG_DEFINE(category,
5061 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5062
5063 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5064 "rsm_sqcreateop_callback enter\n"));
5065
5066 /* look up the path - incr the path refcnt */
5067 path = rsm_find_path(hdlr_argp->adapter_name,
5068 hdlr_argp->adapter_instance, src_hwaddr);
5069
5070 if (path == NULL) {
5071 DBG_PRINTF((category, RSM_DEBUG,
5072 "rsm_sqcreateop_callback done: no path\n"));
5073 return;
5074 }
5075
5076 if ((path->state == RSMKA_PATH_UP) &&
5077 (path->flags & RSMKA_SQCREATE_PENDING)) {
        /*
         * The previous attempt to create a sendq had failed; retry
         * it and move to the RSMKA_PATH_ACTIVE state if successful.
         * The refcnt will be decremented in do_deferred_work.
         */
5083 (void) rsmka_do_path_active(path, RSMKA_NO_SLEEP);
5084 } else {
5085 /* decrement the refcnt */
5086 PATH_RELE_NOLOCK(path);
5087 }
5088 mutex_exit(&path->mutex);
5089
5090 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5091 "rsm_sqcreateop_callback done\n"));
5092 }
5093
5094 static void
5095 rsm_intr_callback(void *data, rsm_addr_t src_hwaddr, rsm_intr_hand_arg_t arg)
5096 {
5097 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)data;
5098 rsmipc_request_t *msg = (rsmipc_request_t *)data;
5099 rsmipc_controlmsg_t *ctrlmsg = (rsmipc_controlmsg_t *)data;
5100 rsm_node_id_t src_node;
5101 DBG_DEFINE(category,
5102 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5103
5104 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_callback enter:"
5105 "src=%d, type=%d\n", msghdr->rsmipc_src,
5106 msghdr->rsmipc_type));
5107
    /*
     * Check the version number in the msg header. If it is not
     * RSM_VERSION, drop the message. In the future we will need to
     * manage incompatible version numbers in some way.
     */
5113 if (msghdr->rsmipc_version != RSM_VERSION) {
5114 DBG_PRINTF((category, RSM_ERR, "wrong KA version\n"));
        /*
         * Drop requests that don't expect a reply right here.
         * Requests with a reply will send a BAD_VERSION reply
         * when they get processed by the worker thread.
         */
        if (msghdr->rsmipc_type != RSMIPC_MSG_SEGCONNECT) {
            return;
        }
    }
5125
5126 src_node = msghdr->rsmipc_src;
5127
5128 switch (msghdr->rsmipc_type) {
5129 case RSMIPC_MSG_SEGCONNECT:
5130 case RSMIPC_MSG_DISCONNECT:
5131 case RSMIPC_MSG_SUSPEND:
5132 case RSMIPC_MSG_SUSPEND_DONE:
5133 case RSMIPC_MSG_RESUME:
        /*
         * These message types are handled by a worker thread using
         * the flow-control algorithm.
         * Any message processing that does one or more of the
         * following should be handled in a worker thread:
         * - allocates resources and might sleep
         * - makes RSMPI calls down to the interconnect driver;
         *   by definition this includes requests with replies
         * - takes a long duration of time
         */
5144 rsm_intr_callback_dispatch(data, src_hwaddr, arg);
5145 break;
5146 case RSMIPC_MSG_NOTIMPORTING:
5147 importer_list_rm(src_node, msg->rsmipc_key,
5148 msg->rsmipc_segment_cookie);
5149 break;
5150 case RSMIPC_MSG_SQREADY:
5151 rsm_proc_sqready(data, src_hwaddr, arg);
5152 break;
5153 case RSMIPC_MSG_SQREADY_ACK:
5154 rsm_proc_sqready_ack(data, src_hwaddr, arg);
5155 break;
5156 case RSMIPC_MSG_CREDIT:
5157 rsm_add_credits(ctrlmsg, src_hwaddr, arg);
5158 break;
5159 case RSMIPC_MSG_REPLY:
5160 rsm_intr_reply(msghdr);
5161 break;
5162 case RSMIPC_MSG_BELL:
5163 rsm_intr_event(msg);
5164 break;
5165 case RSMIPC_MSG_IMPORTING:
5166 importer_list_add(src_node, msg->rsmipc_key,
5167 msg->rsmipc_adapter_hwaddr,
5168 msg->rsmipc_segment_cookie);
5169 break;
5170 case RSMIPC_MSG_REPUBLISH:
5171 importer_update(src_node, msg->rsmipc_key, msg->rsmipc_perm);
5172 break;
5173 default:
5174 DBG_PRINTF((category, RSM_DEBUG,
5175 "rsm_intr_callback: bad msg %lx type %d data %lx\n",
5176 (size_t)msg, (int)(msghdr->rsmipc_type), (size_t)data));
5177 }
5178
    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_callback done\n"));
}
5182
rsm_intr_hand_ret_t
rsm_srv_func(rsm_controller_object_t *chd, rsm_intr_q_op_t opcode,
    rsm_addr_t src, void *data, size_t size, rsm_intr_hand_arg_t arg)
5186 {
5187 DBG_DEFINE(category,
5188 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5189
5190 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_srv_func enter\n"));
5191
5192 switch (opcode) {
5193 case RSM_INTR_Q_OP_CREATE:
5194 DBG_PRINTF((category, RSM_DEBUG, "rsm_srv_func:OP_CREATE\n"));
5195 rsm_sqcreateop_callback(src, arg);
5196 break;
5197 case RSM_INTR_Q_OP_DESTROY:
5198 DBG_PRINTF((category, RSM_DEBUG, "rsm_srv_func:OP_DESTROY\n"));
5199 break;
5200 case RSM_INTR_Q_OP_RECEIVE:
5201 rsm_intr_callback(data, src, arg);
5202 break;
5203 default:
5204 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5205 "rsm_srv_func: unknown opcode = %x\n", opcode));
5206 }
5207
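    /* reference the unused arguments to keep lint quiet */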
5208 chd = chd;
5209 size = size;
5210
5211 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_srv_func done\n"));
5212
5213 return (RSM_INTR_HAND_CLAIMED);
5214 }
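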
5215
5216 /* *************************** IPC slots ************************* */
5217 static rsmipc_slot_t *
5218 rsmipc_alloc()
5219 {
5220 int i;
5221 rsmipc_slot_t *slot;
5222 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
5223
5224 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_alloc enter\n"));
5225
    /* try to find a free slot; if there is none, wait */
5227 mutex_enter(&rsm_ipc.lock);
5228
5229 while (rsm_ipc.count == 0) {
5230 rsm_ipc.wanted = 1;
5231 cv_wait(&rsm_ipc.cv, &rsm_ipc.lock);
5232 }
5233
5234 /* An empty slot is available, find it */
5235 slot = &rsm_ipc.slots[0];
5236 for (i = 0; i < RSMIPC_SZ; i++, slot++) {
5237 if (RSMIPC_GET(slot, RSMIPC_FREE)) {
5238 RSMIPC_CLEAR(slot, RSMIPC_FREE);
5239 break;
5240 }
5241 }
5242
5243 ASSERT(i < RSMIPC_SZ);
5244 rsm_ipc.count--; /* one less is available */
5245 rsm_ipc.sequence++; /* new sequence */
5246
5247 slot->rsmipc_cookie.ic.sequence = (uint_t)rsm_ipc.sequence;
5248 slot->rsmipc_cookie.ic.index = (uint_t)i;
5249
5250 mutex_exit(&rsm_ipc.lock);
5251
5252 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_alloc done\n"));
5253
5254 return (slot);
5255 }
5256
5257 static void
5258 rsmipc_free(rsmipc_slot_t *slot)
5259 {
5260 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
5261
5262 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_free enter\n"));
5263
5264 ASSERT(MUTEX_HELD(&slot->rsmipc_lock));
5265 ASSERT(&rsm_ipc.slots[slot->rsmipc_cookie.ic.index] == slot);
5266
5267 mutex_enter(&rsm_ipc.lock);
5268
5269 RSMIPC_SET(slot, RSMIPC_FREE);
5270
5271 slot->rsmipc_cookie.ic.sequence = 0;
5272
5273 mutex_exit(&slot->rsmipc_lock);
5274 rsm_ipc.count++;
5275 ASSERT(rsm_ipc.count <= RSMIPC_SZ);
5276 if (rsm_ipc.wanted) {
5277 rsm_ipc.wanted = 0;
5278 cv_broadcast(&rsm_ipc.cv);
5279 }
5280
5281 mutex_exit(&rsm_ipc.lock);
5282
5283 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_free done\n"));
5284 }
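
/*
 * Slot lifecycle, for reference - how rsmipc_alloc and rsmipc_free
 * pair up with the request/reply path in rsmipc_send below:
 *
 *	rslot = rsmipc_alloc();		(FREE cleared, fresh sequence)
 *	mutex_enter(&rslot->rsmipc_lock);
 *	rslot->rsmipc_data = (void *)reply;
 *	RSMIPC_SET(rslot, RSMIPC_PENDING);
 *	... send the request carrying rslot->rsmipc_cookie ...
 *	... cv_reltimedwait_sig until rsm_intr_reply clears PENDING ...
 *	RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
 *	rsmipc_free(rslot);		(drops rsmipc_lock, sets FREE)
 */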
5285
5286 static int
5287 rsmipc_send(rsm_node_id_t dest, rsmipc_request_t *req, rsmipc_reply_t *reply)
5288 {
5289 int e = 0;
5290 int credit_check = 0;
5291 int retry_cnt = 0;
5292 int min_retry_cnt = 10;
5293 rsm_send_t is;
5294 rsmipc_slot_t *rslot;
5295 adapter_t *adapter;
5296 path_t *path;
5297 sendq_token_t *sendq_token;
5298 sendq_token_t *used_sendq_token = NULL;
5299 rsm_send_q_handle_t ipc_handle;
5300 DBG_DEFINE(category,
5301 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5302
5303 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_send enter:dest=%d",
5304 dest));
5305
5306 /*
5307 * Check if this is a local case
5308 */
5309 if (dest == my_nodeid) {
5310 switch (req->rsmipc_hdr.rsmipc_type) {
5311 case RSMIPC_MSG_SEGCONNECT:
5312 reply->rsmipc_status = (short)rsmsegacl_validate(
5313 req, dest, reply);
5314 break;
5315 case RSMIPC_MSG_BELL:
5316 req->rsmipc_hdr.rsmipc_src = dest;
5317 rsm_intr_event(req);
5318 break;
5319 case RSMIPC_MSG_IMPORTING:
5320 importer_list_add(dest, req->rsmipc_key,
5321 req->rsmipc_adapter_hwaddr,
5322 req->rsmipc_segment_cookie);
5323 break;
5324 case RSMIPC_MSG_NOTIMPORTING:
5325 importer_list_rm(dest, req->rsmipc_key,
5326 req->rsmipc_segment_cookie);
5327 break;
5328 case RSMIPC_MSG_REPUBLISH:
5329 importer_update(dest, req->rsmipc_key,
5330 req->rsmipc_perm);
5331 break;
5332 case RSMIPC_MSG_SUSPEND:
5333 importer_suspend(dest);
5334 break;
5335 case RSMIPC_MSG_SUSPEND_DONE:
5336 rsm_suspend_complete(dest, 0);
5337 break;
5338 case RSMIPC_MSG_RESUME:
5339 importer_resume(dest);
5340 break;
5341 default:
5342 ASSERT(0);
5343 }
5344 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5345 "rsmipc_send done\n"));
5346 return (0);
5347 }
5348
5349 if (dest >= MAX_NODES) {
5350 DBG_PRINTF((category, RSM_ERR,
5351 "rsm: rsmipc_send bad node number %x\n", dest));
5352 return (RSMERR_REMOTE_NODE_UNREACHABLE);
5353 }
5354
5355 /*
     * Oh boy! We are going remote.
5357 */
5358
5359 /*
5360 * identify if we need to have credits to send this message
5361 * - only selected requests are flow controlled
5362 */
5363 if (req != NULL) {
5364 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5365 "rsmipc_send:request type=%d\n",
5366 req->rsmipc_hdr.rsmipc_type));
5367
5368 switch (req->rsmipc_hdr.rsmipc_type) {
5369 case RSMIPC_MSG_SEGCONNECT:
5370 case RSMIPC_MSG_DISCONNECT:
5371 case RSMIPC_MSG_IMPORTING:
5372 case RSMIPC_MSG_SUSPEND:
5373 case RSMIPC_MSG_SUSPEND_DONE:
5374 case RSMIPC_MSG_RESUME:
5375 credit_check = 1;
5376 break;
5377 default:
5378 credit_check = 0;
5379 }
5380 }
5381
5382 again:
5383 if (retry_cnt++ == min_retry_cnt) {
        /* back off for 10 ms before further retries */
5385 delay(drv_usectohz(10000));
5386 retry_cnt = 0; /* reset retry_cnt */
5387 }
5388 sendq_token = rsmka_get_sendq_token(dest, used_sendq_token);
5389 if (sendq_token == NULL) {
5390 DBG_PRINTF((category, RSM_ERR,
5391 "rsm: rsmipc_send no device to reach node %d\n", dest));
5392 return (RSMERR_REMOTE_NODE_UNREACHABLE);
5393 }
5394
5395 if ((sendq_token == used_sendq_token) &&
5396 ((e == RSMERR_CONN_ABORTED) || (e == RSMERR_TIMEOUT) ||
5397 (e == RSMERR_COMM_ERR_MAYBE_DELIVERED))) {
5398 rele_sendq_token(sendq_token);
5399 DBG_PRINTF((category, RSM_DEBUG, "rsmipc_send done=%d\n", e));
5400 return (RSMERR_CONN_ABORTED);
5401 } else
5402 used_sendq_token = sendq_token;
5403
5404 /* lint -save -e413 */
5405 path = SQ_TOKEN_TO_PATH(sendq_token);
5406 adapter = path->local_adapter;
5407 /* lint -restore */
5408 ipc_handle = sendq_token->rsmpi_sendq_handle;
5409
5410 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5411 "rsmipc_send: path=%lx sendq_hdl=%lx\n", path, ipc_handle));
5412
5413 if (reply == NULL) {
5414 /* Send request without ack */
5415 /*
5416 * Set the rsmipc_version number in the msghdr for KA
5417 * communication versioning
5418 */
5419 req->rsmipc_hdr.rsmipc_version = RSM_VERSION;
5420 req->rsmipc_hdr.rsmipc_src = my_nodeid;
        /*
         * The remote endpoint's incn should match the value in our
         * path's remote_incn field. No need to grab any lock since
         * we have refcnted the path in rsmka_get_sendq_token.
         */
5426 req->rsmipc_hdr.rsmipc_incn = path->remote_incn;
5427
5428 is.is_data = (void *)req;
5429 is.is_size = sizeof (*req);
5430 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
5431 is.is_wait = 0;
5432
5433 if (credit_check) {
5434 mutex_enter(&path->mutex);
            /*
             * Wait till we receive credits or the path goes down.
             * If the path goes down, rsm_send will fail and we
             * handle the error then.
             */
5440 while ((sendq_token->msgbuf_avail == 0) &&
5441 (path->state == RSMKA_PATH_ACTIVE)) {
5442 e = cv_wait_sig(&sendq_token->sendq_cv,
5443 &path->mutex);
5444 if (e == 0) {
5445 mutex_exit(&path->mutex);
5446 no_reply_cnt++;
5447 rele_sendq_token(sendq_token);
5448 DBG_PRINTF((category, RSM_DEBUG,
5449 "rsmipc_send done: "
5450 "cv_wait INTERRUPTED"));
5451 return (RSMERR_INTERRUPTED);
5452 }
5453 }
5454
            /*
             * The path is not active; retry on another path.
             */
5458 if (path->state != RSMKA_PATH_ACTIVE) {
5459 mutex_exit(&path->mutex);
5460 rele_sendq_token(sendq_token);
5461 e = RSMERR_CONN_ABORTED;
5462 DBG_PRINTF((category, RSM_ERR,
5463 "rsm: rsmipc_send: path !ACTIVE"));
5464 goto again;
5465 }
5466
5467 ASSERT(sendq_token->msgbuf_avail > 0);
5468
5469 /*
5470 * reserve a msgbuf
5471 */
5472 sendq_token->msgbuf_avail--;
5473
5474 mutex_exit(&path->mutex);
5475
5476 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
5477 NULL);
5478
5479 if (e != RSM_SUCCESS) {
5480 mutex_enter(&path->mutex);
5481 /*
5482 * release the reserved msgbuf since
5483 * the send failed
5484 */
5485 sendq_token->msgbuf_avail++;
5486 cv_broadcast(&sendq_token->sendq_cv);
5487 mutex_exit(&path->mutex);
5488 }
5489 } else
5490 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
5491 NULL);
5492
5493 no_reply_cnt++;
5494 rele_sendq_token(sendq_token);
5495 if (e != RSM_SUCCESS) {
5496 DBG_PRINTF((category, RSM_ERR,
5497 "rsm: rsmipc_send no reply send"
5498 " err = %d no reply count = %d\n",
5499 e, no_reply_cnt));
5500 ASSERT(e != RSMERR_QUEUE_FENCE_UP &&
5501 e != RSMERR_BAD_BARRIER_HNDL);
5502 atomic_inc_64(&rsm_ipcsend_errcnt);
5503 goto again;
5504 } else {
5505 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5506 "rsmipc_send done\n"));
5507 return (e);
5508 }
5509
5510 }
5511
5512 if (req == NULL) {
5513 /* Send reply - No flow control is done for reply */
5514 /*
5515 * Set the version in the msg header for KA communication
5516 * versioning
5517 */
5518 reply->rsmipc_hdr.rsmipc_version = RSM_VERSION;
5519 reply->rsmipc_hdr.rsmipc_src = my_nodeid;
5520 /* incn number is not used for reply msgs currently */
5521 reply->rsmipc_hdr.rsmipc_incn = path->remote_incn;
5522
5523 is.is_data = (void *)reply;
5524 is.is_size = sizeof (*reply);
5525 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
5526 is.is_wait = 0;
5527 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, NULL);
5528 rele_sendq_token(sendq_token);
5529 if (e != RSM_SUCCESS) {
5530 DBG_PRINTF((category, RSM_ERR,
5531 "rsm: rsmipc_send reply send"
5532 " err = %d\n", e));
5533 atomic_inc_64(&rsm_ipcsend_errcnt);
5534 goto again;
5535 } else {
5536 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5537 "rsmipc_send done\n"));
5538 return (e);
5539 }
5540 }
5541
5542 /* Reply needed */
5543 rslot = rsmipc_alloc(); /* allocate a new ipc slot */
5544
5545 mutex_enter(&rslot->rsmipc_lock);
5546
5547 rslot->rsmipc_data = (void *)reply;
5548 RSMIPC_SET(rslot, RSMIPC_PENDING);
5549
5550 while (RSMIPC_GET(rslot, RSMIPC_PENDING)) {
5551 /*
5552 * Set the rsmipc_version number in the msghdr for KA
5553 * communication versioning
5554 */
5555 req->rsmipc_hdr.rsmipc_version = RSM_VERSION;
5556 req->rsmipc_hdr.rsmipc_src = my_nodeid;
5557 req->rsmipc_hdr.rsmipc_cookie = rslot->rsmipc_cookie;
        /*
         * The remote endpoint's incn should match the value in our
         * path's remote_incn field. No need to grab any lock since
         * we have refcnted the path in rsmka_get_sendq_token.
         */
5563 req->rsmipc_hdr.rsmipc_incn = path->remote_incn;
5564
5565 is.is_data = (void *)req;
5566 is.is_size = sizeof (*req);
5567 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
5568 is.is_wait = 0;
5569 if (credit_check) {
5570
5571 mutex_enter(&path->mutex);
            /*
             * Wait till we receive credits or the path goes down.
             * If the path goes down, rsm_send will fail and we
             * handle the error then.
             */
5577 while ((sendq_token->msgbuf_avail == 0) &&
5578 (path->state == RSMKA_PATH_ACTIVE)) {
5579 e = cv_wait_sig(&sendq_token->sendq_cv,
5580 &path->mutex);
5581 if (e == 0) {
5582 mutex_exit(&path->mutex);
5583 RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
5584 rsmipc_free(rslot);
5585 rele_sendq_token(sendq_token);
5586 DBG_PRINTF((category, RSM_DEBUG,
5587 "rsmipc_send done: "
5588 "cv_wait INTERRUPTED"));
5589 return (RSMERR_INTERRUPTED);
5590 }
5591 }
5592
            /*
             * The path is not active; retry on another path.
             */
5596 if (path->state != RSMKA_PATH_ACTIVE) {
5597 mutex_exit(&path->mutex);
5598 RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
5599 rsmipc_free(rslot);
5600 rele_sendq_token(sendq_token);
5601 e = RSMERR_CONN_ABORTED;
5602 DBG_PRINTF((category, RSM_ERR,
5603 "rsm: rsmipc_send: path !ACTIVE"));
5604 goto again;
5605 }
5606
5607 ASSERT(sendq_token->msgbuf_avail > 0);
5608
5609 /*
5610 * reserve a msgbuf
5611 */
5612 sendq_token->msgbuf_avail--;
5613
5614 mutex_exit(&path->mutex);
5615
5616 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
5617 NULL);
5618
5619 if (e != RSM_SUCCESS) {
5620 mutex_enter(&path->mutex);
5621 /*
5622 * release the reserved msgbuf since
5623 * the send failed
5624 */
5625 sendq_token->msgbuf_avail++;
5626 cv_broadcast(&sendq_token->sendq_cv);
5627 mutex_exit(&path->mutex);
5628 }
5629 } else
5630 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
5631 NULL);
5632
5633 if (e != RSM_SUCCESS) {
5634 DBG_PRINTF((category, RSM_ERR,
5635 "rsm: rsmipc_send rsmpi send err = %d\n", e));
5636 RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
5637 rsmipc_free(rslot);
5638 rele_sendq_token(sendq_token);
5639 atomic_inc_64(&rsm_ipcsend_errcnt);
5640 goto again;
5641 }
5642
5643 /* wait for a reply signal, a SIGINT, or 5 sec. timeout */
5644 e = cv_reltimedwait_sig(&rslot->rsmipc_cv, &rslot->rsmipc_lock,
5645 drv_usectohz(5000000), TR_CLOCK_TICK);
5646 if (e < 0) {
5647 /* timed out - retry */
5648 e = RSMERR_TIMEOUT;
5649 } else if (e == 0) {
5650 /* signalled - return error */
5651 e = RSMERR_INTERRUPTED;
5652 break;
5653 } else {
5654 e = RSM_SUCCESS;
5655 }
5656 }
5657
5658 RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
5659 rsmipc_free(rslot);
5660 rele_sendq_token(sendq_token);
5661
5662 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_send done=%d\n", e));
5663 return (e);
5664 }
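
/*
 * Typical use of rsmipc_send, as in rsm_send_notimporting below: build
 * the request on the stack, fill in the type-specific fields, and pass
 * either RSM_NO_REPLY or a reply buffer to block for the answer. A
 * sketch (the field values are illustrative only):
 *
 *	rsmipc_request_t req;
 *	rsmipc_reply_t reply;
 *
 *	req.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SEGCONNECT;
 *	req.rsmipc_key = segid;
 *	e = rsmipc_send(dest_node, &req, &reply);
 *	if (e == RSM_SUCCESS)
 *		e = reply.rsmipc_status;
 */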
5665
5666 static int
5667 rsm_send_notimporting(rsm_node_id_t dest, rsm_memseg_id_t segid, void *cookie)
5668 {
5669 rsmipc_request_t request;
5670
5671 /*
5672 * inform the exporter to delete this importer
5673 */
5674 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_NOTIMPORTING;
5675 request.rsmipc_key = segid;
5676 request.rsmipc_segment_cookie = cookie;
5677 return (rsmipc_send(dest, &request, RSM_NO_REPLY));
5678 }
5679
5680 static void
5681 rsm_send_republish(rsm_memseg_id_t segid, rsmapi_access_entry_t *acl,
5682 int acl_len, rsm_permission_t default_permission)
5683 {
5684 int i;
5685 importing_token_t *token;
5686 rsmipc_request_t request;
5687 republish_token_t *republish_list = NULL;
5688 republish_token_t *rp;
5689 rsm_permission_t permission;
5690 int index;
5691
    /*
     * Send the new access mode to all the nodes that have imported
     * this segment.
     * If the new acl does not have a node that was present in the
     * old acl, an access permission of 0 is sent.
     */
5698
5699 index = rsmhash(segid);
5700
    /*
     * Create a list of <node, permission> pairs for the republish
     * messages.
     */
5704 mutex_enter(&importer_list.lock);
5705
5706 token = importer_list.bucket[index];
5707 while (token != NULL) {
5708 if (segid == token->key) {
5709 permission = default_permission;
5710
5711 for (i = 0; i < acl_len; i++) {
5712 if (token->importing_node == acl[i].ae_node) {
5713 permission = acl[i].ae_permission;
5714 break;
5715 }
5716 }
5717 rp = kmem_zalloc(sizeof (republish_token_t), KM_SLEEP);
5718
5719 rp->key = segid;
5720 rp->importing_node = token->importing_node;
5721 rp->permission = permission;
5722 rp->next = republish_list;
5723 republish_list = rp;
5724 }
5725 token = token->next;
5726 }
5727
5728 mutex_exit(&importer_list.lock);
5729
5730 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPUBLISH;
5731 request.rsmipc_key = segid;
5732
5733 while (republish_list != NULL) {
5734 request.rsmipc_perm = republish_list->permission;
5735 (void) rsmipc_send(republish_list->importing_node,
5736 &request, RSM_NO_REPLY);
5737 rp = republish_list;
5738 republish_list = republish_list->next;
5739 kmem_free(rp, sizeof (republish_token_t));
5740 }
5741 }
5742
5743 static void
5744 rsm_send_suspend()
5745 {
5746 int i, e;
5747 rsmipc_request_t request;
5748 list_element_t *tokp;
5749 list_element_t *head = NULL;
5750 importing_token_t *token;
5751 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
5752 "rsm_send_suspend enter\n"));
5753
5754 /*
* create a list of nodes to send the suspend message to
*
* Currently the whole importer list is scanned and we obtain
* all the nodes - this basically gets every node that imports
* at least one segment from the local node.
*
* There is no need to grab the rsm_suspend_list lock here since
* we are single threaded when suspend is called.
5763 */
5764
5765 mutex_enter(&importer_list.lock);
5766 for (i = 0; i < rsm_hash_size; i++) {
5767
5768 token = importer_list.bucket[i];
5769
5770 while (token != NULL) {
5771
5772 tokp = head;
5773
5774 /*
5775 * make sure that the token's node
5776 * is not already on the suspend list
5777 */
5778 while (tokp != NULL) {
5779 if (tokp->nodeid == token->importing_node) {
5780 break;
5781 }
5782 tokp = tokp->next;
5783 }
5784
5785 if (tokp == NULL) { /* not in suspend list */
5786 tokp = kmem_zalloc(sizeof (list_element_t),
5787 KM_SLEEP);
5788 tokp->nodeid = token->importing_node;
5789 tokp->next = head;
5790 head = tokp;
5791 }
5792
5793 token = token->next;
5794 }
5795 }
5796 mutex_exit(&importer_list.lock);
5797
5798 if (head == NULL) { /* no importers so go ahead and quiesce segments */
5799 exporter_quiesce();
5800 return;
5801 }
5802
5803 mutex_enter(&rsm_suspend_list.list_lock);
5804 ASSERT(rsm_suspend_list.list_head == NULL);
5805 /*
* update the suspend list right away so that if a node dies the
* path manager can set the NODE dead flag
5808 */
5809 rsm_suspend_list.list_head = head;
5810 mutex_exit(&rsm_suspend_list.list_lock);
5811
5812 tokp = head;
5813
5814 while (tokp != NULL) {
5815 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SUSPEND;
5816 e = rsmipc_send(tokp->nodeid, &request, RSM_NO_REPLY);
5817 /*
5818 * Error in rsmipc_send currently happens due to inaccessibility
5819 * of the remote node.
5820 */
if (e == RSM_SUCCESS) { /* send succeeded - wait for the ack */
5822 tokp->flags |= RSM_SUSPEND_ACKPENDING;
5823 }
5824
5825 tokp = tokp->next;
5826 }
5827
5828 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
5829 "rsm_send_suspend done\n"));
5830
5831 }
5832
5833 static void
5834 rsm_send_resume()
5835 {
5836 rsmipc_request_t request;
5837 list_element_t *elem, *head;
5838
5839 /*
5840 * save the suspend list so that we know where to send
5841 * the resume messages and make the suspend list head
5842 * NULL.
5843 */
5844 mutex_enter(&rsm_suspend_list.list_lock);
5845 head = rsm_suspend_list.list_head;
5846 rsm_suspend_list.list_head = NULL;
5847 mutex_exit(&rsm_suspend_list.list_lock);
5848
5849 while (head != NULL) {
5850 elem = head;
5851 head = head->next;
5852
5853 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_RESUME;
5854
5855 (void) rsmipc_send(elem->nodeid, &request, RSM_NO_REPLY);
5856
5857 kmem_free((void *)elem, sizeof (list_element_t));
5858
5859 }
5860
5861 }
5862
5863 /*
* This function takes a path and sends a message using the sendq
5865 * corresponding to it. The RSMIPC_MSG_SQREADY, RSMIPC_MSG_SQREADY_ACK
5866 * and RSMIPC_MSG_CREDIT are sent using this function.
5867 */
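/*
 * Minimal caller sketch (illustrative only): the path mutex must be held
 * on entry and is still held on return; the routine itself drops and
 * re-acquires it around the actual rsm_send:
 *
 *	mutex_enter(&path->mutex);
 *	e = rsmipc_send_controlmsg(path, RSMIPC_MSG_CREDIT);
 *	mutex_exit(&path->mutex);
 */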
5868 int
5869 rsmipc_send_controlmsg(path_t *path, int msgtype)
5870 {
5871 int e;
5872 int retry_cnt = 0;
5873 int min_retry_cnt = 10;
5874 adapter_t *adapter;
5875 rsm_send_t is;
5876 rsm_send_q_handle_t ipc_handle;
5877 rsmipc_controlmsg_t msg;
5878 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_FLOWCONTROL);
5879
5880 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5881 "rsmipc_send_controlmsg enter\n"));
5882
5883 ASSERT(MUTEX_HELD(&path->mutex));
5884
5885 adapter = path->local_adapter;
5886
5887 DBG_PRINTF((category, RSM_DEBUG, "rsmipc_send_controlmsg:path=%lx "
5888 "msgtype=%d %lx:%llx->%lx:%llx procmsg=%d\n", path, msgtype,
5889 my_nodeid, adapter->hwaddr, path->remote_node,
5890 path->remote_hwaddr, path->procmsg_cnt));
5891
5892 if (path->state != RSMKA_PATH_ACTIVE) {
5893 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5894 "rsmipc_send_controlmsg done: ! RSMKA_PATH_ACTIVE"));
5895 return (1);
5896 }
5897
5898 ipc_handle = path->sendq_token.rsmpi_sendq_handle;
5899
5900 msg.rsmipc_hdr.rsmipc_version = RSM_VERSION;
5901 msg.rsmipc_hdr.rsmipc_src = my_nodeid;
5902 msg.rsmipc_hdr.rsmipc_type = msgtype;
5903 msg.rsmipc_hdr.rsmipc_incn = path->remote_incn;
5904
5905 if (msgtype == RSMIPC_MSG_CREDIT)
5906 msg.rsmipc_credits = path->procmsg_cnt;
5907
5908 msg.rsmipc_local_incn = path->local_incn;
5909
5910 msg.rsmipc_adapter_hwaddr = adapter->hwaddr;
5911 /* incr the sendq, path refcnt */
5912 PATH_HOLD_NOLOCK(path);
5913 SENDQ_TOKEN_HOLD(path);
5914
5915 do {
5916 /* drop the path lock before doing the rsm_send */
5917 mutex_exit(&path->mutex);
5918
5919 is.is_data = (void *)&msg;
5920 is.is_size = sizeof (msg);
5921 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
5922 is.is_wait = 0;
5923
5924 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, NULL);
5925
5926 ASSERT(e != RSMERR_QUEUE_FENCE_UP &&
5927 e != RSMERR_BAD_BARRIER_HNDL);
5928
5929 mutex_enter(&path->mutex);
5930
5931 if (e == RSM_SUCCESS) {
5932 break;
5933 }
5934 /* error counter for statistics */
5935 atomic_inc_64(&rsm_ctrlmsg_errcnt);
5936
5937 DBG_PRINTF((category, RSM_ERR,
5938 "rsmipc_send_controlmsg:rsm_send error=%d", e));
5939
5940 if (++retry_cnt == min_retry_cnt) { /* backoff before retry */
5941 (void) cv_reltimedwait(&path->sendq_token.sendq_cv,
5942 &path->mutex, drv_usectohz(10000), TR_CLOCK_TICK);
5943 retry_cnt = 0;
5944 }
5945 } while (path->state == RSMKA_PATH_ACTIVE);
5946
5947 /* decrement the sendq,path refcnt that we incr before rsm_send */
5948 SENDQ_TOKEN_RELE(path);
5949 PATH_RELE_NOLOCK(path);
5950
5951 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5952 "rsmipc_send_controlmsg done=%d", e));
5953 return (e);
5954 }
5955
5956 /*
5957 * Called from rsm_force_unload and path_importer_disconnect. The memory
5958 * mapping for the imported segment is removed and the segment is
5959 * disconnected at the interconnect layer if disconnect_flag is TRUE.
5960 * rsm_force_unload will get disconnect_flag TRUE from rsm_intr_callback
5961 * and FALSE from rsm_rebind.
5962 *
5963 * When subsequent accesses cause page faulting, the dummy page is mapped
5964 * to resolve the fault, and the mapping generation number is incremented
5965 * so that the application can be notified on a close barrier operation.
5966 *
5967 * It is important to note that the caller of rsmseg_unload is responsible for
5968 * acquiring the segment lock before making a call to rsmseg_unload. This is
5969 * required to make the caller and rsmseg_unload thread safe. The segment lock
5970 * will be released by the rsmseg_unload function.
5971 */
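/*
 * Locking sketch (illustrative): rsmseg_unload consumes the lock that its
 * caller acquired, for example:
 *
 *	rsmseglock_acquire(im_seg);
 *	rsmseg_unload(im_seg);		(releases the segment lock)
 */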
5972 void
5973 rsmseg_unload(rsmseg_t *im_seg)
5974 {
5975 rsmcookie_t *hdl;
5976 void *shared_cookie;
5977 rsmipc_request_t request;
5978 uint_t maxprot;
5979
5980 DBG_DEFINE(category,
5981 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5982
5983 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_unload enter\n"));
5984
5985 ASSERT(im_seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
5986
5987 /* wait until segment leaves the mapping state */
5988 while (im_seg->s_state == RSM_STATE_MAPPING)
5989 cv_wait(&im_seg->s_cv, &im_seg->s_lock);
5990 /*
5991 * An unload is only necessary if the segment is connected. However,
5992 * if the segment was on the import list in state RSM_STATE_CONNECTING
5993 * then a connection was in progress. Change to RSM_STATE_NEW
5994 * here to cause an early exit from the connection process.
5995 */
5996 if (im_seg->s_state == RSM_STATE_NEW) {
5997 rsmseglock_release(im_seg);
5998 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5999 "rsmseg_unload done: RSM_STATE_NEW\n"));
6000 return;
6001 } else if (im_seg->s_state == RSM_STATE_CONNECTING) {
6002 im_seg->s_state = RSM_STATE_ABORT_CONNECT;
6003 rsmsharelock_acquire(im_seg);
6004 im_seg->s_share->rsmsi_state = RSMSI_STATE_ABORT_CONNECT;
6005 rsmsharelock_release(im_seg);
6006 rsmseglock_release(im_seg);
6007 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6008 "rsmseg_unload done: RSM_STATE_CONNECTING\n"));
6009 return;
6010 }
6011
6012 if (im_seg->s_flags & RSM_FORCE_DISCONNECT) {
6013 if (im_seg->s_ckl != NULL) {
6014 int e;
6015 /* Setup protections for remap */
6016 maxprot = PROT_USER;
6017 if (im_seg->s_mode & RSM_PERM_READ) {
6018 maxprot |= PROT_READ;
6019 }
6020 if (im_seg->s_mode & RSM_PERM_WRITE) {
6021 maxprot |= PROT_WRITE;
6022 }
6023 hdl = im_seg->s_ckl;
6024 for (; hdl != NULL; hdl = hdl->c_next) {
6025 e = devmap_umem_remap(hdl->c_dhp, rsm_dip,
6026 remap_cookie,
6027 hdl->c_off, hdl->c_len,
6028 maxprot, 0, NULL);
6029
6030 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6031 "remap returns %d\n", e));
6032 }
6033 }
6034
6035 (void) rsm_closeconnection(im_seg, &shared_cookie);
6036
6037 if (shared_cookie != NULL) {
6038 /*
6039 * inform the exporting node so this import
6040 * can be deleted from the list of importers.
6041 */
6042 request.rsmipc_hdr.rsmipc_type =
6043 RSMIPC_MSG_NOTIMPORTING;
6044 request.rsmipc_key = im_seg->s_segid;
6045 request.rsmipc_segment_cookie = shared_cookie;
6046 rsmseglock_release(im_seg);
6047 (void) rsmipc_send(im_seg->s_node, &request,
6048 RSM_NO_REPLY);
6049 } else {
6050 rsmseglock_release(im_seg);
6051 }
} else {
rsmseglock_release(im_seg);
}
6056 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_unload done\n"));
6057
6058 }
6059
6060 /* ****************************** Importer Calls ************************ */
6061
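/*
 * rsm_access below mirrors the classic owner/group/other file-mode check:
 * the requested bits (mode) are masked against the granted bits (perm),
 * shifted according to whether the caller is the owner, a member of the
 * owning group, or neither.  Any requested bits still set after the mask
 * require the rsm_access privilege (secpolicy_rsm_access).
 */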
6062 static int
6063 rsm_access(uid_t owner, gid_t group, int perm, int mode, const struct cred *cr)
6064 {
6065 int shifts = 0;
6066
6067 if (crgetuid(cr) != owner) {
6068 shifts += 3;
6069 if (!groupmember(group, cr))
6070 shifts += 3;
6071 }
6072
6073 mode &= ~(perm << shifts);
6074
6075 if (mode == 0)
6076 return (0);
6077
6078 return (secpolicy_rsm_access(cr, owner, mode));
6079 }
6080
6081
6082 static int
6083 rsm_connect(rsmseg_t *seg, rsm_ioctlmsg_t *msg, cred_t *cred,
6084 intptr_t dataptr, int mode)
6085 {
6086 int e;
6087 int recheck_state = 0;
6088 void *shared_cookie;
6089 rsmipc_request_t request;
6090 rsmipc_reply_t reply;
6091 rsm_permission_t access;
6092 adapter_t *adapter;
6093 rsm_addr_t addr = 0;
6094 rsm_import_share_t *sharedp;
6095 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);
6096
6097 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_connect enter\n"));
6098
6099 adapter = rsm_getadapter(msg, mode);
6100 if (adapter == NULL) {
6101 DBG_PRINTF((category, RSM_ERR,
6102 "rsm_connect done:ENODEV adapter=NULL\n"));
6103 return (RSMERR_CTLR_NOT_PRESENT);
6104 }
6105
6106 if ((adapter == &loopback_adapter) && (msg->nodeid != my_nodeid)) {
6107 rsmka_release_adapter(adapter);
6108 DBG_PRINTF((category, RSM_ERR,
6109 "rsm_connect done:ENODEV loopback\n"));
6110 return (RSMERR_CTLR_NOT_PRESENT);
6111 }
6112
6113
6114 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
6115 ASSERT(seg->s_state == RSM_STATE_NEW);
6116
6117 /*
6118 * Translate perm to access
6119 */
6120 if (msg->perm & ~RSM_PERM_RDWR) {
6121 rsmka_release_adapter(adapter);
6122 DBG_PRINTF((category, RSM_ERR,
6123 "rsm_connect done:EINVAL invalid perms\n"));
6124 return (RSMERR_BAD_PERMS);
6125 }
6126 access = 0;
6127 if (msg->perm & RSM_PERM_READ)
6128 access |= RSM_ACCESS_READ;
6129 if (msg->perm & RSM_PERM_WRITE)
6130 access |= RSM_ACCESS_WRITE;
6131
6132 seg->s_node = msg->nodeid;
6133
6134 /*
6135 * Adding to the import list locks the segment; release the segment
6136 * lock so we can get the reply for the send.
6137 */
6138 e = rsmimport_add(seg, msg->key);
6139 if (e) {
6140 rsmka_release_adapter(adapter);
6141 DBG_PRINTF((category, RSM_ERR,
6142 "rsm_connect done:rsmimport_add failed %d\n", e));
6143 return (e);
6144 }
6145 seg->s_state = RSM_STATE_CONNECTING;
6146
6147 /*
6148 * Set the s_adapter field here so as to have a valid comparison of
6149 * the adapter and the s_adapter value during rsmshare_get. For
6150 * any error, set s_adapter to NULL before doing a release_adapter
6151 */
6152 seg->s_adapter = adapter;
6153
6154 rsmseglock_release(seg);
6155
6156 /*
6157 * get the pointer to the shared data structure; the
6158 * shared data is locked and refcount has been incremented
6159 */
6160 sharedp = rsmshare_get(msg->key, msg->nodeid, adapter, seg);
6161
6162 ASSERT(rsmsharelock_held(seg));
6163
6164 do {
6165 /* flag indicates whether we need to recheck the state */
6166 recheck_state = 0;
6167 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6168 "rsm_connect:RSMSI_STATE=%d\n", sharedp->rsmsi_state));
6169 switch (sharedp->rsmsi_state) {
6170 case RSMSI_STATE_NEW:
6171 sharedp->rsmsi_state = RSMSI_STATE_CONNECTING;
6172 break;
6173 case RSMSI_STATE_CONNECTING:
6174 /* FALLTHRU */
6175 case RSMSI_STATE_CONN_QUIESCE:
6176 /* FALLTHRU */
6177 case RSMSI_STATE_MAP_QUIESCE:
6178 /* wait for the state to change */
6179 while ((sharedp->rsmsi_state ==
6180 RSMSI_STATE_CONNECTING) ||
6181 (sharedp->rsmsi_state ==
6182 RSMSI_STATE_CONN_QUIESCE) ||
6183 (sharedp->rsmsi_state ==
6184 RSMSI_STATE_MAP_QUIESCE)) {
6185 if (cv_wait_sig(&sharedp->rsmsi_cv,
6186 &sharedp->rsmsi_lock) == 0) {
6187 /* signalled - clean up and return */
6188 rsmsharelock_release(seg);
6189 rsmimport_rm(seg);
6190 seg->s_adapter = NULL;
6191 rsmka_release_adapter(adapter);
6192 seg->s_state = RSM_STATE_NEW;
6193 DBG_PRINTF((category, RSM_ERR,
6194 "rsm_connect done: INTERRUPTED\n"));
6195 return (RSMERR_INTERRUPTED);
6196 }
6197 }
6198 /*
6199 * the state changed, loop back and check what it is
6200 */
6201 recheck_state = 1;
6202 break;
6203 case RSMSI_STATE_ABORT_CONNECT:
6204 /* exit the loop and clean up further down */
6205 break;
6206 case RSMSI_STATE_CONNECTED:
6207 /* already connected, good - fall through */
6208 case RSMSI_STATE_MAPPED:
6209 /* already mapped, wow - fall through */
6210 /* access validation etc is done further down */
6211 break;
6212 case RSMSI_STATE_DISCONNECTED:
6213 /* disconnected - so reconnect now */
6214 sharedp->rsmsi_state = RSMSI_STATE_CONNECTING;
6215 break;
6216 default:
6217 ASSERT(0); /* Invalid State */
6218 }
6219 } while (recheck_state);
6220
6221 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) {
6222 /* we are the first to connect */
6223 rsmsharelock_release(seg);
6224
6225 if (msg->nodeid != my_nodeid) {
6226 addr = get_remote_hwaddr(adapter, msg->nodeid);
6227
6228 if ((int64_t)addr < 0) {
6229 rsmsharelock_acquire(seg);
6230 rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING,
6231 RSMSI_STATE_NEW);
6232 rsmsharelock_release(seg);
6233 rsmimport_rm(seg);
6234 seg->s_adapter = NULL;
6235 rsmka_release_adapter(adapter);
6236 seg->s_state = RSM_STATE_NEW;
6237 DBG_PRINTF((category, RSM_ERR,
6238 "rsm_connect done: hwaddr<0\n"));
6239 return (RSMERR_INTERNAL_ERROR);
6240 }
6241 } else {
6242 addr = adapter->hwaddr;
6243 }
6244
6245 /*
6246 * send request to node [src, dest, key, msgid] and get back
6247 * [status, msgid, cookie]
6248 */
6249 request.rsmipc_key = msg->key;
6250 /*
6251 * we need the s_mode of the exporter so pass
6252 * RSM_ACCESS_TRUSTED
6253 */
6254 request.rsmipc_perm = RSM_ACCESS_TRUSTED;
6255 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SEGCONNECT;
6256 request.rsmipc_adapter_hwaddr = addr;
6257 request.rsmipc_segment_cookie = sharedp;
6258
6259 e = (int)rsmipc_send(msg->nodeid, &request, &reply);
6260 if (e) {
6261 rsmsharelock_acquire(seg);
6262 rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING,
6263 RSMSI_STATE_NEW);
6264 rsmsharelock_release(seg);
6265 rsmimport_rm(seg);
6266 seg->s_adapter = NULL;
6267 rsmka_release_adapter(adapter);
6268 seg->s_state = RSM_STATE_NEW;
6269 DBG_PRINTF((category, RSM_ERR,
6270 "rsm_connect done:rsmipc_send failed %d\n", e));
6271 return (e);
6272 }
6273
6274 if (reply.rsmipc_status != RSM_SUCCESS) {
6275 rsmsharelock_acquire(seg);
6276 rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING,
6277 RSMSI_STATE_NEW);
6278 rsmsharelock_release(seg);
6279 rsmimport_rm(seg);
6280 seg->s_adapter = NULL;
6281 rsmka_release_adapter(adapter);
6282 seg->s_state = RSM_STATE_NEW;
6283 DBG_PRINTF((category, RSM_ERR,
6284 "rsm_connect done:rsmipc_send reply err %d\n",
6285 reply.rsmipc_status));
6286 return (reply.rsmipc_status);
6287 }
6288
6289 rsmsharelock_acquire(seg);
6290 /* store the information recvd into the shared data struct */
6291 sharedp->rsmsi_mode = reply.rsmipc_mode;
6292 sharedp->rsmsi_uid = reply.rsmipc_uid;
6293 sharedp->rsmsi_gid = reply.rsmipc_gid;
6294 sharedp->rsmsi_seglen = reply.rsmipc_seglen;
6295 sharedp->rsmsi_cookie = sharedp;
6296 }
6297
6298 rsmsharelock_release(seg);
6299
6300 /*
6301 * Get the segment lock and check for a force disconnect
6302 * from the export side which would have changed the state
6303 * back to RSM_STATE_NEW. Once the segment lock is acquired a
6304 * force disconnect will be held off until the connection
6305 * has completed.
6306 */
6307 rsmseglock_acquire(seg);
6308 rsmsharelock_acquire(seg);
6309 ASSERT(seg->s_state == RSM_STATE_CONNECTING ||
6310 seg->s_state == RSM_STATE_ABORT_CONNECT);
6311
6312 shared_cookie = sharedp->rsmsi_cookie;
6313
6314 if ((seg->s_state == RSM_STATE_ABORT_CONNECT) ||
6315 (sharedp->rsmsi_state == RSMSI_STATE_ABORT_CONNECT)) {
6316 seg->s_state = RSM_STATE_NEW;
6317 seg->s_adapter = NULL;
6318 rsmsharelock_release(seg);
6319 rsmseglock_release(seg);
6320 rsmimport_rm(seg);
6321 rsmka_release_adapter(adapter);
6322
6323 rsmsharelock_acquire(seg);
6324 if (!(sharedp->rsmsi_flags & RSMSI_FLAGS_ABORTDONE)) {
6325 /*
6326 * set a flag indicating abort handling has been
6327 * done
6328 */
6329 sharedp->rsmsi_flags |= RSMSI_FLAGS_ABORTDONE;
6330 rsmsharelock_release(seg);
6331 /* send a message to exporter - only once */
6332 (void) rsm_send_notimporting(msg->nodeid,
6333 msg->key, shared_cookie);
6334 rsmsharelock_acquire(seg);
6335 /*
6336 * wake up any waiting importers and inform that
6337 * connection has been aborted
6338 */
6339 cv_broadcast(&sharedp->rsmsi_cv);
6340 }
6341 rsmsharelock_release(seg);
6342
6343 DBG_PRINTF((category, RSM_ERR,
6344 "rsm_connect done: RSM_STATE_ABORT_CONNECT\n"));
6345 return (RSMERR_INTERRUPTED);
6346 }
6347
6348
6349 /*
6350 * We need to verify that this process has access
6351 */
6352 e = rsm_access(sharedp->rsmsi_uid, sharedp->rsmsi_gid,
6353 access & sharedp->rsmsi_mode,
6354 (int)(msg->perm & RSM_PERM_RDWR), cred);
6355 if (e) {
6356 rsmsharelock_release(seg);
6357 seg->s_state = RSM_STATE_NEW;
6358 seg->s_adapter = NULL;
6359 rsmseglock_release(seg);
6360 rsmimport_rm(seg);
6361 rsmka_release_adapter(adapter);
6362 /*
* No need to lock the segment; it has been
* removed from the hash table
6365 */
6366 rsmsharelock_acquire(seg);
6367 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) {
6368 rsmsharelock_release(seg);
6369 /* this is the first importer */
6370
6371 (void) rsm_send_notimporting(msg->nodeid, msg->key,
6372 shared_cookie);
6373 rsmsharelock_acquire(seg);
6374 sharedp->rsmsi_state = RSMSI_STATE_NEW;
6375 cv_broadcast(&sharedp->rsmsi_cv);
6376 }
6377 rsmsharelock_release(seg);
6378
6379 DBG_PRINTF((category, RSM_ERR,
6380 "rsm_connect done: ipcaccess failed\n"));
6381 return (RSMERR_PERM_DENIED);
6382 }
6383
6384 /* update state and cookie */
6385 seg->s_segid = sharedp->rsmsi_segid;
6386 seg->s_len = sharedp->rsmsi_seglen;
6387 seg->s_mode = access & sharedp->rsmsi_mode;
6388 seg->s_pid = ddi_get_pid();
6389 seg->s_mapinfo = NULL;
6390
6391 if (seg->s_node != my_nodeid) {
6392 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) {
6393 e = adapter->rsmpi_ops->rsm_connect(
6394 adapter->rsmpi_handle,
6395 addr, seg->s_segid, &sharedp->rsmsi_handle);
6396
6397 if (e != RSM_SUCCESS) {
6398 seg->s_state = RSM_STATE_NEW;
6399 seg->s_adapter = NULL;
6400 rsmsharelock_release(seg);
6401 rsmseglock_release(seg);
6402 rsmimport_rm(seg);
6403 rsmka_release_adapter(adapter);
6404 /*
6405 * inform the exporter to delete this importer
6406 */
6407 (void) rsm_send_notimporting(msg->nodeid,
6408 msg->key, shared_cookie);
6409
6410 /*
6411 * Now inform any waiting importers to
6412 * retry connect. This needs to be done
6413 * after sending notimporting so that
6414 * the notimporting is sent before a waiting
6415 * importer sends a segconnect while retrying
6416 *
* No need to lock the segment; it has been removed
* from the hash table
6419 */
6420
6421 rsmsharelock_acquire(seg);
6422 sharedp->rsmsi_state = RSMSI_STATE_NEW;
6423 cv_broadcast(&sharedp->rsmsi_cv);
6424 rsmsharelock_release(seg);
6425
6426 DBG_PRINTF((category, RSM_ERR,
6427 "rsm_connect error %d\n", e));
6428 if (e == RSMERR_SEG_NOT_PUBLISHED_TO_RSM_ADDR)
6429 return (
6430 RSMERR_SEG_NOT_PUBLISHED_TO_NODE);
6431 else if ((e == RSMERR_RSM_ADDR_UNREACHABLE) ||
6432 (e == RSMERR_UNKNOWN_RSM_ADDR))
6433 return (RSMERR_REMOTE_NODE_UNREACHABLE);
6434 else
6435 return (e);
6436 }
6437
6438 }
6439 seg->s_handle.in = sharedp->rsmsi_handle;
6440
6441 }
6442
6443 seg->s_state = RSM_STATE_CONNECT;
6444
6445
6446 seg->s_flags &= ~RSM_IMPORT_DUMMY; /* clear dummy flag */
6447 if (bar_va) {
6448 /* increment generation number on barrier page */
6449 atomic_inc_16(bar_va + seg->s_hdr.rsmrc_num);
/* return the user offset into the barrier page where status will be */
6451 msg->off = (int)seg->s_hdr.rsmrc_num;
6452 msg->gnum = bar_va[msg->off]; /* gnum race */
6453 } else {
6454 msg->off = 0;
6455 msg->gnum = 0; /* gnum race */
6456 }
6457
6458 msg->len = (int)sharedp->rsmsi_seglen;
6459 msg->rnum = seg->s_minor;
6460 rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING, RSMSI_STATE_CONNECTED);
6461 rsmsharelock_release(seg);
6462 rsmseglock_release(seg);
6463
/* Return the segment size & perm back to the user in case they're needed */
6465
6466 #ifdef _MULTI_DATAMODEL
6467 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
6468 rsm_ioctlmsg32_t msg32;
6469
6470 if (msg->len > UINT_MAX)
6471 msg32.len = RSM_MAXSZ_PAGE_ALIGNED;
6472 else
6473 msg32.len = msg->len;
6474 msg32.off = msg->off;
6475 msg32.perm = msg->perm;
6476 msg32.gnum = msg->gnum;
6477 msg32.rnum = msg->rnum;
6478
6479 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6480 "rsm_connect done\n"));
6481
6482 if (ddi_copyout((caddr_t)&msg32, (caddr_t)dataptr,
6483 sizeof (msg32), mode))
6484 return (RSMERR_BAD_ADDR);
6485 else
6486 return (RSM_SUCCESS);
6487 }
6488 #endif
6489 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_connect done\n"));
6490
6491 if (ddi_copyout((caddr_t)msg, (caddr_t)dataptr, sizeof (*msg),
6492 mode))
6493 return (RSMERR_BAD_ADDR);
6494 else
6495 return (RSM_SUCCESS);
6496 }
6497
6498 static int
6499 rsm_unmap(rsmseg_t *seg)
6500 {
6501 int err;
6502 adapter_t *adapter;
6503 rsm_import_share_t *sharedp;
6504 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);
6505
6506 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6507 "rsm_unmap enter %u\n", seg->s_segid));
6508
6509 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
6510
6511 /* assert seg is locked */
6512 ASSERT(rsmseglock_held(seg));
6513 ASSERT(seg->s_state != RSM_STATE_MAPPING);
6514
6515 if ((seg->s_state != RSM_STATE_ACTIVE) &&
6516 (seg->s_state != RSM_STATE_MAP_QUIESCE)) {
6517 /* segment unmap has already been done */
6518 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unmap done\n"));
6519 return (RSM_SUCCESS);
6520 }
6521
6522 sharedp = seg->s_share;
6523
6524 rsmsharelock_acquire(seg);
6525
6526 /*
6527 * - shared data struct is in MAPPED or MAP_QUIESCE state
6528 */
6529
6530 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED ||
6531 sharedp->rsmsi_state == RSMSI_STATE_MAP_QUIESCE);
6532
6533 /*
6534 * Unmap pages - previously rsm_memseg_import_unmap was called only if
6535 * the segment cookie list was NULL; but it is always NULL when
6536 * called from rsmmap_unmap and won't be NULL when called for
6537 * a force disconnect - so the check for NULL cookie list was removed
6538 */
6539
6540 ASSERT(sharedp->rsmsi_mapcnt > 0);
6541
6542 sharedp->rsmsi_mapcnt--;
6543
6544 if (sharedp->rsmsi_mapcnt == 0) {
6545 if (sharedp->rsmsi_state == RSMSI_STATE_MAPPED) {
6546 /* unmap the shared RSMPI mapping */
6547 adapter = seg->s_adapter;
6548 if (seg->s_node != my_nodeid) {
6549 ASSERT(sharedp->rsmsi_handle != NULL);
6550 err = adapter->rsmpi_ops->
6551 rsm_unmap(sharedp->rsmsi_handle);
6552 DBG_PRINTF((category, RSM_DEBUG,
6553 "rsm_unmap: rsmpi unmap %d\n", err));
6554 rsm_free_mapinfo(sharedp->rsmsi_mapinfo);
6555 sharedp->rsmsi_mapinfo = NULL;
6556 }
6557 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
6558 } else { /* MAP_QUIESCE --munmap()--> CONN_QUIESCE */
6559 sharedp->rsmsi_state = RSMSI_STATE_CONN_QUIESCE;
6560 }
6561 }
6562
6563 rsmsharelock_release(seg);
6564
6565 /*
6566 * The s_cookie field is used to store the cookie returned from the
6567 * ddi_umem_lock when binding the pages for an export segment. This
6568 * is the primary use of the s_cookie field and does not normally
6569 * pertain to any importing segment except in the loopback case.
6570 * For the loopback case, the import segment and export segment are
6571 * on the same node, the s_cookie field of the segment structure for
6572 * the importer is initialized to the s_cookie field in the exported
6573 * segment during the map operation and is used during the call to
6574 * devmap_umem_setup for the import mapping.
6575 * Thus, during unmap, we simply need to set s_cookie to NULL to
6576 * indicate that the mapping no longer exists.
6577 */
6578 seg->s_cookie = NULL;
6579
6580 seg->s_mapinfo = NULL;
6581
6582 if (seg->s_state == RSM_STATE_ACTIVE)
6583 seg->s_state = RSM_STATE_CONNECT;
6584 else
6585 seg->s_state = RSM_STATE_CONN_QUIESCE;
6586
6587 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unmap done\n"));
6588
6589 return (RSM_SUCCESS);
6590 }
6591
6592 /*
* The cookie returned here, if not NULL, indicates that this is the
* last importer; the cookie can then be used in the
* RSMIPC_MSG_NOTIMPORTING message.
6596 */
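/*
 * Caller sketch (illustrative), following the pattern used by
 * rsm_disconnect below:
 *
 *	(void) rsm_closeconnection(seg, &shared_cookie);
 *	if (shared_cookie != NULL) {
 *		send an RSMIPC_MSG_NOTIMPORTING request to the exporting
 *		node with shared_cookie as the rsmipc_segment_cookie;
 *	}
 */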
6597 static int
6598 rsm_closeconnection(rsmseg_t *seg, void **cookie)
6599 {
6600 int e;
6601 adapter_t *adapter;
6602 rsm_import_share_t *sharedp;
6603 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);
6604
6605 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6606 "rsm_closeconnection enter\n"));
6607
6608 *cookie = (void *)NULL;
6609
6610 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
6611
6612 /* assert seg is locked */
6613 ASSERT(rsmseglock_held(seg));
6614
6615 if (seg->s_state == RSM_STATE_DISCONNECT) {
6616 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6617 "rsm_closeconnection done: already disconnected\n"));
6618 return (RSM_SUCCESS);
6619 }
6620
6621 /* wait for all putv/getv ops to get done */
6622 while (seg->s_rdmacnt > 0) {
6623 cv_wait(&seg->s_cv, &seg->s_lock);
6624 }
6625
6626 (void) rsm_unmap(seg);
6627
6628 ASSERT(seg->s_state == RSM_STATE_CONNECT ||
6629 seg->s_state == RSM_STATE_CONN_QUIESCE);
6630
6631 adapter = seg->s_adapter;
6632 sharedp = seg->s_share;
6633
6634 ASSERT(sharedp != NULL);
6635
6636 rsmsharelock_acquire(seg);
6637
6638 /*
6639 * Disconnect on adapter
6640 *
* The current algorithm is stateless: we don't have to contact the
* server when we go away, since it only hands out permissions. Of
* course, the adapters will talk to terminate the connection.
6644 *
6645 * disconnect is needed only if we are CONNECTED not in CONN_QUIESCE
6646 */
6647 if ((sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) &&
6648 (sharedp->rsmsi_node != my_nodeid)) {
6649
6650 if (sharedp->rsmsi_refcnt == 1) {
6651 /* this is the last importer */
6652 ASSERT(sharedp->rsmsi_mapcnt == 0);
6653
6654 e = adapter->rsmpi_ops->
6655 rsm_disconnect(sharedp->rsmsi_handle);
6656 if (e != RSM_SUCCESS) {
6657 DBG_PRINTF((category, RSM_DEBUG,
6658 "rsm:disconnect failed seg=%x:err=%d\n",
6659 seg->s_key, e));
6660 }
6661 }
6662 }
6663
6664 seg->s_handle.in = NULL;
6665
6666 sharedp->rsmsi_refcnt--;
6667
6668 if (sharedp->rsmsi_refcnt == 0) {
6669 *cookie = (void *)sharedp->rsmsi_cookie;
6670 sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED;
6671 sharedp->rsmsi_handle = NULL;
6672 rsmsharelock_release(seg);
6673
6674 /* clean up the shared data structure */
6675 mutex_destroy(&sharedp->rsmsi_lock);
6676 cv_destroy(&sharedp->rsmsi_cv);
6677 kmem_free((void *)(sharedp), sizeof (rsm_import_share_t));
6678
6679 } else {
6680 rsmsharelock_release(seg);
6681 }
6682
6683 /* increment generation number on barrier page */
6684 if (bar_va) {
6685 atomic_inc_16(bar_va + seg->s_hdr.rsmrc_num);
6686 }
6687
6688 /*
6689 * The following needs to be done after any
6690 * rsmsharelock calls which use seg->s_share.
6691 */
6692 seg->s_share = NULL;
6693
6694 seg->s_state = RSM_STATE_DISCONNECT;
6695 /* signal anyone waiting in the CONN_QUIESCE state */
6696 cv_broadcast(&seg->s_cv);
6697
6698 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6699 "rsm_closeconnection done\n"));
6700
6701 return (RSM_SUCCESS);
6702 }
6703
6704 int
6705 rsm_disconnect(rsmseg_t *seg)
6706 {
6707 rsmipc_request_t request;
6708 void *shared_cookie;
6709 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);
6710
6711 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_disconnect enter\n"));
6712
6713 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
6714
6715 /* assert seg isn't locked */
6716 ASSERT(!rsmseglock_held(seg));
6717
6718
6719 /* Remove segment from imported list */
6720 rsmimport_rm(seg);
6721
6722 /* acquire the segment */
6723 rsmseglock_acquire(seg);
6724
6725 /* wait until segment leaves the mapping state */
6726 while (seg->s_state == RSM_STATE_MAPPING)
6727 cv_wait(&seg->s_cv, &seg->s_lock);
6728
6729 if (seg->s_state == RSM_STATE_DISCONNECT) {
6730 seg->s_state = RSM_STATE_NEW;
6731 rsmseglock_release(seg);
6732 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6733 "rsm_disconnect done: already disconnected\n"));
6734 return (RSM_SUCCESS);
6735 }
6736
6737 (void) rsm_closeconnection(seg, &shared_cookie);
6738
6739 /* update state */
6740 seg->s_state = RSM_STATE_NEW;
6741
6742 if (shared_cookie != NULL) {
6743 /*
6744 * This is the last importer so inform the exporting node
6745 * so this import can be deleted from the list of importers.
6746 */
6747 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_NOTIMPORTING;
6748 request.rsmipc_key = seg->s_segid;
6749 request.rsmipc_segment_cookie = shared_cookie;
6750 rsmseglock_release(seg);
6751 (void) rsmipc_send(seg->s_node, &request, RSM_NO_REPLY);
6752 } else {
6753 rsmseglock_release(seg);
6754 }
6755
6756 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_disconnect done\n"));
6757
6758 return (DDI_SUCCESS);
6759 }
6760
6761 /*ARGSUSED*/
6762 static int
6763 rsm_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
6764 struct pollhead **phpp)
6765 {
6766 minor_t rnum;
6767 rsmresource_t *res;
6768 rsmseg_t *seg;
6769 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
6770
6771 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_chpoll enter\n"));
6772
6773 /* find minor, no lock */
6774 rnum = getminor(dev);
6775 res = rsmresource_lookup(rnum, RSM_NOLOCK);
6776
6777 /* poll is supported only for export/import segments */
6778 if ((res == NULL) || (res == RSMRC_RESERVED) ||
6779 (res->rsmrc_type == RSM_RESOURCE_BAR)) {
6780 return (ENXIO);
6781 }
6782
6783 *reventsp = 0;
6784
6785 /*
6786 * An exported segment must be in state RSM_STATE_EXPORT; an
6787 * imported segment must be in state RSM_STATE_ACTIVE.
6788 */
6789 seg = (rsmseg_t *)res;
6790
6791 if (seg->s_pollevent) {
6792 *reventsp = POLLRDNORM;
6793 } else if (!anyyet) {
6794 /* cannot take segment lock here */
6795 *phpp = &seg->s_poll;
6796 seg->s_pollflag |= RSM_SEGMENT_POLL;
6797 }
6798 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_chpoll done\n"));
6799 return (0);
6800 }
6801
6802
6803
6804 /* ************************* IOCTL Commands ********************* */
6805
6806 static rsmseg_t *
6807 rsmresource_seg(rsmresource_t *res, minor_t rnum, cred_t *credp,
6808 rsm_resource_type_t type)
6809 {
6810 /* get segment from resource handle */
6811 rsmseg_t *seg;
6812 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL);
6813
6814 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmresource_seg enter\n"));
6815
6816
6817 if (res != RSMRC_RESERVED) {
6818 seg = (rsmseg_t *)res;
6819 } else {
6820 /* Allocate segment now and bind it */
6821 seg = rsmseg_alloc(rnum, credp);
6822
6823 /*
6824 * if DR pre-processing is going on or DR is in progress
6825 * then the new export segments should be in the NEW_QSCD state
6826 */
6827 if (type == RSM_RESOURCE_EXPORT_SEGMENT) {
6828 mutex_enter(&rsm_drv_data.drv_lock);
6829 if ((rsm_drv_data.drv_state ==
6830 RSM_DRV_PREDEL_STARTED) ||
6831 (rsm_drv_data.drv_state ==
6832 RSM_DRV_PREDEL_COMPLETED) ||
6833 (rsm_drv_data.drv_state ==
6834 RSM_DRV_DR_IN_PROGRESS)) {
6835 seg->s_state = RSM_STATE_NEW_QUIESCED;
6836 }
6837 mutex_exit(&rsm_drv_data.drv_lock);
6838 }
6839
6840 rsmresource_insert(rnum, (rsmresource_t *)seg, type);
6841 }
6842
6843 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmresource_seg done\n"));
6844
6845 return (seg);
6846 }
6847
6848 static int
6849 rsmexport_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg,
6850 int mode, cred_t *credp)
6851 {
6852 int error;
6853 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT | RSM_IOCTL);
6854
6855 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmexport_ioctl enter\n"));
6856
6857 arg = arg;
6858 credp = credp;
6859
6860 ASSERT(seg != NULL);
6861
6862 switch (cmd) {
6863 case RSM_IOCTL_BIND:
6864 error = rsm_bind(seg, msg, arg, mode);
6865 break;
6866 case RSM_IOCTL_REBIND:
6867 error = rsm_rebind(seg, msg);
6868 break;
6869 case RSM_IOCTL_UNBIND:
6870 error = ENOTSUP;
6871 break;
6872 case RSM_IOCTL_PUBLISH:
6873 error = rsm_publish(seg, msg, arg, mode);
6874 break;
6875 case RSM_IOCTL_REPUBLISH:
6876 error = rsm_republish(seg, msg, mode);
6877 break;
6878 case RSM_IOCTL_UNPUBLISH:
6879 error = rsm_unpublish(seg, 1);
6880 break;
6881 default:
6882 error = EINVAL;
6883 break;
6884 }
6885
6886 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmexport_ioctl done: %d\n",
6887 error));
6888
6889 return (error);
}

static int
6892 rsmimport_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg,
6893 int mode, cred_t *credp)
6894 {
6895 int error;
6896 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
6897
6898 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmimport_ioctl enter\n"));
6899
6900 ASSERT(seg);
6901
6902 switch (cmd) {
6903 case RSM_IOCTL_CONNECT:
6904 error = rsm_connect(seg, msg, credp, arg, mode);
6905 break;
6906 default:
6907 error = EINVAL;
6908 break;
6909 }
6910
6911 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmimport_ioctl done: %d\n",
6912 error));
6913 return (error);
6914 }
6915
6916 static int
6917 rsmbar_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg,
6918 int mode)
6919 {
6920 int e;
6921 adapter_t *adapter;
6922 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
6923
6924 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmbar_ioctl enter\n"));
6925
6926
6927 if ((seg->s_flags & RSM_IMPORT_DUMMY) != 0) {
6928 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6929 "rsmbar_ioctl done: RSM_IMPORT_DUMMY\n"));
6930 return (RSMERR_CONN_ABORTED);
6931 } else if (seg->s_node == my_nodeid) {
6932 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6933 "rsmbar_ioctl done: loopback\n"));
6934 return (RSM_SUCCESS);
6935 }
6936
6937 adapter = seg->s_adapter;
6938
6939 switch (cmd) {
6940 case RSM_IOCTL_BAR_CHECK:
6941 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6942 "rsmbar_ioctl done: RSM_BAR_CHECK %d\n", bar_va));
6943 return (bar_va ? RSM_SUCCESS : EINVAL);
6944 case RSM_IOCTL_BAR_OPEN:
6945 e = adapter->rsmpi_ops->
6946 rsm_open_barrier_ctrl(adapter->rsmpi_handle, &msg->bar);
6947 break;
6948 case RSM_IOCTL_BAR_ORDER:
6949 e = adapter->rsmpi_ops->rsm_order_barrier(&msg->bar);
6950 break;
6951 case RSM_IOCTL_BAR_CLOSE:
6952 e = adapter->rsmpi_ops->rsm_close_barrier(&msg->bar);
6953 break;
6954 default:
6955 e = EINVAL;
6956 break;
6957 }
6958
6959 if (e == RSM_SUCCESS) {
6960 #ifdef _MULTI_DATAMODEL
6961 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
6962 rsm_ioctlmsg32_t msg32;
6963 int i;
6964
6965 for (i = 0; i < 4; i++) {
6966 msg32.bar.comp[i].u64 = msg->bar.comp[i].u64;
6967 }
6968
6969 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6970 "rsmbar_ioctl done\n"));
6971 if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg,
6972 sizeof (msg32), mode))
6973 return (RSMERR_BAD_ADDR);
6974 else
6975 return (RSM_SUCCESS);
6976 }
6977 #endif
6978 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6979 "rsmbar_ioctl done\n"));
6980 if (ddi_copyout((caddr_t)&msg->bar, (caddr_t)arg,
6981 sizeof (*msg), mode))
6982 return (RSMERR_BAD_ADDR);
6983 else
6984 return (RSM_SUCCESS);
6985 }
6986
6987 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6988 "rsmbar_ioctl done: error=%d\n", e));
6989
6990 return (e);
6991 }
6992
6993 /*
6994 * Ring the doorbell of the export segment to which this segment is
6995 * connected.
6996 */
6997 static int
6998 exportbell_ioctl(rsmseg_t *seg, int cmd /*ARGSUSED*/)
6999 {
7000 int e = 0;
7001 rsmipc_request_t request;
7002
7003 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7004
7005 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exportbell_ioctl enter\n"));
7006
7007 request.rsmipc_key = seg->s_segid;
7008 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL;
7009 request.rsmipc_segment_cookie = NULL;
7010 e = rsmipc_send(seg->s_node, &request, RSM_NO_REPLY);
7011
7012 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7013 "exportbell_ioctl done: %d\n", e));
7014
7015 return (e);
7016 }
7017
7018 /*
7019 * Ring the doorbells of all segments importing this segment
7020 */
7021 static int
7022 importbell_ioctl(rsmseg_t *seg, int cmd /*ARGSUSED*/)
7023 {
7024 importing_token_t *token = NULL;
7025 rsmipc_request_t request;
7026 int index;
7027
7028 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT | RSM_IOCTL);
7029
7030 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importbell_ioctl enter\n"));
7031
7032 ASSERT(seg->s_state != RSM_STATE_NEW &&
7033 seg->s_state != RSM_STATE_NEW_QUIESCED);
7034
7035 request.rsmipc_key = seg->s_segid;
7036 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL;
7037
7038 index = rsmhash(seg->s_segid);
7039
7040 token = importer_list.bucket[index];
7041
7042 while (token != NULL) {
7043 if (seg->s_key == token->key) {
7044 request.rsmipc_segment_cookie =
7045 token->import_segment_cookie;
7046 (void) rsmipc_send(token->importing_node,
7047 &request, RSM_NO_REPLY);
7048 }
7049 token = token->next;
7050 }
7051
7052 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7053 "importbell_ioctl done\n"));
7054 return (RSM_SUCCESS);
7055 }
7056
7057 static int
7058 rsm_consumeevent_copyin(caddr_t arg, rsm_consume_event_msg_t *msgp,
7059 rsm_poll_event_t **eventspp, int mode)
7060 {
7061 rsm_poll_event_t *evlist = NULL;
7062 size_t evlistsz;
7063 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL);
7064
7065 #ifdef _MULTI_DATAMODEL
7066 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7067 int i;
7068 rsm_consume_event_msg32_t cemsg32 = {0};
7069 rsm_poll_event32_t event32[RSM_MAX_POLLFDS];
7070 rsm_poll_event32_t *evlist32;
7071 size_t evlistsz32;
7072
7073 /* copyin the ioctl message */
7074 if (ddi_copyin(arg, (caddr_t)&cemsg32,
7075 sizeof (rsm_consume_event_msg32_t), mode)) {
7076 DBG_PRINTF((category, RSM_ERR,
7077 "consumeevent_copyin msgp: RSMERR_BAD_ADDR\n"));
7078 return (RSMERR_BAD_ADDR);
7079 }
7080 msgp->seglist = (caddr_t)(uintptr_t)cemsg32.seglist;
7081 msgp->numents = (int)cemsg32.numents;
7082
7083 evlistsz32 = sizeof (rsm_poll_event32_t) * msgp->numents;
7084 /*
* If numents is large, allocate the events list on the heap;
* otherwise use the local array on the stack.
7087 */
7088 if (msgp->numents > RSM_MAX_POLLFDS) {
7089 if (msgp->numents > max_segs) { /* validate numents */
7090 DBG_PRINTF((category, RSM_ERR,
7091 "consumeevent_copyin: "
7092 "RSMERR_BAD_ARGS_ERRORS\n"));
7093 return (RSMERR_BAD_ARGS_ERRORS);
7094 }
7095 evlist32 = kmem_zalloc(evlistsz32, KM_SLEEP);
7096 } else {
7097 evlist32 = event32;
7098 }
7099
7100 /* copyin the seglist into the rsm_poll_event32_t array */
7101 if (ddi_copyin((caddr_t)msgp->seglist, (caddr_t)evlist32,
7102 evlistsz32, mode)) {
7103 if ((msgp->numents > RSM_MAX_POLLFDS) && evlist32) {
7104 kmem_free(evlist32, evlistsz32);
7105 }
7106 DBG_PRINTF((category, RSM_ERR,
7107 "consumeevent_copyin evlist: RSMERR_BAD_ADDR\n"));
7108 return (RSMERR_BAD_ADDR);
7109 }
7110
7111 /* evlist and evlistsz are based on rsm_poll_event_t type */
7112 evlistsz = sizeof (rsm_poll_event_t)* msgp->numents;
7113
7114 if (msgp->numents > RSM_MAX_POLLFDS) {
7115 evlist = kmem_zalloc(evlistsz, KM_SLEEP);
7116 *eventspp = evlist;
7117 } else {
7118 evlist = *eventspp;
7119 }
7120 /*
7121 * copy the rsm_poll_event32_t array to the rsm_poll_event_t
7122 * array
7123 */
7124 for (i = 0; i < msgp->numents; i++) {
7125 evlist[i].rnum = evlist32[i].rnum;
7126 evlist[i].fdsidx = evlist32[i].fdsidx;
7127 evlist[i].revent = evlist32[i].revent;
7128 }
7129 /* free the temp 32-bit event list */
7130 if ((msgp->numents > RSM_MAX_POLLFDS) && evlist32) {
7131 kmem_free(evlist32, evlistsz32);
7132 }
7133
7134 return (RSM_SUCCESS);
7135 }
7136 #endif
7137 /* copyin the ioctl message */
7138 if (ddi_copyin(arg, (caddr_t)msgp, sizeof (rsm_consume_event_msg_t),
7139 mode)) {
7140 DBG_PRINTF((category, RSM_ERR,
7141 "consumeevent_copyin msgp: RSMERR_BAD_ADDR\n"));
7142 return (RSMERR_BAD_ADDR);
7143 }
7144 /*
* If numents is large, allocate the events list on the heap;
* otherwise use the array that was passed in.
7147 */
7148 if (msgp->numents > RSM_MAX_POLLFDS) {
7149 if (msgp->numents > max_segs) { /* validate numents */
7150 DBG_PRINTF((category, RSM_ERR,
7151 "consumeevent_copyin: RSMERR_BAD_ARGS_ERRORS\n"));
7152 return (RSMERR_BAD_ARGS_ERRORS);
7153 }
7154 evlistsz = sizeof (rsm_poll_event_t)*msgp->numents;
7155 evlist = kmem_zalloc(evlistsz, KM_SLEEP);
7156 *eventspp = evlist;
7157 }
7158
7159 /* copyin the seglist */
7160 if (ddi_copyin((caddr_t)msgp->seglist, (caddr_t)(*eventspp),
7161 sizeof (rsm_poll_event_t)*msgp->numents, mode)) {
7162 if (evlist) {
7163 kmem_free(evlist, evlistsz);
7164 *eventspp = NULL;
7165 }
7166 DBG_PRINTF((category, RSM_ERR,
7167 "consumeevent_copyin evlist: RSMERR_BAD_ADDR\n"));
7168 return (RSMERR_BAD_ADDR);
7169 }
7170
7171 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7172 "consumeevent_copyin done\n"));
7173 return (RSM_SUCCESS);
7174 }
7175
7176 static int
7177 rsm_consumeevent_copyout(rsm_consume_event_msg_t *msgp,
7178 rsm_poll_event_t *eventsp, int mode)
7179 {
7180 size_t evlistsz;
7181 int err = RSM_SUCCESS;
7182 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL);
7183
7184 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7185 "consumeevent_copyout enter: numents(%d) eventsp(%p)\n",
7186 msgp->numents, eventsp));
7187
7188 #ifdef _MULTI_DATAMODEL
7189 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7190 int i;
7191 rsm_poll_event32_t event32[RSM_MAX_POLLFDS];
7192 rsm_poll_event32_t *evlist32;
7193 size_t evlistsz32;
7194
7195 evlistsz32 = sizeof (rsm_poll_event32_t)*msgp->numents;
7196 if (msgp->numents > RSM_MAX_POLLFDS) {
7197 evlist32 = kmem_zalloc(evlistsz32, KM_SLEEP);
7198 } else {
7199 evlist32 = event32;
7200 }
7201
7202 /*
7203 * copy the rsm_poll_event_t array to the rsm_poll_event32_t
7204 * array
7205 */
7206 for (i = 0; i < msgp->numents; i++) {
7207 evlist32[i].rnum = eventsp[i].rnum;
7208 evlist32[i].fdsidx = eventsp[i].fdsidx;
7209 evlist32[i].revent = eventsp[i].revent;
7210 }
7211
7212 if (ddi_copyout((caddr_t)evlist32, (caddr_t)msgp->seglist,
7213 evlistsz32, mode)) {
7214 err = RSMERR_BAD_ADDR;
7215 }
7216
7217 if (msgp->numents > RSM_MAX_POLLFDS) {
7218 if (evlist32) { /* free the temp 32-bit event list */
7219 kmem_free(evlist32, evlistsz32);
7220 }
7221 /*
7222 * eventsp and evlistsz are based on rsm_poll_event_t
7223 * type
7224 */
7225 evlistsz = sizeof (rsm_poll_event_t)*msgp->numents;
7226 /* event list on the heap and needs to be freed here */
7227 if (eventsp) {
7228 kmem_free(eventsp, evlistsz);
7229 }
7230 }
7231
7232 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7233 "consumeevent_copyout done: err=%d\n", err));
7234 return (err);
7235 }
7236 #endif
7237 evlistsz = sizeof (rsm_poll_event_t)*msgp->numents;
7238
7239 if (ddi_copyout((caddr_t)eventsp, (caddr_t)msgp->seglist, evlistsz,
7240 mode)) {
7241 err = RSMERR_BAD_ADDR;
7242 }
7243
7244 if ((msgp->numents > RSM_MAX_POLLFDS) && eventsp) {
7245 /* event list on the heap and needs to be freed here */
7246 kmem_free(eventsp, evlistsz);
7247 }
7248
7249 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7250 "consumeevent_copyout done: err=%d\n", err));
7251 return (err);
7252 }
7253
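/*
 * Consume pending poll events on the segments listed by the caller: for
 * each resource number in the copied-in event list, decrement the
 * segment's s_pollevent count and report POLLRDNORM back in the list,
 * which is then copied back out to the user.
 */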
7254 static int
7255 rsm_consumeevent_ioctl(caddr_t arg, int mode)
7256 {
7257 int rc;
7258 int i;
7259 minor_t rnum;
7260 rsm_consume_event_msg_t msg = {0};
7261 rsmseg_t *seg;
7262 rsm_poll_event_t *event_list;
7263 rsm_poll_event_t events[RSM_MAX_POLLFDS];
7264 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL);
7265
7266 event_list = events;
7267
7268 if ((rc = rsm_consumeevent_copyin(arg, &msg, &event_list, mode)) !=
7269 RSM_SUCCESS) {
7270 return (rc);
7271 }
7272
7273 for (i = 0; i < msg.numents; i++) {
7274 rnum = event_list[i].rnum;
7275 event_list[i].revent = 0;
7276 /* get the segment structure */
7277 seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_LOCK);
7278 if (seg) {
7279 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7280 "consumeevent_ioctl: rnum(%d) seg(%p)\n", rnum,
7281 seg));
7282 if (seg->s_pollevent) {
7283 /* consume the event */
7284 atomic_dec_32(&seg->s_pollevent);
7285 event_list[i].revent = POLLRDNORM;
7286 }
7287 rsmseglock_release(seg);
7288 }
7289 }
7290
7291 if ((rc = rsm_consumeevent_copyout(&msg, event_list, mode)) !=
7292 RSM_SUCCESS) {
7293 return (rc);
7294 }
7295
7296 return (RSM_SUCCESS);
7297 }
7298
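/*
 * Copy in an array of 'count' rsmka_iovec_t entries from user space,
 * converting from the 32-bit layout when the caller is ILP32.
 */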
7299 static int
7300 iovec_copyin(caddr_t user_vec, rsmka_iovec_t *iovec, int count, int mode)
7301 {
7302 int size;
7303 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7304
7305 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "iovec_copyin enter\n"));
7306
7307 #ifdef _MULTI_DATAMODEL
7308 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7309 rsmka_iovec32_t *iovec32, *iovec32_base;
7310 int i;
7311
7312 size = count * sizeof (rsmka_iovec32_t);
7313 iovec32_base = iovec32 = kmem_zalloc(size, KM_SLEEP);
7314 if (ddi_copyin((caddr_t)user_vec,
7315 (caddr_t)iovec32, size, mode)) {
7316 kmem_free(iovec32, size);
7317 DBG_PRINTF((category, RSM_DEBUG,
7318 "iovec_copyin: returning RSMERR_BAD_ADDR\n"));
7319 return (RSMERR_BAD_ADDR);
7320 }
7321
7322 for (i = 0; i < count; i++, iovec++, iovec32++) {
7323 iovec->io_type = (int)iovec32->io_type;
7324 if (iovec->io_type == RSM_HANDLE_TYPE)
7325 iovec->local.segid = (rsm_memseg_id_t)
7326 iovec32->local;
7327 else
7328 iovec->local.vaddr =
7329 (caddr_t)(uintptr_t)iovec32->local;
7330 iovec->local_offset = (size_t)iovec32->local_offset;
7331 iovec->remote_offset = (size_t)iovec32->remote_offset;
7332 iovec->transfer_len = (size_t)iovec32->transfer_len;
7333
7334 }
7335 kmem_free(iovec32_base, size);
7336 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7337 "iovec_copyin done\n"));
7338 return (DDI_SUCCESS);
7339 }
7340 #endif
7341
7342 size = count * sizeof (rsmka_iovec_t);
7343 if (ddi_copyin((caddr_t)user_vec, (caddr_t)iovec, size, mode)) {
7344 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7345 "iovec_copyin done: RSMERR_BAD_ADDR\n"));
7346 return (RSMERR_BAD_ADDR);
7347 }
7348
7349 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "iovec_copyin done\n"));
7350
7351 return (DDI_SUCCESS);
7352 }
7353
7354
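/*
 * Copy in the caller's rsmka_scat_gath_t, converting from the 32-bit
 * layout when the caller is ILP32.
 */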
7355 static int
7356 sgio_copyin(caddr_t arg, rsmka_scat_gath_t *sg_io, int mode)
7357 {
7358 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7359
7360 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_copyin enter\n"));
7361
7362 #ifdef _MULTI_DATAMODEL
7363 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7364 rsmka_scat_gath32_t sg_io32;
7365
7366 if (ddi_copyin(arg, (caddr_t)&sg_io32, sizeof (sg_io32),
7367 mode)) {
7368 DBG_PRINTF((category, RSM_DEBUG,
7369 "sgio_copyin done: returning EFAULT\n"));
7370 return (RSMERR_BAD_ADDR);
7371 }
7372 sg_io->local_nodeid = (rsm_node_id_t)sg_io32.local_nodeid;
7373 sg_io->io_request_count = (size_t)sg_io32.io_request_count;
7374 sg_io->io_residual_count = (size_t)sg_io32.io_residual_count;
7375 sg_io->flags = (size_t)sg_io32.flags;
7376 sg_io->remote_handle = (rsm_memseg_import_handle_t)
7377 (uintptr_t)sg_io32.remote_handle;
7378 sg_io->iovec = (rsmka_iovec_t *)(uintptr_t)sg_io32.iovec;
7379 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7380 "sgio_copyin done\n"));
7381 return (DDI_SUCCESS);
7382 }
7383 #endif
7384 if (ddi_copyin(arg, (caddr_t)sg_io, sizeof (rsmka_scat_gath_t),
7385 mode)) {
7386 DBG_PRINTF((category, RSM_DEBUG,
7387 "sgio_copyin done: returning EFAULT\n"));
7388 return (RSMERR_BAD_ADDR);
7389 }
7390 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_copyin done\n"));
7391 return (DDI_SUCCESS);
7392 }
7393
7394 static int
7395 sgio_resid_copyout(caddr_t arg, rsmka_scat_gath_t *sg_io, int mode)
7396 {
7397 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7398
7399 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7400 "sgio_resid_copyout enter\n"));
7401
7402 #ifdef _MULTI_DATAMODEL
7403 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7404 rsmka_scat_gath32_t sg_io32;
7405
7406 sg_io32.io_residual_count = sg_io->io_residual_count;
7407 sg_io32.flags = sg_io->flags;
7408
7409 if (ddi_copyout((caddr_t)&sg_io32.io_residual_count,
7410 (caddr_t)&((rsmka_scat_gath32_t *)arg)->io_residual_count,
7411 sizeof (uint32_t), mode)) {
7412
7413 DBG_PRINTF((category, RSM_ERR,
7414 "sgio_resid_copyout error: rescnt\n"));
7415 return (RSMERR_BAD_ADDR);
7416 }
7417
7418 if (ddi_copyout((caddr_t)&sg_io32.flags,
7419 (caddr_t)&((rsmka_scat_gath32_t *)arg)->flags,
7420 sizeof (uint32_t), mode)) {
7421
7422 DBG_PRINTF((category, RSM_ERR,
7423 "sgio_resid_copyout error: flags\n"));
7424 return (RSMERR_BAD_ADDR);
7425 }
7426 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7427 "sgio_resid_copyout done\n"));
7428 return (DDI_SUCCESS);
7429 }
7430 #endif
7431 if (ddi_copyout((caddr_t)&sg_io->io_residual_count,
7432 (caddr_t)&((rsmka_scat_gath_t *)arg)->io_residual_count,
7433 sizeof (ulong_t), mode)) {
7434
7435 DBG_PRINTF((category, RSM_ERR,
7436 "sgio_resid_copyout error:rescnt\n"));
7437 return (RSMERR_BAD_ADDR);
7438 }
7439
7440 if (ddi_copyout((caddr_t)&sg_io->flags,
7441 (caddr_t)&((rsmka_scat_gath_t *)arg)->flags,
7442 sizeof (uint_t), mode)) {
7443
7444 DBG_PRINTF((category, RSM_ERR,
7445 "sgio_resid_copyout error:flags\n"));
7446 return (RSMERR_BAD_ADDR);
7447 }
7448
7449 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_resid_copyout done\n"));
7450 return (DDI_SUCCESS);
7451 }
7452
7453
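/*
 * Entry point for the RSM_IOCTL_PUTV/RSM_IOCTL_GETV scatter-gather
 * operations.  Outline of the flow below: copy in the rsmka_scat_gath_t
 * and its iovec array, look up the import segment for this minor number,
 * translate each kernel-agent iovec into an rsmpi iovec (resolving
 * RSM_HANDLE_TYPE entries to export segment handles, or to xbufs for
 * segments published with an empty acl), invoke the RSMPI putv/getv,
 * do the implicit signal post if requested, and copy the residual count
 * back out to the caller.
 */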
7454 static int
7455 rsm_iovec_ioctl(dev_t dev, caddr_t arg, int cmd, int mode, cred_t *credp)
7456 {
7457 rsmka_scat_gath_t sg_io;
7458 rsmka_iovec_t ka_iovec_arr[RSM_MAX_IOVLEN];
7459 rsmka_iovec_t *ka_iovec;
7460 rsmka_iovec_t *ka_iovec_start;
7461 rsmpi_scat_gath_t rsmpi_sg_io;
7462 rsmpi_iovec_t iovec_arr[RSM_MAX_IOVLEN];
7463 rsmpi_iovec_t *iovec;
7464 rsmpi_iovec_t *iovec_start = NULL;
7465 rsmapi_access_entry_t *acl;
7466 rsmresource_t *res;
7467 minor_t rnum;
7468 rsmseg_t *im_seg, *ex_seg;
7469 int e;
7470 int error = 0;
7471 uint_t i;
7472 uint_t iov_proc = 0; /* num of iovecs processed */
7473 size_t size = 0;
7474 size_t ka_size;
7475
7476 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7477
7478 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_iovec_ioctl enter\n"));
7479
7480 credp = credp;
7481
7482 /*
7483 * Copyin the scatter/gather structure and build new structure
7484 * for rsmpi.
7485 */
7486 e = sgio_copyin(arg, &sg_io, mode);
7487 if (e != DDI_SUCCESS) {
7488 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7489 "rsm_iovec_ioctl done: sgio_copyin %d\n", e));
7490 return (e);
7491 }
7492
7493 if (sg_io.io_request_count > RSM_MAX_SGIOREQS) {
7494 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7495 "rsm_iovec_ioctl done: request_count(%d) too large\n",
7496 sg_io.io_request_count));
7497 return (RSMERR_BAD_SGIO);
7498 }
7499
7500 rsmpi_sg_io.io_request_count = sg_io.io_request_count;
7501 rsmpi_sg_io.io_residual_count = sg_io.io_request_count;
7502 rsmpi_sg_io.io_segflg = 0;
7503
7504 /* Allocate memory and copyin io vector array */
7505 if (sg_io.io_request_count > RSM_MAX_IOVLEN) {
7506 ka_size = sg_io.io_request_count * sizeof (rsmka_iovec_t);
7507 ka_iovec_start = ka_iovec = kmem_zalloc(ka_size, KM_SLEEP);
7508 } else {
7509 ka_iovec_start = ka_iovec = ka_iovec_arr;
7510 }
7511 e = iovec_copyin((caddr_t)sg_io.iovec, ka_iovec,
7512 sg_io.io_request_count, mode);
7513 if (e != DDI_SUCCESS) {
7514 if (sg_io.io_request_count > RSM_MAX_IOVLEN)
7515 kmem_free(ka_iovec, ka_size);
7516 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7517 "rsm_iovec_ioctl done: iovec_copyin %d\n", e));
7518 return (e);
7519 }
7520
7521 /* get the import segment descriptor */
7522 rnum = getminor(dev);
7523 res = rsmresource_lookup(rnum, RSM_LOCK);
7524
7525 /*
7526 * The following sequence of locking may (or MAY NOT) cause a
7527 * deadlock but this is currently not addressed here since the
7528 * implementation will be changed to incorporate the use of
7529 * reference counting for both the import and the export segments.
7530 */
7531
7532 /* rsmseglock_acquire(im_seg) done in rsmresource_lookup */
7533
7534 im_seg = (rsmseg_t *)res;
7535
7536 if (im_seg == NULL) {
7537 if (sg_io.io_request_count > RSM_MAX_IOVLEN)
7538 kmem_free(ka_iovec, ka_size);
7539 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7540 "rsm_iovec_ioctl done: rsmresource_lookup failed\n"));
7541 return (EINVAL);
7542 }
/* putv/getv is supported only on import segments */
7544 if (im_seg->s_type != RSM_RESOURCE_IMPORT_SEGMENT) {
7545 rsmseglock_release(im_seg);
7546 if (sg_io.io_request_count > RSM_MAX_IOVLEN)
7547 kmem_free(ka_iovec, ka_size);
7548 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7549 "rsm_iovec_ioctl done: not an import segment\n"));
7550 return (EINVAL);
7551 }
7552
7553 /*
* wait for a remote DR to complete, i.e. for segments to get
* UNQUIESCED, as well as for a local DR to complete.
7556 */
7557 while ((im_seg->s_state == RSM_STATE_CONN_QUIESCE) ||
7558 (im_seg->s_state == RSM_STATE_MAP_QUIESCE) ||
7559 (im_seg->s_flags & RSM_DR_INPROGRESS)) {
7560 if (cv_wait_sig(&im_seg->s_cv, &im_seg->s_lock) == 0) {
7561 DBG_PRINTF((category, RSM_DEBUG,
7562 "rsm_iovec_ioctl done: cv_wait INTR"));
7563 rsmseglock_release(im_seg);
7564 return (RSMERR_INTERRUPTED);
7565 }
7566 }
7567
7568 if ((im_seg->s_state != RSM_STATE_CONNECT) &&
7569 (im_seg->s_state != RSM_STATE_ACTIVE)) {
7570
7571 ASSERT(im_seg->s_state == RSM_STATE_DISCONNECT ||
7572 im_seg->s_state == RSM_STATE_NEW);
7573
7574 DBG_PRINTF((category, RSM_DEBUG,
7575 "rsm_iovec_ioctl done: im_seg not conn/map"));
7576 rsmseglock_release(im_seg);
7577 e = RSMERR_BAD_SGIO;
7578 goto out;
7579 }
7580
7581 im_seg->s_rdmacnt++;
7582 rsmseglock_release(im_seg);
7583
7584 /*
7585 * Allocate and set up the io vector for rsmpi
7586 */
7587 if (sg_io.io_request_count > RSM_MAX_IOVLEN) {
7588 size = sg_io.io_request_count * sizeof (rsmpi_iovec_t);
7589 iovec_start = iovec = kmem_zalloc(size, KM_SLEEP);
7590 } else {
7591 iovec_start = iovec = iovec_arr;
7592 }
7593
7594 rsmpi_sg_io.iovec = iovec;
7595 for (iov_proc = 0; iov_proc < sg_io.io_request_count; iov_proc++) {
7596 if (ka_iovec->io_type == RSM_HANDLE_TYPE) {
7597 ex_seg = rsmexport_lookup(ka_iovec->local.segid);
7598
7599 if (ex_seg == NULL) {
7600 e = RSMERR_BAD_SGIO;
7601 break;
7602 }
7603 ASSERT(ex_seg->s_state == RSM_STATE_EXPORT);
7604
7605 acl = ex_seg->s_acl;
7606 if (acl[0].ae_permission == 0) {
7607 struct buf *xbuf;
7608 dev_t sdev = 0;
7609
7610 xbuf = ddi_umem_iosetup(ex_seg->s_cookie,
7611 0, ex_seg->s_len, B_WRITE,
7612 sdev, 0, NULL, DDI_UMEM_SLEEP);
7613
7614 ASSERT(xbuf != NULL);
7615
7616 iovec->local_mem.ms_type = RSM_MEM_BUF;
7617 iovec->local_mem.ms_memory.bp = xbuf;
7618 } else {
7619 iovec->local_mem.ms_type = RSM_MEM_HANDLE;
7620 iovec->local_mem.ms_memory.handle =
7621 ex_seg->s_handle.out;
7622 }
7623 ex_seg->s_rdmacnt++; /* refcnt the handle */
7624 rsmseglock_release(ex_seg);
7625 } else {
7626 iovec->local_mem.ms_type = RSM_MEM_VADDR;
7627 iovec->local_mem.ms_memory.vr.vaddr =
7628 ka_iovec->local.vaddr;
7629 }
7630
7631 iovec->local_offset = ka_iovec->local_offset;
7632 iovec->remote_handle = im_seg->s_handle.in;
7633 iovec->remote_offset = ka_iovec->remote_offset;
7634 iovec->transfer_length = ka_iovec->transfer_len;
7635 iovec++;
7636 ka_iovec++;
7637 }
7638
7639 if (iov_proc < sg_io.io_request_count) {
7640 /* error while processing handle */
7641 rsmseglock_acquire(im_seg);
7642 im_seg->s_rdmacnt--; /* decrement the refcnt for importseg */
7643 if (im_seg->s_rdmacnt == 0) {
7644 cv_broadcast(&im_seg->s_cv);
7645 }
7646 rsmseglock_release(im_seg);
7647 goto out;
7648 }
7649
7650 /* call rsmpi */
7651 if (cmd == RSM_IOCTL_PUTV)
7652 e = im_seg->s_adapter->rsmpi_ops->rsm_memseg_import_putv(
7653 im_seg->s_adapter->rsmpi_handle,
7654 &rsmpi_sg_io);
7655 else if (cmd == RSM_IOCTL_GETV)
7656 e = im_seg->s_adapter->rsmpi_ops->rsm_memseg_import_getv(
7657 im_seg->s_adapter->rsmpi_handle,
7658 &rsmpi_sg_io);
7659 else {
7660 e = EINVAL;
7661 DBG_PRINTF((category, RSM_DEBUG,
7662 "iovec_ioctl: bad command = %x\n", cmd));
7663 }
7664
7666 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7667 "rsm_iovec_ioctl RSMPI oper done %d\n", e));
7668
7669 sg_io.io_residual_count = rsmpi_sg_io.io_residual_count;
7670
7671 /*
7672 * Check for implicit signal post flag and do the signal
7673 * post if needed
7674 */
7675 if (sg_io.flags & RSM_IMPLICIT_SIGPOST &&
7676 e == RSM_SUCCESS) {
7677 rsmipc_request_t request;
7678
7679 request.rsmipc_key = im_seg->s_segid;
7680 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL;
7681 request.rsmipc_segment_cookie = NULL;
7682 e = rsmipc_send(im_seg->s_node, &request, RSM_NO_REPLY);
7683 /*
7684 * Reset the implicit signal post flag to 0 to indicate
7685 * that the signal post has been done and need not be
7686 * done in the RSMAPI library
7687 */
7688 sg_io.flags &= ~RSM_IMPLICIT_SIGPOST;
7689 }
7690
7691 rsmseglock_acquire(im_seg);
7692 im_seg->s_rdmacnt--;
7693 if (im_seg->s_rdmacnt == 0) {
7694 cv_broadcast(&im_seg->s_cv);
7695 }
7696 rsmseglock_release(im_seg);
7697 error = sgio_resid_copyout(arg, &sg_io, mode);
7698 out:
7699 iovec = iovec_start;
7700 ka_iovec = ka_iovec_start;
7701 for (i = 0; i < iov_proc; i++) {
7702 if (ka_iovec->io_type == RSM_HANDLE_TYPE) {
7703 ex_seg = rsmexport_lookup(ka_iovec->local.segid);
7704
7705 ASSERT(ex_seg != NULL);
7706 ASSERT(ex_seg->s_state == RSM_STATE_EXPORT);
7707
7708 ex_seg->s_rdmacnt--; /* unrefcnt the handle */
7709 if (ex_seg->s_rdmacnt == 0) {
7710 cv_broadcast(&ex_seg->s_cv);
7711 }
7712 rsmseglock_release(ex_seg);
7713 }
7714
7715 ASSERT(iovec != NULL); /* true if iov_proc > 0 */
7716
		/*
		 * At present there is no dependency on the existence of xbufs
		 * created by ddi_umem_iosetup for each of the iovecs. So we
		 * can free these xbufs here.
		 */
7722 if (iovec->local_mem.ms_type == RSM_MEM_BUF) {
7723 freerbuf(iovec->local_mem.ms_memory.bp);
7724 }
7725
7726 iovec++;
7727 ka_iovec++;
7728 }
7729
7730 if (sg_io.io_request_count > RSM_MAX_IOVLEN) {
7731 if (iovec_start)
7732 kmem_free(iovec_start, size);
7733 kmem_free(ka_iovec_start, ka_size);
7734 }
7735
7736 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7737 "rsm_iovec_ioctl done %d\n", e));
7738 /* if RSMPI call fails return that else return copyout's retval */
7739 return ((e != RSM_SUCCESS) ? e : error);
7740
7741 }
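
/*
 * For context, each element of the copied-in ka_iovec array above describes
 * one transfer: io_type selects either RSM_HANDLE_TYPE, in which case
 * local.segid names an export segment on this node, or a virtual address
 * source given by local.vaddr; local_offset, remote_offset and transfer_len
 * then place the data within the local memory and the import segment. A
 * hypothetical two-entry request could thus gather from one export segment
 * and one user buffer into the same import segment with a single
 * RSM_IOCTL_PUTV call.
 */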
7742
7743
7744 static int
7745 rsmaddr_ioctl(int cmd, rsm_ioctlmsg_t *msg, int mode)
7746 {
7747 adapter_t *adapter;
7748 rsm_addr_t addr;
7749 rsm_node_id_t node;
7750 int rval = DDI_SUCCESS;
7751 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL);
7752
7753 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmaddr_ioctl enter\n"));
7754
7755 adapter = rsm_getadapter(msg, mode);
7756 if (adapter == NULL) {
7757 DBG_PRINTF((category, RSM_DEBUG,
7758 "rsmaddr_ioctl done: adapter not found\n"));
7759 return (RSMERR_CTLR_NOT_PRESENT);
7760 }
7761
7762 switch (cmd) {
7763 case RSM_IOCTL_MAP_TO_ADDR: /* nodeid to hwaddr mapping */
7764 /* returns the hwaddr in msg->hwaddr */
7765 if (msg->nodeid == my_nodeid) {
7766 msg->hwaddr = adapter->hwaddr;
7767 } else {
7768 addr = get_remote_hwaddr(adapter, msg->nodeid);
7769 if ((int64_t)addr < 0) {
7770 rval = RSMERR_INTERNAL_ERROR;
7771 } else {
7772 msg->hwaddr = addr;
7773 }
7774 }
7775 break;
7776 case RSM_IOCTL_MAP_TO_NODEID: /* hwaddr to nodeid mapping */
7777 /* returns the nodeid in msg->nodeid */
7778 if (msg->hwaddr == adapter->hwaddr) {
7779 msg->nodeid = my_nodeid;
7780 } else {
7781 node = get_remote_nodeid(adapter, msg->hwaddr);
7782 if ((int)node < 0) {
7783 rval = RSMERR_INTERNAL_ERROR;
7784 } else {
7785 msg->nodeid = (rsm_node_id_t)node;
7786 }
7787 }
7788 break;
7789 default:
7790 rval = EINVAL;
7791 break;
7792 }
7793
7794 rsmka_release_adapter(adapter);
7795 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7796 "rsmaddr_ioctl done: %d\n", rval));
7797 return (rval);
7798 }
7799
7800 static int
7801 rsm_ddi_copyin(caddr_t arg, rsm_ioctlmsg_t *msg, int mode)
7802 {
7803 DBG_DEFINE(category,
7804 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL | RSM_DDI);
7805
7806 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ddi_copyin enter\n"));
7807
7808 #ifdef _MULTI_DATAMODEL
7809
7810 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7811 rsm_ioctlmsg32_t msg32;
7812 int i;
7813
7814 if (ddi_copyin(arg, (caddr_t)&msg32, sizeof (msg32), mode)) {
7815 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7816 "rsm_ddi_copyin done: EFAULT\n"));
7817 return (RSMERR_BAD_ADDR);
7818 }
7819 msg->len = msg32.len;
7820 msg->vaddr = (caddr_t)(uintptr_t)msg32.vaddr;
7821 msg->arg = (caddr_t)(uintptr_t)msg32.arg;
7822 msg->key = msg32.key;
7823 msg->acl_len = msg32.acl_len;
7824 msg->acl = (rsmapi_access_entry_t *)(uintptr_t)msg32.acl;
7825 msg->cnum = msg32.cnum;
7826 msg->cname = (caddr_t)(uintptr_t)msg32.cname;
7827 msg->cname_len = msg32.cname_len;
7828 msg->nodeid = msg32.nodeid;
7829 msg->hwaddr = msg32.hwaddr;
7830 msg->perm = msg32.perm;
7831 for (i = 0; i < 4; i++) {
7832 msg->bar.comp[i].u64 = msg32.bar.comp[i].u64;
7833 }
7834 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7835 "rsm_ddi_copyin done\n"));
7836 return (RSM_SUCCESS);
7837 }
7838 #endif
7839 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ddi_copyin done\n"));
7840 if (ddi_copyin(arg, (caddr_t)msg, sizeof (*msg), mode))
7841 return (RSMERR_BAD_ADDR);
7842 else
7843 return (RSM_SUCCESS);
7844 }
7845
7846 static int
7847 rsmattr_ddi_copyout(adapter_t *adapter, caddr_t arg, int mode)
7848 {
7849 rsmka_int_controller_attr_t rsm_cattr;
7850 DBG_DEFINE(category,
7851 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL | RSM_DDI);
7852
7853 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7854 "rsmattr_ddi_copyout enter\n"));
7855 /*
7856 * need to copy appropriate data from rsm_controller_attr_t
7857 * to rsmka_int_controller_attr_t
7858 */
7859 #ifdef _MULTI_DATAMODEL
7860 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7861 rsmka_int_controller_attr32_t rsm_cattr32;
7862
7863 rsm_cattr32.attr_direct_access_sizes =
7864 adapter->rsm_attr.attr_direct_access_sizes;
7865 rsm_cattr32.attr_atomic_sizes =
7866 adapter->rsm_attr.attr_atomic_sizes;
7867 rsm_cattr32.attr_page_size =
7868 adapter->rsm_attr.attr_page_size;
7869 if (adapter->rsm_attr.attr_max_export_segment_size >
7870 UINT_MAX)
7871 rsm_cattr32.attr_max_export_segment_size =
7872 RSM_MAXSZ_PAGE_ALIGNED;
7873 else
7874 rsm_cattr32.attr_max_export_segment_size =
7875 adapter->rsm_attr.attr_max_export_segment_size;
7876 if (adapter->rsm_attr.attr_tot_export_segment_size >
7877 UINT_MAX)
7878 rsm_cattr32.attr_tot_export_segment_size =
7879 RSM_MAXSZ_PAGE_ALIGNED;
7880 else
7881 rsm_cattr32.attr_tot_export_segment_size =
7882 adapter->rsm_attr.attr_tot_export_segment_size;
7883 if (adapter->rsm_attr.attr_max_export_segments >
7884 UINT_MAX)
7885 rsm_cattr32.attr_max_export_segments =
7886 UINT_MAX;
7887 else
7888 rsm_cattr32.attr_max_export_segments =
7889 adapter->rsm_attr.attr_max_export_segments;
7890 if (adapter->rsm_attr.attr_max_import_map_size >
7891 UINT_MAX)
7892 rsm_cattr32.attr_max_import_map_size =
7893 RSM_MAXSZ_PAGE_ALIGNED;
7894 else
7895 rsm_cattr32.attr_max_import_map_size =
7896 adapter->rsm_attr.attr_max_import_map_size;
7897 if (adapter->rsm_attr.attr_tot_import_map_size >
7898 UINT_MAX)
7899 rsm_cattr32.attr_tot_import_map_size =
7900 RSM_MAXSZ_PAGE_ALIGNED;
7901 else
7902 rsm_cattr32.attr_tot_import_map_size =
7903 adapter->rsm_attr.attr_tot_import_map_size;
7904 if (adapter->rsm_attr.attr_max_import_segments >
7905 UINT_MAX)
7906 rsm_cattr32.attr_max_import_segments =
7907 UINT_MAX;
7908 else
7909 rsm_cattr32.attr_max_import_segments =
7910 adapter->rsm_attr.attr_max_import_segments;
7911 rsm_cattr32.attr_controller_addr =
7912 adapter->rsm_attr.attr_controller_addr;
7913
7914 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7915 "rsmattr_ddi_copyout done\n"));
7916 if (ddi_copyout((caddr_t)&rsm_cattr32, arg,
7917 sizeof (rsmka_int_controller_attr32_t), mode)) {
7918 return (RSMERR_BAD_ADDR);
7919 }
7920 else
7921 return (RSM_SUCCESS);
7922 }
7923 #endif
7924 rsm_cattr.attr_direct_access_sizes =
7925 adapter->rsm_attr.attr_direct_access_sizes;
7926 rsm_cattr.attr_atomic_sizes =
7927 adapter->rsm_attr.attr_atomic_sizes;
7928 rsm_cattr.attr_page_size =
7929 adapter->rsm_attr.attr_page_size;
7930 rsm_cattr.attr_max_export_segment_size =
7931 adapter->rsm_attr.attr_max_export_segment_size;
7932 rsm_cattr.attr_tot_export_segment_size =
7933 adapter->rsm_attr.attr_tot_export_segment_size;
7934 rsm_cattr.attr_max_export_segments =
7935 adapter->rsm_attr.attr_max_export_segments;
7936 rsm_cattr.attr_max_import_map_size =
7937 adapter->rsm_attr.attr_max_import_map_size;
7938 rsm_cattr.attr_tot_import_map_size =
7939 adapter->rsm_attr.attr_tot_import_map_size;
7940 rsm_cattr.attr_max_import_segments =
7941 adapter->rsm_attr.attr_max_import_segments;
7942 rsm_cattr.attr_controller_addr =
7943 adapter->rsm_attr.attr_controller_addr;
7944 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7945 "rsmattr_ddi_copyout done\n"));
7946 if (ddi_copyout((caddr_t)&rsm_cattr, arg,
7947 sizeof (rsmka_int_controller_attr_t), mode)) {
7948 return (RSMERR_BAD_ADDR);
7949 }
7950 else
7951 return (RSM_SUCCESS);
7952 }
7953
7954 /*ARGSUSED*/
7955 static int
7956 rsm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
7957 int *rvalp)
7958 {
7959 rsmseg_t *seg;
7960 rsmresource_t *res;
7961 minor_t rnum;
7962 rsm_ioctlmsg_t msg = {0};
7963 int error;
7964 adapter_t *adapter;
7965 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL);
7966
7967 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ioctl enter\n"));
7968
7969 if (cmd == RSM_IOCTL_CONSUMEEVENT) {
7970 error = rsm_consumeevent_ioctl((caddr_t)arg, mode);
7971 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7972 "rsm_ioctl RSM_IOCTL_CONSUMEEVENT done: %d\n", error));
7973 return (error);
7974 }
7975
7976 /* topology cmd does not use the arg common to other cmds */
7977 if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_TOPOLOGY) {
7978 error = rsmka_topology_ioctl((caddr_t)arg, cmd, mode);
7979 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7980 "rsm_ioctl done: %d\n", error));
7981 return (error);
7982 }
7983
7984 if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_IOVEC) {
7985 error = rsm_iovec_ioctl(dev, (caddr_t)arg, cmd, mode, credp);
7986 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7987 "rsm_ioctl done: %d\n", error));
7988 return (error);
7989 }
7990
7991 /*
7992 * try to load arguments
7993 */
7994 if (cmd != RSM_IOCTL_RING_BELL &&
7995 rsm_ddi_copyin((caddr_t)arg, &msg, mode)) {
7996 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7997 "rsm_ioctl done: EFAULT\n"));
7998 return (RSMERR_BAD_ADDR);
7999 }
8000
8001 if (cmd == RSM_IOCTL_ATTR) {
8002 adapter = rsm_getadapter(&msg, mode);
8003 if (adapter == NULL) {
8004 DBG_PRINTF((category, RSM_DEBUG,
8005 "rsm_ioctl done: ENODEV\n"));
8006 return (RSMERR_CTLR_NOT_PRESENT);
8007 }
8008 error = rsmattr_ddi_copyout(adapter, msg.arg, mode);
8009 rsmka_release_adapter(adapter);
8010 DBG_PRINTF((category, RSM_DEBUG,
8011 "rsm_ioctl:after copyout %d\n", error));
8012 return (error);
8013 }
8014
8015 if (cmd == RSM_IOCTL_BAR_INFO) {
8016 /* Return library off,len of barrier page */
8017 msg.off = barrier_offset;
8018 msg.len = (int)barrier_size;
8019 #ifdef _MULTI_DATAMODEL
8020 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
8021 rsm_ioctlmsg32_t msg32;
8022
			if (msg.len > UINT_MAX)
				msg32.len = RSM_MAXSZ_PAGE_ALIGNED;
			else
				msg32.len = (int32_t)msg.len;
8027 msg32.off = (int32_t)msg.off;
8028 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8029 "rsm_ioctl done\n"));
8030 if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg,
8031 sizeof (msg32), mode))
8032 return (RSMERR_BAD_ADDR);
8033 else
8034 return (RSM_SUCCESS);
8035 }
8036 #endif
8037 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8038 "rsm_ioctl done\n"));
8039 if (ddi_copyout((caddr_t)&msg, (caddr_t)arg,
8040 sizeof (msg), mode))
8041 return (RSMERR_BAD_ADDR);
8042 else
8043 return (RSM_SUCCESS);
8044 }
8045
8046 if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_MAP_ADDR) {
8047 /* map the nodeid or hwaddr */
8048 error = rsmaddr_ioctl(cmd, &msg, mode);
8049 if (error == RSM_SUCCESS) {
8050 #ifdef _MULTI_DATAMODEL
8051 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
8052 rsm_ioctlmsg32_t msg32;
8053
8054 msg32.hwaddr = (uint64_t)msg.hwaddr;
8055 msg32.nodeid = (uint32_t)msg.nodeid;
8056
8057 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8058 "rsm_ioctl done\n"));
8059 if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg,
8060 sizeof (msg32), mode))
8061 return (RSMERR_BAD_ADDR);
8062 else
8063 return (RSM_SUCCESS);
8064 }
8065 #endif
8066 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8067 "rsm_ioctl done\n"));
8068 if (ddi_copyout((caddr_t)&msg, (caddr_t)arg,
8069 sizeof (msg), mode))
8070 return (RSMERR_BAD_ADDR);
8071 else
8072 return (RSM_SUCCESS);
8073 }
8074 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8075 "rsm_ioctl done: %d\n", error));
8076 return (error);
8077 }
8078
	/* Find resource and look it up in read mode */
8080 rnum = getminor(dev);
8081 res = rsmresource_lookup(rnum, RSM_NOLOCK);
8082 ASSERT(res != NULL);
8083
8084 /*
8085 * Find command group
8086 */
8087 switch (RSM_IOCTL_CMDGRP(cmd)) {
8088 case RSM_IOCTL_EXPORT_SEG:
8089 /*
8090 * Export list is searched during publish, loopback and
		 * remote lookup calls.
8092 */
8093 seg = rsmresource_seg(res, rnum, credp,
8094 RSM_RESOURCE_EXPORT_SEGMENT);
8095 if (seg->s_type == RSM_RESOURCE_EXPORT_SEGMENT) {
8096 error = rsmexport_ioctl(seg, &msg, cmd, arg, mode,
8097 credp);
8098 } else { /* export ioctl on an import/barrier resource */
8099 error = RSMERR_BAD_SEG_HNDL;
8100 }
8101 break;
8102 case RSM_IOCTL_IMPORT_SEG:
8103 /* Import list is searched during remote unmap call. */
8104 seg = rsmresource_seg(res, rnum, credp,
8105 RSM_RESOURCE_IMPORT_SEGMENT);
8106 if (seg->s_type == RSM_RESOURCE_IMPORT_SEGMENT) {
8107 error = rsmimport_ioctl(seg, &msg, cmd, arg, mode,
8108 credp);
8109 } else { /* import ioctl on an export/barrier resource */
8110 error = RSMERR_BAD_SEG_HNDL;
8111 }
8112 break;
8113 case RSM_IOCTL_BAR:
8114 if (res != RSMRC_RESERVED &&
8115 res->rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT) {
8116 error = rsmbar_ioctl((rsmseg_t *)res, &msg, cmd, arg,
8117 mode);
8118 } else { /* invalid res value */
8119 error = RSMERR_BAD_SEG_HNDL;
8120 }
8121 break;
8122 case RSM_IOCTL_BELL:
8123 if (res != RSMRC_RESERVED) {
8124 if (res->rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT)
8125 error = exportbell_ioctl((rsmseg_t *)res, cmd);
8126 else if (res->rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT)
8127 error = importbell_ioctl((rsmseg_t *)res, cmd);
8128 else /* RSM_RESOURCE_BAR */
8129 error = RSMERR_BAD_SEG_HNDL;
8130 } else { /* invalid res value */
8131 error = RSMERR_BAD_SEG_HNDL;
8132 }
8133 break;
8134 default:
8135 error = EINVAL;
8136 }
8137
8138 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ioctl done: %d\n",
8139 error));
8140 return (error);
8141 }
8142
8143
8144 /* **************************** Segment Mapping Operations ********* */
8145 static rsm_mapinfo_t *
8146 rsm_get_mapinfo(rsmseg_t *seg, off_t off, size_t len, off_t *dev_offset,
8147 size_t *map_len)
8148 {
8149 rsm_mapinfo_t *p;
8150 /*
8151 * Find the correct mapinfo structure to use during the mapping
8152 * from the seg->s_mapinfo list.
8153 * The seg->s_mapinfo list contains in reverse order the mappings
8154 * as returned by the RSMPI rsm_map. In rsm_devmap, we need to
8155 * access the correct entry within this list for the mapping
8156 * requested.
8157 *
8158 * The algorithm for selecting a list entry is as follows:
8159 *
8160 * When start_offset of an entry <= off we have found the entry
8161 * we were looking for. Adjust the dev_offset and map_len (needs
8162 * to be PAGESIZE aligned).
8163 */
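	/*
	 * A worked example (illustrative values only): if the list holds
	 * { start_offset 0x4000, dev_offset 0xA000, individual_len 0x2000 }
	 * followed by
	 * { start_offset 0x0, dev_offset 0x8000, individual_len 0x4000 },
	 * then off = 0x5000 matches the first entry and yields
	 * *dev_offset = 0xA000 + 0x5000 - 0x4000 = 0xB000.
	 */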
	for (p = seg->s_mapinfo; p != NULL; p = p->next) {
		if (p->start_offset <= off) {
			*dev_offset = p->dev_offset + off - p->start_offset;
			*map_len = (len > p->individual_len) ?
			    p->individual_len : ptob(btopr(len));
			return (p);
		}
	}
8174
8175 return (NULL);
8176 }
8177
8178 static void
8179 rsm_free_mapinfo(rsm_mapinfo_t *mapinfo)
8180 {
8181 rsm_mapinfo_t *p;
8182
8183 while (mapinfo != NULL) {
8184 p = mapinfo;
8185 mapinfo = mapinfo->next;
8186 kmem_free(p, sizeof (*p));
8187 }
8188 }
8189
8190 static int
8191 rsmmap_map(devmap_cookie_t dhp, dev_t dev, uint_t flags, offset_t off,
8192 size_t len, void **pvtp)
8193 {
8194 rsmcookie_t *p;
8195 rsmresource_t *res;
8196 rsmseg_t *seg;
8197 minor_t rnum;
8198 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8199
8200 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_map enter\n"));
8201
8202 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8203 "rsmmap_map: dhp = %x\n", dhp));
8204
8205 flags = flags;
8206
8207 rnum = getminor(dev);
8208 res = (rsmresource_t *)rsmresource_lookup(rnum, RSM_NOLOCK);
8209 ASSERT(res != NULL);
8210
8211 seg = (rsmseg_t *)res;
8212
8213 rsmseglock_acquire(seg);
8214
8215 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8216
8217 /*
8218 * Allocate structure and add cookie to segment list
8219 */
8220 p = kmem_alloc(sizeof (*p), KM_SLEEP);
8221
8222 p->c_dhp = dhp;
8223 p->c_off = off;
8224 p->c_len = len;
8225 p->c_next = seg->s_ckl;
8226 seg->s_ckl = p;
8227
8228 *pvtp = (void *)seg;
8229
8230 rsmseglock_release(seg);
8231
8232 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_map done\n"));
8233 return (DDI_SUCCESS);
8234 }
8235
8236 /*
8237 * Page fault handling is done here. The prerequisite mapping setup
8238 * has been done in rsm_devmap with calls to ddi_devmem_setup or
8239 * ddi_umem_setup
8240 */
8241 static int
8242 rsmmap_access(devmap_cookie_t dhp, void *pvt, offset_t offset, size_t len,
8243 uint_t type, uint_t rw)
8244 {
8245 int e;
8246 rsmseg_t *seg = (rsmseg_t *)pvt;
8247 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8248
8249 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_access enter\n"));
8250
8251 rsmseglock_acquire(seg);
8252
8253 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8254
8255 while (seg->s_state == RSM_STATE_MAP_QUIESCE) {
8256 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
8257 DBG_PRINTF((category, RSM_DEBUG,
8258 "rsmmap_access done: cv_wait INTR"));
8259 rsmseglock_release(seg);
8260 return (RSMERR_INTERRUPTED);
8261 }
8262 }
8263
8264 ASSERT(seg->s_state == RSM_STATE_DISCONNECT ||
8265 seg->s_state == RSM_STATE_ACTIVE);
8266
8267 if (seg->s_state == RSM_STATE_DISCONNECT)
8268 seg->s_flags |= RSM_IMPORT_DUMMY;
8269
8270 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8271 "rsmmap_access: dhp = %x\n", dhp));
8272
8273 rsmseglock_release(seg);
8274
8275 if (e = devmap_load(dhp, offset, len, type, rw)) {
8276 DBG_PRINTF((category, RSM_ERR, "devmap_load failed\n"));
8277 }
8278
8279
8280 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_access done\n"));
8281
8282 return (e);
8283 }
8284
8285 static int
8286 rsmmap_dup(devmap_cookie_t dhp, void *oldpvt, devmap_cookie_t new_dhp,
8287 void **newpvt)
8288 {
8289 rsmseg_t *seg = (rsmseg_t *)oldpvt;
8290 rsmcookie_t *p, *old;
8291 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8292
8293 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_dup enter\n"));
8294
8295 /*
8296 * Same as map, create an entry to hold cookie and add it to
8297 * connect segment list. The oldpvt is a pointer to segment.
8298 * Return segment pointer in newpvt.
8299 */
8300 rsmseglock_acquire(seg);
8301
8302 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8303
8304 /*
8305 * Find old cookie
8306 */
8307 for (old = seg->s_ckl; old != NULL; old = old->c_next) {
8308 if (old->c_dhp == dhp) {
8309 break;
8310 }
8311 }
8312 if (old == NULL) {
8313 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8314 "rsmmap_dup done: EINVAL\n"));
8315 rsmseglock_release(seg);
8316 return (EINVAL);
8317 }
8318
8319 p = kmem_alloc(sizeof (*p), KM_SLEEP);
8320
8321 p->c_dhp = new_dhp;
8322 p->c_off = old->c_off;
8323 p->c_len = old->c_len;
8324 p->c_next = seg->s_ckl;
8325 seg->s_ckl = p;
8326
8327 *newpvt = (void *)seg;
8328
8329 rsmseglock_release(seg);
8330
8331 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_dup done\n"));
8332
8333 return (DDI_SUCCESS);
8334 }
8335
8336 static void
8337 rsmmap_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off, size_t len,
8338 devmap_cookie_t new_dhp1, void **pvtp1,
8339 devmap_cookie_t new_dhp2, void **pvtp2)
8340 {
8341 /*
8342 * Remove pvtp structure from segment list.
8343 */
8344 rsmseg_t *seg = (rsmseg_t *)pvtp;
8345 int freeflag;
8346
8347 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8348
8349 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_unmap enter\n"));
8350
8351 off = off; len = len;
8352 pvtp1 = pvtp1; pvtp2 = pvtp2;
8353
8354 rsmseglock_acquire(seg);
8355
8356 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8357
8358 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8359 "rsmmap_unmap: dhp = %x\n", dhp));
8360 /*
8361 * We can go ahead and remove the dhps even if we are in
8362 * the MAPPING state because the dhps being removed here
8363 * belong to a different mmap and we are holding the segment
8364 * lock.
8365 */
8366 if (new_dhp1 == NULL && new_dhp2 == NULL) {
8367 /* find and remove dhp handle */
8368 rsmcookie_t *tmp, **back = &seg->s_ckl;
8369
8370 while (*back != NULL) {
8371 tmp = *back;
8372 if (tmp->c_dhp == dhp) {
8373 *back = tmp->c_next;
8374 kmem_free(tmp, sizeof (*tmp));
8375 break;
8376 }
8377 back = &tmp->c_next;
8378 }
8379 } else {
8380 DBG_PRINTF((category, RSM_DEBUG_LVL2,
		    "rsmmap_unmap: partial unmap "
8382 "new_dhp1 %lx, new_dhp2 %lx\n",
8383 (size_t)new_dhp1, (size_t)new_dhp2));
8384 }
8385
8386 /*
8387 * rsmmap_unmap is called for each mapping cookie on the list.
8388 * When the list becomes empty and we are not in the MAPPING
8389 * state then unmap in the rsmpi driver.
8390 */
8391 if ((seg->s_ckl == NULL) && (seg->s_state != RSM_STATE_MAPPING))
8392 (void) rsm_unmap(seg);
8393
8394 if (seg->s_state == RSM_STATE_END && seg->s_ckl == NULL) {
8395 freeflag = 1;
8396 } else {
8397 freeflag = 0;
8398 }
8399
8400 rsmseglock_release(seg);
8401
8402 if (freeflag) {
8403 /* Free the segment structure */
8404 rsmseg_free(seg);
8405 }
8406 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_unmap done\n"));
8407
8408 }
8409
8410 static struct devmap_callback_ctl rsmmap_ops = {
8411 DEVMAP_OPS_REV, /* devmap_ops version number */
8412 rsmmap_map, /* devmap_ops map routine */
8413 rsmmap_access, /* devmap_ops access routine */
8414 rsmmap_dup, /* devmap_ops dup routine */
8415 rsmmap_unmap, /* devmap_ops unmap routine */
8416 };
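
/*
 * This ops vector is handed to devmap_devmem_setup/devmap_umem_setup in
 * rsm_devmap below; the devmap framework then calls rsmmap_map when a
 * mapping is created, rsmmap_access on faults, rsmmap_dup when a mapping
 * is duplicated (e.g. across fork) and rsmmap_unmap when it is torn down.
 */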
8417
8418 static int
8419 rsm_devmap(dev_t dev, devmap_cookie_t dhc, offset_t off, size_t len,
8420 size_t *maplen, uint_t model /*ARGSUSED*/)
8421 {
8422 struct devmap_callback_ctl *callbackops = &rsmmap_ops;
8423 int err;
8424 uint_t maxprot;
8425 minor_t rnum;
8426 rsmseg_t *seg;
8427 off_t dev_offset;
8428 size_t cur_len;
8429 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8430
8431 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_devmap enter\n"));
8432
8433 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8434 "rsm_devmap: off = %lx, len = %lx\n", off, len));
8435 rnum = getminor(dev);
8436 seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_NOLOCK);
8437 ASSERT(seg != NULL);
8438
8439 if (seg->s_hdr.rsmrc_type == RSM_RESOURCE_BAR) {
8440 if ((off == barrier_offset) &&
8441 (len == barrier_size)) {
8442
8443 ASSERT(bar_va != NULL && bar_cookie != NULL);
8444
8445 /*
8446 * The offset argument in devmap_umem_setup represents
8447 * the offset within the kernel memory defined by the
8448 * cookie. We use this offset as barrier_offset.
8449 */
8450 err = devmap_umem_setup(dhc, rsm_dip, NULL, bar_cookie,
8451 barrier_offset, len, PROT_USER|PROT_READ,
8452 DEVMAP_DEFAULTS, 0);
8453
8454 if (err != 0) {
8455 DBG_PRINTF((category, RSM_ERR,
8456 "rsm_devmap done: %d\n", err));
8457 return (RSMERR_MAP_FAILED);
8458 }
8459 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8460 "rsm_devmap done: %d\n", err));
8461
8462 *maplen = barrier_size;
8463
8464 return (err);
8465 } else {
			/* off/len does not match the barrier page */
			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
			    "rsm_devmap done: %d\n", RSMERR_MAP_FAILED));
			return (RSMERR_MAP_FAILED);
8469 }
8470 }
8471
8472 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8473 ASSERT(seg->s_state == RSM_STATE_MAPPING);
8474
8475 /*
8476 * Make sure we still have permission for the map operation.
8477 */
8478 maxprot = PROT_USER;
8479 if (seg->s_mode & RSM_PERM_READ) {
8480 maxprot |= PROT_READ;
8481 }
8482
8483 if (seg->s_mode & RSM_PERM_WRITE) {
8484 maxprot |= PROT_WRITE;
8485 }
8486
8487 /*
8488 * For each devmap call, rsmmap_map is called. This maintains driver
8489 * private information for the mapping. Thus, if there are multiple
8490 * devmap calls there will be multiple rsmmap_map calls and for each
8491 * call, the mapping information will be stored.
8492 * In case of an error during the processing of the devmap call, error
8493 * will be returned. This error return causes the caller of rsm_devmap
8494 * to undo all the mappings by calling rsmmap_unmap for each one.
8495 * rsmmap_unmap will free up the private information for the requested
8496 * mapping.
8497 */
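	/*
	 * A sketch of the overall call flow for a remote segment (the
	 * routine names are the ones in this file):
	 *
	 *	rsm_segmap -> devmap_setup -> rsm_devmap
	 *	    -> devmap_devmem_setup (registers rsmmap_ops)
	 *	    -> rsmmap_map (records the cookie on seg->s_ckl)
	 *
	 * and on error the framework unwinds via rsmmap_unmap as
	 * described above.
	 */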
8498 if (seg->s_node != my_nodeid) {
8499 rsm_mapinfo_t *p;
8500
8501 p = rsm_get_mapinfo(seg, off, len, &dev_offset, &cur_len);
8502 if (p == NULL) {
8503 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8504 "rsm_devmap: incorrect mapping info\n"));
8505 return (RSMERR_MAP_FAILED);
8506 }
8507 err = devmap_devmem_setup(dhc, p->dip,
8508 callbackops, p->dev_register,
8509 dev_offset, cur_len, maxprot,
8510 DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS, 0);
8511
8512 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8513 "rsm_devmap: dip=%lx,dreg=%lu,doff=%lx,"
8514 "off=%lx,len=%lx\n",
8515 p->dip, p->dev_register, dev_offset, off, cur_len));
8516
8517 if (err != 0) {
8518 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8519 "rsm_devmap: devmap_devmem_setup failed %d\n",
8520 err));
8521 return (RSMERR_MAP_FAILED);
8522 }
		/* cur_len is always an integral multiple of PAGESIZE */
8524 ASSERT((cur_len & (PAGESIZE-1)) == 0);
8525 *maplen = cur_len;
8526 return (err);
8527
8528 } else {
8529 err = devmap_umem_setup(dhc, rsm_dip, callbackops,
8530 seg->s_cookie, off, len, maxprot,
8531 DEVMAP_ALLOW_REMAP|DEVMAP_DEFAULTS, 0);
8532 if (err != 0) {
8533 DBG_PRINTF((category, RSM_DEBUG,
8534 "rsm_devmap: devmap_umem_setup failed %d\n",
8535 err));
8536 return (RSMERR_MAP_FAILED);
8537 }
8538 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8539 "rsm_devmap: loopback done\n"));
8540
8541 *maplen = ptob(btopr(len));
8542
8543 return (err);
8544 }
8545 }
8546
8547 /*
8548 * We can use the devmap framework for mapping device memory to user space by
8549 * specifying this routine in the rsm_cb_ops structure. The kernel mmap
8550 * processing calls this entry point and devmap_setup is called within this
 * function, which eventually calls rsm_devmap.
8552 */
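/*
 * A user-level sketch (hypothetical pathname and sizes) of what drives
 * this entry point:
 *
 *	int fd = open("/dev/rsm", O_RDWR);
 *	(RSMAPI ioctls create and connect an import segment)
 *	void *va = mmap(NULL, seg_len, PROT_READ | PROT_WRITE,
 *	    MAP_SHARED, fd, seg_off);	(invokes rsm_segmap)
 */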
8553 static int
8554 rsm_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len,
8555 uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred)
8556 {
8557 int error = 0;
8558 int old_state;
8559 minor_t rnum;
8560 rsmseg_t *seg, *eseg;
8561 adapter_t *adapter;
8562 rsm_import_share_t *sharedp;
8563 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8564
8565 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_segmap enter\n"));
8566
8567 /*
8568 * find segment
8569 */
8570 rnum = getminor(dev);
8571 seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_LOCK);
8572
8573 if (seg == NULL) {
8574 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8575 "rsm_segmap done: invalid segment\n"));
8576 return (EINVAL);
8577 }
8578
8579 /*
8580 * the user is trying to map a resource that has not been
8581 * defined yet. The library uses this to map in the
8582 * barrier page.
8583 */
8584 if (seg->s_hdr.rsmrc_type == RSM_RESOURCE_BAR) {
8585 rsmseglock_release(seg);
8586
8587 /*
8588 * The mapping for the barrier page is identified
8589 * by the special offset barrier_offset
8590 */
8591
8592 if (off == (off_t)barrier_offset ||
8593 len == (off_t)barrier_size) {
8594 if (bar_cookie == NULL || bar_va == NULL) {
8595 DBG_PRINTF((category, RSM_DEBUG,
8596 "rsm_segmap: bar cookie/va is NULL\n"));
8597 return (EINVAL);
8598 }
8599
8600 error = devmap_setup(dev, (offset_t)off, as, addrp,
8601 (size_t)len, prot, maxprot, flags, cred);
8602
8603 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8604 "rsm_segmap done: %d\n", error));
8605 return (error);
8606 } else {
8607 DBG_PRINTF((category, RSM_DEBUG,
8608 "rsm_segmap: bad offset/length\n"));
8609 return (EINVAL);
8610 }
8611 }
8612
8613 /* Make sure you can only map imported segments */
8614 if (seg->s_hdr.rsmrc_type != RSM_RESOURCE_IMPORT_SEGMENT) {
8615 rsmseglock_release(seg);
8616 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8617 "rsm_segmap done: not an import segment\n"));
8618 return (EINVAL);
8619 }
	/* if this check fails, the library is broken */
8621 ASSERT(seg->s_hdr.rsmrc_num == rnum);
8622
8623 /* wait for the segment to become unquiesced */
8624 while (seg->s_state == RSM_STATE_CONN_QUIESCE) {
8625 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
8626 rsmseglock_release(seg);
8627 DBG_PRINTF((category, RSM_DEBUG,
8628 "rsm_segmap done: cv_wait INTR"));
8629 return (ENODEV);
8630 }
8631 }
8632
8633 /* wait until segment leaves the mapping state */
8634 while (seg->s_state == RSM_STATE_MAPPING)
8635 cv_wait(&seg->s_cv, &seg->s_lock);
8636
8637 /*
8638 * we allow multiple maps of the same segment in the KA
8639 * and it works because we do an rsmpi map of the whole
8640 * segment during the first map and all the device mapping
8641 * information needed in rsm_devmap is in the mapinfo list.
8642 */
8643 if ((seg->s_state != RSM_STATE_CONNECT) &&
8644 (seg->s_state != RSM_STATE_ACTIVE)) {
8645 rsmseglock_release(seg);
8646 DBG_PRINTF((category, RSM_DEBUG,
8647 "rsm_segmap done: segment not connected\n"));
8648 return (ENODEV);
8649 }
8650
8651 /*
8652 * Make sure we are not mapping a larger segment than what's
8653 * exported
8654 */
8655 if ((size_t)off + ptob(btopr(len)) > seg->s_len) {
8656 rsmseglock_release(seg);
8657 DBG_PRINTF((category, RSM_DEBUG,
8658 "rsm_segmap done: off+len>seg size\n"));
8659 return (ENXIO);
8660 }
8661
8662 /*
8663 * Make sure we still have permission for the map operation.
8664 */
8665 maxprot = PROT_USER;
8666 if (seg->s_mode & RSM_PERM_READ) {
8667 maxprot |= PROT_READ;
8668 }
8669
8670 if (seg->s_mode & RSM_PERM_WRITE) {
8671 maxprot |= PROT_WRITE;
8672 }
8673
8674 if ((prot & maxprot) != prot) {
8675 /* No permission */
8676 rsmseglock_release(seg);
8677 DBG_PRINTF((category, RSM_DEBUG,
8678 "rsm_segmap done: no permission\n"));
8679 return (EACCES);
8680 }
8681
8682 old_state = seg->s_state;
8683
8684 ASSERT(seg->s_share != NULL);
8685
8686 rsmsharelock_acquire(seg);
8687
8688 sharedp = seg->s_share;
8689
8690 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8691 "rsm_segmap:RSMSI_STATE=%d\n", sharedp->rsmsi_state));
8692
8693 if ((sharedp->rsmsi_state != RSMSI_STATE_CONNECTED) &&
8694 (sharedp->rsmsi_state != RSMSI_STATE_MAPPED)) {
8695 rsmsharelock_release(seg);
8696 rsmseglock_release(seg);
8697 DBG_PRINTF((category, RSM_DEBUG,
8698 "rsm_segmap done:RSMSI_STATE %d invalid\n",
8699 sharedp->rsmsi_state));
8700 return (ENODEV);
8701 }
8702
8703 /*
8704 * Do the map - since we want importers to share mappings
8705 * we do the rsmpi map for the whole segment
8706 */
8707 if (seg->s_node != my_nodeid) {
8708 uint_t dev_register;
8709 off_t dev_offset;
8710 dev_info_t *dip;
8711 size_t tmp_len;
8712 size_t total_length_mapped = 0;
8713 size_t length_to_map = seg->s_len;
8714 off_t tmp_off = 0;
8715 rsm_mapinfo_t *p;
8716
8717 /*
8718 * length_to_map = seg->s_len is always an integral
8719 * multiple of PAGESIZE. Length mapped in each entry in mapinfo
8720 * list is a multiple of PAGESIZE - RSMPI map ensures this
8721 */
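		/*
		 * For example (illustrative values only): with seg->s_len =
		 * 0x6000, rsm_map might return tmp_len 0x4000 and then
		 * 0x2000 over two iterations, creating two mapinfo entries.
		 * Each entry is prepended, so the list ends up ordered by
		 * descending start_offset, which is the reverse order that
		 * rsm_get_mapinfo expects.
		 */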
8722
8723 adapter = seg->s_adapter;
8724 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_CONNECTED ||
8725 sharedp->rsmsi_state == RSMSI_STATE_MAPPED);
8726
8727 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) {
8728 error = 0;
8729 /* map the whole segment */
8730 while (total_length_mapped < seg->s_len) {
8731 tmp_len = 0;
8732
8733 error = adapter->rsmpi_ops->rsm_map(
8734 seg->s_handle.in, tmp_off,
8735 length_to_map, &tmp_len,
8736 &dip, &dev_register, &dev_offset,
8737 NULL, NULL);
8738
8739 if (error != 0)
8740 break;
8741
8742 /*
8743 * Store the mapping info obtained from rsm_map
8744 */
8745 p = kmem_alloc(sizeof (*p), KM_SLEEP);
8746 p->dev_register = dev_register;
8747 p->dev_offset = dev_offset;
8748 p->dip = dip;
8749 p->individual_len = tmp_len;
8750 p->start_offset = tmp_off;
8751 p->next = sharedp->rsmsi_mapinfo;
8752 sharedp->rsmsi_mapinfo = p;
8753
8754 total_length_mapped += tmp_len;
8755 length_to_map -= tmp_len;
8756 tmp_off += tmp_len;
8757 }
8758 seg->s_mapinfo = sharedp->rsmsi_mapinfo;
8759
8760 if (error != RSM_SUCCESS) {
				/* Check if this is the first rsm_map */
8762 if (sharedp->rsmsi_mapinfo != NULL) {
8763 /*
8764 * A single rsm_unmap undoes
8765 * multiple rsm_maps.
8766 */
8767 (void) seg->s_adapter->rsmpi_ops->
8768 rsm_unmap(sharedp->rsmsi_handle);
8769 rsm_free_mapinfo(sharedp->
8770 rsmsi_mapinfo);
8771 }
8772 sharedp->rsmsi_mapinfo = NULL;
8773 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
8774 rsmsharelock_release(seg);
8775 rsmseglock_release(seg);
8776 DBG_PRINTF((category, RSM_DEBUG,
8777 "rsm_segmap done: rsmpi map err %d\n",
8778 error));
8779 ASSERT(error != RSMERR_BAD_LENGTH &&
8780 error != RSMERR_BAD_MEM_ALIGNMENT &&
8781 error != RSMERR_BAD_SEG_HNDL);
8782 if (error == RSMERR_UNSUPPORTED_OPERATION)
8783 return (ENOTSUP);
8784 else if (error == RSMERR_INSUFFICIENT_RESOURCES)
8785 return (EAGAIN);
8786 else if (error == RSMERR_CONN_ABORTED)
8787 return (ENODEV);
8788 else
8789 return (error);
8790 } else {
8791 sharedp->rsmsi_state = RSMSI_STATE_MAPPED;
8792 }
8793 } else {
8794 seg->s_mapinfo = sharedp->rsmsi_mapinfo;
8795 }
8796
8797 sharedp->rsmsi_mapcnt++;
8798
8799 rsmsharelock_release(seg);
8800
8801 /* move to an intermediate mapping state */
8802 seg->s_state = RSM_STATE_MAPPING;
8803 rsmseglock_release(seg);
8804
8805 error = devmap_setup(dev, (offset_t)off, as, addrp,
8806 len, prot, maxprot, flags, cred);
8807
8808 rsmseglock_acquire(seg);
8809 ASSERT(seg->s_state == RSM_STATE_MAPPING);
8810
8811 if (error == DDI_SUCCESS) {
8812 seg->s_state = RSM_STATE_ACTIVE;
8813 } else {
8814 rsmsharelock_acquire(seg);
8815
8816 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED);
8817
8818 sharedp->rsmsi_mapcnt--;
8819 if (sharedp->rsmsi_mapcnt == 0) {
8820 /* unmap the shared RSMPI mapping */
8821 ASSERT(sharedp->rsmsi_handle != NULL);
8822 (void) adapter->rsmpi_ops->
8823 rsm_unmap(sharedp->rsmsi_handle);
8824 rsm_free_mapinfo(sharedp->rsmsi_mapinfo);
8825 sharedp->rsmsi_mapinfo = NULL;
8826 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
8827 }
8828
8829 rsmsharelock_release(seg);
8830 seg->s_state = old_state;
8831 DBG_PRINTF((category, RSM_ERR,
8832 "rsm: devmap_setup failed %d\n", error));
8833 }
8834 cv_broadcast(&seg->s_cv);
8835 rsmseglock_release(seg);
8836 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsm_segmap done: %d\n",
8837 error));
8838 return (error);
8839 } else {
8840 /*
8841 * For loopback, the export segment mapping cookie (s_cookie)
8842 * is also used as the s_cookie value for its import segments
8843 * during mapping.
8844 * Note that reference counting for s_cookie of the export
8845 * segment is not required due to the following:
8846 * We never have a case of the export segment being destroyed,
8847 * leaving the import segments with a stale value for the
8848 * s_cookie field, since a force disconnect is done prior to a
8849 * destroy of an export segment. The force disconnect causes
8850 * the s_cookie value to be reset to NULL. Also for the
8851 * rsm_rebind operation, we change the s_cookie value of the
8852 * export segment as well as of all its local (loopback)
8853 * importers.
8854 */
8855 DBG_ADDCATEGORY(category, RSM_LOOPBACK);
8856
8857 rsmsharelock_release(seg);
8858 /*
8859 * In order to maintain the lock ordering between the export
8860 * and import segment locks, we need to acquire the export
8861 * segment lock first and only then acquire the import
8862 * segment lock.
8863 * The above is necessary to avoid any deadlock scenarios
8864 * with rsm_rebind which also acquires both the export
8865 * and import segment locks in the above mentioned order.
8866 * Based on code inspection, there seem to be no other
8867 * situations in which both the export and import segment
8868 * locks are acquired either in the same or opposite order
8869 * as mentioned above.
8870 * Thus in order to conform to the above lock order, we
8871 * need to change the state of the import segment to
8872 * RSM_STATE_MAPPING, release the lock. Once this is done we
8873 * can now safely acquire the export segment lock first
8874 * followed by the import segment lock which is as per
8875 * the lock order mentioned above.
8876 */
8877 /* move to an intermediate mapping state */
8878 seg->s_state = RSM_STATE_MAPPING;
8879 rsmseglock_release(seg);
8880
8881 eseg = rsmexport_lookup(seg->s_key);
8882
8883 if (eseg == NULL) {
8884 rsmseglock_acquire(seg);
8885 /*
8886 * Revert to old_state and signal any waiters
8887 * The shared state is not changed
8888 */
8889
8890 seg->s_state = old_state;
8891 cv_broadcast(&seg->s_cv);
8892 rsmseglock_release(seg);
8893 DBG_PRINTF((category, RSM_DEBUG,
8894 "rsm_segmap done: key %d not found\n", seg->s_key));
8895 return (ENODEV);
8896 }
8897
8898 rsmsharelock_acquire(seg);
8899 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_CONNECTED ||
8900 sharedp->rsmsi_state == RSMSI_STATE_MAPPED);
8901
8902 sharedp->rsmsi_mapcnt++;
8903 sharedp->rsmsi_state = RSMSI_STATE_MAPPED;
8904 rsmsharelock_release(seg);
8905
8906 ASSERT(eseg->s_cookie != NULL);
8907
8908 /*
8909 * It is not required or necessary to acquire the import
8910 * segment lock here to change the value of s_cookie since
8911 * no one will touch the import segment as long as it is
8912 * in the RSM_STATE_MAPPING state.
8913 */
8914 seg->s_cookie = eseg->s_cookie;
8915
8916 rsmseglock_release(eseg);
8917
8918 error = devmap_setup(dev, (offset_t)off, as, addrp, (size_t)len,
8919 prot, maxprot, flags, cred);
8920
8921 rsmseglock_acquire(seg);
8922 ASSERT(seg->s_state == RSM_STATE_MAPPING);
8923 if (error == 0) {
8924 seg->s_state = RSM_STATE_ACTIVE;
8925 } else {
8926 rsmsharelock_acquire(seg);
8927
8928 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED);
8929
8930 sharedp->rsmsi_mapcnt--;
8931 if (sharedp->rsmsi_mapcnt == 0) {
8932 sharedp->rsmsi_mapinfo = NULL;
8933 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
8934 }
8935 rsmsharelock_release(seg);
8936 seg->s_state = old_state;
8937 seg->s_cookie = NULL;
8938 }
8939 cv_broadcast(&seg->s_cv);
8940 rsmseglock_release(seg);
8941 DBG_PRINTF((category, RSM_DEBUG_LVL2,
8942 "rsm_segmap done: %d\n", error));
8943 return (error);
8944 }
8945 }
8946
8947 int
8948 rsmka_null_seg_create(
8949 rsm_controller_handle_t argcp,
8950 rsm_memseg_export_handle_t *handle,
8951 size_t size,
8952 uint_t flags,
8953 rsm_memory_local_t *memory,
8954 rsm_resource_callback_t callback,
8955 rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/)
8956 {
8957 return (RSM_SUCCESS);
8958 }
8959
8960
8961 int
8962 rsmka_null_seg_destroy(
8963 rsm_memseg_export_handle_t argmemseg /*ARGSUSED*/)
8964 {
8965 return (RSM_SUCCESS);
8966 }
8967
8968
8969 int
8970 rsmka_null_bind(
8971 rsm_memseg_export_handle_t argmemseg,
8972 off_t offset,
8973 rsm_memory_local_t *argmemory,
8974 rsm_resource_callback_t callback,
8975 rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/)
8976 {
8977 return (RSM_SUCCESS);
8978 }
8979
8980
8981 int
8982 rsmka_null_unbind(
8983 rsm_memseg_export_handle_t argmemseg,
8984 off_t offset,
8985 size_t length /*ARGSUSED*/)
8986 {
8987 return (DDI_SUCCESS);
8988 }
8989
8990 int
8991 rsmka_null_rebind(
8992 rsm_memseg_export_handle_t argmemseg,
8993 off_t offset,
8994 rsm_memory_local_t *memory,
8995 rsm_resource_callback_t callback,
8996 rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/)
8997 {
8998 return (RSM_SUCCESS);
8999 }
9000
9001 int
9002 rsmka_null_publish(
9003 rsm_memseg_export_handle_t argmemseg,
9004 rsm_access_entry_t access_list[],
9005 uint_t access_list_length,
9006 rsm_memseg_id_t segment_id,
9007 rsm_resource_callback_t callback,
9008 rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/)
9009 {
9010 return (RSM_SUCCESS);
9011 }
9012
9013
9014 int
9015 rsmka_null_republish(
9016 rsm_memseg_export_handle_t memseg,
9017 rsm_access_entry_t access_list[],
9018 uint_t access_list_length,
9019 rsm_resource_callback_t callback,
9020 rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/)
9021 {
9022 return (RSM_SUCCESS);
9023 }
9024
9025 int
9026 rsmka_null_unpublish(
9027 rsm_memseg_export_handle_t argmemseg /*ARGSUSED*/)
9028 {
9029 return (RSM_SUCCESS);
9030 }
9031
9032
9033 void
9034 rsmka_init_loopback()
9035 {
9036 rsm_ops_t *ops = &null_rsmpi_ops;
9037 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_LOOPBACK);
9038
9039 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9040 "rsmka_init_loopback enter\n"));
9041
9042 /* initialize null ops vector */
9043 ops->rsm_seg_create = rsmka_null_seg_create;
9044 ops->rsm_seg_destroy = rsmka_null_seg_destroy;
9045 ops->rsm_bind = rsmka_null_bind;
9046 ops->rsm_unbind = rsmka_null_unbind;
9047 ops->rsm_rebind = rsmka_null_rebind;
9048 ops->rsm_publish = rsmka_null_publish;
9049 ops->rsm_unpublish = rsmka_null_unpublish;
9050 ops->rsm_republish = rsmka_null_republish;
9051
9052 /* initialize attributes for loopback adapter */
9053 loopback_attr.attr_name = loopback_str;
9054 loopback_attr.attr_page_size = 0x8; /* 8K */
9055
9056 /* initialize loopback adapter */
9057 loopback_adapter.rsm_attr = loopback_attr;
9058 loopback_adapter.rsmpi_ops = &null_rsmpi_ops;
9059 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9060 "rsmka_init_loopback done\n"));
9061 }
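
/*
 * With the null ops vector installed, loopback (local node) segments flow
 * through the same rsmpi call sites as remote ones, but each call is a
 * successful no-op. A minimal sketch of the effect:
 *
 *	e = loopback_adapter.rsmpi_ops->rsm_publish(handle, acl, acl_len,
 *	    segid, RSM_RESOURCE_DONTWAIT, NULL);
 *	(resolves to rsmka_null_publish, so e == RSM_SUCCESS)
 */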
9062
9063 /* ************** DR functions ********************************** */
9064 static void
9065 rsm_quiesce_exp_seg(rsmresource_t *resp)
9066 {
9067 int recheck_state;
9068 rsmseg_t *segp = (rsmseg_t *)resp;
9069 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
	DBG_DEFINE_STR(function, "rsm_quiesce_exp_seg");
9071
9072 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9073 "%s enter: key=%u\n", function, segp->s_key));
9074
9075 rsmseglock_acquire(segp);
9076 do {
9077 recheck_state = 0;
9078 if ((segp->s_state == RSM_STATE_NEW_QUIESCED) ||
9079 (segp->s_state == RSM_STATE_BIND_QUIESCED) ||
9080 (segp->s_state == RSM_STATE_EXPORT_QUIESCING) ||
9081 (segp->s_state == RSM_STATE_EXPORT_QUIESCED)) {
9082 rsmseglock_release(segp);
9083 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9084 "%s done:state =%d\n", function,
9085 segp->s_state));
9086 return;
9087 }
9088
9089 if (segp->s_state == RSM_STATE_NEW) {
9090 segp->s_state = RSM_STATE_NEW_QUIESCED;
9091 rsmseglock_release(segp);
9092 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9093 "%s done:state =%d\n", function,
9094 segp->s_state));
9095 return;
9096 }
9097
9098 if (segp->s_state == RSM_STATE_BIND) {
9099 /* unbind */
9100 (void) rsm_unbind_pages(segp);
9101 segp->s_state = RSM_STATE_BIND_QUIESCED;
9102 rsmseglock_release(segp);
9103 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9104 "%s done:state =%d\n", function,
9105 segp->s_state));
9106 return;
9107 }
9108
9109 if (segp->s_state == RSM_STATE_EXPORT) {
9110 /*
9111 * wait for putv/getv to complete if the segp is
9112 * a local memory handle
9113 */
9114 while ((segp->s_state == RSM_STATE_EXPORT) &&
9115 (segp->s_rdmacnt != 0)) {
9116 cv_wait(&segp->s_cv, &segp->s_lock);
9117 }
9118
9119 if (segp->s_state != RSM_STATE_EXPORT) {
				/*
				 * the state changed; recheck to see what
				 * it should be changed to
				 */
9124 recheck_state = 1;
9125 continue;
9126 }
9127
9128 segp->s_state = RSM_STATE_EXPORT_QUIESCING;
9129 rsmseglock_release(segp);
9130 /*
9131 * send SUSPEND messages - currently it will be
9132 * done at the end
9133 */
9134 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9135 "%s done:state =%d\n", function,
9136 segp->s_state));
9137 return;
9138 }
9139 } while (recheck_state);
9140
9141 rsmseglock_release(segp);
9142 }
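
/*
 * A summary of the export segment quiesce transitions implemented above
 * (rsm_unquiesce_exp_seg below performs the inverse):
 *
 *	RSM_STATE_NEW	 -> RSM_STATE_NEW_QUIESCED
 *	RSM_STATE_BIND	 -> RSM_STATE_BIND_QUIESCED	(pages unbound)
 *	RSM_STATE_EXPORT -> RSM_STATE_EXPORT_QUIESCING	(after rdma drains)
 */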
9143
9144 static void
9145 rsm_unquiesce_exp_seg(rsmresource_t *resp)
9146 {
9147 int ret;
9148 rsmseg_t *segp = (rsmseg_t *)resp;
9149 rsmapi_access_entry_t *acl;
9150 rsm_access_entry_t *rsmpi_acl;
9151 int acl_len;
9152 int create_flags = 0;
9153 struct buf *xbuf;
9154 rsm_memory_local_t mem;
9155 adapter_t *adapter;
9156 dev_t sdev = 0;
9157 rsm_resource_callback_t callback_flag;
9158 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9159 DBG_DEFINE_STR(function, "rsm_unquiesce_exp_seg");
9160
9161 rsmseglock_acquire(segp);
9162
9163 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9164 "%s enter: key=%u, state=%d\n", function, segp->s_key,
9165 segp->s_state));
9166
9167 if ((segp->s_state == RSM_STATE_NEW) ||
9168 (segp->s_state == RSM_STATE_BIND) ||
9169 (segp->s_state == RSM_STATE_EXPORT)) {
9170 rsmseglock_release(segp);
9171 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done:state=%d\n",
9172 function, segp->s_state));
9173 return;
9174 }
9175
9176 if (segp->s_state == RSM_STATE_NEW_QUIESCED) {
9177 segp->s_state = RSM_STATE_NEW;
9178 cv_broadcast(&segp->s_cv);
9179 rsmseglock_release(segp);
9180 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done:state=%d\n",
9181 function, segp->s_state));
9182 return;
9183 }
9184
9185 if (segp->s_state == RSM_STATE_BIND_QUIESCED) {
9186 /* bind the segment */
9187 ret = rsm_bind_pages(&segp->s_cookie, segp->s_region.r_vaddr,
9188 segp->s_len, segp->s_proc);
9189 if (ret == RSM_SUCCESS) { /* bind successful */
9190 segp->s_state = RSM_STATE_BIND;
9191 } else { /* bind failed - resource unavailable */
9192 segp->s_state = RSM_STATE_NEW;
9193 }
9194 cv_broadcast(&segp->s_cv);
9195 rsmseglock_release(segp);
9196 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9197 "%s done: bind_qscd bind = %d\n", function, ret));
9198 return;
9199 }
9200
9201 while (segp->s_state == RSM_STATE_EXPORT_QUIESCING) {
9202 /* wait for the segment to move to EXPORT_QUIESCED state */
9203 cv_wait(&segp->s_cv, &segp->s_lock);
9204 }
9205
9206 if (segp->s_state == RSM_STATE_EXPORT_QUIESCED) {
9207 /* bind the segment */
9208 ret = rsm_bind_pages(&segp->s_cookie, segp->s_region.r_vaddr,
9209 segp->s_len, segp->s_proc);
9210
9211 if (ret != RSM_SUCCESS) {
9212 /* bind failed - resource unavailable */
9213 acl_len = segp->s_acl_len;
9214 acl = segp->s_acl;
9215 rsmpi_acl = segp->s_acl_in;
9216 segp->s_acl_len = 0;
9217 segp->s_acl = NULL;
9218 segp->s_acl_in = NULL;
9219 rsmseglock_release(segp);
9220
9221 rsmexport_rm(segp);
9222 rsmacl_free(acl, acl_len);
9223 rsmpiacl_free(rsmpi_acl, acl_len);
9224
9225 rsmseglock_acquire(segp);
9226 segp->s_state = RSM_STATE_NEW;
9227 cv_broadcast(&segp->s_cv);
9228 rsmseglock_release(segp);
9229 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9230 "%s done: exp_qscd bind failed = %d\n",
9231 function, ret));
9232 return;
9233 }
		/*
		 * Publish the segment: on success the state becomes
		 * RSM_STATE_EXPORT; on failure it reverts to
		 * RSM_STATE_BIND.
		 */
9241
9242 /* check whether it is a local_memory_handle */
9243 if (segp->s_acl != (rsmapi_access_entry_t *)NULL) {
9244 if ((segp->s_acl[0].ae_node == my_nodeid) &&
9245 (segp->s_acl[0].ae_permission == 0)) {
9246 segp->s_state = RSM_STATE_EXPORT;
9247 cv_broadcast(&segp->s_cv);
9248 rsmseglock_release(segp);
9249 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9250 "%s done:exp_qscd\n", function));
9251 return;
9252 }
9253 }
9254 xbuf = ddi_umem_iosetup(segp->s_cookie, 0, segp->s_len, B_WRITE,
9255 sdev, 0, NULL, DDI_UMEM_SLEEP);
9256 ASSERT(xbuf != NULL);
9257
9258 mem.ms_type = RSM_MEM_BUF;
9259 mem.ms_bp = xbuf;
9260
9261 adapter = segp->s_adapter;
9262
9263 if (segp->s_flags & RSMKA_ALLOW_UNBIND_REBIND) {
9264 create_flags = RSM_ALLOW_UNBIND_REBIND;
9265 }
9266
9267 if (segp->s_flags & RSMKA_SET_RESOURCE_DONTWAIT) {
9268 callback_flag = RSM_RESOURCE_DONTWAIT;
9269 } else {
9270 callback_flag = RSM_RESOURCE_SLEEP;
9271 }
9272
9273 ret = adapter->rsmpi_ops->rsm_seg_create(
9274 adapter->rsmpi_handle, &segp->s_handle.out,
9275 segp->s_len, create_flags, &mem,
9276 callback_flag, NULL);
9277
9278 if (ret != RSM_SUCCESS) {
9279 acl_len = segp->s_acl_len;
9280 acl = segp->s_acl;
9281 rsmpi_acl = segp->s_acl_in;
9282 segp->s_acl_len = 0;
9283 segp->s_acl = NULL;
9284 segp->s_acl_in = NULL;
9285 rsmseglock_release(segp);
9286
9287 rsmexport_rm(segp);
9288 rsmacl_free(acl, acl_len);
9289 rsmpiacl_free(rsmpi_acl, acl_len);
9290
9291 rsmseglock_acquire(segp);
9292 segp->s_state = RSM_STATE_BIND;
9293 cv_broadcast(&segp->s_cv);
9294 rsmseglock_release(segp);
9295 DBG_PRINTF((category, RSM_ERR,
9296 "%s done: exp_qscd create failed = %d\n",
9297 function, ret));
9298 return;
9299 }
9300
9301 ret = adapter->rsmpi_ops->rsm_publish(
9302 segp->s_handle.out, segp->s_acl_in, segp->s_acl_len,
9303 segp->s_segid, RSM_RESOURCE_DONTWAIT, NULL);
9304
9305 if (ret != RSM_SUCCESS) {
9306 acl_len = segp->s_acl_len;
9307 acl = segp->s_acl;
9308 rsmpi_acl = segp->s_acl_in;
9309 segp->s_acl_len = 0;
9310 segp->s_acl = NULL;
9311 segp->s_acl_in = NULL;
9312 adapter->rsmpi_ops->rsm_seg_destroy(segp->s_handle.out);
9313 rsmseglock_release(segp);
9314
9315 rsmexport_rm(segp);
9316 rsmacl_free(acl, acl_len);
9317 rsmpiacl_free(rsmpi_acl, acl_len);
9318
9319 rsmseglock_acquire(segp);
9320 segp->s_state = RSM_STATE_BIND;
9321 cv_broadcast(&segp->s_cv);
9322 rsmseglock_release(segp);
9323 DBG_PRINTF((category, RSM_ERR,
9324 "%s done: exp_qscd publish failed = %d\n",
9325 function, ret));
9326 return;
9327 }
9328
9329 segp->s_state = RSM_STATE_EXPORT;
9330 cv_broadcast(&segp->s_cv);
9331 rsmseglock_release(segp);
9332 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done: exp_qscd\n",
9333 function));
9334 return;
9335 }
9336
9337 rsmseglock_release(segp);
9338
9339 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function));
9340 }
9341
9342 static void
9343 rsm_quiesce_imp_seg(rsmresource_t *resp)
9344 {
9345 rsmseg_t *segp = (rsmseg_t *)resp;
9346 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9347 DBG_DEFINE_STR(function, "rsm_quiesce_imp_seg");
9348
9349 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9350 "%s enter: key=%u\n", function, segp->s_key));
9351
9352 rsmseglock_acquire(segp);
9353 segp->s_flags |= RSM_DR_INPROGRESS;
9354
9355 while (segp->s_rdmacnt != 0) {
9356 /* wait for the RDMA to complete */
9357 cv_wait(&segp->s_cv, &segp->s_lock);
9358 }
9359
9360 rsmseglock_release(segp);
9361
9362 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function));
9363
9364 }
9365
9366 static void
9367 rsm_unquiesce_imp_seg(rsmresource_t *resp)
9368 {
9369 rsmseg_t *segp = (rsmseg_t *)resp;
9370 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9371 DBG_DEFINE_STR(function, "rsm_unquiesce_imp_seg");
9372
9373 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9374 "%s enter: key=%u\n", function, segp->s_key));
9375
9376 rsmseglock_acquire(segp);
9377
9378 segp->s_flags &= ~RSM_DR_INPROGRESS;
9379 /* wake up any waiting putv/getv ops */
9380 cv_broadcast(&segp->s_cv);
9381
9382 rsmseglock_release(segp);
9383
9384 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function));
9385
9387 }
9388
9389 static void
9390 rsm_process_exp_seg(rsmresource_t *resp, int event)
9391 {
9392 if (event == RSM_DR_QUIESCE)
9393 rsm_quiesce_exp_seg(resp);
9394 else /* UNQUIESCE */
9395 rsm_unquiesce_exp_seg(resp);
9396 }
9397
9398 static void
9399 rsm_process_imp_seg(rsmresource_t *resp, int event)
9400 {
9401 if (event == RSM_DR_QUIESCE)
9402 rsm_quiesce_imp_seg(resp);
9403 else /* UNQUIESCE */
9404 rsm_unquiesce_imp_seg(resp);
9405 }
9406
9407 static void
9408 rsm_dr_process_local_segments(int event)
9409 {
9410
9411 int i, j;
9412 rsmresource_blk_t *blk;
9413 rsmresource_t *p;
9414 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9415
9416 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9417 "rsm_dr_process_local_segments enter\n"));
9418
9419 /* iterate through the resource structure */
9420
9421 rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
9422
9423 for (i = 0; i < rsm_resource.rsmrc_len; i++) {
9424 blk = rsm_resource.rsmrc_root[i];
9425 if (blk != NULL) {
9426 for (j = 0; j < RSMRC_BLKSZ; j++) {
9427 p = blk->rsmrcblk_blks[j];
9428 if ((p != NULL) && (p != RSMRC_RESERVED)) {
9429 /* valid resource */
9430 if (p->rsmrc_type ==
9431 RSM_RESOURCE_EXPORT_SEGMENT)
9432 rsm_process_exp_seg(p, event);
9433 else if (p->rsmrc_type ==
9434 RSM_RESOURCE_IMPORT_SEGMENT)
9435 rsm_process_imp_seg(p, event);
9436 }
9437 }
9438 }
9439 }
9440
9441 rw_exit(&rsm_resource.rsmrc_lock);
9442
9443 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9444 "rsm_dr_process_local_segments done\n"));
9445 }
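
/*
 * Note on the traversal above: rsm_resource is a two-level table in which
 * rsmrc_root[] holds up to rsmrc_len block pointers and each block holds
 * RSMRC_BLKSZ resource slots, so a minor number rnum presumably resolves to
 * rsmrc_root[rnum / RSMRC_BLKSZ]->rsmrcblk_blks[rnum % RSMRC_BLKSZ]; the
 * loop here simply visits every occupied slot.
 */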
9446
9447 /* *************** DR callback functions ************ */
9448 static void
9449 rsm_dr_callback_post_add(void *arg, pgcnt_t delta /* ARGSUSED */)
9450 {
9451 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9452 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9453 "rsm_dr_callback_post_add is a no-op\n"));
9454 /* Noop */
9455 }
9456
9457 static int
9458 rsm_dr_callback_pre_del(void *arg, pgcnt_t delta /* ARGSUSED */)
9459 {
9460 int recheck_state = 0;
9461 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9462
9463 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9464 "rsm_dr_callback_pre_del enter\n"));
9465
9466 mutex_enter(&rsm_drv_data.drv_lock);
9467
9468 do {
9469 recheck_state = 0;
9470 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9471 "rsm_dr_callback_pre_del:state=%d\n",
9472 rsm_drv_data.drv_state));
9473
9474 switch (rsm_drv_data.drv_state) {
9475 case RSM_DRV_NEW:
			/*
			 * The state should never be RSM_DRV_NEW since in
			 * this state the callbacks have not yet been
			 * registered. So, ASSERT.
			 */
			ASSERT(0);
			mutex_exit(&rsm_drv_data.drv_lock);
			return (0);
9483 case RSM_DRV_REG_PROCESSING:
9484 /*
9485 * The driver is in the process of registering
9486 * with the DR framework. So, wait till the
9487 * registration process is complete.
9488 */
9489 recheck_state = 1;
9490 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9491 break;
9492 case RSM_DRV_UNREG_PROCESSING:
9493 /*
9494 * If the state is RSM_DRV_UNREG_PROCESSING, the
9495 * module is in the process of detaching and
9496 * unregistering the callbacks from the DR
9497 * framework. So, simply return.
9498 */
9499 mutex_exit(&rsm_drv_data.drv_lock);
9500 DBG_PRINTF((category, RSM_DEBUG,
9501 "rsm_dr_callback_pre_del:"
9502 "pre-del on NEW/UNREG\n"));
9503 return (0);
9504 case RSM_DRV_OK:
9505 rsm_drv_data.drv_state = RSM_DRV_PREDEL_STARTED;
9506 break;
9507 case RSM_DRV_PREDEL_STARTED:
9508 /* FALLTHRU */
9509 case RSM_DRV_PREDEL_COMPLETED:
9510 /* FALLTHRU */
9511 case RSM_DRV_POSTDEL_IN_PROGRESS:
9512 recheck_state = 1;
9513 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9514 break;
9515 case RSM_DRV_DR_IN_PROGRESS:
9516 rsm_drv_data.drv_memdel_cnt++;
9517 mutex_exit(&rsm_drv_data.drv_lock);
9518 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9519 "rsm_dr_callback_pre_del done\n"));
9520 return (0);
9521 /* break; */
9522 default:
9523 ASSERT(0);
9524 break;
9525 }
9526
9527 } while (recheck_state);
9528
9529 rsm_drv_data.drv_memdel_cnt++;
9530
9531 mutex_exit(&rsm_drv_data.drv_lock);
9532
9533 /* Do all the quiescing stuff here */
9534 DBG_PRINTF((category, RSM_DEBUG,
9535 "rsm_dr_callback_pre_del: quiesce things now\n"));
9536
9537 rsm_dr_process_local_segments(RSM_DR_QUIESCE);
9538
9539 /*
	 * now that all local segments have been quiesced, let us inform
9541 * the importers
9542 */
9543 rsm_send_suspend();
9544
9545 /*
9546 * In response to the suspend message the remote node(s) will process
9547 * the segments and send a suspend_complete message. Till all
9548 * the nodes send the suspend_complete message we wait in the
9549 * RSM_DRV_PREDEL_STARTED state. In the exporter_quiesce
9550 * function we transition to the RSM_DRV_PREDEL_COMPLETED state.
9551 */
	mutex_enter(&rsm_drv_data.drv_lock);

	while (rsm_drv_data.drv_state == RSM_DRV_PREDEL_STARTED) {
		cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
	}

	ASSERT(rsm_drv_data.drv_state == RSM_DRV_PREDEL_COMPLETED);

	rsm_drv_data.drv_state = RSM_DRV_DR_IN_PROGRESS;
	cv_broadcast(&rsm_drv_data.drv_cv);

	mutex_exit(&rsm_drv_data.drv_lock);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsm_dr_callback_pre_del done\n"));

	return (0);
}
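
/*
 * The wait on RSM_DRV_PREDEL_STARTED above is ended by the
 * suspend_complete handling. A hedged sketch of the relevant step in
 * exporter_quiesce (the completion test is illustrative; the real
 * bookkeeping of which importers have responded lives elsewhere in
 * this file):
 *
 *	mutex_enter(&rsm_drv_data.drv_lock);
 *	if (importer_suspend_done) {
 *		rsm_drv_data.drv_state = RSM_DRV_PREDEL_COMPLETED;
 *		cv_broadcast(&rsm_drv_data.drv_cv);
 *	}
 *	mutex_exit(&rsm_drv_data.drv_lock);
 */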

/* ARGSUSED */
static void
rsm_dr_callback_post_del(void *arg, pgcnt_t delta, int cancelled)
{
	int recheck_state = 0;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsm_dr_callback_post_del enter\n"));

	mutex_enter(&rsm_drv_data.drv_lock);

	do {
		recheck_state = 0;
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm_dr_callback_post_del:state=%d\n",
		    rsm_drv_data.drv_state));

		switch (rsm_drv_data.drv_state) {
		case RSM_DRV_NEW:
			/*
			 * The driver state cannot be RSM_DRV_NEW here,
			 * since in that state the callbacks have not
			 * yet been registered. ASSERT, and on non-DEBUG
			 * kernels drop the lock before bailing out.
			 */
			ASSERT(0);
			mutex_exit(&rsm_drv_data.drv_lock);
			return;
		case RSM_DRV_REG_PROCESSING:
			/*
			 * The driver is in the process of registering with
			 * the DR framework. Wait until the registration is
			 * complete.
			 */
			recheck_state = 1;
			cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
			break;
		case RSM_DRV_UNREG_PROCESSING:
			/*
			 * RSM_DRV_UNREG_PROCESSING means the module is
			 * detaching and unregistering the callbacks
			 * from the DR framework. So simply return.
			 */
			/* FALLTHRU */
		case RSM_DRV_OK:
			/*
			 * RSM_DRV_OK means we missed the pre-del
			 * corresponding to this post-del because we had
			 * not registered yet, so simply return.
			 */
			mutex_exit(&rsm_drv_data.drv_lock);
			DBG_PRINTF((category, RSM_DEBUG,
			    "rsm_dr_callback_post_del:"
			    "post-del on OK/UNREG\n"));
			return;
			/* break; */
		case RSM_DRV_PREDEL_STARTED:
			/* FALLTHRU */
		case RSM_DRV_PREDEL_COMPLETED:
			/* FALLTHRU */
		case RSM_DRV_POSTDEL_IN_PROGRESS:
			recheck_state = 1;
			cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
			break;
		case RSM_DRV_DR_IN_PROGRESS:
			rsm_drv_data.drv_memdel_cnt--;
			if (rsm_drv_data.drv_memdel_cnt > 0) {
				mutex_exit(&rsm_drv_data.drv_lock);
				DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
				    "rsm_dr_callback_post_del done\n"));
				return;
			}
			rsm_drv_data.drv_state = RSM_DRV_POSTDEL_IN_PROGRESS;
			break;
		default:
			ASSERT(0);
			mutex_exit(&rsm_drv_data.drv_lock);
			return;
			/* break; */
		}
	} while (recheck_state);

	mutex_exit(&rsm_drv_data.drv_lock);

	/* Unquiesce local segments now that the DR operation is done. */
	DBG_PRINTF((category, RSM_DEBUG,
	    "rsm_dr_callback_post_del: unquiesce things now\n"));

	rsm_dr_process_local_segments(RSM_DR_UNQUIESCE);

	/*
	 * Now that all local segments have been unquiesced, inform the
	 * importers.
	 */
	rsm_send_resume();

	mutex_enter(&rsm_drv_data.drv_lock);

	rsm_drv_data.drv_state = RSM_DRV_OK;

	cv_broadcast(&rsm_drv_data.drv_cv);

	mutex_exit(&rsm_drv_data.drv_lock);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsm_dr_callback_post_del done\n"));
}
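
/*
 * Summary of the DR-related driver-state transitions exercised by the
 * two callbacks above (reconstructed from the logic in this section):
 *
 *	RSM_DRV_OK
 *	    | pre-del: quiesce local segments, rsm_send_suspend()
 *	    v
 *	RSM_DRV_PREDEL_STARTED
 *	    | all suspend_complete messages received (exporter_quiesce)
 *	    v
 *	RSM_DRV_PREDEL_COMPLETED
 *	    | pre-del resumes; drv_memdel_cnt counts overlapping deletes
 *	    v
 *	RSM_DRV_DR_IN_PROGRESS
 *	    | last post-del (drv_memdel_cnt reaches zero)
 *	    v
 *	RSM_DRV_POSTDEL_IN_PROGRESS
 *	    | unquiesce local segments, rsm_send_resume()
 *	    v
 *	RSM_DRV_OK
 */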