1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 * Copyright 2012 Milan Jurik. All rights reserved. 25 */ 26 27 28 /* 29 * Overview of the RSM Kernel Agent: 30 * --------------------------------- 31 * 32 * rsm.c constitutes the implementation of the RSM kernel agent. The RSM 33 * kernel agent is a pseudo device driver which makes use of the RSMPI 34 * interface on behalf of the RSMAPI user library. 35 * 36 * The kernel agent functionality can be categorized into the following 37 * components: 38 * 1. Driver Infrastructure 39 * 2. Export/Import Segment Management 40 * 3. Internal resource allocation/deallocation 41 * 42 * The driver infrastructure includes the basic module loading entry points 43 * like _init, _info, _fini to load, unload and report information about 44 * the driver module. The driver infrastructure also includes the 45 * autoconfiguration entry points namely, attach, detach and getinfo for 46 * the device autoconfiguration. 
47 * 48 * The kernel agent is a pseudo character device driver and exports 49 * a cb_ops structure which defines the driver entry points for character 50 * device access. This includes the open and close entry points. The 51 * other entry points provided include ioctl, devmap and segmap and chpoll. 52 * read and write entry points are not used since the device is memory 53 * mapped. Also ddi_prop_op is used for the prop_op entry point. 54 * 55 * The ioctl entry point supports a number of commands, which are used by 56 * the RSMAPI library in order to export and import segments. These 57 * commands include commands for binding and rebinding the physical pages 58 * allocated to the virtual address range, publishing the export segment, 59 * unpublishing and republishing an export segment, creating an 60 * import segment and a virtual connection from this import segment to 61 * an export segment, performing scatter-gather data transfer, barrier 62 * operations. 63 * 64 * 65 * Export and Import segments: 66 * --------------------------- 67 * 68 * In order to create an RSM export segment a process allocates a range in its 69 * virtual address space for the segment using standard Solaris interfaces. 70 * The process then calls RSMAPI, which in turn makes an ioctl call to the 71 * RSM kernel agent for an allocation of physical memory pages and for 72 * creation of the export segment by binding these pages to the virtual 73 * address range. These pages are locked in memory so that remote accesses 74 * are always applied to the correct page. Then the RSM segment is published, 75 * again via RSMAPI making an ioctl to the RSM kernel agent, and a segment id 76 * is assigned to it. 77 * 78 * In order to import a published RSM segment, RSMAPI creates an import 79 * segment and forms a virtual connection across the interconnect to the 80 * export segment, via an ioctl into the kernel agent with the connect 81 * command. 
The import segment setup is completed by mapping the 82 * local device memory into the importer's virtual address space. The 83 * mapping of the import segment is handled by the segmap/devmap 84 * infrastructure described as follows. 85 * 86 * Segmap and Devmap interfaces: 87 * 88 * The RSM kernel agent allows device memory to be directly accessed by user 89 * threads via memory mapping. In order to do so, the RSM kernel agent 90 * supports the devmap and segmap entry points. 91 * 92 * The segmap entry point(rsm_segmap) is responsible for setting up a memory 93 * mapping as requested by mmap. The devmap entry point(rsm_devmap) is 94 * responsible for exporting the device memory to the user applications. 95 * rsm_segmap calls RSMPI rsm_map to allocate device memory. Then the 96 * control is transferred to the devmap_setup call which calls rsm_devmap. 97 * 98 * rsm_devmap validates the user mapping to the device or kernel memory 99 * and passes the information to the system for setting up the mapping. The 100 * actual setting up of the mapping is done by devmap_devmem_setup(for 101 * device memory) or devmap_umem_setup(for kernel memory). Callbacks are 102 * registered for device context management via the devmap_devmem_setup 103 * or devmap_umem_setup calls. The callbacks are rsmmap_map, rsmmap_unmap, 104 * rsmmap_access, rsmmap_dup. The callbacks are called when a new mapping 105 * is created, a mapping is freed, a mapping is accessed or an existing 106 * mapping is duplicated respectively. These callbacks allow the RSM kernel 107 * agent to maintain state information associated with the mappings. 108 * The state information is mainly in the form of a cookie list for the import 109 * segment for which mapping has been done. 110 * 111 * Forced disconnect of import segments: 112 * 113 * When an exported segment is unpublished, the exporter sends a forced 114 * disconnect message to all its importers. The importer segments are 115 * unloaded and disconnected. 
This involves unloading the original 116 * mappings and remapping to a preallocated kernel trash page. This is 117 * done by devmap_umem_remap. The trash/dummy page is a kernel page, 118 * preallocated by the kernel agent during attach using ddi_umem_alloc with 119 * the DDI_UMEM_TRASH flag set. This avoids a core dump in the application 120 * due to unloading of the original mappings. 121 * 122 * Additionally every segment has a mapping generation number associated 123 * with it. This is an entry in the barrier generation page, created 124 * during attach time. This mapping generation number for the import 125 * segments is incremented on a force disconnect to notify the application 126 * of the force disconnect. On this notification, the application needs 127 * to reconnect the segment to establish a new legitimate mapping. 128 * 129 * 130 * Locks used in the kernel agent: 131 * ------------------------------- 132 * 133 * The kernel agent uses a variety of mutexes and condition variables for 134 * mutual exclusion of the shared data structures and for synchronization 135 * between the various threads. Some of the locks are described as follows. 136 * 137 * Each resource structure, which represents either an export/import segment 138 * has a lock associated with it. The lock is the resource mutex, rsmrc_lock. 139 * This is used directly by RSMRC_LOCK and RSMRC_UNLOCK macros and in the 140 * rsmseglock_acquire and rsmseglock_release macros. An additional 141 * lock called the rsmsi_lock is used for the shared import data structure 142 * that is relevant for resources representing import segments. There is 143 * also a condition variable associated with the resource called s_cv. This 144 * is used to wait for events like the segment state change etc. 145 * 146 * The resource structures are allocated from a pool of resource structures, 147 * called rsm_resource. This pool is protected via a reader-writer lock, 148 * called rsmrc_lock. 
149 * 150 * There are two separate hash tables, one for the export segments and 151 * one for the import segments. The export segments are inserted into the 152 * export segment hash table only after they have been published and the 153 * import segments are inserted in the import segments list only after they 154 * have successfully connected to an exported segment. These tables are 155 * protected via reader-writer locks. 156 * 157 * Debug Support in the kernel agent: 158 * ---------------------------------- 159 * 160 * Debugging support in the kernel agent is provided by the following 161 * macros. 162 * 163 * DBG_PRINTF((category, level, message)) is a macro which logs a debug 164 * message to the kernel agents debug buffer, rsmka_dbg. This debug buffer 165 * can be viewed in kmdb as *rsmka_dbg/s. The message is logged based 166 * on the definition of the category and level. All messages that belong to 167 * the specified category(rsmdbg_category) and are of an equal or greater 168 * severity than the specified level(rsmdbg_level) are logged. The message 169 * is a string which uses the same formatting rules as the strings used in 170 * printf. 171 * 172 * The category defines which component of the kernel agent has logged this 173 * message. There are a number of categories that have been defined such as 174 * RSM_KERNEL_AGENT, RSM_OPS, RSM_IMPORT, RSM_EXPORT etc. A macro, 175 * DBG_ADDCATEGORY is used to add in another category to the currently 176 * specified category value so that the component using this new category 177 * can also effectively log debug messages. Thus, the category of a specific 178 * message is some combination of the available categories and we can define 179 * sub-categories if we want a finer level of granularity. 180 * 181 * The level defines the severity of the message. Different level values are 182 * defined, with RSM_ERR being the most severe and RSM_DEBUG_VERBOSE being 183 * the least severe(debug level is 0). 
184 * 185 * DBG_DEFINE and DBG_DEFINE_STR are macros provided to declare a debug 186 * variable or a string respectively. 187 * 188 * 189 * NOTES: 190 * 191 * Special Fork and Exec Handling: 192 * ------------------------------- 193 * 194 * The backing physical pages of an exported segment are always locked down. 195 * Thus, there are two cases in which a process having exported segments 196 * will cause a cpu to hang: (1) the process invokes exec; (2) a process 197 * forks and invokes exit before the duped file descriptors for the export 198 * segments are closed in the child process. The hang is caused because the 199 * address space release algorithm in Solaris VM subsystem is based on a 200 * non-blocking loop which does not terminate while segments are locked 201 * down. In addition to this, Solaris VM subsystem lacks a callback 202 * mechanism to the rsm kernel agent to allow unlocking these export 203 * segment pages. 204 * 205 * In order to circumvent this problem, the kernel agent does the following. 206 * The Solaris VM subsystem keeps memory segments in increasing order of 207 * virtual addresses. Thus a special page(special_exit_offset) is allocated 208 * by the kernel agent and is mmapped into the heap area of the process address 209 * space(the mmap is done by the RSMAPI library). During the mmap processing 210 * of this special page by the devmap infrastructure, a callback(the same 211 * devmap context management callbacks discussed above) is registered for an 212 * unmap. 213 * 214 * As discussed above, this page is processed by the Solaris address space 215 * release code before any of the exported segments pages(which are allocated 216 * from high memory). It is during this processing that the unmap callback gets 217 * called and this callback is responsible for force destroying the exported 218 * segments and thus eliminating the problem of locked pages. 
219 * 220 * Flow-control: 221 * ------------ 222 * 223 * A credit based flow control algorithm is used for messages whose 224 * processing cannot be done in the interrupt context because it might 225 * involve invoking rsmpi calls, or might take a long time to complete 226 * or might need to allocate resources. The algorithm operates on a per 227 * path basis. To send a message the pathend needs to have a credit and 228 * it consumes one for every message that is flow controlled. On the 229 * receiving pathend the message is put on a msgbuf_queue and a task is 230 * dispatched on the worker thread - recv_taskq where it is processed. 231 * After processing the message, the receiving pathend dequeues the message, 232 * and if it has processed > RSMIPC_LOTSFREE_MSGBUFS messages sends 233 * credits to the sender pathend. 234 * 235 * RSM_DRTEST: 236 * ----------- 237 * 238 * This is used to enable the DR testing using a test driver on test 239 * platforms which do not support DR. 240 * 241 */ 242 243 #include <sys/types.h> 244 #include <sys/param.h> 245 #include <sys/user.h> 246 #include <sys/buf.h> 247 #include <sys/systm.h> 248 #include <sys/cred.h> 249 #include <sys/vm.h> 250 #include <sys/uio.h> 251 #include <vm/seg.h> 252 #include <vm/page.h> 253 #include <sys/stat.h> 254 255 #include <sys/time.h> 256 #include <sys/errno.h> 257 258 #include <sys/file.h> 259 #include <sys/uio.h> 260 #include <sys/proc.h> 261 #include <sys/mman.h> 262 #include <sys/open.h> 263 #include <sys/atomic.h> 264 #include <sys/mem_config.h> 265 266 267 #include <sys/ddi.h> 268 #include <sys/devops.h> 269 #include <sys/ddidevmap.h> 270 #include <sys/sunddi.h> 271 #include <sys/esunddi.h> 272 #include <sys/ddi_impldefs.h> 273 274 #include <sys/kmem.h> 275 #include <sys/conf.h> 276 #include <sys/devops.h> 277 #include <sys/ddi_impldefs.h> 278 279 #include <sys/modctl.h> 280 281 #include <sys/policy.h> 282 #include <sys/types.h> 283 #include <sys/conf.h> 284 #include <sys/param.h> 285 286 
#include <sys/taskq.h>

#include <sys/rsm/rsm_common.h>
#include <sys/rsm/rsmapi_common.h>
#include <sys/rsm/rsm.h>
#include <rsm_in.h>
#include <sys/rsm/rsmka_path_int.h>
#include <sys/rsm/rsmpi.h>

#include <sys/modctl.h>
#include <sys/debug.h>

#include <sys/tuneable.h>

/*
 * Test-driver variants of the DR callback registration interfaces; they
 * are used by rsm_attach/rsm_detach in place of kphysm_setup_func_register
 * and kphysm_setup_func_unregister when RSM_DRTEST is defined.
 */
#ifdef RSM_DRTEST
extern int rsm_kphysm_setup_func_register(kphysm_setup_vector_t *vec,
    void *arg);
extern void rsm_kphysm_setup_func_unregister(kphysm_setup_vector_t *vec,
    void *arg);
#endif

/*
 * Routines declared extern here: debug logging, path manager setup and
 * teardown, and adapter/node address lookup helpers.
 */
extern void dbg_printf(int category, int level, char *fmt, ...);
extern void rsmka_pathmanager_init();
extern void rsmka_pathmanager_cleanup();
extern void rele_sendq_token(sendq_token_t *);
extern rsm_addr_t get_remote_hwaddr(adapter_t *, rsm_node_id_t);
extern rsm_node_id_t get_remote_nodeid(adapter_t *, rsm_addr_t);
extern int rsmka_topology_ioctl(caddr_t, int, int);

/* Data objects declared extern here (locks, cvs, work queue, priority) */
extern pri_t maxclsyspri;
extern work_queue_t work_queue;
extern kmutex_t ipc_info_lock;
extern kmutex_t ipc_info_cvlock;
extern kcondvar_t ipc_info_cv;
extern kmutex_t path_hold_cvlock;
extern kcondvar_t path_hold_cv;

/* Initialized by _init() in this file */
extern kmutex_t rsmka_buf_lock;

/* Path, adapter, sendq-token and message-buffer helpers declared extern */
extern path_t *rsm_find_path(char *, int, rsm_addr_t);
extern adapter_t *rsmka_lookup_adapter(char *, int);
extern sendq_token_t *rsmka_get_sendq_token(rsm_node_id_t, sendq_token_t *);
extern boolean_t rsmka_do_path_active(path_t *, int);
extern boolean_t rsmka_check_node_alive(rsm_node_id_t);
extern void rsmka_release_adapter(adapter_t *);
extern void rsmka_enqueue_msgbuf(path_t *path, void *data);
extern void rsmka_dequeue_msgbuf(path_t *path);
extern msgbuf_elem_t *rsmka_gethead_msgbuf(path_t *path);
/* lint -w2 */

/* Character device entry points installed in rsm_cb_ops below */
static int rsm_open(dev_t *, int, int, cred_t *);
static int rsm_close(dev_t, int, int, cred_t *);
static int rsm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
    cred_t *credp, int *rvalp);
static int rsm_devmap(dev_t, devmap_cookie_t, offset_t, size_t, size_t *,
    uint_t);
static int rsm_segmap(dev_t, off_t, struct as *, caddr_t *, off_t, uint_t,
    uint_t, uint_t, cred_t *);
static int rsm_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp);

/* Autoconfiguration entry points installed in rsm_ops below */
static int rsm_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int rsm_attach(dev_info_t *, ddi_attach_cmd_t);
static int rsm_detach(dev_info_t *, ddi_detach_cmd_t);

/* Forward declarations of file-local helpers */
static int rsmipc_send(rsm_node_id_t, rsmipc_request_t *, rsmipc_reply_t *);
static void rsm_force_unload(rsm_node_id_t, rsm_memseg_id_t, boolean_t);
static void rsm_send_importer_disconnects(rsm_memseg_id_t, rsm_node_id_t);
static void rsm_send_republish(rsm_memseg_id_t, rsmapi_access_entry_t *, int,
    rsm_permission_t);
static void rsm_export_force_destroy(ddi_umem_cookie_t *);
static void rsmacl_free(rsmapi_access_entry_t *, int);
static void rsmpiacl_free(rsm_access_entry_t *, int);

static int rsm_inc_pgcnt(pgcnt_t);
static void rsm_dec_pgcnt(pgcnt_t);
static void rsm_free_mapinfo(rsm_mapinfo_t *mapinfop);
static rsm_mapinfo_t *rsm_get_mapinfo(rsmseg_t *, off_t, size_t, off_t *,
    size_t *);
static void exporter_quiesce();
static void rsmseg_suspend(rsmseg_t *, int *);
static void rsmsegshare_suspend(rsmseg_t *);
static int rsmseg_resume(rsmseg_t *, void **);
static int rsmsegshare_resume(rsmseg_t *);

/*
 * Character/block entry point table.  The initializer is positional, so
 * the order of the entries must not be changed.  strategy, print, dump,
 * read and write are all nodev; mmap is NULL because mappings go through
 * the devmap and segmap entries.
 */
static struct cb_ops rsm_cb_ops = {
	rsm_open,		/* open */
	rsm_close,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	nodev,			/* read */
	nodev,			/* write */
	rsm_ioctl,		/* ioctl */
	rsm_devmap,		/* devmap */
	NULL,			/* mmap */
	rsm_segmap,		/* segmap */
	rsm_chpoll,		/* poll */
	ddi_prop_op,		/* cb_prop_op */
	0,			/* streamtab */
	D_NEW|D_MP|D_DEVMAP,	/* Driver compatibility flag */
	/*
	 * NOTE(review): the three trailing zeros are presumably cb_rev,
	 * cb_aread and cb_awrite - confirm against the cb_ops definition.
	 */
	0,
	0,
	0
};

/*
 * Device operations table (positional initializer).  identify and probe
 * are nulldev, reset is nodev, and there are no bus operations.
 */
static struct dev_ops rsm_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt */
	rsm_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	rsm_attach,		/* attach */
	rsm_detach,		/* detach */
	nodev,			/* reset */
	&rsm_cb_ops,		/* driver operations */
	(struct bus_ops *)0,	/* bus operations */
	/* NOTE(review): presumably the power entry point - confirm */
	0,
	ddi_quiesce_not_needed,		/* quiesce */
};

/*
 * Module linkage information for the kernel.
 */

static struct modldrv modldrv = {
	&mod_driverops, /* Type of module. This one is a pseudo driver */
	"Remote Shared Memory Driver",
	&rsm_ops,	/* driver ops */
};

static struct modlinkage modlinkage = {
	MODREV_1,
	{ (void *)&modldrv, NULL }
};

/*
 * Dynamic reconfiguration callbacks.  rsm_attach registers this vector and
 * rsm_detach unregisters it, both only when rsm_enable_dr is non-zero.
 */
static void rsm_dr_callback_post_add(void *arg, pgcnt_t delta);
static int rsm_dr_callback_pre_del(void *arg, pgcnt_t delta);
static void rsm_dr_callback_post_del(void *arg, pgcnt_t delta, int cancelled);

static kphysm_setup_vector_t rsm_dr_callback_vec = {
	KPHYSM_SETUP_VECTOR_VERSION,
	rsm_dr_callback_post_add,
	rsm_dr_callback_pre_del,
	rsm_dr_callback_post_del
};

/* This flag can be changed to 0 to help with PIT testing */
int rsmka_modunloadok = 1;
int no_reply_cnt = 0;

/* NOTE(review): presumably error counters for control/ipc sends - confirm */
uint64_t rsm_ctrlmsg_errcnt = 0;
uint64_t rsm_ipcsend_errcnt = 0;

#define	MAX_NODES 64

/* Driver state (drv_state, drv_lock, drv_cv) and the resource table */
static struct rsm_driver_data rsm_drv_data;
static struct rsmresource_table rsm_resource;

/* Forward declarations: resource table, segment teardown, hash helpers */
static void rsmresource_insert(minor_t, rsmresource_t *, rsm_resource_type_t);
static void rsmresource_destroy(void);
static int rsmresource_alloc(minor_t *);
static rsmresource_t *rsmresource_free(minor_t rnum);
static int rsm_closeconnection(rsmseg_t *seg, void **cookie);
static int rsm_unpublish(rsmseg_t *seg, int mode);
static int rsm_unbind(rsmseg_t *seg);
static uint_t rsmhash(rsm_memseg_id_t key);
static void rsmhash_alloc(rsmhash_table_t *rhash, int size);
static void rsmhash_free(rsmhash_table_t *rhash, int size);
static void *rsmhash_getbkt(rsmhash_table_t *rhash, uint_t hashval);
static void **rsmhash_bktaddr(rsmhash_table_t *rhash, uint_t hashval);
static int rsm_send_notimporting(rsm_node_id_t dest, rsm_memseg_id_t segid,
    void *cookie);
int rsm_disconnect(rsmseg_t *seg);
void rsmseg_unload(rsmseg_t *);
void rsm_suspend_complete(rsm_node_id_t src_node, int flag);

rsm_intr_hand_ret_t rsm_srv_func(rsm_controller_object_t *chd,
    rsm_intr_q_op_t opcode, rsm_addr_t src,
    void *data, size_t size, rsm_intr_hand_arg_t arg);

static void rsm_intr_callback(void *, rsm_addr_t, rsm_intr_hand_arg_t);

rsm_node_id_t my_nodeid;

/* cookie, va, offsets and length for the barrier */
static rsm_gnum_t		*bar_va;
static ddi_umem_cookie_t	bar_cookie;
static off_t			barrier_offset;
static size_t			barrier_size;
static int			max_segs;

/* cookie for the trash memory */
static ddi_umem_cookie_t	remap_cookie;

/* set to RSM_USER_APP_ID_BASE by rsm_attach */
static rsm_memseg_id_t	rsm_nextavail_segmentid;

extern taskq_t *work_taskq;
extern char *taskq_name;

static dev_info_t *rsm_dip;	/* private copy of devinfo pointer */

static rsmhash_table_t rsm_export_segs;		/* list of exported segs */
rsmhash_table_t rsm_import_segs;		/* list of imported segs */
static rsmhash_table_t rsm_event_queues;	/* list of event queues */

static rsm_ipc_t rsm_ipc;		/* ipc info */

/* list of nodes to which RSMIPC_MSG_SUSPEND has been sent */
static list_head_t rsm_suspend_list;

/* list of descriptors for remote importers */
static importers_table_t importer_list;

/* both initialized in _init() */
kmutex_t rsm_suspend_cvlock;
kcondvar_t rsm_suspend_cv;

static kmutex_t rsm_lock;

/* set up by rsmka_init_loopback(), which rsm_attach calls */
adapter_t loopback_adapter;
rsm_controller_attr_t loopback_attr;

int rsmipc_send_controlmsg(path_t *path, int msgtype);

void
rsmka_init_loopback(); 511 512 int rsmka_null_seg_create( 513 rsm_controller_handle_t, 514 rsm_memseg_export_handle_t *, 515 size_t, 516 uint_t, 517 rsm_memory_local_t *, 518 rsm_resource_callback_t, 519 rsm_resource_callback_arg_t); 520 521 int rsmka_null_seg_destroy( 522 rsm_memseg_export_handle_t); 523 524 int rsmka_null_bind( 525 rsm_memseg_export_handle_t, 526 off_t, 527 rsm_memory_local_t *, 528 rsm_resource_callback_t, 529 rsm_resource_callback_arg_t); 530 531 int rsmka_null_unbind( 532 rsm_memseg_export_handle_t, 533 off_t, 534 size_t); 535 536 int rsmka_null_rebind( 537 rsm_memseg_export_handle_t, 538 off_t, 539 rsm_memory_local_t *, 540 rsm_resource_callback_t, 541 rsm_resource_callback_arg_t); 542 543 int rsmka_null_publish( 544 rsm_memseg_export_handle_t, 545 rsm_access_entry_t [], 546 uint_t, 547 rsm_memseg_id_t, 548 rsm_resource_callback_t, 549 rsm_resource_callback_arg_t); 550 551 552 int rsmka_null_republish( 553 rsm_memseg_export_handle_t, 554 rsm_access_entry_t [], 555 uint_t, 556 rsm_resource_callback_t, 557 rsm_resource_callback_arg_t); 558 559 int rsmka_null_unpublish( 560 rsm_memseg_export_handle_t); 561 562 rsm_ops_t null_rsmpi_ops; 563 564 /* 565 * data and locks to keep track of total amount of exported memory 566 */ 567 static pgcnt_t rsm_pgcnt; 568 static pgcnt_t rsm_pgcnt_max; /* max allowed */ 569 static kmutex_t rsm_pgcnt_lock; 570 571 static int rsm_enable_dr; 572 573 static char loopback_str[] = "loopback"; 574 575 int rsm_hash_size; 576 577 /* 578 * The locking model is as follows: 579 * 580 * Local operations: 581 * find resource - grab reader lock on resouce list 582 * insert rc - grab writer lock 583 * delete rc - grab writer lock and resource mutex 584 * read/write - no lock 585 * 586 * Remote invocations: 587 * find resource - grab read lock and resource mutex 588 * 589 * State: 590 * resource state - grab resource mutex 591 */ 592 593 int 594 _init(void) 595 { 596 int e; 597 598 e = mod_install(&modlinkage); 599 if (e != 0) { 
600 return (e); 601 } 602 603 mutex_init(&rsm_lock, NULL, MUTEX_DRIVER, NULL); 604 605 mutex_init(&rsmka_buf_lock, NULL, MUTEX_DEFAULT, NULL); 606 607 608 rw_init(&rsm_resource.rsmrc_lock, NULL, RW_DRIVER, NULL); 609 610 rsm_hash_size = RSM_HASHSZ; 611 612 rw_init(&rsm_export_segs.rsmhash_rw, NULL, RW_DRIVER, NULL); 613 614 rw_init(&rsm_import_segs.rsmhash_rw, NULL, RW_DRIVER, NULL); 615 616 mutex_init(&importer_list.lock, NULL, MUTEX_DRIVER, NULL); 617 618 mutex_init(&rsm_ipc.lock, NULL, MUTEX_DRIVER, NULL); 619 cv_init(&rsm_ipc.cv, NULL, CV_DRIVER, 0); 620 621 mutex_init(&rsm_suspend_cvlock, NULL, MUTEX_DRIVER, NULL); 622 cv_init(&rsm_suspend_cv, NULL, CV_DRIVER, 0); 623 624 mutex_init(&rsm_drv_data.drv_lock, NULL, MUTEX_DRIVER, NULL); 625 cv_init(&rsm_drv_data.drv_cv, NULL, CV_DRIVER, 0); 626 627 rsm_ipc.count = RSMIPC_SZ; 628 rsm_ipc.wanted = 0; 629 rsm_ipc.sequence = 0; 630 631 (void) mutex_init(&rsm_pgcnt_lock, NULL, MUTEX_DRIVER, NULL); 632 633 for (e = 0; e < RSMIPC_SZ; e++) { 634 rsmipc_slot_t *slot = &rsm_ipc.slots[e]; 635 636 RSMIPC_SET(slot, RSMIPC_FREE); 637 mutex_init(&slot->rsmipc_lock, NULL, MUTEX_DRIVER, NULL); 638 cv_init(&slot->rsmipc_cv, NULL, CV_DRIVER, 0); 639 } 640 641 /* 642 * Initialize the suspend message list 643 */ 644 rsm_suspend_list.list_head = NULL; 645 mutex_init(&rsm_suspend_list.list_lock, NULL, MUTEX_DRIVER, NULL); 646 647 /* 648 * It is assumed here that configuration data is available 649 * during system boot since _init may be called at that time. 
650 */ 651 652 rsmka_pathmanager_init(); 653 654 DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE, 655 "rsm: _init done\n")); 656 657 return (DDI_SUCCESS); 658 659 } 660 661 int 662 _info(struct modinfo *modinfop) 663 { 664 665 return (mod_info(&modlinkage, modinfop)); 666 } 667 668 int 669 _fini(void) 670 { 671 int e; 672 673 DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE, 674 "rsm: _fini enter\n")); 675 676 /* 677 * The rsmka_modunloadok flag is simply used to help with 678 * the PIT testing. Make this flag 0 to disallow modunload. 679 */ 680 if (rsmka_modunloadok == 0) 681 return (EBUSY); 682 683 /* rsm_detach will be called as a result of mod_remove */ 684 e = mod_remove(&modlinkage); 685 if (e) { 686 DBG_PRINTF((RSM_KERNEL_AGENT, RSM_ERR, 687 "Unable to fini RSM %x\n", e)); 688 return (e); 689 } 690 691 rsmka_pathmanager_cleanup(); 692 693 rw_destroy(&rsm_resource.rsmrc_lock); 694 695 rw_destroy(&rsm_export_segs.rsmhash_rw); 696 rw_destroy(&rsm_import_segs.rsmhash_rw); 697 rw_destroy(&rsm_event_queues.rsmhash_rw); 698 699 mutex_destroy(&importer_list.lock); 700 701 mutex_destroy(&rsm_ipc.lock); 702 cv_destroy(&rsm_ipc.cv); 703 704 (void) mutex_destroy(&rsm_suspend_list.list_lock); 705 706 (void) mutex_destroy(&rsm_pgcnt_lock); 707 708 DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE, "_fini done\n")); 709 710 return (DDI_SUCCESS); 711 712 } 713 714 /*ARGSUSED1*/ 715 static int 716 rsm_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 717 { 718 minor_t rnum; 719 int percent; 720 int ret; 721 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI); 722 723 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_attach enter\n")); 724 725 switch (cmd) { 726 case DDI_ATTACH: 727 break; 728 case DDI_RESUME: 729 default: 730 DBG_PRINTF((category, RSM_ERR, 731 "rsm:rsm_attach - cmd not supported\n")); 732 return (DDI_FAILURE); 733 } 734 735 if (rsm_dip != NULL) { 736 DBG_PRINTF((category, RSM_ERR, 737 "rsm:rsm_attach - supports only " 738 "one instance\n")); 739 return (DDI_FAILURE); 740 
} 741 742 rsm_enable_dr = ddi_prop_get_int(DDI_DEV_T_ANY, devi, 743 DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, 744 "enable-dynamic-reconfiguration", 1); 745 746 mutex_enter(&rsm_drv_data.drv_lock); 747 rsm_drv_data.drv_state = RSM_DRV_REG_PROCESSING; 748 mutex_exit(&rsm_drv_data.drv_lock); 749 750 if (rsm_enable_dr) { 751 #ifdef RSM_DRTEST 752 ret = rsm_kphysm_setup_func_register(&rsm_dr_callback_vec, 753 (void *)NULL); 754 #else 755 ret = kphysm_setup_func_register(&rsm_dr_callback_vec, 756 (void *)NULL); 757 #endif 758 if (ret != 0) { 759 mutex_exit(&rsm_drv_data.drv_lock); 760 cmn_err(CE_CONT, "rsm:rsm_attach - Dynamic " 761 "reconfiguration setup failed\n"); 762 return (DDI_FAILURE); 763 } 764 } 765 766 mutex_enter(&rsm_drv_data.drv_lock); 767 ASSERT(rsm_drv_data.drv_state == RSM_DRV_REG_PROCESSING); 768 rsm_drv_data.drv_state = RSM_DRV_OK; 769 cv_broadcast(&rsm_drv_data.drv_cv); 770 mutex_exit(&rsm_drv_data.drv_lock); 771 772 /* 773 * page_list_read_lock(); 774 * xx_setup(); 775 * page_list_read_unlock(); 776 */ 777 778 rsm_hash_size = ddi_prop_get_int(DDI_DEV_T_ANY, devi, 779 DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, 780 "segment-hashtable-size", RSM_HASHSZ); 781 if (rsm_hash_size == 0) { 782 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 783 "rsm: segment-hashtable-size in rsm.conf " 784 "must be greater than 0, defaulting to 128\n")); 785 rsm_hash_size = RSM_HASHSZ; 786 } 787 788 DBG_PRINTF((category, RSM_DEBUG, "rsm_attach rsm_hash_size: %d\n", 789 rsm_hash_size)); 790 791 rsm_pgcnt = 0; 792 793 percent = ddi_prop_get_int(DDI_DEV_T_ANY, devi, 794 DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, 795 "max-exported-memory", 0); 796 if (percent < 0) { 797 DBG_PRINTF((category, RSM_ERR, 798 "rsm:rsm_attach not enough memory available to " 799 "export, or max-exported-memory set incorrectly.\n")); 800 return (DDI_FAILURE); 801 } 802 /* 0 indicates no fixed upper limit. 
maxmem is the max */ 803 /* available pageable physical mem */ 804 rsm_pgcnt_max = (percent*maxmem)/100; 805 806 if (rsm_pgcnt_max > 0) { 807 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 808 "rsm: Available physical memory = %lu pages, " 809 "Max exportable memory = %lu pages", 810 maxmem, rsm_pgcnt_max)); 811 } 812 813 /* 814 * Create minor number 815 */ 816 if (rsmresource_alloc(&rnum) != RSM_SUCCESS) { 817 DBG_PRINTF((category, RSM_ERR, 818 "rsm: rsm_attach - Unable to get " 819 "minor number\n")); 820 return (DDI_FAILURE); 821 } 822 823 ASSERT(rnum == RSM_DRIVER_MINOR); 824 825 if (ddi_create_minor_node(devi, DRIVER_NAME, S_IFCHR, 826 rnum, DDI_PSEUDO, NULL) == DDI_FAILURE) { 827 DBG_PRINTF((category, RSM_ERR, 828 "rsm: rsm_attach - unable to allocate " 829 "minor #\n")); 830 return (DDI_FAILURE); 831 } 832 833 rsm_dip = devi; 834 /* 835 * Allocate the hashtables 836 */ 837 rsmhash_alloc(&rsm_export_segs, rsm_hash_size); 838 rsmhash_alloc(&rsm_import_segs, rsm_hash_size); 839 840 importer_list.bucket = (importing_token_t **) 841 kmem_zalloc(rsm_hash_size * sizeof (importing_token_t *), KM_SLEEP); 842 843 /* 844 * Allocate a resource struct 845 */ 846 { 847 rsmresource_t *p; 848 849 p = (rsmresource_t *)kmem_zalloc(sizeof (*p), KM_SLEEP); 850 851 mutex_init(&p->rsmrc_lock, NULL, MUTEX_DRIVER, (void *) NULL); 852 853 rsmresource_insert(rnum, p, RSM_RESOURCE_BAR); 854 } 855 856 /* 857 * Based on the rsm.conf property max-segments, determine the maximum 858 * number of segments that can be exported/imported. This is then used 859 * to determine the size for barrier failure pages. 
860 */ 861 862 /* First get the max number of segments from the rsm.conf file */ 863 max_segs = ddi_prop_get_int(DDI_DEV_T_ANY, devi, 864 DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, 865 "max-segments", 0); 866 if (max_segs == 0) { 867 /* Use default number of segments */ 868 max_segs = RSM_MAX_NUM_SEG; 869 } 870 871 /* 872 * Based on the max number of segments allowed, determine the barrier 873 * page size. add 1 to max_segs since the barrier page itself uses 874 * a slot 875 */ 876 barrier_size = roundup((max_segs + 1) * sizeof (rsm_gnum_t), 877 PAGESIZE); 878 879 /* 880 * allocation of the barrier failure page 881 */ 882 bar_va = (rsm_gnum_t *)ddi_umem_alloc(barrier_size, 883 DDI_UMEM_SLEEP, &bar_cookie); 884 885 /* 886 * Set the barrier_offset 887 */ 888 barrier_offset = 0; 889 890 /* 891 * Allocate a trash memory and get a cookie for it. This will be used 892 * when remapping segments during force disconnects. Allocate the 893 * trash memory with a large size which is page aligned. 894 */ 895 (void) ddi_umem_alloc((size_t)TRASHSIZE, 896 DDI_UMEM_TRASH, &remap_cookie); 897 898 /* initialize user segment id allocation variable */ 899 rsm_nextavail_segmentid = (rsm_memseg_id_t)RSM_USER_APP_ID_BASE; 900 901 /* 902 * initialize the null_rsmpi_ops vector and the loopback adapter 903 */ 904 rsmka_init_loopback(); 905 906 907 ddi_report_dev(devi); 908 909 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_attach done\n")); 910 911 return (DDI_SUCCESS); 912 } 913 914 /* 915 * The call to mod_remove in the _fine routine will cause the system 916 * to call rsm_detach 917 */ 918 /*ARGSUSED*/ 919 static int 920 rsm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 921 { 922 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI); 923 924 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_detach enter\n")); 925 926 switch (cmd) { 927 case DDI_DETACH: 928 break; 929 default: 930 DBG_PRINTF((category, RSM_ERR, 931 "rsm:rsm_detach - cmd %x not supported\n", 932 cmd)); 933 return (DDI_FAILURE); 934 } 935 
936 mutex_enter(&rsm_drv_data.drv_lock); 937 while (rsm_drv_data.drv_state != RSM_DRV_OK) 938 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock); 939 rsm_drv_data.drv_state = RSM_DRV_UNREG_PROCESSING; 940 mutex_exit(&rsm_drv_data.drv_lock); 941 942 /* 943 * Unregister the DR callback functions 944 */ 945 if (rsm_enable_dr) { 946 #ifdef RSM_DRTEST 947 rsm_kphysm_setup_func_unregister(&rsm_dr_callback_vec, 948 (void *)NULL); 949 #else 950 kphysm_setup_func_unregister(&rsm_dr_callback_vec, 951 (void *)NULL); 952 #endif 953 } 954 955 mutex_enter(&rsm_drv_data.drv_lock); 956 ASSERT(rsm_drv_data.drv_state == RSM_DRV_UNREG_PROCESSING); 957 rsm_drv_data.drv_state = RSM_DRV_NEW; 958 mutex_exit(&rsm_drv_data.drv_lock); 959 960 ASSERT(rsm_suspend_list.list_head == NULL); 961 962 /* 963 * Release all resources, seglist, controller, ... 964 */ 965 966 /* remove intersend queues */ 967 /* remove registered services */ 968 969 970 ddi_remove_minor_node(dip, DRIVER_NAME); 971 rsm_dip = NULL; 972 973 /* 974 * Free minor zero resource 975 */ 976 { 977 rsmresource_t *p; 978 979 p = rsmresource_free(RSM_DRIVER_MINOR); 980 if (p) { 981 mutex_destroy(&p->rsmrc_lock); 982 kmem_free((void *)p, sizeof (*p)); 983 } 984 } 985 986 /* 987 * Free resource table 988 */ 989 990 rsmresource_destroy(); 991 992 /* 993 * Free the hash tables 994 */ 995 rsmhash_free(&rsm_export_segs, rsm_hash_size); 996 rsmhash_free(&rsm_import_segs, rsm_hash_size); 997 998 kmem_free((void *)importer_list.bucket, 999 rsm_hash_size * sizeof (importing_token_t *)); 1000 importer_list.bucket = NULL; 1001 1002 1003 /* free barrier page */ 1004 if (bar_cookie != NULL) { 1005 ddi_umem_free(bar_cookie); 1006 } 1007 bar_va = NULL; 1008 bar_cookie = NULL; 1009 1010 /* 1011 * Free the memory allocated for the trash 1012 */ 1013 if (remap_cookie != NULL) { 1014 ddi_umem_free(remap_cookie); 1015 } 1016 remap_cookie = NULL; 1017 1018 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_detach done\n")); 1019 1020 return (DDI_SUCCESS); 
1021 } 1022 1023 /*ARGSUSED*/ 1024 static int 1025 rsm_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 1026 { 1027 register int error; 1028 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI); 1029 1030 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_info enter\n")); 1031 1032 switch (infocmd) { 1033 case DDI_INFO_DEVT2DEVINFO: 1034 if (rsm_dip == NULL) 1035 error = DDI_FAILURE; 1036 else { 1037 *result = (void *)rsm_dip; 1038 error = DDI_SUCCESS; 1039 } 1040 break; 1041 case DDI_INFO_DEVT2INSTANCE: 1042 *result = (void *)0; 1043 error = DDI_SUCCESS; 1044 break; 1045 default: 1046 error = DDI_FAILURE; 1047 } 1048 1049 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_info done\n")); 1050 return (error); 1051 } 1052 1053 adapter_t * 1054 rsm_getadapter(rsm_ioctlmsg_t *msg, int mode) 1055 { 1056 adapter_t *adapter; 1057 char adapter_devname[MAXNAMELEN]; 1058 int instance; 1059 DBG_DEFINE(category, 1060 RSM_KERNEL_AGENT | RSM_IMPORT | RSM_EXPORT | RSM_IOCTL); 1061 1062 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_getadapter enter\n")); 1063 1064 instance = msg->cnum; 1065 1066 if ((msg->cname_len <= 0) || (msg->cname_len > MAXNAMELEN)) { 1067 return (NULL); 1068 } 1069 1070 if (ddi_copyin(msg->cname, adapter_devname, msg->cname_len, mode)) 1071 return (NULL); 1072 1073 if (strcmp(adapter_devname, "loopback") == 0) 1074 return (&loopback_adapter); 1075 1076 adapter = rsmka_lookup_adapter(adapter_devname, instance); 1077 1078 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_getadapter done\n")); 1079 1080 return (adapter); 1081 } 1082 1083 1084 /* 1085 * *********************** Resource Number Management ******************** 1086 * All resources are stored in a simple hash table. The table is an array 1087 * of pointers to resource blks. Each blk contains: 1088 * base - base number of this blk 1089 * used - number of used slots in this blk. 1090 * blks - array of pointers to resource items. 1091 * An entry in a resource blk is empty if it's NULL. 
1092 * 1093 * We start with no resource array. Each time we run out of slots, we 1094 * reallocate a new larger array and copy the pointer to the new array and 1095 * a new resource blk is allocated and added to the hash table. 1096 * 1097 * The resource control block contains: 1098 * root - array of pointer of resource blks 1099 * sz - current size of array. 1100 * len - last valid entry in array. 1101 * 1102 * A search operation based on a resource number is as follows: 1103 * index = rnum / RESOURCE_BLKSZ; 1104 * ASSERT(index < resource_block.len); 1105 * ASSERT(index < resource_block.sz); 1106 * offset = rnum % RESOURCE_BLKSZ; 1107 * ASSERT(offset >= resource_block.root[index]->base); 1108 * ASSERT(offset < resource_block.root[index]->base + RESOURCE_BLKSZ); 1109 * return resource_block.root[index]->blks[offset]; 1110 * 1111 * A resource blk is freed with its used count reachs zero. 1112 */ 1113 static int 1114 rsmresource_alloc(minor_t *rnum) 1115 { 1116 1117 /* search for available resource slot */ 1118 int i, j, empty = -1; 1119 rsmresource_blk_t *blk; 1120 1121 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1122 "rsmresource_alloc enter\n")); 1123 1124 rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER); 1125 1126 /* Try to find an empty slot */ 1127 for (i = 0; i < rsm_resource.rsmrc_len; i++) { 1128 blk = rsm_resource.rsmrc_root[i]; 1129 if (blk != NULL && blk->rsmrcblk_avail > 0) { 1130 /* found an empty slot in this blk */ 1131 for (j = 0; j < RSMRC_BLKSZ; j++) { 1132 if (blk->rsmrcblk_blks[j] == NULL) { 1133 *rnum = (minor_t) 1134 (j + (i * RSMRC_BLKSZ)); 1135 /* 1136 * obey gen page limits 1137 */ 1138 if (*rnum >= max_segs + 1) { 1139 if (empty < 0) { 1140 rw_exit(&rsm_resource. 
1141 rsmrc_lock); 1142 DBG_PRINTF(( 1143 RSM_KERNEL_ALL, 1144 RSM_ERR, 1145 "rsmresource" 1146 "_alloc failed:" 1147 "not enough res" 1148 "%d\n", *rnum)); 1149 return (RSMERR_INSUFFICIENT_RESOURCES); 1150 } else { 1151 /* use empty slot */ 1152 break; 1153 } 1154 1155 } 1156 1157 blk->rsmrcblk_blks[j] = RSMRC_RESERVED; 1158 blk->rsmrcblk_avail--; 1159 rw_exit(&rsm_resource.rsmrc_lock); 1160 DBG_PRINTF((RSM_KERNEL_ALL, 1161 RSM_DEBUG_VERBOSE, 1162 "rsmresource_alloc done\n")); 1163 return (RSM_SUCCESS); 1164 } 1165 } 1166 } else if (blk == NULL && empty < 0) { 1167 /* remember first empty slot */ 1168 empty = i; 1169 } 1170 } 1171 1172 /* Couldn't find anything, allocate a new blk */ 1173 /* 1174 * Do we need to reallocate the root array 1175 */ 1176 if (empty < 0) { 1177 if (rsm_resource.rsmrc_len == rsm_resource.rsmrc_sz) { 1178 /* 1179 * Allocate new array and copy current stuff into it 1180 */ 1181 rsmresource_blk_t **p; 1182 uint_t newsz = (uint_t)rsm_resource.rsmrc_sz + 1183 RSMRC_BLKSZ; 1184 /* 1185 * Don't allocate more that max valid rnum 1186 */ 1187 if (rsm_resource.rsmrc_len*RSMRC_BLKSZ >= 1188 max_segs + 1) { 1189 rw_exit(&rsm_resource.rsmrc_lock); 1190 return (RSMERR_INSUFFICIENT_RESOURCES); 1191 } 1192 1193 p = (rsmresource_blk_t **)kmem_zalloc( 1194 newsz * sizeof (*p), 1195 KM_SLEEP); 1196 1197 if (rsm_resource.rsmrc_root) { 1198 uint_t oldsz; 1199 1200 oldsz = (uint_t)(rsm_resource.rsmrc_sz * 1201 (int)sizeof (*p)); 1202 1203 /* 1204 * Copy old data into new space and 1205 * free old stuff 1206 */ 1207 bcopy(rsm_resource.rsmrc_root, p, oldsz); 1208 kmem_free(rsm_resource.rsmrc_root, oldsz); 1209 } 1210 1211 rsm_resource.rsmrc_root = p; 1212 rsm_resource.rsmrc_sz = (int)newsz; 1213 } 1214 1215 empty = rsm_resource.rsmrc_len; 1216 rsm_resource.rsmrc_len++; 1217 } 1218 1219 /* 1220 * Allocate a new blk 1221 */ 1222 blk = (rsmresource_blk_t *)kmem_zalloc(sizeof (*blk), KM_SLEEP); 1223 ASSERT(rsm_resource.rsmrc_root[empty] == NULL); 1224 
rsm_resource.rsmrc_root[empty] = blk; 1225 blk->rsmrcblk_avail = RSMRC_BLKSZ - 1; 1226 1227 /* 1228 * Allocate slot 1229 */ 1230 1231 *rnum = (minor_t)(empty * RSMRC_BLKSZ); 1232 1233 /* 1234 * watch out not to exceed bounds of barrier page 1235 */ 1236 if (*rnum >= max_segs + 1) { 1237 rw_exit(&rsm_resource.rsmrc_lock); 1238 DBG_PRINTF((RSM_KERNEL_ALL, RSM_ERR, 1239 "rsmresource_alloc failed %d\n", *rnum)); 1240 1241 return (RSMERR_INSUFFICIENT_RESOURCES); 1242 } 1243 blk->rsmrcblk_blks[0] = RSMRC_RESERVED; 1244 1245 1246 rw_exit(&rsm_resource.rsmrc_lock); 1247 1248 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1249 "rsmresource_alloc done\n")); 1250 1251 return (RSM_SUCCESS); 1252 } 1253 1254 static rsmresource_t * 1255 rsmresource_free(minor_t rnum) 1256 { 1257 1258 /* search for available resource slot */ 1259 int i, j; 1260 rsmresource_blk_t *blk; 1261 rsmresource_t *p; 1262 1263 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1264 "rsmresource_free enter\n")); 1265 1266 i = (int)(rnum / RSMRC_BLKSZ); 1267 j = (int)(rnum % RSMRC_BLKSZ); 1268 1269 if (i >= rsm_resource.rsmrc_len) { 1270 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1271 "rsmresource_free done\n")); 1272 return (NULL); 1273 } 1274 1275 rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER); 1276 1277 ASSERT(rsm_resource.rsmrc_root); 1278 ASSERT(i < rsm_resource.rsmrc_len); 1279 ASSERT(i < rsm_resource.rsmrc_sz); 1280 blk = rsm_resource.rsmrc_root[i]; 1281 if (blk == NULL) { 1282 rw_exit(&rsm_resource.rsmrc_lock); 1283 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1284 "rsmresource_free done\n")); 1285 return (NULL); 1286 } 1287 1288 ASSERT(blk->rsmrcblk_blks[j]); /* reserved or full */ 1289 1290 p = blk->rsmrcblk_blks[j]; 1291 if (p == RSMRC_RESERVED) { 1292 p = NULL; 1293 } 1294 1295 blk->rsmrcblk_blks[j] = NULL; 1296 blk->rsmrcblk_avail++; 1297 if (blk->rsmrcblk_avail == RSMRC_BLKSZ) { 1298 /* free this blk */ 1299 kmem_free(blk, sizeof (*blk)); 1300 rsm_resource.rsmrc_root[i] = NULL; 1301 } 1302 
1303 rw_exit(&rsm_resource.rsmrc_lock); 1304 1305 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1306 "rsmresource_free done\n")); 1307 1308 return (p); 1309 } 1310 1311 static rsmresource_t * 1312 rsmresource_lookup(minor_t rnum, int lock) 1313 { 1314 int i, j; 1315 rsmresource_blk_t *blk; 1316 rsmresource_t *p; 1317 1318 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1319 "rsmresource_lookup enter\n")); 1320 1321 /* Find resource and lock it in READER mode */ 1322 /* search for available resource slot */ 1323 1324 i = (int)(rnum / RSMRC_BLKSZ); 1325 j = (int)(rnum % RSMRC_BLKSZ); 1326 1327 if (i >= rsm_resource.rsmrc_len) { 1328 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1329 "rsmresource_lookup done\n")); 1330 return (NULL); 1331 } 1332 1333 rw_enter(&rsm_resource.rsmrc_lock, RW_READER); 1334 1335 blk = rsm_resource.rsmrc_root[i]; 1336 if (blk != NULL) { 1337 ASSERT(i < rsm_resource.rsmrc_len); 1338 ASSERT(i < rsm_resource.rsmrc_sz); 1339 1340 p = blk->rsmrcblk_blks[j]; 1341 if (lock == RSM_LOCK) { 1342 if (p != RSMRC_RESERVED) { 1343 mutex_enter(&p->rsmrc_lock); 1344 } else { 1345 p = NULL; 1346 } 1347 } 1348 } else { 1349 p = NULL; 1350 } 1351 rw_exit(&rsm_resource.rsmrc_lock); 1352 1353 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1354 "rsmresource_lookup done\n")); 1355 1356 return (p); 1357 } 1358 1359 static void 1360 rsmresource_insert(minor_t rnum, rsmresource_t *p, rsm_resource_type_t type) 1361 { 1362 /* Find resource and lock it in READER mode */ 1363 /* Caller can upgrade if need be */ 1364 /* search for available resource slot */ 1365 int i, j; 1366 rsmresource_blk_t *blk; 1367 1368 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1369 "rsmresource_insert enter\n")); 1370 1371 i = (int)(rnum / RSMRC_BLKSZ); 1372 j = (int)(rnum % RSMRC_BLKSZ); 1373 1374 p->rsmrc_type = type; 1375 p->rsmrc_num = rnum; 1376 1377 rw_enter(&rsm_resource.rsmrc_lock, RW_READER); 1378 1379 ASSERT(rsm_resource.rsmrc_root); 1380 ASSERT(i < rsm_resource.rsmrc_len); 1381 
ASSERT(i < rsm_resource.rsmrc_sz); 1382 1383 blk = rsm_resource.rsmrc_root[i]; 1384 ASSERT(blk); 1385 1386 ASSERT(blk->rsmrcblk_blks[j] == RSMRC_RESERVED); 1387 1388 blk->rsmrcblk_blks[j] = p; 1389 1390 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1391 "rsmresource_insert done\n")); 1392 1393 rw_exit(&rsm_resource.rsmrc_lock); 1394 } 1395 1396 static void 1397 rsmresource_destroy() 1398 { 1399 int i, j; 1400 1401 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1402 "rsmresource_destroy enter\n")); 1403 1404 rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER); 1405 1406 for (i = 0; i < rsm_resource.rsmrc_len; i++) { 1407 rsmresource_blk_t *blk; 1408 1409 blk = rsm_resource.rsmrc_root[i]; 1410 if (blk == NULL) { 1411 continue; 1412 } 1413 for (j = 0; j < RSMRC_BLKSZ; j++) { 1414 if (blk->rsmrcblk_blks[j] != NULL) { 1415 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1416 "Not null slot %d, %lx\n", j, 1417 (size_t)blk->rsmrcblk_blks[j])); 1418 } 1419 } 1420 kmem_free(blk, sizeof (*blk)); 1421 rsm_resource.rsmrc_root[i] = NULL; 1422 } 1423 if (rsm_resource.rsmrc_root) { 1424 i = rsm_resource.rsmrc_sz * (int)sizeof (rsmresource_blk_t *); 1425 kmem_free(rsm_resource.rsmrc_root, (uint_t)i); 1426 rsm_resource.rsmrc_root = NULL; 1427 rsm_resource.rsmrc_len = 0; 1428 rsm_resource.rsmrc_sz = 0; 1429 } 1430 1431 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1432 "rsmresource_destroy done\n")); 1433 1434 rw_exit(&rsm_resource.rsmrc_lock); 1435 } 1436 1437 1438 /* ******************** Generic Key Hash Table Management ********* */ 1439 static rsmresource_t * 1440 rsmhash_lookup(rsmhash_table_t *rhash, rsm_memseg_id_t key, 1441 rsm_resource_state_t state) 1442 { 1443 rsmresource_t *p; 1444 uint_t hashval; 1445 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 1446 1447 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_lookup enter\n")); 1448 1449 hashval = rsmhash(key); 1450 1451 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_lookup %u=%d\n", 1452 key, hashval)); 1453 1454 
rw_enter(&rhash->rsmhash_rw, RW_READER); 1455 1456 p = (rsmresource_t *)rsmhash_getbkt(rhash, hashval); 1457 1458 for (; p; p = p->rsmrc_next) { 1459 if (p->rsmrc_key == key) { 1460 /* acquire resource lock */ 1461 RSMRC_LOCK(p); 1462 break; 1463 } 1464 } 1465 1466 rw_exit(&rhash->rsmhash_rw); 1467 1468 if (p != NULL && p->rsmrc_state != state) { 1469 /* state changed, release lock and return null */ 1470 RSMRC_UNLOCK(p); 1471 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 1472 "rsmhash_lookup done: state changed\n")); 1473 return (NULL); 1474 } 1475 1476 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_lookup done\n")); 1477 1478 return (p); 1479 } 1480 1481 static void 1482 rsmhash_rm(rsmhash_table_t *rhash, rsmresource_t *rcelm) 1483 { 1484 rsmresource_t *p, **back; 1485 uint_t hashval; 1486 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 1487 1488 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_rm enter\n")); 1489 1490 hashval = rsmhash(rcelm->rsmrc_key); 1491 1492 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_rm %u=%d\n", 1493 rcelm->rsmrc_key, hashval)); 1494 1495 /* 1496 * It's ok not to find the segment. 
1497 */ 1498 rw_enter(&rhash->rsmhash_rw, RW_WRITER); 1499 1500 back = (rsmresource_t **)rsmhash_bktaddr(rhash, hashval); 1501 1502 for (; (p = *back) != NULL; back = &p->rsmrc_next) { 1503 if (p == rcelm) { 1504 *back = rcelm->rsmrc_next; 1505 break; 1506 } 1507 } 1508 1509 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_rm done\n")); 1510 1511 rw_exit(&rhash->rsmhash_rw); 1512 } 1513 1514 static int 1515 rsmhash_add(rsmhash_table_t *rhash, rsmresource_t *new, rsm_memseg_id_t key, 1516 int dup_check, rsm_resource_state_t state) 1517 { 1518 rsmresource_t *p = NULL, **bktp; 1519 uint_t hashval; 1520 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 1521 1522 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_add enter\n")); 1523 1524 /* lock table */ 1525 rw_enter(&rhash->rsmhash_rw, RW_WRITER); 1526 1527 /* 1528 * If the current resource state is other than the state passed in 1529 * then the resource is (probably) already on the list. eg. for an 1530 * import segment if the state is not RSM_STATE_NEW then it's on the 1531 * list already. 1532 */ 1533 RSMRC_LOCK(new); 1534 if (new->rsmrc_state != state) { 1535 RSMRC_UNLOCK(new); 1536 rw_exit(&rhash->rsmhash_rw); 1537 return (RSMERR_BAD_SEG_HNDL); 1538 } 1539 1540 hashval = rsmhash(key); 1541 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_add %d\n", hashval)); 1542 1543 if (dup_check) { 1544 /* 1545 * Used for checking export segments; don't want to have 1546 * the same key used for multiple segments. 
1547 */ 1548 1549 p = (rsmresource_t *)rsmhash_getbkt(rhash, hashval); 1550 1551 for (; p; p = p->rsmrc_next) { 1552 if (p->rsmrc_key == key) { 1553 RSMRC_UNLOCK(new); 1554 break; 1555 } 1556 } 1557 } 1558 1559 if (p == NULL) { 1560 /* Key doesn't exist, add it */ 1561 1562 bktp = (rsmresource_t **)rsmhash_bktaddr(rhash, hashval); 1563 1564 new->rsmrc_key = key; 1565 new->rsmrc_next = *bktp; 1566 *bktp = new; 1567 } 1568 1569 rw_exit(&rhash->rsmhash_rw); 1570 1571 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_add done\n")); 1572 1573 return (p == NULL ? RSM_SUCCESS : RSMERR_SEGID_IN_USE); 1574 } 1575 1576 /* 1577 * XOR each byte of the key. 1578 */ 1579 static uint_t 1580 rsmhash(rsm_memseg_id_t key) 1581 { 1582 uint_t hash = key; 1583 1584 hash ^= (key >> 8); 1585 hash ^= (key >> 16); 1586 hash ^= (key >> 24); 1587 1588 return (hash % rsm_hash_size); 1589 1590 } 1591 1592 /* 1593 * generic function to get a specific bucket 1594 */ 1595 static void * 1596 rsmhash_getbkt(rsmhash_table_t *rhash, uint_t hashval) 1597 { 1598 1599 if (rhash->bucket == NULL) 1600 return (NULL); 1601 else 1602 return ((void *)rhash->bucket[hashval]); 1603 } 1604 1605 /* 1606 * generic function to get a specific bucket's address 1607 */ 1608 static void ** 1609 rsmhash_bktaddr(rsmhash_table_t *rhash, uint_t hashval) 1610 { 1611 if (rhash->bucket == NULL) 1612 return (NULL); 1613 else 1614 return ((void **)&(rhash->bucket[hashval])); 1615 } 1616 1617 /* 1618 * generic function to alloc a hash table 1619 */ 1620 static void 1621 rsmhash_alloc(rsmhash_table_t *rhash, int size) 1622 { 1623 rhash->bucket = (rsmresource_t **) 1624 kmem_zalloc(size * sizeof (rsmresource_t *), KM_SLEEP); 1625 } 1626 1627 /* 1628 * generic function to free a hash table 1629 */ 1630 static void 1631 rsmhash_free(rsmhash_table_t *rhash, int size) 1632 { 1633 1634 kmem_free((void *)rhash->bucket, size * sizeof (caddr_t)); 1635 rhash->bucket = NULL; 1636 1637 } 1638 /* *********************** Exported Segment Key 
Management ************ */ 1639 1640 #define rsmexport_add(new, key) \ 1641 rsmhash_add(&rsm_export_segs, (rsmresource_t *)new, key, 1, \ 1642 RSM_STATE_BIND) 1643 1644 #define rsmexport_rm(arg) \ 1645 rsmhash_rm(&rsm_export_segs, (rsmresource_t *)(arg)) 1646 1647 #define rsmexport_lookup(key) \ 1648 (rsmseg_t *)rsmhash_lookup(&rsm_export_segs, key, RSM_STATE_EXPORT) 1649 1650 /* ************************** Import Segment List Management ********** */ 1651 1652 /* 1653 * Add segment to import list. This will be useful for paging and loopback 1654 * segment unloading. 1655 */ 1656 #define rsmimport_add(arg, key) \ 1657 rsmhash_add(&rsm_import_segs, (rsmresource_t *)(arg), (key), 0, \ 1658 RSM_STATE_NEW) 1659 1660 #define rsmimport_rm(arg) \ 1661 rsmhash_rm(&rsm_import_segs, (rsmresource_t *)(arg)) 1662 1663 /* 1664 * #define rsmimport_lookup(key) \ 1665 * (rsmseg_t *)rsmhash_lookup(&rsm_import_segs, (key), RSM_STATE_CONNECT) 1666 */ 1667 1668 /* 1669 * increase the ref count and make the import segment point to the 1670 * shared data structure. Return a pointer to the share data struct 1671 * and the shared data struct is locked upon return 1672 */ 1673 static rsm_import_share_t * 1674 rsmshare_get(rsm_memseg_id_t key, rsm_node_id_t node, adapter_t *adapter, 1675 rsmseg_t *segp) 1676 { 1677 uint_t hash; 1678 rsmresource_t *p; 1679 rsm_import_share_t *shdatap; 1680 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 1681 1682 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmshare_get enter\n")); 1683 1684 hash = rsmhash(key); 1685 /* lock table */ 1686 rw_enter(&rsm_import_segs.rsmhash_rw, RW_WRITER); 1687 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmshare_get:key=%u, hash=%d\n", 1688 key, hash)); 1689 1690 p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hash); 1691 1692 for (; p; p = p->rsmrc_next) { 1693 /* 1694 * Look for an entry that is importing the same exporter 1695 * with the share data structure allocated. 
1696 */ 1697 if ((p->rsmrc_key == key) && 1698 (p->rsmrc_node == node) && 1699 (p->rsmrc_adapter == adapter) && 1700 (((rsmseg_t *)p)->s_share != NULL)) { 1701 shdatap = ((rsmseg_t *)p)->s_share; 1702 break; 1703 } 1704 } 1705 1706 if (p == NULL) { 1707 /* we are the first importer, create the shared data struct */ 1708 shdatap = kmem_zalloc(sizeof (rsm_import_share_t), KM_SLEEP); 1709 shdatap->rsmsi_state = RSMSI_STATE_NEW; 1710 shdatap->rsmsi_segid = key; 1711 shdatap->rsmsi_node = node; 1712 mutex_init(&shdatap->rsmsi_lock, NULL, MUTEX_DRIVER, NULL); 1713 cv_init(&shdatap->rsmsi_cv, NULL, CV_DRIVER, 0); 1714 } 1715 1716 rsmseglock_acquire(segp); 1717 1718 /* we grab the shared lock before returning from this function */ 1719 mutex_enter(&shdatap->rsmsi_lock); 1720 1721 shdatap->rsmsi_refcnt++; 1722 segp->s_share = shdatap; 1723 1724 rsmseglock_release(segp); 1725 1726 rw_exit(&rsm_import_segs.rsmhash_rw); 1727 1728 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmshare_get done\n")); 1729 1730 return (shdatap); 1731 } 1732 1733 /* 1734 * the shared data structure should be locked before calling 1735 * rsmsharecv_signal(). 1736 * Change the state and signal any waiting segments. 
1737 */ 1738 void 1739 rsmsharecv_signal(rsmseg_t *seg, int oldstate, int newstate) 1740 { 1741 ASSERT(rsmsharelock_held(seg)); 1742 1743 if (seg->s_share->rsmsi_state == oldstate) { 1744 seg->s_share->rsmsi_state = newstate; 1745 cv_broadcast(&seg->s_share->rsmsi_cv); 1746 } 1747 } 1748 1749 /* 1750 * Add to the hash table 1751 */ 1752 static void 1753 importer_list_add(rsm_node_id_t node, rsm_memseg_id_t key, rsm_addr_t hwaddr, 1754 void *cookie) 1755 { 1756 1757 importing_token_t *head; 1758 importing_token_t *new_token; 1759 int index; 1760 1761 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 1762 1763 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_add enter\n")); 1764 1765 new_token = kmem_zalloc(sizeof (importing_token_t), KM_SLEEP); 1766 new_token->importing_node = node; 1767 new_token->key = key; 1768 new_token->import_segment_cookie = cookie; 1769 new_token->importing_adapter_hwaddr = hwaddr; 1770 1771 index = rsmhash(key); 1772 1773 mutex_enter(&importer_list.lock); 1774 1775 head = importer_list.bucket[index]; 1776 importer_list.bucket[index] = new_token; 1777 new_token->next = head; 1778 mutex_exit(&importer_list.lock); 1779 1780 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_add done\n")); 1781 } 1782 1783 static void 1784 importer_list_rm(rsm_node_id_t node, rsm_memseg_id_t key, void *cookie) 1785 { 1786 1787 importing_token_t *prev, *token = NULL; 1788 int index; 1789 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 1790 1791 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_rm enter\n")); 1792 1793 index = rsmhash(key); 1794 1795 mutex_enter(&importer_list.lock); 1796 1797 token = importer_list.bucket[index]; 1798 1799 prev = token; 1800 while (token != NULL) { 1801 if (token->importing_node == node && 1802 token->import_segment_cookie == cookie) { 1803 if (prev == token) 1804 importer_list.bucket[index] = token->next; 1805 else 1806 prev->next = token->next; 1807 kmem_free((void *)token, sizeof (*token)); 1808 
break; 1809 } else { 1810 prev = token; 1811 token = token->next; 1812 } 1813 } 1814 1815 mutex_exit(&importer_list.lock); 1816 1817 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_rm done\n")); 1818 1819 1820 } 1821 1822 /* **************************Segment Structure Management ************* */ 1823 1824 /* 1825 * Free segment structure 1826 */ 1827 static void 1828 rsmseg_free(rsmseg_t *seg) 1829 { 1830 1831 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 1832 1833 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_free enter\n")); 1834 1835 /* need to take seglock here to avoid race with rsmmap_unmap() */ 1836 rsmseglock_acquire(seg); 1837 if (seg->s_ckl != NULL) { 1838 /* Segment is still busy */ 1839 seg->s_state = RSM_STATE_END; 1840 rsmseglock_release(seg); 1841 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 1842 "rsmseg_free done\n")); 1843 return; 1844 } 1845 1846 rsmseglock_release(seg); 1847 1848 ASSERT(seg->s_state == RSM_STATE_END || seg->s_state == RSM_STATE_NEW); 1849 1850 /* 1851 * If it's an importer decrement the refcount 1852 * and if its down to zero free the shared data structure. 1853 * This is where failures during rsm_connect() are unrefcounted 1854 */ 1855 if (seg->s_share != NULL) { 1856 1857 ASSERT(seg->s_type == RSM_RESOURCE_IMPORT_SEGMENT); 1858 1859 rsmsharelock_acquire(seg); 1860 1861 ASSERT(seg->s_share->rsmsi_refcnt > 0); 1862 1863 seg->s_share->rsmsi_refcnt--; 1864 1865 if (seg->s_share->rsmsi_refcnt == 0) { 1866 rsmsharelock_release(seg); 1867 mutex_destroy(&seg->s_share->rsmsi_lock); 1868 cv_destroy(&seg->s_share->rsmsi_cv); 1869 kmem_free((void *)(seg->s_share), 1870 sizeof (rsm_import_share_t)); 1871 } else { 1872 rsmsharelock_release(seg); 1873 } 1874 /* 1875 * The following needs to be done after any 1876 * rsmsharelock calls which use seg->s_share. 
1877 */ 1878 seg->s_share = NULL; 1879 } 1880 1881 cv_destroy(&seg->s_cv); 1882 mutex_destroy(&seg->s_lock); 1883 rsmacl_free(seg->s_acl, seg->s_acl_len); 1884 rsmpiacl_free(seg->s_acl_in, seg->s_acl_len); 1885 if (seg->s_adapter) 1886 rsmka_release_adapter(seg->s_adapter); 1887 1888 kmem_free((void *)seg, sizeof (*seg)); 1889 1890 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_free done\n")); 1891 1892 } 1893 1894 1895 static rsmseg_t * 1896 rsmseg_alloc(minor_t num, struct cred *cred) 1897 { 1898 rsmseg_t *new; 1899 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 1900 1901 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_alloc enter\n")); 1902 /* 1903 * allocate memory for new segment. This should be a segkmem cache. 1904 */ 1905 new = (rsmseg_t *)kmem_zalloc(sizeof (*new), KM_SLEEP); 1906 1907 new->s_state = RSM_STATE_NEW; 1908 new->s_minor = num; 1909 new->s_acl_len = 0; 1910 new->s_cookie = NULL; 1911 new->s_adapter = NULL; 1912 1913 new->s_mode = 0777 & ~PTOU((ttoproc(curthread)))->u_cmask; 1914 /* we don't have a key yet, will set at export/connect */ 1915 new->s_uid = crgetuid(cred); 1916 new->s_gid = crgetgid(cred); 1917 1918 mutex_init(&new->s_lock, NULL, MUTEX_DRIVER, (void *)NULL); 1919 cv_init(&new->s_cv, NULL, CV_DRIVER, 0); 1920 1921 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_alloc done\n")); 1922 1923 return (new); 1924 } 1925 1926 /* ******************************** Driver Open/Close/Poll *************** */ 1927 1928 /*ARGSUSED1*/ 1929 static int 1930 rsm_open(dev_t *devp, int flag, int otyp, struct cred *cred) 1931 { 1932 minor_t rnum; 1933 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI); 1934 1935 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_open enter\n")); 1936 /* 1937 * Char only 1938 */ 1939 if (otyp != OTYP_CHR) { 1940 DBG_PRINTF((category, RSM_ERR, "rsm_open: bad otyp\n")); 1941 return (EINVAL); 1942 } 1943 1944 /* 1945 * Only zero can be opened, clones are used for resources. 
1946 */ 1947 if (getminor(*devp) != RSM_DRIVER_MINOR) { 1948 DBG_PRINTF((category, RSM_ERR, 1949 "rsm_open: bad minor %d\n", getminor(*devp))); 1950 return (ENODEV); 1951 } 1952 1953 if ((flag & FEXCL) != 0 && secpolicy_excl_open(cred) != 0) { 1954 DBG_PRINTF((category, RSM_ERR, "rsm_open: bad perm\n")); 1955 return (EPERM); 1956 } 1957 1958 if (!(flag & FWRITE)) { 1959 /* 1960 * The library function _rsm_librsm_init calls open for 1961 * /dev/rsm with flag set to O_RDONLY. We want a valid 1962 * file descriptor to be returned for minor device zero. 1963 */ 1964 1965 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 1966 "rsm_open RDONLY done\n")); 1967 return (DDI_SUCCESS); 1968 } 1969 1970 /* 1971 * - allocate new minor number and segment. 1972 * - add segment to list of all segments. 1973 * - set minordev data to segment 1974 * - update devp argument to new device 1975 * - update s_cred to cred; make sure you do crhold(cred); 1976 */ 1977 1978 /* allocate a new resource number */ 1979 if (rsmresource_alloc(&rnum) == RSM_SUCCESS) { 1980 /* 1981 * We will bind this minor to a specific resource in first 1982 * ioctl 1983 */ 1984 *devp = makedevice(getmajor(*devp), rnum); 1985 } else { 1986 return (EAGAIN); 1987 } 1988 1989 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_open done\n")); 1990 return (DDI_SUCCESS); 1991 } 1992 1993 static void 1994 rsmseg_close(rsmseg_t *seg, int force_flag) 1995 { 1996 int e = RSM_SUCCESS; 1997 1998 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI); 1999 2000 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_close enter\n")); 2001 2002 rsmseglock_acquire(seg); 2003 if (!force_flag && (seg->s_hdr.rsmrc_type == 2004 RSM_RESOURCE_EXPORT_SEGMENT)) { 2005 /* 2006 * If we are processing rsm_close wait for force_destroy 2007 * processing to complete since force_destroy processing 2008 * needs to finish first before we can free the segment. 
2009 * force_destroy is only for export segments 2010 */ 2011 while (seg->s_flags & RSM_FORCE_DESTROY_WAIT) { 2012 cv_wait(&seg->s_cv, &seg->s_lock); 2013 } 2014 } 2015 rsmseglock_release(seg); 2016 2017 /* It's ok to read the state without a lock */ 2018 switch (seg->s_state) { 2019 case RSM_STATE_EXPORT: 2020 case RSM_STATE_EXPORT_QUIESCING: 2021 case RSM_STATE_EXPORT_QUIESCED: 2022 e = rsm_unpublish(seg, 1); 2023 /* FALLTHRU */ 2024 case RSM_STATE_BIND_QUIESCED: 2025 /* FALLTHRU */ 2026 case RSM_STATE_BIND: 2027 e = rsm_unbind(seg); 2028 if (e != RSM_SUCCESS && force_flag == 1) 2029 return; 2030 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT); 2031 /* FALLTHRU */ 2032 case RSM_STATE_NEW_QUIESCED: 2033 rsmseglock_acquire(seg); 2034 seg->s_state = RSM_STATE_NEW; 2035 cv_broadcast(&seg->s_cv); 2036 rsmseglock_release(seg); 2037 break; 2038 case RSM_STATE_NEW: 2039 break; 2040 case RSM_STATE_ZOMBIE: 2041 /* 2042 * Segments in this state have been removed off the 2043 * exported segments list and have been unpublished 2044 * and unbind. These segments have been removed during 2045 * a callback to the rsm_export_force_destroy, which 2046 * is called for the purpose of unlocking these 2047 * exported memory segments when a process exits but 2048 * leaves the segments locked down since rsm_close is 2049 * is not called for the segments. This can happen 2050 * when a process calls fork or exec and then exits. 2051 * Once the segments are in the ZOMBIE state, all that 2052 * remains is to destroy them when rsm_close is called. 2053 * This is done here. Thus, for such segments the 2054 * the state is changed to new so that later in this 2055 * function rsmseg_free is called. 
2056 */ 2057 rsmseglock_acquire(seg); 2058 seg->s_state = RSM_STATE_NEW; 2059 rsmseglock_release(seg); 2060 break; 2061 case RSM_STATE_MAP_QUIESCE: 2062 case RSM_STATE_ACTIVE: 2063 /* Disconnect will handle the unmap */ 2064 case RSM_STATE_CONN_QUIESCE: 2065 case RSM_STATE_CONNECT: 2066 case RSM_STATE_DISCONNECT: 2067 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 2068 (void) rsm_disconnect(seg); 2069 break; 2070 case RSM_STATE_MAPPING: 2071 /*FALLTHRU*/ 2072 case RSM_STATE_END: 2073 DBG_PRINTF((category, RSM_ERR, 2074 "Invalid segment state %d in rsm_close\n", seg->s_state)); 2075 break; 2076 default: 2077 DBG_PRINTF((category, RSM_ERR, 2078 "Invalid segment state %d in rsm_close\n", seg->s_state)); 2079 break; 2080 } 2081 2082 /* 2083 * check state. 2084 * - make sure you do crfree(s_cred); 2085 * release segment and minor number 2086 */ 2087 ASSERT(seg->s_state == RSM_STATE_NEW); 2088 2089 /* 2090 * The export_force_destroy callback is created to unlock 2091 * the exported segments of a process 2092 * when the process does a fork or exec and then exits calls this 2093 * function with the force flag set to 1 which indicates that the 2094 * segment state must be converted to ZOMBIE. This state means that the 2095 * segments still exist and have been unlocked and most importantly the 2096 * only operation allowed is to destroy them on an rsm_close. 
2097 */ 2098 if (force_flag) { 2099 rsmseglock_acquire(seg); 2100 seg->s_state = RSM_STATE_ZOMBIE; 2101 rsmseglock_release(seg); 2102 } else { 2103 rsmseg_free(seg); 2104 } 2105 2106 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_close done\n")); 2107 } 2108 2109 static int 2110 rsm_close(dev_t dev, int flag, int otyp, cred_t *cred) 2111 { 2112 minor_t rnum = getminor(dev); 2113 rsmresource_t *res; 2114 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI); 2115 2116 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close enter\n")); 2117 2118 flag = flag; cred = cred; 2119 2120 if (otyp != OTYP_CHR) 2121 return (EINVAL); 2122 2123 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rnum = %d\n", rnum)); 2124 2125 /* 2126 * At this point we are the last reference to the resource. 2127 * Free resource number from resource table. 2128 * It's ok to remove number before we free the segment. 2129 * We need to lock the resource to protect against remote calls. 2130 */ 2131 if (rnum == RSM_DRIVER_MINOR || 2132 (res = rsmresource_free(rnum)) == NULL) { 2133 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close done\n")); 2134 return (DDI_SUCCESS); 2135 } 2136 2137 switch (res->rsmrc_type) { 2138 case RSM_RESOURCE_EXPORT_SEGMENT: 2139 case RSM_RESOURCE_IMPORT_SEGMENT: 2140 rsmseg_close((rsmseg_t *)res, 0); 2141 break; 2142 case RSM_RESOURCE_BAR: 2143 DBG_PRINTF((category, RSM_ERR, "bad resource in rsm_close\n")); 2144 break; 2145 default: 2146 break; 2147 } 2148 2149 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close done\n")); 2150 2151 return (DDI_SUCCESS); 2152 } 2153 2154 /* 2155 * rsm_inc_pgcnt 2156 * 2157 * Description: increment rsm page counter. 2158 * 2159 * Parameters: pgcnt_t pnum; number of pages to be used 2160 * 2161 * Returns: RSM_SUCCESS if memory limit not exceeded 2162 * ENOSPC if memory limit exceeded. In this case, the 2163 * page counter remains unchanged. 
2164 * 2165 */ 2166 static int 2167 rsm_inc_pgcnt(pgcnt_t pnum) 2168 { 2169 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2170 if (rsm_pgcnt_max == 0) { /* no upper limit has been set */ 2171 return (RSM_SUCCESS); 2172 } 2173 2174 mutex_enter(&rsm_pgcnt_lock); 2175 2176 if (rsm_pgcnt + pnum > rsm_pgcnt_max) { 2177 /* ensure that limits have not been exceeded */ 2178 mutex_exit(&rsm_pgcnt_lock); 2179 return (RSMERR_INSUFFICIENT_MEM); 2180 } 2181 2182 rsm_pgcnt += pnum; 2183 DBG_PRINTF((category, RSM_DEBUG, "rsm_pgcnt incr to %d.\n", 2184 rsm_pgcnt)); 2185 mutex_exit(&rsm_pgcnt_lock); 2186 2187 return (RSM_SUCCESS); 2188 } 2189 2190 /* 2191 * rsm_dec_pgcnt 2192 * 2193 * Description: decrement rsm page counter. 2194 * 2195 * Parameters: pgcnt_t pnum; number of pages freed 2196 * 2197 */ 2198 static void 2199 rsm_dec_pgcnt(pgcnt_t pnum) 2200 { 2201 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2202 2203 if (rsm_pgcnt_max == 0) { /* no upper limit has been set */ 2204 return; 2205 } 2206 2207 mutex_enter(&rsm_pgcnt_lock); 2208 ASSERT(rsm_pgcnt >= pnum); 2209 rsm_pgcnt -= pnum; 2210 DBG_PRINTF((category, RSM_DEBUG, "rsm_pgcnt decr to %d.\n", 2211 rsm_pgcnt)); 2212 mutex_exit(&rsm_pgcnt_lock); 2213 } 2214 2215 static struct umem_callback_ops rsm_as_ops = { 2216 UMEM_CALLBACK_VERSION, /* version number */ 2217 rsm_export_force_destroy, 2218 }; 2219 2220 static int 2221 rsm_bind_pages(ddi_umem_cookie_t *cookie, caddr_t vaddr, size_t len, 2222 proc_t *procp) 2223 { 2224 int error = RSM_SUCCESS; 2225 ulong_t pnum; 2226 struct umem_callback_ops *callbackops = &rsm_as_ops; 2227 2228 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2229 2230 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind_pages enter\n")); 2231 2232 /* 2233 * Make sure vaddr and len are aligned on a page boundary 2234 */ 2235 if ((uintptr_t)vaddr & (PAGESIZE - 1)) { 2236 return (RSMERR_BAD_ADDR); 2237 } 2238 2239 if (len & (PAGESIZE - 1)) { 2240 return (RSMERR_BAD_LENGTH); 2241 } 2242 2243 
/* 2244 * Find number of pages 2245 */ 2246 pnum = btopr(len); 2247 error = rsm_inc_pgcnt(pnum); 2248 if (error != RSM_SUCCESS) { 2249 DBG_PRINTF((category, RSM_ERR, 2250 "rsm_bind_pages:mem limit exceeded\n")); 2251 return (RSMERR_INSUFFICIENT_MEM); 2252 } 2253 2254 error = umem_lockmemory(vaddr, len, 2255 DDI_UMEMLOCK_WRITE|DDI_UMEMLOCK_READ|DDI_UMEMLOCK_LONGTERM, 2256 cookie, 2257 callbackops, procp); 2258 2259 if (error) { 2260 rsm_dec_pgcnt(pnum); 2261 DBG_PRINTF((category, RSM_ERR, 2262 "rsm_bind_pages:ddi_umem_lock failed\n")); 2263 /* 2264 * ddi_umem_lock, in the case of failure, returns one of 2265 * the following three errors. These are translated into 2266 * the RSMERR namespace and returned. 2267 */ 2268 if (error == EFAULT) 2269 return (RSMERR_BAD_ADDR); 2270 else if (error == EACCES) 2271 return (RSMERR_PERM_DENIED); 2272 else 2273 return (RSMERR_INSUFFICIENT_MEM); 2274 } 2275 2276 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind_pages done\n")); 2277 2278 return (error); 2279 2280 } 2281 2282 static int 2283 rsm_unbind_pages(rsmseg_t *seg) 2284 { 2285 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2286 2287 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind_pages enter\n")); 2288 2289 ASSERT(rsmseglock_held(seg)); 2290 2291 if (seg->s_cookie != NULL) { 2292 /* unlock address range */ 2293 ddi_umem_unlock(seg->s_cookie); 2294 rsm_dec_pgcnt(btopr(seg->s_len)); 2295 seg->s_cookie = NULL; 2296 } 2297 2298 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind_pages done\n")); 2299 2300 return (RSM_SUCCESS); 2301 } 2302 2303 2304 static int 2305 rsm_bind(rsmseg_t *seg, rsm_ioctlmsg_t *msg, intptr_t dataptr, int mode) 2306 { 2307 int e; 2308 adapter_t *adapter; 2309 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2310 2311 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind enter\n")); 2312 2313 adapter = rsm_getadapter(msg, mode); 2314 if (adapter == NULL) { 2315 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2316 "rsm_bind done:no adapter\n")); 
2317 return (RSMERR_CTLR_NOT_PRESENT); 2318 } 2319 2320 /* lock address range */ 2321 if (msg->vaddr == NULL) { 2322 rsmka_release_adapter(adapter); 2323 DBG_PRINTF((category, RSM_ERR, 2324 "rsm: rsm_bind done: invalid vaddr\n")); 2325 return (RSMERR_BAD_ADDR); 2326 } 2327 if (msg->len <= 0) { 2328 rsmka_release_adapter(adapter); 2329 DBG_PRINTF((category, RSM_ERR, 2330 "rsm_bind: invalid length\n")); 2331 return (RSMERR_BAD_LENGTH); 2332 } 2333 2334 /* Lock segment */ 2335 rsmseglock_acquire(seg); 2336 2337 while (seg->s_state == RSM_STATE_NEW_QUIESCED) { 2338 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) { 2339 DBG_PRINTF((category, RSM_DEBUG, 2340 "rsm_bind done: cv_wait INTERRUPTED")); 2341 rsmka_release_adapter(adapter); 2342 rsmseglock_release(seg); 2343 return (RSMERR_INTERRUPTED); 2344 } 2345 } 2346 2347 ASSERT(seg->s_state == RSM_STATE_NEW); 2348 2349 ASSERT(seg->s_cookie == NULL); 2350 2351 e = rsm_bind_pages(&seg->s_cookie, msg->vaddr, msg->len, curproc); 2352 if (e == RSM_SUCCESS) { 2353 seg->s_flags |= RSM_USER_MEMORY; 2354 if (msg->perm & RSM_ALLOW_REBIND) { 2355 seg->s_flags |= RSMKA_ALLOW_UNBIND_REBIND; 2356 } 2357 if (msg->perm & RSM_CREATE_SEG_DONTWAIT) { 2358 seg->s_flags |= RSMKA_SET_RESOURCE_DONTWAIT; 2359 } 2360 seg->s_region.r_vaddr = msg->vaddr; 2361 /* 2362 * Set the s_pid value in the segment structure. This is used 2363 * to identify exported segments belonging to a particular 2364 * process so that when the process exits, these segments can 2365 * be unlocked forcefully even if rsm_close is not called on 2366 * process exit since there maybe other processes referencing 2367 * them (for example on a fork or exec). 2368 * The s_pid value is also used to authenticate the process 2369 * doing a publish or unpublish on the export segment. Only 2370 * the creator of the export segment has a right to do a 2371 * publish or unpublish and unbind on the segment. 
2372 */ 2373 seg->s_pid = ddi_get_pid(); 2374 seg->s_len = msg->len; 2375 seg->s_state = RSM_STATE_BIND; 2376 seg->s_adapter = adapter; 2377 seg->s_proc = curproc; 2378 } else { 2379 rsmka_release_adapter(adapter); 2380 DBG_PRINTF((category, RSM_WARNING, 2381 "unable to lock down pages\n")); 2382 } 2383 2384 msg->rnum = seg->s_minor; 2385 /* Unlock segment */ 2386 rsmseglock_release(seg); 2387 2388 if (e == RSM_SUCCESS) { 2389 /* copyout the resource number */ 2390 #ifdef _MULTI_DATAMODEL 2391 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 2392 rsm_ioctlmsg32_t msg32; 2393 2394 msg32.rnum = msg->rnum; 2395 if (ddi_copyout((caddr_t)&msg32.rnum, 2396 (caddr_t)&((rsm_ioctlmsg32_t *)dataptr)->rnum, 2397 sizeof (minor_t), mode)) { 2398 rsmka_release_adapter(adapter); 2399 e = RSMERR_BAD_ADDR; 2400 } 2401 } 2402 #endif 2403 if (ddi_copyout((caddr_t)&msg->rnum, 2404 (caddr_t)&((rsm_ioctlmsg_t *)dataptr)->rnum, 2405 sizeof (minor_t), mode)) { 2406 rsmka_release_adapter(adapter); 2407 e = RSMERR_BAD_ADDR; 2408 } 2409 } 2410 2411 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind done\n")); 2412 2413 return (e); 2414 } 2415 2416 static void 2417 rsm_remap_local_importers(rsm_node_id_t src_nodeid, 2418 rsm_memseg_id_t ex_segid, 2419 ddi_umem_cookie_t cookie) 2420 2421 { 2422 rsmresource_t *p = NULL; 2423 rsmhash_table_t *rhash = &rsm_import_segs; 2424 uint_t index; 2425 2426 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_FUNC_ALL, RSM_DEBUG_VERBOSE, 2427 "rsm_remap_local_importers enter\n")); 2428 2429 index = rsmhash(ex_segid); 2430 2431 rw_enter(&rhash->rsmhash_rw, RW_READER); 2432 2433 p = rsmhash_getbkt(rhash, index); 2434 2435 for (; p; p = p->rsmrc_next) { 2436 rsmseg_t *seg = (rsmseg_t *)p; 2437 rsmseglock_acquire(seg); 2438 /* 2439 * Change the s_cookie value of only the local importers 2440 * which have been mapped (in state RSM_STATE_ACTIVE). 
2441 * Note that there is no need to change the s_cookie value 2442 * if the imported segment is in RSM_STATE_MAPPING since 2443 * eventually the s_cookie will be updated via the mapping 2444 * functionality. 2445 */ 2446 if ((seg->s_segid == ex_segid) && (seg->s_node == src_nodeid) && 2447 (seg->s_state == RSM_STATE_ACTIVE)) { 2448 seg->s_cookie = cookie; 2449 } 2450 rsmseglock_release(seg); 2451 } 2452 rw_exit(&rhash->rsmhash_rw); 2453 2454 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_FUNC_ALL, RSM_DEBUG_VERBOSE, 2455 "rsm_remap_local_importers done\n")); 2456 } 2457 2458 static int 2459 rsm_rebind(rsmseg_t *seg, rsm_ioctlmsg_t *msg) 2460 { 2461 int e; 2462 adapter_t *adapter; 2463 ddi_umem_cookie_t cookie; 2464 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2465 2466 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind enter\n")); 2467 2468 /* Check for permissions to rebind */ 2469 if (!(seg->s_flags & RSMKA_ALLOW_UNBIND_REBIND)) { 2470 return (RSMERR_REBIND_NOT_ALLOWED); 2471 } 2472 2473 if (seg->s_pid != ddi_get_pid() && 2474 ddi_get_pid() != 0) { 2475 DBG_PRINTF((category, RSM_ERR, "rsm_rebind: Not owner\n")); 2476 return (RSMERR_NOT_CREATOR); 2477 } 2478 2479 /* 2480 * We will not be allowing partial rebind and hence length passed 2481 * in must be same as segment length 2482 */ 2483 if (msg->vaddr == NULL) { 2484 DBG_PRINTF((category, RSM_ERR, 2485 "rsm_rebind done: null msg->vaddr\n")); 2486 return (RSMERR_BAD_ADDR); 2487 } 2488 if (msg->len != seg->s_len) { 2489 DBG_PRINTF((category, RSM_ERR, 2490 "rsm_rebind: invalid length\n")); 2491 return (RSMERR_BAD_LENGTH); 2492 } 2493 2494 /* Lock segment */ 2495 rsmseglock_acquire(seg); 2496 2497 while ((seg->s_state == RSM_STATE_BIND_QUIESCED) || 2498 (seg->s_state == RSM_STATE_EXPORT_QUIESCING) || 2499 (seg->s_state == RSM_STATE_EXPORT_QUIESCED)) { 2500 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) { 2501 rsmseglock_release(seg); 2502 DBG_PRINTF((category, RSM_DEBUG, 2503 "rsm_rebind done: cv_wait INTERRUPTED")); 
2504 return (RSMERR_INTERRUPTED); 2505 } 2506 } 2507 2508 /* verify segment state */ 2509 if ((seg->s_state != RSM_STATE_BIND) && 2510 (seg->s_state != RSM_STATE_EXPORT)) { 2511 /* Unlock segment */ 2512 rsmseglock_release(seg); 2513 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2514 "rsm_rebind done: invalid state\n")); 2515 return (RSMERR_BAD_SEG_HNDL); 2516 } 2517 2518 ASSERT(seg->s_cookie != NULL); 2519 2520 if (msg->vaddr == seg->s_region.r_vaddr) { 2521 rsmseglock_release(seg); 2522 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind done\n")); 2523 return (RSM_SUCCESS); 2524 } 2525 2526 e = rsm_bind_pages(&cookie, msg->vaddr, msg->len, curproc); 2527 if (e == RSM_SUCCESS) { 2528 struct buf *xbuf; 2529 dev_t sdev = 0; 2530 rsm_memory_local_t mem; 2531 2532 xbuf = ddi_umem_iosetup(cookie, 0, msg->len, B_WRITE, 2533 sdev, 0, NULL, DDI_UMEM_SLEEP); 2534 ASSERT(xbuf != NULL); 2535 2536 mem.ms_type = RSM_MEM_BUF; 2537 mem.ms_bp = xbuf; 2538 2539 adapter = seg->s_adapter; 2540 e = adapter->rsmpi_ops->rsm_rebind( 2541 seg->s_handle.out, 0, &mem, 2542 RSM_RESOURCE_DONTWAIT, NULL); 2543 2544 if (e == RSM_SUCCESS) { 2545 /* 2546 * unbind the older pages, and unload local importers; 2547 * but don't disconnect importers 2548 */ 2549 (void) rsm_unbind_pages(seg); 2550 seg->s_cookie = cookie; 2551 seg->s_region.r_vaddr = msg->vaddr; 2552 rsm_remap_local_importers(my_nodeid, seg->s_segid, 2553 cookie); 2554 } else { 2555 /* 2556 * Unbind the pages associated with "cookie" by the 2557 * rsm_bind_pages calls prior to this. This is 2558 * similar to what is done in the rsm_unbind_pages 2559 * routine for the seg->s_cookie. 2560 */ 2561 ddi_umem_unlock(cookie); 2562 rsm_dec_pgcnt(btopr(msg->len)); 2563 DBG_PRINTF((category, RSM_ERR, 2564 "rsm_rebind failed with %d\n", e)); 2565 } 2566 /* 2567 * At present there is no dependency on the existence of xbuf. 2568 * So we can free it here. If in the future this changes, it can 2569 * be freed sometime during the segment destroy. 
2570 */ 2571 freerbuf(xbuf); 2572 } 2573 2574 /* Unlock segment */ 2575 rsmseglock_release(seg); 2576 2577 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind done\n")); 2578 2579 return (e); 2580 } 2581 2582 static int 2583 rsm_unbind(rsmseg_t *seg) 2584 { 2585 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2586 2587 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind enter\n")); 2588 2589 rsmseglock_acquire(seg); 2590 2591 /* verify segment state */ 2592 if ((seg->s_state != RSM_STATE_BIND) && 2593 (seg->s_state != RSM_STATE_BIND_QUIESCED)) { 2594 rsmseglock_release(seg); 2595 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2596 "rsm_unbind: invalid state\n")); 2597 return (RSMERR_BAD_SEG_HNDL); 2598 } 2599 2600 /* unlock current range */ 2601 (void) rsm_unbind_pages(seg); 2602 2603 if (seg->s_state == RSM_STATE_BIND) { 2604 seg->s_state = RSM_STATE_NEW; 2605 } else if (seg->s_state == RSM_STATE_BIND_QUIESCED) { 2606 seg->s_state = RSM_STATE_NEW_QUIESCED; 2607 } 2608 2609 rsmseglock_release(seg); 2610 2611 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind done\n")); 2612 2613 return (RSM_SUCCESS); 2614 } 2615 2616 /* **************************** Exporter Access List Management ******* */ 2617 static void 2618 rsmacl_free(rsmapi_access_entry_t *acl, int acl_len) 2619 { 2620 int acl_sz; 2621 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2622 2623 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_free enter\n")); 2624 2625 /* acl could be NULL */ 2626 2627 if (acl != NULL && acl_len > 0) { 2628 acl_sz = acl_len * sizeof (rsmapi_access_entry_t); 2629 kmem_free((void *)acl, acl_sz); 2630 } 2631 2632 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_free done\n")); 2633 } 2634 2635 static void 2636 rsmpiacl_free(rsm_access_entry_t *acl, int acl_len) 2637 { 2638 int acl_sz; 2639 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2640 2641 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_free enter\n")); 2642 2643 if (acl != NULL && acl_len > 0) { 2644 
acl_sz = acl_len * sizeof (rsm_access_entry_t); 2645 kmem_free((void *)acl, acl_sz); 2646 } 2647 2648 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_free done\n")); 2649 2650 } 2651 2652 static int 2653 rsmacl_build(rsm_ioctlmsg_t *msg, int mode, 2654 rsmapi_access_entry_t **list, int *len, int loopback) 2655 { 2656 rsmapi_access_entry_t *acl; 2657 int acl_len; 2658 int i; 2659 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2660 2661 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_build enter\n")); 2662 2663 *len = 0; 2664 *list = NULL; 2665 2666 acl_len = msg->acl_len; 2667 if ((loopback && acl_len > 1) || (acl_len < 0) || 2668 (acl_len > MAX_NODES)) { 2669 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2670 "rsmacl_build done: acl invalid\n")); 2671 return (RSMERR_BAD_ACL); 2672 } 2673 2674 if (acl_len > 0 && acl_len <= MAX_NODES) { 2675 size_t acl_size = acl_len * sizeof (rsmapi_access_entry_t); 2676 2677 acl = kmem_alloc(acl_size, KM_SLEEP); 2678 2679 if (ddi_copyin((caddr_t)msg->acl, (caddr_t)acl, 2680 acl_size, mode)) { 2681 kmem_free((void *) acl, acl_size); 2682 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2683 "rsmacl_build done: BAD_ADDR\n")); 2684 return (RSMERR_BAD_ADDR); 2685 } 2686 2687 /* 2688 * Verify access list 2689 */ 2690 for (i = 0; i < acl_len; i++) { 2691 if (acl[i].ae_node > MAX_NODES || 2692 (loopback && (acl[i].ae_node != my_nodeid)) || 2693 acl[i].ae_permission > RSM_ACCESS_TRUSTED) { 2694 /* invalid entry */ 2695 kmem_free((void *) acl, acl_size); 2696 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2697 "rsmacl_build done: EINVAL\n")); 2698 return (RSMERR_BAD_ACL); 2699 } 2700 } 2701 2702 *len = acl_len; 2703 *list = acl; 2704 } 2705 2706 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_build done\n")); 2707 2708 return (DDI_SUCCESS); 2709 } 2710 2711 static int 2712 rsmpiacl_create(rsmapi_access_entry_t *src, rsm_access_entry_t **dest, 2713 int acl_len, adapter_t *adapter) 2714 { 2715 rsm_access_entry_t *acl; 2716 rsm_addr_t hwaddr; 2717 int 
i; 2718 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2719 2720 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_create enter\n")); 2721 2722 if (src != NULL) { 2723 size_t acl_size = acl_len * sizeof (rsm_access_entry_t); 2724 acl = kmem_alloc(acl_size, KM_SLEEP); 2725 2726 /* 2727 * translate access list 2728 */ 2729 for (i = 0; i < acl_len; i++) { 2730 if (src[i].ae_node == my_nodeid) { 2731 acl[i].ae_addr = adapter->hwaddr; 2732 } else { 2733 hwaddr = get_remote_hwaddr(adapter, 2734 src[i].ae_node); 2735 if ((int64_t)hwaddr < 0) { 2736 /* invalid hwaddr */ 2737 kmem_free((void *) acl, acl_size); 2738 DBG_PRINTF((category, 2739 RSM_DEBUG_VERBOSE, 2740 "rsmpiacl_create done:" 2741 "EINVAL hwaddr\n")); 2742 return (RSMERR_INTERNAL_ERROR); 2743 } 2744 acl[i].ae_addr = hwaddr; 2745 } 2746 /* rsmpi understands only RSM_PERM_XXXX */ 2747 acl[i].ae_permission = 2748 src[i].ae_permission & RSM_PERM_RDWR; 2749 } 2750 *dest = acl; 2751 } else { 2752 *dest = NULL; 2753 } 2754 2755 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_create done\n")); 2756 2757 return (RSM_SUCCESS); 2758 } 2759 2760 static int 2761 rsmsegacl_validate(rsmipc_request_t *req, rsm_node_id_t rnode, 2762 rsmipc_reply_t *reply) 2763 { 2764 2765 int i; 2766 rsmseg_t *seg; 2767 rsm_memseg_id_t key = req->rsmipc_key; 2768 rsm_permission_t perm = req->rsmipc_perm; 2769 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2770 2771 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2772 "rsmsegacl_validate enter\n")); 2773 2774 /* 2775 * Find segment and grab its lock. The reason why we grab the segment 2776 * lock in side the search is to avoid the race when the segment is 2777 * being deleted and we already have a pointer to it. 
2778 */ 2779 seg = rsmexport_lookup(key); 2780 if (!seg) { 2781 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2782 "rsmsegacl_validate done: %u ENXIO\n", key)); 2783 return (RSMERR_SEG_NOT_PUBLISHED); 2784 } 2785 2786 ASSERT(rsmseglock_held(seg)); 2787 ASSERT(seg->s_state == RSM_STATE_EXPORT); 2788 2789 /* 2790 * We implement a 2-level protection scheme. 2791 * First, we check if local/remote host has access rights. 2792 * Second, we check if the user has access rights. 2793 * 2794 * This routine only validates the rnode access_list 2795 */ 2796 if (seg->s_acl_len > 0) { 2797 /* 2798 * Check host access list 2799 */ 2800 ASSERT(seg->s_acl != NULL); 2801 for (i = 0; i < seg->s_acl_len; i++) { 2802 if (seg->s_acl[i].ae_node == rnode) { 2803 perm &= seg->s_acl[i].ae_permission; 2804 goto found; 2805 } 2806 } 2807 /* rnode is not found in the list */ 2808 rsmseglock_release(seg); 2809 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2810 "rsmsegacl_validate done: EPERM\n")); 2811 return (RSMERR_SEG_NOT_PUBLISHED_TO_NODE); 2812 } else { 2813 /* use default owner creation umask */ 2814 perm &= seg->s_mode; 2815 } 2816 2817 found: 2818 /* update perm for this node */ 2819 reply->rsmipc_mode = perm; 2820 reply->rsmipc_uid = seg->s_uid; 2821 reply->rsmipc_gid = seg->s_gid; 2822 reply->rsmipc_segid = seg->s_segid; 2823 reply->rsmipc_seglen = seg->s_len; 2824 2825 /* 2826 * Perm of requesting node is valid; source will validate user 2827 */ 2828 rsmseglock_release(seg); 2829 2830 /* 2831 * Add the importer to the list right away, if connect fails 2832 * the importer will ask the exporter to remove it. 
2833 */ 2834 importer_list_add(rnode, key, req->rsmipc_adapter_hwaddr, 2835 req->rsmipc_segment_cookie); 2836 2837 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegacl_validate done\n")); 2838 2839 return (RSM_SUCCESS); 2840 } 2841 2842 2843 /* ************************** Exporter Calls ************************* */ 2844 2845 static int 2846 rsm_publish(rsmseg_t *seg, rsm_ioctlmsg_t *msg, intptr_t dataptr, int mode) 2847 { 2848 int e; 2849 int acl_len; 2850 rsmapi_access_entry_t *acl; 2851 rsm_access_entry_t *rsmpi_acl; 2852 rsm_memory_local_t mem; 2853 struct buf *xbuf; 2854 dev_t sdev = 0; 2855 adapter_t *adapter; 2856 rsm_memseg_id_t segment_id = 0; 2857 int loopback_flag = 0; 2858 int create_flags = 0; 2859 rsm_resource_callback_t callback_flag; 2860 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2861 2862 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_publish enter\n")); 2863 2864 if (seg->s_adapter == &loopback_adapter) 2865 loopback_flag = 1; 2866 2867 if (seg->s_pid != ddi_get_pid() && 2868 ddi_get_pid() != 0) { 2869 DBG_PRINTF((category, RSM_ERR, 2870 "rsm_publish: Not creator\n")); 2871 return (RSMERR_NOT_CREATOR); 2872 } 2873 2874 /* 2875 * Get per node access list 2876 */ 2877 e = rsmacl_build(msg, mode, &acl, &acl_len, loopback_flag); 2878 if (e != DDI_SUCCESS) { 2879 DBG_PRINTF((category, RSM_ERR, 2880 "rsm_publish done: rsmacl_build failed\n")); 2881 return (e); 2882 } 2883 2884 /* 2885 * The application provided msg->key is used for resolving a 2886 * segment id according to the following: 2887 * key = 0 Kernel Agent selects the segment id 2888 * key <= RSM_DLPI_ID_END Reserved for system usage except 2889 * RSMLIB range 2890 * key < RSM_USER_APP_ID_BASE segment id = key 2891 * key >= RSM_USER_APP_ID_BASE Reserved for KA selections 2892 * 2893 * rsm_nextavail_segmentid is initialized to 0x80000000 and 2894 * overflows to zero after 0x80000000 allocations. 
2895 * An algorithm is needed which allows reinitialization and provides 2896 * for reallocation after overflow. For now, ENOMEM is returned 2897 * once the overflow condition has occurred. 2898 */ 2899 if (msg->key == 0) { 2900 mutex_enter(&rsm_lock); 2901 segment_id = rsm_nextavail_segmentid; 2902 if (segment_id != 0) { 2903 rsm_nextavail_segmentid++; 2904 mutex_exit(&rsm_lock); 2905 } else { 2906 mutex_exit(&rsm_lock); 2907 DBG_PRINTF((category, RSM_ERR, 2908 "rsm_publish done: no more keys avlbl\n")); 2909 return (RSMERR_INSUFFICIENT_RESOURCES); 2910 } 2911 } else if BETWEEN(msg->key, RSM_RSMLIB_ID_BASE, RSM_RSMLIB_ID_END) 2912 /* range reserved for internal use by base/ndi libraries */ 2913 segment_id = msg->key; 2914 else if (msg->key <= RSM_DLPI_ID_END) 2915 return (RSMERR_RESERVED_SEGID); 2916 else if (msg->key <= (uint_t)RSM_USER_APP_ID_BASE -1) 2917 segment_id = msg->key; 2918 else { 2919 DBG_PRINTF((category, RSM_ERR, 2920 "rsm_publish done: invalid key %u\n", msg->key)); 2921 return (RSMERR_RESERVED_SEGID); 2922 } 2923 2924 /* Add key to exportlist; The segment lock is held on success */ 2925 e = rsmexport_add(seg, segment_id); 2926 if (e) { 2927 rsmacl_free(acl, acl_len); 2928 DBG_PRINTF((category, RSM_ERR, 2929 "rsm_publish done: export_add failed: %d\n", e)); 2930 return (e); 2931 } 2932 2933 seg->s_segid = segment_id; 2934 2935 if ((seg->s_state != RSM_STATE_BIND) && 2936 (seg->s_state != RSM_STATE_BIND_QUIESCED)) { 2937 /* state changed since then, free acl and return */ 2938 rsmseglock_release(seg); 2939 rsmexport_rm(seg); 2940 rsmacl_free(acl, acl_len); 2941 DBG_PRINTF((category, RSM_ERR, 2942 "rsm_publish done: segment in wrong state: %d\n", 2943 seg->s_state)); 2944 return (RSMERR_BAD_SEG_HNDL); 2945 } 2946 2947 /* 2948 * If this is for a local memory handle and permissions are zero, 2949 * then the surrogate segment is very large and we want to skip 2950 * allocation of DVMA space. 2951 * 2952 * Careful! 
If the user didn't use an ACL list, acl will be a NULL 2953 * pointer. Check that before dereferencing it. 2954 */ 2955 if (acl != (rsmapi_access_entry_t *)NULL) { 2956 if (acl[0].ae_node == my_nodeid && acl[0].ae_permission == 0) 2957 goto skipdriver; 2958 } 2959 2960 /* create segment */ 2961 xbuf = ddi_umem_iosetup(seg->s_cookie, 0, seg->s_len, B_WRITE, 2962 sdev, 0, NULL, DDI_UMEM_SLEEP); 2963 ASSERT(xbuf != NULL); 2964 2965 mem.ms_type = RSM_MEM_BUF; 2966 mem.ms_bp = xbuf; 2967 2968 /* This call includes a bind operations */ 2969 2970 adapter = seg->s_adapter; 2971 /* 2972 * create a acl list with hwaddr for RSMPI publish 2973 */ 2974 e = rsmpiacl_create(acl, &rsmpi_acl, acl_len, adapter); 2975 2976 if (e != RSM_SUCCESS) { 2977 rsmseglock_release(seg); 2978 rsmexport_rm(seg); 2979 rsmacl_free(acl, acl_len); 2980 freerbuf(xbuf); 2981 DBG_PRINTF((category, RSM_ERR, 2982 "rsm_publish done: rsmpiacl_create failed: %d\n", e)); 2983 return (e); 2984 } 2985 2986 if (seg->s_state == RSM_STATE_BIND) { 2987 /* create segment */ 2988 2989 /* This call includes a bind operations */ 2990 2991 if (seg->s_flags & RSMKA_ALLOW_UNBIND_REBIND) { 2992 create_flags = RSM_ALLOW_UNBIND_REBIND; 2993 } 2994 2995 if (seg->s_flags & RSMKA_SET_RESOURCE_DONTWAIT) { 2996 callback_flag = RSM_RESOURCE_DONTWAIT; 2997 } else { 2998 callback_flag = RSM_RESOURCE_SLEEP; 2999 } 3000 3001 e = adapter->rsmpi_ops->rsm_seg_create( 3002 adapter->rsmpi_handle, 3003 &seg->s_handle.out, seg->s_len, 3004 create_flags, &mem, 3005 callback_flag, NULL); 3006 /* 3007 * At present there is no dependency on the existence of xbuf. 3008 * So we can free it here. If in the future this changes, it can 3009 * be freed sometime during the segment destroy. 
3010 */ 3011 freerbuf(xbuf); 3012 3013 if (e != RSM_SUCCESS) { 3014 rsmseglock_release(seg); 3015 rsmexport_rm(seg); 3016 rsmacl_free(acl, acl_len); 3017 rsmpiacl_free(rsmpi_acl, acl_len); 3018 DBG_PRINTF((category, RSM_ERR, 3019 "rsm_publish done: export_create failed: %d\n", e)); 3020 /* 3021 * The following assertion ensures that the two errors 3022 * related to the length and its alignment do not occur 3023 * since they have been checked during export_create 3024 */ 3025 ASSERT(e != RSMERR_BAD_MEM_ALIGNMENT && 3026 e != RSMERR_BAD_LENGTH); 3027 if (e == RSMERR_NOT_MEM) 3028 e = RSMERR_INSUFFICIENT_MEM; 3029 3030 return (e); 3031 } 3032 /* export segment, this should create an IMMU mapping */ 3033 e = adapter->rsmpi_ops->rsm_publish( 3034 seg->s_handle.out, 3035 rsmpi_acl, acl_len, 3036 seg->s_segid, 3037 RSM_RESOURCE_DONTWAIT, NULL); 3038 3039 if (e != RSM_SUCCESS) { 3040 adapter->rsmpi_ops->rsm_seg_destroy(seg->s_handle.out); 3041 rsmseglock_release(seg); 3042 rsmexport_rm(seg); 3043 rsmacl_free(acl, acl_len); 3044 rsmpiacl_free(rsmpi_acl, acl_len); 3045 DBG_PRINTF((category, RSM_ERR, 3046 "rsm_publish done: export_publish failed: %d\n", 3047 e)); 3048 return (e); 3049 } 3050 } 3051 3052 seg->s_acl_in = rsmpi_acl; 3053 3054 skipdriver: 3055 /* defer s_acl/s_acl_len -> avoid crash in rsmseg_free */ 3056 seg->s_acl_len = acl_len; 3057 seg->s_acl = acl; 3058 3059 if (seg->s_state == RSM_STATE_BIND) { 3060 seg->s_state = RSM_STATE_EXPORT; 3061 } else if (seg->s_state == RSM_STATE_BIND_QUIESCED) { 3062 seg->s_state = RSM_STATE_EXPORT_QUIESCED; 3063 cv_broadcast(&seg->s_cv); 3064 } 3065 3066 rsmseglock_release(seg); 3067 3068 /* 3069 * If the segment id was solicited, then return it in 3070 * the original incoming message. 
/*
 * This function modifies the access control list of an already published
 * segment.  There is no effect on import segments which are already
 * connected.
 *
 * Arguments:
 *	seg	export segment whose ACL is being replaced
 *	msg	ioctl message from which rsmacl_build() constructs the new ACL
 *	mode	ioctl mode flags, handed through to rsmacl_build()
 *
 * Returns DDI_SUCCESS when the new ACL has been installed and the
 * importers have been sent the new permissions.  Otherwise returns
 * RSMERR_SEG_NOT_PUBLISHED (segment is not in an export state),
 * RSMERR_NOT_CREATOR (caller's pid is neither the segment's pid nor 0),
 * RSMERR_INTERRUPTED (cv_wait_sig() returned 0 while waiting), or the
 * error returned by rsmacl_build(), rsmpiacl_create() or the adapter's
 * rsm_republish operation.  On every failure after the new ACL was built
 * the previous ACL is put back and the new one is freed.
 */
static int
rsm_republish(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int mode)
{
	rsmapi_access_entry_t   *new_acl, *old_acl, *tmp_acl;
	rsm_access_entry_t	*rsmpi_new_acl, *rsmpi_old_acl;
	int			new_acl_len, old_acl_len, tmp_acl_len;
	int			e, i;
	adapter_t		*adapter;
	int			loopback_flag = 0;
	rsm_memseg_id_t		key;
	rsm_permission_t	permission;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_republish enter\n"));

	/*
	 * Unlocked early-out; the state is checked again below once the
	 * segment lock is held.
	 */
	if ((seg->s_state != RSM_STATE_EXPORT) &&
	    (seg->s_state != RSM_STATE_EXPORT_QUIESCED) &&
	    (seg->s_state != RSM_STATE_EXPORT_QUIESCING))
		return (RSMERR_SEG_NOT_PUBLISHED);

	/* only the creating process, or pid 0, may republish */
	if (seg->s_pid != ddi_get_pid() &&
	    ddi_get_pid() != 0) {
		DBG_PRINTF((category, RSM_ERR,
		    "rsm_republish: Not owner\n"));
		return (RSMERR_NOT_CREATOR);
	}

	if (seg->s_adapter == &loopback_adapter)
		loopback_flag = 1;

	/*
	 * Build new list first
	 */
	e = rsmacl_build(msg, mode, &new_acl, &new_acl_len, loopback_flag);
	if (e) {
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm_republish done: rsmacl_build failed %d", e));
		return (e);
	}

	/* Lock segment */
	rsmseglock_acquire(seg);
	/*
	 * a republish is in progress - REPUBLISH message is being
	 * sent to the importers so wait for it to complete OR
	 * wait till DR completes
	 */
	while (((seg->s_state == RSM_STATE_EXPORT) &&
	    (seg->s_flags & RSM_REPUBLISH_WAIT)) ||
	    (seg->s_state == RSM_STATE_EXPORT_QUIESCED) ||
	    (seg->s_state == RSM_STATE_EXPORT_QUIESCING)) {
		if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
			    "rsm_republish done: cv_wait INTERRUPTED"));
			rsmseglock_release(seg);
			rsmacl_free(new_acl, new_acl_len);
			return (RSMERR_INTERRUPTED);
		}
	}

	/* recheck if state is valid */
	if (seg->s_state != RSM_STATE_EXPORT) {
		rsmseglock_release(seg);
		rsmacl_free(new_acl, new_acl_len);
		return (RSMERR_SEG_NOT_PUBLISHED);
	}

	/*
	 * Remember the current ACL so that it can be restored on failure
	 * and freed on success, then install the new one.
	 */
	key = seg->s_key;
	old_acl = seg->s_acl;
	old_acl_len = seg->s_acl_len;

	seg->s_acl = new_acl;
	seg->s_acl_len = new_acl_len;

	/*
	 * This call will only be meaningful if and when the interconnect
	 * layer makes use of the access list
	 */
	adapter = seg->s_adapter;
	/*
	 * create a acl list with hwaddr for RSMPI publish
	 */
	e = rsmpiacl_create(new_acl, &rsmpi_new_acl, new_acl_len, adapter);

	if (e != RSM_SUCCESS) {
		/* roll back to the previous ACL */
		seg->s_acl = old_acl;
		seg->s_acl_len = old_acl_len;
		rsmseglock_release(seg);
		rsmacl_free(new_acl, new_acl_len);
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm_republish done: rsmpiacl_create failed %d", e));
		return (e);
	}
	rsmpi_old_acl = seg->s_acl_in;
	seg->s_acl_in = rsmpi_new_acl;

	e = adapter->rsmpi_ops->rsm_republish(seg->s_handle.out,
	    seg->s_acl_in, seg->s_acl_len,
	    RSM_RESOURCE_DONTWAIT, NULL);

	if (e != RSM_SUCCESS) {
		/* roll back both the RSMAPI and the RSMPI ACLs */
		seg->s_acl = old_acl;
		seg->s_acl_in = rsmpi_old_acl;
		seg->s_acl_len = old_acl_len;
		rsmseglock_release(seg);
		rsmacl_free(new_acl, new_acl_len);
		rsmpiacl_free(rsmpi_new_acl, new_acl_len);

		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm_republish done: rsmpi republish failed %d\n", e));
		return (e);
	}

	/*
	 * create a tmp copy of the new acl; it is what gets passed to
	 * rsm_send_republish() after the segment lock has been dropped
	 */
	tmp_acl_len = new_acl_len;
	if (tmp_acl_len > 0) {
		tmp_acl = kmem_zalloc(new_acl_len*sizeof (*tmp_acl), KM_SLEEP);
		for (i = 0; i < tmp_acl_len; i++) {
			tmp_acl[i].ae_node = new_acl[i].ae_node;
			tmp_acl[i].ae_permission = new_acl[i].ae_permission;
		}
		/*
		 * The default permission of a node which was in the old
		 * ACL but not in the new ACL is 0 ie no access.
		 */
		permission = 0;
	} else {
		/*
		 * NULL acl means all importers can connect and
		 * default permission will be owner creation umask
		 */
		tmp_acl = NULL;
		permission = seg->s_mode;
	}

	/* make other republishers to wait for republish to complete */
	seg->s_flags |= RSM_REPUBLISH_WAIT;

	rsmseglock_release(seg);

	/* send the new perms to the importing nodes */
	rsm_send_republish(key, tmp_acl, tmp_acl_len, permission);

	rsmseglock_acquire(seg);
	seg->s_flags &= ~RSM_REPUBLISH_WAIT;
	/* wake up any one waiting for republish to complete */
	cv_broadcast(&seg->s_cv);
	rsmseglock_release(seg);

	/* the temporary copy and the replaced ACLs are no longer needed */
	rsmacl_free(tmp_acl, tmp_acl_len);
	rsmacl_free(old_acl, old_acl_len);
	rsmpiacl_free(rsmpi_old_acl, old_acl_len);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_republish done\n"));
	return (DDI_SUCCESS);
}
RSM_EXPORT); 3266 3267 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unpublish enter\n")); 3268 3269 if (seg->s_pid != ddi_get_pid() && 3270 ddi_get_pid() != 0) { 3271 DBG_PRINTF((category, RSM_ERR, 3272 "rsm_unpublish: Not creator\n")); 3273 return (RSMERR_NOT_CREATOR); 3274 } 3275 3276 rsmseglock_acquire(seg); 3277 /* 3278 * wait for QUIESCING to complete here before rsmexport_rm 3279 * is called because the SUSPEND_COMPLETE mesg which changes 3280 * the seg state from EXPORT_QUIESCING to EXPORT_QUIESCED and 3281 * signals the cv_wait needs to find it in the hashtable. 3282 */ 3283 while ((seg->s_state == RSM_STATE_EXPORT_QUIESCING) || 3284 ((seg->s_state == RSM_STATE_EXPORT) && (seg->s_rdmacnt > 0))) { 3285 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) { 3286 rsmseglock_release(seg); 3287 DBG_PRINTF((category, RSM_ERR, 3288 "rsm_unpublish done: cv_wait INTR qscing" 3289 "getv/putv in progress")); 3290 return (RSMERR_INTERRUPTED); 3291 } 3292 } 3293 3294 /* verify segment state */ 3295 if ((seg->s_state != RSM_STATE_EXPORT) && 3296 (seg->s_state != RSM_STATE_EXPORT_QUIESCED)) { 3297 rsmseglock_release(seg); 3298 DBG_PRINTF((category, RSM_ERR, 3299 "rsm_unpublish done: bad state %x\n", seg->s_state)); 3300 return (RSMERR_SEG_NOT_PUBLISHED); 3301 } 3302 3303 rsmseglock_release(seg); 3304 3305 rsmexport_rm(seg); 3306 3307 rsm_send_importer_disconnects(seg->s_segid, my_nodeid); 3308 3309 rsmseglock_acquire(seg); 3310 /* 3311 * wait for republish to complete 3312 */ 3313 while ((seg->s_state == RSM_STATE_EXPORT) && 3314 (seg->s_flags & RSM_REPUBLISH_WAIT)) { 3315 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) { 3316 DBG_PRINTF((category, RSM_ERR, 3317 "rsm_unpublish done: cv_wait INTR repubing")); 3318 rsmseglock_release(seg); 3319 return (RSMERR_INTERRUPTED); 3320 } 3321 } 3322 3323 if ((seg->s_state != RSM_STATE_EXPORT) && 3324 (seg->s_state != RSM_STATE_EXPORT_QUIESCED)) { 3325 DBG_PRINTF((category, RSM_ERR, 3326 "rsm_unpublish done: invalid state")); 3327 
rsmseglock_release(seg); 3328 return (RSMERR_SEG_NOT_PUBLISHED); 3329 } 3330 3331 /* 3332 * check for putv/get surrogate segment which was not published 3333 * to the driver. 3334 * 3335 * Be certain to see if there is an ACL first! If this segment was 3336 * not published with an ACL, acl will be a null pointer. Check 3337 * that before dereferencing it. 3338 */ 3339 acl = seg->s_acl; 3340 if (acl != (rsmapi_access_entry_t *)NULL) { 3341 if (acl[0].ae_node == my_nodeid && acl[0].ae_permission == 0) 3342 goto bypass; 3343 } 3344 3345 /* The RSMPI unpublish/destroy has been done if seg is QUIESCED */ 3346 if (seg->s_state == RSM_STATE_EXPORT_QUIESCED) 3347 goto bypass; 3348 3349 adapter = seg->s_adapter; 3350 for (;;) { 3351 if (seg->s_state != RSM_STATE_EXPORT) { 3352 rsmseglock_release(seg); 3353 DBG_PRINTF((category, RSM_ERR, 3354 "rsm_unpublish done: bad state %x\n", 3355 seg->s_state)); 3356 return (RSMERR_SEG_NOT_PUBLISHED); 3357 } 3358 3359 /* unpublish from adapter */ 3360 e = adapter->rsmpi_ops->rsm_unpublish(seg->s_handle.out); 3361 3362 if (e == RSM_SUCCESS) { 3363 break; 3364 } 3365 3366 if (e == RSMERR_SEG_IN_USE && mode == 1) { 3367 /* 3368 * wait for unpublish to succeed, it's busy. 
3369 */ 3370 seg->s_flags |= RSM_EXPORT_WAIT; 3371 3372 /* wait for a max of 1 ms - this is an empirical */ 3373 /* value that was found by some minimal testing */ 3374 /* can be fine tuned when we have better numbers */ 3375 /* A long term fix would be to send cv_signal */ 3376 /* from the intr callback routine */ 3377 /* currently nobody signals this wait */ 3378 (void) cv_reltimedwait(&seg->s_cv, &seg->s_lock, 3379 drv_usectohz(1000), TR_CLOCK_TICK); 3380 3381 DBG_PRINTF((category, RSM_ERR, 3382 "rsm_unpublish: SEG_IN_USE\n")); 3383 3384 seg->s_flags &= ~RSM_EXPORT_WAIT; 3385 } else { 3386 if (mode == 1) { 3387 DBG_PRINTF((category, RSM_ERR, 3388 "rsm:rsmpi unpublish err %x\n", e)); 3389 seg->s_state = RSM_STATE_BIND; 3390 } 3391 rsmseglock_release(seg); 3392 return (e); 3393 } 3394 } 3395 3396 /* Free segment */ 3397 e = adapter->rsmpi_ops->rsm_seg_destroy(seg->s_handle.out); 3398 3399 if (e != RSM_SUCCESS) { 3400 DBG_PRINTF((category, RSM_ERR, 3401 "rsm_unpublish: rsmpi destroy key=%x failed %x\n", 3402 seg->s_key, e)); 3403 } 3404 3405 bypass: 3406 acl = seg->s_acl; 3407 rsmpi_acl = seg->s_acl_in; 3408 acl_len = seg->s_acl_len; 3409 3410 seg->s_acl = NULL; 3411 seg->s_acl_in = NULL; 3412 seg->s_acl_len = 0; 3413 3414 if (seg->s_state == RSM_STATE_EXPORT) { 3415 seg->s_state = RSM_STATE_BIND; 3416 } else if (seg->s_state == RSM_STATE_EXPORT_QUIESCED) { 3417 seg->s_state = RSM_STATE_BIND_QUIESCED; 3418 cv_broadcast(&seg->s_cv); 3419 } 3420 3421 rsmseglock_release(seg); 3422 3423 rsmacl_free(acl, acl_len); 3424 rsmpiacl_free(rsmpi_acl, acl_len); 3425 3426 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unpublish done\n")); 3427 3428 return (DDI_SUCCESS); 3429 } 3430 3431 /* 3432 * Called from rsm_unpublish to force an unload and disconnection of all 3433 * importers of the unpublished segment. 3434 * 3435 * First build the list of segments requiring a force disconnect, then 3436 * send a request for each. 
3437 */ 3438 static void 3439 rsm_send_importer_disconnects(rsm_memseg_id_t ex_segid, 3440 rsm_node_id_t ex_nodeid) 3441 { 3442 rsmipc_request_t request; 3443 importing_token_t *prev_token, *token, *tmp_token, *tokp; 3444 importing_token_t *force_disconnect_list = NULL; 3445 int index; 3446 3447 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE, 3448 "rsm_send_importer_disconnects enter\n")); 3449 3450 index = rsmhash(ex_segid); 3451 3452 mutex_enter(&importer_list.lock); 3453 3454 prev_token = NULL; 3455 token = importer_list.bucket[index]; 3456 3457 while (token != NULL) { 3458 if (token->key == ex_segid) { 3459 /* 3460 * take it off the importer list and add it 3461 * to the force disconnect list. 3462 */ 3463 if (prev_token == NULL) 3464 importer_list.bucket[index] = token->next; 3465 else 3466 prev_token->next = token->next; 3467 tmp_token = token; 3468 token = token->next; 3469 if (force_disconnect_list == NULL) { 3470 force_disconnect_list = tmp_token; 3471 tmp_token->next = NULL; 3472 } else { 3473 tokp = force_disconnect_list; 3474 /* 3475 * make sure that the tmp_token's node 3476 * is not already on the force disconnect 3477 * list. 
3478 */ 3479 while (tokp != NULL) { 3480 if (tokp->importing_node == 3481 tmp_token->importing_node) { 3482 break; 3483 } 3484 tokp = tokp->next; 3485 } 3486 if (tokp == NULL) { 3487 tmp_token->next = 3488 force_disconnect_list; 3489 force_disconnect_list = tmp_token; 3490 } else { 3491 kmem_free((void *)tmp_token, 3492 sizeof (*token)); 3493 } 3494 } 3495 3496 } else { 3497 prev_token = token; 3498 token = token->next; 3499 } 3500 } 3501 mutex_exit(&importer_list.lock); 3502 3503 token = force_disconnect_list; 3504 while (token != NULL) { 3505 if (token->importing_node == my_nodeid) { 3506 rsm_force_unload(ex_nodeid, ex_segid, 3507 DISCONNECT); 3508 } else { 3509 request.rsmipc_hdr.rsmipc_type = 3510 RSMIPC_MSG_DISCONNECT; 3511 request.rsmipc_key = token->key; 3512 for (;;) { 3513 if (rsmipc_send(token->importing_node, 3514 &request, 3515 RSM_NO_REPLY) == RSM_SUCCESS) { 3516 break; 3517 } else { 3518 delay(drv_usectohz(10000)); 3519 } 3520 } 3521 } 3522 tmp_token = token; 3523 token = token->next; 3524 kmem_free((void *)tmp_token, sizeof (*token)); 3525 } 3526 3527 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE, 3528 "rsm_send_importer_disconnects done\n")); 3529 } 3530 3531 /* 3532 * This function is used as a callback for unlocking the pages locked 3533 * down by a process which then does a fork or an exec. 3534 * It marks the export segments corresponding to umem cookie given by 3535 * the *arg to be in a ZOMBIE state(by calling rsmseg_close to be 3536 * destroyed later when an rsm_close occurs). 
3537 */ 3538 static void 3539 rsm_export_force_destroy(ddi_umem_cookie_t *ck) 3540 { 3541 rsmresource_blk_t *blk; 3542 rsmresource_t *p; 3543 rsmseg_t *eseg = NULL; 3544 int i, j; 3545 int found = 0; 3546 3547 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE, 3548 "rsm_export_force_destroy enter\n")); 3549 3550 /* 3551 * Walk the resource list and locate the export segment (either 3552 * in the BIND or the EXPORT state) which corresponds to the 3553 * ddi_umem_cookie_t being freed up, and call rsmseg_close. 3554 * Change the state to ZOMBIE by calling rsmseg_close with the 3555 * force_flag argument (the second argument) set to 1. Also, 3556 * unpublish and unbind the segment, but don't free it. Free it 3557 * only on a rsm_close call for the segment. 3558 */ 3559 rw_enter(&rsm_resource.rsmrc_lock, RW_READER); 3560 3561 for (i = 0; i < rsm_resource.rsmrc_len; i++) { 3562 blk = rsm_resource.rsmrc_root[i]; 3563 if (blk == NULL) { 3564 continue; 3565 } 3566 3567 for (j = 0; j < RSMRC_BLKSZ; j++) { 3568 p = blk->rsmrcblk_blks[j]; 3569 if ((p != NULL) && (p != RSMRC_RESERVED) && 3570 (p->rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT)) { 3571 eseg = (rsmseg_t *)p; 3572 if (eseg->s_cookie != ck) 3573 continue; /* continue searching */ 3574 /* 3575 * Found the segment, set flag to indicate 3576 * force destroy processing is in progress 3577 */ 3578 rsmseglock_acquire(eseg); 3579 eseg->s_flags |= RSM_FORCE_DESTROY_WAIT; 3580 rsmseglock_release(eseg); 3581 found = 1; 3582 break; 3583 } 3584 } 3585 3586 if (found) 3587 break; 3588 } 3589 3590 rw_exit(&rsm_resource.rsmrc_lock); 3591 3592 if (found) { 3593 ASSERT(eseg != NULL); 3594 /* call rsmseg_close with force flag set to 1 */ 3595 rsmseg_close(eseg, 1); 3596 /* 3597 * force destroy processing done, clear flag and signal any 3598 * thread waiting in rsmseg_close. 
3599 */ 3600 rsmseglock_acquire(eseg); 3601 eseg->s_flags &= ~RSM_FORCE_DESTROY_WAIT; 3602 cv_broadcast(&eseg->s_cv); 3603 rsmseglock_release(eseg); 3604 } 3605 3606 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE, 3607 "rsm_export_force_destroy done\n")); 3608 } 3609 3610 /* ******************************* Remote Calls *********************** */ 3611 static void 3612 rsm_intr_segconnect(rsm_node_id_t src, rsmipc_request_t *req) 3613 { 3614 rsmipc_reply_t reply; 3615 DBG_DEFINE(category, 3616 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 3617 3618 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3619 "rsm_intr_segconnect enter\n")); 3620 3621 reply.rsmipc_status = (short)rsmsegacl_validate(req, src, &reply); 3622 3623 reply.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPLY; 3624 reply.rsmipc_hdr.rsmipc_cookie = req->rsmipc_hdr.rsmipc_cookie; 3625 3626 (void) rsmipc_send(src, NULL, &reply); 3627 3628 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3629 "rsm_intr_segconnect done\n")); 3630 } 3631 3632 3633 /* 3634 * When an exported segment is unpublished the exporter sends an ipc 3635 * message (RSMIPC_MSG_DISCONNECT) to all importers. The recv ipc dispatcher 3636 * calls this function. The import list is scanned; segments which match the 3637 * exported segment id are unloaded and disconnected. 3638 * 3639 * Will also be called from rsm_rebind with disconnect_flag FALSE. 
3640 * 3641 */ 3642 static void 3643 rsm_force_unload(rsm_node_id_t src_nodeid, 3644 rsm_memseg_id_t ex_segid, 3645 boolean_t disconnect_flag) 3646 3647 { 3648 rsmresource_t *p = NULL; 3649 rsmhash_table_t *rhash = &rsm_import_segs; 3650 uint_t index; 3651 DBG_DEFINE(category, 3652 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 3653 3654 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_force_unload enter\n")); 3655 3656 index = rsmhash(ex_segid); 3657 3658 rw_enter(&rhash->rsmhash_rw, RW_READER); 3659 3660 p = rsmhash_getbkt(rhash, index); 3661 3662 for (; p; p = p->rsmrc_next) { 3663 rsmseg_t *seg = (rsmseg_t *)p; 3664 if ((seg->s_segid == ex_segid) && (seg->s_node == src_nodeid)) { 3665 /* 3666 * In order to make rsmseg_unload and rsm_force_unload 3667 * thread safe, acquire the segment lock here. 3668 * rsmseg_unload is responsible for releasing the lock. 3669 * rsmseg_unload releases the lock just before a call 3670 * to rsmipc_send or in case of an early exit which 3671 * occurs if the segment was in the state 3672 * RSM_STATE_CONNECTING or RSM_STATE_NEW. 3673 */ 3674 rsmseglock_acquire(seg); 3675 if (disconnect_flag) 3676 seg->s_flags |= RSM_FORCE_DISCONNECT; 3677 rsmseg_unload(seg); 3678 } 3679 } 3680 rw_exit(&rhash->rsmhash_rw); 3681 3682 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_force_unload done\n")); 3683 } 3684 3685 static void 3686 rsm_intr_reply(rsmipc_msghdr_t *msg) 3687 { 3688 /* 3689 * Find slot for cookie in reply. 
3690 * Match sequence with sequence in cookie 3691 * If no match; return 3692 * Try to grap lock of slot, if locked return 3693 * copy data into reply slot area 3694 * signal waiter 3695 */ 3696 rsmipc_slot_t *slot; 3697 rsmipc_cookie_t *cookie; 3698 void *data = (void *) msg; 3699 size_t size = sizeof (rsmipc_reply_t); 3700 DBG_DEFINE(category, 3701 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 3702 3703 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_reply enter\n")); 3704 3705 cookie = &msg->rsmipc_cookie; 3706 if (cookie->ic.index >= RSMIPC_SZ) { 3707 DBG_PRINTF((category, RSM_ERR, 3708 "rsm: rsm_intr_reply bad cookie %d\n", cookie->ic.index)); 3709 return; 3710 } 3711 3712 ASSERT(cookie->ic.index < RSMIPC_SZ); 3713 slot = &rsm_ipc.slots[cookie->ic.index]; 3714 mutex_enter(&slot->rsmipc_lock); 3715 if (slot->rsmipc_cookie.value == cookie->value) { 3716 /* found a match */ 3717 if (RSMIPC_GET(slot, RSMIPC_PENDING)) { 3718 bcopy(data, slot->rsmipc_data, size); 3719 RSMIPC_CLEAR(slot, RSMIPC_PENDING); 3720 cv_signal(&slot->rsmipc_cv); 3721 } 3722 } else { 3723 DBG_PRINTF((category, RSM_DEBUG, 3724 "rsm: rsm_intr_reply mismatched reply %d\n", 3725 cookie->ic.index)); 3726 } 3727 mutex_exit(&slot->rsmipc_lock); 3728 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_reply done\n")); 3729 } 3730 3731 /* 3732 * This function gets dispatched on the worker thread when we receive 3733 * the SQREADY message. This function sends the SQREADY_ACK message. 3734 */ 3735 static void 3736 rsm_sqready_ack_deferred(void *arg) 3737 { 3738 path_t *path = (path_t *)arg; 3739 DBG_DEFINE(category, 3740 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 3741 3742 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3743 "rsm_sqready_ack_deferred enter\n")); 3744 3745 mutex_enter(&path->mutex); 3746 3747 /* 3748 * If path is not active no point in sending the ACK 3749 * because the whole SQREADY protocol will again start 3750 * when the path becomes active. 
3751 */ 3752 if (path->state != RSMKA_PATH_ACTIVE) { 3753 /* 3754 * decrement the path refcnt incremented in rsm_proc_sqready 3755 */ 3756 PATH_RELE_NOLOCK(path); 3757 mutex_exit(&path->mutex); 3758 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3759 "rsm_sqready_ack_deferred done:!ACTIVE\n")); 3760 return; 3761 } 3762 3763 /* send an SQREADY_ACK message */ 3764 (void) rsmipc_send_controlmsg(path, RSMIPC_MSG_SQREADY_ACK); 3765 3766 /* initialize credits to the max level */ 3767 path->sendq_token.msgbuf_avail = RSMIPC_MAX_MESSAGES; 3768 3769 /* wake up any send that is waiting for credits */ 3770 cv_broadcast(&path->sendq_token.sendq_cv); 3771 3772 /* 3773 * decrement the path refcnt since we incremented it in 3774 * rsm_proc_sqready 3775 */ 3776 PATH_RELE_NOLOCK(path); 3777 3778 mutex_exit(&path->mutex); 3779 3780 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3781 "rsm_sqready_ack_deferred done\n")); 3782 } 3783 3784 /* 3785 * Process the SQREADY message 3786 */ 3787 static void 3788 rsm_proc_sqready(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr, 3789 rsm_intr_hand_arg_t arg) 3790 { 3791 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)msg; 3792 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg; 3793 path_t *path; 3794 DBG_DEFINE(category, 3795 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 3796 3797 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_proc_sqready enter\n")); 3798 3799 /* look up the path - incr the path refcnt */ 3800 path = rsm_find_path(hdlr_argp->adapter_name, 3801 hdlr_argp->adapter_instance, src_hwaddr); 3802 3803 /* 3804 * No path exists or path is not active - drop the message 3805 */ 3806 if (path == NULL) { 3807 DBG_PRINTF((category, RSM_DEBUG, 3808 "rsm_proc_sqready done: msg dropped no path\n")); 3809 return; 3810 } 3811 3812 mutex_exit(&path->mutex); 3813 3814 /* drain any tasks from the previous incarnation */ 3815 taskq_wait(path->recv_taskq); 3816 3817 mutex_enter(&path->mutex); 3818 /* 3819 * If we'd sent an SQREADY message and were 
waiting for SQREADY_ACK 3820 * in the meanwhile we received an SQREADY message, blindly reset 3821 * the WAIT_FOR_SQACK flag because we'll just send SQREADY_ACK 3822 * and forget about the SQREADY that we sent. 3823 */ 3824 path->flags &= ~RSMKA_WAIT_FOR_SQACK; 3825 3826 if (path->state != RSMKA_PATH_ACTIVE) { 3827 /* decr refcnt and drop the mutex */ 3828 PATH_RELE_NOLOCK(path); 3829 mutex_exit(&path->mutex); 3830 DBG_PRINTF((category, RSM_DEBUG, 3831 "rsm_proc_sqready done: msg dropped path !ACTIVE\n")); 3832 return; 3833 } 3834 3835 DBG_PRINTF((category, RSM_DEBUG, "rsm_proc_sqready:path=%lx " 3836 " src=%lx:%llx\n", path, msghdr->rsmipc_src, src_hwaddr)); 3837 3838 /* 3839 * The sender's local incarnation number is our remote incarnation 3840 * number save it in the path data structure 3841 */ 3842 path->remote_incn = msg->rsmipc_local_incn; 3843 path->sendq_token.msgbuf_avail = 0; 3844 path->procmsg_cnt = 0; 3845 3846 /* 3847 * path is active - dispatch task to send SQREADY_ACK - remember 3848 * RSMPI calls can't be done in interrupt context 3849 * 3850 * We can use the recv_taskq to send because the remote endpoint 3851 * cannot start sending messages till it receives SQREADY_ACK hence 3852 * at this point there are no tasks on recv_taskq. 3853 * 3854 * The path refcnt will be decremented in rsm_sqready_ack_deferred. 
3855 */ 3856 (void) taskq_dispatch(path->recv_taskq, 3857 rsm_sqready_ack_deferred, path, KM_NOSLEEP); 3858 3859 mutex_exit(&path->mutex); 3860 3861 3862 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_proc_sqready done\n")); 3863 } 3864 3865 /* 3866 * Process the SQREADY_ACK message 3867 */ 3868 static void 3869 rsm_proc_sqready_ack(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr, 3870 rsm_intr_hand_arg_t arg) 3871 { 3872 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)msg; 3873 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg; 3874 path_t *path; 3875 DBG_DEFINE(category, 3876 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 3877 3878 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3879 "rsm_proc_sqready_ack enter\n")); 3880 3881 /* look up the path - incr the path refcnt */ 3882 path = rsm_find_path(hdlr_argp->adapter_name, 3883 hdlr_argp->adapter_instance, src_hwaddr); 3884 3885 /* 3886 * drop the message if - no path exists or path is not active 3887 * or if its not waiting for SQREADY_ACK message 3888 */ 3889 if (path == NULL) { 3890 DBG_PRINTF((category, RSM_DEBUG, 3891 "rsm_proc_sqready_ack done: msg dropped no path\n")); 3892 return; 3893 } 3894 3895 if ((path->state != RSMKA_PATH_ACTIVE) || 3896 !(path->flags & RSMKA_WAIT_FOR_SQACK)) { 3897 /* decrement the refcnt */ 3898 PATH_RELE_NOLOCK(path); 3899 mutex_exit(&path->mutex); 3900 DBG_PRINTF((category, RSM_DEBUG, 3901 "rsm_proc_sqready_ack done: msg dropped\n")); 3902 return; 3903 } 3904 3905 /* 3906 * Check if this message is in response to the last RSMIPC_MSG_SQREADY 3907 * sent, if not drop it. 
3908 */ 3909 if (path->local_incn != msghdr->rsmipc_incn) { 3910 /* decrement the refcnt */ 3911 PATH_RELE_NOLOCK(path); 3912 mutex_exit(&path->mutex); 3913 DBG_PRINTF((category, RSM_DEBUG, 3914 "rsm_proc_sqready_ack done: msg old incn %lld\n", 3915 msghdr->rsmipc_incn)); 3916 return; 3917 } 3918 3919 DBG_PRINTF((category, RSM_DEBUG, "rsm_proc_sqready_ack:path=%lx " 3920 " src=%lx:%llx\n", path, msghdr->rsmipc_src, src_hwaddr)); 3921 3922 /* 3923 * clear the WAIT_FOR_SQACK flag since we have recvd the ack 3924 */ 3925 path->flags &= ~RSMKA_WAIT_FOR_SQACK; 3926 3927 /* save the remote sendq incn number */ 3928 path->remote_incn = msg->rsmipc_local_incn; 3929 3930 /* initialize credits to the max level */ 3931 path->sendq_token.msgbuf_avail = RSMIPC_MAX_MESSAGES; 3932 3933 /* wake up any send that is waiting for credits */ 3934 cv_broadcast(&path->sendq_token.sendq_cv); 3935 3936 /* decrement the refcnt */ 3937 PATH_RELE_NOLOCK(path); 3938 3939 mutex_exit(&path->mutex); 3940 3941 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3942 "rsm_proc_sqready_ack done\n")); 3943 } 3944 3945 /* 3946 * process the RSMIPC_MSG_CREDIT message 3947 */ 3948 static void 3949 rsm_add_credits(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr, 3950 rsm_intr_hand_arg_t arg) 3951 { 3952 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)msg; 3953 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg; 3954 path_t *path; 3955 DBG_DEFINE(category, 3956 RSM_KERNEL_AGENT | RSM_FUNC_ALL | 3957 RSM_INTR_CALLBACK | RSM_FLOWCONTROL); 3958 3959 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_add_credits enter\n")); 3960 3961 /* look up the path - incr the path refcnt */ 3962 path = rsm_find_path(hdlr_argp->adapter_name, 3963 hdlr_argp->adapter_instance, src_hwaddr); 3964 3965 if (path == NULL) { 3966 DBG_PRINTF((category, RSM_DEBUG, 3967 "rsm_add_credits enter: path not found\n")); 3968 return; 3969 } 3970 3971 /* the path is not active - discard credits */ 3972 if (path->state != RSMKA_PATH_ACTIVE) { 3973 
PATH_RELE_NOLOCK(path); 3974 mutex_exit(&path->mutex); 3975 DBG_PRINTF((category, RSM_DEBUG, 3976 "rsm_add_credits enter:path=%lx !ACTIVE\n", path)); 3977 return; 3978 } 3979 3980 /* 3981 * Check if these credits are for current incarnation of the path. 3982 */ 3983 if (path->local_incn != msghdr->rsmipc_incn) { 3984 /* decrement the refcnt */ 3985 PATH_RELE_NOLOCK(path); 3986 mutex_exit(&path->mutex); 3987 DBG_PRINTF((category, RSM_DEBUG, 3988 "rsm_add_credits enter: old incn %lld\n", 3989 msghdr->rsmipc_incn)); 3990 return; 3991 } 3992 3993 DBG_PRINTF((category, RSM_DEBUG, 3994 "rsm_add_credits:path=%lx new-creds=%d " 3995 "curr credits=%d src=%lx:%llx\n", path, msg->rsmipc_credits, 3996 path->sendq_token.msgbuf_avail, msghdr->rsmipc_src, 3997 src_hwaddr)); 3998 3999 4000 /* add credits to the path's sendq */ 4001 path->sendq_token.msgbuf_avail += msg->rsmipc_credits; 4002 4003 ASSERT(path->sendq_token.msgbuf_avail <= RSMIPC_MAX_MESSAGES); 4004 4005 /* wake up any send that is waiting for credits */ 4006 cv_broadcast(&path->sendq_token.sendq_cv); 4007 4008 /* decrement the refcnt */ 4009 PATH_RELE_NOLOCK(path); 4010 4011 mutex_exit(&path->mutex); 4012 4013 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_add_credits done\n")); 4014 } 4015 4016 static void 4017 rsm_intr_event(rsmipc_request_t *msg) 4018 { 4019 rsmseg_t *seg; 4020 rsmresource_t *p; 4021 rsm_node_id_t src_node; 4022 DBG_DEFINE(category, 4023 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4024 4025 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_event enter\n")); 4026 4027 src_node = msg->rsmipc_hdr.rsmipc_src; 4028 4029 if ((seg = msg->rsmipc_segment_cookie) != NULL) { 4030 /* This is for an import segment */ 4031 uint_t hashval = rsmhash(msg->rsmipc_key); 4032 4033 rw_enter(&rsm_import_segs.rsmhash_rw, RW_READER); 4034 4035 p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hashval); 4036 4037 for (; p; p = p->rsmrc_next) { 4038 if ((p->rsmrc_key == msg->rsmipc_key) && 4039 (p->rsmrc_node 
== src_node)) { 4040 seg = (rsmseg_t *)p; 4041 rsmseglock_acquire(seg); 4042 4043 atomic_inc_32(&seg->s_pollevent); 4044 4045 if (seg->s_pollflag & RSM_SEGMENT_POLL) 4046 pollwakeup(&seg->s_poll, POLLRDNORM); 4047 4048 rsmseglock_release(seg); 4049 } 4050 } 4051 4052 rw_exit(&rsm_import_segs.rsmhash_rw); 4053 } else { 4054 /* This is for an export segment */ 4055 seg = rsmexport_lookup(msg->rsmipc_key); 4056 if (!seg) { 4057 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4058 "rsm_intr_event done: exp seg not found\n")); 4059 return; 4060 } 4061 4062 ASSERT(rsmseglock_held(seg)); 4063 4064 atomic_inc_32(&seg->s_pollevent); 4065 4066 /* 4067 * We must hold the segment lock here, or else the segment 4068 * can be freed while pollwakeup is using it. This implies 4069 * that we MUST NOT grab the segment lock during rsm_chpoll, 4070 * as outlined in the chpoll(2) man page. 4071 */ 4072 if (seg->s_pollflag & RSM_SEGMENT_POLL) 4073 pollwakeup(&seg->s_poll, POLLRDNORM); 4074 4075 rsmseglock_release(seg); 4076 } 4077 4078 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_event done\n")); 4079 } 4080 4081 /* 4082 * The exporter did a republish and changed the ACL - this change is only 4083 * visible to new importers. 4084 */ 4085 static void 4086 importer_update(rsm_node_id_t src_node, rsm_memseg_id_t key, 4087 rsm_permission_t perm) 4088 { 4089 4090 rsmresource_t *p; 4091 rsmseg_t *seg; 4092 uint_t hashval = rsmhash(key); 4093 DBG_DEFINE(category, 4094 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4095 4096 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_update enter\n")); 4097 4098 rw_enter(&rsm_import_segs.rsmhash_rw, RW_READER); 4099 4100 p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hashval); 4101 4102 for (; p; p = p->rsmrc_next) { 4103 /* 4104 * find the importer and update the permission in the shared 4105 * data structure. 
Any new importers will use the new perms 4106 */ 4107 if ((p->rsmrc_key == key) && (p->rsmrc_node == src_node)) { 4108 seg = (rsmseg_t *)p; 4109 4110 rsmseglock_acquire(seg); 4111 rsmsharelock_acquire(seg); 4112 seg->s_share->rsmsi_mode = perm; 4113 rsmsharelock_release(seg); 4114 rsmseglock_release(seg); 4115 4116 break; 4117 } 4118 } 4119 4120 rw_exit(&rsm_import_segs.rsmhash_rw); 4121 4122 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_update done\n")); 4123 } 4124 4125 void 4126 rsm_suspend_complete(rsm_node_id_t src_node, int flag) 4127 { 4128 int done = 1; /* indicate all SUSPENDS have been acked */ 4129 list_element_t *elem; 4130 DBG_DEFINE(category, 4131 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4132 4133 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4134 "rsm_suspend_complete enter\n")); 4135 4136 mutex_enter(&rsm_suspend_list.list_lock); 4137 4138 if (rsm_suspend_list.list_head == NULL) { 4139 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4140 "rsm_suspend_complete done: suspend_list is empty\n")); 4141 mutex_exit(&rsm_suspend_list.list_lock); 4142 return; 4143 } 4144 4145 elem = rsm_suspend_list.list_head; 4146 while (elem != NULL) { 4147 if (elem->nodeid == src_node) { 4148 /* clear the pending flag for the node */ 4149 elem->flags &= ~RSM_SUSPEND_ACKPENDING; 4150 elem->flags |= flag; 4151 } 4152 4153 if (done && (elem->flags & RSM_SUSPEND_ACKPENDING)) 4154 done = 0; /* still some nodes have not yet ACKED */ 4155 4156 elem = elem->next; 4157 } 4158 4159 mutex_exit(&rsm_suspend_list.list_lock); 4160 4161 if (!done) { 4162 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4163 "rsm_suspend_complete done: acks pending\n")); 4164 return; 4165 } 4166 /* 4167 * Now that we are done with suspending all the remote importers 4168 * time to quiesce the local exporters 4169 */ 4170 exporter_quiesce(); 4171 4172 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4173 "rsm_suspend_complete done\n")); 4174 } 4175 4176 static void 4177 exporter_quiesce() 4178 { 4179 int i, e; 4180 
rsmresource_t *current; 4181 rsmseg_t *seg; 4182 adapter_t *adapter; 4183 DBG_DEFINE(category, 4184 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4185 4186 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exporter_quiesce enter\n")); 4187 /* 4188 * The importers send a SUSPEND_COMPLETE to the exporter node 4189 * Unpublish, unbind the export segment and 4190 * move the segments to the EXPORT_QUIESCED state 4191 */ 4192 4193 rw_enter(&rsm_export_segs.rsmhash_rw, RW_READER); 4194 4195 for (i = 0; i < rsm_hash_size; i++) { 4196 current = rsm_export_segs.bucket[i]; 4197 while (current != NULL) { 4198 seg = (rsmseg_t *)current; 4199 rsmseglock_acquire(seg); 4200 if (current->rsmrc_state == 4201 RSM_STATE_EXPORT_QUIESCING) { 4202 adapter = seg->s_adapter; 4203 /* 4204 * some local memory handles are not published 4205 * check if it was published 4206 */ 4207 if ((seg->s_acl == NULL) || 4208 (seg->s_acl[0].ae_node != my_nodeid) || 4209 (seg->s_acl[0].ae_permission != 0)) { 4210 4211 e = adapter->rsmpi_ops->rsm_unpublish( 4212 seg->s_handle.out); 4213 DBG_PRINTF((category, RSM_DEBUG, 4214 "exporter_quiesce:unpub %d\n", e)); 4215 4216 e = adapter->rsmpi_ops->rsm_seg_destroy( 4217 seg->s_handle.out); 4218 4219 DBG_PRINTF((category, RSM_DEBUG, 4220 "exporter_quiesce:destroy %d\n", 4221 e)); 4222 } 4223 4224 (void) rsm_unbind_pages(seg); 4225 seg->s_state = RSM_STATE_EXPORT_QUIESCED; 4226 cv_broadcast(&seg->s_cv); 4227 } 4228 rsmseglock_release(seg); 4229 current = current->rsmrc_next; 4230 } 4231 } 4232 rw_exit(&rsm_export_segs.rsmhash_rw); 4233 4234 /* 4235 * All the local segments we are done with the pre-del processing 4236 * - time to move to PREDEL_COMPLETED. 
4237 */ 4238 4239 mutex_enter(&rsm_drv_data.drv_lock); 4240 4241 ASSERT(rsm_drv_data.drv_state == RSM_DRV_PREDEL_STARTED); 4242 4243 rsm_drv_data.drv_state = RSM_DRV_PREDEL_COMPLETED; 4244 4245 cv_broadcast(&rsm_drv_data.drv_cv); 4246 4247 mutex_exit(&rsm_drv_data.drv_lock); 4248 4249 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exporter_quiesce done\n")); 4250 } 4251 4252 static void 4253 importer_suspend(rsm_node_id_t src_node) 4254 { 4255 int i; 4256 int susp_flg; /* true means already suspended */ 4257 int num_importers; 4258 rsmresource_t *p = NULL, *curp; 4259 rsmhash_table_t *rhash = &rsm_import_segs; 4260 rsmseg_t *seg; 4261 rsmipc_request_t request; 4262 DBG_DEFINE(category, 4263 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4264 4265 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_suspend enter\n")); 4266 4267 rw_enter(&rhash->rsmhash_rw, RW_READER); 4268 for (i = 0; i < rsm_hash_size; i++) { 4269 p = rhash->bucket[i]; 4270 4271 /* 4272 * Suspend all importers with same <node, key> pair. 4273 * After the last one of the shared importers has been 4274 * suspended - suspend the shared mappings/connection. 4275 */ 4276 for (; p; p = p->rsmrc_next) { 4277 rsmseg_t *first = (rsmseg_t *)p; 4278 if ((first->s_node != src_node) || 4279 (first->s_state == RSM_STATE_DISCONNECT)) 4280 continue; /* go to next entry */ 4281 /* 4282 * search the rest of the bucket for 4283 * other siblings (imprtrs with the same key) 4284 * of "first" and suspend them. 4285 * All importers with same key fall in 4286 * the same bucket. 
4287 */ 4288 num_importers = 0; 4289 for (curp = p; curp; curp = curp->rsmrc_next) { 4290 seg = (rsmseg_t *)curp; 4291 4292 rsmseglock_acquire(seg); 4293 4294 if ((seg->s_node != first->s_node) || 4295 (seg->s_key != first->s_key) || 4296 (seg->s_state == RSM_STATE_DISCONNECT)) { 4297 /* 4298 * either not a peer segment or its a 4299 * disconnected segment - skip it 4300 */ 4301 rsmseglock_release(seg); 4302 continue; 4303 } 4304 4305 rsmseg_suspend(seg, &susp_flg); 4306 4307 if (susp_flg) { /* seg already suspended */ 4308 rsmseglock_release(seg); 4309 break; /* the inner for loop */ 4310 } 4311 4312 num_importers++; 4313 rsmsharelock_acquire(seg); 4314 /* 4315 * we've processed all importers that are 4316 * siblings of "first" 4317 */ 4318 if (num_importers == 4319 seg->s_share->rsmsi_refcnt) { 4320 rsmsharelock_release(seg); 4321 rsmseglock_release(seg); 4322 break; 4323 } 4324 rsmsharelock_release(seg); 4325 rsmseglock_release(seg); 4326 } 4327 4328 /* 4329 * All the importers with the same key and 4330 * nodeid as "first" have been suspended. 4331 * Now suspend the shared connect/mapping. 4332 * This is done only once. 
4333 */ 4334 if (!susp_flg) { 4335 rsmsegshare_suspend(seg); 4336 } 4337 } 4338 } 4339 4340 rw_exit(&rhash->rsmhash_rw); 4341 4342 /* send an ACK for SUSPEND message */ 4343 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SUSPEND_DONE; 4344 (void) rsmipc_send(src_node, &request, RSM_NO_REPLY); 4345 4346 4347 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_suspend done\n")); 4348 4349 } 4350 4351 static void 4352 rsmseg_suspend(rsmseg_t *seg, int *susp_flg) 4353 { 4354 int recheck_state; 4355 rsmcookie_t *hdl; 4356 DBG_DEFINE(category, 4357 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4358 4359 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4360 "rsmseg_suspend enter: key=%u\n", seg->s_key)); 4361 4362 *susp_flg = 0; 4363 4364 ASSERT(rsmseglock_held(seg)); 4365 /* wait if putv/getv is in progress */ 4366 while (seg->s_rdmacnt > 0) 4367 cv_wait(&seg->s_cv, &seg->s_lock); 4368 4369 do { 4370 recheck_state = 0; 4371 4372 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4373 "rsmseg_suspend:segment %x state=%d\n", 4374 seg->s_key, seg->s_state)); 4375 4376 switch (seg->s_state) { 4377 case RSM_STATE_NEW: 4378 /* not a valid state */ 4379 break; 4380 case RSM_STATE_CONNECTING: 4381 seg->s_state = RSM_STATE_ABORT_CONNECT; 4382 break; 4383 case RSM_STATE_ABORT_CONNECT: 4384 break; 4385 case RSM_STATE_CONNECT: 4386 seg->s_handle.in = NULL; 4387 seg->s_state = RSM_STATE_CONN_QUIESCE; 4388 break; 4389 case RSM_STATE_MAPPING: 4390 /* wait until segment leaves the mapping state */ 4391 while (seg->s_state == RSM_STATE_MAPPING) 4392 cv_wait(&seg->s_cv, &seg->s_lock); 4393 recheck_state = 1; 4394 break; 4395 case RSM_STATE_ACTIVE: 4396 /* unload the mappings */ 4397 if (seg->s_ckl != NULL) { 4398 hdl = seg->s_ckl; 4399 for (; hdl != NULL; hdl = hdl->c_next) { 4400 (void) devmap_unload(hdl->c_dhp, 4401 hdl->c_off, hdl->c_len); 4402 } 4403 } 4404 seg->s_mapinfo = NULL; 4405 seg->s_state = RSM_STATE_MAP_QUIESCE; 4406 break; 4407 case RSM_STATE_CONN_QUIESCE: 4408 /* FALLTHRU */ 4409 case 
RSM_STATE_MAP_QUIESCE: 4410 /* rsmseg_suspend already done for seg */ 4411 *susp_flg = 1; 4412 break; 4413 case RSM_STATE_DISCONNECT: 4414 break; 4415 default: 4416 ASSERT(0); /* invalid state */ 4417 } 4418 } while (recheck_state); 4419 4420 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_suspend done\n")); 4421 } 4422 4423 static void 4424 rsmsegshare_suspend(rsmseg_t *seg) 4425 { 4426 int e; 4427 adapter_t *adapter; 4428 rsm_import_share_t *sharedp; 4429 DBG_DEFINE(category, 4430 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4431 4432 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4433 "rsmsegshare_suspend enter\n")); 4434 4435 rsmseglock_acquire(seg); 4436 rsmsharelock_acquire(seg); 4437 4438 sharedp = seg->s_share; 4439 adapter = seg->s_adapter; 4440 switch (sharedp->rsmsi_state) { 4441 case RSMSI_STATE_NEW: 4442 break; 4443 case RSMSI_STATE_CONNECTING: 4444 sharedp->rsmsi_state = RSMSI_STATE_ABORT_CONNECT; 4445 break; 4446 case RSMSI_STATE_ABORT_CONNECT: 4447 break; 4448 case RSMSI_STATE_CONNECTED: 4449 /* do the rsmpi disconnect */ 4450 if (sharedp->rsmsi_node != my_nodeid) { 4451 e = adapter->rsmpi_ops-> 4452 rsm_disconnect(sharedp->rsmsi_handle); 4453 4454 DBG_PRINTF((category, RSM_DEBUG, 4455 "rsm:rsmpi disconnect seg=%x:err=%d\n", 4456 sharedp->rsmsi_segid, e)); 4457 } 4458 4459 sharedp->rsmsi_handle = NULL; 4460 4461 sharedp->rsmsi_state = RSMSI_STATE_CONN_QUIESCE; 4462 break; 4463 case RSMSI_STATE_CONN_QUIESCE: 4464 break; 4465 case RSMSI_STATE_MAPPED: 4466 /* do the rsmpi unmap and disconnect */ 4467 if (sharedp->rsmsi_node != my_nodeid) { 4468 e = adapter->rsmpi_ops->rsm_unmap(seg->s_handle.in); 4469 4470 DBG_PRINTF((category, RSM_DEBUG, 4471 "rsmshare_suspend: rsmpi unmap %d\n", e)); 4472 4473 e = adapter->rsmpi_ops-> 4474 rsm_disconnect(sharedp->rsmsi_handle); 4475 DBG_PRINTF((category, RSM_DEBUG, 4476 "rsm:rsmpi disconnect seg=%x:err=%d\n", 4477 sharedp->rsmsi_segid, e)); 4478 } 4479 4480 sharedp->rsmsi_handle = NULL; 4481 4482 
sharedp->rsmsi_state = RSMSI_STATE_MAP_QUIESCE; 4483 break; 4484 case RSMSI_STATE_MAP_QUIESCE: 4485 break; 4486 case RSMSI_STATE_DISCONNECTED: 4487 break; 4488 default: 4489 ASSERT(0); /* invalid state */ 4490 } 4491 4492 rsmsharelock_release(seg); 4493 rsmseglock_release(seg); 4494 4495 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4496 "rsmsegshare_suspend done\n")); 4497 } 4498 4499 /* 4500 * This should get called on receiving a RESUME message or from 4501 * the pathmanger if the node undergoing DR dies. 4502 */ 4503 static void 4504 importer_resume(rsm_node_id_t src_node) 4505 { 4506 int i; 4507 rsmresource_t *p = NULL; 4508 rsmhash_table_t *rhash = &rsm_import_segs; 4509 void *cookie; 4510 DBG_DEFINE(category, 4511 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4512 4513 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_resume enter\n")); 4514 4515 rw_enter(&rhash->rsmhash_rw, RW_READER); 4516 4517 for (i = 0; i < rsm_hash_size; i++) { 4518 p = rhash->bucket[i]; 4519 4520 for (; p; p = p->rsmrc_next) { 4521 rsmseg_t *seg = (rsmseg_t *)p; 4522 4523 rsmseglock_acquire(seg); 4524 4525 /* process only importers of node undergoing DR */ 4526 if (seg->s_node != src_node) { 4527 rsmseglock_release(seg); 4528 continue; 4529 } 4530 4531 if (rsmseg_resume(seg, &cookie) != RSM_SUCCESS) { 4532 rsmipc_request_t request; 4533 /* 4534 * rsmpi map/connect failed 4535 * inform the exporter so that it can 4536 * remove the importer. 
4537 */ 4538 request.rsmipc_hdr.rsmipc_type = 4539 RSMIPC_MSG_NOTIMPORTING; 4540 request.rsmipc_key = seg->s_segid; 4541 request.rsmipc_segment_cookie = cookie; 4542 rsmseglock_release(seg); 4543 (void) rsmipc_send(seg->s_node, &request, 4544 RSM_NO_REPLY); 4545 } else { 4546 rsmseglock_release(seg); 4547 } 4548 } 4549 } 4550 4551 rw_exit(&rhash->rsmhash_rw); 4552 4553 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_resume done\n")); 4554 } 4555 4556 static int 4557 rsmseg_resume(rsmseg_t *seg, void **cookie) 4558 { 4559 int e; 4560 int retc; 4561 off_t dev_offset; 4562 size_t maplen; 4563 uint_t maxprot; 4564 rsm_mapinfo_t *p; 4565 rsmcookie_t *hdl; 4566 rsm_import_share_t *sharedp; 4567 DBG_DEFINE(category, 4568 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4569 4570 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4571 "rsmseg_resume enter: key=%u\n", seg->s_key)); 4572 4573 *cookie = NULL; 4574 4575 ASSERT(rsmseglock_held(seg)); 4576 4577 if ((seg->s_state != RSM_STATE_CONN_QUIESCE) && 4578 (seg->s_state != RSM_STATE_MAP_QUIESCE)) { 4579 return (RSM_SUCCESS); 4580 } 4581 4582 sharedp = seg->s_share; 4583 4584 rsmsharelock_acquire(seg); 4585 4586 /* resume the shared connection and/or mapping */ 4587 retc = rsmsegshare_resume(seg); 4588 4589 if (seg->s_state == RSM_STATE_CONN_QUIESCE) { 4590 /* shared state can either be connected or mapped */ 4591 if ((sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) || 4592 (sharedp->rsmsi_state == RSMSI_STATE_MAPPED)) { 4593 ASSERT(retc == RSM_SUCCESS); 4594 seg->s_handle.in = sharedp->rsmsi_handle; 4595 rsmsharelock_release(seg); 4596 seg->s_state = RSM_STATE_CONNECT; 4597 4598 } else { /* error in rsmpi connect during resume */ 4599 seg->s_handle.in = NULL; 4600 seg->s_state = RSM_STATE_DISCONNECT; 4601 4602 sharedp->rsmsi_refcnt--; 4603 cookie = (void *)sharedp->rsmsi_cookie; 4604 4605 if (sharedp->rsmsi_refcnt == 0) { 4606 ASSERT(sharedp->rsmsi_mapcnt == 0); 4607 rsmsharelock_release(seg); 4608 4609 /* clean up the 
shared data structure */ 4610 mutex_destroy(&sharedp->rsmsi_lock); 4611 cv_destroy(&sharedp->rsmsi_cv); 4612 kmem_free((void *)(sharedp), 4613 sizeof (rsm_import_share_t)); 4614 4615 } else { 4616 rsmsharelock_release(seg); 4617 } 4618 /* 4619 * The following needs to be done after any 4620 * rsmsharelock calls which use seg->s_share. 4621 */ 4622 seg->s_share = NULL; 4623 } 4624 4625 /* signal any waiting segment */ 4626 cv_broadcast(&seg->s_cv); 4627 4628 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4629 "rsmseg_resume done:state=%d\n", seg->s_state)); 4630 return (retc); 4631 } 4632 4633 ASSERT(seg->s_state == RSM_STATE_MAP_QUIESCE); 4634 4635 /* Setup protections for remap */ 4636 maxprot = PROT_USER; 4637 if (seg->s_mode & RSM_PERM_READ) { 4638 maxprot |= PROT_READ; 4639 } 4640 if (seg->s_mode & RSM_PERM_WRITE) { 4641 maxprot |= PROT_WRITE; 4642 } 4643 4644 if (sharedp->rsmsi_state != RSMSI_STATE_MAPPED) { 4645 /* error in rsmpi connect or map during resume */ 4646 4647 /* remap to trash page */ 4648 ASSERT(seg->s_ckl != NULL); 4649 4650 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) { 4651 e = devmap_umem_remap(hdl->c_dhp, rsm_dip, 4652 remap_cookie, hdl->c_off, hdl->c_len, 4653 maxprot, 0, NULL); 4654 4655 DBG_PRINTF((category, RSM_ERR, 4656 "rsmseg_resume:remap=%d\n", e)); 4657 } 4658 4659 seg->s_handle.in = NULL; 4660 seg->s_state = RSM_STATE_DISCONNECT; 4661 4662 sharedp->rsmsi_refcnt--; 4663 4664 sharedp->rsmsi_mapcnt--; 4665 seg->s_mapinfo = NULL; 4666 4667 if (sharedp->rsmsi_refcnt == 0) { 4668 ASSERT(sharedp->rsmsi_mapcnt == 0); 4669 rsmsharelock_release(seg); 4670 4671 /* clean up the shared data structure */ 4672 mutex_destroy(&sharedp->rsmsi_lock); 4673 cv_destroy(&sharedp->rsmsi_cv); 4674 kmem_free((void *)(sharedp), 4675 sizeof (rsm_import_share_t)); 4676 4677 } else { 4678 rsmsharelock_release(seg); 4679 } 4680 /* 4681 * The following needs to be done after any 4682 * rsmsharelock calls which use seg->s_share. 
4683 */ 4684 seg->s_share = NULL; 4685 4686 /* signal any waiting segment */ 4687 cv_broadcast(&seg->s_cv); 4688 4689 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4690 "rsmseg_resume done:seg=%x,err=%d\n", 4691 seg->s_key, retc)); 4692 return (retc); 4693 4694 } 4695 4696 seg->s_handle.in = sharedp->rsmsi_handle; 4697 4698 if (seg->s_node == my_nodeid) { /* loopback */ 4699 ASSERT(seg->s_mapinfo == NULL); 4700 4701 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) { 4702 e = devmap_umem_remap(hdl->c_dhp, 4703 rsm_dip, seg->s_cookie, 4704 hdl->c_off, hdl->c_len, 4705 maxprot, 0, NULL); 4706 4707 DBG_PRINTF((category, RSM_ERR, 4708 "rsmseg_resume:remap=%d\n", e)); 4709 } 4710 } else { /* remote exporter */ 4711 /* remap to the new rsmpi maps */ 4712 seg->s_mapinfo = sharedp->rsmsi_mapinfo; 4713 4714 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) { 4715 p = rsm_get_mapinfo(seg, hdl->c_off, hdl->c_len, 4716 &dev_offset, &maplen); 4717 e = devmap_devmem_remap(hdl->c_dhp, 4718 p->dip, p->dev_register, dev_offset, 4719 maplen, maxprot, 0, NULL); 4720 4721 DBG_PRINTF((category, RSM_ERR, 4722 "rsmseg_resume:remap=%d\n", e)); 4723 } 4724 } 4725 4726 rsmsharelock_release(seg); 4727 4728 seg->s_state = RSM_STATE_ACTIVE; 4729 cv_broadcast(&seg->s_cv); 4730 4731 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_resume done\n")); 4732 4733 return (retc); 4734 } 4735 4736 static int 4737 rsmsegshare_resume(rsmseg_t *seg) 4738 { 4739 int e = RSM_SUCCESS; 4740 adapter_t *adapter; 4741 rsm_import_share_t *sharedp; 4742 DBG_DEFINE(category, 4743 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4744 4745 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegshare_resume enter\n")); 4746 4747 ASSERT(rsmseglock_held(seg)); 4748 ASSERT(rsmsharelock_held(seg)); 4749 4750 sharedp = seg->s_share; 4751 4752 /* 4753 * If we are not in a xxxx_QUIESCE state that means shared 4754 * connect/mapping processing has been already been done 4755 * so return success. 
4756 */ 4757 if ((sharedp->rsmsi_state != RSMSI_STATE_CONN_QUIESCE) && 4758 (sharedp->rsmsi_state != RSMSI_STATE_MAP_QUIESCE)) { 4759 return (RSM_SUCCESS); 4760 } 4761 4762 adapter = seg->s_adapter; 4763 4764 if (sharedp->rsmsi_node != my_nodeid) { 4765 rsm_addr_t hwaddr; 4766 hwaddr = get_remote_hwaddr(adapter, sharedp->rsmsi_node); 4767 4768 e = adapter->rsmpi_ops->rsm_connect( 4769 adapter->rsmpi_handle, hwaddr, 4770 sharedp->rsmsi_segid, &sharedp->rsmsi_handle); 4771 4772 DBG_PRINTF((category, RSM_DEBUG, 4773 "rsmsegshare_resume:rsmpi connect seg=%x:err=%d\n", 4774 sharedp->rsmsi_segid, e)); 4775 4776 if (e != RSM_SUCCESS) { 4777 /* when do we send the NOT_IMPORTING message */ 4778 sharedp->rsmsi_handle = NULL; 4779 sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED; 4780 /* signal any waiting segment */ 4781 cv_broadcast(&sharedp->rsmsi_cv); 4782 return (e); 4783 } 4784 } 4785 4786 if (sharedp->rsmsi_state == RSMSI_STATE_CONN_QUIESCE) { 4787 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED; 4788 /* signal any waiting segment */ 4789 cv_broadcast(&sharedp->rsmsi_cv); 4790 return (e); 4791 } 4792 4793 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAP_QUIESCE); 4794 4795 /* do the rsmpi map of the whole segment here */ 4796 if (sharedp->rsmsi_node != my_nodeid) { 4797 size_t mapped_len; 4798 rsm_mapinfo_t *p; 4799 4800 /* 4801 * We need to do rsmpi maps with <off, lens> identical to 4802 * the old mapinfo list because the segment mapping handles 4803 * dhp and such need the fragmentation of rsmpi maps to be 4804 * identical to what it was during the mmap of the segment 4805 */ 4806 p = sharedp->rsmsi_mapinfo; 4807 4808 while (p != NULL) { 4809 mapped_len = 0; 4810 4811 e = adapter->rsmpi_ops->rsm_map( 4812 sharedp->rsmsi_handle, p->start_offset, 4813 p->individual_len, &mapped_len, 4814 &p->dip, &p->dev_register, &p->dev_offset, 4815 NULL, NULL); 4816 4817 if (e != 0) { 4818 DBG_PRINTF((category, RSM_ERR, 4819 "rsmsegshare_resume: rsmpi map err=%d\n", 4820 e)); 4821 break; 
4822 } 4823 4824 if (mapped_len != p->individual_len) { 4825 DBG_PRINTF((category, RSM_ERR, 4826 "rsmsegshare_resume: rsmpi maplen" 4827 "< reqlen=%lx\n", mapped_len)); 4828 e = RSMERR_BAD_LENGTH; 4829 break; 4830 } 4831 4832 p = p->next; 4833 4834 } 4835 4836 4837 if (e != RSM_SUCCESS) { /* rsmpi map failed */ 4838 int err; 4839 /* Check if this is the first rsm_map */ 4840 if (p != sharedp->rsmsi_mapinfo) { 4841 /* 4842 * A single rsm_unmap undoes multiple rsm_maps. 4843 */ 4844 (void) seg->s_adapter->rsmpi_ops-> 4845 rsm_unmap(sharedp->rsmsi_handle); 4846 } 4847 4848 rsm_free_mapinfo(sharedp->rsmsi_mapinfo); 4849 sharedp->rsmsi_mapinfo = NULL; 4850 4851 err = adapter->rsmpi_ops-> 4852 rsm_disconnect(sharedp->rsmsi_handle); 4853 4854 DBG_PRINTF((category, RSM_DEBUG, 4855 "rsmsegshare_resume:disconn seg=%x:err=%d\n", 4856 sharedp->rsmsi_segid, err)); 4857 4858 sharedp->rsmsi_handle = NULL; 4859 sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED; 4860 4861 /* signal the waiting segments */ 4862 cv_broadcast(&sharedp->rsmsi_cv); 4863 DBG_PRINTF((category, RSM_DEBUG, 4864 "rsmsegshare_resume done: rsmpi map err\n")); 4865 return (e); 4866 } 4867 } 4868 4869 sharedp->rsmsi_state = RSMSI_STATE_MAPPED; 4870 4871 /* signal any waiting segment */ 4872 cv_broadcast(&sharedp->rsmsi_cv); 4873 4874 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegshare_resume done\n")); 4875 4876 return (e); 4877 } 4878 4879 /* 4880 * this is the routine that gets called by recv_taskq which is the 4881 * thread that processes messages that are flow-controlled. 
4882 */ 4883 static void 4884 rsm_intr_proc_deferred(void *arg) 4885 { 4886 path_t *path = (path_t *)arg; 4887 rsmipc_request_t *msg; 4888 rsmipc_msghdr_t *msghdr; 4889 rsm_node_id_t src_node; 4890 msgbuf_elem_t *head; 4891 int e; 4892 DBG_DEFINE(category, 4893 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4894 4895 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4896 "rsm_intr_proc_deferred enter\n")); 4897 4898 mutex_enter(&path->mutex); 4899 4900 /* use the head of the msgbuf_queue */ 4901 head = rsmka_gethead_msgbuf(path); 4902 4903 mutex_exit(&path->mutex); 4904 4905 msg = (rsmipc_request_t *)&(head->msg); 4906 msghdr = (rsmipc_msghdr_t *)msg; 4907 4908 src_node = msghdr->rsmipc_src; 4909 4910 /* 4911 * messages that need to send a reply should check the message version 4912 * before processing the message. And all messages that need to 4913 * send a reply should be processed here by the worker thread. 4914 */ 4915 switch (msghdr->rsmipc_type) { 4916 case RSMIPC_MSG_SEGCONNECT: 4917 if (msghdr->rsmipc_version != RSM_VERSION) { 4918 rsmipc_reply_t reply; 4919 reply.rsmipc_status = RSMERR_BAD_DRIVER_VERSION; 4920 reply.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPLY; 4921 reply.rsmipc_hdr.rsmipc_cookie = msghdr->rsmipc_cookie; 4922 (void) rsmipc_send(msghdr->rsmipc_src, NULL, &reply); 4923 } else { 4924 rsm_intr_segconnect(src_node, msg); 4925 } 4926 break; 4927 case RSMIPC_MSG_DISCONNECT: 4928 rsm_force_unload(src_node, msg->rsmipc_key, DISCONNECT); 4929 break; 4930 case RSMIPC_MSG_SUSPEND: 4931 importer_suspend(src_node); 4932 break; 4933 case RSMIPC_MSG_SUSPEND_DONE: 4934 rsm_suspend_complete(src_node, 0); 4935 break; 4936 case RSMIPC_MSG_RESUME: 4937 importer_resume(src_node); 4938 break; 4939 default: 4940 ASSERT(0); 4941 } 4942 4943 mutex_enter(&path->mutex); 4944 4945 rsmka_dequeue_msgbuf(path); 4946 4947 /* incr procmsg_cnt can be at most RSMIPC_MAX_MESSAGES */ 4948 if (path->procmsg_cnt < RSMIPC_MAX_MESSAGES) 4949 path->procmsg_cnt++; 4950 4951 
ASSERT(path->procmsg_cnt <= RSMIPC_MAX_MESSAGES); 4952 4953 /* No need to send credits if path is going down */ 4954 if ((path->state == RSMKA_PATH_ACTIVE) && 4955 (path->procmsg_cnt >= RSMIPC_LOTSFREE_MSGBUFS)) { 4956 /* 4957 * send credits and reset procmsg_cnt if success otherwise 4958 * credits will be sent after processing the next message 4959 */ 4960 e = rsmipc_send_controlmsg(path, RSMIPC_MSG_CREDIT); 4961 if (e == 0) 4962 path->procmsg_cnt = 0; 4963 else 4964 DBG_PRINTF((category, RSM_ERR, 4965 "rsm_intr_proc_deferred:send credits err=%d\n", e)); 4966 } 4967 4968 /* 4969 * decrement the path refcnt since we incremented it in 4970 * rsm_intr_callback_dispatch 4971 */ 4972 PATH_RELE_NOLOCK(path); 4973 4974 mutex_exit(&path->mutex); 4975 4976 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4977 "rsm_intr_proc_deferred done\n")); 4978 } 4979 4980 /* 4981 * Flow-controlled messages are enqueued and dispatched onto a taskq here 4982 */ 4983 static void 4984 rsm_intr_callback_dispatch(void *data, rsm_addr_t src_hwaddr, 4985 rsm_intr_hand_arg_t arg) 4986 { 4987 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg; 4988 path_t *path; 4989 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)data; 4990 DBG_DEFINE(category, 4991 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4992 4993 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4994 "rsm_intr_callback_dispatch enter\n")); 4995 ASSERT(data && hdlr_argp); 4996 4997 /* look up the path - incr the path refcnt */ 4998 path = rsm_find_path(hdlr_argp->adapter_name, 4999 hdlr_argp->adapter_instance, src_hwaddr); 5000 5001 /* the path has been removed - drop this message */ 5002 if (path == NULL) { 5003 DBG_PRINTF((category, RSM_DEBUG, 5004 "rsm_intr_callback_dispatch done: msg dropped\n")); 5005 return; 5006 } 5007 /* the path is not active - don't accept new messages */ 5008 if (path->state != RSMKA_PATH_ACTIVE) { 5009 PATH_RELE_NOLOCK(path); 5010 mutex_exit(&path->mutex); 5011 DBG_PRINTF((category, RSM_DEBUG, 5012 
"rsm_intr_callback_dispatch done: msg dropped" 5013 " path=%lx !ACTIVE\n", path)); 5014 return; 5015 } 5016 5017 /* 5018 * Check if this message was sent to an older incarnation 5019 * of the path/sendq. 5020 */ 5021 if (path->local_incn != msghdr->rsmipc_incn) { 5022 /* decrement the refcnt */ 5023 PATH_RELE_NOLOCK(path); 5024 mutex_exit(&path->mutex); 5025 DBG_PRINTF((category, RSM_DEBUG, 5026 "rsm_intr_callback_dispatch done: old incn %lld\n", 5027 msghdr->rsmipc_incn)); 5028 return; 5029 } 5030 5031 /* copy and enqueue msg on the path's msgbuf queue */ 5032 rsmka_enqueue_msgbuf(path, data); 5033 5034 /* 5035 * schedule task to process messages - ignore retval from 5036 * task_dispatch because we sender cannot send more than 5037 * what receiver can handle. 5038 */ 5039 (void) taskq_dispatch(path->recv_taskq, 5040 rsm_intr_proc_deferred, path, KM_NOSLEEP); 5041 5042 mutex_exit(&path->mutex); 5043 5044 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5045 "rsm_intr_callback_dispatch done\n")); 5046 } 5047 5048 /* 5049 * This procedure is called from rsm_srv_func when a remote node creates a 5050 * a send queue. This event is used as a hint that an earlier failed 5051 * attempt to create a send queue to that remote node may now succeed and 5052 * should be retried. Indication of an earlier failed attempt is provided 5053 * by the RSMKA_SQCREATE_PENDING flag. 
5054 */ 5055 static void 5056 rsm_sqcreateop_callback(rsm_addr_t src_hwaddr, rsm_intr_hand_arg_t arg) 5057 { 5058 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg; 5059 path_t *path; 5060 DBG_DEFINE(category, 5061 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 5062 5063 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5064 "rsm_sqcreateop_callback enter\n")); 5065 5066 /* look up the path - incr the path refcnt */ 5067 path = rsm_find_path(hdlr_argp->adapter_name, 5068 hdlr_argp->adapter_instance, src_hwaddr); 5069 5070 if (path == NULL) { 5071 DBG_PRINTF((category, RSM_DEBUG, 5072 "rsm_sqcreateop_callback done: no path\n")); 5073 return; 5074 } 5075 5076 if ((path->state == RSMKA_PATH_UP) && 5077 (path->flags & RSMKA_SQCREATE_PENDING)) { 5078 /* 5079 * previous attempt to create sendq had failed, retry 5080 * it and move to RSMKA_PATH_ACTIVE state if successful. 5081 * the refcnt will be decremented in the do_deferred_work 5082 */ 5083 (void) rsmka_do_path_active(path, RSMKA_NO_SLEEP); 5084 } else { 5085 /* decrement the refcnt */ 5086 PATH_RELE_NOLOCK(path); 5087 } 5088 mutex_exit(&path->mutex); 5089 5090 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5091 "rsm_sqcreateop_callback done\n")); 5092 } 5093 5094 static void 5095 rsm_intr_callback(void *data, rsm_addr_t src_hwaddr, rsm_intr_hand_arg_t arg) 5096 { 5097 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)data; 5098 rsmipc_request_t *msg = (rsmipc_request_t *)data; 5099 rsmipc_controlmsg_t *ctrlmsg = (rsmipc_controlmsg_t *)data; 5100 rsm_node_id_t src_node; 5101 DBG_DEFINE(category, 5102 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 5103 5104 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_callback enter:" 5105 "src=%d, type=%d\n", msghdr->rsmipc_src, 5106 msghdr->rsmipc_type)); 5107 5108 /* 5109 * Check for the version number in the msg header. If it is not 5110 * RSM_VERSION, drop the message. 
In the future, we need to manage 5111 * incompatible version numbers in some way 5112 */ 5113 if (msghdr->rsmipc_version != RSM_VERSION) { 5114 DBG_PRINTF((category, RSM_ERR, "wrong KA version\n")); 5115 /* 5116 * Drop requests that don't have a reply right here 5117 * Request with reply will send a BAD_VERSION reply 5118 * when they get processed by the worker thread. 5119 */ 5120 if (msghdr->rsmipc_type != RSMIPC_MSG_SEGCONNECT) { 5121 return; 5122 } 5123 5124 } 5125 5126 src_node = msghdr->rsmipc_src; 5127 5128 switch (msghdr->rsmipc_type) { 5129 case RSMIPC_MSG_SEGCONNECT: 5130 case RSMIPC_MSG_DISCONNECT: 5131 case RSMIPC_MSG_SUSPEND: 5132 case RSMIPC_MSG_SUSPEND_DONE: 5133 case RSMIPC_MSG_RESUME: 5134 /* 5135 * These message types are handled by a worker thread using 5136 * the flow-control algorithm. 5137 * Any message processing that does one or more of the 5138 * following should be handled in a worker thread. 5139 * - allocates resources and might sleep 5140 * - makes RSMPI calls down to the interconnect driver 5141 * this by defn include requests with reply. 
5142 * - takes a long duration of time 5143 */ 5144 rsm_intr_callback_dispatch(data, src_hwaddr, arg); 5145 break; 5146 case RSMIPC_MSG_NOTIMPORTING: 5147 importer_list_rm(src_node, msg->rsmipc_key, 5148 msg->rsmipc_segment_cookie); 5149 break; 5150 case RSMIPC_MSG_SQREADY: 5151 rsm_proc_sqready(data, src_hwaddr, arg); 5152 break; 5153 case RSMIPC_MSG_SQREADY_ACK: 5154 rsm_proc_sqready_ack(data, src_hwaddr, arg); 5155 break; 5156 case RSMIPC_MSG_CREDIT: 5157 rsm_add_credits(ctrlmsg, src_hwaddr, arg); 5158 break; 5159 case RSMIPC_MSG_REPLY: 5160 rsm_intr_reply(msghdr); 5161 break; 5162 case RSMIPC_MSG_BELL: 5163 rsm_intr_event(msg); 5164 break; 5165 case RSMIPC_MSG_IMPORTING: 5166 importer_list_add(src_node, msg->rsmipc_key, 5167 msg->rsmipc_adapter_hwaddr, 5168 msg->rsmipc_segment_cookie); 5169 break; 5170 case RSMIPC_MSG_REPUBLISH: 5171 importer_update(src_node, msg->rsmipc_key, msg->rsmipc_perm); 5172 break; 5173 default: 5174 DBG_PRINTF((category, RSM_DEBUG, 5175 "rsm_intr_callback: bad msg %lx type %d data %lx\n", 5176 (size_t)msg, (int)(msghdr->rsmipc_type), (size_t)data)); 5177 } 5178 5179 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_callback done\n")); 5180 5181 } 5182 5183 rsm_intr_hand_ret_t rsm_srv_func(rsm_controller_object_t *chd, 5184 rsm_intr_q_op_t opcode, rsm_addr_t src, 5185 void *data, size_t size, rsm_intr_hand_arg_t arg) 5186 { 5187 DBG_DEFINE(category, 5188 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 5189 5190 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_srv_func enter\n")); 5191 5192 switch (opcode) { 5193 case RSM_INTR_Q_OP_CREATE: 5194 DBG_PRINTF((category, RSM_DEBUG, "rsm_srv_func:OP_CREATE\n")); 5195 rsm_sqcreateop_callback(src, arg); 5196 break; 5197 case RSM_INTR_Q_OP_DESTROY: 5198 DBG_PRINTF((category, RSM_DEBUG, "rsm_srv_func:OP_DESTROY\n")); 5199 break; 5200 case RSM_INTR_Q_OP_RECEIVE: 5201 rsm_intr_callback(data, src, arg); 5202 break; 5203 default: 5204 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5205 "rsm_srv_func: 
unknown opcode = %x\n", opcode)); 5206 } 5207 5208 chd = chd; 5209 size = size; 5210 5211 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_srv_func done\n")); 5212 5213 return (RSM_INTR_HAND_CLAIMED); 5214 } 5215 5216 /* *************************** IPC slots ************************* */ 5217 static rsmipc_slot_t * 5218 rsmipc_alloc() 5219 { 5220 int i; 5221 rsmipc_slot_t *slot; 5222 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 5223 5224 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_alloc enter\n")); 5225 5226 /* try to find a free slot, if not wait */ 5227 mutex_enter(&rsm_ipc.lock); 5228 5229 while (rsm_ipc.count == 0) { 5230 rsm_ipc.wanted = 1; 5231 cv_wait(&rsm_ipc.cv, &rsm_ipc.lock); 5232 } 5233 5234 /* An empty slot is available, find it */ 5235 slot = &rsm_ipc.slots[0]; 5236 for (i = 0; i < RSMIPC_SZ; i++, slot++) { 5237 if (RSMIPC_GET(slot, RSMIPC_FREE)) { 5238 RSMIPC_CLEAR(slot, RSMIPC_FREE); 5239 break; 5240 } 5241 } 5242 5243 ASSERT(i < RSMIPC_SZ); 5244 rsm_ipc.count--; /* one less is available */ 5245 rsm_ipc.sequence++; /* new sequence */ 5246 5247 slot->rsmipc_cookie.ic.sequence = (uint_t)rsm_ipc.sequence; 5248 slot->rsmipc_cookie.ic.index = (uint_t)i; 5249 5250 mutex_exit(&rsm_ipc.lock); 5251 5252 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_alloc done\n")); 5253 5254 return (slot); 5255 } 5256 5257 static void 5258 rsmipc_free(rsmipc_slot_t *slot) 5259 { 5260 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 5261 5262 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_free enter\n")); 5263 5264 ASSERT(MUTEX_HELD(&slot->rsmipc_lock)); 5265 ASSERT(&rsm_ipc.slots[slot->rsmipc_cookie.ic.index] == slot); 5266 5267 mutex_enter(&rsm_ipc.lock); 5268 5269 RSMIPC_SET(slot, RSMIPC_FREE); 5270 5271 slot->rsmipc_cookie.ic.sequence = 0; 5272 5273 mutex_exit(&slot->rsmipc_lock); 5274 rsm_ipc.count++; 5275 ASSERT(rsm_ipc.count <= RSMIPC_SZ); 5276 if (rsm_ipc.wanted) { 5277 rsm_ipc.wanted = 0; 5278 cv_broadcast(&rsm_ipc.cv); 5279 } 5280 5281 
mutex_exit(&rsm_ipc.lock); 5282 5283 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_free done\n")); 5284 } 5285 5286 static int 5287 rsmipc_send(rsm_node_id_t dest, rsmipc_request_t *req, rsmipc_reply_t *reply) 5288 { 5289 int e = 0; 5290 int credit_check = 0; 5291 int retry_cnt = 0; 5292 int min_retry_cnt = 10; 5293 rsm_send_t is; 5294 rsmipc_slot_t *rslot; 5295 adapter_t *adapter; 5296 path_t *path; 5297 sendq_token_t *sendq_token; 5298 sendq_token_t *used_sendq_token = NULL; 5299 rsm_send_q_handle_t ipc_handle; 5300 DBG_DEFINE(category, 5301 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 5302 5303 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_send enter:dest=%d", 5304 dest)); 5305 5306 /* 5307 * Check if this is a local case 5308 */ 5309 if (dest == my_nodeid) { 5310 switch (req->rsmipc_hdr.rsmipc_type) { 5311 case RSMIPC_MSG_SEGCONNECT: 5312 reply->rsmipc_status = (short)rsmsegacl_validate( 5313 req, dest, reply); 5314 break; 5315 case RSMIPC_MSG_BELL: 5316 req->rsmipc_hdr.rsmipc_src = dest; 5317 rsm_intr_event(req); 5318 break; 5319 case RSMIPC_MSG_IMPORTING: 5320 importer_list_add(dest, req->rsmipc_key, 5321 req->rsmipc_adapter_hwaddr, 5322 req->rsmipc_segment_cookie); 5323 break; 5324 case RSMIPC_MSG_NOTIMPORTING: 5325 importer_list_rm(dest, req->rsmipc_key, 5326 req->rsmipc_segment_cookie); 5327 break; 5328 case RSMIPC_MSG_REPUBLISH: 5329 importer_update(dest, req->rsmipc_key, 5330 req->rsmipc_perm); 5331 break; 5332 case RSMIPC_MSG_SUSPEND: 5333 importer_suspend(dest); 5334 break; 5335 case RSMIPC_MSG_SUSPEND_DONE: 5336 rsm_suspend_complete(dest, 0); 5337 break; 5338 case RSMIPC_MSG_RESUME: 5339 importer_resume(dest); 5340 break; 5341 default: 5342 ASSERT(0); 5343 } 5344 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5345 "rsmipc_send done\n")); 5346 return (0); 5347 } 5348 5349 if (dest >= MAX_NODES) { 5350 DBG_PRINTF((category, RSM_ERR, 5351 "rsm: rsmipc_send bad node number %x\n", dest)); 5352 return (RSMERR_REMOTE_NODE_UNREACHABLE); 5353 } 5354 
5355 /* 5356 * Oh boy! we are going remote. 5357 */ 5358 5359 /* 5360 * identify if we need to have credits to send this message 5361 * - only selected requests are flow controlled 5362 */ 5363 if (req != NULL) { 5364 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5365 "rsmipc_send:request type=%d\n", 5366 req->rsmipc_hdr.rsmipc_type)); 5367 5368 switch (req->rsmipc_hdr.rsmipc_type) { 5369 case RSMIPC_MSG_SEGCONNECT: 5370 case RSMIPC_MSG_DISCONNECT: 5371 case RSMIPC_MSG_IMPORTING: 5372 case RSMIPC_MSG_SUSPEND: 5373 case RSMIPC_MSG_SUSPEND_DONE: 5374 case RSMIPC_MSG_RESUME: 5375 credit_check = 1; 5376 break; 5377 default: 5378 credit_check = 0; 5379 } 5380 } 5381 5382 again: 5383 if (retry_cnt++ == min_retry_cnt) { 5384 /* backoff before further retries for 10ms */ 5385 delay(drv_usectohz(10000)); 5386 retry_cnt = 0; /* reset retry_cnt */ 5387 } 5388 sendq_token = rsmka_get_sendq_token(dest, used_sendq_token); 5389 if (sendq_token == NULL) { 5390 DBG_PRINTF((category, RSM_ERR, 5391 "rsm: rsmipc_send no device to reach node %d\n", dest)); 5392 return (RSMERR_REMOTE_NODE_UNREACHABLE); 5393 } 5394 5395 if ((sendq_token == used_sendq_token) && 5396 ((e == RSMERR_CONN_ABORTED) || (e == RSMERR_TIMEOUT) || 5397 (e == RSMERR_COMM_ERR_MAYBE_DELIVERED))) { 5398 rele_sendq_token(sendq_token); 5399 DBG_PRINTF((category, RSM_DEBUG, "rsmipc_send done=%d\n", e)); 5400 return (RSMERR_CONN_ABORTED); 5401 } else 5402 used_sendq_token = sendq_token; 5403 5404 /* lint -save -e413 */ 5405 path = SQ_TOKEN_TO_PATH(sendq_token); 5406 adapter = path->local_adapter; 5407 /* lint -restore */ 5408 ipc_handle = sendq_token->rsmpi_sendq_handle; 5409 5410 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5411 "rsmipc_send: path=%lx sendq_hdl=%lx\n", path, ipc_handle)); 5412 5413 if (reply == NULL) { 5414 /* Send request without ack */ 5415 /* 5416 * Set the rsmipc_version number in the msghdr for KA 5417 * communication versioning 5418 */ 5419 req->rsmipc_hdr.rsmipc_version = RSM_VERSION; 5420 
req->rsmipc_hdr.rsmipc_src = my_nodeid; 5421 /* 5422 * remote endpoints incn should match the value in our 5423 * path's remote_incn field. No need to grab any lock 5424 * since we have refcnted the path in rsmka_get_sendq_token 5425 */ 5426 req->rsmipc_hdr.rsmipc_incn = path->remote_incn; 5427 5428 is.is_data = (void *)req; 5429 is.is_size = sizeof (*req); 5430 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP; 5431 is.is_wait = 0; 5432 5433 if (credit_check) { 5434 mutex_enter(&path->mutex); 5435 /* 5436 * wait till we recv credits or path goes down. If path 5437 * goes down rsm_send will fail and we handle the error 5438 * then 5439 */ 5440 while ((sendq_token->msgbuf_avail == 0) && 5441 (path->state == RSMKA_PATH_ACTIVE)) { 5442 e = cv_wait_sig(&sendq_token->sendq_cv, 5443 &path->mutex); 5444 if (e == 0) { 5445 mutex_exit(&path->mutex); 5446 no_reply_cnt++; 5447 rele_sendq_token(sendq_token); 5448 DBG_PRINTF((category, RSM_DEBUG, 5449 "rsmipc_send done: " 5450 "cv_wait INTERRUPTED")); 5451 return (RSMERR_INTERRUPTED); 5452 } 5453 } 5454 5455 /* 5456 * path is not active retry on another path. 
5457 */ 5458 if (path->state != RSMKA_PATH_ACTIVE) { 5459 mutex_exit(&path->mutex); 5460 rele_sendq_token(sendq_token); 5461 e = RSMERR_CONN_ABORTED; 5462 DBG_PRINTF((category, RSM_ERR, 5463 "rsm: rsmipc_send: path !ACTIVE")); 5464 goto again; 5465 } 5466 5467 ASSERT(sendq_token->msgbuf_avail > 0); 5468 5469 /* 5470 * reserve a msgbuf 5471 */ 5472 sendq_token->msgbuf_avail--; 5473 5474 mutex_exit(&path->mutex); 5475 5476 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, 5477 NULL); 5478 5479 if (e != RSM_SUCCESS) { 5480 mutex_enter(&path->mutex); 5481 /* 5482 * release the reserved msgbuf since 5483 * the send failed 5484 */ 5485 sendq_token->msgbuf_avail++; 5486 cv_broadcast(&sendq_token->sendq_cv); 5487 mutex_exit(&path->mutex); 5488 } 5489 } else 5490 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, 5491 NULL); 5492 5493 no_reply_cnt++; 5494 rele_sendq_token(sendq_token); 5495 if (e != RSM_SUCCESS) { 5496 DBG_PRINTF((category, RSM_ERR, 5497 "rsm: rsmipc_send no reply send" 5498 " err = %d no reply count = %d\n", 5499 e, no_reply_cnt)); 5500 ASSERT(e != RSMERR_QUEUE_FENCE_UP && 5501 e != RSMERR_BAD_BARRIER_HNDL); 5502 atomic_inc_64(&rsm_ipcsend_errcnt); 5503 goto again; 5504 } else { 5505 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5506 "rsmipc_send done\n")); 5507 return (e); 5508 } 5509 5510 } 5511 5512 if (req == NULL) { 5513 /* Send reply - No flow control is done for reply */ 5514 /* 5515 * Set the version in the msg header for KA communication 5516 * versioning 5517 */ 5518 reply->rsmipc_hdr.rsmipc_version = RSM_VERSION; 5519 reply->rsmipc_hdr.rsmipc_src = my_nodeid; 5520 /* incn number is not used for reply msgs currently */ 5521 reply->rsmipc_hdr.rsmipc_incn = path->remote_incn; 5522 5523 is.is_data = (void *)reply; 5524 is.is_size = sizeof (*reply); 5525 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP; 5526 is.is_wait = 0; 5527 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, NULL); 5528 rele_sendq_token(sendq_token); 5529 if (e != RSM_SUCCESS) 
{ 5530 DBG_PRINTF((category, RSM_ERR, 5531 "rsm: rsmipc_send reply send" 5532 " err = %d\n", e)); 5533 atomic_inc_64(&rsm_ipcsend_errcnt); 5534 goto again; 5535 } else { 5536 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5537 "rsmipc_send done\n")); 5538 return (e); 5539 } 5540 } 5541 5542 /* Reply needed */ 5543 rslot = rsmipc_alloc(); /* allocate a new ipc slot */ 5544 5545 mutex_enter(&rslot->rsmipc_lock); 5546 5547 rslot->rsmipc_data = (void *)reply; 5548 RSMIPC_SET(rslot, RSMIPC_PENDING); 5549 5550 while (RSMIPC_GET(rslot, RSMIPC_PENDING)) { 5551 /* 5552 * Set the rsmipc_version number in the msghdr for KA 5553 * communication versioning 5554 */ 5555 req->rsmipc_hdr.rsmipc_version = RSM_VERSION; 5556 req->rsmipc_hdr.rsmipc_src = my_nodeid; 5557 req->rsmipc_hdr.rsmipc_cookie = rslot->rsmipc_cookie; 5558 /* 5559 * remote endpoints incn should match the value in our 5560 * path's remote_incn field. No need to grab any lock 5561 * since we have refcnted the path in rsmka_get_sendq_token 5562 */ 5563 req->rsmipc_hdr.rsmipc_incn = path->remote_incn; 5564 5565 is.is_data = (void *)req; 5566 is.is_size = sizeof (*req); 5567 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP; 5568 is.is_wait = 0; 5569 if (credit_check) { 5570 5571 mutex_enter(&path->mutex); 5572 /* 5573 * wait till we recv credits or path goes down. If path 5574 * goes down rsm_send will fail and we handle the error 5575 * then. 5576 */ 5577 while ((sendq_token->msgbuf_avail == 0) && 5578 (path->state == RSMKA_PATH_ACTIVE)) { 5579 e = cv_wait_sig(&sendq_token->sendq_cv, 5580 &path->mutex); 5581 if (e == 0) { 5582 mutex_exit(&path->mutex); 5583 RSMIPC_CLEAR(rslot, RSMIPC_PENDING); 5584 rsmipc_free(rslot); 5585 rele_sendq_token(sendq_token); 5586 DBG_PRINTF((category, RSM_DEBUG, 5587 "rsmipc_send done: " 5588 "cv_wait INTERRUPTED")); 5589 return (RSMERR_INTERRUPTED); 5590 } 5591 } 5592 5593 /* 5594 * path is not active retry on another path. 
5595 */ 5596 if (path->state != RSMKA_PATH_ACTIVE) { 5597 mutex_exit(&path->mutex); 5598 RSMIPC_CLEAR(rslot, RSMIPC_PENDING); 5599 rsmipc_free(rslot); 5600 rele_sendq_token(sendq_token); 5601 e = RSMERR_CONN_ABORTED; 5602 DBG_PRINTF((category, RSM_ERR, 5603 "rsm: rsmipc_send: path !ACTIVE")); 5604 goto again; 5605 } 5606 5607 ASSERT(sendq_token->msgbuf_avail > 0); 5608 5609 /* 5610 * reserve a msgbuf 5611 */ 5612 sendq_token->msgbuf_avail--; 5613 5614 mutex_exit(&path->mutex); 5615 5616 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, 5617 NULL); 5618 5619 if (e != RSM_SUCCESS) { 5620 mutex_enter(&path->mutex); 5621 /* 5622 * release the reserved msgbuf since 5623 * the send failed 5624 */ 5625 sendq_token->msgbuf_avail++; 5626 cv_broadcast(&sendq_token->sendq_cv); 5627 mutex_exit(&path->mutex); 5628 } 5629 } else 5630 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, 5631 NULL); 5632 5633 if (e != RSM_SUCCESS) { 5634 DBG_PRINTF((category, RSM_ERR, 5635 "rsm: rsmipc_send rsmpi send err = %d\n", e)); 5636 RSMIPC_CLEAR(rslot, RSMIPC_PENDING); 5637 rsmipc_free(rslot); 5638 rele_sendq_token(sendq_token); 5639 atomic_inc_64(&rsm_ipcsend_errcnt); 5640 goto again; 5641 } 5642 5643 /* wait for a reply signal, a SIGINT, or 5 sec. 
timeout */ 5644 e = cv_reltimedwait_sig(&rslot->rsmipc_cv, &rslot->rsmipc_lock, 5645 drv_usectohz(5000000), TR_CLOCK_TICK); 5646 if (e < 0) { 5647 /* timed out - retry */ 5648 e = RSMERR_TIMEOUT; 5649 } else if (e == 0) { 5650 /* signalled - return error */ 5651 e = RSMERR_INTERRUPTED; 5652 break; 5653 } else { 5654 e = RSM_SUCCESS; 5655 } 5656 } 5657 5658 RSMIPC_CLEAR(rslot, RSMIPC_PENDING); 5659 rsmipc_free(rslot); 5660 rele_sendq_token(sendq_token); 5661 5662 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_send done=%d\n", e)); 5663 return (e); 5664 } 5665 5666 static int 5667 rsm_send_notimporting(rsm_node_id_t dest, rsm_memseg_id_t segid, void *cookie) 5668 { 5669 rsmipc_request_t request; 5670 5671 /* 5672 * inform the exporter to delete this importer 5673 */ 5674 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_NOTIMPORTING; 5675 request.rsmipc_key = segid; 5676 request.rsmipc_segment_cookie = cookie; 5677 return (rsmipc_send(dest, &request, RSM_NO_REPLY)); 5678 } 5679 5680 static void 5681 rsm_send_republish(rsm_memseg_id_t segid, rsmapi_access_entry_t *acl, 5682 int acl_len, rsm_permission_t default_permission) 5683 { 5684 int i; 5685 importing_token_t *token; 5686 rsmipc_request_t request; 5687 republish_token_t *republish_list = NULL; 5688 republish_token_t *rp; 5689 rsm_permission_t permission; 5690 int index; 5691 5692 /* 5693 * send the new access mode to all the nodes that have imported 5694 * this segment. 5695 * If the new acl does not have a node that was present in 5696 * the old acl a access permission of 0 is sent. 
5697 */ 5698 5699 index = rsmhash(segid); 5700 5701 /* 5702 * create a list of node/permissions to send the republish message 5703 */ 5704 mutex_enter(&importer_list.lock); 5705 5706 token = importer_list.bucket[index]; 5707 while (token != NULL) { 5708 if (segid == token->key) { 5709 permission = default_permission; 5710 5711 for (i = 0; i < acl_len; i++) { 5712 if (token->importing_node == acl[i].ae_node) { 5713 permission = acl[i].ae_permission; 5714 break; 5715 } 5716 } 5717 rp = kmem_zalloc(sizeof (republish_token_t), KM_SLEEP); 5718 5719 rp->key = segid; 5720 rp->importing_node = token->importing_node; 5721 rp->permission = permission; 5722 rp->next = republish_list; 5723 republish_list = rp; 5724 } 5725 token = token->next; 5726 } 5727 5728 mutex_exit(&importer_list.lock); 5729 5730 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPUBLISH; 5731 request.rsmipc_key = segid; 5732 5733 while (republish_list != NULL) { 5734 request.rsmipc_perm = republish_list->permission; 5735 (void) rsmipc_send(republish_list->importing_node, 5736 &request, RSM_NO_REPLY); 5737 rp = republish_list; 5738 republish_list = republish_list->next; 5739 kmem_free(rp, sizeof (republish_token_t)); 5740 } 5741 } 5742 5743 static void 5744 rsm_send_suspend() 5745 { 5746 int i, e; 5747 rsmipc_request_t request; 5748 list_element_t *tokp; 5749 list_element_t *head = NULL; 5750 importing_token_t *token; 5751 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE, 5752 "rsm_send_suspend enter\n")); 5753 5754 /* 5755 * create a list of node to send the suspend message 5756 * 5757 * Currently the whole importer list is scanned and we obtain 5758 * all the nodes - this basically gets all nodes that at least 5759 * import one segment from the local node. 5760 * 5761 * no need to grab the rsm_suspend_list lock here since we are 5762 * single threaded when suspend is called. 
5763 */ 5764 5765 mutex_enter(&importer_list.lock); 5766 for (i = 0; i < rsm_hash_size; i++) { 5767 5768 token = importer_list.bucket[i]; 5769 5770 while (token != NULL) { 5771 5772 tokp = head; 5773 5774 /* 5775 * make sure that the token's node 5776 * is not already on the suspend list 5777 */ 5778 while (tokp != NULL) { 5779 if (tokp->nodeid == token->importing_node) { 5780 break; 5781 } 5782 tokp = tokp->next; 5783 } 5784 5785 if (tokp == NULL) { /* not in suspend list */ 5786 tokp = kmem_zalloc(sizeof (list_element_t), 5787 KM_SLEEP); 5788 tokp->nodeid = token->importing_node; 5789 tokp->next = head; 5790 head = tokp; 5791 } 5792 5793 token = token->next; 5794 } 5795 } 5796 mutex_exit(&importer_list.lock); 5797 5798 if (head == NULL) { /* no importers so go ahead and quiesce segments */ 5799 exporter_quiesce(); 5800 return; 5801 } 5802 5803 mutex_enter(&rsm_suspend_list.list_lock); 5804 ASSERT(rsm_suspend_list.list_head == NULL); 5805 /* 5806 * update the suspend list righaway so that if a node dies the 5807 * pathmanager can set the NODE dead flag 5808 */ 5809 rsm_suspend_list.list_head = head; 5810 mutex_exit(&rsm_suspend_list.list_lock); 5811 5812 tokp = head; 5813 5814 while (tokp != NULL) { 5815 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SUSPEND; 5816 e = rsmipc_send(tokp->nodeid, &request, RSM_NO_REPLY); 5817 /* 5818 * Error in rsmipc_send currently happens due to inaccessibility 5819 * of the remote node. 5820 */ 5821 if (e == RSM_SUCCESS) { /* send failed - don't wait for ack */ 5822 tokp->flags |= RSM_SUSPEND_ACKPENDING; 5823 } 5824 5825 tokp = tokp->next; 5826 } 5827 5828 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE, 5829 "rsm_send_suspend done\n")); 5830 5831 } 5832 5833 static void 5834 rsm_send_resume() 5835 { 5836 rsmipc_request_t request; 5837 list_element_t *elem, *head; 5838 5839 /* 5840 * save the suspend list so that we know where to send 5841 * the resume messages and make the suspend list head 5842 * NULL. 
5843 */ 5844 mutex_enter(&rsm_suspend_list.list_lock); 5845 head = rsm_suspend_list.list_head; 5846 rsm_suspend_list.list_head = NULL; 5847 mutex_exit(&rsm_suspend_list.list_lock); 5848 5849 while (head != NULL) { 5850 elem = head; 5851 head = head->next; 5852 5853 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_RESUME; 5854 5855 (void) rsmipc_send(elem->nodeid, &request, RSM_NO_REPLY); 5856 5857 kmem_free((void *)elem, sizeof (list_element_t)); 5858 5859 } 5860 5861 } 5862 5863 /* 5864 * This function takes path and sends a message using the sendq 5865 * corresponding to it. The RSMIPC_MSG_SQREADY, RSMIPC_MSG_SQREADY_ACK 5866 * and RSMIPC_MSG_CREDIT are sent using this function. 5867 */ 5868 int 5869 rsmipc_send_controlmsg(path_t *path, int msgtype) 5870 { 5871 int e; 5872 int retry_cnt = 0; 5873 int min_retry_cnt = 10; 5874 adapter_t *adapter; 5875 rsm_send_t is; 5876 rsm_send_q_handle_t ipc_handle; 5877 rsmipc_controlmsg_t msg; 5878 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_FLOWCONTROL); 5879 5880 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5881 "rsmipc_send_controlmsg enter\n")); 5882 5883 ASSERT(MUTEX_HELD(&path->mutex)); 5884 5885 adapter = path->local_adapter; 5886 5887 DBG_PRINTF((category, RSM_DEBUG, "rsmipc_send_controlmsg:path=%lx " 5888 "msgtype=%d %lx:%llx->%lx:%llx procmsg=%d\n", path, msgtype, 5889 my_nodeid, adapter->hwaddr, path->remote_node, 5890 path->remote_hwaddr, path->procmsg_cnt)); 5891 5892 if (path->state != RSMKA_PATH_ACTIVE) { 5893 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5894 "rsmipc_send_controlmsg done: ! 
RSMKA_PATH_ACTIVE")); 5895 return (1); 5896 } 5897 5898 ipc_handle = path->sendq_token.rsmpi_sendq_handle; 5899 5900 msg.rsmipc_hdr.rsmipc_version = RSM_VERSION; 5901 msg.rsmipc_hdr.rsmipc_src = my_nodeid; 5902 msg.rsmipc_hdr.rsmipc_type = msgtype; 5903 msg.rsmipc_hdr.rsmipc_incn = path->remote_incn; 5904 5905 if (msgtype == RSMIPC_MSG_CREDIT) 5906 msg.rsmipc_credits = path->procmsg_cnt; 5907 5908 msg.rsmipc_local_incn = path->local_incn; 5909 5910 msg.rsmipc_adapter_hwaddr = adapter->hwaddr; 5911 /* incr the sendq, path refcnt */ 5912 PATH_HOLD_NOLOCK(path); 5913 SENDQ_TOKEN_HOLD(path); 5914 5915 do { 5916 /* drop the path lock before doing the rsm_send */ 5917 mutex_exit(&path->mutex); 5918 5919 is.is_data = (void *)&msg; 5920 is.is_size = sizeof (msg); 5921 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP; 5922 is.is_wait = 0; 5923 5924 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, NULL); 5925 5926 ASSERT(e != RSMERR_QUEUE_FENCE_UP && 5927 e != RSMERR_BAD_BARRIER_HNDL); 5928 5929 mutex_enter(&path->mutex); 5930 5931 if (e == RSM_SUCCESS) { 5932 break; 5933 } 5934 /* error counter for statistics */ 5935 atomic_inc_64(&rsm_ctrlmsg_errcnt); 5936 5937 DBG_PRINTF((category, RSM_ERR, 5938 "rsmipc_send_controlmsg:rsm_send error=%d", e)); 5939 5940 if (++retry_cnt == min_retry_cnt) { /* backoff before retry */ 5941 (void) cv_reltimedwait(&path->sendq_token.sendq_cv, 5942 &path->mutex, drv_usectohz(10000), TR_CLOCK_TICK); 5943 retry_cnt = 0; 5944 } 5945 } while (path->state == RSMKA_PATH_ACTIVE); 5946 5947 /* decrement the sendq,path refcnt that we incr before rsm_send */ 5948 SENDQ_TOKEN_RELE(path); 5949 PATH_RELE_NOLOCK(path); 5950 5951 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5952 "rsmipc_send_controlmsg done=%d", e)); 5953 return (e); 5954 } 5955 5956 /* 5957 * Called from rsm_force_unload and path_importer_disconnect. 
The memory 5958 * mapping for the imported segment is removed and the segment is 5959 * disconnected at the interconnect layer if disconnect_flag is TRUE. 5960 * rsm_force_unload will get disconnect_flag TRUE from rsm_intr_callback 5961 * and FALSE from rsm_rebind. 5962 * 5963 * When subsequent accesses cause page faulting, the dummy page is mapped 5964 * to resolve the fault, and the mapping generation number is incremented 5965 * so that the application can be notified on a close barrier operation. 5966 * 5967 * It is important to note that the caller of rsmseg_unload is responsible for 5968 * acquiring the segment lock before making a call to rsmseg_unload. This is 5969 * required to make the caller and rsmseg_unload thread safe. The segment lock 5970 * will be released by the rsmseg_unload function. 5971 */ 5972 void 5973 rsmseg_unload(rsmseg_t *im_seg) 5974 { 5975 rsmcookie_t *hdl; 5976 void *shared_cookie; 5977 rsmipc_request_t request; 5978 uint_t maxprot; 5979 5980 DBG_DEFINE(category, 5981 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 5982 5983 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_unload enter\n")); 5984 5985 ASSERT(im_seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 5986 5987 /* wait until segment leaves the mapping state */ 5988 while (im_seg->s_state == RSM_STATE_MAPPING) 5989 cv_wait(&im_seg->s_cv, &im_seg->s_lock); 5990 /* 5991 * An unload is only necessary if the segment is connected. However, 5992 * if the segment was on the import list in state RSM_STATE_CONNECTING 5993 * then a connection was in progress. Change to RSM_STATE_NEW 5994 * here to cause an early exit from the connection process. 
5995 */ 5996 if (im_seg->s_state == RSM_STATE_NEW) { 5997 rsmseglock_release(im_seg); 5998 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5999 "rsmseg_unload done: RSM_STATE_NEW\n")); 6000 return; 6001 } else if (im_seg->s_state == RSM_STATE_CONNECTING) { 6002 im_seg->s_state = RSM_STATE_ABORT_CONNECT; 6003 rsmsharelock_acquire(im_seg); 6004 im_seg->s_share->rsmsi_state = RSMSI_STATE_ABORT_CONNECT; 6005 rsmsharelock_release(im_seg); 6006 rsmseglock_release(im_seg); 6007 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6008 "rsmseg_unload done: RSM_STATE_CONNECTING\n")); 6009 return; 6010 } 6011 6012 if (im_seg->s_flags & RSM_FORCE_DISCONNECT) { 6013 if (im_seg->s_ckl != NULL) { 6014 int e; 6015 /* Setup protections for remap */ 6016 maxprot = PROT_USER; 6017 if (im_seg->s_mode & RSM_PERM_READ) { 6018 maxprot |= PROT_READ; 6019 } 6020 if (im_seg->s_mode & RSM_PERM_WRITE) { 6021 maxprot |= PROT_WRITE; 6022 } 6023 hdl = im_seg->s_ckl; 6024 for (; hdl != NULL; hdl = hdl->c_next) { 6025 e = devmap_umem_remap(hdl->c_dhp, rsm_dip, 6026 remap_cookie, 6027 hdl->c_off, hdl->c_len, 6028 maxprot, 0, NULL); 6029 6030 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6031 "remap returns %d\n", e)); 6032 } 6033 } 6034 6035 (void) rsm_closeconnection(im_seg, &shared_cookie); 6036 6037 if (shared_cookie != NULL) { 6038 /* 6039 * inform the exporting node so this import 6040 * can be deleted from the list of importers. 
6041 */ 6042 request.rsmipc_hdr.rsmipc_type = 6043 RSMIPC_MSG_NOTIMPORTING; 6044 request.rsmipc_key = im_seg->s_segid; 6045 request.rsmipc_segment_cookie = shared_cookie; 6046 rsmseglock_release(im_seg); 6047 (void) rsmipc_send(im_seg->s_node, &request, 6048 RSM_NO_REPLY); 6049 } else { 6050 rsmseglock_release(im_seg); 6051 } 6052 } 6053 else 6054 rsmseglock_release(im_seg); 6055 6056 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_unload done\n")); 6057 6058 } 6059 6060 /* ****************************** Importer Calls ************************ */ 6061 6062 static int 6063 rsm_access(uid_t owner, gid_t group, int perm, int mode, const struct cred *cr) 6064 { 6065 int shifts = 0; 6066 6067 if (crgetuid(cr) != owner) { 6068 shifts += 3; 6069 if (!groupmember(group, cr)) 6070 shifts += 3; 6071 } 6072 6073 mode &= ~(perm << shifts); 6074 6075 if (mode == 0) 6076 return (0); 6077 6078 return (secpolicy_rsm_access(cr, owner, mode)); 6079 } 6080 6081 6082 static int 6083 rsm_connect(rsmseg_t *seg, rsm_ioctlmsg_t *msg, cred_t *cred, 6084 intptr_t dataptr, int mode) 6085 { 6086 int e; 6087 int recheck_state = 0; 6088 void *shared_cookie; 6089 rsmipc_request_t request; 6090 rsmipc_reply_t reply; 6091 rsm_permission_t access; 6092 adapter_t *adapter; 6093 rsm_addr_t addr = 0; 6094 rsm_import_share_t *sharedp; 6095 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT); 6096 6097 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_connect enter\n")); 6098 6099 adapter = rsm_getadapter(msg, mode); 6100 if (adapter == NULL) { 6101 DBG_PRINTF((category, RSM_ERR, 6102 "rsm_connect done:ENODEV adapter=NULL\n")); 6103 return (RSMERR_CTLR_NOT_PRESENT); 6104 } 6105 6106 if ((adapter == &loopback_adapter) && (msg->nodeid != my_nodeid)) { 6107 rsmka_release_adapter(adapter); 6108 DBG_PRINTF((category, RSM_ERR, 6109 "rsm_connect done:ENODEV loopback\n")); 6110 return (RSMERR_CTLR_NOT_PRESENT); 6111 } 6112 6113 6114 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 6115 
ASSERT(seg->s_state == RSM_STATE_NEW); 6116 6117 /* 6118 * Translate perm to access 6119 */ 6120 if (msg->perm & ~RSM_PERM_RDWR) { 6121 rsmka_release_adapter(adapter); 6122 DBG_PRINTF((category, RSM_ERR, 6123 "rsm_connect done:EINVAL invalid perms\n")); 6124 return (RSMERR_BAD_PERMS); 6125 } 6126 access = 0; 6127 if (msg->perm & RSM_PERM_READ) 6128 access |= RSM_ACCESS_READ; 6129 if (msg->perm & RSM_PERM_WRITE) 6130 access |= RSM_ACCESS_WRITE; 6131 6132 seg->s_node = msg->nodeid; 6133 6134 /* 6135 * Adding to the import list locks the segment; release the segment 6136 * lock so we can get the reply for the send. 6137 */ 6138 e = rsmimport_add(seg, msg->key); 6139 if (e) { 6140 rsmka_release_adapter(adapter); 6141 DBG_PRINTF((category, RSM_ERR, 6142 "rsm_connect done:rsmimport_add failed %d\n", e)); 6143 return (e); 6144 } 6145 seg->s_state = RSM_STATE_CONNECTING; 6146 6147 /* 6148 * Set the s_adapter field here so as to have a valid comparison of 6149 * the adapter and the s_adapter value during rsmshare_get. 
For 6150 * any error, set s_adapter to NULL before doing a release_adapter 6151 */ 6152 seg->s_adapter = adapter; 6153 6154 rsmseglock_release(seg); 6155 6156 /* 6157 * get the pointer to the shared data structure; the 6158 * shared data is locked and refcount has been incremented 6159 */ 6160 sharedp = rsmshare_get(msg->key, msg->nodeid, adapter, seg); 6161 6162 ASSERT(rsmsharelock_held(seg)); 6163 6164 do { 6165 /* flag indicates whether we need to recheck the state */ 6166 recheck_state = 0; 6167 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6168 "rsm_connect:RSMSI_STATE=%d\n", sharedp->rsmsi_state)); 6169 switch (sharedp->rsmsi_state) { 6170 case RSMSI_STATE_NEW: 6171 sharedp->rsmsi_state = RSMSI_STATE_CONNECTING; 6172 break; 6173 case RSMSI_STATE_CONNECTING: 6174 /* FALLTHRU */ 6175 case RSMSI_STATE_CONN_QUIESCE: 6176 /* FALLTHRU */ 6177 case RSMSI_STATE_MAP_QUIESCE: 6178 /* wait for the state to change */ 6179 while ((sharedp->rsmsi_state == 6180 RSMSI_STATE_CONNECTING) || 6181 (sharedp->rsmsi_state == 6182 RSMSI_STATE_CONN_QUIESCE) || 6183 (sharedp->rsmsi_state == 6184 RSMSI_STATE_MAP_QUIESCE)) { 6185 if (cv_wait_sig(&sharedp->rsmsi_cv, 6186 &sharedp->rsmsi_lock) == 0) { 6187 /* signalled - clean up and return */ 6188 rsmsharelock_release(seg); 6189 rsmimport_rm(seg); 6190 seg->s_adapter = NULL; 6191 rsmka_release_adapter(adapter); 6192 seg->s_state = RSM_STATE_NEW; 6193 DBG_PRINTF((category, RSM_ERR, 6194 "rsm_connect done: INTERRUPTED\n")); 6195 return (RSMERR_INTERRUPTED); 6196 } 6197 } 6198 /* 6199 * the state changed, loop back and check what it is 6200 */ 6201 recheck_state = 1; 6202 break; 6203 case RSMSI_STATE_ABORT_CONNECT: 6204 /* exit the loop and clean up further down */ 6205 break; 6206 case RSMSI_STATE_CONNECTED: 6207 /* already connected, good - fall through */ 6208 case RSMSI_STATE_MAPPED: 6209 /* already mapped, wow - fall through */ 6210 /* access validation etc is done further down */ 6211 break; 6212 case RSMSI_STATE_DISCONNECTED: 6213 /* 
disconnected - so reconnect now */ 6214 sharedp->rsmsi_state = RSMSI_STATE_CONNECTING; 6215 break; 6216 default: 6217 ASSERT(0); /* Invalid State */ 6218 } 6219 } while (recheck_state); 6220 6221 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) { 6222 /* we are the first to connect */ 6223 rsmsharelock_release(seg); 6224 6225 if (msg->nodeid != my_nodeid) { 6226 addr = get_remote_hwaddr(adapter, msg->nodeid); 6227 6228 if ((int64_t)addr < 0) { 6229 rsmsharelock_acquire(seg); 6230 rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING, 6231 RSMSI_STATE_NEW); 6232 rsmsharelock_release(seg); 6233 rsmimport_rm(seg); 6234 seg->s_adapter = NULL; 6235 rsmka_release_adapter(adapter); 6236 seg->s_state = RSM_STATE_NEW; 6237 DBG_PRINTF((category, RSM_ERR, 6238 "rsm_connect done: hwaddr<0\n")); 6239 return (RSMERR_INTERNAL_ERROR); 6240 } 6241 } else { 6242 addr = adapter->hwaddr; 6243 } 6244 6245 /* 6246 * send request to node [src, dest, key, msgid] and get back 6247 * [status, msgid, cookie] 6248 */ 6249 request.rsmipc_key = msg->key; 6250 /* 6251 * we need the s_mode of the exporter so pass 6252 * RSM_ACCESS_TRUSTED 6253 */ 6254 request.rsmipc_perm = RSM_ACCESS_TRUSTED; 6255 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SEGCONNECT; 6256 request.rsmipc_adapter_hwaddr = addr; 6257 request.rsmipc_segment_cookie = sharedp; 6258 6259 e = (int)rsmipc_send(msg->nodeid, &request, &reply); 6260 if (e) { 6261 rsmsharelock_acquire(seg); 6262 rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING, 6263 RSMSI_STATE_NEW); 6264 rsmsharelock_release(seg); 6265 rsmimport_rm(seg); 6266 seg->s_adapter = NULL; 6267 rsmka_release_adapter(adapter); 6268 seg->s_state = RSM_STATE_NEW; 6269 DBG_PRINTF((category, RSM_ERR, 6270 "rsm_connect done:rsmipc_send failed %d\n", e)); 6271 return (e); 6272 } 6273 6274 if (reply.rsmipc_status != RSM_SUCCESS) { 6275 rsmsharelock_acquire(seg); 6276 rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING, 6277 RSMSI_STATE_NEW); 6278 rsmsharelock_release(seg); 6279 rsmimport_rm(seg); 6280 
seg->s_adapter = NULL; 6281 rsmka_release_adapter(adapter); 6282 seg->s_state = RSM_STATE_NEW; 6283 DBG_PRINTF((category, RSM_ERR, 6284 "rsm_connect done:rsmipc_send reply err %d\n", 6285 reply.rsmipc_status)); 6286 return (reply.rsmipc_status); 6287 } 6288 6289 rsmsharelock_acquire(seg); 6290 /* store the information recvd into the shared data struct */ 6291 sharedp->rsmsi_mode = reply.rsmipc_mode; 6292 sharedp->rsmsi_uid = reply.rsmipc_uid; 6293 sharedp->rsmsi_gid = reply.rsmipc_gid; 6294 sharedp->rsmsi_seglen = reply.rsmipc_seglen; 6295 sharedp->rsmsi_cookie = sharedp; 6296 } 6297 6298 rsmsharelock_release(seg); 6299 6300 /* 6301 * Get the segment lock and check for a force disconnect 6302 * from the export side which would have changed the state 6303 * back to RSM_STATE_NEW. Once the segment lock is acquired a 6304 * force disconnect will be held off until the connection 6305 * has completed. 6306 */ 6307 rsmseglock_acquire(seg); 6308 rsmsharelock_acquire(seg); 6309 ASSERT(seg->s_state == RSM_STATE_CONNECTING || 6310 seg->s_state == RSM_STATE_ABORT_CONNECT); 6311 6312 shared_cookie = sharedp->rsmsi_cookie; 6313 6314 if ((seg->s_state == RSM_STATE_ABORT_CONNECT) || 6315 (sharedp->rsmsi_state == RSMSI_STATE_ABORT_CONNECT)) { 6316 seg->s_state = RSM_STATE_NEW; 6317 seg->s_adapter = NULL; 6318 rsmsharelock_release(seg); 6319 rsmseglock_release(seg); 6320 rsmimport_rm(seg); 6321 rsmka_release_adapter(adapter); 6322 6323 rsmsharelock_acquire(seg); 6324 if (!(sharedp->rsmsi_flags & RSMSI_FLAGS_ABORTDONE)) { 6325 /* 6326 * set a flag indicating abort handling has been 6327 * done 6328 */ 6329 sharedp->rsmsi_flags |= RSMSI_FLAGS_ABORTDONE; 6330 rsmsharelock_release(seg); 6331 /* send a message to exporter - only once */ 6332 (void) rsm_send_notimporting(msg->nodeid, 6333 msg->key, shared_cookie); 6334 rsmsharelock_acquire(seg); 6335 /* 6336 * wake up any waiting importers and inform that 6337 * connection has been aborted 6338 */ 6339 cv_broadcast(&sharedp->rsmsi_cv); 
6340 } 6341 rsmsharelock_release(seg); 6342 6343 DBG_PRINTF((category, RSM_ERR, 6344 "rsm_connect done: RSM_STATE_ABORT_CONNECT\n")); 6345 return (RSMERR_INTERRUPTED); 6346 } 6347 6348 6349 /* 6350 * We need to verify that this process has access 6351 */ 6352 e = rsm_access(sharedp->rsmsi_uid, sharedp->rsmsi_gid, 6353 access & sharedp->rsmsi_mode, 6354 (int)(msg->perm & RSM_PERM_RDWR), cred); 6355 if (e) { 6356 rsmsharelock_release(seg); 6357 seg->s_state = RSM_STATE_NEW; 6358 seg->s_adapter = NULL; 6359 rsmseglock_release(seg); 6360 rsmimport_rm(seg); 6361 rsmka_release_adapter(adapter); 6362 /* 6363 * No need to lock segment it has been removed 6364 * from the hash table 6365 */ 6366 rsmsharelock_acquire(seg); 6367 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) { 6368 rsmsharelock_release(seg); 6369 /* this is the first importer */ 6370 6371 (void) rsm_send_notimporting(msg->nodeid, msg->key, 6372 shared_cookie); 6373 rsmsharelock_acquire(seg); 6374 sharedp->rsmsi_state = RSMSI_STATE_NEW; 6375 cv_broadcast(&sharedp->rsmsi_cv); 6376 } 6377 rsmsharelock_release(seg); 6378 6379 DBG_PRINTF((category, RSM_ERR, 6380 "rsm_connect done: ipcaccess failed\n")); 6381 return (RSMERR_PERM_DENIED); 6382 } 6383 6384 /* update state and cookie */ 6385 seg->s_segid = sharedp->rsmsi_segid; 6386 seg->s_len = sharedp->rsmsi_seglen; 6387 seg->s_mode = access & sharedp->rsmsi_mode; 6388 seg->s_pid = ddi_get_pid(); 6389 seg->s_mapinfo = NULL; 6390 6391 if (seg->s_node != my_nodeid) { 6392 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) { 6393 e = adapter->rsmpi_ops->rsm_connect( 6394 adapter->rsmpi_handle, 6395 addr, seg->s_segid, &sharedp->rsmsi_handle); 6396 6397 if (e != RSM_SUCCESS) { 6398 seg->s_state = RSM_STATE_NEW; 6399 seg->s_adapter = NULL; 6400 rsmsharelock_release(seg); 6401 rsmseglock_release(seg); 6402 rsmimport_rm(seg); 6403 rsmka_release_adapter(adapter); 6404 /* 6405 * inform the exporter to delete this importer 6406 */ 6407 (void) 
rsm_send_notimporting(msg->nodeid, 6408 msg->key, shared_cookie); 6409 6410 /* 6411 * Now inform any waiting importers to 6412 * retry connect. This needs to be done 6413 * after sending notimporting so that 6414 * the notimporting is sent before a waiting 6415 * importer sends a segconnect while retrying 6416 * 6417 * No need to lock segment it has been removed 6418 * from the hash table 6419 */ 6420 6421 rsmsharelock_acquire(seg); 6422 sharedp->rsmsi_state = RSMSI_STATE_NEW; 6423 cv_broadcast(&sharedp->rsmsi_cv); 6424 rsmsharelock_release(seg); 6425 6426 DBG_PRINTF((category, RSM_ERR, 6427 "rsm_connect error %d\n", e)); 6428 if (e == RSMERR_SEG_NOT_PUBLISHED_TO_RSM_ADDR) 6429 return ( 6430 RSMERR_SEG_NOT_PUBLISHED_TO_NODE); 6431 else if ((e == RSMERR_RSM_ADDR_UNREACHABLE) || 6432 (e == RSMERR_UNKNOWN_RSM_ADDR)) 6433 return (RSMERR_REMOTE_NODE_UNREACHABLE); 6434 else 6435 return (e); 6436 } 6437 6438 } 6439 seg->s_handle.in = sharedp->rsmsi_handle; 6440 6441 } 6442 6443 seg->s_state = RSM_STATE_CONNECT; 6444 6445 6446 seg->s_flags &= ~RSM_IMPORT_DUMMY; /* clear dummy flag */ 6447 if (bar_va) { 6448 /* increment generation number on barrier page */ 6449 atomic_inc_16(bar_va + seg->s_hdr.rsmrc_num); 6450 /* return user off into barrier page where status will be */ 6451 msg->off = (int)seg->s_hdr.rsmrc_num; 6452 msg->gnum = bar_va[msg->off]; /* gnum race */ 6453 } else { 6454 msg->off = 0; 6455 msg->gnum = 0; /* gnum race */ 6456 } 6457 6458 msg->len = (int)sharedp->rsmsi_seglen; 6459 msg->rnum = seg->s_minor; 6460 rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING, RSMSI_STATE_CONNECTED); 6461 rsmsharelock_release(seg); 6462 rsmseglock_release(seg); 6463 6464 /* Return back to user the segment size & perm in case it's needed */ 6465 6466 #ifdef _MULTI_DATAMODEL 6467 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 6468 rsm_ioctlmsg32_t msg32; 6469 6470 if (msg->len > UINT_MAX) 6471 msg32.len = RSM_MAXSZ_PAGE_ALIGNED; 6472 else 6473 msg32.len = msg->len; 6474 msg32.off = 
msg->off; 6475 msg32.perm = msg->perm; 6476 msg32.gnum = msg->gnum; 6477 msg32.rnum = msg->rnum; 6478 6479 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6480 "rsm_connect done\n")); 6481 6482 if (ddi_copyout((caddr_t)&msg32, (caddr_t)dataptr, 6483 sizeof (msg32), mode)) 6484 return (RSMERR_BAD_ADDR); 6485 else 6486 return (RSM_SUCCESS); 6487 } 6488 #endif 6489 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_connect done\n")); 6490 6491 if (ddi_copyout((caddr_t)msg, (caddr_t)dataptr, sizeof (*msg), 6492 mode)) 6493 return (RSMERR_BAD_ADDR); 6494 else 6495 return (RSM_SUCCESS); 6496 } 6497 6498 static int 6499 rsm_unmap(rsmseg_t *seg) 6500 { 6501 int err; 6502 adapter_t *adapter; 6503 rsm_import_share_t *sharedp; 6504 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT); 6505 6506 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6507 "rsm_unmap enter %u\n", seg->s_segid)); 6508 6509 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 6510 6511 /* assert seg is locked */ 6512 ASSERT(rsmseglock_held(seg)); 6513 ASSERT(seg->s_state != RSM_STATE_MAPPING); 6514 6515 if ((seg->s_state != RSM_STATE_ACTIVE) && 6516 (seg->s_state != RSM_STATE_MAP_QUIESCE)) { 6517 /* segment unmap has already been done */ 6518 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unmap done\n")); 6519 return (RSM_SUCCESS); 6520 } 6521 6522 sharedp = seg->s_share; 6523 6524 rsmsharelock_acquire(seg); 6525 6526 /* 6527 * - shared data struct is in MAPPED or MAP_QUIESCE state 6528 */ 6529 6530 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED || 6531 sharedp->rsmsi_state == RSMSI_STATE_MAP_QUIESCE); 6532 6533 /* 6534 * Unmap pages - previously rsm_memseg_import_unmap was called only if 6535 * the segment cookie list was NULL; but it is always NULL when 6536 * called from rsmmap_unmap and won't be NULL when called for 6537 * a force disconnect - so the check for NULL cookie list was removed 6538 */ 6539 6540 ASSERT(sharedp->rsmsi_mapcnt > 0); 6541 6542 sharedp->rsmsi_mapcnt--; 6543 6544 if 
(sharedp->rsmsi_mapcnt == 0) { 6545 if (sharedp->rsmsi_state == RSMSI_STATE_MAPPED) { 6546 /* unmap the shared RSMPI mapping */ 6547 adapter = seg->s_adapter; 6548 if (seg->s_node != my_nodeid) { 6549 ASSERT(sharedp->rsmsi_handle != NULL); 6550 err = adapter->rsmpi_ops-> 6551 rsm_unmap(sharedp->rsmsi_handle); 6552 DBG_PRINTF((category, RSM_DEBUG, 6553 "rsm_unmap: rsmpi unmap %d\n", err)); 6554 rsm_free_mapinfo(sharedp->rsmsi_mapinfo); 6555 sharedp->rsmsi_mapinfo = NULL; 6556 } 6557 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED; 6558 } else { /* MAP_QUIESCE --munmap()--> CONN_QUIESCE */ 6559 sharedp->rsmsi_state = RSMSI_STATE_CONN_QUIESCE; 6560 } 6561 } 6562 6563 rsmsharelock_release(seg); 6564 6565 /* 6566 * The s_cookie field is used to store the cookie returned from the 6567 * ddi_umem_lock when binding the pages for an export segment. This 6568 * is the primary use of the s_cookie field and does not normally 6569 * pertain to any importing segment except in the loopback case. 6570 * For the loopback case, the import segment and export segment are 6571 * on the same node, the s_cookie field of the segment structure for 6572 * the importer is initialized to the s_cookie field in the exported 6573 * segment during the map operation and is used during the call to 6574 * devmap_umem_setup for the import mapping. 6575 * Thus, during unmap, we simply need to set s_cookie to NULL to 6576 * indicate that the mapping no longer exists. 6577 */ 6578 seg->s_cookie = NULL; 6579 6580 seg->s_mapinfo = NULL; 6581 6582 if (seg->s_state == RSM_STATE_ACTIVE) 6583 seg->s_state = RSM_STATE_CONNECT; 6584 else 6585 seg->s_state = RSM_STATE_CONN_QUIESCE; 6586 6587 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unmap done\n")); 6588 6589 return (RSM_SUCCESS); 6590 } 6591 6592 /* 6593 * cookie returned here if not null indicates that it is 6594 * the last importer and it can be used in the RSMIPC_NOT_IMPORTING 6595 * message. 
6596 */ 6597 static int 6598 rsm_closeconnection(rsmseg_t *seg, void **cookie) 6599 { 6600 int e; 6601 adapter_t *adapter; 6602 rsm_import_share_t *sharedp; 6603 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT); 6604 6605 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6606 "rsm_closeconnection enter\n")); 6607 6608 *cookie = (void *)NULL; 6609 6610 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 6611 6612 /* assert seg is locked */ 6613 ASSERT(rsmseglock_held(seg)); 6614 6615 if (seg->s_state == RSM_STATE_DISCONNECT) { 6616 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6617 "rsm_closeconnection done: already disconnected\n")); 6618 return (RSM_SUCCESS); 6619 } 6620 6621 /* wait for all putv/getv ops to get done */ 6622 while (seg->s_rdmacnt > 0) { 6623 cv_wait(&seg->s_cv, &seg->s_lock); 6624 } 6625 6626 (void) rsm_unmap(seg); 6627 6628 ASSERT(seg->s_state == RSM_STATE_CONNECT || 6629 seg->s_state == RSM_STATE_CONN_QUIESCE); 6630 6631 adapter = seg->s_adapter; 6632 sharedp = seg->s_share; 6633 6634 ASSERT(sharedp != NULL); 6635 6636 rsmsharelock_acquire(seg); 6637 6638 /* 6639 * Disconnect on adapter 6640 * 6641 * The current algorithm is stateless, I don't have to contact 6642 * server when I go away. He only gives me permissions. Of course, 6643 * the adapters will talk to terminate the connect. 
6644 * 6645 * disconnect is needed only if we are CONNECTED not in CONN_QUIESCE 6646 */ 6647 if ((sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) && 6648 (sharedp->rsmsi_node != my_nodeid)) { 6649 6650 if (sharedp->rsmsi_refcnt == 1) { 6651 /* this is the last importer */ 6652 ASSERT(sharedp->rsmsi_mapcnt == 0); 6653 6654 e = adapter->rsmpi_ops-> 6655 rsm_disconnect(sharedp->rsmsi_handle); 6656 if (e != RSM_SUCCESS) { 6657 DBG_PRINTF((category, RSM_DEBUG, 6658 "rsm:disconnect failed seg=%x:err=%d\n", 6659 seg->s_key, e)); 6660 } 6661 } 6662 } 6663 6664 seg->s_handle.in = NULL; 6665 6666 sharedp->rsmsi_refcnt--; 6667 6668 if (sharedp->rsmsi_refcnt == 0) { 6669 *cookie = (void *)sharedp->rsmsi_cookie; 6670 sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED; 6671 sharedp->rsmsi_handle = NULL; 6672 rsmsharelock_release(seg); 6673 6674 /* clean up the shared data structure */ 6675 mutex_destroy(&sharedp->rsmsi_lock); 6676 cv_destroy(&sharedp->rsmsi_cv); 6677 kmem_free((void *)(sharedp), sizeof (rsm_import_share_t)); 6678 6679 } else { 6680 rsmsharelock_release(seg); 6681 } 6682 6683 /* increment generation number on barrier page */ 6684 if (bar_va) { 6685 atomic_inc_16(bar_va + seg->s_hdr.rsmrc_num); 6686 } 6687 6688 /* 6689 * The following needs to be done after any 6690 * rsmsharelock calls which use seg->s_share. 
6691 */ 6692 seg->s_share = NULL; 6693 6694 seg->s_state = RSM_STATE_DISCONNECT; 6695 /* signal anyone waiting in the CONN_QUIESCE state */ 6696 cv_broadcast(&seg->s_cv); 6697 6698 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6699 "rsm_closeconnection done\n")); 6700 6701 return (RSM_SUCCESS); 6702 } 6703 6704 int 6705 rsm_disconnect(rsmseg_t *seg) 6706 { 6707 rsmipc_request_t request; 6708 void *shared_cookie; 6709 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT); 6710 6711 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_disconnect enter\n")); 6712 6713 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 6714 6715 /* assert seg isn't locked */ 6716 ASSERT(!rsmseglock_held(seg)); 6717 6718 6719 /* Remove segment from imported list */ 6720 rsmimport_rm(seg); 6721 6722 /* acquire the segment */ 6723 rsmseglock_acquire(seg); 6724 6725 /* wait until segment leaves the mapping state */ 6726 while (seg->s_state == RSM_STATE_MAPPING) 6727 cv_wait(&seg->s_cv, &seg->s_lock); 6728 6729 if (seg->s_state == RSM_STATE_DISCONNECT) { 6730 seg->s_state = RSM_STATE_NEW; 6731 rsmseglock_release(seg); 6732 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6733 "rsm_disconnect done: already disconnected\n")); 6734 return (RSM_SUCCESS); 6735 } 6736 6737 (void) rsm_closeconnection(seg, &shared_cookie); 6738 6739 /* update state */ 6740 seg->s_state = RSM_STATE_NEW; 6741 6742 if (shared_cookie != NULL) { 6743 /* 6744 * This is the last importer so inform the exporting node 6745 * so this import can be deleted from the list of importers. 
6746 */ 6747 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_NOTIMPORTING; 6748 request.rsmipc_key = seg->s_segid; 6749 request.rsmipc_segment_cookie = shared_cookie; 6750 rsmseglock_release(seg); 6751 (void) rsmipc_send(seg->s_node, &request, RSM_NO_REPLY); 6752 } else { 6753 rsmseglock_release(seg); 6754 } 6755 6756 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_disconnect done\n")); 6757 6758 return (DDI_SUCCESS); 6759 } 6760 6761 /*ARGSUSED*/ 6762 static int 6763 rsm_chpoll(dev_t dev, short events, int anyyet, short *reventsp, 6764 struct pollhead **phpp) 6765 { 6766 minor_t rnum; 6767 rsmresource_t *res; 6768 rsmseg_t *seg; 6769 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI); 6770 6771 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_chpoll enter\n")); 6772 6773 /* find minor, no lock */ 6774 rnum = getminor(dev); 6775 res = rsmresource_lookup(rnum, RSM_NOLOCK); 6776 6777 /* poll is supported only for export/import segments */ 6778 if ((res == NULL) || (res == RSMRC_RESERVED) || 6779 (res->rsmrc_type == RSM_RESOURCE_BAR)) { 6780 return (ENXIO); 6781 } 6782 6783 *reventsp = 0; 6784 6785 /* 6786 * An exported segment must be in state RSM_STATE_EXPORT; an 6787 * imported segment must be in state RSM_STATE_ACTIVE. 
6788 */ 6789 seg = (rsmseg_t *)res; 6790 6791 if (seg->s_pollevent) { 6792 *reventsp = POLLRDNORM; 6793 } else if (!anyyet) { 6794 /* cannot take segment lock here */ 6795 *phpp = &seg->s_poll; 6796 seg->s_pollflag |= RSM_SEGMENT_POLL; 6797 } 6798 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_chpoll done\n")); 6799 return (0); 6800 } 6801 6802 6803 6804 /* ************************* IOCTL Commands ********************* */ 6805 6806 static rsmseg_t * 6807 rsmresource_seg(rsmresource_t *res, minor_t rnum, cred_t *credp, 6808 rsm_resource_type_t type) 6809 { 6810 /* get segment from resource handle */ 6811 rsmseg_t *seg; 6812 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL); 6813 6814 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmresource_seg enter\n")); 6815 6816 6817 if (res != RSMRC_RESERVED) { 6818 seg = (rsmseg_t *)res; 6819 } else { 6820 /* Allocate segment now and bind it */ 6821 seg = rsmseg_alloc(rnum, credp); 6822 6823 /* 6824 * if DR pre-processing is going on or DR is in progress 6825 * then the new export segments should be in the NEW_QSCD state 6826 */ 6827 if (type == RSM_RESOURCE_EXPORT_SEGMENT) { 6828 mutex_enter(&rsm_drv_data.drv_lock); 6829 if ((rsm_drv_data.drv_state == 6830 RSM_DRV_PREDEL_STARTED) || 6831 (rsm_drv_data.drv_state == 6832 RSM_DRV_PREDEL_COMPLETED) || 6833 (rsm_drv_data.drv_state == 6834 RSM_DRV_DR_IN_PROGRESS)) { 6835 seg->s_state = RSM_STATE_NEW_QUIESCED; 6836 } 6837 mutex_exit(&rsm_drv_data.drv_lock); 6838 } 6839 6840 rsmresource_insert(rnum, (rsmresource_t *)seg, type); 6841 } 6842 6843 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmresource_seg done\n")); 6844 6845 return (seg); 6846 } 6847 6848 static int 6849 rsmexport_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg, 6850 int mode, cred_t *credp) 6851 { 6852 int error; 6853 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT | RSM_IOCTL); 6854 6855 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmexport_ioctl enter\n")); 6856 6857 arg = arg; 6858 credp 
= credp; 6859 6860 ASSERT(seg != NULL); 6861 6862 switch (cmd) { 6863 case RSM_IOCTL_BIND: 6864 error = rsm_bind(seg, msg, arg, mode); 6865 break; 6866 case RSM_IOCTL_REBIND: 6867 error = rsm_rebind(seg, msg); 6868 break; 6869 case RSM_IOCTL_UNBIND: 6870 error = ENOTSUP; 6871 break; 6872 case RSM_IOCTL_PUBLISH: 6873 error = rsm_publish(seg, msg, arg, mode); 6874 break; 6875 case RSM_IOCTL_REPUBLISH: 6876 error = rsm_republish(seg, msg, mode); 6877 break; 6878 case RSM_IOCTL_UNPUBLISH: 6879 error = rsm_unpublish(seg, 1); 6880 break; 6881 default: 6882 error = EINVAL; 6883 break; 6884 } 6885 6886 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmexport_ioctl done: %d\n", 6887 error)); 6888 6889 return (error); 6890 } 6891 static int 6892 rsmimport_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg, 6893 int mode, cred_t *credp) 6894 { 6895 int error; 6896 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 6897 6898 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmimport_ioctl enter\n")); 6899 6900 ASSERT(seg); 6901 6902 switch (cmd) { 6903 case RSM_IOCTL_CONNECT: 6904 error = rsm_connect(seg, msg, credp, arg, mode); 6905 break; 6906 default: 6907 error = EINVAL; 6908 break; 6909 } 6910 6911 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmimport_ioctl done: %d\n", 6912 error)); 6913 return (error); 6914 } 6915 6916 static int 6917 rsmbar_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg, 6918 int mode) 6919 { 6920 int e; 6921 adapter_t *adapter; 6922 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 6923 6924 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmbar_ioctl enter\n")); 6925 6926 6927 if ((seg->s_flags & RSM_IMPORT_DUMMY) != 0) { 6928 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6929 "rsmbar_ioctl done: RSM_IMPORT_DUMMY\n")); 6930 return (RSMERR_CONN_ABORTED); 6931 } else if (seg->s_node == my_nodeid) { 6932 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6933 "rsmbar_ioctl done: loopback\n")); 6934 return (RSM_SUCCESS); 6935 } 
6936 6937 adapter = seg->s_adapter; 6938 6939 switch (cmd) { 6940 case RSM_IOCTL_BAR_CHECK: 6941 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6942 "rsmbar_ioctl done: RSM_BAR_CHECK %d\n", bar_va)); 6943 return (bar_va ? RSM_SUCCESS : EINVAL); 6944 case RSM_IOCTL_BAR_OPEN: 6945 e = adapter->rsmpi_ops-> 6946 rsm_open_barrier_ctrl(adapter->rsmpi_handle, &msg->bar); 6947 break; 6948 case RSM_IOCTL_BAR_ORDER: 6949 e = adapter->rsmpi_ops->rsm_order_barrier(&msg->bar); 6950 break; 6951 case RSM_IOCTL_BAR_CLOSE: 6952 e = adapter->rsmpi_ops->rsm_close_barrier(&msg->bar); 6953 break; 6954 default: 6955 e = EINVAL; 6956 break; 6957 } 6958 6959 if (e == RSM_SUCCESS) { 6960 #ifdef _MULTI_DATAMODEL 6961 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 6962 rsm_ioctlmsg32_t msg32; 6963 int i; 6964 6965 for (i = 0; i < 4; i++) { 6966 msg32.bar.comp[i].u64 = msg->bar.comp[i].u64; 6967 } 6968 6969 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6970 "rsmbar_ioctl done\n")); 6971 if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg, 6972 sizeof (msg32), mode)) 6973 return (RSMERR_BAD_ADDR); 6974 else 6975 return (RSM_SUCCESS); 6976 } 6977 #endif 6978 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6979 "rsmbar_ioctl done\n")); 6980 if (ddi_copyout((caddr_t)&msg->bar, (caddr_t)arg, 6981 sizeof (*msg), mode)) 6982 return (RSMERR_BAD_ADDR); 6983 else 6984 return (RSM_SUCCESS); 6985 } 6986 6987 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6988 "rsmbar_ioctl done: error=%d\n", e)); 6989 6990 return (e); 6991 } 6992 6993 /* 6994 * Ring the doorbell of the export segment to which this segment is 6995 * connected. 
6996 */ 6997 static int 6998 exportbell_ioctl(rsmseg_t *seg, int cmd /*ARGSUSED*/) 6999 { 7000 int e = 0; 7001 rsmipc_request_t request; 7002 7003 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 7004 7005 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exportbell_ioctl enter\n")); 7006 7007 request.rsmipc_key = seg->s_segid; 7008 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL; 7009 request.rsmipc_segment_cookie = NULL; 7010 e = rsmipc_send(seg->s_node, &request, RSM_NO_REPLY); 7011 7012 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7013 "exportbell_ioctl done: %d\n", e)); 7014 7015 return (e); 7016 } 7017 7018 /* 7019 * Ring the doorbells of all segments importing this segment 7020 */ 7021 static int 7022 importbell_ioctl(rsmseg_t *seg, int cmd /*ARGSUSED*/) 7023 { 7024 importing_token_t *token = NULL; 7025 rsmipc_request_t request; 7026 int index; 7027 7028 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT | RSM_IOCTL); 7029 7030 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importbell_ioctl enter\n")); 7031 7032 ASSERT(seg->s_state != RSM_STATE_NEW && 7033 seg->s_state != RSM_STATE_NEW_QUIESCED); 7034 7035 request.rsmipc_key = seg->s_segid; 7036 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL; 7037 7038 index = rsmhash(seg->s_segid); 7039 7040 token = importer_list.bucket[index]; 7041 7042 while (token != NULL) { 7043 if (seg->s_key == token->key) { 7044 request.rsmipc_segment_cookie = 7045 token->import_segment_cookie; 7046 (void) rsmipc_send(token->importing_node, 7047 &request, RSM_NO_REPLY); 7048 } 7049 token = token->next; 7050 } 7051 7052 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7053 "importbell_ioctl done\n")); 7054 return (RSM_SUCCESS); 7055 } 7056 7057 static int 7058 rsm_consumeevent_copyin(caddr_t arg, rsm_consume_event_msg_t *msgp, 7059 rsm_poll_event_t **eventspp, int mode) 7060 { 7061 rsm_poll_event_t *evlist = NULL; 7062 size_t evlistsz; 7063 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL); 7064 7065 #ifdef _MULTI_DATAMODEL 7066 if ((mode 
& DATAMODEL_MASK) == DATAMODEL_ILP32) { 7067 int i; 7068 rsm_consume_event_msg32_t cemsg32 = {0}; 7069 rsm_poll_event32_t event32[RSM_MAX_POLLFDS]; 7070 rsm_poll_event32_t *evlist32; 7071 size_t evlistsz32; 7072 7073 /* copyin the ioctl message */ 7074 if (ddi_copyin(arg, (caddr_t)&cemsg32, 7075 sizeof (rsm_consume_event_msg32_t), mode)) { 7076 DBG_PRINTF((category, RSM_ERR, 7077 "consumeevent_copyin msgp: RSMERR_BAD_ADDR\n")); 7078 return (RSMERR_BAD_ADDR); 7079 } 7080 msgp->seglist = (caddr_t)(uintptr_t)cemsg32.seglist; 7081 msgp->numents = (int)cemsg32.numents; 7082 7083 evlistsz32 = sizeof (rsm_poll_event32_t) * msgp->numents; 7084 /* 7085 * If numents is large alloc events list on heap otherwise 7086 * use the address of array that was passed in. 7087 */ 7088 if (msgp->numents > RSM_MAX_POLLFDS) { 7089 if (msgp->numents > max_segs) { /* validate numents */ 7090 DBG_PRINTF((category, RSM_ERR, 7091 "consumeevent_copyin: " 7092 "RSMERR_BAD_ARGS_ERRORS\n")); 7093 return (RSMERR_BAD_ARGS_ERRORS); 7094 } 7095 evlist32 = kmem_zalloc(evlistsz32, KM_SLEEP); 7096 } else { 7097 evlist32 = event32; 7098 } 7099 7100 /* copyin the seglist into the rsm_poll_event32_t array */ 7101 if (ddi_copyin((caddr_t)msgp->seglist, (caddr_t)evlist32, 7102 evlistsz32, mode)) { 7103 if ((msgp->numents > RSM_MAX_POLLFDS) && evlist32) { 7104 kmem_free(evlist32, evlistsz32); 7105 } 7106 DBG_PRINTF((category, RSM_ERR, 7107 "consumeevent_copyin evlist: RSMERR_BAD_ADDR\n")); 7108 return (RSMERR_BAD_ADDR); 7109 } 7110 7111 /* evlist and evlistsz are based on rsm_poll_event_t type */ 7112 evlistsz = sizeof (rsm_poll_event_t)* msgp->numents; 7113 7114 if (msgp->numents > RSM_MAX_POLLFDS) { 7115 evlist = kmem_zalloc(evlistsz, KM_SLEEP); 7116 *eventspp = evlist; 7117 } else { 7118 evlist = *eventspp; 7119 } 7120 /* 7121 * copy the rsm_poll_event32_t array to the rsm_poll_event_t 7122 * array 7123 */ 7124 for (i = 0; i < msgp->numents; i++) { 7125 evlist[i].rnum = evlist32[i].rnum; 7126 
evlist[i].fdsidx = evlist32[i].fdsidx; 7127 evlist[i].revent = evlist32[i].revent; 7128 } 7129 /* free the temp 32-bit event list */ 7130 if ((msgp->numents > RSM_MAX_POLLFDS) && evlist32) { 7131 kmem_free(evlist32, evlistsz32); 7132 } 7133 7134 return (RSM_SUCCESS); 7135 } 7136 #endif 7137 /* copyin the ioctl message */ 7138 if (ddi_copyin(arg, (caddr_t)msgp, sizeof (rsm_consume_event_msg_t), 7139 mode)) { 7140 DBG_PRINTF((category, RSM_ERR, 7141 "consumeevent_copyin msgp: RSMERR_BAD_ADDR\n")); 7142 return (RSMERR_BAD_ADDR); 7143 } 7144 /* 7145 * If numents is large alloc events list on heap otherwise 7146 * use the address of array that was passed in. 7147 */ 7148 if (msgp->numents > RSM_MAX_POLLFDS) { 7149 if (msgp->numents > max_segs) { /* validate numents */ 7150 DBG_PRINTF((category, RSM_ERR, 7151 "consumeevent_copyin: RSMERR_BAD_ARGS_ERRORS\n")); 7152 return (RSMERR_BAD_ARGS_ERRORS); 7153 } 7154 evlistsz = sizeof (rsm_poll_event_t)*msgp->numents; 7155 evlist = kmem_zalloc(evlistsz, KM_SLEEP); 7156 *eventspp = evlist; 7157 } 7158 7159 /* copyin the seglist */ 7160 if (ddi_copyin((caddr_t)msgp->seglist, (caddr_t)(*eventspp), 7161 sizeof (rsm_poll_event_t)*msgp->numents, mode)) { 7162 if (evlist) { 7163 kmem_free(evlist, evlistsz); 7164 *eventspp = NULL; 7165 } 7166 DBG_PRINTF((category, RSM_ERR, 7167 "consumeevent_copyin evlist: RSMERR_BAD_ADDR\n")); 7168 return (RSMERR_BAD_ADDR); 7169 } 7170 7171 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7172 "consumeevent_copyin done\n")); 7173 return (RSM_SUCCESS); 7174 } 7175 7176 static int 7177 rsm_consumeevent_copyout(rsm_consume_event_msg_t *msgp, 7178 rsm_poll_event_t *eventsp, int mode) 7179 { 7180 size_t evlistsz; 7181 int err = RSM_SUCCESS; 7182 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL); 7183 7184 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7185 "consumeevent_copyout enter: numents(%d) eventsp(%p)\n", 7186 msgp->numents, eventsp)); 7187 7188 #ifdef _MULTI_DATAMODEL 7189 if ((mode & DATAMODEL_MASK) == 
DATAMODEL_ILP32) { 7190 int i; 7191 rsm_poll_event32_t event32[RSM_MAX_POLLFDS]; 7192 rsm_poll_event32_t *evlist32; 7193 size_t evlistsz32; 7194 7195 evlistsz32 = sizeof (rsm_poll_event32_t)*msgp->numents; 7196 if (msgp->numents > RSM_MAX_POLLFDS) { 7197 evlist32 = kmem_zalloc(evlistsz32, KM_SLEEP); 7198 } else { 7199 evlist32 = event32; 7200 } 7201 7202 /* 7203 * copy the rsm_poll_event_t array to the rsm_poll_event32_t 7204 * array 7205 */ 7206 for (i = 0; i < msgp->numents; i++) { 7207 evlist32[i].rnum = eventsp[i].rnum; 7208 evlist32[i].fdsidx = eventsp[i].fdsidx; 7209 evlist32[i].revent = eventsp[i].revent; 7210 } 7211 7212 if (ddi_copyout((caddr_t)evlist32, (caddr_t)msgp->seglist, 7213 evlistsz32, mode)) { 7214 err = RSMERR_BAD_ADDR; 7215 } 7216 7217 if (msgp->numents > RSM_MAX_POLLFDS) { 7218 if (evlist32) { /* free the temp 32-bit event list */ 7219 kmem_free(evlist32, evlistsz32); 7220 } 7221 /* 7222 * eventsp and evlistsz are based on rsm_poll_event_t 7223 * type 7224 */ 7225 evlistsz = sizeof (rsm_poll_event_t)*msgp->numents; 7226 /* event list on the heap and needs to be freed here */ 7227 if (eventsp) { 7228 kmem_free(eventsp, evlistsz); 7229 } 7230 } 7231 7232 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7233 "consumeevent_copyout done: err=%d\n", err)); 7234 return (err); 7235 } 7236 #endif 7237 evlistsz = sizeof (rsm_poll_event_t)*msgp->numents; 7238 7239 if (ddi_copyout((caddr_t)eventsp, (caddr_t)msgp->seglist, evlistsz, 7240 mode)) { 7241 err = RSMERR_BAD_ADDR; 7242 } 7243 7244 if ((msgp->numents > RSM_MAX_POLLFDS) && eventsp) { 7245 /* event list on the heap and needs to be freed here */ 7246 kmem_free(eventsp, evlistsz); 7247 } 7248 7249 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7250 "consumeevent_copyout done: err=%d\n", err)); 7251 return (err); 7252 } 7253 7254 static int 7255 rsm_consumeevent_ioctl(caddr_t arg, int mode) 7256 { 7257 int rc; 7258 int i; 7259 minor_t rnum; 7260 rsm_consume_event_msg_t msg = {0}; 7261 rsmseg_t *seg; 7262 
rsm_poll_event_t *event_list; 7263 rsm_poll_event_t events[RSM_MAX_POLLFDS]; 7264 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL); 7265 7266 event_list = events; 7267 7268 if ((rc = rsm_consumeevent_copyin(arg, &msg, &event_list, mode)) != 7269 RSM_SUCCESS) { 7270 return (rc); 7271 } 7272 7273 for (i = 0; i < msg.numents; i++) { 7274 rnum = event_list[i].rnum; 7275 event_list[i].revent = 0; 7276 /* get the segment structure */ 7277 seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_LOCK); 7278 if (seg) { 7279 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7280 "consumeevent_ioctl: rnum(%d) seg(%p)\n", rnum, 7281 seg)); 7282 if (seg->s_pollevent) { 7283 /* consume the event */ 7284 atomic_dec_32(&seg->s_pollevent); 7285 event_list[i].revent = POLLRDNORM; 7286 } 7287 rsmseglock_release(seg); 7288 } 7289 } 7290 7291 if ((rc = rsm_consumeevent_copyout(&msg, event_list, mode)) != 7292 RSM_SUCCESS) { 7293 return (rc); 7294 } 7295 7296 return (RSM_SUCCESS); 7297 } 7298 7299 static int 7300 iovec_copyin(caddr_t user_vec, rsmka_iovec_t *iovec, int count, int mode) 7301 { 7302 int size; 7303 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 7304 7305 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "iovec_copyin enter\n")); 7306 7307 #ifdef _MULTI_DATAMODEL 7308 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 7309 rsmka_iovec32_t *iovec32, *iovec32_base; 7310 int i; 7311 7312 size = count * sizeof (rsmka_iovec32_t); 7313 iovec32_base = iovec32 = kmem_zalloc(size, KM_SLEEP); 7314 if (ddi_copyin((caddr_t)user_vec, 7315 (caddr_t)iovec32, size, mode)) { 7316 kmem_free(iovec32, size); 7317 DBG_PRINTF((category, RSM_DEBUG, 7318 "iovec_copyin: returning RSMERR_BAD_ADDR\n")); 7319 return (RSMERR_BAD_ADDR); 7320 } 7321 7322 for (i = 0; i < count; i++, iovec++, iovec32++) { 7323 iovec->io_type = (int)iovec32->io_type; 7324 if (iovec->io_type == RSM_HANDLE_TYPE) 7325 iovec->local.segid = (rsm_memseg_id_t) 7326 iovec32->local; 7327 else 7328 iovec->local.vaddr = 7329 
(caddr_t)(uintptr_t)iovec32->local; 7330 iovec->local_offset = (size_t)iovec32->local_offset; 7331 iovec->remote_offset = (size_t)iovec32->remote_offset; 7332 iovec->transfer_len = (size_t)iovec32->transfer_len; 7333 7334 } 7335 kmem_free(iovec32_base, size); 7336 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7337 "iovec_copyin done\n")); 7338 return (DDI_SUCCESS); 7339 } 7340 #endif 7341 7342 size = count * sizeof (rsmka_iovec_t); 7343 if (ddi_copyin((caddr_t)user_vec, (caddr_t)iovec, size, mode)) { 7344 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7345 "iovec_copyin done: RSMERR_BAD_ADDR\n")); 7346 return (RSMERR_BAD_ADDR); 7347 } 7348 7349 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "iovec_copyin done\n")); 7350 7351 return (DDI_SUCCESS); 7352 } 7353 7354 7355 static int 7356 sgio_copyin(caddr_t arg, rsmka_scat_gath_t *sg_io, int mode) 7357 { 7358 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 7359 7360 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_copyin enter\n")); 7361 7362 #ifdef _MULTI_DATAMODEL 7363 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 7364 rsmka_scat_gath32_t sg_io32; 7365 7366 if (ddi_copyin(arg, (caddr_t)&sg_io32, sizeof (sg_io32), 7367 mode)) { 7368 DBG_PRINTF((category, RSM_DEBUG, 7369 "sgio_copyin done: returning EFAULT\n")); 7370 return (RSMERR_BAD_ADDR); 7371 } 7372 sg_io->local_nodeid = (rsm_node_id_t)sg_io32.local_nodeid; 7373 sg_io->io_request_count = (size_t)sg_io32.io_request_count; 7374 sg_io->io_residual_count = (size_t)sg_io32.io_residual_count; 7375 sg_io->flags = (size_t)sg_io32.flags; 7376 sg_io->remote_handle = (rsm_memseg_import_handle_t) 7377 (uintptr_t)sg_io32.remote_handle; 7378 sg_io->iovec = (rsmka_iovec_t *)(uintptr_t)sg_io32.iovec; 7379 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7380 "sgio_copyin done\n")); 7381 return (DDI_SUCCESS); 7382 } 7383 #endif 7384 if (ddi_copyin(arg, (caddr_t)sg_io, sizeof (rsmka_scat_gath_t), 7385 mode)) { 7386 DBG_PRINTF((category, RSM_DEBUG, 7387 "sgio_copyin done: returning 
EFAULT\n")); 7388 return (RSMERR_BAD_ADDR); 7389 } 7390 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_copyin done\n")); 7391 return (DDI_SUCCESS); 7392 } 7393 7394 static int 7395 sgio_resid_copyout(caddr_t arg, rsmka_scat_gath_t *sg_io, int mode) 7396 { 7397 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 7398 7399 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7400 "sgio_resid_copyout enter\n")); 7401 7402 #ifdef _MULTI_DATAMODEL 7403 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 7404 rsmka_scat_gath32_t sg_io32; 7405 7406 sg_io32.io_residual_count = sg_io->io_residual_count; 7407 sg_io32.flags = sg_io->flags; 7408 7409 if (ddi_copyout((caddr_t)&sg_io32.io_residual_count, 7410 (caddr_t)&((rsmka_scat_gath32_t *)arg)->io_residual_count, 7411 sizeof (uint32_t), mode)) { 7412 7413 DBG_PRINTF((category, RSM_ERR, 7414 "sgio_resid_copyout error: rescnt\n")); 7415 return (RSMERR_BAD_ADDR); 7416 } 7417 7418 if (ddi_copyout((caddr_t)&sg_io32.flags, 7419 (caddr_t)&((rsmka_scat_gath32_t *)arg)->flags, 7420 sizeof (uint32_t), mode)) { 7421 7422 DBG_PRINTF((category, RSM_ERR, 7423 "sgio_resid_copyout error: flags\n")); 7424 return (RSMERR_BAD_ADDR); 7425 } 7426 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7427 "sgio_resid_copyout done\n")); 7428 return (DDI_SUCCESS); 7429 } 7430 #endif 7431 if (ddi_copyout((caddr_t)&sg_io->io_residual_count, 7432 (caddr_t)&((rsmka_scat_gath_t *)arg)->io_residual_count, 7433 sizeof (ulong_t), mode)) { 7434 7435 DBG_PRINTF((category, RSM_ERR, 7436 "sgio_resid_copyout error:rescnt\n")); 7437 return (RSMERR_BAD_ADDR); 7438 } 7439 7440 if (ddi_copyout((caddr_t)&sg_io->flags, 7441 (caddr_t)&((rsmka_scat_gath_t *)arg)->flags, 7442 sizeof (uint_t), mode)) { 7443 7444 DBG_PRINTF((category, RSM_ERR, 7445 "sgio_resid_copyout error:flags\n")); 7446 return (RSMERR_BAD_ADDR); 7447 } 7448 7449 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_resid_copyout done\n")); 7450 return (DDI_SUCCESS); 7451 } 7452 7453 7454 static int 7455 
rsm_iovec_ioctl(dev_t dev, caddr_t arg, int cmd, int mode, cred_t *credp) 7456 { 7457 rsmka_scat_gath_t sg_io; 7458 rsmka_iovec_t ka_iovec_arr[RSM_MAX_IOVLEN]; 7459 rsmka_iovec_t *ka_iovec; 7460 rsmka_iovec_t *ka_iovec_start; 7461 rsmpi_scat_gath_t rsmpi_sg_io; 7462 rsmpi_iovec_t iovec_arr[RSM_MAX_IOVLEN]; 7463 rsmpi_iovec_t *iovec; 7464 rsmpi_iovec_t *iovec_start = NULL; 7465 rsmapi_access_entry_t *acl; 7466 rsmresource_t *res; 7467 minor_t rnum; 7468 rsmseg_t *im_seg, *ex_seg; 7469 int e; 7470 int error = 0; 7471 uint_t i; 7472 uint_t iov_proc = 0; /* num of iovecs processed */ 7473 size_t size = 0; 7474 size_t ka_size; 7475 7476 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 7477 7478 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_iovec_ioctl enter\n")); 7479 7480 credp = credp; 7481 7482 /* 7483 * Copyin the scatter/gather structure and build new structure 7484 * for rsmpi. 7485 */ 7486 e = sgio_copyin(arg, &sg_io, mode); 7487 if (e != DDI_SUCCESS) { 7488 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7489 "rsm_iovec_ioctl done: sgio_copyin %d\n", e)); 7490 return (e); 7491 } 7492 7493 if (sg_io.io_request_count > RSM_MAX_SGIOREQS) { 7494 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7495 "rsm_iovec_ioctl done: request_count(%d) too large\n", 7496 sg_io.io_request_count)); 7497 return (RSMERR_BAD_SGIO); 7498 } 7499 7500 rsmpi_sg_io.io_request_count = sg_io.io_request_count; 7501 rsmpi_sg_io.io_residual_count = sg_io.io_request_count; 7502 rsmpi_sg_io.io_segflg = 0; 7503 7504 /* Allocate memory and copyin io vector array */ 7505 if (sg_io.io_request_count > RSM_MAX_IOVLEN) { 7506 ka_size = sg_io.io_request_count * sizeof (rsmka_iovec_t); 7507 ka_iovec_start = ka_iovec = kmem_zalloc(ka_size, KM_SLEEP); 7508 } else { 7509 ka_iovec_start = ka_iovec = ka_iovec_arr; 7510 } 7511 e = iovec_copyin((caddr_t)sg_io.iovec, ka_iovec, 7512 sg_io.io_request_count, mode); 7513 if (e != DDI_SUCCESS) { 7514 if (sg_io.io_request_count > RSM_MAX_IOVLEN) 7515 
kmem_free(ka_iovec, ka_size); 7516 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7517 "rsm_iovec_ioctl done: iovec_copyin %d\n", e)); 7518 return (e); 7519 } 7520 7521 /* get the import segment descriptor */ 7522 rnum = getminor(dev); 7523 res = rsmresource_lookup(rnum, RSM_LOCK); 7524 7525 /* 7526 * The following sequence of locking may (or MAY NOT) cause a 7527 * deadlock but this is currently not addressed here since the 7528 * implementation will be changed to incorporate the use of 7529 * reference counting for both the import and the export segments. 7530 */ 7531 7532 /* rsmseglock_acquire(im_seg) done in rsmresource_lookup */ 7533 7534 im_seg = (rsmseg_t *)res; 7535 7536 if (im_seg == NULL) { 7537 if (sg_io.io_request_count > RSM_MAX_IOVLEN) 7538 kmem_free(ka_iovec, ka_size); 7539 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7540 "rsm_iovec_ioctl done: rsmresource_lookup failed\n")); 7541 return (EINVAL); 7542 } 7543 /* putv/getv supported is supported only on import segments */ 7544 if (im_seg->s_type != RSM_RESOURCE_IMPORT_SEGMENT) { 7545 rsmseglock_release(im_seg); 7546 if (sg_io.io_request_count > RSM_MAX_IOVLEN) 7547 kmem_free(ka_iovec, ka_size); 7548 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7549 "rsm_iovec_ioctl done: not an import segment\n")); 7550 return (EINVAL); 7551 } 7552 7553 /* 7554 * wait for a remote DR to complete ie. for segments to get UNQUIESCED 7555 * as well as wait for a local DR to complete. 
7556 */ 7557 while ((im_seg->s_state == RSM_STATE_CONN_QUIESCE) || 7558 (im_seg->s_state == RSM_STATE_MAP_QUIESCE) || 7559 (im_seg->s_flags & RSM_DR_INPROGRESS)) { 7560 if (cv_wait_sig(&im_seg->s_cv, &im_seg->s_lock) == 0) { 7561 DBG_PRINTF((category, RSM_DEBUG, 7562 "rsm_iovec_ioctl done: cv_wait INTR")); 7563 rsmseglock_release(im_seg); 7564 return (RSMERR_INTERRUPTED); 7565 } 7566 } 7567 7568 if ((im_seg->s_state != RSM_STATE_CONNECT) && 7569 (im_seg->s_state != RSM_STATE_ACTIVE)) { 7570 7571 ASSERT(im_seg->s_state == RSM_STATE_DISCONNECT || 7572 im_seg->s_state == RSM_STATE_NEW); 7573 7574 DBG_PRINTF((category, RSM_DEBUG, 7575 "rsm_iovec_ioctl done: im_seg not conn/map")); 7576 rsmseglock_release(im_seg); 7577 e = RSMERR_BAD_SGIO; 7578 goto out; 7579 } 7580 7581 im_seg->s_rdmacnt++; 7582 rsmseglock_release(im_seg); 7583 7584 /* 7585 * Allocate and set up the io vector for rsmpi 7586 */ 7587 if (sg_io.io_request_count > RSM_MAX_IOVLEN) { 7588 size = sg_io.io_request_count * sizeof (rsmpi_iovec_t); 7589 iovec_start = iovec = kmem_zalloc(size, KM_SLEEP); 7590 } else { 7591 iovec_start = iovec = iovec_arr; 7592 } 7593 7594 rsmpi_sg_io.iovec = iovec; 7595 for (iov_proc = 0; iov_proc < sg_io.io_request_count; iov_proc++) { 7596 if (ka_iovec->io_type == RSM_HANDLE_TYPE) { 7597 ex_seg = rsmexport_lookup(ka_iovec->local.segid); 7598 7599 if (ex_seg == NULL) { 7600 e = RSMERR_BAD_SGIO; 7601 break; 7602 } 7603 ASSERT(ex_seg->s_state == RSM_STATE_EXPORT); 7604 7605 acl = ex_seg->s_acl; 7606 if (acl[0].ae_permission == 0) { 7607 struct buf *xbuf; 7608 dev_t sdev = 0; 7609 7610 xbuf = ddi_umem_iosetup(ex_seg->s_cookie, 7611 0, ex_seg->s_len, B_WRITE, 7612 sdev, 0, NULL, DDI_UMEM_SLEEP); 7613 7614 ASSERT(xbuf != NULL); 7615 7616 iovec->local_mem.ms_type = RSM_MEM_BUF; 7617 iovec->local_mem.ms_memory.bp = xbuf; 7618 } else { 7619 iovec->local_mem.ms_type = RSM_MEM_HANDLE; 7620 iovec->local_mem.ms_memory.handle = 7621 ex_seg->s_handle.out; 7622 } 7623 ex_seg->s_rdmacnt++; /* 
refcnt the handle */ 7624 rsmseglock_release(ex_seg); 7625 } else { 7626 iovec->local_mem.ms_type = RSM_MEM_VADDR; 7627 iovec->local_mem.ms_memory.vr.vaddr = 7628 ka_iovec->local.vaddr; 7629 } 7630 7631 iovec->local_offset = ka_iovec->local_offset; 7632 iovec->remote_handle = im_seg->s_handle.in; 7633 iovec->remote_offset = ka_iovec->remote_offset; 7634 iovec->transfer_length = ka_iovec->transfer_len; 7635 iovec++; 7636 ka_iovec++; 7637 } 7638 7639 if (iov_proc < sg_io.io_request_count) { 7640 /* error while processing handle */ 7641 rsmseglock_acquire(im_seg); 7642 im_seg->s_rdmacnt--; /* decrement the refcnt for importseg */ 7643 if (im_seg->s_rdmacnt == 0) { 7644 cv_broadcast(&im_seg->s_cv); 7645 } 7646 rsmseglock_release(im_seg); 7647 goto out; 7648 } 7649 7650 /* call rsmpi */ 7651 if (cmd == RSM_IOCTL_PUTV) 7652 e = im_seg->s_adapter->rsmpi_ops->rsm_memseg_import_putv( 7653 im_seg->s_adapter->rsmpi_handle, 7654 &rsmpi_sg_io); 7655 else if (cmd == RSM_IOCTL_GETV) 7656 e = im_seg->s_adapter->rsmpi_ops->rsm_memseg_import_getv( 7657 im_seg->s_adapter->rsmpi_handle, 7658 &rsmpi_sg_io); 7659 else { 7660 e = EINVAL; 7661 DBG_PRINTF((category, RSM_DEBUG, 7662 "iovec_ioctl: bad command = %x\n", cmd)); 7663 } 7664 7665 7666 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7667 "rsm_iovec_ioctl RSMPI oper done %d\n", e)); 7668 7669 sg_io.io_residual_count = rsmpi_sg_io.io_residual_count; 7670 7671 /* 7672 * Check for implicit signal post flag and do the signal 7673 * post if needed 7674 */ 7675 if (sg_io.flags & RSM_IMPLICIT_SIGPOST && 7676 e == RSM_SUCCESS) { 7677 rsmipc_request_t request; 7678 7679 request.rsmipc_key = im_seg->s_segid; 7680 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL; 7681 request.rsmipc_segment_cookie = NULL; 7682 e = rsmipc_send(im_seg->s_node, &request, RSM_NO_REPLY); 7683 /* 7684 * Reset the implicit signal post flag to 0 to indicate 7685 * that the signal post has been done and need not be 7686 * done in the RSMAPI library 7687 */ 7688 sg_io.flags 
&= ~RSM_IMPLICIT_SIGPOST; 7689 } 7690 7691 rsmseglock_acquire(im_seg); 7692 im_seg->s_rdmacnt--; 7693 if (im_seg->s_rdmacnt == 0) { 7694 cv_broadcast(&im_seg->s_cv); 7695 } 7696 rsmseglock_release(im_seg); 7697 error = sgio_resid_copyout(arg, &sg_io, mode); 7698 out: 7699 iovec = iovec_start; 7700 ka_iovec = ka_iovec_start; 7701 for (i = 0; i < iov_proc; i++) { 7702 if (ka_iovec->io_type == RSM_HANDLE_TYPE) { 7703 ex_seg = rsmexport_lookup(ka_iovec->local.segid); 7704 7705 ASSERT(ex_seg != NULL); 7706 ASSERT(ex_seg->s_state == RSM_STATE_EXPORT); 7707 7708 ex_seg->s_rdmacnt--; /* unrefcnt the handle */ 7709 if (ex_seg->s_rdmacnt == 0) { 7710 cv_broadcast(&ex_seg->s_cv); 7711 } 7712 rsmseglock_release(ex_seg); 7713 } 7714 7715 ASSERT(iovec != NULL); /* true if iov_proc > 0 */ 7716 7717 /* 7718 * At present there is no dependency on the existence of xbufs 7719 * created by ddi_umem_iosetup for each of the iovecs. So we 7720 * can these xbufs here. 7721 */ 7722 if (iovec->local_mem.ms_type == RSM_MEM_BUF) { 7723 freerbuf(iovec->local_mem.ms_memory.bp); 7724 } 7725 7726 iovec++; 7727 ka_iovec++; 7728 } 7729 7730 if (sg_io.io_request_count > RSM_MAX_IOVLEN) { 7731 if (iovec_start) 7732 kmem_free(iovec_start, size); 7733 kmem_free(ka_iovec_start, ka_size); 7734 } 7735 7736 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7737 "rsm_iovec_ioctl done %d\n", e)); 7738 /* if RSMPI call fails return that else return copyout's retval */ 7739 return ((e != RSM_SUCCESS) ? 
e : error); 7740 7741 } 7742 7743 7744 static int 7745 rsmaddr_ioctl(int cmd, rsm_ioctlmsg_t *msg, int mode) 7746 { 7747 adapter_t *adapter; 7748 rsm_addr_t addr; 7749 rsm_node_id_t node; 7750 int rval = DDI_SUCCESS; 7751 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL); 7752 7753 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmaddr_ioctl enter\n")); 7754 7755 adapter = rsm_getadapter(msg, mode); 7756 if (adapter == NULL) { 7757 DBG_PRINTF((category, RSM_DEBUG, 7758 "rsmaddr_ioctl done: adapter not found\n")); 7759 return (RSMERR_CTLR_NOT_PRESENT); 7760 } 7761 7762 switch (cmd) { 7763 case RSM_IOCTL_MAP_TO_ADDR: /* nodeid to hwaddr mapping */ 7764 /* returns the hwaddr in msg->hwaddr */ 7765 if (msg->nodeid == my_nodeid) { 7766 msg->hwaddr = adapter->hwaddr; 7767 } else { 7768 addr = get_remote_hwaddr(adapter, msg->nodeid); 7769 if ((int64_t)addr < 0) { 7770 rval = RSMERR_INTERNAL_ERROR; 7771 } else { 7772 msg->hwaddr = addr; 7773 } 7774 } 7775 break; 7776 case RSM_IOCTL_MAP_TO_NODEID: /* hwaddr to nodeid mapping */ 7777 /* returns the nodeid in msg->nodeid */ 7778 if (msg->hwaddr == adapter->hwaddr) { 7779 msg->nodeid = my_nodeid; 7780 } else { 7781 node = get_remote_nodeid(adapter, msg->hwaddr); 7782 if ((int)node < 0) { 7783 rval = RSMERR_INTERNAL_ERROR; 7784 } else { 7785 msg->nodeid = (rsm_node_id_t)node; 7786 } 7787 } 7788 break; 7789 default: 7790 rval = EINVAL; 7791 break; 7792 } 7793 7794 rsmka_release_adapter(adapter); 7795 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7796 "rsmaddr_ioctl done: %d\n", rval)); 7797 return (rval); 7798 } 7799 7800 static int 7801 rsm_ddi_copyin(caddr_t arg, rsm_ioctlmsg_t *msg, int mode) 7802 { 7803 DBG_DEFINE(category, 7804 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL | RSM_DDI); 7805 7806 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ddi_copyin enter\n")); 7807 7808 #ifdef _MULTI_DATAMODEL 7809 7810 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 7811 rsm_ioctlmsg32_t msg32; 7812 int i; 7813 7814 if 
(ddi_copyin(arg, (caddr_t)&msg32, sizeof (msg32), mode)) { 7815 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7816 "rsm_ddi_copyin done: EFAULT\n")); 7817 return (RSMERR_BAD_ADDR); 7818 } 7819 msg->len = msg32.len; 7820 msg->vaddr = (caddr_t)(uintptr_t)msg32.vaddr; 7821 msg->arg = (caddr_t)(uintptr_t)msg32.arg; 7822 msg->key = msg32.key; 7823 msg->acl_len = msg32.acl_len; 7824 msg->acl = (rsmapi_access_entry_t *)(uintptr_t)msg32.acl; 7825 msg->cnum = msg32.cnum; 7826 msg->cname = (caddr_t)(uintptr_t)msg32.cname; 7827 msg->cname_len = msg32.cname_len; 7828 msg->nodeid = msg32.nodeid; 7829 msg->hwaddr = msg32.hwaddr; 7830 msg->perm = msg32.perm; 7831 for (i = 0; i < 4; i++) { 7832 msg->bar.comp[i].u64 = msg32.bar.comp[i].u64; 7833 } 7834 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7835 "rsm_ddi_copyin done\n")); 7836 return (RSM_SUCCESS); 7837 } 7838 #endif 7839 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ddi_copyin done\n")); 7840 if (ddi_copyin(arg, (caddr_t)msg, sizeof (*msg), mode)) 7841 return (RSMERR_BAD_ADDR); 7842 else 7843 return (RSM_SUCCESS); 7844 } 7845 7846 static int 7847 rsmattr_ddi_copyout(adapter_t *adapter, caddr_t arg, int mode) 7848 { 7849 rsmka_int_controller_attr_t rsm_cattr; 7850 DBG_DEFINE(category, 7851 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL | RSM_DDI); 7852 7853 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7854 "rsmattr_ddi_copyout enter\n")); 7855 /* 7856 * need to copy appropriate data from rsm_controller_attr_t 7857 * to rsmka_int_controller_attr_t 7858 */ 7859 #ifdef _MULTI_DATAMODEL 7860 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 7861 rsmka_int_controller_attr32_t rsm_cattr32; 7862 7863 rsm_cattr32.attr_direct_access_sizes = 7864 adapter->rsm_attr.attr_direct_access_sizes; 7865 rsm_cattr32.attr_atomic_sizes = 7866 adapter->rsm_attr.attr_atomic_sizes; 7867 rsm_cattr32.attr_page_size = 7868 adapter->rsm_attr.attr_page_size; 7869 if (adapter->rsm_attr.attr_max_export_segment_size > 7870 UINT_MAX) 7871 
rsm_cattr32.attr_max_export_segment_size = 7872 RSM_MAXSZ_PAGE_ALIGNED; 7873 else 7874 rsm_cattr32.attr_max_export_segment_size = 7875 adapter->rsm_attr.attr_max_export_segment_size; 7876 if (adapter->rsm_attr.attr_tot_export_segment_size > 7877 UINT_MAX) 7878 rsm_cattr32.attr_tot_export_segment_size = 7879 RSM_MAXSZ_PAGE_ALIGNED; 7880 else 7881 rsm_cattr32.attr_tot_export_segment_size = 7882 adapter->rsm_attr.attr_tot_export_segment_size; 7883 if (adapter->rsm_attr.attr_max_export_segments > 7884 UINT_MAX) 7885 rsm_cattr32.attr_max_export_segments = 7886 UINT_MAX; 7887 else 7888 rsm_cattr32.attr_max_export_segments = 7889 adapter->rsm_attr.attr_max_export_segments; 7890 if (adapter->rsm_attr.attr_max_import_map_size > 7891 UINT_MAX) 7892 rsm_cattr32.attr_max_import_map_size = 7893 RSM_MAXSZ_PAGE_ALIGNED; 7894 else 7895 rsm_cattr32.attr_max_import_map_size = 7896 adapter->rsm_attr.attr_max_import_map_size; 7897 if (adapter->rsm_attr.attr_tot_import_map_size > 7898 UINT_MAX) 7899 rsm_cattr32.attr_tot_import_map_size = 7900 RSM_MAXSZ_PAGE_ALIGNED; 7901 else 7902 rsm_cattr32.attr_tot_import_map_size = 7903 adapter->rsm_attr.attr_tot_import_map_size; 7904 if (adapter->rsm_attr.attr_max_import_segments > 7905 UINT_MAX) 7906 rsm_cattr32.attr_max_import_segments = 7907 UINT_MAX; 7908 else 7909 rsm_cattr32.attr_max_import_segments = 7910 adapter->rsm_attr.attr_max_import_segments; 7911 rsm_cattr32.attr_controller_addr = 7912 adapter->rsm_attr.attr_controller_addr; 7913 7914 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7915 "rsmattr_ddi_copyout done\n")); 7916 if (ddi_copyout((caddr_t)&rsm_cattr32, arg, 7917 sizeof (rsmka_int_controller_attr32_t), mode)) { 7918 return (RSMERR_BAD_ADDR); 7919 } 7920 else 7921 return (RSM_SUCCESS); 7922 } 7923 #endif 7924 rsm_cattr.attr_direct_access_sizes = 7925 adapter->rsm_attr.attr_direct_access_sizes; 7926 rsm_cattr.attr_atomic_sizes = 7927 adapter->rsm_attr.attr_atomic_sizes; 7928 rsm_cattr.attr_page_size = 7929 
adapter->rsm_attr.attr_page_size; 7930 rsm_cattr.attr_max_export_segment_size = 7931 adapter->rsm_attr.attr_max_export_segment_size; 7932 rsm_cattr.attr_tot_export_segment_size = 7933 adapter->rsm_attr.attr_tot_export_segment_size; 7934 rsm_cattr.attr_max_export_segments = 7935 adapter->rsm_attr.attr_max_export_segments; 7936 rsm_cattr.attr_max_import_map_size = 7937 adapter->rsm_attr.attr_max_import_map_size; 7938 rsm_cattr.attr_tot_import_map_size = 7939 adapter->rsm_attr.attr_tot_import_map_size; 7940 rsm_cattr.attr_max_import_segments = 7941 adapter->rsm_attr.attr_max_import_segments; 7942 rsm_cattr.attr_controller_addr = 7943 adapter->rsm_attr.attr_controller_addr; 7944 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7945 "rsmattr_ddi_copyout done\n")); 7946 if (ddi_copyout((caddr_t)&rsm_cattr, arg, 7947 sizeof (rsmka_int_controller_attr_t), mode)) { 7948 return (RSMERR_BAD_ADDR); 7949 } 7950 else 7951 return (RSM_SUCCESS); 7952 } 7953 7954 /*ARGSUSED*/ 7955 static int 7956 rsm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, 7957 int *rvalp) 7958 { 7959 rsmseg_t *seg; 7960 rsmresource_t *res; 7961 minor_t rnum; 7962 rsm_ioctlmsg_t msg = {0}; 7963 int error; 7964 adapter_t *adapter; 7965 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL); 7966 7967 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ioctl enter\n")); 7968 7969 if (cmd == RSM_IOCTL_CONSUMEEVENT) { 7970 error = rsm_consumeevent_ioctl((caddr_t)arg, mode); 7971 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7972 "rsm_ioctl RSM_IOCTL_CONSUMEEVENT done: %d\n", error)); 7973 return (error); 7974 } 7975 7976 /* topology cmd does not use the arg common to other cmds */ 7977 if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_TOPOLOGY) { 7978 error = rsmka_topology_ioctl((caddr_t)arg, cmd, mode); 7979 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7980 "rsm_ioctl done: %d\n", error)); 7981 return (error); 7982 } 7983 7984 if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_IOVEC) { 7985 error = rsm_iovec_ioctl(dev, 
(caddr_t)arg, cmd, mode, credp); 7986 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7987 "rsm_ioctl done: %d\n", error)); 7988 return (error); 7989 } 7990 7991 /* 7992 * try to load arguments 7993 */ 7994 if (cmd != RSM_IOCTL_RING_BELL && 7995 rsm_ddi_copyin((caddr_t)arg, &msg, mode)) { 7996 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7997 "rsm_ioctl done: EFAULT\n")); 7998 return (RSMERR_BAD_ADDR); 7999 } 8000 8001 if (cmd == RSM_IOCTL_ATTR) { 8002 adapter = rsm_getadapter(&msg, mode); 8003 if (adapter == NULL) { 8004 DBG_PRINTF((category, RSM_DEBUG, 8005 "rsm_ioctl done: ENODEV\n")); 8006 return (RSMERR_CTLR_NOT_PRESENT); 8007 } 8008 error = rsmattr_ddi_copyout(adapter, msg.arg, mode); 8009 rsmka_release_adapter(adapter); 8010 DBG_PRINTF((category, RSM_DEBUG, 8011 "rsm_ioctl:after copyout %d\n", error)); 8012 return (error); 8013 } 8014 8015 if (cmd == RSM_IOCTL_BAR_INFO) { 8016 /* Return library off,len of barrier page */ 8017 msg.off = barrier_offset; 8018 msg.len = (int)barrier_size; 8019 #ifdef _MULTI_DATAMODEL 8020 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 8021 rsm_ioctlmsg32_t msg32; 8022 8023 if (msg.len > UINT_MAX) 8024 msg.len = RSM_MAXSZ_PAGE_ALIGNED; 8025 else 8026 msg32.len = (int32_t)msg.len; 8027 msg32.off = (int32_t)msg.off; 8028 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8029 "rsm_ioctl done\n")); 8030 if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg, 8031 sizeof (msg32), mode)) 8032 return (RSMERR_BAD_ADDR); 8033 else 8034 return (RSM_SUCCESS); 8035 } 8036 #endif 8037 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8038 "rsm_ioctl done\n")); 8039 if (ddi_copyout((caddr_t)&msg, (caddr_t)arg, 8040 sizeof (msg), mode)) 8041 return (RSMERR_BAD_ADDR); 8042 else 8043 return (RSM_SUCCESS); 8044 } 8045 8046 if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_MAP_ADDR) { 8047 /* map the nodeid or hwaddr */ 8048 error = rsmaddr_ioctl(cmd, &msg, mode); 8049 if (error == RSM_SUCCESS) { 8050 #ifdef _MULTI_DATAMODEL 8051 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 8052 
rsm_ioctlmsg32_t msg32; 8053 8054 msg32.hwaddr = (uint64_t)msg.hwaddr; 8055 msg32.nodeid = (uint32_t)msg.nodeid; 8056 8057 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8058 "rsm_ioctl done\n")); 8059 if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg, 8060 sizeof (msg32), mode)) 8061 return (RSMERR_BAD_ADDR); 8062 else 8063 return (RSM_SUCCESS); 8064 } 8065 #endif 8066 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8067 "rsm_ioctl done\n")); 8068 if (ddi_copyout((caddr_t)&msg, (caddr_t)arg, 8069 sizeof (msg), mode)) 8070 return (RSMERR_BAD_ADDR); 8071 else 8072 return (RSM_SUCCESS); 8073 } 8074 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8075 "rsm_ioctl done: %d\n", error)); 8076 return (error); 8077 } 8078 8079 /* Find resource and look it in read mode */ 8080 rnum = getminor(dev); 8081 res = rsmresource_lookup(rnum, RSM_NOLOCK); 8082 ASSERT(res != NULL); 8083 8084 /* 8085 * Find command group 8086 */ 8087 switch (RSM_IOCTL_CMDGRP(cmd)) { 8088 case RSM_IOCTL_EXPORT_SEG: 8089 /* 8090 * Export list is searched during publish, loopback and 8091 * remote lookup call. 8092 */ 8093 seg = rsmresource_seg(res, rnum, credp, 8094 RSM_RESOURCE_EXPORT_SEGMENT); 8095 if (seg->s_type == RSM_RESOURCE_EXPORT_SEGMENT) { 8096 error = rsmexport_ioctl(seg, &msg, cmd, arg, mode, 8097 credp); 8098 } else { /* export ioctl on an import/barrier resource */ 8099 error = RSMERR_BAD_SEG_HNDL; 8100 } 8101 break; 8102 case RSM_IOCTL_IMPORT_SEG: 8103 /* Import list is searched during remote unmap call. 
*/ 8104 seg = rsmresource_seg(res, rnum, credp, 8105 RSM_RESOURCE_IMPORT_SEGMENT); 8106 if (seg->s_type == RSM_RESOURCE_IMPORT_SEGMENT) { 8107 error = rsmimport_ioctl(seg, &msg, cmd, arg, mode, 8108 credp); 8109 } else { /* import ioctl on an export/barrier resource */ 8110 error = RSMERR_BAD_SEG_HNDL; 8111 } 8112 break; 8113 case RSM_IOCTL_BAR: 8114 if (res != RSMRC_RESERVED && 8115 res->rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT) { 8116 error = rsmbar_ioctl((rsmseg_t *)res, &msg, cmd, arg, 8117 mode); 8118 } else { /* invalid res value */ 8119 error = RSMERR_BAD_SEG_HNDL; 8120 } 8121 break; 8122 case RSM_IOCTL_BELL: 8123 if (res != RSMRC_RESERVED) { 8124 if (res->rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT) 8125 error = exportbell_ioctl((rsmseg_t *)res, cmd); 8126 else if (res->rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT) 8127 error = importbell_ioctl((rsmseg_t *)res, cmd); 8128 else /* RSM_RESOURCE_BAR */ 8129 error = RSMERR_BAD_SEG_HNDL; 8130 } else { /* invalid res value */ 8131 error = RSMERR_BAD_SEG_HNDL; 8132 } 8133 break; 8134 default: 8135 error = EINVAL; 8136 } 8137 8138 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ioctl done: %d\n", 8139 error)); 8140 return (error); 8141 } 8142 8143 8144 /* **************************** Segment Mapping Operations ********* */ 8145 static rsm_mapinfo_t * 8146 rsm_get_mapinfo(rsmseg_t *seg, off_t off, size_t len, off_t *dev_offset, 8147 size_t *map_len) 8148 { 8149 rsm_mapinfo_t *p; 8150 /* 8151 * Find the correct mapinfo structure to use during the mapping 8152 * from the seg->s_mapinfo list. 8153 * The seg->s_mapinfo list contains in reverse order the mappings 8154 * as returned by the RSMPI rsm_map. In rsm_devmap, we need to 8155 * access the correct entry within this list for the mapping 8156 * requested. 8157 * 8158 * The algorithm for selecting a list entry is as follows: 8159 * 8160 * When start_offset of an entry <= off we have found the entry 8161 * we were looking for. 
Adjust the dev_offset and map_len (needs 8162 * to be PAGESIZE aligned). 8163 */ 8164 p = seg->s_mapinfo; 8165 for (; p; p = p->next) { 8166 if (p->start_offset <= off) { 8167 *dev_offset = p->dev_offset + off - p->start_offset; 8168 *map_len = (len > p->individual_len) ? 8169 p->individual_len : ptob(btopr(len)); 8170 return (p); 8171 } 8172 p = p->next; 8173 } 8174 8175 return (NULL); 8176 } 8177 8178 static void 8179 rsm_free_mapinfo(rsm_mapinfo_t *mapinfo) 8180 { 8181 rsm_mapinfo_t *p; 8182 8183 while (mapinfo != NULL) { 8184 p = mapinfo; 8185 mapinfo = mapinfo->next; 8186 kmem_free(p, sizeof (*p)); 8187 } 8188 } 8189 8190 static int 8191 rsmmap_map(devmap_cookie_t dhp, dev_t dev, uint_t flags, offset_t off, 8192 size_t len, void **pvtp) 8193 { 8194 rsmcookie_t *p; 8195 rsmresource_t *res; 8196 rsmseg_t *seg; 8197 minor_t rnum; 8198 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI); 8199 8200 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_map enter\n")); 8201 8202 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8203 "rsmmap_map: dhp = %x\n", dhp)); 8204 8205 flags = flags; 8206 8207 rnum = getminor(dev); 8208 res = (rsmresource_t *)rsmresource_lookup(rnum, RSM_NOLOCK); 8209 ASSERT(res != NULL); 8210 8211 seg = (rsmseg_t *)res; 8212 8213 rsmseglock_acquire(seg); 8214 8215 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 8216 8217 /* 8218 * Allocate structure and add cookie to segment list 8219 */ 8220 p = kmem_alloc(sizeof (*p), KM_SLEEP); 8221 8222 p->c_dhp = dhp; 8223 p->c_off = off; 8224 p->c_len = len; 8225 p->c_next = seg->s_ckl; 8226 seg->s_ckl = p; 8227 8228 *pvtp = (void *)seg; 8229 8230 rsmseglock_release(seg); 8231 8232 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_map done\n")); 8233 return (DDI_SUCCESS); 8234 } 8235 8236 /* 8237 * Page fault handling is done here. 
The prerequisite mapping setup 8238 * has been done in rsm_devmap with calls to ddi_devmem_setup or 8239 * ddi_umem_setup 8240 */ 8241 static int 8242 rsmmap_access(devmap_cookie_t dhp, void *pvt, offset_t offset, size_t len, 8243 uint_t type, uint_t rw) 8244 { 8245 int e; 8246 rsmseg_t *seg = (rsmseg_t *)pvt; 8247 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI); 8248 8249 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_access enter\n")); 8250 8251 rsmseglock_acquire(seg); 8252 8253 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 8254 8255 while (seg->s_state == RSM_STATE_MAP_QUIESCE) { 8256 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) { 8257 DBG_PRINTF((category, RSM_DEBUG, 8258 "rsmmap_access done: cv_wait INTR")); 8259 rsmseglock_release(seg); 8260 return (RSMERR_INTERRUPTED); 8261 } 8262 } 8263 8264 ASSERT(seg->s_state == RSM_STATE_DISCONNECT || 8265 seg->s_state == RSM_STATE_ACTIVE); 8266 8267 if (seg->s_state == RSM_STATE_DISCONNECT) 8268 seg->s_flags |= RSM_IMPORT_DUMMY; 8269 8270 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8271 "rsmmap_access: dhp = %x\n", dhp)); 8272 8273 rsmseglock_release(seg); 8274 8275 if (e = devmap_load(dhp, offset, len, type, rw)) { 8276 DBG_PRINTF((category, RSM_ERR, "devmap_load failed\n")); 8277 } 8278 8279 8280 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_access done\n")); 8281 8282 return (e); 8283 } 8284 8285 static int 8286 rsmmap_dup(devmap_cookie_t dhp, void *oldpvt, devmap_cookie_t new_dhp, 8287 void **newpvt) 8288 { 8289 rsmseg_t *seg = (rsmseg_t *)oldpvt; 8290 rsmcookie_t *p, *old; 8291 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI); 8292 8293 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_dup enter\n")); 8294 8295 /* 8296 * Same as map, create an entry to hold cookie and add it to 8297 * connect segment list. The oldpvt is a pointer to segment. 8298 * Return segment pointer in newpvt. 
8299 */ 8300 rsmseglock_acquire(seg); 8301 8302 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 8303 8304 /* 8305 * Find old cookie 8306 */ 8307 for (old = seg->s_ckl; old != NULL; old = old->c_next) { 8308 if (old->c_dhp == dhp) { 8309 break; 8310 } 8311 } 8312 if (old == NULL) { 8313 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8314 "rsmmap_dup done: EINVAL\n")); 8315 rsmseglock_release(seg); 8316 return (EINVAL); 8317 } 8318 8319 p = kmem_alloc(sizeof (*p), KM_SLEEP); 8320 8321 p->c_dhp = new_dhp; 8322 p->c_off = old->c_off; 8323 p->c_len = old->c_len; 8324 p->c_next = seg->s_ckl; 8325 seg->s_ckl = p; 8326 8327 *newpvt = (void *)seg; 8328 8329 rsmseglock_release(seg); 8330 8331 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_dup done\n")); 8332 8333 return (DDI_SUCCESS); 8334 } 8335 8336 static void 8337 rsmmap_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off, size_t len, 8338 devmap_cookie_t new_dhp1, void **pvtp1, 8339 devmap_cookie_t new_dhp2, void **pvtp2) 8340 { 8341 /* 8342 * Remove pvtp structure from segment list. 8343 */ 8344 rsmseg_t *seg = (rsmseg_t *)pvtp; 8345 int freeflag; 8346 8347 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI); 8348 8349 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_unmap enter\n")); 8350 8351 off = off; len = len; 8352 pvtp1 = pvtp1; pvtp2 = pvtp2; 8353 8354 rsmseglock_acquire(seg); 8355 8356 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 8357 8358 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8359 "rsmmap_unmap: dhp = %x\n", dhp)); 8360 /* 8361 * We can go ahead and remove the dhps even if we are in 8362 * the MAPPING state because the dhps being removed here 8363 * belong to a different mmap and we are holding the segment 8364 * lock. 
8365 */ 8366 if (new_dhp1 == NULL && new_dhp2 == NULL) { 8367 /* find and remove dhp handle */ 8368 rsmcookie_t *tmp, **back = &seg->s_ckl; 8369 8370 while (*back != NULL) { 8371 tmp = *back; 8372 if (tmp->c_dhp == dhp) { 8373 *back = tmp->c_next; 8374 kmem_free(tmp, sizeof (*tmp)); 8375 break; 8376 } 8377 back = &tmp->c_next; 8378 } 8379 } else { 8380 DBG_PRINTF((category, RSM_DEBUG_LVL2, 8381 "rsmmap_unmap:parital unmap" 8382 "new_dhp1 %lx, new_dhp2 %lx\n", 8383 (size_t)new_dhp1, (size_t)new_dhp2)); 8384 } 8385 8386 /* 8387 * rsmmap_unmap is called for each mapping cookie on the list. 8388 * When the list becomes empty and we are not in the MAPPING 8389 * state then unmap in the rsmpi driver. 8390 */ 8391 if ((seg->s_ckl == NULL) && (seg->s_state != RSM_STATE_MAPPING)) 8392 (void) rsm_unmap(seg); 8393 8394 if (seg->s_state == RSM_STATE_END && seg->s_ckl == NULL) { 8395 freeflag = 1; 8396 } else { 8397 freeflag = 0; 8398 } 8399 8400 rsmseglock_release(seg); 8401 8402 if (freeflag) { 8403 /* Free the segment structure */ 8404 rsmseg_free(seg); 8405 } 8406 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_unmap done\n")); 8407 8408 } 8409 8410 static struct devmap_callback_ctl rsmmap_ops = { 8411 DEVMAP_OPS_REV, /* devmap_ops version number */ 8412 rsmmap_map, /* devmap_ops map routine */ 8413 rsmmap_access, /* devmap_ops access routine */ 8414 rsmmap_dup, /* devmap_ops dup routine */ 8415 rsmmap_unmap, /* devmap_ops unmap routine */ 8416 }; 8417 8418 static int 8419 rsm_devmap(dev_t dev, devmap_cookie_t dhc, offset_t off, size_t len, 8420 size_t *maplen, uint_t model /*ARGSUSED*/) 8421 { 8422 struct devmap_callback_ctl *callbackops = &rsmmap_ops; 8423 int err; 8424 uint_t maxprot; 8425 minor_t rnum; 8426 rsmseg_t *seg; 8427 off_t dev_offset; 8428 size_t cur_len; 8429 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI); 8430 8431 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_devmap enter\n")); 8432 8433 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8434 
"rsm_devmap: off = %lx, len = %lx\n", off, len)); 8435 rnum = getminor(dev); 8436 seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_NOLOCK); 8437 ASSERT(seg != NULL); 8438 8439 if (seg->s_hdr.rsmrc_type == RSM_RESOURCE_BAR) { 8440 if ((off == barrier_offset) && 8441 (len == barrier_size)) { 8442 8443 ASSERT(bar_va != NULL && bar_cookie != NULL); 8444 8445 /* 8446 * The offset argument in devmap_umem_setup represents 8447 * the offset within the kernel memory defined by the 8448 * cookie. We use this offset as barrier_offset. 8449 */ 8450 err = devmap_umem_setup(dhc, rsm_dip, NULL, bar_cookie, 8451 barrier_offset, len, PROT_USER|PROT_READ, 8452 DEVMAP_DEFAULTS, 0); 8453 8454 if (err != 0) { 8455 DBG_PRINTF((category, RSM_ERR, 8456 "rsm_devmap done: %d\n", err)); 8457 return (RSMERR_MAP_FAILED); 8458 } 8459 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8460 "rsm_devmap done: %d\n", err)); 8461 8462 *maplen = barrier_size; 8463 8464 return (err); 8465 } else { 8466 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8467 "rsm_devmap done: %d\n", err)); 8468 return (RSMERR_MAP_FAILED); 8469 } 8470 } 8471 8472 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 8473 ASSERT(seg->s_state == RSM_STATE_MAPPING); 8474 8475 /* 8476 * Make sure we still have permission for the map operation. 8477 */ 8478 maxprot = PROT_USER; 8479 if (seg->s_mode & RSM_PERM_READ) { 8480 maxprot |= PROT_READ; 8481 } 8482 8483 if (seg->s_mode & RSM_PERM_WRITE) { 8484 maxprot |= PROT_WRITE; 8485 } 8486 8487 /* 8488 * For each devmap call, rsmmap_map is called. This maintains driver 8489 * private information for the mapping. Thus, if there are multiple 8490 * devmap calls there will be multiple rsmmap_map calls and for each 8491 * call, the mapping information will be stored. 8492 * In case of an error during the processing of the devmap call, error 8493 * will be returned. This error return causes the caller of rsm_devmap 8494 * to undo all the mappings by calling rsmmap_unmap for each one. 
8495 * rsmmap_unmap will free up the private information for the requested 8496 * mapping. 8497 */ 8498 if (seg->s_node != my_nodeid) { 8499 rsm_mapinfo_t *p; 8500 8501 p = rsm_get_mapinfo(seg, off, len, &dev_offset, &cur_len); 8502 if (p == NULL) { 8503 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8504 "rsm_devmap: incorrect mapping info\n")); 8505 return (RSMERR_MAP_FAILED); 8506 } 8507 err = devmap_devmem_setup(dhc, p->dip, 8508 callbackops, p->dev_register, 8509 dev_offset, cur_len, maxprot, 8510 DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS, 0); 8511 8512 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8513 "rsm_devmap: dip=%lx,dreg=%lu,doff=%lx," 8514 "off=%lx,len=%lx\n", 8515 p->dip, p->dev_register, dev_offset, off, cur_len)); 8516 8517 if (err != 0) { 8518 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8519 "rsm_devmap: devmap_devmem_setup failed %d\n", 8520 err)); 8521 return (RSMERR_MAP_FAILED); 8522 } 8523 /* cur_len is always an integral multiple pagesize */ 8524 ASSERT((cur_len & (PAGESIZE-1)) == 0); 8525 *maplen = cur_len; 8526 return (err); 8527 8528 } else { 8529 err = devmap_umem_setup(dhc, rsm_dip, callbackops, 8530 seg->s_cookie, off, len, maxprot, 8531 DEVMAP_ALLOW_REMAP|DEVMAP_DEFAULTS, 0); 8532 if (err != 0) { 8533 DBG_PRINTF((category, RSM_DEBUG, 8534 "rsm_devmap: devmap_umem_setup failed %d\n", 8535 err)); 8536 return (RSMERR_MAP_FAILED); 8537 } 8538 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8539 "rsm_devmap: loopback done\n")); 8540 8541 *maplen = ptob(btopr(len)); 8542 8543 return (err); 8544 } 8545 } 8546 8547 /* 8548 * We can use the devmap framework for mapping device memory to user space by 8549 * specifying this routine in the rsm_cb_ops structure. 
The kernel mmap 8550 * processing calls this entry point and devmap_setup is called within this 8551 * function, which eventually calls rsm_devmap 8552 */ 8553 static int 8554 rsm_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len, 8555 uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred) 8556 { 8557 int error = 0; 8558 int old_state; 8559 minor_t rnum; 8560 rsmseg_t *seg, *eseg; 8561 adapter_t *adapter; 8562 rsm_import_share_t *sharedp; 8563 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI); 8564 8565 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_segmap enter\n")); 8566 8567 /* 8568 * find segment 8569 */ 8570 rnum = getminor(dev); 8571 seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_LOCK); 8572 8573 if (seg == NULL) { 8574 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8575 "rsm_segmap done: invalid segment\n")); 8576 return (EINVAL); 8577 } 8578 8579 /* 8580 * the user is trying to map a resource that has not been 8581 * defined yet. The library uses this to map in the 8582 * barrier page. 
8583 */ 8584 if (seg->s_hdr.rsmrc_type == RSM_RESOURCE_BAR) { 8585 rsmseglock_release(seg); 8586 8587 /* 8588 * The mapping for the barrier page is identified 8589 * by the special offset barrier_offset 8590 */ 8591 8592 if (off == (off_t)barrier_offset || 8593 len == (off_t)barrier_size) { 8594 if (bar_cookie == NULL || bar_va == NULL) { 8595 DBG_PRINTF((category, RSM_DEBUG, 8596 "rsm_segmap: bar cookie/va is NULL\n")); 8597 return (EINVAL); 8598 } 8599 8600 error = devmap_setup(dev, (offset_t)off, as, addrp, 8601 (size_t)len, prot, maxprot, flags, cred); 8602 8603 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8604 "rsm_segmap done: %d\n", error)); 8605 return (error); 8606 } else { 8607 DBG_PRINTF((category, RSM_DEBUG, 8608 "rsm_segmap: bad offset/length\n")); 8609 return (EINVAL); 8610 } 8611 } 8612 8613 /* Make sure you can only map imported segments */ 8614 if (seg->s_hdr.rsmrc_type != RSM_RESOURCE_IMPORT_SEGMENT) { 8615 rsmseglock_release(seg); 8616 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8617 "rsm_segmap done: not an import segment\n")); 8618 return (EINVAL); 8619 } 8620 /* check means library is broken */ 8621 ASSERT(seg->s_hdr.rsmrc_num == rnum); 8622 8623 /* wait for the segment to become unquiesced */ 8624 while (seg->s_state == RSM_STATE_CONN_QUIESCE) { 8625 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) { 8626 rsmseglock_release(seg); 8627 DBG_PRINTF((category, RSM_DEBUG, 8628 "rsm_segmap done: cv_wait INTR")); 8629 return (ENODEV); 8630 } 8631 } 8632 8633 /* wait until segment leaves the mapping state */ 8634 while (seg->s_state == RSM_STATE_MAPPING) 8635 cv_wait(&seg->s_cv, &seg->s_lock); 8636 8637 /* 8638 * we allow multiple maps of the same segment in the KA 8639 * and it works because we do an rsmpi map of the whole 8640 * segment during the first map and all the device mapping 8641 * information needed in rsm_devmap is in the mapinfo list. 
8642 */ 8643 if ((seg->s_state != RSM_STATE_CONNECT) && 8644 (seg->s_state != RSM_STATE_ACTIVE)) { 8645 rsmseglock_release(seg); 8646 DBG_PRINTF((category, RSM_DEBUG, 8647 "rsm_segmap done: segment not connected\n")); 8648 return (ENODEV); 8649 } 8650 8651 /* 8652 * Make sure we are not mapping a larger segment than what's 8653 * exported 8654 */ 8655 if ((size_t)off + ptob(btopr(len)) > seg->s_len) { 8656 rsmseglock_release(seg); 8657 DBG_PRINTF((category, RSM_DEBUG, 8658 "rsm_segmap done: off+len>seg size\n")); 8659 return (ENXIO); 8660 } 8661 8662 /* 8663 * Make sure we still have permission for the map operation. 8664 */ 8665 maxprot = PROT_USER; 8666 if (seg->s_mode & RSM_PERM_READ) { 8667 maxprot |= PROT_READ; 8668 } 8669 8670 if (seg->s_mode & RSM_PERM_WRITE) { 8671 maxprot |= PROT_WRITE; 8672 } 8673 8674 if ((prot & maxprot) != prot) { 8675 /* No permission */ 8676 rsmseglock_release(seg); 8677 DBG_PRINTF((category, RSM_DEBUG, 8678 "rsm_segmap done: no permission\n")); 8679 return (EACCES); 8680 } 8681 8682 old_state = seg->s_state; 8683 8684 ASSERT(seg->s_share != NULL); 8685 8686 rsmsharelock_acquire(seg); 8687 8688 sharedp = seg->s_share; 8689 8690 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8691 "rsm_segmap:RSMSI_STATE=%d\n", sharedp->rsmsi_state)); 8692 8693 if ((sharedp->rsmsi_state != RSMSI_STATE_CONNECTED) && 8694 (sharedp->rsmsi_state != RSMSI_STATE_MAPPED)) { 8695 rsmsharelock_release(seg); 8696 rsmseglock_release(seg); 8697 DBG_PRINTF((category, RSM_DEBUG, 8698 "rsm_segmap done:RSMSI_STATE %d invalid\n", 8699 sharedp->rsmsi_state)); 8700 return (ENODEV); 8701 } 8702 8703 /* 8704 * Do the map - since we want importers to share mappings 8705 * we do the rsmpi map for the whole segment 8706 */ 8707 if (seg->s_node != my_nodeid) { 8708 uint_t dev_register; 8709 off_t dev_offset; 8710 dev_info_t *dip; 8711 size_t tmp_len; 8712 size_t total_length_mapped = 0; 8713 size_t length_to_map = seg->s_len; 8714 off_t tmp_off = 0; 8715 rsm_mapinfo_t *p; 8716 8717 
/* 8718 * length_to_map = seg->s_len is always an integral 8719 * multiple of PAGESIZE. Length mapped in each entry in mapinfo 8720 * list is a multiple of PAGESIZE - RSMPI map ensures this 8721 */ 8722 8723 adapter = seg->s_adapter; 8724 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_CONNECTED || 8725 sharedp->rsmsi_state == RSMSI_STATE_MAPPED); 8726 8727 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) { 8728 error = 0; 8729 /* map the whole segment */ 8730 while (total_length_mapped < seg->s_len) { 8731 tmp_len = 0; 8732 8733 error = adapter->rsmpi_ops->rsm_map( 8734 seg->s_handle.in, tmp_off, 8735 length_to_map, &tmp_len, 8736 &dip, &dev_register, &dev_offset, 8737 NULL, NULL); 8738 8739 if (error != 0) 8740 break; 8741 8742 /* 8743 * Store the mapping info obtained from rsm_map 8744 */ 8745 p = kmem_alloc(sizeof (*p), KM_SLEEP); 8746 p->dev_register = dev_register; 8747 p->dev_offset = dev_offset; 8748 p->dip = dip; 8749 p->individual_len = tmp_len; 8750 p->start_offset = tmp_off; 8751 p->next = sharedp->rsmsi_mapinfo; 8752 sharedp->rsmsi_mapinfo = p; 8753 8754 total_length_mapped += tmp_len; 8755 length_to_map -= tmp_len; 8756 tmp_off += tmp_len; 8757 } 8758 seg->s_mapinfo = sharedp->rsmsi_mapinfo; 8759 8760 if (error != RSM_SUCCESS) { 8761 /* Check if this is the the first rsm_map */ 8762 if (sharedp->rsmsi_mapinfo != NULL) { 8763 /* 8764 * A single rsm_unmap undoes 8765 * multiple rsm_maps. 
8766 */ 8767 (void) seg->s_adapter->rsmpi_ops-> 8768 rsm_unmap(sharedp->rsmsi_handle); 8769 rsm_free_mapinfo(sharedp-> 8770 rsmsi_mapinfo); 8771 } 8772 sharedp->rsmsi_mapinfo = NULL; 8773 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED; 8774 rsmsharelock_release(seg); 8775 rsmseglock_release(seg); 8776 DBG_PRINTF((category, RSM_DEBUG, 8777 "rsm_segmap done: rsmpi map err %d\n", 8778 error)); 8779 ASSERT(error != RSMERR_BAD_LENGTH && 8780 error != RSMERR_BAD_MEM_ALIGNMENT && 8781 error != RSMERR_BAD_SEG_HNDL); 8782 if (error == RSMERR_UNSUPPORTED_OPERATION) 8783 return (ENOTSUP); 8784 else if (error == RSMERR_INSUFFICIENT_RESOURCES) 8785 return (EAGAIN); 8786 else if (error == RSMERR_CONN_ABORTED) 8787 return (ENODEV); 8788 else 8789 return (error); 8790 } else { 8791 sharedp->rsmsi_state = RSMSI_STATE_MAPPED; 8792 } 8793 } else { 8794 seg->s_mapinfo = sharedp->rsmsi_mapinfo; 8795 } 8796 8797 sharedp->rsmsi_mapcnt++; 8798 8799 rsmsharelock_release(seg); 8800 8801 /* move to an intermediate mapping state */ 8802 seg->s_state = RSM_STATE_MAPPING; 8803 rsmseglock_release(seg); 8804 8805 error = devmap_setup(dev, (offset_t)off, as, addrp, 8806 len, prot, maxprot, flags, cred); 8807 8808 rsmseglock_acquire(seg); 8809 ASSERT(seg->s_state == RSM_STATE_MAPPING); 8810 8811 if (error == DDI_SUCCESS) { 8812 seg->s_state = RSM_STATE_ACTIVE; 8813 } else { 8814 rsmsharelock_acquire(seg); 8815 8816 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED); 8817 8818 sharedp->rsmsi_mapcnt--; 8819 if (sharedp->rsmsi_mapcnt == 0) { 8820 /* unmap the shared RSMPI mapping */ 8821 ASSERT(sharedp->rsmsi_handle != NULL); 8822 (void) adapter->rsmpi_ops-> 8823 rsm_unmap(sharedp->rsmsi_handle); 8824 rsm_free_mapinfo(sharedp->rsmsi_mapinfo); 8825 sharedp->rsmsi_mapinfo = NULL; 8826 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED; 8827 } 8828 8829 rsmsharelock_release(seg); 8830 seg->s_state = old_state; 8831 DBG_PRINTF((category, RSM_ERR, 8832 "rsm: devmap_setup failed %d\n", error)); 8833 } 8834 
cv_broadcast(&seg->s_cv); 8835 rsmseglock_release(seg); 8836 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsm_segmap done: %d\n", 8837 error)); 8838 return (error); 8839 } else { 8840 /* 8841 * For loopback, the export segment mapping cookie (s_cookie) 8842 * is also used as the s_cookie value for its import segments 8843 * during mapping. 8844 * Note that reference counting for s_cookie of the export 8845 * segment is not required due to the following: 8846 * We never have a case of the export segment being destroyed, 8847 * leaving the import segments with a stale value for the 8848 * s_cookie field, since a force disconnect is done prior to a 8849 * destroy of an export segment. The force disconnect causes 8850 * the s_cookie value to be reset to NULL. Also for the 8851 * rsm_rebind operation, we change the s_cookie value of the 8852 * export segment as well as of all its local (loopback) 8853 * importers. 8854 */ 8855 DBG_ADDCATEGORY(category, RSM_LOOPBACK); 8856 8857 rsmsharelock_release(seg); 8858 /* 8859 * In order to maintain the lock ordering between the export 8860 * and import segment locks, we need to acquire the export 8861 * segment lock first and only then acquire the import 8862 * segment lock. 8863 * The above is necessary to avoid any deadlock scenarios 8864 * with rsm_rebind which also acquires both the export 8865 * and import segment locks in the above mentioned order. 8866 * Based on code inspection, there seem to be no other 8867 * situations in which both the export and import segment 8868 * locks are acquired either in the same or opposite order 8869 * as mentioned above. 8870 * Thus in order to conform to the above lock order, we 8871 * need to change the state of the import segment to 8872 * RSM_STATE_MAPPING, release the lock. Once this is done we 8873 * can now safely acquire the export segment lock first 8874 * followed by the import segment lock which is as per 8875 * the lock order mentioned above. 
8876 */ 8877 /* move to an intermediate mapping state */ 8878 seg->s_state = RSM_STATE_MAPPING; 8879 rsmseglock_release(seg); 8880 8881 eseg = rsmexport_lookup(seg->s_key); 8882 8883 if (eseg == NULL) { 8884 rsmseglock_acquire(seg); 8885 /* 8886 * Revert to old_state and signal any waiters 8887 * The shared state is not changed 8888 */ 8889 8890 seg->s_state = old_state; 8891 cv_broadcast(&seg->s_cv); 8892 rsmseglock_release(seg); 8893 DBG_PRINTF((category, RSM_DEBUG, 8894 "rsm_segmap done: key %d not found\n", seg->s_key)); 8895 return (ENODEV); 8896 } 8897 8898 rsmsharelock_acquire(seg); 8899 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_CONNECTED || 8900 sharedp->rsmsi_state == RSMSI_STATE_MAPPED); 8901 8902 sharedp->rsmsi_mapcnt++; 8903 sharedp->rsmsi_state = RSMSI_STATE_MAPPED; 8904 rsmsharelock_release(seg); 8905 8906 ASSERT(eseg->s_cookie != NULL); 8907 8908 /* 8909 * It is not required or necessary to acquire the import 8910 * segment lock here to change the value of s_cookie since 8911 * no one will touch the import segment as long as it is 8912 * in the RSM_STATE_MAPPING state. 
8913 */ 8914 seg->s_cookie = eseg->s_cookie; 8915 8916 rsmseglock_release(eseg); 8917 8918 error = devmap_setup(dev, (offset_t)off, as, addrp, (size_t)len, 8919 prot, maxprot, flags, cred); 8920 8921 rsmseglock_acquire(seg); 8922 ASSERT(seg->s_state == RSM_STATE_MAPPING); 8923 if (error == 0) { 8924 seg->s_state = RSM_STATE_ACTIVE; 8925 } else { 8926 rsmsharelock_acquire(seg); 8927 8928 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED); 8929 8930 sharedp->rsmsi_mapcnt--; 8931 if (sharedp->rsmsi_mapcnt == 0) { 8932 sharedp->rsmsi_mapinfo = NULL; 8933 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED; 8934 } 8935 rsmsharelock_release(seg); 8936 seg->s_state = old_state; 8937 seg->s_cookie = NULL; 8938 } 8939 cv_broadcast(&seg->s_cv); 8940 rsmseglock_release(seg); 8941 DBG_PRINTF((category, RSM_DEBUG_LVL2, 8942 "rsm_segmap done: %d\n", error)); 8943 return (error); 8944 } 8945 } 8946 8947 int 8948 rsmka_null_seg_create( 8949 rsm_controller_handle_t argcp, 8950 rsm_memseg_export_handle_t *handle, 8951 size_t size, 8952 uint_t flags, 8953 rsm_memory_local_t *memory, 8954 rsm_resource_callback_t callback, 8955 rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/) 8956 { 8957 return (RSM_SUCCESS); 8958 } 8959 8960 8961 int 8962 rsmka_null_seg_destroy( 8963 rsm_memseg_export_handle_t argmemseg /*ARGSUSED*/) 8964 { 8965 return (RSM_SUCCESS); 8966 } 8967 8968 8969 int 8970 rsmka_null_bind( 8971 rsm_memseg_export_handle_t argmemseg, 8972 off_t offset, 8973 rsm_memory_local_t *argmemory, 8974 rsm_resource_callback_t callback, 8975 rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/) 8976 { 8977 return (RSM_SUCCESS); 8978 } 8979 8980 8981 int 8982 rsmka_null_unbind( 8983 rsm_memseg_export_handle_t argmemseg, 8984 off_t offset, 8985 size_t length /*ARGSUSED*/) 8986 { 8987 return (DDI_SUCCESS); 8988 } 8989 8990 int 8991 rsmka_null_rebind( 8992 rsm_memseg_export_handle_t argmemseg, 8993 off_t offset, 8994 rsm_memory_local_t *memory, 8995 rsm_resource_callback_t callback, 8996 
rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/) 8997 { 8998 return (RSM_SUCCESS); 8999 } 9000 9001 int 9002 rsmka_null_publish( 9003 rsm_memseg_export_handle_t argmemseg, 9004 rsm_access_entry_t access_list[], 9005 uint_t access_list_length, 9006 rsm_memseg_id_t segment_id, 9007 rsm_resource_callback_t callback, 9008 rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/) 9009 { 9010 return (RSM_SUCCESS); 9011 } 9012 9013 9014 int 9015 rsmka_null_republish( 9016 rsm_memseg_export_handle_t memseg, 9017 rsm_access_entry_t access_list[], 9018 uint_t access_list_length, 9019 rsm_resource_callback_t callback, 9020 rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/) 9021 { 9022 return (RSM_SUCCESS); 9023 } 9024 9025 int 9026 rsmka_null_unpublish( 9027 rsm_memseg_export_handle_t argmemseg /*ARGSUSED*/) 9028 { 9029 return (RSM_SUCCESS); 9030 } 9031 9032 9033 void 9034 rsmka_init_loopback() 9035 { 9036 rsm_ops_t *ops = &null_rsmpi_ops; 9037 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_LOOPBACK); 9038 9039 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9040 "rsmka_init_loopback enter\n")); 9041 9042 /* initialize null ops vector */ 9043 ops->rsm_seg_create = rsmka_null_seg_create; 9044 ops->rsm_seg_destroy = rsmka_null_seg_destroy; 9045 ops->rsm_bind = rsmka_null_bind; 9046 ops->rsm_unbind = rsmka_null_unbind; 9047 ops->rsm_rebind = rsmka_null_rebind; 9048 ops->rsm_publish = rsmka_null_publish; 9049 ops->rsm_unpublish = rsmka_null_unpublish; 9050 ops->rsm_republish = rsmka_null_republish; 9051 9052 /* initialize attributes for loopback adapter */ 9053 loopback_attr.attr_name = loopback_str; 9054 loopback_attr.attr_page_size = 0x8; /* 8K */ 9055 9056 /* initialize loopback adapter */ 9057 loopback_adapter.rsm_attr = loopback_attr; 9058 loopback_adapter.rsmpi_ops = &null_rsmpi_ops; 9059 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9060 "rsmka_init_loopback done\n")); 9061 } 9062 9063 /* ************** DR functions ********************************** */ 9064 static 
void 9065 rsm_quiesce_exp_seg(rsmresource_t *resp) 9066 { 9067 int recheck_state; 9068 rsmseg_t *segp = (rsmseg_t *)resp; 9069 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 9070 DBG_DEFINE_STR(function, "rsm_unquiesce_exp_seg"); 9071 9072 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9073 "%s enter: key=%u\n", function, segp->s_key)); 9074 9075 rsmseglock_acquire(segp); 9076 do { 9077 recheck_state = 0; 9078 if ((segp->s_state == RSM_STATE_NEW_QUIESCED) || 9079 (segp->s_state == RSM_STATE_BIND_QUIESCED) || 9080 (segp->s_state == RSM_STATE_EXPORT_QUIESCING) || 9081 (segp->s_state == RSM_STATE_EXPORT_QUIESCED)) { 9082 rsmseglock_release(segp); 9083 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9084 "%s done:state =%d\n", function, 9085 segp->s_state)); 9086 return; 9087 } 9088 9089 if (segp->s_state == RSM_STATE_NEW) { 9090 segp->s_state = RSM_STATE_NEW_QUIESCED; 9091 rsmseglock_release(segp); 9092 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9093 "%s done:state =%d\n", function, 9094 segp->s_state)); 9095 return; 9096 } 9097 9098 if (segp->s_state == RSM_STATE_BIND) { 9099 /* unbind */ 9100 (void) rsm_unbind_pages(segp); 9101 segp->s_state = RSM_STATE_BIND_QUIESCED; 9102 rsmseglock_release(segp); 9103 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9104 "%s done:state =%d\n", function, 9105 segp->s_state)); 9106 return; 9107 } 9108 9109 if (segp->s_state == RSM_STATE_EXPORT) { 9110 /* 9111 * wait for putv/getv to complete if the segp is 9112 * a local memory handle 9113 */ 9114 while ((segp->s_state == RSM_STATE_EXPORT) && 9115 (segp->s_rdmacnt != 0)) { 9116 cv_wait(&segp->s_cv, &segp->s_lock); 9117 } 9118 9119 if (segp->s_state != RSM_STATE_EXPORT) { 9120 /* 9121 * state changed need to see what it 9122 * should be changed to. 
9123 */ 9124 recheck_state = 1; 9125 continue; 9126 } 9127 9128 segp->s_state = RSM_STATE_EXPORT_QUIESCING; 9129 rsmseglock_release(segp); 9130 /* 9131 * send SUSPEND messages - currently it will be 9132 * done at the end 9133 */ 9134 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9135 "%s done:state =%d\n", function, 9136 segp->s_state)); 9137 return; 9138 } 9139 } while (recheck_state); 9140 9141 rsmseglock_release(segp); 9142 } 9143 9144 static void 9145 rsm_unquiesce_exp_seg(rsmresource_t *resp) 9146 { 9147 int ret; 9148 rsmseg_t *segp = (rsmseg_t *)resp; 9149 rsmapi_access_entry_t *acl; 9150 rsm_access_entry_t *rsmpi_acl; 9151 int acl_len; 9152 int create_flags = 0; 9153 struct buf *xbuf; 9154 rsm_memory_local_t mem; 9155 adapter_t *adapter; 9156 dev_t sdev = 0; 9157 rsm_resource_callback_t callback_flag; 9158 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 9159 DBG_DEFINE_STR(function, "rsm_unquiesce_exp_seg"); 9160 9161 rsmseglock_acquire(segp); 9162 9163 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9164 "%s enter: key=%u, state=%d\n", function, segp->s_key, 9165 segp->s_state)); 9166 9167 if ((segp->s_state == RSM_STATE_NEW) || 9168 (segp->s_state == RSM_STATE_BIND) || 9169 (segp->s_state == RSM_STATE_EXPORT)) { 9170 rsmseglock_release(segp); 9171 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done:state=%d\n", 9172 function, segp->s_state)); 9173 return; 9174 } 9175 9176 if (segp->s_state == RSM_STATE_NEW_QUIESCED) { 9177 segp->s_state = RSM_STATE_NEW; 9178 cv_broadcast(&segp->s_cv); 9179 rsmseglock_release(segp); 9180 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done:state=%d\n", 9181 function, segp->s_state)); 9182 return; 9183 } 9184 9185 if (segp->s_state == RSM_STATE_BIND_QUIESCED) { 9186 /* bind the segment */ 9187 ret = rsm_bind_pages(&segp->s_cookie, segp->s_region.r_vaddr, 9188 segp->s_len, segp->s_proc); 9189 if (ret == RSM_SUCCESS) { /* bind successful */ 9190 segp->s_state = RSM_STATE_BIND; 9191 } else { /* bind failed - resource unavailable */ 9192 
segp->s_state = RSM_STATE_NEW; 9193 } 9194 cv_broadcast(&segp->s_cv); 9195 rsmseglock_release(segp); 9196 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9197 "%s done: bind_qscd bind = %d\n", function, ret)); 9198 return; 9199 } 9200 9201 while (segp->s_state == RSM_STATE_EXPORT_QUIESCING) { 9202 /* wait for the segment to move to EXPORT_QUIESCED state */ 9203 cv_wait(&segp->s_cv, &segp->s_lock); 9204 } 9205 9206 if (segp->s_state == RSM_STATE_EXPORT_QUIESCED) { 9207 /* bind the segment */ 9208 ret = rsm_bind_pages(&segp->s_cookie, segp->s_region.r_vaddr, 9209 segp->s_len, segp->s_proc); 9210 9211 if (ret != RSM_SUCCESS) { 9212 /* bind failed - resource unavailable */ 9213 acl_len = segp->s_acl_len; 9214 acl = segp->s_acl; 9215 rsmpi_acl = segp->s_acl_in; 9216 segp->s_acl_len = 0; 9217 segp->s_acl = NULL; 9218 segp->s_acl_in = NULL; 9219 rsmseglock_release(segp); 9220 9221 rsmexport_rm(segp); 9222 rsmacl_free(acl, acl_len); 9223 rsmpiacl_free(rsmpi_acl, acl_len); 9224 9225 rsmseglock_acquire(segp); 9226 segp->s_state = RSM_STATE_NEW; 9227 cv_broadcast(&segp->s_cv); 9228 rsmseglock_release(segp); 9229 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9230 "%s done: exp_qscd bind failed = %d\n", 9231 function, ret)); 9232 return; 9233 } 9234 /* 9235 * publish the segment 9236 * if successful 9237 * segp->s_state = RSM_STATE_EXPORT; 9238 * else failed 9239 * segp->s_state = RSM_STATE_BIND; 9240 */ 9241 9242 /* check whether it is a local_memory_handle */ 9243 if (segp->s_acl != (rsmapi_access_entry_t *)NULL) { 9244 if ((segp->s_acl[0].ae_node == my_nodeid) && 9245 (segp->s_acl[0].ae_permission == 0)) { 9246 segp->s_state = RSM_STATE_EXPORT; 9247 cv_broadcast(&segp->s_cv); 9248 rsmseglock_release(segp); 9249 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9250 "%s done:exp_qscd\n", function)); 9251 return; 9252 } 9253 } 9254 xbuf = ddi_umem_iosetup(segp->s_cookie, 0, segp->s_len, B_WRITE, 9255 sdev, 0, NULL, DDI_UMEM_SLEEP); 9256 ASSERT(xbuf != NULL); 9257 9258 mem.ms_type = RSM_MEM_BUF; 
9259 mem.ms_bp = xbuf; 9260 9261 adapter = segp->s_adapter; 9262 9263 if (segp->s_flags & RSMKA_ALLOW_UNBIND_REBIND) { 9264 create_flags = RSM_ALLOW_UNBIND_REBIND; 9265 } 9266 9267 if (segp->s_flags & RSMKA_SET_RESOURCE_DONTWAIT) { 9268 callback_flag = RSM_RESOURCE_DONTWAIT; 9269 } else { 9270 callback_flag = RSM_RESOURCE_SLEEP; 9271 } 9272 9273 ret = adapter->rsmpi_ops->rsm_seg_create( 9274 adapter->rsmpi_handle, &segp->s_handle.out, 9275 segp->s_len, create_flags, &mem, 9276 callback_flag, NULL); 9277 9278 if (ret != RSM_SUCCESS) { 9279 acl_len = segp->s_acl_len; 9280 acl = segp->s_acl; 9281 rsmpi_acl = segp->s_acl_in; 9282 segp->s_acl_len = 0; 9283 segp->s_acl = NULL; 9284 segp->s_acl_in = NULL; 9285 rsmseglock_release(segp); 9286 9287 rsmexport_rm(segp); 9288 rsmacl_free(acl, acl_len); 9289 rsmpiacl_free(rsmpi_acl, acl_len); 9290 9291 rsmseglock_acquire(segp); 9292 segp->s_state = RSM_STATE_BIND; 9293 cv_broadcast(&segp->s_cv); 9294 rsmseglock_release(segp); 9295 DBG_PRINTF((category, RSM_ERR, 9296 "%s done: exp_qscd create failed = %d\n", 9297 function, ret)); 9298 return; 9299 } 9300 9301 ret = adapter->rsmpi_ops->rsm_publish( 9302 segp->s_handle.out, segp->s_acl_in, segp->s_acl_len, 9303 segp->s_segid, RSM_RESOURCE_DONTWAIT, NULL); 9304 9305 if (ret != RSM_SUCCESS) { 9306 acl_len = segp->s_acl_len; 9307 acl = segp->s_acl; 9308 rsmpi_acl = segp->s_acl_in; 9309 segp->s_acl_len = 0; 9310 segp->s_acl = NULL; 9311 segp->s_acl_in = NULL; 9312 adapter->rsmpi_ops->rsm_seg_destroy(segp->s_handle.out); 9313 rsmseglock_release(segp); 9314 9315 rsmexport_rm(segp); 9316 rsmacl_free(acl, acl_len); 9317 rsmpiacl_free(rsmpi_acl, acl_len); 9318 9319 rsmseglock_acquire(segp); 9320 segp->s_state = RSM_STATE_BIND; 9321 cv_broadcast(&segp->s_cv); 9322 rsmseglock_release(segp); 9323 DBG_PRINTF((category, RSM_ERR, 9324 "%s done: exp_qscd publish failed = %d\n", 9325 function, ret)); 9326 return; 9327 } 9328 9329 segp->s_state = RSM_STATE_EXPORT; 9330 cv_broadcast(&segp->s_cv); 
9331 rsmseglock_release(segp); 9332 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done: exp_qscd\n", 9333 function)); 9334 return; 9335 } 9336 9337 rsmseglock_release(segp); 9338 9339 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function)); 9340 } 9341 9342 static void 9343 rsm_quiesce_imp_seg(rsmresource_t *resp) 9344 { 9345 rsmseg_t *segp = (rsmseg_t *)resp; 9346 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 9347 DBG_DEFINE_STR(function, "rsm_quiesce_imp_seg"); 9348 9349 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9350 "%s enter: key=%u\n", function, segp->s_key)); 9351 9352 rsmseglock_acquire(segp); 9353 segp->s_flags |= RSM_DR_INPROGRESS; 9354 9355 while (segp->s_rdmacnt != 0) { 9356 /* wait for the RDMA to complete */ 9357 cv_wait(&segp->s_cv, &segp->s_lock); 9358 } 9359 9360 rsmseglock_release(segp); 9361 9362 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function)); 9363 9364 } 9365 9366 static void 9367 rsm_unquiesce_imp_seg(rsmresource_t *resp) 9368 { 9369 rsmseg_t *segp = (rsmseg_t *)resp; 9370 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 9371 DBG_DEFINE_STR(function, "rsm_unquiesce_imp_seg"); 9372 9373 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9374 "%s enter: key=%u\n", function, segp->s_key)); 9375 9376 rsmseglock_acquire(segp); 9377 9378 segp->s_flags &= ~RSM_DR_INPROGRESS; 9379 /* wake up any waiting putv/getv ops */ 9380 cv_broadcast(&segp->s_cv); 9381 9382 rsmseglock_release(segp); 9383 9384 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function)); 9385 9386 9387 } 9388 9389 static void 9390 rsm_process_exp_seg(rsmresource_t *resp, int event) 9391 { 9392 if (event == RSM_DR_QUIESCE) 9393 rsm_quiesce_exp_seg(resp); 9394 else /* UNQUIESCE */ 9395 rsm_unquiesce_exp_seg(resp); 9396 } 9397 9398 static void 9399 rsm_process_imp_seg(rsmresource_t *resp, int event) 9400 { 9401 if (event == RSM_DR_QUIESCE) 9402 rsm_quiesce_imp_seg(resp); 9403 else /* UNQUIESCE */ 9404 rsm_unquiesce_imp_seg(resp); 9405 } 9406 9407 static 
void 9408 rsm_dr_process_local_segments(int event) 9409 { 9410 9411 int i, j; 9412 rsmresource_blk_t *blk; 9413 rsmresource_t *p; 9414 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 9415 9416 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9417 "rsm_dr_process_local_segments enter\n")); 9418 9419 /* iterate through the resource structure */ 9420 9421 rw_enter(&rsm_resource.rsmrc_lock, RW_READER); 9422 9423 for (i = 0; i < rsm_resource.rsmrc_len; i++) { 9424 blk = rsm_resource.rsmrc_root[i]; 9425 if (blk != NULL) { 9426 for (j = 0; j < RSMRC_BLKSZ; j++) { 9427 p = blk->rsmrcblk_blks[j]; 9428 if ((p != NULL) && (p != RSMRC_RESERVED)) { 9429 /* valid resource */ 9430 if (p->rsmrc_type == 9431 RSM_RESOURCE_EXPORT_SEGMENT) 9432 rsm_process_exp_seg(p, event); 9433 else if (p->rsmrc_type == 9434 RSM_RESOURCE_IMPORT_SEGMENT) 9435 rsm_process_imp_seg(p, event); 9436 } 9437 } 9438 } 9439 } 9440 9441 rw_exit(&rsm_resource.rsmrc_lock); 9442 9443 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9444 "rsm_dr_process_local_segments done\n")); 9445 } 9446 9447 /* *************** DR callback functions ************ */ 9448 static void 9449 rsm_dr_callback_post_add(void *arg, pgcnt_t delta /* ARGSUSED */) 9450 { 9451 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 9452 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9453 "rsm_dr_callback_post_add is a no-op\n")); 9454 /* Noop */ 9455 } 9456 9457 static int 9458 rsm_dr_callback_pre_del(void *arg, pgcnt_t delta /* ARGSUSED */) 9459 { 9460 int recheck_state = 0; 9461 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 9462 9463 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9464 "rsm_dr_callback_pre_del enter\n")); 9465 9466 mutex_enter(&rsm_drv_data.drv_lock); 9467 9468 do { 9469 recheck_state = 0; 9470 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9471 "rsm_dr_callback_pre_del:state=%d\n", 9472 rsm_drv_data.drv_state)); 9473 9474 switch (rsm_drv_data.drv_state) { 9475 case RSM_DRV_NEW: 9476 /* 9477 * The state should usually never be RSM_DRV_NEW 
9478 * since in this state the callbacks have not yet 9479 * been registered. So, ASSERT. 9480 */ 9481 ASSERT(0); 9482 return (0); 9483 case RSM_DRV_REG_PROCESSING: 9484 /* 9485 * The driver is in the process of registering 9486 * with the DR framework. So, wait till the 9487 * registration process is complete. 9488 */ 9489 recheck_state = 1; 9490 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock); 9491 break; 9492 case RSM_DRV_UNREG_PROCESSING: 9493 /* 9494 * If the state is RSM_DRV_UNREG_PROCESSING, the 9495 * module is in the process of detaching and 9496 * unregistering the callbacks from the DR 9497 * framework. So, simply return. 9498 */ 9499 mutex_exit(&rsm_drv_data.drv_lock); 9500 DBG_PRINTF((category, RSM_DEBUG, 9501 "rsm_dr_callback_pre_del:" 9502 "pre-del on NEW/UNREG\n")); 9503 return (0); 9504 case RSM_DRV_OK: 9505 rsm_drv_data.drv_state = RSM_DRV_PREDEL_STARTED; 9506 break; 9507 case RSM_DRV_PREDEL_STARTED: 9508 /* FALLTHRU */ 9509 case RSM_DRV_PREDEL_COMPLETED: 9510 /* FALLTHRU */ 9511 case RSM_DRV_POSTDEL_IN_PROGRESS: 9512 recheck_state = 1; 9513 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock); 9514 break; 9515 case RSM_DRV_DR_IN_PROGRESS: 9516 rsm_drv_data.drv_memdel_cnt++; 9517 mutex_exit(&rsm_drv_data.drv_lock); 9518 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9519 "rsm_dr_callback_pre_del done\n")); 9520 return (0); 9521 /* break; */ 9522 default: 9523 ASSERT(0); 9524 break; 9525 } 9526 9527 } while (recheck_state); 9528 9529 rsm_drv_data.drv_memdel_cnt++; 9530 9531 mutex_exit(&rsm_drv_data.drv_lock); 9532 9533 /* Do all the quiescing stuff here */ 9534 DBG_PRINTF((category, RSM_DEBUG, 9535 "rsm_dr_callback_pre_del: quiesce things now\n")); 9536 9537 rsm_dr_process_local_segments(RSM_DR_QUIESCE); 9538 9539 /* 9540 * now that all local segments have been quiesced lets inform 9541 * the importers 9542 */ 9543 rsm_send_suspend(); 9544 9545 /* 9546 * In response to the suspend message the remote node(s) will process 9547 * the segments and 
send a suspend_complete message. Till all 9548 * the nodes send the suspend_complete message we wait in the 9549 * RSM_DRV_PREDEL_STARTED state. In the exporter_quiesce 9550 * function we transition to the RSM_DRV_PREDEL_COMPLETED state. 9551 */ 9552 mutex_enter(&rsm_drv_data.drv_lock); 9553 9554 while (rsm_drv_data.drv_state == RSM_DRV_PREDEL_STARTED) { 9555 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock); 9556 } 9557 9558 ASSERT(rsm_drv_data.drv_state == RSM_DRV_PREDEL_COMPLETED); 9559 9560 rsm_drv_data.drv_state = RSM_DRV_DR_IN_PROGRESS; 9561 cv_broadcast(&rsm_drv_data.drv_cv); 9562 9563 mutex_exit(&rsm_drv_data.drv_lock); 9564 9565 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9566 "rsm_dr_callback_pre_del done\n")); 9567 9568 return (0); 9569 } 9570 9571 static void 9572 rsm_dr_callback_post_del(void *arg, pgcnt_t delta, int cancelled /* ARGSUSED */) 9573 { 9574 int recheck_state = 0; 9575 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 9576 9577 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9578 "rsm_dr_callback_post_del enter\n")); 9579 9580 mutex_enter(&rsm_drv_data.drv_lock); 9581 9582 do { 9583 recheck_state = 0; 9584 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9585 "rsm_dr_callback_post_del:state=%d\n", 9586 rsm_drv_data.drv_state)); 9587 9588 switch (rsm_drv_data.drv_state) { 9589 case RSM_DRV_NEW: 9590 /* 9591 * The driver state cannot not be RSM_DRV_NEW 9592 * since in this state the callbacks have not 9593 * yet been registered. 9594 */ 9595 ASSERT(0); 9596 return; 9597 case RSM_DRV_REG_PROCESSING: 9598 /* 9599 * The driver is in the process of registering with 9600 * the DR framework. Wait till the registration is 9601 * complete. 9602 */ 9603 recheck_state = 1; 9604 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock); 9605 break; 9606 case RSM_DRV_UNREG_PROCESSING: 9607 /* 9608 * RSM_DRV_UNREG_PROCESSING state means the module 9609 * is detaching and unregistering the callbacks 9610 * from the DR framework. So simply return. 
9611 */ 9612 /* FALLTHRU */ 9613 case RSM_DRV_OK: 9614 /* 9615 * RSM_DRV_OK means we missed the pre-del 9616 * corresponding to this post-del coz we had not 9617 * registered yet, so simply return. 9618 */ 9619 mutex_exit(&rsm_drv_data.drv_lock); 9620 DBG_PRINTF((category, RSM_DEBUG, 9621 "rsm_dr_callback_post_del:" 9622 "post-del on OK/UNREG\n")); 9623 return; 9624 /* break; */ 9625 case RSM_DRV_PREDEL_STARTED: 9626 /* FALLTHRU */ 9627 case RSM_DRV_PREDEL_COMPLETED: 9628 /* FALLTHRU */ 9629 case RSM_DRV_POSTDEL_IN_PROGRESS: 9630 recheck_state = 1; 9631 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock); 9632 break; 9633 case RSM_DRV_DR_IN_PROGRESS: 9634 rsm_drv_data.drv_memdel_cnt--; 9635 if (rsm_drv_data.drv_memdel_cnt > 0) { 9636 mutex_exit(&rsm_drv_data.drv_lock); 9637 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9638 "rsm_dr_callback_post_del done:\n")); 9639 return; 9640 } 9641 rsm_drv_data.drv_state = RSM_DRV_POSTDEL_IN_PROGRESS; 9642 break; 9643 default: 9644 ASSERT(0); 9645 return; 9646 /* break; */ 9647 } 9648 } while (recheck_state); 9649 9650 mutex_exit(&rsm_drv_data.drv_lock); 9651 9652 /* Do all the unquiescing stuff here */ 9653 DBG_PRINTF((category, RSM_DEBUG, 9654 "rsm_dr_callback_post_del: unquiesce things now\n")); 9655 9656 rsm_dr_process_local_segments(RSM_DR_UNQUIESCE); 9657 9658 /* 9659 * now that all local segments have been unquiesced lets inform 9660 * the importers 9661 */ 9662 rsm_send_resume(); 9663 9664 mutex_enter(&rsm_drv_data.drv_lock); 9665 9666 rsm_drv_data.drv_state = RSM_DRV_OK; 9667 9668 cv_broadcast(&rsm_drv_data.drv_cv); 9669 9670 mutex_exit(&rsm_drv_data.drv_lock); 9671 9672 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9673 "rsm_dr_callback_post_del done\n")); 9674 9675 return; 9676 9677 }