1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 * Copyright 2012 Milan Jurik. All rights reserved. 25 */ 26 27 28 /* 29 * Overview of the RSM Kernel Agent: 30 * --------------------------------- 31 * 32 * rsm.c constitutes the implementation of the RSM kernel agent. The RSM 33 * kernel agent is a pseudo device driver which makes use of the RSMPI 34 * interface on behalf of the RSMAPI user library. 35 * 36 * The kernel agent functionality can be categorized into the following 37 * components: 38 * 1. Driver Infrastructure 39 * 2. Export/Import Segment Management 40 * 3. Internal resource allocation/deallocation 41 * 42 * The driver infrastructure includes the basic module loading entry points 43 * like _init, _info, _fini to load, unload and report information about 44 * the driver module. The driver infrastructure also includes the 45 * autoconfiguration entry points namely, attach, detach and getinfo for 46 * the device autoconfiguration. 
47 * 48 * The kernel agent is a pseudo character device driver and exports 49 * a cb_ops structure which defines the driver entry points for character 50 * device access. This includes the open and close entry points. The 51 * other entry points provided include ioctl, devmap and segmap and chpoll. 52 * read and write entry points are not used since the device is memory 53 * mapped. Also ddi_prop_op is used for the prop_op entry point. 54 * 55 * The ioctl entry point supports a number of commands, which are used by 56 * the RSMAPI library in order to export and import segments. These 57 * commands include commands for binding and rebinding the physical pages 58 * allocated to the virtual address range, publishing the export segment, 59 * unpublishing and republishing an export segment, creating an 60 * import segment and a virtual connection from this import segment to 61 * an export segment, performing scatter-gather data transfer, barrier 62 * operations. 63 * 64 * 65 * Export and Import segments: 66 * --------------------------- 67 * 68 * In order to create an RSM export segment a process allocates a range in its 69 * virtual address space for the segment using standard Solaris interfaces. 70 * The process then calls RSMAPI, which in turn makes an ioctl call to the 71 * RSM kernel agent for an allocation of physical memory pages and for 72 * creation of the export segment by binding these pages to the virtual 73 * address range. These pages are locked in memory so that remote accesses 74 * are always applied to the correct page. Then the RSM segment is published, 75 * again via RSMAPI making an ioctl to the RSM kernel agent, and a segment id 76 * is assigned to it. 77 * 78 * In order to import a published RSM segment, RSMAPI creates an import 79 * segment and forms a virtual connection across the interconnect to the 80 * export segment, via an ioctl into the kernel agent with the connect 81 * command. 
The import segment setup is completed by mapping the 82 * local device memory into the importer's virtual address space. The 83 * mapping of the import segment is handled by the segmap/devmap 84 * infrastructure described as follows. 85 * 86 * Segmap and Devmap interfaces: 87 * 88 * The RSM kernel agent allows device memory to be directly accessed by user 89 * threads via memory mapping. In order to do so, the RSM kernel agent 90 * supports the devmap and segmap entry points. 91 * 92 * The segmap entry point(rsm_segmap) is responsible for setting up a memory 93 * mapping as requested by mmap. The devmap entry point(rsm_devmap) is 94 * responsible for exporting the device memory to the user applications. 95 * rsm_segmap calls RSMPI rsm_map to allocate device memory. Then the 96 * control is transferred to the devmap_setup call which calls rsm_devmap. 97 * 98 * rsm_devmap validates the user mapping to the device or kernel memory 99 * and passes the information to the system for setting up the mapping. The 100 * actual setting up of the mapping is done by devmap_devmem_setup(for 101 * device memory) or devmap_umem_setup(for kernel memory). Callbacks are 102 * registered for device context management via the devmap_devmem_setup 103 * or devmap_umem_setup calls. The callbacks are rsmmap_map, rsmmap_unmap, 104 * rsmmap_access, rsmmap_dup. The callbacks are called when a new mapping 105 * is created, a mapping is freed, a mapping is accessed or an existing 106 * mapping is duplicated respectively. These callbacks allow the RSM kernel 107 * agent to maintain state information associated with the mappings. 108 * The state information is mainly in the form of a cookie list for the import 109 * segment for which mapping has been done. 110 * 111 * Forced disconnect of import segments: 112 * 113 * When an exported segment is unpublished, the exporter sends a forced 114 * disconnect message to all its importers. The importer segments are 115 * unloaded and disconnected. 
This involves unloading the original 116 * mappings and remapping to a preallocated kernel trash page. This is 117 * done by devmap_umem_remap. The trash/dummy page is a kernel page, 118 * preallocated by the kernel agent during attach using ddi_umem_alloc with 119 * the DDI_UMEM_TRASH flag set. This avoids a core dump in the application 120 * due to unloading of the original mappings. 121 * 122 * Additionally every segment has a mapping generation number associated 123 * with it. This is an entry in the barrier generation page, created 124 * during attach time. This mapping generation number for the import 125 * segments is incremented on a force disconnect to notify the application 126 * of the force disconnect. On this notification, the application needs 127 * to reconnect the segment to establish a new legitimate mapping. 128 * 129 * 130 * Locks used in the kernel agent: 131 * ------------------------------- 132 * 133 * The kernel agent uses a variety of mutexes and condition variables for 134 * mutual exclusion of the shared data structures and for synchronization 135 * between the various threads. Some of the locks are described as follows. 136 * 137 * Each resource structure, which represents either an export/import segment 138 * has a lock associated with it. The lock is the resource mutex, rsmrc_lock. 139 * This is used directly by RSMRC_LOCK and RSMRC_UNLOCK macros and in the 140 * rsmseglock_acquire and rsmseglock_release macros. An additional 141 * lock called the rsmsi_lock is used for the shared import data structure 142 * that is relevant for resources representing import segments. There is 143 * also a condition variable associated with the resource called s_cv. This 144 * is used to wait for events like the segment state change etc. 145 * 146 * The resource structures are allocated from a pool of resource structures, 147 * called rsm_resource. This pool is protected via a reader-writer lock, 148 * called rsmrc_lock. 
149 * 150 * There are two separate hash tables, one for the export segments and 151 * one for the import segments. The export segments are inserted into the 152 * export segment hash table only after they have been published and the 153 * import segments are inserted in the import segments list only after they 154 * have successfully connected to an exported segment. These tables are 155 * protected via reader-writer locks. 156 * 157 * Debug Support in the kernel agent: 158 * ---------------------------------- 159 * 160 * Debugging support in the kernel agent is provided by the following 161 * macros. 162 * 163 * DBG_PRINTF((category, level, message)) is a macro which logs a debug 164 * message to the kernel agents debug buffer, rsmka_dbg. This debug buffer 165 * can be viewed in kmdb as *rsmka_dbg/s. The message is logged based 166 * on the definition of the category and level. All messages that belong to 167 * the specified category(rsmdbg_category) and are of an equal or greater 168 * severity than the specified level(rsmdbg_level) are logged. The message 169 * is a string which uses the same formatting rules as the strings used in 170 * printf. 171 * 172 * The category defines which component of the kernel agent has logged this 173 * message. There are a number of categories that have been defined such as 174 * RSM_KERNEL_AGENT, RSM_OPS, RSM_IMPORT, RSM_EXPORT etc. A macro, 175 * DBG_ADDCATEGORY is used to add in another category to the currently 176 * specified category value so that the component using this new category 177 * can also effectively log debug messages. Thus, the category of a specific 178 * message is some combination of the available categories and we can define 179 * sub-categories if we want a finer level of granularity. 180 * 181 * The level defines the severity of the message. Different level values are 182 * defined, with RSM_ERR being the most severe and RSM_DEBUG_VERBOSE being 183 * the least severe(debug level is 0). 
184 * 185 * DBG_DEFINE and DBG_DEFINE_STR are macros provided to declare a debug 186 * variable or a string respectively. 187 * 188 * 189 * NOTES: 190 * 191 * Special Fork and Exec Handling: 192 * ------------------------------- 193 * 194 * The backing physical pages of an exported segment are always locked down. 195 * Thus, there are two cases in which a process having exported segments 196 * will cause a cpu to hang: (1) the process invokes exec; (2) a process 197 * forks and invokes exit before the duped file descriptors for the export 198 * segments are closed in the child process. The hang is caused because the 199 * address space release algorithm in Solaris VM subsystem is based on a 200 * non-blocking loop which does not terminate while segments are locked 201 * down. In addition to this, Solaris VM subsystem lacks a callback 202 * mechanism to the rsm kernel agent to allow unlocking these export 203 * segment pages. 204 * 205 * In order to circumvent this problem, the kernel agent does the following. 206 * The Solaris VM subsystem keeps memory segments in increasing order of 207 * virtual addresses. Thus a special page(special_exit_offset) is allocated 208 * by the kernel agent and is mmapped into the heap area of the process address 209 * space(the mmap is done by the RSMAPI library). During the mmap processing 210 * of this special page by the devmap infrastructure, a callback(the same 211 * devmap context management callbacks discussed above) is registered for an 212 * unmap. 213 * 214 * As discussed above, this page is processed by the Solaris address space 215 * release code before any of the exported segments pages(which are allocated 216 * from high memory). It is during this processing that the unmap callback gets 217 * called and this callback is responsible for force destroying the exported 218 * segments and thus eliminating the problem of locked pages. 
219 * 220 * Flow-control: 221 * ------------ 222 * 223 * A credit based flow control algorithm is used for messages whose 224 * processing cannot be done in the interrupt context because it might 225 * involve invoking rsmpi calls, or might take a long time to complete 226 * or might need to allocate resources. The algorithm operates on a per 227 * path basis. To send a message the pathend needs to have a credit and 228 * it consumes one for every message that is flow controlled. On the 229 * receiving pathend the message is put on a msgbuf_queue and a task is 230 * dispatched on the worker thread - recv_taskq where it is processed. 231 * After processing the message, the receiving pathend dequeues the message, 232 * and if it has processed > RSMIPC_LOTSFREE_MSGBUFS messages sends 233 * credits to the sender pathend. 234 * 235 * RSM_DRTEST: 236 * ----------- 237 * 238 * This is used to enable the DR testing using a test driver on test 239 * platforms which do not support DR. 240 * 241 */ 242 243 #include <sys/types.h> 244 #include <sys/param.h> 245 #include <sys/user.h> 246 #include <sys/buf.h> 247 #include <sys/systm.h> 248 #include <sys/cred.h> 249 #include <sys/vm.h> 250 #include <sys/uio.h> 251 #include <vm/seg.h> 252 #include <vm/page.h> 253 #include <sys/stat.h> 254 255 #include <sys/time.h> 256 #include <sys/errno.h> 257 258 #include <sys/file.h> 259 #include <sys/uio.h> 260 #include <sys/proc.h> 261 #include <sys/mman.h> 262 #include <sys/open.h> 263 #include <sys/atomic.h> 264 #include <sys/mem_config.h> 265 266 267 #include <sys/ddi.h> 268 #include <sys/devops.h> 269 #include <sys/ddidevmap.h> 270 #include <sys/sunddi.h> 271 #include <sys/esunddi.h> 272 #include <sys/ddi_impldefs.h> 273 274 #include <sys/kmem.h> 275 #include <sys/conf.h> 276 #include <sys/devops.h> 277 #include <sys/ddi_impldefs.h> 278 279 #include <sys/modctl.h> 280 281 #include <sys/policy.h> 282 #include <sys/types.h> 283 #include <sys/conf.h> 284 #include <sys/param.h> 285 286 
#include <sys/taskq.h>

#include <sys/rsm/rsm_common.h>
#include <sys/rsm/rsmapi_common.h>
#include <sys/rsm/rsm.h>
#include <rsm_in.h>
#include <sys/rsm/rsmka_path_int.h>
#include <sys/rsm/rsmpi.h>

#include <sys/modctl.h>
#include <sys/debug.h>

#include <sys/tuneable.h>

/*
 * When built for DR testing, the memory-reconfiguration callbacks are
 * registered through a test driver instead of the kphysm interfaces.
 */
#ifdef	RSM_DRTEST
extern int rsm_kphysm_setup_func_register(kphysm_setup_vector_t *vec,
		void *arg);
extern void rsm_kphysm_setup_func_unregister(kphysm_setup_vector_t *vec,
		void *arg);
#endif

/*
 * Routines provided by other parts of the kernel agent.
 */
extern void dbg_printf(int category, int level, char *fmt, ...);
extern void rsmka_pathmanager_init();
extern void rsmka_pathmanager_cleanup();
extern void rele_sendq_token(sendq_token_t *);
extern rsm_addr_t get_remote_hwaddr(adapter_t *, rsm_node_id_t);
extern rsm_node_id_t get_remote_nodeid(adapter_t *, rsm_addr_t);
extern int rsmka_topology_ioctl(caddr_t, int, int);

/*
 * Data defined outside this file.
 */
extern pri_t maxclsyspri;
extern work_queue_t work_queue;
extern kmutex_t ipc_info_lock;
extern kmutex_t ipc_info_cvlock;
extern kcondvar_t ipc_info_cv;
extern kmutex_t path_hold_cvlock;
extern kcondvar_t path_hold_cv;

extern kmutex_t rsmka_buf_lock;

extern path_t *rsm_find_path(char *, int, rsm_addr_t);
extern adapter_t *rsmka_lookup_adapter(char *, int);
extern sendq_token_t *rsmka_get_sendq_token(rsm_node_id_t, sendq_token_t *);
extern boolean_t rsmka_do_path_active(path_t *, int);
extern boolean_t rsmka_check_node_alive(rsm_node_id_t);
extern void rsmka_release_adapter(adapter_t *);
extern void rsmka_enqueue_msgbuf(path_t *path, void *data);
extern void rsmka_dequeue_msgbuf(path_t *path);
extern msgbuf_elem_t *rsmka_gethead_msgbuf(path_t *path);
/* lint -w2 */

/*
 * Character device entry points (referenced from rsm_cb_ops below).
 */
static int rsm_open(dev_t *, int, int, cred_t *);
static int rsm_close(dev_t, int, int, cred_t *);
static int rsm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
    cred_t *credp, int *rvalp);
static int rsm_devmap(dev_t, devmap_cookie_t, offset_t, size_t, size_t *,
    uint_t);
static int rsm_segmap(dev_t, off_t, struct as *, caddr_t *, off_t, uint_t,
    uint_t, uint_t, cred_t *);
static int rsm_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp);

/*
 * Autoconfiguration entry points (referenced from rsm_ops below).
 */
static int rsm_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int rsm_attach(dev_info_t *, ddi_attach_cmd_t);
static int rsm_detach(dev_info_t *, ddi_detach_cmd_t);

/*
 * File-private helpers defined later in this file.
 */
static int rsmipc_send(rsm_node_id_t, rsmipc_request_t *, rsmipc_reply_t *);
static void rsm_force_unload(rsm_node_id_t, rsm_memseg_id_t, boolean_t);
static void rsm_send_importer_disconnects(rsm_memseg_id_t, rsm_node_id_t);
static void rsm_send_republish(rsm_memseg_id_t, rsmapi_access_entry_t *, int,
    rsm_permission_t);
static void rsm_export_force_destroy(ddi_umem_cookie_t *);
static void rsmacl_free(rsmapi_access_entry_t *, int);
static void rsmpiacl_free(rsm_access_entry_t *, int);

static int rsm_inc_pgcnt(pgcnt_t);
static void rsm_dec_pgcnt(pgcnt_t);
static void rsm_free_mapinfo(rsm_mapinfo_t *mapinfop);
static rsm_mapinfo_t *rsm_get_mapinfo(rsmseg_t *, off_t, size_t, off_t *,
    size_t *);
static void exporter_quiesce();
static void rsmseg_suspend(rsmseg_t *, int *);
static void rsmsegshare_suspend(rsmseg_t *);
static int rsmseg_resume(rsmseg_t *, void **);
static int rsmsegshare_resume(rsmseg_t *);

/*
 * Character/block entry point vector. Only open, close, ioctl, devmap,
 * segmap, poll and prop_op are supplied; the remaining slots are nodev.
 */
static struct cb_ops rsm_cb_ops = {
	rsm_open,		/* open */
	rsm_close,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	nodev,			/* read */
	nodev,			/* write */
	rsm_ioctl,		/* ioctl */
	rsm_devmap,		/* devmap */
	NULL,			/* mmap */
	rsm_segmap,		/* segmap */
	rsm_chpoll,		/* poll */
	ddi_prop_op,		/* cb_prop_op */
	0,			/* streamtab */
	D_NEW|D_MP|D_DEVMAP,	/* Driver compatibility flag */
	0,
	0,
	0
};

/*
 * Device operations vector handed to the module framework via modldrv.
 */
static struct dev_ops rsm_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt */
	rsm_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	rsm_attach,		/* attach */
	rsm_detach,		/* detach */
	nodev,			/* reset */
	&rsm_cb_ops,		/* driver operations */
	(struct bus_ops *)0,	/* bus operations */
	0,
	ddi_quiesce_not_needed,		/* quiesce */
};

/*
 * Module linkage information for the kernel.
 */

static struct modldrv modldrv = {
	&mod_driverops,		/* Type of module. This one is a pseudo driver */
	"Remote Shared Memory Driver",
	&rsm_ops,		/* driver ops */
};

static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modldrv,
	0,
	0,
	0
};

/*
 * Dynamic reconfiguration (memory add/delete) callbacks and the vector
 * through which they are registered in rsm_attach.
 */
static void rsm_dr_callback_post_add(void *arg, pgcnt_t delta);
static int rsm_dr_callback_pre_del(void *arg, pgcnt_t delta);
static void rsm_dr_callback_post_del(void *arg, pgcnt_t delta, int cancelled);

static kphysm_setup_vector_t rsm_dr_callback_vec = {
	KPHYSM_SETUP_VECTOR_VERSION,
	rsm_dr_callback_post_add,
	rsm_dr_callback_pre_del,
	rsm_dr_callback_post_del
};

/* This flag can be changed to 0 to help with PIT testing */
int rsmka_modunloadok = 1;
int no_reply_cnt = 0;

uint64_t rsm_ctrlmsg_errcnt = 0;
uint64_t rsm_ipcsend_errcnt = 0;

#define	MAX_NODES 64

static struct rsm_driver_data rsm_drv_data;
static struct rsmresource_table rsm_resource;

/*
 * Resource table, segment and hash table helpers.
 */
static void rsmresource_insert(minor_t, rsmresource_t *, rsm_resource_type_t);
static void rsmresource_destroy(void);
static int rsmresource_alloc(minor_t *);
static rsmresource_t *rsmresource_free(minor_t rnum);
static int rsm_closeconnection(rsmseg_t *seg, void **cookie);
static int rsm_unpublish(rsmseg_t *seg, int mode);
static int rsm_unbind(rsmseg_t *seg);
static uint_t rsmhash(rsm_memseg_id_t key);
static void rsmhash_alloc(rsmhash_table_t *rhash, int size);
static void rsmhash_free(rsmhash_table_t *rhash, int size);
static void *rsmhash_getbkt(rsmhash_table_t *rhash, uint_t hashval);
static void **rsmhash_bktaddr(rsmhash_table_t *rhash, uint_t hashval);
static int rsm_send_notimporting(rsm_node_id_t dest, rsm_memseg_id_t segid,
    void *cookie);
int rsm_disconnect(rsmseg_t *seg);
void rsmseg_unload(rsmseg_t *);
void rsm_suspend_complete(rsm_node_id_t src_node, int flag);

rsm_intr_hand_ret_t rsm_srv_func(rsm_controller_object_t *chd,
    rsm_intr_q_op_t opcode, rsm_addr_t src,
    void *data, size_t size, rsm_intr_hand_arg_t arg);

static void rsm_intr_callback(void *, rsm_addr_t, rsm_intr_hand_arg_t);

rsm_node_id_t my_nodeid;

/* cookie, va, offsets and length for the barrier */
static rsm_gnum_t		*bar_va;
static ddi_umem_cookie_t	bar_cookie;
static off_t			barrier_offset;
static size_t			barrier_size;
static int			max_segs;

/* cookie for the trash memory */
static ddi_umem_cookie_t	remap_cookie;

static rsm_memseg_id_t	rsm_nextavail_segmentid;

extern taskq_t *work_taskq;
extern char *taskq_name;

static dev_info_t *rsm_dip;	/* private copy of devinfo pointer */

static rsmhash_table_t rsm_export_segs;		/* list of exported segs */
rsmhash_table_t rsm_import_segs;		/* list of imported segs */
static rsmhash_table_t rsm_event_queues;	/* list of event queues */

static rsm_ipc_t rsm_ipc;		/* ipc info */

/* list of nodes to which RSMIPC_MSG_SUSPEND has been sent */
static list_head_t rsm_suspend_list;

/* list of descriptors for remote importers */
static importers_table_t importer_list;

kmutex_t rsm_suspend_cvlock;
kcondvar_t rsm_suspend_cv;

static kmutex_t rsm_lock;

adapter_t loopback_adapter;
rsm_controller_attr_t loopback_attr;

int rsmipc_send_controlmsg(path_t *path, int msgtype);
512 513 void rsmka_init_loopback(); 514 515 int rsmka_null_seg_create( 516 rsm_controller_handle_t, 517 rsm_memseg_export_handle_t *, 518 size_t, 519 uint_t, 520 rsm_memory_local_t *, 521 rsm_resource_callback_t, 522 rsm_resource_callback_arg_t); 523 524 int rsmka_null_seg_destroy( 525 rsm_memseg_export_handle_t); 526 527 int rsmka_null_bind( 528 rsm_memseg_export_handle_t, 529 off_t, 530 rsm_memory_local_t *, 531 rsm_resource_callback_t, 532 rsm_resource_callback_arg_t); 533 534 int rsmka_null_unbind( 535 rsm_memseg_export_handle_t, 536 off_t, 537 size_t); 538 539 int rsmka_null_rebind( 540 rsm_memseg_export_handle_t, 541 off_t, 542 rsm_memory_local_t *, 543 rsm_resource_callback_t, 544 rsm_resource_callback_arg_t); 545 546 int rsmka_null_publish( 547 rsm_memseg_export_handle_t, 548 rsm_access_entry_t [], 549 uint_t, 550 rsm_memseg_id_t, 551 rsm_resource_callback_t, 552 rsm_resource_callback_arg_t); 553 554 555 int rsmka_null_republish( 556 rsm_memseg_export_handle_t, 557 rsm_access_entry_t [], 558 uint_t, 559 rsm_resource_callback_t, 560 rsm_resource_callback_arg_t); 561 562 int rsmka_null_unpublish( 563 rsm_memseg_export_handle_t); 564 565 rsm_ops_t null_rsmpi_ops; 566 567 /* 568 * data and locks to keep track of total amount of exported memory 569 */ 570 static pgcnt_t rsm_pgcnt; 571 static pgcnt_t rsm_pgcnt_max; /* max allowed */ 572 static kmutex_t rsm_pgcnt_lock; 573 574 static int rsm_enable_dr; 575 576 static char loopback_str[] = "loopback"; 577 578 int rsm_hash_size; 579 580 /* 581 * The locking model is as follows: 582 * 583 * Local operations: 584 * find resource - grab reader lock on resouce list 585 * insert rc - grab writer lock 586 * delete rc - grab writer lock and resource mutex 587 * read/write - no lock 588 * 589 * Remote invocations: 590 * find resource - grab read lock and resource mutex 591 * 592 * State: 593 * resource state - grab resource mutex 594 */ 595 596 int 597 _init(void) 598 { 599 int e; 600 601 e = mod_install(&modlinkage); 602 
if (e != 0) { 603 return (e); 604 } 605 606 mutex_init(&rsm_lock, NULL, MUTEX_DRIVER, NULL); 607 608 mutex_init(&rsmka_buf_lock, NULL, MUTEX_DEFAULT, NULL); 609 610 611 rw_init(&rsm_resource.rsmrc_lock, NULL, RW_DRIVER, NULL); 612 613 rsm_hash_size = RSM_HASHSZ; 614 615 rw_init(&rsm_export_segs.rsmhash_rw, NULL, RW_DRIVER, NULL); 616 617 rw_init(&rsm_import_segs.rsmhash_rw, NULL, RW_DRIVER, NULL); 618 619 mutex_init(&importer_list.lock, NULL, MUTEX_DRIVER, NULL); 620 621 mutex_init(&rsm_ipc.lock, NULL, MUTEX_DRIVER, NULL); 622 cv_init(&rsm_ipc.cv, NULL, CV_DRIVER, 0); 623 624 mutex_init(&rsm_suspend_cvlock, NULL, MUTEX_DRIVER, NULL); 625 cv_init(&rsm_suspend_cv, NULL, CV_DRIVER, 0); 626 627 mutex_init(&rsm_drv_data.drv_lock, NULL, MUTEX_DRIVER, NULL); 628 cv_init(&rsm_drv_data.drv_cv, NULL, CV_DRIVER, 0); 629 630 rsm_ipc.count = RSMIPC_SZ; 631 rsm_ipc.wanted = 0; 632 rsm_ipc.sequence = 0; 633 634 (void) mutex_init(&rsm_pgcnt_lock, NULL, MUTEX_DRIVER, NULL); 635 636 for (e = 0; e < RSMIPC_SZ; e++) { 637 rsmipc_slot_t *slot = &rsm_ipc.slots[e]; 638 639 RSMIPC_SET(slot, RSMIPC_FREE); 640 mutex_init(&slot->rsmipc_lock, NULL, MUTEX_DRIVER, NULL); 641 cv_init(&slot->rsmipc_cv, NULL, CV_DRIVER, 0); 642 } 643 644 /* 645 * Initialize the suspend message list 646 */ 647 rsm_suspend_list.list_head = NULL; 648 mutex_init(&rsm_suspend_list.list_lock, NULL, MUTEX_DRIVER, NULL); 649 650 /* 651 * It is assumed here that configuration data is available 652 * during system boot since _init may be called at that time. 
653 */ 654 655 rsmka_pathmanager_init(); 656 657 DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE, 658 "rsm: _init done\n")); 659 660 return (DDI_SUCCESS); 661 662 } 663 664 int 665 _info(struct modinfo *modinfop) 666 { 667 668 return (mod_info(&modlinkage, modinfop)); 669 } 670 671 int 672 _fini(void) 673 { 674 int e; 675 676 DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE, 677 "rsm: _fini enter\n")); 678 679 /* 680 * The rsmka_modunloadok flag is simply used to help with 681 * the PIT testing. Make this flag 0 to disallow modunload. 682 */ 683 if (rsmka_modunloadok == 0) 684 return (EBUSY); 685 686 /* rsm_detach will be called as a result of mod_remove */ 687 e = mod_remove(&modlinkage); 688 if (e) { 689 DBG_PRINTF((RSM_KERNEL_AGENT, RSM_ERR, 690 "Unable to fini RSM %x\n", e)); 691 return (e); 692 } 693 694 rsmka_pathmanager_cleanup(); 695 696 rw_destroy(&rsm_resource.rsmrc_lock); 697 698 rw_destroy(&rsm_export_segs.rsmhash_rw); 699 rw_destroy(&rsm_import_segs.rsmhash_rw); 700 rw_destroy(&rsm_event_queues.rsmhash_rw); 701 702 mutex_destroy(&importer_list.lock); 703 704 mutex_destroy(&rsm_ipc.lock); 705 cv_destroy(&rsm_ipc.cv); 706 707 (void) mutex_destroy(&rsm_suspend_list.list_lock); 708 709 (void) mutex_destroy(&rsm_pgcnt_lock); 710 711 DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE, "_fini done\n")); 712 713 return (DDI_SUCCESS); 714 715 } 716 717 /*ARGSUSED1*/ 718 static int 719 rsm_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 720 { 721 minor_t rnum; 722 int percent; 723 int ret; 724 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI); 725 726 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_attach enter\n")); 727 728 switch (cmd) { 729 case DDI_ATTACH: 730 break; 731 case DDI_RESUME: 732 default: 733 DBG_PRINTF((category, RSM_ERR, 734 "rsm:rsm_attach - cmd not supported\n")); 735 return (DDI_FAILURE); 736 } 737 738 if (rsm_dip != NULL) { 739 DBG_PRINTF((category, RSM_ERR, 740 "rsm:rsm_attach - supports only " 741 "one instance\n")); 742 return (DDI_FAILURE); 743 
} 744 745 rsm_enable_dr = ddi_prop_get_int(DDI_DEV_T_ANY, devi, 746 DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, 747 "enable-dynamic-reconfiguration", 1); 748 749 mutex_enter(&rsm_drv_data.drv_lock); 750 rsm_drv_data.drv_state = RSM_DRV_REG_PROCESSING; 751 mutex_exit(&rsm_drv_data.drv_lock); 752 753 if (rsm_enable_dr) { 754 #ifdef RSM_DRTEST 755 ret = rsm_kphysm_setup_func_register(&rsm_dr_callback_vec, 756 (void *)NULL); 757 #else 758 ret = kphysm_setup_func_register(&rsm_dr_callback_vec, 759 (void *)NULL); 760 #endif 761 if (ret != 0) { 762 mutex_exit(&rsm_drv_data.drv_lock); 763 cmn_err(CE_CONT, "rsm:rsm_attach - Dynamic " 764 "reconfiguration setup failed\n"); 765 return (DDI_FAILURE); 766 } 767 } 768 769 mutex_enter(&rsm_drv_data.drv_lock); 770 ASSERT(rsm_drv_data.drv_state == RSM_DRV_REG_PROCESSING); 771 rsm_drv_data.drv_state = RSM_DRV_OK; 772 cv_broadcast(&rsm_drv_data.drv_cv); 773 mutex_exit(&rsm_drv_data.drv_lock); 774 775 /* 776 * page_list_read_lock(); 777 * xx_setup(); 778 * page_list_read_unlock(); 779 */ 780 781 rsm_hash_size = ddi_prop_get_int(DDI_DEV_T_ANY, devi, 782 DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, 783 "segment-hashtable-size", RSM_HASHSZ); 784 if (rsm_hash_size == 0) { 785 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 786 "rsm: segment-hashtable-size in rsm.conf " 787 "must be greater than 0, defaulting to 128\n")); 788 rsm_hash_size = RSM_HASHSZ; 789 } 790 791 DBG_PRINTF((category, RSM_DEBUG, "rsm_attach rsm_hash_size: %d\n", 792 rsm_hash_size)); 793 794 rsm_pgcnt = 0; 795 796 percent = ddi_prop_get_int(DDI_DEV_T_ANY, devi, 797 DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, 798 "max-exported-memory", 0); 799 if (percent < 0) { 800 DBG_PRINTF((category, RSM_ERR, 801 "rsm:rsm_attach not enough memory available to " 802 "export, or max-exported-memory set incorrectly.\n")); 803 return (DDI_FAILURE); 804 } 805 /* 0 indicates no fixed upper limit. 
maxmem is the max */ 806 /* available pageable physical mem */ 807 rsm_pgcnt_max = (percent*maxmem)/100; 808 809 if (rsm_pgcnt_max > 0) { 810 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 811 "rsm: Available physical memory = %lu pages, " 812 "Max exportable memory = %lu pages", 813 maxmem, rsm_pgcnt_max)); 814 } 815 816 /* 817 * Create minor number 818 */ 819 if (rsmresource_alloc(&rnum) != RSM_SUCCESS) { 820 DBG_PRINTF((category, RSM_ERR, 821 "rsm: rsm_attach - Unable to get " 822 "minor number\n")); 823 return (DDI_FAILURE); 824 } 825 826 ASSERT(rnum == RSM_DRIVER_MINOR); 827 828 if (ddi_create_minor_node(devi, DRIVER_NAME, S_IFCHR, 829 rnum, DDI_PSEUDO, NULL) == DDI_FAILURE) { 830 DBG_PRINTF((category, RSM_ERR, 831 "rsm: rsm_attach - unable to allocate " 832 "minor #\n")); 833 return (DDI_FAILURE); 834 } 835 836 rsm_dip = devi; 837 /* 838 * Allocate the hashtables 839 */ 840 rsmhash_alloc(&rsm_export_segs, rsm_hash_size); 841 rsmhash_alloc(&rsm_import_segs, rsm_hash_size); 842 843 importer_list.bucket = (importing_token_t **) 844 kmem_zalloc(rsm_hash_size * sizeof (importing_token_t *), KM_SLEEP); 845 846 /* 847 * Allocate a resource struct 848 */ 849 { 850 rsmresource_t *p; 851 852 p = (rsmresource_t *)kmem_zalloc(sizeof (*p), KM_SLEEP); 853 854 mutex_init(&p->rsmrc_lock, NULL, MUTEX_DRIVER, (void *) NULL); 855 856 rsmresource_insert(rnum, p, RSM_RESOURCE_BAR); 857 } 858 859 /* 860 * Based on the rsm.conf property max-segments, determine the maximum 861 * number of segments that can be exported/imported. This is then used 862 * to determine the size for barrier failure pages. 
863 */ 864 865 /* First get the max number of segments from the rsm.conf file */ 866 max_segs = ddi_prop_get_int(DDI_DEV_T_ANY, devi, 867 DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, 868 "max-segments", 0); 869 if (max_segs == 0) { 870 /* Use default number of segments */ 871 max_segs = RSM_MAX_NUM_SEG; 872 } 873 874 /* 875 * Based on the max number of segments allowed, determine the barrier 876 * page size. add 1 to max_segs since the barrier page itself uses 877 * a slot 878 */ 879 barrier_size = roundup((max_segs + 1) * sizeof (rsm_gnum_t), 880 PAGESIZE); 881 882 /* 883 * allocation of the barrier failure page 884 */ 885 bar_va = (rsm_gnum_t *)ddi_umem_alloc(barrier_size, 886 DDI_UMEM_SLEEP, &bar_cookie); 887 888 /* 889 * Set the barrier_offset 890 */ 891 barrier_offset = 0; 892 893 /* 894 * Allocate a trash memory and get a cookie for it. This will be used 895 * when remapping segments during force disconnects. Allocate the 896 * trash memory with a large size which is page aligned. 897 */ 898 (void) ddi_umem_alloc((size_t)TRASHSIZE, 899 DDI_UMEM_TRASH, &remap_cookie); 900 901 /* initialize user segment id allocation variable */ 902 rsm_nextavail_segmentid = (rsm_memseg_id_t)RSM_USER_APP_ID_BASE; 903 904 /* 905 * initialize the null_rsmpi_ops vector and the loopback adapter 906 */ 907 rsmka_init_loopback(); 908 909 910 ddi_report_dev(devi); 911 912 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_attach done\n")); 913 914 return (DDI_SUCCESS); 915 } 916 917 /* 918 * The call to mod_remove in the _fine routine will cause the system 919 * to call rsm_detach 920 */ 921 /*ARGSUSED*/ 922 static int 923 rsm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 924 { 925 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI); 926 927 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_detach enter\n")); 928 929 switch (cmd) { 930 case DDI_DETACH: 931 break; 932 default: 933 DBG_PRINTF((category, RSM_ERR, 934 "rsm:rsm_detach - cmd %x not supported\n", 935 cmd)); 936 return (DDI_FAILURE); 937 } 938 
939 mutex_enter(&rsm_drv_data.drv_lock); 940 while (rsm_drv_data.drv_state != RSM_DRV_OK) 941 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock); 942 rsm_drv_data.drv_state = RSM_DRV_UNREG_PROCESSING; 943 mutex_exit(&rsm_drv_data.drv_lock); 944 945 /* 946 * Unregister the DR callback functions 947 */ 948 if (rsm_enable_dr) { 949 #ifdef RSM_DRTEST 950 rsm_kphysm_setup_func_unregister(&rsm_dr_callback_vec, 951 (void *)NULL); 952 #else 953 kphysm_setup_func_unregister(&rsm_dr_callback_vec, 954 (void *)NULL); 955 #endif 956 } 957 958 mutex_enter(&rsm_drv_data.drv_lock); 959 ASSERT(rsm_drv_data.drv_state == RSM_DRV_UNREG_PROCESSING); 960 rsm_drv_data.drv_state = RSM_DRV_NEW; 961 mutex_exit(&rsm_drv_data.drv_lock); 962 963 ASSERT(rsm_suspend_list.list_head == NULL); 964 965 /* 966 * Release all resources, seglist, controller, ... 967 */ 968 969 /* remove intersend queues */ 970 /* remove registered services */ 971 972 973 ddi_remove_minor_node(dip, DRIVER_NAME); 974 rsm_dip = NULL; 975 976 /* 977 * Free minor zero resource 978 */ 979 { 980 rsmresource_t *p; 981 982 p = rsmresource_free(RSM_DRIVER_MINOR); 983 if (p) { 984 mutex_destroy(&p->rsmrc_lock); 985 kmem_free((void *)p, sizeof (*p)); 986 } 987 } 988 989 /* 990 * Free resource table 991 */ 992 993 rsmresource_destroy(); 994 995 /* 996 * Free the hash tables 997 */ 998 rsmhash_free(&rsm_export_segs, rsm_hash_size); 999 rsmhash_free(&rsm_import_segs, rsm_hash_size); 1000 1001 kmem_free((void *)importer_list.bucket, 1002 rsm_hash_size * sizeof (importing_token_t *)); 1003 importer_list.bucket = NULL; 1004 1005 1006 /* free barrier page */ 1007 if (bar_cookie != NULL) { 1008 ddi_umem_free(bar_cookie); 1009 } 1010 bar_va = NULL; 1011 bar_cookie = NULL; 1012 1013 /* 1014 * Free the memory allocated for the trash 1015 */ 1016 if (remap_cookie != NULL) { 1017 ddi_umem_free(remap_cookie); 1018 } 1019 remap_cookie = NULL; 1020 1021 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_detach done\n")); 1022 1023 return 
(DDI_SUCCESS); 1024 } 1025 1026 /*ARGSUSED*/ 1027 static int 1028 rsm_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 1029 { 1030 register int error; 1031 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI); 1032 1033 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_info enter\n")); 1034 1035 switch (infocmd) { 1036 case DDI_INFO_DEVT2DEVINFO: 1037 if (rsm_dip == NULL) 1038 error = DDI_FAILURE; 1039 else { 1040 *result = (void *)rsm_dip; 1041 error = DDI_SUCCESS; 1042 } 1043 break; 1044 case DDI_INFO_DEVT2INSTANCE: 1045 *result = (void *)0; 1046 error = DDI_SUCCESS; 1047 break; 1048 default: 1049 error = DDI_FAILURE; 1050 } 1051 1052 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_info done\n")); 1053 return (error); 1054 } 1055 1056 adapter_t * 1057 rsm_getadapter(rsm_ioctlmsg_t *msg, int mode) 1058 { 1059 adapter_t *adapter; 1060 char adapter_devname[MAXNAMELEN]; 1061 int instance; 1062 DBG_DEFINE(category, 1063 RSM_KERNEL_AGENT | RSM_IMPORT | RSM_EXPORT | RSM_IOCTL); 1064 1065 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_getadapter enter\n")); 1066 1067 instance = msg->cnum; 1068 1069 if ((msg->cname_len <= 0) || (msg->cname_len > MAXNAMELEN)) { 1070 return (NULL); 1071 } 1072 1073 if (ddi_copyin(msg->cname, adapter_devname, msg->cname_len, mode)) 1074 return (NULL); 1075 1076 if (strcmp(adapter_devname, "loopback") == 0) 1077 return (&loopback_adapter); 1078 1079 adapter = rsmka_lookup_adapter(adapter_devname, instance); 1080 1081 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_getadapter done\n")); 1082 1083 return (adapter); 1084 } 1085 1086 1087 /* 1088 * *********************** Resource Number Management ******************** 1089 * All resources are stored in a simple hash table. The table is an array 1090 * of pointers to resource blks. Each blk contains: 1091 * base - base number of this blk 1092 * used - number of used slots in this blk. 1093 * blks - array of pointers to resource items. 
1094 * An entry in a resource blk is empty if it's NULL. 1095 * 1096 * We start with no resource array. Each time we run out of slots, we 1097 * reallocate a new larger array and copy the pointer to the new array and 1098 * a new resource blk is allocated and added to the hash table. 1099 * 1100 * The resource control block contains: 1101 * root - array of pointer of resource blks 1102 * sz - current size of array. 1103 * len - last valid entry in array. 1104 * 1105 * A search operation based on a resource number is as follows: 1106 * index = rnum / RESOURCE_BLKSZ; 1107 * ASSERT(index < resource_block.len); 1108 * ASSERT(index < resource_block.sz); 1109 * offset = rnum % RESOURCE_BLKSZ; 1110 * ASSERT(offset >= resource_block.root[index]->base); 1111 * ASSERT(offset < resource_block.root[index]->base + RESOURCE_BLKSZ); 1112 * return resource_block.root[index]->blks[offset]; 1113 * 1114 * A resource blk is freed with its used count reachs zero. 1115 */ 1116 static int 1117 rsmresource_alloc(minor_t *rnum) 1118 { 1119 1120 /* search for available resource slot */ 1121 int i, j, empty = -1; 1122 rsmresource_blk_t *blk; 1123 1124 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1125 "rsmresource_alloc enter\n")); 1126 1127 rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER); 1128 1129 /* Try to find an empty slot */ 1130 for (i = 0; i < rsm_resource.rsmrc_len; i++) { 1131 blk = rsm_resource.rsmrc_root[i]; 1132 if (blk != NULL && blk->rsmrcblk_avail > 0) { 1133 /* found an empty slot in this blk */ 1134 for (j = 0; j < RSMRC_BLKSZ; j++) { 1135 if (blk->rsmrcblk_blks[j] == NULL) { 1136 *rnum = (minor_t) 1137 (j + (i * RSMRC_BLKSZ)); 1138 /* 1139 * obey gen page limits 1140 */ 1141 if (*rnum >= max_segs + 1) { 1142 if (empty < 0) { 1143 rw_exit(&rsm_resource. 
1144 rsmrc_lock); 1145 DBG_PRINTF(( 1146 RSM_KERNEL_ALL, 1147 RSM_ERR, 1148 "rsmresource" 1149 "_alloc failed:" 1150 "not enough res" 1151 "%d\n", *rnum)); 1152 return (RSMERR_INSUFFICIENT_RESOURCES); 1153 } else { 1154 /* use empty slot */ 1155 break; 1156 } 1157 1158 } 1159 1160 blk->rsmrcblk_blks[j] = RSMRC_RESERVED; 1161 blk->rsmrcblk_avail--; 1162 rw_exit(&rsm_resource.rsmrc_lock); 1163 DBG_PRINTF((RSM_KERNEL_ALL, 1164 RSM_DEBUG_VERBOSE, 1165 "rsmresource_alloc done\n")); 1166 return (RSM_SUCCESS); 1167 } 1168 } 1169 } else if (blk == NULL && empty < 0) { 1170 /* remember first empty slot */ 1171 empty = i; 1172 } 1173 } 1174 1175 /* Couldn't find anything, allocate a new blk */ 1176 /* 1177 * Do we need to reallocate the root array 1178 */ 1179 if (empty < 0) { 1180 if (rsm_resource.rsmrc_len == rsm_resource.rsmrc_sz) { 1181 /* 1182 * Allocate new array and copy current stuff into it 1183 */ 1184 rsmresource_blk_t **p; 1185 uint_t newsz = (uint_t)rsm_resource.rsmrc_sz + 1186 RSMRC_BLKSZ; 1187 /* 1188 * Don't allocate more that max valid rnum 1189 */ 1190 if (rsm_resource.rsmrc_len*RSMRC_BLKSZ >= 1191 max_segs + 1) { 1192 rw_exit(&rsm_resource.rsmrc_lock); 1193 return (RSMERR_INSUFFICIENT_RESOURCES); 1194 } 1195 1196 p = (rsmresource_blk_t **)kmem_zalloc( 1197 newsz * sizeof (*p), 1198 KM_SLEEP); 1199 1200 if (rsm_resource.rsmrc_root) { 1201 uint_t oldsz; 1202 1203 oldsz = (uint_t)(rsm_resource.rsmrc_sz * 1204 (int)sizeof (*p)); 1205 1206 /* 1207 * Copy old data into new space and 1208 * free old stuff 1209 */ 1210 bcopy(rsm_resource.rsmrc_root, p, oldsz); 1211 kmem_free(rsm_resource.rsmrc_root, oldsz); 1212 } 1213 1214 rsm_resource.rsmrc_root = p; 1215 rsm_resource.rsmrc_sz = (int)newsz; 1216 } 1217 1218 empty = rsm_resource.rsmrc_len; 1219 rsm_resource.rsmrc_len++; 1220 } 1221 1222 /* 1223 * Allocate a new blk 1224 */ 1225 blk = (rsmresource_blk_t *)kmem_zalloc(sizeof (*blk), KM_SLEEP); 1226 ASSERT(rsm_resource.rsmrc_root[empty] == NULL); 1227 
rsm_resource.rsmrc_root[empty] = blk; 1228 blk->rsmrcblk_avail = RSMRC_BLKSZ - 1; 1229 1230 /* 1231 * Allocate slot 1232 */ 1233 1234 *rnum = (minor_t)(empty * RSMRC_BLKSZ); 1235 1236 /* 1237 * watch out not to exceed bounds of barrier page 1238 */ 1239 if (*rnum >= max_segs + 1) { 1240 rw_exit(&rsm_resource.rsmrc_lock); 1241 DBG_PRINTF((RSM_KERNEL_ALL, RSM_ERR, 1242 "rsmresource_alloc failed %d\n", *rnum)); 1243 1244 return (RSMERR_INSUFFICIENT_RESOURCES); 1245 } 1246 blk->rsmrcblk_blks[0] = RSMRC_RESERVED; 1247 1248 1249 rw_exit(&rsm_resource.rsmrc_lock); 1250 1251 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1252 "rsmresource_alloc done\n")); 1253 1254 return (RSM_SUCCESS); 1255 } 1256 1257 static rsmresource_t * 1258 rsmresource_free(minor_t rnum) 1259 { 1260 1261 /* search for available resource slot */ 1262 int i, j; 1263 rsmresource_blk_t *blk; 1264 rsmresource_t *p; 1265 1266 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1267 "rsmresource_free enter\n")); 1268 1269 i = (int)(rnum / RSMRC_BLKSZ); 1270 j = (int)(rnum % RSMRC_BLKSZ); 1271 1272 if (i >= rsm_resource.rsmrc_len) { 1273 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1274 "rsmresource_free done\n")); 1275 return (NULL); 1276 } 1277 1278 rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER); 1279 1280 ASSERT(rsm_resource.rsmrc_root); 1281 ASSERT(i < rsm_resource.rsmrc_len); 1282 ASSERT(i < rsm_resource.rsmrc_sz); 1283 blk = rsm_resource.rsmrc_root[i]; 1284 if (blk == NULL) { 1285 rw_exit(&rsm_resource.rsmrc_lock); 1286 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1287 "rsmresource_free done\n")); 1288 return (NULL); 1289 } 1290 1291 ASSERT(blk->rsmrcblk_blks[j]); /* reserved or full */ 1292 1293 p = blk->rsmrcblk_blks[j]; 1294 if (p == RSMRC_RESERVED) { 1295 p = NULL; 1296 } 1297 1298 blk->rsmrcblk_blks[j] = NULL; 1299 blk->rsmrcblk_avail++; 1300 if (blk->rsmrcblk_avail == RSMRC_BLKSZ) { 1301 /* free this blk */ 1302 kmem_free(blk, sizeof (*blk)); 1303 rsm_resource.rsmrc_root[i] = NULL; 1304 } 1305 
1306 rw_exit(&rsm_resource.rsmrc_lock); 1307 1308 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1309 "rsmresource_free done\n")); 1310 1311 return (p); 1312 } 1313 1314 static rsmresource_t * 1315 rsmresource_lookup(minor_t rnum, int lock) 1316 { 1317 int i, j; 1318 rsmresource_blk_t *blk; 1319 rsmresource_t *p; 1320 1321 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1322 "rsmresource_lookup enter\n")); 1323 1324 /* Find resource and lock it in READER mode */ 1325 /* search for available resource slot */ 1326 1327 i = (int)(rnum / RSMRC_BLKSZ); 1328 j = (int)(rnum % RSMRC_BLKSZ); 1329 1330 if (i >= rsm_resource.rsmrc_len) { 1331 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1332 "rsmresource_lookup done\n")); 1333 return (NULL); 1334 } 1335 1336 rw_enter(&rsm_resource.rsmrc_lock, RW_READER); 1337 1338 blk = rsm_resource.rsmrc_root[i]; 1339 if (blk != NULL) { 1340 ASSERT(i < rsm_resource.rsmrc_len); 1341 ASSERT(i < rsm_resource.rsmrc_sz); 1342 1343 p = blk->rsmrcblk_blks[j]; 1344 if (lock == RSM_LOCK) { 1345 if (p != RSMRC_RESERVED) { 1346 mutex_enter(&p->rsmrc_lock); 1347 } else { 1348 p = NULL; 1349 } 1350 } 1351 } else { 1352 p = NULL; 1353 } 1354 rw_exit(&rsm_resource.rsmrc_lock); 1355 1356 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1357 "rsmresource_lookup done\n")); 1358 1359 return (p); 1360 } 1361 1362 static void 1363 rsmresource_insert(minor_t rnum, rsmresource_t *p, rsm_resource_type_t type) 1364 { 1365 /* Find resource and lock it in READER mode */ 1366 /* Caller can upgrade if need be */ 1367 /* search for available resource slot */ 1368 int i, j; 1369 rsmresource_blk_t *blk; 1370 1371 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1372 "rsmresource_insert enter\n")); 1373 1374 i = (int)(rnum / RSMRC_BLKSZ); 1375 j = (int)(rnum % RSMRC_BLKSZ); 1376 1377 p->rsmrc_type = type; 1378 p->rsmrc_num = rnum; 1379 1380 rw_enter(&rsm_resource.rsmrc_lock, RW_READER); 1381 1382 ASSERT(rsm_resource.rsmrc_root); 1383 ASSERT(i < rsm_resource.rsmrc_len); 1384 
ASSERT(i < rsm_resource.rsmrc_sz); 1385 1386 blk = rsm_resource.rsmrc_root[i]; 1387 ASSERT(blk); 1388 1389 ASSERT(blk->rsmrcblk_blks[j] == RSMRC_RESERVED); 1390 1391 blk->rsmrcblk_blks[j] = p; 1392 1393 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1394 "rsmresource_insert done\n")); 1395 1396 rw_exit(&rsm_resource.rsmrc_lock); 1397 } 1398 1399 static void 1400 rsmresource_destroy() 1401 { 1402 int i, j; 1403 1404 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1405 "rsmresource_destroy enter\n")); 1406 1407 rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER); 1408 1409 for (i = 0; i < rsm_resource.rsmrc_len; i++) { 1410 rsmresource_blk_t *blk; 1411 1412 blk = rsm_resource.rsmrc_root[i]; 1413 if (blk == NULL) { 1414 continue; 1415 } 1416 for (j = 0; j < RSMRC_BLKSZ; j++) { 1417 if (blk->rsmrcblk_blks[j] != NULL) { 1418 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1419 "Not null slot %d, %lx\n", j, 1420 (size_t)blk->rsmrcblk_blks[j])); 1421 } 1422 } 1423 kmem_free(blk, sizeof (*blk)); 1424 rsm_resource.rsmrc_root[i] = NULL; 1425 } 1426 if (rsm_resource.rsmrc_root) { 1427 i = rsm_resource.rsmrc_sz * (int)sizeof (rsmresource_blk_t *); 1428 kmem_free(rsm_resource.rsmrc_root, (uint_t)i); 1429 rsm_resource.rsmrc_root = NULL; 1430 rsm_resource.rsmrc_len = 0; 1431 rsm_resource.rsmrc_sz = 0; 1432 } 1433 1434 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE, 1435 "rsmresource_destroy done\n")); 1436 1437 rw_exit(&rsm_resource.rsmrc_lock); 1438 } 1439 1440 1441 /* ******************** Generic Key Hash Table Management ********* */ 1442 static rsmresource_t * 1443 rsmhash_lookup(rsmhash_table_t *rhash, rsm_memseg_id_t key, 1444 rsm_resource_state_t state) 1445 { 1446 rsmresource_t *p; 1447 uint_t hashval; 1448 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 1449 1450 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_lookup enter\n")); 1451 1452 hashval = rsmhash(key); 1453 1454 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_lookup %u=%d\n", 1455 key, hashval)); 1456 1457 
rw_enter(&rhash->rsmhash_rw, RW_READER); 1458 1459 p = (rsmresource_t *)rsmhash_getbkt(rhash, hashval); 1460 1461 for (; p; p = p->rsmrc_next) { 1462 if (p->rsmrc_key == key) { 1463 /* acquire resource lock */ 1464 RSMRC_LOCK(p); 1465 break; 1466 } 1467 } 1468 1469 rw_exit(&rhash->rsmhash_rw); 1470 1471 if (p != NULL && p->rsmrc_state != state) { 1472 /* state changed, release lock and return null */ 1473 RSMRC_UNLOCK(p); 1474 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 1475 "rsmhash_lookup done: state changed\n")); 1476 return (NULL); 1477 } 1478 1479 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_lookup done\n")); 1480 1481 return (p); 1482 } 1483 1484 static void 1485 rsmhash_rm(rsmhash_table_t *rhash, rsmresource_t *rcelm) 1486 { 1487 rsmresource_t *p, **back; 1488 uint_t hashval; 1489 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 1490 1491 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_rm enter\n")); 1492 1493 hashval = rsmhash(rcelm->rsmrc_key); 1494 1495 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_rm %u=%d\n", 1496 rcelm->rsmrc_key, hashval)); 1497 1498 /* 1499 * It's ok not to find the segment. 
1500 */ 1501 rw_enter(&rhash->rsmhash_rw, RW_WRITER); 1502 1503 back = (rsmresource_t **)rsmhash_bktaddr(rhash, hashval); 1504 1505 for (; (p = *back) != NULL; back = &p->rsmrc_next) { 1506 if (p == rcelm) { 1507 *back = rcelm->rsmrc_next; 1508 break; 1509 } 1510 } 1511 1512 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_rm done\n")); 1513 1514 rw_exit(&rhash->rsmhash_rw); 1515 } 1516 1517 static int 1518 rsmhash_add(rsmhash_table_t *rhash, rsmresource_t *new, rsm_memseg_id_t key, 1519 int dup_check, rsm_resource_state_t state) 1520 { 1521 rsmresource_t *p = NULL, **bktp; 1522 uint_t hashval; 1523 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 1524 1525 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_add enter\n")); 1526 1527 /* lock table */ 1528 rw_enter(&rhash->rsmhash_rw, RW_WRITER); 1529 1530 /* 1531 * If the current resource state is other than the state passed in 1532 * then the resource is (probably) already on the list. eg. for an 1533 * import segment if the state is not RSM_STATE_NEW then it's on the 1534 * list already. 1535 */ 1536 RSMRC_LOCK(new); 1537 if (new->rsmrc_state != state) { 1538 RSMRC_UNLOCK(new); 1539 rw_exit(&rhash->rsmhash_rw); 1540 return (RSMERR_BAD_SEG_HNDL); 1541 } 1542 1543 hashval = rsmhash(key); 1544 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_add %d\n", hashval)); 1545 1546 if (dup_check) { 1547 /* 1548 * Used for checking export segments; don't want to have 1549 * the same key used for multiple segments. 
1550 */ 1551 1552 p = (rsmresource_t *)rsmhash_getbkt(rhash, hashval); 1553 1554 for (; p; p = p->rsmrc_next) { 1555 if (p->rsmrc_key == key) { 1556 RSMRC_UNLOCK(new); 1557 break; 1558 } 1559 } 1560 } 1561 1562 if (p == NULL) { 1563 /* Key doesn't exist, add it */ 1564 1565 bktp = (rsmresource_t **)rsmhash_bktaddr(rhash, hashval); 1566 1567 new->rsmrc_key = key; 1568 new->rsmrc_next = *bktp; 1569 *bktp = new; 1570 } 1571 1572 rw_exit(&rhash->rsmhash_rw); 1573 1574 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_add done\n")); 1575 1576 return (p == NULL ? RSM_SUCCESS : RSMERR_SEGID_IN_USE); 1577 } 1578 1579 /* 1580 * XOR each byte of the key. 1581 */ 1582 static uint_t 1583 rsmhash(rsm_memseg_id_t key) 1584 { 1585 uint_t hash = key; 1586 1587 hash ^= (key >> 8); 1588 hash ^= (key >> 16); 1589 hash ^= (key >> 24); 1590 1591 return (hash % rsm_hash_size); 1592 1593 } 1594 1595 /* 1596 * generic function to get a specific bucket 1597 */ 1598 static void * 1599 rsmhash_getbkt(rsmhash_table_t *rhash, uint_t hashval) 1600 { 1601 1602 if (rhash->bucket == NULL) 1603 return (NULL); 1604 else 1605 return ((void *)rhash->bucket[hashval]); 1606 } 1607 1608 /* 1609 * generic function to get a specific bucket's address 1610 */ 1611 static void ** 1612 rsmhash_bktaddr(rsmhash_table_t *rhash, uint_t hashval) 1613 { 1614 if (rhash->bucket == NULL) 1615 return (NULL); 1616 else 1617 return ((void **)&(rhash->bucket[hashval])); 1618 } 1619 1620 /* 1621 * generic function to alloc a hash table 1622 */ 1623 static void 1624 rsmhash_alloc(rsmhash_table_t *rhash, int size) 1625 { 1626 rhash->bucket = (rsmresource_t **) 1627 kmem_zalloc(size * sizeof (rsmresource_t *), KM_SLEEP); 1628 } 1629 1630 /* 1631 * generic function to free a hash table 1632 */ 1633 static void 1634 rsmhash_free(rsmhash_table_t *rhash, int size) 1635 { 1636 1637 kmem_free((void *)rhash->bucket, size * sizeof (caddr_t)); 1638 rhash->bucket = NULL; 1639 1640 } 1641 /* *********************** Exported Segment Key 
Management ************ */ 1642 1643 #define rsmexport_add(new, key) \ 1644 rsmhash_add(&rsm_export_segs, (rsmresource_t *)new, key, 1, \ 1645 RSM_STATE_BIND) 1646 1647 #define rsmexport_rm(arg) \ 1648 rsmhash_rm(&rsm_export_segs, (rsmresource_t *)(arg)) 1649 1650 #define rsmexport_lookup(key) \ 1651 (rsmseg_t *)rsmhash_lookup(&rsm_export_segs, key, RSM_STATE_EXPORT) 1652 1653 /* ************************** Import Segment List Management ********** */ 1654 1655 /* 1656 * Add segment to import list. This will be useful for paging and loopback 1657 * segment unloading. 1658 */ 1659 #define rsmimport_add(arg, key) \ 1660 rsmhash_add(&rsm_import_segs, (rsmresource_t *)(arg), (key), 0, \ 1661 RSM_STATE_NEW) 1662 1663 #define rsmimport_rm(arg) \ 1664 rsmhash_rm(&rsm_import_segs, (rsmresource_t *)(arg)) 1665 1666 /* 1667 * #define rsmimport_lookup(key) \ 1668 * (rsmseg_t *)rsmhash_lookup(&rsm_import_segs, (key), RSM_STATE_CONNECT) 1669 */ 1670 1671 /* 1672 * increase the ref count and make the import segment point to the 1673 * shared data structure. Return a pointer to the share data struct 1674 * and the shared data struct is locked upon return 1675 */ 1676 static rsm_import_share_t * 1677 rsmshare_get(rsm_memseg_id_t key, rsm_node_id_t node, adapter_t *adapter, 1678 rsmseg_t *segp) 1679 { 1680 uint_t hash; 1681 rsmresource_t *p; 1682 rsm_import_share_t *shdatap; 1683 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 1684 1685 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmshare_get enter\n")); 1686 1687 hash = rsmhash(key); 1688 /* lock table */ 1689 rw_enter(&rsm_import_segs.rsmhash_rw, RW_WRITER); 1690 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmshare_get:key=%u, hash=%d\n", 1691 key, hash)); 1692 1693 p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hash); 1694 1695 for (; p; p = p->rsmrc_next) { 1696 /* 1697 * Look for an entry that is importing the same exporter 1698 * with the share data structure allocated. 
1699 */ 1700 if ((p->rsmrc_key == key) && 1701 (p->rsmrc_node == node) && 1702 (p->rsmrc_adapter == adapter) && 1703 (((rsmseg_t *)p)->s_share != NULL)) { 1704 shdatap = ((rsmseg_t *)p)->s_share; 1705 break; 1706 } 1707 } 1708 1709 if (p == NULL) { 1710 /* we are the first importer, create the shared data struct */ 1711 shdatap = kmem_zalloc(sizeof (rsm_import_share_t), KM_SLEEP); 1712 shdatap->rsmsi_state = RSMSI_STATE_NEW; 1713 shdatap->rsmsi_segid = key; 1714 shdatap->rsmsi_node = node; 1715 mutex_init(&shdatap->rsmsi_lock, NULL, MUTEX_DRIVER, NULL); 1716 cv_init(&shdatap->rsmsi_cv, NULL, CV_DRIVER, 0); 1717 } 1718 1719 rsmseglock_acquire(segp); 1720 1721 /* we grab the shared lock before returning from this function */ 1722 mutex_enter(&shdatap->rsmsi_lock); 1723 1724 shdatap->rsmsi_refcnt++; 1725 segp->s_share = shdatap; 1726 1727 rsmseglock_release(segp); 1728 1729 rw_exit(&rsm_import_segs.rsmhash_rw); 1730 1731 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmshare_get done\n")); 1732 1733 return (shdatap); 1734 } 1735 1736 /* 1737 * the shared data structure should be locked before calling 1738 * rsmsharecv_signal(). 1739 * Change the state and signal any waiting segments. 
1740 */ 1741 void 1742 rsmsharecv_signal(rsmseg_t *seg, int oldstate, int newstate) 1743 { 1744 ASSERT(rsmsharelock_held(seg)); 1745 1746 if (seg->s_share->rsmsi_state == oldstate) { 1747 seg->s_share->rsmsi_state = newstate; 1748 cv_broadcast(&seg->s_share->rsmsi_cv); 1749 } 1750 } 1751 1752 /* 1753 * Add to the hash table 1754 */ 1755 static void 1756 importer_list_add(rsm_node_id_t node, rsm_memseg_id_t key, rsm_addr_t hwaddr, 1757 void *cookie) 1758 { 1759 1760 importing_token_t *head; 1761 importing_token_t *new_token; 1762 int index; 1763 1764 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 1765 1766 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_add enter\n")); 1767 1768 new_token = kmem_zalloc(sizeof (importing_token_t), KM_SLEEP); 1769 new_token->importing_node = node; 1770 new_token->key = key; 1771 new_token->import_segment_cookie = cookie; 1772 new_token->importing_adapter_hwaddr = hwaddr; 1773 1774 index = rsmhash(key); 1775 1776 mutex_enter(&importer_list.lock); 1777 1778 head = importer_list.bucket[index]; 1779 importer_list.bucket[index] = new_token; 1780 new_token->next = head; 1781 mutex_exit(&importer_list.lock); 1782 1783 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_add done\n")); 1784 } 1785 1786 static void 1787 importer_list_rm(rsm_node_id_t node, rsm_memseg_id_t key, void *cookie) 1788 { 1789 1790 importing_token_t *prev, *token = NULL; 1791 int index; 1792 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 1793 1794 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_rm enter\n")); 1795 1796 index = rsmhash(key); 1797 1798 mutex_enter(&importer_list.lock); 1799 1800 token = importer_list.bucket[index]; 1801 1802 prev = token; 1803 while (token != NULL) { 1804 if (token->importing_node == node && 1805 token->import_segment_cookie == cookie) { 1806 if (prev == token) 1807 importer_list.bucket[index] = token->next; 1808 else 1809 prev->next = token->next; 1810 kmem_free((void *)token, sizeof (*token)); 1811 
break; 1812 } else { 1813 prev = token; 1814 token = token->next; 1815 } 1816 } 1817 1818 mutex_exit(&importer_list.lock); 1819 1820 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_rm done\n")); 1821 1822 1823 } 1824 1825 /* **************************Segment Structure Management ************* */ 1826 1827 /* 1828 * Free segment structure 1829 */ 1830 static void 1831 rsmseg_free(rsmseg_t *seg) 1832 { 1833 1834 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 1835 1836 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_free enter\n")); 1837 1838 /* need to take seglock here to avoid race with rsmmap_unmap() */ 1839 rsmseglock_acquire(seg); 1840 if (seg->s_ckl != NULL) { 1841 /* Segment is still busy */ 1842 seg->s_state = RSM_STATE_END; 1843 rsmseglock_release(seg); 1844 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 1845 "rsmseg_free done\n")); 1846 return; 1847 } 1848 1849 rsmseglock_release(seg); 1850 1851 ASSERT(seg->s_state == RSM_STATE_END || seg->s_state == RSM_STATE_NEW); 1852 1853 /* 1854 * If it's an importer decrement the refcount 1855 * and if its down to zero free the shared data structure. 1856 * This is where failures during rsm_connect() are unrefcounted 1857 */ 1858 if (seg->s_share != NULL) { 1859 1860 ASSERT(seg->s_type == RSM_RESOURCE_IMPORT_SEGMENT); 1861 1862 rsmsharelock_acquire(seg); 1863 1864 ASSERT(seg->s_share->rsmsi_refcnt > 0); 1865 1866 seg->s_share->rsmsi_refcnt--; 1867 1868 if (seg->s_share->rsmsi_refcnt == 0) { 1869 rsmsharelock_release(seg); 1870 mutex_destroy(&seg->s_share->rsmsi_lock); 1871 cv_destroy(&seg->s_share->rsmsi_cv); 1872 kmem_free((void *)(seg->s_share), 1873 sizeof (rsm_import_share_t)); 1874 } else { 1875 rsmsharelock_release(seg); 1876 } 1877 /* 1878 * The following needs to be done after any 1879 * rsmsharelock calls which use seg->s_share. 
1880 */ 1881 seg->s_share = NULL; 1882 } 1883 1884 cv_destroy(&seg->s_cv); 1885 mutex_destroy(&seg->s_lock); 1886 rsmacl_free(seg->s_acl, seg->s_acl_len); 1887 rsmpiacl_free(seg->s_acl_in, seg->s_acl_len); 1888 if (seg->s_adapter) 1889 rsmka_release_adapter(seg->s_adapter); 1890 1891 kmem_free((void *)seg, sizeof (*seg)); 1892 1893 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_free done\n")); 1894 1895 } 1896 1897 1898 static rsmseg_t * 1899 rsmseg_alloc(minor_t num, struct cred *cred) 1900 { 1901 rsmseg_t *new; 1902 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 1903 1904 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_alloc enter\n")); 1905 /* 1906 * allocate memory for new segment. This should be a segkmem cache. 1907 */ 1908 new = (rsmseg_t *)kmem_zalloc(sizeof (*new), KM_SLEEP); 1909 1910 new->s_state = RSM_STATE_NEW; 1911 new->s_minor = num; 1912 new->s_acl_len = 0; 1913 new->s_cookie = NULL; 1914 new->s_adapter = NULL; 1915 1916 new->s_mode = 0777 & ~PTOU((ttoproc(curthread)))->u_cmask; 1917 /* we don't have a key yet, will set at export/connect */ 1918 new->s_uid = crgetuid(cred); 1919 new->s_gid = crgetgid(cred); 1920 1921 mutex_init(&new->s_lock, NULL, MUTEX_DRIVER, (void *)NULL); 1922 cv_init(&new->s_cv, NULL, CV_DRIVER, 0); 1923 1924 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_alloc done\n")); 1925 1926 return (new); 1927 } 1928 1929 /* ******************************** Driver Open/Close/Poll *************** */ 1930 1931 /*ARGSUSED1*/ 1932 static int 1933 rsm_open(dev_t *devp, int flag, int otyp, struct cred *cred) 1934 { 1935 minor_t rnum; 1936 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI); 1937 1938 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_open enter\n")); 1939 /* 1940 * Char only 1941 */ 1942 if (otyp != OTYP_CHR) { 1943 DBG_PRINTF((category, RSM_ERR, "rsm_open: bad otyp\n")); 1944 return (EINVAL); 1945 } 1946 1947 /* 1948 * Only zero can be opened, clones are used for resources. 
1949 */ 1950 if (getminor(*devp) != RSM_DRIVER_MINOR) { 1951 DBG_PRINTF((category, RSM_ERR, 1952 "rsm_open: bad minor %d\n", getminor(*devp))); 1953 return (ENODEV); 1954 } 1955 1956 if ((flag & FEXCL) != 0 && secpolicy_excl_open(cred) != 0) { 1957 DBG_PRINTF((category, RSM_ERR, "rsm_open: bad perm\n")); 1958 return (EPERM); 1959 } 1960 1961 if (!(flag & FWRITE)) { 1962 /* 1963 * The library function _rsm_librsm_init calls open for 1964 * /dev/rsm with flag set to O_RDONLY. We want a valid 1965 * file descriptor to be returned for minor device zero. 1966 */ 1967 1968 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 1969 "rsm_open RDONLY done\n")); 1970 return (DDI_SUCCESS); 1971 } 1972 1973 /* 1974 * - allocate new minor number and segment. 1975 * - add segment to list of all segments. 1976 * - set minordev data to segment 1977 * - update devp argument to new device 1978 * - update s_cred to cred; make sure you do crhold(cred); 1979 */ 1980 1981 /* allocate a new resource number */ 1982 if (rsmresource_alloc(&rnum) == RSM_SUCCESS) { 1983 /* 1984 * We will bind this minor to a specific resource in first 1985 * ioctl 1986 */ 1987 *devp = makedevice(getmajor(*devp), rnum); 1988 } else { 1989 return (EAGAIN); 1990 } 1991 1992 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_open done\n")); 1993 return (DDI_SUCCESS); 1994 } 1995 1996 static void 1997 rsmseg_close(rsmseg_t *seg, int force_flag) 1998 { 1999 int e = RSM_SUCCESS; 2000 2001 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI); 2002 2003 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_close enter\n")); 2004 2005 rsmseglock_acquire(seg); 2006 if (!force_flag && (seg->s_hdr.rsmrc_type == 2007 RSM_RESOURCE_EXPORT_SEGMENT)) { 2008 /* 2009 * If we are processing rsm_close wait for force_destroy 2010 * processing to complete since force_destroy processing 2011 * needs to finish first before we can free the segment. 
2012 * force_destroy is only for export segments 2013 */ 2014 while (seg->s_flags & RSM_FORCE_DESTROY_WAIT) { 2015 cv_wait(&seg->s_cv, &seg->s_lock); 2016 } 2017 } 2018 rsmseglock_release(seg); 2019 2020 /* It's ok to read the state without a lock */ 2021 switch (seg->s_state) { 2022 case RSM_STATE_EXPORT: 2023 case RSM_STATE_EXPORT_QUIESCING: 2024 case RSM_STATE_EXPORT_QUIESCED: 2025 e = rsm_unpublish(seg, 1); 2026 /* FALLTHRU */ 2027 case RSM_STATE_BIND_QUIESCED: 2028 /* FALLTHRU */ 2029 case RSM_STATE_BIND: 2030 e = rsm_unbind(seg); 2031 if (e != RSM_SUCCESS && force_flag == 1) 2032 return; 2033 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT); 2034 /* FALLTHRU */ 2035 case RSM_STATE_NEW_QUIESCED: 2036 rsmseglock_acquire(seg); 2037 seg->s_state = RSM_STATE_NEW; 2038 cv_broadcast(&seg->s_cv); 2039 rsmseglock_release(seg); 2040 break; 2041 case RSM_STATE_NEW: 2042 break; 2043 case RSM_STATE_ZOMBIE: 2044 /* 2045 * Segments in this state have been removed off the 2046 * exported segments list and have been unpublished 2047 * and unbind. These segments have been removed during 2048 * a callback to the rsm_export_force_destroy, which 2049 * is called for the purpose of unlocking these 2050 * exported memory segments when a process exits but 2051 * leaves the segments locked down since rsm_close is 2052 * is not called for the segments. This can happen 2053 * when a process calls fork or exec and then exits. 2054 * Once the segments are in the ZOMBIE state, all that 2055 * remains is to destroy them when rsm_close is called. 2056 * This is done here. Thus, for such segments the 2057 * the state is changed to new so that later in this 2058 * function rsmseg_free is called. 
2059 */ 2060 rsmseglock_acquire(seg); 2061 seg->s_state = RSM_STATE_NEW; 2062 rsmseglock_release(seg); 2063 break; 2064 case RSM_STATE_MAP_QUIESCE: 2065 case RSM_STATE_ACTIVE: 2066 /* Disconnect will handle the unmap */ 2067 case RSM_STATE_CONN_QUIESCE: 2068 case RSM_STATE_CONNECT: 2069 case RSM_STATE_DISCONNECT: 2070 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 2071 (void) rsm_disconnect(seg); 2072 break; 2073 case RSM_STATE_MAPPING: 2074 /*FALLTHRU*/ 2075 case RSM_STATE_END: 2076 DBG_PRINTF((category, RSM_ERR, 2077 "Invalid segment state %d in rsm_close\n", seg->s_state)); 2078 break; 2079 default: 2080 DBG_PRINTF((category, RSM_ERR, 2081 "Invalid segment state %d in rsm_close\n", seg->s_state)); 2082 break; 2083 } 2084 2085 /* 2086 * check state. 2087 * - make sure you do crfree(s_cred); 2088 * release segment and minor number 2089 */ 2090 ASSERT(seg->s_state == RSM_STATE_NEW); 2091 2092 /* 2093 * The export_force_destroy callback is created to unlock 2094 * the exported segments of a process 2095 * when the process does a fork or exec and then exits calls this 2096 * function with the force flag set to 1 which indicates that the 2097 * segment state must be converted to ZOMBIE. This state means that the 2098 * segments still exist and have been unlocked and most importantly the 2099 * only operation allowed is to destroy them on an rsm_close. 
2100 */ 2101 if (force_flag) { 2102 rsmseglock_acquire(seg); 2103 seg->s_state = RSM_STATE_ZOMBIE; 2104 rsmseglock_release(seg); 2105 } else { 2106 rsmseg_free(seg); 2107 } 2108 2109 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_close done\n")); 2110 } 2111 2112 static int 2113 rsm_close(dev_t dev, int flag, int otyp, cred_t *cred) 2114 { 2115 minor_t rnum = getminor(dev); 2116 rsmresource_t *res; 2117 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI); 2118 2119 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close enter\n")); 2120 2121 flag = flag; cred = cred; 2122 2123 if (otyp != OTYP_CHR) 2124 return (EINVAL); 2125 2126 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rnum = %d\n", rnum)); 2127 2128 /* 2129 * At this point we are the last reference to the resource. 2130 * Free resource number from resource table. 2131 * It's ok to remove number before we free the segment. 2132 * We need to lock the resource to protect against remote calls. 2133 */ 2134 if (rnum == RSM_DRIVER_MINOR || 2135 (res = rsmresource_free(rnum)) == NULL) { 2136 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close done\n")); 2137 return (DDI_SUCCESS); 2138 } 2139 2140 switch (res->rsmrc_type) { 2141 case RSM_RESOURCE_EXPORT_SEGMENT: 2142 case RSM_RESOURCE_IMPORT_SEGMENT: 2143 rsmseg_close((rsmseg_t *)res, 0); 2144 break; 2145 case RSM_RESOURCE_BAR: 2146 DBG_PRINTF((category, RSM_ERR, "bad resource in rsm_close\n")); 2147 break; 2148 default: 2149 break; 2150 } 2151 2152 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close done\n")); 2153 2154 return (DDI_SUCCESS); 2155 } 2156 2157 /* 2158 * rsm_inc_pgcnt 2159 * 2160 * Description: increment rsm page counter. 2161 * 2162 * Parameters: pgcnt_t pnum; number of pages to be used 2163 * 2164 * Returns: RSM_SUCCESS if memory limit not exceeded 2165 * ENOSPC if memory limit exceeded. In this case, the 2166 * page counter remains unchanged. 
2167 * 2168 */ 2169 static int 2170 rsm_inc_pgcnt(pgcnt_t pnum) 2171 { 2172 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2173 if (rsm_pgcnt_max == 0) { /* no upper limit has been set */ 2174 return (RSM_SUCCESS); 2175 } 2176 2177 mutex_enter(&rsm_pgcnt_lock); 2178 2179 if (rsm_pgcnt + pnum > rsm_pgcnt_max) { 2180 /* ensure that limits have not been exceeded */ 2181 mutex_exit(&rsm_pgcnt_lock); 2182 return (RSMERR_INSUFFICIENT_MEM); 2183 } 2184 2185 rsm_pgcnt += pnum; 2186 DBG_PRINTF((category, RSM_DEBUG, "rsm_pgcnt incr to %d.\n", 2187 rsm_pgcnt)); 2188 mutex_exit(&rsm_pgcnt_lock); 2189 2190 return (RSM_SUCCESS); 2191 } 2192 2193 /* 2194 * rsm_dec_pgcnt 2195 * 2196 * Description: decrement rsm page counter. 2197 * 2198 * Parameters: pgcnt_t pnum; number of pages freed 2199 * 2200 */ 2201 static void 2202 rsm_dec_pgcnt(pgcnt_t pnum) 2203 { 2204 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2205 2206 if (rsm_pgcnt_max == 0) { /* no upper limit has been set */ 2207 return; 2208 } 2209 2210 mutex_enter(&rsm_pgcnt_lock); 2211 ASSERT(rsm_pgcnt >= pnum); 2212 rsm_pgcnt -= pnum; 2213 DBG_PRINTF((category, RSM_DEBUG, "rsm_pgcnt decr to %d.\n", 2214 rsm_pgcnt)); 2215 mutex_exit(&rsm_pgcnt_lock); 2216 } 2217 2218 static struct umem_callback_ops rsm_as_ops = { 2219 UMEM_CALLBACK_VERSION, /* version number */ 2220 rsm_export_force_destroy, 2221 }; 2222 2223 static int 2224 rsm_bind_pages(ddi_umem_cookie_t *cookie, caddr_t vaddr, size_t len, 2225 proc_t *procp) 2226 { 2227 int error = RSM_SUCCESS; 2228 ulong_t pnum; 2229 struct umem_callback_ops *callbackops = &rsm_as_ops; 2230 2231 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2232 2233 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind_pages enter\n")); 2234 2235 /* 2236 * Make sure vaddr and len are aligned on a page boundary 2237 */ 2238 if ((uintptr_t)vaddr & (PAGESIZE - 1)) { 2239 return (RSMERR_BAD_ADDR); 2240 } 2241 2242 if (len & (PAGESIZE - 1)) { 2243 return (RSMERR_BAD_LENGTH); 2244 } 2245 2246 
/* 2247 * Find number of pages 2248 */ 2249 pnum = btopr(len); 2250 error = rsm_inc_pgcnt(pnum); 2251 if (error != RSM_SUCCESS) { 2252 DBG_PRINTF((category, RSM_ERR, 2253 "rsm_bind_pages:mem limit exceeded\n")); 2254 return (RSMERR_INSUFFICIENT_MEM); 2255 } 2256 2257 error = umem_lockmemory(vaddr, len, 2258 DDI_UMEMLOCK_WRITE|DDI_UMEMLOCK_READ|DDI_UMEMLOCK_LONGTERM, 2259 cookie, 2260 callbackops, procp); 2261 2262 if (error) { 2263 rsm_dec_pgcnt(pnum); 2264 DBG_PRINTF((category, RSM_ERR, 2265 "rsm_bind_pages:ddi_umem_lock failed\n")); 2266 /* 2267 * ddi_umem_lock, in the case of failure, returns one of 2268 * the following three errors. These are translated into 2269 * the RSMERR namespace and returned. 2270 */ 2271 if (error == EFAULT) 2272 return (RSMERR_BAD_ADDR); 2273 else if (error == EACCES) 2274 return (RSMERR_PERM_DENIED); 2275 else 2276 return (RSMERR_INSUFFICIENT_MEM); 2277 } 2278 2279 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind_pages done\n")); 2280 2281 return (error); 2282 2283 } 2284 2285 static int 2286 rsm_unbind_pages(rsmseg_t *seg) 2287 { 2288 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2289 2290 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind_pages enter\n")); 2291 2292 ASSERT(rsmseglock_held(seg)); 2293 2294 if (seg->s_cookie != NULL) { 2295 /* unlock address range */ 2296 ddi_umem_unlock(seg->s_cookie); 2297 rsm_dec_pgcnt(btopr(seg->s_len)); 2298 seg->s_cookie = NULL; 2299 } 2300 2301 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind_pages done\n")); 2302 2303 return (RSM_SUCCESS); 2304 } 2305 2306 2307 static int 2308 rsm_bind(rsmseg_t *seg, rsm_ioctlmsg_t *msg, intptr_t dataptr, int mode) 2309 { 2310 int e; 2311 adapter_t *adapter; 2312 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2313 2314 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind enter\n")); 2315 2316 adapter = rsm_getadapter(msg, mode); 2317 if (adapter == NULL) { 2318 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2319 "rsm_bind done:no adapter\n")); 
2320 return (RSMERR_CTLR_NOT_PRESENT); 2321 } 2322 2323 /* lock address range */ 2324 if (msg->vaddr == NULL) { 2325 rsmka_release_adapter(adapter); 2326 DBG_PRINTF((category, RSM_ERR, 2327 "rsm: rsm_bind done: invalid vaddr\n")); 2328 return (RSMERR_BAD_ADDR); 2329 } 2330 if (msg->len <= 0) { 2331 rsmka_release_adapter(adapter); 2332 DBG_PRINTF((category, RSM_ERR, 2333 "rsm_bind: invalid length\n")); 2334 return (RSMERR_BAD_LENGTH); 2335 } 2336 2337 /* Lock segment */ 2338 rsmseglock_acquire(seg); 2339 2340 while (seg->s_state == RSM_STATE_NEW_QUIESCED) { 2341 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) { 2342 DBG_PRINTF((category, RSM_DEBUG, 2343 "rsm_bind done: cv_wait INTERRUPTED")); 2344 rsmka_release_adapter(adapter); 2345 rsmseglock_release(seg); 2346 return (RSMERR_INTERRUPTED); 2347 } 2348 } 2349 2350 ASSERT(seg->s_state == RSM_STATE_NEW); 2351 2352 ASSERT(seg->s_cookie == NULL); 2353 2354 e = rsm_bind_pages(&seg->s_cookie, msg->vaddr, msg->len, curproc); 2355 if (e == RSM_SUCCESS) { 2356 seg->s_flags |= RSM_USER_MEMORY; 2357 if (msg->perm & RSM_ALLOW_REBIND) { 2358 seg->s_flags |= RSMKA_ALLOW_UNBIND_REBIND; 2359 } 2360 if (msg->perm & RSM_CREATE_SEG_DONTWAIT) { 2361 seg->s_flags |= RSMKA_SET_RESOURCE_DONTWAIT; 2362 } 2363 seg->s_region.r_vaddr = msg->vaddr; 2364 /* 2365 * Set the s_pid value in the segment structure. This is used 2366 * to identify exported segments belonging to a particular 2367 * process so that when the process exits, these segments can 2368 * be unlocked forcefully even if rsm_close is not called on 2369 * process exit since there maybe other processes referencing 2370 * them (for example on a fork or exec). 2371 * The s_pid value is also used to authenticate the process 2372 * doing a publish or unpublish on the export segment. Only 2373 * the creator of the export segment has a right to do a 2374 * publish or unpublish and unbind on the segment. 
2375 */ 2376 seg->s_pid = ddi_get_pid(); 2377 seg->s_len = msg->len; 2378 seg->s_state = RSM_STATE_BIND; 2379 seg->s_adapter = adapter; 2380 seg->s_proc = curproc; 2381 } else { 2382 rsmka_release_adapter(adapter); 2383 DBG_PRINTF((category, RSM_WARNING, 2384 "unable to lock down pages\n")); 2385 } 2386 2387 msg->rnum = seg->s_minor; 2388 /* Unlock segment */ 2389 rsmseglock_release(seg); 2390 2391 if (e == RSM_SUCCESS) { 2392 /* copyout the resource number */ 2393 #ifdef _MULTI_DATAMODEL 2394 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 2395 rsm_ioctlmsg32_t msg32; 2396 2397 msg32.rnum = msg->rnum; 2398 if (ddi_copyout((caddr_t)&msg32.rnum, 2399 (caddr_t)&((rsm_ioctlmsg32_t *)dataptr)->rnum, 2400 sizeof (minor_t), mode)) { 2401 rsmka_release_adapter(adapter); 2402 e = RSMERR_BAD_ADDR; 2403 } 2404 } 2405 #endif 2406 if (ddi_copyout((caddr_t)&msg->rnum, 2407 (caddr_t)&((rsm_ioctlmsg_t *)dataptr)->rnum, 2408 sizeof (minor_t), mode)) { 2409 rsmka_release_adapter(adapter); 2410 e = RSMERR_BAD_ADDR; 2411 } 2412 } 2413 2414 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind done\n")); 2415 2416 return (e); 2417 } 2418 2419 static void 2420 rsm_remap_local_importers(rsm_node_id_t src_nodeid, 2421 rsm_memseg_id_t ex_segid, 2422 ddi_umem_cookie_t cookie) 2423 2424 { 2425 rsmresource_t *p = NULL; 2426 rsmhash_table_t *rhash = &rsm_import_segs; 2427 uint_t index; 2428 2429 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_FUNC_ALL, RSM_DEBUG_VERBOSE, 2430 "rsm_remap_local_importers enter\n")); 2431 2432 index = rsmhash(ex_segid); 2433 2434 rw_enter(&rhash->rsmhash_rw, RW_READER); 2435 2436 p = rsmhash_getbkt(rhash, index); 2437 2438 for (; p; p = p->rsmrc_next) { 2439 rsmseg_t *seg = (rsmseg_t *)p; 2440 rsmseglock_acquire(seg); 2441 /* 2442 * Change the s_cookie value of only the local importers 2443 * which have been mapped (in state RSM_STATE_ACTIVE). 
2444 * Note that there is no need to change the s_cookie value 2445 * if the imported segment is in RSM_STATE_MAPPING since 2446 * eventually the s_cookie will be updated via the mapping 2447 * functionality. 2448 */ 2449 if ((seg->s_segid == ex_segid) && (seg->s_node == src_nodeid) && 2450 (seg->s_state == RSM_STATE_ACTIVE)) { 2451 seg->s_cookie = cookie; 2452 } 2453 rsmseglock_release(seg); 2454 } 2455 rw_exit(&rhash->rsmhash_rw); 2456 2457 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_FUNC_ALL, RSM_DEBUG_VERBOSE, 2458 "rsm_remap_local_importers done\n")); 2459 } 2460 2461 static int 2462 rsm_rebind(rsmseg_t *seg, rsm_ioctlmsg_t *msg) 2463 { 2464 int e; 2465 adapter_t *adapter; 2466 ddi_umem_cookie_t cookie; 2467 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2468 2469 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind enter\n")); 2470 2471 /* Check for permissions to rebind */ 2472 if (!(seg->s_flags & RSMKA_ALLOW_UNBIND_REBIND)) { 2473 return (RSMERR_REBIND_NOT_ALLOWED); 2474 } 2475 2476 if (seg->s_pid != ddi_get_pid() && 2477 ddi_get_pid() != 0) { 2478 DBG_PRINTF((category, RSM_ERR, "rsm_rebind: Not owner\n")); 2479 return (RSMERR_NOT_CREATOR); 2480 } 2481 2482 /* 2483 * We will not be allowing partial rebind and hence length passed 2484 * in must be same as segment length 2485 */ 2486 if (msg->vaddr == NULL) { 2487 DBG_PRINTF((category, RSM_ERR, 2488 "rsm_rebind done: null msg->vaddr\n")); 2489 return (RSMERR_BAD_ADDR); 2490 } 2491 if (msg->len != seg->s_len) { 2492 DBG_PRINTF((category, RSM_ERR, 2493 "rsm_rebind: invalid length\n")); 2494 return (RSMERR_BAD_LENGTH); 2495 } 2496 2497 /* Lock segment */ 2498 rsmseglock_acquire(seg); 2499 2500 while ((seg->s_state == RSM_STATE_BIND_QUIESCED) || 2501 (seg->s_state == RSM_STATE_EXPORT_QUIESCING) || 2502 (seg->s_state == RSM_STATE_EXPORT_QUIESCED)) { 2503 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) { 2504 rsmseglock_release(seg); 2505 DBG_PRINTF((category, RSM_DEBUG, 2506 "rsm_rebind done: cv_wait INTERRUPTED")); 
2507 return (RSMERR_INTERRUPTED); 2508 } 2509 } 2510 2511 /* verify segment state */ 2512 if ((seg->s_state != RSM_STATE_BIND) && 2513 (seg->s_state != RSM_STATE_EXPORT)) { 2514 /* Unlock segment */ 2515 rsmseglock_release(seg); 2516 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2517 "rsm_rebind done: invalid state\n")); 2518 return (RSMERR_BAD_SEG_HNDL); 2519 } 2520 2521 ASSERT(seg->s_cookie != NULL); 2522 2523 if (msg->vaddr == seg->s_region.r_vaddr) { 2524 rsmseglock_release(seg); 2525 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind done\n")); 2526 return (RSM_SUCCESS); 2527 } 2528 2529 e = rsm_bind_pages(&cookie, msg->vaddr, msg->len, curproc); 2530 if (e == RSM_SUCCESS) { 2531 struct buf *xbuf; 2532 dev_t sdev = 0; 2533 rsm_memory_local_t mem; 2534 2535 xbuf = ddi_umem_iosetup(cookie, 0, msg->len, B_WRITE, 2536 sdev, 0, NULL, DDI_UMEM_SLEEP); 2537 ASSERT(xbuf != NULL); 2538 2539 mem.ms_type = RSM_MEM_BUF; 2540 mem.ms_bp = xbuf; 2541 2542 adapter = seg->s_adapter; 2543 e = adapter->rsmpi_ops->rsm_rebind( 2544 seg->s_handle.out, 0, &mem, 2545 RSM_RESOURCE_DONTWAIT, NULL); 2546 2547 if (e == RSM_SUCCESS) { 2548 /* 2549 * unbind the older pages, and unload local importers; 2550 * but don't disconnect importers 2551 */ 2552 (void) rsm_unbind_pages(seg); 2553 seg->s_cookie = cookie; 2554 seg->s_region.r_vaddr = msg->vaddr; 2555 rsm_remap_local_importers(my_nodeid, seg->s_segid, 2556 cookie); 2557 } else { 2558 /* 2559 * Unbind the pages associated with "cookie" by the 2560 * rsm_bind_pages calls prior to this. This is 2561 * similar to what is done in the rsm_unbind_pages 2562 * routine for the seg->s_cookie. 2563 */ 2564 ddi_umem_unlock(cookie); 2565 rsm_dec_pgcnt(btopr(msg->len)); 2566 DBG_PRINTF((category, RSM_ERR, 2567 "rsm_rebind failed with %d\n", e)); 2568 } 2569 /* 2570 * At present there is no dependency on the existence of xbuf. 2571 * So we can free it here. If in the future this changes, it can 2572 * be freed sometime during the segment destroy. 
2573 */ 2574 freerbuf(xbuf); 2575 } 2576 2577 /* Unlock segment */ 2578 rsmseglock_release(seg); 2579 2580 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind done\n")); 2581 2582 return (e); 2583 } 2584 2585 static int 2586 rsm_unbind(rsmseg_t *seg) 2587 { 2588 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2589 2590 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind enter\n")); 2591 2592 rsmseglock_acquire(seg); 2593 2594 /* verify segment state */ 2595 if ((seg->s_state != RSM_STATE_BIND) && 2596 (seg->s_state != RSM_STATE_BIND_QUIESCED)) { 2597 rsmseglock_release(seg); 2598 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2599 "rsm_unbind: invalid state\n")); 2600 return (RSMERR_BAD_SEG_HNDL); 2601 } 2602 2603 /* unlock current range */ 2604 (void) rsm_unbind_pages(seg); 2605 2606 if (seg->s_state == RSM_STATE_BIND) { 2607 seg->s_state = RSM_STATE_NEW; 2608 } else if (seg->s_state == RSM_STATE_BIND_QUIESCED) { 2609 seg->s_state = RSM_STATE_NEW_QUIESCED; 2610 } 2611 2612 rsmseglock_release(seg); 2613 2614 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind done\n")); 2615 2616 return (RSM_SUCCESS); 2617 } 2618 2619 /* **************************** Exporter Access List Management ******* */ 2620 static void 2621 rsmacl_free(rsmapi_access_entry_t *acl, int acl_len) 2622 { 2623 int acl_sz; 2624 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2625 2626 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_free enter\n")); 2627 2628 /* acl could be NULL */ 2629 2630 if (acl != NULL && acl_len > 0) { 2631 acl_sz = acl_len * sizeof (rsmapi_access_entry_t); 2632 kmem_free((void *)acl, acl_sz); 2633 } 2634 2635 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_free done\n")); 2636 } 2637 2638 static void 2639 rsmpiacl_free(rsm_access_entry_t *acl, int acl_len) 2640 { 2641 int acl_sz; 2642 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2643 2644 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_free enter\n")); 2645 2646 if (acl != NULL && acl_len > 0) { 2647 
acl_sz = acl_len * sizeof (rsm_access_entry_t); 2648 kmem_free((void *)acl, acl_sz); 2649 } 2650 2651 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_free done\n")); 2652 2653 } 2654 2655 static int 2656 rsmacl_build(rsm_ioctlmsg_t *msg, int mode, 2657 rsmapi_access_entry_t **list, int *len, int loopback) 2658 { 2659 rsmapi_access_entry_t *acl; 2660 int acl_len; 2661 int i; 2662 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2663 2664 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_build enter\n")); 2665 2666 *len = 0; 2667 *list = NULL; 2668 2669 acl_len = msg->acl_len; 2670 if ((loopback && acl_len > 1) || (acl_len < 0) || 2671 (acl_len > MAX_NODES)) { 2672 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2673 "rsmacl_build done: acl invalid\n")); 2674 return (RSMERR_BAD_ACL); 2675 } 2676 2677 if (acl_len > 0 && acl_len <= MAX_NODES) { 2678 size_t acl_size = acl_len * sizeof (rsmapi_access_entry_t); 2679 2680 acl = kmem_alloc(acl_size, KM_SLEEP); 2681 2682 if (ddi_copyin((caddr_t)msg->acl, (caddr_t)acl, 2683 acl_size, mode)) { 2684 kmem_free((void *) acl, acl_size); 2685 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2686 "rsmacl_build done: BAD_ADDR\n")); 2687 return (RSMERR_BAD_ADDR); 2688 } 2689 2690 /* 2691 * Verify access list 2692 */ 2693 for (i = 0; i < acl_len; i++) { 2694 if (acl[i].ae_node > MAX_NODES || 2695 (loopback && (acl[i].ae_node != my_nodeid)) || 2696 acl[i].ae_permission > RSM_ACCESS_TRUSTED) { 2697 /* invalid entry */ 2698 kmem_free((void *) acl, acl_size); 2699 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2700 "rsmacl_build done: EINVAL\n")); 2701 return (RSMERR_BAD_ACL); 2702 } 2703 } 2704 2705 *len = acl_len; 2706 *list = acl; 2707 } 2708 2709 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_build done\n")); 2710 2711 return (DDI_SUCCESS); 2712 } 2713 2714 static int 2715 rsmpiacl_create(rsmapi_access_entry_t *src, rsm_access_entry_t **dest, 2716 int acl_len, adapter_t *adapter) 2717 { 2718 rsm_access_entry_t *acl; 2719 rsm_addr_t hwaddr; 2720 int 
i; 2721 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2722 2723 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_create enter\n")); 2724 2725 if (src != NULL) { 2726 size_t acl_size = acl_len * sizeof (rsm_access_entry_t); 2727 acl = kmem_alloc(acl_size, KM_SLEEP); 2728 2729 /* 2730 * translate access list 2731 */ 2732 for (i = 0; i < acl_len; i++) { 2733 if (src[i].ae_node == my_nodeid) { 2734 acl[i].ae_addr = adapter->hwaddr; 2735 } else { 2736 hwaddr = get_remote_hwaddr(adapter, 2737 src[i].ae_node); 2738 if ((int64_t)hwaddr < 0) { 2739 /* invalid hwaddr */ 2740 kmem_free((void *) acl, acl_size); 2741 DBG_PRINTF((category, 2742 RSM_DEBUG_VERBOSE, 2743 "rsmpiacl_create done:" 2744 "EINVAL hwaddr\n")); 2745 return (RSMERR_INTERNAL_ERROR); 2746 } 2747 acl[i].ae_addr = hwaddr; 2748 } 2749 /* rsmpi understands only RSM_PERM_XXXX */ 2750 acl[i].ae_permission = 2751 src[i].ae_permission & RSM_PERM_RDWR; 2752 } 2753 *dest = acl; 2754 } else { 2755 *dest = NULL; 2756 } 2757 2758 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_create done\n")); 2759 2760 return (RSM_SUCCESS); 2761 } 2762 2763 static int 2764 rsmsegacl_validate(rsmipc_request_t *req, rsm_node_id_t rnode, 2765 rsmipc_reply_t *reply) 2766 { 2767 2768 int i; 2769 rsmseg_t *seg; 2770 rsm_memseg_id_t key = req->rsmipc_key; 2771 rsm_permission_t perm = req->rsmipc_perm; 2772 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2773 2774 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2775 "rsmsegacl_validate enter\n")); 2776 2777 /* 2778 * Find segment and grab its lock. The reason why we grab the segment 2779 * lock in side the search is to avoid the race when the segment is 2780 * being deleted and we already have a pointer to it. 
2781 */ 2782 seg = rsmexport_lookup(key); 2783 if (!seg) { 2784 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2785 "rsmsegacl_validate done: %u ENXIO\n", key)); 2786 return (RSMERR_SEG_NOT_PUBLISHED); 2787 } 2788 2789 ASSERT(rsmseglock_held(seg)); 2790 ASSERT(seg->s_state == RSM_STATE_EXPORT); 2791 2792 /* 2793 * We implement a 2-level protection scheme. 2794 * First, we check if local/remote host has access rights. 2795 * Second, we check if the user has access rights. 2796 * 2797 * This routine only validates the rnode access_list 2798 */ 2799 if (seg->s_acl_len > 0) { 2800 /* 2801 * Check host access list 2802 */ 2803 ASSERT(seg->s_acl != NULL); 2804 for (i = 0; i < seg->s_acl_len; i++) { 2805 if (seg->s_acl[i].ae_node == rnode) { 2806 perm &= seg->s_acl[i].ae_permission; 2807 goto found; 2808 } 2809 } 2810 /* rnode is not found in the list */ 2811 rsmseglock_release(seg); 2812 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2813 "rsmsegacl_validate done: EPERM\n")); 2814 return (RSMERR_SEG_NOT_PUBLISHED_TO_NODE); 2815 } else { 2816 /* use default owner creation umask */ 2817 perm &= seg->s_mode; 2818 } 2819 2820 found: 2821 /* update perm for this node */ 2822 reply->rsmipc_mode = perm; 2823 reply->rsmipc_uid = seg->s_uid; 2824 reply->rsmipc_gid = seg->s_gid; 2825 reply->rsmipc_segid = seg->s_segid; 2826 reply->rsmipc_seglen = seg->s_len; 2827 2828 /* 2829 * Perm of requesting node is valid; source will validate user 2830 */ 2831 rsmseglock_release(seg); 2832 2833 /* 2834 * Add the importer to the list right away, if connect fails 2835 * the importer will ask the exporter to remove it. 
2836 */ 2837 importer_list_add(rnode, key, req->rsmipc_adapter_hwaddr, 2838 req->rsmipc_segment_cookie); 2839 2840 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegacl_validate done\n")); 2841 2842 return (RSM_SUCCESS); 2843 } 2844 2845 2846 /* ************************** Exporter Calls ************************* */ 2847 2848 static int 2849 rsm_publish(rsmseg_t *seg, rsm_ioctlmsg_t *msg, intptr_t dataptr, int mode) 2850 { 2851 int e; 2852 int acl_len; 2853 rsmapi_access_entry_t *acl; 2854 rsm_access_entry_t *rsmpi_acl; 2855 rsm_memory_local_t mem; 2856 struct buf *xbuf; 2857 dev_t sdev = 0; 2858 adapter_t *adapter; 2859 rsm_memseg_id_t segment_id = 0; 2860 int loopback_flag = 0; 2861 int create_flags = 0; 2862 rsm_resource_callback_t callback_flag; 2863 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2864 2865 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_publish enter\n")); 2866 2867 if (seg->s_adapter == &loopback_adapter) 2868 loopback_flag = 1; 2869 2870 if (seg->s_pid != ddi_get_pid() && 2871 ddi_get_pid() != 0) { 2872 DBG_PRINTF((category, RSM_ERR, 2873 "rsm_publish: Not creator\n")); 2874 return (RSMERR_NOT_CREATOR); 2875 } 2876 2877 /* 2878 * Get per node access list 2879 */ 2880 e = rsmacl_build(msg, mode, &acl, &acl_len, loopback_flag); 2881 if (e != DDI_SUCCESS) { 2882 DBG_PRINTF((category, RSM_ERR, 2883 "rsm_publish done: rsmacl_build failed\n")); 2884 return (e); 2885 } 2886 2887 /* 2888 * The application provided msg->key is used for resolving a 2889 * segment id according to the following: 2890 * key = 0 Kernel Agent selects the segment id 2891 * key <= RSM_DLPI_ID_END Reserved for system usage except 2892 * RSMLIB range 2893 * key < RSM_USER_APP_ID_BASE segment id = key 2894 * key >= RSM_USER_APP_ID_BASE Reserved for KA selections 2895 * 2896 * rsm_nextavail_segmentid is initialized to 0x80000000 and 2897 * overflows to zero after 0x80000000 allocations. 
2898 * An algorithm is needed which allows reinitialization and provides 2899 * for reallocation after overflow. For now, ENOMEM is returned 2900 * once the overflow condition has occurred. 2901 */ 2902 if (msg->key == 0) { 2903 mutex_enter(&rsm_lock); 2904 segment_id = rsm_nextavail_segmentid; 2905 if (segment_id != 0) { 2906 rsm_nextavail_segmentid++; 2907 mutex_exit(&rsm_lock); 2908 } else { 2909 mutex_exit(&rsm_lock); 2910 DBG_PRINTF((category, RSM_ERR, 2911 "rsm_publish done: no more keys avlbl\n")); 2912 return (RSMERR_INSUFFICIENT_RESOURCES); 2913 } 2914 } else if BETWEEN(msg->key, RSM_RSMLIB_ID_BASE, RSM_RSMLIB_ID_END) 2915 /* range reserved for internal use by base/ndi libraries */ 2916 segment_id = msg->key; 2917 else if (msg->key <= RSM_DLPI_ID_END) 2918 return (RSMERR_RESERVED_SEGID); 2919 else if (msg->key <= (uint_t)RSM_USER_APP_ID_BASE -1) 2920 segment_id = msg->key; 2921 else { 2922 DBG_PRINTF((category, RSM_ERR, 2923 "rsm_publish done: invalid key %u\n", msg->key)); 2924 return (RSMERR_RESERVED_SEGID); 2925 } 2926 2927 /* Add key to exportlist; The segment lock is held on success */ 2928 e = rsmexport_add(seg, segment_id); 2929 if (e) { 2930 rsmacl_free(acl, acl_len); 2931 DBG_PRINTF((category, RSM_ERR, 2932 "rsm_publish done: export_add failed: %d\n", e)); 2933 return (e); 2934 } 2935 2936 seg->s_segid = segment_id; 2937 2938 if ((seg->s_state != RSM_STATE_BIND) && 2939 (seg->s_state != RSM_STATE_BIND_QUIESCED)) { 2940 /* state changed since then, free acl and return */ 2941 rsmseglock_release(seg); 2942 rsmexport_rm(seg); 2943 rsmacl_free(acl, acl_len); 2944 DBG_PRINTF((category, RSM_ERR, 2945 "rsm_publish done: segment in wrong state: %d\n", 2946 seg->s_state)); 2947 return (RSMERR_BAD_SEG_HNDL); 2948 } 2949 2950 /* 2951 * If this is for a local memory handle and permissions are zero, 2952 * then the surrogate segment is very large and we want to skip 2953 * allocation of DVMA space. 2954 * 2955 * Careful! 
If the user didn't use an ACL list, acl will be a NULL 2956 * pointer. Check that before dereferencing it. 2957 */ 2958 if (acl != (rsmapi_access_entry_t *)NULL) { 2959 if (acl[0].ae_node == my_nodeid && acl[0].ae_permission == 0) 2960 goto skipdriver; 2961 } 2962 2963 /* create segment */ 2964 xbuf = ddi_umem_iosetup(seg->s_cookie, 0, seg->s_len, B_WRITE, 2965 sdev, 0, NULL, DDI_UMEM_SLEEP); 2966 ASSERT(xbuf != NULL); 2967 2968 mem.ms_type = RSM_MEM_BUF; 2969 mem.ms_bp = xbuf; 2970 2971 /* This call includes a bind operations */ 2972 2973 adapter = seg->s_adapter; 2974 /* 2975 * create a acl list with hwaddr for RSMPI publish 2976 */ 2977 e = rsmpiacl_create(acl, &rsmpi_acl, acl_len, adapter); 2978 2979 if (e != RSM_SUCCESS) { 2980 rsmseglock_release(seg); 2981 rsmexport_rm(seg); 2982 rsmacl_free(acl, acl_len); 2983 freerbuf(xbuf); 2984 DBG_PRINTF((category, RSM_ERR, 2985 "rsm_publish done: rsmpiacl_create failed: %d\n", e)); 2986 return (e); 2987 } 2988 2989 if (seg->s_state == RSM_STATE_BIND) { 2990 /* create segment */ 2991 2992 /* This call includes a bind operations */ 2993 2994 if (seg->s_flags & RSMKA_ALLOW_UNBIND_REBIND) { 2995 create_flags = RSM_ALLOW_UNBIND_REBIND; 2996 } 2997 2998 if (seg->s_flags & RSMKA_SET_RESOURCE_DONTWAIT) { 2999 callback_flag = RSM_RESOURCE_DONTWAIT; 3000 } else { 3001 callback_flag = RSM_RESOURCE_SLEEP; 3002 } 3003 3004 e = adapter->rsmpi_ops->rsm_seg_create( 3005 adapter->rsmpi_handle, 3006 &seg->s_handle.out, seg->s_len, 3007 create_flags, &mem, 3008 callback_flag, NULL); 3009 /* 3010 * At present there is no dependency on the existence of xbuf. 3011 * So we can free it here. If in the future this changes, it can 3012 * be freed sometime during the segment destroy. 
3013 */ 3014 freerbuf(xbuf); 3015 3016 if (e != RSM_SUCCESS) { 3017 rsmseglock_release(seg); 3018 rsmexport_rm(seg); 3019 rsmacl_free(acl, acl_len); 3020 rsmpiacl_free(rsmpi_acl, acl_len); 3021 DBG_PRINTF((category, RSM_ERR, 3022 "rsm_publish done: export_create failed: %d\n", e)); 3023 /* 3024 * The following assertion ensures that the two errors 3025 * related to the length and its alignment do not occur 3026 * since they have been checked during export_create 3027 */ 3028 ASSERT(e != RSMERR_BAD_MEM_ALIGNMENT && 3029 e != RSMERR_BAD_LENGTH); 3030 if (e == RSMERR_NOT_MEM) 3031 e = RSMERR_INSUFFICIENT_MEM; 3032 3033 return (e); 3034 } 3035 /* export segment, this should create an IMMU mapping */ 3036 e = adapter->rsmpi_ops->rsm_publish( 3037 seg->s_handle.out, 3038 rsmpi_acl, acl_len, 3039 seg->s_segid, 3040 RSM_RESOURCE_DONTWAIT, NULL); 3041 3042 if (e != RSM_SUCCESS) { 3043 adapter->rsmpi_ops->rsm_seg_destroy(seg->s_handle.out); 3044 rsmseglock_release(seg); 3045 rsmexport_rm(seg); 3046 rsmacl_free(acl, acl_len); 3047 rsmpiacl_free(rsmpi_acl, acl_len); 3048 DBG_PRINTF((category, RSM_ERR, 3049 "rsm_publish done: export_publish failed: %d\n", 3050 e)); 3051 return (e); 3052 } 3053 } 3054 3055 seg->s_acl_in = rsmpi_acl; 3056 3057 skipdriver: 3058 /* defer s_acl/s_acl_len -> avoid crash in rsmseg_free */ 3059 seg->s_acl_len = acl_len; 3060 seg->s_acl = acl; 3061 3062 if (seg->s_state == RSM_STATE_BIND) { 3063 seg->s_state = RSM_STATE_EXPORT; 3064 } else if (seg->s_state == RSM_STATE_BIND_QUIESCED) { 3065 seg->s_state = RSM_STATE_EXPORT_QUIESCED; 3066 cv_broadcast(&seg->s_cv); 3067 } 3068 3069 rsmseglock_release(seg); 3070 3071 /* 3072 * If the segment id was solicited, then return it in 3073 * the original incoming message. 
3074 */ 3075 if (msg->key == 0) { 3076 msg->key = segment_id; 3077 #ifdef _MULTI_DATAMODEL 3078 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 3079 rsm_ioctlmsg32_t msg32; 3080 3081 msg32.key = msg->key; 3082 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3083 "rsm_publish done\n")); 3084 return (ddi_copyout((caddr_t)&msg32, 3085 (caddr_t)dataptr, sizeof (msg32), mode)); 3086 } 3087 #endif 3088 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3089 "rsm_publish done\n")); 3090 return (ddi_copyout((caddr_t)msg, 3091 (caddr_t)dataptr, sizeof (*msg), mode)); 3092 } 3093 3094 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_publish done\n")); 3095 return (DDI_SUCCESS); 3096 } 3097 3098 /* 3099 * This function modifies the access control list of an already published 3100 * segment. There is no effect on import segments which are already 3101 * connected. 3102 */ 3103 static int 3104 rsm_republish(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int mode) 3105 { 3106 rsmapi_access_entry_t *new_acl, *old_acl, *tmp_acl; 3107 rsm_access_entry_t *rsmpi_new_acl, *rsmpi_old_acl; 3108 int new_acl_len, old_acl_len, tmp_acl_len; 3109 int e, i; 3110 adapter_t *adapter; 3111 int loopback_flag = 0; 3112 rsm_memseg_id_t key; 3113 rsm_permission_t permission; 3114 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 3115 3116 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_republish enter\n")); 3117 3118 if ((seg->s_state != RSM_STATE_EXPORT) && 3119 (seg->s_state != RSM_STATE_EXPORT_QUIESCED) && 3120 (seg->s_state != RSM_STATE_EXPORT_QUIESCING)) 3121 return (RSMERR_SEG_NOT_PUBLISHED); 3122 3123 if (seg->s_pid != ddi_get_pid() && 3124 ddi_get_pid() != 0) { 3125 DBG_PRINTF((category, RSM_ERR, 3126 "rsm_republish: Not owner\n")); 3127 return (RSMERR_NOT_CREATOR); 3128 } 3129 3130 if (seg->s_adapter == &loopback_adapter) 3131 loopback_flag = 1; 3132 3133 /* 3134 * Build new list first 3135 */ 3136 e = rsmacl_build(msg, mode, &new_acl, &new_acl_len, loopback_flag); 3137 if (e) { 3138 DBG_PRINTF((category, 
RSM_DEBUG_VERBOSE, 3139 "rsm_republish done: rsmacl_build failed %d", e)); 3140 return (e); 3141 } 3142 3143 /* Lock segment */ 3144 rsmseglock_acquire(seg); 3145 /* 3146 * a republish is in progress - REPUBLISH message is being 3147 * sent to the importers so wait for it to complete OR 3148 * wait till DR completes 3149 */ 3150 while (((seg->s_state == RSM_STATE_EXPORT) && 3151 (seg->s_flags & RSM_REPUBLISH_WAIT)) || 3152 (seg->s_state == RSM_STATE_EXPORT_QUIESCED) || 3153 (seg->s_state == RSM_STATE_EXPORT_QUIESCING)) { 3154 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) { 3155 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3156 "rsm_republish done: cv_wait INTERRUPTED")); 3157 rsmseglock_release(seg); 3158 rsmacl_free(new_acl, new_acl_len); 3159 return (RSMERR_INTERRUPTED); 3160 } 3161 } 3162 3163 /* recheck if state is valid */ 3164 if (seg->s_state != RSM_STATE_EXPORT) { 3165 rsmseglock_release(seg); 3166 rsmacl_free(new_acl, new_acl_len); 3167 return (RSMERR_SEG_NOT_PUBLISHED); 3168 } 3169 3170 key = seg->s_key; 3171 old_acl = seg->s_acl; 3172 old_acl_len = seg->s_acl_len; 3173 3174 seg->s_acl = new_acl; 3175 seg->s_acl_len = new_acl_len; 3176 3177 /* 3178 * This call will only be meaningful if and when the interconnect 3179 * layer makes use of the access list 3180 */ 3181 adapter = seg->s_adapter; 3182 /* 3183 * create a acl list with hwaddr for RSMPI publish 3184 */ 3185 e = rsmpiacl_create(new_acl, &rsmpi_new_acl, new_acl_len, adapter); 3186 3187 if (e != RSM_SUCCESS) { 3188 seg->s_acl = old_acl; 3189 seg->s_acl_len = old_acl_len; 3190 rsmseglock_release(seg); 3191 rsmacl_free(new_acl, new_acl_len); 3192 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3193 "rsm_republish done: rsmpiacl_create failed %d", e)); 3194 return (e); 3195 } 3196 rsmpi_old_acl = seg->s_acl_in; 3197 seg->s_acl_in = rsmpi_new_acl; 3198 3199 e = adapter->rsmpi_ops->rsm_republish(seg->s_handle.out, 3200 seg->s_acl_in, seg->s_acl_len, 3201 RSM_RESOURCE_DONTWAIT, NULL); 3202 3203 if (e != 
RSM_SUCCESS) { 3204 seg->s_acl = old_acl; 3205 seg->s_acl_in = rsmpi_old_acl; 3206 seg->s_acl_len = old_acl_len; 3207 rsmseglock_release(seg); 3208 rsmacl_free(new_acl, new_acl_len); 3209 rsmpiacl_free(rsmpi_new_acl, new_acl_len); 3210 3211 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3212 "rsm_republish done: rsmpi republish failed %d\n", e)); 3213 return (e); 3214 } 3215 3216 /* create a tmp copy of the new acl */ 3217 tmp_acl_len = new_acl_len; 3218 if (tmp_acl_len > 0) { 3219 tmp_acl = kmem_zalloc(new_acl_len*sizeof (*tmp_acl), KM_SLEEP); 3220 for (i = 0; i < tmp_acl_len; i++) { 3221 tmp_acl[i].ae_node = new_acl[i].ae_node; 3222 tmp_acl[i].ae_permission = new_acl[i].ae_permission; 3223 } 3224 /* 3225 * The default permission of a node which was in the old 3226 * ACL but not in the new ACL is 0 ie no access. 3227 */ 3228 permission = 0; 3229 } else { 3230 /* 3231 * NULL acl means all importers can connect and 3232 * default permission will be owner creation umask 3233 */ 3234 tmp_acl = NULL; 3235 permission = seg->s_mode; 3236 } 3237 3238 /* make other republishers to wait for republish to complete */ 3239 seg->s_flags |= RSM_REPUBLISH_WAIT; 3240 3241 rsmseglock_release(seg); 3242 3243 /* send the new perms to the importing nodes */ 3244 rsm_send_republish(key, tmp_acl, tmp_acl_len, permission); 3245 3246 rsmseglock_acquire(seg); 3247 seg->s_flags &= ~RSM_REPUBLISH_WAIT; 3248 /* wake up any one waiting for republish to complete */ 3249 cv_broadcast(&seg->s_cv); 3250 rsmseglock_release(seg); 3251 3252 rsmacl_free(tmp_acl, tmp_acl_len); 3253 rsmacl_free(old_acl, old_acl_len); 3254 rsmpiacl_free(rsmpi_old_acl, old_acl_len); 3255 3256 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_republish done\n")); 3257 return (DDI_SUCCESS); 3258 } 3259 3260 static int 3261 rsm_unpublish(rsmseg_t *seg, int mode) 3262 { 3263 rsmapi_access_entry_t *acl; 3264 rsm_access_entry_t *rsmpi_acl; 3265 int acl_len; 3266 int e; 3267 adapter_t *adapter; 3268 DBG_DEFINE(category, RSM_KERNEL_AGENT | 
RSM_EXPORT); 3269 3270 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unpublish enter\n")); 3271 3272 if (seg->s_pid != ddi_get_pid() && 3273 ddi_get_pid() != 0) { 3274 DBG_PRINTF((category, RSM_ERR, 3275 "rsm_unpublish: Not creator\n")); 3276 return (RSMERR_NOT_CREATOR); 3277 } 3278 3279 rsmseglock_acquire(seg); 3280 /* 3281 * wait for QUIESCING to complete here before rsmexport_rm 3282 * is called because the SUSPEND_COMPLETE mesg which changes 3283 * the seg state from EXPORT_QUIESCING to EXPORT_QUIESCED and 3284 * signals the cv_wait needs to find it in the hashtable. 3285 */ 3286 while ((seg->s_state == RSM_STATE_EXPORT_QUIESCING) || 3287 ((seg->s_state == RSM_STATE_EXPORT) && (seg->s_rdmacnt > 0))) { 3288 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) { 3289 rsmseglock_release(seg); 3290 DBG_PRINTF((category, RSM_ERR, 3291 "rsm_unpublish done: cv_wait INTR qscing" 3292 "getv/putv in progress")); 3293 return (RSMERR_INTERRUPTED); 3294 } 3295 } 3296 3297 /* verify segment state */ 3298 if ((seg->s_state != RSM_STATE_EXPORT) && 3299 (seg->s_state != RSM_STATE_EXPORT_QUIESCED)) { 3300 rsmseglock_release(seg); 3301 DBG_PRINTF((category, RSM_ERR, 3302 "rsm_unpublish done: bad state %x\n", seg->s_state)); 3303 return (RSMERR_SEG_NOT_PUBLISHED); 3304 } 3305 3306 rsmseglock_release(seg); 3307 3308 rsmexport_rm(seg); 3309 3310 rsm_send_importer_disconnects(seg->s_segid, my_nodeid); 3311 3312 rsmseglock_acquire(seg); 3313 /* 3314 * wait for republish to complete 3315 */ 3316 while ((seg->s_state == RSM_STATE_EXPORT) && 3317 (seg->s_flags & RSM_REPUBLISH_WAIT)) { 3318 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) { 3319 DBG_PRINTF((category, RSM_ERR, 3320 "rsm_unpublish done: cv_wait INTR repubing")); 3321 rsmseglock_release(seg); 3322 return (RSMERR_INTERRUPTED); 3323 } 3324 } 3325 3326 if ((seg->s_state != RSM_STATE_EXPORT) && 3327 (seg->s_state != RSM_STATE_EXPORT_QUIESCED)) { 3328 DBG_PRINTF((category, RSM_ERR, 3329 "rsm_unpublish done: invalid state")); 3330 
rsmseglock_release(seg); 3331 return (RSMERR_SEG_NOT_PUBLISHED); 3332 } 3333 3334 /* 3335 * check for putv/get surrogate segment which was not published 3336 * to the driver. 3337 * 3338 * Be certain to see if there is an ACL first! If this segment was 3339 * not published with an ACL, acl will be a null pointer. Check 3340 * that before dereferencing it. 3341 */ 3342 acl = seg->s_acl; 3343 if (acl != (rsmapi_access_entry_t *)NULL) { 3344 if (acl[0].ae_node == my_nodeid && acl[0].ae_permission == 0) 3345 goto bypass; 3346 } 3347 3348 /* The RSMPI unpublish/destroy has been done if seg is QUIESCED */ 3349 if (seg->s_state == RSM_STATE_EXPORT_QUIESCED) 3350 goto bypass; 3351 3352 adapter = seg->s_adapter; 3353 for (;;) { 3354 if (seg->s_state != RSM_STATE_EXPORT) { 3355 rsmseglock_release(seg); 3356 DBG_PRINTF((category, RSM_ERR, 3357 "rsm_unpublish done: bad state %x\n", 3358 seg->s_state)); 3359 return (RSMERR_SEG_NOT_PUBLISHED); 3360 } 3361 3362 /* unpublish from adapter */ 3363 e = adapter->rsmpi_ops->rsm_unpublish(seg->s_handle.out); 3364 3365 if (e == RSM_SUCCESS) { 3366 break; 3367 } 3368 3369 if (e == RSMERR_SEG_IN_USE && mode == 1) { 3370 /* 3371 * wait for unpublish to succeed, it's busy. 
3372 */ 3373 seg->s_flags |= RSM_EXPORT_WAIT; 3374 3375 /* wait for a max of 1 ms - this is an empirical */ 3376 /* value that was found by some minimal testing */ 3377 /* can be fine tuned when we have better numbers */ 3378 /* A long term fix would be to send cv_signal */ 3379 /* from the intr callback routine */ 3380 /* currently nobody signals this wait */ 3381 (void) cv_reltimedwait(&seg->s_cv, &seg->s_lock, 3382 drv_usectohz(1000), TR_CLOCK_TICK); 3383 3384 DBG_PRINTF((category, RSM_ERR, 3385 "rsm_unpublish: SEG_IN_USE\n")); 3386 3387 seg->s_flags &= ~RSM_EXPORT_WAIT; 3388 } else { 3389 if (mode == 1) { 3390 DBG_PRINTF((category, RSM_ERR, 3391 "rsm:rsmpi unpublish err %x\n", e)); 3392 seg->s_state = RSM_STATE_BIND; 3393 } 3394 rsmseglock_release(seg); 3395 return (e); 3396 } 3397 } 3398 3399 /* Free segment */ 3400 e = adapter->rsmpi_ops->rsm_seg_destroy(seg->s_handle.out); 3401 3402 if (e != RSM_SUCCESS) { 3403 DBG_PRINTF((category, RSM_ERR, 3404 "rsm_unpublish: rsmpi destroy key=%x failed %x\n", 3405 seg->s_key, e)); 3406 } 3407 3408 bypass: 3409 acl = seg->s_acl; 3410 rsmpi_acl = seg->s_acl_in; 3411 acl_len = seg->s_acl_len; 3412 3413 seg->s_acl = NULL; 3414 seg->s_acl_in = NULL; 3415 seg->s_acl_len = 0; 3416 3417 if (seg->s_state == RSM_STATE_EXPORT) { 3418 seg->s_state = RSM_STATE_BIND; 3419 } else if (seg->s_state == RSM_STATE_EXPORT_QUIESCED) { 3420 seg->s_state = RSM_STATE_BIND_QUIESCED; 3421 cv_broadcast(&seg->s_cv); 3422 } 3423 3424 rsmseglock_release(seg); 3425 3426 rsmacl_free(acl, acl_len); 3427 rsmpiacl_free(rsmpi_acl, acl_len); 3428 3429 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unpublish done\n")); 3430 3431 return (DDI_SUCCESS); 3432 } 3433 3434 /* 3435 * Called from rsm_unpublish to force an unload and disconnection of all 3436 * importers of the unpublished segment. 3437 * 3438 * First build the list of segments requiring a force disconnect, then 3439 * send a request for each. 
3440 */ 3441 static void 3442 rsm_send_importer_disconnects(rsm_memseg_id_t ex_segid, 3443 rsm_node_id_t ex_nodeid) 3444 { 3445 rsmipc_request_t request; 3446 importing_token_t *prev_token, *token, *tmp_token, *tokp; 3447 importing_token_t *force_disconnect_list = NULL; 3448 int index; 3449 3450 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE, 3451 "rsm_send_importer_disconnects enter\n")); 3452 3453 index = rsmhash(ex_segid); 3454 3455 mutex_enter(&importer_list.lock); 3456 3457 prev_token = NULL; 3458 token = importer_list.bucket[index]; 3459 3460 while (token != NULL) { 3461 if (token->key == ex_segid) { 3462 /* 3463 * take it off the importer list and add it 3464 * to the force disconnect list. 3465 */ 3466 if (prev_token == NULL) 3467 importer_list.bucket[index] = token->next; 3468 else 3469 prev_token->next = token->next; 3470 tmp_token = token; 3471 token = token->next; 3472 if (force_disconnect_list == NULL) { 3473 force_disconnect_list = tmp_token; 3474 tmp_token->next = NULL; 3475 } else { 3476 tokp = force_disconnect_list; 3477 /* 3478 * make sure that the tmp_token's node 3479 * is not already on the force disconnect 3480 * list. 
3481 */ 3482 while (tokp != NULL) { 3483 if (tokp->importing_node == 3484 tmp_token->importing_node) { 3485 break; 3486 } 3487 tokp = tokp->next; 3488 } 3489 if (tokp == NULL) { 3490 tmp_token->next = 3491 force_disconnect_list; 3492 force_disconnect_list = tmp_token; 3493 } else { 3494 kmem_free((void *)tmp_token, 3495 sizeof (*token)); 3496 } 3497 } 3498 3499 } else { 3500 prev_token = token; 3501 token = token->next; 3502 } 3503 } 3504 mutex_exit(&importer_list.lock); 3505 3506 token = force_disconnect_list; 3507 while (token != NULL) { 3508 if (token->importing_node == my_nodeid) { 3509 rsm_force_unload(ex_nodeid, ex_segid, 3510 DISCONNECT); 3511 } else { 3512 request.rsmipc_hdr.rsmipc_type = 3513 RSMIPC_MSG_DISCONNECT; 3514 request.rsmipc_key = token->key; 3515 for (;;) { 3516 if (rsmipc_send(token->importing_node, 3517 &request, 3518 RSM_NO_REPLY) == RSM_SUCCESS) { 3519 break; 3520 } else { 3521 delay(drv_usectohz(10000)); 3522 } 3523 } 3524 } 3525 tmp_token = token; 3526 token = token->next; 3527 kmem_free((void *)tmp_token, sizeof (*token)); 3528 } 3529 3530 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE, 3531 "rsm_send_importer_disconnects done\n")); 3532 } 3533 3534 /* 3535 * This function is used as a callback for unlocking the pages locked 3536 * down by a process which then does a fork or an exec. 3537 * It marks the export segments corresponding to umem cookie given by 3538 * the *arg to be in a ZOMBIE state(by calling rsmseg_close to be 3539 * destroyed later when an rsm_close occurs). 
3540 */ 3541 static void 3542 rsm_export_force_destroy(ddi_umem_cookie_t *ck) 3543 { 3544 rsmresource_blk_t *blk; 3545 rsmresource_t *p; 3546 rsmseg_t *eseg = NULL; 3547 int i, j; 3548 int found = 0; 3549 3550 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE, 3551 "rsm_export_force_destroy enter\n")); 3552 3553 /* 3554 * Walk the resource list and locate the export segment (either 3555 * in the BIND or the EXPORT state) which corresponds to the 3556 * ddi_umem_cookie_t being freed up, and call rsmseg_close. 3557 * Change the state to ZOMBIE by calling rsmseg_close with the 3558 * force_flag argument (the second argument) set to 1. Also, 3559 * unpublish and unbind the segment, but don't free it. Free it 3560 * only on a rsm_close call for the segment. 3561 */ 3562 rw_enter(&rsm_resource.rsmrc_lock, RW_READER); 3563 3564 for (i = 0; i < rsm_resource.rsmrc_len; i++) { 3565 blk = rsm_resource.rsmrc_root[i]; 3566 if (blk == NULL) { 3567 continue; 3568 } 3569 3570 for (j = 0; j < RSMRC_BLKSZ; j++) { 3571 p = blk->rsmrcblk_blks[j]; 3572 if ((p != NULL) && (p != RSMRC_RESERVED) && 3573 (p->rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT)) { 3574 eseg = (rsmseg_t *)p; 3575 if (eseg->s_cookie != ck) 3576 continue; /* continue searching */ 3577 /* 3578 * Found the segment, set flag to indicate 3579 * force destroy processing is in progress 3580 */ 3581 rsmseglock_acquire(eseg); 3582 eseg->s_flags |= RSM_FORCE_DESTROY_WAIT; 3583 rsmseglock_release(eseg); 3584 found = 1; 3585 break; 3586 } 3587 } 3588 3589 if (found) 3590 break; 3591 } 3592 3593 rw_exit(&rsm_resource.rsmrc_lock); 3594 3595 if (found) { 3596 ASSERT(eseg != NULL); 3597 /* call rsmseg_close with force flag set to 1 */ 3598 rsmseg_close(eseg, 1); 3599 /* 3600 * force destroy processing done, clear flag and signal any 3601 * thread waiting in rsmseg_close. 
3602 */ 3603 rsmseglock_acquire(eseg); 3604 eseg->s_flags &= ~RSM_FORCE_DESTROY_WAIT; 3605 cv_broadcast(&eseg->s_cv); 3606 rsmseglock_release(eseg); 3607 } 3608 3609 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE, 3610 "rsm_export_force_destroy done\n")); 3611 } 3612 3613 /* ******************************* Remote Calls *********************** */ 3614 static void 3615 rsm_intr_segconnect(rsm_node_id_t src, rsmipc_request_t *req) 3616 { 3617 rsmipc_reply_t reply; 3618 DBG_DEFINE(category, 3619 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 3620 3621 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3622 "rsm_intr_segconnect enter\n")); 3623 3624 reply.rsmipc_status = (short)rsmsegacl_validate(req, src, &reply); 3625 3626 reply.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPLY; 3627 reply.rsmipc_hdr.rsmipc_cookie = req->rsmipc_hdr.rsmipc_cookie; 3628 3629 (void) rsmipc_send(src, NULL, &reply); 3630 3631 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3632 "rsm_intr_segconnect done\n")); 3633 } 3634 3635 3636 /* 3637 * When an exported segment is unpublished the exporter sends an ipc 3638 * message (RSMIPC_MSG_DISCONNECT) to all importers. The recv ipc dispatcher 3639 * calls this function. The import list is scanned; segments which match the 3640 * exported segment id are unloaded and disconnected. 3641 * 3642 * Will also be called from rsm_rebind with disconnect_flag FALSE. 
3643 * 3644 */ 3645 static void 3646 rsm_force_unload(rsm_node_id_t src_nodeid, 3647 rsm_memseg_id_t ex_segid, 3648 boolean_t disconnect_flag) 3649 3650 { 3651 rsmresource_t *p = NULL; 3652 rsmhash_table_t *rhash = &rsm_import_segs; 3653 uint_t index; 3654 DBG_DEFINE(category, 3655 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 3656 3657 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_force_unload enter\n")); 3658 3659 index = rsmhash(ex_segid); 3660 3661 rw_enter(&rhash->rsmhash_rw, RW_READER); 3662 3663 p = rsmhash_getbkt(rhash, index); 3664 3665 for (; p; p = p->rsmrc_next) { 3666 rsmseg_t *seg = (rsmseg_t *)p; 3667 if ((seg->s_segid == ex_segid) && (seg->s_node == src_nodeid)) { 3668 /* 3669 * In order to make rsmseg_unload and rsm_force_unload 3670 * thread safe, acquire the segment lock here. 3671 * rsmseg_unload is responsible for releasing the lock. 3672 * rsmseg_unload releases the lock just before a call 3673 * to rsmipc_send or in case of an early exit which 3674 * occurs if the segment was in the state 3675 * RSM_STATE_CONNECTING or RSM_STATE_NEW. 3676 */ 3677 rsmseglock_acquire(seg); 3678 if (disconnect_flag) 3679 seg->s_flags |= RSM_FORCE_DISCONNECT; 3680 rsmseg_unload(seg); 3681 } 3682 } 3683 rw_exit(&rhash->rsmhash_rw); 3684 3685 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_force_unload done\n")); 3686 } 3687 3688 static void 3689 rsm_intr_reply(rsmipc_msghdr_t *msg) 3690 { 3691 /* 3692 * Find slot for cookie in reply. 
3693 * Match sequence with sequence in cookie 3694 * If no match; return 3695 * Try to grap lock of slot, if locked return 3696 * copy data into reply slot area 3697 * signal waiter 3698 */ 3699 rsmipc_slot_t *slot; 3700 rsmipc_cookie_t *cookie; 3701 void *data = (void *) msg; 3702 size_t size = sizeof (rsmipc_reply_t); 3703 DBG_DEFINE(category, 3704 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 3705 3706 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_reply enter\n")); 3707 3708 cookie = &msg->rsmipc_cookie; 3709 if (cookie->ic.index >= RSMIPC_SZ) { 3710 DBG_PRINTF((category, RSM_ERR, 3711 "rsm: rsm_intr_reply bad cookie %d\n", cookie->ic.index)); 3712 return; 3713 } 3714 3715 ASSERT(cookie->ic.index < RSMIPC_SZ); 3716 slot = &rsm_ipc.slots[cookie->ic.index]; 3717 mutex_enter(&slot->rsmipc_lock); 3718 if (slot->rsmipc_cookie.value == cookie->value) { 3719 /* found a match */ 3720 if (RSMIPC_GET(slot, RSMIPC_PENDING)) { 3721 bcopy(data, slot->rsmipc_data, size); 3722 RSMIPC_CLEAR(slot, RSMIPC_PENDING); 3723 cv_signal(&slot->rsmipc_cv); 3724 } 3725 } else { 3726 DBG_PRINTF((category, RSM_DEBUG, 3727 "rsm: rsm_intr_reply mismatched reply %d\n", 3728 cookie->ic.index)); 3729 } 3730 mutex_exit(&slot->rsmipc_lock); 3731 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_reply done\n")); 3732 } 3733 3734 /* 3735 * This function gets dispatched on the worker thread when we receive 3736 * the SQREADY message. This function sends the SQREADY_ACK message. 3737 */ 3738 static void 3739 rsm_sqready_ack_deferred(void *arg) 3740 { 3741 path_t *path = (path_t *)arg; 3742 DBG_DEFINE(category, 3743 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 3744 3745 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3746 "rsm_sqready_ack_deferred enter\n")); 3747 3748 mutex_enter(&path->mutex); 3749 3750 /* 3751 * If path is not active no point in sending the ACK 3752 * because the whole SQREADY protocol will again start 3753 * when the path becomes active. 
3754 */ 3755 if (path->state != RSMKA_PATH_ACTIVE) { 3756 /* 3757 * decrement the path refcnt incremented in rsm_proc_sqready 3758 */ 3759 PATH_RELE_NOLOCK(path); 3760 mutex_exit(&path->mutex); 3761 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3762 "rsm_sqready_ack_deferred done:!ACTIVE\n")); 3763 return; 3764 } 3765 3766 /* send an SQREADY_ACK message */ 3767 (void) rsmipc_send_controlmsg(path, RSMIPC_MSG_SQREADY_ACK); 3768 3769 /* initialize credits to the max level */ 3770 path->sendq_token.msgbuf_avail = RSMIPC_MAX_MESSAGES; 3771 3772 /* wake up any send that is waiting for credits */ 3773 cv_broadcast(&path->sendq_token.sendq_cv); 3774 3775 /* 3776 * decrement the path refcnt since we incremented it in 3777 * rsm_proc_sqready 3778 */ 3779 PATH_RELE_NOLOCK(path); 3780 3781 mutex_exit(&path->mutex); 3782 3783 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3784 "rsm_sqready_ack_deferred done\n")); 3785 } 3786 3787 /* 3788 * Process the SQREADY message 3789 */ 3790 static void 3791 rsm_proc_sqready(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr, 3792 rsm_intr_hand_arg_t arg) 3793 { 3794 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)msg; 3795 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg; 3796 path_t *path; 3797 DBG_DEFINE(category, 3798 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 3799 3800 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_proc_sqready enter\n")); 3801 3802 /* look up the path - incr the path refcnt */ 3803 path = rsm_find_path(hdlr_argp->adapter_name, 3804 hdlr_argp->adapter_instance, src_hwaddr); 3805 3806 /* 3807 * No path exists or path is not active - drop the message 3808 */ 3809 if (path == NULL) { 3810 DBG_PRINTF((category, RSM_DEBUG, 3811 "rsm_proc_sqready done: msg dropped no path\n")); 3812 return; 3813 } 3814 3815 mutex_exit(&path->mutex); 3816 3817 /* drain any tasks from the previous incarnation */ 3818 taskq_wait(path->recv_taskq); 3819 3820 mutex_enter(&path->mutex); 3821 /* 3822 * If we'd sent an SQREADY message and were 
waiting for SQREADY_ACK 3823 * in the meanwhile we received an SQREADY message, blindly reset 3824 * the WAIT_FOR_SQACK flag because we'll just send SQREADY_ACK 3825 * and forget about the SQREADY that we sent. 3826 */ 3827 path->flags &= ~RSMKA_WAIT_FOR_SQACK; 3828 3829 if (path->state != RSMKA_PATH_ACTIVE) { 3830 /* decr refcnt and drop the mutex */ 3831 PATH_RELE_NOLOCK(path); 3832 mutex_exit(&path->mutex); 3833 DBG_PRINTF((category, RSM_DEBUG, 3834 "rsm_proc_sqready done: msg dropped path !ACTIVE\n")); 3835 return; 3836 } 3837 3838 DBG_PRINTF((category, RSM_DEBUG, "rsm_proc_sqready:path=%lx " 3839 " src=%lx:%llx\n", path, msghdr->rsmipc_src, src_hwaddr)); 3840 3841 /* 3842 * The sender's local incarnation number is our remote incarnation 3843 * number save it in the path data structure 3844 */ 3845 path->remote_incn = msg->rsmipc_local_incn; 3846 path->sendq_token.msgbuf_avail = 0; 3847 path->procmsg_cnt = 0; 3848 3849 /* 3850 * path is active - dispatch task to send SQREADY_ACK - remember 3851 * RSMPI calls can't be done in interrupt context 3852 * 3853 * We can use the recv_taskq to send because the remote endpoint 3854 * cannot start sending messages till it receives SQREADY_ACK hence 3855 * at this point there are no tasks on recv_taskq. 3856 * 3857 * The path refcnt will be decremented in rsm_sqready_ack_deferred. 
3858 */ 3859 (void) taskq_dispatch(path->recv_taskq, 3860 rsm_sqready_ack_deferred, path, KM_NOSLEEP); 3861 3862 mutex_exit(&path->mutex); 3863 3864 3865 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_proc_sqready done\n")); 3866 } 3867 3868 /* 3869 * Process the SQREADY_ACK message 3870 */ 3871 static void 3872 rsm_proc_sqready_ack(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr, 3873 rsm_intr_hand_arg_t arg) 3874 { 3875 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)msg; 3876 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg; 3877 path_t *path; 3878 DBG_DEFINE(category, 3879 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 3880 3881 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3882 "rsm_proc_sqready_ack enter\n")); 3883 3884 /* look up the path - incr the path refcnt */ 3885 path = rsm_find_path(hdlr_argp->adapter_name, 3886 hdlr_argp->adapter_instance, src_hwaddr); 3887 3888 /* 3889 * drop the message if - no path exists or path is not active 3890 * or if its not waiting for SQREADY_ACK message 3891 */ 3892 if (path == NULL) { 3893 DBG_PRINTF((category, RSM_DEBUG, 3894 "rsm_proc_sqready_ack done: msg dropped no path\n")); 3895 return; 3896 } 3897 3898 if ((path->state != RSMKA_PATH_ACTIVE) || 3899 !(path->flags & RSMKA_WAIT_FOR_SQACK)) { 3900 /* decrement the refcnt */ 3901 PATH_RELE_NOLOCK(path); 3902 mutex_exit(&path->mutex); 3903 DBG_PRINTF((category, RSM_DEBUG, 3904 "rsm_proc_sqready_ack done: msg dropped\n")); 3905 return; 3906 } 3907 3908 /* 3909 * Check if this message is in response to the last RSMIPC_MSG_SQREADY 3910 * sent, if not drop it. 
3911 */ 3912 if (path->local_incn != msghdr->rsmipc_incn) { 3913 /* decrement the refcnt */ 3914 PATH_RELE_NOLOCK(path); 3915 mutex_exit(&path->mutex); 3916 DBG_PRINTF((category, RSM_DEBUG, 3917 "rsm_proc_sqready_ack done: msg old incn %lld\n", 3918 msghdr->rsmipc_incn)); 3919 return; 3920 } 3921 3922 DBG_PRINTF((category, RSM_DEBUG, "rsm_proc_sqready_ack:path=%lx " 3923 " src=%lx:%llx\n", path, msghdr->rsmipc_src, src_hwaddr)); 3924 3925 /* 3926 * clear the WAIT_FOR_SQACK flag since we have recvd the ack 3927 */ 3928 path->flags &= ~RSMKA_WAIT_FOR_SQACK; 3929 3930 /* save the remote sendq incn number */ 3931 path->remote_incn = msg->rsmipc_local_incn; 3932 3933 /* initialize credits to the max level */ 3934 path->sendq_token.msgbuf_avail = RSMIPC_MAX_MESSAGES; 3935 3936 /* wake up any send that is waiting for credits */ 3937 cv_broadcast(&path->sendq_token.sendq_cv); 3938 3939 /* decrement the refcnt */ 3940 PATH_RELE_NOLOCK(path); 3941 3942 mutex_exit(&path->mutex); 3943 3944 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3945 "rsm_proc_sqready_ack done\n")); 3946 } 3947 3948 /* 3949 * process the RSMIPC_MSG_CREDIT message 3950 */ 3951 static void 3952 rsm_add_credits(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr, 3953 rsm_intr_hand_arg_t arg) 3954 { 3955 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)msg; 3956 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg; 3957 path_t *path; 3958 DBG_DEFINE(category, 3959 RSM_KERNEL_AGENT | RSM_FUNC_ALL | 3960 RSM_INTR_CALLBACK | RSM_FLOWCONTROL); 3961 3962 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_add_credits enter\n")); 3963 3964 /* look up the path - incr the path refcnt */ 3965 path = rsm_find_path(hdlr_argp->adapter_name, 3966 hdlr_argp->adapter_instance, src_hwaddr); 3967 3968 if (path == NULL) { 3969 DBG_PRINTF((category, RSM_DEBUG, 3970 "rsm_add_credits enter: path not found\n")); 3971 return; 3972 } 3973 3974 /* the path is not active - discard credits */ 3975 if (path->state != RSMKA_PATH_ACTIVE) { 3976 
PATH_RELE_NOLOCK(path); 3977 mutex_exit(&path->mutex); 3978 DBG_PRINTF((category, RSM_DEBUG, 3979 "rsm_add_credits enter:path=%lx !ACTIVE\n", path)); 3980 return; 3981 } 3982 3983 /* 3984 * Check if these credits are for current incarnation of the path. 3985 */ 3986 if (path->local_incn != msghdr->rsmipc_incn) { 3987 /* decrement the refcnt */ 3988 PATH_RELE_NOLOCK(path); 3989 mutex_exit(&path->mutex); 3990 DBG_PRINTF((category, RSM_DEBUG, 3991 "rsm_add_credits enter: old incn %lld\n", 3992 msghdr->rsmipc_incn)); 3993 return; 3994 } 3995 3996 DBG_PRINTF((category, RSM_DEBUG, 3997 "rsm_add_credits:path=%lx new-creds=%d " 3998 "curr credits=%d src=%lx:%llx\n", path, msg->rsmipc_credits, 3999 path->sendq_token.msgbuf_avail, msghdr->rsmipc_src, 4000 src_hwaddr)); 4001 4002 4003 /* add credits to the path's sendq */ 4004 path->sendq_token.msgbuf_avail += msg->rsmipc_credits; 4005 4006 ASSERT(path->sendq_token.msgbuf_avail <= RSMIPC_MAX_MESSAGES); 4007 4008 /* wake up any send that is waiting for credits */ 4009 cv_broadcast(&path->sendq_token.sendq_cv); 4010 4011 /* decrement the refcnt */ 4012 PATH_RELE_NOLOCK(path); 4013 4014 mutex_exit(&path->mutex); 4015 4016 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_add_credits done\n")); 4017 } 4018 4019 static void 4020 rsm_intr_event(rsmipc_request_t *msg) 4021 { 4022 rsmseg_t *seg; 4023 rsmresource_t *p; 4024 rsm_node_id_t src_node; 4025 DBG_DEFINE(category, 4026 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4027 4028 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_event enter\n")); 4029 4030 src_node = msg->rsmipc_hdr.rsmipc_src; 4031 4032 if ((seg = msg->rsmipc_segment_cookie) != NULL) { 4033 /* This is for an import segment */ 4034 uint_t hashval = rsmhash(msg->rsmipc_key); 4035 4036 rw_enter(&rsm_import_segs.rsmhash_rw, RW_READER); 4037 4038 p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hashval); 4039 4040 for (; p; p = p->rsmrc_next) { 4041 if ((p->rsmrc_key == msg->rsmipc_key) && 4042 (p->rsmrc_node 
== src_node)) { 4043 seg = (rsmseg_t *)p; 4044 rsmseglock_acquire(seg); 4045 4046 atomic_inc_32(&seg->s_pollevent); 4047 4048 if (seg->s_pollflag & RSM_SEGMENT_POLL) 4049 pollwakeup(&seg->s_poll, POLLRDNORM); 4050 4051 rsmseglock_release(seg); 4052 } 4053 } 4054 4055 rw_exit(&rsm_import_segs.rsmhash_rw); 4056 } else { 4057 /* This is for an export segment */ 4058 seg = rsmexport_lookup(msg->rsmipc_key); 4059 if (!seg) { 4060 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4061 "rsm_intr_event done: exp seg not found\n")); 4062 return; 4063 } 4064 4065 ASSERT(rsmseglock_held(seg)); 4066 4067 atomic_inc_32(&seg->s_pollevent); 4068 4069 /* 4070 * We must hold the segment lock here, or else the segment 4071 * can be freed while pollwakeup is using it. This implies 4072 * that we MUST NOT grab the segment lock during rsm_chpoll, 4073 * as outlined in the chpoll(2) man page. 4074 */ 4075 if (seg->s_pollflag & RSM_SEGMENT_POLL) 4076 pollwakeup(&seg->s_poll, POLLRDNORM); 4077 4078 rsmseglock_release(seg); 4079 } 4080 4081 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_event done\n")); 4082 } 4083 4084 /* 4085 * The exporter did a republish and changed the ACL - this change is only 4086 * visible to new importers. 4087 */ 4088 static void 4089 importer_update(rsm_node_id_t src_node, rsm_memseg_id_t key, 4090 rsm_permission_t perm) 4091 { 4092 4093 rsmresource_t *p; 4094 rsmseg_t *seg; 4095 uint_t hashval = rsmhash(key); 4096 DBG_DEFINE(category, 4097 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4098 4099 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_update enter\n")); 4100 4101 rw_enter(&rsm_import_segs.rsmhash_rw, RW_READER); 4102 4103 p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hashval); 4104 4105 for (; p; p = p->rsmrc_next) { 4106 /* 4107 * find the importer and update the permission in the shared 4108 * data structure. 
Any new importers will use the new perms 4109 */ 4110 if ((p->rsmrc_key == key) && (p->rsmrc_node == src_node)) { 4111 seg = (rsmseg_t *)p; 4112 4113 rsmseglock_acquire(seg); 4114 rsmsharelock_acquire(seg); 4115 seg->s_share->rsmsi_mode = perm; 4116 rsmsharelock_release(seg); 4117 rsmseglock_release(seg); 4118 4119 break; 4120 } 4121 } 4122 4123 rw_exit(&rsm_import_segs.rsmhash_rw); 4124 4125 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_update done\n")); 4126 } 4127 4128 void 4129 rsm_suspend_complete(rsm_node_id_t src_node, int flag) 4130 { 4131 int done = 1; /* indicate all SUSPENDS have been acked */ 4132 list_element_t *elem; 4133 DBG_DEFINE(category, 4134 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4135 4136 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4137 "rsm_suspend_complete enter\n")); 4138 4139 mutex_enter(&rsm_suspend_list.list_lock); 4140 4141 if (rsm_suspend_list.list_head == NULL) { 4142 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4143 "rsm_suspend_complete done: suspend_list is empty\n")); 4144 mutex_exit(&rsm_suspend_list.list_lock); 4145 return; 4146 } 4147 4148 elem = rsm_suspend_list.list_head; 4149 while (elem != NULL) { 4150 if (elem->nodeid == src_node) { 4151 /* clear the pending flag for the node */ 4152 elem->flags &= ~RSM_SUSPEND_ACKPENDING; 4153 elem->flags |= flag; 4154 } 4155 4156 if (done && (elem->flags & RSM_SUSPEND_ACKPENDING)) 4157 done = 0; /* still some nodes have not yet ACKED */ 4158 4159 elem = elem->next; 4160 } 4161 4162 mutex_exit(&rsm_suspend_list.list_lock); 4163 4164 if (!done) { 4165 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4166 "rsm_suspend_complete done: acks pending\n")); 4167 return; 4168 } 4169 /* 4170 * Now that we are done with suspending all the remote importers 4171 * time to quiesce the local exporters 4172 */ 4173 exporter_quiesce(); 4174 4175 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4176 "rsm_suspend_complete done\n")); 4177 } 4178 4179 static void 4180 exporter_quiesce() 4181 { 4182 int i, e; 4183 
rsmresource_t *current; 4184 rsmseg_t *seg; 4185 adapter_t *adapter; 4186 DBG_DEFINE(category, 4187 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4188 4189 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exporter_quiesce enter\n")); 4190 /* 4191 * The importers send a SUSPEND_COMPLETE to the exporter node 4192 * Unpublish, unbind the export segment and 4193 * move the segments to the EXPORT_QUIESCED state 4194 */ 4195 4196 rw_enter(&rsm_export_segs.rsmhash_rw, RW_READER); 4197 4198 for (i = 0; i < rsm_hash_size; i++) { 4199 current = rsm_export_segs.bucket[i]; 4200 while (current != NULL) { 4201 seg = (rsmseg_t *)current; 4202 rsmseglock_acquire(seg); 4203 if (current->rsmrc_state == 4204 RSM_STATE_EXPORT_QUIESCING) { 4205 adapter = seg->s_adapter; 4206 /* 4207 * some local memory handles are not published 4208 * check if it was published 4209 */ 4210 if ((seg->s_acl == NULL) || 4211 (seg->s_acl[0].ae_node != my_nodeid) || 4212 (seg->s_acl[0].ae_permission != 0)) { 4213 4214 e = adapter->rsmpi_ops->rsm_unpublish( 4215 seg->s_handle.out); 4216 DBG_PRINTF((category, RSM_DEBUG, 4217 "exporter_quiesce:unpub %d\n", e)); 4218 4219 e = adapter->rsmpi_ops->rsm_seg_destroy( 4220 seg->s_handle.out); 4221 4222 DBG_PRINTF((category, RSM_DEBUG, 4223 "exporter_quiesce:destroy %d\n", 4224 e)); 4225 } 4226 4227 (void) rsm_unbind_pages(seg); 4228 seg->s_state = RSM_STATE_EXPORT_QUIESCED; 4229 cv_broadcast(&seg->s_cv); 4230 } 4231 rsmseglock_release(seg); 4232 current = current->rsmrc_next; 4233 } 4234 } 4235 rw_exit(&rsm_export_segs.rsmhash_rw); 4236 4237 /* 4238 * All the local segments we are done with the pre-del processing 4239 * - time to move to PREDEL_COMPLETED. 
4240 */ 4241 4242 mutex_enter(&rsm_drv_data.drv_lock); 4243 4244 ASSERT(rsm_drv_data.drv_state == RSM_DRV_PREDEL_STARTED); 4245 4246 rsm_drv_data.drv_state = RSM_DRV_PREDEL_COMPLETED; 4247 4248 cv_broadcast(&rsm_drv_data.drv_cv); 4249 4250 mutex_exit(&rsm_drv_data.drv_lock); 4251 4252 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exporter_quiesce done\n")); 4253 } 4254 4255 static void 4256 importer_suspend(rsm_node_id_t src_node) 4257 { 4258 int i; 4259 int susp_flg; /* true means already suspended */ 4260 int num_importers; 4261 rsmresource_t *p = NULL, *curp; 4262 rsmhash_table_t *rhash = &rsm_import_segs; 4263 rsmseg_t *seg; 4264 rsmipc_request_t request; 4265 DBG_DEFINE(category, 4266 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4267 4268 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_suspend enter\n")); 4269 4270 rw_enter(&rhash->rsmhash_rw, RW_READER); 4271 for (i = 0; i < rsm_hash_size; i++) { 4272 p = rhash->bucket[i]; 4273 4274 /* 4275 * Suspend all importers with same <node, key> pair. 4276 * After the last one of the shared importers has been 4277 * suspended - suspend the shared mappings/connection. 4278 */ 4279 for (; p; p = p->rsmrc_next) { 4280 rsmseg_t *first = (rsmseg_t *)p; 4281 if ((first->s_node != src_node) || 4282 (first->s_state == RSM_STATE_DISCONNECT)) 4283 continue; /* go to next entry */ 4284 /* 4285 * search the rest of the bucket for 4286 * other siblings (imprtrs with the same key) 4287 * of "first" and suspend them. 4288 * All importers with same key fall in 4289 * the same bucket. 
4290 */ 4291 num_importers = 0; 4292 for (curp = p; curp; curp = curp->rsmrc_next) { 4293 seg = (rsmseg_t *)curp; 4294 4295 rsmseglock_acquire(seg); 4296 4297 if ((seg->s_node != first->s_node) || 4298 (seg->s_key != first->s_key) || 4299 (seg->s_state == RSM_STATE_DISCONNECT)) { 4300 /* 4301 * either not a peer segment or its a 4302 * disconnected segment - skip it 4303 */ 4304 rsmseglock_release(seg); 4305 continue; 4306 } 4307 4308 rsmseg_suspend(seg, &susp_flg); 4309 4310 if (susp_flg) { /* seg already suspended */ 4311 rsmseglock_release(seg); 4312 break; /* the inner for loop */ 4313 } 4314 4315 num_importers++; 4316 rsmsharelock_acquire(seg); 4317 /* 4318 * we've processed all importers that are 4319 * siblings of "first" 4320 */ 4321 if (num_importers == 4322 seg->s_share->rsmsi_refcnt) { 4323 rsmsharelock_release(seg); 4324 rsmseglock_release(seg); 4325 break; 4326 } 4327 rsmsharelock_release(seg); 4328 rsmseglock_release(seg); 4329 } 4330 4331 /* 4332 * All the importers with the same key and 4333 * nodeid as "first" have been suspended. 4334 * Now suspend the shared connect/mapping. 4335 * This is done only once. 
4336 */ 4337 if (!susp_flg) { 4338 rsmsegshare_suspend(seg); 4339 } 4340 } 4341 } 4342 4343 rw_exit(&rhash->rsmhash_rw); 4344 4345 /* send an ACK for SUSPEND message */ 4346 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SUSPEND_DONE; 4347 (void) rsmipc_send(src_node, &request, RSM_NO_REPLY); 4348 4349 4350 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_suspend done\n")); 4351 4352 } 4353 4354 static void 4355 rsmseg_suspend(rsmseg_t *seg, int *susp_flg) 4356 { 4357 int recheck_state; 4358 rsmcookie_t *hdl; 4359 DBG_DEFINE(category, 4360 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4361 4362 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4363 "rsmseg_suspend enter: key=%u\n", seg->s_key)); 4364 4365 *susp_flg = 0; 4366 4367 ASSERT(rsmseglock_held(seg)); 4368 /* wait if putv/getv is in progress */ 4369 while (seg->s_rdmacnt > 0) 4370 cv_wait(&seg->s_cv, &seg->s_lock); 4371 4372 do { 4373 recheck_state = 0; 4374 4375 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4376 "rsmseg_suspend:segment %x state=%d\n", 4377 seg->s_key, seg->s_state)); 4378 4379 switch (seg->s_state) { 4380 case RSM_STATE_NEW: 4381 /* not a valid state */ 4382 break; 4383 case RSM_STATE_CONNECTING: 4384 seg->s_state = RSM_STATE_ABORT_CONNECT; 4385 break; 4386 case RSM_STATE_ABORT_CONNECT: 4387 break; 4388 case RSM_STATE_CONNECT: 4389 seg->s_handle.in = NULL; 4390 seg->s_state = RSM_STATE_CONN_QUIESCE; 4391 break; 4392 case RSM_STATE_MAPPING: 4393 /* wait until segment leaves the mapping state */ 4394 while (seg->s_state == RSM_STATE_MAPPING) 4395 cv_wait(&seg->s_cv, &seg->s_lock); 4396 recheck_state = 1; 4397 break; 4398 case RSM_STATE_ACTIVE: 4399 /* unload the mappings */ 4400 if (seg->s_ckl != NULL) { 4401 hdl = seg->s_ckl; 4402 for (; hdl != NULL; hdl = hdl->c_next) { 4403 (void) devmap_unload(hdl->c_dhp, 4404 hdl->c_off, hdl->c_len); 4405 } 4406 } 4407 seg->s_mapinfo = NULL; 4408 seg->s_state = RSM_STATE_MAP_QUIESCE; 4409 break; 4410 case RSM_STATE_CONN_QUIESCE: 4411 /* FALLTHRU */ 4412 case 
RSM_STATE_MAP_QUIESCE: 4413 /* rsmseg_suspend already done for seg */ 4414 *susp_flg = 1; 4415 break; 4416 case RSM_STATE_DISCONNECT: 4417 break; 4418 default: 4419 ASSERT(0); /* invalid state */ 4420 } 4421 } while (recheck_state); 4422 4423 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_suspend done\n")); 4424 } 4425 4426 static void 4427 rsmsegshare_suspend(rsmseg_t *seg) 4428 { 4429 int e; 4430 adapter_t *adapter; 4431 rsm_import_share_t *sharedp; 4432 DBG_DEFINE(category, 4433 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4434 4435 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4436 "rsmsegshare_suspend enter\n")); 4437 4438 rsmseglock_acquire(seg); 4439 rsmsharelock_acquire(seg); 4440 4441 sharedp = seg->s_share; 4442 adapter = seg->s_adapter; 4443 switch (sharedp->rsmsi_state) { 4444 case RSMSI_STATE_NEW: 4445 break; 4446 case RSMSI_STATE_CONNECTING: 4447 sharedp->rsmsi_state = RSMSI_STATE_ABORT_CONNECT; 4448 break; 4449 case RSMSI_STATE_ABORT_CONNECT: 4450 break; 4451 case RSMSI_STATE_CONNECTED: 4452 /* do the rsmpi disconnect */ 4453 if (sharedp->rsmsi_node != my_nodeid) { 4454 e = adapter->rsmpi_ops-> 4455 rsm_disconnect(sharedp->rsmsi_handle); 4456 4457 DBG_PRINTF((category, RSM_DEBUG, 4458 "rsm:rsmpi disconnect seg=%x:err=%d\n", 4459 sharedp->rsmsi_segid, e)); 4460 } 4461 4462 sharedp->rsmsi_handle = NULL; 4463 4464 sharedp->rsmsi_state = RSMSI_STATE_CONN_QUIESCE; 4465 break; 4466 case RSMSI_STATE_CONN_QUIESCE: 4467 break; 4468 case RSMSI_STATE_MAPPED: 4469 /* do the rsmpi unmap and disconnect */ 4470 if (sharedp->rsmsi_node != my_nodeid) { 4471 e = adapter->rsmpi_ops->rsm_unmap(seg->s_handle.in); 4472 4473 DBG_PRINTF((category, RSM_DEBUG, 4474 "rsmshare_suspend: rsmpi unmap %d\n", e)); 4475 4476 e = adapter->rsmpi_ops-> 4477 rsm_disconnect(sharedp->rsmsi_handle); 4478 DBG_PRINTF((category, RSM_DEBUG, 4479 "rsm:rsmpi disconnect seg=%x:err=%d\n", 4480 sharedp->rsmsi_segid, e)); 4481 } 4482 4483 sharedp->rsmsi_handle = NULL; 4484 4485 
sharedp->rsmsi_state = RSMSI_STATE_MAP_QUIESCE; 4486 break; 4487 case RSMSI_STATE_MAP_QUIESCE: 4488 break; 4489 case RSMSI_STATE_DISCONNECTED: 4490 break; 4491 default: 4492 ASSERT(0); /* invalid state */ 4493 } 4494 4495 rsmsharelock_release(seg); 4496 rsmseglock_release(seg); 4497 4498 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4499 "rsmsegshare_suspend done\n")); 4500 } 4501 4502 /* 4503 * This should get called on receiving a RESUME message or from 4504 * the pathmanger if the node undergoing DR dies. 4505 */ 4506 static void 4507 importer_resume(rsm_node_id_t src_node) 4508 { 4509 int i; 4510 rsmresource_t *p = NULL; 4511 rsmhash_table_t *rhash = &rsm_import_segs; 4512 void *cookie; 4513 DBG_DEFINE(category, 4514 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4515 4516 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_resume enter\n")); 4517 4518 rw_enter(&rhash->rsmhash_rw, RW_READER); 4519 4520 for (i = 0; i < rsm_hash_size; i++) { 4521 p = rhash->bucket[i]; 4522 4523 for (; p; p = p->rsmrc_next) { 4524 rsmseg_t *seg = (rsmseg_t *)p; 4525 4526 rsmseglock_acquire(seg); 4527 4528 /* process only importers of node undergoing DR */ 4529 if (seg->s_node != src_node) { 4530 rsmseglock_release(seg); 4531 continue; 4532 } 4533 4534 if (rsmseg_resume(seg, &cookie) != RSM_SUCCESS) { 4535 rsmipc_request_t request; 4536 /* 4537 * rsmpi map/connect failed 4538 * inform the exporter so that it can 4539 * remove the importer. 
4540 */ 4541 request.rsmipc_hdr.rsmipc_type = 4542 RSMIPC_MSG_NOTIMPORTING; 4543 request.rsmipc_key = seg->s_segid; 4544 request.rsmipc_segment_cookie = cookie; 4545 rsmseglock_release(seg); 4546 (void) rsmipc_send(seg->s_node, &request, 4547 RSM_NO_REPLY); 4548 } else { 4549 rsmseglock_release(seg); 4550 } 4551 } 4552 } 4553 4554 rw_exit(&rhash->rsmhash_rw); 4555 4556 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_resume done\n")); 4557 } 4558 4559 static int 4560 rsmseg_resume(rsmseg_t *seg, void **cookie) 4561 { 4562 int e; 4563 int retc; 4564 off_t dev_offset; 4565 size_t maplen; 4566 uint_t maxprot; 4567 rsm_mapinfo_t *p; 4568 rsmcookie_t *hdl; 4569 rsm_import_share_t *sharedp; 4570 DBG_DEFINE(category, 4571 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4572 4573 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4574 "rsmseg_resume enter: key=%u\n", seg->s_key)); 4575 4576 *cookie = NULL; 4577 4578 ASSERT(rsmseglock_held(seg)); 4579 4580 if ((seg->s_state != RSM_STATE_CONN_QUIESCE) && 4581 (seg->s_state != RSM_STATE_MAP_QUIESCE)) { 4582 return (RSM_SUCCESS); 4583 } 4584 4585 sharedp = seg->s_share; 4586 4587 rsmsharelock_acquire(seg); 4588 4589 /* resume the shared connection and/or mapping */ 4590 retc = rsmsegshare_resume(seg); 4591 4592 if (seg->s_state == RSM_STATE_CONN_QUIESCE) { 4593 /* shared state can either be connected or mapped */ 4594 if ((sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) || 4595 (sharedp->rsmsi_state == RSMSI_STATE_MAPPED)) { 4596 ASSERT(retc == RSM_SUCCESS); 4597 seg->s_handle.in = sharedp->rsmsi_handle; 4598 rsmsharelock_release(seg); 4599 seg->s_state = RSM_STATE_CONNECT; 4600 4601 } else { /* error in rsmpi connect during resume */ 4602 seg->s_handle.in = NULL; 4603 seg->s_state = RSM_STATE_DISCONNECT; 4604 4605 sharedp->rsmsi_refcnt--; 4606 cookie = (void *)sharedp->rsmsi_cookie; 4607 4608 if (sharedp->rsmsi_refcnt == 0) { 4609 ASSERT(sharedp->rsmsi_mapcnt == 0); 4610 rsmsharelock_release(seg); 4611 4612 /* clean up the 
shared data structure */ 4613 mutex_destroy(&sharedp->rsmsi_lock); 4614 cv_destroy(&sharedp->rsmsi_cv); 4615 kmem_free((void *)(sharedp), 4616 sizeof (rsm_import_share_t)); 4617 4618 } else { 4619 rsmsharelock_release(seg); 4620 } 4621 /* 4622 * The following needs to be done after any 4623 * rsmsharelock calls which use seg->s_share. 4624 */ 4625 seg->s_share = NULL; 4626 } 4627 4628 /* signal any waiting segment */ 4629 cv_broadcast(&seg->s_cv); 4630 4631 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4632 "rsmseg_resume done:state=%d\n", seg->s_state)); 4633 return (retc); 4634 } 4635 4636 ASSERT(seg->s_state == RSM_STATE_MAP_QUIESCE); 4637 4638 /* Setup protections for remap */ 4639 maxprot = PROT_USER; 4640 if (seg->s_mode & RSM_PERM_READ) { 4641 maxprot |= PROT_READ; 4642 } 4643 if (seg->s_mode & RSM_PERM_WRITE) { 4644 maxprot |= PROT_WRITE; 4645 } 4646 4647 if (sharedp->rsmsi_state != RSMSI_STATE_MAPPED) { 4648 /* error in rsmpi connect or map during resume */ 4649 4650 /* remap to trash page */ 4651 ASSERT(seg->s_ckl != NULL); 4652 4653 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) { 4654 e = devmap_umem_remap(hdl->c_dhp, rsm_dip, 4655 remap_cookie, hdl->c_off, hdl->c_len, 4656 maxprot, 0, NULL); 4657 4658 DBG_PRINTF((category, RSM_ERR, 4659 "rsmseg_resume:remap=%d\n", e)); 4660 } 4661 4662 seg->s_handle.in = NULL; 4663 seg->s_state = RSM_STATE_DISCONNECT; 4664 4665 sharedp->rsmsi_refcnt--; 4666 4667 sharedp->rsmsi_mapcnt--; 4668 seg->s_mapinfo = NULL; 4669 4670 if (sharedp->rsmsi_refcnt == 0) { 4671 ASSERT(sharedp->rsmsi_mapcnt == 0); 4672 rsmsharelock_release(seg); 4673 4674 /* clean up the shared data structure */ 4675 mutex_destroy(&sharedp->rsmsi_lock); 4676 cv_destroy(&sharedp->rsmsi_cv); 4677 kmem_free((void *)(sharedp), 4678 sizeof (rsm_import_share_t)); 4679 4680 } else { 4681 rsmsharelock_release(seg); 4682 } 4683 /* 4684 * The following needs to be done after any 4685 * rsmsharelock calls which use seg->s_share. 
4686 */ 4687 seg->s_share = NULL; 4688 4689 /* signal any waiting segment */ 4690 cv_broadcast(&seg->s_cv); 4691 4692 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4693 "rsmseg_resume done:seg=%x,err=%d\n", 4694 seg->s_key, retc)); 4695 return (retc); 4696 4697 } 4698 4699 seg->s_handle.in = sharedp->rsmsi_handle; 4700 4701 if (seg->s_node == my_nodeid) { /* loopback */ 4702 ASSERT(seg->s_mapinfo == NULL); 4703 4704 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) { 4705 e = devmap_umem_remap(hdl->c_dhp, 4706 rsm_dip, seg->s_cookie, 4707 hdl->c_off, hdl->c_len, 4708 maxprot, 0, NULL); 4709 4710 DBG_PRINTF((category, RSM_ERR, 4711 "rsmseg_resume:remap=%d\n", e)); 4712 } 4713 } else { /* remote exporter */ 4714 /* remap to the new rsmpi maps */ 4715 seg->s_mapinfo = sharedp->rsmsi_mapinfo; 4716 4717 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) { 4718 p = rsm_get_mapinfo(seg, hdl->c_off, hdl->c_len, 4719 &dev_offset, &maplen); 4720 e = devmap_devmem_remap(hdl->c_dhp, 4721 p->dip, p->dev_register, dev_offset, 4722 maplen, maxprot, 0, NULL); 4723 4724 DBG_PRINTF((category, RSM_ERR, 4725 "rsmseg_resume:remap=%d\n", e)); 4726 } 4727 } 4728 4729 rsmsharelock_release(seg); 4730 4731 seg->s_state = RSM_STATE_ACTIVE; 4732 cv_broadcast(&seg->s_cv); 4733 4734 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_resume done\n")); 4735 4736 return (retc); 4737 } 4738 4739 static int 4740 rsmsegshare_resume(rsmseg_t *seg) 4741 { 4742 int e = RSM_SUCCESS; 4743 adapter_t *adapter; 4744 rsm_import_share_t *sharedp; 4745 DBG_DEFINE(category, 4746 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4747 4748 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegshare_resume enter\n")); 4749 4750 ASSERT(rsmseglock_held(seg)); 4751 ASSERT(rsmsharelock_held(seg)); 4752 4753 sharedp = seg->s_share; 4754 4755 /* 4756 * If we are not in a xxxx_QUIESCE state that means shared 4757 * connect/mapping processing has been already been done 4758 * so return success. 
4759 */ 4760 if ((sharedp->rsmsi_state != RSMSI_STATE_CONN_QUIESCE) && 4761 (sharedp->rsmsi_state != RSMSI_STATE_MAP_QUIESCE)) { 4762 return (RSM_SUCCESS); 4763 } 4764 4765 adapter = seg->s_adapter; 4766 4767 if (sharedp->rsmsi_node != my_nodeid) { 4768 rsm_addr_t hwaddr; 4769 hwaddr = get_remote_hwaddr(adapter, sharedp->rsmsi_node); 4770 4771 e = adapter->rsmpi_ops->rsm_connect( 4772 adapter->rsmpi_handle, hwaddr, 4773 sharedp->rsmsi_segid, &sharedp->rsmsi_handle); 4774 4775 DBG_PRINTF((category, RSM_DEBUG, 4776 "rsmsegshare_resume:rsmpi connect seg=%x:err=%d\n", 4777 sharedp->rsmsi_segid, e)); 4778 4779 if (e != RSM_SUCCESS) { 4780 /* when do we send the NOT_IMPORTING message */ 4781 sharedp->rsmsi_handle = NULL; 4782 sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED; 4783 /* signal any waiting segment */ 4784 cv_broadcast(&sharedp->rsmsi_cv); 4785 return (e); 4786 } 4787 } 4788 4789 if (sharedp->rsmsi_state == RSMSI_STATE_CONN_QUIESCE) { 4790 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED; 4791 /* signal any waiting segment */ 4792 cv_broadcast(&sharedp->rsmsi_cv); 4793 return (e); 4794 } 4795 4796 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAP_QUIESCE); 4797 4798 /* do the rsmpi map of the whole segment here */ 4799 if (sharedp->rsmsi_node != my_nodeid) { 4800 size_t mapped_len; 4801 rsm_mapinfo_t *p; 4802 4803 /* 4804 * We need to do rsmpi maps with <off, lens> identical to 4805 * the old mapinfo list because the segment mapping handles 4806 * dhp and such need the fragmentation of rsmpi maps to be 4807 * identical to what it was during the mmap of the segment 4808 */ 4809 p = sharedp->rsmsi_mapinfo; 4810 4811 while (p != NULL) { 4812 mapped_len = 0; 4813 4814 e = adapter->rsmpi_ops->rsm_map( 4815 sharedp->rsmsi_handle, p->start_offset, 4816 p->individual_len, &mapped_len, 4817 &p->dip, &p->dev_register, &p->dev_offset, 4818 NULL, NULL); 4819 4820 if (e != 0) { 4821 DBG_PRINTF((category, RSM_ERR, 4822 "rsmsegshare_resume: rsmpi map err=%d\n", 4823 e)); 4824 break; 
4825 } 4826 4827 if (mapped_len != p->individual_len) { 4828 DBG_PRINTF((category, RSM_ERR, 4829 "rsmsegshare_resume: rsmpi maplen" 4830 "< reqlen=%lx\n", mapped_len)); 4831 e = RSMERR_BAD_LENGTH; 4832 break; 4833 } 4834 4835 p = p->next; 4836 4837 } 4838 4839 4840 if (e != RSM_SUCCESS) { /* rsmpi map failed */ 4841 int err; 4842 /* Check if this is the first rsm_map */ 4843 if (p != sharedp->rsmsi_mapinfo) { 4844 /* 4845 * A single rsm_unmap undoes multiple rsm_maps. 4846 */ 4847 (void) seg->s_adapter->rsmpi_ops-> 4848 rsm_unmap(sharedp->rsmsi_handle); 4849 } 4850 4851 rsm_free_mapinfo(sharedp->rsmsi_mapinfo); 4852 sharedp->rsmsi_mapinfo = NULL; 4853 4854 err = adapter->rsmpi_ops-> 4855 rsm_disconnect(sharedp->rsmsi_handle); 4856 4857 DBG_PRINTF((category, RSM_DEBUG, 4858 "rsmsegshare_resume:disconn seg=%x:err=%d\n", 4859 sharedp->rsmsi_segid, err)); 4860 4861 sharedp->rsmsi_handle = NULL; 4862 sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED; 4863 4864 /* signal the waiting segments */ 4865 cv_broadcast(&sharedp->rsmsi_cv); 4866 DBG_PRINTF((category, RSM_DEBUG, 4867 "rsmsegshare_resume done: rsmpi map err\n")); 4868 return (e); 4869 } 4870 } 4871 4872 sharedp->rsmsi_state = RSMSI_STATE_MAPPED; 4873 4874 /* signal any waiting segment */ 4875 cv_broadcast(&sharedp->rsmsi_cv); 4876 4877 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegshare_resume done\n")); 4878 4879 return (e); 4880 } 4881 4882 /* 4883 * this is the routine that gets called by recv_taskq which is the 4884 * thread that processes messages that are flow-controlled. 
4885 */ 4886 static void 4887 rsm_intr_proc_deferred(void *arg) 4888 { 4889 path_t *path = (path_t *)arg; 4890 rsmipc_request_t *msg; 4891 rsmipc_msghdr_t *msghdr; 4892 rsm_node_id_t src_node; 4893 msgbuf_elem_t *head; 4894 int e; 4895 DBG_DEFINE(category, 4896 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4897 4898 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4899 "rsm_intr_proc_deferred enter\n")); 4900 4901 mutex_enter(&path->mutex); 4902 4903 /* use the head of the msgbuf_queue */ 4904 head = rsmka_gethead_msgbuf(path); 4905 4906 mutex_exit(&path->mutex); 4907 4908 msg = (rsmipc_request_t *)&(head->msg); 4909 msghdr = (rsmipc_msghdr_t *)msg; 4910 4911 src_node = msghdr->rsmipc_src; 4912 4913 /* 4914 * messages that need to send a reply should check the message version 4915 * before processing the message. And all messages that need to 4916 * send a reply should be processed here by the worker thread. 4917 */ 4918 switch (msghdr->rsmipc_type) { 4919 case RSMIPC_MSG_SEGCONNECT: 4920 if (msghdr->rsmipc_version != RSM_VERSION) { 4921 rsmipc_reply_t reply; 4922 reply.rsmipc_status = RSMERR_BAD_DRIVER_VERSION; 4923 reply.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPLY; 4924 reply.rsmipc_hdr.rsmipc_cookie = msghdr->rsmipc_cookie; 4925 (void) rsmipc_send(msghdr->rsmipc_src, NULL, &reply); 4926 } else { 4927 rsm_intr_segconnect(src_node, msg); 4928 } 4929 break; 4930 case RSMIPC_MSG_DISCONNECT: 4931 rsm_force_unload(src_node, msg->rsmipc_key, DISCONNECT); 4932 break; 4933 case RSMIPC_MSG_SUSPEND: 4934 importer_suspend(src_node); 4935 break; 4936 case RSMIPC_MSG_SUSPEND_DONE: 4937 rsm_suspend_complete(src_node, 0); 4938 break; 4939 case RSMIPC_MSG_RESUME: 4940 importer_resume(src_node); 4941 break; 4942 default: 4943 ASSERT(0); 4944 } 4945 4946 mutex_enter(&path->mutex); 4947 4948 rsmka_dequeue_msgbuf(path); 4949 4950 /* incr procmsg_cnt can be at most RSMIPC_MAX_MESSAGES */ 4951 if (path->procmsg_cnt < RSMIPC_MAX_MESSAGES) 4952 path->procmsg_cnt++; 4953 4954 
ASSERT(path->procmsg_cnt <= RSMIPC_MAX_MESSAGES); 4955 4956 /* No need to send credits if path is going down */ 4957 if ((path->state == RSMKA_PATH_ACTIVE) && 4958 (path->procmsg_cnt >= RSMIPC_LOTSFREE_MSGBUFS)) { 4959 /* 4960 * send credits and reset procmsg_cnt if success otherwise 4961 * credits will be sent after processing the next message 4962 */ 4963 e = rsmipc_send_controlmsg(path, RSMIPC_MSG_CREDIT); 4964 if (e == 0) 4965 path->procmsg_cnt = 0; 4966 else 4967 DBG_PRINTF((category, RSM_ERR, 4968 "rsm_intr_proc_deferred:send credits err=%d\n", e)); 4969 } 4970 4971 /* 4972 * decrement the path refcnt since we incremented it in 4973 * rsm_intr_callback_dispatch 4974 */ 4975 PATH_RELE_NOLOCK(path); 4976 4977 mutex_exit(&path->mutex); 4978 4979 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4980 "rsm_intr_proc_deferred done\n")); 4981 } 4982 4983 /* 4984 * Flow-controlled messages are enqueued and dispatched onto a taskq here 4985 */ 4986 static void 4987 rsm_intr_callback_dispatch(void *data, rsm_addr_t src_hwaddr, 4988 rsm_intr_hand_arg_t arg) 4989 { 4990 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg; 4991 path_t *path; 4992 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)data; 4993 DBG_DEFINE(category, 4994 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4995 4996 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4997 "rsm_intr_callback_dispatch enter\n")); 4998 ASSERT(data && hdlr_argp); 4999 5000 /* look up the path - incr the path refcnt */ 5001 path = rsm_find_path(hdlr_argp->adapter_name, 5002 hdlr_argp->adapter_instance, src_hwaddr); 5003 5004 /* the path has been removed - drop this message */ 5005 if (path == NULL) { 5006 DBG_PRINTF((category, RSM_DEBUG, 5007 "rsm_intr_callback_dispatch done: msg dropped\n")); 5008 return; 5009 } 5010 /* the path is not active - don't accept new messages */ 5011 if (path->state != RSMKA_PATH_ACTIVE) { 5012 PATH_RELE_NOLOCK(path); 5013 mutex_exit(&path->mutex); 5014 DBG_PRINTF((category, RSM_DEBUG, 5015 
"rsm_intr_callback_dispatch done: msg dropped" 5016 " path=%lx !ACTIVE\n", path)); 5017 return; 5018 } 5019 5020 /* 5021 * Check if this message was sent to an older incarnation 5022 * of the path/sendq. 5023 */ 5024 if (path->local_incn != msghdr->rsmipc_incn) { 5025 /* decrement the refcnt */ 5026 PATH_RELE_NOLOCK(path); 5027 mutex_exit(&path->mutex); 5028 DBG_PRINTF((category, RSM_DEBUG, 5029 "rsm_intr_callback_dispatch done: old incn %lld\n", 5030 msghdr->rsmipc_incn)); 5031 return; 5032 } 5033 5034 /* copy and enqueue msg on the path's msgbuf queue */ 5035 rsmka_enqueue_msgbuf(path, data); 5036 5037 /* 5038 * schedule task to process messages - ignore retval from 5039 * task_dispatch because we sender cannot send more than 5040 * what receiver can handle. 5041 */ 5042 (void) taskq_dispatch(path->recv_taskq, 5043 rsm_intr_proc_deferred, path, KM_NOSLEEP); 5044 5045 mutex_exit(&path->mutex); 5046 5047 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5048 "rsm_intr_callback_dispatch done\n")); 5049 } 5050 5051 /* 5052 * This procedure is called from rsm_srv_func when a remote node creates a 5053 * a send queue. This event is used as a hint that an earlier failed 5054 * attempt to create a send queue to that remote node may now succeed and 5055 * should be retried. Indication of an earlier failed attempt is provided 5056 * by the RSMKA_SQCREATE_PENDING flag. 
5057 */ 5058 static void 5059 rsm_sqcreateop_callback(rsm_addr_t src_hwaddr, rsm_intr_hand_arg_t arg) 5060 { 5061 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg; 5062 path_t *path; 5063 DBG_DEFINE(category, 5064 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 5065 5066 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5067 "rsm_sqcreateop_callback enter\n")); 5068 5069 /* look up the path - incr the path refcnt */ 5070 path = rsm_find_path(hdlr_argp->adapter_name, 5071 hdlr_argp->adapter_instance, src_hwaddr); 5072 5073 if (path == NULL) { 5074 DBG_PRINTF((category, RSM_DEBUG, 5075 "rsm_sqcreateop_callback done: no path\n")); 5076 return; 5077 } 5078 5079 if ((path->state == RSMKA_PATH_UP) && 5080 (path->flags & RSMKA_SQCREATE_PENDING)) { 5081 /* 5082 * previous attempt to create sendq had failed, retry 5083 * it and move to RSMKA_PATH_ACTIVE state if successful. 5084 * the refcnt will be decremented in the do_deferred_work 5085 */ 5086 (void) rsmka_do_path_active(path, RSMKA_NO_SLEEP); 5087 } else { 5088 /* decrement the refcnt */ 5089 PATH_RELE_NOLOCK(path); 5090 } 5091 mutex_exit(&path->mutex); 5092 5093 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5094 "rsm_sqcreateop_callback done\n")); 5095 } 5096 5097 static void 5098 rsm_intr_callback(void *data, rsm_addr_t src_hwaddr, rsm_intr_hand_arg_t arg) 5099 { 5100 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)data; 5101 rsmipc_request_t *msg = (rsmipc_request_t *)data; 5102 rsmipc_controlmsg_t *ctrlmsg = (rsmipc_controlmsg_t *)data; 5103 rsm_node_id_t src_node; 5104 DBG_DEFINE(category, 5105 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 5106 5107 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_callback enter:" 5108 "src=%d, type=%d\n", msghdr->rsmipc_src, 5109 msghdr->rsmipc_type)); 5110 5111 /* 5112 * Check for the version number in the msg header. If it is not 5113 * RSM_VERSION, drop the message. 
In the future, we need to manage 5114 * incompatible version numbers in some way 5115 */ 5116 if (msghdr->rsmipc_version != RSM_VERSION) { 5117 DBG_PRINTF((category, RSM_ERR, "wrong KA version\n")); 5118 /* 5119 * Drop requests that don't have a reply right here 5120 * Request with reply will send a BAD_VERSION reply 5121 * when they get processed by the worker thread. 5122 */ 5123 if (msghdr->rsmipc_type != RSMIPC_MSG_SEGCONNECT) { 5124 return; 5125 } 5126 5127 } 5128 5129 src_node = msghdr->rsmipc_src; 5130 5131 switch (msghdr->rsmipc_type) { 5132 case RSMIPC_MSG_SEGCONNECT: 5133 case RSMIPC_MSG_DISCONNECT: 5134 case RSMIPC_MSG_SUSPEND: 5135 case RSMIPC_MSG_SUSPEND_DONE: 5136 case RSMIPC_MSG_RESUME: 5137 /* 5138 * These message types are handled by a worker thread using 5139 * the flow-control algorithm. 5140 * Any message processing that does one or more of the 5141 * following should be handled in a worker thread. 5142 * - allocates resources and might sleep 5143 * - makes RSMPI calls down to the interconnect driver 5144 * this by defn include requests with reply. 
5145 * - takes a long duration of time 5146 */ 5147 rsm_intr_callback_dispatch(data, src_hwaddr, arg); 5148 break; 5149 case RSMIPC_MSG_NOTIMPORTING: 5150 importer_list_rm(src_node, msg->rsmipc_key, 5151 msg->rsmipc_segment_cookie); 5152 break; 5153 case RSMIPC_MSG_SQREADY: 5154 rsm_proc_sqready(data, src_hwaddr, arg); 5155 break; 5156 case RSMIPC_MSG_SQREADY_ACK: 5157 rsm_proc_sqready_ack(data, src_hwaddr, arg); 5158 break; 5159 case RSMIPC_MSG_CREDIT: 5160 rsm_add_credits(ctrlmsg, src_hwaddr, arg); 5161 break; 5162 case RSMIPC_MSG_REPLY: 5163 rsm_intr_reply(msghdr); 5164 break; 5165 case RSMIPC_MSG_BELL: 5166 rsm_intr_event(msg); 5167 break; 5168 case RSMIPC_MSG_IMPORTING: 5169 importer_list_add(src_node, msg->rsmipc_key, 5170 msg->rsmipc_adapter_hwaddr, 5171 msg->rsmipc_segment_cookie); 5172 break; 5173 case RSMIPC_MSG_REPUBLISH: 5174 importer_update(src_node, msg->rsmipc_key, msg->rsmipc_perm); 5175 break; 5176 default: 5177 DBG_PRINTF((category, RSM_DEBUG, 5178 "rsm_intr_callback: bad msg %lx type %d data %lx\n", 5179 (size_t)msg, (int)(msghdr->rsmipc_type), (size_t)data)); 5180 } 5181 5182 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_callback done\n")); 5183 5184 } 5185 5186 rsm_intr_hand_ret_t rsm_srv_func(rsm_controller_object_t *chd, 5187 rsm_intr_q_op_t opcode, rsm_addr_t src, 5188 void *data, size_t size, rsm_intr_hand_arg_t arg) 5189 { 5190 DBG_DEFINE(category, 5191 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 5192 5193 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_srv_func enter\n")); 5194 5195 switch (opcode) { 5196 case RSM_INTR_Q_OP_CREATE: 5197 DBG_PRINTF((category, RSM_DEBUG, "rsm_srv_func:OP_CREATE\n")); 5198 rsm_sqcreateop_callback(src, arg); 5199 break; 5200 case RSM_INTR_Q_OP_DESTROY: 5201 DBG_PRINTF((category, RSM_DEBUG, "rsm_srv_func:OP_DESTROY\n")); 5202 break; 5203 case RSM_INTR_Q_OP_RECEIVE: 5204 rsm_intr_callback(data, src, arg); 5205 break; 5206 default: 5207 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5208 "rsm_srv_func: 
unknown opcode = %x\n", opcode)); 5209 } 5210 5211 chd = chd; 5212 size = size; 5213 5214 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_srv_func done\n")); 5215 5216 return (RSM_INTR_HAND_CLAIMED); 5217 } 5218 5219 /* *************************** IPC slots ************************* */ 5220 static rsmipc_slot_t * 5221 rsmipc_alloc() 5222 { 5223 int i; 5224 rsmipc_slot_t *slot; 5225 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 5226 5227 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_alloc enter\n")); 5228 5229 /* try to find a free slot, if not wait */ 5230 mutex_enter(&rsm_ipc.lock); 5231 5232 while (rsm_ipc.count == 0) { 5233 rsm_ipc.wanted = 1; 5234 cv_wait(&rsm_ipc.cv, &rsm_ipc.lock); 5235 } 5236 5237 /* An empty slot is available, find it */ 5238 slot = &rsm_ipc.slots[0]; 5239 for (i = 0; i < RSMIPC_SZ; i++, slot++) { 5240 if (RSMIPC_GET(slot, RSMIPC_FREE)) { 5241 RSMIPC_CLEAR(slot, RSMIPC_FREE); 5242 break; 5243 } 5244 } 5245 5246 ASSERT(i < RSMIPC_SZ); 5247 rsm_ipc.count--; /* one less is available */ 5248 rsm_ipc.sequence++; /* new sequence */ 5249 5250 slot->rsmipc_cookie.ic.sequence = (uint_t)rsm_ipc.sequence; 5251 slot->rsmipc_cookie.ic.index = (uint_t)i; 5252 5253 mutex_exit(&rsm_ipc.lock); 5254 5255 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_alloc done\n")); 5256 5257 return (slot); 5258 } 5259 5260 static void 5261 rsmipc_free(rsmipc_slot_t *slot) 5262 { 5263 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 5264 5265 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_free enter\n")); 5266 5267 ASSERT(MUTEX_HELD(&slot->rsmipc_lock)); 5268 ASSERT(&rsm_ipc.slots[slot->rsmipc_cookie.ic.index] == slot); 5269 5270 mutex_enter(&rsm_ipc.lock); 5271 5272 RSMIPC_SET(slot, RSMIPC_FREE); 5273 5274 slot->rsmipc_cookie.ic.sequence = 0; 5275 5276 mutex_exit(&slot->rsmipc_lock); 5277 rsm_ipc.count++; 5278 ASSERT(rsm_ipc.count <= RSMIPC_SZ); 5279 if (rsm_ipc.wanted) { 5280 rsm_ipc.wanted = 0; 5281 cv_broadcast(&rsm_ipc.cv); 5282 } 5283 5284 
mutex_exit(&rsm_ipc.lock);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_free done\n"));
}

/*
 * rsmipc_send - deliver a kernel agent IPC message to node `dest'.
 *
 * When dest is the local node nothing is sent; the request is dispatched
 * directly to the matching handler based on its type and 0 is returned.
 * NOTE(review): the local path dereferences req (and reply for
 * RSMIPC_MSG_SEGCONNECT) without a NULL check - presumably callers never
 * pass a NULL req with dest == my_nodeid; confirm.
 *
 * For a remote node the mode is selected by the arguments:
 *	reply == NULL	send the request and do not wait for an answer
 *	req == NULL	send the reply message itself (no flow control)
 *	both non-NULL	send the request, then wait on an ipc slot; the
 *			request is sent again while the slot is still
 *			RSMIPC_PENDING after a timed out wait
 *
 * Requests of the types listed in the credit_check switch below are flow
 * controlled: one msgbuf_avail credit is reserved on the sendq token before
 * rsm_send and given back if the send fails.
 *
 * A failed send jumps back to the `again' label; once retry_cnt reaches
 * min_retry_cnt there is a 10ms delay before the next attempt.  Retrying
 * ends when rsmka_get_sendq_token returns NULL
 * (RSMERR_REMOTE_NODE_UNREACHABLE) or returns the token used on the previous
 * pass while the last error was RSMERR_CONN_ABORTED, RSMERR_TIMEOUT or
 * RSMERR_COMM_ERR_MAYBE_DELIVERED (RSMERR_CONN_ABORTED is returned).
 *
 * Returns the result of the send (RSM_SUCCESS on success),
 * RSMERR_INTERRUPTED if a wait was interrupted by a signal, or one of the
 * errors described above.
 */
static int
rsmipc_send(rsm_node_id_t dest, rsmipc_request_t *req, rsmipc_reply_t *reply)
{
	int		e = 0;
	int		credit_check = 0;
	int		retry_cnt = 0;
	int		min_retry_cnt = 10;
	rsm_send_t	is;
	rsmipc_slot_t	*rslot;
	adapter_t	*adapter;
	path_t		*path;
	sendq_token_t	*sendq_token;
	sendq_token_t	*used_sendq_token = NULL;
	rsm_send_q_handle_t	ipc_handle;
	DBG_DEFINE(category,
	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_send enter:dest=%d",
	    dest));

	/*
	 * Check if this is a local case
	 */
	if (dest == my_nodeid) {
		switch (req->rsmipc_hdr.rsmipc_type) {
		case RSMIPC_MSG_SEGCONNECT:
			reply->rsmipc_status = (short)rsmsegacl_validate(
			    req, dest, reply);
			break;
		case RSMIPC_MSG_BELL:
			req->rsmipc_hdr.rsmipc_src = dest;
			rsm_intr_event(req);
			break;
		case RSMIPC_MSG_IMPORTING:
			importer_list_add(dest, req->rsmipc_key,
			    req->rsmipc_adapter_hwaddr,
			    req->rsmipc_segment_cookie);
			break;
		case RSMIPC_MSG_NOTIMPORTING:
			importer_list_rm(dest, req->rsmipc_key,
			    req->rsmipc_segment_cookie);
			break;
		case RSMIPC_MSG_REPUBLISH:
			importer_update(dest, req->rsmipc_key,
			    req->rsmipc_perm);
			break;
		case RSMIPC_MSG_SUSPEND:
			importer_suspend(dest);
			break;
		case RSMIPC_MSG_SUSPEND_DONE:
			rsm_suspend_complete(dest, 0);
			break;
		case RSMIPC_MSG_RESUME:
			importer_resume(dest);
			break;
		default:
			ASSERT(0);
		}
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsmipc_send done\n"));
		return (0);
	}

	if (dest >= MAX_NODES) {
		DBG_PRINTF((category, RSM_ERR,
		    "rsm: rsmipc_send bad node number %x\n", dest));
		return (RSMERR_REMOTE_NODE_UNREACHABLE);
	}

	/*
	 * Oh boy! we are going remote.
	 */

	/*
	 * identify if we need to have credits to send this message
	 * - only selected requests are flow controlled
	 */
	if (req != NULL) {
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsmipc_send:request type=%d\n",
		    req->rsmipc_hdr.rsmipc_type));

		switch (req->rsmipc_hdr.rsmipc_type) {
		case RSMIPC_MSG_SEGCONNECT:
		case RSMIPC_MSG_DISCONNECT:
		case RSMIPC_MSG_IMPORTING:
		case RSMIPC_MSG_SUSPEND:
		case RSMIPC_MSG_SUSPEND_DONE:
		case RSMIPC_MSG_RESUME:
			credit_check = 1;
			break;
		default:
			credit_check = 0;
		}
	}

again:
	if (retry_cnt++ == min_retry_cnt) {
		/* backoff before further retries for 10ms */
		delay(drv_usectohz(10000));
		retry_cnt = 0; /* reset retry_cnt */
	}
	sendq_token = rsmka_get_sendq_token(dest, used_sendq_token);
	if (sendq_token == NULL) {
		DBG_PRINTF((category, RSM_ERR,
		    "rsm: rsmipc_send no device to reach node %d\n", dest));
		return (RSMERR_REMOTE_NODE_UNREACHABLE);
	}

	/*
	 * We were handed the same token that was used on the previous pass
	 * and that pass ended with a connection type error - stop retrying.
	 */
	if ((sendq_token == used_sendq_token) &&
	    ((e == RSMERR_CONN_ABORTED) || (e == RSMERR_TIMEOUT) ||
	    (e == RSMERR_COMM_ERR_MAYBE_DELIVERED))) {
		rele_sendq_token(sendq_token);
		DBG_PRINTF((category, RSM_DEBUG, "rsmipc_send done=%d\n", e));
		return (RSMERR_CONN_ABORTED);
	} else
		used_sendq_token = sendq_token;

	/* lint -save -e413 */
	path = SQ_TOKEN_TO_PATH(sendq_token);
	adapter = path->local_adapter;
	/* lint -restore */
	ipc_handle = sendq_token->rsmpi_sendq_handle;

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsmipc_send: path=%lx sendq_hdl=%lx\n", path, ipc_handle));

	if (reply == NULL) {
		/* Send request without ack */
		/*
		 * Set the rsmipc_version number in the msghdr for KA
		 * communication versioning
		 */
		req->rsmipc_hdr.rsmipc_version = RSM_VERSION;
		req->rsmipc_hdr.rsmipc_src = my_nodeid;
		/*
		 * remote endpoints incn should match the value in our
		 * path's remote_incn field. No need to grab any lock
		 * since we have refcnted the path in rsmka_get_sendq_token
		 */
		req->rsmipc_hdr.rsmipc_incn = path->remote_incn;

		is.is_data = (void *)req;
		is.is_size = sizeof (*req);
		is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
		is.is_wait = 0;

		if (credit_check) {
			mutex_enter(&path->mutex);
			/*
			 * wait till we recv credits or path goes down. If path
			 * goes down rsm_send will fail and we handle the error
			 * then
			 */
			while ((sendq_token->msgbuf_avail == 0) &&
			    (path->state == RSMKA_PATH_ACTIVE)) {
				e = cv_wait_sig(&sendq_token->sendq_cv,
				    &path->mutex);
				if (e == 0) {
					/* interrupted by a signal */
					mutex_exit(&path->mutex);
					no_reply_cnt++;
					rele_sendq_token(sendq_token);
					DBG_PRINTF((category, RSM_DEBUG,
					    "rsmipc_send done: "
					    "cv_wait INTERRUPTED"));
					return (RSMERR_INTERRUPTED);
				}
			}

			/*
			 * path is not active retry on another path.
			 */
			if (path->state != RSMKA_PATH_ACTIVE) {
				mutex_exit(&path->mutex);
				rele_sendq_token(sendq_token);
				e = RSMERR_CONN_ABORTED;
				DBG_PRINTF((category, RSM_ERR,
				    "rsm: rsmipc_send: path !ACTIVE"));
				goto again;
			}

			ASSERT(sendq_token->msgbuf_avail > 0);

			/*
			 * reserve a msgbuf
			 */
			sendq_token->msgbuf_avail--;

			/* the path mutex is not held across rsm_send */
			mutex_exit(&path->mutex);

			e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
			    NULL);

			if (e != RSM_SUCCESS) {
				mutex_enter(&path->mutex);
				/*
				 * release the reserved msgbuf since
				 * the send failed
				 */
				sendq_token->msgbuf_avail++;
				cv_broadcast(&sendq_token->sendq_cv);
				mutex_exit(&path->mutex);
			}
		} else
			e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
			    NULL);

		no_reply_cnt++;
		rele_sendq_token(sendq_token);
		if (e != RSM_SUCCESS) {
			DBG_PRINTF((category, RSM_ERR,
			    "rsm: rsmipc_send no reply send"
			    " err = %d no reply count = %d\n",
			    e, no_reply_cnt));
			ASSERT(e != RSMERR_QUEUE_FENCE_UP &&
			    e != RSMERR_BAD_BARRIER_HNDL);
			atomic_inc_64(&rsm_ipcsend_errcnt);
			goto again;
		} else {
			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
			    "rsmipc_send done\n"));
			return (e);
		}

	}

	if (req == NULL) {
		/* Send reply - No flow control is done for reply */
		/*
		 * Set the version in the msg header for KA communication
		 * versioning
		 */
		reply->rsmipc_hdr.rsmipc_version = RSM_VERSION;
		reply->rsmipc_hdr.rsmipc_src = my_nodeid;
		/* incn number is not used for reply msgs currently */
		reply->rsmipc_hdr.rsmipc_incn = path->remote_incn;

		is.is_data = (void *)reply;
		is.is_size = sizeof (*reply);
		is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
		is.is_wait = 0;
		e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, NULL);
		rele_sendq_token(sendq_token);
		if (e != RSM_SUCCESS) {
			DBG_PRINTF((category, RSM_ERR,
			    "rsm: rsmipc_send reply send"
			    " err = %d\n", e));
			atomic_inc_64(&rsm_ipcsend_errcnt);
			goto again;
		} else {
			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
			    "rsmipc_send done\n"));
			return (e);
		}
	}

	/* Reply needed */
	rslot = rsmipc_alloc(); /* allocate a new ipc slot */

	mutex_enter(&rslot->rsmipc_lock);

	/* the slot records where the reply data is to be stored */
	rslot->rsmipc_data = (void *)reply;
	RSMIPC_SET(rslot, RSMIPC_PENDING);

	while (RSMIPC_GET(rslot, RSMIPC_PENDING)) {
		/*
		 * Set the rsmipc_version number in the msghdr for KA
		 * communication versioning
		 */
		req->rsmipc_hdr.rsmipc_version = RSM_VERSION;
		req->rsmipc_hdr.rsmipc_src = my_nodeid;
		req->rsmipc_hdr.rsmipc_cookie = rslot->rsmipc_cookie;
		/*
		 * remote endpoints incn should match the value in our
		 * path's remote_incn field. No need to grab any lock
		 * since we have refcnted the path in rsmka_get_sendq_token
		 */
		req->rsmipc_hdr.rsmipc_incn = path->remote_incn;

		is.is_data = (void *)req;
		is.is_size = sizeof (*req);
		is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
		is.is_wait = 0;
		if (credit_check) {

			mutex_enter(&path->mutex);
			/*
			 * wait till we recv credits or path goes down. If path
			 * goes down rsm_send will fail and we handle the error
			 * then.
			 */
			while ((sendq_token->msgbuf_avail == 0) &&
			    (path->state == RSMKA_PATH_ACTIVE)) {
				e = cv_wait_sig(&sendq_token->sendq_cv,
				    &path->mutex);
				if (e == 0) {
					/* interrupted by a signal */
					mutex_exit(&path->mutex);
					RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
					rsmipc_free(rslot);
					rele_sendq_token(sendq_token);
					DBG_PRINTF((category, RSM_DEBUG,
					    "rsmipc_send done: "
					    "cv_wait INTERRUPTED"));
					return (RSMERR_INTERRUPTED);
				}
			}

			/*
			 * path is not active retry on another path.
			 */
			if (path->state != RSMKA_PATH_ACTIVE) {
				mutex_exit(&path->mutex);
				RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
				rsmipc_free(rslot);
				rele_sendq_token(sendq_token);
				e = RSMERR_CONN_ABORTED;
				DBG_PRINTF((category, RSM_ERR,
				    "rsm: rsmipc_send: path !ACTIVE"));
				goto again;
			}

			ASSERT(sendq_token->msgbuf_avail > 0);

			/*
			 * reserve a msgbuf
			 */
			sendq_token->msgbuf_avail--;

			/* the path mutex is not held across rsm_send */
			mutex_exit(&path->mutex);

			e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
			    NULL);

			if (e != RSM_SUCCESS) {
				mutex_enter(&path->mutex);
				/*
				 * release the reserved msgbuf since
				 * the send failed
				 */
				sendq_token->msgbuf_avail++;
				cv_broadcast(&sendq_token->sendq_cv);
				mutex_exit(&path->mutex);
			}
		} else
			e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
			    NULL);

		if (e != RSM_SUCCESS) {
			/*
			 * give the slot back before retrying; a new slot is
			 * allocated after the jump to the again label
			 */
			DBG_PRINTF((category, RSM_ERR,
			    "rsm: rsmipc_send rsmpi send err = %d\n", e));
			RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
			rsmipc_free(rslot);
			rele_sendq_token(sendq_token);
			atomic_inc_64(&rsm_ipcsend_errcnt);
			goto again;
		}

		/* wait for a reply signal, a SIGINT, or 5 sec. timeout */
		e = cv_reltimedwait_sig(&rslot->rsmipc_cv, &rslot->rsmipc_lock,
		    drv_usectohz(5000000), TR_CLOCK_TICK);
		if (e < 0) {
			/* timed out - retry */
			e = RSMERR_TIMEOUT;
		} else if (e == 0) {
			/* signalled - return error */
			e = RSMERR_INTERRUPTED;
			break;
		} else {
			e = RSM_SUCCESS;
		}
	}

	RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
	rsmipc_free(rslot);
	rele_sendq_token(sendq_token);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_send done=%d\n", e));
	return (e);
}

/*
 * Send an RSMIPC_MSG_NOTIMPORTING request for segment `segid' to node
 * `dest', carrying `cookie' as the segment cookie.  RSM_NO_REPLY is passed
 * as the reply argument and the rsmipc_send result is returned.
 */
static int
rsm_send_notimporting(rsm_node_id_t dest, rsm_memseg_id_t segid, void *cookie)
{
	rsmipc_request_t request;

	/*
	 * inform the exporter to delete this importer
	 */
	request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_NOTIMPORTING;
	request.rsmipc_key = segid;
	request.rsmipc_segment_cookie = cookie;
	return (rsmipc_send(dest, &request, RSM_NO_REPLY));
}

/*
 * Send an RSMIPC_MSG_REPUBLISH request to every node found on the importer
 * list for `segid'.  A node that appears in acl[0..acl_len-1] is sent its
 * ae_permission; any other importing node is sent default_permission.
 * The list of targets is built while holding importer_list.lock and the
 * messages are sent after the lock has been dropped.  The results of the
 * sends are ignored.
 */
static void
rsm_send_republish(rsm_memseg_id_t segid, rsmapi_access_entry_t *acl,
    int acl_len, rsm_permission_t default_permission)
{
	int			i;
	importing_token_t	*token;
	rsmipc_request_t	request;
	republish_token_t	*republish_list = NULL;
	republish_token_t	*rp;
	rsm_permission_t	permission;
	int			index;

	/*
	 * send the new access mode to all the nodes that have imported
	 * this segment.
	 * If the new acl does not contain a node that is importing the
	 * segment, default_permission is sent to that node.
	 */

	index = rsmhash(segid);

	/*
	 * create a list of node/permissions to send the republish message
	 */
	mutex_enter(&importer_list.lock);

	token = importer_list.bucket[index];
	while (token != NULL) {
		if (segid == token->key) {
			permission = default_permission;

			for (i = 0; i < acl_len; i++) {
				if (token->importing_node == acl[i].ae_node) {
					permission = acl[i].ae_permission;
					break;
				}
			}
			rp = kmem_zalloc(sizeof (republish_token_t), KM_SLEEP);

			rp->key = segid;
			rp->importing_node = token->importing_node;
			rp->permission = permission;
			rp->next = republish_list;
			republish_list = rp;
		}
		token = token->next;
	}

	mutex_exit(&importer_list.lock);

	request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPUBLISH;
	request.rsmipc_key = segid;

	/* send one message per list entry, freeing the entry afterwards */
	while (republish_list != NULL) {
		request.rsmipc_perm = republish_list->permission;
		(void) rsmipc_send(republish_list->importing_node,
		    &request, RSM_NO_REPLY);
		rp = republish_list;
		republish_list = republish_list->next;
		kmem_free(rp, sizeof (republish_token_t));
	}
}

/*
 * Send RSMIPC_MSG_SUSPEND to every distinct node on the importer list.
 * The list of nodes is installed as rsm_suspend_list.list_head before any
 * message is sent, and each node whose send succeeded is flagged
 * RSM_SUSPEND_ACKPENDING.  If there are no importers at all,
 * exporter_quiesce() is called directly instead.
 */
static void
rsm_send_suspend()
{
	int			i, e;
	rsmipc_request_t	request;
	list_element_t		*tokp;
	list_element_t		*head = NULL;
	importing_token_t	*token;
	DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
	    "rsm_send_suspend enter\n"));

	/*
	 * create a list of node to send the suspend message
	 *
	 * Currently the whole importer list is scanned and we obtain
	 * all the nodes - this basically gets all nodes that at least
	 * import one segment from the local node.
	 *
	 * no need to grab the rsm_suspend_list lock here since we are
	 * single threaded when suspend is called.
	 */

	mutex_enter(&importer_list.lock);
	for (i = 0; i < rsm_hash_size; i++) {

		token = importer_list.bucket[i];

		while (token != NULL) {

			tokp = head;

			/*
			 * make sure that the token's node
			 * is not already on the suspend list
			 */
			while (tokp != NULL) {
				if (tokp->nodeid == token->importing_node) {
					break;
				}
				tokp = tokp->next;
			}

			if (tokp == NULL) { /* not in suspend list */
				tokp = kmem_zalloc(sizeof (list_element_t),
				    KM_SLEEP);
				tokp->nodeid = token->importing_node;
				tokp->next = head;
				head = tokp;
			}

			token = token->next;
		}
	}
	mutex_exit(&importer_list.lock);

	if (head == NULL) { /* no importers so go ahead and quiesce segments */
		exporter_quiesce();
		return;
	}

	mutex_enter(&rsm_suspend_list.list_lock);
	ASSERT(rsm_suspend_list.list_head == NULL);
	/*
	 * update the suspend list right away so that if a node dies the
	 * pathmanager can set the NODE dead flag
	 */
	rsm_suspend_list.list_head = head;
	mutex_exit(&rsm_suspend_list.list_lock);

	tokp = head;

	while (tokp != NULL) {
		request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SUSPEND;
		e = rsmipc_send(tokp->nodeid, &request, RSM_NO_REPLY);
		/*
		 * Error in rsmipc_send currently happens due to inaccessibility
		 * of the remote node.  An ack is only marked as pending when
		 * the send succeeded; after a failed send the flag stays
		 * clear so nothing waits for an ack from that node.
		 */
		if (e == RSM_SUCCESS) { /* send succeeded - ack is pending */
			tokp->flags |= RSM_SUSPEND_ACKPENDING;
		}

		tokp = tokp->next;
	}

	DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
	    "rsm_send_suspend done\n"));

}

/*
 * Detach the suspend list from rsm_suspend_list and send
 * RSMIPC_MSG_RESUME to each node on it, freeing every list element after
 * its message has been sent.  The results of the sends are ignored.
 */
static void
rsm_send_resume()
{
	rsmipc_request_t	request;
	list_element_t		*elem, *head;

	/*
	 * save the suspend list so that we know where to send
	 * the resume messages and make the suspend list head
	 * NULL.
	 */
	mutex_enter(&rsm_suspend_list.list_lock);
	head = rsm_suspend_list.list_head;
	rsm_suspend_list.list_head = NULL;
	mutex_exit(&rsm_suspend_list.list_lock);

	while (head != NULL) {
		elem = head;
		head = head->next;

		request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_RESUME;

		(void) rsmipc_send(elem->nodeid, &request, RSM_NO_REPLY);

		kmem_free((void *)elem, sizeof (list_element_t));

	}

}

/*
 * This function takes path and sends a message using the sendq
 * corresponding to it. The RSMIPC_MSG_SQREADY, RSMIPC_MSG_SQREADY_ACK
 * and RSMIPC_MSG_CREDIT are sent using this function.
 *
 * The caller must hold path->mutex.  The mutex is dropped around each
 * rsm_send call and is held again when the function returns.
 *
 * Returns 1 without sending anything if the path is not
 * RSMKA_PATH_ACTIVE; otherwise returns the result of the last rsm_send.
 * A failed send is repeated for as long as the path stays active, with a
 * 10ms timed wait on the sendq cv after every min_retry_cnt failures.
 */
int
rsmipc_send_controlmsg(path_t *path, int msgtype)
{
	int			e;
	int			retry_cnt = 0;
	int			min_retry_cnt = 10;
	adapter_t		*adapter;
	rsm_send_t		is;
	rsm_send_q_handle_t	ipc_handle;
	rsmipc_controlmsg_t	msg;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_FLOWCONTROL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsmipc_send_controlmsg enter\n"));

	ASSERT(MUTEX_HELD(&path->mutex));

	adapter = path->local_adapter;

	DBG_PRINTF((category, RSM_DEBUG, "rsmipc_send_controlmsg:path=%lx "
	    "msgtype=%d %lx:%llx->%lx:%llx procmsg=%d\n", path, msgtype,
	    my_nodeid, adapter->hwaddr, path->remote_node,
	    path->remote_hwaddr, path->procmsg_cnt));

	if (path->state != RSMKA_PATH_ACTIVE) {
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsmipc_send_controlmsg done: ! RSMKA_PATH_ACTIVE"));
		return (1);
	}

	ipc_handle = path->sendq_token.rsmpi_sendq_handle;

	msg.rsmipc_hdr.rsmipc_version = RSM_VERSION;
	msg.rsmipc_hdr.rsmipc_src = my_nodeid;
	msg.rsmipc_hdr.rsmipc_type = msgtype;
	msg.rsmipc_hdr.rsmipc_incn = path->remote_incn;

	/* only credit messages carry a credit count */
	if (msgtype == RSMIPC_MSG_CREDIT)
		msg.rsmipc_credits = path->procmsg_cnt;

	msg.rsmipc_local_incn = path->local_incn;

	msg.rsmipc_adapter_hwaddr = adapter->hwaddr;
	/* incr the sendq, path refcnt */
	PATH_HOLD_NOLOCK(path);
	SENDQ_TOKEN_HOLD(path);

	do {
		/* drop the path lock before doing the rsm_send */
		mutex_exit(&path->mutex);

		is.is_data = (void *)&msg;
		is.is_size = sizeof (msg);
		is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
		is.is_wait = 0;

		e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, NULL);

		ASSERT(e != RSMERR_QUEUE_FENCE_UP &&
		    e != RSMERR_BAD_BARRIER_HNDL);

		/* retake the path lock; it is held on every exit from the loop */
		mutex_enter(&path->mutex);

		if (e == RSM_SUCCESS) {
			break;
		}
		/* error counter for statistics */
		atomic_inc_64(&rsm_ctrlmsg_errcnt);

		DBG_PRINTF((category, RSM_ERR,
		    "rsmipc_send_controlmsg:rsm_send error=%d", e));

		if (++retry_cnt == min_retry_cnt) { /* backoff before retry */
			(void) cv_reltimedwait(&path->sendq_token.sendq_cv,
			    &path->mutex, drv_usectohz(10000), TR_CLOCK_TICK);
			retry_cnt = 0;
		}
	} while (path->state == RSMKA_PATH_ACTIVE);

	/* decrement the sendq,path refcnt that we incr before rsm_send */
	SENDQ_TOKEN_RELE(path);
	PATH_RELE_NOLOCK(path);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsmipc_send_controlmsg done=%d", e));
	return (e);
}

/*
 * Called from rsm_force_unload and path_importer_disconnect.
The memory
 * mapping for the imported segment is removed and the segment is
 * disconnected at the interconnect layer if disconnect_flag is TRUE.
 * rsm_force_unload will get disconnect_flag TRUE from rsm_intr_callback
 * and FALSE from rsm_rebind.
 *
 * When subsequent accesses cause page faulting, the dummy page is mapped
 * to resolve the fault, and the mapping generation number is incremented
 * so that the application can be notified on a close barrier operation.
 *
 * It is important to note that the caller of rsmseg_unload is responsible for
 * acquiring the segment lock before making a call to rsmseg_unload. This is
 * required to make the caller and rsmseg_unload thread safe. The segment lock
 * will be released by the rsmseg_unload function.
 *
 * NOTE(review): rsmseg_unload itself takes no disconnect_flag argument; the
 * remap/disconnect below is done only when RSM_FORCE_DISCONNECT is set in
 * im_seg->s_flags - presumably that is the flag the text above refers to.
 */
void
rsmseg_unload(rsmseg_t *im_seg)
{
	rsmcookie_t		*hdl;
	void			*shared_cookie;
	rsmipc_request_t	request;
	uint_t			maxprot;

	DBG_DEFINE(category,
	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_unload enter\n"));

	ASSERT(im_seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);

	/* wait until segment leaves the mapping state */
	while (im_seg->s_state == RSM_STATE_MAPPING)
		cv_wait(&im_seg->s_cv, &im_seg->s_lock);
	/*
	 * An unload is only necessary if the segment is connected. However,
	 * if the segment was on the import list in state RSM_STATE_CONNECTING
	 * then a connection was in progress. Change the segment to
	 * RSM_STATE_ABORT_CONNECT (and the shared state to
	 * RSMSI_STATE_ABORT_CONNECT) here to cause an early exit from the
	 * connection process.
	 */
	if (im_seg->s_state == RSM_STATE_NEW) {
		rsmseglock_release(im_seg);
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsmseg_unload done: RSM_STATE_NEW\n"));
		return;
	} else if (im_seg->s_state == RSM_STATE_CONNECTING) {
		im_seg->s_state = RSM_STATE_ABORT_CONNECT;
		rsmsharelock_acquire(im_seg);
		im_seg->s_share->rsmsi_state = RSMSI_STATE_ABORT_CONNECT;
		rsmsharelock_release(im_seg);
		rsmseglock_release(im_seg);
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsmseg_unload done: RSM_STATE_CONNECTING\n"));
		return;
	}

	if (im_seg->s_flags & RSM_FORCE_DISCONNECT) {
		if (im_seg->s_ckl != NULL) {
			int e;
			/* Setup protections for remap */
			maxprot = PROT_USER;
			if (im_seg->s_mode & RSM_PERM_READ) {
				maxprot |= PROT_READ;
			}
			if (im_seg->s_mode & RSM_PERM_WRITE) {
				maxprot |= PROT_WRITE;
			}
			/* remap every entry on the segment's cookie list */
			hdl = im_seg->s_ckl;
			for (; hdl != NULL; hdl = hdl->c_next) {
				e = devmap_umem_remap(hdl->c_dhp, rsm_dip,
				    remap_cookie,
				    hdl->c_off, hdl->c_len,
				    maxprot, 0, NULL);

				DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
				    "remap returns %d\n", e));
			}
		}

		(void) rsm_closeconnection(im_seg, &shared_cookie);

		if (shared_cookie != NULL) {
			/*
			 * inform the exporting node so this import
			 * can be deleted from the list of importers.
			 * The segment lock is released before the send.
			 */
			request.rsmipc_hdr.rsmipc_type =
			    RSMIPC_MSG_NOTIMPORTING;
			request.rsmipc_key = im_seg->s_segid;
			request.rsmipc_segment_cookie = shared_cookie;
			rsmseglock_release(im_seg);
			(void) rsmipc_send(im_seg->s_node, &request,
			    RSM_NO_REPLY);
		} else {
			rsmseglock_release(im_seg);
		}
	}
	else
		rsmseglock_release(im_seg);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_unload done\n"));

}

/* ****************************** Importer Calls ************************ */

/*
 * Check the requested access bits in `mode' against the granted bits in
 * `perm'.  perm is shifted left by 3 bits when the caller's uid is not
 * `owner', and by 3 more when the caller is also not a member of `group';
 * the bits of the shifted value are then cleared from mode.
 *
 * Returns 0 if no requested bit is left over.  Otherwise the decision is
 * left to secpolicy_rsm_access(), whose result is returned.
 */
static int
rsm_access(uid_t owner, gid_t group, int perm, int mode, const struct cred *cr)
{
	int shifts = 0;

	if (crgetuid(cr) != owner) {
		shifts += 3;
		if (!groupmember(group, cr))
			shifts += 3;
	}

	/* drop every requested bit that the (shifted) permissions grant */
	mode &= ~(perm << shifts);

	if (mode == 0)
		return (0);

	return (secpolicy_rsm_access(cr, owner, mode));
}


/*
 * Connect import segment `seg' to the export segment msg->key on node
 * msg->nodeid and copy the resulting message back to user address
 * `dataptr'.
 *
 * Outline:
 *	- look up the adapter and validate msg->perm
 *	- add the segment to the import list and mark it
 *	  RSM_STATE_CONNECTING
 *	- get the shared import structure (rsmshare_get) and wait while
 *	  another importer is connecting or the share is quiesced
 *	- if this importer moved the share to RSMSI_STATE_CONNECTING, send
 *	  RSMIPC_MSG_SEGCONNECT to the exporter and record the reply in the
 *	  share
 *	- back out if an abort was requested meanwhile or if rsm_access()
 *	  denies access
 *	- for a remote node, do the RSMPI rsm_connect if this importer is the
 *	  one that is connecting the share
 *	- mark the segment RSM_STATE_CONNECT, fill in msg and copy it out
 *	  (as rsm_ioctlmsg32_t for an ILP32 caller)
 *
 * Every failure path after the import list add removes the segment from the
 * import list, clears s_adapter, releases the adapter and puts the segment
 * back in RSM_STATE_NEW.
 *
 * Returns RSM_SUCCESS, or an error: RSMERR_CTLR_NOT_PRESENT,
 * RSMERR_BAD_PERMS, RSMERR_INTERRUPTED, RSMERR_INTERNAL_ERROR,
 * RSMERR_PERM_DENIED, RSMERR_SEG_NOT_PUBLISHED_TO_NODE,
 * RSMERR_REMOTE_NODE_UNREACHABLE, RSMERR_BAD_ADDR, or whatever
 * rsmimport_add, rsmipc_send, the exporter's reply status or the RSMPI
 * rsm_connect reported.
 */
static int
rsm_connect(rsmseg_t *seg, rsm_ioctlmsg_t *msg, cred_t *cred,
    intptr_t dataptr, int mode)
{
	int			e;
	int			recheck_state = 0;
	void			*shared_cookie;
	rsmipc_request_t	request;
	rsmipc_reply_t		reply;
	rsm_permission_t	access;
	adapter_t		*adapter;
	rsm_addr_t		addr = 0;
	rsm_import_share_t	*sharedp;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_connect enter\n"));

	adapter = rsm_getadapter(msg, mode);
	if (adapter == NULL) {
		DBG_PRINTF((category, RSM_ERR,
		    "rsm_connect done:ENODEV adapter=NULL\n"));
		return (RSMERR_CTLR_NOT_PRESENT);
	}

	/* the loopback adapter can only reach the local node */
	if ((adapter == &loopback_adapter) && (msg->nodeid != my_nodeid)) {
		rsmka_release_adapter(adapter);
		DBG_PRINTF((category, RSM_ERR,
		    "rsm_connect done:ENODEV loopback\n"));
		return (RSMERR_CTLR_NOT_PRESENT);
	}


	ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
	ASSERT(seg->s_state == RSM_STATE_NEW);

	/*
	 * Translate perm to access
	 */
	if (msg->perm & ~RSM_PERM_RDWR) {
		rsmka_release_adapter(adapter);
		DBG_PRINTF((category, RSM_ERR,
		    "rsm_connect done:EINVAL invalid perms\n"));
		return (RSMERR_BAD_PERMS);
	}
	access = 0;
	if (msg->perm & RSM_PERM_READ)
		access |= RSM_ACCESS_READ;
	if (msg->perm & RSM_PERM_WRITE)
		access |= RSM_ACCESS_WRITE;

	seg->s_node = msg->nodeid;

	/*
	 * Adding to the import list locks the segment; release the segment
	 * lock so we can get the reply for the send.
	 */
	e = rsmimport_add(seg, msg->key);
	if (e) {
		rsmka_release_adapter(adapter);
		DBG_PRINTF((category, RSM_ERR,
		    "rsm_connect done:rsmimport_add failed %d\n", e));
		return (e);
	}
	seg->s_state = RSM_STATE_CONNECTING;

	/*
	 * Set the s_adapter field here so as to have a valid comparison of
	 * the adapter and the s_adapter value during rsmshare_get. For
	 * any error, set s_adapter to NULL before doing a release_adapter
	 */
	seg->s_adapter = adapter;

	rsmseglock_release(seg);

	/*
	 * get the pointer to the shared data structure; the
	 * shared data is locked and refcount has been incremented
	 */
	sharedp = rsmshare_get(msg->key, msg->nodeid, adapter, seg);

	ASSERT(rsmsharelock_held(seg));

	do {
		/* flag indicates whether we need to recheck the state */
		recheck_state = 0;
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm_connect:RSMSI_STATE=%d\n", sharedp->rsmsi_state));
		switch (sharedp->rsmsi_state) {
		case RSMSI_STATE_NEW:
			sharedp->rsmsi_state = RSMSI_STATE_CONNECTING;
			break;
		case RSMSI_STATE_CONNECTING:
			/* FALLTHRU */
		case RSMSI_STATE_CONN_QUIESCE:
			/* FALLTHRU */
		case RSMSI_STATE_MAP_QUIESCE:
			/* wait for the state to change */
			while ((sharedp->rsmsi_state ==
			    RSMSI_STATE_CONNECTING) ||
			    (sharedp->rsmsi_state ==
			    RSMSI_STATE_CONN_QUIESCE) ||
			    (sharedp->rsmsi_state ==
			    RSMSI_STATE_MAP_QUIESCE)) {
				if (cv_wait_sig(&sharedp->rsmsi_cv,
				    &sharedp->rsmsi_lock) == 0) {
					/* signalled - clean up and return */
					rsmsharelock_release(seg);
					rsmimport_rm(seg);
					seg->s_adapter = NULL;
					rsmka_release_adapter(adapter);
					seg->s_state = RSM_STATE_NEW;
					DBG_PRINTF((category, RSM_ERR,
					    "rsm_connect done: INTERRUPTED\n"));
					return (RSMERR_INTERRUPTED);
				}
			}
			/*
			 * the state changed, loop back and check what it is
			 */
			recheck_state = 1;
			break;
		case RSMSI_STATE_ABORT_CONNECT:
			/* exit the loop and clean up further down */
			break;
		case RSMSI_STATE_CONNECTED:
			/* already connected, good - fall through */
		case RSMSI_STATE_MAPPED:
			/* already mapped, wow - fall through */
			/* access validation etc is done further down */
			break;
		case RSMSI_STATE_DISCONNECTED:
			/* disconnected - so reconnect now */
			sharedp->rsmsi_state = RSMSI_STATE_CONNECTING;
			break;
		default:
			ASSERT(0); /* Invalid State */
		}
	} while (recheck_state);

	if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) {
		/* we are the first to connect */
		rsmsharelock_release(seg);

		if (msg->nodeid != my_nodeid) {
			addr = get_remote_hwaddr(adapter, msg->nodeid);

			/* a negative value means no hwaddr was found */
			if ((int64_t)addr < 0) {
				rsmsharelock_acquire(seg);
				rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING,
				    RSMSI_STATE_NEW);
				rsmsharelock_release(seg);
				rsmimport_rm(seg);
				seg->s_adapter = NULL;
				rsmka_release_adapter(adapter);
				seg->s_state = RSM_STATE_NEW;
				DBG_PRINTF((category, RSM_ERR,
				    "rsm_connect done: hwaddr<0\n"));
				return (RSMERR_INTERNAL_ERROR);
			}
		} else {
			addr = adapter->hwaddr;
		}

		/*
		 * send request to node [src, dest, key, msgid] and get back
		 * [status, msgid, cookie]
		 */
		request.rsmipc_key = msg->key;
		/*
		 * we need the s_mode of the exporter so pass
		 * RSM_ACCESS_TRUSTED
		 */
		request.rsmipc_perm = RSM_ACCESS_TRUSTED;
		request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SEGCONNECT;
		request.rsmipc_adapter_hwaddr = addr;
		request.rsmipc_segment_cookie = sharedp;

		e = (int)rsmipc_send(msg->nodeid, &request, &reply);
		if (e) {
			/* the send itself failed */
			rsmsharelock_acquire(seg);
			rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING,
			    RSMSI_STATE_NEW);
			rsmsharelock_release(seg);
			rsmimport_rm(seg);
			seg->s_adapter = NULL;
			rsmka_release_adapter(adapter);
			seg->s_state = RSM_STATE_NEW;
			DBG_PRINTF((category, RSM_ERR,
			    "rsm_connect done:rsmipc_send failed %d\n", e));
			return (e);
		}

		if (reply.rsmipc_status != RSM_SUCCESS) {
			/* the exporter answered with an error status */
			rsmsharelock_acquire(seg);
			rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING,
			    RSMSI_STATE_NEW);
			rsmsharelock_release(seg);
			rsmimport_rm(seg);
			seg->s_adapter = NULL;
			rsmka_release_adapter(adapter);
			seg->s_state = RSM_STATE_NEW;
			DBG_PRINTF((category, RSM_ERR,
			    "rsm_connect done:rsmipc_send reply err %d\n",
			    reply.rsmipc_status));
			return (reply.rsmipc_status);
		}

		rsmsharelock_acquire(seg);
		/* store the information recvd into the shared data struct */
		sharedp->rsmsi_mode = reply.rsmipc_mode;
		sharedp->rsmsi_uid = reply.rsmipc_uid;
		sharedp->rsmsi_gid = reply.rsmipc_gid;
		sharedp->rsmsi_seglen = reply.rsmipc_seglen;
		sharedp->rsmsi_cookie = sharedp;
	}

	rsmsharelock_release(seg);

	/*
	 * Get the segment lock and check for a force disconnect
	 * from the export side which would have changed the segment
	 * state to RSM_STATE_ABORT_CONNECT (or the shared state to
	 * RSMSI_STATE_ABORT_CONNECT). Once the segment lock is acquired a
	 * force disconnect will be held off until the connection
	 * has completed.
	 */
	rsmseglock_acquire(seg);
	rsmsharelock_acquire(seg);
	ASSERT(seg->s_state == RSM_STATE_CONNECTING ||
	    seg->s_state == RSM_STATE_ABORT_CONNECT);

	shared_cookie = sharedp->rsmsi_cookie;

	if ((seg->s_state == RSM_STATE_ABORT_CONNECT) ||
	    (sharedp->rsmsi_state == RSMSI_STATE_ABORT_CONNECT)) {
		seg->s_state = RSM_STATE_NEW;
		seg->s_adapter = NULL;
		rsmsharelock_release(seg);
		rsmseglock_release(seg);
		rsmimport_rm(seg);
		rsmka_release_adapter(adapter);

		rsmsharelock_acquire(seg);
		if (!(sharedp->rsmsi_flags & RSMSI_FLAGS_ABORTDONE)) {
			/*
			 * set a flag indicating abort handling has been
			 * done
			 */
			sharedp->rsmsi_flags |= RSMSI_FLAGS_ABORTDONE;
			rsmsharelock_release(seg);
			/* send a message to exporter - only once */
			(void) rsm_send_notimporting(msg->nodeid,
			    msg->key, shared_cookie);
			rsmsharelock_acquire(seg);
			/*
			 * wake up any waiting importers and inform that
			 * connection has been aborted
			 */
			cv_broadcast(&sharedp->rsmsi_cv);
		}
		rsmsharelock_release(seg);

		DBG_PRINTF((category, RSM_ERR,
		    "rsm_connect done: RSM_STATE_ABORT_CONNECT\n"));
		return (RSMERR_INTERRUPTED);
	}


	/*
	 * We need to verify that this process has access
	 */
	e = rsm_access(sharedp->rsmsi_uid, sharedp->rsmsi_gid,
	    access & sharedp->rsmsi_mode,
	    (int)(msg->perm & RSM_PERM_RDWR), cred);
	if (e) {
		rsmsharelock_release(seg);
		seg->s_state = RSM_STATE_NEW;
		seg->s_adapter = NULL;
		rsmseglock_release(seg);
		rsmimport_rm(seg);
		rsmka_release_adapter(adapter);
		/*
		 * No need to lock segment it has been removed
		 * from the hash table
		 */
		rsmsharelock_acquire(seg);
		if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) {
			rsmsharelock_release(seg);
			/* this is the first importer */

			(void) rsm_send_notimporting(msg->nodeid, msg->key,
			    shared_cookie);
			rsmsharelock_acquire(seg);
			sharedp->rsmsi_state = RSMSI_STATE_NEW;
			cv_broadcast(&sharedp->rsmsi_cv);
		}
		rsmsharelock_release(seg);

		DBG_PRINTF((category, RSM_ERR,
		    "rsm_connect done: ipcaccess failed\n"));
		return (RSMERR_PERM_DENIED);
	}

	/* update state and cookie */
	seg->s_segid = sharedp->rsmsi_segid;
	seg->s_len = sharedp->rsmsi_seglen;
	seg->s_mode = access & sharedp->rsmsi_mode;
	seg->s_pid = ddi_get_pid();
	seg->s_mapinfo = NULL;

	if (seg->s_node != my_nodeid) {
		/* only the importer that is connecting the share calls RSMPI */
		if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) {
			e = adapter->rsmpi_ops->rsm_connect(
			    adapter->rsmpi_handle,
			    addr, seg->s_segid, &sharedp->rsmsi_handle);

			if (e != RSM_SUCCESS) {
				seg->s_state = RSM_STATE_NEW;
				seg->s_adapter = NULL;
				rsmsharelock_release(seg);
				rsmseglock_release(seg);
				rsmimport_rm(seg);
				rsmka_release_adapter(adapter);
				/*
				 * inform the exporter to delete this importer
				 */
				(void) rsm_send_notimporting(msg->nodeid,
				    msg->key, shared_cookie);

				/*
				 * Now inform any waiting importers to
				 * retry connect. This needs to be done
				 * after sending notimporting so that
				 * the notimporting is sent before a waiting
				 * importer sends a segconnect while retrying
				 *
				 * No need to lock segment it has been removed
				 * from the hash table
				 */

				rsmsharelock_acquire(seg);
				sharedp->rsmsi_state = RSMSI_STATE_NEW;
				cv_broadcast(&sharedp->rsmsi_cv);
				rsmsharelock_release(seg);

				DBG_PRINTF((category, RSM_ERR,
				    "rsm_connect error %d\n", e));
				/* map RSMPI address errors to node errors */
				if (e == RSMERR_SEG_NOT_PUBLISHED_TO_RSM_ADDR)
					return (
					    RSMERR_SEG_NOT_PUBLISHED_TO_NODE);
				else if ((e == RSMERR_RSM_ADDR_UNREACHABLE) ||
				    (e == RSMERR_UNKNOWN_RSM_ADDR))
					return (RSMERR_REMOTE_NODE_UNREACHABLE);
				else
					return (e);
			}

		}
		seg->s_handle.in = sharedp->rsmsi_handle;

	}

	seg->s_state = RSM_STATE_CONNECT;


	seg->s_flags &= ~RSM_IMPORT_DUMMY;	/* clear dummy flag */
	if (bar_va) {
		/* increment generation number on barrier page */
		atomic_inc_16(bar_va + seg->s_hdr.rsmrc_num);
		/* return user off into barrier page where status will be */
		msg->off = (int)seg->s_hdr.rsmrc_num;
		msg->gnum = bar_va[msg->off];	/* gnum race */
	} else {
		msg->off = 0;
		msg->gnum = 0;	/* gnum race */
	}

	/*
	 * NOTE(review): the (int) cast truncates rsmsi_seglen if that field
	 * is wider than int - confirm against the field declarations.
	 */
	msg->len = (int)sharedp->rsmsi_seglen;
	msg->rnum = seg->s_minor;
	rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING, RSMSI_STATE_CONNECTED);
	rsmsharelock_release(seg);
	rsmseglock_release(seg);

	/* Return back to user the segment size & perm in case it's needed */

#ifdef _MULTI_DATAMODEL
	if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
		rsm_ioctlmsg32_t msg32;

		if (msg->len > UINT_MAX)
			msg32.len = RSM_MAXSZ_PAGE_ALIGNED;
		else
			msg32.len = msg->len;
		msg32.off = msg->off;
		msg32.perm = msg->perm;
		msg32.gnum = msg->gnum;
		msg32.rnum = msg->rnum;

		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm_connect done\n"));

		if (ddi_copyout((caddr_t)&msg32, (caddr_t)dataptr,
		    sizeof (msg32), mode))
			return (RSMERR_BAD_ADDR);
		else
			return (RSM_SUCCESS);
	}
#endif
	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_connect done\n"));

	if (ddi_copyout((caddr_t)msg, (caddr_t)dataptr, sizeof (*msg),
	    mode))
		return (RSMERR_BAD_ADDR);
	else
		return (RSM_SUCCESS);
}

/*
 * Undo the mapping of import segment `seg'.  The caller must hold the
 * segment lock.
 *
 * Nothing is done unless the segment is in RSM_STATE_ACTIVE or
 * RSM_STATE_MAP_QUIESCE.  Otherwise the shared rsmsi_mapcnt is decremented
 * and, when it reaches zero, the shared state moves
 *	MAPPED      -> CONNECTED    (calling the RSMPI rsm_unmap and freeing
 *				     the shared mapinfo for a remote node)
 *	MAP_QUIESCE -> CONN_QUIESCE
 * The segment itself moves ACTIVE -> CONNECT or
 * MAP_QUIESCE -> CONN_QUIESCE, and its s_cookie and s_mapinfo are cleared.
 *
 * Always returns RSM_SUCCESS.
 */
static int
rsm_unmap(rsmseg_t *seg)
{
	int			err;
	adapter_t		*adapter;
	rsm_import_share_t	*sharedp;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsm_unmap enter %u\n", seg->s_segid));

	ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);

	/* assert seg is locked */
	ASSERT(rsmseglock_held(seg));
	ASSERT(seg->s_state != RSM_STATE_MAPPING);

	if ((seg->s_state != RSM_STATE_ACTIVE) &&
	    (seg->s_state != RSM_STATE_MAP_QUIESCE)) {
		/* segment unmap has already been done */
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unmap done\n"));
		return (RSM_SUCCESS);
	}

	sharedp = seg->s_share;

	rsmsharelock_acquire(seg);

	/*
	 * - shared data struct is in MAPPED or MAP_QUIESCE state
	 */

	ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED ||
	    sharedp->rsmsi_state == RSMSI_STATE_MAP_QUIESCE);

	/*
	 * Unmap pages - previously rsm_memseg_import_unmap was called only if
	 * the segment cookie list was NULL; but it is always NULL when
	 * called from rsmmap_unmap and won't be NULL when called for
	 * a force disconnect - so the check for NULL cookie list was removed
	 */

	ASSERT(sharedp->rsmsi_mapcnt > 0);

	sharedp->rsmsi_mapcnt--;

	/* the shared state only changes when the last mapping goes away */
	if (sharedp->rsmsi_mapcnt == 0) {
		if (sharedp->rsmsi_state == RSMSI_STATE_MAPPED) {
			/* unmap the shared RSMPI mapping */
			adapter = seg->s_adapter;
			if (seg->s_node != my_nodeid) {
				ASSERT(sharedp->rsmsi_handle != NULL);
				err = adapter->rsmpi_ops->
				    rsm_unmap(sharedp->rsmsi_handle);
				DBG_PRINTF((category, RSM_DEBUG,
				    "rsm_unmap: rsmpi unmap %d\n", err));
				rsm_free_mapinfo(sharedp->rsmsi_mapinfo);
				sharedp->rsmsi_mapinfo = NULL;
			}
			sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
		} else { /* MAP_QUIESCE --munmap()--> CONN_QUIESCE */
			sharedp->rsmsi_state = RSMSI_STATE_CONN_QUIESCE;
		}
	}

	rsmsharelock_release(seg);

	/*
	 * The s_cookie field is used to store the cookie returned from the
	 * ddi_umem_lock when binding the pages for an export segment. This
	 * is the primary use of the s_cookie field and does not normally
	 * pertain to any importing segment except in the loopback case.
	 * For the loopback case, the import segment and export segment are
	 * on the same node, the s_cookie field of the segment structure for
	 * the importer is initialized to the s_cookie field in the exported
	 * segment during the map operation and is used during the call to
	 * devmap_umem_setup for the import mapping.
	 * Thus, during unmap, we simply need to set s_cookie to NULL to
	 * indicate that the mapping no longer exists.
	 */
	seg->s_cookie = NULL;

	seg->s_mapinfo = NULL;

	if (seg->s_state == RSM_STATE_ACTIVE)
		seg->s_state = RSM_STATE_CONNECT;
	else
		seg->s_state = RSM_STATE_CONN_QUIESCE;

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unmap done\n"));

	return (RSM_SUCCESS);
}

/*
 * cookie returned here if not null indicates that it is
 * the last importer and it can be used in the RSMIPC_NOT_IMPORTING
 * message.
6599 */ 6600 static int 6601 rsm_closeconnection(rsmseg_t *seg, void **cookie) 6602 { 6603 int e; 6604 adapter_t *adapter; 6605 rsm_import_share_t *sharedp; 6606 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT); 6607 6608 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6609 "rsm_closeconnection enter\n")); 6610 6611 *cookie = (void *)NULL; 6612 6613 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 6614 6615 /* assert seg is locked */ 6616 ASSERT(rsmseglock_held(seg)); 6617 6618 if (seg->s_state == RSM_STATE_DISCONNECT) { 6619 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6620 "rsm_closeconnection done: already disconnected\n")); 6621 return (RSM_SUCCESS); 6622 } 6623 6624 /* wait for all putv/getv ops to get done */ 6625 while (seg->s_rdmacnt > 0) { 6626 cv_wait(&seg->s_cv, &seg->s_lock); 6627 } 6628 6629 (void) rsm_unmap(seg); 6630 6631 ASSERT(seg->s_state == RSM_STATE_CONNECT || 6632 seg->s_state == RSM_STATE_CONN_QUIESCE); 6633 6634 adapter = seg->s_adapter; 6635 sharedp = seg->s_share; 6636 6637 ASSERT(sharedp != NULL); 6638 6639 rsmsharelock_acquire(seg); 6640 6641 /* 6642 * Disconnect on adapter 6643 * 6644 * The current algorithm is stateless, I don't have to contact 6645 * server when I go away. He only gives me permissions. Of course, 6646 * the adapters will talk to terminate the connect. 
6647 * 6648 * disconnect is needed only if we are CONNECTED not in CONN_QUIESCE 6649 */ 6650 if ((sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) && 6651 (sharedp->rsmsi_node != my_nodeid)) { 6652 6653 if (sharedp->rsmsi_refcnt == 1) { 6654 /* this is the last importer */ 6655 ASSERT(sharedp->rsmsi_mapcnt == 0); 6656 6657 e = adapter->rsmpi_ops-> 6658 rsm_disconnect(sharedp->rsmsi_handle); 6659 if (e != RSM_SUCCESS) { 6660 DBG_PRINTF((category, RSM_DEBUG, 6661 "rsm:disconnect failed seg=%x:err=%d\n", 6662 seg->s_key, e)); 6663 } 6664 } 6665 } 6666 6667 seg->s_handle.in = NULL; 6668 6669 sharedp->rsmsi_refcnt--; 6670 6671 if (sharedp->rsmsi_refcnt == 0) { 6672 *cookie = (void *)sharedp->rsmsi_cookie; 6673 sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED; 6674 sharedp->rsmsi_handle = NULL; 6675 rsmsharelock_release(seg); 6676 6677 /* clean up the shared data structure */ 6678 mutex_destroy(&sharedp->rsmsi_lock); 6679 cv_destroy(&sharedp->rsmsi_cv); 6680 kmem_free((void *)(sharedp), sizeof (rsm_import_share_t)); 6681 6682 } else { 6683 rsmsharelock_release(seg); 6684 } 6685 6686 /* increment generation number on barrier page */ 6687 if (bar_va) { 6688 atomic_inc_16(bar_va + seg->s_hdr.rsmrc_num); 6689 } 6690 6691 /* 6692 * The following needs to be done after any 6693 * rsmsharelock calls which use seg->s_share. 
6694 */ 6695 seg->s_share = NULL; 6696 6697 seg->s_state = RSM_STATE_DISCONNECT; 6698 /* signal anyone waiting in the CONN_QUIESCE state */ 6699 cv_broadcast(&seg->s_cv); 6700 6701 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6702 "rsm_closeconnection done\n")); 6703 6704 return (RSM_SUCCESS); 6705 } 6706 6707 int 6708 rsm_disconnect(rsmseg_t *seg) 6709 { 6710 rsmipc_request_t request; 6711 void *shared_cookie; 6712 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT); 6713 6714 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_disconnect enter\n")); 6715 6716 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 6717 6718 /* assert seg isn't locked */ 6719 ASSERT(!rsmseglock_held(seg)); 6720 6721 6722 /* Remove segment from imported list */ 6723 rsmimport_rm(seg); 6724 6725 /* acquire the segment */ 6726 rsmseglock_acquire(seg); 6727 6728 /* wait until segment leaves the mapping state */ 6729 while (seg->s_state == RSM_STATE_MAPPING) 6730 cv_wait(&seg->s_cv, &seg->s_lock); 6731 6732 if (seg->s_state == RSM_STATE_DISCONNECT) { 6733 seg->s_state = RSM_STATE_NEW; 6734 rsmseglock_release(seg); 6735 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6736 "rsm_disconnect done: already disconnected\n")); 6737 return (RSM_SUCCESS); 6738 } 6739 6740 (void) rsm_closeconnection(seg, &shared_cookie); 6741 6742 /* update state */ 6743 seg->s_state = RSM_STATE_NEW; 6744 6745 if (shared_cookie != NULL) { 6746 /* 6747 * This is the last importer so inform the exporting node 6748 * so this import can be deleted from the list of importers. 
6749 */ 6750 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_NOTIMPORTING; 6751 request.rsmipc_key = seg->s_segid; 6752 request.rsmipc_segment_cookie = shared_cookie; 6753 rsmseglock_release(seg); 6754 (void) rsmipc_send(seg->s_node, &request, RSM_NO_REPLY); 6755 } else { 6756 rsmseglock_release(seg); 6757 } 6758 6759 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_disconnect done\n")); 6760 6761 return (DDI_SUCCESS); 6762 } 6763 6764 /*ARGSUSED*/ 6765 static int 6766 rsm_chpoll(dev_t dev, short events, int anyyet, short *reventsp, 6767 struct pollhead **phpp) 6768 { 6769 minor_t rnum; 6770 rsmresource_t *res; 6771 rsmseg_t *seg; 6772 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI); 6773 6774 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_chpoll enter\n")); 6775 6776 /* find minor, no lock */ 6777 rnum = getminor(dev); 6778 res = rsmresource_lookup(rnum, RSM_NOLOCK); 6779 6780 /* poll is supported only for export/import segments */ 6781 if ((res == NULL) || (res == RSMRC_RESERVED) || 6782 (res->rsmrc_type == RSM_RESOURCE_BAR)) { 6783 return (ENXIO); 6784 } 6785 6786 *reventsp = 0; 6787 6788 /* 6789 * An exported segment must be in state RSM_STATE_EXPORT; an 6790 * imported segment must be in state RSM_STATE_ACTIVE. 
6791 */ 6792 seg = (rsmseg_t *)res; 6793 6794 if (seg->s_pollevent) { 6795 *reventsp = POLLRDNORM; 6796 } else if (!anyyet) { 6797 /* cannot take segment lock here */ 6798 *phpp = &seg->s_poll; 6799 seg->s_pollflag |= RSM_SEGMENT_POLL; 6800 } 6801 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_chpoll done\n")); 6802 return (0); 6803 } 6804 6805 6806 6807 /* ************************* IOCTL Commands ********************* */ 6808 6809 static rsmseg_t * 6810 rsmresource_seg(rsmresource_t *res, minor_t rnum, cred_t *credp, 6811 rsm_resource_type_t type) 6812 { 6813 /* get segment from resource handle */ 6814 rsmseg_t *seg; 6815 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL); 6816 6817 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmresource_seg enter\n")); 6818 6819 6820 if (res != RSMRC_RESERVED) { 6821 seg = (rsmseg_t *)res; 6822 } else { 6823 /* Allocate segment now and bind it */ 6824 seg = rsmseg_alloc(rnum, credp); 6825 6826 /* 6827 * if DR pre-processing is going on or DR is in progress 6828 * then the new export segments should be in the NEW_QSCD state 6829 */ 6830 if (type == RSM_RESOURCE_EXPORT_SEGMENT) { 6831 mutex_enter(&rsm_drv_data.drv_lock); 6832 if ((rsm_drv_data.drv_state == 6833 RSM_DRV_PREDEL_STARTED) || 6834 (rsm_drv_data.drv_state == 6835 RSM_DRV_PREDEL_COMPLETED) || 6836 (rsm_drv_data.drv_state == 6837 RSM_DRV_DR_IN_PROGRESS)) { 6838 seg->s_state = RSM_STATE_NEW_QUIESCED; 6839 } 6840 mutex_exit(&rsm_drv_data.drv_lock); 6841 } 6842 6843 rsmresource_insert(rnum, (rsmresource_t *)seg, type); 6844 } 6845 6846 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmresource_seg done\n")); 6847 6848 return (seg); 6849 } 6850 6851 static int 6852 rsmexport_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg, 6853 int mode, cred_t *credp) 6854 { 6855 int error; 6856 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT | RSM_IOCTL); 6857 6858 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmexport_ioctl enter\n")); 6859 6860 arg = arg; 6861 credp 
= credp; 6862 6863 ASSERT(seg != NULL); 6864 6865 switch (cmd) { 6866 case RSM_IOCTL_BIND: 6867 error = rsm_bind(seg, msg, arg, mode); 6868 break; 6869 case RSM_IOCTL_REBIND: 6870 error = rsm_rebind(seg, msg); 6871 break; 6872 case RSM_IOCTL_UNBIND: 6873 error = ENOTSUP; 6874 break; 6875 case RSM_IOCTL_PUBLISH: 6876 error = rsm_publish(seg, msg, arg, mode); 6877 break; 6878 case RSM_IOCTL_REPUBLISH: 6879 error = rsm_republish(seg, msg, mode); 6880 break; 6881 case RSM_IOCTL_UNPUBLISH: 6882 error = rsm_unpublish(seg, 1); 6883 break; 6884 default: 6885 error = EINVAL; 6886 break; 6887 } 6888 6889 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmexport_ioctl done: %d\n", 6890 error)); 6891 6892 return (error); 6893 } 6894 static int 6895 rsmimport_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg, 6896 int mode, cred_t *credp) 6897 { 6898 int error; 6899 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 6900 6901 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmimport_ioctl enter\n")); 6902 6903 ASSERT(seg); 6904 6905 switch (cmd) { 6906 case RSM_IOCTL_CONNECT: 6907 error = rsm_connect(seg, msg, credp, arg, mode); 6908 break; 6909 default: 6910 error = EINVAL; 6911 break; 6912 } 6913 6914 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmimport_ioctl done: %d\n", 6915 error)); 6916 return (error); 6917 } 6918 6919 static int 6920 rsmbar_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg, 6921 int mode) 6922 { 6923 int e; 6924 adapter_t *adapter; 6925 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 6926 6927 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmbar_ioctl enter\n")); 6928 6929 6930 if ((seg->s_flags & RSM_IMPORT_DUMMY) != 0) { 6931 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6932 "rsmbar_ioctl done: RSM_IMPORT_DUMMY\n")); 6933 return (RSMERR_CONN_ABORTED); 6934 } else if (seg->s_node == my_nodeid) { 6935 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6936 "rsmbar_ioctl done: loopback\n")); 6937 return (RSM_SUCCESS); 6938 } 
6939 6940 adapter = seg->s_adapter; 6941 6942 switch (cmd) { 6943 case RSM_IOCTL_BAR_CHECK: 6944 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6945 "rsmbar_ioctl done: RSM_BAR_CHECK %d\n", bar_va)); 6946 return (bar_va ? RSM_SUCCESS : EINVAL); 6947 case RSM_IOCTL_BAR_OPEN: 6948 e = adapter->rsmpi_ops-> 6949 rsm_open_barrier_ctrl(adapter->rsmpi_handle, &msg->bar); 6950 break; 6951 case RSM_IOCTL_BAR_ORDER: 6952 e = adapter->rsmpi_ops->rsm_order_barrier(&msg->bar); 6953 break; 6954 case RSM_IOCTL_BAR_CLOSE: 6955 e = adapter->rsmpi_ops->rsm_close_barrier(&msg->bar); 6956 break; 6957 default: 6958 e = EINVAL; 6959 break; 6960 } 6961 6962 if (e == RSM_SUCCESS) { 6963 #ifdef _MULTI_DATAMODEL 6964 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 6965 rsm_ioctlmsg32_t msg32; 6966 int i; 6967 6968 for (i = 0; i < 4; i++) { 6969 msg32.bar.comp[i].u64 = msg->bar.comp[i].u64; 6970 } 6971 6972 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6973 "rsmbar_ioctl done\n")); 6974 if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg, 6975 sizeof (msg32), mode)) 6976 return (RSMERR_BAD_ADDR); 6977 else 6978 return (RSM_SUCCESS); 6979 } 6980 #endif 6981 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6982 "rsmbar_ioctl done\n")); 6983 if (ddi_copyout((caddr_t)&msg->bar, (caddr_t)arg, 6984 sizeof (*msg), mode)) 6985 return (RSMERR_BAD_ADDR); 6986 else 6987 return (RSM_SUCCESS); 6988 } 6989 6990 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6991 "rsmbar_ioctl done: error=%d\n", e)); 6992 6993 return (e); 6994 } 6995 6996 /* 6997 * Ring the doorbell of the export segment to which this segment is 6998 * connected. 
6999 */ 7000 static int 7001 exportbell_ioctl(rsmseg_t *seg, int cmd /*ARGSUSED*/) 7002 { 7003 int e = 0; 7004 rsmipc_request_t request; 7005 7006 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 7007 7008 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exportbell_ioctl enter\n")); 7009 7010 request.rsmipc_key = seg->s_segid; 7011 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL; 7012 request.rsmipc_segment_cookie = NULL; 7013 e = rsmipc_send(seg->s_node, &request, RSM_NO_REPLY); 7014 7015 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7016 "exportbell_ioctl done: %d\n", e)); 7017 7018 return (e); 7019 } 7020 7021 /* 7022 * Ring the doorbells of all segments importing this segment 7023 */ 7024 static int 7025 importbell_ioctl(rsmseg_t *seg, int cmd /*ARGSUSED*/) 7026 { 7027 importing_token_t *token = NULL; 7028 rsmipc_request_t request; 7029 int index; 7030 7031 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT | RSM_IOCTL); 7032 7033 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importbell_ioctl enter\n")); 7034 7035 ASSERT(seg->s_state != RSM_STATE_NEW && 7036 seg->s_state != RSM_STATE_NEW_QUIESCED); 7037 7038 request.rsmipc_key = seg->s_segid; 7039 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL; 7040 7041 index = rsmhash(seg->s_segid); 7042 7043 token = importer_list.bucket[index]; 7044 7045 while (token != NULL) { 7046 if (seg->s_key == token->key) { 7047 request.rsmipc_segment_cookie = 7048 token->import_segment_cookie; 7049 (void) rsmipc_send(token->importing_node, 7050 &request, RSM_NO_REPLY); 7051 } 7052 token = token->next; 7053 } 7054 7055 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7056 "importbell_ioctl done\n")); 7057 return (RSM_SUCCESS); 7058 } 7059 7060 static int 7061 rsm_consumeevent_copyin(caddr_t arg, rsm_consume_event_msg_t *msgp, 7062 rsm_poll_event_t **eventspp, int mode) 7063 { 7064 rsm_poll_event_t *evlist = NULL; 7065 size_t evlistsz; 7066 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL); 7067 7068 #ifdef _MULTI_DATAMODEL 7069 if ((mode 
& DATAMODEL_MASK) == DATAMODEL_ILP32) { 7070 int i; 7071 rsm_consume_event_msg32_t cemsg32 = {0}; 7072 rsm_poll_event32_t event32[RSM_MAX_POLLFDS]; 7073 rsm_poll_event32_t *evlist32; 7074 size_t evlistsz32; 7075 7076 /* copyin the ioctl message */ 7077 if (ddi_copyin(arg, (caddr_t)&cemsg32, 7078 sizeof (rsm_consume_event_msg32_t), mode)) { 7079 DBG_PRINTF((category, RSM_ERR, 7080 "consumeevent_copyin msgp: RSMERR_BAD_ADDR\n")); 7081 return (RSMERR_BAD_ADDR); 7082 } 7083 msgp->seglist = (caddr_t)(uintptr_t)cemsg32.seglist; 7084 msgp->numents = (int)cemsg32.numents; 7085 7086 evlistsz32 = sizeof (rsm_poll_event32_t) * msgp->numents; 7087 /* 7088 * If numents is large alloc events list on heap otherwise 7089 * use the address of array that was passed in. 7090 */ 7091 if (msgp->numents > RSM_MAX_POLLFDS) { 7092 if (msgp->numents > max_segs) { /* validate numents */ 7093 DBG_PRINTF((category, RSM_ERR, 7094 "consumeevent_copyin: " 7095 "RSMERR_BAD_ARGS_ERRORS\n")); 7096 return (RSMERR_BAD_ARGS_ERRORS); 7097 } 7098 evlist32 = kmem_zalloc(evlistsz32, KM_SLEEP); 7099 } else { 7100 evlist32 = event32; 7101 } 7102 7103 /* copyin the seglist into the rsm_poll_event32_t array */ 7104 if (ddi_copyin((caddr_t)msgp->seglist, (caddr_t)evlist32, 7105 evlistsz32, mode)) { 7106 if ((msgp->numents > RSM_MAX_POLLFDS) && evlist32) { 7107 kmem_free(evlist32, evlistsz32); 7108 } 7109 DBG_PRINTF((category, RSM_ERR, 7110 "consumeevent_copyin evlist: RSMERR_BAD_ADDR\n")); 7111 return (RSMERR_BAD_ADDR); 7112 } 7113 7114 /* evlist and evlistsz are based on rsm_poll_event_t type */ 7115 evlistsz = sizeof (rsm_poll_event_t)* msgp->numents; 7116 7117 if (msgp->numents > RSM_MAX_POLLFDS) { 7118 evlist = kmem_zalloc(evlistsz, KM_SLEEP); 7119 *eventspp = evlist; 7120 } else { 7121 evlist = *eventspp; 7122 } 7123 /* 7124 * copy the rsm_poll_event32_t array to the rsm_poll_event_t 7125 * array 7126 */ 7127 for (i = 0; i < msgp->numents; i++) { 7128 evlist[i].rnum = evlist32[i].rnum; 7129 
evlist[i].fdsidx = evlist32[i].fdsidx; 7130 evlist[i].revent = evlist32[i].revent; 7131 } 7132 /* free the temp 32-bit event list */ 7133 if ((msgp->numents > RSM_MAX_POLLFDS) && evlist32) { 7134 kmem_free(evlist32, evlistsz32); 7135 } 7136 7137 return (RSM_SUCCESS); 7138 } 7139 #endif 7140 /* copyin the ioctl message */ 7141 if (ddi_copyin(arg, (caddr_t)msgp, sizeof (rsm_consume_event_msg_t), 7142 mode)) { 7143 DBG_PRINTF((category, RSM_ERR, 7144 "consumeevent_copyin msgp: RSMERR_BAD_ADDR\n")); 7145 return (RSMERR_BAD_ADDR); 7146 } 7147 /* 7148 * If numents is large alloc events list on heap otherwise 7149 * use the address of array that was passed in. 7150 */ 7151 if (msgp->numents > RSM_MAX_POLLFDS) { 7152 if (msgp->numents > max_segs) { /* validate numents */ 7153 DBG_PRINTF((category, RSM_ERR, 7154 "consumeevent_copyin: RSMERR_BAD_ARGS_ERRORS\n")); 7155 return (RSMERR_BAD_ARGS_ERRORS); 7156 } 7157 evlistsz = sizeof (rsm_poll_event_t)*msgp->numents; 7158 evlist = kmem_zalloc(evlistsz, KM_SLEEP); 7159 *eventspp = evlist; 7160 } 7161 7162 /* copyin the seglist */ 7163 if (ddi_copyin((caddr_t)msgp->seglist, (caddr_t)(*eventspp), 7164 sizeof (rsm_poll_event_t)*msgp->numents, mode)) { 7165 if (evlist) { 7166 kmem_free(evlist, evlistsz); 7167 *eventspp = NULL; 7168 } 7169 DBG_PRINTF((category, RSM_ERR, 7170 "consumeevent_copyin evlist: RSMERR_BAD_ADDR\n")); 7171 return (RSMERR_BAD_ADDR); 7172 } 7173 7174 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7175 "consumeevent_copyin done\n")); 7176 return (RSM_SUCCESS); 7177 } 7178 7179 static int 7180 rsm_consumeevent_copyout(rsm_consume_event_msg_t *msgp, 7181 rsm_poll_event_t *eventsp, int mode) 7182 { 7183 size_t evlistsz; 7184 int err = RSM_SUCCESS; 7185 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL); 7186 7187 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7188 "consumeevent_copyout enter: numents(%d) eventsp(%p)\n", 7189 msgp->numents, eventsp)); 7190 7191 #ifdef _MULTI_DATAMODEL 7192 if ((mode & DATAMODEL_MASK) == 
DATAMODEL_ILP32) { 7193 int i; 7194 rsm_poll_event32_t event32[RSM_MAX_POLLFDS]; 7195 rsm_poll_event32_t *evlist32; 7196 size_t evlistsz32; 7197 7198 evlistsz32 = sizeof (rsm_poll_event32_t)*msgp->numents; 7199 if (msgp->numents > RSM_MAX_POLLFDS) { 7200 evlist32 = kmem_zalloc(evlistsz32, KM_SLEEP); 7201 } else { 7202 evlist32 = event32; 7203 } 7204 7205 /* 7206 * copy the rsm_poll_event_t array to the rsm_poll_event32_t 7207 * array 7208 */ 7209 for (i = 0; i < msgp->numents; i++) { 7210 evlist32[i].rnum = eventsp[i].rnum; 7211 evlist32[i].fdsidx = eventsp[i].fdsidx; 7212 evlist32[i].revent = eventsp[i].revent; 7213 } 7214 7215 if (ddi_copyout((caddr_t)evlist32, (caddr_t)msgp->seglist, 7216 evlistsz32, mode)) { 7217 err = RSMERR_BAD_ADDR; 7218 } 7219 7220 if (msgp->numents > RSM_MAX_POLLFDS) { 7221 if (evlist32) { /* free the temp 32-bit event list */ 7222 kmem_free(evlist32, evlistsz32); 7223 } 7224 /* 7225 * eventsp and evlistsz are based on rsm_poll_event_t 7226 * type 7227 */ 7228 evlistsz = sizeof (rsm_poll_event_t)*msgp->numents; 7229 /* event list on the heap and needs to be freed here */ 7230 if (eventsp) { 7231 kmem_free(eventsp, evlistsz); 7232 } 7233 } 7234 7235 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7236 "consumeevent_copyout done: err=%d\n", err)); 7237 return (err); 7238 } 7239 #endif 7240 evlistsz = sizeof (rsm_poll_event_t)*msgp->numents; 7241 7242 if (ddi_copyout((caddr_t)eventsp, (caddr_t)msgp->seglist, evlistsz, 7243 mode)) { 7244 err = RSMERR_BAD_ADDR; 7245 } 7246 7247 if ((msgp->numents > RSM_MAX_POLLFDS) && eventsp) { 7248 /* event list on the heap and needs to be freed here */ 7249 kmem_free(eventsp, evlistsz); 7250 } 7251 7252 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7253 "consumeevent_copyout done: err=%d\n", err)); 7254 return (err); 7255 } 7256 7257 static int 7258 rsm_consumeevent_ioctl(caddr_t arg, int mode) 7259 { 7260 int rc; 7261 int i; 7262 minor_t rnum; 7263 rsm_consume_event_msg_t msg = {0}; 7264 rsmseg_t *seg; 7265 
rsm_poll_event_t *event_list; 7266 rsm_poll_event_t events[RSM_MAX_POLLFDS]; 7267 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL); 7268 7269 event_list = events; 7270 7271 if ((rc = rsm_consumeevent_copyin(arg, &msg, &event_list, mode)) != 7272 RSM_SUCCESS) { 7273 return (rc); 7274 } 7275 7276 for (i = 0; i < msg.numents; i++) { 7277 rnum = event_list[i].rnum; 7278 event_list[i].revent = 0; 7279 /* get the segment structure */ 7280 seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_LOCK); 7281 if (seg) { 7282 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7283 "consumeevent_ioctl: rnum(%d) seg(%p)\n", rnum, 7284 seg)); 7285 if (seg->s_pollevent) { 7286 /* consume the event */ 7287 atomic_dec_32(&seg->s_pollevent); 7288 event_list[i].revent = POLLRDNORM; 7289 } 7290 rsmseglock_release(seg); 7291 } 7292 } 7293 7294 if ((rc = rsm_consumeevent_copyout(&msg, event_list, mode)) != 7295 RSM_SUCCESS) { 7296 return (rc); 7297 } 7298 7299 return (RSM_SUCCESS); 7300 } 7301 7302 static int 7303 iovec_copyin(caddr_t user_vec, rsmka_iovec_t *iovec, int count, int mode) 7304 { 7305 int size; 7306 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 7307 7308 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "iovec_copyin enter\n")); 7309 7310 #ifdef _MULTI_DATAMODEL 7311 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 7312 rsmka_iovec32_t *iovec32, *iovec32_base; 7313 int i; 7314 7315 size = count * sizeof (rsmka_iovec32_t); 7316 iovec32_base = iovec32 = kmem_zalloc(size, KM_SLEEP); 7317 if (ddi_copyin((caddr_t)user_vec, 7318 (caddr_t)iovec32, size, mode)) { 7319 kmem_free(iovec32, size); 7320 DBG_PRINTF((category, RSM_DEBUG, 7321 "iovec_copyin: returning RSMERR_BAD_ADDR\n")); 7322 return (RSMERR_BAD_ADDR); 7323 } 7324 7325 for (i = 0; i < count; i++, iovec++, iovec32++) { 7326 iovec->io_type = (int)iovec32->io_type; 7327 if (iovec->io_type == RSM_HANDLE_TYPE) 7328 iovec->local.segid = (rsm_memseg_id_t) 7329 iovec32->local; 7330 else 7331 iovec->local.vaddr = 7332 
(caddr_t)(uintptr_t)iovec32->local; 7333 iovec->local_offset = (size_t)iovec32->local_offset; 7334 iovec->remote_offset = (size_t)iovec32->remote_offset; 7335 iovec->transfer_len = (size_t)iovec32->transfer_len; 7336 7337 } 7338 kmem_free(iovec32_base, size); 7339 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7340 "iovec_copyin done\n")); 7341 return (DDI_SUCCESS); 7342 } 7343 #endif 7344 7345 size = count * sizeof (rsmka_iovec_t); 7346 if (ddi_copyin((caddr_t)user_vec, (caddr_t)iovec, size, mode)) { 7347 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7348 "iovec_copyin done: RSMERR_BAD_ADDR\n")); 7349 return (RSMERR_BAD_ADDR); 7350 } 7351 7352 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "iovec_copyin done\n")); 7353 7354 return (DDI_SUCCESS); 7355 } 7356 7357 7358 static int 7359 sgio_copyin(caddr_t arg, rsmka_scat_gath_t *sg_io, int mode) 7360 { 7361 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 7362 7363 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_copyin enter\n")); 7364 7365 #ifdef _MULTI_DATAMODEL 7366 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 7367 rsmka_scat_gath32_t sg_io32; 7368 7369 if (ddi_copyin(arg, (caddr_t)&sg_io32, sizeof (sg_io32), 7370 mode)) { 7371 DBG_PRINTF((category, RSM_DEBUG, 7372 "sgio_copyin done: returning EFAULT\n")); 7373 return (RSMERR_BAD_ADDR); 7374 } 7375 sg_io->local_nodeid = (rsm_node_id_t)sg_io32.local_nodeid; 7376 sg_io->io_request_count = (size_t)sg_io32.io_request_count; 7377 sg_io->io_residual_count = (size_t)sg_io32.io_residual_count; 7378 sg_io->flags = (size_t)sg_io32.flags; 7379 sg_io->remote_handle = (rsm_memseg_import_handle_t) 7380 (uintptr_t)sg_io32.remote_handle; 7381 sg_io->iovec = (rsmka_iovec_t *)(uintptr_t)sg_io32.iovec; 7382 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7383 "sgio_copyin done\n")); 7384 return (DDI_SUCCESS); 7385 } 7386 #endif 7387 if (ddi_copyin(arg, (caddr_t)sg_io, sizeof (rsmka_scat_gath_t), 7388 mode)) { 7389 DBG_PRINTF((category, RSM_DEBUG, 7390 "sgio_copyin done: returning 
EFAULT\n")); 7391 return (RSMERR_BAD_ADDR); 7392 } 7393 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_copyin done\n")); 7394 return (DDI_SUCCESS); 7395 } 7396 7397 static int 7398 sgio_resid_copyout(caddr_t arg, rsmka_scat_gath_t *sg_io, int mode) 7399 { 7400 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 7401 7402 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7403 "sgio_resid_copyout enter\n")); 7404 7405 #ifdef _MULTI_DATAMODEL 7406 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 7407 rsmka_scat_gath32_t sg_io32; 7408 7409 sg_io32.io_residual_count = sg_io->io_residual_count; 7410 sg_io32.flags = sg_io->flags; 7411 7412 if (ddi_copyout((caddr_t)&sg_io32.io_residual_count, 7413 (caddr_t)&((rsmka_scat_gath32_t *)arg)->io_residual_count, 7414 sizeof (uint32_t), mode)) { 7415 7416 DBG_PRINTF((category, RSM_ERR, 7417 "sgio_resid_copyout error: rescnt\n")); 7418 return (RSMERR_BAD_ADDR); 7419 } 7420 7421 if (ddi_copyout((caddr_t)&sg_io32.flags, 7422 (caddr_t)&((rsmka_scat_gath32_t *)arg)->flags, 7423 sizeof (uint32_t), mode)) { 7424 7425 DBG_PRINTF((category, RSM_ERR, 7426 "sgio_resid_copyout error: flags\n")); 7427 return (RSMERR_BAD_ADDR); 7428 } 7429 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7430 "sgio_resid_copyout done\n")); 7431 return (DDI_SUCCESS); 7432 } 7433 #endif 7434 if (ddi_copyout((caddr_t)&sg_io->io_residual_count, 7435 (caddr_t)&((rsmka_scat_gath_t *)arg)->io_residual_count, 7436 sizeof (ulong_t), mode)) { 7437 7438 DBG_PRINTF((category, RSM_ERR, 7439 "sgio_resid_copyout error:rescnt\n")); 7440 return (RSMERR_BAD_ADDR); 7441 } 7442 7443 if (ddi_copyout((caddr_t)&sg_io->flags, 7444 (caddr_t)&((rsmka_scat_gath_t *)arg)->flags, 7445 sizeof (uint_t), mode)) { 7446 7447 DBG_PRINTF((category, RSM_ERR, 7448 "sgio_resid_copyout error:flags\n")); 7449 return (RSMERR_BAD_ADDR); 7450 } 7451 7452 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_resid_copyout done\n")); 7453 return (DDI_SUCCESS); 7454 } 7455 7456 7457 static int 7458 
rsm_iovec_ioctl(dev_t dev, caddr_t arg, int cmd, int mode, cred_t *credp) 7459 { 7460 rsmka_scat_gath_t sg_io; 7461 rsmka_iovec_t ka_iovec_arr[RSM_MAX_IOVLEN]; 7462 rsmka_iovec_t *ka_iovec; 7463 rsmka_iovec_t *ka_iovec_start; 7464 rsmpi_scat_gath_t rsmpi_sg_io; 7465 rsmpi_iovec_t iovec_arr[RSM_MAX_IOVLEN]; 7466 rsmpi_iovec_t *iovec; 7467 rsmpi_iovec_t *iovec_start = NULL; 7468 rsmapi_access_entry_t *acl; 7469 rsmresource_t *res; 7470 minor_t rnum; 7471 rsmseg_t *im_seg, *ex_seg; 7472 int e; 7473 int error = 0; 7474 uint_t i; 7475 uint_t iov_proc = 0; /* num of iovecs processed */ 7476 size_t size = 0; 7477 size_t ka_size; 7478 7479 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 7480 7481 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_iovec_ioctl enter\n")); 7482 7483 credp = credp; 7484 7485 /* 7486 * Copyin the scatter/gather structure and build new structure 7487 * for rsmpi. 7488 */ 7489 e = sgio_copyin(arg, &sg_io, mode); 7490 if (e != DDI_SUCCESS) { 7491 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7492 "rsm_iovec_ioctl done: sgio_copyin %d\n", e)); 7493 return (e); 7494 } 7495 7496 if (sg_io.io_request_count > RSM_MAX_SGIOREQS) { 7497 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7498 "rsm_iovec_ioctl done: request_count(%d) too large\n", 7499 sg_io.io_request_count)); 7500 return (RSMERR_BAD_SGIO); 7501 } 7502 7503 rsmpi_sg_io.io_request_count = sg_io.io_request_count; 7504 rsmpi_sg_io.io_residual_count = sg_io.io_request_count; 7505 rsmpi_sg_io.io_segflg = 0; 7506 7507 /* Allocate memory and copyin io vector array */ 7508 if (sg_io.io_request_count > RSM_MAX_IOVLEN) { 7509 ka_size = sg_io.io_request_count * sizeof (rsmka_iovec_t); 7510 ka_iovec_start = ka_iovec = kmem_zalloc(ka_size, KM_SLEEP); 7511 } else { 7512 ka_iovec_start = ka_iovec = ka_iovec_arr; 7513 } 7514 e = iovec_copyin((caddr_t)sg_io.iovec, ka_iovec, 7515 sg_io.io_request_count, mode); 7516 if (e != DDI_SUCCESS) { 7517 if (sg_io.io_request_count > RSM_MAX_IOVLEN) 7518 
kmem_free(ka_iovec, ka_size); 7519 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7520 "rsm_iovec_ioctl done: iovec_copyin %d\n", e)); 7521 return (e); 7522 } 7523 7524 /* get the import segment descriptor */ 7525 rnum = getminor(dev); 7526 res = rsmresource_lookup(rnum, RSM_LOCK); 7527 7528 /* 7529 * The following sequence of locking may (or MAY NOT) cause a 7530 * deadlock but this is currently not addressed here since the 7531 * implementation will be changed to incorporate the use of 7532 * reference counting for both the import and the export segments. 7533 */ 7534 7535 /* rsmseglock_acquire(im_seg) done in rsmresource_lookup */ 7536 7537 im_seg = (rsmseg_t *)res; 7538 7539 if (im_seg == NULL) { 7540 if (sg_io.io_request_count > RSM_MAX_IOVLEN) 7541 kmem_free(ka_iovec, ka_size); 7542 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7543 "rsm_iovec_ioctl done: rsmresource_lookup failed\n")); 7544 return (EINVAL); 7545 } 7546 /* putv/getv supported is supported only on import segments */ 7547 if (im_seg->s_type != RSM_RESOURCE_IMPORT_SEGMENT) { 7548 rsmseglock_release(im_seg); 7549 if (sg_io.io_request_count > RSM_MAX_IOVLEN) 7550 kmem_free(ka_iovec, ka_size); 7551 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7552 "rsm_iovec_ioctl done: not an import segment\n")); 7553 return (EINVAL); 7554 } 7555 7556 /* 7557 * wait for a remote DR to complete ie. for segments to get UNQUIESCED 7558 * as well as wait for a local DR to complete. 
7559 */ 7560 while ((im_seg->s_state == RSM_STATE_CONN_QUIESCE) || 7561 (im_seg->s_state == RSM_STATE_MAP_QUIESCE) || 7562 (im_seg->s_flags & RSM_DR_INPROGRESS)) { 7563 if (cv_wait_sig(&im_seg->s_cv, &im_seg->s_lock) == 0) { 7564 DBG_PRINTF((category, RSM_DEBUG, 7565 "rsm_iovec_ioctl done: cv_wait INTR")); 7566 rsmseglock_release(im_seg); 7567 return (RSMERR_INTERRUPTED); 7568 } 7569 } 7570 7571 if ((im_seg->s_state != RSM_STATE_CONNECT) && 7572 (im_seg->s_state != RSM_STATE_ACTIVE)) { 7573 7574 ASSERT(im_seg->s_state == RSM_STATE_DISCONNECT || 7575 im_seg->s_state == RSM_STATE_NEW); 7576 7577 DBG_PRINTF((category, RSM_DEBUG, 7578 "rsm_iovec_ioctl done: im_seg not conn/map")); 7579 rsmseglock_release(im_seg); 7580 e = RSMERR_BAD_SGIO; 7581 goto out; 7582 } 7583 7584 im_seg->s_rdmacnt++; 7585 rsmseglock_release(im_seg); 7586 7587 /* 7588 * Allocate and set up the io vector for rsmpi 7589 */ 7590 if (sg_io.io_request_count > RSM_MAX_IOVLEN) { 7591 size = sg_io.io_request_count * sizeof (rsmpi_iovec_t); 7592 iovec_start = iovec = kmem_zalloc(size, KM_SLEEP); 7593 } else { 7594 iovec_start = iovec = iovec_arr; 7595 } 7596 7597 rsmpi_sg_io.iovec = iovec; 7598 for (iov_proc = 0; iov_proc < sg_io.io_request_count; iov_proc++) { 7599 if (ka_iovec->io_type == RSM_HANDLE_TYPE) { 7600 ex_seg = rsmexport_lookup(ka_iovec->local.segid); 7601 7602 if (ex_seg == NULL) { 7603 e = RSMERR_BAD_SGIO; 7604 break; 7605 } 7606 ASSERT(ex_seg->s_state == RSM_STATE_EXPORT); 7607 7608 acl = ex_seg->s_acl; 7609 if (acl[0].ae_permission == 0) { 7610 struct buf *xbuf; 7611 dev_t sdev = 0; 7612 7613 xbuf = ddi_umem_iosetup(ex_seg->s_cookie, 7614 0, ex_seg->s_len, B_WRITE, 7615 sdev, 0, NULL, DDI_UMEM_SLEEP); 7616 7617 ASSERT(xbuf != NULL); 7618 7619 iovec->local_mem.ms_type = RSM_MEM_BUF; 7620 iovec->local_mem.ms_memory.bp = xbuf; 7621 } else { 7622 iovec->local_mem.ms_type = RSM_MEM_HANDLE; 7623 iovec->local_mem.ms_memory.handle = 7624 ex_seg->s_handle.out; 7625 } 7626 ex_seg->s_rdmacnt++; /* 
refcnt the handle */ 7627 rsmseglock_release(ex_seg); 7628 } else { 7629 iovec->local_mem.ms_type = RSM_MEM_VADDR; 7630 iovec->local_mem.ms_memory.vr.vaddr = 7631 ka_iovec->local.vaddr; 7632 } 7633 7634 iovec->local_offset = ka_iovec->local_offset; 7635 iovec->remote_handle = im_seg->s_handle.in; 7636 iovec->remote_offset = ka_iovec->remote_offset; 7637 iovec->transfer_length = ka_iovec->transfer_len; 7638 iovec++; 7639 ka_iovec++; 7640 } 7641 7642 if (iov_proc < sg_io.io_request_count) { 7643 /* error while processing handle */ 7644 rsmseglock_acquire(im_seg); 7645 im_seg->s_rdmacnt--; /* decrement the refcnt for importseg */ 7646 if (im_seg->s_rdmacnt == 0) { 7647 cv_broadcast(&im_seg->s_cv); 7648 } 7649 rsmseglock_release(im_seg); 7650 goto out; 7651 } 7652 7653 /* call rsmpi */ 7654 if (cmd == RSM_IOCTL_PUTV) 7655 e = im_seg->s_adapter->rsmpi_ops->rsm_memseg_import_putv( 7656 im_seg->s_adapter->rsmpi_handle, 7657 &rsmpi_sg_io); 7658 else if (cmd == RSM_IOCTL_GETV) 7659 e = im_seg->s_adapter->rsmpi_ops->rsm_memseg_import_getv( 7660 im_seg->s_adapter->rsmpi_handle, 7661 &rsmpi_sg_io); 7662 else { 7663 e = EINVAL; 7664 DBG_PRINTF((category, RSM_DEBUG, 7665 "iovec_ioctl: bad command = %x\n", cmd)); 7666 } 7667 7668 7669 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7670 "rsm_iovec_ioctl RSMPI oper done %d\n", e)); 7671 7672 sg_io.io_residual_count = rsmpi_sg_io.io_residual_count; 7673 7674 /* 7675 * Check for implicit signal post flag and do the signal 7676 * post if needed 7677 */ 7678 if (sg_io.flags & RSM_IMPLICIT_SIGPOST && 7679 e == RSM_SUCCESS) { 7680 rsmipc_request_t request; 7681 7682 request.rsmipc_key = im_seg->s_segid; 7683 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL; 7684 request.rsmipc_segment_cookie = NULL; 7685 e = rsmipc_send(im_seg->s_node, &request, RSM_NO_REPLY); 7686 /* 7687 * Reset the implicit signal post flag to 0 to indicate 7688 * that the signal post has been done and need not be 7689 * done in the RSMAPI library 7690 */ 7691 sg_io.flags 
&= ~RSM_IMPLICIT_SIGPOST; 7692 } 7693 7694 rsmseglock_acquire(im_seg); 7695 im_seg->s_rdmacnt--; 7696 if (im_seg->s_rdmacnt == 0) { 7697 cv_broadcast(&im_seg->s_cv); 7698 } 7699 rsmseglock_release(im_seg); 7700 error = sgio_resid_copyout(arg, &sg_io, mode); 7701 out: 7702 iovec = iovec_start; 7703 ka_iovec = ka_iovec_start; 7704 for (i = 0; i < iov_proc; i++) { 7705 if (ka_iovec->io_type == RSM_HANDLE_TYPE) { 7706 ex_seg = rsmexport_lookup(ka_iovec->local.segid); 7707 7708 ASSERT(ex_seg != NULL); 7709 ASSERT(ex_seg->s_state == RSM_STATE_EXPORT); 7710 7711 ex_seg->s_rdmacnt--; /* unrefcnt the handle */ 7712 if (ex_seg->s_rdmacnt == 0) { 7713 cv_broadcast(&ex_seg->s_cv); 7714 } 7715 rsmseglock_release(ex_seg); 7716 } 7717 7718 ASSERT(iovec != NULL); /* true if iov_proc > 0 */ 7719 7720 /* 7721 * At present there is no dependency on the existence of xbufs 7722 * created by ddi_umem_iosetup for each of the iovecs. So we 7723 * can these xbufs here. 7724 */ 7725 if (iovec->local_mem.ms_type == RSM_MEM_BUF) { 7726 freerbuf(iovec->local_mem.ms_memory.bp); 7727 } 7728 7729 iovec++; 7730 ka_iovec++; 7731 } 7732 7733 if (sg_io.io_request_count > RSM_MAX_IOVLEN) { 7734 if (iovec_start) 7735 kmem_free(iovec_start, size); 7736 kmem_free(ka_iovec_start, ka_size); 7737 } 7738 7739 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7740 "rsm_iovec_ioctl done %d\n", e)); 7741 /* if RSMPI call fails return that else return copyout's retval */ 7742 return ((e != RSM_SUCCESS) ? 
e : error); 7743 7744 } 7745 7746 7747 static int 7748 rsmaddr_ioctl(int cmd, rsm_ioctlmsg_t *msg, int mode) 7749 { 7750 adapter_t *adapter; 7751 rsm_addr_t addr; 7752 rsm_node_id_t node; 7753 int rval = DDI_SUCCESS; 7754 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL); 7755 7756 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmaddr_ioctl enter\n")); 7757 7758 adapter = rsm_getadapter(msg, mode); 7759 if (adapter == NULL) { 7760 DBG_PRINTF((category, RSM_DEBUG, 7761 "rsmaddr_ioctl done: adapter not found\n")); 7762 return (RSMERR_CTLR_NOT_PRESENT); 7763 } 7764 7765 switch (cmd) { 7766 case RSM_IOCTL_MAP_TO_ADDR: /* nodeid to hwaddr mapping */ 7767 /* returns the hwaddr in msg->hwaddr */ 7768 if (msg->nodeid == my_nodeid) { 7769 msg->hwaddr = adapter->hwaddr; 7770 } else { 7771 addr = get_remote_hwaddr(adapter, msg->nodeid); 7772 if ((int64_t)addr < 0) { 7773 rval = RSMERR_INTERNAL_ERROR; 7774 } else { 7775 msg->hwaddr = addr; 7776 } 7777 } 7778 break; 7779 case RSM_IOCTL_MAP_TO_NODEID: /* hwaddr to nodeid mapping */ 7780 /* returns the nodeid in msg->nodeid */ 7781 if (msg->hwaddr == adapter->hwaddr) { 7782 msg->nodeid = my_nodeid; 7783 } else { 7784 node = get_remote_nodeid(adapter, msg->hwaddr); 7785 if ((int)node < 0) { 7786 rval = RSMERR_INTERNAL_ERROR; 7787 } else { 7788 msg->nodeid = (rsm_node_id_t)node; 7789 } 7790 } 7791 break; 7792 default: 7793 rval = EINVAL; 7794 break; 7795 } 7796 7797 rsmka_release_adapter(adapter); 7798 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7799 "rsmaddr_ioctl done: %d\n", rval)); 7800 return (rval); 7801 } 7802 7803 static int 7804 rsm_ddi_copyin(caddr_t arg, rsm_ioctlmsg_t *msg, int mode) 7805 { 7806 DBG_DEFINE(category, 7807 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL | RSM_DDI); 7808 7809 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ddi_copyin enter\n")); 7810 7811 #ifdef _MULTI_DATAMODEL 7812 7813 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 7814 rsm_ioctlmsg32_t msg32; 7815 int i; 7816 7817 if 
(ddi_copyin(arg, (caddr_t)&msg32, sizeof (msg32), mode)) { 7818 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7819 "rsm_ddi_copyin done: EFAULT\n")); 7820 return (RSMERR_BAD_ADDR); 7821 } 7822 msg->len = msg32.len; 7823 msg->vaddr = (caddr_t)(uintptr_t)msg32.vaddr; 7824 msg->arg = (caddr_t)(uintptr_t)msg32.arg; 7825 msg->key = msg32.key; 7826 msg->acl_len = msg32.acl_len; 7827 msg->acl = (rsmapi_access_entry_t *)(uintptr_t)msg32.acl; 7828 msg->cnum = msg32.cnum; 7829 msg->cname = (caddr_t)(uintptr_t)msg32.cname; 7830 msg->cname_len = msg32.cname_len; 7831 msg->nodeid = msg32.nodeid; 7832 msg->hwaddr = msg32.hwaddr; 7833 msg->perm = msg32.perm; 7834 for (i = 0; i < 4; i++) { 7835 msg->bar.comp[i].u64 = msg32.bar.comp[i].u64; 7836 } 7837 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7838 "rsm_ddi_copyin done\n")); 7839 return (RSM_SUCCESS); 7840 } 7841 #endif 7842 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ddi_copyin done\n")); 7843 if (ddi_copyin(arg, (caddr_t)msg, sizeof (*msg), mode)) 7844 return (RSMERR_BAD_ADDR); 7845 else 7846 return (RSM_SUCCESS); 7847 } 7848 7849 static int 7850 rsmattr_ddi_copyout(adapter_t *adapter, caddr_t arg, int mode) 7851 { 7852 rsmka_int_controller_attr_t rsm_cattr; 7853 DBG_DEFINE(category, 7854 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL | RSM_DDI); 7855 7856 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7857 "rsmattr_ddi_copyout enter\n")); 7858 /* 7859 * need to copy appropriate data from rsm_controller_attr_t 7860 * to rsmka_int_controller_attr_t 7861 */ 7862 #ifdef _MULTI_DATAMODEL 7863 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 7864 rsmka_int_controller_attr32_t rsm_cattr32; 7865 7866 rsm_cattr32.attr_direct_access_sizes = 7867 adapter->rsm_attr.attr_direct_access_sizes; 7868 rsm_cattr32.attr_atomic_sizes = 7869 adapter->rsm_attr.attr_atomic_sizes; 7870 rsm_cattr32.attr_page_size = 7871 adapter->rsm_attr.attr_page_size; 7872 if (adapter->rsm_attr.attr_max_export_segment_size > 7873 UINT_MAX) 7874 
rsm_cattr32.attr_max_export_segment_size = 7875 RSM_MAXSZ_PAGE_ALIGNED; 7876 else 7877 rsm_cattr32.attr_max_export_segment_size = 7878 adapter->rsm_attr.attr_max_export_segment_size; 7879 if (adapter->rsm_attr.attr_tot_export_segment_size > 7880 UINT_MAX) 7881 rsm_cattr32.attr_tot_export_segment_size = 7882 RSM_MAXSZ_PAGE_ALIGNED; 7883 else 7884 rsm_cattr32.attr_tot_export_segment_size = 7885 adapter->rsm_attr.attr_tot_export_segment_size; 7886 if (adapter->rsm_attr.attr_max_export_segments > 7887 UINT_MAX) 7888 rsm_cattr32.attr_max_export_segments = 7889 UINT_MAX; 7890 else 7891 rsm_cattr32.attr_max_export_segments = 7892 adapter->rsm_attr.attr_max_export_segments; 7893 if (adapter->rsm_attr.attr_max_import_map_size > 7894 UINT_MAX) 7895 rsm_cattr32.attr_max_import_map_size = 7896 RSM_MAXSZ_PAGE_ALIGNED; 7897 else 7898 rsm_cattr32.attr_max_import_map_size = 7899 adapter->rsm_attr.attr_max_import_map_size; 7900 if (adapter->rsm_attr.attr_tot_import_map_size > 7901 UINT_MAX) 7902 rsm_cattr32.attr_tot_import_map_size = 7903 RSM_MAXSZ_PAGE_ALIGNED; 7904 else 7905 rsm_cattr32.attr_tot_import_map_size = 7906 adapter->rsm_attr.attr_tot_import_map_size; 7907 if (adapter->rsm_attr.attr_max_import_segments > 7908 UINT_MAX) 7909 rsm_cattr32.attr_max_import_segments = 7910 UINT_MAX; 7911 else 7912 rsm_cattr32.attr_max_import_segments = 7913 adapter->rsm_attr.attr_max_import_segments; 7914 rsm_cattr32.attr_controller_addr = 7915 adapter->rsm_attr.attr_controller_addr; 7916 7917 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7918 "rsmattr_ddi_copyout done\n")); 7919 if (ddi_copyout((caddr_t)&rsm_cattr32, arg, 7920 sizeof (rsmka_int_controller_attr32_t), mode)) { 7921 return (RSMERR_BAD_ADDR); 7922 } 7923 else 7924 return (RSM_SUCCESS); 7925 } 7926 #endif 7927 rsm_cattr.attr_direct_access_sizes = 7928 adapter->rsm_attr.attr_direct_access_sizes; 7929 rsm_cattr.attr_atomic_sizes = 7930 adapter->rsm_attr.attr_atomic_sizes; 7931 rsm_cattr.attr_page_size = 7932 
adapter->rsm_attr.attr_page_size; 7933 rsm_cattr.attr_max_export_segment_size = 7934 adapter->rsm_attr.attr_max_export_segment_size; 7935 rsm_cattr.attr_tot_export_segment_size = 7936 adapter->rsm_attr.attr_tot_export_segment_size; 7937 rsm_cattr.attr_max_export_segments = 7938 adapter->rsm_attr.attr_max_export_segments; 7939 rsm_cattr.attr_max_import_map_size = 7940 adapter->rsm_attr.attr_max_import_map_size; 7941 rsm_cattr.attr_tot_import_map_size = 7942 adapter->rsm_attr.attr_tot_import_map_size; 7943 rsm_cattr.attr_max_import_segments = 7944 adapter->rsm_attr.attr_max_import_segments; 7945 rsm_cattr.attr_controller_addr = 7946 adapter->rsm_attr.attr_controller_addr; 7947 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7948 "rsmattr_ddi_copyout done\n")); 7949 if (ddi_copyout((caddr_t)&rsm_cattr, arg, 7950 sizeof (rsmka_int_controller_attr_t), mode)) { 7951 return (RSMERR_BAD_ADDR); 7952 } 7953 else 7954 return (RSM_SUCCESS); 7955 } 7956 7957 /*ARGSUSED*/ 7958 static int 7959 rsm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, 7960 int *rvalp) 7961 { 7962 rsmseg_t *seg; 7963 rsmresource_t *res; 7964 minor_t rnum; 7965 rsm_ioctlmsg_t msg = {0}; 7966 int error; 7967 adapter_t *adapter; 7968 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL); 7969 7970 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ioctl enter\n")); 7971 7972 if (cmd == RSM_IOCTL_CONSUMEEVENT) { 7973 error = rsm_consumeevent_ioctl((caddr_t)arg, mode); 7974 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7975 "rsm_ioctl RSM_IOCTL_CONSUMEEVENT done: %d\n", error)); 7976 return (error); 7977 } 7978 7979 /* topology cmd does not use the arg common to other cmds */ 7980 if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_TOPOLOGY) { 7981 error = rsmka_topology_ioctl((caddr_t)arg, cmd, mode); 7982 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7983 "rsm_ioctl done: %d\n", error)); 7984 return (error); 7985 } 7986 7987 if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_IOVEC) { 7988 error = rsm_iovec_ioctl(dev, 
(caddr_t)arg, cmd, mode, credp); 7989 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7990 "rsm_ioctl done: %d\n", error)); 7991 return (error); 7992 } 7993 7994 /* 7995 * try to load arguments 7996 */ 7997 if (cmd != RSM_IOCTL_RING_BELL && 7998 rsm_ddi_copyin((caddr_t)arg, &msg, mode)) { 7999 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8000 "rsm_ioctl done: EFAULT\n")); 8001 return (RSMERR_BAD_ADDR); 8002 } 8003 8004 if (cmd == RSM_IOCTL_ATTR) { 8005 adapter = rsm_getadapter(&msg, mode); 8006 if (adapter == NULL) { 8007 DBG_PRINTF((category, RSM_DEBUG, 8008 "rsm_ioctl done: ENODEV\n")); 8009 return (RSMERR_CTLR_NOT_PRESENT); 8010 } 8011 error = rsmattr_ddi_copyout(adapter, msg.arg, mode); 8012 rsmka_release_adapter(adapter); 8013 DBG_PRINTF((category, RSM_DEBUG, 8014 "rsm_ioctl:after copyout %d\n", error)); 8015 return (error); 8016 } 8017 8018 if (cmd == RSM_IOCTL_BAR_INFO) { 8019 /* Return library off,len of barrier page */ 8020 msg.off = barrier_offset; 8021 msg.len = (int)barrier_size; 8022 #ifdef _MULTI_DATAMODEL 8023 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 8024 rsm_ioctlmsg32_t msg32; 8025 8026 if (msg.len > UINT_MAX) 8027 msg.len = RSM_MAXSZ_PAGE_ALIGNED; 8028 else 8029 msg32.len = (int32_t)msg.len; 8030 msg32.off = (int32_t)msg.off; 8031 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8032 "rsm_ioctl done\n")); 8033 if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg, 8034 sizeof (msg32), mode)) 8035 return (RSMERR_BAD_ADDR); 8036 else 8037 return (RSM_SUCCESS); 8038 } 8039 #endif 8040 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8041 "rsm_ioctl done\n")); 8042 if (ddi_copyout((caddr_t)&msg, (caddr_t)arg, 8043 sizeof (msg), mode)) 8044 return (RSMERR_BAD_ADDR); 8045 else 8046 return (RSM_SUCCESS); 8047 } 8048 8049 if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_MAP_ADDR) { 8050 /* map the nodeid or hwaddr */ 8051 error = rsmaddr_ioctl(cmd, &msg, mode); 8052 if (error == RSM_SUCCESS) { 8053 #ifdef _MULTI_DATAMODEL 8054 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 8055 
rsm_ioctlmsg32_t msg32; 8056 8057 msg32.hwaddr = (uint64_t)msg.hwaddr; 8058 msg32.nodeid = (uint32_t)msg.nodeid; 8059 8060 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8061 "rsm_ioctl done\n")); 8062 if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg, 8063 sizeof (msg32), mode)) 8064 return (RSMERR_BAD_ADDR); 8065 else 8066 return (RSM_SUCCESS); 8067 } 8068 #endif 8069 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8070 "rsm_ioctl done\n")); 8071 if (ddi_copyout((caddr_t)&msg, (caddr_t)arg, 8072 sizeof (msg), mode)) 8073 return (RSMERR_BAD_ADDR); 8074 else 8075 return (RSM_SUCCESS); 8076 } 8077 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8078 "rsm_ioctl done: %d\n", error)); 8079 return (error); 8080 } 8081 8082 /* Find resource and look it in read mode */ 8083 rnum = getminor(dev); 8084 res = rsmresource_lookup(rnum, RSM_NOLOCK); 8085 ASSERT(res != NULL); 8086 8087 /* 8088 * Find command group 8089 */ 8090 switch (RSM_IOCTL_CMDGRP(cmd)) { 8091 case RSM_IOCTL_EXPORT_SEG: 8092 /* 8093 * Export list is searched during publish, loopback and 8094 * remote lookup call. 8095 */ 8096 seg = rsmresource_seg(res, rnum, credp, 8097 RSM_RESOURCE_EXPORT_SEGMENT); 8098 if (seg->s_type == RSM_RESOURCE_EXPORT_SEGMENT) { 8099 error = rsmexport_ioctl(seg, &msg, cmd, arg, mode, 8100 credp); 8101 } else { /* export ioctl on an import/barrier resource */ 8102 error = RSMERR_BAD_SEG_HNDL; 8103 } 8104 break; 8105 case RSM_IOCTL_IMPORT_SEG: 8106 /* Import list is searched during remote unmap call. 
*/ 8107 seg = rsmresource_seg(res, rnum, credp, 8108 RSM_RESOURCE_IMPORT_SEGMENT); 8109 if (seg->s_type == RSM_RESOURCE_IMPORT_SEGMENT) { 8110 error = rsmimport_ioctl(seg, &msg, cmd, arg, mode, 8111 credp); 8112 } else { /* import ioctl on an export/barrier resource */ 8113 error = RSMERR_BAD_SEG_HNDL; 8114 } 8115 break; 8116 case RSM_IOCTL_BAR: 8117 if (res != RSMRC_RESERVED && 8118 res->rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT) { 8119 error = rsmbar_ioctl((rsmseg_t *)res, &msg, cmd, arg, 8120 mode); 8121 } else { /* invalid res value */ 8122 error = RSMERR_BAD_SEG_HNDL; 8123 } 8124 break; 8125 case RSM_IOCTL_BELL: 8126 if (res != RSMRC_RESERVED) { 8127 if (res->rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT) 8128 error = exportbell_ioctl((rsmseg_t *)res, cmd); 8129 else if (res->rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT) 8130 error = importbell_ioctl((rsmseg_t *)res, cmd); 8131 else /* RSM_RESOURCE_BAR */ 8132 error = RSMERR_BAD_SEG_HNDL; 8133 } else { /* invalid res value */ 8134 error = RSMERR_BAD_SEG_HNDL; 8135 } 8136 break; 8137 default: 8138 error = EINVAL; 8139 } 8140 8141 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ioctl done: %d\n", 8142 error)); 8143 return (error); 8144 } 8145 8146 8147 /* **************************** Segment Mapping Operations ********* */ 8148 static rsm_mapinfo_t * 8149 rsm_get_mapinfo(rsmseg_t *seg, off_t off, size_t len, off_t *dev_offset, 8150 size_t *map_len) 8151 { 8152 rsm_mapinfo_t *p; 8153 /* 8154 * Find the correct mapinfo structure to use during the mapping 8155 * from the seg->s_mapinfo list. 8156 * The seg->s_mapinfo list contains in reverse order the mappings 8157 * as returned by the RSMPI rsm_map. In rsm_devmap, we need to 8158 * access the correct entry within this list for the mapping 8159 * requested. 8160 * 8161 * The algorithm for selecting a list entry is as follows: 8162 * 8163 * When start_offset of an entry <= off we have found the entry 8164 * we were looking for. 
Adjust the dev_offset and map_len (needs 8165 * to be PAGESIZE aligned). 8166 */ 8167 p = seg->s_mapinfo; 8168 for (; p; p = p->next) { 8169 if (p->start_offset <= off) { 8170 *dev_offset = p->dev_offset + off - p->start_offset; 8171 *map_len = (len > p->individual_len) ? 8172 p->individual_len : ptob(btopr(len)); 8173 return (p); 8174 } 8175 p = p->next; 8176 } 8177 8178 return (NULL); 8179 } 8180 8181 static void 8182 rsm_free_mapinfo(rsm_mapinfo_t *mapinfo) 8183 { 8184 rsm_mapinfo_t *p; 8185 8186 while (mapinfo != NULL) { 8187 p = mapinfo; 8188 mapinfo = mapinfo->next; 8189 kmem_free(p, sizeof (*p)); 8190 } 8191 } 8192 8193 static int 8194 rsmmap_map(devmap_cookie_t dhp, dev_t dev, uint_t flags, offset_t off, 8195 size_t len, void **pvtp) 8196 { 8197 rsmcookie_t *p; 8198 rsmresource_t *res; 8199 rsmseg_t *seg; 8200 minor_t rnum; 8201 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI); 8202 8203 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_map enter\n")); 8204 8205 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8206 "rsmmap_map: dhp = %x\n", dhp)); 8207 8208 flags = flags; 8209 8210 rnum = getminor(dev); 8211 res = (rsmresource_t *)rsmresource_lookup(rnum, RSM_NOLOCK); 8212 ASSERT(res != NULL); 8213 8214 seg = (rsmseg_t *)res; 8215 8216 rsmseglock_acquire(seg); 8217 8218 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 8219 8220 /* 8221 * Allocate structure and add cookie to segment list 8222 */ 8223 p = kmem_alloc(sizeof (*p), KM_SLEEP); 8224 8225 p->c_dhp = dhp; 8226 p->c_off = off; 8227 p->c_len = len; 8228 p->c_next = seg->s_ckl; 8229 seg->s_ckl = p; 8230 8231 *pvtp = (void *)seg; 8232 8233 rsmseglock_release(seg); 8234 8235 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_map done\n")); 8236 return (DDI_SUCCESS); 8237 } 8238 8239 /* 8240 * Page fault handling is done here. 
The prerequisite mapping setup 8241 * has been done in rsm_devmap with calls to ddi_devmem_setup or 8242 * ddi_umem_setup 8243 */ 8244 static int 8245 rsmmap_access(devmap_cookie_t dhp, void *pvt, offset_t offset, size_t len, 8246 uint_t type, uint_t rw) 8247 { 8248 int e; 8249 rsmseg_t *seg = (rsmseg_t *)pvt; 8250 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI); 8251 8252 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_access enter\n")); 8253 8254 rsmseglock_acquire(seg); 8255 8256 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 8257 8258 while (seg->s_state == RSM_STATE_MAP_QUIESCE) { 8259 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) { 8260 DBG_PRINTF((category, RSM_DEBUG, 8261 "rsmmap_access done: cv_wait INTR")); 8262 rsmseglock_release(seg); 8263 return (RSMERR_INTERRUPTED); 8264 } 8265 } 8266 8267 ASSERT(seg->s_state == RSM_STATE_DISCONNECT || 8268 seg->s_state == RSM_STATE_ACTIVE); 8269 8270 if (seg->s_state == RSM_STATE_DISCONNECT) 8271 seg->s_flags |= RSM_IMPORT_DUMMY; 8272 8273 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8274 "rsmmap_access: dhp = %x\n", dhp)); 8275 8276 rsmseglock_release(seg); 8277 8278 if (e = devmap_load(dhp, offset, len, type, rw)) { 8279 DBG_PRINTF((category, RSM_ERR, "devmap_load failed\n")); 8280 } 8281 8282 8283 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_access done\n")); 8284 8285 return (e); 8286 } 8287 8288 static int 8289 rsmmap_dup(devmap_cookie_t dhp, void *oldpvt, devmap_cookie_t new_dhp, 8290 void **newpvt) 8291 { 8292 rsmseg_t *seg = (rsmseg_t *)oldpvt; 8293 rsmcookie_t *p, *old; 8294 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI); 8295 8296 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_dup enter\n")); 8297 8298 /* 8299 * Same as map, create an entry to hold cookie and add it to 8300 * connect segment list. The oldpvt is a pointer to segment. 8301 * Return segment pointer in newpvt. 
8302 */ 8303 rsmseglock_acquire(seg); 8304 8305 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 8306 8307 /* 8308 * Find old cookie 8309 */ 8310 for (old = seg->s_ckl; old != NULL; old = old->c_next) { 8311 if (old->c_dhp == dhp) { 8312 break; 8313 } 8314 } 8315 if (old == NULL) { 8316 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8317 "rsmmap_dup done: EINVAL\n")); 8318 rsmseglock_release(seg); 8319 return (EINVAL); 8320 } 8321 8322 p = kmem_alloc(sizeof (*p), KM_SLEEP); 8323 8324 p->c_dhp = new_dhp; 8325 p->c_off = old->c_off; 8326 p->c_len = old->c_len; 8327 p->c_next = seg->s_ckl; 8328 seg->s_ckl = p; 8329 8330 *newpvt = (void *)seg; 8331 8332 rsmseglock_release(seg); 8333 8334 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_dup done\n")); 8335 8336 return (DDI_SUCCESS); 8337 } 8338 8339 static void 8340 rsmmap_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off, size_t len, 8341 devmap_cookie_t new_dhp1, void **pvtp1, 8342 devmap_cookie_t new_dhp2, void **pvtp2) 8343 { 8344 /* 8345 * Remove pvtp structure from segment list. 8346 */ 8347 rsmseg_t *seg = (rsmseg_t *)pvtp; 8348 int freeflag; 8349 8350 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI); 8351 8352 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_unmap enter\n")); 8353 8354 off = off; len = len; 8355 pvtp1 = pvtp1; pvtp2 = pvtp2; 8356 8357 rsmseglock_acquire(seg); 8358 8359 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 8360 8361 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8362 "rsmmap_unmap: dhp = %x\n", dhp)); 8363 /* 8364 * We can go ahead and remove the dhps even if we are in 8365 * the MAPPING state because the dhps being removed here 8366 * belong to a different mmap and we are holding the segment 8367 * lock. 
8368 */ 8369 if (new_dhp1 == NULL && new_dhp2 == NULL) { 8370 /* find and remove dhp handle */ 8371 rsmcookie_t *tmp, **back = &seg->s_ckl; 8372 8373 while (*back != NULL) { 8374 tmp = *back; 8375 if (tmp->c_dhp == dhp) { 8376 *back = tmp->c_next; 8377 kmem_free(tmp, sizeof (*tmp)); 8378 break; 8379 } 8380 back = &tmp->c_next; 8381 } 8382 } else { 8383 DBG_PRINTF((category, RSM_DEBUG_LVL2, 8384 "rsmmap_unmap:parital unmap" 8385 "new_dhp1 %lx, new_dhp2 %lx\n", 8386 (size_t)new_dhp1, (size_t)new_dhp2)); 8387 } 8388 8389 /* 8390 * rsmmap_unmap is called for each mapping cookie on the list. 8391 * When the list becomes empty and we are not in the MAPPING 8392 * state then unmap in the rsmpi driver. 8393 */ 8394 if ((seg->s_ckl == NULL) && (seg->s_state != RSM_STATE_MAPPING)) 8395 (void) rsm_unmap(seg); 8396 8397 if (seg->s_state == RSM_STATE_END && seg->s_ckl == NULL) { 8398 freeflag = 1; 8399 } else { 8400 freeflag = 0; 8401 } 8402 8403 rsmseglock_release(seg); 8404 8405 if (freeflag) { 8406 /* Free the segment structure */ 8407 rsmseg_free(seg); 8408 } 8409 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_unmap done\n")); 8410 8411 } 8412 8413 static struct devmap_callback_ctl rsmmap_ops = { 8414 DEVMAP_OPS_REV, /* devmap_ops version number */ 8415 rsmmap_map, /* devmap_ops map routine */ 8416 rsmmap_access, /* devmap_ops access routine */ 8417 rsmmap_dup, /* devmap_ops dup routine */ 8418 rsmmap_unmap, /* devmap_ops unmap routine */ 8419 }; 8420 8421 static int 8422 rsm_devmap(dev_t dev, devmap_cookie_t dhc, offset_t off, size_t len, 8423 size_t *maplen, uint_t model /*ARGSUSED*/) 8424 { 8425 struct devmap_callback_ctl *callbackops = &rsmmap_ops; 8426 int err; 8427 uint_t maxprot; 8428 minor_t rnum; 8429 rsmseg_t *seg; 8430 off_t dev_offset; 8431 size_t cur_len; 8432 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI); 8433 8434 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_devmap enter\n")); 8435 8436 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8437 
"rsm_devmap: off = %lx, len = %lx\n", off, len)); 8438 rnum = getminor(dev); 8439 seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_NOLOCK); 8440 ASSERT(seg != NULL); 8441 8442 if (seg->s_hdr.rsmrc_type == RSM_RESOURCE_BAR) { 8443 if ((off == barrier_offset) && 8444 (len == barrier_size)) { 8445 8446 ASSERT(bar_va != NULL && bar_cookie != NULL); 8447 8448 /* 8449 * The offset argument in devmap_umem_setup represents 8450 * the offset within the kernel memory defined by the 8451 * cookie. We use this offset as barrier_offset. 8452 */ 8453 err = devmap_umem_setup(dhc, rsm_dip, NULL, bar_cookie, 8454 barrier_offset, len, PROT_USER|PROT_READ, 8455 DEVMAP_DEFAULTS, 0); 8456 8457 if (err != 0) { 8458 DBG_PRINTF((category, RSM_ERR, 8459 "rsm_devmap done: %d\n", err)); 8460 return (RSMERR_MAP_FAILED); 8461 } 8462 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8463 "rsm_devmap done: %d\n", err)); 8464 8465 *maplen = barrier_size; 8466 8467 return (err); 8468 } else { 8469 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8470 "rsm_devmap done: %d\n", err)); 8471 return (RSMERR_MAP_FAILED); 8472 } 8473 } 8474 8475 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 8476 ASSERT(seg->s_state == RSM_STATE_MAPPING); 8477 8478 /* 8479 * Make sure we still have permission for the map operation. 8480 */ 8481 maxprot = PROT_USER; 8482 if (seg->s_mode & RSM_PERM_READ) { 8483 maxprot |= PROT_READ; 8484 } 8485 8486 if (seg->s_mode & RSM_PERM_WRITE) { 8487 maxprot |= PROT_WRITE; 8488 } 8489 8490 /* 8491 * For each devmap call, rsmmap_map is called. This maintains driver 8492 * private information for the mapping. Thus, if there are multiple 8493 * devmap calls there will be multiple rsmmap_map calls and for each 8494 * call, the mapping information will be stored. 8495 * In case of an error during the processing of the devmap call, error 8496 * will be returned. This error return causes the caller of rsm_devmap 8497 * to undo all the mappings by calling rsmmap_unmap for each one. 
8498 * rsmmap_unmap will free up the private information for the requested 8499 * mapping. 8500 */ 8501 if (seg->s_node != my_nodeid) { 8502 rsm_mapinfo_t *p; 8503 8504 p = rsm_get_mapinfo(seg, off, len, &dev_offset, &cur_len); 8505 if (p == NULL) { 8506 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8507 "rsm_devmap: incorrect mapping info\n")); 8508 return (RSMERR_MAP_FAILED); 8509 } 8510 err = devmap_devmem_setup(dhc, p->dip, 8511 callbackops, p->dev_register, 8512 dev_offset, cur_len, maxprot, 8513 DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS, 0); 8514 8515 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8516 "rsm_devmap: dip=%lx,dreg=%lu,doff=%lx," 8517 "off=%lx,len=%lx\n", 8518 p->dip, p->dev_register, dev_offset, off, cur_len)); 8519 8520 if (err != 0) { 8521 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8522 "rsm_devmap: devmap_devmem_setup failed %d\n", 8523 err)); 8524 return (RSMERR_MAP_FAILED); 8525 } 8526 /* cur_len is always an integral multiple pagesize */ 8527 ASSERT((cur_len & (PAGESIZE-1)) == 0); 8528 *maplen = cur_len; 8529 return (err); 8530 8531 } else { 8532 err = devmap_umem_setup(dhc, rsm_dip, callbackops, 8533 seg->s_cookie, off, len, maxprot, 8534 DEVMAP_ALLOW_REMAP|DEVMAP_DEFAULTS, 0); 8535 if (err != 0) { 8536 DBG_PRINTF((category, RSM_DEBUG, 8537 "rsm_devmap: devmap_umem_setup failed %d\n", 8538 err)); 8539 return (RSMERR_MAP_FAILED); 8540 } 8541 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8542 "rsm_devmap: loopback done\n")); 8543 8544 *maplen = ptob(btopr(len)); 8545 8546 return (err); 8547 } 8548 } 8549 8550 /* 8551 * We can use the devmap framework for mapping device memory to user space by 8552 * specifying this routine in the rsm_cb_ops structure. 
The kernel mmap 8553 * processing calls this entry point and devmap_setup is called within this 8554 * function, which eventually calls rsm_devmap 8555 */ 8556 static int 8557 rsm_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len, 8558 uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred) 8559 { 8560 int error = 0; 8561 int old_state; 8562 minor_t rnum; 8563 rsmseg_t *seg, *eseg; 8564 adapter_t *adapter; 8565 rsm_import_share_t *sharedp; 8566 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI); 8567 8568 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_segmap enter\n")); 8569 8570 /* 8571 * find segment 8572 */ 8573 rnum = getminor(dev); 8574 seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_LOCK); 8575 8576 if (seg == NULL) { 8577 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8578 "rsm_segmap done: invalid segment\n")); 8579 return (EINVAL); 8580 } 8581 8582 /* 8583 * the user is trying to map a resource that has not been 8584 * defined yet. The library uses this to map in the 8585 * barrier page. 
8586 */ 8587 if (seg->s_hdr.rsmrc_type == RSM_RESOURCE_BAR) { 8588 rsmseglock_release(seg); 8589 8590 /* 8591 * The mapping for the barrier page is identified 8592 * by the special offset barrier_offset 8593 */ 8594 8595 if (off == (off_t)barrier_offset || 8596 len == (off_t)barrier_size) { 8597 if (bar_cookie == NULL || bar_va == NULL) { 8598 DBG_PRINTF((category, RSM_DEBUG, 8599 "rsm_segmap: bar cookie/va is NULL\n")); 8600 return (EINVAL); 8601 } 8602 8603 error = devmap_setup(dev, (offset_t)off, as, addrp, 8604 (size_t)len, prot, maxprot, flags, cred); 8605 8606 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8607 "rsm_segmap done: %d\n", error)); 8608 return (error); 8609 } else { 8610 DBG_PRINTF((category, RSM_DEBUG, 8611 "rsm_segmap: bad offset/length\n")); 8612 return (EINVAL); 8613 } 8614 } 8615 8616 /* Make sure you can only map imported segments */ 8617 if (seg->s_hdr.rsmrc_type != RSM_RESOURCE_IMPORT_SEGMENT) { 8618 rsmseglock_release(seg); 8619 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8620 "rsm_segmap done: not an import segment\n")); 8621 return (EINVAL); 8622 } 8623 /* check means library is broken */ 8624 ASSERT(seg->s_hdr.rsmrc_num == rnum); 8625 8626 /* wait for the segment to become unquiesced */ 8627 while (seg->s_state == RSM_STATE_CONN_QUIESCE) { 8628 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) { 8629 rsmseglock_release(seg); 8630 DBG_PRINTF((category, RSM_DEBUG, 8631 "rsm_segmap done: cv_wait INTR")); 8632 return (ENODEV); 8633 } 8634 } 8635 8636 /* wait until segment leaves the mapping state */ 8637 while (seg->s_state == RSM_STATE_MAPPING) 8638 cv_wait(&seg->s_cv, &seg->s_lock); 8639 8640 /* 8641 * we allow multiple maps of the same segment in the KA 8642 * and it works because we do an rsmpi map of the whole 8643 * segment during the first map and all the device mapping 8644 * information needed in rsm_devmap is in the mapinfo list. 
8645 */ 8646 if ((seg->s_state != RSM_STATE_CONNECT) && 8647 (seg->s_state != RSM_STATE_ACTIVE)) { 8648 rsmseglock_release(seg); 8649 DBG_PRINTF((category, RSM_DEBUG, 8650 "rsm_segmap done: segment not connected\n")); 8651 return (ENODEV); 8652 } 8653 8654 /* 8655 * Make sure we are not mapping a larger segment than what's 8656 * exported 8657 */ 8658 if ((size_t)off + ptob(btopr(len)) > seg->s_len) { 8659 rsmseglock_release(seg); 8660 DBG_PRINTF((category, RSM_DEBUG, 8661 "rsm_segmap done: off+len>seg size\n")); 8662 return (ENXIO); 8663 } 8664 8665 /* 8666 * Make sure we still have permission for the map operation. 8667 */ 8668 maxprot = PROT_USER; 8669 if (seg->s_mode & RSM_PERM_READ) { 8670 maxprot |= PROT_READ; 8671 } 8672 8673 if (seg->s_mode & RSM_PERM_WRITE) { 8674 maxprot |= PROT_WRITE; 8675 } 8676 8677 if ((prot & maxprot) != prot) { 8678 /* No permission */ 8679 rsmseglock_release(seg); 8680 DBG_PRINTF((category, RSM_DEBUG, 8681 "rsm_segmap done: no permission\n")); 8682 return (EACCES); 8683 } 8684 8685 old_state = seg->s_state; 8686 8687 ASSERT(seg->s_share != NULL); 8688 8689 rsmsharelock_acquire(seg); 8690 8691 sharedp = seg->s_share; 8692 8693 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 8694 "rsm_segmap:RSMSI_STATE=%d\n", sharedp->rsmsi_state)); 8695 8696 if ((sharedp->rsmsi_state != RSMSI_STATE_CONNECTED) && 8697 (sharedp->rsmsi_state != RSMSI_STATE_MAPPED)) { 8698 rsmsharelock_release(seg); 8699 rsmseglock_release(seg); 8700 DBG_PRINTF((category, RSM_DEBUG, 8701 "rsm_segmap done:RSMSI_STATE %d invalid\n", 8702 sharedp->rsmsi_state)); 8703 return (ENODEV); 8704 } 8705 8706 /* 8707 * Do the map - since we want importers to share mappings 8708 * we do the rsmpi map for the whole segment 8709 */ 8710 if (seg->s_node != my_nodeid) { 8711 uint_t dev_register; 8712 off_t dev_offset; 8713 dev_info_t *dip; 8714 size_t tmp_len; 8715 size_t total_length_mapped = 0; 8716 size_t length_to_map = seg->s_len; 8717 off_t tmp_off = 0; 8718 rsm_mapinfo_t *p; 8719 8720 
/* 8721 * length_to_map = seg->s_len is always an integral 8722 * multiple of PAGESIZE. Length mapped in each entry in mapinfo 8723 * list is a multiple of PAGESIZE - RSMPI map ensures this 8724 */ 8725 8726 adapter = seg->s_adapter; 8727 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_CONNECTED || 8728 sharedp->rsmsi_state == RSMSI_STATE_MAPPED); 8729 8730 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) { 8731 error = 0; 8732 /* map the whole segment */ 8733 while (total_length_mapped < seg->s_len) { 8734 tmp_len = 0; 8735 8736 error = adapter->rsmpi_ops->rsm_map( 8737 seg->s_handle.in, tmp_off, 8738 length_to_map, &tmp_len, 8739 &dip, &dev_register, &dev_offset, 8740 NULL, NULL); 8741 8742 if (error != 0) 8743 break; 8744 8745 /* 8746 * Store the mapping info obtained from rsm_map 8747 */ 8748 p = kmem_alloc(sizeof (*p), KM_SLEEP); 8749 p->dev_register = dev_register; 8750 p->dev_offset = dev_offset; 8751 p->dip = dip; 8752 p->individual_len = tmp_len; 8753 p->start_offset = tmp_off; 8754 p->next = sharedp->rsmsi_mapinfo; 8755 sharedp->rsmsi_mapinfo = p; 8756 8757 total_length_mapped += tmp_len; 8758 length_to_map -= tmp_len; 8759 tmp_off += tmp_len; 8760 } 8761 seg->s_mapinfo = sharedp->rsmsi_mapinfo; 8762 8763 if (error != RSM_SUCCESS) { 8764 /* Check if this is the the first rsm_map */ 8765 if (sharedp->rsmsi_mapinfo != NULL) { 8766 /* 8767 * A single rsm_unmap undoes 8768 * multiple rsm_maps. 
8769 */ 8770 (void) seg->s_adapter->rsmpi_ops-> 8771 rsm_unmap(sharedp->rsmsi_handle); 8772 rsm_free_mapinfo(sharedp-> 8773 rsmsi_mapinfo); 8774 } 8775 sharedp->rsmsi_mapinfo = NULL; 8776 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED; 8777 rsmsharelock_release(seg); 8778 rsmseglock_release(seg); 8779 DBG_PRINTF((category, RSM_DEBUG, 8780 "rsm_segmap done: rsmpi map err %d\n", 8781 error)); 8782 ASSERT(error != RSMERR_BAD_LENGTH && 8783 error != RSMERR_BAD_MEM_ALIGNMENT && 8784 error != RSMERR_BAD_SEG_HNDL); 8785 if (error == RSMERR_UNSUPPORTED_OPERATION) 8786 return (ENOTSUP); 8787 else if (error == RSMERR_INSUFFICIENT_RESOURCES) 8788 return (EAGAIN); 8789 else if (error == RSMERR_CONN_ABORTED) 8790 return (ENODEV); 8791 else 8792 return (error); 8793 } else { 8794 sharedp->rsmsi_state = RSMSI_STATE_MAPPED; 8795 } 8796 } else { 8797 seg->s_mapinfo = sharedp->rsmsi_mapinfo; 8798 } 8799 8800 sharedp->rsmsi_mapcnt++; 8801 8802 rsmsharelock_release(seg); 8803 8804 /* move to an intermediate mapping state */ 8805 seg->s_state = RSM_STATE_MAPPING; 8806 rsmseglock_release(seg); 8807 8808 error = devmap_setup(dev, (offset_t)off, as, addrp, 8809 len, prot, maxprot, flags, cred); 8810 8811 rsmseglock_acquire(seg); 8812 ASSERT(seg->s_state == RSM_STATE_MAPPING); 8813 8814 if (error == DDI_SUCCESS) { 8815 seg->s_state = RSM_STATE_ACTIVE; 8816 } else { 8817 rsmsharelock_acquire(seg); 8818 8819 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED); 8820 8821 sharedp->rsmsi_mapcnt--; 8822 if (sharedp->rsmsi_mapcnt == 0) { 8823 /* unmap the shared RSMPI mapping */ 8824 ASSERT(sharedp->rsmsi_handle != NULL); 8825 (void) adapter->rsmpi_ops-> 8826 rsm_unmap(sharedp->rsmsi_handle); 8827 rsm_free_mapinfo(sharedp->rsmsi_mapinfo); 8828 sharedp->rsmsi_mapinfo = NULL; 8829 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED; 8830 } 8831 8832 rsmsharelock_release(seg); 8833 seg->s_state = old_state; 8834 DBG_PRINTF((category, RSM_ERR, 8835 "rsm: devmap_setup failed %d\n", error)); 8836 } 8837 
cv_broadcast(&seg->s_cv); 8838 rsmseglock_release(seg); 8839 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsm_segmap done: %d\n", 8840 error)); 8841 return (error); 8842 } else { 8843 /* 8844 * For loopback, the export segment mapping cookie (s_cookie) 8845 * is also used as the s_cookie value for its import segments 8846 * during mapping. 8847 * Note that reference counting for s_cookie of the export 8848 * segment is not required due to the following: 8849 * We never have a case of the export segment being destroyed, 8850 * leaving the import segments with a stale value for the 8851 * s_cookie field, since a force disconnect is done prior to a 8852 * destroy of an export segment. The force disconnect causes 8853 * the s_cookie value to be reset to NULL. Also for the 8854 * rsm_rebind operation, we change the s_cookie value of the 8855 * export segment as well as of all its local (loopback) 8856 * importers. 8857 */ 8858 DBG_ADDCATEGORY(category, RSM_LOOPBACK); 8859 8860 rsmsharelock_release(seg); 8861 /* 8862 * In order to maintain the lock ordering between the export 8863 * and import segment locks, we need to acquire the export 8864 * segment lock first and only then acquire the import 8865 * segment lock. 8866 * The above is necessary to avoid any deadlock scenarios 8867 * with rsm_rebind which also acquires both the export 8868 * and import segment locks in the above mentioned order. 8869 * Based on code inspection, there seem to be no other 8870 * situations in which both the export and import segment 8871 * locks are acquired either in the same or opposite order 8872 * as mentioned above. 8873 * Thus in order to conform to the above lock order, we 8874 * need to change the state of the import segment to 8875 * RSM_STATE_MAPPING, release the lock. Once this is done we 8876 * can now safely acquire the export segment lock first 8877 * followed by the import segment lock which is as per 8878 * the lock order mentioned above. 
8879 */ 8880 /* move to an intermediate mapping state */ 8881 seg->s_state = RSM_STATE_MAPPING; 8882 rsmseglock_release(seg); 8883 8884 eseg = rsmexport_lookup(seg->s_key); 8885 8886 if (eseg == NULL) { 8887 rsmseglock_acquire(seg); 8888 /* 8889 * Revert to old_state and signal any waiters 8890 * The shared state is not changed 8891 */ 8892 8893 seg->s_state = old_state; 8894 cv_broadcast(&seg->s_cv); 8895 rsmseglock_release(seg); 8896 DBG_PRINTF((category, RSM_DEBUG, 8897 "rsm_segmap done: key %d not found\n", seg->s_key)); 8898 return (ENODEV); 8899 } 8900 8901 rsmsharelock_acquire(seg); 8902 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_CONNECTED || 8903 sharedp->rsmsi_state == RSMSI_STATE_MAPPED); 8904 8905 sharedp->rsmsi_mapcnt++; 8906 sharedp->rsmsi_state = RSMSI_STATE_MAPPED; 8907 rsmsharelock_release(seg); 8908 8909 ASSERT(eseg->s_cookie != NULL); 8910 8911 /* 8912 * It is not required or necessary to acquire the import 8913 * segment lock here to change the value of s_cookie since 8914 * no one will touch the import segment as long as it is 8915 * in the RSM_STATE_MAPPING state. 
8916 */ 8917 seg->s_cookie = eseg->s_cookie; 8918 8919 rsmseglock_release(eseg); 8920 8921 error = devmap_setup(dev, (offset_t)off, as, addrp, (size_t)len, 8922 prot, maxprot, flags, cred); 8923 8924 rsmseglock_acquire(seg); 8925 ASSERT(seg->s_state == RSM_STATE_MAPPING); 8926 if (error == 0) { 8927 seg->s_state = RSM_STATE_ACTIVE; 8928 } else { 8929 rsmsharelock_acquire(seg); 8930 8931 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED); 8932 8933 sharedp->rsmsi_mapcnt--; 8934 if (sharedp->rsmsi_mapcnt == 0) { 8935 sharedp->rsmsi_mapinfo = NULL; 8936 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED; 8937 } 8938 rsmsharelock_release(seg); 8939 seg->s_state = old_state; 8940 seg->s_cookie = NULL; 8941 } 8942 cv_broadcast(&seg->s_cv); 8943 rsmseglock_release(seg); 8944 DBG_PRINTF((category, RSM_DEBUG_LVL2, 8945 "rsm_segmap done: %d\n", error)); 8946 return (error); 8947 } 8948 } 8949 8950 int 8951 rsmka_null_seg_create( 8952 rsm_controller_handle_t argcp, 8953 rsm_memseg_export_handle_t *handle, 8954 size_t size, 8955 uint_t flags, 8956 rsm_memory_local_t *memory, 8957 rsm_resource_callback_t callback, 8958 rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/) 8959 { 8960 return (RSM_SUCCESS); 8961 } 8962 8963 8964 int 8965 rsmka_null_seg_destroy( 8966 rsm_memseg_export_handle_t argmemseg /*ARGSUSED*/) 8967 { 8968 return (RSM_SUCCESS); 8969 } 8970 8971 8972 int 8973 rsmka_null_bind( 8974 rsm_memseg_export_handle_t argmemseg, 8975 off_t offset, 8976 rsm_memory_local_t *argmemory, 8977 rsm_resource_callback_t callback, 8978 rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/) 8979 { 8980 return (RSM_SUCCESS); 8981 } 8982 8983 8984 int 8985 rsmka_null_unbind( 8986 rsm_memseg_export_handle_t argmemseg, 8987 off_t offset, 8988 size_t length /*ARGSUSED*/) 8989 { 8990 return (DDI_SUCCESS); 8991 } 8992 8993 int 8994 rsmka_null_rebind( 8995 rsm_memseg_export_handle_t argmemseg, 8996 off_t offset, 8997 rsm_memory_local_t *memory, 8998 rsm_resource_callback_t callback, 8999 
rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/) 9000 { 9001 return (RSM_SUCCESS); 9002 } 9003 9004 int 9005 rsmka_null_publish( 9006 rsm_memseg_export_handle_t argmemseg, 9007 rsm_access_entry_t access_list[], 9008 uint_t access_list_length, 9009 rsm_memseg_id_t segment_id, 9010 rsm_resource_callback_t callback, 9011 rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/) 9012 { 9013 return (RSM_SUCCESS); 9014 } 9015 9016 9017 int 9018 rsmka_null_republish( 9019 rsm_memseg_export_handle_t memseg, 9020 rsm_access_entry_t access_list[], 9021 uint_t access_list_length, 9022 rsm_resource_callback_t callback, 9023 rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/) 9024 { 9025 return (RSM_SUCCESS); 9026 } 9027 9028 int 9029 rsmka_null_unpublish( 9030 rsm_memseg_export_handle_t argmemseg /*ARGSUSED*/) 9031 { 9032 return (RSM_SUCCESS); 9033 } 9034 9035 9036 void 9037 rsmka_init_loopback() 9038 { 9039 rsm_ops_t *ops = &null_rsmpi_ops; 9040 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_LOOPBACK); 9041 9042 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9043 "rsmka_init_loopback enter\n")); 9044 9045 /* initialize null ops vector */ 9046 ops->rsm_seg_create = rsmka_null_seg_create; 9047 ops->rsm_seg_destroy = rsmka_null_seg_destroy; 9048 ops->rsm_bind = rsmka_null_bind; 9049 ops->rsm_unbind = rsmka_null_unbind; 9050 ops->rsm_rebind = rsmka_null_rebind; 9051 ops->rsm_publish = rsmka_null_publish; 9052 ops->rsm_unpublish = rsmka_null_unpublish; 9053 ops->rsm_republish = rsmka_null_republish; 9054 9055 /* initialize attributes for loopback adapter */ 9056 loopback_attr.attr_name = loopback_str; 9057 loopback_attr.attr_page_size = 0x8; /* 8K */ 9058 9059 /* initialize loopback adapter */ 9060 loopback_adapter.rsm_attr = loopback_attr; 9061 loopback_adapter.rsmpi_ops = &null_rsmpi_ops; 9062 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9063 "rsmka_init_loopback done\n")); 9064 } 9065 9066 /* ************** DR functions ********************************** */ 9067 static 
void 9068 rsm_quiesce_exp_seg(rsmresource_t *resp) 9069 { 9070 int recheck_state; 9071 rsmseg_t *segp = (rsmseg_t *)resp; 9072 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 9073 DBG_DEFINE_STR(function, "rsm_unquiesce_exp_seg"); 9074 9075 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9076 "%s enter: key=%u\n", function, segp->s_key)); 9077 9078 rsmseglock_acquire(segp); 9079 do { 9080 recheck_state = 0; 9081 if ((segp->s_state == RSM_STATE_NEW_QUIESCED) || 9082 (segp->s_state == RSM_STATE_BIND_QUIESCED) || 9083 (segp->s_state == RSM_STATE_EXPORT_QUIESCING) || 9084 (segp->s_state == RSM_STATE_EXPORT_QUIESCED)) { 9085 rsmseglock_release(segp); 9086 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9087 "%s done:state =%d\n", function, 9088 segp->s_state)); 9089 return; 9090 } 9091 9092 if (segp->s_state == RSM_STATE_NEW) { 9093 segp->s_state = RSM_STATE_NEW_QUIESCED; 9094 rsmseglock_release(segp); 9095 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9096 "%s done:state =%d\n", function, 9097 segp->s_state)); 9098 return; 9099 } 9100 9101 if (segp->s_state == RSM_STATE_BIND) { 9102 /* unbind */ 9103 (void) rsm_unbind_pages(segp); 9104 segp->s_state = RSM_STATE_BIND_QUIESCED; 9105 rsmseglock_release(segp); 9106 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9107 "%s done:state =%d\n", function, 9108 segp->s_state)); 9109 return; 9110 } 9111 9112 if (segp->s_state == RSM_STATE_EXPORT) { 9113 /* 9114 * wait for putv/getv to complete if the segp is 9115 * a local memory handle 9116 */ 9117 while ((segp->s_state == RSM_STATE_EXPORT) && 9118 (segp->s_rdmacnt != 0)) { 9119 cv_wait(&segp->s_cv, &segp->s_lock); 9120 } 9121 9122 if (segp->s_state != RSM_STATE_EXPORT) { 9123 /* 9124 * state changed need to see what it 9125 * should be changed to. 
9126 */ 9127 recheck_state = 1; 9128 continue; 9129 } 9130 9131 segp->s_state = RSM_STATE_EXPORT_QUIESCING; 9132 rsmseglock_release(segp); 9133 /* 9134 * send SUSPEND messages - currently it will be 9135 * done at the end 9136 */ 9137 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9138 "%s done:state =%d\n", function, 9139 segp->s_state)); 9140 return; 9141 } 9142 } while (recheck_state); 9143 9144 rsmseglock_release(segp); 9145 } 9146 9147 static void 9148 rsm_unquiesce_exp_seg(rsmresource_t *resp) 9149 { 9150 int ret; 9151 rsmseg_t *segp = (rsmseg_t *)resp; 9152 rsmapi_access_entry_t *acl; 9153 rsm_access_entry_t *rsmpi_acl; 9154 int acl_len; 9155 int create_flags = 0; 9156 struct buf *xbuf; 9157 rsm_memory_local_t mem; 9158 adapter_t *adapter; 9159 dev_t sdev = 0; 9160 rsm_resource_callback_t callback_flag; 9161 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 9162 DBG_DEFINE_STR(function, "rsm_unquiesce_exp_seg"); 9163 9164 rsmseglock_acquire(segp); 9165 9166 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9167 "%s enter: key=%u, state=%d\n", function, segp->s_key, 9168 segp->s_state)); 9169 9170 if ((segp->s_state == RSM_STATE_NEW) || 9171 (segp->s_state == RSM_STATE_BIND) || 9172 (segp->s_state == RSM_STATE_EXPORT)) { 9173 rsmseglock_release(segp); 9174 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done:state=%d\n", 9175 function, segp->s_state)); 9176 return; 9177 } 9178 9179 if (segp->s_state == RSM_STATE_NEW_QUIESCED) { 9180 segp->s_state = RSM_STATE_NEW; 9181 cv_broadcast(&segp->s_cv); 9182 rsmseglock_release(segp); 9183 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done:state=%d\n", 9184 function, segp->s_state)); 9185 return; 9186 } 9187 9188 if (segp->s_state == RSM_STATE_BIND_QUIESCED) { 9189 /* bind the segment */ 9190 ret = rsm_bind_pages(&segp->s_cookie, segp->s_region.r_vaddr, 9191 segp->s_len, segp->s_proc); 9192 if (ret == RSM_SUCCESS) { /* bind successful */ 9193 segp->s_state = RSM_STATE_BIND; 9194 } else { /* bind failed - resource unavailable */ 9195 
segp->s_state = RSM_STATE_NEW; 9196 } 9197 cv_broadcast(&segp->s_cv); 9198 rsmseglock_release(segp); 9199 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9200 "%s done: bind_qscd bind = %d\n", function, ret)); 9201 return; 9202 } 9203 9204 while (segp->s_state == RSM_STATE_EXPORT_QUIESCING) { 9205 /* wait for the segment to move to EXPORT_QUIESCED state */ 9206 cv_wait(&segp->s_cv, &segp->s_lock); 9207 } 9208 9209 if (segp->s_state == RSM_STATE_EXPORT_QUIESCED) { 9210 /* bind the segment */ 9211 ret = rsm_bind_pages(&segp->s_cookie, segp->s_region.r_vaddr, 9212 segp->s_len, segp->s_proc); 9213 9214 if (ret != RSM_SUCCESS) { 9215 /* bind failed - resource unavailable */ 9216 acl_len = segp->s_acl_len; 9217 acl = segp->s_acl; 9218 rsmpi_acl = segp->s_acl_in; 9219 segp->s_acl_len = 0; 9220 segp->s_acl = NULL; 9221 segp->s_acl_in = NULL; 9222 rsmseglock_release(segp); 9223 9224 rsmexport_rm(segp); 9225 rsmacl_free(acl, acl_len); 9226 rsmpiacl_free(rsmpi_acl, acl_len); 9227 9228 rsmseglock_acquire(segp); 9229 segp->s_state = RSM_STATE_NEW; 9230 cv_broadcast(&segp->s_cv); 9231 rsmseglock_release(segp); 9232 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9233 "%s done: exp_qscd bind failed = %d\n", 9234 function, ret)); 9235 return; 9236 } 9237 /* 9238 * publish the segment 9239 * if successful 9240 * segp->s_state = RSM_STATE_EXPORT; 9241 * else failed 9242 * segp->s_state = RSM_STATE_BIND; 9243 */ 9244 9245 /* check whether it is a local_memory_handle */ 9246 if (segp->s_acl != (rsmapi_access_entry_t *)NULL) { 9247 if ((segp->s_acl[0].ae_node == my_nodeid) && 9248 (segp->s_acl[0].ae_permission == 0)) { 9249 segp->s_state = RSM_STATE_EXPORT; 9250 cv_broadcast(&segp->s_cv); 9251 rsmseglock_release(segp); 9252 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9253 "%s done:exp_qscd\n", function)); 9254 return; 9255 } 9256 } 9257 xbuf = ddi_umem_iosetup(segp->s_cookie, 0, segp->s_len, B_WRITE, 9258 sdev, 0, NULL, DDI_UMEM_SLEEP); 9259 ASSERT(xbuf != NULL); 9260 9261 mem.ms_type = RSM_MEM_BUF; 
9262 mem.ms_bp = xbuf; 9263 9264 adapter = segp->s_adapter; 9265 9266 if (segp->s_flags & RSMKA_ALLOW_UNBIND_REBIND) { 9267 create_flags = RSM_ALLOW_UNBIND_REBIND; 9268 } 9269 9270 if (segp->s_flags & RSMKA_SET_RESOURCE_DONTWAIT) { 9271 callback_flag = RSM_RESOURCE_DONTWAIT; 9272 } else { 9273 callback_flag = RSM_RESOURCE_SLEEP; 9274 } 9275 9276 ret = adapter->rsmpi_ops->rsm_seg_create( 9277 adapter->rsmpi_handle, &segp->s_handle.out, 9278 segp->s_len, create_flags, &mem, 9279 callback_flag, NULL); 9280 9281 if (ret != RSM_SUCCESS) { 9282 acl_len = segp->s_acl_len; 9283 acl = segp->s_acl; 9284 rsmpi_acl = segp->s_acl_in; 9285 segp->s_acl_len = 0; 9286 segp->s_acl = NULL; 9287 segp->s_acl_in = NULL; 9288 rsmseglock_release(segp); 9289 9290 rsmexport_rm(segp); 9291 rsmacl_free(acl, acl_len); 9292 rsmpiacl_free(rsmpi_acl, acl_len); 9293 9294 rsmseglock_acquire(segp); 9295 segp->s_state = RSM_STATE_BIND; 9296 cv_broadcast(&segp->s_cv); 9297 rsmseglock_release(segp); 9298 DBG_PRINTF((category, RSM_ERR, 9299 "%s done: exp_qscd create failed = %d\n", 9300 function, ret)); 9301 return; 9302 } 9303 9304 ret = adapter->rsmpi_ops->rsm_publish( 9305 segp->s_handle.out, segp->s_acl_in, segp->s_acl_len, 9306 segp->s_segid, RSM_RESOURCE_DONTWAIT, NULL); 9307 9308 if (ret != RSM_SUCCESS) { 9309 acl_len = segp->s_acl_len; 9310 acl = segp->s_acl; 9311 rsmpi_acl = segp->s_acl_in; 9312 segp->s_acl_len = 0; 9313 segp->s_acl = NULL; 9314 segp->s_acl_in = NULL; 9315 adapter->rsmpi_ops->rsm_seg_destroy(segp->s_handle.out); 9316 rsmseglock_release(segp); 9317 9318 rsmexport_rm(segp); 9319 rsmacl_free(acl, acl_len); 9320 rsmpiacl_free(rsmpi_acl, acl_len); 9321 9322 rsmseglock_acquire(segp); 9323 segp->s_state = RSM_STATE_BIND; 9324 cv_broadcast(&segp->s_cv); 9325 rsmseglock_release(segp); 9326 DBG_PRINTF((category, RSM_ERR, 9327 "%s done: exp_qscd publish failed = %d\n", 9328 function, ret)); 9329 return; 9330 } 9331 9332 segp->s_state = RSM_STATE_EXPORT; 9333 cv_broadcast(&segp->s_cv); 
9334 rsmseglock_release(segp); 9335 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done: exp_qscd\n", 9336 function)); 9337 return; 9338 } 9339 9340 rsmseglock_release(segp); 9341 9342 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function)); 9343 } 9344 9345 static void 9346 rsm_quiesce_imp_seg(rsmresource_t *resp) 9347 { 9348 rsmseg_t *segp = (rsmseg_t *)resp; 9349 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 9350 DBG_DEFINE_STR(function, "rsm_quiesce_imp_seg"); 9351 9352 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9353 "%s enter: key=%u\n", function, segp->s_key)); 9354 9355 rsmseglock_acquire(segp); 9356 segp->s_flags |= RSM_DR_INPROGRESS; 9357 9358 while (segp->s_rdmacnt != 0) { 9359 /* wait for the RDMA to complete */ 9360 cv_wait(&segp->s_cv, &segp->s_lock); 9361 } 9362 9363 rsmseglock_release(segp); 9364 9365 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function)); 9366 9367 } 9368 9369 static void 9370 rsm_unquiesce_imp_seg(rsmresource_t *resp) 9371 { 9372 rsmseg_t *segp = (rsmseg_t *)resp; 9373 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 9374 DBG_DEFINE_STR(function, "rsm_unquiesce_imp_seg"); 9375 9376 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9377 "%s enter: key=%u\n", function, segp->s_key)); 9378 9379 rsmseglock_acquire(segp); 9380 9381 segp->s_flags &= ~RSM_DR_INPROGRESS; 9382 /* wake up any waiting putv/getv ops */ 9383 cv_broadcast(&segp->s_cv); 9384 9385 rsmseglock_release(segp); 9386 9387 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function)); 9388 9389 9390 } 9391 9392 static void 9393 rsm_process_exp_seg(rsmresource_t *resp, int event) 9394 { 9395 if (event == RSM_DR_QUIESCE) 9396 rsm_quiesce_exp_seg(resp); 9397 else /* UNQUIESCE */ 9398 rsm_unquiesce_exp_seg(resp); 9399 } 9400 9401 static void 9402 rsm_process_imp_seg(rsmresource_t *resp, int event) 9403 { 9404 if (event == RSM_DR_QUIESCE) 9405 rsm_quiesce_imp_seg(resp); 9406 else /* UNQUIESCE */ 9407 rsm_unquiesce_imp_seg(resp); 9408 } 9409 9410 static 
void 9411 rsm_dr_process_local_segments(int event) 9412 { 9413 9414 int i, j; 9415 rsmresource_blk_t *blk; 9416 rsmresource_t *p; 9417 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 9418 9419 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9420 "rsm_dr_process_local_segments enter\n")); 9421 9422 /* iterate through the resource structure */ 9423 9424 rw_enter(&rsm_resource.rsmrc_lock, RW_READER); 9425 9426 for (i = 0; i < rsm_resource.rsmrc_len; i++) { 9427 blk = rsm_resource.rsmrc_root[i]; 9428 if (blk != NULL) { 9429 for (j = 0; j < RSMRC_BLKSZ; j++) { 9430 p = blk->rsmrcblk_blks[j]; 9431 if ((p != NULL) && (p != RSMRC_RESERVED)) { 9432 /* valid resource */ 9433 if (p->rsmrc_type == 9434 RSM_RESOURCE_EXPORT_SEGMENT) 9435 rsm_process_exp_seg(p, event); 9436 else if (p->rsmrc_type == 9437 RSM_RESOURCE_IMPORT_SEGMENT) 9438 rsm_process_imp_seg(p, event); 9439 } 9440 } 9441 } 9442 } 9443 9444 rw_exit(&rsm_resource.rsmrc_lock); 9445 9446 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9447 "rsm_dr_process_local_segments done\n")); 9448 } 9449 9450 /* *************** DR callback functions ************ */ 9451 static void 9452 rsm_dr_callback_post_add(void *arg, pgcnt_t delta /* ARGSUSED */) 9453 { 9454 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 9455 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9456 "rsm_dr_callback_post_add is a no-op\n")); 9457 /* Noop */ 9458 } 9459 9460 static int 9461 rsm_dr_callback_pre_del(void *arg, pgcnt_t delta /* ARGSUSED */) 9462 { 9463 int recheck_state = 0; 9464 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 9465 9466 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9467 "rsm_dr_callback_pre_del enter\n")); 9468 9469 mutex_enter(&rsm_drv_data.drv_lock); 9470 9471 do { 9472 recheck_state = 0; 9473 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9474 "rsm_dr_callback_pre_del:state=%d\n", 9475 rsm_drv_data.drv_state)); 9476 9477 switch (rsm_drv_data.drv_state) { 9478 case RSM_DRV_NEW: 9479 /* 9480 * The state should usually never be RSM_DRV_NEW 
9481 * since in this state the callbacks have not yet 9482 * been registered. So, ASSERT. 9483 */ 9484 ASSERT(0); 9485 return (0); 9486 case RSM_DRV_REG_PROCESSING: 9487 /* 9488 * The driver is in the process of registering 9489 * with the DR framework. So, wait till the 9490 * registration process is complete. 9491 */ 9492 recheck_state = 1; 9493 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock); 9494 break; 9495 case RSM_DRV_UNREG_PROCESSING: 9496 /* 9497 * If the state is RSM_DRV_UNREG_PROCESSING, the 9498 * module is in the process of detaching and 9499 * unregistering the callbacks from the DR 9500 * framework. So, simply return. 9501 */ 9502 mutex_exit(&rsm_drv_data.drv_lock); 9503 DBG_PRINTF((category, RSM_DEBUG, 9504 "rsm_dr_callback_pre_del:" 9505 "pre-del on NEW/UNREG\n")); 9506 return (0); 9507 case RSM_DRV_OK: 9508 rsm_drv_data.drv_state = RSM_DRV_PREDEL_STARTED; 9509 break; 9510 case RSM_DRV_PREDEL_STARTED: 9511 /* FALLTHRU */ 9512 case RSM_DRV_PREDEL_COMPLETED: 9513 /* FALLTHRU */ 9514 case RSM_DRV_POSTDEL_IN_PROGRESS: 9515 recheck_state = 1; 9516 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock); 9517 break; 9518 case RSM_DRV_DR_IN_PROGRESS: 9519 rsm_drv_data.drv_memdel_cnt++; 9520 mutex_exit(&rsm_drv_data.drv_lock); 9521 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9522 "rsm_dr_callback_pre_del done\n")); 9523 return (0); 9524 /* break; */ 9525 default: 9526 ASSERT(0); 9527 break; 9528 } 9529 9530 } while (recheck_state); 9531 9532 rsm_drv_data.drv_memdel_cnt++; 9533 9534 mutex_exit(&rsm_drv_data.drv_lock); 9535 9536 /* Do all the quiescing stuff here */ 9537 DBG_PRINTF((category, RSM_DEBUG, 9538 "rsm_dr_callback_pre_del: quiesce things now\n")); 9539 9540 rsm_dr_process_local_segments(RSM_DR_QUIESCE); 9541 9542 /* 9543 * now that all local segments have been quiesced lets inform 9544 * the importers 9545 */ 9546 rsm_send_suspend(); 9547 9548 /* 9549 * In response to the suspend message the remote node(s) will process 9550 * the segments and 
send a suspend_complete message. Till all 9551 * the nodes send the suspend_complete message we wait in the 9552 * RSM_DRV_PREDEL_STARTED state. In the exporter_quiesce 9553 * function we transition to the RSM_DRV_PREDEL_COMPLETED state. 9554 */ 9555 mutex_enter(&rsm_drv_data.drv_lock); 9556 9557 while (rsm_drv_data.drv_state == RSM_DRV_PREDEL_STARTED) { 9558 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock); 9559 } 9560 9561 ASSERT(rsm_drv_data.drv_state == RSM_DRV_PREDEL_COMPLETED); 9562 9563 rsm_drv_data.drv_state = RSM_DRV_DR_IN_PROGRESS; 9564 cv_broadcast(&rsm_drv_data.drv_cv); 9565 9566 mutex_exit(&rsm_drv_data.drv_lock); 9567 9568 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9569 "rsm_dr_callback_pre_del done\n")); 9570 9571 return (0); 9572 } 9573 9574 static void 9575 rsm_dr_callback_post_del(void *arg, pgcnt_t delta, int cancelled /* ARGSUSED */) 9576 { 9577 int recheck_state = 0; 9578 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 9579 9580 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9581 "rsm_dr_callback_post_del enter\n")); 9582 9583 mutex_enter(&rsm_drv_data.drv_lock); 9584 9585 do { 9586 recheck_state = 0; 9587 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9588 "rsm_dr_callback_post_del:state=%d\n", 9589 rsm_drv_data.drv_state)); 9590 9591 switch (rsm_drv_data.drv_state) { 9592 case RSM_DRV_NEW: 9593 /* 9594 * The driver state cannot not be RSM_DRV_NEW 9595 * since in this state the callbacks have not 9596 * yet been registered. 9597 */ 9598 ASSERT(0); 9599 return; 9600 case RSM_DRV_REG_PROCESSING: 9601 /* 9602 * The driver is in the process of registering with 9603 * the DR framework. Wait till the registration is 9604 * complete. 9605 */ 9606 recheck_state = 1; 9607 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock); 9608 break; 9609 case RSM_DRV_UNREG_PROCESSING: 9610 /* 9611 * RSM_DRV_UNREG_PROCESSING state means the module 9612 * is detaching and unregistering the callbacks 9613 * from the DR framework. So simply return. 
9614 */ 9615 /* FALLTHRU */ 9616 case RSM_DRV_OK: 9617 /* 9618 * RSM_DRV_OK means we missed the pre-del 9619 * corresponding to this post-del coz we had not 9620 * registered yet, so simply return. 9621 */ 9622 mutex_exit(&rsm_drv_data.drv_lock); 9623 DBG_PRINTF((category, RSM_DEBUG, 9624 "rsm_dr_callback_post_del:" 9625 "post-del on OK/UNREG\n")); 9626 return; 9627 /* break; */ 9628 case RSM_DRV_PREDEL_STARTED: 9629 /* FALLTHRU */ 9630 case RSM_DRV_PREDEL_COMPLETED: 9631 /* FALLTHRU */ 9632 case RSM_DRV_POSTDEL_IN_PROGRESS: 9633 recheck_state = 1; 9634 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock); 9635 break; 9636 case RSM_DRV_DR_IN_PROGRESS: 9637 rsm_drv_data.drv_memdel_cnt--; 9638 if (rsm_drv_data.drv_memdel_cnt > 0) { 9639 mutex_exit(&rsm_drv_data.drv_lock); 9640 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9641 "rsm_dr_callback_post_del done:\n")); 9642 return; 9643 } 9644 rsm_drv_data.drv_state = RSM_DRV_POSTDEL_IN_PROGRESS; 9645 break; 9646 default: 9647 ASSERT(0); 9648 return; 9649 /* break; */ 9650 } 9651 } while (recheck_state); 9652 9653 mutex_exit(&rsm_drv_data.drv_lock); 9654 9655 /* Do all the unquiescing stuff here */ 9656 DBG_PRINTF((category, RSM_DEBUG, 9657 "rsm_dr_callback_post_del: unquiesce things now\n")); 9658 9659 rsm_dr_process_local_segments(RSM_DR_UNQUIESCE); 9660 9661 /* 9662 * now that all local segments have been unquiesced lets inform 9663 * the importers 9664 */ 9665 rsm_send_resume(); 9666 9667 mutex_enter(&rsm_drv_data.drv_lock); 9668 9669 rsm_drv_data.drv_state = RSM_DRV_OK; 9670 9671 cv_broadcast(&rsm_drv_data.drv_cv); 9672 9673 mutex_exit(&rsm_drv_data.drv_lock); 9674 9675 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 9676 "rsm_dr_callback_post_del done\n")); 9677 9678 return; 9679 9680 }