/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 * Copyright 2012 Milan Jurik. All rights reserved.
 */


/*
 * Overview of the RSM Kernel Agent:
 * ---------------------------------
 *
 * rsm.c constitutes the implementation of the RSM kernel agent. The RSM
 * kernel agent is a pseudo device driver which makes use of the RSMPI
 * interface on behalf of the RSMAPI user library.
 *
 * The kernel agent functionality can be categorized into the following
 * components:
 * 1. Driver Infrastructure
 * 2. Export/Import Segment Management
 * 3. Internal resource allocation/deallocation
 *
 * The driver infrastructure includes the basic module loading entry points
 * like _init, _info, _fini to load, unload and report information about
 * the driver module. The driver infrastructure also includes the
 * autoconfiguration entry points, namely attach, detach and getinfo, for
 * the device autoconfiguration.
 *
 * The kernel agent is a pseudo character device driver and exports
 * a cb_ops structure which defines the driver entry points for character
 * device access. This includes the open and close entry points. The
 * other entry points provided include ioctl, devmap, segmap and chpoll.
 * The read and write entry points are not used since the device is
 * memory mapped. Also, ddi_prop_op is used for the prop_op entry point.
 *
 * The ioctl entry point supports a number of commands, which are used by
 * the RSMAPI library in order to export and import segments. These
 * include commands for binding and rebinding the physical pages
 * allocated to the virtual address range, publishing the export segment,
 * unpublishing and republishing an export segment, creating an
 * import segment and a virtual connection from this import segment to
 * an export segment, performing scatter-gather data transfer, and
 * performing barrier operations.
 *
 *
 * Export and Import segments:
 * ---------------------------
 *
 * In order to create an RSM export segment a process allocates a range in its
 * virtual address space for the segment using standard Solaris interfaces.
 * The process then calls RSMAPI, which in turn makes an ioctl call to the
 * RSM kernel agent for an allocation of physical memory pages and for
 * creation of the export segment by binding these pages to the virtual
 * address range. These pages are locked in memory so that remote accesses
 * are always applied to the correct page. Then the RSM segment is published,
 * again via RSMAPI making an ioctl to the RSM kernel agent, and a segment id
 * is assigned to it.
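 *
 * From the library's side the export sequence is roughly the following
 * (a sketch only; variable names are illustrative and error handling
 * is omitted):
 *
 *	buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *	    MAP_PRIVATE | MAP_ANON, -1, 0);
 *	rsm_memseg_export_create(ctrl, &seg, buf, len, 0);
 *	rsm_memseg_export_publish(seg, &segid, acl, acl_cnt);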
 *
 * In order to import a published RSM segment, RSMAPI creates an import
 * segment and forms a virtual connection across the interconnect to the
 * export segment, via an ioctl into the kernel agent with the connect
 * command. The import segment setup is completed by mapping the
 * local device memory into the importer's virtual address space. The
 * mapping of the import segment is handled by the segmap/devmap
 * infrastructure described as follows.
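 *
 * The corresponding import sequence is roughly (again a sketch with
 * illustrative names):
 *
 *	rsm_memseg_import_connect(ctrl, node_id, segid, perm, &im_seg);
 *	rsm_memseg_import_map(im_seg, &va, RSM_MAP_NONE, perm, 0, len);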
 *
 * Segmap and Devmap interfaces:
 *
 * The RSM kernel agent allows device memory to be directly accessed by user
 * threads via memory mapping. In order to do so, the RSM kernel agent
 * supports the devmap and segmap entry points.
 *
 * The segmap entry point (rsm_segmap) is responsible for setting up a memory
 * mapping as requested by mmap. The devmap entry point (rsm_devmap) is
 * responsible for exporting the device memory to the user applications.
 * rsm_segmap calls RSMPI rsm_map to allocate device memory. Then the
 * control is transferred to the devmap_setup call which calls rsm_devmap.
 *
 * rsm_devmap validates the user mapping to the device or kernel memory
 * and passes the information to the system for setting up the mapping. The
 * actual setting up of the mapping is done by devmap_devmem_setup (for
 * device memory) or devmap_umem_setup (for kernel memory). Callbacks are
 * registered for device context management via the devmap_devmem_setup
 * or devmap_umem_setup calls. The callbacks are rsmmap_map, rsmmap_unmap,
 * rsmmap_access and rsmmap_dup. The callbacks are called when a new mapping
 * is created, a mapping is freed, a mapping is accessed or an existing
 * mapping is duplicated, respectively. These callbacks allow the RSM kernel
 * agent to maintain state information associated with the mappings.
 * The state information is mainly in the form of a cookie list for the import
 * segment for which mapping has been done.
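 *
 * The callback registration uses a devmap_callback_ctl structure of
 * roughly the following shape (a sketch of the initializer, not a
 * verbatim copy of the one used by this driver):
 *
 *	static struct devmap_callback_ctl rsmmap_ops = {
 *		DEVMAP_OPS_REV,
 *		rsmmap_map,
 *		rsmmap_access,
 *		rsmmap_dup,
 *		rsmmap_unmap,
 *	};
 *
 * which is then passed to devmap_devmem_setup or devmap_umem_setup.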
 *
 * Forced disconnect of import segments:
 *
 * When an exported segment is unpublished, the exporter sends a forced
 * disconnect message to all its importers. The importer segments are
 * unloaded and disconnected. This involves unloading the original
 * mappings and remapping to a preallocated kernel trash page. This is
 * done by devmap_umem_remap. The trash/dummy page is a kernel page,
 * preallocated by the kernel agent during attach using ddi_umem_alloc with
 * the DDI_UMEM_TRASH flag set. This avoids a core dump in the application
 * due to unloading of the original mappings.
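 *
 * In outline, the remap step is (a sketch; dhp, off, len and maxprot
 * come from the affected mapping, remap_cookie is the trash page cookie
 * allocated at attach time):
 *
 *	(void) devmap_umem_remap(dhp, rsm_dip, remap_cookie,
 *	    off, len, maxprot, 0, NULL);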
 *
 * Additionally every segment has a mapping generation number associated
 * with it. This is an entry in the barrier generation page, created
 * during attach time. This mapping generation number for the import
 * segments is incremented on a force disconnect to notify the application
 * of the force disconnect. On this notification, the application needs
 * to reconnect the segment to establish a new legitimate mapping.
 *
 *
 * Locks used in the kernel agent:
 * -------------------------------
 *
 * The kernel agent uses a variety of mutexes and condition variables for
 * mutual exclusion of the shared data structures and for synchronization
 * between the various threads. Some of the locks are described as follows.
 *
 * Each resource structure, which represents either an export or an import
 * segment, has a lock associated with it. The lock is the resource mutex,
 * rsmrc_lock. This is used directly by the RSMRC_LOCK and RSMRC_UNLOCK
 * macros and in the rsmseglock_acquire and rsmseglock_release macros. An
 * additional lock called the rsmsi_lock is used for the shared import data
 * structure that is relevant for resources representing import segments.
 * There is also a condition variable associated with the resource called
 * s_cv. This is used to wait for events like the segment state change etc.
 *
 * The resource structures are allocated from a pool of resource structures,
 * called rsm_resource. This pool is protected via a reader-writer lock,
 * called rsmrc_lock.
 *
 * There are two separate hash tables, one for the export segments and
 * one for the import segments. The export segments are inserted into the
 * export segment hash table only after they have been published and the
 * import segments are inserted into the import segment hash table only
 * after they have successfully connected to an exported segment. These
 * tables are protected via reader-writer locks.
 *
 * Debug Support in the kernel agent:
 * ----------------------------------
 *
 * Debugging support in the kernel agent is provided by the following
 * macros.
 *
 * DBG_PRINTF((category, level, message)) is a macro which logs a debug
 * message to the kernel agent's debug buffer, rsmka_dbg. This debug buffer
 * can be viewed in kmdb as *rsmka_dbg/s. The message is logged based
 * on the definition of the category and level. All messages that belong to
 * the specified category (rsmdbg_category) and are of an equal or greater
 * severity than the specified level (rsmdbg_level) are logged. The message
 * is a string which uses the same formatting rules as the strings used in
 * printf.
 *
 * The category defines which component of the kernel agent has logged this
 * message. There are a number of categories that have been defined such as
 * RSM_KERNEL_AGENT, RSM_OPS, RSM_IMPORT, RSM_EXPORT etc. A macro,
 * DBG_ADDCATEGORY, is used to add in another category to the currently
 * specified category value so that the component using this new category
 * can also effectively log debug messages. Thus, the category of a specific
 * message is some combination of the available categories and we can define
 * sub-categories if we want a finer level of granularity.
 *
 * The level defines the severity of the message. Different level values are
 * defined, with RSM_ERR being the most severe and RSM_DEBUG_VERBOSE being
 * the least severe (debug level 0).
 *
 * DBG_DEFINE and DBG_DEFINE_STR are macros provided to declare a debug
 * variable or a string respectively.
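 *
 * For example, rsm_attach below does:
 *
 *	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);
 *	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_attach enter\n"));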
 *
 *
 * NOTES:
 *
 * Special Fork and Exec Handling:
 * -------------------------------
 *
 * The backing physical pages of an exported segment are always locked down.
 * Thus, there are two cases in which a process having exported segments
 * will cause a cpu to hang: (1) the process invokes exec; (2) a process
 * forks and invokes exit before the duped file descriptors for the export
 * segments are closed in the child process. The hang is caused because the
 * address space release algorithm in the Solaris VM subsystem is based on a
 * non-blocking loop which does not terminate while segments are locked
 * down. In addition to this, the Solaris VM subsystem lacks a callback
 * mechanism to the rsm kernel agent to allow unlocking these export
 * segment pages.
 *
 * In order to circumvent this problem, the kernel agent does the following.
 * The Solaris VM subsystem keeps memory segments in increasing order of
 * virtual addresses. Thus a special page (special_exit_offset) is allocated
 * by the kernel agent and is mmapped into the heap area of the process address
 * space (the mmap is done by the RSMAPI library). During the mmap processing
 * of this special page by the devmap infrastructure, a callback (the same
 * devmap context management callbacks discussed above) is registered for an
 * unmap.
 *
 * As discussed above, this page is processed by the Solaris address space
 * release code before any of the exported segment pages (which are allocated
 * from high memory). It is during this processing that the unmap callback gets
 * called and this callback is responsible for force destroying the exported
 * segments and thus eliminating the problem of locked pages.
 *
 * Flow-control:
 * ------------
 *
 * A credit-based flow control algorithm is used for messages whose
 * processing cannot be done in the interrupt context because it might
 * involve invoking rsmpi calls, or might take a long time to complete,
 * or might need to allocate resources. The algorithm operates on a per
 * path basis. To send a message the pathend needs to have a credit and
 * it consumes one for every message that is flow controlled. On the
 * receiving pathend the message is put on a msgbuf_queue and a task is
 * dispatched on the worker thread, recv_taskq, where it is processed.
 * After processing the message, the receiving pathend dequeues the message,
 * and if it has processed more than RSMIPC_LOTSFREE_MSGBUFS messages it
 * sends credits back to the sending pathend.
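 *
 * In pseudocode (RSMIPC_LOTSFREE_MSGBUFS is the only real name here):
 *
 *	send side:
 *		if (credits == 0)
 *			wait for credits (or fail the send);
 *		credits--;
 *		send the message across the path;
 *	receive side:
 *		enqueue on msgbuf_queue, dispatch task onto recv_taskq;
 *		process and dequeue the message;
 *		if (++processed > RSMIPC_LOTSFREE_MSGBUFS)
 *			send credits back to the sending pathend;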
 *
 * RSM_DRTEST:
 * -----------
 *
 * This is used to enable DR testing using a test driver on test
 * platforms which do not support DR.
 *
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/user.h>
#include <sys/buf.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/vm.h>
#include <sys/uio.h>
#include <vm/seg.h>
#include <vm/page.h>
#include <sys/stat.h>

#include <sys/time.h>
#include <sys/errno.h>

#include <sys/file.h>
#include <sys/proc.h>
#include <sys/mman.h>
#include <sys/open.h>
#include <sys/atomic.h>
#include <sys/mem_config.h>


#include <sys/ddi.h>
#include <sys/devops.h>
#include <sys/ddidevmap.h>
#include <sys/sunddi.h>
#include <sys/esunddi.h>
#include <sys/ddi_impldefs.h>

#include <sys/kmem.h>
#include <sys/conf.h>

#include <sys/modctl.h>

#include <sys/policy.h>

#include <sys/taskq.h>

#include <sys/rsm/rsm_common.h>
#include <sys/rsm/rsmapi_common.h>
#include <sys/rsm/rsm.h>
#include <rsm_in.h>
#include <sys/rsm/rsmka_path_int.h>
#include <sys/rsm/rsmpi.h>

#include <sys/debug.h>

#include <sys/tuneable.h>

#ifdef  RSM_DRTEST
extern int rsm_kphysm_setup_func_register(kphysm_setup_vector_t *vec,
                void *arg);
extern void rsm_kphysm_setup_func_unregister(kphysm_setup_vector_t *vec,
                void *arg);
#endif

extern void dbg_printf(int category, int level, char *fmt, ...);
extern void rsmka_pathmanager_init();
extern void rsmka_pathmanager_cleanup();
extern void rele_sendq_token(sendq_token_t *);
extern rsm_addr_t get_remote_hwaddr(adapter_t *, rsm_node_id_t);
extern rsm_node_id_t get_remote_nodeid(adapter_t *, rsm_addr_t);
extern int rsmka_topology_ioctl(caddr_t, int, int);

extern pri_t maxclsyspri;
extern work_queue_t work_queue;
extern kmutex_t ipc_info_lock;
extern kmutex_t ipc_info_cvlock;
extern kcondvar_t ipc_info_cv;
extern kmutex_t path_hold_cvlock;
extern kcondvar_t path_hold_cv;

extern kmutex_t rsmka_buf_lock;

extern path_t *rsm_find_path(char *, int, rsm_addr_t);
extern adapter_t *rsmka_lookup_adapter(char *, int);
extern sendq_token_t *rsmka_get_sendq_token(rsm_node_id_t, sendq_token_t *);
extern boolean_t rsmka_do_path_active(path_t *, int);
extern boolean_t rsmka_check_node_alive(rsm_node_id_t);
extern void rsmka_release_adapter(adapter_t *);
extern void rsmka_enqueue_msgbuf(path_t *path, void *data);
extern void rsmka_dequeue_msgbuf(path_t *path);
extern msgbuf_elem_t *rsmka_gethead_msgbuf(path_t *path);
/* lint -w2 */

static int rsm_open(dev_t *, int, int, cred_t *);
static int rsm_close(dev_t, int, int, cred_t *);
static int rsm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
    cred_t *credp, int *rvalp);
static int rsm_devmap(dev_t, devmap_cookie_t, offset_t, size_t, size_t *,
    uint_t);
static int rsm_segmap(dev_t, off_t, struct as *, caddr_t *, off_t, uint_t,
    uint_t, uint_t, cred_t *);
static int rsm_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp);

static int rsm_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int rsm_attach(dev_info_t *, ddi_attach_cmd_t);
static int rsm_detach(dev_info_t *, ddi_detach_cmd_t);

static int rsmipc_send(rsm_node_id_t, rsmipc_request_t *, rsmipc_reply_t *);
static void rsm_force_unload(rsm_node_id_t, rsm_memseg_id_t, boolean_t);
static void rsm_send_importer_disconnects(rsm_memseg_id_t, rsm_node_id_t);
static void rsm_send_republish(rsm_memseg_id_t, rsmapi_access_entry_t *, int,
                                rsm_permission_t);
static void rsm_export_force_destroy(ddi_umem_cookie_t *);
static void rsmacl_free(rsmapi_access_entry_t *, int);
static void rsmpiacl_free(rsm_access_entry_t *, int);

static int rsm_inc_pgcnt(pgcnt_t);
static void rsm_dec_pgcnt(pgcnt_t);
static void rsm_free_mapinfo(rsm_mapinfo_t *mapinfop);
static rsm_mapinfo_t *rsm_get_mapinfo(rsmseg_t *, off_t, size_t, off_t *,
                                        size_t *);
static void exporter_quiesce();
static void rsmseg_suspend(rsmseg_t *, int *);
static void rsmsegshare_suspend(rsmseg_t *);
static int rsmseg_resume(rsmseg_t *, void **);
static int rsmsegshare_resume(rsmseg_t *);

static struct cb_ops rsm_cb_ops = {
        rsm_open,               /* open */
        rsm_close,              /* close */
        nodev,                  /* strategy */
        nodev,                  /* print */
        nodev,                  /* dump */
        nodev,                  /* read */
        nodev,                  /* write */
        rsm_ioctl,              /* ioctl */
        rsm_devmap,             /* devmap */
        NULL,                   /* mmap */
        rsm_segmap,             /* segmap */
        rsm_chpoll,             /* poll */
        ddi_prop_op,            /* cb_prop_op */
        0,                      /* streamtab  */
        D_NEW|D_MP|D_DEVMAP,    /* Driver compatibility flag */
        0,
        0,
        0
};

static struct dev_ops rsm_ops = {
        DEVO_REV,               /* devo_rev, */
        0,                      /* refcnt  */
        rsm_info,               /* get_dev_info */
        nulldev,                /* identify */
        nulldev,                /* probe */
        rsm_attach,             /* attach */
        rsm_detach,             /* detach */
        nodev,                  /* reset */
        &rsm_cb_ops,            /* driver operations */
        (struct bus_ops *)0,    /* bus operations */
        0,
        ddi_quiesce_not_needed, /* quiesce */
};

/*
 * Module linkage information for the kernel.
 */

static struct modldrv modldrv = {
        &mod_driverops, /* Type of module.  This one is a pseudo driver */
        "Remote Shared Memory Driver",
        &rsm_ops,       /* driver ops */
};

static struct modlinkage modlinkage = {
        MODREV_1,
        { (void *)&modldrv, NULL }
};

static void rsm_dr_callback_post_add(void *arg, pgcnt_t delta);
static int rsm_dr_callback_pre_del(void *arg, pgcnt_t delta);
static void rsm_dr_callback_post_del(void *arg, pgcnt_t delta, int cancelled);

static kphysm_setup_vector_t rsm_dr_callback_vec = {
        KPHYSM_SETUP_VECTOR_VERSION,
        rsm_dr_callback_post_add,
        rsm_dr_callback_pre_del,
        rsm_dr_callback_post_del
};

/* This flag can be changed to 0 to help with PIT testing */
int rsmka_modunloadok = 1;
int no_reply_cnt = 0;

uint64_t rsm_ctrlmsg_errcnt = 0;
uint64_t rsm_ipcsend_errcnt = 0;

#define MAX_NODES 64

static struct rsm_driver_data rsm_drv_data;
static struct rsmresource_table rsm_resource;

static void rsmresource_insert(minor_t, rsmresource_t *, rsm_resource_type_t);
static void rsmresource_destroy(void);
static int rsmresource_alloc(minor_t *);
static rsmresource_t *rsmresource_free(minor_t rnum);
static int rsm_closeconnection(rsmseg_t *seg, void **cookie);
static int rsm_unpublish(rsmseg_t *seg, int mode);
static int rsm_unbind(rsmseg_t *seg);
static uint_t rsmhash(rsm_memseg_id_t key);
static void rsmhash_alloc(rsmhash_table_t *rhash, int size);
static void rsmhash_free(rsmhash_table_t *rhash, int size);
static void *rsmhash_getbkt(rsmhash_table_t *rhash, uint_t hashval);
static void **rsmhash_bktaddr(rsmhash_table_t *rhash, uint_t hashval);
static int rsm_send_notimporting(rsm_node_id_t dest, rsm_memseg_id_t segid,
                                        void *cookie);
int rsm_disconnect(rsmseg_t *seg);
void rsmseg_unload(rsmseg_t *);
void rsm_suspend_complete(rsm_node_id_t src_node, int flag);

rsm_intr_hand_ret_t rsm_srv_func(rsm_controller_object_t *chd,
    rsm_intr_q_op_t opcode, rsm_addr_t src,
    void *data, size_t size, rsm_intr_hand_arg_t arg);

static void rsm_intr_callback(void *, rsm_addr_t, rsm_intr_hand_arg_t);

rsm_node_id_t my_nodeid;

/* cookie, va, offsets and length for the barrier */
static rsm_gnum_t               *bar_va;
static ddi_umem_cookie_t        bar_cookie;
static off_t                    barrier_offset;
static size_t                   barrier_size;
static int                      max_segs;

/* cookie for the trash memory */
static ddi_umem_cookie_t        remap_cookie;

static rsm_memseg_id_t  rsm_nextavail_segmentid;

extern taskq_t *work_taskq;
extern char *taskq_name;

static dev_info_t *rsm_dip;     /* private copy of devinfo pointer */

static rsmhash_table_t rsm_export_segs;         /* list of exported segs */
rsmhash_table_t rsm_import_segs;                /* list of imported segs */
static rsmhash_table_t rsm_event_queues;        /* list of event queues */

static  rsm_ipc_t       rsm_ipc;                /* ipc info */

/* list of nodes to which RSMIPC_MSG_SUSPEND has been sent */
static list_head_t      rsm_suspend_list;

/* list of descriptors for remote importers */
static importers_table_t importer_list;

kmutex_t rsm_suspend_cvlock;
kcondvar_t rsm_suspend_cv;

static kmutex_t rsm_lock;

adapter_t loopback_adapter;
rsm_controller_attr_t loopback_attr;

int rsmipc_send_controlmsg(path_t *path, int msgtype);

void rsmka_init_loopback();

int rsmka_null_seg_create(
    rsm_controller_handle_t,
    rsm_memseg_export_handle_t *,
    size_t,
    uint_t,
    rsm_memory_local_t *,
    rsm_resource_callback_t,
    rsm_resource_callback_arg_t);

int rsmka_null_seg_destroy(
    rsm_memseg_export_handle_t);

int rsmka_null_bind(
    rsm_memseg_export_handle_t,
    off_t,
    rsm_memory_local_t *,
    rsm_resource_callback_t,
    rsm_resource_callback_arg_t);

int rsmka_null_unbind(
    rsm_memseg_export_handle_t,
    off_t,
    size_t);

int rsmka_null_rebind(
    rsm_memseg_export_handle_t,
    off_t,
    rsm_memory_local_t *,
    rsm_resource_callback_t,
    rsm_resource_callback_arg_t);

int rsmka_null_publish(
    rsm_memseg_export_handle_t,
    rsm_access_entry_t [],
    uint_t,
    rsm_memseg_id_t,
    rsm_resource_callback_t,
    rsm_resource_callback_arg_t);


int rsmka_null_republish(
    rsm_memseg_export_handle_t,
    rsm_access_entry_t [],
    uint_t,
    rsm_resource_callback_t,
    rsm_resource_callback_arg_t);

int rsmka_null_unpublish(
    rsm_memseg_export_handle_t);

rsm_ops_t null_rsmpi_ops;

/*
 * data and locks to keep track of total amount of exported memory
 */
static  pgcnt_t         rsm_pgcnt;
static  pgcnt_t         rsm_pgcnt_max;  /* max allowed */
static  kmutex_t        rsm_pgcnt_lock;

static  int             rsm_enable_dr;

static  char            loopback_str[] = "loopback";

int             rsm_hash_size;

/*
 * The locking model is as follows:
 *
 * Local operations:
 *              find resource - grab reader lock on resource list
 *              insert rc     - grab writer lock
 *              delete rc     - grab writer lock and resource mutex
 *              read/write    - no lock
 *
 * Remote invocations:
 *              find resource - grab read lock and resource mutex
 *
 * State:
 *              resource state - grab resource mutex
 */
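
/*
 * For example, the "find resource" path in rsmresource_lookup() below
 * follows the local-operation rule above (a sketch of that function):
 *
 *	rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
 *	p = rsm_resource.rsmrc_root[i]->rsmrcblk_blks[j];
 *	if (p != NULL && p != RSMRC_RESERVED)
 *		mutex_enter(&p->rsmrc_lock);
 *	rw_exit(&rsm_resource.rsmrc_lock);
 */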

int
_init(void)
{
        int e;

        e = mod_install(&modlinkage);
        if (e != 0) {
                return (e);
        }

        mutex_init(&rsm_lock, NULL, MUTEX_DRIVER, NULL);

        mutex_init(&rsmka_buf_lock, NULL, MUTEX_DEFAULT, NULL);


        rw_init(&rsm_resource.rsmrc_lock, NULL, RW_DRIVER, NULL);

        rsm_hash_size = RSM_HASHSZ;

        rw_init(&rsm_export_segs.rsmhash_rw, NULL, RW_DRIVER, NULL);

        rw_init(&rsm_import_segs.rsmhash_rw, NULL, RW_DRIVER, NULL);

        mutex_init(&importer_list.lock, NULL, MUTEX_DRIVER, NULL);

        mutex_init(&rsm_ipc.lock, NULL, MUTEX_DRIVER, NULL);
        cv_init(&rsm_ipc.cv, NULL, CV_DRIVER, 0);

        mutex_init(&rsm_suspend_cvlock, NULL, MUTEX_DRIVER, NULL);
        cv_init(&rsm_suspend_cv, NULL, CV_DRIVER, 0);

        mutex_init(&rsm_drv_data.drv_lock, NULL, MUTEX_DRIVER, NULL);
        cv_init(&rsm_drv_data.drv_cv, NULL, CV_DRIVER, 0);

        rsm_ipc.count = RSMIPC_SZ;
        rsm_ipc.wanted = 0;
        rsm_ipc.sequence = 0;

        (void) mutex_init(&rsm_pgcnt_lock, NULL, MUTEX_DRIVER, NULL);

        for (e = 0; e < RSMIPC_SZ; e++) {
                rsmipc_slot_t *slot = &rsm_ipc.slots[e];

                RSMIPC_SET(slot, RSMIPC_FREE);
                mutex_init(&slot->rsmipc_lock, NULL, MUTEX_DRIVER, NULL);
                cv_init(&slot->rsmipc_cv, NULL, CV_DRIVER, 0);
        }

        /*
         * Initialize the suspend message list
         */
        rsm_suspend_list.list_head = NULL;
        mutex_init(&rsm_suspend_list.list_lock, NULL, MUTEX_DRIVER, NULL);

        /*
         * It is assumed here that configuration data is available
         * during system boot since _init may be called at that time.
         */

        rsmka_pathmanager_init();

        DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE,
            "rsm: _init done\n"));

        return (DDI_SUCCESS);

}

int
_info(struct modinfo *modinfop)
{

        return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
        int e;

        DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE,
            "rsm: _fini enter\n"));

        /*
         * The rsmka_modunloadok flag is simply used to help with
         * the PIT testing. Make this flag 0 to disallow modunload.
         */
        if (rsmka_modunloadok == 0)
                return (EBUSY);

        /* rsm_detach will be called as a result of mod_remove */
        e = mod_remove(&modlinkage);
        if (e) {
                DBG_PRINTF((RSM_KERNEL_AGENT, RSM_ERR,
                    "Unable to fini RSM %x\n", e));
                return (e);
        }

        rsmka_pathmanager_cleanup();

        rw_destroy(&rsm_resource.rsmrc_lock);

        rw_destroy(&rsm_export_segs.rsmhash_rw);
        rw_destroy(&rsm_import_segs.rsmhash_rw);
        rw_destroy(&rsm_event_queues.rsmhash_rw);

        mutex_destroy(&importer_list.lock);

        mutex_destroy(&rsm_ipc.lock);
        cv_destroy(&rsm_ipc.cv);

        (void) mutex_destroy(&rsm_suspend_list.list_lock);

        (void) mutex_destroy(&rsm_pgcnt_lock);

        DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE, "_fini done\n"));

        return (DDI_SUCCESS);

}

/*ARGSUSED1*/
static int
rsm_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
        minor_t rnum;
        int     percent;
        int     ret;
        DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);

        DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_attach enter\n"));

        switch (cmd) {
        case DDI_ATTACH:
                break;
        case DDI_RESUME:
        default:
                DBG_PRINTF((category, RSM_ERR,
                    "rsm:rsm_attach - cmd not supported\n"));
                return (DDI_FAILURE);
        }

        if (rsm_dip != NULL) {
                DBG_PRINTF((category, RSM_ERR,
                    "rsm:rsm_attach - supports only "
                    "one instance\n"));
                return (DDI_FAILURE);
        }

        rsm_enable_dr = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
            DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
            "enable-dynamic-reconfiguration", 1);

        mutex_enter(&rsm_drv_data.drv_lock);
        rsm_drv_data.drv_state = RSM_DRV_REG_PROCESSING;
        mutex_exit(&rsm_drv_data.drv_lock);

        if (rsm_enable_dr) {
#ifdef  RSM_DRTEST
                ret = rsm_kphysm_setup_func_register(&rsm_dr_callback_vec,
                    (void *)NULL);
#else
                ret = kphysm_setup_func_register(&rsm_dr_callback_vec,
                    (void *)NULL);
#endif
                if (ret != 0) {
                        cmn_err(CE_CONT, "rsm:rsm_attach - Dynamic "
                            "reconfiguration setup failed\n");
                        return (DDI_FAILURE);
                }
        }

        mutex_enter(&rsm_drv_data.drv_lock);
        ASSERT(rsm_drv_data.drv_state == RSM_DRV_REG_PROCESSING);
        rsm_drv_data.drv_state = RSM_DRV_OK;
        cv_broadcast(&rsm_drv_data.drv_cv);
        mutex_exit(&rsm_drv_data.drv_lock);

        /*
         * page_list_read_lock();
         * xx_setup();
         * page_list_read_unlock();
         */

        rsm_hash_size = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
            DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
            "segment-hashtable-size", RSM_HASHSZ);
        if (rsm_hash_size == 0) {
                DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
                    "rsm: segment-hashtable-size in rsm.conf "
                    "must be greater than 0, defaulting to 128\n"));
                rsm_hash_size = RSM_HASHSZ;
        }

        DBG_PRINTF((category, RSM_DEBUG, "rsm_attach rsm_hash_size: %d\n",
            rsm_hash_size));

        rsm_pgcnt = 0;

        percent = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
            DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
            "max-exported-memory", 0);
        if (percent < 0) {
                DBG_PRINTF((category, RSM_ERR,
                    "rsm:rsm_attach not enough memory available to "
                    "export, or max-exported-memory set incorrectly.\n"));
                return (DDI_FAILURE);
        }
        /*
         * 0 indicates no fixed upper limit. maxmem is the max
         * available pageable physical mem.
         */
        rsm_pgcnt_max = (percent * maxmem) / 100;
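        /*
         * For example, a max-exported-memory setting of 20 with maxmem at
         * 1,000,000 pageable pages yields rsm_pgcnt_max =
         * (20 * 1000000) / 100 = 200,000 pages.
         */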

        if (rsm_pgcnt_max > 0) {
                DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
                    "rsm: Available physical memory = %lu pages, "
                    "Max exportable memory = %lu pages",
                    maxmem, rsm_pgcnt_max));
        }

        /*
         * Create minor number
         */
        if (rsmresource_alloc(&rnum) != RSM_SUCCESS) {
                DBG_PRINTF((category, RSM_ERR,
                    "rsm: rsm_attach - Unable to get "
                    "minor number\n"));
                return (DDI_FAILURE);
        }

        ASSERT(rnum == RSM_DRIVER_MINOR);

        if (ddi_create_minor_node(devi, DRIVER_NAME, S_IFCHR,
            rnum, DDI_PSEUDO, NULL) == DDI_FAILURE) {
                DBG_PRINTF((category, RSM_ERR,
                    "rsm: rsm_attach - unable to allocate "
                    "minor #\n"));
                return (DDI_FAILURE);
        }

        rsm_dip = devi;
        /*
         * Allocate the hashtables
         */
        rsmhash_alloc(&rsm_export_segs, rsm_hash_size);
        rsmhash_alloc(&rsm_import_segs, rsm_hash_size);

        importer_list.bucket = (importing_token_t **)
            kmem_zalloc(rsm_hash_size * sizeof (importing_token_t *), KM_SLEEP);

        /*
         * Allocate a resource struct
         */
        {
                rsmresource_t *p;

                p = (rsmresource_t *)kmem_zalloc(sizeof (*p), KM_SLEEP);

                mutex_init(&p->rsmrc_lock, NULL, MUTEX_DRIVER, (void *) NULL);

                rsmresource_insert(rnum, p, RSM_RESOURCE_BAR);
        }

        /*
         * Based on the rsm.conf property max-segments, determine the maximum
         * number of segments that can be exported/imported. This is then used
         * to determine the size for barrier failure pages.
         */

        /* First get the max number of segments from the rsm.conf file */
        max_segs = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
            DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
            "max-segments", 0);
        if (max_segs == 0) {
                /* Use default number of segments */
                max_segs = RSM_MAX_NUM_SEG;
        }

        /*
         * Based on the max number of segments allowed, determine the barrier
         * page size. Add 1 to max_segs since the barrier page itself uses
         * a slot.
         */
        barrier_size = roundup((max_segs + 1) * sizeof (rsm_gnum_t),
            PAGESIZE);
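        /*
         * For example, with max_segs = 4095 and an 8-byte rsm_gnum_t (both
         * illustrative assumptions), this is roundup(4096 * 8, PAGESIZE) =
         * 32768 bytes.
         */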

        /*
         * allocation of the barrier failure page
         */
        bar_va = (rsm_gnum_t *)ddi_umem_alloc(barrier_size,
            DDI_UMEM_SLEEP, &bar_cookie);

        /*
         * Set the barrier_offset
         */
        barrier_offset = 0;

        /*
         * Allocate a trash memory and get a cookie for it. This will be used
         * when remapping segments during force disconnects. Allocate the
         * trash memory with a large size which is page aligned.
         */
        (void) ddi_umem_alloc((size_t)TRASHSIZE,
            DDI_UMEM_TRASH, &remap_cookie);

        /* initialize user segment id allocation variable */
        rsm_nextavail_segmentid = (rsm_memseg_id_t)RSM_USER_APP_ID_BASE;

        /*
         * initialize the null_rsmpi_ops vector and the loopback adapter
         */
        rsmka_init_loopback();


        ddi_report_dev(devi);

        DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_attach done\n"));

        return (DDI_SUCCESS);
}

/*
 * The call to mod_remove in the _fini routine will cause the system
 * to call rsm_detach
 */
/*ARGSUSED*/
static int
rsm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
        DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);

        DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_detach enter\n"));

        switch (cmd) {
        case DDI_DETACH:
                break;
        default:
                DBG_PRINTF((category, RSM_ERR,
                    "rsm:rsm_detach - cmd %x not supported\n",
                    cmd));
                return (DDI_FAILURE);
        }

        mutex_enter(&rsm_drv_data.drv_lock);
        while (rsm_drv_data.drv_state != RSM_DRV_OK)
                cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
        rsm_drv_data.drv_state = RSM_DRV_UNREG_PROCESSING;
        mutex_exit(&rsm_drv_data.drv_lock);

        /*
         * Unregister the DR callback functions
         */
        if (rsm_enable_dr) {
#ifdef  RSM_DRTEST
                rsm_kphysm_setup_func_unregister(&rsm_dr_callback_vec,
                    (void *)NULL);
#else
                kphysm_setup_func_unregister(&rsm_dr_callback_vec,
                    (void *)NULL);
#endif
        }

        mutex_enter(&rsm_drv_data.drv_lock);
        ASSERT(rsm_drv_data.drv_state == RSM_DRV_UNREG_PROCESSING);
        rsm_drv_data.drv_state = RSM_DRV_NEW;
        mutex_exit(&rsm_drv_data.drv_lock);

        ASSERT(rsm_suspend_list.list_head == NULL);

        /*
         * Release all resources, seglist, controller, ...
         */

        /* remove intersend queues */
        /* remove registered services */


        ddi_remove_minor_node(dip, DRIVER_NAME);
        rsm_dip = NULL;

        /*
         * Free minor zero resource
         */
        {
                rsmresource_t *p;

                p = rsmresource_free(RSM_DRIVER_MINOR);
                if (p) {
                        mutex_destroy(&p->rsmrc_lock);
                        kmem_free((void *)p, sizeof (*p));
                }
        }

        /*
         * Free resource table
         */

        rsmresource_destroy();

        /*
         * Free the hash tables
         */
        rsmhash_free(&rsm_export_segs, rsm_hash_size);
        rsmhash_free(&rsm_import_segs, rsm_hash_size);

        kmem_free((void *)importer_list.bucket,
            rsm_hash_size * sizeof (importing_token_t *));
        importer_list.bucket = NULL;


        /* free barrier page */
        if (bar_cookie != NULL) {
                ddi_umem_free(bar_cookie);
        }
        bar_va = NULL;
        bar_cookie = NULL;

        /*
         * Free the memory allocated for the trash
         */
        if (remap_cookie != NULL) {
                ddi_umem_free(remap_cookie);
        }
        remap_cookie = NULL;

        DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_detach done\n"));

        return (DDI_SUCCESS);
}

/*ARGSUSED*/
static int
rsm_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
        int error;
        DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);

        DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_info enter\n"));

        switch (infocmd) {
        case DDI_INFO_DEVT2DEVINFO:
                if (rsm_dip == NULL)
                        error = DDI_FAILURE;
                else {
                        *result = (void *)rsm_dip;
                        error = DDI_SUCCESS;
                }
                break;
        case DDI_INFO_DEVT2INSTANCE:
                *result = (void *)0;
                error = DDI_SUCCESS;
                break;
        default:
                error = DDI_FAILURE;
        }

        DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_info done\n"));
        return (error);
}

adapter_t *
rsm_getadapter(rsm_ioctlmsg_t *msg, int mode)
{
        adapter_t *adapter;
        char adapter_devname[MAXNAMELEN];
        int instance;
        DBG_DEFINE(category,
            RSM_KERNEL_AGENT | RSM_IMPORT | RSM_EXPORT | RSM_IOCTL);

        DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_getadapter enter\n"));

        instance = msg->cnum;

        if ((msg->cname_len <= 0) || (msg->cname_len > MAXNAMELEN)) {
                return (NULL);
        }

        if (ddi_copyin(msg->cname, adapter_devname, msg->cname_len, mode))
                return (NULL);

        if (strcmp(adapter_devname, "loopback") == 0)
                return (&loopback_adapter);

        adapter = rsmka_lookup_adapter(adapter_devname, instance);

        DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_getadapter done\n"));

        return (adapter);
}


/*
 * *********************** Resource Number Management ********************
 * All resources are stored in a simple hash table. The table is an array
 * of pointers to resource blks. Each blk contains:
 *      base    - base number of this blk
 *      used    - number of used slots in this blk.
 *      blks    - array of pointers to resource items.
 * An entry in a resource blk is empty if it's NULL.
 *
 * We start with no resource array. Each time we run out of slots, we
 * reallocate a new larger array, copy the existing pointers into it, and
 * allocate a new resource blk which is added to the hash table.
 *
 * The resource control block contains:
 *      root    - array of pointers to resource blks
 *      sz      - current size of array.
 *      len     - last valid entry in array.
 *
 * A search operation based on a resource number is as follows:
 *      index = rnum / RESOURCE_BLKSZ;
 *      ASSERT(index < resource_block.len);
 *      ASSERT(index < resource_block.sz);
 *      offset = rnum % RESOURCE_BLKSZ;
 *      ASSERT(offset >= resource_block.root[index]->base);
 *      ASSERT(offset < resource_block.root[index]->base + RESOURCE_BLKSZ);
 *      return resource_block.root[index]->blks[offset];
 *
 * A resource blk is freed when its used count reaches zero.
 */
static int
rsmresource_alloc(minor_t *rnum)
{

        /* search for available resource slot */
        int i, j, empty = -1;
        rsmresource_blk_t *blk;

        DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
            "rsmresource_alloc enter\n"));

        rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER);

        /* Try to find an empty slot */
        for (i = 0; i < rsm_resource.rsmrc_len; i++) {
                blk = rsm_resource.rsmrc_root[i];
                if (blk != NULL && blk->rsmrcblk_avail > 0) {
                        /* found an empty slot in this blk */
                        for (j = 0; j < RSMRC_BLKSZ; j++) {
                                if (blk->rsmrcblk_blks[j] == NULL) {
                                        *rnum = (minor_t)
                                            (j + (i * RSMRC_BLKSZ));
                                        /*
                                         * obey gen page limits
                                         */
                                        if (*rnum >= max_segs + 1) {
                                                if (empty < 0) {
                                                        rw_exit(&rsm_resource.
                                                            rsmrc_lock);
                                                        DBG_PRINTF((
                                                            RSM_KERNEL_ALL,
                                                            RSM_ERR,
                                                            "rsmresource"
                                                            "_alloc failed:"
                                                            "not enough res"
                                                            "%d\n", *rnum));
                                        return (RSMERR_INSUFFICIENT_RESOURCES);
                                                } else {
                                                        /* use empty slot */
                                                        break;
                                                }

                                        }

                                        blk->rsmrcblk_blks[j] = RSMRC_RESERVED;
                                        blk->rsmrcblk_avail--;
                                        rw_exit(&rsm_resource.rsmrc_lock);
                                        DBG_PRINTF((RSM_KERNEL_ALL,
                                            RSM_DEBUG_VERBOSE,
                                            "rsmresource_alloc done\n"));
                                        return (RSM_SUCCESS);
                                }
                        }
                } else if (blk == NULL && empty < 0) {
                        /* remember first empty slot */
                        empty = i;
                }
        }

        /* Couldn't find anything, allocate a new blk */
        /*
         * Do we need to reallocate the root array
         */
        if (empty < 0) {
                if (rsm_resource.rsmrc_len == rsm_resource.rsmrc_sz) {
                        /*
                         * Allocate new array and copy current stuff into it
                         */
                        rsmresource_blk_t       **p;
                        uint_t newsz = (uint_t)rsm_resource.rsmrc_sz +
                            RSMRC_BLKSZ;
                        /*
                         * Don't allocate more than the max valid rnum
                         */
                        if (rsm_resource.rsmrc_len * RSMRC_BLKSZ >=
                            max_segs + 1) {
                                rw_exit(&rsm_resource.rsmrc_lock);
                                return (RSMERR_INSUFFICIENT_RESOURCES);
                        }

                        p = (rsmresource_blk_t **)kmem_zalloc(
                            newsz * sizeof (*p),
                            KM_SLEEP);

                        if (rsm_resource.rsmrc_root) {
                                uint_t oldsz;

                                oldsz = (uint_t)(rsm_resource.rsmrc_sz *
                                    (int)sizeof (*p));

                                /*
                                 * Copy old data into new space and
                                 * free old stuff
                                 */
                                bcopy(rsm_resource.rsmrc_root, p, oldsz);
                                kmem_free(rsm_resource.rsmrc_root, oldsz);
                        }

                        rsm_resource.rsmrc_root = p;
                        rsm_resource.rsmrc_sz = (int)newsz;
                }

                empty = rsm_resource.rsmrc_len;
                rsm_resource.rsmrc_len++;
        }

        /*
         * Allocate a new blk
         */
        blk = (rsmresource_blk_t *)kmem_zalloc(sizeof (*blk), KM_SLEEP);
        ASSERT(rsm_resource.rsmrc_root[empty] == NULL);
        rsm_resource.rsmrc_root[empty] = blk;
        blk->rsmrcblk_avail = RSMRC_BLKSZ - 1;

        /*
         * Allocate slot
         */

        *rnum = (minor_t)(empty * RSMRC_BLKSZ);

        /*
         * watch out not to exceed bounds of barrier page
         */
        if (*rnum >= max_segs + 1) {
                rw_exit(&rsm_resource.rsmrc_lock);
                DBG_PRINTF((RSM_KERNEL_ALL, RSM_ERR,
                    "rsmresource_alloc failed %d\n", *rnum));

                return (RSMERR_INSUFFICIENT_RESOURCES);
        }
        blk->rsmrcblk_blks[0] = RSMRC_RESERVED;


        rw_exit(&rsm_resource.rsmrc_lock);

        DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
            "rsmresource_alloc done\n"));

        return (RSM_SUCCESS);
}

static rsmresource_t *
rsmresource_free(minor_t rnum)
{

        /* search for available resource slot */
        int i, j;
        rsmresource_blk_t *blk;
        rsmresource_t *p;

        DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
            "rsmresource_free enter\n"));

        i = (int)(rnum / RSMRC_BLKSZ);
        j = (int)(rnum % RSMRC_BLKSZ);

        if (i >= rsm_resource.rsmrc_len) {
                DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
                    "rsmresource_free done\n"));
                return (NULL);
        }

        rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER);

        ASSERT(rsm_resource.rsmrc_root);
        ASSERT(i < rsm_resource.rsmrc_len);
        ASSERT(i < rsm_resource.rsmrc_sz);
        blk = rsm_resource.rsmrc_root[i];
        if (blk == NULL) {
                rw_exit(&rsm_resource.rsmrc_lock);
                DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
                    "rsmresource_free done\n"));
                return (NULL);
        }

        ASSERT(blk->rsmrcblk_blks[j]); /* reserved or full */

        p = blk->rsmrcblk_blks[j];
        if (p == RSMRC_RESERVED) {
                p = NULL;
        }

        blk->rsmrcblk_blks[j] = NULL;
        blk->rsmrcblk_avail++;
        if (blk->rsmrcblk_avail == RSMRC_BLKSZ) {
                /* free this blk */
                kmem_free(blk, sizeof (*blk));
                rsm_resource.rsmrc_root[i] = NULL;
        }

        rw_exit(&rsm_resource.rsmrc_lock);

        DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
            "rsmresource_free done\n"));

        return (p);
}

static rsmresource_t *
rsmresource_lookup(minor_t rnum, int lock)
{
        int i, j;
        rsmresource_blk_t *blk;
        rsmresource_t *p;

        DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
            "rsmresource_lookup enter\n"));

        /* Find resource and lock it in READER mode */
        /* search for available resource slot */

        i = (int)(rnum / RSMRC_BLKSZ);
        j = (int)(rnum % RSMRC_BLKSZ);

        if (i >= rsm_resource.rsmrc_len) {
                DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
                    "rsmresource_lookup done\n"));
                return (NULL);
        }

        rw_enter(&rsm_resource.rsmrc_lock, RW_READER);

        blk = rsm_resource.rsmrc_root[i];
        if (blk != NULL) {
                ASSERT(i < rsm_resource.rsmrc_len);
                ASSERT(i < rsm_resource.rsmrc_sz);

                p = blk->rsmrcblk_blks[j];
                if (lock == RSM_LOCK) {
                        if (p != NULL && p != RSMRC_RESERVED) {
                                mutex_enter(&p->rsmrc_lock);
                        } else {
                                p = NULL;
                        }
                }
        } else {
                p = NULL;
        }
        rw_exit(&rsm_resource.rsmrc_lock);

        DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
            "rsmresource_lookup done\n"));

        return (p);
}

static void
rsmresource_insert(minor_t rnum, rsmresource_t *p, rsm_resource_type_t type)
{
        /*
         * Insert the resource into the slot previously reserved (as
         * RSMRC_RESERVED) for this rnum. The table is locked in READER
         * mode; that is sufficient since the slot is already reserved
         * for us and the reader lock keeps the root array stable.
         */
        int i, j;
        rsmresource_blk_t *blk;

        DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
            "rsmresource_insert enter\n"));

        i = (int)(rnum / RSMRC_BLKSZ);
        j = (int)(rnum % RSMRC_BLKSZ);

        p->rsmrc_type = type;
        p->rsmrc_num = rnum;

        rw_enter(&rsm_resource.rsmrc_lock, RW_READER);

        ASSERT(rsm_resource.rsmrc_root);
        ASSERT(i < rsm_resource.rsmrc_len);
        ASSERT(i < rsm_resource.rsmrc_sz);

        blk = rsm_resource.rsmrc_root[i];
        ASSERT(blk);

        ASSERT(blk->rsmrcblk_blks[j] == RSMRC_RESERVED);

        blk->rsmrcblk_blks[j] = p;

        DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
            "rsmresource_insert done\n"));

        rw_exit(&rsm_resource.rsmrc_lock);
}

static void
rsmresource_destroy()
{
        int i, j;

        DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
            "rsmresource_destroy enter\n"));

        rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER);

        for (i = 0; i < rsm_resource.rsmrc_len; i++) {
                rsmresource_blk_t       *blk;

                blk = rsm_resource.rsmrc_root[i];
                if (blk == NULL) {
                        continue;
                }
                for (j = 0; j < RSMRC_BLKSZ; j++) {
                        if (blk->rsmrcblk_blks[j] != NULL) {
                                DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
                                    "Not null slot %d, %lx\n", j,
                                    (size_t)blk->rsmrcblk_blks[j]));
                        }
                }
                kmem_free(blk, sizeof (*blk));
                rsm_resource.rsmrc_root[i] = NULL;
        }
        if (rsm_resource.rsmrc_root) {
                i = rsm_resource.rsmrc_sz * (int)sizeof (rsmresource_blk_t *);
                kmem_free(rsm_resource.rsmrc_root, (uint_t)i);
                rsm_resource.rsmrc_root = NULL;
                rsm_resource.rsmrc_len = 0;
                rsm_resource.rsmrc_sz = 0;
        }

        DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
            "rsmresource_destroy done\n"));

        rw_exit(&rsm_resource.rsmrc_lock);
}
1436 
1437 
1438 /* ******************** Generic Key Hash Table Management ********* */
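/*
 * Look up a resource by key. On a match the resource is returned with
 * its lock held; if the resource is not in the expected state, the
 * lock is dropped and NULL is returned instead.
 */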
1439 static rsmresource_t *
1440 rsmhash_lookup(rsmhash_table_t *rhash, rsm_memseg_id_t key,
1441     rsm_resource_state_t state)
1442 {
1443         rsmresource_t   *p;
1444         uint_t          hashval;
1445         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1446 
1447         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_lookup enter\n"));
1448 
1449         hashval = rsmhash(key);
1450 
1451         DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_lookup %u=%d\n",
1452             key, hashval));
1453 
1454         rw_enter(&rhash->rsmhash_rw, RW_READER);
1455 
1456         p = (rsmresource_t *)rsmhash_getbkt(rhash, hashval);
1457 
1458         for (; p; p = p->rsmrc_next) {
1459                 if (p->rsmrc_key == key) {
1460                         /* acquire resource lock */
1461                         RSMRC_LOCK(p);
1462                         break;
1463                 }
1464         }
1465 
1466         rw_exit(&rhash->rsmhash_rw);
1467 
1468         if (p != NULL && p->rsmrc_state != state) {
1469                 /* state changed, release lock and return null */
1470                 RSMRC_UNLOCK(p);
1471                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
1472                     "rsmhash_lookup done: state changed\n"));
1473                 return (NULL);
1474         }
1475 
1476         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_lookup done\n"));
1477 
1478         return (p);
1479 }
1480 
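/*
 * Unlink a resource from its hash bucket. It is not an error if the
 * element is no longer on the list.
 */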
1481 static void
1482 rsmhash_rm(rsmhash_table_t *rhash, rsmresource_t *rcelm)
1483 {
1484         rsmresource_t           *p, **back;
1485         uint_t                  hashval;
1486         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1487 
1488         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_rm enter\n"));
1489 
1490         hashval = rsmhash(rcelm->rsmrc_key);
1491 
1492         DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_rm %u=%d\n",
1493             rcelm->rsmrc_key, hashval));
1494 
1495         /*
1496          * It's ok not to find the segment.
1497          */
1498         rw_enter(&rhash->rsmhash_rw, RW_WRITER);
1499 
1500         back = (rsmresource_t **)rsmhash_bktaddr(rhash, hashval);
1501 
1502         for (; (p = *back) != NULL;  back = &p->rsmrc_next) {
1503                 if (p == rcelm) {
1504                         *back = rcelm->rsmrc_next;
1505                         break;
1506                 }
1507         }
1508 
1509         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_rm done\n"));
1510 
1511         rw_exit(&rhash->rsmhash_rw);
1512 }
1513 
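/*
 * Insert a resource under the given key. The resource must still be
 * in the expected state; with dup_check set (used for export segments)
 * an existing entry with the same key causes RSMERR_SEGID_IN_USE to be
 * returned.
 */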
1514 static int
1515 rsmhash_add(rsmhash_table_t *rhash, rsmresource_t *new, rsm_memseg_id_t key,
1516     int dup_check, rsm_resource_state_t state)
1517 {
1518         rsmresource_t   *p = NULL, **bktp;
1519         uint_t          hashval;
1520         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1521 
1522         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_add enter\n"));
1523 
1524         /* lock table */
1525         rw_enter(&rhash->rsmhash_rw, RW_WRITER);
1526 
        /*
         * If the current resource state is other than the state passed in
         * then the resource is (probably) already on the list; e.g. for an
         * import segment, if the state is not RSM_STATE_NEW then it is
         * already on the list.
         */
1533         RSMRC_LOCK(new);
1534         if (new->rsmrc_state != state) {
1535                 RSMRC_UNLOCK(new);
1536                 rw_exit(&rhash->rsmhash_rw);
1537                 return (RSMERR_BAD_SEG_HNDL);
1538         }
1539 
1540         hashval = rsmhash(key);
1541         DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_add %d\n", hashval));
1542 
1543         if (dup_check) {
1544                 /*
1545                  * Used for checking export segments; don't want to have
1546                  * the same key used for multiple segments.
1547                  */
1548 
1549                 p = (rsmresource_t *)rsmhash_getbkt(rhash, hashval);
1550 
1551                 for (; p; p = p->rsmrc_next) {
1552                         if (p->rsmrc_key == key) {
1553                                 RSMRC_UNLOCK(new);
1554                                 break;
1555                         }
1556                 }
1557         }
1558 
1559         if (p == NULL) {
1560                 /* Key doesn't exist, add it */
1561 
1562                 bktp = (rsmresource_t **)rsmhash_bktaddr(rhash, hashval);
1563 
1564                 new->rsmrc_key = key;
1565                 new->rsmrc_next = *bktp;
1566                 *bktp = new;
1567         }
1568 
1569         rw_exit(&rhash->rsmhash_rw);
1570 
1571         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_add done\n"));
1572 
1573         return (p == NULL ? RSM_SUCCESS : RSMERR_SEGID_IN_USE);
1574 }
1575 
1576 /*
1577  * XOR each byte of the key.
1578  */
1579 static uint_t
1580 rsmhash(rsm_memseg_id_t key)
1581 {
1582         uint_t  hash = key;
1583 
1584         hash ^=  (key >> 8);
1585         hash ^=  (key >> 16);
1586         hash ^=  (key >> 24);
1587 
1588         return (hash % rsm_hash_size);
1589 
1590 }
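
/*
 * Illustrative example: for key 0x12345678 the folded value is
 * 0x12345678 ^ 0x123456 ^ 0x1234 ^ 0x12, and the bucket index is that
 * value modulo rsm_hash_size.
 */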
1591 
1592 /*
1593  * generic function to get a specific bucket
1594  */
1595 static void *
1596 rsmhash_getbkt(rsmhash_table_t *rhash, uint_t hashval)
1597 {
1598 
1599         if (rhash->bucket == NULL)
1600                 return (NULL);
1601         else
1602                 return ((void *)rhash->bucket[hashval]);
1603 }
1604 
1605 /*
1606  * generic function to get a specific bucket's address
1607  */
1608 static void **
1609 rsmhash_bktaddr(rsmhash_table_t *rhash, uint_t hashval)
1610 {
1611         if (rhash->bucket == NULL)
1612                 return (NULL);
1613         else
1614                 return ((void **)&(rhash->bucket[hashval]));
1615 }
1616 
1617 /*
1618  * generic function to alloc a hash table
1619  */
1620 static void
1621 rsmhash_alloc(rsmhash_table_t *rhash, int size)
1622 {
1623         rhash->bucket = (rsmresource_t **)
1624             kmem_zalloc(size * sizeof (rsmresource_t *), KM_SLEEP);
1625 }
1626 
1627 /*
1628  * generic function to free a hash table
1629  */
1630 static void
1631 rsmhash_free(rsmhash_table_t *rhash, int size)
1632 {
1633 
1634         kmem_free((void *)rhash->bucket, size * sizeof (caddr_t));
1635         rhash->bucket = NULL;
1636 
1637 }
1638 /* *********************** Exported Segment Key Management ************ */
1639 
1640 #define rsmexport_add(new, key)         \
1641         rsmhash_add(&rsm_export_segs, (rsmresource_t *)new, key, 1, \
1642             RSM_STATE_BIND)
1643 
1644 #define rsmexport_rm(arg)       \
1645         rsmhash_rm(&rsm_export_segs, (rsmresource_t *)(arg))
1646 
1647 #define rsmexport_lookup(key)   \
1648         (rsmseg_t *)rsmhash_lookup(&rsm_export_segs, key, RSM_STATE_EXPORT)
1649 
1650 /* ************************** Import Segment List Management ********** */
1651 
/*
 * Add a segment to the import list. This is useful for paging and for
 * loopback segment unloading.
 */
1656 #define rsmimport_add(arg, key) \
1657         rsmhash_add(&rsm_import_segs, (rsmresource_t *)(arg), (key), 0, \
1658             RSM_STATE_NEW)
1659 
1660 #define rsmimport_rm(arg)       \
1661         rsmhash_rm(&rsm_import_segs, (rsmresource_t *)(arg))
1662 
1663 /*
1664  *      #define rsmimport_lookup(key)   \
1665  *      (rsmseg_t *)rsmhash_lookup(&rsm_import_segs, (key), RSM_STATE_CONNECT)
1666  */
1667 
/*
 * Increase the reference count and make the import segment point to the
 * shared data structure. Returns a pointer to the shared data structure,
 * which is locked upon return.
 */
1673 static rsm_import_share_t *
1674 rsmshare_get(rsm_memseg_id_t key, rsm_node_id_t node, adapter_t *adapter,
1675     rsmseg_t *segp)
1676 {
1677         uint_t          hash;
1678         rsmresource_t           *p;
1679         rsm_import_share_t      *shdatap;
1680         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1681 
1682         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmshare_get enter\n"));
1683 
1684         hash = rsmhash(key);
1685         /* lock table */
1686         rw_enter(&rsm_import_segs.rsmhash_rw, RW_WRITER);
1687         DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmshare_get:key=%u, hash=%d\n",
1688             key, hash));
1689 
1690         p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hash);
1691 
1692         for (; p; p = p->rsmrc_next) {
1693                 /*
1694                  * Look for an entry that is importing the same exporter
1695                  * with the share data structure allocated.
1696                  */
1697                 if ((p->rsmrc_key == key) &&
1698                     (p->rsmrc_node == node) &&
1699                     (p->rsmrc_adapter == adapter) &&
1700                     (((rsmseg_t *)p)->s_share != NULL)) {
1701                         shdatap = ((rsmseg_t *)p)->s_share;
1702                         break;
1703                 }
1704         }
1705 
1706         if (p == NULL) {
1707                 /* we are the first importer, create the shared data struct */
1708                 shdatap = kmem_zalloc(sizeof (rsm_import_share_t), KM_SLEEP);
1709                 shdatap->rsmsi_state = RSMSI_STATE_NEW;
1710                 shdatap->rsmsi_segid = key;
1711                 shdatap->rsmsi_node = node;
1712                 mutex_init(&shdatap->rsmsi_lock, NULL, MUTEX_DRIVER, NULL);
1713                 cv_init(&shdatap->rsmsi_cv, NULL, CV_DRIVER, 0);
1714         }
1715 
1716         rsmseglock_acquire(segp);
1717 
1718         /* we grab the shared lock before returning from this function */
1719         mutex_enter(&shdatap->rsmsi_lock);
1720 
1721         shdatap->rsmsi_refcnt++;
1722         segp->s_share = shdatap;
1723 
1724         rsmseglock_release(segp);
1725 
1726         rw_exit(&rsm_import_segs.rsmhash_rw);
1727 
1728         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmshare_get done\n"));
1729 
1730         return (shdatap);
1731 }
1732 
/*
 * The shared data structure must be locked before calling
 * rsmsharecv_signal(). If the state matches oldstate, change it to
 * newstate and signal any waiting segments.
 */
1738 void
1739 rsmsharecv_signal(rsmseg_t *seg, int oldstate, int newstate)
1740 {
1741         ASSERT(rsmsharelock_held(seg));
1742 
1743         if (seg->s_share->rsmsi_state == oldstate) {
1744                 seg->s_share->rsmsi_state = newstate;
1745                 cv_broadcast(&seg->s_share->rsmsi_cv);
1746         }
1747 }
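
/*
 * Illustrative usage sketch (the RSMSI state names here are assumed
 * from the import-share state set used elsewhere in this file):
 *
 *	rsmsharelock_acquire(seg);
 *	rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING, RSMSI_STATE_CONNECTED);
 *	rsmsharelock_release(seg);
 */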
1748 
/*
 * Add an importing-node token to the importer list hash table.
 */
1752 static void
1753 importer_list_add(rsm_node_id_t node, rsm_memseg_id_t key, rsm_addr_t hwaddr,
1754     void *cookie)
1755 {
1756 
1757         importing_token_t       *head;
1758         importing_token_t       *new_token;
1759         int                     index;
1760 
1761         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1762 
1763         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_add enter\n"));
1764 
1765         new_token = kmem_zalloc(sizeof (importing_token_t), KM_SLEEP);
1766         new_token->importing_node = node;
1767         new_token->key = key;
1768         new_token->import_segment_cookie = cookie;
1769         new_token->importing_adapter_hwaddr = hwaddr;
1770 
1771         index = rsmhash(key);
1772 
1773         mutex_enter(&importer_list.lock);
1774 
1775         head = importer_list.bucket[index];
1776         importer_list.bucket[index] = new_token;
1777         new_token->next = head;
1778         mutex_exit(&importer_list.lock);
1779 
1780         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_add done\n"));
1781 }
1782 
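/*
 * Remove the token matching (node, cookie) from the importer list
 * bucket for this key, if one is present.
 */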
1783 static void
1784 importer_list_rm(rsm_node_id_t node,  rsm_memseg_id_t key, void *cookie)
1785 {
1786 
1787         importing_token_t       *prev, *token = NULL;
1788         int                     index;
1789         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1790 
1791         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_rm enter\n"));
1792 
1793         index = rsmhash(key);
1794 
1795         mutex_enter(&importer_list.lock);
1796 
1797         token = importer_list.bucket[index];
1798 
1799         prev = token;
1800         while (token != NULL) {
1801                 if (token->importing_node == node &&
1802                     token->import_segment_cookie == cookie) {
1803                         if (prev == token)
1804                                 importer_list.bucket[index] = token->next;
1805                         else
1806                                 prev->next = token->next;
1807                         kmem_free((void *)token, sizeof (*token));
1808                         break;
1809                 } else {
1810                         prev = token;
1811                         token = token->next;
1812                 }
1813         }
1814 
1815         mutex_exit(&importer_list.lock);
1816 
1817         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_rm done\n"));
1820 }
1821 
/* ************************* Segment Structure Management ************* */
1823 
1824 /*
1825  * Free segment structure
1826  */
1827 static void
1828 rsmseg_free(rsmseg_t *seg)
1829 {
1830 
1831         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1832 
1833         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_free enter\n"));
1834 
1835         /* need to take seglock here to avoid race with rsmmap_unmap() */
1836         rsmseglock_acquire(seg);
1837         if (seg->s_ckl != NULL) {
1838                 /* Segment is still busy */
1839                 seg->s_state = RSM_STATE_END;
1840                 rsmseglock_release(seg);
1841                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
1842                     "rsmseg_free done\n"));
1843                 return;
1844         }
1845 
1846         rsmseglock_release(seg);
1847 
1848         ASSERT(seg->s_state == RSM_STATE_END || seg->s_state == RSM_STATE_NEW);
1849 
        /*
         * If it's an importer, decrement the refcount and, if it drops to
         * zero, free the shared data structure. This is also where a
         * failed rsm_connect() drops its reference.
         */
1855         if (seg->s_share != NULL) {
1856 
1857                 ASSERT(seg->s_type == RSM_RESOURCE_IMPORT_SEGMENT);
1858 
1859                 rsmsharelock_acquire(seg);
1860 
1861                 ASSERT(seg->s_share->rsmsi_refcnt > 0);
1862 
1863                 seg->s_share->rsmsi_refcnt--;
1864 
1865                 if (seg->s_share->rsmsi_refcnt == 0) {
1866                         rsmsharelock_release(seg);
1867                         mutex_destroy(&seg->s_share->rsmsi_lock);
1868                         cv_destroy(&seg->s_share->rsmsi_cv);
1869                         kmem_free((void *)(seg->s_share),
1870                             sizeof (rsm_import_share_t));
1871                 } else {
1872                         rsmsharelock_release(seg);
1873                 }
1874                 /*
1875                  * The following needs to be done after any
1876                  * rsmsharelock calls which use seg->s_share.
1877                  */
1878                 seg->s_share = NULL;
1879         }
1880 
1881         cv_destroy(&seg->s_cv);
1882         mutex_destroy(&seg->s_lock);
1883         rsmacl_free(seg->s_acl, seg->s_acl_len);
1884         rsmpiacl_free(seg->s_acl_in, seg->s_acl_len);
1885         if (seg->s_adapter)
1886                 rsmka_release_adapter(seg->s_adapter);
1887 
1888         kmem_free((void *)seg, sizeof (*seg));
1889 
1890         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_free done\n"));
1891 
1892 }
1893 
1894 
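/*
 * Allocate and initialize a segment structure in RSM_STATE_NEW; the
 * mode, uid and gid are derived from the calling process credentials.
 */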
1895 static rsmseg_t *
1896 rsmseg_alloc(minor_t num, struct cred *cred)
1897 {
1898         rsmseg_t        *new;
1899         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1900 
1901         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_alloc enter\n"));
        /*
         * Allocate memory for the new segment. This should eventually
         * come from a segkmem cache.
         */
1905         new = (rsmseg_t *)kmem_zalloc(sizeof (*new), KM_SLEEP);
1906 
1907         new->s_state = RSM_STATE_NEW;
1908         new->s_minor = num;
1909         new->s_acl_len       = 0;
1910         new->s_cookie = NULL;
1911         new->s_adapter = NULL;
1912 
1913         new->s_mode = 0777 & ~PTOU((ttoproc(curthread)))->u_cmask;
1914         /* we don't have a key yet, will set at export/connect */
1915         new->s_uid  = crgetuid(cred);
1916         new->s_gid  = crgetgid(cred);
1917 
1918         mutex_init(&new->s_lock, NULL, MUTEX_DRIVER, (void *)NULL);
1919         cv_init(&new->s_cv, NULL, CV_DRIVER, 0);
1920 
1921         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_alloc done\n"));
1922 
1923         return (new);
1924 }
1925 
1926 /* ******************************** Driver Open/Close/Poll *************** */
1927 
1928 /*ARGSUSED1*/
1929 static int
1930 rsm_open(dev_t *devp, int flag, int otyp, struct cred *cred)
1931 {
1932         minor_t rnum;
1933         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI);
1934 
1935         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_open enter\n"));
1936         /*
1937          * Char only
1938          */
1939         if (otyp != OTYP_CHR) {
1940                 DBG_PRINTF((category, RSM_ERR, "rsm_open: bad otyp\n"));
1941                 return (EINVAL);
1942         }
1943 
        /*
         * Only minor number zero can be opened; clones are used for
         * resources.
         */
1947         if (getminor(*devp) != RSM_DRIVER_MINOR) {
1948                 DBG_PRINTF((category, RSM_ERR,
1949                     "rsm_open: bad minor %d\n", getminor(*devp)));
1950                 return (ENODEV);
1951         }
1952 
1953         if ((flag & FEXCL) != 0 && secpolicy_excl_open(cred) != 0) {
1954                 DBG_PRINTF((category, RSM_ERR, "rsm_open: bad perm\n"));
1955                 return (EPERM);
1956         }
1957 
1958         if (!(flag & FWRITE)) {
1959                 /*
1960                  * The library function _rsm_librsm_init calls open for
1961                  * /dev/rsm with flag set to O_RDONLY.  We want a valid
1962                  * file descriptor to be returned for minor device zero.
1963                  */
1964 
1965                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
1966                     "rsm_open RDONLY done\n"));
1967                 return (DDI_SUCCESS);
1968         }
1969 
1970         /*
1971          * - allocate new minor number and segment.
1972          * - add segment to list of all segments.
1973          * - set minordev data to segment
1974          * - update devp argument to new device
1975          * - update s_cred to cred; make sure you do crhold(cred);
1976          */
1977 
1978         /* allocate a new resource number */
1979         if (rsmresource_alloc(&rnum) == RSM_SUCCESS) {
1980                 /*
1981                  * We will bind this minor to a specific resource in first
1982                  * ioctl
1983                  */
1984                 *devp = makedevice(getmajor(*devp), rnum);
1985         } else {
1986                 return (EAGAIN);
1987         }
1988 
1989         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_open done\n"));
1990         return (DDI_SUCCESS);
1991 }
1992 
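/*
 * Common segment close processing: tear the segment down according to
 * its current state. With force_flag set (the export_force_destroy
 * path) the segment is parked in RSM_STATE_ZOMBIE instead of being
 * freed, so that a later rsm_close can destroy it.
 */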
1993 static void
1994 rsmseg_close(rsmseg_t *seg, int force_flag)
1995 {
1996         int e = RSM_SUCCESS;
1997 
1998         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI);
1999 
2000         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_close enter\n"));
2001 
2002         rsmseglock_acquire(seg);
2003         if (!force_flag && (seg->s_hdr.rsmrc_type ==
2004             RSM_RESOURCE_EXPORT_SEGMENT)) {
                /*
                 * If we are processing rsm_close, wait for force_destroy
                 * processing to complete, since it must finish before we
                 * can free the segment. force_destroy applies only to
                 * export segments.
                 */
2011                 while (seg->s_flags & RSM_FORCE_DESTROY_WAIT) {
2012                         cv_wait(&seg->s_cv, &seg->s_lock);
2013                 }
2014         }
2015         rsmseglock_release(seg);
2016 
2017         /* It's ok to read the state without a lock */
2018         switch (seg->s_state) {
2019         case RSM_STATE_EXPORT:
2020         case RSM_STATE_EXPORT_QUIESCING:
2021         case RSM_STATE_EXPORT_QUIESCED:
2022                 e = rsm_unpublish(seg, 1);
2023                 /* FALLTHRU */
2024         case RSM_STATE_BIND_QUIESCED:
2025                 /* FALLTHRU */
2026         case RSM_STATE_BIND:
2027                 e = rsm_unbind(seg);
2028                 if (e != RSM_SUCCESS && force_flag == 1)
2029                         return;
2030                 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT);
2031                 /* FALLTHRU */
2032         case RSM_STATE_NEW_QUIESCED:
2033                 rsmseglock_acquire(seg);
2034                 seg->s_state = RSM_STATE_NEW;
2035                 cv_broadcast(&seg->s_cv);
2036                 rsmseglock_release(seg);
2037                 break;
2038         case RSM_STATE_NEW:
2039                 break;
2040         case RSM_STATE_ZOMBIE:
                /*
                 * Segments in this state have been removed from the
                 * exported segments list and have been unpublished and
                 * unbound. They were removed during a callback to
                 * rsm_export_force_destroy, which unlocks exported
                 * memory segments when a process exits without calling
                 * rsm_close on them (as can happen when a process calls
                 * fork or exec and then exits). Once the segments are in
                 * the ZOMBIE state, all that remains is to destroy them
                 * when rsm_close is called. That is done here: the state
                 * is changed to NEW so that rsmseg_free is called later
                 * in this function.
                 */
2057                 rsmseglock_acquire(seg);
2058                 seg->s_state = RSM_STATE_NEW;
2059                 rsmseglock_release(seg);
2060                 break;
2061         case RSM_STATE_MAP_QUIESCE:
2062         case RSM_STATE_ACTIVE:
2063                 /* Disconnect will handle the unmap */
2064         case RSM_STATE_CONN_QUIESCE:
2065         case RSM_STATE_CONNECT:
2066         case RSM_STATE_DISCONNECT:
2067                 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
2068                 (void) rsm_disconnect(seg);
2069                 break;
2070         case RSM_STATE_MAPPING:
2071                 /*FALLTHRU*/
2072         case RSM_STATE_END:
2073                 DBG_PRINTF((category, RSM_ERR,
2074                     "Invalid segment state %d in rsm_close\n", seg->s_state));
2075                 break;
2076         default:
2077                 DBG_PRINTF((category, RSM_ERR,
2078                     "Invalid segment state %d in rsm_close\n", seg->s_state));
2079                 break;
2080         }
2081 
2082         /*
2083          * check state.
2084          * - make sure you do crfree(s_cred);
2085          * release segment and minor number
2086          */
2087         ASSERT(seg->s_state == RSM_STATE_NEW);
2088 
        /*
         * The export_force_destroy callback exists to unlock the exported
         * segments of a process that does a fork or exec and then exits.
         * It calls this function with the force flag set to 1, which
         * indicates that the segment state must be converted to ZOMBIE.
         * In that state the segments still exist and have been unlocked;
         * most importantly, the only operation allowed on them is
         * destruction on a subsequent rsm_close.
         */
2098         if (force_flag) {
2099                 rsmseglock_acquire(seg);
2100                 seg->s_state = RSM_STATE_ZOMBIE;
2101                 rsmseglock_release(seg);
2102         } else {
2103                 rsmseg_free(seg);
2104         }
2105 
2106         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_close done\n"));
2107 }
2108 
2109 static int
2110 rsm_close(dev_t dev, int flag, int otyp, cred_t *cred)
2111 {
2112         minor_t rnum = getminor(dev);
2113         rsmresource_t *res;
2114         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI);
2115 
2116         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close enter\n"));
2117 
2118         flag = flag; cred = cred;
2119 
2120         if (otyp != OTYP_CHR)
2121                 return (EINVAL);
2122 
2123         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rnum = %d\n", rnum));
2124 
2125         /*
2126          * At this point we are the last reference to the resource.
2127          * Free resource number from resource table.
2128          * It's ok to remove number before we free the segment.
2129          * We need to lock the resource to protect against remote calls.
2130          */
2131         if (rnum == RSM_DRIVER_MINOR ||
2132             (res = rsmresource_free(rnum)) == NULL) {
2133                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close done\n"));
2134                 return (DDI_SUCCESS);
2135         }
2136 
2137         switch (res->rsmrc_type) {
2138         case RSM_RESOURCE_EXPORT_SEGMENT:
2139         case RSM_RESOURCE_IMPORT_SEGMENT:
2140                 rsmseg_close((rsmseg_t *)res, 0);
2141                 break;
2142         case RSM_RESOURCE_BAR:
2143                 DBG_PRINTF((category, RSM_ERR, "bad resource in rsm_close\n"));
2144                 break;
2145         default:
2146                 break;
2147         }
2148 
2149         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close done\n"));
2150 
2151         return (DDI_SUCCESS);
2152 }
2153 
2154 /*
2155  * rsm_inc_pgcnt
2156  *
2157  * Description: increment rsm page counter.
2158  *
2159  * Parameters:  pgcnt_t pnum;   number of pages to be used
2160  *
 * Returns:     RSM_SUCCESS     if memory limit not exceeded
 *              RSMERR_INSUFFICIENT_MEM if memory limit exceeded. In this
 *                              case, the page counter remains unchanged.
2164  *
2165  */
2166 static int
2167 rsm_inc_pgcnt(pgcnt_t pnum)
2168 {
2169         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2170         if (rsm_pgcnt_max == 0) { /* no upper limit has been set */
2171                 return (RSM_SUCCESS);
2172         }
2173 
2174         mutex_enter(&rsm_pgcnt_lock);
2175 
2176         if (rsm_pgcnt + pnum > rsm_pgcnt_max) {
2177                 /* ensure that limits have not been exceeded */
2178                 mutex_exit(&rsm_pgcnt_lock);
2179                 return (RSMERR_INSUFFICIENT_MEM);
2180         }
2181 
2182         rsm_pgcnt += pnum;
2183         DBG_PRINTF((category, RSM_DEBUG, "rsm_pgcnt incr to %d.\n",
2184             rsm_pgcnt));
2185         mutex_exit(&rsm_pgcnt_lock);
2186 
2187         return (RSM_SUCCESS);
2188 }
2189 
2190 /*
2191  * rsm_dec_pgcnt
2192  *
2193  * Description: decrement rsm page counter.
2194  *
2195  * Parameters:  pgcnt_t pnum;   number of pages freed
2196  *
2197  */
2198 static void
2199 rsm_dec_pgcnt(pgcnt_t pnum)
2200 {
2201         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2202 
2203         if (rsm_pgcnt_max == 0) { /* no upper limit has been set */
2204                 return;
2205         }
2206 
2207         mutex_enter(&rsm_pgcnt_lock);
2208         ASSERT(rsm_pgcnt >= pnum);
2209         rsm_pgcnt -= pnum;
2210         DBG_PRINTF((category, RSM_DEBUG, "rsm_pgcnt decr to %d.\n",
2211             rsm_pgcnt));
2212         mutex_exit(&rsm_pgcnt_lock);
2213 }
2214 
2215 static struct umem_callback_ops rsm_as_ops = {
2216         UMEM_CALLBACK_VERSION, /* version number */
2217         rsm_export_force_destroy,
2218 };
2219 
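/*
 * Lock down the given user address range with umem_lockmemory() and
 * charge the pages against the RSM page-count limit. The resulting
 * cookie is used by the caller for later unbind and rebind operations.
 */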
2220 static int
2221 rsm_bind_pages(ddi_umem_cookie_t *cookie, caddr_t vaddr, size_t len,
2222     proc_t *procp)
2223 {
2224         int error = RSM_SUCCESS;
2225         ulong_t pnum;
2226         struct umem_callback_ops *callbackops = &rsm_as_ops;
2227 
2228         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2229 
2230         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind_pages enter\n"));
2231 
2232         /*
2233          * Make sure vaddr and len are aligned on a page boundary
2234          */
2235         if ((uintptr_t)vaddr & (PAGESIZE - 1)) {
2236                 return (RSMERR_BAD_ADDR);
2237         }
2238 
2239         if (len & (PAGESIZE - 1)) {
2240                 return (RSMERR_BAD_LENGTH);
2241         }
2242 
2243         /*
2244          * Find number of pages
2245          */
2246         pnum = btopr(len);
2247         error = rsm_inc_pgcnt(pnum);
2248         if (error != RSM_SUCCESS) {
2249                 DBG_PRINTF((category, RSM_ERR,
2250                     "rsm_bind_pages:mem limit exceeded\n"));
2251                 return (RSMERR_INSUFFICIENT_MEM);
2252         }
2253 
2254         error = umem_lockmemory(vaddr, len,
2255             DDI_UMEMLOCK_WRITE|DDI_UMEMLOCK_READ|DDI_UMEMLOCK_LONGTERM,
2256             cookie,
2257             callbackops, procp);
2258 
2259         if (error) {
2260                 rsm_dec_pgcnt(pnum);
                DBG_PRINTF((category, RSM_ERR,
                    "rsm_bind_pages:umem_lockmemory failed\n"));
                /*
                 * umem_lockmemory, in the case of failure, returns one of
                 * the following three errors. These are translated into
                 * the RSMERR namespace and returned.
                 */
2268                 if (error == EFAULT)
2269                         return (RSMERR_BAD_ADDR);
2270                 else if (error == EACCES)
2271                         return (RSMERR_PERM_DENIED);
2272                 else
2273                         return (RSMERR_INSUFFICIENT_MEM);
2274         }
2275 
2276         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind_pages done\n"));
2277 
2278         return (error);
2279 
2280 }
2281 
2282 static int
2283 rsm_unbind_pages(rsmseg_t *seg)
2284 {
2285         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2286 
2287         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind_pages enter\n"));
2288 
2289         ASSERT(rsmseglock_held(seg));
2290 
2291         if (seg->s_cookie != NULL) {
2292                 /* unlock address range */
2293                 ddi_umem_unlock(seg->s_cookie);
2294                 rsm_dec_pgcnt(btopr(seg->s_len));
2295                 seg->s_cookie = NULL;
2296         }
2297 
2298         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind_pages done\n"));
2299 
2300         return (RSM_SUCCESS);
2301 }
2302 
2303 
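/*
 * Bind the pages of a new export segment: validate the request, lock
 * down the address range, move the segment to RSM_STATE_BIND and copy
 * the allocated minor number back out to the caller.
 */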
2304 static int
2305 rsm_bind(rsmseg_t *seg, rsm_ioctlmsg_t *msg, intptr_t dataptr, int mode)
2306 {
2307         int e;
2308         adapter_t *adapter;
2309         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2310 
2311         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind enter\n"));
2312 
2313         adapter = rsm_getadapter(msg, mode);
2314         if (adapter == NULL) {
2315                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2316                     "rsm_bind done:no adapter\n"));
2317                 return (RSMERR_CTLR_NOT_PRESENT);
2318         }
2319 
2320         /* lock address range */
2321         if (msg->vaddr == NULL) {
2322                 rsmka_release_adapter(adapter);
2323                 DBG_PRINTF((category, RSM_ERR,
2324                     "rsm: rsm_bind done: invalid vaddr\n"));
2325                 return (RSMERR_BAD_ADDR);
2326         }
2327         if (msg->len <= 0) {
2328                 rsmka_release_adapter(adapter);
2329                 DBG_PRINTF((category, RSM_ERR,
2330                     "rsm_bind: invalid length\n"));
2331                 return (RSMERR_BAD_LENGTH);
2332         }
2333 
2334         /* Lock segment */
2335         rsmseglock_acquire(seg);
2336 
2337         while (seg->s_state == RSM_STATE_NEW_QUIESCED) {
2338                 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
2339                         DBG_PRINTF((category, RSM_DEBUG,
2340                             "rsm_bind done: cv_wait INTERRUPTED"));
2341                         rsmka_release_adapter(adapter);
2342                         rsmseglock_release(seg);
2343                         return (RSMERR_INTERRUPTED);
2344                 }
2345         }
2346 
2347         ASSERT(seg->s_state == RSM_STATE_NEW);
2348 
2349         ASSERT(seg->s_cookie == NULL);
2350 
2351         e = rsm_bind_pages(&seg->s_cookie, msg->vaddr, msg->len, curproc);
2352         if (e == RSM_SUCCESS) {
2353                 seg->s_flags |= RSM_USER_MEMORY;
2354                 if (msg->perm & RSM_ALLOW_REBIND) {
2355                         seg->s_flags |= RSMKA_ALLOW_UNBIND_REBIND;
2356                 }
2357                 if (msg->perm & RSM_CREATE_SEG_DONTWAIT) {
2358                         seg->s_flags |= RSMKA_SET_RESOURCE_DONTWAIT;
2359                 }
2360                 seg->s_region.r_vaddr = msg->vaddr;
                /*
                 * Set the s_pid value in the segment structure. This is
                 * used to identify exported segments belonging to a
                 * particular process so that when the process exits, these
                 * segments can be unlocked forcefully even if rsm_close is
                 * not called on process exit, since there may be other
                 * processes referencing them (for example after a fork or
                 * exec). The s_pid value is also used to authenticate the
                 * process doing a publish or unpublish on the export
                 * segment: only the creator of the export segment may
                 * publish, unpublish or unbind the segment.
                 */
2373                 seg->s_pid = ddi_get_pid();
2374                 seg->s_len = msg->len;
2375                 seg->s_state = RSM_STATE_BIND;
2376                 seg->s_adapter = adapter;
2377                 seg->s_proc = curproc;
2378         } else {
2379                 rsmka_release_adapter(adapter);
2380                 DBG_PRINTF((category, RSM_WARNING,
2381                     "unable to lock down pages\n"));
2382         }
2383 
2384         msg->rnum = seg->s_minor;
2385         /* Unlock segment */
2386         rsmseglock_release(seg);
2387 
2388         if (e == RSM_SUCCESS) {
2389                 /* copyout the resource number */
2390 #ifdef _MULTI_DATAMODEL
2391                 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
2392                         rsm_ioctlmsg32_t msg32;
2393 
2394                         msg32.rnum = msg->rnum;
2395                         if (ddi_copyout((caddr_t)&msg32.rnum,
2396                             (caddr_t)&((rsm_ioctlmsg32_t *)dataptr)->rnum,
2397                             sizeof (minor_t), mode)) {
2398                                 rsmka_release_adapter(adapter);
2399                                 e = RSMERR_BAD_ADDR;
2400                         }
                } else
#endif
                if (ddi_copyout((caddr_t)&msg->rnum,
2404                     (caddr_t)&((rsm_ioctlmsg_t *)dataptr)->rnum,
2405                     sizeof (minor_t), mode)) {
2406                         rsmka_release_adapter(adapter);
2407                         e = RSMERR_BAD_ADDR;
2408                 }
2409         }
2410 
2411         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind done\n"));
2412 
2413         return (e);
2414 }
2415 
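/*
 * After a successful rebind, update the umem cookie of local importers
 * of this segment that are already mapped (RSM_STATE_ACTIVE) so that
 * their mappings refer to the newly bound pages.
 */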
2416 static void
2417 rsm_remap_local_importers(rsm_node_id_t src_nodeid,
2418     rsm_memseg_id_t ex_segid,
2419     ddi_umem_cookie_t cookie)
{
2422         rsmresource_t   *p = NULL;
2423         rsmhash_table_t *rhash = &rsm_import_segs;
2424         uint_t          index;
2425 
2426         DBG_PRINTF((RSM_KERNEL_AGENT | RSM_FUNC_ALL, RSM_DEBUG_VERBOSE,
2427             "rsm_remap_local_importers enter\n"));
2428 
2429         index = rsmhash(ex_segid);
2430 
2431         rw_enter(&rhash->rsmhash_rw, RW_READER);
2432 
2433         p = rsmhash_getbkt(rhash, index);
2434 
2435         for (; p; p = p->rsmrc_next) {
2436                 rsmseg_t *seg = (rsmseg_t *)p;
2437                 rsmseglock_acquire(seg);
2438                 /*
2439                  * Change the s_cookie value of only the local importers
2440                  * which have been mapped (in state RSM_STATE_ACTIVE).
2441                  * Note that there is no need to change the s_cookie value
2442                  * if the imported segment is in RSM_STATE_MAPPING since
2443                  * eventually the s_cookie will be updated via the mapping
2444                  * functionality.
2445                  */
2446                 if ((seg->s_segid == ex_segid) && (seg->s_node == src_nodeid) &&
2447                     (seg->s_state == RSM_STATE_ACTIVE)) {
2448                         seg->s_cookie = cookie;
2449                 }
2450                 rsmseglock_release(seg);
2451         }
2452         rw_exit(&rhash->rsmhash_rw);
2453 
2454         DBG_PRINTF((RSM_KERNEL_AGENT | RSM_FUNC_ALL, RSM_DEBUG_VERBOSE,
2455             "rsm_remap_local_importers done\n"));
2456 }
2457 
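/*
 * Rebind an export segment to a new address range of the same length:
 * lock down the new pages, ask the RSMPI driver to rebind, then
 * release the old pages and remap any local importers.
 */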
2458 static int
2459 rsm_rebind(rsmseg_t *seg, rsm_ioctlmsg_t *msg)
2460 {
2461         int e;
2462         adapter_t *adapter;
2463         ddi_umem_cookie_t cookie;
2464         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2465 
2466         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind enter\n"));
2467 
2468         /* Check for permissions to rebind */
2469         if (!(seg->s_flags & RSMKA_ALLOW_UNBIND_REBIND)) {
2470                 return (RSMERR_REBIND_NOT_ALLOWED);
2471         }
2472 
2473         if (seg->s_pid != ddi_get_pid() &&
2474             ddi_get_pid() != 0) {
2475                 DBG_PRINTF((category, RSM_ERR, "rsm_rebind: Not owner\n"));
2476                 return (RSMERR_NOT_CREATOR);
2477         }
2478 
        /*
         * Partial rebind is not allowed; hence the length passed in must
         * equal the segment length.
         */
2483         if (msg->vaddr == NULL) {
2484                 DBG_PRINTF((category, RSM_ERR,
2485                     "rsm_rebind done: null msg->vaddr\n"));
2486                 return (RSMERR_BAD_ADDR);
2487         }
2488         if (msg->len != seg->s_len) {
2489                 DBG_PRINTF((category, RSM_ERR,
2490                     "rsm_rebind: invalid length\n"));
2491                 return (RSMERR_BAD_LENGTH);
2492         }
2493 
2494         /* Lock segment */
2495         rsmseglock_acquire(seg);
2496 
2497         while ((seg->s_state == RSM_STATE_BIND_QUIESCED) ||
2498             (seg->s_state == RSM_STATE_EXPORT_QUIESCING) ||
2499             (seg->s_state == RSM_STATE_EXPORT_QUIESCED)) {
2500                 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
2501                         rsmseglock_release(seg);
2502                         DBG_PRINTF((category, RSM_DEBUG,
2503                             "rsm_rebind done: cv_wait INTERRUPTED"));
2504                         return (RSMERR_INTERRUPTED);
2505                 }
2506         }
2507 
2508         /* verify segment state */
2509         if ((seg->s_state != RSM_STATE_BIND) &&
2510             (seg->s_state != RSM_STATE_EXPORT)) {
2511                 /* Unlock segment */
2512                 rsmseglock_release(seg);
2513                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2514                     "rsm_rebind done: invalid state\n"));
2515                 return (RSMERR_BAD_SEG_HNDL);
2516         }
2517 
2518         ASSERT(seg->s_cookie != NULL);
2519 
2520         if (msg->vaddr == seg->s_region.r_vaddr) {
2521                 rsmseglock_release(seg);
2522                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind done\n"));
2523                 return (RSM_SUCCESS);
2524         }
2525 
2526         e = rsm_bind_pages(&cookie, msg->vaddr, msg->len, curproc);
2527         if (e == RSM_SUCCESS) {
2528                 struct buf *xbuf;
2529                 dev_t sdev = 0;
2530                 rsm_memory_local_t mem;
2531 
2532                 xbuf = ddi_umem_iosetup(cookie, 0, msg->len, B_WRITE,
2533                     sdev, 0, NULL, DDI_UMEM_SLEEP);
2534                 ASSERT(xbuf != NULL);
2535 
2536                 mem.ms_type = RSM_MEM_BUF;
2537                 mem.ms_bp = xbuf;
2538 
2539                 adapter = seg->s_adapter;
2540                 e = adapter->rsmpi_ops->rsm_rebind(
2541                     seg->s_handle.out, 0, &mem,
2542                     RSM_RESOURCE_DONTWAIT, NULL);
2543 
2544                 if (e == RSM_SUCCESS) {
2545                         /*
2546                          * unbind the older pages, and unload local importers;
2547                          * but don't disconnect importers
2548                          */
2549                         (void) rsm_unbind_pages(seg);
2550                         seg->s_cookie = cookie;
2551                         seg->s_region.r_vaddr = msg->vaddr;
2552                         rsm_remap_local_importers(my_nodeid, seg->s_segid,
2553                             cookie);
2554                 } else {
                        /*
                         * Unbind the pages associated with "cookie" by the
                         * rsm_bind_pages call above. This mirrors what
                         * rsm_unbind_pages does for seg->s_cookie.
                         */
2561                         ddi_umem_unlock(cookie);
2562                         rsm_dec_pgcnt(btopr(msg->len));
2563                         DBG_PRINTF((category, RSM_ERR,
2564                             "rsm_rebind failed with %d\n", e));
2565                 }
2566                 /*
2567                  * At present there is no dependency on the existence of xbuf.
2568                  * So we can free it here. If in the future this changes, it can
2569                  * be freed sometime during the segment destroy.
2570                  */
2571                 freerbuf(xbuf);
2572         }
2573 
2574         /* Unlock segment */
2575         rsmseglock_release(seg);
2576 
2577         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind done\n"));
2578 
2579         return (e);
2580 }
2581 
2582 static int
2583 rsm_unbind(rsmseg_t *seg)
2584 {
2585         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2586 
2587         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind enter\n"));
2588 
2589         rsmseglock_acquire(seg);
2590 
2591         /* verify segment state */
2592         if ((seg->s_state != RSM_STATE_BIND) &&
2593             (seg->s_state != RSM_STATE_BIND_QUIESCED)) {
2594                 rsmseglock_release(seg);
2595                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2596                     "rsm_unbind: invalid state\n"));
2597                 return (RSMERR_BAD_SEG_HNDL);
2598         }
2599 
2600         /* unlock current range */
2601         (void) rsm_unbind_pages(seg);
2602 
2603         if (seg->s_state == RSM_STATE_BIND) {
2604                 seg->s_state = RSM_STATE_NEW;
2605         } else if (seg->s_state == RSM_STATE_BIND_QUIESCED) {
2606                 seg->s_state = RSM_STATE_NEW_QUIESCED;
2607         }
2608 
2609         rsmseglock_release(seg);
2610 
2611         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind done\n"));
2612 
2613         return (RSM_SUCCESS);
2614 }
2615 
2616 /* **************************** Exporter Access List Management ******* */
2617 static void
2618 rsmacl_free(rsmapi_access_entry_t *acl, int acl_len)
2619 {
2620         int     acl_sz;
2621         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2622 
2623         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_free enter\n"));
2624 
2625         /* acl could be NULL */
2626 
2627         if (acl != NULL && acl_len > 0) {
2628                 acl_sz = acl_len * sizeof (rsmapi_access_entry_t);
2629                 kmem_free((void *)acl, acl_sz);
2630         }
2631 
2632         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_free done\n"));
2633 }
2634 
2635 static void
2636 rsmpiacl_free(rsm_access_entry_t *acl, int acl_len)
2637 {
2638         int     acl_sz;
2639         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2640 
2641         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_free enter\n"));
2642 
2643         if (acl != NULL && acl_len > 0) {
2644                 acl_sz = acl_len * sizeof (rsm_access_entry_t);
2645                 kmem_free((void *)acl, acl_sz);
2646         }
2647 
2648         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_free done\n"));
2649 
2650 }
2651 
2652 static int
2653 rsmacl_build(rsm_ioctlmsg_t *msg, int mode,
2654     rsmapi_access_entry_t **list, int *len, int loopback)
2655 {
2656         rsmapi_access_entry_t *acl;
2657         int     acl_len;
2658         int i;
2659         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2660 
2661         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_build enter\n"));
2662 
2663         *len = 0;
2664         *list = NULL;
2665 
2666         acl_len = msg->acl_len;
2667         if ((loopback && acl_len > 1) || (acl_len < 0) ||
2668             (acl_len > MAX_NODES)) {
2669                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2670                     "rsmacl_build done: acl invalid\n"));
2671                 return (RSMERR_BAD_ACL);
2672         }
2673 
2674         if (acl_len > 0 && acl_len <= MAX_NODES) {
2675                 size_t acl_size = acl_len * sizeof (rsmapi_access_entry_t);
2676 
2677                 acl = kmem_alloc(acl_size, KM_SLEEP);
2678 
2679                 if (ddi_copyin((caddr_t)msg->acl, (caddr_t)acl,
2680                     acl_size, mode)) {
2681                         kmem_free((void *) acl, acl_size);
2682                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2683                             "rsmacl_build done: BAD_ADDR\n"));
2684                         return (RSMERR_BAD_ADDR);
2685                 }
2686 
2687                 /*
2688                  * Verify access list
2689                  */
2690                 for (i = 0; i < acl_len; i++) {
2691                         if (acl[i].ae_node > MAX_NODES ||
2692                             (loopback && (acl[i].ae_node != my_nodeid)) ||
2693                             acl[i].ae_permission > RSM_ACCESS_TRUSTED) {
2694                                 /* invalid entry */
2695                                 kmem_free((void *) acl, acl_size);
2696                                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2697                                     "rsmacl_build done: EINVAL\n"));
2698                                 return (RSMERR_BAD_ACL);
2699                         }
2700                 }
2701 
2702                 *len = acl_len;
2703                 *list = acl;
2704         }
2705 
2706         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_build done\n"));
2707 
2708         return (DDI_SUCCESS);
2709 }
2710 
2711 static int
2712 rsmpiacl_create(rsmapi_access_entry_t *src, rsm_access_entry_t **dest,
2713     int acl_len, adapter_t *adapter)
2714 {
2715         rsm_access_entry_t *acl;
2716         rsm_addr_t hwaddr;
2717         int i;
2718         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2719 
2720         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_create enter\n"));
2721 
2722         if (src != NULL) {
2723                 size_t acl_size = acl_len * sizeof (rsm_access_entry_t);
2724                 acl = kmem_alloc(acl_size, KM_SLEEP);
2725 
2726                 /*
2727                  * translate access list
2728                  */
2729                 for (i = 0; i < acl_len; i++) {
2730                         if (src[i].ae_node == my_nodeid) {
2731                                 acl[i].ae_addr = adapter->hwaddr;
2732                         } else {
2733                                 hwaddr = get_remote_hwaddr(adapter,
2734                                     src[i].ae_node);
2735                                 if ((int64_t)hwaddr < 0) {
2736                                         /* invalid hwaddr */
2737                                         kmem_free((void *) acl, acl_size);
2738                                         DBG_PRINTF((category,
2739                                             RSM_DEBUG_VERBOSE,
2740                                             "rsmpiacl_create done:"
2741                                             "EINVAL hwaddr\n"));
2742                                         return (RSMERR_INTERNAL_ERROR);
2743                                 }
2744                                 acl[i].ae_addr = hwaddr;
2745                         }
2746                         /* rsmpi understands only RSM_PERM_XXXX */
2747                         acl[i].ae_permission =
2748                             src[i].ae_permission & RSM_PERM_RDWR;
2749                 }
2750                 *dest = acl;
2751         } else {
2752                 *dest = NULL;
2753         }
2754 
2755         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_create done\n"));
2756 
2757         return (RSM_SUCCESS);
2758 }
2759 
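/*
 * Validate a remote node's connect request against the export
 * segment's access list and fill in the reply with the effective
 * permission and the segment's attributes.
 */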
2760 static int
2761 rsmsegacl_validate(rsmipc_request_t *req, rsm_node_id_t rnode,
2762     rsmipc_reply_t *reply)
2763 {
2764 
2765         int             i;
2766         rsmseg_t        *seg;
2767         rsm_memseg_id_t key = req->rsmipc_key;
2768         rsm_permission_t perm = req->rsmipc_perm;
2769         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2770 
2771         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2772             "rsmsegacl_validate enter\n"));
2773 
        /*
         * Find the segment and grab its lock. We grab the segment lock
         * inside the search to avoid a race in which the segment is being
         * deleted while we already hold a pointer to it.
         */
2779         seg = rsmexport_lookup(key);
2780         if (!seg) {
2781                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2782                     "rsmsegacl_validate done: %u ENXIO\n", key));
2783                 return (RSMERR_SEG_NOT_PUBLISHED);
2784         }
2785 
2786         ASSERT(rsmseglock_held(seg));
2787         ASSERT(seg->s_state == RSM_STATE_EXPORT);
2788 
        /*
         * We implement a 2-level protection scheme.
         * First, we check whether the local/remote host has access rights.
         * Second, we check whether the user has access rights.
         *
         * This routine only validates the rnode access list.
         */
2796         if (seg->s_acl_len > 0) {
2797                 /*
2798                  * Check host access list
2799                  */
2800                 ASSERT(seg->s_acl != NULL);
2801                 for (i = 0; i < seg->s_acl_len; i++) {
2802                         if (seg->s_acl[i].ae_node == rnode) {
2803                                 perm &= seg->s_acl[i].ae_permission;
2804                                 goto found;
2805                         }
2806                 }
2807                 /* rnode is not found in the list */
2808                 rsmseglock_release(seg);
2809                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2810                     "rsmsegacl_validate done: EPERM\n"));
2811                 return (RSMERR_SEG_NOT_PUBLISHED_TO_NODE);
2812         } else {
2813                 /* use default owner creation umask */
2814                 perm &= seg->s_mode;
2815         }
2816 
2817 found:
2818         /* update perm for this node */
2819         reply->rsmipc_mode = perm;
2820         reply->rsmipc_uid = seg->s_uid;
2821         reply->rsmipc_gid = seg->s_gid;
2822         reply->rsmipc_segid = seg->s_segid;
2823         reply->rsmipc_seglen = seg->s_len;
2824 
2825         /*
2826          * Perm of requesting node is valid; source will validate user
2827          */
2828         rsmseglock_release(seg);
2829 
        /*
         * Add the importer to the list right away; if the connect fails,
         * the importer will ask the exporter to remove it.
         */
2834         importer_list_add(rnode, key, req->rsmipc_adapter_hwaddr,
2835             req->rsmipc_segment_cookie);
2836 
2837         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegacl_validate done\n"));
2838 
2839         return (RSM_SUCCESS);
2840 }
2841 
2842 
2843 /* ************************** Exporter Calls ************************* */
2844 
2845 static int
2846 rsm_publish(rsmseg_t *seg, rsm_ioctlmsg_t *msg, intptr_t dataptr, int mode)
2847 {
2848         int                     e;
2849         int                     acl_len;
2850         rsmapi_access_entry_t   *acl;
2851         rsm_access_entry_t      *rsmpi_acl;
2852         rsm_memory_local_t      mem;
2853         struct buf              *xbuf;
2854         dev_t                   sdev = 0;
2855         adapter_t               *adapter;
2856         rsm_memseg_id_t         segment_id = 0;
2857         int                     loopback_flag = 0;
2858         int                     create_flags = 0;
2859         rsm_resource_callback_t callback_flag;
2860         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2861 
2862         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_publish enter\n"));
2863 
2864         if (seg->s_adapter == &loopback_adapter)
2865                 loopback_flag = 1;
2866 
2867         if (seg->s_pid != ddi_get_pid() &&
2868             ddi_get_pid() != 0) {
2869                 DBG_PRINTF((category, RSM_ERR,
2870                     "rsm_publish: Not creator\n"));
2871                 return (RSMERR_NOT_CREATOR);
2872         }
2873 
2874         /*
2875          * Get per node access list
2876          */
2877         e = rsmacl_build(msg, mode, &acl, &acl_len, loopback_flag);
2878         if (e != DDI_SUCCESS) {
2879                 DBG_PRINTF((category, RSM_ERR,
2880                     "rsm_publish done: rsmacl_build failed\n"));
2881                 return (e);
2882         }
2883 
2884         /*
2885          * The application provided msg->key is used for resolving a
2886          * segment id according to the following:
2887          *    key = 0                   Kernel Agent selects the segment id
2888          *    key <= RSM_DLPI_ID_END Reserved for system usage except
2889          *                              RSMLIB range
2890          *    key < RSM_USER_APP_ID_BASE segment id = key
2891          *    key >= RSM_USER_APP_ID_BASE Reserved for KA selections
2892          *
2893          * rsm_nextavail_segmentid is initialized to 0x80000000 and
2894          * overflows to zero after 0x80000000 allocations.
         * An algorithm is needed which allows reinitialization and
         * provides for reallocation after overflow.  For now,
         * RSMERR_INSUFFICIENT_RESOURCES is returned once the overflow
         * condition has occurred.
2898          */
2899         if (msg->key == 0) {
2900                 mutex_enter(&rsm_lock);
2901                 segment_id = rsm_nextavail_segmentid;
2902                 if (segment_id != 0) {
2903                         rsm_nextavail_segmentid++;
2904                         mutex_exit(&rsm_lock);
2905                 } else {
2906                         mutex_exit(&rsm_lock);
2907                         DBG_PRINTF((category, RSM_ERR,
2908                             "rsm_publish done: no more keys avlbl\n"));
2909                         return (RSMERR_INSUFFICIENT_RESOURCES);
2910                 }
        } else if (BETWEEN(msg->key, RSM_RSMLIB_ID_BASE, RSM_RSMLIB_ID_END))
                /* range reserved for internal use by base/ndi libraries */
                segment_id = msg->key;
        else if (msg->key <= RSM_DLPI_ID_END)
                return (RSMERR_RESERVED_SEGID);
        else if (msg->key <= (uint_t)RSM_USER_APP_ID_BASE - 1)
                segment_id = msg->key;
        else {
2919                 DBG_PRINTF((category, RSM_ERR,
2920                     "rsm_publish done: invalid key %u\n", msg->key));
2921                 return (RSMERR_RESERVED_SEGID);
2922         }
2923 
        /* Add key to the export list; the segment lock is held on success */
2925         e = rsmexport_add(seg, segment_id);
2926         if (e) {
2927                 rsmacl_free(acl, acl_len);
2928                 DBG_PRINTF((category, RSM_ERR,
2929                     "rsm_publish done: export_add failed: %d\n", e));
2930                 return (e);
2931         }
2932 
2933         seg->s_segid = segment_id;
2934 
2935         if ((seg->s_state != RSM_STATE_BIND) &&
2936             (seg->s_state != RSM_STATE_BIND_QUIESCED)) {
2937                 /* state changed since then, free acl and return */
2938                 rsmseglock_release(seg);
2939                 rsmexport_rm(seg);
2940                 rsmacl_free(acl, acl_len);
2941                 DBG_PRINTF((category, RSM_ERR,
2942                     "rsm_publish done: segment in wrong state: %d\n",
2943                     seg->s_state));
2944                 return (RSMERR_BAD_SEG_HNDL);
2945         }
2946 
2947         /*
2948          * If this is for a local memory handle and permissions are zero,
2949          * then the surrogate segment is very large and we want to skip
2950          * allocation of DVMA space.
2951          *
2952          * Careful!  If the user didn't use an ACL list, acl will be a NULL
2953          * pointer.  Check that before dereferencing it.
2954          */
2955         if (acl != (rsmapi_access_entry_t *)NULL) {
2956                 if (acl[0].ae_node == my_nodeid && acl[0].ae_permission == 0)
2957                         goto skipdriver;
2958         }
2959 
2960         /* create segment  */
2961         xbuf = ddi_umem_iosetup(seg->s_cookie, 0, seg->s_len, B_WRITE,
2962             sdev, 0, NULL, DDI_UMEM_SLEEP);
2963         ASSERT(xbuf != NULL);
2964 
2965         mem.ms_type = RSM_MEM_BUF;
2966         mem.ms_bp = xbuf;
2967 
        /* This call includes a bind operation */
2969 
2970         adapter = seg->s_adapter;
        /*
         * create an ACL list with hwaddrs for the RSMPI publish
         */
2974         e = rsmpiacl_create(acl, &rsmpi_acl, acl_len, adapter);
2975 
2976         if (e != RSM_SUCCESS) {
2977                 rsmseglock_release(seg);
2978                 rsmexport_rm(seg);
2979                 rsmacl_free(acl, acl_len);
2980                 freerbuf(xbuf);
2981                 DBG_PRINTF((category, RSM_ERR,
2982                     "rsm_publish done: rsmpiacl_create failed: %d\n", e));
2983                 return (e);
2984         }
2985 
2986         if (seg->s_state == RSM_STATE_BIND) {
2987                 /* create segment  */
2988 
                /* This call includes a bind operation */
2990 
2991                 if (seg->s_flags & RSMKA_ALLOW_UNBIND_REBIND) {
2992                         create_flags = RSM_ALLOW_UNBIND_REBIND;
2993                 }
2994 
2995                 if (seg->s_flags & RSMKA_SET_RESOURCE_DONTWAIT) {
2996                         callback_flag  = RSM_RESOURCE_DONTWAIT;
2997                 } else {
2998                         callback_flag  = RSM_RESOURCE_SLEEP;
2999                 }
3000 
3001                 e = adapter->rsmpi_ops->rsm_seg_create(
3002                     adapter->rsmpi_handle,
3003                     &seg->s_handle.out, seg->s_len,
3004                     create_flags, &mem,
3005                     callback_flag, NULL);
3006                 /*
3007                  * At present there is no dependency on the existence of xbuf.
3008                  * So we can free it here. If in the future this changes, it can
3009                  * be freed sometime during the segment destroy.
3010                  */
3011                 freerbuf(xbuf);
3012 
3013                 if (e != RSM_SUCCESS) {
3014                         rsmseglock_release(seg);
3015                         rsmexport_rm(seg);
3016                         rsmacl_free(acl, acl_len);
3017                         rsmpiacl_free(rsmpi_acl, acl_len);
3018                         DBG_PRINTF((category, RSM_ERR,
3019                             "rsm_publish done: export_create failed: %d\n", e));
3020                         /*
3021                          * The following assertion ensures that the two errors
3022                          * related to the length and its alignment do not occur
3023                          * since they have been checked during export_create
3024                          */
3025                         ASSERT(e != RSMERR_BAD_MEM_ALIGNMENT &&
3026                             e != RSMERR_BAD_LENGTH);
3027                         if (e == RSMERR_NOT_MEM)
3028                                 e = RSMERR_INSUFFICIENT_MEM;
3029 
3030                         return (e);
3031                 }
3032                 /* export segment, this should create an IMMU mapping */
3033                 e = adapter->rsmpi_ops->rsm_publish(
3034                     seg->s_handle.out,
3035                     rsmpi_acl, acl_len,
3036                     seg->s_segid,
3037                     RSM_RESOURCE_DONTWAIT, NULL);
3038 
3039                 if (e != RSM_SUCCESS) {
3040                         adapter->rsmpi_ops->rsm_seg_destroy(seg->s_handle.out);
3041                         rsmseglock_release(seg);
3042                         rsmexport_rm(seg);
3043                         rsmacl_free(acl, acl_len);
3044                         rsmpiacl_free(rsmpi_acl, acl_len);
3045                         DBG_PRINTF((category, RSM_ERR,
3046                             "rsm_publish done: export_publish failed: %d\n",
3047                             e));
3048                         return (e);
3049                 }
3050         }
3051 
3052         seg->s_acl_in = rsmpi_acl;
3053 
3054 skipdriver:
        /* set s_acl/s_acl_len only now to avoid a crash in rsmseg_free */
        seg->s_acl_len = acl_len;
        seg->s_acl = acl;
3058 
3059         if (seg->s_state == RSM_STATE_BIND) {
3060                 seg->s_state = RSM_STATE_EXPORT;
3061         } else if (seg->s_state == RSM_STATE_BIND_QUIESCED) {
3062                 seg->s_state = RSM_STATE_EXPORT_QUIESCED;
3063                 cv_broadcast(&seg->s_cv);
3064         }
3065 
3066         rsmseglock_release(seg);
3067 
3068         /*
3069          * If the segment id was solicited, then return it in
3070          * the original incoming message.
3071          */
3072         if (msg->key == 0) {
3073                 msg->key = segment_id;
3074 #ifdef _MULTI_DATAMODEL
3075                 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
3076                         rsm_ioctlmsg32_t msg32;
3077 
3078                         msg32.key = msg->key;
3079                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3080                             "rsm_publish done\n"));
3081                         return (ddi_copyout((caddr_t)&msg32,
3082                             (caddr_t)dataptr, sizeof (msg32), mode));
3083                 }
3084 #endif
3085                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3086                     "rsm_publish done\n"));
3087                 return (ddi_copyout((caddr_t)msg,
3088                     (caddr_t)dataptr, sizeof (*msg), mode));
3089         }
3090 
3091         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_publish done\n"));
3092         return (DDI_SUCCESS);
3093 }
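
/*
 * For reference, the segment-id resolution policy implemented in
 * rsm_publish above, restated as a pure function.  A minimal sketch,
 * assuming the RSM_*_ID_* range constants and the BETWEEN macro used
 * above; "example_resolve_segid" is an illustrative name only.
 */
#if 0	/* illustrative sketch only - not compiled */
static int
example_resolve_segid(uint_t key, rsm_memseg_id_t *segidp)
{
        if (key == 0) {
                *segidp = 0;            /* caller allocates from KA pool */
                return (RSM_SUCCESS);
        }
        if (BETWEEN(key, RSM_RSMLIB_ID_BASE, RSM_RSMLIB_ID_END)) {
                *segidp = key;          /* base/ndi library range */
                return (RSM_SUCCESS);
        }
        if (key <= RSM_DLPI_ID_END)
                return (RSMERR_RESERVED_SEGID); /* system reserved */
        if (key < (uint_t)RSM_USER_APP_ID_BASE) {
                *segidp = key;          /* application-selected id */
                return (RSM_SUCCESS);
        }
        return (RSMERR_RESERVED_SEGID); /* reserved for KA selection */
}
#endif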
3094 
3095 /*
3096  * This function modifies the access control list of an already published
3097  * segment.  There is no effect on import segments which are already
3098  * connected.
3099  */
3100 static int
3101 rsm_republish(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int mode)
3102 {
3103         rsmapi_access_entry_t   *new_acl, *old_acl, *tmp_acl;
3104         rsm_access_entry_t      *rsmpi_new_acl, *rsmpi_old_acl;
3105         int                     new_acl_len, old_acl_len, tmp_acl_len;
3106         int                     e, i;
3107         adapter_t               *adapter;
3108         int                     loopback_flag = 0;
3109         rsm_memseg_id_t         key;
3110         rsm_permission_t        permission;
3111         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
3112 
3113         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_republish enter\n"));
3114 
3115         if ((seg->s_state != RSM_STATE_EXPORT) &&
3116             (seg->s_state != RSM_STATE_EXPORT_QUIESCED) &&
3117             (seg->s_state != RSM_STATE_EXPORT_QUIESCING))
3118                 return (RSMERR_SEG_NOT_PUBLISHED);
3119 
3120         if (seg->s_pid != ddi_get_pid() &&
3121             ddi_get_pid() != 0) {
3122                 DBG_PRINTF((category, RSM_ERR,
3123                     "rsm_republish: Not owner\n"));
3124                 return (RSMERR_NOT_CREATOR);
3125         }
3126 
3127         if (seg->s_adapter == &loopback_adapter)
3128                 loopback_flag = 1;
3129 
3130         /*
3131          * Build new list first
3132          */
3133         e = rsmacl_build(msg, mode, &new_acl, &new_acl_len, loopback_flag);
3134         if (e) {
3135                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3136                     "rsm_republish done: rsmacl_build failed %d", e));
3137                 return (e);
3138         }
3139 
3140         /* Lock segment */
3141         rsmseglock_acquire(seg);
        /*
         * A republish may be in progress - the REPUBLISH message is being
         * sent to the importers, so wait for it to complete, OR
         * wait till DR completes.
         */
3147         while (((seg->s_state == RSM_STATE_EXPORT) &&
3148             (seg->s_flags & RSM_REPUBLISH_WAIT)) ||
3149             (seg->s_state == RSM_STATE_EXPORT_QUIESCED) ||
3150             (seg->s_state == RSM_STATE_EXPORT_QUIESCING)) {
3151                 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
3152                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3153                             "rsm_republish done: cv_wait  INTERRUPTED"));
3154                         rsmseglock_release(seg);
3155                         rsmacl_free(new_acl, new_acl_len);
3156                         return (RSMERR_INTERRUPTED);
3157                 }
3158         }
3159 
3160         /* recheck if state is valid */
3161         if (seg->s_state != RSM_STATE_EXPORT) {
3162                 rsmseglock_release(seg);
3163                 rsmacl_free(new_acl, new_acl_len);
3164                 return (RSMERR_SEG_NOT_PUBLISHED);
3165         }
3166 
3167         key = seg->s_key;
3168         old_acl = seg->s_acl;
3169         old_acl_len = seg->s_acl_len;
3170 
3171         seg->s_acl = new_acl;
3172         seg->s_acl_len = new_acl_len;
3173 
3174         /*
3175          * This call will only be meaningful if and when the interconnect
3176          * layer makes use of the access list
3177          */
3178         adapter = seg->s_adapter;
        /*
         * create an ACL list with hwaddrs for the RSMPI publish
         */
3182         e = rsmpiacl_create(new_acl, &rsmpi_new_acl, new_acl_len, adapter);
3183 
3184         if (e != RSM_SUCCESS) {
3185                 seg->s_acl = old_acl;
3186                 seg->s_acl_len = old_acl_len;
3187                 rsmseglock_release(seg);
3188                 rsmacl_free(new_acl, new_acl_len);
3189                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3190                     "rsm_republish done: rsmpiacl_create failed %d", e));
3191                 return (e);
3192         }
3193         rsmpi_old_acl = seg->s_acl_in;
3194         seg->s_acl_in = rsmpi_new_acl;
3195 
3196         e = adapter->rsmpi_ops->rsm_republish(seg->s_handle.out,
3197             seg->s_acl_in, seg->s_acl_len,
3198             RSM_RESOURCE_DONTWAIT, NULL);
3199 
3200         if (e != RSM_SUCCESS) {
3201                 seg->s_acl = old_acl;
3202                 seg->s_acl_in = rsmpi_old_acl;
3203                 seg->s_acl_len = old_acl_len;
3204                 rsmseglock_release(seg);
3205                 rsmacl_free(new_acl, new_acl_len);
3206                 rsmpiacl_free(rsmpi_new_acl, new_acl_len);
3207 
3208                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3209                     "rsm_republish done: rsmpi republish failed %d\n", e));
3210                 return (e);
3211         }
3212 
3213         /* create a tmp copy of the new acl */
3214         tmp_acl_len = new_acl_len;
3215         if (tmp_acl_len > 0) {
3216                 tmp_acl = kmem_zalloc(new_acl_len*sizeof (*tmp_acl), KM_SLEEP);
3217                 for (i = 0; i < tmp_acl_len; i++) {
3218                         tmp_acl[i].ae_node = new_acl[i].ae_node;
3219                         tmp_acl[i].ae_permission = new_acl[i].ae_permission;
3220                 }
                /*
                 * The default permission of a node which was in the old
                 * ACL but not in the new ACL is 0, i.e., no access.
                 */
3225                 permission = 0;
3226         } else {
                /*
                 * A NULL ACL means all importers can connect and the
                 * default permission will be the owner's creation umask.
                 */
3231                 tmp_acl = NULL;
3232                 permission = seg->s_mode;
3233         }
3234 
        /* make other republishers wait for this republish to complete */
3236         seg->s_flags |= RSM_REPUBLISH_WAIT;
3237 
3238         rsmseglock_release(seg);
3239 
3240         /* send the new perms to the importing nodes */
3241         rsm_send_republish(key, tmp_acl, tmp_acl_len, permission);
3242 
3243         rsmseglock_acquire(seg);
3244         seg->s_flags &= ~RSM_REPUBLISH_WAIT;
        /* wake up anyone waiting for the republish to complete */
3246         cv_broadcast(&seg->s_cv);
3247         rsmseglock_release(seg);
3248 
3249         rsmacl_free(tmp_acl, tmp_acl_len);
3250         rsmacl_free(old_acl, old_acl_len);
3251         rsmpiacl_free(rsmpi_old_acl, old_acl_len);
3252 
3253         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_republish done\n"));
3254         return (DDI_SUCCESS);
3255 }
3256 
3257 static int
3258 rsm_unpublish(rsmseg_t *seg, int mode)
3259 {
3260         rsmapi_access_entry_t   *acl;
3261         rsm_access_entry_t      *rsmpi_acl;
3262         int                     acl_len;
3263         int                     e;
3264         adapter_t *adapter;
3265         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
3266 
3267         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unpublish enter\n"));
3268 
3269         if (seg->s_pid != ddi_get_pid() &&
3270             ddi_get_pid() != 0) {
3271                 DBG_PRINTF((category, RSM_ERR,
3272                     "rsm_unpublish: Not creator\n"));
3273                 return (RSMERR_NOT_CREATOR);
3274         }
3275 
3276         rsmseglock_acquire(seg);
        /*
         * Wait for QUIESCING to complete here before rsmexport_rm
         * is called because the SUSPEND_COMPLETE message, which changes
         * the seg state from EXPORT_QUIESCING to EXPORT_QUIESCED and
         * signals the cv_wait, needs to find the segment in the hashtable.
         */
3283         while ((seg->s_state == RSM_STATE_EXPORT_QUIESCING) ||
3284             ((seg->s_state == RSM_STATE_EXPORT) && (seg->s_rdmacnt > 0))) {
3285                 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
3286                         rsmseglock_release(seg);
                        DBG_PRINTF((category, RSM_ERR,
                            "rsm_unpublish done: cv_wait INTR qscing "
                            "getv/putv in progress"));
3290                         return (RSMERR_INTERRUPTED);
3291                 }
3292         }
3293 
3294         /* verify segment state */
3295         if ((seg->s_state != RSM_STATE_EXPORT) &&
3296             (seg->s_state != RSM_STATE_EXPORT_QUIESCED)) {
3297                 rsmseglock_release(seg);
3298                 DBG_PRINTF((category, RSM_ERR,
3299                     "rsm_unpublish done: bad state %x\n", seg->s_state));
3300                 return (RSMERR_SEG_NOT_PUBLISHED);
3301         }
3302 
3303         rsmseglock_release(seg);
3304 
3305         rsmexport_rm(seg);
3306 
3307         rsm_send_importer_disconnects(seg->s_segid, my_nodeid);
3308 
3309         rsmseglock_acquire(seg);
3310         /*
3311          * wait for republish to complete
3312          */
3313         while ((seg->s_state == RSM_STATE_EXPORT) &&
3314             (seg->s_flags & RSM_REPUBLISH_WAIT)) {
3315                 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
3316                         DBG_PRINTF((category, RSM_ERR,
3317                             "rsm_unpublish done: cv_wait INTR repubing"));
3318                         rsmseglock_release(seg);
3319                         return (RSMERR_INTERRUPTED);
3320                 }
3321         }
3322 
3323         if ((seg->s_state != RSM_STATE_EXPORT) &&
3324             (seg->s_state != RSM_STATE_EXPORT_QUIESCED)) {
3325                 DBG_PRINTF((category, RSM_ERR,
3326                     "rsm_unpublish done: invalid state"));
3327                 rsmseglock_release(seg);
3328                 return (RSMERR_SEG_NOT_PUBLISHED);
3329         }
3330 
        /*
         * Check for a putv/getv surrogate segment which was not published
         * to the driver.
         *
         * Be certain to see if there is an ACL first!  If this segment was
         * not published with an ACL, acl will be a null pointer.  Check
         * that before dereferencing it.
         */
3339         acl = seg->s_acl;
3340         if (acl != (rsmapi_access_entry_t *)NULL) {
3341                 if (acl[0].ae_node == my_nodeid && acl[0].ae_permission == 0)
3342                         goto bypass;
3343         }
3344 
3345         /* The RSMPI unpublish/destroy has been done if seg is QUIESCED */
3346         if (seg->s_state == RSM_STATE_EXPORT_QUIESCED)
3347                 goto bypass;
3348 
3349         adapter = seg->s_adapter;
3350         for (;;) {
3351                 if (seg->s_state != RSM_STATE_EXPORT) {
3352                         rsmseglock_release(seg);
3353                         DBG_PRINTF((category, RSM_ERR,
3354                             "rsm_unpublish done: bad state %x\n",
3355                             seg->s_state));
3356                         return (RSMERR_SEG_NOT_PUBLISHED);
3357                 }
3358 
3359                 /* unpublish from adapter */
3360                 e = adapter->rsmpi_ops->rsm_unpublish(seg->s_handle.out);
3361 
3362                 if (e == RSM_SUCCESS) {
3363                         break;
3364                 }
3365 
3366                 if (e == RSMERR_SEG_IN_USE && mode == 1) {
3367                         /*
3368                          * wait for unpublish to succeed, it's busy.
3369                          */
3370                         seg->s_flags |= RSM_EXPORT_WAIT;
3371 
                        /*
                         * Wait for a max of 1 ms - this is an empirical
                         * value that was found by some minimal testing and
                         * can be fine tuned when we have better numbers.
                         * A long term fix would be to send cv_signal from
                         * the intr callback routine; currently nobody
                         * signals this wait.
                         */
3378                         (void) cv_reltimedwait(&seg->s_cv, &seg->s_lock,
3379                             drv_usectohz(1000), TR_CLOCK_TICK);
3380 
3381                         DBG_PRINTF((category, RSM_ERR,
3382                             "rsm_unpublish: SEG_IN_USE\n"));
3383 
3384                         seg->s_flags &= ~RSM_EXPORT_WAIT;
3385                 } else {
3386                         if (mode == 1) {
3387                                 DBG_PRINTF((category, RSM_ERR,
3388                                     "rsm:rsmpi unpublish err %x\n", e));
3389                                 seg->s_state = RSM_STATE_BIND;
3390                         }
3391                         rsmseglock_release(seg);
3392                         return (e);
3393                 }
3394         }
3395 
3396         /* Free segment */
3397         e = adapter->rsmpi_ops->rsm_seg_destroy(seg->s_handle.out);
3398 
3399         if (e != RSM_SUCCESS) {
3400                 DBG_PRINTF((category, RSM_ERR,
3401                     "rsm_unpublish: rsmpi destroy key=%x failed %x\n",
3402                     seg->s_key, e));
3403         }
3404 
3405 bypass:
3406         acl = seg->s_acl;
3407         rsmpi_acl = seg->s_acl_in;
3408         acl_len = seg->s_acl_len;
3409 
3410         seg->s_acl = NULL;
3411         seg->s_acl_in = NULL;
3412         seg->s_acl_len = 0;
3413 
3414         if (seg->s_state == RSM_STATE_EXPORT) {
3415                 seg->s_state = RSM_STATE_BIND;
3416         } else if (seg->s_state == RSM_STATE_EXPORT_QUIESCED) {
3417                 seg->s_state = RSM_STATE_BIND_QUIESCED;
3418                 cv_broadcast(&seg->s_cv);
3419         }
3420 
3421         rsmseglock_release(seg);
3422 
3423         rsmacl_free(acl, acl_len);
3424         rsmpiacl_free(rsmpi_acl, acl_len);
3425 
3426         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unpublish done\n"));
3427 
3428         return (DDI_SUCCESS);
3429 }
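
/*
 * For orientation, the export-segment state transitions driven by the
 * publish/unpublish entry points, as assumed from the code in
 * rsm_publish and rsm_unpublish above and exporter_quiesce below; not
 * an authoritative chart:
 *
 *	BIND             -- rsm_publish   -->  EXPORT
 *	BIND             <- rsm_unpublish --   EXPORT
 *	BIND_QUIESCED    -- rsm_publish   -->  EXPORT_QUIESCED
 *	BIND_QUIESCED    <- rsm_unpublish --   EXPORT_QUIESCED
 *	EXPORT           -- DR suspend    -->  EXPORT_QUIESCING
 *	EXPORT_QUIESCING -- exporter_quiesce --> EXPORT_QUIESCED
 */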
3430 
3431 /*
3432  * Called from rsm_unpublish to force an unload and disconnection of all
3433  * importers of the unpublished segment.
3434  *
3435  * First build the list of segments requiring a force disconnect, then
3436  * send a request for each.
3437  */
3438 static void
3439 rsm_send_importer_disconnects(rsm_memseg_id_t ex_segid,
3440     rsm_node_id_t ex_nodeid)
3441 {
3442         rsmipc_request_t        request;
3443         importing_token_t       *prev_token, *token, *tmp_token, *tokp;
3444         importing_token_t       *force_disconnect_list = NULL;
3445         int                     index;
3446 
3447         DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3448             "rsm_send_importer_disconnects enter\n"));
3449 
3450         index = rsmhash(ex_segid);
3451 
3452         mutex_enter(&importer_list.lock);
3453 
3454         prev_token = NULL;
3455         token = importer_list.bucket[index];
3456 
3457         while (token != NULL) {
3458                 if (token->key == ex_segid) {
3459                         /*
3460                          * take it off the importer list and add it
3461                          * to the force disconnect list.
3462                          */
3463                         if (prev_token == NULL)
3464                                 importer_list.bucket[index] = token->next;
3465                         else
3466                                 prev_token->next = token->next;
3467                         tmp_token = token;
3468                         token = token->next;
3469                         if (force_disconnect_list == NULL) {
3470                                 force_disconnect_list = tmp_token;
3471                                 tmp_token->next = NULL;
3472                         } else {
3473                                 tokp = force_disconnect_list;
3474                                 /*
3475                                  * make sure that the tmp_token's node
3476                                  * is not already on the force disconnect
3477                                  * list.
3478                                  */
3479                                 while (tokp != NULL) {
3480                                         if (tokp->importing_node ==
3481                                             tmp_token->importing_node) {
3482                                                 break;
3483                                         }
3484                                         tokp = tokp->next;
3485                                 }
3486                                 if (tokp == NULL) {
3487                                         tmp_token->next =
3488                                             force_disconnect_list;
3489                                         force_disconnect_list = tmp_token;
3490                                 } else {
3491                                         kmem_free((void *)tmp_token,
3492                                             sizeof (*token));
3493                                 }
3494                         }
3495 
3496                 } else {
3497                         prev_token = token;
3498                         token = token->next;
3499                 }
3500         }
3501         mutex_exit(&importer_list.lock);
3502 
3503         token = force_disconnect_list;
3504         while (token != NULL) {
3505                 if (token->importing_node == my_nodeid) {
3506                         rsm_force_unload(ex_nodeid, ex_segid,
3507                             DISCONNECT);
3508                 } else {
3509                         request.rsmipc_hdr.rsmipc_type =
3510                             RSMIPC_MSG_DISCONNECT;
3511                         request.rsmipc_key = token->key;
3512                         for (;;) {
3513                                 if (rsmipc_send(token->importing_node,
3514                                     &request,
3515                                     RSM_NO_REPLY) == RSM_SUCCESS) {
3516                                         break;
3517                                 } else {
3518                                         delay(drv_usectohz(10000));
3519                                 }
3520                         }
3521                 }
3522                 tmp_token = token;
3523                 token = token->next;
3524                 kmem_free((void *)tmp_token, sizeof (*token));
3525         }
3526 
3527         DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3528             "rsm_send_importer_disconnects done\n"));
3529 }
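
/*
 * For reference, the force-disconnect list insertion used above,
 * restated in isolation: prepend a token unless one for the same node
 * is already present, in which case the duplicate is freed.  A minimal
 * sketch; "example_add_unique_node" is an illustrative name only.
 */
#if 0	/* illustrative sketch only - not compiled */
static importing_token_t *
example_add_unique_node(importing_token_t *list, importing_token_t *tok)
{
        importing_token_t       *p;

        for (p = list; p != NULL; p = p->next)
                if (p->importing_node == tok->importing_node)
                        break;
        if (p != NULL) {                        /* node already listed */
                kmem_free((void *)tok, sizeof (*tok));
                return (list);
        }
        tok->next = list;                       /* prepend the new node */
        return (tok);
}
#endif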
3530 
/*
 * This function is used as a callback for unlocking the pages locked
 * down by a process which then does a fork or an exec.
 * It marks the export segments corresponding to the umem cookie given
 * by *ck as ZOMBIE (by calling rsmseg_close; such a segment is
 * destroyed later, when an rsm_close occurs).
 */
3538 static void
3539 rsm_export_force_destroy(ddi_umem_cookie_t *ck)
3540 {
3541         rsmresource_blk_t *blk;
3542         rsmresource_t *p;
3543         rsmseg_t *eseg = NULL;
3544         int i, j;
3545         int found = 0;
3546 
3547         DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3548             "rsm_export_force_destroy enter\n"));
3549 
3550         /*
3551          * Walk the resource list and locate the export segment (either
3552          * in the BIND or the EXPORT state) which corresponds to the
3553          * ddi_umem_cookie_t being freed up, and call rsmseg_close.
3554          * Change the state to ZOMBIE by calling rsmseg_close with the
3555          * force_flag argument (the second argument) set to 1. Also,
3556          * unpublish and unbind the segment, but don't free it. Free it
3557          * only on a rsm_close call for the segment.
3558          */
3559         rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
3560 
3561         for (i = 0; i < rsm_resource.rsmrc_len; i++) {
3562                 blk = rsm_resource.rsmrc_root[i];
3563                 if (blk == NULL) {
3564                         continue;
3565                 }
3566 
3567                 for (j = 0; j < RSMRC_BLKSZ; j++) {
3568                         p = blk->rsmrcblk_blks[j];
3569                         if ((p != NULL) && (p != RSMRC_RESERVED) &&
3570                             (p->rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT)) {
3571                                 eseg = (rsmseg_t *)p;
3572                                 if (eseg->s_cookie != ck)
3573                                         continue; /* continue searching */
3574                                 /*
3575                                  * Found the segment, set flag to indicate
3576                                  * force destroy processing is in progress
3577                                  */
3578                                 rsmseglock_acquire(eseg);
3579                                 eseg->s_flags |= RSM_FORCE_DESTROY_WAIT;
3580                                 rsmseglock_release(eseg);
3581                                 found = 1;
3582                                 break;
3583                         }
3584                 }
3585 
3586                 if (found)
3587                         break;
3588         }
3589 
3590         rw_exit(&rsm_resource.rsmrc_lock);
3591 
3592         if (found) {
3593                 ASSERT(eseg != NULL);
3594                 /* call rsmseg_close with force flag set to 1 */
3595                 rsmseg_close(eseg, 1);
3596                 /*
3597                  * force destroy processing done, clear flag and signal any
3598                  * thread waiting in rsmseg_close.
3599                  */
3600                 rsmseglock_acquire(eseg);
3601                 eseg->s_flags &= ~RSM_FORCE_DESTROY_WAIT;
3602                 cv_broadcast(&eseg->s_cv);
3603                 rsmseglock_release(eseg);
3604         }
3605 
3606         DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3607             "rsm_export_force_destroy done\n"));
3608 }
3609 
3610 /* ******************************* Remote Calls *********************** */
3611 static void
3612 rsm_intr_segconnect(rsm_node_id_t src, rsmipc_request_t *req)
3613 {
3614         rsmipc_reply_t reply;
3615         DBG_DEFINE(category,
3616             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3617 
3618         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3619             "rsm_intr_segconnect enter\n"));
3620 
3621         reply.rsmipc_status = (short)rsmsegacl_validate(req, src, &reply);
3622 
3623         reply.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPLY;
3624         reply.rsmipc_hdr.rsmipc_cookie = req->rsmipc_hdr.rsmipc_cookie;
3625 
3626         (void) rsmipc_send(src, NULL, &reply);
3627 
3628         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3629             "rsm_intr_segconnect done\n"));
3630 }
3631 
3632 
3633 /*
3634  * When an exported segment is unpublished the exporter sends an ipc
3635  * message (RSMIPC_MSG_DISCONNECT) to all importers.  The recv ipc dispatcher
3636  * calls this function.  The import list is scanned; segments which match the
3637  * exported segment id are unloaded and disconnected.
3638  *
3639  * Will also be called from rsm_rebind with disconnect_flag FALSE.
3640  *
3641  */
3642 static void
3643 rsm_force_unload(rsm_node_id_t src_nodeid,
3644     rsm_memseg_id_t ex_segid,
3645     boolean_t disconnect_flag)
3646 
3647 {
3648         rsmresource_t   *p = NULL;
3649         rsmhash_table_t *rhash = &rsm_import_segs;
3650         uint_t          index;
3651         DBG_DEFINE(category,
3652             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3653 
3654         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_force_unload enter\n"));
3655 
3656         index = rsmhash(ex_segid);
3657 
3658         rw_enter(&rhash->rsmhash_rw, RW_READER);
3659 
3660         p = rsmhash_getbkt(rhash, index);
3661 
3662         for (; p; p = p->rsmrc_next) {
3663                 rsmseg_t *seg = (rsmseg_t *)p;
3664                 if ((seg->s_segid == ex_segid) && (seg->s_node == src_nodeid)) {
3665                         /*
3666                          * In order to make rsmseg_unload and rsm_force_unload
3667                          * thread safe, acquire the segment lock here.
3668                          * rsmseg_unload is responsible for releasing the lock.
3669                          * rsmseg_unload releases the lock just before a call
3670                          * to rsmipc_send or in case of an early exit which
3671                          * occurs if the segment was in the state
3672                          * RSM_STATE_CONNECTING or RSM_STATE_NEW.
3673                          */
3674                         rsmseglock_acquire(seg);
3675                         if (disconnect_flag)
3676                                 seg->s_flags |= RSM_FORCE_DISCONNECT;
3677                         rsmseg_unload(seg);
3678                 }
3679         }
3680         rw_exit(&rhash->rsmhash_rw);
3681 
3682         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_force_unload done\n"));
3683 }
3684 
3685 static void
3686 rsm_intr_reply(rsmipc_msghdr_t *msg)
3687 {
        /*
         * Find slot for cookie in reply.
         * Match sequence with sequence in cookie.
         * If no match, return.
         * Try to grab the lock of the slot; if locked, return.
         * Copy data into the reply slot area.
         * Signal the waiter.
         */
3696         rsmipc_slot_t   *slot;
3697         rsmipc_cookie_t *cookie;
3698         void *data = (void *) msg;
3699         size_t size = sizeof (rsmipc_reply_t);
3700         DBG_DEFINE(category,
3701             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3702 
3703         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_reply enter\n"));
3704 
3705         cookie = &msg->rsmipc_cookie;
3706         if (cookie->ic.index >= RSMIPC_SZ) {
3707                 DBG_PRINTF((category, RSM_ERR,
3708                     "rsm: rsm_intr_reply bad cookie %d\n", cookie->ic.index));
3709                 return;
3710         }
3711 
3712         ASSERT(cookie->ic.index < RSMIPC_SZ);
3713         slot = &rsm_ipc.slots[cookie->ic.index];
3714         mutex_enter(&slot->rsmipc_lock);
3715         if (slot->rsmipc_cookie.value == cookie->value) {
3716                 /* found a match */
3717                 if (RSMIPC_GET(slot, RSMIPC_PENDING)) {
3718                         bcopy(data, slot->rsmipc_data, size);
3719                         RSMIPC_CLEAR(slot, RSMIPC_PENDING);
3720                         cv_signal(&slot->rsmipc_cv);
3721                 }
3722         } else {
3723                 DBG_PRINTF((category, RSM_DEBUG,
3724                     "rsm: rsm_intr_reply mismatched reply %d\n",
3725                     cookie->ic.index));
3726         }
3727         mutex_exit(&slot->rsmipc_lock);
3728         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_reply done\n"));
3729 }
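
/*
 * For orientation, a sketch of the sending side that rsm_intr_reply
 * above completes: mark a slot pending, stamp the request with the
 * slot's cookie, then sleep until the reply interrupt copies the data
 * in and clears RSMIPC_PENDING.  This assumes an RSMIPC_SET counterpart
 * to the RSMIPC_GET/RSMIPC_CLEAR macros used above; "pick_slot" and
 * "send_request" are illustrative names only.
 */
#if 0	/* illustrative sketch only - not compiled */
static int
example_wait_for_reply(rsmipc_request_t *req, rsmipc_reply_t *reply)
{
        rsmipc_slot_t   *slot = pick_slot();    /* e.g. from rsm_ipc.slots */

        mutex_enter(&slot->rsmipc_lock);
        req->rsmipc_hdr.rsmipc_cookie = slot->rsmipc_cookie;
        RSMIPC_SET(slot, RSMIPC_PENDING);
        send_request(req);
        while (RSMIPC_GET(slot, RSMIPC_PENDING))
                cv_wait(&slot->rsmipc_cv, &slot->rsmipc_lock);
        bcopy(slot->rsmipc_data, reply, sizeof (*reply));
        mutex_exit(&slot->rsmipc_lock);
        return (RSM_SUCCESS);
}
#endif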
3730 
3731 /*
3732  * This function gets dispatched on the worker thread when we receive
3733  * the SQREADY message. This function sends the SQREADY_ACK message.
3734  */
3735 static void
3736 rsm_sqready_ack_deferred(void *arg)
3737 {
3738         path_t  *path = (path_t *)arg;
3739         DBG_DEFINE(category,
3740             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3741 
3742         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3743             "rsm_sqready_ack_deferred enter\n"));
3744 
3745         mutex_enter(&path->mutex);
3746 
        /*
         * If the path is not active there is no point in sending the ACK,
         * because the whole SQREADY protocol will start again
         * when the path becomes active.
         */
3752         if (path->state != RSMKA_PATH_ACTIVE) {
3753                 /*
3754                  * decrement the path refcnt incremented in rsm_proc_sqready
3755                  */
3756                 PATH_RELE_NOLOCK(path);
3757                 mutex_exit(&path->mutex);
3758                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3759                     "rsm_sqready_ack_deferred done:!ACTIVE\n"));
3760                 return;
3761         }
3762 
3763         /* send an SQREADY_ACK message */
3764         (void) rsmipc_send_controlmsg(path, RSMIPC_MSG_SQREADY_ACK);
3765 
3766         /* initialize credits to the max level */
3767         path->sendq_token.msgbuf_avail = RSMIPC_MAX_MESSAGES;
3768 
3769         /* wake up any send that is waiting for credits */
3770         cv_broadcast(&path->sendq_token.sendq_cv);
3771 
3772         /*
3773          * decrement the path refcnt since we incremented it in
3774          * rsm_proc_sqready
3775          */
3776         PATH_RELE_NOLOCK(path);
3777 
3778         mutex_exit(&path->mutex);
3779 
3780         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3781             "rsm_sqready_ack_deferred done\n"));
3782 }
3783 
3784 /*
3785  * Process the SQREADY message
3786  */
3787 static void
3788 rsm_proc_sqready(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr,
3789     rsm_intr_hand_arg_t arg)
3790 {
3791         rsmipc_msghdr_t         *msghdr = (rsmipc_msghdr_t *)msg;
3792         srv_handler_arg_t       *hdlr_argp = (srv_handler_arg_t *)arg;
3793         path_t                  *path;
3794         DBG_DEFINE(category,
3795             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3796 
3797         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_proc_sqready enter\n"));
3798 
3799         /* look up the path - incr the path refcnt */
3800         path = rsm_find_path(hdlr_argp->adapter_name,
3801             hdlr_argp->adapter_instance, src_hwaddr);
3802 
3803         /*
3804          * No path exists or path is not active - drop the message
3805          */
3806         if (path == NULL) {
3807                 DBG_PRINTF((category, RSM_DEBUG,
3808                     "rsm_proc_sqready done: msg dropped no path\n"));
3809                 return;
3810         }
3811 
3812         mutex_exit(&path->mutex);
3813 
3814         /* drain any tasks from the previous incarnation */
3815         taskq_wait(path->recv_taskq);
3816 
3817         mutex_enter(&path->mutex);
        /*
         * If we had sent an SQREADY message and were waiting for an
         * SQREADY_ACK when this SQREADY message arrived, blindly reset
         * the WAIT_FOR_SQACK flag: we will just send an SQREADY_ACK
         * and forget about the SQREADY that we sent.
         */
3824         path->flags &= ~RSMKA_WAIT_FOR_SQACK;
3825 
3826         if (path->state != RSMKA_PATH_ACTIVE) {
3827                 /* decr refcnt and drop the mutex */
3828                 PATH_RELE_NOLOCK(path);
3829                 mutex_exit(&path->mutex);
3830                 DBG_PRINTF((category, RSM_DEBUG,
3831                     "rsm_proc_sqready done: msg dropped path !ACTIVE\n"));
3832                 return;
3833         }
3834 
3835         DBG_PRINTF((category, RSM_DEBUG, "rsm_proc_sqready:path=%lx "
3836             " src=%lx:%llx\n", path, msghdr->rsmipc_src, src_hwaddr));
3837 
        /*
         * The sender's local incarnation number is our remote incarnation
         * number; save it in the path data structure.
         */
3842         path->remote_incn = msg->rsmipc_local_incn;
3843         path->sendq_token.msgbuf_avail = 0;
3844         path->procmsg_cnt = 0;
3845 
3846         /*
3847          * path is active - dispatch task to send SQREADY_ACK - remember
3848          * RSMPI calls can't be done in interrupt context
3849          *
3850          * We can use the recv_taskq to send because the remote endpoint
3851          * cannot start sending messages till it receives SQREADY_ACK hence
3852          * at this point there are no tasks on recv_taskq.
3853          *
3854          * The path refcnt will be decremented in rsm_sqready_ack_deferred.
3855          */
3856         (void) taskq_dispatch(path->recv_taskq,
3857             rsm_sqready_ack_deferred, path, KM_NOSLEEP);
3858 
3859         mutex_exit(&path->mutex);
3860 
3861 
3862         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_proc_sqready done\n"));
3863 }
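
/*
 * For orientation, the send-queue handshake as assumed from
 * rsm_proc_sqready above and rsm_proc_sqready_ack below
 * (node A initiates, node B responds):
 *
 *	A: SQREADY(A.local_incn)  ---->  B: remote_incn = A.local_incn
 *	                                 B: msgbuf_avail = 0
 *	A: verify incn matches    <----  B: SQREADY_ACK(B.local_incn)
 *	A: remote_incn = B.local_incn    B: msgbuf_avail = RSMIPC_MAX_MESSAGES
 *	A: msgbuf_avail = RSMIPC_MAX_MESSAGES
 *
 * Thereafter RSMIPC_MSG_CREDIT messages replenish msgbuf_avail as the
 * receiver drains its buffers (see rsm_add_credits below).
 */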
3864 
3865 /*
3866  * Process the SQREADY_ACK message
3867  */
3868 static void
3869 rsm_proc_sqready_ack(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr,
3870     rsm_intr_hand_arg_t arg)
3871 {
3872         rsmipc_msghdr_t         *msghdr = (rsmipc_msghdr_t *)msg;
3873         srv_handler_arg_t       *hdlr_argp = (srv_handler_arg_t *)arg;
3874         path_t                  *path;
3875         DBG_DEFINE(category,
3876             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3877 
3878         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3879             "rsm_proc_sqready_ack enter\n"));
3880 
3881         /* look up the path - incr the path refcnt */
3882         path = rsm_find_path(hdlr_argp->adapter_name,
3883             hdlr_argp->adapter_instance, src_hwaddr);
3884 
        /*
         * Drop the message if no path exists, the path is not active,
         * or it is not waiting for an SQREADY_ACK message.
         */
3889         if (path == NULL) {
3890                 DBG_PRINTF((category, RSM_DEBUG,
3891                     "rsm_proc_sqready_ack done: msg dropped no path\n"));
3892                 return;
3893         }
3894 
3895         if ((path->state != RSMKA_PATH_ACTIVE) ||
3896             !(path->flags & RSMKA_WAIT_FOR_SQACK)) {
3897                 /* decrement the refcnt */
3898                 PATH_RELE_NOLOCK(path);
3899                 mutex_exit(&path->mutex);
3900                 DBG_PRINTF((category, RSM_DEBUG,
3901                     "rsm_proc_sqready_ack done: msg dropped\n"));
3902                 return;
3903         }
3904 
        /*
         * Check if this message is in response to the last RSMIPC_MSG_SQREADY
         * sent; if not, drop it.
         */
3909         if (path->local_incn != msghdr->rsmipc_incn) {
3910                 /* decrement the refcnt */
3911                 PATH_RELE_NOLOCK(path);
3912                 mutex_exit(&path->mutex);
3913                 DBG_PRINTF((category, RSM_DEBUG,
3914                     "rsm_proc_sqready_ack done: msg old incn %lld\n",
3915                     msghdr->rsmipc_incn));
3916                 return;
3917         }
3918 
3919         DBG_PRINTF((category, RSM_DEBUG, "rsm_proc_sqready_ack:path=%lx "
3920             " src=%lx:%llx\n", path, msghdr->rsmipc_src, src_hwaddr));
3921 
        /*
         * clear the WAIT_FOR_SQACK flag since we have received the ack
         */
3925         path->flags &= ~RSMKA_WAIT_FOR_SQACK;
3926 
3927         /* save the remote sendq incn number */
3928         path->remote_incn = msg->rsmipc_local_incn;
3929 
3930         /* initialize credits to the max level */
3931         path->sendq_token.msgbuf_avail = RSMIPC_MAX_MESSAGES;
3932 
3933         /* wake up any send that is waiting for credits */
3934         cv_broadcast(&path->sendq_token.sendq_cv);
3935 
3936         /* decrement the refcnt */
3937         PATH_RELE_NOLOCK(path);
3938 
3939         mutex_exit(&path->mutex);
3940 
3941         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3942             "rsm_proc_sqready_ack done\n"));
3943 }
3944 
3945 /*
3946  * process the RSMIPC_MSG_CREDIT message
3947  */
3948 static void
3949 rsm_add_credits(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr,
3950     rsm_intr_hand_arg_t arg)
3951 {
3952         rsmipc_msghdr_t         *msghdr = (rsmipc_msghdr_t *)msg;
3953         srv_handler_arg_t       *hdlr_argp = (srv_handler_arg_t *)arg;
3954         path_t                  *path;
3955         DBG_DEFINE(category,
3956             RSM_KERNEL_AGENT | RSM_FUNC_ALL |
3957             RSM_INTR_CALLBACK | RSM_FLOWCONTROL);
3958 
3959         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_add_credits enter\n"));
3960 
3961         /* look up the path - incr the path refcnt */
3962         path = rsm_find_path(hdlr_argp->adapter_name,
3963             hdlr_argp->adapter_instance, src_hwaddr);
3964 
3965         if (path == NULL) {
3966                 DBG_PRINTF((category, RSM_DEBUG,
                    "rsm_add_credits done: path not found\n"));
3968                 return;
3969         }
3970 
3971         /* the path is not active - discard credits */
3972         if (path->state != RSMKA_PATH_ACTIVE) {
3973                 PATH_RELE_NOLOCK(path);
3974                 mutex_exit(&path->mutex);
3975                 DBG_PRINTF((category, RSM_DEBUG,
                    "rsm_add_credits done: path=%lx !ACTIVE\n", path));
3977                 return;
3978         }
3979 
        /*
         * Check if these credits are for the current incarnation of the path.
         */
3983         if (path->local_incn != msghdr->rsmipc_incn) {
3984                 /* decrement the refcnt */
3985                 PATH_RELE_NOLOCK(path);
3986                 mutex_exit(&path->mutex);
3987                 DBG_PRINTF((category, RSM_DEBUG,
                    "rsm_add_credits done: old incn %lld\n",
3989                     msghdr->rsmipc_incn));
3990                 return;
3991         }
3992 
3993         DBG_PRINTF((category, RSM_DEBUG,
3994             "rsm_add_credits:path=%lx new-creds=%d "
3995             "curr credits=%d src=%lx:%llx\n", path, msg->rsmipc_credits,
3996             path->sendq_token.msgbuf_avail, msghdr->rsmipc_src,
3997             src_hwaddr));
3998 
3999 
4000         /* add credits to the path's sendq */
4001         path->sendq_token.msgbuf_avail += msg->rsmipc_credits;
4002 
4003         ASSERT(path->sendq_token.msgbuf_avail <= RSMIPC_MAX_MESSAGES);
4004 
4005         /* wake up any send that is waiting for credits */
4006         cv_broadcast(&path->sendq_token.sendq_cv);
4007 
4008         /* decrement the refcnt */
4009         PATH_RELE_NOLOCK(path);
4010 
4011         mutex_exit(&path->mutex);
4012 
4013         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_add_credits done\n"));
4014 }
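
/*
 * For reference, a sketch of the consumer side of the credits that
 * rsm_add_credits above replenishes: block until a credit is
 * available, then consume one per message sent.  A minimal sketch,
 * assuming the path/sendq_token fields used above;
 * "example_consume_credit" is an illustrative name only.
 */
#if 0	/* illustrative sketch only - not compiled */
static void
example_consume_credit(path_t *path)
{
        mutex_enter(&path->mutex);
        while (path->sendq_token.msgbuf_avail == 0)     /* no credits */
                cv_wait(&path->sendq_token.sendq_cv, &path->mutex);
        path->sendq_token.msgbuf_avail--;       /* consume one credit */
        mutex_exit(&path->mutex);
}
#endif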
4015 
4016 static void
4017 rsm_intr_event(rsmipc_request_t *msg)
4018 {
4019         rsmseg_t        *seg;
4020         rsmresource_t   *p;
4021         rsm_node_id_t   src_node;
4022         DBG_DEFINE(category,
4023             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4024 
4025         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_event enter\n"));
4026 
4027         src_node = msg->rsmipc_hdr.rsmipc_src;
4028 
4029         if ((seg = msg->rsmipc_segment_cookie) != NULL) {
4030                 /* This is for an import segment */
4031                 uint_t hashval = rsmhash(msg->rsmipc_key);
4032 
4033                 rw_enter(&rsm_import_segs.rsmhash_rw, RW_READER);
4034 
4035                 p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hashval);
4036 
4037                 for (; p; p = p->rsmrc_next) {
4038                         if ((p->rsmrc_key == msg->rsmipc_key) &&
4039                             (p->rsmrc_node == src_node)) {
4040                                 seg = (rsmseg_t *)p;
4041                                 rsmseglock_acquire(seg);
4042 
4043                                 atomic_inc_32(&seg->s_pollevent);
4044 
4045                                 if (seg->s_pollflag & RSM_SEGMENT_POLL)
4046                                         pollwakeup(&seg->s_poll, POLLRDNORM);
4047 
4048                                 rsmseglock_release(seg);
4049                         }
4050                 }
4051 
4052                 rw_exit(&rsm_import_segs.rsmhash_rw);
4053         } else {
4054                 /* This is for an export segment */
4055                 seg = rsmexport_lookup(msg->rsmipc_key);
4056                 if (!seg) {
4057                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4058                             "rsm_intr_event done: exp seg not found\n"));
4059                         return;
4060                 }
4061 
4062                 ASSERT(rsmseglock_held(seg));
4063 
4064                 atomic_inc_32(&seg->s_pollevent);
4065 
                /*
                 * We must hold the segment lock here, or else the segment
                 * can be freed while pollwakeup is using it. This implies
                 * that we MUST NOT grab the segment lock during rsm_chpoll,
                 * as outlined in the chpoll(9E) man page.
                 */
4072                 if (seg->s_pollflag & RSM_SEGMENT_POLL)
4073                         pollwakeup(&seg->s_poll, POLLRDNORM);
4074 
4075                 rsmseglock_release(seg);
4076         }
4077 
4078         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_event done\n"));
4079 }
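
/*
 * For orientation, a user-level sketch of consuming the wakeup that
 * rsm_intr_event above delivers: poll(2) on the file descriptor
 * associated with the segment.  How that descriptor is obtained (e.g.
 * via the RSMAPI rsm_memseg_get_pollfd interface) is an assumption
 * here; "example_wait_for_event" is an illustrative name only.
 */
#if 0	/* illustrative sketch - user-level code, not compiled here */
#include <poll.h>

static int
example_wait_for_event(int segfd)
{
        struct pollfd   pfd;

        pfd.fd = segfd;
        pfd.events = POLLRDNORM;        /* matches the pollwakeup above */
        if (poll(&pfd, 1, -1) < 0)
                return (-1);
        return ((pfd.revents & POLLRDNORM) != 0);
}
#endif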
4080 
4081 /*
4082  * The exporter did a republish and changed the ACL - this change is only
4083  * visible to new importers.
4084  */
4085 static void
4086 importer_update(rsm_node_id_t src_node, rsm_memseg_id_t key,
4087     rsm_permission_t perm)
4088 {
4089 
4090         rsmresource_t   *p;
4091         rsmseg_t        *seg;
4092         uint_t          hashval = rsmhash(key);
4093         DBG_DEFINE(category,
4094             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4095 
4096         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_update enter\n"));
4097 
4098         rw_enter(&rsm_import_segs.rsmhash_rw, RW_READER);
4099 
4100         p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hashval);
4101 
4102         for (; p; p = p->rsmrc_next) {
4103                 /*
4104                  * find the importer and update the permission in the shared
4105                  * data structure. Any new importers will use the new perms
4106                  */
4107                 if ((p->rsmrc_key == key) && (p->rsmrc_node == src_node)) {
4108                         seg = (rsmseg_t *)p;
4109 
4110                         rsmseglock_acquire(seg);
4111                         rsmsharelock_acquire(seg);
4112                         seg->s_share->rsmsi_mode = perm;
4113                         rsmsharelock_release(seg);
4114                         rsmseglock_release(seg);
4115 
4116                         break;
4117                 }
4118         }
4119 
4120         rw_exit(&rsm_import_segs.rsmhash_rw);
4121 
4122         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_update done\n"));
4123 }
4124 
4125 void
4126 rsm_suspend_complete(rsm_node_id_t src_node, int flag)
4127 {
4128         int             done = 1; /* indicate all SUSPENDS have been acked */
4129         list_element_t  *elem;
4130         DBG_DEFINE(category,
4131             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4132 
4133         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4134             "rsm_suspend_complete enter\n"));
4135 
4136         mutex_enter(&rsm_suspend_list.list_lock);
4137 
4138         if (rsm_suspend_list.list_head == NULL) {
4139                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4140                     "rsm_suspend_complete done: suspend_list is empty\n"));
4141                 mutex_exit(&rsm_suspend_list.list_lock);
4142                 return;
4143         }
4144 
4145         elem = rsm_suspend_list.list_head;
4146         while (elem != NULL) {
4147                 if (elem->nodeid == src_node) {
4148                         /* clear the pending flag for the node */
4149                         elem->flags &= ~RSM_SUSPEND_ACKPENDING;
4150                         elem->flags |= flag;
4151                 }
4152 
4153                 if (done && (elem->flags & RSM_SUSPEND_ACKPENDING))
4154                         done = 0; /* still some nodes have not yet ACKED */
4155 
4156                 elem = elem->next;
4157         }
4158 
4159         mutex_exit(&rsm_suspend_list.list_lock);
4160 
4161         if (!done) {
4162                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4163                     "rsm_suspend_complete done: acks pending\n"));
4164                 return;
4165         }
        /*
         * Now that we are done with suspending all the remote importers,
         * it is time to quiesce the local exporters.
         */
4170         exporter_quiesce();
4171 
4172         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4173             "rsm_suspend_complete done\n"));
4174 }
4175 
4176 static void
4177 exporter_quiesce()
4178 {
4179         int             i, e;
4180         rsmresource_t   *current;
4181         rsmseg_t        *seg;
4182         adapter_t       *adapter;
4183         DBG_DEFINE(category,
4184             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4185 
4186         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exporter_quiesce enter\n"));
        /*
         * The importers send a SUSPEND_COMPLETE to the exporter node.
         * Unpublish and unbind the export segments and move them to
         * the EXPORT_QUIESCED state.
         */
4192 
4193         rw_enter(&rsm_export_segs.rsmhash_rw, RW_READER);
4194 
4195         for (i = 0; i < rsm_hash_size; i++) {
4196                 current = rsm_export_segs.bucket[i];
4197                 while (current != NULL) {
4198                         seg = (rsmseg_t *)current;
4199                         rsmseglock_acquire(seg);
4200                         if (current->rsmrc_state ==
4201                             RSM_STATE_EXPORT_QUIESCING) {
4202                                 adapter = seg->s_adapter;
                                /*
                                 * Some local memory handles are not
                                 * published; check if this one was published.
                                 */
4207                                 if ((seg->s_acl == NULL) ||
4208                                     (seg->s_acl[0].ae_node != my_nodeid) ||
4209                                     (seg->s_acl[0].ae_permission != 0)) {
4210 
4211                                         e = adapter->rsmpi_ops->rsm_unpublish(
4212                                             seg->s_handle.out);
4213                                         DBG_PRINTF((category, RSM_DEBUG,
4214                                             "exporter_quiesce:unpub %d\n", e));
4215 
4216                                         e = adapter->rsmpi_ops->rsm_seg_destroy(
4217                                             seg->s_handle.out);
4218 
4219                                         DBG_PRINTF((category, RSM_DEBUG,
4220                                             "exporter_quiesce:destroy %d\n",
4221                                             e));
4222                                 }
4223 
4224                                 (void) rsm_unbind_pages(seg);
4225                                 seg->s_state = RSM_STATE_EXPORT_QUIESCED;
4226                                 cv_broadcast(&seg->s_cv);
4227                         }
4228                         rsmseglock_release(seg);
4229                         current = current->rsmrc_next;
4230                 }
4231         }
4232         rw_exit(&rsm_export_segs.rsmhash_rw);
4233 
        /*
         * We are done with the pre-del processing for all the local
         * segments - time to move to PREDEL_COMPLETED.
         */
4238 
4239         mutex_enter(&rsm_drv_data.drv_lock);
4240 
4241         ASSERT(rsm_drv_data.drv_state == RSM_DRV_PREDEL_STARTED);
4242 
4243         rsm_drv_data.drv_state = RSM_DRV_PREDEL_COMPLETED;
4244 
4245         cv_broadcast(&rsm_drv_data.drv_cv);
4246 
4247         mutex_exit(&rsm_drv_data.drv_lock);
4248 
4249         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exporter_quiesce done\n"));
4250 }
4251 
4252 static void
4253 importer_suspend(rsm_node_id_t src_node)
4254 {
4255         int             i;
4256         int             susp_flg; /* true means already suspended */
4257         int             num_importers;
4258         rsmresource_t   *p = NULL, *curp;
4259         rsmhash_table_t *rhash = &rsm_import_segs;
4260         rsmseg_t        *seg;
4261         rsmipc_request_t request;
4262         DBG_DEFINE(category,
4263             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4264 
4265         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_suspend enter\n"));
4266 
4267         rw_enter(&rhash->rsmhash_rw, RW_READER);
4268         for (i = 0; i < rsm_hash_size; i++) {
4269                 p = rhash->bucket[i];
4270 
                /*
                 * Suspend all importers with the same <node, key> pair.
                 * After the last one of the shared importers has been
                 * suspended, suspend the shared mappings/connection.
                 */
4276                 for (; p; p = p->rsmrc_next) {
4277                         rsmseg_t *first = (rsmseg_t *)p;
4278                         if ((first->s_node != src_node) ||
4279                             (first->s_state == RSM_STATE_DISCONNECT))
4280                                 continue; /* go to next entry */
4281                         /*
4282                          * search the rest of the bucket for
4283                          * other siblings (importers with the same key)
4284                          * of "first" and suspend them.
4285                          * All importers with the same key fall in
4286                          * the same bucket.
4287                          */
4288                         num_importers = 0;
4289                         for (curp = p; curp; curp = curp->rsmrc_next) {
4290                                 seg = (rsmseg_t *)curp;
4291 
4292                                 rsmseglock_acquire(seg);
4293 
4294                                 if ((seg->s_node != first->s_node) ||
4295                                     (seg->s_key != first->s_key) ||
4296                                     (seg->s_state == RSM_STATE_DISCONNECT)) {
4297                                         /*
4298                                          * either not a peer segment or it's a
4299                                          * disconnected segment - skip it
4300                                          */
4301                                         rsmseglock_release(seg);
4302                                         continue;
4303                                 }
4304 
4305                                 rsmseg_suspend(seg, &susp_flg);
4306 
4307                                 if (susp_flg) { /* seg already suspended */
4308                                         rsmseglock_release(seg);
4309                                         break; /* the inner for loop */
4310                                 }
4311 
4312                                 num_importers++;
4313                                 rsmsharelock_acquire(seg);
4314                                 /*
4315                                  * we've processed all importers that are
4316                                  * siblings of "first"
4317                                  */
4318                                 if (num_importers ==
4319                                     seg->s_share->rsmsi_refcnt) {
4320                                         rsmsharelock_release(seg);
4321                                         rsmseglock_release(seg);
4322                                         break;
4323                                 }
4324                                 rsmsharelock_release(seg);
4325                                 rsmseglock_release(seg);
4326                         }
4327 
4328                         /*
4329                          * All the importers with the same key and
4330                          * nodeid as "first" have been suspended.
4331                          * Now suspend the shared connect/mapping.
4332                          * This is done only once.
4333                          */
4334                         if (!susp_flg) {
4335                                 rsmsegshare_suspend(seg);
4336                         }
4337                 }
4338         }
4339 
4340         rw_exit(&rhash->rsmhash_rw);
4341 
4342         /* send an ACK for SUSPEND message */
4343         request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SUSPEND_DONE;
4344         (void) rsmipc_send(src_node, &request, RSM_NO_REPLY);
4345 
4346 
4347         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_suspend done\n"));
4348 
4349 }
4350 
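/*
 * Illustrative sketch (not part of the driver): since all importers of
 * a given <node, key> pair hash into the same bucket, one bucket scan
 * finds every sibling. A hypothetical helper that counts the siblings
 * of a segment under that assumption:
 */
#if 0
static int
rsm_count_siblings(rsmresource_t *bucket, rsm_node_id_t node,
    rsm_memseg_id_t key)
{
        rsmresource_t   *curp;
        int             count = 0;

        for (curp = bucket; curp != NULL; curp = curp->rsmrc_next) {
                rsmseg_t *seg = (rsmseg_t *)curp;

                if ((seg->s_node == node) && (seg->s_key == key))
                        count++;
        }
        return (count);
}
#endif
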
4351 static void
4352 rsmseg_suspend(rsmseg_t *seg, int *susp_flg)
4353 {
4354         int             recheck_state;
4355         rsmcookie_t     *hdl;
4356         DBG_DEFINE(category,
4357             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4358 
4359         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4360             "rsmseg_suspend enter: key=%u\n", seg->s_key));
4361 
4362         *susp_flg = 0;
4363 
4364         ASSERT(rsmseglock_held(seg));
4365         /* wait if putv/getv is in progress */
4366         while (seg->s_rdmacnt > 0)
4367                 cv_wait(&seg->s_cv, &seg->s_lock);
4368 
4369         do {
4370                 recheck_state = 0;
4371 
4372                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4373                     "rsmseg_suspend:segment %x state=%d\n",
4374                     seg->s_key, seg->s_state));
4375 
4376                 switch (seg->s_state) {
4377                 case RSM_STATE_NEW:
4378                         /* not a valid state */
4379                         break;
4380                 case RSM_STATE_CONNECTING:
4381                         seg->s_state = RSM_STATE_ABORT_CONNECT;
4382                         break;
4383                 case RSM_STATE_ABORT_CONNECT:
4384                         break;
4385                 case RSM_STATE_CONNECT:
4386                         seg->s_handle.in = NULL;
4387                         seg->s_state = RSM_STATE_CONN_QUIESCE;
4388                         break;
4389                 case RSM_STATE_MAPPING:
4390                         /* wait until segment leaves the mapping state */
4391                         while (seg->s_state == RSM_STATE_MAPPING)
4392                                 cv_wait(&seg->s_cv, &seg->s_lock);
4393                         recheck_state = 1;
4394                         break;
4395                 case RSM_STATE_ACTIVE:
4396                         /* unload the mappings */
4397                         if (seg->s_ckl != NULL) {
4398                                 hdl = seg->s_ckl;
4399                                 for (; hdl != NULL; hdl = hdl->c_next) {
4400                                         (void) devmap_unload(hdl->c_dhp,
4401                                             hdl->c_off, hdl->c_len);
4402                                 }
4403                         }
4404                         seg->s_mapinfo = NULL;
4405                         seg->s_state = RSM_STATE_MAP_QUIESCE;
4406                         break;
4407                 case RSM_STATE_CONN_QUIESCE:
4408                         /* FALLTHRU */
4409                 case RSM_STATE_MAP_QUIESCE:
4410                         /* rsmseg_suspend already done for seg */
4411                         *susp_flg = 1;
4412                         break;
4413                 case RSM_STATE_DISCONNECT:
4414                         break;
4415                 default:
4416                         ASSERT(0); /* invalid state */
4417                 }
4418         } while (recheck_state);
4419 
4420         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_suspend done\n"));
4421 }
4422 
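/*
 * Illustrative sketch (not part of the driver): the s_rdmacnt drain at
 * the top of rsmseg_suspend() assumes that the putv/getv path brackets
 * each RDMA with an increment/decrement of s_rdmacnt under the segment
 * lock and broadcasts on s_cv when the count drops to zero. A
 * hypothetical pairing on the putv/getv side:
 */
#if 0
        /* announce an in-progress RDMA */
        rsmseglock_acquire(seg);
        seg->s_rdmacnt++;
        rsmseglock_release(seg);

        /* ... perform the scatter/gather transfer ... */

        rsmseglock_acquire(seg);
        if (--seg->s_rdmacnt == 0)
                cv_broadcast(&seg->s_cv);       /* wake a waiting suspend */
        rsmseglock_release(seg);
#endif
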
4423 static void
4424 rsmsegshare_suspend(rsmseg_t *seg)
4425 {
4426         int                     e;
4427         adapter_t               *adapter;
4428         rsm_import_share_t      *sharedp;
4429         DBG_DEFINE(category,
4430             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4431 
4432         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4433             "rsmsegshare_suspend enter\n"));
4434 
4435         rsmseglock_acquire(seg);
4436         rsmsharelock_acquire(seg);
4437 
4438         sharedp = seg->s_share;
4439         adapter = seg->s_adapter;
4440         switch (sharedp->rsmsi_state) {
4441         case RSMSI_STATE_NEW:
4442                 break;
4443         case RSMSI_STATE_CONNECTING:
4444                 sharedp->rsmsi_state = RSMSI_STATE_ABORT_CONNECT;
4445                 break;
4446         case RSMSI_STATE_ABORT_CONNECT:
4447                 break;
4448         case RSMSI_STATE_CONNECTED:
4449                 /* do the rsmpi disconnect */
4450                 if (sharedp->rsmsi_node != my_nodeid) {
4451                         e = adapter->rsmpi_ops->
4452                             rsm_disconnect(sharedp->rsmsi_handle);
4453 
4454                         DBG_PRINTF((category, RSM_DEBUG,
4455                             "rsm:rsmpi disconnect seg=%x:err=%d\n",
4456                             sharedp->rsmsi_segid, e));
4457                 }
4458 
4459                 sharedp->rsmsi_handle = NULL;
4460 
4461                 sharedp->rsmsi_state = RSMSI_STATE_CONN_QUIESCE;
4462                 break;
4463         case RSMSI_STATE_CONN_QUIESCE:
4464                 break;
4465         case RSMSI_STATE_MAPPED:
4466                 /* do the rsmpi unmap and disconnect */
4467                 if (sharedp->rsmsi_node != my_nodeid) {
4468                         e = adapter->rsmpi_ops->rsm_unmap(seg->s_handle.in);
4469 
4470                         DBG_PRINTF((category, RSM_DEBUG,
4471                             "rsmshare_suspend: rsmpi unmap %d\n", e));
4472 
4473                         e = adapter->rsmpi_ops->
4474                             rsm_disconnect(sharedp->rsmsi_handle);
4475                         DBG_PRINTF((category, RSM_DEBUG,
4476                             "rsm:rsmpi disconnect seg=%x:err=%d\n",
4477                             sharedp->rsmsi_segid, e));
4478                 }
4479 
4480                 sharedp->rsmsi_handle = NULL;
4481 
4482                 sharedp->rsmsi_state = RSMSI_STATE_MAP_QUIESCE;
4483                 break;
4484         case RSMSI_STATE_MAP_QUIESCE:
4485                 break;
4486         case RSMSI_STATE_DISCONNECTED:
4487                 break;
4488         default:
4489                 ASSERT(0); /* invalid state */
4490         }
4491 
4492         rsmsharelock_release(seg);
4493         rsmseglock_release(seg);
4494 
4495         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4496             "rsmsegshare_suspend done\n"));
4497 }
4498 
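/*
 * Illustrative sketch (not part of the driver): note the lock ordering
 * used above - the per-segment lock is always acquired before the
 * shared-connection lock and the two are released in reverse order.
 * Any hypothetical code touching seg->s_share should follow the same
 * order to avoid deadlock:
 */
#if 0
        rsmseglock_acquire(seg);        /* 1st: per-segment lock */
        rsmsharelock_acquire(seg);      /* 2nd: shared-connection lock */

        /* ... examine or update seg->s_share ... */

        rsmsharelock_release(seg);      /* release in reverse order */
        rsmseglock_release(seg);
#endif
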
4499 /*
4500  * This should get called on receiving a RESUME message or from
4501  * the path manager if the node undergoing DR dies.
4502  */
4503 static void
4504 importer_resume(rsm_node_id_t src_node)
4505 {
4506         int             i;
4507         rsmresource_t   *p = NULL;
4508         rsmhash_table_t *rhash = &rsm_import_segs;
4509         void            *cookie;
4510         DBG_DEFINE(category,
4511             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4512 
4513         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_resume enter\n"));
4514 
4515         rw_enter(&rhash->rsmhash_rw, RW_READER);
4516 
4517         for (i = 0; i < rsm_hash_size; i++) {
4518                 p = rhash->bucket[i];
4519 
4520                 for (; p; p = p->rsmrc_next) {
4521                         rsmseg_t *seg = (rsmseg_t *)p;
4522 
4523                         rsmseglock_acquire(seg);
4524 
4525                         /* process only importers of node undergoing DR */
4526                         if (seg->s_node != src_node) {
4527                                 rsmseglock_release(seg);
4528                                 continue;
4529                         }
4530 
4531                         if (rsmseg_resume(seg, &cookie) != RSM_SUCCESS) {
4532                                 rsmipc_request_t        request;
4533                                 /*
4534                                  * rsmpi map/connect failed
4535                                  * inform the exporter so that it can
4536                                  * remove the importer.
4537                                  */
4538                                 request.rsmipc_hdr.rsmipc_type =
4539                                     RSMIPC_MSG_NOTIMPORTING;
4540                                 request.rsmipc_key = seg->s_segid;
4541                                 request.rsmipc_segment_cookie = cookie;
4542                                 rsmseglock_release(seg);
4543                                 (void) rsmipc_send(seg->s_node, &request,
4544                                     RSM_NO_REPLY);
4545                         } else {
4546                                 rsmseglock_release(seg);
4547                         }
4548                 }
4549         }
4550 
4551         rw_exit(&rhash->rsmhash_rw);
4552 
4553         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_resume done\n"));
4554 }
4555 
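/*
 * Illustrative sketch (not part of the driver): importer_resume() holds
 * the import hash rwlock as RW_READER, which allows concurrent readers
 * while excluding writers that insert or remove segments. A
 * hypothetical walker over the import hash follows the same shape:
 */
#if 0
static void
rsm_import_walk(void (*func)(rsmseg_t *))
{
        int             i;
        rsmresource_t   *p;

        rw_enter(&rsm_import_segs.rsmhash_rw, RW_READER);
        for (i = 0; i < rsm_hash_size; i++) {
                for (p = rsm_import_segs.bucket[i]; p; p = p->rsmrc_next)
                        func((rsmseg_t *)p);
        }
        rw_exit(&rsm_import_segs.rsmhash_rw);
}
#endif
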
4556 static int
4557 rsmseg_resume(rsmseg_t *seg, void **cookie)
4558 {
4559         int                     e;
4560         int                     retc;
4561         off_t                   dev_offset;
4562         size_t                  maplen;
4563         uint_t                  maxprot;
4564         rsm_mapinfo_t           *p;
4565         rsmcookie_t             *hdl;
4566         rsm_import_share_t      *sharedp;
4567         DBG_DEFINE(category,
4568             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4569 
4570         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4571             "rsmseg_resume enter: key=%u\n", seg->s_key));
4572 
4573         *cookie = NULL;
4574 
4575         ASSERT(rsmseglock_held(seg));
4576 
4577         if ((seg->s_state != RSM_STATE_CONN_QUIESCE) &&
4578             (seg->s_state != RSM_STATE_MAP_QUIESCE)) {
4579                 return (RSM_SUCCESS);
4580         }
4581 
4582         sharedp = seg->s_share;
4583 
4584         rsmsharelock_acquire(seg);
4585 
4586         /* resume the shared connection and/or mapping */
4587         retc = rsmsegshare_resume(seg);
4588 
4589         if (seg->s_state == RSM_STATE_CONN_QUIESCE) {
4590                 /* shared state can either be connected or mapped */
4591                 if ((sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) ||
4592                     (sharedp->rsmsi_state == RSMSI_STATE_MAPPED)) {
4593                         ASSERT(retc == RSM_SUCCESS);
4594                         seg->s_handle.in = sharedp->rsmsi_handle;
4595                         rsmsharelock_release(seg);
4596                         seg->s_state = RSM_STATE_CONNECT;
4597 
4598                 } else { /* error in rsmpi connect during resume */
4599                         seg->s_handle.in = NULL;
4600                         seg->s_state = RSM_STATE_DISCONNECT;
4601 
4602                         sharedp->rsmsi_refcnt--;
4603                         *cookie = (void *)sharedp->rsmsi_cookie;
4604 
4605                         if (sharedp->rsmsi_refcnt == 0) {
4606                                 ASSERT(sharedp->rsmsi_mapcnt == 0);
4607                                 rsmsharelock_release(seg);
4608 
4609                                 /* clean up the shared data structure */
4610                                 mutex_destroy(&sharedp->rsmsi_lock);
4611                                 cv_destroy(&sharedp->rsmsi_cv);
4612                                 kmem_free((void *)(sharedp),
4613                                     sizeof (rsm_import_share_t));
4614 
4615                         } else {
4616                                 rsmsharelock_release(seg);
4617                         }
4618                         /*
4619                          * The following needs to be done after any
4620                          * rsmsharelock calls which use seg->s_share.
4621                          */
4622                         seg->s_share = NULL;
4623                 }
4624 
4625                 /* signal any waiting segment */
4626                 cv_broadcast(&seg->s_cv);
4627 
4628                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4629                     "rsmseg_resume done:state=%d\n", seg->s_state));
4630                 return (retc);
4631         }
4632 
4633         ASSERT(seg->s_state == RSM_STATE_MAP_QUIESCE);
4634 
4635         /* Setup protections for remap */
4636         maxprot = PROT_USER;
4637         if (seg->s_mode & RSM_PERM_READ) {
4638                 maxprot |= PROT_READ;
4639         }
4640         if (seg->s_mode & RSM_PERM_WRITE) {
4641                 maxprot |= PROT_WRITE;
4642         }
4643 
4644         if (sharedp->rsmsi_state != RSMSI_STATE_MAPPED) {
4645                 /* error in rsmpi connect or map during resume */
4646 
4647                 /* remap to trash page */
4648                 ASSERT(seg->s_ckl != NULL);
4649 
4650                 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) {
4651                         e = devmap_umem_remap(hdl->c_dhp, rsm_dip,
4652                             remap_cookie, hdl->c_off, hdl->c_len,
4653                             maxprot, 0, NULL);
4654 
4655                         DBG_PRINTF((category, RSM_ERR,
4656                             "rsmseg_resume:remap=%d\n", e));
4657                 }
4658 
4659                 seg->s_handle.in = NULL;
4660                 seg->s_state = RSM_STATE_DISCONNECT;
4661 
4662                 sharedp->rsmsi_refcnt--;
4663 
4664                 sharedp->rsmsi_mapcnt--;
4665                 seg->s_mapinfo = NULL;
4666 
4667                 if (sharedp->rsmsi_refcnt == 0) {
4668                         ASSERT(sharedp->rsmsi_mapcnt == 0);
4669                         rsmsharelock_release(seg);
4670 
4671                         /* clean up the shared data structure */
4672                         mutex_destroy(&sharedp->rsmsi_lock);
4673                         cv_destroy(&sharedp->rsmsi_cv);
4674                         kmem_free((void *)(sharedp),
4675                             sizeof (rsm_import_share_t));
4676 
4677                 } else {
4678                         rsmsharelock_release(seg);
4679                 }
4680                 /*
4681                  * The following needs to be done after any
4682                  * rsmsharelock calls which use seg->s_share.
4683                  */
4684                 seg->s_share = NULL;
4685 
4686                 /* signal any waiting segment */
4687                 cv_broadcast(&seg->s_cv);
4688 
4689                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4690                     "rsmseg_resume done:seg=%x,err=%d\n",
4691                     seg->s_key, retc));
4692                 return (retc);
4693 
4694         }
4695 
4696         seg->s_handle.in = sharedp->rsmsi_handle;
4697 
4698         if (seg->s_node == my_nodeid) { /* loopback */
4699                 ASSERT(seg->s_mapinfo == NULL);
4700 
4701                 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) {
4702                         e = devmap_umem_remap(hdl->c_dhp,
4703                             rsm_dip, seg->s_cookie,
4704                             hdl->c_off, hdl->c_len,
4705                             maxprot, 0, NULL);
4706 
4707                         DBG_PRINTF((category, RSM_ERR,
4708                             "rsmseg_resume:remap=%d\n", e));
4709                 }
4710         } else { /* remote exporter */
4711                 /* remap to the new rsmpi maps */
4712                 seg->s_mapinfo = sharedp->rsmsi_mapinfo;
4713 
4714                 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) {
4715                         p = rsm_get_mapinfo(seg, hdl->c_off, hdl->c_len,
4716                             &dev_offset, &maplen);
4717                         e = devmap_devmem_remap(hdl->c_dhp,
4718                             p->dip, p->dev_register, dev_offset,
4719                             maplen, maxprot, 0, NULL);
4720 
4721                         DBG_PRINTF((category, RSM_ERR,
4722                             "rsmseg_resume:remap=%d\n", e));
4723                 }
4724         }
4725 
4726         rsmsharelock_release(seg);
4727 
4728         seg->s_state = RSM_STATE_ACTIVE;
4729         cv_broadcast(&seg->s_cv);
4730 
4731         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_resume done\n"));
4732 
4733         return (retc);
4734 }
4735 
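/*
 * Illustrative sketch (not part of the driver): the maxprot value
 * computed in rsmseg_resume() translates the segment's RSM access mode
 * into the protection bits expected by the devmap remap routines. The
 * translation in isolation, as a hypothetical helper:
 */
#if 0
static uint_t
rsm_mode_to_maxprot(uint_t mode)
{
        uint_t  maxprot = PROT_USER;    /* user-level mappings only */

        if (mode & RSM_PERM_READ)
                maxprot |= PROT_READ;
        if (mode & RSM_PERM_WRITE)
                maxprot |= PROT_WRITE;
        return (maxprot);
}
#endif
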
4736 static int
4737 rsmsegshare_resume(rsmseg_t *seg)
4738 {
4739         int                     e = RSM_SUCCESS;
4740         adapter_t               *adapter;
4741         rsm_import_share_t      *sharedp;
4742         DBG_DEFINE(category,
4743             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4744 
4745         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegshare_resume enter\n"));
4746 
4747         ASSERT(rsmseglock_held(seg));
4748         ASSERT(rsmsharelock_held(seg));
4749 
4750         sharedp = seg->s_share;
4751 
4752         /*
4753          * If we are not in a xxxx_QUIESCE state, the shared
4754          * connect/mapping processing has already been done,
4755          * so return success.
4756          */
4757         if ((sharedp->rsmsi_state != RSMSI_STATE_CONN_QUIESCE) &&
4758             (sharedp->rsmsi_state != RSMSI_STATE_MAP_QUIESCE)) {
4759                 return (RSM_SUCCESS);
4760         }
4761 
4762         adapter = seg->s_adapter;
4763 
4764         if (sharedp->rsmsi_node != my_nodeid) {
4765                 rsm_addr_t      hwaddr;
4766                 hwaddr = get_remote_hwaddr(adapter, sharedp->rsmsi_node);
4767 
4768                 e = adapter->rsmpi_ops->rsm_connect(
4769                     adapter->rsmpi_handle, hwaddr,
4770                     sharedp->rsmsi_segid, &sharedp->rsmsi_handle);
4771 
4772                 DBG_PRINTF((category, RSM_DEBUG,
4773                     "rsmsegshare_resume:rsmpi connect seg=%x:err=%d\n",
4774                     sharedp->rsmsi_segid, e));
4775 
4776                 if (e != RSM_SUCCESS) {
4777                         /* when do we send the NOT_IMPORTING message */
4778                         sharedp->rsmsi_handle = NULL;
4779                         sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED;
4780                         /* signal any waiting segment */
4781                         cv_broadcast(&sharedp->rsmsi_cv);
4782                         return (e);
4783                 }
4784         }
4785 
4786         if (sharedp->rsmsi_state == RSMSI_STATE_CONN_QUIESCE) {
4787                 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
4788                 /* signal any waiting segment */
4789                 cv_broadcast(&sharedp->rsmsi_cv);
4790                 return (e);
4791         }
4792 
4793         ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAP_QUIESCE);
4794 
4795         /* do the rsmpi map of the whole segment here */
4796         if (sharedp->rsmsi_node != my_nodeid) {
4797                 size_t mapped_len;
4798                 rsm_mapinfo_t *p;
4799 
4800                 /*
4801                  * We need to do rsmpi maps with <off, lens> identical to
4802                  * the old mapinfo list because the segment mapping handles
4803                  * dhp and such need the fragmentation of rsmpi maps to be
4804                  * identical to what it was during the mmap of the segment
4805                  */
4806                 p = sharedp->rsmsi_mapinfo;
4807 
4808                 while (p != NULL) {
4809                         mapped_len = 0;
4810 
4811                         e = adapter->rsmpi_ops->rsm_map(
4812                             sharedp->rsmsi_handle, p->start_offset,
4813                             p->individual_len, &mapped_len,
4814                             &p->dip, &p->dev_register, &p->dev_offset,
4815                             NULL, NULL);
4816 
4817                         if (e != 0) {
4818                                 DBG_PRINTF((category, RSM_ERR,
4819                                     "rsmsegshare_resume: rsmpi map err=%d\n",
4820                                     e));
4821                                 break;
4822                         }
4823 
4824                         if (mapped_len != p->individual_len) {
4825                                 DBG_PRINTF((category, RSM_ERR,
4826                                     "rsmsegshare_resume: rsmpi maplen"
4827                                     " < reqlen=%lx\n", mapped_len));
4828                                 e = RSMERR_BAD_LENGTH;
4829                                 break;
4830                         }
4831 
4832                         p = p->next;
4833 
4834                 }
4835 
4836 
4837                 if (e != RSM_SUCCESS) { /* rsmpi map failed */
4838                         int     err;
4839                         /* Check if this is the first rsm_map */
4840                         if (p != sharedp->rsmsi_mapinfo) {
4841                                 /*
4842                                  * A single rsm_unmap undoes multiple rsm_maps.
4843                                  */
4844                                 (void) seg->s_adapter->rsmpi_ops->
4845                                     rsm_unmap(sharedp->rsmsi_handle);
4846                         }
4847 
4848                         rsm_free_mapinfo(sharedp->rsmsi_mapinfo);
4849                         sharedp->rsmsi_mapinfo = NULL;
4850 
4851                         err = adapter->rsmpi_ops->
4852                             rsm_disconnect(sharedp->rsmsi_handle);
4853 
4854                         DBG_PRINTF((category, RSM_DEBUG,
4855                             "rsmsegshare_resume:disconn seg=%x:err=%d\n",
4856                             sharedp->rsmsi_segid, err));
4857 
4858                         sharedp->rsmsi_handle = NULL;
4859                         sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED;
4860 
4861                         /* signal the waiting segments */
4862                         cv_broadcast(&sharedp->rsmsi_cv);
4863                         DBG_PRINTF((category, RSM_DEBUG,
4864                             "rsmsegshare_resume done: rsmpi map err\n"));
4865                         return (e);
4866                 }
4867         }
4868 
4869         sharedp->rsmsi_state = RSMSI_STATE_MAPPED;
4870 
4871         /* signal any waiting segment */
4872         cv_broadcast(&sharedp->rsmsi_cv);
4873 
4874         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegshare_resume done\n"));
4875 
4876         return (e);
4877 }
4878 
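/*
 * Illustrative sketch (not part of the driver): the resume path must
 * reissue rsmpi maps with exactly the <offset, length> fragmentation
 * recorded in the rsmsi_mapinfo list. A hypothetical helper that sums
 * the fragment lengths, e.g. to sanity-check the list against the
 * segment size, using the field names seen above:
 */
#if 0
static size_t
rsm_mapinfo_total_len(rsm_mapinfo_t *list)
{
        rsm_mapinfo_t   *p;
        size_t          total = 0;

        for (p = list; p != NULL; p = p->next)
                total += p->individual_len;
        return (total);
}
#endif
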
4879 /*
4880  * this is the routine that gets called by recv_taskq which is the
4881  * thread that processes messages that are flow-controlled.
4882  */
4883 static void
4884 rsm_intr_proc_deferred(void *arg)
4885 {
4886         path_t                  *path = (path_t *)arg;
4887         rsmipc_request_t        *msg;
4888         rsmipc_msghdr_t         *msghdr;
4889         rsm_node_id_t           src_node;
4890         msgbuf_elem_t           *head;
4891         int                     e;
4892         DBG_DEFINE(category,
4893             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4894 
4895         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4896             "rsm_intr_proc_deferred enter\n"));
4897 
4898         mutex_enter(&path->mutex);
4899 
4900         /* use the head of the msgbuf_queue */
4901         head = rsmka_gethead_msgbuf(path);
4902 
4903         mutex_exit(&path->mutex);
4904 
4905         msg = (rsmipc_request_t *)&(head->msg);
4906         msghdr = (rsmipc_msghdr_t *)msg;
4907 
4908         src_node = msghdr->rsmipc_src;
4909 
4910         /*
4911          * messages that need to send a reply should check the message
4912          * version before processing the message; all such messages are
4913          * processed here by the worker thread.
4914          */
4915         switch (msghdr->rsmipc_type) {
4916         case RSMIPC_MSG_SEGCONNECT:
4917                 if (msghdr->rsmipc_version != RSM_VERSION) {
4918                         rsmipc_reply_t reply;
4919                         reply.rsmipc_status = RSMERR_BAD_DRIVER_VERSION;
4920                         reply.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPLY;
4921                         reply.rsmipc_hdr.rsmipc_cookie = msghdr->rsmipc_cookie;
4922                         (void) rsmipc_send(msghdr->rsmipc_src, NULL, &reply);
4923                 } else {
4924                         rsm_intr_segconnect(src_node, msg);
4925                 }
4926                 break;
4927         case RSMIPC_MSG_DISCONNECT:
4928                 rsm_force_unload(src_node, msg->rsmipc_key, DISCONNECT);
4929                 break;
4930         case RSMIPC_MSG_SUSPEND:
4931                 importer_suspend(src_node);
4932                 break;
4933         case RSMIPC_MSG_SUSPEND_DONE:
4934                 rsm_suspend_complete(src_node, 0);
4935                 break;
4936         case RSMIPC_MSG_RESUME:
4937                 importer_resume(src_node);
4938                 break;
4939         default:
4940                 ASSERT(0);
4941         }
4942 
4943         mutex_enter(&path->mutex);
4944 
4945         rsmka_dequeue_msgbuf(path);
4946 
4947         /* increment procmsg_cnt; it can be at most RSMIPC_MAX_MESSAGES */
4948         if (path->procmsg_cnt < RSMIPC_MAX_MESSAGES)
4949                 path->procmsg_cnt++;
4950 
4951         ASSERT(path->procmsg_cnt <= RSMIPC_MAX_MESSAGES);
4952 
4953         /* No need to send credits if path is going down */
4954         if ((path->state == RSMKA_PATH_ACTIVE) &&
4955             (path->procmsg_cnt >= RSMIPC_LOTSFREE_MSGBUFS)) {
4956                 /*
4957                  * send credits and reset procmsg_cnt if success otherwise
4958                  * credits will be sent after processing the next message
4959                  */
4960                 e = rsmipc_send_controlmsg(path, RSMIPC_MSG_CREDIT);
4961                 if (e == 0)
4962                         path->procmsg_cnt = 0;
4963                 else
4964                         DBG_PRINTF((category, RSM_ERR,
4965                             "rsm_intr_proc_deferred:send credits err=%d\n", e));
4966         }
4967 
4968         /*
4969          * decrement the path refcnt since we incremented it in
4970          * rsm_intr_callback_dispatch
4971          */
4972         PATH_RELE_NOLOCK(path);
4973 
4974         mutex_exit(&path->mutex);
4975 
4976         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4977             "rsm_intr_proc_deferred done\n"));
4978 }
4979 
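/*
 * Illustrative sketch (not part of the driver): the credit protocol has
 * two halves. The receiver (above) counts processed messages and, once
 * RSMIPC_LOTSFREE_MSGBUFS have accumulated, returns them in a single
 * RSMIPC_MSG_CREDIT control message. The sender (see rsmipc_send())
 * reserves one msgbuf per flow-controlled message and sleeps when none
 * are available. The sender-side reservation, in outline:
 */
#if 0
        mutex_enter(&path->mutex);
        while ((sendq_token->msgbuf_avail == 0) &&
            (path->state == RSMKA_PATH_ACTIVE))
                cv_wait(&sendq_token->sendq_cv, &path->mutex);
        if (path->state == RSMKA_PATH_ACTIVE)
                sendq_token->msgbuf_avail--;    /* reserve a credit */
        mutex_exit(&path->mutex);
#endif
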
4980 /*
4981  * Flow-controlled messages are enqueued and dispatched onto a taskq here
4982  */
4983 static void
4984 rsm_intr_callback_dispatch(void *data, rsm_addr_t src_hwaddr,
4985     rsm_intr_hand_arg_t arg)
4986 {
4987         srv_handler_arg_t       *hdlr_argp = (srv_handler_arg_t *)arg;
4988         path_t                  *path;
4989         rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)data;
4990         DBG_DEFINE(category,
4991             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4992 
4993         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4994             "rsm_intr_callback_dispatch enter\n"));
4995         ASSERT(data && hdlr_argp);
4996 
4997         /* look up the path - incr the path refcnt */
4998         path = rsm_find_path(hdlr_argp->adapter_name,
4999             hdlr_argp->adapter_instance, src_hwaddr);
5000 
5001         /* the path has been removed - drop this message */
5002         if (path == NULL) {
5003                 DBG_PRINTF((category, RSM_DEBUG,
5004                     "rsm_intr_callback_dispatch done: msg dropped\n"));
5005                 return;
5006         }
5007         /* the path is not active - don't accept new messages */
5008         if (path->state != RSMKA_PATH_ACTIVE) {
5009                 PATH_RELE_NOLOCK(path);
5010                 mutex_exit(&path->mutex);
5011                 DBG_PRINTF((category, RSM_DEBUG,
5012                     "rsm_intr_callback_dispatch done: msg dropped"
5013                     " path=%lx !ACTIVE\n", path));
5014                 return;
5015         }
5016 
5017         /*
5018          * Check if this message was sent to an older incarnation
5019          * of the path/sendq.
5020          */
5021         if (path->local_incn != msghdr->rsmipc_incn) {
5022                 /* decrement the refcnt */
5023                 PATH_RELE_NOLOCK(path);
5024                 mutex_exit(&path->mutex);
5025                 DBG_PRINTF((category, RSM_DEBUG,
5026                     "rsm_intr_callback_dispatch done: old incn %lld\n",
5027                     msghdr->rsmipc_incn));
5028                 return;
5029         }
5030 
5031         /* copy and enqueue msg on the path's msgbuf queue */
5032         rsmka_enqueue_msgbuf(path, data);
5033 
5034         /*
5035          * schedule task to process messages - ignore retval from
5036          * taskq_dispatch because the sender cannot send more than
5037          * what the receiver can handle.
5038          */
5039         (void) taskq_dispatch(path->recv_taskq,
5040             rsm_intr_proc_deferred, path, KM_NOSLEEP);
5041 
5042         mutex_exit(&path->mutex);
5043 
5044         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5045             "rsm_intr_callback_dispatch done\n"));
5046 }
5047 
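/*
 * Illustrative sketch (not part of the driver): the path reference
 * taken by rsm_find_path() in the dispatch routine is owned by the
 * queued task and is dropped only by rsm_intr_proc_deferred() via
 * PATH_RELE_NOLOCK(). The hand-off, in outline:
 */
#if 0
        /* refcnt was incremented by rsm_find_path() */
        rsmka_enqueue_msgbuf(path, data);
        (void) taskq_dispatch(path->recv_taskq,
            rsm_intr_proc_deferred, path, KM_NOSLEEP);
        /* the task, not this thread, releases the path reference */
#endif
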
5048 /*
5049  * This procedure is called from rsm_srv_func when a remote node creates
5050  * a send queue.  This event is used as a hint that an earlier failed
5051  * attempt to create a send queue to that remote node may now succeed and
5052  * should be retried.  Indication of an earlier failed attempt is provided
5053  * by the RSMKA_SQCREATE_PENDING flag.
5054  */
5055 static void
5056 rsm_sqcreateop_callback(rsm_addr_t src_hwaddr, rsm_intr_hand_arg_t arg)
5057 {
5058         srv_handler_arg_t       *hdlr_argp = (srv_handler_arg_t *)arg;
5059         path_t                  *path;
5060         DBG_DEFINE(category,
5061             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5062 
5063         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5064             "rsm_sqcreateop_callback enter\n"));
5065 
5066         /* look up the path - incr the path refcnt */
5067         path = rsm_find_path(hdlr_argp->adapter_name,
5068             hdlr_argp->adapter_instance, src_hwaddr);
5069 
5070         if (path == NULL) {
5071                 DBG_PRINTF((category, RSM_DEBUG,
5072                     "rsm_sqcreateop_callback done: no path\n"));
5073                 return;
5074         }
5075 
5076         if ((path->state == RSMKA_PATH_UP) &&
5077             (path->flags & RSMKA_SQCREATE_PENDING)) {
5078                 /*
5079                  * previous attempt to create sendq had failed, retry
5080                  * it and move to RSMKA_PATH_ACTIVE state if successful.
5081                  * the refcnt will be decremented in the do_deferred_work
5082                  */
5083                 (void) rsmka_do_path_active(path, RSMKA_NO_SLEEP);
5084         } else {
5085                 /* decrement the refcnt */
5086                 PATH_RELE_NOLOCK(path);
5087         }
5088         mutex_exit(&path->mutex);
5089 
5090         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5091             "rsm_sqcreateop_callback done\n"));
5092 }
5093 
5094 static void
5095 rsm_intr_callback(void *data, rsm_addr_t src_hwaddr, rsm_intr_hand_arg_t arg)
5096 {
5097         rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)data;
5098         rsmipc_request_t *msg = (rsmipc_request_t *)data;
5099         rsmipc_controlmsg_t *ctrlmsg = (rsmipc_controlmsg_t *)data;
5100         rsm_node_id_t src_node;
5101         DBG_DEFINE(category,
5102             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5103 
5104         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_callback enter:"
5105             "src=%d, type=%d\n", msghdr->rsmipc_src,
5106             msghdr->rsmipc_type));
5107 
5108         /*
5109          * Check for the version number in the msg header. If it is not
5110          * RSM_VERSION, drop the message. In the future, we need to manage
5111          * incompatible version numbers in some way.
5112          */
5113         if (msghdr->rsmipc_version != RSM_VERSION) {
5114                 DBG_PRINTF((category, RSM_ERR, "wrong KA version\n"));
5115                 /*
5116                  * Drop requests that don't have a reply right here
5117                  * Request with reply will send a BAD_VERSION reply
5118                  * when they get processed by the worker thread.
5119                  */
5120                 if (msghdr->rsmipc_type != RSMIPC_MSG_SEGCONNECT) {
5121                         return;
5122                 }
5123 
5124         }
5125 
5126         src_node = msghdr->rsmipc_src;
5127 
5128         switch (msghdr->rsmipc_type) {
5129         case RSMIPC_MSG_SEGCONNECT:
5130         case RSMIPC_MSG_DISCONNECT:
5131         case RSMIPC_MSG_SUSPEND:
5132         case RSMIPC_MSG_SUSPEND_DONE:
5133         case RSMIPC_MSG_RESUME:
5134                 /*
5135                  * These message types are handled by a worker thread using
5136                  * the flow-control algorithm.
5137                  * Any message processing that does one or more of the
5138                  * following should be handled in a worker thread.
5139                  *      - allocates resources and might sleep
5140                  *      - makes RSMPI calls down to the interconnect driver
5141          *        (by definition this includes requests with replies)
5142                  *      - takes a long duration of time
5143                  */
5144                 rsm_intr_callback_dispatch(data, src_hwaddr, arg);
5145                 break;
5146         case RSMIPC_MSG_NOTIMPORTING:
5147                 importer_list_rm(src_node, msg->rsmipc_key,
5148                     msg->rsmipc_segment_cookie);
5149                 break;
5150         case RSMIPC_MSG_SQREADY:
5151                 rsm_proc_sqready(data, src_hwaddr, arg);
5152                 break;
5153         case RSMIPC_MSG_SQREADY_ACK:
5154                 rsm_proc_sqready_ack(data, src_hwaddr, arg);
5155                 break;
5156         case RSMIPC_MSG_CREDIT:
5157                 rsm_add_credits(ctrlmsg, src_hwaddr, arg);
5158                 break;
5159         case RSMIPC_MSG_REPLY:
5160                 rsm_intr_reply(msghdr);
5161                 break;
5162         case RSMIPC_MSG_BELL:
5163                 rsm_intr_event(msg);
5164                 break;
5165         case RSMIPC_MSG_IMPORTING:
5166                 importer_list_add(src_node, msg->rsmipc_key,
5167                     msg->rsmipc_adapter_hwaddr,
5168                     msg->rsmipc_segment_cookie);
5169                 break;
5170         case RSMIPC_MSG_REPUBLISH:
5171                 importer_update(src_node, msg->rsmipc_key, msg->rsmipc_perm);
5172                 break;
5173         default:
5174                 DBG_PRINTF((category, RSM_DEBUG,
5175                     "rsm_intr_callback: bad msg %lx type %d data %lx\n",
5176                     (size_t)msg, (int)(msghdr->rsmipc_type), (size_t)data));
5177         }
5178 
5179         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_callback done\n"));
5180 
5181 }
5182 
5183 rsm_intr_hand_ret_t rsm_srv_func(rsm_controller_object_t *chd,
5184     rsm_intr_q_op_t opcode, rsm_addr_t src,
5185     void *data, size_t size, rsm_intr_hand_arg_t arg)
5186 {
5187         DBG_DEFINE(category,
5188             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5189 
5190         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_srv_func enter\n"));
5191 
5192         switch (opcode) {
5193         case RSM_INTR_Q_OP_CREATE:
5194                 DBG_PRINTF((category, RSM_DEBUG, "rsm_srv_func:OP_CREATE\n"));
5195                 rsm_sqcreateop_callback(src, arg);
5196                 break;
5197         case RSM_INTR_Q_OP_DESTROY:
5198                 DBG_PRINTF((category, RSM_DEBUG, "rsm_srv_func:OP_DESTROY\n"));
5199                 break;
5200         case RSM_INTR_Q_OP_RECEIVE:
5201                 rsm_intr_callback(data, src, arg);
5202                 break;
5203         default:
5204                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5205                     "rsm_srv_func: unknown opcode = %x\n", opcode));
5206         }
5207 
5208         chd = chd;              /* lint - unused arguments */
5209         size = size;
5210 
5211         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_srv_func done\n"));
5212 
5213         return (RSM_INTR_HAND_CLAIMED);
5214 }
5215 
5216 /* *************************** IPC slots ************************* */
5217 static rsmipc_slot_t *
5218 rsmipc_alloc()
5219 {
5220         int i;
5221         rsmipc_slot_t *slot;
5222         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
5223 
5224         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_alloc enter\n"));
5225 
5226         /* try to find a free slot, if not wait */
5227         mutex_enter(&rsm_ipc.lock);
5228 
5229         while (rsm_ipc.count == 0) {
5230                 rsm_ipc.wanted = 1;
5231                 cv_wait(&rsm_ipc.cv, &rsm_ipc.lock);
5232         }
5233 
5234         /* An empty slot is available, find it */
5235         slot = &rsm_ipc.slots[0];
5236         for (i = 0; i < RSMIPC_SZ; i++, slot++) {
5237                 if (RSMIPC_GET(slot, RSMIPC_FREE)) {
5238                         RSMIPC_CLEAR(slot, RSMIPC_FREE);
5239                         break;
5240                 }
5241         }
5242 
5243         ASSERT(i < RSMIPC_SZ);
5244         rsm_ipc.count--;        /* one less is available */
5245         rsm_ipc.sequence++; /* new sequence */
5246 
5247         slot->rsmipc_cookie.ic.sequence = (uint_t)rsm_ipc.sequence;
5248         slot->rsmipc_cookie.ic.index = (uint_t)i;
5249 
5250         mutex_exit(&rsm_ipc.lock);
5251 
5252         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_alloc done\n"));
5253 
5254         return (slot);
5255 }
5256 
5257 static void
5258 rsmipc_free(rsmipc_slot_t *slot)
5259 {
5260         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
5261 
5262         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_free enter\n"));
5263 
5264         ASSERT(MUTEX_HELD(&slot->rsmipc_lock));
5265         ASSERT(&rsm_ipc.slots[slot->rsmipc_cookie.ic.index] == slot);
5266 
5267         mutex_enter(&rsm_ipc.lock);
5268 
5269         RSMIPC_SET(slot, RSMIPC_FREE);
5270 
5271         slot->rsmipc_cookie.ic.sequence = 0;
5272 
5273         mutex_exit(&slot->rsmipc_lock);
5274         rsm_ipc.count++;
5275         ASSERT(rsm_ipc.count <= RSMIPC_SZ);
5276         if (rsm_ipc.wanted) {
5277                 rsm_ipc.wanted = 0;
5278                 cv_broadcast(&rsm_ipc.cv);
5279         }
5280 
5281         mutex_exit(&rsm_ipc.lock);
5282 
5283         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_free done\n"));
5284 }
5285 
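/*
 * Illustrative sketch (not part of the driver): each slot cookie packs
 * a monotonically increasing sequence number together with the slot
 * index, so a reply that arrives after its slot has been freed and
 * recycled can be detected and discarded. A hypothetical staleness
 * check (the real code would perform it under the slot lock; the
 * cookie type name here is assumed):
 */
#if 0
static int
rsmipc_cookie_is_current(rsmipc_cookie_t cookie)
{
        rsmipc_slot_t *slot = &rsm_ipc.slots[cookie.ic.index];

        /* stale if the slot was freed/reallocated since the send */
        return (slot->rsmipc_cookie.ic.sequence == cookie.ic.sequence);
}
#endif
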
5286 static int
5287 rsmipc_send(rsm_node_id_t dest, rsmipc_request_t *req, rsmipc_reply_t *reply)
5288 {
5289         int             e = 0;
5290         int             credit_check = 0;
5291         int             retry_cnt = 0;
5292         int             min_retry_cnt = 10;
5293         rsm_send_t      is;
5294         rsmipc_slot_t   *rslot;
5295         adapter_t       *adapter;
5296         path_t          *path;
5297         sendq_token_t   *sendq_token;
5298         sendq_token_t   *used_sendq_token = NULL;
5299         rsm_send_q_handle_t     ipc_handle;
5300         DBG_DEFINE(category,
5301             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5302 
5303         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5304             "rsmipc_send enter:dest=%d\n", dest));
5305 
5306         /*
5307          * Check if this is a local case
5308          */
5309         if (dest == my_nodeid) {
5310                 switch (req->rsmipc_hdr.rsmipc_type) {
5311                 case RSMIPC_MSG_SEGCONNECT:
5312                         reply->rsmipc_status = (short)rsmsegacl_validate(
5313                             req, dest, reply);
5314                         break;
5315                 case RSMIPC_MSG_BELL:
5316                         req->rsmipc_hdr.rsmipc_src = dest;
5317                         rsm_intr_event(req);
5318                         break;
5319                 case RSMIPC_MSG_IMPORTING:
5320                         importer_list_add(dest, req->rsmipc_key,
5321                             req->rsmipc_adapter_hwaddr,
5322                             req->rsmipc_segment_cookie);
5323                         break;
5324                 case RSMIPC_MSG_NOTIMPORTING:
5325                         importer_list_rm(dest, req->rsmipc_key,
5326                             req->rsmipc_segment_cookie);
5327                         break;
5328                 case RSMIPC_MSG_REPUBLISH:
5329                         importer_update(dest, req->rsmipc_key,
5330                             req->rsmipc_perm);
5331                         break;
5332                 case RSMIPC_MSG_SUSPEND:
5333                         importer_suspend(dest);
5334                         break;
5335                 case RSMIPC_MSG_SUSPEND_DONE:
5336                         rsm_suspend_complete(dest, 0);
5337                         break;
5338                 case RSMIPC_MSG_RESUME:
5339                         importer_resume(dest);
5340                         break;
5341                 default:
5342                         ASSERT(0);
5343                 }
5344                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5345                     "rsmipc_send done\n"));
5346                 return (0);
5347         }
5348 
5349         if (dest >= MAX_NODES) {
5350                 DBG_PRINTF((category, RSM_ERR,
5351                     "rsm: rsmipc_send bad node number %x\n", dest));
5352                 return (RSMERR_REMOTE_NODE_UNREACHABLE);
5353         }
5354 
5355         /*
5356          * Oh boy! We are going remote.
5357          */
5358 
5359         /*
5360          * identify if we need to have credits to send this message
5361          * - only selected requests are flow controlled
5362          */
5363         if (req != NULL) {
5364                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5365                     "rsmipc_send:request type=%d\n",
5366                     req->rsmipc_hdr.rsmipc_type));
5367 
5368                 switch (req->rsmipc_hdr.rsmipc_type) {
5369                 case RSMIPC_MSG_SEGCONNECT:
5370                 case RSMIPC_MSG_DISCONNECT:
5371                 case RSMIPC_MSG_IMPORTING:
5372                 case RSMIPC_MSG_SUSPEND:
5373                 case RSMIPC_MSG_SUSPEND_DONE:
5374                 case RSMIPC_MSG_RESUME:
5375                         credit_check = 1;
5376                         break;
5377                 default:
5378                         credit_check = 0;
5379                 }
5380         }
5381 
5382 again:
5383         if (retry_cnt++ == min_retry_cnt) {
5384                 /* back off for 10ms before further retries */
5385                 delay(drv_usectohz(10000));
5386                 retry_cnt = 0; /* reset retry_cnt */
5387         }
5388         sendq_token = rsmka_get_sendq_token(dest, used_sendq_token);
5389         if (sendq_token == NULL) {
5390                 DBG_PRINTF((category, RSM_ERR,
5391                     "rsm: rsmipc_send no device to reach node %d\n", dest));
5392                 return (RSMERR_REMOTE_NODE_UNREACHABLE);
5393         }
5394 
5395         if ((sendq_token == used_sendq_token) &&
5396             ((e == RSMERR_CONN_ABORTED) || (e == RSMERR_TIMEOUT) ||
5397             (e == RSMERR_COMM_ERR_MAYBE_DELIVERED))) {
5398                 rele_sendq_token(sendq_token);
5399                 DBG_PRINTF((category, RSM_DEBUG, "rsmipc_send done=%d\n", e));
5400                 return (RSMERR_CONN_ABORTED);
5401         } else
5402                 used_sendq_token = sendq_token;
5403 
5404 /* lint -save -e413 */
5405         path = SQ_TOKEN_TO_PATH(sendq_token);
5406         adapter = path->local_adapter;
5407 /* lint -restore */
5408         ipc_handle = sendq_token->rsmpi_sendq_handle;
5409 
5410         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5411             "rsmipc_send: path=%lx sendq_hdl=%lx\n", path, ipc_handle));
5412 
5413         if (reply == NULL) {
5414                 /* Send request without ack */
5415                 /*
5416                  * Set the rsmipc_version number in the msghdr for KA
5417                  * communication versioning
5418                  */
5419                 req->rsmipc_hdr.rsmipc_version = RSM_VERSION;
5420                 req->rsmipc_hdr.rsmipc_src = my_nodeid;
5421                 /*
5422                  * remote endpoints incn should match the value in our
5423                  * path's remote_incn field. No need to grab any lock
5424                  * since we have refcnted the path in rsmka_get_sendq_token
5425                  */
5426                 req->rsmipc_hdr.rsmipc_incn = path->remote_incn;
5427 
5428                 is.is_data = (void *)req;
5429                 is.is_size = sizeof (*req);
5430                 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
5431                 is.is_wait = 0;
5432 
5433                 if (credit_check) {
5434                         mutex_enter(&path->mutex);
5435                         /*
5436                          * wait till we receive credits or the path goes
5437                          * down. If the path goes down, rsm_send will fail
5438                          * and we handle the error then.
5439                          */
5440                         while ((sendq_token->msgbuf_avail == 0) &&
5441                             (path->state == RSMKA_PATH_ACTIVE)) {
5442                                 e = cv_wait_sig(&sendq_token->sendq_cv,
5443                                     &path->mutex);
5444                                 if (e == 0) {
5445                                         mutex_exit(&path->mutex);
5446                                         no_reply_cnt++;
5447                                         rele_sendq_token(sendq_token);
5448                                         DBG_PRINTF((category, RSM_DEBUG,
5449                                             "rsmipc_send done: "
5450                                             "cv_wait INTERRUPTED"));
5451                                         return (RSMERR_INTERRUPTED);
5452                                 }
5453                         }
5454 
5455                         /*
5456                          * path is not active; retry on another path.
5457                          */
5458                         if (path->state != RSMKA_PATH_ACTIVE) {
5459                                 mutex_exit(&path->mutex);
5460                                 rele_sendq_token(sendq_token);
5461                                 e = RSMERR_CONN_ABORTED;
5462                                 DBG_PRINTF((category, RSM_ERR,
5463                                     "rsm: rsmipc_send: path !ACTIVE"));
5464                                 goto again;
5465                         }
5466 
5467                         ASSERT(sendq_token->msgbuf_avail > 0);
5468 
5469                         /*
5470                          * reserve a msgbuf
5471                          */
5472                         sendq_token->msgbuf_avail--;
5473 
5474                         mutex_exit(&path->mutex);
5475 
5476                         e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
5477                             NULL);
5478 
5479                         if (e != RSM_SUCCESS) {
5480                                 mutex_enter(&path->mutex);
5481                                 /*
5482                                  * release the reserved msgbuf since
5483                                  * the send failed
5484                                  */
5485                                 sendq_token->msgbuf_avail++;
5486                                 cv_broadcast(&sendq_token->sendq_cv);
5487                                 mutex_exit(&path->mutex);
5488                         }
5489                 } else
5490                         e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
5491                             NULL);
5492 
5493                 no_reply_cnt++;
5494                 rele_sendq_token(sendq_token);
5495                 if (e != RSM_SUCCESS) {
5496                         DBG_PRINTF((category, RSM_ERR,
5497                             "rsm: rsmipc_send no reply send"
5498                             " err = %d no reply count = %d\n",
5499                             e, no_reply_cnt));
5500                         ASSERT(e != RSMERR_QUEUE_FENCE_UP &&
5501                             e != RSMERR_BAD_BARRIER_HNDL);
5502                         atomic_inc_64(&rsm_ipcsend_errcnt);
5503                         goto again;
5504                 } else {
5505                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5506                             "rsmipc_send done\n"));
5507                         return (e);
5508                 }
5509 
5510         }
5511 
5512         if (req == NULL) {
5513                 /* Send reply - No flow control is done for reply */
5514                 /*
5515                  * Set the version in the msg header for KA communication
5516                  * versioning
5517                  */
5518                 reply->rsmipc_hdr.rsmipc_version = RSM_VERSION;
5519                 reply->rsmipc_hdr.rsmipc_src = my_nodeid;
5520                 /* incn number is not used for reply msgs currently */
5521                 reply->rsmipc_hdr.rsmipc_incn = path->remote_incn;
5522 
5523                 is.is_data = (void *)reply;
5524                 is.is_size = sizeof (*reply);
5525                 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
5526                 is.is_wait = 0;
5527                 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, NULL);
5528                 rele_sendq_token(sendq_token);
5529                 if (e != RSM_SUCCESS) {
5530                         DBG_PRINTF((category, RSM_ERR,
5531                             "rsm: rsmipc_send reply send"
5532                             " err = %d\n", e));
5533                         atomic_inc_64(&rsm_ipcsend_errcnt);
5534                         goto again;
5535                 } else {
5536                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5537                             "rsmipc_send done\n"));
5538                         return (e);
5539                 }
5540         }
5541 
5542         /* Reply needed */
5543         rslot = rsmipc_alloc(); /* allocate a new ipc slot */
5544 
5545         mutex_enter(&rslot->rsmipc_lock);
5546 
5547         rslot->rsmipc_data = (void *)reply;
5548         RSMIPC_SET(rslot, RSMIPC_PENDING);
5549 
5550         while (RSMIPC_GET(rslot, RSMIPC_PENDING)) {
5551                 /*
5552                  * Set the rsmipc_version number in the msghdr for KA
5553                  * communication versioning
5554                  */
5555                 req->rsmipc_hdr.rsmipc_version = RSM_VERSION;
5556                 req->rsmipc_hdr.rsmipc_src = my_nodeid;
5557                 req->rsmipc_hdr.rsmipc_cookie = rslot->rsmipc_cookie;
5558                 /*
5559                  * remote endpoints incn should match the value in our
5560                  * path's remote_incn field. No need to grab any lock
5561                  * since we have refcnted the path in rsmka_get_sendq_token
5562                  */
5563                 req->rsmipc_hdr.rsmipc_incn = path->remote_incn;
5564 
5565                 is.is_data = (void *)req;
5566                 is.is_size = sizeof (*req);
5567                 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
5568                 is.is_wait = 0;
5569                 if (credit_check) {
5570 
5571                         mutex_enter(&path->mutex);
5572                         /*
5573                          * wait till we receive credits or the path goes
5574                          * down. If the path goes down, rsm_send will fail
5575                          * and we handle the error then.
5576                          */
5577                         while ((sendq_token->msgbuf_avail == 0) &&
5578                             (path->state == RSMKA_PATH_ACTIVE)) {
5579                                 e = cv_wait_sig(&sendq_token->sendq_cv,
5580                                     &path->mutex);
5581                                 if (e == 0) {
5582                                         mutex_exit(&path->mutex);
5583                                         RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
5584                                         rsmipc_free(rslot);
5585                                         rele_sendq_token(sendq_token);
5586                                         DBG_PRINTF((category, RSM_DEBUG,
5587                                             "rsmipc_send done: "
5588                                             "cv_wait INTERRUPTED"));
5589                                         return (RSMERR_INTERRUPTED);
5590                                 }
5591                         }
5592 
5593                         /*
5594                          * path is not active; retry on another path.
5595                          */
5596                         if (path->state != RSMKA_PATH_ACTIVE) {
5597                                 mutex_exit(&path->mutex);
5598                                 RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
5599                                 rsmipc_free(rslot);
5600                                 rele_sendq_token(sendq_token);
5601                                 e = RSMERR_CONN_ABORTED;
5602                                 DBG_PRINTF((category, RSM_ERR,
5603                                     "rsm: rsmipc_send: path !ACTIVE"));
5604                                 goto again;
5605                         }
5606 
5607                         ASSERT(sendq_token->msgbuf_avail > 0);
5608 
5609                         /*
5610                          * reserve a msgbuf
5611                          */
5612                         sendq_token->msgbuf_avail--;
5613 
5614                         mutex_exit(&path->mutex);
5615 
5616                         e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
5617                             NULL);
5618 
5619                         if (e != RSM_SUCCESS) {
5620                                 mutex_enter(&path->mutex);
5621                                 /*
5622                                  * release the reserved msgbuf since
5623                                  * the send failed
5624                                  */
5625                                 sendq_token->msgbuf_avail++;
5626                                 cv_broadcast(&sendq_token->sendq_cv);
5627                                 mutex_exit(&path->mutex);
5628                         }
5629                 } else
5630                         e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
5631                             NULL);
5632 
5633                 if (e != RSM_SUCCESS) {
5634                         DBG_PRINTF((category, RSM_ERR,
5635                             "rsm: rsmipc_send rsmpi send err = %d\n", e));
5636                         RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
5637                         rsmipc_free(rslot);
5638                         rele_sendq_token(sendq_token);
5639                         atomic_inc_64(&rsm_ipcsend_errcnt);
5640                         goto again;
5641                 }
5642 
5643                 /* wait for a reply signal, a SIGINT, or 5 sec. timeout */
5644                 e = cv_reltimedwait_sig(&rslot->rsmipc_cv, &rslot->rsmipc_lock,
5645                     drv_usectohz(5000000), TR_CLOCK_TICK);
5646                 if (e < 0) {
5647                         /* timed out - retry */
5648                         e = RSMERR_TIMEOUT;
5649                 } else if (e == 0) {
5650                         /* signalled - return error */
5651                         e = RSMERR_INTERRUPTED;
5652                         break;
5653                 } else {
5654                         e = RSM_SUCCESS;
5655                 }
5656         }
5657 
5658         RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
5659         rsmipc_free(rslot);
5660         rele_sendq_token(sendq_token);
5661 
5662         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_send done=%d\n", e));
5663         return (e);
5664 }
5665 
5666 static int
5667 rsm_send_notimporting(rsm_node_id_t dest, rsm_memseg_id_t segid,  void *cookie)
5668 {
5669         rsmipc_request_t request;
5670 
5671         /*
5672          *  inform the exporter to delete this importer
5673          */
5674         request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_NOTIMPORTING;
5675         request.rsmipc_key = segid;
5676         request.rsmipc_segment_cookie = cookie;
5677         return (rsmipc_send(dest, &request, RSM_NO_REPLY));
5678 }
5679 
5680 static void
5681 rsm_send_republish(rsm_memseg_id_t segid, rsmapi_access_entry_t *acl,
5682     int acl_len, rsm_permission_t default_permission)
5683 {
5684         int                     i;
5685         importing_token_t       *token;
5686         rsmipc_request_t        request;
5687         republish_token_t       *republish_list = NULL;
5688         republish_token_t       *rp;
5689         rsm_permission_t        permission;
5690         int                     index;
5691 
	/*
	 * Send the new access mode to all the nodes that have imported
	 * this segment.
	 * If the new acl does not include a node that was present in
	 * the old acl, an access permission of 0 is sent.
	 */
5698 
5699         index = rsmhash(segid);
5700 
	/*
	 * create a list of node/permission pairs to send the republish
	 * message to
	 */
5704         mutex_enter(&importer_list.lock);
5705 
5706         token = importer_list.bucket[index];
5707         while (token != NULL) {
5708                 if (segid == token->key) {
5709                         permission = default_permission;
5710 
5711                         for (i = 0; i < acl_len; i++) {
5712                                 if (token->importing_node == acl[i].ae_node) {
5713                                         permission = acl[i].ae_permission;
5714                                         break;
5715                                 }
5716                         }
5717                         rp = kmem_zalloc(sizeof (republish_token_t), KM_SLEEP);
5718 
5719                         rp->key = segid;
5720                         rp->importing_node = token->importing_node;
5721                         rp->permission = permission;
5722                         rp->next = republish_list;
5723                         republish_list = rp;
5724                 }
5725                 token = token->next;
5726         }
5727 
5728         mutex_exit(&importer_list.lock);
5729 
5730         request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPUBLISH;
5731         request.rsmipc_key = segid;
5732 
5733         while (republish_list != NULL) {
5734                 request.rsmipc_perm = republish_list->permission;
5735                 (void) rsmipc_send(republish_list->importing_node,
5736                     &request, RSM_NO_REPLY);
5737                 rp = republish_list;
5738                 republish_list = republish_list->next;
5739                 kmem_free(rp, sizeof (republish_token_t));
5740         }
5741 }
5742 
5743 static void
5744 rsm_send_suspend()
5745 {
5746         int                     i, e;
5747         rsmipc_request_t        request;
5748         list_element_t          *tokp;
5749         list_element_t          *head = NULL;
5750         importing_token_t       *token;
5751         DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
5752             "rsm_send_suspend enter\n"));
5753 
	/*
	 * create a list of nodes to send the suspend message to
	 *
	 * Currently the whole importer list is scanned and we obtain
	 * all the nodes - this basically gets all nodes that import at
	 * least one segment from the local node.
	 *
	 * no need to grab the rsm_suspend_list lock here since we are
	 * single threaded when suspend is called.
	 */
5764 
5765         mutex_enter(&importer_list.lock);
5766         for (i = 0; i < rsm_hash_size; i++) {
5767 
5768                 token = importer_list.bucket[i];
5769 
5770                 while (token != NULL) {
5771 
5772                         tokp = head;
5773 
5774                         /*
5775                          * make sure that the token's node
5776                          * is not already on the suspend list
5777                          */
5778                         while (tokp != NULL) {
5779                                 if (tokp->nodeid == token->importing_node) {
5780                                         break;
5781                                 }
5782                                 tokp = tokp->next;
5783                         }
5784 
5785                         if (tokp == NULL) { /* not in suspend list */
5786                                 tokp = kmem_zalloc(sizeof (list_element_t),
5787                                     KM_SLEEP);
5788                                 tokp->nodeid = token->importing_node;
5789                                 tokp->next = head;
5790                                 head = tokp;
5791                         }
5792 
5793                         token = token->next;
5794                 }
5795         }
5796         mutex_exit(&importer_list.lock);
5797 
5798         if (head == NULL) { /* no importers so go ahead and quiesce segments */
5799                 exporter_quiesce();
5800                 return;
5801         }
5802 
5803         mutex_enter(&rsm_suspend_list.list_lock);
5804         ASSERT(rsm_suspend_list.list_head == NULL);
	/*
	 * update the suspend list right away so that if a node dies the
	 * path manager can set the node dead flag
	 */
5809         rsm_suspend_list.list_head = head;
5810         mutex_exit(&rsm_suspend_list.list_lock);
5811 
5812         tokp = head;
5813 
5814         while (tokp != NULL) {
5815                 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SUSPEND;
5816                 e = rsmipc_send(tokp->nodeid, &request, RSM_NO_REPLY);
		/*
		 * An error from rsmipc_send currently means the remote node
		 * is inaccessible; in that case don't wait for an ack.
		 */
		if (e == RSM_SUCCESS) { /* send succeeded - wait for ack */
			tokp->flags |= RSM_SUSPEND_ACKPENDING;
		}
5824 
5825                 tokp = tokp->next;
5826         }
5827 
5828         DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
5829             "rsm_send_suspend done\n"));
5830 
5831 }
5832 
5833 static void
5834 rsm_send_resume()
5835 {
5836         rsmipc_request_t        request;
5837         list_element_t          *elem, *head;
5838 
5839         /*
5840          * save the suspend list so that we know where to send
5841          * the resume messages and make the suspend list head
5842          * NULL.
5843          */
5844         mutex_enter(&rsm_suspend_list.list_lock);
5845         head = rsm_suspend_list.list_head;
5846         rsm_suspend_list.list_head = NULL;
5847         mutex_exit(&rsm_suspend_list.list_lock);
5848 
5849         while (head != NULL) {
5850                 elem = head;
5851                 head = head->next;
5852 
5853                 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_RESUME;
5854 
5855                 (void) rsmipc_send(elem->nodeid, &request, RSM_NO_REPLY);
5856 
5857                 kmem_free((void *)elem, sizeof (list_element_t));
5858 
5859         }
5860 
5861 }
5862 
/*
 * This function takes a path and sends a message using the sendq
 * corresponding to it. The RSMIPC_MSG_SQREADY, RSMIPC_MSG_SQREADY_ACK
 * and RSMIPC_MSG_CREDIT messages are sent using this function.
 */
5868 int
5869 rsmipc_send_controlmsg(path_t *path, int msgtype)
5870 {
5871         int                     e;
5872         int                     retry_cnt = 0;
5873         int                     min_retry_cnt = 10;
5874         adapter_t               *adapter;
5875         rsm_send_t              is;
5876         rsm_send_q_handle_t     ipc_handle;
5877         rsmipc_controlmsg_t     msg;
5878         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_FLOWCONTROL);
5879 
5880         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5881             "rsmipc_send_controlmsg enter\n"));
5882 
5883         ASSERT(MUTEX_HELD(&path->mutex));
5884 
5885         adapter = path->local_adapter;
5886 
5887         DBG_PRINTF((category, RSM_DEBUG, "rsmipc_send_controlmsg:path=%lx "
5888             "msgtype=%d %lx:%llx->%lx:%llx procmsg=%d\n", path, msgtype,
5889             my_nodeid, adapter->hwaddr, path->remote_node,
5890             path->remote_hwaddr, path->procmsg_cnt));
5891 
5892         if (path->state != RSMKA_PATH_ACTIVE) {
5893                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5894                     "rsmipc_send_controlmsg done: ! RSMKA_PATH_ACTIVE"));
5895                 return (1);
5896         }
5897 
5898         ipc_handle = path->sendq_token.rsmpi_sendq_handle;
5899 
5900         msg.rsmipc_hdr.rsmipc_version = RSM_VERSION;
5901         msg.rsmipc_hdr.rsmipc_src = my_nodeid;
5902         msg.rsmipc_hdr.rsmipc_type = msgtype;
5903         msg.rsmipc_hdr.rsmipc_incn = path->remote_incn;
5904 
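	/*
	 * A CREDIT message returns the count of messages processed on
	 * this path (procmsg_cnt) to the peer as fresh send credits.
	 */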
5905         if (msgtype == RSMIPC_MSG_CREDIT)
5906                 msg.rsmipc_credits = path->procmsg_cnt;
5907 
5908         msg.rsmipc_local_incn = path->local_incn;
5909 
5910         msg.rsmipc_adapter_hwaddr = adapter->hwaddr;
5911         /* incr the sendq, path refcnt */
5912         PATH_HOLD_NOLOCK(path);
5913         SENDQ_TOKEN_HOLD(path);
5914 
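	/*
	 * Control messages are not subject to the msgbuf credit check;
	 * they implement the flow-control protocol itself. Just resend
	 * while the path stays ACTIVE, backing off briefly after every
	 * min_retry_cnt consecutive failures.
	 */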
5915         do {
5916                 /* drop the path lock before doing the rsm_send */
5917                 mutex_exit(&path->mutex);
5918 
5919                 is.is_data = (void *)&msg;
5920                 is.is_size = sizeof (msg);
5921                 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
5922                 is.is_wait = 0;
5923 
5924                 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, NULL);
5925 
5926                 ASSERT(e != RSMERR_QUEUE_FENCE_UP &&
5927                     e != RSMERR_BAD_BARRIER_HNDL);
5928 
5929                 mutex_enter(&path->mutex);
5930 
5931                 if (e == RSM_SUCCESS) {
5932                         break;
5933                 }
5934                 /* error counter for statistics */
5935                 atomic_inc_64(&rsm_ctrlmsg_errcnt);
5936 
5937                 DBG_PRINTF((category, RSM_ERR,
5938                     "rsmipc_send_controlmsg:rsm_send error=%d", e));
5939 
5940                 if (++retry_cnt == min_retry_cnt) { /* backoff before retry */
5941                         (void) cv_reltimedwait(&path->sendq_token.sendq_cv,
5942                             &path->mutex, drv_usectohz(10000), TR_CLOCK_TICK);
5943                         retry_cnt = 0;
5944                 }
5945         } while (path->state == RSMKA_PATH_ACTIVE);
5946 
5947         /* decrement the sendq,path refcnt that we incr before rsm_send */
5948         SENDQ_TOKEN_RELE(path);
5949         PATH_RELE_NOLOCK(path);
5950 
5951         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5952             "rsmipc_send_controlmsg done=%d", e));
5953         return (e);
5954 }
5955 
5956 /*
5957  * Called from rsm_force_unload and path_importer_disconnect. The memory
5958  * mapping for the imported segment is removed and the segment is
5959  * disconnected at the interconnect layer if disconnect_flag is TRUE.
5960  * rsm_force_unload will get disconnect_flag TRUE from rsm_intr_callback
5961  * and FALSE from rsm_rebind.
5962  *
5963  * When subsequent accesses cause page faulting, the dummy page is mapped
5964  * to resolve the fault, and the mapping generation number is incremented
5965  * so that the application can be notified on a close barrier operation.
5966  *
5967  * It is important to note that the caller of rsmseg_unload is responsible for
5968  * acquiring the segment lock before making a call to rsmseg_unload. This is
5969  * required to make the caller and rsmseg_unload thread safe. The segment lock
5970  * will be released by the rsmseg_unload function.
5971  */
5972 void
5973 rsmseg_unload(rsmseg_t *im_seg)
5974 {
5975         rsmcookie_t             *hdl;
5976         void                    *shared_cookie;
5977         rsmipc_request_t        request;
5978         uint_t                  maxprot;
5979 
5980         DBG_DEFINE(category,
5981             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5982 
5983         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_unload enter\n"));
5984 
5985         ASSERT(im_seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
5986 
5987         /* wait until segment leaves the mapping state */
5988         while (im_seg->s_state == RSM_STATE_MAPPING)
5989                 cv_wait(&im_seg->s_cv, &im_seg->s_lock);
5990         /*
5991          * An unload is only necessary if the segment is connected. However,
5992          * if the segment was on the import list in state RSM_STATE_CONNECTING
5993          * then a connection was in progress. Change to RSM_STATE_NEW
5994          * here to cause an early exit from the connection process.
5995          */
5996         if (im_seg->s_state == RSM_STATE_NEW) {
5997                 rsmseglock_release(im_seg);
5998                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5999                     "rsmseg_unload done: RSM_STATE_NEW\n"));
6000                 return;
6001         } else if (im_seg->s_state == RSM_STATE_CONNECTING) {
6002                 im_seg->s_state = RSM_STATE_ABORT_CONNECT;
6003                 rsmsharelock_acquire(im_seg);
6004                 im_seg->s_share->rsmsi_state = RSMSI_STATE_ABORT_CONNECT;
6005                 rsmsharelock_release(im_seg);
6006                 rsmseglock_release(im_seg);
6007                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6008                     "rsmseg_unload done: RSM_STATE_CONNECTING\n"));
6009                 return;
6010         }
6011 
6012         if (im_seg->s_flags & RSM_FORCE_DISCONNECT) {
6013                 if (im_seg->s_ckl != NULL) {
6014                         int e;
6015                         /* Setup protections for remap */
6016                         maxprot = PROT_USER;
6017                         if (im_seg->s_mode & RSM_PERM_READ) {
6018                                 maxprot |= PROT_READ;
6019                         }
6020                         if (im_seg->s_mode & RSM_PERM_WRITE) {
6021                                 maxprot |= PROT_WRITE;
6022                         }
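			/*
			 * Remap each devmap cookie for this segment onto the
			 * dummy page (remap_cookie) so that subsequent faults
			 * resolve against it, as described in the block
			 * comment above this function.
			 */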
6023                         hdl = im_seg->s_ckl;
6024                         for (; hdl != NULL; hdl = hdl->c_next) {
6025                                 e = devmap_umem_remap(hdl->c_dhp, rsm_dip,
6026                                     remap_cookie,
6027                                     hdl->c_off, hdl->c_len,
6028                                     maxprot, 0, NULL);
6029 
6030                                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6031                                     "remap returns %d\n", e));
6032                         }
6033                 }
6034 
6035                 (void) rsm_closeconnection(im_seg, &shared_cookie);
6036 
6037                 if (shared_cookie != NULL) {
6038                         /*
6039                          * inform the exporting node so this import
6040                          * can be deleted from the list of importers.
6041                          */
6042                         request.rsmipc_hdr.rsmipc_type =
6043                             RSMIPC_MSG_NOTIMPORTING;
6044                         request.rsmipc_key = im_seg->s_segid;
6045                         request.rsmipc_segment_cookie = shared_cookie;
6046                         rsmseglock_release(im_seg);
6047                         (void) rsmipc_send(im_seg->s_node, &request,
6048                             RSM_NO_REPLY);
6049                 } else {
6050                         rsmseglock_release(im_seg);
6051                 }
	} else
		rsmseglock_release(im_seg);
6055 
6056         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_unload done\n"));
6057 
6058 }
6059 
6060 /* ****************************** Importer Calls ************************ */
6061 
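/*
 * Check whether the caller may access a segment, in the style of the
 * classic UNIX file-permission test: select the owner, group or "other"
 * permission triad by shifting, clear the requested mode bits that the
 * triad grants, and refer any bits still set to secpolicy_rsm_access()
 * for a privilege check.
 */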
6062 static int
6063 rsm_access(uid_t owner, gid_t group, int perm, int mode, const struct cred *cr)
6064 {
6065         int shifts = 0;
6066 
6067         if (crgetuid(cr) != owner) {
6068                 shifts += 3;
6069                 if (!groupmember(group, cr))
6070                         shifts += 3;
6071         }
6072 
6073         mode &= ~(perm << shifts);
6074 
6075         if (mode == 0)
6076                 return (0);
6077 
6078         return (secpolicy_rsm_access(cr, owner, mode));
6079 }
6080 
6081 
6082 static int
6083 rsm_connect(rsmseg_t *seg, rsm_ioctlmsg_t *msg, cred_t *cred,
6084     intptr_t dataptr, int mode)
6085 {
6086         int e;
6087         int                     recheck_state = 0;
6088         void                    *shared_cookie;
6089         rsmipc_request_t        request;
6090         rsmipc_reply_t          reply;
6091         rsm_permission_t        access;
6092         adapter_t               *adapter;
6093         rsm_addr_t              addr = 0;
6094         rsm_import_share_t      *sharedp;
6095         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);
6096 
6097         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_connect enter\n"));
6098 
6099         adapter = rsm_getadapter(msg, mode);
6100         if (adapter == NULL) {
6101                 DBG_PRINTF((category, RSM_ERR,
6102                     "rsm_connect done:ENODEV adapter=NULL\n"));
6103                 return (RSMERR_CTLR_NOT_PRESENT);
6104         }
6105 
6106         if ((adapter == &loopback_adapter) && (msg->nodeid != my_nodeid)) {
6107                 rsmka_release_adapter(adapter);
6108                 DBG_PRINTF((category, RSM_ERR,
6109                     "rsm_connect done:ENODEV loopback\n"));
6110                 return (RSMERR_CTLR_NOT_PRESENT);
6111         }
6112 
6113 
6114         ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
6115         ASSERT(seg->s_state == RSM_STATE_NEW);
6116 
6117         /*
6118          * Translate perm to access
6119          */
6120         if (msg->perm & ~RSM_PERM_RDWR) {
6121                 rsmka_release_adapter(adapter);
6122                 DBG_PRINTF((category, RSM_ERR,
6123                     "rsm_connect done:EINVAL invalid perms\n"));
6124                 return (RSMERR_BAD_PERMS);
6125         }
6126         access = 0;
6127         if (msg->perm & RSM_PERM_READ)
6128                 access |= RSM_ACCESS_READ;
6129         if (msg->perm & RSM_PERM_WRITE)
6130                 access |= RSM_ACCESS_WRITE;
6131 
6132         seg->s_node = msg->nodeid;
6133 
6134         /*
6135          * Adding to the import list locks the segment; release the segment
6136          * lock so we can get the reply for the send.
6137          */
6138         e = rsmimport_add(seg, msg->key);
6139         if (e) {
6140                 rsmka_release_adapter(adapter);
6141                 DBG_PRINTF((category, RSM_ERR,
6142                     "rsm_connect done:rsmimport_add failed %d\n", e));
6143                 return (e);
6144         }
6145         seg->s_state = RSM_STATE_CONNECTING;
6146 
6147         /*
6148          * Set the s_adapter field here so as to have a valid comparison of
6149          * the adapter and the s_adapter value during rsmshare_get. For
6150          * any error, set s_adapter to NULL before doing a release_adapter
6151          */
6152         seg->s_adapter = adapter;
6153 
6154         rsmseglock_release(seg);
6155 
6156         /*
6157          * get the pointer to the shared data structure; the
6158          * shared data is locked and refcount has been incremented
6159          */
6160         sharedp = rsmshare_get(msg->key, msg->nodeid, adapter, seg);
6161 
6162         ASSERT(rsmsharelock_held(seg));
6163 
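	/*
	 * Walk the shared-connection state machine until we either own
	 * the connect (CONNECTING), find the segment already set up
	 * (CONNECTED/MAPPED), or see an abort; transitional states just
	 * make us wait and re-evaluate.
	 */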
6164         do {
6165                 /* flag indicates whether we need to recheck the state */
6166                 recheck_state = 0;
6167                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6168                     "rsm_connect:RSMSI_STATE=%d\n", sharedp->rsmsi_state));
6169                 switch (sharedp->rsmsi_state) {
6170                 case RSMSI_STATE_NEW:
6171                         sharedp->rsmsi_state = RSMSI_STATE_CONNECTING;
6172                         break;
6173                 case RSMSI_STATE_CONNECTING:
6174                         /* FALLTHRU */
6175                 case RSMSI_STATE_CONN_QUIESCE:
6176                         /* FALLTHRU */
6177                 case RSMSI_STATE_MAP_QUIESCE:
6178                         /* wait for the state to change */
6179                         while ((sharedp->rsmsi_state ==
6180                             RSMSI_STATE_CONNECTING) ||
6181                             (sharedp->rsmsi_state ==
6182                             RSMSI_STATE_CONN_QUIESCE) ||
6183                             (sharedp->rsmsi_state ==
6184                             RSMSI_STATE_MAP_QUIESCE)) {
6185                                 if (cv_wait_sig(&sharedp->rsmsi_cv,
6186                                     &sharedp->rsmsi_lock) == 0) {
6187                                         /* signalled - clean up and return */
6188                                         rsmsharelock_release(seg);
6189                                         rsmimport_rm(seg);
6190                                         seg->s_adapter = NULL;
6191                                         rsmka_release_adapter(adapter);
6192                                         seg->s_state = RSM_STATE_NEW;
6193                                         DBG_PRINTF((category, RSM_ERR,
6194                                             "rsm_connect done: INTERRUPTED\n"));
6195                                         return (RSMERR_INTERRUPTED);
6196                                 }
6197                         }
6198                         /*
6199                          * the state changed, loop back and check what it is
6200                          */
6201                         recheck_state = 1;
6202                         break;
6203                 case RSMSI_STATE_ABORT_CONNECT:
6204                         /* exit the loop and clean up further down */
6205                         break;
6206                 case RSMSI_STATE_CONNECTED:
6207                         /* already connected, good - fall through */
6208                 case RSMSI_STATE_MAPPED:
6209                         /* already mapped, wow - fall through */
6210                         /* access validation etc is done further down */
6211                         break;
6212                 case RSMSI_STATE_DISCONNECTED:
6213                         /* disconnected - so reconnect now */
6214                         sharedp->rsmsi_state = RSMSI_STATE_CONNECTING;
6215                         break;
6216                 default:
6217                         ASSERT(0); /* Invalid State */
6218                 }
6219         } while (recheck_state);
6220 
6221         if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) {
6222                 /* we are the first to connect */
6223                 rsmsharelock_release(seg);
6224 
6225                 if (msg->nodeid != my_nodeid) {
6226                         addr = get_remote_hwaddr(adapter, msg->nodeid);
6227 
6228                         if ((int64_t)addr < 0) {
6229                                 rsmsharelock_acquire(seg);
6230                                 rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING,
6231                                     RSMSI_STATE_NEW);
6232                                 rsmsharelock_release(seg);
6233                                 rsmimport_rm(seg);
6234                                 seg->s_adapter = NULL;
6235                                 rsmka_release_adapter(adapter);
6236                                 seg->s_state = RSM_STATE_NEW;
6237                                 DBG_PRINTF((category, RSM_ERR,
6238                                     "rsm_connect done: hwaddr<0\n"));
6239                                 return (RSMERR_INTERNAL_ERROR);
6240                         }
6241                 } else {
6242                         addr = adapter->hwaddr;
6243                 }
6244 
6245                 /*
6246                  * send request to node [src, dest, key, msgid] and get back
6247                  * [status, msgid, cookie]
6248                  */
6249                 request.rsmipc_key = msg->key;
6250                 /*
6251                  * we need the s_mode of the exporter so pass
6252                  * RSM_ACCESS_TRUSTED
6253                  */
6254                 request.rsmipc_perm = RSM_ACCESS_TRUSTED;
6255                 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SEGCONNECT;
6256                 request.rsmipc_adapter_hwaddr = addr;
6257                 request.rsmipc_segment_cookie = sharedp;
6258 
6259                 e = (int)rsmipc_send(msg->nodeid, &request, &reply);
6260                 if (e) {
6261                         rsmsharelock_acquire(seg);
6262                         rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING,
6263                             RSMSI_STATE_NEW);
6264                         rsmsharelock_release(seg);
6265                         rsmimport_rm(seg);
6266                         seg->s_adapter = NULL;
6267                         rsmka_release_adapter(adapter);
6268                         seg->s_state = RSM_STATE_NEW;
6269                         DBG_PRINTF((category, RSM_ERR,
6270                             "rsm_connect done:rsmipc_send failed %d\n", e));
6271                         return (e);
6272                 }
6273 
6274                 if (reply.rsmipc_status != RSM_SUCCESS) {
6275                         rsmsharelock_acquire(seg);
6276                         rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING,
6277                             RSMSI_STATE_NEW);
6278                         rsmsharelock_release(seg);
6279                         rsmimport_rm(seg);
6280                         seg->s_adapter = NULL;
6281                         rsmka_release_adapter(adapter);
6282                         seg->s_state = RSM_STATE_NEW;
6283                         DBG_PRINTF((category, RSM_ERR,
6284                             "rsm_connect done:rsmipc_send reply err %d\n",
6285                             reply.rsmipc_status));
6286                         return (reply.rsmipc_status);
6287                 }
6288 
6289                 rsmsharelock_acquire(seg);
6290                 /* store the information recvd into the shared data struct */
6291                 sharedp->rsmsi_mode = reply.rsmipc_mode;
6292                 sharedp->rsmsi_uid = reply.rsmipc_uid;
6293                 sharedp->rsmsi_gid = reply.rsmipc_gid;
6294                 sharedp->rsmsi_seglen = reply.rsmipc_seglen;
6295                 sharedp->rsmsi_cookie = sharedp;
6296         }
6297 
6298         rsmsharelock_release(seg);
6299 
6300         /*
6301          * Get the segment lock and check for a force disconnect
6302          * from the export side which would have changed the state
6303          * back to RSM_STATE_NEW. Once the segment lock is acquired a
6304          * force disconnect will be held off until the connection
6305          * has completed.
6306          */
6307         rsmseglock_acquire(seg);
6308         rsmsharelock_acquire(seg);
6309         ASSERT(seg->s_state == RSM_STATE_CONNECTING ||
6310             seg->s_state == RSM_STATE_ABORT_CONNECT);
6311 
6312         shared_cookie = sharedp->rsmsi_cookie;
6313 
6314         if ((seg->s_state == RSM_STATE_ABORT_CONNECT) ||
6315             (sharedp->rsmsi_state == RSMSI_STATE_ABORT_CONNECT)) {
6316                 seg->s_state = RSM_STATE_NEW;
6317                 seg->s_adapter = NULL;
6318                 rsmsharelock_release(seg);
6319                 rsmseglock_release(seg);
6320                 rsmimport_rm(seg);
6321                 rsmka_release_adapter(adapter);
6322 
6323                 rsmsharelock_acquire(seg);
6324                 if (!(sharedp->rsmsi_flags & RSMSI_FLAGS_ABORTDONE)) {
6325                         /*
6326                          * set a flag indicating abort handling has been
6327                          * done
6328                          */
6329                         sharedp->rsmsi_flags |= RSMSI_FLAGS_ABORTDONE;
6330                         rsmsharelock_release(seg);
6331                         /* send a message to exporter - only once */
6332                         (void) rsm_send_notimporting(msg->nodeid,
6333                             msg->key, shared_cookie);
6334                         rsmsharelock_acquire(seg);
6335                         /*
6336                          * wake up any waiting importers and inform that
6337                          * connection has been aborted
6338                          */
6339                         cv_broadcast(&sharedp->rsmsi_cv);
6340                 }
6341                 rsmsharelock_release(seg);
6342 
6343                 DBG_PRINTF((category, RSM_ERR,
6344                     "rsm_connect done: RSM_STATE_ABORT_CONNECT\n"));
6345                 return (RSMERR_INTERRUPTED);
6346         }
6347 
6348 
6349         /*
6350          * We need to verify that this process has access
6351          */
6352         e = rsm_access(sharedp->rsmsi_uid, sharedp->rsmsi_gid,
6353             access & sharedp->rsmsi_mode,
6354             (int)(msg->perm & RSM_PERM_RDWR), cred);
6355         if (e) {
6356                 rsmsharelock_release(seg);
6357                 seg->s_state = RSM_STATE_NEW;
6358                 seg->s_adapter = NULL;
6359                 rsmseglock_release(seg);
6360                 rsmimport_rm(seg);
6361                 rsmka_release_adapter(adapter);
		/*
		 * No need to lock the segment; it has been removed
		 * from the hash table
		 */
6366                 rsmsharelock_acquire(seg);
6367                 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) {
6368                         rsmsharelock_release(seg);
6369                         /* this is the first importer */
6370 
6371                         (void) rsm_send_notimporting(msg->nodeid, msg->key,
6372                             shared_cookie);
6373                         rsmsharelock_acquire(seg);
6374                         sharedp->rsmsi_state = RSMSI_STATE_NEW;
6375                         cv_broadcast(&sharedp->rsmsi_cv);
6376                 }
6377                 rsmsharelock_release(seg);
6378 
6379                 DBG_PRINTF((category, RSM_ERR,
6380                     "rsm_connect done: ipcaccess failed\n"));
6381                 return (RSMERR_PERM_DENIED);
6382         }
6383 
6384         /* update state and cookie */
6385         seg->s_segid = sharedp->rsmsi_segid;
6386         seg->s_len = sharedp->rsmsi_seglen;
6387         seg->s_mode = access & sharedp->rsmsi_mode;
6388         seg->s_pid = ddi_get_pid();
6389         seg->s_mapinfo = NULL;
6390 
6391         if (seg->s_node != my_nodeid) {
6392                 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) {
6393                         e = adapter->rsmpi_ops->rsm_connect(
6394                             adapter->rsmpi_handle,
6395                             addr, seg->s_segid, &sharedp->rsmsi_handle);
6396 
6397                         if (e != RSM_SUCCESS) {
6398                                 seg->s_state = RSM_STATE_NEW;
6399                                 seg->s_adapter = NULL;
6400                                 rsmsharelock_release(seg);
6401                                 rsmseglock_release(seg);
6402                                 rsmimport_rm(seg);
6403                                 rsmka_release_adapter(adapter);
6404                                 /*
6405                                  *  inform the exporter to delete this importer
6406                                  */
6407                                 (void) rsm_send_notimporting(msg->nodeid,
6408                                     msg->key, shared_cookie);
6409 
				/*
				 * Now inform any waiting importers to
				 * retry the connect. This needs to be done
				 * after sending notimporting so that
				 * the notimporting is sent before a waiting
				 * importer sends a segconnect while retrying.
				 *
				 * No need to lock the segment; it has been
				 * removed from the hash table.
				 */
6420 
6421                                 rsmsharelock_acquire(seg);
6422                                 sharedp->rsmsi_state = RSMSI_STATE_NEW;
6423                                 cv_broadcast(&sharedp->rsmsi_cv);
6424                                 rsmsharelock_release(seg);
6425 
6426                                 DBG_PRINTF((category, RSM_ERR,
6427                                     "rsm_connect error %d\n", e));
6428                                 if (e == RSMERR_SEG_NOT_PUBLISHED_TO_RSM_ADDR)
6429                                         return (
6430                                             RSMERR_SEG_NOT_PUBLISHED_TO_NODE);
6431                                 else if ((e == RSMERR_RSM_ADDR_UNREACHABLE) ||
6432                                     (e == RSMERR_UNKNOWN_RSM_ADDR))
6433                                         return (RSMERR_REMOTE_NODE_UNREACHABLE);
6434                                 else
6435                                         return (e);
6436                         }
6437 
6438                 }
6439                 seg->s_handle.in = sharedp->rsmsi_handle;
6440 
6441         }
6442 
6443         seg->s_state = RSM_STATE_CONNECT;
6444 
6445 
6446         seg->s_flags &= ~RSM_IMPORT_DUMMY;       /* clear dummy flag */
6447         if (bar_va) {
6448                 /* increment generation number on barrier page */
6449                 atomic_inc_16(bar_va + seg->s_hdr.rsmrc_num);
6450                 /* return user off into barrier page where status will be */
6451                 msg->off = (int)seg->s_hdr.rsmrc_num;
6452                 msg->gnum = bar_va[msg->off];     /* gnum race */
6453         } else {
6454                 msg->off = 0;
6455                 msg->gnum = 0;       /* gnum race */
6456         }
6457 
6458         msg->len = (int)sharedp->rsmsi_seglen;
6459         msg->rnum = seg->s_minor;
6460         rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING, RSMSI_STATE_CONNECTED);
6461         rsmsharelock_release(seg);
6462         rsmseglock_release(seg);
6463 
	/* Return the segment size & perm to the user in case they're needed */
6465 
6466 #ifdef _MULTI_DATAMODEL
6467         if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
6468                 rsm_ioctlmsg32_t msg32;
6469 
6470                 if (msg->len > UINT_MAX)
6471                         msg32.len = RSM_MAXSZ_PAGE_ALIGNED;
6472                 else
6473                         msg32.len = msg->len;
6474                 msg32.off = msg->off;
6475                 msg32.perm = msg->perm;
6476                 msg32.gnum = msg->gnum;
6477                 msg32.rnum = msg->rnum;
6478 
6479                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6480                     "rsm_connect done\n"));
6481 
6482                 if (ddi_copyout((caddr_t)&msg32, (caddr_t)dataptr,
6483                     sizeof (msg32), mode))
6484                         return (RSMERR_BAD_ADDR);
6485                 else
6486                         return (RSM_SUCCESS);
6487         }
6488 #endif
6489         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_connect done\n"));
6490 
6491         if (ddi_copyout((caddr_t)msg, (caddr_t)dataptr, sizeof (*msg),
6492             mode))
6493                 return (RSMERR_BAD_ADDR);
6494         else
6495                 return (RSM_SUCCESS);
6496 }
6497 
6498 static int
6499 rsm_unmap(rsmseg_t *seg)
6500 {
6501         int                     err;
6502         adapter_t               *adapter;
6503         rsm_import_share_t      *sharedp;
6504         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);
6505 
6506         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6507             "rsm_unmap enter %u\n", seg->s_segid));
6508 
6509         ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
6510 
6511         /* assert seg is locked */
6512         ASSERT(rsmseglock_held(seg));
6513         ASSERT(seg->s_state != RSM_STATE_MAPPING);
6514 
6515         if ((seg->s_state != RSM_STATE_ACTIVE) &&
6516             (seg->s_state != RSM_STATE_MAP_QUIESCE)) {
6517                 /* segment unmap has already been done */
6518                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unmap done\n"));
6519                 return (RSM_SUCCESS);
6520         }
6521 
6522         sharedp = seg->s_share;
6523 
6524         rsmsharelock_acquire(seg);
6525 
	/*
	 * the shared data struct must be in MAPPED or MAP_QUIESCE state
	 */
6529 
6530         ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED ||
6531             sharedp->rsmsi_state == RSMSI_STATE_MAP_QUIESCE);
6532 
6533         /*
6534          * Unmap pages - previously rsm_memseg_import_unmap was called only if
6535          * the segment cookie list was NULL; but it is always NULL when
6536          * called from rsmmap_unmap and won't be NULL when called for
6537          * a force disconnect - so the check for NULL cookie list was removed
6538          */
6539 
6540         ASSERT(sharedp->rsmsi_mapcnt > 0);
6541 
6542         sharedp->rsmsi_mapcnt--;
6543 
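	/*
	 * Only the last unmapper tears down the shared RSMPI mapping;
	 * earlier unmappers just drop their reference to it.
	 */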
6544         if (sharedp->rsmsi_mapcnt == 0) {
6545                 if (sharedp->rsmsi_state == RSMSI_STATE_MAPPED) {
6546                         /* unmap the shared RSMPI mapping */
6547                         adapter = seg->s_adapter;
6548                         if (seg->s_node != my_nodeid) {
6549                                 ASSERT(sharedp->rsmsi_handle != NULL);
6550                                 err = adapter->rsmpi_ops->
6551                                     rsm_unmap(sharedp->rsmsi_handle);
6552                                 DBG_PRINTF((category, RSM_DEBUG,
6553                                     "rsm_unmap: rsmpi unmap %d\n", err));
6554                                 rsm_free_mapinfo(sharedp->rsmsi_mapinfo);
6555                                 sharedp->rsmsi_mapinfo = NULL;
6556                         }
6557                         sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
6558                 } else { /* MAP_QUIESCE --munmap()--> CONN_QUIESCE */
6559                         sharedp->rsmsi_state = RSMSI_STATE_CONN_QUIESCE;
6560                 }
6561         }
6562 
6563         rsmsharelock_release(seg);
6564 
6565         /*
6566          * The s_cookie field is used to store the cookie returned from the
6567          * ddi_umem_lock when binding the pages for an export segment. This
6568          * is the primary use of the s_cookie field and does not normally
6569          * pertain to any importing segment except in the loopback case.
6570          * For the loopback case, the import segment and export segment are
6571          * on the same node, the s_cookie field of the segment structure for
6572          * the importer is initialized to the s_cookie field in the exported
6573          * segment during the map operation and is used during the call to
6574          * devmap_umem_setup for the import mapping.
6575          * Thus, during unmap, we simply need to set s_cookie to NULL to
6576          * indicate that the mapping no longer exists.
6577          */
6578         seg->s_cookie = NULL;
6579 
6580         seg->s_mapinfo = NULL;
6581 
6582         if (seg->s_state == RSM_STATE_ACTIVE)
6583                 seg->s_state = RSM_STATE_CONNECT;
6584         else
6585                 seg->s_state = RSM_STATE_CONN_QUIESCE;
6586 
6587         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unmap done\n"));
6588 
6589         return (RSM_SUCCESS);
6590 }
6591 
/*
 * A non-NULL cookie returned here indicates that this was the last
 * importer; the cookie can then be used in the RSMIPC_MSG_NOTIMPORTING
 * message.
 */
6597 static int
6598 rsm_closeconnection(rsmseg_t *seg, void **cookie)
6599 {
6600         int                     e;
6601         adapter_t               *adapter;
6602         rsm_import_share_t      *sharedp;
6603         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);
6604 
6605         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6606             "rsm_closeconnection enter\n"));
6607 
6608         *cookie = (void *)NULL;
6609 
6610         ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
6611 
6612         /* assert seg is locked */
6613         ASSERT(rsmseglock_held(seg));
6614 
6615         if (seg->s_state == RSM_STATE_DISCONNECT) {
6616                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6617                     "rsm_closeconnection done: already disconnected\n"));
6618                 return (RSM_SUCCESS);
6619         }
6620 
6621         /* wait for all putv/getv ops to get done */
6622         while (seg->s_rdmacnt > 0) {
6623                 cv_wait(&seg->s_cv, &seg->s_lock);
6624         }
6625 
6626         (void) rsm_unmap(seg);
6627 
6628         ASSERT(seg->s_state == RSM_STATE_CONNECT ||
6629             seg->s_state == RSM_STATE_CONN_QUIESCE);
6630 
6631         adapter = seg->s_adapter;
6632         sharedp = seg->s_share;
6633 
6634         ASSERT(sharedp != NULL);
6635 
6636         rsmsharelock_acquire(seg);
6637 
	/*
	 * Disconnect on adapter
	 *
	 * The current algorithm is stateless: the importer does not have
	 * to contact the exporting server when it goes away; the server
	 * only grants permissions. The adapters, of course, still talk
	 * to terminate the connection.
	 *
	 * disconnect is needed only if we are CONNECTED, not in CONN_QUIESCE
	 */
6647         if ((sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) &&
6648             (sharedp->rsmsi_node != my_nodeid)) {
6649 
6650                 if (sharedp->rsmsi_refcnt == 1) {
6651                         /* this is the last importer */
6652                         ASSERT(sharedp->rsmsi_mapcnt == 0);
6653 
6654                         e = adapter->rsmpi_ops->
6655                             rsm_disconnect(sharedp->rsmsi_handle);
6656                         if (e != RSM_SUCCESS) {
6657                                 DBG_PRINTF((category, RSM_DEBUG,
6658                                     "rsm:disconnect failed seg=%x:err=%d\n",
6659                                     seg->s_key, e));
6660                         }
6661                 }
6662         }
6663 
6664         seg->s_handle.in = NULL;
6665 
6666         sharedp->rsmsi_refcnt--;
6667 
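	/*
	 * The last importer takes the shared cookie (for the caller's
	 * NOTIMPORTING message to the exporter) and frees the shared
	 * data structure.
	 */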
6668         if (sharedp->rsmsi_refcnt == 0) {
6669                 *cookie = (void *)sharedp->rsmsi_cookie;
6670                 sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED;
6671                 sharedp->rsmsi_handle = NULL;
6672                 rsmsharelock_release(seg);
6673 
6674                 /* clean up the shared data structure */
6675                 mutex_destroy(&sharedp->rsmsi_lock);
6676                 cv_destroy(&sharedp->rsmsi_cv);
6677                 kmem_free((void *)(sharedp), sizeof (rsm_import_share_t));
6678 
6679         } else {
6680                 rsmsharelock_release(seg);
6681         }
6682 
6683         /* increment generation number on barrier page */
6684         if (bar_va) {
6685                 atomic_inc_16(bar_va + seg->s_hdr.rsmrc_num);
6686         }
6687 
6688         /*
6689          * The following needs to be done after any
6690          * rsmsharelock calls which use seg->s_share.
6691          */
6692         seg->s_share = NULL;
6693 
6694         seg->s_state = RSM_STATE_DISCONNECT;
6695         /* signal anyone waiting in the CONN_QUIESCE state */
6696         cv_broadcast(&seg->s_cv);
6697 
6698         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6699             "rsm_closeconnection done\n"));
6700 
6701         return (RSM_SUCCESS);
6702 }
6703 
6704 int
6705 rsm_disconnect(rsmseg_t *seg)
6706 {
6707         rsmipc_request_t        request;
6708         void                    *shared_cookie;
6709         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);
6710 
6711         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_disconnect enter\n"));
6712 
6713         ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
6714 
6715         /* assert seg isn't locked */
6716         ASSERT(!rsmseglock_held(seg));
6717 
6718 
6719         /* Remove segment from imported list */
6720         rsmimport_rm(seg);
6721 
6722         /* acquire the segment */
6723         rsmseglock_acquire(seg);
6724 
6725         /* wait until segment leaves the mapping state */
6726         while (seg->s_state == RSM_STATE_MAPPING)
6727                 cv_wait(&seg->s_cv, &seg->s_lock);
6728 
6729         if (seg->s_state == RSM_STATE_DISCONNECT) {
6730                 seg->s_state = RSM_STATE_NEW;
6731                 rsmseglock_release(seg);
6732                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6733                     "rsm_disconnect done: already disconnected\n"));
6734                 return (RSM_SUCCESS);
6735         }
6736 
6737         (void) rsm_closeconnection(seg, &shared_cookie);
6738 
6739         /* update state */
6740         seg->s_state = RSM_STATE_NEW;
6741 
6742         if (shared_cookie != NULL) {
6743                 /*
6744                  *  This is the last importer so inform the exporting node
6745                  *  so this import can be deleted from the list of importers.
6746                  */
6747                 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_NOTIMPORTING;
6748                 request.rsmipc_key = seg->s_segid;
6749                 request.rsmipc_segment_cookie = shared_cookie;
6750                 rsmseglock_release(seg);
6751                 (void) rsmipc_send(seg->s_node, &request, RSM_NO_REPLY);
6752         } else {
6753                 rsmseglock_release(seg);
6754         }
6755 
6756         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_disconnect done\n"));
6757 
6758         return (DDI_SUCCESS);
6759 }
6760 
6761 /*ARGSUSED*/
6762 static int
6763 rsm_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
6764     struct pollhead **phpp)
6765 {
6766         minor_t         rnum;
6767         rsmresource_t   *res;
6768         rsmseg_t        *seg;
6769         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
6770 
6771         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_chpoll enter\n"));
6772 
6773         /* find minor, no lock */
6774         rnum = getminor(dev);
6775         res = rsmresource_lookup(rnum, RSM_NOLOCK);
6776 
6777         /* poll is supported only for export/import segments */
6778         if ((res == NULL) || (res == RSMRC_RESERVED) ||
6779             (res->rsmrc_type == RSM_RESOURCE_BAR)) {
6780                 return (ENXIO);
6781         }
6782 
6783         *reventsp = 0;
6784 
6785         /*
6786          * An exported segment must be in state RSM_STATE_EXPORT; an
6787          * imported segment must be in state RSM_STATE_ACTIVE.
6788          */
6789         seg = (rsmseg_t *)res;
6790 
6791         if (seg->s_pollevent) {
6792                 *reventsp = POLLRDNORM;
6793         } else if (!anyyet) {
6794                 /* cannot take segment lock here */
6795                 *phpp = &seg->s_poll;
6796                 seg->s_pollflag |= RSM_SEGMENT_POLL;
6797         }
6798         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_chpoll done\n"));
6799         return (0);
6800 }
6801 
6802 
6803 
6804 /* ************************* IOCTL Commands ********************* */
6805 
6806 static rsmseg_t *
6807 rsmresource_seg(rsmresource_t *res, minor_t rnum, cred_t *credp,
6808     rsm_resource_type_t type)
6809 {
6810         /* get segment from resource handle */
6811         rsmseg_t *seg;
6812         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL);
6813 
6814         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmresource_seg enter\n"));
6815 
6816 
6817         if (res != RSMRC_RESERVED) {
6818                 seg = (rsmseg_t *)res;
6819         } else {
6820                 /* Allocate segment now and bind it */
6821                 seg = rsmseg_alloc(rnum, credp);
6822 
6823                 /*
6824                  * if DR pre-processing is going on or DR is in progress
6825                  * then the new export segments should be in the NEW_QSCD state
6826                  */
6827                 if (type == RSM_RESOURCE_EXPORT_SEGMENT) {
6828                         mutex_enter(&rsm_drv_data.drv_lock);
6829                         if ((rsm_drv_data.drv_state ==
6830                             RSM_DRV_PREDEL_STARTED) ||
6831                             (rsm_drv_data.drv_state ==
6832                             RSM_DRV_PREDEL_COMPLETED) ||
6833                             (rsm_drv_data.drv_state ==
6834                             RSM_DRV_DR_IN_PROGRESS)) {
6835                                 seg->s_state = RSM_STATE_NEW_QUIESCED;
6836                         }
6837                         mutex_exit(&rsm_drv_data.drv_lock);
6838                 }
6839 
6840                 rsmresource_insert(rnum, (rsmresource_t *)seg, type);
6841         }
6842 
6843         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmresource_seg done\n"));
6844 
6845         return (seg);
6846 }
6847 
6848 static int
6849 rsmexport_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg,
6850     int mode, cred_t *credp)
6851 {
6852         int error;
6853         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT | RSM_IOCTL);
6854 
6855         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmexport_ioctl enter\n"));
6856 
6857         arg = arg;
6858         credp = credp;
6859 
6860         ASSERT(seg != NULL);
6861 
6862         switch (cmd) {
6863         case RSM_IOCTL_BIND:
6864                 error = rsm_bind(seg, msg, arg, mode);
6865                 break;
6866         case RSM_IOCTL_REBIND:
6867                 error = rsm_rebind(seg, msg);
6868                 break;
6869         case RSM_IOCTL_UNBIND:
6870                 error = ENOTSUP;
6871                 break;
6872         case RSM_IOCTL_PUBLISH:
6873                 error = rsm_publish(seg, msg, arg, mode);
6874                 break;
6875         case RSM_IOCTL_REPUBLISH:
6876                 error = rsm_republish(seg, msg, mode);
6877                 break;
6878         case RSM_IOCTL_UNPUBLISH:
6879                 error = rsm_unpublish(seg, 1);
6880                 break;
6881         default:
6882                 error = EINVAL;
6883                 break;
6884         }
6885 
6886         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmexport_ioctl done: %d\n",
6887             error));
6888 
6889         return (error);
}

static int
6892 rsmimport_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg,
6893     int mode, cred_t *credp)
6894 {
6895         int error;
6896         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
6897 
6898         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmimport_ioctl enter\n"));
6899 
6900         ASSERT(seg);
6901 
6902         switch (cmd) {
6903         case RSM_IOCTL_CONNECT:
6904                 error = rsm_connect(seg, msg, credp, arg, mode);
6905                 break;
6906         default:
6907                 error = EINVAL;
6908                 break;
6909         }
6910 
6911         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmimport_ioctl done: %d\n",
6912             error));
6913         return (error);
6914 }
6915 
6916 static int
6917 rsmbar_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg,
6918     int mode)
6919 {
6920         int e;
6921         adapter_t *adapter;
6922         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
6923 
6924         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmbar_ioctl enter\n"));
6925 
6926 
6927         if ((seg->s_flags & RSM_IMPORT_DUMMY) != 0) {
6928                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6929                     "rsmbar_ioctl done: RSM_IMPORT_DUMMY\n"));
6930                 return (RSMERR_CONN_ABORTED);
6931         } else if (seg->s_node == my_nodeid) {
6932                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6933                     "rsmbar_ioctl done: loopback\n"));
6934                 return (RSM_SUCCESS);
6935         }
6936 
6937         adapter = seg->s_adapter;
6938 
6939         switch (cmd) {
6940         case RSM_IOCTL_BAR_CHECK:
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsmbar_ioctl done: RSM_BAR_CHECK %p\n", (void *)bar_va));
6943                 return (bar_va ? RSM_SUCCESS : EINVAL);
6944         case RSM_IOCTL_BAR_OPEN:
6945                 e = adapter->rsmpi_ops->
6946                     rsm_open_barrier_ctrl(adapter->rsmpi_handle, &msg->bar);
6947                 break;
6948         case RSM_IOCTL_BAR_ORDER:
6949                 e = adapter->rsmpi_ops->rsm_order_barrier(&msg->bar);
6950                 break;
6951         case RSM_IOCTL_BAR_CLOSE:
6952                 e = adapter->rsmpi_ops->rsm_close_barrier(&msg->bar);
6953                 break;
6954         default:
6955                 e = EINVAL;
6956                 break;
6957         }
6958 
6959         if (e == RSM_SUCCESS) {
6960 #ifdef _MULTI_DATAMODEL
6961                 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
6962                         rsm_ioctlmsg32_t msg32;
6963                         int i;
6964 
6965                         for (i = 0; i < 4; i++) {
6966                                 msg32.bar.comp[i].u64 = msg->bar.comp[i].u64;
6967                         }
6968 
6969                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6970                             "rsmbar_ioctl done\n"));
6971                         if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg,
6972                             sizeof (msg32), mode))
6973                                 return (RSMERR_BAD_ADDR);
6974                         else
6975                                 return (RSM_SUCCESS);
6976                 }
6977 #endif
6978                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6979                     "rsmbar_ioctl done\n"));
6980                 if (ddi_copyout((caddr_t)&msg->bar, (caddr_t)arg,
6981                     sizeof (*msg), mode))
6982                         return (RSMERR_BAD_ADDR);
6983                 else
6984                         return (RSM_SUCCESS);
6985         }
6986 
6987         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6988             "rsmbar_ioctl done: error=%d\n", e));
6989 
6990         return (e);
6991 }
6992 
6993 /*
6994  * Ring the doorbell of the export segment to which this segment is
6995  * connected.
6996  */
6997 static int
6998 exportbell_ioctl(rsmseg_t *seg, int cmd /*ARGSUSED*/)
6999 {
7000         int e = 0;
7001         rsmipc_request_t request;
7002 
7003         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7004 
7005         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exportbell_ioctl enter\n"));
7006 
7007         request.rsmipc_key = seg->s_segid;
7008         request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL;
7009         request.rsmipc_segment_cookie = NULL;
7010         e = rsmipc_send(seg->s_node, &request, RSM_NO_REPLY);
7011 
7012         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7013             "exportbell_ioctl done: %d\n", e));
7014 
7015         return (e);
7016 }
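
     /*
      * Illustrative userland sketch (a hypothetical example, not part of
      * this driver): the RSMAPI library reaches exportbell_ioctl() by
      * issuing the doorbell ioctl on a descriptor whose minor number
      * names an import segment; "fd" below is such an assumed descriptor.
      *
      *	if (ioctl(fd, RSM_IOCTL_RING_BELL, 0) != RSM_SUCCESS)
      *		(void) fprintf(stderr, "signal post failed\n");
      *
      * Note that rsm_ioctl() skips the common argument copyin for
      * RSM_IOCTL_RING_BELL, since this command carries no payload.
      */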
7017 
7018 /*
7019  * Ring the doorbells of all segments importing this segment
7020  */
7021 static int
7022 importbell_ioctl(rsmseg_t *seg, int cmd /*ARGSUSED*/)
7023 {
7024         importing_token_t       *token = NULL;
7025         rsmipc_request_t        request;
7026         int                     index;
7027 
7028         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT | RSM_IOCTL);
7029 
7030         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importbell_ioctl enter\n"));
7031 
7032         ASSERT(seg->s_state != RSM_STATE_NEW &&
7033             seg->s_state != RSM_STATE_NEW_QUIESCED);
7034 
7035         request.rsmipc_key = seg->s_segid;
7036         request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL;
7037 
7038         index = rsmhash(seg->s_segid);
7039 
7040         token = importer_list.bucket[index];
7041 
7042         while (token != NULL) {
7043                 if (seg->s_key == token->key) {
7044                         request.rsmipc_segment_cookie =
7045                             token->import_segment_cookie;
7046                         (void) rsmipc_send(token->importing_node,
7047                             &request, RSM_NO_REPLY);
7048                 }
7049                 token = token->next;
7050         }
7051 
7052         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7053             "importbell_ioctl done\n"));
7054         return (RSM_SUCCESS);
7055 }
7056 
7057 static int
7058 rsm_consumeevent_copyin(caddr_t arg, rsm_consume_event_msg_t *msgp,
7059     rsm_poll_event_t **eventspp, int mode)
7060 {
7061         rsm_poll_event_t        *evlist = NULL;
7062         size_t                  evlistsz;
7063         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL);
7064 
7065 #ifdef _MULTI_DATAMODEL
7066         if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7067                 int i;
7068                 rsm_consume_event_msg32_t cemsg32 = {0};
7069                 rsm_poll_event32_t      event32[RSM_MAX_POLLFDS];
7070                 rsm_poll_event32_t      *evlist32;
7071                 size_t                  evlistsz32;
7072 
7073                 /* copyin the ioctl message */
7074                 if (ddi_copyin(arg, (caddr_t)&cemsg32,
7075                     sizeof (rsm_consume_event_msg32_t), mode)) {
7076                         DBG_PRINTF((category, RSM_ERR,
7077                             "consumeevent_copyin msgp: RSMERR_BAD_ADDR\n"));
7078                         return (RSMERR_BAD_ADDR);
7079                 }
7080                 msgp->seglist = (caddr_t)(uintptr_t)cemsg32.seglist;
7081                 msgp->numents = (int)cemsg32.numents;
7082 
7083                 evlistsz32 = sizeof (rsm_poll_event32_t) * msgp->numents;
7084                 /*
7085                  * If numents is large, allocate the events list on the
7086                  * heap; otherwise use the array that was passed in.
7087                  */
7088                 if (msgp->numents > RSM_MAX_POLLFDS) {
7089                         if (msgp->numents > max_segs) { /* validate numents */
7090                                 DBG_PRINTF((category, RSM_ERR,
7091                                     "consumeevent_copyin: "
7092                                     "RSMERR_BAD_ARGS_ERRORS\n"));
7093                                 return (RSMERR_BAD_ARGS_ERRORS);
7094                         }
7095                         evlist32 = kmem_zalloc(evlistsz32, KM_SLEEP);
7096                 } else {
7097                         evlist32 = event32;
7098                 }
7099 
7100                 /* copyin the seglist into the rsm_poll_event32_t array */
7101                 if (ddi_copyin((caddr_t)msgp->seglist, (caddr_t)evlist32,
7102                     evlistsz32, mode)) {
7103                         if ((msgp->numents > RSM_MAX_POLLFDS) && evlist32) {
7104                                 kmem_free(evlist32, evlistsz32);
7105                         }
7106                         DBG_PRINTF((category, RSM_ERR,
7107                             "consumeevent_copyin evlist: RSMERR_BAD_ADDR\n"));
7108                         return (RSMERR_BAD_ADDR);
7109                 }
7110 
7111                 /* evlist and evlistsz are based on rsm_poll_event_t type */
7112                 evlistsz = sizeof (rsm_poll_event_t)*msgp->numents;
7113 
7114                 if (msgp->numents > RSM_MAX_POLLFDS) {
7115                         evlist = kmem_zalloc(evlistsz, KM_SLEEP);
7116                         *eventspp = evlist;
7117                 } else {
7118                         evlist = *eventspp;
7119                 }
7120                 /*
7121                  * copy the rsm_poll_event32_t array to the rsm_poll_event_t
7122                  * array
7123                  */
7124                 for (i = 0; i < msgp->numents; i++) {
7125                         evlist[i].rnum = evlist32[i].rnum;
7126                         evlist[i].fdsidx = evlist32[i].fdsidx;
7127                         evlist[i].revent = evlist32[i].revent;
7128                 }
7129                 /* free the temp 32-bit event list */
7130                 if ((msgp->numents > RSM_MAX_POLLFDS) && evlist32) {
7131                         kmem_free(evlist32, evlistsz32);
7132                 }
7133 
7134                 return (RSM_SUCCESS);
7135         }
7136 #endif
7137         /* copyin the ioctl message */
7138         if (ddi_copyin(arg, (caddr_t)msgp, sizeof (rsm_consume_event_msg_t),
7139             mode)) {
7140                 DBG_PRINTF((category, RSM_ERR,
7141                     "consumeevent_copyin msgp: RSMERR_BAD_ADDR\n"));
7142                 return (RSMERR_BAD_ADDR);
7143         }
7144         /*
7145          * If numents is large, allocate the events list on the heap;
7146          * otherwise use the array that was passed in.
7147          */
7148         if (msgp->numents > RSM_MAX_POLLFDS) {
7149                 if (msgp->numents > max_segs) { /* validate numents */
7150                         DBG_PRINTF((category, RSM_ERR,
7151                             "consumeevent_copyin: RSMERR_BAD_ARGS_ERRORS\n"));
7152                         return (RSMERR_BAD_ARGS_ERRORS);
7153                 }
7154                 evlistsz = sizeof (rsm_poll_event_t)*msgp->numents;
7155                 evlist = kmem_zalloc(evlistsz, KM_SLEEP);
7156                 *eventspp  = evlist;
7157         }
7158 
7159         /* copyin the seglist */
7160         if (ddi_copyin((caddr_t)msgp->seglist, (caddr_t)(*eventspp),
7161             sizeof (rsm_poll_event_t)*msgp->numents, mode)) {
7162                 if (evlist) {
7163                         kmem_free(evlist, evlistsz);
7164                         *eventspp = NULL;
7165                 }
7166                 DBG_PRINTF((category, RSM_ERR,
7167                     "consumeevent_copyin evlist: RSMERR_BAD_ADDR\n"));
7168                 return (RSMERR_BAD_ADDR);
7169         }
7170 
7171         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7172             "consumeevent_copyin done\n"));
7173         return (RSM_SUCCESS);
7174 }
7175 
7176 static int
7177 rsm_consumeevent_copyout(rsm_consume_event_msg_t *msgp,
7178     rsm_poll_event_t *eventsp, int mode)
7179 {
7180         size_t                  evlistsz;
7181         int                     err = RSM_SUCCESS;
7182         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL);
7183 
7184         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7185             "consumeevent_copyout enter: numents(%d) eventsp(%p)\n",
7186             msgp->numents, eventsp));
7187 
7188 #ifdef _MULTI_DATAMODEL
7189         if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7190                 int i;
7191                 rsm_poll_event32_t      event32[RSM_MAX_POLLFDS];
7192                 rsm_poll_event32_t      *evlist32;
7193                 size_t                  evlistsz32;
7194 
7195                 evlistsz32 = sizeof (rsm_poll_event32_t)*msgp->numents;
7196                 if (msgp->numents > RSM_MAX_POLLFDS) {
7197                         evlist32 = kmem_zalloc(evlistsz32, KM_SLEEP);
7198                 } else {
7199                         evlist32 = event32;
7200                 }
7201 
7202                 /*
7203                  * copy the rsm_poll_event_t array to the rsm_poll_event32_t
7204                  * array
7205                  */
7206                 for (i = 0; i < msgp->numents; i++) {
7207                         evlist32[i].rnum = eventsp[i].rnum;
7208                         evlist32[i].fdsidx = eventsp[i].fdsidx;
7209                         evlist32[i].revent = eventsp[i].revent;
7210                 }
7211 
7212                 if (ddi_copyout((caddr_t)evlist32, (caddr_t)msgp->seglist,
7213                     evlistsz32, mode)) {
7214                         err = RSMERR_BAD_ADDR;
7215                 }
7216 
7217                 if (msgp->numents > RSM_MAX_POLLFDS) {
7218                         if (evlist32) { /* free the temp 32-bit event list */
7219                                 kmem_free(evlist32, evlistsz32);
7220                         }
7221                         /*
7222                          * eventsp and evlistsz are based on rsm_poll_event_t
7223                          * type
7224                          */
7225                         evlistsz = sizeof (rsm_poll_event_t)*msgp->numents;
7226                         /* the event list is on the heap; free it here */
7227                         if (eventsp) {
7228                                 kmem_free(eventsp, evlistsz);
7229                         }
7230                 }
7231 
7232                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7233                     "consumeevent_copyout done: err=%d\n", err));
7234                 return (err);
7235         }
7236 #endif
7237         evlistsz = sizeof (rsm_poll_event_t)*msgp->numents;
7238 
7239         if (ddi_copyout((caddr_t)eventsp, (caddr_t)msgp->seglist, evlistsz,
7240             mode)) {
7241                 err = RSMERR_BAD_ADDR;
7242         }
7243 
7244         if ((msgp->numents > RSM_MAX_POLLFDS) && eventsp) {
7245                 /* the event list is on the heap; free it here */
7246                 kmem_free(eventsp, evlistsz);
7247         }
7248 
7249         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7250             "consumeevent_copyout done: err=%d\n", err));
7251         return (err);
7252 }
7253 
7254 static int
7255 rsm_consumeevent_ioctl(caddr_t arg, int mode)
7256 {
7257         int     rc;
7258         int     i;
7259         minor_t rnum;
7260         rsm_consume_event_msg_t msg = {0};
7261         rsmseg_t                *seg;
7262         rsm_poll_event_t        *event_list;
7263         rsm_poll_event_t        events[RSM_MAX_POLLFDS];
7264         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL);
7265 
7266         event_list = events;
7267 
7268         if ((rc = rsm_consumeevent_copyin(arg, &msg, &event_list, mode)) !=
7269             RSM_SUCCESS) {
7270                 return (rc);
7271         }
7272 
7273         for (i = 0; i < msg.numents; i++) {
7274                 rnum = event_list[i].rnum;
7275                 event_list[i].revent = 0;
7276                 /* get the segment structure */
7277                 seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_LOCK);
7278                 if (seg) {
7279                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7280                             "consumeevent_ioctl: rnum(%d) seg(%p)\n", rnum,
7281                             seg));
7282                         if (seg->s_pollevent) {
7283                                 /* consume the event */
7284                                 atomic_dec_32(&seg->s_pollevent);
7285                                 event_list[i].revent = POLLRDNORM;
7286                         }
7287                         rsmseglock_release(seg);
7288                 }
7289         }
7290 
7291         if ((rc = rsm_consumeevent_copyout(&msg, event_list, mode)) !=
7292             RSM_SUCCESS) {
7293                 return (rc);
7294         }
7295 
7296         return (RSM_SUCCESS);
7297 }
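
     /*
      * Illustrative userland sketch (hypothetical, for clarity only) of
      * the consume-event handshake handled above; "fd" and the rnum
      * values are assumptions, not part of this driver:
      *
      *	rsm_poll_event_t	ev[2];
      *	rsm_consume_event_msg_t	cemsg;
      *
      *	ev[0].rnum = rnum0;	(minor numbers of the segments)
      *	ev[1].rnum = rnum1;
      *	cemsg.seglist = (caddr_t)ev;
      *	cemsg.numents = 2;
      *	(void) ioctl(fd, RSM_IOCTL_CONSUMEEVENT, &cemsg);
      *
      * On return, ev[i].revent is POLLRDNORM for each segment that had a
      * pending event (its s_pollevent count was decremented above) and 0
      * otherwise.
      */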
7298 
7299 static int
7300 iovec_copyin(caddr_t user_vec, rsmka_iovec_t *iovec, int count, int mode)
7301 {
7302         int size;
7303         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7304 
7305         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "iovec_copyin enter\n"));
7306 
7307 #ifdef _MULTI_DATAMODEL
7308         if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7309                 rsmka_iovec32_t *iovec32, *iovec32_base;
7310                 int i;
7311 
7312                 size = count * sizeof (rsmka_iovec32_t);
7313                 iovec32_base = iovec32 = kmem_zalloc(size, KM_SLEEP);
7314                 if (ddi_copyin((caddr_t)user_vec,
7315                     (caddr_t)iovec32, size, mode)) {
7316                         kmem_free(iovec32, size);
7317                         DBG_PRINTF((category, RSM_DEBUG,
7318                             "iovec_copyin: returning RSMERR_BAD_ADDR\n"));
7319                         return (RSMERR_BAD_ADDR);
7320                 }
7321 
7322                 for (i = 0; i < count; i++, iovec++, iovec32++) {
7323                         iovec->io_type = (int)iovec32->io_type;
7324                         if (iovec->io_type == RSM_HANDLE_TYPE)
7325                                 iovec->local.segid = (rsm_memseg_id_t)
7326                                     iovec32->local;
7327                         else
7328                                 iovec->local.vaddr =
7329                                     (caddr_t)(uintptr_t)iovec32->local;
7330                         iovec->local_offset = (size_t)iovec32->local_offset;
7331                         iovec->remote_offset = (size_t)iovec32->remote_offset;
7332                         iovec->transfer_len = (size_t)iovec32->transfer_len;
7333 
7334                 }
7335                 kmem_free(iovec32_base, size);
7336                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7337                     "iovec_copyin done\n"));
7338                 return (DDI_SUCCESS);
7339         }
7340 #endif
7341 
7342         size = count * sizeof (rsmka_iovec_t);
7343         if (ddi_copyin((caddr_t)user_vec, (caddr_t)iovec, size, mode)) {
7344                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7345                     "iovec_copyin done: RSMERR_BAD_ADDR\n"));
7346                 return (RSMERR_BAD_ADDR);
7347         }
7348 
7349         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "iovec_copyin done\n"));
7350 
7351         return (DDI_SUCCESS);
7352 }
7353 
7354 
7355 static int
7356 sgio_copyin(caddr_t arg, rsmka_scat_gath_t *sg_io, int mode)
7357 {
7358         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7359 
7360         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_copyin enter\n"));
7361 
7362 #ifdef _MULTI_DATAMODEL
7363         if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7364                 rsmka_scat_gath32_t sg_io32;
7365 
7366                 if (ddi_copyin(arg, (caddr_t)&sg_io32, sizeof (sg_io32),
7367                     mode)) {
7368                         DBG_PRINTF((category, RSM_DEBUG,
7369                             "sgio_copyin done: returning RSMERR_BAD_ADDR\n"));
7370                         return (RSMERR_BAD_ADDR);
7371                 }
7372                 sg_io->local_nodeid = (rsm_node_id_t)sg_io32.local_nodeid;
7373                 sg_io->io_request_count =  (size_t)sg_io32.io_request_count;
7374                 sg_io->io_residual_count = (size_t)sg_io32.io_residual_count;
7375                 sg_io->flags = (size_t)sg_io32.flags;
7376                 sg_io->remote_handle = (rsm_memseg_import_handle_t)
7377                     (uintptr_t)sg_io32.remote_handle;
7378                 sg_io->iovec = (rsmka_iovec_t *)(uintptr_t)sg_io32.iovec;
7379                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7380                     "sgio_copyin done\n"));
7381                 return (DDI_SUCCESS);
7382         }
7383 #endif
7384         if (ddi_copyin(arg, (caddr_t)sg_io, sizeof (rsmka_scat_gath_t),
7385             mode)) {
7386                 DBG_PRINTF((category, RSM_DEBUG,
7387                     "sgio_copyin done: returning RSMERR_BAD_ADDR\n"));
7388                 return (RSMERR_BAD_ADDR);
7389         }
7390         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_copyin done\n"));
7391         return (DDI_SUCCESS);
7392 }
7393 
7394 static int
7395 sgio_resid_copyout(caddr_t arg, rsmka_scat_gath_t *sg_io, int mode)
7396 {
7397         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7398 
7399         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7400             "sgio_resid_copyout enter\n"));
7401 
7402 #ifdef _MULTI_DATAMODEL
7403         if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7404                 rsmka_scat_gath32_t sg_io32;
7405 
7406                 sg_io32.io_residual_count = sg_io->io_residual_count;
7407                 sg_io32.flags = sg_io->flags;
7408 
7409                 if (ddi_copyout((caddr_t)&sg_io32.io_residual_count,
7410                     (caddr_t)&((rsmka_scat_gath32_t *)arg)->io_residual_count,
7411                     sizeof (uint32_t), mode)) {
7412 
7413                         DBG_PRINTF((category, RSM_ERR,
7414                             "sgio_resid_copyout error: rescnt\n"));
7415                         return (RSMERR_BAD_ADDR);
7416                 }
7417 
7418                 if (ddi_copyout((caddr_t)&sg_io32.flags,
7419                     (caddr_t)&((rsmka_scat_gath32_t *)arg)->flags,
7420                     sizeof (uint32_t), mode)) {
7421 
7422                         DBG_PRINTF((category, RSM_ERR,
7423                             "sgio_resid_copyout error: flags\n"));
7424                         return (RSMERR_BAD_ADDR);
7425                 }
7426                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7427                     "sgio_resid_copyout done\n"));
7428                 return (DDI_SUCCESS);
7429         }
7430 #endif
7431         if (ddi_copyout((caddr_t)&sg_io->io_residual_count,
7432             (caddr_t)&((rsmka_scat_gath_t *)arg)->io_residual_count,
7433             sizeof (ulong_t), mode)) {
7434 
7435                 DBG_PRINTF((category, RSM_ERR,
7436                     "sgio_resid_copyout error:rescnt\n"));
7437                 return (RSMERR_BAD_ADDR);
7438         }
7439 
7440         if (ddi_copyout((caddr_t)&sg_io->flags,
7441             (caddr_t)&((rsmka_scat_gath_t *)arg)->flags,
7442             sizeof (uint_t), mode)) {
7443 
7444                 DBG_PRINTF((category, RSM_ERR,
7445                     "sgio_resid_copyout error:flags\n"));
7446                 return (RSMERR_BAD_ADDR);
7447         }
7448 
7449         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_resid_copyout done\n"));
7450         return (DDI_SUCCESS);
7451 }
7452 
7453 
7454 static int
7455 rsm_iovec_ioctl(dev_t dev, caddr_t arg, int cmd, int mode, cred_t *credp)
7456 {
7457         rsmka_scat_gath_t       sg_io;
7458         rsmka_iovec_t           ka_iovec_arr[RSM_MAX_IOVLEN];
7459         rsmka_iovec_t           *ka_iovec;
7460         rsmka_iovec_t           *ka_iovec_start;
7461         rsmpi_scat_gath_t       rsmpi_sg_io;
7462         rsmpi_iovec_t           iovec_arr[RSM_MAX_IOVLEN];
7463         rsmpi_iovec_t           *iovec;
7464         rsmpi_iovec_t           *iovec_start = NULL;
7465         rsmapi_access_entry_t   *acl;
7466         rsmresource_t           *res;
7467         minor_t                 rnum;
7468         rsmseg_t                *im_seg, *ex_seg;
7469         int                     e;
7470         int                     error = 0;
7471         uint_t                  i;
7472         uint_t                  iov_proc = 0; /* num of iovecs processed */
7473         size_t                  size = 0;
7474         size_t                  ka_size;
7475 
7476         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7477 
7478         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_iovec_ioctl enter\n"));
7479 
7480         credp = credp;
7481 
7482         /*
7483          * Copyin the scatter/gather structure and build a new structure
7484          * for rsmpi.
7485          */
7486         e = sgio_copyin(arg, &sg_io, mode);
7487         if (e != DDI_SUCCESS) {
7488                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7489                     "rsm_iovec_ioctl done: sgio_copyin %d\n", e));
7490                 return (e);
7491         }
7492 
7493         if (sg_io.io_request_count > RSM_MAX_SGIOREQS) {
7494                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7495                     "rsm_iovec_ioctl done: request_count(%d) too large\n",
7496                     sg_io.io_request_count));
7497                 return (RSMERR_BAD_SGIO);
7498         }
7499 
7500         rsmpi_sg_io.io_request_count = sg_io.io_request_count;
7501         rsmpi_sg_io.io_residual_count = sg_io.io_request_count;
7502         rsmpi_sg_io.io_segflg = 0;
7503 
7504         /* Allocate memory and copyin io vector array  */
7505         if (sg_io.io_request_count > RSM_MAX_IOVLEN) {
7506                 ka_size =  sg_io.io_request_count * sizeof (rsmka_iovec_t);
7507                 ka_iovec_start = ka_iovec = kmem_zalloc(ka_size, KM_SLEEP);
7508         } else {
7509                 ka_iovec_start = ka_iovec = ka_iovec_arr;
7510         }
7511         e = iovec_copyin((caddr_t)sg_io.iovec, ka_iovec,
7512             sg_io.io_request_count, mode);
7513         if (e != DDI_SUCCESS) {
7514                 if (sg_io.io_request_count > RSM_MAX_IOVLEN)
7515                         kmem_free(ka_iovec, ka_size);
7516                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7517                     "rsm_iovec_ioctl done: iovec_copyin %d\n", e));
7518                 return (e);
7519         }
7520 
7521         /* get the import segment descriptor */
7522         rnum = getminor(dev);
7523         res = rsmresource_lookup(rnum, RSM_LOCK);
7524 
7525         /*
7526          * The following sequence of locking may (or MAY NOT) cause a
7527          * deadlock but this is currently not addressed here since the
7528          * implementation will be changed to incorporate the use of
7529          * reference counting for both the import and the export segments.
7530          */
7531 
7532         /* rsmseglock_acquire(im_seg) done in rsmresource_lookup */
7533 
7534         im_seg = (rsmseg_t *)res;
7535 
7536         if (im_seg == NULL) {
7537                 if (sg_io.io_request_count > RSM_MAX_IOVLEN)
7538                         kmem_free(ka_iovec, ka_size);
7539                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7540                     "rsm_iovec_ioctl done: rsmresource_lookup failed\n"));
7541                 return (EINVAL);
7542         }
7543         /* putv/getv is supported only on import segments */
7544         if (im_seg->s_type != RSM_RESOURCE_IMPORT_SEGMENT) {
7545                 rsmseglock_release(im_seg);
7546                 if (sg_io.io_request_count > RSM_MAX_IOVLEN)
7547                         kmem_free(ka_iovec, ka_size);
7548                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7549                     "rsm_iovec_ioctl done: not an import segment\n"));
7550                 return (EINVAL);
7551         }
7552 
7553         /*
7554          * Wait for a remote DR to complete, i.e. for segments to get
7555          * UNQUIESCED, as well as for a local DR to complete.
7556          */
7557         while ((im_seg->s_state == RSM_STATE_CONN_QUIESCE) ||
7558             (im_seg->s_state == RSM_STATE_MAP_QUIESCE) ||
7559             (im_seg->s_flags & RSM_DR_INPROGRESS)) {
7560                 if (cv_wait_sig(&im_seg->s_cv, &im_seg->s_lock) == 0) {
7561                         DBG_PRINTF((category, RSM_DEBUG,
7562                             "rsm_iovec_ioctl done: cv_wait INTR"));
7563                         rsmseglock_release(im_seg);
7564                         return (RSMERR_INTERRUPTED);
7565                 }
7566         }
7567 
7568         if ((im_seg->s_state != RSM_STATE_CONNECT) &&
7569             (im_seg->s_state != RSM_STATE_ACTIVE)) {
7570 
7571                 ASSERT(im_seg->s_state == RSM_STATE_DISCONNECT ||
7572                     im_seg->s_state == RSM_STATE_NEW);
7573 
7574                 DBG_PRINTF((category, RSM_DEBUG,
7575                     "rsm_iovec_ioctl done: im_seg not conn/map"));
7576                 rsmseglock_release(im_seg);
7577                 e = RSMERR_BAD_SGIO;
7578                 goto out;
7579         }
7580 
7581         im_seg->s_rdmacnt++;
7582         rsmseglock_release(im_seg);
7583 
7584         /*
7585          * Allocate and set up the io vector for rsmpi
7586          */
7587         if (sg_io.io_request_count > RSM_MAX_IOVLEN) {
7588                 size = sg_io.io_request_count * sizeof (rsmpi_iovec_t);
7589                 iovec_start = iovec = kmem_zalloc(size, KM_SLEEP);
7590         } else {
7591                 iovec_start = iovec = iovec_arr;
7592         }
7593 
7594         rsmpi_sg_io.iovec = iovec;
7595         for (iov_proc = 0; iov_proc < sg_io.io_request_count; iov_proc++) {
7596                 if (ka_iovec->io_type == RSM_HANDLE_TYPE) {
7597                         ex_seg = rsmexport_lookup(ka_iovec->local.segid);
7598 
7599                         if (ex_seg == NULL) {
7600                                 e = RSMERR_BAD_SGIO;
7601                                 break;
7602                         }
7603                         ASSERT(ex_seg->s_state == RSM_STATE_EXPORT);
7604 
7605                         acl = ex_seg->s_acl;
7606                         if (acl[0].ae_permission == 0) {
7607                                 struct buf *xbuf;
7608                                 dev_t sdev = 0;
7609 
7610                                 xbuf = ddi_umem_iosetup(ex_seg->s_cookie,
7611                                     0, ex_seg->s_len, B_WRITE,
7612                                     sdev, 0, NULL, DDI_UMEM_SLEEP);
7613 
7614                                 ASSERT(xbuf != NULL);
7615 
7616                                 iovec->local_mem.ms_type = RSM_MEM_BUF;
7617                                 iovec->local_mem.ms_memory.bp = xbuf;
7618                         } else {
7619                                 iovec->local_mem.ms_type = RSM_MEM_HANDLE;
7620                                 iovec->local_mem.ms_memory.handle =
7621                                     ex_seg->s_handle.out;
7622                         }
7623                         ex_seg->s_rdmacnt++; /* refcnt the handle */
7624                         rsmseglock_release(ex_seg);
7625                 } else {
7626                         iovec->local_mem.ms_type = RSM_MEM_VADDR;
7627                         iovec->local_mem.ms_memory.vr.vaddr =
7628                             ka_iovec->local.vaddr;
7629                 }
7630 
7631                 iovec->local_offset = ka_iovec->local_offset;
7632                 iovec->remote_handle = im_seg->s_handle.in;
7633                 iovec->remote_offset = ka_iovec->remote_offset;
7634                 iovec->transfer_length = ka_iovec->transfer_len;
7635                 iovec++;
7636                 ka_iovec++;
7637         }
7638 
7639         if (iov_proc < sg_io.io_request_count) {
7640                 /* error while processing handle */
7641                 rsmseglock_acquire(im_seg);
7642                 im_seg->s_rdmacnt--;   /* decrement the refcnt for importseg */
7643                 if (im_seg->s_rdmacnt == 0) {
7644                         cv_broadcast(&im_seg->s_cv);
7645                 }
7646                 rsmseglock_release(im_seg);
7647                 goto out;
7648         }
7649 
7650         /* call rsmpi */
7651         if (cmd == RSM_IOCTL_PUTV)
7652                 e = im_seg->s_adapter->rsmpi_ops->rsm_memseg_import_putv(
7653                     im_seg->s_adapter->rsmpi_handle,
7654                     &rsmpi_sg_io);
7655         else if (cmd == RSM_IOCTL_GETV)
7656                 e = im_seg->s_adapter->rsmpi_ops->rsm_memseg_import_getv(
7657                     im_seg->s_adapter->rsmpi_handle,
7658                     &rsmpi_sg_io);
7659         else {
7660                 e = EINVAL;
7661                 DBG_PRINTF((category, RSM_DEBUG,
7662                     "iovec_ioctl: bad command = %x\n", cmd));
7663         }
7664 
7665 
7666         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7667             "rsm_iovec_ioctl RSMPI oper done %d\n", e));
7668 
7669         sg_io.io_residual_count = rsmpi_sg_io.io_residual_count;
7670 
7671         /*
7672          * Check for implicit signal post flag and do the signal
7673          * post if needed
7674          */
7675         if (sg_io.flags & RSM_IMPLICIT_SIGPOST &&
7676             e == RSM_SUCCESS) {
7677                 rsmipc_request_t request;
7678 
7679                 request.rsmipc_key = im_seg->s_segid;
7680                 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL;
7681                 request.rsmipc_segment_cookie = NULL;
7682                 e = rsmipc_send(im_seg->s_node, &request, RSM_NO_REPLY);
7683                 /*
7684                  * Reset the implicit signal post flag to 0 to indicate
7685                  * that the signal post has been done and need not be
7686                  * done in the RSMAPI library
7687                  */
7688                 sg_io.flags &= ~RSM_IMPLICIT_SIGPOST;
7689         }
7690 
7691         rsmseglock_acquire(im_seg);
7692         im_seg->s_rdmacnt--;
7693         if (im_seg->s_rdmacnt == 0) {
7694                 cv_broadcast(&im_seg->s_cv);
7695         }
7696         rsmseglock_release(im_seg);
7697         error = sgio_resid_copyout(arg, &sg_io, mode);
7698 out:
7699         iovec = iovec_start;
7700         ka_iovec = ka_iovec_start;
7701         for (i = 0; i < iov_proc; i++) {
7702                 if (ka_iovec->io_type == RSM_HANDLE_TYPE) {
7703                         ex_seg = rsmexport_lookup(ka_iovec->local.segid);
7704 
7705                         ASSERT(ex_seg != NULL);
7706                         ASSERT(ex_seg->s_state == RSM_STATE_EXPORT);
7707 
7708                         ex_seg->s_rdmacnt--; /* unrefcnt the handle */
7709                         if (ex_seg->s_rdmacnt == 0) {
7710                                 cv_broadcast(&ex_seg->s_cv);
7711                         }
7712                         rsmseglock_release(ex_seg);
7713                 }
7714 
7715                 ASSERT(iovec != NULL); /* true if iov_proc > 0 */
7716 
7717                 /*
7718                  * At present there is no dependency on the existence of xbufs
7719                  * created by ddi_umem_iosetup for each of the iovecs, so we
7720                  * can free these xbufs here.
7721                  */
7722                 if (iovec->local_mem.ms_type == RSM_MEM_BUF) {
7723                         freerbuf(iovec->local_mem.ms_memory.bp);
7724                 }
7725 
7726                 iovec++;
7727                 ka_iovec++;
7728         }
7729 
7730         if (sg_io.io_request_count > RSM_MAX_IOVLEN) {
7731                 if (iovec_start)
7732                         kmem_free(iovec_start, size);
7733                 kmem_free(ka_iovec_start, ka_size);
7734         }
7735 
7736         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7737             "rsm_iovec_ioctl done %d\n", e));
7738         /* if RSMPI call fails return that else return copyout's retval */
7739         return ((e != RSM_SUCCESS) ? e : error);
7740 
7741 }
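
     /*
      * Illustrative userland sketch (hypothetical, for clarity only) of a
      * single-element putv driven through rsm_iovec_ioctl() above; "fd" is
      * an assumed descriptor on the import segment's minor, and fields not
      * shown are elided:
      *
      *	rsmka_iovec_t		iov;
      *	rsmka_scat_gath_t	sg;
      *
      *	iov.io_type = RSM_HANDLE_TYPE;	(local side named by segment id)
      *	iov.local.segid = local_segid;
      *	iov.local_offset = 0;
      *	iov.remote_offset = 0;
      *	iov.transfer_len = len;
      *	sg.io_request_count = 1;
      *	sg.iovec = &iov;
      *	sg.flags = RSM_IMPLICIT_SIGPOST;
      *	(void) ioctl(fd, RSM_IOCTL_PUTV, &sg);
      *
      * With RSM_IMPLICIT_SIGPOST set, a successful transfer also posts the
      * doorbell and clears the flag on copyout so the RSMAPI library does
      * not post a second time.
      */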
7742 
7743 
7744 static int
7745 rsmaddr_ioctl(int cmd, rsm_ioctlmsg_t *msg, int mode)
7746 {
7747         adapter_t       *adapter;
7748         rsm_addr_t      addr;
7749         rsm_node_id_t   node;
7750         int             rval = DDI_SUCCESS;
7751         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL);
7752 
7753         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmaddr_ioctl enter\n"));
7754 
7755         adapter =  rsm_getadapter(msg, mode);
7756         if (adapter == NULL) {
7757                 DBG_PRINTF((category, RSM_DEBUG,
7758                     "rsmaddr_ioctl done: adapter not found\n"));
7759                 return (RSMERR_CTLR_NOT_PRESENT);
7760         }
7761 
7762         switch (cmd) {
7763         case RSM_IOCTL_MAP_TO_ADDR: /* nodeid to hwaddr mapping */
7764                 /* returns the hwaddr in msg->hwaddr */
7765                 if (msg->nodeid == my_nodeid) {
7766                         msg->hwaddr = adapter->hwaddr;
7767                 } else {
7768                         addr = get_remote_hwaddr(adapter, msg->nodeid);
7769                         if ((int64_t)addr < 0) {
7770                                 rval = RSMERR_INTERNAL_ERROR;
7771                         } else {
7772                                 msg->hwaddr = addr;
7773                         }
7774                 }
7775                 break;
7776         case RSM_IOCTL_MAP_TO_NODEID: /* hwaddr to nodeid mapping */
7777                 /* returns the nodeid in msg->nodeid */
7778                 if (msg->hwaddr == adapter->hwaddr) {
7779                         msg->nodeid = my_nodeid;
7780                 } else {
7781                         node = get_remote_nodeid(adapter, msg->hwaddr);
7782                         if ((int)node < 0) {
7783                                 rval = RSMERR_INTERNAL_ERROR;
7784                         } else {
7785                                 msg->nodeid = (rsm_node_id_t)node;
7786                         }
7787                 }
7788                 break;
7789         default:
7790                 rval = EINVAL;
7791                 break;
7792         }
7793 
7794         rsmka_release_adapter(adapter);
7795         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7796             "rsmaddr_ioctl done: %d\n", rval));
7797         return (rval);
7798 }
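
     /*
      * Illustrative sketch (hypothetical values, for clarity only) of the
      * in-kernel flow for a nodeid-to-hwaddr query handled above: the
      * caller fills in msg->nodeid and on success msg->hwaddr holds the
      * answer.
      *
      *	msg.nodeid = target_node;
      *	if (rsmaddr_ioctl(RSM_IOCTL_MAP_TO_ADDR, &msg, mode) == RSM_SUCCESS)
      *		hwaddr = msg.hwaddr;
      *
      * The reverse query, RSM_IOCTL_MAP_TO_NODEID, takes msg->hwaddr and
      * returns msg->nodeid.
      */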
7799 
7800 static int
7801 rsm_ddi_copyin(caddr_t arg, rsm_ioctlmsg_t *msg, int mode)
7802 {
7803         DBG_DEFINE(category,
7804             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL | RSM_DDI);
7805 
7806         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ddi_copyin enter\n"));
7807 
7808 #ifdef _MULTI_DATAMODEL
7809 
7810         if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7811                 rsm_ioctlmsg32_t msg32;
7812                 int i;
7813 
7814                 if (ddi_copyin(arg, (caddr_t)&msg32, sizeof (msg32), mode)) {
7815                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7816                             "rsm_ddi_copyin done: RSMERR_BAD_ADDR\n"));
7817                         return (RSMERR_BAD_ADDR);
7818                 }
7819                 msg->len = msg32.len;
7820                 msg->vaddr = (caddr_t)(uintptr_t)msg32.vaddr;
7821                 msg->arg = (caddr_t)(uintptr_t)msg32.arg;
7822                 msg->key = msg32.key;
7823                 msg->acl_len = msg32.acl_len;
7824                 msg->acl = (rsmapi_access_entry_t *)(uintptr_t)msg32.acl;
7825                 msg->cnum = msg32.cnum;
7826                 msg->cname = (caddr_t)(uintptr_t)msg32.cname;
7827                 msg->cname_len = msg32.cname_len;
7828                 msg->nodeid = msg32.nodeid;
7829                 msg->hwaddr = msg32.hwaddr;
7830                 msg->perm = msg32.perm;
7831                 for (i = 0; i < 4; i++) {
7832                         msg->bar.comp[i].u64 = msg32.bar.comp[i].u64;
7833                 }
7834                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7835                     "rsm_ddi_copyin done\n"));
7836                 return (RSM_SUCCESS);
7837         }
7838 #endif
7839         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ddi_copyin done\n"));
7840         if (ddi_copyin(arg, (caddr_t)msg, sizeof (*msg), mode))
7841                 return (RSMERR_BAD_ADDR);
7842         else
7843                 return (RSM_SUCCESS);
7844 }
7845 
7846 static int
7847 rsmattr_ddi_copyout(adapter_t *adapter, caddr_t arg, int mode)
7848 {
7849         rsmka_int_controller_attr_t     rsm_cattr;
7850         DBG_DEFINE(category,
7851             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL | RSM_DDI);
7852 
7853         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7854             "rsmattr_ddi_copyout enter\n"));
7855         /*
7856          * need to copy appropriate data from rsm_controller_attr_t
7857          * to rsmka_int_controller_attr_t
7858          */
7859 #ifdef  _MULTI_DATAMODEL
7860         if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7861                 rsmka_int_controller_attr32_t rsm_cattr32;
7862 
7863                 rsm_cattr32.attr_direct_access_sizes =
7864                     adapter->rsm_attr.attr_direct_access_sizes;
7865                 rsm_cattr32.attr_atomic_sizes =
7866                     adapter->rsm_attr.attr_atomic_sizes;
7867                 rsm_cattr32.attr_page_size =
7868                     adapter->rsm_attr.attr_page_size;
7869                 if (adapter->rsm_attr.attr_max_export_segment_size >
7870                     UINT_MAX)
7871                         rsm_cattr32.attr_max_export_segment_size =
7872                             RSM_MAXSZ_PAGE_ALIGNED;
7873                 else
7874                         rsm_cattr32.attr_max_export_segment_size =
7875                             adapter->rsm_attr.attr_max_export_segment_size;
7876                 if (adapter->rsm_attr.attr_tot_export_segment_size >
7877                     UINT_MAX)
7878                         rsm_cattr32.attr_tot_export_segment_size =
7879                             RSM_MAXSZ_PAGE_ALIGNED;
7880                 else
7881                         rsm_cattr32.attr_tot_export_segment_size =
7882                             adapter->rsm_attr.attr_tot_export_segment_size;
7883                 if (adapter->rsm_attr.attr_max_export_segments >
7884                     UINT_MAX)
7885                         rsm_cattr32.attr_max_export_segments =
7886                             UINT_MAX;
7887                 else
7888                         rsm_cattr32.attr_max_export_segments =
7889                             adapter->rsm_attr.attr_max_export_segments;
7890                 if (adapter->rsm_attr.attr_max_import_map_size >
7891                     UINT_MAX)
7892                         rsm_cattr32.attr_max_import_map_size =
7893                             RSM_MAXSZ_PAGE_ALIGNED;
7894                 else
7895                         rsm_cattr32.attr_max_import_map_size =
7896                             adapter->rsm_attr.attr_max_import_map_size;
7897                 if (adapter->rsm_attr.attr_tot_import_map_size >
7898                     UINT_MAX)
7899                         rsm_cattr32.attr_tot_import_map_size =
7900                             RSM_MAXSZ_PAGE_ALIGNED;
7901                 else
7902                         rsm_cattr32.attr_tot_import_map_size =
7903                             adapter->rsm_attr.attr_tot_import_map_size;
7904                 if (adapter->rsm_attr.attr_max_import_segments >
7905                     UINT_MAX)
7906                         rsm_cattr32.attr_max_import_segments =
7907                             UINT_MAX;
7908                 else
7909                         rsm_cattr32.attr_max_import_segments =
7910                             adapter->rsm_attr.attr_max_import_segments;
7911                 rsm_cattr32.attr_controller_addr =
7912                     adapter->rsm_attr.attr_controller_addr;
7913 
7914                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7915                     "rsmattr_ddi_copyout done\n"));
7916                 if (ddi_copyout((caddr_t)&rsm_cattr32, arg,
7917                     sizeof (rsmka_int_controller_attr32_t), mode))
7918                         return (RSMERR_BAD_ADDR);
7919                 else
7920                         return (RSM_SUCCESS);
7922         }
7923 #endif
7924         rsm_cattr.attr_direct_access_sizes =
7925             adapter->rsm_attr.attr_direct_access_sizes;
7926         rsm_cattr.attr_atomic_sizes =
7927             adapter->rsm_attr.attr_atomic_sizes;
7928         rsm_cattr.attr_page_size =
7929             adapter->rsm_attr.attr_page_size;
7930         rsm_cattr.attr_max_export_segment_size =
7931             adapter->rsm_attr.attr_max_export_segment_size;
7932         rsm_cattr.attr_tot_export_segment_size =
7933             adapter->rsm_attr.attr_tot_export_segment_size;
7934         rsm_cattr.attr_max_export_segments =
7935             adapter->rsm_attr.attr_max_export_segments;
7936         rsm_cattr.attr_max_import_map_size =
7937             adapter->rsm_attr.attr_max_import_map_size;
7938         rsm_cattr.attr_tot_import_map_size =
7939             adapter->rsm_attr.attr_tot_import_map_size;
7940         rsm_cattr.attr_max_import_segments =
7941             adapter->rsm_attr.attr_max_import_segments;
7942         rsm_cattr.attr_controller_addr =
7943             adapter->rsm_attr.attr_controller_addr;
7944         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7945             "rsmattr_ddi_copyout done\n"));
7946         if (ddi_copyout((caddr_t)&rsm_cattr, arg,
7947             sizeof (rsmka_int_controller_attr_t), mode))
7948                 return (RSMERR_BAD_ADDR);
7949         else
7950                 return (RSM_SUCCESS);
7952 }
7953 
7954 /*ARGSUSED*/
7955 static int
7956 rsm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
7957     int *rvalp)
7958 {
7959         rsmseg_t *seg;
7960         rsmresource_t   *res;
7961         minor_t         rnum;
7962         rsm_ioctlmsg_t msg = {0};
7963         int error;
7964         adapter_t *adapter;
7965         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL);
7966 
7967         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ioctl enter\n"));
7968 
7969         if (cmd == RSM_IOCTL_CONSUMEEVENT) {
7970                 error = rsm_consumeevent_ioctl((caddr_t)arg, mode);
7971                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7972                     "rsm_ioctl RSM_IOCTL_CONSUMEEVENT done: %d\n", error));
7973                 return (error);
7974         }
7975 
7976         /* topology cmd does not use the arg common to other cmds */
7977         if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_TOPOLOGY) {
7978                 error = rsmka_topology_ioctl((caddr_t)arg, cmd, mode);
7979                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7980                     "rsm_ioctl done: %d\n", error));
7981                 return (error);
7982         }
7983 
7984         if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_IOVEC) {
7985                 error = rsm_iovec_ioctl(dev, (caddr_t)arg, cmd, mode, credp);
7986                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7987                     "rsm_ioctl done: %d\n", error));
7988                 return (error);
7989         }
7990 
7991         /*
7992          * try to load arguments
7993          */
7994         if (cmd != RSM_IOCTL_RING_BELL &&
7995             rsm_ddi_copyin((caddr_t)arg, &msg, mode)) {
7996                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7997                     "rsm_ioctl done: RSMERR_BAD_ADDR\n"));
7998                 return (RSMERR_BAD_ADDR);
7999         }
8000 
8001         if (cmd == RSM_IOCTL_ATTR) {
8002                 adapter =  rsm_getadapter(&msg, mode);
8003                 if (adapter == NULL) {
8004                         DBG_PRINTF((category, RSM_DEBUG,
8005                             "rsm_ioctl done: ENODEV\n"));
8006                         return (RSMERR_CTLR_NOT_PRESENT);
8007                 }
8008                 error = rsmattr_ddi_copyout(adapter, msg.arg, mode);
8009                 rsmka_release_adapter(adapter);
8010                 DBG_PRINTF((category, RSM_DEBUG,
8011                     "rsm_ioctl:after copyout %d\n", error));
8012                 return (error);
8013         }
8014 
8015         if (cmd == RSM_IOCTL_BAR_INFO) {
8016                 /* Return library off,len of barrier page */
8017                 msg.off = barrier_offset;
8018                 msg.len = (int)barrier_size;
8019 #ifdef _MULTI_DATAMODEL
8020                 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
8021                         rsm_ioctlmsg32_t msg32;
8022 
8023                         if (msg.len > UINT_MAX)
8024                                 msg32.len = RSM_MAXSZ_PAGE_ALIGNED;
8025                         else
8026                                 msg32.len = (int32_t)msg.len;
8027                         msg32.off = (int32_t)msg.off;
8028                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8029                             "rsm_ioctl done\n"));
8030                         if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg,
8031                             sizeof (msg32), mode))
8032                                 return (RSMERR_BAD_ADDR);
8033                         else
8034                                 return (RSM_SUCCESS);
8035                 }
8036 #endif
8037                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8038                     "rsm_ioctl done\n"));
8039                 if (ddi_copyout((caddr_t)&msg, (caddr_t)arg,
8040                     sizeof (msg), mode))
8041                         return (RSMERR_BAD_ADDR);
8042                 else
8043                         return (RSM_SUCCESS);
8044         }
8045 
8046         if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_MAP_ADDR) {
8047                 /* map the nodeid or hwaddr */
8048                 error = rsmaddr_ioctl(cmd, &msg, mode);
8049                 if (error == RSM_SUCCESS) {
8050 #ifdef _MULTI_DATAMODEL
8051                         if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
8052                                 rsm_ioctlmsg32_t msg32;
8053 
8054                                 msg32.hwaddr = (uint64_t)msg.hwaddr;
8055                                 msg32.nodeid = (uint32_t)msg.nodeid;
8056 
8057                                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8058                                     "rsm_ioctl done\n"));
8059                                 if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg,
8060                                     sizeof (msg32), mode))
8061                                         return (RSMERR_BAD_ADDR);
8062                                 else
8063                                         return (RSM_SUCCESS);
8064                         }
8065 #endif
8066                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8067                             "rsm_ioctl done\n"));
8068                         if (ddi_copyout((caddr_t)&msg, (caddr_t)arg,
8069                             sizeof (msg), mode))
8070                                 return (RSMERR_BAD_ADDR);
8071                         else
8072                                 return (RSM_SUCCESS);
8073                 }
8074                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8075                     "rsm_ioctl done: %d\n", error));
8076                 return (error);
8077         }
8078 
8079         /* Find the resource and look it up (no lock is taken here) */
8080         rnum = getminor(dev);
8081         res = rsmresource_lookup(rnum, RSM_NOLOCK);
8082         ASSERT(res != NULL);
8083 
8084         /*
8085          * Find command group
8086          */
8087         switch (RSM_IOCTL_CMDGRP(cmd)) {
8088         case RSM_IOCTL_EXPORT_SEG:
8089                 /*
8090                  * Export list is searched during publish, loopback and
8091                  * remote lookup call.
8092                  */
8093                 seg = rsmresource_seg(res, rnum, credp,
8094                     RSM_RESOURCE_EXPORT_SEGMENT);
8095                 if (seg->s_type == RSM_RESOURCE_EXPORT_SEGMENT) {
8096                         error = rsmexport_ioctl(seg, &msg, cmd, arg, mode,
8097                             credp);
8098                 } else { /* export ioctl on an import/barrier resource */
8099                         error = RSMERR_BAD_SEG_HNDL;
8100                 }
8101                 break;
8102         case RSM_IOCTL_IMPORT_SEG:
8103                 /* Import list is searched during remote unmap call. */
8104                 seg = rsmresource_seg(res, rnum, credp,
8105                     RSM_RESOURCE_IMPORT_SEGMENT);
8106                 if (seg->s_type == RSM_RESOURCE_IMPORT_SEGMENT) {
8107                         error = rsmimport_ioctl(seg, &msg, cmd, arg, mode,
8108                             credp);
8109                 } else  { /* import ioctl on an export/barrier resource */
8110                         error = RSMERR_BAD_SEG_HNDL;
8111                 }
8112                 break;
8113         case RSM_IOCTL_BAR:
8114                 if (res != RSMRC_RESERVED &&
8115                     res->rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT) {
8116                         error = rsmbar_ioctl((rsmseg_t *)res, &msg, cmd, arg,
8117                             mode);
8118                 } else { /* invalid res value */
8119                         error = RSMERR_BAD_SEG_HNDL;
8120                 }
8121                 break;
8122         case RSM_IOCTL_BELL:
8123                 if (res != RSMRC_RESERVED) {
8124                         if (res->rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT)
8125                                 error = exportbell_ioctl((rsmseg_t *)res, cmd);
8126                         else if (res->rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT)
8127                                 error = importbell_ioctl((rsmseg_t *)res, cmd);
8128                         else /* RSM_RESOURCE_BAR */
8129                                 error = RSMERR_BAD_SEG_HNDL;
8130                 } else { /* invalid res value */
8131                         error = RSMERR_BAD_SEG_HNDL;
8132                 }
8133                 break;
8134         default:
8135                 error = EINVAL;
8136         }
8137 
8138         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ioctl done: %d\n",
8139             error));
8140         return (error);
8141 }
8142 
8143 
8144 /* **************************** Segment Mapping Operations ********* */
8145 static rsm_mapinfo_t *
8146 rsm_get_mapinfo(rsmseg_t *seg, off_t off, size_t len, off_t *dev_offset,
8147     size_t *map_len)
8148 {
8149         rsm_mapinfo_t   *p;
8150         /*
8151          * Find the correct mapinfo structure to use during the mapping
8152          * from the seg->s_mapinfo list.
8153          * The seg->s_mapinfo list contains in reverse order the mappings
8154          * as returned by the RSMPI rsm_map. In rsm_devmap, we need to
8155          * access the correct entry within this list for the mapping
8156          * requested.
8157          *
8158          * The algorithm for selecting a list entry is as follows:
8159          *
8160          * When the start_offset of an entry is <= off, we have found
8161          * the entry we were looking for; adjust dev_offset and map_len
8162          * (map_len needs to be PAGESIZE aligned).
8163          */
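             /*
              * Worked example (illustrative values only): if the list holds
              * {start_offset=8192, dev_offset=D2} followed by
              * {start_offset=0, dev_offset=D1} and off is 12288, the first
              * entry already satisfies start_offset <= off, so the returned
              * dev_offset is D2 + (12288 - 8192) = D2 + 4096.
              */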
8164         p = seg->s_mapinfo;
8165         for (; p; p = p->next) {
8166                 if (p->start_offset <= off) {
8167                         *dev_offset = p->dev_offset + off - p->start_offset;
8168                         *map_len = (len > p->individual_len) ?
8169                             p->individual_len : ptob(btopr(len));
8170                         return (p);
8171                 }
8173         }
8174 
8175         return (NULL);
8176 }
8177 
8178 static void
8179 rsm_free_mapinfo(rsm_mapinfo_t  *mapinfo)
8180 {
8181         rsm_mapinfo_t *p;
8182 
8183         while (mapinfo != NULL) {
8184                 p = mapinfo;
8185                 mapinfo = mapinfo->next;
8186                 kmem_free(p, sizeof (*p));
8187         }
8188 }
8189 
8190 static int
8191 rsmmap_map(devmap_cookie_t dhp, dev_t dev, uint_t flags, offset_t off,
8192     size_t len, void **pvtp)
8193 {
8194         rsmcookie_t     *p;
8195         rsmresource_t   *res;
8196         rsmseg_t        *seg;
8197         minor_t rnum;
8198         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8199 
8200         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_map enter\n"));
8201 
8202         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8203             "rsmmap_map: dhp = %x\n", dhp));
8204 
8205         flags = flags;
8206 
8207         rnum = getminor(dev);
8208         res = (rsmresource_t *)rsmresource_lookup(rnum, RSM_NOLOCK);
8209         ASSERT(res != NULL);
8210 
8211         seg = (rsmseg_t *)res;
8212 
8213         rsmseglock_acquire(seg);
8214 
8215         ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8216 
8217         /*
8218          * Allocate structure and add cookie to segment list
8219          */
8220         p = kmem_alloc(sizeof (*p), KM_SLEEP);
8221 
8222         p->c_dhp = dhp;
8223         p->c_off = off;
8224         p->c_len = len;
8225         p->c_next = seg->s_ckl;
8226         seg->s_ckl = p;
8227 
8228         *pvtp = (void *)seg;
8229 
8230         rsmseglock_release(seg);
8231 
8232         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_map done\n"));
8233         return (DDI_SUCCESS);
8234 }
8235 
8236 /*
8237  * Page fault handling is done here. The prerequisite mapping setup
8238  * has been done in rsm_devmap with calls to devmap_devmem_setup or
8239  * devmap_umem_setup.
8240  */
8241 static int
8242 rsmmap_access(devmap_cookie_t dhp, void *pvt, offset_t offset, size_t len,
8243     uint_t type, uint_t rw)
8244 {
8245         int e;
8246         rsmseg_t *seg = (rsmseg_t *)pvt;
8247         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8248 
8249         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_access enter\n"));
8250 
8251         rsmseglock_acquire(seg);
8252 
8253         ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8254 
8255         while (seg->s_state == RSM_STATE_MAP_QUIESCE) {
8256                 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
8257                         DBG_PRINTF((category, RSM_DEBUG,
8258                             "rsmmap_access done: cv_wait INTR"));
8259                         rsmseglock_release(seg);
8260                         return (RSMERR_INTERRUPTED);
8261                 }
8262         }
8263 
8264         ASSERT(seg->s_state == RSM_STATE_DISCONNECT ||
8265             seg->s_state == RSM_STATE_ACTIVE);
8266 
8267         if (seg->s_state == RSM_STATE_DISCONNECT)
8268                 seg->s_flags |= RSM_IMPORT_DUMMY;
8269 
8270         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8271             "rsmmap_access: dhp = %x\n", dhp));
8272 
8273         rsmseglock_release(seg);
8274 
	if ((e = devmap_load(dhp, offset, len, type, rw)) != 0) {
		DBG_PRINTF((category, RSM_ERR, "devmap_load failed\n"));
	}

8280         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_access done\n"));
8281 
8282         return (e);
8283 }
8284 
8285 static int
8286 rsmmap_dup(devmap_cookie_t dhp, void *oldpvt, devmap_cookie_t new_dhp,
8287         void **newpvt)
8288 {
8289         rsmseg_t        *seg = (rsmseg_t *)oldpvt;
8290         rsmcookie_t     *p, *old;
8291         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8292 
8293         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_dup enter\n"));
8294 
8295         /*
8296          * Same as map, create an entry to hold cookie and add it to
8297          * connect segment list. The oldpvt is a pointer to segment.
8298          * Return segment pointer in newpvt.
8299          */
8300         rsmseglock_acquire(seg);
8301 
8302         ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8303 
8304         /*
8305          * Find old cookie
8306          */
8307         for (old = seg->s_ckl; old != NULL; old = old->c_next) {
8308                 if (old->c_dhp == dhp) {
8309                         break;
8310                 }
8311         }
8312         if (old == NULL) {
8313                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8314                     "rsmmap_dup done: EINVAL\n"));
8315                 rsmseglock_release(seg);
8316                 return (EINVAL);
8317         }
8318 
8319         p = kmem_alloc(sizeof (*p), KM_SLEEP);
8320 
8321         p->c_dhp = new_dhp;
8322         p->c_off = old->c_off;
8323         p->c_len = old->c_len;
8324         p->c_next = seg->s_ckl;
8325         seg->s_ckl = p;
8326 
8327         *newpvt = (void *)seg;
8328 
8329         rsmseglock_release(seg);
8330 
8331         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_dup done\n"));
8332 
8333         return (DDI_SUCCESS);
8334 }
8335 
8336 static void
8337 rsmmap_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off, size_t len,
8338         devmap_cookie_t new_dhp1, void **pvtp1,
8339         devmap_cookie_t new_dhp2, void **pvtp2)
8340 {
8341         /*
8342          * Remove pvtp structure from segment list.
8343          */
8344         rsmseg_t        *seg = (rsmseg_t *)pvtp;
8345         int freeflag;
8346 
8347         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8348 
8349         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_unmap enter\n"));
8350 
8351         off = off; len = len;
8352         pvtp1 = pvtp1; pvtp2 = pvtp2;
8353 
8354         rsmseglock_acquire(seg);
8355 
8356         ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8357 
8358         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8359             "rsmmap_unmap: dhp = %x\n", dhp));
8360         /*
8361          * We can go ahead and remove the dhps even if we are in
8362          * the MAPPING state because the dhps being removed here
8363          * belong to a different mmap and we are holding the segment
8364          * lock.
8365          */
8366         if (new_dhp1 == NULL && new_dhp2 == NULL) {
8367                 /* find and remove dhp handle */
8368                 rsmcookie_t *tmp, **back = &seg->s_ckl;
8369 
8370                 while (*back != NULL) {
8371                         tmp = *back;
8372                         if (tmp->c_dhp == dhp) {
8373                                 *back = tmp->c_next;
8374                                 kmem_free(tmp, sizeof (*tmp));
8375                                 break;
8376                         }
8377                         back = &tmp->c_next;
8378                 }
8379         } else {
		DBG_PRINTF((category, RSM_DEBUG_LVL2,
		    "rsmmap_unmap: partial unmap "
		    "new_dhp1 %lx, new_dhp2 %lx\n",
		    (size_t)new_dhp1, (size_t)new_dhp2));
8384         }
8385 
8386         /*
8387          * rsmmap_unmap is called for each mapping cookie on the list.
8388          * When the list becomes empty and we are not in the MAPPING
8389          * state then unmap in the rsmpi driver.
8390          */
8391         if ((seg->s_ckl == NULL) && (seg->s_state != RSM_STATE_MAPPING))
8392                 (void) rsm_unmap(seg);
8393 
8394         if (seg->s_state == RSM_STATE_END && seg->s_ckl == NULL) {
8395                 freeflag = 1;
8396         } else {
8397                 freeflag = 0;
8398         }
8399 
8400         rsmseglock_release(seg);
8401 
8402         if (freeflag) {
8403                 /* Free the segment structure */
8404                 rsmseg_free(seg);
8405         }
8406         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_unmap done\n"));
8407 
8408 }
8409 
static struct devmap_callback_ctl rsmmap_ops = {
	DEVMAP_OPS_REV,	/* devmap_ops version number */
	rsmmap_map,	/* devmap_ops map routine */
	rsmmap_access,	/* devmap_ops access routine */
	rsmmap_dup,	/* devmap_ops dup routine */
	rsmmap_unmap,	/* devmap_ops unmap routine */
};
8417 
8418 static int
8419 rsm_devmap(dev_t dev, devmap_cookie_t dhc, offset_t off, size_t len,
8420     size_t *maplen, uint_t model /*ARGSUSED*/)
8421 {
8422         struct devmap_callback_ctl *callbackops = &rsmmap_ops;
8423         int             err;
8424         uint_t          maxprot;
8425         minor_t         rnum;
8426         rsmseg_t        *seg;
8427         off_t           dev_offset;
8428         size_t          cur_len;
8429         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8430 
8431         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_devmap enter\n"));
8432 
8433         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8434             "rsm_devmap: off = %lx, len = %lx\n", off, len));
8435         rnum = getminor(dev);
8436         seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_NOLOCK);
8437         ASSERT(seg != NULL);
8438 
8439         if (seg->s_hdr.rsmrc_type == RSM_RESOURCE_BAR) {
8440                 if ((off == barrier_offset) &&
8441                     (len == barrier_size)) {
8442 
8443                         ASSERT(bar_va != NULL && bar_cookie != NULL);
8444 
8445                         /*
8446                          * The offset argument in devmap_umem_setup represents
8447                          * the offset within the kernel memory defined by the
8448                          * cookie. We use this offset as barrier_offset.
8449                          */
8450                         err = devmap_umem_setup(dhc, rsm_dip, NULL, bar_cookie,
8451                             barrier_offset, len, PROT_USER|PROT_READ,
8452                             DEVMAP_DEFAULTS, 0);
8453 
8454                         if (err != 0) {
8455                                 DBG_PRINTF((category, RSM_ERR,
8456                                     "rsm_devmap done: %d\n", err));
8457                                 return (RSMERR_MAP_FAILED);
8458                         }
8459                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8460                             "rsm_devmap done: %d\n", err));
8461 
8462                         *maplen = barrier_size;
8463 
8464                         return (err);
		} else {
			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
			    "rsm_devmap done: invalid barrier "
			    "offset/length\n"));
			return (RSMERR_MAP_FAILED);
		}
8470         }
8471 
8472         ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8473         ASSERT(seg->s_state == RSM_STATE_MAPPING);
8474 
8475         /*
8476          * Make sure we still have permission for the map operation.
8477          */
8478         maxprot = PROT_USER;
8479         if (seg->s_mode & RSM_PERM_READ) {
8480                 maxprot |= PROT_READ;
8481         }
8482 
8483         if (seg->s_mode & RSM_PERM_WRITE) {
8484                 maxprot |= PROT_WRITE;
8485         }
8486 
8487         /*
8488          * For each devmap call, rsmmap_map is called. This maintains driver
8489          * private information for the mapping. Thus, if there are multiple
8490          * devmap calls there will be multiple rsmmap_map calls and for each
8491          * call, the mapping information will be stored.
8492          * In case of an error during the processing of the devmap call, error
8493          * will be returned. This error return causes the caller of rsm_devmap
8494          * to undo all the mappings by calling rsmmap_unmap for each one.
8495          * rsmmap_unmap will free up the private information for the requested
8496          * mapping.
8497          */
8498         if (seg->s_node != my_nodeid) {
8499                 rsm_mapinfo_t *p;
8500 
8501                 p = rsm_get_mapinfo(seg, off, len, &dev_offset, &cur_len);
8502                 if (p == NULL) {
8503                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8504                             "rsm_devmap: incorrect mapping info\n"));
8505                         return (RSMERR_MAP_FAILED);
8506                 }
8507                 err = devmap_devmem_setup(dhc, p->dip,
8508                     callbackops, p->dev_register,
8509                     dev_offset, cur_len, maxprot,
8510                     DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS, 0);
8511 
8512                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8513                     "rsm_devmap: dip=%lx,dreg=%lu,doff=%lx,"
8514                     "off=%lx,len=%lx\n",
8515                     p->dip, p->dev_register, dev_offset, off, cur_len));
8516 
8517                 if (err != 0) {
8518                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8519                             "rsm_devmap: devmap_devmem_setup failed %d\n",
8520                             err));
8521                         return (RSMERR_MAP_FAILED);
8522                 }
		/* cur_len is always an integral multiple of PAGESIZE */
8524                 ASSERT((cur_len & (PAGESIZE-1)) == 0);
8525                 *maplen = cur_len;
8526                 return (err);
8527 
8528         } else {
8529                 err = devmap_umem_setup(dhc, rsm_dip, callbackops,
8530                     seg->s_cookie, off, len, maxprot,
8531                     DEVMAP_ALLOW_REMAP|DEVMAP_DEFAULTS, 0);
8532                 if (err != 0) {
8533                         DBG_PRINTF((category, RSM_DEBUG,
8534                             "rsm_devmap: devmap_umem_setup failed %d\n",
8535                             err));
8536                         return (RSMERR_MAP_FAILED);
8537                 }
8538                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8539                     "rsm_devmap: loopback done\n"));
8540 
8541                 *maplen = ptob(btopr(len));
8542 
8543                 return (err);
8544         }
8545 }
8546 
8547 /*
8548  * We can use the devmap framework for mapping device memory to user space by
8549  * specifying this routine in the rsm_cb_ops structure. The kernel mmap
8550  * processing calls this entry point and devmap_setup is called within this
8551  * function, which eventually calls rsm_devmap
8552  */
8553 static int
8554 rsm_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len,
8555     uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred)
8556 {
8557         int                     error = 0;
8558         int                     old_state;
8559         minor_t                 rnum;
8560         rsmseg_t                *seg, *eseg;
8561         adapter_t               *adapter;
8562         rsm_import_share_t      *sharedp;
8563         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8564 
8565         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_segmap enter\n"));
8566 
8567         /*
8568          * find segment
8569          */
8570         rnum = getminor(dev);
8571         seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_LOCK);
8572 
8573         if (seg == NULL) {
8574                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8575                     "rsm_segmap done: invalid segment\n"));
8576                 return (EINVAL);
8577         }
8578 
8579         /*
8580          * the user is trying to map a resource that has not been
8581          * defined yet. The library uses this to map in the
8582          * barrier page.
8583          */
8584         if (seg->s_hdr.rsmrc_type == RSM_RESOURCE_BAR) {
8585                 rsmseglock_release(seg);
8586 
8587                 /*
8588                  * The mapping for the barrier page is identified
8589                  * by the special offset barrier_offset
8590                  */
8591 
8592                 if (off == (off_t)barrier_offset ||
8593                     len == (off_t)barrier_size) {
8594                         if (bar_cookie == NULL || bar_va == NULL) {
8595                                 DBG_PRINTF((category, RSM_DEBUG,
8596                                     "rsm_segmap: bar cookie/va is NULL\n"));
8597                                 return (EINVAL);
8598                         }
8599 
8600                         error = devmap_setup(dev, (offset_t)off, as, addrp,
8601                             (size_t)len, prot, maxprot, flags,  cred);
8602 
8603                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8604                             "rsm_segmap done: %d\n", error));
8605                         return (error);
8606                 } else {
8607                         DBG_PRINTF((category, RSM_DEBUG,
8608                             "rsm_segmap: bad offset/length\n"));
8609                         return (EINVAL);
8610                 }
8611         }
8612 
8613         /* Make sure you can only map imported segments */
8614         if (seg->s_hdr.rsmrc_type != RSM_RESOURCE_IMPORT_SEGMENT) {
8615                 rsmseglock_release(seg);
8616                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8617                     "rsm_segmap done: not an import segment\n"));
8618                 return (EINVAL);
8619         }
	/* if this check fails, the RSM library is broken */
8621         ASSERT(seg->s_hdr.rsmrc_num == rnum);
8622 
8623         /* wait for the segment to become unquiesced */
8624         while (seg->s_state == RSM_STATE_CONN_QUIESCE) {
8625                 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
8626                         rsmseglock_release(seg);
8627                         DBG_PRINTF((category, RSM_DEBUG,
8628                             "rsm_segmap done: cv_wait INTR"));
8629                         return (ENODEV);
8630                 }
8631         }
8632 
8633         /* wait until segment leaves the mapping state */
8634         while (seg->s_state == RSM_STATE_MAPPING)
8635                 cv_wait(&seg->s_cv, &seg->s_lock);
8636 
8637         /*
8638          * we allow multiple maps of the same segment in the KA
8639          * and it works because we do an rsmpi map of the whole
8640          * segment during the first map and all the device mapping
8641          * information needed in rsm_devmap is in the mapinfo list.
8642          */
8643         if ((seg->s_state != RSM_STATE_CONNECT) &&
8644             (seg->s_state != RSM_STATE_ACTIVE)) {
8645                 rsmseglock_release(seg);
8646                 DBG_PRINTF((category, RSM_DEBUG,
8647                     "rsm_segmap done: segment not connected\n"));
8648                 return (ENODEV);
8649         }
8650 
8651         /*
8652          * Make sure we are not mapping a larger segment than what's
8653          * exported
8654          */
8655         if ((size_t)off + ptob(btopr(len)) > seg->s_len) {
8656                 rsmseglock_release(seg);
8657                 DBG_PRINTF((category, RSM_DEBUG,
8658                     "rsm_segmap done: off+len>seg size\n"));
8659                 return (ENXIO);
8660         }
8661 
8662         /*
8663          * Make sure we still have permission for the map operation.
8664          */
8665         maxprot = PROT_USER;
8666         if (seg->s_mode & RSM_PERM_READ) {
8667                 maxprot |= PROT_READ;
8668         }
8669 
8670         if (seg->s_mode & RSM_PERM_WRITE) {
8671                 maxprot |= PROT_WRITE;
8672         }
8673 
8674         if ((prot & maxprot) != prot) {
8675                 /* No permission */
8676                 rsmseglock_release(seg);
8677                 DBG_PRINTF((category, RSM_DEBUG,
8678                     "rsm_segmap done: no permission\n"));
8679                 return (EACCES);
8680         }
8681 
8682         old_state = seg->s_state;
8683 
8684         ASSERT(seg->s_share != NULL);
8685 
8686         rsmsharelock_acquire(seg);
8687 
8688         sharedp = seg->s_share;
8689 
8690         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8691             "rsm_segmap:RSMSI_STATE=%d\n", sharedp->rsmsi_state));
8692 
8693         if ((sharedp->rsmsi_state != RSMSI_STATE_CONNECTED) &&
8694             (sharedp->rsmsi_state != RSMSI_STATE_MAPPED)) {
8695                 rsmsharelock_release(seg);
8696                 rsmseglock_release(seg);
8697                 DBG_PRINTF((category, RSM_DEBUG,
8698                     "rsm_segmap done:RSMSI_STATE %d invalid\n",
8699                     sharedp->rsmsi_state));
8700                 return (ENODEV);
8701         }
8702 
8703         /*
8704          * Do the map - since we want importers to share mappings
8705          * we do the rsmpi map for the whole segment
8706          */
8707         if (seg->s_node != my_nodeid) {
8708                 uint_t dev_register;
8709                 off_t dev_offset;
8710                 dev_info_t *dip;
8711                 size_t tmp_len;
8712                 size_t total_length_mapped = 0;
8713                 size_t length_to_map = seg->s_len;
8714                 off_t tmp_off = 0;
8715                 rsm_mapinfo_t *p;
8716 
8717                 /*
8718                  * length_to_map = seg->s_len is always an integral
8719                  * multiple of PAGESIZE. Length mapped in each entry in mapinfo
8720                  * list is a multiple of PAGESIZE - RSMPI map ensures this
8721                  */
8722 
8723                 adapter = seg->s_adapter;
8724                 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_CONNECTED ||
8725                     sharedp->rsmsi_state == RSMSI_STATE_MAPPED);
8726 
8727                 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) {
8728                         error = 0;
8729                         /* map the whole segment */
8730                         while (total_length_mapped < seg->s_len) {
8731                                 tmp_len = 0;
8732 
8733                                 error = adapter->rsmpi_ops->rsm_map(
8734                                     seg->s_handle.in, tmp_off,
8735                                     length_to_map, &tmp_len,
8736                                     &dip, &dev_register, &dev_offset,
8737                                     NULL, NULL);
8738 
8739                                 if (error != 0)
8740                                         break;
8741 
8742                                 /*
8743                                  * Store the mapping info obtained from rsm_map
8744                                  */
8745                                 p = kmem_alloc(sizeof (*p), KM_SLEEP);
8746                                 p->dev_register = dev_register;
8747                                 p->dev_offset = dev_offset;
8748                                 p->dip = dip;
8749                                 p->individual_len = tmp_len;
8750                                 p->start_offset = tmp_off;
8751                                 p->next = sharedp->rsmsi_mapinfo;
8752                                 sharedp->rsmsi_mapinfo = p;
8753 
8754                                 total_length_mapped += tmp_len;
8755                                 length_to_map -= tmp_len;
8756                                 tmp_off += tmp_len;
8757                         }
8758                         seg->s_mapinfo = sharedp->rsmsi_mapinfo;
8759 
8760                         if (error != RSM_SUCCESS) {
				/*
				 * If at least one rsm_map succeeded before
				 * the failure, undo the partial mapping; a
				 * single rsm_unmap undoes multiple rsm_maps.
				 */
				if (sharedp->rsmsi_mapinfo != NULL) {
8767                                         (void) seg->s_adapter->rsmpi_ops->
8768                                             rsm_unmap(sharedp->rsmsi_handle);
8769                                         rsm_free_mapinfo(sharedp->
8770                                             rsmsi_mapinfo);
8771                                 }
8772                                 sharedp->rsmsi_mapinfo = NULL;
8773                                 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
8774                                 rsmsharelock_release(seg);
8775                                 rsmseglock_release(seg);
8776                                 DBG_PRINTF((category, RSM_DEBUG,
8777                                     "rsm_segmap done: rsmpi map err %d\n",
8778                                     error));
8779                                 ASSERT(error != RSMERR_BAD_LENGTH &&
8780                                     error != RSMERR_BAD_MEM_ALIGNMENT &&
8781                                     error != RSMERR_BAD_SEG_HNDL);
8782                                 if (error == RSMERR_UNSUPPORTED_OPERATION)
8783                                         return (ENOTSUP);
8784                                 else if (error == RSMERR_INSUFFICIENT_RESOURCES)
8785                                         return (EAGAIN);
8786                                 else if (error == RSMERR_CONN_ABORTED)
8787                                         return (ENODEV);
8788                                 else
8789                                         return (error);
8790                         } else {
8791                                 sharedp->rsmsi_state = RSMSI_STATE_MAPPED;
8792                         }
8793                 } else {
8794                         seg->s_mapinfo = sharedp->rsmsi_mapinfo;
8795                 }
8796 
8797                 sharedp->rsmsi_mapcnt++;
8798 
8799                 rsmsharelock_release(seg);
8800 
8801                 /* move to an intermediate mapping state */
8802                 seg->s_state = RSM_STATE_MAPPING;
8803                 rsmseglock_release(seg);
8804 
8805                 error = devmap_setup(dev, (offset_t)off, as, addrp,
8806                     len, prot, maxprot, flags, cred);
8807 
8808                 rsmseglock_acquire(seg);
8809                 ASSERT(seg->s_state == RSM_STATE_MAPPING);
8810 
8811                 if (error == DDI_SUCCESS) {
8812                         seg->s_state = RSM_STATE_ACTIVE;
8813                 } else {
8814                         rsmsharelock_acquire(seg);
8815 
8816                         ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED);
8817 
8818                         sharedp->rsmsi_mapcnt--;
8819                         if (sharedp->rsmsi_mapcnt == 0) {
8820                                 /* unmap the shared RSMPI mapping */
8821                                 ASSERT(sharedp->rsmsi_handle != NULL);
8822                                 (void) adapter->rsmpi_ops->
8823                                     rsm_unmap(sharedp->rsmsi_handle);
8824                                 rsm_free_mapinfo(sharedp->rsmsi_mapinfo);
8825                                 sharedp->rsmsi_mapinfo = NULL;
8826                                 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
8827                         }
8828 
8829                         rsmsharelock_release(seg);
8830                         seg->s_state = old_state;
8831                         DBG_PRINTF((category, RSM_ERR,
8832                             "rsm: devmap_setup failed %d\n", error));
8833                 }
8834                 cv_broadcast(&seg->s_cv);
8835                 rsmseglock_release(seg);
8836                 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsm_segmap done: %d\n",
8837                     error));
8838                 return (error);
8839         } else {
8840                 /*
8841                  * For loopback, the export segment mapping cookie (s_cookie)
8842                  * is also used as the s_cookie value for its import segments
8843                  * during mapping.
8844                  * Note that reference counting for s_cookie of the export
8845                  * segment is not required due to the following:
8846                  * We never have a case of the export segment being destroyed,
8847                  * leaving the import segments with a stale value for the
8848                  * s_cookie field, since a force disconnect is done prior to a
8849                  * destroy of an export segment. The force disconnect causes
8850                  * the s_cookie value to be reset to NULL. Also for the
8851                  * rsm_rebind operation, we change the s_cookie value of the
8852                  * export segment as well as of all its local (loopback)
8853                  * importers.
8854                  */
8855                 DBG_ADDCATEGORY(category, RSM_LOOPBACK);
8856 
8857                 rsmsharelock_release(seg);
8858                 /*
8859                  * In order to maintain the lock ordering between the export
8860                  * and import segment locks, we need to acquire the export
8861                  * segment lock first and only then acquire the import
8862                  * segment lock.
8863                  * The above is necessary to avoid any deadlock scenarios
8864                  * with rsm_rebind which also acquires both the export
8865                  * and import segment locks in the above mentioned order.
8866                  * Based on code inspection, there seem to be no other
8867                  * situations in which both the export and import segment
8868                  * locks are acquired either in the same or opposite order
8869                  * as mentioned above.
8870                  * Thus in order to conform to the above lock order, we
8871                  * need to change the state of the import segment to
8872                  * RSM_STATE_MAPPING, release the lock. Once this is done we
8873                  * can now safely acquire the export segment lock first
8874                  * followed by the import segment lock which is as per
8875                  * the lock order mentioned above.
8876                  */
8877                 /* move to an intermediate mapping state */
8878                 seg->s_state = RSM_STATE_MAPPING;
8879                 rsmseglock_release(seg);
8880 
8881                 eseg = rsmexport_lookup(seg->s_key);
8882 
8883                 if (eseg == NULL) {
8884                         rsmseglock_acquire(seg);
8885                         /*
8886                          * Revert to old_state and signal any waiters
8887                          * The shared state is not changed
8888                          */
8889 
8890                         seg->s_state = old_state;
8891                         cv_broadcast(&seg->s_cv);
8892                         rsmseglock_release(seg);
8893                         DBG_PRINTF((category, RSM_DEBUG,
8894                             "rsm_segmap done: key %d not found\n", seg->s_key));
8895                         return (ENODEV);
8896                 }
8897 
8898                 rsmsharelock_acquire(seg);
8899                 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_CONNECTED ||
8900                     sharedp->rsmsi_state == RSMSI_STATE_MAPPED);
8901 
8902                 sharedp->rsmsi_mapcnt++;
8903                 sharedp->rsmsi_state = RSMSI_STATE_MAPPED;
8904                 rsmsharelock_release(seg);
8905 
8906                 ASSERT(eseg->s_cookie != NULL);
8907 
8908                 /*
8909                  * It is not required or necessary to acquire the import
8910                  * segment lock here to change the value of s_cookie since
8911                  * no one will touch the import segment as long as it is
8912                  * in the RSM_STATE_MAPPING state.
8913                  */
8914                 seg->s_cookie = eseg->s_cookie;
8915 
8916                 rsmseglock_release(eseg);
8917 
8918                 error = devmap_setup(dev, (offset_t)off, as, addrp, (size_t)len,
8919                     prot, maxprot, flags, cred);
8920 
8921                 rsmseglock_acquire(seg);
8922                 ASSERT(seg->s_state == RSM_STATE_MAPPING);
8923                 if (error == 0) {
8924                         seg->s_state = RSM_STATE_ACTIVE;
8925                 } else {
8926                         rsmsharelock_acquire(seg);
8927 
8928                         ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED);
8929 
8930                         sharedp->rsmsi_mapcnt--;
8931                         if (sharedp->rsmsi_mapcnt == 0) {
8932                                 sharedp->rsmsi_mapinfo = NULL;
8933                                 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
8934                         }
8935                         rsmsharelock_release(seg);
8936                         seg->s_state = old_state;
8937                         seg->s_cookie = NULL;
8938                 }
8939                 cv_broadcast(&seg->s_cv);
8940                 rsmseglock_release(seg);
8941                 DBG_PRINTF((category, RSM_DEBUG_LVL2,
8942                     "rsm_segmap done: %d\n", error));
8943                 return (error);
8944         }
8945 }
8946 
8947 int
8948 rsmka_null_seg_create(
8949     rsm_controller_handle_t argcp,
8950     rsm_memseg_export_handle_t *handle,
8951     size_t size,
8952     uint_t flags,
8953     rsm_memory_local_t *memory,
8954     rsm_resource_callback_t callback,
8955     rsm_resource_callback_arg_t callback_arg    /*ARGSUSED*/)
8956 {
8957         return (RSM_SUCCESS);
8958 }
8959 
8960 
8961 int
8962 rsmka_null_seg_destroy(
8963     rsm_memseg_export_handle_t argmemseg        /*ARGSUSED*/)
8964 {
8965         return (RSM_SUCCESS);
8966 }
8967 
8968 
8969 int
8970 rsmka_null_bind(
8971     rsm_memseg_export_handle_t argmemseg,
8972     off_t offset,
8973     rsm_memory_local_t *argmemory,
8974     rsm_resource_callback_t callback,
8975     rsm_resource_callback_arg_t callback_arg    /*ARGSUSED*/)
8976 {
8977         return (RSM_SUCCESS);
8978 }
8979 
8980 
8981 int
8982 rsmka_null_unbind(
8983     rsm_memseg_export_handle_t argmemseg,
8984     off_t offset,
8985     size_t length       /*ARGSUSED*/)
8986 {
8987         return (DDI_SUCCESS);
8988 }
8989 
8990 int
8991 rsmka_null_rebind(
8992     rsm_memseg_export_handle_t argmemseg,
8993     off_t offset,
8994     rsm_memory_local_t *memory,
8995     rsm_resource_callback_t callback,
8996     rsm_resource_callback_arg_t callback_arg    /*ARGSUSED*/)
8997 {
8998         return (RSM_SUCCESS);
8999 }
9000 
9001 int
9002 rsmka_null_publish(
9003     rsm_memseg_export_handle_t argmemseg,
9004     rsm_access_entry_t access_list[],
9005     uint_t access_list_length,
9006     rsm_memseg_id_t segment_id,
9007     rsm_resource_callback_t callback,
9008     rsm_resource_callback_arg_t callback_arg    /*ARGSUSED*/)
9009 {
9010         return (RSM_SUCCESS);
9011 }
9012 
9013 
9014 int
9015 rsmka_null_republish(
9016     rsm_memseg_export_handle_t memseg,
9017     rsm_access_entry_t access_list[],
9018     uint_t access_list_length,
9019     rsm_resource_callback_t callback,
9020     rsm_resource_callback_arg_t callback_arg    /*ARGSUSED*/)
9021 {
9022         return (RSM_SUCCESS);
9023 }
9024 
9025 int
9026 rsmka_null_unpublish(
9027     rsm_memseg_export_handle_t argmemseg        /*ARGSUSED*/)
9028 {
9029         return (RSM_SUCCESS);
9030 }
9031 
9032 
9033 void
rsmka_init_loopback(void)
9035 {
9036         rsm_ops_t       *ops = &null_rsmpi_ops;
9037         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_LOOPBACK);
9038 
9039         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9040             "rsmka_init_loopback enter\n"));
9041 
9042         /* initialize null ops vector */
9043         ops->rsm_seg_create = rsmka_null_seg_create;
9044         ops->rsm_seg_destroy = rsmka_null_seg_destroy;
9045         ops->rsm_bind = rsmka_null_bind;
9046         ops->rsm_unbind = rsmka_null_unbind;
9047         ops->rsm_rebind = rsmka_null_rebind;
9048         ops->rsm_publish = rsmka_null_publish;
9049         ops->rsm_unpublish = rsmka_null_unpublish;
9050         ops->rsm_republish = rsmka_null_republish;
9051 
9052         /* initialize attributes for loopback adapter */
9053         loopback_attr.attr_name = loopback_str;
9054         loopback_attr.attr_page_size = 0x8; /* 8K */
9055 
9056         /* initialize loopback adapter */
9057         loopback_adapter.rsm_attr = loopback_attr;
9058         loopback_adapter.rsmpi_ops = &null_rsmpi_ops;
9059         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9060             "rsmka_init_loopback done\n"));
9061 }
9062 
9063 /* ************** DR functions ********************************** */
9064 static void
9065 rsm_quiesce_exp_seg(rsmresource_t *resp)
9066 {
9067         int             recheck_state;
9068         rsmseg_t        *segp = (rsmseg_t *)resp;
9069         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
	DBG_DEFINE_STR(function, "rsm_quiesce_exp_seg");
9071 
9072         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9073             "%s enter: key=%u\n", function, segp->s_key));
9074 
9075         rsmseglock_acquire(segp);
9076         do {
9077                 recheck_state = 0;
9078                 if ((segp->s_state == RSM_STATE_NEW_QUIESCED) ||
9079                     (segp->s_state == RSM_STATE_BIND_QUIESCED) ||
9080                     (segp->s_state == RSM_STATE_EXPORT_QUIESCING) ||
9081                     (segp->s_state == RSM_STATE_EXPORT_QUIESCED)) {
9082                         rsmseglock_release(segp);
9083                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9084                             "%s done:state =%d\n", function,
9085                             segp->s_state));
9086                         return;
9087                 }
9088 
9089                 if (segp->s_state == RSM_STATE_NEW) {
9090                         segp->s_state = RSM_STATE_NEW_QUIESCED;
9091                         rsmseglock_release(segp);
9092                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9093                             "%s done:state =%d\n", function,
9094                             segp->s_state));
9095                         return;
9096                 }
9097 
9098                 if (segp->s_state == RSM_STATE_BIND) {
9099                         /* unbind */
9100                         (void) rsm_unbind_pages(segp);
9101                         segp->s_state = RSM_STATE_BIND_QUIESCED;
9102                         rsmseglock_release(segp);
9103                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9104                             "%s done:state =%d\n", function,
9105                             segp->s_state));
9106                         return;
9107                 }
9108 
9109                 if (segp->s_state == RSM_STATE_EXPORT) {
9110                         /*
9111                          * wait for putv/getv to complete if the segp is
9112                          * a local memory handle
9113                          */
9114                         while ((segp->s_state == RSM_STATE_EXPORT) &&
9115                             (segp->s_rdmacnt != 0)) {
9116                                 cv_wait(&segp->s_cv, &segp->s_lock);
9117                         }
9118 
9119                         if (segp->s_state != RSM_STATE_EXPORT) {
9120                                 /*
9121                                  * state changed need to see what it
9122                                  * should be changed to.
9123                                  */
9124                                 recheck_state = 1;
9125                                 continue;
9126                         }
9127 
9128                         segp->s_state = RSM_STATE_EXPORT_QUIESCING;
9129                         rsmseglock_release(segp);
9130                         /*
9131                          * send SUSPEND messages - currently it will be
9132                          * done at the end
9133                          */
9134                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9135                             "%s done:state =%d\n", function,
9136                             segp->s_state));
9137                         return;
9138                 }
9139         } while (recheck_state);
9140 
9141         rsmseglock_release(segp);
9142 }
9143 
9144 static void
9145 rsm_unquiesce_exp_seg(rsmresource_t *resp)
9146 {
9147         int                     ret;
9148         rsmseg_t                *segp = (rsmseg_t *)resp;
9149         rsmapi_access_entry_t   *acl;
9150         rsm_access_entry_t      *rsmpi_acl;
9151         int                     acl_len;
9152         int                     create_flags = 0;
9153         struct buf              *xbuf;
9154         rsm_memory_local_t      mem;
9155         adapter_t               *adapter;
9156         dev_t                   sdev = 0;
9157         rsm_resource_callback_t callback_flag;
9158         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9159         DBG_DEFINE_STR(function, "rsm_unquiesce_exp_seg");
9160 
9161         rsmseglock_acquire(segp);
9162 
9163         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9164             "%s enter: key=%u, state=%d\n", function, segp->s_key,
9165             segp->s_state));
9166 
9167         if ((segp->s_state == RSM_STATE_NEW) ||
9168             (segp->s_state == RSM_STATE_BIND) ||
9169             (segp->s_state == RSM_STATE_EXPORT)) {
9170                 rsmseglock_release(segp);
9171                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done:state=%d\n",
9172                     function, segp->s_state));
9173                 return;
9174         }
9175 
9176         if (segp->s_state == RSM_STATE_NEW_QUIESCED) {
9177                 segp->s_state = RSM_STATE_NEW;
9178                 cv_broadcast(&segp->s_cv);
9179                 rsmseglock_release(segp);
9180                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done:state=%d\n",
9181                     function, segp->s_state));
9182                 return;
9183         }
9184 
9185         if (segp->s_state == RSM_STATE_BIND_QUIESCED) {
9186                 /* bind the segment */
9187                 ret = rsm_bind_pages(&segp->s_cookie, segp->s_region.r_vaddr,
9188                     segp->s_len, segp->s_proc);
9189                 if (ret == RSM_SUCCESS) { /* bind successful */
9190                         segp->s_state = RSM_STATE_BIND;
9191                 } else { /* bind failed - resource unavailable */
9192                         segp->s_state = RSM_STATE_NEW;
9193                 }
9194                 cv_broadcast(&segp->s_cv);
9195                 rsmseglock_release(segp);
9196                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9197                     "%s done: bind_qscd bind = %d\n", function, ret));
9198                 return;
9199         }
9200 
9201         while (segp->s_state == RSM_STATE_EXPORT_QUIESCING) {
9202                 /* wait for the segment to move to EXPORT_QUIESCED state */
9203                 cv_wait(&segp->s_cv, &segp->s_lock);
9204         }
9205 
9206         if (segp->s_state == RSM_STATE_EXPORT_QUIESCED) {
9207                 /* bind the segment */
9208                 ret = rsm_bind_pages(&segp->s_cookie, segp->s_region.r_vaddr,
9209                     segp->s_len, segp->s_proc);
9210 
9211                 if (ret != RSM_SUCCESS) {
9212                         /* bind failed - resource unavailable */
9213                         acl_len = segp->s_acl_len;
9214                         acl = segp->s_acl;
9215                         rsmpi_acl = segp->s_acl_in;
9216                         segp->s_acl_len = 0;
9217                         segp->s_acl = NULL;
9218                         segp->s_acl_in = NULL;
9219                         rsmseglock_release(segp);
9220 
9221                         rsmexport_rm(segp);
9222                         rsmacl_free(acl, acl_len);
9223                         rsmpiacl_free(rsmpi_acl, acl_len);
9224 
9225                         rsmseglock_acquire(segp);
9226                         segp->s_state = RSM_STATE_NEW;
9227                         cv_broadcast(&segp->s_cv);
9228                         rsmseglock_release(segp);
9229                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9230                             "%s done: exp_qscd bind failed = %d\n",
9231                             function, ret));
9232                         return;
9233                 }
9234                 /*
9235                  * publish the segment
9236                  * if  successful
9237                  *   segp->s_state = RSM_STATE_EXPORT;
9238                  * else failed
9239                  *   segp->s_state = RSM_STATE_BIND;
9240                  */
9241 
9242                 /* check whether it is a local_memory_handle */
9243                 if (segp->s_acl != (rsmapi_access_entry_t *)NULL) {
9244                         if ((segp->s_acl[0].ae_node == my_nodeid) &&
9245                             (segp->s_acl[0].ae_permission == 0)) {
9246                                 segp->s_state = RSM_STATE_EXPORT;
9247                                 cv_broadcast(&segp->s_cv);
9248                                 rsmseglock_release(segp);
9249                                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9250                                     "%s done:exp_qscd\n", function));
9251                                 return;
9252                         }
9253                 }
9254                 xbuf = ddi_umem_iosetup(segp->s_cookie, 0, segp->s_len, B_WRITE,
9255                     sdev, 0, NULL, DDI_UMEM_SLEEP);
9256                 ASSERT(xbuf != NULL);
9257 
9258                 mem.ms_type = RSM_MEM_BUF;
9259                 mem.ms_bp = xbuf;
9260 
9261                 adapter = segp->s_adapter;
9262 
9263                 if (segp->s_flags & RSMKA_ALLOW_UNBIND_REBIND) {
9264                         create_flags = RSM_ALLOW_UNBIND_REBIND;
9265                 }
9266 
9267                 if (segp->s_flags & RSMKA_SET_RESOURCE_DONTWAIT) {
9268                         callback_flag  = RSM_RESOURCE_DONTWAIT;
9269                 } else {
9270                         callback_flag  = RSM_RESOURCE_SLEEP;
9271                 }
9272 
9273                 ret = adapter->rsmpi_ops->rsm_seg_create(
9274                     adapter->rsmpi_handle, &segp->s_handle.out,
9275                     segp->s_len, create_flags, &mem,
9276                     callback_flag, NULL);
9277 
9278                 if (ret != RSM_SUCCESS) {
9279                         acl_len = segp->s_acl_len;
9280                         acl = segp->s_acl;
9281                         rsmpi_acl = segp->s_acl_in;
9282                         segp->s_acl_len = 0;
9283                         segp->s_acl = NULL;
9284                         segp->s_acl_in = NULL;
9285                         rsmseglock_release(segp);
9286 
9287                         rsmexport_rm(segp);
9288                         rsmacl_free(acl, acl_len);
9289                         rsmpiacl_free(rsmpi_acl, acl_len);
9290 
9291                         rsmseglock_acquire(segp);
9292                         segp->s_state = RSM_STATE_BIND;
9293                         cv_broadcast(&segp->s_cv);
9294                         rsmseglock_release(segp);
9295                         DBG_PRINTF((category, RSM_ERR,
9296                             "%s done: exp_qscd create failed = %d\n",
9297                             function, ret));
9298                         return;
9299                 }
9300 
9301                 ret = adapter->rsmpi_ops->rsm_publish(
9302                     segp->s_handle.out, segp->s_acl_in, segp->s_acl_len,
9303                     segp->s_segid, RSM_RESOURCE_DONTWAIT, NULL);
9304 
9305                 if (ret != RSM_SUCCESS) {
9306                         acl_len = segp->s_acl_len;
9307                         acl = segp->s_acl;
9308                         rsmpi_acl = segp->s_acl_in;
9309                         segp->s_acl_len = 0;
9310                         segp->s_acl = NULL;
9311                         segp->s_acl_in = NULL;
9312                         adapter->rsmpi_ops->rsm_seg_destroy(segp->s_handle.out);
9313                         rsmseglock_release(segp);
9314 
9315                         rsmexport_rm(segp);
9316                         rsmacl_free(acl, acl_len);
9317                         rsmpiacl_free(rsmpi_acl, acl_len);
9318 
9319                         rsmseglock_acquire(segp);
9320                         segp->s_state = RSM_STATE_BIND;
9321                         cv_broadcast(&segp->s_cv);
9322                         rsmseglock_release(segp);
9323                         DBG_PRINTF((category, RSM_ERR,
9324                             "%s done: exp_qscd publish failed = %d\n",
9325                             function, ret));
9326                         return;
9327                 }
9328 
9329                 segp->s_state = RSM_STATE_EXPORT;
9330                 cv_broadcast(&segp->s_cv);
9331                 rsmseglock_release(segp);
9332                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done: exp_qscd\n",
9333                     function));
9334                 return;
9335         }
9336 
9337         rsmseglock_release(segp);
9338 
9339         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function));
9340 }
9341 
9342 static void
9343 rsm_quiesce_imp_seg(rsmresource_t *resp)
9344 {
9345         rsmseg_t        *segp = (rsmseg_t *)resp;
9346         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9347         DBG_DEFINE_STR(function, "rsm_quiesce_imp_seg");
9348 
9349         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9350             "%s enter: key=%u\n", function, segp->s_key));
9351 
9352         rsmseglock_acquire(segp);
9353         segp->s_flags |= RSM_DR_INPROGRESS;
9354 
9355         while (segp->s_rdmacnt != 0) {
9356                 /* wait for the RDMA to complete */
9357                 cv_wait(&segp->s_cv, &segp->s_lock);
9358         }
9359 
9360         rsmseglock_release(segp);
9361 
9362         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function));
9363 
9364 }
9365 
9366 static void
9367 rsm_unquiesce_imp_seg(rsmresource_t *resp)
9368 {
9369         rsmseg_t        *segp = (rsmseg_t *)resp;
9370         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9371         DBG_DEFINE_STR(function, "rsm_unquiesce_imp_seg");
9372 
9373         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9374             "%s enter: key=%u\n", function, segp->s_key));
9375 
9376         rsmseglock_acquire(segp);
9377 
9378         segp->s_flags &= ~RSM_DR_INPROGRESS;
9379         /* wake up any waiting putv/getv ops */
9380         cv_broadcast(&segp->s_cv);
9381 
9382         rsmseglock_release(segp);
9383 
	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function));
}
9388 
9389 static void
9390 rsm_process_exp_seg(rsmresource_t *resp, int event)
9391 {
9392         if (event == RSM_DR_QUIESCE)
9393                 rsm_quiesce_exp_seg(resp);
9394         else /* UNQUIESCE */
9395                 rsm_unquiesce_exp_seg(resp);
9396 }
9397 
9398 static void
9399 rsm_process_imp_seg(rsmresource_t *resp, int event)
9400 {
9401         if (event == RSM_DR_QUIESCE)
9402                 rsm_quiesce_imp_seg(resp);
9403         else /* UNQUIESCE */
9404                 rsm_unquiesce_imp_seg(resp);
9405 }
9406 
9407 static void
9408 rsm_dr_process_local_segments(int event)
9409 {
9410 
9411         int i, j;
9412         rsmresource_blk_t       *blk;
9413         rsmresource_t           *p;
9414         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9415 
9416         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9417             "rsm_dr_process_local_segments enter\n"));
9418 
9419         /* iterate through the resource structure */
9420 
9421         rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
9422 
9423         for (i = 0; i < rsm_resource.rsmrc_len; i++) {
9424                 blk = rsm_resource.rsmrc_root[i];
9425                 if (blk != NULL) {
9426                         for (j = 0; j < RSMRC_BLKSZ; j++) {
9427                                 p = blk->rsmrcblk_blks[j];
9428                                 if ((p != NULL) && (p != RSMRC_RESERVED)) {
9429                                         /* valid resource */
9430                                         if (p->rsmrc_type ==
9431                                             RSM_RESOURCE_EXPORT_SEGMENT)
9432                                                 rsm_process_exp_seg(p, event);
9433                                         else if (p->rsmrc_type ==
9434                                             RSM_RESOURCE_IMPORT_SEGMENT)
9435                                                 rsm_process_imp_seg(p, event);
9436                                 }
9437                         }
9438                 }
9439         }
9440 
9441         rw_exit(&rsm_resource.rsmrc_lock);
9442 
9443         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9444             "rsm_dr_process_local_segments done\n"));
9445 }
9446 
9447 /* *************** DR callback functions ************ */
9448 static void
9449 rsm_dr_callback_post_add(void *arg, pgcnt_t delta /* ARGSUSED */)
9450 {
9451         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9452         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9453             "rsm_dr_callback_post_add is a no-op\n"));
9454         /* Noop */
9455 }
9456 
9457 static int
9458 rsm_dr_callback_pre_del(void *arg, pgcnt_t delta /* ARGSUSED */)
9459 {
9460         int     recheck_state = 0;
9461         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9462 
9463         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9464             "rsm_dr_callback_pre_del enter\n"));
9465 
9466         mutex_enter(&rsm_drv_data.drv_lock);
9467 
9468         do {
9469                 recheck_state = 0;
9470                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9471                     "rsm_dr_callback_pre_del:state=%d\n",
9472                     rsm_drv_data.drv_state));
9473 
9474                 switch (rsm_drv_data.drv_state) {
9475                 case RSM_DRV_NEW:
9476                         /*
9477                          * The state should usually never be RSM_DRV_NEW
9478                          * since in this state the callbacks have not yet
9479                          * been registered. So, ASSERT.
9480                          */
9481                         ASSERT(0);
9482                         return (0);
9483                 case RSM_DRV_REG_PROCESSING:
9484                         /*
9485                          * The driver is in the process of registering
9486                          * with the DR framework. So, wait till the
9487                          * registration process is complete.
9488                          */
9489                         recheck_state = 1;
9490                         cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9491                         break;
9492                 case RSM_DRV_UNREG_PROCESSING:
9493                         /*
9494                          * If the state is RSM_DRV_UNREG_PROCESSING, the
9495                          * module is in the process of detaching and
9496                          * unregistering the callbacks from the DR
9497                          * framework. So, simply return.
9498                          */
9499                         mutex_exit(&rsm_drv_data.drv_lock);
9500                         DBG_PRINTF((category, RSM_DEBUG,
9501                             "rsm_dr_callback_pre_del:"
9502                             "pre-del on NEW/UNREG\n"));
9503                         return (0);
9504                 case RSM_DRV_OK:
9505                         rsm_drv_data.drv_state = RSM_DRV_PREDEL_STARTED;
9506                         break;
9507                 case RSM_DRV_PREDEL_STARTED:
9508                         /* FALLTHRU */
9509                 case RSM_DRV_PREDEL_COMPLETED:
9510                         /* FALLTHRU */
9511                 case RSM_DRV_POSTDEL_IN_PROGRESS:
9512                         recheck_state = 1;
9513                         cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9514                         break;
9515                 case RSM_DRV_DR_IN_PROGRESS:
9516                         rsm_drv_data.drv_memdel_cnt++;
9517                         mutex_exit(&rsm_drv_data.drv_lock);
9518                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9519                             "rsm_dr_callback_pre_del done\n"));
9520                         return (0);
9521                         /* break; */
9522                 default:
9523                         ASSERT(0);
9524                         break;
9525                 }
9526 
9527         } while (recheck_state);
9528 
9529         rsm_drv_data.drv_memdel_cnt++;
9530 
9531         mutex_exit(&rsm_drv_data.drv_lock);
9532 
9533         /* Do all the quiescing stuff here */
9534         DBG_PRINTF((category, RSM_DEBUG,
9535             "rsm_dr_callback_pre_del: quiesce things now\n"));
9536 
9537         rsm_dr_process_local_segments(RSM_DR_QUIESCE);
9538 
9539         /*
9540          * now that all local segments have been quiesced lets inform
9541          * the importers
9542          */
9543         rsm_send_suspend();
9544 
9545         /*
9546          * In response to the suspend message the remote node(s) will process
9547          * the segments and send a suspend_complete message. Till all
9548          * the nodes send the suspend_complete message we wait in the
9549          * RSM_DRV_PREDEL_STARTED state. In the exporter_quiesce
9550          * function we transition to the RSM_DRV_PREDEL_COMPLETED state.
9551          */
9552         mutex_enter(&rsm_drv_data.drv_lock);
9553 
9554         while (rsm_drv_data.drv_state == RSM_DRV_PREDEL_STARTED) {
9555                 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9556         }
9557 
9558         ASSERT(rsm_drv_data.drv_state == RSM_DRV_PREDEL_COMPLETED);
9559 
9560         rsm_drv_data.drv_state = RSM_DRV_DR_IN_PROGRESS;
9561         cv_broadcast(&rsm_drv_data.drv_cv);
9562 
9563         mutex_exit(&rsm_drv_data.drv_lock);
9564 
9565         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9566             "rsm_dr_callback_pre_del done\n"));
9567 
9568         return (0);
9569 }
9570 
9571 static void
9572 rsm_dr_callback_post_del(void *arg, pgcnt_t delta, int cancelled /* ARGSUSED */)
9573 {
9574         int     recheck_state = 0;
9575         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9576 
9577         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9578             "rsm_dr_callback_post_del enter\n"));
9579 
9580         mutex_enter(&rsm_drv_data.drv_lock);
9581 
	do {
		recheck_state = 0;
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm_dr_callback_post_del:state=%d\n",
		    rsm_drv_data.drv_state));

		switch (rsm_drv_data.drv_state) {
		case RSM_DRV_NEW:
			/*
			 * The driver state cannot be RSM_DRV_NEW
			 * since in this state the callbacks have not
			 * yet been registered.
			 */
			ASSERT(0);
			mutex_exit(&rsm_drv_data.drv_lock);
			return;
		case RSM_DRV_REG_PROCESSING:
			/*
			 * The driver is in the process of registering with
			 * the DR framework. Wait until the registration is
			 * complete.
			 */
			recheck_state = 1;
			cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
			break;
		case RSM_DRV_UNREG_PROCESSING:
			/*
			 * RSM_DRV_UNREG_PROCESSING state means the module
			 * is detaching and unregistering the callbacks
			 * from the DR framework. So simply return.
			 */
			/* FALLTHRU */
		case RSM_DRV_OK:
			/*
			 * RSM_DRV_OK means we missed the pre-del
			 * corresponding to this post-del because we had
			 * not registered yet, so simply return.
			 */
			mutex_exit(&rsm_drv_data.drv_lock);
			DBG_PRINTF((category, RSM_DEBUG,
			    "rsm_dr_callback_post_del:"
			    "post-del on OK/UNREG\n"));
			return;
			/* break; */
		case RSM_DRV_PREDEL_STARTED:
			/* FALLTHRU */
		case RSM_DRV_PREDEL_COMPLETED:
			/* FALLTHRU */
		case RSM_DRV_POSTDEL_IN_PROGRESS:
			recheck_state = 1;
			cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
			break;
		case RSM_DRV_DR_IN_PROGRESS:
			rsm_drv_data.drv_memdel_cnt--;
			if (rsm_drv_data.drv_memdel_cnt > 0) {
				mutex_exit(&rsm_drv_data.drv_lock);
				DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
				    "rsm_dr_callback_post_del done\n"));
				return;
			}
			rsm_drv_data.drv_state = RSM_DRV_POSTDEL_IN_PROGRESS;
			break;
		default:
			ASSERT(0);
			mutex_exit(&rsm_drv_data.drv_lock);
			return;
			/* break; */
		}
	} while (recheck_state);

	mutex_exit(&rsm_drv_data.drv_lock);

	/* Unquiesce all the local segments now */
	DBG_PRINTF((category, RSM_DEBUG,
	    "rsm_dr_callback_post_del: unquiesce things now\n"));

	rsm_dr_process_local_segments(RSM_DR_UNQUIESCE);

	/*
	 * Now that all local segments have been unquiesced, inform
	 * the importers.
	 */
	rsm_send_resume();
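	/*
	 * Unlike the suspend handshake in the pre-del callback, no
	 * acknowledgement is awaited here; the driver returns to
	 * RSM_DRV_OK as soon as the resume messages have been sent.
	 */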

	mutex_enter(&rsm_drv_data.drv_lock);

	rsm_drv_data.drv_state = RSM_DRV_OK;

	cv_broadcast(&rsm_drv_data.drv_cv);

	mutex_exit(&rsm_drv_data.drv_lock);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsm_dr_callback_post_del done\n"));
}
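
/*
 * Illustrative sketch only (not compiled): how callbacks like the
 * pre-del/post-del pair above are typically handed to the kernel
 * memory DR framework via kphysm_setup_func_register(). The vector
 * layout follows <sys/mem_config.h>, which would need to be included;
 * rsm_dr_callback_post_add and the registration helper name are
 * assumptions for this sketch, not code from this file.
 */
#ifdef RSM_DR_SKETCH
static kphysm_setup_vector_t rsm_dr_callback_vec = {
	KPHYSM_SETUP_VECTOR_VERSION,
	rsm_dr_callback_post_add,	/* post-add handler (assumed) */
	rsm_dr_callback_pre_del,	/* quiesce, defined above */
	rsm_dr_callback_post_del	/* unquiesce, defined above */
};

static int
rsm_dr_register(void)
{
	/* the arg (here NULL) is passed back to each callback */
	return (kphysm_setup_func_register(&rsm_dr_callback_vec, NULL));
}
#endif	/* RSM_DR_SKETCH */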