1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  * Copyright 2012 Milan Jurik. All rights reserved.
  25  */
  26 
  27 
  28 /*
  29  * Overview of the RSM Kernel Agent:
  30  * ---------------------------------
  31  *
  32  * rsm.c constitutes the implementation of the RSM kernel agent. The RSM
  33  * kernel agent is a pseudo device driver which makes use of the RSMPI
  34  * interface on behalf of the RSMAPI user library.
  35  *
  36  * The kernel agent functionality can be categorized into the following
  37  * components:
  38  * 1. Driver Infrastructure
  39  * 2. Export/Import Segment Management
  40  * 3. Internal resource allocation/deallocation
  41  *
  42  * The driver infrastructure includes the basic module loading entry points
  43  * like _init, _info, _fini to load, unload and report information about
  44  * the driver module. The driver infrastructure also includes the
  45  * autoconfiguration entry points namely, attach, detach and getinfo for
  46  * the device autoconfiguration.
  47  *
  48  * The kernel agent is a pseudo character device driver and exports
  49  * a cb_ops structure which defines the driver entry points for character
  50  * device access. This includes the open and close entry points. The
  51  * other entry points provided include ioctl, devmap, segmap and chpoll.
  52  * read and write entry points are not used since the device is memory
  53  * mapped. Also ddi_prop_op is used for the prop_op entry point.
  54  *
  55  * The ioctl entry point supports a number of commands, which are used by
  56  * the RSMAPI library in order to export and import segments. These
  57  * commands include commands for binding and rebinding the physical pages
  58  * allocated to the virtual address range, publishing the export segment,
  59  * unpublishing and republishing an export segment, creating an
  60  * import segment and a virtual connection from this import segment to
  61  * an export segment, performing scatter-gather data transfer, barrier
  62  * operations.
  63  *
  64  *
  65  * Export and Import segments:
  66  * ---------------------------
  67  *
  68  * In order to create an RSM export segment a process allocates a range in its
  69  * virtual address space for the segment using standard Solaris interfaces.
  70  * The process then calls RSMAPI, which in turn makes an ioctl call to the
  71  * RSM kernel agent for an allocation of physical memory pages and for
  72  * creation of the export segment by binding these pages to the virtual
  73  * address range. These pages are locked in memory so that remote accesses
  74  * are always applied to the correct page. Then the RSM segment is published,
  75  * again via RSMAPI making an ioctl to the RSM kernel agent, and a segment id
  76  * is assigned to it.
  77  *
  78  * In order to import a published RSM segment, RSMAPI creates an import
  79  * segment and forms a virtual connection across the interconnect to the
  80  * export segment, via an ioctl into the kernel agent with the connect
  81  * command. The import segment setup is completed by mapping the
  82  * local device memory into the importer's virtual address space. The
  83  * mapping of the import segment is handled by the segmap/devmap
  84  * infrastructure described as follows.
  85  *
  86  * Segmap and Devmap interfaces:
  87  *
  88  * The RSM kernel agent allows device memory to be directly accessed by user
  89  * threads via memory mapping. In order to do so, the RSM kernel agent
  90  * supports the devmap and segmap entry points.
  91  *
  92  * The segmap entry point(rsm_segmap) is responsible for setting up a memory
  93  * mapping as requested by mmap. The devmap entry point(rsm_devmap) is
  94  * responsible for exporting the device memory to the user applications.
  95  * rsm_segmap calls RSMPI rsm_map to allocate device memory. Then the
  96  * control is transferred to the devmap_setup call which calls rsm_devmap.
  97  *
  98  * rsm_devmap validates the user mapping to the device or kernel memory
  99  * and passes the information to the system for setting up the mapping. The
 100  * actual setting up of the mapping is done by devmap_devmem_setup(for
 101  * device memory) or devmap_umem_setup(for kernel memory). Callbacks are
 102  * registered for device context management via the devmap_devmem_setup
 103  * or devmap_umem_setup calls. The callbacks are rsmmap_map, rsmmap_unmap,
 104  * rsmmap_access, rsmmap_dup. The callbacks are called when a new mapping
 105  * is created, a mapping is freed, a mapping is accessed or an existing
 106  * mapping is duplicated respectively. These callbacks allow the RSM kernel
 107  * agent to maintain state information associated with the mappings.
 108  * The state information is mainly in the form of a cookie list for the import
 109  * segment for which mapping has been done.
 110  *
 111  * Forced disconnect of import segments:
 112  *
 113  * When an exported segment is unpublished, the exporter sends a forced
 114  * disconnect message to all its importers. The importer segments are
 115  * unloaded and disconnected. This involves unloading the original
 116  * mappings and remapping to a preallocated kernel trash page. This is
 117  * done by devmap_umem_remap. The trash/dummy page is a kernel page,
 118  * preallocated by the kernel agent during attach using ddi_umem_alloc with
 119  * the DDI_UMEM_TRASH flag set. This avoids a core dump in the application
 120  * due to unloading of the original mappings.
 121  *
 122  * Additionally every segment has a mapping generation number associated
 123  * with it. This is an entry in the barrier generation page, created
 124  * during attach time. This mapping generation number for the import
 125  * segments is incremented on a force disconnect to notify the application
 126  * of the force disconnect. On this notification, the application needs
 127  * to reconnect the segment to establish a new legitimate mapping.
 128  *
 129  *
 130  * Locks used in the kernel agent:
 131  * -------------------------------
 132  *
 133  * The kernel agent uses a variety of mutexes and condition variables for
 134  * mutual exclusion of the shared data structures and for synchronization
 135  * between the various threads. Some of the locks are described as follows.
 136  *
 137  * Each resource structure, which represents either an export or an import
 138  * segment, has a lock associated with it: the resource mutex, rsmrc_lock.
 139  * This is used directly by RSMRC_LOCK and RSMRC_UNLOCK macros and in the
 140  * rsmseglock_acquire and rsmseglock_release macros. An additional
 141  * lock called the rsmsi_lock is used for the shared import data structure
 142  * that is relevant for resources representing import segments. There is
 143  * also a condition variable associated with the resource called s_cv. This
 144  * is used to wait for events like the segment state change etc.
 145  *
 146  * The resource structures are allocated from a pool of resource structures,
 147  * called rsm_resource. This pool is protected via a reader-writer lock,
 148  * called rsmrc_lock.
 149  *
 150  * There are two separate hash tables, one for the export segments and
 151  * one for the import segments. The export segments are inserted into the
 152  * export segment hash table only after they have been published and the
 153  * import segments are inserted in the import segments list only after they
 154  * have successfully connected to an exported segment. These tables are
 155  * protected via reader-writer locks.
 156  *
 157  * Debug Support in the kernel agent:
 158  * ----------------------------------
 159  *
 160  * Debugging support in the kernel agent is provided by the following
 161  * macros.
 162  *
 163  * DBG_PRINTF((category, level, message)) is a macro which logs a debug
 164  * message to the kernel agent's debug buffer, rsmka_dbg. This debug buffer
 165  * can be viewed in kmdb as *rsmka_dbg/s. The message is logged based
 166  * on the definition of the category and level. All messages that belong to
 167  * the specified category(rsmdbg_category) and are of an equal or greater
 168  * severity than the specified level(rsmdbg_level) are logged. The message
 169  * is a string which uses the same formatting rules as the strings used in
 170  * printf.
 171  *
 172  * The category defines which component of the kernel agent has logged this
 173  * message. There are a number of categories that have been defined such as
 174  * RSM_KERNEL_AGENT, RSM_OPS, RSM_IMPORT, RSM_EXPORT etc. A macro,
 175  * DBG_ADDCATEGORY is used to add in another category to the currently
 176  * specified category value so that the component using this new category
 177  * can also effectively log debug messages. Thus, the category of a specific
 178  * message is some combination of the available categories and we can define
 179  * sub-categories if we want a finer level of granularity.
 180  *
 181  * The level defines the severity of the message. Different level values are
 182  * defined, with RSM_ERR being the most severe and RSM_DEBUG_VERBOSE being
 183  * the least severe(debug level is 0).
 184  *
 185  * DBG_DEFINE and DBG_DEFINE_STR are macros provided to declare a debug
 186  * variable or a string respectively.
 187  *
 188  *
 189  * NOTES:
 190  *
 191  * Special Fork and Exec Handling:
 192  * -------------------------------
 193  *
 194  * The backing physical pages of an exported segment are always locked down.
 195  * Thus, there are two cases in which a process having exported segments
 196  * will cause a cpu to hang: (1) the process invokes exec; (2) a process
 197  * forks and invokes exit before the duped file descriptors for the export
 198  * segments are closed in the child process. The hang is caused because the
 199  * address space release algorithm in Solaris VM subsystem is based on a
 200  * non-blocking loop which does not terminate while segments are locked
 201  * down. In addition to this, Solaris VM subsystem lacks a callback
 202  * mechanism to the rsm kernel agent to allow unlocking these export
 203  * segment pages.
 204  *
 205  * In order to circumvent this problem, the kernel agent does the following.
 206  * The Solaris VM subsystem keeps memory segments in increasing order of
 207  * virtual addresses. Thus a special page(special_exit_offset) is allocated
 208  * by the kernel agent and is mmapped into the heap area of the process address
 209  * space(the mmap is done by the RSMAPI library). During the mmap processing
 210  * of this special page by the devmap infrastructure, a callback(the same
 211  * devmap context management callbacks discussed above) is registered for an
 212  * unmap.
 213  *
 214  * As discussed above, this page is processed by the Solaris address space
 215  * release code before any of the exported segments pages(which are allocated
 216  * from high memory). It is during this processing that the unmap callback gets
 217  * called and this callback is responsible for force destroying the exported
 218  * segments and thus eliminating the problem of locked pages.
 219  *
 220  * Flow-control:
 221  * ------------
 222  *
 223  * A credit based flow control algorithm is used for messages whose
 224  * processing cannot be done in the interrupt context because it might
 225  * involve invoking rsmpi calls, or might take a long time to complete
 226  * or might need to allocate resources. The algorithm operates on a per
 227  * path basis. To send a message the pathend needs to have a credit and
 228  * it consumes one for every message that is flow controlled. On the
 229  * receiving pathend the message is put on a msgbuf_queue and a task is
 230  * dispatched on the worker thread - recv_taskq where it is processed.
 231  * After processing the message, the receiving pathend dequeues the message,
 232  * and if it has processed > RSMIPC_LOTSFREE_MSGBUFS messages sends
 233  * credits to the sender pathend.
 234  *
 235  * RSM_DRTEST:
 236  * -----------
 237  *
 238  * This is used to enable the DR testing using a test driver on test
 239  * platforms which do not support DR.
 240  *
 241  */
 242 
 243 #include <sys/types.h>
 244 #include <sys/param.h>
 245 #include <sys/user.h>
 246 #include <sys/buf.h>
 247 #include <sys/systm.h>
 248 #include <sys/cred.h>
 249 #include <sys/vm.h>
 250 #include <sys/uio.h>
 251 #include <vm/seg.h>
 252 #include <vm/page.h>
 253 #include <sys/stat.h>
 254 
 255 #include <sys/time.h>
 256 #include <sys/errno.h>
 257 
 258 #include <sys/file.h>
 259 #include <sys/uio.h>
 260 #include <sys/proc.h>
 261 #include <sys/mman.h>
 262 #include <sys/open.h>
 263 #include <sys/atomic.h>
 264 #include <sys/mem_config.h>
 265 
 266 
 267 #include <sys/ddi.h>
 268 #include <sys/devops.h>
 269 #include <sys/ddidevmap.h>
 270 #include <sys/sunddi.h>
 271 #include <sys/esunddi.h>
 272 #include <sys/ddi_impldefs.h>
 273 
 274 #include <sys/kmem.h>
 275 #include <sys/conf.h>
 276 #include <sys/devops.h>
 277 #include <sys/ddi_impldefs.h>
 278 
 279 #include <sys/modctl.h>
 280 
 281 #include <sys/policy.h>
 282 #include <sys/types.h>
 283 #include <sys/conf.h>
 284 #include <sys/param.h>
 285 
 286 #include <sys/taskq.h>
 287 
 288 #include <sys/rsm/rsm_common.h>
 289 #include <sys/rsm/rsmapi_common.h>
 290 #include <sys/rsm/rsm.h>
 291 #include <rsm_in.h>
 292 #include <sys/rsm/rsmka_path_int.h>
 293 #include <sys/rsm/rsmpi.h>
 294 
 295 #include <sys/modctl.h>
 296 #include <sys/debug.h>
 297 
 298 #include <sys/tuneable.h>
 299 
 300 #ifdef  RSM_DRTEST
 301 extern int rsm_kphysm_setup_func_register(kphysm_setup_vector_t *vec,
 302                 void *arg);
 303 extern void rsm_kphysm_setup_func_unregister(kphysm_setup_vector_t *vec,
 304                 void *arg);
 305 #endif
 306 
 307 extern void dbg_printf(int category, int level, char *fmt, ...);
 308 extern void rsmka_pathmanager_init();
 309 extern void rsmka_pathmanager_cleanup();
 310 extern void rele_sendq_token(sendq_token_t *);
 311 extern rsm_addr_t get_remote_hwaddr(adapter_t *, rsm_node_id_t);
 312 extern rsm_node_id_t get_remote_nodeid(adapter_t *, rsm_addr_t);
 313 extern int rsmka_topology_ioctl(caddr_t, int, int);
 314 
 315 extern pri_t maxclsyspri;
 316 extern work_queue_t work_queue;
 317 extern kmutex_t ipc_info_lock;
 318 extern kmutex_t ipc_info_cvlock;
 319 extern kcondvar_t ipc_info_cv;
 320 extern kmutex_t path_hold_cvlock;
 321 extern kcondvar_t path_hold_cv;
 322 
 323 extern kmutex_t rsmka_buf_lock;
 324 
 325 extern path_t *rsm_find_path(char *, int, rsm_addr_t);
 326 extern adapter_t *rsmka_lookup_adapter(char *, int);
 327 extern sendq_token_t *rsmka_get_sendq_token(rsm_node_id_t, sendq_token_t *);
 328 extern boolean_t rsmka_do_path_active(path_t *, int);
 329 extern boolean_t rsmka_check_node_alive(rsm_node_id_t);
 330 extern void rsmka_release_adapter(adapter_t *);
 331 extern void rsmka_enqueue_msgbuf(path_t *path, void *data);
 332 extern void rsmka_dequeue_msgbuf(path_t *path);
 333 extern msgbuf_elem_t *rsmka_gethead_msgbuf(path_t *path);
 334 /* lint -w2 */
 335 
 336 static int rsm_open(dev_t *, int, int, cred_t *);
 337 static int rsm_close(dev_t, int, int, cred_t *);
 338 static int rsm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
 339     cred_t *credp, int *rvalp);
 340 static int rsm_devmap(dev_t, devmap_cookie_t, offset_t, size_t, size_t *,
 341     uint_t);
 342 static int rsm_segmap(dev_t, off_t, struct as *, caddr_t *, off_t, uint_t,
 343     uint_t, uint_t, cred_t *);
 344 static int rsm_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
 345     struct pollhead **phpp);
 346 
 347 static int rsm_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
 348 static int rsm_attach(dev_info_t *, ddi_attach_cmd_t);
 349 static int rsm_detach(dev_info_t *, ddi_detach_cmd_t);
 350 
 351 static int rsmipc_send(rsm_node_id_t, rsmipc_request_t *, rsmipc_reply_t *);
 352 static void rsm_force_unload(rsm_node_id_t, rsm_memseg_id_t, boolean_t);
 353 static void rsm_send_importer_disconnects(rsm_memseg_id_t, rsm_node_id_t);
 354 static void rsm_send_republish(rsm_memseg_id_t, rsmapi_access_entry_t *, int,
 355                                 rsm_permission_t);
 356 static void rsm_export_force_destroy(ddi_umem_cookie_t *);
 357 static void rsmacl_free(rsmapi_access_entry_t *, int);
 358 static void rsmpiacl_free(rsm_access_entry_t *, int);
 359 
 360 static int rsm_inc_pgcnt(pgcnt_t);
 361 static void rsm_dec_pgcnt(pgcnt_t);
 362 static void rsm_free_mapinfo(rsm_mapinfo_t *mapinfop);
 363 static rsm_mapinfo_t *rsm_get_mapinfo(rsmseg_t *, off_t, size_t, off_t *,
 364                                         size_t *);
 365 static void exporter_quiesce();
 366 static void rsmseg_suspend(rsmseg_t *, int *);
 367 static void rsmsegshare_suspend(rsmseg_t *);
 368 static int rsmseg_resume(rsmseg_t *, void **);
 369 static int rsmsegshare_resume(rsmseg_t *);
 370 
 371 static struct cb_ops rsm_cb_ops = {
 372         rsm_open,               /* open */
 373         rsm_close,              /* close */
 374         nodev,                  /* strategy */
 375         nodev,                  /* print */
 376         nodev,                  /* dump */
 377         nodev,                  /* read */
 378         nodev,                  /* write */
 379         rsm_ioctl,              /* ioctl */
 380         rsm_devmap,             /* devmap */
 381         NULL,                   /* mmap */
 382         rsm_segmap,             /* segmap */
 383         rsm_chpoll,             /* poll */
 384         ddi_prop_op,            /* cb_prop_op */
 385         0,                      /* streamtab  */
 386         D_NEW|D_MP|D_DEVMAP,    /* Driver compatibility flag */
 387         0,
 388         0,
 389         0
 390 };
 391 
 392 static struct dev_ops rsm_ops = {
 393         DEVO_REV,               /* devo_rev, */
 394         0,                      /* refcnt  */
 395         rsm_info,               /* get_dev_info */
 396         nulldev,                /* identify */
 397         nulldev,                /* probe */
 398         rsm_attach,             /* attach */
 399         rsm_detach,             /* detach */
 400         nodev,                  /* reset */
 401         &rsm_cb_ops,                /* driver operations */
 402         (struct bus_ops *)0,    /* bus operations */
 403         0,
 404         ddi_quiesce_not_needed,         /* quiesce */
 405 };
 406 
 407 /*
 408  * Module linkage information for the kernel.
 409  */
 410 
 411 static struct modldrv modldrv = {
 412         &mod_driverops, /* Type of module.  This one is a pseudo driver */
 413         "Remote Shared Memory Driver",
 414         &rsm_ops,   /* driver ops */
 415 };
 416 
 417 static struct modlinkage modlinkage = {
 418         MODREV_1,
 419         (void *)&modldrv,
 420         0,
 421         0,
 422         0
 423 };
 424 
 425 static void rsm_dr_callback_post_add(void *arg, pgcnt_t delta);
 426 static int rsm_dr_callback_pre_del(void *arg, pgcnt_t delta);
 427 static void rsm_dr_callback_post_del(void *arg, pgcnt_t delta, int cancelled);
 428 
 429 static kphysm_setup_vector_t rsm_dr_callback_vec = {
 430         KPHYSM_SETUP_VECTOR_VERSION,
 431         rsm_dr_callback_post_add,
 432         rsm_dr_callback_pre_del,
 433         rsm_dr_callback_post_del
 434 };
 435 
 436 /* This flag can be changed to 0 to help with PIT testing */
 437 int rsmka_modunloadok = 1;
 438 int no_reply_cnt = 0;
 439 
 440 uint64_t rsm_ctrlmsg_errcnt = 0;
 441 uint64_t rsm_ipcsend_errcnt = 0;
 442 
 443 #define MAX_NODES 64
 444 
 445 static struct rsm_driver_data rsm_drv_data;
 446 static struct rsmresource_table rsm_resource;
 447 
 448 static void rsmresource_insert(minor_t, rsmresource_t *, rsm_resource_type_t);
 449 static void rsmresource_destroy(void);
 450 static int rsmresource_alloc(minor_t *);
 451 static rsmresource_t *rsmresource_free(minor_t rnum);
 452 static int rsm_closeconnection(rsmseg_t *seg, void **cookie);
 453 static int rsm_unpublish(rsmseg_t *seg, int mode);
 454 static int rsm_unbind(rsmseg_t *seg);
 455 static uint_t rsmhash(rsm_memseg_id_t key);
 456 static void rsmhash_alloc(rsmhash_table_t *rhash, int size);
 457 static void rsmhash_free(rsmhash_table_t *rhash, int size);
 458 static void *rsmhash_getbkt(rsmhash_table_t *rhash, uint_t hashval);
 459 static void **rsmhash_bktaddr(rsmhash_table_t *rhash, uint_t hashval);
 460 static int rsm_send_notimporting(rsm_node_id_t dest, rsm_memseg_id_t segid,
 461                                         void *cookie);
 462 int rsm_disconnect(rsmseg_t *seg);
 463 void rsmseg_unload(rsmseg_t *);
 464 void rsm_suspend_complete(rsm_node_id_t src_node, int flag);
 465 
 466 rsm_intr_hand_ret_t rsm_srv_func(rsm_controller_object_t *chd,
 467     rsm_intr_q_op_t opcode, rsm_addr_t src,
 468     void *data, size_t size, rsm_intr_hand_arg_t arg);
 469 
 470 static void rsm_intr_callback(void *, rsm_addr_t, rsm_intr_hand_arg_t);
 471 
 472 rsm_node_id_t my_nodeid;
 473 
 474 /* cookie, va, offsets and length for the barrier */
 475 static rsm_gnum_t               *bar_va;
 476 static ddi_umem_cookie_t        bar_cookie;
 477 static off_t                    barrier_offset;
 478 static size_t                   barrier_size;
 479 static int                      max_segs;
 480 
 481 /* cookie for the trash memory */
 482 static ddi_umem_cookie_t        remap_cookie;
 483 
 484 static rsm_memseg_id_t  rsm_nextavail_segmentid;
 485 
 486 extern taskq_t *work_taskq;
 487 extern char *taskq_name;
 488 
 489 static dev_info_t *rsm_dip;     /* private copy of devinfo pointer */
 490 
 491 static rsmhash_table_t rsm_export_segs;         /* list of exported segs */
 492 rsmhash_table_t rsm_import_segs;                /* list of imported segs */
 493 static rsmhash_table_t rsm_event_queues;        /* list of event queues */
 494 
 495 static  rsm_ipc_t       rsm_ipc;                /* ipc info */
 496 
 497 /* list of nodes to which RSMIPC_MSG_SUSPEND has been sent */
 498 static list_head_t      rsm_suspend_list;
 499 
 500 /* list of descriptors for remote importers */
 501 static importers_table_t importer_list;
 502 
 503 kmutex_t rsm_suspend_cvlock;
 504 kcondvar_t rsm_suspend_cv;
 505 
 506 static kmutex_t rsm_lock;
 507 
 508 adapter_t loopback_adapter;
 509 rsm_controller_attr_t loopback_attr;
 510 
 511 int rsmipc_send_controlmsg(path_t *path, int msgtype);
 512 
 513 void rsmka_init_loopback();
 514 
 515 int rsmka_null_seg_create(
 516     rsm_controller_handle_t,
 517     rsm_memseg_export_handle_t *,
 518     size_t,
 519     uint_t,
 520     rsm_memory_local_t *,
 521     rsm_resource_callback_t,
 522     rsm_resource_callback_arg_t);
 523 
 524 int rsmka_null_seg_destroy(
 525     rsm_memseg_export_handle_t);
 526 
 527 int rsmka_null_bind(
 528     rsm_memseg_export_handle_t,
 529     off_t,
 530     rsm_memory_local_t *,
 531     rsm_resource_callback_t,
 532     rsm_resource_callback_arg_t);
 533 
 534 int rsmka_null_unbind(
 535     rsm_memseg_export_handle_t,
 536     off_t,
 537     size_t);
 538 
 539 int rsmka_null_rebind(
 540     rsm_memseg_export_handle_t,
 541     off_t,
 542     rsm_memory_local_t *,
 543     rsm_resource_callback_t,
 544     rsm_resource_callback_arg_t);
 545 
 546 int rsmka_null_publish(
 547     rsm_memseg_export_handle_t,
 548     rsm_access_entry_t [],
 549     uint_t,
 550     rsm_memseg_id_t,
 551     rsm_resource_callback_t,
 552     rsm_resource_callback_arg_t);
 553 
 554 
 555 int rsmka_null_republish(
 556     rsm_memseg_export_handle_t,
 557     rsm_access_entry_t [],
 558     uint_t,
 559     rsm_resource_callback_t,
 560     rsm_resource_callback_arg_t);
 561 
 562 int rsmka_null_unpublish(
 563     rsm_memseg_export_handle_t);
 564 
 565 rsm_ops_t null_rsmpi_ops;
 566 
 567 /*
 568  * data and locks to keep track of total amount of exported memory
 569  */
 570 static  pgcnt_t         rsm_pgcnt;
 571 static  pgcnt_t         rsm_pgcnt_max;  /* max allowed */
 572 static  kmutex_t        rsm_pgcnt_lock;
 573 
 574 static  int             rsm_enable_dr;
 575 
 576 static  char            loopback_str[] = "loopback";
 577 
 578 int             rsm_hash_size;
 579 
 580 /*
 581  * The locking model is as follows:
 582  *
 583  * Local operations:
 584  *              find resource - grab reader lock on resource list
 585  *              insert rc     - grab writer lock
 586  *              delete rc     - grab writer lock and resource mutex
 587  *              read/write    - no lock
 588  *
 589  * Remote invocations:
 590  *              find resource - grab read lock and resource mutex
 591  *
 592  * State:
 593  *              resource state - grab resource mutex
 594  */
 595 
 596 int
 597 _init(void)
 598 {
 599         int e;
 600 
 601         e = mod_install(&modlinkage);
 602         if (e != 0) {
 603                 return (e);
 604         }
 605 
 606         mutex_init(&rsm_lock, NULL, MUTEX_DRIVER, NULL);
 607 
 608         mutex_init(&rsmka_buf_lock, NULL, MUTEX_DEFAULT, NULL);
 609 
 610 
 611         rw_init(&rsm_resource.rsmrc_lock, NULL, RW_DRIVER, NULL);
 612 
 613         rsm_hash_size = RSM_HASHSZ;
 614 
 615         rw_init(&rsm_export_segs.rsmhash_rw, NULL, RW_DRIVER, NULL);
 616 
 617         rw_init(&rsm_import_segs.rsmhash_rw, NULL, RW_DRIVER, NULL);
 618 
 619         mutex_init(&importer_list.lock, NULL, MUTEX_DRIVER, NULL);
 620 
 621         mutex_init(&rsm_ipc.lock, NULL, MUTEX_DRIVER, NULL);
 622         cv_init(&rsm_ipc.cv, NULL, CV_DRIVER, 0);
 623 
 624         mutex_init(&rsm_suspend_cvlock, NULL, MUTEX_DRIVER, NULL);
 625         cv_init(&rsm_suspend_cv, NULL, CV_DRIVER, 0);
 626 
 627         mutex_init(&rsm_drv_data.drv_lock, NULL, MUTEX_DRIVER, NULL);
 628         cv_init(&rsm_drv_data.drv_cv, NULL, CV_DRIVER, 0);
 629 
 630         rsm_ipc.count = RSMIPC_SZ;
 631         rsm_ipc.wanted = 0;
 632         rsm_ipc.sequence = 0;
 633 
 634         (void) mutex_init(&rsm_pgcnt_lock, NULL, MUTEX_DRIVER, NULL);
 635 
 636         for (e = 0; e < RSMIPC_SZ; e++) {
 637                 rsmipc_slot_t *slot = &rsm_ipc.slots[e];
 638 
 639                 RSMIPC_SET(slot, RSMIPC_FREE);
 640                 mutex_init(&slot->rsmipc_lock, NULL, MUTEX_DRIVER, NULL);
 641                 cv_init(&slot->rsmipc_cv, NULL, CV_DRIVER, 0);
 642         }
 643 
 644         /*
 645          * Initialize the suspend message list
 646          */
 647         rsm_suspend_list.list_head = NULL;
 648         mutex_init(&rsm_suspend_list.list_lock, NULL, MUTEX_DRIVER, NULL);
 649 
 650         /*
 651          * It is assumed here that configuration data is available
 652          * during system boot since _init may be called at that time.
 653          */
 654 
 655         rsmka_pathmanager_init();
 656 
 657         DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE,
 658             "rsm: _init done\n"));
 659 
 660         return (DDI_SUCCESS);
 661 
 662 }
 663 
 664 int
 665 _info(struct modinfo *modinfop)
 666 {
 667 
 668         return (mod_info(&modlinkage, modinfop));
 669 }
 670 
 671 int
 672 _fini(void)
 673 {
 674         int e;
 675 
 676         DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE,
 677             "rsm: _fini enter\n"));
 678 
 679         /*
 680          * The rsmka_modunloadok flag is simply used to help with
 681          * the PIT testing. Make this flag 0 to disallow modunload.
 682          */
 683         if (rsmka_modunloadok == 0)
 684                 return (EBUSY);
 685 
 686         /* rsm_detach will be called as a result of mod_remove */
 687         e = mod_remove(&modlinkage);
 688         if (e) {
 689                 DBG_PRINTF((RSM_KERNEL_AGENT, RSM_ERR,
 690                     "Unable to fini RSM %x\n", e));
 691                 return (e);
 692         }
 693 
 694         rsmka_pathmanager_cleanup();
 695 
 696         rw_destroy(&rsm_resource.rsmrc_lock);
 697 
 698         rw_destroy(&rsm_export_segs.rsmhash_rw);
 699         rw_destroy(&rsm_import_segs.rsmhash_rw);
 700         rw_destroy(&rsm_event_queues.rsmhash_rw);
 701 
 702         mutex_destroy(&importer_list.lock);
 703 
 704         mutex_destroy(&rsm_ipc.lock);
 705         cv_destroy(&rsm_ipc.cv);
 706 
 707         (void) mutex_destroy(&rsm_suspend_list.list_lock);
 708 
 709         (void) mutex_destroy(&rsm_pgcnt_lock);
 710 
 711         DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE, "_fini done\n"));
 712 
 713         return (DDI_SUCCESS);
 714 
 715 }
 716 
 717 /*ARGSUSED1*/
 718 static int
 719 rsm_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
 720 {
 721         minor_t rnum;
 722         int     percent;
 723         int     ret;
 724         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);
 725 
 726         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_attach enter\n"));
 727 
 728         switch (cmd) {
 729         case DDI_ATTACH:
 730                 break;
 731         case DDI_RESUME:
 732         default:
 733                 DBG_PRINTF((category, RSM_ERR,
 734                     "rsm:rsm_attach - cmd not supported\n"));
 735                 return (DDI_FAILURE);
 736         }
 737 
 738         if (rsm_dip != NULL) {
 739                 DBG_PRINTF((category, RSM_ERR,
 740                     "rsm:rsm_attach - supports only "
 741                     "one instance\n"));
 742                 return (DDI_FAILURE);
 743         }
 744 
 745         rsm_enable_dr = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
 746             DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
 747             "enable-dynamic-reconfiguration", 1);
 748 
 749         mutex_enter(&rsm_drv_data.drv_lock);
 750         rsm_drv_data.drv_state = RSM_DRV_REG_PROCESSING;
 751         mutex_exit(&rsm_drv_data.drv_lock);
 752 
 753         if (rsm_enable_dr) {
 754 #ifdef  RSM_DRTEST
 755                 ret = rsm_kphysm_setup_func_register(&rsm_dr_callback_vec,
 756                     (void *)NULL);
 757 #else
 758                 ret = kphysm_setup_func_register(&rsm_dr_callback_vec,
 759                     (void *)NULL);
 760 #endif
 761                 if (ret != 0) {
 762                         mutex_exit(&rsm_drv_data.drv_lock);
 763                         cmn_err(CE_CONT, "rsm:rsm_attach - Dynamic "
 764                             "reconfiguration setup failed\n");
 765                         return (DDI_FAILURE);
 766                 }
 767         }
 768 
 769         mutex_enter(&rsm_drv_data.drv_lock);
 770         ASSERT(rsm_drv_data.drv_state == RSM_DRV_REG_PROCESSING);
 771         rsm_drv_data.drv_state = RSM_DRV_OK;
 772         cv_broadcast(&rsm_drv_data.drv_cv);
 773         mutex_exit(&rsm_drv_data.drv_lock);
 774 
 775         /*
 776          * page_list_read_lock();
 777          * xx_setup();
 778          * page_list_read_unlock();
 779          */
 780 
 781         rsm_hash_size = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
 782             DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
 783             "segment-hashtable-size", RSM_HASHSZ);
 784         if (rsm_hash_size == 0) {
 785                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
 786                     "rsm: segment-hashtable-size in rsm.conf "
 787                     "must be greater than 0, defaulting to 128\n"));
 788                 rsm_hash_size = RSM_HASHSZ;
 789         }
 790 
 791         DBG_PRINTF((category, RSM_DEBUG, "rsm_attach rsm_hash_size: %d\n",
 792             rsm_hash_size));
 793 
 794         rsm_pgcnt = 0;
 795 
 796         percent = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
 797             DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
 798             "max-exported-memory", 0);
 799         if (percent < 0) {
 800                 DBG_PRINTF((category, RSM_ERR,
 801                     "rsm:rsm_attach not enough memory available to "
 802                     "export, or max-exported-memory set incorrectly.\n"));
 803                 return (DDI_FAILURE);
 804         }
 805         /* 0 indicates no fixed upper limit. maxmem is the max  */
 806         /* available pageable physical mem                      */
 807         rsm_pgcnt_max = (percent*maxmem)/100;
 808 
 809         if (rsm_pgcnt_max > 0) {
 810                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
 811                     "rsm: Available physical memory = %lu pages, "
 812                     "Max exportable memory = %lu pages",
 813                     maxmem, rsm_pgcnt_max));
 814         }
 815 
 816         /*
 817          * Create minor number
 818          */
 819         if (rsmresource_alloc(&rnum) != RSM_SUCCESS) {
 820                 DBG_PRINTF((category, RSM_ERR,
 821                     "rsm: rsm_attach - Unable to get "
 822                     "minor number\n"));
 823                 return (DDI_FAILURE);
 824         }
 825 
 826         ASSERT(rnum == RSM_DRIVER_MINOR);
 827 
 828         if (ddi_create_minor_node(devi, DRIVER_NAME, S_IFCHR,
 829             rnum, DDI_PSEUDO, NULL) == DDI_FAILURE) {
 830                 DBG_PRINTF((category, RSM_ERR,
 831                     "rsm: rsm_attach - unable to allocate "
 832                     "minor #\n"));
 833                 return (DDI_FAILURE);
 834         }
 835 
 836         rsm_dip = devi;
 837         /*
 838          * Allocate the hashtables
 839          */
 840         rsmhash_alloc(&rsm_export_segs, rsm_hash_size);
 841         rsmhash_alloc(&rsm_import_segs, rsm_hash_size);
 842 
 843         importer_list.bucket = (importing_token_t **)
 844             kmem_zalloc(rsm_hash_size * sizeof (importing_token_t *), KM_SLEEP);
 845 
 846         /*
 847          * Allocate a resource struct
 848          */
 849         {
 850                 rsmresource_t *p;
 851 
 852                 p = (rsmresource_t *)kmem_zalloc(sizeof (*p), KM_SLEEP);
 853 
 854                 mutex_init(&p->rsmrc_lock, NULL, MUTEX_DRIVER, (void *) NULL);
 855 
 856                 rsmresource_insert(rnum, p, RSM_RESOURCE_BAR);
 857         }
 858 
 859         /*
 860          * Based on the rsm.conf property max-segments, determine the maximum
 861          * number of segments that can be exported/imported. This is then used
 862          * to determine the size for barrier failure pages.
 863          */
 864 
 865         /* First get the max number of segments from the rsm.conf file */
 866         max_segs = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
 867             DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
 868             "max-segments", 0);
 869         if (max_segs == 0) {
 870                 /* Use default number of segments */
 871                 max_segs = RSM_MAX_NUM_SEG;
 872         }
 873 
 874         /*
 875          * Based on the max number of segments allowed, determine the barrier
 876          * page size. add 1 to max_segs since the barrier page itself uses
 877          * a slot
 878          */
 879         barrier_size = roundup((max_segs + 1) * sizeof (rsm_gnum_t),
 880             PAGESIZE);
 881 
 882         /*
 883          * allocation of the barrier failure page
 884          */
 885         bar_va = (rsm_gnum_t *)ddi_umem_alloc(barrier_size,
 886             DDI_UMEM_SLEEP, &bar_cookie);
 887 
 888         /*
 889          * Set the barrier_offset
 890          */
 891         barrier_offset = 0;
 892 
 893         /*
 894          * Allocate a trash memory and get a cookie for it. This will be used
 895          * when remapping segments during force disconnects. Allocate the
 896          * trash memory with a large size which is page aligned.
 897          */
 898         (void) ddi_umem_alloc((size_t)TRASHSIZE,
 899             DDI_UMEM_TRASH, &remap_cookie);
 900 
 901         /* initialize user segment id allocation variable */
 902         rsm_nextavail_segmentid = (rsm_memseg_id_t)RSM_USER_APP_ID_BASE;
 903 
 904         /*
 905          * initialize the null_rsmpi_ops vector and the loopback adapter
 906          */
 907         rsmka_init_loopback();
 908 
 909 
 910         ddi_report_dev(devi);
 911 
 912         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_attach done\n"));
 913 
 914         return (DDI_SUCCESS);
 915 }
 916 
 917 /*
 918  * The call to mod_remove in the _fine routine will cause the system
 919  * to call rsm_detach
 920  */
 921 /*ARGSUSED*/
 922 static int
 923 rsm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
 924 {
 925         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);
 926 
 927         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_detach enter\n"));
 928 
 929         switch (cmd) {
 930         case DDI_DETACH:
 931                 break;
 932         default:
 933                 DBG_PRINTF((category, RSM_ERR,
 934                     "rsm:rsm_detach - cmd %x not supported\n",
 935                     cmd));
 936                 return (DDI_FAILURE);
 937         }
 938 
 939         mutex_enter(&rsm_drv_data.drv_lock);
 940         while (rsm_drv_data.drv_state != RSM_DRV_OK)
 941                 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
 942         rsm_drv_data.drv_state = RSM_DRV_UNREG_PROCESSING;
 943         mutex_exit(&rsm_drv_data.drv_lock);
 944 
 945         /*
 946          * Unregister the DR callback functions
 947          */
 948         if (rsm_enable_dr) {
 949 #ifdef  RSM_DRTEST
 950                 rsm_kphysm_setup_func_unregister(&rsm_dr_callback_vec,
 951                     (void *)NULL);
 952 #else
 953                 kphysm_setup_func_unregister(&rsm_dr_callback_vec,
 954                     (void *)NULL);
 955 #endif
 956         }
 957 
 958         mutex_enter(&rsm_drv_data.drv_lock);
 959         ASSERT(rsm_drv_data.drv_state == RSM_DRV_UNREG_PROCESSING);
 960         rsm_drv_data.drv_state = RSM_DRV_NEW;
 961         mutex_exit(&rsm_drv_data.drv_lock);
 962 
 963         ASSERT(rsm_suspend_list.list_head == NULL);
 964 
 965         /*
 966          * Release all resources, seglist, controller, ...
 967          */
 968 
 969         /* remove intersend queues */
 970         /* remove registered services */
 971 
 972 
 973         ddi_remove_minor_node(dip, DRIVER_NAME);
 974         rsm_dip = NULL;
 975 
 976         /*
 977          * Free minor zero resource
 978          */
 979         {
 980                 rsmresource_t *p;
 981 
 982                 p = rsmresource_free(RSM_DRIVER_MINOR);
 983                 if (p) {
 984                         mutex_destroy(&p->rsmrc_lock);
 985                         kmem_free((void *)p, sizeof (*p));
 986                 }
 987         }
 988 
 989         /*
 990          * Free resource table
 991          */
 992 
 993         rsmresource_destroy();
 994 
 995         /*
 996          * Free the hash tables
 997          */
 998         rsmhash_free(&rsm_export_segs, rsm_hash_size);
 999         rsmhash_free(&rsm_import_segs, rsm_hash_size);
1000 
1001         kmem_free((void *)importer_list.bucket,
1002             rsm_hash_size * sizeof (importing_token_t *));
1003         importer_list.bucket = NULL;
1004 
1005 
1006         /* free barrier page */
1007         if (bar_cookie != NULL) {
1008                 ddi_umem_free(bar_cookie);
1009         }
1010         bar_va = NULL;
1011         bar_cookie = NULL;
1012 
1013         /*
1014          * Free the memory allocated for the trash
1015          */
1016         if (remap_cookie != NULL) {
1017                 ddi_umem_free(remap_cookie);
1018         }
1019         remap_cookie = NULL;
1020 
1021         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_detach done\n"));
1022 
1023         return (DDI_SUCCESS);
1024 }
1025 
1026 /*ARGSUSED*/
1027 static int
1028 rsm_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1029 {
1030         register int error;
1031         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);
1032 
1033         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_info enter\n"));
1034 
1035         switch (infocmd) {
1036         case DDI_INFO_DEVT2DEVINFO:
1037                 if (rsm_dip == NULL)
1038                         error = DDI_FAILURE;
1039                 else {
1040                         *result = (void *)rsm_dip;
1041                         error = DDI_SUCCESS;
1042                 }
1043                 break;
1044         case DDI_INFO_DEVT2INSTANCE:
1045                 *result = (void *)0;
1046                 error = DDI_SUCCESS;
1047                 break;
1048         default:
1049                 error = DDI_FAILURE;
1050         }
1051 
1052         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_info done\n"));
1053         return (error);
1054 }
1055 
1056 adapter_t *
1057 rsm_getadapter(rsm_ioctlmsg_t *msg, int mode)
1058 {
1059         adapter_t *adapter;
1060         char adapter_devname[MAXNAMELEN];
1061         int instance;
1062         DBG_DEFINE(category,
1063             RSM_KERNEL_AGENT | RSM_IMPORT | RSM_EXPORT | RSM_IOCTL);
1064 
1065         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_getadapter enter\n"));
1066 
1067         instance = msg->cnum;
1068 
1069         if ((msg->cname_len <= 0) || (msg->cname_len > MAXNAMELEN)) {
1070                 return (NULL);
1071         }
1072 
1073         if (ddi_copyin(msg->cname, adapter_devname, msg->cname_len, mode))
1074                 return (NULL);
1075 
1076         if (strcmp(adapter_devname, "loopback") == 0)
1077                 return (&loopback_adapter);
1078 
1079         adapter = rsmka_lookup_adapter(adapter_devname, instance);
1080 
1081         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_getadapter done\n"));
1082 
1083         return (adapter);
1084 }
1085 
1086 
1087 /*
1088  * *********************** Resource Number Management ********************
1089  * All resources are stored in a simple hash table. The table is an array
1090  * of pointers to resource blks. Each blk contains:
1091  *      base    - base number of this blk
1092  *      used    - number of used slots in this blk.
1093  *      blks    - array of pointers to resource items.
1094  * An entry in a resource blk is empty if it's NULL.
1095  *
1096  * We start with no resource array. Each time we run out of slots, we
1097  * reallocate a new larger array and copy the pointer to the new array and
1098  * a new resource blk is allocated and added to the hash table.
1099  *
1100  * The resource control block contains:
1101  *      root    - array of pointer of resource blks
1102  *      sz      - current size of array.
1103  *      len     - last valid entry in array.
1104  *
1105  * A search operation based on a resource number is as follows:
1106  *      index = rnum / RESOURCE_BLKSZ;
1107  *      ASSERT(index < resource_block.len);
1108  *      ASSERT(index < resource_block.sz);
1109  *      offset = rnum % RESOURCE_BLKSZ;
1110  *      ASSERT(offset >= resource_block.root[index]->base);
1111  *      ASSERT(offset < resource_block.root[index]->base + RESOURCE_BLKSZ);
1112  *      return resource_block.root[index]->blks[offset];
1113  *
1114  * A resource blk is freed when its used count reaches zero.
1115  */
1116 static int
1117 rsmresource_alloc(minor_t *rnum)
1118 {
1119 
1120         /* search for available resource slot */
1121         int i, j, empty = -1;
1122         rsmresource_blk_t *blk;
1123 
1124         DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1125             "rsmresource_alloc enter\n"));
1126 
1127         rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER);
1128 
1129         /* Try to find an empty slot */
1130         for (i = 0; i < rsm_resource.rsmrc_len; i++) {
1131                 blk = rsm_resource.rsmrc_root[i];
1132                 if (blk != NULL && blk->rsmrcblk_avail > 0) {
1133                         /* found an empty slot in this blk */
1134                         for (j = 0; j < RSMRC_BLKSZ; j++) {
1135                                 if (blk->rsmrcblk_blks[j] == NULL) {
1136                                         *rnum = (minor_t)
1137                                             (j + (i * RSMRC_BLKSZ));
1138                                         /*
1139                                          * obey gen page limits
1140                                          */
1141                                         if (*rnum >= max_segs + 1) {
1142                                                 if (empty < 0) {
1143                                                         rw_exit(&rsm_resource.
1144                                                             rsmrc_lock);
1145                                                         DBG_PRINTF((
1146                                                             RSM_KERNEL_ALL,
1147                                                             RSM_ERR,
1148                                                             "rsmresource"
1149                                                             "_alloc failed:"
1150                                                             "not enough res"
1151                                                             "%d\n", *rnum));
1152                                         return (RSMERR_INSUFFICIENT_RESOURCES);
1153                                                 } else {
1154                                                         /* use empty slot */
1155                                                         break;
1156                                                 }
1157 
1158                                         }
1159 
1160                                         blk->rsmrcblk_blks[j] = RSMRC_RESERVED;
1161                                         blk->rsmrcblk_avail--;
1162                                         rw_exit(&rsm_resource.rsmrc_lock);
1163                                         DBG_PRINTF((RSM_KERNEL_ALL,
1164                                             RSM_DEBUG_VERBOSE,
1165                                             "rsmresource_alloc done\n"));
1166                                         return (RSM_SUCCESS);
1167                                 }
1168                         }
1169                 } else if (blk == NULL && empty < 0) {
1170                         /* remember first empty slot */
1171                         empty = i;
1172                 }
1173         }
1174 
1175         /* Couldn't find anything, allocate a new blk */
1176         /*
1177          * Do we need to reallocate the root array
1178          */
1179         if (empty < 0) {
1180                 if (rsm_resource.rsmrc_len == rsm_resource.rsmrc_sz) {
1181                         /*
1182                          * Allocate new array and copy current stuff into it
1183                          */
1184                         rsmresource_blk_t       **p;
1185                         uint_t newsz = (uint_t)rsm_resource.rsmrc_sz +
1186                             RSMRC_BLKSZ;
1187                         /*
1188                          * Don't allocate more that max valid rnum
1189                          */
1190                         if (rsm_resource.rsmrc_len*RSMRC_BLKSZ >=
1191                             max_segs + 1) {
1192                                 rw_exit(&rsm_resource.rsmrc_lock);
1193                                 return (RSMERR_INSUFFICIENT_RESOURCES);
1194                         }
1195 
1196                         p = (rsmresource_blk_t **)kmem_zalloc(
1197                             newsz * sizeof (*p),
1198                             KM_SLEEP);
1199 
1200                         if (rsm_resource.rsmrc_root) {
1201                                 uint_t oldsz;
1202 
1203                                 oldsz = (uint_t)(rsm_resource.rsmrc_sz *
1204                                     (int)sizeof (*p));
1205 
1206                                 /*
1207                                  * Copy old data into new space and
1208                                  * free old stuff
1209                                  */
1210                                 bcopy(rsm_resource.rsmrc_root, p, oldsz);
1211                                 kmem_free(rsm_resource.rsmrc_root, oldsz);
1212                         }
1213 
1214                         rsm_resource.rsmrc_root = p;
1215                         rsm_resource.rsmrc_sz = (int)newsz;
1216                 }
1217 
1218                 empty = rsm_resource.rsmrc_len;
1219                 rsm_resource.rsmrc_len++;
1220         }
1221 
1222         /*
1223          * Allocate a new blk
1224          */
1225         blk = (rsmresource_blk_t *)kmem_zalloc(sizeof (*blk), KM_SLEEP);
1226         ASSERT(rsm_resource.rsmrc_root[empty] == NULL);
1227         rsm_resource.rsmrc_root[empty] = blk;
1228         blk->rsmrcblk_avail = RSMRC_BLKSZ - 1;
1229 
1230         /*
1231          * Allocate slot
1232          */
1233 
1234         *rnum = (minor_t)(empty * RSMRC_BLKSZ);
1235 
1236         /*
1237          * watch out not to exceed bounds of barrier page
1238          */
1239         if (*rnum >= max_segs + 1) {
1240                 rw_exit(&rsm_resource.rsmrc_lock);
1241                 DBG_PRINTF((RSM_KERNEL_ALL, RSM_ERR,
1242                     "rsmresource_alloc failed %d\n", *rnum));
1243 
1244                 return (RSMERR_INSUFFICIENT_RESOURCES);
1245         }
1246         blk->rsmrcblk_blks[0] = RSMRC_RESERVED;
1247 
1248 
1249         rw_exit(&rsm_resource.rsmrc_lock);
1250 
1251         DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1252             "rsmresource_alloc done\n"));
1253 
1254         return (RSM_SUCCESS);
1255 }
1256 
1257 static rsmresource_t *
1258 rsmresource_free(minor_t rnum)
1259 {
1260 
1261         /* search for available resource slot */
1262         int i, j;
1263         rsmresource_blk_t *blk;
1264         rsmresource_t *p;
1265 
1266         DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1267             "rsmresource_free enter\n"));
1268 
1269         i = (int)(rnum / RSMRC_BLKSZ);
1270         j = (int)(rnum % RSMRC_BLKSZ);
1271 
1272         if (i >= rsm_resource.rsmrc_len) {
1273                 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1274                     "rsmresource_free done\n"));
1275                 return (NULL);
1276         }
1277 
1278         rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER);
1279 
1280         ASSERT(rsm_resource.rsmrc_root);
1281         ASSERT(i < rsm_resource.rsmrc_len);
1282         ASSERT(i < rsm_resource.rsmrc_sz);
1283         blk = rsm_resource.rsmrc_root[i];
1284         if (blk == NULL) {
1285                 rw_exit(&rsm_resource.rsmrc_lock);
1286                 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1287                     "rsmresource_free done\n"));
1288                 return (NULL);
1289         }
1290 
1291         ASSERT(blk->rsmrcblk_blks[j]); /* reserved or full */
1292 
1293         p = blk->rsmrcblk_blks[j];
1294         if (p == RSMRC_RESERVED) {
1295                 p = NULL;
1296         }
1297 
1298         blk->rsmrcblk_blks[j] = NULL;
1299         blk->rsmrcblk_avail++;
1300         if (blk->rsmrcblk_avail == RSMRC_BLKSZ) {
1301                 /* free this blk */
1302                 kmem_free(blk, sizeof (*blk));
1303                 rsm_resource.rsmrc_root[i] = NULL;
1304         }
1305 
1306         rw_exit(&rsm_resource.rsmrc_lock);
1307 
1308         DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1309             "rsmresource_free done\n"));
1310 
1311         return (p);
1312 }
1313 
1314 static rsmresource_t *
1315 rsmresource_lookup(minor_t rnum, int lock)
1316 {
1317         int i, j;
1318         rsmresource_blk_t *blk;
1319         rsmresource_t *p;
1320 
1321         DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1322             "rsmresource_lookup enter\n"));
1323 
1324         /* Find resource and lock it in READER mode */
1325         /* search for available resource slot */
1326 
1327         i = (int)(rnum / RSMRC_BLKSZ);
1328         j = (int)(rnum % RSMRC_BLKSZ);
1329 
1330         if (i >= rsm_resource.rsmrc_len) {
1331                 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1332                     "rsmresource_lookup done\n"));
1333                 return (NULL);
1334         }
1335 
1336         rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
1337 
1338         blk = rsm_resource.rsmrc_root[i];
1339         if (blk != NULL) {
1340                 ASSERT(i < rsm_resource.rsmrc_len);
1341                 ASSERT(i < rsm_resource.rsmrc_sz);
1342 
1343                 p = blk->rsmrcblk_blks[j];
1344                 if (lock == RSM_LOCK) {
1345                         if (p != RSMRC_RESERVED) {
1346                                 mutex_enter(&p->rsmrc_lock);
1347                         } else {
1348                                 p = NULL;
1349                         }
1350                 }
1351         } else {
1352                 p = NULL;
1353         }
1354         rw_exit(&rsm_resource.rsmrc_lock);
1355 
1356         DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1357             "rsmresource_lookup done\n"));
1358 
1359         return (p);
1360 }
1361 
1362 static void
1363 rsmresource_insert(minor_t rnum, rsmresource_t *p, rsm_resource_type_t type)
1364 {
1365         /* Find resource and lock it in READER mode */
1366         /* Caller can upgrade if need be */
1367         /* search for available resource slot */
1368         int i, j;
1369         rsmresource_blk_t *blk;
1370 
1371         DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1372             "rsmresource_insert enter\n"));
1373 
1374         i = (int)(rnum / RSMRC_BLKSZ);
1375         j = (int)(rnum % RSMRC_BLKSZ);
1376 
1377         p->rsmrc_type = type;
1378         p->rsmrc_num = rnum;
1379 
1380         rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
1381 
1382         ASSERT(rsm_resource.rsmrc_root);
1383         ASSERT(i < rsm_resource.rsmrc_len);
1384         ASSERT(i < rsm_resource.rsmrc_sz);
1385 
1386         blk = rsm_resource.rsmrc_root[i];
1387         ASSERT(blk);
1388 
1389         ASSERT(blk->rsmrcblk_blks[j] == RSMRC_RESERVED);
1390 
1391         blk->rsmrcblk_blks[j] = p;
1392 
1393         DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1394             "rsmresource_insert done\n"));
1395 
1396         rw_exit(&rsm_resource.rsmrc_lock);
1397 }
1398 
1399 static void
1400 rsmresource_destroy()
1401 {
1402         int i, j;
1403 
1404         DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1405             "rsmresource_destroy enter\n"));
1406 
1407         rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER);
1408 
1409         for (i = 0; i < rsm_resource.rsmrc_len; i++) {
1410                 rsmresource_blk_t       *blk;
1411 
1412                 blk = rsm_resource.rsmrc_root[i];
1413                 if (blk == NULL) {
1414                         continue;
1415                 }
1416                 for (j = 0; j < RSMRC_BLKSZ; j++) {
1417                         if (blk->rsmrcblk_blks[j] != NULL) {
1418                                 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1419                                     "Not null slot %d, %lx\n", j,
1420                                     (size_t)blk->rsmrcblk_blks[j]));
1421                         }
1422                 }
1423                 kmem_free(blk, sizeof (*blk));
1424                 rsm_resource.rsmrc_root[i] = NULL;
1425         }
1426         if (rsm_resource.rsmrc_root) {
1427                 i = rsm_resource.rsmrc_sz * (int)sizeof (rsmresource_blk_t *);
1428                 kmem_free(rsm_resource.rsmrc_root, (uint_t)i);
1429                 rsm_resource.rsmrc_root = NULL;
1430                 rsm_resource.rsmrc_len = 0;
1431                 rsm_resource.rsmrc_sz = 0;
1432         }
1433 
1434         DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1435             "rsmresource_destroy done\n"));
1436 
1437         rw_exit(&rsm_resource.rsmrc_lock);
1438 }
1439 
1440 
1441 /* ******************** Generic Key Hash Table Management ********* */
1442 static rsmresource_t *
1443 rsmhash_lookup(rsmhash_table_t *rhash, rsm_memseg_id_t key,
1444     rsm_resource_state_t state)
1445 {
1446         rsmresource_t   *p;
1447         uint_t          hashval;
1448         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1449 
1450         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_lookup enter\n"));
1451 
1452         hashval = rsmhash(key);
1453 
1454         DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_lookup %u=%d\n",
1455             key, hashval));
1456 
1457         rw_enter(&rhash->rsmhash_rw, RW_READER);
1458 
1459         p = (rsmresource_t *)rsmhash_getbkt(rhash, hashval);
1460 
1461         for (; p; p = p->rsmrc_next) {
1462                 if (p->rsmrc_key == key) {
1463                         /* acquire resource lock */
1464                         RSMRC_LOCK(p);
1465                         break;
1466                 }
1467         }
1468 
1469         rw_exit(&rhash->rsmhash_rw);
1470 
1471         if (p != NULL && p->rsmrc_state != state) {
1472                 /* state changed, release lock and return null */
1473                 RSMRC_UNLOCK(p);
1474                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
1475                     "rsmhash_lookup done: state changed\n"));
1476                 return (NULL);
1477         }
1478 
1479         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_lookup done\n"));
1480 
1481         return (p);
1482 }
1483 
1484 static void
1485 rsmhash_rm(rsmhash_table_t *rhash, rsmresource_t *rcelm)
1486 {
1487         rsmresource_t           *p, **back;
1488         uint_t                  hashval;
1489         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1490 
1491         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_rm enter\n"));
1492 
1493         hashval = rsmhash(rcelm->rsmrc_key);
1494 
1495         DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_rm %u=%d\n",
1496             rcelm->rsmrc_key, hashval));
1497 
1498         /*
1499          * It's ok not to find the segment.
1500          */
1501         rw_enter(&rhash->rsmhash_rw, RW_WRITER);
1502 
1503         back = (rsmresource_t **)rsmhash_bktaddr(rhash, hashval);
1504 
1505         for (; (p = *back) != NULL;  back = &p->rsmrc_next) {
1506                 if (p == rcelm) {
1507                         *back = rcelm->rsmrc_next;
1508                         break;
1509                 }
1510         }
1511 
1512         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_rm done\n"));
1513 
1514         rw_exit(&rhash->rsmhash_rw);
1515 }
1516 
1517 static int
1518 rsmhash_add(rsmhash_table_t *rhash, rsmresource_t *new, rsm_memseg_id_t key,
1519     int dup_check, rsm_resource_state_t state)
1520 {
1521         rsmresource_t   *p = NULL, **bktp;
1522         uint_t          hashval;
1523         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1524 
1525         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_add enter\n"));
1526 
1527         /* lock table */
1528         rw_enter(&rhash->rsmhash_rw, RW_WRITER);
1529 
1530         /*
1531          * If the current resource state is other than the state passed in
1532          * then the resource is (probably) already on the list. eg. for an
1533          * import segment if the state is not RSM_STATE_NEW then it's on the
1534          * list already.
1535          */
1536         RSMRC_LOCK(new);
1537         if (new->rsmrc_state != state) {
1538                 RSMRC_UNLOCK(new);
1539                 rw_exit(&rhash->rsmhash_rw);
1540                 return (RSMERR_BAD_SEG_HNDL);
1541         }
1542 
1543         hashval = rsmhash(key);
1544         DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_add %d\n", hashval));
1545 
1546         if (dup_check) {
1547                 /*
1548                  * Used for checking export segments; don't want to have
1549                  * the same key used for multiple segments.
1550                  */
1551 
1552                 p = (rsmresource_t *)rsmhash_getbkt(rhash, hashval);
1553 
1554                 for (; p; p = p->rsmrc_next) {
1555                         if (p->rsmrc_key == key) {
1556                                 RSMRC_UNLOCK(new);
1557                                 break;
1558                         }
1559                 }
1560         }
1561 
1562         if (p == NULL) {
1563                 /* Key doesn't exist, add it */
1564 
1565                 bktp = (rsmresource_t **)rsmhash_bktaddr(rhash, hashval);
1566 
1567                 new->rsmrc_key = key;
1568                 new->rsmrc_next = *bktp;
1569                 *bktp = new;
1570         }
1571 
1572         rw_exit(&rhash->rsmhash_rw);
1573 
1574         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_add done\n"));
1575 
1576         return (p == NULL ? RSM_SUCCESS : RSMERR_SEGID_IN_USE);
1577 }
1578 
1579 /*
1580  * XOR each byte of the key.
1581  */
1582 static uint_t
1583 rsmhash(rsm_memseg_id_t key)
1584 {
1585         uint_t  hash = key;
1586 
1587         hash ^=  (key >> 8);
1588         hash ^=  (key >> 16);
1589         hash ^=  (key >> 24);
1590 
1591         return (hash % rsm_hash_size);
1592 
1593 }
1594 
1595 /*
1596  * generic function to get a specific bucket
1597  */
1598 static void *
1599 rsmhash_getbkt(rsmhash_table_t *rhash, uint_t hashval)
1600 {
1601 
1602         if (rhash->bucket == NULL)
1603                 return (NULL);
1604         else
1605                 return ((void *)rhash->bucket[hashval]);
1606 }
1607 
1608 /*
1609  * generic function to get a specific bucket's address
1610  */
1611 static void **
1612 rsmhash_bktaddr(rsmhash_table_t *rhash, uint_t hashval)
1613 {
1614         if (rhash->bucket == NULL)
1615                 return (NULL);
1616         else
1617                 return ((void **)&(rhash->bucket[hashval]));
1618 }
1619 
1620 /*
1621  * generic function to alloc a hash table
1622  */
1623 static void
1624 rsmhash_alloc(rsmhash_table_t *rhash, int size)
1625 {
1626         rhash->bucket = (rsmresource_t **)
1627             kmem_zalloc(size * sizeof (rsmresource_t *), KM_SLEEP);
1628 }
1629 
1630 /*
1631  * generic function to free a hash table
1632  */
1633 static void
1634 rsmhash_free(rsmhash_table_t *rhash, int size)
1635 {
1636 
1637         kmem_free((void *)rhash->bucket, size * sizeof (caddr_t));
1638         rhash->bucket = NULL;
1639 
1640 }
1641 /* *********************** Exported Segment Key Management ************ */
1642 
/*
 * Wrappers around the generic hash routines for the export segment table.
 * Every macro argument is parenthesized and every expansion is a single
 * parenthesized expression, so the macros can be used like function
 * calls.
 *
 * rsmexport_add	adds with duplicate-key checking; the segment is
 *			expected to be in RSM_STATE_BIND.
 * rsmexport_rm		removes a segment from the export table.
 * rsmexport_lookup	looks up a key, expecting RSM_STATE_EXPORT; yields
 *			a rsmseg_t pointer.
 */
#define	rsmexport_add(new, key)		\
	(rsmhash_add(&rsm_export_segs, (rsmresource_t *)(new), (key), 1, \
	    RSM_STATE_BIND))

#define	rsmexport_rm(arg)	\
	(rsmhash_rm(&rsm_export_segs, (rsmresource_t *)(arg)))

#define	rsmexport_lookup(key)	\
	((rsmseg_t *)rsmhash_lookup(&rsm_export_segs, (key), \
	    RSM_STATE_EXPORT))

/* ************************** Import Segment List Management ********** */

/*
 *  Add segment to import list. This will be useful for paging and loopback
 * segment unloading.  No duplicate-key check is done for imports; the
 * segment is expected to be in RSM_STATE_NEW.
 */
#define	rsmimport_add(arg, key)	\
	(rsmhash_add(&rsm_import_segs, (rsmresource_t *)(arg), (key), 0, \
	    RSM_STATE_NEW))

#define	rsmimport_rm(arg)	\
	(rsmhash_rm(&rsm_import_segs, (rsmresource_t *)(arg)))
1665 
1666 /*
1667  *      #define rsmimport_lookup(key)   \
1668  *      (rsmseg_t *)rsmhash_lookup(&rsm_import_segs, (key), RSM_STATE_CONNECT)
1669  */
1670 
1671 /*
1672  * increase the ref count and make the import segment point to the
1673  * shared data structure. Return a pointer to the share data struct
1674  * and the shared data struct is locked upon return
1675  */
1676 static rsm_import_share_t *
1677 rsmshare_get(rsm_memseg_id_t key, rsm_node_id_t node, adapter_t *adapter,
1678     rsmseg_t *segp)
1679 {
1680         uint_t          hash;
1681         rsmresource_t           *p;
1682         rsm_import_share_t      *shdatap;
1683         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1684 
1685         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmshare_get enter\n"));
1686 
1687         hash = rsmhash(key);
1688         /* lock table */
1689         rw_enter(&rsm_import_segs.rsmhash_rw, RW_WRITER);
1690         DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmshare_get:key=%u, hash=%d\n",
1691             key, hash));
1692 
1693         p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hash);
1694 
1695         for (; p; p = p->rsmrc_next) {
1696                 /*
1697                  * Look for an entry that is importing the same exporter
1698                  * with the share data structure allocated.
1699                  */
1700                 if ((p->rsmrc_key == key) &&
1701                     (p->rsmrc_node == node) &&
1702                     (p->rsmrc_adapter == adapter) &&
1703                     (((rsmseg_t *)p)->s_share != NULL)) {
1704                         shdatap = ((rsmseg_t *)p)->s_share;
1705                         break;
1706                 }
1707         }
1708 
1709         if (p == NULL) {
1710                 /* we are the first importer, create the shared data struct */
1711                 shdatap = kmem_zalloc(sizeof (rsm_import_share_t), KM_SLEEP);
1712                 shdatap->rsmsi_state = RSMSI_STATE_NEW;
1713                 shdatap->rsmsi_segid = key;
1714                 shdatap->rsmsi_node = node;
1715                 mutex_init(&shdatap->rsmsi_lock, NULL, MUTEX_DRIVER, NULL);
1716                 cv_init(&shdatap->rsmsi_cv, NULL, CV_DRIVER, 0);
1717         }
1718 
1719         rsmseglock_acquire(segp);
1720 
1721         /* we grab the shared lock before returning from this function */
1722         mutex_enter(&shdatap->rsmsi_lock);
1723 
1724         shdatap->rsmsi_refcnt++;
1725         segp->s_share = shdatap;
1726 
1727         rsmseglock_release(segp);
1728 
1729         rw_exit(&rsm_import_segs.rsmhash_rw);
1730 
1731         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmshare_get done\n"));
1732 
1733         return (shdatap);
1734 }
1735 
1736 /*
1737  * the shared data structure should be locked before calling
1738  * rsmsharecv_signal().
1739  * Change the state and signal any waiting segments.
1740  */
1741 void
1742 rsmsharecv_signal(rsmseg_t *seg, int oldstate, int newstate)
1743 {
1744         ASSERT(rsmsharelock_held(seg));
1745 
1746         if (seg->s_share->rsmsi_state == oldstate) {
1747                 seg->s_share->rsmsi_state = newstate;
1748                 cv_broadcast(&seg->s_share->rsmsi_cv);
1749         }
1750 }
1751 
1752 /*
1753  * Add to the hash table
1754  */
1755 static void
1756 importer_list_add(rsm_node_id_t node, rsm_memseg_id_t key, rsm_addr_t hwaddr,
1757     void *cookie)
1758 {
1759 
1760         importing_token_t       *head;
1761         importing_token_t       *new_token;
1762         int                     index;
1763 
1764         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1765 
1766         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_add enter\n"));
1767 
1768         new_token = kmem_zalloc(sizeof (importing_token_t), KM_SLEEP);
1769         new_token->importing_node = node;
1770         new_token->key = key;
1771         new_token->import_segment_cookie = cookie;
1772         new_token->importing_adapter_hwaddr = hwaddr;
1773 
1774         index = rsmhash(key);
1775 
1776         mutex_enter(&importer_list.lock);
1777 
1778         head = importer_list.bucket[index];
1779         importer_list.bucket[index] = new_token;
1780         new_token->next = head;
1781         mutex_exit(&importer_list.lock);
1782 
1783         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_add done\n"));
1784 }
1785 
1786 static void
1787 importer_list_rm(rsm_node_id_t node,  rsm_memseg_id_t key, void *cookie)
1788 {
1789 
1790         importing_token_t       *prev, *token = NULL;
1791         int                     index;
1792         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1793 
1794         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_rm enter\n"));
1795 
1796         index = rsmhash(key);
1797 
1798         mutex_enter(&importer_list.lock);
1799 
1800         token = importer_list.bucket[index];
1801 
1802         prev = token;
1803         while (token != NULL) {
1804                 if (token->importing_node == node &&
1805                     token->import_segment_cookie == cookie) {
1806                         if (prev == token)
1807                                 importer_list.bucket[index] = token->next;
1808                         else
1809                                 prev->next = token->next;
1810                         kmem_free((void *)token, sizeof (*token));
1811                         break;
1812                 } else {
1813                         prev = token;
1814                         token = token->next;
1815                 }
1816         }
1817 
1818         mutex_exit(&importer_list.lock);
1819 
1820         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_rm done\n"));
1821 
1822 
1823 }
1824 
1825 /* **************************Segment Structure Management ************* */
1826 
1827 /*
1828  * Free segment structure
1829  */
1830 static void
1831 rsmseg_free(rsmseg_t *seg)
1832 {
1833 
1834         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1835 
1836         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_free enter\n"));
1837 
1838         /* need to take seglock here to avoid race with rsmmap_unmap() */
1839         rsmseglock_acquire(seg);
1840         if (seg->s_ckl != NULL) {
1841                 /* Segment is still busy */
1842                 seg->s_state = RSM_STATE_END;
1843                 rsmseglock_release(seg);
1844                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
1845                     "rsmseg_free done\n"));
1846                 return;
1847         }
1848 
1849         rsmseglock_release(seg);
1850 
1851         ASSERT(seg->s_state == RSM_STATE_END || seg->s_state == RSM_STATE_NEW);
1852 
1853         /*
1854          * If it's an importer decrement the refcount
1855          * and if its down to zero free the shared data structure.
1856          * This is where failures during rsm_connect() are unrefcounted
1857          */
1858         if (seg->s_share != NULL) {
1859 
1860                 ASSERT(seg->s_type == RSM_RESOURCE_IMPORT_SEGMENT);
1861 
1862                 rsmsharelock_acquire(seg);
1863 
1864                 ASSERT(seg->s_share->rsmsi_refcnt > 0);
1865 
1866                 seg->s_share->rsmsi_refcnt--;
1867 
1868                 if (seg->s_share->rsmsi_refcnt == 0) {
1869                         rsmsharelock_release(seg);
1870                         mutex_destroy(&seg->s_share->rsmsi_lock);
1871                         cv_destroy(&seg->s_share->rsmsi_cv);
1872                         kmem_free((void *)(seg->s_share),
1873                             sizeof (rsm_import_share_t));
1874                 } else {
1875                         rsmsharelock_release(seg);
1876                 }
1877                 /*
1878                  * The following needs to be done after any
1879                  * rsmsharelock calls which use seg->s_share.
1880                  */
1881                 seg->s_share = NULL;
1882         }
1883 
1884         cv_destroy(&seg->s_cv);
1885         mutex_destroy(&seg->s_lock);
1886         rsmacl_free(seg->s_acl, seg->s_acl_len);
1887         rsmpiacl_free(seg->s_acl_in, seg->s_acl_len);
1888         if (seg->s_adapter)
1889                 rsmka_release_adapter(seg->s_adapter);
1890 
1891         kmem_free((void *)seg, sizeof (*seg));
1892 
1893         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_free done\n"));
1894 
1895 }
1896 
1897 
1898 static rsmseg_t *
1899 rsmseg_alloc(minor_t num, struct cred *cred)
1900 {
1901         rsmseg_t        *new;
1902         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1903 
1904         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_alloc enter\n"));
1905         /*
1906          * allocate memory for new segment. This should be a segkmem cache.
1907          */
1908         new = (rsmseg_t *)kmem_zalloc(sizeof (*new), KM_SLEEP);
1909 
1910         new->s_state = RSM_STATE_NEW;
1911         new->s_minor = num;
1912         new->s_acl_len       = 0;
1913         new->s_cookie = NULL;
1914         new->s_adapter = NULL;
1915 
1916         new->s_mode = 0777 & ~PTOU((ttoproc(curthread)))->u_cmask;
1917         /* we don't have a key yet, will set at export/connect */
1918         new->s_uid  = crgetuid(cred);
1919         new->s_gid  = crgetgid(cred);
1920 
1921         mutex_init(&new->s_lock, NULL, MUTEX_DRIVER, (void *)NULL);
1922         cv_init(&new->s_cv, NULL, CV_DRIVER, 0);
1923 
1924         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_alloc done\n"));
1925 
1926         return (new);
1927 }
1928 
1929 /* ******************************** Driver Open/Close/Poll *************** */
1930 
1931 /*ARGSUSED1*/
1932 static int
1933 rsm_open(dev_t *devp, int flag, int otyp, struct cred *cred)
1934 {
1935         minor_t rnum;
1936         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI);
1937 
1938         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_open enter\n"));
1939         /*
1940          * Char only
1941          */
1942         if (otyp != OTYP_CHR) {
1943                 DBG_PRINTF((category, RSM_ERR, "rsm_open: bad otyp\n"));
1944                 return (EINVAL);
1945         }
1946 
1947         /*
1948          * Only zero can be opened, clones are used for resources.
1949          */
1950         if (getminor(*devp) != RSM_DRIVER_MINOR) {
1951                 DBG_PRINTF((category, RSM_ERR,
1952                     "rsm_open: bad minor %d\n", getminor(*devp)));
1953                 return (ENODEV);
1954         }
1955 
1956         if ((flag & FEXCL) != 0 && secpolicy_excl_open(cred) != 0) {
1957                 DBG_PRINTF((category, RSM_ERR, "rsm_open: bad perm\n"));
1958                 return (EPERM);
1959         }
1960 
1961         if (!(flag & FWRITE)) {
1962                 /*
1963                  * The library function _rsm_librsm_init calls open for
1964                  * /dev/rsm with flag set to O_RDONLY.  We want a valid
1965                  * file descriptor to be returned for minor device zero.
1966                  */
1967 
1968                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
1969                     "rsm_open RDONLY done\n"));
1970                 return (DDI_SUCCESS);
1971         }
1972 
1973         /*
1974          * - allocate new minor number and segment.
1975          * - add segment to list of all segments.
1976          * - set minordev data to segment
1977          * - update devp argument to new device
1978          * - update s_cred to cred; make sure you do crhold(cred);
1979          */
1980 
1981         /* allocate a new resource number */
1982         if (rsmresource_alloc(&rnum) == RSM_SUCCESS) {
1983                 /*
1984                  * We will bind this minor to a specific resource in first
1985                  * ioctl
1986                  */
1987                 *devp = makedevice(getmajor(*devp), rnum);
1988         } else {
1989                 return (EAGAIN);
1990         }
1991 
1992         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_open done\n"));
1993         return (DDI_SUCCESS);
1994 }
1995 
1996 static void
1997 rsmseg_close(rsmseg_t *seg, int force_flag)
1998 {
1999         int e = RSM_SUCCESS;
2000 
2001         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI);
2002 
2003         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_close enter\n"));
2004 
2005         rsmseglock_acquire(seg);
2006         if (!force_flag && (seg->s_hdr.rsmrc_type ==
2007             RSM_RESOURCE_EXPORT_SEGMENT)) {
2008                 /*
2009                  * If we are processing rsm_close wait for force_destroy
2010                  * processing to complete since force_destroy processing
2011                  * needs to finish first before we can free the segment.
2012                  * force_destroy is only for export segments
2013                  */
2014                 while (seg->s_flags & RSM_FORCE_DESTROY_WAIT) {
2015                         cv_wait(&seg->s_cv, &seg->s_lock);
2016                 }
2017         }
2018         rsmseglock_release(seg);
2019 
2020         /* It's ok to read the state without a lock */
2021         switch (seg->s_state) {
2022         case RSM_STATE_EXPORT:
2023         case RSM_STATE_EXPORT_QUIESCING:
2024         case RSM_STATE_EXPORT_QUIESCED:
2025                 e = rsm_unpublish(seg, 1);
2026                 /* FALLTHRU */
2027         case RSM_STATE_BIND_QUIESCED:
2028                 /* FALLTHRU */
2029         case RSM_STATE_BIND:
2030                 e = rsm_unbind(seg);
2031                 if (e != RSM_SUCCESS && force_flag == 1)
2032                         return;
2033                 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT);
2034                 /* FALLTHRU */
2035         case RSM_STATE_NEW_QUIESCED:
2036                 rsmseglock_acquire(seg);
2037                 seg->s_state = RSM_STATE_NEW;
2038                 cv_broadcast(&seg->s_cv);
2039                 rsmseglock_release(seg);
2040                 break;
2041         case RSM_STATE_NEW:
2042                 break;
2043         case RSM_STATE_ZOMBIE:
2044                 /*
2045                  * Segments in this state have been removed off the
2046                  * exported segments list and have been unpublished
2047                  * and unbind. These segments have been removed during
2048                  * a callback to the rsm_export_force_destroy, which
2049                  * is called for the purpose of unlocking these
2050                  * exported memory segments when a process exits but
2051                  * leaves the segments locked down since rsm_close is
2052                  * is not called for the segments. This can happen
2053                  * when a process calls fork or exec and then exits.
2054                  * Once the segments are in the ZOMBIE state, all that
2055                  * remains is to destroy them when rsm_close is called.
2056                  * This is done here. Thus, for such segments the
2057                  * the state is changed to new so that later in this
2058                  * function rsmseg_free is called.
2059                  */
2060                 rsmseglock_acquire(seg);
2061                 seg->s_state = RSM_STATE_NEW;
2062                 rsmseglock_release(seg);
2063                 break;
2064         case RSM_STATE_MAP_QUIESCE:
2065         case RSM_STATE_ACTIVE:
2066                 /* Disconnect will handle the unmap */
2067         case RSM_STATE_CONN_QUIESCE:
2068         case RSM_STATE_CONNECT:
2069         case RSM_STATE_DISCONNECT:
2070                 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
2071                 (void) rsm_disconnect(seg);
2072                 break;
2073         case RSM_STATE_MAPPING:
2074                 /*FALLTHRU*/
2075         case RSM_STATE_END:
2076                 DBG_PRINTF((category, RSM_ERR,
2077                     "Invalid segment state %d in rsm_close\n", seg->s_state));
2078                 break;
2079         default:
2080                 DBG_PRINTF((category, RSM_ERR,
2081                     "Invalid segment state %d in rsm_close\n", seg->s_state));
2082                 break;
2083         }
2084 
2085         /*
2086          * check state.
2087          * - make sure you do crfree(s_cred);
2088          * release segment and minor number
2089          */
2090         ASSERT(seg->s_state == RSM_STATE_NEW);
2091 
2092         /*
2093          * The export_force_destroy callback is created to unlock
2094          * the exported segments of a process
2095          * when the process does a fork or exec and then exits calls this
2096          * function with the force flag set to 1 which indicates that the
2097          * segment state must be converted to ZOMBIE. This state means that the
2098          * segments still exist and have been unlocked and most importantly the
2099          * only operation allowed is to destroy them on an rsm_close.
2100          */
2101         if (force_flag) {
2102                 rsmseglock_acquire(seg);
2103                 seg->s_state = RSM_STATE_ZOMBIE;
2104                 rsmseglock_release(seg);
2105         } else {
2106                 rsmseg_free(seg);
2107         }
2108 
2109         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_close done\n"));
2110 }
2111 
2112 static int
2113 rsm_close(dev_t dev, int flag, int otyp, cred_t *cred)
2114 {
2115         minor_t rnum = getminor(dev);
2116         rsmresource_t *res;
2117         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI);
2118 
2119         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close enter\n"));
2120 
2121         flag = flag; cred = cred;
2122 
2123         if (otyp != OTYP_CHR)
2124                 return (EINVAL);
2125 
2126         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rnum = %d\n", rnum));
2127 
2128         /*
2129          * At this point we are the last reference to the resource.
2130          * Free resource number from resource table.
2131          * It's ok to remove number before we free the segment.
2132          * We need to lock the resource to protect against remote calls.
2133          */
2134         if (rnum == RSM_DRIVER_MINOR ||
2135             (res = rsmresource_free(rnum)) == NULL) {
2136                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close done\n"));
2137                 return (DDI_SUCCESS);
2138         }
2139 
2140         switch (res->rsmrc_type) {
2141         case RSM_RESOURCE_EXPORT_SEGMENT:
2142         case RSM_RESOURCE_IMPORT_SEGMENT:
2143                 rsmseg_close((rsmseg_t *)res, 0);
2144                 break;
2145         case RSM_RESOURCE_BAR:
2146                 DBG_PRINTF((category, RSM_ERR, "bad resource in rsm_close\n"));
2147                 break;
2148         default:
2149                 break;
2150         }
2151 
2152         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close done\n"));
2153 
2154         return (DDI_SUCCESS);
2155 }
2156 
2157 /*
2158  * rsm_inc_pgcnt
2159  *
2160  * Description: increment rsm page counter.
2161  *
2162  * Parameters:  pgcnt_t pnum;   number of pages to be used
2163  *
2164  * Returns:     RSM_SUCCESS     if memory limit not exceeded
2165  *              ENOSPC          if memory limit exceeded. In this case, the
2166  *                              page counter remains unchanged.
2167  *
2168  */
2169 static int
2170 rsm_inc_pgcnt(pgcnt_t pnum)
2171 {
2172         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2173         if (rsm_pgcnt_max == 0) { /* no upper limit has been set */
2174                 return (RSM_SUCCESS);
2175         }
2176 
2177         mutex_enter(&rsm_pgcnt_lock);
2178 
2179         if (rsm_pgcnt + pnum > rsm_pgcnt_max) {
2180                 /* ensure that limits have not been exceeded */
2181                 mutex_exit(&rsm_pgcnt_lock);
2182                 return (RSMERR_INSUFFICIENT_MEM);
2183         }
2184 
2185         rsm_pgcnt += pnum;
2186         DBG_PRINTF((category, RSM_DEBUG, "rsm_pgcnt incr to %d.\n",
2187             rsm_pgcnt));
2188         mutex_exit(&rsm_pgcnt_lock);
2189 
2190         return (RSM_SUCCESS);
2191 }
2192 
2193 /*
2194  * rsm_dec_pgcnt
2195  *
2196  * Description: decrement rsm page counter.
2197  *
2198  * Parameters:  pgcnt_t pnum;   number of pages freed
2199  *
2200  */
2201 static void
2202 rsm_dec_pgcnt(pgcnt_t pnum)
2203 {
2204         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2205 
2206         if (rsm_pgcnt_max == 0) { /* no upper limit has been set */
2207                 return;
2208         }
2209 
2210         mutex_enter(&rsm_pgcnt_lock);
2211         ASSERT(rsm_pgcnt >= pnum);
2212         rsm_pgcnt -= pnum;
2213         DBG_PRINTF((category, RSM_DEBUG, "rsm_pgcnt decr to %d.\n",
2214             rsm_pgcnt));
2215         mutex_exit(&rsm_pgcnt_lock);
2216 }
2217 
2218 static struct umem_callback_ops rsm_as_ops = {
2219         UMEM_CALLBACK_VERSION, /* version number */
2220         rsm_export_force_destroy,
2221 };
2222 
2223 static int
2224 rsm_bind_pages(ddi_umem_cookie_t *cookie, caddr_t vaddr, size_t len,
2225     proc_t *procp)
2226 {
2227         int error = RSM_SUCCESS;
2228         ulong_t pnum;
2229         struct umem_callback_ops *callbackops = &rsm_as_ops;
2230 
2231         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2232 
2233         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind_pages enter\n"));
2234 
2235         /*
2236          * Make sure vaddr and len are aligned on a page boundary
2237          */
2238         if ((uintptr_t)vaddr & (PAGESIZE - 1)) {
2239                 return (RSMERR_BAD_ADDR);
2240         }
2241 
2242         if (len & (PAGESIZE - 1)) {
2243                 return (RSMERR_BAD_LENGTH);
2244         }
2245 
2246         /*
2247          * Find number of pages
2248          */
2249         pnum = btopr(len);
2250         error = rsm_inc_pgcnt(pnum);
2251         if (error != RSM_SUCCESS) {
2252                 DBG_PRINTF((category, RSM_ERR,
2253                     "rsm_bind_pages:mem limit exceeded\n"));
2254                 return (RSMERR_INSUFFICIENT_MEM);
2255         }
2256 
2257         error = umem_lockmemory(vaddr, len,
2258             DDI_UMEMLOCK_WRITE|DDI_UMEMLOCK_READ|DDI_UMEMLOCK_LONGTERM,
2259             cookie,
2260             callbackops, procp);
2261 
2262         if (error) {
2263                 rsm_dec_pgcnt(pnum);
2264                 DBG_PRINTF((category, RSM_ERR,
2265                     "rsm_bind_pages:ddi_umem_lock failed\n"));
2266                 /*
2267                  * ddi_umem_lock, in the case of failure, returns one of
2268                  * the following three errors. These are translated into
2269                  * the RSMERR namespace and returned.
2270                  */
2271                 if (error == EFAULT)
2272                         return (RSMERR_BAD_ADDR);
2273                 else if (error == EACCES)
2274                         return (RSMERR_PERM_DENIED);
2275                 else
2276                         return (RSMERR_INSUFFICIENT_MEM);
2277         }
2278 
2279         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind_pages done\n"));
2280 
2281         return (error);
2282 
2283 }
2284 
2285 static int
2286 rsm_unbind_pages(rsmseg_t *seg)
2287 {
2288         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2289 
2290         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind_pages enter\n"));
2291 
2292         ASSERT(rsmseglock_held(seg));
2293 
2294         if (seg->s_cookie != NULL) {
2295                 /* unlock address range */
2296                 ddi_umem_unlock(seg->s_cookie);
2297                 rsm_dec_pgcnt(btopr(seg->s_len));
2298                 seg->s_cookie = NULL;
2299         }
2300 
2301         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind_pages done\n"));
2302 
2303         return (RSM_SUCCESS);
2304 }
2305 
2306 
2307 static int
2308 rsm_bind(rsmseg_t *seg, rsm_ioctlmsg_t *msg, intptr_t dataptr, int mode)
2309 {
2310         int e;
2311         adapter_t *adapter;
2312         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2313 
2314         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind enter\n"));
2315 
2316         adapter = rsm_getadapter(msg, mode);
2317         if (adapter == NULL) {
2318                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2319                     "rsm_bind done:no adapter\n"));
2320                 return (RSMERR_CTLR_NOT_PRESENT);
2321         }
2322 
2323         /* lock address range */
2324         if (msg->vaddr == NULL) {
2325                 rsmka_release_adapter(adapter);
2326                 DBG_PRINTF((category, RSM_ERR,
2327                     "rsm: rsm_bind done: invalid vaddr\n"));
2328                 return (RSMERR_BAD_ADDR);
2329         }
2330         if (msg->len <= 0) {
2331                 rsmka_release_adapter(adapter);
2332                 DBG_PRINTF((category, RSM_ERR,
2333                     "rsm_bind: invalid length\n"));
2334                 return (RSMERR_BAD_LENGTH);
2335         }
2336 
2337         /* Lock segment */
2338         rsmseglock_acquire(seg);
2339 
2340         while (seg->s_state == RSM_STATE_NEW_QUIESCED) {
2341                 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
2342                         DBG_PRINTF((category, RSM_DEBUG,
2343                             "rsm_bind done: cv_wait INTERRUPTED"));
2344                         rsmka_release_adapter(adapter);
2345                         rsmseglock_release(seg);
2346                         return (RSMERR_INTERRUPTED);
2347                 }
2348         }
2349 
2350         ASSERT(seg->s_state == RSM_STATE_NEW);
2351 
2352         ASSERT(seg->s_cookie == NULL);
2353 
2354         e = rsm_bind_pages(&seg->s_cookie, msg->vaddr, msg->len, curproc);
2355         if (e == RSM_SUCCESS) {
2356                 seg->s_flags |= RSM_USER_MEMORY;
2357                 if (msg->perm & RSM_ALLOW_REBIND) {
2358                         seg->s_flags |= RSMKA_ALLOW_UNBIND_REBIND;
2359                 }
2360                 if (msg->perm & RSM_CREATE_SEG_DONTWAIT) {
2361                         seg->s_flags |= RSMKA_SET_RESOURCE_DONTWAIT;
2362                 }
2363                 seg->s_region.r_vaddr = msg->vaddr;
2364                 /*
2365                  * Set the s_pid value in the segment structure. This is used
2366                  * to identify exported segments belonging to a particular
2367                  * process so that when the process exits, these segments can
2368                  * be unlocked forcefully even if rsm_close is not called on
2369                  * process exit since there maybe other processes referencing
2370                  * them (for example on a fork or exec).
2371                  * The s_pid value is also used to authenticate the process
2372                  * doing a publish or unpublish on the export segment. Only
2373                  * the creator of the export segment has a right to do a
2374                  * publish or unpublish and unbind on the segment.
2375                  */
2376                 seg->s_pid = ddi_get_pid();
2377                 seg->s_len = msg->len;
2378                 seg->s_state = RSM_STATE_BIND;
2379                 seg->s_adapter = adapter;
2380                 seg->s_proc = curproc;
2381         } else {
2382                 rsmka_release_adapter(adapter);
2383                 DBG_PRINTF((category, RSM_WARNING,
2384                     "unable to lock down pages\n"));
2385         }
2386 
2387         msg->rnum = seg->s_minor;
2388         /* Unlock segment */
2389         rsmseglock_release(seg);
2390 
2391         if (e == RSM_SUCCESS) {
2392                 /* copyout the resource number */
2393 #ifdef _MULTI_DATAMODEL
2394                 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
2395                         rsm_ioctlmsg32_t msg32;
2396 
2397                         msg32.rnum = msg->rnum;
2398                         if (ddi_copyout((caddr_t)&msg32.rnum,
2399                             (caddr_t)&((rsm_ioctlmsg32_t *)dataptr)->rnum,
2400                             sizeof (minor_t), mode)) {
2401                                 rsmka_release_adapter(adapter);
2402                                 e = RSMERR_BAD_ADDR;
2403                         }
2404                 }
2405 #endif
2406                 if (ddi_copyout((caddr_t)&msg->rnum,
2407                     (caddr_t)&((rsm_ioctlmsg_t *)dataptr)->rnum,
2408                     sizeof (minor_t), mode)) {
2409                         rsmka_release_adapter(adapter);
2410                         e = RSMERR_BAD_ADDR;
2411                 }
2412         }
2413 
2414         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind done\n"));
2415 
2416         return (e);
2417 }
2418 
2419 static void
2420 rsm_remap_local_importers(rsm_node_id_t src_nodeid,
2421     rsm_memseg_id_t ex_segid,
2422     ddi_umem_cookie_t cookie)
2423 
2424 {
2425         rsmresource_t   *p = NULL;
2426         rsmhash_table_t *rhash = &rsm_import_segs;
2427         uint_t          index;
2428 
2429         DBG_PRINTF((RSM_KERNEL_AGENT | RSM_FUNC_ALL, RSM_DEBUG_VERBOSE,
2430             "rsm_remap_local_importers enter\n"));
2431 
2432         index = rsmhash(ex_segid);
2433 
2434         rw_enter(&rhash->rsmhash_rw, RW_READER);
2435 
2436         p = rsmhash_getbkt(rhash, index);
2437 
2438         for (; p; p = p->rsmrc_next) {
2439                 rsmseg_t *seg = (rsmseg_t *)p;
2440                 rsmseglock_acquire(seg);
2441                 /*
2442                  * Change the s_cookie value of only the local importers
2443                  * which have been mapped (in state RSM_STATE_ACTIVE).
2444                  * Note that there is no need to change the s_cookie value
2445                  * if the imported segment is in RSM_STATE_MAPPING since
2446                  * eventually the s_cookie will be updated via the mapping
2447                  * functionality.
2448                  */
2449                 if ((seg->s_segid == ex_segid) && (seg->s_node == src_nodeid) &&
2450                     (seg->s_state == RSM_STATE_ACTIVE)) {
2451                         seg->s_cookie = cookie;
2452                 }
2453                 rsmseglock_release(seg);
2454         }
2455         rw_exit(&rhash->rsmhash_rw);
2456 
2457         DBG_PRINTF((RSM_KERNEL_AGENT | RSM_FUNC_ALL, RSM_DEBUG_VERBOSE,
2458             "rsm_remap_local_importers done\n"));
2459 }
2460 
2461 static int
2462 rsm_rebind(rsmseg_t *seg, rsm_ioctlmsg_t *msg)
2463 {
2464         int e;
2465         adapter_t *adapter;
2466         ddi_umem_cookie_t cookie;
2467         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2468 
2469         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind enter\n"));
2470 
2471         /* Check for permissions to rebind */
2472         if (!(seg->s_flags & RSMKA_ALLOW_UNBIND_REBIND)) {
2473                 return (RSMERR_REBIND_NOT_ALLOWED);
2474         }
2475 
2476         if (seg->s_pid != ddi_get_pid() &&
2477             ddi_get_pid() != 0) {
2478                 DBG_PRINTF((category, RSM_ERR, "rsm_rebind: Not owner\n"));
2479                 return (RSMERR_NOT_CREATOR);
2480         }
2481 
2482         /*
2483          * We will not be allowing partial rebind and hence length passed
2484          * in must be same as segment length
2485          */
2486         if (msg->vaddr == NULL) {
2487                 DBG_PRINTF((category, RSM_ERR,
2488                     "rsm_rebind done: null msg->vaddr\n"));
2489                 return (RSMERR_BAD_ADDR);
2490         }
2491         if (msg->len != seg->s_len) {
2492                 DBG_PRINTF((category, RSM_ERR,
2493                     "rsm_rebind: invalid length\n"));
2494                 return (RSMERR_BAD_LENGTH);
2495         }
2496 
2497         /* Lock segment */
2498         rsmseglock_acquire(seg);
2499 
2500         while ((seg->s_state == RSM_STATE_BIND_QUIESCED) ||
2501             (seg->s_state == RSM_STATE_EXPORT_QUIESCING) ||
2502             (seg->s_state == RSM_STATE_EXPORT_QUIESCED)) {
2503                 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
2504                         rsmseglock_release(seg);
2505                         DBG_PRINTF((category, RSM_DEBUG,
2506                             "rsm_rebind done: cv_wait INTERRUPTED"));
2507                         return (RSMERR_INTERRUPTED);
2508                 }
2509         }
2510 
2511         /* verify segment state */
2512         if ((seg->s_state != RSM_STATE_BIND) &&
2513             (seg->s_state != RSM_STATE_EXPORT)) {
2514                 /* Unlock segment */
2515                 rsmseglock_release(seg);
2516                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2517                     "rsm_rebind done: invalid state\n"));
2518                 return (RSMERR_BAD_SEG_HNDL);
2519         }
2520 
2521         ASSERT(seg->s_cookie != NULL);
2522 
2523         if (msg->vaddr == seg->s_region.r_vaddr) {
2524                 rsmseglock_release(seg);
2525                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind done\n"));
2526                 return (RSM_SUCCESS);
2527         }
2528 
2529         e = rsm_bind_pages(&cookie, msg->vaddr, msg->len, curproc);
2530         if (e == RSM_SUCCESS) {
2531                 struct buf *xbuf;
2532                 dev_t sdev = 0;
2533                 rsm_memory_local_t mem;
2534 
2535                 xbuf = ddi_umem_iosetup(cookie, 0, msg->len, B_WRITE,
2536                     sdev, 0, NULL, DDI_UMEM_SLEEP);
2537                 ASSERT(xbuf != NULL);
2538 
2539                 mem.ms_type = RSM_MEM_BUF;
2540                 mem.ms_bp = xbuf;
2541 
2542                 adapter = seg->s_adapter;
2543                 e = adapter->rsmpi_ops->rsm_rebind(
2544                     seg->s_handle.out, 0, &mem,
2545                     RSM_RESOURCE_DONTWAIT, NULL);
2546 
2547                 if (e == RSM_SUCCESS) {
2548                         /*
2549                          * unbind the older pages, and unload local importers;
2550                          * but don't disconnect importers
2551                          */
2552                         (void) rsm_unbind_pages(seg);
2553                         seg->s_cookie = cookie;
2554                         seg->s_region.r_vaddr = msg->vaddr;
2555                         rsm_remap_local_importers(my_nodeid, seg->s_segid,
2556                             cookie);
2557                 } else {
2558                         /*
2559                          * Unbind the pages associated with "cookie" by the
2560                          * rsm_bind_pages calls prior to this. This is
2561                          * similar to what is done in the rsm_unbind_pages
2562                          * routine for the seg->s_cookie.
2563                          */
2564                         ddi_umem_unlock(cookie);
2565                         rsm_dec_pgcnt(btopr(msg->len));
2566                         DBG_PRINTF((category, RSM_ERR,
2567                             "rsm_rebind failed with %d\n", e));
2568                 }
2569                 /*
2570                  * At present there is no dependency on the existence of xbuf.
2571                  * So we can free it here. If in the future this changes, it can
2572                  * be freed sometime during the segment destroy.
2573                  */
2574                 freerbuf(xbuf);
2575         }
2576 
2577         /* Unlock segment */
2578         rsmseglock_release(seg);
2579 
2580         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind done\n"));
2581 
2582         return (e);
2583 }
2584 
2585 static int
2586 rsm_unbind(rsmseg_t *seg)
2587 {
2588         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2589 
2590         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind enter\n"));
2591 
2592         rsmseglock_acquire(seg);
2593 
2594         /* verify segment state */
2595         if ((seg->s_state != RSM_STATE_BIND) &&
2596             (seg->s_state != RSM_STATE_BIND_QUIESCED)) {
2597                 rsmseglock_release(seg);
2598                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2599                     "rsm_unbind: invalid state\n"));
2600                 return (RSMERR_BAD_SEG_HNDL);
2601         }
2602 
2603         /* unlock current range */
2604         (void) rsm_unbind_pages(seg);
2605 
2606         if (seg->s_state == RSM_STATE_BIND) {
2607                 seg->s_state = RSM_STATE_NEW;
2608         } else if (seg->s_state == RSM_STATE_BIND_QUIESCED) {
2609                 seg->s_state = RSM_STATE_NEW_QUIESCED;
2610         }
2611 
2612         rsmseglock_release(seg);
2613 
2614         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind done\n"));
2615 
2616         return (RSM_SUCCESS);
2617 }
2618 
2619 /* **************************** Exporter Access List Management ******* */
2620 static void
2621 rsmacl_free(rsmapi_access_entry_t *acl, int acl_len)
2622 {
2623         int     acl_sz;
2624         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2625 
2626         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_free enter\n"));
2627 
2628         /* acl could be NULL */
2629 
2630         if (acl != NULL && acl_len > 0) {
2631                 acl_sz = acl_len * sizeof (rsmapi_access_entry_t);
2632                 kmem_free((void *)acl, acl_sz);
2633         }
2634 
2635         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_free done\n"));
2636 }
2637 
2638 static void
2639 rsmpiacl_free(rsm_access_entry_t *acl, int acl_len)
2640 {
2641         int     acl_sz;
2642         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2643 
2644         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_free enter\n"));
2645 
2646         if (acl != NULL && acl_len > 0) {
2647                 acl_sz = acl_len * sizeof (rsm_access_entry_t);
2648                 kmem_free((void *)acl, acl_sz);
2649         }
2650 
2651         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_free done\n"));
2652 
2653 }
2654 
2655 static int
2656 rsmacl_build(rsm_ioctlmsg_t *msg, int mode,
2657     rsmapi_access_entry_t **list, int *len, int loopback)
2658 {
2659         rsmapi_access_entry_t *acl;
2660         int     acl_len;
2661         int i;
2662         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2663 
2664         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_build enter\n"));
2665 
2666         *len = 0;
2667         *list = NULL;
2668 
2669         acl_len = msg->acl_len;
2670         if ((loopback && acl_len > 1) || (acl_len < 0) ||
2671             (acl_len > MAX_NODES)) {
2672                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2673                     "rsmacl_build done: acl invalid\n"));
2674                 return (RSMERR_BAD_ACL);
2675         }
2676 
2677         if (acl_len > 0 && acl_len <= MAX_NODES) {
2678                 size_t acl_size = acl_len * sizeof (rsmapi_access_entry_t);
2679 
2680                 acl = kmem_alloc(acl_size, KM_SLEEP);
2681 
2682                 if (ddi_copyin((caddr_t)msg->acl, (caddr_t)acl,
2683                     acl_size, mode)) {
2684                         kmem_free((void *) acl, acl_size);
2685                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2686                             "rsmacl_build done: BAD_ADDR\n"));
2687                         return (RSMERR_BAD_ADDR);
2688                 }
2689 
2690                 /*
2691                  * Verify access list
2692                  */
2693                 for (i = 0; i < acl_len; i++) {
2694                         if (acl[i].ae_node > MAX_NODES ||
2695                             (loopback && (acl[i].ae_node != my_nodeid)) ||
2696                             acl[i].ae_permission > RSM_ACCESS_TRUSTED) {
2697                                 /* invalid entry */
2698                                 kmem_free((void *) acl, acl_size);
2699                                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2700                                     "rsmacl_build done: EINVAL\n"));
2701                                 return (RSMERR_BAD_ACL);
2702                         }
2703                 }
2704 
2705                 *len = acl_len;
2706                 *list = acl;
2707         }
2708 
2709         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_build done\n"));
2710 
2711         return (DDI_SUCCESS);
2712 }
2713 
2714 static int
2715 rsmpiacl_create(rsmapi_access_entry_t *src, rsm_access_entry_t **dest,
2716     int acl_len, adapter_t *adapter)
2717 {
2718         rsm_access_entry_t *acl;
2719         rsm_addr_t hwaddr;
2720         int i;
2721         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2722 
2723         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_create enter\n"));
2724 
2725         if (src != NULL) {
2726                 size_t acl_size = acl_len * sizeof (rsm_access_entry_t);
2727                 acl = kmem_alloc(acl_size, KM_SLEEP);
2728 
2729                 /*
2730                  * translate access list
2731                  */
2732                 for (i = 0; i < acl_len; i++) {
2733                         if (src[i].ae_node == my_nodeid) {
2734                                 acl[i].ae_addr = adapter->hwaddr;
2735                         } else {
2736                                 hwaddr = get_remote_hwaddr(adapter,
2737                                     src[i].ae_node);
2738                                 if ((int64_t)hwaddr < 0) {
2739                                         /* invalid hwaddr */
2740                                         kmem_free((void *) acl, acl_size);
2741                                         DBG_PRINTF((category,
2742                                             RSM_DEBUG_VERBOSE,
2743                                             "rsmpiacl_create done:"
2744                                             "EINVAL hwaddr\n"));
2745                                         return (RSMERR_INTERNAL_ERROR);
2746                                 }
2747                                 acl[i].ae_addr = hwaddr;
2748                         }
2749                         /* rsmpi understands only RSM_PERM_XXXX */
2750                         acl[i].ae_permission =
2751                             src[i].ae_permission & RSM_PERM_RDWR;
2752                 }
2753                 *dest = acl;
2754         } else {
2755                 *dest = NULL;
2756         }
2757 
2758         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_create done\n"));
2759 
2760         return (RSM_SUCCESS);
2761 }
2762 
2763 static int
2764 rsmsegacl_validate(rsmipc_request_t *req, rsm_node_id_t rnode,
2765     rsmipc_reply_t *reply)
2766 {
2767 
2768         int             i;
2769         rsmseg_t        *seg;
2770         rsm_memseg_id_t key = req->rsmipc_key;
2771         rsm_permission_t perm = req->rsmipc_perm;
2772         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2773 
2774         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2775             "rsmsegacl_validate enter\n"));
2776 
2777         /*
2778          * Find segment and grab its lock. The reason why we grab the segment
2779          * lock in side the search is to avoid the race when the segment is
2780          * being deleted and we already have a pointer to it.
2781          */
2782         seg = rsmexport_lookup(key);
2783         if (!seg) {
2784                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2785                     "rsmsegacl_validate done: %u ENXIO\n", key));
2786                 return (RSMERR_SEG_NOT_PUBLISHED);
2787         }
2788 
2789         ASSERT(rsmseglock_held(seg));
2790         ASSERT(seg->s_state == RSM_STATE_EXPORT);
2791 
2792         /*
2793          * We implement a 2-level protection scheme.
2794          * First, we check if local/remote host has access rights.
2795          * Second, we check if the user has access rights.
2796          *
2797          * This routine only validates the rnode access_list
2798          */
2799         if (seg->s_acl_len > 0) {
2800                 /*
2801                  * Check host access list
2802                  */
2803                 ASSERT(seg->s_acl != NULL);
2804                 for (i = 0; i < seg->s_acl_len; i++) {
2805                         if (seg->s_acl[i].ae_node == rnode) {
2806                                 perm &= seg->s_acl[i].ae_permission;
2807                                 goto found;
2808                         }
2809                 }
2810                 /* rnode is not found in the list */
2811                 rsmseglock_release(seg);
2812                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2813                     "rsmsegacl_validate done: EPERM\n"));
2814                 return (RSMERR_SEG_NOT_PUBLISHED_TO_NODE);
2815         } else {
2816                 /* use default owner creation umask */
2817                 perm &= seg->s_mode;
2818         }
2819 
2820 found:
2821         /* update perm for this node */
2822         reply->rsmipc_mode = perm;
2823         reply->rsmipc_uid = seg->s_uid;
2824         reply->rsmipc_gid = seg->s_gid;
2825         reply->rsmipc_segid = seg->s_segid;
2826         reply->rsmipc_seglen = seg->s_len;
2827 
2828         /*
2829          * Perm of requesting node is valid; source will validate user
2830          */
2831         rsmseglock_release(seg);
2832 
2833         /*
2834          * Add the importer to the list right away, if connect fails
2835          * the importer will ask the exporter to remove it.
2836          */
2837         importer_list_add(rnode, key, req->rsmipc_adapter_hwaddr,
2838             req->rsmipc_segment_cookie);
2839 
2840         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegacl_validate done\n"));
2841 
2842         return (RSM_SUCCESS);
2843 }
2844 
2845 
2846 /* ************************** Exporter Calls ************************* */
2847 
2848 static int
2849 rsm_publish(rsmseg_t *seg, rsm_ioctlmsg_t *msg, intptr_t dataptr, int mode)
2850 {
2851         int                     e;
2852         int                     acl_len;
2853         rsmapi_access_entry_t   *acl;
2854         rsm_access_entry_t      *rsmpi_acl;
2855         rsm_memory_local_t      mem;
2856         struct buf              *xbuf;
2857         dev_t                   sdev = 0;
2858         adapter_t               *adapter;
2859         rsm_memseg_id_t         segment_id = 0;
2860         int                     loopback_flag = 0;
2861         int                     create_flags = 0;
2862         rsm_resource_callback_t callback_flag;
2863         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2864 
2865         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_publish enter\n"));
2866 
2867         if (seg->s_adapter == &loopback_adapter)
2868                 loopback_flag = 1;
2869 
2870         if (seg->s_pid != ddi_get_pid() &&
2871             ddi_get_pid() != 0) {
2872                 DBG_PRINTF((category, RSM_ERR,
2873                     "rsm_publish: Not creator\n"));
2874                 return (RSMERR_NOT_CREATOR);
2875         }
2876 
2877         /*
2878          * Get per node access list
2879          */
2880         e = rsmacl_build(msg, mode, &acl, &acl_len, loopback_flag);
2881         if (e != DDI_SUCCESS) {
2882                 DBG_PRINTF((category, RSM_ERR,
2883                     "rsm_publish done: rsmacl_build failed\n"));
2884                 return (e);
2885         }
2886 
2887         /*
2888          * The application provided msg->key is used for resolving a
2889          * segment id according to the following:
2890          *    key = 0                   Kernel Agent selects the segment id
2891          *    key <= RSM_DLPI_ID_END Reserved for system usage except
2892          *                              RSMLIB range
2893          *    key < RSM_USER_APP_ID_BASE segment id = key
2894          *    key >= RSM_USER_APP_ID_BASE Reserved for KA selections
2895          *
2896          * rsm_nextavail_segmentid is initialized to 0x80000000 and
2897          * overflows to zero after 0x80000000 allocations.
2898          * An algorithm is needed which allows reinitialization and provides
2899          * for reallocation after overflow.  For now, ENOMEM is returned
2900          * once the overflow condition has occurred.
2901          */
2902         if (msg->key == 0) {
2903                 mutex_enter(&rsm_lock);
2904                 segment_id = rsm_nextavail_segmentid;
2905                 if (segment_id != 0) {
2906                         rsm_nextavail_segmentid++;
2907                         mutex_exit(&rsm_lock);
2908                 } else {
2909                         mutex_exit(&rsm_lock);
2910                         DBG_PRINTF((category, RSM_ERR,
2911                             "rsm_publish done: no more keys avlbl\n"));
2912                         return (RSMERR_INSUFFICIENT_RESOURCES);
2913                 }
2914         } else  if BETWEEN(msg->key, RSM_RSMLIB_ID_BASE, RSM_RSMLIB_ID_END)
2915                 /* range reserved for internal use by base/ndi libraries */
2916                 segment_id = msg->key;
2917         else    if (msg->key <= RSM_DLPI_ID_END)
2918                 return (RSMERR_RESERVED_SEGID);
2919         else if (msg->key <= (uint_t)RSM_USER_APP_ID_BASE -1)
2920                 segment_id = msg->key;
2921         else {
2922                 DBG_PRINTF((category, RSM_ERR,
2923                     "rsm_publish done: invalid key %u\n", msg->key));
2924                 return (RSMERR_RESERVED_SEGID);
2925         }
2926 
2927         /* Add key to exportlist; The segment lock is held on success */
2928         e = rsmexport_add(seg, segment_id);
2929         if (e) {
2930                 rsmacl_free(acl, acl_len);
2931                 DBG_PRINTF((category, RSM_ERR,
2932                     "rsm_publish done: export_add failed: %d\n", e));
2933                 return (e);
2934         }
2935 
2936         seg->s_segid = segment_id;
2937 
2938         if ((seg->s_state != RSM_STATE_BIND) &&
2939             (seg->s_state != RSM_STATE_BIND_QUIESCED)) {
2940                 /* state changed since then, free acl and return */
2941                 rsmseglock_release(seg);
2942                 rsmexport_rm(seg);
2943                 rsmacl_free(acl, acl_len);
2944                 DBG_PRINTF((category, RSM_ERR,
2945                     "rsm_publish done: segment in wrong state: %d\n",
2946                     seg->s_state));
2947                 return (RSMERR_BAD_SEG_HNDL);
2948         }
2949 
2950         /*
2951          * If this is for a local memory handle and permissions are zero,
2952          * then the surrogate segment is very large and we want to skip
2953          * allocation of DVMA space.
2954          *
2955          * Careful!  If the user didn't use an ACL list, acl will be a NULL
2956          * pointer.  Check that before dereferencing it.
2957          */
2958         if (acl != (rsmapi_access_entry_t *)NULL) {
2959                 if (acl[0].ae_node == my_nodeid && acl[0].ae_permission == 0)
2960                         goto skipdriver;
2961         }
2962 
2963         /* create segment  */
2964         xbuf = ddi_umem_iosetup(seg->s_cookie, 0, seg->s_len, B_WRITE,
2965             sdev, 0, NULL, DDI_UMEM_SLEEP);
2966         ASSERT(xbuf != NULL);
2967 
2968         mem.ms_type = RSM_MEM_BUF;
2969         mem.ms_bp = xbuf;
2970 
2971         /* This call includes a bind operations */
2972 
2973         adapter = seg->s_adapter;
2974         /*
2975          * create a acl list with hwaddr for RSMPI publish
2976          */
2977         e = rsmpiacl_create(acl, &rsmpi_acl, acl_len, adapter);
2978 
2979         if (e != RSM_SUCCESS) {
2980                 rsmseglock_release(seg);
2981                 rsmexport_rm(seg);
2982                 rsmacl_free(acl, acl_len);
2983                 freerbuf(xbuf);
2984                 DBG_PRINTF((category, RSM_ERR,
2985                     "rsm_publish done: rsmpiacl_create failed: %d\n", e));
2986                 return (e);
2987         }
2988 
2989         if (seg->s_state == RSM_STATE_BIND) {
2990                 /* create segment  */
2991 
2992                 /* This call includes a bind operations */
2993 
2994                 if (seg->s_flags & RSMKA_ALLOW_UNBIND_REBIND) {
2995                         create_flags = RSM_ALLOW_UNBIND_REBIND;
2996                 }
2997 
2998                 if (seg->s_flags & RSMKA_SET_RESOURCE_DONTWAIT) {
2999                         callback_flag  = RSM_RESOURCE_DONTWAIT;
3000                 } else {
3001                         callback_flag  = RSM_RESOURCE_SLEEP;
3002                 }
3003 
3004                 e = adapter->rsmpi_ops->rsm_seg_create(
3005                     adapter->rsmpi_handle,
3006                     &seg->s_handle.out, seg->s_len,
3007                     create_flags, &mem,
3008                     callback_flag, NULL);
3009                 /*
3010                  * At present there is no dependency on the existence of xbuf.
3011                  * So we can free it here. If in the future this changes, it can
3012                  * be freed sometime during the segment destroy.
3013                  */
3014                 freerbuf(xbuf);
3015 
3016                 if (e != RSM_SUCCESS) {
3017                         rsmseglock_release(seg);
3018                         rsmexport_rm(seg);
3019                         rsmacl_free(acl, acl_len);
3020                         rsmpiacl_free(rsmpi_acl, acl_len);
3021                         DBG_PRINTF((category, RSM_ERR,
3022                             "rsm_publish done: export_create failed: %d\n", e));
3023                         /*
3024                          * The following assertion ensures that the two errors
3025                          * related to the length and its alignment do not occur
3026                          * since they have been checked during export_create
3027                          */
3028                         ASSERT(e != RSMERR_BAD_MEM_ALIGNMENT &&
3029                             e != RSMERR_BAD_LENGTH);
3030                         if (e == RSMERR_NOT_MEM)
3031                                 e = RSMERR_INSUFFICIENT_MEM;
3032 
3033                         return (e);
3034                 }
3035                 /* export segment, this should create an IMMU mapping */
3036                 e = adapter->rsmpi_ops->rsm_publish(
3037                     seg->s_handle.out,
3038                     rsmpi_acl, acl_len,
3039                     seg->s_segid,
3040                     RSM_RESOURCE_DONTWAIT, NULL);
3041 
3042                 if (e != RSM_SUCCESS) {
3043                         adapter->rsmpi_ops->rsm_seg_destroy(seg->s_handle.out);
3044                         rsmseglock_release(seg);
3045                         rsmexport_rm(seg);
3046                         rsmacl_free(acl, acl_len);
3047                         rsmpiacl_free(rsmpi_acl, acl_len);
3048                         DBG_PRINTF((category, RSM_ERR,
3049                             "rsm_publish done: export_publish failed: %d\n",
3050                             e));
3051                         return (e);
3052                 }
3053         }
3054 
3055         seg->s_acl_in = rsmpi_acl;
3056 
3057 skipdriver:
3058         /* defer s_acl/s_acl_len -> avoid crash in rsmseg_free */
3059         seg->s_acl_len       = acl_len;
3060         seg->s_acl   = acl;
3061 
3062         if (seg->s_state == RSM_STATE_BIND) {
3063                 seg->s_state = RSM_STATE_EXPORT;
3064         } else if (seg->s_state == RSM_STATE_BIND_QUIESCED) {
3065                 seg->s_state = RSM_STATE_EXPORT_QUIESCED;
3066                 cv_broadcast(&seg->s_cv);
3067         }
3068 
3069         rsmseglock_release(seg);
3070 
3071         /*
3072          * If the segment id was solicited, then return it in
3073          * the original incoming message.
3074          */
3075         if (msg->key == 0) {
3076                 msg->key = segment_id;
3077 #ifdef _MULTI_DATAMODEL
3078                 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
3079                         rsm_ioctlmsg32_t msg32;
3080 
3081                         msg32.key = msg->key;
3082                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3083                             "rsm_publish done\n"));
3084                         return (ddi_copyout((caddr_t)&msg32,
3085                             (caddr_t)dataptr, sizeof (msg32), mode));
3086                 }
3087 #endif
3088                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3089                     "rsm_publish done\n"));
3090                 return (ddi_copyout((caddr_t)msg,
3091                     (caddr_t)dataptr, sizeof (*msg), mode));
3092         }
3093 
3094         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_publish done\n"));
3095         return (DDI_SUCCESS);
3096 }
3097 
3098 /*
3099  * This function modifies the access control list of an already published
3100  * segment.  There is no effect on import segments which are already
3101  * connected.
3102  */
3103 static int
3104 rsm_republish(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int mode)
3105 {
3106         rsmapi_access_entry_t   *new_acl, *old_acl, *tmp_acl;
3107         rsm_access_entry_t      *rsmpi_new_acl, *rsmpi_old_acl;
3108         int                     new_acl_len, old_acl_len, tmp_acl_len;
3109         int                     e, i;
3110         adapter_t               *adapter;
3111         int                     loopback_flag = 0;
3112         rsm_memseg_id_t         key;
3113         rsm_permission_t        permission;
3114         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
3115 
3116         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_republish enter\n"));
3117 
3118         if ((seg->s_state != RSM_STATE_EXPORT) &&
3119             (seg->s_state != RSM_STATE_EXPORT_QUIESCED) &&
3120             (seg->s_state != RSM_STATE_EXPORT_QUIESCING))
3121                 return (RSMERR_SEG_NOT_PUBLISHED);
3122 
3123         if (seg->s_pid != ddi_get_pid() &&
3124             ddi_get_pid() != 0) {
3125                 DBG_PRINTF((category, RSM_ERR,
3126                     "rsm_republish: Not owner\n"));
3127                 return (RSMERR_NOT_CREATOR);
3128         }
3129 
3130         if (seg->s_adapter == &loopback_adapter)
3131                 loopback_flag = 1;
3132 
3133         /*
3134          * Build new list first
3135          */
3136         e = rsmacl_build(msg, mode, &new_acl, &new_acl_len, loopback_flag);
3137         if (e) {
3138                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3139                     "rsm_republish done: rsmacl_build failed %d", e));
3140                 return (e);
3141         }
3142 
3143         /* Lock segment */
3144         rsmseglock_acquire(seg);
3145         /*
3146          * a republish is in progress - REPUBLISH message is being
3147          * sent to the importers so wait for it to complete OR
3148          * wait till DR completes
3149          */
3150         while (((seg->s_state == RSM_STATE_EXPORT) &&
3151             (seg->s_flags & RSM_REPUBLISH_WAIT)) ||
3152             (seg->s_state == RSM_STATE_EXPORT_QUIESCED) ||
3153             (seg->s_state == RSM_STATE_EXPORT_QUIESCING)) {
3154                 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
3155                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3156                             "rsm_republish done: cv_wait  INTERRUPTED"));
3157                         rsmseglock_release(seg);
3158                         rsmacl_free(new_acl, new_acl_len);
3159                         return (RSMERR_INTERRUPTED);
3160                 }
3161         }
3162 
3163         /* recheck if state is valid */
3164         if (seg->s_state != RSM_STATE_EXPORT) {
3165                 rsmseglock_release(seg);
3166                 rsmacl_free(new_acl, new_acl_len);
3167                 return (RSMERR_SEG_NOT_PUBLISHED);
3168         }
3169 
3170         key = seg->s_key;
3171         old_acl = seg->s_acl;
3172         old_acl_len = seg->s_acl_len;
3173 
3174         seg->s_acl = new_acl;
3175         seg->s_acl_len = new_acl_len;
3176 
3177         /*
3178          * This call will only be meaningful if and when the interconnect
3179          * layer makes use of the access list
3180          */
3181         adapter = seg->s_adapter;
3182         /*
3183          * create a acl list with hwaddr for RSMPI publish
3184          */
3185         e = rsmpiacl_create(new_acl, &rsmpi_new_acl, new_acl_len, adapter);
3186 
3187         if (e != RSM_SUCCESS) {
3188                 seg->s_acl = old_acl;
3189                 seg->s_acl_len = old_acl_len;
3190                 rsmseglock_release(seg);
3191                 rsmacl_free(new_acl, new_acl_len);
3192                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3193                     "rsm_republish done: rsmpiacl_create failed %d", e));
3194                 return (e);
3195         }
3196         rsmpi_old_acl = seg->s_acl_in;
3197         seg->s_acl_in = rsmpi_new_acl;
3198 
3199         e = adapter->rsmpi_ops->rsm_republish(seg->s_handle.out,
3200             seg->s_acl_in, seg->s_acl_len,
3201             RSM_RESOURCE_DONTWAIT, NULL);
3202 
3203         if (e != RSM_SUCCESS) {
3204                 seg->s_acl = old_acl;
3205                 seg->s_acl_in = rsmpi_old_acl;
3206                 seg->s_acl_len = old_acl_len;
3207                 rsmseglock_release(seg);
3208                 rsmacl_free(new_acl, new_acl_len);
3209                 rsmpiacl_free(rsmpi_new_acl, new_acl_len);
3210 
3211                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3212                     "rsm_republish done: rsmpi republish failed %d\n", e));
3213                 return (e);
3214         }
3215 
3216         /* create a tmp copy of the new acl */
3217         tmp_acl_len = new_acl_len;
3218         if (tmp_acl_len > 0) {
3219                 tmp_acl = kmem_zalloc(new_acl_len*sizeof (*tmp_acl), KM_SLEEP);
3220                 for (i = 0; i < tmp_acl_len; i++) {
3221                         tmp_acl[i].ae_node = new_acl[i].ae_node;
3222                         tmp_acl[i].ae_permission = new_acl[i].ae_permission;
3223                 }
3224                 /*
3225                  * The default permission of a node which was in the old
3226                  * ACL but not in the new ACL is 0 ie no access.
3227                  */
3228                 permission = 0;
3229         } else {
3230                 /*
3231                  * NULL acl means all importers can connect and
3232                  * default permission will be owner creation umask
3233                  */
3234                 tmp_acl = NULL;
3235                 permission = seg->s_mode;
3236         }
3237 
3238         /* make other republishers to wait for republish to complete */
3239         seg->s_flags |= RSM_REPUBLISH_WAIT;
3240 
3241         rsmseglock_release(seg);
3242 
3243         /* send the new perms to the importing nodes */
3244         rsm_send_republish(key, tmp_acl, tmp_acl_len, permission);
3245 
3246         rsmseglock_acquire(seg);
3247         seg->s_flags &= ~RSM_REPUBLISH_WAIT;
3248         /* wake up any one waiting for republish to complete */
3249         cv_broadcast(&seg->s_cv);
3250         rsmseglock_release(seg);
3251 
3252         rsmacl_free(tmp_acl, tmp_acl_len);
3253         rsmacl_free(old_acl, old_acl_len);
3254         rsmpiacl_free(rsmpi_old_acl, old_acl_len);
3255 
3256         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_republish done\n"));
3257         return (DDI_SUCCESS);
3258 }
3259 
3260 static int
3261 rsm_unpublish(rsmseg_t *seg, int mode)
3262 {
3263         rsmapi_access_entry_t   *acl;
3264         rsm_access_entry_t      *rsmpi_acl;
3265         int                     acl_len;
3266         int                     e;
3267         adapter_t *adapter;
3268         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
3269 
3270         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unpublish enter\n"));
3271 
3272         if (seg->s_pid != ddi_get_pid() &&
3273             ddi_get_pid() != 0) {
3274                 DBG_PRINTF((category, RSM_ERR,
3275                     "rsm_unpublish: Not creator\n"));
3276                 return (RSMERR_NOT_CREATOR);
3277         }
3278 
3279         rsmseglock_acquire(seg);
3280         /*
3281          * wait for QUIESCING to complete here before rsmexport_rm
3282          * is called because the SUSPEND_COMPLETE mesg which changes
3283          * the seg state from EXPORT_QUIESCING to EXPORT_QUIESCED and
3284          * signals the cv_wait needs to find it in the hashtable.
3285          */
3286         while ((seg->s_state == RSM_STATE_EXPORT_QUIESCING) ||
3287             ((seg->s_state == RSM_STATE_EXPORT) && (seg->s_rdmacnt > 0))) {
3288                 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
3289                         rsmseglock_release(seg);
3290                         DBG_PRINTF((category, RSM_ERR,
3291                             "rsm_unpublish done: cv_wait INTR qscing"
3292                             "getv/putv in progress"));
3293                         return (RSMERR_INTERRUPTED);
3294                 }
3295         }
3296 
3297         /* verify segment state */
3298         if ((seg->s_state != RSM_STATE_EXPORT) &&
3299             (seg->s_state != RSM_STATE_EXPORT_QUIESCED)) {
3300                 rsmseglock_release(seg);
3301                 DBG_PRINTF((category, RSM_ERR,
3302                     "rsm_unpublish done: bad state %x\n", seg->s_state));
3303                 return (RSMERR_SEG_NOT_PUBLISHED);
3304         }
3305 
3306         rsmseglock_release(seg);
3307 
3308         rsmexport_rm(seg);
3309 
3310         rsm_send_importer_disconnects(seg->s_segid, my_nodeid);
3311 
3312         rsmseglock_acquire(seg);
3313         /*
3314          * wait for republish to complete
3315          */
3316         while ((seg->s_state == RSM_STATE_EXPORT) &&
3317             (seg->s_flags & RSM_REPUBLISH_WAIT)) {
3318                 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
3319                         DBG_PRINTF((category, RSM_ERR,
3320                             "rsm_unpublish done: cv_wait INTR repubing"));
3321                         rsmseglock_release(seg);
3322                         return (RSMERR_INTERRUPTED);
3323                 }
3324         }
3325 
3326         if ((seg->s_state != RSM_STATE_EXPORT) &&
3327             (seg->s_state != RSM_STATE_EXPORT_QUIESCED)) {
3328                 DBG_PRINTF((category, RSM_ERR,
3329                     "rsm_unpublish done: invalid state"));
3330                 rsmseglock_release(seg);
3331                 return (RSMERR_SEG_NOT_PUBLISHED);
3332         }
3333 
3334         /*
3335          * check for putv/get surrogate segment which was not published
3336          * to the driver.
3337          *
3338          * Be certain to see if there is an ACL first!  If this segment was
3339          * not published with an ACL, acl will be a null pointer.  Check
3340          * that before dereferencing it.
3341          */
3342         acl = seg->s_acl;
3343         if (acl != (rsmapi_access_entry_t *)NULL) {
3344                 if (acl[0].ae_node == my_nodeid && acl[0].ae_permission == 0)
3345                         goto bypass;
3346         }
3347 
3348         /* The RSMPI unpublish/destroy has been done if seg is QUIESCED */
3349         if (seg->s_state == RSM_STATE_EXPORT_QUIESCED)
3350                 goto bypass;
3351 
3352         adapter = seg->s_adapter;
3353         for (;;) {
3354                 if (seg->s_state != RSM_STATE_EXPORT) {
3355                         rsmseglock_release(seg);
3356                         DBG_PRINTF((category, RSM_ERR,
3357                             "rsm_unpublish done: bad state %x\n",
3358                             seg->s_state));
3359                         return (RSMERR_SEG_NOT_PUBLISHED);
3360                 }
3361 
3362                 /* unpublish from adapter */
3363                 e = adapter->rsmpi_ops->rsm_unpublish(seg->s_handle.out);
3364 
3365                 if (e == RSM_SUCCESS) {
3366                         break;
3367                 }
3368 
3369                 if (e == RSMERR_SEG_IN_USE && mode == 1) {
3370                         /*
3371                          * wait for unpublish to succeed, it's busy.
3372                          */
3373                         seg->s_flags |= RSM_EXPORT_WAIT;
3374 
3375                         /* wait for a max of 1 ms - this is an empirical */
3376                         /* value that was found by some minimal testing  */
3377                         /* can be fine tuned when we have better numbers */
3378                         /* A long term fix would be to send cv_signal    */
3379                         /* from the intr callback routine                */
3380                         /* currently nobody signals this wait            */
3381                         (void) cv_reltimedwait(&seg->s_cv, &seg->s_lock,
3382                             drv_usectohz(1000), TR_CLOCK_TICK);
3383 
3384                         DBG_PRINTF((category, RSM_ERR,
3385                             "rsm_unpublish: SEG_IN_USE\n"));
3386 
3387                         seg->s_flags &= ~RSM_EXPORT_WAIT;
3388                 } else {
3389                         if (mode == 1) {
3390                                 DBG_PRINTF((category, RSM_ERR,
3391                                     "rsm:rsmpi unpublish err %x\n", e));
3392                                 seg->s_state = RSM_STATE_BIND;
3393                         }
3394                         rsmseglock_release(seg);
3395                         return (e);
3396                 }
3397         }
3398 
3399         /* Free segment */
3400         e = adapter->rsmpi_ops->rsm_seg_destroy(seg->s_handle.out);
3401 
3402         if (e != RSM_SUCCESS) {
3403                 DBG_PRINTF((category, RSM_ERR,
3404                     "rsm_unpublish: rsmpi destroy key=%x failed %x\n",
3405                     seg->s_key, e));
3406         }
3407 
3408 bypass:
3409         acl = seg->s_acl;
3410         rsmpi_acl = seg->s_acl_in;
3411         acl_len = seg->s_acl_len;
3412 
3413         seg->s_acl = NULL;
3414         seg->s_acl_in = NULL;
3415         seg->s_acl_len = 0;
3416 
3417         if (seg->s_state == RSM_STATE_EXPORT) {
3418                 seg->s_state = RSM_STATE_BIND;
3419         } else if (seg->s_state == RSM_STATE_EXPORT_QUIESCED) {
3420                 seg->s_state = RSM_STATE_BIND_QUIESCED;
3421                 cv_broadcast(&seg->s_cv);
3422         }
3423 
3424         rsmseglock_release(seg);
3425 
3426         rsmacl_free(acl, acl_len);
3427         rsmpiacl_free(rsmpi_acl, acl_len);
3428 
3429         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unpublish done\n"));
3430 
3431         return (DDI_SUCCESS);
3432 }
3433 
3434 /*
3435  * Called from rsm_unpublish to force an unload and disconnection of all
3436  * importers of the unpublished segment.
3437  *
3438  * First build the list of segments requiring a force disconnect, then
3439  * send a request for each.
3440  */
3441 static void
3442 rsm_send_importer_disconnects(rsm_memseg_id_t ex_segid,
3443     rsm_node_id_t ex_nodeid)
3444 {
3445         rsmipc_request_t        request;
3446         importing_token_t       *prev_token, *token, *tmp_token, *tokp;
3447         importing_token_t       *force_disconnect_list = NULL;
3448         int                     index;
3449 
3450         DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3451             "rsm_send_importer_disconnects enter\n"));
3452 
3453         index = rsmhash(ex_segid);
3454 
3455         mutex_enter(&importer_list.lock);
3456 
3457         prev_token = NULL;
3458         token = importer_list.bucket[index];
3459 
3460         while (token != NULL) {
3461                 if (token->key == ex_segid) {
3462                         /*
3463                          * take it off the importer list and add it
3464                          * to the force disconnect list.
3465                          */
3466                         if (prev_token == NULL)
3467                                 importer_list.bucket[index] = token->next;
3468                         else
3469                                 prev_token->next = token->next;
3470                         tmp_token = token;
3471                         token = token->next;
3472                         if (force_disconnect_list == NULL) {
3473                                 force_disconnect_list = tmp_token;
3474                                 tmp_token->next = NULL;
3475                         } else {
3476                                 tokp = force_disconnect_list;
3477                                 /*
3478                                  * make sure that the tmp_token's node
3479                                  * is not already on the force disconnect
3480                                  * list.
3481                                  */
3482                                 while (tokp != NULL) {
3483                                         if (tokp->importing_node ==
3484                                             tmp_token->importing_node) {
3485                                                 break;
3486                                         }
3487                                         tokp = tokp->next;
3488                                 }
3489                                 if (tokp == NULL) {
3490                                         tmp_token->next =
3491                                             force_disconnect_list;
3492                                         force_disconnect_list = tmp_token;
3493                                 } else {
3494                                         kmem_free((void *)tmp_token,
3495                                             sizeof (*token));
3496                                 }
3497                         }
3498 
3499                 } else {
3500                         prev_token = token;
3501                         token = token->next;
3502                 }
3503         }
3504         mutex_exit(&importer_list.lock);
3505 
3506         token = force_disconnect_list;
3507         while (token != NULL) {
3508                 if (token->importing_node == my_nodeid) {
3509                         rsm_force_unload(ex_nodeid, ex_segid,
3510                             DISCONNECT);
3511                 } else {
3512                         request.rsmipc_hdr.rsmipc_type =
3513                             RSMIPC_MSG_DISCONNECT;
3514                         request.rsmipc_key = token->key;
3515                         for (;;) {
3516                                 if (rsmipc_send(token->importing_node,
3517                                     &request,
3518                                     RSM_NO_REPLY) == RSM_SUCCESS) {
3519                                         break;
3520                                 } else {
3521                                         delay(drv_usectohz(10000));
3522                                 }
3523                         }
3524                 }
3525                 tmp_token = token;
3526                 token = token->next;
3527                 kmem_free((void *)tmp_token, sizeof (*token));
3528         }
3529 
3530         DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3531             "rsm_send_importer_disconnects done\n"));
3532 }
3533 
3534 /*
3535  * This function is used as a callback for unlocking the pages locked
3536  * down by a process which then does a fork or an exec.
3537  * It marks the export segments corresponding to umem cookie given by
3538  * the *arg to be in a ZOMBIE state(by calling rsmseg_close to be
3539  * destroyed later when an rsm_close occurs).
3540  */
3541 static void
3542 rsm_export_force_destroy(ddi_umem_cookie_t *ck)
3543 {
3544         rsmresource_blk_t *blk;
3545         rsmresource_t *p;
3546         rsmseg_t *eseg = NULL;
3547         int i, j;
3548         int found = 0;
3549 
3550         DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3551             "rsm_export_force_destroy enter\n"));
3552 
3553         /*
3554          * Walk the resource list and locate the export segment (either
3555          * in the BIND or the EXPORT state) which corresponds to the
3556          * ddi_umem_cookie_t being freed up, and call rsmseg_close.
3557          * Change the state to ZOMBIE by calling rsmseg_close with the
3558          * force_flag argument (the second argument) set to 1. Also,
3559          * unpublish and unbind the segment, but don't free it. Free it
3560          * only on a rsm_close call for the segment.
3561          */
3562         rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
3563 
3564         for (i = 0; i < rsm_resource.rsmrc_len; i++) {
3565                 blk = rsm_resource.rsmrc_root[i];
3566                 if (blk == NULL) {
3567                         continue;
3568                 }
3569 
3570                 for (j = 0; j < RSMRC_BLKSZ; j++) {
3571                         p = blk->rsmrcblk_blks[j];
3572                         if ((p != NULL) && (p != RSMRC_RESERVED) &&
3573                             (p->rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT)) {
3574                                 eseg = (rsmseg_t *)p;
3575                                 if (eseg->s_cookie != ck)
3576                                         continue; /* continue searching */
3577                                 /*
3578                                  * Found the segment, set flag to indicate
3579                                  * force destroy processing is in progress
3580                                  */
3581                                 rsmseglock_acquire(eseg);
3582                                 eseg->s_flags |= RSM_FORCE_DESTROY_WAIT;
3583                                 rsmseglock_release(eseg);
3584                                 found = 1;
3585                                 break;
3586                         }
3587                 }
3588 
3589                 if (found)
3590                         break;
3591         }
3592 
3593         rw_exit(&rsm_resource.rsmrc_lock);
3594 
3595         if (found) {
3596                 ASSERT(eseg != NULL);
3597                 /* call rsmseg_close with force flag set to 1 */
3598                 rsmseg_close(eseg, 1);
3599                 /*
3600                  * force destroy processing done, clear flag and signal any
3601                  * thread waiting in rsmseg_close.
3602                  */
3603                 rsmseglock_acquire(eseg);
3604                 eseg->s_flags &= ~RSM_FORCE_DESTROY_WAIT;
3605                 cv_broadcast(&eseg->s_cv);
3606                 rsmseglock_release(eseg);
3607         }
3608 
3609         DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3610             "rsm_export_force_destroy done\n"));
3611 }
3612 
3613 /* ******************************* Remote Calls *********************** */
3614 static void
3615 rsm_intr_segconnect(rsm_node_id_t src, rsmipc_request_t *req)
3616 {
3617         rsmipc_reply_t reply;
3618         DBG_DEFINE(category,
3619             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3620 
3621         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3622             "rsm_intr_segconnect enter\n"));
3623 
3624         reply.rsmipc_status = (short)rsmsegacl_validate(req, src, &reply);
3625 
3626         reply.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPLY;
3627         reply.rsmipc_hdr.rsmipc_cookie = req->rsmipc_hdr.rsmipc_cookie;
3628 
3629         (void) rsmipc_send(src, NULL, &reply);
3630 
3631         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3632             "rsm_intr_segconnect done\n"));
3633 }
3634 
3635 
3636 /*
3637  * When an exported segment is unpublished the exporter sends an ipc
3638  * message (RSMIPC_MSG_DISCONNECT) to all importers.  The recv ipc dispatcher
3639  * calls this function.  The import list is scanned; segments which match the
3640  * exported segment id are unloaded and disconnected.
3641  *
3642  * Will also be called from rsm_rebind with disconnect_flag FALSE.
3643  *
3644  */
3645 static void
3646 rsm_force_unload(rsm_node_id_t src_nodeid,
3647     rsm_memseg_id_t ex_segid,
3648     boolean_t disconnect_flag)
3649 
3650 {
3651         rsmresource_t   *p = NULL;
3652         rsmhash_table_t *rhash = &rsm_import_segs;
3653         uint_t          index;
3654         DBG_DEFINE(category,
3655             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3656 
3657         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_force_unload enter\n"));
3658 
3659         index = rsmhash(ex_segid);
3660 
3661         rw_enter(&rhash->rsmhash_rw, RW_READER);
3662 
3663         p = rsmhash_getbkt(rhash, index);
3664 
3665         for (; p; p = p->rsmrc_next) {
3666                 rsmseg_t *seg = (rsmseg_t *)p;
3667                 if ((seg->s_segid == ex_segid) && (seg->s_node == src_nodeid)) {
3668                         /*
3669                          * In order to make rsmseg_unload and rsm_force_unload
3670                          * thread safe, acquire the segment lock here.
3671                          * rsmseg_unload is responsible for releasing the lock.
3672                          * rsmseg_unload releases the lock just before a call
3673                          * to rsmipc_send or in case of an early exit which
3674                          * occurs if the segment was in the state
3675                          * RSM_STATE_CONNECTING or RSM_STATE_NEW.
3676                          */
3677                         rsmseglock_acquire(seg);
3678                         if (disconnect_flag)
3679                                 seg->s_flags |= RSM_FORCE_DISCONNECT;
3680                         rsmseg_unload(seg);
3681                 }
3682         }
3683         rw_exit(&rhash->rsmhash_rw);
3684 
3685         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_force_unload done\n"));
3686 }
3687 
3688 static void
3689 rsm_intr_reply(rsmipc_msghdr_t *msg)
3690 {
3691         /*
3692          * Find slot for cookie in reply.
3693          * Match sequence with sequence in cookie
3694          * If no match; return
3695          * Try to grap lock of slot, if locked return
3696          * copy data into reply slot area
3697          * signal waiter
3698          */
3699         rsmipc_slot_t   *slot;
3700         rsmipc_cookie_t *cookie;
3701         void *data = (void *) msg;
3702         size_t size = sizeof (rsmipc_reply_t);
3703         DBG_DEFINE(category,
3704             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3705 
3706         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_reply enter\n"));
3707 
3708         cookie = &msg->rsmipc_cookie;
3709         if (cookie->ic.index >= RSMIPC_SZ) {
3710                 DBG_PRINTF((category, RSM_ERR,
3711                     "rsm: rsm_intr_reply bad cookie %d\n", cookie->ic.index));
3712                 return;
3713         }
3714 
3715         ASSERT(cookie->ic.index < RSMIPC_SZ);
3716         slot = &rsm_ipc.slots[cookie->ic.index];
3717         mutex_enter(&slot->rsmipc_lock);
3718         if (slot->rsmipc_cookie.value == cookie->value) {
3719                 /* found a match */
3720                 if (RSMIPC_GET(slot, RSMIPC_PENDING)) {
3721                         bcopy(data, slot->rsmipc_data, size);
3722                         RSMIPC_CLEAR(slot, RSMIPC_PENDING);
3723                         cv_signal(&slot->rsmipc_cv);
3724                 }
3725         } else {
3726                 DBG_PRINTF((category, RSM_DEBUG,
3727                     "rsm: rsm_intr_reply mismatched reply %d\n",
3728                     cookie->ic.index));
3729         }
3730         mutex_exit(&slot->rsmipc_lock);
3731         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_reply done\n"));
3732 }
3733 
3734 /*
3735  * This function gets dispatched on the worker thread when we receive
3736  * the SQREADY message. This function sends the SQREADY_ACK message.
3737  */
3738 static void
3739 rsm_sqready_ack_deferred(void *arg)
3740 {
3741         path_t  *path = (path_t *)arg;
3742         DBG_DEFINE(category,
3743             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3744 
3745         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3746             "rsm_sqready_ack_deferred enter\n"));
3747 
3748         mutex_enter(&path->mutex);
3749 
3750         /*
3751          * If path is not active no point in sending the ACK
3752          * because the whole SQREADY protocol will again start
3753          * when the path becomes active.
3754          */
3755         if (path->state != RSMKA_PATH_ACTIVE) {
3756                 /*
3757                  * decrement the path refcnt incremented in rsm_proc_sqready
3758                  */
3759                 PATH_RELE_NOLOCK(path);
3760                 mutex_exit(&path->mutex);
3761                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3762                     "rsm_sqready_ack_deferred done:!ACTIVE\n"));
3763                 return;
3764         }
3765 
3766         /* send an SQREADY_ACK message */
3767         (void) rsmipc_send_controlmsg(path, RSMIPC_MSG_SQREADY_ACK);
3768 
3769         /* initialize credits to the max level */
3770         path->sendq_token.msgbuf_avail = RSMIPC_MAX_MESSAGES;
3771 
3772         /* wake up any send that is waiting for credits */
3773         cv_broadcast(&path->sendq_token.sendq_cv);
3774 
3775         /*
3776          * decrement the path refcnt since we incremented it in
3777          * rsm_proc_sqready
3778          */
3779         PATH_RELE_NOLOCK(path);
3780 
3781         mutex_exit(&path->mutex);
3782 
3783         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3784             "rsm_sqready_ack_deferred done\n"));
3785 }
3786 
3787 /*
3788  * Process the SQREADY message
3789  */
3790 static void
3791 rsm_proc_sqready(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr,
3792     rsm_intr_hand_arg_t arg)
3793 {
3794         rsmipc_msghdr_t         *msghdr = (rsmipc_msghdr_t *)msg;
3795         srv_handler_arg_t       *hdlr_argp = (srv_handler_arg_t *)arg;
3796         path_t                  *path;
3797         DBG_DEFINE(category,
3798             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3799 
3800         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_proc_sqready enter\n"));
3801 
3802         /* look up the path - incr the path refcnt */
3803         path = rsm_find_path(hdlr_argp->adapter_name,
3804             hdlr_argp->adapter_instance, src_hwaddr);
3805 
3806         /*
3807          * No path exists or path is not active - drop the message
3808          */
3809         if (path == NULL) {
3810                 DBG_PRINTF((category, RSM_DEBUG,
3811                     "rsm_proc_sqready done: msg dropped no path\n"));
3812                 return;
3813         }
3814 
3815         mutex_exit(&path->mutex);
3816 
3817         /* drain any tasks from the previous incarnation */
3818         taskq_wait(path->recv_taskq);
3819 
3820         mutex_enter(&path->mutex);
3821         /*
3822          * If we'd sent an SQREADY message and were waiting for SQREADY_ACK
3823          * in the meanwhile we received an SQREADY message, blindly reset
3824          * the WAIT_FOR_SQACK flag because we'll just send SQREADY_ACK
3825          * and forget about the SQREADY that we sent.
3826          */
3827         path->flags &= ~RSMKA_WAIT_FOR_SQACK;
3828 
3829         if (path->state != RSMKA_PATH_ACTIVE) {
3830                 /* decr refcnt and drop the mutex */
3831                 PATH_RELE_NOLOCK(path);
3832                 mutex_exit(&path->mutex);
3833                 DBG_PRINTF((category, RSM_DEBUG,
3834                     "rsm_proc_sqready done: msg dropped path !ACTIVE\n"));
3835                 return;
3836         }
3837 
3838         DBG_PRINTF((category, RSM_DEBUG, "rsm_proc_sqready:path=%lx "
3839             " src=%lx:%llx\n", path, msghdr->rsmipc_src, src_hwaddr));
3840 
3841         /*
3842          * The sender's local incarnation number is our remote incarnation
3843          * number save it in the path data structure
3844          */
3845         path->remote_incn = msg->rsmipc_local_incn;
3846         path->sendq_token.msgbuf_avail = 0;
3847         path->procmsg_cnt = 0;
3848 
3849         /*
3850          * path is active - dispatch task to send SQREADY_ACK - remember
3851          * RSMPI calls can't be done in interrupt context
3852          *
3853          * We can use the recv_taskq to send because the remote endpoint
3854          * cannot start sending messages till it receives SQREADY_ACK hence
3855          * at this point there are no tasks on recv_taskq.
3856          *
3857          * The path refcnt will be decremented in rsm_sqready_ack_deferred.
3858          */
3859         (void) taskq_dispatch(path->recv_taskq,
3860             rsm_sqready_ack_deferred, path, KM_NOSLEEP);
3861 
3862         mutex_exit(&path->mutex);
3863 
3864 
3865         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_proc_sqready done\n"));
3866 }
3867 
3868 /*
3869  * Process the SQREADY_ACK message
3870  */
3871 static void
3872 rsm_proc_sqready_ack(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr,
3873     rsm_intr_hand_arg_t arg)
3874 {
3875         rsmipc_msghdr_t         *msghdr = (rsmipc_msghdr_t *)msg;
3876         srv_handler_arg_t       *hdlr_argp = (srv_handler_arg_t *)arg;
3877         path_t                  *path;
3878         DBG_DEFINE(category,
3879             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3880 
3881         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3882             "rsm_proc_sqready_ack enter\n"));
3883 
3884         /* look up the path - incr the path refcnt */
3885         path = rsm_find_path(hdlr_argp->adapter_name,
3886             hdlr_argp->adapter_instance, src_hwaddr);
3887 
3888         /*
3889          * drop the message if - no path exists or path is not active
3890          * or if its not waiting for SQREADY_ACK message
3891          */
3892         if (path == NULL) {
3893                 DBG_PRINTF((category, RSM_DEBUG,
3894                     "rsm_proc_sqready_ack done: msg dropped no path\n"));
3895                 return;
3896         }
3897 
3898         if ((path->state != RSMKA_PATH_ACTIVE) ||
3899             !(path->flags & RSMKA_WAIT_FOR_SQACK)) {
3900                 /* decrement the refcnt */
3901                 PATH_RELE_NOLOCK(path);
3902                 mutex_exit(&path->mutex);
3903                 DBG_PRINTF((category, RSM_DEBUG,
3904                     "rsm_proc_sqready_ack done: msg dropped\n"));
3905                 return;
3906         }
3907 
3908         /*
3909          * Check if this message is in response to the last RSMIPC_MSG_SQREADY
3910          * sent, if not drop it.
3911          */
3912         if (path->local_incn != msghdr->rsmipc_incn) {
3913                 /* decrement the refcnt */
3914                 PATH_RELE_NOLOCK(path);
3915                 mutex_exit(&path->mutex);
3916                 DBG_PRINTF((category, RSM_DEBUG,
3917                     "rsm_proc_sqready_ack done: msg old incn %lld\n",
3918                     msghdr->rsmipc_incn));
3919                 return;
3920         }
3921 
3922         DBG_PRINTF((category, RSM_DEBUG, "rsm_proc_sqready_ack:path=%lx "
3923             " src=%lx:%llx\n", path, msghdr->rsmipc_src, src_hwaddr));
3924 
3925         /*
3926          * clear the WAIT_FOR_SQACK flag since we have recvd the ack
3927          */
3928         path->flags &= ~RSMKA_WAIT_FOR_SQACK;
3929 
3930         /* save the remote sendq incn number */
3931         path->remote_incn = msg->rsmipc_local_incn;
3932 
3933         /* initialize credits to the max level */
3934         path->sendq_token.msgbuf_avail = RSMIPC_MAX_MESSAGES;
3935 
3936         /* wake up any send that is waiting for credits */
3937         cv_broadcast(&path->sendq_token.sendq_cv);
3938 
3939         /* decrement the refcnt */
3940         PATH_RELE_NOLOCK(path);
3941 
3942         mutex_exit(&path->mutex);
3943 
3944         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3945             "rsm_proc_sqready_ack done\n"));
3946 }
3947 
3948 /*
3949  * process the RSMIPC_MSG_CREDIT message
3950  */
3951 static void
3952 rsm_add_credits(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr,
3953     rsm_intr_hand_arg_t arg)
3954 {
3955         rsmipc_msghdr_t         *msghdr = (rsmipc_msghdr_t *)msg;
3956         srv_handler_arg_t       *hdlr_argp = (srv_handler_arg_t *)arg;
3957         path_t                  *path;
3958         DBG_DEFINE(category,
3959             RSM_KERNEL_AGENT | RSM_FUNC_ALL |
3960             RSM_INTR_CALLBACK | RSM_FLOWCONTROL);
3961 
3962         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_add_credits enter\n"));
3963 
3964         /* look up the path - incr the path refcnt */
3965         path = rsm_find_path(hdlr_argp->adapter_name,
3966             hdlr_argp->adapter_instance, src_hwaddr);
3967 
3968         if (path == NULL) {
3969                 DBG_PRINTF((category, RSM_DEBUG,
3970                     "rsm_add_credits enter: path not found\n"));
3971                 return;
3972         }
3973 
3974         /* the path is not active - discard credits */
3975         if (path->state != RSMKA_PATH_ACTIVE) {
3976                 PATH_RELE_NOLOCK(path);
3977                 mutex_exit(&path->mutex);
3978                 DBG_PRINTF((category, RSM_DEBUG,
3979                     "rsm_add_credits enter:path=%lx !ACTIVE\n", path));
3980                 return;
3981         }
3982 
3983         /*
3984          * Check if these credits are for current incarnation of the path.
3985          */
3986         if (path->local_incn != msghdr->rsmipc_incn) {
3987                 /* decrement the refcnt */
3988                 PATH_RELE_NOLOCK(path);
3989                 mutex_exit(&path->mutex);
3990                 DBG_PRINTF((category, RSM_DEBUG,
3991                     "rsm_add_credits enter: old incn %lld\n",
3992                     msghdr->rsmipc_incn));
3993                 return;
3994         }
3995 
3996         DBG_PRINTF((category, RSM_DEBUG,
3997             "rsm_add_credits:path=%lx new-creds=%d "
3998             "curr credits=%d src=%lx:%llx\n", path, msg->rsmipc_credits,
3999             path->sendq_token.msgbuf_avail, msghdr->rsmipc_src,
4000             src_hwaddr));
4001 
4002 
4003         /* add credits to the path's sendq */
4004         path->sendq_token.msgbuf_avail += msg->rsmipc_credits;
4005 
4006         ASSERT(path->sendq_token.msgbuf_avail <= RSMIPC_MAX_MESSAGES);
4007 
4008         /* wake up any send that is waiting for credits */
4009         cv_broadcast(&path->sendq_token.sendq_cv);
4010 
4011         /* decrement the refcnt */
4012         PATH_RELE_NOLOCK(path);
4013 
4014         mutex_exit(&path->mutex);
4015 
4016         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_add_credits done\n"));
4017 }
4018 
4019 static void
4020 rsm_intr_event(rsmipc_request_t *msg)
4021 {
4022         rsmseg_t        *seg;
4023         rsmresource_t   *p;
4024         rsm_node_id_t   src_node;
4025         DBG_DEFINE(category,
4026             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4027 
4028         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_event enter\n"));
4029 
4030         src_node = msg->rsmipc_hdr.rsmipc_src;
4031 
4032         if ((seg = msg->rsmipc_segment_cookie) != NULL) {
4033                 /* This is for an import segment */
4034                 uint_t hashval = rsmhash(msg->rsmipc_key);
4035 
4036                 rw_enter(&rsm_import_segs.rsmhash_rw, RW_READER);
4037 
4038                 p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hashval);
4039 
4040                 for (; p; p = p->rsmrc_next) {
4041                         if ((p->rsmrc_key == msg->rsmipc_key) &&
4042                             (p->rsmrc_node == src_node)) {
4043                                 seg = (rsmseg_t *)p;
4044                                 rsmseglock_acquire(seg);
4045 
4046                                 atomic_inc_32(&seg->s_pollevent);
4047 
4048                                 if (seg->s_pollflag & RSM_SEGMENT_POLL)
4049                                         pollwakeup(&seg->s_poll, POLLRDNORM);
4050 
4051                                 rsmseglock_release(seg);
4052                         }
4053                 }
4054 
4055                 rw_exit(&rsm_import_segs.rsmhash_rw);
4056         } else {
4057                 /* This is for an export segment */
4058                 seg = rsmexport_lookup(msg->rsmipc_key);
4059                 if (!seg) {
4060                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4061                             "rsm_intr_event done: exp seg not found\n"));
4062                         return;
4063                 }
4064 
4065                 ASSERT(rsmseglock_held(seg));
4066 
4067                 atomic_inc_32(&seg->s_pollevent);
4068 
4069                 /*
4070                  * We must hold the segment lock here, or else the segment
4071                  * can be freed while pollwakeup is using it. This implies
4072                  * that we MUST NOT grab the segment lock during rsm_chpoll,
4073                  * as outlined in the chpoll(2) man page.
4074                  */
4075                 if (seg->s_pollflag & RSM_SEGMENT_POLL)
4076                         pollwakeup(&seg->s_poll, POLLRDNORM);
4077 
4078                 rsmseglock_release(seg);
4079         }
4080 
4081         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_event done\n"));
4082 }
4083 
4084 /*
4085  * The exporter did a republish and changed the ACL - this change is only
4086  * visible to new importers.
4087  */
4088 static void
4089 importer_update(rsm_node_id_t src_node, rsm_memseg_id_t key,
4090     rsm_permission_t perm)
4091 {
4092 
4093         rsmresource_t   *p;
4094         rsmseg_t        *seg;
4095         uint_t          hashval = rsmhash(key);
4096         DBG_DEFINE(category,
4097             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4098 
4099         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_update enter\n"));
4100 
4101         rw_enter(&rsm_import_segs.rsmhash_rw, RW_READER);
4102 
4103         p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hashval);
4104 
4105         for (; p; p = p->rsmrc_next) {
4106                 /*
4107                  * find the importer and update the permission in the shared
4108                  * data structure. Any new importers will use the new perms
4109                  */
4110                 if ((p->rsmrc_key == key) && (p->rsmrc_node == src_node)) {
4111                         seg = (rsmseg_t *)p;
4112 
4113                         rsmseglock_acquire(seg);
4114                         rsmsharelock_acquire(seg);
4115                         seg->s_share->rsmsi_mode = perm;
4116                         rsmsharelock_release(seg);
4117                         rsmseglock_release(seg);
4118 
4119                         break;
4120                 }
4121         }
4122 
4123         rw_exit(&rsm_import_segs.rsmhash_rw);
4124 
4125         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_update done\n"));
4126 }
4127 
4128 void
4129 rsm_suspend_complete(rsm_node_id_t src_node, int flag)
4130 {
4131         int             done = 1; /* indicate all SUSPENDS have been acked */
4132         list_element_t  *elem;
4133         DBG_DEFINE(category,
4134             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4135 
4136         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4137             "rsm_suspend_complete enter\n"));
4138 
4139         mutex_enter(&rsm_suspend_list.list_lock);
4140 
4141         if (rsm_suspend_list.list_head == NULL) {
4142                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4143                     "rsm_suspend_complete done: suspend_list is empty\n"));
4144                 mutex_exit(&rsm_suspend_list.list_lock);
4145                 return;
4146         }
4147 
4148         elem = rsm_suspend_list.list_head;
4149         while (elem != NULL) {
4150                 if (elem->nodeid == src_node) {
4151                         /* clear the pending flag for the node */
4152                         elem->flags &= ~RSM_SUSPEND_ACKPENDING;
4153                         elem->flags |= flag;
4154                 }
4155 
4156                 if (done && (elem->flags & RSM_SUSPEND_ACKPENDING))
4157                         done = 0; /* still some nodes have not yet ACKED */
4158 
4159                 elem = elem->next;
4160         }
4161 
4162         mutex_exit(&rsm_suspend_list.list_lock);
4163 
4164         if (!done) {
4165                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4166                     "rsm_suspend_complete done: acks pending\n"));
4167                 return;
4168         }
4169         /*
4170          * Now that we are done with suspending all the remote importers
4171          * time to quiesce the local exporters
4172          */
4173         exporter_quiesce();
4174 
4175         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4176             "rsm_suspend_complete done\n"));
4177 }
4178 
4179 static void
4180 exporter_quiesce()
4181 {
4182         int             i, e;
4183         rsmresource_t   *current;
4184         rsmseg_t        *seg;
4185         adapter_t       *adapter;
4186         DBG_DEFINE(category,
4187             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4188 
4189         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exporter_quiesce enter\n"));
4190         /*
4191          * The importers send a SUSPEND_COMPLETE to the exporter node
4192          *      Unpublish, unbind the export segment and
4193          *      move the segments to the EXPORT_QUIESCED state
4194          */
4195 
4196         rw_enter(&rsm_export_segs.rsmhash_rw, RW_READER);
4197 
4198         for (i = 0; i < rsm_hash_size; i++) {
4199                 current = rsm_export_segs.bucket[i];
4200                 while (current != NULL) {
4201                         seg = (rsmseg_t *)current;
4202                         rsmseglock_acquire(seg);
4203                         if (current->rsmrc_state ==
4204                             RSM_STATE_EXPORT_QUIESCING) {
4205                                 adapter = seg->s_adapter;
4206                                 /*
4207                                  * some local memory handles are not published
4208                                  * check if it was published
4209                                  */
4210                                 if ((seg->s_acl == NULL) ||
4211                                     (seg->s_acl[0].ae_node != my_nodeid) ||
4212                                     (seg->s_acl[0].ae_permission != 0)) {
4213 
4214                                         e = adapter->rsmpi_ops->rsm_unpublish(
4215                                             seg->s_handle.out);
4216                                         DBG_PRINTF((category, RSM_DEBUG,
4217                                             "exporter_quiesce:unpub %d\n", e));
4218 
4219                                         e = adapter->rsmpi_ops->rsm_seg_destroy(
4220                                             seg->s_handle.out);
4221 
4222                                         DBG_PRINTF((category, RSM_DEBUG,
4223                                             "exporter_quiesce:destroy %d\n",
4224                                             e));
4225                                 }
4226 
4227                                 (void) rsm_unbind_pages(seg);
4228                                 seg->s_state = RSM_STATE_EXPORT_QUIESCED;
4229                                 cv_broadcast(&seg->s_cv);
4230                         }
4231                         rsmseglock_release(seg);
4232                         current = current->rsmrc_next;
4233                 }
4234         }
4235         rw_exit(&rsm_export_segs.rsmhash_rw);
4236 
4237         /*
4238          * All the local segments we are done with the pre-del processing
4239          * - time to move to PREDEL_COMPLETED.
4240          */
4241 
4242         mutex_enter(&rsm_drv_data.drv_lock);
4243 
4244         ASSERT(rsm_drv_data.drv_state == RSM_DRV_PREDEL_STARTED);
4245 
4246         rsm_drv_data.drv_state = RSM_DRV_PREDEL_COMPLETED;
4247 
4248         cv_broadcast(&rsm_drv_data.drv_cv);
4249 
4250         mutex_exit(&rsm_drv_data.drv_lock);
4251 
4252         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exporter_quiesce done\n"));
4253 }
4254 
4255 static void
4256 importer_suspend(rsm_node_id_t src_node)
4257 {
4258         int             i;
4259         int             susp_flg; /* true means already suspended */
4260         int             num_importers;
4261         rsmresource_t   *p = NULL, *curp;
4262         rsmhash_table_t *rhash = &rsm_import_segs;
4263         rsmseg_t        *seg;
4264         rsmipc_request_t request;
4265         DBG_DEFINE(category,
4266             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4267 
4268         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_suspend enter\n"));
4269 
4270         rw_enter(&rhash->rsmhash_rw, RW_READER);
4271         for (i = 0; i < rsm_hash_size; i++) {
4272                 p = rhash->bucket[i];
4273 
4274                 /*
4275                  * Suspend all importers with same <node, key> pair.
4276                  * After the last one of the shared importers has been
4277                  * suspended - suspend the shared mappings/connection.
4278                  */
4279                 for (; p; p = p->rsmrc_next) {
4280                         rsmseg_t *first = (rsmseg_t *)p;
4281                         if ((first->s_node != src_node) ||
4282                             (first->s_state == RSM_STATE_DISCONNECT))
4283                                 continue; /* go to next entry */
4284                         /*
4285                          * search the rest of the bucket for
4286                          * other siblings (imprtrs with the same key)
4287                          * of "first" and suspend them.
4288                          * All importers with same key fall in
4289                          * the same bucket.
4290                          */
4291                         num_importers = 0;
4292                         for (curp = p; curp; curp = curp->rsmrc_next) {
4293                                 seg = (rsmseg_t *)curp;
4294 
4295                                 rsmseglock_acquire(seg);
4296 
4297                                 if ((seg->s_node != first->s_node) ||
4298                                     (seg->s_key != first->s_key) ||
4299                                     (seg->s_state == RSM_STATE_DISCONNECT)) {
4300                                         /*
4301                                          * either not a peer segment or its a
4302                                          * disconnected segment - skip it
4303                                          */
4304                                         rsmseglock_release(seg);
4305                                         continue;
4306                                 }
4307 
4308                                 rsmseg_suspend(seg, &susp_flg);
4309 
4310                                 if (susp_flg) { /* seg already suspended */
4311                                         rsmseglock_release(seg);
4312                                         break; /* the inner for loop */
4313                                 }
4314 
4315                                 num_importers++;
4316                                 rsmsharelock_acquire(seg);
4317                                 /*
4318                                  * we've processed all importers that are
4319                                  * siblings of "first"
4320                                  */
4321                                 if (num_importers ==
4322                                     seg->s_share->rsmsi_refcnt) {
4323                                         rsmsharelock_release(seg);
4324                                         rsmseglock_release(seg);
4325                                         break;
4326                                 }
4327                                 rsmsharelock_release(seg);
4328                                 rsmseglock_release(seg);
4329                         }
4330 
4331                         /*
4332                          * All the importers with the same key and
4333                          * nodeid as "first" have been suspended.
4334                          * Now suspend the shared connect/mapping.
4335                          * This is done only once.
4336                          */
4337                         if (!susp_flg) {
4338                                 rsmsegshare_suspend(seg);
4339                         }
4340                 }
4341         }
4342 
4343         rw_exit(&rhash->rsmhash_rw);
4344 
4345         /* send an ACK for SUSPEND message */
4346         request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SUSPEND_DONE;
4347         (void) rsmipc_send(src_node, &request, RSM_NO_REPLY);
4348 
4349 
4350         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_suspend done\n"));
4351 
4352 }
4353 
4354 static void
4355 rsmseg_suspend(rsmseg_t *seg, int *susp_flg)
4356 {
4357         int             recheck_state;
4358         rsmcookie_t     *hdl;
4359         DBG_DEFINE(category,
4360             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4361 
4362         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4363             "rsmseg_suspend enter: key=%u\n", seg->s_key));
4364 
4365         *susp_flg = 0;
4366 
4367         ASSERT(rsmseglock_held(seg));
4368         /* wait if putv/getv is in progress */
4369         while (seg->s_rdmacnt > 0)
4370                 cv_wait(&seg->s_cv, &seg->s_lock);
4371 
4372         do {
4373                 recheck_state = 0;
4374 
4375                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4376                     "rsmseg_suspend:segment %x state=%d\n",
4377                     seg->s_key, seg->s_state));
4378 
4379                 switch (seg->s_state) {
4380                 case RSM_STATE_NEW:
4381                         /* not a valid state */
4382                         break;
4383                 case RSM_STATE_CONNECTING:
4384                         seg->s_state = RSM_STATE_ABORT_CONNECT;
4385                         break;
4386                 case RSM_STATE_ABORT_CONNECT:
4387                         break;
4388                 case RSM_STATE_CONNECT:
4389                         seg->s_handle.in = NULL;
4390                         seg->s_state = RSM_STATE_CONN_QUIESCE;
4391                         break;
4392                 case RSM_STATE_MAPPING:
4393                         /* wait until segment leaves the mapping state */
4394                         while (seg->s_state == RSM_STATE_MAPPING)
4395                                 cv_wait(&seg->s_cv, &seg->s_lock);
4396                         recheck_state = 1;
4397                         break;
4398                 case RSM_STATE_ACTIVE:
4399                         /* unload the mappings */
4400                         if (seg->s_ckl != NULL) {
4401                                 hdl = seg->s_ckl;
4402                                 for (; hdl != NULL; hdl = hdl->c_next) {
4403                                         (void) devmap_unload(hdl->c_dhp,
4404                                             hdl->c_off, hdl->c_len);
4405                                 }
4406                         }
4407                         seg->s_mapinfo = NULL;
4408                         seg->s_state = RSM_STATE_MAP_QUIESCE;
4409                         break;
4410                 case RSM_STATE_CONN_QUIESCE:
4411                         /* FALLTHRU */
4412                 case RSM_STATE_MAP_QUIESCE:
4413                         /* rsmseg_suspend already done for seg */
4414                         *susp_flg = 1;
4415                         break;
4416                 case RSM_STATE_DISCONNECT:
4417                         break;
4418                 default:
4419                         ASSERT(0); /* invalid state */
4420                 }
4421         } while (recheck_state);
4422 
4423         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_suspend done\n"));
4424 }
4425 
4426 static void
4427 rsmsegshare_suspend(rsmseg_t *seg)
4428 {
4429         int                     e;
4430         adapter_t               *adapter;
4431         rsm_import_share_t      *sharedp;
4432         DBG_DEFINE(category,
4433             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4434 
4435         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4436             "rsmsegshare_suspend enter\n"));
4437 
4438         rsmseglock_acquire(seg);
4439         rsmsharelock_acquire(seg);
4440 
4441         sharedp = seg->s_share;
4442         adapter = seg->s_adapter;
4443         switch (sharedp->rsmsi_state) {
4444         case RSMSI_STATE_NEW:
4445                 break;
4446         case RSMSI_STATE_CONNECTING:
4447                 sharedp->rsmsi_state = RSMSI_STATE_ABORT_CONNECT;
4448                 break;
4449         case RSMSI_STATE_ABORT_CONNECT:
4450                 break;
4451         case RSMSI_STATE_CONNECTED:
4452                 /* do the rsmpi disconnect */
4453                 if (sharedp->rsmsi_node != my_nodeid) {
4454                         e = adapter->rsmpi_ops->
4455                             rsm_disconnect(sharedp->rsmsi_handle);
4456 
4457                         DBG_PRINTF((category, RSM_DEBUG,
4458                             "rsm:rsmpi disconnect seg=%x:err=%d\n",
4459                             sharedp->rsmsi_segid, e));
4460                 }
4461 
4462                 sharedp->rsmsi_handle = NULL;
4463 
4464                 sharedp->rsmsi_state = RSMSI_STATE_CONN_QUIESCE;
4465                 break;
4466         case RSMSI_STATE_CONN_QUIESCE:
4467                 break;
4468         case RSMSI_STATE_MAPPED:
4469                 /* do the rsmpi unmap and disconnect */
4470                 if (sharedp->rsmsi_node != my_nodeid) {
4471                         e = adapter->rsmpi_ops->rsm_unmap(seg->s_handle.in);
4472 
4473                         DBG_PRINTF((category, RSM_DEBUG,
4474                             "rsmshare_suspend: rsmpi unmap %d\n", e));
4475 
4476                         e = adapter->rsmpi_ops->
4477                             rsm_disconnect(sharedp->rsmsi_handle);
4478                         DBG_PRINTF((category, RSM_DEBUG,
4479                             "rsm:rsmpi disconnect seg=%x:err=%d\n",
4480                             sharedp->rsmsi_segid, e));
4481                 }
4482 
4483                 sharedp->rsmsi_handle = NULL;
4484 
4485                 sharedp->rsmsi_state = RSMSI_STATE_MAP_QUIESCE;
4486                 break;
4487         case RSMSI_STATE_MAP_QUIESCE:
4488                 break;
4489         case RSMSI_STATE_DISCONNECTED:
4490                 break;
4491         default:
4492                 ASSERT(0); /* invalid state */
4493         }
4494 
4495         rsmsharelock_release(seg);
4496         rsmseglock_release(seg);
4497 
4498         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4499             "rsmsegshare_suspend done\n"));
4500 }
4501 
4502 /*
4503  * This should get called on receiving a RESUME message or from
4504  * the pathmanger if the node undergoing DR dies.
4505  */
4506 static void
4507 importer_resume(rsm_node_id_t src_node)
4508 {
4509         int             i;
4510         rsmresource_t   *p = NULL;
4511         rsmhash_table_t *rhash = &rsm_import_segs;
4512         void            *cookie;
4513         DBG_DEFINE(category,
4514             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4515 
4516         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_resume enter\n"));
4517 
4518         rw_enter(&rhash->rsmhash_rw, RW_READER);
4519 
4520         for (i = 0; i < rsm_hash_size; i++) {
4521                 p = rhash->bucket[i];
4522 
4523                 for (; p; p = p->rsmrc_next) {
4524                         rsmseg_t *seg = (rsmseg_t *)p;
4525 
4526                         rsmseglock_acquire(seg);
4527 
4528                         /* process only importers of node undergoing DR */
4529                         if (seg->s_node != src_node) {
4530                                 rsmseglock_release(seg);
4531                                 continue;
4532                         }
4533 
4534                         if (rsmseg_resume(seg, &cookie) != RSM_SUCCESS) {
4535                                 rsmipc_request_t        request;
4536                                 /*
4537                                  * rsmpi map/connect failed
4538                                  * inform the exporter so that it can
4539                                  * remove the importer.
4540                                  */
4541                                 request.rsmipc_hdr.rsmipc_type =
4542                                     RSMIPC_MSG_NOTIMPORTING;
4543                                 request.rsmipc_key = seg->s_segid;
4544                                 request.rsmipc_segment_cookie = cookie;
4545                                 rsmseglock_release(seg);
4546                                 (void) rsmipc_send(seg->s_node, &request,
4547                                     RSM_NO_REPLY);
4548                         } else {
4549                                 rsmseglock_release(seg);
4550                         }
4551                 }
4552         }
4553 
4554         rw_exit(&rhash->rsmhash_rw);
4555 
4556         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_resume done\n"));
4557 }
4558 
4559 static int
4560 rsmseg_resume(rsmseg_t *seg, void **cookie)
4561 {
4562         int                     e;
4563         int                     retc;
4564         off_t                   dev_offset;
4565         size_t                  maplen;
4566         uint_t                  maxprot;
4567         rsm_mapinfo_t           *p;
4568         rsmcookie_t             *hdl;
4569         rsm_import_share_t      *sharedp;
4570         DBG_DEFINE(category,
4571             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4572 
4573         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4574             "rsmseg_resume enter: key=%u\n", seg->s_key));
4575 
4576         *cookie = NULL;
4577 
4578         ASSERT(rsmseglock_held(seg));
4579 
4580         if ((seg->s_state != RSM_STATE_CONN_QUIESCE) &&
4581             (seg->s_state != RSM_STATE_MAP_QUIESCE)) {
4582                 return (RSM_SUCCESS);
4583         }
4584 
4585         sharedp = seg->s_share;
4586 
4587         rsmsharelock_acquire(seg);
4588 
4589         /* resume the shared connection and/or mapping */
4590         retc = rsmsegshare_resume(seg);
4591 
4592         if (seg->s_state == RSM_STATE_CONN_QUIESCE) {
4593                 /* shared state can either be connected or mapped */
4594                 if ((sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) ||
4595                     (sharedp->rsmsi_state == RSMSI_STATE_MAPPED)) {
4596                         ASSERT(retc == RSM_SUCCESS);
4597                         seg->s_handle.in = sharedp->rsmsi_handle;
4598                         rsmsharelock_release(seg);
4599                         seg->s_state = RSM_STATE_CONNECT;
4600 
4601                 } else { /* error in rsmpi connect during resume */
4602                         seg->s_handle.in = NULL;
4603                         seg->s_state = RSM_STATE_DISCONNECT;
4604 
4605                         sharedp->rsmsi_refcnt--;
4606                         cookie = (void *)sharedp->rsmsi_cookie;
4607 
4608                         if (sharedp->rsmsi_refcnt == 0) {
4609                                 ASSERT(sharedp->rsmsi_mapcnt == 0);
4610                                 rsmsharelock_release(seg);
4611 
4612                                 /* clean up the shared data structure */
4613                                 mutex_destroy(&sharedp->rsmsi_lock);
4614                                 cv_destroy(&sharedp->rsmsi_cv);
4615                                 kmem_free((void *)(sharedp),
4616                                     sizeof (rsm_import_share_t));
4617 
4618                         } else {
4619                                 rsmsharelock_release(seg);
4620                         }
4621                         /*
4622                          * The following needs to be done after any
4623                          * rsmsharelock calls which use seg->s_share.
4624                          */
4625                         seg->s_share = NULL;
4626                 }
4627 
4628                 /* signal any waiting segment */
4629                 cv_broadcast(&seg->s_cv);
4630 
4631                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4632                     "rsmseg_resume done:state=%d\n", seg->s_state));
4633                 return (retc);
4634         }
4635 
4636         ASSERT(seg->s_state == RSM_STATE_MAP_QUIESCE);
4637 
4638         /* Setup protections for remap */
4639         maxprot = PROT_USER;
4640         if (seg->s_mode & RSM_PERM_READ) {
4641                 maxprot |= PROT_READ;
4642         }
4643         if (seg->s_mode & RSM_PERM_WRITE) {
4644                 maxprot |= PROT_WRITE;
4645         }
4646 
4647         if (sharedp->rsmsi_state != RSMSI_STATE_MAPPED) {
4648                 /* error in rsmpi connect or map during resume */
4649 
4650                 /* remap to trash page */
4651                 ASSERT(seg->s_ckl != NULL);
4652 
4653                 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) {
4654                         e = devmap_umem_remap(hdl->c_dhp, rsm_dip,
4655                             remap_cookie, hdl->c_off, hdl->c_len,
4656                             maxprot, 0, NULL);
4657 
4658                         DBG_PRINTF((category, RSM_ERR,
4659                             "rsmseg_resume:remap=%d\n", e));
4660                 }
4661 
4662                 seg->s_handle.in = NULL;
4663                 seg->s_state = RSM_STATE_DISCONNECT;
4664 
4665                 sharedp->rsmsi_refcnt--;
4666 
4667                 sharedp->rsmsi_mapcnt--;
4668                 seg->s_mapinfo = NULL;
4669 
4670                 if (sharedp->rsmsi_refcnt == 0) {
4671                         ASSERT(sharedp->rsmsi_mapcnt == 0);
4672                         rsmsharelock_release(seg);
4673 
4674                         /* clean up the shared data structure */
4675                         mutex_destroy(&sharedp->rsmsi_lock);
4676                         cv_destroy(&sharedp->rsmsi_cv);
4677                         kmem_free((void *)(sharedp),
4678                             sizeof (rsm_import_share_t));
4679 
4680                 } else {
4681                         rsmsharelock_release(seg);
4682                 }
4683                 /*
4684                  * The following needs to be done after any
4685                  * rsmsharelock calls which use seg->s_share.
4686                  */
4687                 seg->s_share = NULL;
4688 
4689                 /* signal any waiting segment */
4690                 cv_broadcast(&seg->s_cv);
4691 
4692                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4693                     "rsmseg_resume done:seg=%x,err=%d\n",
4694                     seg->s_key, retc));
4695                 return (retc);
4696 
4697         }
4698 
4699         seg->s_handle.in = sharedp->rsmsi_handle;
4700 
4701         if (seg->s_node == my_nodeid) { /* loopback */
4702                 ASSERT(seg->s_mapinfo == NULL);
4703 
4704                 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) {
4705                         e = devmap_umem_remap(hdl->c_dhp,
4706                             rsm_dip, seg->s_cookie,
4707                             hdl->c_off, hdl->c_len,
4708                             maxprot, 0, NULL);
4709 
4710                         DBG_PRINTF((category, RSM_ERR,
4711                             "rsmseg_resume:remap=%d\n", e));
4712                 }
4713         } else { /* remote exporter */
4714                 /* remap to the new rsmpi maps */
4715                 seg->s_mapinfo = sharedp->rsmsi_mapinfo;
4716 
4717                 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) {
4718                         p = rsm_get_mapinfo(seg, hdl->c_off, hdl->c_len,
4719                             &dev_offset, &maplen);
4720                         e = devmap_devmem_remap(hdl->c_dhp,
4721                             p->dip, p->dev_register, dev_offset,
4722                             maplen, maxprot, 0, NULL);
4723 
4724                         DBG_PRINTF((category, RSM_ERR,
4725                             "rsmseg_resume:remap=%d\n", e));
4726                 }
4727         }
4728 
4729         rsmsharelock_release(seg);
4730 
4731         seg->s_state = RSM_STATE_ACTIVE;
4732         cv_broadcast(&seg->s_cv);
4733 
4734         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_resume done\n"));
4735 
4736         return (retc);
4737 }
4738 
4739 static int
4740 rsmsegshare_resume(rsmseg_t *seg)
4741 {
4742         int                     e = RSM_SUCCESS;
4743         adapter_t               *adapter;
4744         rsm_import_share_t      *sharedp;
4745         DBG_DEFINE(category,
4746             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4747 
4748         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegshare_resume enter\n"));
4749 
4750         ASSERT(rsmseglock_held(seg));
4751         ASSERT(rsmsharelock_held(seg));
4752 
4753         sharedp = seg->s_share;
4754 
4755         /*
4756          * If we are not in a xxxx_QUIESCE state that means shared
4757          * connect/mapping processing has been already been done
4758          * so return success.
4759          */
4760         if ((sharedp->rsmsi_state != RSMSI_STATE_CONN_QUIESCE) &&
4761             (sharedp->rsmsi_state != RSMSI_STATE_MAP_QUIESCE)) {
4762                 return (RSM_SUCCESS);
4763         }
4764 
4765         adapter = seg->s_adapter;
4766 
4767         if (sharedp->rsmsi_node != my_nodeid) {
4768                 rsm_addr_t      hwaddr;
4769                 hwaddr = get_remote_hwaddr(adapter, sharedp->rsmsi_node);
4770 
4771                 e = adapter->rsmpi_ops->rsm_connect(
4772                     adapter->rsmpi_handle, hwaddr,
4773                     sharedp->rsmsi_segid, &sharedp->rsmsi_handle);
4774 
4775                 DBG_PRINTF((category, RSM_DEBUG,
4776                     "rsmsegshare_resume:rsmpi connect seg=%x:err=%d\n",
4777                     sharedp->rsmsi_segid, e));
4778 
4779                 if (e != RSM_SUCCESS) {
4780                         /* when do we send the NOT_IMPORTING message */
4781                         sharedp->rsmsi_handle = NULL;
4782                         sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED;
4783                         /* signal any waiting segment */
4784                         cv_broadcast(&sharedp->rsmsi_cv);
4785                         return (e);
4786                 }
4787         }
4788 
4789         if (sharedp->rsmsi_state == RSMSI_STATE_CONN_QUIESCE) {
4790                 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
4791                 /* signal any waiting segment */
4792                 cv_broadcast(&sharedp->rsmsi_cv);
4793                 return (e);
4794         }
4795 
4796         ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAP_QUIESCE);
4797 
4798         /* do the rsmpi map of the whole segment here */
4799         if (sharedp->rsmsi_node != my_nodeid) {
4800                 size_t mapped_len;
4801                 rsm_mapinfo_t *p;
4802 
4803                 /*
4804                  * We need to do rsmpi maps with <off, lens> identical to
4805                  * the old mapinfo list because the segment mapping handles
4806                  * dhp and such need the fragmentation of rsmpi maps to be
4807                  * identical to what it was during the mmap of the segment
4808                  */
4809                 p = sharedp->rsmsi_mapinfo;
4810 
4811                 while (p != NULL) {
4812                         mapped_len = 0;
4813 
4814                         e = adapter->rsmpi_ops->rsm_map(
4815                             sharedp->rsmsi_handle, p->start_offset,
4816                             p->individual_len, &mapped_len,
4817                             &p->dip, &p->dev_register, &p->dev_offset,
4818                             NULL, NULL);
4819 
4820                         if (e != 0) {
4821                                 DBG_PRINTF((category, RSM_ERR,
4822                                     "rsmsegshare_resume: rsmpi map err=%d\n",
4823                                     e));
4824                                 break;
4825                         }
4826 
4827                         if (mapped_len != p->individual_len) {
4828                                 DBG_PRINTF((category, RSM_ERR,
4829                                     "rsmsegshare_resume: rsmpi maplen"
4830                                     "< reqlen=%lx\n", mapped_len));
4831                                 e = RSMERR_BAD_LENGTH;
4832                                 break;
4833                         }
4834 
4835                         p = p->next;
4836 
4837                 }
4838 
4839 
4840                 if (e != RSM_SUCCESS) { /* rsmpi map failed */
4841                         int     err;
4842                         /* Check if this is the first rsm_map */
4843                         if (p != sharedp->rsmsi_mapinfo) {
4844                                 /*
4845                                  * A single rsm_unmap undoes multiple rsm_maps.
4846                                  */
4847                                 (void) seg->s_adapter->rsmpi_ops->
4848                                     rsm_unmap(sharedp->rsmsi_handle);
4849                         }
4850 
4851                         rsm_free_mapinfo(sharedp->rsmsi_mapinfo);
4852                         sharedp->rsmsi_mapinfo = NULL;
4853 
4854                         err = adapter->rsmpi_ops->
4855                             rsm_disconnect(sharedp->rsmsi_handle);
4856 
4857                         DBG_PRINTF((category, RSM_DEBUG,
4858                             "rsmsegshare_resume:disconn seg=%x:err=%d\n",
4859                             sharedp->rsmsi_segid, err));
4860 
4861                         sharedp->rsmsi_handle = NULL;
4862                         sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED;
4863 
4864                         /* signal the waiting segments */
4865                         cv_broadcast(&sharedp->rsmsi_cv);
4866                         DBG_PRINTF((category, RSM_DEBUG,
4867                             "rsmsegshare_resume done: rsmpi map err\n"));
4868                         return (e);
4869                 }
4870         }
4871 
4872         sharedp->rsmsi_state = RSMSI_STATE_MAPPED;
4873 
4874         /* signal any waiting segment */
4875         cv_broadcast(&sharedp->rsmsi_cv);
4876 
4877         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegshare_resume done\n"));
4878 
4879         return (e);
4880 }
4881 
4882 /*
4883  * this is the routine that gets called by recv_taskq which is the
4884  * thread that processes messages that are flow-controlled.
4885  */
4886 static void
4887 rsm_intr_proc_deferred(void *arg)
4888 {
4889         path_t                  *path = (path_t *)arg;
4890         rsmipc_request_t        *msg;
4891         rsmipc_msghdr_t         *msghdr;
4892         rsm_node_id_t           src_node;
4893         msgbuf_elem_t           *head;
4894         int                     e;
4895         DBG_DEFINE(category,
4896             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4897 
4898         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4899             "rsm_intr_proc_deferred enter\n"));
4900 
4901         mutex_enter(&path->mutex);
4902 
4903         /* use the head of the msgbuf_queue */
4904         head = rsmka_gethead_msgbuf(path);
4905 
4906         mutex_exit(&path->mutex);
4907 
4908         msg = (rsmipc_request_t *)&(head->msg);
4909         msghdr = (rsmipc_msghdr_t *)msg;
4910 
4911         src_node = msghdr->rsmipc_src;
4912 
4913         /*
4914          * messages that need to send a reply should check the message version
4915          * before processing the message. And all messages that need to
4916          * send a reply should be processed here by the worker thread.
4917          */
4918         switch (msghdr->rsmipc_type) {
4919         case RSMIPC_MSG_SEGCONNECT:
4920                 if (msghdr->rsmipc_version != RSM_VERSION) {
4921                         rsmipc_reply_t reply;
4922                         reply.rsmipc_status = RSMERR_BAD_DRIVER_VERSION;
4923                         reply.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPLY;
4924                         reply.rsmipc_hdr.rsmipc_cookie = msghdr->rsmipc_cookie;
4925                         (void) rsmipc_send(msghdr->rsmipc_src, NULL, &reply);
4926                 } else {
4927                         rsm_intr_segconnect(src_node, msg);
4928                 }
4929                 break;
4930         case RSMIPC_MSG_DISCONNECT:
4931                 rsm_force_unload(src_node, msg->rsmipc_key, DISCONNECT);
4932                 break;
4933         case RSMIPC_MSG_SUSPEND:
4934                 importer_suspend(src_node);
4935                 break;
4936         case RSMIPC_MSG_SUSPEND_DONE:
4937                 rsm_suspend_complete(src_node, 0);
4938                 break;
4939         case RSMIPC_MSG_RESUME:
4940                 importer_resume(src_node);
4941                 break;
4942         default:
4943                 ASSERT(0);
4944         }
4945 
4946         mutex_enter(&path->mutex);
4947 
4948         rsmka_dequeue_msgbuf(path);
4949 
4950         /* incr procmsg_cnt can be at most RSMIPC_MAX_MESSAGES */
4951         if (path->procmsg_cnt < RSMIPC_MAX_MESSAGES)
4952                 path->procmsg_cnt++;
4953 
4954         ASSERT(path->procmsg_cnt <= RSMIPC_MAX_MESSAGES);
4955 
4956         /* No need to send credits if path is going down */
4957         if ((path->state == RSMKA_PATH_ACTIVE) &&
4958             (path->procmsg_cnt >= RSMIPC_LOTSFREE_MSGBUFS)) {
4959                 /*
4960                  * send credits and reset procmsg_cnt if success otherwise
4961                  * credits will be sent after processing the next message
4962                  */
4963                 e = rsmipc_send_controlmsg(path, RSMIPC_MSG_CREDIT);
4964                 if (e == 0)
4965                         path->procmsg_cnt = 0;
4966                 else
4967                         DBG_PRINTF((category, RSM_ERR,
4968                             "rsm_intr_proc_deferred:send credits err=%d\n", e));
4969         }
4970 
4971         /*
4972          * decrement the path refcnt since we incremented it in
4973          * rsm_intr_callback_dispatch
4974          */
4975         PATH_RELE_NOLOCK(path);
4976 
4977         mutex_exit(&path->mutex);
4978 
4979         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4980             "rsm_intr_proc_deferred done\n"));
4981 }
4982 
4983 /*
4984  * Flow-controlled messages are enqueued and dispatched onto a taskq here
4985  */
4986 static void
4987 rsm_intr_callback_dispatch(void *data, rsm_addr_t src_hwaddr,
4988     rsm_intr_hand_arg_t arg)
4989 {
4990         srv_handler_arg_t       *hdlr_argp = (srv_handler_arg_t *)arg;
4991         path_t                  *path;
4992         rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)data;
4993         DBG_DEFINE(category,
4994             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4995 
4996         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4997             "rsm_intr_callback_dispatch enter\n"));
4998         ASSERT(data && hdlr_argp);
4999 
5000         /* look up the path - incr the path refcnt */
5001         path = rsm_find_path(hdlr_argp->adapter_name,
5002             hdlr_argp->adapter_instance, src_hwaddr);
5003 
5004         /* the path has been removed - drop this message */
5005         if (path == NULL) {
5006                 DBG_PRINTF((category, RSM_DEBUG,
5007                     "rsm_intr_callback_dispatch done: msg dropped\n"));
5008                 return;
5009         }
5010         /* the path is not active - don't accept new messages */
5011         if (path->state != RSMKA_PATH_ACTIVE) {
5012                 PATH_RELE_NOLOCK(path);
5013                 mutex_exit(&path->mutex);
5014                 DBG_PRINTF((category, RSM_DEBUG,
5015                     "rsm_intr_callback_dispatch done: msg dropped"
5016                     " path=%lx !ACTIVE\n", path));
5017                 return;
5018         }
5019 
5020         /*
5021          * Check if this message was sent to an older incarnation
5022          * of the path/sendq.
5023          */
5024         if (path->local_incn != msghdr->rsmipc_incn) {
5025                 /* decrement the refcnt */
5026                 PATH_RELE_NOLOCK(path);
5027                 mutex_exit(&path->mutex);
5028                 DBG_PRINTF((category, RSM_DEBUG,
5029                     "rsm_intr_callback_dispatch done: old incn %lld\n",
5030                     msghdr->rsmipc_incn));
5031                 return;
5032         }
5033 
5034         /* copy and enqueue msg on the path's msgbuf queue */
5035         rsmka_enqueue_msgbuf(path, data);
5036 
5037         /*
5038          * schedule task to process messages - ignore retval from
5039          * task_dispatch because we sender cannot send more than
5040          * what receiver can handle.
5041          */
5042         (void) taskq_dispatch(path->recv_taskq,
5043             rsm_intr_proc_deferred, path, KM_NOSLEEP);
5044 
5045         mutex_exit(&path->mutex);
5046 
5047         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5048             "rsm_intr_callback_dispatch done\n"));
5049 }
5050 
5051 /*
5052  * This procedure is called from rsm_srv_func when a remote node creates a
5053  * a send queue.  This event is used as a hint that an  earlier failed
5054  * attempt to create a send queue to that remote node may now succeed and
5055  * should be retried.  Indication of an earlier failed attempt is provided
5056  * by the RSMKA_SQCREATE_PENDING flag.
5057  */
5058 static void
5059 rsm_sqcreateop_callback(rsm_addr_t src_hwaddr, rsm_intr_hand_arg_t arg)
5060 {
5061         srv_handler_arg_t       *hdlr_argp = (srv_handler_arg_t *)arg;
5062         path_t                  *path;
5063         DBG_DEFINE(category,
5064             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5065 
5066         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5067             "rsm_sqcreateop_callback enter\n"));
5068 
5069         /* look up the path - incr the path refcnt */
5070         path = rsm_find_path(hdlr_argp->adapter_name,
5071             hdlr_argp->adapter_instance, src_hwaddr);
5072 
5073         if (path == NULL) {
5074                 DBG_PRINTF((category, RSM_DEBUG,
5075                     "rsm_sqcreateop_callback done: no path\n"));
5076                 return;
5077         }
5078 
5079         if ((path->state == RSMKA_PATH_UP) &&
5080             (path->flags & RSMKA_SQCREATE_PENDING)) {
5081                 /*
5082                  * previous attempt to create sendq had failed, retry
5083                  * it and move to RSMKA_PATH_ACTIVE state if successful.
5084                  * the refcnt will be decremented in the do_deferred_work
5085                  */
5086                 (void) rsmka_do_path_active(path, RSMKA_NO_SLEEP);
5087         } else {
5088                 /* decrement the refcnt */
5089                 PATH_RELE_NOLOCK(path);
5090         }
5091         mutex_exit(&path->mutex);
5092 
5093         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5094             "rsm_sqcreateop_callback done\n"));
5095 }
5096 
5097 static void
5098 rsm_intr_callback(void *data, rsm_addr_t src_hwaddr, rsm_intr_hand_arg_t arg)
5099 {
5100         rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)data;
5101         rsmipc_request_t *msg = (rsmipc_request_t *)data;
5102         rsmipc_controlmsg_t *ctrlmsg = (rsmipc_controlmsg_t *)data;
5103         rsm_node_id_t src_node;
5104         DBG_DEFINE(category,
5105             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5106 
5107         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_callback enter:"
5108             "src=%d, type=%d\n", msghdr->rsmipc_src,
5109             msghdr->rsmipc_type));
5110 
5111         /*
5112          * Check for the version number in the msg header. If it is not
5113          * RSM_VERSION, drop the message. In the future, we need to manage
5114          * incompatible version numbers in some way
5115          */
5116         if (msghdr->rsmipc_version != RSM_VERSION) {
5117                 DBG_PRINTF((category, RSM_ERR, "wrong KA version\n"));
5118                 /*
5119                  * Drop requests that don't have a reply right here
5120                  * Request with reply will send a BAD_VERSION reply
5121                  * when they get processed by the worker thread.
5122                  */
5123                 if (msghdr->rsmipc_type != RSMIPC_MSG_SEGCONNECT) {
5124                         return;
5125                 }
5126 
5127         }
5128 
5129         src_node = msghdr->rsmipc_src;
5130 
5131         switch (msghdr->rsmipc_type) {
5132         case RSMIPC_MSG_SEGCONNECT:
5133         case RSMIPC_MSG_DISCONNECT:
5134         case RSMIPC_MSG_SUSPEND:
5135         case RSMIPC_MSG_SUSPEND_DONE:
5136         case RSMIPC_MSG_RESUME:
5137                 /*
5138                  * These message types are handled by a worker thread using
5139                  * the flow-control algorithm.
5140                  * Any message processing that does one or more of the
5141                  * following should be handled in a worker thread.
5142                  *      - allocates resources and might sleep
5143                  *      - makes RSMPI calls down to the interconnect driver
5144                  *      this by defn include requests with reply.
5145                  *      - takes a long duration of time
5146                  */
5147                 rsm_intr_callback_dispatch(data, src_hwaddr, arg);
5148                 break;
5149         case RSMIPC_MSG_NOTIMPORTING:
5150                 importer_list_rm(src_node, msg->rsmipc_key,
5151                     msg->rsmipc_segment_cookie);
5152                 break;
5153         case RSMIPC_MSG_SQREADY:
5154                 rsm_proc_sqready(data, src_hwaddr, arg);
5155                 break;
5156         case RSMIPC_MSG_SQREADY_ACK:
5157                 rsm_proc_sqready_ack(data, src_hwaddr, arg);
5158                 break;
5159         case RSMIPC_MSG_CREDIT:
5160                 rsm_add_credits(ctrlmsg, src_hwaddr, arg);
5161                 break;
5162         case RSMIPC_MSG_REPLY:
5163                 rsm_intr_reply(msghdr);
5164                 break;
5165         case RSMIPC_MSG_BELL:
5166                 rsm_intr_event(msg);
5167                 break;
5168         case RSMIPC_MSG_IMPORTING:
5169                 importer_list_add(src_node, msg->rsmipc_key,
5170                     msg->rsmipc_adapter_hwaddr,
5171                     msg->rsmipc_segment_cookie);
5172                 break;
5173         case RSMIPC_MSG_REPUBLISH:
5174                 importer_update(src_node, msg->rsmipc_key, msg->rsmipc_perm);
5175                 break;
5176         default:
5177                 DBG_PRINTF((category, RSM_DEBUG,
5178                     "rsm_intr_callback: bad msg %lx type %d data %lx\n",
5179                     (size_t)msg, (int)(msghdr->rsmipc_type), (size_t)data));
5180         }
5181 
5182         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_callback done\n"));
5183 
5184 }
5185 
5186 rsm_intr_hand_ret_t rsm_srv_func(rsm_controller_object_t *chd,
5187     rsm_intr_q_op_t opcode, rsm_addr_t src,
5188     void *data, size_t size, rsm_intr_hand_arg_t arg)
5189 {
5190         DBG_DEFINE(category,
5191             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5192 
5193         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_srv_func enter\n"));
5194 
5195         switch (opcode) {
5196         case RSM_INTR_Q_OP_CREATE:
5197                 DBG_PRINTF((category, RSM_DEBUG, "rsm_srv_func:OP_CREATE\n"));
5198                 rsm_sqcreateop_callback(src, arg);
5199                 break;
5200         case RSM_INTR_Q_OP_DESTROY:
5201                 DBG_PRINTF((category, RSM_DEBUG, "rsm_srv_func:OP_DESTROY\n"));
5202                 break;
5203         case RSM_INTR_Q_OP_RECEIVE:
5204                 rsm_intr_callback(data, src, arg);
5205                 break;
5206         default:
5207                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5208                     "rsm_srv_func: unknown opcode = %x\n", opcode));
5209         }
5210 
5211         chd = chd;
5212         size = size;
5213 
5214         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_srv_func done\n"));
5215 
5216         return (RSM_INTR_HAND_CLAIMED);
5217 }
5218 
5219 /* *************************** IPC slots ************************* */
5220 static rsmipc_slot_t *
5221 rsmipc_alloc()
5222 {
5223         int i;
5224         rsmipc_slot_t *slot;
5225         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
5226 
5227         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_alloc enter\n"));
5228 
5229         /* try to find a free slot, if not wait */
5230         mutex_enter(&rsm_ipc.lock);
5231 
5232         while (rsm_ipc.count == 0) {
5233                 rsm_ipc.wanted = 1;
5234                 cv_wait(&rsm_ipc.cv, &rsm_ipc.lock);
5235         }
5236 
5237         /* An empty slot is available, find it */
5238         slot = &rsm_ipc.slots[0];
5239         for (i = 0; i < RSMIPC_SZ; i++, slot++) {
5240                 if (RSMIPC_GET(slot, RSMIPC_FREE)) {
5241                         RSMIPC_CLEAR(slot, RSMIPC_FREE);
5242                         break;
5243                 }
5244         }
5245 
5246         ASSERT(i < RSMIPC_SZ);
5247         rsm_ipc.count--;        /* one less is available */
5248         rsm_ipc.sequence++; /* new sequence */
5249 
5250         slot->rsmipc_cookie.ic.sequence = (uint_t)rsm_ipc.sequence;
5251         slot->rsmipc_cookie.ic.index = (uint_t)i;
5252 
5253         mutex_exit(&rsm_ipc.lock);
5254 
5255         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_alloc done\n"));
5256 
5257         return (slot);
5258 }
5259 
5260 static void
5261 rsmipc_free(rsmipc_slot_t *slot)
5262 {
5263         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
5264 
5265         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_free enter\n"));
5266 
5267         ASSERT(MUTEX_HELD(&slot->rsmipc_lock));
5268         ASSERT(&rsm_ipc.slots[slot->rsmipc_cookie.ic.index] == slot);
5269 
5270         mutex_enter(&rsm_ipc.lock);
5271 
5272         RSMIPC_SET(slot, RSMIPC_FREE);
5273 
5274         slot->rsmipc_cookie.ic.sequence = 0;
5275 
5276         mutex_exit(&slot->rsmipc_lock);
5277         rsm_ipc.count++;
5278         ASSERT(rsm_ipc.count <= RSMIPC_SZ);
5279         if (rsm_ipc.wanted) {
5280                 rsm_ipc.wanted = 0;
5281                 cv_broadcast(&rsm_ipc.cv);
5282         }
5283 
5284         mutex_exit(&rsm_ipc.lock);
5285 
5286         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_free done\n"));
5287 }
5288 
5289 static int
5290 rsmipc_send(rsm_node_id_t dest, rsmipc_request_t *req, rsmipc_reply_t *reply)
5291 {
5292         int             e = 0;
5293         int             credit_check = 0;
5294         int             retry_cnt = 0;
5295         int             min_retry_cnt = 10;
5296         rsm_send_t      is;
5297         rsmipc_slot_t   *rslot;
5298         adapter_t       *adapter;
5299         path_t          *path;
5300         sendq_token_t   *sendq_token;
5301         sendq_token_t   *used_sendq_token = NULL;
5302         rsm_send_q_handle_t     ipc_handle;
5303         DBG_DEFINE(category,
5304             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5305 
5306         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_send enter:dest=%d",
5307             dest));
5308 
5309         /*
5310          * Check if this is a local case
5311          */
5312         if (dest == my_nodeid) {
5313                 switch (req->rsmipc_hdr.rsmipc_type) {
5314                 case RSMIPC_MSG_SEGCONNECT:
5315                         reply->rsmipc_status = (short)rsmsegacl_validate(
5316                             req, dest, reply);
5317                         break;
5318                 case RSMIPC_MSG_BELL:
5319                         req->rsmipc_hdr.rsmipc_src = dest;
5320                         rsm_intr_event(req);
5321                         break;
5322                 case RSMIPC_MSG_IMPORTING:
5323                         importer_list_add(dest, req->rsmipc_key,
5324                             req->rsmipc_adapter_hwaddr,
5325                             req->rsmipc_segment_cookie);
5326                         break;
5327                 case RSMIPC_MSG_NOTIMPORTING:
5328                         importer_list_rm(dest, req->rsmipc_key,
5329                             req->rsmipc_segment_cookie);
5330                         break;
5331                 case RSMIPC_MSG_REPUBLISH:
5332                         importer_update(dest, req->rsmipc_key,
5333                             req->rsmipc_perm);
5334                         break;
5335                 case RSMIPC_MSG_SUSPEND:
5336                         importer_suspend(dest);
5337                         break;
5338                 case RSMIPC_MSG_SUSPEND_DONE:
5339                         rsm_suspend_complete(dest, 0);
5340                         break;
5341                 case RSMIPC_MSG_RESUME:
5342                         importer_resume(dest);
5343                         break;
5344                 default:
5345                         ASSERT(0);
5346                 }
5347                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5348                     "rsmipc_send done\n"));
5349                 return (0);
5350         }
5351 
5352         if (dest >= MAX_NODES) {
5353                 DBG_PRINTF((category, RSM_ERR,
5354                     "rsm: rsmipc_send bad node number %x\n", dest));
5355                 return (RSMERR_REMOTE_NODE_UNREACHABLE);
5356         }
5357 
5358         /*
5359          * Oh boy! we are going remote.
5360          */
5361 
5362         /*
5363          * identify if we need to have credits to send this message
5364          * - only selected requests are flow controlled
5365          */
5366         if (req != NULL) {
5367                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5368                     "rsmipc_send:request type=%d\n",
5369                     req->rsmipc_hdr.rsmipc_type));
5370 
5371                 switch (req->rsmipc_hdr.rsmipc_type) {
5372                 case RSMIPC_MSG_SEGCONNECT:
5373                 case RSMIPC_MSG_DISCONNECT:
5374                 case RSMIPC_MSG_IMPORTING:
5375                 case RSMIPC_MSG_SUSPEND:
5376                 case RSMIPC_MSG_SUSPEND_DONE:
5377                 case RSMIPC_MSG_RESUME:
5378                         credit_check = 1;
5379                         break;
5380                 default:
5381                         credit_check = 0;
5382                 }
5383         }
5384 
5385 again:
5386         if (retry_cnt++ == min_retry_cnt) {
5387                 /* backoff before further retries for 10ms */
5388                 delay(drv_usectohz(10000));
5389                 retry_cnt = 0; /* reset retry_cnt */
5390         }
5391         sendq_token = rsmka_get_sendq_token(dest, used_sendq_token);
5392         if (sendq_token == NULL) {
5393                 DBG_PRINTF((category, RSM_ERR,
5394                     "rsm: rsmipc_send no device to reach node %d\n", dest));
5395                 return (RSMERR_REMOTE_NODE_UNREACHABLE);
5396         }
5397 
5398         if ((sendq_token == used_sendq_token) &&
5399             ((e == RSMERR_CONN_ABORTED) || (e == RSMERR_TIMEOUT) ||
5400             (e == RSMERR_COMM_ERR_MAYBE_DELIVERED))) {
5401                 rele_sendq_token(sendq_token);
5402                 DBG_PRINTF((category, RSM_DEBUG, "rsmipc_send done=%d\n", e));
5403                 return (RSMERR_CONN_ABORTED);
5404         } else
5405                 used_sendq_token = sendq_token;
5406 
5407 /* lint -save -e413 */
5408         path = SQ_TOKEN_TO_PATH(sendq_token);
5409         adapter = path->local_adapter;
5410 /* lint -restore */
5411         ipc_handle = sendq_token->rsmpi_sendq_handle;
5412 
5413         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5414             "rsmipc_send: path=%lx sendq_hdl=%lx\n", path, ipc_handle));
5415 
5416         if (reply == NULL) {
5417                 /* Send request without ack */
5418                 /*
5419                  * Set the rsmipc_version number in the msghdr for KA
5420                  * communication versioning
5421                  */
5422                 req->rsmipc_hdr.rsmipc_version = RSM_VERSION;
5423                 req->rsmipc_hdr.rsmipc_src = my_nodeid;
5424                 /*
5425                  * remote endpoints incn should match the value in our
5426                  * path's remote_incn field. No need to grab any lock
5427                  * since we have refcnted the path in rsmka_get_sendq_token
5428                  */
5429                 req->rsmipc_hdr.rsmipc_incn = path->remote_incn;
5430 
5431                 is.is_data = (void *)req;
5432                 is.is_size = sizeof (*req);
5433                 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
5434                 is.is_wait = 0;
5435 
5436                 if (credit_check) {
5437                         mutex_enter(&path->mutex);
5438                         /*
5439                          * wait till we recv credits or path goes down. If path
5440                          * goes down rsm_send will fail and we handle the error
5441                          * then
5442                          */
5443                         while ((sendq_token->msgbuf_avail == 0) &&
5444                             (path->state == RSMKA_PATH_ACTIVE)) {
5445                                 e = cv_wait_sig(&sendq_token->sendq_cv,
5446                                     &path->mutex);
5447                                 if (e == 0) {
5448                                         mutex_exit(&path->mutex);
5449                                         no_reply_cnt++;
5450                                         rele_sendq_token(sendq_token);
5451                                         DBG_PRINTF((category, RSM_DEBUG,
5452                                             "rsmipc_send done: "
5453                                             "cv_wait INTERRUPTED"));
5454                                         return (RSMERR_INTERRUPTED);
5455                                 }
5456                         }
5457 
5458                         /*
5459                          * path is not active retry on another path.
5460                          */
5461                         if (path->state != RSMKA_PATH_ACTIVE) {
5462                                 mutex_exit(&path->mutex);
5463                                 rele_sendq_token(sendq_token);
5464                                 e = RSMERR_CONN_ABORTED;
5465                                 DBG_PRINTF((category, RSM_ERR,
5466                                     "rsm: rsmipc_send: path !ACTIVE"));
5467                                 goto again;
5468                         }
5469 
5470                         ASSERT(sendq_token->msgbuf_avail > 0);
5471 
5472                         /*
5473                          * reserve a msgbuf
5474                          */
5475                         sendq_token->msgbuf_avail--;
5476 
5477                         mutex_exit(&path->mutex);
5478 
5479                         e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
5480                             NULL);
5481 
5482                         if (e != RSM_SUCCESS) {
5483                                 mutex_enter(&path->mutex);
5484                                 /*
5485                                  * release the reserved msgbuf since
5486                                  * the send failed
5487                                  */
5488                                 sendq_token->msgbuf_avail++;
5489                                 cv_broadcast(&sendq_token->sendq_cv);
5490                                 mutex_exit(&path->mutex);
5491                         }
5492                 } else
5493                         e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
5494                             NULL);
5495 
5496                 no_reply_cnt++;
5497                 rele_sendq_token(sendq_token);
5498                 if (e != RSM_SUCCESS) {
5499                         DBG_PRINTF((category, RSM_ERR,
5500                             "rsm: rsmipc_send no reply send"
5501                             " err = %d no reply count = %d\n",
5502                             e, no_reply_cnt));
5503                         ASSERT(e != RSMERR_QUEUE_FENCE_UP &&
5504                             e != RSMERR_BAD_BARRIER_HNDL);
5505                         atomic_inc_64(&rsm_ipcsend_errcnt);
5506                         goto again;
5507                 } else {
5508                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5509                             "rsmipc_send done\n"));
5510                         return (e);
5511                 }
5512 
5513         }
5514 
5515         if (req == NULL) {
5516                 /* Send reply - No flow control is done for reply */
5517                 /*
5518                  * Set the version in the msg header for KA communication
5519                  * versioning
5520                  */
5521                 reply->rsmipc_hdr.rsmipc_version = RSM_VERSION;
5522                 reply->rsmipc_hdr.rsmipc_src = my_nodeid;
5523                 /* incn number is not used for reply msgs currently */
5524                 reply->rsmipc_hdr.rsmipc_incn = path->remote_incn;
5525 
5526                 is.is_data = (void *)reply;
5527                 is.is_size = sizeof (*reply);
5528                 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
5529                 is.is_wait = 0;
5530                 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, NULL);
5531                 rele_sendq_token(sendq_token);
5532                 if (e != RSM_SUCCESS) {
5533                         DBG_PRINTF((category, RSM_ERR,
5534                             "rsm: rsmipc_send reply send"
5535                             " err = %d\n", e));
5536                         atomic_inc_64(&rsm_ipcsend_errcnt);
5537                         goto again;
5538                 } else {
5539                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5540                             "rsmipc_send done\n"));
5541                         return (e);
5542                 }
5543         }
5544 
5545         /* Reply needed */
5546         rslot = rsmipc_alloc(); /* allocate a new ipc slot */
5547 
5548         mutex_enter(&rslot->rsmipc_lock);
5549 
5550         rslot->rsmipc_data = (void *)reply;
5551         RSMIPC_SET(rslot, RSMIPC_PENDING);
5552 
5553         while (RSMIPC_GET(rslot, RSMIPC_PENDING)) {
5554                 /*
5555                  * Set the rsmipc_version number in the msghdr for KA
5556                  * communication versioning
5557                  */
5558                 req->rsmipc_hdr.rsmipc_version = RSM_VERSION;
5559                 req->rsmipc_hdr.rsmipc_src = my_nodeid;
5560                 req->rsmipc_hdr.rsmipc_cookie = rslot->rsmipc_cookie;
5561                 /*
5562                  * remote endpoints incn should match the value in our
5563                  * path's remote_incn field. No need to grab any lock
5564                  * since we have refcnted the path in rsmka_get_sendq_token
5565                  */
5566                 req->rsmipc_hdr.rsmipc_incn = path->remote_incn;
5567 
5568                 is.is_data = (void *)req;
5569                 is.is_size = sizeof (*req);
5570                 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
5571                 is.is_wait = 0;
5572                 if (credit_check) {
5573 
5574                         mutex_enter(&path->mutex);
5575                         /*
5576                          * wait till we recv credits or path goes down. If path
5577                          * goes down rsm_send will fail and we handle the error
5578                          * then.
5579                          */
5580                         while ((sendq_token->msgbuf_avail == 0) &&
5581                             (path->state == RSMKA_PATH_ACTIVE)) {
5582                                 e = cv_wait_sig(&sendq_token->sendq_cv,
5583                                     &path->mutex);
5584                                 if (e == 0) {
5585                                         mutex_exit(&path->mutex);
5586                                         RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
5587                                         rsmipc_free(rslot);
5588                                         rele_sendq_token(sendq_token);
5589                                         DBG_PRINTF((category, RSM_DEBUG,
5590                                             "rsmipc_send done: "
5591                                             "cv_wait INTERRUPTED"));
5592                                         return (RSMERR_INTERRUPTED);
5593                                 }
5594                         }
5595 
5596                         /*
5597                          * path is not active retry on another path.
5598                          */
5599                         if (path->state != RSMKA_PATH_ACTIVE) {
5600                                 mutex_exit(&path->mutex);
5601                                 RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
5602                                 rsmipc_free(rslot);
5603                                 rele_sendq_token(sendq_token);
5604                                 e = RSMERR_CONN_ABORTED;
5605                                 DBG_PRINTF((category, RSM_ERR,
5606                                     "rsm: rsmipc_send: path !ACTIVE"));
5607                                 goto again;
5608                         }
5609 
5610                         ASSERT(sendq_token->msgbuf_avail > 0);
5611 
5612                         /*
5613                          * reserve a msgbuf
5614                          */
5615                         sendq_token->msgbuf_avail--;
5616 
5617                         mutex_exit(&path->mutex);
5618 
5619                         e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
5620                             NULL);
5621 
5622                         if (e != RSM_SUCCESS) {
5623                                 mutex_enter(&path->mutex);
5624                                 /*
5625                                  * release the reserved msgbuf since
5626                                  * the send failed
5627                                  */
5628                                 sendq_token->msgbuf_avail++;
5629                                 cv_broadcast(&sendq_token->sendq_cv);
5630                                 mutex_exit(&path->mutex);
5631                         }
5632                 } else
5633                         e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
5634                             NULL);
5635 
5636                 if (e != RSM_SUCCESS) {
5637                         DBG_PRINTF((category, RSM_ERR,
5638                             "rsm: rsmipc_send rsmpi send err = %d\n", e));
5639                         RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
5640                         rsmipc_free(rslot);
5641                         rele_sendq_token(sendq_token);
5642                         atomic_inc_64(&rsm_ipcsend_errcnt);
5643                         goto again;
5644                 }
5645 
5646                 /* wait for a reply signal, a SIGINT, or 5 sec. timeout */
5647                 e = cv_reltimedwait_sig(&rslot->rsmipc_cv, &rslot->rsmipc_lock,
5648                     drv_usectohz(5000000), TR_CLOCK_TICK);
5649                 if (e < 0) {
5650                         /* timed out - retry */
5651                         e = RSMERR_TIMEOUT;
5652                 } else if (e == 0) {
5653                         /* signalled - return error */
5654                         e = RSMERR_INTERRUPTED;
5655                         break;
5656                 } else {
5657                         e = RSM_SUCCESS;
5658                 }
5659         }
5660 
5661         RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
5662         rsmipc_free(rslot);
5663         rele_sendq_token(sendq_token);
5664 
5665         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_send done=%d\n", e));
5666         return (e);
5667 }
5668 
5669 static int
5670 rsm_send_notimporting(rsm_node_id_t dest, rsm_memseg_id_t segid,  void *cookie)
5671 {
5672         rsmipc_request_t request;
5673 
5674         /*
5675          *  inform the exporter to delete this importer
5676          */
5677         request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_NOTIMPORTING;
5678         request.rsmipc_key = segid;
5679         request.rsmipc_segment_cookie = cookie;
5680         return (rsmipc_send(dest, &request, RSM_NO_REPLY));
5681 }
5682 
5683 static void
5684 rsm_send_republish(rsm_memseg_id_t segid, rsmapi_access_entry_t *acl,
5685     int acl_len, rsm_permission_t default_permission)
5686 {
5687         int                     i;
5688         importing_token_t       *token;
5689         rsmipc_request_t        request;
5690         republish_token_t       *republish_list = NULL;
5691         republish_token_t       *rp;
5692         rsm_permission_t        permission;
5693         int                     index;
5694 
5695         /*
5696          * send the new access mode to all the nodes that have imported
5697          * this segment.
5698          * If the new acl does not have a node that was present in
5699          * the old acl a access permission of 0 is sent.
5700          */
5701 
5702         index = rsmhash(segid);
5703 
5704         /*
5705          * create a list of node/permissions to send the republish message
5706          */
5707         mutex_enter(&importer_list.lock);
5708 
5709         token = importer_list.bucket[index];
5710         while (token != NULL) {
5711                 if (segid == token->key) {
5712                         permission = default_permission;
5713 
5714                         for (i = 0; i < acl_len; i++) {
5715                                 if (token->importing_node == acl[i].ae_node) {
5716                                         permission = acl[i].ae_permission;
5717                                         break;
5718                                 }
5719                         }
5720                         rp = kmem_zalloc(sizeof (republish_token_t), KM_SLEEP);
5721 
5722                         rp->key = segid;
5723                         rp->importing_node = token->importing_node;
5724                         rp->permission = permission;
5725                         rp->next = republish_list;
5726                         republish_list = rp;
5727                 }
5728                 token = token->next;
5729         }
5730 
5731         mutex_exit(&importer_list.lock);
5732 
5733         request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPUBLISH;
5734         request.rsmipc_key = segid;
5735 
5736         while (republish_list != NULL) {
5737                 request.rsmipc_perm = republish_list->permission;
5738                 (void) rsmipc_send(republish_list->importing_node,
5739                     &request, RSM_NO_REPLY);
5740                 rp = republish_list;
5741                 republish_list = republish_list->next;
5742                 kmem_free(rp, sizeof (republish_token_t));
5743         }
5744 }
5745 
5746 static void
5747 rsm_send_suspend()
5748 {
5749         int                     i, e;
5750         rsmipc_request_t        request;
5751         list_element_t          *tokp;
5752         list_element_t          *head = NULL;
5753         importing_token_t       *token;
5754         DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
5755             "rsm_send_suspend enter\n"));
5756 
5757         /*
5758          * create a list of node to send the suspend message
5759          *
5760          * Currently the whole importer list is scanned and we obtain
5761          * all the nodes - this basically gets all nodes that at least
5762          * import one segment from the local node.
5763          *
5764          * no need to grab the rsm_suspend_list lock here since we are
5765          * single threaded when suspend is called.
5766          */
5767 
5768         mutex_enter(&importer_list.lock);
5769         for (i = 0; i < rsm_hash_size; i++) {
5770 
5771                 token = importer_list.bucket[i];
5772 
5773                 while (token != NULL) {
5774 
5775                         tokp = head;
5776 
5777                         /*
5778                          * make sure that the token's node
5779                          * is not already on the suspend list
5780                          */
5781                         while (tokp != NULL) {
5782                                 if (tokp->nodeid == token->importing_node) {
5783                                         break;
5784                                 }
5785                                 tokp = tokp->next;
5786                         }
5787 
5788                         if (tokp == NULL) { /* not in suspend list */
5789                                 tokp = kmem_zalloc(sizeof (list_element_t),
5790                                     KM_SLEEP);
5791                                 tokp->nodeid = token->importing_node;
5792                                 tokp->next = head;
5793                                 head = tokp;
5794                         }
5795 
5796                         token = token->next;
5797                 }
5798         }
5799         mutex_exit(&importer_list.lock);
5800 
5801         if (head == NULL) { /* no importers so go ahead and quiesce segments */
5802                 exporter_quiesce();
5803                 return;
5804         }
5805 
5806         mutex_enter(&rsm_suspend_list.list_lock);
5807         ASSERT(rsm_suspend_list.list_head == NULL);
5808         /*
5809          * update the suspend list righaway so that if a node dies the
5810          * pathmanager can set the NODE dead flag
5811          */
5812         rsm_suspend_list.list_head = head;
5813         mutex_exit(&rsm_suspend_list.list_lock);
5814 
5815         tokp = head;
5816 
5817         while (tokp != NULL) {
5818                 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SUSPEND;
5819                 e = rsmipc_send(tokp->nodeid, &request, RSM_NO_REPLY);
5820                 /*
5821                  * Error in rsmipc_send currently happens due to inaccessibility
5822                  * of the remote node.
5823                  */
5824                 if (e == RSM_SUCCESS) { /* send failed - don't wait for ack */
5825                         tokp->flags |= RSM_SUSPEND_ACKPENDING;
5826                 }
5827 
5828                 tokp = tokp->next;
5829         }
5830 
5831         DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
5832             "rsm_send_suspend done\n"));
5833 
5834 }
5835 
5836 static void
5837 rsm_send_resume()
5838 {
5839         rsmipc_request_t        request;
5840         list_element_t          *elem, *head;
5841 
5842         /*
5843          * save the suspend list so that we know where to send
5844          * the resume messages and make the suspend list head
5845          * NULL.
5846          */
5847         mutex_enter(&rsm_suspend_list.list_lock);
5848         head = rsm_suspend_list.list_head;
5849         rsm_suspend_list.list_head = NULL;
5850         mutex_exit(&rsm_suspend_list.list_lock);
5851 
5852         while (head != NULL) {
5853                 elem = head;
5854                 head = head->next;
5855 
5856                 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_RESUME;
5857 
5858                 (void) rsmipc_send(elem->nodeid, &request, RSM_NO_REPLY);
5859 
5860                 kmem_free((void *)elem, sizeof (list_element_t));
5861 
5862         }
5863 
5864 }
5865 
5866 /*
5867  * This function takes path and sends a message using the sendq
5868  * corresponding to it. The RSMIPC_MSG_SQREADY, RSMIPC_MSG_SQREADY_ACK
5869  * and RSMIPC_MSG_CREDIT are sent using this function.
5870  */
5871 int
5872 rsmipc_send_controlmsg(path_t *path, int msgtype)
5873 {
5874         int                     e;
5875         int                     retry_cnt = 0;
5876         int                     min_retry_cnt = 10;
5877         adapter_t               *adapter;
5878         rsm_send_t              is;
5879         rsm_send_q_handle_t     ipc_handle;
5880         rsmipc_controlmsg_t     msg;
5881         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_FLOWCONTROL);
5882 
5883         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5884             "rsmipc_send_controlmsg enter\n"));
5885 
5886         ASSERT(MUTEX_HELD(&path->mutex));
5887 
5888         adapter = path->local_adapter;
5889 
5890         DBG_PRINTF((category, RSM_DEBUG, "rsmipc_send_controlmsg:path=%lx "
5891             "msgtype=%d %lx:%llx->%lx:%llx procmsg=%d\n", path, msgtype,
5892             my_nodeid, adapter->hwaddr, path->remote_node,
5893             path->remote_hwaddr, path->procmsg_cnt));
5894 
5895         if (path->state != RSMKA_PATH_ACTIVE) {
5896                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5897                     "rsmipc_send_controlmsg done: ! RSMKA_PATH_ACTIVE"));
5898                 return (1);
5899         }
5900 
5901         ipc_handle = path->sendq_token.rsmpi_sendq_handle;
5902 
5903         msg.rsmipc_hdr.rsmipc_version = RSM_VERSION;
5904         msg.rsmipc_hdr.rsmipc_src = my_nodeid;
5905         msg.rsmipc_hdr.rsmipc_type = msgtype;
5906         msg.rsmipc_hdr.rsmipc_incn = path->remote_incn;
5907 
5908         if (msgtype == RSMIPC_MSG_CREDIT)
5909                 msg.rsmipc_credits = path->procmsg_cnt;
5910 
5911         msg.rsmipc_local_incn = path->local_incn;
5912 
5913         msg.rsmipc_adapter_hwaddr = adapter->hwaddr;
5914         /* incr the sendq, path refcnt */
5915         PATH_HOLD_NOLOCK(path);
5916         SENDQ_TOKEN_HOLD(path);
5917 
5918         do {
5919                 /* drop the path lock before doing the rsm_send */
5920                 mutex_exit(&path->mutex);
5921 
5922                 is.is_data = (void *)&msg;
5923                 is.is_size = sizeof (msg);
5924                 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
5925                 is.is_wait = 0;
5926 
5927                 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, NULL);
5928 
5929                 ASSERT(e != RSMERR_QUEUE_FENCE_UP &&
5930                     e != RSMERR_BAD_BARRIER_HNDL);
5931 
5932                 mutex_enter(&path->mutex);
5933 
5934                 if (e == RSM_SUCCESS) {
5935                         break;
5936                 }
5937                 /* error counter for statistics */
5938                 atomic_inc_64(&rsm_ctrlmsg_errcnt);
5939 
5940                 DBG_PRINTF((category, RSM_ERR,
5941                     "rsmipc_send_controlmsg:rsm_send error=%d", e));
5942 
5943                 if (++retry_cnt == min_retry_cnt) { /* backoff before retry */
5944                         (void) cv_reltimedwait(&path->sendq_token.sendq_cv,
5945                             &path->mutex, drv_usectohz(10000), TR_CLOCK_TICK);
5946                         retry_cnt = 0;
5947                 }
5948         } while (path->state == RSMKA_PATH_ACTIVE);
5949 
5950         /* decrement the sendq,path refcnt that we incr before rsm_send */
5951         SENDQ_TOKEN_RELE(path);
5952         PATH_RELE_NOLOCK(path);
5953 
5954         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5955             "rsmipc_send_controlmsg done=%d", e));
5956         return (e);
5957 }
5958 
5959 /*
5960  * Called from rsm_force_unload and path_importer_disconnect. The memory
5961  * mapping for the imported segment is removed and the segment is
5962  * disconnected at the interconnect layer if disconnect_flag is TRUE.
5963  * rsm_force_unload will get disconnect_flag TRUE from rsm_intr_callback
5964  * and FALSE from rsm_rebind.
5965  *
5966  * When subsequent accesses cause page faulting, the dummy page is mapped
5967  * to resolve the fault, and the mapping generation number is incremented
5968  * so that the application can be notified on a close barrier operation.
5969  *
5970  * It is important to note that the caller of rsmseg_unload is responsible for
5971  * acquiring the segment lock before making a call to rsmseg_unload. This is
5972  * required to make the caller and rsmseg_unload thread safe. The segment lock
5973  * will be released by the rsmseg_unload function.
5974  */
5975 void
5976 rsmseg_unload(rsmseg_t *im_seg)
5977 {
5978         rsmcookie_t             *hdl;
5979         void                    *shared_cookie;
5980         rsmipc_request_t        request;
5981         uint_t                  maxprot;
5982 
5983         DBG_DEFINE(category,
5984             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5985 
5986         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_unload enter\n"));
5987 
5988         ASSERT(im_seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
5989 
5990         /* wait until segment leaves the mapping state */
5991         while (im_seg->s_state == RSM_STATE_MAPPING)
5992                 cv_wait(&im_seg->s_cv, &im_seg->s_lock);
5993         /*
5994          * An unload is only necessary if the segment is connected. However,
5995          * if the segment was on the import list in state RSM_STATE_CONNECTING
5996          * then a connection was in progress. Change to RSM_STATE_NEW
5997          * here to cause an early exit from the connection process.
5998          */
5999         if (im_seg->s_state == RSM_STATE_NEW) {
6000                 rsmseglock_release(im_seg);
6001                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6002                     "rsmseg_unload done: RSM_STATE_NEW\n"));
6003                 return;
6004         } else if (im_seg->s_state == RSM_STATE_CONNECTING) {
6005                 im_seg->s_state = RSM_STATE_ABORT_CONNECT;
6006                 rsmsharelock_acquire(im_seg);
6007                 im_seg->s_share->rsmsi_state = RSMSI_STATE_ABORT_CONNECT;
6008                 rsmsharelock_release(im_seg);
6009                 rsmseglock_release(im_seg);
6010                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6011                     "rsmseg_unload done: RSM_STATE_CONNECTING\n"));
6012                 return;
6013         }
6014 
6015         if (im_seg->s_flags & RSM_FORCE_DISCONNECT) {
6016                 if (im_seg->s_ckl != NULL) {
6017                         int e;
6018                         /* Setup protections for remap */
6019                         maxprot = PROT_USER;
6020                         if (im_seg->s_mode & RSM_PERM_READ) {
6021                                 maxprot |= PROT_READ;
6022                         }
6023                         if (im_seg->s_mode & RSM_PERM_WRITE) {
6024                                 maxprot |= PROT_WRITE;
6025                         }
6026                         hdl = im_seg->s_ckl;
6027                         for (; hdl != NULL; hdl = hdl->c_next) {
6028                                 e = devmap_umem_remap(hdl->c_dhp, rsm_dip,
6029                                     remap_cookie,
6030                                     hdl->c_off, hdl->c_len,
6031                                     maxprot, 0, NULL);
6032 
6033                                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6034                                     "remap returns %d\n", e));
6035                         }
6036                 }
6037 
6038                 (void) rsm_closeconnection(im_seg, &shared_cookie);
6039 
6040                 if (shared_cookie != NULL) {
6041                         /*
6042                          * inform the exporting node so this import
6043                          * can be deleted from the list of importers.
6044                          */
6045                         request.rsmipc_hdr.rsmipc_type =
6046                             RSMIPC_MSG_NOTIMPORTING;
6047                         request.rsmipc_key = im_seg->s_segid;
6048                         request.rsmipc_segment_cookie = shared_cookie;
6049                         rsmseglock_release(im_seg);
6050                         (void) rsmipc_send(im_seg->s_node, &request,
6051                             RSM_NO_REPLY);
6052                 } else {
6053                         rsmseglock_release(im_seg);
6054                 }
6055         }
6056         else
6057                 rsmseglock_release(im_seg);
6058 
6059         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_unload done\n"));
6060 
6061 }
6062 
6063 /* ****************************** Importer Calls ************************ */
6064 
6065 static int
6066 rsm_access(uid_t owner, gid_t group, int perm, int mode, const struct cred *cr)
6067 {
6068         int shifts = 0;
6069 
6070         if (crgetuid(cr) != owner) {
6071                 shifts += 3;
6072                 if (!groupmember(group, cr))
6073                         shifts += 3;
6074         }
6075 
6076         mode &= ~(perm << shifts);
6077 
6078         if (mode == 0)
6079                 return (0);
6080 
6081         return (secpolicy_rsm_access(cr, owner, mode));
6082 }
6083 
6084 
6085 static int
6086 rsm_connect(rsmseg_t *seg, rsm_ioctlmsg_t *msg, cred_t *cred,
6087     intptr_t dataptr, int mode)
6088 {
6089         int e;
6090         int                     recheck_state = 0;
6091         void                    *shared_cookie;
6092         rsmipc_request_t        request;
6093         rsmipc_reply_t          reply;
6094         rsm_permission_t        access;
6095         adapter_t               *adapter;
6096         rsm_addr_t              addr = 0;
6097         rsm_import_share_t      *sharedp;
6098         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);
6099 
6100         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_connect enter\n"));
6101 
6102         adapter = rsm_getadapter(msg, mode);
6103         if (adapter == NULL) {
6104                 DBG_PRINTF((category, RSM_ERR,
6105                     "rsm_connect done:ENODEV adapter=NULL\n"));
6106                 return (RSMERR_CTLR_NOT_PRESENT);
6107         }
6108 
6109         if ((adapter == &loopback_adapter) && (msg->nodeid != my_nodeid)) {
6110                 rsmka_release_adapter(adapter);
6111                 DBG_PRINTF((category, RSM_ERR,
6112                     "rsm_connect done:ENODEV loopback\n"));
6113                 return (RSMERR_CTLR_NOT_PRESENT);
6114         }
6115 
6116 
6117         ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
6118         ASSERT(seg->s_state == RSM_STATE_NEW);
6119 
6120         /*
6121          * Translate perm to access
6122          */
6123         if (msg->perm & ~RSM_PERM_RDWR) {
6124                 rsmka_release_adapter(adapter);
6125                 DBG_PRINTF((category, RSM_ERR,
6126                     "rsm_connect done:EINVAL invalid perms\n"));
6127                 return (RSMERR_BAD_PERMS);
6128         }
6129         access = 0;
6130         if (msg->perm & RSM_PERM_READ)
6131                 access |= RSM_ACCESS_READ;
6132         if (msg->perm & RSM_PERM_WRITE)
6133                 access |= RSM_ACCESS_WRITE;
6134 
6135         seg->s_node = msg->nodeid;
6136 
6137         /*
6138          * Adding to the import list locks the segment; release the segment
6139          * lock so we can get the reply for the send.
6140          */
6141         e = rsmimport_add(seg, msg->key);
6142         if (e) {
6143                 rsmka_release_adapter(adapter);
6144                 DBG_PRINTF((category, RSM_ERR,
6145                     "rsm_connect done:rsmimport_add failed %d\n", e));
6146                 return (e);
6147         }
6148         seg->s_state = RSM_STATE_CONNECTING;
6149 
6150         /*
6151          * Set the s_adapter field here so as to have a valid comparison of
6152          * the adapter and the s_adapter value during rsmshare_get. For
6153          * any error, set s_adapter to NULL before doing a release_adapter
6154          */
6155         seg->s_adapter = adapter;
6156 
6157         rsmseglock_release(seg);
6158 
6159         /*
6160          * get the pointer to the shared data structure; the
6161          * shared data is locked and refcount has been incremented
6162          */
6163         sharedp = rsmshare_get(msg->key, msg->nodeid, adapter, seg);
6164 
6165         ASSERT(rsmsharelock_held(seg));
6166 
6167         do {
6168                 /* flag indicates whether we need to recheck the state */
6169                 recheck_state = 0;
6170                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6171                     "rsm_connect:RSMSI_STATE=%d\n", sharedp->rsmsi_state));
6172                 switch (sharedp->rsmsi_state) {
6173                 case RSMSI_STATE_NEW:
6174                         sharedp->rsmsi_state = RSMSI_STATE_CONNECTING;
6175                         break;
6176                 case RSMSI_STATE_CONNECTING:
6177                         /* FALLTHRU */
6178                 case RSMSI_STATE_CONN_QUIESCE:
6179                         /* FALLTHRU */
6180                 case RSMSI_STATE_MAP_QUIESCE:
6181                         /* wait for the state to change */
6182                         while ((sharedp->rsmsi_state ==
6183                             RSMSI_STATE_CONNECTING) ||
6184                             (sharedp->rsmsi_state ==
6185                             RSMSI_STATE_CONN_QUIESCE) ||
6186                             (sharedp->rsmsi_state ==
6187                             RSMSI_STATE_MAP_QUIESCE)) {
6188                                 if (cv_wait_sig(&sharedp->rsmsi_cv,
6189                                     &sharedp->rsmsi_lock) == 0) {
6190                                         /* signalled - clean up and return */
6191                                         rsmsharelock_release(seg);
6192                                         rsmimport_rm(seg);
6193                                         seg->s_adapter = NULL;
6194                                         rsmka_release_adapter(adapter);
6195                                         seg->s_state = RSM_STATE_NEW;
6196                                         DBG_PRINTF((category, RSM_ERR,
6197                                             "rsm_connect done: INTERRUPTED\n"));
6198                                         return (RSMERR_INTERRUPTED);
6199                                 }
6200                         }
6201                         /*
6202                          * the state changed, loop back and check what it is
6203                          */
6204                         recheck_state = 1;
6205                         break;
6206                 case RSMSI_STATE_ABORT_CONNECT:
6207                         /* exit the loop and clean up further down */
6208                         break;
6209                 case RSMSI_STATE_CONNECTED:
6210                         /* already connected, good - fall through */
6211                 case RSMSI_STATE_MAPPED:
6212                         /* already mapped, wow - fall through */
6213                         /* access validation etc is done further down */
6214                         break;
6215                 case RSMSI_STATE_DISCONNECTED:
6216                         /* disconnected - so reconnect now */
6217                         sharedp->rsmsi_state = RSMSI_STATE_CONNECTING;
6218                         break;
6219                 default:
6220                         ASSERT(0); /* Invalid State */
6221                 }
6222         } while (recheck_state);
6223 
6224         if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) {
6225                 /* we are the first to connect */
6226                 rsmsharelock_release(seg);
6227 
6228                 if (msg->nodeid != my_nodeid) {
6229                         addr = get_remote_hwaddr(adapter, msg->nodeid);
6230 
6231                         if ((int64_t)addr < 0) {
6232                                 rsmsharelock_acquire(seg);
6233                                 rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING,
6234                                     RSMSI_STATE_NEW);
6235                                 rsmsharelock_release(seg);
6236                                 rsmimport_rm(seg);
6237                                 seg->s_adapter = NULL;
6238                                 rsmka_release_adapter(adapter);
6239                                 seg->s_state = RSM_STATE_NEW;
6240                                 DBG_PRINTF((category, RSM_ERR,
6241                                     "rsm_connect done: hwaddr<0\n"));
6242                                 return (RSMERR_INTERNAL_ERROR);
6243                         }
6244                 } else {
6245                         addr = adapter->hwaddr;
6246                 }
6247 
6248                 /*
6249                  * send request to node [src, dest, key, msgid] and get back
6250                  * [status, msgid, cookie]
6251                  */
6252                 request.rsmipc_key = msg->key;
6253                 /*
6254                  * we need the s_mode of the exporter so pass
6255                  * RSM_ACCESS_TRUSTED
6256                  */
6257                 request.rsmipc_perm = RSM_ACCESS_TRUSTED;
6258                 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SEGCONNECT;
6259                 request.rsmipc_adapter_hwaddr = addr;
6260                 request.rsmipc_segment_cookie = sharedp;
6261 
6262                 e = (int)rsmipc_send(msg->nodeid, &request, &reply);
6263                 if (e) {
6264                         rsmsharelock_acquire(seg);
6265                         rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING,
6266                             RSMSI_STATE_NEW);
6267                         rsmsharelock_release(seg);
6268                         rsmimport_rm(seg);
6269                         seg->s_adapter = NULL;
6270                         rsmka_release_adapter(adapter);
6271                         seg->s_state = RSM_STATE_NEW;
6272                         DBG_PRINTF((category, RSM_ERR,
6273                             "rsm_connect done:rsmipc_send failed %d\n", e));
6274                         return (e);
6275                 }
6276 
6277                 if (reply.rsmipc_status != RSM_SUCCESS) {
6278                         rsmsharelock_acquire(seg);
6279                         rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING,
6280                             RSMSI_STATE_NEW);
6281                         rsmsharelock_release(seg);
6282                         rsmimport_rm(seg);
6283                         seg->s_adapter = NULL;
6284                         rsmka_release_adapter(adapter);
6285                         seg->s_state = RSM_STATE_NEW;
6286                         DBG_PRINTF((category, RSM_ERR,
6287                             "rsm_connect done:rsmipc_send reply err %d\n",
6288                             reply.rsmipc_status));
6289                         return (reply.rsmipc_status);
6290                 }
6291 
6292                 rsmsharelock_acquire(seg);
6293                 /* store the information recvd into the shared data struct */
6294                 sharedp->rsmsi_mode = reply.rsmipc_mode;
6295                 sharedp->rsmsi_uid = reply.rsmipc_uid;
6296                 sharedp->rsmsi_gid = reply.rsmipc_gid;
6297                 sharedp->rsmsi_seglen = reply.rsmipc_seglen;
6298                 sharedp->rsmsi_cookie = sharedp;
6299         }
6300 
6301         rsmsharelock_release(seg);
6302 
6303         /*
6304          * Get the segment lock and check for a force disconnect
6305          * from the export side which would have changed the state
6306          * back to RSM_STATE_NEW. Once the segment lock is acquired a
6307          * force disconnect will be held off until the connection
6308          * has completed.
6309          */
6310         rsmseglock_acquire(seg);
6311         rsmsharelock_acquire(seg);
6312         ASSERT(seg->s_state == RSM_STATE_CONNECTING ||
6313             seg->s_state == RSM_STATE_ABORT_CONNECT);
6314 
6315         shared_cookie = sharedp->rsmsi_cookie;
6316 
6317         if ((seg->s_state == RSM_STATE_ABORT_CONNECT) ||
6318             (sharedp->rsmsi_state == RSMSI_STATE_ABORT_CONNECT)) {
6319                 seg->s_state = RSM_STATE_NEW;
6320                 seg->s_adapter = NULL;
6321                 rsmsharelock_release(seg);
6322                 rsmseglock_release(seg);
6323                 rsmimport_rm(seg);
6324                 rsmka_release_adapter(adapter);
6325 
6326                 rsmsharelock_acquire(seg);
6327                 if (!(sharedp->rsmsi_flags & RSMSI_FLAGS_ABORTDONE)) {
6328                         /*
6329                          * set a flag indicating abort handling has been
6330                          * done
6331                          */
6332                         sharedp->rsmsi_flags |= RSMSI_FLAGS_ABORTDONE;
6333                         rsmsharelock_release(seg);
6334                         /* send a message to exporter - only once */
6335                         (void) rsm_send_notimporting(msg->nodeid,
6336                             msg->key, shared_cookie);
6337                         rsmsharelock_acquire(seg);
6338                         /*
6339                          * wake up any waiting importers and inform that
6340                          * connection has been aborted
6341                          */
6342                         cv_broadcast(&sharedp->rsmsi_cv);
6343                 }
6344                 rsmsharelock_release(seg);
6345 
6346                 DBG_PRINTF((category, RSM_ERR,
6347                     "rsm_connect done: RSM_STATE_ABORT_CONNECT\n"));
6348                 return (RSMERR_INTERRUPTED);
6349         }
6350 
6351 
6352         /*
6353          * We need to verify that this process has access
6354          */
6355         e = rsm_access(sharedp->rsmsi_uid, sharedp->rsmsi_gid,
6356             access & sharedp->rsmsi_mode,
6357             (int)(msg->perm & RSM_PERM_RDWR), cred);
6358         if (e) {
6359                 rsmsharelock_release(seg);
6360                 seg->s_state = RSM_STATE_NEW;
6361                 seg->s_adapter = NULL;
6362                 rsmseglock_release(seg);
6363                 rsmimport_rm(seg);
6364                 rsmka_release_adapter(adapter);
6365                 /*
6366                  * No need to lock segment it has been removed
6367                  * from the hash table
6368                  */
6369                 rsmsharelock_acquire(seg);
6370                 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) {
6371                         rsmsharelock_release(seg);
6372                         /* this is the first importer */
6373 
6374                         (void) rsm_send_notimporting(msg->nodeid, msg->key,
6375                             shared_cookie);
6376                         rsmsharelock_acquire(seg);
6377                         sharedp->rsmsi_state = RSMSI_STATE_NEW;
6378                         cv_broadcast(&sharedp->rsmsi_cv);
6379                 }
6380                 rsmsharelock_release(seg);
6381 
6382                 DBG_PRINTF((category, RSM_ERR,
6383                     "rsm_connect done: ipcaccess failed\n"));
6384                 return (RSMERR_PERM_DENIED);
6385         }
6386 
6387         /* update state and cookie */
6388         seg->s_segid = sharedp->rsmsi_segid;
6389         seg->s_len = sharedp->rsmsi_seglen;
6390         seg->s_mode = access & sharedp->rsmsi_mode;
6391         seg->s_pid = ddi_get_pid();
6392         seg->s_mapinfo = NULL;
6393 
6394         if (seg->s_node != my_nodeid) {
6395                 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) {
6396                         e = adapter->rsmpi_ops->rsm_connect(
6397                             adapter->rsmpi_handle,
6398                             addr, seg->s_segid, &sharedp->rsmsi_handle);
6399 
6400                         if (e != RSM_SUCCESS) {
6401                                 seg->s_state = RSM_STATE_NEW;
6402                                 seg->s_adapter = NULL;
6403                                 rsmsharelock_release(seg);
6404                                 rsmseglock_release(seg);
6405                                 rsmimport_rm(seg);
6406                                 rsmka_release_adapter(adapter);
6407                                 /*
6408                                  *  inform the exporter to delete this importer
6409                                  */
6410                                 (void) rsm_send_notimporting(msg->nodeid,
6411                                     msg->key, shared_cookie);
6412 
6413                                 /*
6414                                  * Now inform any waiting importers to
6415                                  * retry connect. This needs to be done
6416                                  * after sending notimporting so that
6417                                  * the notimporting is sent before a waiting
6418                                  * importer sends a segconnect while retrying
6419                                  *
6420                                  * No need to lock segment it has been removed
6421                                  * from the hash table
6422                                  */
6423 
6424                                 rsmsharelock_acquire(seg);
6425                                 sharedp->rsmsi_state = RSMSI_STATE_NEW;
6426                                 cv_broadcast(&sharedp->rsmsi_cv);
6427                                 rsmsharelock_release(seg);
6428 
6429                                 DBG_PRINTF((category, RSM_ERR,
6430                                     "rsm_connect error %d\n", e));
6431                                 if (e == RSMERR_SEG_NOT_PUBLISHED_TO_RSM_ADDR)
6432                                         return (
6433                                             RSMERR_SEG_NOT_PUBLISHED_TO_NODE);
6434                                 else if ((e == RSMERR_RSM_ADDR_UNREACHABLE) ||
6435                                     (e == RSMERR_UNKNOWN_RSM_ADDR))
6436                                         return (RSMERR_REMOTE_NODE_UNREACHABLE);
6437                                 else
6438                                         return (e);
6439                         }
6440 
6441                 }
6442                 seg->s_handle.in = sharedp->rsmsi_handle;
6443 
6444         }
6445 
6446         seg->s_state = RSM_STATE_CONNECT;
6447 
6448 
6449         seg->s_flags &= ~RSM_IMPORT_DUMMY;       /* clear dummy flag */
6450         if (bar_va) {
6451                 /* increment generation number on barrier page */
6452                 atomic_inc_16(bar_va + seg->s_hdr.rsmrc_num);
6453                 /* return user off into barrier page where status will be */
6454                 msg->off = (int)seg->s_hdr.rsmrc_num;
6455                 msg->gnum = bar_va[msg->off];     /* gnum race */
6456         } else {
6457                 msg->off = 0;
6458                 msg->gnum = 0;       /* gnum race */
6459         }
6460 
6461         msg->len = (int)sharedp->rsmsi_seglen;
6462         msg->rnum = seg->s_minor;
6463         rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING, RSMSI_STATE_CONNECTED);
6464         rsmsharelock_release(seg);
6465         rsmseglock_release(seg);
6466 
6467         /* Return back to user the segment size & perm in case it's needed */
6468 
6469 #ifdef _MULTI_DATAMODEL
6470         if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
6471                 rsm_ioctlmsg32_t msg32;
6472 
6473                 if (msg->len > UINT_MAX)
6474                         msg32.len = RSM_MAXSZ_PAGE_ALIGNED;
6475                 else
6476                         msg32.len = msg->len;
6477                 msg32.off = msg->off;
6478                 msg32.perm = msg->perm;
6479                 msg32.gnum = msg->gnum;
6480                 msg32.rnum = msg->rnum;
6481 
6482                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6483                     "rsm_connect done\n"));
6484 
6485                 if (ddi_copyout((caddr_t)&msg32, (caddr_t)dataptr,
6486                     sizeof (msg32), mode))
6487                         return (RSMERR_BAD_ADDR);
6488                 else
6489                         return (RSM_SUCCESS);
6490         }
6491 #endif
6492         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_connect done\n"));
6493 
6494         if (ddi_copyout((caddr_t)msg, (caddr_t)dataptr, sizeof (*msg),
6495             mode))
6496                 return (RSMERR_BAD_ADDR);
6497         else
6498                 return (RSM_SUCCESS);
6499 }
6500 
6501 static int
6502 rsm_unmap(rsmseg_t *seg)
6503 {
6504         int                     err;
6505         adapter_t               *adapter;
6506         rsm_import_share_t      *sharedp;
6507         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);
6508 
6509         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6510             "rsm_unmap enter %u\n", seg->s_segid));
6511 
6512         ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
6513 
6514         /* assert seg is locked */
6515         ASSERT(rsmseglock_held(seg));
6516         ASSERT(seg->s_state != RSM_STATE_MAPPING);
6517 
6518         if ((seg->s_state != RSM_STATE_ACTIVE) &&
6519             (seg->s_state != RSM_STATE_MAP_QUIESCE)) {
6520                 /* segment unmap has already been done */
6521                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unmap done\n"));
6522                 return (RSM_SUCCESS);
6523         }
6524 
6525         sharedp = seg->s_share;
6526 
6527         rsmsharelock_acquire(seg);
6528 
6529         /*
6530          *      - shared data struct is in MAPPED or MAP_QUIESCE state
6531          */
6532 
6533         ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED ||
6534             sharedp->rsmsi_state == RSMSI_STATE_MAP_QUIESCE);
6535 
6536         /*
6537          * Unmap pages - previously rsm_memseg_import_unmap was called only if
6538          * the segment cookie list was NULL; but it is always NULL when
6539          * called from rsmmap_unmap and won't be NULL when called for
6540          * a force disconnect - so the check for NULL cookie list was removed
6541          */
6542 
6543         ASSERT(sharedp->rsmsi_mapcnt > 0);
6544 
6545         sharedp->rsmsi_mapcnt--;
6546 
6547         if (sharedp->rsmsi_mapcnt == 0) {
6548                 if (sharedp->rsmsi_state == RSMSI_STATE_MAPPED) {
6549                         /* unmap the shared RSMPI mapping */
6550                         adapter = seg->s_adapter;
6551                         if (seg->s_node != my_nodeid) {
6552                                 ASSERT(sharedp->rsmsi_handle != NULL);
6553                                 err = adapter->rsmpi_ops->
6554                                     rsm_unmap(sharedp->rsmsi_handle);
6555                                 DBG_PRINTF((category, RSM_DEBUG,
6556                                     "rsm_unmap: rsmpi unmap %d\n", err));
6557                                 rsm_free_mapinfo(sharedp->rsmsi_mapinfo);
6558                                 sharedp->rsmsi_mapinfo = NULL;
6559                         }
6560                         sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
6561                 } else { /* MAP_QUIESCE --munmap()--> CONN_QUIESCE */
6562                         sharedp->rsmsi_state = RSMSI_STATE_CONN_QUIESCE;
6563                 }
6564         }
6565 
6566         rsmsharelock_release(seg);
6567 
6568         /*
6569          * The s_cookie field is used to store the cookie returned from the
6570          * ddi_umem_lock when binding the pages for an export segment. This
6571          * is the primary use of the s_cookie field and does not normally
6572          * pertain to any importing segment except in the loopback case.
6573          * For the loopback case, the import segment and export segment are
6574          * on the same node, the s_cookie field of the segment structure for
6575          * the importer is initialized to the s_cookie field in the exported
6576          * segment during the map operation and is used during the call to
6577          * devmap_umem_setup for the import mapping.
6578          * Thus, during unmap, we simply need to set s_cookie to NULL to
6579          * indicate that the mapping no longer exists.
6580          */
6581         seg->s_cookie = NULL;
6582 
6583         seg->s_mapinfo = NULL;
6584 
6585         if (seg->s_state == RSM_STATE_ACTIVE)
6586                 seg->s_state = RSM_STATE_CONNECT;
6587         else
6588                 seg->s_state = RSM_STATE_CONN_QUIESCE;
6589 
6590         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unmap done\n"));
6591 
6592         return (RSM_SUCCESS);
6593 }
6594 
6595 /*
6596  * cookie returned here if not null indicates that it is
6597  * the last importer and it can be used in the RSMIPC_NOT_IMPORTING
6598  * message.
6599  */
6600 static int
6601 rsm_closeconnection(rsmseg_t *seg, void **cookie)
6602 {
6603         int                     e;
6604         adapter_t               *adapter;
6605         rsm_import_share_t      *sharedp;
6606         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);
6607 
6608         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6609             "rsm_closeconnection enter\n"));
6610 
6611         *cookie = (void *)NULL;
6612 
6613         ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
6614 
6615         /* assert seg is locked */
6616         ASSERT(rsmseglock_held(seg));
6617 
6618         if (seg->s_state == RSM_STATE_DISCONNECT) {
6619                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6620                     "rsm_closeconnection done: already disconnected\n"));
6621                 return (RSM_SUCCESS);
6622         }
6623 
6624         /* wait for all putv/getv ops to get done */
6625         while (seg->s_rdmacnt > 0) {
6626                 cv_wait(&seg->s_cv, &seg->s_lock);
6627         }
6628 
6629         (void) rsm_unmap(seg);
6630 
6631         ASSERT(seg->s_state == RSM_STATE_CONNECT ||
6632             seg->s_state == RSM_STATE_CONN_QUIESCE);
6633 
6634         adapter = seg->s_adapter;
6635         sharedp = seg->s_share;
6636 
6637         ASSERT(sharedp != NULL);
6638 
6639         rsmsharelock_acquire(seg);
6640 
6641         /*
6642          * Disconnect on adapter
6643          *
6644          * The current algorithm is stateless, I don't have to contact
6645          * server when I go away. He only gives me permissions. Of course,
6646          * the adapters will talk to terminate the connect.
6647          *
6648          * disconnect is needed only if we are CONNECTED not in CONN_QUIESCE
6649          */
6650         if ((sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) &&
6651             (sharedp->rsmsi_node != my_nodeid)) {
6652 
6653                 if (sharedp->rsmsi_refcnt == 1) {
6654                         /* this is the last importer */
6655                         ASSERT(sharedp->rsmsi_mapcnt == 0);
6656 
6657                         e = adapter->rsmpi_ops->
6658                             rsm_disconnect(sharedp->rsmsi_handle);
6659                         if (e != RSM_SUCCESS) {
6660                                 DBG_PRINTF((category, RSM_DEBUG,
6661                                     "rsm:disconnect failed seg=%x:err=%d\n",
6662                                     seg->s_key, e));
6663                         }
6664                 }
6665         }
6666 
6667         seg->s_handle.in = NULL;
6668 
6669         sharedp->rsmsi_refcnt--;
6670 
6671         if (sharedp->rsmsi_refcnt == 0) {
6672                 *cookie = (void *)sharedp->rsmsi_cookie;
6673                 sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED;
6674                 sharedp->rsmsi_handle = NULL;
6675                 rsmsharelock_release(seg);
6676 
6677                 /* clean up the shared data structure */
6678                 mutex_destroy(&sharedp->rsmsi_lock);
6679                 cv_destroy(&sharedp->rsmsi_cv);
6680                 kmem_free((void *)(sharedp), sizeof (rsm_import_share_t));
6681 
6682         } else {
6683                 rsmsharelock_release(seg);
6684         }
6685 
6686         /* increment generation number on barrier page */
6687         if (bar_va) {
6688                 atomic_inc_16(bar_va + seg->s_hdr.rsmrc_num);
6689         }
6690 
6691         /*
6692          * The following needs to be done after any
6693          * rsmsharelock calls which use seg->s_share.
6694          */
6695         seg->s_share = NULL;
6696 
6697         seg->s_state = RSM_STATE_DISCONNECT;
6698         /* signal anyone waiting in the CONN_QUIESCE state */
6699         cv_broadcast(&seg->s_cv);
6700 
6701         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6702             "rsm_closeconnection done\n"));
6703 
6704         return (RSM_SUCCESS);
6705 }
6706 
6707 int
6708 rsm_disconnect(rsmseg_t *seg)
6709 {
6710         rsmipc_request_t        request;
6711         void                    *shared_cookie;
6712         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);
6713 
6714         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_disconnect enter\n"));
6715 
6716         ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
6717 
6718         /* assert seg isn't locked */
6719         ASSERT(!rsmseglock_held(seg));
6720 
6721 
6722         /* Remove segment from imported list */
6723         rsmimport_rm(seg);
6724 
6725         /* acquire the segment */
6726         rsmseglock_acquire(seg);
6727 
6728         /* wait until segment leaves the mapping state */
6729         while (seg->s_state == RSM_STATE_MAPPING)
6730                 cv_wait(&seg->s_cv, &seg->s_lock);
6731 
6732         if (seg->s_state == RSM_STATE_DISCONNECT) {
6733                 seg->s_state = RSM_STATE_NEW;
6734                 rsmseglock_release(seg);
6735                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6736                     "rsm_disconnect done: already disconnected\n"));
6737                 return (RSM_SUCCESS);
6738         }
6739 
6740         (void) rsm_closeconnection(seg, &shared_cookie);
6741 
6742         /* update state */
6743         seg->s_state = RSM_STATE_NEW;
6744 
6745         if (shared_cookie != NULL) {
6746                 /*
6747                  *  This is the last importer so inform the exporting node
6748                  *  so this import can be deleted from the list of importers.
6749                  */
6750                 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_NOTIMPORTING;
6751                 request.rsmipc_key = seg->s_segid;
6752                 request.rsmipc_segment_cookie = shared_cookie;
6753                 rsmseglock_release(seg);
6754                 (void) rsmipc_send(seg->s_node, &request, RSM_NO_REPLY);
6755         } else {
6756                 rsmseglock_release(seg);
6757         }
6758 
6759         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_disconnect done\n"));
6760 
6761         return (DDI_SUCCESS);
6762 }
6763 
6764 /*ARGSUSED*/
6765 static int
6766 rsm_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
6767     struct pollhead **phpp)
6768 {
6769         minor_t         rnum;
6770         rsmresource_t   *res;
6771         rsmseg_t        *seg;
6772         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
6773 
6774         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_chpoll enter\n"));
6775 
6776         /* find minor, no lock */
6777         rnum = getminor(dev);
6778         res = rsmresource_lookup(rnum, RSM_NOLOCK);
6779 
6780         /* poll is supported only for export/import segments */
6781         if ((res == NULL) || (res == RSMRC_RESERVED) ||
6782             (res->rsmrc_type == RSM_RESOURCE_BAR)) {
6783                 return (ENXIO);
6784         }
6785 
6786         *reventsp = 0;
6787 
6788         /*
6789          * An exported segment must be in state RSM_STATE_EXPORT; an
6790          * imported segment must be in state RSM_STATE_ACTIVE.
6791          */
6792         seg = (rsmseg_t *)res;
6793 
6794         if (seg->s_pollevent) {
6795                 *reventsp = POLLRDNORM;
6796         } else if (!anyyet) {
6797                 /* cannot take segment lock here */
6798                 *phpp = &seg->s_poll;
6799                 seg->s_pollflag |= RSM_SEGMENT_POLL;
6800         }
6801         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_chpoll done\n"));
6802         return (0);
6803 }
6804 
6805 
6806 
6807 /* ************************* IOCTL Commands ********************* */
6808 
6809 static rsmseg_t *
6810 rsmresource_seg(rsmresource_t *res, minor_t rnum, cred_t *credp,
6811     rsm_resource_type_t type)
6812 {
6813         /* get segment from resource handle */
6814         rsmseg_t *seg;
6815         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL);
6816 
6817         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmresource_seg enter\n"));
6818 
6819 
6820         if (res != RSMRC_RESERVED) {
6821                 seg = (rsmseg_t *)res;
6822         } else {
6823                 /* Allocate segment now and bind it */
6824                 seg = rsmseg_alloc(rnum, credp);
6825 
6826                 /*
6827                  * if DR pre-processing is going on or DR is in progress
6828                  * then the new export segments should be in the NEW_QSCD state
6829                  */
6830                 if (type == RSM_RESOURCE_EXPORT_SEGMENT) {
6831                         mutex_enter(&rsm_drv_data.drv_lock);
6832                         if ((rsm_drv_data.drv_state ==
6833                             RSM_DRV_PREDEL_STARTED) ||
6834                             (rsm_drv_data.drv_state ==
6835                             RSM_DRV_PREDEL_COMPLETED) ||
6836                             (rsm_drv_data.drv_state ==
6837                             RSM_DRV_DR_IN_PROGRESS)) {
6838                                 seg->s_state = RSM_STATE_NEW_QUIESCED;
6839                         }
6840                         mutex_exit(&rsm_drv_data.drv_lock);
6841                 }
6842 
6843                 rsmresource_insert(rnum, (rsmresource_t *)seg, type);
6844         }
6845 
6846         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmresource_seg done\n"));
6847 
6848         return (seg);
6849 }
6850 
6851 static int
6852 rsmexport_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg,
6853     int mode, cred_t *credp)
6854 {
6855         int error;
6856         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT | RSM_IOCTL);
6857 
6858         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmexport_ioctl enter\n"));
6859 
6860         arg = arg;
6861         credp = credp;
6862 
6863         ASSERT(seg != NULL);
6864 
6865         switch (cmd) {
6866         case RSM_IOCTL_BIND:
6867                 error = rsm_bind(seg, msg, arg, mode);
6868                 break;
6869         case RSM_IOCTL_REBIND:
6870                 error = rsm_rebind(seg, msg);
6871                 break;
6872         case RSM_IOCTL_UNBIND:
6873                 error = ENOTSUP;
6874                 break;
6875         case RSM_IOCTL_PUBLISH:
6876                 error = rsm_publish(seg, msg, arg, mode);
6877                 break;
6878         case RSM_IOCTL_REPUBLISH:
6879                 error = rsm_republish(seg, msg, mode);
6880                 break;
6881         case RSM_IOCTL_UNPUBLISH:
6882                 error = rsm_unpublish(seg, 1);
6883                 break;
6884         default:
6885                 error = EINVAL;
6886                 break;
6887         }
6888 
6889         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmexport_ioctl done: %d\n",
6890             error));
6891 
6892         return (error);
6893 }
6894 static int
6895 rsmimport_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg,
6896     int mode, cred_t *credp)
6897 {
6898         int error;
6899         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
6900 
6901         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmimport_ioctl enter\n"));
6902 
6903         ASSERT(seg);
6904 
6905         switch (cmd) {
6906         case RSM_IOCTL_CONNECT:
6907                 error = rsm_connect(seg, msg, credp, arg, mode);
6908                 break;
6909         default:
6910                 error = EINVAL;
6911                 break;
6912         }
6913 
6914         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmimport_ioctl done: %d\n",
6915             error));
6916         return (error);
6917 }
6918 
6919 static int
6920 rsmbar_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg,
6921     int mode)
6922 {
6923         int e;
6924         adapter_t *adapter;
6925         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
6926 
6927         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmbar_ioctl enter\n"));
6928 
6929 
6930         if ((seg->s_flags & RSM_IMPORT_DUMMY) != 0) {
6931                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6932                     "rsmbar_ioctl done: RSM_IMPORT_DUMMY\n"));
6933                 return (RSMERR_CONN_ABORTED);
6934         } else if (seg->s_node == my_nodeid) {
6935                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6936                     "rsmbar_ioctl done: loopback\n"));
6937                 return (RSM_SUCCESS);
6938         }
6939 
6940         adapter = seg->s_adapter;
6941 
6942         switch (cmd) {
6943         case RSM_IOCTL_BAR_CHECK:
6944                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6945                     "rsmbar_ioctl done: RSM_BAR_CHECK %d\n", bar_va));
6946                 return (bar_va ? RSM_SUCCESS : EINVAL);
6947         case RSM_IOCTL_BAR_OPEN:
6948                 e = adapter->rsmpi_ops->
6949                     rsm_open_barrier_ctrl(adapter->rsmpi_handle, &msg->bar);
6950                 break;
6951         case RSM_IOCTL_BAR_ORDER:
6952                 e = adapter->rsmpi_ops->rsm_order_barrier(&msg->bar);
6953                 break;
6954         case RSM_IOCTL_BAR_CLOSE:
6955                 e = adapter->rsmpi_ops->rsm_close_barrier(&msg->bar);
6956                 break;
6957         default:
6958                 e = EINVAL;
6959                 break;
6960         }
6961 
6962         if (e == RSM_SUCCESS) {
6963 #ifdef _MULTI_DATAMODEL
6964                 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
6965                         rsm_ioctlmsg32_t msg32;
6966                         int i;
6967 
6968                         for (i = 0; i < 4; i++) {
6969                                 msg32.bar.comp[i].u64 = msg->bar.comp[i].u64;
6970                         }
6971 
6972                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6973                             "rsmbar_ioctl done\n"));
6974                         if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg,
6975                             sizeof (msg32), mode))
6976                                 return (RSMERR_BAD_ADDR);
6977                         else
6978                                 return (RSM_SUCCESS);
6979                 }
6980 #endif
6981                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6982                     "rsmbar_ioctl done\n"));
6983                 if (ddi_copyout((caddr_t)&msg->bar, (caddr_t)arg,
6984                     sizeof (*msg), mode))
6985                         return (RSMERR_BAD_ADDR);
6986                 else
6987                         return (RSM_SUCCESS);
6988         }
6989 
6990         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
6991             "rsmbar_ioctl done: error=%d\n", e));
6992 
6993         return (e);
6994 }
6995 
6996 /*
6997  * Ring the doorbell of the export segment to which this segment is
6998  * connected.
6999  */
7000 static int
7001 exportbell_ioctl(rsmseg_t *seg, int cmd /*ARGSUSED*/)
7002 {
7003         int e = 0;
7004         rsmipc_request_t request;
7005 
7006         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7007 
7008         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exportbell_ioctl enter\n"));
7009 
7010         request.rsmipc_key = seg->s_segid;
7011         request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL;
7012         request.rsmipc_segment_cookie = NULL;
7013         e = rsmipc_send(seg->s_node, &request, RSM_NO_REPLY);
7014 
7015         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7016             "exportbell_ioctl done: %d\n", e));
7017 
7018         return (e);
7019 }
7020 
7021 /*
7022  * Ring the doorbells of all segments importing this segment
7023  */
7024 static int
7025 importbell_ioctl(rsmseg_t *seg, int cmd /*ARGSUSED*/)
7026 {
7027         importing_token_t       *token = NULL;
7028         rsmipc_request_t        request;
7029         int                     index;
7030 
7031         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT | RSM_IOCTL);
7032 
7033         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importbell_ioctl enter\n"));
7034 
7035         ASSERT(seg->s_state != RSM_STATE_NEW &&
7036             seg->s_state != RSM_STATE_NEW_QUIESCED);
7037 
7038         request.rsmipc_key = seg->s_segid;
7039         request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL;
7040 
7041         index = rsmhash(seg->s_segid);
7042 
7043         token = importer_list.bucket[index];
7044 
7045         while (token != NULL) {
7046                 if (seg->s_key == token->key) {
7047                         request.rsmipc_segment_cookie =
7048                             token->import_segment_cookie;
7049                         (void) rsmipc_send(token->importing_node,
7050                             &request, RSM_NO_REPLY);
7051                 }
7052                 token = token->next;
7053         }
7054 
7055         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7056             "importbell_ioctl done\n"));
7057         return (RSM_SUCCESS);
7058 }
7059 
7060 static int
7061 rsm_consumeevent_copyin(caddr_t arg, rsm_consume_event_msg_t *msgp,
7062     rsm_poll_event_t **eventspp, int mode)
7063 {
7064         rsm_poll_event_t        *evlist = NULL;
7065         size_t                  evlistsz;
7066         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL);
7067 
7068 #ifdef _MULTI_DATAMODEL
7069         if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7070                 int i;
7071                 rsm_consume_event_msg32_t cemsg32 = {0};
7072                 rsm_poll_event32_t      event32[RSM_MAX_POLLFDS];
7073                 rsm_poll_event32_t      *evlist32;
7074                 size_t                  evlistsz32;
7075 
7076                 /* copyin the ioctl message */
7077                 if (ddi_copyin(arg, (caddr_t)&cemsg32,
7078                     sizeof (rsm_consume_event_msg32_t), mode)) {
7079                         DBG_PRINTF((category, RSM_ERR,
7080                             "consumeevent_copyin msgp: RSMERR_BAD_ADDR\n"));
7081                         return (RSMERR_BAD_ADDR);
7082                 }
7083                 msgp->seglist = (caddr_t)(uintptr_t)cemsg32.seglist;
7084                 msgp->numents = (int)cemsg32.numents;
7085 
7086                 evlistsz32 = sizeof (rsm_poll_event32_t) * msgp->numents;
7087                 /*
7088                  * If numents is large alloc events list on heap otherwise
7089                  * use the address of array that was passed in.
7090                  */
7091                 if (msgp->numents > RSM_MAX_POLLFDS) {
7092                         if (msgp->numents > max_segs) { /* validate numents */
7093                                 DBG_PRINTF((category, RSM_ERR,
7094                                     "consumeevent_copyin: "
7095                                     "RSMERR_BAD_ARGS_ERRORS\n"));
7096                                 return (RSMERR_BAD_ARGS_ERRORS);
7097                         }
7098                         evlist32 = kmem_zalloc(evlistsz32, KM_SLEEP);
7099                 } else {
7100                         evlist32 = event32;
7101                 }
7102 
7103                 /* copyin the seglist into the rsm_poll_event32_t array */
7104                 if (ddi_copyin((caddr_t)msgp->seglist, (caddr_t)evlist32,
7105                     evlistsz32, mode)) {
7106                         if ((msgp->numents > RSM_MAX_POLLFDS) && evlist32) {
7107                                 kmem_free(evlist32, evlistsz32);
7108                         }
7109                         DBG_PRINTF((category, RSM_ERR,
7110                             "consumeevent_copyin evlist: RSMERR_BAD_ADDR\n"));
7111                         return (RSMERR_BAD_ADDR);
7112                 }
7113 
7114                 /* evlist and evlistsz are based on rsm_poll_event_t type */
7115                 evlistsz = sizeof (rsm_poll_event_t)* msgp->numents;
7116 
7117                 if (msgp->numents > RSM_MAX_POLLFDS) {
7118                         evlist = kmem_zalloc(evlistsz, KM_SLEEP);
7119                         *eventspp = evlist;
7120                 } else {
7121                         evlist = *eventspp;
7122                 }
7123                 /*
7124                  * copy the rsm_poll_event32_t array to the rsm_poll_event_t
7125                  * array
7126                  */
7127                 for (i = 0; i < msgp->numents; i++) {
7128                         evlist[i].rnum = evlist32[i].rnum;
7129                         evlist[i].fdsidx = evlist32[i].fdsidx;
7130                         evlist[i].revent = evlist32[i].revent;
7131                 }
7132                 /* free the temp 32-bit event list */
7133                 if ((msgp->numents > RSM_MAX_POLLFDS) && evlist32) {
7134                         kmem_free(evlist32, evlistsz32);
7135                 }
7136 
7137                 return (RSM_SUCCESS);
7138         }
7139 #endif
7140         /* copyin the ioctl message */
7141         if (ddi_copyin(arg, (caddr_t)msgp, sizeof (rsm_consume_event_msg_t),
7142             mode)) {
7143                 DBG_PRINTF((category, RSM_ERR,
7144                     "consumeevent_copyin msgp: RSMERR_BAD_ADDR\n"));
7145                 return (RSMERR_BAD_ADDR);
7146         }
7147         /*
7148          * If numents is large alloc events list on heap otherwise
7149          * use the address of array that was passed in.
7150          */
7151         if (msgp->numents > RSM_MAX_POLLFDS) {
7152                 if (msgp->numents > max_segs) { /* validate numents */
7153                         DBG_PRINTF((category, RSM_ERR,
7154                             "consumeevent_copyin: RSMERR_BAD_ARGS_ERRORS\n"));
7155                         return (RSMERR_BAD_ARGS_ERRORS);
7156                 }
7157                 evlistsz = sizeof (rsm_poll_event_t)*msgp->numents;
7158                 evlist = kmem_zalloc(evlistsz, KM_SLEEP);
7159                 *eventspp  = evlist;
7160         }
7161 
7162         /* copyin the seglist */
7163         if (ddi_copyin((caddr_t)msgp->seglist, (caddr_t)(*eventspp),
7164             sizeof (rsm_poll_event_t)*msgp->numents, mode)) {
7165                 if (evlist) {
7166                         kmem_free(evlist, evlistsz);
7167                         *eventspp = NULL;
7168                 }
7169                 DBG_PRINTF((category, RSM_ERR,
7170                     "consumeevent_copyin evlist: RSMERR_BAD_ADDR\n"));
7171                 return (RSMERR_BAD_ADDR);
7172         }
7173 
7174         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7175             "consumeevent_copyin done\n"));
7176         return (RSM_SUCCESS);
7177 }
7178 
7179 static int
7180 rsm_consumeevent_copyout(rsm_consume_event_msg_t *msgp,
7181     rsm_poll_event_t *eventsp, int mode)
7182 {
7183         size_t                  evlistsz;
7184         int                     err = RSM_SUCCESS;
7185         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL);
7186 
7187         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7188             "consumeevent_copyout enter: numents(%d) eventsp(%p)\n",
7189             msgp->numents, eventsp));
7190 
7191 #ifdef _MULTI_DATAMODEL
7192         if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7193                 int i;
7194                 rsm_poll_event32_t      event32[RSM_MAX_POLLFDS];
7195                 rsm_poll_event32_t      *evlist32;
7196                 size_t                  evlistsz32;
7197 
7198                 evlistsz32 = sizeof (rsm_poll_event32_t)*msgp->numents;
7199                 if (msgp->numents > RSM_MAX_POLLFDS) {
7200                         evlist32 = kmem_zalloc(evlistsz32, KM_SLEEP);
7201                 } else {
7202                         evlist32 = event32;
7203                 }
7204 
7205                 /*
7206                  * copy the rsm_poll_event_t array to the rsm_poll_event32_t
7207                  * array
7208                  */
7209                 for (i = 0; i < msgp->numents; i++) {
7210                         evlist32[i].rnum = eventsp[i].rnum;
7211                         evlist32[i].fdsidx = eventsp[i].fdsidx;
7212                         evlist32[i].revent = eventsp[i].revent;
7213                 }
7214 
7215                 if (ddi_copyout((caddr_t)evlist32, (caddr_t)msgp->seglist,
7216                     evlistsz32, mode)) {
7217                         err = RSMERR_BAD_ADDR;
7218                 }
7219 
7220                 if (msgp->numents > RSM_MAX_POLLFDS) {
7221                         if (evlist32) { /* free the temp 32-bit event list */
7222                                 kmem_free(evlist32, evlistsz32);
7223                         }
7224                         /*
7225                          * eventsp and evlistsz are based on rsm_poll_event_t
7226                          * type
7227                          */
7228                         evlistsz = sizeof (rsm_poll_event_t)*msgp->numents;
7229                         /* event list on the heap and needs to be freed here */
7230                         if (eventsp) {
7231                                 kmem_free(eventsp, evlistsz);
7232                         }
7233                 }
7234 
7235                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7236                     "consumeevent_copyout done: err=%d\n", err));
7237                 return (err);
7238         }
7239 #endif
7240         evlistsz = sizeof (rsm_poll_event_t)*msgp->numents;
7241 
7242         if (ddi_copyout((caddr_t)eventsp, (caddr_t)msgp->seglist, evlistsz,
7243             mode)) {
7244                 err = RSMERR_BAD_ADDR;
7245         }
7246 
7247         if ((msgp->numents > RSM_MAX_POLLFDS) && eventsp) {
7248                 /* event list on the heap and needs to be freed here */
7249                 kmem_free(eventsp, evlistsz);
7250         }
7251 
7252         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7253             "consumeevent_copyout done: err=%d\n", err));
7254         return (err);
7255 }
7256 
7257 static int
7258 rsm_consumeevent_ioctl(caddr_t arg, int mode)
7259 {
7260         int     rc;
7261         int     i;
7262         minor_t rnum;
7263         rsm_consume_event_msg_t msg = {0};
7264         rsmseg_t                *seg;
7265         rsm_poll_event_t        *event_list;
7266         rsm_poll_event_t        events[RSM_MAX_POLLFDS];
7267         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL);
7268 
7269         event_list = events;
7270 
7271         if ((rc = rsm_consumeevent_copyin(arg, &msg, &event_list, mode)) !=
7272             RSM_SUCCESS) {
7273                 return (rc);
7274         }
7275 
7276         for (i = 0; i < msg.numents; i++) {
7277                 rnum = event_list[i].rnum;
7278                 event_list[i].revent = 0;
7279                 /* get the segment structure */
7280                 seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_LOCK);
7281                 if (seg) {
7282                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7283                             "consumeevent_ioctl: rnum(%d) seg(%p)\n", rnum,
7284                             seg));
7285                         if (seg->s_pollevent) {
7286                                 /* consume the event */
7287                                 atomic_dec_32(&seg->s_pollevent);
7288                                 event_list[i].revent = POLLRDNORM;
7289                         }
7290                         rsmseglock_release(seg);
7291                 }
7292         }
7293 
7294         if ((rc = rsm_consumeevent_copyout(&msg, event_list, mode)) !=
7295             RSM_SUCCESS) {
7296                 return (rc);
7297         }
7298 
7299         return (RSM_SUCCESS);
7300 }
7301 
7302 static int
7303 iovec_copyin(caddr_t user_vec, rsmka_iovec_t *iovec, int count, int mode)
7304 {
7305         int size;
7306         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7307 
7308         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "iovec_copyin enter\n"));
7309 
7310 #ifdef _MULTI_DATAMODEL
7311         if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7312                 rsmka_iovec32_t *iovec32, *iovec32_base;
7313                 int i;
7314 
7315                 size = count * sizeof (rsmka_iovec32_t);
7316                 iovec32_base = iovec32 = kmem_zalloc(size, KM_SLEEP);
7317                 if (ddi_copyin((caddr_t)user_vec,
7318                     (caddr_t)iovec32, size, mode)) {
7319                         kmem_free(iovec32, size);
7320                         DBG_PRINTF((category, RSM_DEBUG,
7321                             "iovec_copyin: returning RSMERR_BAD_ADDR\n"));
7322                         return (RSMERR_BAD_ADDR);
7323                 }
7324 
7325                 for (i = 0; i < count; i++, iovec++, iovec32++) {
7326                         iovec->io_type = (int)iovec32->io_type;
7327                         if (iovec->io_type == RSM_HANDLE_TYPE)
7328                                 iovec->local.segid = (rsm_memseg_id_t)
7329                                     iovec32->local;
7330                         else
7331                                 iovec->local.vaddr =
7332                                     (caddr_t)(uintptr_t)iovec32->local;
7333                         iovec->local_offset = (size_t)iovec32->local_offset;
7334                         iovec->remote_offset = (size_t)iovec32->remote_offset;
7335                         iovec->transfer_len = (size_t)iovec32->transfer_len;
7336 
7337                 }
7338                 kmem_free(iovec32_base, size);
7339                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7340                     "iovec_copyin done\n"));
7341                 return (DDI_SUCCESS);
7342         }
7343 #endif
7344 
7345         size = count * sizeof (rsmka_iovec_t);
7346         if (ddi_copyin((caddr_t)user_vec, (caddr_t)iovec, size, mode)) {
7347                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7348                     "iovec_copyin done: RSMERR_BAD_ADDR\n"));
7349                 return (RSMERR_BAD_ADDR);
7350         }
7351 
7352         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "iovec_copyin done\n"));
7353 
7354         return (DDI_SUCCESS);
7355 }
7356 
7357 
7358 static int
7359 sgio_copyin(caddr_t arg, rsmka_scat_gath_t *sg_io, int mode)
7360 {
7361         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7362 
7363         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_copyin enter\n"));
7364 
7365 #ifdef _MULTI_DATAMODEL
7366         if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7367                 rsmka_scat_gath32_t sg_io32;
7368 
7369                 if (ddi_copyin(arg, (caddr_t)&sg_io32, sizeof (sg_io32),
7370                     mode)) {
7371                         DBG_PRINTF((category, RSM_DEBUG,
7372                             "sgio_copyin done: returning EFAULT\n"));
7373                         return (RSMERR_BAD_ADDR);
7374                 }
7375                 sg_io->local_nodeid = (rsm_node_id_t)sg_io32.local_nodeid;
7376                 sg_io->io_request_count =  (size_t)sg_io32.io_request_count;
7377                 sg_io->io_residual_count = (size_t)sg_io32.io_residual_count;
7378                 sg_io->flags = (size_t)sg_io32.flags;
7379                 sg_io->remote_handle = (rsm_memseg_import_handle_t)
7380                     (uintptr_t)sg_io32.remote_handle;
7381                 sg_io->iovec = (rsmka_iovec_t *)(uintptr_t)sg_io32.iovec;
7382                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7383                     "sgio_copyin done\n"));
7384                 return (DDI_SUCCESS);
7385         }
7386 #endif
7387         if (ddi_copyin(arg, (caddr_t)sg_io, sizeof (rsmka_scat_gath_t),
7388             mode)) {
7389                 DBG_PRINTF((category, RSM_DEBUG,
7390                     "sgio_copyin done: returning EFAULT\n"));
7391                 return (RSMERR_BAD_ADDR);
7392         }
7393         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_copyin done\n"));
7394         return (DDI_SUCCESS);
7395 }
7396 
7397 static int
7398 sgio_resid_copyout(caddr_t arg, rsmka_scat_gath_t *sg_io, int mode)
7399 {
7400         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7401 
7402         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7403             "sgio_resid_copyout enter\n"));
7404 
7405 #ifdef _MULTI_DATAMODEL
7406         if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7407                 rsmka_scat_gath32_t sg_io32;
7408 
7409                 sg_io32.io_residual_count = sg_io->io_residual_count;
7410                 sg_io32.flags = sg_io->flags;
7411 
7412                 if (ddi_copyout((caddr_t)&sg_io32.io_residual_count,
7413                     (caddr_t)&((rsmka_scat_gath32_t *)arg)->io_residual_count,
7414                     sizeof (uint32_t), mode)) {
7415 
7416                         DBG_PRINTF((category, RSM_ERR,
7417                             "sgio_resid_copyout error: rescnt\n"));
7418                         return (RSMERR_BAD_ADDR);
7419                 }
7420 
7421                 if (ddi_copyout((caddr_t)&sg_io32.flags,
7422                     (caddr_t)&((rsmka_scat_gath32_t *)arg)->flags,
7423                     sizeof (uint32_t), mode)) {
7424 
7425                         DBG_PRINTF((category, RSM_ERR,
7426                             "sgio_resid_copyout error: flags\n"));
7427                         return (RSMERR_BAD_ADDR);
7428                 }
7429                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7430                     "sgio_resid_copyout done\n"));
7431                 return (DDI_SUCCESS);
7432         }
7433 #endif
7434         if (ddi_copyout((caddr_t)&sg_io->io_residual_count,
7435             (caddr_t)&((rsmka_scat_gath_t *)arg)->io_residual_count,
7436             sizeof (ulong_t), mode)) {
7437 
7438                 DBG_PRINTF((category, RSM_ERR,
7439                     "sgio_resid_copyout error:rescnt\n"));
7440                 return (RSMERR_BAD_ADDR);
7441         }
7442 
7443         if (ddi_copyout((caddr_t)&sg_io->flags,
7444             (caddr_t)&((rsmka_scat_gath_t *)arg)->flags,
7445             sizeof (uint_t), mode)) {
7446 
7447                 DBG_PRINTF((category, RSM_ERR,
7448                     "sgio_resid_copyout error:flags\n"));
7449                 return (RSMERR_BAD_ADDR);
7450         }
7451 
7452         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_resid_copyout done\n"));
7453         return (DDI_SUCCESS);
7454 }
7455 
7456 
7457 static int
7458 rsm_iovec_ioctl(dev_t dev, caddr_t arg, int cmd, int mode, cred_t *credp)
7459 {
7460         rsmka_scat_gath_t       sg_io;
7461         rsmka_iovec_t           ka_iovec_arr[RSM_MAX_IOVLEN];
7462         rsmka_iovec_t           *ka_iovec;
7463         rsmka_iovec_t           *ka_iovec_start;
7464         rsmpi_scat_gath_t       rsmpi_sg_io;
7465         rsmpi_iovec_t           iovec_arr[RSM_MAX_IOVLEN];
7466         rsmpi_iovec_t           *iovec;
7467         rsmpi_iovec_t           *iovec_start = NULL;
7468         rsmapi_access_entry_t   *acl;
7469         rsmresource_t           *res;
7470         minor_t                 rnum;
7471         rsmseg_t                *im_seg, *ex_seg;
7472         int                     e;
7473         int                     error = 0;
7474         uint_t                  i;
7475         uint_t                  iov_proc = 0; /* num of iovecs processed */
7476         size_t                  size = 0;
7477         size_t                  ka_size;
7478 
7479         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7480 
7481         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_iovec_ioctl enter\n"));
7482 
7483         credp = credp;
7484 
7485         /*
7486          * Copyin the scatter/gather structure  and build new structure
7487          * for rsmpi.
7488          */
7489         e = sgio_copyin(arg, &sg_io, mode);
7490         if (e != DDI_SUCCESS) {
7491                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7492                     "rsm_iovec_ioctl done: sgio_copyin %d\n", e));
7493                 return (e);
7494         }
7495 
7496         if (sg_io.io_request_count > RSM_MAX_SGIOREQS) {
7497                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7498                     "rsm_iovec_ioctl done: request_count(%d) too large\n",
7499                     sg_io.io_request_count));
7500                 return (RSMERR_BAD_SGIO);
7501         }
7502 
7503         rsmpi_sg_io.io_request_count = sg_io.io_request_count;
7504         rsmpi_sg_io.io_residual_count = sg_io.io_request_count;
7505         rsmpi_sg_io.io_segflg = 0;
7506 
7507         /* Allocate memory and copyin io vector array  */
7508         if (sg_io.io_request_count > RSM_MAX_IOVLEN) {
7509                 ka_size =  sg_io.io_request_count * sizeof (rsmka_iovec_t);
7510                 ka_iovec_start = ka_iovec = kmem_zalloc(ka_size, KM_SLEEP);
7511         } else {
7512                 ka_iovec_start = ka_iovec = ka_iovec_arr;
7513         }
7514         e = iovec_copyin((caddr_t)sg_io.iovec, ka_iovec,
7515             sg_io.io_request_count, mode);
7516         if (e != DDI_SUCCESS) {
7517                 if (sg_io.io_request_count > RSM_MAX_IOVLEN)
7518                         kmem_free(ka_iovec, ka_size);
7519                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7520                     "rsm_iovec_ioctl done: iovec_copyin %d\n", e));
7521                 return (e);
7522         }
7523 
7524         /* get the import segment descriptor */
7525         rnum = getminor(dev);
7526         res = rsmresource_lookup(rnum, RSM_LOCK);
7527 
7528         /*
7529          * The following sequence of locking may (or MAY NOT) cause a
7530          * deadlock but this is currently not addressed here since the
7531          * implementation will be changed to incorporate the use of
7532          * reference counting for both the import and the export segments.
7533          */
7534 
7535         /* rsmseglock_acquire(im_seg) done in rsmresource_lookup */
7536 
7537         im_seg = (rsmseg_t *)res;
7538 
7539         if (im_seg == NULL) {
7540                 if (sg_io.io_request_count > RSM_MAX_IOVLEN)
7541                         kmem_free(ka_iovec, ka_size);
7542                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7543                     "rsm_iovec_ioctl done: rsmresource_lookup failed\n"));
7544                 return (EINVAL);
7545         }
7546         /* putv/getv supported is supported only on import segments */
7547         if (im_seg->s_type != RSM_RESOURCE_IMPORT_SEGMENT) {
7548                 rsmseglock_release(im_seg);
7549                 if (sg_io.io_request_count > RSM_MAX_IOVLEN)
7550                         kmem_free(ka_iovec, ka_size);
7551                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7552                     "rsm_iovec_ioctl done: not an import segment\n"));
7553                 return (EINVAL);
7554         }
7555 
7556         /*
7557          * wait for a remote DR to complete ie. for segments to get UNQUIESCED
7558          * as well as wait for a local DR to complete.
7559          */
7560         while ((im_seg->s_state == RSM_STATE_CONN_QUIESCE) ||
7561             (im_seg->s_state == RSM_STATE_MAP_QUIESCE) ||
7562             (im_seg->s_flags & RSM_DR_INPROGRESS)) {
7563                 if (cv_wait_sig(&im_seg->s_cv, &im_seg->s_lock) == 0) {
7564                         DBG_PRINTF((category, RSM_DEBUG,
7565                             "rsm_iovec_ioctl done: cv_wait INTR"));
7566                         rsmseglock_release(im_seg);
7567                         return (RSMERR_INTERRUPTED);
7568                 }
7569         }
7570 
7571         if ((im_seg->s_state != RSM_STATE_CONNECT) &&
7572             (im_seg->s_state != RSM_STATE_ACTIVE)) {
7573 
7574                 ASSERT(im_seg->s_state == RSM_STATE_DISCONNECT ||
7575                     im_seg->s_state == RSM_STATE_NEW);
7576 
7577                 DBG_PRINTF((category, RSM_DEBUG,
7578                     "rsm_iovec_ioctl done: im_seg not conn/map"));
7579                 rsmseglock_release(im_seg);
7580                 e = RSMERR_BAD_SGIO;
7581                 goto out;
7582         }
7583 
7584         im_seg->s_rdmacnt++;
7585         rsmseglock_release(im_seg);
7586 
7587         /*
7588          * Allocate and set up the io vector for rsmpi
7589          */
7590         if (sg_io.io_request_count > RSM_MAX_IOVLEN) {
7591                 size = sg_io.io_request_count * sizeof (rsmpi_iovec_t);
7592                 iovec_start = iovec = kmem_zalloc(size, KM_SLEEP);
7593         } else {
7594                 iovec_start = iovec = iovec_arr;
7595         }
7596 
7597         rsmpi_sg_io.iovec = iovec;
7598         for (iov_proc = 0; iov_proc < sg_io.io_request_count; iov_proc++) {
7599                 if (ka_iovec->io_type == RSM_HANDLE_TYPE) {
7600                         ex_seg = rsmexport_lookup(ka_iovec->local.segid);
7601 
7602                         if (ex_seg == NULL) {
7603                                 e = RSMERR_BAD_SGIO;
7604                                 break;
7605                         }
7606                         ASSERT(ex_seg->s_state == RSM_STATE_EXPORT);
7607 
7608                         acl = ex_seg->s_acl;
7609                         if (acl[0].ae_permission == 0) {
7610                                 struct buf *xbuf;
7611                                 dev_t sdev = 0;
7612 
7613                                 xbuf = ddi_umem_iosetup(ex_seg->s_cookie,
7614                                     0, ex_seg->s_len, B_WRITE,
7615                                     sdev, 0, NULL, DDI_UMEM_SLEEP);
7616 
7617                                 ASSERT(xbuf != NULL);
7618 
7619                                 iovec->local_mem.ms_type = RSM_MEM_BUF;
7620                                 iovec->local_mem.ms_memory.bp = xbuf;
7621                         } else {
7622                                 iovec->local_mem.ms_type = RSM_MEM_HANDLE;
7623                                 iovec->local_mem.ms_memory.handle =
7624                                     ex_seg->s_handle.out;
7625                         }
7626                         ex_seg->s_rdmacnt++; /* refcnt the handle */
7627                         rsmseglock_release(ex_seg);
7628                 } else {
7629                         iovec->local_mem.ms_type = RSM_MEM_VADDR;
7630                         iovec->local_mem.ms_memory.vr.vaddr =
7631                             ka_iovec->local.vaddr;
7632                 }
7633 
7634                 iovec->local_offset = ka_iovec->local_offset;
7635                 iovec->remote_handle = im_seg->s_handle.in;
7636                 iovec->remote_offset = ka_iovec->remote_offset;
7637                 iovec->transfer_length = ka_iovec->transfer_len;
7638                 iovec++;
7639                 ka_iovec++;
7640         }
7641 
7642         if (iov_proc <  sg_io.io_request_count) {
7643                 /* error while processing handle */
7644                 rsmseglock_acquire(im_seg);
7645                 im_seg->s_rdmacnt--;   /* decrement the refcnt for importseg */
7646                 if (im_seg->s_rdmacnt == 0) {
7647                         cv_broadcast(&im_seg->s_cv);
7648                 }
7649                 rsmseglock_release(im_seg);
7650                 goto out;
7651         }
7652 
7653         /* call rsmpi */
7654         if (cmd == RSM_IOCTL_PUTV)
7655                 e = im_seg->s_adapter->rsmpi_ops->rsm_memseg_import_putv(
7656                     im_seg->s_adapter->rsmpi_handle,
7657                     &rsmpi_sg_io);
7658         else if (cmd == RSM_IOCTL_GETV)
7659                 e = im_seg->s_adapter->rsmpi_ops->rsm_memseg_import_getv(
7660                     im_seg->s_adapter->rsmpi_handle,
7661                     &rsmpi_sg_io);
7662         else {
7663                 e = EINVAL;
7664                 DBG_PRINTF((category, RSM_DEBUG,
7665                     "iovec_ioctl: bad command = %x\n", cmd));
7666         }
7667 
7668 
7669         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7670             "rsm_iovec_ioctl RSMPI oper done %d\n", e));
7671 
7672         sg_io.io_residual_count = rsmpi_sg_io.io_residual_count;
7673 
7674         /*
7675          * Check for implicit signal post flag and do the signal
7676          * post if needed
7677          */
7678         if (sg_io.flags & RSM_IMPLICIT_SIGPOST &&
7679             e == RSM_SUCCESS) {
7680                 rsmipc_request_t request;
7681 
7682                 request.rsmipc_key = im_seg->s_segid;
7683                 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL;
7684                 request.rsmipc_segment_cookie = NULL;
7685                 e = rsmipc_send(im_seg->s_node, &request, RSM_NO_REPLY);
7686                 /*
7687                  * Reset the implicit signal post flag to 0 to indicate
7688                  * that the signal post has been done and need not be
7689                  * done in the RSMAPI library
7690                  */
7691                 sg_io.flags &= ~RSM_IMPLICIT_SIGPOST;
7692         }
7693 
7694         rsmseglock_acquire(im_seg);
7695         im_seg->s_rdmacnt--;
7696         if (im_seg->s_rdmacnt == 0) {
7697                 cv_broadcast(&im_seg->s_cv);
7698         }
7699         rsmseglock_release(im_seg);
7700         error = sgio_resid_copyout(arg, &sg_io, mode);
7701 out:
7702         iovec = iovec_start;
7703         ka_iovec = ka_iovec_start;
7704         for (i = 0; i < iov_proc; i++) {
7705                 if (ka_iovec->io_type == RSM_HANDLE_TYPE) {
7706                         ex_seg = rsmexport_lookup(ka_iovec->local.segid);
7707 
7708                         ASSERT(ex_seg != NULL);
7709                         ASSERT(ex_seg->s_state == RSM_STATE_EXPORT);
7710 
7711                         ex_seg->s_rdmacnt--; /* unrefcnt the handle */
7712                         if (ex_seg->s_rdmacnt == 0) {
7713                                 cv_broadcast(&ex_seg->s_cv);
7714                         }
7715                         rsmseglock_release(ex_seg);
7716                 }
7717 
7718                 ASSERT(iovec != NULL); /* true if iov_proc > 0 */
7719 
7720                 /*
7721                  * At present there is no dependency on the existence of xbufs
7722                  * created by ddi_umem_iosetup for each of the iovecs. So we
7723                  * can these xbufs here.
7724                  */
7725                 if (iovec->local_mem.ms_type == RSM_MEM_BUF) {
7726                         freerbuf(iovec->local_mem.ms_memory.bp);
7727                 }
7728 
7729                 iovec++;
7730                 ka_iovec++;
7731         }
7732 
7733         if (sg_io.io_request_count > RSM_MAX_IOVLEN) {
7734                 if (iovec_start)
7735                         kmem_free(iovec_start, size);
7736                 kmem_free(ka_iovec_start, ka_size);
7737         }
7738 
7739         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7740             "rsm_iovec_ioctl done %d\n", e));
7741         /* if RSMPI call fails return that else return copyout's retval */
7742         return ((e != RSM_SUCCESS) ? e : error);
7743 
7744 }
7745 
7746 
7747 static int
7748 rsmaddr_ioctl(int cmd, rsm_ioctlmsg_t *msg, int mode)
7749 {
7750         adapter_t       *adapter;
7751         rsm_addr_t      addr;
7752         rsm_node_id_t   node;
7753         int             rval = DDI_SUCCESS;
7754         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL);
7755 
7756         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmaddr_ioctl enter\n"));
7757 
7758         adapter =  rsm_getadapter(msg, mode);
7759         if (adapter == NULL) {
7760                 DBG_PRINTF((category, RSM_DEBUG,
7761                     "rsmaddr_ioctl done: adapter not found\n"));
7762                 return (RSMERR_CTLR_NOT_PRESENT);
7763         }
7764 
7765         switch (cmd) {
7766         case RSM_IOCTL_MAP_TO_ADDR: /* nodeid to hwaddr mapping */
7767                 /* returns the hwaddr in msg->hwaddr */
7768                 if (msg->nodeid == my_nodeid) {
7769                         msg->hwaddr = adapter->hwaddr;
7770                 } else {
7771                         addr = get_remote_hwaddr(adapter, msg->nodeid);
7772                         if ((int64_t)addr < 0) {
7773                                 rval = RSMERR_INTERNAL_ERROR;
7774                         } else {
7775                                 msg->hwaddr = addr;
7776                         }
7777                 }
7778                 break;
7779         case RSM_IOCTL_MAP_TO_NODEID: /* hwaddr to nodeid mapping */
7780                 /* returns the nodeid in msg->nodeid */
7781                 if (msg->hwaddr == adapter->hwaddr) {
7782                         msg->nodeid = my_nodeid;
7783                 } else {
7784                         node = get_remote_nodeid(adapter, msg->hwaddr);
7785                         if ((int)node < 0) {
7786                                 rval = RSMERR_INTERNAL_ERROR;
7787                         } else {
7788                                 msg->nodeid = (rsm_node_id_t)node;
7789                         }
7790                 }
7791                 break;
7792         default:
7793                 rval = EINVAL;
7794                 break;
7795         }
7796 
7797         rsmka_release_adapter(adapter);
7798         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7799             "rsmaddr_ioctl done: %d\n", rval));
7800         return (rval);
7801 }
7802 
7803 static int
7804 rsm_ddi_copyin(caddr_t arg, rsm_ioctlmsg_t *msg, int mode)
7805 {
7806         DBG_DEFINE(category,
7807             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL | RSM_DDI);
7808 
7809         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ddi_copyin enter\n"));
7810 
7811 #ifdef _MULTI_DATAMODEL
7812 
7813         if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7814                 rsm_ioctlmsg32_t msg32;
7815                 int i;
7816 
7817                 if (ddi_copyin(arg, (caddr_t)&msg32, sizeof (msg32), mode)) {
7818                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7819                             "rsm_ddi_copyin done: EFAULT\n"));
7820                         return (RSMERR_BAD_ADDR);
7821                 }
7822                 msg->len = msg32.len;
7823                 msg->vaddr = (caddr_t)(uintptr_t)msg32.vaddr;
7824                 msg->arg = (caddr_t)(uintptr_t)msg32.arg;
7825                 msg->key = msg32.key;
7826                 msg->acl_len = msg32.acl_len;
7827                 msg->acl = (rsmapi_access_entry_t *)(uintptr_t)msg32.acl;
7828                 msg->cnum = msg32.cnum;
7829                 msg->cname = (caddr_t)(uintptr_t)msg32.cname;
7830                 msg->cname_len = msg32.cname_len;
7831                 msg->nodeid = msg32.nodeid;
7832                 msg->hwaddr = msg32.hwaddr;
7833                 msg->perm = msg32.perm;
7834                 for (i = 0; i < 4; i++) {
7835                         msg->bar.comp[i].u64 = msg32.bar.comp[i].u64;
7836                 }
7837                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7838                     "rsm_ddi_copyin done\n"));
7839                 return (RSM_SUCCESS);
7840         }
7841 #endif
7842         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ddi_copyin done\n"));
7843         if (ddi_copyin(arg, (caddr_t)msg, sizeof (*msg), mode))
7844                 return (RSMERR_BAD_ADDR);
7845         else
7846                 return (RSM_SUCCESS);
7847 }
7848 
7849 static int
7850 rsmattr_ddi_copyout(adapter_t *adapter, caddr_t arg, int mode)
7851 {
7852         rsmka_int_controller_attr_t     rsm_cattr;
7853         DBG_DEFINE(category,
7854             RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL | RSM_DDI);
7855 
7856         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7857             "rsmattr_ddi_copyout enter\n"));
7858         /*
7859          * need to copy appropriate data from rsm_controller_attr_t
7860          * to rsmka_int_controller_attr_t
7861          */
7862 #ifdef  _MULTI_DATAMODEL
7863         if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7864                 rsmka_int_controller_attr32_t rsm_cattr32;
7865 
7866                 rsm_cattr32.attr_direct_access_sizes =
7867                     adapter->rsm_attr.attr_direct_access_sizes;
7868                 rsm_cattr32.attr_atomic_sizes =
7869                     adapter->rsm_attr.attr_atomic_sizes;
7870                 rsm_cattr32.attr_page_size =
7871                     adapter->rsm_attr.attr_page_size;
7872                 if (adapter->rsm_attr.attr_max_export_segment_size >
7873                     UINT_MAX)
7874                         rsm_cattr32.attr_max_export_segment_size =
7875                             RSM_MAXSZ_PAGE_ALIGNED;
7876                 else
7877                         rsm_cattr32.attr_max_export_segment_size =
7878                             adapter->rsm_attr.attr_max_export_segment_size;
7879                 if (adapter->rsm_attr.attr_tot_export_segment_size >
7880                     UINT_MAX)
7881                         rsm_cattr32.attr_tot_export_segment_size =
7882                             RSM_MAXSZ_PAGE_ALIGNED;
7883                 else
7884                         rsm_cattr32.attr_tot_export_segment_size =
7885                             adapter->rsm_attr.attr_tot_export_segment_size;
7886                 if (adapter->rsm_attr.attr_max_export_segments >
7887                     UINT_MAX)
7888                         rsm_cattr32.attr_max_export_segments =
7889                             UINT_MAX;
7890                 else
7891                         rsm_cattr32.attr_max_export_segments =
7892                             adapter->rsm_attr.attr_max_export_segments;
7893                 if (adapter->rsm_attr.attr_max_import_map_size >
7894                     UINT_MAX)
7895                         rsm_cattr32.attr_max_import_map_size =
7896                             RSM_MAXSZ_PAGE_ALIGNED;
7897                 else
7898                         rsm_cattr32.attr_max_import_map_size =
7899                             adapter->rsm_attr.attr_max_import_map_size;
7900                 if (adapter->rsm_attr.attr_tot_import_map_size >
7901                     UINT_MAX)
7902                         rsm_cattr32.attr_tot_import_map_size =
7903                             RSM_MAXSZ_PAGE_ALIGNED;
7904                 else
7905                         rsm_cattr32.attr_tot_import_map_size =
7906                             adapter->rsm_attr.attr_tot_import_map_size;
7907                 if (adapter->rsm_attr.attr_max_import_segments >
7908                     UINT_MAX)
7909                         rsm_cattr32.attr_max_import_segments =
7910                             UINT_MAX;
7911                 else
7912                         rsm_cattr32.attr_max_import_segments =
7913                             adapter->rsm_attr.attr_max_import_segments;
7914                 rsm_cattr32.attr_controller_addr =
7915                     adapter->rsm_attr.attr_controller_addr;
7916 
7917                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7918                     "rsmattr_ddi_copyout done\n"));
7919                 if (ddi_copyout((caddr_t)&rsm_cattr32, arg,
7920                     sizeof (rsmka_int_controller_attr32_t), mode)) {
7921                         return (RSMERR_BAD_ADDR);
7922                 }
7923                 else
7924                         return (RSM_SUCCESS);
7925         }
7926 #endif
7927         rsm_cattr.attr_direct_access_sizes =
7928             adapter->rsm_attr.attr_direct_access_sizes;
7929         rsm_cattr.attr_atomic_sizes =
7930             adapter->rsm_attr.attr_atomic_sizes;
7931         rsm_cattr.attr_page_size =
7932             adapter->rsm_attr.attr_page_size;
7933         rsm_cattr.attr_max_export_segment_size =
7934             adapter->rsm_attr.attr_max_export_segment_size;
7935         rsm_cattr.attr_tot_export_segment_size =
7936             adapter->rsm_attr.attr_tot_export_segment_size;
7937         rsm_cattr.attr_max_export_segments =
7938             adapter->rsm_attr.attr_max_export_segments;
7939         rsm_cattr.attr_max_import_map_size =
7940             adapter->rsm_attr.attr_max_import_map_size;
7941         rsm_cattr.attr_tot_import_map_size =
7942             adapter->rsm_attr.attr_tot_import_map_size;
7943         rsm_cattr.attr_max_import_segments =
7944             adapter->rsm_attr.attr_max_import_segments;
7945         rsm_cattr.attr_controller_addr =
7946             adapter->rsm_attr.attr_controller_addr;
7947         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7948             "rsmattr_ddi_copyout done\n"));
7949         if (ddi_copyout((caddr_t)&rsm_cattr, arg,
7950             sizeof (rsmka_int_controller_attr_t), mode)) {
7951                 return (RSMERR_BAD_ADDR);
7952         }
7953         else
7954                 return (RSM_SUCCESS);
7955 }
7956 
7957 /*ARGSUSED*/
7958 static int
7959 rsm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
7960     int *rvalp)
7961 {
7962         rsmseg_t *seg;
7963         rsmresource_t   *res;
7964         minor_t         rnum;
7965         rsm_ioctlmsg_t msg = {0};
7966         int error;
7967         adapter_t *adapter;
7968         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL);
7969 
7970         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ioctl enter\n"));
7971 
7972         if (cmd == RSM_IOCTL_CONSUMEEVENT) {
7973                 error = rsm_consumeevent_ioctl((caddr_t)arg, mode);
7974                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7975                     "rsm_ioctl RSM_IOCTL_CONSUMEEVENT done: %d\n", error));
7976                 return (error);
7977         }
7978 
7979         /* topology cmd does not use the arg common to other cmds */
7980         if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_TOPOLOGY) {
7981                 error = rsmka_topology_ioctl((caddr_t)arg, cmd, mode);
7982                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7983                     "rsm_ioctl done: %d\n", error));
7984                 return (error);
7985         }
7986 
7987         if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_IOVEC) {
7988                 error = rsm_iovec_ioctl(dev, (caddr_t)arg, cmd, mode, credp);
7989                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7990                     "rsm_ioctl done: %d\n", error));
7991                 return (error);
7992         }
7993 
7994         /*
7995          * try to load arguments
7996          */
7997         if (cmd != RSM_IOCTL_RING_BELL &&
7998             rsm_ddi_copyin((caddr_t)arg, &msg, mode)) {
7999                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8000                     "rsm_ioctl done: EFAULT\n"));
8001                 return (RSMERR_BAD_ADDR);
8002         }
8003 
8004         if (cmd == RSM_IOCTL_ATTR) {
8005                 adapter =  rsm_getadapter(&msg, mode);
8006                 if (adapter == NULL) {
8007                         DBG_PRINTF((category, RSM_DEBUG,
8008                             "rsm_ioctl done: ENODEV\n"));
8009                         return (RSMERR_CTLR_NOT_PRESENT);
8010                 }
8011                 error = rsmattr_ddi_copyout(adapter, msg.arg, mode);
8012                 rsmka_release_adapter(adapter);
8013                 DBG_PRINTF((category, RSM_DEBUG,
8014                     "rsm_ioctl:after copyout %d\n", error));
8015                 return (error);
8016         }
8017 
8018         if (cmd == RSM_IOCTL_BAR_INFO) {
8019                 /* Return library off,len of barrier page */
8020                 msg.off = barrier_offset;
8021                 msg.len = (int)barrier_size;
8022 #ifdef _MULTI_DATAMODEL
8023                 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
8024                         rsm_ioctlmsg32_t msg32;
8025 
8026                         if (msg.len > UINT_MAX)
8027                                 msg.len = RSM_MAXSZ_PAGE_ALIGNED;
8028                         else
8029                                 msg32.len = (int32_t)msg.len;
8030                         msg32.off = (int32_t)msg.off;
8031                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8032                             "rsm_ioctl done\n"));
8033                         if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg,
8034                             sizeof (msg32), mode))
8035                                 return (RSMERR_BAD_ADDR);
8036                         else
8037                                 return (RSM_SUCCESS);
8038                 }
8039 #endif
8040                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8041                     "rsm_ioctl done\n"));
8042                 if (ddi_copyout((caddr_t)&msg, (caddr_t)arg,
8043                     sizeof (msg), mode))
8044                         return (RSMERR_BAD_ADDR);
8045                 else
8046                         return (RSM_SUCCESS);
8047         }
8048 
8049         if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_MAP_ADDR) {
8050                 /* map the nodeid or hwaddr */
8051                 error = rsmaddr_ioctl(cmd, &msg, mode);
8052                 if (error == RSM_SUCCESS) {
8053 #ifdef _MULTI_DATAMODEL
8054                         if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
8055                                 rsm_ioctlmsg32_t msg32;
8056 
8057                                 msg32.hwaddr = (uint64_t)msg.hwaddr;
8058                                 msg32.nodeid = (uint32_t)msg.nodeid;
8059 
8060                                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8061                                     "rsm_ioctl done\n"));
8062                                 if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg,
8063                                     sizeof (msg32), mode))
8064                                         return (RSMERR_BAD_ADDR);
8065                                 else
8066                                         return (RSM_SUCCESS);
8067                         }
8068 #endif
8069                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8070                             "rsm_ioctl done\n"));
8071                         if (ddi_copyout((caddr_t)&msg, (caddr_t)arg,
8072                             sizeof (msg), mode))
8073                                 return (RSMERR_BAD_ADDR);
8074                         else
8075                                 return (RSM_SUCCESS);
8076                 }
8077                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8078                     "rsm_ioctl done: %d\n", error));
8079                 return (error);
8080         }
8081 
8082         /* Find resource and look it in read mode */
8083         rnum = getminor(dev);
8084         res = rsmresource_lookup(rnum, RSM_NOLOCK);
8085         ASSERT(res != NULL);
8086 
8087         /*
8088          * Find command group
8089          */
8090         switch (RSM_IOCTL_CMDGRP(cmd)) {
8091         case RSM_IOCTL_EXPORT_SEG:
8092                 /*
8093                  * Export list is searched during publish, loopback and
8094                  * remote lookup call.
8095                  */
8096                 seg = rsmresource_seg(res, rnum, credp,
8097                     RSM_RESOURCE_EXPORT_SEGMENT);
8098                 if (seg->s_type == RSM_RESOURCE_EXPORT_SEGMENT) {
8099                         error = rsmexport_ioctl(seg, &msg, cmd, arg, mode,
8100                             credp);
8101                 } else { /* export ioctl on an import/barrier resource */
8102                         error = RSMERR_BAD_SEG_HNDL;
8103                 }
8104                 break;
8105         case RSM_IOCTL_IMPORT_SEG:
8106                 /* Import list is searched during remote unmap call. */
8107                 seg = rsmresource_seg(res, rnum, credp,
8108                     RSM_RESOURCE_IMPORT_SEGMENT);
8109                 if (seg->s_type == RSM_RESOURCE_IMPORT_SEGMENT) {
8110                         error = rsmimport_ioctl(seg, &msg, cmd, arg, mode,
8111                             credp);
8112                 } else  { /* import ioctl on an export/barrier resource */
8113                         error = RSMERR_BAD_SEG_HNDL;
8114                 }
8115                 break;
8116         case RSM_IOCTL_BAR:
8117                 if (res != RSMRC_RESERVED &&
8118                     res->rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT) {
8119                         error = rsmbar_ioctl((rsmseg_t *)res, &msg, cmd, arg,
8120                             mode);
8121                 } else { /* invalid res value */
8122                         error = RSMERR_BAD_SEG_HNDL;
8123                 }
8124                 break;
8125         case RSM_IOCTL_BELL:
8126                 if (res != RSMRC_RESERVED) {
8127                         if (res->rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT)
8128                                 error = exportbell_ioctl((rsmseg_t *)res, cmd);
8129                         else if (res->rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT)
8130                                 error = importbell_ioctl((rsmseg_t *)res, cmd);
8131                         else /* RSM_RESOURCE_BAR */
8132                                 error = RSMERR_BAD_SEG_HNDL;
8133                 } else { /* invalid res value */
8134                         error = RSMERR_BAD_SEG_HNDL;
8135                 }
8136                 break;
8137         default:
8138                 error = EINVAL;
8139         }
8140 
8141         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ioctl done: %d\n",
8142             error));
8143         return (error);
8144 }
8145 
8146 
8147 /* **************************** Segment Mapping Operations ********* */
8148 static rsm_mapinfo_t *
8149 rsm_get_mapinfo(rsmseg_t *seg, off_t off, size_t len, off_t *dev_offset,
8150     size_t *map_len)
8151 {
8152         rsm_mapinfo_t   *p;
8153         /*
8154          * Find the correct mapinfo structure to use during the mapping
8155          * from the seg->s_mapinfo list.
8156          * The seg->s_mapinfo list contains in reverse order the mappings
8157          * as returned by the RSMPI rsm_map. In rsm_devmap, we need to
8158          * access the correct entry within this list for the mapping
8159          * requested.
8160          *
8161          * The algorithm for selecting a list entry is as follows:
8162          *
8163          * When start_offset of an entry <= off we have found the entry
8164          * we were looking for. Adjust the dev_offset and map_len (needs
8165          * to be PAGESIZE aligned).
8166          */
8167         p = seg->s_mapinfo;
8168         for (; p; p = p->next) {
8169                 if (p->start_offset <= off) {
8170                         *dev_offset = p->dev_offset + off - p->start_offset;
8171                         *map_len = (len > p->individual_len) ?
8172                             p->individual_len : ptob(btopr(len));
8173                         return (p);
8174                 }
8175                 p = p->next;
8176         }
8177 
8178         return (NULL);
8179 }
8180 
8181 static void
8182 rsm_free_mapinfo(rsm_mapinfo_t  *mapinfo)
8183 {
8184         rsm_mapinfo_t *p;
8185 
8186         while (mapinfo != NULL) {
8187                 p = mapinfo;
8188                 mapinfo = mapinfo->next;
8189                 kmem_free(p, sizeof (*p));
8190         }
8191 }
8192 
8193 static int
8194 rsmmap_map(devmap_cookie_t dhp, dev_t dev, uint_t flags, offset_t off,
8195     size_t len, void **pvtp)
8196 {
8197         rsmcookie_t     *p;
8198         rsmresource_t   *res;
8199         rsmseg_t        *seg;
8200         minor_t rnum;
8201         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8202 
8203         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_map enter\n"));
8204 
8205         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8206             "rsmmap_map: dhp = %x\n", dhp));
8207 
8208         flags = flags;
8209 
8210         rnum = getminor(dev);
8211         res = (rsmresource_t *)rsmresource_lookup(rnum, RSM_NOLOCK);
8212         ASSERT(res != NULL);
8213 
8214         seg = (rsmseg_t *)res;
8215 
8216         rsmseglock_acquire(seg);
8217 
8218         ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8219 
8220         /*
8221          * Allocate structure and add cookie to segment list
8222          */
8223         p = kmem_alloc(sizeof (*p), KM_SLEEP);
8224 
8225         p->c_dhp = dhp;
8226         p->c_off = off;
8227         p->c_len = len;
8228         p->c_next = seg->s_ckl;
8229         seg->s_ckl = p;
8230 
8231         *pvtp = (void *)seg;
8232 
8233         rsmseglock_release(seg);
8234 
8235         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_map done\n"));
8236         return (DDI_SUCCESS);
8237 }
8238 
8239 /*
8240  * Page fault handling is done here. The prerequisite mapping setup
8241  * has been done in rsm_devmap with calls to ddi_devmem_setup or
8242  * ddi_umem_setup
8243  */
8244 static int
8245 rsmmap_access(devmap_cookie_t dhp, void *pvt, offset_t offset, size_t len,
8246     uint_t type, uint_t rw)
8247 {
8248         int e;
8249         rsmseg_t *seg = (rsmseg_t *)pvt;
8250         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8251 
8252         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_access enter\n"));
8253 
8254         rsmseglock_acquire(seg);
8255 
8256         ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8257 
8258         while (seg->s_state == RSM_STATE_MAP_QUIESCE) {
8259                 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
8260                         DBG_PRINTF((category, RSM_DEBUG,
8261                             "rsmmap_access done: cv_wait INTR"));
8262                         rsmseglock_release(seg);
8263                         return (RSMERR_INTERRUPTED);
8264                 }
8265         }
8266 
8267         ASSERT(seg->s_state == RSM_STATE_DISCONNECT ||
8268             seg->s_state == RSM_STATE_ACTIVE);
8269 
8270         if (seg->s_state == RSM_STATE_DISCONNECT)
8271                 seg->s_flags |= RSM_IMPORT_DUMMY;
8272 
8273         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8274             "rsmmap_access: dhp = %x\n", dhp));
8275 
8276         rsmseglock_release(seg);
8277 
8278         if (e = devmap_load(dhp, offset, len, type, rw)) {
8279                 DBG_PRINTF((category, RSM_ERR, "devmap_load failed\n"));
8280         }
8281 
8282 
8283         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_access done\n"));
8284 
8285         return (e);
8286 }
8287 
8288 static int
8289 rsmmap_dup(devmap_cookie_t dhp, void *oldpvt, devmap_cookie_t new_dhp,
8290         void **newpvt)
8291 {
8292         rsmseg_t        *seg = (rsmseg_t *)oldpvt;
8293         rsmcookie_t     *p, *old;
8294         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8295 
8296         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_dup enter\n"));
8297 
8298         /*
8299          * Same as map, create an entry to hold cookie and add it to
8300          * connect segment list. The oldpvt is a pointer to segment.
8301          * Return segment pointer in newpvt.
8302          */
8303         rsmseglock_acquire(seg);
8304 
8305         ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8306 
8307         /*
8308          * Find old cookie
8309          */
8310         for (old = seg->s_ckl; old != NULL; old = old->c_next) {
8311                 if (old->c_dhp == dhp) {
8312                         break;
8313                 }
8314         }
8315         if (old == NULL) {
8316                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8317                     "rsmmap_dup done: EINVAL\n"));
8318                 rsmseglock_release(seg);
8319                 return (EINVAL);
8320         }
8321 
8322         p = kmem_alloc(sizeof (*p), KM_SLEEP);
8323 
8324         p->c_dhp = new_dhp;
8325         p->c_off = old->c_off;
8326         p->c_len = old->c_len;
8327         p->c_next = seg->s_ckl;
8328         seg->s_ckl = p;
8329 
8330         *newpvt = (void *)seg;
8331 
8332         rsmseglock_release(seg);
8333 
8334         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_dup done\n"));
8335 
8336         return (DDI_SUCCESS);
8337 }
8338 
8339 static void
8340 rsmmap_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off, size_t len,
8341         devmap_cookie_t new_dhp1, void **pvtp1,
8342         devmap_cookie_t new_dhp2, void **pvtp2)
8343 {
8344         /*
8345          * Remove pvtp structure from segment list.
8346          */
8347         rsmseg_t        *seg = (rsmseg_t *)pvtp;
8348         int freeflag;
8349 
8350         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8351 
8352         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_unmap enter\n"));
8353 
8354         off = off; len = len;
8355         pvtp1 = pvtp1; pvtp2 = pvtp2;
8356 
8357         rsmseglock_acquire(seg);
8358 
8359         ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8360 
8361         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8362             "rsmmap_unmap: dhp = %x\n", dhp));
8363         /*
8364          * We can go ahead and remove the dhps even if we are in
8365          * the MAPPING state because the dhps being removed here
8366          * belong to a different mmap and we are holding the segment
8367          * lock.
8368          */
8369         if (new_dhp1 == NULL && new_dhp2 == NULL) {
8370                 /* find and remove dhp handle */
8371                 rsmcookie_t *tmp, **back = &seg->s_ckl;
8372 
8373                 while (*back != NULL) {
8374                         tmp = *back;
8375                         if (tmp->c_dhp == dhp) {
8376                                 *back = tmp->c_next;
8377                                 kmem_free(tmp, sizeof (*tmp));
8378                                 break;
8379                         }
8380                         back = &tmp->c_next;
8381                 }
8382         } else {
8383                 DBG_PRINTF((category, RSM_DEBUG_LVL2,
8384                     "rsmmap_unmap:parital unmap"
8385                     "new_dhp1 %lx, new_dhp2 %lx\n",
8386                     (size_t)new_dhp1, (size_t)new_dhp2));
8387         }
8388 
8389         /*
8390          * rsmmap_unmap is called for each mapping cookie on the list.
8391          * When the list becomes empty and we are not in the MAPPING
8392          * state then unmap in the rsmpi driver.
8393          */
8394         if ((seg->s_ckl == NULL) && (seg->s_state != RSM_STATE_MAPPING))
8395                 (void) rsm_unmap(seg);
8396 
8397         if (seg->s_state == RSM_STATE_END && seg->s_ckl == NULL) {
8398                 freeflag = 1;
8399         } else {
8400                 freeflag = 0;
8401         }
8402 
8403         rsmseglock_release(seg);
8404 
8405         if (freeflag) {
8406                 /* Free the segment structure */
8407                 rsmseg_free(seg);
8408         }
8409         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_unmap done\n"));
8410 
8411 }
8412 
8413 static struct devmap_callback_ctl rsmmap_ops = {
8414         DEVMAP_OPS_REV, /* devmap_ops version number    */
8415         rsmmap_map,     /* devmap_ops map routine */
8416         rsmmap_access,  /* devmap_ops access routine */
8417         rsmmap_dup,             /* devmap_ops dup routine               */
8418         rsmmap_unmap,   /* devmap_ops unmap routine */
8419 };
8420 
8421 static int
8422 rsm_devmap(dev_t dev, devmap_cookie_t dhc, offset_t off, size_t len,
8423     size_t *maplen, uint_t model /*ARGSUSED*/)
8424 {
8425         struct devmap_callback_ctl *callbackops = &rsmmap_ops;
8426         int             err;
8427         uint_t          maxprot;
8428         minor_t         rnum;
8429         rsmseg_t        *seg;
8430         off_t           dev_offset;
8431         size_t          cur_len;
8432         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8433 
8434         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_devmap enter\n"));
8435 
8436         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8437             "rsm_devmap: off = %lx, len = %lx\n", off, len));
8438         rnum = getminor(dev);
8439         seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_NOLOCK);
8440         ASSERT(seg != NULL);
8441 
8442         if (seg->s_hdr.rsmrc_type == RSM_RESOURCE_BAR) {
8443                 if ((off == barrier_offset) &&
8444                     (len == barrier_size)) {
8445 
8446                         ASSERT(bar_va != NULL && bar_cookie != NULL);
8447 
8448                         /*
8449                          * The offset argument in devmap_umem_setup represents
8450                          * the offset within the kernel memory defined by the
8451                          * cookie. We use this offset as barrier_offset.
8452                          */
8453                         err = devmap_umem_setup(dhc, rsm_dip, NULL, bar_cookie,
8454                             barrier_offset, len, PROT_USER|PROT_READ,
8455                             DEVMAP_DEFAULTS, 0);
8456 
8457                         if (err != 0) {
8458                                 DBG_PRINTF((category, RSM_ERR,
8459                                     "rsm_devmap done: %d\n", err));
8460                                 return (RSMERR_MAP_FAILED);
8461                         }
8462                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8463                             "rsm_devmap done: %d\n", err));
8464 
8465                         *maplen = barrier_size;
8466 
8467                         return (err);
8468                 } else {
8469                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8470                             "rsm_devmap done: %d\n", err));
8471                         return (RSMERR_MAP_FAILED);
8472                 }
8473         }
8474 
8475         ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8476         ASSERT(seg->s_state == RSM_STATE_MAPPING);
8477 
8478         /*
8479          * Make sure we still have permission for the map operation.
8480          */
8481         maxprot = PROT_USER;
8482         if (seg->s_mode & RSM_PERM_READ) {
8483                 maxprot |= PROT_READ;
8484         }
8485 
8486         if (seg->s_mode & RSM_PERM_WRITE) {
8487                 maxprot |= PROT_WRITE;
8488         }
8489 
8490         /*
8491          * For each devmap call, rsmmap_map is called. This maintains driver
8492          * private information for the mapping. Thus, if there are multiple
8493          * devmap calls there will be multiple rsmmap_map calls and for each
8494          * call, the mapping information will be stored.
8495          * In case of an error during the processing of the devmap call, error
8496          * will be returned. This error return causes the caller of rsm_devmap
8497          * to undo all the mappings by calling rsmmap_unmap for each one.
8498          * rsmmap_unmap will free up the private information for the requested
8499          * mapping.
8500          */
8501         if (seg->s_node != my_nodeid) {
8502                 rsm_mapinfo_t *p;
8503 
8504                 p = rsm_get_mapinfo(seg, off, len, &dev_offset, &cur_len);
8505                 if (p == NULL) {
8506                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8507                             "rsm_devmap: incorrect mapping info\n"));
8508                         return (RSMERR_MAP_FAILED);
8509                 }
8510                 err = devmap_devmem_setup(dhc, p->dip,
8511                     callbackops, p->dev_register,
8512                     dev_offset, cur_len, maxprot,
8513                     DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS, 0);
8514 
8515                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8516                     "rsm_devmap: dip=%lx,dreg=%lu,doff=%lx,"
8517                     "off=%lx,len=%lx\n",
8518                     p->dip, p->dev_register, dev_offset, off, cur_len));
8519 
8520                 if (err != 0) {
8521                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8522                             "rsm_devmap: devmap_devmem_setup failed %d\n",
8523                             err));
8524                         return (RSMERR_MAP_FAILED);
8525                 }
8526                 /* cur_len is always an integral multiple pagesize */
8527                 ASSERT((cur_len & (PAGESIZE-1)) == 0);
8528                 *maplen = cur_len;
8529                 return (err);
8530 
8531         } else {
8532                 err = devmap_umem_setup(dhc, rsm_dip, callbackops,
8533                     seg->s_cookie, off, len, maxprot,
8534                     DEVMAP_ALLOW_REMAP|DEVMAP_DEFAULTS, 0);
8535                 if (err != 0) {
8536                         DBG_PRINTF((category, RSM_DEBUG,
8537                             "rsm_devmap: devmap_umem_setup failed %d\n",
8538                             err));
8539                         return (RSMERR_MAP_FAILED);
8540                 }
8541                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8542                     "rsm_devmap: loopback done\n"));
8543 
8544                 *maplen = ptob(btopr(len));
8545 
8546                 return (err);
8547         }
8548 }
8549 
8550 /*
8551  * We can use the devmap framework for mapping device memory to user space by
8552  * specifying this routine in the rsm_cb_ops structure. The kernel mmap
8553  * processing calls this entry point and devmap_setup is called within this
8554  * function, which eventually calls rsm_devmap
8555  */
8556 static int
8557 rsm_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len,
8558     uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred)
8559 {
8560         int                     error = 0;
8561         int                     old_state;
8562         minor_t                 rnum;
8563         rsmseg_t                *seg, *eseg;
8564         adapter_t               *adapter;
8565         rsm_import_share_t      *sharedp;
8566         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8567 
8568         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_segmap enter\n"));
8569 
8570         /*
8571          * find segment
8572          */
8573         rnum = getminor(dev);
8574         seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_LOCK);
8575 
8576         if (seg == NULL) {
8577                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8578                     "rsm_segmap done: invalid segment\n"));
8579                 return (EINVAL);
8580         }
8581 
8582         /*
8583          * the user is trying to map a resource that has not been
8584          * defined yet. The library uses this to map in the
8585          * barrier page.
8586          */
8587         if (seg->s_hdr.rsmrc_type == RSM_RESOURCE_BAR) {
8588                 rsmseglock_release(seg);
8589 
8590                 /*
8591                  * The mapping for the barrier page is identified
8592                  * by the special offset barrier_offset
8593                  */
8594 
8595                 if (off == (off_t)barrier_offset ||
8596                     len == (off_t)barrier_size) {
8597                         if (bar_cookie == NULL || bar_va == NULL) {
8598                                 DBG_PRINTF((category, RSM_DEBUG,
8599                                     "rsm_segmap: bar cookie/va is NULL\n"));
8600                                 return (EINVAL);
8601                         }
8602 
8603                         error = devmap_setup(dev, (offset_t)off, as, addrp,
8604                             (size_t)len, prot, maxprot, flags,  cred);
8605 
8606                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8607                             "rsm_segmap done: %d\n", error));
8608                         return (error);
8609                 } else {
8610                         DBG_PRINTF((category, RSM_DEBUG,
8611                             "rsm_segmap: bad offset/length\n"));
8612                         return (EINVAL);
8613                 }
8614         }
8615 
8616         /* Make sure you can only map imported segments */
8617         if (seg->s_hdr.rsmrc_type != RSM_RESOURCE_IMPORT_SEGMENT) {
8618                 rsmseglock_release(seg);
8619                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8620                     "rsm_segmap done: not an import segment\n"));
8621                 return (EINVAL);
8622         }
8623         /* check means library is broken */
8624         ASSERT(seg->s_hdr.rsmrc_num == rnum);
8625 
8626         /* wait for the segment to become unquiesced */
8627         while (seg->s_state == RSM_STATE_CONN_QUIESCE) {
8628                 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
8629                         rsmseglock_release(seg);
8630                         DBG_PRINTF((category, RSM_DEBUG,
8631                             "rsm_segmap done: cv_wait INTR"));
8632                         return (ENODEV);
8633                 }
8634         }
8635 
8636         /* wait until segment leaves the mapping state */
8637         while (seg->s_state == RSM_STATE_MAPPING)
8638                 cv_wait(&seg->s_cv, &seg->s_lock);
8639 
8640         /*
8641          * we allow multiple maps of the same segment in the KA
8642          * and it works because we do an rsmpi map of the whole
8643          * segment during the first map and all the device mapping
8644          * information needed in rsm_devmap is in the mapinfo list.
8645          */
8646         if ((seg->s_state != RSM_STATE_CONNECT) &&
8647             (seg->s_state != RSM_STATE_ACTIVE)) {
8648                 rsmseglock_release(seg);
8649                 DBG_PRINTF((category, RSM_DEBUG,
8650                     "rsm_segmap done: segment not connected\n"));
8651                 return (ENODEV);
8652         }
8653 
8654         /*
8655          * Make sure we are not mapping a larger segment than what's
8656          * exported
8657          */
8658         if ((size_t)off + ptob(btopr(len)) > seg->s_len) {
8659                 rsmseglock_release(seg);
8660                 DBG_PRINTF((category, RSM_DEBUG,
8661                     "rsm_segmap done: off+len>seg size\n"));
8662                 return (ENXIO);
8663         }
8664 
8665         /*
8666          * Make sure we still have permission for the map operation.
8667          */
8668         maxprot = PROT_USER;
8669         if (seg->s_mode & RSM_PERM_READ) {
8670                 maxprot |= PROT_READ;
8671         }
8672 
8673         if (seg->s_mode & RSM_PERM_WRITE) {
8674                 maxprot |= PROT_WRITE;
8675         }
8676 
8677         if ((prot & maxprot) != prot) {
8678                 /* No permission */
8679                 rsmseglock_release(seg);
8680                 DBG_PRINTF((category, RSM_DEBUG,
8681                     "rsm_segmap done: no permission\n"));
8682                 return (EACCES);
8683         }
8684 
8685         old_state = seg->s_state;
8686 
8687         ASSERT(seg->s_share != NULL);
8688 
8689         rsmsharelock_acquire(seg);
8690 
8691         sharedp = seg->s_share;
8692 
8693         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8694             "rsm_segmap:RSMSI_STATE=%d\n", sharedp->rsmsi_state));
8695 
8696         if ((sharedp->rsmsi_state != RSMSI_STATE_CONNECTED) &&
8697             (sharedp->rsmsi_state != RSMSI_STATE_MAPPED)) {
8698                 rsmsharelock_release(seg);
8699                 rsmseglock_release(seg);
8700                 DBG_PRINTF((category, RSM_DEBUG,
8701                     "rsm_segmap done:RSMSI_STATE %d invalid\n",
8702                     sharedp->rsmsi_state));
8703                 return (ENODEV);
8704         }
8705 
8706         /*
8707          * Do the map - since we want importers to share mappings
8708          * we do the rsmpi map for the whole segment
8709          */
8710         if (seg->s_node != my_nodeid) {
8711                 uint_t dev_register;
8712                 off_t dev_offset;
8713                 dev_info_t *dip;
8714                 size_t tmp_len;
8715                 size_t total_length_mapped = 0;
8716                 size_t length_to_map = seg->s_len;
8717                 off_t tmp_off = 0;
8718                 rsm_mapinfo_t *p;
8719 
8720                 /*
8721                  * length_to_map = seg->s_len is always an integral
8722                  * multiple of PAGESIZE. Length mapped in each entry in mapinfo
8723                  * list is a multiple of PAGESIZE - RSMPI map ensures this
8724                  */
8725 
8726                 adapter = seg->s_adapter;
8727                 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_CONNECTED ||
8728                     sharedp->rsmsi_state == RSMSI_STATE_MAPPED);
8729 
8730                 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) {
8731                         error = 0;
8732                         /* map the whole segment */
8733                         while (total_length_mapped < seg->s_len) {
8734                                 tmp_len = 0;
8735 
8736                                 error = adapter->rsmpi_ops->rsm_map(
8737                                     seg->s_handle.in, tmp_off,
8738                                     length_to_map, &tmp_len,
8739                                     &dip, &dev_register, &dev_offset,
8740                                     NULL, NULL);
8741 
8742                                 if (error != 0)
8743                                         break;
8744 
8745                                 /*
8746                                  * Store the mapping info obtained from rsm_map
8747                                  */
8748                                 p = kmem_alloc(sizeof (*p), KM_SLEEP);
8749                                 p->dev_register = dev_register;
8750                                 p->dev_offset = dev_offset;
8751                                 p->dip = dip;
8752                                 p->individual_len = tmp_len;
8753                                 p->start_offset = tmp_off;
8754                                 p->next = sharedp->rsmsi_mapinfo;
8755                                 sharedp->rsmsi_mapinfo = p;
8756 
8757                                 total_length_mapped += tmp_len;
8758                                 length_to_map -= tmp_len;
8759                                 tmp_off += tmp_len;
8760                         }
8761                         seg->s_mapinfo = sharedp->rsmsi_mapinfo;
8762 
8763                         if (error != RSM_SUCCESS) {
8764                                 /* Check if this is the the first rsm_map */
8765                                 if (sharedp->rsmsi_mapinfo != NULL) {
8766                                         /*
8767                                          * A single rsm_unmap undoes
8768                                          * multiple rsm_maps.
8769                                          */
8770                                         (void) seg->s_adapter->rsmpi_ops->
8771                                             rsm_unmap(sharedp->rsmsi_handle);
8772                                         rsm_free_mapinfo(sharedp->
8773                                             rsmsi_mapinfo);
8774                                 }
8775                                 sharedp->rsmsi_mapinfo = NULL;
8776                                 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
8777                                 rsmsharelock_release(seg);
8778                                 rsmseglock_release(seg);
8779                                 DBG_PRINTF((category, RSM_DEBUG,
8780                                     "rsm_segmap done: rsmpi map err %d\n",
8781                                     error));
8782                                 ASSERT(error != RSMERR_BAD_LENGTH &&
8783                                     error != RSMERR_BAD_MEM_ALIGNMENT &&
8784                                     error != RSMERR_BAD_SEG_HNDL);
8785                                 if (error == RSMERR_UNSUPPORTED_OPERATION)
8786                                         return (ENOTSUP);
8787                                 else if (error == RSMERR_INSUFFICIENT_RESOURCES)
8788                                         return (EAGAIN);
8789                                 else if (error == RSMERR_CONN_ABORTED)
8790                                         return (ENODEV);
8791                                 else
8792                                         return (error);
8793                         } else {
8794                                 sharedp->rsmsi_state = RSMSI_STATE_MAPPED;
8795                         }
8796                 } else {
8797                         seg->s_mapinfo = sharedp->rsmsi_mapinfo;
8798                 }
8799 
8800                 sharedp->rsmsi_mapcnt++;
8801 
8802                 rsmsharelock_release(seg);
8803 
8804                 /* move to an intermediate mapping state */
8805                 seg->s_state = RSM_STATE_MAPPING;
8806                 rsmseglock_release(seg);
8807 
8808                 error = devmap_setup(dev, (offset_t)off, as, addrp,
8809                     len, prot, maxprot, flags, cred);
8810 
8811                 rsmseglock_acquire(seg);
8812                 ASSERT(seg->s_state == RSM_STATE_MAPPING);
8813 
8814                 if (error == DDI_SUCCESS) {
8815                         seg->s_state = RSM_STATE_ACTIVE;
8816                 } else {
8817                         rsmsharelock_acquire(seg);
8818 
8819                         ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED);
8820 
8821                         sharedp->rsmsi_mapcnt--;
8822                         if (sharedp->rsmsi_mapcnt == 0) {
8823                                 /* unmap the shared RSMPI mapping */
8824                                 ASSERT(sharedp->rsmsi_handle != NULL);
8825                                 (void) adapter->rsmpi_ops->
8826                                     rsm_unmap(sharedp->rsmsi_handle);
8827                                 rsm_free_mapinfo(sharedp->rsmsi_mapinfo);
8828                                 sharedp->rsmsi_mapinfo = NULL;
8829                                 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
8830                         }
8831 
8832                         rsmsharelock_release(seg);
8833                         seg->s_state = old_state;
8834                         DBG_PRINTF((category, RSM_ERR,
8835                             "rsm: devmap_setup failed %d\n", error));
8836                 }
8837                 cv_broadcast(&seg->s_cv);
8838                 rsmseglock_release(seg);
8839                 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsm_segmap done: %d\n",
8840                     error));
8841                 return (error);
8842         } else {
8843                 /*
8844                  * For loopback, the export segment mapping cookie (s_cookie)
8845                  * is also used as the s_cookie value for its import segments
8846                  * during mapping.
8847                  * Note that reference counting for s_cookie of the export
8848                  * segment is not required due to the following:
8849                  * We never have a case of the export segment being destroyed,
8850                  * leaving the import segments with a stale value for the
8851                  * s_cookie field, since a force disconnect is done prior to a
8852                  * destroy of an export segment. The force disconnect causes
8853                  * the s_cookie value to be reset to NULL. Also for the
8854                  * rsm_rebind operation, we change the s_cookie value of the
8855                  * export segment as well as of all its local (loopback)
8856                  * importers.
8857                  */
8858                 DBG_ADDCATEGORY(category, RSM_LOOPBACK);
8859 
8860                 rsmsharelock_release(seg);
8861                 /*
8862                  * In order to maintain the lock ordering between the export
8863                  * and import segment locks, we need to acquire the export
8864                  * segment lock first and only then acquire the import
8865                  * segment lock.
8866                  * The above is necessary to avoid any deadlock scenarios
8867                  * with rsm_rebind which also acquires both the export
8868                  * and import segment locks in the above mentioned order.
8869                  * Based on code inspection, there seem to be no other
8870                  * situations in which both the export and import segment
8871                  * locks are acquired either in the same or opposite order
8872                  * as mentioned above.
8873                  * Thus in order to conform to the above lock order, we
8874                  * need to change the state of the import segment to
8875                  * RSM_STATE_MAPPING, release the lock. Once this is done we
8876                  * can now safely acquire the export segment lock first
8877                  * followed by the import segment lock which is as per
8878                  * the lock order mentioned above.
8879                  */
8880                 /* move to an intermediate mapping state */
8881                 seg->s_state = RSM_STATE_MAPPING;
8882                 rsmseglock_release(seg);
8883 
8884                 eseg = rsmexport_lookup(seg->s_key);
8885 
8886                 if (eseg == NULL) {
8887                         rsmseglock_acquire(seg);
8888                         /*
8889                          * Revert to old_state and signal any waiters
8890                          * The shared state is not changed
8891                          */
8892 
8893                         seg->s_state = old_state;
8894                         cv_broadcast(&seg->s_cv);
8895                         rsmseglock_release(seg);
8896                         DBG_PRINTF((category, RSM_DEBUG,
8897                             "rsm_segmap done: key %d not found\n", seg->s_key));
8898                         return (ENODEV);
8899                 }
8900 
8901                 rsmsharelock_acquire(seg);
8902                 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_CONNECTED ||
8903                     sharedp->rsmsi_state == RSMSI_STATE_MAPPED);
8904 
8905                 sharedp->rsmsi_mapcnt++;
8906                 sharedp->rsmsi_state = RSMSI_STATE_MAPPED;
8907                 rsmsharelock_release(seg);
8908 
8909                 ASSERT(eseg->s_cookie != NULL);
8910 
8911                 /*
8912                  * It is not required or necessary to acquire the import
8913                  * segment lock here to change the value of s_cookie since
8914                  * no one will touch the import segment as long as it is
8915                  * in the RSM_STATE_MAPPING state.
8916                  */
8917                 seg->s_cookie = eseg->s_cookie;
8918 
8919                 rsmseglock_release(eseg);
8920 
8921                 error = devmap_setup(dev, (offset_t)off, as, addrp, (size_t)len,
8922                     prot, maxprot, flags, cred);
8923 
8924                 rsmseglock_acquire(seg);
8925                 ASSERT(seg->s_state == RSM_STATE_MAPPING);
8926                 if (error == 0) {
8927                         seg->s_state = RSM_STATE_ACTIVE;
8928                 } else {
8929                         rsmsharelock_acquire(seg);
8930 
8931                         ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED);
8932 
8933                         sharedp->rsmsi_mapcnt--;
8934                         if (sharedp->rsmsi_mapcnt == 0) {
8935                                 sharedp->rsmsi_mapinfo = NULL;
8936                                 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
8937                         }
8938                         rsmsharelock_release(seg);
8939                         seg->s_state = old_state;
8940                         seg->s_cookie = NULL;
8941                 }
8942                 cv_broadcast(&seg->s_cv);
8943                 rsmseglock_release(seg);
8944                 DBG_PRINTF((category, RSM_DEBUG_LVL2,
8945                     "rsm_segmap done: %d\n", error));
8946                 return (error);
8947         }
8948 }
8949 
8950 int
8951 rsmka_null_seg_create(
8952     rsm_controller_handle_t argcp,
8953     rsm_memseg_export_handle_t *handle,
8954     size_t size,
8955     uint_t flags,
8956     rsm_memory_local_t *memory,
8957     rsm_resource_callback_t callback,
8958     rsm_resource_callback_arg_t callback_arg    /*ARGSUSED*/)
8959 {
8960         return (RSM_SUCCESS);
8961 }
8962 
8963 
8964 int
8965 rsmka_null_seg_destroy(
8966     rsm_memseg_export_handle_t argmemseg        /*ARGSUSED*/)
8967 {
8968         return (RSM_SUCCESS);
8969 }
8970 
8971 
8972 int
8973 rsmka_null_bind(
8974     rsm_memseg_export_handle_t argmemseg,
8975     off_t offset,
8976     rsm_memory_local_t *argmemory,
8977     rsm_resource_callback_t callback,
8978     rsm_resource_callback_arg_t callback_arg    /*ARGSUSED*/)
8979 {
8980         return (RSM_SUCCESS);
8981 }
8982 
8983 
8984 int
8985 rsmka_null_unbind(
8986     rsm_memseg_export_handle_t argmemseg,
8987     off_t offset,
8988     size_t length       /*ARGSUSED*/)
8989 {
8990         return (DDI_SUCCESS);
8991 }
8992 
8993 int
8994 rsmka_null_rebind(
8995     rsm_memseg_export_handle_t argmemseg,
8996     off_t offset,
8997     rsm_memory_local_t *memory,
8998     rsm_resource_callback_t callback,
8999     rsm_resource_callback_arg_t callback_arg    /*ARGSUSED*/)
9000 {
9001         return (RSM_SUCCESS);
9002 }
9003 
9004 int
9005 rsmka_null_publish(
9006     rsm_memseg_export_handle_t argmemseg,
9007     rsm_access_entry_t access_list[],
9008     uint_t access_list_length,
9009     rsm_memseg_id_t segment_id,
9010     rsm_resource_callback_t callback,
9011     rsm_resource_callback_arg_t callback_arg    /*ARGSUSED*/)
9012 {
9013         return (RSM_SUCCESS);
9014 }
9015 
9016 
9017 int
9018 rsmka_null_republish(
9019     rsm_memseg_export_handle_t memseg,
9020     rsm_access_entry_t access_list[],
9021     uint_t access_list_length,
9022     rsm_resource_callback_t callback,
9023     rsm_resource_callback_arg_t callback_arg    /*ARGSUSED*/)
9024 {
9025         return (RSM_SUCCESS);
9026 }
9027 
9028 int
9029 rsmka_null_unpublish(
9030     rsm_memseg_export_handle_t argmemseg        /*ARGSUSED*/)
9031 {
9032         return (RSM_SUCCESS);
9033 }
9034 
9035 
9036 void
9037 rsmka_init_loopback()
9038 {
9039         rsm_ops_t       *ops = &null_rsmpi_ops;
9040         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_LOOPBACK);
9041 
9042         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9043             "rsmka_init_loopback enter\n"));
9044 
9045         /* initialize null ops vector */
9046         ops->rsm_seg_create = rsmka_null_seg_create;
9047         ops->rsm_seg_destroy = rsmka_null_seg_destroy;
9048         ops->rsm_bind = rsmka_null_bind;
9049         ops->rsm_unbind = rsmka_null_unbind;
9050         ops->rsm_rebind = rsmka_null_rebind;
9051         ops->rsm_publish = rsmka_null_publish;
9052         ops->rsm_unpublish = rsmka_null_unpublish;
9053         ops->rsm_republish = rsmka_null_republish;
9054 
9055         /* initialize attributes for loopback adapter */
9056         loopback_attr.attr_name = loopback_str;
9057         loopback_attr.attr_page_size = 0x8; /* 8K */
9058 
9059         /* initialize loopback adapter */
9060         loopback_adapter.rsm_attr = loopback_attr;
9061         loopback_adapter.rsmpi_ops = &null_rsmpi_ops;
9062         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9063             "rsmka_init_loopback done\n"));
9064 }
9065 
9066 /* ************** DR functions ********************************** */
9067 static void
9068 rsm_quiesce_exp_seg(rsmresource_t *resp)
9069 {
9070         int             recheck_state;
9071         rsmseg_t        *segp = (rsmseg_t *)resp;
9072         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9073         DBG_DEFINE_STR(function, "rsm_unquiesce_exp_seg");
9074 
9075         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9076             "%s enter: key=%u\n", function, segp->s_key));
9077 
9078         rsmseglock_acquire(segp);
9079         do {
9080                 recheck_state = 0;
9081                 if ((segp->s_state == RSM_STATE_NEW_QUIESCED) ||
9082                     (segp->s_state == RSM_STATE_BIND_QUIESCED) ||
9083                     (segp->s_state == RSM_STATE_EXPORT_QUIESCING) ||
9084                     (segp->s_state == RSM_STATE_EXPORT_QUIESCED)) {
9085                         rsmseglock_release(segp);
9086                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9087                             "%s done:state =%d\n", function,
9088                             segp->s_state));
9089                         return;
9090                 }
9091 
9092                 if (segp->s_state == RSM_STATE_NEW) {
9093                         segp->s_state = RSM_STATE_NEW_QUIESCED;
9094                         rsmseglock_release(segp);
9095                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9096                             "%s done:state =%d\n", function,
9097                             segp->s_state));
9098                         return;
9099                 }
9100 
9101                 if (segp->s_state == RSM_STATE_BIND) {
9102                         /* unbind */
9103                         (void) rsm_unbind_pages(segp);
9104                         segp->s_state = RSM_STATE_BIND_QUIESCED;
9105                         rsmseglock_release(segp);
9106                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9107                             "%s done:state =%d\n", function,
9108                             segp->s_state));
9109                         return;
9110                 }
9111 
9112                 if (segp->s_state == RSM_STATE_EXPORT) {
9113                         /*
9114                          * wait for putv/getv to complete if the segp is
9115                          * a local memory handle
9116                          */
9117                         while ((segp->s_state == RSM_STATE_EXPORT) &&
9118                             (segp->s_rdmacnt != 0)) {
9119                                 cv_wait(&segp->s_cv, &segp->s_lock);
9120                         }
9121 
9122                         if (segp->s_state != RSM_STATE_EXPORT) {
9123                                 /*
9124                                  * state changed need to see what it
9125                                  * should be changed to.
9126                                  */
9127                                 recheck_state = 1;
9128                                 continue;
9129                         }
9130 
9131                         segp->s_state = RSM_STATE_EXPORT_QUIESCING;
9132                         rsmseglock_release(segp);
9133                         /*
9134                          * send SUSPEND messages - currently it will be
9135                          * done at the end
9136                          */
9137                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9138                             "%s done:state =%d\n", function,
9139                             segp->s_state));
9140                         return;
9141                 }
9142         } while (recheck_state);
9143 
9144         rsmseglock_release(segp);
9145 }
9146 
9147 static void
9148 rsm_unquiesce_exp_seg(rsmresource_t *resp)
9149 {
9150         int                     ret;
9151         rsmseg_t                *segp = (rsmseg_t *)resp;
9152         rsmapi_access_entry_t   *acl;
9153         rsm_access_entry_t      *rsmpi_acl;
9154         int                     acl_len;
9155         int                     create_flags = 0;
9156         struct buf              *xbuf;
9157         rsm_memory_local_t      mem;
9158         adapter_t               *adapter;
9159         dev_t                   sdev = 0;
9160         rsm_resource_callback_t callback_flag;
9161         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9162         DBG_DEFINE_STR(function, "rsm_unquiesce_exp_seg");
9163 
9164         rsmseglock_acquire(segp);
9165 
9166         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9167             "%s enter: key=%u, state=%d\n", function, segp->s_key,
9168             segp->s_state));
9169 
9170         if ((segp->s_state == RSM_STATE_NEW) ||
9171             (segp->s_state == RSM_STATE_BIND) ||
9172             (segp->s_state == RSM_STATE_EXPORT)) {
9173                 rsmseglock_release(segp);
9174                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done:state=%d\n",
9175                     function, segp->s_state));
9176                 return;
9177         }
9178 
9179         if (segp->s_state == RSM_STATE_NEW_QUIESCED) {
9180                 segp->s_state = RSM_STATE_NEW;
9181                 cv_broadcast(&segp->s_cv);
9182                 rsmseglock_release(segp);
9183                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done:state=%d\n",
9184                     function, segp->s_state));
9185                 return;
9186         }
9187 
9188         if (segp->s_state == RSM_STATE_BIND_QUIESCED) {
9189                 /* bind the segment */
9190                 ret = rsm_bind_pages(&segp->s_cookie, segp->s_region.r_vaddr,
9191                     segp->s_len, segp->s_proc);
9192                 if (ret == RSM_SUCCESS) { /* bind successful */
9193                         segp->s_state = RSM_STATE_BIND;
9194                 } else { /* bind failed - resource unavailable */
9195                         segp->s_state = RSM_STATE_NEW;
9196                 }
9197                 cv_broadcast(&segp->s_cv);
9198                 rsmseglock_release(segp);
9199                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9200                     "%s done: bind_qscd bind = %d\n", function, ret));
9201                 return;
9202         }
9203 
9204         while (segp->s_state == RSM_STATE_EXPORT_QUIESCING) {
9205                 /* wait for the segment to move to EXPORT_QUIESCED state */
9206                 cv_wait(&segp->s_cv, &segp->s_lock);
9207         }
9208 
9209         if (segp->s_state == RSM_STATE_EXPORT_QUIESCED) {
9210                 /* bind the segment */
9211                 ret = rsm_bind_pages(&segp->s_cookie, segp->s_region.r_vaddr,
9212                     segp->s_len, segp->s_proc);
9213 
9214                 if (ret != RSM_SUCCESS) {
9215                         /* bind failed - resource unavailable */
9216                         acl_len = segp->s_acl_len;
9217                         acl = segp->s_acl;
9218                         rsmpi_acl = segp->s_acl_in;
9219                         segp->s_acl_len = 0;
9220                         segp->s_acl = NULL;
9221                         segp->s_acl_in = NULL;
9222                         rsmseglock_release(segp);
9223 
9224                         rsmexport_rm(segp);
9225                         rsmacl_free(acl, acl_len);
9226                         rsmpiacl_free(rsmpi_acl, acl_len);
9227 
9228                         rsmseglock_acquire(segp);
9229                         segp->s_state = RSM_STATE_NEW;
9230                         cv_broadcast(&segp->s_cv);
9231                         rsmseglock_release(segp);
9232                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9233                             "%s done: exp_qscd bind failed = %d\n",
9234                             function, ret));
9235                         return;
9236                 }
9237                 /*
9238                  * publish the segment
9239                  * if  successful
9240                  *   segp->s_state = RSM_STATE_EXPORT;
9241                  * else failed
9242                  *   segp->s_state = RSM_STATE_BIND;
9243                  */
9244 
9245                 /* check whether it is a local_memory_handle */
9246                 if (segp->s_acl != (rsmapi_access_entry_t *)NULL) {
9247                         if ((segp->s_acl[0].ae_node == my_nodeid) &&
9248                             (segp->s_acl[0].ae_permission == 0)) {
9249                                 segp->s_state = RSM_STATE_EXPORT;
9250                                 cv_broadcast(&segp->s_cv);
9251                                 rsmseglock_release(segp);
9252                                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9253                                     "%s done:exp_qscd\n", function));
9254                                 return;
9255                         }
9256                 }
9257                 xbuf = ddi_umem_iosetup(segp->s_cookie, 0, segp->s_len, B_WRITE,
9258                     sdev, 0, NULL, DDI_UMEM_SLEEP);
9259                 ASSERT(xbuf != NULL);
9260 
9261                 mem.ms_type = RSM_MEM_BUF;
9262                 mem.ms_bp = xbuf;
9263 
9264                 adapter = segp->s_adapter;
9265 
9266                 if (segp->s_flags & RSMKA_ALLOW_UNBIND_REBIND) {
9267                         create_flags = RSM_ALLOW_UNBIND_REBIND;
9268                 }
9269 
9270                 if (segp->s_flags & RSMKA_SET_RESOURCE_DONTWAIT) {
9271                         callback_flag  = RSM_RESOURCE_DONTWAIT;
9272                 } else {
9273                         callback_flag  = RSM_RESOURCE_SLEEP;
9274                 }
9275 
9276                 ret = adapter->rsmpi_ops->rsm_seg_create(
9277                     adapter->rsmpi_handle, &segp->s_handle.out,
9278                     segp->s_len, create_flags, &mem,
9279                     callback_flag, NULL);
9280 
9281                 if (ret != RSM_SUCCESS) {
9282                         acl_len = segp->s_acl_len;
9283                         acl = segp->s_acl;
9284                         rsmpi_acl = segp->s_acl_in;
9285                         segp->s_acl_len = 0;
9286                         segp->s_acl = NULL;
9287                         segp->s_acl_in = NULL;
9288                         rsmseglock_release(segp);
9289 
9290                         rsmexport_rm(segp);
9291                         rsmacl_free(acl, acl_len);
9292                         rsmpiacl_free(rsmpi_acl, acl_len);
9293 
9294                         rsmseglock_acquire(segp);
9295                         segp->s_state = RSM_STATE_BIND;
9296                         cv_broadcast(&segp->s_cv);
9297                         rsmseglock_release(segp);
9298                         DBG_PRINTF((category, RSM_ERR,
9299                             "%s done: exp_qscd create failed = %d\n",
9300                             function, ret));
9301                         return;
9302                 }
9303 
9304                 ret = adapter->rsmpi_ops->rsm_publish(
9305                     segp->s_handle.out, segp->s_acl_in, segp->s_acl_len,
9306                     segp->s_segid, RSM_RESOURCE_DONTWAIT, NULL);
9307 
9308                 if (ret != RSM_SUCCESS) {
9309                         acl_len = segp->s_acl_len;
9310                         acl = segp->s_acl;
9311                         rsmpi_acl = segp->s_acl_in;
9312                         segp->s_acl_len = 0;
9313                         segp->s_acl = NULL;
9314                         segp->s_acl_in = NULL;
9315                         adapter->rsmpi_ops->rsm_seg_destroy(segp->s_handle.out);
9316                         rsmseglock_release(segp);
9317 
9318                         rsmexport_rm(segp);
9319                         rsmacl_free(acl, acl_len);
9320                         rsmpiacl_free(rsmpi_acl, acl_len);
9321 
9322                         rsmseglock_acquire(segp);
9323                         segp->s_state = RSM_STATE_BIND;
9324                         cv_broadcast(&segp->s_cv);
9325                         rsmseglock_release(segp);
9326                         DBG_PRINTF((category, RSM_ERR,
9327                             "%s done: exp_qscd publish failed = %d\n",
9328                             function, ret));
9329                         return;
9330                 }
9331 
9332                 segp->s_state = RSM_STATE_EXPORT;
9333                 cv_broadcast(&segp->s_cv);
9334                 rsmseglock_release(segp);
9335                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done: exp_qscd\n",
9336                     function));
9337                 return;
9338         }
9339 
9340         rsmseglock_release(segp);
9341 
9342         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function));
9343 }
9344 
9345 static void
9346 rsm_quiesce_imp_seg(rsmresource_t *resp)
9347 {
9348         rsmseg_t        *segp = (rsmseg_t *)resp;
9349         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9350         DBG_DEFINE_STR(function, "rsm_quiesce_imp_seg");
9351 
9352         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9353             "%s enter: key=%u\n", function, segp->s_key));
9354 
9355         rsmseglock_acquire(segp);
9356         segp->s_flags |= RSM_DR_INPROGRESS;
9357 
9358         while (segp->s_rdmacnt != 0) {
9359                 /* wait for the RDMA to complete */
9360                 cv_wait(&segp->s_cv, &segp->s_lock);
9361         }
9362 
9363         rsmseglock_release(segp);
9364 
9365         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function));
9366 
9367 }
9368 
9369 static void
9370 rsm_unquiesce_imp_seg(rsmresource_t *resp)
9371 {
9372         rsmseg_t        *segp = (rsmseg_t *)resp;
9373         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9374         DBG_DEFINE_STR(function, "rsm_unquiesce_imp_seg");
9375 
9376         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9377             "%s enter: key=%u\n", function, segp->s_key));
9378 
9379         rsmseglock_acquire(segp);
9380 
9381         segp->s_flags &= ~RSM_DR_INPROGRESS;
9382         /* wake up any waiting putv/getv ops */
9383         cv_broadcast(&segp->s_cv);
9384 
9385         rsmseglock_release(segp);
9386 
9387         DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function));
9388 
9389 
9390 }
9391 
9392 static void
9393 rsm_process_exp_seg(rsmresource_t *resp, int event)
9394 {
9395         if (event == RSM_DR_QUIESCE)
9396                 rsm_quiesce_exp_seg(resp);
9397         else /* UNQUIESCE */
9398                 rsm_unquiesce_exp_seg(resp);
9399 }
9400 
9401 static void
9402 rsm_process_imp_seg(rsmresource_t *resp, int event)
9403 {
9404         if (event == RSM_DR_QUIESCE)
9405                 rsm_quiesce_imp_seg(resp);
9406         else /* UNQUIESCE */
9407                 rsm_unquiesce_imp_seg(resp);
9408 }
9409 
9410 static void
9411 rsm_dr_process_local_segments(int event)
9412 {
9413 
9414         int i, j;
9415         rsmresource_blk_t       *blk;
9416         rsmresource_t           *p;
9417         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9418 
9419         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9420             "rsm_dr_process_local_segments enter\n"));
9421 
9422         /* iterate through the resource structure */
9423 
9424         rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
9425 
9426         for (i = 0; i < rsm_resource.rsmrc_len; i++) {
9427                 blk = rsm_resource.rsmrc_root[i];
9428                 if (blk != NULL) {
9429                         for (j = 0; j < RSMRC_BLKSZ; j++) {
9430                                 p = blk->rsmrcblk_blks[j];
9431                                 if ((p != NULL) && (p != RSMRC_RESERVED)) {
9432                                         /* valid resource */
9433                                         if (p->rsmrc_type ==
9434                                             RSM_RESOURCE_EXPORT_SEGMENT)
9435                                                 rsm_process_exp_seg(p, event);
9436                                         else if (p->rsmrc_type ==
9437                                             RSM_RESOURCE_IMPORT_SEGMENT)
9438                                                 rsm_process_imp_seg(p, event);
9439                                 }
9440                         }
9441                 }
9442         }
9443 
9444         rw_exit(&rsm_resource.rsmrc_lock);
9445 
9446         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9447             "rsm_dr_process_local_segments done\n"));
9448 }
9449 
9450 /* *************** DR callback functions ************ */
9451 static void
9452 rsm_dr_callback_post_add(void *arg, pgcnt_t delta /* ARGSUSED */)
9453 {
9454         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9455         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9456             "rsm_dr_callback_post_add is a no-op\n"));
9457         /* Noop */
9458 }
9459 
9460 static int
9461 rsm_dr_callback_pre_del(void *arg, pgcnt_t delta /* ARGSUSED */)
9462 {
9463         int     recheck_state = 0;
9464         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9465 
9466         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9467             "rsm_dr_callback_pre_del enter\n"));
9468 
9469         mutex_enter(&rsm_drv_data.drv_lock);
9470 
9471         do {
9472                 recheck_state = 0;
9473                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9474                     "rsm_dr_callback_pre_del:state=%d\n",
9475                     rsm_drv_data.drv_state));
9476 
9477                 switch (rsm_drv_data.drv_state) {
9478                 case RSM_DRV_NEW:
9479                         /*
9480                          * The state should usually never be RSM_DRV_NEW
9481                          * since in this state the callbacks have not yet
9482                          * been registered. So, ASSERT.
9483                          */
9484                         ASSERT(0);
9485                         return (0);
9486                 case RSM_DRV_REG_PROCESSING:
9487                         /*
9488                          * The driver is in the process of registering
9489                          * with the DR framework. So, wait till the
9490                          * registration process is complete.
9491                          */
9492                         recheck_state = 1;
9493                         cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9494                         break;
9495                 case RSM_DRV_UNREG_PROCESSING:
9496                         /*
9497                          * If the state is RSM_DRV_UNREG_PROCESSING, the
9498                          * module is in the process of detaching and
9499                          * unregistering the callbacks from the DR
9500                          * framework. So, simply return.
9501                          */
9502                         mutex_exit(&rsm_drv_data.drv_lock);
9503                         DBG_PRINTF((category, RSM_DEBUG,
9504                             "rsm_dr_callback_pre_del:"
9505                             "pre-del on NEW/UNREG\n"));
9506                         return (0);
9507                 case RSM_DRV_OK:
9508                         rsm_drv_data.drv_state = RSM_DRV_PREDEL_STARTED;
9509                         break;
9510                 case RSM_DRV_PREDEL_STARTED:
9511                         /* FALLTHRU */
9512                 case RSM_DRV_PREDEL_COMPLETED:
9513                         /* FALLTHRU */
9514                 case RSM_DRV_POSTDEL_IN_PROGRESS:
9515                         recheck_state = 1;
9516                         cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9517                         break;
9518                 case RSM_DRV_DR_IN_PROGRESS:
9519                         rsm_drv_data.drv_memdel_cnt++;
9520                         mutex_exit(&rsm_drv_data.drv_lock);
9521                         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9522                             "rsm_dr_callback_pre_del done\n"));
9523                         return (0);
9524                         /* break; */
9525                 default:
9526                         ASSERT(0);
9527                         break;
9528                 }
9529 
9530         } while (recheck_state);
9531 
9532         rsm_drv_data.drv_memdel_cnt++;
9533 
9534         mutex_exit(&rsm_drv_data.drv_lock);
9535 
9536         /* Do all the quiescing stuff here */
9537         DBG_PRINTF((category, RSM_DEBUG,
9538             "rsm_dr_callback_pre_del: quiesce things now\n"));
9539 
9540         rsm_dr_process_local_segments(RSM_DR_QUIESCE);
9541 
9542         /*
9543          * now that all local segments have been quiesced lets inform
9544          * the importers
9545          */
9546         rsm_send_suspend();
9547 
9548         /*
9549          * In response to the suspend message the remote node(s) will process
9550          * the segments and send a suspend_complete message. Till all
9551          * the nodes send the suspend_complete message we wait in the
9552          * RSM_DRV_PREDEL_STARTED state. In the exporter_quiesce
9553          * function we transition to the RSM_DRV_PREDEL_COMPLETED state.
9554          */
9555         mutex_enter(&rsm_drv_data.drv_lock);
9556 
9557         while (rsm_drv_data.drv_state == RSM_DRV_PREDEL_STARTED) {
9558                 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9559         }
9560 
9561         ASSERT(rsm_drv_data.drv_state == RSM_DRV_PREDEL_COMPLETED);
9562 
9563         rsm_drv_data.drv_state = RSM_DRV_DR_IN_PROGRESS;
9564         cv_broadcast(&rsm_drv_data.drv_cv);
9565 
9566         mutex_exit(&rsm_drv_data.drv_lock);
9567 
9568         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9569             "rsm_dr_callback_pre_del done\n"));
9570 
9571         return (0);
9572 }
9573 
9574 static void
9575 rsm_dr_callback_post_del(void *arg, pgcnt_t delta, int cancelled /* ARGSUSED */)
9576 {
9577         int     recheck_state = 0;
9578         DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9579 
9580         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9581             "rsm_dr_callback_post_del enter\n"));
9582 
9583         mutex_enter(&rsm_drv_data.drv_lock);
9584 
9585         do {
9586                 recheck_state = 0;
9587                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9588                     "rsm_dr_callback_post_del:state=%d\n",
9589                     rsm_drv_data.drv_state));
9590 
9591                 switch (rsm_drv_data.drv_state) {
9592                 case RSM_DRV_NEW:
9593                         /*
9594                          * The driver state cannot not be RSM_DRV_NEW
9595                          * since in this state the callbacks have not
9596                          * yet been registered.
9597                          */
9598                         ASSERT(0);
9599                         return;
9600                 case RSM_DRV_REG_PROCESSING:
9601                         /*
9602                          * The driver is in the process of registering with
9603                          * the DR framework. Wait till the registration is
9604                          * complete.
9605                          */
9606                         recheck_state = 1;
9607                         cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9608                         break;
9609                 case RSM_DRV_UNREG_PROCESSING:
9610                         /*
9611                          * RSM_DRV_UNREG_PROCESSING state means the module
9612                          * is detaching and unregistering the callbacks
9613                          * from the DR framework. So simply return.
9614                          */
9615                         /* FALLTHRU */
9616                 case RSM_DRV_OK:
9617                         /*
9618                          * RSM_DRV_OK means we missed the pre-del
9619                          * corresponding to this post-del coz we had not
9620                          * registered yet, so simply return.
9621                          */
9622                         mutex_exit(&rsm_drv_data.drv_lock);
9623                         DBG_PRINTF((category, RSM_DEBUG,
9624                             "rsm_dr_callback_post_del:"
9625                             "post-del on OK/UNREG\n"));
9626                         return;
9627                         /* break; */
9628                 case RSM_DRV_PREDEL_STARTED:
9629                         /* FALLTHRU */
9630                 case RSM_DRV_PREDEL_COMPLETED:
9631                         /* FALLTHRU */
9632                 case RSM_DRV_POSTDEL_IN_PROGRESS:
9633                         recheck_state = 1;
9634                         cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
9635                         break;
9636                 case RSM_DRV_DR_IN_PROGRESS:
9637                         rsm_drv_data.drv_memdel_cnt--;
9638                         if (rsm_drv_data.drv_memdel_cnt > 0) {
9639                                 mutex_exit(&rsm_drv_data.drv_lock);
9640                                 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9641                                     "rsm_dr_callback_post_del done:\n"));
9642                                 return;
9643                         }
9644                         rsm_drv_data.drv_state = RSM_DRV_POSTDEL_IN_PROGRESS;
9645                         break;
9646                 default:
9647                         ASSERT(0);
9648                         return;
9649                         /* break; */
9650                 }
9651         } while (recheck_state);
9652 
9653         mutex_exit(&rsm_drv_data.drv_lock);
9654 
9655         /* Do all the unquiescing stuff here */
9656         DBG_PRINTF((category, RSM_DEBUG,
9657             "rsm_dr_callback_post_del: unquiesce things now\n"));
9658 
9659         rsm_dr_process_local_segments(RSM_DR_UNQUIESCE);
9660 
9661         /*
9662          * now that all local segments have been unquiesced lets inform
9663          * the importers
9664          */
9665         rsm_send_resume();
9666 
9667         mutex_enter(&rsm_drv_data.drv_lock);
9668 
9669         rsm_drv_data.drv_state = RSM_DRV_OK;
9670 
9671         cv_broadcast(&rsm_drv_data.drv_cv);
9672 
9673         mutex_exit(&rsm_drv_data.drv_lock);
9674 
9675         DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9676             "rsm_dr_callback_post_del done\n"));
9677 
9678         return;
9679 
9680 }