1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 #ifndef _RDC_IO_H
  27 #define _RDC_IO_H
  28 
  29 #ifdef  __cplusplus
  30 extern "C" {
  31 #endif
  32 
  33 
  34 #include <sys/unistat/spcs_s.h>
  35 #ifdef DS_DDICT
  36 #define bool_t  int
  37 #endif
  38 #include  <sys/nsctl/rdc_prot.h>
  39 #include <sys/nsctl/nsctl.h>
  40 #include <sys/nsctl/rdc_ioctl.h>
  41 
  42 /*
  43  * Definitions for kstats
      *
      * Every macro below expands to a string literal used as a statistic name.
      * NOTE(review): the RDC_MKSTAT_ prefix presumably marks module-wide
      * statistics and RDC_IKSTAT_ per-set ones -- confirm against the code
      * that creates the kstats.
  44  */
  45 #define RDC_MKSTAT_MAXSETS              "maxsets"
  46 #define RDC_MKSTAT_MAXFBAS              "maxfbas"
  47 #define RDC_MKSTAT_RPC_TIMEOUT          "rpc_timeout"
  48 #define RDC_MKSTAT_HEALTH_THRES         "health_thres"
  49 #define RDC_MKSTAT_BITMAP_WRITES        "bitmap_writes"
  50 #define RDC_MKSTAT_CLNT_COTS_CALLS      "clnt_cots_calls"
  51 #define RDC_MKSTAT_CLNT_CLTS_CALLS      "clnt_clts_calls"
  52 #define RDC_MKSTAT_SVC_COTS_CALLS       "svc_cots_calls"
  53 #define RDC_MKSTAT_SVC_CLTS_CALLS       "svc_clts_calls"
  54 #define RDC_MKSTAT_BITMAP_REF_DELAY     "bitmap_ref_delay"
  55 
     /*
      * Note that RDC_IKSTAT_FILE and RDC_IKSTAT_SECFILE expand to
      * "primary_vol" and "secondary_vol"; the macro names do not match the
      * strings they produce.
      */
  56 #define RDC_IKSTAT_FLAGS                "flags"
  57 #define RDC_IKSTAT_SYNCFLAGS            "syncflags"
  58 #define RDC_IKSTAT_BMPFLAGS             "bmpflags"
  59 #define RDC_IKSTAT_SYNCPOS              "syncpos"
  60 #define RDC_IKSTAT_VOLSIZE              "volsize"
  61 #define RDC_IKSTAT_BITSSET              "bitsset"
  62 #define RDC_IKSTAT_AUTOSYNC             "autosync"
  63 #define RDC_IKSTAT_MAXQFBAS             "maxqfbas"
  64 #define RDC_IKSTAT_MAXQITEMS            "maxqitems"
  65 #define RDC_IKSTAT_FILE                 "primary_vol"
  66 #define RDC_IKSTAT_SECFILE              "secondary_vol"
  67 #define RDC_IKSTAT_BITMAP               "bitmap"
  68 #define RDC_IKSTAT_PRIMARY_HOST         "primary_host"
  69 #define RDC_IKSTAT_SECONDARY_HOST       "secondary_host"
  70 #define RDC_IKSTAT_TYPE_FLAG            "type_flag"
  71 #define RDC_IKSTAT_BMP_SIZE             "bmp_size"
  72 #define RDC_IKSTAT_DISK_STATUS          "disk_status"
  73 #define RDC_IKSTAT_IF_DOWN              "if_down"
  74 #define RDC_IKSTAT_IF_RPC_VERSION       "if_rpc_version"
  75 #define RDC_IKSTAT_ASYNC_THROTTLE_DELAY "async_throttle_delay"
  76 #define RDC_IKSTAT_ASYNC_BLOCK_HWM      "async_block_hwm"
  77 #define RDC_IKSTAT_ASYNC_ITEM_HWM       "async_item_hwm"
  78 #define RDC_IKSTAT_QUEUE_TYPE           "async_queue_type"
  79 #define RDC_IKSTAT_ASYNC_ITEMS          "async_queue_items"
  80 #define RDC_IKSTAT_ASYNC_BLOCKS         "async_queue_blocks"
  81 
  82 /*
  83  * Queue types
  84  */
  85 #define RDC_DISKQUE     0X01
  86 #define RDC_MEMQUE      0x02
  87 #define RDC_NOQUE       -1
  88 
  89 #define RDC_ACTIVE      0x1
  90 #define RDC_INACTIVE    0x2
  91 
  92 #ifdef _KERNEL
  93 
     /*
      * Thread-set handles.  Only declared here; the definitions live outside
      * this header.
      * NOTE(review): the names suggest an I/O set and a flusher set -- confirm.
      */
  94 extern nstset_t *_rdc_ioset;
  95 extern nstset_t *_rdc_flset;
  96 
  97 #ifdef DEBUG
  98 extern int RDC_MAX_SYNC_THREADS;
  99 extern int rdc_maxthreads_last;
 100 int num_sync_threads;
 101 #else
 102 #define RDC_MAX_SYNC_THREADS    8
 103 #endif
 104 #ifdef DEBUG
 105 #define RDC_AVAIL_THR_TUNE(n)   \
 106         do { \
 107                 if (rdc_maxthreads_last < RDC_MAX_SYNC_THREADS) { \
 108                         (void) nst_add_thread(n.rdc_syncset, \
 109                     RDC_MAX_SYNC_THREADS - rdc_maxthreads_last);\
 110                 } \
 111                 if (rdc_maxthreads_last > RDC_MAX_SYNC_THREADS) { \
 112                         (void) nst_del_thread(n.rdc_syncset, \
 113                             rdc_maxthreads_last - RDC_MAX_SYNC_THREADS); \
 114                 } \
 115                 n.avail_thr = RDC_MAX_SYNC_THREADS - n.active_thr; \
 116                 if (n.avail_thr < 0) { \
 117                         n.avail_thr = 0; \
 118                 } \
 119                 rdc_maxthreads_last = RDC_MAX_SYNC_THREADS; \
 120                 num_sync_threads = nst_nthread(n.rdc_syncset); \
 121         } while (0);
 122 #else
 123 #define RDC_AVAIL_THR_TUNE(n)   \
 124         do { \
 125                 n.avail_thr = RDC_MAX_SYNC_THREADS - n.active_thr; \
 126                 if (n.avail_thr < 0) \
 127                         n.avail_thr = 0; \
 128         } while (0);
 129 
 130 #endif
 131 
     /*
      * Thread bookkeeping for the sync loop.  This is the structure shape
      * RDC_AVAIL_THR_TUNE() expects: it reads active_thr, writes avail_thr and
      * (DEBUG builds) passes rdc_syncset to the nst_*() routines.
      */
 132 typedef struct syncloop_info {
 133         int             active_thr;
 134         int             avail_thr; /* should be RDC_MAX_SYNC_THREADS - active */
 135         kmutex_t        lock;
 136         nstset_t        *rdc_syncset;
 137 } sync_info_t;
 138 
     /*
      * NOTE(review): this is an object definition in a header, not an extern
      * declaration; every file that includes this header under _KERNEL gets a
      * tentative definition of sync_info.
      */
 139 sync_info_t sync_info;
 140 
 141 /*
 142  * Static server information
      *
      * Pointed to by rdc_if_t.srv and by rdc_k_info_t.lsrv.
 143  */
 144 typedef struct servinfo {
 145         struct knetconfig       *ri_knconf;     /* bound TLI fd */
 146         struct netbuf           ri_addr;        /* server's address */
 147         struct sec_data         *ri_secdata;    /* sec data for rpcsec module */
 148         char                    *ri_hostname;   /* server's hostname */
 149         int                     ri_hostnamelen; /* server's hostname length */
 150 } rdc_srv_t;
 151 
 152 /*
 153  * Interface structure, including health monitoring.
      *
      * Entries are chained through `next'; rdc_k_info_t.intf points at one.
      * NOTE(review): ifaddr / r_ifaddr are presumably the local and remote
      * interface addresses -- confirm against the code that fills them in.
 154  */
 155 typedef struct rdc_if_s {
 156         struct rdc_if_s *next;          /* chain pointer */
 157         struct netbuf ifaddr;
 158         struct netbuf r_ifaddr;
 159         rdc_srv_t *srv;                 /* servinfo of server end */
 160         int     if_down;                /* i/f is down (set on primary) */
 161         int     isprimary;              /* this end is a primary */
 162         int     issecondary;            /* this end is a secondary */
 163         rpcvers_t rpc_version;          /* RPC protocol version in use */
 164         int     no_ping;                /* set on secondary to hold off RPCs */
 165         int     old_pulse;              /* previous (current) pulse value */
 166         int     new_pulse;              /* new (incoming) pulse value */
 167         int     deadness;               /* how close to death are we? */
 168         volatile int exiting;           /* daemon exit flag */
 169         time_t  last;                   /* time of last ping */
 170 } rdc_if_t;
 171 
 172 
     /*
      * One asynchronous I/O request.  Requests are singly linked through
      * `next'; net_queue keeps head and tail pointers to such a list.
      * `iostatus' takes the RDC_IO_* values.
      * NOTE(review): the q-prefixed members (qhandle, qpos) presumably refer
      * to the request's copy on the queue device -- confirm.
      */
 173 typedef struct rdc_aio_s {
 174         struct rdc_aio_s *next;
 175         nsc_buf_t *handle;
 176         nsc_buf_t *qhandle;
 177         nsc_off_t pos;
 178         nsc_off_t qpos;
 179         nsc_size_t len;
 180         nsc_size_t orig_len;
 181         int     flag;
 182         int     iostatus;
 183         int     index;
 184         uint_t  seq;            /* sequence on async Q */
 185 } rdc_aio_t;
 186 
 187 /* values for (rdc_aio_t *)->iostatus */
     /*
      * The enumerators are consecutive: RDC_IO_NONE is 0 and RDC_IO_CANCELLED
      * is 5.  The enum is anonymous, so the values are stored in a plain int.
      */
 188 enum {
 189         RDC_IO_NONE = 0,        /* not used */
 190         RDC_IO_INIT,            /* io started */
 191         RDC_IO_DONE,            /* io done successfully */
 192         RDC_IO_FAILED,          /* io failed */
 193         RDC_IO_DISCARDED,       /* io discarded */
 194         RDC_IO_CANCELLED        /* group_log in progress */
 195 };
 196 
 197 
     /*
      * Queue sizing constants, in blocks.  RDC_LOW_QBLOCKS is 85% of
      * RDC_MAX_QBLOCKS and RDC_HALF_MQUEUE is exactly half of it.
      */
 198 #define RDC_MAX_QBLOCKS 16384   /* 8MB temporary q for diskq to flush to */
 199 #define RDC_LOW_QBLOCKS 13927   /* roughly 85% of queue full */
 200 #define RDC_HALF_MQUEUE 8192    /* half of the memory queue */
 201 
     /*
      * Queue of rdc_aio_t requests with its lock, condition variable and
      * counters.  rdc_group_t embeds one as ra_queue.
      */
 202 typedef struct netqueue {
 203         rdc_aio_t *net_qhead;
 204         rdc_aio_t *net_qtail;
 205         kmutex_t net_qlock;
 206         int hwmhit;                     /* queue full hit? reset after hwm */
 207         int qfill_sleeping;             /* waiting for work? */
 208         int qfflags;                    /* diskq/memq flusher flags */
 209         kcondvar_t qfcv;                /* for timed waits */
 210         volatile nsc_size_t blocks;     /* number of FBAs in q */
 211         volatile uint64_t nitems;       /* number of items in q */
 212         volatile int  inflbls;          /* number of inflight blocks */
 213         volatile int  inflitems;        /* number of inflight items */
 214         uint64_t  nitems_hwm;           /* highest items on queue */
 215         nsc_size_t  blocks_hwm;         /* highest blocks on queue */
 216         long throttle_delay;            /* Number of times we delayed x 2 */
 217 } net_queue;
 218 
 219 
 220 /*
 221  * Bitmap header structures.
 222  * These must be fixed size in all data models.
 223  * If we ever support little-endian machines (eg. Intel) we will need
 224  * to add byte-swapping logic.
      *
      * Each version has its own structure and magic number.  A magic number
      * is the ASCII bytes 'R' 'D' 'C' (0x52 0x44 0x43) followed by the version
      * number in the low byte.
 225  */
 226 
     /* Version 2 header layout. */
 227 typedef struct {
 228         int32_t magic;
 229         int32_t serial_mode;
 230         int32_t use_mirror;
 231         int32_t mirror_down;
 232         int32_t sync_needed;
 233         char bitmapname[NSC_MAXPATH];
 234         char filename[NSC_MAXPATH];
 235         int32_t volume_failed;
 236 } rdc_headerv2_t;
 237 #define RDC_HDR_V2      0x52444302      /* RDC2 */
 238 
     /*
      * Sync type values.  RDC_FULL_SYNC (0x3) is numerically equal to
      * RDC_SYNC | RDC_REV_SYNC.
      * NOTE(review): confirm whether callers treat these as enumerated values
      * or as independent bits before testing them with `&'.
      */
 239 #define RDC_SYNC        0x1
 240 #define RDC_REV_SYNC    0x2
 241 #define RDC_FULL_SYNC   0x3
 242 
     /* Completion status values. */
 243 #define RDC_FAILED      0x1
 244 #define RDC_COMPLETED   0x2
 245 
     /*
      * Pair of path names; the version 3, 4 and 5 headers embed one for the
      * primary and one for the secondary.
      */
 246 typedef struct {
 247         char    file[NSC_MAXPATH];
 248         char    bitmap[NSC_MAXPATH];
 249 } rdc_hdr_addr_t;
 250 
     /* Version 3 header layout. */
 251 typedef struct {
 252         int32_t         magic;
 253         rdc_hdr_addr_t  primary;
 254         rdc_hdr_addr_t  secondary;
 255         int32_t         flags;
 256         int32_t         autosync;
 257         int32_t         maxqfbas;
 258         int32_t         maxqitems;
 259         int32_t         syshostid;      /* for cluster bitmaps */
 260 } rdc_headerv3_t;
 261 #define RDC_HDR_V3      0x52444303      /* RDC3 */
 262 
     /* Version 4 header layout: version 3 with asyncthr appended. */
 263 typedef struct {
 264         int32_t         magic;
 265         rdc_hdr_addr_t  primary;
 266         rdc_hdr_addr_t  secondary;
 267         int32_t         flags;
 268         int32_t         autosync;
 269         int32_t         maxqfbas;
 270         int32_t         maxqitems;
 271         int32_t         syshostid;      /* for cluster bitmaps */
 272         int32_t         asyncthr;
 273 } rdc_headerv4_t;
 274 #define RDC_HDR_V4      0x52444304      /* RDC4 */
 275 
     /*
      * Version 5 header layout.  Compared with version 4, maxqfbas and
      * maxqitems are widened from int32_t to int64_t and refcntsize is
      * appended, so the offsets of the trailing members differ from version 4.
      */
 276 typedef struct {
 277         int32_t         magic;
 278         rdc_hdr_addr_t  primary;
 279         rdc_hdr_addr_t  secondary;
 280         int32_t         flags;
 281         int32_t         autosync;
 282         int64_t         maxqfbas;
 283         int64_t         maxqitems;
 284         int32_t         syshostid;      /* for cluster bitmaps */
 285         int32_t         asyncthr;
 286         int32_t         refcntsize;     /* size in bytes of each refcount */
 287 } rdc_headerv5_t;
 288 #define RDC_HDR_V5      0x52444305      /* RDC5 */
 289 
     /* Aliases that always name the newest header version defined above. */
 290 typedef rdc_headerv5_t  rdc_header_t;   /* Current header type */
 291 #define RDC_HDR_MAGIC   RDC_HDR_V5      /* Current header magic number */
 292 
 293 #endif  /* _KERNEL */
 294 
 295 #define RDC_BITMAP_FBA  1               /* Offset at which the bitmap starts */
 296 #define RDC_BITREF_FBA(krdc) (RDC_BITMAP_FBA + FBA_LEN(krdc->bitmap_size))
 297 
 298 #ifdef _KERNEL
 299 
     /* NOTE(review): retry limit; the loop that uses it is outside this header. */
 300 #define RDC_FUTILE_ATTEMPTS     50
     /*
      * List node wrapping one anonymous nsc buffer.  rdc_buf_t.rdc_anon points
      * at a list of these.
      */
 301 typedef struct aio_buf_s {
 302         struct aio_buf_s        *next;          /* next aio_buf */
 303         nsc_buf_t               *rdc_abufp;     /* actual anon buf */
 304         int                     kindex;         /* index we are attached to */
 305 } aio_buf_t;
 306 
     /*
      * Lock, condition variable and two counters used to track a group of
      * threads.  Embedded in rdc_buf_t (rdc_sync) and rdc_k_info_t (syncs).
      * NOTE(review): `threads' and `complete' are presumably started/finished
      * counts protected by `lock' -- confirm against the users.
      */
 307 typedef struct rdc_thrsync {
 308         kmutex_t        lock;
 309         int             threads;
 310         int             complete;
 311         kcondvar_t      cv;
 312 } rdc_thrsync_t;
 313 
     /*
      * Singly linked list node carrying one offset; rdc_syncthr_t.status
      * points at one.
      * NOTE(review): `offset' is an int here while rdc_syncthr_t.offset is an
      * nsc_off_t -- confirm the two cannot diverge in range.
      */
 314 typedef struct sync_status_s {
 315         int     offset;
 316         struct sync_status_s *next;
 317 } sync_status_t;
 318 
     /*
      * Work description for one sync thread: an offset/length range, the set
      * (krdc) it belongs to and a sync_status_t to report through.
      */
 319 typedef struct rdc_syncthr {
 320         nsc_off_t               offset;
 321         nsc_size_t              len;
 322         struct rdc_k_info       *krdc;
 323         sync_status_t           *status;
 324 } rdc_syncthr_t;
 325 
 326 /*
 327  * RDC buffer header
      *
      * rdc_bufh is the first member, so a pointer to the exported header and
      * a pointer to the rdc_buf_t share the same address.  rdc_flags holds
      * the RDC_VEC_ALLOC ... RDC_NULLBUFREAD bits.
 328  */
 329 
 330 typedef struct rdc_buf_s {
 331         nsc_buf_t       rdc_bufh;       /* exported buffer header */
 332         nsc_buf_t       *rdc_bufp;      /* underlying buffer */
 333         aio_buf_t       *rdc_anon;      /* ANON async buffer */
 334         struct rdc_fd_s *rdc_fd;        /* back link */
 335         size_t          rdc_vsize;      /* size of allocated nsc_vec_t */
 336         int             rdc_flags;      /* flags */
 337         kmutex_t        aio_lock;       /* lock for rdc_anon */
 338         rdc_thrsync_t   rdc_sync;       /* for thread synchronization */
 339 } rdc_buf_t;
 340 
 341 #define RDC_VEC_ALLOC   0x1             /* local kmem vector for remote io */
 342 #define RDC_ALLOC       0x2             /* rdc_bufp is nsc_buf_alloc'd */
 343 #define RDC_ASYNC_VEC   0x4             /* Keep tmp handle for async flusher */
 344 #define RDC_REMOTE_BUF  0x8             /* buffer alloc'd for remote io only */
 345 #define RDC_NULL_BUF    0x10            /* tell diskq to only store io_hdr */
 346 #define RDC_ASYNC_BUF   0x20            /* this buf is to an async vol */
 347 #define RDC_NULLBUFREAD 0x0f000000      /* read because RDC_NULL_BUF detected */
 348 
 349 #define BUF_IS_ASYNC(h) (((h) != NULL) && (h)->rdc_flags & RDC_ASYNC_BUF)
 350 #define RDC_REMOTE(h)   (((h) != NULL) && ((h)->rdc_flags & RDC_REMOTE_BUF) && \
 351                         (((h)->rdc_flags & RDC_ASYNC_VEC) == 0))
 352 
 353 /* check a handle against a supplied pos/len pair */
 354 
 355 #define RDC_HANDLE_LIMITS(h, p, l) \
 356                 (((h)->sb_user & RDC_DISKQUE) || \
 357                 ((p) >= (h)->sb_pos) && \
 358                 (((p) + (l)) <= ((h)->sb_pos + (h)->sb_len)))
 359 
 360 /* check a dset against a supplied pos/len pair */
 361 
 362 #define RDC_DSET_LIMITS(d, p, l) \
 363                 (((p) >= (d)->pos) && \
 364                 (((p) + (l)) <= ((d)->pos + (d)->fbalen)))
 365 
 366 /*
 367  * RDC device info structures
      *
      * _rdc_info_dev_t describes one open mode of a device.  The RSRV(),
      * ORSRV() and RFAILED() macros test its four counters.
 368  */
 369 
 370 typedef struct _rdc_info_dev_s {
 371         nsc_fd_t        *bi_fd;         /* file descriptor */
 372         nsc_iodev_t     *bi_iodev;      /* I/O device structure */
 373         struct rdc_k_info *bi_krdc;     /* back link */
 374         int             bi_rsrv;        /* Count of reserves held */
 375         int             bi_orsrv;       /* Reserves for other io provider */
 376         int             bi_failed;      /* Count of failed (faked) reserves */
 377         int             bi_ofailed;     /* Other io provider failed reserves */
 378         int             bi_flag;        /* Reserve flags */
 379 } _rdc_info_dev_t;
 380 
 381 
     /*
      * Cached and raw views of one device plus reserve/release state.
      * rdc_k_info_t.devices points at one of these; id_flag takes the
      * RDC_ID_* values.
      */
 382 typedef struct rdc_info_dev_s {
 383         struct rdc_info_dev_s   *id_next;       /* forward link */
 384         _rdc_info_dev_t         id_cache_dev;   /* cached device info */
 385         _rdc_info_dev_t         id_raw_dev;     /* raw device info */
 386         kmutex_t                id_rlock;       /* reserve/release lock */
 387         kcondvar_t              id_rcv;         /* nsc_release pending cv */
 388         int                     id_sets;        /* # of sets referencing */
 389         int                     id_release;     /* # of pending nsc_releases */
 390         int                     id_flag;        /* flags */
 391 } rdc_info_dev_t;
 392 
 393 
     /*
      * Registered path token and its reference count.  rdc_k_info_t holds two:
      * cache_path and raw_path.
      */
 394 typedef struct rdc_path_s {
 395         nsc_path_t              *rp_tok;        /* nsc_register_path token */
 396         int                     rp_ref;         /* # of rdc_fd_t's */
 397 } rdc_path_t;
 398 
 399 
 400 /*
 401  * Values for id_flag
      * (the id_flag member of rdc_info_dev_t)
 402  */
 403 #define RDC_ID_CLOSING          0x1             /* device is closing */
 404 
 405 #include <sys/nsctl/rdc_diskq.h>
 406 
 407 /*
 408  * value for diskio.seq.
      * NOTE(review): `diskio' is not declared in this header; it presumably
      * comes from <sys/nsctl/rdc_diskq.h> or the RPC protocol -- confirm.
 409  */
 410 #define RDC_NOSEQ               (0)             /* ignore sequence */
 411 #define RDC_NEWSEQ              (1)             /* start of sequence */
 412 
     /*
      * One entry on a group's list of waiting tasks (rdc_group_t.sleepq),
      * linked through `next' and carrying a sequence number.
      */
 413 typedef struct rdc_sleepq {
 414         struct rdc_sleepq       *next;
 415         uint_t                  seq;            /* sequence in queue */
 416         int                     idx;            /* idx number of request */
 417         int                     pindex;         /* primary host set index */
 418         int                     sindex;         /* secondary host set index */
 419         uint64_t                qpos;           /* offset on primary's queue */
 420         int                     nocache;        /* cache flag to alloc_buf */
 421 } rdc_sleepq_t;
 422 
 423 /*
 424  * RDC group structure
      *
      * Shared by the sets of one group (rdc_k_info_t.group points here).
      * Holds the async queue (ra_queue), the disk queue and its handle, the
      * sleep queue and the sequence counters.  `flags' is tested by
      * RDC_IS_DISKQ() and RDC_IS_MEMQ().
 425  */
 426 typedef struct rdc_group {
 427         int             count;
 428         int             rdc_writer;
 429         int             unregistering;
 430         kmutex_t        lock;
 431         net_queue       ra_queue;       /* io todo async queues */
 432         kcondvar_t      iowaitcv;       /* wait for flusher */
 433         kcondvar_t      unregistercv;   /* wait for unregister */
 434         int             rdc_thrnum;     /* number of threads */
 435         int             rdc_addthrnum;  /* number threads added to thr set */
 436         kmutex_t        addthrnumlk;    /* lock for above */
 437         rdc_sleepq_t    *sleepq;        /* head of waiting tasks */
 438         /*
 439          * Dual use, the outgoing sequence number on the client.
 440          * The next expected sequence number on the server.
 441          * Protected by the ra_queue lock.
 442          */
 443         uint_t          seq;
 444         /*
 445          * Dual use, the last acknowledged sequence number.
 446          * Used to ensure that the queue doesn't overflow on server
 447          * and to stall transmissions on the client.
 448          * Protected by the ra_queue lock.
 449          */
 450         uint_t          seqack;
 451         int             asyncstall;     /* count of asleep threads */
 452         int             asyncdis;       /* discard stalled output */
 453         kcondvar_t      asyncqcv;       /* output stall here */
 454         int             flags;          /* memory or disk. status etc */
 455         disk_queue      diskq;          /* disk queue */
 456         nsc_fd_t        *diskqfd;       /* diskq handle */
 457         nsc_path_t      *q_tok;         /* q registration */
 458         int             diskqrsrv;      /* reserve count */
 459         kmutex_t        diskqmutex;     /* enables/disables/reserves */
 460         uint_t          synccount;      /* number of group members syncing */
 461 } rdc_group_t;
 462 
 463 /* group state */
 464 #define RDC_DISKQ_KILL          0x01    /* a force kill of diskq pending */
 465 
 466 #define RDC_IS_DISKQ(grp)       (grp->flags & RDC_DISKQUE)
 467 #define RDC_IS_MEMQ(grp)        (grp->flags & RDC_MEMQUE)
 468 
 469 /*
 470  * These flags are used in the
 471  * aux_state field, and are used to track:
 472  * AUXSYNCIP: When the code has a sync thread running, used instead
 473  * of the RC_SYNCING flag which gets cleared before the sync thread
 474  * terminates.
 475  * AUXWRITE: Set when rdc_sync_write_thr is running, so the rdc_unintercept
 476  * code can wait until a one-to-many write has actually terminated.
      *
      * The field in question is rdc_k_info_t.aux_state.
      * NOTE(review): "RC_SYNCING" above is not defined in this header; it may
      * be a misspelling of an RDC_-prefixed flag -- confirm.
 477  */
 478 #define RDC_AUXSYNCIP   0x01            /* a sync is in progress */
 479 #define RDC_AUXWRITE    0x02            /* I've got a write in progress */
 480 
 481 
 482 /*
 483  * RDC kernel-private information
      *
      * One entry per set; the rdc_k_info array is indexed by `index' (see
      * IS_VALID_INDEX()).  type_flag holds the RDC_CONFIGURED ...
      * RDC_QDISABLEPEND bits and aux_state the RDC_AUX* bits.  The c_* and r_*
      * accessor macros that follow this structure reach through `devices',
      * `cache_path' and `raw_path'.
      *
      * NOTE(review): haltcv and synccv carry the same comment text; confirm
      * what synccv actually waits for.
 484  */
 485 typedef struct rdc_k_info {
 486         int                     index;          /* Index into array */
 487         int                     remote_index;   /* -1 means unknown */
 488         int                     type_flag;
 489         int                     rpc_version;    /* RPC version this set supps */
 490         int                     spare1;
 491         nsc_off_t               syncbitpos;
 492         kmutex_t                syncbitmutex;   /* lock for syncbitpos */
 493         volatile int            busy_count;     /* ioctls in progress */
 494         volatile int            sync_done;
 495         int                     aux_state; /* syncing ,don't disable */
 496         rdc_thrsync_t           syncs;          /* _rdc_sync thread tracking */
 497         rdc_info_dev_t          *devices;
 498         nsc_iodev_t             *iodev;         /* I/O device structure */
 499         rdc_path_t              cache_path;
 500         rdc_path_t              raw_path;
 501         rdc_if_t                *intf;
 502         rdc_srv_t               *lsrv;          /* list of servinfo */
 503         nsc_size_t              maxfbas;        /* returned from nsc_maxfbas */
 504         unsigned char           *dcio_bitmap;
 505         void                    *bitmap_ref;    /* Incore bitmap bit ref */
 506         struct rdc_group        *group;
 507         nsc_size_t              bitmap_size;
 508         int                     bmaprsrv;       /* bitmap reserve count */
 509         int                     bitmap_write;
 510         nsc_fd_t                *bitmapfd;
 511         nsc_fd_t                *remote_fd;     /* FCAL direct io */
 512         volatile int            disk_status;    /* set to halt sync */
 513         int                     closing;
 514         nsc_path_t              *b_tok;         /* Bitmap registration */
 515         int                     b_ref;
 516         kmutex_t                dc_sleep;
 517         kmutex_t                bmapmutex;      /* mutex for bitmap ops */
 518         kcondvar_t              busycv;         /* wait for ioctl to complete */
 519         kcondvar_t              closingcv;      /* unregister_path/close */
 520         kcondvar_t              haltcv;         /* wait for sync to halt */
 521         kcondvar_t              synccv;         /* wait for sync to halt */
 522         struct rdc_net_dataset  *net_dataset;   /* replaces hnds */
 523         int64_t                 io_time;        /* moved from cd_info */
 524         struct rdc_k_info       *many_next;     /* 1-to-many circular list */
 525         struct rdc_k_info       *multi_next;    /* to multihop krdc */
 526         struct rdc_k_info       *group_next;    /* group circular list */
 527         kstat_t                 *io_kstats;     /* io kstat */
 528         kstat_t                 *bmp_kstats;    /* bitmap io kstat */
 529         kstat_t                 *set_kstats;    /* set kstat */
 530         kmutex_t                kstat_mutex;    /* mutex for kstats */
 531         kmutex_t                bmp_kstat_mutex;        /* mutex for kstats */
 532         struct bm_ref_ops       *bm_refs;
 533 } rdc_k_info_t;
 534 
     /*
      * Member-path shorthands, used as krdc->c_fd, krdc->r_tok and so on
      * (RDC_U_FD() relies on c_fd and r_fd).  The c_ names reach the cached
      * device / cache_path, the r_ names the raw device / raw_path.
      *
      * These are object-like macros: after this point ANY identifier spelled
      * c_fd, c_ref, r_flag, ... in a file that includes this header is
      * rewritten, including unrelated locals or structure members.
      */
 535 #define c_fd            devices->id_cache_dev.bi_fd
 536 #define c_rsrv          devices->id_cache_dev.bi_rsrv
 537 #define c_failed        devices->id_cache_dev.bi_failed
 538 #define c_flag          devices->id_cache_dev.bi_flag
 539 
 540 #define c_tok           cache_path.rp_tok
 541 #define c_ref           cache_path.rp_ref
 542 
 543 #define r_fd            devices->id_raw_dev.bi_fd
 544 #define r_rsrv          devices->id_raw_dev.bi_rsrv
 545 #define r_failed        devices->id_raw_dev.bi_failed
 546 #define r_flag          devices->id_raw_dev.bi_flag
 547 
 548 #define r_tok           raw_path.rp_tok
 549 #define r_ref           raw_path.rp_ref
 550 
 551 /*
 552  * flags for _rdc_rsrv_devs()
      *
      * Two independent groups follow: a device bitmap and a usage flag.  The
      * groups reuse the values 0x1 and 0x2, so a value from one group must
      * not be passed where the other is expected.
 553  */
 554 
 555 /*
 556  * which device(s) to reserve - integer bitmap.
      * RDC_BMP and RDC_QUE are also compared against rdc_fd_t.rdc_type by
      * RDC_IS_BMP() and RDC_IS_QUE().
 557  */
 558 
 559 #define RDC_CACHE       0x1     /* data device in cache mode */
 560 #define RDC_RAW         0x2     /* data device in raw mode */
 561 #define RDC_BMP         0x4     /* bitmap device */
 562 #define RDC_QUE         0x8     /* diskq device */
 563 
 564 /*
 565  * device usage after reserve - integer flag.
 566  */
 567 
 568 #define RDC_INTERNAL    0x1     /* reserve for rdc internal purposes */
 569 #define RDC_EXTERNAL    0x2     /* reserve in response to io provider Attach */
 570 
 571 /*
 572  * Utility macro for nsc_*() io function returns.
      *
      * Evaluates to 1 when rc equals NSC_DONE or NSC_HIT and to 0 otherwise.
      * rc is evaluated twice, so do not pass an expression with side effects
      * (for example the nsc_*() call itself).
 573  */
 574 
 575 #define RDC_SUCCESS(rc) (((rc) == NSC_DONE) || ((rc) == NSC_HIT))
 576 
 577 /*
 578  * RDC file descriptor structure
      *
      * rdc_type is compared with RDC_BMP / RDC_QUE by RDC_IS_BMP() and
      * RDC_IS_QUE(); rdc_oflags is tested against NSC_CACHE by RDC_IS_RAW().
 579  */
 580 
 581 typedef struct rdc_fd_s {
 582         rdc_k_info_t    *rdc_info;      /* devices info structure */
 583         int             rdc_type;       /* open type, diskq or bitmap */
 584         int             rdc_oflags;     /* raw or cached open type */
 585 } rdc_fd_t;
 586 
 587 /*
 588  * fd and rsrv macros
 589  */
 590 
 591 #define RSRV(bi)        (((bi)->bi_rsrv > 0) || ((bi)->bi_failed > 0))
 592 #define ORSRV(bi)       (((bi)->bi_orsrv > 0) || ((bi)->bi_ofailed > 0))
 593 #define RFAILED(bi)     (((bi)->bi_failed > 0) || ((bi)->bi_ofailed > 0))
 594 
 595 #define IS_RSRV(bi)     (RSRV(bi) || ORSRV(bi))
 596 
 597 #define IS_CRSRV(gcd)   (IS_RSRV(&(gcd)->devices->id_cache_dev))
 598 #define IS_RRSRV(gcd)   (IS_RSRV(&(gcd)->devices->id_raw_dev))
 599 
 600 #define IS_RFAILED(gcd) \
 601                 (RFAILED(&(gcd)->devices->id_cache_dev) || \
 602                 RFAILED(&(gcd)->devices->id_raw_dev))
 603 
 604 #define RDC_IS_BMP(rdc) ((rdc)->rdc_type == RDC_BMP)
 605 #define RDC_IS_QUE(rdc) ((rdc)->rdc_type == RDC_QUE)
 606 #define RDC_IS_RAW(rdc) (((rdc)->rdc_oflags & NSC_CACHE) == 0)
 607 #define RDC_U_FD(gcd)   (IS_CRSRV(gcd) ? (gcd)->c_fd : (gcd)->r_fd)
 608 #define RDC_FD(rdc)     (RDC_U_FD(rdc->rdc_info))
 609 
 610 
     /*
      * Host description: a node name, an integer address and a netbuf
      * pointer.
      * NOTE(review): the struct tag ends in _u but this is a struct, not a
      * union; all three members exist at once.
      */
 611 typedef struct rdc_host_u {
 612         char *nodename;
 613         int netaddr;
 614         struct netbuf *naddr;
 615 } rdc_host_t;
 616 
 617 /*
 618  * Reply from remote read
 619  * - convenience defines for the client side code.
 620  * - keep this in sync with the readres structure in rdc_prot.h/.x
      *
      * rr_bufsize and rr_data are built on rr_ok, which itself expands to
      * readres_u.reply, so all three name members of the reply arm.  Like
      * any object-like macro these rewrite every identifier spelled
      * rr_status, rr_ok, ... in files that include this header.
 621  */
 622 #define rdcrdresult     readres
 623 #define rr_status       status
 624 #define rr_ok           readres_u.reply
 625 #define rr_bufsize      rr_ok.data.data_len
 626 #define rr_data         rr_ok.data.data_val
 627 
 628 /*
 629  * Flags for remote read rpc
 630  *
 631  * _START must be a unique rpc, _DATA and _END may be OR-d together.
 632  */
 633 #define RDC_RREAD_DATA  0x1     /* Intermediate rpc with data payload */
 634 #define RDC_RREAD_START 0x2     /* Setup rpc */
 635 #define RDC_RREAD_END   0x4     /* End rpc */
 636 #define RDC_RREAD_FAIL  0x8     /* Primary is failed */
 637 
 638 /*
 639  * Flags for remote write rpc
      * RDC_RWRITE_FAIL has the same value (0x8) as RDC_RREAD_FAIL.
 640  */
 641 #define RDC_RWRITE_FAIL 0x8     /* Primary is failed */
 642 
 643 /*
 644  * macro used to determine if the incomming sq, with sequence
 645  * value x, should be placed before the sq with sequence value y.
 646  * This has to account for integer wrap. We account for integer
 647  * wrap by checking if the difference between x and y is within
 648  * half of the maximum integer value (RDC_MAXINT) or not.
 649  */
 650 
 651 #define RDC_BITSPERBYTE 8
 652 #define RDC_BITS(type)  (RDC_BITSPERBYTE * (long)sizeof (type))
 653 #define RDC_HIBITI      ((unsigned)1 << (RDC_BITS(int) - 1))
 654 #define RDC_MAXINT      ((int)(~RDC_HIBITI))
 655 #define RDC_RANGE       ((RDC_MAXINT / 2) -1)
 656 
 657 #define RDC_INFRONT(x, y) (((x < y) && ((y - x) < RDC_RANGE)) ? 1 : \
 658         ((x > y) && ((x - y) > RDC_RANGE)) ? 1 : 0)
 659 
 660 
 661 
 662 
 663 #endif /* _KERNEL */
 664 
 665 /*
 666  * RDC user-visible information
      *
      * Plain alias for rdc_set_t, which is declared outside this header.
      * Visible to both kernel and non-kernel includers (not under _KERNEL).
 667  */
 668 typedef rdc_set_t rdc_u_info_t;
 669 
 670 
 671 /*
 672  * RDC flags for set state / set cd RPC.
 673  * Must remain compatible with rdc RPC protocol version v3.
      *
      * Each non-zero value is a distinct single bit; the gaps between the
      * assigned bits are left as they are because the values are fixed by the
      * protocol.
 674  */
 675 #define CCIO_NONE               0x0000
 676 #define CCIO_ENABLE             0x0008
 677 #define CCIO_SLAVE              0x0010
 678 #define CCIO_DONE               0x0020
 679 #define CCIO_ENABLELOG          0x0100
 680 #define CCIO_RSYNC              0x0400
 681 #define CCIO_REMOTE             0x2000
 682 
 683 
 684 /*
 685  * In kernel type flags (krdc->type_flag).
      * 0x80 is not assigned.
 686  */
 687 #define RDC_CONFIGURED          0x1
 688 #define RDC_DISABLEPEND         0x2     /* Suspend/Disable is in progress */
 689 #define RDC_ASYNCMODE           0x4
 690 #define RDC_RESUMEPEND          0x8
 691 #define RDC_RESPONSIBLE         0x10
 692 #define RDC_BUSYWAIT            0x20
 693 #define RDC_UNREGISTER          0x40    /* Unregister is in progress */
 694 #define RDC_QDISABLEPEND        0x100   /* Q Suspend/Disable is in progress */
 695 
     /*
      * Set-state predicates.
      *
      * IS_ENABLED(urdc):    the rdc_k_info entry at urdc->index is configured
      *                      and rdc_get_vflags(urdc) has RDC_ENABLED set.
      *                      rdc_get_vflags() and RDC_ENABLED come from outside
      *                      this header; urdc->index is not range-checked.
      * IS_CONFIGURED(krdc): RDC_CONFIGURED bit of type_flag (masked value, not
      *                      a normalized 1).
      * IS_MANY(krdc):       many_next does not point back at krdc itself.
      * IS_MULTI(krdc):      multi_next is not NULL.
      * IS_VALID_INDEX(i):   0 <= i < rdc_max_sets and that entry is configured.
      *
      * These macros are outside the _KERNEL guard although rdc_k_info and
      * rdc_max_sets are only declared under _KERNEL.
      */
 696 #define IS_ENABLED(urdc)        ((IS_CONFIGURED(&rdc_k_info[(urdc)->index]) && \
 697         (rdc_get_vflags(urdc) & RDC_ENABLED)))
 698 #define IS_CONFIGURED(krdc)     ((krdc)->type_flag & RDC_CONFIGURED)
 699 #define IS_MANY(krdc)           ((krdc)->many_next != (krdc))
 700 #define IS_MULTI(krdc)          ((krdc)->multi_next != NULL)
 701 
 702 #define IS_VALID_INDEX(index)   ((index) >= 0 && (index) < rdc_max_sets && \
 703                                         IS_CONFIGURED(&rdc_k_info[(index)]))
 704 
     /*
      * Options used when starting logging.  RDC_NOFLUSH and RDC_NOREMOTE are
      * both 0, i.e. they denote the absence of the corresponding bit; the
      * remaining values are distinct single bits.
      */
 705 #define RDC_NOFLUSH     0       /* Do not do a flush when starting logging */
 706 #define RDC_NOREMOTE    0       /* Do no remote logging notifications */
 707 #define RDC_FLUSH       1       /* Do a flush when starting logging */
 708 #define RDC_ALLREMOTE   2       /* Notify all remote group members */
 709 #define RDC_OTHERREMOTE 4       /* Notify all remote group members except */
 710                                 /* the one corresponding to the current set, */
 711                                 /* to prevent recursion in the case where */
 712                                 /* the request was initiated from the remote */
 713                                 /* node. */
 714 #define RDC_FORCE_GROUP 8       /* set all group members logging regardless */
 715 
 716 #ifdef _KERNEL
 717 
 718 /*
 719  * Functions, vars
 720  */
 721 
     /* Sixty seconds expressed in clock ticks (HZ ticks per second). */
 722 #define RDC_SYNC_EVENT_TIMEOUT  (60 * HZ)
     /*
      * State for a handshake between the kernel and a daemon: each side has
      * a "waiting" flag, and the structure carries its own mutex and two
      * condition variables.
      * NOTE(review): which side waits on cv and which on done_cv is not
      * visible from this header -- confirm in the event code.
      */
 723 typedef struct {
 724         clock_t lbolt;
 725         int event;
 726         int ack;
 727         int daemon_waiting;             /* Daemon waiting in ioctl */
 728         int kernel_waiting;             /* Kernel waiting for daemon to reply */
 729         char master[NSC_MAXPATH];
 730         char group[NSC_MAXPATH];
 731         kmutex_t mutex;
 732         kcondvar_t cv;
 733         kcondvar_t done_cv;
 734 } rdc_sync_event_t;
 735 extern rdc_sync_event_t rdc_sync_event;
 736 extern clock_t rdc_sync_event_timeout;
 737 extern kmutex_t rdc_sync_mutex;
 738 
     /*
      * Set tables.  IS_ENABLED() and IS_VALID_INDEX() index rdc_k_info[] and
      * IS_VALID_INDEX() bounds the index by rdc_max_sets.
      * NOTE(review): rdc_u_info is presumably a parallel array with the same
      * indexing -- confirm.
      */
 739 extern rdc_u_info_t *rdc_u_info;
 740 extern rdc_k_info_t *rdc_k_info;
 741 
 742 extern int rdc_max_sets;
 743 
 744 extern unsigned long rdc_async_timeout;
 745 
     /*
      * NOTE(review): rdc_self_host() is declared with an empty parameter list,
      * which in C is not a prototype, so calls are not argument-checked.  If
      * the definition takes no arguments this should read (void).
      */
 746 extern int rdc_self_host();
 747 extern uint64_t mirror_getsize(int index);
 748 extern void rdc_sleepqdiscard(rdc_group_t *);
 749 
 750 
 751 #ifdef  DEBUG
 752 extern void rdc_stallzero(int);
 753 #endif
 754 
     /*
      * One data fragment of a network dataset, linked through `next'.
      * rdc_net_dataset keeps head and tail pointers to a list of these.
      */
 755 struct rdc_net_dataitem {
 756         void *dptr;
 757         int   len;      /* byte count */
 758         int   mlen;     /* actual malloced size */
 759         struct rdc_net_dataitem *next;
 760 };
 761 typedef struct rdc_net_dataitem rdc_net_dataitem_t;
 762 
 763 struct rdc_net_dataset {        /* a list of rdc_net_dataitem_t plus bookkeeping fields */
 764         int id;
 765         int inuse;
 766         int delpend;    /* NOTE(review): presumably "delete pending" - confirm */
 767         int nitems;     /* NOTE(review): presumably the number of items on head..tail - confirm */
 768         nsc_off_t pos;
 769         nsc_size_t fbalen;
 770         rdc_net_dataitem_t *head;       /* first item on the list */
 771         rdc_net_dataitem_t *tail;       /* last item on the list */
 772         struct rdc_net_dataset *next;   /* link to another dataset */
 773 };
 774 typedef struct rdc_net_dataset rdc_net_dataset_t;
 775 
 776 
 777 #endif /* _KERNEL */
 778 
 779 
 780 #define RDC_TCP_DEV             "/dev/tcp"
 781
 782 #define RDC_VERS_MIN    RDC_VERSION5    /* NOTE(review): presumably oldest protocol version supported - confirm */
 783 #define RDC_VERS_MAX    RDC_VERSION7    /* NOTE(review): presumably newest protocol version supported - confirm */
 784
 785 #define RDC_HEALTH_THRESHOLD    20      /* NOTE(review): presumably the default behind the health_thres kstat; units not shown here */
 786 #define RDC_MIN_HEALTH_THRES    5
 787 #define SNDR_MAXTHREADS         16
 788 /*
 789  * The next two defines are the default limits for the async queue.
 790  * They have been calculated to allow 8MB of data with an average
 791  * I/O size of 2K.
 792  */
 793 #define RDC_MAXTHRES_QUEUE      16384   /* max # of fbas on async q */
 794 #define RDC_MAX_QITEMS          4096    /* max # of items on async q */
 795 #define RDC_ASYNCTHR            2       /* number of async threads */
 796 
 797 #define RDC_RPC_MAX             (RDC_MAXDATA + sizeof (net_data5) +\
 798                                         (RPC_MAXDATASIZE - 8192))       /* sum includes a sizeof term, so the result is an unsigned size, not an int */
 799 #define ATM_NONE 0
 800 #define ATM_INIT 1
 801 #define ATM_EXIT 2
 802
 803 #define RDC_CLNT_TMOUT          16
 804
 805 #define BMAP_BLKSIZE 1024
 806 #define BMAP_BLKSIZEV7 RDC_MAXDATA      /* NOTE(review): presumably the block size used with RDC_VERSION7 - confirm */
 807
 808 /* Right now we can only trace writes of 1m or less to the bitmap (32 bits wide). */
 809 #define RDC_MAX_MAXFBAS 2048
 810 
 811 #if defined(_KERNEL)
 812 /* kstat interface */
 813 
 814 /*
 815  * Per-module kstats; only one instance.
 816  * Members appear to pair with the RDC_MKSTAT_* name strings defined earlier in this file.
 817  */
 818 typedef struct {
 819         kstat_named_t   m_maxsets;              /* Max # of sndr sets */
 820         kstat_named_t   m_maxfbas;              /* Max # of FBAS from nsctl */
 821         kstat_named_t   m_rpc_timeout;          /* global RPC timeout */
 822         kstat_named_t   m_health_thres;         /* Health thread timeout */
 823         kstat_named_t   m_bitmap_writes;        /* True for bitmap writes */
 824         kstat_named_t   m_clnt_cots_calls;      /* # of clnt COTS calls */
 825         kstat_named_t   m_clnt_clts_calls;      /* # of clnt CLTS calls */
 826         kstat_named_t   m_svc_cots_calls;       /* # of server COTS calls */
 827         kstat_named_t   m_svc_clts_calls;       /* # of server CLTS calls */
 828         kstat_named_t   m_bitmap_ref_delay;     /* # of bitmap ref overflows */
 829 } sndr_m_stats_t;
 830 
 831 /*
 832  * Per-set kstats; one instance per configured set.
 833  * Trailing comments name the structure each value is taken from.
 834  */
 835 typedef struct {
 836         kstat_named_t   s_flags;        /* from rdc_set_t */
 837         kstat_named_t   s_syncflags;    /* from rdc_set_t */
 838         kstat_named_t   s_bmpflags;     /* from rdc_set_t */
 839         kstat_named_t   s_syncpos;      /* from rdc_set_t */
 840         kstat_named_t   s_volsize;      /* from rdc_set_t */
 841         kstat_named_t   s_bits_set;     /* from rdc_set_t */
 842         kstat_named_t   s_autosync;     /* from rdc_set_t */
 843         kstat_named_t   s_maxqfbas;     /* from rdc_set_t */
 844         kstat_named_t   s_maxqitems;    /* from rdc_set_t */
 845         kstat_named_t   s_primary_vol;  /* from rdc_set_t */
 846         kstat_named_t   s_secondary_vol;        /* from rdc_set_t */
 847         kstat_named_t   s_bitmap;       /* from rdc_set_t */
 848         kstat_named_t   s_primary_intf; /* from rdc_set_t */
 849         kstat_named_t   s_secondary_intf;       /* from rdc_set_t */
 850         kstat_named_t   s_type_flag;    /* from rdc_k_info_t */
 851         kstat_named_t   s_bitmap_size;  /* from rdc_k_info_t */
 852         kstat_named_t   s_disk_status;  /* from rdc_k_info_t */
 853         kstat_named_t   s_if_if_down;   /* from rdc_if_t */
 854         kstat_named_t   s_if_rpc_version;       /* from rdc_if_t */
 855         kstat_named_t   s_aqueue_blk_hwm;       /* from rdc_k_info_t */
 856         kstat_named_t   s_aqueue_itm_hwm;       /* from rdc_k_info_t */
 857         kstat_named_t   s_aqueue_throttle;      /* from rdc_k_info_t */
 858         kstat_named_t   s_aqueue_items;         /* NOTE(review): source structure not recorded here */
 859         kstat_named_t   s_aqueue_blocks;        /* NOTE(review): source structure not recorded here */
 860         kstat_named_t   s_aqueue_type;          /* NOTE(review): source structure not recorded here */
 861 } rdc_info_stats_t;
 862 #endif /* _KERNEL */
 863 
 864 #ifndef _SunOS_5_6      /* i.e. 2.7+ */
 865 typedef int xdr_t;      /* the only type supplied here when _SunOS_5_6 is not defined */
 866 #else   /* i.e. 2.6- */
 867 typedef unsigned long rpcprog_t;        /* rpc*_t types are supplied locally, all as unsigned long */
 868 typedef unsigned long rpcvers_t;
 869 typedef unsigned long rpcproc_t;
 870 typedef unsigned long rpcprot_t;
 871 typedef unsigned long rpcport_t;
 872 #endif /* _SunOS_5_6 */
 873 
 874 
 875 #ifdef _KERNEL
 876 
 877 extern nsc_size_t MAX_RDC_FBAS;
 878 extern volatile int net_exit;   /* NOTE(review): presumably holds one of the ATM_* values - confirm */
 879 extern nsc_size_t rdc_maxthres_queue;   /* max # of fbas on async q */
 880 extern int rdc_max_qitems;              /* max # of items on async q */
 881 extern int rdc_asyncthr;        /* # of async threads */
 882
 883 #ifdef DEBUG
 884 extern kmutex_t rdc_cntlock;    /* held by RDC_DSMEMUSE while it updates rdc_datasetcnt */
 885 extern int rdc_datasetcnt;      /* running total adjusted by RDC_DSMEMUSE */
 886 #endif
 887 
 /*
  * Macro to keep tabs on dataset memory usage.
  *
  * In DEBUG builds this adds (x) to rdc_datasetcnt while holding
  * rdc_cntlock.  In non-DEBUG builds it expands to nothing and (x) is
  * not evaluated, so (x) must not carry side effects.
  *
  * The DEBUG body is wrapped in do { } while (0) so that the macro
  * expands to exactly one statement; the previous three-statement form
  * broke when used as the body of an unbraced if/else.  Invoke it with
  * a trailing semicolon: RDC_DSMEMUSE(len);
  */
 #ifdef DEBUG
 #define RDC_DSMEMUSE(x) \
 	do { \
 		mutex_enter(&rdc_cntlock); \
 		rdc_datasetcnt += (x); \
 		mutex_exit(&rdc_cntlock); \
 	} while (0)
 #else
 #define RDC_DSMEMUSE(x)
 #endif
 899 
 900 
 901 
 902 
 903 
 904 extern kmutex_t rdc_ping_lock;
 905 extern rdc_if_t *rdc_if_top;    /* NOTE(review): presumably the head of the rdc_if_t list - confirm */
 906 /* extern declarations only; none of the functions below is defined in this header */
 907 extern int _rdc_enqueue_write(rdc_k_info_t *, nsc_off_t, nsc_size_t, int,
 908     nsc_buf_t *);
 909 extern int rdc_net_state(int, int);
 910 extern int rdc_net_getbmap(int, int);
 911 extern int rdc_net_getsize(int, uint64_t *);    /* NOTE(review): presumably returns the size through the pointer - confirm */
 912 extern int rdc_net_write(int, int, nsc_buf_t *, nsc_off_t, nsc_size_t, uint_t,
 913     int, netwriteres *);
 914 extern int rdc_net_read(int, int, nsc_buf_t *, nsc_off_t, nsc_size_t);
 915 extern int _rdc_remote_read(rdc_k_info_t *, nsc_buf_t *, nsc_off_t, nsc_size_t,
 916     int);
 917 extern int _rdc_multi_write(nsc_buf_t *, nsc_off_t, nsc_size_t, int,
 918     rdc_k_info_t *);
 919 extern int rdc_start_server(struct rdc_svc_args *, int);
 920 extern aio_buf_t *rdc_aio_buf_get(rdc_buf_t *, int);
 921 extern void rdc_aio_buf_del(rdc_buf_t *, rdc_k_info_t *);
 922 extern aio_buf_t *rdc_aio_buf_add(int, rdc_buf_t *);
 923 extern int rdc_net_getstate(rdc_k_info_t *, int *, int *, int *, int);
 924 extern kmutex_t rdc_conf_lock;
 925 extern kmutex_t rdc_many_lock;
 926 extern int rdc_drain_queue(int);
 927 extern int flush_group_queue(int);
 928 extern void rdc_dev_close(rdc_k_info_t *);
 929 extern int rdc_dev_open(rdc_set_t *, int);
 930 extern void rdc_get_details(rdc_k_info_t *);
 931 extern int rdc_lookup_bitmap(char *);
 932 extern int rdc_lookup_enabled(char *, int);
 933 extern int rdc_lookup_byaddr(rdc_set_t *);
 934 extern int rdc_lookup_byname(rdc_set_t *);
 935 extern int rdc_intercept(rdc_k_info_t *);
 936 extern int rdc_unintercept(rdc_k_info_t *);
 937 extern int _rdc_rsrv_devs(rdc_k_info_t *, int, int);
 938 extern void _rdc_rlse_devs(rdc_k_info_t *, int);
 939 extern void _rdc_unload(void);
 940 extern int _rdc_load(void);
 941 extern int _rdc_configure(void);
 942 extern void _rdc_deconfigure(void);
 943 extern void _rdc_async_throttle(rdc_k_info_t *, long);
 944 extern int rdc_writer(int);
 945 extern int rdc_dump_alloc_bufs_cd(int);
 946 extern void rdc_dump_alloc_bufs(rdc_if_t *);
 947 extern int rdc_check_secondary(rdc_if_t *, int);
 948 extern void rdc_dump_queue(int);
 949 extern int rdc_isactive_if(struct netbuf *, struct netbuf *);
 950 extern rdc_if_t *rdc_add_to_if(rdc_srv_t *, struct netbuf *, struct netbuf *,
 951     int);
 952 extern void rdc_remove_from_if(rdc_if_t *);
 953 extern void rdc_set_if_vers(rdc_u_info_t *, rpcvers_t);
 954 
 955 extern void rdc_print_svinfo(rdc_srv_t *, char *);
 956 extern rdc_srv_t *rdc_create_svinfo(char *, struct netbuf *,
 957                         struct knetconfig *);        /* returns an rdc_srv_t *; rdc_destroy_svinfo takes one */
 958 extern void rdc_destroy_svinfo(rdc_srv_t *);
 959 /* helpers operating on struct netbuf */
 960 extern void init_rdc_netbuf(struct netbuf *);
 961 extern void free_rdc_netbuf(struct netbuf *);
 962 extern void dup_rdc_netbuf(const struct netbuf *, struct netbuf *);     /* first argument is const; NOTE(review): presumably source then destination */
 963 extern int rdc_netbuf_toint(struct netbuf *);
 964 extern struct netbuf *rdc_int_tonetbuf(int);
 965 extern void rdc_lor(const uchar_t *, uchar_t *, int);   /* first buffer is const; second is writable */
 966 extern int rdc_resume2(rdc_k_info_t *);
 967 extern void rdc_set_flags(rdc_u_info_t *, int);         /* flag helpers below all take an rdc_u_info_t * */
 968 extern void rdc_clr_flags(rdc_u_info_t *, int);
 969 extern int rdc_get_vflags(rdc_u_info_t *);
 970 extern void rdc_set_mflags(rdc_u_info_t *, int);
 971 extern void rdc_clr_mflags(rdc_u_info_t *, int);
 972 extern int rdc_get_mflags(rdc_u_info_t *);
 973 extern void rdc_set_flags_log(rdc_u_info_t *, int, char *);
 974 extern void rdc_group_log(rdc_k_info_t *krdc, int flush, char *why);    /* NOTE(review): flush presumably takes the RDC_FLUSH/RDC_ALLREMOTE/... values - confirm */
 975 extern int _rdc_config(void *, int, spcs_s_info_t, int *);
 976 extern void rdc_many_enter(rdc_k_info_t *);
 977 extern void rdc_many_exit(rdc_k_info_t *);
 978 extern void rdc_group_enter(rdc_k_info_t *);
 979 extern void rdc_group_exit(rdc_k_info_t *);
 980 extern int _rdc_sync_event_wait(void *, void *, int, spcs_s_info_t, int *);
 981 extern int _rdc_sync_event_notify(int, char *, char *);
 982 extern int _rdc_link_down(void *, int, spcs_s_info_t, int *);
 983 extern void rdc_delgroup(rdc_group_t *);
 984 extern int rdc_write_bitmap_fba(rdc_k_info_t *, nsc_off_t);
 985 extern int rdc_bitmapset(int, char *, char *, void *, int, nsc_off_t, int);
 986 extern rdc_net_dataset_t *rdc_net_add_set(int);         /* the next five operate on rdc_net_dataset_t */
 987 extern rdc_net_dataset_t *rdc_net_get_set(int, int);
 988 extern void rdc_net_put_set(int, rdc_net_dataset_t *);
 989 extern void rdc_net_del_set(int, rdc_net_dataset_t *);
 990 extern void rdc_net_free_set(rdc_k_info_t *, rdc_net_dataset_t *);
 991 extern int rdc_lookup_byhostdev(char *intf, char *file);
 992 extern int rdc_lookup_configured(char *path);
 993 extern void rdc_dump_dsets(int);
 994 extern void set_busy(rdc_k_info_t *);
 995 extern void wakeup_busy(rdc_k_info_t *);
 996 
 997 
 998 #ifdef  DEBUG
 999 extern int rdc_async6(void *, int mode, int *); /* declared only when DEBUG is defined */
1000 extern int rdc_readgen(void *, int, int *);     /* declared only when DEBUG is defined */
1001 #endif
1002 
1003 #endif /* _KERNEL */
1004 
1005 #ifdef  __cplusplus
1006 }
1007 #endif
1008 
1009 #endif /* _RDC_IO_H */