1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T     */
  22 /*        All Rights Reserved   */
  23 
  24 
  25 /*
  26  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  27  * Use is subject to license terms.
  28  */
  29 
  30 /*
  31  * Copyright 2019 OmniOS Community Edition (OmniOSce) Association.
  32  */
  33 
  34 #ifndef _SYS_STRSUBR_H
  35 #define _SYS_STRSUBR_H
  36 
  37 /*
  38  * WARNING:
  39  * Everything in this file is private, belonging to the
  40  * STREAMS subsystem.  The only guarantee made about the
  41  * contents of this file is that if you include it, your
  42  * code will not port to the next release.
  43  */
  44 #include <sys/stream.h>
  45 #include <sys/stropts.h>
  46 #include <sys/vnode.h>
  47 #include <sys/kstat.h>
  48 #include <sys/uio.h>
  49 #include <sys/proc.h>
  50 #include <sys/netstack.h>
  51 #include <sys/modhash.h>
  52 
  53 #ifdef  __cplusplus
  54 extern "C" {
  55 #endif
  56 
  57 /*
  58  * In general, the STREAMS locks are disjoint; they are only held
  59  * locally, and not simultaneously by a thread.  However, module
  60  * code, including at the stream head, requires some locks to be
  61  * acquired in order for its safety.
  62  *      1. Stream level claim.  This prevents the value of q_next
  63  *              from changing while module code is executing.
  64  *      2. Queue level claim.  This prevents the value of q_ptr
  65  *              from changing while put or service code is executing.
  66  *              In addition, it provides for queue single-threading
  67  *              for QPAIR and PERQ MT-safe modules.
  68  *      3. Stream head lock.  May be held by the stream head module
  69  *              to implement a read/write/open/close monitor.
  70  *         Note that the only types of twisted streams supported are
  71  *         the pipe and transports which have read and write service
  72  *         procedures on both sides of the twist.
  73  *      4. Queue lock.  May be acquired by utility routines on
  74  *              behalf of a module.
  75  */
  76 
  77 /*
  78  * In general, sd_lock protects the consistency of the stdata
  79  * structure.  Additionally, it is used with sd_monitor
  80  * to implement an open/close monitor.  In particular, it protects
  81  * the following fields:
  82  *      sd_iocblk
  83  *      sd_flag
  84  *      sd_copyflag
  85  *      sd_iocid
  86  *      sd_iocwait
  87  *      sd_sidp
  88  *      sd_pgidp
  89  *      sd_wroff
  90  *      sd_tail
  91  *      sd_rerror
  92  *      sd_werror
  93  *      sd_pushcnt
  94  *      sd_sigflags
  95  *      sd_siglist
  96  *      sd_pollist
  97  *      sd_mark
  98  *      sd_closetime
  99  *      sd_wakeq
 100  *      sd_maxblk
 101  *
 102  * The following fields are modified only by the allocator, which
 103  * has exclusive access to them at that time:
 104  *      sd_wrq
 105  *      sd_strtab
 106  *
 107  * The following field is protected by the overlying file system
 108  * code, guaranteeing single-threading of opens:
 109  *      sd_vnode
 110  *
 111  * Stream-level locks should be acquired before any queue-level locks
 112  *      are acquired.
 113  *
 114  * The stream head write queue lock (sd_wrq) is used to protect the
 115  * fields sd_qn_maxpsz and sd_qn_minpsz because freezestr(), which is
 116  * necessary for strqset(), only gets the queue lock.
 117  */
 118 
 119 /*
 120  * Function types for the parameterized stream head.
 121  * The msgfunc_t takes the parameters:
 122  *      msgfunc(vnode_t *vp, mblk_t *mp, strwakeup_t *wakeups,
 123  *              strsigset_t *firstmsgsigs, strsigset_t *allmsgsigs,
 124  *              strpollset_t *pollwakeups);
 125  * It returns an optional message to be processed by the stream head.
 126  *
 127  * The parameters for errfunc_t are:
 128  *      errfunc(vnode_t *vp, int ispeek, int *clearerr);
 129  * It returns an errno, or zero if there was no pending error.
 130  */
 131 typedef uint_t  strwakeup_t;    /* type of the msgfunc "wakeups" value */
 132 typedef uint_t  strsigset_t;    /* type of the msgfunc "*msgsigs" values */
 133 typedef short   strpollset_t;   /* type of the msgfunc "pollwakeups" value */
 134 typedef uintptr_t callbparams_id_t;     /* id type of cbp_id/sq_cancelid */
 135 typedef mblk_t  *(*msgfunc_t)(vnode_t *, mblk_t *, strwakeup_t *,
 136                         strsigset_t *, strsigset_t *, strpollset_t *);
 137 typedef int     (*errfunc_t)(vnode_t *, int, int *);
 138 
 139 /*
 140  * Per stream sd_lock in putnext may be replaced by per cpu stream_putlocks
 141  * each living in a separate cache line. putnext/canputnext grabs only one of
 142  * stream_putlocks while strlock() (called on behalf of insertq()/removeq())
 143  * acquires all stream_putlocks. Normally stream_putlocks are only employed
 144  * for highly contended streams that have SQ_CIPUT queues in the critical path
 145  * (e.g. NFS/UDP stream).
 146  *
 147  * stream_putlocks are dynamically assigned to the stdata structure through
 148  * the sd_ciputctrl pointer, possibly when a stream is already in use. Since
 149  * strlock() uses stream_putlocks only under sd_lock, acquiring sd_lock when
 150  * assigning stream_putlocks to the stream ensures synchronization with
 151  * strlock().
 152  *
 153  * For lock ordering purposes stream_putlocks are treated as the extension of
 154  * sd_lock and are always grabbed right after grabbing sd_lock and released
 155  * right before releasing sd_lock except putnext/canputnext where only one of
 156  * stream_putlocks locks is used and where it is the first lock to grab.
 157  */
 158 
 159 typedef struct ciputctrl_str {
 160         union _ciput_un {
 161                 uchar_t pad[64];        /* makes an entry >= 64 bytes */
 162                 struct _ciput_str {
 163                         kmutex_t        ciput_lck;      /* the putlock */
 164                         ushort_t        ciput_cnt;      /* its counter */
 165                 } ciput_str;
 166         } ciput_un;
 167 } ciputctrl_t;
 168 
 169 #define ciputctrl_lock  ciput_un.ciput_str.ciput_lck    /* lock member */
 170 #define ciputctrl_count ciput_un.ciput_str.ciput_cnt    /* count member */
 171 
 172 /*
 173  * Header for a stream: interface to rest of system.
 174  *
 175  * NOTE: While this is a consolidation-private structure, some unbundled and
 176  *       third-party products inappropriately make use of some of the fields.
 177  *       As such, please take care to not gratuitously change any offsets of
 178  *       existing members.
 179  */
 180 typedef struct stdata {
 181         struct queue    *sd_wrq;        /* write queue */
 182         struct msgb     *sd_iocblk;     /* return block for ioctl */
 183         struct vnode    *sd_vnode;      /* pointer to associated vnode */
 184         struct streamtab *sd_strtab;    /* pointer to streamtab for stream */
 185         uint_t          sd_flag;        /* state/flags */
 186         uint_t          sd_iocid;       /* ioctl id */
 187         struct pid      *sd_sidp;       /* controlling session info */
 188         struct pid      *sd_pgidp;      /* controlling process group info */
 189         ushort_t        sd_tail;        /* reserved space in written mblks */
 190         ushort_t        sd_wroff;       /* write offset */
 191         int             sd_rerror;      /* error to return on read ops */
 192         int             sd_werror;      /* error to return on write ops */
 193         int             sd_pushcnt;     /* number of pushes done on stream */
 194         int             sd_sigflags;    /* logical OR of all siglist events */
 195         struct strsig   *sd_siglist;    /* pid linked list to rcv SIGPOLL sig */
 196         struct pollhead sd_pollist;     /* list of all pollers to wake up */
 197         struct msgb     *sd_mark;       /* "marked" message on read queue */
 198         clock_t         sd_closetime;   /* time to wait to drain q in close */
 199         kmutex_t        sd_lock;        /* protect head consistency */
 200         kcondvar_t      sd_monitor;     /* open/close/push/pop monitor */
 201         kcondvar_t      sd_iocmonitor;  /* ioctl single-threading */
 202         kcondvar_t      sd_refmonitor;  /* sd_refcnt monitor */
 203         ssize_t         sd_qn_minpsz;   /* These two fields are a performance */
 204         ssize_t         sd_qn_maxpsz;   /* enhancement: the values are cached */
 205                                         /* in the stream head so we don't have */
 206                                         /* to ask the module below the stream */
 207                                         /* head to get this information. */
 208         struct stdata   *sd_mate;       /* pointer to twisted stream mate */
 209         kthread_id_t    sd_freezer;     /* thread that froze stream */
 210         kmutex_t        sd_reflock;     /* Protects sd_refcnt */
 211         int             sd_refcnt;      /* number of claimstr */
 212         uint_t          sd_wakeq;       /* strwakeq()'s copy of sd_flag */
 213         struct queue    *sd_struiordq;  /* sync barrier struio() read queue */
 214         struct queue    *sd_struiowrq;  /* sync barrier struio() write queue */
 215         char            sd_struiodnak;  /* defer NAK of M_IOCTL by rput() */
 216         struct msgb     *sd_struionak;  /* pointer M_IOCTL mblk(s) to NAK */
 217         caddr_t         sd_t_audit_data; /* For audit purposes only */
 218         ssize_t         sd_maxblk;      /* maximum message block size */
 219         uint_t          sd_rput_opt;    /* options/flags for strrput */
 220         uint_t          sd_wput_opt;    /* options/flags for write/putmsg */
 221         uint_t          sd_read_opt;    /* options/flags for strread */
 222         msgfunc_t       sd_rprotofunc;  /* rput M_*PROTO routine */
 223         msgfunc_t       sd_rputdatafunc; /* read M_DATA routine */
 224         msgfunc_t       sd_rmiscfunc;   /* rput routine (non-data/proto) */
 225         msgfunc_t       sd_wputdatafunc; /* wput M_DATA routine */
 226         errfunc_t       sd_rderrfunc;   /* read side error callback */
 227         errfunc_t       sd_wrerrfunc;   /* write side error callback */
 228         /*
 229          * Support for low contention concurrent putnext.
 230          */
 231         ciputctrl_t     *sd_ciputctrl;  /* per cpu stream_putlocks */
 232         uint_t          sd_nciputctrl;  /* sizing of above (TODO confirm) */
 233 
 234         int             sd_anchor;      /* position of anchor in stream */
 235         /*
 236          * Service scheduling at the stream head.
 237          */
 238         kmutex_t        sd_qlock;       /* presumably protects svc list */
 239         struct queue    *sd_qhead;      /* Head of queues to be serviced. */
 240         struct queue    *sd_qtail;      /* Tail of queues to be serviced. */
 241         void            *sd_servid;     /* Service ID for bckgrnd schedule */
 242         ushort_t        sd_svcflags;    /* Servicing flags */
 243         short           sd_nqueues;     /* Number of queues in the list */
 244         kcondvar_t      sd_qcv;         /* Waiters for qhead to become empty */
 245         kcondvar_t      sd_zcopy_wait;  /* cf. STZCNOTIFY (TODO confirm) */
 246         uint_t          sd_copyflag;    /* copy-related flags */
 247         zoneid_t        sd_anchorzone;  /* Allow removal from same zone only */
 248         struct msgb     *sd_cmdblk;     /* reply from _I_CMD */
 249 
 250         /*
 251          * When a STREAMS device is cloned, the sd_vnode element of this
 252          * structure is replaced by a pointer to a common vnode shared across
 253          * all streams that are using the device. In this case, it is no longer
 254          * possible to get from the stream head back to the original vnode via
 255          * sd_vnode. Therefore, when such a device is cloned, the parent vnode -
 256          * i.e. that which was created during the device clone in spec_clone()
 257          * - is kept in sd_pvnode.
 258          */
 259         struct vnode    *sd_pvnode;     /* parent vnode of a cloned device */
 260 } stdata_t;
 261 
 262 /*
 263  * stdata servicing flags (sd_svcflags).
 264  */
 265 #define STRS_WILLSERVICE        0x01
 266 #define STRS_SCHEDULED          0x02
 267 /* True when the stream head has queues waiting to be serviced. */
 268 #define STREAM_NEEDSERVICE(stp) ((stp)->sd_qhead != NULL)
 269 
 270 /*
 271  * stdata flag field (sd_flag) defines
 272  */
 273 #define IOCWAIT         0x00000001      /* Someone is doing an ioctl */
 274 #define RSLEEP          0x00000002      /* Someone wants to read/recv msg */
 275 #define WSLEEP          0x00000004      /* Someone wants to write */
 276 #define STRPRI          0x00000008      /* An M_PCPROTO is at stream head */
 277 #define STRHUP          0x00000010      /* Device has vanished */
 278 #define STWOPEN         0x00000020      /* waiting for 1st open */
 279 #define STPLEX          0x00000040      /* stream is being multiplexed */
 280 #define STRISTTY        0x00000080      /* stream is a terminal */
 281 #define STRGETINPROG    0x00000100      /* (k)strgetmsg is running */
 282 #define IOCWAITNE       0x00000200      /* STR_NOERROR ioctl running */
 283 #define STRDERR         0x00000400      /* fatal read error from M_ERROR */
 284 #define STWRERR         0x00000800      /* fatal write error from M_ERROR */
 285 #define STRDERRNONPERSIST 0x00001000    /* nonpersistent read errors */
 286 #define STWRERRNONPERSIST 0x00002000    /* nonpersistent write errors */
 287 #define STRCLOSE        0x00004000      /* wait for a close to complete */
 288 #define SNDMREAD        0x00008000      /* used for read notification */
 289 #define OLDNDELAY       0x00010000      /* use old TTY semantics for */
 290                                         /* NDELAY reads and writes */
 291         /*              0x00020000         unused */
 292         /*              0x00040000         unused */
 293 #define STRTOSTOP       0x00080000      /* block background writes */
 294 #define STRCMDWAIT      0x00100000      /* someone is doing an _I_CMD */
 295         /*              0x00200000         unused */
 296 #define STRMOUNT        0x00400000      /* stream is mounted */
 297 #define STRNOTATMARK    0x00800000      /* Not at mark (when empty read q) */
 298 #define STRDELIM        0x01000000      /* generate delimited messages */
 299 #define STRATMARK       0x02000000      /* At mark (due to MSGMARKNEXT) */
 300 #define STZCNOTIFY      0x04000000      /* wait for zerocopy mblk to be acked */
 301 #define STRPLUMB        0x08000000      /* push/pop pending */
 302 #define STREOF          0x10000000      /* End-of-file indication */
 303 #define STREOPENFAIL    0x20000000      /* indicates if re-open has failed */
 304 #define STRMATE         0x40000000      /* this stream is a mate */
 305 #define STRHASLINKS     0x80000000      /* I_LINKs under this stream */
 306 
 307 /*
 308  * Copy-related flags (sd_copyflag), set by SO_COPYOPT.
 309  */
 310 #define STZCVMSAFE      0x00000001      /* safe to borrow file (segmapped) */
 311                                         /* pages instead of bcopy */
 312 #define STZCVMUNSAFE    0x00000002      /* unsafe to borrow file pages */
 313 #define STRCOPYCACHED   0x00000004      /* copy should NOT bypass cache */
 314 
 315 /*
 316  * Options and flags for strrput (sd_rput_opt)
 317  */
 318 #define SR_POLLIN       0x00000001      /* pollwakeup needed for band0 data */
 319 #define SR_SIGALLDATA   0x00000002      /* Send SIGPOLL for all M_DATA */
 320 #define SR_CONSOL_DATA  0x00000004      /* Consolidate M_DATA onto q_last */
 321 #define SR_IGN_ZEROLEN  0x00000008      /* Ignore zero-length M_DATA */
 322 
 323 /*
 324  * Options and flags for strwrite/strputmsg (sd_wput_opt)
 325  */
 326 #define SW_SIGPIPE      0x00000001      /* Send SIGPIPE for write error */
 327 #define SW_RECHECK_ERR  0x00000002      /* Recheck errors in strwrite loop */
 328 #define SW_SNDZERO      0x00000004      /* send 0-length msg down pipe/FIFO */
 329 
 330 /*
 331  * Options and flags for strread (sd_read_opt)
 332  */
 333 #define RD_MSGDIS       0x00000001      /* read msg discard */
 334 #define RD_MSGNODIS     0x00000002      /* read msg no discard */
 335 #define RD_PROTDAT      0x00000004      /* read M_[PC]PROTO contents as data */
 336 #define RD_PROTDIS      0x00000008      /* discard M_[PC]PROTO blocks and */
 337                                         /* retain data blocks */
 338 /*
 339  * Flags parameter for strsetrputhooks() and strsetwputhooks().
 340  * These flags define the interface for setting the above internal
 341  * flags in sd_rput_opt (SR_*) and sd_wput_opt (SW_*).
 342  */
 343 #define SH_CONSOL_DATA  0x00000001      /* Consolidate M_DATA onto q_last */
 344 #define SH_SIGALLDATA   0x00000002      /* Send SIGPOLL for all M_DATA */
 345 #define SH_IGN_ZEROLEN  0x00000004      /* Drop zero-length M_DATA */
 346 
 347 #define SH_SIGPIPE      0x00000100      /* Send SIGPIPE for write error */
 348 #define SH_RECHECK_ERR  0x00000200      /* Recheck errors in strwrite loop */
 349 
 350 /*
 351  * Each queue points to a sync queue (the inner perimeter) which keeps
 352  * track of the number of threads that are inside a given queue (sq_count)
 353  * and also is used to implement the asynchronous putnext
 354  * (by queuing messages if the queue can not be entered.)
 355  *
 356  * Messages are queued on sq_head/sq_tail including deferred qwriter(INNER)
 357  * messages. The sq_head/sq_tail list is a singly-linked list with
 358  * b_queue recording the queue and b_prev recording the function to
 359  * be called (either the put procedure or a qwriter callback function.)
 360  *
 361  * The sq_count counter tracks the number of threads that are
 362  * executing inside the perimeter or (in the case of outer perimeters)
 363  * have some work queued for them relating to the perimeter. The sq_rmqcount
 364  * counter tracks the subset which are in removeq() (usually invoked from
 365  * qprocsoff(9F)).
 366  *
 367  * In addition a module writer can declare that the module has an outer
 368  * perimeter (by setting D_MTOUTPERIM) in which case all inner perimeter
 369  * syncq's for the module point (through sq_outer) to an outer perimeter
 370  * syncq. The outer perimeter consists of the doubly linked list (sq_onext and
 371  * sq_oprev) linking all the inner perimeter syncq's with the outer perimeter
 372  * syncq. This is used to implement qwriter(OUTER) (an asynchronous way of
 373  * getting exclusive access at the outer perimeter) and outer_enter/exit
 374  * which are used by the framework to acquire exclusive access to the outer
 375  * perimeter during open and close of modules that have set D_MTOUTPERIM.
 376  *
 377  * In the inner perimeter case sq_save is available for use by machine
 378  * dependent code. sq_head/sq_tail are used to queue deferred messages on
 379  * the inner perimeter syncqs and to queue become_writer requests on the
 380  * outer perimeter syncqs.
 381  *
 382  * Note: machine dependent optimized versions of putnext may depend
 383  * on the order of sq_flags and sq_count (so that they can e.g.
 384  * read these two fields in a single load instruction.)
 385  *
 386  * Per perimeter SQLOCK/sq_count in putnext/put may be replaced by per cpu
 387  * sq_putlocks/sq_putcounts each living in a separate cache line. Obviously
 388  * sq_putlock[x] protects sq_putcount[x]. putnext/put routine will grab only 1
 389  * of sq_putlocks and update only 1 of sq_putcounts. strlock() and many
 390  * other routines in strsubr.c and ddi.c will grab all sq_putlocks (as well as
 391  * SQLOCK) and figure out the count value as the sum of sq_count and all of
 392  * sq_putcounts. The idea is to make critical fast path -- putnext -- much
 393  * faster at the expense of much less often used slower path like
 394  * strlock(). One known case where entersq/strlock is executed pretty often is
 395  * SpecWeb but since IP is SQ_CIOC and socket TCP/IP stream is nextless
 396  * there's no need to grab multiple sq_putlocks and look at sq_putcounts. See
 397  * strsubr.c for more comments.
 398  *
 399  * Note regular SQLOCK and sq_count are still used in many routines
 400  * (e.g. entersq(), rwnext()) in the same way as before sq_putlocks were
 401  * introduced.
 402  *
 403  * To understand when all sq_putlocks need to be held and all sq_putcounts
 404  * need to be added up one needs to look closely at putnext code. Basically if
 405  * a routine like e.g. wait_syncq() needs to be sure that perimeter is empty
 406  * all sq_putlocks/sq_putcounts need to be held/added up. On the other hand
 407  * there's no need to hold all sq_putlocks and count all sq_putcounts in
 408  * routines like leavesq()/dropsq() and etc. since they are usually exit
 409  * counterparts of entersq/outer_enter() and etc. which have already either
 410  * prevented put entry points from executing or did not care about put
 411  * entrypoints. entersq() doesn't need to care about sq_putlocks/sq_putcounts
 412  * if the entry point has a shared access since put has the highest degree of
 413  * concurrency and such entersq() does not intend to block out put
 414  * entrypoints.
 415  *
 416  * Before sq_putcounts were introduced the standard way to wait for perimeter
 417  * to become empty was:
 418  *
 419  *      mutex_enter(SQLOCK(sq));
 420  *      while (sq->sq_count > 0) {
 421  *              sq->sq_flags |= SQ_WANTWAKEUP;
 422  *              cv_wait(&sq->sq_wait, SQLOCK(sq));
 423  *      }
 424  *      mutex_exit(SQLOCK(sq));
 425  *
 426  * The new way is:
 427  *
 428  *      mutex_enter(SQLOCK(sq));
 429  *      count = sq->sq_count;
 430  *      SQ_PUTLOCKS_ENTER(sq);
 431  *      SUM_SQ_PUTCOUNTS(sq, count);
 432  *      while (count != 0) {
 433  *              sq->sq_flags |= SQ_WANTWAKEUP;
 434  *              SQ_PUTLOCKS_EXIT(sq);
 435  *              cv_wait(&sq->sq_wait, SQLOCK(sq));
 436  *              count = sq->sq_count;
 437  *              SQ_PUTLOCKS_ENTER(sq);
 438  *              SUM_SQ_PUTCOUNTS(sq, count);
 439  *      }
 440  *      SQ_PUTLOCKS_EXIT(sq);
 441  *      mutex_exit(SQLOCK(sq));
 442  *
 443  * Note that SQ_WANTWAKEUP is set before dropping SQ_PUTLOCKS. This makes sure
 444  * putnext won't skip a wakeup.
 445  *
 446  * sq_putlocks are treated as the extension of SQLOCK for lock ordering
 447  * purposes and are always grabbed right after grabbing SQLOCK and released
 448  * right before releasing SQLOCK. This also allows dynamic creation of
 449  * sq_putlocks while holding SQLOCK (by making sq_ciputctrl non null even when
 450  * the stream is already in use). Only in putnext one of sq_putlocks
 451  * is grabbed instead of SQLOCK. putnext return path remembers what counter it
 452  * incremented and decrements the right counter on its way out.
 453  */
 454 
 455 struct syncq {
 456         kmutex_t        sq_lock;        /* atomic access to syncq */
 457         uint16_t        sq_count;       /* # threads inside */
 458         uint16_t        sq_flags;       /* state and some type info */
 459         /*
 460          * Distributed syncq scheduling
 461          *  The list of queues is handled by the sq_head and
 462          *  sq_tail fields.
 463          *
 464          *  The list of events is handled by the sq_evhead and sq_evtail
 465          *  fields.
 466          */
 467         queue_t         *sq_head;       /* queue of deferred messages */
 468         queue_t         *sq_tail;       /* queue of deferred messages */
 469         mblk_t          *sq_evhead;     /* Event message on the syncq */
 470         mblk_t          *sq_evtail;     /* Tail of the event list */
 471         uint_t          sq_nqueues;     /* # of queues on this sq */
 472         /*
 473          * Concurrency and condition variables
 474          */
 475         uint16_t        sq_type;        /* type (concurrency) of syncq */
 476         uint16_t        sq_rmqcount;    /* # threads inside removeq() */
 477         kcondvar_t      sq_wait;        /* block on this sync queue */
 478         kcondvar_t      sq_exitwait;    /* waiting for thread to leave the */
 479                                         /* inner perimeter */
 480         /*
 481          * Handling synchronous callbacks such as qtimeout and qbufcall
 482          */
 483         ushort_t        sq_callbflags;  /* flags for callback synchronization */
 484         callbparams_id_t sq_cancelid;   /* id of callback being cancelled */
 485         struct callbparams *sq_callbpend;       /* Pending callbacks */
 486 
 487         /*
 488          * Links forming an outer perimeter from one outer syncq and
 489          * a set of inner sync queues.
 490          */
 491         struct syncq    *sq_outer;      /* Pointer to outer perimeter */
 492         struct syncq    *sq_onext;      /* Linked list of syncq's making */
 493         struct syncq    *sq_oprev;      /* up the outer perimeter. */
 494         /*
 495          * Support for low contention concurrent putnext.
 496          */
 497         ciputctrl_t     *sq_ciputctrl;  /* sq_putlocks and sq_putcounts */
 498         uint_t          sq_nciputctrl;  /* sizing of above (TODO confirm) */
 499         /*
 500          * Counter for the number of threads wanting to become exclusive.
 501          */
 502         uint_t          sq_needexcl;
 503         /*
 504          * The following fields are used for scheduling a syncq for
 505          * background processing. The sq_svcflags field is protected by
 506          * SQLOCK lock.
 507          */
 508         struct syncq    *sq_next;       /* for syncq scheduling */
 509         void *          sq_servid;      /* presumably id of bg service */
 510         uint_t          sq_servcount;   /* # pending background threads */
 511         uint_t          sq_svcflags;    /* Scheduling flags     */
 512         clock_t         sq_tstamp;      /* Time when it was enabled */
 513         /*
 514          * Maximum priority of the queues on this syncq.
 515          */
 516         pri_t           sq_pri;
 517 };
 518 typedef struct syncq syncq_t;
 519 
 520 /*
 521  * sync queue scheduling flags (for sq_svcflags).
 522  */
 523 #define SQ_SERVICE      0x1             /* being serviced */
 524 #define SQ_BGTHREAD     0x2             /* awaiting service by bg thread */
 525 #define SQ_DISABLED     0x4             /* don't put syncq in service list */
 526 
 527 /*
 528  * FASTPUT bit in sq_count/putcount.
 529  */
 530 #define SQ_FASTPUT      0x8000
 531 #define SQ_FASTMASK     0x7FFF
 532 
 533 /*
 534  * sync queue state flags (for sq_flags).
 535  */
 536 #define SQ_EXCL         0x0001          /* exclusive access to inner */
 537                                         /*      perimeter */
 538 #define SQ_BLOCKED      0x0002          /* qprocsoff */
 539 #define SQ_FROZEN       0x0004          /* freezestr */
 540 #define SQ_WRITER       0x0008          /* qwriter(OUTER) pending or running */
 541 #define SQ_MESSAGES     0x0010          /* messages on syncq */
 542 #define SQ_WANTWAKEUP   0x0020          /* do cv_broadcast on sq_wait */
 543 #define SQ_WANTEXWAKEUP 0x0040          /* do cv_broadcast on sq_exitwait */
 544 #define SQ_EVENTS       0x0080          /* Events pending */
 545 #define SQ_QUEUED       (SQ_MESSAGES | SQ_EVENTS)
 546 #define SQ_FLAGMASK     0x00FF
 547 
 548 /*
 549  * Test a queue to see if inner perimeter is exclusive.
 550  */
 551 #define PERIM_EXCL(q)   ((q)->q_syncq->sq_flags & SQ_EXCL)
 552 
 553 /*
 554  * If any of these flags are set it is not possible for a thread to
 555  * enter a put or service procedure. Instead it must either block
 556  * or put the message on the syncq.
 557  */
 558 #define SQ_GOAWAY       (SQ_EXCL|SQ_BLOCKED|SQ_FROZEN|SQ_WRITER|\
 559                         SQ_QUEUED)
 560 /*
 561  * If any of these flags are set it is not possible to drain the syncq.
 562  */
 563 #define SQ_STAYAWAY     (SQ_BLOCKED|SQ_FROZEN|SQ_WRITER)
 564 
 565 /*
 566  * Flags to trigger syncq tail processing.
 567  */
 568 #define SQ_TAIL         (SQ_QUEUED|SQ_WANTWAKEUP|SQ_WANTEXWAKEUP)
 569 
 570 /*
 571  * Syncq types (stored in sq_type)
 572  * The SQ_TYPES_IN_FLAGS (ciput) are also stored in sq_flags
 573  * for performance reasons. Thus these type values have to be in the low
 574  * 16 bits and not conflict with the sq_flags values above.
 575  *
 576  * Notes:
 577  *  - putnext() and put() assume that the put procedures have the highest
 578  *    degree of concurrency. Thus if any of the SQ_CI* are set then SQ_CIPUT
 579  *    has to be set. This restriction can be lifted by adding code to putnext
 580  *    and put that checks that sq_count == 0 like entersq does.
 581  *  - putnext() and put() do not currently handle !SQ_COPUT
 582  *  - In order to implement !SQ_COCB outer_enter has to be fixed so that
 583  *    the callback can be cancelled while cv_waiting in outer_enter.
 584  *  - If SQ_CISVC needs to be implemented, qprocsoff() needs to wait
 585  *    for the currently running services to stop (wait for QINSERVICE
 586  *    to go off). disable_svc called from qprocsoff disables only
 587  *    services that will be run in future.
 588  *
 589  * All the SQ_CO flags are set when there is no outer perimeter.
 590  */
 591 #define SQ_CIPUT        0x0100          /* Concurrent inner put proc */
 592 #define SQ_CISVC        0x0200          /* Concurrent inner svc proc */
 593 #define SQ_CIOC         0x0400          /* Concurrent inner open/close */
 594 #define SQ_CICB         0x0800          /* Concurrent inner callback */
 595 #define SQ_COPUT        0x1000          /* Concurrent outer put proc */
 596 #define SQ_COSVC        0x2000          /* Concurrent outer svc proc */
 597 #define SQ_COOC         0x4000          /* Concurrent outer open/close */
 598 #define SQ_COCB         0x8000          /* Concurrent outer callback */
 599 
 600 /* Types also kept in sq_flags for performance */
 601 #define SQ_TYPES_IN_FLAGS       (SQ_CIPUT)
 602 
 603 #define SQ_CI           (SQ_CIPUT|SQ_CISVC|SQ_CIOC|SQ_CICB)
 604 #define SQ_CO           (SQ_COPUT|SQ_COSVC|SQ_COOC|SQ_COCB)
 605 #define SQ_TYPEMASK     (SQ_CI|SQ_CO)
 606 
 607 /*
 608  * Flag combinations passed to entersq and leavesq to specify the type
 609  * of entry point.
 610  */
 611 #define SQ_PUT          (SQ_CIPUT|SQ_COPUT)
 612 #define SQ_SVC          (SQ_CISVC|SQ_COSVC)
 613 #define SQ_OPENCLOSE    (SQ_CIOC|SQ_COOC)
 614 #define SQ_CALLBACK     (SQ_CICB|SQ_COCB)
 615 
 616 /*
 617  * Other syncq types which are not copied into flags.
 618  */
 619 #define SQ_PERMOD       0x01            /* Syncq is PERMOD */
 620 
 621 /*
 622  * Asynchronous callback qun*** flag.
 623  * The mechanism these flags are used in is one where callbacks enter
 624  * the perimeter thanks to framework support. To use this mechanism
 625  * the q* and qun* flavors of the callback routines must be used,
 626  * e.g. qtimeout and quntimeout. The synchronization provided by the flags
 627  * avoids deadlocks between blocking qun* routines and the perimeter
 628  * lock.
 629  */
 630 #define SQ_CALLB_BYPASSED       0x01            /* bypassed callback fn */
 631 
 632 /*
 633  * Cancel callback mask (its component flags are defined just below it).
 634  * The mask expands as the number of cancelable callback types grows.
 635  * Note - separate callback flag because different callbacks have
 636  * overlapping id space.
 637  */
 638 #define SQ_CALLB_CANCEL_MASK    (SQ_CANCEL_TOUT|SQ_CANCEL_BUFCALL)
 639 
 640 #define SQ_CANCEL_TOUT          0x02            /* cancel timeout request */
 641 #define SQ_CANCEL_BUFCALL       0x04            /* cancel bufcall request */
 642 
/*
 * Record describing one callback; callbparams_alloc() returns a pointer to
 * one of these.  Field meanings are not spelled out in this header: the
 * notes below are read off the names and types and should be confirmed
 * against the implementation.
 */
typedef struct callbparams {
        syncq_t         *cbp_sq;        /* syncq; presumably the owner */
        void            (*cbp_func)(void *);    /* function to call */
        void            *cbp_arg;       /* presumably handed to cbp_func */
        callbparams_id_t cbp_id;        /* identifier of this callback */
        uint_t          cbp_flags;      /* presumably SQ_CALLB_BYPASSED / */
                                        /* SQ_CANCEL_ bits - confirm */
        struct callbparams *cbp_next;   /* next record in a linked list */
        size_t          cbp_size;       /* a size; which one - TODO confirm */
} callbparams_t;
 652 
/*
 * One bufcall request.  Requests link to each other through bc_next;
 * struct bclist keeps head and tail pointers to such a chain.
 */
typedef struct strbufcall {
        void            (*bc_func)(void *);     /* function to call */
        void            *bc_arg;        /* presumably handed to bc_func */
        size_t          bc_size;        /* presumably the requested size */
        bufcall_id_t    bc_id;          /* identifier of this request */
        struct strbufcall *bc_next;     /* next request in the chain */
        kthread_id_t    bc_executor;    /* a thread; presumably the one */
                                        /* running bc_func - confirm */
} strbufcall_t;
 661 
 662 /*
 663  * Structure of list of processes to be sent SIGPOLL/SIGURG signal
 664  * on request.  The valid S_* events are defined in stropts.h.
 665  */
 666 typedef struct strsig {
 667         struct pid      *ss_pidp;       /* pid/pgrp pointer */
 668         pid_t           ss_pid;         /* positive pid, negative pgrp */
 669         int             ss_events;      /* S_* events */
 670         struct strsig   *ss_next;
 671 } strsig_t;
 672 
 673 /*
 674  * bufcall list
 675  */
struct bclist {
        strbufcall_t    *bc_head;       /* first request on the list */
        strbufcall_t    *bc_tail;       /* last request on the list */
};
 680 
 681 /*
 682  * Structure used to track mux links and unlinks.
 683  */
 684 struct mux_node {
 685         major_t          mn_imaj;       /* internal major device number */
 686         uint16_t         mn_indegree;   /* number of incoming edges */
 687         struct mux_node *mn_originp;    /* where we came from during search */
 688         struct mux_edge *mn_startp;     /* where search left off in mn_outp */
 689         struct mux_edge *mn_outp;       /* list of outgoing edges */
 690         uint_t           mn_flags;      /* see below */
 691 };
 692 
 693 /*
 694  * Flags for mux_nodes.
 695  */
 696 #define VISITED 1
 697 
 698 /*
 699  * Edge structure - a list of these is hung off the
 700  * mux_node to represent the outgoing edges.
 701  */
 702 struct mux_edge {
 703         struct mux_node *me_nodep;      /* edge leads to this node */
 704         struct mux_edge *me_nextp;      /* next edge */
 705         int              me_muxid;      /* id of link */
 706         dev_t            me_dev;        /* dev_t - used for kernel PUNLINK */
 707 };
 708 
 709 /*
 710  * Queue info
 711  *
 712  * The syncq is included here to reduce memory fragmentation
 713  * for kernel memory allocators that only allocate in sizes that are
 714  * powers of two. If the kernel memory allocator changes this should
 715  * be revisited.
 716  */
 717 typedef struct queinfo {
 718         struct queue    qu_rqueue;      /* read queue - must be first */
 719         struct queue    qu_wqueue;      /* write queue - must be second */
 720         struct syncq    qu_syncq;       /* syncq - must be third */
 721 } queinfo_t;
 722 
 723 /*
 724  * Multiplexed streams info
 725  */
 726 typedef struct linkinfo {
 727         struct linkblk  li_lblk;        /* must be first */
 728         struct file     *li_fpdown;     /* file pointer for lower stream */
 729         struct linkinfo *li_next;       /* next in list */
 730         struct linkinfo *li_prev;       /* previous in list */
 731 } linkinfo_t;
 732 
 733 /*
 * List of syncq's used by freezestr/unfreezestr
 735  */
/* One element of a singly linked list of syncq pointers. */
typedef struct syncql {
        struct syncql   *sql_next;      /* next element of the list */
        syncq_t         *sql_sq;        /* syncq this element refers to */
} syncql_t;
 740 
/*
 * A list of syncql_t elements together with inline storage for them.
 * sqlist_array is declared with four entries, but per its comment the
 * structure may carry more; sqlist_size records the real size in bytes.
 */
typedef struct sqlist {
        syncql_t        *sqlist_head;           /* first element of the list */
        size_t          sqlist_size;            /* structure size in bytes */
        size_t          sqlist_index;           /* next free entry in array */
        syncql_t        sqlist_array[4];        /* 4 or more entries */
} sqlist_t;
 747 
/*
 * Links a streamtab to a syncq.  hold_dm() returns a pointer to one of
 * these and rele_dm() takes one; presumably they adjust dm_ref - confirm.
 */
typedef struct perdm {
        struct perdm            *dm_next;       /* next perdm in a list */
        syncq_t                 *dm_sq;         /* the syncq */
        struct streamtab        *dm_str;        /* the streamtab */
        uint_t                  dm_ref;         /* presumably a ref count */
} perdm_t;
 754 
 755 #define NEED_DM(dmp, qflag) \
 756         (dmp == NULL && (qflag & (QPERMOD | QMTOUTPERIM)))
 757 
 758 /*
 759  * fmodsw_impl_t is used within the kernel. fmodsw is used by
 760  * the modules/drivers. The information is copied from fmodsw
 761  * defined in the module/driver into the fmodsw_impl_t structure
 762  * during the module/driver initialization.
 763  */
typedef struct fmodsw_impl      fmodsw_impl_t;

/*
 * Field notes are read off the names and types; confirm them against the
 * fmodsw implementation before relying on them.
 */
struct fmodsw_impl {
        fmodsw_impl_t           *f_next;        /* next entry in a list */
        char                    f_name[FMNAMESZ + 1];   /* name; the +1 is */
                                                /* presumably for the NUL */
        struct streamtab        *f_str;         /* the streamtab */
        uint32_t                f_qflag;        /* presumably queue flags */
        uint32_t                f_sqtype;       /* presumably syncq type */
        perdm_t                 *f_dmp;         /* associated perdm, if any */
        uint32_t                f_ref;          /* presumably a ref count */
        uint32_t                f_hits;         /* presumably a hit counter */
};
 776 
/*
 * Flag bits accepted by fmodsw_find().  What each bit makes the lookup do
 * is not shown in this header; the names suggest "hold the entry" and
 * "load the module" - confirm against fmodsw_find() itself.
 */
typedef enum {
        FMODSW_HOLD =   0x00000001,
        FMODSW_LOAD =   0x00000002
} fmodsw_flags_t;
 781 
/*
 * STREAMS information kept for a device; the global devimpl points at
 * these.  The fields parallel f_str, f_qflag, f_sqtype and f_dmp of
 * struct fmodsw_impl.
 */
typedef struct cdevsw_impl {
        struct streamtab        *d_str;         /* the streamtab */
        uint32_t                d_qflag;        /* presumably queue flags */
        uint32_t                d_sqtype;       /* presumably syncq type */
        perdm_t                 *d_dmp;         /* associated perdm, if any */
} cdevsw_impl_t;
 788 
 789 /*
 790  * Enumeration of the types of access that can be requested for a
 791  * controlling terminal under job control.
 792  */
 793 enum jcaccess {
 794         JCREAD,                 /* read data on a ctty */
 795         JCWRITE,                /* write data to a ctty */
 796         JCSETP,                 /* set ctty parameters */
 797         JCGETP                  /* get ctty parameters */
 798 };
 799 
 800 struct str_stack {
 801         netstack_t      *ss_netstack;   /* Common netstack */
 802 
 803         kmutex_t        ss_sad_lock;    /* autopush lock */
 804         mod_hash_t      *ss_sad_hash;
 805         size_t          ss_sad_hash_nchains;
 806         struct saddev   *ss_saddev;     /* sad device array */
 807         int             ss_sadcnt;      /* number of sad devices */
 808 
 809         int             ss_devcnt;      /* number of mux_nodes */
 810         struct mux_node *ss_mux_nodes;  /* mux info for cycle checking */
 811 };
 812 typedef struct str_stack str_stack_t;
 813 
 814 /*
 815  * Finding related queues
 816  */
/* The q_stream field of queue q. */
#define STREAM(q)       ((q)->q_stream)
/*
 * The address two queue-sized elements past rq, as a syncq_t pointer.
 * When rq is the qu_rqueue member of a queinfo_t this is where qu_syncq
 * sits, which is why that structure insists on the order read queue,
 * write queue, syncq.
 */
#define SQ(rq)          ((syncq_t *)((rq) + 2))
 819 
 820 /*
 821  * Get the module/driver name for a queue.  Since some queues don't have
 822  * q_info structures (e.g., see log_makeq()), fall back to "?".
 823  */
 824 #define Q2NAME(q) \
 825         (((q)->q_qinfo != NULL && (q)->q_qinfo->qi_minfo->mi_idname != NULL) ? \
 826         (q)->q_qinfo->qi_minfo->mi_idname : "?")
 827 
 828 /*
 829  * Locking macros
 830  */
 831 #define QLOCK(q)        (&(q)->q_lock)
 832 #define SQLOCK(sq)      (&(sq)->sq_lock)
 833 
 834 #define STREAM_PUTLOCKS_ENTER(stp) {                                           \
 835                 ASSERT(MUTEX_HELD(&(stp)->sd_lock));                            \
 836                 if ((stp)->sd_ciputctrl != NULL) {                          \
 837                         int i;                                                 \
 838                         int nlocks = (stp)->sd_nciputctrl;                  \
 839                         ciputctrl_t *cip = (stp)->sd_ciputctrl;                     \
 840                         for (i = 0; i <= nlocks; i++) {                             \
 841                                 mutex_enter(&cip[i].ciputctrl_lock);               \
 842                         }                                                      \
 843                 }                                                              \
 844         }
 845 
 846 #define STREAM_PUTLOCKS_EXIT(stp) {                                            \
 847                 ASSERT(MUTEX_HELD(&(stp)->sd_lock));                            \
 848                 if ((stp)->sd_ciputctrl != NULL) {                          \
 849                         int i;                                                 \
 850                         int nlocks = (stp)->sd_nciputctrl;                  \
 851                         ciputctrl_t *cip = (stp)->sd_ciputctrl;                     \
 852                         for (i = 0; i <= nlocks; i++) {                             \
 853                                 mutex_exit(&cip[i].ciputctrl_lock);        \
 854                         }                                                      \
 855                 }                                                              \
 856         }
 857 
 858 #define SQ_PUTLOCKS_ENTER(sq) {                                                \
 859                 ASSERT(MUTEX_HELD(SQLOCK(sq)));                                \
 860                 if ((sq)->sq_ciputctrl != NULL) {                           \
 861                         int i;                                                 \
 862                         int nlocks = (sq)->sq_nciputctrl;                   \
 863                         ciputctrl_t *cip = (sq)->sq_ciputctrl;                      \
 864                         ASSERT((sq)->sq_type & SQ_CIPUT);                       \
 865                         for (i = 0; i <= nlocks; i++) {                             \
 866                                 mutex_enter(&cip[i].ciputctrl_lock);               \
 867                         }                                                      \
 868                 }                                                              \
 869         }
 870 
 871 #define SQ_PUTLOCKS_EXIT(sq) {                                                 \
 872                 ASSERT(MUTEX_HELD(SQLOCK(sq)));                                \
 873                 if ((sq)->sq_ciputctrl != NULL) {                           \
 874                         int i;                                                 \
 875                         int nlocks = (sq)->sq_nciputctrl;                   \
 876                         ciputctrl_t *cip = (sq)->sq_ciputctrl;                      \
 877                         ASSERT((sq)->sq_type & SQ_CIPUT);                       \
 878                         for (i = 0; i <= nlocks; i++) {                             \
 879                                 mutex_exit(&cip[i].ciputctrl_lock);        \
 880                         }                                                      \
 881                 }                                                              \
 882         }
 883 
 884 #define SQ_PUTCOUNT_SETFAST(sq) {                                       \
 885                 ASSERT(MUTEX_HELD(SQLOCK(sq)));                         \
 886                 if ((sq)->sq_ciputctrl != NULL) {                    \
 887                         int i;                                          \
 888                         int nlocks = (sq)->sq_nciputctrl;            \
 889                         ciputctrl_t *cip = (sq)->sq_ciputctrl;               \
 890                         ASSERT((sq)->sq_type & SQ_CIPUT);                \
 891                         for (i = 0; i <= nlocks; i++) {                      \
 892                                 mutex_enter(&cip[i].ciputctrl_lock);        \
 893                                 cip[i].ciputctrl_count |= SQ_FASTPUT;   \
 894                                 mutex_exit(&cip[i].ciputctrl_lock); \
 895                         }                                               \
 896                 }                                                       \
 897         }
 898 
 899 #define SQ_PUTCOUNT_CLRFAST(sq) {                                       \
 900                 ASSERT(MUTEX_HELD(SQLOCK(sq)));                         \
 901                 if ((sq)->sq_ciputctrl != NULL) {                    \
 902                         int i;                                          \
 903                         int nlocks = (sq)->sq_nciputctrl;            \
 904                         ciputctrl_t *cip = (sq)->sq_ciputctrl;               \
 905                         ASSERT((sq)->sq_type & SQ_CIPUT);                \
 906                         for (i = 0; i <= nlocks; i++) {                      \
 907                                 mutex_enter(&cip[i].ciputctrl_lock);        \
 908                                 cip[i].ciputctrl_count &= ~SQ_FASTPUT;      \
 909                                 mutex_exit(&cip[i].ciputctrl_lock); \
 910                         }                                               \
 911                 }                                                       \
 912         }
 913 
 914 
 915 #ifdef  DEBUG
 916 
 917 #define SQ_PUTLOCKS_HELD(sq) {                                                 \
 918                 ASSERT(MUTEX_HELD(SQLOCK(sq)));                                \
 919                 if ((sq)->sq_ciputctrl != NULL) {                           \
 920                         int i;                                                 \
 921                         int nlocks = (sq)->sq_nciputctrl;                   \
 922                         ciputctrl_t *cip = (sq)->sq_ciputctrl;                      \
 923                         ASSERT((sq)->sq_type & SQ_CIPUT);                       \
 924                         for (i = 0; i <= nlocks; i++) {                             \
 925                                 ASSERT(MUTEX_HELD(&cip[i].ciputctrl_lock));    \
 926                         }                                                      \
 927                 }                                                              \
 928         }
 929 
 930 #define SUMCHECK_SQ_PUTCOUNTS(sq, countcheck) {                                \
 931                 if ((sq)->sq_ciputctrl != NULL) {                           \
 932                         int i;                                                 \
 933                         uint_t count = 0;                                      \
 934                         int ncounts = (sq)->sq_nciputctrl;                  \
 935                         ASSERT((sq)->sq_type & SQ_CIPUT);                       \
 936                         for (i = 0; i <= ncounts; i++) {                    \
 937                                 count +=                                       \
 938                                     (((sq)->sq_ciputctrl[i].ciputctrl_count) & \
 939                                     SQ_FASTMASK);                              \
 940                         }                                                      \
 941                         ASSERT(count == (countcheck));                         \
 942                 }                                                              \
 943         }
 944 
 945 #define SUMCHECK_CIPUTCTRL_COUNTS(ciput, nciput, countcheck) {                 \
 946                 int i;                                                         \
 947                 uint_t count = 0;                                              \
 948                 ASSERT((ciput) != NULL);                                       \
 949                 for (i = 0; i <= (nciput); i++) {                           \
 950                         count += (((ciput)[i].ciputctrl_count) &           \
 951                             SQ_FASTMASK);                                      \
 952                 }                                                              \
 953                 ASSERT(count == (countcheck));                                 \
 954         }
 955 
 956 #else   /* DEBUG */
 957 
 958 #define SQ_PUTLOCKS_HELD(sq)
 959 #define SUMCHECK_SQ_PUTCOUNTS(sq, countcheck)
 960 #define SUMCHECK_CIPUTCTRL_COUNTS(sq, nciput, countcheck)
 961 
 962 #endif  /* DEBUG */
 963 
 964 #define SUM_SQ_PUTCOUNTS(sq, count) {                                          \
 965                 if ((sq)->sq_ciputctrl != NULL) {                           \
 966                         int i;                                                 \
 967                         int ncounts = (sq)->sq_nciputctrl;                  \
 968                         ciputctrl_t *cip = (sq)->sq_ciputctrl;                      \
 969                         ASSERT((sq)->sq_type & SQ_CIPUT);                       \
 970                         for (i = 0; i <= ncounts; i++) {                    \
 971                                 (count) += ((cip[i].ciputctrl_count) &             \
 972                                     SQ_FASTMASK);                              \
 973                         }                                                      \
 974                 }                                                              \
 975         }
 976 
/*
 * Enter and exit the sd_lock mutex of stream stp.  The names suggest the
 * lock is taken to guard q_next; that is not shown here - confirm against
 * the callers.
 */
#define CLAIM_QNEXT_LOCK(stp)   mutex_enter(&(stp)->sd_lock)
#define RELEASE_QNEXT_LOCK(stp) mutex_exit(&(stp)->sd_lock)
 979 
 980 /*
 981  * syncq message manipulation macros.
 982  */
 983 /*
 984  * Put a message on the queue syncq.
 985  * Assumes QLOCK held.
 986  */
 987 #define SQPUT_MP(qp, mp)                                                \
 988         {                                                               \
 989                 qp->q_syncqmsgs++;                                   \
 990                 if (qp->q_sqhead == NULL) {                          \
 991                         qp->q_sqhead = qp->q_sqtail = mp;         \
 992                 } else {                                                \
 993                         qp->q_sqtail->b_next = mp;                        \
 994                         qp->q_sqtail = mp;                           \
 995                 }                                                       \
 996                 set_qfull(qp);                                          \
 997         }
 998 
 999 /*
1000  * Miscellaneous parameters and flags.
1001  */
1002 
1003 /*
1004  * Default timeout in milliseconds for ioctls and close
1005  */
1006 #define STRTIMOUT 15000
1007 
1008 /*
1009  * Flag values for stream io
1010  */
1011 #define WRITEWAIT       0x1     /* waiting for write event */
1012 #define READWAIT        0x2     /* waiting for read event */
1013 #define NOINTR          0x4     /* error is not to be set for signal */
1014 #define GETWAIT         0x8     /* waiting for getmsg event */
1015 
1016 /*
1017  * These flags need to be unique for stream io name space
1018  * and copy modes name space.  These flags allow strwaitq
1019  * and strdoioctl to proceed as if signals or errors on the stream
1020  * head have not occurred; i.e. they will be detected by some other
1021  * means.
1022  * STR_NOSIG does not allow signals to interrupt the call
1023  * STR_NOERROR does not allow stream head read, write or hup errors to
1024  * affect the call.  When used with strdoioctl(), if a previous ioctl
1025  * is pending and times out, STR_NOERROR will cause strdoioctl() to not
1026  * return ETIME. If, however, the requested ioctl times out, ETIME
1027  * will be returned (use ic_timout instead)
1028  * STR_PEEK is used to inform strwaitq that the reader is peeking at data
1029  * and that a non-persistent error should not be cleared.
1030  * STR_DELAYERR is used to inform strwaitq that it should not check errors
1031  * after being awoken since, in addition to an error, there might also be
1032  * data queued on the stream head read queue.
1033  */
1034 #define STR_NOSIG       0x10    /* Ignore signals during strdoioctl/strwaitq */
1035 #define STR_NOERROR     0x20    /* Ignore errors during strdoioctl/strwaitq */
1036 #define STR_PEEK        0x40    /* Peeking behavior on non-persistent errors */
1037 #define STR_DELAYERR    0x80    /* Do not check errors on return */
1038 
1039 /*
1040  * Copy modes for tty and I_STR ioctls
1041  */
1042 #define U_TO_K  01                      /* User to Kernel */
1043 #define K_TO_K  02                      /* Kernel to Kernel */
1044 
1045 /*
1046  * Mux defines.
1047  */
1048 #define LINKNORMAL      0x01            /* normal mux link */
1049 #define LINKPERSIST     0x02            /* persistent mux link */
1050 #define LINKTYPEMASK    0x03            /* bitmask of all link types */
1051 #define LINKCLOSE       0x04            /* unlink from strclose */
1052 
1053 /*
1054  * Definitions of Streams macros and function interfaces.
1055  */
1056 
1057 /*
 * Obsolete queue scheduling macros. They are not used anymore, but still kept
 * here for third-party modules and drivers that might still use them.
1060  */
1061 #define setqsched()
1062 #define qready()        1
1063 
1064 #ifdef _KERNEL
1065 #define runqueues()
1066 #define queuerun()
1067 #endif
1068 
1069 /* compatibility module for style 2 drivers with DR race condition */
1070 #define DRMODNAME       "drcompat"
1071 
1072 /*
1073  * Macros dealing with mux_nodes.
1074  */
1075 #define MUX_VISIT(X)    ((X)->mn_flags |= VISITED)
/*
 * Clear the VISITED flag of mux_node X and reset its mn_originp to NULL.
 * Both updates sit inside one do/while (0) statement so that neither
 * escapes an unbraced if or loop; invoke with a trailing semicolon.
 * X is expanded twice.
 */
#define MUX_CLEAR(X)    do { \
                                (X)->mn_flags &= (~VISITED); \
                                (X)->mn_originp = NULL; \
                        } while (0)
1078 #define MUX_DIDVISIT(X) ((X)->mn_flags & VISITED)
1079 
1080 
1081 /*
1082  * Twisted stream macros
1083  */
1084 #define STRMATED(X)     ((X)->sd_flag & STRMATE)
/*
 * Enter the sd_lock of stream X and of its sd_mate.  The lock at the
 * higher address is always entered first, so the order is the same no
 * matter which of the two mates is passed in.
 *
 * The body is a do/while (0) so that the if/else inside cannot pair up
 * with a caller's else; invoke it with a trailing semicolon.  X is
 * expanded several times.
 */
#define STRLOCKMATES(X) do { \
                                if (&((X)->sd_lock) > \
                                    &(((X)->sd_mate)->sd_lock)) { \
                                        mutex_enter(&((X)->sd_lock)); \
                                        mutex_enter( \
                                            &(((X)->sd_mate)->sd_lock)); \
                                } else { \
                                        mutex_enter( \
                                            &(((X)->sd_mate)->sd_lock)); \
                                        mutex_enter(&((X)->sd_lock)); \
                                } \
                        } while (0)
/*
 * Exit the sd_lock of stream X and then that of its sd_mate.  Both calls
 * sit inside one do/while (0) statement so that neither escapes an
 * unbraced if or loop; invoke with a trailing semicolon.  X is expanded
 * twice.
 */
#define STRUNLOCKMATES(X)       do { \
                                mutex_exit(&((X)->sd_lock)); \
                                mutex_exit(&(((X)->sd_mate)->sd_lock)); \
                        } while (0)
1094 
1095 #if defined(_KERNEL) || defined(_FAKE_KERNEL)
1096 
1097 extern void strinit(void);
1098 extern int strdoioctl(struct stdata *, struct strioctl *, int, int,
1099     cred_t *, int *);
1100 extern void strsendsig(struct strsig *, int, uchar_t, int);
1101 extern void str_sendsig(vnode_t *, int, uchar_t, int);
1102 extern void strhup(struct stdata *);
1103 extern int qattach(queue_t *, dev_t *, int, cred_t *, fmodsw_impl_t *,
1104     boolean_t);
1105 extern int qreopen(queue_t *, dev_t *, int, cred_t *);
1106 extern void qdetach(queue_t *, int, int, cred_t *, boolean_t);
1107 extern void enterq(queue_t *);
1108 extern void leaveq(queue_t *);
1109 extern int putiocd(mblk_t *, caddr_t, int, cred_t *);
1110 extern int getiocd(mblk_t *, caddr_t, int);
1111 extern struct linkinfo *alloclink(queue_t *, queue_t *, struct file *);
1112 extern void lbfree(struct linkinfo *);
1113 extern int linkcycle(stdata_t *, stdata_t *, str_stack_t *);
1114 extern struct linkinfo *findlinks(stdata_t *, int, int, str_stack_t *);
1115 extern queue_t *getendq(queue_t *);
1116 extern int mlink(vnode_t *, int, int, cred_t *, int *, int);
1117 extern int mlink_file(vnode_t *, int, struct file *, cred_t *, int *, int);
1118 extern int munlink(struct stdata *, struct linkinfo *, int, cred_t *, int *,
1119     str_stack_t *);
1120 extern int munlinkall(struct stdata *, int, cred_t *, int *, str_stack_t *);
1121 extern void mux_addedge(stdata_t *, stdata_t *, int, str_stack_t *);
1122 extern void mux_rmvedge(stdata_t *, int, str_stack_t *);
1123 extern int devflg_to_qflag(struct streamtab *, uint32_t, uint32_t *,
1124     uint32_t *);
1125 extern void setq(queue_t *, struct qinit *, struct qinit *, perdm_t *,
1126     uint32_t, uint32_t, boolean_t);
1127 extern perdm_t *hold_dm(struct streamtab *, uint32_t, uint32_t);
1128 extern void rele_dm(perdm_t *);
1129 extern int strmakectl(struct strbuf *, int32_t, int32_t, mblk_t **);
1130 extern int strmakedata(ssize_t *, struct uio *, stdata_t *, int32_t, mblk_t **);
1131 extern int strmakemsg(struct strbuf *, ssize_t *, struct uio *,
1132     struct stdata *, int32_t, mblk_t **);
1133 extern int strgetmsg(vnode_t *, struct strbuf *, struct strbuf *, uchar_t *,
1134     int *, int, rval_t *);
1135 extern int strputmsg(vnode_t *, struct strbuf *, struct strbuf *, uchar_t,
1136     int flag, int fmode);
1137 extern int strstartplumb(struct stdata *, int, int);
1138 extern void strendplumb(struct stdata *);
1139 extern int stropen(struct vnode *, dev_t *, int, cred_t *);
1140 extern int strclose(struct vnode *, int, cred_t *);
extern int strpoll(struct stdata *, short, int, short *,
    struct pollhead **);
1143 extern void strclean(struct vnode *);
1144 extern void str_cn_clean();     /* XXX hook for consoles signal cleanup */
1145 extern int strwrite(struct vnode *, struct uio *, cred_t *);
1146 extern int strwrite_common(struct vnode *, struct uio *, cred_t *, int);
1147 extern int strread(struct vnode *, struct uio *, cred_t *);
1148 extern int strioctl(struct vnode *, int, intptr_t, int, int, cred_t *, int *);
1149 extern int strrput(queue_t *, mblk_t *);
1150 extern int strrput_nondata(queue_t *, mblk_t *);
1151 extern mblk_t *strrput_proto(vnode_t *, mblk_t *,
1152     strwakeup_t *, strsigset_t *, strsigset_t *, strpollset_t *);
1153 extern mblk_t *strrput_misc(vnode_t *, mblk_t *,
1154     strwakeup_t *, strsigset_t *, strsigset_t *, strpollset_t *);
1155 extern int getiocseqno(void);
1156 extern int strwaitbuf(size_t, int);
1157 extern int strwaitq(stdata_t *, int, ssize_t, int, clock_t, int *);
1158 extern struct stdata *shalloc(queue_t *);
1159 extern void shfree(struct stdata *s);
1160 extern queue_t *allocq(void);
1161 extern void freeq(queue_t *);
1162 extern qband_t *allocband(void);
1163 extern void freeband(qband_t *);
1164 extern void freebs_enqueue(mblk_t *, dblk_t *);
1165 extern void setqback(queue_t *, unsigned char);
1166 extern int strcopyin(void *, void *, size_t, int);
1167 extern int strcopyout(void *, void *, size_t, int);
1168 extern void strsignal(struct stdata *, int, int32_t);
1169 extern clock_t str_cv_wait(kcondvar_t *, kmutex_t *, clock_t, int);
1170 extern void disable_svc(queue_t *);
1171 extern void enable_svc(queue_t *);
1172 extern void remove_runlist(queue_t *);
1173 extern void wait_svc(queue_t *);
1174 extern void backenable(queue_t *, uchar_t);
1175 extern void set_qend(queue_t *);
1176 extern int strgeterr(stdata_t *, int32_t, int);
1177 extern void qenable_locked(queue_t *);
1178 extern mblk_t *getq_noenab(queue_t *, ssize_t);
1179 extern void rmvq_noenab(queue_t *, mblk_t *);
1180 extern void qbackenable(queue_t *, uchar_t);
1181 extern void set_qfull(queue_t *);
1182 
1183 extern void strblock(queue_t *);
1184 extern void strunblock(queue_t *);
1185 extern int qclaimed(queue_t *);
1186 extern int straccess(struct stdata *, enum jcaccess);
1187 
1188 extern void entersq(syncq_t *, int);
1189 extern void leavesq(syncq_t *, int);
1190 extern void claimq(queue_t *);
1191 extern void releaseq(queue_t *);
1192 extern void claimstr(queue_t *);
1193 extern void releasestr(queue_t *);
1194 extern void removeq(queue_t *);
1195 extern void insertq(struct stdata *, queue_t *);
1196 extern void drain_syncq(syncq_t *);
1197 extern void qfill_syncq(syncq_t *, queue_t *, mblk_t *);
1198 extern void qdrain_syncq(syncq_t *, queue_t *);
1199 extern int flush_syncq(syncq_t *, queue_t *);
1200 extern void wait_sq_svc(syncq_t *);
1201 
1202 extern void outer_enter(syncq_t *, uint16_t);
1203 extern void outer_exit(syncq_t *);
1204 extern void qwriter_inner(queue_t *, mblk_t *, void (*)());
1205 extern void qwriter_outer(queue_t *, mblk_t *, void (*)());
1206 
1207 extern callbparams_t *callbparams_alloc(syncq_t *, void (*)(void *),
1208     void *, int);
1209 extern void callbparams_free(syncq_t *, callbparams_t *);
1210 extern void callbparams_free_id(syncq_t *, callbparams_id_t, int32_t);
1211 extern void qcallbwrapper(void *);
1212 
1213 extern mblk_t *esballoc_wait(unsigned char *, size_t, uint_t, frtn_t *);
1214 extern mblk_t *esballoca(unsigned char *, size_t, uint_t, frtn_t *);
1215 extern mblk_t *desballoca(unsigned char *, size_t, uint_t, frtn_t *);
1216 extern int do_sendfp(struct stdata *, struct file *, struct cred *);
1217 extern int frozenstr(queue_t *);
1218 extern size_t xmsgsize(mblk_t *);
1219 
1220 extern void putnext_tail(syncq_t *, queue_t *, uint32_t);
1221 extern void stream_willservice(stdata_t *);
1222 extern void stream_runservice(stdata_t *);
1223 
1224 extern void strmate(vnode_t *, vnode_t *);
1225 extern queue_t *strvp2wq(vnode_t *);
1226 extern vnode_t *strq2vp(queue_t *);
1227 extern mblk_t *allocb_wait(size_t, uint_t, uint_t, int *);
1228 extern mblk_t *allocb_cred(size_t, cred_t *, pid_t);
1229 extern mblk_t *allocb_cred_wait(size_t, uint_t, int *, cred_t *, pid_t);
1230 extern mblk_t *allocb_tmpl(size_t, const mblk_t *);
1231 extern mblk_t *allocb_tryhard(size_t);
1232 extern void mblk_copycred(mblk_t *, const mblk_t *);
1233 extern void mblk_setcred(mblk_t *, cred_t *, pid_t);
1234 extern cred_t *msg_getcred(const mblk_t *, pid_t *);
1235 extern struct ts_label_s *msg_getlabel(const mblk_t *);
1236 extern cred_t *msg_extractcred(mblk_t *, pid_t *);
1237 extern void strpollwakeup(vnode_t *, short);
1238 extern int putnextctl_wait(queue_t *, int);
1239 
1240 extern int kstrputmsg(struct vnode *, mblk_t *, struct uio *, ssize_t,
1241     unsigned char, int, int);
1242 extern int kstrgetmsg(struct vnode *, mblk_t **, struct uio *,
1243     unsigned char *, int *, clock_t, rval_t *);
1244 
1245 extern void strsetrerror(vnode_t *, int, int, errfunc_t);
1246 extern void strsetwerror(vnode_t *, int, int, errfunc_t);
1247 extern void strseteof(vnode_t *, int);
1248 extern void strflushrq(vnode_t *, int);
1249 extern void strsetrputhooks(vnode_t *, uint_t, msgfunc_t, msgfunc_t);
1250 extern void strsetwputhooks(vnode_t *, uint_t, clock_t);
1251 extern void strsetrwputdatahooks(vnode_t *, msgfunc_t, msgfunc_t);
1252 extern int strwaitmark(vnode_t *);
1253 extern void strsignal_nolock(stdata_t *, int, uchar_t);
1254 
1255 struct multidata_s;
1256 struct pdesc_s;
1257 extern int hcksum_assoc(mblk_t *, struct multidata_s *, struct pdesc_s  *,
1258     uint32_t, uint32_t, uint32_t, uint32_t, uint32_t, int);
1259 extern void hcksum_retrieve(mblk_t *, struct multidata_s *, struct pdesc_s *,
1260     uint32_t *, uint32_t *, uint32_t *, uint32_t *, uint32_t *);
1261 extern void lso_info_set(mblk_t *, uint32_t, uint32_t);
1262 extern void lso_info_cleanup(mblk_t *);
1263 extern unsigned int bcksum(uchar_t *, int, unsigned int);
1264 extern boolean_t is_vmloaned_mblk(mblk_t *, struct multidata_s *,
1265     struct pdesc_s *);
1266 
1267 extern int fmodsw_register(const char *, struct streamtab *, int);
1268 extern int fmodsw_unregister(const char *);
1269 extern fmodsw_impl_t *fmodsw_find(const char *, fmodsw_flags_t);
1270 extern void fmodsw_rele(fmodsw_impl_t *);
1271 
1272 extern void freemsgchain(mblk_t *);
1273 extern mblk_t *copymsgchain(mblk_t *);
1274 
1275 extern mblk_t *mcopyinuio(struct stdata *, uio_t *, ssize_t, ssize_t, int *);
1276 
1277 /*
1278  * shared or externally configured data structures
1279  */
1280 extern ssize_t strmsgsz;                /* maximum stream message size */
1281 extern ssize_t strctlsz;                /* maximum size of ctl message */
1282 extern int nstrpush;                    /* maximum number of pushes allowed */
1283 
1284 /*
1285  * Bufcalls related variables.
1286  */
extern struct bclist strbcalls;         /* List of bufcalls */
extern kmutex_t strbcall_lock;          /* Protects the list of bufcalls */
extern kcondvar_t strbcall_cv;          /* Signaling when a bufcall is added */
extern kcondvar_t bcall_cv;     /* wait until an executing bufcall completes */
1291 
1292 extern frtn_t frnop;
1293 
1294 extern struct kmem_cache *ciputctrl_cache;
1295 extern int n_ciputctrl;
1296 extern int max_n_ciputctrl;
1297 extern int min_n_ciputctrl;
1298 
1299 extern cdevsw_impl_t *devimpl;
1300 
1301 /*
1302  * esballoc queue for throttling
1303  */
1304 typedef struct esb_queue {
1305         kmutex_t        eq_lock;        /* queue lock; presumably guards the fields below (verify) */
1306         uint_t          eq_len;         /* number of queued messages */
1307         mblk_t          *eq_head;       /* head of queue */
1308         mblk_t          *eq_tail;       /* tail of queue */
1309         uint_t          eq_flags;       /* esballoc queue flags */
1310 } esb_queue_t;
1311 
1312 /*
1313  * esballoc flags for queue processing.
1314  */
1315 #define ESBQ_PROCESSING 0x01    /* queue is being processed */
1316 #define ESBQ_TIMER      0x02    /* timer is active */
1317 
1318 extern void esballoc_queue_init(void);
1319 
1320 #endif  /* _KERNEL */
1321 
1322 /*
1323  * Note: Use of these macros is restricted to kernel/unix; they are
1324  * intended for the STREAMS framework.
1325  * All modules/drivers should include sys/ddi.h.
1326  *
1327  * Finding related queues (_OTHERQ, _WR and _RD evaluate q twice)
1328  */
1329 #define         _OTHERQ(q)      ((q)->q_flag&QREADR? (q)+1: (q)-1)
1330 #define         _WR(q)          ((q)->q_flag&QREADR? (q)+1: (q))
1331 #define         _RD(q)          ((q)->q_flag&QREADR? (q): (q)-1)
1332 #define         _SAMESTR(q)     (!((q)->q_flag & QEND))
1333 
1334 /*
1335  * These are also declared here for modules/drivers that erroneously
1336  * include strsubr.h after ddi.h or fail to include ddi.h at all.
1337  */
1338 extern struct queue *OTHERQ(queue_t *); /* stream.h */
1339 extern struct queue *RD(queue_t *);
1340 extern struct queue *WR(queue_t *);
1341 extern int SAMESTR(queue_t *);
1342 
1343 /*
1344  * The following hardware checksum and LSO related macros are private
1345  * interfaces that are subject to change without notice.
1346  */
1347 #ifdef _KERNEL
1348 #define DB_CKSUMSTART(mp)       ((mp)->b_datap->db_cksumstart)
1349 #define DB_CKSUMEND(mp)         ((mp)->b_datap->db_cksumend)
1350 #define DB_CKSUMSTUFF(mp)       ((mp)->b_datap->db_cksumstuff)
1351 #define DB_CKSUMFLAGS(mp)       ((mp)->b_datap->db_struioun.cksum.flags)
1352 #define DB_CKSUM16(mp)          ((mp)->b_datap->db_cksum16)
1353 #define DB_CKSUM32(mp)          ((mp)->b_datap->db_cksum32)
1354 #define DB_LSOFLAGS(mp)         ((mp)->b_datap->db_struioun.cksum.flags) /* same field as DB_CKSUMFLAGS */
1355 #define DB_LSOMSS(mp)           ((mp)->b_datap->db_struioun.cksum.pad)
1356 #endif  /* _KERNEL */
1357 
1358 #ifdef  __cplusplus
1359 }
1360 #endif
1361 
1362 
1363 #endif  /* _SYS_STRSUBR_H */