1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 1996, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*      Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
  27 /*        All Rights Reserved   */
  28 
  29 /*
  30  * University Copyright- Copyright (c) 1982, 1986, 1988
  31  * The Regents of the University of California
  32  * All Rights Reserved
  33  *
  34  * University Acknowledgment- Portions of this document are derived from
  35  * software developed by the University of California, Berkeley, and its
  36  * contributors.
  37  */
  38 
  39 #ifndef _SYS_SOCKETVAR_H
  40 #define _SYS_SOCKETVAR_H
  41 
  42 #include <sys/types.h>
  43 #include <sys/stream.h>
  44 #include <sys/t_lock.h>
  45 #include <sys/cred.h>
  46 #include <sys/vnode.h>
  47 #include <sys/file.h>
  48 #include <sys/param.h>
  49 #include <sys/zone.h>
  50 #include <sys/sdt.h>
  51 #include <sys/modctl.h>
  52 #include <sys/atomic.h>
  53 #include <sys/socket.h>
  54 #include <sys/ksocket.h>
  55 #include <sys/kstat.h>
  56 
  57 #ifdef _KERNEL
  58 #include <sys/vfs_opreg.h>
  59 #endif
  60 
  61 #ifdef  __cplusplus
  62 extern "C" {
  63 #endif
  64 
  65 /*
  66  * Internal representation of the address used to represent addresses
  67  * in the loopback transport for AF_UNIX. While the sockaddr_un is used
  68  * as the sockfs layer address for AF_UNIX the pathnames contained in
  69  * these addresses are not unique (due to relative pathnames) thus can not
  70  * be used in the transport.
  71  *
  72  * The transport level address consists of a magic number (used to separate the
  73  * name space for specific and implicit binds). For a specific bind
  74  * this is followed by a "vnode *" which ensures that all specific binds
  75  * have a unique transport level address. For implicit binds the latter
  76  * part of the address is a byte string (of the same length as a pointer)
  77  * that is assigned by the loopback transport.
  78  *
  79  * The uniqueness assumes that the loopback transport has a separate namespace
  80  * for sockets in order to avoid name conflicts with e.g. TLI use of the
  81  * same transport.
  82  */
  83 struct so_ux_addr {
  84         void    *soua_vp;       /* vnode pointer or assigned by tl */
  85         uint_t  soua_magic;     /* See below */
  86 };
  87 
  88 #define SOU_MAGIC_EXPLICIT      0x75787670      /* "uxvp" */
  89 #define SOU_MAGIC_IMPLICIT      0x616e6f6e      /* "anon" */
  90 
  91 struct sockaddr_ux {
  92         sa_family_t             sou_family;     /* AF_UNIX */
  93         struct so_ux_addr       sou_addr;
  94 };
  95 
  96 #if defined(_KERNEL) || defined(_KMEMUSER)
  97 
  98 #include <sys/socket_proto.h>
  99 
 100 typedef struct sonodeops sonodeops_t;
 101 typedef struct sonode sonode_t;
 102 
 103 struct sodirect_s;
 104 
 105 /*
 106  * The sonode represents a socket. A sonode never exist in the file system
 107  * name space and can not be opened using open() - only the socket, socketpair
 108  * and accept calls create sonodes.
 109  *
 110  * The locking of sockfs uses the so_lock mutex plus the SOLOCKED and
 111  * SOREADLOCKED flags in so_flag. The mutex protects all the state in the
 112  * sonode. It is expected that the underlying transport protocol serializes
 113  * socket operations, so sockfs will not normally not single-thread
 114  * operations. However, certain sockets, including TPI based ones, can only
 115  * handle one control operation at a time. The SOLOCKED flag is used to
 116  * single-thread operations from sockfs users to prevent e.g. multiple bind()
 117  * calls to operate on the same sonode concurrently. The SOREADLOCKED flag is
 118  * used to ensure that only one thread sleeps in kstrgetmsg for a given
 119  * sonode. This is needed to ensure atomic operation for things like
 120  * MSG_WAITALL.
 121  *
 122  * The so_fallback_rwlock is used to ensure that for sockets that can
 123  * fall back to TPI, the fallback is not initiated until all pending
 124  * operations have completed.
 125  *
 126  * Note that so_lock is sometimes held across calls that might go to sleep
 127  * (kmem_alloc and soallocproto*). This implies that no other lock in
 128  * the system should be held when calling into sockfs; from the system call
 129  * side or from strrput (in case of TPI based sockets). If locks are held
 130  * while calling into sockfs the system might hang when running low on memory.
 131  */
 132 struct sonode {
 133         struct  vnode   *so_vnode;      /* vnode associated with this sonode */
 134 
 135         sonodeops_t     *so_ops;        /* operations vector for this sonode */
 136         void            *so_priv;       /* sonode private data */
 137 
 138         krwlock_t       so_fallback_rwlock;
 139         kmutex_t        so_lock;        /* protects sonode fields */
 140 
 141         kcondvar_t      so_state_cv;    /* synchronize state changes */
 142         kcondvar_t      so_single_cv;   /* wait due to SOLOCKED */
 143         kcondvar_t      so_read_cv;     /* wait due to SOREADLOCKED */
 144 
 145         /* These fields are protected by so_lock */
 146 
 147         uint_t          so_state;       /* internal state flags SS_*, below */
 148         uint_t          so_mode;        /* characteristics on socket. SM_* */
 149         ushort_t        so_flag;        /* flags, see below */
 150         int             so_count;       /* count of opened references */
 151 
 152         sock_connid_t   so_proto_connid; /* protocol generation number */
 153 
 154         ushort_t        so_error;       /* error affecting connection */
 155 
 156         struct sockparams *so_sockparams;       /* vnode or socket module */
 157         /* Needed to recreate the same socket for accept */
 158         short   so_family;
 159         short   so_type;
 160         short   so_protocol;
 161         short   so_version;             /* From so_socket call */
 162 
 163         /* Accept queue */
 164         kmutex_t        so_acceptq_lock;        /* protects accept queue */
 165         list_t          so_acceptq_list;        /* pending conns */
 166         list_t          so_acceptq_defer;       /* deferred conns */
 167         list_node_t     so_acceptq_node;        /* acceptq list node */
 168         unsigned int    so_acceptq_len;         /* # of conns (both lists) */
 169         unsigned int    so_backlog;             /* Listen backlog */
 170         kcondvar_t      so_acceptq_cv;          /* wait for new conn. */
 171         struct sonode   *so_listener;           /* parent socket */
 172 
 173         /* Options */
 174         short   so_options;             /* From socket call, see socket.h */
 175         struct linger   so_linger;      /* SO_LINGER value */
 176 #define so_sndbuf       so_proto_props.sopp_txhiwat     /* SO_SNDBUF value */
 177 #define so_sndlowat     so_proto_props.sopp_txlowat     /* tx low water mark */
 178 #define so_rcvbuf       so_proto_props.sopp_rxhiwat     /* SO_RCVBUF value */
 179 #define so_rcvlowat     so_proto_props.sopp_rxlowat     /* rx low water mark */
 180 #define so_max_addr_len so_proto_props.sopp_maxaddrlen
 181 #define so_minpsz       so_proto_props.sopp_minpsz
 182 #define so_maxpsz       so_proto_props.sopp_maxpsz
 183 
 184         int     so_xpg_rcvbuf;          /* SO_RCVBUF value for XPG4 socket */
 185         clock_t so_sndtimeo;            /* send timeout */
 186         clock_t so_rcvtimeo;            /* recv timeout */
 187 
 188         mblk_t  *so_oobmsg;             /* outofline oob data */
 189         ssize_t so_oobmark;             /* offset of the oob data */
 190 
 191         pid_t   so_pgrp;                /* pgrp for signals */
 192 
 193         cred_t          *so_peercred;   /* connected socket peer cred */
 194         pid_t           so_cpid;        /* connected socket peer cached pid */
 195         zoneid_t        so_zoneid;      /* opener's zoneid */
 196 
 197         struct pollhead so_poll_list;   /* common pollhead */
 198         short           so_pollev;      /* events that should be generated */
 199 
 200         /* Receive */
 201         unsigned int    so_rcv_queued;  /* # bytes on both rcv lists */
 202         mblk_t          *so_rcv_q_head; /* processing/copyout rcv queue */
 203         mblk_t          *so_rcv_q_last_head;
 204         mblk_t          *so_rcv_head;   /* protocol prequeue */
 205         mblk_t          *so_rcv_last_head;      /* last mblk in b_next chain */
 206         kcondvar_t      so_rcv_cv;      /* wait for data */
 207         uint_t          so_rcv_wanted;  /* # of bytes wanted by app */
 208         timeout_id_t    so_rcv_timer_tid;
 209 
 210 #define so_rcv_thresh   so_proto_props.sopp_rcvthresh
 211 #define so_rcv_timer_interval so_proto_props.sopp_rcvtimer
 212 
 213         kcondvar_t      so_snd_cv;      /* wait for snd buffers */
 214         uint32_t
 215                 so_snd_qfull: 1,        /* Transmit full */
 216                 so_rcv_wakeup: 1,
 217                 so_snd_wakeup: 1,
 218                 so_not_str: 1,  /* B_TRUE if not streams based socket */
 219                 so_pad_to_bit_31: 28;
 220 
 221         /* Communication channel with protocol */
 222         sock_lower_handle_t     so_proto_handle;
 223         sock_downcalls_t        *so_downcalls;
 224 
 225         struct sock_proto_props so_proto_props; /* protocol settings */
 226         boolean_t               so_flowctrld;   /* Flow controlled */
 227         uint_t                  so_copyflag;    /* Copy related flag */
 228         kcondvar_t              so_copy_cv;     /* Copy cond variable */
 229 
 230         /* kernel sockets */
 231         ksocket_callbacks_t     so_ksock_callbacks;
 232         void                    *so_ksock_cb_arg;       /* callback argument */
 233         kcondvar_t              so_closing_cv;
 234 
 235         /* != NULL for sodirect enabled socket */
 236         struct sodirect_s       *so_direct;
 237 
 238         /* socket filters */
 239         uint_t                  so_filter_active;       /* # of active fil */
 240         uint_t                  so_filter_tx;           /* pending tx ops */
 241         struct sof_instance     *so_filter_top;         /* top of stack */
 242         struct sof_instance     *so_filter_bottom;      /* bottom of stack */
 243         clock_t                 so_filter_defertime;    /* time when deferred */
 244 };
 245 
 246 #define SO_HAVE_DATA(so)                                                \
 247         /*                                                              \
 248          * For the (tid == 0) case we must check so_rcv_{q_,}head       \
 249          * rather than (so_rcv_queued > 0), since the latter does not        \
 250          * take into account mblks with only control/name information.  \
 251          */                                                             \
 252         ((so)->so_rcv_timer_tid == 0 && ((so)->so_rcv_head != NULL ||     \
 253         (so)->so_rcv_q_head != NULL)) ||                             \
 254         ((so)->so_state & SS_CANTRCVMORE)
 255 
 256 /*
 257  * Events handled by the protocol (in case sd_poll is set)
 258  */
 259 #define SO_PROTO_POLLEV         (POLLIN|POLLRDNORM|POLLRDBAND)
 260 
 261 
 262 #endif /* _KERNEL || _KMEMUSER */
 263 
 264 /* flags */
 265 #define SOMOD           0x0001          /* update socket modification time */
 266 #define SOACC           0x0002          /* update socket access time */
 267 
 268 #define SOLOCKED        0x0010          /* use to serialize open/closes */
 269 #define SOREADLOCKED    0x0020          /* serialize kstrgetmsg calls */
 270 #define SOCLONE         0x0040          /* child of clone driver */
 271 #define SOASYNC_UNBIND  0x0080          /* wait for ACK of async unbind */
 272 
 273 #define SOCK_IS_NONSTR(so)      ((so)->so_not_str)
 274 
 275 /*
 276  * Socket state bits.
 277  */
 278 #define SS_ISCONNECTED          0x00000001 /* socket connected to a peer */
 279 #define SS_ISCONNECTING         0x00000002 /* in process, connecting to peer */
 280 #define SS_ISDISCONNECTING      0x00000004 /* in process of disconnecting */
 281 #define SS_CANTSENDMORE         0x00000008 /* can't send more data to peer */
 282 
 283 #define SS_CANTRCVMORE          0x00000010 /* can't receive more data */
 284 #define SS_ISBOUND              0x00000020 /* socket is bound */
 285 #define SS_NDELAY               0x00000040 /* FNDELAY non-blocking */
 286 #define SS_NONBLOCK             0x00000080 /* O_NONBLOCK non-blocking */
 287 
 288 #define SS_ASYNC                0x00000100 /* async i/o notify */
 289 #define SS_ACCEPTCONN           0x00000200 /* listen done */
 290 /*      unused                  0x00000400 */   /* was SS_HASCONNIND */
 291 #define SS_SAVEDEOR             0x00000800 /* Saved MSG_EOR rcv side state */
 292 
 293 #define SS_RCVATMARK            0x00001000 /* at mark on input */
 294 #define SS_OOBPEND              0x00002000 /* OOB pending or present - poll */
 295 #define SS_HAVEOOBDATA          0x00004000 /* OOB data present */
 296 #define SS_HADOOBDATA           0x00008000 /* OOB data consumed */
 297 #define SS_CLOSING              0x00010000 /* in process of closing */
 298 
 299 #define SS_FIL_DEFER            0x00020000 /* filter deferred notification */
 300 #define SS_FILOP_OK             0x00040000 /* socket can attach filters */
 301 #define SS_FIL_RCV_FLOWCTRL     0x00080000 /* filter asserted rcv flow ctrl */
 302 #define SS_FIL_SND_FLOWCTRL     0x00100000 /* filter asserted snd flow ctrl */
 303 #define SS_FIL_STOP             0x00200000 /* no more filter actions */
 304 
 305 #define SS_SODIRECT             0x00400000 /* transport supports sodirect */
 306 
 307 #define SS_SENTLASTREADSIG      0x01000000 /* last rx signal has been sent */
 308 #define SS_SENTLASTWRITESIG     0x02000000 /* last tx signal has been sent */
 309 
 310 #define SS_FALLBACK_DRAIN       0x20000000 /* data was/is being drained */
 311 #define SS_FALLBACK_PENDING     0x40000000 /* fallback is pending */
 312 #define SS_FALLBACK_COMP        0x80000000 /* fallback has completed */
 313 
 314 
 315 /* Set of states when the socket can't be rebound */
 316 #define SS_CANTREBIND   (SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING|\
 317                             SS_CANTSENDMORE|SS_CANTRCVMORE|SS_ACCEPTCONN)
 318 
 319 /*
 320  * Sockets that can fall back to TPI must ensure that fall back is not
 321  * initiated while a thread is using a socket.
 322  */
 323 #define SO_BLOCK_FALLBACK(so, fn)                               \
 324         ASSERT(MUTEX_NOT_HELD(&(so)->so_lock));                  \
 325         rw_enter(&(so)->so_fallback_rwlock, RW_READER);          \
 326         if ((so)->so_state & (SS_FALLBACK_COMP|SS_FILOP_OK)) {   \
 327                 if ((so)->so_state & SS_FALLBACK_COMP) { \
 328                         rw_exit(&(so)->so_fallback_rwlock);      \
 329                         return (fn);                            \
 330                 } else {                                        \
 331                         mutex_enter(&(so)->so_lock);             \
 332                         (so)->so_state &= ~SS_FILOP_OK;          \
 333                         mutex_exit(&(so)->so_lock);              \
 334                 }                                               \
 335         }
 336 
 337 #define SO_UNBLOCK_FALLBACK(so) {                       \
 338         rw_exit(&(so)->so_fallback_rwlock);              \
 339 }
 340 
 341 #define SO_SND_FLOWCTRLD(so)    \
 342         ((so)->so_snd_qfull || (so)->so_state & SS_FIL_SND_FLOWCTRL)
 343 
 344 /* Poll events */
 345 #define SO_POLLEV_IN            0x1     /* POLLIN wakeup needed */
 346 #define SO_POLLEV_ALWAYS        0x2     /* wakeups */
 347 
 348 /*
 349  * Characteristics of sockets. Not changed after the socket is created.
 350  */
 351 #define SM_PRIV                 0x001   /* privileged for broadcast, raw... */
 352 #define SM_ATOMIC               0x002   /* atomic data transmission */
 353 #define SM_ADDR                 0x004   /* addresses given with messages */
 354 #define SM_CONNREQUIRED         0x008   /* connection required by protocol */
 355 
 356 #define SM_FDPASSING            0x010   /* passes file descriptors */
 357 #define SM_EXDATA               0x020   /* Can handle T_EXDATA_REQ */
 358 #define SM_OPTDATA              0x040   /* Can handle T_OPTDATA_REQ */
 359 #define SM_BYTESTREAM           0x080   /* Byte stream - can use M_DATA */
 360 
 361 #define SM_ACCEPTOR_ID          0x100   /* so_acceptor_id is valid */
 362 
 363 #define SM_KERNEL               0x200   /* kernel socket */
 364 
 365 /* The modes below are only for non-streams sockets */
 366 #define SM_ACCEPTSUPP           0x400   /* can handle accept() */
 367 #define SM_SENDFILESUPP         0x800   /* Private: proto supp sendfile  */
 368 
 369 /*
 370  * Socket versions. Used by the socket library when calling _so_socket().
 371  */
 372 #define SOV_STREAM      0       /* Not a socket - just a stream */
 373 #define SOV_DEFAULT     1       /* Select based on so_default_version */
 374 #define SOV_SOCKSTREAM  2       /* Socket plus streams operations */
 375 #define SOV_SOCKBSD     3       /* Socket with no streams operations */
 376 #define SOV_XPG4_2      4       /* Xnet socket */
 377 
 378 #if defined(_KERNEL) || defined(_KMEMUSER)
 379 
 380 /*
 381  * sonode create and destroy functions.
 382  */
 383 typedef struct sonode *(*so_create_func_t)(struct sockparams *,
 384     int, int, int, int, int, int *, cred_t *);
 385 typedef void (*so_destroy_func_t)(struct sonode *);
 386 
 387 /* STREAM device information */
 388 typedef struct sdev_info {
 389         char    *sd_devpath;
 390         int     sd_devpathlen; /* Is 0 if sp_devpath is a static string */
 391         vnode_t *sd_vnode;
 392 } sdev_info_t;
 393 
 394 #define SOCKMOD_VERSION_1       1
 395 #define SOCKMOD_VERSION         2
 396 
 397 /* name of the TPI pseudo socket module */
 398 #define SOTPI_SMOD_NAME         "socktpi"
 399 
 400 typedef struct __smod_priv_s {
 401         so_create_func_t        smodp_sock_create_func;
 402         so_destroy_func_t       smodp_sock_destroy_func;
 403         so_proto_fallback_func_t smodp_proto_fallback_func;
 404         const char              *smodp_fallback_devpath_v4;
 405         const char              *smodp_fallback_devpath_v6;
 406 } __smod_priv_t;
 407 
 408 /*
 409  * Socket module register information
 410  */
 411 typedef struct smod_reg_s {
 412         int             smod_version;
 413         char            *smod_name;
 414         size_t          smod_uc_version;
 415         size_t          smod_dc_version;
 416         so_proto_create_func_t  smod_proto_create_func;
 417 
 418         /* __smod_priv_data must be NULL */
 419         __smod_priv_t   *__smod_priv;
 420 } smod_reg_t;
 421 
 422 /*
 423  * Socket module information
 424  */
 425 typedef struct smod_info {
 426         int             smod_version;
 427         char            *smod_name;
 428         uint_t          smod_refcnt;            /* # of entries */
 429         size_t          smod_uc_version;        /* upcall version */
 430         size_t          smod_dc_version;        /* down call version */
 431         so_proto_create_func_t  smod_proto_create_func;
 432         so_proto_fallback_func_t smod_proto_fallback_func;
 433         const char              *smod_fallback_devpath_v4;
 434         const char              *smod_fallback_devpath_v6;
 435         so_create_func_t        smod_sock_create_func;
 436         so_destroy_func_t       smod_sock_destroy_func;
 437         list_node_t     smod_node;
 438 } smod_info_t;
 439 
 440 typedef struct sockparams_stats {
 441         kstat_named_t   sps_nfallback;  /* # of fallbacks to TPI */
 442         kstat_named_t   sps_nactive;    /* # of active sockets */
 443         kstat_named_t   sps_ncreate;    /* total # of created sockets */
 444 } sockparams_stats_t;
 445 
 446 /*
 447  * sockparams
 448  *
 449  * Used for mapping family/type/protocol to a socket module or STREAMS device
 450  */
 451 struct sockparams {
 452         /*
 453          * The family, type, protocol, sdev_info and smod_name are
 454          * set when the entry is created, and they will never change
 455          * thereafter.
 456          */
 457         int             sp_family;
 458         int             sp_type;
 459         int             sp_protocol;
 460 
 461         sdev_info_t     sp_sdev_info;   /* STREAM device */
 462         char            *sp_smod_name;  /* socket module name */
 463 
 464         kmutex_t        sp_lock;        /* lock for refcnt and smod_info */
 465         uint64_t        sp_refcnt;      /* entry reference count */
 466         smod_info_t     *sp_smod_info;  /* socket module */
 467 
 468         sockparams_stats_t sp_stats;
 469         kstat_t         *sp_kstat;
 470 
 471         /*
 472          * The entries below are only modified while holding
 473          * sockconf_lock as a writer.
 474          */
 475         int             sp_flags;       /* see below */
 476         list_node_t     sp_node;
 477 
 478         list_t          sp_auto_filters; /* list of automatic filters */
 479         list_t          sp_prog_filters; /* list of programmatic filters */
 480 };
 481 
 482 struct sof_entry;
 483 
 484 typedef struct sp_filter {
 485         struct sof_entry *spf_filter;
 486         list_node_t     spf_node;
 487 } sp_filter_t;
 488 
 489 
 490 /*
 491  * sockparams flags
 492  */
 493 #define SOCKPARAMS_EPHEMERAL    0x1     /* temp. entry, not on global list */
 494 
 495 extern void sockparams_init(void);
 496 extern struct sockparams *sockparams_hold_ephemeral_bydev(int, int, int,
 497     const char *, int, int *);
 498 extern struct sockparams *sockparams_hold_ephemeral_bymod(int, int, int,
 499     const char *, int, int *);
 500 extern void sockparams_ephemeral_drop_last_ref(struct sockparams *);
 501 
 502 extern struct sockparams *sockparams_create(int, int, int, char *, char *, int,
 503     int, int, int *);
 504 extern void     sockparams_destroy(struct sockparams *);
 505 extern int      sockparams_add(struct sockparams *);
 506 extern int      sockparams_delete(int, int, int);
 507 extern int      sockparams_new_filter(struct sof_entry *);
 508 extern void     sockparams_filter_cleanup(struct sof_entry *);
 509 
 510 extern void smod_init(void);
 511 extern void smod_add(smod_info_t *);
 512 extern int smod_register(const smod_reg_t *);
 513 extern int smod_unregister(const char *);
 514 extern smod_info_t *smod_lookup_byname(const char *);
 515 
 516 #define SOCKPARAMS_HAS_DEVICE(sp)                                       \
 517         ((sp)->sp_sdev_info.sd_devpath != NULL)
 518 
 519 /* Increase the smod_info_t reference count */
 520 #define SMOD_INC_REF(smodp) {                                           \
 521         ASSERT((smodp) != NULL);                                        \
 522         DTRACE_PROBE1(smodinfo__inc__ref, struct smod_info *, (smodp)); \
 523         atomic_inc_uint(&(smodp)->smod_refcnt);                          \
 524 }
 525 
 526 /*
 527  * Decreace the socket module entry reference count.
 528  * When no one mapping to the entry, we try to unload the module from the
 529  * kernel. If the module can't unload, just leave the module entry with
 530  * a zero refcnt.
 531  */
 532 #define SMOD_DEC_REF(smodp, modname) {                                  \
 533         ASSERT((smodp) != NULL);                                        \
 534         ASSERT((smodp)->smod_refcnt != 0);                           \
 535         atomic_dec_uint(&(smodp)->smod_refcnt);                          \
 536         /*                                                              \
 537          * No need to atomically check the return value because the     \
 538          * socket module framework will verify that no one is using     \
 539          * the module before unloading. Worst thing that can happen     \
 540          * here is multiple calls to mod_remove_by_name(), which is OK. \
 541          */                                                             \
 542         if ((smodp)->smod_refcnt == 0)                                       \
 543                 (void) mod_remove_by_name(modname);                     \
 544 }
 545 
 546 /* Increase the reference count */
 547 #define SOCKPARAMS_INC_REF(sp) {                                        \
 548         ASSERT((sp) != NULL);                                           \
 549         DTRACE_PROBE1(sockparams__inc__ref, struct sockparams *, (sp)); \
 550         mutex_enter(&(sp)->sp_lock);                                     \
 551         (sp)->sp_refcnt++;                                           \
 552         ASSERT((sp)->sp_refcnt != 0);                                        \
 553         mutex_exit(&(sp)->sp_lock);                                      \
 554 }
 555 
 556 /*
 557  * Decrease the reference count.
 558  *
 559  * If the sockparams is ephemeral, then the thread dropping the last ref
 560  * count will destroy the entry.
 561  */
 562 #define SOCKPARAMS_DEC_REF(sp) {                                        \
 563         ASSERT((sp) != NULL);                                           \
 564         DTRACE_PROBE1(sockparams__dec__ref, struct sockparams *, (sp)); \
 565         mutex_enter(&(sp)->sp_lock);                                     \
 566         ASSERT((sp)->sp_refcnt > 0);                                      \
 567         if ((sp)->sp_refcnt == 1) {                                  \
 568                 if ((sp)->sp_flags & SOCKPARAMS_EPHEMERAL) {             \
 569                         mutex_exit(&(sp)->sp_lock);                      \
 570                         sockparams_ephemeral_drop_last_ref((sp));       \
 571                 } else {                                                \
 572                         (sp)->sp_refcnt--;                           \
 573                         if ((sp)->sp_smod_info != NULL) {            \
 574                                 SMOD_DEC_REF((sp)->sp_smod_info,     \
 575                                     (sp)->sp_smod_name);             \
 576                         }                                               \
 577                         (sp)->sp_smod_info = NULL;                   \
 578                         mutex_exit(&(sp)->sp_lock);                      \
 579                 }                                                       \
 580         } else {                                                        \
 581                 (sp)->sp_refcnt--;                                   \
 582                 mutex_exit(&(sp)->sp_lock);                              \
 583         }                                                               \
 584 }
 585 
 586 /*
 587  * Used to traverse the list of AF_UNIX sockets to construct the kstat
 588  * for netstat(1m).
 589  */
 590 struct socklist {
 591         kmutex_t        sl_lock;
 592         struct sonode   *sl_list;
 593 };
 594 
 595 extern struct socklist socklist;
 596 /*
 597  * ss_full_waits is the number of times the reader thread
 598  * waits when the queue is full and ss_empty_waits is the number
 599  * of times the consumer thread waits when the queue is empty.
 600  * No locks for these as they are just indicators of whether
 601  * disk or network or both is slow or fast.
 602  */
 603 struct sendfile_stats {
 604         uint32_t ss_file_cached;
 605         uint32_t ss_file_not_cached;
 606         uint32_t ss_full_waits;
 607         uint32_t ss_empty_waits;
 608         uint32_t ss_file_segmap;
 609 };
 610 
 611 /*
 612  * A single sendfile request is represented by snf_req.
 613  */
 614 typedef struct snf_req {
 615         struct snf_req  *sr_next;
 616         mblk_t          *sr_mp_head;
 617         mblk_t          *sr_mp_tail;
 618         kmutex_t        sr_lock;
 619         kcondvar_t      sr_cv;
 620         uint_t          sr_qlen;
 621         int             sr_hiwat;
 622         int             sr_lowat;
 623         int             sr_operation;
 624         struct vnode    *sr_vp;
 625         file_t          *sr_fp;
 626         ssize_t         sr_maxpsz;
 627         u_offset_t      sr_file_off;
 628         u_offset_t      sr_file_size;
 629 #define SR_READ_DONE    0x80000000
 630         int             sr_read_error;
 631         int             sr_write_error;
 632 } snf_req_t;
 633 
 634 /* A queue of sendfile requests */
 635 struct sendfile_queue {
 636         snf_req_t       *snfq_req_head;
 637         snf_req_t       *snfq_req_tail;
 638         kmutex_t        snfq_lock;
 639         kcondvar_t      snfq_cv;
 640         int             snfq_svc_threads;       /* # of service threads */
 641         int             snfq_idle_cnt;          /* # of idling threads */
 642         int             snfq_max_threads;
 643         int             snfq_req_cnt;           /* Number of requests */
 644 };
 645 
 646 #define READ_OP                 1
 647 #define SNFQ_TIMEOUT            (60 * 5 * hz)   /* 5 minutes */
 648 
 649 /* Socket network operations switch */
 650 struct sonodeops {
 651         int     (*sop_init)(struct sonode *, struct sonode *, cred_t *,
 652                     int);
 653         int     (*sop_accept)(struct sonode *, int, cred_t *, struct sonode **);
 654         int     (*sop_bind)(struct sonode *, struct sockaddr *, socklen_t,
 655                     int, cred_t *);
 656         int     (*sop_listen)(struct sonode *, int, cred_t *);
 657         int     (*sop_connect)(struct sonode *, struct sockaddr *,
 658                     socklen_t, int, int, cred_t *);
 659         int     (*sop_recvmsg)(struct sonode *, struct msghdr *,
 660                     struct uio *, cred_t *);
 661         int     (*sop_sendmsg)(struct sonode *, struct msghdr *,
 662                     struct uio *, cred_t *);
 663         int     (*sop_sendmblk)(struct sonode *, struct msghdr *, int,
 664                     cred_t *, mblk_t **);
 665         int     (*sop_getpeername)(struct sonode *, struct sockaddr *,
 666                     socklen_t *, boolean_t, cred_t *);
 667         int     (*sop_getsockname)(struct sonode *, struct sockaddr *,
 668                     socklen_t *, cred_t *);
 669         int     (*sop_shutdown)(struct sonode *, int, cred_t *);
 670         int     (*sop_getsockopt)(struct sonode *, int, int, void *,
 671                     socklen_t *, int, cred_t *);
 672         int     (*sop_setsockopt)(struct sonode *, int, int, const void *,
 673                     socklen_t, cred_t *);
 674         int     (*sop_ioctl)(struct sonode *, int, intptr_t, int,
 675                     cred_t *, int32_t *);
 676         int     (*sop_poll)(struct sonode *, short, int, short *,
 677                     struct pollhead **);
 678         int     (*sop_close)(struct sonode *, int, cred_t *);
 679 };
 680 
 681 #define SOP_INIT(so, flag, cr, flags)   \
 682         ((so)->so_ops->sop_init((so), (flag), (cr), (flags)))
 683 #define SOP_ACCEPT(so, fflag, cr, nsop) \
 684         ((so)->so_ops->sop_accept((so), (fflag), (cr), (nsop)))
 685 #define SOP_BIND(so, name, namelen, flags, cr)  \
 686         ((so)->so_ops->sop_bind((so), (name), (namelen), (flags), (cr)))
 687 #define SOP_LISTEN(so, backlog, cr)     \
 688         ((so)->so_ops->sop_listen((so), (backlog), (cr)))
 689 #define SOP_CONNECT(so, name, namelen, fflag, flags, cr)        \
 690         ((so)->so_ops->sop_connect((so), (name), (namelen), (fflag), (flags), \
 691         (cr)))
 692 #define SOP_RECVMSG(so, msg, uiop, cr)  \
 693         ((so)->so_ops->sop_recvmsg((so), (msg), (uiop), (cr)))
 694 #define SOP_SENDMSG(so, msg, uiop, cr)  \
 695         ((so)->so_ops->sop_sendmsg((so), (msg), (uiop), (cr)))
 696 #define SOP_SENDMBLK(so, msg, size, cr, mpp)    \
 697         ((so)->so_ops->sop_sendmblk((so), (msg), (size), (cr), (mpp)))
 698 #define SOP_GETPEERNAME(so, addr, addrlen, accept, cr)  \
 699         ((so)->so_ops->sop_getpeername((so), (addr), (addrlen), (accept), (cr)))
 700 #define SOP_GETSOCKNAME(so, addr, addrlen, cr)  \
 701         ((so)->so_ops->sop_getsockname((so), (addr), (addrlen), (cr)))
 702 #define SOP_SHUTDOWN(so, how, cr)       \
 703         ((so)->so_ops->sop_shutdown((so), (how), (cr)))
 704 #define SOP_GETSOCKOPT(so, level, optionname, optval, optlenp, flags, cr) \
 705         ((so)->so_ops->sop_getsockopt((so), (level), (optionname),        \
 706             (optval), (optlenp), (flags), (cr)))
 707 #define SOP_SETSOCKOPT(so, level, optionname, optval, optlen, cr)       \
 708         ((so)->so_ops->sop_setsockopt((so), (level), (optionname),        \
 709             (optval), (optlen), (cr)))
 710 #define SOP_IOCTL(so, cmd, arg, mode, cr, rvalp)        \
 711         ((so)->so_ops->sop_ioctl((so), (cmd), (arg), (mode), (cr), (rvalp)))
 712 #define SOP_POLL(so, events, anyyet, reventsp, phpp) \
 713         ((so)->so_ops->sop_poll((so), (events), (anyyet), (reventsp), (phpp)))
 714 #define SOP_CLOSE(so, flag, cr) \
 715         ((so)->so_ops->sop_close((so), (flag), (cr)))
 716 
 717 #endif /* defined(_KERNEL) || defined(_KMEMUSER) */
 718 
 719 #ifdef _KERNEL
 720 
 721 #define ISALIGNED_cmsghdr(addr) \
 722                 (((uintptr_t)(addr) & (_CMSG_HDR_ALIGNMENT - 1)) == 0)
 723 
 724 #define ROUNDUP_cmsglen(len) \
 725         (((len) + _CMSG_HDR_ALIGNMENT - 1) & ~(_CMSG_HDR_ALIGNMENT - 1))
 726 
 727 #define IS_NON_STREAM_SOCK(vp) \
 728         ((vp)->v_type == VSOCK && (vp)->v_stream == NULL)
 729 /*
 730  * Macros that operate on struct cmsghdr.
 731  * Used in parsing msg_control.
 732  * The CMSG_VALID macro does not assume that the last option buffer is padded.
 733  */
 734 #define CMSG_NEXT(cmsg)                                         \
 735         (struct cmsghdr *)((uintptr_t)(cmsg) +                  \
 736             ROUNDUP_cmsglen((cmsg)->cmsg_len))
 737 #define CMSG_CONTENT(cmsg)      (&((cmsg)[1]))
 738 #define CMSG_CONTENTLEN(cmsg)   ((cmsg)->cmsg_len - sizeof (struct cmsghdr))
 739 #define CMSG_VALID(cmsg, start, end)                                    \
 740         (ISALIGNED_cmsghdr(cmsg) &&                                     \
 741         ((uintptr_t)(cmsg) >= (uintptr_t)(start)) &&                 \
 742         ((uintptr_t)(cmsg) < (uintptr_t)(end)) &&                    \
 743         ((ssize_t)(cmsg)->cmsg_len >= sizeof (struct cmsghdr)) && \
 744         ((uintptr_t)(cmsg) + (cmsg)->cmsg_len <= (uintptr_t)(end)))
 745 
 746 /*
 747  * Maximum size of any argument that is copied in (addresses, options,
 748  * access rights). MUST be at least MAXPATHLEN + 3.
 749  * BSD and SunOS 4.X limited this to MLEN or MCLBYTES.
 750  */
 751 #define SO_MAXARGSIZE   8192
 752 
 753 /*
 754  * Convert between vnode and sonode
 755  */
 756 #define VTOSO(vp)       ((struct sonode *)((vp)->v_data))
 757 #define SOTOV(sp)       ((sp)->so_vnode)
 758 
 759 /*
 760  * Internal flags for sobind()
 761  */
 762 #define _SOBIND_REBIND          0x01    /* Bind to existing local address */
 763 #define _SOBIND_UNSPEC          0x02    /* Bind to unspecified address */
 764 #define _SOBIND_LOCK_HELD       0x04    /* so_excl_lock held by caller */
 765 #define _SOBIND_NOXLATE         0x08    /* No addr translation for AF_UNIX */
 766 #define _SOBIND_XPG4_2          0x10    /* xpg4.2 semantics */
 767 #define _SOBIND_SOCKBSD         0x20    /* BSD semantics */
 768 #define _SOBIND_LISTEN          0x40    /* Make into SS_ACCEPTCONN */
 769 #define _SOBIND_SOCKETPAIR      0x80    /* Internal flag for so_socketpair() */
 770                                         /* to enable listen with backlog = 1 */
 771 
 772 /*
 773  * Internal flags for sounbind()
 774  */
 775 #define _SOUNBIND_REBIND        0x01    /* Don't clear fields - will rebind */
 776 
 777 /*
 778  * Internal flags for soconnect()
 779  */
 780 #define _SOCONNECT_NOXLATE      0x01    /* No addr translation for AF_UNIX */
 781 #define _SOCONNECT_DID_BIND     0x02    /* Unbind when connect fails */
 782 #define _SOCONNECT_XPG4_2       0x04    /* xpg4.2 semantics */
 783 
 784 /*
 785  * Internal flags for sodisconnect()
 786  */
 787 #define _SODISCONNECT_LOCK_HELD 0x01    /* so_excl_lock held by caller */
 788 
 789 /*
 790  * Internal flags for sotpi_getsockopt().
 791  */
 792 #define _SOGETSOCKOPT_XPG4_2    0x01    /* xpg4.2 semantics */
 793 
 794 /*
 795  * Internal flags for soallocproto*()
 796  */
 797 #define _ALLOC_NOSLEEP          0       /* Don't sleep for memory */
 798 #define _ALLOC_INTR             1       /* Sleep until interrupt */
 799 #define _ALLOC_SLEEP            2       /* Sleep forever */
 800 
 801 /*
 802  * Internal structure for handling AF_UNIX file descriptor passing
 803  */
 804 struct fdbuf {
 805         int             fd_size;        /* In bytes, for kmem_free */
 806         int             fd_numfd;       /* Number of elements below */
 807         char            *fd_ebuf;       /* Extra buffer to free  */
 808         int             fd_ebuflen;
 809         frtn_t          fd_frtn;
 810         struct file     *fd_fds[1];     /* One or more */
 811 };
 812 #define FDBUF_HDRSIZE   (sizeof (struct fdbuf) - sizeof (struct file *))
 813 
 814 /*
 815  * Variable that can be patched to set what version of socket socket()
 816  * will create.
 817  */
 818 extern int so_default_version;
 819 
 820 #ifdef DEBUG
 821 /* Turn on extra testing capabilities */
 822 #define SOCK_TEST
 823 #endif /* DEBUG */
 824 
 825 #ifdef DEBUG
 826 char    *pr_state(uint_t, uint_t);
 827 char    *pr_addr(int, struct sockaddr *, t_uscalar_t);
 828 int     so_verify_oobstate(struct sonode *);
 829 #endif /* DEBUG */
 830 
 831 /*
 832  * DEBUG macros
 833  */
 834 #if defined(DEBUG)
 835 #define SOCK_DEBUG
 836 
 837 extern int sockdebug;
 838 extern int sockprinterr;
 839 
 840 #define eprint(args)    printf args
 841 #define eprintso(so, args) \
 842 { if (sockprinterr && ((so)->so_options & SO_DEBUG)) printf args; }
 843 #define eprintline(error)                                       \
 844 {                                                               \
 845         if (error != EINTR && (sockprinterr || sockdebug > 0))       \
 846                 printf("socket error %d: line %d file %s\n",    \
 847                         (error), __LINE__, __FILE__);           \
 848 }
 849 
 850 #define eprintsoline(so, error)                                 \
 851 { if (sockprinterr && ((so)->so_options & SO_DEBUG))             \
 852         printf("socket(%p) error %d: line %d file %s\n",        \
 853                 (void *)(so), (error), __LINE__, __FILE__);     \
 854 }
 855 #define dprint(level, args)     { if (sockdebug > (level)) printf args; }
 856 #define dprintso(so, level, args) \
 857 { if (sockdebug > (level) && ((so)->so_options & SO_DEBUG)) printf args; }
 858 
 859 #else /* define(DEBUG) */
 860 
 861 #define eprint(args)            {}
 862 #define eprintso(so, args)      {}
 863 #define eprintline(error)       {}
 864 #define eprintsoline(so, error) {}
 865 #define dprint(level, args)     {}
 866 #define dprintso(so, level, args) {}
 867 
 868 #endif /* defined(DEBUG) */
 869 
 870 extern struct vfsops                    sock_vfsops;
 871 extern struct vnodeops                  *socket_vnodeops;
 872 extern const struct fs_operation_def    socket_vnodeops_template[];
 873 
 874 extern dev_t                            sockdev;
 875 
 876 extern krwlock_t                        sockconf_lock;
 877 
 878 /*
 879  * sockfs functions
 880  */
 881 extern int      sock_getmsg(vnode_t *, struct strbuf *, struct strbuf *,
 882                         uchar_t *, int *, int, rval_t *);
 883 extern int      sock_putmsg(vnode_t *, struct strbuf *, struct strbuf *,
 884                         uchar_t, int, int);
 885 extern int      sogetvp(char *, vnode_t **, int);
 886 extern int      sockinit(int, char *);
 887 extern int      solookup(int, int, int, struct sockparams **);
 888 extern void     so_lock_single(struct sonode *);
 889 extern void     so_unlock_single(struct sonode *, int);
 890 extern int      so_lock_read(struct sonode *, int);
 891 extern int      so_lock_read_intr(struct sonode *, int);
 892 extern void     so_unlock_read(struct sonode *);
 893 extern void     *sogetoff(mblk_t *, t_uscalar_t, t_uscalar_t, uint_t);
 894 extern void     so_getopt_srcaddr(void *, t_uscalar_t,
 895                         void **, t_uscalar_t *);
 896 extern int      so_getopt_unix_close(void *, t_uscalar_t);
 897 extern void     fdbuf_free(struct fdbuf *);
 898 extern mblk_t   *fdbuf_allocmsg(int, struct fdbuf *);
 899 extern int      fdbuf_create(void *, int, struct fdbuf **);
 900 extern void     so_closefds(void *, t_uscalar_t, int, int);
 901 extern int      so_getfdopt(void *, t_uscalar_t, int, void **, int *);
 902 t_uscalar_t     so_optlen(void *, t_uscalar_t, int);
 903 extern void     so_cmsg2opt(void *, t_uscalar_t, int, mblk_t *);
 904 extern t_uscalar_t
 905                 so_cmsglen(mblk_t *, void *, t_uscalar_t, int);
 906 extern int      so_opt2cmsg(mblk_t *, void *, t_uscalar_t, int,
 907                         void *, t_uscalar_t);
 908 extern void     soisconnecting(struct sonode *);
 909 extern void     soisconnected(struct sonode *);
 910 extern void     soisdisconnected(struct sonode *, int);
 911 extern void     socantsendmore(struct sonode *);
 912 extern void     socantrcvmore(struct sonode *);
 913 extern void     soseterror(struct sonode *, int);
 914 extern int      sogeterr(struct sonode *, boolean_t);
 915 extern int      sowaitconnected(struct sonode *, int, int);
 916 
 917 extern ssize_t  soreadfile(file_t *, uchar_t *, u_offset_t, int *, size_t);
 918 extern void     *sock_kstat_init(zoneid_t);
 919 extern void     sock_kstat_fini(zoneid_t, void *);
 920 extern struct sonode *getsonode(int, int *, file_t **);
 921 /*
 922  * Function wrappers (mostly around the sonode switch) for
 923  * backward compatibility.
 924  */
 925 extern int      soaccept(struct sonode *, int, struct sonode **);
 926 extern int      sobind(struct sonode *, struct sockaddr *, socklen_t,
 927                     int, int);
 928 extern int      solisten(struct sonode *, int);
 929 extern int      soconnect(struct sonode *, struct sockaddr *, socklen_t,
 930                     int, int);
 931 extern int      sorecvmsg(struct sonode *, struct nmsghdr *, struct uio *);
 932 extern int      sosendmsg(struct sonode *, struct nmsghdr *, struct uio *);
 933 extern int      soshutdown(struct sonode *, int);
 934 extern int      sogetsockopt(struct sonode *, int, int, void *, socklen_t *,
 935                     int);
 936 extern int      sosetsockopt(struct sonode *, int, int, const void *,
 937                     t_uscalar_t);
 938 
 939 extern struct sonode    *socreate(struct sockparams *, int, int, int, int,
 940                             int *);
 941 
 942 extern int      so_copyin(const void *, void *, size_t, int);
 943 extern int      so_copyout(const void *, void *, size_t, int);
 944 
 945 #endif
 946 
 947 /*
 948  * Internal structure for obtaining sonode information from the socklist.
 949  * These types match those corresponding in the sonode structure.
 950  * This is not a published interface, and may change at any time.
 951  */
 952 struct sockinfo {
 953         uint_t          si_size;                /* real length of this struct */
 954         short           si_family;
 955         short           si_type;
 956         ushort_t        si_flag;
 957         uint_t          si_state;
 958         uint_t          si_ux_laddr_sou_magic;
 959         uint_t          si_ux_faddr_sou_magic;
 960         t_scalar_t      si_serv_type;
 961         t_uscalar_t     si_laddr_soa_len;
 962         t_uscalar_t     si_faddr_soa_len;
 963         uint16_t        si_laddr_family;
 964         uint16_t        si_faddr_family;
 965         char            si_laddr_sun_path[MAXPATHLEN + 1]; /* NULL terminated */
 966         char            si_faddr_sun_path[MAXPATHLEN + 1];
 967         boolean_t       si_faddr_noxlate;
 968         zoneid_t        si_szoneid;
 969 };
 970 
 971 /*
 972  * Subcodes for sockconf() system call
 973  */
 974 #define SOCKCONFIG_ADD_SOCK             0
 975 #define SOCKCONFIG_REMOVE_SOCK          1
 976 #define SOCKCONFIG_ADD_FILTER           2
 977 #define SOCKCONFIG_REMOVE_FILTER        3
 978 
 979 /*
 980  * Data structures for configuring socket filters.
 981  */
 982 
 983 /*
 984  * Placement hint for automatic filters
 985  */
 986 typedef enum {
 987         SOF_HINT_NONE,
 988         SOF_HINT_TOP,
 989         SOF_HINT_BOTTOM,
 990         SOF_HINT_BEFORE,
 991         SOF_HINT_AFTER
 992 } sof_hint_t;
 993 
 994 /*
 995  * Socket tuple. Used by sockconfig_filter_props to list socket
 996  * types of interest.
 997  */
 998 typedef struct sof_socktuple {
 999         int     sofst_family;
1000         int     sofst_type;
1001         int     sofst_protocol;
1002 } sof_socktuple_t;
1003 
1004 /*
1005  * Socket filter properties used by sockconfig() system call.
1006  */
1007 struct sockconfig_filter_props {
1008         char            *sfp_modname;
1009         boolean_t       sfp_autoattach;
1010         sof_hint_t      sfp_hint;
1011         char            *sfp_hintarg;
1012         uint_t          sfp_socktuple_cnt;
1013         sof_socktuple_t *sfp_socktuple;
1014 };
1015 
#ifdef  _SYSCALL32

/*
 * 32-bit counterparts of sof_socktuple and sockconfig_filter_props,
 * available when _SYSCALL32 is defined.  Pointer members are carried as
 * caddr32_t and the integer members use fixed 32-bit types.
 */
typedef struct sof_socktuple32 {
        int32_t sofst_family;
        int32_t sofst_type;
        int32_t sofst_protocol;
} sof_socktuple32_t;

struct sockconfig_filter_props32 {
        caddr32_t       sfp_modname;
        boolean_t       sfp_autoattach;
        sof_hint_t      sfp_hint;
        caddr32_t       sfp_hintarg;
        uint32_t        sfp_socktuple_cnt;
        caddr32_t       sfp_socktuple;
};

#endif  /* _SYSCALL32 */
1034 
/* Directory in which sockmods are stored */
#define SOCKMOD_PATH    "socketmod"
1036 
1037 #ifdef  __cplusplus
1038 }
1039 #endif
1040 
1041 #endif  /* _SYS_SOCKETVAR_H */