1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1996, 2010, Oracle and/or its affiliates. All rights reserved.
  23  */
  24 /*
  25  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  26  */
  27 
  28 
  29 /*
  30  * nfs_tbind.c, common part for nfsd and lockd.
  31  */
  32 
  33 #include <tiuser.h>
  34 #include <fcntl.h>
  35 #include <netconfig.h>
  36 #include <stropts.h>
  37 #include <errno.h>
  38 #include <syslog.h>
  39 #include <rpc/rpc.h>
  40 #include <sys/time.h>
  41 #include <sys/resource.h>
  42 #include <signal.h>
  43 #include <netdir.h>
  44 #include <unistd.h>
  45 #include <string.h>
  46 #include <netinet/tcp.h>
  47 #include <malloc.h>
  48 #include <stdlib.h>
  49 #include "nfs_tbind.h"
  50 #include <nfs/nfs.h>
  51 #include <nfs/nfs_acl.h>
  52 #include <nfs/nfssys.h>
  53 #include <nfs/nfs4.h>
  54 #include <zone.h>
  55 #include <sys/socket.h>
  56 #include <tsol/label.h>
  57 
  58 /*
  59  * Determine valid semantics for most applications.
  60  */
  61 #define OK_TPI_TYPE(_nconf) \
  62         (_nconf->nc_semantics == NC_TPI_CLTS || \
  63         _nconf->nc_semantics == NC_TPI_COTS || \
  64         _nconf->nc_semantics == NC_TPI_COTS_ORD)
  65 
  66 #define BE32_TO_U32(a) \
  67         ((((ulong_t)((uchar_t *)a)[0] & 0xFF) << (ulong_t)24) | \
  68         (((ulong_t)((uchar_t *)a)[1] & 0xFF) << (ulong_t)16) | \
  69         (((ulong_t)((uchar_t *)a)[2] & 0xFF) << (ulong_t)8)  | \
  70         ((ulong_t)((uchar_t *)a)[3] & 0xFF))
  71 
  72 /*
  73  * Number of elements to add to the poll array on each allocation.
  74  */
  75 #define POLL_ARRAY_INC_SIZE     64
  76 
  77 /*
  78  * Number of file descriptors by which the process soft limit may be
  79  * increased on each call to nofile_increase(0).
  80  */
  81 #define NOFILE_INC_SIZE 64
  82 
  83 /*
  84  * Default TCP send and receive buffer size of NFS server.
  85  */
  86 #define NFSD_TCP_BUFSZ  (1024*1024)
  87 
  88 struct conn_ind {
  89         struct conn_ind *conn_next;
  90         struct conn_ind *conn_prev;
  91         struct t_call   *conn_call;
  92 };
  93 
  94 struct conn_entry {
  95         bool_t                  closing;
  96         struct netconfig        nc;
  97 };
  98 
  99 /*
 100  * this file contains transport routines common to nfsd and lockd
 101  */
 102 static  int     nofile_increase(int);
 103 static  int     reuseaddr(int);
 104 static  int     recvucred(int);
 105 static  int     anonmlp(int);
 106 static  void    add_to_poll_list(int, struct netconfig *);
 107 static  char    *serv_name_to_port_name(char *);
 108 static  int     bind_to_proto(char *, char *, struct netbuf **,
 109                                 struct netconfig **);
 110 static  int     bind_to_provider(char *, char *, struct netbuf **,
 111                                         struct netconfig **);
 112 static  void    conn_close_oldest(void);
 113 static  boolean_t conn_get(int, struct netconfig *, struct conn_ind **);
 114 static  void    cots_listen_event(int, int);
 115 static  int     discon_get(int, struct netconfig *, struct conn_ind **);
 116 static  int     do_poll_clts_action(int, int);
 117 static  int     do_poll_cots_action(int, int);
 118 static  void    remove_from_poll_list(int);
 119 static  int     set_addrmask(int, struct netconfig *, struct netbuf *);
 120 static  int     is_listen_fd_index(int);
 121 
 122 static  struct pollfd *poll_array;
 123 static  struct conn_entry *conn_polled;
 124 static  int     num_conns;              /* Current number of connections */
 125 int             (*Mysvc4)(int, struct netbuf *, struct netconfig *, int,
 126                 struct netbuf *);
 127 static int      setopt(int fd, int level, int name, int value);
 128 static int      get_opt(int fd, int level, int name);
 129 static void     nfslib_set_sockbuf(int fd);
 130 
 131 /*
 132  * Called to create and prepare a transport descriptor for in-kernel
 133  * RPC service.
 134  * Returns -1 on failure and a valid descriptor on success.
 135  */
 136 int
 137 nfslib_transport_open(struct netconfig *nconf)
 138 {
 139         int fd;
 140         struct strioctl strioc;
 141 
 142         if ((nconf == (struct netconfig *)NULL) ||
 143             (nconf->nc_device == (char *)NULL)) {
 144                 syslog(LOG_ERR, "no netconfig device");
 145                 return (-1);
 146         }
 147 
 148         /*
 149          * Open the transport device.
 150          */
 151         fd = t_open(nconf->nc_device, O_RDWR, (struct t_info *)NULL);
 152         if (fd == -1) {
 153                 if (t_errno == TSYSERR && errno == EMFILE &&
 154                     (nofile_increase(0) == 0)) {
 155                         /* Try again with a higher NOFILE limit. */
 156                         fd = t_open(nconf->nc_device, O_RDWR,
 157                             (struct t_info *)NULL);
 158                 }
 159                 if (fd == -1) {
 160                         syslog(LOG_ERR, "t_open %s failed:  t_errno %d, %m",
 161                             nconf->nc_device, t_errno);
 162                         return (-1);
 163                 }
 164         }
 165 
 166         /*
 167          * Pop timod because the RPC module must be as close as possible
 168          * to the transport.
 169          */
 170         if (ioctl(fd, I_POP, 0) < 0) {
 171                 syslog(LOG_ERR, "I_POP of timod failed: %m");
 172                 (void) t_close(fd);
 173                 return (-1);
 174         }
 175 
 176         /*
 177          * Common code for CLTS and COTS transports
 178          */
 179         if (ioctl(fd, I_PUSH, "rpcmod") < 0) {
 180                 syslog(LOG_ERR, "I_PUSH of rpcmod failed: %m");
 181                 (void) t_close(fd);
 182                 return (-1);
 183         }
 184 
 185         strioc.ic_cmd = RPC_SERVER;
 186         strioc.ic_dp = (char *)0;
 187         strioc.ic_len = 0;
 188         strioc.ic_timout = -1;
 189 
 190         /* Tell rpcmod to act like a server stream. */
 191         if (ioctl(fd, I_STR, &strioc) < 0) {
 192                 syslog(LOG_ERR, "rpcmod set-up ioctl failed: %m");
 193                 (void) t_close(fd);
 194                 return (-1);
 195         }
 196 
 197         /*
 198          * Re-push timod so that we will still be doing TLI
 199          * operations on the descriptor.
 200          */
 201         if (ioctl(fd, I_PUSH, "timod") < 0) {
 202                 syslog(LOG_ERR, "I_PUSH of timod failed: %m");
 203                 (void) t_close(fd);
 204                 return (-1);
 205         }
 206 
 207         /*
 208          * Enable options of returning the ip's for udp.
 209          */
 210         if (strcmp(nconf->nc_netid, "udp6") == 0)
 211                 __rpc_tli_set_options(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, 1);
 212         else if (strcmp(nconf->nc_netid, "udp") == 0)
 213                 __rpc_tli_set_options(fd, IPPROTO_IP, IP_RECVDSTADDR, 1);
 214 
 215         return (fd);
 216 }
 217 
 218 static int
 219 nofile_increase(int limit)
 220 {
 221         struct rlimit rl;
 222 
 223         if (getrlimit(RLIMIT_NOFILE, &rl) == -1) {
 224                 syslog(LOG_ERR, "getrlimit of NOFILE failed: %m");
 225                 return (-1);
 226         }
 227 
 228         if (limit > 0)
 229                 rl.rlim_cur = limit;
 230         else
 231                 rl.rlim_cur += NOFILE_INC_SIZE;
 232 
 233         if (rl.rlim_cur > rl.rlim_max &&
 234             rl.rlim_max != RLIM_INFINITY)
 235                 rl.rlim_max = rl.rlim_cur;
 236 
 237         if (setrlimit(RLIMIT_NOFILE, &rl) == -1) {
 238                 syslog(LOG_ERR, "setrlimit of NOFILE to %d failed: %m",
 239                     rl.rlim_cur);
 240                 return (-1);
 241         }
 242 
 243         return (0);
 244 }
 245 
 246 static void
 247 nfslib_set_sockbuf(int fd)
 248 {
 249         int curval, val;
 250 
 251         val = NFSD_TCP_BUFSZ;
 252 
 253         curval = get_opt(fd, SOL_SOCKET, SO_SNDBUF);
 254         syslog(LOG_DEBUG, "Current SO_SNDBUF value is %d", curval);
 255         if ((curval != -1) && (curval < val)) {
 256                 syslog(LOG_DEBUG, "Set SO_SNDBUF  option to %d", val);
 257                 if (setopt(fd, SOL_SOCKET, SO_SNDBUF, val) < 0) {
 258                         syslog(LOG_ERR,
 259                             "couldn't set SO_SNDBUF to %d - t_errno = %d",
 260                             val, t_errno);
 261                         syslog(LOG_ERR,
 262                             "Check and increase system-wide tcp_max_buf");
 263                 }
 264         }
 265 
 266         curval = get_opt(fd, SOL_SOCKET, SO_RCVBUF);
 267         syslog(LOG_DEBUG, "Current SO_RCVBUF value is %d", curval);
 268         if ((curval != -1) && (curval < val)) {
 269                 syslog(LOG_DEBUG, "Set SO_RCVBUF  option to %d", val);
 270                 if (setopt(fd, SOL_SOCKET, SO_RCVBUF, val) < 0) {
 271                         syslog(LOG_ERR,
 272                             "couldn't set SO_RCVBUF to %d - t_errno = %d",
 273                             val, t_errno);
 274                         syslog(LOG_ERR,
 275                             "Check and increase system-wide tcp_max_buf");
 276                 }
 277         }
 278 }
 279 
 280 int
 281 nfslib_bindit(struct netconfig *nconf, struct netbuf **addr,
 282         struct nd_hostserv *hs, int backlog)
 283 {
 284         int fd;
 285         struct t_bind  *ntb;
 286         struct t_bind tb;
 287         struct nd_addrlist *addrlist;
 288         struct t_optmgmt req, resp;
 289         struct opthdr *opt;
 290         char reqbuf[128];
 291         bool_t use_any = FALSE;
 292         bool_t gzone = TRUE;
 293 
 294         if ((fd = nfslib_transport_open(nconf)) == -1) {
 295                 syslog(LOG_ERR, "cannot establish transport service over %s",
 296                     nconf->nc_device);
 297                 return (-1);
 298         }
 299 
 300         addrlist = (struct nd_addrlist *)NULL;
 301 
 302         /* nfs4_callback service does not used a fieed port number */
 303 
 304         if (strcmp(hs->h_serv, "nfs4_callback") == 0) {
 305                 tb.addr.maxlen = 0;
 306                 tb.addr.len = 0;
 307                 tb.addr.buf = 0;
 308                 use_any = TRUE;
 309                 gzone = (getzoneid() == GLOBAL_ZONEID);
 310         } else if (netdir_getbyname(nconf, hs, &addrlist) != 0) {
 311 
 312                 syslog(LOG_ERR,
 313                 "Cannot get address for transport %s host %s service %s",
 314                     nconf->nc_netid, hs->h_host, hs->h_serv);
 315                 (void) t_close(fd);
 316                 return (-1);
 317         }
 318 
 319         if (strcmp(nconf->nc_proto, "tcp") == 0) {
 320                 /*
 321                  * If we're running over TCP, then set the
 322                  * SO_REUSEADDR option so that we can bind
 323                  * to our preferred address even if previously
 324                  * left connections exist in FIN_WAIT states.
 325                  * This is somewhat bogus, but otherwise you have
 326                  * to wait 2 minutes to restart after killing it.
 327                  */
 328                 if (reuseaddr(fd) == -1) {
 329                         syslog(LOG_WARNING,
 330                         "couldn't set SO_REUSEADDR option on transport");
 331                 }
 332         } else if (strcmp(nconf->nc_proto, "udp") == 0) {
 333                 /*
 334                  * In order to run MLP on UDP, we need to handle creds.
 335                  */
 336                 if (recvucred(fd) == -1) {
 337                         syslog(LOG_WARNING,
 338                             "couldn't set SO_RECVUCRED option on transport");
 339                 }
 340         }
 341 
 342         /*
 343          * Make non global zone nfs4_callback port MLP
 344          */
 345         if (use_any && is_system_labeled() && !gzone) {
 346                 if (anonmlp(fd) == -1) {
 347                         /*
 348                          * failing to set this option means nfs4_callback
 349                          * could fail silently later. So fail it with
 350                          * with an error message now.
 351                          */
 352                         syslog(LOG_ERR,
 353                             "couldn't set SO_ANON_MLP option on transport");
 354                         (void) t_close(fd);
 355                         return (-1);
 356                 }
 357         }
 358 
 359         if (nconf->nc_semantics == NC_TPI_CLTS)
 360                 tb.qlen = 0;
 361         else
 362                 tb.qlen = backlog;
 363 
 364         /* LINTED pointer alignment */
 365         ntb = (struct t_bind *)t_alloc(fd, T_BIND, T_ALL);
 366         if (ntb == (struct t_bind *)NULL) {
 367                 syslog(LOG_ERR, "t_alloc failed:  t_errno %d, %m", t_errno);
 368                 (void) t_close(fd);
 369                 netdir_free((void *)addrlist, ND_ADDRLIST);
 370                 return (-1);
 371         }
 372 
 373         /*
 374          * XXX - what about the space tb->addr.buf points to? This should
 375          * be either a memcpy() to/from the buf fields, or t_alloc(fd,T_BIND,)
 376          * should't be called with T_ALL.
 377          */
 378         if (addrlist)
 379                 tb.addr = *(addrlist->n_addrs);              /* structure copy */
 380 
 381         if (t_bind(fd, &tb, ntb) == -1) {
 382                 syslog(LOG_ERR, "t_bind failed:  t_errno %d, %m", t_errno);
 383                 (void) t_free((char *)ntb, T_BIND);
 384                 netdir_free((void *)addrlist, ND_ADDRLIST);
 385                 (void) t_close(fd);
 386                 return (-1);
 387         }
 388 
 389         /* make sure we bound to the right address */
 390         if (use_any == FALSE &&
 391             (tb.addr.len != ntb->addr.len ||
 392             memcmp(tb.addr.buf, ntb->addr.buf, tb.addr.len) != 0)) {
 393                 syslog(LOG_ERR, "t_bind to wrong address");
 394                 (void) t_free((char *)ntb, T_BIND);
 395                 netdir_free((void *)addrlist, ND_ADDRLIST);
 396                 (void) t_close(fd);
 397                 return (-1);
 398         }
 399 
 400         /*
 401          * Call nfs4svc_setport so that the kernel can be
 402          * informed what port number the daemon is listing
 403          * for incoming connection requests.
 404          */
 405 
 406         if ((nconf->nc_semantics == NC_TPI_COTS ||
 407             nconf->nc_semantics == NC_TPI_COTS_ORD) && Mysvc4 != NULL)
 408                 (*Mysvc4)(fd, NULL, nconf, NFS4_SETPORT, &ntb->addr);
 409 
 410         *addr = &ntb->addr;
 411         netdir_free((void *)addrlist, ND_ADDRLIST);
 412 
 413         if (strcmp(nconf->nc_proto, "tcp") == 0) {
 414                 /*
 415                  * Disable the Nagle algorithm on TCP connections.
 416                  * Connections accepted from this listener will
 417                  * inherit the listener options.
 418                  */
 419 
 420                 /* LINTED pointer alignment */
 421                 opt = (struct opthdr *)reqbuf;
 422                 opt->level = IPPROTO_TCP;
 423                 opt->name = TCP_NODELAY;
 424                 opt->len = sizeof (int);
 425 
 426                 /* LINTED pointer alignment */
 427                 *(int *)((char *)opt + sizeof (*opt)) = 1;
 428 
 429                 req.flags = T_NEGOTIATE;
 430                 req.opt.len = sizeof (*opt) + opt->len;
 431                 req.opt.buf = (char *)opt;
 432                 resp.flags = 0;
 433                 resp.opt.buf = reqbuf;
 434                 resp.opt.maxlen = sizeof (reqbuf);
 435 
 436                 if (t_optmgmt(fd, &req, &resp) < 0 ||
 437                     resp.flags != T_SUCCESS) {
 438                         syslog(LOG_ERR,
 439         "couldn't set NODELAY option for proto %s: t_errno = %d, %m",
 440                             nconf->nc_proto, t_errno);
 441                 }
 442 
 443                 nfslib_set_sockbuf(fd);
 444         }
 445 
 446         return (fd);
 447 }
 448 
 449 static int
 450 get_opt(int fd, int level, int name)
 451 {
 452         struct t_optmgmt req, res;
 453         struct {
 454                 struct opthdr opt;
 455                 int value;
 456         } reqbuf;
 457 
 458         reqbuf.opt.level = level;
 459         reqbuf.opt.name = name;
 460         reqbuf.opt.len = sizeof (int);
 461         reqbuf.value = 0;
 462 
 463         req.flags = T_CURRENT;
 464         req.opt.len = sizeof (reqbuf);
 465         req.opt.buf = (char *)&reqbuf;
 466 
 467         res.flags = 0;
 468         res.opt.buf = (char *)&reqbuf;
 469         res.opt.maxlen = sizeof (reqbuf);
 470 
 471         if (t_optmgmt(fd, &req, &res) < 0 || res.flags != T_SUCCESS) {
 472                 t_error("t_optmgmt");
 473                 return (-1);
 474         }
 475         return (reqbuf.value);
 476 }
 477 
 478 static int
 479 setopt(int fd, int level, int name, int value)
 480 {
 481         struct t_optmgmt req, resp;
 482         struct {
 483                 struct opthdr opt;
 484                 int value;
 485         } reqbuf;
 486 
 487         reqbuf.opt.level = level;
 488         reqbuf.opt.name = name;
 489         reqbuf.opt.len = sizeof (int);
 490 
 491         reqbuf.value = value;
 492 
 493         req.flags = T_NEGOTIATE;
 494         req.opt.len = sizeof (reqbuf);
 495         req.opt.buf = (char *)&reqbuf;
 496 
 497         resp.flags = 0;
 498         resp.opt.buf = (char *)&reqbuf;
 499         resp.opt.maxlen = sizeof (reqbuf);
 500 
 501         if (t_optmgmt(fd, &req, &resp) < 0 || resp.flags != T_SUCCESS) {
 502                 t_error("t_optmgmt");
 503                 return (-1);
 504         }
 505         return (0);
 506 }
 507 
 508 static int
 509 reuseaddr(int fd)
 510 {
 511         return (setopt(fd, SOL_SOCKET, SO_REUSEADDR, 1));
 512 }
 513 
 514 static int
 515 recvucred(int fd)
 516 {
 517         return (setopt(fd, SOL_SOCKET, SO_RECVUCRED, 1));
 518 }
 519 
 520 static int
 521 anonmlp(int fd)
 522 {
 523         return (setopt(fd, SOL_SOCKET, SO_ANON_MLP, 1));
 524 }
 525 
 526 void
 527 nfslib_log_tli_error(char *tli_name, int fd, struct netconfig *nconf)
 528 {
 529         int error;
 530 
 531         /*
 532          * Save the error code across syslog(), just in case syslog()
 533          * gets its own error and, therefore, overwrites errno.
 534          */
 535         error = errno;
 536         if (t_errno == TSYSERR) {
 537                 syslog(LOG_ERR, "%s(file descriptor %d/transport %s) %m",
 538                     tli_name, fd, nconf->nc_proto);
 539         } else {
 540                 syslog(LOG_ERR,
 541                     "%s(file descriptor %d/transport %s) TLI error %d",
 542                     tli_name, fd, nconf->nc_proto, t_errno);
 543         }
 544         errno = error;
 545 }
 546 
 547 /*
 548  * Called to set up service over a particular transport.
 549  */
 550 void
 551 do_one(char *provider, NETSELDECL(proto), struct protob *protobp0,
 552         int (*svc)(int, struct netbuf, struct netconfig *))
 553 {
 554         register int sock;
 555         struct protob *protobp;
 556         struct netbuf *retaddr;
 557         struct netconfig *retnconf;
 558         struct netbuf addrmask;
 559         int vers;
 560         int err;
 561         int l;
 562 
 563         if (provider)
 564                 sock = bind_to_provider(provider, protobp0->serv, &retaddr,
 565                     &retnconf);
 566         else
 567                 sock = bind_to_proto(proto, protobp0->serv, &retaddr,
 568                     &retnconf);
 569 
 570         if (sock == -1) {
 571                 (void) syslog(LOG_ERR,
 572         "Cannot establish %s service over %s: transport setup problem.",
 573                     protobp0->serv, provider ? provider : proto);
 574                 return;
 575         }
 576 
 577         if (set_addrmask(sock, retnconf, &addrmask) < 0) {
 578                 (void) syslog(LOG_ERR,
 579                     "Cannot set address mask for %s", retnconf->nc_netid);
 580                 return;
 581         }
 582 
 583         /*
 584          * Register all versions of the programs in the protocol block list.
 585          */
 586         l = strlen(NC_UDP);
 587         for (protobp = protobp0; protobp; protobp = protobp->next) {
 588                 for (vers = protobp->versmin; vers <= protobp->versmax;
 589                     vers++) {
 590                         if ((protobp->program == NFS_PROGRAM ||
 591                             protobp->program == NFS_ACL_PROGRAM) &&
 592                             vers == NFS_V4 &&
 593                             strncasecmp(retnconf->nc_proto, NC_UDP, l) == 0)
 594                                 continue;
 595 
 596                         (void) rpcb_unset(protobp->program, vers, retnconf);
 597                         (void) rpcb_set(protobp->program, vers, retnconf,
 598                             retaddr);
 599                 }
 600         }
 601 
 602         /*
 603          * Register services with CLTS semantics right now.
 604          * Note: services with COTS/COTS_ORD semantics will be
 605          * registered later from cots_listen_event function.
 606          */
 607         if (retnconf->nc_semantics == NC_TPI_CLTS) {
 608                 /* Don't drop core if supporting module(s) aren't loaded. */
 609                 (void) signal(SIGSYS, SIG_IGN);
 610 
 611                 /*
 612                  * svc() doesn't block, it returns success or failure.
 613                  */
 614 
 615                 if (svc == NULL && Mysvc4 != NULL)
 616                         err = (*Mysvc4)(sock, &addrmask, retnconf,
 617                             NFS4_SETPORT|NFS4_KRPC_START, retaddr);
 618                 else
 619                         err = (*svc)(sock, addrmask, retnconf);
 620 
 621                 if (err < 0) {
 622                         (void) syslog(LOG_ERR,
 623                             "Cannot establish %s service over <file desc."
 624                             " %d, protocol %s> : %m. Exiting",
 625                             protobp0->serv, sock, retnconf->nc_proto);
 626                         exit(1);
 627                 }
 628         }
 629         free(addrmask.buf);
 630 
 631         /*
 632          * We successfully set up the server over this transport.
 633          * Add this descriptor to the one being polled on.
 634          */
 635         add_to_poll_list(sock, retnconf);
 636 }
 637 
 638 /*
 639  * Set up the NFS service over all the available transports.
 640  * Returns -1 for failure, 0 for success.
 641  */
 642 int
 643 do_all(struct protob *protobp,
 644         int (*svc)(int, struct netbuf, struct netconfig *))
 645 {
 646         struct netconfig *nconf;
 647         NCONF_HANDLE *nc;
 648         int l;
 649 
 650         if ((nc = setnetconfig()) == (NCONF_HANDLE *)NULL) {
 651                 syslog(LOG_ERR, "setnetconfig failed: %m");
 652                 return (-1);
 653         }
 654         l = strlen(NC_UDP);
 655         while (nconf = getnetconfig(nc)) {
 656                 if ((nconf->nc_flag & NC_VISIBLE) &&
 657                     strcmp(nconf->nc_protofmly, NC_LOOPBACK) != 0 &&
 658                     OK_TPI_TYPE(nconf) &&
 659                     (protobp->program != NFS4_CALLBACK ||
 660                     strncasecmp(nconf->nc_proto, NC_UDP, l) != 0))
 661                         do_one(nconf->nc_device, nconf->nc_proto,
 662                             protobp, svc);
 663         }
 664         (void) endnetconfig(nc);
 665         return (0);
 666 }
 667 
 668 /*
 669  * poll on the open transport descriptors for events and errors.
 670  */
 671 void
 672 poll_for_action(void)
 673 {
 674         int nfds;
 675         int i;
 676 
 677         /*
 678          * Keep polling until all transports have been closed. When this
 679          * happens, we return.
 680          */
 681         while ((int)num_fds > 0) {
 682                 nfds = poll(poll_array, num_fds, INFTIM);
 683                 switch (nfds) {
 684                 case 0:
 685                         continue;
 686 
 687                 case -1:
 688                         /*
 689                          * Some errors from poll could be
 690                          * due to temporary conditions, and we try to
 691                          * be robust in the face of them. Other
 692                          * errors (should never happen in theory)
 693                          * are fatal (eg. EINVAL, EFAULT).
 694                          */
 695                         switch (errno) {
 696                         case EINTR:
 697                                 continue;
 698 
 699                         case EAGAIN:
 700                         case ENOMEM:
 701                                 (void) sleep(10);
 702                                 continue;
 703 
 704                         default:
 705                                 (void) syslog(LOG_ERR,
 706                                     "poll failed: %m. Exiting");
 707                                 exit(1);
 708                         }
 709                 default:
 710                         break;
 711                 }
 712 
 713                 /*
 714                  * Go through the poll list looking for events.
 715                  */
 716                 for (i = 0; i < num_fds && nfds > 0; i++) {
 717                         if (poll_array[i].revents) {
 718                                 nfds--;
 719                                 /*
 720                                  * We have a message, so try to read it.
 721                                  * Record the error return in errno,
 722                                  * so that syslog(LOG_ERR, "...%m")
 723                                  * dumps the corresponding error string.
 724                                  */
 725                                 if (conn_polled[i].nc.nc_semantics ==
 726                                     NC_TPI_CLTS) {
 727                                         errno = do_poll_clts_action(
 728                                             poll_array[i].fd, i);
 729                                 } else {
 730                                         errno = do_poll_cots_action(
 731                                             poll_array[i].fd, i);
 732                                 }
 733 
 734                                 if (errno == 0)
 735                                         continue;
 736                                 /*
 737                                  * Most returned error codes mean that there is
 738                                  * fatal condition which we can only deal with
 739                                  * by closing the transport.
 740                                  */
 741                                 if (errno != EAGAIN && errno != ENOMEM) {
 742                                         (void) syslog(LOG_ERR,
 743                 "Error (%m) reading descriptor %d/transport %s. Closing it.",
 744                                             poll_array[i].fd,
 745                                             conn_polled[i].nc.nc_proto);
 746                                         (void) t_close(poll_array[i].fd);
 747                                         remove_from_poll_list(poll_array[i].fd);
 748 
 749                                 } else if (errno == ENOMEM)
 750                                         (void) sleep(5);
 751                         }
 752                 }
 753         }
 754 
 755         (void) syslog(LOG_ERR,
 756             "All transports have been closed with errors. Exiting.");
 757 }
 758 
 759 /*
 760  * Allocate poll/transport array entries for this descriptor.
 761  */
 762 static void
 763 add_to_poll_list(int fd, struct netconfig *nconf)
 764 {
 765         static int poll_array_size = 0;
 766 
 767         /*
 768          * If the arrays are full, allocate new ones.
 769          */
 770         if (num_fds == poll_array_size) {
 771                 struct pollfd *tpa;
 772                 struct conn_entry *tnp;
 773 
 774                 if (poll_array_size != 0) {
 775                         tpa = poll_array;
 776                         tnp = conn_polled;
 777                 } else
 778                         tpa = (struct pollfd *)0;
 779 
 780                 poll_array_size += POLL_ARRAY_INC_SIZE;
 781                 /*
 782                  * Allocate new arrays.
 783                  */
 784                 poll_array = (struct pollfd *)
 785                     malloc(poll_array_size * sizeof (struct pollfd) + 256);
 786                 conn_polled = (struct conn_entry *)
 787                     malloc(poll_array_size * sizeof (struct conn_entry) + 256);
 788                 if (poll_array == (struct pollfd *)NULL ||
 789                     conn_polled == (struct conn_entry *)NULL) {
 790                         syslog(LOG_ERR, "malloc failed for poll array");
 791                         exit(1);
 792                 }
 793 
 794                 /*
 795                  * Copy the data of the old ones into new arrays, and
 796                  * free the old ones.
 797                  */
 798                 if (tpa) {
 799                         (void) memcpy((void *)poll_array, (void *)tpa,
 800                             num_fds * sizeof (struct pollfd));
 801                         (void) memcpy((void *)conn_polled, (void *)tnp,
 802                             num_fds * sizeof (struct conn_entry));
 803                         free((void *)tpa);
 804                         free((void *)tnp);
 805                 }
 806         }
 807 
 808         /*
 809          * Set the descriptor and event list. All possible events are
 810          * polled for.
 811          */
 812         poll_array[num_fds].fd = fd;
 813         poll_array[num_fds].events = POLLIN|POLLRDNORM|POLLRDBAND|POLLPRI;
 814 
 815         /*
 816          * Copy the transport data over too.
 817          */
 818         conn_polled[num_fds].nc = *nconf;
 819         conn_polled[num_fds].closing = 0;
 820 
 821         /*
 822          * Set the descriptor to non-blocking. Avoids a race
 823          * between data arriving on the stream and then having it
 824          * flushed before we can read it.
 825          */
 826         if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) {
 827                 (void) syslog(LOG_ERR,
 828         "fcntl(file desc. %d/transport %s, F_SETFL, O_NONBLOCK): %m. Exiting",
 829                     num_fds, nconf->nc_proto);
 830                 exit(1);
 831         }
 832 
 833         /*
 834          * Count this descriptor.
 835          */
 836         ++num_fds;
 837 }
 838 
 839 static void
 840 remove_from_poll_list(int fd)
 841 {
 842         int i;
 843         int num_to_copy;
 844 
 845         for (i = 0; i < num_fds; i++) {
 846                 if (poll_array[i].fd == fd) {
 847                         --num_fds;
 848                         num_to_copy = num_fds - i;
 849                         (void) memcpy((void *)&poll_array[i],
 850                             (void *)&poll_array[i+1],
 851                             num_to_copy * sizeof (struct pollfd));
 852                         (void) memset((void *)&poll_array[num_fds], 0,
 853                             sizeof (struct pollfd));
 854                         (void) memcpy((void *)&conn_polled[i],
 855                             (void *)&conn_polled[i+1],
 856                             num_to_copy * sizeof (struct conn_entry));
 857                         (void) memset((void *)&conn_polled[num_fds], 0,
 858                             sizeof (struct conn_entry));
 859                         return;
 860                 }
 861         }
 862         syslog(LOG_ERR, "attempt to remove nonexistent fd from poll list");
 863 
 864 }
 865 
 866 /*
 867  * Called to read and interpret the event on a connectionless descriptor.
 868  * Returns 0 if successful, or a UNIX error code if failure.
 869  */
 870 static int
 871 do_poll_clts_action(int fd, int conn_index)
 872 {
 873         int error;
 874         int ret;
 875         int flags;
 876         struct netconfig *nconf = &conn_polled[conn_index].nc;
 877         static struct t_unitdata *unitdata = NULL;
 878         static struct t_uderr *uderr = NULL;
 879         static int oldfd = -1;
 880         struct nd_hostservlist *host = NULL;
 881         struct strbuf ctl[1], data[1];
 882         /*
 883          * We just need to have some space to consume the
 884          * message in the event we can't use the TLI interface to do the
 885          * job.
 886          *
 887          * We flush the message using getmsg(). For the control part
 888          * we allocate enough for any TPI header plus 32 bytes for address
 889          * and options. For the data part, there is nothing magic about
 890          * the size of the array, but 256 bytes is probably better than
 891          * 1 byte, and we don't expect any data portion anyway.
 892          *
 893          * If the array sizes are too small, we handle this because getmsg()
 894          * (called to consume the message) will return MOREDATA|MORECTL.
 895          * Thus we just call getmsg() until it's read the message.
 896          */
 897         char ctlbuf[sizeof (union T_primitives) + 32];
 898         char databuf[256];
 899 
 900         /*
 901          * If this is the same descriptor as the last time
 902          * do_poll_clts_action was called, we can save some
 903          * de-allocation and allocation.
 904          */
 905         if (oldfd != fd) {
 906                 oldfd = fd;
 907 
 908                 if (unitdata) {
 909                         (void) t_free((char *)unitdata, T_UNITDATA);
 910                         unitdata = NULL;
 911                 }
 912                 if (uderr) {
 913                         (void) t_free((char *)uderr, T_UDERROR);
 914                         uderr = NULL;
 915                 }
 916         }
 917 
 918         /*
 919          * Allocate a unitdata structure for receiving the event.
 920          */
 921         if (unitdata == NULL) {
 922                 /* LINTED pointer alignment */
 923                 unitdata = (struct t_unitdata *)t_alloc(fd, T_UNITDATA, T_ALL);
 924                 if (unitdata == NULL) {
 925                         if (t_errno == TSYSERR) {
 926                                 /*
 927                                  * Save the error code across
 928                                  * syslog(), just in case
 929                                  * syslog() gets its own error
 930                                  * and therefore overwrites errno.
 931                                  */
 932                                 error = errno;
 933                                 (void) syslog(LOG_ERR,
 934         "t_alloc(file descriptor %d/transport %s, T_UNITDATA) failed: %m",
 935                                     fd, nconf->nc_proto);
 936                                 return (error);
 937                         }
 938                         (void) syslog(LOG_ERR,
 939 "t_alloc(file descriptor %d/transport %s, T_UNITDATA) failed TLI error %d",
 940                             fd, nconf->nc_proto, t_errno);
 941                         goto flush_it;
 942                 }
 943         }
 944 
 945 try_again:
 946         flags = 0;
 947 
 948         /*
 949          * The idea is we wait for T_UNITDATA_IND's. Of course,
 950          * we don't get any, because rpcmod filters them out.
 951          * However, we need to call t_rcvudata() to let TLI
 952          * tell us we have a T_UDERROR_IND.
 953          *
 954          * algorithm is:
 955          *      t_rcvudata(), expecting TLOOK.
 956          *      t_look(), expecting T_UDERR.
 957          *      t_rcvuderr(), expecting success (0).
 958          *      expand destination address into ASCII,
 959          *      and dump it.
 960          */
 961 
 962         ret = t_rcvudata(fd, unitdata, &flags);
 963         if (ret == 0 || t_errno == TBUFOVFLW) {
 964                 (void) syslog(LOG_WARNING,
 965 "t_rcvudata(file descriptor %d/transport %s) got unexpected data, %d bytes",
 966                     fd, nconf->nc_proto, unitdata->udata.len);
 967 
 968                 /*
 969                  * Even though we don't expect any data, in case we do,
 970                  * keep reading until there is no more.
 971                  */
 972                 if (flags & T_MORE)
 973                         goto try_again;
 974 
 975                 return (0);
 976         }
 977 
 978         switch (t_errno) {
 979         case TNODATA:
 980                 return (0);
 981         case TSYSERR:
 982                 /*
 983                  * System errors are returned to caller.
 984                  * Save the error code across
 985                  * syslog(), just in case
 986                  * syslog() gets its own error
 987                  * and therefore overwrites errno.
 988                  */
 989                 error = errno;
 990                 (void) syslog(LOG_ERR,
 991                     "t_rcvudata(file descriptor %d/transport %s) %m",
 992                     fd, nconf->nc_proto);
 993                 return (error);
 994         case TLOOK:
 995                 break;
 996         default:
 997                 (void) syslog(LOG_ERR,
 998                 "t_rcvudata(file descriptor %d/transport %s) TLI error %d",
 999                     fd, nconf->nc_proto, t_errno);
1000                 goto flush_it;
1001         }
1002 
1003         ret = t_look(fd);
1004         switch (ret) {
1005         case 0:
1006                 return (0);
1007         case -1:
1008                 /*
1009                  * System errors are returned to caller.
1010                  */
1011                 if (t_errno == TSYSERR) {
1012                         /*
1013                          * Save the error code across
1014                          * syslog(), just in case
1015                          * syslog() gets its own error
1016                          * and therefore overwrites errno.
1017                          */
1018                         error = errno;
1019                         (void) syslog(LOG_ERR,
1020                             "t_look(file descriptor %d/transport %s) %m",
1021                             fd, nconf->nc_proto);
1022                         return (error);
1023                 }
1024                 (void) syslog(LOG_ERR,
1025                     "t_look(file descriptor %d/transport %s) TLI error %d",
1026                     fd, nconf->nc_proto, t_errno);
1027                 goto flush_it;
1028         case T_UDERR:
1029                 break;
1030         default:
1031                 (void) syslog(LOG_WARNING,
1032         "t_look(file descriptor %d/transport %s) returned %d not T_UDERR (%d)",
1033                     fd, nconf->nc_proto, ret, T_UDERR);
1034         }
1035 
1036         if (uderr == NULL) {
1037                 /* LINTED pointer alignment */
1038                 uderr = (struct t_uderr *)t_alloc(fd, T_UDERROR, T_ALL);
1039                 if (uderr == NULL) {
1040                         if (t_errno == TSYSERR) {
1041                                 /*
1042                                  * Save the error code across
1043                                  * syslog(), just in case
1044                                  * syslog() gets its own error
1045                                  * and therefore overwrites errno.
1046                                  */
1047                                 error = errno;
1048                                 (void) syslog(LOG_ERR,
1049         "t_alloc(file descriptor %d/transport %s, T_UDERROR) failed: %m",
1050                                     fd, nconf->nc_proto);
1051                                 return (error);
1052                         }
1053                         (void) syslog(LOG_ERR,
1054 "t_alloc(file descriptor %d/transport %s, T_UDERROR) failed TLI error: %d",
1055                             fd, nconf->nc_proto, t_errno);
1056                         goto flush_it;
1057                 }
1058         }
1059 
1060         ret = t_rcvuderr(fd, uderr);
1061         if (ret == 0) {
1062 
1063                 /*
1064                  * Save the datagram error in errno, so that the
1065                  * %m argument to syslog picks up the error string.
1066                  */
1067                 errno = uderr->error;
1068 
1069                 /*
1070                  * Log the datagram error, then log the host that
1071                  * probably triggerred. Cannot log both in the
1072                  * same transaction because of packet size limitations
1073                  * in /dev/log.
1074                  */
1075                 (void) syslog((errno == ECONNREFUSED) ? LOG_DEBUG : LOG_WARNING,
1076 "NFS response over <file descriptor %d/transport %s> generated error: %m",
1077                     fd, nconf->nc_proto);
1078 
1079                 /*
1080                  * Try to map the client's address back to a
1081                  * name.
1082                  */
1083                 ret = netdir_getbyaddr(nconf, &host, &uderr->addr);
1084                 if (ret != -1 && host && host->h_cnt > 0 &&
1085                     host->h_hostservs) {
1086                 (void) syslog((errno == ECONNREFUSED) ? LOG_DEBUG : LOG_WARNING,
1087 "Bad NFS response was sent to client with host name: %s; service port: %s",
1088                     host->h_hostservs->h_host,
1089                     host->h_hostservs->h_serv);
1090                 } else {
1091                         int i, j;
1092                         char *buf;
1093                         char *hex = "0123456789abcdef";
1094 
1095                         /*
1096                          * Mapping failed, print the whole thing
1097                          * in ASCII hex.
1098                          */
1099                         buf = (char *)malloc(uderr->addr.len * 2 + 1);
1100                         for (i = 0, j = 0; i < uderr->addr.len; i++, j += 2) {
1101                                 buf[j] = hex[((uderr->addr.buf[i]) >> 4) & 0xf];
1102                                 buf[j+1] = hex[uderr->addr.buf[i] & 0xf];
1103                         }
1104                         buf[j] = '\0';
1105                 (void) syslog((errno == ECONNREFUSED) ? LOG_DEBUG : LOG_WARNING,
1106         "Bad NFS response was sent to client with transport address: 0x%s",
1107                     buf);
1108                         free((void *)buf);
1109                 }
1110 
1111                 if (ret == 0 && host != NULL)
1112                         netdir_free((void *)host, ND_HOSTSERVLIST);
1113                 return (0);
1114         }
1115 
1116         switch (t_errno) {
1117         case TNOUDERR:
1118                 goto flush_it;
1119         case TSYSERR:
1120                 /*
1121                  * System errors are returned to caller.
1122                  * Save the error code across
1123                  * syslog(), just in case
1124                  * syslog() gets its own error
1125                  * and therefore overwrites errno.
1126                  */
1127                 error = errno;
1128                 (void) syslog(LOG_ERR,
1129                     "t_rcvuderr(file descriptor %d/transport %s) %m",
1130                     fd, nconf->nc_proto);
1131                 return (error);
1132         default:
1133                 (void) syslog(LOG_ERR,
1134                 "t_rcvuderr(file descriptor %d/transport %s) TLI error %d",
1135                     fd, nconf->nc_proto, t_errno);
1136                 goto flush_it;
1137         }
1138 
1139 flush_it:
1140         /*
1141          * If we get here, then we could not cope with whatever message
1142          * we attempted to read, so flush it. If we did read a message,
1143          * and one isn't present, that is all right, because fd is in
1144          * nonblocking mode.
1145          */
1146         (void) syslog(LOG_ERR,
1147         "Flushing one input message from <file descriptor %d/transport %s>",
1148             fd, nconf->nc_proto);
1149 
1150         /*
1151          * Read and discard the message. Do this this until there is
1152          * no more control/data in the message or until we get an error.
1153          */
1154         do {
1155                 ctl->maxlen = sizeof (ctlbuf);
1156                 ctl->buf = ctlbuf;
1157                 data->maxlen = sizeof (databuf);
1158                 data->buf = databuf;
1159                 flags = 0;
1160                 ret = getmsg(fd, ctl, data, &flags);
1161                 if (ret == -1)
1162                         return (errno);
1163         } while (ret != 0);
1164 
1165         return (0);
1166 }
1167 
1168 static void
1169 conn_close_oldest(void)
1170 {
1171         int fd;
1172         int i1;
1173 
1174         /*
1175          * Find the oldest connection that is not already in the
1176          * process of shutting down.
1177          */
1178         for (i1 = end_listen_fds; /* no conditional expression */; i1++) {
1179                 if (i1 >= num_fds)
1180                         return;
1181                 if (conn_polled[i1].closing == 0)
1182                         break;
1183         }
1184 #ifdef DEBUG
1185         printf("too many connections (%d), releasing oldest (%d)\n",
1186             num_conns, poll_array[i1].fd);
1187 #else
1188         syslog(LOG_WARNING, "too many connections (%d), releasing oldest (%d)",
1189             num_conns, poll_array[i1].fd);
1190 #endif
1191         fd = poll_array[i1].fd;
1192         if (conn_polled[i1].nc.nc_semantics == NC_TPI_COTS) {
1193                 /*
1194                  * For politeness, send a T_DISCON_REQ to the transport
1195                  * provider.  We close the stream anyway.
1196                  */
1197                 (void) t_snddis(fd, (struct t_call *)0);
1198                 num_conns--;
1199                 remove_from_poll_list(fd);
1200                 (void) t_close(fd);
1201         } else {
1202                 /*
1203                  * For orderly release, we do not close the stream
1204                  * until the T_ORDREL_IND arrives to complete
1205                  * the handshake.
1206                  */
1207                 if (t_sndrel(fd) == 0)
1208                         conn_polled[i1].closing = 1;
1209         }
1210 }
1211 
1212 static boolean_t
1213 conn_get(int fd, struct netconfig *nconf, struct conn_ind **connp)
1214 {
1215         struct conn_ind *conn;
1216         struct conn_ind *next_conn;
1217 
1218         conn = (struct conn_ind *)malloc(sizeof (*conn));
1219         if (conn == NULL) {
1220                 syslog(LOG_ERR, "malloc for listen indication failed");
1221                 return (FALSE);
1222         }
1223 
1224         /* LINTED pointer alignment */
1225         conn->conn_call = (struct t_call *)t_alloc(fd, T_CALL, T_ALL);
1226         if (conn->conn_call == NULL) {
1227                 free((char *)conn);
1228                 nfslib_log_tli_error("t_alloc", fd, nconf);
1229                 return (FALSE);
1230         }
1231 
1232         if (t_listen(fd, conn->conn_call) == -1) {
1233                 nfslib_log_tli_error("t_listen", fd, nconf);
1234                 (void) t_free((char *)conn->conn_call, T_CALL);
1235                 free((char *)conn);
1236                 return (FALSE);
1237         }
1238 
1239         if (conn->conn_call->udata.len > 0) {
1240                 syslog(LOG_WARNING,
1241         "rejecting inbound connection(%s) with %d bytes of connect data",
1242                     nconf->nc_proto, conn->conn_call->udata.len);
1243 
1244                 conn->conn_call->udata.len = 0;
1245                 (void) t_snddis(fd, conn->conn_call);
1246                 (void) t_free((char *)conn->conn_call, T_CALL);
1247                 free((char *)conn);
1248                 return (FALSE);
1249         }
1250 
1251         if ((next_conn = *connp) != NULL) {
1252                 next_conn->conn_prev->conn_next = conn;
1253                 conn->conn_next = next_conn;
1254                 conn->conn_prev = next_conn->conn_prev;
1255                 next_conn->conn_prev = conn;
1256         } else {
1257                 conn->conn_next = conn;
1258                 conn->conn_prev = conn;
1259                 *connp = conn;
1260         }
1261         return (TRUE);
1262 }
1263 
1264 static int
1265 discon_get(int fd, struct netconfig *nconf, struct conn_ind **connp)
1266 {
1267         struct conn_ind *conn;
1268         struct t_discon discon;
1269 
1270         discon.udata.buf = (char *)0;
1271         discon.udata.maxlen = 0;
1272         if (t_rcvdis(fd, &discon) == -1) {
1273                 nfslib_log_tli_error("t_rcvdis", fd, nconf);
1274                 return (-1);
1275         }
1276 
1277         conn = *connp;
1278         if (conn == NULL)
1279                 return (0);
1280 
1281         do {
1282                 if (conn->conn_call->sequence == discon.sequence) {
1283                         if (conn->conn_next == conn)
1284                                 *connp = (struct conn_ind *)0;
1285                         else {
1286                                 if (conn == *connp) {
1287                                         *connp = conn->conn_next;
1288                                 }
1289                                 conn->conn_next->conn_prev = conn->conn_prev;
1290                                 conn->conn_prev->conn_next = conn->conn_next;
1291                         }
1292                         free((char *)conn);
1293                         break;
1294                 }
1295                 conn = conn->conn_next;
1296         } while (conn != *connp);
1297 
1298         return (0);
1299 }
1300 
1301 static void
1302 cots_listen_event(int fd, int conn_index)
1303 {
1304         struct t_call *call;
1305         struct conn_ind *conn;
1306         struct conn_ind *conn_head;
1307         int event;
1308         struct netconfig *nconf = &conn_polled[conn_index].nc;
1309         int new_fd;
1310         struct netbuf addrmask;
1311         int ret = 0;
1312         char *clnt;
1313         char *clnt_uaddr = NULL;
1314         struct nd_hostservlist *clnt_serv = NULL;
1315 
1316         conn_head = (struct conn_ind *)0;
1317         (void) conn_get(fd, nconf, &conn_head);
1318 
1319         while ((conn = conn_head) != NULL) {
1320                 conn_head = conn->conn_next;
1321                 if (conn_head == conn)
1322                         conn_head = (struct conn_ind *)0;
1323                 else {
1324                         conn_head->conn_prev = conn->conn_prev;
1325                         conn->conn_prev->conn_next = conn_head;
1326                 }
1327                 call = conn->conn_call;
1328                 free((char *)conn);
1329 
1330                 /*
1331                  * If we have already accepted the maximum number of
1332                  * connections allowed on the command line, then drop
1333                  * the oldest connection (for any protocol) before
1334                  * accepting the new connection.  Unless explicitly
1335                  * set on the command line, max_conns_allowed is -1.
1336                  */
1337                 if (max_conns_allowed != -1 && num_conns >= max_conns_allowed)
1338                         conn_close_oldest();
1339 
1340                 /*
1341                  * Create a new transport endpoint for the same proto as
1342                  * the listener.
1343                  */
1344                 new_fd = nfslib_transport_open(nconf);
1345                 if (new_fd == -1) {
1346                         call->udata.len = 0;
1347                         (void) t_snddis(fd, call);
1348                         (void) t_free((char *)call, T_CALL);
1349                         syslog(LOG_ERR, "Cannot establish transport over %s",
1350                             nconf->nc_device);
1351                         continue;
1352                 }
1353 
1354                 /* Bind to a generic address/port for the accepting stream. */
1355                 if (t_bind(new_fd, (struct t_bind *)NULL,
1356                     (struct t_bind *)NULL) == -1) {
1357                         nfslib_log_tli_error("t_bind", new_fd, nconf);
1358                         call->udata.len = 0;
1359                         (void) t_snddis(fd, call);
1360                         (void) t_free((char *)call, T_CALL);
1361                         (void) t_close(new_fd);
1362                         continue;
1363                 }
1364 
1365                 while (t_accept(fd, new_fd, call) == -1) {
1366                         if (t_errno != TLOOK) {
1367 #ifdef DEBUG
1368                                 nfslib_log_tli_error("t_accept", fd, nconf);
1369 #endif
1370                                 call->udata.len = 0;
1371                                 (void) t_snddis(fd, call);
1372                                 (void) t_free((char *)call, T_CALL);
1373                                 (void) t_close(new_fd);
1374                                 goto do_next_conn;
1375                         }
1376                         while (event = t_look(fd)) {
1377                                 switch (event) {
1378                                 case T_LISTEN:
1379 #ifdef DEBUG
1380                                         printf(
1381 "cots_listen_event(%s): T_LISTEN during accept processing\n", nconf->nc_proto);
1382 #endif
1383                                         (void) conn_get(fd, nconf, &conn_head);
1384                                         continue;
1385                                 case T_DISCONNECT:
1386 #ifdef DEBUG
1387                                         printf(
1388         "cots_listen_event(%s): T_DISCONNECT during accept processing\n",
1389                                             nconf->nc_proto);
1390 #endif
1391                                         (void) discon_get(fd, nconf,
1392                                             &conn_head);
1393                                         continue;
1394                                 default:
1395                                         syslog(LOG_ERR,
1396                         "unexpected event 0x%x during accept processing (%s)",
1397                                             event, nconf->nc_proto);
1398                                         call->udata.len = 0;
1399                                         (void) t_snddis(fd, call);
1400                                         (void) t_free((char *)call, T_CALL);
1401                                         (void) t_close(new_fd);
1402                                         goto do_next_conn;
1403                                 }
1404                         }
1405                 }
1406 
1407                 if (set_addrmask(new_fd, nconf, &addrmask) < 0) {
1408                         (void) syslog(LOG_ERR,
1409                             "Cannot set address mask for %s",
1410                             nconf->nc_netid);
1411                         return;
1412                 }
1413 
1414                 /* Tell KRPC about the new stream. */
1415                 if (Mysvc4 != NULL)
1416                         ret = (*Mysvc4)(new_fd, &addrmask, nconf,
1417                             NFS4_KRPC_START, &call->addr);
1418                 else
1419                         ret = (*Mysvc)(new_fd, addrmask, nconf);
1420 
1421                 if (ret < 0) {
1422                         if (errno != ENOTCONN) {
1423                                 syslog(LOG_ERR,
1424                                     "unable to register new connection: %m");
1425                         } else {
1426                                 /*
1427                                  * This is the only error that could be
1428                                  * caused by the client, so who was it?
1429                                  */
1430                                 if (netdir_getbyaddr(nconf, &clnt_serv,
1431                                     &(call->addr)) == ND_OK &&
1432                                     clnt_serv->h_cnt > 0)
1433                                         clnt = clnt_serv->h_hostservs->h_host;
1434                                 else
1435                                         clnt = clnt_uaddr = taddr2uaddr(nconf,
1436                                             &(call->addr));
1437                                 /*
1438                                  * If we don't know who the client was,
1439                                  * remain silent.
1440                                  */
1441                                 if (clnt)
1442                                         syslog(LOG_ERR,
1443 "unable to register new connection: client %s has dropped connection", clnt);
1444                                 if (clnt_serv)
1445                                         netdir_free(clnt_serv, ND_HOSTSERVLIST);
1446                                 if (clnt_uaddr)
1447                                         free(clnt_uaddr);
1448                         }
1449                         free(addrmask.buf);
1450                         (void) t_snddis(new_fd, (struct t_call *)0);
1451                         (void) t_free((char *)call, T_CALL);
1452                         (void) t_close(new_fd);
1453                         goto do_next_conn;
1454                 }
1455 
1456                 free(addrmask.buf);
1457                 (void) t_free((char *)call, T_CALL);
1458 
1459                 /*
1460                  * Poll on the new descriptor so that we get disconnect
1461                  * and orderly release indications.
1462                  */
1463                 num_conns++;
1464                 add_to_poll_list(new_fd, nconf);
1465 
1466                 /* Reset nconf in case it has been moved. */
1467                 nconf = &conn_polled[conn_index].nc;
1468 do_next_conn:;
1469         }
1470 }
1471 
1472 static int
1473 do_poll_cots_action(int fd, int conn_index)
1474 {
1475         char buf[256];
1476         int event;
1477         int i1;
1478         int flags;
1479         struct conn_entry *connent = &conn_polled[conn_index];
1480         struct netconfig *nconf = &(connent->nc);
1481         const char *errorstr;
1482 
1483         while (event = t_look(fd)) {
1484                 switch (event) {
1485                 case T_LISTEN:
1486 #ifdef DEBUG
1487 printf("do_poll_cots_action(%s,%d): T_LISTEN event\n", nconf->nc_proto, fd);
1488 #endif
1489                         cots_listen_event(fd, conn_index);
1490                         break;
1491 
1492                 case T_DATA:
1493 #ifdef DEBUG
1494 printf("do_poll_cots_action(%d,%s): T_DATA event\n", fd, nconf->nc_proto);
1495 #endif
1496                         /*
1497                          * Receive a private notification from CONS rpcmod.
1498                          */
1499                         i1 = t_rcv(fd, buf, sizeof (buf), &flags);
1500                         if (i1 == -1) {
1501                                 syslog(LOG_ERR, "t_rcv failed");
1502                                 break;
1503                         }
1504                         if (i1 < sizeof (int))
1505                                 break;
1506                         i1 = BE32_TO_U32(buf);
1507                         if (i1 == 1 || i1 == 2) {
1508                                 /*
1509                                  * This connection has been idle for too long,
1510                                  * so release it as politely as we can.  If we
1511                                  * have already initiated an orderly release
1512                                  * and we get notified that the stream is
1513                                  * still idle, pull the plug.  This prevents
1514                                  * hung connections from continuing to consume
1515                                  * resources.
1516                                  */
1517 #ifdef DEBUG
1518 printf("do_poll_cots_action(%s,%d): ", nconf->nc_proto, fd);
1519 printf("initiating orderly release of idle connection\n");
1520 #endif
1521                                 if (nconf->nc_semantics == NC_TPI_COTS ||
1522                                     connent->closing != 0) {
1523                                         (void) t_snddis(fd, (struct t_call *)0);
1524                                         goto fdclose;
1525                                 }
1526                                 /*
1527                                  * For NC_TPI_COTS_ORD, the stream is closed
1528                                  * and removed from the poll list when the
1529                                  * T_ORDREL is received from the provider.  We
1530                                  * don't wait for it here because it may take
1531                                  * a while for the transport to shut down.
1532                                  */
1533                                 if (t_sndrel(fd) == -1) {
1534                                         syslog(LOG_ERR,
1535                                         "unable to send orderly release %m");
1536                                 }
1537                                 connent->closing = 1;
1538                         } else
1539                                 syslog(LOG_ERR,
1540                                 "unexpected event from CONS rpcmod %d", i1);
1541                         break;
1542 
1543                 case T_ORDREL:
1544 #ifdef DEBUG
1545 printf("do_poll_cots_action(%s,%d): T_ORDREL event\n", nconf->nc_proto, fd);
1546 #endif
1547                         /* Perform an orderly release. */
1548                         if (t_rcvrel(fd) == 0) {
1549                                 /* T_ORDREL on listen fd's should be ignored */
1550                                 if (!is_listen_fd_index(conn_index)) {
1551                                         (void) t_sndrel(fd);
1552                                         goto fdclose;
1553                                 }
1554                                 break;
1555 
1556                         } else if (t_errno == TLOOK) {
1557                                 break;
1558                         } else {
1559                                 nfslib_log_tli_error("t_rcvrel", fd, nconf);
1560 
1561                                 /*
1562                                  * check to make sure we do not close
1563                                  * listen fd
1564                                  */
1565                                 if (is_listen_fd_index(conn_index))
1566                                         break;
1567                                 else
1568                                         goto fdclose;
1569                         }
1570 
1571                 case T_DISCONNECT:
1572 #ifdef DEBUG
1573 printf("do_poll_cots_action(%s,%d): T_DISCONNECT event\n", nconf->nc_proto, fd);
1574 #endif
1575                         if (t_rcvdis(fd, (struct t_discon *)NULL) == -1)
1576                                 nfslib_log_tli_error("t_rcvdis", fd, nconf);
1577 
1578                         /*
1579                          * T_DISCONNECT on listen fd's should be ignored.
1580                          */
1581                         if (is_listen_fd_index(conn_index))
1582                                 break;
1583                         else
1584                                 goto fdclose;
1585 
1586                 case T_ERROR:
1587                 default:
1588                         if (event == T_ERROR || t_errno == TSYSERR) {
1589                                 if ((errorstr = strerror(errno)) == NULL) {
1590                                         (void) sprintf(buf,
1591                                             "Unknown error num %d", errno);
1592                                         errorstr = (const char *) buf;
1593                                 }
1594                         } else if (event == -1)
1595                                 errorstr = t_strerror(t_errno);
1596                         else
1597                                 errorstr = "";
1598                         syslog(LOG_ERR,
1599                             "unexpected TLI event (0x%x) on "
1600                             "connection-oriented transport(%s,%d):%s",
1601                             event, nconf->nc_proto, fd, errorstr);
1602 fdclose:
1603                         num_conns--;
1604                         remove_from_poll_list(fd);
1605                         (void) t_close(fd);
1606                         return (0);
1607                 }
1608         }
1609 
1610         return (0);
1611 }
1612 
/*
 * Translate a service name (used primarily in logging) into the
 * RPC port name used by the netdir_*() routines.  A name with no
 * known translation yields the string "unrecognized".
 */
static char *
serv_name_to_port_name(char *name)
{
        static const struct {
                const char *serv;       /* service name as logged */
                char *port;             /* matching RPC port name */
        } name_map[] = {
                { "NFS", "nfs" },
                { "NLM", "lockd" },
                { "NFS4_CALLBACK", "nfs4_callback" }
        };
        size_t idx;

        for (idx = 0; idx < sizeof (name_map) / sizeof (name_map[0]); idx++) {
                if (strcmp(name, name_map[idx].serv) == 0)
                        return (name_map[idx].port);
        }

        return ("unrecognized");
}
1630 
1631 static int
1632 bind_to_provider(char *provider, char *serv, struct netbuf **addr,
1633                 struct netconfig **retnconf)
1634 {
1635         struct netconfig *nconf;
1636         NCONF_HANDLE *nc;
1637         struct nd_hostserv hs;
1638 
1639         hs.h_host = HOST_SELF;
1640         hs.h_serv = serv_name_to_port_name(serv);
1641 
1642         if ((nc = setnetconfig()) == (NCONF_HANDLE *)NULL) {
1643                 syslog(LOG_ERR, "setnetconfig failed: %m");
1644                 return (-1);
1645         }
1646         while (nconf = getnetconfig(nc)) {
1647                 if (OK_TPI_TYPE(nconf) &&
1648                     strcmp(nconf->nc_device, provider) == 0) {
1649                         *retnconf = nconf;
1650                         return (nfslib_bindit(nconf, addr, &hs,
1651                             listen_backlog));
1652                 }
1653         }
1654         (void) endnetconfig(nc);
1655 
1656         syslog(LOG_ERR, "couldn't find netconfig entry for provider %s",
1657             provider);
1658         return (-1);
1659 }
1660 
1661 static int
1662 bind_to_proto(NETSELDECL(proto), char *serv, struct netbuf **addr,
1663                 struct netconfig **retnconf)
1664 {
1665         struct netconfig *nconf;
1666         NCONF_HANDLE *nc = NULL;
1667         struct nd_hostserv hs;
1668 
1669         hs.h_host = HOST_SELF;
1670         hs.h_serv = serv_name_to_port_name(serv);
1671 
1672         if ((nc = setnetconfig()) == (NCONF_HANDLE *)NULL) {
1673                 syslog(LOG_ERR, "setnetconfig failed: %m");
1674                 return (-1);
1675         }
1676         while (nconf = getnetconfig(nc)) {
1677                 if (OK_TPI_TYPE(nconf) && NETSELEQ(nconf->nc_proto, proto)) {
1678                         *retnconf = nconf;
1679                         return (nfslib_bindit(nconf, addr, &hs,
1680                             listen_backlog));
1681                 }
1682         }
1683         (void) endnetconfig(nc);
1684 
1685         syslog(LOG_ERR, "couldn't find netconfig entry for protocol %s",
1686             proto);
1687         return (-1);
1688 }
1689 
1690 #include <netinet/in.h>
1691 
1692 /*
1693  * Create an address mask appropriate for the transport.
1694  * The mask is used to obtain the host-specific part of
1695  * a network address when comparing addresses.
1696  * For an internet address the host-specific part is just
1697  * the 32 bit IP address and this part of the mask is set
1698  * to all-ones. The port number part of the mask is zeroes.
1699  */
1700 static int
1701 set_addrmask(fd, nconf, mask)
1702         struct netconfig *nconf;
1703         struct netbuf *mask;
1704 {
1705         struct t_info info;
1706 
1707         /*
1708          * Find the size of the address we need to mask.
1709          */
1710         if (t_getinfo(fd, &info) < 0) {
1711                 t_error("t_getinfo");
1712                 return (-1);
1713         }
1714         mask->len = mask->maxlen = info.addr;
1715         if (info.addr <= 0) {
1716                 /*
1717                  * loopback devices have infinite addr size
1718                  * (it is identified by -1 in addr field of t_info structure),
1719                  * so don't build the netmask for them. It's a special case
1720                  * that should be handled properly.
1721                  */
1722                 if ((info.addr == -1) &&
1723                     (0 == strcmp(nconf->nc_protofmly, NC_LOOPBACK))) {
1724                         memset(mask, 0, sizeof (*mask));
1725                         return (0);
1726                 }
1727 
1728                 syslog(LOG_ERR, "set_addrmask: address size: %ld",
1729                         info.addr);
1730                 return (-1);
1731         }
1732 
1733         mask->buf = (char *)malloc(mask->len);
1734         if (mask->buf == NULL) {
1735                 syslog(LOG_ERR, "set_addrmask: no memory");
1736                 return (-1);
1737         }
1738         (void) memset(mask->buf, 0, mask->len);   /* reset all mask bits */
1739 
1740         if (strcmp(nconf->nc_protofmly, NC_INET) == 0) {
1741                 /*
1742                  * Set the mask so that the port is ignored.
1743                  */
1744                 /* LINTED pointer alignment */
1745                 ((struct sockaddr_in *)mask->buf)->sin_addr.s_addr =
1746                                                                 (ulong_t)~0;
1747                 /* LINTED pointer alignment */
1748                 ((struct sockaddr_in *)mask->buf)->sin_family =
1749                                                                 (ushort_t)~0;
1750         } else if (strcmp(nconf->nc_protofmly, NC_INET6) == 0) {
1751                 /* LINTED pointer alignment */
1752                 (void) memset(&((struct sockaddr_in6 *)mask->buf)->sin6_addr,
1753                         (uchar_t)~0, sizeof (struct in6_addr));
1754                 /* LINTED pointer alignment */
1755                 ((struct sockaddr_in6 *)mask->buf)->sin6_family =
1756                                                                 (ushort_t)~0;
1757         } else {
1758 
1759                 /*
1760                  * Set all mask bits.
1761                  */
1762                 (void) memset(mask->buf, 0xFF, mask->len);
1763         }
1764         return (0);
1765 }
1766 
1767 /*
1768  * For listen fd's index is always less than end_listen_fds.
1769  * end_listen_fds is defined externally in the daemon that uses this library.
1770  * It's value is equal to the number of open file descriptors after the
1771  * last listen end point was opened but before any connection was accepted.
1772  */
1773 static int
1774 is_listen_fd_index(int index)
1775 {
1776         return (index < end_listen_fds);
1777 }