1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 1996, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright (c) 2012 by Delphix. All rights reserved.
  25  * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
  26  * Copyright 2014 Gary Mills
  27  */
  28 
  29 
  30 /*
  31  * nfs_tbind.c, common part for nfsd and lockd.
  32  */
  33 
  34 #include <tiuser.h>
  35 #include <fcntl.h>
  36 #include <netconfig.h>
  37 #include <stropts.h>
  38 #include <errno.h>
  39 #include <syslog.h>
  40 #include <rpc/rpc.h>
  41 #include <sys/time.h>
  42 #include <sys/resource.h>
  43 #include <signal.h>
  44 #include <netdir.h>
  45 #include <unistd.h>
  46 #include <string.h>
  47 #include <netinet/tcp.h>
  48 #include <malloc.h>
  49 #include <stdlib.h>
  50 #include "nfs_tbind.h"
  51 #include <nfs/nfs.h>
  52 #include <nfs/nfs_acl.h>
  53 #include <nfs/nfssys.h>
  54 #include <nfs/nfs4.h>
  55 #include <zone.h>
  56 #include <sys/socket.h>
  57 #include <tsol/label.h>
  58 
  59 /*
  60  * Determine valid semantics for most applications.
  61  */
  62 #define OK_TPI_TYPE(_nconf) \
  63         (_nconf->nc_semantics == NC_TPI_CLTS || \
  64         _nconf->nc_semantics == NC_TPI_COTS || \
  65         _nconf->nc_semantics == NC_TPI_COTS_ORD)
  66 
  67 #define BE32_TO_U32(a) \
  68         ((((ulong_t)((uchar_t *)a)[0] & 0xFF) << (ulong_t)24) | \
  69         (((ulong_t)((uchar_t *)a)[1] & 0xFF) << (ulong_t)16) | \
  70         (((ulong_t)((uchar_t *)a)[2] & 0xFF) << (ulong_t)8)  | \
  71         ((ulong_t)((uchar_t *)a)[3] & 0xFF))
  72 
  73 /*
  74  * Number of elements to add to the poll array on each allocation.
  75  */
  76 #define POLL_ARRAY_INC_SIZE     64
  77 
  78 /*
  79  * Number of file descriptors by which the process soft limit may be
  80  * increased on each call to nofile_increase(0).
  81  */
  82 #define NOFILE_INC_SIZE 64
  83 
  84 /*
  85  * Default TCP send and receive buffer size of NFS server.
  86  */
  87 #define NFSD_TCP_BUFSZ  (1024*1024)
  88 
  89 struct conn_ind {
  90         struct conn_ind *conn_next;
  91         struct conn_ind *conn_prev;
  92         struct t_call   *conn_call;
  93 };
  94 
  95 struct conn_entry {
  96         bool_t                  closing;
  97         struct netconfig        nc;
  98 };
  99 
 100 /*
 101  * this file contains transport routines common to nfsd and lockd
 102  */
 103 static  int     nofile_increase(int);
 104 static  int     reuseaddr(int);
 105 static  int     recvucred(int);
 106 static  int     anonmlp(int);
 107 static  void    add_to_poll_list(int, struct netconfig *);
 108 static  char    *serv_name_to_port_name(char *);
 109 static  int     bind_to_proto(char *, char *, struct netbuf **,
 110                                 struct netconfig **);
 111 static  int     bind_to_provider(char *, char *, struct netbuf **,
 112                                         struct netconfig **);
 113 static  void    conn_close_oldest(void);
 114 static  boolean_t conn_get(int, struct netconfig *, struct conn_ind **);
 115 static  void    cots_listen_event(int, int);
 116 static  int     discon_get(int, struct netconfig *, struct conn_ind **);
 117 static  int     do_poll_clts_action(int, int);
 118 static  int     do_poll_cots_action(int, int);
 119 static  void    remove_from_poll_list(int);
 120 static  int     set_addrmask(int, struct netconfig *, struct netbuf *);
 121 static  int     is_listen_fd_index(int);
 122 
 123 static  struct pollfd *poll_array;
 124 static  struct conn_entry *conn_polled;
 125 static  int     num_conns;              /* Current number of connections */
 126 int             (*Mysvc4)(int, struct netbuf *, struct netconfig *, int,
 127                 struct netbuf *);
 128 static int      setopt(int fd, int level, int name, int value);
 129 static int      get_opt(int fd, int level, int name);
 130 static void     nfslib_set_sockbuf(int fd);
 131 
 132 /*
 133  * Called to create and prepare a transport descriptor for in-kernel
 134  * RPC service.
 135  * Returns -1 on failure and a valid descriptor on success.
 136  */
 137 int
 138 nfslib_transport_open(struct netconfig *nconf)
 139 {
 140         int fd;
 141         struct strioctl strioc;
 142 
 143         if ((nconf == (struct netconfig *)NULL) ||
 144             (nconf->nc_device == (char *)NULL)) {
 145                 syslog(LOG_ERR, "no netconfig device");
 146                 return (-1);
 147         }
 148 
 149         /*
 150          * Open the transport device.
 151          */
 152         fd = t_open(nconf->nc_device, O_RDWR, (struct t_info *)NULL);
 153         if (fd == -1) {
 154                 if (t_errno == TSYSERR && errno == EMFILE &&
 155                     (nofile_increase(0) == 0)) {
 156                         /* Try again with a higher NOFILE limit. */
 157                         fd = t_open(nconf->nc_device, O_RDWR,
 158                             (struct t_info *)NULL);
 159                 }
 160                 if (fd == -1) {
 161                         syslog(LOG_ERR, "t_open %s failed:  t_errno %d, %m",
 162                             nconf->nc_device, t_errno);
 163                         return (-1);
 164                 }
 165         }
 166 
 167         /*
 168          * Pop timod because the RPC module must be as close as possible
 169          * to the transport.
 170          */
 171         if (ioctl(fd, I_POP, 0) < 0) {
 172                 syslog(LOG_ERR, "I_POP of timod failed: %m");
 173                 (void) t_close(fd);
 174                 return (-1);
 175         }
 176 
 177         /*
 178          * Common code for CLTS and COTS transports
 179          */
 180         if (ioctl(fd, I_PUSH, "rpcmod") < 0) {
 181                 syslog(LOG_ERR, "I_PUSH of rpcmod failed: %m");
 182                 (void) t_close(fd);
 183                 return (-1);
 184         }
 185 
 186         strioc.ic_cmd = RPC_SERVER;
 187         strioc.ic_dp = (char *)0;
 188         strioc.ic_len = 0;
 189         strioc.ic_timout = -1;
 190 
 191         /* Tell rpcmod to act like a server stream. */
 192         if (ioctl(fd, I_STR, &strioc) < 0) {
 193                 syslog(LOG_ERR, "rpcmod set-up ioctl failed: %m");
 194                 (void) t_close(fd);
 195                 return (-1);
 196         }
 197 
 198         /*
 199          * Re-push timod so that we will still be doing TLI
 200          * operations on the descriptor.
 201          */
 202         if (ioctl(fd, I_PUSH, "timod") < 0) {
 203                 syslog(LOG_ERR, "I_PUSH of timod failed: %m");
 204                 (void) t_close(fd);
 205                 return (-1);
 206         }
 207 
 208         /*
 209          * Enable options of returning the ip's for udp.
 210          */
 211         if (strcmp(nconf->nc_netid, "udp6") == 0)
 212                 __rpc_tli_set_options(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, 1);
 213         else if (strcmp(nconf->nc_netid, "udp") == 0)
 214                 __rpc_tli_set_options(fd, IPPROTO_IP, IP_RECVDSTADDR, 1);
 215 
 216         return (fd);
 217 }
 218 
 219 static int
 220 nofile_increase(int limit)
 221 {
 222         struct rlimit rl;
 223 
 224         if (getrlimit(RLIMIT_NOFILE, &rl) == -1) {
 225                 syslog(LOG_ERR, "getrlimit of NOFILE failed: %m");
 226                 return (-1);
 227         }
 228 
 229         if (limit > 0)
 230                 rl.rlim_cur = limit;
 231         else
 232                 rl.rlim_cur += NOFILE_INC_SIZE;
 233 
 234         if (rl.rlim_cur > rl.rlim_max &&
 235             rl.rlim_max != RLIM_INFINITY)
 236                 rl.rlim_max = rl.rlim_cur;
 237 
 238         if (setrlimit(RLIMIT_NOFILE, &rl) == -1) {
 239                 syslog(LOG_ERR, "setrlimit of NOFILE to %d failed: %m",
 240                     rl.rlim_cur);
 241                 return (-1);
 242         }
 243 
 244         return (0);
 245 }
 246 
 247 static void
 248 nfslib_set_sockbuf(int fd)
 249 {
 250         int curval, val;
 251 
 252         val = NFSD_TCP_BUFSZ;
 253 
 254         curval = get_opt(fd, SOL_SOCKET, SO_SNDBUF);
 255         syslog(LOG_DEBUG, "Current SO_SNDBUF value is %d", curval);
 256         if ((curval != -1) && (curval < val)) {
 257                 syslog(LOG_DEBUG, "Set SO_SNDBUF  option to %d", val);
 258                 if (setopt(fd, SOL_SOCKET, SO_SNDBUF, val) < 0) {
 259                         syslog(LOG_ERR,
 260                             "couldn't set SO_SNDBUF to %d - t_errno = %d",
 261                             val, t_errno);
 262                         syslog(LOG_ERR,
 263                             "Check and increase system-wide tcp_max_buf");
 264                 }
 265         }
 266 
 267         curval = get_opt(fd, SOL_SOCKET, SO_RCVBUF);
 268         syslog(LOG_DEBUG, "Current SO_RCVBUF value is %d", curval);
 269         if ((curval != -1) && (curval < val)) {
 270                 syslog(LOG_DEBUG, "Set SO_RCVBUF  option to %d", val);
 271                 if (setopt(fd, SOL_SOCKET, SO_RCVBUF, val) < 0) {
 272                         syslog(LOG_ERR,
 273                             "couldn't set SO_RCVBUF to %d - t_errno = %d",
 274                             val, t_errno);
 275                         syslog(LOG_ERR,
 276                             "Check and increase system-wide tcp_max_buf");
 277                 }
 278         }
 279 }
 280 
 281 int
 282 nfslib_bindit(struct netconfig *nconf, struct netbuf **addr,
 283         struct nd_hostserv *hs, int backlog)
 284 {
 285         int fd;
 286         struct t_bind  *ntb;
 287         struct t_bind tb;
 288         struct nd_addrlist *addrlist;
 289         struct t_optmgmt req, resp;
 290         struct opthdr *opt;
 291         char reqbuf[128];
 292         bool_t use_any = FALSE;
 293         bool_t gzone = TRUE;
 294 
 295         if ((fd = nfslib_transport_open(nconf)) == -1) {
 296                 syslog(LOG_ERR, "cannot establish transport service over %s",
 297                     nconf->nc_device);
 298                 return (-1);
 299         }
 300 
 301         addrlist = (struct nd_addrlist *)NULL;
 302 
 303         /* nfs4_callback service does not used a fieed port number */
 304 
 305         if (strcmp(hs->h_serv, "nfs4_callback") == 0) {
 306                 tb.addr.maxlen = 0;
 307                 tb.addr.len = 0;
 308                 tb.addr.buf = 0;
 309                 use_any = TRUE;
 310                 gzone = (getzoneid() == GLOBAL_ZONEID);
 311         } else if (netdir_getbyname(nconf, hs, &addrlist) != 0) {
 312 
 313                 syslog(LOG_ERR,
 314                 "Cannot get address for transport %s host %s service %s",
 315                     nconf->nc_netid, hs->h_host, hs->h_serv);
 316                 (void) t_close(fd);
 317                 return (-1);
 318         }
 319 
 320         if (strcmp(nconf->nc_proto, "tcp") == 0) {
 321                 /*
 322                  * If we're running over TCP, then set the
 323                  * SO_REUSEADDR option so that we can bind
 324                  * to our preferred address even if previously
 325                  * left connections exist in FIN_WAIT states.
 326                  * This is somewhat bogus, but otherwise you have
 327                  * to wait 2 minutes to restart after killing it.
 328                  */
 329                 if (reuseaddr(fd) == -1) {
 330                         syslog(LOG_WARNING,
 331                         "couldn't set SO_REUSEADDR option on transport");
 332                 }
 333         } else if (strcmp(nconf->nc_proto, "udp") == 0) {
 334                 /*
 335                  * In order to run MLP on UDP, we need to handle creds.
 336                  */
 337                 if (recvucred(fd) == -1) {
 338                         syslog(LOG_WARNING,
 339                             "couldn't set SO_RECVUCRED option on transport");
 340                 }
 341         }
 342 
 343         /*
 344          * Make non global zone nfs4_callback port MLP
 345          */
 346         if (use_any && is_system_labeled() && !gzone) {
 347                 if (anonmlp(fd) == -1) {
 348                         /*
 349                          * failing to set this option means nfs4_callback
 350                          * could fail silently later. So fail it with
 351                          * with an error message now.
 352                          */
 353                         syslog(LOG_ERR,
 354                             "couldn't set SO_ANON_MLP option on transport");
 355                         (void) t_close(fd);
 356                         return (-1);
 357                 }
 358         }
 359 
 360         if (nconf->nc_semantics == NC_TPI_CLTS)
 361                 tb.qlen = 0;
 362         else
 363                 tb.qlen = backlog;
 364 
 365         /* LINTED pointer alignment */
 366         ntb = (struct t_bind *)t_alloc(fd, T_BIND, T_ALL);
 367         if (ntb == (struct t_bind *)NULL) {
 368                 syslog(LOG_ERR, "t_alloc failed:  t_errno %d, %m", t_errno);
 369                 (void) t_close(fd);
 370                 netdir_free((void *)addrlist, ND_ADDRLIST);
 371                 return (-1);
 372         }
 373 
 374         /*
 375          * XXX - what about the space tb->addr.buf points to? This should
 376          * be either a memcpy() to/from the buf fields, or t_alloc(fd,T_BIND,)
 377          * should't be called with T_ALL.
 378          */
 379         if (addrlist)
 380                 tb.addr = *(addrlist->n_addrs);              /* structure copy */
 381 
 382         if (t_bind(fd, &tb, ntb) == -1) {
 383                 syslog(LOG_ERR, "t_bind failed:  t_errno %d, %m", t_errno);
 384                 (void) t_free((char *)ntb, T_BIND);
 385                 netdir_free((void *)addrlist, ND_ADDRLIST);
 386                 (void) t_close(fd);
 387                 return (-1);
 388         }
 389 
 390         /* make sure we bound to the right address */
 391         if (use_any == FALSE &&
 392             (tb.addr.len != ntb->addr.len ||
 393             memcmp(tb.addr.buf, ntb->addr.buf, tb.addr.len) != 0)) {
 394                 syslog(LOG_ERR, "t_bind to wrong address");
 395                 (void) t_free((char *)ntb, T_BIND);
 396                 netdir_free((void *)addrlist, ND_ADDRLIST);
 397                 (void) t_close(fd);
 398                 return (-1);
 399         }
 400 
 401         /*
 402          * Call nfs4svc_setport so that the kernel can be
 403          * informed what port number the daemon is listing
 404          * for incoming connection requests.
 405          */
 406 
 407         if ((nconf->nc_semantics == NC_TPI_COTS ||
 408             nconf->nc_semantics == NC_TPI_COTS_ORD) && Mysvc4 != NULL)
 409                 (*Mysvc4)(fd, NULL, nconf, NFS4_SETPORT, &ntb->addr);
 410 
 411         *addr = &ntb->addr;
 412         netdir_free((void *)addrlist, ND_ADDRLIST);
 413 
 414         if (strcmp(nconf->nc_proto, "tcp") == 0) {
 415                 /*
 416                  * Disable the Nagle algorithm on TCP connections.
 417                  * Connections accepted from this listener will
 418                  * inherit the listener options.
 419                  */
 420 
 421                 /* LINTED pointer alignment */
 422                 opt = (struct opthdr *)reqbuf;
 423                 opt->level = IPPROTO_TCP;
 424                 opt->name = TCP_NODELAY;
 425                 opt->len = sizeof (int);
 426 
 427                 /* LINTED pointer alignment */
 428                 *(int *)((char *)opt + sizeof (*opt)) = 1;
 429 
 430                 req.flags = T_NEGOTIATE;
 431                 req.opt.len = sizeof (*opt) + opt->len;
 432                 req.opt.buf = (char *)opt;
 433                 resp.flags = 0;
 434                 resp.opt.buf = reqbuf;
 435                 resp.opt.maxlen = sizeof (reqbuf);
 436 
 437                 if (t_optmgmt(fd, &req, &resp) < 0 ||
 438                     resp.flags != T_SUCCESS) {
 439                         syslog(LOG_ERR,
 440         "couldn't set NODELAY option for proto %s: t_errno = %d, %m",
 441                             nconf->nc_proto, t_errno);
 442                 }
 443 
 444                 nfslib_set_sockbuf(fd);
 445         }
 446 
 447         return (fd);
 448 }
 449 
 450 static int
 451 get_opt(int fd, int level, int name)
 452 {
 453         struct t_optmgmt req, res;
 454         struct {
 455                 struct opthdr opt;
 456                 int value;
 457         } reqbuf;
 458 
 459         reqbuf.opt.level = level;
 460         reqbuf.opt.name = name;
 461         reqbuf.opt.len = sizeof (int);
 462         reqbuf.value = 0;
 463 
 464         req.flags = T_CURRENT;
 465         req.opt.len = sizeof (reqbuf);
 466         req.opt.buf = (char *)&reqbuf;
 467 
 468         res.flags = 0;
 469         res.opt.buf = (char *)&reqbuf;
 470         res.opt.maxlen = sizeof (reqbuf);
 471 
 472         if (t_optmgmt(fd, &req, &res) < 0 || res.flags != T_SUCCESS) {
 473                 t_error("t_optmgmt");
 474                 return (-1);
 475         }
 476         return (reqbuf.value);
 477 }
 478 
 479 static int
 480 setopt(int fd, int level, int name, int value)
 481 {
 482         struct t_optmgmt req, resp;
 483         struct {
 484                 struct opthdr opt;
 485                 int value;
 486         } reqbuf;
 487 
 488         reqbuf.opt.level = level;
 489         reqbuf.opt.name = name;
 490         reqbuf.opt.len = sizeof (int);
 491 
 492         reqbuf.value = value;
 493 
 494         req.flags = T_NEGOTIATE;
 495         req.opt.len = sizeof (reqbuf);
 496         req.opt.buf = (char *)&reqbuf;
 497 
 498         resp.flags = 0;
 499         resp.opt.buf = (char *)&reqbuf;
 500         resp.opt.maxlen = sizeof (reqbuf);
 501 
 502         if (t_optmgmt(fd, &req, &resp) < 0 || resp.flags != T_SUCCESS) {
 503                 t_error("t_optmgmt");
 504                 return (-1);
 505         }
 506         return (0);
 507 }
 508 
 509 static int
 510 reuseaddr(int fd)
 511 {
 512         return (setopt(fd, SOL_SOCKET, SO_REUSEADDR, 1));
 513 }
 514 
 515 static int
 516 recvucred(int fd)
 517 {
 518         return (setopt(fd, SOL_SOCKET, SO_RECVUCRED, 1));
 519 }
 520 
 521 static int
 522 anonmlp(int fd)
 523 {
 524         return (setopt(fd, SOL_SOCKET, SO_ANON_MLP, 1));
 525 }
 526 
 527 void
 528 nfslib_log_tli_error(char *tli_name, int fd, struct netconfig *nconf)
 529 {
 530         int error;
 531 
 532         /*
 533          * Save the error code across syslog(), just in case syslog()
 534          * gets its own error and, therefore, overwrites errno.
 535          */
 536         error = errno;
 537         if (t_errno == TSYSERR) {
 538                 syslog(LOG_ERR, "%s(file descriptor %d/transport %s) %m",
 539                     tli_name, fd, nconf->nc_proto);
 540         } else {
 541                 syslog(LOG_ERR,
 542                     "%s(file descriptor %d/transport %s) TLI error %d",
 543                     tli_name, fd, nconf->nc_proto, t_errno);
 544         }
 545         errno = error;
 546 }
 547 
 548 /*
 549  * Called to set up service over a particular transport.
 550  */
 551 void
 552 do_one(char *provider, NETSELDECL(proto), struct protob *protobp0,
 553         int (*svc)(int, struct netbuf, struct netconfig *))
 554 {
 555         register int sock;
 556         struct protob *protobp;
 557         struct netbuf *retaddr;
 558         struct netconfig *retnconf;
 559         struct netbuf addrmask;
 560         int vers;
 561         int err;
 562         int l;
 563 
 564         if (provider)
 565                 sock = bind_to_provider(provider, protobp0->serv, &retaddr,
 566                     &retnconf);
 567         else
 568                 sock = bind_to_proto(proto, protobp0->serv, &retaddr,
 569                     &retnconf);
 570 
 571         if (sock == -1) {
 572                 (void) syslog(LOG_ERR,
 573         "Cannot establish %s service over %s: transport setup problem.",
 574                     protobp0->serv, provider ? provider : proto);
 575                 return;
 576         }
 577 
 578         if (set_addrmask(sock, retnconf, &addrmask) < 0) {
 579                 (void) syslog(LOG_ERR,
 580                     "Cannot set address mask for %s", retnconf->nc_netid);
 581                 return;
 582         }
 583 
 584         /*
 585          * Register all versions of the programs in the protocol block list.
 586          */
 587         l = strlen(NC_UDP);
 588         for (protobp = protobp0; protobp; protobp = protobp->next) {
 589                 for (vers = protobp->versmin; vers <= protobp->versmax;
 590                     vers++) {
 591                         if ((protobp->program == NFS_PROGRAM ||
 592                             protobp->program == NFS_ACL_PROGRAM) &&
 593                             vers == NFS_V4 &&
 594                             strncasecmp(retnconf->nc_proto, NC_UDP, l) == 0)
 595                                 continue;
 596 
 597                         (void) rpcb_unset(protobp->program, vers, retnconf);
 598                         (void) rpcb_set(protobp->program, vers, retnconf,
 599                             retaddr);
 600                 }
 601         }
 602 
 603         /*
 604          * Register services with CLTS semantics right now.
 605          * Note: services with COTS/COTS_ORD semantics will be
 606          * registered later from cots_listen_event function.
 607          */
 608         if (retnconf->nc_semantics == NC_TPI_CLTS) {
 609                 /* Don't drop core if supporting module(s) aren't loaded. */
 610                 (void) signal(SIGSYS, SIG_IGN);
 611 
 612                 /*
 613                  * svc() doesn't block, it returns success or failure.
 614                  */
 615 
 616                 if (svc == NULL && Mysvc4 != NULL)
 617                         err = (*Mysvc4)(sock, &addrmask, retnconf,
 618                             NFS4_SETPORT|NFS4_KRPC_START, retaddr);
 619                 else
 620                         err = (*svc)(sock, addrmask, retnconf);
 621 
 622                 if (err < 0) {
 623                         (void) syslog(LOG_ERR,
 624                             "Cannot establish %s service over <file desc."
 625                             " %d, protocol %s> : %m. Exiting",
 626                             protobp0->serv, sock, retnconf->nc_proto);
 627                         exit(1);
 628                 }
 629         }
 630         free(addrmask.buf);
 631 
 632         /*
 633          * We successfully set up the server over this transport.
 634          * Add this descriptor to the one being polled on.
 635          */
 636         add_to_poll_list(sock, retnconf);
 637 }
 638 
 639 /*
 640  * Set up the NFS service over all the available transports.
 641  * Returns -1 for failure, 0 for success.
 642  */
 643 int
 644 do_all(struct protob *protobp,
 645         int (*svc)(int, struct netbuf, struct netconfig *))
 646 {
 647         struct netconfig *nconf;
 648         NCONF_HANDLE *nc;
 649         int l;
 650 
 651         if ((nc = setnetconfig()) == (NCONF_HANDLE *)NULL) {
 652                 syslog(LOG_ERR, "setnetconfig failed: %m");
 653                 return (-1);
 654         }
 655         l = strlen(NC_UDP);
 656         while (nconf = getnetconfig(nc)) {
 657                 if ((nconf->nc_flag & NC_VISIBLE) &&
 658                     strcmp(nconf->nc_protofmly, NC_LOOPBACK) != 0 &&
 659                     OK_TPI_TYPE(nconf) &&
 660                     (protobp->program != NFS4_CALLBACK ||
 661                     strncasecmp(nconf->nc_proto, NC_UDP, l) != 0))
 662                         do_one(nconf->nc_device, nconf->nc_proto,
 663                             protobp, svc);
 664         }
 665         (void) endnetconfig(nc);
 666         return (0);
 667 }
 668 
 669 /*
 670  * poll on the open transport descriptors for events and errors.
 671  */
 672 void
 673 poll_for_action(void)
 674 {
 675         int nfds;
 676         int i;
 677 
 678         /*
 679          * Keep polling until all transports have been closed. When this
 680          * happens, we return.
 681          */
 682         while ((int)num_fds > 0) {
 683                 nfds = poll(poll_array, num_fds, INFTIM);
 684                 switch (nfds) {
 685                 case 0:
 686                         continue;
 687 
 688                 case -1:
 689                         /*
 690                          * Some errors from poll could be
 691                          * due to temporary conditions, and we try to
 692                          * be robust in the face of them. Other
 693                          * errors (should never happen in theory)
 694                          * are fatal (eg. EINVAL, EFAULT).
 695                          */
 696                         switch (errno) {
 697                         case EINTR:
 698                                 continue;
 699 
 700                         case EAGAIN:
 701                         case ENOMEM:
 702                                 (void) sleep(10);
 703                                 continue;
 704 
 705                         default:
 706                                 (void) syslog(LOG_ERR,
 707                                     "poll failed: %m. Exiting");
 708                                 exit(1);
 709                         }
 710                 default:
 711                         break;
 712                 }
 713 
 714                 /*
 715                  * Go through the poll list looking for events.
 716                  */
 717                 for (i = 0; i < num_fds && nfds > 0; i++) {
 718                         if (poll_array[i].revents) {
 719                                 nfds--;
 720                                 /*
 721                                  * We have a message, so try to read it.
 722                                  * Record the error return in errno,
 723                                  * so that syslog(LOG_ERR, "...%m")
 724                                  * dumps the corresponding error string.
 725                                  */
 726                                 if (conn_polled[i].nc.nc_semantics ==
 727                                     NC_TPI_CLTS) {
 728                                         errno = do_poll_clts_action(
 729                                             poll_array[i].fd, i);
 730                                 } else {
 731                                         errno = do_poll_cots_action(
 732                                             poll_array[i].fd, i);
 733                                 }
 734 
 735                                 if (errno == 0)
 736                                         continue;
 737                                 /*
 738                                  * Most returned error codes mean that there is
 739                                  * fatal condition which we can only deal with
 740                                  * by closing the transport.
 741                                  */
 742                                 if (errno != EAGAIN && errno != ENOMEM) {
 743                                         (void) syslog(LOG_ERR,
 744                 "Error (%m) reading descriptor %d/transport %s. Closing it.",
 745                                             poll_array[i].fd,
 746                                             conn_polled[i].nc.nc_proto);
 747                                         (void) t_close(poll_array[i].fd);
 748                                         remove_from_poll_list(poll_array[i].fd);
 749 
 750                                 } else if (errno == ENOMEM)
 751                                         (void) sleep(5);
 752                         }
 753                 }
 754         }
 755 
 756         (void) syslog(LOG_ERR,
 757             "All transports have been closed with errors. Exiting.");
 758 }
 759 
 760 /*
 761  * Allocate poll/transport array entries for this descriptor.
 762  */
 763 static void
 764 add_to_poll_list(int fd, struct netconfig *nconf)
 765 {
 766         static int poll_array_size = 0;
 767 
 768         /*
 769          * If the arrays are full, allocate new ones.
 770          */
 771         if (num_fds == poll_array_size) {
 772                 struct pollfd *tpa;
 773                 struct conn_entry *tnp;
 774 
 775                 if (poll_array_size != 0) {
 776                         tpa = poll_array;
 777                         tnp = conn_polled;
 778                 } else
 779                         tpa = (struct pollfd *)0;
 780 
 781                 poll_array_size += POLL_ARRAY_INC_SIZE;
 782                 /*
 783                  * Allocate new arrays.
 784                  */
 785                 poll_array = (struct pollfd *)
 786                     malloc(poll_array_size * sizeof (struct pollfd) + 256);
 787                 conn_polled = (struct conn_entry *)
 788                     malloc(poll_array_size * sizeof (struct conn_entry) + 256);
 789                 if (poll_array == (struct pollfd *)NULL ||
 790                     conn_polled == (struct conn_entry *)NULL) {
 791                         syslog(LOG_ERR, "malloc failed for poll array");
 792                         exit(1);
 793                 }
 794 
 795                 /*
 796                  * Copy the data of the old ones into new arrays, and
 797                  * free the old ones.
 798                  */
 799                 if (tpa) {
 800                         (void) memcpy((void *)poll_array, (void *)tpa,
 801                             num_fds * sizeof (struct pollfd));
 802                         (void) memcpy((void *)conn_polled, (void *)tnp,
 803                             num_fds * sizeof (struct conn_entry));
 804                         free((void *)tpa);
 805                         free((void *)tnp);
 806                 }
 807         }
 808 
 809         /*
 810          * Set the descriptor and event list. All possible events are
 811          * polled for.
 812          */
 813         poll_array[num_fds].fd = fd;
 814         poll_array[num_fds].events = POLLIN|POLLRDNORM|POLLRDBAND|POLLPRI;
 815 
 816         /*
 817          * Copy the transport data over too.
 818          */
 819         conn_polled[num_fds].nc = *nconf;
 820         conn_polled[num_fds].closing = 0;
 821 
 822         /*
 823          * Set the descriptor to non-blocking. Avoids a race
 824          * between data arriving on the stream and then having it
 825          * flushed before we can read it.
 826          */
 827         if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) {
 828                 (void) syslog(LOG_ERR,
 829         "fcntl(file desc. %d/transport %s, F_SETFL, O_NONBLOCK): %m. Exiting",
 830                     num_fds, nconf->nc_proto);
 831                 exit(1);
 832         }
 833 
 834         /*
 835          * Count this descriptor.
 836          */
 837         ++num_fds;
 838 }
 839 
 840 static void
 841 remove_from_poll_list(int fd)
 842 {
 843         int i;
 844         int num_to_copy;
 845 
 846         for (i = 0; i < num_fds; i++) {
 847                 if (poll_array[i].fd == fd) {
 848                         --num_fds;
 849                         num_to_copy = num_fds - i;
 850                         (void) memcpy((void *)&poll_array[i],
 851                             (void *)&poll_array[i+1],
 852                             num_to_copy * sizeof (struct pollfd));
 853                         (void) memset((void *)&poll_array[num_fds], 0,
 854                             sizeof (struct pollfd));
 855                         (void) memcpy((void *)&conn_polled[i],
 856                             (void *)&conn_polled[i+1],
 857                             num_to_copy * sizeof (struct conn_entry));
 858                         (void) memset((void *)&conn_polled[num_fds], 0,
 859                             sizeof (struct conn_entry));
 860                         return;
 861                 }
 862         }
 863         syslog(LOG_ERR, "attempt to remove nonexistent fd from poll list");
 864 
 865 }
 866 
 867 /*
 868  * Called to read and interpret the event on a connectionless descriptor.
 869  * Returns 0 if successful, or a UNIX error code if failure.
 870  */
 871 static int
 872 do_poll_clts_action(int fd, int conn_index)
 873 {
 874         int error;
 875         int ret;
 876         int flags;
 877         struct netconfig *nconf = &conn_polled[conn_index].nc;
 878         static struct t_unitdata *unitdata = NULL;
 879         static struct t_uderr *uderr = NULL;
 880         static int oldfd = -1;
 881         struct nd_hostservlist *host = NULL;
 882         struct strbuf ctl[1], data[1];
 883         /*
 884          * We just need to have some space to consume the
 885          * message in the event we can't use the TLI interface to do the
 886          * job.
 887          *
 888          * We flush the message using getmsg(). For the control part
 889          * we allocate enough for any TPI header plus 32 bytes for address
 890          * and options. For the data part, there is nothing magic about
 891          * the size of the array, but 256 bytes is probably better than
 892          * 1 byte, and we don't expect any data portion anyway.
 893          *
 894          * If the array sizes are too small, we handle this because getmsg()
 895          * (called to consume the message) will return MOREDATA|MORECTL.
 896          * Thus we just call getmsg() until it's read the message.
 897          */
 898         char ctlbuf[sizeof (union T_primitives) + 32];
 899         char databuf[256];
 900 
 901         /*
 902          * If this is the same descriptor as the last time
 903          * do_poll_clts_action was called, we can save some
 904          * de-allocation and allocation.
 905          */
 906         if (oldfd != fd) {
 907                 oldfd = fd;
 908 
 909                 if (unitdata) {
 910                         (void) t_free((char *)unitdata, T_UNITDATA);
 911                         unitdata = NULL;
 912                 }
 913                 if (uderr) {
 914                         (void) t_free((char *)uderr, T_UDERROR);
 915                         uderr = NULL;
 916                 }
 917         }
 918 
 919         /*
 920          * Allocate a unitdata structure for receiving the event.
 921          */
 922         if (unitdata == NULL) {
 923                 /* LINTED pointer alignment */
 924                 unitdata = (struct t_unitdata *)t_alloc(fd, T_UNITDATA, T_ALL);
 925                 if (unitdata == NULL) {
 926                         if (t_errno == TSYSERR) {
 927                                 /*
 928                                  * Save the error code across
 929                                  * syslog(), just in case
 930                                  * syslog() gets its own error
 931                                  * and therefore overwrites errno.
 932                                  */
 933                                 error = errno;
 934                                 (void) syslog(LOG_ERR,
 935         "t_alloc(file descriptor %d/transport %s, T_UNITDATA) failed: %m",
 936                                     fd, nconf->nc_proto);
 937                                 return (error);
 938                         }
 939                         (void) syslog(LOG_ERR,
 940 "t_alloc(file descriptor %d/transport %s, T_UNITDATA) failed TLI error %d",
 941                             fd, nconf->nc_proto, t_errno);
 942                         goto flush_it;
 943                 }
 944         }
 945 
 946 try_again:
 947         flags = 0;
 948 
 949         /*
 950          * The idea is we wait for T_UNITDATA_IND's. Of course,
 951          * we don't get any, because rpcmod filters them out.
 952          * However, we need to call t_rcvudata() to let TLI
 953          * tell us we have a T_UDERROR_IND.
 954          *
 955          * algorithm is:
 956          *      t_rcvudata(), expecting TLOOK.
 957          *      t_look(), expecting T_UDERR.
 958          *      t_rcvuderr(), expecting success (0).
 959          *      expand destination address into ASCII,
 960          *      and dump it.
 961          */
 962 
 963         ret = t_rcvudata(fd, unitdata, &flags);
 964         if (ret == 0 || t_errno == TBUFOVFLW) {
 965                 (void) syslog(LOG_WARNING,
 966 "t_rcvudata(file descriptor %d/transport %s) got unexpected data, %d bytes",
 967                     fd, nconf->nc_proto, unitdata->udata.len);
 968 
 969                 /*
 970                  * Even though we don't expect any data, in case we do,
 971                  * keep reading until there is no more.
 972                  */
 973                 if (flags & T_MORE)
 974                         goto try_again;
 975 
 976                 return (0);
 977         }
 978 
 979         switch (t_errno) {
 980         case TNODATA:
 981                 return (0);
 982         case TSYSERR:
 983                 /*
 984                  * System errors are returned to caller.
 985                  * Save the error code across
 986                  * syslog(), just in case
 987                  * syslog() gets its own error
 988                  * and therefore overwrites errno.
 989                  */
 990                 error = errno;
 991                 (void) syslog(LOG_ERR,
 992                     "t_rcvudata(file descriptor %d/transport %s) %m",
 993                     fd, nconf->nc_proto);
 994                 return (error);
 995         case TLOOK:
 996                 break;
 997         default:
 998                 (void) syslog(LOG_ERR,
 999                 "t_rcvudata(file descriptor %d/transport %s) TLI error %d",
1000                     fd, nconf->nc_proto, t_errno);
1001                 goto flush_it;
1002         }
1003 
1004         ret = t_look(fd);
1005         switch (ret) {
1006         case 0:
1007                 return (0);
1008         case -1:
1009                 /*
1010                  * System errors are returned to caller.
1011                  */
1012                 if (t_errno == TSYSERR) {
1013                         /*
1014                          * Save the error code across
1015                          * syslog(), just in case
1016                          * syslog() gets its own error
1017                          * and therefore overwrites errno.
1018                          */
1019                         error = errno;
1020                         (void) syslog(LOG_ERR,
1021                             "t_look(file descriptor %d/transport %s) %m",
1022                             fd, nconf->nc_proto);
1023                         return (error);
1024                 }
1025                 (void) syslog(LOG_ERR,
1026                     "t_look(file descriptor %d/transport %s) TLI error %d",
1027                     fd, nconf->nc_proto, t_errno);
1028                 goto flush_it;
1029         case T_UDERR:
1030                 break;
1031         default:
1032                 (void) syslog(LOG_WARNING,
1033         "t_look(file descriptor %d/transport %s) returned %d not T_UDERR (%d)",
1034                     fd, nconf->nc_proto, ret, T_UDERR);
1035         }
1036 
1037         if (uderr == NULL) {
1038                 /* LINTED pointer alignment */
1039                 uderr = (struct t_uderr *)t_alloc(fd, T_UDERROR, T_ALL);
1040                 if (uderr == NULL) {
1041                         if (t_errno == TSYSERR) {
1042                                 /*
1043                                  * Save the error code across
1044                                  * syslog(), just in case
1045                                  * syslog() gets its own error
1046                                  * and therefore overwrites errno.
1047                                  */
1048                                 error = errno;
1049                                 (void) syslog(LOG_ERR,
1050         "t_alloc(file descriptor %d/transport %s, T_UDERROR) failed: %m",
1051                                     fd, nconf->nc_proto);
1052                                 return (error);
1053                         }
1054                         (void) syslog(LOG_ERR,
1055 "t_alloc(file descriptor %d/transport %s, T_UDERROR) failed TLI error: %d",
1056                             fd, nconf->nc_proto, t_errno);
1057                         goto flush_it;
1058                 }
1059         }
1060 
1061         ret = t_rcvuderr(fd, uderr);
1062         if (ret == 0) {
1063 
1064                 /*
1065                  * Save the datagram error in errno, so that the
1066                  * %m argument to syslog picks up the error string.
1067                  */
1068                 errno = uderr->error;
1069 
1070                 /*
1071                  * Log the datagram error, then log the host that
1072                  * probably triggerred. Cannot log both in the
1073                  * same transaction because of packet size limitations
1074                  * in /dev/log.
1075                  */
1076                 (void) syslog((errno == ECONNREFUSED) ? LOG_DEBUG : LOG_WARNING,
1077 "NFS response over <file descriptor %d/transport %s> generated error: %m",
1078                     fd, nconf->nc_proto);
1079 
1080                 /*
1081                  * Try to map the client's address back to a
1082                  * name.
1083                  */
1084                 ret = netdir_getbyaddr(nconf, &host, &uderr->addr);
1085                 if (ret != -1 && host && host->h_cnt > 0 &&
1086                     host->h_hostservs) {
1087                 (void) syslog((errno == ECONNREFUSED) ? LOG_DEBUG : LOG_WARNING,
1088 "Bad NFS response was sent to client with host name: %s; service port: %s",
1089                     host->h_hostservs->h_host,
1090                     host->h_hostservs->h_serv);
1091                 } else {
1092                         int i, j;
1093                         char *buf;
1094                         char *hex = "0123456789abcdef";
1095 
1096                         /*
1097                          * Mapping failed, print the whole thing
1098                          * in ASCII hex.
1099                          */
1100                         buf = (char *)malloc(uderr->addr.len * 2 + 1);
1101                         for (i = 0, j = 0; i < uderr->addr.len; i++, j += 2) {
1102                                 buf[j] = hex[((uderr->addr.buf[i]) >> 4) & 0xf];
1103                                 buf[j+1] = hex[uderr->addr.buf[i] & 0xf];
1104                         }
1105                         buf[j] = '\0';
1106                 (void) syslog((errno == ECONNREFUSED) ? LOG_DEBUG : LOG_WARNING,
1107         "Bad NFS response was sent to client with transport address: 0x%s",
1108                     buf);
1109                         free((void *)buf);
1110                 }
1111 
1112                 if (ret == 0 && host != NULL)
1113                         netdir_free((void *)host, ND_HOSTSERVLIST);
1114                 return (0);
1115         }
1116 
1117         switch (t_errno) {
1118         case TNOUDERR:
1119                 goto flush_it;
1120         case TSYSERR:
1121                 /*
1122                  * System errors are returned to caller.
1123                  * Save the error code across
1124                  * syslog(), just in case
1125                  * syslog() gets its own error
1126                  * and therefore overwrites errno.
1127                  */
1128                 error = errno;
1129                 (void) syslog(LOG_ERR,
1130                     "t_rcvuderr(file descriptor %d/transport %s) %m",
1131                     fd, nconf->nc_proto);
1132                 return (error);
1133         default:
1134                 (void) syslog(LOG_ERR,
1135                 "t_rcvuderr(file descriptor %d/transport %s) TLI error %d",
1136                     fd, nconf->nc_proto, t_errno);
1137                 goto flush_it;
1138         }
1139 
1140 flush_it:
1141         /*
1142          * If we get here, then we could not cope with whatever message
1143          * we attempted to read, so flush it. If we did read a message,
1144          * and one isn't present, that is all right, because fd is in
1145          * nonblocking mode.
1146          */
1147         (void) syslog(LOG_ERR,
1148         "Flushing one input message from <file descriptor %d/transport %s>",
1149             fd, nconf->nc_proto);
1150 
1151         /*
1152          * Read and discard the message. Do this this until there is
1153          * no more control/data in the message or until we get an error.
1154          */
1155         do {
1156                 ctl->maxlen = sizeof (ctlbuf);
1157                 ctl->buf = ctlbuf;
1158                 data->maxlen = sizeof (databuf);
1159                 data->buf = databuf;
1160                 flags = 0;
1161                 ret = getmsg(fd, ctl, data, &flags);
1162                 if (ret == -1)
1163                         return (errno);
1164         } while (ret != 0);
1165 
1166         return (0);
1167 }
1168 
1169 static void
1170 conn_close_oldest(void)
1171 {
1172         int fd;
1173         int i1;
1174 
1175         /*
1176          * Find the oldest connection that is not already in the
1177          * process of shutting down.
1178          */
1179         for (i1 = end_listen_fds; /* no conditional expression */; i1++) {
1180                 if (i1 >= num_fds)
1181                         return;
1182                 if (conn_polled[i1].closing == 0)
1183                         break;
1184         }
1185 #ifdef DEBUG
1186         printf("too many connections (%d), releasing oldest (%d)\n",
1187             num_conns, poll_array[i1].fd);
1188 #else
1189         syslog(LOG_WARNING, "too many connections (%d), releasing oldest (%d)",
1190             num_conns, poll_array[i1].fd);
1191 #endif
1192         fd = poll_array[i1].fd;
1193         if (conn_polled[i1].nc.nc_semantics == NC_TPI_COTS) {
1194                 /*
1195                  * For politeness, send a T_DISCON_REQ to the transport
1196                  * provider.  We close the stream anyway.
1197                  */
1198                 (void) t_snddis(fd, (struct t_call *)0);
1199                 num_conns--;
1200                 remove_from_poll_list(fd);
1201                 (void) t_close(fd);
1202         } else {
1203                 /*
1204                  * For orderly release, we do not close the stream
1205                  * until the T_ORDREL_IND arrives to complete
1206                  * the handshake.
1207                  */
1208                 if (t_sndrel(fd) == 0)
1209                         conn_polled[i1].closing = 1;
1210         }
1211 }
1212 
1213 static boolean_t
1214 conn_get(int fd, struct netconfig *nconf, struct conn_ind **connp)
1215 {
1216         struct conn_ind *conn;
1217         struct conn_ind *next_conn;
1218 
1219         conn = (struct conn_ind *)malloc(sizeof (*conn));
1220         if (conn == NULL) {
1221                 syslog(LOG_ERR, "malloc for listen indication failed");
1222                 return (FALSE);
1223         }
1224 
1225         /* LINTED pointer alignment */
1226         conn->conn_call = (struct t_call *)t_alloc(fd, T_CALL, T_ALL);
1227         if (conn->conn_call == NULL) {
1228                 free((char *)conn);
1229                 nfslib_log_tli_error("t_alloc", fd, nconf);
1230                 return (FALSE);
1231         }
1232 
1233         if (t_listen(fd, conn->conn_call) == -1) {
1234                 nfslib_log_tli_error("t_listen", fd, nconf);
1235                 (void) t_free((char *)conn->conn_call, T_CALL);
1236                 free((char *)conn);
1237                 return (FALSE);
1238         }
1239 
1240         if (conn->conn_call->udata.len > 0) {
1241                 syslog(LOG_WARNING,
1242         "rejecting inbound connection(%s) with %d bytes of connect data",
1243                     nconf->nc_proto, conn->conn_call->udata.len);
1244 
1245                 conn->conn_call->udata.len = 0;
1246                 (void) t_snddis(fd, conn->conn_call);
1247                 (void) t_free((char *)conn->conn_call, T_CALL);
1248                 free((char *)conn);
1249                 return (FALSE);
1250         }
1251 
1252         if ((next_conn = *connp) != NULL) {
1253                 next_conn->conn_prev->conn_next = conn;
1254                 conn->conn_next = next_conn;
1255                 conn->conn_prev = next_conn->conn_prev;
1256                 next_conn->conn_prev = conn;
1257         } else {
1258                 conn->conn_next = conn;
1259                 conn->conn_prev = conn;
1260                 *connp = conn;
1261         }
1262         return (TRUE);
1263 }
1264 
1265 static int
1266 discon_get(int fd, struct netconfig *nconf, struct conn_ind **connp)
1267 {
1268         struct conn_ind *conn;
1269         struct t_discon discon;
1270 
1271         discon.udata.buf = (char *)0;
1272         discon.udata.maxlen = 0;
1273         if (t_rcvdis(fd, &discon) == -1) {
1274                 nfslib_log_tli_error("t_rcvdis", fd, nconf);
1275                 return (-1);
1276         }
1277 
1278         conn = *connp;
1279         if (conn == NULL)
1280                 return (0);
1281 
1282         do {
1283                 if (conn->conn_call->sequence == discon.sequence) {
1284                         if (conn->conn_next == conn)
1285                                 *connp = (struct conn_ind *)0;
1286                         else {
1287                                 if (conn == *connp) {
1288                                         *connp = conn->conn_next;
1289                                 }
1290                                 conn->conn_next->conn_prev = conn->conn_prev;
1291                                 conn->conn_prev->conn_next = conn->conn_next;
1292                         }
1293                         free((char *)conn);
1294                         break;
1295                 }
1296                 conn = conn->conn_next;
1297         } while (conn != *connp);
1298 
1299         return (0);
1300 }
1301 
1302 static void
1303 cots_listen_event(int fd, int conn_index)
1304 {
1305         struct t_call *call;
1306         struct conn_ind *conn;
1307         struct conn_ind *conn_head;
1308         int event;
1309         struct netconfig *nconf = &conn_polled[conn_index].nc;
1310         int new_fd;
1311         struct netbuf addrmask;
1312         int ret = 0;
1313         char *clnt;
1314         char *clnt_uaddr = NULL;
1315         struct nd_hostservlist *clnt_serv = NULL;
1316 
1317         conn_head = NULL;
1318         (void) conn_get(fd, nconf, &conn_head);
1319 
1320         while ((conn = conn_head) != NULL) {
1321                 conn_head = conn->conn_next;
1322                 if (conn_head == conn)
1323                         conn_head = NULL;
1324                 else {
1325                         conn_head->conn_prev = conn->conn_prev;
1326                         conn->conn_prev->conn_next = conn_head;
1327                 }
1328                 call = conn->conn_call;
1329                 free(conn);
1330 
1331                 /*
1332                  * If we have already accepted the maximum number of
1333                  * connections allowed on the command line, then drop
1334                  * the oldest connection (for any protocol) before
1335                  * accepting the new connection.  Unless explicitly
1336                  * set on the command line, max_conns_allowed is -1.
1337                  */
1338                 if (max_conns_allowed != -1 && num_conns >= max_conns_allowed)
1339                         conn_close_oldest();
1340 
1341                 /*
1342                  * Create a new transport endpoint for the same proto as
1343                  * the listener.
1344                  */
1345                 new_fd = nfslib_transport_open(nconf);
1346                 if (new_fd == -1) {
1347                         call->udata.len = 0;
1348                         (void) t_snddis(fd, call);
1349                         (void) t_free((char *)call, T_CALL);
1350                         syslog(LOG_ERR, "Cannot establish transport over %s",
1351                             nconf->nc_device);
1352                         continue;
1353                 }
1354 
1355                 /* Bind to a generic address/port for the accepting stream. */
1356                 if (t_bind(new_fd, NULL, NULL) == -1) {
1357                         nfslib_log_tli_error("t_bind", new_fd, nconf);
1358                         call->udata.len = 0;
1359                         (void) t_snddis(fd, call);
1360                         (void) t_free((char *)call, T_CALL);
1361                         (void) t_close(new_fd);
1362                         continue;
1363                 }
1364 
1365                 while (t_accept(fd, new_fd, call) == -1) {
1366                         if (t_errno != TLOOK) {
1367 #ifdef DEBUG
1368                                 nfslib_log_tli_error("t_accept", fd, nconf);
1369 #endif
1370                                 call->udata.len = 0;
1371                                 (void) t_snddis(fd, call);
1372                                 (void) t_free((char *)call, T_CALL);
1373                                 (void) t_close(new_fd);
1374                                 goto do_next_conn;
1375                         }
1376                         while (event = t_look(fd)) {
1377                                 switch (event) {
1378                                 case T_LISTEN:
1379 #ifdef DEBUG
1380                                         printf(
1381 "cots_listen_event(%s): T_LISTEN during accept processing\n", nconf->nc_proto);
1382 #endif
1383                                         (void) conn_get(fd, nconf, &conn_head);
1384                                         continue;
1385                                 case T_DISCONNECT:
1386 #ifdef DEBUG
1387                                         printf(
1388         "cots_listen_event(%s): T_DISCONNECT during accept processing\n",
1389                                             nconf->nc_proto);
1390 #endif
1391                                         (void) discon_get(fd, nconf,
1392                                             &conn_head);
1393                                         continue;
1394                                 default:
1395                                         syslog(LOG_ERR,
1396                         "unexpected event 0x%x during accept processing (%s)",
1397                                             event, nconf->nc_proto);
1398                                         call->udata.len = 0;
1399                                         (void) t_snddis(fd, call);
1400                                         (void) t_free((char *)call, T_CALL);
1401                                         (void) t_close(new_fd);
1402                                         goto do_next_conn;
1403                                 }
1404                         }
1405                 }
1406 
1407                 if (set_addrmask(new_fd, nconf, &addrmask) < 0) {
1408                         (void) syslog(LOG_ERR,
1409                             "Cannot set address mask for %s",
1410                             nconf->nc_netid);
1411                         (void) t_snddis(new_fd, NULL);
1412                         (void) t_free((char *)call, T_CALL);
1413                         (void) t_close(new_fd);
1414                         continue;
1415                 }
1416 
1417                 /* Tell kRPC about the new stream. */
1418                 if (Mysvc4 != NULL)
1419                         ret = (*Mysvc4)(new_fd, &addrmask, nconf,
1420                             NFS4_KRPC_START, &call->addr);
1421                 else
1422                         ret = (*Mysvc)(new_fd, addrmask, nconf);
1423 
1424                 if (ret < 0) {
1425                         if (errno != ENOTCONN) {
1426                                 syslog(LOG_ERR,
1427                                     "unable to register new connection: %m");
1428                         } else {
1429                                 /*
1430                                  * This is the only error that could be
1431                                  * caused by the client, so who was it?
1432                                  */
1433                                 if (netdir_getbyaddr(nconf, &clnt_serv,
1434                                     &(call->addr)) == ND_OK &&
1435                                     clnt_serv->h_cnt > 0)
1436                                         clnt = clnt_serv->h_hostservs->h_host;
1437                                 else
1438                                         clnt = clnt_uaddr = taddr2uaddr(nconf,
1439                                             &(call->addr));
1440                                 /*
1441                                  * If we don't know who the client was,
1442                                  * remain silent.
1443                                  */
1444                                 if (clnt)
1445                                         syslog(LOG_ERR,
1446 "unable to register new connection: client %s has dropped connection", clnt);
1447                                 if (clnt_serv) {
1448                                         netdir_free(clnt_serv, ND_HOSTSERVLIST);
1449                                         clnt_serv = NULL;
1450                                 }
1451                                 if (clnt_uaddr) {
1452                                         free(clnt_uaddr);
1453                                         clnt_uaddr = NULL;
1454                                 }
1455                         }
1456                         free(addrmask.buf);
1457                         (void) t_snddis(new_fd, NULL);
1458                         (void) t_free((char *)call, T_CALL);
1459                         (void) t_close(new_fd);
1460                         goto do_next_conn;
1461                 }
1462 
1463                 free(addrmask.buf);
1464                 (void) t_free((char *)call, T_CALL);
1465 
1466                 /*
1467                  * Poll on the new descriptor so that we get disconnect
1468                  * and orderly release indications.
1469                  */
1470                 num_conns++;
1471                 add_to_poll_list(new_fd, nconf);
1472 
1473                 /* Reset nconf in case it has been moved. */
1474                 nconf = &conn_polled[conn_index].nc;
1475 do_next_conn:;
1476         }
1477 }
1478 
1479 static int
1480 do_poll_cots_action(int fd, int conn_index)
1481 {
1482         char buf[256];
1483         int event;
1484         int i1;
1485         int flags;
1486         struct conn_entry *connent = &conn_polled[conn_index];
1487         struct netconfig *nconf = &(connent->nc);
1488         const char *errorstr;
1489 
1490         while (event = t_look(fd)) {
1491                 switch (event) {
1492                 case T_LISTEN:
1493 #ifdef DEBUG
1494 printf("do_poll_cots_action(%s,%d): T_LISTEN event\n", nconf->nc_proto, fd);
1495 #endif
1496                         cots_listen_event(fd, conn_index);
1497                         break;
1498 
1499                 case T_DATA:
1500 #ifdef DEBUG
1501 printf("do_poll_cots_action(%d,%s): T_DATA event\n", fd, nconf->nc_proto);
1502 #endif
1503                         /*
1504                          * Receive a private notification from CONS rpcmod.
1505                          */
1506                         i1 = t_rcv(fd, buf, sizeof (buf), &flags);
1507                         if (i1 == -1) {
1508                                 syslog(LOG_ERR, "t_rcv failed");
1509                                 break;
1510                         }
1511                         if (i1 < sizeof (int))
1512                                 break;
1513                         i1 = BE32_TO_U32(buf);
1514                         if (i1 == 1 || i1 == 2) {
1515                                 /*
1516                                  * This connection has been idle for too long,
1517                                  * so release it as politely as we can.  If we
1518                                  * have already initiated an orderly release
1519                                  * and we get notified that the stream is
1520                                  * still idle, pull the plug.  This prevents
1521                                  * hung connections from continuing to consume
1522                                  * resources.
1523                                  */
1524 #ifdef DEBUG
1525 printf("do_poll_cots_action(%s,%d): ", nconf->nc_proto, fd);
1526 printf("initiating orderly release of idle connection\n");
1527 #endif
1528                                 if (nconf->nc_semantics == NC_TPI_COTS ||
1529                                     connent->closing != 0) {
1530                                         (void) t_snddis(fd, (struct t_call *)0);
1531                                         goto fdclose;
1532                                 }
1533                                 /*
1534                                  * For NC_TPI_COTS_ORD, the stream is closed
1535                                  * and removed from the poll list when the
1536                                  * T_ORDREL is received from the provider.  We
1537                                  * don't wait for it here because it may take
1538                                  * a while for the transport to shut down.
1539                                  */
1540                                 if (t_sndrel(fd) == -1) {
1541                                         syslog(LOG_ERR,
1542                                         "unable to send orderly release %m");
1543                                 }
1544                                 connent->closing = 1;
1545                         } else
1546                                 syslog(LOG_ERR,
1547                                 "unexpected event from CONS rpcmod %d", i1);
1548                         break;
1549 
1550                 case T_ORDREL:
1551 #ifdef DEBUG
1552 printf("do_poll_cots_action(%s,%d): T_ORDREL event\n", nconf->nc_proto, fd);
1553 #endif
1554                         /* Perform an orderly release. */
1555                         if (t_rcvrel(fd) == 0) {
1556                                 /* T_ORDREL on listen fd's should be ignored */
1557                                 if (!is_listen_fd_index(conn_index)) {
1558                                         (void) t_sndrel(fd);
1559                                         goto fdclose;
1560                                 }
1561                                 break;
1562 
1563                         } else if (t_errno == TLOOK) {
1564                                 break;
1565                         } else {
1566                                 nfslib_log_tli_error("t_rcvrel", fd, nconf);
1567 
1568                                 /*
1569                                  * check to make sure we do not close
1570                                  * listen fd
1571                                  */
1572                                 if (is_listen_fd_index(conn_index))
1573                                         break;
1574                                 else
1575                                         goto fdclose;
1576                         }
1577 
1578                 case T_DISCONNECT:
1579 #ifdef DEBUG
1580 printf("do_poll_cots_action(%s,%d): T_DISCONNECT event\n", nconf->nc_proto, fd);
1581 #endif
1582                         if (t_rcvdis(fd, (struct t_discon *)NULL) == -1)
1583                                 nfslib_log_tli_error("t_rcvdis", fd, nconf);
1584 
1585                         /*
1586                          * T_DISCONNECT on listen fd's should be ignored.
1587                          */
1588                         if (is_listen_fd_index(conn_index))
1589                                 break;
1590                         else
1591                                 goto fdclose;
1592 
1593                 default:
1594                         if (t_errno == TSYSERR) {
1595                                 if ((errorstr = strerror(errno)) == NULL) {
1596                                         (void) sprintf(buf,
1597                                             "Unknown error num %d", errno);
1598                                         errorstr = (const char *) buf;
1599                                 }
1600                         } else if (event == -1)
1601                                 errorstr = t_strerror(t_errno);
1602                         else
1603                                 errorstr = "";
1604                         syslog(LOG_ERR,
1605                             "unexpected TLI event (0x%x) on "
1606                             "connection-oriented transport(%s,%d):%s",
1607                             event, nconf->nc_proto, fd, errorstr);
1608 fdclose:
1609                         num_conns--;
1610                         remove_from_poll_list(fd);
1611                         (void) t_close(fd);
1612                         return (0);
1613                 }
1614         }
1615 
1616         return (0);
1617 }
1618 
/*
 * Translate a service name (the form used primarily in log messages)
 * into the RPC port name expected by the netdir_*() routines.
 *
 * name: service name; compared case-sensitively against the known set.
 *
 * Returns a pointer to a constant string: the matching port name, or
 * "unrecognized" when the service name is not one of the known ones.
 * The caller must not modify or free the returned string.
 */
static char *
serv_name_to_port_name(char *name)
{
        static const struct {
                const char *service;    /* name used in logging */
                char *port;             /* name used by netdir_*() */
        } name_map[] = {
                { "NFS", "nfs" },
                { "NLM", "lockd" },
                { "NFS4_CALLBACK", "nfs4_callback" }
        };
        size_t idx;

        for (idx = 0; idx < sizeof (name_map) / sizeof (name_map[0]); idx++) {
                if (strcmp(name, name_map[idx].service) == 0)
                        return (name_map[idx].port);
        }

        return ("unrecognized");
}
1636 
1637 static int
1638 bind_to_provider(char *provider, char *serv, struct netbuf **addr,
1639                 struct netconfig **retnconf)
1640 {
1641         struct netconfig *nconf;
1642         NCONF_HANDLE *nc;
1643         struct nd_hostserv hs;
1644 
1645         hs.h_host = HOST_SELF;
1646         hs.h_serv = serv_name_to_port_name(serv);
1647 
1648         if ((nc = setnetconfig()) == (NCONF_HANDLE *)NULL) {
1649                 syslog(LOG_ERR, "setnetconfig failed: %m");
1650                 return (-1);
1651         }
1652         while (nconf = getnetconfig(nc)) {
1653                 if (OK_TPI_TYPE(nconf) &&
1654                     strcmp(nconf->nc_device, provider) == 0) {
1655                         *retnconf = nconf;
1656                         return (nfslib_bindit(nconf, addr, &hs,
1657                             listen_backlog));
1658                 }
1659         }
1660         (void) endnetconfig(nc);
1661 
1662         syslog(LOG_ERR, "couldn't find netconfig entry for provider %s",
1663             provider);
1664         return (-1);
1665 }
1666 
1667 static int
1668 bind_to_proto(NETSELDECL(proto), char *serv, struct netbuf **addr,
1669                 struct netconfig **retnconf)
1670 {
1671         struct netconfig *nconf;
1672         NCONF_HANDLE *nc = NULL;
1673         struct nd_hostserv hs;
1674 
1675         hs.h_host = HOST_SELF;
1676         hs.h_serv = serv_name_to_port_name(serv);
1677 
1678         if ((nc = setnetconfig()) == (NCONF_HANDLE *)NULL) {
1679                 syslog(LOG_ERR, "setnetconfig failed: %m");
1680                 return (-1);
1681         }
1682         while (nconf = getnetconfig(nc)) {
1683                 if (OK_TPI_TYPE(nconf) && NETSELEQ(nconf->nc_proto, proto)) {
1684                         *retnconf = nconf;
1685                         return (nfslib_bindit(nconf, addr, &hs,
1686                             listen_backlog));
1687                 }
1688         }
1689         (void) endnetconfig(nc);
1690 
1691         syslog(LOG_ERR, "couldn't find netconfig entry for protocol %s",
1692             proto);
1693         return (-1);
1694 }
1695 
1696 #include <netinet/in.h>
1697 
1698 /*
1699  * Create an address mask appropriate for the transport.
1700  * The mask is used to obtain the host-specific part of
1701  * a network address when comparing addresses.
1702  * For an internet address the host-specific part is just
1703  * the 32 bit IP address and this part of the mask is set
1704  * to all-ones. The port number part of the mask is zeroes.
1705  */
1706 static int
1707 set_addrmask(int fd,
1708         struct netconfig *nconf,
1709         struct netbuf *mask)
1710 {
1711         struct t_info info;
1712 
1713         /*
1714          * Find the size of the address we need to mask.
1715          */
1716         if (t_getinfo(fd, &info) < 0) {
1717                 t_error("t_getinfo");
1718                 return (-1);
1719         }
1720         mask->len = mask->maxlen = info.addr;
1721         if (info.addr <= 0) {
1722                 /*
1723                  * loopback devices have infinite addr size
1724                  * (it is identified by -1 in addr field of t_info structure),
1725                  * so don't build the netmask for them. It's a special case
1726                  * that should be handled properly.
1727                  */
1728                 if ((info.addr == -1) &&
1729                     (0 == strcmp(nconf->nc_protofmly, NC_LOOPBACK))) {
1730                         memset(mask, 0, sizeof (*mask));
1731                         return (0);
1732                 }
1733 
1734                 syslog(LOG_ERR, "set_addrmask: address size: %ld", info.addr);
1735                 return (-1);
1736         }
1737 
1738         mask->buf = (char *)malloc(mask->len);
1739         if (mask->buf == NULL) {
1740                 syslog(LOG_ERR, "set_addrmask: no memory");
1741                 return (-1);
1742         }
1743         (void) memset(mask->buf, 0, mask->len);   /* reset all mask bits */
1744 
1745         if (strcmp(nconf->nc_protofmly, NC_INET) == 0) {
1746                 /*
1747                  * Set the mask so that the port is ignored.
1748                  */
1749                 /* LINTED pointer alignment */
1750                 ((struct sockaddr_in *)mask->buf)->sin_addr.s_addr =
1751                     (ulong_t)~0;
1752                 /* LINTED pointer alignment */
1753                 ((struct sockaddr_in *)mask->buf)->sin_family =
1754                     (ushort_t)~0;
1755         } else if (strcmp(nconf->nc_protofmly, NC_INET6) == 0) {
1756                 /* LINTED pointer alignment */
1757                 (void) memset(&((struct sockaddr_in6 *)mask->buf)->sin6_addr,
1758                     (uchar_t)~0, sizeof (struct in6_addr));
1759                 /* LINTED pointer alignment */
1760                 ((struct sockaddr_in6 *)mask->buf)->sin6_family =
1761                     (ushort_t)~0;
1762         } else {
1763 
1764                 /*
1765                  * Set all mask bits.
1766                  */
1767                 (void) memset(mask->buf, 0xFF, mask->len);
1768         }
1769         return (0);
1770 }
1771 
1772 /*
1773  * For listen fd's index is always less than end_listen_fds.
1774  * end_listen_fds is defined externally in the daemon that uses this library.
1775  * It's value is equal to the number of open file descriptors after the
1776  * last listen end point was opened but before any connection was accepted.
1777  */
1778 static int
1779 is_listen_fd_index(int index)
1780 {
1781         return (index < end_listen_fds);
1782 }