1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 1996, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright (c) 2012 by Delphix. All rights reserved.
  25  * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
  26  */
  27 
  28 
  29 /*
  30  * nfs_tbind.c, common part for nfsd and lockd.
  31  */
  32 
  33 #include <tiuser.h>
  34 #include <fcntl.h>
  35 #include <netconfig.h>
  36 #include <stropts.h>
  37 #include <errno.h>
  38 #include <syslog.h>
  39 #include <rpc/rpc.h>
  40 #include <sys/time.h>
  41 #include <sys/resource.h>
  42 #include <signal.h>
  43 #include <netdir.h>
  44 #include <unistd.h>
  45 #include <string.h>
  46 #include <netinet/tcp.h>
  47 #include <malloc.h>
  48 #include <stdlib.h>
  49 #include "nfs_tbind.h"
  50 #include <nfs/nfs.h>
  51 #include <nfs/nfs_acl.h>
  52 #include <nfs/nfssys.h>
  53 #include <nfs/nfs4.h>
  54 #include <zone.h>
  55 #include <sys/socket.h>
  56 #include <tsol/label.h>
  57 
  58 /*
  59  * Determine valid semantics for most applications.
  60  */
  61 #define OK_TPI_TYPE(_nconf) \
  62         (_nconf->nc_semantics == NC_TPI_CLTS || \
  63         _nconf->nc_semantics == NC_TPI_COTS || \
  64         _nconf->nc_semantics == NC_TPI_COTS_ORD)
  65 
  66 #define BE32_TO_U32(a) \
  67         ((((ulong_t)((uchar_t *)a)[0] & 0xFF) << (ulong_t)24) | \
  68         (((ulong_t)((uchar_t *)a)[1] & 0xFF) << (ulong_t)16) | \
  69         (((ulong_t)((uchar_t *)a)[2] & 0xFF) << (ulong_t)8)  | \
  70         ((ulong_t)((uchar_t *)a)[3] & 0xFF))
  71 
/*
 * Number of elements to add to the poll array on each allocation
 * (the growth step used by add_to_poll_list()).
 */
#define POLL_ARRAY_INC_SIZE     64

/*
 * Number of file descriptors by which the process soft limit may be
 * increased on each call to nofile_increase(0).
 */
#define NOFILE_INC_SIZE 64

/*
 * Default TCP send and receive buffer size of NFS server, in bytes.
 * nfslib_set_sockbuf() raises SO_SNDBUF/SO_RCVBUF to this value when
 * the current setting is smaller.
 */
#define NFSD_TCP_BUFSZ  (1024*1024)
  87 
/*
 * Node of a doubly linked list carrying one struct t_call.
 * Lists of these are passed (as struct conn_ind **) to conn_get() and
 * discon_get(); presumably they queue pending connect indications, but
 * the code that fills and drains the list is outside this view --
 * TODO confirm.
 */
struct conn_ind {
        struct conn_ind *conn_next;     /* following node */
        struct conn_ind *conn_prev;     /* preceding node */
        struct t_call   *conn_call;     /* TLI call data for this node */
};
  93 
/*
 * Per-descriptor transport information kept in the conn_polled array.
 * Entry i of conn_polled describes the descriptor in entry i of
 * poll_array: add_to_poll_list() fills both at the same index and
 * remove_from_poll_list() compacts both together.
 */
struct conn_entry {
        bool_t                  closing;        /* cleared when entry is added */
        struct netconfig        nc;             /* copy of the netconfig entry */
};
  98 
/*
 * this file contains transport routines common to nfsd and lockd
 */
static  int     nofile_increase(int);
static  int     reuseaddr(int);
static  int     recvucred(int);
static  int     anonmlp(int);
static  void    add_to_poll_list(int, struct netconfig *);
static  char    *serv_name_to_port_name(char *);
static  int     bind_to_proto(char *, char *, struct netbuf **,
                                struct netconfig **);
static  int     bind_to_provider(char *, char *, struct netbuf **,
                                        struct netconfig **);
static  void    conn_close_oldest(void);
static  boolean_t conn_get(int, struct netconfig *, struct conn_ind **);
static  void    cots_listen_event(int, int);
static  int     discon_get(int, struct netconfig *, struct conn_ind **);
static  int     do_poll_clts_action(int, int);
static  int     do_poll_cots_action(int, int);
static  void    remove_from_poll_list(int);
static  int     set_addrmask(int, struct netconfig *, struct netbuf *);
static  int     is_listen_fd_index(int);

/* Descriptor array handed to poll(); grown by add_to_poll_list(). */
static  struct pollfd *poll_array;
/* Transport data parallel to poll_array (same index, same descriptor). */
static  struct conn_entry *conn_polled;
static  int     num_conns;              /* Current number of connections */
/*
 * Optional service hook.  When non-NULL it is called by nfslib_bindit()
 * with NFS4_SETPORT for connection-oriented transports and by do_one()
 * with NFS4_SETPORT|NFS4_KRPC_START when no svc routine was supplied.
 * It is not assigned anywhere in the visible part of this file.
 */
int             (*Mysvc4)(int, struct netbuf *, struct netconfig *, int,
                struct netbuf *);
static int      setopt(int fd, int level, int name, int value);
static int      get_opt(int fd, int level, int name);
static void     nfslib_set_sockbuf(int fd);
 130 
 131 /*
 132  * Called to create and prepare a transport descriptor for in-kernel
 133  * RPC service.
 134  * Returns -1 on failure and a valid descriptor on success.
 135  */
 136 int
 137 nfslib_transport_open(struct netconfig *nconf)
 138 {
 139         int fd;
 140         struct strioctl strioc;
 141 
 142         if ((nconf == (struct netconfig *)NULL) ||
 143             (nconf->nc_device == (char *)NULL)) {
 144                 syslog(LOG_ERR, "no netconfig device");
 145                 return (-1);
 146         }
 147 
 148         /*
 149          * Open the transport device.
 150          */
 151         fd = t_open(nconf->nc_device, O_RDWR, (struct t_info *)NULL);
 152         if (fd == -1) {
 153                 if (t_errno == TSYSERR && errno == EMFILE &&
 154                     (nofile_increase(0) == 0)) {
 155                         /* Try again with a higher NOFILE limit. */
 156                         fd = t_open(nconf->nc_device, O_RDWR,
 157                             (struct t_info *)NULL);
 158                 }
 159                 if (fd == -1) {
 160                         syslog(LOG_ERR, "t_open %s failed:  t_errno %d, %m",
 161                             nconf->nc_device, t_errno);
 162                         return (-1);
 163                 }
 164         }
 165 
 166         /*
 167          * Pop timod because the RPC module must be as close as possible
 168          * to the transport.
 169          */
 170         if (ioctl(fd, I_POP, 0) < 0) {
 171                 syslog(LOG_ERR, "I_POP of timod failed: %m");
 172                 (void) t_close(fd);
 173                 return (-1);
 174         }
 175 
 176         /*
 177          * Common code for CLTS and COTS transports
 178          */
 179         if (ioctl(fd, I_PUSH, "rpcmod") < 0) {
 180                 syslog(LOG_ERR, "I_PUSH of rpcmod failed: %m");
 181                 (void) t_close(fd);
 182                 return (-1);
 183         }
 184 
 185         strioc.ic_cmd = RPC_SERVER;
 186         strioc.ic_dp = (char *)0;
 187         strioc.ic_len = 0;
 188         strioc.ic_timout = -1;
 189 
 190         /* Tell rpcmod to act like a server stream. */
 191         if (ioctl(fd, I_STR, &strioc) < 0) {
 192                 syslog(LOG_ERR, "rpcmod set-up ioctl failed: %m");
 193                 (void) t_close(fd);
 194                 return (-1);
 195         }
 196 
 197         /*
 198          * Re-push timod so that we will still be doing TLI
 199          * operations on the descriptor.
 200          */
 201         if (ioctl(fd, I_PUSH, "timod") < 0) {
 202                 syslog(LOG_ERR, "I_PUSH of timod failed: %m");
 203                 (void) t_close(fd);
 204                 return (-1);
 205         }
 206 
 207         /*
 208          * Enable options of returning the ip's for udp.
 209          */
 210         if (strcmp(nconf->nc_netid, "udp6") == 0)
 211                 __rpc_tli_set_options(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, 1);
 212         else if (strcmp(nconf->nc_netid, "udp") == 0)
 213                 __rpc_tli_set_options(fd, IPPROTO_IP, IP_RECVDSTADDR, 1);
 214 
 215         return (fd);
 216 }
 217 
 218 static int
 219 nofile_increase(int limit)
 220 {
 221         struct rlimit rl;
 222 
 223         if (getrlimit(RLIMIT_NOFILE, &rl) == -1) {
 224                 syslog(LOG_ERR, "getrlimit of NOFILE failed: %m");
 225                 return (-1);
 226         }
 227 
 228         if (limit > 0)
 229                 rl.rlim_cur = limit;
 230         else
 231                 rl.rlim_cur += NOFILE_INC_SIZE;
 232 
 233         if (rl.rlim_cur > rl.rlim_max &&
 234             rl.rlim_max != RLIM_INFINITY)
 235                 rl.rlim_max = rl.rlim_cur;
 236 
 237         if (setrlimit(RLIMIT_NOFILE, &rl) == -1) {
 238                 syslog(LOG_ERR, "setrlimit of NOFILE to %d failed: %m",
 239                     rl.rlim_cur);
 240                 return (-1);
 241         }
 242 
 243         return (0);
 244 }
 245 
 246 static void
 247 nfslib_set_sockbuf(int fd)
 248 {
 249         int curval, val;
 250 
 251         val = NFSD_TCP_BUFSZ;
 252 
 253         curval = get_opt(fd, SOL_SOCKET, SO_SNDBUF);
 254         syslog(LOG_DEBUG, "Current SO_SNDBUF value is %d", curval);
 255         if ((curval != -1) && (curval < val)) {
 256                 syslog(LOG_DEBUG, "Set SO_SNDBUF  option to %d", val);
 257                 if (setopt(fd, SOL_SOCKET, SO_SNDBUF, val) < 0) {
 258                         syslog(LOG_ERR,
 259                             "couldn't set SO_SNDBUF to %d - t_errno = %d",
 260                             val, t_errno);
 261                         syslog(LOG_ERR,
 262                             "Check and increase system-wide tcp_max_buf");
 263                 }
 264         }
 265 
 266         curval = get_opt(fd, SOL_SOCKET, SO_RCVBUF);
 267         syslog(LOG_DEBUG, "Current SO_RCVBUF value is %d", curval);
 268         if ((curval != -1) && (curval < val)) {
 269                 syslog(LOG_DEBUG, "Set SO_RCVBUF  option to %d", val);
 270                 if (setopt(fd, SOL_SOCKET, SO_RCVBUF, val) < 0) {
 271                         syslog(LOG_ERR,
 272                             "couldn't set SO_RCVBUF to %d - t_errno = %d",
 273                             val, t_errno);
 274                         syslog(LOG_ERR,
 275                             "Check and increase system-wide tcp_max_buf");
 276                 }
 277         }
 278 }
 279 
 280 int
 281 nfslib_bindit(struct netconfig *nconf, struct netbuf **addr,
 282         struct nd_hostserv *hs, int backlog)
 283 {
 284         int fd;
 285         struct t_bind  *ntb;
 286         struct t_bind tb;
 287         struct nd_addrlist *addrlist;
 288         struct t_optmgmt req, resp;
 289         struct opthdr *opt;
 290         char reqbuf[128];
 291         bool_t use_any = FALSE;
 292         bool_t gzone = TRUE;
 293 
 294         if ((fd = nfslib_transport_open(nconf)) == -1) {
 295                 syslog(LOG_ERR, "cannot establish transport service over %s",
 296                     nconf->nc_device);
 297                 return (-1);
 298         }
 299 
 300         addrlist = (struct nd_addrlist *)NULL;
 301 
 302         /* nfs4_callback service does not used a fieed port number */
 303 
 304         if (strcmp(hs->h_serv, "nfs4_callback") == 0) {
 305                 tb.addr.maxlen = 0;
 306                 tb.addr.len = 0;
 307                 tb.addr.buf = 0;
 308                 use_any = TRUE;
 309                 gzone = (getzoneid() == GLOBAL_ZONEID);
 310         } else if (netdir_getbyname(nconf, hs, &addrlist) != 0) {
 311 
 312                 syslog(LOG_ERR,
 313                 "Cannot get address for transport %s host %s service %s",
 314                     nconf->nc_netid, hs->h_host, hs->h_serv);
 315                 (void) t_close(fd);
 316                 return (-1);
 317         }
 318 
 319         if (strcmp(nconf->nc_proto, "tcp") == 0) {
 320                 /*
 321                  * If we're running over TCP, then set the
 322                  * SO_REUSEADDR option so that we can bind
 323                  * to our preferred address even if previously
 324                  * left connections exist in FIN_WAIT states.
 325                  * This is somewhat bogus, but otherwise you have
 326                  * to wait 2 minutes to restart after killing it.
 327                  */
 328                 if (reuseaddr(fd) == -1) {
 329                         syslog(LOG_WARNING,
 330                         "couldn't set SO_REUSEADDR option on transport");
 331                 }
 332         } else if (strcmp(nconf->nc_proto, "udp") == 0) {
 333                 /*
 334                  * In order to run MLP on UDP, we need to handle creds.
 335                  */
 336                 if (recvucred(fd) == -1) {
 337                         syslog(LOG_WARNING,
 338                             "couldn't set SO_RECVUCRED option on transport");
 339                 }
 340         }
 341 
 342         /*
 343          * Make non global zone nfs4_callback port MLP
 344          */
 345         if (use_any && is_system_labeled() && !gzone) {
 346                 if (anonmlp(fd) == -1) {
 347                         /*
 348                          * failing to set this option means nfs4_callback
 349                          * could fail silently later. So fail it with
 350                          * with an error message now.
 351                          */
 352                         syslog(LOG_ERR,
 353                             "couldn't set SO_ANON_MLP option on transport");
 354                         (void) t_close(fd);
 355                         return (-1);
 356                 }
 357         }
 358 
 359         if (nconf->nc_semantics == NC_TPI_CLTS)
 360                 tb.qlen = 0;
 361         else
 362                 tb.qlen = backlog;
 363 
 364         /* LINTED pointer alignment */
 365         ntb = (struct t_bind *)t_alloc(fd, T_BIND, T_ALL);
 366         if (ntb == (struct t_bind *)NULL) {
 367                 syslog(LOG_ERR, "t_alloc failed:  t_errno %d, %m", t_errno);
 368                 (void) t_close(fd);
 369                 netdir_free((void *)addrlist, ND_ADDRLIST);
 370                 return (-1);
 371         }
 372 
 373         /*
 374          * XXX - what about the space tb->addr.buf points to? This should
 375          * be either a memcpy() to/from the buf fields, or t_alloc(fd,T_BIND,)
 376          * should't be called with T_ALL.
 377          */
 378         if (addrlist)
 379                 tb.addr = *(addrlist->n_addrs);              /* structure copy */
 380 
 381         if (t_bind(fd, &tb, ntb) == -1) {
 382                 syslog(LOG_ERR, "t_bind failed:  t_errno %d, %m", t_errno);
 383                 (void) t_free((char *)ntb, T_BIND);
 384                 netdir_free((void *)addrlist, ND_ADDRLIST);
 385                 (void) t_close(fd);
 386                 return (-1);
 387         }
 388 
 389         /* make sure we bound to the right address */
 390         if (use_any == FALSE &&
 391             (tb.addr.len != ntb->addr.len ||
 392             memcmp(tb.addr.buf, ntb->addr.buf, tb.addr.len) != 0)) {
 393                 syslog(LOG_ERR, "t_bind to wrong address");
 394                 (void) t_free((char *)ntb, T_BIND);
 395                 netdir_free((void *)addrlist, ND_ADDRLIST);
 396                 (void) t_close(fd);
 397                 return (-1);
 398         }
 399 
 400         /*
 401          * Call nfs4svc_setport so that the kernel can be
 402          * informed what port number the daemon is listing
 403          * for incoming connection requests.
 404          */
 405 
 406         if ((nconf->nc_semantics == NC_TPI_COTS ||
 407             nconf->nc_semantics == NC_TPI_COTS_ORD) && Mysvc4 != NULL)
 408                 (*Mysvc4)(fd, NULL, nconf, NFS4_SETPORT, &ntb->addr);
 409 
 410         *addr = &ntb->addr;
 411         netdir_free((void *)addrlist, ND_ADDRLIST);
 412 
 413         if (strcmp(nconf->nc_proto, "tcp") == 0) {
 414                 /*
 415                  * Disable the Nagle algorithm on TCP connections.
 416                  * Connections accepted from this listener will
 417                  * inherit the listener options.
 418                  */
 419 
 420                 /* LINTED pointer alignment */
 421                 opt = (struct opthdr *)reqbuf;
 422                 opt->level = IPPROTO_TCP;
 423                 opt->name = TCP_NODELAY;
 424                 opt->len = sizeof (int);
 425 
 426                 /* LINTED pointer alignment */
 427                 *(int *)((char *)opt + sizeof (*opt)) = 1;
 428 
 429                 req.flags = T_NEGOTIATE;
 430                 req.opt.len = sizeof (*opt) + opt->len;
 431                 req.opt.buf = (char *)opt;
 432                 resp.flags = 0;
 433                 resp.opt.buf = reqbuf;
 434                 resp.opt.maxlen = sizeof (reqbuf);
 435 
 436                 if (t_optmgmt(fd, &req, &resp) < 0 ||
 437                     resp.flags != T_SUCCESS) {
 438                         syslog(LOG_ERR,
 439         "couldn't set NODELAY option for proto %s: t_errno = %d, %m",
 440                             nconf->nc_proto, t_errno);
 441                 }
 442 
 443                 nfslib_set_sockbuf(fd);
 444         }
 445 
 446         return (fd);
 447 }
 448 
 449 static int
 450 get_opt(int fd, int level, int name)
 451 {
 452         struct t_optmgmt req, res;
 453         struct {
 454                 struct opthdr opt;
 455                 int value;
 456         } reqbuf;
 457 
 458         reqbuf.opt.level = level;
 459         reqbuf.opt.name = name;
 460         reqbuf.opt.len = sizeof (int);
 461         reqbuf.value = 0;
 462 
 463         req.flags = T_CURRENT;
 464         req.opt.len = sizeof (reqbuf);
 465         req.opt.buf = (char *)&reqbuf;
 466 
 467         res.flags = 0;
 468         res.opt.buf = (char *)&reqbuf;
 469         res.opt.maxlen = sizeof (reqbuf);
 470 
 471         if (t_optmgmt(fd, &req, &res) < 0 || res.flags != T_SUCCESS) {
 472                 t_error("t_optmgmt");
 473                 return (-1);
 474         }
 475         return (reqbuf.value);
 476 }
 477 
 478 static int
 479 setopt(int fd, int level, int name, int value)
 480 {
 481         struct t_optmgmt req, resp;
 482         struct {
 483                 struct opthdr opt;
 484                 int value;
 485         } reqbuf;
 486 
 487         reqbuf.opt.level = level;
 488         reqbuf.opt.name = name;
 489         reqbuf.opt.len = sizeof (int);
 490 
 491         reqbuf.value = value;
 492 
 493         req.flags = T_NEGOTIATE;
 494         req.opt.len = sizeof (reqbuf);
 495         req.opt.buf = (char *)&reqbuf;
 496 
 497         resp.flags = 0;
 498         resp.opt.buf = (char *)&reqbuf;
 499         resp.opt.maxlen = sizeof (reqbuf);
 500 
 501         if (t_optmgmt(fd, &req, &resp) < 0 || resp.flags != T_SUCCESS) {
 502                 t_error("t_optmgmt");
 503                 return (-1);
 504         }
 505         return (0);
 506 }
 507 
/*
 * Turn on the SOL_SOCKET option SO_REUSEADDR for transport descriptor fd.
 * Returns the result of setopt(): 0 on success, -1 on failure.
 */
static int
reuseaddr(int fd)
{
        return (setopt(fd, SOL_SOCKET, SO_REUSEADDR, 1));
}
 513 
/*
 * Turn on the SOL_SOCKET option SO_RECVUCRED for transport descriptor fd.
 * Returns the result of setopt(): 0 on success, -1 on failure.
 */
static int
recvucred(int fd)
{
        return (setopt(fd, SOL_SOCKET, SO_RECVUCRED, 1));
}
 519 
/*
 * Turn on the SOL_SOCKET option SO_ANON_MLP for transport descriptor fd.
 * Returns the result of setopt(): 0 on success, -1 on failure.
 */
static int
anonmlp(int fd)
{
        return (setopt(fd, SOL_SOCKET, SO_ANON_MLP, 1));
}
 525 
 526 void
 527 nfslib_log_tli_error(char *tli_name, int fd, struct netconfig *nconf)
 528 {
 529         int error;
 530 
 531         /*
 532          * Save the error code across syslog(), just in case syslog()
 533          * gets its own error and, therefore, overwrites errno.
 534          */
 535         error = errno;
 536         if (t_errno == TSYSERR) {
 537                 syslog(LOG_ERR, "%s(file descriptor %d/transport %s) %m",
 538                     tli_name, fd, nconf->nc_proto);
 539         } else {
 540                 syslog(LOG_ERR,
 541                     "%s(file descriptor %d/transport %s) TLI error %d",
 542                     tli_name, fd, nconf->nc_proto, t_errno);
 543         }
 544         errno = error;
 545 }
 546 
 547 /*
 548  * Called to set up service over a particular transport.
 549  */
 550 void
 551 do_one(char *provider, NETSELDECL(proto), struct protob *protobp0,
 552         int (*svc)(int, struct netbuf, struct netconfig *))
 553 {
 554         register int sock;
 555         struct protob *protobp;
 556         struct netbuf *retaddr;
 557         struct netconfig *retnconf;
 558         struct netbuf addrmask;
 559         int vers;
 560         int err;
 561         int l;
 562 
 563         if (provider)
 564                 sock = bind_to_provider(provider, protobp0->serv, &retaddr,
 565                     &retnconf);
 566         else
 567                 sock = bind_to_proto(proto, protobp0->serv, &retaddr,
 568                     &retnconf);
 569 
 570         if (sock == -1) {
 571                 (void) syslog(LOG_ERR,
 572         "Cannot establish %s service over %s: transport setup problem.",
 573                     protobp0->serv, provider ? provider : proto);
 574                 return;
 575         }
 576 
 577         if (set_addrmask(sock, retnconf, &addrmask) < 0) {
 578                 (void) syslog(LOG_ERR,
 579                     "Cannot set address mask for %s", retnconf->nc_netid);
 580                 return;
 581         }
 582 
 583         /*
 584          * Register all versions of the programs in the protocol block list.
 585          */
 586         l = strlen(NC_UDP);
 587         for (protobp = protobp0; protobp; protobp = protobp->next) {
 588                 for (vers = protobp->versmin; vers <= protobp->versmax;
 589                     vers++) {
 590                         if ((protobp->program == NFS_PROGRAM ||
 591                             protobp->program == NFS_ACL_PROGRAM) &&
 592                             vers == NFS_V4 &&
 593                             strncasecmp(retnconf->nc_proto, NC_UDP, l) == 0)
 594                                 continue;
 595 
 596                         (void) rpcb_unset(protobp->program, vers, retnconf);
 597                         (void) rpcb_set(protobp->program, vers, retnconf,
 598                             retaddr);
 599                 }
 600         }
 601 
 602         /*
 603          * Register services with CLTS semantics right now.
 604          * Note: services with COTS/COTS_ORD semantics will be
 605          * registered later from cots_listen_event function.
 606          */
 607         if (retnconf->nc_semantics == NC_TPI_CLTS) {
 608                 /* Don't drop core if supporting module(s) aren't loaded. */
 609                 (void) signal(SIGSYS, SIG_IGN);
 610 
 611                 /*
 612                  * svc() doesn't block, it returns success or failure.
 613                  */
 614 
 615                 if (svc == NULL && Mysvc4 != NULL)
 616                         err = (*Mysvc4)(sock, &addrmask, retnconf,
 617                             NFS4_SETPORT|NFS4_KRPC_START, retaddr);
 618                 else
 619                         err = (*svc)(sock, addrmask, retnconf);
 620 
 621                 if (err < 0) {
 622                         (void) syslog(LOG_ERR,
 623                             "Cannot establish %s service over <file desc."
 624                             " %d, protocol %s> : %m. Exiting",
 625                             protobp0->serv, sock, retnconf->nc_proto);
 626                         exit(1);
 627                 }
 628         }
 629         free(addrmask.buf);
 630 
 631         /*
 632          * We successfully set up the server over this transport.
 633          * Add this descriptor to the one being polled on.
 634          */
 635         add_to_poll_list(sock, retnconf);
 636 }
 637 
 638 /*
 639  * Set up the NFS service over all the available transports.
 640  * Returns -1 for failure, 0 for success.
 641  */
 642 int
 643 do_all(struct protob *protobp,
 644         int (*svc)(int, struct netbuf, struct netconfig *))
 645 {
 646         struct netconfig *nconf;
 647         NCONF_HANDLE *nc;
 648         int l;
 649 
 650         if ((nc = setnetconfig()) == (NCONF_HANDLE *)NULL) {
 651                 syslog(LOG_ERR, "setnetconfig failed: %m");
 652                 return (-1);
 653         }
 654         l = strlen(NC_UDP);
 655         while (nconf = getnetconfig(nc)) {
 656                 if ((nconf->nc_flag & NC_VISIBLE) &&
 657                     strcmp(nconf->nc_protofmly, NC_LOOPBACK) != 0 &&
 658                     OK_TPI_TYPE(nconf) &&
 659                     (protobp->program != NFS4_CALLBACK ||
 660                     strncasecmp(nconf->nc_proto, NC_UDP, l) != 0))
 661                         do_one(nconf->nc_device, nconf->nc_proto,
 662                             protobp, svc);
 663         }
 664         (void) endnetconfig(nc);
 665         return (0);
 666 }
 667 
 668 /*
 669  * poll on the open transport descriptors for events and errors.
 670  */
 671 void
 672 poll_for_action(void)
 673 {
 674         int nfds;
 675         int i;
 676 
 677         /*
 678          * Keep polling until all transports have been closed. When this
 679          * happens, we return.
 680          */
 681         while ((int)num_fds > 0) {
 682                 nfds = poll(poll_array, num_fds, INFTIM);
 683                 switch (nfds) {
 684                 case 0:
 685                         continue;
 686 
 687                 case -1:
 688                         /*
 689                          * Some errors from poll could be
 690                          * due to temporary conditions, and we try to
 691                          * be robust in the face of them. Other
 692                          * errors (should never happen in theory)
 693                          * are fatal (eg. EINVAL, EFAULT).
 694                          */
 695                         switch (errno) {
 696                         case EINTR:
 697                                 continue;
 698 
 699                         case EAGAIN:
 700                         case ENOMEM:
 701                                 (void) sleep(10);
 702                                 continue;
 703 
 704                         default:
 705                                 (void) syslog(LOG_ERR,
 706                                     "poll failed: %m. Exiting");
 707                                 exit(1);
 708                         }
 709                 default:
 710                         break;
 711                 }
 712 
 713                 /*
 714                  * Go through the poll list looking for events.
 715                  */
 716                 for (i = 0; i < num_fds && nfds > 0; i++) {
 717                         if (poll_array[i].revents) {
 718                                 nfds--;
 719                                 /*
 720                                  * We have a message, so try to read it.
 721                                  * Record the error return in errno,
 722                                  * so that syslog(LOG_ERR, "...%m")
 723                                  * dumps the corresponding error string.
 724                                  */
 725                                 if (conn_polled[i].nc.nc_semantics ==
 726                                     NC_TPI_CLTS) {
 727                                         errno = do_poll_clts_action(
 728                                             poll_array[i].fd, i);
 729                                 } else {
 730                                         errno = do_poll_cots_action(
 731                                             poll_array[i].fd, i);
 732                                 }
 733 
 734                                 if (errno == 0)
 735                                         continue;
 736                                 /*
 737                                  * Most returned error codes mean that there is
 738                                  * fatal condition which we can only deal with
 739                                  * by closing the transport.
 740                                  */
 741                                 if (errno != EAGAIN && errno != ENOMEM) {
 742                                         (void) syslog(LOG_ERR,
 743                 "Error (%m) reading descriptor %d/transport %s. Closing it.",
 744                                             poll_array[i].fd,
 745                                             conn_polled[i].nc.nc_proto);
 746                                         (void) t_close(poll_array[i].fd);
 747                                         remove_from_poll_list(poll_array[i].fd);
 748 
 749                                 } else if (errno == ENOMEM)
 750                                         (void) sleep(5);
 751                         }
 752                 }
 753         }
 754 
 755         (void) syslog(LOG_ERR,
 756             "All transports have been closed with errors. Exiting.");
 757 }
 758 
 759 /*
 760  * Allocate poll/transport array entries for this descriptor.
 761  */
 762 static void
 763 add_to_poll_list(int fd, struct netconfig *nconf)
 764 {
 765         static int poll_array_size = 0;
 766 
 767         /*
 768          * If the arrays are full, allocate new ones.
 769          */
 770         if (num_fds == poll_array_size) {
 771                 struct pollfd *tpa;
 772                 struct conn_entry *tnp;
 773 
 774                 if (poll_array_size != 0) {
 775                         tpa = poll_array;
 776                         tnp = conn_polled;
 777                 } else
 778                         tpa = (struct pollfd *)0;
 779 
 780                 poll_array_size += POLL_ARRAY_INC_SIZE;
 781                 /*
 782                  * Allocate new arrays.
 783                  */
 784                 poll_array = (struct pollfd *)
 785                     malloc(poll_array_size * sizeof (struct pollfd) + 256);
 786                 conn_polled = (struct conn_entry *)
 787                     malloc(poll_array_size * sizeof (struct conn_entry) + 256);
 788                 if (poll_array == (struct pollfd *)NULL ||
 789                     conn_polled == (struct conn_entry *)NULL) {
 790                         syslog(LOG_ERR, "malloc failed for poll array");
 791                         exit(1);
 792                 }
 793 
 794                 /*
 795                  * Copy the data of the old ones into new arrays, and
 796                  * free the old ones.
 797                  */
 798                 if (tpa) {
 799                         (void) memcpy((void *)poll_array, (void *)tpa,
 800                             num_fds * sizeof (struct pollfd));
 801                         (void) memcpy((void *)conn_polled, (void *)tnp,
 802                             num_fds * sizeof (struct conn_entry));
 803                         free((void *)tpa);
 804                         free((void *)tnp);
 805                 }
 806         }
 807 
 808         /*
 809          * Set the descriptor and event list. All possible events are
 810          * polled for.
 811          */
 812         poll_array[num_fds].fd = fd;
 813         poll_array[num_fds].events = POLLIN|POLLRDNORM|POLLRDBAND|POLLPRI;
 814 
 815         /*
 816          * Copy the transport data over too.
 817          */
 818         conn_polled[num_fds].nc = *nconf;
 819         conn_polled[num_fds].closing = 0;
 820 
 821         /*
 822          * Set the descriptor to non-blocking. Avoids a race
 823          * between data arriving on the stream and then having it
 824          * flushed before we can read it.
 825          */
 826         if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) {
 827                 (void) syslog(LOG_ERR,
 828         "fcntl(file desc. %d/transport %s, F_SETFL, O_NONBLOCK): %m. Exiting",
 829                     num_fds, nconf->nc_proto);
 830                 exit(1);
 831         }
 832 
 833         /*
 834          * Count this descriptor.
 835          */
 836         ++num_fds;
 837 }
 838 
 839 static void
 840 remove_from_poll_list(int fd)
 841 {
 842         int i;
 843         int num_to_copy;
 844 
 845         for (i = 0; i < num_fds; i++) {
 846                 if (poll_array[i].fd == fd) {
 847                         --num_fds;
 848                         num_to_copy = num_fds - i;
 849                         (void) memcpy((void *)&poll_array[i],
 850                             (void *)&poll_array[i+1],
 851                             num_to_copy * sizeof (struct pollfd));
 852                         (void) memset((void *)&poll_array[num_fds], 0,
 853                             sizeof (struct pollfd));
 854                         (void) memcpy((void *)&conn_polled[i],
 855                             (void *)&conn_polled[i+1],
 856                             num_to_copy * sizeof (struct conn_entry));
 857                         (void) memset((void *)&conn_polled[num_fds], 0,
 858                             sizeof (struct conn_entry));
 859                         return;
 860                 }
 861         }
 862         syslog(LOG_ERR, "attempt to remove nonexistent fd from poll list");
 863 
 864 }
 865 
 866 /*
 867  * Called to read and interpret the event on a connectionless descriptor.
 868  * Returns 0 if successful, or a UNIX error code if failure.
 869  */
 870 static int
 871 do_poll_clts_action(int fd, int conn_index)
 872 {
 873         int error;
 874         int ret;
 875         int flags;
 876         struct netconfig *nconf = &conn_polled[conn_index].nc;
 877         static struct t_unitdata *unitdata = NULL;
 878         static struct t_uderr *uderr = NULL;
 879         static int oldfd = -1;
 880         struct nd_hostservlist *host = NULL;
 881         struct strbuf ctl[1], data[1];
 882         /*
 883          * We just need to have some space to consume the
 884          * message in the event we can't use the TLI interface to do the
 885          * job.
 886          *
 887          * We flush the message using getmsg(). For the control part
 888          * we allocate enough for any TPI header plus 32 bytes for address
 889          * and options. For the data part, there is nothing magic about
 890          * the size of the array, but 256 bytes is probably better than
 891          * 1 byte, and we don't expect any data portion anyway.
 892          *
 893          * If the array sizes are too small, we handle this because getmsg()
 894          * (called to consume the message) will return MOREDATA|MORECTL.
 895          * Thus we just call getmsg() until it's read the message.
 896          */
 897         char ctlbuf[sizeof (union T_primitives) + 32];
 898         char databuf[256];
 899 
 900         /*
 901          * If this is the same descriptor as the last time
 902          * do_poll_clts_action was called, we can save some
 903          * de-allocation and allocation.
 904          */
 905         if (oldfd != fd) {
 906                 oldfd = fd;
 907 
 908                 if (unitdata) {
 909                         (void) t_free((char *)unitdata, T_UNITDATA);
 910                         unitdata = NULL;
 911                 }
 912                 if (uderr) {
 913                         (void) t_free((char *)uderr, T_UDERROR);
 914                         uderr = NULL;
 915                 }
 916         }
 917 
 918         /*
 919          * Allocate a unitdata structure for receiving the event.
 920          */
 921         if (unitdata == NULL) {
 922                 /* LINTED pointer alignment */
 923                 unitdata = (struct t_unitdata *)t_alloc(fd, T_UNITDATA, T_ALL);
 924                 if (unitdata == NULL) {
 925                         if (t_errno == TSYSERR) {
 926                                 /*
 927                                  * Save the error code across
 928                                  * syslog(), just in case
 929                                  * syslog() gets its own error
 930                                  * and therefore overwrites errno.
 931                                  */
 932                                 error = errno;
 933                                 (void) syslog(LOG_ERR,
 934         "t_alloc(file descriptor %d/transport %s, T_UNITDATA) failed: %m",
 935                                     fd, nconf->nc_proto);
 936                                 return (error);
 937                         }
 938                         (void) syslog(LOG_ERR,
 939 "t_alloc(file descriptor %d/transport %s, T_UNITDATA) failed TLI error %d",
 940                             fd, nconf->nc_proto, t_errno);
 941                         goto flush_it;
 942                 }
 943         }
 944 
 945 try_again:
 946         flags = 0;
 947 
 948         /*
 949          * The idea is we wait for T_UNITDATA_IND's. Of course,
 950          * we don't get any, because rpcmod filters them out.
 951          * However, we need to call t_rcvudata() to let TLI
 952          * tell us we have a T_UDERROR_IND.
 953          *
 954          * algorithm is:
 955          *      t_rcvudata(), expecting TLOOK.
 956          *      t_look(), expecting T_UDERR.
 957          *      t_rcvuderr(), expecting success (0).
 958          *      expand destination address into ASCII,
 959          *      and dump it.
 960          */
 961 
 962         ret = t_rcvudata(fd, unitdata, &flags);
 963         if (ret == 0 || t_errno == TBUFOVFLW) {
 964                 (void) syslog(LOG_WARNING,
 965 "t_rcvudata(file descriptor %d/transport %s) got unexpected data, %d bytes",
 966                     fd, nconf->nc_proto, unitdata->udata.len);
 967 
 968                 /*
 969                  * Even though we don't expect any data, in case we do,
 970                  * keep reading until there is no more.
 971                  */
 972                 if (flags & T_MORE)
 973                         goto try_again;
 974 
 975                 return (0);
 976         }
 977 
 978         switch (t_errno) {
 979         case TNODATA:
 980                 return (0);
 981         case TSYSERR:
 982                 /*
 983                  * System errors are returned to caller.
 984                  * Save the error code across
 985                  * syslog(), just in case
 986                  * syslog() gets its own error
 987                  * and therefore overwrites errno.
 988                  */
 989                 error = errno;
 990                 (void) syslog(LOG_ERR,
 991                     "t_rcvudata(file descriptor %d/transport %s) %m",
 992                     fd, nconf->nc_proto);
 993                 return (error);
 994         case TLOOK:
 995                 break;
 996         default:
 997                 (void) syslog(LOG_ERR,
 998                 "t_rcvudata(file descriptor %d/transport %s) TLI error %d",
 999                     fd, nconf->nc_proto, t_errno);
1000                 goto flush_it;
1001         }
1002 
1003         ret = t_look(fd);
1004         switch (ret) {
1005         case 0:
1006                 return (0);
1007         case -1:
1008                 /*
1009                  * System errors are returned to caller.
1010                  */
1011                 if (t_errno == TSYSERR) {
1012                         /*
1013                          * Save the error code across
1014                          * syslog(), just in case
1015                          * syslog() gets its own error
1016                          * and therefore overwrites errno.
1017                          */
1018                         error = errno;
1019                         (void) syslog(LOG_ERR,
1020                             "t_look(file descriptor %d/transport %s) %m",
1021                             fd, nconf->nc_proto);
1022                         return (error);
1023                 }
1024                 (void) syslog(LOG_ERR,
1025                     "t_look(file descriptor %d/transport %s) TLI error %d",
1026                     fd, nconf->nc_proto, t_errno);
1027                 goto flush_it;
1028         case T_UDERR:
1029                 break;
1030         default:
1031                 (void) syslog(LOG_WARNING,
1032         "t_look(file descriptor %d/transport %s) returned %d not T_UDERR (%d)",
1033                     fd, nconf->nc_proto, ret, T_UDERR);
1034         }
1035 
1036         if (uderr == NULL) {
1037                 /* LINTED pointer alignment */
1038                 uderr = (struct t_uderr *)t_alloc(fd, T_UDERROR, T_ALL);
1039                 if (uderr == NULL) {
1040                         if (t_errno == TSYSERR) {
1041                                 /*
1042                                  * Save the error code across
1043                                  * syslog(), just in case
1044                                  * syslog() gets its own error
1045                                  * and therefore overwrites errno.
1046                                  */
1047                                 error = errno;
1048                                 (void) syslog(LOG_ERR,
1049         "t_alloc(file descriptor %d/transport %s, T_UDERROR) failed: %m",
1050                                     fd, nconf->nc_proto);
1051                                 return (error);
1052                         }
1053                         (void) syslog(LOG_ERR,
1054 "t_alloc(file descriptor %d/transport %s, T_UDERROR) failed TLI error: %d",
1055                             fd, nconf->nc_proto, t_errno);
1056                         goto flush_it;
1057                 }
1058         }
1059 
1060         ret = t_rcvuderr(fd, uderr);
1061         if (ret == 0) {
1062 
1063                 /*
1064                  * Save the datagram error in errno, so that the
1065                  * %m argument to syslog picks up the error string.
1066                  */
1067                 errno = uderr->error;
1068 
1069                 /*
1070                  * Log the datagram error, then log the host that
1071                  * probably triggerred. Cannot log both in the
1072                  * same transaction because of packet size limitations
1073                  * in /dev/log.
1074                  */
1075                 (void) syslog((errno == ECONNREFUSED) ? LOG_DEBUG : LOG_WARNING,
1076 "NFS response over <file descriptor %d/transport %s> generated error: %m",
1077                     fd, nconf->nc_proto);
1078 
1079                 /*
1080                  * Try to map the client's address back to a
1081                  * name.
1082                  */
1083                 ret = netdir_getbyaddr(nconf, &host, &uderr->addr);
1084                 if (ret != -1 && host && host->h_cnt > 0 &&
1085                     host->h_hostservs) {
1086                 (void) syslog((errno == ECONNREFUSED) ? LOG_DEBUG : LOG_WARNING,
1087 "Bad NFS response was sent to client with host name: %s; service port: %s",
1088                     host->h_hostservs->h_host,
1089                     host->h_hostservs->h_serv);
1090                 } else {
1091                         int i, j;
1092                         char *buf;
1093                         char *hex = "0123456789abcdef";
1094 
1095                         /*
1096                          * Mapping failed, print the whole thing
1097                          * in ASCII hex.
1098                          */
1099                         buf = (char *)malloc(uderr->addr.len * 2 + 1);
1100                         for (i = 0, j = 0; i < uderr->addr.len; i++, j += 2) {
1101                                 buf[j] = hex[((uderr->addr.buf[i]) >> 4) & 0xf];
1102                                 buf[j+1] = hex[uderr->addr.buf[i] & 0xf];
1103                         }
1104                         buf[j] = '\0';
1105                 (void) syslog((errno == ECONNREFUSED) ? LOG_DEBUG : LOG_WARNING,
1106         "Bad NFS response was sent to client with transport address: 0x%s",
1107                     buf);
1108                         free((void *)buf);
1109                 }
1110 
1111                 if (ret == 0 && host != NULL)
1112                         netdir_free((void *)host, ND_HOSTSERVLIST);
1113                 return (0);
1114         }
1115 
1116         switch (t_errno) {
1117         case TNOUDERR:
1118                 goto flush_it;
1119         case TSYSERR:
1120                 /*
1121                  * System errors are returned to caller.
1122                  * Save the error code across
1123                  * syslog(), just in case
1124                  * syslog() gets its own error
1125                  * and therefore overwrites errno.
1126                  */
1127                 error = errno;
1128                 (void) syslog(LOG_ERR,
1129                     "t_rcvuderr(file descriptor %d/transport %s) %m",
1130                     fd, nconf->nc_proto);
1131                 return (error);
1132         default:
1133                 (void) syslog(LOG_ERR,
1134                 "t_rcvuderr(file descriptor %d/transport %s) TLI error %d",
1135                     fd, nconf->nc_proto, t_errno);
1136                 goto flush_it;
1137         }
1138 
1139 flush_it:
1140         /*
1141          * If we get here, then we could not cope with whatever message
1142          * we attempted to read, so flush it. If we did read a message,
1143          * and one isn't present, that is all right, because fd is in
1144          * nonblocking mode.
1145          */
1146         (void) syslog(LOG_ERR,
1147         "Flushing one input message from <file descriptor %d/transport %s>",
1148             fd, nconf->nc_proto);
1149 
1150         /*
1151          * Read and discard the message. Do this this until there is
1152          * no more control/data in the message or until we get an error.
1153          */
1154         do {
1155                 ctl->maxlen = sizeof (ctlbuf);
1156                 ctl->buf = ctlbuf;
1157                 data->maxlen = sizeof (databuf);
1158                 data->buf = databuf;
1159                 flags = 0;
1160                 ret = getmsg(fd, ctl, data, &flags);
1161                 if (ret == -1)
1162                         return (errno);
1163         } while (ret != 0);
1164 
1165         return (0);
1166 }
1167 
1168 static void
1169 conn_close_oldest(void)
1170 {
1171         int fd;
1172         int i1;
1173 
1174         /*
1175          * Find the oldest connection that is not already in the
1176          * process of shutting down.
1177          */
1178         for (i1 = end_listen_fds; /* no conditional expression */; i1++) {
1179                 if (i1 >= num_fds)
1180                         return;
1181                 if (conn_polled[i1].closing == 0)
1182                         break;
1183         }
1184 #ifdef DEBUG
1185         printf("too many connections (%d), releasing oldest (%d)\n",
1186             num_conns, poll_array[i1].fd);
1187 #else
1188         syslog(LOG_WARNING, "too many connections (%d), releasing oldest (%d)",
1189             num_conns, poll_array[i1].fd);
1190 #endif
1191         fd = poll_array[i1].fd;
1192         if (conn_polled[i1].nc.nc_semantics == NC_TPI_COTS) {
1193                 /*
1194                  * For politeness, send a T_DISCON_REQ to the transport
1195                  * provider.  We close the stream anyway.
1196                  */
1197                 (void) t_snddis(fd, (struct t_call *)0);
1198                 num_conns--;
1199                 remove_from_poll_list(fd);
1200                 (void) t_close(fd);
1201         } else {
1202                 /*
1203                  * For orderly release, we do not close the stream
1204                  * until the T_ORDREL_IND arrives to complete
1205                  * the handshake.
1206                  */
1207                 if (t_sndrel(fd) == 0)
1208                         conn_polled[i1].closing = 1;
1209         }
1210 }
1211 
1212 static boolean_t
1213 conn_get(int fd, struct netconfig *nconf, struct conn_ind **connp)
1214 {
1215         struct conn_ind *conn;
1216         struct conn_ind *next_conn;
1217 
1218         conn = (struct conn_ind *)malloc(sizeof (*conn));
1219         if (conn == NULL) {
1220                 syslog(LOG_ERR, "malloc for listen indication failed");
1221                 return (FALSE);
1222         }
1223 
1224         /* LINTED pointer alignment */
1225         conn->conn_call = (struct t_call *)t_alloc(fd, T_CALL, T_ALL);
1226         if (conn->conn_call == NULL) {
1227                 free((char *)conn);
1228                 nfslib_log_tli_error("t_alloc", fd, nconf);
1229                 return (FALSE);
1230         }
1231 
1232         if (t_listen(fd, conn->conn_call) == -1) {
1233                 nfslib_log_tli_error("t_listen", fd, nconf);
1234                 (void) t_free((char *)conn->conn_call, T_CALL);
1235                 free((char *)conn);
1236                 return (FALSE);
1237         }
1238 
1239         if (conn->conn_call->udata.len > 0) {
1240                 syslog(LOG_WARNING,
1241         "rejecting inbound connection(%s) with %d bytes of connect data",
1242                     nconf->nc_proto, conn->conn_call->udata.len);
1243 
1244                 conn->conn_call->udata.len = 0;
1245                 (void) t_snddis(fd, conn->conn_call);
1246                 (void) t_free((char *)conn->conn_call, T_CALL);
1247                 free((char *)conn);
1248                 return (FALSE);
1249         }
1250 
1251         if ((next_conn = *connp) != NULL) {
1252                 next_conn->conn_prev->conn_next = conn;
1253                 conn->conn_next = next_conn;
1254                 conn->conn_prev = next_conn->conn_prev;
1255                 next_conn->conn_prev = conn;
1256         } else {
1257                 conn->conn_next = conn;
1258                 conn->conn_prev = conn;
1259                 *connp = conn;
1260         }
1261         return (TRUE);
1262 }
1263 
1264 static int
1265 discon_get(int fd, struct netconfig *nconf, struct conn_ind **connp)
1266 {
1267         struct conn_ind *conn;
1268         struct t_discon discon;
1269 
1270         discon.udata.buf = (char *)0;
1271         discon.udata.maxlen = 0;
1272         if (t_rcvdis(fd, &discon) == -1) {
1273                 nfslib_log_tli_error("t_rcvdis", fd, nconf);
1274                 return (-1);
1275         }
1276 
1277         conn = *connp;
1278         if (conn == NULL)
1279                 return (0);
1280 
1281         do {
1282                 if (conn->conn_call->sequence == discon.sequence) {
1283                         if (conn->conn_next == conn)
1284                                 *connp = (struct conn_ind *)0;
1285                         else {
1286                                 if (conn == *connp) {
1287                                         *connp = conn->conn_next;
1288                                 }
1289                                 conn->conn_next->conn_prev = conn->conn_prev;
1290                                 conn->conn_prev->conn_next = conn->conn_next;
1291                         }
1292                         free((char *)conn);
1293                         break;
1294                 }
1295                 conn = conn->conn_next;
1296         } while (conn != *connp);
1297 
1298         return (0);
1299 }
1300 
1301 static void
1302 cots_listen_event(int fd, int conn_index)
1303 {
1304         struct t_call *call;
1305         struct conn_ind *conn;
1306         struct conn_ind *conn_head;
1307         int event;
1308         struct netconfig *nconf = &conn_polled[conn_index].nc;
1309         int new_fd;
1310         struct netbuf addrmask;
1311         int ret = 0;
1312         char *clnt;
1313         char *clnt_uaddr = NULL;
1314         struct nd_hostservlist *clnt_serv = NULL;
1315 
1316         conn_head = NULL;
1317         (void) conn_get(fd, nconf, &conn_head);
1318 
1319         while ((conn = conn_head) != NULL) {
1320                 conn_head = conn->conn_next;
1321                 if (conn_head == conn)
1322                         conn_head = NULL;
1323                 else {
1324                         conn_head->conn_prev = conn->conn_prev;
1325                         conn->conn_prev->conn_next = conn_head;
1326                 }
1327                 call = conn->conn_call;
1328                 free(conn);
1329 
1330                 /*
1331                  * If we have already accepted the maximum number of
1332                  * connections allowed on the command line, then drop
1333                  * the oldest connection (for any protocol) before
1334                  * accepting the new connection.  Unless explicitly
1335                  * set on the command line, max_conns_allowed is -1.
1336                  */
1337                 if (max_conns_allowed != -1 && num_conns >= max_conns_allowed)
1338                         conn_close_oldest();
1339 
1340                 /*
1341                  * Create a new transport endpoint for the same proto as
1342                  * the listener.
1343                  */
1344                 new_fd = nfslib_transport_open(nconf);
1345                 if (new_fd == -1) {
1346                         call->udata.len = 0;
1347                         (void) t_snddis(fd, call);
1348                         (void) t_free((char *)call, T_CALL);
1349                         syslog(LOG_ERR, "Cannot establish transport over %s",
1350                             nconf->nc_device);
1351                         continue;
1352                 }
1353 
1354                 /* Bind to a generic address/port for the accepting stream. */
1355                 if (t_bind(new_fd, NULL, NULL) == -1) {
1356                         nfslib_log_tli_error("t_bind", new_fd, nconf);
1357                         call->udata.len = 0;
1358                         (void) t_snddis(fd, call);
1359                         (void) t_free((char *)call, T_CALL);
1360                         (void) t_close(new_fd);
1361                         continue;
1362                 }
1363 
1364                 while (t_accept(fd, new_fd, call) == -1) {
1365                         if (t_errno != TLOOK) {
1366 #ifdef DEBUG
1367                                 nfslib_log_tli_error("t_accept", fd, nconf);
1368 #endif
1369                                 call->udata.len = 0;
1370                                 (void) t_snddis(fd, call);
1371                                 (void) t_free((char *)call, T_CALL);
1372                                 (void) t_close(new_fd);
1373                                 goto do_next_conn;
1374                         }
1375                         while (event = t_look(fd)) {
1376                                 switch (event) {
1377                                 case T_LISTEN:
1378 #ifdef DEBUG
1379                                         printf(
1380 "cots_listen_event(%s): T_LISTEN during accept processing\n", nconf->nc_proto);
1381 #endif
1382                                         (void) conn_get(fd, nconf, &conn_head);
1383                                         continue;
1384                                 case T_DISCONNECT:
1385 #ifdef DEBUG
1386                                         printf(
1387         "cots_listen_event(%s): T_DISCONNECT during accept processing\n",
1388                                             nconf->nc_proto);
1389 #endif
1390                                         (void) discon_get(fd, nconf,
1391                                             &conn_head);
1392                                         continue;
1393                                 default:
1394                                         syslog(LOG_ERR,
1395                         "unexpected event 0x%x during accept processing (%s)",
1396                                             event, nconf->nc_proto);
1397                                         call->udata.len = 0;
1398                                         (void) t_snddis(fd, call);
1399                                         (void) t_free((char *)call, T_CALL);
1400                                         (void) t_close(new_fd);
1401                                         goto do_next_conn;
1402                                 }
1403                         }
1404                 }
1405 
1406                 if (set_addrmask(new_fd, nconf, &addrmask) < 0) {
1407                         (void) syslog(LOG_ERR,
1408                             "Cannot set address mask for %s",
1409                             nconf->nc_netid);
1410                         (void) t_snddis(new_fd, NULL);
1411                         (void) t_free((char *)call, T_CALL);
1412                         (void) t_close(new_fd);
1413                         continue;
1414                 }
1415 
1416                 /* Tell kRPC about the new stream. */
1417                 if (Mysvc4 != NULL)
1418                         ret = (*Mysvc4)(new_fd, &addrmask, nconf,
1419                             NFS4_KRPC_START, &call->addr);
1420                 else
1421                         ret = (*Mysvc)(new_fd, addrmask, nconf);
1422 
1423                 if (ret < 0) {
1424                         if (errno != ENOTCONN) {
1425                                 syslog(LOG_ERR,
1426                                     "unable to register new connection: %m");
1427                         } else {
1428                                 /*
1429                                  * This is the only error that could be
1430                                  * caused by the client, so who was it?
1431                                  */
1432                                 if (netdir_getbyaddr(nconf, &clnt_serv,
1433                                     &(call->addr)) == ND_OK &&
1434                                     clnt_serv->h_cnt > 0)
1435                                         clnt = clnt_serv->h_hostservs->h_host;
1436                                 else
1437                                         clnt = clnt_uaddr = taddr2uaddr(nconf,
1438                                             &(call->addr));
1439                                 /*
1440                                  * If we don't know who the client was,
1441                                  * remain silent.
1442                                  */
1443                                 if (clnt)
1444                                         syslog(LOG_ERR,
1445 "unable to register new connection: client %s has dropped connection", clnt);
1446                                 if (clnt_serv) {
1447                                         netdir_free(clnt_serv, ND_HOSTSERVLIST);
1448                                         clnt_serv = NULL;
1449                                 }
1450                                 if (clnt_uaddr) {
1451                                         free(clnt_uaddr);
1452                                         clnt_uaddr = NULL;
1453                                 }
1454                         }
1455                         free(addrmask.buf);
1456                         (void) t_snddis(new_fd, NULL);
1457                         (void) t_free((char *)call, T_CALL);
1458                         (void) t_close(new_fd);
1459                         goto do_next_conn;
1460                 }
1461 
1462                 free(addrmask.buf);
1463                 (void) t_free((char *)call, T_CALL);
1464 
1465                 /*
1466                  * Poll on the new descriptor so that we get disconnect
1467                  * and orderly release indications.
1468                  */
1469                 num_conns++;
1470                 add_to_poll_list(new_fd, nconf);
1471 
1472                 /* Reset nconf in case it has been moved. */
1473                 nconf = &conn_polled[conn_index].nc;
1474 do_next_conn:;
1475         }
1476 }
1477 
1478 static int
1479 do_poll_cots_action(int fd, int conn_index)
1480 {
1481         char buf[256];
1482         int event;
1483         int i1;
1484         int flags;
1485         struct conn_entry *connent = &conn_polled[conn_index];
1486         struct netconfig *nconf = &(connent->nc);
1487         const char *errorstr;
1488 
1489         while (event = t_look(fd)) {
1490                 switch (event) {
1491                 case T_LISTEN:
1492 #ifdef DEBUG
1493 printf("do_poll_cots_action(%s,%d): T_LISTEN event\n", nconf->nc_proto, fd);
1494 #endif
1495                         cots_listen_event(fd, conn_index);
1496                         break;
1497 
1498                 case T_DATA:
1499 #ifdef DEBUG
1500 printf("do_poll_cots_action(%d,%s): T_DATA event\n", fd, nconf->nc_proto);
1501 #endif
1502                         /*
1503                          * Receive a private notification from CONS rpcmod.
1504                          */
1505                         i1 = t_rcv(fd, buf, sizeof (buf), &flags);
1506                         if (i1 == -1) {
1507                                 syslog(LOG_ERR, "t_rcv failed");
1508                                 break;
1509                         }
1510                         if (i1 < sizeof (int))
1511                                 break;
1512                         i1 = BE32_TO_U32(buf);
1513                         if (i1 == 1 || i1 == 2) {
1514                                 /*
1515                                  * This connection has been idle for too long,
1516                                  * so release it as politely as we can.  If we
1517                                  * have already initiated an orderly release
1518                                  * and we get notified that the stream is
1519                                  * still idle, pull the plug.  This prevents
1520                                  * hung connections from continuing to consume
1521                                  * resources.
1522                                  */
1523 #ifdef DEBUG
1524 printf("do_poll_cots_action(%s,%d): ", nconf->nc_proto, fd);
1525 printf("initiating orderly release of idle connection\n");
1526 #endif
1527                                 if (nconf->nc_semantics == NC_TPI_COTS ||
1528                                     connent->closing != 0) {
1529                                         (void) t_snddis(fd, (struct t_call *)0);
1530                                         goto fdclose;
1531                                 }
1532                                 /*
1533                                  * For NC_TPI_COTS_ORD, the stream is closed
1534                                  * and removed from the poll list when the
1535                                  * T_ORDREL is received from the provider.  We
1536                                  * don't wait for it here because it may take
1537                                  * a while for the transport to shut down.
1538                                  */
1539                                 if (t_sndrel(fd) == -1) {
1540                                         syslog(LOG_ERR,
1541                                         "unable to send orderly release %m");
1542                                 }
1543                                 connent->closing = 1;
1544                         } else
1545                                 syslog(LOG_ERR,
1546                                 "unexpected event from CONS rpcmod %d", i1);
1547                         break;
1548 
1549                 case T_ORDREL:
1550 #ifdef DEBUG
1551 printf("do_poll_cots_action(%s,%d): T_ORDREL event\n", nconf->nc_proto, fd);
1552 #endif
1553                         /* Perform an orderly release. */
1554                         if (t_rcvrel(fd) == 0) {
1555                                 /* T_ORDREL on listen fd's should be ignored */
1556                                 if (!is_listen_fd_index(conn_index)) {
1557                                         (void) t_sndrel(fd);
1558                                         goto fdclose;
1559                                 }
1560                                 break;
1561 
1562                         } else if (t_errno == TLOOK) {
1563                                 break;
1564                         } else {
1565                                 nfslib_log_tli_error("t_rcvrel", fd, nconf);
1566 
1567                                 /*
1568                                  * check to make sure we do not close
1569                                  * listen fd
1570                                  */
1571                                 if (is_listen_fd_index(conn_index))
1572                                         break;
1573                                 else
1574                                         goto fdclose;
1575                         }
1576 
1577                 case T_DISCONNECT:
1578 #ifdef DEBUG
1579 printf("do_poll_cots_action(%s,%d): T_DISCONNECT event\n", nconf->nc_proto, fd);
1580 #endif
1581                         if (t_rcvdis(fd, (struct t_discon *)NULL) == -1)
1582                                 nfslib_log_tli_error("t_rcvdis", fd, nconf);
1583 
1584                         /*
1585                          * T_DISCONNECT on listen fd's should be ignored.
1586                          */
1587                         if (is_listen_fd_index(conn_index))
1588                                 break;
1589                         else
1590                                 goto fdclose;
1591 
1592                 case T_ERROR:
1593                 default:
1594                         if (event == T_ERROR || t_errno == TSYSERR) {
1595                                 if ((errorstr = strerror(errno)) == NULL) {
1596                                         (void) sprintf(buf,
1597                                             "Unknown error num %d", errno);
1598                                         errorstr = (const char *) buf;
1599                                 }
1600                         } else if (event == -1)
1601                                 errorstr = t_strerror(t_errno);
1602                         else
1603                                 errorstr = "";
1604                         syslog(LOG_ERR,
1605                             "unexpected TLI event (0x%x) on "
1606                             "connection-oriented transport(%s,%d):%s",
1607                             event, nconf->nc_proto, fd, errorstr);
1608 fdclose:
1609                         num_conns--;
1610                         remove_from_poll_list(fd);
1611                         (void) t_close(fd);
1612                         return (0);
1613                 }
1614         }
1615 
1616         return (0);
1617 }
1618 
/*
 * Map a service name (used primarily in logging) to the RPC port
 * name used by the netdir_*() routines.  A name with no entry in the
 * table below maps to "unrecognized".
 */
static char *
serv_name_to_port_name(char *name)
{
        static const struct {
                const char *svc_name;
                char *port_name;
        } svc_map[] = {
                { "NFS",                "nfs" },
                { "NLM",                "lockd" },
                { "NFS4_CALLBACK",      "nfs4_callback" }
        };
        size_t slot;

        for (slot = 0; slot < sizeof (svc_map) / sizeof (svc_map[0]); slot++) {
                if (strcmp(name, svc_map[slot].svc_name) == 0)
                        return (svc_map[slot].port_name);
        }

        return ("unrecognized");
}
1636 
1637 static int
1638 bind_to_provider(char *provider, char *serv, struct netbuf **addr,
1639                 struct netconfig **retnconf)
1640 {
1641         struct netconfig *nconf;
1642         NCONF_HANDLE *nc;
1643         struct nd_hostserv hs;
1644 
1645         hs.h_host = HOST_SELF;
1646         hs.h_serv = serv_name_to_port_name(serv);
1647 
1648         if ((nc = setnetconfig()) == (NCONF_HANDLE *)NULL) {
1649                 syslog(LOG_ERR, "setnetconfig failed: %m");
1650                 return (-1);
1651         }
1652         while (nconf = getnetconfig(nc)) {
1653                 if (OK_TPI_TYPE(nconf) &&
1654                     strcmp(nconf->nc_device, provider) == 0) {
1655                         *retnconf = nconf;
1656                         return (nfslib_bindit(nconf, addr, &hs,
1657                             listen_backlog));
1658                 }
1659         }
1660         (void) endnetconfig(nc);
1661 
1662         syslog(LOG_ERR, "couldn't find netconfig entry for provider %s",
1663             provider);
1664         return (-1);
1665 }
1666 
1667 static int
1668 bind_to_proto(NETSELDECL(proto), char *serv, struct netbuf **addr,
1669                 struct netconfig **retnconf)
1670 {
1671         struct netconfig *nconf;
1672         NCONF_HANDLE *nc = NULL;
1673         struct nd_hostserv hs;
1674 
1675         hs.h_host = HOST_SELF;
1676         hs.h_serv = serv_name_to_port_name(serv);
1677 
1678         if ((nc = setnetconfig()) == (NCONF_HANDLE *)NULL) {
1679                 syslog(LOG_ERR, "setnetconfig failed: %m");
1680                 return (-1);
1681         }
1682         while (nconf = getnetconfig(nc)) {
1683                 if (OK_TPI_TYPE(nconf) && NETSELEQ(nconf->nc_proto, proto)) {
1684                         *retnconf = nconf;
1685                         return (nfslib_bindit(nconf, addr, &hs,
1686                             listen_backlog));
1687                 }
1688         }
1689         (void) endnetconfig(nc);
1690 
1691         syslog(LOG_ERR, "couldn't find netconfig entry for protocol %s",
1692             proto);
1693         return (-1);
1694 }
1695 
1696 #include <netinet/in.h>
1697 
1698 /*
1699  * Create an address mask appropriate for the transport.
1700  * The mask is used to obtain the host-specific part of
1701  * a network address when comparing addresses.
1702  * For an internet address the host-specific part is just
1703  * the 32 bit IP address and this part of the mask is set
1704  * to all-ones. The port number part of the mask is zeroes.
1705  */
1706 static int
1707 set_addrmask(int fd,
1708         struct netconfig *nconf,
1709         struct netbuf *mask)
1710 {
1711         struct t_info info;
1712 
1713         /*
1714          * Find the size of the address we need to mask.
1715          */
1716         if (t_getinfo(fd, &info) < 0) {
1717                 t_error("t_getinfo");
1718                 return (-1);
1719         }
1720         mask->len = mask->maxlen = info.addr;
1721         if (info.addr <= 0) {
1722                 /*
1723                  * loopback devices have infinite addr size
1724                  * (it is identified by -1 in addr field of t_info structure),
1725                  * so don't build the netmask for them. It's a special case
1726                  * that should be handled properly.
1727                  */
1728                 if ((info.addr == -1) &&
1729                     (0 == strcmp(nconf->nc_protofmly, NC_LOOPBACK))) {
1730                         memset(mask, 0, sizeof (*mask));
1731                         return (0);
1732                 }
1733 
1734                 syslog(LOG_ERR, "set_addrmask: address size: %ld", info.addr);
1735                 return (-1);
1736         }
1737 
1738         mask->buf = (char *)malloc(mask->len);
1739         if (mask->buf == NULL) {
1740                 syslog(LOG_ERR, "set_addrmask: no memory");
1741                 return (-1);
1742         }
1743         (void) memset(mask->buf, 0, mask->len);   /* reset all mask bits */
1744 
1745         if (strcmp(nconf->nc_protofmly, NC_INET) == 0) {
1746                 /*
1747                  * Set the mask so that the port is ignored.
1748                  */
1749                 /* LINTED pointer alignment */
1750                 ((struct sockaddr_in *)mask->buf)->sin_addr.s_addr =
1751                     (ulong_t)~0;
1752                 /* LINTED pointer alignment */
1753                 ((struct sockaddr_in *)mask->buf)->sin_family =
1754                     (ushort_t)~0;
1755         } else if (strcmp(nconf->nc_protofmly, NC_INET6) == 0) {
1756                 /* LINTED pointer alignment */
1757                 (void) memset(&((struct sockaddr_in6 *)mask->buf)->sin6_addr,
1758                     (uchar_t)~0, sizeof (struct in6_addr));
1759                 /* LINTED pointer alignment */
1760                 ((struct sockaddr_in6 *)mask->buf)->sin6_family =
1761                     (ushort_t)~0;
1762         } else {
1763 
1764                 /*
1765                  * Set all mask bits.
1766                  */
1767                 (void) memset(mask->buf, 0xFF, mask->len);
1768         }
1769         return (0);
1770 }
1771 
1772 /*
1773  * For listen fd's index is always less than end_listen_fds.
1774  * end_listen_fds is defined externally in the daemon that uses this library.
1775  * It's value is equal to the number of open file descriptors after the
1776  * last listen end point was opened but before any connection was accepted.
1777  */
1778 static int
1779 is_listen_fd_index(int index)
1780 {
1781         return (index < end_listen_fds);
1782 }