1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /*
  27  * This file contains the glue code that allows the NWS software to
  28  * determine whether a cluster disk service is local to this node or
  29  * not.
  30  *
  31  * See PSARC/1999/462 for more information on the interfaces from
  32  * suncluster that are used here.
  33  */
  34 
  35 #include <sys/types.h>
  36 #include <sys/wait.h>
  37 #include <sys/mkdev.h>
  38 #include <sys/stat.h>
  39 #include <stdlib.h>
  40 #include <unistd.h>
  41 #include <string.h>
  42 #include <strings.h>
  43 #include <errno.h>
  44 #include <fcntl.h>
  45 #include <stdio.h>
  46 #include <dlfcn.h>
  47 
  48 #include <sys/ncall/ncall.h>
  49 #include <sys/nsctl/nsc_hash.h>
  50 
  51 #include "cfg_cluster.h"
  52 #include "cfg_impl.h"
  53 #include "cfg.h"
  54 
  55 /*
  56  * Static variables
  57  */
  58 
/* This node's cluster nodeid; 0xffff means "not determined yet", 0 "no cluster" */
static scconf_nodeid_t cl_nodeid = (uint_t)0xffff;
/* This node's cluster nodename; NULL when it could not be obtained */
static char *cl_nodename = NULL;

/* dlopen() handles for the Sun Cluster libraries (see cfg_cluster_init()) */
static void *libscstat;
static void *libscconf;

/* Hash table keyed by device service name, filled in by init_sc_entry() */
static hash_node_t **schash;
static int init_sc_entry();

/* Value stored in schash for each device service */
typedef struct hash_data_s {
        /* name of a node whose state for the service is SCSTAT_PRIMARY */
        scstat_node_name_t      scstat_node_name;
} hash_data_t;

/*
 * Global variables
 */
/* Set to 1 by cfg_cluster_init() once initialisation has completed */
int cl_initialized = 0;
  76 
  77 
/*
 * Tell the linker to keep quiet.
 *
 * The scconf_* and scstat_* routines below are called directly from this
 * file but are only made available at run time, when cfg_cluster_init()
 * dlopen()s libscconf and libscstat with RTLD_GLOBAL.  Declaring them
 * weak lets the references stay unresolved at link time.
 */

#pragma weak scconf_get_nodename
#pragma weak scconf_strerr
#pragma weak scconf_get_ds_by_devt

#pragma weak scstat_get_ds_status
#pragma weak scstat_free_ds_status
#pragma weak scstat_strerr
  89 
  90 
  91 /*
  92  * Initialise the library if we have not done so before.
  93  *
  94  * - IMPORTANT -
  95  *
  96  * This must -never- be called from any command that can be started
  97  * from /usr/cluster/lib/sc/run_reserve (and hence
  98  * /usr/cluster/sbin/reconfig) or the system will deadlock
  99  * during switchover.  This includes:
 100  *
 101  *       - svadm (no options, "print") -- called during sv switchover
 102  *       - all boot commands
 103  *
 104  * - grab this node's cluster nodeid
 105  * - attempt to dlopen() the suncluster shared libraries we need
 106  * - grab this node's cluster nodename
 107  *
 108  * Returns:
 109  *   0   - success
 110  *  -1   - error, errno is set
 111  */
 112 
 113 int
 114 cfg_cluster_init(void)
 115 {
 116         const char *scconf = "/usr/cluster/lib/libscconf.so.1";
 117         const char *scstat = "/usr/cluster/lib/libscstat.so.1";
 118 #ifdef DEBUG
 119         char errbuf[SCCONF_MAXSTRINGLEN];
 120 #endif
 121         scconf_nodeid_t id;
 122         scconf_errno_t err;
 123         char *name;
 124         FILE *pipe;
 125         int rc;
 126 
 127         /*
 128          * First check to see if we really are a cluster as clinfo -n can lie
 129          */
 130         if (cl_nodeid == 0xffff) {
 131                 rc = system("/usr/sbin/clinfo");
 132                 if (rc != -1 && WEXITSTATUS(rc) == 1) {
 133                         /* not a cluster */
 134                         cl_initialized = 1;
 135                         cl_nodeid = 0;
 136                         return (0);
 137                 }
 138 
 139                 pipe = popen("/usr/sbin/clinfo -n 2>/dev/null || echo 0", "r");
 140                 if (pipe == NULL) {
 141 #ifdef DEBUG
 142                         fprintf(stderr, "unable to get nodeid: %s\n",
 143                                 strerror(errno));
 144 #endif
 145                         return (-1);
 146                 }
 147 
 148                 if ((rc = fscanf(pipe, "%d", &id)) != 1) {
 149 #ifdef DEBUG
 150                         fprintf(stderr, "unable to get nodeid: %s\n",
 151                                 strerror(errno));
 152 #endif
 153                         return (-1);
 154                 }
 155 
 156                 pclose(pipe);
 157 
 158                 cl_nodeid = id;
 159         }
 160 
 161         /* Already loaded the Sun Cluster device tree */
 162         if (cl_initialized)
 163                 return (0);
 164 
 165         /*
 166          * Try and dlopen the various libraries that we need
 167          */
 168 
 169         libscconf = dlopen(scconf, RTLD_LAZY | RTLD_GLOBAL);
 170         if (libscconf == NULL)
 171                 goto error;
 172 
 173         libscstat = dlopen(scstat, RTLD_LAZY | RTLD_GLOBAL);
 174         if (libscstat == NULL)
 175                 goto error;
 176 
 177         err = scconf_get_nodename(id, &name);
 178         if (err == SCCONF_EPERM) {
 179                 cl_nodename = NULL;
 180         } else if (err != SCCONF_NOERR) {
 181 #ifdef DEBUG
 182                 scconf_strerr(errbuf, err);
 183                 fprintf(stderr, "scconf_get_nodename: %d: %s\n", err, errbuf);
 184 #endif
 185                 goto error;
 186         } else
 187                 cl_nodename = name;
 188 
 189         /* Load the Sun Cluster device tree */
 190         init_sc_entry();
 191         cl_initialized = 1;
 192         return (0);
 193 
 194 error:  /* error cleanup */
 195         if (libscconf)
 196                 dlclose(libscconf);
 197 
 198         if (libscstat)
 199                 dlclose(libscstat);
 200 
 201         libscconf = NULL;
 202         libscstat = NULL;
 203 
 204         errno = ENOSYS;
 205         return (-1);
 206 }
 207 
 208 
 209 /*
 210  * cfg_issuncluster()
 211  *
 212  * Description:
 213  *  Return the SunCluster nodeid of this node.
 214  *
 215  * Returns:
 216  *  >0   - running in a SunCluster (value is nodeid of this node)
 217  *   0   - not running in a cluster
 218  *  -1   - failure; errno is set
 219  */
 220 
 221 int
 222 cfg_issuncluster()
 223 {
 224         if (cfg_cluster_init() >= 0)
 225                 return ((int)cl_nodeid);
 226         else
 227                 return (-1);
 228 }
 229 int
 230 cfg_iscluster()
 231 {
 232         return (cfg_issuncluster());
 233 }
 234 
 235 /*
 236  * cfg_l_dgname_islocal()
 237  * Check if disk group is local on a non-SunCluster.
 238  *
 239  * Returns as cfg_dgname_islocal().
 240  */
 241 #ifndef lint
 242 static int
 243 cfg_l_dgname_islocal(char *dgname, char **othernode)
 244 {
 245         const char *metaset = "/usr/sbin/metaset -s %s -o > /dev/null 2>&1";
 246         char command[1024];
 247         int rc;
 248 
 249         if (snprintf(command, sizeof (command), metaset, dgname) >=
 250             sizeof (command)) {
 251                 errno = ENOMEM;
 252                 return (-1);
 253         }
 254 
 255         rc = system(command);
 256         if (rc < 0) {
 257                 return (-1);
 258         }
 259 
 260         if (WEXITSTATUS(rc) != 0) {
 261                 if (othernode) {
 262                         /* metaset doesn't tell us */
 263                         *othernode = "unknown";
 264                 }
 265 
 266                 return (0);
 267         }
 268 
 269         return (1);
 270 }
 271 #endif
 272 
 273 /*
 274  * cfg_dgname_islocal(char *dgname, char **othernode)
 275  * -- determine if the named disk service is mastered on this node
 276  *
 277  * If the disk service is mastered on another node, that nodename
 278  * will be returned in othernode (if not NULL).  It is up to the
 279  * calling program to call free() on this value at a later time to
 280  * free the memory allocated.
 281  *
 282  * Returns:
 283  *   1   - disk service is mastered on this node
 284  *   0   - disk service is not mastered on this node (*othernode set)
 285  *   -1  - error (errno will be set)
 286  */
 287 
 288 int
 289 cfg_dgname_islocal(char *dgname, char **othernode)
 290 {
 291         hash_data_t *data;
 292 
 293         if (dgname == NULL || *dgname == '\0' || othernode == NULL) {
 294                 errno = EINVAL;
 295                 return (-1);
 296         }
 297 
 298         /* Handle non-cluster configurations */
 299         if (cfg_cluster_init() < 0) {
 300                 return (-1);
 301         } else  if (cl_nodeid == 0) {
 302                 /* it has to be local */
 303                 return (1);
 304         }
 305 
 306         /*
 307          * lookup the current diskgroup name
 308          */
 309         if (data = (hash_data_t *)nsc_lookup(schash, dgname)) {
 310                 if (strcmp(data->scstat_node_name, cl_nodename)) {
 311                         if (othernode)
 312                             *othernode = strdup(data->scstat_node_name);
 313                         return (0);
 314                 } else {
 315                         return (1);
 316                 }
 317         } else {
 318                 errno = ENODEV;
 319                 return (-1);
 320         }
 321 }
 322 
 323 /*
 324  * cfg_l_dgname()
 325  * parse the disk group name from the a device pathname on a non-SunCluster.
 326  *
 327  * Returns as cfg_dgname().
 328  */
 329 
 330 char *
 331 cfg_l_dgname(const char *pathname, char *buffer, size_t buflen)
 332 {
 333         const char *dev = "/dev/";
 334         const char *vx = "vx/";
 335         const char *md = "md/";
 336         const char *dsk = "dsk/";
 337         const char *start, *cp;
 338         int ll, len, chkdsk;
 339 
 340         bzero(buffer, buflen);
 341         chkdsk = 0;
 342 
 343         ll = strlen(dev);
 344         if (strncmp(pathname, dev, ll) != 0) {
 345                 /* not a device pathname */
 346                 errno = EINVAL;
 347                 return ((char *)NULL);
 348         }
 349 
 350         start = pathname + ll;
 351 
 352         if (strncmp(start, vx, (ll = strlen(vx))) == 0) {
 353                 /*
 354                  * Veritas --
 355                  * /dev/vx/{r}dsk/dgname/partition
 356                  */
 357 
 358                 start += ll;
 359 
 360                 ll = strlen(dsk);
 361 
 362                 if (*start == 'r' && strncmp((start + 1), dsk, ll) == 0)
 363                         start += ll + 1;
 364                 else if (strncmp(start, dsk, ll) == 0)
 365                         start += ll;
 366                 else {
 367                         /* no dgname */
 368                         return (buffer);
 369                 }
 370         } else {
 371                 /* no dgname */
 372                 return (buffer);
 373         }
 374 
 375         for (cp = start, len = 0; *cp != '\0' && *cp != '/'; cp++)
 376                 len++;  /* count length of dgname */
 377 
 378         if (*cp == '\0') {
 379                 /* no dgname */
 380                 return (buffer);
 381         }
 382 
 383 #ifdef DEBUG
 384         if (*cp != '/') {
 385                 fprintf(stderr,
 386                     "cfg_dgname: parse error: *cp = '%c', expected '/'\n", *cp);
 387                 errno = EPROTO;
 388                 return ((char *)NULL);
 389         }
 390 #endif
 391 
 392         if (chkdsk) {
 393                 cp++;   /* skip the NULL */
 394 
 395                 ll = strlen(dsk);
 396 
 397                 if ((*cp != 'r' || strncmp((cp + 1), dsk, ll) != 0) &&
 398                     strncmp(cp, dsk, ll) != 0) {
 399                         /* no dgname */
 400                         return (buffer);
 401                 }
 402         }
 403 
 404         if (len >= buflen) {
 405                 errno = E2BIG;
 406                 return ((char *)NULL);
 407         }
 408 
 409         (void) strncpy(buffer, start, len);
 410         return (buffer);
 411 }
 412 
 413 
 414 /*
 415  * cfg_dgname()
 416  * determine which cluster resource group the pathname belongs to, if any
 417  *
 418  * Returns:
 419  *      NULL                    - error (errno is set)
 420  *      ptr to NULL-string      - no dgname
 421  *      pointer to string       - dgname
 422  */
 423 
 424 char *
 425 cfg_dgname(const char *pathname, char *buffer, size_t buflen)
 426 {
 427         scconf_errno_t conferr;
 428         char *dsname = NULL;
 429         struct stat stb;
 430 #ifdef DEBUG
 431         char errbuf[SCCONF_MAXSTRINGLEN];
 432 #endif
 433 
 434         bzero(buffer, buflen);
 435 
 436         if (pathname == NULL || *pathname == '\0') {
 437                 errno = EINVAL;
 438                 return ((char *)NULL);
 439         }
 440 
 441         /* Handle non-cluster configurations */
 442         if (cfg_cluster_init() < 0) {
 443                 errno = EINVAL;
 444                 return ((char *)NULL);
 445         } else  if (cl_nodeid == 0) {
 446                 /* must be local - return NULL-string dgname */
 447                 return (buffer);
 448         }
 449 
 450         if (stat(pathname, &stb) < 0) {
 451                 errno = EINVAL;
 452                 return ((char *)NULL);
 453         }
 454 
 455         conferr = scconf_get_ds_by_devt(major(stb.st_rdev),
 456             minor(stb.st_rdev), &dsname);
 457 
 458         if (conferr == SCCONF_ENOEXIST) {
 459                 return (buffer);
 460         } else if (conferr != SCCONF_NOERR) {
 461 #ifdef DEBUG
 462                 scconf_strerr(errbuf, conferr);
 463                 fprintf(stderr,
 464                     "scconf_get_ds_by_devt: %d: %s\n", conferr, errbuf);
 465 #endif
 466                 errno = EINVAL;
 467                 return ((char *)NULL);
 468         }
 469 
 470         strncpy(buffer, dsname, buflen);
 471         free(dsname);
 472 
 473         return (buffer);
 474 }
 475 
 476 
 477 /*
 478  * init_sc_entry
 479  *
 480  * Add an entry into the sclist and the schash for future lookups.
 481  *
 482  * - IMPORTANT -
 483  *
 484  * This must -never- be called from any command that can be started
 485  * from /usr/cluster/lib/sc/run_reserve (and hence
 486  * /usr/cluster/sbin/reconfig) or the system will deadlock
 487  * during switchover.  This includes:
 488  *
 489  *       - svadm (no options, "print") -- called during sv switchover
 490  *       - all boot commands
 491  *
 492  * Return values:
 493  *  -1  An error occurred.
 494  *   0  Entry added
 495  *   1  Entry already exists.
 496  */
 497 static int
 498 init_sc_entry()
 499 {
 500         scstat_ds_node_state_t *dsn;
 501         scstat_ds_name_t dsname;
 502         scstat_ds_t *dsstatus, *dsp;
 503         scstat_errno_t err;
 504 #ifdef DEBUG
 505         char errbuf[SCCONF_MAXSTRINGLEN];
 506 #endif
 507 
 508         hash_data_t *hdp;
 509 
 510         /*
 511          * Allocate a hash table
 512          */
 513         if ((schash = nsc_create_hash()) == NULL)
 514                 return (-1);
 515 
 516         /*
 517          * the API is broken here - the function is written to expect
 518          * the first argument to be (scstat_ds_name_t), but the function
 519          * declaration in scstat.h requires (scstat_ds_name_t *).
 520          *
 521          * We just cast it to get rid of the compiler warnings.
 522          * If "dsname" is NULL, information for all device services is returned
 523          */
 524         dsstatus = NULL;
 525         dsname = NULL;
 526         /* LINTED pointer alignment */
 527         err = scstat_get_ds_status((scstat_ds_name_t *)dsname, &dsstatus);
 528         if (err != SCSTAT_ENOERR) {
 529 #ifdef DEBUG
 530                 scstat_strerr(err, errbuf);
 531                 fprintf(stderr, "scstat_get_ds_status(): %d: %s\n",
 532                     err, errbuf);
 533 #endif
 534                 errno = ENOSYS;
 535                 return (-1);
 536         }
 537 
 538         if (dsstatus == NULL) {
 539                 errno = ENODEV;
 540                 return (-1);
 541         }
 542 
 543         /*
 544          * Traverse scstat_ds list, saving away resource in out hash table
 545          */
 546         for (dsp = dsstatus; dsp; dsp = dsp->scstat_ds_next) {
 547 
 548                 /* Skip over NULL scstat_ds_name's */
 549                 if ((dsp->scstat_ds_name == NULL) ||
 550                     (dsp->scstat_ds_name[0] == '\0'))
 551                         continue;
 552 
 553                 /* See element exits already, error if so */
 554                 if (nsc_lookup(schash, dsp->scstat_ds_name)) {
 555                         fprintf(stderr, "scstat_get_ds_status: duplicate %s",
 556                                 dsp->scstat_ds_name);
 557                         errno = EEXIST;
 558                         return (-1);
 559                 }
 560 
 561                 /* Traverse the node status list */
 562                 for (dsn = dsp->scstat_node_state_list; dsn;
 563                                         dsn = dsn->scstat_node_next) {
 564                         /*
 565                          * Only keep trace of primary nodes
 566                          */
 567                         if (dsn->scstat_node_state != SCSTAT_PRIMARY)
 568                                 continue;
 569 
 570                         /* Create an element to insert */
 571                         hdp = (hash_data_t *)malloc(sizeof (hash_data_t));
 572                         hdp->scstat_node_name = strdup(dsn->scstat_node_name);
 573                         nsc_insert_node(schash, hdp, dsp->scstat_ds_name);
 574                 }
 575         }
 576 
 577         /*
 578          * Free up scstat resources
 579          */
 580         scstat_free_ds_status(dsstatus);
 581         return (0);
 582 }