1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License, Version 1.0 only
   6  * (the "License").  You may not use this file except in compliance
   7  * with the License.
   8  *
   9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  10  * or http://www.opensolaris.org/os/licensing.
  11  * See the License for the specific language governing permissions
  12  * and limitations under the License.
  13  *
  14  * When distributing Covered Code, include this CDDL HEADER in each
  15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  16  * If applicable, add the following below this CDDL HEADER, with the
  17  * fields enclosed by brackets "[]" replaced with your own identifying
  18  * information: Portions Copyright [yyyy] [name of copyright owner]
  19  *
  20  * CDDL HEADER END
  21  */
  22 /*
  23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 #pragma ident   "%Z%%M% %I%     %E% SMI"
  28 
  29 /*
  30  * This file contains public functions for managing DHCP network
  31  * containers.  For the semantics of these functions, please see the
  32  * Enterprise DHCP Architecture Document.
  33  *
  34  * This module uses synchronization guarantees provided by dsvclockd(1M);
  35  * please see $SRC/lib/libdhcpsvc/private/README.synch for details.
  36  *
  37  * Big Theory Statement for the SUNWbinfiles DHCP Network Module
  38  * =============================================================
  39  *
  40  * 1. On-disk Structure
  41  *
  42  * Each container consists of two basic pieces on-disk: a header and an
  43  * array of records.  In order to provide fast client IP lookup, the array
  44  * of records is directly indexed by client IP address (using a simple
  45  * mapping function).  In order to provide fast client id lookup, each
 * in-use record is also on exactly one doubly-linked client id hash chain
 * (the hash chain heads are contained in the header).  For all other
  48  * lookups, we can restrict our search to only the in-use records by merely
  49  * walking all of the hash chains.  Here's a crude illustration of what
  50  * this looks like on-disk (note that hash chains 2 and 3 are empty):
  51  *
  52  *              _______________________________________________
  53  *             | container info   | hash chain heads (buckets) |
  54  *    header   |                  | 1 | 2 | 3 |  [ .... ]  | N |
  55  *             |                  | | |   |   |            | | |
  56  *             |__________________|_|________________________|_|
  57  *             | rec1      | rec2   |  | rec3      | rec4    | |
  58  *             |           |        +--->          |         | |
  59  *             | unused    | unused    | hash1     | unused  | |
  60  *             |___________|___________|________^|_|_________|_|
  61  *             | rec5      | rec6      | rec7   |v | rec8    | |
  62  *             |           |           |           ->        | |
  63  *    records  | unused    | hashN     | hash1    <- hash1   | |
  64  *             |___________|________^|_|___________|_________|_|
  65  *             |           :        :: :           :         : |
  66  *             |           :        :: : [ more records... ] : |
  67  *             |           :        :: :           :         : |
  68  *             |___________:________::_:___________:_________:_|
  69  *             | recN-3    | recN-2 || | recN-1    | recN    v |
  70  *             |           |        |+-->          ->          |
  71  *             | unused    | unused +--- hashN    <- hashN     |
  72  *             |___________|___________|___________|___________|
  73  *
  74  * Note that the actual on-disk format is a bit more complicated than this
  75  * due to robustness issues; see section 3 below for details.
  76  *
  77  * 2. Robustness Requirements
  78  *
  79  * This module has been designed to be as efficient as possible while still
  80  * retaining the robustness minimally required for an enterprise-level
  81  * environment.  In particular, it is designed to handle the following
  82  * failure situations:
  83  *
  84  *      1. An update operation (add, modify, delete) on a container is
  85  *         unable to complete due to an unexpected internal error at
  86  *         any point in the update code.
  87  *
  88  *      2. An update operation (add, modify, delete) on a container is
  89  *         unable to complete due to unexpected program termination while
  90  *         at any point in the update code.
  91  *
 * If either of these situations occurs, the container in question must be
  93  * left in a consistent (and viable) state.  In addition, only the pending
  94  * transaction (at most) may be lost.
  95  *
  96  * 3. Robustness Techniques
  97  *
  98  * This module uses a few different techniques to meet our robustness goals
  99  * while maintaining high performance.  The biggest problem we encounter
 100  * when trying to achieve robustness is updating the client id hash chain.
 101  * In particular, it is not possible to atomically add, move, or delete an
 102  * item from a doubly linked list, thus creating a window where a crash
 103  * could leave our hash chains in an inconsistent state.
 104  *
 105  * To address this problem, we actually maintain two images (copies) of all
 106  * the hash chains in the container.  At any point in time, exactly one of
 107  * the two images is active (and thus considered authoritative), as
 108  * indicated by a byte in the container header.  When performing an update
 109  * operation, all hash chain modifications are done on the *inactive*
 110  * image, then, once the inactive image has completed the hash chain
 111  * operations required by the update, the active and inactive images are
 112  * atomically switched, making the formerly-inactive image authoritative.
 113  * After the image switch, the update code then updates the formerly-active
 114  * image's hash chains to match the active image's hash chains.
 115  *
 116  * This approach has the nice property that internal container consistency
 117  * can always be restored after a crash by just resynchronizing the
 118  * inactive image's hash chains with the active image's chains.  Note that
 119  * the atomic image switch serves as the "commit point" for the operation:
 120  * if we crash before this point, we roll back the operation upon recovery
 121  * and it appears as though the operation never happened; if we crash after
 122  * this point, we roll forward the rest of the operation upon recovery as
 123  * if the crash had not happened.
 124  *
 125  * This technique is enough to robustly implement our add and delete
 126  * operations, but modify has an additional complication due to our direct
 127  * mapping of client IP addresses to records.  In particular, unless the
 128  * record modification includes changing the client IP address, the
 129  * modified record must be written at the same location as the original
 130  * record -- however, if the modify operation fails part way through
 131  * writing out the new client record, the record will be corrupt and we
 132  * will have no way to return the record to a consistent state.  To address
 133  * this issue, we allocate a spare record in the container header called
 134  * the "temporary" record.  Upon a modification of this type, we first
 135  * write the modified record to the temporary record and indicate that the
 136  * temporary record is currently proxying for the actual record.  We then
 137  * copy the temporary record to the actual record and make the temporary
 138  * record available again for future use.  If a crash occurs before the
 139  * copy to the temporary record is complete, then we just roll back as if
 140  * the modify never happened (since we have not modified the actual
 141  * record).  If a crash occurs after copying the temporary record, we roll
 142  * forward and complete the copy operation as if the crash never happened.
 143  * Note that there are some additional subtle complications here; see the
 144  * comments in the code for details.
 145  */
 146 
 147 #include <unistd.h>
 148 #include <sys/types.h>
 149 #include <sys/socket.h>
 150 #include <sys/isa_defs.h>
 151 #include <netinet/in.h>
 152 #include <dhcp_svc_public.h>
 153 #include <stdlib.h>
 154 #include <dirent.h>
 155 #include <string.h>
 156 #include <libgen.h>
 157 #include <errno.h>
 158 #include <stddef.h>
 159 #include <assert.h>
 160 
 161 #include "dhcp_network.h"
 162 #include "util.h"
 163 
 164 static uint16_t cidhash(const uchar_t *, size_t);
 165 static void     net2path(char *, size_t, const char *, ipaddr_t);
 166 static int      check_dn(dn_handle_t *);
 167 static int      getabyte(int, off_t, uchar_t *);
 168 static int      setabyte(int, off_t, uchar_t);
 169 static int      read_rec(int, dn_filerec_t *, dn_recid_t);
 170 static int      write_rec(int, dn_filerec_t *, dn_recid_t);
 171 static int      read_header(int, dn_header_t *, boolean_t);
 172 static int      write_header(int, dn_header_t *);
 173 static int      read_hashhead(int, dn_recid_t *, uint16_t, uchar_t);
 174 static int      write_hashhead(int, dn_recid_t, uint16_t, uchar_t);
 175 static boolean_t record_match(const dn_rec_t *, const dn_rec_t *, uint_t);
 176 
 177 int
 178 open_dn(void **handlep, const char *dir, uint_t flags,
 179     const struct in_addr *netp, const struct in_addr *maskp)
 180 {
 181         dn_handle_t     *dhp;
 182         dn_header_t     header = { 0 };
 183         char            dnpath[MAXPATHLEN];
 184         int             i, retval;
 185         off_t           filesz;
 186 
 187         dhp = malloc(sizeof (dn_handle_t));
 188         if (dhp == NULL)
 189                 return (DSVC_NO_MEMORY);
 190 
 191         /*
 192          * As a safeguard, check that the size of a dn_header_t hasn't
 193          * changed (since it contains a dn_rec_t, this will probably catch
 194          * a change in that structure as well).  If it has, bail rather
 195          * than totally corrupting the container (by continuing).  Note
 196          * that this situation indicates an internal programming error,
 197          * which is why we prefer assert() to just returning DSVC_INTERNAL.
 198          */
 199         /* CONSTCOND */
 200         assert(sizeof (header) == 32768);
 201 
 202         net2path(dnpath, MAXPATHLEN, dir, netp->s_addr);
 203         retval = open_file(dnpath, flags, &dhp->dh_fd);
 204         if (retval != DSVC_SUCCESS) {
 205                 free(dhp);
 206                 return (retval);
 207         }
 208 
 209         if (flags & DSVC_CREATE) {
 210                 /*
 211                  * We just created the per-network container; initialize
 212                  * the header and put it out on disk.  Note that we leave
 213                  * `dnh_version' zero until the entire header has been
 214                  * written, so we can detect partial failure.
 215                  */
 216                 header.dnh_version      = 0;
 217                 header.dnh_network      = netp->s_addr;
 218                 header.dnh_netmask      = maskp->s_addr;
 219                 header.dnh_magic        = DN_MAGIC;
 220                 header.dnh_tempimage    = DN_NOIMAGE;
 221                 header.dnh_image        = 0;
 222                 header.dnh_errors       = 0;
 223                 header.dnh_checks       = 0;
 224                 for (i = 0; i < DN_CIDHASHSZ; i++) {
 225                         header.dnh_cidhash[i][header.dnh_image]  = DN_NOREC;
 226                         header.dnh_cidhash[i][!header.dnh_image] = DN_NOREC;
 227                 }
 228 
 229                 if (write_header(dhp->dh_fd, &header) == -1) {
 230                         retval = syserr_to_dsvcerr(errno);
 231                         (void) remove_dn(dir, netp);
 232                         (void) close_dn((void **)&dhp);
 233                         return (retval);
 234                 }
 235 
 236                 /*
 237                  * Virtually reserve all the space we're going to need for
 238                  * the dn_rec_t's ahead of time, so that we don't have to
 239                  * worry about "growing" the file later (though it may
 240                  * increase in size as we fill in holes).  We're guaranteed
 241                  * that we'll read these holes as zeros, which we take
 242                  * advantage of since a dn_filerec_t with a rec_prev of
 243                  * DN_NOREC (which is 0) indicates that a record is unused.
 244                  */
 245                 filesz = RECID2OFFSET(RECID(~0, header.dnh_netmask) + 1);
 246                 retval = setabyte(dhp->dh_fd, filesz - 1, 0);
 247                 if (retval != DSVC_SUCCESS) {
 248                         (void) remove_dn(dir, netp);
 249                         (void) close_dn((void **)&dhp);
 250                         return (retval);
 251                 }
 252 
 253                 /*
 254                  * Set the version field on the container, effectively
 255                  * making it available for use.
 256                  */
 257                 retval = setabyte(dhp->dh_fd, offsetof(dn_header_t,
 258                     dnh_version), DSVC_CONVER);
 259                 if (retval != DSVC_SUCCESS) {
 260                         (void) remove_dn(dir, netp);
 261                         (void) close_dn((void **)&dhp);
 262                         return (retval);
 263                 }
 264         } else {
 265                 /*
 266                  * Container already exists; sanity check against the
 267                  * header that's on-disk.  If we detect a problem then
 268                  * either someone scribbled on our container or we
 269                  * terminated abnormally when creating the container.
 270                  */
 271                 if (read_header(dhp->dh_fd, &header, B_FALSE) == -1) {
 272                         retval = syserr_to_dsvcerr(errno);
 273                         (void) close_dn((void **)&dhp);
 274                         return (retval);
 275                 }
 276 
 277                 if (header.dnh_network != netp->s_addr ||
 278                     header.dnh_version != DSVC_CONVER ||
 279                     header.dnh_magic != DN_MAGIC) {
 280                         (void) close_dn((void **)&dhp);
 281                         return (DSVC_INTERNAL);
 282                 }
 283         }
 284 
 285         dhp->dh_netmask      = header.dnh_netmask;
 286         dhp->dh_oflags       = flags;
 287 
 288         *handlep = dhp;
 289         return (DSVC_SUCCESS);
 290 }
 291 
 292 int
 293 close_dn(void **handlep)
 294 {
 295         dn_handle_t *dhp = (dn_handle_t *)*handlep;
 296 
 297         if (close(dhp->dh_fd) == -1)
 298                 return (DSVC_INTERNAL);
 299 
 300         free(dhp);
 301         return (DSVC_SUCCESS);
 302 }
 303 
 304 int
 305 remove_dn(const char *dir, const struct in_addr *netp)
 306 {
 307         char dnpath[MAXPATHLEN];
 308 
 309         net2path(dnpath, MAXPATHLEN, dir, netp->s_addr);
 310         if (unlink(dnpath) == -1)
 311                 return (syserr_to_dsvcerr(errno));
 312 
 313         return (DSVC_SUCCESS);
 314 }
 315 
 316 int
 317 lookup_dn(void *handle, boolean_t partial, uint_t query, int count,
 318     const dn_rec_t *targetp, dn_rec_list_t **recordsp, uint_t *nrecordsp)
 319 {
 320         dn_handle_t     *dhp = (dn_handle_t *)handle;
 321         int             retval = DSVC_SUCCESS;
 322         uint_t          nrecords, n;
 323         uint16_t        hash;
 324         dn_rec_t        *recordp;
 325         dn_rec_list_t   *records, *new_records;
 326         dn_recid_t      recid, temp_recid = DN_NOREC;
 327         dn_filerec_t    rec;
 328         dn_header_t     header;
 329         uchar_t         image;
 330         int             fd = dhp->dh_fd;
 331 
 332         if ((dhp->dh_oflags & DSVC_READ) == 0)
 333                 return (DSVC_ACCESS);
 334 
 335         if (read_header(fd, &header, B_FALSE) == -1)
 336                 return (syserr_to_dsvcerr(errno));
 337 
 338         /*
 339          * It's possible that a previous update to this container failed
 340          * part-way through.  In general, this is fine since we always keep
 341          * our active image's hash chains correct and only swap to the
 342          * alternate image when the other image is completely safe to use.
 343          * However, for reasons explained in modify_dn(), it's possible
 344          * that a record being modified was not completely updated before a
 345          * failure occurred.  In this case, the actual data for that record
 346          * is contained in the temporary record in the header.  We need to
 347          * be careful to use that temporary record anywhere we'd otherwise
 348          * refer to the partially updated record.  Note that we do this
 349          * rather than attempting to restore the consistency of the
 350          * container because we're MT-hot here.
 351          */
 352         if (header.dnh_dirty && header.dnh_tempimage == header.dnh_image) {
 353                 temp_recid = RECID(header.dnh_temp.rec_dn.dn_cip.s_addr,
 354                     header.dnh_netmask);
 355         }
 356 
 357         image = header.dnh_image;
 358         records = NULL;
 359         for (n = 0, nrecords = 0; count < 0 || nrecords < count; n++) {
 360                 if (DSVC_QISEQ(query, DN_QCIP)) {
 361                         /*
 362                          * Lookup scenario 1: Caller has requested a QN_CIP
 363                          * query lookup; set `recid' to the only possible
 364                          * entry (which may not be in-use).
 365                          */
 366                         if (n != 0)
 367                                 break;
 368                         recid = RECID(targetp->dn_cip.s_addr, dhp->dh_netmask);
 369                 } else if (DSVC_QISEQ(query, DN_QCID)) {
 370                         /*
 371                          * Lookup scenario 2: Caller has requested a
 372                          * QN_CID-based lookup.  Walk the `cidhash' chain
 373                          * (one call at a time) and set `recid' to hash
 374                          * bucket candidates.
 375                          *
 376                          * Note that it's possible for the client id value
 377                          * 00 to appear more than once, and it's not
 378                          * impossible for other duplicate client ids to
 379                          * occur, so continue until we reach `nrecords'.
 380                          */
 381                         if (n == 0) {
 382                                 hash = cidhash(targetp->dn_cid,
 383                                     targetp->dn_cid_len);
 384                                 if (read_hashhead(fd, &recid, hash, image)
 385                                     == -1)
 386                                         return (syserr_to_dsvcerr(errno));
 387                         } else {
 388                                 /* sanity check */
 389                                 if (recid == rec.rec_next[image])
 390                                         break;
 391                                 recid = rec.rec_next[image];
 392                         }
 393                 } else {
 394                         /*
 395                          * Lookup scenario 3: Caller has requested any
 396                          * other type of search.  Walk the all the client
 397                          * id hashes.
 398                          */
 399                         if (n == 0) {
 400                                 hash = 0;
 401                                 if (read_header(fd, &header, B_TRUE) == -1)
 402                                         return (syserr_to_dsvcerr(errno));
 403                                 recid = header.dnh_cidhash[hash][image];
 404                         } else {
 405                                 /* sanity check */
 406                                 if (recid == rec.rec_next[image])
 407                                         break;
 408                                 recid = rec.rec_next[image];
 409                         }
 410 
 411                         while (recid == DN_NOREC && ++hash < DN_CIDHASHSZ)
 412                                 recid = header.dnh_cidhash[hash][image];
 413                 }
 414 
 415                 /*
 416                  * No more records; bail.
 417                  */
 418                 if (recid == DN_NOREC)
 419                         break;
 420 
 421                 if (recid == temp_recid) {
 422                         /*
 423                          * The temporary record is actually authoritative
 424                          * for this record's contents; use it instead.
 425                          */
 426                         recid = DN_TEMPREC;
 427                 }
 428 
 429                 if (read_rec(dhp->dh_fd, &rec, recid) == -1) {
 430                         retval = syserr_to_dsvcerr(errno);
 431                         break;
 432                 }
 433 
 434                 /*
 435                  * If the record isn't in-use, then skip...
 436                  */
 437                 if (rec.rec_prev[image] == DN_NOREC)
 438                         continue;
 439 
 440                 /*
 441                  * See if we've got a match...
 442                  */
 443                 if (!record_match(&rec.rec_dn, targetp, query))
 444                         continue;
 445 
 446                 /*
 447                  * Caller just wants a count of the number of matching
 448                  * records, not the records themselves; continue.
 449                  */
 450                 if (recordsp == NULL) {
 451                         nrecords++;
 452                         continue;
 453                 }
 454 
 455                 /*
 456                  * Allocate the record and fill it in.
 457                  */
 458                 recordp = malloc(sizeof (dn_rec_t));
 459                 if (recordp == NULL) {
 460                         if (!partial)
 461                                 retval = DSVC_NO_MEMORY;
 462                         break;
 463                 }
 464                 *recordp = rec.rec_dn;
 465 
 466                 /*
 467                  * Chuck the record on the list and up the counter.
 468                  */
 469                 new_records = add_dnrec_to_list(recordp, records);
 470                 if (new_records == NULL) {
 471                         free(recordp);
 472                         if (!partial)
 473                                 retval = DSVC_NO_MEMORY;
 474                         break;
 475                 }
 476 
 477                 records = new_records;
 478                 nrecords++;
 479         }
 480 
 481         if (retval == DSVC_SUCCESS) {
 482                 *nrecordsp = nrecords;
 483                 if (recordsp != NULL)
 484                         *recordsp = records;
 485                 return (DSVC_SUCCESS);
 486         }
 487 
 488         if (records != NULL)
 489                 free_dnrec_list(records);
 490 
 491         return (retval);
 492 }
 493 
 494 /*
 495  * Compares `dnp' to the target `targetp', using `query' to decide what
 496  * fields to compare.  Returns B_TRUE if `dnp' matches `targetp', B_FALSE
 497  * if not.
 498  */
 499 static boolean_t
 500 record_match(const dn_rec_t *dnp, const dn_rec_t *targetp, uint_t query)
 501 {
 502         unsigned int qflags[] = { DN_QFDYNAMIC, DN_QFAUTOMATIC, DN_QFMANUAL,
 503                                 DN_QFUNUSABLE, DN_QFBOOTP_ONLY };
 504         unsigned int flags[]  = { DN_FDYNAMIC, DN_FAUTOMATIC, DN_FMANUAL,
 505                                 DN_FUNUSABLE, DN_FBOOTP_ONLY };
 506         unsigned int i;
 507         unsigned int query0;
 508 
 509         /*
 510          * As an optimization, skip any checks if the query is empty.
 511          */
 512         DSVC_QINIT(query0);
 513         if (query == query0)
 514                 return (B_TRUE);
 515 
 516         if (DSVC_QISEQ(query, DN_QLEASE) &&
 517             targetp->dn_lease != dnp->dn_lease)
 518                 return (B_FALSE);
 519         if (DSVC_QISNEQ(query, DN_QLEASE) &&
 520             targetp->dn_lease == dnp->dn_lease)
 521                 return (B_FALSE);
 522 
 523         if (DSVC_QISEQ(query, DN_QCIP) &&
 524             dnp->dn_cip.s_addr != targetp->dn_cip.s_addr)
 525                 return (B_FALSE);
 526         if (DSVC_QISNEQ(query, DN_QCIP) &&
 527             dnp->dn_cip.s_addr == targetp->dn_cip.s_addr)
 528                 return (B_FALSE);
 529 
 530         if (DSVC_QISEQ(query, DN_QCID) &&
 531             (dnp->dn_cid_len != targetp->dn_cid_len ||
 532             (memcmp(dnp->dn_cid, targetp->dn_cid, dnp->dn_cid_len) != 0)))
 533                 return (B_FALSE);
 534         if (DSVC_QISNEQ(query, DN_QCID) &&
 535             (dnp->dn_cid_len == targetp->dn_cid_len &&
 536             (memcmp(dnp->dn_cid, targetp->dn_cid, dnp->dn_cid_len) == 0)))
 537                 return (B_FALSE);
 538 
 539         if (DSVC_QISEQ(query, DN_QSIP) &&
 540             dnp->dn_sip.s_addr != targetp->dn_sip.s_addr)
 541                 return (B_FALSE);
 542         if (DSVC_QISNEQ(query, DN_QSIP) &&
 543             dnp->dn_sip.s_addr == targetp->dn_sip.s_addr)
 544                 return (B_FALSE);
 545 
 546         if (DSVC_QISEQ(query, DN_QMACRO) &&
 547             strcmp(targetp->dn_macro, dnp->dn_macro) != 0)
 548                 return (B_FALSE);
 549         if (DSVC_QISNEQ(query, DN_QMACRO) &&
 550             strcmp(targetp->dn_macro, dnp->dn_macro) == 0)
 551                 return (B_FALSE);
 552 
 553         for (i = 0; i < sizeof (qflags) / sizeof (unsigned int); i++) {
 554                 if (DSVC_QISEQ(query, qflags[i]) &&
 555                     (dnp->dn_flags & flags[i]) !=
 556                     (targetp->dn_flags & flags[i]))
 557                         return (B_FALSE);
 558                 if (DSVC_QISNEQ(query, qflags[i]) &&
 559                     (dnp->dn_flags & flags[i]) ==
 560                     (targetp->dn_flags & flags[i]))
 561                         return (B_FALSE);
 562         }
 563 
 564         return (B_TRUE);
 565 }
 566 
 567 int
 568 add_dn(void *handle, dn_rec_t *addp)
 569 {
 570         dn_filerec_t    rec, rec_next;
 571         dn_recid_t      recid, recid_head;
 572         uint16_t        hash;
 573         uchar_t         image;
 574         int             retval;
 575         dn_handle_t     *dhp = (dn_handle_t *)handle;
 576         int             fd = dhp->dh_fd;
 577 
 578         if ((dhp->dh_oflags & DSVC_WRITE) == 0)
 579                 return (DSVC_ACCESS);
 580 
 581         retval = check_dn(dhp);
 582         if (retval != DSVC_SUCCESS)
 583                 return (retval);
 584 
 585         hash = cidhash(addp->dn_cid, addp->dn_cid_len);
 586 
 587         /*
 588          * Get the active image.
 589          */
 590         retval = getabyte(fd, offsetof(dn_header_t, dnh_image), &image);
 591         if (retval != DSVC_SUCCESS)
 592                 return (retval);
 593 
 594         /*
 595          * Doublecheck to make sure this entry doesn't exist already.
 596          */
 597         recid = RECID(addp->dn_cip.s_addr, dhp->dh_netmask);
 598         if (read_rec(fd, &rec, recid) == -1)
 599                 return (syserr_to_dsvcerr(errno));
 600 
 601         if (rec.rec_prev[image] != DN_NOREC)
 602                 return (DSVC_EXISTS);
 603 
 604         /*
 605          * We're going to insert `rec' at the head of the `hash' hash
 606          * chain; get it ready-to-go.  Note that we update the alternate
 607          * image's hash record id pointers so that the record will
 608          * atomically become in-use when we switch to the alternate image.
 609          */
 610         if (read_hashhead(fd, &recid_head, hash, image) == -1)
 611                 return (syserr_to_dsvcerr(errno));
 612 
 613         rec.rec_dn = *addp;
 614         rec.rec_dn.dn_sig = gensig();
 615         rec.rec_prev[!image] = DN_HASHHEAD;
 616         rec.rec_next[!image] = recid_head;
 617 
 618         /*
 619          * If there's a record currently on the hash chain (i.e, we're
 620          * not the first) then load the record.
 621          */
 622         if (rec.rec_next[!image] != DN_NOREC) {
 623                 if (read_rec(fd, &rec_next, rec.rec_next[!image]) == -1)
 624                         return (syserr_to_dsvcerr(errno));
 625         }
 626 
 627         /*
 628          * Before we update any information on disk, mark the container as
 629          * dirty so that there's no chance the container is inconsistent
 630          * without us knowing about it.
 631          */
 632         retval = setabyte(fd, offsetof(dn_header_t, dnh_dirty), 1);
 633         if (retval != DSVC_SUCCESS)
 634                 return (retval);
 635 
 636         /*
 637          * Update the new record on-disk; note that it's not yet reachable
 638          * via hash.
 639          */
 640         if (write_rec(fd, &rec, recid) == -1)
 641                 return (syserr_to_dsvcerr(errno));
 642 
 643         /*
 644          * Update the alternate image's on-disk hash pointers.  We need to
 645          * do this before we switch to the alternate image so we cannot
 646          * abort with an inconsistent active image.
 647          */
 648         if (rec.rec_next[!image] != DN_NOREC) {
 649                 rec_next.rec_prev[!image] = recid;
 650                 if (write_rec(fd, &rec_next, rec.rec_next[!image]) == -1)
 651                         return (syserr_to_dsvcerr(errno));
 652         }
 653         if (write_hashhead(fd, recid, hash, !image) == -1)
 654                 return (syserr_to_dsvcerr(errno));
 655 
 656         /*
 657          * Activate the alternate image.  This is our commit point -- if we
 658          * fail after this point, we will roll forward on recovery.
 659          */
 660         image = !image;
 661         retval = setabyte(fd, offsetof(dn_header_t, dnh_image), image);
 662         if (retval != DSVC_SUCCESS)
 663                 return (retval);
 664 
 665         /*
 666          * Update the old record id pointers to match
 667          */
 668         rec.rec_prev[!image] = rec.rec_prev[image];
 669         rec.rec_next[!image] = rec.rec_next[image];
 670         if (write_rec(fd, &rec, recid) == -1)
 671                 return (syserr_to_dsvcerr(errno));
 672 
 673         if (rec.rec_next[!image] != DN_NOREC) {
 674                 rec_next.rec_prev[!image] = recid;
 675                 if (write_rec(fd, &rec_next, rec.rec_next[!image]) == -1)
 676                         return (syserr_to_dsvcerr(errno));
 677         }
 678         if (write_hashhead(fd, recid, hash, !image) == -1)
 679                 return (syserr_to_dsvcerr(errno));
 680 
 681         /*
 682          * Update the signature on the record handed back to the caller.
 683          */
 684         addp->dn_sig = rec.rec_dn.dn_sig;
 685 
 686         /*
 687          * Finally, mark the container as clean.
 688          */
 689         return (setabyte(fd, offsetof(dn_header_t, dnh_dirty), 0));
 690 }
 691 
/*
 * Delete the record whose client IP address is `delp->dn_cip' from the
 * container open on `handle'.  The record is unlinked from its client id
 * hash chain first in the alternate image, then the alternate image is
 * made active (the commit point), and finally the formerly-active image
 * is brought back in sync.
 *
 * Returns DSVC_ACCESS if the container was not opened with DSVC_WRITE,
 * DSVC_NOENT if the record is not in use, DSVC_COLLISION if `delp->dn_sig'
 * is nonzero and does not match the on-disk signature, or another DSVC_*
 * code if checking or updating the container fails.  On success, the
 * result of clearing the container's dirty byte is returned.
 */
int
delete_dn(void *handle, const dn_rec_t *delp)
{
	dn_filerec_t	rec, rec_prev, rec_next;
	dn_recid_t	recid;
	uint16_t	hash;
	uchar_t		image;
	int		retval;
	dn_handle_t	*dhp = (dn_handle_t *)handle;
	int		fd = dhp->dh_fd;

	if ((dhp->dh_oflags & DSVC_WRITE) == 0)
		return (DSVC_ACCESS);

	/*
	 * Make sure the container is consistent (repairing it if a
	 * previous operation left it dirty) before touching it.
	 */
	retval = check_dn(dhp);
	if (retval != DSVC_SUCCESS)
		return (retval);

	/*
	 * Get the active image.
	 */
	retval = getabyte(fd, offsetof(dn_header_t, dnh_image), &image);
	if (retval != DSVC_SUCCESS)
		return (retval);

	/*
	 * Find the original entry in the network table, make sure the
	 * record is in-use, and check the signature field (to guard
	 * against collisions).
	 */
	recid = RECID(delp->dn_cip.s_addr, dhp->dh_netmask);
	if (read_rec(fd, &rec, recid) == -1)
		return (syserr_to_dsvcerr(errno));

	/* A record whose active-image `prev' is DN_NOREC is not in use. */
	if (rec.rec_prev[image] == DN_NOREC)
		return (DSVC_NOENT);

	/*
	 * Hash the client id stored on disk (not the caller's copy) so
	 * that we operate on the chain the record is actually on.
	 */
	hash = cidhash(rec.rec_dn.dn_cid, rec.rec_dn.dn_cid_len);

	/*
	 * The signatures must match to delete a record, *except* when
	 * delp->dn_sig == 0.  This is so records can be deleted that
	 * weren't retrieved via lookup_dn()
	 */
	if (delp->dn_sig != 0 && rec.rec_dn.dn_sig != delp->dn_sig)
		return (DSVC_COLLISION);

	/*
	 * Read our neighboring records.  A neighbor that does not exist
	 * (end of chain, or the hash head as predecessor) is not read and
	 * its local copy is left untouched.
	 */
	if (rec.rec_next[image] != DN_NOREC) {
		if (read_rec(fd, &rec_next, rec.rec_next[image]) == -1)
			return (syserr_to_dsvcerr(errno));
	}

	if (rec.rec_prev[image] != DN_HASHHEAD) {
		if (read_rec(fd, &rec_prev, rec.rec_prev[image]) == -1)
			return (syserr_to_dsvcerr(errno));
	}

	/*
	 * Before we update the alternate image's on-disk hash pointers,
	 * mark the container as dirty so that there's no chance the
	 * container is inconsistent without us knowing about it.
	 */
	retval = setabyte(fd, offsetof(dn_header_t, dnh_dirty), 1);
	if (retval != DSVC_SUCCESS)
		return (retval);

	/*
	 * Update the alternate image's on-disk hash pointers.  We need to
	 * do this before we switch to the alternate image so we do not
	 * abort with an inconsistent active image.  Also reset the
	 * record's alternate image record id pointers, so that the old
	 * record will not be in-use when we switch to the alternate image.
	 */
	if (rec.rec_next[image] != DN_NOREC) {
		rec_next.rec_prev[!image] = rec.rec_prev[image];
		if (write_rec(fd, &rec_next, rec.rec_next[image]) == -1)
			return (syserr_to_dsvcerr(errno));
	}

	if (rec.rec_prev[image] != DN_HASHHEAD) {
		rec_prev.rec_next[!image] = rec.rec_next[image];
		if (write_rec(fd, &rec_prev, rec.rec_prev[image]) == -1)
			return (syserr_to_dsvcerr(errno));
	} else {
		/* We were first on the chain; our successor becomes the head */
		if (write_hashhead(fd, rec.rec_next[image], hash, !image) == -1)
			return (syserr_to_dsvcerr(errno));
	}

	rec.rec_next[!image] = DN_NOREC;
	rec.rec_prev[!image] = DN_NOREC;
	if (write_rec(fd, &rec, recid) == -1)
		return (syserr_to_dsvcerr(errno));

	/*
	 * Activate the alternate image.  This is our commit point -- if we
	 * fail after this point, we will roll forward on recovery.
	 */
	image = !image;
	retval = setabyte(fd, offsetof(dn_header_t, dnh_image), image);
	if (retval != DSVC_SUCCESS)
		return (retval);

	/*
	 * Update the old record id pointers to match.  Note that `image'
	 * now names the newly activated image, so `!image' indexes the
	 * formerly active one; `rec' still holds its original neighbor
	 * ids there, since only the other image's pointers were reset
	 * above.
	 */
	if (rec.rec_next[!image] != DN_NOREC) {
		rec_next.rec_prev[!image] = rec.rec_prev[!image];
		if (write_rec(fd, &rec_next, rec.rec_next[!image]) == -1)
			return (syserr_to_dsvcerr(errno));
	}

	if (rec.rec_prev[!image] != DN_HASHHEAD) {
		rec_prev.rec_next[!image] = rec.rec_next[!image];
		if (write_rec(fd, &rec_prev, rec.rec_prev[!image]) == -1)
			return (syserr_to_dsvcerr(errno));
	} else {
		if (write_hashhead(fd, rec.rec_next[!image], hash, !image)
		    == -1)
			return (syserr_to_dsvcerr(errno));
	}

	/*
	 * Only now that the neighbors no longer need them, clear the
	 * record's pointers in the formerly active image as well.
	 */
	rec.rec_next[!image] = DN_NOREC;
	rec.rec_prev[!image] = DN_NOREC;
	if (write_rec(fd, &rec, recid) == -1)
		return (syserr_to_dsvcerr(errno));

	/*
	 * Finally, mark the container as clean.
	 */
	return (setabyte(fd, offsetof(dn_header_t, dnh_dirty), 0));
}
 826 
 827 int
 828 modify_dn(void *handle, const dn_rec_t *origp, dn_rec_t *newp)
 829 {
 830         dn_filerec_t    rec, new_rec, rec_head, rec_next, rec_prev;
 831         dn_recid_t      recid, new_recid, recid_head;
 832         uint16_t        hash, new_hash;
 833         uchar_t         image;
 834         int             retval;
 835         dn_handle_t     *dhp = (dn_handle_t *)handle;
 836         int             fd = dhp->dh_fd;
 837 
 838         if ((dhp->dh_oflags & DSVC_WRITE) == 0)
 839                 return (DSVC_ACCESS);
 840 
 841         retval = check_dn(dhp);
 842         if (retval != DSVC_SUCCESS)
 843                 return (retval);
 844 
 845         /*
 846          * Get the active image
 847          */
 848         retval = getabyte(fd, offsetof(dn_header_t, dnh_image), &image);
 849         if (retval != DSVC_SUCCESS)
 850                 return (retval);
 851 
 852         /*
 853          * Find the original entry in the network table, make sure the
 854          * entry is in-use, and check the signature field (to guard against
 855          * collisions).
 856          */
 857         recid = RECID(origp->dn_cip.s_addr, dhp->dh_netmask);
 858         if (read_rec(fd, &rec, recid) == -1)
 859                 return (syserr_to_dsvcerr(errno));
 860 
 861         if (rec.rec_prev[image] == DN_NOREC)
 862                 return (DSVC_NOENT);
 863 
 864         if (rec.rec_dn.dn_sig != origp->dn_sig)
 865                 return (DSVC_COLLISION);
 866 
 867         /*
 868          * Check if the record id is changing (as a result of modifying the
 869          * IP address). If it is, then make sure the new one is available
 870          * (if not, fail with DSVC_EXISTS).
 871          */
 872         new_recid = RECID(newp->dn_cip.s_addr, dhp->dh_netmask);
 873         if (recid != new_recid) {
 874                 if (read_rec(fd, &new_rec, new_recid) == -1)
 875                         return (syserr_to_dsvcerr(errno));
 876                 if (new_rec.rec_prev[image] != DN_NOREC)
 877                         return (DSVC_EXISTS);
 878         }
 879 
 880         /*
 881          * Update the record with the new information.
 882          */
 883         new_rec.rec_dn = *newp;
 884         new_rec.rec_dn.dn_sig = origp->dn_sig + 1;
 885 
 886         /*
 887          * Find out if our hash chain is changing.  If so, then update the
 888          * new record's record id pointers to be on the new chain;
 889          * otherwise just take the original record's pointers.  Note that
 890          * in either case, only update the alternate image pointers, so
 891          * that the new record becomes in-use when we switch to the
 892          * alternate image.
 893          */
 894         hash = cidhash(rec.rec_dn.dn_cid, rec.rec_dn.dn_cid_len);
 895         new_hash = cidhash(newp->dn_cid, newp->dn_cid_len);
 896 
 897         if (hash == new_hash) {
 898                 new_rec.rec_prev[!image] = rec.rec_prev[image];
 899                 new_rec.rec_next[!image] = rec.rec_next[image];
 900         } else {
 901                 if (read_hashhead(fd, &recid_head, new_hash, image) == -1)
 902                         return (syserr_to_dsvcerr(errno));
 903 
 904                 new_rec.rec_prev[!image] = DN_HASHHEAD;
 905                 new_rec.rec_next[!image] = recid_head;
 906         }
 907 
 908         /*
 909          * Write the record out; if this means overwriting the old record,
 910          * then write to a temporary record instead.
 911          */
 912         if (write_rec(fd, &new_rec, new_recid == recid ? DN_TEMPREC : new_recid)
 913             == -1)
 914                 return (syserr_to_dsvcerr(errno));
 915 
 916         /*
 917          * Mark the container as dirty so that there's no chance the
 918          * container is inconsistent without us knowing about it.
 919          */
 920         retval = setabyte(fd, offsetof(dn_header_t, dnh_dirty), 1);
 921         if (retval != DSVC_SUCCESS)
 922                 return (retval);
 923 
 924         /*
 925          * If we've changed either the hash chain or the record id, then
 926          * update our neighboring records' record id pointers.  If we're
 927          * changing hash chains, then remove ourselves from the old
 928          * hash chain and insert ourselves on the new one -- otherwise, if
 929          * we're changing record id's, then update our neighbors with our
 930          * new record id.  Note that we only apply these changes to the
 931          * alternate image for now so that we can recover upon failure.
 932          */
 933         if (hash != new_hash || recid != new_recid) {
 934                 if (rec.rec_next[image] != DN_NOREC) {
 935                         if (read_rec(fd, &rec_next, rec.rec_next[image]) == -1)
 936                                 return (syserr_to_dsvcerr(errno));
 937                 }
 938                 if (rec.rec_prev[image] != DN_HASHHEAD) {
 939                         if (read_rec(fd, &rec_prev, rec.rec_prev[image]) == -1)
 940                                 return (syserr_to_dsvcerr(errno));
 941                 }
 942 
 943                 if (hash != new_hash) {
 944                         rec_next.rec_prev[!image] = rec.rec_prev[!image];
 945                         rec_prev.rec_next[!image] = rec.rec_next[!image];
 946                 } else {
 947                         rec_next.rec_prev[!image] = new_recid;
 948                         rec_prev.rec_next[!image] = new_recid;
 949                 }
 950 
 951                 if (rec.rec_next[image] != DN_NOREC) {
 952                         if (write_rec(fd, &rec_next, rec.rec_next[image]) == -1)
 953                                 return (syserr_to_dsvcerr(errno));
 954                 }
 955                 if (rec.rec_prev[image] != DN_HASHHEAD) {
 956                         if (write_rec(fd, &rec_prev, rec.rec_prev[image]) == -1)
 957                                 return (syserr_to_dsvcerr(errno));
 958                 } else {
 959                         if (write_hashhead(fd, rec_prev.rec_next[!image], hash,
 960                             !image) == -1)
 961                                 return (syserr_to_dsvcerr(errno));
 962                 }
 963 
 964                 /*
 965                  * If our hash is changing, update the alternate image
 966                  * record id pointers to point to our moved record.
 967                  */
 968                 if (hash != new_hash) {
 969                         if (recid_head != DN_NOREC) {
 970                                 if (read_rec(fd, &rec_head, recid_head) == -1)
 971                                         return (syserr_to_dsvcerr(errno));
 972                                 rec_head.rec_prev[!image] = new_recid;
 973                                 if (write_rec(fd, &rec_head, recid_head) == -1)
 974                                         return (syserr_to_dsvcerr(errno));
 975                         }
 976                         if (write_hashhead(fd, new_recid, new_hash, !image)
 977                             == -1)
 978                                 return (syserr_to_dsvcerr(errno));
 979                 }
 980 
 981                 /*
 982                  * If our record id is changing, reset the old record's
 983                  * alternate image record id pointers, so that the old
 984                  * record will not be in-use once we switch over to the
 985                  * alternate image.
 986                  */
 987                 if (recid != new_recid) {
 988                         rec.rec_prev[!image] = DN_NOREC;
 989                         rec.rec_next[!image] = DN_NOREC;
 990                         if (write_rec(fd, &rec, recid) == -1)
 991                                 return (syserr_to_dsvcerr(errno));
 992                 }
 993         }
 994 
 995         /*
 996          * If we're using the temporary record, then set `dnh_tempimage' to
 997          * the image that will be active when we're done.  This piece of
 998          * state is critical in the case of failure, since it indicates
 999          * both that the temporary record is valid, and tells us whether we
1000          * failed before or after activating the alternate image (below).
1001          * If we failed before activating the alternate image, then the
1002          * failure code can just reset `dnh_tempimage' to DN_NOIMAGE and
1003          * resynchronize the pointers.  Otherwise, we failed somewhere
1004          * after making the alternate image active but before we completed
1005          * copying the temporary record over to the actual record, which
1006          * the recovery code will then complete on our behalf before
1007          * resynchronizing the pointers.
1008          */
1009         if (recid == new_recid) {
1010                 retval = setabyte(fd, offsetof(dn_header_t, dnh_tempimage),
1011                     !image);
1012                 if (retval != DSVC_SUCCESS)
1013                         return (retval);
1014         }
1015 
1016         /*
1017          * Activate the alternate image.  This is our commit point -- if we
1018          * fail after this point, we will roll forward on recovery.
1019          */
1020         image = !image;
1021         retval = setabyte(fd, offsetof(dn_header_t, dnh_image), image);
1022         if (retval != DSVC_SUCCESS)
1023                 return (retval);
1024 
1025         /*
1026          * If we used the temporary record, copy the data into the actual
1027          * record.  Once finished, reset `dnh_tempimage' to DN_NOIMAGE
1028          * since the temporary record no longer needs to be used.
1029          */
1030         if (recid == new_recid) {
1031                 if (write_rec(fd, &new_rec, new_recid) == -1)
1032                         return (syserr_to_dsvcerr(errno));
1033 
1034                 retval = setabyte(fd, offsetof(dn_header_t, dnh_tempimage),
1035                     DN_NOIMAGE);
1036                 if (retval != DSVC_SUCCESS)
1037                         return (retval);
1038         }
1039 
1040         /*
1041          * Update the old record id pointers to match.
1042          */
1043         new_rec.rec_prev[!image] = new_rec.rec_prev[image];
1044         new_rec.rec_next[!image] = new_rec.rec_next[image];
1045         if (write_rec(fd, &new_rec, new_recid) == -1)
1046                 return (syserr_to_dsvcerr(errno));
1047 
1048         if (hash != new_hash || recid != new_recid) {
1049                 if (rec.rec_next[image] != DN_NOREC) {
1050                         rec_next.rec_prev[!image] = rec.rec_prev[image];
1051                         if (write_rec(fd, &rec_next, rec.rec_next[image]) == -1)
1052                                 return (syserr_to_dsvcerr(errno));
1053                 }
1054                 if (rec.rec_prev[image] != DN_HASHHEAD) {
1055                         rec_prev.rec_next[!image] = rec.rec_next[image];
1056                         if (write_rec(fd, &rec_prev, rec.rec_prev[image]) == -1)
1057                                 return (syserr_to_dsvcerr(errno));
1058                 } else {
1059                         if (write_hashhead(fd, rec.rec_next[image], hash,
1060                             !image) == -1)
1061                                 return (syserr_to_dsvcerr(errno));
1062                 }
1063 
1064                 /*
1065                  * If our hash changed, update the alternate image record
1066                  * id pointers to point to our moved record.
1067                  */
1068                 if (hash != new_hash) {
1069                         if (recid_head != DN_NOREC) {
1070                                 rec_head.rec_prev[!image] =
1071                                     rec_head.rec_prev[image];
1072                                 if (write_rec(fd, &rec_head, recid_head) == -1)
1073                                         return (syserr_to_dsvcerr(errno));
1074                         }
1075                         if (write_hashhead(fd, new_recid, new_hash, !image)
1076                             == -1)
1077                                 return (syserr_to_dsvcerr(errno));
1078                 }
1079 
1080                 /*
1081                  * If our record id changed, then finish marking the old
1082                  * record as "not in use".
1083                  */
1084                 if (recid != new_recid) {
1085                         rec.rec_prev[!image] = DN_NOREC;
1086                         rec.rec_next[!image] = DN_NOREC;
1087                         if (write_rec(fd, &rec, recid) == -1)
1088                                 return (syserr_to_dsvcerr(errno));
1089                 }
1090         }
1091 
1092         /*
1093          * Update the signature on the new record handed back to the caller.
1094          */
1095         newp->dn_sig = new_rec.rec_dn.dn_sig;
1096 
1097         /*
1098          * Finally, mark the container as clean.
1099          */
1100         return (setabyte(fd, offsetof(dn_header_t, dnh_dirty), 0));
1101 }
1102 
1103 int
1104 list_dn(const char *location, char ***listppp, uint_t *countp)
1105 {
1106         char            ipaddr[INET_ADDRSTRLEN];
1107         struct dirent   *result;
1108         DIR             *dirp;
1109         unsigned int    i, count = 0;
1110         char            *re, **new_listpp, **listpp = NULL;
1111         char            conver[4];
1112         int             error;
1113 
1114         dirp = opendir(location);
1115         if (dirp == NULL) {
1116                 switch (errno) {
1117                 case EACCES:
1118                 case EPERM:
1119                         return (DSVC_ACCESS);
1120                 case ENOENT:
1121                         return (DSVC_NO_LOCATION);
1122                 default:
1123                         break;
1124                 }
1125                 return (DSVC_INTERNAL);
1126         }
1127 
1128         /*
1129          * Compile a regular expression matching "SUNWbinfilesX_" (where X
1130          * is a container version number) followed by an IP address
1131          * (roughly speaking).  Note that the $N constructions allow us to
1132          * get the container version and IP address when calling regex(3C).
1133          */
1134         re = regcmp("^SUNWbinfiles([0-9]{1,3})$0_"
1135             "(([0-9]{1,3}_){3}[0-9]{1,3})$1$", (char *)0);
1136         if (re == NULL)
1137                 return (DSVC_NO_MEMORY);
1138 
1139         while ((result = readdir(dirp)) != NULL) {
1140 
1141                 if (regex(re, result->d_name, conver, ipaddr) != NULL) {
1142                         if (atoi(conver) != DSVC_CONVER)
1143                                 continue;
1144 
1145                         for (i = 0; ipaddr[i] != '\0'; i++)
1146                                 if (ipaddr[i] == '_')
1147                                         ipaddr[i] = '.';
1148 
1149                         new_listpp = realloc(listpp,
1150                             (sizeof (char **)) * (count + 1));
1151                         if (new_listpp == NULL) {
1152                                 error = DSVC_NO_MEMORY;
1153                                 goto fail;
1154                         }
1155                         listpp = new_listpp;
1156                         listpp[count] = strdup(ipaddr);
1157                         if (listpp[count] == NULL) {
1158                                 error = DSVC_NO_MEMORY;
1159                                 goto fail;
1160                         }
1161                         count++;
1162                 }
1163         }
1164         free(re);
1165         (void) closedir(dirp);
1166 
1167         *countp = count;
1168         *listppp = listpp;
1169         return (DSVC_SUCCESS);
1170 fail:
1171         free(re);
1172         (void) closedir(dirp);
1173 
1174         for (i = 0; i < count; i++)
1175                 free(listpp[i]);
1176         free(listpp);
1177         return (error);
1178 }
1179 
1180 /*
1181  * Check (a la fsck) that a given DHCP network container is in a consistent
1182  * state.  If not, then attempt to restore internal consistency; this should
1183  * always be possible unless the container has been externally corrupted.
1184  */
1185 static int
1186 check_dn(dn_handle_t *dhp)
1187 {
1188         dn_header_t     header;
1189         uchar_t         image, dirty;
1190         uint16_t        hash;
1191         dn_filerec_t    rec;
1192         dn_recid_t      recid, maxrecid;
1193         int             retval;
1194 
1195         /*
1196          * Reading the whole header is a very expensive operation; only do
1197          * it once we're sure the container is actually dirty.  On an
1198          * E4500, this optimization lowers the wall-clock cost of creating
1199          * a 5000-record datastore by 20 percent.
1200          */
1201         retval = getabyte(dhp->dh_fd, offsetof(dn_header_t, dnh_dirty), &dirty);
1202         if (retval != DSVC_SUCCESS)
1203                 return (retval);
1204 
1205         if (dirty == 0)
1206                 return (DSVC_SUCCESS);
1207 
1208         if (read_header(dhp->dh_fd, &header, B_TRUE) == -1)
1209                 return (syserr_to_dsvcerr(errno));
1210 
1211         /*
1212          * If `dnh_tempimage' matches the current working image, then we
1213          * crashed in the middle of a modify_dn() operation.  Complete
1214          * writing out the temporary record before restoring internal
1215          * consistency.  This is a bit of a kludge but there doesn't seem
1216          * to be another way.
1217          */
1218         if (header.dnh_tempimage == header.dnh_image) {
1219                 recid = RECID(header.dnh_temp.rec_dn.dn_cip.s_addr,
1220                     header.dnh_netmask);
1221                 if (write_rec(dhp->dh_fd, &header.dnh_temp, recid) == -1)
1222                         return (syserr_to_dsvcerr(errno));
1223 
1224                 header.dnh_tempimage = DN_NOIMAGE;
1225         }
1226 
1227         /*
1228          * Blindly update all the header hashhead pointers since we're
1229          * going to have to re-write the header anyway.
1230          */
1231         image = header.dnh_image;
1232         for (hash = 0; hash < DN_CIDHASHSZ; hash++) {
1233                 header.dnh_cidhash[hash][!image] =
1234                     header.dnh_cidhash[hash][image];
1235         }
1236 
1237         /*
1238          * Synchronize the record pointers of all in-use records.  We do
1239          * this instead of just walking the hashheads because not all dirty
1240          * records are hashed (for instance, we may have failed part way
1241          * through an add_dn()).
1242          */
1243         maxrecid = RECID(~0, header.dnh_netmask);
1244         for (recid = RECID(0, header.dnh_netmask); recid <= maxrecid; recid++) {
1245                 if (read_rec(dhp->dh_fd, &rec, recid) == -1)
1246                         return (syserr_to_dsvcerr(errno));
1247 
1248                 /*
1249                  * Verify the pointers match.  If not, then correct
1250                  * the record and write it back to disk.
1251                  */
1252                 if (rec.rec_next[image] != rec.rec_next[!image] ||
1253                     rec.rec_prev[image] != rec.rec_prev[!image]) {
1254                         header.dnh_errors++;
1255 
1256                         rec.rec_prev[!image] = rec.rec_prev[image];
1257                         rec.rec_next[!image] = rec.rec_next[image];
1258 
1259                         if (write_rec(dhp->dh_fd, &rec, recid) == -1)
1260                                 return (syserr_to_dsvcerr(errno));
1261                 }
1262         }
1263 
1264         header.dnh_checks++;
1265         if (write_header(dhp->dh_fd, &header) == -1)
1266                 return (syserr_to_dsvcerr(errno));
1267 
1268         /*
1269          * Clear the dirty bit on the container.
1270          */
1271         return (setabyte(dhp->dh_fd, offsetof(dn_header_t, dnh_dirty), 0));
1272 }
1273 
1274 /*
1275  * Given a buffer `path' of `pathlen' bytes, fill it in with a path to the
1276  * DHCP Network table for IP network `ip' located in directory `dir'.
1277  */
1278 static void
1279 net2path(char *path, size_t pathlen, const char *dir, ipaddr_t ip)
1280 {
1281         (void) snprintf(path, pathlen, "%s/SUNWbinfiles%u_%d_%d_%d_%d", dir,
1282             DSVC_CONVER, ip >> 24, (ip >> 16) & 0xff, (ip >> 8) & 0xff,
1283             ip & 0xff);
1284 }
1285 
1286 /*
1287  * Given a `cid' that's `cidlen' bytes long, hash it to a value between 0
1288  * and DN_CIDHASHSZ - 1.  We use CRC16 for our hash since it's known to be
1289  * very evenly distributed.
1290  */
1291 static uint16_t
1292 cidhash(const uchar_t *cid, size_t cidlen)
1293 {
1294         uchar_t         bit;
1295         uint16_t        result = 0xffff;
1296         const uint16_t  crc16_poly = 0x8408; /* mutated CRC-CCITT polynomial */
1297 
1298         while (cidlen-- != 0) {
1299                 result ^= *cid++;
1300                 for (bit = 0; bit < 8; bit++) {
1301                         if (result & 1)
1302                                 result = (result >> 1) ^ crc16_poly;
1303                         else
1304                                 result >>= 1;
1305                 }
1306         }
1307         return (result % DN_CIDHASHSZ);
1308 }
1309 
1310 /*
1311  * Convert the dn_filerec_t pointed to by `rec' from native (host) to
1312  * network order or the other way.
1313  */
1314 /* ARGSUSED */
1315 static void
1316 nhconvert_rec(dn_filerec_t *rec)
1317 {
1318 #ifdef  _LITTLE_ENDIAN
1319         dn_rec_t *dnp = &rec->rec_dn;
1320 
1321         nhconvert(&rec->rec_prev[0], &rec->rec_prev[0], sizeof (dn_recid_t));
1322         nhconvert(&rec->rec_prev[1], &rec->rec_prev[1], sizeof (dn_recid_t));
1323         nhconvert(&rec->rec_next[0], &rec->rec_next[0], sizeof (dn_recid_t));
1324         nhconvert(&rec->rec_next[1], &rec->rec_next[1], sizeof (dn_recid_t));
1325 
1326         nhconvert(&dnp->dn_cip.s_addr, &dnp->dn_cip.s_addr, sizeof (ipaddr_t));
1327         nhconvert(&dnp->dn_sip.s_addr, &dnp->dn_sip.s_addr, sizeof (ipaddr_t));
1328         nhconvert(&dnp->dn_lease, &dnp->dn_lease, sizeof (lease_t));
1329         nhconvert(&dnp->dn_sig, &dnp->dn_sig, sizeof (uint64_t));
1330 #endif
1331 }
1332 
1333 /*
1334  * Convert the header pointed to by `hdrp' from native (host) to network
1335  * order or the other way.  If `hash' is false, then don't bother
1336  * converting the hash chains.
1337  */
1338 /* ARGSUSED */
1339 static void
1340 nhconvert_header(dn_header_t *hdrp, boolean_t hash)
1341 {
1342 #ifdef  _LITTLE_ENDIAN
1343         unsigned int i;
1344 
1345         nhconvert(&hdrp->dnh_network, &hdrp->dnh_network, sizeof (ipaddr_t));
1346         nhconvert(&hdrp->dnh_netmask, &hdrp->dnh_netmask, sizeof (ipaddr_t));
1347         nhconvert(&hdrp->dnh_magic, &hdrp->dnh_magic, sizeof (uint32_t));
1348         nhconvert_rec(&hdrp->dnh_temp);
1349 
1350         if (hash) {
1351                 for (i = 0; i < DN_CIDHASHSZ; i++) {
1352                         nhconvert(&hdrp->dnh_cidhash[i][0],
1353                             &hdrp->dnh_cidhash[i][0], sizeof (dn_recid_t));
1354                         nhconvert(&hdrp->dnh_cidhash[i][1],
1355                             &hdrp->dnh_cidhash[i][1], sizeof (dn_recid_t));
1356                 }
1357         }
1358 #endif
1359 }
1360 
1361 /*
1362  * Read the dn_filerec_t identified by `recid' from open container `fd'
1363  * into `rec'.  Returns 0 on success, -1 on failure (errno is set).
1364  */
1365 static int
1366 read_rec(int fd, dn_filerec_t *rec, dn_recid_t recid)
1367 {
1368         if (pnread(fd, rec, sizeof (*rec), RECID2OFFSET(recid)) == -1)
1369                 return (-1);
1370 
1371         nhconvert_rec(rec);
1372         return (0);
1373 }
1374 
1375 /*
1376  * Write the dn_filerec_t `rec' identified by `recid' into the open
1377  * container `fd'.  Returns 0 on success, -1 on failure (errno is set).
1378  */
1379 static int
1380 write_rec(int fd, dn_filerec_t *rec, dn_recid_t recid)
1381 {
1382         int retval;
1383 
1384         nhconvert_rec(rec);
1385         retval = pnwrite(fd, rec, sizeof (*rec), RECID2OFFSET(recid));
1386         nhconvert_rec(rec);
1387         return (retval);
1388 }
1389 
1390 /*
1391  * Read the dn_header_t from the open container `fd' into the dn_header_t
1392  * pointed to by `hdrp'; if `hash' is not set, then skip reading the
1393  * dn_header_t hash chains.  Returns 0 on success, -1 on failure (errno is
1394  * set).
1395  */
1396 static int
1397 read_header(int fd, dn_header_t *hdrp, boolean_t hash)
1398 {
1399         size_t size;
1400 
1401         size = hash ? sizeof (dn_header_t) : offsetof(dn_header_t, dnh_cidhash);
1402         if (pnread(fd, hdrp, size, 0) == -1)
1403                 return (-1);
1404 
1405         nhconvert_header(hdrp, hash);
1406         return (0);
1407 }
1408 
1409 /*
1410  * Write the dn_header_t pointed to by `hdrp' into open container `fd'.
1411  * Returns 0 on success, -1 on failure (errno is set).
1412  */
1413 static int
1414 write_header(int fd, dn_header_t *hdrp)
1415 {
1416         int retval;
1417 
1418         nhconvert_header(hdrp, B_TRUE);
1419         retval = pnwrite(fd, hdrp, sizeof (dn_header_t), 0);
1420         nhconvert_header(hdrp, B_TRUE);
1421         return (retval);
1422 }
1423 
1424 /*
1425  * Read in the head of the `cidhash' hash chain from open container `fd'
1426  * into `recid_headp', using image `image'.  Returns 0 on success, -1 on
1427  * failure (errno is set).
1428  */
1429 static int
1430 read_hashhead(int fd, dn_recid_t *recid_headp, uint16_t cidhash, uchar_t image)
1431 {
1432         if (pnread(fd, recid_headp, sizeof (dn_recid_t),
1433             offsetof(dn_header_t, dnh_cidhash[cidhash][image])) == -1)
1434                 return (-1);
1435 
1436         nhconvert(recid_headp, recid_headp, sizeof (dn_recid_t));
1437         return (0);
1438 }
1439 
1440 /*
1441  * Write out the head of the `cidhash' hash chain into open container `fd'
1442  * from `recid_head', using image `image'.  Returns 0 on success, -1 on
1443  * failure (errno is set).
1444  */
1445 static int
1446 write_hashhead(int fd, dn_recid_t recid_head, uint16_t cidhash, uchar_t image)
1447 {
1448         nhconvert(&recid_head, &recid_head, sizeof (dn_recid_t));
1449         return (pnwrite(fd, &recid_head, sizeof (dn_recid_t),
1450             offsetof(dn_header_t, dnh_cidhash[cidhash][image])));
1451 }
1452 
1453 /*
1454  * Get the byte `offset' bytes into open file `fd', and store in `bytep'.
1455  * Returns a DSVC_* return code.
1456  */
1457 static int
1458 getabyte(int fd, off_t offset, uchar_t *bytep)
1459 {
1460         switch (pread(fd, bytep, 1, offset)) {
1461         case 1:
1462                 return (DSVC_SUCCESS);
1463         case -1:
1464                 return (syserr_to_dsvcerr(errno));
1465         default:
1466                 break;
1467         }
1468 
1469         return (DSVC_INTERNAL);
1470 }
1471 
1472 /*
1473  * Set the byte `offset' bytes into open file `fd' to `byte'.  Returns a
1474  * DSVC_* return code.
1475  */
1476 static int
1477 setabyte(int fd, off_t offset, uchar_t byte)
1478 {
1479         switch (pwrite(fd, &byte, 1, offset)) {
1480         case 1:
1481                 return (DSVC_SUCCESS);
1482         case -1:
1483                 return (syserr_to_dsvcerr(errno));
1484         default:
1485                 break;
1486         }
1487 
1488         return (DSVC_INTERNAL);
1489 }