il_5595 Wdiff usr/src/tools/ctf/cvt/ctfmerge.c

Print this page

5595 libzpool won't build with a studio primary

Split	Close
Expand all
Collapse all

          --- old/usr/src/tools/ctf/cvt/ctfmerge.c
          +++ new/usr/src/tools/ctf/cvt/ctfmerge.c

   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the

↓ open down ↓

15 lines elided

↑ open up ↑

  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  23   23   * Use is subject to license terms.
  24   24   */
  25   25  
  26      -#pragma ident   "%Z%%M% %I%     %E% SMI"
  27      -
  28   26  /*
  29   27   * Given several files containing CTF data, merge and uniquify that data into
  30   28   * a single CTF section in an output file.
  31   29   *
  32   30   * Merges can proceed independently.  As such, we perform the merges in parallel
  33   31   * using a worker thread model.  A given glob of CTF data (either all of the CTF
  34   32   * data from a single input file, or the result of one or more merges) can only
  35   33   * be involved in a single merge at any given time, so the process decreases in
  36   34   * parallelism, especially towards the end, as more and more files are
  37   35   * consolidated, finally resulting in a single merge of two large CTF graphs.

  38   36   * Unfortunately, the last merge is also the slowest, as the two graphs being
  39   37   * merged are each the product of merges of half of the input files.
  40   38   *
  41   39   * The algorithm consists of two phases, described in detail below.  The first
  42   40   * phase entails the merging of CTF data in groups of eight.  The second phase
  43   41   * takes the results of Phase I, and merges them two at a time.  This disparity
  44   42   * is due to an observation that the merge time increases at least quadratically
  45   43   * with the size of the CTF data being merged.  As such, merges of CTF graphs
  46   44   * newly read from input files are much faster than merges of CTF graphs that
  47   45   * are themselves the results of prior merges.
  48   46   *
  49   47   * A further complication is the need to ensure the repeatability of CTF merges.
  50   48   * That is, a merge should produce the same output every time, given the same
  51   49   * input.  In both phases, this consistency requirement is met by imposing an
  52   50   * ordering on the merge process, thus ensuring that a given set of input files
  53   51   * are merged in the same order every time.
  54   52   *
  55   53   *   Phase I
  56   54   *
  57   55   *   The main thread reads the input files one by one, transforming the CTF
  58   56   *   data they contain into tdata structures.  When a given file has been read
  59   57   *   and parsed, it is placed on the work queue for retrieval by worker threads.
  60   58   *
  61   59   *   Central to Phase I is the Work In Progress (wip) array, which is used to
  62   60   *   merge batches of files in a predictable order.  Files are read by the main
  63   61   *   thread, and are merged into wip array elements in round-robin order.  When
  64   62   *   the number of files merged into a given array slot equals the batch size,
  65   63   *   the merged CTF graph in that array is added to the done slot in order by
  66   64   *   array slot.
  67   65   *
  68   66   *   For example, consider a case where we have five input files, a batch size
  69   67   *   of two, a wip array size of two, and two worker threads (T1 and T2).
  70   68   *
  71   69   *    1. The wip array elements are assigned initial batch numbers 0 and 1.
  72   70   *    2. T1 reads an input file from the input queue (wq_queue).  This is the
  73   71   *       first input file, so it is placed into wip[0].  The second file is
  74   72   *       similarly read and placed into wip[1].  The wip array slots now contain
  75   73   *       one file each (wip_nmerged == 1).
  76   74   *    3. T1 reads the third input file, which it merges into wip[0].  The
  77   75   *       number of files in wip[0] is equal to the batch size.
  78   76   *    4. T2 reads the fourth input file, which it merges into wip[1].  wip[1]
  79   77   *       is now full too.
  80   78   *    5. T2 attempts to place the contents of wip[1] on the done queue
  81   79   *       (wq_donequeue), but it can't, since the batch ID for wip[1] is 1.
  82   80   *       Batch 0 needs to be on the done queue before batch 1 can be added, so
  83   81   *       T2 blocks on wip[1]'s cv.
  84   82   *    6. T1 attempts to place the contents of wip[0] on the done queue, and
  85   83   *       succeeds, updating wq_lastdonebatch to 0.  It clears wip[0], and sets
  86   84   *       its batch ID to 2.  T1 then signals wip[1]'s cv to awaken T2.
  87   85   *    7. T2 wakes up, notices that wq_lastdonebatch is 0, which means that
  88   86   *       batch 1 can now be added.  It adds wip[1] to the done queue, clears
  89   87   *       wip[1], and sets its batch ID to 3.  It signals wip[0]'s cv, and
  90   88   *       restarts.
  91   89   *
  92   90   *   The above process continues until all input files have been consumed.  At
  93   91   *   this point, a pair of barriers are used to allow a single thread to move
  94   92   *   any partial batches from the wip array to the done array in batch ID order.
  95   93   *   When this is complete, wq_donequeue is moved to wq_queue, and Phase II
  96   94   *   begins.
  97   95   *
  98   96   *      Locking Semantics (Phase I)
  99   97   *
 100   98   *      The input queue (wq_queue) and the done queue (wq_donequeue) are
 101   99   *      protected by separate mutexes - wq_queue_lock and wq_donequeue_lock.  wip
 102  100   *      array slots are protected by their own mutexes, which must be grabbed
 103  101   *      before releasing the input queue lock.  The wip array lock is dropped
 104  102   *      when the thread restarts the loop.  If the array slot was full, the
 105  103   *      array lock will be held while the slot contents are added to the done
 106  104   *      queue.  The done queue lock is used to protect the wip slot cv's.
 107  105   *
 108  106   *      The pow number is protected by the queue lock.  The master batch ID
 109  107   *      and last completed batch (wq_lastdonebatch) counters are protected *in
 110  108   *      Phase I* by the done queue lock.
 111  109   *
 112  110   *   Phase II
 113  111   *
 114  112   *   When Phase II begins, the queue consists of the merged batches from the
 115  113   *   first phase.  Assume we have five batches:
 116  114   *
 117  115   *      Q:      a b c d e
 118  116   *
 119  117   *   Using the same batch ID mechanism we used in Phase I, but without the wip
 120  118   *   array, worker threads remove two entries at a time from the beginning of
 121  119   *   the queue.  These two entries are merged, and are added back to the tail
 122  120   *   of the queue, as follows:
 123  121   *
 124  122   *      Q:      a b c d e       # start
 125  123   *      Q:      c d e ab        # a, b removed, merged, added to end
 126  124   *      Q:      e ab cd         # c, d removed, merged, added to end
 127  125   *      Q:      cd eab          # e, ab removed, merged, added to end
 128  126   *      Q:      cdeab           # cd, eab removed, merged, added to end
 129  127   *
 130  128   *   When one entry remains on the queue, with no merges outstanding, Phase II
 131  129   *   finishes.  We pre-determine the stopping point by pre-calculating the
 132  130   *   number of nodes that will appear on the list.  In the example above, the
 133  131   *   number (wq_ninqueue) is 9.  When ninqueue is 1, we conclude Phase II by
 134  132   *   signaling the main thread via wq_alldone_cv.
 135  133   *
 136  134   *      Locking Semantics (Phase II)
 137  135   *
 138  136   *      The queue (wq_queue), ninqueue, and the master batch ID and last
 139  137   *      completed batch counters are protected by wq_queue_lock.  The done
 140  138   *      queue and corresponding lock are unused in Phase II as is the wip array.
 141  139   *
 142  140   *   Uniquification
 143  141   *
 144  142   *   We want the CTF data that goes into a given module to be as small as
 145  143   *   possible.  For example, we don't want it to contain any type data that may
 146  144   *   be present in another common module.  As such, after creating the master
 147  145   *   tdata_t for a given module, we can, if requested by the user, uniquify it
 148  146   *   against the tdata_t from another module (genunix in the case of the SunOS
 149  147   *   kernel).  We perform a merge between the tdata_t for this module and the
 150  148   *   tdata_t from genunix.  Nodes found in this module that are not present in
 151  149   *   genunix are added to a third tdata_t - the uniquified tdata_t.
 152  150   *
 153  151   *   Additive Merges
 154  152   *
 155  153   *   In some cases, for example if we are issuing a new version of a common
 156  154   *   module in a patch, we need to make sure that the CTF data already present
 157  155   *   in that module does not change.  Changes to this data would void the CTF
 158  156   *   data in any module that uniquified against the common module.  To preserve
 159  157   *   the existing data, we can perform what is known as an additive merge.  In
 160  158   *   this case, a final uniquification is performed against the CTF data in the
 161  159   *   previous version of the module.  The result will be the placement of new
 162  160   *   and changed data after the existing data, thus preserving the existing type
 163  161   *   ID space.
 164  162   *
 165  163   *   Saving the result
 166  164   *
 167  165   *   When the merges are complete, the resulting tdata_t is placed into the
 168  166   *   output file, replacing the .SUNW_ctf section (if any) already in that file.
 169  167   *
 170  168   * The person who changes the merging thread code in this file without updating
 171  169   * this comment will not live to see the stock hit five.
 172  170   */
 173  171  
 174  172  #include <stdio.h>
 175  173  #include <stdlib.h>
 176  174  #include <unistd.h>
 177  175  #include <pthread.h>
 178  176  #include <assert.h>
 179  177  #include <synch.h>
 180  178  #include <signal.h>
 181  179  #include <libgen.h>
 182  180  #include <string.h>
 183  181  #include <errno.h>
 184  182  #include <alloca.h>
 185  183  #include <sys/param.h>
 186  184  #include <sys/types.h>
 187  185  #include <sys/mman.h>
 188  186  #include <sys/sysconf.h>
 189  187  
 190  188  #include "ctf_headers.h"
 191  189  #include "ctftools.h"
 192  190  #include "ctfmerge.h"
 193  191  #include "traverse.h"
 194  192  #include "memory.h"
 195  193  #include "fifo.h"
 196  194  #include "barrier.h"
 197  195  
 198  196  #pragma init(bigheap)
 199  197  
 200  198  #define MERGE_PHASE1_BATCH_SIZE         8
 201  199  #define MERGE_PHASE1_MAX_SLOTS          5
 202  200  #define MERGE_INPUT_THROTTLE_LEN        10
 203  201  
 204  202  const char *progname;
 205  203  static char *outfile = NULL;
 206  204  static char *tmpname = NULL;
 207  205  static int dynsym;
 208  206  int debug_level = DEBUG_LEVEL;
 209  207  static size_t maxpgsize = 0x400000;
 210  208  
 211  209  
 212  210  void
 213  211  usage(void)
 214  212  {
 215  213          (void) fprintf(stderr,
 216  214              "Usage: %s [-fgstv] -l label | -L labelenv -o outfile file ...\n"
 217  215              "       %s [-fgstv] -l label | -L labelenv -o outfile -d uniqfile\n"
 218  216              "       %*s [-g] [-D uniqlabel] file ...\n"
 219  217              "       %s [-fgstv] -l label | -L labelenv -o outfile -w withfile "
 220  218              "file ...\n"
 221  219              "       %s [-g] -c srcfile destfile\n"
 222  220              "\n"
 223  221              "  Note: if -L labelenv is specified and labelenv is not set in\n"
 224  222              "  the environment, a default value is used.\n",
 225  223              progname, progname, strlen(progname), " ",
 226  224              progname, progname);
 227  225  }
 228  226  
 229  227  static void
 230  228  bigheap(void)
 231  229  {
 232  230          size_t big, *size;
 233  231          int sizes;
 234  232          struct memcntl_mha mha;
 235  233  
 236  234          /*
 237  235           * First, get the available pagesizes.
 238  236           */
 239  237          if ((sizes = getpagesizes(NULL, 0)) == -1)
 240  238                  return;
 241  239  
 242  240          if (sizes == 1 || (size = alloca(sizeof (size_t) * sizes)) == NULL)
 243  241                  return;
 244  242  
 245  243          if (getpagesizes(size, sizes) == -1)
 246  244                  return;
 247  245  
 248  246          while (size[sizes - 1] > maxpgsize)
 249  247                  sizes--;
 250  248  
 251  249          /* set big to the largest allowed page size */
 252  250          big = size[sizes - 1];
 253  251          if (big & (big - 1)) {
 254  252                  /*
 255  253                   * The largest page size is not a power of two for some
 256  254                   * inexplicable reason; return.
 257  255                   */
 258  256                  return;
 259  257          }
 260  258  
 261  259          /*
 262  260           * Now, align our break to the largest page size.
 263  261           */
 264  262          if (brk((void *)((((uintptr_t)sbrk(0) - 1) & ~(big - 1)) + big)) != 0)
 265  263                  return;
 266  264  
 267  265          /*
 268  266           * set the preferred page size for the heap
 269  267           */
 270  268          mha.mha_cmd = MHA_MAPSIZE_BSSBRK;
 271  269          mha.mha_flags = 0;
 272  270          mha.mha_pagesize = big;
 273  271  
 274  272          (void) memcntl(NULL, 0, MC_HAT_ADVISE, (caddr_t)&mha, 0, 0);
 275  273  }
 276  274  
 277  275  static void
 278  276  finalize_phase_one(workqueue_t *wq)
 279  277  {
 280  278          int startslot, i;
 281  279  
 282  280          /*
 283  281           * wip slots are cleared out only when maxbatchsz td's have been merged
 284  282           * into them.  We're not guaranteed that the number of files we're
 285  283           * merging is a multiple of maxbatchsz, so there will be some partial
 286  284           * groups in the wip array.  Move them to the done queue in batch ID
 287  285           * order, starting with the slot containing the next batch that would
 288  286           * have been placed on the done queue, followed by the others.
 289  287           * One thread will be doing this while the others wait at the barrier
 290  288           * back in worker_thread(), so we don't need to worry about pesky things
 291  289           * like locks.
 292  290           */
 293  291  
 294  292          for (startslot = -1, i = 0; i < wq->wq_nwipslots; i++) {
 295  293                  if (wq->wq_wip[i].wip_batchid == wq->wq_lastdonebatch + 1) {
 296  294                          startslot = i;
 297  295                          break;
 298  296                  }
 299  297          }
 300  298  
 301  299          assert(startslot != -1);
 302  300  
 303  301          for (i = startslot; i < startslot + wq->wq_nwipslots; i++) {
 304  302                  int slotnum = i % wq->wq_nwipslots;
 305  303                  wip_t *wipslot = &wq->wq_wip[slotnum];
 306  304  
 307  305                  if (wipslot->wip_td != NULL) {
 308  306                          debug(2, "clearing slot %d (%d) (saving %d)\n",
 309  307                              slotnum, i, wipslot->wip_nmerged);
 310  308                  } else
 311  309                          debug(2, "clearing slot %d (%d)\n", slotnum, i);
 312  310  
 313  311                  if (wipslot->wip_td != NULL) {
 314  312                          fifo_add(wq->wq_donequeue, wipslot->wip_td);
 315  313                          wq->wq_wip[slotnum].wip_td = NULL;
 316  314                  }
 317  315          }
 318  316  
 319  317          wq->wq_lastdonebatch = wq->wq_next_batchid++;
 320  318  
 321  319          debug(2, "phase one done: donequeue has %d items\n",
 322  320              fifo_len(wq->wq_donequeue));
 323  321  }
 324  322  
 325  323  static void
 326  324  init_phase_two(workqueue_t *wq)
 327  325  {
 328  326          int num;
 329  327  
 330  328          /*
 331  329           * We're going to continually merge the first two entries on the queue,
 332  330           * placing the result on the end, until there's nothing left to merge.
 333  331           * At that point, everything will have been merged into one.  The
 334  332           * initial value of ninqueue needs to be equal to the total number of
 335  333           * entries that will show up on the queue, both at the start of the
 336  334           * phase and as generated by merges during the phase.
 337  335           */
 338  336          wq->wq_ninqueue = num = fifo_len(wq->wq_donequeue);
 339  337          while (num != 1) {
 340  338                  wq->wq_ninqueue += num / 2;
 341  339                  num = num / 2 + num % 2;
 342  340          }
 343  341  
 344  342          /*
 345  343           * Move the done queue to the work queue.  We won't be using the done
 346  344           * queue in phase 2.
 347  345           */
 348  346          assert(fifo_len(wq->wq_queue) == 0);
 349  347          fifo_free(wq->wq_queue, NULL);
 350  348          wq->wq_queue = wq->wq_donequeue;
 351  349  }
 352  350  
 353  351  static void
 354  352  wip_save_work(workqueue_t *wq, wip_t *slot, int slotnum)
 355  353  {
                   /*
                    * Append the graph held in `slot' (index `slotnum' of the
                    * wip array) to the done queue, in batch ID order, then
                    * recycle the slot under a fresh batch ID.  Everything here
                    * runs with wq_donequeue_lock held.
                    */
 356  354          pthread_mutex_lock(&wq->wq_donequeue_lock);
 357  355  
                   /*
                    * Sleep on this slot's cv until the batch immediately before
                    * ours has been recorded in wq_lastdonebatch.
                    */
 358  356          while (wq->wq_lastdonebatch + 1 < slot->wip_batchid)
 359  357                  pthread_cond_wait(&slot->wip_cv, &wq->wq_donequeue_lock);
 360  358          assert(wq->wq_lastdonebatch + 1 == slot->wip_batchid);
 361  359  
 362  360          fifo_add(wq->wq_donequeue, slot->wip_td);
 363  361          wq->wq_lastdonebatch++;
                   /*
                    * Signal the cv of the following slot (wrapping at the end of
                    * the array) so a thread parked there re-checks
                    * wq_lastdonebatch.
                    */
 364  362          pthread_cond_signal(&wq->wq_wip[(slotnum + 1) %
 365  363              wq->wq_nwipslots].wip_cv);
 366  364  
                   /*
                    * wip_nmerged is not cleared here; wip_add_work() sets it to
                    * 1 when it next finds wip_td NULL.
                    */
 367  365          /* reset the slot for next use */
 368  366          slot->wip_td = NULL;
 369  367          slot->wip_batchid = wq->wq_next_batchid++;
 370  368  
 371  369          pthread_mutex_unlock(&wq->wq_donequeue_lock);
 372  370  }
 373  371  
 374  372  static void
 375  373  wip_add_work(wip_t *slot, tdata_t *pow)
 376  374  {
 377  375          if (slot->wip_td == NULL) {
 378  376                  slot->wip_td = pow;
 379  377                  slot->wip_nmerged = 1;
 380  378          } else {
 381  379                  debug(2, "%d: merging %p into %p\n", pthread_self(),
 382  380                      (void *)pow, (void *)slot->wip_td);
 383  381  
 384  382                  merge_into_master(pow, slot->wip_td, NULL, 0);
 385  383                  tdata_free(pow);
 386  384  
 387  385                  slot->wip_nmerged++;
 388  386          }
 389  387  }
 390  388  
 391  389  static void
 392  390  worker_runphase1(workqueue_t *wq)
 393  391  {
                   /*
                    * Phase I worker loop: repeatedly take one tdata off the
                    * input queue, merge it into the wip slot chosen by its
                    * arrival number, and hand the slot to wip_save_work() once
                    * it has absorbed wq_maxbatchsz tdatas.  Returns when the
                    * queue is empty and wq_nomorefiles is set.
                    */
 394  392          wip_t *wipslot;
 395  393          tdata_t *pow;
 396  394          int wipslotnum, pownum;
 397  395  
 398  396          for (;;) {
 399  397                  pthread_mutex_lock(&wq->wq_queue_lock);
 400  398  
 401  399                  while (fifo_empty(wq->wq_queue)) {
 402  400                          if (wq->wq_nomorefiles == 1) {
                                           /*
                                            * Wake the other waiters on
                                            * wq_work_avail before leaving so
                                            * they can make the same check.
                                            */
 403  401                                  pthread_cond_broadcast(&wq->wq_work_avail);
 404  402                                  pthread_mutex_unlock(&wq->wq_queue_lock);
 405  403  
 406  404                                  /* on to phase 2 ... */
 407  405                                  return;
 408  406                          }
 409  407  
 410  408                          pthread_cond_wait(&wq->wq_work_avail,
 411  409                              &wq->wq_queue_lock);
 412  410                  }
 413  411  
 414  412                  /* there's work to be done! */
 415  413                  pow = fifo_remove(wq->wq_queue);
                           /* pownum numbers the items in the order they are removed */
 416  414                  pownum = wq->wq_nextpownum++;
                           /* merge_ctf_cb() waits on wq_work_removed while throttled */
 417  415                  pthread_cond_broadcast(&wq->wq_work_removed);
 418  416  
 419  417                  assert(pow != NULL);
 420  418  
 421  419                  /* merge it into the right slot */
 422  420                  wipslotnum = pownum % wq->wq_nwipslots;
 423  421                  wipslot = &wq->wq_wip[wipslotnum];
 424  422  
                           /*
                            * The slot lock is taken before the queue lock is
                            * dropped, as the locking notes at the top of the file
                            * require.
                            */
 425  423                  pthread_mutex_lock(&wipslot->wip_lock);
 426  424  
 427  425                  pthread_mutex_unlock(&wq->wq_queue_lock);
 428  426  
 429  427                  wip_add_work(wipslot, pow);
 430  428  
                           /* a full batch goes to the done queue, slot lock held */
 431  429                  if (wipslot->wip_nmerged == wq->wq_maxbatchsz)
 432  430                          wip_save_work(wq, wipslot, wipslotnum);
 433  431  
 434  432                  pthread_mutex_unlock(&wipslot->wip_lock);
 435  433          }
 436  434  }
 437  435  
 438  436  static void
 439  437  worker_runphase2(workqueue_t *wq)
 440  438  {
                   /*
                    * Phase II worker loop: take the first two graphs off the
                    * queue, merge the first into the second, and put the result
                    * back on the tail in batch ID order.  Returns once
                    * wq_ninqueue has dropped to 1.
                    */
 441  439          tdata_t *pow1, *pow2;
 442  440          int batchid;
 443  441  
 444  442          for (;;) {
 445  443                  pthread_mutex_lock(&wq->wq_queue_lock);
 446  444  
 447  445                  if (wq->wq_ninqueue == 1) {
                                   /* wake any thread still waiting for work */
 448  446                          pthread_cond_broadcast(&wq->wq_work_avail);
 449  447                          pthread_mutex_unlock(&wq->wq_queue_lock);
 450  448  
 451  449                          debug(2, "%d: entering p2 completion barrier\n",
 452  450                              pthread_self());
                                   /*
                                    * Only the thread for which barrier_wait()
                                    * returns nonzero sets wq_alldone and signals
                                    * wq_alldone_cv.
                                    */
 453  451                          if (barrier_wait(&wq->wq_bar1)) {
 454  452                                  pthread_mutex_lock(&wq->wq_queue_lock);
 455  453                                  wq->wq_alldone = 1;
 456  454                                  pthread_cond_signal(&wq->wq_alldone_cv);
 457  455                                  pthread_mutex_unlock(&wq->wq_queue_lock);
 458  456                          }
 459  457  
 460  458                          return;
 461  459                  }
 462  460  
                           /*
                            * Fewer than two entries: wait for a merge result to be
                            * queued, then restart the loop so that wq_ninqueue is
                            * re-checked as well.
                            */
 463  461                  if (fifo_len(wq->wq_queue) < 2) {
 464  462                          pthread_cond_wait(&wq->wq_work_avail,
 465  463                              &wq->wq_queue_lock);
 466  464                          pthread_mutex_unlock(&wq->wq_queue_lock);
 467  465                          continue;
 468  466                  }
 469  467  
 470  468                  /* there's work to be done! */
 471  469                  pow1 = fifo_remove(wq->wq_queue);
 472  470                  pow2 = fifo_remove(wq->wq_queue);
 473  471                  wq->wq_ninqueue -= 2;
 474  472  
                           /* batch IDs are handed out in removal order */
 475  473                  batchid = wq->wq_next_batchid++;
 476  474  
 477  475                  pthread_mutex_unlock(&wq->wq_queue_lock);
 478  476  
                           /* the merge itself runs without the queue lock */
 479  477                  debug(2, "%d: merging %p into %p\n", pthread_self(),
 480  478                      (void *)pow1, (void *)pow2);
 481  479                  merge_into_master(pow1, pow2, NULL, 0);
 482  480                  tdata_free(pow1);
 483  481  
 484  482                  /*
 485  483                   * merging is complete.  place at the tail of the queue in
 486  484                   * proper order.
 487  485                   */
 488  486                  pthread_mutex_lock(&wq->wq_queue_lock);
                           /* wait until the batch just before ours has been queued */
 489  487                  while (wq->wq_lastdonebatch + 1 != batchid) {
 490  488                          pthread_cond_wait(&wq->wq_done_cv,
 491  489                              &wq->wq_queue_lock);
 492  490                  }
 493  491  
 494  492                  wq->wq_lastdonebatch = batchid;
 495  493  
 496  494                  fifo_add(wq->wq_queue, pow2);
 497  495                  debug(2, "%d: added %p to queue, len now %d, ninqueue %d\n",
 498  496                      pthread_self(), (void *)pow2, fifo_len(wq->wq_queue),
 499  497                      wq->wq_ninqueue);
                           /*
                            * wq_done_cv: let threads holding later batches re-check
                            * their turn.  wq_work_avail: a new entry is on the
                            * queue.
                            */
 500  498                  pthread_cond_broadcast(&wq->wq_done_cv);
 501  499                  pthread_cond_signal(&wq->wq_work_avail);
 502  500                  pthread_mutex_unlock(&wq->wq_queue_lock);
 503  501          }
 504  502  }
 505  503  
 506  504  /*
 507  505   * Main loop for worker threads.
            *
            * Each worker runs Phase I, meets the others at wq_bar1, waits at
            * wq_bar2, and then runs Phase II.  Between the two barriers exactly
            * one thread (the one for which barrier_wait() on wq_bar1 returns
            * nonzero) flushes the partial Phase I batches and sets up the queue
            * for Phase II.
 508  506   */
 509  507  static void
 510  508  worker_thread(workqueue_t *wq)
 511  509  {
 512  510          worker_runphase1(wq);
 513  511  
 514  512          debug(2, "%d: entering first barrier\n", pthread_self());
 515  513  
 516  514          if (barrier_wait(&wq->wq_bar1)) {
 517  515  
 518  516                  debug(2, "%d: doing work in first barrier\n", pthread_self());
 519  517  
                           /* move leftover wip slot contents to the done queue */
 520  518                  finalize_phase_one(wq);
 521  519  
                           /* compute wq_ninqueue and swap in the done queue */
 522  520                  init_phase_two(wq);
 523  521  
 524  522                  debug(2, "%d: ninqueue is %d, %d on queue\n", pthread_self(),
 525  523                      wq->wq_ninqueue, fifo_len(wq->wq_queue));
 526  524          }
 527  525  
 528  526          debug(2, "%d: entering second barrier\n", pthread_self());
 529  527  
                   /*
                    * The other threads wait here while the single thread above
                    * does the hand-over work; the return value is not needed.
                    */
 530  528          (void) barrier_wait(&wq->wq_bar2);
 531  529  
 532  530          debug(2, "%d: phase 1 complete\n", pthread_self());
 533  531  
 534  532          worker_runphase2(wq);
 535  533  }
 536  534  
 537  535  /*
 538  536   * Pass a tdata_t tree, built from an input file, off to the work queue for
 539  537   * consumption by worker threads.
 540  538   */
 541  539  static int
 542  540  merge_ctf_cb(tdata_t *td, char *name, void *arg)
 543  541  {
 544  542          workqueue_t *wq = arg;
 545  543  
 546  544          debug(3, "Adding tdata %p for processing\n", (void *)td);
 547  545  
 548  546          pthread_mutex_lock(&wq->wq_queue_lock);
 549  547          while (fifo_len(wq->wq_queue) > wq->wq_ithrottle) {
 550  548                  debug(2, "Throttling input (len = %d, throttle = %d)\n",
 551  549                      fifo_len(wq->wq_queue), wq->wq_ithrottle);
 552  550                  pthread_cond_wait(&wq->wq_work_removed, &wq->wq_queue_lock);
 553  551          }
 554  552  
 555  553          fifo_add(wq->wq_queue, td);
 556  554          debug(1, "Thread %d announcing %s\n", pthread_self(), name);
 557  555          pthread_cond_broadcast(&wq->wq_work_avail);
 558  556          pthread_mutex_unlock(&wq->wq_queue_lock);
 559  557  
 560  558          return (1);
 561  559  }
 562  560  
 563  561  /*
 564  562   * This program is intended to be invoked from a Makefile, as part of the build.
 565  563   * As such, in the event of a failure or user-initiated interrupt (^C), we need
 566  564   * to ensure that a subsequent re-make will cause ctfmerge to be executed again.
 567  565   * Unfortunately, ctfmerge will usually be invoked directly after (and as part
 568  566   * of the same Makefile rule as) a link, and will operate on the linked file
 569  567   * in place.  If we merely exit upon receipt of a SIGINT, a subsequent make
 570  568   * will notice that the *linked* file is newer than the object files, and thus
 571  569   * will not reinvoke ctfmerge.  The only way to ensure that a subsequent make
 572  570   * reinvokes ctfmerge, is to remove the file to which we are adding CTF
 573  571   * data (confusingly named the output file).  This means that the link will need
 574  572   * to happen again, but links are generally fast, and we can't allow the merge
 575  573   * to be skipped.
 576  574   *
 577  575   * Another possibility would be to block SIGINT entirely - to always run to
 578  576   * completion.  The run time of ctfmerge can, however, be measured in minutes
 579  577   * in some cases, so this is not a valid option.
 580  578   */
 581  579  static void
 582  580  handle_sig(int sig)
 583  581  {
                   /*
                    * Signal handler: report the signal number through
                    * terminate().
                    * NOTE(review): presumably terminate() runs
                    * terminate_cleanup() and exits, which is what removes the
                    * output file as described above -- confirm in its
                    * definition.
                    */
 584  582          terminate("Caught signal %d - exiting\n", sig);
 585  583  }
 586  584  
 587  585  static void
 588  586  terminate_cleanup(void)
 589  587  {
 590  588          int dounlink = getenv("CTFMERGE_TERMINATE_NO_UNLINK") ? 0 : 1;
 591  589  
 592  590          if (tmpname != NULL && dounlink)
 593  591                  unlink(tmpname);
 594  592

↓ open down ↓

557 lines elided

↑ open up ↑

 595  593          if (outfile == NULL)
 596  594                  return;
 597  595  
 598  596          if (dounlink) {
 599  597                  fprintf(stderr, "Removing %s\n", outfile);
 600  598                  unlink(outfile);
 601  599          }
 602  600  }
 603  601  
 604  602  static void
 605      -copy_ctf_data(char *srcfile, char *destfile, int keep_stabs)
      603 +copy_ctf_data(char *srcfile, char *destfile)
 606  604  {
                   /*
                    * Copy the CTF data found in srcfile into destfile.  The data
                    * is read into srctd, written out compressed by way of a
                    * temporary file named after destfile, and the temporary file
                    * is then renamed over destfile.
                    *
                    * This block still carries diff markers: '-' lines are the
                    * text before the change and '+' lines the text after it.
                    */
 607  605          tdata_t *srctd;
 608  606  
                   /* a return of 0 from read_ctf() is treated as "no CTF data" */
 609  607          if (read_ctf(&srcfile, 1, NULL, read_ctf_save_cb, &srctd, 1) == 0)
 610  608                  terminate("No CTF data found in source file %s\n", srcfile);
 611  609  
                   /* tmpname is the file-scope variable terminate_cleanup() reads */
 612  610          tmpname = mktmpname(destfile, ".ctf");
 613      -        write_ctf(srctd, destfile, tmpname, CTF_COMPRESS | keep_stabs);
      611 +        write_ctf(srctd, destfile, tmpname, CTF_COMPRESS);
 614  612          if (rename(tmpname, destfile) != 0) {
 615  613                  terminate("Couldn't rename temp file %s to %s", tmpname,
 616  614                      destfile);
 617  615          }
                   /*
                    * NOTE(review): tmpname is freed but not reset to NULL, so
                    * terminate_cleanup() would pass a stale pointer to unlink()
                    * if it ran after this point -- confirm whether that can
                    * happen.
                    */
 618  616          free(tmpname);
 619  617          tdata_free(srctd);
 620  618  }
 621  619  
 622  620  static void
 623  621  wq_init(workqueue_t *wq, int nfiles)

 624  622  {
 625  623          int throttle, nslots, i;
 626  624  
 627  625          if (getenv("CTFMERGE_MAX_SLOTS"))
 628  626                  nslots = atoi(getenv("CTFMERGE_MAX_SLOTS"));
 629  627          else
 630  628                  nslots = MERGE_PHASE1_MAX_SLOTS;
 631  629  
 632  630          if (getenv("CTFMERGE_PHASE1_BATCH_SIZE"))
 633  631                  wq->wq_maxbatchsz = atoi(getenv("CTFMERGE_PHASE1_BATCH_SIZE"));
 634  632          else
 635  633                  wq->wq_maxbatchsz = MERGE_PHASE1_BATCH_SIZE;
 636  634  
 637  635          nslots = MIN(nslots, (nfiles + wq->wq_maxbatchsz - 1) /
 638  636              wq->wq_maxbatchsz);
 639  637  
 640  638          wq->wq_wip = xcalloc(sizeof (wip_t) * nslots);
 641  639          wq->wq_nwipslots = nslots;
 642  640          wq->wq_nthreads = MIN(sysconf(_SC_NPROCESSORS_ONLN) * 3 / 2, nslots);
 643  641          wq->wq_thread = xmalloc(sizeof (pthread_t) * wq->wq_nthreads);
 644  642  
 645  643          if (getenv("CTFMERGE_INPUT_THROTTLE"))
 646  644                  throttle = atoi(getenv("CTFMERGE_INPUT_THROTTLE"));
 647  645          else
 648  646                  throttle = MERGE_INPUT_THROTTLE_LEN;
 649  647          wq->wq_ithrottle = throttle * wq->wq_nthreads;
 650  648  
 651  649          debug(1, "Using %d slots, %d threads\n", wq->wq_nwipslots,
 652  650              wq->wq_nthreads);
 653  651  
 654  652          wq->wq_next_batchid = 0;
 655  653  
 656  654          for (i = 0; i < nslots; i++) {
 657  655                  pthread_mutex_init(&wq->wq_wip[i].wip_lock, NULL);
 658  656                  wq->wq_wip[i].wip_batchid = wq->wq_next_batchid++;
 659  657          }
 660  658  
 661  659          pthread_mutex_init(&wq->wq_queue_lock, NULL);
 662  660          wq->wq_queue = fifo_new();
 663  661          pthread_cond_init(&wq->wq_work_avail, NULL);
 664  662          pthread_cond_init(&wq->wq_work_removed, NULL);
 665  663          wq->wq_ninqueue = nfiles;
 666  664          wq->wq_nextpownum = 0;
 667  665  
 668  666          pthread_mutex_init(&wq->wq_donequeue_lock, NULL);
 669  667          wq->wq_donequeue = fifo_new();
 670  668          wq->wq_lastdonebatch = -1;
 671  669  
 672  670          pthread_cond_init(&wq->wq_done_cv, NULL);
 673  671  
 674  672          pthread_cond_init(&wq->wq_alldone_cv, NULL);
 675  673          wq->wq_alldone = 0;
 676  674  
 677  675          barrier_init(&wq->wq_bar1, wq->wq_nthreads);
 678  676          barrier_init(&wq->wq_bar2, wq->wq_nthreads);
 679  677  
 680  678          wq->wq_nomorefiles = 0;
 681  679  }
 682  680  
 683  681  static void
 684  682  start_threads(workqueue_t *wq)
 685  683  {
 686  684          sigset_t sets;
 687  685          int i;
 688  686  
 689  687          sigemptyset(&sets);
 690  688          sigaddset(&sets, SIGINT);
 691  689          sigaddset(&sets, SIGQUIT);
 692  690          sigaddset(&sets, SIGTERM);
 693  691          pthread_sigmask(SIG_BLOCK, &sets, NULL);
 694  692  
 695  693          for (i = 0; i < wq->wq_nthreads; i++) {
 696  694                  pthread_create(&wq->wq_thread[i], NULL,
 697  695                      (void *(*)(void *))worker_thread, wq);
 698  696          }
 699  697  
 700  698          sigset(SIGINT, handle_sig);
 701  699          sigset(SIGQUIT, handle_sig);
 702  700          sigset(SIGTERM, handle_sig);
 703  701          pthread_sigmask(SIG_UNBLOCK, &sets, NULL);
 704  702  }
 705  703  
 706  704  static void
 707  705  join_threads(workqueue_t *wq)
 708  706  {
 709  707          int i;
 710  708  
 711  709          for (i = 0; i < wq->wq_nthreads; i++) {
 712  710                  pthread_join(wq->wq_thread[i], NULL);
 713  711          }
 714  712  }
 715  713  
 716  714  static int
 717  715  strcompare(const void *p1, const void *p2)
 718  716  {
 719  717          char *s1 = *((char **)p1);
 720  718          char *s2 = *((char **)p2);
 721  719  
 722  720          return (strcmp(s1, s2));
 723  721  }
 724  722  
 725  723  /*
 726  724   * Core work queue structure.  main() passes its address to wq_init() and
 727  725   * start_threads(), which hands it to every worker thread as the main point
 728  726   * of coordination.  It is static rather than a local in main() because
 729  727   * passing a pointer into your stack to another thread is fragile at best
 730  728   * and leads to some hard-to-debug failure modes.
 731  729   */
 732  730  static workqueue_t wq;
 733  731

↓ open down ↓

110 lines elided

↑ open up ↑

 734  732  int
 735  733  main(int argc, char **argv)
 736  734  {
 737  735          tdata_t *mstrtd, *savetd;
 738  736          char *uniqfile = NULL, *uniqlabel = NULL;
 739  737          char *withfile = NULL;
 740  738          char *label = NULL;
 741  739          char **ifiles, **tifiles;
 742  740          int verbose = 0, docopy = 0;
 743  741          int write_fuzzy_match = 0;
 744      -        int keep_stabs = 0;
 745  742          int require_ctf = 0;
 746  743          int nifiles, nielems;
 747  744          int c, i, idx, tidx, err;
 748  745  
 749  746          progname = basename(argv[0]);
 750  747  
 751  748          if (getenv("CTFMERGE_DEBUG_LEVEL"))
 752  749                  debug_level = atoi(getenv("CTFMERGE_DEBUG_LEVEL"));
 753  750  
 754  751          err = 0;
 755      -        while ((c = getopt(argc, argv, ":cd:D:fgl:L:o:tvw:s")) != EOF) {
      752 +        while ((c = getopt(argc, argv, ":cd:D:fl:L:o:tvw:s")) != EOF) {
 756  753                  switch (c) {
 757  754                  case 'c':
 758  755                          docopy = 1;
 759  756                          break;
 760  757                  case 'd':
 761  758                          /* Uniquify against `uniqfile' */
 762  759                          uniqfile = optarg;
 763  760                          break;
 764  761                  case 'D':
 765  762                          /* Uniquify against label `uniqlabel' in `uniqfile' */
 766  763                          uniqlabel = optarg;
 767  764                          break;
 768  765                  case 'f':
 769  766                          write_fuzzy_match = CTF_FUZZY_MATCH;
 770  767                          break;
 771      -                case 'g':
 772      -                        keep_stabs = CTF_KEEP_STABS;
 773      -                        break;
 774  768                  case 'l':
 775  769                          /* Label merged types with `label' */
 776  770                          label = optarg;
 777  771                          break;
 778  772                  case 'L':
 779  773                          /* Label merged types with getenv(`label`) */
 780  774                          if ((label = getenv(optarg)) == NULL)
 781  775                                  label = CTF_DEFAULT_LABEL;
 782  776                          break;
 783  777                  case 'o':

 784  778                          /* Place merged types in CTF section in `outfile' */
 785  779                          outfile = optarg;
 786  780                          break;
 787  781                  case 't':
 788  782                          /* Insist *all* object files built from C have CTF */
 789  783                          require_ctf = 1;
 790  784                          break;
 791  785                  case 'v':
 792  786                          /* More debugging information */
 793  787                          verbose = 1;
 794  788                          break;
 795  789                  case 'w':
 796  790                          /* Additive merge with data from `withfile' */
 797  791                          withfile = optarg;
 798  792                          break;
 799  793                  case 's':
 800  794                          /* use the dynsym rather than the symtab */
 801  795                          dynsym = CTF_USE_DYNSYM;
 802  796                          break;
 803  797                  default:
 804  798                          usage();
 805  799                          exit(2);
 806  800                  }
 807  801          }
 808  802  
 809  803          /* Validate arguments */
 810  804          if (docopy) {
 811  805                  if (uniqfile != NULL || uniqlabel != NULL || label != NULL ||
 812  806                      outfile != NULL || withfile != NULL || dynsym != 0)
 813  807                          err++;
 814  808  
 815  809                  if (argc - optind != 2)
 816  810                          err++;
 817  811          } else {
 818  812                  if (uniqfile != NULL && withfile != NULL)
 819  813                          err++;
 820  814  
 821  815                  if (uniqlabel != NULL && uniqfile == NULL)
 822  816                          err++;
 823  817  
 824  818                  if (outfile == NULL || label == NULL)
 825  819                          err++;

↓ open down ↓

42 lines elided

↑ open up ↑

 826  820  
 827  821                  if (argc - optind == 0)
 828  822                          err++;
 829  823          }
 830  824  
 831  825          if (err) {
 832  826                  usage();
 833  827                  exit(2);
 834  828          }
 835  829  
 836      -        if (getenv("STRIPSTABS_KEEP_STABS") != NULL)
 837      -                keep_stabs = CTF_KEEP_STABS;
 838      -
 839  830          if (uniqfile && access(uniqfile, R_OK) != 0) {
 840  831                  warning("Uniquification file %s couldn't be opened and "
 841  832                      "will be ignored.\n", uniqfile);
 842  833                  uniqfile = NULL;
 843  834          }
 844  835          if (withfile && access(withfile, R_OK) != 0) {
 845  836                  warning("With file %s couldn't be opened and will be "
 846  837                      "ignored.\n", withfile);
 847  838                  withfile = NULL;
 848  839          }
 849  840          if (outfile && access(outfile, R_OK|W_OK) != 0)
 850  841                  terminate("Cannot open output file %s for r/w", outfile);
 851  842  
 852  843          /*
 853  844           * This is ugly, but we don't want to have to have a separate tool
 854  845           * (yet) just for copying an ELF section with our specific requirements,
 855  846           * so we shoe-horn a copier into ctfmerge.
 856  847           */
 857  848          if (docopy) {
 858      -                copy_ctf_data(argv[optind], argv[optind + 1], keep_stabs);
      849 +                copy_ctf_data(argv[optind], argv[optind + 1]);
 859  850  
 860  851                  exit(0);
 861  852          }
 862  853  
 863  854          set_terminate_cleanup(terminate_cleanup);
 864  855  
 865  856          /* Sort the input files and strip out duplicates */
 866  857          nifiles = argc - optind;
 867  858          ifiles = xmalloc(sizeof (char *) * nifiles);
 868  859          tifiles = xmalloc(sizeof (char *) * nifiles);

 869  860  
 870  861          for (i = 0; i < nifiles; i++)
 871  862                  tifiles[i] = argv[optind + i];
 872  863          qsort(tifiles, nifiles, sizeof (char *), (int (*)())strcompare);
 873  864  
 874  865          ifiles[0] = tifiles[0];
 875  866          for (idx = 0, tidx = 1; tidx < nifiles; tidx++) {
 876  867                  if (strcmp(ifiles[idx], tifiles[tidx]) != 0)
 877  868                          ifiles[++idx] = tifiles[tidx];
 878  869          }
 879  870          nifiles = idx + 1;
 880  871  
 881  872          /* Make sure they all exist */
 882  873          if ((nielems = count_files(ifiles, nifiles)) < 0)
 883  874                  terminate("Some input files were inaccessible\n");
 884  875  
 885  876          /* Prepare for the merge */
 886  877          wq_init(&wq, nielems);
 887  878  
 888  879          start_threads(&wq);
 889  880  
 890  881          /*
 891  882           * Start the merge
 892  883           *
 893  884           * We're reading everything from each of the object files, so we
 894  885           * don't need to specify labels.
 895  886           */
 896  887          if (read_ctf(ifiles, nifiles, NULL, merge_ctf_cb,
 897  888              &wq, require_ctf) == 0) {
 898  889                  /*
 899  890                   * If we're verifying that C files have CTF, it's safe to
 900  891                   * assume that in this case, we're building only from assembly
 901  892                   * inputs.
 902  893                   */
 903  894                  if (require_ctf)
 904  895                          exit(0);
 905  896                  terminate("No ctf sections found to merge\n");
 906  897          }
 907  898  
 908  899          pthread_mutex_lock(&wq.wq_queue_lock);
 909  900          wq.wq_nomorefiles = 1;
 910  901          pthread_cond_broadcast(&wq.wq_work_avail);
 911  902          pthread_mutex_unlock(&wq.wq_queue_lock);
 912  903  
 913  904          pthread_mutex_lock(&wq.wq_queue_lock);
 914  905          while (wq.wq_alldone == 0)
 915  906                  pthread_cond_wait(&wq.wq_alldone_cv, &wq.wq_queue_lock);
 916  907          pthread_mutex_unlock(&wq.wq_queue_lock);
 917  908  
 918  909          join_threads(&wq);
 919  910  
 920  911          /*
 921  912           * All requested files have been merged, with the resulting tree in
 922  913           * mstrtd.  savetd is the tree that will be placed into the output file.
 923  914           *
 924  915           * Regardless of whether we're doing a normal uniquification or an
 925  916           * additive merge, we need a type tree that has been uniquified
 926  917           * against uniqfile or withfile, as appropriate.
 927  918           *
 928  919           * If we're doing a uniquification, we stuff the resulting tree into
 929  920           * outfile.  Otherwise, we add the tree to the tree already in withfile.
 930  921           */
 931  922          assert(fifo_len(wq.wq_queue) == 1);
 932  923          mstrtd = fifo_remove(wq.wq_queue);
 933  924  
 934  925          if (verbose || debug_level) {
 935  926                  debug(2, "Statistics for td %p\n", (void *)mstrtd);
 936  927  
 937  928                  iidesc_stats(mstrtd->td_iihash);
 938  929          }
 939  930  
 940  931          if (uniqfile != NULL || withfile != NULL) {
 941  932                  char *reffile, *reflabel = NULL;
 942  933                  tdata_t *reftd;
 943  934  
 944  935                  if (uniqfile != NULL) {
 945  936                          reffile = uniqfile;
 946  937                          reflabel = uniqlabel;
 947  938                  } else
 948  939                          reffile = withfile;
 949  940  
 950  941                  if (read_ctf(&reffile, 1, reflabel, read_ctf_save_cb,
 951  942                      &reftd, require_ctf) == 0) {
 952  943                          terminate("No CTF data found in reference file %s\n",
 953  944                              reffile);
 954  945                  }
 955  946  
 956  947                  savetd = tdata_new();
 957  948  
 958  949                  if (CTF_TYPE_ISCHILD(reftd->td_nextid))
 959  950                          terminate("No room for additional types in master\n");
 960  951  
 961  952                  savetd->td_nextid = withfile ? reftd->td_nextid :
 962  953                      CTF_INDEX_TO_TYPE(1, TRUE);
 963  954                  merge_into_master(mstrtd, reftd, savetd, 0);
 964  955  
 965  956                  tdata_label_add(savetd, label, CTF_LABEL_LASTIDX);
 966  957  
 967  958                  if (withfile) {
 968  959                          /*
 969  960                           * savetd holds the new data to be added to the withfile
 970  961                           */
 971  962                          tdata_t *withtd = reftd;
 972  963  
 973  964                          tdata_merge(withtd, savetd);
 974  965  
 975  966                          savetd = withtd;
 976  967                  } else {
 977  968                          char uniqname[MAXPATHLEN];
 978  969                          labelent_t *parle;
 979  970  
 980  971                          parle = tdata_label_top(reftd);
 981  972  
 982  973                          savetd->td_parlabel = xstrdup(parle->le_name);
 983  974  
 984  975                          strncpy(uniqname, reffile, sizeof (uniqname));
 985  976                          uniqname[MAXPATHLEN - 1] = '\0';
 986  977                          savetd->td_parname = xstrdup(basename(uniqname));
 987  978                  }
 988  979  
 989  980          } else {
 990  981                  /*
 991  982                   * No post processing.  Write the merged tree as-is into the

↓ open down ↓

123 lines elided

↑ open up ↑

 992  983                   * output file.
 993  984                   */
 994  985                  tdata_label_free(mstrtd);
 995  986                  tdata_label_add(mstrtd, label, CTF_LABEL_LASTIDX);
 996  987  
 997  988                  savetd = mstrtd;
 998  989          }
 999  990  
1000  991          tmpname = mktmpname(outfile, ".ctf");
1001  992          write_ctf(savetd, outfile, tmpname,
1002      -            CTF_COMPRESS | write_fuzzy_match | dynsym | keep_stabs);
      993 +            CTF_COMPRESS | write_fuzzy_match | dynsym);
1003  994          if (rename(tmpname, outfile) != 0)
1004  995                  terminate("Couldn't rename output temp file %s", tmpname);
1005  996          free(tmpname);
1006  997  
1007  998          return (0);
1008  999  }

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX