9709 Remove support for BZIP2 from dump
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
--- old/usr/src/uts/sun4u/opl/os/opl.c
+++ new/usr/src/uts/sun4u/opl/os/opl.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
23 + * Copyright 2018 Nexenta Systems, Inc. All rights reserved.
23 24 */
24 25
25 26 #include <sys/cpuvar.h>
26 27 #include <sys/systm.h>
27 28 #include <sys/sysmacros.h>
28 29 #include <sys/promif.h>
29 30 #include <sys/platform_module.h>
30 31 #include <sys/cmn_err.h>
31 32 #include <sys/errno.h>
32 33 #include <sys/machsystm.h>
33 34 #include <sys/bootconf.h>
34 35 #include <sys/nvpair.h>
35 36 #include <sys/kobj.h>
36 37 #include <sys/mem_cage.h>
37 38 #include <sys/opl.h>
38 39 #include <sys/scfd/scfostoescf.h>
39 40 #include <sys/cpu_sgnblk_defs.h>
40 41 #include <sys/utsname.h>
41 42 #include <sys/ddi.h>
42 43 #include <sys/sunndi.h>
43 44 #include <sys/lgrp.h>
44 45 #include <sys/memnode.h>
45 46 #include <sys/sysmacros.h>
46 47 #include <sys/time.h>
47 48 #include <sys/cpu.h>
48 49 #include <sys/dumphdr.h>
49 50 #include <vm/vm_dep.h>
50 51
51 52 int (*opl_get_mem_unum)(int, uint64_t, char *, int, int *);
52 53 int (*opl_get_mem_sid)(char *unum, char *buf, int buflen, int *lenp);
53 54 int (*opl_get_mem_offset)(uint64_t paddr, uint64_t *offp);
54 55 int (*opl_get_mem_addr)(char *unum, char *sid,
55 56 uint64_t offset, uint64_t *paddr);
56 57
57 58 /* Memory for fcode claims. 16k times # maximum possible IO units */
58 59 #define EFCODE_SIZE (OPL_MAX_BOARDS * OPL_MAX_IO_UNITS_PER_BOARD * 0x4000)
59 60 int efcode_size = EFCODE_SIZE;
60 61
61 62 #define OPL_MC_MEMBOARD_SHIFT 38 /* Boards on 256GB boundary */
62 63
63 64 /* Set the maximum number of boards for DR */
64 65 int opl_boards = OPL_MAX_BOARDS;
65 66
66 67 void sgn_update_all_cpus(ushort_t, uchar_t, uchar_t);
67 68
68 69 extern int tsb_lgrp_affinity;
69 70
70 71 int opl_tsb_spares = (OPL_MAX_BOARDS) * (OPL_MAX_PCICH_UNITS_PER_BOARD) *
71 72 (OPL_MAX_TSBS_PER_PCICH);
72 73
73 74 pgcnt_t opl_startup_cage_size = 0;
74 75
75 76 /*
76 77 * The length of the delay in seconds in communication with XSCF after
77 78 * which the warning message will be logged.
78 79 */
79 80 uint_t xscf_connect_delay = 60 * 15;
80 81
81 82 static opl_model_info_t opl_models[] = {
82 83 { "FF1", OPL_MAX_BOARDS_FF1, FF1, STD_DISPATCH_TABLE },
83 84 { "FF2", OPL_MAX_BOARDS_FF2, FF2, STD_DISPATCH_TABLE },
84 85 { "DC1", OPL_MAX_BOARDS_DC1, DC1, STD_DISPATCH_TABLE },
85 86 { "DC2", OPL_MAX_BOARDS_DC2, DC2, EXT_DISPATCH_TABLE },
86 87 { "DC3", OPL_MAX_BOARDS_DC3, DC3, EXT_DISPATCH_TABLE },
87 88 { "IKKAKU", OPL_MAX_BOARDS_IKKAKU, IKKAKU, STD_DISPATCH_TABLE },
88 89 };
89 90 static int opl_num_models = sizeof (opl_models)/sizeof (opl_model_info_t);
90 91
91 92 /*
92 93 * opl_cur_model
93 94 */
94 95 static opl_model_info_t *opl_cur_model = NULL;
95 96
96 97 static struct memlist *opl_memlist_per_board(struct memlist *ml);
97 98 static void post_xscf_msg(char *, int);
98 99 static void pass2xscf_thread();
99 100
100 101 /*
101 102 * Note FF/DC out-of-order instruction engine takes only a
102 103 * single cycle to execute each spin loop
103 104 * for comparison, Panther takes 6 cycles for same loop
104 105 * OPL_BOFF_SPIN = base spin loop, roughly one memory reference time
105 106 * OPL_BOFF_TM = approx nsec for OPL sleep instruction (1600 for OPL-C)
106 107 * OPL_BOFF_SLEEP = approx number of SPIN iterations to equal one sleep
107 108 * OPL_BOFF_MAX_SCALE - scaling factor for max backoff based on active cpus
108 109 * Listed values tuned for 2.15GHz to 2.64GHz systems
109 110 * Value may change for future systems
110 111 */
111 112 #define OPL_BOFF_SPIN 7
112 113 #define OPL_BOFF_SLEEP 4
113 114 #define OPL_BOFF_TM 1600
114 115 #define OPL_BOFF_MAX_SCALE 8
115 116
116 117 #define OPL_CLOCK_TICK_THRESHOLD 128
117 118 #define OPL_CLOCK_TICK_NCPUS 64
118 119
119 120 extern int clock_tick_threshold;
120 121 extern int clock_tick_ncpus;
121 122
122 123 int
123 124 set_platform_max_ncpus(void)
124 125 {
125 126 return (OPL_MAX_CPU_PER_BOARD * OPL_MAX_BOARDS);
126 127 }
127 128
128 129 int
129 130 set_platform_tsb_spares(void)
130 131 {
131 132 return (MIN(opl_tsb_spares, MAX_UPA));
132 133 }
133 134
134 135 static void
135 136 set_model_info()
136 137 {
137 138 extern int ts_dispatch_extended;
138 139 char name[MAXSYSNAME];
139 140 int i;
140 141
141 142 /*
142 143 * Get model name from the root node.
143 144 *
144 145 * We are using the prom device tree since, at this point,
145 146 * the Solaris device tree is not yet setup.
146 147 */
147 148 (void) prom_getprop(prom_rootnode(), "model", (caddr_t)name);
148 149
149 150 for (i = 0; i < opl_num_models; i++) {
150 151 if (strncmp(name, opl_models[i].model_name, MAXSYSNAME) == 0) {
151 152 opl_cur_model = &opl_models[i];
152 153 break;
153 154 }
154 155 }
155 156
156 157 /*
157 158 * If model not matched, it's an unknown model.
158 159 * Just return. It will default to standard dispatch tables.
159 160 */
160 161 if (i == opl_num_models)
161 162 return;
162 163
163 164 if ((opl_cur_model->model_cmds & EXT_DISPATCH_TABLE) &&
164 165 (ts_dispatch_extended == -1)) {
165 166 /*
166 167 * Based on a platform model, select a dispatch table.
167 168 * Only DC2 and DC3 systems use the alternate/extended
168 169 * TS dispatch table.
169 170 * IKKAKU, FF1, FF2 and DC1 systems use standard dispatch
170 171 * tables.
171 172 */
172 173 ts_dispatch_extended = 1;
173 174 }
174 175
175 176 }
176 177
177 178 static void
178 179 set_max_mmu_ctxdoms()
179 180 {
180 181 extern uint_t max_mmu_ctxdoms;
181 182 int max_boards;
182 183
183 184 /*
184 185 * From the model, get the maximum number of boards
185 186 * supported and set the value accordingly. If the model
186 187 * could not be determined or recognized, we assume the max value.
187 188 */
188 189 if (opl_cur_model == NULL)
189 190 max_boards = OPL_MAX_BOARDS;
190 191 else
191 192 max_boards = opl_cur_model->model_max_boards;
192 193
193 194 /*
194 195 * On OPL, cores and MMUs are one-to-one.
195 196 */
196 197 max_mmu_ctxdoms = OPL_MAX_CORE_UNITS_PER_BOARD * max_boards;
197 198 }
198 199
199 200 #pragma weak mmu_init_large_pages
200 201
201 202 void
202 203 set_platform_defaults(void)
203 204 {
204 205 extern char *tod_module_name;
205 206 extern void cpu_sgn_update(ushort_t, uchar_t, uchar_t, int);
206 207 extern void mmu_init_large_pages(size_t);
207 208
208 209 /* Set the CPU signature function pointer */
209 210 cpu_sgn_func = cpu_sgn_update;
210 211
211 212 /* Set appropriate tod module for OPL platform */
212 213 ASSERT(tod_module_name == NULL);
213 214 tod_module_name = "todopl";
214 215
215 216 if ((mmu_page_sizes == max_mmu_page_sizes) &&
216 217 (mmu_ism_pagesize != DEFAULT_ISM_PAGESIZE)) {
217 218 if (&mmu_init_large_pages)
218 219 mmu_init_large_pages(mmu_ism_pagesize);
219 220 }
220 221
221 222 tsb_lgrp_affinity = 1;
222 223
223 224 set_max_mmu_ctxdoms();
224 -
225 - /* set OPL threshold for compressed dumps */
226 - dump_plat_mincpu_default = DUMP_PLAT_SUN4U_OPL_MINCPU;
227 225 }
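For context on the two deleted lines above: dump_plat_mincpu_default supplied the OPL-specific CPU-count threshold at which the generic dump code switched from single-threaded lzjb compression of the crash dump to parallel bzip2. The comment block below is an illustrative paraphrase of that old decision, not text taken from dumpsys() itself:

	/*
	 * Roughly, before this change:
	 *     #CPUs >= dump_plat_mincpu  ->  parallel bzip2 dump compression
	 *     otherwise                  ->  single-threaded lzjb
	 * With bzip2 support removed from dump, a per-platform override of
	 * dump_plat_mincpu_default no longer has any effect, hence the
	 * deletion of the OPL setting here.
	 */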
228 226
229 227 /*
230 228 * Convert a logical board number to a physical one.
231 229 */
232 230
233 231 #define LSBPROP "board#"
234 232 #define PSBPROP "physical-board#"
235 233
236 234 int
237 235 opl_get_physical_board(int id)
238 236 {
239 237 dev_info_t *root_dip, *dip = NULL;
240 238 char *dname = NULL;
241 239 int circ;
242 240
243 241 pnode_t pnode;
244 242 char pname[MAXSYSNAME] = {0};
245 243
246 244 int lsb_id; /* Logical System Board ID */
247 245 int psb_id; /* Physical System Board ID */
248 246
249 247
250 248 /*
251 249 * This function is called at an early stage of bootup when the
252 250 * kernel device tree is not initialized yet, and also
253 251 * later on when the device tree is up. We want to try
254 252 * the fast track first.
255 253 */
256 254 root_dip = ddi_root_node();
257 255 if (root_dip) {
258 256 /* Get from devinfo node */
259 257 ndi_devi_enter(root_dip, &circ);
260 258 for (dip = ddi_get_child(root_dip); dip;
261 259 dip = ddi_get_next_sibling(dip)) {
262 260
263 261 dname = ddi_node_name(dip);
264 262 if (strncmp(dname, "pseudo-mc", 9) != 0)
265 263 continue;
266 264
267 265 if ((lsb_id = (int)ddi_getprop(DDI_DEV_T_ANY, dip,
268 266 DDI_PROP_DONTPASS, LSBPROP, -1)) == -1)
269 267 continue;
270 268
271 269 if (id == lsb_id) {
272 270 if ((psb_id = (int)ddi_getprop(DDI_DEV_T_ANY,
273 271 dip, DDI_PROP_DONTPASS, PSBPROP, -1))
274 272 == -1) {
275 273 ndi_devi_exit(root_dip, circ);
276 274 return (-1);
277 275 } else {
278 276 ndi_devi_exit(root_dip, circ);
279 277 return (psb_id);
280 278 }
281 279 }
282 280 }
283 281 ndi_devi_exit(root_dip, circ);
284 282 }
285 283
286 284 /*
287 285 * We do not have the kernel device tree, or we did not
288 286 * find the node for some reason (let's say the kernel
289 287 * device tree was modified), let's try the OBP tree.
290 288 */
291 289 pnode = prom_rootnode();
292 290 for (pnode = prom_childnode(pnode); pnode;
293 291 pnode = prom_nextnode(pnode)) {
294 292
295 293 if ((prom_getprop(pnode, "name", (caddr_t)pname) == -1) ||
296 294 (strncmp(pname, "pseudo-mc", 9) != 0))
297 295 continue;
298 296
299 297 if (prom_getprop(pnode, LSBPROP, (caddr_t)&lsb_id) == -1)
300 298 continue;
301 299
302 300 if (id == lsb_id) {
303 301 if (prom_getprop(pnode, PSBPROP,
304 302 (caddr_t)&psb_id) == -1) {
305 303 return (-1);
306 304 } else {
307 305 return (psb_id);
308 306 }
309 307 }
310 308 }
311 309
312 310 return (-1);
313 311 }
314 312
315 313 /*
316 314 * For OPL it's possible that memory from two or more successive boards
317 315 * will be contiguous across the boards, and therefore represented as a
318 316 * single chunk.
319 317 * This function splits such chunks down the board boundaries.
320 318 */
321 319 static struct memlist *
322 320 opl_memlist_per_board(struct memlist *ml)
323 321 {
324 322 uint64_t ssize, low, high, boundary;
325 323 struct memlist *head, *tail, *new;
326 324
327 325 ssize = (1ull << OPL_MC_MEMBOARD_SHIFT);
328 326
329 327 head = tail = NULL;
330 328
331 329 for (; ml; ml = ml->ml_next) {
332 330 low = (uint64_t)ml->ml_address;
333 331 high = low+(uint64_t)(ml->ml_size);
334 332 while (low < high) {
335 333 boundary = roundup(low+1, ssize);
336 334 boundary = MIN(high, boundary);
337 335 new = kmem_zalloc(sizeof (struct memlist), KM_SLEEP);
338 336 new->ml_address = low;
339 337 new->ml_size = boundary - low;
340 338 if (head == NULL)
341 339 head = new;
342 340 if (tail) {
343 341 tail->ml_next = new;
344 342 new->ml_prev = tail;
345 343 }
346 344 tail = new;
347 345 low = boundary;
348 346 }
349 347 }
350 348 return (head);
351 349 }
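As a worked example of the splitting above (addresses are hypothetical; the board size is 1ull << OPL_MC_MEMBOARD_SHIFT == 0x4000000000, i.e. 256GB):

	/*
	 * One contiguous chunk spanning two boards:
	 *     ml_address = 0x3fc0000000, ml_size = 0xc0000000
	 * covers [0x3fc0000000, 0x4080000000).  The inner loop rounds up
	 * to the 256GB boundary at 0x4000000000 and emits two entries:
	 *     [0x3fc0000000, 0x4000000000)   last 1GB of board 0
	 *     [0x4000000000, 0x4080000000)   first 2GB of board 1
	 */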
352 350
353 351 void
354 352 set_platform_cage_params(void)
355 353 {
356 354 extern pgcnt_t total_pages;
357 355 extern struct memlist *phys_avail;
358 356 struct memlist *ml, *tml;
359 357
360 358 if (kernel_cage_enable) {
361 359 pgcnt_t preferred_cage_size;
362 360
363 361 preferred_cage_size = MAX(opl_startup_cage_size,
364 362 total_pages / 256);
365 363
366 364 ml = opl_memlist_per_board(phys_avail);
367 365
368 366 /*
369 367 * Note: we are assuming that POST has loaded the
370 368 * whole show into the high end of memory. Having
371 369 * taken this leap, we copy the whole of phys_avail
372 370 * to the glist and arrange for the cage to grow
373 371 * downward (descending pfns).
374 372 */
375 373 kcage_range_init(ml, KCAGE_DOWN, preferred_cage_size);
376 374
377 375 /* free the memlist */
378 376 do {
379 377 tml = ml->ml_next;
380 378 kmem_free(ml, sizeof (struct memlist));
381 379 ml = tml;
382 380 } while (ml != NULL);
383 381 }
384 382
385 383 if (kcage_on)
386 384 cmn_err(CE_NOTE, "!DR Kernel Cage is ENABLED");
387 385 else
388 386 cmn_err(CE_NOTE, "!DR Kernel Cage is DISABLED");
389 387 }
390 388
391 389 /*ARGSUSED*/
392 390 int
393 391 plat_cpu_poweron(struct cpu *cp)
394 392 {
395 393 int (*opl_cpu_poweron)(struct cpu *) = NULL;
396 394
397 395 opl_cpu_poweron =
398 396 (int (*)(struct cpu *))kobj_getsymvalue("drmach_cpu_poweron", 0);
399 397
400 398 if (opl_cpu_poweron == NULL)
401 399 return (ENOTSUP);
402 400 else
403 401 return ((opl_cpu_poweron)(cp));
404 402
405 403 }
406 404
407 405 /*ARGSUSED*/
408 406 int
409 407 plat_cpu_poweroff(struct cpu *cp)
410 408 {
411 409 int (*opl_cpu_poweroff)(struct cpu *) = NULL;
412 410
413 411 opl_cpu_poweroff =
414 412 (int (*)(struct cpu *))kobj_getsymvalue("drmach_cpu_poweroff", 0);
415 413
416 414 if (opl_cpu_poweroff == NULL)
417 415 return (ENOTSUP);
418 416 else
419 417 return ((opl_cpu_poweroff)(cp));
420 418
421 419 }
422 420
423 421 int
424 422 plat_max_boards(void)
425 423 {
426 424 /*
427 425 * If the model cannot be determined, default to the max value.
428 426 * Otherwise, Ikkaku model only supports 1 system board.
429 427 */
430 428 if ((opl_cur_model != NULL) && (opl_cur_model->model_type == IKKAKU))
431 429 return (OPL_MAX_BOARDS_IKKAKU);
432 430 else
433 431 return (OPL_MAX_BOARDS);
434 432 }
435 433
436 434 int
437 435 plat_max_cpu_units_per_board(void)
438 436 {
439 437 return (OPL_MAX_CPU_PER_BOARD);
440 438 }
441 439
442 440 int
443 441 plat_max_mem_units_per_board(void)
444 442 {
445 443 return (OPL_MAX_MEM_UNITS_PER_BOARD);
446 444 }
447 445
448 446 int
449 447 plat_max_io_units_per_board(void)
450 448 {
451 449 return (OPL_MAX_IO_UNITS_PER_BOARD);
452 450 }
453 451
454 452 int
455 453 plat_max_cmp_units_per_board(void)
456 454 {
457 455 return (OPL_MAX_CMP_UNITS_PER_BOARD);
458 456 }
459 457
460 458 int
461 459 plat_max_core_units_per_board(void)
462 460 {
463 461 return (OPL_MAX_CORE_UNITS_PER_BOARD);
464 462 }
465 463
466 464 int
467 465 plat_pfn_to_mem_node(pfn_t pfn)
468 466 {
469 467 return (pfn >> mem_node_pfn_shift);
470 468 }
471 469
472 470 /* ARGSUSED */
473 471 void
474 472 plat_build_mem_nodes(prom_memlist_t *list, size_t nelems)
475 473 {
476 474 size_t elem;
477 475 pfn_t basepfn;
478 476 pgcnt_t npgs;
479 477 uint64_t boundary, ssize;
480 478 uint64_t low, high;
481 479
482 480 /*
483 481 * OPL mem slices are always aligned on a 256GB boundary.
484 482 */
485 483 mem_node_pfn_shift = OPL_MC_MEMBOARD_SHIFT - MMU_PAGESHIFT;
486 484 mem_node_physalign = 0;
487 485
488 486 /*
489 487 * Boot install lists are arranged <addr, len>, <addr, len>, ...
490 488 */
491 489 ssize = (1ull << OPL_MC_MEMBOARD_SHIFT);
492 490 for (elem = 0; elem < nelems; list++, elem++) {
493 491 low = list->addr;
494 492 high = low + list->size;
495 493 while (low < high) {
496 494 boundary = roundup(low+1, ssize);
497 495 boundary = MIN(high, boundary);
498 496 basepfn = btop(low);
499 497 npgs = btop(boundary - low);
500 498 mem_node_add_slice(basepfn, basepfn + npgs - 1);
501 499 low = boundary;
502 500 }
503 501 }
504 502 }
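As a quick check of the shift arithmetic above (assuming the sun4u 8K base page size, MMU_PAGESHIFT == 13):

	/*
	 * mem_node_pfn_shift = OPL_MC_MEMBOARD_SHIFT - MMU_PAGESHIFT
	 *                    = 38 - 13 = 25
	 * so plat_pfn_to_mem_node() returns pfn >> 25, i.e. one memnode per
	 * 2^25 pages * 8K = 256GB, matching the per-board slice size.
	 */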
505 503
506 504 /*
507 505 * Find the CPU associated with a slice at boot-time.
508 506 */
509 507 void
510 508 plat_fill_mc(pnode_t nodeid)
511 509 {
512 510 int board;
513 511 int memnode;
514 512 struct {
515 513 uint64_t addr;
516 514 uint64_t size;
517 515 } mem_range;
518 516
519 517 if (prom_getprop(nodeid, "board#", (caddr_t)&board) < 0) {
520 518 panic("Can not find board# property in mc node %x", nodeid);
521 519 }
522 520 if (prom_getprop(nodeid, "sb-mem-ranges", (caddr_t)&mem_range) < 0) {
523 521 panic("Can not find sb-mem-ranges property in mc node %x",
524 522 nodeid);
525 523 }
526 524 memnode = mem_range.addr >> OPL_MC_MEMBOARD_SHIFT;
527 525 plat_assign_lgrphand_to_mem_node(board, memnode);
528 526 }
529 527
530 528 /*
531 529 * Return the platform handle for the lgroup containing the given CPU
532 530 *
533 531 * For OPL, lgroup platform handle == board #.
534 532 */
535 533
536 534 extern int mpo_disabled;
537 535 extern lgrp_handle_t lgrp_default_handle;
538 536
539 537 lgrp_handle_t
540 538 plat_lgrp_cpu_to_hand(processorid_t id)
541 539 {
542 540 lgrp_handle_t plathand;
543 541
544 542 /*
545 543 * Return the real platform handle for the CPU until
546 544 * such time as we know that MPO should be disabled.
547 545 * At that point, we set the "mpo_disabled" flag to true,
548 546 * and from that point on, return the default handle.
549 547 *
550 548 * By the time we know that MPO should be disabled, the
551 549 * first CPU will have already been added to a leaf
552 550 * lgroup, but that's ok. The common lgroup code will
553 551 * double check that the boot CPU is in the correct place,
554 552 * and in the case where mpo should be disabled, will move
555 553 * it to the root if necessary.
556 554 */
557 555 if (mpo_disabled) {
558 556 /* If MPO is disabled, return the default (UMA) handle */
559 557 plathand = lgrp_default_handle;
560 558 } else
561 559 plathand = (lgrp_handle_t)LSB_ID(id);
562 560 return (plathand);
563 561 }
564 562
565 563 /*
566 564 * Platform specific lgroup initialization
567 565 */
568 566 void
569 567 plat_lgrp_init(void)
570 568 {
571 569 extern uint32_t lgrp_expand_proc_thresh;
572 570 extern uint32_t lgrp_expand_proc_diff;
573 571 const uint_t m = LGRP_LOADAVG_THREAD_MAX;
574 572
575 573 /*
576 574 * Set tuneables for the OPL architecture
577 575 *
578 576 * lgrp_expand_proc_thresh is the threshold load on the set of
579 577 * lgroups a process is currently using before considering
580 578 * adding another lgroup to the set. For Oly-C and Jupiter
581 579 * systems, there are four sockets per lgroup. Setting
582 580 * lgrp_expand_proc_thresh to add lgroups when the load reaches
583 581 * four threads will spread the load when it exceeds one thread
584 582 * per socket, optimizing memory bandwidth and L2 cache space.
585 583 *
586 584 * lgrp_expand_proc_diff determines how much less another lgroup
587 585 * must be loaded before shifting the start location of a thread
588 586 * to it.
589 587 *
590 588 * lgrp_loadavg_tolerance is the threshold where two lgroups are
591 589 * considered to have different loads. It is set to be less than
592 590 * 1% so that even a small residual load will be considered different
593 591 * from no residual load.
594 592 *
595 593 * We note loadavg values are not precise.
596 594 * Every 1/10 of a second loadavg values are reduced by 5%.
597 595 * This adjustment can come in the middle of the lgroup selection
598 596 * process, and for larger parallel apps with many threads can
599 597 * frequently occur between the start of the second thread
600 598 * placement and the finish of the last thread placement.
601 599 * We also must be careful to not use too small of a threshold
602 600 * since the cumulative decay for 1 second idle time is 40%.
603 601 * That is, the residual load from completed threads will still
604 602 * be 60% one second after the proc goes idle or 8% after 5 seconds.
605 603 *
606 604 * To allow for lag time in loadavg calculations
607 605 * remote thresh = 3.75 * LGRP_LOADAVG_THREAD_MAX
608 606 * local thresh = 0.75 * LGRP_LOADAVG_THREAD_MAX
609 607 * tolerance = 0.0078 * LGRP_LOADAVG_THREAD_MAX
610 608 *
611 609 * The load placement algorithms consider LGRP_LOADAVG_THREAD_MAX
612 610 * as the equivalent of a load of 1. To make the code more compact,
613 611 * we set m = LGRP_LOADAVG_THREAD_MAX.
614 612 */
615 613 lgrp_expand_proc_thresh = (m * 3) + (m >> 1) + (m >> 2);
616 614 lgrp_expand_proc_diff = (m >> 1) + (m >> 2);
617 615 lgrp_loadavg_tolerance = (m >> 7);
618 616 }
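The shift expressions above are fixed-point forms of the fractions quoted in the comment; with m = LGRP_LOADAVG_THREAD_MAX:

	/*
	 * (m * 3) + (m >> 1) + (m >> 2)  ==  3m + m/2 + m/4  ==  3.75m   (remote thresh)
	 * (m >> 1) + (m >> 2)            ==  m/2 + m/4       ==  0.75m   (proc diff)
	 * (m >> 7)                       ==  m/128           ==  ~0.0078m (tolerance)
	 */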
619 617
620 618 /*
621 619 * Platform notification of lgroup (re)configuration changes
622 620 */
623 621 /*ARGSUSED*/
624 622 void
625 623 plat_lgrp_config(lgrp_config_flag_t evt, uintptr_t arg)
626 624 {
627 625 update_membounds_t *umb;
628 626 lgrp_config_mem_rename_t lmr;
629 627 int sbd, tbd;
630 628 lgrp_handle_t hand, shand, thand;
631 629 int mnode, snode, tnode;
632 630 pfn_t start, end;
633 631
634 632 if (mpo_disabled)
635 633 return;
636 634
637 635 switch (evt) {
638 636
639 637 case LGRP_CONFIG_MEM_ADD:
640 638 /*
641 639 * Establish the lgroup handle to memnode translation.
642 640 */
643 641 umb = (update_membounds_t *)arg;
644 642
645 643 hand = umb->u_board;
646 644 mnode = plat_pfn_to_mem_node(umb->u_base >> MMU_PAGESHIFT);
647 645 plat_assign_lgrphand_to_mem_node(hand, mnode);
648 646
649 647 break;
650 648
651 649 case LGRP_CONFIG_MEM_DEL:
652 650 /*
653 651 * Special handling for possible memory holes.
654 652 */
655 653 umb = (update_membounds_t *)arg;
656 654 hand = umb->u_board;
657 655 if ((mnode = plat_lgrphand_to_mem_node(hand)) != -1) {
658 656 if (mem_node_config[mnode].exists) {
659 657 start = mem_node_config[mnode].physbase;
660 658 end = mem_node_config[mnode].physmax;
661 659 mem_node_del_slice(start, end);
662 660 }
663 661 }
664 662
665 663 break;
666 664
667 665 case LGRP_CONFIG_MEM_RENAME:
668 666 /*
669 667 * During a DR copy-rename operation, all of the memory
670 668 * on one board is moved to another board -- but the
671 669 * addresses/pfns and memnodes don't change. This means
672 670 * the memory has changed locations without changing identity.
673 671 *
674 672 * Source is where we are copying from and target is where we
675 673 * are copying to. After source memnode is copied to target
676 674 * memnode, the physical addresses of the target memnode are
677 675 * renamed to match what the source memnode had. Then target
678 676 * memnode can be removed and source memnode can take its
679 677 * place.
680 678 *
681 679 * To do this, swap the lgroup handle to memnode mappings for
682 680 * the boards, so target lgroup will have source memnode and
683 681 * source lgroup will have empty target memnode which is where
684 682 * its memory will go (if any is added to it later).
685 683 *
686 684 * Then source memnode needs to be removed from its lgroup
687 685 * and added to the target lgroup where the memory was living
688 686 * but under a different name/memnode. The memory was in the
689 687 * target memnode and now lives in the source memnode with
690 688 * different physical addresses even though it is the same
691 689 * memory.
692 690 */
693 691 sbd = arg & 0xffff;
694 692 tbd = (arg & 0xffff0000) >> 16;
695 693 shand = sbd;
696 694 thand = tbd;
697 695 snode = plat_lgrphand_to_mem_node(shand);
698 696 tnode = plat_lgrphand_to_mem_node(thand);
699 697
700 698 /*
701 699 * Special handling for possible memory holes.
702 700 */
703 701 if (tnode != -1 && mem_node_config[tnode].exists) {
704 702 start = mem_node_config[tnode].physbase;
705 703 end = mem_node_config[tnode].physmax;
706 704 mem_node_del_slice(start, end);
707 705 }
708 706
709 707 plat_assign_lgrphand_to_mem_node(thand, snode);
710 708 plat_assign_lgrphand_to_mem_node(shand, tnode);
711 709
712 710 lmr.lmem_rename_from = shand;
713 711 lmr.lmem_rename_to = thand;
714 712
715 713 /*
716 714 * Remove source memnode of copy rename from its lgroup
717 715 * and add it to its new target lgroup
718 716 */
719 717 lgrp_config(LGRP_CONFIG_MEM_RENAME, (uintptr_t)snode,
720 718 (uintptr_t)&lmr);
721 719
722 720 break;
723 721
724 722 default:
725 723 break;
726 724 }
727 725 }
728 726
729 727 /*
730 728 * Return latency between "from" and "to" lgroups
731 729 *
732 730 * This latency number can only be used for relative comparison
733 731 * between lgroups on the running system, cannot be used across platforms,
734 732 * and may not reflect the actual latency. It is platform and implementation
735 733 * specific, so platform gets to decide its value. It would be nice if the
736 734 * number was at least proportional to make comparisons more meaningful though.
737 735 * NOTE: The numbers below are supposed to be load latencies for uncached
738 736 * memory divided by 10.
739 737 *
740 738 */
741 739 int
742 740 plat_lgrp_latency(lgrp_handle_t from, lgrp_handle_t to)
743 741 {
744 742 /*
745 743 * Return min remote latency when there are more than two lgroups
746 744 * (root and child) and getting latency between two different lgroups
747 745 * or root is involved
748 746 */
749 747 if (lgrp_optimizations() && (from != to ||
750 748 from == LGRP_DEFAULT_HANDLE || to == LGRP_DEFAULT_HANDLE))
751 749 return (42);
752 750 else
753 751 return (35);
754 752 }
755 753
756 754 /*
757 755 * Return platform handle for root lgroup
758 756 */
759 757 lgrp_handle_t
760 758 plat_lgrp_root_hand(void)
761 759 {
762 760 if (mpo_disabled)
763 761 return (lgrp_default_handle);
764 762
765 763 return (LGRP_DEFAULT_HANDLE);
766 764 }
767 765
768 766 /*ARGSUSED*/
769 767 void
770 768 plat_freelist_process(int mnode)
771 769 {
772 770 }
773 771
774 772 void
775 773 load_platform_drivers(void)
776 774 {
777 775 (void) i_ddi_attach_pseudo_node("dr");
778 776 }
779 777
780 778 /*
781 779 * No platform drivers on this platform
782 780 */
783 781 char *platform_module_list[] = {
784 782 (char *)0
785 783 };
786 784
787 785 /*ARGSUSED*/
788 786 void
789 787 plat_tod_fault(enum tod_fault_type tod_bad)
790 788 {
791 789 }
792 790
793 791 /*ARGSUSED*/
794 792 void
795 793 cpu_sgn_update(ushort_t sgn, uchar_t state, uchar_t sub_state, int cpuid)
796 794 {
797 795 static void (*scf_panic_callback)(int);
798 796 static void (*scf_shutdown_callback)(int);
799 797
800 798 /*
801 799 * This is for notifying the SCF of a system panic/shutdown.
802 800 * In case of shutdown and panic, SCF call back
803 801 * function should be called.
804 802 * <SCF call back functions>
805 803 * scf_panic_callb() : panicsys()->panic_quiesce_hw()
806 804 * scf_shutdown_callb(): halt() or power_down() or reboot_machine()
807 805 * cpuid should be -1 and state should be SIGST_EXIT.
808 806 */
809 807 if (state == SIGST_EXIT && cpuid == -1) {
810 808
811 809 /*
812 810 * find the symbol for the SCF panic callback routine in driver
813 811 */
814 812 if (scf_panic_callback == NULL)
815 813 scf_panic_callback = (void (*)(int))
816 814 modgetsymvalue("scf_panic_callb", 0);
817 815 if (scf_shutdown_callback == NULL)
818 816 scf_shutdown_callback = (void (*)(int))
819 817 modgetsymvalue("scf_shutdown_callb", 0);
820 818
821 819 switch (sub_state) {
822 820 case SIGSUBST_PANIC:
823 821 if (scf_panic_callback == NULL) {
824 822 cmn_err(CE_NOTE, "!cpu_sgn_update: "
825 823 "scf_panic_callb not found\n");
826 824 return;
827 825 }
828 826 scf_panic_callback(SIGSUBST_PANIC);
829 827 break;
830 828
831 829 case SIGSUBST_HALT:
832 830 if (scf_shutdown_callback == NULL) {
833 831 cmn_err(CE_NOTE, "!cpu_sgn_update: "
834 832 "scf_shutdown_callb not found\n");
835 833 return;
836 834 }
837 835 scf_shutdown_callback(SIGSUBST_HALT);
838 836 break;
839 837
840 838 case SIGSUBST_ENVIRON:
841 839 if (scf_shutdown_callback == NULL) {
842 840 cmn_err(CE_NOTE, "!cpu_sgn_update: "
843 841 "scf_shutdown_callb not found\n");
844 842 return;
845 843 }
846 844 scf_shutdown_callback(SIGSUBST_ENVIRON);
847 845 break;
848 846
849 847 case SIGSUBST_REBOOT:
850 848 if (scf_shutdown_callback == NULL) {
851 849 cmn_err(CE_NOTE, "!cpu_sgn_update: "
852 850 "scf_shutdown_callb not found\n");
853 851 return;
854 852 }
855 853 scf_shutdown_callback(SIGSUBST_REBOOT);
856 854 break;
857 855 }
858 856 }
859 857 }
860 858
861 859 /*ARGSUSED*/
862 860 int
863 861 plat_get_mem_unum(int synd_code, uint64_t flt_addr, int flt_bus_id,
864 - int flt_in_memory, ushort_t flt_status,
865 - char *buf, int buflen, int *lenp)
862 + int flt_in_memory, ushort_t flt_status, char *buf, int buflen, int *lenp)
866 863 {
867 864 /*
868 865 * check if it's a Memory error.
869 866 */
870 867 if (flt_in_memory) {
871 868 if (opl_get_mem_unum != NULL) {
872 869 return (opl_get_mem_unum(synd_code, flt_addr, buf,
873 870 buflen, lenp));
874 871 } else {
875 872 return (ENOTSUP);
876 873 }
877 874 } else {
878 875 return (ENOTSUP);
879 876 }
880 877 }
881 878
882 879 /*ARGSUSED*/
883 880 int
884 881 plat_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
885 882 {
886 883 int ret = 0;
887 884 int sb;
888 885 int plen;
889 886
890 887 sb = opl_get_physical_board(LSB_ID(cpuid));
891 888 if (sb == -1) {
892 889 return (ENXIO);
893 890 }
894 891
895 892 /*
896 893 * opl_cur_model is assigned here
897 894 */
898 895 if (opl_cur_model == NULL) {
899 896 set_model_info();
900 897
901 898 /*
902 899 * if not matched, return
903 900 */
904 901 if (opl_cur_model == NULL)
905 902 return (ENODEV);
906 903 }
907 904
908 905 ASSERT((opl_cur_model - opl_models) == (opl_cur_model->model_type));
909 906
910 907 switch (opl_cur_model->model_type) {
911 908 case FF1:
912 909 plen = snprintf(buf, buflen, "/%s/CPUM%d", "MBU_A",
913 910 CHIP_ID(cpuid) / 2);
914 911 break;
915 912
916 913 case FF2:
917 914 plen = snprintf(buf, buflen, "/%s/CPUM%d", "MBU_B",
918 915 (CHIP_ID(cpuid) / 2) + (sb * 2));
919 916 break;
920 917
921 918 case DC1:
922 919 case DC2:
923 920 case DC3:
924 921 plen = snprintf(buf, buflen, "/%s%02d/CPUM%d", "CMU", sb,
925 922 CHIP_ID(cpuid));
926 923 break;
927 924
928 925 case IKKAKU:
929 926 plen = snprintf(buf, buflen, "/%s", "MBU_A");
930 927 break;
931 928
932 929 default:
933 930 /* This should never happen */
934 931 return (ENODEV);
935 932 }
936 933
937 934 if (plen >= buflen) {
938 935 ret = ENOSPC;
939 936 } else {
940 937 if (lenp)
941 938 *lenp = strlen(buf);
942 939 }
943 940 return (ret);
944 941 }
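For illustration, hypothetical IDs plugged into the format strings above produce unums such as:

	/*
	 * FF1,    CHIP_ID 2              ->  "/MBU_A/CPUM1"
	 * FF2,    CHIP_ID 2, board 1     ->  "/MBU_B/CPUM3"
	 * DC1-3,  CHIP_ID 2, board 3     ->  "/CMU03/CPUM2"
	 * IKKAKU                         ->  "/MBU_A"
	 */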
945 942
946 943 void
947 944 plat_nodename_set(void)
948 945 {
949 946 post_xscf_msg((char *)&utsname, sizeof (struct utsname));
950 947 }
951 948
952 949 caddr_t efcode_vaddr = NULL;
953 950
954 951 /*
955 952 * Preallocate enough memory for fcode claims.
956 953 */
957 954
958 955 caddr_t
959 956 efcode_alloc(caddr_t alloc_base)
960 957 {
961 958 caddr_t efcode_alloc_base = (caddr_t)roundup((uintptr_t)alloc_base,
962 959 MMU_PAGESIZE);
963 960 caddr_t vaddr;
964 961
965 962 /*
966 963 * allocate the physical memory for the Oberon fcode.
967 964 */
968 965 if ((vaddr = (caddr_t)BOP_ALLOC(bootops, efcode_alloc_base,
969 966 efcode_size, MMU_PAGESIZE)) == NULL)
970 967 cmn_err(CE_PANIC, "Cannot allocate Efcode Memory");
971 968
972 969 efcode_vaddr = vaddr;
973 970
974 971 return (efcode_alloc_base + efcode_size);
975 972 }
976 973
977 974 caddr_t
978 975 plat_startup_memlist(caddr_t alloc_base)
979 976 {
980 977 caddr_t tmp_alloc_base;
981 978
982 979 tmp_alloc_base = efcode_alloc(alloc_base);
983 980 tmp_alloc_base =
984 981 (caddr_t)roundup((uintptr_t)tmp_alloc_base, ecache_alignsize);
985 982 return (tmp_alloc_base);
986 983 }
987 984
988 985 /* need to forward declare these */
989 986 static void plat_lock_delay(uint_t);
990 987
991 988 void
992 989 startup_platform(void)
993 990 {
994 991 if (clock_tick_threshold == 0)
995 992 clock_tick_threshold = OPL_CLOCK_TICK_THRESHOLD;
996 993 if (clock_tick_ncpus == 0)
997 994 clock_tick_ncpus = OPL_CLOCK_TICK_NCPUS;
998 995 mutex_lock_delay = plat_lock_delay;
999 996 mutex_cap_factor = OPL_BOFF_MAX_SCALE;
1000 997 }
1001 998
1002 999 static uint_t
1003 1000 get_mmu_id(processorid_t cpuid)
1004 1001 {
1005 1002 int pb = opl_get_physical_board(LSB_ID(cpuid));
1006 1003
1007 1004 if (pb == -1) {
1008 1005 cmn_err(CE_PANIC,
1009 1006 "opl_get_physical_board failed (cpu %d LSB %u)",
1010 1007 cpuid, LSB_ID(cpuid));
1011 1008 }
1012 1009 return (pb * OPL_MAX_COREID_PER_BOARD) + (CHIP_ID(cpuid) *
1013 1010 OPL_MAX_COREID_PER_CMP) + CORE_ID(cpuid);
1014 1011 }
1015 1012
1016 1013 void
1017 1014 plat_cpuid_to_mmu_ctx_info(processorid_t cpuid, mmu_ctx_info_t *info)
1018 1015 {
1019 1016 int impl;
1020 1017
1021 1018 impl = cpunodes[cpuid].implementation;
1022 1019 if (IS_OLYMPUS_C(impl) || IS_JUPITER(impl)) {
1023 1020 info->mmu_idx = get_mmu_id(cpuid);
1024 1021 info->mmu_nctxs = 8192;
1025 1022 } else {
1026 1023 cmn_err(CE_PANIC, "Unknown processor %d", impl);
1027 1024 }
1028 1025 }
1029 1026
1030 1027 int
1031 1028 plat_get_mem_sid(char *unum, char *buf, int buflen, int *lenp)
1032 1029 {
1033 1030 if (opl_get_mem_sid == NULL) {
1034 1031 return (ENOTSUP);
1035 1032 }
1036 1033 return (opl_get_mem_sid(unum, buf, buflen, lenp));
1037 1034 }
1038 1035
1039 1036 int
1040 1037 plat_get_mem_offset(uint64_t paddr, uint64_t *offp)
1041 1038 {
1042 1039 if (opl_get_mem_offset == NULL) {
1043 1040 return (ENOTSUP);
1044 1041 }
1045 1042 return (opl_get_mem_offset(paddr, offp));
1046 1043 }
1047 1044
1048 1045 int
1049 1046 plat_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp)
1050 1047 {
1051 1048 if (opl_get_mem_addr == NULL) {
1052 1049 return (ENOTSUP);
1053 1050 }
1054 1051 return (opl_get_mem_addr(unum, sid, offset, addrp));
1055 1052 }
1056 1053
1057 1054 void
1058 1055 plat_lock_delay(uint_t backoff)
1059 1056 {
1060 1057 int i;
1061 1058 uint_t cnt, remcnt;
1062 1059 int ctr;
1063 1060 hrtime_t delay_start, rem_delay;
1064 1061 /*
1065 1062 * Platform specific lock delay code for OPL
1066 1063 *
1067 1064 * Using staged linear increases in the delay.
1068 1065 * The sleep instruction is the preferred method of delay,
1069 1066 * but is too large of granularity for the initial backoff.
1070 1067 */
1071 1068
1072 1069 if (backoff < 100) {
1073 1070 /*
1074 1071 * If desired backoff is long enough,
1075 1072 * use sleep for most of it
1076 1073 */
1077 1074 for (cnt = backoff;
1078 1075 cnt >= OPL_BOFF_SLEEP;
1079 1076 cnt -= OPL_BOFF_SLEEP) {
1080 1077 cpu_smt_pause();
1081 1078 }
1082 1079 /*
1083 1080 * spin for small remainder of backoff
1084 1081 */
1085 1082 for (ctr = cnt * OPL_BOFF_SPIN; ctr; ctr--) {
1086 1083 mutex_delay_default();
1087 1084 }
1088 1085 } else {
1089 1086 /* backoff is large. Fill it by sleeping */
1090 1087 delay_start = gethrtime_waitfree();
1091 1088 cnt = backoff / OPL_BOFF_SLEEP;
1092 1089 /*
1093 1090 * use sleep instructions for delay
1094 1091 */
1095 1092 for (i = 0; i < cnt; i++) {
1096 1093 cpu_smt_pause();
1097 1094 }
1098 1095
1099 1096 /*
1100 1097 * Note: if the other strand executes a sleep instruction,
1101 1098 * then the sleep ends immediately with a minimum time of
1102 1099 * 42 clocks. We check gethrtime to insure we have
1103 1100 * waited long enough. And we include both a short
1104 1101 * spin loop and a sleep for repeated delay times.
1105 1102 */
1106 1103
1107 1104 rem_delay = gethrtime_waitfree() - delay_start;
1108 1105 while (rem_delay < cnt * OPL_BOFF_TM) {
1109 1106 remcnt = cnt - (rem_delay / OPL_BOFF_TM);
1110 1107 for (i = 0; i < remcnt; i++) {
1111 1108 cpu_smt_pause();
1112 1109 for (ctr = OPL_BOFF_SPIN; ctr; ctr--) {
1113 1110 mutex_delay_default();
1114 1111 }
1115 1112 }
1116 1113 rem_delay = gethrtime_waitfree() - delay_start;
1117 1114 }
1118 1115 }
1119 1116 }
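A rough worked example of the large-backoff path above (derived from the OPL_BOFF_* constants; the elapsed time is re-checked with gethrtime because a sleep can end early):

	/*
	 * backoff = 1000:
	 *     cnt = backoff / OPL_BOFF_SLEEP = 1000 / 4 = 250 sleeps
	 *     minimum delay enforced = cnt * OPL_BOFF_TM
	 *                            = 250 * 1600 ns = 400 usec
	 */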
1120 1117
1121 1118 /*
1122 1119 * The following code implements asynchronous call to XSCF to setup the
1123 1120 * domain node name.
1124 1121 */
1125 1122
1126 1123 #define FREE_MSG(m) kmem_free((m), NM_LEN((m)->len))
1127 1124
1128 1125 /*
1129 1126 * The following three macros define the all operations on the request
1130 1127 * list we are using here, and hide the details of the list
1131 1128 * implementation from the code.
1132 1129 */
1133 1130 #define PUSH(m) \
1134 1131 { \
1135 1132 (m)->next = ctl_msg.head; \
1136 1133 (m)->prev = NULL; \
1137 1134 if ((m)->next != NULL) \
1138 1135 (m)->next->prev = (m); \
1139 1136 ctl_msg.head = (m); \
1140 1137 }
1141 1138
1142 1139 #define REMOVE(m) \
1143 1140 { \
1144 1141 if ((m)->prev != NULL) \
1145 1142 (m)->prev->next = (m)->next; \
1146 1143 else \
1147 1144 ctl_msg.head = (m)->next; \
1148 1145 if ((m)->next != NULL) \
1149 1146 (m)->next->prev = (m)->prev; \
1150 1147 }
1151 1148
1152 1149 #define FREE_THE_TAIL(head) \
1153 1150 { \
1154 1151 nm_msg_t *n_msg, *m; \
1155 1152 m = (head)->next; \
1156 1153 (head)->next = NULL; \
1157 1154 while (m != NULL) { \
1158 1155 n_msg = m->next; \
1159 1156 FREE_MSG(m); \
1160 1157 m = n_msg; \
1161 1158 } \
1162 1159 }
1163 1160
1164 1161 #define SCF_PUTINFO(f, s, p) \
1165 1162 f(KEY_ESCF, 0x01, 0, s, p)
1166 1163
1167 1164 #define PASS2XSCF(m, r) ((r = SCF_PUTINFO(ctl_msg.scf_service_function, \
1168 1165 (m)->len, (m)->data)) == 0)
1169 1166
1170 1167 /*
1171 1168 * The value of the following macro loosely depends on the
1172 1169 * value of the "device busy" timeout used in the SCF driver.
1173 1170 * (See pass2xscf_thread()).
1174 1171 */
1175 1172 #define SCF_DEVBUSY_DELAY 10
1176 1173
1177 1174 /*
1178 1175 * The default number of attempts to contact the scf driver
1179 1176 * if we cannot fetch any information about the timeout value
1180 1177 * it uses.
1181 1178 */
1182 1179
1183 1180 #define REPEATS 4
1184 1181
1185 1182 typedef struct nm_msg {
1186 1183 struct nm_msg *next;
1187 1184 struct nm_msg *prev;
1188 1185 int len;
1189 1186 char data[1];
1190 1187 } nm_msg_t;
1191 1188
1192 1189 #define NM_LEN(len) (sizeof (nm_msg_t) + (len) - 1)
1193 1190
1194 1191 static struct ctlmsg {
1195 1192 nm_msg_t *head;
1196 1193 nm_msg_t *now_serving;
1197 1194 kmutex_t nm_lock;
1198 1195 kthread_t *nmt;
1199 1196 int cnt;
1200 1197 int (*scf_service_function)(uint32_t, uint8_t,
1201 1198 uint32_t, uint32_t, void *);
1202 1199 } ctl_msg;
1203 1200
1204 1201 static void
1205 1202 post_xscf_msg(char *dp, int len)
1206 1203 {
1207 1204 nm_msg_t *msg;
1208 1205
1209 1206 msg = (nm_msg_t *)kmem_zalloc(NM_LEN(len), KM_SLEEP);
1210 1207
1211 1208 bcopy(dp, msg->data, len);
1212 1209 msg->len = len;
1213 1210
1214 1211 mutex_enter(&ctl_msg.nm_lock);
1215 1212 if (ctl_msg.nmt == NULL) {
1216 1213 ctl_msg.nmt = thread_create(NULL, 0, pass2xscf_thread,
1217 1214 NULL, 0, &p0, TS_RUN, minclsyspri);
1218 1215 }
1219 1216
1220 1217 PUSH(msg);
1221 1218 ctl_msg.cnt++;
1222 1219 mutex_exit(&ctl_msg.nm_lock);
1223 1220 }
1224 1221
1225 1222 static void
1226 1223 pass2xscf_thread()
1227 1224 {
1228 1225 nm_msg_t *msg;
1229 1226 int ret;
1230 1227 uint_t i, msg_sent, xscf_driver_delay;
1231 1228 static uint_t repeat_cnt;
1232 1229 uint_t *scf_wait_cnt;
1233 1230
1234 1231 mutex_enter(&ctl_msg.nm_lock);
1235 1232
1236 1233 /*
1237 1234 * Find the address of the SCF put routine if it's not done yet.
1238 1235 */
1239 1236 if (ctl_msg.scf_service_function == NULL) {
1240 1237 if ((ctl_msg.scf_service_function =
1241 1238 (int (*)(uint32_t, uint8_t, uint32_t, uint32_t, void *))
1242 1239 modgetsymvalue("scf_service_putinfo", 0)) == NULL) {
1243 1240 cmn_err(CE_NOTE, "pass2xscf_thread: "
1244 1241 "scf_service_putinfo not found\n");
1245 1242 ctl_msg.nmt = NULL;
1246 1243 mutex_exit(&ctl_msg.nm_lock);
1247 1244 return;
1248 1245 }
1249 1246 }
1250 1247
1251 1248 /*
1252 1249 * Calculate the number of attempts to connect XSCF based on the
1253 1250 * scf driver delay (which is
1254 1251 * SCF_DEVBUSY_DELAY*scf_online_wait_rcnt seconds) and the value
1255 1252 * of xscf_connect_delay (the total number of seconds to wait
1256 1253 * until XSCF gets ready.)
1257 1254 */
1258 1255 if (repeat_cnt == 0) {
1259 1256 if ((scf_wait_cnt =
1260 1257 (uint_t *)
1261 1258 modgetsymvalue("scf_online_wait_rcnt", 0)) == NULL) {
1262 1259 repeat_cnt = REPEATS;
1263 1260 } else {
1264 1261
1265 1262 xscf_driver_delay = *scf_wait_cnt *
1266 1263 SCF_DEVBUSY_DELAY;
1267 1264 repeat_cnt = (xscf_connect_delay/xscf_driver_delay) + 1;
1268 1265 }
1269 1266 }
1270 1267
1271 1268 while (ctl_msg.cnt != 0) {
1272 1269
1273 1270 /*
1274 1271 * Take the very last request from the queue,
1275 1272 */
1276 1273 ctl_msg.now_serving = ctl_msg.head;
1277 1274 ASSERT(ctl_msg.now_serving != NULL);
1278 1275
1279 1276 /*
1280 1277 * and discard all the others if any.
1281 1278 */
1282 1279 FREE_THE_TAIL(ctl_msg.now_serving);
1283 1280 ctl_msg.cnt = 1;
1284 1281 mutex_exit(&ctl_msg.nm_lock);
1285 1282
1286 1283 /*
1287 1284 * Pass the name to XSCF. Note please, we do not hold the
1288 1285 * mutex while we are doing this.
1289 1286 */
1290 1287 msg_sent = 0;
1291 1288 for (i = 0; i < repeat_cnt; i++) {
1292 1289 if (PASS2XSCF(ctl_msg.now_serving, ret)) {
1293 1290 msg_sent = 1;
1294 1291 break;
1295 1292 } else {
1296 1293 if (ret != EBUSY) {
1297 1294 cmn_err(CE_NOTE, "pass2xscf_thread:"
1298 1295 " unexpected return code"
1299 1296 " from scf_service_putinfo():"
1300 1297 " %d\n", ret);
1301 1298 }
1302 1299 }
1303 1300 }
1304 1301
1305 1302 if (msg_sent) {
1306 1303
1307 1304 /*
1308 1305 * Remove the request from the list
1309 1306 */
1310 1307 mutex_enter(&ctl_msg.nm_lock);
1311 1308 msg = ctl_msg.now_serving;
1312 1309 ctl_msg.now_serving = NULL;
1313 1310 REMOVE(msg);
1314 1311 ctl_msg.cnt--;
1315 1312 mutex_exit(&ctl_msg.nm_lock);
1316 1313 FREE_MSG(msg);
1317 1314 } else {
1318 1315
1319 1316 /*
1320 1317 * If while we have tried to communicate with
1321 1318 * XSCF there were any other requests we are
1322 1319 * going to drop this one and take the latest
1323 1320 * one. Otherwise we will try to pass this one
1324 1321 * again.
1325 1322 */
1326 1323 cmn_err(CE_NOTE,
1327 1324 "pass2xscf_thread: "
1328 1325 "scf_service_putinfo "
1329 1326 "not responding\n");
1330 1327 }
1331 1328 mutex_enter(&ctl_msg.nm_lock);
1332 1329 }
1333 1330
1334 1331 /*
1335 1332 * The request queue is empty, exit.
1336 1333 */
1337 1334 ctl_msg.nmt = NULL;
1338 1335 mutex_exit(&ctl_msg.nm_lock);
1339 1336 }