1 /*
   2  * This file and its contents are supplied under the terms of the
   3  * Common Development and Distribution License ("CDDL"), version 1.0.
   4  * You may only use this file in accordance with the terms of version
   5  * 1.0 of the CDDL.
   6  *
   7  * A full copy of the text of the CDDL should have accompanied this
   8  * source.  A copy of the CDDL is also available via the Internet at
   9  * http://www.illumos.org/license/CDDL.
  10  */
  11 
  12 /*
  13  * Copyright 2013 David Hoeppner.  All rights reserved.
  14  */
  15 
  16 /*
  17  * Interrupt Load Balancer.
  18  *
 * The interrupt load balancer reassigns interrupts from one cpu
 * to another if the interrupt load is imbalanced across cpus.
  21  */
  22 
  23 #include <sys/param.h>
  24 #include <sys/types.h>
  25 #include <sys/systm.h>
  26 #include <sys/callb.h>
  27 #include <sys/cpuvar.h>
  28 #include <sys/proc.h>
  29 #include <sys/processor.h>
  30 #include <sys/sdt.h>
  31 #include <sys/sysmacros.h>
  32 #include <sys/time.h>
  33 #include <sys/cmn_err.h>
  34 #include <sys/zone.h>
  35 #include <sys/lgrp.h>
  36 
  37 extern  proc_t  *proc_intrd;
  38 
  39 #define IB_NAME                 "intrd"
  40 
  41 /*
  42  * Various sleeptimes.
  43  */
  44 #define IB_NORMAL_SLEEPTIME     10
  45 #define IB_IDLE_SLEEPTIME       45
  46 #define IB_ONECPU_SLEEPTIME     (60 * 15)
  47 
  48 static kmutex_t         ib_lock;
  49 static kcondvar_t       ib_cv;
  50 
  51 /*
  52  * System tuneable.
  53  *
  54  * Exclude interrupts in this list.
  55  */
  56 static char *ib_exclude = NULL;
  57 
  58 typedef struct _ib_exclude {
  59         list_node_t     *ix_next;
  60         processorid_t   ix_cpu_id;
  61 } ib_exclude_list_t;
  62 
  63 static list_t   ib_exclude_list;
  64 
  65 
  66 
  67 /*
  68  * CPU structure.
  69  */
  70 typedef struct _ib_cpu {
  71         list_node_t     ic_next;        /* link */
  72         processorid_t   ic_cpu_id;      /* processor id */
  73         boolean_t       ic_offline;     /* CPU is offline */
  74         list_t          ic_ivec_list;   /* list of interrupt vectors */
  75         uint32_t        ic_num_ivecs;   /* number of interrupt vectors */
  76         hrtime_t        ic_tot;         /* total time */
  77         int64_t         ic_intrs;       /* number of interrupts */
  78         int             ic_intr_load;   /* interrupts / total time */
  79         int64_t         ic_big_intrs;
  80         int64_t         ic_bigintr;     /* largest interrupt on cpu */
  81         lgrp_t          *ic_lgrp;       /* locality group of this cpu */
  82 } ib_cpu_t;
  83 
  84 /*
  85  * Interrupt vector.
  86  */
  87 typedef struct _ib_ivec {
  88         list_node_t     ii_next;        /* link */
  89         uint64_t        ii_ihs;
  90         uint64_t        ii_ino;
  91         uint64_t        ii_num_ino;
  92         uint64_t        ii_pil;
  93         uint64_t        ii_time;
  94         char            *ii_buspath;
  95         char            *ii_name;
  96         processorid_t   ii_orig_cpu;    /* current CPU */
  97         processorid_t   ii_now_cpu;     /* new to be assigned CPU */
  98         uint64_t        ii_inum;
  99         boolean_t       ii_goal;
 100 } ib_ivec_t;
 101 
 102 /*
 103  * MSI.
 104  */
 105 typedef struct _ib_msi {
 106         list_node_t     im_next;        /* link */
 107         const char      *im_name;
 108         list_t          im_ino_list;
 109 } ib_msi_t;
 110 
 111 typedef struct _ib_msi_ino {
 112         list_node_t     imi_next;       /* link */
 113         uint64_t        imi_ino;
 114         ib_ivec_t       *imi_ivec;
 115 } ib_msi_ino_t;
 116 
 117 /*
 118  * Snapshot.
 119  */
 120 typedef struct _ib_snapshot {
 121         list_node_t     is_next;        /* link */
 122         list_t          is_cpu_list;
 123         processorid_t   is_num_cpus;
 124 } ib_snapshot_t;
 125 
 126 /*
 127  * Snapshot delta structure.
 128  */
 129 typedef struct _ib_delta {
 130         list_node_t     id_next;        /* link */
 131         list_t          id_cpu_list;
 132         boolean_t       id_missing;
 133         int             id_avgintrload; /* interrupts / total time */
 134         uint64_t        id_avgintrnsec;
 135         int             id_goodness;
 136 } ib_delta_t;
 137 
 138 static list_t   ib_cpu_list;            /* List of all OS CPUs */
 139 
 140 static long     ib_sleeptime = IB_NORMAL_SLEEPTIME;
 141 static processorid_t    ib_num_cpus;
 142 
 143 static int      goodness_unsafe_load = 90;
 144 static int      goodness_mindelta = 10;
 145 
 146 /*
 147  * Function prototypes.
 148  */
 149 static  void    ib_cpu_register(processorid_t);
 150 static  int     ib_cpu_setup(cpu_setup_t, int, void *);
 151 static  boolean_t       ib_cpu_exclude(processorid_t);
 152 static  ib_cpu_t        *ib_cpu_create(void);
 153 static  ib_cpu_t        *ib_cpu_find(list_t, processorid_t);
 154 static  void    ib_cpu_destroy(ib_cpu_t *);
 155 
 156 static  int     ib_goodness(ib_delta_t *);
 157 static  int     ib_do_reconfig(ib_delta_t *);
 158 static  int     ib_imbalanced(int, int);
 159 static  int     ib_interrupt_do_move(ib_ivec_t *, processorid_t);
 160 static  void    ib_interrupt_move_check(ib_delta_t *, processorid_t, processorid_t);
 161 
 162 static  ib_snapshot_t   *ib_get_statistics(void);
 163 static  ib_delta_t      *ib_delta_generate(ib_snapshot_t *, ib_snapshot_t *);
 164 
 165 /*
 166  * Helper macros.
 167  */
 168 #define IS_CPU(cpu_id)          (cpu[cpu_id] != NULL)
 169 
 170 #define FOREACH_CPU(icpu, icpu_list)                            \
 171         for (icpu = list_head(&icpu_list); icpu != NULL;    \
 172             icpu = list_next(&icpu_list, icpu))
 173 
 174 #define FOREACH_IVEC(ivec, ivec_list)                           \
 175         for (ivec = list_head(&ivec_list); ivec != NULL;    \
 176             ivec = list_next(&ivec_list, ivec))
 177 
 178 #define DTRACE_INTRD(name)      \
 179         DTRACE_PROBE(__intrd_##name)
 180 
 181 #define DEBUG   1
 182 #ifdef  DEBUG
 183 #define IB_APIDBG(args)         cmn_err args
 184 #define IB_IMPLDBG(args)        cmn_err args
 185 #else
 186 #define IB_APIDBG(args)
 187 #define IB_IMPLDBG(args)
 188 #endif
 189 
 190 #define IB_LOG(args)            cmn_err args
 191 
 192 void
 193 interrupt_balancer(void)
 194 {
 195         processorid_t   cpu_id;
 196         callb_cpr_t     cpr;
 197         user_t          *u = PTOU(curproc);
 198         int             error;
 199 
 200         boolean_t       do_reconfig = B_FALSE;
 201         int             goodness;
 202         int             baseline_goodness = 0;
 203         list_t          ib_delta_list;
 204         hrtime_t        statslen = 60;
 205 
 206         proc_intrd = ttoproc(curthread);
 207         proc_intrd->p_cstime = proc_intrd->p_stime = 0;
 208         proc_intrd->p_cutime = proc_intrd->p_utime = 0;
 209 
 210         (void) strncpy(u->u_psargs, IB_NAME, sizeof(u->u_psargs));
 211         (void) strncpy(u->u_comm, IB_NAME, sizeof(u->u_comm));
 212 
 213         /* Initialize global mutex lock */
 214         mutex_init(&ib_lock, NULL, MUTEX_DEFAULT, NULL);
 215 
 216         /* Initialize CPU list */
 217         list_create(&ib_cpu_list, sizeof (ib_cpu_t),
 218             offsetof(ib_cpu_t, ic_next));
 219 
 220         /* Initialize delta list */
 221         list_create(&ib_delta_list, sizeof (ib_delta_t),
 222             offsetof(ib_delta_t, id_next));
 223 
 224         /* Initialize interrupt exclude list */
 225         list_create(&ib_exclude_list, sizeof (ib_exclude_list_t),
 226            offsetof(ib_exclude_list_t, ix_next));
 227 
 228         /*
 229          * Parse list of interrupts to exclude.
 230          *
 231          * XXX: move interrupts to active processors.
 232          */
 233         if (ib_exclude != NULL) {
 234                 processorid_t   rval;
 235 
 236                 IB_LOG((CE_CONT, "XXX %s XXX", ib_exclude));
 237         }
 238 
 239         /*
 240          * Build a list of all CPUs available for interrupt handling.
 241          */
 242         for (cpu_id = 0; cpu_id <= max_cpu_seqid_ever; cpu_id++) {
 243                 if (IS_CPU(cpu_id))
 244                         ib_cpu_register(cpu_id);
 245         }
 246 
 247         /*
 248          * Locality group information.
 249          */
 250         int     i;
 251         for (i = 0; i < lgrp_plat_max_lgrps(); i++) {
 252                 lgrp_t  *lgrp;
 253 
 254                 lgrp = lgrp_table[i];
 255                 if (lgrp->lgrp_cpu != NULL) {
 256                         cpu_t   *lgrp_cpu;
 257 
 258                         for (lgrp_cpu = lgrp->lgrp_cpu; lgrp_cpu != NULL;
 259                             lgrp_cpu =lgrp_cpu->cpu_next_lgrp) {
 260                                 ib_cpu_t        *icpu;
 261 
 262                                 icpu = ib_cpu_find(ib_cpu_list, lgrp_cpu->cpu_id);
 263 
 264                                 /*
 265                                  * Assign locality group if we found a CPU.
 266                                  */
 267                                 if (icpu != NULL)
 268                                         icpu->ic_lgrp = lgrp;
 269                         }
 270                 }
 271         }
 272 
 273         /*
 274          * Register a callback if a CPU goes offline or comes online.
 275          */
 276         mutex_enter(&cpu_lock);
 277         register_cpu_setup_func(ib_cpu_setup, NULL);
 278         mutex_exit(&cpu_lock);
 279 
 280         CALLB_CPR_INIT(&cpr, &ib_lock, callb_generic_cpr, IB_NAME);
 281 
 282         ib_snapshot_t   *snapshot = NULL;
 283         ib_snapshot_t   *new_snapshot = NULL;
 284         hrtime_t        delta_time;
 285         hrtime_t        deltas_tottime = 0;
 286         boolean_t       below_statslen;
 287 
 288         snapshot = ib_get_statistics();
 289 
 290         mutex_enter(&ib_lock);
 291         for (;;) {
 292                 ib_delta_t      *delta;
 293 
 294                 DTRACE_INTRD(get_stats);
 295                 new_snapshot = ib_get_statistics();
 296 
 297                 delta = ib_delta_generate(snapshot, new_snapshot);
 298 
 299                 below_statslen = (deltas_tottime < statslen);
 300                 deltas_tottime += delta_time;
 301                 do_reconfig = (below_statslen && deltas_tottime >= statslen);
 302 
 303                 list_insert_tail(&ib_delta_list, delta);
 304 
 305                 /*
 306                  * Calculate the goodness of the current configuration.
 307                  */
 308                 goodness = ib_goodness(delta);
 309 
 310                 if (ib_imbalanced(goodness, baseline_goodness))
 311                         do_reconfig = B_TRUE;
 312 
 313                 /*
 314                  * Reconfigure interrupt distribution.
 315                  */
 316                 if (do_reconfig) {
 317                         error = ib_do_reconfig(delta);
 318 
 319                         if (error != 0) {
 320                                 if (error == -1)
 321                                         IB_LOG((CE_CONT, "ib_do_reconfig failed!"));
 322                         } else {
 323                                 IB_LOG((CE_CONT, "setting new baseline of %d", goodness));
 324                                 baseline_goodness = goodness;
 325                         }
 326                 }
 327 
 328                 /*
 329                  * Wait for timeout or CPU reconfiguration.
 330                  */
 331                 CALLB_CPR_SAFE_BEGIN(&cpr);
 332                 cv_timedwait(&ib_cv, &ib_lock, ddi_get_lbolt() +
 333                     SEC_TO_TICK(ib_sleeptime));
 334                 CALLB_CPR_SAFE_END(&cpr, &ib_lock);
 335         }
 336 
 337         CALLB_CPR_EXIT(&cpr);
 338 
 339         /*
 340          * Unregister CPU callback.
 341          */
 342         mutex_enter(&cpu_lock);
 343         unregister_cpu_setup_func(ib_cpu_setup, NULL);
 344         mutex_exit(&cpu_lock);
 345 
 346         list_destroy(&ib_exclude_list);
 347         list_destroy(&ib_delta_list);
 348         list_destroy(&ib_cpu_list);
 349 
 350 }
 351 
 352 /*
 353  * Register a new CPU in the global list of CPUs.
 354  */
 355 static void
 356 ib_cpu_register(processorid_t cpu_id)
 357 {
 358         cpu_t           *cp = cpu[cpu_id];
 359         ib_cpu_t        *new_cpu;
 360 
 361         /*
 362          * Is this CPU baned from interrupt handling?
 363          */
 364         if (ib_cpu_exclude(cpu_id))
 365                 return;
 366 
 367         new_cpu = ib_cpu_create();
 368         new_cpu->ic_cpu_id = cpu_id;
 369 
 370         /* Initialize list for interrupt vectors */
 371         list_create(&new_cpu->ic_ivec_list, sizeof (ib_ivec_t),
 372             offsetof(ib_ivec_t, ii_next));
 373 
 374         list_link_init(&new_cpu->ic_next);
 375 
 376         /* Check if this CPU can handle interrupts */
 377         mutex_enter(&cpu_lock);
 378         if (cpu_is_nointr(cp))
 379                 new_cpu->ic_offline = B_TRUE;
 380         mutex_exit(&cpu_lock);
 381 
 382         /* Add CPU to list of CPUs */
 383         list_insert_tail(&ib_cpu_list, new_cpu);
 384 
 385         ib_num_cpus++;
 386 
 387         IB_IMPLDBG((CE_CONT, "ib_cpu_register: cpu=0x%x", cpu_id));
 388 }
 389 
 390 /*
 391  * Unregister CPU from the global list of CPUs.
 392  */
 393 static void
 394 ib_cpu_unregister(processorid_t cpu_id)
 395 {
 396         ib_cpu_t        *icpu;
 397 
 398         mutex_enter(&ib_lock);
 399         FOREACH_CPU(icpu, ib_cpu_list) {
 400                 if (icpu->ic_cpu_id == cpu_id) {
 401                         /* Remove CPU from global list */
 402                         list_remove(&ib_cpu_list, icpu);
 403 
 404                         /* Free CPU structure */
 405                         ib_cpu_destroy(icpu);
 406 
 407                         /* XXX or just offline CPU; statistics? */
 408                         break;
 409                 }
 410         }
 411         mutex_exit(&ib_lock);
 412 
 413         ib_num_cpus--;
 414 
 415         IB_IMPLDBG((CE_CONT, "ib_cpu_unregister: cpu=0x%x",
 416             cpu_id));
 417 }
 418 
 419 /*
 420  * Hook for CPU changes.
 421  */
 422 static int
 423 ib_cpu_setup(cpu_setup_t what, int cpu_id, void *arg)
 424 {
 425 
 426         switch (what) {
 427         case CPU_UNCONFIG:
 428         case CPU_CPUPART_OUT:
 429         case CPU_OFF:
 430                 ib_cpu_unregister(cpu_id);
 431                 cv_signal(&ib_cv);
 432                 break;
 433 
 434         case CPU_INTR_ON:
 435                 ib_cpu_register(cpu_id);
 436                 cv_signal(&ib_cv);
 437                 break;
 438 
 439         default:
 440                 break;
 441         }
 442 
 443         return (0);
 444 }
 445 
 446 static ib_cpu_t *
 447 ib_cpu_create(void)
 448 {
 449         ib_cpu_t        *new_cpu;
 450 
 451         new_cpu = kmem_alloc(sizeof (ib_cpu_t), KM_SLEEP);
 452         new_cpu->ic_offline = B_FALSE;
 453 
 454         return (new_cpu);
 455 }
 456 
 457 static void
 458 ib_cpu_destroy(ib_cpu_t *old_cpu)
 459 {
 460         ib_ivec_t       *ivec;
 461 
 462         FOREACH_IVEC(ivec, old_cpu->ic_ivec_list) {
 463                 kmem_free(ivec, sizeof (ib_ivec_t));
 464         }
 465 
 466         kmem_free(old_cpu, sizeof (ib_cpu_t));
 467 }
 468 
 469 /*
 470  * Find a CPU in the global list of CPUs by processor id.
 471  */
 472 static ib_cpu_t *
 473 ib_cpu_find(list_t cpu_list, processorid_t cpu_id)
 474 {
 475         ib_cpu_t        *icpu;
 476 
 477         IB_APIDBG((CE_CONT, "ib_cpu_find: API cpu = %d", cpu_id));
 478 
 479         FOREACH_CPU(icpu, cpu_list) {
 480                 if (icpu->ic_cpu_id == cpu_id)
 481                         return (icpu);
 482         }
 483 
 484         return (NULL);
 485 }
 486 
 487 /*
 488  * Find a interrupt vector for a specific CPU.
 489  */
 490 static ib_ivec_t *
 491 ib_cpu_find_ivec(list_t cpu_list, processorid_t cpu_id, char *buspath,
 492     uint64_t ino)
 493 {
 494         ib_cpu_t        *icpu;
 495         ib_ivec_t       *ivec;
 496 
 497         icpu = ib_cpu_find(cpu_list, cpu_id);
 498         if (icpu == NULL)
 499                 return (NULL);
 500 
 501         for (ivec = list_head(&icpu->ic_ivec_list); ivec != NULL;
 502             ivec = list_next(&icpu->ic_ivec_list, ivec)) {
 503                 if (ivec->ii_ino == ino)
 504                         return (ivec);
 505         }
 506 
 507         return (NULL);
 508 }
 509 
 510 /*
 511  * Search exclude lists.
 512  */
 513 static boolean_t
 514 ib_cpu_exclude(processorid_t cpu_id)
 515 {
 516         ib_exclude_list_t       *excluded_cpu;
 517 
 518         /*
 519          * Search global list of CPUs excluded from interrupt handling.
 520          */
 521         for (excluded_cpu = list_head(&ib_exclude_list); excluded_cpu != NULL;
 522             excluded_cpu = list_next(&ib_exclude_list, excluded_cpu)) {
 523                 if (excluded_cpu->ix_cpu_id == cpu_id)
 524                         return (B_TRUE);
 525         }
 526 
 527         return (B_FALSE);
 528 }
 529 
 530 /*
 531  * Total times spend.
 532  */
 533 static void
 534 ib_cpu_statistics(ib_cpu_t *icpu)
 535 {
 536         cpu_t           *cp;
 537         hrtime_t        msnsecs[NCMSTATES];
 538         hrtime_t        new_tot;
 539 
 540         cp = cpu[icpu->ic_cpu_id];
 541         get_cpu_mstate(cp, msnsecs);
 542 
 543         icpu->ic_tot = msnsecs[CMS_IDLE] + msnsecs[CMS_USER] +
 544             msnsecs[CMS_SYSTEM];
 545 
 546 }
 547 
 548 /*
 549  * Create a new interrupt vector.
 550  */
 551 static ib_ivec_t *
 552 ib_ivec_create(const char *buspath, uint64_t ino)
 553 {
 554         ib_ivec_t       *ivec;
 555 
 556         ivec = (ib_ivec_t *)kmem_alloc(sizeof (ib_ivec_t), KM_SLEEP);
 557         
 558         list_link_init(&ivec->ii_next);
 559 
 560         ivec->ii_buspath = (char *)buspath;  /* XXX: strdup */
 561         ivec->ii_ino = ino;
 562         ivec->ii_ihs = 1;
 563 
 564         return (ivec);
 565 }
 566 
 567 static void
 568 ib_ivec_register(ib_cpu_t *icpu)
 569 {
 570 }
 571 
 572 /*
 573  * Find interrupt vector by ino.
 574  */
 575 static ib_ivec_t *
 576 ib_ivec_find_ino(list_t ivec_list, uint64_t ino)
 577 {
 578         ib_ivec_t       *ivec;
 579 
 580         FOREACH_IVEC(ivec, ivec_list) {
 581                 if (ivec->ii_inum == ino)
 582                         return (ivec);
 583         }
 584 
 585         return (NULL);
 586 }
 587 
 588 /*
 589  * Delete a interrupt vector from a list.
 590  */
 591 static void
 592 ib_ivec_delete_ino(list_t ivec_list, uint64_t ino)
 593 {
 594         ib_ivec_t       *ivec;
 595 
 596         FOREACH_IVEC(ivec, ivec_list) {
 597                 if (ivec->ii_inum == ino) {
 598                         /* XXX: remove from list */
 599                         ;
 600                 }
 601         }
 602 }
 603 
 604 /*
 605  * Add a new interrupt vector to a list.
 606  */
 607 static void
 608 ib_ivec_add_ino(list_t ivec_list, ib_ivec_t *ivec)
 609 {
 610         list_insert_tail(&ivec_list, ivec);
 611 }
 612 
 613 static ib_msi_t *
 614 ib_msi_create(const char *name)
 615 {
 616         ib_msi_t        *msi;
 617 
 618         msi = (ib_msi_t *)kmem_alloc(sizeof (ib_msi_t), KM_SLEEP);
 619 
 620         msi->im_name = name;
 621 
 622         list_link_init(&msi->im_next);
 623         list_create(&msi->im_ino_list, sizeof (ib_msi_ino_t),
 624             offsetof(ib_msi_ino_t, imi_next));
 625 
 626         return (msi);
 627 }
 628 
 629 /*
 630  * Allocate and initialize a new snapshot structure.
 631  */
 632 static ib_snapshot_t *
 633 ib_snapshot_create(void)
 634 {
 635         ib_snapshot_t   *snapshot;
 636 
 637         snapshot = kmem_alloc(sizeof (ib_snapshot_t), KM_SLEEP);
 638 
 639         /* init link */
 640 
 641         /* Initialize CPU list */
 642         list_create(&snapshot->is_cpu_list, sizeof (ib_cpu_t),
 643             offsetof(ib_cpu_t, ic_next));
 644 
 645         snapshot->is_num_cpus = 0;
 646 
 647         return (snapshot);
 648 }
 649 
 650 /*
 651  * Destroy a snapshot.
 652  */
 653 static void
 654 ib_snapshot_destroy(ib_snapshot_t *snapshot)
 655 {
 656         ib_cpu_t        *icpu;
 657 
 658         FOREACH_CPU(icpu, snapshot->is_cpu_list) {
 659                 ib_cpu_destroy(icpu);
 660         }
 661 
 662         kmem_free(snapshot, sizeof (ib_snapshot_t));
 663 }
 664 
 665 static ib_ivec_t *
 666 ib_irq_fill_ivec(kstat_t *ksp)
 667 {
 668         kstat_named_t   *knp;
 669         ib_ivec_t       *ivec;
 670         char            *datap;
 671         uint64_t        time;
 672         int             i;
 673 
 674         datap = ksp->ks_data;
 675         knp = KSTAT_NAMED_PTR(ksp);
 676         for (i = 0; i < ksp->ks_ndata; i++, knp++) {
 677                 IB_IMPLDBG((CE_CONT, "ib_irq_fill_ivec: %s",
 678                     knp->name));
 679 
 680                 if (strcmp(knp->name, "time") == 0) {
 681                         cmn_err(CE_CONT, "XXX ib time");
 682                         time = knp->value.ui64;
 683                 }
 684 
 685                 knp += sizeof (kstat_named_t);
 686                 datap += sizeof (kstat_named_t);
 687         }
 688 
 689         /* Allocate a new interrupt vector */
 690         ivec = ib_ivec_create("", 0);
 691         ivec->ii_time = time;
 692 
 693         return (ivec);
 694 }
 695 
 696 /*
 697  * XXX: icpu not needed, move out of loop
 698  */
 699 static void
 700 ib_irq_statistics(ib_cpu_t *icpu)
 701 {
 702         kstat_t         *ksp;
 703         int             instance = 1;
 704 
 705         /*
 706          * Read pci interrupts.
 707          */
 708         ksp = kstat_hold_byname("pci_intrs", instance, "pci", ALL_ZONES);
 709         while (ksp != NULL) {
 710                 KSTAT_ENTER(ksp);
 711 
 712                 if (ksp->ks_data != NULL && ksp->ks_type == KSTAT_TYPE_NAMED) {
 713                         ib_cpu_t        *icpu;
 714                         ib_ivec_t       *ivec;
 715                         kstat_named_t   *knp;
 716                         kstat_named_t   *datap;
 717                         uint64_t        ino;
 718                         char            *buspath;
 719                         char            *namep;
 720                         processorid_t   cpu_id;
 721                         int             i;
 722                         boolean_t       is_enabled = B_TRUE;
 723 
 724                         (void) KSTAT_UPDATE(ksp, KSTAT_READ);
 725 
 726                         /*
 727                          * Find the CPU this interrupt vector is on and
 728                          * if the vector itself is enabled.
 729                          */
 730                         datap = ksp->ks_data;
 731                         namep = KSTAT_NAMED_PTR(ksp)->name;
 732                         for (i = 0; i < ksp->ks_ndata; i++) {
 733                                 if (strcmp(namep, "cpu") == 0) {
 734                                         cpu_id = datap->value.ui64;
 735                                 } else if (strcmp(namep, "type") == 0) {
 736                                         if (strcmp(datap->value.c, "disabled") == 0) {
 737                                                 is_enabled = B_FALSE;
 738                                                 break;
 739                                         }
 740                                 }
 741 
 742                                 namep += sizeof (kstat_named_t);
 743                                 datap += sizeof (kstat_named_t);
 744                         }
 745 
 746                         /*
 747                          * Skip this interrupt vector if its disabled.
 748                          */
 749                         if (!is_enabled)
 750                                 continue;
 751 
 752                         /*
 753                          * Check if CPU is online.
 754                          */
 755                         icpu = ib_cpu_find(ib_cpu_list, cpu_id);
 756                         if (icpu == NULL || icpu->ic_offline)
 757                                 continue;
 758 
 759                         /*
 760                          * Fill information.
 761                          */
 762                         ivec = ib_irq_fill_ivec(ksp);
 763                         if (ivec == NULL)
 764                                 continue;
 765 
 766                         list_insert_tail(&icpu->ic_ivec_list, ivec);
 767                 }
 768 
 769                 KSTAT_EXIT(ksp);
 770                 kstat_rele(ksp);
 771 
 772                 instance++;
 773                 ksp = kstat_hold_byname("pci_intrs", instance, "pci", ALL_ZONES);
 774         }
 775 }
 776 
 777 /*
 778  * Collect data from CPUs and interrupt vectors.
 779  */
 780 static ib_snapshot_t *
 781 ib_get_statistics(void)
 782 {
 783         ib_cpu_t        *os_cpu;
 784         ib_snapshot_t   *snapshot;
 785         ib_cpu_t        *snapshot_cpu;
 786 
 787         /*
 788          * Nothing to balance with one CPU. XXX: right place?
 789          */
 790         if (ib_num_cpus <= 1) {
 791                 ib_sleeptime = IB_ONECPU_SLEEPTIME;
 792                 return (NULL);
 793         }
 794 
 795         /*
 796          * Store all CPUs and ivecs here.
 797          */
 798         snapshot = ib_snapshot_create();
 799 
 800         /*
 801          * Loop over all active CPUs
 802          */
 803         FOREACH_CPU(os_cpu, ib_cpu_list) {
 804 
 805                 snapshot->is_num_cpus++;
 806 
 807                 snapshot_cpu = ib_cpu_create();
 808                 snapshot_cpu->ic_cpu_id = os_cpu->ic_cpu_id;
 809 
 810                 list_insert_tail(&snapshot->is_cpu_list, snapshot_cpu);
 811 
 812                 ib_cpu_statistics(snapshot_cpu);
 813                 ib_irq_statistics(os_cpu);
 814         }
 815 
 816         return (snapshot);
 817 }
 818 
 819 static ib_delta_t *
 820 ib_delta_create(void)
 821 {
 822         ib_delta_t      *delta;
 823 
 824         delta = kmem_alloc(sizeof (ib_delta_t), KM_SLEEP);
 825         delta->id_missing = B_FALSE;
 826 
 827         list_create(&delta->id_cpu_list, sizeof (ib_cpu_t),
 828             offsetof(ib_cpu_t, ic_next));
 829 
 830         return (delta);
 831 }
 832 
 833 /*
 834  * Generate the delta of two snapshots.
 835  */
 836 static ib_delta_t *
 837 ib_delta_generate(ib_snapshot_t *old_snapshot, ib_snapshot_t *new_snapshot)
 838 {
 839         ib_cpu_t        *old_cpu, *new_cpu;
 840         ib_delta_t      *delta;
 841         int             intrload = 0;
 842         int             intrnsec = 0;
 843         processorid_t   cpus = 0;
 844 
 845         /*
 846          * Allocate a new delta structure.
 847          */
 848         delta = ib_delta_create();
 849 
 850         /*
 851          * Number of CPUs must be the same.
 852          */
 853         delta->id_missing = old_snapshot->is_num_cpus !=
 854             new_snapshot->is_num_cpus;
 855 
 856         if (delta->id_missing != 0) {
 857                 IB_LOG((CE_CONT, "ib_delta_generate: number of CPUs changed"));
 858                 return (delta);
 859         }
 860 
 861         /*
 862          * Loop over the CPUs in both snapshots.
 863          */
 864         for (new_cpu = list_head(&new_snapshot->is_cpu_list),
 865             old_cpu = list_head(&old_snapshot->is_cpu_list);
 866             new_cpu != NULL && old_cpu != NULL;
 867             new_cpu = list_next(&new_snapshot->is_cpu_list, new_cpu),
 868             old_cpu = list_next(&old_snapshot->is_cpu_list, old_cpu)) {
 869                 ib_cpu_t        *delta_cpu;
 870                 ib_ivec_t       *new_ivec;
 871 
 872                 /* XXX: just onlined CPU? */
 873 
 874                 /* Allocate a new CPU structure */
 875                 delta_cpu = ib_cpu_create();
 876 
 877                 /* Difference of total time */
 878                 delta_cpu->ic_tot = new_cpu->ic_tot - old_cpu->ic_tot;
 879                 if (!(delta_cpu->ic_tot >= 0)) {
 880                         delta->id_missing = B_TRUE;
 881                         kmem_free(delta_cpu, sizeof (ib_cpu_t));
 882                         return (delta);
 883                 }
 884 
 885                 list_insert_tail(&delta->id_cpu_list, delta_cpu);
 886 
 887                 /* Avoid division by zero */
 888                 if (delta_cpu->ic_tot == 0)
 889                         delta_cpu->ic_tot = 1;
 890 
 891                 delta_cpu->ic_intrs = 0;
 892                 delta_cpu->ic_big_intrs = 0;
 893 
 894                 /*
 895                  * Number of interrupt vectors must be the same.
 896                  */
 897                 if (old_cpu->ic_num_ivecs != new_cpu->ic_num_ivecs) {
 898                         IB_LOG((CE_CONT, "ib_delta_generate: cpu %d has more "
 899                             "or less interrupts", old_cpu->ic_cpu_id));
 900                         delta->id_missing = B_TRUE;
 901                         return (delta);
 902                 }
 903 
 904                 /*
 905                  * Loop over the interrupt vectors of the new CPU.
 906                  */
 907                 for (new_ivec = list_head(&new_cpu->ic_ivec_list);
 908                     new_ivec != NULL; new_ivec =
 909                     list_next(&new_cpu->ic_ivec_list, new_ivec)) {
 910                         ib_ivec_t       *ivec;
 911                         ib_ivec_t       *delta_ivec;
 912                         hrtime_t        time;
 913 
 914                         if (new_ivec->ii_num_ino == 0)
 915                                 continue;
 916 
 917                         /*
 918                          * If interrupt vector does not exists or XXX crtime
 919                          * is different, set missing.
 920                          */
 921                         ivec = ib_ivec_find_ino(old_cpu->ic_ivec_list,
 922                             new_ivec->ii_ino);
 923                         if (ivec == NULL) {
 924                                 delta->id_missing = B_TRUE;
 925                                 return (delta);
 926                         }
 927 
 928                         /* Allocate a new delta interrupt vector */
 929                         delta_ivec = ib_ivec_create(new_ivec->ii_buspath,
 930                             new_ivec->ii_ino);
 931 
 932                         /*
 933                          * Time used by this interrupt.
 934                          */
 935                         time = new_ivec->ii_time - ivec->ii_time;
 936                         if (time < 0) {
 937                                 delta->id_missing = B_TRUE;
 938                                 kmem_free(delta_ivec, sizeof (ib_delta_t));
 939                                 return (delta);
 940                         }
 941 
 942                         delta_cpu->ic_intrs += time;
 943                         delta_ivec->ii_time = time;
 944 
 945                         if (time > delta_cpu->ic_bigintr)
 946                                 delta_cpu->ic_bigintr = time;
 947 
 948                         /*
 949                          * Fill in the rest.
 950                          */
 951                         delta_ivec->ii_ihs = new_ivec->ii_ihs;
 952                         delta_ivec->ii_pil = new_ivec->ii_pil;
 953                         delta_ivec->ii_ino = new_ivec->ii_ino;
 954                         delta_ivec->ii_num_ino = new_ivec->ii_num_ino;
 955                         /* XXX: buspath, name */
 956                 }
 957 
 958                 /*
 959                  * Rounding error
 960                  */
 961                 if (delta_cpu->ic_tot < delta_cpu->ic_intrs)
 962                         delta_cpu->ic_tot = delta_cpu->ic_intrs;
 963 
 964                 delta_cpu->ic_intr_load =
 965                     delta_cpu->ic_intrs / delta_cpu->ic_tot;
 966                 intrload += delta_cpu->ic_intr_load;
 967                 intrnsec += delta_cpu->ic_intrs;
 968 
 969                 cpus++;
 970         }
 971 
 972         if (cpus > 0) {
 973                 delta->id_avgintrload = intrload / cpus;
 974                 delta->id_avgintrnsec = intrnsec / cpus;
 975         } else {
 976                 delta->id_avgintrload = 0;
 977                 delta->id_avgintrnsec = 0;
 978         }
 979 
 980         return (delta);
 981 }
 982 
 983 /*
 984  * Compress deltas.
 985  */
 986 static ib_delta_t *
 987 ib_delta_compress(list_t *deltas)
 988 {
 989         ib_cpu_t        *icpu;
 990         ib_ivec_t       *ivec;
 991         ib_delta_t      *new_delta, *delta;
 992         processorid_t   cpus = 0;
 993         int             high_intrload = 0;
 994         int             intrs = 0, tot;
 995 
 996         /* Check if empty list of deltas */
 997         if (deltas == NULL || list_is_empty(deltas) != 0) {
 998                 IB_LOG((CE_CONT, "ib_delta_compress: deltas are empty?"));
 999                 return (NULL);
1000         }
1001 
1002         /* Allocate a new delta structure */
1003         new_delta = ib_delta_create();
1004 
1005         /*
1006          * Loop over the deltas in the list.
1007          */
1008         for (delta = list_head(deltas); delta != NULL;
1009             delta = list_next(deltas, delta)) {
1010 
1011                 /* Compressing bad delta? */
1012                 if (delta->id_missing) {
1013                         IB_LOG((CE_CONT,
1014                             "ib_delta_compress: compressing bad deltas?"));
1015                         return (NULL);
1016                 }
1017 
1018                 FOREACH_CPU(icpu, delta->id_cpu_list) {
1019                         ib_cpu_t        *new_cpu;
1020                         ib_ivec_t       *new_ivec;
1021 
1022                         intrs += icpu->ic_intrs;
1023                         tot += icpu->ic_tot;
1024                         new_cpu = ib_cpu_create();
1025                         new_cpu->ic_cpu_id = icpu->ic_cpu_id;
1026                         new_cpu->ic_intrs = icpu->ic_intrs;
1027                         new_cpu->ic_tot = icpu->ic_tot;
1028 
1029                         /* XXX: exists ivecs */
1030                         FOREACH_IVEC(new_ivec, icpu->ic_ivec_list) {
1031                                 ib_ivec_t       *new_delta_ivec;
1032 
1033                                 new_delta_ivec = ib_ivec_create(
1034                                     new_ivec->ii_buspath, new_ivec->ii_ino);
1035 
1036                         }
1037                 }
1038         }
1039 
1040         FOREACH_CPU(icpu, new_delta->id_cpu_list) {
1041                 int     bigintr = 0;
1042 
1043                 cpus++;
1044 
1045                 FOREACH_IVEC(ivec, icpu->ic_ivec_list) {
1046                         if (ivec->ii_time > bigintr)
1047                                 bigintr = ivec->ii_time;
1048                 }
1049 
1050                 icpu->ic_bigintr = bigintr;
1051                 icpu->ic_intr_load = icpu->ic_intrs / icpu->ic_tot;
1052 
1053                 if (high_intrload < icpu->ic_intr_load)
1054                         high_intrload = icpu->ic_intr_load;
1055 
1056                 if (icpu->ic_tot <= 0)
1057                         icpu->ic_tot = 100;
1058         }
1059 
1060         if (cpus > 0) {
1061                 new_delta->id_avgintrload = intrs / tot;
1062                 new_delta->id_avgintrnsec = intrs / cpus;
1063         } else {
1064                 new_delta->id_avgintrload = 0;
1065                 new_delta->id_avgintrnsec = 0;
1066         }
1067 
1068         /* XXX: global sleeptime */
1069 
1070         return (new_delta);
1071 }
1072 
1073 /*
1074  * Decide if the load is out of balance.
1075  */
1076 static int
1077 ib_imbalanced(int goodness, int baseline)
1078 {
1079         if (goodness > 50)
1080                 return (100);
1081 
1082         /* XXX: abs */
1083         if ((goodness - baseline) > goodness_mindelta)
1084                 return (100);
1085 
1086         return (0);
1087 }
1088 
1089 /*
1090  * Calculate goodness of a CPU.
1091  */
1092 static int
1093 ib_goodness_cpu(ib_cpu_t *icpu, int avg_interrupt_load)
1094 {
1095         int     goodness;
1096         int     load, load_no_bigintr;
1097 
1098         load = icpu->ic_intrs / icpu->ic_tot;
1099         if (load < avg_interrupt_load)
1100                 return (0);
1101 
1102         load_no_bigintr = (icpu->ic_intrs - icpu->ic_bigintr) / icpu->ic_tot;
1103 
1104         if ((load > goodness_unsafe_load) && (icpu->ic_num_ivecs > 1))
1105                 return (1);
1106 
1107         goodness = load - avg_interrupt_load;
1108         if (goodness > load_no_bigintr)
1109                 goodness = load_no_bigintr;
1110 
1111         return (goodness);
1112 }
1113 
1114 /*
1115  * Calculate goodness.
1116  */
1117 static int
1118 ib_goodness(ib_delta_t *delta)
1119 {
1120         ib_cpu_t        *icpu;
1121         int             goodness, high_goodness = 0;
1122 
1123         if (delta->id_missing > 0)
1124                 return (1);
1125 
1126         FOREACH_CPU(icpu, delta->id_cpu_list) {
1127                 goodness = ib_goodness_cpu(icpu, delta->id_avgintrload);
1128                 if (!(goodness >= 0 && goodness <= 100)) {
1129                         IB_LOG((CE_CONT,
1130                             "ib_goodness: cpu goodness out of range?"));
1131                         return (100);
1132                 }
1133 
1134                 if (goodness == 100)
1135                         return (100);
1136 
1137                 if (goodness > high_goodness)
1138                         high_goodness = goodness;
1139         }
1140 
1141         return (high_goodness);
1142 }
1143 
1144 static void
1145 ib_do_find_goal(list_t ivecs, list_t loads, int goal, int idx)
1146 {
1147         list_t  goals_with;
1148         list_t  goals_without;
1149         int     with, without;
1150         int     which, load;
1151 
1152 
1153         if (goal <= load) {
1154                 with = load;
1155         } else {
1156                 /* XXX: do_find_goal */
1157                 with += load;
1158         }
1159 
1160         IB_LOG((CE_CONT, "XXX"));
1161 
1162         if (with >= goal && without < goal) {
1163                 which = 0;
1164         } else if (with < goal && without >= goal) {
1165                 which = 1;
1166         } else if (with >= goal && without >= goal) {
1167                 which = without < with;
1168         } else {
1169                 which = without > with;
1170         }
1171 
1172         if (which == 1) {
1173                 IB_LOG((CE_CONT, "ib_do_find_goal: going without"));
1174                 /* XXX */
1175         } else {
1176                 IB_LOG((CE_CONT, "ib_do_find_goal: going with"));
1177                 /* XXX */
1178         }
1179 }
1180 
1181 typedef struct _ib_goal {
1182         list_node_t     *ig_link;
1183         int             ig_value;
1184 } ib_goal_t;
1185 
1186 typedef struct _ib_goal_load {
1187         list_node_t     *igl_link;
1188         int             igl_value;
1189 } ib_goal_load_t;
1190 
1191 static void
1192 ib_find_goal(list_t ivecs, int goal)
1193 {
1194         ib_ivec_t       *ivec;
1195         list_t          goals;
1196         int             load;
1197 
1198         if (goal <= 0) {
1199                 list_create(&goals, sizeof (ib_goal_t),
1200                    offsetof (ib_goal_t, ig_link));
1201         } else {
1202                 list_t          loads;
1203                 hrtime_t        tot = 0;
1204 
1205                 IB_LOG((CE_CONT, "ib_find_goal: finding goal from intrs XXX"));
1206 
1207                 FOREACH_IVEC(ivec, ivecs) {
1208                         tot += ivec->ii_time;
1209                 }
1210 
1211                 list_create(&loads, sizeof (ib_goal_load_t),
1212                     offsetof (ib_goal_load_t, igl_link));
1213 
1214                 FOREACH_IVEC(ivec, ivecs) {
1215                         ib_goal_load_t  *igl = kmem_alloc(sizeof (ib_goal_load_t), KM_SLEEP);
1216 
1217                         igl->igl_value = tot;
1218                         list_insert_tail(&loads, igl);
1219 
1220                         tot -= ivec->ii_time;
1221                 }
1222         }
1223 }
1224 
1225 static void
1226 ib_do_reconfig_cpu2cpu(ib_delta_t *delta, processorid_t src_cpuid,
1227     processorid_t tgt_cpuid, int src_load)
1228 {
1229         ib_cpu_t        *src_cpu, *tgt_cpu;
1230         ib_ivec_t       *ivec;
1231         list_t          ivecs;
1232         int             goal, new_load;
1233         int             avg_nsec;
1234 
1235         if (delta == NULL)
1236                 return;
1237 
1238         goal = delta->id_avgintrnsec;
1239 
1240         src_cpu = ib_cpu_find(delta->id_cpu_list, src_cpuid);
1241         if (src_cpu == NULL)
1242                 return;
1243 
1244         tgt_cpu = ib_cpu_find(delta->id_cpu_list, tgt_cpuid);
1245         if (tgt_cpu == NULL)
1246                 return;
1247 
1248         avg_nsec = (src_cpu->ic_intrs + tgt_cpu->ic_intrs) / 2;
1249         if (goal < avg_nsec)
1250                 goal = avg_nsec;
1251 
1252 
1253         /*
1254          * Sort interrupt vectors by time.
1255          */
1256         list_create(&ivecs, sizeof (ib_ivec_t),
1257             offsetof (ib_ivec_t, ii_next));
1258 
1259         ivec = list_head(&ivecs);
1260         if (ivec->ii_orig_cpu == src_cpuid) {
1261                 IB_LOG((CE_CONT, "Keeping XXX on %d",
1262                     src_cpuid)); /* ivec->ii_inum, */
1263                 goal -= ivec->ii_time;
1264                 /* XXX: shift */
1265         }
1266 
1267         IB_LOG((CE_CONT, "ib_reconfig_cpu2cpu: inums should total %d", goal));
1268 
1269         ib_find_goal(ivecs, goal);
1270 
1271         FOREACH_IVEC(ivec, ivecs) {
1272                 if (!(ivec->ii_now_cpu == src_cpuid ||
1273                     ivec->ii_now_cpu == tgt_cpuid)) {
1274                         IB_LOG((CE_CONT, "ib_do_reconfig_cpu2cpu: "));
1275                 }
1276 
1277                 if (ivec->ii_goal && ivec->ii_now_cpu != src_cpuid) {
1278                         ib_interrupt_do_move(ivec, src_cpuid);
1279                 } else if (ivec->ii_goal == B_FALSE &&
1280                             ivec->ii_now_cpu != tgt_cpuid) {
1281                         ib_interrupt_do_move(ivec, tgt_cpuid);
1282                 }
1283         }
1284 
1285         ib_interrupt_move_check(delta, src_cpuid, tgt_cpuid);
1286 
1287         new_load = src_cpu->ic_intrs / src_cpu->ic_tot;
1288 
1289         if (!(new_load <= src_load && new_load > delta->id_avgintrload)) {
1290                 IB_LOG((CE_CONT, "ib_reconfig_cpu2cpu: %d", new_load));
1291         }
1292 }
1293 
1294 static void
1295 ib_do_reconfig_cpu(ib_delta_t *delta, list_t *cpu_sorted_list,
1296     processorid_t old_cpu_id)
1297 {
1298         ib_cpu_t        *icpu;
1299         int             avgintrload;
1300 
1301         if (delta == NULL)
1302                 return;
1303 
1304         icpu = ib_cpu_find(delta->id_cpu_list, old_cpu_id);
1305         if (icpu == NULL)
1306                 return;
1307 
1308         avgintrload = delta->id_avgintrload;
1309 
1310 }
1311 
1312 /*
1313  * Reconfigure interrupt distribution among CPUs.
1314  */
1315 static int
1316 ib_do_reconfig(ib_delta_t *delta)
1317 {
1318         ib_cpu_t        *icpu;
1319         ib_ivec_t       *ivec;
1320         list_t          cpu_sorted_list;
1321         int             goodness, new_goodness;
1322         int             warned = 0;
1323         int             rval = 1, ret = 1;
1324 
1325         if (delta == NULL)
1326                 return (-1);
1327 
1328         goodness = delta->id_goodness;
1329         if (goodness < goodness_mindelta) {
1330                 IB_LOG((CE_CONT, "ib_do_reconfig: goodness is good enough"));
1331                 return (0);
1332         }
1333 
1334         IB_LOG((CE_CONT, "ib_do_reconfig: optimizing interrupt assignments"));
1335 
1336         if (delta->id_missing != 0) {
1337                 IB_LOG((CE_CONT, "ib_do_reconfig: aborted"));
1338                 return (-1);
1339         }
1340 
1341         FOREACH_CPU(icpu, delta->id_cpu_list) {
1342                 FOREACH_IVEC(ivec, icpu->ic_ivec_list) {
1343                         ivec->ii_orig_cpu = icpu->ic_cpu_id;
1344                         ivec->ii_now_cpu = icpu->ic_cpu_id;
1345                         /* XXX: inum */
1346                 }
1347         }
1348 
1349         list_create(&cpu_sorted_list, sizeof (ib_cpu_t),
1350             offsetof(ib_cpu_t, ic_next));
1351 
1352         /*
1353          * Have we an improvement?
1354          */
1355         new_goodness = ib_goodness(delta);
1356         if (!(new_goodness <= goodness)) {
1357                 IB_LOG((CE_CONT,
1358                     "ib_do_reconfig: result has worse goodness"));
1359         }
1360 
1361         if ((goodness != 100 || new_goodness == 100) &&
1362             goodness - new_goodness < goodness_mindelta) {
1363                 IB_LOG((CE_CONT,
1364                     "ib_do_reconfig: goodness already near optimum"));
1365                 return (0);
1366         }
1367 
1368         /*
1369          * Move interrupts.
1370          */
1371         FOREACH_CPU(icpu, delta->id_cpu_list) {
1372                 FOREACH_IVEC(ivec, icpu->ic_ivec_list) {
1373                         int     error;
1374 
1375                         if (ivec->ii_orig_cpu == icpu->ic_cpu_id)
1376                                 continue;
1377 
1378                         error = ib_interrupt_do_move(ivec, icpu->ic_cpu_id);
1379                         if (error != 0) {
1380                                 if (warned++ == 0) {
1381                                         IB_LOG((CE_CONT, "ib_do_reconfig: "
1382                                             "unable to move interrupt"));
1383                                 }
1384 
1385                                 IB_LOG((CE_CONT, "ib_do_reconfig: "
1386                                     "unable to move buspath"));
1387 
1388                                 ret = -1;
1389                         }
1390                 }
1391         }
1392 
1393         return (rval);
1394 }
1395 
1396 
1397 /*
1398  * Check if the interrupt load did decrease.
1399  */
1400 static void
1401 ib_interrupt_move_check(ib_delta_t *delta, processorid_t old_cpuid,
1402     processorid_t new_cpuid)
1403 {
1404         ib_cpu_t        *old_cpu, *new_cpu;
1405 
1406         /*
1407          * Check old CPU.
1408          */
1409         old_cpu = ib_cpu_find(delta->id_cpu_list, old_cpuid);
1410         if (old_cpu == NULL)
1411                 return;
1412         if (!(old_cpu->ic_tot >= old_cpu->ic_intrs)) {
1413                 IB_LOG((CE_CONT,
1414                     "Moved interrupts left 100+%% load on source CPU"));
1415         }
1416 
1417         /*
1418          * Check new CPU.
1419          */
1420         new_cpu = ib_cpu_find(delta->id_cpu_list, new_cpuid);
1421         if (new_cpu == NULL)
1422                 return;
1423         if (!(new_cpu->ic_tot >= new_cpu->ic_intrs)) {
1424                 IB_LOG((CE_CONT,
1425                     "Moved interrupts left 100+%% load on target CPU"));
1426         }
1427 }
1428 
1429 /*
1430  * Actually move the interrupt.
1431  */
1432 static int
1433 ib_interrupt_do_move(ib_ivec_t *ivec, processorid_t cpu_id)
1434 {
1435         int     ret, result;
1436 
1437         struct psm_ops  *pops;
1438 
1439         //pops = mach_set[0];
1440 
1441         //      ret = (*psm_intr_ops)(NULL, &info_hdl, PSM_INTR_OP_SET_CPU,
1442         //          &result);
1443 
1444         return (-1);
1445 }
1446 
1447 /*
1448  * Move an interrupt to a different CPU.
1449  */
1450 static int
1451 ib_interrupt_move(ib_delta_t *delta, uint64_t inum, processorid_t old_cpuid,
1452     processorid_t new_cpuid)
1453 {
1454         ib_cpu_t        *old_cpu, *new_cpu;
1455         ib_ivec_t       *ivec;
1456 
1457         if (delta == NULL)
1458                 return (-1);
1459 
1460         /*
1461          * Remove interrupt vector from old CPU.
1462          */
1463         old_cpu = ib_cpu_find(delta->id_cpu_list, old_cpuid);
1464         if (old_cpu == NULL)
1465                 return (-1);
1466 
1467         ivec = ib_ivec_find_ino(old_cpu->ic_ivec_list, inum);
1468 
1469         old_cpu->ic_intrs -= ivec->ii_time;
1470         old_cpu->ic_intr_load = old_cpu->ic_intrs / old_cpu->ic_tot;
1471         ib_ivec_delete_ino(old_cpu->ic_ivec_list, inum);
1472 
1473         /*
1474          * Verify interrupts.
1475          */
1476         if (!(old_cpu->ic_intrs >= 0)) {
1477                 IB_LOG((CE_CONT,
1478                     "ib_interrupt_move: interrupt time > total time?"));
1479         }
1480 
1481         if (!(ivec->ii_time <= old_cpu->ic_bigintr)) {
1482                 IB_LOG((CE_CONT,
1483                     "ib_interrupt_move: interrupt time > big interrupt?"));
1484         }
1485 
1486         if (ivec->ii_time >= old_cpu->ic_bigintr) {
1487                 ib_ivec_t       *time_ivec;
1488                 uint64_t        bigtime = 0;
1489 
1490                 FOREACH_IVEC(time_ivec, old_cpu->ic_ivec_list) {
1491                         if (time_ivec->ii_time > bigtime)
1492                                 bigtime = time_ivec->ii_time;
1493                 }
1494         }
1495 
1496         /*
1497          * Insert interrupt vector into new CPU.
1498          */
1499         new_cpu = ib_cpu_find(delta->id_cpu_list, new_cpuid);
1500         if (new_cpu == NULL)
1501                 return (-1);
1502 
1503         ivec->ii_now_cpu = new_cpuid;
1504         new_cpu->ic_intrs += ivec->ii_time;
1505         new_cpu->ic_intr_load = new_cpu->ic_intrs / new_cpu->ic_tot;
1506         ib_ivec_add_ino(new_cpu->ic_ivec_list, ivec);
1507 
1508         if (ivec->ii_time > new_cpu->ic_bigintr)
1509                 new_cpu->ic_bigintr = ivec->ii_time;
1510 
1511         return (0);
1512 }