1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 /*
  28  * evtchn.c
  29  *
  30  * Communication via hypervisor event channels.
  31  *
  32  * Copyright (c) 2002-2005, K A Fraser
  33  *
  34  * This file may be distributed separately from the Linux kernel, or
  35  * incorporated into other software packages, subject to the following license:
  36  *
  37  * Permission is hereby granted, free of charge, to any person obtaining a copy
  38  * of this source file (the "Software"), to deal in the Software without
  39  * restriction, including without limitation the rights to use, copy, modify,
  40  * merge, publish, distribute, sublicense, and/or sell copies of the Software,
  41  * and to permit persons to whom the Software is furnished to do so, subject to
  42  * the following conditions:
  43  *
  44  * The above copyright notice and this permission notice shall be included in
  45  * all copies or substantial portions of the Software.
  46  *
  47  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  48  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  49  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  50  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  51  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  52  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  53  * IN THE SOFTWARE.
  54  */
  55 
  56 /* some parts derived from netbsd's hypervisor_machdep.c 1.2.2.2 */
  57 
  58 /*
  59  *
  60  * Copyright (c) 2004 Christian Limpach.
  61  * All rights reserved.
  62  *
  63  * Redistribution and use in source and binary forms, with or without
  64  * modification, are permitted provided that the following conditions
  65  * are met:
  66  * 1. Redistributions of source code must retain the above copyright
  67  *    notice, this list of conditions and the following disclaimer.
  68  * 2. Redistributions in binary form must reproduce the above copyright
  69  *    notice, this list of conditions and the following disclaimer in the
  70  *    documentation and/or other materials provided with the distribution.
  71  * 3. This section intentionally left blank.
  72  * 4. The name of the author may not be used to endorse or promote products
  73  *    derived from this software without specific prior written permission.
  74  *
  75  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  76  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  77  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  78  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  79  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  80  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  81  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  82  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  83  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  84  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  85  */
  86 /*
  87  * Section 3 of the above license was updated in response to bug 6379571.
  88  */
  89 
  90 #include <sys/types.h>
  91 #include <sys/hypervisor.h>
  92 #include <sys/machsystm.h>
  93 #include <sys/mutex.h>
  94 #include <sys/evtchn_impl.h>
  95 #include <sys/ddi_impldefs.h>
  96 #include <sys/avintr.h>
  97 #include <sys/cpuvar.h>
  98 #include <sys/smp_impldefs.h>
  99 #include <sys/archsystm.h>
 100 #include <sys/sysmacros.h>
 101 #include <sys/cmn_err.h>
 102 #include <sys/promif.h>
 103 #include <sys/debug.h>
 104 #include <sys/psm.h>
 105 #include <sys/privregs.h>
 106 #include <sys/trap.h>
 107 #include <sys/atomic.h>
 108 #include <sys/cpu.h>
 109 #include <sys/psw.h>
 110 #include <sys/traptrace.h>
 111 #include <sys/stack.h>
 112 #include <sys/x_call.h>
 113 #include <xen/public/physdev.h>
 114 
 115 /*
 * This file manages our association between hypervisor event channels and
 * Solaris's IRQs.  This is a one-to-one mapping, with the exception of
 * IPI IRQs and the clock VIRQ, each of which has one event channel per
 * participating CPU, and the IRQ for /dev/xen/evtchn, which maps to many
 * event channels.  The IRQ types are:
 121  *
 122  * IRQT_VIRQ:
 123  *      The hypervisor's standard virtual IRQ, used for the clock timer, for
 124  *      example.  This code allows any cpu to bind to one of these, although
 *      some are treated specially (e.g. VIRQ_DEBUG).
 126  *      Event channel binding is done via EVTCHNOP_bind_virq.
 127  *
 128  * IRQT_PIRQ:
 129  *      These associate a physical IRQ with an event channel via
 130  *      EVTCHNOP_bind_pirq.
 131  *
 132  * IRQT_IPI:
 133  *      A cross-call IRQ. Maps to "ncpus" event channels, each of which is
 134  *      bound to exactly one of the vcpus.  We do not currently support
 135  *      unbinding of IPIs (since Solaris doesn't need it). Uses
 136  *      EVTCHNOP_bind_ipi.
 137  *
 138  * IRQT_EVTCHN:
 139  *      A "normal" binding to an event channel, typically used by the frontend
 *      drivers to bind to their backend event channel (sketch below).
 141  *
 142  * IRQT_DEV_EVTCHN:
 143  *      This is a one-time IRQ used by /dev/xen/evtchn. Unlike other IRQs, we
 144  *      have a one-IRQ to many-evtchn mapping. We only track evtchn->irq for
 145  *      these event channels, which are managed via ec_irq_add/rm_evtchn().
 *      We enforce that the representative evtchn of an IRQT_DEV_EVTCHN IRQ
 *      (->ii_u.evtchn) is zero, and make any calls to irq_evtchn() an error,
 *      to prevent accidentally attempting to use the illegal evtchn 0.
 149  *
 150  * Suspend/resume
 151  *
 152  *      During a suspend/resume cycle, we need to tear down the event channels.
 153  *      All other mapping data is kept. The drivers will remove their own event
 154  *      channels via xendev on receiving a DDI_SUSPEND.  This leaves us with
 155  *      the IPIs and VIRQs, which we handle in ec_suspend() and ec_resume()
 156  *      below.
 157  *
 158  * CPU binding
 159  *
 160  *      When an event channel is bound to a CPU, we set a bit in a mask present
 161  *      in the machcpu (evt_affinity) to indicate that this CPU can accept this
 162  *      event channel.  For both IPIs and VIRQs, this binding is fixed at
 163  *      allocation time and we never modify it.  All other event channels are
 164  *      bound via the PSM either as part of add_avintr(), or interrupt
 165  *      redistribution (xen_psm_dis/enable_intr()) as a result of CPU
 166  *      offline/online.
 167  *
 168  * Locking
 169  *
 170  *      Updates are done holding the ec_lock.  The xen_callback_handler()
 171  *      routine reads the mapping data in a lockless fashion.  Additionally
 172  *      suspend takes ec_lock to prevent update races during a suspend/resume
 173  *      cycle.  The IPI info is also examined without the lock; this is OK
 174  *      since we only ever change IPI info during initial setup and resume.
 175  */
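
/*
 * As an illustration of the "normal" IRQT_EVTCHN case above (a sketch only;
 * the handler, its ipl and the backend domain id are hypothetical, and
 * frontend drivers normally arrive here via xendev rather than calling
 * these routines directly):
 *
 *      int evtchn, irq;
 *
 *      (void) xen_alloc_unbound_evtchn(backend_domid, &evtchn);
 *      irq = ec_bind_evtchn_to_irq(evtchn);
 *      (void) add_avintr(NULL, ipl, (avfunc)my_handler, "mydev", irq,
 *          NULL, NULL, NULL, NULL);
 *      ec_notify_via_evtchn(evtchn);
 */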
 176 
 177 #define IRQ_IS_CPUPOKE(irq) (ipi_info[XC_CPUPOKE_PIL].mi_irq == (irq))
 178 
 179 #define EVTCHN_MASKED(ev) \
 180         (HYPERVISOR_shared_info->evtchn_mask[(ev) >> EVTCHN_SHIFT] & \
 181         (1ul << ((ev) & ((1ul << EVTCHN_SHIFT) - 1))))
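
/*
 * For example, with the layout assumed by EVTCHN_MASKED() (an illustration
 * assuming a 64-bit domain, where EVTCHN_SHIFT is 6 and each mask word
 * covers 64 channels): event channel 70 is tracked by bit 6 (70 & 63) of
 * evtchn_mask[1] (70 >> 6).
 */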
 182 
 183 static short evtchn_to_irq[NR_EVENT_CHANNELS];
 184 static cpuset_t evtchn_cpus[NR_EVENT_CHANNELS];
 185 static int      evtchn_owner[NR_EVENT_CHANNELS];
 186 #ifdef DEBUG
 187 static kthread_t *evtchn_owner_thread[NR_EVENT_CHANNELS];
 188 #endif
 189 
 190 static irq_info_t irq_info[NR_IRQS];
 191 static mec_info_t ipi_info[MAXIPL];
 192 static mec_info_t virq_info[NR_VIRQS];
 193 
 194 /*
 195  * See the locking description above.
 196  */
 197 kmutex_t ec_lock;
 198 
 199 /*
 200  * Bitmap indicating which PIRQs require the hypervisor to be notified
 201  * on unmask.
 202  */
 203 static unsigned long pirq_needs_eoi[NR_PIRQS / (sizeof (unsigned long) * NBBY)];
 204 
 205 static int ec_debug_irq = INVALID_IRQ;
 206 int ec_dev_irq = INVALID_IRQ;
 207 
 208 int
 209 xen_bind_virq(unsigned int virq, processorid_t cpu, int *port)
 210 {
 211         evtchn_bind_virq_t bind;
 212         int err;
 213 
 214         bind.virq = virq;
 215         bind.vcpu = cpu;
 216         if ((err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, &bind)) == 0)
 217                 *port = bind.port;
 218         else
 219                 err = xen_xlate_errcode(err);
 220         return (err);
 221 }
 222 
 223 int
 224 xen_bind_interdomain(int domid, int remote_port, int *port)
 225 {
 226         evtchn_bind_interdomain_t bind;
 227         int err;
 228 
 229         bind.remote_dom  = domid;
 230         bind.remote_port = remote_port;
 231         if ((err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
 232             &bind)) == 0)
 233                 *port = bind.local_port;
 234         else
 235                 err = xen_xlate_errcode(err);
 236         return (err);
 237 }
 238 
 239 int
 240 xen_alloc_unbound_evtchn(int domid, int *evtchnp)
 241 {
 242         evtchn_alloc_unbound_t alloc;
 243         int err;
 244 
 245         alloc.dom = DOMID_SELF;
 246         alloc.remote_dom = domid;
 247 
 248         if ((err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
 249             &alloc)) == 0) {
 250                 *evtchnp = alloc.port;
 251                 /* ensure evtchn is masked till we're ready to use it */
 252                 (void) ec_mask_evtchn(*evtchnp);
 253         } else {
 254                 err = xen_xlate_errcode(err);
 255         }
 256 
 257         return (err);
 258 }
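
/*
 * xen_alloc_unbound_evtchn() and xen_bind_interdomain() pair up across
 * domains: one side allocates an unbound port for its peer and advertises
 * the port number to it (typically via xenstore), and the peer then
 * connects to that port.  A sketch, with hypothetical domain ids and the
 * advertisement step elided:
 *
 *      int port;
 *
 *      (void) xen_alloc_unbound_evtchn(peer_domid, &port);
 *      ... advertise port to peer_domid ...
 *
 * and on the peer, once it has read the advertised port number:
 *
 *      int port;
 *
 *      (void) xen_bind_interdomain(allocating_domid, advertised_port,
 *          &port);
 */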
 259 
 260 static int
 261 xen_close_evtchn(int evtchn)
 262 {
 263         evtchn_close_t close;
 264         int err;
 265 
 266         close.port = evtchn;
 267         err = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close);
 268         if (err)
 269                 err = xen_xlate_errcode(err);
 270         return (err);
 271 }
 272 
 273 static int
 274 xen_bind_ipi(processorid_t cpu)
 275 {
 276         evtchn_bind_ipi_t bind;
 277 
 278         ASSERT(MUTEX_HELD(&ec_lock));
 279 
 280         bind.vcpu = cpu;
 281         if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, &bind) != 0)
 282                 panic("xen_bind_ipi() failed");
 283         return (bind.port);
 284 }
 285 
/* Send future instances of this interrupt to the specified vcpu. */
 287 static void
 288 xen_bind_vcpu(int evtchn, int cpu)
 289 {
 290         evtchn_bind_vcpu_t bind;
 291 
 292         ASSERT(MUTEX_HELD(&ec_lock));
 293 
 294         bind.port = evtchn;
 295         bind.vcpu = cpu;
 296         if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind) != 0)
 297                 panic("xen_bind_vcpu() failed");
 298 }
 299 
 300 static int
 301 xen_bind_pirq(int pirq)
 302 {
 303         evtchn_bind_pirq_t bind;
 304         int ret;
 305 
 306         bind.pirq = pirq;
 307         bind.flags = BIND_PIRQ__WILL_SHARE;
 308         if ((ret = HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind)) != 0)
 309                 panic("xen_bind_pirq() failed (err %d)", ret);
 310         return (bind.port);
 311 }
 312 
 313 /* unmask an evtchn and send upcall to appropriate vcpu if pending bit is set */
 314 static void
 315 xen_evtchn_unmask(int evtchn)
 316 {
 317         evtchn_unmask_t unmask;
 318 
 319         unmask.port = evtchn;
 320         if (HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask) != 0)
 321                 panic("xen_evtchn_unmask() failed");
 322 }
 323 
 324 static void
 325 update_evtchn_affinity(int evtchn)
 326 {
 327         cpu_t *cp;
 328         struct xen_evt_data *cpe;
 329 
 330         ASSERT(evtchn_to_irq[evtchn] != INVALID_IRQ);
 331         ASSERT(MUTEX_HELD(&ec_lock));
 332 
 333         /*
 334          * Use lockless search of cpu_list, similar to mutex_vector_enter().
 335          */
 336         kpreempt_disable();
 337         cp = cpu_list;
 338         do {
 339                 cpe = cp->cpu_m.mcpu_evt_pend;
 340                 if (CPU_IN_SET(evtchn_cpus[evtchn], cp->cpu_id))
 341                         SET_EVTCHN_BIT(evtchn, cpe->evt_affinity);
 342                 else
 343                         CLEAR_EVTCHN_BIT(evtchn, cpe->evt_affinity);
 344         } while ((cp = cp->cpu_next) != cpu_list);
 345         kpreempt_enable();
 346 }
 347 
 348 static void
 349 bind_evtchn_to_cpuset(int evtchn, cpuset_t cpus)
 350 {
 351         ASSERT(evtchn_to_irq[evtchn] != INVALID_IRQ);
 352 
 353         CPUSET_ZERO(evtchn_cpus[evtchn]);
 354         CPUSET_OR(evtchn_cpus[evtchn], cpus);
 355         update_evtchn_affinity(evtchn);
 356 }
 357 
 358 static void
 359 clear_evtchn_affinity(int evtchn)
 360 {
 361         CPUSET_ZERO(evtchn_cpus[evtchn]);
 362         update_evtchn_affinity(evtchn);
 363 }
 364 
 365 static void
 366 alloc_irq_evtchn(int irq, int index, int evtchn, int cpu)
 367 {
 368         irq_info_t *irqp = &irq_info[irq];
 369 
 370         switch (irqp->ii_type) {
 371         case IRQT_IPI:
 372                 ipi_info[index].mi_evtchns[cpu] = evtchn;
 373                 irqp->ii_u.index = index;
 374                 break;
 375         case IRQT_VIRQ:
 376                 virq_info[index].mi_evtchns[cpu] = evtchn;
 377                 irqp->ii_u.index = index;
 378                 break;
 379         default:
 380                 irqp->ii_u.evtchn = evtchn;
 381                 break;
 382         }
 383 
 384         evtchn_to_irq[evtchn] = irq;
 385 
 386         /*
 387          * If a CPU is not specified, we expect to bind it to a CPU later via
 388          * the PSM.
 389          */
 390         if (cpu != -1) {
 391                 cpuset_t tcpus;
 392                 CPUSET_ONLY(tcpus, cpu);
 393                 bind_evtchn_to_cpuset(evtchn, tcpus);
 394         }
 395 }
 396 
 397 static int
 398 alloc_irq(int type, int index, int evtchn, int cpu)
 399 {
 400         int irq;
 401         irq_info_t *irqp;
 402 
 403         ASSERT(MUTEX_HELD(&ec_lock));
 404         ASSERT(type != IRQT_IPI || cpu != -1);
 405 
 406         for (irq = 0; irq < NR_IRQS; irq++) {
 407                 if (irq_info[irq].ii_type == IRQT_UNBOUND)
 408                         break;
 409         }
 410 
 411         if (irq == NR_IRQS)
 412                 panic("No available IRQ to bind to: increase NR_IRQS!\n");
 413 
 414         irqp = &irq_info[irq];
 415 
 416         irqp->ii_type = type;
 417         /*
         * Clear the has_handler field; no handler has been installed yet.
 419          */
 420         irqp->ii_u2.has_handler = 0;
 421 
 422         alloc_irq_evtchn(irq, index, evtchn, cpu);
 423         return (irq);
 424 }
 425 
 426 static int
 427 irq_evtchn(irq_info_t *irqp)
 428 {
 429         int evtchn;
 430 
 431         ASSERT(irqp->ii_type != IRQT_DEV_EVTCHN);
 432 
 433         switch (irqp->ii_type) {
 434         case IRQT_IPI:
 435                 ASSERT(irqp->ii_u.index != 0);
 436                 evtchn = ipi_info[irqp->ii_u.index].mi_evtchns[CPU->cpu_id];
 437                 break;
 438         case IRQT_VIRQ:
 439                 evtchn = virq_info[irqp->ii_u.index].mi_evtchns[CPU->cpu_id];
 440                 break;
 441         default:
 442                 evtchn = irqp->ii_u.evtchn;
 443                 break;
 444         }
 445 
 446         return (evtchn);
 447 }
 448 
 449 int
 450 ec_is_edge_pirq(int irq)
 451 {
 452         return (irq_info[irq].ii_type == IRQT_PIRQ &&
 453             !TEST_EVTCHN_BIT(irq, &pirq_needs_eoi[0]));
 454 }
 455 
 456 static void
 457 unbind_evtchn(ushort_t *evtchnp)
 458 {
 459         int err;
 460 
 461         ASSERT(MUTEX_HELD(&ec_lock));
 462 
 463         ASSERT(*evtchnp != 0);
 464 
 465         err = xen_close_evtchn(*evtchnp);
 466         ASSERT(err == 0);
 467         clear_evtchn_affinity(*evtchnp);
 468         evtchn_to_irq[*evtchnp] = INVALID_IRQ;
 469         *evtchnp = 0;
 470 }
 471 
 472 static void
 473 pirq_unmask_notify(int pirq)
 474 {
 475         struct physdev_eoi eoi;
 476 
 477         if (TEST_EVTCHN_BIT(pirq, &pirq_needs_eoi[0])) {
 478                 eoi.irq = pirq;
 479                 (void) HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi);
 480         }
 481 }
 482 
 483 static void
 484 pirq_query_unmask(int pirq)
 485 {
 486         struct physdev_irq_status_query irq_status;
 487 
 488         irq_status.irq = pirq;
 489         (void) HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status);
 490         CLEAR_EVTCHN_BIT(pirq, &pirq_needs_eoi[0]);
 491         if (irq_status.flags & XENIRQSTAT_needs_eoi)
 492                 SET_EVTCHN_BIT(pirq, &pirq_needs_eoi[0]);
 493 }
 494 
 495 static void
 496 end_pirq(int irq)
 497 {
 498         int evtchn = irq_evtchn(&irq_info[irq]);
 499 
 500         /*
         * If it is an edge-triggered interrupt, it was already unmasked in
         * xen_callback_handler(); only unmask and EOI the level-triggered
         * (needs-EOI) PIRQs here.
 502          */
 503         if (TEST_EVTCHN_BIT(irq, &pirq_needs_eoi[0])) {
 504                 ec_unmask_evtchn(evtchn);
 505                 pirq_unmask_notify(IRQ_TO_PIRQ(irq));
 506         }
 507 }
 508 
 509 /*
 510  * Bind an event channel to a vcpu
 511  */
 512 void
 513 ec_bind_vcpu(int evtchn, int cpu)
 514 {
 515         mutex_enter(&ec_lock);
 516         xen_bind_vcpu(evtchn, cpu);
 517         mutex_exit(&ec_lock);
 518 }
 519 
 520 /*
 521  * Set up a physical device irq to be associated with an event channel.
 522  */
 523 void
 524 ec_setup_pirq(int irq, int ipl, cpuset_t *cpusp)
 525 {
 526         int evtchn;
 527         irq_info_t *irqp = &irq_info[irq];
 528 
 529         /*
         * If this PIRQ is already bound to an evtchn, it is a shared IRQ;
         * the binding and initial setup were already done on a previous
         * trip through this code, so don't repeat them.
 534          */
 535         if (irqp->ii_u.evtchn == INVALID_EVTCHN) {
 536                 evtchn = xen_bind_pirq(irq);
 537 
 538                 pirq_query_unmask(IRQ_TO_PIRQ(irq));
 539 
 540                 irqp->ii_type = IRQT_PIRQ;
 541                 irqp->ii_u.evtchn = evtchn;
 542 
 543                 evtchn_to_irq[evtchn] = irq;
 544                 irqp->ii_u2.ipl = ipl;
 545                 ec_set_irq_affinity(irq, *cpusp);
 546                 ec_enable_irq(irq);
 547                 pirq_unmask_notify(IRQ_TO_PIRQ(irq));
 548         } else {
 549                 ASSERT(irqp->ii_u2.ipl != 0);
 550                 cmn_err(CE_NOTE, "!IRQ%d is shared", irq);
 551                 if (ipl > irqp->ii_u2.ipl)
 552                         irqp->ii_u2.ipl = ipl;
 553                 *cpusp = evtchn_cpus[irqp->ii_u.evtchn];
 554         }
 555 }
 556 
 557 void
 558 ec_unbind_irq(int irq)
 559 {
 560         irq_info_t *irqp = &irq_info[irq];
 561         mec_info_t *virqp;
 562         int drop_lock = 0;
 563         int type, i;
 564 
 565         /*
 566          * Nasty, but we need this during suspend.
 567          */
 568         if (mutex_owner(&ec_lock) != curthread) {
 569                 mutex_enter(&ec_lock);
 570                 drop_lock = 1;
 571         }
 572 
 573         type = irqp->ii_type;
 574 
 575         ASSERT((type == IRQT_EVTCHN) || (type == IRQT_PIRQ) ||
 576             (type == IRQT_VIRQ));
 577 
 578         if ((type == IRQT_EVTCHN) || (type == IRQT_PIRQ)) {
 579                 /* There's only one event channel associated with this irq */
 580                 unbind_evtchn(&irqp->ii_u.evtchn);
 581         } else if (type == IRQT_VIRQ) {
 582                 /*
                 * Each cpu on the system can have its own event channel
 584                  * associated with a virq.  Unbind them all.
 585                  */
 586                 virqp = &virq_info[irqp->ii_u.index];
 587                 for (i = 0; i < NCPU; i++) {
 588                         if (virqp->mi_evtchns[i] != 0)
 589                                 unbind_evtchn(&virqp->mi_evtchns[i]);
 590                 }
 591                 /* Mark the virq structure as invalid. */
 592                 virqp->mi_irq = INVALID_IRQ;
 593         }
 594 
 595         bzero(irqp, sizeof (*irqp));
 596         /* Re-reserve PIRQ. */
 597         if (type == IRQT_PIRQ)
 598                 irqp->ii_type = IRQT_PIRQ;
 599 
 600         if (drop_lock)
 601                 mutex_exit(&ec_lock);
 602 }
 603 
 604 /*
 605  * Rebind an event channel for delivery to a CPU.
 606  */
 607 void
 608 ec_set_irq_affinity(int irq, cpuset_t dest)
 609 {
 610         int evtchn, tcpu;
 611         irq_info_t *irqp = &irq_info[irq];
 612 
 613         mutex_enter(&ec_lock);
 614 
 615         ASSERT(irq < NR_IRQS);
 616         ASSERT(irqp->ii_type != IRQT_UNBOUND);
 617 
 618         /*
 619          * Binding is done at allocation time for these types, so we should
 620          * never modify them.
 621          */
 622         if (irqp->ii_type == IRQT_IPI || irqp->ii_type == IRQT_VIRQ ||
 623             irqp->ii_type == IRQT_DEV_EVTCHN) {
 624                 mutex_exit(&ec_lock);
 625                 return;
 626         }
 627 
 628         CPUSET_FIND(dest, tcpu);
 629         ASSERT(tcpu != CPUSET_NOTINSET);
 630 
 631         evtchn = irq_evtchn(irqp);
 632 
 633         xen_bind_vcpu(evtchn, tcpu);
 634 
 635         bind_evtchn_to_cpuset(evtchn, dest);
 636 
 637         mutex_exit(&ec_lock);
 638 
 639         /*
 640          * Now send the new target processor a NOP IPI.
 641          * It will check for any pending interrupts, and so service any that
 642          * got delivered to the wrong processor by mistake.
 643          */
 644         if (ncpus > 1)
 645                 poke_cpu(tcpu);
 646 }
 647 
 648 int
 649 ec_set_irq_priority(int irq, int pri)
 650 {
 651         irq_info_t *irqp;
 652 
 653         if (irq >= NR_IRQS)
 654                 return (-1);
 655 
 656         irqp = &irq_info[irq];
 657 
 658         if (irqp->ii_type == IRQT_UNBOUND)
 659                 return (-1);
 660 
 661         irqp->ii_u2.ipl = pri;
 662 
 663         return (0);
 664 }
 665 
 666 void
 667 ec_clear_irq_priority(int irq)
 668 {
 669         irq_info_t *irqp = &irq_info[irq];
 670 
 671         ASSERT(irq < NR_IRQS);
 672         ASSERT(irqp->ii_type != IRQT_UNBOUND);
 673 
 674         irqp->ii_u2.ipl = 0;
 675 }
 676 
 677 int
 678 ec_bind_evtchn_to_irq(int evtchn)
 679 {
 680         mutex_enter(&ec_lock);
 681 
 682         ASSERT(evtchn_to_irq[evtchn] == INVALID_IRQ);
 683 
 684         (void) alloc_irq(IRQT_EVTCHN, 0, evtchn, -1);
 685 
 686         mutex_exit(&ec_lock);
 687         return (evtchn_to_irq[evtchn]);
 688 }
 689 
 690 int
 691 ec_bind_virq_to_irq(int virq, int cpu)
 692 {
 693         int err;
 694         int evtchn;
 695         mec_info_t *virqp;
 696 
 697         virqp = &virq_info[virq];
 698         mutex_enter(&ec_lock);
 699 
 700         err = xen_bind_virq(virq, cpu, &evtchn);
 701         ASSERT(err == 0);
 702 
 703         ASSERT(evtchn_to_irq[evtchn] == INVALID_IRQ);
 704 
 705         if (virqp->mi_irq == INVALID_IRQ) {
 706                 virqp->mi_irq = alloc_irq(IRQT_VIRQ, virq, evtchn, cpu);
 707         } else {
 708                 alloc_irq_evtchn(virqp->mi_irq, virq, evtchn, cpu);
 709         }
 710 
 711         mutex_exit(&ec_lock);
 712 
 713         return (virqp->mi_irq);
 714 }
 715 
 716 int
 717 ec_bind_ipi_to_irq(int ipl, int cpu)
 718 {
 719         int evtchn;
 720         ulong_t flags;
 721         mec_info_t *ipip;
 722 
 723         mutex_enter(&ec_lock);
 724 
 725         ipip = &ipi_info[ipl];
 726 
 727         evtchn = xen_bind_ipi(cpu);
 728 
 729         ASSERT(evtchn_to_irq[evtchn] == INVALID_IRQ);
 730 
 731         if (ipip->mi_irq == INVALID_IRQ) {
 732                 ipip->mi_irq = alloc_irq(IRQT_IPI, ipl, evtchn, cpu);
 733         } else {
 734                 alloc_irq_evtchn(ipip->mi_irq, ipl, evtchn, cpu);
 735         }
 736 
 737         /*
 738          * Unmask the new evtchn so that it can be seen by the target cpu
 739          */
 740         flags = intr_clear();
 741         ec_unmask_evtchn(evtchn);
 742         intr_restore(flags);
 743 
 744         mutex_exit(&ec_lock);
 745         return (ipip->mi_irq);
 746 }
 747 
 748 /*
 749  * When bringing up a CPU, bind to all the IPIs that CPU0 bound.
 750  */
 751 void
 752 ec_bind_cpu_ipis(int cpu)
 753 {
 754         int i;
 755 
 756         for (i = 0; i < MAXIPL; i++) {
 757                 mec_info_t *ipip = &ipi_info[i];
 758                 if (ipip->mi_irq == INVALID_IRQ)
 759                         continue;
 760 
 761                 (void) ec_bind_ipi_to_irq(i, cpu);
 762         }
 763 }
 764 
 765 /*
 766  * Can this IRQ be rebound to another CPU?
 767  */
 768 int
 769 ec_irq_rebindable(int irq)
 770 {
 771         irq_info_t *irqp = &irq_info[irq];
 772 
 773         if (irqp->ii_u.evtchn == 0)
 774                 return (0);
 775 
 776         return (irqp->ii_type == IRQT_EVTCHN || irqp->ii_type == IRQT_PIRQ);
 777 }
 778 
 779 /*
 * Should this IRQ be rebound from this CPU (which is being offlined) to
 * another?
 782  */
 783 int
 784 ec_irq_needs_rebind(int irq, int cpu)
 785 {
 786         irq_info_t *irqp = &irq_info[irq];
 787 
 788         return (ec_irq_rebindable(irq) &&
 789             CPU_IN_SET(evtchn_cpus[irqp->ii_u.evtchn], cpu));
 790 }
 791 
 792 void
 793 ec_send_ipi(int ipl, int cpu)
 794 {
 795         mec_info_t *ipip = &ipi_info[ipl];
 796 
 797         ASSERT(ipip->mi_irq != INVALID_IRQ);
 798 
 799         ec_notify_via_evtchn(ipip->mi_evtchns[cpu]);
 800 }
 801 
 802 void
 803 ec_try_ipi(int ipl, int cpu)
 804 {
 805         mec_info_t *ipip = &ipi_info[ipl];
 806 
 807         if (ipip->mi_irq == INVALID_IRQ || ipip->mi_irq == 0)
 808                 return;
 809 
 810         ec_notify_via_evtchn(ipip->mi_evtchns[cpu]);
 811 }
 812 
 813 void
 814 ec_irq_add_evtchn(int irq, int evtchn)
 815 {
 816         mutex_enter(&ec_lock);
 817 
 818         /*
 819          * See description of IRQT_DEV_EVTCHN above.
 820          */
 821         ASSERT(irq == ec_dev_irq);
 822 
 823         alloc_irq_evtchn(irq, 0, evtchn, 0);
 824         /*
 825          * We enforce that the representative event channel for IRQT_DEV_EVTCHN
 826          * is zero, so PSM operations on it have no effect.
 827          */
 828         irq_info[irq].ii_u.evtchn = 0;
 829         mutex_exit(&ec_lock);
 830 }
 831 
 832 void
 833 ec_irq_rm_evtchn(int irq, int evtchn)
 834 {
 835         ushort_t ec = evtchn;
 836 
 837         mutex_enter(&ec_lock);
 838         ASSERT(irq == ec_dev_irq);
 839         unbind_evtchn(&ec);
 840         mutex_exit(&ec_lock);
 841 }
 842 
 843 /*
 * Allocate a /dev/xen/evtchn IRQ.  See the big comment at the top
 845  * for an explanation.
 846  */
 847 int
 848 ec_dev_alloc_irq(void)
 849 {
 850         int i;
 851         irq_info_t *irqp;
 852 
 853         for (i = 0; i < NR_IRQS; i++) {
 854                 if (irq_info[i].ii_type == IRQT_UNBOUND)
 855                         break;
 856         }
 857 
 858         ASSERT(i != NR_IRQS);
 859 
 860         irqp = &irq_info[i];
 861         irqp->ii_type = IRQT_DEV_EVTCHN;
 862         irqp->ii_u2.ipl = IPL_EVTCHN;
 863         /*
 864          * Force the evtchn to zero for the special evtchn device irq
 865          */
 866         irqp->ii_u.evtchn = 0;
 867         return (i);
 868 }
 869 
 870 void
 871 ec_enable_irq(unsigned int irq)
 872 {
 873         ulong_t flag;
 874         irq_info_t *irqp = &irq_info[irq];
 875 
 876         if (irqp->ii_type == IRQT_DEV_EVTCHN)
 877                 return;
 878 
 879         flag = intr_clear();
 880         ec_unmask_evtchn(irq_evtchn(irqp));
 881         intr_restore(flag);
 882 }
 883 
 884 void
 885 ec_disable_irq(unsigned int irq)
 886 {
 887         irq_info_t *irqp = &irq_info[irq];
 888 
 889         if (irqp->ii_type == IRQT_DEV_EVTCHN)
 890                 return;
 891 
 892         /*
         * Spin until we are the one to mask the evtchn; this ensures that
         * no one else can be servicing this evtchn.
 895          */
 896         while (!ec_mask_evtchn(irq_evtchn(irqp)))
 897                 SMT_PAUSE();
 898 }
 899 
 900 static int
 901 ec_evtchn_pending(uint_t ev)
 902 {
 903         uint_t evi;
 904         shared_info_t *si = HYPERVISOR_shared_info;
 905 
 906         evi = ev >> EVTCHN_SHIFT;
 907         ev &= (1ul << EVTCHN_SHIFT) - 1;
 908         return ((si->evtchn_pending[evi] & (1ul << ev)) != 0);
 909 }
 910 
 911 int
 912 ec_pending_irq(unsigned int irq)
 913 {
 914         int evtchn = irq_evtchn(&irq_info[irq]);
 915 
 916         return (ec_evtchn_pending(evtchn));
 917 }
 918 
 919 void
 920 ec_clear_irq(int irq)
 921 {
 922         irq_info_t *irqp = &irq_info[irq];
 923         int evtchn;
 924 
 925         if (irqp->ii_type == IRQT_DEV_EVTCHN)
 926                 return;
 927 
 928         ASSERT(irqp->ii_type != IRQT_UNBOUND);
 929 
 930         evtchn = irq_evtchn(irqp);
 931 
 932         ASSERT(EVTCHN_MASKED(evtchn));
 933         ec_clear_evtchn(evtchn);
 934 }
 935 
 936 void
 937 ec_unmask_irq(int irq)
 938 {
 939         ulong_t flags;
 940         irq_info_t *irqp = &irq_info[irq];
 941 
 942         flags = intr_clear();
 943         switch (irqp->ii_type) {
 944         case IRQT_PIRQ:
 945                 end_pirq(irq);
 946                 break;
 947         case IRQT_DEV_EVTCHN:
 948                 break;
 949         default:
 950                 ec_unmask_evtchn(irq_evtchn(irqp));
 951                 break;
 952         }
 953         intr_restore(flags);
 954 }
 955 
 956 void
 957 ec_try_unmask_irq(int irq)
 958 {
 959         ulong_t flags;
 960         irq_info_t *irqp = &irq_info[irq];
 961         int evtchn;
 962 
 963         flags = intr_clear();
 964         switch (irqp->ii_type) {
 965         case IRQT_PIRQ:
 966                 end_pirq(irq);
 967                 break;
 968         case IRQT_DEV_EVTCHN:
 969                 break;
 970         default:
 971                 if ((evtchn = irq_evtchn(irqp)) != 0)
 972                         ec_unmask_evtchn(evtchn);
 973                 break;
 974         }
 975         intr_restore(flags);
 976 }
 977 
 978 /*
 979  * Poll until an event channel is ready or 'check_func' returns true.  This can
 980  * only be used in a situation where interrupts are masked, otherwise we have a
 981  * classic time-of-check vs. time-of-use race.
 982  */
 983 void
 984 ec_wait_on_evtchn(int evtchn, int (*check_func)(void *), void *arg)
 985 {
 986         if (DOMAIN_IS_INITDOMAIN(xen_info)) {
 987                 while (!check_func(arg))
 988                         (void) HYPERVISOR_yield();
 989                 return;
 990         }
 991 
 992         ASSERT(CPU->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask != 0);
 993 
 994         for (;;) {
 995                 evtchn_port_t ports[1];
 996 
 997                 ports[0] = evtchn;
 998 
 999                 ec_clear_evtchn(evtchn);
1000 
1001                 if (check_func(arg))
1002                         return;
1003 
1004                 (void) HYPERVISOR_poll(ports, 1, 0);
1005         }
1006 }
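
/*
 * An illustrative caller of ec_wait_on_evtchn() (a sketch; the check
 * routine and the reply structure are hypothetical): with the upcall
 * masked, wait for the peer to post a reply and notify our event channel:
 *
 *      static int
 *      reply_posted(void *arg)
 *      {
 *              return (((volatile struct my_reply *)arg)->done);
 *      }
 *
 *      ec_wait_on_evtchn(evtchn, reply_posted, (void *)&reply);
 */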
1007 
1008 void
1009 ec_wait_on_ipi(int ipl, int (*check_func)(void *), void *arg)
1010 {
1011         mec_info_t *ipip = &ipi_info[ipl];
1012 
1013         if (ipip->mi_irq == INVALID_IRQ || ipip->mi_irq == 0)
1014                 return;
1015 
1016         ec_wait_on_evtchn(ipip->mi_evtchns[CPU->cpu_id], check_func, arg);
1017 }
1018 
1019 void
1020 ec_suspend(void)
1021 {
1022         irq_info_t *irqp;
1023         ushort_t *evtchnp;
1024         int i;
1025         int c;
1026 
1027         ASSERT(MUTEX_HELD(&ec_lock));
1028 
1029         for (i = 0; i < MAXIPL; i++) {
1030                 if (ipi_info[i].mi_irq == INVALID_IRQ)
1031                         continue;
1032 
1033                 for (c = 0; c < NCPU; c++) {
1034                         if (cpu[c] == NULL)
1035                                 continue;
1036 
1037                         if (CPU_IN_SET(cpu_suspend_lost_set, c))
1038                                 continue;
1039 
1040                         evtchnp = &ipi_info[i].mi_evtchns[c];
1041                         ASSERT(*evtchnp != 0);
1042                         unbind_evtchn(evtchnp);
1043                 }
1044         }
1045 
1046         for (i = 0; i < NR_VIRQS; i++) {
1047                 if (virq_info[i].mi_irq == INVALID_IRQ)
1048                         continue;
1049 
1050                 /*
1051                  * If we're sharing a single event channel across all CPUs, we
1052                  * should only unbind once.
1053                  */
1054                 if (virq_info[i].mi_shared) {
1055                         evtchnp = &virq_info[i].mi_evtchns[0];
1056                         unbind_evtchn(evtchnp);
1057                         for (c = 1; c < NCPU; c++)
1058                                 virq_info[i].mi_evtchns[c] = 0;
1059                 } else {
1060                         for (c = 0; c < NCPU; c++) {
1061                                 if (cpu[c] == NULL)
1062                                         continue;
1063 
1064                                 evtchnp = &virq_info[i].mi_evtchns[c];
1065                                 if (*evtchnp != 0)
1066                                         unbind_evtchn(evtchnp);
1067                         }
1068                 }
1069         }
1070 
1071         for (i = 0; i < NR_IRQS; i++) {
1072                 irqp = &irq_info[i];
1073 
1074                 switch (irqp->ii_type) {
1075                 case IRQT_EVTCHN:
1076                 case IRQT_DEV_EVTCHN:
1077                         (void) HYPERVISOR_shutdown(SHUTDOWN_crash);
1078                         break;
1079                 case IRQT_PIRQ:
1080                         if (irqp->ii_u.evtchn != 0)
1081                                 (void) HYPERVISOR_shutdown(SHUTDOWN_crash);
1082                         break;
1083                 default:
1084                         break;
1085                 }
1086         }
1087 }
1088 
1089 /*
 * The debug irq is special: we have only one evtchn and irq, but we allow
 * all cpus to service it.  It's marked as shared and we propagate the event
 * channel into all CPUs by hand.
1093  */
1094 static void
1095 share_virq(mec_info_t *virqp)
1096 {
1097         int evtchn = virqp->mi_evtchns[0];
1098         cpuset_t tset;
1099         int i;
1100 
1101         ASSERT(evtchn != 0);
1102 
1103         virqp->mi_shared = 1;
1104 
1105         for (i = 1; i < NCPU; i++)
1106                 virqp->mi_evtchns[i] = evtchn;
1107         CPUSET_ALL(tset);
1108         bind_evtchn_to_cpuset(evtchn, tset);
1109 }
1110 
1111 static void
1112 virq_resume(int virq)
1113 {
1114         mec_info_t *virqp = &virq_info[virq];
1115         int evtchn;
1116         int i, err;
1117 
1118         for (i = 0; i < NCPU; i++) {
1119                 cpuset_t tcpus;
1120 
1121                 if (cpu[i] == NULL || CPU_IN_SET(cpu_suspend_lost_set, i))
1122                         continue;
1123 
1124                 err = xen_bind_virq(virq, i, &evtchn);
1125                 ASSERT(err == 0);
1126 
1127                 virqp->mi_evtchns[i] = evtchn;
1128                 evtchn_to_irq[evtchn] = virqp->mi_irq;
1129                 CPUSET_ONLY(tcpus, i);
1130                 bind_evtchn_to_cpuset(evtchn, tcpus);
1131                 ec_unmask_evtchn(evtchn);
1132                 /*
                 * Only the timer VIRQ is bound to all cpus.
1134                  */
1135                 if (virq != VIRQ_TIMER)
1136                         break;
1137         }
1138 
1139         if (virqp->mi_shared)
1140                 share_virq(virqp);
1141 }
1142 
1143 static void
1144 ipi_resume(int ipl)
1145 {
1146         mec_info_t *ipip = &ipi_info[ipl];
1147         int i;
1148 
1149         for (i = 0; i < NCPU; i++) {
1150                 cpuset_t tcpus;
1151                 int evtchn;
1152 
1153                 if (cpu[i] == NULL || CPU_IN_SET(cpu_suspend_lost_set, i))
1154                         continue;
1155 
1156                 evtchn = xen_bind_ipi(i);
1157                 ipip->mi_evtchns[i] = evtchn;
1158                 evtchn_to_irq[evtchn] = ipip->mi_irq;
1159                 CPUSET_ONLY(tcpus, i);
1160                 bind_evtchn_to_cpuset(evtchn, tcpus);
1161                 ec_unmask_evtchn(evtchn);
1162         }
1163 }
1164 
1165 void
1166 ec_resume(void)
1167 {
1168         int i;
1169 
1170         /* New event-channel space is not 'live' yet. */
1171         for (i = 0; i < NR_EVENT_CHANNELS; i++)
1172                 (void) ec_mask_evtchn(i);
1173 
1174         for (i = 0; i < MAXIPL; i++) {
1175                 if (ipi_info[i].mi_irq == INVALID_IRQ)
1176                         continue;
1177                 ipi_resume(i);
1178         }
1179 
1180         for (i = 0; i < NR_VIRQS; i++) {
1181                 if (virq_info[i].mi_irq == INVALID_IRQ)
1182                         continue;
1183                 virq_resume(i);
1184         }
1185 }
1186 
1187 int
1188 ec_init(void)
1189 {
1190         int i;
1191         mutex_init(&ec_lock, NULL, MUTEX_SPIN, (void *)ipltospl(SPL7));
1192 
1193         for (i = 0; i < NR_EVENT_CHANNELS; i++) {
1194                 CPUSET_ZERO(evtchn_cpus[i]);
1195                 evtchn_to_irq[i] = INVALID_IRQ;
1196                 (void) ec_mask_evtchn(i);
1197         }
1198 
1199         for (i = 0; i < MAXIPL; i++)
1200                 ipi_info[i].mi_irq = INVALID_IRQ;
1201 
1202         for (i = 0; i < NR_VIRQS; i++)
1203                 virq_info[i].mi_irq = INVALID_IRQ;
1204 
1205         /*
         * Phys IRQ space is statically bound (1:1 mapping); grab the IRQs
         * now.
1208          */
1209         for (i = PIRQ_BASE; i < NR_PIRQS; i++) {
1210                 irq_info[PIRQ_TO_IRQ(i)].ii_type = IRQT_PIRQ;
1211         }
1212 
1213         return (0);
1214 }
1215 
1216 void
1217 ec_init_debug_irq()
1218 {
1219         int irq;
1220 
1221         irq = ec_bind_virq_to_irq(VIRQ_DEBUG, 0);
1222         (void) add_avintr(NULL, IPL_DEBUG, (avfunc)xen_debug_handler,
1223             "debug", irq, NULL, NULL, NULL, NULL);
1224 
1225         mutex_enter(&ec_lock);
1226         share_virq(&virq_info[irq_info[irq].ii_u.index]);
1227         mutex_exit(&ec_lock);
1228         ec_debug_irq = irq;
1229 }
1230 
1231 #define UNBLOCKED_EVENTS(si, ix, cpe, cpu_id) \
1232         ((si)->evtchn_pending[ix] & ~(si)->evtchn_mask[ix] & \
1233                 (cpe)->evt_affinity[ix])
1234 
1235 
1236 /*
1237  * This is the entry point for processing events from xen
1238  *
1239  * (See the commentary associated with the shared_info_st structure
1240  * in hypervisor-if.h)
1241  *
1242  * Since the event channel mechanism doesn't really implement the
1243  * concept of priority like hardware interrupt controllers, we simulate
1244  * that in software here using the cpu priority field and the pending
1245  * interrupts field.  Events/interrupts that are not able to be serviced
1246  * now because they are at a lower priority than the current cpu priority
1247  * cause a level bit to be recorded in the pending interrupts word.  When
1248  * the priority is lowered (either by spl or interrupt exit code) the pending
1249  * levels are checked and an upcall is scheduled if there are events/interrupts
1250  * that have become deliverable.
 * that have become deliverable.
 */
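/*
 * For example (an illustration; the numbers are arbitrary): if an event
 * whose irq has ipl 5 arrives while the cpu is running at priority 9, the
 * code below records the port in cpe->pending_evts[5][] and sets bit 5 of
 * mcpu_intr_pending; once the cpu priority later drops below 5, the spl or
 * interrupt exit code notices the pending level and re-posts an upcall so
 * the event gets serviced.
 */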
1252 void
1253 xen_callback_handler(struct regs *rp, trap_trace_rec_t *ttp)
1254 {
1255         ulong_t pending_sels, pe, selbit;
1256         int i, j, port, pri, curpri, irq, sipri;
1257         uint16_t pending_ints, sip;
1258         struct cpu *cpu = CPU;
1259         volatile shared_info_t *si = HYPERVISOR_shared_info;
1260         volatile vcpu_info_t *vci = cpu->cpu_m.mcpu_vcpu_info;
1261         volatile struct xen_evt_data *cpe = cpu->cpu_m.mcpu_evt_pend;
1262         volatile uint16_t *cpu_ipp = &cpu->cpu_m.mcpu_intr_pending;
1263         extern void dosoftint(struct regs *);
1264 
1265         ASSERT(rp->r_trapno == T_AST && rp->r_err == 0);
1266         ASSERT(&si->vcpu_info[cpu->cpu_id] == vci);
1267         ASSERT_STACK_ALIGNED();
1268 
1269         vci->evtchn_upcall_pending = 0;
1270 
1271         /*
1272          * To expedite scanning of pending notifications, any 0->1
1273          * pending transition on an unmasked channel causes a
1274          * corresponding bit in evtchn_pending_sel to be set.
         * Each bit in the selector covers one word (a ulong_t) of
         * the evtchn_pending[] array.
1277          */
1278         membar_enter();
1279         do {
1280                 pending_sels = vci->evtchn_pending_sel;
1281         } while (atomic_cas_ulong((volatile ulong_t *)&vci->evtchn_pending_sel,
1282             pending_sels, 0) != pending_sels);
1283 
1284         pending_ints = *cpu_ipp;
1285         while ((i = ffs(pending_sels)) != 0) {
1286                 i--;
1287                 selbit = 1ul << i;
1288                 pending_sels &= ~selbit;
1289 
1290                 membar_enter();
1291                 while ((pe = UNBLOCKED_EVENTS(si, i, cpe, cpu->cpu_id)) != 0) {
1292                         j = ffs(pe) - 1;
1293                         pe &= ~(1ul << j);
1294 
1295                         port = (i << EVTCHN_SHIFT) + j;
1296 
1297                         irq = evtchn_to_irq[port];
1298 
1299                         /*
                         * If no irq is set, just ignore the event.  (On
                         * netbsd, for example, they call
                         * evtchn_device_upcall(port) here.)  We require the
                         * evtchn driver to install a handler, so there will
                         * be an irq associated with user mode evtchns.
1305                          */
1306                         if (irq == INVALID_IRQ) {
1307                                 ec_clear_evtchn(port);
1308                                 continue;
1309                         }
1310 
1311                         /*
1312                          * If there's no handler, it could be a poke, so just
1313                          * accept the event and continue.
1314                          */
1315                         if (!irq_info[irq].ii_u2.has_handler) {
1316 #ifdef TRAPTRACE
1317                                 ttp->ttr_ipl = 0xff;
1318                                 if (IRQ_IS_CPUPOKE(irq)) {
1319                                         ttp->ttr_ipl = XC_CPUPOKE_PIL;
1320                                         ttp->ttr_marker = TT_INTERRUPT;
1321                                 }
1322                                 ttp->ttr_pri = cpu->cpu_pri;
1323                                 ttp->ttr_spl = cpu->cpu_base_spl;
1324                                 ttp->ttr_vector = 0xff;
1325 #endif /* TRAPTRACE */
1326                                 if (ec_mask_evtchn(port)) {
1327                                         ec_clear_evtchn(port);
1328                                         ec_unmask_evtchn(port);
1329                                         continue;
1330                                 }
1331                         }
1332 
1333                         pri = irq_info[irq].ii_u2.ipl;
1334 
1335                         /*
1336                          * If we are the cpu that successfully masks
1337                          * the event, then record it as a pending event
1338                          * for this cpu to service
1339                          */
1340                         if (ec_mask_evtchn(port)) {
1341                                 if (ec_evtchn_pending(port)) {
1342                                         cpe->pending_sel[pri] |= selbit;
1343                                         cpe->pending_evts[pri][i] |= (1ul << j);
1344                                         pending_ints |= 1 << pri;
1345                                         /*
1346                                          * We have recorded a pending interrupt
1347                                          * for this cpu.  If it is an edge
1348                                          * triggered interrupt then we go ahead
1349                                          * and clear the pending and mask bits
1350                                          * from the shared info to avoid having
1351                                          * the hypervisor see the pending event
1352                                          * again and possibly disabling the
1353                                          * interrupt.  This should also help
1354                                          * keep us from missing an interrupt.
1355                                          */
1356                                         if (ec_is_edge_pirq(irq)) {
1357                                                 ec_clear_evtchn(port);
1358                                                 ec_unmask_evtchn(port);
1359                                         }
1360                                 } else {
1361                                         /*
1362                                          * another cpu serviced this event
1363                                          * before us, clear the mask.
1364                                          */
1365                                         ec_unmask_evtchn(port);
1366                                 }
1367                         }
1368                 }
1369         }
1370         *cpu_ipp = pending_ints;
1371         if (pending_ints == 0)
1372                 return;
1373         /*
1374          * We have gathered all the pending events/interrupts,
1375          * go service all the ones we can from highest priority to lowest.
1376          * Note: This loop may not actually complete and service all
1377          * pending interrupts since one of the interrupt threads may
1378          * block and the pinned thread runs.  In that case, when we
1379          * exit the interrupt thread that blocked we will check for
1380          * any unserviced interrupts and re-post an upcall to process
1381          * any unserviced pending events.
1382          */
1383 restart:
1384         curpri = cpu->cpu_pri;
1385         pri = bsrw_insn(*cpu_ipp);
1386         while (pri > curpri) {
1387                 while ((pending_sels = cpe->pending_sel[pri]) != 0) {
1388                         i = ffs(pending_sels) - 1;
1389                         while ((pe = cpe->pending_evts[pri][i]) != 0) {
1390                                 j = ffs(pe) - 1;
1391                                 port = (i << EVTCHN_SHIFT) + j;
1392                                 pe &= ~(1ul << j);
1393                                 cpe->pending_evts[pri][i] = pe;
1394                                 if (pe == 0) {
1395                                         /*
1396                                          * Must reload pending selector bits
1397                                          * here as they could have changed on
1398                                          * a previous trip around the inner loop
                                         * while interrupts were enabled
                                         * in an interrupt service routine.
1401                                          */
1402                                         pending_sels = cpe->pending_sel[pri];
1403                                         pending_sels &= ~(1ul << i);
1404                                         cpe->pending_sel[pri] = pending_sels;
1405                                         if (pending_sels == 0)
1406                                                 *cpu_ipp &= ~(1 << pri);
1407                                 }
1408                                 irq = evtchn_to_irq[port];
1409                                 if (irq == INVALID_IRQ) {
1410                                         /*
                                         * There is no longer a handler for
                                         * this event channel.  Clear and
                                         * unmask the event, then ignore it.
1414                                          */
1415                                         ec_clear_evtchn(port);
1416                                         ec_unmask_evtchn(port);
1417                                         continue;
1418                                 }
1419                                 if (irq == ec_dev_irq) {
1420                                         ASSERT(cpu->cpu_m.mcpu_ec_mbox == 0);
1421                                         cpu->cpu_m.mcpu_ec_mbox = port;
1422                                 }
1423                                 /*
1424                                  * Set up the regs struct to
1425                                  * look like a normal hardware int
1426                                  * and do normal interrupt handling.
1427                                  */
1428                                 rp->r_trapno = irq;
1429                                 do_interrupt(rp, ttp);
1430                                 /*
1431                                  * Check for cpu priority change
1432                                  * Can happen if int thread blocks
1433                                  */
1434                                 if (cpu->cpu_pri != curpri)
1435                                         goto restart;
1436                         }
1437                 }
1438                 /*
1439                  * Dispatch any soft interrupts that are
1440                  * higher priority than any hard ones remaining.
1441                  */
1442                 pri = bsrw_insn(*cpu_ipp);
1443                 sip = (uint16_t)cpu->cpu_softinfo.st_pending;
1444                 if (sip != 0) {
1445                         sipri = bsrw_insn(sip);
1446                         if (sipri > pri && sipri > cpu->cpu_pri) {
1447                                 dosoftint(rp);
1448                                 /*
1449                                  * Check for cpu priority change
1450                                  * Can happen if softint thread blocks
1451                                  */
1452                                 if (cpu->cpu_pri != curpri)
1453                                         goto restart;
1454                         }
1455                 }
1456         }
1457         /*
1458          * Deliver any pending soft interrupts.
1459          */
1460         if (cpu->cpu_softinfo.st_pending)
1461                 dosoftint(rp);
1462 }
1463 
1464 
1465 void
1466 ec_unmask_evtchn(unsigned int ev)
1467 {
1468         uint_t evi, evb;
1469         volatile shared_info_t *si = HYPERVISOR_shared_info;
1470         volatile vcpu_info_t *vci = CPU->cpu_m.mcpu_vcpu_info;
1471         volatile ulong_t *ulp;
1472 
1473         ASSERT(!interrupts_enabled());
1474         /*
         * Check if we need to take the slow path.
1476          */
1477         if (!CPU_IN_SET(evtchn_cpus[ev], CPU->cpu_id)) {
1478                 xen_evtchn_unmask(ev);
1479                 return;
1480         }
1481         evi = ev >> EVTCHN_SHIFT;
1482         evb = ev & ((1ul << EVTCHN_SHIFT) - 1);
1483         ulp = (volatile ulong_t *)&si->evtchn_mask[evi];
1484         atomic_and_ulong(ulp, ~(1ul << evb));
1485         /*
1486          * The following is basically the equivalent of
1487          * 'hw_resend_irq'. Just like a real IO-APIC we 'lose the
1488          * interrupt edge' if the channel is masked.
1489          * XXPV - slight race if upcall was about to be set, we may get
1490          * an extra upcall.
1491          */
1492         membar_enter();
1493         if (si->evtchn_pending[evi] & (1ul << evb)) {
1494                 membar_consumer();
1495                 ulp = (volatile ulong_t *)&vci->evtchn_pending_sel;
1496                 if (!(*ulp & (1ul << evi))) {
1497                         atomic_or_ulong(ulp, (1ul << evi));
1498                 }
1499                 vci->evtchn_upcall_pending = 1;
1500         }
1501 }
1502 
1503 /*
 * Set a bit in an evtchn mask word, return true if we are the cpu that
1505  * set the bit.
1506  */
1507 int
1508 ec_mask_evtchn(unsigned int ev)
1509 {
1510         uint_t evi, evb;
1511         ulong_t new, old, bit;
1512         volatile shared_info_t *si = HYPERVISOR_shared_info;
1513         volatile ulong_t *maskp;
1514         int masked;
1515 
1516         kpreempt_disable();
1517         evi = ev >> EVTCHN_SHIFT;
1518         evb = ev & ((1ul << EVTCHN_SHIFT) - 1);
1519         bit = 1ul << evb;
1520         maskp = (volatile ulong_t *)&si->evtchn_mask[evi];
1521         do {
1522                 old = si->evtchn_mask[evi];
1523                 new = old | bit;
1524         } while (atomic_cas_ulong(maskp, old, new) != old);
1525         masked = (old & bit) == 0;
1526         if (masked) {
1527                 evtchn_owner[ev] = CPU->cpu_id;
1528 #ifdef DEBUG
1529                 evtchn_owner_thread[ev] = curthread;
1530 #endif
1531         }
1532         kpreempt_enable();
1533         return (masked);
1534 }
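
/*
 * A sketch of the intended use of ec_mask_evtchn() (mirroring its use in
 * xen_callback_handler() above): the cpu that wins the race to set the mask
 * bit takes ownership of servicing the event; the losers leave it alone:
 *
 *      if (ec_mask_evtchn(port)) {
 *              ... record or service the event ...
 *              ec_unmask_evtchn(port);
 *      }
 */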
1535 
1536 void
1537 ec_clear_evtchn(unsigned int ev)
1538 {
1539         uint_t evi;
1540         shared_info_t *si = HYPERVISOR_shared_info;
1541         volatile ulong_t *pendp;
1542 
1543         evi = ev >> EVTCHN_SHIFT;
1544         ev &= (1ul << EVTCHN_SHIFT) - 1;
1545         pendp = (volatile ulong_t *)&si->evtchn_pending[evi];
1546         atomic_and_ulong(pendp, ~(1ul << ev));
1547 }
1548 
1549 void
1550 ec_notify_via_evtchn(unsigned int port)
1551 {
1552         evtchn_send_t send;
1553 
1554         ASSERT(port != INVALID_EVTCHN);
1555 
1556         send.port = port;
1557         (void) HYPERVISOR_event_channel_op(EVTCHNOP_send, &send);
1558 }
1559 
1560 int
1561 ec_block_irq(int irq)
1562 {
1563         irq_info_t *irqp = &irq_info[irq];
1564         int evtchn;
1565 
1567         evtchn = irq_evtchn(irqp);
1568         (void) ec_mask_evtchn(evtchn);
1569         return (evtchn_owner[evtchn]);
1570 }
1571 
1572 /*
 * Make an event that is pending for delivery on the current cpu "go away"
1574  * without servicing the interrupt.
1575  */
1576 void
1577 ec_unpend_irq(int irq)
1578 {
1579         irq_info_t *irqp = &irq_info[irq];
1580         int pri = irqp->ii_u2.ipl;
1581         ulong_t flags;
1582         uint_t evtchn, evi, bit;
1583         unsigned long pe, pending_sels;
1584         struct xen_evt_data *cpe;
1585 
1586         /*
1587          * The evtchn must be masked
1588          */
1589         evtchn = irq_evtchn(irqp);
1590         ASSERT(EVTCHN_MASKED(evtchn));
1591         evi = evtchn >> EVTCHN_SHIFT;
        bit = evtchn & ((1ul << EVTCHN_SHIFT) - 1);
1593         flags = intr_clear();
1594         cpe = CPU->cpu_m.mcpu_evt_pend;
1595         pe = cpe->pending_evts[pri][evi] & ~(1ul << bit);
1596         cpe->pending_evts[pri][evi] = pe;
1597         if (pe == 0) {
1598                 pending_sels = cpe->pending_sel[pri];
1599                 pending_sels &= ~(1ul << evi);
1600                 cpe->pending_sel[pri] = pending_sels;
1601                 if (pending_sels == 0)
1602                         CPU->cpu_m.mcpu_intr_pending &= ~(1 << pri);
1603         }
1604         intr_restore(flags);
1605 }