1 /*
   2  * This file and its contents are supplied under the terms of the
   3  * Common Development and Distribution License ("CDDL"), version 1.0.
   4  * You may only use this file in accordance with the terms of version
   5  * 1.0 of the CDDL.
   6  *
   7  * A full copy of the text of the CDDL should have accompanied this
   8  * source.  A copy of the CDDL is also available via the Internet at
   9  * http://www.illumos.org/license/CDDL.
  10  */
  11 
  12 /*
  13  * Copyright (c) 2018, Joyent, Inc.
  14  */
  15 
  16 /*
 * Use a cpu_uarray_t for an array of uint64_t values that are written on a
 * per-CPU basis.  Each CPU's block of values is aligned on a 128-byte boundary
 * (that is, two cache lines).  It's not clear why, but this alignment can have
 * a significant effect on certain benchmarks run on multi-socket, relatively
 * current Intel systems.
  21  *
  22  * So the layout is like this, for example:
  23  *
  24  * 0:   STAT1 for CPU 0
  25  * 8:   STAT2 for CPU 0
  26  * 16:  STAT3 for CPU 0
  27  * 24:  padding
  28  * 128: STAT1 for CPU 1
  29  * 136: STAT2 for CPU 1
  30  * ...
  31  *
  32  * At collection time, cpu_uarray_sum() can be used to sum the given value index
  33  * across all CPUs, or cpu_uarray_sum_all() sums all stats across all CPUs.
  34  * The summation is done such that it saturates at UINT64_MAX.
  35  */
  36 
  37 #ifndef _SYS_CPU_UARRAY_H
  38 #define _SYS_CPU_UARRAY_H
  39 
  40 #include <sys/types.h>
  41 
  42 #ifdef  __cplusplus
  43 extern "C" {
  44 #endif
  45 
  46 #ifdef _KERNEL
  47 
  48 /*
  49  * Trying to include sysmacros.h for P2ROUNDUP() here is just too painful.
  50  */
  51 #define CUA_ROUNDUP(x, align) (-(-(x) & -(align)))
  52 #define CUA_ALIGN (128)
  53 #define CUA_CPU_STRIDE(nr_items) \
  54         CUA_ROUNDUP((nr_items), CUA_ALIGN / sizeof (uint64_t))
  55 #define CUA_INDEX(nr_items, c, i) (((c) * CUA_CPU_STRIDE(nr_items)) + (i))
  56 
  57 #define CPU_UARRAY_VAL(cua, cpu_index, stat_index) \
  58         ((cua)->cu_vals[CUA_INDEX((cua)->cu_nr_items, cpu_index, stat_index)])
  59 
  60 typedef struct {
  61         uint64_t cu_nr_items;
  62         char cu_pad[CUA_ALIGN - sizeof (uint64_t)];
  63 #ifdef  __lint
  64         volatile uint64_t cu_vals[1];
  65 #else
  66         volatile uint64_t cu_vals[];
  67 #endif
  68 } cpu_uarray_t __aligned(CUA_ALIGN);
  69 
  70 extern cpu_uarray_t *cpu_uarray_zalloc(size_t, int);
  71 extern void cpu_uarray_free(cpu_uarray_t *);
  72 extern uint64_t cpu_uarray_sum(cpu_uarray_t *, size_t);
  73 extern uint64_t cpu_uarray_sum_all(cpu_uarray_t *);
  74 
  75 #endif /* _KERNEL */
  76 
  77 #ifdef  __cplusplus
  78 }
  79 #endif
  80 
  81 #endif  /* _SYS_CPU_UARRAY_H */