// Copyright 2017 The Fuchsia Authors
//
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT

#pragma once

#include <arch/ops.h>
#include <kernel/atomic.h>
#include <kernel/percpu.h>

#include <zircon/compiler.h>

// Kernel counters are a facility designed to help field diagnostics and
// to help devs properly dimension the load/clients/size of the kernel
// constructs. It answers questions like:
//   - after N seconds how many outstanding <x> things are allocated?
//   - up to this point has <Y> ever happened?
//
// Currently the only query interface to the counters is the console
// k counters command. Issue 'k counters help' to learn what it can do.
//
// Kernel counters public API:
// 1- define a new counter.
//      KCOUNTER(counter_name, "<counter name>");
//      KCOUNTER_MAX(counter_name, "<counter name>");
//
// 2- counters start at zero, increment the counter:
//      kcounter_add(counter_name, 1);
//    or
//      kcounter_max(counter_name, value);
//
// By default with KCOUNTER, the `k counters` presentation will calculate a
// sum() of the per-core values. KCOUNTER_MAX() instead calculates the max()
// of the counters across cores. (See the usage example below.)
//
// Naming the counters
// The naming convention is "kernel.subsystem.thing_or_action"
// for example "kernel.dispatcher.destroy"
//             "kernel.exceptions.fpu"
//             "kernel.handles.new"
//
// Reading the counters in code
// Don't. The counters are maintained in a per-cpu arena, and atomic
// operations are never used to set their values, so they are both
// imprecise and reflect only the operations on a particular core.
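//
// Example usage (a minimal sketch; the counter names and call site below are
// hypothetical and only illustrate the API):
//
//      KCOUNTER(channel_msg_count, "kernel.channel.messages");
//      KCOUNTER_MAX(channel_depth_max, "kernel.channel.max_depth");
//
//      void on_channel_write(size_t pending) {
//          kcounter_add(channel_msg_count, 1);
//          kcounter_max(channel_depth_max, static_cast<int64_t>(pending));
//      }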

enum class k_counter_type : uint64_t {
    sum = 1,
    min = 2,
    max = 3,
};

struct k_counter_desc {
    const char* name;
    k_counter_type type;
};
static_assert(
    sizeof(struct k_counter_desc) == 16,
    "kernel.ld uses this size to ASSERT that enough space has been reserved in the counters arena");

// Define the descriptor and reserve the arena space for the counters.
// Because of -fdata-sections, each kcounter_arena_* array will be
// placed in a .bss.kcounter.* section; kernel.ld recognizes those names
// and places them all together to become the contiguous kcounters_arena
// array.  Note that each kcounter_arena_* does not correspond with the
// slots used for this particular counter (that would have terrible
// cache effects); it just reserves enough space for counters_init() to
// dole out in per-CPU chunks.
#define KCOUNTER(var, name)                                                                        \
    __USED int64_t kcounter_arena_##var[SMP_MAX_CPUS] __asm__("kcounter." name);                   \
    __USED __SECTION("kcountdesc." name) static const struct k_counter_desc var[] = {              \
        {name, k_counter_type::sum}}

#define KCOUNTER_MAX(var, name)                                                                    \
    __USED int64_t kcounter_arena_##var[SMP_MAX_CPUS] __asm__("kcounter." name);                   \
    __USED __SECTION("kcountdesc." name) static const struct k_counter_desc var[] = {              \
        {name, k_counter_type::max}}
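
// For illustration only: a hypothetical counter defined with
// KCOUNTER(foo, "kernel.foo") expands to roughly
//
//      __USED int64_t kcounter_arena_foo[SMP_MAX_CPUS] __asm__("kcounter.kernel.foo");
//      __USED __SECTION("kcountdesc.kernel.foo") static const struct k_counter_desc foo[] = {
//          {"kernel.foo", k_counter_type::sum}};
//
// i.e. one arena reservation named after the counter plus one descriptor entry.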

// Via magic in kernel.ld, all the descriptors wind up in a contiguous
// array bounded by these two symbols, sorted by name.
extern const struct k_counter_desc kcountdesc_begin[], kcountdesc_end[];

// The order of the descriptors is the order of the slots in each per-cpu array.
static inline size_t kcounter_index(const struct k_counter_desc* var) {
    return var - kcountdesc_begin;
}

// The counter, as named by |var|, is just an offset into the per-cpu
// table, so adding to it does not require a cross-CPU atomic operation.
static inline int64_t* kcounter_slot(const struct k_counter_desc* var) {
    return &get_local_percpu()->counters[kcounter_index(var)];
}

static inline void kcounter_add(const struct k_counter_desc* var,
                                int64_t add) {
#if defined(__aarch64__)
    // use a relaxed atomic load/store for arm64 to avoid a potentially nasty
    // race between the regular load/store operations for a +1. Relaxed
    // atomic load/stores are about as efficient as a regular load/store.
    atomic_add_64_relaxed(kcounter_slot(var), add);
#else
    // x86 can do the add in a single non-atomic instruction, so the data loss
    // of a preemption in the middle of this sequence is fairly minimal.
    *kcounter_slot(var) += add;
#endif
}

// TODO(travisg|scottmg): Revisit, consider more efficient arm-specific
// instruction sequence here.
static inline void kcounter_max(const struct k_counter_desc* var, int64_t value) {
    int64_t prev_value = atomic_load_64_relaxed(kcounter_slot(var));
    while (prev_value < value && !atomic_cmpxchg_64_relaxed(kcounter_slot(var), &prev_value, value))
        ;
}

static inline void kcounter_max_counter(const struct k_counter_desc* var,
                                        const struct k_counter_desc* other_var) {
    kcounter_max(var, *kcounter_slot(other_var));
}
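
// Illustrative use of kcounter_max_counter() (the counters below are
// hypothetical): fold the current CPU's value of a sum-type counter into a
// max-type counter, e.g. to record a high-water mark:
//
//      KCOUNTER(runq_depth, "kernel.sched.runq_depth");
//      KCOUNTER_MAX(runq_depth_max, "kernel.sched.runq_depth_max");
//      ...
//      kcounter_max_counter(runq_depth_max, runq_depth);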