// Copyright 2017 The Fuchsia Authors
//
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT

#pragma once

#include <arch/ops.h>
#include <kernel/atomic.h>
#include <kernel/percpu.h>

#include <zircon/compiler.h>

// Kernel counters are a facility designed to help field diagnostics and
// to help developers properly dimension the load/clients/size of kernel
// constructs. It answers questions like:
//   - after N seconds how many outstanding <x> things are allocated?
//   - up to this point has <Y> ever happened?
//
// Currently the only query interface to the counters is the console
// k counters command. Issue 'k counters help' to learn what it can do.
//
// Kernel counters public API:
// 1- define a new counter:
//      KCOUNTER(counter_name, "<counter name>");
//      KCOUNTER_MAX(counter_name, "<counter name>");
//
// 2- counters start at zero; update the counter:
//      kcounter_add(counter_name, 1);
//    or
//      kcounter_max(counter_name, value);
//
// By default with KCOUNTER, the `k counters` presentation will calculate a
// sum() of the per-core values. KCOUNTER_MAX() instead calculates the max()
// of the counters across cores.
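//
// For example, a minimal usage sketch (the "kernel.widget.*" counter names
// and the surrounding function are hypothetical, for illustration only):
//      KCOUNTER(widget_create_count, "kernel.widget.create");
//      KCOUNTER_MAX(widget_peak_depth, "kernel.widget.peak_depth");
//
//      void on_widget_create(size_t queue_depth) {
//          kcounter_add(widget_create_count, 1);
//          kcounter_max(widget_peak_depth, static_cast<int64_t>(queue_depth));
//      }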
//
// Naming the counters
//   The naming convention is "kernel.subsystem.thing_or_action",
//   for example "kernel.dispatcher.destroy"
//               "kernel.exceptions.fpu"
//               "kernel.handles.new"
//
// Reading the counters in code
//   Don't. The counters are maintained in a per-cpu arena and are updated
//   without cross-CPU synchronization, so any value read from code is both
//   imprecise and reflects only the operations on a particular core.

enum class k_counter_type : uint64_t {
    sum = 1,
    min = 2,
    max = 3,
};

struct k_counter_desc {
    const char* name;
    k_counter_type type;
};
static_assert(
    sizeof(struct k_counter_desc) == 16,
    "kernel.ld uses this size to ASSERT that enough space has been reserved in the counters arena");

// Define the descriptor and reserve the arena space for the counters.
// Because of -fdata-sections, each kcounter_arena_* array will be
// placed in a .bss.kcounter.* section; kernel.ld recognizes those names
// and places them all together to become the contiguous kcounters_arena
// array. Note that each kcounter_arena_* does not correspond with the
// slots used for this particular counter (that would have terrible
// cache effects); it just reserves enough space for counters_init() to
// dole out in per-CPU chunks.
#define KCOUNTER(var, name)                                                           \
    __USED int64_t kcounter_arena_##var[SMP_MAX_CPUS] __asm__("kcounter." name);      \
    __USED __SECTION("kcountdesc." name) static const struct k_counter_desc var[] = { \
        {name, k_counter_type::sum}}

#define KCOUNTER_MAX(var, name)                                                       \
    __USED int64_t kcounter_arena_##var[SMP_MAX_CPUS] __asm__("kcounter." name);      \
    __USED __SECTION("kcountdesc." name) static const struct k_counter_desc var[] = { \
        {name, k_counter_type::max}}
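
// Roughly, KCOUNTER(widget_create_count, "kernel.widget.create") (a
// hypothetical counter, used only for illustration) expands to:
//
//      __USED int64_t kcounter_arena_widget_create_count[SMP_MAX_CPUS]
//          __asm__("kcounter.kernel.widget.create");
//      __USED __SECTION("kcountdesc.kernel.widget.create")
//          static const struct k_counter_desc widget_create_count[] = {
//              {"kernel.widget.create", k_counter_type::sum}}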

// Via magic in kernel.ld, all the descriptors wind up in a contiguous
// array bounded by these two symbols, sorted by name.
extern const struct k_counter_desc kcountdesc_begin[], kcountdesc_end[];

// The order of the descriptors is the order of the slots in each per-cpu array.
static inline size_t kcounter_index(const struct k_counter_desc* var) {
    return var - kcountdesc_begin;
}

// The counter named |var| is just an offset into the per-cpu table, so an
// atomic operation is not required to add to it.
static inline int64_t* kcounter_slot(const struct k_counter_desc* var) {
    return &get_local_percpu()->counters[kcounter_index(var)];
}

static inline void kcounter_add(const struct k_counter_desc* var,
                                int64_t add) {
#if defined(__aarch64__)
    // Use a relaxed atomic add for arm64 to avoid a potentially nasty race
    // between the separate load and store operations of a non-atomic +1.
    // Relaxed atomic load/stores are about as efficient as a regular
    // load/store.
    atomic_add_64_relaxed(kcounter_slot(var), add);
#else
    // x86 can do the add in a single non-atomic instruction, so the data loss
    // from a preemption in the middle of this sequence is fairly minimal.
    *kcounter_slot(var) += add;
#endif
}

// TODO(travisg|scottmg): Revisit, consider more efficient arm-specific
// instruction sequence here.
static inline void kcounter_max(const struct k_counter_desc* var, int64_t value) {
    int64_t prev_value = atomic_load_64_relaxed(kcounter_slot(var));
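    // Keep trying until the slot already holds a value >= |value|. On failure
    // the cmpxchg refreshes |prev_value| with the slot's current contents, so
    // the loop also stops once a concurrent update has stored something larger.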
    while (prev_value < value && !atomic_cmpxchg_64_relaxed(kcounter_slot(var), &prev_value, value))
        ;
}

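// Folds the calling CPU's current slot for |other_var| into |var| as a max.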
static inline void kcounter_max_counter(const struct k_counter_desc* var,
                                        const struct k_counter_desc* other_var) {
    kcounter_max(var, *kcounter_slot(other_var));
}