// Copyright 2017 The Fuchsia Authors
//
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT

#pragma once

#include <arch/ops.h>
#include <kernel/atomic.h>
#include <kernel/percpu.h>

#include <zircon/compiler.h>

// Kernel counters are a facility designed to help field diagnostics and
// to help devs properly dimension the load/clients/size of the kernel
// constructs. It answers questions like:
//   - after N seconds how many outstanding <x> things are allocated?
//   - up to this point has <Y> ever happened?
//
// Currently the only query interface to the counters is the console
// k counters command. Issue 'k counters help' to learn what it can do.
//
// Kernel counters public API:
// 1- define a new counter.
//      KCOUNTER(counter_name, "<counter name>");
//      KCOUNTER_MAX(counter_name, "<counter name>");
//
// 2- counters start at zero, increment the counter:
//      kcounter_add(counter_name, 1);
//    or
//      kcounter_max(counter_name, value);
//
// By default with KCOUNTER, the `k counters` presentation will calculate a
// sum() of the per-core values. KCOUNTER_MAX() instead calculates the max()
// of the counters across cores. (See the usage example below.)
//
// Naming the counters
// The naming convention is "kernel.subsystem.thing_or_action"
// for example "kernel.dispatcher.destroy"
//             "kernel.exceptions.fpu"
//             "kernel.handles.new"
//
// Reading the counters in code
// Don't. The counters are maintained in a per-cpu arena, and atomic
// operations are never used to set their values, so they are both
// imprecise and reflect only the operations on a particular core.
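//
// Example usage (a minimal sketch; the counter names and call site below are
// hypothetical and only illustrate the API):
//
//      KCOUNTER(channel_msg_count, "kernel.channel.messages");
//      KCOUNTER_MAX(channel_depth_max, "kernel.channel.max_depth");
//
//      void on_channel_write(size_t pending) {
//          kcounter_add(channel_msg_count, 1);
//          kcounter_max(channel_depth_max, static_cast<int64_t>(pending));
//      }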

enum class k_counter_type : uint64_t {
    sum = 1,
    min = 2,
    max = 3,
};

struct k_counter_desc {
    const char* name;
    k_counter_type type;
};
static_assert(
    sizeof(struct k_counter_desc) == 16,
    "kernel.ld uses this size to ASSERT that enough space has been reserved in the counters arena");

// Define the descriptor and reserve the arena space for the counters.
// Because of -fdata-sections, each kcounter_arena_* array will be
// placed in a .bss.kcounter.* section; kernel.ld recognizes those names
// and places them all together to become the contiguous kcounters_arena
// array.  Note that each kcounter_arena_* does not correspond with the
// slots used for this particular counter (that would have terrible
// cache effects); it just reserves enough space for counters_init() to
// dole out in per-CPU chunks.
#define KCOUNTER(var, name)                                                                        \
    __USED int64_t kcounter_arena_##var[SMP_MAX_CPUS] __asm__("kcounter." name);                   \
    __USED __SECTION("kcountdesc." name) static const struct k_counter_desc var[] = {              \
        {name, k_counter_type::sum}}

#define KCOUNTER_MAX(var, name)                                                                    \
    __USED int64_t kcounter_arena_##var[SMP_MAX_CPUS] __asm__("kcounter." name);                   \
    __USED __SECTION("kcountdesc." name) static const struct k_counter_desc var[] = {              \
        {name, k_counter_type::max}}
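
// For illustration only: a hypothetical counter defined with
// KCOUNTER(foo, "kernel.foo") expands to roughly
//
//      __USED int64_t kcounter_arena_foo[SMP_MAX_CPUS] __asm__("kcounter.kernel.foo");
//      __USED __SECTION("kcountdesc.kernel.foo") static const struct k_counter_desc foo[] = {
//          {"kernel.foo", k_counter_type::sum}};
//
// i.e. one arena reservation named after the counter plus one descriptor entry.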

// Via magic in kernel.ld, all the descriptors wind up in a contiguous
// array bounded by these two symbols, sorted by name.
extern const struct k_counter_desc kcountdesc_begin[], kcountdesc_end[];

// The order of the descriptors is the order of the slots in each per-cpu array.
static inline size_t kcounter_index(const struct k_counter_desc* var) {
    return var - kcountdesc_begin;
}

// The counter, as named by |var|, is just an offset into the per-cpu
// table, so adding to it does not require a cross-CPU atomic operation.
static inline int64_t* kcounter_slot(const struct k_counter_desc* var) {
    return &get_local_percpu()->counters[kcounter_index(var)];
}

static inline void kcounter_add(const struct k_counter_desc* var,
                                int64_t add) {
#if defined(__aarch64__)
    // use a relaxed atomic load/store for arm64 to avoid a potentially nasty
    // race between the regular load/store operations for a +1. Relaxed
    // atomic load/stores are about as efficient as a regular load/store.
    atomic_add_64_relaxed(kcounter_slot(var), add);
#else
    // x86 can do the add in a single non-atomic instruction, so the data loss
    // of a preemption in the middle of this sequence is fairly minimal.
    *kcounter_slot(var) += add;
#endif
}

// TODO(travisg|scottmg): Revisit, consider more efficient arm-specific
// instruction sequence here.
static inline void kcounter_max(const struct k_counter_desc* var, int64_t value) {
    int64_t prev_value = atomic_load_64_relaxed(kcounter_slot(var));
    while (prev_value < value && !atomic_cmpxchg_64_relaxed(kcounter_slot(var), &prev_value, value))
        ;
}

static inline void kcounter_max_counter(const struct k_counter_desc* var,
                                        const struct k_counter_desc* other_var) {
    kcounter_max(var, *kcounter_slot(other_var));
}
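
// Illustrative use of kcounter_max_counter() (the counters below are
// hypothetical): fold the current CPU's value of a sum-type counter into a
// max-type counter, e.g. to record a high-water mark:
//
//      KCOUNTER(runq_depth, "kernel.sched.runq_depth");
//      KCOUNTER_MAX(runq_depth_max, "kernel.sched.runq_depth_max");
//      ...
//      kcounter_max_counter(runq_depth_max, runq_depth);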