1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Trace raw_syscalls tracepoints to collect system call statistics.
4  */
5 
6 #include "vmlinux.h"
7 #include "syscall_summary.h"
8 
9 #include <bpf/bpf_helpers.h>
10 #include <bpf/bpf_tracing.h>
11 #include <bpf/bpf_core_read.h>
12 
13 /* This is to calculate a delta between sys-enter and sys-exit for each thread */
14 struct syscall_trace {
15 	int nr; /* syscall number is only available at sys-enter */
16 	int unused;
17 	u64 timestamp;
18 };
19 
/* Capacity used for the hash maps declared in this file */
#define MAX_ENTRIES	(128 * 1024)
21 
22 struct syscall_trace_map {
23 	__uint(type, BPF_MAP_TYPE_HASH);
24 	__type(key, int); /* tid */
25 	__type(value, struct syscall_trace);
26 	__uint(max_entries, MAX_ENTRIES);
27 } syscall_trace_map SEC(".maps");
28 
29 struct syscall_stats_map {
30 	__uint(type, BPF_MAP_TYPE_HASH);
31 	__type(key, struct syscall_key);
32 	__type(value, struct syscall_stats);
33 	__uint(max_entries, MAX_ENTRIES);
34 } syscall_stats_map SEC(".maps");
35 
36 int enabled; /* controlled from userspace */
37 
38 const volatile enum syscall_aggr_mode aggr_mode;
39 const volatile int use_cgroup_v2;
40 
41 int perf_subsys_id = -1;
42 
get_current_cgroup_id(void)43 static inline __u64 get_current_cgroup_id(void)
44 {
45 	struct task_struct *task;
46 	struct cgroup *cgrp;
47 
48 	if (use_cgroup_v2)
49 		return bpf_get_current_cgroup_id();
50 
51 	task = bpf_get_current_task_btf();
52 
53 	if (perf_subsys_id == -1) {
54 #if __has_builtin(__builtin_preserve_enum_value)
55 		perf_subsys_id = bpf_core_enum_value(enum cgroup_subsys_id,
56 						     perf_event_cgrp_id);
57 #else
58 		perf_subsys_id = perf_event_cgrp_id;
59 #endif
60 	}
61 
62 	cgrp = BPF_CORE_READ(task, cgroups, subsys[perf_subsys_id], cgroup);
63 	return BPF_CORE_READ(cgrp, kn, id);
64 }
65 
/*
 * Fold one finished syscall into the statistics entry identified by
 * (@cpu_or_tid, @cgroup_id, @nr).
 *
 * A zeroed entry is created on first use.  The call counter is bumped
 * unconditionally and the error counter only when @ret is negative.  The
 * timing fields are touched only for a positive @duration: total_time and
 * squared_sum are accumulated, max_time/min_time track the extremes.
 */
static void update_stats(int cpu_or_tid, u64 cgroup_id, int nr, s64 duration,
			 long ret)
{
	struct syscall_key key = {
		.cpu_or_tid = cpu_or_tid,
		.cgroup = cgroup_id,
		.nr = nr,
	};
	struct syscall_stats *entry;

	entry = bpf_map_lookup_elem(&syscall_stats_map, &key);
	if (!entry) {
		struct syscall_stats init = {};

		/*
		 * BPF_NOEXIST keeps an entry that was created concurrently;
		 * either way the lookup below picks up whatever is stored.
		 */
		bpf_map_update_elem(&syscall_stats_map, &key, &init, BPF_NOEXIST);
		entry = bpf_map_lookup_elem(&syscall_stats_map, &key);
		if (!entry)
			return;
	}

	__sync_fetch_and_add(&entry->count, 1);
	if (ret < 0)
		__sync_fetch_and_add(&entry->error, 1);

	/* No usable timing information: leave the time fields alone. */
	if (duration <= 0)
		return;

	__sync_fetch_and_add(&entry->total_time, duration);
	__sync_fetch_and_add(&entry->squared_sum, duration * duration);

	/* Unlike the counters above, min/max use plain loads and stores. */
	if (duration > entry->max_time)
		entry->max_time = duration;
	/* min_time == 0 means no sample has been stored yet */
	if (entry->min_time == 0 || duration < entry->min_time)
		entry->min_time = duration;
}
101 
102 SEC("tp_btf/sys_enter")
sys_enter(u64 * ctx)103 int sys_enter(u64 *ctx)
104 {
105 	int tid;
106 	struct syscall_trace st;
107 
108 	if (!enabled)
109 		return 0;
110 
111 	st.nr = ctx[1]; /* syscall number */
112 	st.unused = 0;
113 	st.timestamp = bpf_ktime_get_ns();
114 
115 	tid = bpf_get_current_pid_tgid();
116 	bpf_map_update_elem(&syscall_trace_map, &tid, &st, BPF_ANY);
117 
118 	return 0;
119 }
120 
121 SEC("tp_btf/sys_exit")
sys_exit(u64 * ctx)122 int sys_exit(u64 *ctx)
123 {
124 	int tid;
125 	int key = 0;
126 	u64 cgroup = 0;
127 	long ret = ctx[1]; /* return value of the syscall */
128 	struct syscall_trace *st;
129 	s64 delta;
130 
131 	if (!enabled)
132 		return 0;
133 
134 	tid = bpf_get_current_pid_tgid();
135 	st = bpf_map_lookup_elem(&syscall_trace_map, &tid);
136 	if (st == NULL)
137 		return 0;
138 
139 	if (aggr_mode == SYSCALL_AGGR_THREAD)
140 		key = tid;
141 	else if (aggr_mode == SYSCALL_AGGR_CGROUP)
142 		cgroup = get_current_cgroup_id();
143 	else
144 		key = bpf_get_smp_processor_id();
145 
146 	delta = bpf_ktime_get_ns() - st->timestamp;
147 	update_stats(key, cgroup, st->nr, delta, ret);
148 
149 	bpf_map_delete_elem(&syscall_trace_map, &tid);
150 	return 0;
151 }
152 
153 char _license[] SEC("license") = "GPL";
154