#include <stdio.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdlib.h>

#include <linux/err.h>

#include "util/ftrace.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/debug.h"
#include "util/evlist.h"
#include "util/bpf_counter.h"
#include "util/stat.h"

#include "util/bpf_skel/func_latency.skel.h"

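/*
 * Handle for the func_latency BPF skeleton.  It is created by
 * perf_ftrace__latency_prepare_bpf() and released by
 * perf_ftrace__latency_cleanup_bpf(); start/stop/read operate on it in between.
 */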
static struct func_latency_bpf *skel;

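/*
 * Open, configure and load the BPF skeleton, populate the CPU and task
 * filter maps, and attach either a kprobe/kretprobe pair (single target
 * function) or two raw tracepoints (a pair of target events).
 */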
int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
{
	int fd, err;
	int i, ncpus = 1, ntasks = 1;
	struct filter_entry *func = NULL;

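	/* either a single target function or exactly two events are accepted */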
	if (!list_empty(&ftrace->filters)) {
		if (!list_is_singular(&ftrace->filters)) {
			pr_err("ERROR: Too many target functions.\n");
			return -1;
		}
		func = list_first_entry(&ftrace->filters, struct filter_entry, list);
	} else {
		int count = 0;
		struct list_head *pos;

		list_for_each(pos, &ftrace->event_pair)
			count++;

		if (count != 2) {
			pr_err("ERROR: Needs two target events.\n");
			return -1;
		}
	}

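	/* open the skeleton first so rodata and map sizes can still be tuned */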
	skel = func_latency_bpf__open();
	if (!skel) {
		pr_err("Failed to open func latency skeleton\n");
		return -1;
	}

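	/* histogram layout: bucket width, lower bound and number of buckets */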
	skel->rodata->bucket_range = ftrace->bucket_range;
	skel->rodata->min_latency = ftrace->min_latency;
	skel->rodata->bucket_num = ftrace->bucket_num;
	if (ftrace->bucket_range && ftrace->bucket_num) {
		bpf_map__set_max_entries(skel->maps.latency, ftrace->bucket_num);
	}

	/* don't need to set cpu filter for system-wide mode */
	if (ftrace->target.cpu_list) {
		ncpus = perf_cpu_map__nr(ftrace->evlist->core.user_requested_cpus);
		bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus);
		skel->rodata->has_cpu = 1;
	}

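	/* the task filter also covers the workload started by perf (target__none) */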
	if (target__has_task(&ftrace->target) || target__none(&ftrace->target)) {
		ntasks = perf_thread_map__nr(ftrace->evlist->core.threads);
		bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
		skel->rodata->has_task = 1;
	}

	skel->rodata->use_nsec = ftrace->use_nsec;

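	/* raise RLIMIT_MEMLOCK so BPF map creation does not fail on older kernels */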
	set_max_rlimit();

	err = func_latency_bpf__load(skel);
	if (err) {
		pr_err("Failed to load func latency skeleton\n");
		goto out;
	}

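	/* mark each requested CPU in the cpu_filter map */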
	if (ftrace->target.cpu_list) {
		u32 cpu;
		u8 val = 1;

		fd = bpf_map__fd(skel->maps.cpu_filter);

		for (i = 0; i < ncpus; i++) {
			cpu = perf_cpu_map__cpu(ftrace->evlist->core.user_requested_cpus, i).cpu;
			bpf_map_update_elem(fd, &cpu, &val, BPF_ANY);
		}
	}

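	/* mark each target thread in the task_filter map */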
	if (target__has_task(&ftrace->target) || target__none(&ftrace->target)) {
		u32 pid;
		u8 val = 1;

		fd = bpf_map__fd(skel->maps.task_filter);

		for (i = 0; i < ntasks; i++) {
			pid = perf_thread_map__pid(ftrace->evlist->core.threads, i);
			bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
		}
	}

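	/* start with the largest possible minimum so the first sample replaces it */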
	skel->bss->min = INT64_MAX;

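	/* attach entry/exit probes to the function, or hook the two tracepoints */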
	if (func) {
		skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin,
								    false, func->name);
		if (IS_ERR(skel->links.func_begin)) {
			pr_err("Failed to attach fentry program\n");
			err = PTR_ERR(skel->links.func_begin);
			goto out;
		}

		skel->links.func_end = bpf_program__attach_kprobe(skel->progs.func_end,
								  true, func->name);
		if (IS_ERR(skel->links.func_end)) {
			pr_err("Failed to attach fexit program\n");
			err = PTR_ERR(skel->links.func_end);
			goto out;
		}
	} else {
		struct filter_entry *event;

		event = list_first_entry(&ftrace->event_pair, struct filter_entry, list);

		skel->links.event_begin = bpf_program__attach_raw_tracepoint(skel->progs.event_begin,
									      event->name);
		if (IS_ERR(skel->links.event_begin)) {
			pr_err("Failed to attach first tracepoint program\n");
			err = PTR_ERR(skel->links.event_begin);
			goto out;
		}

		event = list_next_entry(event, list);

		skel->links.event_end = bpf_program__attach_raw_tracepoint(skel->progs.event_end,
									    event->name);
		if (IS_ERR(skel->links.event_end)) {
			pr_err("Failed to attach second tracepoint program\n");
			err = PTR_ERR(skel->links.event_end);
			goto out;
		}
	}

	/* XXX: we don't actually use this fd - just for poll() */
	return open("/dev/null", O_RDONLY);

out:
	return err;
}

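/*
 * Start/stop only toggle the 'enabled' flag; the BPF programs stay attached
 * and simply skip recording while it is zero.
 */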
int perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace __maybe_unused)
{
	skel->bss->enabled = 1;
	return 0;
}

int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused)
{
	skel->bss->enabled = 0;
	return 0;
}

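/*
 * Copy the per-CPU latency histogram into 'buckets' and fill 'stats' from
 * the totals maintained by the BPF program.
 */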
int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace,
				  int buckets[], struct stats *stats)
{
	int i, fd, err;
	u32 idx;
	u64 *hist;
	int ncpus = cpu__max_cpu().cpu;

	fd = bpf_map__fd(skel->maps.latency);

	hist = calloc(ncpus, sizeof(*hist));
	if (hist == NULL)
		return -ENOMEM;

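	/* the latency map is per-CPU: sum the counts of all CPUs for each bucket */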
	for (idx = 0; idx < skel->rodata->bucket_num; idx++) {
		err = bpf_map_lookup_elem(fd, &idx, hist);
		if (err) {
			buckets[idx] = 0;
			continue;
		}

		for (i = 0; i < ncpus; i++)
			buckets[idx] += hist[i];
	}

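	/* totals are in nanoseconds; convert to usec unless --use-nsec was given */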
	if (skel->bss->count) {
		stats->mean = skel->bss->total / skel->bss->count;
		stats->n = skel->bss->count;
		stats->max = skel->bss->max;
		stats->min = skel->bss->min;

		if (!ftrace->use_nsec) {
			stats->mean /= 1000;
			stats->max /= 1000;
			stats->min /= 1000;
		}
	}

	free(hist);
	return 0;
}

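/* destroying the skeleton detaches the programs and frees all maps */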
int perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace __maybe_unused)
{
	func_latency_bpf__destroy(skel);
	return 0;
}