#include <stdio.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdlib.h>

#include <linux/err.h>

#include "util/ftrace.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/debug.h"
#include "util/evlist.h"
#include "util/bpf_counter.h"
#include "util/stat.h"

#include "util/bpf_skel/func_latency.skel.h"

static struct func_latency_bpf *skel;

int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
{
	int fd, err;
	int i, ncpus = 1, ntasks = 1;
	struct filter_entry *func = NULL;

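	/* Function mode probes a single function; otherwise a begin/end event pair is used. */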
	if (!list_empty(&ftrace->filters)) {
		if (!list_is_singular(&ftrace->filters)) {
			pr_err("ERROR: Too many target functions.\n");
			return -1;
		}
		func = list_first_entry(&ftrace->filters, struct filter_entry, list);
	} else {
		int count = 0;
		struct list_head *pos;

		list_for_each(pos, &ftrace->event_pair)
			count++;

		if (count != 2) {
			pr_err("ERROR: Needs two target events.\n");
			return -1;
		}
	}

	skel = func_latency_bpf__open();
	if (!skel) {
		pr_err("Failed to open func latency skeleton\n");
		return -1;
	}

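	/* rodata values become read-only constants once the skeleton is loaded */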
	skel->rodata->bucket_range = ftrace->bucket_range;
	skel->rodata->min_latency = ftrace->min_latency;
	skel->rodata->bucket_num = ftrace->bucket_num;
	if (ftrace->bucket_range && ftrace->bucket_num) {
		bpf_map__set_max_entries(skel->maps.latency, ftrace->bucket_num);
	}

	/* don't need to set cpu filter for system-wide mode */
	if (ftrace->target.cpu_list) {
		ncpus = perf_cpu_map__nr(ftrace->evlist->core.user_requested_cpus);
		bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus);
		skel->rodata->has_cpu = 1;
	}

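	/* size the task filter for the target threads (the forked workload in default mode) */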
	if (target__has_task(&ftrace->target) || target__none(&ftrace->target)) {
		ntasks = perf_thread_map__nr(ftrace->evlist->core.threads);
		bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
		skel->rodata->has_task = 1;
	}

	skel->rodata->use_nsec = ftrace->use_nsec;

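	/* bump RLIMIT_MEMLOCK so the BPF maps can be allocated on older kernels */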
	set_max_rlimit();

	err = func_latency_bpf__load(skel);
	if (err) {
		pr_err("Failed to load func latency skeleton\n");
		goto out;
	}

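	/* mark each requested CPU in the filter map */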
	if (ftrace->target.cpu_list) {
		u32 cpu;
		u8 val = 1;

		fd = bpf_map__fd(skel->maps.cpu_filter);

		for (i = 0; i < ncpus; i++) {
			cpu = perf_cpu_map__cpu(ftrace->evlist->core.user_requested_cpus, i).cpu;
			bpf_map_update_elem(fd, &cpu, &val, BPF_ANY);
		}
	}

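	/* mark each target thread in the filter map */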
	if (target__has_task(&ftrace->target) || target__none(&ftrace->target)) {
		u32 pid;
		u8 val = 1;

		fd = bpf_map__fd(skel->maps.task_filter);

		for (i = 0; i < ntasks; i++) {
			pid = perf_thread_map__pid(ftrace->evlist->core.threads, i);
			bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
		}
	}

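	/* start at the maximum so the first sample always updates the minimum */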
	skel->bss->min = INT64_MAX;

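	/*
	 * Attach entry/exit hooks: a kprobe/kretprobe pair on the target
	 * function, or raw tracepoints on the begin/end event pair.
	 */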
	if (func) {
		skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin,
								    false, func->name);
		if (IS_ERR(skel->links.func_begin)) {
			pr_err("Failed to attach fentry program\n");
			err = PTR_ERR(skel->links.func_begin);
			goto out;
		}

		skel->links.func_end = bpf_program__attach_kprobe(skel->progs.func_end,
								  true, func->name);
		if (IS_ERR(skel->links.func_end)) {
			pr_err("Failed to attach fexit program\n");
			err = PTR_ERR(skel->links.func_end);
			goto out;
		}
	} else {
		struct filter_entry *event;

		event = list_first_entry(&ftrace->event_pair, struct filter_entry, list);

		skel->links.event_begin = bpf_program__attach_raw_tracepoint(skel->progs.event_begin,
									      event->name);
		if (IS_ERR(skel->links.event_begin)) {
			pr_err("Failed to attach first tracepoint program\n");
			err = PTR_ERR(skel->links.event_begin);
			goto out;
		}

		event = list_next_entry(event, list);

		skel->links.event_end = bpf_program__attach_raw_tracepoint(skel->progs.event_end,
									    event->name);
		if (IS_ERR(skel->links.event_end)) {
			pr_err("Failed to attach second tracepoint program\n");
			err = PTR_ERR(skel->links.event_end);
			goto out;
		}
	}

	/* XXX: we don't actually use this fd - just for poll() */
	return open("/dev/null", O_RDONLY);

out:
	return err;
}

int perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace __maybe_unused)
{
	skel->bss->enabled = 1;
	return 0;
}

int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused)
{
	skel->bss->enabled = 0;
	return 0;
}

int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace,
				  int buckets[], struct stats *stats)
{
	int i, fd, err;
	u32 idx;
	u64 *hist;
	int ncpus = cpu__max_cpu().cpu;

	fd = bpf_map__fd(skel->maps.latency);

	hist = calloc(ncpus, sizeof(*hist));
	if (hist == NULL)
		return -ENOMEM;

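	/* the latency map is per-CPU: sum each bucket's count across all CPUs */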
	for (idx = 0; idx < skel->rodata->bucket_num; idx++) {
		err = bpf_map_lookup_elem(fd, &idx, hist);
		if (err) {
			buckets[idx] = 0;
			continue;
		}

		for (i = 0; i < ncpus; i++)
			buckets[idx] += hist[i];
	}

	if (skel->bss->count) {
		stats->mean = skel->bss->total / skel->bss->count;
		stats->n = skel->bss->count;
		stats->max = skel->bss->max;
		stats->min = skel->bss->min;

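		/* BPF collects nanoseconds; report microseconds unless nsec output was requested */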
		if (!ftrace->use_nsec) {
			stats->mean /= 1000;
			stats->max /= 1000;
			stats->min /= 1000;
		}
	}

	free(hist);
	return 0;
}

int perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace __maybe_unused)
{
	func_latency_bpf__destroy(skel);
	return 0;
}