// SPDX-License-Identifier: GPL-2.0
/*
 * Arm Statistical Profiling Extensions (SPE) support
 * Copyright (c) 2017-2018, Arm Ltd.
 */

#include <byteswap.h>
#include <endian.h>
#include <errno.h>
#include <inttypes.h>
#include <linux/bitops.h>
#include <linux/kernel.h>
#include <linux/log2.h>
#include <linux/types.h>
#include <linux/zalloc.h>
#include <stdlib.h>
#include <unistd.h>

#include "auxtrace.h"
#include "color.h"
#include "debug.h"
#include "evlist.h"
#include "evsel.h"
#include "machine.h"
#include "session.h"
#include "symbol.h"
#include "thread.h"
#include "thread-stack.h"
#include "tsc.h"
#include "tool.h"
#include "util/synthetic-events.h"

#include "arm-spe.h"
#include "arm-spe-decoder/arm-spe-decoder.h"
#include "arm-spe-decoder/arm-spe-pkt-decoder.h"

#define MAX_TIMESTAMP (~0ULL)

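/*
 * Per-session decode state: the auxtrace callbacks, the queues of trace
 * data, a heap for time-ordering those queues, the synthesis options and
 * the sample IDs allocated for each class of synthesized event.
 */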
struct arm_spe {
	struct auxtrace			auxtrace;
	struct auxtrace_queues		queues;
	struct auxtrace_heap		heap;
	struct itrace_synth_opts	synth_opts;
	u32				auxtrace_type;
	struct perf_session		*session;
	struct machine			*machine;
	u32				pmu_type;

	struct perf_tsc_conversion	tc;

	u8				timeless_decoding;
	u8				data_queued;

	u64				sample_type;
	u8				sample_flc;
	u8				sample_llc;
	u8				sample_tlb;
	u8				sample_branch;
	u8				sample_remote_access;
	u8				sample_memory;

	u64				l1d_miss_id;
	u64				l1d_access_id;
	u64				llc_miss_id;
	u64				llc_access_id;
	u64				tlb_miss_id;
	u64				tlb_access_id;
	u64				branch_miss_id;
	u64				remote_access_id;
	u64				memory_id;

	u64				kernel_start;

	unsigned long			num_events;
	u8				use_ctx_pkt_for_pid;
};

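/*
 * Per-queue decode state: the current and previous auxtrace buffers, a
 * scratch buffer for synthesized events, the decoder instance and the
 * pid/tid/cpu context the decoded records are attributed to.
 */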
struct arm_spe_queue {
	struct arm_spe			*spe;
	unsigned int			queue_nr;
	struct auxtrace_buffer		*buffer;
	struct auxtrace_buffer		*old_buffer;
	union perf_event		*event_buf;
	bool				on_heap;
	bool				done;
	pid_t				pid;
	pid_t				tid;
	int				cpu;
	struct arm_spe_decoder		*decoder;
	u64				time;
	u64				timestamp;
	struct thread			*thread;
};

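/*
 * Hex-dump raw SPE trace one packet per line, appending the decoded
 * packet description when the packet parses cleanly and flagging bad
 * packets, which are skipped one byte at a time.
 */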
static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
			 unsigned char *buf, size_t len)
{
	struct arm_spe_pkt packet;
	size_t pos = 0;
	int ret, pkt_len, i;
	char desc[ARM_SPE_PKT_DESC_MAX];
	const char *color = PERF_COLOR_BLUE;

	color_fprintf(stdout, color,
		      ". ... ARM SPE data: size %#zx bytes\n",
		      len);

	while (len) {
		ret = arm_spe_get_packet(buf, len, &packet);
		if (ret > 0)
			pkt_len = ret;
		else
			pkt_len = 1;
		printf(".");
		color_fprintf(stdout, color, "  %08x: ", pos);
		for (i = 0; i < pkt_len; i++)
			color_fprintf(stdout, color, " %02x", buf[i]);
		for (; i < 16; i++)
			color_fprintf(stdout, color, "   ");
		if (ret > 0) {
			ret = arm_spe_pkt_desc(&packet, desc,
					       ARM_SPE_PKT_DESC_MAX);
			if (!ret)
				color_fprintf(stdout, color, " %s\n", desc);
		} else {
			color_fprintf(stdout, color, " Bad packet!\n");
		}
		pos += pkt_len;
		buf += pkt_len;
		len -= pkt_len;
	}
}

static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf,
			       size_t len)
{
	printf(".\n");
	arm_spe_dump(spe, buf, len);
}

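/*
 * Decoder callback: hand the decoder the next auxtrace buffer for this
 * queue, loading its data from file if needed.  Empty buffers are
 * dropped and the next one is tried; a zero length signals end of trace.
 */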
static int arm_spe_get_trace(struct arm_spe_buffer *b, void *data)
{
	struct arm_spe_queue *speq = data;
	struct auxtrace_buffer *buffer = speq->buffer;
	struct auxtrace_buffer *old_buffer = speq->old_buffer;
	struct auxtrace_queue *queue;

	queue = &speq->spe->queues.queue_array[speq->queue_nr];

	buffer = auxtrace_buffer__next(queue, buffer);
	/* If no more data, drop the previous auxtrace_buffer and return */
	if (!buffer) {
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		b->len = 0;
		return 0;
	}

	speq->buffer = buffer;

	/* If the aux_buffer doesn't have data associated, try to load it */
	if (!buffer->data) {
		/* get the file desc associated with the perf data file */
		int fd = perf_data__fd(speq->spe->session->data);

		buffer->data = auxtrace_buffer__get_data(buffer, fd);
		if (!buffer->data)
			return -ENOMEM;
	}

	b->len = buffer->size;
	b->buf = buffer->data;

	if (b->len) {
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		speq->old_buffer = buffer;
	} else {
		auxtrace_buffer__drop_data(buffer);
		return arm_spe_get_trace(b, data);
	}

	return 0;
}

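/* Allocate per-queue state and create a decoder that pulls trace from it. */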
static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
						  unsigned int queue_nr)
{
	struct arm_spe_params params = { .get_trace = 0, };
	struct arm_spe_queue *speq;

	speq = zalloc(sizeof(*speq));
	if (!speq)
		return NULL;

	speq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
	if (!speq->event_buf)
		goto out_free;

	speq->spe = spe;
	speq->queue_nr = queue_nr;
	speq->pid = -1;
	speq->tid = -1;
	speq->cpu = -1;

	/* params set */
	params.get_trace = arm_spe_get_trace;
	params.data = speq;

	/* create new decoder */
	speq->decoder = arm_spe_decoder_new(&params);
	if (!speq->decoder)
		goto out_free;

	return speq;

out_free:
	zfree(&speq->event_buf);
	free(speq);

	return NULL;
}

static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip)
{
	return ip >= spe->kernel_start ?
		PERF_RECORD_MISC_KERNEL :
		PERF_RECORD_MISC_USER;
}

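/*
 * Resolve the queue's pid/tid from the per-CPU context maintained by the
 * machine (populated by context-switch events), falling back to the tid
 * recorded with the queue itself.
 */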
static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe,
				    struct auxtrace_queue *queue)
{
	struct arm_spe_queue *speq = queue->priv;
	pid_t tid;

	tid = machine__get_current_tid(spe->machine, speq->cpu);
	if (tid != -1) {
		speq->tid = tid;
		thread__zput(speq->thread);
	} else
		speq->tid = queue->tid;

	if ((!speq->thread) && (speq->tid != -1)) {
		speq->thread = machine__find_thread(spe->machine, -1,
						    speq->tid);
	}

	if (speq->thread) {
		speq->pid = speq->thread->pid_;
		if (queue->cpu == -1)
			speq->cpu = speq->thread->cpu;
	}
}

static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid)
{
	struct arm_spe *spe = speq->spe;
	int err = machine__set_current_tid(spe->machine, speq->cpu, -1, tid);

	if (err)
		return err;

	arm_spe_set_pid_tid_cpu(spe, &spe->queues.queue_array[speq->queue_nr]);

	return 0;
}

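/* Fill the sample fields that are common to all synthesized events. */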
static void arm_spe_prep_sample(struct arm_spe *spe,
				struct arm_spe_queue *speq,
				union perf_event *event,
				struct perf_sample *sample)
{
	struct arm_spe_record *record = &speq->decoder->record;

	if (!spe->timeless_decoding)
		sample->time = tsc_to_perf_time(record->timestamp, &spe->tc);

	sample->ip = record->from_ip;
	sample->cpumode = arm_spe_cpumode(spe, sample->ip);
	sample->pid = speq->pid;
	sample->tid = speq->tid;
	sample->period = 1;
	sample->cpu = speq->cpu;

	event->sample.header.type = PERF_RECORD_SAMPLE;
	event->sample.header.misc = sample->cpumode;
	event->sample.header.size = sizeof(struct perf_event_header);
}

static int arm_spe__inject_event(union perf_event *event, struct perf_sample *sample, u64 type)
{
	event->header.size = perf_event__sample_event_size(sample, type, 0);
	return perf_event__synthesize_sample(event, type, 0, sample);
}

static inline int
arm_spe_deliver_synth_event(struct arm_spe *spe,
			    struct arm_spe_queue *speq __maybe_unused,
			    union perf_event *event,
			    struct perf_sample *sample)
{
	int ret;

	if (spe->synth_opts.inject) {
		ret = arm_spe__inject_event(event, sample, spe->sample_type);
		if (ret)
			return ret;
	}

	ret = perf_session__deliver_synth_event(spe->session, event, sample);
	if (ret)
		pr_err("ARM SPE: failed to deliver event, error %d\n", ret);

	return ret;
}

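/*
 * Synthesize a memory sample carrying the record's virtual and physical
 * addresses plus the encoded data source.
 */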
static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq,
				     u64 spe_events_id, u64 data_src)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record = &speq->decoder->record;
	union perf_event *event = speq->event_buf;
	struct perf_sample sample = { .ip = 0, };

	arm_spe_prep_sample(spe, speq, event, &sample);

	sample.id = spe_events_id;
	sample.stream_id = spe_events_id;
	sample.addr = record->virt_addr;
	sample.phys_addr = record->phys_addr;
	sample.data_src = data_src;

	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
}

static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
					u64 spe_events_id)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record = &speq->decoder->record;
	union perf_event *event = speq->event_buf;
	struct perf_sample sample = { .ip = 0, };

	arm_spe_prep_sample(spe, speq, event, &sample);

	sample.id = spe_events_id;
	sample.stream_id = spe_events_id;
	sample.addr = record->to_ip;

	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
}

#define SPE_MEM_TYPE	(ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS | \
			 ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS | \
			 ARM_SPE_REMOTE_ACCESS)

static bool arm_spe__is_memory_event(enum arm_spe_sample_type type)
{
	if (type & SPE_MEM_TYPE)
		return true;

	return false;
}

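/*
 * Encode the record's type bits into a perf_mem_data_src: the memory
 * operation, the cache level and hit/miss result, remote accesses and
 * the TLB walk outcome.
 */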
static u64 arm_spe__synth_data_source(const struct arm_spe_record *record)
{
	union perf_mem_data_src	data_src = { 0 };

	if (record->op == ARM_SPE_LD)
		data_src.mem_op = PERF_MEM_OP_LOAD;
	else
		data_src.mem_op = PERF_MEM_OP_STORE;

	if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) {
		data_src.mem_lvl = PERF_MEM_LVL_L3;

		if (record->type & ARM_SPE_LLC_MISS)
			data_src.mem_lvl |= PERF_MEM_LVL_MISS;
		else
			data_src.mem_lvl |= PERF_MEM_LVL_HIT;
	} else if (record->type & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)) {
		data_src.mem_lvl = PERF_MEM_LVL_L1;

		if (record->type & ARM_SPE_L1D_MISS)
			data_src.mem_lvl |= PERF_MEM_LVL_MISS;
		else
			data_src.mem_lvl |= PERF_MEM_LVL_HIT;
	}

	if (record->type & ARM_SPE_REMOTE_ACCESS)
		data_src.mem_lvl |= PERF_MEM_LVL_REM_CCE1;

	if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) {
		data_src.mem_dtlb = PERF_MEM_TLB_WK;

		if (record->type & ARM_SPE_TLB_MISS)
			data_src.mem_dtlb |= PERF_MEM_TLB_MISS;
		else
			data_src.mem_dtlb |= PERF_MEM_TLB_HIT;
	}

	return data_src.val;
}

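/*
 * Synthesize one sample for each enabled event class whose type bit is
 * set in the current record.
 */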
static int arm_spe_sample(struct arm_spe_queue *speq)
{
	const struct arm_spe_record *record = &speq->decoder->record;
	struct arm_spe *spe = speq->spe;
	u64 data_src;
	int err;

	data_src = arm_spe__synth_data_source(record);

	if (spe->sample_flc) {
		if (record->type & ARM_SPE_L1D_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe->l1d_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_L1D_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe->l1d_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	if (spe->sample_llc) {
		if (record->type & ARM_SPE_LLC_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe->llc_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_LLC_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe->llc_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	if (spe->sample_tlb) {
		if (record->type & ARM_SPE_TLB_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe->tlb_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_TLB_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe->tlb_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	if (spe->sample_branch && (record->type & ARM_SPE_BRANCH_MISS)) {
		err = arm_spe__synth_branch_sample(speq, spe->branch_miss_id);
		if (err)
			return err;
	}

	if (spe->sample_remote_access &&
	    (record->type & ARM_SPE_REMOTE_ACCESS)) {
		err = arm_spe__synth_mem_sample(speq, spe->remote_access_id,
						data_src);
		if (err)
			return err;
	}

	if (spe->sample_memory && arm_spe__is_memory_event(record->type)) {
		err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src);
		if (err)
			return err;
	}

	return 0;
}

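/*
 * Decode records for one queue until no data is left (returns 1) or, for
 * timed decoding, until the queue's timestamp passes *timestamp, in which
 * case *timestamp is updated and 0 is returned.
 */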
static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record;
	int ret;

	if (!spe->kernel_start)
		spe->kernel_start = machine__kernel_start(spe->machine);

	while (1) {
		/*
		 * The usual flow is first to decode the packets and then
		 * synthesize a sample from the resulting record; here the
		 * flow is reversed: arm_spe_sample() is called to
		 * synthesize samples before arm_spe_decode().
		 *
		 * Two reasons for this code logic:
		 * 1. When the queue is set up in arm_spe__setup_queue(), it
		 *    has already decoded trace data and generated a record,
		 *    but left the sample to be synthesized here, so it is
		 *    correct to synthesize a sample for that leftover record.
		 * 2. After decoding trace data, the record timestamp must be
		 *    compared with the timestamp of the coming perf event; if
		 *    the record is later, decoding bails out and pushes the
		 *    record onto the auxtrace heap, deferring the sample
		 *    until the next call to this function.  This correlates
		 *    samples between Arm SPE trace data and other perf events
		 *    with correct time ordering.
		 */

		/*
		 * Update pid/tid info.
		 */
		record = &speq->decoder->record;
		if (!spe->timeless_decoding && record->context_id != (u64)-1) {
			ret = arm_spe_set_tid(speq, record->context_id);
			if (ret)
				return ret;

			spe->use_ctx_pkt_for_pid = true;
		}

		ret = arm_spe_sample(speq);
		if (ret)
			return ret;

		ret = arm_spe_decode(speq->decoder);
		if (!ret) {
			pr_debug("No data or all data has been processed.\n");
			return 1;
		}

		/*
		 * A decode error was hit; skip to the next chunk of trace
		 * data and try to find more decodable records.
		 */
		if (ret < 0)
			continue;


		record = &speq->decoder->record;

		/* Update timestamp for the last record */
		if (record->timestamp > speq->timestamp)
			speq->timestamp = record->timestamp;

		/*
		 * If the timestamp of the queue is later than the timestamp
		 * of the coming perf event, bail out so the perf event can
		 * be processed first.
		 */
		if (!spe->timeless_decoding && speq->timestamp >= *timestamp) {
			*timestamp = speq->timestamp;
			return 0;
		}
	}

	return 0;
}

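/*
 * Prime a newly populated queue: allocate its decode state and, for timed
 * decoding, decode up to the first valid record and add the queue to the
 * heap keyed by that record's timestamp.
 */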
static int arm_spe__setup_queue(struct arm_spe *spe,
				struct auxtrace_queue *queue,
				unsigned int queue_nr)
{
	struct arm_spe_queue *speq = queue->priv;
	struct arm_spe_record *record;

	if (list_empty(&queue->head) || speq)
		return 0;

	speq = arm_spe__alloc_queue(spe, queue_nr);

	if (!speq)
		return -ENOMEM;

	queue->priv = speq;

	if (queue->cpu != -1)
		speq->cpu = queue->cpu;

	if (!speq->on_heap) {
		int ret;

		if (spe->timeless_decoding)
			return 0;

retry:
		ret = arm_spe_decode(speq->decoder);

		if (!ret)
			return 0;

		if (ret < 0)
			goto retry;

		record = &speq->decoder->record;

		speq->timestamp = record->timestamp;
		ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp);
		if (ret)
			return ret;
		speq->on_heap = true;
	}

	return 0;
}


static int arm_spe__setup_queues(struct arm_spe *spe)
{
	unsigned int i;
	int ret;

	for (i = 0; i < spe->queues.nr_queues; i++) {
		ret = arm_spe__setup_queue(spe, &spe->queues.queue_array[i], i);
		if (ret)
			return ret;
	}

	return 0;
}

static int arm_spe__update_queues(struct arm_spe *spe)
{
	if (spe->queues.new_data) {
		spe->queues.new_data = false;
		return arm_spe__setup_queues(spe);
	}

	return 0;
}

static bool arm_spe__is_timeless_decoding(struct arm_spe *spe)
{
	struct evsel *evsel;
	struct evlist *evlist = spe->session->evlist;
	bool timeless_decoding = true;

	/*
	 * Loop through the list of events and clear timeless decoding
	 * if we find one with the time bit set.
	 */
	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
			timeless_decoding = false;
	}

	return timeless_decoding;
}

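/*
 * Service queues in timestamp order: repeatedly pop the earliest queue
 * from the heap, decode it up to the target timestamp and re-add it while
 * it still has data.
 */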
static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp)
{
	unsigned int queue_nr;
	u64 ts;
	int ret;

	while (1) {
		struct auxtrace_queue *queue;
		struct arm_spe_queue *speq;

		if (!spe->heap.heap_cnt)
			return 0;

		if (spe->heap.heap_array[0].ordinal >= timestamp)
			return 0;

		queue_nr = spe->heap.heap_array[0].queue_nr;
		queue = &spe->queues.queue_array[queue_nr];
		speq = queue->priv;

		auxtrace_heap__pop(&spe->heap);

		if (spe->heap.heap_cnt) {
			ts = spe->heap.heap_array[0].ordinal + 1;
			if (ts > timestamp)
				ts = timestamp;
		} else {
			ts = timestamp;
		}

		/*
		 * A previous context-switch event has set pid/tid in the
		 * machine's context, so here we need to update the pid/tid
		 * in the thread and SPE queue.
		 */
		if (!spe->use_ctx_pkt_for_pid)
			arm_spe_set_pid_tid_cpu(spe, queue);

		ret = arm_spe_run_decoder(speq, &ts);
		if (ret < 0) {
			auxtrace_heap__add(&spe->heap, queue_nr, ts);
			return ret;
		}

		if (!ret) {
			ret = auxtrace_heap__add(&spe->heap, queue_nr, ts);
			if (ret < 0)
				return ret;
		} else {
			speq->on_heap = false;
		}
	}

	return 0;
}

static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid,
					   u64 time_)
{
	struct auxtrace_queues *queues = &spe->queues;
	unsigned int i;
	u64 ts = 0;

	for (i = 0; i < queues->nr_queues; i++) {
		struct auxtrace_queue *queue = &spe->queues.queue_array[i];
		struct arm_spe_queue *speq = queue->priv;

		if (speq && (tid == -1 || speq->tid == tid)) {
			speq->time = time_;
			arm_spe_set_pid_tid_cpu(spe, queue);
			arm_spe_run_decoder(speq, &ts);
		}
	}
	return 0;
}

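/*
 * On a context-switch-out event, record the incoming (next) pid/tid as
 * the current task for that CPU so later samples can be attributed to it.
 */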
static int arm_spe_context_switch(struct arm_spe *spe, union perf_event *event,
				  struct perf_sample *sample)
{
	pid_t pid, tid;
	int cpu;

	if (!(event->header.misc & PERF_RECORD_MISC_SWITCH_OUT))
		return 0;

	pid = event->context_switch.next_prev_pid;
	tid = event->context_switch.next_prev_tid;
	cpu = sample->cpu;

	if (tid == -1)
		pr_warning("context_switch event has no tid\n");

	return machine__set_current_tid(spe->machine, cpu, pid, tid);
}

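/*
 * Main event callback: keep the SPE queues in step with the ordered
 * perf events, processing trace up to each event's timestamp and, when
 * CONTEXT packets are not available, tracking pid/tid via exit and
 * context-switch events.
 */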
static int arm_spe_process_event(struct perf_session *session,
				 union perf_event *event,
				 struct perf_sample *sample,
				 struct perf_tool *tool)
{
	int err = 0;
	u64 timestamp;
	struct arm_spe *spe = container_of(session->auxtrace,
					   struct arm_spe, auxtrace);

	if (dump_trace)
		return 0;

	if (!tool->ordered_events) {
		pr_err("SPE trace requires ordered events\n");
		return -EINVAL;
	}

	if (sample->time && (sample->time != (u64) -1))
		timestamp = perf_time_to_tsc(sample->time, &spe->tc);
	else
		timestamp = 0;

	if (timestamp || spe->timeless_decoding) {
		err = arm_spe__update_queues(spe);
		if (err)
			return err;
	}

	if (spe->timeless_decoding) {
		if (event->header.type == PERF_RECORD_EXIT) {
			err = arm_spe_process_timeless_queues(spe,
					event->fork.tid,
					sample->time);
		}
	} else if (timestamp) {
		err = arm_spe_process_queues(spe, timestamp);
		if (err)
			return err;

		if (!spe->use_ctx_pkt_for_pid &&
		    (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE ||
		    event->header.type == PERF_RECORD_SWITCH))
			err = arm_spe_context_switch(spe, event, sample);
	}

	return err;
}

796
arm_spe_process_auxtrace_event(struct perf_session * session,union perf_event * event,struct perf_tool * tool __maybe_unused)797 static int arm_spe_process_auxtrace_event(struct perf_session *session,
798 union perf_event *event,
799 struct perf_tool *tool __maybe_unused)
800 {
801 struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
802 auxtrace);
803
804 if (!spe->data_queued) {
805 struct auxtrace_buffer *buffer;
806 off_t data_offset;
807 int fd = perf_data__fd(session->data);
808 int err;
809
810 if (perf_data__is_pipe(session->data)) {
811 data_offset = 0;
812 } else {
813 data_offset = lseek(fd, 0, SEEK_CUR);
814 if (data_offset == -1)
815 return -errno;
816 }
817
818 err = auxtrace_queues__add_event(&spe->queues, session, event,
819 data_offset, &buffer);
820 if (err)
821 return err;
822
		/* Dump here now that we have copied a piped trace out of the pipe */
		if (dump_trace) {
			if (auxtrace_buffer__get_data(buffer, fd)) {
				arm_spe_dump_event(spe, buffer->data,
						   buffer->size);
				auxtrace_buffer__put_data(buffer);
			}
		}
	}

	return 0;
}

static int arm_spe_flush(struct perf_session *session,
			 struct perf_tool *tool)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					   auxtrace);
	int ret;

	if (dump_trace)
		return 0;

	if (!tool->ordered_events)
		return -EINVAL;

	ret = arm_spe__update_queues(spe);
	if (ret < 0)
		return ret;

	if (spe->timeless_decoding)
		return arm_spe_process_timeless_queues(spe, -1,
						       MAX_TIMESTAMP - 1);

	ret = arm_spe_process_queues(spe, MAX_TIMESTAMP);
	if (ret)
		return ret;

	if (!spe->use_ctx_pkt_for_pid)
		ui__warning("Arm SPE CONTEXT packets not found in the traces.\n"
			    "Matching of TIDs to SPE events could be inaccurate.\n");

	return 0;
}

static void arm_spe_free_queue(void *priv)
{
	struct arm_spe_queue *speq = priv;

	if (!speq)
		return;
	thread__zput(speq->thread);
	arm_spe_decoder_free(speq->decoder);
	zfree(&speq->event_buf);
	free(speq);
}

static void arm_spe_free_events(struct perf_session *session)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					   auxtrace);
	struct auxtrace_queues *queues = &spe->queues;
	unsigned int i;

	for (i = 0; i < queues->nr_queues; i++) {
		arm_spe_free_queue(queues->queue_array[i].priv);
		queues->queue_array[i].priv = NULL;
	}
	auxtrace_queues__free(queues);
}

static void arm_spe_free(struct perf_session *session)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					   auxtrace);

	auxtrace_heap__free(&spe->heap);
	arm_spe_free_events(session);
	session->auxtrace = NULL;
	free(spe);
}

static bool arm_spe_evsel_is_auxtrace(struct perf_session *session,
				      struct evsel *evsel)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace);

	return evsel->core.attr.type == spe->pmu_type;
}

static const char * const arm_spe_info_fmts[] = {
	[ARM_SPE_PMU_TYPE]		= "  PMU Type           %"PRId64"\n",
};

static void arm_spe_print_info(__u64 *arr)
{
	if (!dump_trace)
		return;

	fprintf(stdout, arm_spe_info_fmts[ARM_SPE_PMU_TYPE], arr[ARM_SPE_PMU_TYPE]);
}

struct arm_spe_synth {
	struct perf_tool dummy_tool;
	struct perf_session *session;
};

static int arm_spe_event_synth(struct perf_tool *tool,
			       union perf_event *event,
			       struct perf_sample *sample __maybe_unused,
			       struct machine *machine __maybe_unused)
{
	struct arm_spe_synth *arm_spe_synth =
		      container_of(tool, struct arm_spe_synth, dummy_tool);

	return perf_session__deliver_synth_event(arm_spe_synth->session,
						 event, NULL);
}

static int arm_spe_synth_event(struct perf_session *session,
			       struct perf_event_attr *attr, u64 id)
{
	struct arm_spe_synth arm_spe_synth;

	memset(&arm_spe_synth, 0, sizeof(struct arm_spe_synth));
	arm_spe_synth.session = session;

	return perf_event__synthesize_attr(&arm_spe_synth.dummy_tool, attr, 1,
					   &id, arm_spe_event_synth);
}

static void arm_spe_set_event_name(struct evlist *evlist, u64 id,
				   const char *name)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.id && evsel->core.id[0] == id) {
			if (evsel->name)
				zfree(&evsel->name);
			evsel->name = strdup(name);
			break;
		}
	}
}

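/*
 * Build the perf_event_attr shared by all synthesized events from the SPE
 * evsel's attributes, then allocate one sample ID and event name per
 * requested event class (FLC/LLC/TLB miss and access, branch miss,
 * remote access and memory events).
 */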
static int
arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
{
	struct evlist *evlist = session->evlist;
	struct evsel *evsel;
	struct perf_event_attr attr;
	bool found = false;
	u64 id;
	int err;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type == spe->pmu_type) {
			found = true;
			break;
		}
	}

	if (!found) {
		pr_debug("No selected events with SPE trace data\n");
		return 0;
	}

	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.size = sizeof(struct perf_event_attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
			    PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC;
	if (spe->timeless_decoding)
		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
	else
		attr.sample_type |= PERF_SAMPLE_TIME;

	spe->sample_type = attr.sample_type;

	attr.exclude_user = evsel->core.attr.exclude_user;
	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
	attr.exclude_hv = evsel->core.attr.exclude_hv;
	attr.exclude_host = evsel->core.attr.exclude_host;
	attr.exclude_guest = evsel->core.attr.exclude_guest;
	attr.sample_id_all = evsel->core.attr.sample_id_all;
	attr.read_format = evsel->core.attr.read_format;

	/* create new id val to be a fixed offset from evsel id */
	id = evsel->core.id[0] + 1000000000;

	if (!id)
		id = 1;

	if (spe->synth_opts.flc) {
		spe->sample_flc = true;

		/* Level 1 data cache miss */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->l1d_miss_id = id;
		arm_spe_set_event_name(evlist, id, "l1d-miss");
		id += 1;

		/* Level 1 data cache access */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->l1d_access_id = id;
		arm_spe_set_event_name(evlist, id, "l1d-access");
		id += 1;
	}

	if (spe->synth_opts.llc) {
		spe->sample_llc = true;

		/* Last level cache miss */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->llc_miss_id = id;
		arm_spe_set_event_name(evlist, id, "llc-miss");
		id += 1;

		/* Last level cache access */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->llc_access_id = id;
		arm_spe_set_event_name(evlist, id, "llc-access");
		id += 1;
	}

	if (spe->synth_opts.tlb) {
		spe->sample_tlb = true;

		/* TLB miss */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->tlb_miss_id = id;
		arm_spe_set_event_name(evlist, id, "tlb-miss");
		id += 1;

		/* TLB access */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->tlb_access_id = id;
		arm_spe_set_event_name(evlist, id, "tlb-access");
		id += 1;
	}

	if (spe->synth_opts.branches) {
		spe->sample_branch = true;

		/* Branch miss */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->branch_miss_id = id;
		arm_spe_set_event_name(evlist, id, "branch-miss");
		id += 1;
	}

	if (spe->synth_opts.remote_access) {
		spe->sample_remote_access = true;

		/* Remote access */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->remote_access_id = id;
		arm_spe_set_event_name(evlist, id, "remote-access");
		id += 1;
	}

	if (spe->synth_opts.mem) {
		spe->sample_memory = true;

		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->memory_id = id;
		arm_spe_set_event_name(evlist, id, "memory");
	}

	return 0;
}

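/*
 * Entry point for setting up ARM SPE decoding: parse the auxtrace info
 * record written at record time (e.g. by "perf record -e arm_spe_0// ..."
 * on systems where the first SPE PMU instance is named arm_spe_0), wire up
 * the session callbacks and clock conversion, then synthesize the
 * requested events and queue the indexed trace data.
 */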
int arm_spe_process_auxtrace_info(union perf_event *event,
				  struct perf_session *session)
{
	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
	size_t min_sz = sizeof(u64) * ARM_SPE_AUXTRACE_PRIV_MAX;
	struct perf_record_time_conv *tc = &session->time_conv;
	struct arm_spe *spe;
	int err;

	if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) +
					min_sz)
		return -EINVAL;

	spe = zalloc(sizeof(struct arm_spe));
	if (!spe)
		return -ENOMEM;

	err = auxtrace_queues__init(&spe->queues);
	if (err)
		goto err_free;

	spe->session = session;
	spe->machine = &session->machines.host; /* No kvm support */
	spe->auxtrace_type = auxtrace_info->type;
	spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];

	spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);

	/*
	 * The synthesized event PERF_RECORD_TIME_CONV has already been
	 * handled and the parameters for the hardware clock are stored in
	 * the session context.  Pass these parameters to the struct
	 * perf_tsc_conversion in "spe->tc", which is used later for
	 * conversion between clock counter and timestamp.
	 *
	 * For backward compatibility, copy the fields starting from
	 * "time_cycles" only if they are contained in the event.
	 */
	spe->tc.time_shift = tc->time_shift;
	spe->tc.time_mult = tc->time_mult;
	spe->tc.time_zero = tc->time_zero;

	if (event_contains(*tc, time_cycles)) {
		spe->tc.time_cycles = tc->time_cycles;
		spe->tc.time_mask = tc->time_mask;
		spe->tc.cap_user_time_zero = tc->cap_user_time_zero;
		spe->tc.cap_user_time_short = tc->cap_user_time_short;
	}

	spe->auxtrace.process_event = arm_spe_process_event;
	spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
	spe->auxtrace.flush_events = arm_spe_flush;
	spe->auxtrace.free_events = arm_spe_free_events;
	spe->auxtrace.free = arm_spe_free;
	spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace;
	session->auxtrace = &spe->auxtrace;

	arm_spe_print_info(&auxtrace_info->priv[0]);

	if (dump_trace)
		return 0;

	if (session->itrace_synth_opts && session->itrace_synth_opts->set)
		spe->synth_opts = *session->itrace_synth_opts;
	else
		itrace_synth_opts__set_default(&spe->synth_opts, false);

	err = arm_spe_synth_events(spe, session);
	if (err)
		goto err_free_queues;

	err = auxtrace_queues__process_index(&spe->queues, session);
	if (err)
		goto err_free_queues;

	if (spe->queues.populated)
		spe->data_queued = true;

	return 0;

err_free_queues:
	auxtrace_queues__free(&spe->queues);
	session->auxtrace = NULL;
err_free:
	free(spe);
	return err;
}