// SPDX-License-Identifier: GPL-2.0
/*
 * xapic_ipi_test
 *
 * Copyright (C) 2020, Google LLC.
 *
 * This work is licensed under the terms of the GNU GPL, version 2.
 *
 * Test that when the APIC is in xAPIC mode, a vCPU can send an IPI to wake
 * another vCPU that is halted when KVM's backing page for the APIC access
 * address has been moved by mm.
 *
 * The test starts two vCPUs: one that sends IPIs and one that continually
 * executes HLT. The sender checks that the halter has woken from the HLT and
 * has reentered HLT before sending the next IPI. While the vCPUs are running,
 * the host continually calls migrate_pages to move all of the process' pages
 * amongst the available numa nodes on the machine.
 *
 * Migration is a command line option. When it is used on a non-NUMA machine,
 * the test exits with an error. The test is still useful on non-NUMA machines
 * for testing IPIs.
 */
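/*
 * Example invocation (illustrative only; assumes the standard KVM selftests
 * build, where this file produces the xapic_ipi_test binary):
 *
 *   ./xapic_ipi_test -s 10 -m -d 250000
 *
 * runs the test for 10 seconds with migrate_pages() called every 250000
 * microseconds. See get_cmdline_args() below for the option details.
 */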

#define _GNU_SOURCE /* for program_invocation_short_name */
#include <getopt.h>
#include <pthread.h>
#include <inttypes.h>
#include <string.h>
#include <time.h>

#include "kvm_util.h"
#include "numaif.h"
#include "processor.h"
#include "test_util.h"
#include "vmx.h"

/* Default running time for the test */
#define DEFAULT_RUN_SECS 3

/* Default delay between migrate_pages calls (microseconds) */
#define DEFAULT_DELAY_USECS 500000

/*
 * Vector for IPI from sender vCPU to halting vCPU.
 * Value is arbitrary and was chosen for the alternating bit pattern. Any
 * value should work.
 */
#define IPI_VECTOR 0xa5
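
/*
 * Note: on x86, vectors 0-31 are reserved for exceptions and the local APIC
 * rejects fixed-delivery vectors below 16, so in practice "any value" means
 * any vector in the 32-255 range.
 */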

/*
 * Incremented in the IPI handler. Provides evidence to the sender that the IPI
 * arrived at the destination
 */
static volatile uint64_t ipis_rcvd;

/* Data struct shared between host main thread and vCPUs */
struct test_data_page {
	uint32_t halter_apic_id;
	volatile uint64_t hlt_count;
	volatile uint64_t wake_count;
	uint64_t ipis_sent;
	uint64_t migrations_attempted;
	uint64_t migrations_completed;
	uint32_t icr;
	uint32_t icr2;
	uint32_t halter_tpr;
	uint32_t halter_ppr;

	/*
	 * Record local version register as a cross-check that APIC access
	 * worked. Value should match what KVM reports (APIC_VERSION in
	 * arch/x86/kvm/lapic.c). If test is failing, check that values match
	 * to determine whether APIC access exits are working.
	 */
	uint32_t halter_lvr;
};

struct thread_params {
	struct test_data_page *data;
	struct kvm_vcpu *vcpu;
	uint64_t *pipis_rcvd; /* host address of ipis_rcvd global */
};

void verify_apic_base_addr(void)
{
	uint64_t msr = rdmsr(MSR_IA32_APICBASE);
	uint64_t base = GET_APIC_BASE(msr);

	GUEST_ASSERT(base == APIC_DEFAULT_GPA);
}

static void halter_guest_code(struct test_data_page *data)
{
	verify_apic_base_addr();
	xapic_enable();

	data->halter_apic_id = GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID));
	data->halter_lvr = xapic_read_reg(APIC_LVR);

	/*
	 * Loop forever HLTing and recording halts & wakes. Disable interrupts
	 * each time around to minimize window between signaling the pending
	 * halt to the sender vCPU and executing the halt. No need to disable on
	 * first run as this vCPU executes first and the host waits for it to
	 * signal going into first halt before starting the sender vCPU. Record
	 * TPR and PPR for diagnostic purposes in case the test fails.
	 */
	for (;;) {
		data->halter_tpr = xapic_read_reg(APIC_TASKPRI);
		data->halter_ppr = xapic_read_reg(APIC_PROCPRI);
		data->hlt_count++;
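		/*
		 * STI's one-instruction interrupt shadow keeps the IPI from
		 * being delivered in the gap between STI and HLT, so an IPI
		 * that arrives here is recognized only once HLT is executing
		 * and wakes it, rather than being handled just before HLT and
		 * leaving the vCPU asleep.
		 */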
		asm volatile("sti; hlt; cli");
		data->wake_count++;
	}
}

/*
 * Runs on halter vCPU when IPI arrives. Write an arbitrary non-zero value to
 * enable diagnosing errant writes to the APIC access address backing page in
 * case of test failure.
 */
static void guest_ipi_handler(struct ex_regs *regs)
{
	ipis_rcvd++;
	xapic_write_reg(APIC_EOI, 77);
}

static void sender_guest_code(struct test_data_page *data)
{
	uint64_t last_wake_count;
	uint64_t last_hlt_count;
	uint64_t last_ipis_rcvd_count;
	uint32_t icr_val;
	uint32_t icr2_val;
	uint64_t tsc_start;

	verify_apic_base_addr();
	xapic_enable();

	/*
	 * Init interrupt command register for sending IPIs
	 *
	 * Delivery mode=fixed, per SDM:
	 * "Delivers the interrupt specified in the vector field to the target
	 * processor."
	 *
	 * Destination mode=physical i.e. specify target by its local APIC
	 * ID. This vCPU assumes that the halter vCPU has already started and
	 * set data->halter_apic_id.
	 */
	icr_val = (APIC_DEST_PHYSICAL | APIC_DM_FIXED | IPI_VECTOR);
	icr2_val = SET_APIC_DEST_FIELD(data->halter_apic_id);
	data->icr = icr_val;
	data->icr2 = icr2_val;

	last_wake_count = data->wake_count;
	last_hlt_count = data->hlt_count;
	last_ipis_rcvd_count = ipis_rcvd;
	for (;;) {
		/*
		 * Send IPI to halter vCPU.
		 * First IPI can be sent unconditionally because halter vCPU
		 * starts earlier.
		 */
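		/*
		 * In xAPIC mode the write to the low ICR dword is what
		 * triggers delivery, so the destination in ICR2 must be
		 * programmed before ICR is written.
		 */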
		xapic_write_reg(APIC_ICR2, icr2_val);
		xapic_write_reg(APIC_ICR, icr_val);
		data->ipis_sent++;

		/*
		 * Wait up to ~1 sec for halter to indicate that it has:
		 * 1. Received the IPI
		 * 2. Woken up from the halt
		 * 3. Gone back into halt
		 * Current CPUs typically run at 2.x GHz which is ~2
		 * billion ticks per second.
		 */
		tsc_start = rdtsc();
		while (rdtsc() - tsc_start < 2000000000) {
			if ((ipis_rcvd != last_ipis_rcvd_count) &&
			    (data->wake_count != last_wake_count) &&
			    (data->hlt_count != last_hlt_count))
				break;
		}

		GUEST_ASSERT((ipis_rcvd != last_ipis_rcvd_count) &&
			     (data->wake_count != last_wake_count) &&
			     (data->hlt_count != last_hlt_count));

		last_wake_count = data->wake_count;
		last_hlt_count = data->hlt_count;
		last_ipis_rcvd_count = ipis_rcvd;
	}
}

static void *vcpu_thread(void *arg)
{
	struct thread_params *params = (struct thread_params *)arg;
	struct kvm_vcpu *vcpu = params->vcpu;
	struct ucall uc;
	int old;
	int r;
	unsigned int exit_reason;

	r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
	TEST_ASSERT(r == 0,
		    "pthread_setcanceltype failed on vcpu_id=%u with errno=%d",
		    vcpu->id, r);

	fprintf(stderr, "vCPU thread running vCPU %u\n", vcpu->id);
	vcpu_run(vcpu);
	exit_reason = vcpu->run->exit_reason;

	TEST_ASSERT(exit_reason == KVM_EXIT_IO,
		    "vCPU %u exited with unexpected exit reason %u-%s, expected KVM_EXIT_IO",
		    vcpu->id, exit_reason, exit_reason_str(exit_reason));

	if (get_ucall(vcpu, &uc) == UCALL_ABORT) {
		TEST_ASSERT(false,
			    "vCPU %u exited with error: %s.\n"
			    "Sending vCPU sent %lu IPIs to halting vCPU\n"
			    "Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n"
			    "Halter TPR=%#x PPR=%#x LVR=%#x\n"
			    "Migrations attempted: %lu\n"
			    "Migrations completed: %lu\n",
			    vcpu->id, (const char *)uc.args[0],
			    params->data->ipis_sent, params->data->hlt_count,
			    params->data->wake_count,
			    *params->pipis_rcvd, params->data->halter_tpr,
			    params->data->halter_ppr, params->data->halter_lvr,
			    params->data->migrations_attempted,
			    params->data->migrations_completed);
	}

	return NULL;
}

static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu)
{
	void *retval;
	int r;

	r = pthread_cancel(thread);
	TEST_ASSERT(r == 0,
		    "pthread_cancel on vcpu_id=%d failed with errno=%d",
		    vcpu->id, r);

	r = pthread_join(thread, &retval);
	TEST_ASSERT(r == 0,
		    "pthread_join on vcpu_id=%d failed with errno=%d",
		    vcpu->id, r);
	TEST_ASSERT(retval == PTHREAD_CANCELED,
		    "expected retval=%p, got %p", PTHREAD_CANCELED,
		    retval);
}

void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs,
		   uint64_t *pipis_rcvd)
{
	long pages_not_moved;
	unsigned long nodemask = 0;
	unsigned long nodemasks[sizeof(nodemask) * 8];
	int nodes = 0;
	time_t start_time, last_update, now;
	time_t interval_secs = 1;
	int i, r;
	int from, to;
	unsigned long bit;
	uint64_t hlt_count;
	uint64_t wake_count;
	uint64_t ipis_sent;

	fprintf(stderr, "Calling migrate_pages every %d microseconds\n",
		delay_usecs);

	/* Get set of first 64 numa nodes available */
	r = get_mempolicy(NULL, &nodemask, sizeof(nodemask) * 8,
			  0, MPOL_F_MEMS_ALLOWED);
	TEST_ASSERT(r == 0, "get_mempolicy failed errno=%d", errno);

	fprintf(stderr, "Numa nodes found amongst first %lu possible nodes "
		"(each 1-bit indicates node is present): %#lx\n",
		sizeof(nodemask) * 8, nodemask);

	/* Init array of masks containing a single-bit in each, one for each
	 * available node. migrate_pages called below requires specifying nodes
	 * as bit masks.
	 */
	for (i = 0, bit = 1; i < sizeof(nodemask) * 8; i++, bit <<= 1) {
		if (nodemask & bit) {
			nodemasks[nodes] = nodemask & bit;
			nodes++;
		}
	}

	TEST_ASSERT(nodes > 1,
		    "Did not find at least 2 numa nodes. Can't do migration\n");

	fprintf(stderr, "Migrating amongst %d nodes found\n", nodes);

	from = 0;
	to = 1;
	start_time = time(NULL);
	last_update = start_time;

	ipis_sent = data->ipis_sent;
	hlt_count = data->hlt_count;
	wake_count = data->wake_count;

	while ((int)(time(NULL) - start_time) < run_secs) {
		data->migrations_attempted++;

		/*
		 * migrate_pages with PID=0 will migrate all pages of this
		 * process between the nodes specified as bitmasks. The page
		 * backing the APIC access address belongs to this process
		 * because it is allocated by KVM in the context of the
		 * KVM_CREATE_VCPU ioctl. If that assumption ever changes this
		 * test may break or give a false positive signal.
		 */
		pages_not_moved = migrate_pages(0, sizeof(nodemasks[from]),
						&nodemasks[from],
						&nodemasks[to]);
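		/*
		 * Per the migrate_pages(2) man page, the call returns the
		 * number of pages that could not be moved (0 means complete
		 * success) or -1 with errno set on failure.
		 */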
		if (pages_not_moved < 0)
			fprintf(stderr,
				"migrate_pages failed, errno=%d\n", errno);
		else if (pages_not_moved > 0)
			fprintf(stderr,
				"migrate_pages could not move %ld pages\n",
				pages_not_moved);
		else
			data->migrations_completed++;

		from = to;
		to++;
		if (to == nodes)
			to = 0;

		now = time(NULL);
		if (((now - start_time) % interval_secs == 0) &&
		    (now != last_update)) {
			last_update = now;
			fprintf(stderr,
				"%lu seconds: Migrations attempted=%lu completed=%lu, "
				"IPIs sent=%lu received=%lu, HLTs=%lu wakes=%lu\n",
				now - start_time, data->migrations_attempted,
				data->migrations_completed,
				data->ipis_sent, *pipis_rcvd,
				data->hlt_count, data->wake_count);

			TEST_ASSERT(ipis_sent != data->ipis_sent &&
				    hlt_count != data->hlt_count &&
				    wake_count != data->wake_count,
				    "IPI, HLT and wake count have not increased "
				    "in the last %lu seconds. "
				    "HLTer is likely hung.\n", interval_secs);

			ipis_sent = data->ipis_sent;
			hlt_count = data->hlt_count;
			wake_count = data->wake_count;
		}
		usleep(delay_usecs);
	}
}

void get_cmdline_args(int argc, char *argv[], int *run_secs,
		      bool *migrate, int *delay_usecs)
{
	for (;;) {
		int opt = getopt(argc, argv, "s:d:m");

		if (opt == -1)
			break;
		switch (opt) {
		case 's':
			*run_secs = parse_size(optarg);
			break;
		case 'm':
			*migrate = true;
			break;
		case 'd':
			*delay_usecs = parse_size(optarg);
			break;
		default:
			TEST_ASSERT(false,
				    "Usage: -s <runtime seconds>. Default is %d seconds.\n"
				    "-m adds calls to migrate_pages while vCPUs are running."
				    " Default is no migrations.\n"
				    "-d <delay microseconds> - delay between migrate_pages() calls."
				    " Default is %d microseconds.\n",
				    DEFAULT_RUN_SECS, DEFAULT_DELAY_USECS);
		}
	}
}

int main(int argc, char *argv[])
{
	int r;
	int wait_secs;
	const int max_halter_wait = 10;
	int run_secs = 0;
	int delay_usecs = 0;
	struct test_data_page *data;
	vm_vaddr_t test_data_page_vaddr;
	bool migrate = false;
	pthread_t threads[2];
	struct thread_params params[2];
	struct kvm_vm *vm;
	uint64_t *pipis_rcvd;

	get_cmdline_args(argc, argv, &run_secs, &migrate, &delay_usecs);
	if (run_secs <= 0)
		run_secs = DEFAULT_RUN_SECS;
	if (delay_usecs <= 0)
		delay_usecs = DEFAULT_DELAY_USECS;

	vm = vm_create_with_one_vcpu(&params[0].vcpu, halter_guest_code);

	vm_init_descriptor_tables(vm);
	vcpu_init_descriptor_tables(params[0].vcpu);
	vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler);

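	/*
	 * In xAPIC mode the guest reaches its local APIC registers via MMIO
	 * at the APIC base (APIC_DEFAULT_GPA), so identity-map that GPA in
	 * the guest page tables to make those accesses possible.
	 */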
	virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);

	params[1].vcpu = vm_vcpu_add(vm, 1, sender_guest_code);

	test_data_page_vaddr = vm_vaddr_alloc_page(vm);
	data = addr_gva2hva(vm, test_data_page_vaddr);
	memset(data, 0, sizeof(*data));
	params[0].data = data;
	params[1].data = data;

	vcpu_args_set(params[0].vcpu, 1, test_data_page_vaddr);
	vcpu_args_set(params[1].vcpu, 1, test_data_page_vaddr);

	pipis_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ipis_rcvd);
	params[0].pipis_rcvd = pipis_rcvd;
	params[1].pipis_rcvd = pipis_rcvd;

	/* Start halter vCPU thread and wait for it to execute first HLT. */
	r = pthread_create(&threads[0], NULL, vcpu_thread, &params[0]);
	TEST_ASSERT(r == 0,
		    "pthread_create halter failed errno=%d", errno);
	fprintf(stderr, "Halter vCPU thread started\n");

	wait_secs = 0;
	while ((wait_secs < max_halter_wait) && !data->hlt_count) {
		sleep(1);
		wait_secs++;
	}

	TEST_ASSERT(data->hlt_count,
		    "Halter vCPU did not execute first HLT within %d seconds",
		    max_halter_wait);

	fprintf(stderr,
		"Halter vCPU thread reported its APIC ID: %u after %d seconds.\n",
		data->halter_apic_id, wait_secs);

	r = pthread_create(&threads[1], NULL, vcpu_thread, &params[1]);
	TEST_ASSERT(r == 0, "pthread_create sender failed errno=%d", errno);

	fprintf(stderr,
		"IPI sender vCPU thread started. Letting vCPUs run for %d seconds.\n",
		run_secs);

	if (!migrate)
		sleep(run_secs);
	else
		do_migrations(data, run_secs, delay_usecs, pipis_rcvd);

	/*
	 * Cancel threads and wait for them to stop.
	 */
	cancel_join_vcpu_thread(threads[0], params[0].vcpu);
	cancel_join_vcpu_thread(threads[1], params[1].vcpu);

	fprintf(stderr,
		"Test successful after running for %d seconds.\n"
		"Sending vCPU sent %lu IPIs to halting vCPU\n"
		"Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n"
		"Halter APIC ID=%#x\n"
		"Sender ICR value=%#x ICR2 value=%#x\n"
		"Halter TPR=%#x PPR=%#x LVR=%#x\n"
		"Migrations attempted: %lu\n"
		"Migrations completed: %lu\n",
		run_secs, data->ipis_sent,
		data->hlt_count, data->wake_count, *pipis_rcvd,
		data->halter_apic_id,
		data->icr, data->icr2,
		data->halter_tpr, data->halter_ppr, data->halter_lvr,
		data->migrations_attempted, data->migrations_completed);

	kvm_vm_free(vm);

	return 0;
}