// Copyright 2016 The Fuchsia Authors
// Copyright (c) 2014 Travis Geiselbrecht
//
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT

#include <kernel/mp.h>

#include <arch/mp.h>
#include <arch/ops.h>
#include <assert.h>
#include <debug.h>
#include <dev/interrupt.h>
#include <err.h>
#include <fbl/algorithm.h>
#include <inttypes.h>
#include <kernel/align.h>
#include <kernel/dpc.h>
#include <kernel/event.h>
#include <kernel/mutex.h>
#include <kernel/sched.h>
#include <kernel/spinlock.h>
#include <kernel/stats.h>
#include <kernel/timer.h>
#include <lk/init.h>
#include <platform.h>
#include <platform/timer.h>
#include <stdlib.h>
#include <trace.h>
#include <zircon/types.h>

#define LOCAL_TRACE 0

// a global state structure, aligned on cpu cache line to minimize aliasing
struct mp_state mp __CPU_ALIGN_EXCLUSIVE;

// Helpers used for implementing mp_sync
struct mp_sync_context;
static void mp_sync_task(void* context);

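// One-time initialization of the global mp state: the hotplug mutex, the IPI
// task spinlock, and the per-cpu IPI task lists.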
void mp_init(void) {
  mutex_init(&mp.hotplug_lock);
  mp.ipi_task_lock = SPIN_LOCK_INITIAL_VALUE;
  for (uint i = 0; i < fbl::count_of(mp.ipi_task_list); ++i) {
    list_initialize(&mp.ipi_task_list[i]);
  }
}

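// Forward to the arch layer so it can prepare any per-cpu state tied to the
// current CPU's idle state.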
void mp_prepare_current_cpu_idle_state(bool idle) {
  arch_prepare_current_cpu_idle_state(idle);
}

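// Ask the CPUs in |mask| to reschedule. The local CPU, inactive CPUs, and
// (unless MP_RESCHEDULE_FLAG_REALTIME is set) CPUs running realtime code are
// filtered out before the reschedule IPI is sent via arch_mp_reschedule().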
void mp_reschedule(cpu_mask_t mask, uint flags) {
  // we must be holding the thread lock to access some of the cpu state
  // bitmaps, and some arch_mp_reschedule implementations require it as well.
  DEBUG_ASSERT(thread_lock_held());

  const cpu_num_t local_cpu = arch_curr_cpu_num();

  LTRACEF("local %u, mask %#x\n", local_cpu, mask);

  // mask out cpus that are not active and the local cpu
  mask &= mp.active_cpus;
  mask &= ~cpu_num_to_mask(local_cpu);

  // mask out cpus that are currently running realtime code
  if ((flags & MP_RESCHEDULE_FLAG_REALTIME) == 0) {
    mask &= ~mp.realtime_cpus;
  }

  LTRACEF("local %u, post mask target now 0x%x\n", local_cpu, mask);

  // if we have no work to do, return
  if (mask == 0) {
    return;
  }

  arch_mp_reschedule(mask);
}

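// Send a plain interrupt IPI to the CPUs described by |target| and |mask|; the
// delivery itself is the point (see mp_mbx_interrupt_irq below).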
void mp_interrupt(mp_ipi_target_t target, cpu_mask_t mask) {
  arch_mp_send_ipi(target, mask, MP_IPI_INTERRUPT);
}

struct mp_sync_context {
  mp_sync_task_t task;
  void* task_context;
  // Mask of which CPUs need to finish the task
  volatile cpu_mask_t outstanding_cpus;
};

static void mp_sync_task(void* raw_context) {
  auto context = reinterpret_cast<mp_sync_context*>(raw_context);
  context->task(context->task_context);
  // use seq-cst atomic to ensure this update is not seen before the
  // side-effects of context->task
  atomic_and((int*)&context->outstanding_cpus, ~cpu_num_to_mask(arch_curr_cpu_num()));
}

/* @brief Execute a task on the specified CPUs, and block on the calling
 * CPU until all CPUs have finished the task.
 *
 * If MP_IPI_TARGET_ALL or MP_IPI_TARGET_ALL_BUT_LOCAL is the target, the online CPU
 * mask will be used to determine actual targets.
 *
 * Interrupts must be disabled if calling with MP_IPI_TARGET_ALL_BUT_LOCAL as target
 *
 * The callback in |task| will always be called with |arch_blocking_disallowed()|
 * set to true.
 */
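// Illustrative usage sketch (not part of this file): run a callback on every
// online CPU, including the caller, and block until all of them have run it.
// |mark_sampled| and |sampled| are hypothetical names used only for this example;
// the mask argument is ignored when the target is MP_IPI_TARGET_ALL.
//
//   static void mark_sampled(void* arg) {
//     auto sampled = static_cast<bool*>(arg);
//     sampled[arch_curr_cpu_num()] = true;
//   }
//
//   bool sampled[SMP_MAX_CPUS] = {};
//   mp_sync_exec(MP_IPI_TARGET_ALL, 0, mark_sampled, sampled);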
void mp_sync_exec(mp_ipi_target_t target, cpu_mask_t mask, mp_sync_task_t task, void* context) {
  uint num_cpus = arch_max_num_cpus();

  if (target == MP_IPI_TARGET_ALL) {
    mask = mp_get_online_mask();
  } else if (target == MP_IPI_TARGET_ALL_BUT_LOCAL) {
    // targeting all other CPUs but the current one is hazardous
    // if the local CPU may be changed underneath us
    DEBUG_ASSERT(arch_ints_disabled());
    mask = mp_get_online_mask() & ~cpu_num_to_mask(arch_curr_cpu_num());
  } else {
    // Mask any offline CPUs from target list
    mask &= mp_get_online_mask();
  }

  // disable interrupts so our current CPU doesn't change
  spin_lock_saved_state_t irqstate;
  arch_interrupt_save(&irqstate, SPIN_LOCK_FLAG_INTERRUPTS);
  smp_mb();

  const uint local_cpu = arch_curr_cpu_num();

  // remove self from target lists, since no need to IPI ourselves
  bool targeting_self = !!(mask & cpu_num_to_mask(local_cpu));
  mask &= ~cpu_num_to_mask(local_cpu);

  // create tasks to enqueue (we need one per target, since each contains
  // a linked list node)
  struct mp_sync_context sync_context = {
      .task = task,
      .task_context = context,
      .outstanding_cpus = mask,
  };

  struct mp_ipi_task sync_tasks[SMP_MAX_CPUS] = {};
  for (uint i = 0; i < num_cpus; ++i) {
    sync_tasks[i].func = mp_sync_task;
    sync_tasks[i].context = &sync_context;
  }

  // enqueue tasks
  spin_lock(&mp.ipi_task_lock);
  cpu_mask_t remaining = mask;
  uint cpu_id = 0;
  while (remaining && cpu_id < num_cpus) {
    if (remaining & 1) {
      list_add_tail(&mp.ipi_task_list[cpu_id], &sync_tasks[cpu_id].node);
    }
    remaining >>= 1;
    cpu_id++;
  }
  spin_unlock(&mp.ipi_task_lock);

  // let CPUs know to begin executing
  __UNUSED zx_status_t status = arch_mp_send_ipi(MP_IPI_TARGET_MASK, mask, MP_IPI_GENERIC);
  DEBUG_ASSERT(status == ZX_OK);

  if (targeting_self) {
    bool previous_blocking_disallowed = arch_blocking_disallowed();
    arch_set_blocking_disallowed(true);
    mp_sync_task(&sync_context);
    arch_set_blocking_disallowed(previous_blocking_disallowed);
  }
  smp_mb();

  // we can take interrupts again once we've executed our task
  arch_interrupt_restore(irqstate, SPIN_LOCK_FLAG_INTERRUPTS);

  bool ints_disabled = arch_ints_disabled();
  // wait for all other CPUs to be done with the context
  while (1) {
    // See comment in mp_unplug_trampoline about related CPU hotplug
    // guarantees.
    cpu_mask_t outstanding = atomic_load_relaxed(
        (int*)&sync_context.outstanding_cpus);
    cpu_mask_t online = mp_get_online_mask();
    if ((outstanding & online) == 0) {
      break;
    }

    // If interrupts are still disabled, we need to attempt to process any
    // tasks queued for us in order to prevent deadlock.
    if (ints_disabled) {
      // Optimistically check if our task list has work without the lock.
      // mp_mbx_generic_irq will take the lock and check again.
      if (!list_is_empty(&mp.ipi_task_list[local_cpu])) {
        bool previous_blocking_disallowed = arch_blocking_disallowed();
        arch_set_blocking_disallowed(true);
        mp_mbx_generic_irq(nullptr);
        arch_set_blocking_disallowed(previous_blocking_disallowed);
        continue;
      }
    }

    arch_spinloop_pause();
  }
  smp_mb();

  // make sure the sync_tasks aren't in lists anymore, since they're
  // stack allocated
  spin_lock_irqsave(&mp.ipi_task_lock, irqstate);
  for (uint i = 0; i < num_cpus; ++i) {
    // If a task is still around, it's because the CPU went offline.
    if (list_in_list(&sync_tasks[i].node)) {
      list_delete(&sync_tasks[i].node);
    }
  }
  spin_unlock_irqrestore(&mp.ipi_task_lock, irqstate);
}

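// Entry point for the thread that an about-to-be-unplugged CPU switches to: it
// pulls the CPU out of the scheduler, marks it offline, shuts down its timer
// and interrupt controller, and finally flushes state and halts, signalling the
// event passed via the thread's arg.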
static void mp_unplug_trampoline(void) TA_REQ(thread_lock) __NO_RETURN;
static void mp_unplug_trampoline(void) {
  // We're still holding the thread lock from the reschedule that took us
  // here.

  thread_t* ct = get_current_thread();
  auto unplug_done = reinterpret_cast<event_t*>(ct->arg);

  cpu_num_t cpu_num = arch_curr_cpu_num();
  sched_transition_off_cpu(cpu_num);

  // Note that before this invocation, but after we stopped accepting
  // interrupts, we may have received a synchronous task to perform.
  // Clearing this flag will cause the mp_sync_exec caller to consider
  // this CPU done. If this CPU comes back online before all of the
  // other CPUs finish their work (very unlikely, since tasks should
  // be quick), then this CPU may execute the task.
  mp_set_curr_cpu_online(false);

  // do *not* enable interrupts, we want this CPU to never receive another
  // interrupt
  spin_unlock(&thread_lock);

  // Stop and then shutdown this CPU's platform timer.
  platform_stop_timer();
  platform_shutdown_timer();

  // Shutdown the interrupt controller for this CPU. On some platforms (arm64 with GIC) receiving
  // an interrupt at a powered off CPU can result in implementation defined behavior (including
  // resetting the whole system).
  shutdown_interrupts_curr_cpu();

  // flush all of our caches
  arch_flush_state_and_halt(unplug_done);
}

// Hotplug the given cpus. Blocks until the CPUs are up, or a failure is
// detected.
//
// This should be called in a thread context
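//
// Illustrative sketch: bring CPU 1 back online from a thread context (the call
// fails with ZX_ERR_BAD_STATE if any requested CPU is already online).
//
//   zx_status_t status = mp_hotplug_cpu_mask(cpu_num_to_mask(1));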
zx_status_t mp_hotplug_cpu_mask(cpu_mask_t cpu_mask) {
  DEBUG_ASSERT(!arch_ints_disabled());

  zx_status_t status = ZX_OK;

  mutex_acquire(&mp.hotplug_lock);

  // Make sure all of the requested CPUs are offline
  if (cpu_mask & mp_get_online_mask()) {
    status = ZX_ERR_BAD_STATE;
    goto cleanup_mutex;
  }

  while (cpu_mask != 0) {
    cpu_num_t cpu_id = highest_cpu_set(cpu_mask);
    cpu_mask &= ~cpu_num_to_mask(cpu_id);

    status = platform_mp_cpu_hotplug(cpu_id);
    if (status != ZX_OK) {
      break;
    }
  }
cleanup_mutex:
  mutex_release(&mp.hotplug_lock);
  return status;
}

// Unplug a single CPU. Must be called while holding the hotplug lock.
static zx_status_t mp_unplug_cpu_mask_single_locked(cpu_num_t cpu_id) {
  // Wait for |cpu_id| to complete any in-progress DPCs and terminate its DPC thread. Later, once
  // nothing is running on it, we'll migrate its queued DPCs to another CPU.
  dpc_shutdown(cpu_id);

  // TODO(maniscalco): |cpu_id| is about to shutdown. We should ensure it has no pinned threads
  // (except maybe the idle thread). Once we're confident we've terminated/migrated them all,
  // this would be a good place to DEBUG_ASSERT.

  // Create a thread for the unplug. We will cause the target CPU to
  // context switch to this thread. After this happens, it should no
  // longer be accessing system state and can be safely shut down.
  //
  // This thread is pinned to the target CPU and set to run with the
  // highest priority. This should cause it to pick up the thread
  // immediately (or very soon, if for some reason there is another
  // HIGHEST_PRIORITY task scheduled in between when we resume the
  // thread and when the CPU is woken up).
  event_t unplug_done = EVENT_INITIAL_VALUE(unplug_done, false, 0);
  thread_t* t = thread_create_etc(
      NULL,
      "unplug_thread",
      NULL,
      &unplug_done,
      HIGHEST_PRIORITY,
      mp_unplug_trampoline);
  if (t == NULL) {
    return ZX_ERR_NO_MEMORY;
  }

  zx_status_t status = platform_mp_prep_cpu_unplug(cpu_id);
  if (status != ZX_OK) {
    return status;
  }

  // Pin to the target CPU
  thread_set_cpu_affinity(t, cpu_num_to_mask(cpu_id));
  // Set real time to cancel the pre-emption timer
  thread_set_real_time(t);

  status = thread_detach_and_resume(t);
  if (status != ZX_OK) {
    goto cleanup_thread;
  }

  // Wait for the unplug thread to get scheduled on the target
  do {
    status = event_wait(&unplug_done);
  } while (status != ZX_OK);

  // Now that the CPU is no longer processing tasks, move all of its timers
  timer_transition_off_cpu(cpu_id);
  // Move the CPU's queued DPCs to the current CPU.
  dpc_shutdown_transition_off_cpu(cpu_id);

  status = platform_mp_cpu_unplug(cpu_id);
  if (status != ZX_OK) {
    // Do not clean up the unplug thread in this case. We have successfully
    // unplugged the CPU from the scheduler's perspective, but the platform
    // may have failed to shut down the CPU.
    return status;
  }

  // Fall through. Since the thread is scheduled, it should not be in any
  // queues. Since the CPU running this thread is now shut down, we can just
  // erase the thread's existence.
cleanup_thread:
  thread_forget(t);
  return status;
}

// Unplug the given cpus. Blocks until the CPUs are removed. Partial
// failure may occur (in which some CPUs are removed but not others).
//
// This should be called in a thread context
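//
// Illustrative sketch: take CPU 1 offline from a thread context (the call
// fails with ZX_ERR_BAD_STATE if any requested CPU is not currently online).
//
//   zx_status_t status = mp_unplug_cpu_mask(cpu_num_to_mask(1));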
zx_status_t mp_unplug_cpu_mask(cpu_mask_t cpu_mask) {
  DEBUG_ASSERT(!arch_ints_disabled());

  zx_status_t status = ZX_OK;

  mutex_acquire(&mp.hotplug_lock);

  // Make sure all of the requested CPUs are online
  if (cpu_mask & ~mp_get_online_mask()) {
    status = ZX_ERR_BAD_STATE;
    goto cleanup_mutex;
  }

  while (cpu_mask != 0) {
    cpu_num_t cpu_id = highest_cpu_set(cpu_mask);
    cpu_mask &= ~cpu_num_to_mask(cpu_id);

    status = mp_unplug_cpu_mask_single_locked(cpu_id);
    if (status != ZX_OK) {
      break;
    }
  }

cleanup_mutex:
  mutex_release(&mp.hotplug_lock);
  return status;
}

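// Generic IPI handler: drain and run every task queued on the local CPU's
// ipi_task_list. Runs with interrupts disabled.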
interrupt_eoi mp_mbx_generic_irq(void*) {
  DEBUG_ASSERT(arch_ints_disabled());
  const cpu_num_t local_cpu = arch_curr_cpu_num();

  CPU_STATS_INC(generic_ipis);

  while (1) {
    struct mp_ipi_task* task;
    spin_lock(&mp.ipi_task_lock);
    task = list_remove_head_type(&mp.ipi_task_list[local_cpu], struct mp_ipi_task, node);
    spin_unlock(&mp.ipi_task_lock);
    if (task == NULL) {
      break;
    }

    task->func(task->context);
  }

  return IRQ_EOI_DEACTIVATE;
}

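// Reschedule IPI handler: if the local CPU is still active, mark a preemption
// as pending so a reschedule happens on the way out of the interrupt.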
interrupt_eoi mp_mbx_reschedule_irq(void*) {
  const cpu_num_t cpu = arch_curr_cpu_num();

  LTRACEF("cpu %u\n", cpu);

  CPU_STATS_INC(reschedule_ipis);

  if (mp.active_cpus & cpu_num_to_mask(cpu)) {
    thread_preempt_set_pending();
  }

  return IRQ_EOI_DEACTIVATE;
}

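// Interrupt IPI handler: intentionally does no work. Receiving the interrupt
// is the whole point, e.g. to kick a CPU out of a low-power wait.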
interrupt_eoi mp_mbx_interrupt_irq(void*) {
  const cpu_num_t cpu = arch_curr_cpu_num();

  LTRACEF("cpu %u\n", cpu);

  // do nothing, the entire point of this interrupt is to simply have one
  // delivered to the cpu.

  return IRQ_EOI_DEACTIVATE;
}

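// Weak default implementations for architectures and platforms without CPU
// hotplug support; arch/platform code overrides these with strong definitions.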
__WEAK zx_status_t arch_mp_cpu_hotplug(uint cpu_id) {
  return ZX_ERR_NOT_SUPPORTED;
}
__WEAK zx_status_t arch_mp_prep_cpu_unplug(uint cpu_id) {
  return ZX_ERR_NOT_SUPPORTED;
}
__WEAK zx_status_t arch_mp_cpu_unplug(uint cpu_id) {
  return ZX_ERR_NOT_SUPPORTED;
}
__WEAK zx_status_t platform_mp_cpu_hotplug(uint cpu_id) {
  return arch_mp_cpu_hotplug(cpu_id);
}
__WEAK zx_status_t platform_mp_prep_cpu_unplug(uint cpu_id) {
  return arch_mp_prep_cpu_unplug(cpu_id);
}
__WEAK zx_status_t platform_mp_cpu_unplug(uint cpu_id) {
  return arch_mp_cpu_unplug(cpu_id);
}