1 // Copyright 2017 The Fuchsia Authors
2 //
3 // Use of this source code is governed by a MIT-style
4 // license that can be found in the LICENSE file or at
5 // https://opensource.org/licenses/MIT
6
7 #include <arch/arch_ops.h>
8 #include <arch/mp.h>
9 #include <debug.h>
10 #include <dev/interrupt.h>
11 #include <kernel/cmdline.h>
12 #include <kernel/mp.h>
13 #include <kernel/range_check.h>
14 #include <kernel/thread.h>
15 #include <lib/debuglog.h>
16 #include <libzbi/zbi-cpp.h>
17 #include <mexec.h>
18 #include <object/process_dispatcher.h>
19 #include <object/resource.h>
20 #include <object/vm_object_dispatcher.h>
21 #include <platform.h>
22 #include <string.h>
23 #include <trace.h>
24 #include <vm/physmap.h>
25 #include <vm/pmm.h>
26 #include <vm/vm.h>
27 #include <vm/vm_aspace.h>
28 #include <zircon/boot/image.h>
29 #include <zircon/compiler.h>
30 #include <zircon/syscalls/resource.h>
31 #include <zircon/syscalls/system.h>
32 #include <zircon/types.h>
33
34 #include "system_priv.h"
35
36 #define LOCAL_TRACE 0
37
38 // Allocate this many extra bytes at the end of the bootdata for the platform
39 // to fill in with platform specific boot structures.
40 const size_t kBootdataPlatformExtraBytes = PAGE_SIZE * 4;
41
42 __BEGIN_CDECLS
43 extern void mexec_asm(void);
44 extern void mexec_asm_end(void);
45 __END_CDECLS
46
47 /* Allocates a page of memory that has the same physical and virtual addresses.
48 */
identity_page_allocate(fbl::RefPtr<VmAspace> * new_aspace,void ** result_addr)49 static zx_status_t identity_page_allocate(fbl::RefPtr<VmAspace>* new_aspace,
50 void** result_addr) {
51 zx_status_t result;
52
53 // Start by obtaining an unused physical page. This address will eventually
54 // be the physical/virtual address of our identity mapped page.
55 paddr_t pa;
56 result = pmm_alloc_page(0, &pa);
57 if (result != ZX_OK) {
58 return ZX_ERR_NO_MEMORY;
59 }
60
61 // The kernel address space may be in high memory which cannot be identity
62 // mapped since all Kernel Virtual Addresses might be out of range of the
63 // physical address space. For this reason, we need to make a new address
64 // space.
65 fbl::RefPtr<VmAspace> identity_aspace =
66 VmAspace::Create(VmAspace::TYPE_LOW_KERNEL, "mexec identity");
67 if (!identity_aspace)
68 return ZX_ERR_INTERNAL;
69
70 // Create a new allocation in the new address space that identity maps the
71 // target page.
72 const uint perm_flags_rwx = ARCH_MMU_FLAG_PERM_READ |
73 ARCH_MMU_FLAG_PERM_WRITE |
74 ARCH_MMU_FLAG_PERM_EXECUTE;
75 void* identity_address = (void*)pa;
76 result = identity_aspace->AllocPhysical("identity mapping", PAGE_SIZE,
77 &identity_address, 0, pa,
78 VmAspace::VMM_FLAG_VALLOC_SPECIFIC,
79 perm_flags_rwx);
80 if (result != ZX_OK)
81 return result;
82
83 *new_aspace = ktl::move(identity_aspace);
84 *result_addr = identity_address;
85
86 return ZX_OK;
87 }
88
89 /* Takes all the pages in a VMO and creates a copy of them where all the pages
90 * occupy a physically contiguous region of physical memory.
91 * TODO(gkalsi): Don't coalesce pages into a physically contiguous region and
92 * just pass a vectored I/O list to the mexec assembly.
93 */
vmo_coalesce_pages(zx_handle_t vmo_hdl,const size_t extra_bytes,paddr_t * addr,uint8_t ** vaddr,size_t * size)94 static zx_status_t vmo_coalesce_pages(zx_handle_t vmo_hdl, const size_t extra_bytes,
95 paddr_t* addr, uint8_t** vaddr, size_t* size) {
96 DEBUG_ASSERT(addr);
97 if (!addr) {
98 return ZX_ERR_INVALID_ARGS;
99 }
100
101 DEBUG_ASSERT(size);
102 if (!size) {
103 return ZX_ERR_INVALID_ARGS;
104 }
105
106 auto up = ProcessDispatcher::GetCurrent();
107 fbl::RefPtr<VmObjectDispatcher> vmo_dispatcher;
108 zx_status_t st =
109 up->GetDispatcherWithRights(vmo_hdl, ZX_RIGHT_READ, &vmo_dispatcher);
110 if (st != ZX_OK)
111 return st;
112
113 fbl::RefPtr<VmObject> vmo = vmo_dispatcher->vmo();
114
115 const size_t vmo_size = vmo->size();
116
117 const size_t num_pages = ROUNDUP(vmo_size + extra_bytes, PAGE_SIZE) / PAGE_SIZE;
118
119 paddr_t base_addr;
120 list_node list = LIST_INITIAL_VALUE(list);
121 st = pmm_alloc_contiguous(num_pages, PMM_ALLOC_FLAG_ANY, 0, &base_addr, &list);
122 if (st != ZX_OK) {
123 // TODO(gkalsi): Free pages allocated by pmm_alloc_contiguous pages
124 // and return an error.
125 panic("Failed to allocate contiguous memory");
126 }
127
128 uint8_t* dst_addr = (uint8_t*)paddr_to_physmap(base_addr);
129
130 st = vmo->Read(dst_addr, 0, vmo_size);
131 if (st != ZX_OK) {
132 // TODO(gkalsi): Free pages allocated by pmm_alloc_contiguous pages
133 // and return an error.
134 panic("Failed to read to contiguous vmo");
135 }
136
137 arch_clean_invalidate_cache_range((addr_t)dst_addr, vmo_size);
138
139 *size = num_pages * PAGE_SIZE;
140 *addr = base_addr;
141 if (vaddr)
142 *vaddr = dst_addr;
143
144 return ZX_OK;
145 }
146
// Crashlog VMO stashed at boot; sys_system_mexec appends its contents to the
// next kernel's ZBI so the log survives the mexec transition.
static fbl::RefPtr<VmObject> stashed_crashlog;

// Takes ownership of |vmo| and holds it until mexec time (or forever if mexec
// never happens). Passing a null RefPtr clears any previously stashed log.
void mexec_stash_crashlog(fbl::RefPtr<VmObject> vmo) {
    stashed_crashlog = ktl::move(vmo);
}
151
152 // zx_status_t zx_system_mexec_payload_get
sys_system_mexec_payload_get(zx_handle_t resource,user_out_ptr<void> user_buffer,size_t buffer_size)153 zx_status_t sys_system_mexec_payload_get(zx_handle_t resource,
154 user_out_ptr<void> user_buffer,
155 size_t buffer_size) {
156 // Highly privilidged, only root resource should have access.
157 zx_status_t result = validate_resource(resource, ZX_RSRC_KIND_ROOT);
158 if (result != ZX_OK) {
159 return result;
160 }
161
162 // Limit the size of the result that we can return to userspace.
163 if (buffer_size > kBootdataPlatformExtraBytes) {
164 return ZX_ERR_INVALID_ARGS;
165 }
166
167 fbl::AllocChecker ac;
168 ktl::unique_ptr<uint8_t[]> buffer;
169 buffer.reset(new (&ac) uint8_t[buffer_size]);
170 if (!ac.check()) {
171 return ZX_ERR_NO_MEMORY;
172 }
173 memset(buffer.get(), 0, buffer_size);
174
175 // Create a zero length ZBI in the buffer.
176 zbi::Zbi image(buffer.get(), buffer_size);
177 zbi_result_t zbi_result = image.Reset();
178 if (zbi_result != ZBI_RESULT_OK) {
179 return ZX_ERR_INTERNAL;
180 }
181
182 result = platform_mexec_patch_zbi(buffer.get(), buffer_size);
183 if (result != ZX_OK) {
184 return result;
185 }
186
187 return user_buffer.copy_array_to_user(buffer.get(), buffer_size);
188 }
189
// zx_status_t zx_system_mexec
//
// Shuts down the running system and boots the kernel image in |kernel_vmo|
// with |bootimage_vmo| as its ZBI. On success this never returns; on early
// failure (before the point of no return below) it returns an error.
zx_status_t sys_system_mexec(zx_handle_t resource, zx_handle_t kernel_vmo, zx_handle_t bootimage_vmo) {
    // TODO(ZX-971): finer grained validation
    zx_status_t result = validate_resource(resource, ZX_RSRC_KIND_ROOT);
    if (result != ZX_OK)
        return result;

    // Copy the new kernel into a physically contiguous region.
    paddr_t new_kernel_addr;
    size_t new_kernel_len;
    result = vmo_coalesce_pages(kernel_vmo, 0, &new_kernel_addr, NULL,
                                &new_kernel_len);
    if (result != ZX_OK) {
        return result;
    }

    // for kernels that are bootdata based (eg, x86-64), the location
    // to find the entrypoint depends on the bootdata format
    paddr_t entry64_addr = (get_kernel_base_phys() +
                            sizeof(zbi_header_t) + // ZBI_TYPE_CONTAINER header
                            sizeof(zbi_header_t) + // ZBI_TYPE_KERNEL header
                            offsetof(zbi_kernel_t, entry));

    // Copy the bootimage, reserving extra space for platform-appended items.
    // NOTE(review): the kernel pages coalesced above are leaked if any of the
    // subsequent early-return error paths is taken — acceptable today since
    // mexec failure leaves the system in a degraded state anyway, but worth
    // confirming.
    paddr_t new_bootimage_addr;
    uint8_t* bootimage_buffer;
    size_t new_bootimage_len;
    result = vmo_coalesce_pages(bootimage_vmo, kBootdataPlatformExtraBytes,
                                &new_bootimage_addr, &bootimage_buffer,
                                &new_bootimage_len);
    if (result != ZX_OK) {
        return result;
    }

    // Allow the platform to patch the bootdata with any platform specific
    // sections before mexecing.
    result = platform_mexec_patch_zbi(bootimage_buffer, new_bootimage_len);
    if (result != ZX_OK) {
        printf("mexec: could not patch bootdata\n");
        return result;
    }

    // Append the stashed crashlog (if any) to the outgoing ZBI so the next
    // kernel can recover it. ZBI item lengths are 32-bit, hence the size cap.
    if (stashed_crashlog && stashed_crashlog->size() <= UINT32_MAX) {
        size_t crashlog_len = stashed_crashlog->size();
        uint8_t* bootdata_section;
        zbi::Zbi image(bootimage_buffer, new_bootimage_len);

        zbi_result_t res = image.CreateSection(static_cast<uint32_t>(crashlog_len),
                                               ZBI_TYPE_CRASHLOG, 0, 0,
                                               reinterpret_cast<void**>(&bootdata_section));

        if (res != ZBI_RESULT_OK) {
            printf("mexec: could not append crashlog\n");
            return ZX_ERR_INTERNAL;
        }

        result = stashed_crashlog->Read(bootdata_section, 0, crashlog_len);
        if (result != ZX_OK) {
            return result;
        }
    }

    // Set up a page with identical physical and virtual addresses to hold the
    // trampoline code; it must keep working after the MMU is switched off.
    void* id_page_addr = 0x0;
    fbl::RefPtr<VmAspace> aspace;
    result = identity_page_allocate(&aspace, &id_page_addr);
    if (result != ZX_OK) {
        return result;
    }

    LTRACEF("zx_system_mexec allocated identity mapped page at %p\n",
            id_page_addr);

    thread_migrate_to_cpu(BOOT_CPU_ID);

    // We assume that when the system starts, only one CPU is running. We denote
    // this as the boot CPU.
    // We want to make sure that this is the CPU that eventually branches into
    // the new kernel so we attempt to migrate this thread to that cpu.
    platform_halt_secondary_cpus();

    platform_mexec_prep(new_bootimage_addr, new_bootimage_len);

    arch_disable_ints();

    // WARNING
    // It is unsafe to return from this function beyond this point.
    // This is because we have swapped out the user address space and halted the
    // secondary cores and there is no trivial way to bring both of these back.
    vmm_set_active_aspace(reinterpret_cast<vmm_aspace_t*>(aspace.get()));

    // We're going to copy this into our identity page, make sure it's not
    // longer than a single page.
    size_t mexec_asm_length = (uintptr_t)mexec_asm_end - (uintptr_t)mexec_asm;
    DEBUG_ASSERT(mexec_asm_length <= PAGE_SIZE);

    memcpy(id_page_addr, (const void*)mexec_asm, mexec_asm_length);
    arch_sync_cache_range((addr_t)id_page_addr, mexec_asm_length);

    // We must pass in an arg that represents a list of memory regions to
    // shuffle around. We put this args list immediately after the mexec
    // assembly.
    // The (x | 0x7) + 1 idiom rounds up to the next 8-byte boundary (always
    // advancing, never leaving x in place), with 8 bytes of slack after the
    // assembly.
    uintptr_t ops_ptr = ((((uintptr_t)id_page_addr) + mexec_asm_length + 8) | 0x7) + 1;
    memmov_ops_t* ops = (memmov_ops_t*)(ops_ptr);

    const size_t num_ops = 2;
    // Make sure that we can also pack the arguments in the same page as the
    // final mexec assembly shutdown code.
    DEBUG_ASSERT(((sizeof(*ops) * num_ops + ops_ptr) - (uintptr_t)id_page_addr) < PAGE_SIZE);

    // Op to move the new kernel into place.
    ops[0].src = (void*)new_kernel_addr;
    ops[0].dst = (void*)get_kernel_base_phys();
    ops[0].len = new_kernel_len;

    // Null terminated list.
    ops[1] = {0, 0, 0};

    // Make sure that the kernel, when copied, will not overwrite the bootdata.
    DEBUG_ASSERT(!Intersects(reinterpret_cast<uintptr_t>(ops[0].dst),
                             ops[0].len,
                             reinterpret_cast<uintptr_t>(new_bootimage_addr),
                             new_bootimage_len));

    // Sync because there is code in here that we intend to run.
    arch_sync_cache_range((addr_t)id_page_addr, PAGE_SIZE);

    // Clean because we're going to turn the MMU/caches off and we want to make
    // sure that things are still available afterwards.
    arch_clean_cache_range((addr_t)id_page_addr, PAGE_SIZE);

    shutdown_interrupts();

    // Ask the platform to mexec into the next kernel.
    mexec_asm_func mexec_assembly = (mexec_asm_func)id_page_addr;
    platform_mexec(mexec_assembly, ops, new_bootimage_addr, new_bootimage_len, entry64_addr);

    panic("Execution should never reach here\n");
    return ZX_OK;
}
327
// Gracefully halt and perform |action|.
//
// Never returns: platform_halt() either performs |action| or hangs, and the
// trailing panic catches any unexpected fallthrough. The call order below is
// deliberate — quiesce CPUs first, then flush the log, then halt.
static void platform_graceful_halt(platform_halt_action action) {
    // Run the final shutdown from the boot CPU with all other CPUs stopped.
    thread_migrate_to_cpu(BOOT_CPU_ID);
    platform_halt_secondary_cpus();

    // Delay shutdown of debuglog to ensure log messages emitted by above calls will be written.
    dlog_shutdown();

    platform_halt(action, HALT_REASON_SW_RESET);
    panic("ERROR: failed to halt the platform\n");
}
339
340 // zx_status_t zx_system_powerctl
sys_system_powerctl(zx_handle_t root_rsrc,uint32_t cmd,user_in_ptr<const zx_system_powerctl_arg_t> raw_arg)341 zx_status_t sys_system_powerctl(zx_handle_t root_rsrc, uint32_t cmd,
342 user_in_ptr<const zx_system_powerctl_arg_t> raw_arg) {
343
344 zx_status_t status;
345 if ((status = validate_resource(root_rsrc, ZX_RSRC_KIND_ROOT)) < 0) {
346 return status;
347 }
348
349 switch (cmd) {
350 case ZX_SYSTEM_POWERCTL_ENABLE_ALL_CPUS: {
351 cpu_mask_t all_cpus = ((cpu_mask_t)1u << arch_max_num_cpus()) - 1;
352 return mp_hotplug_cpu_mask(~mp_get_online_mask() & all_cpus);
353 }
354 case ZX_SYSTEM_POWERCTL_DISABLE_ALL_CPUS_BUT_PRIMARY: {
355 cpu_mask_t primary = cpu_num_to_mask(0);
356 return mp_unplug_cpu_mask(mp_get_online_mask() & ~primary);
357 }
358 case ZX_SYSTEM_POWERCTL_ACPI_TRANSITION_S_STATE:
359 case ZX_SYSTEM_POWERCTL_X86_SET_PKG_PL1: {
360 zx_system_powerctl_arg_t arg;
361 status = raw_arg.copy_from_user(&arg);
362 if (status != ZX_OK) {
363 return status;
364 }
365
366 return arch_system_powerctl(cmd, &arg);
367 }
368 case ZX_SYSTEM_POWERCTL_REBOOT:
369 platform_graceful_halt(HALT_ACTION_REBOOT);
370 break;
371 case ZX_SYSTEM_POWERCTL_REBOOT_BOOTLOADER:
372 platform_graceful_halt(HALT_ACTION_REBOOT_BOOTLOADER);
373 break;
374 case ZX_SYSTEM_POWERCTL_REBOOT_RECOVERY:
375 platform_graceful_halt(HALT_ACTION_REBOOT_RECOVERY);
376 break;
377 case ZX_SYSTEM_POWERCTL_SHUTDOWN:
378 platform_graceful_halt(HALT_ACTION_SHUTDOWN);
379 break;
380 default:
381 return ZX_ERR_INVALID_ARGS;
382 }
383 return ZX_OK;
384 }
385