// Copyright 2017 The Fuchsia Authors // // Use of this source code is governed by a MIT-style // license that can be found in the LICENSE file or at // https://opensource.org/licenses/MIT #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "system_priv.h" #define LOCAL_TRACE 0 // Allocate this many extra bytes at the end of the bootdata for the platform // to fill in with platform specific boot structures. const size_t kBootdataPlatformExtraBytes = PAGE_SIZE * 4; /* Start and end markers of the mexec assembly; sys_system_mexec copies the bytes between them into the identity mapped page. */ __BEGIN_CDECLS extern void mexec_asm(void); extern void mexec_asm_end(void); __END_CDECLS /* Allocates a page of memory that has the same physical and virtual addresses. * On success, stores the newly created address space in |*new_aspace| and the identity mapped address in |*result_addr|, then returns ZX_OK. * Returns ZX_ERR_NO_MEMORY if pmm_alloc_page() fails, ZX_ERR_INTERNAL if VmAspace::Create() returns null, and otherwise the status reported by AllocPhysical(). * NOTE(review): no code here releases the physical page when VmAspace::Create() or AllocPhysical() fails -- confirm whether that is intentional. */ static zx_status_t identity_page_allocate(fbl::RefPtr* new_aspace, void** result_addr) { zx_status_t result; // Start by obtaining an unused physical page. This address will eventually // be the physical/virtual address of our identity mapped page. paddr_t pa; result = pmm_alloc_page(0, &pa); if (result != ZX_OK) { return ZX_ERR_NO_MEMORY; } // The kernel address space may be in high memory which cannot be identity // mapped since all Kernel Virtual Addresses might be out of range of the // physical address space. For this reason, we need to make a new address // space. fbl::RefPtr identity_aspace = VmAspace::Create(VmAspace::TYPE_LOW_KERNEL, "mexec identity"); if (!identity_aspace) return ZX_ERR_INTERNAL; // Create a new allocation in the new address space that identity maps the // target page. 
const uint perm_flags_rwx = ARCH_MMU_FLAG_PERM_READ | ARCH_MMU_FLAG_PERM_WRITE | ARCH_MMU_FLAG_PERM_EXECUTE; void* identity_address = (void*)pa; result = identity_aspace->AllocPhysical("identity mapping", PAGE_SIZE, &identity_address, 0, pa, VmAspace::VMM_FLAG_VALLOC_SPECIFIC, perm_flags_rwx); if (result != ZX_OK) return result; *new_aspace = ktl::move(identity_aspace); *result_addr = identity_address; return ZX_OK; } /* Takes all the pages in a VMO and creates a copy of them where all the pages * occupy a physically contiguous region of physical memory. * TODO(gkalsi): Don't coalesce pages into a physically contiguous region and * just pass a vectored I/O list to the mexec assembly. * * |vmo_hdl| is looked up in the current process and must carry ZX_RIGHT_READ. * |extra_bytes| is added to the VMO size before rounding up to whole pages. * On success |*addr| receives the physical base of the copy, |*size| the allocated length in bytes (a multiple of PAGE_SIZE) and, when |vaddr| is non-null, |*vaddr| the physmap pointer to the copy. * Only the first vmo_size bytes are written and cache-cleaned; the extra bytes are left as allocated. * Returns ZX_ERR_INVALID_ARGS when |addr| or |size| is null, or the handle lookup status; calls panic() if the contiguous allocation or the VMO read fails. */ static zx_status_t vmo_coalesce_pages(zx_handle_t vmo_hdl, const size_t extra_bytes, paddr_t* addr, uint8_t** vaddr, size_t* size) { DEBUG_ASSERT(addr); if (!addr) { return ZX_ERR_INVALID_ARGS; } DEBUG_ASSERT(size); if (!size) { return ZX_ERR_INVALID_ARGS; } auto up = ProcessDispatcher::GetCurrent(); fbl::RefPtr vmo_dispatcher; zx_status_t st = up->GetDispatcherWithRights(vmo_hdl, ZX_RIGHT_READ, &vmo_dispatcher); if (st != ZX_OK) return st; fbl::RefPtr vmo = vmo_dispatcher->vmo(); const size_t vmo_size = vmo->size(); const size_t num_pages = ROUNDUP(vmo_size + extra_bytes, PAGE_SIZE) / PAGE_SIZE; paddr_t base_addr; list_node list = LIST_INITIAL_VALUE(list); st = pmm_alloc_contiguous(num_pages, PMM_ALLOC_FLAG_ANY, 0, &base_addr, &list); if (st != ZX_OK) { // TODO(gkalsi): Free pages allocated by pmm_alloc_contiguous pages // and return an error. panic("Failed to allocate contiguous memory"); } uint8_t* dst_addr = (uint8_t*)paddr_to_physmap(base_addr); st = vmo->Read(dst_addr, 0, vmo_size); if (st != ZX_OK) { // TODO(gkalsi): Free pages allocated by pmm_alloc_contiguous pages // and return an error. 
panic("Failed to read to contiguous vmo"); } arch_clean_invalidate_cache_range((addr_t)dst_addr, vmo_size); *size = num_pages * PAGE_SIZE; *addr = base_addr; if (vaddr) *vaddr = dst_addr; return ZX_OK; } /* Crashlog VMO recorded by mexec_stash_crashlog(). When set, sys_system_mexec appends its contents to the boot image as a ZBI_TYPE_CRASHLOG section. */ static fbl::RefPtr stashed_crashlog; /* Replaces the stashed crashlog with |vmo|, taking over the caller's reference. */ void mexec_stash_crashlog(fbl::RefPtr vmo) { stashed_crashlog = ktl::move(vmo); } // zx_status_t zx_system_mexec_payload_get /* Builds an empty ZBI in a zeroed kernel buffer of |buffer_size| bytes, lets platform_mexec_patch_zbi() fill it in, and copies the whole buffer out to |user_buffer|. * Requires the root resource. Returns ZX_ERR_INVALID_ARGS when |buffer_size| exceeds kBootdataPlatformExtraBytes, ZX_ERR_NO_MEMORY when the buffer cannot be allocated, ZX_ERR_INTERNAL when the ZBI reset fails, and otherwise the status of the patch or copy-out step. */ zx_status_t sys_system_mexec_payload_get(zx_handle_t resource, user_out_ptr user_buffer, size_t buffer_size) { // Highly privileged, only root resource should have access. zx_status_t result = validate_resource(resource, ZX_RSRC_KIND_ROOT); if (result != ZX_OK) { return result; } // Limit the size of the result that we can return to userspace. if (buffer_size > kBootdataPlatformExtraBytes) { return ZX_ERR_INVALID_ARGS; } fbl::AllocChecker ac; ktl::unique_ptr buffer; buffer.reset(new (&ac) uint8_t[buffer_size]); if (!ac.check()) { return ZX_ERR_NO_MEMORY; } memset(buffer.get(), 0, buffer_size); // Create a zero length ZBI in the buffer. 
zbi::Zbi image(buffer.get(), buffer_size); zbi_result_t zbi_result = image.Reset(); if (zbi_result != ZBI_RESULT_OK) { return ZX_ERR_INTERNAL; } result = platform_mexec_patch_zbi(buffer.get(), buffer_size); if (result != ZX_OK) { return result; } return user_buffer.copy_array_to_user(buffer.get(), buffer_size); } // zx_status_t zx_system_mexec /* Copies |kernel_vmo| and |bootimage_vmo| into physically contiguous memory, patches the boot image (platform sections plus any stashed crashlog), copies the mexec assembly and its memmove op list into an identity mapped page, and hands control to the new kernel through platform_mexec(). * Requires the root resource. Error statuses are only returned before the point marked WARNING below; past it the function ends in panic() if platform_mexec() comes back. * NOTE(review): the early error returns do not release the contiguous copies made by vmo_coalesce_pages() -- confirm whether that is acceptable. */ zx_status_t sys_system_mexec(zx_handle_t resource, zx_handle_t kernel_vmo, zx_handle_t bootimage_vmo) { // TODO(ZX-971): finer grained validation zx_status_t result = validate_resource(resource, ZX_RSRC_KIND_ROOT); if (result != ZX_OK) return result; paddr_t new_kernel_addr; size_t new_kernel_len; result = vmo_coalesce_pages(kernel_vmo, 0, &new_kernel_addr, NULL, &new_kernel_len); if (result != ZX_OK) { return result; } // for kernels that are bootdata based (eg, x86-64), the location // to find the entrypoint depends on the bootdata format paddr_t entry64_addr = (get_kernel_base_phys() + sizeof(zbi_header_t) + // ZBI_TYPE_CONTAINER header sizeof(zbi_header_t) + // ZBI_TYPE_KERNEL header offsetof(zbi_kernel_t, entry)); paddr_t new_bootimage_addr; uint8_t* bootimage_buffer; size_t new_bootimage_len; result = vmo_coalesce_pages(bootimage_vmo, kBootdataPlatformExtraBytes, &new_bootimage_addr, &bootimage_buffer, &new_bootimage_len); if (result != ZX_OK) { return result; } // Allow the platform to patch the bootdata with any platform specific // sections before mexecing. 
result = platform_mexec_patch_zbi(bootimage_buffer, new_bootimage_len); if (result != ZX_OK) { printf("mexec: could not patch bootdata\n"); return result; } /* A stashed crashlog larger than UINT32_MAX bytes is skipped rather than reported as an error. */ if (stashed_crashlog && stashed_crashlog->size() <= UINT32_MAX) { size_t crashlog_len = stashed_crashlog->size(); uint8_t* bootdata_section; zbi::Zbi image(bootimage_buffer, new_bootimage_len); zbi_result_t res = image.CreateSection(static_cast(crashlog_len), ZBI_TYPE_CRASHLOG, 0, 0, reinterpret_cast(&bootdata_section)); if (res != ZBI_RESULT_OK) { printf("mexec: could not append crashlog\n"); return ZX_ERR_INTERNAL; } result = stashed_crashlog->Read(bootdata_section, 0, crashlog_len); if (result != ZX_OK) { return result; } } void* id_page_addr = 0x0; fbl::RefPtr aspace; result = identity_page_allocate(&aspace, &id_page_addr); if (result != ZX_OK) { return result; } LTRACEF("zx_system_mexec allocated identity mapped page at %p\n", id_page_addr); thread_migrate_to_cpu(BOOT_CPU_ID); // We assume that when the system starts, only one CPU is running. We denote // this as the boot CPU. // We want to make sure that this is the CPU that eventually branches into // the new kernel so we attempt to migrate this thread to that cpu. platform_halt_secondary_cpus(); platform_mexec_prep(new_bootimage_addr, new_bootimage_len); arch_disable_ints(); // WARNING // It is unsafe to return from this function beyond this point. // This is because we have swapped out the user address space and halted the // secondary cores and there is no trivial way to bring both of these back. vmm_set_active_aspace(reinterpret_cast(aspace.get())); // We're going to copy this into our identity page, make sure it's not // longer than a single page. 
size_t mexec_asm_length = (uintptr_t)mexec_asm_end - (uintptr_t)mexec_asm; DEBUG_ASSERT(mexec_asm_length <= PAGE_SIZE); memcpy(id_page_addr, (const void*)mexec_asm, mexec_asm_length); arch_sync_cache_range((addr_t)id_page_addr, mexec_asm_length); // We must pass in an arg that represents a list of memory regions to // shuffle around. We put this args list immediately after the mexec // assembly. /* The expression below skips more than 8 bytes past the end of the assembly and lands on the next 8-byte boundary. */ uintptr_t ops_ptr = ((((uintptr_t)id_page_addr) + mexec_asm_length + 8) | 0x7) + 1; memmov_ops_t* ops = (memmov_ops_t*)(ops_ptr); const size_t num_ops = 2; // Make sure that we can also pack the arguments in the same page as the // final mexec assembly shutdown code. DEBUG_ASSERT(((sizeof(*ops) * num_ops + ops_ptr) - (uintptr_t)id_page_addr) < PAGE_SIZE); // Op to move the new kernel into place. ops[0].src = (void*)new_kernel_addr; ops[0].dst = (void*)get_kernel_base_phys(); ops[0].len = new_kernel_len; // Null terminated list. ops[1] = {0, 0, 0}; // Make sure that the kernel, when copied, will not overwrite the bootdata. DEBUG_ASSERT(!Intersects(reinterpret_cast(ops[0].dst), ops[0].len, reinterpret_cast(new_bootimage_addr), new_bootimage_len)); // Sync because there is code in here that we intend to run. arch_sync_cache_range((addr_t)id_page_addr, PAGE_SIZE); // Clean because we're going to turn the MMU/caches off and we want to make // sure that things are still available afterwards. arch_clean_cache_range((addr_t)id_page_addr, PAGE_SIZE); shutdown_interrupts(); // Ask the platform to mexec into the next kernel. mexec_asm_func mexec_assembly = (mexec_asm_func)id_page_addr; platform_mexec(mexec_assembly, ops, new_bootimage_addr, new_bootimage_len, entry64_addr); panic("Execution should never reach here\n"); return ZX_OK; } // Gracefully halt and perform |action|. 
static void platform_graceful_halt(platform_halt_action action) { thread_migrate_to_cpu(BOOT_CPU_ID); platform_halt_secondary_cpus(); // Delay shutdown of debuglog to ensure log messages emitted by above calls will be written. dlog_shutdown(); platform_halt(action, HALT_REASON_SW_RESET); panic("ERROR: failed to halt the platform\n"); } // zx_status_t zx_system_powerctl zx_status_t sys_system_powerctl(zx_handle_t root_rsrc, uint32_t cmd, user_in_ptr raw_arg) { zx_status_t status; if ((status = validate_resource(root_rsrc, ZX_RSRC_KIND_ROOT)) < 0) { return status; } switch (cmd) { case ZX_SYSTEM_POWERCTL_ENABLE_ALL_CPUS: { cpu_mask_t all_cpus = ((cpu_mask_t)1u << arch_max_num_cpus()) - 1; return mp_hotplug_cpu_mask(~mp_get_online_mask() & all_cpus); } case ZX_SYSTEM_POWERCTL_DISABLE_ALL_CPUS_BUT_PRIMARY: { cpu_mask_t primary = cpu_num_to_mask(0); return mp_unplug_cpu_mask(mp_get_online_mask() & ~primary); } case ZX_SYSTEM_POWERCTL_ACPI_TRANSITION_S_STATE: case ZX_SYSTEM_POWERCTL_X86_SET_PKG_PL1: { zx_system_powerctl_arg_t arg; status = raw_arg.copy_from_user(&arg); if (status != ZX_OK) { return status; } return arch_system_powerctl(cmd, &arg); } case ZX_SYSTEM_POWERCTL_REBOOT: platform_graceful_halt(HALT_ACTION_REBOOT); break; case ZX_SYSTEM_POWERCTL_REBOOT_BOOTLOADER: platform_graceful_halt(HALT_ACTION_REBOOT_BOOTLOADER); break; case ZX_SYSTEM_POWERCTL_REBOOT_RECOVERY: platform_graceful_halt(HALT_ACTION_REBOOT_RECOVERY); break; case ZX_SYSTEM_POWERCTL_SHUTDOWN: platform_graceful_halt(HALT_ACTION_SHUTDOWN); break; default: return ZX_ERR_INVALID_ARGS; } return ZX_OK; }