1 // Copyright 2016 The Fuchsia Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "bootdata.h"
6 #include "bootfs.h"
7 #include "loader-service.h"
8 #include "option.h"
9 #include "userboot-elf.h"
10 #include "util.h"
11 
12 #pragma GCC visibility push(hidden)
13 
14 #include <zircon/stack.h>
15 #include <zircon/syscalls.h>
16 #include <zircon/syscalls/log.h>
17 #include <runtime/message.h>
18 #include <runtime/processargs.h>
19 #include <stdalign.h>
20 #include <stdnoreturn.h>
21 #include <string.h>
22 #include <sys/param.h>
23 #include <zircon/syscalls/system.h>
24 
25 #pragma GCC visibility pop
26 
// Name assigned (via ZX_PROP_NAME) to the VMO that bootstrap() creates to
// back the child process's initial stack.
#define STACK_VMO_NAME "userboot-child-initial-stack"
28 
// Issue zx_system_powerctl() with |reason| using the resource handle |rroot|,
// logging progress to |log|.  A reboot request is preceded by a three second
// pause; any other reason is issued immediately.  This function never
// returns: if the powerctl call comes back, it logs that fact and traps.
static noreturn void do_powerctl(zx_handle_t log, zx_handle_t rroot, uint32_t reason) {
    // Human-readable name of the action, used only in log messages.
    // Anything other than a shutdown request is reported as "reboot".
    const char* action;
    if (reason == ZX_SYSTEM_POWERCTL_SHUTDOWN) {
        action = "poweroff";
    } else {
        action = "reboot";
    }

    if (reason == ZX_SYSTEM_POWERCTL_REBOOT) {
        printl(log, "Waiting 3 seconds...");
        zx_nanosleep(zx_deadline_after(ZX_SEC(3u)));
    }

    printl(log, "Process exited.  Executing \"%s\".", action);
    zx_system_powerctl(rroot, reason, NULL);

    // Reaching this point means the powerctl call returned.
    printl(log, "still here after %s!", action);
    for (;;) {
        __builtin_trap();
    }
}
42 
// Load the child's program image and the vDSO into the child's address space.
//
// The program named by o->value[OPTION_FILENAME] is looked up in |bootfs| and
// loaded via elf_load_bootfs(); its return value is stored in |*entry|.
// |stack_size| and |loader_svc| are passed through to elf_load_bootfs().
// The vDSO from |vdso_vmo| is then loaded into |vmar| via elf_load_vmo() and
// its return value is stored in |*vdso_base|.
static void load_child_process(zx_handle_t log,
                               const struct options* o, struct bootfs* bootfs,
                               zx_handle_t vdso_vmo, zx_handle_t proc,
                               zx_handle_t vmar, zx_handle_t thread,
                               zx_handle_t to_child,
                               zx_vaddr_t* entry, zx_vaddr_t* vdso_base,
                               size_t* stack_size, zx_handle_t* loader_svc) {
    const char* program_name = o->value[OPTION_FILENAME];

    // Find the requested file in the bootfs image and load it.  A
    // PT_INTERP is handled by a second lookup in bootfs.
    const zx_vaddr_t program_entry =
        elf_load_bootfs(log, bootfs, proc, vmar, thread, program_name,
                        to_child, stack_size, loader_svc);
    *entry = program_entry;

    // The child needs the vDSO mapped in order to make system calls.
    const zx_vaddr_t vdso_load_address = elf_load_vmo(log, vmar, vdso_vmo);
    *vdso_base = vdso_load_address;
}
59 
60 // Reserve roughly the low half of the address space, so the initial
61 // process can use sanitizers that need to allocate shadow memory there.
62 // The reservation VMAR is kept around just long enough to make sure all
63 // the initial allocations (mapping in the initial ELF object, and
64 // allocating the initial stack) stay out of this area, and then destroyed.
65 // The process's own allocations can then use the full address space; if
66 // it's using a sanitizer, it will set up its shadow memory first thing.
reserve_low_address_space(zx_handle_t log,zx_handle_t root_vmar)67 static zx_handle_t reserve_low_address_space(zx_handle_t log,
68                                              zx_handle_t root_vmar) {
69     zx_info_vmar_t info;
70     check(log, zx_object_get_info(root_vmar, ZX_INFO_VMAR,
71                                   &info, sizeof(info), NULL, NULL),
72           "zx_object_get_info failed on child root VMAR handle");
73     zx_handle_t vmar;
74     uintptr_t addr;
75     size_t reserve_size =
76         (((info.base + info.len) / 2) + PAGE_SIZE - 1) & -PAGE_SIZE;
77     zx_status_t status = zx_vmar_allocate(root_vmar, ZX_VM_SPECIFIC,
78                                           0, reserve_size - info.base,
79                                           &vmar, &addr);
80     check(log, status,
81           "zx_vmar_allocate failed for low address space reservation");
82     if (addr != info.base)
83         fail(log, "zx_vmar_allocate gave wrong address?!?");
84     return vmar;
85 }
86 
// Slots for the handles that bootstrap() appends to the message it forwards
// to the child, counted from the end of the handles received from the kernel.
enum {
    // The decompressed bootfs VMO.
    EXTRA_HANDLE_BOOTFS,
    // Number of extra handle slots; must remain the last enumerator.
    EXTRA_HANDLE_COUNT,
};
91 
92 // This is the main logic:
93 // 1. Read the kernel's bootstrap message.
94 // 2. Load up the child process from ELF file(s) on the bootfs.
95 // 3. Create the initial thread and allocate a stack for it.
96 // 4. Load up a channel with the zx_proc_args_t message for the child.
97 // 5. Start the child process running.
98 // 6. Optionally, wait for it to exit and then shut down.
bootstrap(zx_handle_t log,zx_handle_t bootstrap_pipe)99 static noreturn void bootstrap(zx_handle_t log, zx_handle_t bootstrap_pipe) {
100     // Sample the bootstrap message to see how big it is.
101     uint32_t nbytes;
102     uint32_t nhandles;
103 
104     zx_status_t status = zxr_message_size(bootstrap_pipe, &nbytes, &nhandles);
105     check(log, status, "zxr_message_size failed on bootstrap pipe!");
106 
107     // Read the bootstrap message from the kernel.
108     ZXR_PROCESSARGS_BUFFER(buffer,
109                            nbytes + EXTRA_HANDLE_COUNT * sizeof(uint32_t));
110     zx_handle_t handles[nhandles + EXTRA_HANDLE_COUNT];
111     zx_proc_args_t* pargs;
112     uint32_t* handle_info;
113     status = zxr_processargs_read(bootstrap_pipe,
114                                   buffer, nbytes, handles, nhandles,
115                                   &pargs, &handle_info);
116     check(log, status, "zxr_processargs_read failed on bootstrap message!");
117 
118     // All done with the channel from the kernel now.  Let it go.
119     zx_handle_close(bootstrap_pipe);
120 
121     // We're adding some extra handles, so we have to rearrange the
122     // incoming message buffer to make space for their info slots.
123     if (pargs->args_off != 0 || pargs->args_num != 0) {
124         fail(log, "unexpected bootstrap message layout: args");
125     }
126     if (pargs->environ_off != (pargs->handle_info_off +
127                                nhandles * sizeof(uint32_t))) {
128         fail(log, "unexpected bootstrap message layout: environ");
129     }
130     const size_t environ_size = nbytes - pargs->environ_off;
131     pargs->environ_off += EXTRA_HANDLE_COUNT * sizeof(uint32_t);
132     memmove(&buffer[pargs->environ_off],
133             &buffer[pargs->handle_info_off + nhandles * sizeof(uint32_t)],
134             environ_size);
135     nbytes += EXTRA_HANDLE_COUNT * sizeof(uint32_t);
136 
137     // Extract the environment (aka kernel command line) strings.
138     char* environ[pargs->environ_num + 1];
139     status = zxr_processargs_strings(buffer, nbytes, NULL, environ, NULL);
140     check(log, status,
141           "zxr_processargs_strings failed on bootstrap message");
142 
143     // Process the kernel command line, which gives us options and also
144     // becomes the environment strings for our child.
145     struct options o;
146     parse_options(log, &o, environ);
147 
148     zx_handle_t resource_root = ZX_HANDLE_INVALID;
149     zx_handle_t bootdata_vmo = ZX_HANDLE_INVALID;
150     zx_handle_t vdso_vmo = ZX_HANDLE_INVALID;
151     zx_handle_t job = ZX_HANDLE_INVALID;
152     zx_handle_t* proc_handle_loc = NULL;
153     zx_handle_t* vmar_root_handle_loc = NULL;
154     zx_handle_t* thread_handle_loc = NULL;
155     zx_handle_t* stack_vmo_handle_loc = NULL;
156     for (uint32_t i = 0; i < nhandles; ++i) {
157         switch (handle_info[i]) {
158         case PA_HND(PA_VMO_VDSO, 0):
159             vdso_vmo = handles[i];
160             break;
161         case PA_HND(PA_PROC_SELF, 0):
162             proc_handle_loc = &handles[i];
163             break;
164         case PA_HND(PA_VMAR_ROOT, 0):
165             vmar_root_handle_loc = &handles[i];
166             break;
167         case PA_HND(PA_THREAD_SELF, 0):
168             thread_handle_loc = &handles[i];
169             break;
170         case PA_HND(PA_VMO_STACK, 0):
171             stack_vmo_handle_loc = &handles[i];
172             break;
173         case PA_HND(PA_RESOURCE, 0):
174             resource_root = handles[i];
175             break;
176         case PA_HND(PA_JOB_DEFAULT, 0):
177             job = handles[i];
178             break;
179         case PA_HND(PA_VMO_BOOTDATA, 0):
180             if (bootdata_vmo == ZX_HANDLE_INVALID) {
181                 bootdata_vmo = handles[i];
182                 zx_object_set_property(bootdata_vmo, ZX_PROP_NAME, "bootdata", 8);
183             }
184             break;
185         }
186     }
187     if (vdso_vmo == ZX_HANDLE_INVALID)
188         fail(log, "no vDSO handle in bootstrap message");
189     if (resource_root == ZX_HANDLE_INVALID)
190         fail(log, "no resource handle in bootstrap message");
191     if (job == ZX_HANDLE_INVALID)
192         fail(log, "no job handle in bootstrap message");
193     if (vmar_root_handle_loc == NULL)
194         fail(log, "no vmar root handle in bootstrap message");
195     if (bootdata_vmo == ZX_HANDLE_INVALID)
196         fail(log, "no bootdata VMO in bootstrap message");
197 
198     // Hang on to our own process handle.  If we closed it, our process
199     // would be killed.  Exiting will clean it up.
200     __UNUSED const zx_handle_t proc_self = *proc_handle_loc;
201     const zx_handle_t vmar_self = *vmar_root_handle_loc;
202 
203     // Hang on to the resource root handle.
204     zx_handle_t root_resource_handle;
205     status = zx_handle_duplicate(resource_root, ZX_RIGHT_SAME_RIGHTS,
206                                  &root_resource_handle);
207     if (status < 0)
208         fail(log, "zx_handle_duplicate failed: %d", status);
209 
210     // Locate the first bootfs bootdata section and decompress it.
211     // We need it to load devmgr and libc from.
212     // Later bootfs sections will be processed by devmgr.
213     zx_handle_t bootfs_vmo = bootdata_get_bootfs(log, vmar_self, bootdata_vmo);
214 
215     // TODO(mdempsky): Push further down the stack? Seems unnecessary to
216     // mark the entire bootfs VMO as executable.
217     zx_vmo_replace_as_executable(bootfs_vmo, ZX_HANDLE_INVALID, &bootfs_vmo);
218 
219     // Pass the decompressed bootfs VMO on.
220     handles[nhandles + EXTRA_HANDLE_BOOTFS] = bootfs_vmo;
221     handle_info[nhandles + EXTRA_HANDLE_BOOTFS] =
222         PA_HND(PA_VMO_BOOTFS, 0);
223 
224     // Map in the bootfs so we can look for files in it.
225     struct bootfs bootfs;
226     bootfs_mount(vmar_self, log, bootfs_vmo, &bootfs);
227 
228     // Make the channel for the bootstrap message.
229     zx_handle_t to_child;
230     zx_handle_t child_start_handle;
231     status = zx_channel_create(0, &to_child, &child_start_handle);
232     check(log, status, "zx_channel_create failed");
233 
234     const char* filename = o.value[OPTION_FILENAME];
235     zx_handle_t proc;
236     zx_handle_t vmar;
237     status = zx_process_create(job, filename, strlen(filename), 0,
238                                &proc, &vmar);
239     if (status < 0)
240         fail(log, "zx_process_create failed: %d", status);
241 
242     zx_handle_t reserve_vmar = reserve_low_address_space(log, vmar);
243 
244     // Create the initial thread in the new process
245     zx_handle_t thread;
246     status = zx_thread_create(proc, filename, strlen(filename), 0, &thread);
247     if (status < 0)
248         fail(log, "zx_thread_create failed: %d", status);
249 
250     zx_vaddr_t entry, vdso_base;
251     size_t stack_size = ZIRCON_DEFAULT_STACK_SIZE;
252     zx_handle_t loader_service_channel = ZX_HANDLE_INVALID;
253     load_child_process(log, &o, &bootfs, vdso_vmo, proc, vmar,
254                        thread, to_child, &entry, &vdso_base, &stack_size,
255                        &loader_service_channel);
256 
257     // Allocate the stack for the child.
258     stack_size = (stack_size + PAGE_SIZE - 1) & -PAGE_SIZE;
259     zx_handle_t stack_vmo;
260     status = zx_vmo_create(stack_size, 0, &stack_vmo);
261     if (status < 0)
262         fail(log, "zx_vmo_create failed for child stack: %d", status);
263     zx_object_set_property(stack_vmo, ZX_PROP_NAME,
264                            STACK_VMO_NAME, sizeof(STACK_VMO_NAME) - 1);
265     zx_vaddr_t stack_base;
266     status = zx_vmar_map(vmar, ZX_VM_PERM_READ | ZX_VM_PERM_WRITE, 0,
267                          stack_vmo, 0, stack_size, &stack_base);
268     check(log, status, "zx_vmar_map failed for child stack");
269     uintptr_t sp = compute_initial_stack_pointer(stack_base, stack_size);
270     if (stack_vmo_handle_loc != NULL) {
271         // This is our own stack VMO handle, but we don't need it for anything.
272         if (*stack_vmo_handle_loc != ZX_HANDLE_INVALID)
273             zx_handle_close(*stack_vmo_handle_loc);
274         *stack_vmo_handle_loc = stack_vmo;
275     } else {
276         zx_handle_close(stack_vmo);
277     }
278 
279     // We're done doing mappings, so clear out the reservation VMAR.
280     check(log, zx_vmar_destroy(reserve_vmar),
281           "zx_vmar_destroy failed on reservation VMAR handle");
282     check(log, zx_handle_close(reserve_vmar),
283           "zx_handle_close failed on reservation VMAR handle");
284 
285     // Reuse the slot for the child's handle.
286     status = zx_handle_duplicate(proc, ZX_RIGHT_SAME_RIGHTS, proc_handle_loc);
287     if (status < 0)
288         fail(log, "zx_handle_duplicate failed on child process handle: %d", status);
289 
290     if (thread_handle_loc != NULL) {
291         // Reuse the slot for the child's handle.
292         // NOTE: Leaks the current thread handle the same way as the process handle.
293         status = zx_handle_duplicate(thread, ZX_RIGHT_SAME_RIGHTS,
294                                      thread_handle_loc);
295         if (status < 0)
296             fail(log, "zx_handle_duplicate failed on child thread handle: %d", status);
297     }
298 
299     // Reuse the slot for the child's root VMAR handle.  We don't need to hold
300     // a reference to this, so just pass ours to the child.
301     *vmar_root_handle_loc = vmar;
302 
303     // Now send the bootstrap message, consuming both our VMO handles. We also
304     // send the job handle, which in the future means that we can't create more
305     // processes from here on.
306     status = zx_channel_write(to_child, 0, buffer, nbytes,
307                               handles, nhandles + EXTRA_HANDLE_COUNT);
308     check(log, status, "zx_channel_write to child failed");
309     status = zx_handle_close(to_child);
310     check(log, status, "zx_handle_close failed on channel handle");
311 
312     // Start the process going.
313     status = zx_process_start(proc, thread, entry, sp,
314                               child_start_handle, vdso_base);
315     check(log, status, "zx_process_start failed");
316     status = zx_handle_close(thread);
317     check(log, status, "zx_handle_close failed on thread handle");
318 
319     printl(log, "process %s started.", o.value[OPTION_FILENAME]);
320 
321     // Now become the loader service for as long as that's needed.
322     if (loader_service_channel != ZX_HANDLE_INVALID)
323         loader_service(log, &bootfs, loader_service_channel);
324 
325     // All done with bootfs!
326     bootfs_unmount(vmar_self, log, &bootfs);
327 
328     if ((o.value[OPTION_SHUTDOWN] != NULL) || (o.value[OPTION_REBOOT] != NULL)) {
329         printl(log, "Waiting for %s to exit...", o.value[OPTION_FILENAME]);
330         status = zx_object_wait_one(
331             proc, ZX_PROCESS_TERMINATED, ZX_TIME_INFINITE, NULL);
332         check(log, status, "zx_object_wait_one on process failed");
333         if (o.value[OPTION_SHUTDOWN] != NULL) {
334             do_powerctl(log, root_resource_handle, ZX_SYSTEM_POWERCTL_SHUTDOWN);
335         } else if (o.value[OPTION_REBOOT] != NULL) {
336             do_powerctl(log, root_resource_handle, ZX_SYSTEM_POWERCTL_REBOOT);
337         }
338     }
339 
340     // Now we've accomplished our purpose in life, and we can die happy.
341 
342     status = zx_handle_close(proc);
343     check(log, status, "zx_handle_close failed on process handle");
344 
345     printl(log, "finished!");
346     zx_process_exit(0);
347 }
348 
349 // This is the entry point for the whole show, the very first bit of code
350 // to run in user mode.
_start(void * start_arg)351 noreturn void _start(void* start_arg) {
352     zx_handle_t log = ZX_HANDLE_INVALID;
353     zx_debuglog_create(ZX_HANDLE_INVALID, 0, &log);
354     if (log == ZX_HANDLE_INVALID)
355         printl(log, "zx_debuglog_create failed, using zx_debug_write instead");
356 
357     zx_handle_t bootstrap_pipe = (uintptr_t)start_arg;
358     bootstrap(log, bootstrap_pipe);
359 }
360