#include "libc.h"
#include "zircon_impl.h"
#include "threads_impl.h"

#include <zircon/process.h>
#include <zircon/syscalls.h>
#include <stddef.h>
#include <string.h>

static pthread_rwlock_t allocation_lock = PTHREAD_RWLOCK_INITIALIZER;

// Many threads could be reading the TLS state.
static void thread_allocation_acquire(void) {
    pthread_rwlock_rdlock(&allocation_lock);
}

// dlopen calls this under another lock. Only one dlopen call can be
// modifying state at a time.
void __thread_allocation_inhibit(void) {
    pthread_rwlock_wrlock(&allocation_lock);
}

void __thread_allocation_release(void) {
    pthread_rwlock_unlock(&allocation_lock);
}

// Round up to a whole number of pages. This relies on PAGE_SIZE being a
// power of two, so -PAGE_SIZE masks off the sub-page bits; e.g.
// round_up_to_page(1) == PAGE_SIZE.
static inline size_t round_up_to_page(size_t sz) {
    return (sz + PAGE_SIZE - 1) & -PAGE_SIZE;
}

// Returns the displacement of a module's TLS block from the thread
// pointer. With TLS_ABOVE_TP (ARM-style "variant I"), TLS blocks sit
// above the thread pointer at positive offsets; otherwise ("variant II",
// as on x86) they sit below it, so the recorded offset is negated.
static ptrdiff_t offset_for_module(const struct tls_module* module) {
#ifdef TLS_ABOVE_TP
    return module->offset;
#else
    return -module->offset;
#endif
}

__NO_SAFESTACK static thrd_t copy_tls(unsigned char* mem, size_t alloc) {
    thrd_t td;
    struct tls_module* p;
    size_t i;
    void** dtv;

#ifdef TLS_ABOVE_TP
    // *-----------------------------------------------------------------------*
    // | pthread | tcb | X | tls_1 | ... | tls_n | ... | tls_cnt | dtv[1] | ... |
    // *-----------------------------------------------------------------------*
    // ^         ^         ^             ^            ^
    // td        tp      dtv[1]       dtv[n+1]       dtv
    //
    // Note: The TCB is actually the last member of pthread.
    // See: "Addenda to, and Errata in, the ABI for the ARM Architecture"

    dtv = (void**)(mem + libc.tls_size) - (libc.tls_cnt + 1);
    // We need to make sure that the thread pointer is maximally aligned so
    // that tp + dtv[N] is aligned to align_N no matter what N is. So we need
    // 'mem' to be such that if mem == td then td->head is maximally aligned.
    // To do this we take &td->head (i.e. mem + the offset of head), align it,
    // and then subtract the offset of ->head back out so that &td->head
    // itself ends up aligned.
    uintptr_t tp = (uintptr_t)mem + PTHREAD_TP_OFFSET;
    tp = (tp + libc.tls_align - 1) & -libc.tls_align;
    td = (thrd_t)(tp - PTHREAD_TP_OFFSET);
    // Now mem should be the new thread pointer.
    mem = (unsigned char*)tp;
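    // Worked example with hypothetical numbers (not this port's real
    // constants): if PTHREAD_TP_OFFSET were 16 and libc.tls_align were 64,
    // then mem == 0x1000 gives tp == (0x1010 + 63) & -64 == 0x1040 and
    // td == (thrd_t)0x1030, so &td->head == (void*)0x1040 is 64-byte
    // aligned as required.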
#else
    // *-----------------------------------------------------------------------*
    // | tls_cnt | dtv[1] | ... | tls_n | ... | tls_1 | tcb | pthread | unused |
    // *-----------------------------------------------------------------------*
    // ^                        ^             ^       ^
    // dtv                   dtv[n+1]       dtv[1]  tp/td
    //
    // Note: The TCB is actually the first member of pthread.
    dtv = (void**)mem;

    mem += alloc - sizeof(struct pthread);
    mem -= (uintptr_t)mem & (libc.tls_align - 1);
    td = (thrd_t)mem;
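    // Worked example with hypothetical numbers: for a block at 0x10000 with
    // alloc == 0x3000, sizeof(struct pthread) == 0x100, and
    // libc.tls_align == 64, td lands at 0x12f00 and the TLS blocks sit
    // below it at the negative offsets recorded in the DTV by the loop
    // below.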
#endif

    // Module IDs are 1-based; for each module, point dtv[i] at its block
    // and copy in the TLS initializer image (the rest stays zero, since
    // VMO memory is zero-filled).
    for (i = 1, p = libc.tls_head; p; i++, p = p->next) {
        dtv[i] = mem + offset_for_module(p);
        memcpy(dtv[i], p->image, p->len);
    }

    // dtv[0] records the number of modules.
    dtv[0] = (void*)libc.tls_cnt;
    td->head.dtv = dtv;
    return td;
}
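
// For illustration only: a dynamic TLS access consumes the DTV built by
// copy_tls roughly as in this sketch. This is not this libc's actual
// __tls_get_addr; tls_index follows the usual ELF TLS ABI convention.
//
//     typedef struct { size_t ti_module, ti_offset; } tls_index;
//
//     void* sketch_tls_get_addr(tls_index* ti) {
//         void** dtv = __pthread_self()->head.dtv;
//         return (unsigned char*)dtv[ti->ti_module] + ti->ti_offset;
//     }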
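// Maps |size| bytes of |vmo| at |vmo_offset| into a fresh sub-VMAR that
// also reserves |before| bytes of unmapped guard space below the mapping
// and |after| bytes above it. On success, *mapping covers the usable
// pages and *region covers the whole guarded reservation. Returns true
// on failure (note the inverted sense).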
__NO_SAFESTACK static bool map_block(zx_handle_t parent_vmar,
                                     zx_handle_t vmo, size_t vmo_offset,
                                     size_t size, size_t before, size_t after,
                                     struct iovec* mapping,
                                     struct iovec* region) {
    region->iov_len = before + size + after;
    zx_handle_t vmar;
    uintptr_t addr;
    // Reserve address space for the mapping plus its guard areas.
    zx_status_t status = _zx_vmar_allocate(parent_vmar,
                                           ZX_VM_CAN_MAP_READ |
                                           ZX_VM_CAN_MAP_WRITE |
                                           ZX_VM_CAN_MAP_SPECIFIC,
                                           0, region->iov_len, &vmar, &addr);
    if (status != ZX_OK)
        return true;
    region->iov_base = (void*)addr;
    // Map the VMO pages |before| bytes into the reservation; the rest of
    // the sub-VMAR stays unmapped, so touching it faults.
    status = _zx_vmar_map(vmar,
                          ZX_VM_PERM_READ |
                          ZX_VM_PERM_WRITE |
                          ZX_VM_SPECIFIC,
                          before, vmo, vmo_offset, size, &addr);
    if (status != ZX_OK)
        _zx_vmar_destroy(vmar);
    // The sub-VMAR handle is no longer needed; the mapping stays alive.
    _zx_handle_close(vmar);
    mapping->iov_base = (void*)addr;
    mapping->iov_len = size;
    return status != ZX_OK;
}

// This allocates all the per-thread memory for a new thread about to
// be created, or for the initial thread at startup.  It's called
// either at startup or under thread_allocation_acquire.  Hence,
// it's serialized with any dynamic linker changes to the TLS
// bookkeeping.
//
// This conceptually allocates four things, but concretely allocates
// three separate blocks:
// 1. The safe stack (where the thread's SP will point).
// 2. The unsafe stack (where __builtin___get_unsafe_stack_ptr() will point).
// 3. The thread descriptor (struct pthread).  The thread pointer points
//    into this (exactly where depends on the machine ABI).
// 4. The static TLS area.  The ELF TLS ABI for the Initial Exec model
//    mandates a fixed distance from the thread pointer to the TLS area
//    across all threads.  So effectively this must always be allocated
//    as part of the same block with the thread descriptor.
// This function also copies in the TLS initializer data.
// It initializes the basic thread descriptor fields.
// Everything else is zero-initialized.
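//
// All three blocks come from a single VMO, laid out as follows; each
// piece is mapped separately, with unmapped guard space around it (see
// the map_block calls below):
//
//     | static TLS + struct pthread | safe stack | unsafe stack |
//     0                      tcb_size            tcb_size + stack_size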
__NO_SAFESTACK thrd_t __allocate_thread(
    size_t requested_guard_size,
    size_t requested_stack_size,
    const char* thread_name,
    char vmo_name[ZX_MAX_NAME_LEN]) {
    thread_allocation_acquire();

    const size_t guard_size =
        requested_guard_size == 0 ? 0 : round_up_to_page(requested_guard_size);
    const size_t stack_size = round_up_to_page(requested_stack_size);

    const size_t tls_size = libc.tls_size;
    const size_t tcb_size = round_up_to_page(tls_size);

    const size_t vmo_size = tcb_size + stack_size * 2;
    zx_handle_t vmo;
    zx_status_t status = _zx_vmo_create(vmo_size, 0, &vmo);
    if (status != ZX_OK) {
        __thread_allocation_release();
        return NULL;
    }
    struct iovec tcb, tcb_region;
    if (map_block(_zx_vmar_root_self(), vmo, 0, tcb_size, PAGE_SIZE, PAGE_SIZE,
                  &tcb, &tcb_region)) {
        __thread_allocation_release();
        _zx_handle_close(vmo);
        return NULL;
    }

    thrd_t td = copy_tls(tcb.iov_base, tcb.iov_len);

    // At this point all our access to global TLS state is done, so we
    // can allow dlopen again.
    __thread_allocation_release();

    // For the initial thread, it's too early to call snprintf because
    // it's not __NO_SAFESTACK.
    if (vmo_name != NULL) {
        // For other threads, try to give the VMO a name that includes
        // the thrd_t value (and the TLS size if that fits too), but
        // don't use a truncated value since that would be confusing to
        // interpret.  snprintf returns the length the full string would
        // need, so a result < ZX_MAX_NAME_LEN means nothing was cut off.
        if (snprintf(vmo_name, ZX_MAX_NAME_LEN, "%s:%p/TLS=%#zx",
                     thread_name, td, tls_size) < ZX_MAX_NAME_LEN ||
            snprintf(vmo_name, ZX_MAX_NAME_LEN, "%s:%p",
                     thread_name, td) < ZX_MAX_NAME_LEN)
            thread_name = vmo_name;
    }
    _zx_object_set_property(vmo, ZX_PROP_NAME,
                            thread_name, strlen(thread_name));

    if (map_block(_zx_vmar_root_self(), vmo,
                  tcb_size, stack_size, guard_size, 0,
                  &td->safe_stack, &td->safe_stack_region)) {
        _zx_vmar_unmap(_zx_vmar_root_self(),
                       (uintptr_t)tcb_region.iov_base, tcb_region.iov_len);
        _zx_handle_close(vmo);
        return NULL;
    }

    if (map_block(_zx_vmar_root_self(), vmo,
                  tcb_size + stack_size, stack_size, guard_size, 0,
                  &td->unsafe_stack, &td->unsafe_stack_region)) {
        _zx_vmar_unmap(_zx_vmar_root_self(),
                       (uintptr_t)td->safe_stack_region.iov_base,
                       td->safe_stack_region.iov_len);
        _zx_vmar_unmap(_zx_vmar_root_self(),
                       (uintptr_t)tcb_region.iov_base, tcb_region.iov_len);
        _zx_handle_close(vmo);
        return NULL;
    }

    _zx_handle_close(vmo);
    td->tcb_region = tcb_region;
    td->locale = &libc.global_locale;
    td->head.tp = (uintptr_t)pthread_to_tp(td);
    td->abi.stack_guard = __stack_chk_guard;
    td->abi.unsafe_sp =
        (uintptr_t)td->unsafe_stack.iov_base + td->unsafe_stack.iov_len;
    return td;
}
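
// For illustration only: a caller (e.g. a thrd_create/pthread_create
// implementation) would use __allocate_thread roughly like this sketch;
// the size constants and the thread-start plumbing are hypothetical:
//
//     char vmo_name[ZX_MAX_NAME_LEN];
//     thrd_t td = __allocate_thread(DEFAULT_GUARD_SIZE, DEFAULT_STACK_SIZE,
//                                   "worker", vmo_name);
//     if (td == NULL)
//         return thrd_nomem;
//     // ... create the zx thread, then start it with its SP at the top
//     // of td->safe_stack and its thread pointer set to td->head.tp ...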