1 /* Copyright (C) 2002-2007, 2009 Free Software Foundation, Inc.
2    This file is part of the GNU C Library.
3    Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
4 
5    The GNU C Library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public
7    License as published by the Free Software Foundation; either
8    version 2.1 of the License, or (at your option) any later version.
9 
10    The GNU C Library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14 
15    You should have received a copy of the GNU Lesser General Public
16    License along with the GNU C Library; if not, see
17    <http://www.gnu.org/licenses/>.  */
18 
19 #include <assert.h>
20 #include <errno.h>
21 #include <signal.h>
22 #include <stdint.h>
23 #include <string.h>
24 #include <unistd.h>
25 #include <sys/mman.h>
26 #include <sys/param.h>
27 #include <dl-tls.h>
28 #include <tls.h>
29 #include <lowlevellock.h>
30 #include <link.h>
31 #include <bits/kernel-features.h>
32 
33 
34 #ifndef NEED_SEPARATE_REGISTER_STACK
35 
36 /* Most architectures have exactly one stack pointer.  Some have more.  */
37 # define STACK_VARIABLES void *stackaddr = NULL
38 
39 /* How to pass the values to the 'create_thread' function.  */
40 # define STACK_VARIABLES_ARGS stackaddr
41 
42 /* How to declare the function that receives these parameters.  */
43 # define STACK_VARIABLES_PARMS void *stackaddr
44 
45 /* How to declare allocate_stack.  */
46 # define ALLOCATE_STACK_PARMS void **stack
47 
48 /* This is how the function is called.  We do it this way to allow
49    other variants of the function to have more parameters.  */
50 # define ALLOCATE_STACK(attr, pd) allocate_stack (attr, pd, &stackaddr)
51 
52 #else
53 
54 /* We need two stacks.  The kernel will place them but we have to tell
55    the kernel about the size of the reserved address space.  */
56 # define STACK_VARIABLES void *stackaddr = NULL; size_t stacksize = 0
57 
58 /* How to pass the values to the 'create_thread' function.  */
59 # define STACK_VARIABLES_ARGS stackaddr, stacksize
60 
61 /* How to declare the function that receives these parameters.  */
62 # define STACK_VARIABLES_PARMS void *stackaddr, size_t stacksize
63 
64 /* How to declare allocate_stack.  */
65 # define ALLOCATE_STACK_PARMS void **stack, size_t *stacksize
66 
67 /* This is how the function is called.  We do it this way to allow
68    other variants of the function to have more parameters.  */
69 # define ALLOCATE_STACK(attr, pd) \
70   allocate_stack (attr, pd, &stackaddr, &stacksize)
71 
72 #endif
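/* Illustrative sketch: a caller such as pthread_create() typically uses
   the macros above roughly like this (IATTR is only a placeholder for the
   caller's attribute object; the exact wiring is port-specific):

     STACK_VARIABLES;
     struct pthread *pd;
     int err = ALLOCATE_STACK (iattr, &pd);
     if (err != 0)
       return err;
     err = create_thread (pd, iattr, STACK_VARIABLES_ARGS);
*/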
73 
74 
75 /* Default alignment of stack.  */
76 #ifndef STACK_ALIGN
77 # define STACK_ALIGN __alignof__ (long double)
78 #endif
79 
80 /* Default value for minimal stack size after allocating thread
81    descriptor and guard.  */
82 #ifndef MINIMAL_REST_STACK
83 # define MINIMAL_REST_STACK	4096
84 #endif
85 
86 
87 /* Newer kernels have the MAP_STACK flag to indicate a mapping is used for
88    a stack.  Use it when possible.  */
89 #ifndef MAP_STACK
90 # define MAP_STACK 0
91 #endif
92 
93 /* This yields the pointer that TLS support code calls the thread pointer.  */
94 #if defined(TLS_TCB_AT_TP)
95 # define TLS_TPADJ(pd) (pd)
96 #elif defined(TLS_DTV_AT_TP)
97 # define TLS_TPADJ(pd) ((struct pthread *)((char *) (pd) + TLS_PRE_TCB_SIZE))
98 #endif
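/* Note: the TLS allocation routines expect the thread-pointer/TCB value.
   With TLS_TCB_AT_TP the TCB and the struct pthread coincide, so no
   adjustment is needed; with TLS_DTV_AT_TP the struct pthread sits
   TLS_PRE_TCB_SIZE bytes before the TCB, so TLS_TPADJ skips over it.  */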
99 
100 /* Cache handling for not-yet-freed stacks.  */
101 
102 /*
103    Maximum size in bytes of the cache.  The GNU libc default is 40 MiB;
104    embedded systems do not have enough RAM for big dirty stack caches,
105    so reduce it to 16 MiB.  4 MiB does not work, e.g. tst-kill4 segfaults.
106 */
107 static size_t stack_cache_maxsize = 16 * 1024 * 1024;
108 static size_t stack_cache_actsize;
109 
110 /* Mutex protecting this variable.  */
111 static int stack_cache_lock = LLL_LOCK_INITIALIZER;
112 
113 /* List of queued stack frames.  */
114 static LIST_HEAD (stack_cache);
115 
116 /* List of the stacks in use.  */
117 static LIST_HEAD (stack_used);
118 
119 /* We need to record what list operations we are going to do so that,
120    in case of an asynchronous interruption due to a fork() call, we
121    can correct for the work.  */
122 static uintptr_t in_flight_stack;
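/* The low bit of IN_FLIGHT_STACK records which operation was pending:
   stack_list_add() sets it, stack_list_del() leaves it clear, and
   __reclaim_stacks() uses it after fork() to replay or undo an
   interrupted list operation.  */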
123 
124 /* List of the threads with user provided stacks in use.  No need to
125    initialize this, since it's done in __pthread_initialize_minimal.  */
126 list_t __stack_user __attribute__ ((nocommon));
127 hidden_data_def (__stack_user)
128 
129 #if defined COLORING_INCREMENT && COLORING_INCREMENT != 0
130 /* Number of threads created.  */
131 static unsigned int nptl_ncreated;
132 #endif
133 
134 
135 /* Check whether the stack is still used or not.  */
136 #define FREE_P(descr) ((descr)->tid <= 0)
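/* The kernel clears the TID field once it is finished with the stack
   (see __deallocate_stack below), so a non-positive TID means the stack
   can safely be reused.  */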
137 
138 
139 static void
140 stack_list_del (list_t *elem)
141 {
142   in_flight_stack = (uintptr_t) elem;
143 
144   atomic_write_barrier ();
145 
146   list_del (elem);
147 
148   atomic_write_barrier ();
149 
150   in_flight_stack = 0;
151 }
152 
153 
154 static void
155 stack_list_add (list_t *elem, list_t *list)
156 {
157   in_flight_stack = (uintptr_t) elem | 1;
158 
159   atomic_write_barrier ();
160 
161   list_add (elem, list);
162 
163   atomic_write_barrier ();
164 
165   in_flight_stack = 0;
166 }
167 
168 
169 /* We create a doubly linked list of all cache entries.  Doubly linked
170    because this allows removing entries from the end.  */
171 
172 
173 /* Get a stack frame from the cache.  We have to match by size since
174    some blocks might be too small or far too large.  */
175 static struct pthread *
176 get_cached_stack (size_t *sizep, void **memp)
177 {
178   size_t size = *sizep;
179   struct pthread *result = NULL;
180   list_t *entry;
181 
182   lll_lock (stack_cache_lock, LLL_PRIVATE);
183 
184   /* Search the cache for a matching entry.  We search for the
185      smallest stack which has at least the required size.  Note that
186      in normal situations the size of all allocated stacks is the
187      same.  At the very least there are only a few different sizes.
188      Therefore this loop will exit early most of the time with an
189      exact match.  */
190   list_for_each (entry, &stack_cache)
191     {
192       struct pthread *curr;
193 
194       curr = list_entry (entry, struct pthread, list);
195       if (FREE_P (curr) && curr->stackblock_size >= size)
196 	{
197 	  if (curr->stackblock_size == size)
198 	    {
199 	      result = curr;
200 	      break;
201 	    }
202 
203 	  if (result == NULL
204 	      || result->stackblock_size > curr->stackblock_size)
205 	    result = curr;
206 	}
207     }
208 
209   if (__builtin_expect (result == NULL, 0)
210       /* Make sure the size difference is not too excessive.  In that
211 	 case we do not use the block.  */
212       || __builtin_expect (result->stackblock_size > 4 * size, 0))
213     {
214       /* Release the lock.  */
215       lll_unlock (stack_cache_lock, LLL_PRIVATE);
216 
217       return NULL;
218     }
219 
220   /* Dequeue the entry.  */
221   stack_list_del (&result->list);
222 
223   /* And add to the list of stacks in use.  */
224   stack_list_add (&result->list, &stack_used);
225 
226   /* And decrease the cache size.  */
227   stack_cache_actsize -= result->stackblock_size;
228 
229   /* Release the lock early.  */
230   lll_unlock (stack_cache_lock, LLL_PRIVATE);
231 
232   /* Report size and location of the stack to the caller.  */
233   *sizep = result->stackblock_size;
234   *memp = result->stackblock;
235 
236   /* Cancellation handling is back to the default.  */
237   result->cancelhandling = 0;
238   result->cleanup = NULL;
239 
240   /* No pending event.  */
241   result->nextevent = NULL;
242 
243   /* Clear the DTV.  */
244   dtv_t *dtv = GET_DTV (TLS_TPADJ (result));
245   for (size_t cnt = 0; cnt < dtv[-1].counter; ++cnt)
246     if (! dtv[1 + cnt].pointer.is_static
247 	      && dtv[1 + cnt].pointer.val != TLS_DTV_UNALLOCATED)
248       free (dtv[1 + cnt].pointer.val);
249   memset (dtv, '\0', (dtv[-1].counter + 1) * sizeof (dtv_t));
250 
251   /* Re-initialize the TLS.  */
252   _dl_allocate_tls_init (TLS_TPADJ (result));
253 
254   return result;
255 }
256 
257 
258 /* Free stacks until cache size is lower than LIMIT.  */
259 void
260 __free_stacks (size_t limit)
261 {
262   /* We reduce the size of the cache.  Remove the last entries until
263      the size is below the limit.  */
264   list_t *entry;
265   list_t *prev;
266 
267   /* Search from the end of the list.  */
268   list_for_each_prev_safe (entry, prev, &stack_cache)
269     {
270       struct pthread *curr;
271 
272       curr = list_entry (entry, struct pthread, list);
273       if (FREE_P (curr))
274 	{
275 	  /* Unlink the block.  */
276 	  stack_list_del (entry);
277 
278 	  /* Account for the freed memory.  */
279 	  stack_cache_actsize -= curr->stackblock_size;
280 
281 	  /* Free the memory associated with the ELF TLS.  */
282 	  _dl_deallocate_tls (TLS_TPADJ (curr), false);
283 
284 	  /* Remove this block.  This should never fail.  If it does
285 	     something is really wrong.  */
286 	  if (munmap (curr->stackblock, curr->stackblock_size) != 0)
287 	    abort ();
288 
289 	  /* Maybe we have freed enough.  */
290 	  if (stack_cache_actsize <= limit)
291 	    break;
292 	}
293     }
294 }
295 
296 
297 /* Add a stack frame which is not used anymore to the stack cache.  Must
298    be called with the cache lock held.  */
299 static inline void
300 __attribute ((always_inline))
301 queue_stack (struct pthread *stack)
302 {
303   /* We unconditionally add the stack to the list.  The memory may
304      still be in use but it will not be reused until the kernel marks
305      the stack as not used anymore.  */
306   stack_list_add (&stack->list, &stack_cache);
307 
308   stack_cache_actsize += stack->stackblock_size;
309   if (__builtin_expect (stack_cache_actsize > stack_cache_maxsize, 0))
310     __free_stacks (stack_cache_maxsize);
311 }
312 
313 
314 static int
315 internal_function
316 change_stack_perm (struct pthread *pd
317 #ifdef NEED_SEPARATE_REGISTER_STACK
318 		   , size_t pagemask
319 #endif
320 		   )
321 {
322 #ifdef NEED_SEPARATE_REGISTER_STACK
323   void *stack = (pd->stackblock
324 		 + (((((pd->stackblock_size - pd->guardsize) / 2)
325 		      & pagemask) + pd->guardsize) & pagemask));
326   size_t len = pd->stackblock + pd->stackblock_size - stack;
327 #elif defined _STACK_GROWS_DOWN
328   void *stack = pd->stackblock + pd->guardsize;
329   size_t len = pd->stackblock_size - pd->guardsize;
330 #elif defined _STACK_GROWS_UP
331   void *stack = pd->stackblock;
332   size_t len = (uintptr_t) pd - pd->guardsize - (uintptr_t) pd->stackblock;
333 #else
334 # error "Define either _STACK_GROWS_DOWN or _STACK_GROWS_UP"
335 #endif
336 #ifdef __ARCH_USE_MMU__
337   if (mprotect (stack, len, PROT_READ | PROT_WRITE | PROT_EXEC) != 0)
338     return errno;
339 #endif
340 
341   return 0;
342 }
343 
344 
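/* Set up a thread descriptor and stack: either use the user-supplied
   stack from ATTR or allocate one (reusing a cached stack when possible).
   On success 0 is returned and *PDP points to the new descriptor;
   otherwise an errno-style code is returned (EINVAL for a stack or guard
   that is too small, EAGAIN when the TLS/DTV allocation fails, or the
   mmap()/mprotect() error).  */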
345 static int
346 allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
347 		ALLOCATE_STACK_PARMS)
348 {
349   struct pthread *pd;
350   size_t size;
351   size_t pagesize_m1 = __getpagesize () - 1;
352   void *stacktop;
353 
354   assert (attr != NULL);
355   assert (powerof2 (pagesize_m1 + 1));
356   assert (TCB_ALIGNMENT >= STACK_ALIGN);
357 
358   /* Get the stack size from the attribute if it is set.  Otherwise we
359      use the default we determined at start time.  */
360   size = attr->stacksize ?: __default_stacksize;
361 
362   /* Get memory for the stack.  */
363   if (__builtin_expect (attr->flags & ATTR_FLAG_STACKADDR, 0))
364     {
365       uintptr_t adj;
366 
367       /* If the user also specified the size of the stack make sure it
368 	 is large enough.  */
369       if (attr->stacksize != 0
370 	  && attr->stacksize < (__static_tls_size + MINIMAL_REST_STACK))
371 	return EINVAL;
372 
373       /* Adjust stack size for alignment of the TLS block.  */
374 #if defined(TLS_TCB_AT_TP)
375       adj = ((uintptr_t) attr->stackaddr - TLS_TCB_SIZE)
376 	    & __static_tls_align_m1;
377       assert (size > adj + TLS_TCB_SIZE);
378 #elif defined(TLS_DTV_AT_TP)
379       adj = ((uintptr_t) attr->stackaddr - __static_tls_size)
380 	    & __static_tls_align_m1;
381       assert (size > adj);
382 #endif
383 
384       /* The user provided some memory.  Let's hope it matches the
385 	 size...  We do not allocate guard pages if the user provided
386 	 the stack.  It is the user's responsibility to do this if it
387 	 is wanted.  */
388 #if defined(TLS_TCB_AT_TP)
389       pd = (struct pthread *) ((uintptr_t) attr->stackaddr
390 			       - TLS_TCB_SIZE - adj);
391 #elif defined(TLS_DTV_AT_TP)
392       pd = (struct pthread *) (((uintptr_t) attr->stackaddr
393 			        - __static_tls_size - adj)
394 			       - TLS_PRE_TCB_SIZE);
395 #endif
396 
397       /* The user provided stack memory needs to be cleared.  */
398       memset (pd, '\0', sizeof (struct pthread));
399 
400       /* The first TSD block is included in the TCB.  */
401       pd->specific[0] = pd->specific_1stblock;
402 
403       /* Remember the stack-related values.  */
404       pd->stackblock = (char *) attr->stackaddr - size;
405       pd->stackblock_size = size;
406 
407       /* This is a user-provided stack.  It will not be queued in the
408 	 stack cache nor will the memory (except the TLS memory) be freed.  */
409       pd->user_stack = true;
410 
411       /* This is at least the second thread.  */
412       pd->header.multiple_threads = 1;
413 #ifndef TLS_MULTIPLE_THREADS_IN_TCB
414       __pthread_multiple_threads = *__libc_multiple_threads_ptr = 1;
415 #endif
416 
417 #ifndef __ASSUME_PRIVATE_FUTEX
418       /* The thread must know when private futexes are supported.  */
419       pd->header.private_futex = THREAD_GETMEM (THREAD_SELF,
420 						header.private_futex);
421 #endif
422 
423 #ifdef NEED_DL_SYSINFO
424       /* Copy the sysinfo value from the parent.  */
425       THREAD_SYSINFO(pd) = THREAD_SELF_SYSINFO;
426 #endif
427 
428       /* Allocate the DTV for this thread.  */
429       if (_dl_allocate_tls (TLS_TPADJ (pd)) == NULL)
430 	{
431 	  /* Something went wrong.  */
432 	  assert (errno == ENOMEM);
433 	  return EAGAIN;
434 	}
435 
436 
437       /* Prepare to modify global data.  */
438       lll_lock (stack_cache_lock, LLL_PRIVATE);
439 
440       /* And add to the list of stacks in use.  */
441       list_add (&pd->list, &__stack_user);
442 
443       lll_unlock (stack_cache_lock, LLL_PRIVATE);
444     }
445   else
446     {
447       /* Allocate some anonymous memory.  If possible use the cache.  */
448       size_t guardsize;
449       size_t reqsize;
450       void *mem = 0;
451       const int prot = (PROT_READ | PROT_WRITE);
452 
453 #if defined COLORING_INCREMENT && COLORING_INCREMENT != 0
454       /* Add one more page for stack coloring.  Don't do it for stacks
455 	 with 16 times pagesize or larger.  This might just cause
456 	 unnecessary misalignment.  */
457       if (size <= 16 * pagesize_m1)
458 	size += pagesize_m1 + 1;
459 #endif
460 
461       /* Adjust the stack size for alignment.  */
462       size &= ~__static_tls_align_m1;
463       assert (size != 0);
464 
465       /* Make sure the size of the stack is enough for the guard and
466 	 eventually the thread descriptor.  */
467       guardsize = (attr->guardsize + pagesize_m1) & ~pagesize_m1;
468       if (__builtin_expect (size < ((guardsize + __static_tls_size
469 				     + MINIMAL_REST_STACK + pagesize_m1)
470 				    & ~pagesize_m1),
471 			    0))
472 	/* The stack is too small (or the guard too large).  */
473 	return EINVAL;
474 
475       /* Try to get a stack from the cache.  */
476       reqsize = size;
477       pd = get_cached_stack (&size, &mem);
478       if (pd == NULL)
479 	{
480 	  /* To avoid aliasing effects on a larger scale than pages we
481 	     adjust the allocated stack size if necessary.  This way
482 	     allocations directly following each other will not have
483 	     aliasing problems.  */
484 #if defined MULTI_PAGE_ALIASING && MULTI_PAGE_ALIASING != 0
485 	  if ((size % MULTI_PAGE_ALIASING) == 0)
486 	    size += pagesize_m1 + 1;
487 #endif
488 
489 	  mem = mmap (NULL, size, prot,
490 		      MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
491 
492 	  if (__builtin_expect (mem == MAP_FAILED, 0))
493 	    {
494 	      if (errno == ENOMEM)
495 		__set_errno (EAGAIN);
496 
497 	       return errno;
498 	    }
499 
500 	  /* SIZE is guaranteed to be greater than zero.
501 	     So we can never get a null pointer back from mmap.  */
502 	  assert (mem != NULL);
503 
504 #if defined COLORING_INCREMENT && COLORING_INCREMENT != 0
505 	  /* Atomically increment NCREATED.  */
506 	  unsigned int ncreated = atomic_increment_val (&nptl_ncreated);
507 
508 	  /* We chose the offset for coloring by incrementing it for
509 	     every new thread by a fixed amount.  The offset is used
510 	     modulo the page size.  Even if coloring would be better
511 	     relative to higher alignment values it makes no sense to
512 	     do it since the mmap() interface does not allow us to
513 	     specify any alignment for the returned memory block.  */
514 	  size_t coloring = (ncreated * COLORING_INCREMENT) & pagesize_m1;
515 
516 	  /* Make sure the coloring offset does not disturb the alignment
517 	     of the TCB and static TLS block.  */
518 	  if (__builtin_expect ((coloring & __static_tls_align_m1) != 0, 0))
519 	    coloring = (((coloring + __static_tls_align_m1)
520 			 & ~(__static_tls_align_m1))
521 			& ~pagesize_m1);
522 #else
523 	  /* Unless specified we do not make any adjustments.  */
524 # define coloring 0
525 #endif
526 
527 	  /* Place the thread descriptor at the end of the stack.  */
528 #if defined(TLS_TCB_AT_TP)
529 	  pd = (struct pthread *) ((char *) mem + size - coloring) - 1;
530 #elif defined(TLS_DTV_AT_TP)
531 	  pd = (struct pthread *) ((((uintptr_t) mem + size - coloring
532 				    - __static_tls_size)
533 				    & ~__static_tls_align_m1)
534 				   - TLS_PRE_TCB_SIZE);
535 #endif
536 
537 	  /* Remember the stack-related values.  */
538 	  pd->stackblock = mem;
539 	  pd->stackblock_size = size;
540 
541 	  /* We allocated the first block of the thread-specific data array.
542 	     This address will not change for the lifetime of this
543 	     descriptor.  */
544 	  pd->specific[0] = pd->specific_1stblock;
545 
546 	  /* This is at least the second thread.  */
547 	  pd->header.multiple_threads = 1;
548 #ifndef TLS_MULTIPLE_THREADS_IN_TCB
549 	  __pthread_multiple_threads = *__libc_multiple_threads_ptr = 1;
550 #endif
551 
552 #ifndef __ASSUME_PRIVATE_FUTEX
553 	  /* The thread must know when private futexes are supported.  */
554 	  pd->header.private_futex = THREAD_GETMEM (THREAD_SELF,
555                                                     header.private_futex);
556 #endif
557 
558 #ifdef NEED_DL_SYSINFO
559 	  /* Copy the sysinfo value from the parent.  */
560 	  THREAD_SYSINFO(pd) = THREAD_SELF_SYSINFO;
561 #endif
562 
563 	  /* Allocate the DTV for this thread.  */
564 	  if (_dl_allocate_tls (TLS_TPADJ (pd)) == NULL)
565 	    {
566 	      /* Something went wrong.  */
567 	      assert (errno == ENOMEM);
568 
569 	      /* Free the stack memory we just allocated.  */
570 	      (void) munmap (mem, size);
571 
572 	      return EAGAIN;
573 	    }
574 
575 
576 	  /* Prepare to modify global data.  */
577 	  lll_lock (stack_cache_lock, LLL_PRIVATE);
578 
579 	  /* And add to the list of stacks in use.  */
580 	  stack_list_add (&pd->list, &stack_used);
581 
582 	  lll_unlock (stack_cache_lock, LLL_PRIVATE);
583 
584 
585 	  /* Note that all of the stack and the thread descriptor is
586 	     zeroed.  This means we do not have to initialize fields
587 	     with initial value zero.  This is specifically true for
588 	     the 'tid' field which is always set back to zero once the
589 	     stack is not used anymore and for the 'guardsize' field
590 	     which will be read next.  */
591 	}
592 
593       /* Create or resize the guard area if necessary.  */
594       if (__builtin_expect (guardsize > pd->guardsize, 0))
595 	{
596 #ifdef NEED_SEPARATE_REGISTER_STACK
597 	  char *guard = mem + (((size - guardsize) / 2) & ~pagesize_m1);
598 #elif defined _STACK_GROWS_DOWN
599 	  char *guard = mem;
600 #elif defined _STACK_GROWS_UP
601 	  char *guard = (char *) (((uintptr_t) pd - guardsize) & ~pagesize_m1);
602 #endif
603 #ifdef __ARCH_USE_MMU__
604 	  if (mprotect (guard, guardsize, PROT_NONE) != 0)
605 	    {
606 	      int err;
607 #ifdef NEED_SEPARATE_REGISTER_STACK
608 	    mprot_error:
609 #endif
610 	      err = errno;
611 
612 	      lll_lock (stack_cache_lock, LLL_PRIVATE);
613 
614 	      /* Remove the thread from the list.  */
615 	      stack_list_del (&pd->list);
616 
617 	      lll_unlock (stack_cache_lock, LLL_PRIVATE);
618 
619 	      /* Get rid of the TLS block we allocated.  */
620 	      _dl_deallocate_tls (TLS_TPADJ (pd), false);
621 
622 	      /* Free the stack memory regardless of whether the size
623 		 of the cache is over the limit or not.  If this piece
624 		 of memory caused problems we better do not use it
625 		 anymore.  Uh, and we ignore possible errors.  There
626 		 is nothing we could do.  */
627 	      (void) munmap (mem, size);
628 
629 	      return err;
630 	    }
631 #endif
632 
633 	  pd->guardsize = guardsize;
634 	}
635       else if (__builtin_expect (pd->guardsize - guardsize > size - reqsize,
636 				 0))
637 	{
638 	  /* The old guard area is too large.  */
639 
640 #ifdef NEED_SEPARATE_REGISTER_STACK
641 	  char *guard = mem + (((size - guardsize) / 2) & ~pagesize_m1);
642 	  char *oldguard = mem + (((size - pd->guardsize) / 2) & ~pagesize_m1);
643 
644 #ifdef __ARCH_USE_MMU__
645 	  if (oldguard < guard
646 	      && mprotect (oldguard, guard - oldguard, prot) != 0)
647 	    goto mprot_error;
648 
649 	  if (mprotect (guard + guardsize,
650 			oldguard + pd->guardsize - guard - guardsize,
651 			prot) != 0)
652 	    goto mprot_error;
653 #elif defined _STACK_GROWS_DOWN
654 	  if (mprotect ((char *) mem + guardsize, pd->guardsize - guardsize,
655 			prot) != 0)
656 	    goto mprot_error;
657 #elif defined _STACK_GROWS_UP
658 	  if (mprotect ((char *) (((uintptr_t) pd - pd->guardsize) & ~pagesize_m1),
659 			pd->guardsize - guardsize, prot) != 0)
660 	    goto mprot_error;
661 #endif
662 #endif
663 
664 	  pd->guardsize = guardsize;
665 	}
666       /* The pthread_getattr_np() calls need to get passed the size
667 	 requested in the attribute, regardless of how large the
668 	 actually used guardsize is.  */
669       pd->reported_guardsize = guardsize;
670     }
671 
672   /* Initialize the lock.  We have to do this unconditionally since the
673      stillborn thread could be canceled while the lock is taken.  */
674   pd->lock = LLL_LOCK_INITIALIZER;
675 
676   /* The robust mutex lists also need to be initialized
677      unconditionally because the cleanup for the previous stack owner
678      might have happened in the kernel.  */
679   pd->robust_head.futex_offset = (offsetof (pthread_mutex_t, __data.__lock)
680 				  - offsetof (pthread_mutex_t,
681 					      __data.__list.__next));
682   pd->robust_head.list_op_pending = NULL;
683 #ifdef __PTHREAD_MUTEX_HAVE_PREV
684   pd->robust_prev = &pd->robust_head;
685 #endif
686   pd->robust_head.list = &pd->robust_head;
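  /* A list pointing at itself is the empty, circular robust-mutex list;
     FUTEX_OFFSET tells the kernel how far the futex word (__lock) lies
     from the list member (__list.__next) embedded in each mutex.  */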
687 
688   /* We place the thread descriptor at the end of the stack.  */
689   *pdp = pd;
690 
691 #if defined(TLS_TCB_AT_TP)
692   /* The stack begins before the TCB and the static TLS block.  */
693   stacktop = ((char *) (pd + 1) - __static_tls_size);
694 #elif defined(TLS_DTV_AT_TP)
695   stacktop = (char *) (pd - 1);
696 #endif
697 
698 #ifdef NEED_SEPARATE_REGISTER_STACK
699   *stack = pd->stackblock;
700   *stacksize = stacktop - *stack;
701 #elif defined _STACK_GROWS_DOWN
702   *stack = stacktop;
703 #elif defined _STACK_GROWS_UP
704   *stack = pd->stackblock;
705   assert (*stack > 0);
706 #endif
707 
708   return 0;
709 }
710 
711 
712 void
713 internal_function
714 __deallocate_stack (struct pthread *pd)
715 {
716   lll_lock (stack_cache_lock, LLL_PRIVATE);
717 
718   /* Remove the thread from the list of threads with user defined
719      stacks.  */
720   stack_list_del (&pd->list);
721 
722   /* Not much to do.  Just free the mmap()ed memory.  Note that we do
723      not reset the 'used' flag in the 'tid' field.  This is done by
724      the kernel.  If no thread has been created yet this field is
725      still zero.  */
726   if (__builtin_expect (! pd->user_stack, 1))
727     (void) queue_stack (pd);
728   else
729     /* Free the memory associated with the ELF TLS.  */
730     _dl_deallocate_tls (TLS_TPADJ (pd), false);
731 
732   lll_unlock (stack_cache_lock, LLL_PRIVATE);
733 }
734 
735 
736 int
737 internal_function
738 __make_stacks_executable (void **stack_endp)
739 {
740   /* Making stacks executable is not supported here; fail with EPERM.  */
741   int err = EPERM;
742   if (err != 0)
743     return err;
744 
745 #ifdef NEED_SEPARATE_REGISTER_STACK
746   const size_t pagemask = ~(__getpagesize () - 1);
747 #endif
748 
749   lll_lock (stack_cache_lock, LLL_PRIVATE);
750 
751   list_t *runp;
752   list_for_each (runp, &stack_used)
753     {
754       err = change_stack_perm (list_entry (runp, struct pthread, list)
755 #ifdef NEED_SEPARATE_REGISTER_STACK
756 			       , pagemask
757 #endif
758 			       );
759       if (err != 0)
760 	break;
761     }
762 
763   /* Also change the permission for the currently unused stacks.  This
764      might be wasted time but better spend it here than adding a check
765      in the fast path.  */
766   if (err == 0)
767     list_for_each (runp, &stack_cache)
768       {
769 	err = change_stack_perm (list_entry (runp, struct pthread, list)
770 #ifdef NEED_SEPARATE_REGISTER_STACK
771 				 , pagemask
772 #endif
773 				 );
774 	if (err != 0)
775 	  break;
776       }
777 
778   lll_unlock (stack_cache_lock, LLL_PRIVATE);
779 
780   return err;
781 }
782 
783 
784 /* In case of a fork() call the memory allocation in the child will be
785    the same but only one thread is running.  All stacks except that of
786    the one running thread are not used anymore.  We have to recycle
787    them.  */
788 void
789 __reclaim_stacks (void)
790 {
791   struct pthread *self = (struct pthread *) THREAD_SELF;
792 
793   /* No locking necessary.  The caller's stack is the only one in use.  But
794      we have to be aware that we might have interrupted a list
795      operation.  */
796 
797   if (in_flight_stack != 0)
798     {
799       bool add_p = in_flight_stack & 1;
800       list_t *elem = (list_t *)(uintptr_t)(in_flight_stack & ~UINTMAX_C (1));
801 
802       if (add_p)
803 	{
804 	  /* We always add at the beginning of the list.  So in this
805 	     case we only need to check the beginning of these lists.  */
806 	  int check_list (list_t *l)
807 	  {
808 	    if (l->next->prev != l)
809 	      {
810 		assert (l->next->prev == elem);
811 
812 		elem->next = l->next;
813 		elem->prev = l;
814 		l->next = elem;
815 
816 		return 1;
817 	      }
818 
819 	    return 0;
820 	  }
821 
822 	  if (check_list (&stack_used) == 0)
823 	    (void) check_list (&stack_cache);
824 	}
825       else
826 	{
827 	  /* We can simply always replay the delete operation.  */
828 	  elem->next->prev = elem->prev;
829 	  elem->prev->next = elem->next;
830 	}
831     }
832 
833   /* Mark all stacks except the still running one as free.  */
834   list_t *runp;
835   list_for_each (runp, &stack_used)
836     {
837       struct pthread *curp = list_entry (runp, struct pthread, list);
838       if (curp != self)
839 	{
840 	  /* This marks the stack as free.  */
841 	  curp->tid = 0;
842 
843 	  /* Account for the size of the stack.  */
844 	  stack_cache_actsize += curp->stackblock_size;
845 
846 	  if (curp->specific_used)
847 	    {
848 	      /* Clear the thread-specific data.  */
849 	      memset (curp->specific_1stblock, '\0',
850 		      sizeof (curp->specific_1stblock));
851 
852 	      curp->specific_used = false;
853 
854 	      size_t cnt;
855 	      for (cnt = 1; cnt < PTHREAD_KEY_1STLEVEL_SIZE; ++cnt)
856 		if (curp->specific[cnt] != NULL)
857 		  {
858 		    memset (curp->specific[cnt], '\0',
859 			    sizeof (curp->specific_1stblock));
860 
861 		    /* We have allocated the block which we do not
862 		       free here so re-set the bit.  */
863 		    curp->specific_used = true;
864 		  }
865 	    }
866 	}
867     }
868 
869   /* Add the stack of all running threads to the cache.  */
870   list_splice (&stack_used, &stack_cache);
871 
872   /* Remove the entry for the current thread from the cache list
873      and add it to the list of running threads.  Which of the two
874      lists is decided by the user_stack flag.  */
875   stack_list_del (&self->list);
876 
877   /* Re-initialize the lists for all the threads.  */
878   INIT_LIST_HEAD (&stack_used);
879   INIT_LIST_HEAD (&__stack_user);
880 
881   if (__builtin_expect (THREAD_GETMEM (self, user_stack), 0))
882     list_add (&self->list, &__stack_user);
883   else
884     list_add (&self->list, &stack_used);
885 
886   /* There is one thread running.  */
887   __nptl_nthreads = 1;
888 
889   in_flight_stack = 0;
890 
891   /* Initialize the lock.  */
892   stack_cache_lock = LLL_LOCK_INITIALIZER;
893 }
894 
895 
896 static void
897 internal_function
898 setxid_mark_thread (struct xid_command *cmdp, struct pthread *t)
899 {
900   int ch;
901 
902   /* Don't let the thread exit before the setxid handler runs.  */
903   t->setxid_futex = 0;
904 
905   do
906     {
907       ch = t->cancelhandling;
908 
909       /* If the thread is exiting right now, ignore it.  */
910       if ((ch & EXITING_BITMASK) != 0)
911 	return;
912     }
913   while (atomic_compare_and_exchange_bool_acq (&t->cancelhandling,
914 					       ch | SETXID_BITMASK, ch));
915 }
916 
917 
918 static void
919 internal_function
920 setxid_unmark_thread (struct xid_command *cmdp, struct pthread *t)
921 {
922   int ch;
923 
924   do
925     {
926       ch = t->cancelhandling;
927       if ((ch & SETXID_BITMASK) == 0)
928 	return;
929     }
930   while (atomic_compare_and_exchange_bool_acq (&t->cancelhandling,
931 					       ch & ~SETXID_BITMASK, ch));
932 
933   /* Release the futex just in case.  */
934   t->setxid_futex = 1;
935   lll_futex_wake (&t->setxid_futex, 1, LLL_PRIVATE);
936 }
937 
938 
939 static int
940 internal_function
941 setxid_signal_thread (struct xid_command *cmdp, struct pthread *t)
942 {
943   if ((t->cancelhandling & SETXID_BITMASK) == 0)
944     return 0;
945 
946   int val;
947   pid_t pid = getpid ();
948   INTERNAL_SYSCALL_DECL (err);
949   val = INTERNAL_SYSCALL (tgkill, err, 3, pid, t->tid, SIGSETXID);
950 
951   /* If this failed, the thread has not started yet or has already exited.  */
952   if (!INTERNAL_SYSCALL_ERROR_P (val, err))
953     {
954       atomic_increment (&cmdp->cntr);
955       return 1;
956     }
957   else
958     return 0;
959 }
960 
961 
962 int
963 attribute_hidden
964 __nptl_setxid (struct xid_command *cmdp)
965 {
966   int signalled;
967   int result;
968   lll_lock (stack_cache_lock, LLL_PRIVATE);
969 
970   __xidcmd = cmdp;
971   cmdp->cntr = 0;
972 
973   struct pthread *self = THREAD_SELF;
974 
975   /* Iterate over the list with system-allocated threads first.  */
976   list_t *runp;
977   list_for_each (runp, &stack_used)
978     {
979       struct pthread *t = list_entry (runp, struct pthread, list);
980       if (t == self)
981 	continue;
982 
983       setxid_mark_thread (cmdp, t);
984     }
985 
986   /* Now the list with threads using user-allocated stacks.  */
987   list_for_each (runp, &__stack_user)
988     {
989       struct pthread *t = list_entry (runp, struct pthread, list);
990       if (t == self)
991 	continue;
992 
993       setxid_mark_thread (cmdp, t);
994     }
995 
996   /* Iterate until we don't succeed in signalling anyone.  That means
997      we have gotten all running threads, and their children will be
998      automatically correct once started.  */
999   do
1000     {
1001       signalled = 0;
1002 
1003       list_for_each (runp, &stack_used)
1004 	{
1005 	  struct pthread *t = list_entry (runp, struct pthread, list);
1006 	  if (t == self)
1007 	    continue;
1008 
1009 	  signalled += setxid_signal_thread (cmdp, t);
1010 	}
1011 
1012       list_for_each (runp, &__stack_user)
1013 	{
1014 	  struct pthread *t = list_entry (runp, struct pthread, list);
1015 	  if (t == self)
1016 	    continue;
1017 
1018 	  signalled += setxid_signal_thread (cmdp, t);
1019 	}
1020 
1021       int cur = cmdp->cntr;
1022       while (cur != 0)
1023 	{
1024 	  lll_futex_wait (&cmdp->cntr, cur, LLL_PRIVATE);
1025 	  cur = cmdp->cntr;
1026 	}
1027     }
1028   while (signalled != 0);
1029 
1030   /* Clean up flags, so that no thread blocks during exit waiting
1031      for a signal which will never come.  */
1032   list_for_each (runp, &stack_used)
1033     {
1034       struct pthread *t = list_entry (runp, struct pthread, list);
1035       if (t == self)
1036 	continue;
1037 
1038       setxid_unmark_thread (cmdp, t);
1039     }
1040 
1041   list_for_each (runp, &__stack_user)
1042     {
1043       struct pthread *t = list_entry (runp, struct pthread, list);
1044       if (t == self)
1045 	continue;
1046 
1047       setxid_unmark_thread (cmdp, t);
1048     }
1049 
1050   /* This must be last, otherwise the current thread might not have
1051      permission to send the SIGSETXID signal to the other threads.  */
1052   INTERNAL_SYSCALL_DECL (err);
1053   result = INTERNAL_SYSCALL_NCS (cmdp->syscall_no, err, 3,
1054 				 cmdp->id[0], cmdp->id[1], cmdp->id[2]);
1055   if (INTERNAL_SYSCALL_ERROR_P (result, err))
1056     {
1057       __set_errno (INTERNAL_SYSCALL_ERRNO (result, err));
1058       result = -1;
1059     }
1060 
1061   lll_unlock (stack_cache_lock, LLL_PRIVATE);
1062   return result;
1063 }
1064 
1065 static inline void __attribute__((always_inline))
1066 init_one_static_tls (struct pthread *curp, struct link_map *map)
1067 {
1068   dtv_t *dtv = GET_DTV (TLS_TPADJ (curp));
1069 # if defined(TLS_TCB_AT_TP)
1070   void *dest = (char *) curp - map->l_tls_offset;
1071 # elif defined(TLS_DTV_AT_TP)
1072   void *dest = (char *) curp + map->l_tls_offset + TLS_PRE_TCB_SIZE;
1073 # else
1074 #  error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
1075 # endif
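  /* With TLS_TCB_AT_TP the static TLS block lives below the TCB, so
     l_tls_offset is subtracted; with TLS_DTV_AT_TP it lives above, so the
     offset (plus TLS_PRE_TCB_SIZE to step over the struct pthread) is
     added instead.  */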
1076 
1077   /* Fill in the DTV slot so that a later LD/GD access will find it.  */
1078   dtv[map->l_tls_modid].pointer.val = dest;
1079   dtv[map->l_tls_modid].pointer.is_static = true;
1080 
1081   /* Initialize the memory.  */
1082   memset (mempcpy (dest, map->l_tls_initimage, map->l_tls_initimage_size),
1083 	  '\0', map->l_tls_blocksize - map->l_tls_initimage_size);
1084 }
1085 
1086 void
1087 attribute_hidden
1088 __pthread_init_static_tls (struct link_map *map)
1089 {
1090   lll_lock (stack_cache_lock, LLL_PRIVATE);
1091 
1092   /* Iterate over the list with system-allocated threads first.  */
1093   list_t *runp;
1094   list_for_each (runp, &stack_used)
1095     init_one_static_tls (list_entry (runp, struct pthread, list), map);
1096 
1097   /* Now the list with threads using user-allocated stacks.  */
1098   list_for_each (runp, &__stack_user)
1099     init_one_static_tls (list_entry (runp, struct pthread, list), map);
1100 
1101   lll_unlock (stack_cache_lock, LLL_PRIVATE);
1102 }
1103 
1104 
1105 void
1106 attribute_hidden
1107 __wait_lookup_done (void)
1108 {
1109   lll_lock (stack_cache_lock, LLL_PRIVATE);
1110 
1111   struct pthread *self = THREAD_SELF;
1112 
1113   /* Iterate over the list with system-allocated threads first.  */
1114   list_t *runp;
1115   list_for_each (runp, &stack_used)
1116     {
1117       struct pthread *t = list_entry (runp, struct pthread, list);
1118       if (t == self || t->header.gscope_flag == THREAD_GSCOPE_FLAG_UNUSED)
1119 	continue;
1120 
1121       int *const gscope_flagp = &t->header.gscope_flag;
1122 
1123       /* We have to wait until this thread is done with the global
1124 	 scope.  First tell the thread that we are waiting and
1125 	 possibly have to be woken.  */
1126       if (atomic_compare_and_exchange_bool_acq (gscope_flagp,
1127 						THREAD_GSCOPE_FLAG_WAIT,
1128 						THREAD_GSCOPE_FLAG_USED))
1129 	continue;
1130 
1131       do
1132 	lll_futex_wait (gscope_flagp, THREAD_GSCOPE_FLAG_WAIT, LLL_PRIVATE);
1133       while (*gscope_flagp == THREAD_GSCOPE_FLAG_WAIT);
1134     }
1135 
1136   /* Now the list with threads using user-allocated stacks.  */
1137   list_for_each (runp, &__stack_user)
1138     {
1139       struct pthread *t = list_entry (runp, struct pthread, list);
1140       if (t == self || t->header.gscope_flag == THREAD_GSCOPE_FLAG_UNUSED)
1141 	continue;
1142 
1143       int *const gscope_flagp = &t->header.gscope_flag;
1144 
1145       /* We have to wait until this thread is done with the global
1146 	 scope.  First tell the thread that we are waiting and
1147 	 possibly have to be woken.  */
1148       if (atomic_compare_and_exchange_bool_acq (gscope_flagp,
1149 						THREAD_GSCOPE_FLAG_WAIT,
1150 						THREAD_GSCOPE_FLAG_USED))
1151 	continue;
1152 
1153       do
1154 	lll_futex_wait (gscope_flagp, THREAD_GSCOPE_FLAG_WAIT, LLL_PRIVATE);
1155       while (*gscope_flagp == THREAD_GSCOPE_FLAG_WAIT);
1156     }
1157 
1158   lll_unlock (stack_cache_lock, LLL_PRIVATE);
1159 }
1160