1 /*
2  * Copyright 2016-2025 The OpenSSL Project Authors. All Rights Reserved.
3  *
4  * Licensed under the Apache License 2.0 (the "License").  You may not use
5  * this file except in compliance with the License.  You can obtain a copy
6  * in the file LICENSE in the source distribution or at
7  * https://www.openssl.org/source/license.html
8  */
9 
10 /* We need to use the OPENSSL_fork_*() deprecated APIs */
11 #define OPENSSL_SUPPRESS_DEPRECATED
12 
13 #if !defined(__GNUC__) || !defined(__ATOMIC_ACQ_REL) || \
14     defined(BROKEN_CLANG_ATOMICS) || defined(OPENSSL_NO_STDIO)
15 /*
16  * We only enable REPORT_RWLOCK_CONTENTION on clang/gcc when we have
17  * atomics available.  We do this because we need to use an atomic to track
18  * when we can close the log file.  We could use the CRYPTO_atomic_ API,
19  * but that requires lock creation, which gets us into a bad recursive loop
20  * when we try to initialize the file pointer.
21  */
22 # ifdef REPORT_RWLOCK_CONTENTION
23 #  warning "RWLOCK CONTENTION REPORTING NOT SUPPORTED, Disabling"
24 #  undef REPORT_RWLOCK_CONTENTION
25 # endif
26 #endif
27 
28 #ifdef REPORT_RWLOCK_CONTENTION
29 # define _GNU_SOURCE
30 # include <execinfo.h>
31 # include <unistd.h>
32 #endif
33 
34 #include <openssl/crypto.h>
35 #include <crypto/cryptlib.h>
36 #include <crypto/sparse_array.h>
37 #include "internal/cryptlib.h"
38 #include "internal/threads_common.h"
39 #include "internal/rcu.h"
40 #ifdef REPORT_RWLOCK_CONTENTION
41 # include <fcntl.h>
42 # include <stdbool.h>
43 # include <sys/syscall.h>
44 # include <sys/uio.h>
45 # include "internal/time.h"
46 #endif
47 #include "rcu_internal.h"
48 
49 #if defined(__clang__) && defined(__has_feature)
50 # if __has_feature(thread_sanitizer)
51 #  define __SANITIZE_THREAD__
52 # endif
53 #endif
54 
55 #if defined(__SANITIZE_THREAD__)
56 # include <sanitizer/tsan_interface.h>
57 # define TSAN_FAKE_UNLOCK(x)   __tsan_mutex_pre_unlock((x), 0); \
58 __tsan_mutex_post_unlock((x), 0)
59 
60 # define TSAN_FAKE_LOCK(x)  __tsan_mutex_pre_lock((x), 0); \
61 __tsan_mutex_post_lock((x), 0, 0)
62 #else
63 # define TSAN_FAKE_UNLOCK(x)
64 # define TSAN_FAKE_LOCK(x)
65 #endif
66 
67 #if defined(__sun)
68 # include <atomic.h>
69 #endif
70 
71 #if defined(__apple_build_version__) && __apple_build_version__ < 6000000
72 /*
73  * OS/X 10.7 and 10.8 had a weird version of clang which has __ATOMIC_ACQUIRE and
74  * __ATOMIC_ACQ_REL but which expects only one parameter for __atomic_is_lock_free(),
75  * rather than two, i.e. it has the signature __atomic_is_lock_free(sizeof(_Atomic(T))).
76  * All of this makes it impossible to use __atomic_is_lock_free here.
77  *
78  * See: https://github.com/llvm/llvm-project/commit/a4c2602b714e6c6edb98164550a5ae829b2de760
79  */
80 # define BROKEN_CLANG_ATOMICS
81 #endif
82 
83 #if defined(OPENSSL_THREADS) && !defined(CRYPTO_TDEBUG) && !defined(OPENSSL_SYS_WINDOWS)
84 
85 # if defined(OPENSSL_SYS_UNIX)
86 #  include <sys/types.h>
87 #  include <unistd.h>
88 # endif
89 
90 # include <assert.h>
91 
92 /*
93  * The Non-Stop KLT thread model currently seems broken in its rwlock
94  * implementation
95  */
96 # if defined(PTHREAD_RWLOCK_INITIALIZER) && !defined(_KLT_MODEL_)
97 #  define USE_RWLOCK
98 # endif
99 
100 /*
101  * For all GNU/clang atomic builtins, we also need fallbacks, to cover all
102  * other compilers.
103  *
104  * Unfortunately, we can't do that with some "generic type", because there's no
105  * guarantee that the chosen generic type is large enough to cover all cases.
106  * Therefore, we implement fallbacks for each applicable type, with composed
107  * names that include the type they handle.
108  *
109  * (an anecdote: we previously tried to use |void *| as the generic type, with
110  * the thought that the pointer itself is the largest type.  However, this is
111  * not true on 32-bit pointer platforms, as a |uint64_t| is twice as large)
112  *
113  * All applicable ATOMIC_ macros take the intended type as first parameter, so
114  * they can map to the correct fallback function.  In the GNU/clang case, that
115  * parameter is simply ignored.
116  */
117 
118 /*
119  * Internal types used with the ATOMIC_ macros, to make it possible to compose
120  * fallback function names.
121  */
122 typedef void *pvoid;
123 
124 # if defined(__GNUC__) && defined(__ATOMIC_ACQUIRE) && !defined(BROKEN_CLANG_ATOMICS) \
125     && !defined(USE_ATOMIC_FALLBACKS)
126 #  define ATOMIC_LOAD_N(t, p, o) __atomic_load_n(p, o)
127 #  define ATOMIC_STORE_N(t, p, v, o) __atomic_store_n(p, v, o)
128 #  define ATOMIC_STORE(t, p, v, o) __atomic_store(p, v, o)
129 #  define ATOMIC_ADD_FETCH(p, v, o) __atomic_add_fetch(p, v, o)
130 #  define ATOMIC_SUB_FETCH(p, v, o) __atomic_sub_fetch(p, v, o)
131 # else
132 static pthread_mutex_t atomic_sim_lock = PTHREAD_MUTEX_INITIALIZER;
133 
134 #  define IMPL_fallback_atomic_load_n(t)                        \
135     static ossl_inline t fallback_atomic_load_n_##t(t *p)            \
136     {                                                           \
137         t ret;                                                  \
138                                                                 \
139         pthread_mutex_lock(&atomic_sim_lock);                   \
140         ret = *p;                                               \
141         pthread_mutex_unlock(&atomic_sim_lock);                 \
142         return ret;                                             \
143     }
144 IMPL_fallback_atomic_load_n(uint32_t)
145 IMPL_fallback_atomic_load_n(uint64_t)
146 IMPL_fallback_atomic_load_n(pvoid)
147 
148 #  define ATOMIC_LOAD_N(t, p, o) fallback_atomic_load_n_##t(p)
149 
150 #  define IMPL_fallback_atomic_store_n(t)                       \
151     static ossl_inline t fallback_atomic_store_n_##t(t *p, t v)      \
152     {                                                           \
153         t ret;                                                  \
154                                                                 \
155         pthread_mutex_lock(&atomic_sim_lock);                   \
156         ret = *p;                                               \
157         *p = v;                                                 \
158         pthread_mutex_unlock(&atomic_sim_lock);                 \
159         return ret;                                             \
160     }
161 IMPL_fallback_atomic_store_n(uint32_t)
162 
163 #  define ATOMIC_STORE_N(t, p, v, o) fallback_atomic_store_n_##t(p, v)
164 
165 #  define IMPL_fallback_atomic_store(t)                         \
166     static ossl_inline void fallback_atomic_store_##t(t *p, t *v)    \
167     {                                                           \
168         pthread_mutex_lock(&atomic_sim_lock);                   \
169         *p = *v;                                                \
170         pthread_mutex_unlock(&atomic_sim_lock);                 \
171     }
172 IMPL_fallback_atomic_store(pvoid)
173 
174 #  define ATOMIC_STORE(t, p, v, o) fallback_atomic_store_##t(p, v)
175 
176 /*
177  * The fallbacks that follow don't need any per type implementation, as
178  * they are designed for uint64_t only.  If there comes a time when multiple
179  * types need to be covered, it's relatively easy to refactor them the same
180  * way as the fallbacks above.
181  */
182 
183 static ossl_inline uint64_t fallback_atomic_add_fetch(uint64_t *p, uint64_t v)
184 {
185     uint64_t ret;
186 
187     pthread_mutex_lock(&atomic_sim_lock);
188     *p += v;
189     ret = *p;
190     pthread_mutex_unlock(&atomic_sim_lock);
191     return ret;
192 }
193 
194 #  define ATOMIC_ADD_FETCH(p, v, o) fallback_atomic_add_fetch(p, v)
195 
196 static ossl_inline uint64_t fallback_atomic_sub_fetch(uint64_t *p, uint64_t v)
197 {
198     uint64_t ret;
199 
200     pthread_mutex_lock(&atomic_sim_lock);
201     *p -= v;
202     ret = *p;
203     pthread_mutex_unlock(&atomic_sim_lock);
204     return ret;
205 }
206 
207 #  define ATOMIC_SUB_FETCH(p, v, o) fallback_atomic_sub_fetch(p, v)
208 # endif
209 
210 /*
211  * This is the core of an rcu lock. It tracks the readers and writers for the
212  * current quiescence point for a given lock. users is a 64 bit count of
213  * the readers currently holding this quiescence point.
214  *
215  */
216 struct rcu_qp {
217     uint64_t users;
218 };
219 
220 struct thread_qp {
221     struct rcu_qp *qp;
222     unsigned int depth;
223     CRYPTO_RCU_LOCK *lock;
224 };
225 
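/*
 * Maximum number of distinct rcu locks a single thread can hold read-side
 * at the same time; nested read locks on the same lock only increase the
 * per-entry depth counter rather than consuming another slot.
 */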
226 # define MAX_QPS 10
227 /*
228  * This is the per thread tracking data that is assigned to each thread
229  * participating in an rcu qp.
230  *
231  * Each thread_qps entry records the qp the thread last acquired for a
232  * given lock, together with its nesting depth.
233  *
234  */
235 struct rcu_thr_data {
236     struct thread_qp thread_qps[MAX_QPS];
237 };
238 
239 /*
240  * This is the internal version of a CRYPTO_RCU_LOCK
241  * it is cast from CRYPTO_RCU_LOCK
242  */
243 struct rcu_lock_st {
244     /* Callbacks to call for next ossl_synchronize_rcu */
245     struct rcu_cb_item *cb_items;
246 
247     /* The context we are being created against */
248     OSSL_LIB_CTX *ctx;
249 
250     /* Array of quiescent points for synchronization */
251     struct rcu_qp *qp_group;
252 
253     /* rcu generation counter for in-order retirement */
254     uint32_t id_ctr;
255 
256     /* Number of elements in qp_group array */
257     uint32_t group_count;
258 
259     /* Index of the current qp in the qp_group array */
260     uint32_t reader_idx;
261 
262     /* value of the next id_ctr value to be retired */
263     uint32_t next_to_retire;
264 
265     /* index of the next free rcu_qp in the qp_group */
266     uint32_t current_alloc_idx;
267 
268     /* number of qp's in qp_group array currently being retired */
269     uint32_t writers_alloced;
270 
271     /* lock protecting write side operations */
272     pthread_mutex_t write_lock;
273 
274     /* lock protecting updates to writers_alloced/current_alloc_idx */
275     pthread_mutex_t alloc_lock;
276 
277     /* signal to wake threads waiting on alloc_lock */
278     pthread_cond_t alloc_signal;
279 
280     /* lock to enforce in-order retirement */
281     pthread_mutex_t prior_lock;
282 
283     /* signal to wake threads waiting on prior_lock */
284     pthread_cond_t prior_signal;
285 };
286 
287 /* Read side acquisition of the current qp */
288 static struct rcu_qp *get_hold_current_qp(struct rcu_lock_st *lock)
289 {
290     uint32_t qp_idx;
291 
292     /* get the current qp index */
293     for (;;) {
294         qp_idx = ATOMIC_LOAD_N(uint32_t, &lock->reader_idx, __ATOMIC_RELAXED);
295 
296         /*
297          * Notes on use of __ATOMIC_ACQUIRE
298          * We need to ensure the following:
299          * 1) That subsequent operations aren't optimized by hoisting them above
300          * this operation.  Specifically, we don't want the below re-load of
301          * qp_idx to get optimized away
302          * 2) We want to ensure that any updating of reader_idx on the write side
303          * of the lock is flushed from a local cpu cache so that we see any
304          * updates prior to the load.  This is a non-issue on cache coherent
305          * systems like x86, but is relevant on other arches
306          */
307         ATOMIC_ADD_FETCH(&lock->qp_group[qp_idx].users, (uint64_t)1,
308                          __ATOMIC_ACQUIRE);
309 
310         /* if the idx hasn't changed, we're good, else try again */
311         if (qp_idx == ATOMIC_LOAD_N(uint32_t, &lock->reader_idx,
312                                     __ATOMIC_RELAXED))
313             break;
314 
315         ATOMIC_SUB_FETCH(&lock->qp_group[qp_idx].users, (uint64_t)1,
316                          __ATOMIC_RELAXED);
317     }
318 
319     return &lock->qp_group[qp_idx];
320 }
321 
322 static void ossl_rcu_free_local_data(void *arg)
323 {
324     OSSL_LIB_CTX *ctx = arg;
325     struct rcu_thr_data *data = CRYPTO_THREAD_get_local_ex(CRYPTO_THREAD_LOCAL_RCU_KEY, ctx);
326 
327     CRYPTO_THREAD_set_local_ex(CRYPTO_THREAD_LOCAL_RCU_KEY, ctx, NULL);
328     OPENSSL_free(data);
329 }
330 
331 int ossl_rcu_read_lock(CRYPTO_RCU_LOCK *lock)
332 {
333     struct rcu_thr_data *data;
334     int i, available_qp = -1;
335 
336     /*
337      * we're going to access current_qp here so ask the
338      * processor to fetch it
339      */
340     data = CRYPTO_THREAD_get_local_ex(CRYPTO_THREAD_LOCAL_RCU_KEY, lock->ctx);
341 
342     if (data == NULL) {
343         data = OPENSSL_zalloc(sizeof(*data));
344         if (data == NULL)
345             return 0;
346 
347         if (!CRYPTO_THREAD_set_local_ex(CRYPTO_THREAD_LOCAL_RCU_KEY, lock->ctx, data)) {
348             OPENSSL_free(data);
349             return 0;
350         }
351         if (!ossl_init_thread_start(NULL, lock->ctx, ossl_rcu_free_local_data)) {
352             OPENSSL_free(data);
353             CRYPTO_THREAD_set_local_ex(CRYPTO_THREAD_LOCAL_RCU_KEY, lock->ctx, NULL);
354             return 0;
355         }
356     }
357 
358     for (i = 0; i < MAX_QPS; i++) {
359         if (data->thread_qps[i].qp == NULL && available_qp == -1)
360             available_qp = i;
361         /* If we have a hold on this lock already, we're good */
362         if (data->thread_qps[i].lock == lock) {
363             data->thread_qps[i].depth++;
364             return 1;
365         }
366     }
367 
368     /*
369      * if we get here, then we don't have a hold on this lock yet
370      */
371     assert(available_qp != -1);
372 
373     data->thread_qps[available_qp].qp = get_hold_current_qp(lock);
374     data->thread_qps[available_qp].depth = 1;
375     data->thread_qps[available_qp].lock = lock;
376     return 1;
377 }
378 
379 void ossl_rcu_read_unlock(CRYPTO_RCU_LOCK *lock)
380 {
381     int i;
382     struct rcu_thr_data *data = CRYPTO_THREAD_get_local_ex(CRYPTO_THREAD_LOCAL_RCU_KEY, lock->ctx);
383     uint64_t ret;
384 
385     assert(data != NULL);
386 
387     for (i = 0; i < MAX_QPS; i++) {
388         if (data->thread_qps[i].lock == lock) {
389             /*
390              * we have to use __ATOMIC_RELEASE here
391              * to ensure that all preceding read instructions complete
392              * before the decrement is visible to ossl_synchronize_rcu
393              */
394             data->thread_qps[i].depth--;
395             if (data->thread_qps[i].depth == 0) {
396                 ret = ATOMIC_SUB_FETCH(&data->thread_qps[i].qp->users,
397                                        (uint64_t)1, __ATOMIC_RELEASE);
398                 OPENSSL_assert(ret != UINT64_MAX);
399                 data->thread_qps[i].qp = NULL;
400                 data->thread_qps[i].lock = NULL;
401             }
402             return;
403         }
404     }
405     /*
406      * If we get here, we're trying to unlock a lock that we never acquired -
407      * that's fatal.
408      */
409     assert(0);
410 }
411 
412 /*
413  * Write side allocation routine to get the current qp
414  * and replace it with a new one
415  */
416 static struct rcu_qp *update_qp(CRYPTO_RCU_LOCK *lock, uint32_t *curr_id)
417 {
418     uint32_t current_idx;
419 
420     pthread_mutex_lock(&lock->alloc_lock);
421 
422     /*
423      * we need at least one qp to be available with one
424      * left over, so that readers can start working on
425      * one that isn't yet being waited on
426      */
427     while (lock->group_count - lock->writers_alloced < 2)
428         /* we have to wait for one to be free */
429         pthread_cond_wait(&lock->alloc_signal, &lock->alloc_lock);
430 
431     current_idx = lock->current_alloc_idx;
432 
433     /* Allocate the qp */
434     lock->writers_alloced++;
435 
436     /* increment the allocation index */
437     lock->current_alloc_idx =
438         (lock->current_alloc_idx + 1) % lock->group_count;
439 
440     *curr_id = lock->id_ctr;
441     lock->id_ctr++;
442 
443     ATOMIC_STORE_N(uint32_t, &lock->reader_idx, lock->current_alloc_idx,
444                    __ATOMIC_RELAXED);
445 
446     /*
447      * this should make sure that the new value of reader_idx is visible in
448      * get_hold_current_qp, directly after incrementing the users count
449      */
450     ATOMIC_ADD_FETCH(&lock->qp_group[current_idx].users, (uint64_t)0,
451                      __ATOMIC_RELEASE);
452 
453     /* wake up any waiters */
454     pthread_cond_signal(&lock->alloc_signal);
455     pthread_mutex_unlock(&lock->alloc_lock);
456     return &lock->qp_group[current_idx];
457 }
458 
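/*
 * Mark a qp as retired (no longer being waited on) and wake any writer
 * blocked in update_qp waiting for a free qp.
 */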
459 static void retire_qp(CRYPTO_RCU_LOCK *lock, struct rcu_qp *qp)
460 {
461     pthread_mutex_lock(&lock->alloc_lock);
462     lock->writers_alloced--;
463     pthread_cond_signal(&lock->alloc_signal);
464     pthread_mutex_unlock(&lock->alloc_lock);
465 }
466 
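/*
 * Allocate the array of quiescence points for a new lock.  The caller
 * checks the return value and frees the lock if the allocation failed.
 */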
467 static struct rcu_qp *allocate_new_qp_group(CRYPTO_RCU_LOCK *lock,
468                                             uint32_t count)
469 {
470     struct rcu_qp *new =
471         OPENSSL_calloc(count, sizeof(*new));
472 
473     lock->group_count = count;
474     return new;
475 }
476 
477 void ossl_rcu_write_lock(CRYPTO_RCU_LOCK *lock)
478 {
479     pthread_mutex_lock(&lock->write_lock);
480     TSAN_FAKE_UNLOCK(&lock->write_lock);
481 }
482 
483 void ossl_rcu_write_unlock(CRYPTO_RCU_LOCK *lock)
484 {
485     TSAN_FAKE_LOCK(&lock->write_lock);
486     pthread_mutex_unlock(&lock->write_lock);
487 }
488 
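/*
 * Writer-side grace period: detach the pending callback list, switch
 * readers over to a fresh qp, wait (in retirement order) for the old qp's
 * reader count to drain to zero, then run the callbacks.
 */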
489 void ossl_synchronize_rcu(CRYPTO_RCU_LOCK *lock)
490 {
491     struct rcu_qp *qp;
492     uint64_t count;
493     uint32_t curr_id;
494     struct rcu_cb_item *cb_items, *tmpcb;
495 
496     pthread_mutex_lock(&lock->write_lock);
497     cb_items = lock->cb_items;
498     lock->cb_items = NULL;
499     pthread_mutex_unlock(&lock->write_lock);
500 
501     qp = update_qp(lock, &curr_id);
502 
503     /* retire in order */
504     pthread_mutex_lock(&lock->prior_lock);
505     while (lock->next_to_retire != curr_id)
506         pthread_cond_wait(&lock->prior_signal, &lock->prior_lock);
507 
508     /*
509      * wait for the reader count to reach zero
510      * Note the use of __ATOMIC_ACQUIRE here to ensure that any
511      * prior __ATOMIC_RELEASE write operation in ossl_rcu_read_unlock
512      * is visible prior to our read
513      * however this is likely just necessary to silence a tsan warning
514      * because the read side should not do any write operation
515      * outside the atomic itself
516      */
517     do {
518         count = ATOMIC_LOAD_N(uint64_t, &qp->users, __ATOMIC_ACQUIRE);
519     } while (count != (uint64_t)0);
520 
521     lock->next_to_retire++;
522     pthread_cond_broadcast(&lock->prior_signal);
523     pthread_mutex_unlock(&lock->prior_lock);
524 
525     retire_qp(lock, qp);
526 
527     /* handle any callbacks that we have */
528     while (cb_items != NULL) {
529         tmpcb = cb_items;
530         cb_items = cb_items->next;
531         tmpcb->fn(tmpcb->data);
532         OPENSSL_free(tmpcb);
533     }
534 }
535 
536 /*
537  * Note: This call assumes it is made under the protection of
538  * ossl_rcu_write_lock
539  */
540 int ossl_rcu_call(CRYPTO_RCU_LOCK *lock, rcu_cb_fn cb, void *data)
541 {
542     struct rcu_cb_item *new =
543         OPENSSL_zalloc(sizeof(*new));
544 
545     if (new == NULL)
546         return 0;
547 
548     new->data = data;
549     new->fn = cb;
550 
551     new->next = lock->cb_items;
552     lock->cb_items = new;
553 
554     return 1;
555 }
556 
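/*
 * Read-side pointer dereference.  The acquire load ensures the data the
 * pointer refers to is not read before the pointer itself.
 */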
557 void *ossl_rcu_uptr_deref(void **p)
558 {
559     return ATOMIC_LOAD_N(pvoid, p, __ATOMIC_ACQUIRE);
560 }
561 
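/*
 * Write-side pointer publication.  The release store ensures the pointed-to
 * data is fully initialised before the new pointer becomes visible to readers.
 */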
562 void ossl_rcu_assign_uptr(void **p, void **v)
563 {
564     ATOMIC_STORE(pvoid, p, v, __ATOMIC_RELEASE);
565 }
566 
567 CRYPTO_RCU_LOCK *ossl_rcu_lock_new(int num_writers, OSSL_LIB_CTX *ctx)
568 {
569     struct rcu_lock_st *new;
570 
571     /*
572      * We need a minimum of 2 qp's
573      */
574     if (num_writers < 2)
575         num_writers = 2;
576 
577     ctx = ossl_lib_ctx_get_concrete(ctx);
578     if (ctx == NULL)
579         return NULL;
580 
581     new = OPENSSL_zalloc(sizeof(*new));
582     if (new == NULL)
583         return NULL;
584 
585     new->ctx = ctx;
586     pthread_mutex_init(&new->write_lock, NULL);
587     pthread_mutex_init(&new->prior_lock, NULL);
588     pthread_mutex_init(&new->alloc_lock, NULL);
589     pthread_cond_init(&new->prior_signal, NULL);
590     pthread_cond_init(&new->alloc_signal, NULL);
591 
592     new->qp_group = allocate_new_qp_group(new, num_writers);
593     if (new->qp_group == NULL) {
594         OPENSSL_free(new);
595         new = NULL;
596     }
597 
598     return new;
599 }
600 
601 void ossl_rcu_lock_free(CRYPTO_RCU_LOCK *lock)
602 {
603     struct rcu_lock_st *rlock = (struct rcu_lock_st *)lock;
604 
605     if (lock == NULL)
606         return;
607 
608     /* make sure we're synchronized */
609     ossl_synchronize_rcu(rlock);
610 
611     OPENSSL_free(rlock->qp_group);
612     /* There should only be a single qp left now */
613     OPENSSL_free(rlock);
614 }
615 
616 # ifdef REPORT_RWLOCK_CONTENTION
617 /*
618  * Normally we would use a BIO here to do this, but we create locks during
619  * library initialization, and creating a BIO too early creates a recursive set
620  * of stack calls that leads us to call CRYPTO_thread_run_once while currently
621  * executing the init routine for various run_once functions, which leads to
622  * deadlock.  Avoid that by just using a plain file descriptor.  Also note that
623  * we directly use a pthread_mutex_t to protect access from multiple threads
624  * to the contention log file.  We do this because we want to avoid use
625  * of the CRYPTO_THREAD API so as to prevent recursive blocking reports.
626  */
627 static CRYPTO_ONCE init_contention_data_flag = CRYPTO_ONCE_STATIC_INIT;
628 pthread_mutex_t log_lock = PTHREAD_MUTEX_INITIALIZER;
629 CRYPTO_THREAD_LOCAL thread_contention_data;
630 
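/*
 * One recorded contention event: the captured backtrace, whether the blocked
 * attempt was a write lock, when the wait started and how long it lasted.
 */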
631 struct stack_info {
632     unsigned int nptrs;
633     int write;
634     OSSL_TIME start;
635     OSSL_TIME duration;
636     char **strings;
637 };
638 
639 #  define STACKS_COUNT 32
640 #  define BT_BUF_SIZE 1024
641 struct stack_traces {
642     int fd;
643     int lock_depth;
644     size_t idx;
645     struct stack_info stacks[STACKS_COUNT];
646 };
647 
648 /* glibc has only provided a gettid() wrapper since 2.30; use the raw syscall. */
649 static ossl_inline pid_t get_tid(void)
650 {
651     return syscall(SYS_gettid);
652 }
653 
654 #  ifdef FIPS_MODULE
655 #   define FIPS_SFX "-fips"
656 #  else
657 #   define FIPS_SFX ""
658 #  endif
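/*
 * Allocate this thread's trace buffer and open its per-thread contention
 * log file, named using the thread id.
 */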
659 static void *init_contention_data(void)
660 {
661     struct stack_traces *traces;
662     char fname_fmt[] = "lock-contention-log" FIPS_SFX ".%d.txt";
663     char fname[sizeof(fname_fmt) + sizeof(int) * 3];
664 
665     traces = OPENSSL_zalloc(sizeof(struct stack_traces));
    if (traces == NULL)
        return NULL;
666 
667     snprintf(fname, sizeof(fname), fname_fmt, get_tid());
668 
669     traces->fd = open(fname, O_WRONLY | O_APPEND | O_CLOEXEC | O_CREAT, 0600);
670 
671     return traces;
672 }
673 
674 static void destroy_contention_data(void *data)
675 {
676     struct stack_traces *st = data;
677 
678     close(st->fd);
679     OPENSSL_free(data);
680 }
681 
682 static void init_contention_data_once(void)
683 {
684     /*
685      * Create a thread local key here to store our list of stack traces
686      * to be printed when we unlock the lock we are holding
687      */
688     CRYPTO_THREAD_init_local(&thread_contention_data, destroy_contention_data);
689     return;
690 }
691 
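/*
 * Return this thread's trace buffer, creating and registering it on first
 * use when init is true.
 */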
692 static struct stack_traces *get_stack_traces(bool init)
693 {
694     struct stack_traces *traces = CRYPTO_THREAD_get_local(&thread_contention_data);
695 
696     if (!traces && init) {
697         traces = init_contention_data();
698         CRYPTO_THREAD_set_local(&thread_contention_data, traces);
699     }
700 
701     return traces;
702 }
703 
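/*
 * Write all contention events recorded by this thread to its log file.
 * Each backtrace is emitted with a single writev call, and the symbol
 * strings allocated by backtrace_symbols are freed afterwards.
 */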
704 static void print_stack_traces(struct stack_traces *traces)
705 {
706     unsigned int j;
707     struct iovec *iov;
708     int iovcnt;
709 
710     while (traces != NULL && traces->idx >= 1) {
711         traces->idx--;
712         dprintf(traces->fd,
713                 "lock blocked on %s for %zu usec at time %zu tid %d\n",
714                 traces->stacks[traces->idx].write == 1 ? "WRITE" : "READ",
715                 ossl_time2us(traces->stacks[traces->idx].duration),
716                 ossl_time2us(traces->stacks[traces->idx].start),
717                 get_tid());
718         if (traces->stacks[traces->idx].strings != NULL) {
719             static const char lf = '\n';
720 
721             iovcnt = traces->stacks[traces->idx].nptrs * 2 + 1;
722             iov = alloca(iovcnt * sizeof(*iov));
723             for (j = 0; j < traces->stacks[traces->idx].nptrs; j++) {
724                 iov[2 * j].iov_base = traces->stacks[traces->idx].strings[j];
725                 iov[2 * j].iov_len = strlen(traces->stacks[traces->idx].strings[j]);
726                 iov[2 * j + 1].iov_base = (char *) &lf;
727                 iov[2 * j + 1].iov_len = 1;
728             }
729             iov[traces->stacks[traces->idx].nptrs * 2].iov_base = (char *) &lf;
730             iov[traces->stacks[traces->idx].nptrs * 2].iov_len = 1;
731         } else {
732             static const char no_bt[] = "No stack trace available\n\n";
733 
734             iovcnt = 1;
735             iov = alloca(iovcnt * sizeof(*iov));
736             iov[0].iov_base = (char *) no_bt;
737             iov[0].iov_len = sizeof(no_bt) - 1;
738         }
739         writev(traces->fd, iov, iovcnt);
740         free(traces->stacks[traces->idx].strings);
741     }
742 }
743 
744 static ossl_inline void ossl_init_rwlock_contention_data(void)
745 {
746     CRYPTO_THREAD_run_once(&init_contention_data_flag, init_contention_data_once);
747 }
748 
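/*
 * Slow path used when a trylock failed: block on the real lock, then record
 * how long we waited together with a backtrace in the thread's trace buffer.
 */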
749 static int record_lock_contention(pthread_rwlock_t *lock,
750                                   struct stack_traces *traces, bool write)
751 {
752     void *buffer[BT_BUF_SIZE];
753     OSSL_TIME start, end;
754     int ret;
755 
756     start = ossl_time_now();
757     ret = (write ? pthread_rwlock_wrlock : pthread_rwlock_rdlock)(lock);
758     if (ret)
759         return ret;
760     end = ossl_time_now();
761     traces->stacks[traces->idx].nptrs = backtrace(buffer, BT_BUF_SIZE);
762     traces->stacks[traces->idx].strings = backtrace_symbols(buffer,
763                                                             traces->stacks[traces->idx].nptrs);
764     traces->stacks[traces->idx].duration = ossl_time_subtract(end, start);
765     traces->stacks[traces->idx].start = start;
766     traces->stacks[traces->idx].write = write;
767     traces->idx++;
768     if (traces->idx >= STACKS_COUNT) {
769         fprintf(stderr, "STACK RECORD OVERFLOW!\n");
770         print_stack_traces(traces);
771     }
772 
773     return 0;
774 }
775 
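/*
 * Read-lock wrapper: try the lock first and only fall into the
 * contention-recording slow path when the trylock fails.
 */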
776 static ossl_inline int ossl_rwlock_rdlock(pthread_rwlock_t *lock)
777 {
778     struct stack_traces *traces = get_stack_traces(true);
779 
780     if (ossl_unlikely(traces == NULL))
781         return ENOMEM;
782 
783     traces->lock_depth++;
784     if (pthread_rwlock_tryrdlock(lock)) {
785         int ret = record_lock_contention(lock, traces, false);
786 
787         if (ret)
788             traces->lock_depth--;
789 
790         return ret;
791     }
792 
793     return 0;
794 }
795 
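/* Write-lock wrapper; same strategy as ossl_rwlock_rdlock above. */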
796 static ossl_inline int ossl_rwlock_wrlock(pthread_rwlock_t *lock)
797 {
798     struct stack_traces *traces = get_stack_traces(true);
799 
800     if (ossl_unlikely(traces == NULL))
801         return ENOMEM;
802 
803     traces->lock_depth++;
804     if (pthread_rwlock_trywrlock(lock)) {
805         int ret = record_lock_contention(lock, traces, true);
806 
807         if (ret)
808             traces->lock_depth--;
809 
810         return ret;
811     }
812 
813     return 0;
814 }
815 
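/*
 * Unlock wrapper: once the outermost lock held by this thread is released,
 * flush any contention events recorded while it was held.
 */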
816 static ossl_inline int ossl_rwlock_unlock(pthread_rwlock_t *lock)
817 {
818     int ret;
819 
820     ret = pthread_rwlock_unlock(lock);
821     if (ret)
822         return ret;
823 
824     {
825         struct stack_traces *traces = get_stack_traces(false);
826 
827         if (traces != NULL) {
828             traces->lock_depth--;
829             assert(traces->lock_depth >= 0);
830             if (traces->lock_depth == 0)
831                 print_stack_traces(traces);
832         }
833     }
834 
835     return 0;
836 }
837 
838 # else /* !REPORT_RWLOCK_CONTENTION */
839 
840 static ossl_inline void ossl_init_rwlock_contention_data(void)
841 {
842 }
843 
844 static ossl_inline int ossl_rwlock_rdlock(pthread_rwlock_t *rwlock)
845 {
846     return pthread_rwlock_rdlock(rwlock);
847 }
848 
849 static ossl_inline int ossl_rwlock_wrlock(pthread_rwlock_t *rwlock)
850 {
851     return pthread_rwlock_wrlock(rwlock);
852 }
853 
854 static ossl_inline int ossl_rwlock_unlock(pthread_rwlock_t *rwlock)
855 {
856     return pthread_rwlock_unlock(rwlock);
857 }
858 # endif /* REPORT_RWLOCK_CONTENTION */
859 
860 CRYPTO_RWLOCK *CRYPTO_THREAD_lock_new(void)
861 {
862 # ifdef USE_RWLOCK
863     CRYPTO_RWLOCK *lock;
864 
865     ossl_init_rwlock_contention_data();
866 
867     if ((lock = OPENSSL_zalloc(sizeof(pthread_rwlock_t))) == NULL)
868         /* Don't set error, to avoid recursion blowup. */
869         return NULL;
870 
871     if (pthread_rwlock_init(lock, NULL) != 0) {
872         OPENSSL_free(lock);
873         return NULL;
874     }
875 # else
876     pthread_mutexattr_t attr;
877     CRYPTO_RWLOCK *lock;
878 
879     if ((lock = OPENSSL_zalloc(sizeof(pthread_mutex_t))) == NULL)
880         /* Don't set error, to avoid recursion blowup. */
881         return NULL;
882 
883     /*
884      * We don't use recursive mutexes, but try to catch errors if we do.
885      */
886     pthread_mutexattr_init(&attr);
887 #  if !defined (__TANDEM) && !defined (_SPT_MODEL_)
888 #   if !defined(NDEBUG) && !defined(OPENSSL_NO_MUTEX_ERRORCHECK)
889     pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK);
890 #   endif
891 #  else
892     /* The SPT Thread Library does not define MUTEX attributes. */
893 #  endif
894 
895     if (pthread_mutex_init(lock, &attr) != 0) {
896         pthread_mutexattr_destroy(&attr);
897         OPENSSL_free(lock);
898         return NULL;
899     }
900 
901     pthread_mutexattr_destroy(&attr);
902 # endif
903 
904     return lock;
905 }
906 
907 __owur int CRYPTO_THREAD_read_lock(CRYPTO_RWLOCK *lock)
908 {
909 # ifdef USE_RWLOCK
910     if (!ossl_assert(ossl_rwlock_rdlock(lock) == 0))
911         return 0;
912 # else
913     if (pthread_mutex_lock(lock) != 0) {
914         assert(errno != EDEADLK && errno != EBUSY);
915         return 0;
916     }
917 # endif
918 
919     return 1;
920 }
921 
922 __owur int CRYPTO_THREAD_write_lock(CRYPTO_RWLOCK *lock)
923 {
924 # ifdef USE_RWLOCK
925     if (!ossl_assert(ossl_rwlock_wrlock(lock) == 0))
926         return 0;
927 # else
928     if (pthread_mutex_lock(lock) != 0) {
929         assert(errno != EDEADLK && errno != EBUSY);
930         return 0;
931     }
932 # endif
933 
934     return 1;
935 }
936 
937 int CRYPTO_THREAD_unlock(CRYPTO_RWLOCK *lock)
938 {
939 # ifdef USE_RWLOCK
940     if (ossl_rwlock_unlock(lock) != 0)
941         return 0;
942 # else
943     if (pthread_mutex_unlock(lock) != 0) {
944         assert(errno != EPERM);
945         return 0;
946     }
947 # endif
948 
949     return 1;
950 }
951 
952 void CRYPTO_THREAD_lock_free(CRYPTO_RWLOCK *lock)
953 {
954     if (lock == NULL)
955         return;
956 
957 # ifdef USE_RWLOCK
958     pthread_rwlock_destroy(lock);
959 # else
960     pthread_mutex_destroy(lock);
961 # endif
962     OPENSSL_free(lock);
963 
964     return;
965 }
966 
967 int CRYPTO_THREAD_run_once(CRYPTO_ONCE *once, void (*init)(void))
968 {
969     if (ossl_unlikely(pthread_once(once, init) != 0))
970         return 0;
971 
972     return 1;
973 }
974 
975 int CRYPTO_THREAD_init_local(CRYPTO_THREAD_LOCAL *key, void (*cleanup)(void *))
976 {
977     if (pthread_key_create(key, cleanup) != 0)
978         return 0;
979 
980     return 1;
981 }
982 
983 void *CRYPTO_THREAD_get_local(CRYPTO_THREAD_LOCAL *key)
984 {
985     return pthread_getspecific(*key);
986 }
987 
988 int CRYPTO_THREAD_set_local(CRYPTO_THREAD_LOCAL *key, void *val)
989 {
990     if (pthread_setspecific(*key, val) != 0)
991         return 0;
992 
993     return 1;
994 }
995 
996 int CRYPTO_THREAD_cleanup_local(CRYPTO_THREAD_LOCAL *key)
997 {
998     if (pthread_key_delete(*key) != 0)
999         return 0;
1000 
1001     return 1;
1002 }
1003 
1004 CRYPTO_THREAD_ID CRYPTO_THREAD_get_current_id(void)
1005 {
1006     return pthread_self();
1007 }
1008 
1009 int CRYPTO_THREAD_compare_id(CRYPTO_THREAD_ID a, CRYPTO_THREAD_ID b)
1010 {
1011     return pthread_equal(a, b);
1012 }
1013 
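/*
 * The CRYPTO_atomic_* helpers below share a three-tier strategy: use the
 * GCC/clang __atomic builtins when they are lock-free, fall back to the
 * Solaris atomic_ops interfaces on Solaris 10/11, and otherwise serialise
 * the operation through the caller-supplied CRYPTO_RWLOCK.
 */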
1014 int CRYPTO_atomic_add(int *val, int amount, int *ret, CRYPTO_RWLOCK *lock)
1015 {
1016 # if defined(__GNUC__) && defined(__ATOMIC_ACQ_REL) && !defined(BROKEN_CLANG_ATOMICS)
1017     if (__atomic_is_lock_free(sizeof(*val), val)) {
1018         *ret = __atomic_add_fetch(val, amount, __ATOMIC_ACQ_REL);
1019         return 1;
1020     }
1021 # elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
1022     /* This will work for all future Solaris versions. */
1023     if (ret != NULL) {
1024         *ret = atomic_add_int_nv((volatile unsigned int *)val, amount);
1025         return 1;
1026     }
1027 # endif
1028     if (lock == NULL || !CRYPTO_THREAD_write_lock(lock))
1029         return 0;
1030 
1031     *val += amount;
1032     *ret  = *val;
1033 
1034     if (!CRYPTO_THREAD_unlock(lock))
1035         return 0;
1036 
1037     return 1;
1038 }
1039 
1040 int CRYPTO_atomic_add64(uint64_t *val, uint64_t op, uint64_t *ret,
1041                         CRYPTO_RWLOCK *lock)
1042 {
1043 # if defined(__GNUC__) && defined(__ATOMIC_ACQ_REL) && !defined(BROKEN_CLANG_ATOMICS)
1044     if (__atomic_is_lock_free(sizeof(*val), val)) {
1045         *ret = __atomic_add_fetch(val, op, __ATOMIC_ACQ_REL);
1046         return 1;
1047     }
1048 # elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
1049     /* This will work for all future Solaris versions. */
1050     if (ret != NULL) {
1051         *ret = atomic_add_64_nv(val, op);
1052         return 1;
1053     }
1054 # endif
1055     if (lock == NULL || !CRYPTO_THREAD_write_lock(lock))
1056         return 0;
1057     *val += op;
1058     *ret  = *val;
1059 
1060     if (!CRYPTO_THREAD_unlock(lock))
1061         return 0;
1062 
1063     return 1;
1064 }
1065 
1066 int CRYPTO_atomic_and(uint64_t *val, uint64_t op, uint64_t *ret,
1067                       CRYPTO_RWLOCK *lock)
1068 {
1069 # if defined(__GNUC__) && defined(__ATOMIC_ACQ_REL) && !defined(BROKEN_CLANG_ATOMICS)
1070     if (__atomic_is_lock_free(sizeof(*val), val)) {
1071         *ret = __atomic_and_fetch(val, op, __ATOMIC_ACQ_REL);
1072         return 1;
1073     }
1074 # elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
1075     /* This will work for all future Solaris versions. */
1076     if (ret != NULL) {
1077         *ret = atomic_and_64_nv(val, op);
1078         return 1;
1079     }
1080 # endif
1081     if (lock == NULL || !CRYPTO_THREAD_write_lock(lock))
1082         return 0;
1083     *val &= op;
1084     *ret  = *val;
1085 
1086     if (!CRYPTO_THREAD_unlock(lock))
1087         return 0;
1088 
1089     return 1;
1090 }
1091 
1092 int CRYPTO_atomic_or(uint64_t *val, uint64_t op, uint64_t *ret,
1093                      CRYPTO_RWLOCK *lock)
1094 {
1095 # if defined(__GNUC__) && defined(__ATOMIC_ACQ_REL) && !defined(BROKEN_CLANG_ATOMICS)
1096     if (__atomic_is_lock_free(sizeof(*val), val)) {
1097         *ret = __atomic_or_fetch(val, op, __ATOMIC_ACQ_REL);
1098         return 1;
1099     }
1100 # elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
1101     /* This will work for all future Solaris versions. */
1102     if (ret != NULL) {
1103         *ret = atomic_or_64_nv(val, op);
1104         return 1;
1105     }
1106 # endif
1107     if (lock == NULL || !CRYPTO_THREAD_write_lock(lock))
1108         return 0;
1109     *val |= op;
1110     *ret  = *val;
1111 
1112     if (!CRYPTO_THREAD_unlock(lock))
1113         return 0;
1114 
1115     return 1;
1116 }
1117 
1118 int CRYPTO_atomic_load(uint64_t *val, uint64_t *ret, CRYPTO_RWLOCK *lock)
1119 {
1120 # if defined(__GNUC__) && defined(__ATOMIC_ACQ_REL) && !defined(BROKEN_CLANG_ATOMICS)
1121     if (__atomic_is_lock_free(sizeof(*val), val)) {
1122         __atomic_load(val, ret, __ATOMIC_ACQUIRE);
1123         return 1;
1124     }
1125 # elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
1126     /* This will work for all future Solaris versions. */
1127     if (ret != NULL) {
1128         *ret = atomic_or_64_nv(val, 0);
1129         return 1;
1130     }
1131 # endif
1132     if (lock == NULL || !CRYPTO_THREAD_read_lock(lock))
1133         return 0;
1134     *ret  = *val;
1135     if (!CRYPTO_THREAD_unlock(lock))
1136         return 0;
1137 
1138     return 1;
1139 }
1140 
1141 int CRYPTO_atomic_store(uint64_t *dst, uint64_t val, CRYPTO_RWLOCK *lock)
1142 {
1143 # if defined(__GNUC__) && defined(__ATOMIC_ACQ_REL) && !defined(BROKEN_CLANG_ATOMICS)
1144     if (__atomic_is_lock_free(sizeof(*dst), dst)) {
1145         __atomic_store(dst, &val, __ATOMIC_RELEASE);
1146         return 1;
1147     }
1148 # elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
1149     /* This will work for all future Solaris versions. */
1150     if (dst != NULL) {
1151         atomic_swap_64(dst, val);
1152         return 1;
1153     }
1154 # endif
1155     if (lock == NULL || !CRYPTO_THREAD_write_lock(lock))
1156         return 0;
1157     *dst  = val;
1158     if (!CRYPTO_THREAD_unlock(lock))
1159         return 0;
1160 
1161     return 1;
1162 }
1163 
1164 int CRYPTO_atomic_load_int(int *val, int *ret, CRYPTO_RWLOCK *lock)
1165 {
1166 # if defined(__GNUC__) && defined(__ATOMIC_ACQ_REL) && !defined(BROKEN_CLANG_ATOMICS)
1167     if (__atomic_is_lock_free(sizeof(*val), val)) {
1168         __atomic_load(val, ret, __ATOMIC_ACQUIRE);
1169         return 1;
1170     }
1171 # elif defined(__sun) && (defined(__SunOS_5_10) || defined(__SunOS_5_11))
1172     /* This will work for all future Solaris versions. */
1173     if (ret != NULL) {
1174         *ret = (int)atomic_or_uint_nv((unsigned int *)val, 0);
1175         return 1;
1176     }
1177 # endif
1178     if (lock == NULL || !CRYPTO_THREAD_read_lock(lock))
1179         return 0;
1180     *ret  = *val;
1181     if (!CRYPTO_THREAD_unlock(lock))
1182         return 0;
1183 
1184     return 1;
1185 }
1186 
1187 # ifndef FIPS_MODULE
1188 int openssl_init_fork_handlers(void)
1189 {
1190     return 1;
1191 }
1192 # endif /* FIPS_MODULE */
1193 
1194 int openssl_get_fork_id(void)
1195 {
1196     return getpid();
1197 }
1198 #endif
1199