1 // SPDX-License-Identifier: GPL-2.0
2
3 #include <pthread.h>
4 #include <sys/shm.h>
5 #include <sys/mman.h>
6 #include <fcntl.h>
7 #include <stdbool.h>
8 #include <time.h>
9 #include <assert.h>
10 #include "logging.h"
11 #include "futextest.h"
12 #include "futex2test.h"
13
/* Kernel-style shorthand for the fixed-width integers used below. */
typedef u_int32_t u32;
typedef u_int64_t u64;
typedef int32_t s32;
17
18 static unsigned int fflags = (FUTEX2_SIZE_U32 | FUTEX2_PRIVATE);
19 static int fnode = FUTEX_NO_NODE;
20
/*
 * A deliberately simple test-and-set lock with a waiter flag.
 *
 * N_LOCK    - set while some thread owns the lock
 * N_WAITERS - set by a locker that found N_LOCK already set
 */

#define N_LOCK		0x00000001
#define N_WAITERS	0x00001000
25
26 struct futex_numa_32 {
27 union {
28 u64 full;
29 struct {
30 u32 val;
31 u32 node;
32 };
33 };
34 };
35
futex_numa_32_lock(struct futex_numa_32 * lock)36 void futex_numa_32_lock(struct futex_numa_32 *lock)
37 {
38 for (;;) {
39 struct futex_numa_32 new, old = {
40 .full = __atomic_load_n(&lock->full, __ATOMIC_RELAXED),
41 };
42
43 for (;;) {
44 new = old;
45 if (old.val == 0) {
46 /* no waiter, no lock -> first lock, set no-node */
47 new.node = fnode;
48 }
49 if (old.val & N_LOCK) {
50 /* contention, set waiter */
51 new.val |= N_WAITERS;
52 }
53 new.val |= N_LOCK;
54
55 /* nothing changed, ready to block */
56 if (old.full == new.full)
57 break;
58
59 /*
60 * Use u64 cmpxchg to set the futex value and node in a
61 * consistent manner.
62 */
63 if (__atomic_compare_exchange_n(&lock->full,
64 &old.full, new.full,
65 /* .weak */ false,
66 __ATOMIC_ACQUIRE,
67 __ATOMIC_RELAXED)) {
68
69 /* if we just set N_LOCK, we own it */
70 if (!(old.val & N_LOCK))
71 return;
72
73 /* go block */
74 break;
75 }
76 }
77
78 futex2_wait(lock, new.val, fflags, NULL, 0);
79 }
80 }
81
futex_numa_32_unlock(struct futex_numa_32 * lock)82 void futex_numa_32_unlock(struct futex_numa_32 *lock)
83 {
84 u32 val = __atomic_sub_fetch(&lock->val, N_LOCK, __ATOMIC_RELEASE);
85 assert((s32)val >= 0);
86 if (val & N_WAITERS) {
87 int woken = futex2_wake(lock, 1, fflags);
88 assert(val == N_WAITERS);
89 if (!woken) {
90 __atomic_compare_exchange_n(&lock->val, &val, 0U,
91 false, __ATOMIC_RELAXED,
92 __ATOMIC_RELAXED);
93 }
94 }
95 }
96
/* Length of each nanosleep() in threadfn(), in nanoseconds; set by -n. */
static long nanos = 50000L;
98
/* Per-thread state handed to threadfn() and contendfn(). */
struct thread_args {
	/* thread handle filled in by pthread_create() */
	pthread_t tid;
	/* stop flag polled at the top of each loop iteration */
	volatile int *done;
	/* the lock under test */
	struct futex_numa_32 *lock;
	/* number of loop iterations this thread completed */
	int val;
	/* pair of counters that must be equal whenever the lock is taken */
	int *val1;
	int *val2;
	/* node value most recently reported by this thread */
	int node;
};
107
threadfn(void * _arg)108 static void *threadfn(void *_arg)
109 {
110 struct thread_args *args = _arg;
111 struct timespec ts = {
112 .tv_nsec = nanos,
113 };
114 int node;
115
116 while (!*args->done) {
117
118 futex_numa_32_lock(args->lock);
119 args->val++;
120
121 assert(*args->val1 == *args->val2);
122 (*args->val1)++;
123 nanosleep(&ts, NULL);
124 (*args->val2)++;
125
126 node = args->lock->node;
127 futex_numa_32_unlock(args->lock);
128
129 if (node != args->node) {
130 args->node = node;
131 printf("node: %d\n", node);
132 }
133
134 nanosleep(&ts, NULL);
135 }
136
137 return NULL;
138 }
139
contendfn(void * _arg)140 static void *contendfn(void *_arg)
141 {
142 struct thread_args *args = _arg;
143
144 while (!*args->done) {
145 /*
146 * futex2_wait() will take hb-lock, verify *var == val and
147 * queue/abort. By knowingly setting val 'wrong' this will
148 * abort and thereby generate hb-lock contention.
149 */
150 futex2_wait(&args->lock->val, ~0U, fflags, NULL, 0);
151 args->val++;
152 }
153
154 return NULL;
155 }
156
157 static volatile int done = 0;
158 static struct futex_numa_32 lock = { .val = 0, };
159 static int val1, val2;
160
main(int argc,char * argv[])161 int main(int argc, char *argv[])
162 {
163 struct thread_args *tas[512], *cas[512];
164 int c, t, threads = 2, contenders = 0;
165 int sleeps = 10;
166 int total = 0;
167
168 while ((c = getopt(argc, argv, "c:t:s:n:N::")) != -1) {
169 switch (c) {
170 case 'c':
171 contenders = atoi(optarg);
172 break;
173 case 't':
174 threads = atoi(optarg);
175 break;
176 case 's':
177 sleeps = atoi(optarg);
178 break;
179 case 'n':
180 nanos = atoi(optarg);
181 break;
182 case 'N':
183 fflags |= FUTEX2_NUMA;
184 if (optarg)
185 fnode = atoi(optarg);
186 break;
187 default:
188 exit(1);
189 break;
190 }
191 }
192
193 for (t = 0; t < contenders; t++) {
194 struct thread_args *args = calloc(1, sizeof(*args));
195 if (!args) {
196 perror("thread_args");
197 exit(-1);
198 }
199
200 args->done = &done;
201 args->lock = &lock;
202 args->val1 = &val1;
203 args->val2 = &val2;
204 args->node = -1;
205
206 if (pthread_create(&args->tid, NULL, contendfn, args)) {
207 perror("pthread_create");
208 exit(-1);
209 }
210
211 cas[t] = args;
212 }
213
214 for (t = 0; t < threads; t++) {
215 struct thread_args *args = calloc(1, sizeof(*args));
216 if (!args) {
217 perror("thread_args");
218 exit(-1);
219 }
220
221 args->done = &done;
222 args->lock = &lock;
223 args->val1 = &val1;
224 args->val2 = &val2;
225 args->node = -1;
226
227 if (pthread_create(&args->tid, NULL, threadfn, args)) {
228 perror("pthread_create");
229 exit(-1);
230 }
231
232 tas[t] = args;
233 }
234
235 sleep(sleeps);
236
237 done = true;
238
239 for (t = 0; t < threads; t++) {
240 struct thread_args *args = tas[t];
241
242 pthread_join(args->tid, NULL);
243 total += args->val;
244 // printf("tval: %d\n", args->val);
245 }
246 printf("total: %d\n", total);
247
248 if (contenders) {
249 total = 0;
250 for (t = 0; t < contenders; t++) {
251 struct thread_args *args = cas[t];
252
253 pthread_join(args->tid, NULL);
254 total += args->val;
255 // printf("tval: %d\n", args->val);
256 }
257 printf("contenders: %d\n", total);
258 }
259
260 return 0;
261 }
262
263