1 // SPDX-License-Identifier: GPL-2.0
2 
#include <assert.h>
#include <fcntl.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/shm.h>
#include <time.h>
#include <unistd.h>

#include "logging.h"
#include "futextest.h"
#include "futex2test.h"
13 
14 typedef u_int32_t u32;
15 typedef int32_t   s32;
16 typedef u_int64_t u64;
17 
18 static unsigned int fflags = (FUTEX2_SIZE_U32 | FUTEX2_PRIVATE);
19 static int fnode = FUTEX_NO_NODE;
20 
/*
 * A deliberately simple test-and-set lock with a waiter flag.
 *
 * Bits used in the 32-bit futex value:
 */
#define N_LOCK		0x1	/* the lock is currently held */
#define N_WAITERS	0x1000	/* a locker found it held and is about to block */
25 
26 struct futex_numa_32 {
27 	union {
28 		u64 full;
29 		struct {
30 			u32 val;
31 			u32 node;
32 		};
33 	};
34 };
35 
futex_numa_32_lock(struct futex_numa_32 * lock)36 void futex_numa_32_lock(struct futex_numa_32 *lock)
37 {
38 	for (;;) {
39 		struct futex_numa_32 new, old = {
40 			.full = __atomic_load_n(&lock->full, __ATOMIC_RELAXED),
41 		};
42 
43 		for (;;) {
44 			new = old;
45 			if (old.val == 0) {
46 				/* no waiter, no lock -> first lock, set no-node */
47 				new.node = fnode;
48 			}
49 			if (old.val & N_LOCK) {
50 				/* contention, set waiter */
51 				new.val |= N_WAITERS;
52 			}
53 			new.val |= N_LOCK;
54 
55 			/* nothing changed, ready to block */
56 			if (old.full == new.full)
57 				break;
58 
59 			/*
60 			 * Use u64 cmpxchg to set the futex value and node in a
61 			 * consistent manner.
62 			 */
63 			if (__atomic_compare_exchange_n(&lock->full,
64 							&old.full, new.full,
65 							/* .weak */ false,
66 							__ATOMIC_ACQUIRE,
67 							__ATOMIC_RELAXED)) {
68 
69 				/* if we just set N_LOCK, we own it */
70 				if (!(old.val & N_LOCK))
71 					return;
72 
73 				/* go block */
74 				break;
75 			}
76 		}
77 
78 		futex2_wait(lock, new.val, fflags, NULL, 0);
79 	}
80 }
81 
futex_numa_32_unlock(struct futex_numa_32 * lock)82 void futex_numa_32_unlock(struct futex_numa_32 *lock)
83 {
84 	u32 val = __atomic_sub_fetch(&lock->val, N_LOCK, __ATOMIC_RELEASE);
85 	assert((s32)val >= 0);
86 	if (val & N_WAITERS) {
87 		int woken = futex2_wake(lock, 1, fflags);
88 		assert(val == N_WAITERS);
89 		if (!woken) {
90 			__atomic_compare_exchange_n(&lock->val, &val, 0U,
91 						    false, __ATOMIC_RELAXED,
92 						    __ATOMIC_RELAXED);
93 		}
94 	}
95 }
96 
/* Length of each nanosleep() in threadfn(), in nanoseconds (-n in main()). */
static long nanos = 50000;

/* Per-thread state handed to threadfn() and contendfn(). */
struct thread_args {
	pthread_t tid;			/* filled in by pthread_create() */
	volatile int *done;		/* stop flag polled by the thread loop */
	struct futex_numa_32 *lock;	/* lock this thread operates on */
	int val;			/* iterations completed by this thread */
	int *val1, *val2;		/* counter pair bumped under the lock */
	int node;			/* last node value this thread printed */
};
107 
threadfn(void * _arg)108 static void *threadfn(void *_arg)
109 {
110 	struct thread_args *args = _arg;
111 	struct timespec ts = {
112 		.tv_nsec = nanos,
113 	};
114 	int node;
115 
116 	while (!*args->done) {
117 
118 		futex_numa_32_lock(args->lock);
119 		args->val++;
120 
121 		assert(*args->val1 == *args->val2);
122 		(*args->val1)++;
123 		nanosleep(&ts, NULL);
124 		(*args->val2)++;
125 
126 		node = args->lock->node;
127 		futex_numa_32_unlock(args->lock);
128 
129 		if (node != args->node) {
130 			args->node = node;
131 			printf("node: %d\n", node);
132 		}
133 
134 		nanosleep(&ts, NULL);
135 	}
136 
137 	return NULL;
138 }
139 
contendfn(void * _arg)140 static void *contendfn(void *_arg)
141 {
142 	struct thread_args *args = _arg;
143 
144 	while (!*args->done) {
145 		/*
146 		 * futex2_wait() will take hb-lock, verify *var == val and
147 		 * queue/abort.  By knowingly setting val 'wrong' this will
148 		 * abort and thereby generate hb-lock contention.
149 		 */
150 		futex2_wait(&args->lock->val, ~0U, fflags, NULL, 0);
151 		args->val++;
152 	}
153 
154 	return NULL;
155 }
156 
157 static volatile int done = 0;
158 static struct futex_numa_32 lock = { .val = 0, };
159 static int val1, val2;
160 
main(int argc,char * argv[])161 int main(int argc, char *argv[])
162 {
163 	struct thread_args *tas[512], *cas[512];
164 	int c, t, threads = 2, contenders = 0;
165 	int sleeps = 10;
166 	int total = 0;
167 
168 	while ((c = getopt(argc, argv, "c:t:s:n:N::")) != -1) {
169 		switch (c) {
170 		case 'c':
171 			contenders = atoi(optarg);
172 			break;
173 		case 't':
174 			threads = atoi(optarg);
175 			break;
176 		case 's':
177 			sleeps = atoi(optarg);
178 			break;
179 		case 'n':
180 			nanos = atoi(optarg);
181 			break;
182 		case 'N':
183 			fflags |= FUTEX2_NUMA;
184 			if (optarg)
185 				fnode = atoi(optarg);
186 			break;
187 		default:
188 			exit(1);
189 			break;
190 		}
191 	}
192 
193 	for (t = 0; t < contenders; t++) {
194 		struct thread_args *args = calloc(1, sizeof(*args));
195 		if (!args) {
196 			perror("thread_args");
197 			exit(-1);
198 		}
199 
200 		args->done = &done;
201 		args->lock = &lock;
202 		args->val1 = &val1;
203 		args->val2 = &val2;
204 		args->node = -1;
205 
206 		if (pthread_create(&args->tid, NULL, contendfn, args)) {
207 			perror("pthread_create");
208 			exit(-1);
209 		}
210 
211 		cas[t] = args;
212 	}
213 
214 	for (t = 0; t < threads; t++) {
215 		struct thread_args *args = calloc(1, sizeof(*args));
216 		if (!args) {
217 			perror("thread_args");
218 			exit(-1);
219 		}
220 
221 		args->done = &done;
222 		args->lock = &lock;
223 		args->val1 = &val1;
224 		args->val2 = &val2;
225 		args->node = -1;
226 
227 		if (pthread_create(&args->tid, NULL, threadfn, args)) {
228 			perror("pthread_create");
229 			exit(-1);
230 		}
231 
232 		tas[t] = args;
233 	}
234 
235 	sleep(sleeps);
236 
237 	done = true;
238 
239 	for (t = 0; t < threads; t++) {
240 		struct thread_args *args = tas[t];
241 
242 		pthread_join(args->tid, NULL);
243 		total += args->val;
244 //		printf("tval: %d\n", args->val);
245 	}
246 	printf("total: %d\n", total);
247 
248 	if (contenders) {
249 		total = 0;
250 		for (t = 0; t < contenders; t++) {
251 			struct thread_args *args = cas[t];
252 
253 			pthread_join(args->tid, NULL);
254 			total += args->val;
255 			//		printf("tval: %d\n", args->val);
256 		}
257 		printf("contenders: %d\n", total);
258 	}
259 
260 	return 0;
261 }
262 
263