#ifndef _X86_BITOPS_H
#define _X86_BITOPS_H

/*
 * Copyright 1992, Linus Torvalds.
 */

#include <asm/alternative.h>
#include <asm/asm_defns.h>
#include <asm/cpufeatureset.h>

/*
 * We specify the memory operand as both input and output because the memory
 * operand is both read from and written to. Since the operand is in fact a
 * word array, we also specify "memory" in the clobbers list to indicate that
 * words other than the one directly addressed by the memory operand may be
 * modified.
 */

#define ADDR (*(volatile int *) addr)
#define CONST_ADDR (*(const volatile int *) addr)

/**
 * set_bit - Atomically set a bit in memory
 * @nr: the bit to set
 * @addr: the address to start counting from
 *
 * This function is atomic and may not be reordered. See __set_bit()
 * if you do not require the atomic guarantees.
 * Note that @nr may be almost arbitrarily large; this function is not
 * restricted to acting on a single-word quantity.
 */
static inline void set_bit(int nr, volatile void *addr)
{
    asm volatile ( "lock btsl %1,%0"
                   : "+m" (ADDR) : "Ir" (nr) : "memory");
}
#define set_bit(nr, addr) ({                            \
    if ( bitop_bad_size(addr) ) __bitop_bad_size();     \
    set_bit(nr, addr);                                  \
})

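/*
 * Usage sketch (illustrative, not part of this interface): setting bit 3 of
 * a caller-owned bitmap that other CPUs may update concurrently.  The bitmap
 * name and size are hypothetical; BITS_TO_LONGS() is assumed to come from
 * the generic bitmap headers.
 *
 *     unsigned long pending[BITS_TO_LONGS(256)];
 *
 *     set_bit(3, pending);    // atomic read-modify-write (LOCK BTS)
 */
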
/**
 * __set_bit - Set a bit in memory
 * @nr: the bit to set
 * @addr: the address to start counting from
 *
 * Unlike set_bit(), this function is non-atomic and may be reordered.
 * If it's called on the same region of memory simultaneously, the effect
 * may be that only one operation succeeds.
 */
static inline void variable_set_bit(int nr, void *addr)
{
    asm volatile ( "btsl %1,%0" : "+m" (*(int *)addr) : "Ir" (nr) : "memory" );
}
static inline void constant_set_bit(int nr, void *addr)
{
    ((unsigned int *)addr)[nr >> 5] |= (1u << (nr & 31));
}
#define __set_bit(nr, addr) ({                          \
    if ( bitop_bad_size(addr) ) __bitop_bad_size();     \
    __builtin_constant_p(nr) ?                          \
        constant_set_bit(nr, addr) :                    \
        variable_set_bit(nr, addr);                     \
})

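/*
 * Usage sketch (illustrative): __set_bit() is the right choice when the
 * bitmap is private to the caller, or when all writers already serialise on
 * a lock.  The lock and structure below are hypothetical.
 *
 *     spin_lock(&t->lock);
 *     __set_bit(nr, t->flags);    // plain RMW, no LOCK prefix needed here
 *     spin_unlock(&t->lock);
 */
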
/**
 * clear_bit - Clears a bit in memory
 * @nr: Bit to clear
 * @addr: Address to start counting from
 *
 * clear_bit() is atomic and may not be reordered.
 */
static inline void clear_bit(int nr, volatile void *addr)
{
    asm volatile ( "lock btrl %1,%0"
                   : "+m" (ADDR) : "Ir" (nr) : "memory");
}
#define clear_bit(nr, addr) ({                          \
    if ( bitop_bad_size(addr) ) __bitop_bad_size();     \
    clear_bit(nr, addr);                                \
})

/**
 * __clear_bit - Clears a bit in memory
 * @nr: Bit to clear
 * @addr: Address to start counting from
 *
 * Unlike clear_bit(), this function is non-atomic and may be reordered.
 * If it's called on the same region of memory simultaneously, the effect
 * may be that only one operation succeeds.
 */
static inline void variable_clear_bit(int nr, void *addr)
{
    asm volatile ( "btrl %1,%0" : "+m" (*(int *)addr) : "Ir" (nr) : "memory" );
}
static inline void constant_clear_bit(int nr, void *addr)
{
    ((unsigned int *)addr)[nr >> 5] &= ~(1u << (nr & 31));
}
#define __clear_bit(nr, addr) ({                        \
    if ( bitop_bad_size(addr) ) __bitop_bad_size();     \
    __builtin_constant_p(nr) ?                          \
        constant_clear_bit(nr, addr) :                  \
        variable_clear_bit(nr, addr);                   \
})

/**
 * __change_bit - Toggle a bit in memory
 * @nr: the bit to toggle
 * @addr: the address to start counting from
 *
 * Unlike change_bit(), this function is non-atomic and may be reordered.
 * If it's called on the same region of memory simultaneously, the effect
 * may be that only one operation succeeds.
 */
static inline void variable_change_bit(int nr, void *addr)
{
    asm volatile ( "btcl %1,%0" : "+m" (*(int *)addr) : "Ir" (nr) : "memory" );
}
static inline void constant_change_bit(int nr, void *addr)
{
    ((unsigned int *)addr)[nr >> 5] ^= (1u << (nr & 31));
}
#define __change_bit(nr, addr) ({                       \
    if ( bitop_bad_size(addr) ) __bitop_bad_size();     \
    __builtin_constant_p(nr) ?                          \
        constant_change_bit(nr, addr) :                 \
        variable_change_bit(nr, addr);                  \
})

/**
 * change_bit - Toggle a bit in memory
 * @nr: Bit to toggle
 * @addr: Address to start counting from
 *
 * change_bit() is atomic and may not be reordered.
 * Note that @nr may be almost arbitrarily large; this function is not
 * restricted to acting on a single-word quantity.
 */
static inline void change_bit(int nr, volatile void *addr)
{
    asm volatile ( "lock btcl %1,%0"
                   : "+m" (ADDR) : "Ir" (nr) : "memory");
}
#define change_bit(nr, addr) ({                         \
    if ( bitop_bad_size(addr) ) __bitop_bad_size();     \
    change_bit(nr, addr);                               \
})

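/*
 * Usage sketch (illustrative): atomically flipping a flag that several CPUs
 * may toggle concurrently; each call inverts the bit exactly once.  The bit
 * name and flag word are hypothetical.
 *
 *     change_bit(POLARITY_BIT, &ctl_flags);    // LOCK BTC
 */
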
/**
 * test_and_set_bit - Set a bit and return its old value
 * @nr: Bit to set
 * @addr: Address to count from
 *
 * This operation is atomic and cannot be reordered.
 * It also implies a memory barrier.
 */
static inline int test_and_set_bit(int nr, volatile void *addr)
{
    int oldbit;

    asm volatile ( "lock btsl %[nr], %[addr]\n\t"
                   ASM_FLAG_OUT(, "sbbl %[old], %[old]\n\t")
                   : [old] ASM_FLAG_OUT("=@ccc", "=r") (oldbit),
                     [addr] "+m" (ADDR) : [nr] "Ir" (nr) : "memory" );

    return oldbit;
}
#define test_and_set_bit(nr, addr) ({                   \
    if ( bitop_bad_size(addr) ) __bitop_bad_size();     \
    test_and_set_bit(nr, addr);                         \
})

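/*
 * Usage sketch (illustrative): because the old value is returned, this is
 * the classic building block for a one-shot claim or bit-level trylock.
 * WORK_SCHEDULED, w and queue_work() are hypothetical names.
 *
 *     if ( !test_and_set_bit(WORK_SCHEDULED, &w->flags) )
 *         queue_work(w);    // only the caller that flipped 0 -> 1 gets here
 */
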
/**
 * arch__test_and_set_bit - Set a bit and return its old value
 * @nr: Bit to set
 * @addr: Address to count from
 *
 * This operation is non-atomic and can be reordered.
 * If two instances of this operation race, one can appear to succeed
 * but actually fail. You must protect multiple accesses with a lock.
 */
static inline int arch__test_and_set_bit(int nr, volatile void *addr)
{
    int oldbit;

    asm volatile ( "btsl %[nr], %[addr]\n\t"
                   ASM_FLAG_OUT(, "sbbl %[old], %[old]\n\t")
                   : [old] ASM_FLAG_OUT("=@ccc", "=r") (oldbit),
                     [addr] "+m" (*(int *)addr) : [nr] "Ir" (nr) : "memory" );

    return oldbit;
}
#define arch__test_and_set_bit arch__test_and_set_bit

/**
 * test_and_clear_bit - Clear a bit and return its old value
 * @nr: Bit to clear
 * @addr: Address to count from
 *
 * This operation is atomic and cannot be reordered.
 * It also implies a memory barrier.
 */
static inline int test_and_clear_bit(int nr, volatile void *addr)
{
    int oldbit;

    asm volatile ( "lock btrl %[nr], %[addr]\n\t"
                   ASM_FLAG_OUT(, "sbbl %[old], %[old]\n\t")
                   : [old] ASM_FLAG_OUT("=@ccc", "=r") (oldbit),
                     [addr] "+m" (ADDR) : [nr] "Ir" (nr) : "memory" );

    return oldbit;
}
#define test_and_clear_bit(nr, addr) ({                 \
    if ( bitop_bad_size(addr) ) __bitop_bad_size();     \
    test_and_clear_bit(nr, addr);                       \
})

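/*
 * Usage sketch (illustrative): atomically consuming a pending flag so the
 * associated work runs at most once per setter.  EVENT_PENDING, v and
 * process_event() are hypothetical names.
 *
 *     if ( test_and_clear_bit(EVENT_PENDING, &v->pending) )
 *         process_event(v);
 */
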
/**
 * arch__test_and_clear_bit - Clear a bit and return its old value
 * @nr: Bit to clear
 * @addr: Address to count from
 *
 * This operation is non-atomic and can be reordered.
 * If two instances of this operation race, one can appear to succeed
 * but actually fail. You must protect multiple accesses with a lock.
 */
static inline int arch__test_and_clear_bit(int nr, volatile void *addr)
{
    int oldbit;

    asm volatile ( "btrl %[nr], %[addr]\n\t"
                   ASM_FLAG_OUT(, "sbbl %[old], %[old]\n\t")
                   : [old] ASM_FLAG_OUT("=@ccc", "=r") (oldbit),
                     [addr] "+m" (*(int *)addr) : [nr] "Ir" (nr) : "memory" );

    return oldbit;
}
#define arch__test_and_clear_bit arch__test_and_clear_bit

/* WARNING: non-atomic and it can be reordered! */
static inline int arch__test_and_change_bit(int nr, volatile void *addr)
{
    int oldbit;

    asm volatile ( "btcl %[nr], %[addr]\n\t"
                   ASM_FLAG_OUT(, "sbbl %[old], %[old]\n\t")
                   : [old] ASM_FLAG_OUT("=@ccc", "=r") (oldbit),
                     [addr] "+m" (*(int *)addr) : [nr] "Ir" (nr) : "memory" );

    return oldbit;
}
#define arch__test_and_change_bit arch__test_and_change_bit

/**
 * test_and_change_bit - Change a bit and return its old value
 * @nr: Bit to change
 * @addr: Address to count from
 *
 * This operation is atomic and cannot be reordered.
 * It also implies a memory barrier.
 */
static inline int test_and_change_bit(int nr, volatile void *addr)
{
    int oldbit;

    asm volatile ( "lock btcl %[nr], %[addr]\n\t"
                   ASM_FLAG_OUT(, "sbbl %[old], %[old]\n\t")
                   : [old] ASM_FLAG_OUT("=@ccc", "=r") (oldbit),
                     [addr] "+m" (ADDR) : [nr] "Ir" (nr) : "memory" );

    return oldbit;
}
#define test_and_change_bit(nr, addr) ({                \
    if ( bitop_bad_size(addr) ) __bitop_bad_size();     \
    test_and_change_bit(nr, addr);                      \
})

static inline int variable_test_bit(int nr, const volatile void *addr)
{
    int oldbit;

    asm volatile ( "btl %[nr], %[addr]\n\t"
                   ASM_FLAG_OUT(, "sbbl %[old], %[old]\n\t")
                   : [old] ASM_FLAG_OUT("=@ccc", "=r") (oldbit)
                   : [addr] "m" (CONST_ADDR), [nr] "Ir" (nr) : "memory" );

    return oldbit;
}

#define arch_test_bit(nr, addr) ({                      \
    __builtin_constant_p(nr) ?                          \
        generic_test_bit(nr, addr) :                    \
        variable_test_bit(nr, addr);                    \
})

extern unsigned int __find_first_bit(
    const unsigned long *addr, unsigned int size);
extern unsigned int __find_next_bit(
    const unsigned long *addr, unsigned int size, unsigned int offset);
extern unsigned int __find_first_zero_bit(
    const unsigned long *addr, unsigned int size);
extern unsigned int __find_next_zero_bit(
    const unsigned long *addr, unsigned int size, unsigned int offset);

static always_inline unsigned int __scanbit(unsigned long val, unsigned int max)
{
    if ( __builtin_constant_p(max) && max == BITS_PER_LONG )
        alternative_io("bsf %[in],%[out]; cmovz %[max],%k[out]",
                       "rep; bsf %[in],%[out]",
                       X86_FEATURE_BMI1,
                       [out] "=&r" (val),
                       [in] "r" (val), [max] "r" (max));
    else
        asm ( "bsf %1,%0 ; cmovz %2,%k0"
              : "=&r" (val) : "r" (val), "r" (max) );
    return (unsigned int)val;
}

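/*
 * Behavioural note (added commentary): __scanbit(val, max) yields the index
 * of the lowest set bit in @val, or @max when @val is zero.  For example,
 * __scanbit(0x8, 64) is 3 and __scanbit(0, 64) is 64; the BMI1 variant
 * relies on TZCNT ("rep; bsf") returning the operand width for zero input.
 */
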
/**
 * find_first_bit - find the first set bit in a memory region
 * @addr: The address to start the search at
 * @size: The maximum size to search
 *
 * Returns the bit-number of the first set bit, not the number of the byte
 * containing a bit.
 */
#define find_first_bit(addr, size) find_next_bit(addr, size, 0)

/**
 * find_next_bit - find the next set bit in a memory region
 * @addr: The address to base the search on
 * @size: The maximum size to search
 * @off: The bit number to start searching at
 */
#define find_next_bit(addr, size, off) ({                                   \
    unsigned int r__;                                                       \
    const unsigned long *a__ = (addr);                                      \
    unsigned int s__ = (size);                                              \
    unsigned int o__ = (off);                                               \
    if ( o__ >= s__ )                                                       \
        r__ = s__;                                                          \
    else if ( __builtin_constant_p(size) && s__ <= BITS_PER_LONG )          \
        r__ = o__ + __scanbit(*(const unsigned long *)(a__) >> o__, s__);   \
    else if ( __builtin_constant_p(off) && !o__ )                           \
        r__ = __find_first_bit(a__, s__);                                   \
    else                                                                    \
        r__ = __find_next_bit(a__, s__, o__);                               \
    r__;                                                                    \
})

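/*
 * Usage sketch (illustrative): visiting every set bit of a bitmap whose size
 * NR_ENTRIES is a hypothetical compile-time constant.
 *
 *     unsigned int i;
 *
 *     for ( i = find_first_bit(map, NR_ENTRIES);
 *           i < NR_ENTRIES;
 *           i = find_next_bit(map, NR_ENTRIES, i + 1) )
 *         visit(i);    // visit() is illustrative only
 */
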
/**
 * find_first_zero_bit - find the first zero bit in a memory region
 * @addr: The address to start the search at
 * @size: The maximum size to search
 *
 * Returns the bit-number of the first zero bit, not the number of the byte
 * containing a bit.
 */
#define find_first_zero_bit(addr, size) find_next_zero_bit(addr, size, 0)

/**
 * find_next_zero_bit - find the next zero bit in a memory region
 * @addr: The address to base the search on
 * @size: The maximum size to search
 * @off: The bit number to start searching at
 */
#define find_next_zero_bit(addr, size, off) ({                              \
    unsigned int r__;                                                       \
    const unsigned long *a__ = (addr);                                      \
    unsigned int s__ = (size);                                              \
    unsigned int o__ = (off);                                               \
    if ( o__ >= s__ )                                                       \
        r__ = s__;                                                          \
    else if ( __builtin_constant_p(size) && s__ <= BITS_PER_LONG )          \
        r__ = o__ + __scanbit(~*(const unsigned long *)(a__) >> o__, s__);  \
    else if ( __builtin_constant_p(off) && !o__ )                           \
        r__ = __find_first_zero_bit(a__, s__);                              \
    else                                                                    \
        r__ = __find_next_zero_bit(a__, s__, o__);                          \
    r__;                                                                    \
})

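/*
 * Usage sketch (illustrative): claiming the first free slot in a bitmap that
 * is protected by a lock, so the non-atomic __set_bit() is sufficient.  The
 * names below are hypothetical.
 *
 *     spin_lock(&table->lock);
 *     slot = find_first_zero_bit(table->inuse, MAX_SLOTS);
 *     if ( slot < MAX_SLOTS )
 *         __set_bit(slot, table->inuse);
 *     spin_unlock(&table->lock);
 */
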
static always_inline unsigned int arch_ffs(unsigned int x)
{
    unsigned int r;

    if ( __builtin_constant_p(x > 0) && x > 0 )
    {
        /*
         * A common code pattern is:
         *
         *     while ( bits )
         *     {
         *         bit = ffs(bits);
         *         ...
         *
         * and the optimiser really can work with the knowledge of x being
         * non-zero without knowing its exact value, in which case we don't
         * need to compensate for BSF's corner cases. Otherwise...
         */
        asm ( "bsf %[val], %[res]"
              : [res] "=r" (r)
              : [val] "rm" (x) );
    }
    else
    {
        /*
         * ... the AMD manual states that BSF won't modify the destination
         * register if x=0. The Intel manual states that the result is
         * undefined, but the architects have said that the register is
         * written back with its old value (zero extended as normal).
         */
        asm ( "bsf %[val], %[res]"
              : [res] "=r" (r)
              : [val] "rm" (x), "[res]" (-1) );
    }

    return r + 1;
}
#define arch_ffs arch_ffs

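/*
 * Behavioural note (added commentary): arch_ffs() follows the usual ffs()
 * convention of 1-based bit positions with 0 meaning "no bit set", e.g.
 * arch_ffs(0) == 0, arch_ffs(0x1) == 1 and arch_ffs(0x18) == 4.
 */
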
static always_inline unsigned int arch_ffsl(unsigned long x)
{
    unsigned int r;

    /* See arch_ffs() for safety discussions. */
    if ( __builtin_constant_p(x > 0) && x > 0 )
        asm ( "bsf %[val], %q[res]"
              : [res] "=r" (r)
              : [val] "rm" (x) );
    else
        asm ( "bsf %[val], %q[res]"
              : [res] "=r" (r)
              : [val] "rm" (x), "[res]" (-1) );

    return r + 1;
}
#define arch_ffsl arch_ffsl

static always_inline unsigned int arch_fls(unsigned int x)
{
    unsigned int r;

    /* See arch_ffs() for safety discussions. */
    if ( __builtin_constant_p(x > 0) && x > 0 )
        asm ( "bsr %[val], %[res]"
              : [res] "=r" (r)
              : [val] "rm" (x) );
    else
        asm ( "bsr %[val], %[res]"
              : [res] "=r" (r)
              : [val] "rm" (x), "[res]" (-1) );

    return r + 1;
}
#define arch_fls arch_fls

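/*
 * Behavioural note (added commentary): arch_fls() mirrors arch_ffs() for the
 * most significant bit: arch_fls(0) == 0, and for non-zero x the result is
 * the 1-based position of the highest set bit, e.g. arch_fls(0x18) == 5, so
 * fls(x) - 1 gives floor(log2(x)).
 */
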
static always_inline unsigned int arch_flsl(unsigned long x)
{
    unsigned int r;

    /* See arch_ffs() for safety discussions. */
    if ( __builtin_constant_p(x > 0) && x > 0 )
        asm ( "bsr %[val], %q[res]"
              : [res] "=r" (r)
              : [val] "rm" (x) );
    else
        asm ( "bsr %[val], %q[res]"
              : [res] "=r" (r)
              : [val] "rm" (x), "[res]" (-1) );

    return r + 1;
}
#define arch_flsl arch_flsl

unsigned int arch_generic_hweightl(unsigned long x);

static always_inline unsigned int arch_hweightl(unsigned long x)
{
    unsigned int r;

    /*
     * arch_generic_hweightl() is written in ASM in order to preserve all
     * registers, as the compiler can't see the call.
     *
     * This limits the POPCNT instruction to using the same ABI as a function
     * call (input in %rdi, output in %eax) but that's fine.
     *
     * On Intel CPUs prior to Cannon Lake, the POPCNT instruction has a false
     * input dependency on its destination register (errata HSD146, SKL029
     * amongst others), impacting loops such as bitmap_weight(). Insert an
     * XOR to manually break the dependency.
     */
    alternative_io("call arch_generic_hweightl",
                   "xor %k[res], %k[res]\n\t"
                   "popcnt %[val], %q[res]", X86_FEATURE_POPCNT,
                   ASM_OUTPUT2([res] "=&a" (r) ASM_CALL_CONSTRAINT),
                   [val] "D" (x));

    return r;
}
#define arch_hweightl arch_hweightl

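/*
 * Usage note (added commentary): arch_hweightl() returns the population
 * count of its argument, e.g. arch_hweightl(0xf0UL) == 4.  The arch_hweightl
 * define above hooks this implementation into the generic hweight helpers
 * used by, amongst others, bitmap_weight().
 */
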
#endif /* _X86_BITOPS_H */