/* SPDX-License-Identifier: MIT */
/*
 * memset - fill memory with a constant byte
 *
 * Copyright (c) 2012-2021, Arm Limited.
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses.
 *
 */

#include <asm/macro.h>
#include "asmdefs.h"

#define dstin	x0
#define val	x1
#define valw	w1
#define count	x2
#define dst	x3
#define dstend	x4
#define zva_val	x5

ENTRY (memset)
	PTR_ARG (0)
	SIZE_ARG (2)

	/*
	 * The optimized memset uses the dc opcode, which causes problems
	 * when the cache is disabled. Check whether the data cache is
	 * enabled and, if it is not, fall back to a simple byte-by-byte
	 * memset; otherwise jump to the optimized version.
	 */
	switch_el x6, 3f, 2f, 1f	/* branch on current exception level */
3:	mrs	x6, sctlr_el3
	b	0f
2:	mrs	x6, sctlr_el2
	b	0f
1:	mrs	x6, sctlr_el1
0:
	tst	x6, #CR_C		/* data cache enable bit */
	bne	9f			/* cache on: use the optimized version */

	/*
	 * A very simple byte-by-byte memset that does not use the dc
	 * opcode and can therefore run with the caches disabled.
	 */
	mov	x3, #0x0
	cmp	count, x3		/* check for zero length */
	beq	8f
4:	strb	valw, [dstin, x3]
	add	x3, x3, #0x1
	cmp	count, x3
	bne	4b
8:	ret
9:

	/* The optimized memset version starts here. */
	dup	v0.16B, valw		/* replicate the byte across v0 */
	add	dstend, dstin, count

	cmp	count, 96
	b.hi	L(set_long)
	cmp	count, 16
	b.hs	L(set_medium)
	mov	val, v0.D[0]		/* byte pattern replicated to 64 bits */

	/* Set 0..15 bytes: overlapping stores from both ends. */
	tbz	count, 3, 1f
	str	val, [dstin]
	str	val, [dstend, -8]
	ret
	.p2align 4
1:	tbz	count, 2, 2f
	str	valw, [dstin]
	str	valw, [dstend, -4]
	ret
2:	cbz	count, 3f
	strb	valw, [dstin]
	tbz	count, 1, 3f
	strh	valw, [dstend, -2]
3:	ret

	/* Set 16..96 bytes. */
L(set_medium):
	str	q0, [dstin]
	tbnz	count, 6, L(set96)
	str	q0, [dstend, -16]
	tbz	count, 5, 1f
	str	q0, [dstin, 16]
	str	q0, [dstend, -32]
1:	ret

	.p2align 4
	/* Set 64..96 bytes. Write 64 bytes from the start and
	   32 bytes from the end. */
L(set96):
	str	q0, [dstin, 16]
	stp	q0, q0, [dstin, 32]
	stp	q0, q0, [dstend, -32]
	ret

	.p2align 4
L(set_long):
	and	valw, valw, 255
	bic	dst, dstin, 15		/* align dst down to 16 bytes */
	str	q0, [dstin]
	cmp	count, 160
	ccmp	valw, 0, 0, hs		/* DC ZVA needs count >= 160 and val == 0 */
	b.ne	L(no_zva)

#ifndef SKIP_ZVA_CHECK
	mrs	zva_val, dczid_el0
	and	zva_val, zva_val, 31
	cmp	zva_val, 4		/* ZVA size is 64 bytes. */
	b.ne	L(no_zva)
#endif
	str	q0, [dst, 16]
	stp	q0, q0, [dst, 32]
	bic	dst, dst, 63
	sub	count, dstend, dst	/* Count is now 64 too large. */
	sub	count, count, 128	/* Adjust count and bias for loop. */

	.p2align 4
L(zva_loop):
	add	dst, dst, 64
	dc	zva, dst		/* zero a whole 64-byte block */
	subs	count, count, 64
	b.hi	L(zva_loop)
	stp	q0, q0, [dstend, -64]
	stp	q0, q0, [dstend, -32]
	ret

L(no_zva):
	sub	count, dstend, dst	/* Count is 16 too large. */
	sub	dst, dst, 16		/* Dst is biased by -32. */
	sub	count, count, 64 + 16	/* Adjust count and bias for loop. */
L(no_zva_loop):
	stp	q0, q0, [dst, 32]
	stp	q0, q0, [dst, 64]!
	subs	count, count, 64
	b.hi	L(no_zva_loop)
	stp	q0, q0, [dstend, -64]
	stp	q0, q0, [dstend, -32]
	ret

END (memset)
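
/*
 * Reference sketch in C, for documentation only; the #if 0 keeps it out
 * of the build since this file goes through the C preprocessor. It is a
 * minimal model of the overlapping-store trick used by the 0..15 byte
 * path above: one store from the start and one from the end cover the
 * whole buffer without a scalar tail loop. The helper name
 * small_memset_model is illustrative, not a real U-Boot symbol, and the
 * model assumes count <= 15. L(set_medium) and L(set96) scale the same
 * idea up to 16- and 32-byte stores.
 */
#if 0
#include <stddef.h>
#include <string.h>

static void *small_memset_model(void *dstin, int c, size_t count)
{
	unsigned char *d = dstin;
	unsigned char *e = d + count;	/* dstend */
	unsigned char v = (unsigned char)c;
	unsigned char v8[8];

	memset(v8, v, sizeof(v8));	/* like dup v0.16B, valw */
	if (count & 8) {		/* 8..15: two overlapping 8-byte stores */
		memcpy(d, v8, 8);
		memcpy(e - 8, v8, 8);
	} else if (count & 4) {		/* 4..7: two overlapping 4-byte stores */
		memcpy(d, v8, 4);
		memcpy(e - 4, v8, 4);
	} else if (count) {		/* 1..3: one byte, plus a 2-byte store */
		d[0] = v;
		if (count & 2)
			memcpy(e - 2, v8, 2);
	}
	return dstin;
}
#endif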
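
/*
 * A second compiled-out sketch, modelling the count bias in the dc zva
 * path above. zva_model is an illustrative name; memset(dst, 0, 64)
 * stands in for "dc zva, dst", and the model assumes the conditions
 * checked at L(set_long): count >= 160, val == 0 and a 64-byte ZVA
 * block size. Biasing count by 128 (the 64 bytes of excess from the
 * downward alignment, plus a 64-byte loop bias) makes the loop exit
 * while 1..64 bytes are still unset, which the two trailing 32-byte
 * store pairs then cover working back from dstend.
 */
#if 0
#include <stddef.h>
#include <stdint.h>
#include <string.h>

static void zva_model(unsigned char *dstin, size_t count)
{
	unsigned char *dstend = dstin + count;
	unsigned char *dst = (unsigned char *)((uintptr_t)dstin & ~(uintptr_t)15);
	ptrdiff_t remaining;

	memset(dstin, 0, 16);		/* str q0, [dstin] */
	memset(dst + 16, 0, 48);	/* str/stp covering up to dst + 64 */
	dst = (unsigned char *)((uintptr_t)dst & ~(uintptr_t)63);

	remaining = (dstend - dst) - 128;	/* the two sub instructions */
	do {
		dst += 64;
		memset(dst, 0, 64);	/* dc zva, dst */
		remaining -= 64;
	} while (remaining > 0);	/* subs + b.hi */
	memset(dstend - 64, 0, 64);	/* the two trailing stp q0, q0 pairs */
}
#endif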