/*
 * Copyright (C) 2013 ARM Ltd.
 * Copyright (C) 2013 Linaro.
 *
 * This code is based on glibc cortex strings work originally authored by Linaro
 * and re-licensed under GPLv2 for the Linux kernel. The original code can
 * be found @
 *
 * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
 * files/head:/src/aarch64/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "assembler.h"

/*
 * strcmp - compare two NUL-terminated strings
 *
 * In:
 *	x0 - pointer to the first string (s1)
 *	x1 - pointer to the second string (s2)
 * Out:
 *	x0 - an integer that is negative, zero or positive when s1 is
 *	     found to be less than, equal to, or greater than s2.
 *
 * The code below writes only x0-x11 and the condition flags, and makes
 * no stack accesses.
 */

#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
#define REP8_80 0x8080808080808080

/* Arguments and return value. */
lhs_ptr		.req	x0
rhs_ptr		.req	x1
retval		.req	x0

/* Working registers. */
lhs		.req	x2
lhs_w		.req	w2
rhs		.req	x3
rhs_w		.req	w3
nul_mask	.req	x4
xor_mask	.req	x5
mismatch	.req	x6
scr0		.req	x7
scr1		.req	x8
scr2		.req	x9
ones		.req	x10
idx		.req	x11

/*
 * Examine the two dwords currently held in lhs and rhs.
 *
 * NUL detection relies on (X - 1) & ~(X | 0x7f) being non-zero iff a
 * byte of X is zero, evaluated for all eight bytes at once (ones holds
 * REP8_01).  On exit:
 *	nul_mask - non-zero iff lhs contains a NUL byte
 *	xor_mask - lhs ^ rhs, non-zero iff the dwords differ
 *	mismatch - nul_mask | xor_mask
 * Overwrites scr0 and scr1; the condition flags are left alone.
 */
	.macro	strcmp_scan_dword
	sub	scr0, lhs, ones
	orr	scr1, lhs, #REP8_7f
	bic	nul_mask, scr0, scr1
	eor	xor_mask, lhs, rhs
	orr	mismatch, xor_mask, nul_mask
	.endm

ENTRY(strcmp)
	mov	ones, #REP8_01
	eor	scr0, lhs_ptr, rhs_ptr
	tst	scr0, #7
	b.ne	.Lskewed		/* Low three address bits differ. */
	ands	scr0, lhs_ptr, #7	/* scr0 = byte offset inside the dword. */
	b.ne	.Lsame_offset

	/*
	 * Both pointers are 8-byte aligned: compare one dword per
	 * iteration until a NUL or a difference shows up.
	 */
.Laligned_loop:
	ldr	lhs, [lhs_ptr], #8
	ldr	rhs, [rhs_ptr], #8
.Lscan_aligned:
	strcmp_scan_dword
	cbz	mismatch, .Laligned_loop
	b	.Lfinish

.Lsame_offset:
	/*
	 * The pointers share the same non-zero offset (in scr0) from an
	 * 8-byte boundary.  Round both down to that boundary, load the
	 * enclosing dwords, and set every bit of the bytes that precede
	 * the real start in both of them.  Those bytes then compare equal
	 * and cannot be mistaken for a NUL.
	 */
	bic	lhs_ptr, lhs_ptr, #7
	bic	rhs_ptr, rhs_ptr, #7
	lsl	scr0, scr0, #3		/* Offset in bytes -> offset in bits. */
	ldr	lhs, [lhs_ptr], #8
	neg	scr0, scr0		/* Shift below uses (scr0 & 63) = 64 - bits. */
	ldr	rhs, [rhs_ptr], #8
	mov	scr1, #-1
	/* Big-endian: the leading bytes sit at the most significant end. */
CPU_BE( lsl	scr1, scr1, scr0 )
	/* Little-endian: the leading bytes sit at the least significant end. */
CPU_LE( lsr	scr1, scr1, scr0 )

	orr	lhs, lhs, scr1
	orr	rhs, rhs, scr1
	b	.Lscan_aligned

.Lskewed:
	/*
	 * The pointers have different offsets within a dword.  Work out
	 * how many bytes (1..8) each needs to reach its next 8-byte
	 * boundary and compare that many bytes one at a time, using the
	 * larger of the two counts.  Afterwards one of the pointers is
	 * 8-byte aligned.
	 */
	and	scr0, lhs_ptr, #7
	and	scr1, rhs_ptr, #7
	neg	scr0, scr0
	neg	scr1, scr1
	add	scr0, scr0, #8		/* scr0 = 8 - (lhs_ptr & 7) */
	add	scr1, scr1, #8		/* scr1 = 8 - (rhs_ptr & 7) */
	subs	scr2, scr0, scr1	/* scr2 = scr0 - scr1, kept for later. */
	csel	idx, scr0, scr1, hi	/* idx = the larger count. */
.Lbyte_loop:
	ldrb	lhs_w, [lhs_ptr], #1
	ldrb	rhs_w, [rhs_ptr], #1
	subs	idx, idx, #1
	/* Count not exhausted: C = (byte of s1 is not NUL); else NZCV = 0. */
	ccmp	lhs_w, #1, #0, ne
	/* Byte was not NUL: Z = (bytes are equal); else NZCV = 0. */
	ccmp	lhs_w, rhs_w, #0, cs
	b.eq	.Lbyte_loop
	/* Count still non-zero: the loop stopped on a NUL or a difference. */
	cbnz	idx, .Lbyte_result
	/* Count ran out: apply the same two tests to the final byte pair. */
	cmp	lhs_w, #1
	ccmp	lhs_w, rhs_w, #0, cs
	b.eq	.Lbytes_done		/* Last bytes equal and not NUL. */
.Lbyte_result:
	sub	retval, lhs, rhs
	ret

.Lbytes_done:
	tst	lhs_ptr, #7
	b.eq	.Lset_backstep
	/*
	 * String 2 is the aligned one, so scr2 is negative here.  Adding
	 * it moves both pointers back so that lhs_ptr sits on an 8-byte
	 * boundary.
	 */
	add	lhs_ptr, lhs_ptr, scr2
	add	rhs_ptr, rhs_ptr, scr2
	/* Compare a dword from aligned s1 against unaligned s2. */
	ldr	lhs, [lhs_ptr], #8
	ldr	rhs, [rhs_ptr], #8

	strcmp_scan_dword
	cbnz	mismatch, .Lfinish
	/* Distance of rhs_ptr past its own 8-byte boundary. */
	and	scr2, scr2, #7
.Lset_backstep:
	neg	idx, scr2		/* idx = negative back-step for the loop. */
.Lskewed_loop:
	/*
	 * lhs_ptr is 8-byte aligned, rhs_ptr is not.  Every iteration
	 * checks the next eight bytes in two overlapping steps:
	 *
	 *  1. Step both pointers back by the same amount (idx) so that the
	 *     load from s2 starts on an 8-byte boundary, and compare that
	 *     window with the corresponding bytes of s1.
	 *  2. Only if step 1 found neither a NUL nor a difference, compare
	 *     the full dwords at the current pointers and advance by eight.
	 *
	 * Splitting the work this way is intended to keep every load
	 * within memory the strings are already known to occupy.
	 */
	ldr	lhs, [lhs_ptr, idx]
	ldr	rhs, [rhs_ptr, idx]
	strcmp_scan_dword
	cbnz	mismatch, .Lfinish

	/* Step 2. */
	ldr	lhs, [lhs_ptr], #8
	ldr	rhs, [rhs_ptr], #8
	strcmp_scan_dword
	cbz	mismatch, .Lskewed_loop

.Lfinish:
	/*
	 * Little-endian: byte-reverse everything so that the earliest
	 * string byte ends up at the most significant end, where CLZ can
	 * locate the first flagged bit.
	 */
CPU_LE( rev	mismatch, mismatch )
CPU_LE( rev	lhs, lhs )
CPU_LE( rev	rhs, rhs )

	/*
	 * Big-endian: the NUL mask computed above cannot be used directly,
	 * because carry propagation can corrupt its upper bits when the
	 * trailing bytes of the string contain 0x01.  If the dword holds no
	 * NUL at all, produce the result straight from an unsigned compare
	 * of the two dwords (0, 1 or -1).  A plain subtraction of the
	 * dwords would not do, as the most significant bit may be
	 * significant.
	 */
CPU_BE( cbnz	nul_mask, 1f )
CPU_BE( cmp	lhs, rhs )
CPU_BE( cset	retval, ne )
CPU_BE( cneg	retval, retval, lo )
CPU_BE( ret )
CPU_BE( 1: )
	/* Redo the NUL detection on a byte-reversed copy of lhs. */
CPU_BE( rev	scr2, lhs )
CPU_BE( sub	scr0, scr2, ones )
CPU_BE( orr	scr1, scr2, #REP8_7f )
CPU_BE( bic	nul_mask, scr0, scr1 )
CPU_BE( rev	nul_mask, nul_mask )
CPU_BE( orr	mismatch, xor_mask, nul_mask )

	clz	idx, mismatch
	/*
	 * The highest set bit of mismatch is either the first differing
	 * bit or the top bit of the first NUL byte.  Shifting both dwords
	 * left by that amount brings the deciding bits to the top.
	 */
	lsl	lhs, lhs, idx
	lsl	rhs, rhs, idx
	/*
	 * Keep only the top eight bits of each, zero-extended (char is
	 * unsigned), and subtract to obtain the signed result.
	 */
	lsr	lhs, lhs, #56
	sub	retval, lhs, rhs, lsr #56
	ret
ENDPROC(strcmp)