/* Optimized strcmp for Xtensa.
   Copyright (C) 2001, 2007 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include <bits/xtensa-config.h>
#include <features.h>

/* Byte-lane masks.  MASKn selects the byte that sits at memory offset n
   inside a word fetched with l32i, so the constants are mirrored
   between the big-endian and little-endian configurations.  */
#ifdef __XTENSA_EB__
#define	MASK0 0xff000000
#define	MASK1 0x00ff0000
#define	MASK2 0x0000ff00
#define	MASK3 0x000000ff
#else
#define	MASK0 0x000000ff
#define	MASK1 0x0000ff00
#define	MASK2 0x00ff0000
#define	MASK3 0xff000000
#endif

/* Bit 6 of each of the four bytes in a word; used by the quick
   "no zero byte in this word" heuristic in the aligned loops below.  */
#define MASK4 0x40404040

/* strcmp: compare the NUL-terminated strings s1 and s2.

   In:       a2 = s1, a3 = s2
   Out:      a2 = 0 when the strings are equal; otherwise a nonzero
             value produced by one of the return paths below (a byte
             difference, a word difference, or -1/1 on the big-endian
             fast path).
   Written:  a2, a3 and a4-a11 are all used as scratch.

   ENTRY, abi_ret, libc_hidden_def and strong_alias are macros that are
   not defined in this file (presumably supplied by <sysdep.h>).

   Overall structure:
     1. Compare byte 0; return at once if it differs.
     2. If both pointers are word-aligned, go to the word loop.
     3. If the pointers are misaligned differently, use the byte loop.
     4. Otherwise (same misalignment) compare bytes until both pointers
        become word-aligned, then go to the word loop.  */

	.text
	.align	4
	.literal_position
ENTRY (strcmp)
	/* a2 = s1, a3 = s2 */

	l8ui	a8, a2, 0	/* byte 0 from s1 */
	l8ui	a9, a3, 0	/* byte 0 from s2 */
	movi	a10, 3		/* mask for the two low address bits */
	bne	a8, a9, .Lretdiff

	or	a11, a2, a3	/* low bits clear only if both are aligned */
	bnone	a11, a10, .Laligned

	xor	a11, a2, a3	/* compare low two bits of s1 and s2 */
	bany	a11, a10, .Lunaligned	/* if they have different alignment */

	/* s1 and s2 share the same nonzero misalignment.  Byte 0 was
	   already compared above; step over it and compare at most two
	   more bytes, leaving as soon as the pointers are word-aligned.
	   Only a2 is tested for alignment because a3 has the same low
	   bits.  */
	addi	a2, a2, 1	/* advance s1 */
	beqz	a8, .Leq	/* bytes equal, if zero, strings are equal */
	addi	a3, a3, 1	/* advance s2 */
	bnone	a2, a10, .Laligned /* if s1/s2 now aligned */
	l8ui	a8, a2, 0	/* byte 1 from s1 */
	l8ui	a9, a3, 0	/* byte 1 from s2 */
	addi	a2, a2, 1	/* advance s1 */
	bne	a8, a9, .Lretdiff /* if different, return difference */
	beqz	a8, .Leq	/* bytes equal, if zero, strings are equal */
	addi	a3, a3, 1	/* advance s2 */
	bnone	a2, a10, .Laligned /* if s1/s2 now aligned */
	l8ui	a8, a2, 0	/* byte 2 from s1 */
	l8ui	a9, a3, 0	/* byte 2 from s2 */
	addi	a2, a2, 1	/* advance s1 */
	bne	a8, a9, .Lretdiff /* if different, return difference */
	beqz	a8, .Leq	/* bytes equal, if zero, strings are equal */
	addi	a3, a3, 1	/* advance s2 */
	j	.Laligned	/* three bytes consumed: must be aligned now */

/* s1 and s2 have different alignment.

   If the zero-overhead loop option is available, use an (almost)
   infinite zero-overhead loop with conditional exits so we only pay
   for taken branches when exiting the loop.

   Note: It is important for this unaligned case to come before the
   code for aligned strings, because otherwise some of the branches
   above cannot reach and have to be transformed to branches around
   jumps.  The unaligned code is smaller and the branches can reach
   over it.  */

	.align	4
	/* (2 mod 4) alignment for loop instruction */
.Lunaligned:
#if XCHAL_HAVE_LOOPS
	movi	a11, 0		/* set up for the maximum loop count */
	loop	a11, .Lretdiff	/* loop forever (almost anyway) */
#endif
	/* Plain byte-at-a-time comparison.  Both exits (bytes differ, or
	   both bytes are zero) end up at .Lretdiff; in the second case
	   a8 == a9 == 0 so the subtraction yields 0.  */
.Lnextbyte:
	l8ui	a8, a2, 0
	l8ui	a9, a3, 0
	addi	a2, a2, 1
	bne	a8, a9, .Lretdiff
	addi	a3, a3, 1
#if XCHAL_HAVE_LOOPS
	beqz	a8, .Lretdiff	/* end of string: leave the loop */
#else
	bnez	a8, .Lnextbyte	/* not at end of string: next byte */
#endif
.Lretdiff:
	sub	a2, a8, a9	/* return s1 byte minus s2 byte */
	abi_ret

/* s1 is word-aligned; s2 is word-aligned.

   If the zero-overhead loop option is available, use an (almost)
   infinite zero-overhead loop with conditional exits so we only pay
   for taken branches when exiting the loop.  */

/* New algorithm, relying on the fact that all normal ASCII is between
   32 and 127.

   Rather than check all bytes for zero:
   Take one word (4 bytes).  Call it w1.
   Shift w1 left by one into w1'.
   Or w1 and w1'.  For all normal ASCII bit 6 will be 1; for zero it won't.
   (After the shift, bit 6 of each byte holds that byte's original
   bit 5, so the OR has bit 6 set whenever bit 5 or bit 6 was set.)
   Check that all 4 bit 6's (one for each byte, i.e. MASK4) are one:
     If they are, we are definitely not done.
     If they are not, we are probably done, but need to check for zero.

   Register roles in the word loops:
     a4 = MASK0, a7 = MASK4 (later reloaded with MASK3),
     a8 = current word of s1, a9 = current word of s2 / scratch,
     a5 = a8 << 1 (later reloaded with MASK1), a6 = MASK2.  */

	.align	4
#if XCHAL_HAVE_LOOPS
.Laligned:
	movi	a11, 0		/* loop count register; 0 = maximum count */
	movi	a4, MASK0	/* mask for byte 0 */
	movi	a7, MASK4
	loop	a11, .Laligned_done /* Loop forever.  */

	/* First unrolled loop body.  */
	l32i	a8, a2, 0	/* get word from s1 */
	l32i	a9, a3, 0	/* get word from s2 */
	slli	a5, a8, 1	/* a5 = w1' (also consumed by .Lwne2) */
	bne	a8, a9, .Lwne2
	or	a9, a8, a5	/* a9 is free again: words were equal */
	bnall	a9, a7, .Lprobeq /* some bit 6 clear: maybe a zero byte */

	/* Second unrolled loop body.  */
	l32i	a8, a2, 4	/* get word from s1+4 */
	l32i	a9, a3, 4	/* get word from s2+4 */
	slli	a5, a8, 1
	bne	a8, a9, .Lwne2
	or	a9, a8, a5
	bnall	a9, a7, .Lprobeq2

	addi	a2, a2, 8	/* advance s1 pointer */
	addi	a3, a3, 8	/* advance s2 pointer */
.Laligned_done:
	/* NOTE(review): .Lprobeq2 has the same address as the loop end
	   label above.  This relies on a taken branch to that address
	   leaving the zero-overhead loop rather than looping back --
	   confirm against the Xtensa loop option semantics.  */
.Lprobeq2:
	/* Adjust pointers to account for the loop unrolling.  */
	addi	a2, a2, 4
	addi	a3, a3, 4

#else /* !XCHAL_HAVE_LOOPS */

.Laligned:
	movi	a4, MASK0	/* mask for byte 0 */
	movi	a7, MASK4
	j	.Lfirstword
.Lnextword:
	addi	a2, a2, 4	/* advance s1 pointer */
	addi	a3, a3, 4	/* advance s2 pointer */
.Lfirstword:
	l32i	a8, a2, 0	/* get word from s1 */
	l32i	a9, a3, 0	/* get word from s2 */
	slli	a5, a8, 1	/* a5 = w1' (also consumed by .Lwne2) */
	bne	a8, a9, .Lwne2
	or	a9, a8, a5
	ball	a9, a7, .Lnextword /* every bit 6 set: no zero byte */
#endif /* !XCHAL_HAVE_LOOPS */

	/* align (0 mod 4) */
.Lprobeq:
	/* Words are probably equal, but check for sure.
	   If not, loop over the rest of string using normal algorithm.

	   On entry a8 holds the word (identical in s1 and s2) that failed
	   the bit-6 heuristic, and a2/a3 point at it.  The masks for
	   bytes 1-3 are loaded here and stay live for the loop below;
	   note that a7 changes meaning from MASK4 to MASK3.  */

	bnone	a8, a4, .Leq	/* if byte 0 is zero */
	movi	a5, MASK1	/* mask for byte 1 */
	movi	a6, MASK2	/* mask for byte 2 */
	bnone	a8, a5, .Leq	/* if byte 1 is zero */
	movi	a7, MASK3	/* mask for byte 3 */
	bnone	a8, a6, .Leq	/* if byte 2 is zero */
	bnone	a8, a7, .Leq	/* if byte 3 is zero */
	addi.n	a2, a2, 4	/* advance s1 pointer */
	addi.n	a3, a3, 4	/* advance s2 pointer */
#if XCHAL_HAVE_LOOPS

	/* align (1 mod 4) */
	/* a11 is still 0 from .Laligned, giving the maximum loop count.  */
	loop	a11, .Leq	/* loop forever */

	l32i	a8, a2, 0	/* get word from s1 */
	l32i	a9, a3, 0	/* get word from s2 */
	addi	a2, a2, 4	/* advance s1 pointer */
	bne	a8, a9, .Lwne
	bnone	a8, a4, .Leq	/* if byte 0 is zero */
	bnone	a8, a5, .Leq	/* if byte 1 is zero */
	bnone	a8, a6, .Leq	/* if byte 2 is zero */
	bnone	a8, a7, .Leq	/* if byte 3 is zero */
	addi	a3, a3, 4	/* advance s2 pointer */

#else /* !XCHAL_HAVE_LOOPS */

	j	.Lfirstword2
.Lnextword2:
	addi	a3, a3, 4	/* advance s2 pointer */
.Lfirstword2:
	l32i	a8, a2, 0	/* get word from s1 */
	l32i	a9, a3, 0	/* get word from s2 */
	addi	a2, a2, 4	/* advance s1 pointer */
	bne	a8, a9, .Lwne
	bnone	a8, a4, .Leq	/* if byte 0 is zero */
	bnone	a8, a5, .Leq	/* if byte 1 is zero */
	bnone	a8, a6, .Leq	/* if byte 2 is zero */
	bany	a8, a7, .Lnextword2 /* if byte 3 is nonzero, keep going */
#endif /* !XCHAL_HAVE_LOOPS */

	/* Words are equal; some byte is zero.  */
.Leq:	movi	a2, 0		/* return equal */
	abi_ret

.Lwne2:	/* Words are not equal.  On big-endian processors, if none of the
	   bytes are zero, the return value can be determined by a simple
	   comparison.

	   Reached only from the heuristic loops, so a5 still holds
	   a8 << 1 and a7 still holds MASK4.  */
#ifdef __XTENSA_EB__
	or	a10, a8, a5	/* same bit-6 heuristic, applied to s1's word */
	bnall	a10, a7, .Lsomezero
	bgeu	a8, a9, .Lposreturn /* unsigned compare; words differ */
	movi	a2, -1
	abi_ret
.Lposreturn:
	movi	a2, 1
	abi_ret
.Lsomezero:	/* There is probably some zero byte.  */
#endif /* __XTENSA_EB__ */
.Lwne:	/* Words are not equal.

	   Walk the bytes in memory order.  For each byte: if it differs,
	   go to the matching .Ldiff handler; if it is equal and zero, the
	   strings ended before any difference, so they are equal.
	   a2 is reused as scratch (the s1 pointer is not needed any more).
	   a5/a6 are reloaded because a5 may hold a shifted word when
	   arriving via .Lwne2.  */
	xor	a2, a8, a9	/* get word with nonzero in byte that differs */
	bany	a2, a4, .Ldiff0	/* if byte 0 differs */
	movi	a5, MASK1	/* mask for byte 1 */
	bnone	a8, a4, .Leq	/* if byte 0 is zero */
	bany	a2, a5, .Ldiff1	/* if byte 1 differs */
	movi	a6, MASK2	/* mask for byte 2 */
	bnone	a8, a5, .Leq	/* if byte 1 is zero */
	bany	a2, a6, .Ldiff2	/* if byte 2 differs */
	bnone	a8, a6, .Leq	/* if byte 2 is zero */
	/* Fall through: bytes 0-2 are equal and nonzero, and the words
	   differ, so byte 3 is the one that differs.  */
#ifdef __XTENSA_EB__
.Ldiff3:
.Ldiff2:
.Ldiff1:
	/* Byte 0 is equal (at least) and there is a difference before a zero
	   byte.  Just subtract words to get the return value.
	   The high order equal bytes cancel, leaving room for the sign.  */
	sub	a2, a8, a9
	abi_ret

.Ldiff0:
	/* Need to make room for the sign, so can't subtract whole words.  */
	extui	a10, a8, 24, 8	/* bits 31..24 of s1's word */
	extui	a11, a9, 24, 8	/* bits 31..24 of s2's word */
	sub	a2, a10, a11
	abi_ret

#else /* !__XTENSA_EB__ */
	/* Little-endian is a little more difficult because can't subtract
	   whole words.  Each handler extracts the differing byte from
	   both words and returns their difference.  */
.Ldiff3:
	/* Bytes 0-2 are equal; byte 3 is different.
	   For little-endian need to have a sign bit for the difference.  */
	extui	a10, a8, 24, 8
	extui	a11, a9, 24, 8
	sub	a2, a10, a11
	abi_ret

.Ldiff0:
	/* Byte 0 is different.  */
	extui	a10, a8, 0, 8
	extui	a11, a9, 0, 8
	sub	a2, a10, a11
	abi_ret

.Ldiff1:
	/* Byte 0 is equal; byte 1 is different.  */
	extui	a10, a8, 8, 8
	extui	a11, a9, 8, 8
	sub	a2, a10, a11
	abi_ret

.Ldiff2:
	/* Bytes 0-1 are equal; byte 2 is different.  */
	extui	a10, a8, 16, 8
	extui	a11, a9, 16, 8
	sub	a2, a10, a11
	abi_ret

#endif /* !__XTENSA_EB__ */

libc_hidden_def (strcmp)

/* When __UCLIBC_HAS_LOCALE__ is not defined, strcoll is provided as an
   alias of strcmp through strong_alias.  */
#ifndef __UCLIBC_HAS_LOCALE__
strong_alias (strcmp, strcoll)
libc_hidden_def (strcoll)
#endif