/*
 * Copyright (C) 2017 Hangzhou C-SKY Microsystems co.,ltd.
 *
 * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB
 * in this tarball.
 */

/* Shift out the bytes of a source word that lie before the current
   copy position; the shift direction depends on endianness. */
.macro GET_FRONT_BITS rx ry
#ifdef __cskyLE__
	lsr	\rx, \ry
#else
	lsl	\rx, \ry
#endif
.endm

/* Position the leading bytes of the next source word so they can be
   OR-ed with the result of GET_FRONT_BITS. */
.macro GET_AFTER_BITS rx ry
#ifdef __cskyLE__
	lsl	\rx, \ry
#else
	lsr	\rx, \ry
#endif
.endm

#ifdef WANT_WIDE
# define Wmemcpy wmemcpy
#else
# define Wmemcpy memcpy
#endif

/* void *memcpy(void *dest, const void *src, size_t n);
   Arguments: r2 = dest (also the return value), r3 = src, r4 = n.
   r7 holds the working copy of dest so that r2 stays intact. */

	.text
	.align	2
	.global	Wmemcpy
	.type	Wmemcpy, @function
Wmemcpy:
	mov	r7, r2
	cmplti	r4, 4				/* if len is less than 4 bytes */
	jbt	.L_copy_by_byte

	mov	r6, r2
	andi	r6, 3
	cmpnei	r6, 0
	jbt	.L_dest_not_aligned		/* if dest is not 4-byte aligned */
.L0:
	mov	r6, r3
	andi	r6, 3
	cmpnei	r6, 0
	jbt	.L_dest_aligned_but_src_not_aligned	/* dest is aligned, but src is not */

	cmplti	r4, 16				/* dest and src are both aligned */
	jbt	.L_aligned_and_len_less_16bytes	/* if len is less than 16 bytes */

	subi	sp, 8
	stw	r8, (sp, 0)
	stw	r9, (sp, 4)
.L_aligned_and_len_larger_16bytes:		/* src and dest are both aligned, and len >= 16 bytes */
	ldw	r1, (r3, 0)
	ldw	r5, (r3, 4)
	ldw	r8, (r3, 8)
	ldw	r9, (r3, 12)
	stw	r1, (r7, 0)
	stw	r5, (r7, 4)
	stw	r8, (r7, 8)
	stw	r9, (r7, 12)
	subi	r4, 16
	addi	r3, 16
	addi	r7, 16
	cmplti	r4, 16
	jbf	.L_aligned_and_len_larger_16bytes
	ldw	r8, (sp, 0)
	ldw	r9, (sp, 4)
	addi	sp, 8

.L_aligned_and_len_less_16bytes:
	cmplti	r4, 4
	jbt	.L_copy_by_byte
	ldw	r1, (r3, 0)
	stw	r1, (r7, 0)
	subi	r4, 4
	addi	r3, 4
	addi	r7, 4
	jbr	.L_aligned_and_len_less_16bytes

.L_copy_by_byte:				/* len is less than 4 bytes */
	cmpnei	r4, 0
	jbf	.L_return
	ldb	r1, (r3, 0)
	stb	r1, (r7, 0)
	subi	r4, 1
	addi	r3, 1
	addi	r7, 1
	jbr	.L_copy_by_byte

.L_return:
	rts

/* If dest is not aligned, copy bytes until dest is aligned,
   then check whether src is aligned as well. */

.L_dest_not_aligned:
	mov	r5, r3				/* consider the overlapped case */
	rsub	r5, r5, r7
	abs	r5, r5
	cmplt	r5, r4
	jbt	.L_copy_by_byte

.L1:
	ldb	r1, (r3, 0)			/* copy bytes until dest is aligned */
	stb	r1, (r7, 0)
	addi	r6, 1
	subi	r4, 1
	addi	r3, 1
	addi	r7, 1
	cmpnei	r6, 4
	jbt	.L1
	cmplti	r4, 4
	jbt	.L_copy_by_byte
	jbf	.L0				/* then check whether src is aligned */

.L_dest_aligned_but_src_not_aligned:
	mov	r5, r3				/* consider the overlapped case */
	rsub	r5, r5, r7
	abs	r5, r5
	cmplt	r5, r4
	jbt	.L_copy_by_byte

	bclri	r3, 0				/* round src down to a word boundary */
	bclri	r3, 1
	ldw	r1, (r3, 0)
	addi	r3, 4

	subi	sp, 16
	stw	r11, (sp, 0)
	stw	r12, (sp, 4)
	stw	r13, (sp, 8)
	movi	r5, 8
	mult	r5, r6				/* r6 holds the number of misaligned bytes */
	mov	r12, r5				/* r12 = misalignment in bits */
	rsubi	r5, 31
	addi	r5, 1
	mov	r13, r5				/* r13 = 32 - misalignment in bits */

	cmplti	r4, 16
	jbt	.L_not_aligned_and_len_less_16bytes

	stw	r8, (sp, 12)
	subi	sp, 8
	stw	r9, (sp, 0)
	stw	r10, (sp, 4)
.L_not_aligned_and_len_larger_16bytes:
	ldw	r5, (r3, 0)
	ldw	r11, (r3, 4)
	ldw	r8, (r3, 8)
	ldw	r9, (r3, 12)

	GET_FRONT_BITS r1 r12			/* little or big endian? */
	mov	r10, r5
	GET_AFTER_BITS r5 r13
	or	r5, r1

	GET_FRONT_BITS r10 r12
	mov	r1, r11
	GET_AFTER_BITS r11 r13
	or	r11, r10

	GET_FRONT_BITS r1 r12
	mov	r10, r8
	GET_AFTER_BITS r8 r13
	or	r8, r1

	GET_FRONT_BITS r10 r12
	mov	r1, r9
	GET_AFTER_BITS r9 r13
	or	r9, r10

	stw	r5, (r7, 0)
	stw	r11, (r7, 4)
	stw	r8, (r7, 8)
	stw	r9, (r7, 12)
	subi	r4, 16
	addi	r3, 16
	addi	r7, 16
	cmplti	r4, 16
	jbf	.L_not_aligned_and_len_larger_16bytes
	ldw	r9, (sp, 0)
	ldw	r10, (sp, 4)
	addi	sp, 8
	ldw	r8, (sp, 12)

.L_not_aligned_and_len_less_16bytes:
	cmplti	r4, 4
	jbf	.L2
	rsubi	r6, 4				/* r6 = 4 - (number of misaligned bytes) */
	subu	r3, r6				/* restore the real (unaligned) src position */
	ldw	r11, (sp, 0)
	ldw	r12, (sp, 4)
	ldw	r13, (sp, 8)
	addi	sp, 16
	jbr	.L_copy_by_byte
.L2:
	ldw	r5, (r3, 0)
	GET_FRONT_BITS r1 r12
	mov	r11, r1
	mov	r1, r5
	GET_AFTER_BITS r5 r13
	or	r5, r11
	stw	r5, (r7, 0)
	subi	r4, 4
	addi	r3, 4
	addi	r7, 4
	jbr	.L_not_aligned_and_len_less_16bytes

.size	Wmemcpy, .-Wmemcpy

libc_hidden_def(Wmemcpy)
.weak Wmemcpy