/*
 * Copyright (C) 2017 Hangzhou C-SKY Microsystems co.,ltd.
 *
 * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB
 * in this tarball.
 */

/* Shift \rx so that the still-unconsumed bytes of a previously loaded
 * source word land at the front (lowest address) of the destination
 * word.  Direction depends on byte order: little-endian keeps the
 * low-order bytes first (shift right), big-endian the high-order ones
 * (shift left).  \ry holds the shift count in bits. */
.macro GET_FRONT_BITS rx ry
#ifdef __cskyLE__
	lsr	\rx, \ry
#else
	lsl	\rx, \ry
#endif
.endm

/* Complementary shift: position the bytes of the next source word that
 * follow the front bits.  \ry holds (32 - front shift) in bits. */
.macro GET_AFTER_BITS rx ry
#ifdef __cskyLE__
	lsl	\rx, \ry
#else
	lsr	\rx, \ry
#endif
.endm

#ifdef WANT_WIDE
# define Wmemcpy wmemcpy
#else
# define Wmemcpy memcpy
#endif

/* void *memcpy(void *dest, const void *src, size_t n);
 *
 * In:      r0 = dest, r1 = src, r2 = n (byte count)
 * Out:     r0 = dest (preserved for the return value)
 * Scratch: r3  = running dest pointer
 *          r12 = dest/src misalignment (bytes)
 *          r13 = temporary / shift count
 *          r18-r23 = data words in flight
 *          r24 = misalignment in bits (8 * r12)
 *          r25 = 32 - r24
 */

	.text
	.align	2
	.global	Wmemcpy
	.type	Wmemcpy, @function
Wmemcpy:
	mov	r3, r0			/* copy dest: r0 must survive as the return value */
	cmplti	r2, 4			/* less than 4 bytes: no word copy possible */
	jbt	.L_copy_by_byte

	mov	r12, r0
	andi	r12, 3
	bnez	r12, .L_dest_not_aligned /* dest is not 4-byte aligned */
.L0:
	mov	r12, r1
	andi	r12, 3
	bnez	r12, .L_dest_aligned_but_src_not_aligned /* dest aligned, src not */

	cmplti	r2, 16			/* dest and src are both word aligned */
	jbt	.L_aligned_and_len_less_16bytes

.L_aligned_and_len_larger_16bytes:	/* both aligned, len >= 16: copy 16 bytes per pass */
	ldw	r18, (r1, 0)
	ldw	r19, (r1, 4)
	ldw	r20, (r1, 8)
	ldw	r21, (r1, 12)
	stw	r18, (r3, 0)
	stw	r19, (r3, 4)
	stw	r20, (r3, 8)
	stw	r21, (r3, 12)
	subi	r2, 16
	addi	r1, 16
	addi	r3, 16
	cmplti	r2, 16
	jbf	.L_aligned_and_len_larger_16bytes

.L_aligned_and_len_less_16bytes:	/* both aligned, len < 16: copy word by word */
	cmplti	r2, 4
	jbt	.L_copy_by_byte
	ldw	r18, (r1, 0)
	stw	r18, (r3, 0)
	subi	r2, 4
	addi	r1, 4
	addi	r3, 4
	jbr	.L_aligned_and_len_less_16bytes

.L_copy_by_byte:			/* tail (or fallback): copy the remaining len < 4 bytes */
	cmpnei	r2, 0
	jbf	.L_return
	ldb	r18, (r1, 0)
	stb	r18, (r3, 0)
	subi	r2, 1
	addi	r1, 1
	addi	r3, 1
	jbr	.L_copy_by_byte

.L_return:
	rts

/* If dest is not aligned, copy single bytes until dest becomes word
 * aligned, then re-check whether src is aligned too (.L0). */

.L_dest_not_aligned:
	rsub	r13, r1, r3		/* |dest - src| < len would mean the word loop */
	abs	r13, r13		/* could read/write overlapping storage, so fall */
	cmplt	r13, r2			/* back to the safe byte copy in that case */
	jbt	.L_copy_by_byte

.L1:
	ldb	r18, (r1, 0)		/* advance until dest is word aligned */
	stb	r18, (r3, 0)
	addi	r12, 1
	subi	r2, 1
	addi	r1, 1
	addi	r3, 1
	cmpnei	r12, 4
	jbt	.L1
	cmplti	r2, 4
	jbt	.L_copy_by_byte
	jbf	.L0			/* now check src alignment */

.L_dest_aligned_but_src_not_aligned:
	rsub	r13, r1, r3		/* same overlap guard as above */
	abs	r13, r13
	cmplt	r13, r2
	jbt	.L_copy_by_byte

	bclri	r1, 0			/* round src down to its word boundary and */
	bclri	r1, 1			/* preload the straddling word into r18 */
	ldw	r18, (r1, 0)
	addi	r1, 4

	movi	r13, 8			/* r24 = misalignment in bits = 8 * r12 */
	mult	r13, r12
	mov	r24, r13
	rsubi	r13, 32			/* r25 = 32 - r24 (complementary shift) */
	mov	r25, r13

	cmplti	r2, 16
	jbt	.L_not_aligned_and_len_less_16bytes

.L_not_aligned_and_len_larger_16bytes:	/* merge shifted halves of adjacent words, 16 bytes per pass */
	ldw	r20, (r1, 0)
	ldw	r21, (r1, 4)
	ldw	r22, (r1, 8)
	ldw	r23, (r1, 12)

	GET_FRONT_BITS r18 r24		/* endian-dependent shift (see macros above) */
	mov	r19, r20
	GET_AFTER_BITS r20 r25
	or	r20, r18

	GET_FRONT_BITS r19 r24
	mov	r18, r21
	GET_AFTER_BITS r21 r25		/* was r13; r13 == r25 here, but use r25 like the other three pairs */
	or	r21, r19

	GET_FRONT_BITS r18 r24
	mov	r19, r22
	GET_AFTER_BITS r22 r25
	or	r22, r18

	GET_FRONT_BITS r19 r24
	mov	r18, r23
	GET_AFTER_BITS r23 r25
	or	r23, r19

	stw	r20, (r3, 0)
	stw	r21, (r3, 4)
	stw	r22, (r3, 8)
	stw	r23, (r3, 12)
	subi	r2, 16
	addi	r1, 16
	addi	r3, 16
	cmplti	r2, 16
	jbf	.L_not_aligned_and_len_larger_16bytes

.L_not_aligned_and_len_less_16bytes:
	cmplti	r2, 4
	jbf	.L2
	rsubi	r12, 4			/* r12 = 4 - misalignment = bytes of r18 already consumed */
	subu	r1, r12			/* rewind src to the first unread byte for the byte loop */
	jbr	.L_copy_by_byte
.L2:
	ldw	r21, (r1, 0)		/* one merged word at a time, len >= 4 */
	GET_FRONT_BITS r18 r24
	mov	r19, r18
	mov	r18, r21
	GET_AFTER_BITS r21 r25
	or	r21, r19
	stw	r21, (r3, 0)
	subi	r2, 4
	addi	r1, 4
	addi	r3, 4
	jbr	.L_not_aligned_and_len_less_16bytes

.size Wmemcpy, .-Wmemcpy

libc_hidden_def(Wmemcpy)
.weak Wmemcpy