1/*
2 * Copyright (C) 2004-2007 Atmel Corporation
3 *
4 * This file is subject to the terms and conditions of the GNU Lesser General
5 * Public License.  See the file "COPYING.LIB" in the main directory of this
6 * archive for more details.
7 */
8
9/* Don't use r12 as dst since we must return it unmodified */
/*
 * Register aliases used by memcpy.  The function copies r12 into dst on
 * entry, then walks dst and src forward while counting len down.  These are
 * preprocessor macros, so this file has to pass through the C preprocessor
 * before it reaches the assembler.
 */
10#define dst r9
11#define src r11
12#define len r10
13
/*
 * memcpy -- copy len bytes from src to dst and return the original
 * destination pointer (presumably the standard C memcpy contract; only the
 * register-level behaviour below is established by this file).
 *
 * Register usage:
 *   r12    destination pointer on entry; never written by this routine, and
 *          named as the return value by retlt/retal on the short path
 *   r11    (src) source pointer, advanced as bytes are consumed
 *   r10    (len) byte count, used as a biased down-counter
 *   r9     (dst) working copy of r12, advanced as bytes are stored
 *   r8     scratch
 *   r0-r7  data buffer on the long path; saved by pushm, restored by popm
 *
 * Strategy: copies of fewer than 32 bytes use a plain byte loop.  Longer
 * copies first bring src up to a 32-byte boundary with byte copies, then
 * move 32-byte bursts with ldm/stm when dst is word-aligned, or single
 * words when it is not.  Leftover bytes are handled by jumping into an
 * unrolled run of byte copies.
 */
14	.text
15	.global	memcpy
16	.type	memcpy, @function
17memcpy:
	/* Prefetch the first source line early; dst becomes the working store
	 * pointer so that r12 keeps holding the value to return. */
18	pref	src[0]
19	mov	dst, r12
20
21	/* If we have less than 32 bytes, don't do anything fancy */
	/* NOTE(review): brge/brlt/retlt are signed tests, so a len with bit 31
	 * set would be treated as "less than 32" and appears to return without
	 * copying anything -- presumably such lengths never occur; confirm. */
22	cp.w	len, 32
23	brge	.Lmore_than_31
24
	/* Short path: pre-decrement so that len == 0 returns immediately, then
	 * copy one byte per iteration until the counter goes negative. */
25	sub	len, 1
26	retlt	r12
271:	ld.ub	r8, src++
28	st.b	dst++, r8
29	sub	len, 1
30	brge	1b
31	retal	r12
32
33.Lmore_than_31:
	/* r0-r7 serve as the copy buffer from here on; lr is saved so that the
	 * closing popm can load it straight into pc to return. */
34	pushm	r0-r7, lr
35
36	/* Check alignment */
	/* The burst loop is only entered with src on a 32-byte boundary and
	 * dst on a 4-byte boundary; each test leaves the low bits in r8. */
37	mov	r8, src
38	andl	r8, 31, COH
39	brne	.Lunaligned_src
40	mov	r8, dst
41	andl	r8, 3, COH
42	brne	.Lunaligned_dst
43
44.Laligned_copy:
	/* len is kept biased by -32 inside the loop: it stays >= 0 for as long
	 * as another full 32-byte burst is available. */
45	sub	len, 32
46	brlt	.Lless_than_32
47
481:	/* Copy 32 bytes at a time */
	/* "sub reg, -N" is how this routine adds N to a pointer or counter. */
49	ldm	src, r0-r7
50	sub	src, -32
51	stm	dst, r0-r7
52	sub	dst, -32
53	sub	len, 32
54	brge	1b
55
56.Lless_than_32:
	/* On arrival len == remaining - 32, i.e. somewhere in -32..-1. */
57	/* Copy 16 more bytes if possible */
	/* Adding 16 yields remaining - 16: negative means under 16 are left. */
58	sub	len, -16
59	brlt	.Lless_than_16
60	ldm	src, r0-r3
61	sub	src, -16
	/* Re-bias so that len == remaining - 16 on both ways into the tail. */
62	sub	len, 16
63	stm	dst, r0-r3
64	sub	dst, -16
65
66.Lless_than_16:
67	/* Do the remaining as byte copies */
	/* len == remaining - 16 (-16..-1), so after neg it is 16 - remaining.
	 * The computed jump skips that many 4-byte slots counted from the add
	 * itself, which leaves exactly `remaining` ld.ub/st.b pairs to run.
	 * NOTE(review): this assumes the add and every ld.ub/st.b pair assemble
	 * to 4 bytes each and that pc reads as the address of the add -- confirm
	 * against the AVR32 architecture manual before touching this sequence.
	 * Nothing may be inserted between the add and the .rept block. */
68	neg	len
69	add	pc, pc, len << 2
70	.rept	15
71	ld.ub	r0, src++
72	st.b	dst++, r0
73	.endr
74
	/* Restore the buffer registers and return by popping saved lr into pc. */
75	popm	r0-r7, pc
76
77.Lunaligned_src:
78	/* Make src cacheline-aligned. r8 = (src & 31) */
	/* r8 becomes 32 - (src & 31): the number of head bytes (1..31) up to
	 * the next 32-byte boundary.  len is at least 32 on this path, so it
	 * stays positive once those head bytes are deducted. */
79	rsub	r8, r8, 32
80	sub	len, r8
811:	ld.ub	r0, src++
82	st.b	dst++, r0
83	sub	r8, 1
84	brne	1b
85
86	/* If dst is word-aligned, we're ready to go */
	/* src now sits on a boundary.  Fewer than 32 bytes may be left at this
	 * point; .Laligned_copy copes with that through its initial brlt. */
87	pref	src[0]
88	mov	r8, 3
89	tst	dst, r8
90	breq	.Laligned_copy
91
92.Lunaligned_dst:
93	/* src is aligned, but dst is not. Expect bad performance */
	/* Reached by falling through from the head copy above or directly from
	 * the alignment check.  Same biased-counter scheme as the burst loop,
	 * but one word at a time (bias of -4).  Note that st.w is issued to a
	 * dst that is not word-aligned here. */
94	sub	len, 4
95	brlt	2f
961:	ld.w	r0, src++
97	st.w	dst++, r0
98	sub	len, 4
99	brge	1b
100
	/* len == remaining - 4 (-4..-1).  Same computed-jump tail as in
	 * .Lless_than_16, here selecting 0..3 trailing byte copies; the same
	 * encoding-size assumptions apply. */
1012:	neg	len
102	add	pc, pc, len << 2
103	.rept	3
104	ld.ub	r0, src++
105	st.b	dst++, r0
106	.endr
107
	/* Restore the buffer registers and return by popping saved lr into pc. */
108	popm	r0-r7, pc
109	.size	memcpy, . - memcpy
110
/*
 * NOTE(review): libc_hidden_def is not defined in this file.  Presumably it
 * is a C-library build macro, supplied through the preprocessor, that emits
 * the library-internal (hidden) alias for memcpy -- confirm against the
 * library's symbol-visibility headers.
 */
111libc_hidden_def(memcpy)
112