1 /* _wordcopy_fwd_aligned -- Copy block beginning at SRCP to
2    block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
3    Both SRCP and DSTP should be aligned for memory operations on `op_t's.  */
4 
_wordcopy_fwd_aligned(long int dstp,long int srcp,size_t len)5 static void _wordcopy_fwd_aligned (long int dstp, long int srcp, size_t len)
6 {
7   op_t a0, a1;
8 
9   switch (len % 8)
10     {
11     case 2:
12       a0 = ((op_t *) srcp)[0];
13       srcp -= 6 * OPSIZ;
14       dstp -= 7 * OPSIZ;
15       len += 6;
16       goto do1;
17     case 3:
18       a1 = ((op_t *) srcp)[0];
19       srcp -= 5 * OPSIZ;
20       dstp -= 6 * OPSIZ;
21       len += 5;
22       goto do2;
23     case 4:
24       a0 = ((op_t *) srcp)[0];
25       srcp -= 4 * OPSIZ;
26       dstp -= 5 * OPSIZ;
27       len += 4;
28       goto do3;
29     case 5:
30       a1 = ((op_t *) srcp)[0];
31       srcp -= 3 * OPSIZ;
32       dstp -= 4 * OPSIZ;
33       len += 3;
34       goto do4;
35     case 6:
36       a0 = ((op_t *) srcp)[0];
37       srcp -= 2 * OPSIZ;
38       dstp -= 3 * OPSIZ;
39       len += 2;
40       goto do5;
41     case 7:
42       a1 = ((op_t *) srcp)[0];
43       srcp -= 1 * OPSIZ;
44       dstp -= 2 * OPSIZ;
45       len += 1;
46       goto do6;
47 
48     case 0:
49       if (OP_T_THRES <= 3 * OPSIZ && len == 0)
50 	return;
51       a0 = ((op_t *) srcp)[0];
52       srcp -= 0 * OPSIZ;
53       dstp -= 1 * OPSIZ;
54       goto do7;
55     case 1:
56       a1 = ((op_t *) srcp)[0];
57       srcp -=-1 * OPSIZ;
58       dstp -= 0 * OPSIZ;
59       len -= 1;
60       if (OP_T_THRES <= 3 * OPSIZ && len == 0)
61 	goto do0;
62       goto do8;			/* No-op.  */
63     }
64 
65   do
66     {
67     do8:
68       a0 = ((op_t *) srcp)[0];
69       ((op_t *) dstp)[0] = a1;
70     do7:
71       a1 = ((op_t *) srcp)[1];
72       ((op_t *) dstp)[1] = a0;
73     do6:
74       a0 = ((op_t *) srcp)[2];
75       ((op_t *) dstp)[2] = a1;
76     do5:
77       a1 = ((op_t *) srcp)[3];
78       ((op_t *) dstp)[3] = a0;
79     do4:
80       a0 = ((op_t *) srcp)[4];
81       ((op_t *) dstp)[4] = a1;
82     do3:
83       a1 = ((op_t *) srcp)[5];
84       ((op_t *) dstp)[5] = a0;
85     do2:
86       a0 = ((op_t *) srcp)[6];
87       ((op_t *) dstp)[6] = a1;
88     do1:
89       a1 = ((op_t *) srcp)[7];
90       ((op_t *) dstp)[7] = a0;
91 
92       srcp += 8 * OPSIZ;
93       dstp += 8 * OPSIZ;
94       len -= 8;
95     }
96   while (len != 0);
97 
98   /* This is the right position for do0.  Please don't move
99      it into the loop.  */
100  do0:
101   ((op_t *) dstp)[0] = a1;
102 }
103 
104 /* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to
105    block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
106    DSTP should be aligned for memory operations on `op_t's, but SRCP must
107    *not* be aligned.  */
108 
_wordcopy_fwd_dest_aligned(long int dstp,long int srcp,size_t len)109 static void _wordcopy_fwd_dest_aligned (long int dstp, long int srcp, size_t len)
110 {
111   op_t a0, a1, a2, a3;
112   int sh_1, sh_2;
113 
114   /* Calculate how to shift a word read at the memory operation
115      aligned srcp to make it aligned for copy.  */
116 
117   sh_1 = 8 * (srcp % OPSIZ);
118   sh_2 = 8 * OPSIZ - sh_1;
119 
120   /* Make SRCP aligned by rounding it down to the beginning of the `op_t'
121      it points in the middle of.  */
122   srcp &= -OPSIZ;
123 
124   switch (len % 4)
125     {
126     case 2:
127       a1 = ((op_t *) srcp)[0];
128       a2 = ((op_t *) srcp)[1];
129       srcp -= 1 * OPSIZ;
130       dstp -= 3 * OPSIZ;
131       len += 2;
132       goto do1;
133     case 3:
134       a0 = ((op_t *) srcp)[0];
135       a1 = ((op_t *) srcp)[1];
136       srcp -= 0 * OPSIZ;
137       dstp -= 2 * OPSIZ;
138       len += 1;
139       goto do2;
140     case 0:
141       if (OP_T_THRES <= 3 * OPSIZ && len == 0)
142 	return;
143       a3 = ((op_t *) srcp)[0];
144       a0 = ((op_t *) srcp)[1];
145       srcp -=-1 * OPSIZ;
146       dstp -= 1 * OPSIZ;
147       len += 0;
148       goto do3;
149     case 1:
150       a2 = ((op_t *) srcp)[0];
151       a3 = ((op_t *) srcp)[1];
152       srcp -=-2 * OPSIZ;
153       dstp -= 0 * OPSIZ;
154       len -= 1;
155       if (OP_T_THRES <= 3 * OPSIZ && len == 0)
156 	goto do0;
157       goto do4;			/* No-op.  */
158     }
159 
160   do
161     {
162     do4:
163       a0 = ((op_t *) srcp)[0];
164       ((op_t *) dstp)[0] = MERGE (a2, sh_1, a3, sh_2);
165     do3:
166       a1 = ((op_t *) srcp)[1];
167       ((op_t *) dstp)[1] = MERGE (a3, sh_1, a0, sh_2);
168     do2:
169       a2 = ((op_t *) srcp)[2];
170       ((op_t *) dstp)[2] = MERGE (a0, sh_1, a1, sh_2);
171     do1:
172       a3 = ((op_t *) srcp)[3];
173       ((op_t *) dstp)[3] = MERGE (a1, sh_1, a2, sh_2);
174 
175       srcp += 4 * OPSIZ;
176       dstp += 4 * OPSIZ;
177       len -= 4;
178     }
179   while (len != 0);
180 
181   /* This is the right position for do0.  Please don't move
182      it into the loop.  */
183  do0:
184   ((op_t *) dstp)[0] = MERGE (a2, sh_1, a3, sh_2);
185 }
186