1 // Copyright 2014 Google Inc. All Rights Reserved.
2 //
3 // Use of this source code is governed by a BSD-style license
4 // that can be found in the COPYING file in the root of the source
5 // tree. An additional intellectual property rights grant can be found
6 // in the file PATENTS. All contributing project authors may
7 // be found in the AUTHORS file in the root of the source tree.
8 // -----------------------------------------------------------------------------
9 //
10 // MIPS version of rescaling functions
11 //
12 // Author(s): Djordje Pesut (djordje.pesut@imgtec.com)
13
14 #include "src/dsp/dsp.h"
15
16 #if defined(WEBP_USE_MIPS_DSP_R2) && !defined(WEBP_REDUCE_SIZE)
17
18 #include <assert.h>
19 #include "src/utils/rescaler_utils.h"
20
// Rounding term: half of WEBP_RESCALER_ONE.
#define ROUNDER (WEBP_RESCALER_ONE >> 1)
// Fixed-point multiply: the product is formed in 64 bits, ROUNDER is added,
// and the sum is shifted right by WEBP_RESCALER_RFIX bits.
#define MULT_FIX(x, y) (((uint64_t)(x) * (y) + ROUNDER) >> WEBP_RESCALER_RFIX)
// Same as MULT_FIX but without the rounding term, so the result is truncated.
#define MULT_FIX_FLOOR(x, y) (((uint64_t)(x) * (y)) >> WEBP_RESCALER_RFIX)
24
25 //------------------------------------------------------------------------------
26 // Row export
27
28 #if 0 // disabled for now. TODO(skal): make match the C-code
// Exports one output row for the non-expanding (!wrk->y_expand) case.
// Writes 'wrk->dst_width * wrk->num_channels' bytes to 'wrk->dst', reading
// 'wrk->irow' / 'wrk->frow' and rewriting 'wrk->irow' in place.
// Per sample, as spelled out by the scalar tail loops:
//   yscale != 0:  frac    = MULT_FIX(frow[x], yscale);
//                 dst[x]  = MULT_FIX_FLOOR(irow[x] - frac, fxy_scale);
//                 irow[x] = frac;
//   yscale == 0:  dst[x]  = MULT_FIX_FLOOR(irow[x], fxy_scale);
//                 irow[x] = 0;
// Samples are processed four at a time by the inline assembly; the remaining
// (x_out_max & 3) samples go through the C loops.
// NOTE(review): the assembly seeds each accumulator with 0x10000 * 0x8000
// before the 'fxy_scale' multiply (i.e. it adds a rounding term), whereas the
// C tail uses MULT_FIX_FLOOR. Presumably this is the mismatch behind the TODO
// that keeps this function compiled out -- confirm before re-enabling.
static void ExportRowShrink_MIPSdspR2(WebPRescaler* const wrk) {
  int i;
  // Number of samples (bytes) to emit for this row.
  const int x_out_max = wrk->dst_width * wrk->num_channels;
  uint8_t* dst = wrk->dst;
  rescaler_t* irow = wrk->irow;
  const rescaler_t* frow = wrk->frow;
  const int yscale = wrk->fy_scale * (-wrk->y_accum);
  int temp0, temp1, temp2, temp3, temp4, temp5, loop_end;
  const int temp7 = (int)wrk->fxy_scale;
  // Byte length of the part handled by the assembly: whole groups of four
  // samples, 4 bytes each (the asm uses word loads with a 4-byte stride, so it
  // assumes sizeof(rescaler_t) == 4 -- TODO confirm).
  const int temp6 = (x_out_max & ~0x3) << 2;
  assert(!WebPRescalerOutputDone(wrk));
  assert(wrk->y_accum <= 0);
  assert(!wrk->y_expand);
  assert(wrk->fxy_scale != 0);
  if (yscale) {
    // The asm loop tests its exit condition at the bottom, hence the guard.
    if (x_out_max >= 4) {
      int temp8, temp9, temp10, temp11;
      __asm__ volatile (
        // temp3 * temp4 = 0x80000000 is the value each accumulator is seeded
        // with (presumably ROUNDER, assuming WEBP_RESCALER_RFIX == 32).
        "li %[temp3], 0x10000 \n\t"
        "li %[temp4], 0x8000 \n\t"
        // loop_end = frow + byte length of the vectorized part.
        "addu %[loop_end], %[frow], %[temp6] \n\t"
        "1: \n\t"
        // Load four 'frow' samples.
        "lw %[temp0], 0(%[frow]) \n\t"
        "lw %[temp1], 4(%[frow]) \n\t"
        "lw %[temp2], 8(%[frow]) \n\t"
        "lw %[temp5], 12(%[frow]) \n\t"
        // acN = seed + frow[N] * yscale.
        "mult $ac0, %[temp3], %[temp4] \n\t"
        "maddu $ac0, %[temp0], %[yscale] \n\t"
        "mult $ac1, %[temp3], %[temp4] \n\t"
        "maddu $ac1, %[temp1], %[yscale] \n\t"
        "mult $ac2, %[temp3], %[temp4] \n\t"
        "maddu $ac2, %[temp2], %[yscale] \n\t"
        "mult $ac3, %[temp3], %[temp4] \n\t"
        "maddu $ac3, %[temp5], %[yscale] \n\t"
        "addiu %[frow], %[frow], 16 \n\t"
        // frac[N] = upper word of acN.
        "mfhi %[temp0], $ac0 \n\t"
        "mfhi %[temp1], $ac1 \n\t"
        "mfhi %[temp2], $ac2 \n\t"
        "mfhi %[temp5], $ac3 \n\t"
        // Load four 'irow' samples.
        "lw %[temp8], 0(%[irow]) \n\t"
        "lw %[temp9], 4(%[irow]) \n\t"
        "lw %[temp10], 8(%[irow]) \n\t"
        "lw %[temp11], 12(%[irow]) \n\t"
        // Pointers are advanced early; the stores below use negative offsets.
        "addiu %[dst], %[dst], 4 \n\t"
        "addiu %[irow], %[irow], 16 \n\t"
        // irow[N] - frac[N].
        "subu %[temp8], %[temp8], %[temp0] \n\t"
        "subu %[temp9], %[temp9], %[temp1] \n\t"
        "subu %[temp10], %[temp10], %[temp2] \n\t"
        "subu %[temp11], %[temp11], %[temp5] \n\t"
        // acN = seed + (irow[N] - frac[N]) * fxy_scale. Note the seed: this
        // rounds, unlike MULT_FIX_FLOOR in the C tail loop.
        "mult $ac0, %[temp3], %[temp4] \n\t"
        "maddu $ac0, %[temp8], %[temp7] \n\t"
        "mult $ac1, %[temp3], %[temp4] \n\t"
        "maddu $ac1, %[temp9], %[temp7] \n\t"
        "mult $ac2, %[temp3], %[temp4] \n\t"
        "maddu $ac2, %[temp10], %[temp7] \n\t"
        "mult $ac3, %[temp3], %[temp4] \n\t"
        "maddu $ac3, %[temp11], %[temp7] \n\t"
        "mfhi %[temp8], $ac0 \n\t"
        "mfhi %[temp9], $ac1 \n\t"
        "mfhi %[temp10], $ac2 \n\t"
        "mfhi %[temp11], $ac3 \n\t"
        // irow[N] = frac[N] (new fractional start).
        "sw %[temp0], -16(%[irow]) \n\t"
        "sw %[temp1], -12(%[irow]) \n\t"
        "sw %[temp2], -8(%[irow]) \n\t"
        "sw %[temp5], -4(%[irow]) \n\t"
        // Store the low byte of each result; no range check is done here.
        "sb %[temp8], -4(%[dst]) \n\t"
        "sb %[temp9], -3(%[dst]) \n\t"
        "sb %[temp10], -2(%[dst]) \n\t"
        "sb %[temp11], -1(%[dst]) \n\t"
        "bne %[frow], %[loop_end], 1b \n\t"
        : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
          [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),
          [irow]"+r"(irow), [dst]"+r"(dst), [loop_end]"=&r"(loop_end),
          [temp8]"=&r"(temp8), [temp9]"=&r"(temp9), [temp10]"=&r"(temp10),
          [temp11]"=&r"(temp11), [temp2]"=&r"(temp2)
        : [temp7]"r"(temp7), [yscale]"r"(yscale), [temp6]"r"(temp6)
        : "memory", "hi", "lo", "$ac1hi", "$ac1lo",
          "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
      );
    }
    // Scalar tail: the asm left frow/irow/dst pointing at the first
    // unprocessed sample.
    for (i = 0; i < (x_out_max & 0x3); ++i) {
      const uint32_t frac = (uint32_t)MULT_FIX(*frow++, yscale);
      const int v = (int)MULT_FIX_FLOOR(*irow - frac, wrk->fxy_scale);
      assert(v >= 0 && v <= 255);
      *dst++ = v;
      *irow++ = frac;   // new fractional start
    }
  } else {
    // yscale == 0: no fractional part is carried over, 'irow' is reset to 0.
    if (x_out_max >= 4) {
      __asm__ volatile (
        // Accumulator seed factors, as in the branch above.
        "li %[temp3], 0x10000 \n\t"
        "li %[temp4], 0x8000 \n\t"
        // loop_end = irow + byte length of the vectorized part.
        "addu %[loop_end], %[irow], %[temp6] \n\t"
        "1: \n\t"
        // Load four 'irow' samples.
        "lw %[temp0], 0(%[irow]) \n\t"
        "lw %[temp1], 4(%[irow]) \n\t"
        "lw %[temp2], 8(%[irow]) \n\t"
        "lw %[temp5], 12(%[irow]) \n\t"
        "addiu %[dst], %[dst], 4 \n\t"
        "addiu %[irow], %[irow], 16 \n\t"
        // acN = seed + irow[N] * fxy_scale (rounds, unlike the C tail loop).
        "mult $ac0, %[temp3], %[temp4] \n\t"
        "maddu $ac0, %[temp0], %[temp7] \n\t"
        "mult $ac1, %[temp3], %[temp4] \n\t"
        "maddu $ac1, %[temp1], %[temp7] \n\t"
        "mult $ac2, %[temp3], %[temp4] \n\t"
        "maddu $ac2, %[temp2], %[temp7] \n\t"
        "mult $ac3, %[temp3], %[temp4] \n\t"
        "maddu $ac3, %[temp5], %[temp7] \n\t"
        "mfhi %[temp0], $ac0 \n\t"
        "mfhi %[temp1], $ac1 \n\t"
        "mfhi %[temp2], $ac2 \n\t"
        "mfhi %[temp5], $ac3 \n\t"
        // irow[N] = 0.
        "sw $zero, -16(%[irow]) \n\t"
        "sw $zero, -12(%[irow]) \n\t"
        "sw $zero, -8(%[irow]) \n\t"
        "sw $zero, -4(%[irow]) \n\t"
        // Store the low byte of each result.
        "sb %[temp0], -4(%[dst]) \n\t"
        "sb %[temp1], -3(%[dst]) \n\t"
        "sb %[temp2], -2(%[dst]) \n\t"
        "sb %[temp5], -1(%[dst]) \n\t"
        "bne %[irow], %[loop_end], 1b \n\t"
        : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
          [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [irow]"+r"(irow),
          [dst]"+r"(dst), [loop_end]"=&r"(loop_end), [temp2]"=&r"(temp2)
        : [temp7]"r"(temp7), [temp6]"r"(temp6)
        : "memory", "hi", "lo", "$ac1hi", "$ac1lo",
          "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
      );
    }
    // Scalar tail for the last (x_out_max & 3) samples.
    for (i = 0; i < (x_out_max & 0x3); ++i) {
      const int v = (int)MULT_FIX_FLOOR(*irow, wrk->fxy_scale);
      assert(v >= 0 && v <= 255);
      *dst++ = v;
      *irow++ = 0;
    }
  }
}
166 #endif // 0
167
// Exports one output row for the expanding (wrk->y_expand) case.
// Writes 'wrk->dst_width * wrk->num_channels' bytes to 'wrk->dst'; 'wrk->frow'
// and 'wrk->irow' are only read.
// Per sample:
//   y_accum == 0:  dst[x] = MULT_FIX(frow[x], fy_scale)
//   otherwise:     B = WEBP_RESCALER_FRAC(-y_accum, y_sub)
//                  A = WEBP_RESCALER_ONE - B
//                  J = (A * frow[x] + B * irow[x] + ROUNDER)
//                          >> WEBP_RESCALER_RFIX
//                  dst[x] = MULT_FIX(J, fy_scale)
// Samples are processed four at a time by the inline assembly; the remaining
// (x_out_max & 3) samples go through the C loops. The assembly stores the low
// byte of each result without a range check, while the C loops assert that
// the value lies in [0, 255].
static void ExportRowExpand_MIPSdspR2(WebPRescaler* const wrk) {
  int i;
  uint8_t* dst = wrk->dst;
  rescaler_t* irow = wrk->irow;
  // Number of samples (bytes) to emit for this row.
  const int x_out_max = wrk->dst_width * wrk->num_channels;
  const rescaler_t* frow = wrk->frow;
  int temp0, temp1, temp2, temp3, temp4, temp5, loop_end;
  // Byte length of the part handled by the assembly: whole groups of four
  // samples, 4 bytes each (the asm uses word loads with a 4-byte stride, so it
  // assumes sizeof(rescaler_t) == 4 -- TODO confirm).
  const int temp6 = (x_out_max & ~0x3) << 2;
  const int temp7 = (int)wrk->fy_scale;
  assert(!WebPRescalerOutputDone(wrk));
  assert(wrk->y_accum <= 0);
  assert(wrk->y_expand);
  assert(wrk->y_sub != 0);
  if (wrk->y_accum == 0) {
    // Only 'frow' contributes in this branch.
    // The asm loop tests its exit condition at the bottom, hence the guard.
    if (x_out_max >= 4) {
      __asm__ volatile (
        // temp4 * temp5 = 0x80000000 is the value each accumulator is seeded
        // with (presumably ROUNDER, assuming WEBP_RESCALER_RFIX == 32).
        "li %[temp4], 0x10000 \n\t"
        "li %[temp5], 0x8000 \n\t"
        // loop_end = frow + byte length of the vectorized part.
        "addu %[loop_end], %[frow], %[temp6] \n\t"
        "1: \n\t"
        // Load four 'frow' samples.
        "lw %[temp0], 0(%[frow]) \n\t"
        "lw %[temp1], 4(%[frow]) \n\t"
        "lw %[temp2], 8(%[frow]) \n\t"
        "lw %[temp3], 12(%[frow]) \n\t"
        // Pointers are advanced early; the stores below use negative offsets.
        "addiu %[dst], %[dst], 4 \n\t"
        "addiu %[frow], %[frow], 16 \n\t"
        // acN = seed + frow[N] * fy_scale.
        "mult $ac0, %[temp4], %[temp5] \n\t"
        "maddu $ac0, %[temp0], %[temp7] \n\t"
        "mult $ac1, %[temp4], %[temp5] \n\t"
        "maddu $ac1, %[temp1], %[temp7] \n\t"
        "mult $ac2, %[temp4], %[temp5] \n\t"
        "maddu $ac2, %[temp2], %[temp7] \n\t"
        "mult $ac3, %[temp4], %[temp5] \n\t"
        "maddu $ac3, %[temp3], %[temp7] \n\t"
        // Result = upper word of each accumulator.
        "mfhi %[temp0], $ac0 \n\t"
        "mfhi %[temp1], $ac1 \n\t"
        "mfhi %[temp2], $ac2 \n\t"
        "mfhi %[temp3], $ac3 \n\t"
        // Store the low byte of each result.
        "sb %[temp0], -4(%[dst]) \n\t"
        "sb %[temp1], -3(%[dst]) \n\t"
        "sb %[temp2], -2(%[dst]) \n\t"
        "sb %[temp3], -1(%[dst]) \n\t"
        "bne %[frow], %[loop_end], 1b \n\t"
        : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
          [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),
          [dst]"+r"(dst), [loop_end]"=&r"(loop_end), [temp2]"=&r"(temp2)
        : [temp7]"r"(temp7), [temp6]"r"(temp6)
        : "memory", "hi", "lo", "$ac1hi", "$ac1lo",
          "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
      );
    }
    // Scalar tail: the asm left frow/dst pointing at the first unprocessed
    // sample.
    for (i = 0; i < (x_out_max & 0x3); ++i) {
      const uint32_t J = *frow++;
      const int v = (int)MULT_FIX(J, wrk->fy_scale);
      assert(v >= 0 && v <= 255);
      *dst++ = v;
    }
  } else {
    // Blend 'frow' and 'irow' with weights A and B (A + B == WEBP_RESCALER_ONE
    // modulo 2^32, since A is truncated to uint32_t).
    const uint32_t B = WEBP_RESCALER_FRAC(-wrk->y_accum, wrk->y_sub);
    const uint32_t A = (uint32_t)(WEBP_RESCALER_ONE - B);
    if (x_out_max >= 4) {
      int temp8, temp9, temp10, temp11;
      __asm__ volatile (
        // temp8 * temp9 = 0x80000000: accumulator seed (see branch above).
        "li %[temp8], 0x10000 \n\t"
        "li %[temp9], 0x8000 \n\t"
        // loop_end = frow + byte length of the vectorized part.
        "addu %[loop_end], %[frow], %[temp6] \n\t"
        "1: \n\t"
        // Load four 'frow' samples and four 'irow' samples.
        "lw %[temp0], 0(%[frow]) \n\t"
        "lw %[temp1], 4(%[frow]) \n\t"
        "lw %[temp2], 8(%[frow]) \n\t"
        "lw %[temp3], 12(%[frow]) \n\t"
        "lw %[temp4], 0(%[irow]) \n\t"
        "lw %[temp5], 4(%[irow]) \n\t"
        "lw %[temp10], 8(%[irow]) \n\t"
        "lw %[temp11], 12(%[irow]) \n\t"
        "addiu %[dst], %[dst], 4 \n\t"
        // acN = seed + A * frow[N] + B * irow[N].
        "mult $ac0, %[temp8], %[temp9] \n\t"
        "maddu $ac0, %[A], %[temp0] \n\t"
        "maddu $ac0, %[B], %[temp4] \n\t"
        "mult $ac1, %[temp8], %[temp9] \n\t"
        "maddu $ac1, %[A], %[temp1] \n\t"
        "maddu $ac1, %[B], %[temp5] \n\t"
        "mult $ac2, %[temp8], %[temp9] \n\t"
        "maddu $ac2, %[A], %[temp2] \n\t"
        "maddu $ac2, %[B], %[temp10] \n\t"
        "mult $ac3, %[temp8], %[temp9] \n\t"
        "maddu $ac3, %[A], %[temp3] \n\t"
        "maddu $ac3, %[B], %[temp11] \n\t"
        "addiu %[frow], %[frow], 16 \n\t"
        "addiu %[irow], %[irow], 16 \n\t"
        // J[N] = upper word of acN.
        "mfhi %[temp0], $ac0 \n\t"
        "mfhi %[temp1], $ac1 \n\t"
        "mfhi %[temp2], $ac2 \n\t"
        "mfhi %[temp3], $ac3 \n\t"
        // acN = seed + J[N] * fy_scale.
        "mult $ac0, %[temp8], %[temp9] \n\t"
        "maddu $ac0, %[temp0], %[temp7] \n\t"
        "mult $ac1, %[temp8], %[temp9] \n\t"
        "maddu $ac1, %[temp1], %[temp7] \n\t"
        "mult $ac2, %[temp8], %[temp9] \n\t"
        "maddu $ac2, %[temp2], %[temp7] \n\t"
        "mult $ac3, %[temp8], %[temp9] \n\t"
        "maddu $ac3, %[temp3], %[temp7] \n\t"
        "mfhi %[temp0], $ac0 \n\t"
        "mfhi %[temp1], $ac1 \n\t"
        "mfhi %[temp2], $ac2 \n\t"
        "mfhi %[temp3], $ac3 \n\t"
        // Store the low byte of each result.
        "sb %[temp0], -4(%[dst]) \n\t"
        "sb %[temp1], -3(%[dst]) \n\t"
        "sb %[temp2], -2(%[dst]) \n\t"
        "sb %[temp3], -1(%[dst]) \n\t"
        "bne %[frow], %[loop_end], 1b \n\t"
        : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
          [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),
          [irow]"+r"(irow), [dst]"+r"(dst), [loop_end]"=&r"(loop_end),
          [temp8]"=&r"(temp8), [temp9]"=&r"(temp9), [temp10]"=&r"(temp10),
          [temp11]"=&r"(temp11), [temp2]"=&r"(temp2)
        : [temp7]"r"(temp7), [temp6]"r"(temp6), [A]"r"(A), [B]"r"(B)
        : "memory", "hi", "lo", "$ac1hi", "$ac1lo",
          "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
      );
    }
    // Scalar tail for the last (x_out_max & 3) samples.
    for (i = 0; i < (x_out_max & 0x3); ++i) {
      const uint64_t I = (uint64_t)A * *frow++
                       + (uint64_t)B * *irow++;
      const uint32_t J = (uint32_t)((I + ROUNDER) >> WEBP_RESCALER_RFIX);
      const int v = (int)MULT_FIX(J, wrk->fy_scale);
      assert(v >= 0 && v <= 255);
      *dst++ = v;
    }
  }
}
299
300 #undef MULT_FIX_FLOOR
301 #undef MULT_FIX
302 #undef ROUNDER
303
304 //------------------------------------------------------------------------------
305 // Entry point
306
307 extern void WebPRescalerDspInitMIPSdspR2(void);
308
// Installs the MIPS DSP R2 implementation into the rescaler's row-export
// function pointer. Only the expand variant is hooked up: the shrink variant
// is compiled out in this file ('#if 0'), so its assignment stays commented.
WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInitMIPSdspR2(void) {
  WebPRescalerExportRowExpand = ExportRowExpand_MIPSdspR2;
  // WebPRescalerExportRowShrink = ExportRowShrink_MIPSdspR2;
}
313
314 #else // !WEBP_USE_MIPS_DSP_R2
315
316 WEBP_DSP_INIT_STUB(WebPRescalerDspInitMIPSdspR2)
317
318 #endif // WEBP_USE_MIPS_DSP_R2
319