// Copyright 2014 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// MIPS version of rescaling functions
//
// Author(s): Djordje Pesut (djordje.pesut@imgtec.com)

14 #include "src/dsp/dsp.h"
15 
16 #if defined(WEBP_USE_MIPS_DSP_R2) && !defined(WEBP_REDUCE_SIZE)
17 
18 #include <assert.h>
19 #include "src/utils/rescaler_utils.h"
20 
21 #define ROUNDER (WEBP_RESCALER_ONE >> 1)
22 #define MULT_FIX(x, y) (((uint64_t)(x) * (y) + ROUNDER) >> WEBP_RESCALER_RFIX)
23 #define MULT_FIX_FLOOR(x, y) (((uint64_t)(x) * (y)) >> WEBP_RESCALER_RFIX)
24 
//------------------------------------------------------------------------------
// Row export

#if 0  // disabled for now. TODO(skal): make match the C-code
// Exports one output row in the shrinking case (asserts !wrk->y_expand).
//
// Produces x_out_max = dst_width * num_channels bytes in wrk->dst and updates
// wrk->irow in place. With yscale = fy_scale * (-y_accum):
//   yscale != 0:  frac    = MULT_FIX(frow[x], yscale)
//                 dst[x]  = MULT_FIX_FLOOR(irow[x] - frac, fxy_scale)
//                 irow[x] = frac
//   yscale == 0:  dst[x]  = MULT_FIX_FLOOR(irow[x], fxy_scale)
//                 irow[x] = 0
// (formulas taken from the scalar tail loops below).
//
// Samples are processed four at a time by the inline assembly; the scalar
// loops handle the remaining (x_out_max & 3) samples. The assembly uses
// 4-byte loads/stores and a byte count of (count << 2), i.e. it assumes
// sizeof(rescaler_t) == 4 -- TODO confirm against rescaler_utils.h.
//
// NOTE(review): in both assembly loops the accumulator is seeded with
// 0x10000 * 0x8000 = 0x80000000 before the multiply by fxy_scale (temp7),
// i.e. a rounding term is added, whereas the scalar tail uses MULT_FIX_FLOOR
// (no rounding term). The two paths therefore do not look equivalent; this is
// possibly what the TODO above refers to -- confirm before re-enabling.
static void ExportRowShrink_MIPSdspR2(WebPRescaler* const wrk) {
  int i;
  const int x_out_max = wrk->dst_width * wrk->num_channels;
  uint8_t* dst = wrk->dst;
  rescaler_t* irow = wrk->irow;
  const rescaler_t* frow = wrk->frow;
  const int yscale = wrk->fy_scale * (-wrk->y_accum);
  int temp0, temp1, temp2, temp3, temp4, temp5, loop_end;
  const int temp7 = (int)wrk->fxy_scale;
  // Byte length of the part of the row handled by the 4-wide assembly loop.
  const int temp6 = (x_out_max & ~0x3) << 2;
  assert(!WebPRescalerOutputDone(wrk));
  assert(wrk->y_accum <= 0);
  assert(!wrk->y_expand);
  assert(wrk->fxy_scale != 0);
  if (yscale) {
    if (x_out_max >= 4) {
      int temp8, temp9, temp10, temp11;
      __asm__ volatile (
        // temp3 * temp4 = 0x80000000 is used to pre-load each accumulator.
        "li       %[temp3],    0x10000                    \n\t"
        "li       %[temp4],    0x8000                     \n\t"
        "addu     %[loop_end], %[frow],     %[temp6]      \n\t"
      "1:                                                 \n\t"
        "lw       %[temp0],    0(%[frow])                 \n\t"
        "lw       %[temp1],    4(%[frow])                 \n\t"
        "lw       %[temp2],    8(%[frow])                 \n\t"
        "lw       %[temp5],    12(%[frow])                \n\t"
        // frac = high word of (frow[x] * yscale + 0x80000000)
        "mult     $ac0,        %[temp3],    %[temp4]      \n\t"
        "maddu    $ac0,        %[temp0],    %[yscale]     \n\t"
        "mult     $ac1,        %[temp3],    %[temp4]      \n\t"
        "maddu    $ac1,        %[temp1],    %[yscale]     \n\t"
        "mult     $ac2,        %[temp3],    %[temp4]      \n\t"
        "maddu    $ac2,        %[temp2],    %[yscale]     \n\t"
        "mult     $ac3,        %[temp3],    %[temp4]      \n\t"
        "maddu    $ac3,        %[temp5],    %[yscale]     \n\t"
        "addiu    %[frow],     %[frow],     16            \n\t"
        "mfhi     %[temp0],    $ac0                       \n\t"
        "mfhi     %[temp1],    $ac1                       \n\t"
        "mfhi     %[temp2],    $ac2                       \n\t"
        "mfhi     %[temp5],    $ac3                       \n\t"
        "lw       %[temp8],    0(%[irow])                 \n\t"
        "lw       %[temp9],    4(%[irow])                 \n\t"
        "lw       %[temp10],   8(%[irow])                 \n\t"
        "lw       %[temp11],   12(%[irow])                \n\t"
        "addiu    %[dst],      %[dst],      4             \n\t"
        "addiu    %[irow],     %[irow],     16            \n\t"
        // irow[x] - frac
        "subu     %[temp8],    %[temp8],    %[temp0]      \n\t"
        "subu     %[temp9],    %[temp9],    %[temp1]      \n\t"
        "subu     %[temp10],   %[temp10],   %[temp2]      \n\t"
        "subu     %[temp11],   %[temp11],   %[temp5]      \n\t"
        // v = high word of ((irow[x] - frac) * fxy_scale + 0x80000000)
        "mult     $ac0,        %[temp3],    %[temp4]      \n\t"
        "maddu    $ac0,        %[temp8],    %[temp7]      \n\t"
        "mult     $ac1,        %[temp3],    %[temp4]      \n\t"
        "maddu    $ac1,        %[temp9],    %[temp7]      \n\t"
        "mult     $ac2,        %[temp3],    %[temp4]      \n\t"
        "maddu    $ac2,        %[temp10],   %[temp7]      \n\t"
        "mult     $ac3,        %[temp3],    %[temp4]      \n\t"
        "maddu    $ac3,        %[temp11],   %[temp7]      \n\t"
        "mfhi     %[temp8],    $ac0                       \n\t"
        "mfhi     %[temp9],    $ac1                       \n\t"
        "mfhi     %[temp10],   $ac2                       \n\t"
        "mfhi     %[temp11],   $ac3                       \n\t"
        // irow[x] = frac (pointers were already advanced, hence the
        // negative offsets).
        "sw       %[temp0],    -16(%[irow])               \n\t"
        "sw       %[temp1],    -12(%[irow])               \n\t"
        "sw       %[temp2],    -8(%[irow])                \n\t"
        "sw       %[temp5],    -4(%[irow])                \n\t"
        "sb       %[temp8],    -4(%[dst])                 \n\t"
        "sb       %[temp9],    -3(%[dst])                 \n\t"
        "sb       %[temp10],   -2(%[dst])                 \n\t"
        "sb       %[temp11],   -1(%[dst])                 \n\t"
        "bne      %[frow],     %[loop_end], 1b            \n\t"
        : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
          [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),
          [irow]"+r"(irow), [dst]"+r"(dst), [loop_end]"=&r"(loop_end),
          [temp8]"=&r"(temp8), [temp9]"=&r"(temp9), [temp10]"=&r"(temp10),
          [temp11]"=&r"(temp11), [temp2]"=&r"(temp2)
        : [temp7]"r"(temp7), [yscale]"r"(yscale), [temp6]"r"(temp6)
        : "memory", "hi", "lo", "$ac1hi", "$ac1lo",
          "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
      );
    }
    // Scalar tail: the 0..3 samples left over after the 4-wide loop.
    for (i = 0; i < (x_out_max & 0x3); ++i) {
      const uint32_t frac = (uint32_t)MULT_FIX(*frow++, yscale);
      const int v = (int)MULT_FIX_FLOOR(*irow - frac, wrk->fxy_scale);
      assert(v >= 0 && v <= 255);
      *dst++ = v;
      *irow++ = frac;   // new fractional start
    }
  } else {
    if (x_out_max >= 4) {
      __asm__ volatile (
        // temp3 * temp4 = 0x80000000 is used to pre-load each accumulator.
        "li       %[temp3],    0x10000                    \n\t"
        "li       %[temp4],    0x8000                     \n\t"
        "addu     %[loop_end], %[irow],     %[temp6]      \n\t"
      "1:                                                 \n\t"
        "lw       %[temp0],    0(%[irow])                 \n\t"
        "lw       %[temp1],    4(%[irow])                 \n\t"
        "lw       %[temp2],    8(%[irow])                 \n\t"
        "lw       %[temp5],    12(%[irow])                \n\t"
        "addiu    %[dst],      %[dst],      4             \n\t"
        "addiu    %[irow],     %[irow],     16            \n\t"
        // v = high word of (irow[x] * fxy_scale + 0x80000000)
        "mult     $ac0,        %[temp3],    %[temp4]      \n\t"
        "maddu    $ac0,        %[temp0],    %[temp7]      \n\t"
        "mult     $ac1,        %[temp3],    %[temp4]      \n\t"
        "maddu    $ac1,        %[temp1],    %[temp7]      \n\t"
        "mult     $ac2,        %[temp3],    %[temp4]      \n\t"
        "maddu    $ac2,        %[temp2],    %[temp7]      \n\t"
        "mult     $ac3,        %[temp3],    %[temp4]      \n\t"
        "maddu    $ac3,        %[temp5],    %[temp7]      \n\t"
        "mfhi     %[temp0],    $ac0                       \n\t"
        "mfhi     %[temp1],    $ac1                       \n\t"
        "mfhi     %[temp2],    $ac2                       \n\t"
        "mfhi     %[temp5],    $ac3                       \n\t"
        // irow[x] = 0
        "sw       $zero,       -16(%[irow])               \n\t"
        "sw       $zero,       -12(%[irow])               \n\t"
        "sw       $zero,       -8(%[irow])                \n\t"
        "sw       $zero,       -4(%[irow])                \n\t"
        "sb       %[temp0],    -4(%[dst])                 \n\t"
        "sb       %[temp1],    -3(%[dst])                 \n\t"
        "sb       %[temp2],    -2(%[dst])                 \n\t"
        "sb       %[temp5],    -1(%[dst])                 \n\t"
        "bne      %[irow],     %[loop_end], 1b            \n\t"
        : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
          [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [irow]"+r"(irow),
          [dst]"+r"(dst), [loop_end]"=&r"(loop_end), [temp2]"=&r"(temp2)
        : [temp7]"r"(temp7), [temp6]"r"(temp6)
        : "memory", "hi", "lo", "$ac1hi", "$ac1lo",
          "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
      );
    }
    // Scalar tail: the 0..3 samples left over after the 4-wide loop.
    for (i = 0; i < (x_out_max & 0x3); ++i) {
      const int v = (int)MULT_FIX_FLOOR(*irow, wrk->fxy_scale);
      assert(v >= 0 && v <= 255);
      *dst++ = v;
      *irow++ = 0;
    }
  }
}
#endif  // 0

// Exports one output row in the vertical-expansion case (asserts
// wrk->y_expand and wrk->y_sub != 0).
//
// Writes x_out_max = dst_width * num_channels bytes to wrk->dst; wrk->frow
// and wrk->irow are only read. Per sample (formulas taken from the scalar
// tail loops below):
//   y_accum == 0:  dst[x] = MULT_FIX(frow[x], fy_scale)
//   otherwise:     B = WEBP_RESCALER_FRAC(-y_accum, y_sub)
//                  A = WEBP_RESCALER_ONE - B
//                  J = (A * frow[x] + B * irow[x] + ROUNDER)
//                          >> WEBP_RESCALER_RFIX
//                  dst[x] = MULT_FIX(J, fy_scale)
//
// Samples are processed four at a time by the inline assembly, one DSP
// accumulator ($ac0..$ac3) per sample; the scalar loops handle the remaining
// (x_out_max & 3) samples.
//
// The assembly pre-loads each accumulator with 0x10000 * 0x8000 = 0x80000000
// and reads the result back with mfhi (upper 32 bits). This matches the
// macros only if ROUNDER == 0x80000000 and WEBP_RESCALER_RFIX == 32, and the
// 4-byte loads with a byte count of (count << 2) assume
// sizeof(rescaler_t) == 4 -- TODO confirm both against rescaler_utils.h.
// Unlike the scalar tail, the assembly path stores the low byte of each
// result with no 0..255 range assert.
static void ExportRowExpand_MIPSdspR2(WebPRescaler* const wrk) {
  int i;
  uint8_t* dst = wrk->dst;
  rescaler_t* irow = wrk->irow;
  const int x_out_max = wrk->dst_width * wrk->num_channels;
  const rescaler_t* frow = wrk->frow;
  int temp0, temp1, temp2, temp3, temp4, temp5, loop_end;
  // Byte length of the part of the row handled by the 4-wide assembly loop.
  const int temp6 = (x_out_max & ~0x3) << 2;
  const int temp7 = (int)wrk->fy_scale;
  assert(!WebPRescalerOutputDone(wrk));
  assert(wrk->y_accum <= 0);
  assert(wrk->y_expand);
  assert(wrk->y_sub != 0);
  if (wrk->y_accum == 0) {
    if (x_out_max >= 4) {
      __asm__ volatile (
        // temp4 * temp5 = 0x80000000 is used to pre-load each accumulator.
        "li       %[temp4],    0x10000                    \n\t"
        "li       %[temp5],    0x8000                     \n\t"
        "addu     %[loop_end], %[frow],     %[temp6]      \n\t"
      "1:                                                 \n\t"
        "lw       %[temp0],    0(%[frow])                 \n\t"
        "lw       %[temp1],    4(%[frow])                 \n\t"
        "lw       %[temp2],    8(%[frow])                 \n\t"
        "lw       %[temp3],    12(%[frow])                \n\t"
        "addiu    %[dst],      %[dst],      4             \n\t"
        "addiu    %[frow],     %[frow],     16            \n\t"
        // v = high word of (frow[x] * fy_scale + 0x80000000)
        "mult     $ac0,        %[temp4],    %[temp5]      \n\t"
        "maddu    $ac0,        %[temp0],    %[temp7]      \n\t"
        "mult     $ac1,        %[temp4],    %[temp5]      \n\t"
        "maddu    $ac1,        %[temp1],    %[temp7]      \n\t"
        "mult     $ac2,        %[temp4],    %[temp5]      \n\t"
        "maddu    $ac2,        %[temp2],    %[temp7]      \n\t"
        "mult     $ac3,        %[temp4],    %[temp5]      \n\t"
        "maddu    $ac3,        %[temp3],    %[temp7]      \n\t"
        "mfhi     %[temp0],    $ac0                       \n\t"
        "mfhi     %[temp1],    $ac1                       \n\t"
        "mfhi     %[temp2],    $ac2                       \n\t"
        "mfhi     %[temp3],    $ac3                       \n\t"
        // dst was already advanced by 4, hence the negative offsets.
        "sb       %[temp0],    -4(%[dst])                 \n\t"
        "sb       %[temp1],    -3(%[dst])                 \n\t"
        "sb       %[temp2],    -2(%[dst])                 \n\t"
        "sb       %[temp3],    -1(%[dst])                 \n\t"
        "bne      %[frow],     %[loop_end], 1b            \n\t"
        : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
          [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),
          [dst]"+r"(dst), [loop_end]"=&r"(loop_end), [temp2]"=&r"(temp2)
        : [temp7]"r"(temp7), [temp6]"r"(temp6)
        : "memory", "hi", "lo", "$ac1hi", "$ac1lo",
          "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
      );
    }
    // Scalar tail: the 0..3 samples left over after the 4-wide loop.
    for (i = 0; i < (x_out_max & 0x3); ++i) {
      const uint32_t J = *frow++;
      const int v = (int)MULT_FIX(J, wrk->fy_scale);
      assert(v >= 0 && v <= 255);
      *dst++ = v;
    }
  } else {
    // Blend weights for frow (A) and irow (B); A + B == WEBP_RESCALER_ONE
    // modulo 2^32 because of the uint32_t cast.
    const uint32_t B = WEBP_RESCALER_FRAC(-wrk->y_accum, wrk->y_sub);
    const uint32_t A = (uint32_t)(WEBP_RESCALER_ONE - B);
    if (x_out_max >= 4) {
      int temp8, temp9, temp10, temp11;
      __asm__ volatile (
        // temp8 * temp9 = 0x80000000 is used to pre-load each accumulator.
        "li       %[temp8],    0x10000                    \n\t"
        "li       %[temp9],    0x8000                     \n\t"
        "addu     %[loop_end], %[frow],     %[temp6]      \n\t"
      "1:                                                 \n\t"
        "lw       %[temp0],    0(%[frow])                 \n\t"
        "lw       %[temp1],    4(%[frow])                 \n\t"
        "lw       %[temp2],    8(%[frow])                 \n\t"
        "lw       %[temp3],    12(%[frow])                \n\t"
        "lw       %[temp4],    0(%[irow])                 \n\t"
        "lw       %[temp5],    4(%[irow])                 \n\t"
        "lw       %[temp10],   8(%[irow])                 \n\t"
        "lw       %[temp11],   12(%[irow])                \n\t"
        "addiu    %[dst],      %[dst],      4             \n\t"
        // J = high word of (A * frow[x] + B * irow[x] + 0x80000000)
        "mult     $ac0,        %[temp8],    %[temp9]      \n\t"
        "maddu    $ac0,        %[A],        %[temp0]      \n\t"
        "maddu    $ac0,        %[B],        %[temp4]      \n\t"
        "mult     $ac1,        %[temp8],    %[temp9]      \n\t"
        "maddu    $ac1,        %[A],        %[temp1]      \n\t"
        "maddu    $ac1,        %[B],        %[temp5]      \n\t"
        "mult     $ac2,        %[temp8],    %[temp9]      \n\t"
        "maddu    $ac2,        %[A],        %[temp2]      \n\t"
        "maddu    $ac2,        %[B],        %[temp10]     \n\t"
        "mult     $ac3,        %[temp8],    %[temp9]      \n\t"
        "maddu    $ac3,        %[A],        %[temp3]      \n\t"
        "maddu    $ac3,        %[B],        %[temp11]     \n\t"
        "addiu    %[frow],     %[frow],     16            \n\t"
        "addiu    %[irow],     %[irow],     16            \n\t"
        "mfhi     %[temp0],    $ac0                       \n\t"
        "mfhi     %[temp1],    $ac1                       \n\t"
        "mfhi     %[temp2],    $ac2                       \n\t"
        "mfhi     %[temp3],    $ac3                       \n\t"
        // v = high word of (J * fy_scale + 0x80000000)
        "mult     $ac0,        %[temp8],    %[temp9]      \n\t"
        "maddu    $ac0,        %[temp0],    %[temp7]      \n\t"
        "mult     $ac1,        %[temp8],    %[temp9]      \n\t"
        "maddu    $ac1,        %[temp1],    %[temp7]      \n\t"
        "mult     $ac2,        %[temp8],    %[temp9]      \n\t"
        "maddu    $ac2,        %[temp2],    %[temp7]      \n\t"
        "mult     $ac3,        %[temp8],    %[temp9]      \n\t"
        "maddu    $ac3,        %[temp3],    %[temp7]      \n\t"
        "mfhi     %[temp0],    $ac0                       \n\t"
        "mfhi     %[temp1],    $ac1                       \n\t"
        "mfhi     %[temp2],    $ac2                       \n\t"
        "mfhi     %[temp3],    $ac3                       \n\t"
        // dst was already advanced by 4, hence the negative offsets.
        "sb       %[temp0],    -4(%[dst])                 \n\t"
        "sb       %[temp1],    -3(%[dst])                 \n\t"
        "sb       %[temp2],    -2(%[dst])                 \n\t"
        "sb       %[temp3],    -1(%[dst])                 \n\t"
        "bne      %[frow],     %[loop_end], 1b            \n\t"
        : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
          [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),
          [irow]"+r"(irow), [dst]"+r"(dst), [loop_end]"=&r"(loop_end),
          [temp8]"=&r"(temp8), [temp9]"=&r"(temp9), [temp10]"=&r"(temp10),
          [temp11]"=&r"(temp11), [temp2]"=&r"(temp2)
        : [temp7]"r"(temp7), [temp6]"r"(temp6), [A]"r"(A), [B]"r"(B)
        : "memory", "hi", "lo", "$ac1hi", "$ac1lo",
          "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
      );
    }
    // Scalar tail: the 0..3 samples left over after the 4-wide loop.
    for (i = 0; i < (x_out_max & 0x3); ++i) {
      const uint64_t I = (uint64_t)A * *frow++
                       + (uint64_t)B * *irow++;
      const uint32_t J = (uint32_t)((I + ROUNDER) >> WEBP_RESCALER_RFIX);
      const int v = (int)MULT_FIX(J, wrk->fy_scale);
      assert(v >= 0 && v <= 255);
      *dst++ = v;
    }
  }
}

// Drop the fixed-point helper macros so they do not stay defined for the
// remainder of the translation unit.
#undef MULT_FIX_FLOOR
#undef MULT_FIX
#undef ROUNDER

//------------------------------------------------------------------------------
// Entry point

307 extern void WebPRescalerDspInitMIPSdspR2(void);
308 
WebPRescalerDspInitMIPSdspR2(void)309 WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInitMIPSdspR2(void) {
310   WebPRescalerExportRowExpand = ExportRowExpand_MIPSdspR2;
311 //  WebPRescalerExportRowShrink = ExportRowShrink_MIPSdspR2;
312 }
313 
314 #else  // !WEBP_USE_MIPS_DSP_R2
315 
316 WEBP_DSP_INIT_STUB(WebPRescalerDspInitMIPSdspR2)
317 
318 #endif  // WEBP_USE_MIPS_DSP_R2
319