1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Hantro VP9 codec driver
4 *
5 * Copyright (C) 2021 Collabora Ltd.
6 */
7
8 #include <linux/types.h>
9 #include <media/v4l2-mem2mem.h>
10
11 #include "hantro.h"
12 #include "hantro_hw.h"
13 #include "hantro_vp9.h"
14
15 #define POW2(x) (1 << (x))
16
17 #define MAX_LOG2_TILE_COLUMNS 6
18 #define MAX_NUM_TILE_COLS POW2(MAX_LOG2_TILE_COLUMNS)
19 #define MAX_TILE_COLS 20
20 #define MAX_TILE_ROWS 22
21
hantro_vp9_tile_filter_size(unsigned int height)22 static size_t hantro_vp9_tile_filter_size(unsigned int height)
23 {
24 u32 h, height32, size;
25
26 h = roundup(height, 8);
27
28 height32 = roundup(h, 64);
29 size = 24 * height32 * (MAX_NUM_TILE_COLS - 1); /* luma: 8, chroma: 8 + 8 */
30
31 return size;
32 }
33
hantro_vp9_bsd_control_size(unsigned int height)34 static size_t hantro_vp9_bsd_control_size(unsigned int height)
35 {
36 u32 h, height32;
37
38 h = roundup(height, 8);
39 height32 = roundup(h, 64);
40
41 return 16 * (height32 / 4) * (MAX_NUM_TILE_COLS - 1);
42 }
43
hantro_vp9_segment_map_size(unsigned int width,unsigned int height)44 static size_t hantro_vp9_segment_map_size(unsigned int width, unsigned int height)
45 {
46 u32 w, h;
47 int num_ctbs;
48
49 w = roundup(width, 8);
50 h = roundup(height, 8);
51 num_ctbs = ((w + 63) / 64) * ((h + 63) / 64);
52
53 return num_ctbs * 32;
54 }
55
hantro_vp9_prob_tab_size(void)56 static inline size_t hantro_vp9_prob_tab_size(void)
57 {
58 return roundup(sizeof(struct hantro_g2_all_probs), 16);
59 }
60
hantro_vp9_count_tab_size(void)61 static inline size_t hantro_vp9_count_tab_size(void)
62 {
63 return roundup(sizeof(struct symbol_counts), 16);
64 }
65
/*
 * Size of the tile info area: four u16 entries per tile for up to
 * MAX_TILE_COLS x MAX_TILE_ROWS tiles, plus 15 + 16 bytes of padding,
 * masked down to a 16-byte boundary.  The final roundup is a no-op
 * (the mask already aligned the value) but documents the alignment
 * requirement explicitly.
 */
static inline size_t hantro_vp9_tile_info_size(void)
{
	return roundup((MAX_TILE_COLS * MAX_TILE_ROWS * 4 * sizeof(u16) + 15 + 16) & ~0xf, 16);
}
70
get_coeffs_arr(struct symbol_counts * cnts,int i,int j,int k,int l,int m)71 static void *get_coeffs_arr(struct symbol_counts *cnts, int i, int j, int k, int l, int m)
72 {
73 if (i == 0)
74 return &cnts->count_coeffs[j][k][l][m];
75
76 if (i == 1)
77 return &cnts->count_coeffs8x8[j][k][l][m];
78
79 if (i == 2)
80 return &cnts->count_coeffs16x16[j][k][l][m];
81
82 if (i == 3)
83 return &cnts->count_coeffs32x32[j][k][l][m];
84
85 return NULL;
86 }
87
get_eobs1(struct symbol_counts * cnts,int i,int j,int k,int l,int m)88 static void *get_eobs1(struct symbol_counts *cnts, int i, int j, int k, int l, int m)
89 {
90 if (i == 0)
91 return &cnts->count_coeffs[j][k][l][m][3];
92
93 if (i == 1)
94 return &cnts->count_coeffs8x8[j][k][l][m][3];
95
96 if (i == 2)
97 return &cnts->count_coeffs16x16[j][k][l][m][3];
98
99 if (i == 3)
100 return &cnts->count_coeffs32x32[j][k][l][m][3];
101
102 return NULL;
103 }
104
/*
 * Innermost body of the counter-table wiring loop in
 * init_v4l2_vp9_count_tbl(), factored out as a macro to keep the
 * deeply nested loop readable.  It expects vp9_ctx, cnts and the loop
 * indices i, j, k, l, m to be in scope at the expansion site.
 */
#define INNER_LOOP \
	do { \
		for (m = 0; m < ARRAY_SIZE(vp9_ctx->cnts.coeff[i][0][0][0]); ++m) { \
			vp9_ctx->cnts.coeff[i][j][k][l][m] = \
				get_coeffs_arr(cnts, i, j, k, l, m); \
			vp9_ctx->cnts.eob[i][j][k][l][m][0] = \
				&cnts->count_eobs[i][j][k][l][m]; \
			vp9_ctx->cnts.eob[i][j][k][l][m][1] = \
				get_eobs1(cnts, i, j, k, l, m); \
		} \
	} while (0)
116
/*
 * Point every entry of the v4l2-vp9 counter descriptor (vp9_ctx->cnts)
 * at the corresponding field of the hardware-written counter area that
 * lives in the misc aux buffer at ctx_counters_offset.
 */
static void init_v4l2_vp9_count_tbl(struct hantro_ctx *ctx)
{
	struct hantro_vp9_dec_hw_ctx *vp9_ctx = &ctx->vp9_dec;
	/* Hardware symbol counters inside the misc DMA buffer. */
	struct symbol_counts *cnts = vp9_ctx->misc.cpu + vp9_ctx->ctx_counters_offset;
	int i, j, k, l, m;

	vp9_ctx->cnts.partition = &cnts->partition_counts;
	vp9_ctx->cnts.skip = &cnts->mbskip_count;
	vp9_ctx->cnts.intra_inter = &cnts->intra_inter_count;
	vp9_ctx->cnts.tx32p = &cnts->tx32x32_count;
	/*
	 * g2 hardware uses tx16x16_count[2][3], while the api
	 * expects tx16p[2][4], so this must be explicitly copied
	 * into vp9_ctx->cnts.tx16p when passing the data to the
	 * vp9 library function
	 */
	vp9_ctx->cnts.tx8p = &cnts->tx8x8_count;

	vp9_ctx->cnts.y_mode = &cnts->sb_ymode_counts;
	vp9_ctx->cnts.uv_mode = &cnts->uv_mode_counts;
	vp9_ctx->cnts.comp = &cnts->comp_inter_count;
	vp9_ctx->cnts.comp_ref = &cnts->comp_ref_count;
	vp9_ctx->cnts.single_ref = &cnts->single_ref_count;
	vp9_ctx->cnts.filter = &cnts->switchable_interp_counts;
	vp9_ctx->cnts.mv_joint = &cnts->mv_counts.joints;
	vp9_ctx->cnts.sign = &cnts->mv_counts.sign;
	vp9_ctx->cnts.classes = &cnts->mv_counts.classes;
	vp9_ctx->cnts.class0 = &cnts->mv_counts.class0;
	vp9_ctx->cnts.bits = &cnts->mv_counts.bits;
	vp9_ctx->cnts.class0_fp = &cnts->mv_counts.class0_fp;
	vp9_ctx->cnts.fp = &cnts->mv_counts.fp;
	vp9_ctx->cnts.class0_hp = &cnts->mv_counts.class0_hp;
	vp9_ctx->cnts.hp = &cnts->mv_counts.hp;

	/* Wire up the multi-dimensional coefficient/EOB counter pointers. */
	for (i = 0; i < ARRAY_SIZE(vp9_ctx->cnts.coeff); ++i)
		for (j = 0; j < ARRAY_SIZE(vp9_ctx->cnts.coeff[i]); ++j)
			for (k = 0; k < ARRAY_SIZE(vp9_ctx->cnts.coeff[i][0]); ++k)
				for (l = 0; l < ARRAY_SIZE(vp9_ctx->cnts.coeff[i][0][0]); ++l)
					INNER_LOOP;
}
157
hantro_vp9_dec_init(struct hantro_ctx * ctx)158 int hantro_vp9_dec_init(struct hantro_ctx *ctx)
159 {
160 struct hantro_dev *vpu = ctx->dev;
161 const struct hantro_variant *variant = vpu->variant;
162 struct hantro_vp9_dec_hw_ctx *vp9_dec = &ctx->vp9_dec;
163 struct hantro_aux_buf *tile_edge = &vp9_dec->tile_edge;
164 struct hantro_aux_buf *segment_map = &vp9_dec->segment_map;
165 struct hantro_aux_buf *misc = &vp9_dec->misc;
166 u32 i, max_width, max_height, size;
167
168 if (variant->num_dec_fmts < 1)
169 return -EINVAL;
170
171 for (i = 0; i < variant->num_dec_fmts; ++i)
172 if (variant->dec_fmts[i].fourcc == V4L2_PIX_FMT_VP9_FRAME)
173 break;
174
175 if (i == variant->num_dec_fmts)
176 return -EINVAL;
177
178 max_width = vpu->variant->dec_fmts[i].frmsize.max_width;
179 max_height = vpu->variant->dec_fmts[i].frmsize.max_height;
180
181 size = hantro_vp9_tile_filter_size(max_height);
182 vp9_dec->bsd_ctrl_offset = size;
183 size += hantro_vp9_bsd_control_size(max_height);
184
185 tile_edge->cpu = dma_alloc_coherent(vpu->dev, size, &tile_edge->dma, GFP_KERNEL);
186 if (!tile_edge->cpu)
187 return -ENOMEM;
188
189 tile_edge->size = size;
190 memset(tile_edge->cpu, 0, size);
191
192 size = hantro_vp9_segment_map_size(max_width, max_height);
193 vp9_dec->segment_map_size = size;
194 size *= 2; /* we need two areas of this size, used alternately */
195
196 segment_map->cpu = dma_alloc_coherent(vpu->dev, size, &segment_map->dma, GFP_KERNEL);
197 if (!segment_map->cpu)
198 goto err_segment_map;
199
200 segment_map->size = size;
201 memset(segment_map->cpu, 0, size);
202
203 size = hantro_vp9_prob_tab_size();
204 vp9_dec->ctx_counters_offset = size;
205 size += hantro_vp9_count_tab_size();
206 vp9_dec->tile_info_offset = size;
207 size += hantro_vp9_tile_info_size();
208
209 misc->cpu = dma_alloc_coherent(vpu->dev, size, &misc->dma, GFP_KERNEL);
210 if (!misc->cpu)
211 goto err_misc;
212
213 misc->size = size;
214 memset(misc->cpu, 0, size);
215
216 init_v4l2_vp9_count_tbl(ctx);
217
218 return 0;
219
220 err_misc:
221 dma_free_coherent(vpu->dev, segment_map->size, segment_map->cpu, segment_map->dma);
222
223 err_segment_map:
224 dma_free_coherent(vpu->dev, tile_edge->size, tile_edge->cpu, tile_edge->dma);
225
226 return -ENOMEM;
227 }
228
hantro_vp9_dec_exit(struct hantro_ctx * ctx)229 void hantro_vp9_dec_exit(struct hantro_ctx *ctx)
230 {
231 struct hantro_dev *vpu = ctx->dev;
232 struct hantro_vp9_dec_hw_ctx *vp9_dec = &ctx->vp9_dec;
233 struct hantro_aux_buf *tile_edge = &vp9_dec->tile_edge;
234 struct hantro_aux_buf *segment_map = &vp9_dec->segment_map;
235 struct hantro_aux_buf *misc = &vp9_dec->misc;
236
237 dma_free_coherent(vpu->dev, misc->size, misc->cpu, misc->dma);
238 dma_free_coherent(vpu->dev, segment_map->size, segment_map->cpu, segment_map->dma);
239 dma_free_coherent(vpu->dev, tile_edge->size, tile_edge->cpu, tile_edge->dma);
240 }
241