1 /*
2  *  Fast C2P (Chunky-to-Planar) Conversion
3  *
4  *  Copyright (C) 2003-2008 Geert Uytterhoeven
5  *
6  *  NOTES:
7  *    - This code was inspired by Scout's C2P tutorial
 *    - It assumes that it runs on a big-endian system
9  *
10  *  This file is subject to the terms and conditions of the GNU General Public
11  *  License. See the file COPYING in the main directory of this archive
12  *  for more details.
13  */
14 
15 #include <linux/build_bug.h>
16 
17 
18     /*
19      *  Basic transpose step
20      */
21 
/*
 * Exchange the bits of d[i1] selected by mask with the bits of d[i2] that
 * sit shift positions higher.  All other bits of both words are left as
 * they were.
 *
 * d:     array of 32-bit words, modified in place
 * i1:    index of the word whose bits under mask are exchanged
 * i2:    index of the word whose bits under (mask << shift) are exchanged
 * shift: distance in bits between the two exchanged fields
 * mask:  selects the participating bits of d[i1]
 *
 * The transp*() helpers in this file always pass shift == n together with
 * mask == get_mask(n).
 */
static inline void _transp(u32 d[], unsigned int i1, unsigned int i2,
			   unsigned int shift, u32 mask)
{
	/* Positions (aligned to d[i1]) where the two fields disagree */
	u32 delta = ((d[i2] >> shift) ^ d[i1]) & mask;

	/* Flipping exactly those positions on both sides swaps the fields */
	d[i1] = d[i1] ^ delta;
	d[i2] = d[i2] ^ (delta << shift);
}
30 
31 
get_mask(unsigned int n)32 static __always_inline u32 get_mask(unsigned int n)
33 {
34 	switch (n) {
35 	case 1:
36 		return 0x55555555;
37 
38 	case 2:
39 		return 0x33333333;
40 
41 	case 4:
42 		return 0x0f0f0f0f;
43 
44 	case 8:
45 		return 0x00ff00ff;
46 
47 	case 16:
48 		return 0x0000ffff;
49 	}
50 
51 	BUILD_BUG();
52 	return 0;
53 }
54 
55 
56     /*
57      *  Transpose operations on 8 32-bit words
58      */
59 
transp8(u32 d[],unsigned int n,unsigned int m)60 static __always_inline void transp8(u32 d[], unsigned int n, unsigned int m)
61 {
62 	u32 mask = get_mask(n);
63 
64 	switch (m) {
65 	case 1:
66 		/* First n x 1 block */
67 		_transp(d, 0, 1, n, mask);
68 		/* Second n x 1 block */
69 		_transp(d, 2, 3, n, mask);
70 		/* Third n x 1 block */
71 		_transp(d, 4, 5, n, mask);
72 		/* Fourth n x 1 block */
73 		_transp(d, 6, 7, n, mask);
74 		return;
75 
76 	case 2:
77 		/* First n x 2 block */
78 		_transp(d, 0, 2, n, mask);
79 		_transp(d, 1, 3, n, mask);
80 		/* Second n x 2 block */
81 		_transp(d, 4, 6, n, mask);
82 		_transp(d, 5, 7, n, mask);
83 		return;
84 
85 	case 4:
86 		/* Single n x 4 block */
87 		_transp(d, 0, 4, n, mask);
88 		_transp(d, 1, 5, n, mask);
89 		_transp(d, 2, 6, n, mask);
90 		_transp(d, 3, 7, n, mask);
91 		return;
92 	}
93 
94 	BUILD_BUG();
95 }
96 
97 
98     /*
99      *  Transpose operations on 4 32-bit words
100      */
101 
transp4(u32 d[],unsigned int n,unsigned int m)102 static __always_inline void transp4(u32 d[], unsigned int n, unsigned int m)
103 {
104 	u32 mask = get_mask(n);
105 
106 	switch (m) {
107 	case 1:
108 		/* First n x 1 block */
109 		_transp(d, 0, 1, n, mask);
110 		/* Second n x 1 block */
111 		_transp(d, 2, 3, n, mask);
112 		return;
113 
114 	case 2:
115 		/* Single n x 2 block */
116 		_transp(d, 0, 2, n, mask);
117 		_transp(d, 1, 3, n, mask);
118 		return;
119 	}
120 
121 	BUILD_BUG();
122 }
123 
124 
125     /*
126      *  Transpose operations on 4 32-bit words (reverse order)
127      */
128 
transp4x(u32 d[],unsigned int n,unsigned int m)129 static __always_inline void transp4x(u32 d[], unsigned int n, unsigned int m)
130 {
131 	u32 mask = get_mask(n);
132 
133 	switch (m) {
134 	case 2:
135 		/* Single n x 2 block */
136 		_transp(d, 2, 0, n, mask);
137 		_transp(d, 3, 1, n, mask);
138 		return;
139 	}
140 
141 	BUILD_BUG();
142 }
143 
144 
145     /*
146      *  Compose two values, using a bitmask as decision value
147      *  This is equivalent to (a & mask) | (b & ~mask)
148      */
149 
/*
 * Merge two words bit by bit: wherever a bit of mask is set the result
 * takes that bit from a, everywhere else it takes the bit from b.
 *
 * The value equals (a & mask) | (b & ~mask); it is computed here with two
 * XORs and one AND.
 */
static inline u32 comp(u32 a, u32 b, u32 mask)
{
	/* Positions picked by mask where a and b disagree */
	u32 flip = mask & (a ^ b);

	/* Flipping those positions in b turns them into the bits of a */
	return b ^ flip;
}
154