1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_rfft_q15.c
4  * Description:  RFFT & RIFFT Q15 process function
5  *
6  * $Date:        27. January 2017
7  * $Revision:    V.1.5.1
8  *
9  * Target Processor: Cortex-M cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "arm_math.h"
30 
31 /* ----------------------------------------------------------------------
32  * Internal functions prototypes
33  * -------------------------------------------------------------------- */
34 
35 void arm_split_rfft_q15(
36     q15_t * pSrc,
37     uint32_t fftLen,
38     q15_t * pATable,
39     q15_t * pBTable,
40     q15_t * pDst,
41     uint32_t modifier);
42 
43 void arm_split_rifft_q15(
44     q15_t * pSrc,
45     uint32_t fftLen,
46     q15_t * pATable,
47     q15_t * pBTable,
48     q15_t * pDst,
49     uint32_t modifier);
50 
51 /**
52 * @addtogroup RealFFT
53 * @{
54 */
55 
56 /**
57 * @brief Processing function for the Q15 RFFT/RIFFT.
58 * @param[in]  *S    points to an instance of the Q15 RFFT/RIFFT structure.
59 * @param[in]  *pSrc points to the input buffer.
60 * @param[out] *pDst points to the output buffer.
61 * @return none.
62 *
63 * \par Input an output formats:
64 * \par
65 * Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.
66 * Hence the output format is different for different RFFT sizes.
67 * The input and output formats for different RFFT sizes and number of bits to upscale are mentioned in the tables below for RFFT and RIFFT:
68 * \par
69 * \image html RFFTQ15.gif "Input and Output Formats for Q15 RFFT"
70 * \par
71 * \image html RIFFTQ15.gif "Input and Output Formats for Q15 RIFFT"
72 */
73 
arm_rfft_q15(const arm_rfft_instance_q15 * S,q15_t * pSrc,q15_t * pDst)74 void arm_rfft_q15(
75     const arm_rfft_instance_q15 * S,
76     q15_t * pSrc,
77     q15_t * pDst)
78 {
79     const arm_cfft_instance_q15 *S_CFFT = S->pCfft;
80     uint32_t i;
81     uint32_t L2 = S->fftLenReal >> 1;
82 
83     /* Calculation of RIFFT of input */
84     if (S->ifftFlagR == 1u)
85     {
86         /*  Real IFFT core process */
87         arm_split_rifft_q15(pSrc, L2, S->pTwiddleAReal,
88                             S->pTwiddleBReal, pDst, S->twidCoefRModifier);
89 
90         /* Complex IFFT process */
91         arm_cfft_q15(S_CFFT, pDst, S->ifftFlagR, S->bitReverseFlagR);
92 
93         for(i=0;i<S->fftLenReal;i++)
94         {
95             pDst[i] = pDst[i] << 1;
96         }
97     }
98     else
99     {
100         /* Calculation of RFFT of input */
101 
102         /* Complex FFT process */
103         arm_cfft_q15(S_CFFT, pSrc, S->ifftFlagR, S->bitReverseFlagR);
104 
105         /*  Real FFT core process */
106         arm_split_rfft_q15(pSrc, L2, S->pTwiddleAReal,
107                             S->pTwiddleBReal, pDst, S->twidCoefRModifier);
108     }
109 }
110 
111 /**
112 * @} end of RealFFT group
113 */
114 
115 /**
116 * @brief  Core Real FFT process
117 * @param  *pSrc 				points to the input buffer.
118 * @param  fftLen  				length of FFT.
119 * @param  *pATable 			points to the A twiddle Coef buffer.
120 * @param  *pBTable 			points to the B twiddle Coef buffer.
121 * @param  *pDst 				points to the output buffer.
122 * @param  modifier 	        twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
123 * @return none.
124 * The function implements a Real FFT
125 */
126 
arm_split_rfft_q15(q15_t * pSrc,uint32_t fftLen,q15_t * pATable,q15_t * pBTable,q15_t * pDst,uint32_t modifier)127 void arm_split_rfft_q15(
128     q15_t * pSrc,
129     uint32_t fftLen,
130     q15_t * pATable,
131     q15_t * pBTable,
132     q15_t * pDst,
133     uint32_t modifier)
134 {
135     uint32_t i;                                    /* Loop Counter */
136     q31_t outR, outI;                              /* Temporary variables for output */
137     q15_t *pCoefA, *pCoefB;                        /* Temporary pointers for twiddle factors */
138     q15_t *pSrc1, *pSrc2;
139 #if defined (ARM_MATH_DSP)
140     q15_t *pD1, *pD2;
141 #endif
142 
143     //  pSrc[2u * fftLen] = pSrc[0];
144     //  pSrc[(2u * fftLen) + 1u] = pSrc[1];
145 
146     pCoefA = &pATable[modifier * 2u];
147     pCoefB = &pBTable[modifier * 2u];
148 
149     pSrc1 = &pSrc[2];
150     pSrc2 = &pSrc[(2u * fftLen) - 2u];
151 
152 #if defined (ARM_MATH_DSP)
153 
154     /* Run the below code for Cortex-M4 and Cortex-M3 */
155     i = 1u;
156     pD1 = pDst + 2;
157     pD2 = pDst + (4u * fftLen) - 2;
158 
159     for(i = fftLen - 1; i > 0; i--)
160     {
161         /*
162         outR = (pSrc[2 * i] * pATable[2 * i] - pSrc[2 * i + 1] * pATable[2 * i + 1]
163         + pSrc[2 * n - 2 * i] * pBTable[2 * i] +
164         pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
165         */
166 
167         /* outI = (pIn[2 * i + 1] * pATable[2 * i] + pIn[2 * i] * pATable[2 * i + 1] +
168         pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
169         pIn[2 * n - 2 * i + 1] * pBTable[2 * i]); */
170 
171 
172 #ifndef ARM_MATH_BIG_ENDIAN
173 
174         /* pSrc[2 * i] * pATable[2 * i] - pSrc[2 * i + 1] * pATable[2 * i + 1] */
175         outR = __SMUSD(*__SIMD32(pSrc1), *__SIMD32(pCoefA));
176 
177 #else
178 
179         /* -(pSrc[2 * i + 1] * pATable[2 * i + 1] - pSrc[2 * i] * pATable[2 * i]) */
180         outR = -(__SMUSD(*__SIMD32(pSrc1), *__SIMD32(pCoefA)));
181 
182 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
183 
184         /* pSrc[2 * n - 2 * i] * pBTable[2 * i] +
185         pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]) */
186         outR = __SMLAD(*__SIMD32(pSrc2), *__SIMD32(pCoefB), outR) >> 16u;
187 
188         /* pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
189         pIn[2 * n - 2 * i + 1] * pBTable[2 * i] */
190 
191 #ifndef ARM_MATH_BIG_ENDIAN
192 
193         outI = __SMUSDX(*__SIMD32(pSrc2)--, *__SIMD32(pCoefB));
194 
195 #else
196 
197         outI = __SMUSDX(*__SIMD32(pCoefB), *__SIMD32(pSrc2)--);
198 
199 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
200 
201         /* (pIn[2 * i + 1] * pATable[2 * i] + pIn[2 * i] * pATable[2 * i + 1] */
202         outI = __SMLADX(*__SIMD32(pSrc1)++, *__SIMD32(pCoefA), outI);
203 
204         /* write output */
205         *pD1++ = (q15_t) outR;
206         *pD1++ = outI >> 16u;
207 
208         /* write complex conjugate output */
209         pD2[0] = (q15_t) outR;
210         pD2[1] = -(outI >> 16u);
211         pD2 -= 2;
212 
213         /* update coefficient pointer */
214         pCoefB = pCoefB + (2u * modifier);
215         pCoefA = pCoefA + (2u * modifier);
216     }
217 
218     pDst[2u * fftLen] = (pSrc[0] - pSrc[1]) >> 1;
219     pDst[(2u * fftLen) + 1u] = 0;
220 
221     pDst[0] = (pSrc[0] + pSrc[1]) >> 1;
222     pDst[1] = 0;
223 
224 #else
225 
226     /* Run the below code for Cortex-M0 */
227     i = 1u;
228 
229     while (i < fftLen)
230     {
231         /*
232         outR = (pSrc[2 * i] * pATable[2 * i] - pSrc[2 * i + 1] * pATable[2 * i + 1]
233         + pSrc[2 * n - 2 * i] * pBTable[2 * i] +
234         pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
235         */
236 
237         outR = *pSrc1 * *pCoefA;
238         outR = outR - (*(pSrc1 + 1) * *(pCoefA + 1));
239         outR = outR + (*pSrc2 * *pCoefB);
240         outR = (outR + (*(pSrc2 + 1) * *(pCoefB + 1))) >> 16;
241 
242 
243         /* outI = (pIn[2 * i + 1] * pATable[2 * i] + pIn[2 * i] * pATable[2 * i + 1] +
244         pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
245         pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
246         */
247 
248         outI = *pSrc2 * *(pCoefB + 1);
249         outI = outI - (*(pSrc2 + 1) * *pCoefB);
250         outI = outI + (*(pSrc1 + 1) * *pCoefA);
251         outI = outI + (*pSrc1 * *(pCoefA + 1));
252 
253         /* update input pointers */
254         pSrc1 += 2u;
255         pSrc2 -= 2u;
256 
257         /* write output */
258         pDst[2u * i] = (q15_t) outR;
259         pDst[(2u * i) + 1u] = outI >> 16u;
260 
261         /* write complex conjugate output */
262         pDst[(4u * fftLen) - (2u * i)] = (q15_t) outR;
263         pDst[((4u * fftLen) - (2u * i)) + 1u] = -(outI >> 16u);
264 
265         /* update coefficient pointer */
266         pCoefB = pCoefB + (2u * modifier);
267         pCoefA = pCoefA + (2u * modifier);
268 
269         i++;
270     }
271 
272     pDst[2u * fftLen] = (pSrc[0] - pSrc[1]) >> 1;
273     pDst[(2u * fftLen) + 1u] = 0;
274 
275     pDst[0] = (pSrc[0] + pSrc[1]) >> 1;
276     pDst[1] = 0;
277 
278 #endif /* #if defined (ARM_MATH_DSP) */
279 }
280 
281 
282 /**
283 * @brief  Core Real IFFT process
284 * @param[in]   *pSrc 				points to the input buffer.
285 * @param[in]   fftLen  		    length of FFT.
286 * @param[in]   *pATable 			points to the twiddle Coef A buffer.
287 * @param[in]   *pBTable 			points to the twiddle Coef B buffer.
288 * @param[out]  *pDst 				points to the output buffer.
289 * @param[in]   modifier 	        twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
290 * @return none.
291 * The function implements a Real IFFT
292 */
arm_split_rifft_q15(q15_t * pSrc,uint32_t fftLen,q15_t * pATable,q15_t * pBTable,q15_t * pDst,uint32_t modifier)293 void arm_split_rifft_q15(
294     q15_t * pSrc,
295     uint32_t fftLen,
296     q15_t * pATable,
297     q15_t * pBTable,
298     q15_t * pDst,
299     uint32_t modifier)
300 {
301     uint32_t i;                                    /* Loop Counter */
302     q31_t outR, outI;                              /* Temporary variables for output */
303     q15_t *pCoefA, *pCoefB;                        /* Temporary pointers for twiddle factors */
304     q15_t *pSrc1, *pSrc2;
305     q15_t *pDst1 = &pDst[0];
306 
307     pCoefA = &pATable[0];
308     pCoefB = &pBTable[0];
309 
310     pSrc1 = &pSrc[0];
311     pSrc2 = &pSrc[2u * fftLen];
312 
313 #if defined (ARM_MATH_DSP)
314 
315     /* Run the below code for Cortex-M4 and Cortex-M3 */
316     i = fftLen;
317 
318     while (i > 0u)
319     {
320         /*
321         outR = (pIn[2 * i] * pATable[2 * i] + pIn[2 * i + 1] * pATable[2 * i + 1] +
322         pIn[2 * n - 2 * i] * pBTable[2 * i] -
323         pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
324 
325         outI = (pIn[2 * i + 1] * pATable[2 * i] - pIn[2 * i] * pATable[2 * i + 1] -
326         pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
327         pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
328         */
329 
330 
331 #ifndef ARM_MATH_BIG_ENDIAN
332 
333         /* pIn[2 * n - 2 * i] * pBTable[2 * i] -
334         pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]) */
335         outR = __SMUSD(*__SIMD32(pSrc2), *__SIMD32(pCoefB));
336 
337 #else
338 
339         /* -(-pIn[2 * n - 2 * i] * pBTable[2 * i] +
340         pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1])) */
341         outR = -(__SMUSD(*__SIMD32(pSrc2), *__SIMD32(pCoefB)));
342 
343 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
344 
345         /* pIn[2 * i] * pATable[2 * i] + pIn[2 * i + 1] * pATable[2 * i + 1] +
346         pIn[2 * n - 2 * i] * pBTable[2 * i] */
347         outR = __SMLAD(*__SIMD32(pSrc1), *__SIMD32(pCoefA), outR) >> 16u;
348 
349         /*
350         -pIn[2 * n - 2 * i] * pBTable[2 * i + 1] +
351         pIn[2 * n - 2 * i + 1] * pBTable[2 * i] */
352         outI = __SMUADX(*__SIMD32(pSrc2)--, *__SIMD32(pCoefB));
353 
354         /* pIn[2 * i + 1] * pATable[2 * i] - pIn[2 * i] * pATable[2 * i + 1] */
355 
356 #ifndef ARM_MATH_BIG_ENDIAN
357 
358         outI = __SMLSDX(*__SIMD32(pCoefA), *__SIMD32(pSrc1)++, -outI);
359 
360 #else
361 
362         outI = __SMLSDX(*__SIMD32(pSrc1)++, *__SIMD32(pCoefA), -outI);
363 
364 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
365         /* write output */
366 
367 #ifndef ARM_MATH_BIG_ENDIAN
368 
369         *__SIMD32(pDst1)++ = __PKHBT(outR, (outI >> 16u), 16);
370 
371 #else
372 
373         *__SIMD32(pDst1)++ = __PKHBT((outI >> 16u), outR, 16);
374 
375 #endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
376 
377         /* update coefficient pointer */
378         pCoefB = pCoefB + (2u * modifier);
379         pCoefA = pCoefA + (2u * modifier);
380 
381         i--;
382     }
383 #else
384     /* Run the below code for Cortex-M0 */
385     i = fftLen;
386 
387     while (i > 0u)
388     {
389         /*
390         outR = (pIn[2 * i] * pATable[2 * i] + pIn[2 * i + 1] * pATable[2 * i + 1] +
391         pIn[2 * n - 2 * i] * pBTable[2 * i] -
392         pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
393         */
394 
395         outR = *pSrc2 * *pCoefB;
396         outR = outR - (*(pSrc2 + 1) * *(pCoefB + 1));
397         outR = outR + (*pSrc1 * *pCoefA);
398         outR = (outR + (*(pSrc1 + 1) * *(pCoefA + 1))) >> 16;
399 
400         /*
401         outI = (pIn[2 * i + 1] * pATable[2 * i] - pIn[2 * i] * pATable[2 * i + 1] -
402         pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
403         pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
404         */
405 
406         outI = *(pSrc1 + 1) * *pCoefA;
407         outI = outI - (*pSrc1 * *(pCoefA + 1));
408         outI = outI - (*pSrc2 * *(pCoefB + 1));
409         outI = outI - (*(pSrc2 + 1) * *(pCoefB));
410 
411         /* update input pointers */
412         pSrc1 += 2u;
413         pSrc2 -= 2u;
414 
415         /* write output */
416         *pDst1++ = (q15_t) outR;
417         *pDst1++ = (q15_t) (outI >> 16);
418 
419         /* update coefficient pointer */
420         pCoefB = pCoefB + (2u * modifier);
421         pCoefA = pCoefA + (2u * modifier);
422 
423         i--;
424     }
425 #endif /* #if defined (ARM_MATH_DSP) */
426 }
427