| /* ---------------------------------------------------------------------- |
| * Project: CMSIS DSP Library |
| * Title: arm_biquad_cascade_df1_fast_q15.c |
| * Description: Fast processing function for the Q15 Biquad cascade filter |
| * |
| * $Date: 18. March 2019 |
| * $Revision: V1.6.0 |
| * |
| * Target Processor: Cortex-M cores |
| * -------------------------------------------------------------------- */ |
| /* |
| * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved. |
| * |
| * SPDX-License-Identifier: Apache-2.0 |
| * |
| * Licensed under the Apache License, Version 2.0 (the License); you may |
| * not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an AS IS BASIS, WITHOUT |
| * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #include "arm_math.h" |
| |
| /** |
| @ingroup groupFilters |
| */ |
| |
| /** |
| @addtogroup BiquadCascadeDF1 |
| @{ |
| */ |
| |
| /** |
| @brief Processing function for the Q15 Biquad cascade filter (fast variant). |
| @param[in] S points to an instance of the Q15 Biquad cascade structure |
| @param[in] pSrc points to the block of input data |
| @param[out] pDst points to the block of output data |
| @param[in] blockSize number of samples to process per call |
| @return none |
| |
| @par Scaling and Overflow Behavior |
| This fast version uses a 32-bit accumulator with 2.30 format. |
| The accumulator maintains full precision of the intermediate multiplication results but provides only a single guard bit. |
| Thus, if the accumulator result overflows it wraps around and distorts the result. |
| In order to avoid overflows completely the input signal must be scaled down by two bits and lie in the range [-0.25 +0.25). |
| The 2.30 accumulator is then shifted by <code>postShift</code> bits and the result truncated to 1.15 format by discarding the low 16 bits. |
| @remark |
| Refer to \ref arm_biquad_cascade_df1_q15() for a slower implementation of this function |
| which uses 64-bit accumulation to avoid wrap around distortion. Both the slow and the fast versions use the same instance structure. |
| Use the function \ref arm_biquad_cascade_df1_init_q15() to initialize the filter structure. |
| */ |
| |
| void arm_biquad_cascade_df1_fast_q15( |
| const arm_biquad_casd_df1_inst_q15 * S, |
| const q15_t * pSrc, |
| q15_t * pDst, |
| uint32_t blockSize) |
| { |
| const q15_t *pIn = pSrc; /* Source pointer */ |
| q15_t *pOut = pDst; /* Destination pointer */ |
| q15_t *pState = S->pState; /* State pointer */ |
| const q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ |
| q31_t acc; /* Accumulator */ |
| q31_t in; /* Temporary variable to hold input value */ |
| q31_t out; /* Temporary variable to hold output value */ |
| q31_t b0; /* Temporary variable to hold bo value */ |
| q31_t b1, a1; /* Filter coefficients */ |
| q31_t state_in, state_out; /* Filter state variables */ |
| int32_t shift = (int32_t) (15 - S->postShift); /* Post shift */ |
| uint32_t sample, stage = S->numStages; /* Loop counters */ |
| |
| do |
| { |
| /* Read the b0 and 0 coefficients using SIMD */ |
| b0 = read_q15x2_ia ((q15_t **) &pCoeffs); |
| |
| /* Read the b1 and b2 coefficients using SIMD */ |
| b1 = read_q15x2_ia ((q15_t **) &pCoeffs); |
| |
| /* Read the a1 and a2 coefficients using SIMD */ |
| a1 = read_q15x2_ia ((q15_t **) &pCoeffs); |
| |
| /* Read the input state values from the state buffer: x[n-1], x[n-2] */ |
| state_in = read_q15x2_ia (&pState); |
| |
| /* Read the output state values from the state buffer: y[n-1], y[n-2] */ |
| state_out = read_q15x2_da (&pState); |
| |
| #if defined (ARM_MATH_LOOPUNROLL) |
| |
| /* Apply loop unrolling and compute 2 output values simultaneously. */ |
| /* Variable acc hold output values that are being computed: |
| * |
| * acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] |
| * acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] |
| */ |
| |
| /* Loop unrolling: Compute 2 outputs at a time */ |
| sample = blockSize >> 1U; |
| |
| while (sample > 0U) |
| { |
| |
| /* Read the input */ |
| in = read_q15x2_ia ((q15_t **) &pIn); |
| |
| /* out = b0 * x[n] + 0 * 0 */ |
| out = __SMUAD(b0, in); |
| /* acc = b1 * x[n-1] + acc += b2 * x[n-2] + out */ |
| acc = __SMLAD(b1, state_in, out); |
| /* acc += a1 * y[n-1] + acc += a2 * y[n-2] */ |
| acc = __SMLAD(a1, state_out, acc); |
| |
| /* The result is converted from 3.29 to 1.31 and then saturation is applied */ |
| out = __SSAT((acc >> shift), 16); |
| |
| /* Every time after the output is computed state should be updated. */ |
| /* The states should be updated as: */ |
| /* Xn2 = Xn1 */ |
| /* Xn1 = Xn */ |
| /* Yn2 = Yn1 */ |
| /* Yn1 = acc */ |
| /* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */ |
| /* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */ |
| |
| #ifndef ARM_MATH_BIG_ENDIAN |
| state_in = __PKHBT(in, state_in, 16); |
| state_out = __PKHBT(out, state_out, 16); |
| #else |
| state_in = __PKHBT(state_in >> 16, (in >> 16), 16); |
| state_out = __PKHBT(state_out >> 16, (out), 16); |
| #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ |
| |
| /* out = b0 * x[n] + 0 * 0 */ |
| out = __SMUADX(b0, in); |
| /* acc0 = b1 * x[n-1] , acc0 += b2 * x[n-2] + out */ |
| acc = __SMLAD(b1, state_in, out); |
| /* acc += a1 * y[n-1] + acc += a2 * y[n-2] */ |
| acc = __SMLAD(a1, state_out, acc); |
| |
| /* The result is converted from 3.29 to 1.31 and then saturation is applied */ |
| out = __SSAT((acc >> shift), 16); |
| |
| /* Store the output in the destination buffer. */ |
| #ifndef ARM_MATH_BIG_ENDIAN |
| write_q15x2_ia (&pOut, __PKHBT(state_out, out, 16)); |
| #else |
| write_q15x2_ia (&pOut, __PKHBT(out, state_out >> 16, 16)); |
| #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ |
| |
| /* Every time after the output is computed state should be updated. */ |
| /* The states should be updated as: */ |
| /* Xn2 = Xn1 */ |
| /* Xn1 = Xn */ |
| /* Yn2 = Yn1 */ |
| /* Yn1 = acc */ |
| /* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */ |
| /* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */ |
| #ifndef ARM_MATH_BIG_ENDIAN |
| state_in = __PKHBT(in >> 16, state_in, 16); |
| state_out = __PKHBT(out, state_out, 16); |
| #else |
| state_in = __PKHBT(state_in >> 16, in, 16); |
| state_out = __PKHBT(state_out >> 16, out, 16); |
| #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ |
| |
| /* Decrement loop counter */ |
| sample--; |
| } |
| |
| /* Loop unrolling: Compute remaining outputs */ |
| sample = (blockSize & 0x1U); |
| |
| #else |
| |
| /* Initialize blkCnt with number of samples */ |
| sample = blockSize; |
| |
| #endif /* #if defined (ARM_MATH_LOOPUNROLL) */ |
| |
| while (sample > 0U) |
| { |
| /* Read the input */ |
| in = *pIn++; |
| |
| /* out = b0 * x[n] + 0 * 0 */ |
| #ifndef ARM_MATH_BIG_ENDIAN |
| out = __SMUAD(b0, in); |
| #else |
| out = __SMUADX(b0, in); |
| #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ |
| |
| /* acc = b1 * x[n-1], acc += b2 * x[n-2] + out */ |
| acc = __SMLAD(b1, state_in, out); |
| /* acc += a1 * y[n-1] + acc += a2 * y[n-2] */ |
| acc = __SMLAD(a1, state_out, acc); |
| |
| /* The result is converted from 3.29 to 1.31 and then saturation is applied */ |
| out = __SSAT((acc >> shift), 16); |
| |
| /* Store the output in the destination buffer. */ |
| *pOut++ = (q15_t) out; |
| |
| /* Every time after the output is computed state should be updated. */ |
| /* The states should be updated as: */ |
| /* Xn2 = Xn1 */ |
| /* Xn1 = Xn */ |
| /* Yn2 = Yn1 */ |
| /* Yn1 = acc */ |
| /* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */ |
| /* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */ |
| #ifndef ARM_MATH_BIG_ENDIAN |
| state_in = __PKHBT(in, state_in, 16); |
| state_out = __PKHBT(out, state_out, 16); |
| #else |
| state_in = __PKHBT(state_in >> 16, in, 16); |
| state_out = __PKHBT(state_out >> 16, out, 16); |
| #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ |
| |
| /* decrement loop counter */ |
| sample--; |
| } |
| |
| /* The first stage goes from the input buffer to the output buffer. */ |
| /* Subsequent (numStages - 1) occur in-place in the output buffer */ |
| pIn = pDst; |
| |
| /* Reset the output pointer */ |
| pOut = pDst; |
| |
| /* Store the updated state variables back into the state array */ |
| write_q15x2_ia(&pState, state_in); |
| write_q15x2_ia(&pState, state_out); |
| |
| /* Decrement loop counter */ |
| stage--; |
| |
| } while (stage > 0U); |
| } |
| |
| /** |
| @} end of BiquadCascadeDF1 group |
| */ |