| #include "ref.h" |
| |
| void ref_conv_f32( |
| float32_t * pSrcA, |
| uint32_t srcALen, |
| float32_t * pSrcB, |
| uint32_t srcBLen, |
| float32_t * pDst) |
| { |
| float32_t sum; /* Accumulator */ |
| uint32_t i, j; /* loop counters */ |
| |
| /* Loop to calculate convolution for output length number of times */ |
| for (i = 0; i < srcALen + srcBLen - 1; i++) |
| { |
| /* Initialize sum with zero to carry out MAC operations */ |
| sum = 0.0f; |
| |
| /* Loop to perform MAC operations according to convolution equation */ |
| for (j = 0; j <= i; j++) |
| { |
| /* Check the array limitations */ |
| if ((i - j < srcBLen) && (j < srcALen)) |
| { |
| /* z[i] += x[i-j] * y[j] */ |
| sum += pSrcB[i - j] * pSrcA[j]; |
| } |
| } |
| /* Store the output in the destination buffer */ |
| pDst[i] = sum; |
| } |
| } |
| |
| arm_status ref_conv_partial_f32( |
| float32_t * pSrcA, |
| uint32_t srcALen, |
| float32_t * pSrcB, |
| uint32_t srcBLen, |
| float32_t * pDst, |
| uint32_t firstIndex, |
| uint32_t numPoints) |
| { |
| ref_conv_f32(pSrcA,srcALen,pSrcB,srcBLen,pDst); |
| |
| return ARM_MATH_SUCCESS; |
| } |
| |
| void ref_conv_q31( |
| q31_t * pSrcA, |
| uint32_t srcALen, |
| q31_t * pSrcB, |
| uint32_t srcBLen, |
| q31_t * pDst) |
| { |
| q63_t sum; /* Accumulator */ |
| uint32_t i, j; /* loop counter */ |
| |
| /* Loop to calculate output of convolution for output length number of times */ |
| for (i = 0; i < srcALen + srcBLen - 1; i++) |
| { |
| /* Initialize sum with zero to carry on MAC operations */ |
| sum = 0; |
| |
| /* Loop to perform MAC operations according to convolution equation */ |
| for (j = 0; j <= i; j++) |
| { |
| /* Check the array limitations */ |
| if ((i - j < srcBLen) && (j < srcALen)) |
| { |
| /* z[i] += x[i-j] * y[j] */ |
| sum += (q63_t) pSrcA[j] * (pSrcB[i - j]); |
| } |
| } |
| |
| /* Store the output in the destination buffer */ |
| pDst[i] = (q31_t)(sum >> 31U); |
| } |
| } |
| |
| void ref_conv_fast_q31( |
| q31_t * pSrcA, |
| uint32_t srcALen, |
| q31_t * pSrcB, |
| uint32_t srcBLen, |
| q31_t * pDst) |
| { |
| q31_t sum; /* Accumulator */ |
| uint32_t i, j; /* loop counter */ |
| |
| /* Loop to calculate output of convolution for output length number of times */ |
| for (i = 0; i < srcALen + srcBLen - 1; i++) |
| { |
| /* Initialize sum with zero to carry on MAC operations */ |
| sum = 0; |
| |
| /* Loop to perform MAC operations according to convolution equation */ |
| for (j = 0; j <= i; j++) |
| { |
| /* Check the array limitations */ |
| if ((i - j < srcBLen) && (j < srcALen)) |
| { |
| /* z[i] += x[i-j] * y[j] */ |
| sum = (q31_t) ((((q63_t)sum << 32) + |
| ((q63_t)pSrcA[j] * pSrcB[i - j])) >> 32); |
| } |
| } |
| |
| /* Store the output in the destination buffer */ |
| pDst[i] = (q31_t)(sum << 1U); |
| } |
| } |
| |
| arm_status ref_conv_partial_q31( |
| q31_t * pSrcA, |
| uint32_t srcALen, |
| q31_t * pSrcB, |
| uint32_t srcBLen, |
| q31_t * pDst, |
| uint32_t firstIndex, |
| uint32_t numPoints) |
| { |
| ref_conv_q31(pSrcA,srcALen,pSrcB,srcBLen,pDst); |
| |
| return ARM_MATH_SUCCESS; |
| } |
| |
| arm_status ref_conv_partial_fast_q31( |
| q31_t * pSrcA, |
| uint32_t srcALen, |
| q31_t * pSrcB, |
| uint32_t srcBLen, |
| q31_t * pDst, |
| uint32_t firstIndex, |
| uint32_t numPoints) |
| { |
| ref_conv_fast_q31(pSrcA,srcALen,pSrcB,srcBLen,pDst); |
| |
| return ARM_MATH_SUCCESS; |
| } |
| |
| void ref_conv_q15( |
| q15_t * pSrcA, |
| uint32_t srcALen, |
| q15_t * pSrcB, |
| uint32_t srcBLen, |
| q15_t * pDst) |
| { |
| q63_t sum; /* Accumulator */ |
| uint32_t i, j; /* loop counter */ |
| |
| /* Loop to calculate output of convolution for output length number of times */ |
| for (i = 0; i < srcALen + srcBLen - 1; i++) |
| { |
| /* Initialize sum with zero to carry on MAC operations */ |
| sum = 0; |
| |
| /* Loop to perform MAC operations according to convolution equation */ |
| for (j = 0; j <= i; j++) |
| { |
| /* Check the array limitations */ |
| if ((i - j < srcBLen) && (j < srcALen)) |
| { |
| /* z[i] += x[i-j] * y[j] */ |
| sum += (q31_t)pSrcA[j] * pSrcB[i - j]; |
| } |
| } |
| |
| /* Store the output in the destination buffer */ |
| pDst[i] = ref_sat_q15(sum >> 15U); |
| } |
| } |
| |
| arm_status ref_conv_partial_fast_opt_q15( |
| q15_t * pSrcA, |
| uint32_t srcALen, |
| q15_t * pSrcB, |
| uint32_t srcBLen, |
| q15_t * pDst, |
| uint32_t firstIndex, |
| uint32_t numPoints, |
| q15_t * pScratch1, |
| q15_t * pScratch2) |
| { |
| q31_t sum; /* Accumulator */ |
| uint32_t i, j; /* loop counter */ |
| |
| /* Loop to calculate output of convolution for output length number of times */ |
| for (i = 0; i < srcALen + srcBLen - 1; i++) |
| { |
| /* Initialize sum with zero to carry on MAC operations */ |
| sum = 0; |
| |
| /* Loop to perform MAC operations according to convolution equation */ |
| for (j = 0; j <= i; j++) |
| { |
| /* Check the array limitations */ |
| if ((i - j < srcBLen) && (j < srcALen)) |
| { |
| /* z[i] += x[i-j] * y[j] */ |
| sum += (q31_t)pSrcA[j] * pSrcB[i - j]; |
| } |
| } |
| |
| /* Store the output in the destination buffer */ |
| pDst[i] = ref_sat_q15(sum >> 15U); |
| } |
| |
| return ARM_MATH_SUCCESS; |
| } |
| |
| void ref_conv_fast_q15( |
| q15_t * pSrcA, |
| uint32_t srcALen, |
| q15_t * pSrcB, |
| uint32_t srcBLen, |
| q15_t * pDst) |
| { |
| q31_t sum; /* Accumulator */ |
| uint32_t i, j; /* loop counter */ |
| |
| /* Loop to calculate output of convolution for output length number of times */ |
| for (i = 0; i < srcALen + srcBLen - 1; i++) |
| { |
| /* Initialize sum with zero to carry on MAC operations */ |
| sum = 0; |
| |
| /* Loop to perform MAC operations according to convolution equation */ |
| for (j = 0; j <= i; j++) |
| { |
| /* Check the array limitations */ |
| if ((i - j < srcBLen) && (j < srcALen)) |
| { |
| /* z[i] += x[i-j] * y[j] */ |
| sum += (q31_t)pSrcA[j] * pSrcB[i - j]; |
| } |
| } |
| |
| /* Store the output in the destination buffer */ |
| pDst[i] = sum >> 15U; |
| } |
| } |
| |
| void ref_conv_fast_opt_q15( |
| q15_t * pSrcA, |
| uint32_t srcALen, |
| q15_t * pSrcB, |
| uint32_t srcBLen, |
| q15_t * pDst, |
| q15_t * pScratch1, |
| q15_t * pScratch2) |
| { |
| q31_t sum; /* Accumulator */ |
| uint32_t i, j; /* loop counter */ |
| |
| /* Loop to calculate output of convolution for output length number of times */ |
| for (i = 0; i < srcALen + srcBLen - 1; i++) |
| { |
| /* Initialize sum with zero to carry on MAC operations */ |
| sum = 0; |
| |
| /* Loop to perform MAC operations according to convolution equation */ |
| for (j = 0; j <= i; j++) |
| { |
| /* Check the array limitations */ |
| if ((i - j < srcBLen) && (j < srcALen)) |
| { |
| /* z[i] += x[i-j] * y[j] */ |
| sum += (q31_t)pSrcA[j] * pSrcB[i - j]; |
| } |
| } |
| |
| /* Store the output in the destination buffer */ |
| pDst[i] = ref_sat_q15(sum >> 15U); |
| } |
| } |
| |
| arm_status ref_conv_partial_q15( |
| q15_t * pSrcA, |
| uint32_t srcALen, |
| q15_t * pSrcB, |
| uint32_t srcBLen, |
| q15_t * pDst, |
| uint32_t firstIndex, |
| uint32_t numPoints) |
| { |
| ref_conv_q15(pSrcA,srcALen,pSrcB,srcBLen,pDst); |
| |
| return ARM_MATH_SUCCESS; |
| } |
| |
| arm_status ref_conv_partial_fast_q15( |
| q15_t * pSrcA, |
| uint32_t srcALen, |
| q15_t * pSrcB, |
| uint32_t srcBLen, |
| q15_t * pDst, |
| uint32_t firstIndex, |
| uint32_t numPoints) |
| { |
| ref_conv_fast_q15(pSrcA,srcALen,pSrcB,srcBLen,pDst); |
| |
| return ARM_MATH_SUCCESS; |
| } |
| |
| |
| void ref_conv_q7( |
| q7_t * pSrcA, |
| uint32_t srcALen, |
| q7_t * pSrcB, |
| uint32_t srcBLen, |
| q7_t * pDst) |
| { |
| q31_t sum; /* Accumulator */ |
| uint32_t i, j; /* loop counter */ |
| |
| /* Loop to calculate output of convolution for output length number of times */ |
| for (i = 0; i < srcALen + srcBLen - 1; i++) |
| { |
| /* Initialize sum with zero to carry on MAC operations */ |
| sum = 0; |
| |
| /* Loop to perform MAC operations according to convolution equation */ |
| for (j = 0; j <= i; j++) |
| { |
| /* Check the array limitations */ |
| if ((i - j < srcBLen) && (j < srcALen)) |
| { |
| /* z[i] += x[i-j] * y[j] */ |
| sum += (q15_t)pSrcA[j] * pSrcB[i - j]; |
| } |
| } |
| |
| /* Store the output in the destination buffer */ |
| pDst[i] = (q7_t)ref_sat_q7(sum >> 7); |
| } |
| } |
| |
| arm_status ref_conv_partial_q7( |
| q7_t * pSrcA, |
| uint32_t srcALen, |
| q7_t * pSrcB, |
| uint32_t srcBLen, |
| q7_t * pDst, |
| uint32_t firstIndex, |
| uint32_t numPoints) |
| { |
| ref_conv_q7(pSrcA,srcALen,pSrcB,srcBLen,pDst); |
| |
| return ARM_MATH_SUCCESS; |
| } |