blob: 9ce170cec22eaf29c6e93a8f94bd5a55615f598c [file] [log] [blame]
#include "ref.h"
void ref_correlate_f32(
float32_t * pSrcA,
uint32_t srcALen,
float32_t * pSrcB,
uint32_t srcBLen,
float32_t * pDst)
{
float32_t *pIn1 = pSrcA; /* inputA pointer */
float32_t *pIn2 = pSrcB + (srcBLen - 1U); /* inputB pointer */
float32_t sum; /* Accumulator */
uint32_t i = 0U, j; /* loop counters */
uint32_t inv = 0U; /* Reverse order flag */
uint32_t tot = 0U; /* Length */
/* The algorithm implementation is based on the lengths of the inputs.
* srcB is always made to slide across srcA.
* So srcBLen is always considered as shorter or equal to srcALen
* But CORR(x, y) is reverse of CORR(y, x)
* So, when srcBLen > srcALen, output pointer is made to point to the end of the output buffer
* and a variable, inv is set to 1
* If lengths are not equal then zero pad has to be done to make the two
* inputs of same length. But to improve the performance, we include zeroes
* in the output instead of zero padding either of the the inputs
* If srcALen > srcBLen, (srcALen - srcBLen) zeroes has to included in the
* starting of the output buffer
* If srcALen < srcBLen, (srcALen - srcBLen) zeroes has to included in the
* ending of the output buffer
* Once the zero padding is done the remaining of the output is calcualted
* using convolution but with the shorter signal time shifted.
*/
/* Calculate the length of the remaining sequence */
tot = srcALen + srcBLen - 2U;
if (srcALen > srcBLen)
{
/* Calculating the number of zeros to be padded to the output */
/* Initialise the pointer after zero padding */
pDst += srcALen - srcBLen;
}
else if (srcALen < srcBLen)
{
/* Initialization to inputB pointer */
pIn1 = pSrcB;
/* Initialization to the end of inputA pointer */
pIn2 = pSrcA + srcALen - 1U;
/* Initialisation of the pointer after zero padding */
pDst += tot;
/* Swapping the lengths */
j = srcALen;
srcALen = srcBLen;
srcBLen = j;
/* Setting the reverse flag */
inv = 1;
}
/* Loop to calculate convolution for output length number of times */
for (i = 0U; i <= tot; i++)
{
/* Initialize sum with zero to carry on MAC operations */
sum = 0.0f;
/* Loop to perform MAC operations according to convolution equation */
for (j = 0U; j <= i; j++)
{
/* Check the array limitations */
if ((i - j < srcBLen) && (j < srcALen))
{
/* z[i] += x[i-j] * y[j] */
sum += pIn1[j] * pIn2[-((int32_t)i - j)];
}
}
/* Store the output in the destination buffer */
if (inv == 1)
*pDst-- = sum;
else
*pDst++ = sum;
}
}
void ref_correlate_q31(
q31_t * pSrcA,
uint32_t srcALen,
q31_t * pSrcB,
uint32_t srcBLen,
q31_t * pDst)
{
q31_t *pIn1 = pSrcA; /* inputA pointer */
q31_t *pIn2 = pSrcB + (srcBLen - 1U); /* inputB pointer */
q63_t sum; /* Accumulators */
uint32_t i = 0U, j; /* loop counters */
uint32_t inv = 0U; /* Reverse order flag */
uint32_t tot = 0U; /* Length */
/* Calculate the length of the remaining sequence */
tot = ((srcALen + srcBLen) - 2U);
if (srcALen > srcBLen)
{
/* Calculating the number of zeros to be padded to the output */
j = srcALen - srcBLen;
/* Initialise the pointer after zero padding */
pDst += j;
}
else if (srcALen < srcBLen)
{
/* Initialization to inputB pointer */
pIn1 = pSrcB;
/* Initialization to the end of inputA pointer */
pIn2 = pSrcA + (srcALen - 1U);
/* Initialisation of the pointer after zero padding */
pDst = pDst + tot;
/* Swapping the lengths */
j = srcALen;
srcALen = srcBLen;
srcBLen = j;
/* Setting the reverse flag */
inv = 1;
}
/* Loop to calculate correlation for output length number of times */
for (i = 0U; i <= tot; i++)
{
/* Initialize sum with zero to carry on MAC operations */
sum = 0;
/* Loop to perform MAC operations according to correlation equation */
for (j = 0U; j <= i; j++)
{
/* Check the array limitations */
if ((((i - j) < srcBLen) && (j < srcALen)))
{
/* z[i] += x[i-j] * y[j] */
sum += ((q63_t) pIn1[j] * pIn2[-((int32_t) i - j)]);
}
}
/* Store the output in the destination buffer */
if (inv == 1)
*pDst-- = (q31_t)(sum >> 31U);
else
*pDst++ = (q31_t)(sum >> 31U);
}
}
void ref_correlate_fast_q31(
q31_t * pSrcA,
uint32_t srcALen,
q31_t * pSrcB,
uint32_t srcBLen,
q31_t * pDst)
{
q31_t *pIn1 = pSrcA; /* inputA pointer */
q31_t *pIn2 = pSrcB + (srcBLen - 1U); /* inputB pointer */
q63_t sum; /* Accumulators */
uint32_t i = 0U, j; /* loop counters */
uint32_t inv = 0U; /* Reverse order flag */
uint32_t tot = 0U; /* Length */
/* Calculate the length of the remaining sequence */
tot = ((srcALen + srcBLen) - 2U);
if (srcALen > srcBLen)
{
/* Calculating the number of zeros to be padded to the output */
j = srcALen - srcBLen;
/* Initialise the pointer after zero padding */
pDst += j;
}
else if (srcALen < srcBLen)
{
/* Initialization to inputB pointer */
pIn1 = pSrcB;
/* Initialization to the end of inputA pointer */
pIn2 = pSrcA + (srcALen - 1U);
/* Initialisation of the pointer after zero padding */
pDst = pDst + tot;
/* Swapping the lengths */
j = srcALen;
srcALen = srcBLen;
srcBLen = j;
/* Setting the reverse flag */
inv = 1;
}
/* Loop to calculate correlation for output length number of times */
for (i = 0U; i <= tot; i++)
{
/* Initialize sum with zero to carry on MAC operations */
sum = 0;
/* Loop to perform MAC operations according to correlation equation */
for (j = 0U; j <= i; j++)
{
/* Check the array limitations */
if ((((i - j) < srcBLen) && (j < srcALen)))
{
/* z[i] += x[i-j] * y[j] */
sum = (q31_t) ((((q63_t) sum << 32) +
((q63_t) pIn1[j] * pIn2[-((int32_t) i - j)])) >> 32);
}
}
/* Store the output in the destination buffer */
if (inv == 1)
*pDst-- = (q31_t)(sum << 1U);
else
*pDst++ = (q31_t)(sum << 1U);
}
}
void ref_correlate_q15(
q15_t * pSrcA,
uint32_t srcALen,
q15_t * pSrcB,
uint32_t srcBLen,
q15_t * pDst)
{
q15_t *pIn1 = pSrcA; /* inputA pointer */
q15_t *pIn2 = pSrcB + (srcBLen - 1U); /* inputB pointer */
q63_t sum; /* Accumulators */
uint32_t i = 0U, j; /* loop counters */
uint32_t inv = 0U; /* Reverse order flag */
uint32_t tot = 0U; /* Length */
/* Calculate the length of the remaining sequence */
tot = ((srcALen + srcBLen) - 2U);
if (srcALen > srcBLen)
{
/* Calculating the number of zeros to be padded to the output */
j = srcALen - srcBLen;
/* Initialise the pointer after zero padding */
pDst += j;
}
else if (srcALen < srcBLen)
{
/* Initialization to inputB pointer */
pIn1 = pSrcB;
/* Initialization to the end of inputA pointer */
pIn2 = pSrcA + (srcALen - 1U);
/* Initialisation of the pointer after zero padding */
pDst = pDst + tot;
/* Swapping the lengths */
j = srcALen;
srcALen = srcBLen;
srcBLen = j;
/* Setting the reverse flag */
inv = 1;
}
/* Loop to calculate convolution for output length number of times */
for (i = 0U; i <= tot; i++)
{
/* Initialize sum with zero to carry on MAC operations */
sum = 0;
/* Loop to perform MAC operations according to convolution equation */
for (j = 0U; j <= i; j++)
{
/* Check the array limitations */
if ((((i - j) < srcBLen) && (j < srcALen)))
{
/* z[i] += x[i-j] * y[j] */
sum += ((q31_t) pIn1[j] * pIn2[-((int32_t) i - j)]);
}
}
/* Store the output in the destination buffer */
if (inv == 1)
*pDst-- = (q15_t) ref_sat_q15(sum >> 15U);
else
*pDst++ = (q15_t) ref_sat_q15(sum >> 15U);
}
}
void ref_correlate_fast_q15(
q15_t * pSrcA,
uint32_t srcALen,
q15_t * pSrcB,
uint32_t srcBLen,
q15_t * pDst)
{
q15_t *pIn1 = pSrcA; /* inputA pointer */
q15_t *pIn2 = pSrcB + (srcBLen - 1U); /* inputB pointer */
q63_t sum; /* Accumulators */
uint32_t i = 0U, j; /* loop counters */
uint32_t inv = 0U; /* Reverse order flag */
uint32_t tot = 0U; /* Length */
/* Calculate the length of the remaining sequence */
tot = ((srcALen + srcBLen) - 2U);
if (srcALen > srcBLen)
{
/* Calculating the number of zeros to be padded to the output */
j = srcALen - srcBLen;
/* Initialise the pointer after zero padding */
pDst += j;
}
else if (srcALen < srcBLen)
{
/* Initialization to inputB pointer */
pIn1 = pSrcB;
/* Initialization to the end of inputA pointer */
pIn2 = pSrcA + (srcALen - 1U);
/* Initialisation of the pointer after zero padding */
pDst = pDst + tot;
/* Swapping the lengths */
j = srcALen;
srcALen = srcBLen;
srcBLen = j;
/* Setting the reverse flag */
inv = 1;
}
/* Loop to calculate convolution for output length number of times */
for (i = 0U; i <= tot; i++)
{
/* Initialize sum with zero to carry on MAC operations */
sum = 0;
/* Loop to perform MAC operations according to convolution equation */
for (j = 0U; j <= i; j++)
{
/* Check the array limitations */
if ((((i - j) < srcBLen) && (j < srcALen)))
{
/* z[i] += x[i-j] * y[j] */
sum += ((q31_t) pIn1[j] * pIn2[-((int32_t) i - j)]);
}
}
/* Store the output in the destination buffer */
if (inv == 1)
*pDst-- = (q15_t)(sum >> 15U);
else
*pDst++ = (q15_t)(sum >> 15U);
}
}
void ref_correlate_fast_opt_q15(
q15_t * pSrcA,
uint32_t srcALen,
q15_t * pSrcB,
uint32_t srcBLen,
q15_t * pDst,
q15_t * pScratch)
{
q15_t *pIn1 = pSrcA; /* inputA pointer */
q15_t *pIn2 = pSrcB + (srcBLen - 1U); /* inputB pointer */
q31_t sum; /* Accumulators */
uint32_t i = 0U, j; /* loop counters */
uint32_t inv = 0U; /* Reverse order flag */
uint32_t tot = 0U; /* Length */
/* Calculate the length of the remaining sequence */
tot = ((srcALen + srcBLen) - 2U);
if (srcALen > srcBLen)
{
/* Calculating the number of zeros to be padded to the output */
j = srcALen - srcBLen;
/* Initialise the pointer after zero padding */
pDst += j;
}
else if (srcALen < srcBLen)
{
/* Initialization to inputB pointer */
pIn1 = pSrcB;
/* Initialization to the end of inputA pointer */
pIn2 = pSrcA + (srcALen - 1U);
/* Initialisation of the pointer after zero padding */
pDst = pDst + tot;
/* Swapping the lengths */
j = srcALen;
srcALen = srcBLen;
srcBLen = j;
/* Setting the reverse flag */
inv = 1;
}
/* Loop to calculate convolution for output length number of times */
for (i = 0U; i <= tot; i++)
{
/* Initialize sum with zero to carry on MAC operations */
sum = 0;
/* Loop to perform MAC operations according to convolution equation */
for (j = 0U; j <= i; j++)
{
/* Check the array limitations */
if ((((i - j) < srcBLen) && (j < srcALen)))
{
/* z[i] += x[i-j] * y[j] */
sum += ((q31_t) pIn1[j] * pIn2[-((int32_t) i - j)]);
}
}
/* Store the output in the destination buffer */
if (inv == 1)
*pDst-- = (q15_t) ref_sat_q15(sum >> 15U);
else
*pDst++ = (q15_t) ref_sat_q15(sum >> 15U);
}
}
void ref_correlate_q7(
q7_t * pSrcA,
uint32_t srcALen,
q7_t * pSrcB,
uint32_t srcBLen,
q7_t * pDst)
{
q7_t *pIn1 = pSrcA; /* inputA pointer */
q7_t *pIn2 = pSrcB + (srcBLen - 1U); /* inputB pointer */
q31_t sum; /* Accumulator */
uint32_t i = 0U, j; /* loop counters */
uint32_t inv = 0U; /* Reverse order flag */
uint32_t tot = 0U; /* Length */
/* Calculate the length of the remaining sequence */
tot = ((srcALen + srcBLen) - 2U);
if (srcALen > srcBLen)
{
/* Calculating the number of zeros to be padded to the output */
j = srcALen - srcBLen;
/* Initialise the pointer after zero padding */
pDst += j;
}
else if (srcALen < srcBLen)
{
/* Initialization to inputB pointer */
pIn1 = pSrcB;
/* Initialization to the end of inputA pointer */
pIn2 = pSrcA + (srcALen - 1U);
/* Initialisation of the pointer after zero padding */
pDst = pDst + tot;
/* Swapping the lengths */
j = srcALen;
srcALen = srcBLen;
srcBLen = j;
/* Setting the reverse flag */
inv = 1;
}
/* Loop to calculate convolution for output length number of times */
for (i = 0U; i <= tot; i++)
{
/* Initialize sum with zero to carry on MAC operations */
sum = 0;
/* Loop to perform MAC operations according to convolution equation */
for (j = 0U; j <= i; j++)
{
/* Check the array limitations */
if ((((i - j) < srcBLen) && (j < srcALen)))
{
/* z[i] += x[i-j] * y[j] */
sum += ((q15_t) pIn1[j] * pIn2[-((int32_t) i - j)]);
}
}
/* Store the output in the destination buffer */
if (inv == 1)
*pDst-- = (q7_t) __SSAT((sum >> 7U), 8U);
else
*pDst++ = (q7_t) __SSAT((sum >> 7U), 8U);
}
}