/* Source: DSP/Source/DistanceFunctions/arm_euclidean_distance_f32.c - third_party/github/STMicroelectronics/cmsis_core - Git at Google */


 /* ----------------------------------------------------------------------
  * Project:      CMSIS DSP Library
  * Title:        arm_euclidean_distance_f32.c
  * Description:  Euclidean distance between two vectors
  *
  * $Date:        23 April 2021
  * $Revision:    V1.9.0
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
  * Licensed under the Apache License, Version 2.0 (the License); you may
  * not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  * www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 #include "dsp/distance_functions.h"
 #include <limits.h>
 #include <math.h>


 /**
   @addtogroup Euclidean
   @{
  */


 /**
  * @brief        Euclidean distance between two vectors
  * @param[in]    pA         First vector
  * @param[in]    pB         Second vector
  * @param[in]    blockSize  vector length
  * @return distance
  *
  */
 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)

 #include "arm_helium_utils.h"
 #include "arm_vec_math.h"
 /*
  * Helium (MVE) variant: accumulates the squared element differences four
  * lanes at a time, handles the last (blockSize & 3) elements with a
  * predicated step, then takes the square root of the lane-reduced sum.
  */
 float32_t arm_euclidean_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize)
 {
     f32x4_t         sumSqV = vdupq_n_f32(0.0f);
     f32x4_t         vecA, vecB, diffV;
     float32_t       distance;
     uint32_t        remaining;

     /* Full groups of four elements. */
     for (remaining = blockSize >> 2; remaining > 0U; remaining--) {
         vecA = vld1q(pA);
         vecB = vld1q(pB);

         diffV = vsubq(vecA, vecB);
         sumSqV = vfmaq(sumSqV, diffV, diffV);

         pA += 4;
         pB += 4;
     }

     /*
      * Tail: 1 to 3 leftover elements, processed under a predicate so
      * only the active lanes are loaded and accumulated.
      */
     remaining = blockSize & 3;
     if (remaining > 0U) {
         mve_pred16_t    tailPred = vctp32q(remaining);

         vecA = vldrwq_z_f32(pA, tailPred);
         vecB = vldrwq_z_f32(pB, tailPred);

         diffV = vsubq(vecA, vecB);
         sumSqV = vfmaq_m(sumSqV, diffV, diffV, tailPred);
     }

     /* Reduce the accumulator across lanes and take the square root. */
     arm_sqrt_f32(vecAddAcrossF32Mve(sumSqV), &distance);
     return (distance);
 }
 #else
 #if defined(ARM_MATH_NEON)

 #include "NEMath.h"

 /*
  * NEON variant: sums the squared differences four lanes at a time,
  * folds the vector accumulator into a scalar, finishes the remaining
  * (blockSize & 3) elements one by one, then takes the square root.
  */
 float32_t arm_euclidean_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize)
 {
    float32x4_t vecA, vecB;
    float32x4_t sumSqV = vdupq_n_f32(0.0f);
    float32x2_t pairSum;
    float32_t sumSq = 0.0f;
    float32_t diff;
    uint32_t remaining;

    /* Full groups of four elements. */
    for (remaining = blockSize >> 2; remaining > 0; remaining--)
    {
         vecA = vld1q_f32(pA);
         vecB = vld1q_f32(pB);

         vecA = vsubq_f32(vecA, vecB);
         sumSqV = vmlaq_f32(sumSqV, vecA, vecA);

         pA += 4;
         pB += 4;
    }

    /* Fold the four lane sums into one scalar. */
    pairSum = vpadd_f32(vget_low_f32(sumSqV), vget_high_f32(sumSqV));
    sumSq = vget_lane_f32(pairSum, 0) + vget_lane_f32(pairSum, 1);

    /* Scalar tail for the last 0 to 3 elements. */
    for (remaining = blockSize & 3; remaining > 0; remaining--)
    {
       diff = *pA - *pB;
       pA++;
       pB++;
       sumSq += SQ(diff);
    }

    /* diff is reused to receive the square root. */
    arm_sqrt_f32(sumSq, &diff);
    return(diff);
 }

 #else
 /*
  * Portable scalar variant: sums the squared differences of the
  * blockSize element pairs in order, then takes the square root.
  */
 float32_t arm_euclidean_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize)
 {
    float32_t sumSq = 0.0f;
    float32_t diff;
    float32_t distance;
    uint32_t idx;

    for (idx = 0U; idx < blockSize; idx++)
    {
       diff = pA[idx] - pB[idx];
       sumSq += SQ(diff);
    }

    arm_sqrt_f32(sumSq, &distance);
    return(distance);
 }
 #endif
 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */


 /**
  * @} end of Euclidean group
  */

	/* ----------------------------------------------------------------------
	* Project: CMSIS DSP Library
	* Title: arm_euclidean_distance_f32.c
	* Description: Euclidean distance between two vectors
	*
	* $Date: 23 April 2021
	* $Revision: V1.9.0
	*
	* Target Processor: Cortex-M and Cortex-A cores
	* -------------------------------------------------------------------- */
	/*
	* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
	*
	* SPDX-License-Identifier: Apache-2.0
	*
	* Licensed under the Apache License, Version 2.0 (the License); you may
	* not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an AS IS BASIS, WITHOUT
	* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	#include "dsp/distance_functions.h"
	#include <limits.h>
	#include <math.h>



	/**
	@addtogroup Euclidean
	@{
	*/


	/**
	* @brief Euclidean distance between two vectors
	* @param[in] pA First vector
	* @param[in] pB Second vector
	* @param[in] blockSize vector length
	* @return distance
	*
	*/
	#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)

	#include "arm_helium_utils.h"
	#include "arm_vec_math.h"
	/*
	 * Helium (MVE) variant: accumulates the squared element differences
	 * four lanes at a time, handles the last (blockSize & 3) elements with
	 * a predicated step, then takes the square root of the reduced sum.
	 *
	 * pA and pB are pointers to the input vectors: they are passed to the
	 * vector loads and advanced by 4 per iteration. The '*' declarators
	 * were missing from this copy of the signature and are restored here.
	 */
	float32_t arm_euclidean_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize)
	{
		uint32_t blkCnt;
		float32_t tmp;
		f32x4_t a, b, accumV, tempV;

		accumV = vdupq_n_f32(0.0f);

		/* Full groups of four elements. */
		blkCnt = blockSize >> 2;
		while (blkCnt > 0U) {
			a = vld1q(pA);
			b = vld1q(pB);

			tempV = vsubq(a, b);
			accumV = vfmaq(accumV, tempV, tempV);

			pA += 4;
			pB += 4;
			blkCnt--;
		}

		/*
		 * tail
		 * (will be merged thru tail predication)
		 */
		blkCnt = blockSize & 3;
		if (blkCnt > 0U) {
			mve_pred16_t p0 = vctp32q(blkCnt);

			a = vldrwq_z_f32(pA, p0);
			b = vldrwq_z_f32(pB, p0);

			tempV = vsubq(a, b);
			accumV = vfmaq_m(accumV, tempV, tempV, p0);
		}

		/* Reduce the accumulator across lanes and take the square root. */
		arm_sqrt_f32(vecAddAcrossF32Mve(accumV), &tmp);
		return (tmp);
	}
	#else
	#if defined(ARM_MATH_NEON)

	#include "NEMath.h"

	/*
	 * NEON variant: sums the squared differences four lanes at a time,
	 * folds the vector accumulator into a scalar, finishes the remaining
	 * (blockSize & 3) elements one by one, then takes the square root.
	 *
	 * pA and pB are pointers to the input vectors. The '*' declarators and
	 * the dereferences in the tail loop were missing from this copy and
	 * are restored here.
	 */
	float32_t arm_euclidean_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize)
	{
		float32_t accum=0.0f,tmp;
		uint32_t blkCnt;
		float32x4_t a,b,accumV;
		float32x2_t accumV2;

		accumV = vdupq_n_f32(0.0f);

		/* Full groups of four elements. */
		blkCnt = blockSize >> 2;
		while(blkCnt > 0)
		{
			a = vld1q_f32(pA);
			b = vld1q_f32(pB);

			a = vsubq_f32(a,b);
			accumV = vmlaq_f32(accumV,a,a);
			pA += 4;
			pB += 4;
			blkCnt --;
		}

		/* Fold the four lane sums into one scalar. */
		accumV2 = vpadd_f32(vget_low_f32(accumV),vget_high_f32(accumV));
		accum = vget_lane_f32(accumV2, 0) + vget_lane_f32(accumV2, 1);

		/* Scalar tail for the last 0 to 3 elements. */
		blkCnt = blockSize & 3;
		while(blkCnt > 0)
		{
			tmp = *pA++ - *pB++;
			accum += SQ(tmp);
			blkCnt --;
		}

		/* tmp is reused to receive the square root. */
		arm_sqrt_f32(accum,&tmp);
		return(tmp);
	}

	#else
	/*
	 * Portable scalar variant: sums the squared differences of the
	 * blockSize element pairs in order, then takes the square root.
	 *
	 * pA and pB are pointers to the input vectors. The '*' declarators and
	 * the dereferences in the loop were missing from this copy (it
	 * subtracted the parameters themselves) and are restored here.
	 */
	float32_t arm_euclidean_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize)
	{
		float32_t accum=0.0f,tmp;

		while(blockSize > 0)
		{
			tmp = *pA++ - *pB++;
			accum += SQ(tmp);
			blockSize --;
		}

		/* tmp is reused to receive the square root. */
		arm_sqrt_f32(accum,&tmp);
		return(tmp);
	}
	#endif
	#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */


	/**
	* @} end of Euclidean group
	*/