DSP/Source/MatrixFunctions/arm_mat_solve_upper_triangular_f32.c - third_party/github/STMicroelectronics/cmsis_core - Git at Google

 /* ----------------------------------------------------------------------
  * Project:      CMSIS DSP Library
  * Title:        arm_mat_solve_upper_triangular_f32.c
  * Description:  Solve linear system UT X = A with UT upper triangular matrix
  *
  * $Date:        23 April 2021
  * $Revision:    V1.9.0
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
  * Licensed under the Apache License, Version 2.0 (the License); you may
  * not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  * www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 #include "dsp/matrix_functions.h"


 /**
   @ingroup groupMatrix
  */


 /**
   @addtogroup MatrixInv
   @{
  */

 /**
    * @brief Solve UT . X = A where UT is an upper triangular matrix
    * @param[in]  ut  The upper triangular matrix
    * @param[in]  a  The matrix a
    * @param[out] dst The solution X of UT . X = A
    * @return The function returns ARM_MATH_SINGULAR, if the system can't be solved.
   */

 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)

 #include "arm_helium_utils.h"

   arm_status arm_mat_solve_upper_triangular_f32(
   const arm_matrix_instance_f32 * ut,
   const arm_matrix_instance_f32 * a,
   arm_matrix_instance_f32 * dst)
   {
 arm_status status;                             /* status of matrix inverse */


 #ifdef ARM_MATH_MATRIX_CHECK

   /* Check for matrix mismatch condition */
   if ((ut->numRows != ut->numCols) ||
       (ut->numRows != a->numRows)   )
   {
     /* Set status as ARM_MATH_SIZE_MISMATCH */
     status = ARM_MATH_SIZE_MISMATCH;
   }
   else

 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */

   {

     int i,j,k,n,cols;

     n = dst->numRows;
     cols = dst->numCols;

     float32_t *pX = dst->pData;
     float32_t *pUT = ut->pData;
     float32_t *pA = a->pData;

     float32_t *ut_row;
     float32_t *a_col;

     float32_t invUT;

     f32x4_t vecA;
     f32x4_t vecX;

     for(i=n-1; i >= 0 ; i--)
     {
       for(j=0; j+3 < cols; j +=4)
       {
             vecA = vld1q_f32(&pA[i * cols + j]);

             for(k=n-1; k > i; k--)
             {
                 vecX = vld1q_f32(&pX[cols*k+j]);
                 vecA = vfmsq(vecA,vdupq_n_f32(pUT[n*i + k]),vecX);
             }

             if (pUT[n*i + i]==0.0f)
             {
               return(ARM_MATH_SINGULAR);
             }

             invUT = 1.0f / pUT[n*i + i];
             vecA = vmulq(vecA,vdupq_n_f32(invUT));


             vst1q(&pX[i*cols+j],vecA);
       }

       for(; j < cols; j ++)
       {
             a_col = &pA[j];

             ut_row = &pUT[n*i];

             float32_t tmp=a_col[i * cols];

             for(k=n-1; k > i; k--)
             {
                 tmp -= ut_row[k] * pX[cols*k+j];
             }

             if (ut_row[i]==0.0f)
             {
               return(ARM_MATH_SINGULAR);
             }
             tmp = tmp / ut_row[i];
             pX[i*cols+j] = tmp;
        }

     }
     status = ARM_MATH_SUCCESS;

   }


   /* Return to application */
   return (status);
 }

 #else
 #if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
   arm_status arm_mat_solve_upper_triangular_f32(
   const arm_matrix_instance_f32 * ut,
   const arm_matrix_instance_f32 * a,
   arm_matrix_instance_f32 * dst)
   {
 arm_status status;                             /* status of matrix inverse */


 #ifdef ARM_MATH_MATRIX_CHECK

   /* Check for matrix mismatch condition */
   if ((ut->numRows != ut->numCols) ||
       (ut->numRows != a->numRows)   )
   {
     /* Set status as ARM_MATH_SIZE_MISMATCH */
     status = ARM_MATH_SIZE_MISMATCH;
   }
   else

 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */

   {

     int i,j,k,n,cols;

     n = dst->numRows;
     cols = dst->numCols;

     float32_t *pX = dst->pData;
     float32_t *pUT = ut->pData;
     float32_t *pA = a->pData;

     float32_t *ut_row;
     float32_t *a_col;

     float32_t invUT;

     f32x4_t vecA;
     f32x4_t vecX;

     for(i=n-1; i >= 0 ; i--)
     {
       for(j=0; j+3 < cols; j +=4)
       {
             vecA = vld1q_f32(&pA[i * cols + j]);

             for(k=n-1; k > i; k--)
             {
                 vecX = vld1q_f32(&pX[cols*k+j]);
                 vecA = vfmsq_f32(vecA,vdupq_n_f32(pUT[n*i + k]),vecX);
             }

             if (pUT[n*i + i]==0.0f)
             {
               return(ARM_MATH_SINGULAR);
             }

             invUT = 1.0f / pUT[n*i + i];
             vecA = vmulq_f32(vecA,vdupq_n_f32(invUT));


             vst1q_f32(&pX[i*cols+j],vecA);
       }

       for(; j < cols; j ++)
       {
             a_col = &pA[j];

             ut_row = &pUT[n*i];

             float32_t tmp=a_col[i * cols];

             for(k=n-1; k > i; k--)
             {
                 tmp -= ut_row[k] * pX[cols*k+j];
             }

             if (ut_row[i]==0.0f)
             {
               return(ARM_MATH_SINGULAR);
             }
             tmp = tmp / ut_row[i];
             pX[i*cols+j] = tmp;
        }

     }
     status = ARM_MATH_SUCCESS;

   }


   /* Return to application */
   return (status);
 }

 #else
   arm_status arm_mat_solve_upper_triangular_f32(
   const arm_matrix_instance_f32 * ut,
   const arm_matrix_instance_f32 * a,
   arm_matrix_instance_f32 * dst)
   {
 arm_status status;                             /* status of matrix inverse */


 #ifdef ARM_MATH_MATRIX_CHECK

   /* Check for matrix mismatch condition */
   if ((ut->numRows != ut->numCols) ||
       (ut->numRows != a->numRows)   )
   {
     /* Set status as ARM_MATH_SIZE_MISMATCH */
     status = ARM_MATH_SIZE_MISMATCH;
   }
   else

 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */

   {

     int i,j,k,n,cols;

     float32_t *pX = dst->pData;
     float32_t *pUT = ut->pData;
     float32_t *pA = a->pData;

     float32_t *ut_row;
     float32_t *a_col;

     n = dst->numRows;
     cols = dst->numCols;

     for(j=0; j < cols; j ++)
     {
        a_col = &pA[j];

        for(i=n-1; i >= 0 ; i--)
        {
             float32_t tmp=a_col[i * cols];

             ut_row = &pUT[n*i];

             for(k=n-1; k > i; k--)
             {
                 tmp -= ut_row[k] * pX[cols*k+j];
             }

             if (ut_row[i]==0.0f)
             {
               return(ARM_MATH_SINGULAR);
             }
             tmp = tmp / ut_row[i];
             pX[i*cols+j] = tmp;
        }

     }
     status = ARM_MATH_SUCCESS;

   }


   /* Return to application */
   return (status);
 }
 #endif /* #if defined(ARM_MATH_NEON) */
 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */

 /**
   @} end of MatrixInv group
  */
	/* ----------------------------------------------------------------------
	* Project: CMSIS DSP Library
	* Title: arm_mat_solve_upper_triangular_f32.c
	* Description: Solve linear system UT X = A with UT upper triangular matrix
	*
	* $Date: 23 April 2021
	* $Revision: V1.9.0
	*
	* Target Processor: Cortex-M and Cortex-A cores
	* -------------------------------------------------------------------- */
	/*
	* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
	*
	* SPDX-License-Identifier: Apache-2.0
	*
	* Licensed under the Apache License, Version 2.0 (the License); you may
	* not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an AS IS BASIS, WITHOUT
	* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	#include "dsp/matrix_functions.h"


	/**
	@ingroup groupMatrix
	*/


	/**
	@addtogroup MatrixInv
	@{
	*/

	/**
	* @brief Solve UT . X = A where UT is an upper triangular matrix
	* @param[in] ut The upper triangular matrix
	* @param[in] a The matrix a
	* @param[out] dst The solution X of UT . X = A
	* @return The function returns ARM_MATH_SINGULAR, if the system can't be solved.
	*/

	#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)

	#include "arm_helium_utils.h"

	arm_status arm_mat_solve_upper_triangular_f32(
	const arm_matrix_instance_f32 * ut,
	const arm_matrix_instance_f32 * a,
	arm_matrix_instance_f32 * dst)
	{
	arm_status status; /* status of matrix inverse */


	#ifdef ARM_MATH_MATRIX_CHECK

	/* Check for matrix mismatch condition */
	if ((ut->numRows != ut->numCols) \|\|
	(ut->numRows != a->numRows) )
	{
	/* Set status as ARM_MATH_SIZE_MISMATCH */
	status = ARM_MATH_SIZE_MISMATCH;
	}
	else

	#endif /* #ifdef ARM_MATH_MATRIX_CHECK */

	{

	int i,j,k,n,cols;

	n = dst->numRows;
	cols = dst->numCols;

	float32_t *pX = dst->pData;
	float32_t *pUT = ut->pData;
	float32_t *pA = a->pData;

	float32_t *ut_row;
	float32_t *a_col;

	float32_t invUT;

	f32x4_t vecA;
	f32x4_t vecX;

	for(i=n-1; i >= 0 ; i--)
	{
	for(j=0; j+3 < cols; j +=4)
	{
	vecA = vld1q_f32(&pA[i * cols + j]);

	for(k=n-1; k > i; k--)
	{
	vecX = vld1q_f32(&pX[cols*k+j]);
	vecA = vfmsq(vecA,vdupq_n_f32(pUT[n*i + k]),vecX);
	}

	if (pUT[n*i + i]==0.0f)
	{
	return(ARM_MATH_SINGULAR);
	}

	invUT = 1.0f / pUT[n*i + i];
	vecA = vmulq(vecA,vdupq_n_f32(invUT));


	vst1q(&pX[i*cols+j],vecA);
	}

	for(; j < cols; j ++)
	{
	a_col = &pA[j];

	ut_row = &pUT[n*i];

	float32_t tmp=a_col[i * cols];

	for(k=n-1; k > i; k--)
	{
	tmp -= ut_row[k] * pX[cols*k+j];
	}

	if (ut_row[i]==0.0f)
	{
	return(ARM_MATH_SINGULAR);
	}
	tmp = tmp / ut_row[i];
	pX[i*cols+j] = tmp;
	}

	}
	status = ARM_MATH_SUCCESS;

	}


	/* Return to application */
	return (status);
	}

	#else
	#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
	arm_status arm_mat_solve_upper_triangular_f32(
	const arm_matrix_instance_f32 * ut,
	const arm_matrix_instance_f32 * a,
	arm_matrix_instance_f32 * dst)
	{
	arm_status status; /* status of matrix inverse */


	#ifdef ARM_MATH_MATRIX_CHECK

	/* Check for matrix mismatch condition */
	if ((ut->numRows != ut->numCols) \|\|
	(ut->numRows != a->numRows) )
	{
	/* Set status as ARM_MATH_SIZE_MISMATCH */
	status = ARM_MATH_SIZE_MISMATCH;
	}
	else

	#endif /* #ifdef ARM_MATH_MATRIX_CHECK */

	{

	int i,j,k,n,cols;

	n = dst->numRows;
	cols = dst->numCols;

	float32_t *pX = dst->pData;
	float32_t *pUT = ut->pData;
	float32_t *pA = a->pData;

	float32_t *ut_row;
	float32_t *a_col;

	float32_t invUT;

	f32x4_t vecA;
	f32x4_t vecX;

	for(i=n-1; i >= 0 ; i--)
	{
	for(j=0; j+3 < cols; j +=4)
	{
	vecA = vld1q_f32(&pA[i * cols + j]);

	for(k=n-1; k > i; k--)
	{
	vecX = vld1q_f32(&pX[cols*k+j]);
	vecA = vfmsq_f32(vecA,vdupq_n_f32(pUT[n*i + k]),vecX);
	}

	if (pUT[n*i + i]==0.0f)
	{
	return(ARM_MATH_SINGULAR);
	}

	invUT = 1.0f / pUT[n*i + i];
	vecA = vmulq_f32(vecA,vdupq_n_f32(invUT));


	vst1q_f32(&pX[i*cols+j],vecA);
	}

	for(; j < cols; j ++)
	{
	a_col = &pA[j];

	ut_row = &pUT[n*i];

	float32_t tmp=a_col[i * cols];

	for(k=n-1; k > i; k--)
	{
	tmp -= ut_row[k] * pX[cols*k+j];
	}

	if (ut_row[i]==0.0f)
	{
	return(ARM_MATH_SINGULAR);
	}
	tmp = tmp / ut_row[i];
	pX[i*cols+j] = tmp;
	}

	}
	status = ARM_MATH_SUCCESS;

	}


	/* Return to application */
	return (status);
	}

	#else
	arm_status arm_mat_solve_upper_triangular_f32(
	const arm_matrix_instance_f32 * ut,
	const arm_matrix_instance_f32 * a,
	arm_matrix_instance_f32 * dst)
	{
	arm_status status; /* status of matrix inverse */


	#ifdef ARM_MATH_MATRIX_CHECK

	/* Check for matrix mismatch condition */
	if ((ut->numRows != ut->numCols) \|\|
	(ut->numRows != a->numRows) )
	{
	/* Set status as ARM_MATH_SIZE_MISMATCH */
	status = ARM_MATH_SIZE_MISMATCH;
	}
	else

	#endif /* #ifdef ARM_MATH_MATRIX_CHECK */

	{

	int i,j,k,n,cols;

	float32_t *pX = dst->pData;
	float32_t *pUT = ut->pData;
	float32_t *pA = a->pData;

	float32_t *ut_row;
	float32_t *a_col;

	n = dst->numRows;
	cols = dst->numCols;

	for(j=0; j < cols; j ++)
	{
	a_col = &pA[j];

	for(i=n-1; i >= 0 ; i--)
	{
	float32_t tmp=a_col[i * cols];

	ut_row = &pUT[n*i];

	for(k=n-1; k > i; k--)
	{
	tmp -= ut_row[k] * pX[cols*k+j];
	}

	if (ut_row[i]==0.0f)
	{
	return(ARM_MATH_SINGULAR);
	}
	tmp = tmp / ut_row[i];
	pX[i*cols+j] = tmp;
	}

	}
	status = ARM_MATH_SUCCESS;

	}


	/* Return to application */
	return (status);
	}
	#endif /* #if defined(ARM_MATH_NEON) */
	#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */

	/**
	@} end of MatrixInv group
	*/