libc: minimal: math sqrt: sqrtf: fix numeric accuracy of sqrt and sqrtf.

Changed initial guess from a simple x/3 to dividing the exponent by 2.
This makes large or small numbers like 10e10 and 01e-10 converge in a few
loops.

Added a loop counter to ensure that the algorithm breaks out of the loop in
the case that the algorithm doesn't converge (toggling between two
numbers).

Added test cases for sqrt and sqrtf in libc. Tested with a range of numbers
between 10e10 and 10e-10. Verify good accuracy in test case.

Closes: #55962

Signed-off-by: Lawrence King <lawrencek52@gmail.com>
diff --git a/lib/libc/minimal/source/math/sqrt.c b/lib/libc/minimal/source/math/sqrt.c
index 4ed372a..df0f8c0 100644
--- a/lib/libc/minimal/source/math/sqrt.c
+++ b/lib/libc/minimal/source/math/sqrt.c
@@ -5,19 +5,52 @@
  */
 
 #include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <zephyr/sys/util.h>
 
-double sqrt(double value)
+#define MAX_D_ITTERATIONS	8  /* usually converges in 5 loops */
+				   /* this ensures we break out of the loop */
+#define MAX_D_ERROR_COUNT	5  /* when result almost converges, stop */
+#define EXP_MASK64	GENMASK64(62, 52)
+
+double sqrt(double square)
 {
-	double sqrt = value / 3;
 	int i;
+	int64_t	exponent;
+	double root;
+	double last;
+	int64_t *p_square = (int64_t *)&square;
+	int64_t *p_root = (int64_t *)&root;
+	int64_t *p_last = (int64_t *)&last;
 
-	if (value <= 0) {
-		return 0;
+	if (square == 0.0) {
+		return square;
+	}
+	if (square < 0.0) {
+		return (square - square) / (square - square);
 	}
 
-	for (i = 0; i < 6; i++) {
-		sqrt = (sqrt + value / sqrt) / 2;
+	/* we need a good starting guess so that this will converge quickly,
+	 * we can do this by dividing the exponent part of the float by 2
+	 * this assumes IEEE-754 format doubles
+	 */
+	exponent = ((*p_square & EXP_MASK64)>>52)-1023;
+	if (exponent == 0x7FF-1023) {
+		/* the number is a NAN or inf, return NaN or inf */
+		return square + square;
 	}
+	exponent /= 2;
+	*p_root = (*p_square & ~EXP_MASK64) | (exponent+1023)<<52;
 
-	return sqrt;
+	for (i = 0; i < MAX_D_ITTERATIONS; i++) {
+		last = root;
+		root = (root + square / root) * 0.5;
+		/* if (llabs(*p_root-*p_last)<MAX_D_ERROR_COUNT) */
+		if ((*p_root ^ *p_last) < MAX_D_ERROR_COUNT) {
+			break;
+		}
+	}
+	return root;
 }
diff --git a/lib/libc/minimal/source/math/sqrtf.c b/lib/libc/minimal/source/math/sqrtf.c
index a14970a..6eb3fbc 100644
--- a/lib/libc/minimal/source/math/sqrtf.c
+++ b/lib/libc/minimal/source/math/sqrtf.c
@@ -5,24 +5,52 @@
  */
 
 #include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <zephyr/sys/util.h>
 
-#define MINDIFF 2.25e-308
+#define MAX_F_ITTERATIONS	6  /* usually converges in 4 loops */
+				   /* this ensures we break out of the loop */
+#define MAX_F_ERROR_COUNT	3  /* when result almost converges, stop */
+#define EXP_MASK32	GENMASK(30, 23)
 
 float sqrtf(float square)
 {
-	float root, last, diff;
+	int i;
+	float root;
+	float last;
+	int32_t	exponent;
+	int32_t *p_square = (int32_t *)&square;
+	int32_t *p_root = (int32_t *)&root;
+	int32_t *p_last = (int32_t *)&last;
 
-	root = square / 3.0;
-
-	if (square <= 0) {
-		return 0;
+	if (square == 0.0f) {
+		return square;
+	}
+	if (square < 0.0f) {
+		return (square - square) / (square - square);
 	}
 
-	do {
-		last = root;
-		root = (root + square / root) / 2.0;
-		diff = root - last;
-	} while (diff > MINDIFF || diff < -MINDIFF);
+	/* we need a good starting guess so that this will converge quickly,
+	 * we can do this by dividing the exponent part of the float by 2
+	 * this assumes IEEE-754 format doubles
+	 */
+	exponent = ((*p_square & EXP_MASK32)>>23)-127;
+	if (exponent == 0xFF-127) {
+		/* the number is a NAN or inf, return NaN or inf */
+		return square + square;
+	}
+	exponent /= 2;
+	*p_root = (*p_square & ~EXP_MASK32) | (exponent+127) << 23;
 
+	for (i = 0; i < MAX_F_ITTERATIONS; i++) {
+		last = root;
+		root = (root + square / root) * 0.5f;
+		/* if (labs(*p_root - *p_last) < MAX_F_ERROR_COUNT) */
+		if ((*p_root ^ *p_last) < MAX_F_ERROR_COUNT) {
+			break;
+		}
+	}
 	return root;
 }
diff --git a/tests/lib/c_lib/src/test_sqrt.c b/tests/lib/c_lib/src/test_sqrt.c
new file mode 100644
index 0000000..ce7fd4f
--- /dev/null
+++ b/tests/lib/c_lib/src/test_sqrt.c
@@ -0,0 +1,233 @@
+/*
+ * Copyright (c) 2023 Lawrence King
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+#include <math.h>
+#include <zephyr/kernel.h>
+#include <zephyr/ztest.h>
+
+
+#define local_abs(x) (((x) < 0) ? -(x) : (x))
+
+#ifndef NAN
+#define NAN	(__builtin_nansf(""))
+#endif
+
+#ifndef NANF
+#define NANF	(__builtin_nans(""))
+#endif
+
+#ifndef INF
+#define INF	(__builtin_inf())
+#endif
+
+#ifndef INFF
+#define INFF	(__builtin_inff())
+#endif
+
+static float test_floats[] = {
+	1.0f, 2.0f, 3.0f, 4.0f,
+	5.0f, 6.0f, 7.0f, 8.0f, 9.0f,	/* numbers across the decade */
+	3.14159265359f, 2.718281828f,	/* irrational numbers pi and e */
+	123.4f,	0.025f,	0.10f,	1.875f	/* numbers with infinite */
+					/* repeating binary representation */
+	};
+#define	NUM_TEST_FLOATS	(sizeof(test_floats)/sizeof(float))
+
+	static double test_doubles[] = {
+		1.0, 2.0, 3.0, 4.0,
+		5.0, 6.0, 7.0, 8.0, 9.0,	/* numbers across the decade */
+		3.14159265359, 2.718281828,	/* irrational numbers pi and e */
+		123.4,	0.025,	0.10,	1.875	/* numbers with infinite */
+						/* repeating binary representationa */
+};
+#define	NUM_TEST_DOUBLES	(sizeof(test_floats)/sizeof(float))
+
+#ifndef	isinf
+static int isinf(double x)
+{
+	union { uint64_t u; double d; } ieee754;
+	ieee754.d = x;
+	ieee754.u &= ~0x8000000000000000;	/* ignore the sign */
+	return ((ieee754.u >> 52) == 0x7FF) &&
+		((ieee754.u & 0x000fffffffffffff) == 0);
+}
+#endif
+
+#ifndef	isnan
+static int isnan(double x)
+{
+	union { uint64_t u; double d; } ieee754;
+	ieee754.d = x;
+	ieee754.u &= ~0x8000000000000000;	/* ignore the sign */
+	return ((ieee754.u >> 52) == 0x7FF) &&
+		((ieee754.u & 0x000fffffffffffff) != 0);
+}
+#endif
+
+#ifndef	isinff
+static int isinff(float x)
+{
+	union { uint32_t u; float f; } ieee754;
+	ieee754.f = x;
+	ieee754.u &= ~0x80000000;		/* ignore the sign */
+	return ((ieee754.u >> 23) == 0xFF) &&
+		((ieee754.u & 0x7FFFFF) == 0);
+}
+#endif
+
+#ifndef	isnanf
+static int isnanf(float x)
+{
+	union { uint32_t u; float f; } ieee754;
+	ieee754.f = x;
+	ieee754.u &= ~0x80000000;		/* ignore the sign */
+	return ((ieee754.u >> 23) == 0xFF) &&
+		((ieee754.u & 0x7FFFFF) != 0);
+}
+#endif
+
+/* small errors are expected, computed as percentage error */
+#define MAX_FLOAT_ERROR_PERCENT		(3.5e-5)
+#define MAX_DOUBLE_ERROR_PERCENT	(4.5e-14)
+
+ZTEST(test_c_lib, test_sqrtf)
+{
+int i;
+float	exponent, resf, square, root_squared;
+double	error;
+uint32_t max_error;
+int32_t ierror;
+int32_t *p_square = (int32_t *)&square;
+int32_t *p_root_squared = (int32_t *)&root_squared;
+
+
+	max_error = 0;
+	/* Conversion not supported with minimal_libc without
+	 * CBPRINTF_FP_SUPPORT.
+	 *
+	 * Conversion not supported without FPU except on native POSIX.
+	 */
+	if (!(IS_ENABLED(CONFIG_FPU)
+		 || IS_ENABLED(CONFIG_BOARD_NATIVE_POSIX))) {
+		ztest_test_skip();
+		return;
+	}
+
+	/* test the special cases of 0.0, NAN, -NAN, INF, -INF,  and -10.0 */
+	zassert_true(sqrtf(0.0f) == 0.0f, "sqrtf(0.0)");
+	zassert_true(isnanf(sqrtf(NANF)), "sqrt(nan)");
+#ifdef issignallingf
+	zassert_true(issignallingf(sqrtf(NANF)), "ssignalingf(sqrtf(nan))");
+/*	printf("issignallingf();\n"); */
+#endif
+	zassert_true(isnanf(sqrtf(-NANF)), "isnanf(sqrtf(-nan))");
+	zassert_true(isinff(sqrtf(INFF)), "isinff(sqrt(inf))");
+	zassert_true(isnanf(sqrtf(-INFF)), "isnanf(sqrt(-inf))");
+	zassert_true(isnanf(sqrtf(-10.0f)), "isnanf(sqrt(-10.0))");
+
+	for (exponent = 1.0e-10f; exponent < 1.0e10f; exponent *= 10.0f) {
+		for (i = 0; i < NUM_TEST_FLOATS; i++) {
+			square = test_floats[i] * exponent;
+			resf = sqrtf(square);
+			root_squared = resf * resf;
+			zassert_true((resf > 0.0f) && (resf < INFF),
+					"sqrtf out of range");
+			if ((resf > 0.0f) && (resf < INFF)) {
+				error = (square - root_squared) /
+					square * 100;
+				if (error < 0.0) {
+					error = -error;
+				}
+				/* square and root_squared should be almost identical
+				 * except the last few bits, the EXOR will only set
+				 * the bits that are different
+				 */
+				ierror = (*p_square - *p_root_squared);
+				ierror = local_abs(ierror);
+				if (ierror > max_error) {
+					max_error = ierror;
+				}
+			} else {
+				/* negative, +NaN, -NaN, inf or -inf */
+				error = 0.0;
+			}
+			zassert_true(error < MAX_FLOAT_ERROR_PERCENT,
+					"max sqrtf error exceeded");
+		}
+	}
+	zassert_true(max_error < 0x03, "huge errors in sqrt implementation");
+	/* print the max error */
+	TC_PRINT("test_sqrtf max error %d counts\n", max_error);
+}
+
+ZTEST(test_c_lib, test_sqrt)
+{
+int i;
+float	exponent;
+double	resd, error, square, root_squared;
+uint64_t max_error;
+int64_t ierror;
+int64_t *p_square = (int64_t *)&square;
+int64_t *p_root_squared = (int64_t *)&root_squared;
+
+
+	max_error = 0;
+	/*
+	 * sqrt is not supported without FPU except on native POSIX.
+	 */
+	if (!(IS_ENABLED(CONFIG_FPU)
+		 || IS_ENABLED(CONFIG_BOARD_NATIVE_POSIX))) {
+		ztest_test_skip();
+		return;
+	}
+
+	/* test the special cases of 0.0, NAN, -NAN, INF, -INF,  and -10.0 */
+	zassert_true(sqrt(0.0) == 0.0, "sqrt(0.0)");
+	zassert_true(isnan(sqrt(NAN)), "sqrt(nan)");
+#ifdef issignalling
+	zassert_true(issignalling(sqrt(NAN)), "ssignaling(sqrt(nan))");
+/*	printf("issignalling();\n"); */
+#endif
+	zassert_true(isnan(sqrt(-NAN)), "isnan(sqrt(-nan))");
+	zassert_true(isinf(sqrt(INF)), "isinf(sqrt(inf))");
+	zassert_true(isnan(sqrt(-INF)), "isnan(sqrt(-inf))");
+	zassert_true(isnan(sqrt(-10.0)), "isnan(sqrt(-10.0))");
+
+	for (exponent = 1.0e-10; exponent < 1.0e10; exponent *= 10.0) {
+		for (i = 0; i < NUM_TEST_DOUBLES; i++) {
+			square = test_doubles[i] * exponent;
+			resd = sqrt(square);
+			root_squared = resd * resd;
+			zassert_true((resd > 0.0) && (resd < INF),
+					"sqrt out of range");
+			if ((resd > 0.0) && (resd < INF)) {
+				error = (square - root_squared) /
+					square * 100;
+				if (error < 0.0) {
+					error = -error;
+				}
+				/* square and root_squared should be almost identical
+				 * except the last few bits, the EXOR will only set
+				 * the bits that are different
+				 */
+				ierror = (*p_square - *p_root_squared);
+				ierror = local_abs(ierror);
+				if (ierror > max_error) {
+					max_error = ierror;
+				}
+			} else {
+				/* negative, +NaN, -NaN, inf or -inf */
+				error = 0.0;
+			}
+			zassert_true(error < MAX_DOUBLE_ERROR_PERCENT,
+					"max sqrt error exceeded");
+		}
+	}
+	zassert_true(max_error < 0x04, "huge errors in sqrt implementation");
+	/* print the max error */
+	TC_PRINT("test_sqrt max error %d counts\n", (uint32_t)max_error);
+}