libc: minimal: add size-optimized string functions

The current implementations of memcpy and memset are optimized for
performance and use a word-based loop before the byte-based loop.

Add a config option, MINIMAL_LIBC_OPTIMIZE_STRING_FOR_SIZE, that skips
the word-based loop. This saves 120 bytes on the Cortex-M0+, which is
worthwhile for small applications such as a bootloader.

Enable by default if SIZE_OPTIMIZATIONS is set.

Signed-off-by: Michael Hope <mlhx@google.com>
diff --git a/lib/libc/minimal/source/string/string.c b/lib/libc/minimal/source/string/string.c
index 20996e4..1f889e9 100644
--- a/lib/libc/minimal/source/string/string.c
+++ b/lib/libc/minimal/source/string/string.c
@@ -301,6 +301,8 @@
 
 	unsigned char *d_byte = (unsigned char *)d;
 	const unsigned char *s_byte = (const unsigned char *)s;
+
+#if !defined(CONFIG_MINIMAL_LIBC_OPTIMIZE_STRING_FOR_SIZE)
 	const uintptr_t mask = sizeof(mem_word_t) - 1;
 
 	if ((((uintptr_t)d ^ (uintptr_t)s_byte) & mask) == 0) {
@@ -328,6 +330,7 @@
 		d_byte = (unsigned char *)d_word;
 		s_byte = (unsigned char *)s_word;
 	}
+#endif
 
 	/* do byte-sized copying until finished */
 
@@ -353,6 +356,7 @@
 	unsigned char *d_byte = (unsigned char *)buf;
 	unsigned char c_byte = (unsigned char)c;
 
+#if !defined(CONFIG_MINIMAL_LIBC_OPTIMIZE_STRING_FOR_SIZE)
 	while (((uintptr_t)d_byte) & (sizeof(mem_word_t) - 1)) {
 		if (n == 0) {
 			return buf;
@@ -380,6 +384,7 @@
 	/* do byte-sized initialization until finished */
 
 	d_byte = (unsigned char *)d_word;
+#endif
 
 	while (n > 0) {
 		*(d_byte++) = c_byte;