lib/os/utf8.c - third_party/github/zephyrproject-rtos/zephyr - Git at Google

 /*
  * Copyright (c) 2021 Nordic Semiconductor ASA
  *
  * SPDX-License-Identifier: Apache-2.0
  */

 #include <stdint.h>
 #include <string.h>
 #include <zephyr/sys/__assert.h>

 /* Mask of the bits a single-byte character may use; a byte with any
  * other bit set is treated as part of a multi-byte UTF-8 sequence.
  */
 #define ASCII_CHAR 0x7F
 /* A byte with both of these bits set is the starting byte of a sequence */
 #define SEQUENCE_FIRST_MASK 0xC0
 /* Leading-bit patterns used to classify a starting byte by sequence length */
 #define SEQUENCE_LEN_2_BYTE 0xC0
 #define SEQUENCE_LEN_3_BYTE 0xE0
 #define SEQUENCE_LEN_4_BYTE 0xF0

 /**
  * Strip an incomplete multi-byte UTF-8 sequence from the end of a string.
  *
  * Only the last character of the string is examined. If its starting byte
  * announces more bytes than are actually present before the terminator,
  * the starting byte is overwritten with '\0' so the string ends just
  * before the partial character. The string is modified in place.
  *
  * @param utf8_str NUL-terminated string to fix up; must be writable.
  *
  * @return utf8_str, always.
  */
 char *utf8_trunc(char *utf8_str)
 {
 	const size_t len = strlen(utf8_str);
 	char *last_byte_p;
 	size_t bytes_truncated;
 	char seq_start_byte;

 	if (len == 0U) {
 		/* Empty string: utf8_str + len - 1 would point before the
 		 * buffer, so there is nothing that can be inspected safely.
 		 */
 		return utf8_str;
 	}

 	last_byte_p = utf8_str + len - 1U;

 	if ((*last_byte_p & ASCII_CHAR) == *last_byte_p) {
 		/* Not part of an UTF8 sequence, return */
 		return utf8_str;
 	}

 	/* Walk backwards to the starting byte of the last sequence, counting
 	 * the bytes stepped over. A size_t counter cannot wrap on a long run
 	 * of continuation bytes the way an 8-bit counter would.
 	 */
 	bytes_truncated = 0U;
 	while ((*last_byte_p & SEQUENCE_FIRST_MASK) != SEQUENCE_FIRST_MASK &&
 	       last_byte_p > utf8_str) {
 		last_byte_p--;
 		bytes_truncated++;
 	}
 	bytes_truncated++; /* include the starting byte */

 	/* Verify whether the last character actually needs to be truncated.
 	 * Handles the case where the number of bytes in the last UTF8-char
 	 * matches the number of bytes we searched for the starting byte.
 	 * The longest pattern is tested first because a 4-byte starting byte
 	 * also matches the 3- and 2-byte patterns.
 	 */
 	seq_start_byte = *last_byte_p;
 	if ((seq_start_byte & SEQUENCE_LEN_4_BYTE) == SEQUENCE_LEN_4_BYTE) {
 		if (bytes_truncated == 4U) {
 			return utf8_str;
 		}
 	} else if ((seq_start_byte & SEQUENCE_LEN_3_BYTE) == SEQUENCE_LEN_3_BYTE) {
 		if (bytes_truncated == 3U) {
 			return utf8_str;
 		}
 	} else if ((seq_start_byte & SEQUENCE_LEN_2_BYTE) == SEQUENCE_LEN_2_BYTE) {
 		if (bytes_truncated == 2U) {
 			return utf8_str;
 		}
 	}

 	/* Incomplete (or malformed) sequence: end the string at its starting
 	 * byte. The bytes that followed it are left in the buffer, after the
 	 * new terminator.
 	 */
 	*last_byte_p = '\0';

 	return utf8_str;
 }

 /**
  * Copy a UTF-8 string into a bounded buffer without leaving a partial
  * multi-byte character at the end.
  *
  * Up to n - 1 bytes of src are copied with strncpy(), dst[n - 1] is set
  * to '\0', and, unless n is 1, utf8_trunc() is applied to dst to drop an
  * incomplete trailing sequence.
  *
  * @param dst Destination buffer, at least n bytes large.
  * @param src NUL-terminated source string.
  * @param n   Size of dst in bytes; expected to be greater than 0.
  *
  * @return dst, always.
  */
 char *utf8_lcpy(char *dst, const char *src, size_t n)
 {
 	__ASSERT(n > 0, "n shall not be 0");

 	if (n == 0) {
 		/* If the assertion above did not stop execution, do not go
 		 * on: n - 1 would wrap around to SIZE_MAX and dst[n - 1]
 		 * would be written with a zero-sized buffer.
 		 */
 		return dst;
 	}

 	strncpy(dst, src, n - 1);
 	dst[n - 1] = '\0';

 	/* With n == 1 dst only holds the terminator; nothing to truncate */
 	if (n != 1) {
 		utf8_trunc(dst);
 	}

 	return dst;
 }
	/*
	* Copyright (c) 2021 Nordic Semiconductor ASA
	*
	* SPDX-License-Identifier: Apache-2.0
	*/

	#include <stdint.h>
	#include <string.h>
	#include <zephyr/sys/__assert.h>

	/* Mask of the bits a single-byte character may use; a byte with any
	 * other bit set is treated as part of a multi-byte UTF-8 sequence.
	 */
	#define ASCII_CHAR 0x7F
	/* A byte with both of these bits set is the starting byte of a sequence */
	#define SEQUENCE_FIRST_MASK 0xC0
	/* Leading-bit patterns used to classify a starting byte by sequence length */
	#define SEQUENCE_LEN_2_BYTE 0xC0
	#define SEQUENCE_LEN_3_BYTE 0xE0
	#define SEQUENCE_LEN_4_BYTE 0xF0

	/**
	 * Strip an incomplete multi-byte UTF-8 sequence from the end of a string.
	 *
	 * Only the last character of the string is examined. If its starting byte
	 * announces more bytes than are actually present before the terminator,
	 * the starting byte is overwritten with '\0' so the string ends just
	 * before the partial character. The string is modified in place.
	 *
	 * @param utf8_str NUL-terminated string to fix up; must be writable.
	 *
	 * @return utf8_str, always.
	 */
	char *utf8_trunc(char *utf8_str)
	{
		const size_t len = strlen(utf8_str);
		char *last_byte_p;
		size_t bytes_truncated;
		char seq_start_byte;

		if (len == 0U) {
			/* Empty string: utf8_str + len - 1 would point before the
			 * buffer, so there is nothing that can be inspected safely.
			 */
			return utf8_str;
		}

		last_byte_p = utf8_str + len - 1U;

		if ((*last_byte_p & ASCII_CHAR) == *last_byte_p) {
			/* Not part of an UTF8 sequence, return */
			return utf8_str;
		}

		/* Walk backwards to the starting byte of the last sequence, counting
		 * the bytes stepped over. A size_t counter cannot wrap on a long run
		 * of continuation bytes the way an 8-bit counter would.
		 */
		bytes_truncated = 0U;
		while ((*last_byte_p & SEQUENCE_FIRST_MASK) != SEQUENCE_FIRST_MASK &&
		       last_byte_p > utf8_str) {
			last_byte_p--;
			bytes_truncated++;
		}
		bytes_truncated++; /* include the starting byte */

		/* Verify whether the last character actually needs to be truncated.
		 * Handles the case where the number of bytes in the last UTF8-char
		 * matches the number of bytes we searched for the starting byte.
		 * The longest pattern is tested first because a 4-byte starting byte
		 * also matches the 3- and 2-byte patterns.
		 */
		seq_start_byte = *last_byte_p;
		if ((seq_start_byte & SEQUENCE_LEN_4_BYTE) == SEQUENCE_LEN_4_BYTE) {
			if (bytes_truncated == 4U) {
				return utf8_str;
			}
		} else if ((seq_start_byte & SEQUENCE_LEN_3_BYTE) == SEQUENCE_LEN_3_BYTE) {
			if (bytes_truncated == 3U) {
				return utf8_str;
			}
		} else if ((seq_start_byte & SEQUENCE_LEN_2_BYTE) == SEQUENCE_LEN_2_BYTE) {
			if (bytes_truncated == 2U) {
				return utf8_str;
			}
		}

		/* Incomplete (or malformed) sequence: end the string at its starting
		 * byte. The bytes that followed it are left in the buffer, after the
		 * new terminator.
		 */
		*last_byte_p = '\0';

		return utf8_str;
	}

	/**
	 * Copy a UTF-8 string into a bounded buffer without leaving a partial
	 * multi-byte character at the end.
	 *
	 * Up to n - 1 bytes of src are copied with strncpy(), dst[n - 1] is set
	 * to '\0', and, unless n is 1, utf8_trunc() is applied to dst to drop an
	 * incomplete trailing sequence.
	 *
	 * @param dst Destination buffer, at least n bytes large.
	 * @param src NUL-terminated source string.
	 * @param n   Size of dst in bytes; expected to be greater than 0.
	 *
	 * @return dst, always.
	 */
	char *utf8_lcpy(char *dst, const char *src, size_t n)
	{
		__ASSERT(n > 0, "n shall not be 0");

		if (n == 0) {
			/* If the assertion above did not stop execution, do not go
			 * on: n - 1 would wrap around to SIZE_MAX and dst[n - 1]
			 * would be written with a zero-sized buffer.
			 */
			return dst;
		}

		strncpy(dst, src, n - 1);
		dst[n - 1] = '\0';

		/* With n == 1 dst only holds the terminator; nothing to truncate */
		if (n != 1) {
			utf8_trunc(dst);
		}

		return dst;
	}