/* third_party/utf8_range/lookup.c - third_party/github/protocolbuffers/protobuf - Git at Google */

 #include <stdio.h>

 /* http://bjoern.hoehrmann.de/utf-8/decoder/dfa */
 /* Optimized version based on Rich Felker's variant. */
 /* Automaton states are stored as row offsets into utf8s (multiples of 12),
  * so a state can be added directly to a character class to form an index. */
 /* Start state, and the state reached after every complete sequence. */
 #define UTF8_ACCEPT	0
 /* Error state: its utf8s row maps every class back to 12, so it is final. */
 #define UTF8_REJECT	12

 /*
  * Byte classifier: utf8d[b] is the character class (0..11) of byte value b.
  * utf8_lookup() adds the class to the current state to index utf8s.
  *
  *   0x00..0x7F -> 0      0x80..0x8F -> 1      0x90..0x9F -> 9
  *   0xA0..0xBF -> 7      0xC0..0xC1 -> 8      0xC2..0xDF -> 2
  *   0xE0       -> 10     0xE1..0xEC -> 3      0xED       -> 4
  *   0xEE..0xEF -> 3      0xF0       -> 11     0xF1..0xF3 -> 6
  *   0xF4       -> 5      0xF5..0xFF -> 8
  *
  * Class 8 leads to UTF8_REJECT from every state in utf8s.
  */
 static const unsigned char utf8d[] = {
     /* The first part of the table maps bytes to character classes
      * to reduce the size of the transition table. Each row below
      * covers 32 consecutive byte values, starting at 0x00. */
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
      7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
      8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
     10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8
 };
 /* Note: Splitting the table improves performance on ARM due to its simpler
  * addressing modes not being able to encode x[y + 256]. */
 /*
  * Transition table: utf8s[state + class] is the next state.
  * Laid out as 9 rows of 12 classes; a state is its row's starting offset
  * (0, 12, ..., 96). Each source line below holds two rows.
  *
  *   state  0: accept/start; lead bytes select one of the states below
  *   state 12: reject; every class stays at 12
  *   state 24: accepts 0x80..0xBF, then -> 0
  *   state 36: accepts 0x80..0xBF, then -> 24
  *   state 48: (after 0xE0) accepts only 0xA0..0xBF, then -> 24
  *   state 60: (after 0xED) accepts only 0x80..0x9F, then -> 24
  *   state 72: (after 0xF0) accepts only 0x90..0xBF, then -> 36
  *   state 84: (after 0xF1..0xF3) accepts 0x80..0xBF, then -> 36
  *   state 96: (after 0xF4) accepts only 0x80..0x8F, then -> 36
  *
  * Any other class in any row goes to 12 (UTF8_REJECT).
  */
 static const unsigned char utf8s[] = {
     /* The second part is a transition table that maps a combination
      * of a state of the automaton and a character class to a state. */
      0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
     12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
     12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
     12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
     12,36,12,12,12,12,12,12,12,12,12,12
 };

 /*
  * Check that data[0 .. len-1] is well-formed UTF-8.
  *
  * Each byte is mapped to a character class through utf8d, and the class
  * selects the next automaton state from utf8s. Scanning stops early once
  * the automaton reaches UTF8_REJECT, because that state is never left.
  *
  * data: bytes to check; not dereferenced when len is 0 or negative.
  * len:  number of bytes available at data. A negative length is rejected
  *       up front: used as a loop count it would be non-zero and the loop
  *       would read past the end of the buffer.
  *
  * Return 0 on success, -1 on error. Empty input is valid; input that ends
  * in the middle of a multi-byte sequence is an error.
  */
 int utf8_lookup(const unsigned char *data, int len)
 {
     int state = UTF8_ACCEPT;

     if (len < 0) {
         return -1;
     }

     while (len-- > 0 && state != UTF8_REJECT) {
         state = utf8s[state + utf8d[*data++]];
     }

     return state == UTF8_ACCEPT ? 0 : -1;
 }
	#include <stdio.h>

	/* http://bjoern.hoehrmann.de/utf-8/decoder/dfa */
	/* Optimized version based on Rich Felker's variant. */
	/* States are row offsets into utf8s (multiples of 12), which lets the
	 * lookup add a state and a character class to get a table index. */
	/* Initial state; also the state after each complete sequence. */
	#define UTF8_ACCEPT 0
	/* Error state; the utf8s row at offset 12 contains only 12. */
	#define UTF8_REJECT 12

	/*
	 * Maps a byte value b to its character class utf8d[b], in the range 0..11.
	 * utf8_lookup() uses the class as the column offset into utf8s.
	 *
	 *   class  0: 0x00..0x7F          class  6: 0xF1..0xF3
	 *   class  1: 0x80..0x8F          class  7: 0xA0..0xBF
	 *   class  2: 0xC2..0xDF          class  8: 0xC0..0xC1, 0xF5..0xFF
	 *   class  3: 0xE1..0xEC, 0xEE..0xEF
	 *   class  4: 0xED                class  9: 0x90..0x9F
	 *   class  5: 0xF4                class 10: 0xE0
	 *                                 class 11: 0xF0
	 *
	 * Column 8 of utf8s is 12 in every row, so class 8 bytes always reject.
	 */
	static const unsigned char utf8d[] = {
	/* The first part of the table maps bytes to character classes
	* to reduce the size of the transition table. Every row below
	* spans 32 consecutive byte values, beginning with 0x00. */
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
	7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
	8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
	10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8
	};
	/* Note: Splitting the table improves performance on ARM due to its simpler
	* addressing modes not being able to encode x[y + 256]. */
	/*
	 * Next-state table, indexed by (state + character class).
	 * There are 9 rows of 12 entries; a state value is the offset of its row
	 * (0, 12, 24, ... 96). Two rows are written per source line.
	 *
	 * Row  0 is the accept/start state and row 12 the reject state (all 12).
	 * Rows 24 and 36 take any byte in 0x80..0xBF and move to 0 and 24.
	 * Rows 48 and 60 restrict that byte to 0xA0..0xBF and 0x80..0x9F and move
	 * to 24. Rows 72, 84 and 96 take 0x90..0xBF, 0x80..0xBF and 0x80..0x8F
	 * and move to 36. Every other entry is 12 (UTF8_REJECT).
	 */
	static const unsigned char utf8s[] = {
	/* The second part is a transition table that maps a combination
	* of a state of the automaton and a character class to a state. */
	0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
	12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
	12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
	12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
	12,36,12,12,12,12,12,12,12,12,12,12
	};

	/*
	 * Validate len bytes starting at data as UTF-8.
	 *
	 * The automaton starts in UTF8_ACCEPT. For every input byte, utf8d gives
	 * the byte's character class and utf8s gives the next state for the
	 * current state plus that class. The scan ends when the input is used up
	 * or the state becomes UTF8_REJECT.
	 *
	 * data: input bytes; never read when len is 0 or negative.
	 * len:  byte count. Negative values are reported as an error, because
	 *       a negative count is non-zero and would otherwise drive the loop
	 *       beyond the end of the buffer.
	 *
	 * Return 0 on success, -1 on error. A zero-length input succeeds; input
	 * cut off inside a multi-byte sequence fails.
	 */
	int utf8_lookup(const unsigned char *data, int len)
	{
		int state = UTF8_ACCEPT;

		if (len < 0) {
			return -1;
		}

		while (len-- > 0 && state != UTF8_REJECT) {
			state = utf8s[state + utf8d[*data++]];
		}

		return state == UTF8_ACCEPT ? 0 : -1;
	}