Improve optimization for little-endian platforms.
Previously there was a fast path for little-endian platforms in the
fixed-width decoders (e.g. pb_decode_fixed64()) but not in the encoders
pb_encode_fixed32() and pb_encode_fixed64(). In addition, the macros
used for the endianness check did not trigger on GCC.
The macro checks have been expanded to cover all common compilers, and
it is now possible to define PB_LITTLE_ENDIAN_8BIT manually if the
platform is not detected automatically.
diff --git a/pb.h b/pb.h
index 3bd1908..f4a9bc2 100644
--- a/pb.h
+++ b/pb.h
@@ -14,7 +14,8 @@
/* #define PB_ENABLE_MALLOC 1 */
/* Define this if your CPU / compiler combination does not support
- * unaligned memory access to packed structures. */
+ * unaligned memory access to packed structures. Note that packed
+ * structures are only used when requested in .proto options. */
/* #define PB_NO_PACKED_STRUCTS 1 */
/* Increase the number of required fields that are tracked.
@@ -47,6 +48,10 @@
* the string processing slightly and slightly increases code size. */
/* #define PB_VALIDATE_UTF8 1 */
+/* This can be defined if the platform is little-endian and has 8-bit bytes.
+ * Normally this is auto-detected from compiler macros such as __BYTE_ORDER__. */
+/* #define PB_LITTLE_ENDIAN_8BIT 1 */
+
/******************************************************************
* You usually don't need to change anything below this line. *
* Feel free to look around and use the defined macros, though. *
@@ -116,6 +121,18 @@
# define pb_packed
#endif
+/* Detect endianness */
+#ifndef PB_LITTLE_ENDIAN_8BIT
+#if ((defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN) || \
+ (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) || \
+ defined(__LITTLE_ENDIAN__) || defined(__ARMEL__) || \
+ defined(__THUMBEL__) || defined(__AARCH64EL__) || defined(_MIPSEL) || \
+ defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM)) \
+ && CHAR_BIT == 8
+#define PB_LITTLE_ENDIAN_8BIT 1
+#endif
+#endif
+
/* Handly macro for suppressing unreferenced-parameter compiler warnings. */
#ifndef PB_UNUSED
#define PB_UNUSED(x) (void)(x)
diff --git a/pb_decode.c b/pb_decode.c
index d9ecf25..f388932 100644
--- a/pb_decode.c
+++ b/pb_decode.c
@@ -1362,7 +1362,7 @@
if (!pb_read(stream, u.bytes, 4))
return false;
-#if defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN && CHAR_BIT == 8
+#if defined(PB_LITTLE_ENDIAN_8BIT) && PB_LITTLE_ENDIAN_8BIT == 1
/* fast path - if we know that we're on little endian, assign directly */
*(uint32_t*)dest = u.fixed32;
#else
@@ -1385,7 +1385,7 @@
if (!pb_read(stream, u.bytes, 8))
return false;
-#if defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN && CHAR_BIT == 8
+#if defined(PB_LITTLE_ENDIAN_8BIT) && PB_LITTLE_ENDIAN_8BIT == 1
/* fast path - if we know that we're on little endian, assign directly */
*(uint64_t*)dest = u.fixed64;
#else
diff --git a/pb_encode.c b/pb_encode.c
index de716f7..f5f1676 100644
--- a/pb_encode.c
+++ b/pb_encode.c
@@ -632,6 +632,10 @@
bool checkreturn pb_encode_fixed32(pb_ostream_t *stream, const void *value)
{
+#if defined(PB_LITTLE_ENDIAN_8BIT) && PB_LITTLE_ENDIAN_8BIT == 1
+ /* Fast path if we know that we're on little endian */
+ return pb_write(stream, (const pb_byte_t*)value, 4);
+#else
uint32_t val = *(const uint32_t*)value;
pb_byte_t bytes[4];
bytes[0] = (pb_byte_t)(val & 0xFF);
@@ -639,11 +643,16 @@
bytes[2] = (pb_byte_t)((val >> 16) & 0xFF);
bytes[3] = (pb_byte_t)((val >> 24) & 0xFF);
return pb_write(stream, bytes, 4);
+#endif
}
#ifndef PB_WITHOUT_64BIT
bool checkreturn pb_encode_fixed64(pb_ostream_t *stream, const void *value)
{
+#if defined(PB_LITTLE_ENDIAN_8BIT) && PB_LITTLE_ENDIAN_8BIT == 1
+ /* Fast path if we know that we're on little endian */
+ return pb_write(stream, (const pb_byte_t*)value, 8);
+#else
uint64_t val = *(const uint64_t*)value;
pb_byte_t bytes[8];
bytes[0] = (pb_byte_t)(val & 0xFF);
@@ -655,6 +664,7 @@
bytes[6] = (pb_byte_t)((val >> 48) & 0xFF);
bytes[7] = (pb_byte_t)((val >> 56) & 0xFF);
return pb_write(stream, bytes, 8);
+#endif
}
#endif