Move HH_ALIGNAS in front of declarations so it is compatible with alignas (fixes #109); add experimental CMake build
diff --git a/README.md b/README.md
index 0de5ceb..f37dbf9 100644
--- a/README.md
+++ b/README.md
@@ -15,31 +15,37 @@
 
 64-bit SipHash for any CPU:
 
-    #include "highwayhash/sip_hash.h"
-    using namespace highwayhash;
-    const HH_U64 key2[2] HH_ALIGNAS(16) = {1234, 5678};
-    char in[8] = {1};
-    return SipHash(key2, in, 8);
+```
+#include "highwayhash/sip_hash.h"
+using namespace highwayhash;
+HH_ALIGNAS(16) const HH_U64 key2[2] = {1234, 5678};
+char in[8] = {1};
+return SipHash(key2, in, 8);
+```
 
 64, 128 or 256 bit HighwayHash for the CPU determined by compiler flags:
 
-    #include "highwayhash/highwayhash.h"
-    using namespace highwayhash;
-    const HHKey key HH_ALIGNAS(32) = {1, 2, 3, 4};
-    char in[8] = {1};
-    HHResult64 result;  // or HHResult128 or HHResult256
-    HHStateT<HH_TARGET> state(key);
-    HighwayHashT(&state, in, 8, &result);
+```
+#include "highwayhash/highwayhash.h"
+using namespace highwayhash;
+HH_ALIGNAS(32) const HHKey key = {1, 2, 3, 4};
+char in[8] = {1};
+HHResult64 result;  // or HHResult128 or HHResult256
+HHStateT<HH_TARGET> state(key);
+HighwayHashT(&state, in, 8, &result);
+```
 
 64, 128 or 256 bit HighwayHash for the CPU on which we're currently running:
 
-    #include "highwayhash/highwayhash_target.h"
-    #include "highwayhash/instruction_sets.h"
-    using namespace highwayhash;
-    const HHKey key HH_ALIGNAS(32) = {1, 2, 3, 4};
-    char in[8] = {1};
-    HHResult64 result;  // or HHResult128 or HHResult256
-    InstructionSets::Run<HighwayHash>(key, in, 8, &result);
+```
+#include "highwayhash/highwayhash_target.h"
+#include "highwayhash/instruction_sets.h"
+using namespace highwayhash;
+HH_ALIGNAS(32) const HHKey key = {1, 2, 3, 4};
+char in[8] = {1};
+HHResult64 result;  // or HHResult128 or HHResult256
+InstructionSets::Run<HighwayHash>(key, in, 8, &result);
+```
 
 C-callable 64-bit HighwayHash for the CPU on which we're currently running:
 
diff --git a/highwayhash.3 b/highwayhash.3
index f7b9278..54f3d1d 100644
--- a/highwayhash.3
+++ b/highwayhash.3
@@ -57,7 +57,7 @@
 
     #include "highwayhash/sip_hash.h"
     using namespace highwayhash;
-    const HH_U64 key2[2] HH_ALIGNAS(16) = {1234, 5678};
+    HH_ALIGNAS(16) const HH_U64 key2[2] = {1234, 5678};
     char in[8] = {1};
     return SipHash(key2, in, 8);
 
@@ -65,7 +65,7 @@
 
     #include "highwayhash/highwayhash.h"
     using namespace highwayhash;
-    const HHKey key HH_ALIGNAS(32) = {1, 2, 3, 4};
+    HH_ALIGNAS(32) const HHKey key = {1, 2, 3, 4};
     char in[8] = {1};
     HHResult64 result;  // or HHResult128 or HHResult256
     HHStateT<HH_TARGET> state(key);
@@ -76,7 +76,7 @@
     #include "highwayhash/highwayhash_target.h"
     #include "highwayhash/instruction_sets.h"
     using namespace highwayhash;
-    const HHKey key HH_ALIGNAS(32) = {1, 2, 3, 4};
+    HH_ALIGNAS(32) const HHKey key = {1, 2, 3, 4};
     char in[8] = {1};
     HHResult64 result;  // or HHResult128 or HHResult256
     InstructionSets::Run<HighwayHash>(key, in, 8, &result);
diff --git a/highwayhash/benchmark.cc b/highwayhash/benchmark.cc
index 3317d6a..a22e06f 100644
--- a/highwayhash/benchmark.cc
+++ b/highwayhash/benchmark.cc
@@ -208,14 +208,14 @@
 #if BENCHMARK_SIP
 
 uint64_t RunSip(const void*, const size_t size) {
-  const HH_U64 key2[2] HH_ALIGNAS(16) = {0, 1};
+  HH_ALIGNAS(16) const HH_U64 key2[2] = {0, 1};
   char in[kMaxBenchmarkInputSize];
   memcpy(in, &size, sizeof(size));
   return SipHash(key2, in, size);
 }
 
 uint64_t RunSip13(const void*, const size_t size) {
-  const HH_U64 key2[2] HH_ALIGNAS(16) = {0, 1};
+  HH_ALIGNAS(16) const HH_U64 key2[2] = {0, 1};
   char in[kMaxBenchmarkInputSize];
   memcpy(in, &size, sizeof(size));
   return SipHash13(key2, in, size);
@@ -226,14 +226,14 @@
 #if BENCHMARK_SIP_TREE
 
 uint64_t RunSipTree(const void*, const size_t size) {
-  const HH_U64 key4[4] HH_ALIGNAS(32) = {0, 1, 2, 3};
+  HH_ALIGNAS(32) const HH_U64 key4[4] = {0, 1, 2, 3};
   char in[kMaxBenchmarkInputSize];
   memcpy(in, &size, sizeof(size));
   return SipTreeHash(key4, in, size);
 }
 
 uint64_t RunSipTree13(const void*, const size_t size) {
-  const HH_U64 key4[4] HH_ALIGNAS(32) = {0, 1, 2, 3};
+  HH_ALIGNAS(32) const HH_U64 key4[4] = {0, 1, 2, 3};
   char in[kMaxBenchmarkInputSize];
   memcpy(in, &size, sizeof(size));
   return SipTreeHash13(key4, in, size);
diff --git a/highwayhash/example.cc b/highwayhash/example.cc
index ce05897..e3939dd 100644
--- a/highwayhash/example.cc
+++ b/highwayhash/example.cc
@@ -17,7 +17,7 @@
   }
 
   // Please use a different key to ensure your hashes aren't identical.
-  const HHKey key HH_ALIGNAS(32) = {1, 2, 3, 4};
+  HH_ALIGNAS(32) const HHKey key = {1, 2, 3, 4};
 
   // Aligning inputs to 32 bytes may help but is not required.
   const char* in = argv[1];
diff --git a/highwayhash/hh_neon.h b/highwayhash/hh_neon.h
index 9324e13..5470939 100644
--- a/highwayhash/hh_neon.h
+++ b/highwayhash/hh_neon.h
@@ -187,7 +187,7 @@
                                  const size_t size_mod32,
                                  const char* HH_RESTRICT buffer,
                                  const size_t buffer_valid) {
-    HHPacket tmp HH_ALIGNAS(32);
+    HH_ALIGNAS(32) HHPacket tmp;
     for (size_t i = 0; i < buffer_valid; ++i) {
       tmp[i] = buffer[i];
     }
diff --git a/highwayhash/hh_portable.h b/highwayhash/hh_portable.h
index 9da024d..3b1a394 100644
--- a/highwayhash/hh_portable.h
+++ b/highwayhash/hh_portable.h
@@ -75,7 +75,7 @@
     const size_t size_mod4 = size_mod32 & 3;
     const char* remainder = bytes + (size_mod32 & ~3);
 
-    HHPacket packet HH_ALIGNAS(32) = {0};
+    HH_ALIGNAS(32) HHPacket packet = {0};
     CopyPartial(bytes, remainder - bytes, &packet[0]);
 
     if (size_mod32 & 16) {  // 16..31 bytes left
@@ -150,7 +150,7 @@
                                  const size_t size_mod32,
                                  const char* HH_RESTRICT buffer,
                                  const size_t buffer_valid) {
-    HHPacket tmp HH_ALIGNAS(32);
+    HH_ALIGNAS(32) HHPacket tmp;
     for (size_t i = 0; i < buffer_valid; ++i) {
       tmp[i] = buffer[i];
     }
diff --git a/highwayhash/hh_sse41.h b/highwayhash/hh_sse41.h
index ab1871c..6bbed22 100644
--- a/highwayhash/hh_sse41.h
+++ b/highwayhash/hh_sse41.h
@@ -173,7 +173,7 @@
                                  const size_t size_mod32,
                                  const char* HH_RESTRICT buffer,
                                  const size_t buffer_valid) {
-    HHPacket tmp HH_ALIGNAS(32);
+    HH_ALIGNAS(32) HHPacket tmp;
     for (size_t i = 0; i < buffer_valid; ++i) {
       tmp[i] = buffer[i];
     }
diff --git a/highwayhash/hh_vsx.h b/highwayhash/hh_vsx.h
index 7681dab..e503abe 100644
--- a/highwayhash/hh_vsx.h
+++ b/highwayhash/hh_vsx.h
@@ -214,7 +214,7 @@
                                  const size_t size_mod32,
                                  const char* HH_RESTRICT buffer,
                                  const size_t buffer_valid) {
-    HHPacket tmp HH_ALIGNAS(32);
+    HH_ALIGNAS(32) HHPacket tmp;
     for (size_t i = 0; i < buffer_valid; ++i) {
       tmp[i] = buffer[i];
     }
diff --git a/highwayhash/highwayhash.h b/highwayhash/highwayhash.h
index 3c079a5..3655ce3 100644
--- a/highwayhash/highwayhash.h
+++ b/highwayhash/highwayhash.h
@@ -205,8 +205,8 @@
   }
 
  private:
-  HHPacket buffer_ HH_ALIGNAS(64);
-  HHStateT<Target> state_ HH_ALIGNAS(32);
+  HH_ALIGNAS(64) HHPacket buffer_;
+  HH_ALIGNAS(32) HHStateT<Target> state_;
   // How many bytes in buffer_ (starting with offset 0) are valid.
   size_t buffer_usage_ = 0;
 };
diff --git a/highwayhash/highwayhash_fuzzer.cc b/highwayhash/highwayhash_fuzzer.cc
index af5931d..5234fcb 100644
--- a/highwayhash/highwayhash_fuzzer.cc
+++ b/highwayhash/highwayhash_fuzzer.cc
@@ -13,7 +13,7 @@
 
   // Generate the key.
   const uint64_t *u64s = reinterpret_cast<const uint64_t*>(data);
-  const HHKey key HH_ALIGNAS(32) = {u64s[0], u64s[1], u64s[2], u64s[3]};
+  HH_ALIGNAS(32) const HHKey key = {u64s[0], u64s[1], u64s[2], u64s[3]};
   data += sizeof(uint64_t) * 4;
   size -= sizeof(uint64_t) * 4;
 
diff --git a/highwayhash/highwayhash_test_target.cc b/highwayhash/highwayhash_test_target.cc
index e47735b..e999d9f 100644
--- a/highwayhash/highwayhash_test_target.cc
+++ b/highwayhash/highwayhash_test_target.cc
@@ -162,7 +162,7 @@
 
 template <TargetBits Target>
 uint64_t RunHighway(const void*, const size_t size) {
-  static const HHKey key HH_ALIGNAS(32) = {0, 1, 2, 3};
+  HH_ALIGNAS(32) static const HHKey key = {0, 1, 2, 3};
   char in[kMaxBenchmarkInputSize];
   in[0] = static_cast<char>(size & 0xFF);
   HHResult64 result;
@@ -173,7 +173,7 @@
 
 template <TargetBits Target>
 uint64_t RunHighwayCat(const void*, const size_t size) {
-  static const HHKey key HH_ALIGNAS(32) = {0, 1, 2, 3};
+  HH_ALIGNAS(32) static const HHKey key = {0, 1, 2, 3};
   HH_ALIGNAS(64) HighwayHashCatT<Target> cat(key);
   char in[kMaxBenchmarkInputSize];
   in[0] = static_cast<char>(size & 0xFF);
diff --git a/highwayhash/sip_hash_fuzzer.cc b/highwayhash/sip_hash_fuzzer.cc
index 94a6b1f..2ecc4d5 100644
--- a/highwayhash/sip_hash_fuzzer.cc
+++ b/highwayhash/sip_hash_fuzzer.cc
@@ -10,7 +10,7 @@
 
   // Generate the key.
   const HH_U64 *hhU64s = reinterpret_cast<const HH_U64*>(data);
-  const HH_U64 key[2] HH_ALIGNAS(16) = {hhU64s[0], hhU64s[1]};
+  HH_ALIGNAS(16) const HH_U64 key[2] = {hhU64s[0], hhU64s[1]};
   data += sizeof(HH_U64) * 2;
   size -= sizeof(HH_U64) * 2;
 
diff --git a/highwayhash/sip_tree_hash.cc b/highwayhash/sip_tree_hash.cc
index ebbb6b6..3543cb2 100644
--- a/highwayhash/sip_tree_hash.cc
+++ b/highwayhash/sip_tree_hash.cc
@@ -186,7 +186,7 @@
   state.Update(final_packet);
 
   // Faster than passing __m256i and extracting.
-  uint64_t hashes[kNumLanes] HH_ALIGNAS(32);
+  HH_ALIGNAS(32) uint64_t hashes[kNumLanes];
   Store(state.Finalize(), hashes);
 
   typename SipHashStateT<kUpdateRounds, kFinalizeRounds>::Key reduce_key;
diff --git a/highwayhash/state_helpers.h b/highwayhash/state_helpers.h
index 83d63c1..4dd6512 100644
--- a/highwayhash/state_helpers.h
+++ b/highwayhash/state_helpers.h
@@ -46,7 +46,7 @@
 template <class State>
 HH_INLINE void PaddedUpdate(const HH_U64 size, const char* remaining_bytes,
                             const HH_U64 remaining_size, State* state) {
-  char final_packet[State::kPacketSize] HH_ALIGNAS(32) = {0};
+  HH_ALIGNAS(32) char final_packet[State::kPacketSize] = {0};
 
   // This layout matches the AVX-2 specialization in highway_tree_hash.h.
   uint32_t packet4 = static_cast<uint32_t>(size) << 24;
diff --git a/highwayhash/vector_test_target.cc b/highwayhash/vector_test_target.cc
index 90d9741..5852adb 100644
--- a/highwayhash/vector_test_target.cc
+++ b/highwayhash/vector_test_target.cc
@@ -52,7 +52,7 @@
 template <class T>
 void NotifyIfUnequal(const V<T>& v, const T expected, const size_t line,
                      const HHNotify notify) {
-  T lanes[V<T>::N] HH_ALIGNAS(32);
+  HH_ALIGNAS(32) T lanes[V<T>::N];
   Store(v, lanes);
   for (size_t i = 0; i < V<T>::N; ++i) {
     if (lanes[i] != expected) {
@@ -157,7 +157,7 @@
 template <class T>
 void TestLoadStore(const HHNotify notify) {
   const size_t n = V<T>::N;
-  T lanes[2 * n] HH_ALIGNAS(32);
+  HH_ALIGNAS(32) T lanes[2 * n];
   for (size_t i = 0; i < n; ++i) {
     lanes[i] = 4;
   }
@@ -169,7 +169,7 @@
   NotifyIfUnequal(v4, T(4), __LINE__, notify);
 
   // Aligned store
-  T lanes4[n] HH_ALIGNAS(32);
+  HH_ALIGNAS(32) T lanes4[n];
   Store(v4, lanes4);
   NotifyIfUnequal(Load<V<T>>(lanes4), T(4), __LINE__, notify);