Implement ClobberMemory() and fix DoNotOptimize on MSVC. (#352)

I recently learned MSVC provides a compiler intrinsic called _ReadWriteBarrier
which is literally ClobberMemory under a different name. This patch
uses it to implement ClobberMemory under MSVC.
diff --git a/README.md b/README.md
index 456b0a6..4446637 100644
--- a/README.md
+++ b/README.md
@@ -365,7 +365,7 @@
 }
 ```
 
-Note that `ClobberMemory()` is only available for GNU based compilers.
+Note that `ClobberMemory()` is only available for GNU- or MSVC-based compilers.
 
 ### Set time unit manually
 If a benchmark runs a few milliseconds it may be hard to visually compare the
diff --git a/include/benchmark/benchmark_api.h b/include/benchmark/benchmark_api.h
index f72a64a..8cde61b 100644
--- a/include/benchmark/benchmark_api.h
+++ b/include/benchmark/benchmark_api.h
@@ -165,6 +165,10 @@
 #include <utility>
 #endif
 
+#if defined(_MSC_VER)
+#include <intrin.h> // for _ReadWriteBarrier
+#endif
+
 namespace benchmark {
 class BenchmarkReporter;
 
@@ -215,11 +219,16 @@
 
 }  // end namespace internal
 
+
+#if !defined(__GNUC__) || defined(__pnacl__) || defined(EMSCRIPTEN)
+# define BENCHMARK_HAS_NO_INLINE_ASSEMBLY  // asm-based DoNotOptimize/ClobberMemory must not be selected
+#endif
+
 // The DoNotOptimize(...) function can be used to prevent a value or
 // expression from being optimized away by the compiler. This function is
 // intended to add little to no overhead.
 // See: https://youtu.be/nXaxk27zwlk?t=2441
-#if defined(__GNUC__) && !defined(__pnacl__) && !defined(EMSCRIPTEN)
+#ifndef BENCHMARK_HAS_NO_INLINE_ASSEMBLY
 template <class Tp>
 inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
   asm volatile("" : : "g"(value) : "memory");
@@ -229,12 +238,22 @@
 inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() {
   asm volatile("" : : : "memory");
 }
+#elif defined(_MSC_VER)
+template <class Tp>
+inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {  // MSVC variant (no GNU inline asm)
+  internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));  // pass value's address, viewed as a volatile char, to the helper
+  _ReadWriteBarrier();  // same intrinsic ClobberMemory() uses in this branch
+}
+
+inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() {  // MSVC counterpart of the asm "memory" clobber version
+  _ReadWriteBarrier();  // intrinsic from <intrin.h>; presumably a compiler-level barrier only -- confirm against MSVC docs
+}
 #else
 template <class Tp>
 inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
   internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
 }
-// FIXME Add ClobberMemory() for non-gnu compilers
+// FIXME Add ClobberMemory() for non-gnu and non-msvc compilers
 #endif