Support for Web platforms (#340)
* Implement cycleclock::Now for PNaCl
* Make cycleclock::Now compatible with NaCl/ARM
* Support Emscripten (Asm.js, WebAssembly)
* Rearrange #ifs from to handle specific cases first
* DoNotOptimize without inline asm for Emscripten & PNaCl
diff --git a/include/benchmark/benchmark_api.h b/include/benchmark/benchmark_api.h
index f953e59..66cbd7e 100644
--- a/include/benchmark/benchmark_api.h
+++ b/include/benchmark/benchmark_api.h
@@ -230,7 +230,7 @@
// expression from being optimized away by the compiler. This function is
// intended to add little to no overhead.
// See: https://youtu.be/nXaxk27zwlk?t=2441
-#if defined(__GNUC__)
+#if defined(__GNUC__) && !defined(__pnacl__) && !defined(EMSCRIPTEN)
template <class Tp>
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
asm volatile("" : : "g"(value) : "memory");
diff --git a/src/cycleclock.h b/src/cycleclock.h
index ca26cca..e0f9b01 100644
--- a/src/cycleclock.h
+++ b/src/cycleclock.h
@@ -43,6 +43,11 @@
#ifndef BENCHMARK_OS_WINDOWS
#include <sys/time.h>
+#include <time.h>
+#endif
+
+#ifdef BENCHMARK_OS_EMSCRIPTEN
+#include <emscripten.h>
#endif
namespace benchmark {
@@ -65,6 +70,10 @@
// counter pauses; it does not continue counting, nor does it
// reset to zero.
return mach_absolute_time();
+#elif defined(BENCHMARK_OS_EMSCRIPTEN)
+ // this goes above x86-specific code because old versions of Emscripten
+ // define __x86_64__, although they have nothing to do with it.
+ return static_cast<int64_t>(emscripten_get_now() * 1e+6);
#elif defined(__i386__)
int64_t ret;
__asm__ volatile("rdtsc" : "=A"(ret));
@@ -99,6 +108,22 @@
_asm rdtsc
#elif defined(COMPILER_MSVC)
return __rdtsc();
+#elif defined(BENCHMARK_OS_NACL)
+ // Native Client validator on x86/x86-64 allows RDTSC instructions,
+ // and this case is handled above. Native Client validator on ARM
+ // rejects MRC instructions (used in the ARM-specific sequence below),
+ // so we handle it here. Portable Native Client compiles to
+ // architecture-agnostic bytecode, which doesn't provide any
+ // cycle counter access mnemonics.
+
+ // Native Client does not provide any API to access cycle counter.
+ // Use clock_gettime(CLOCK_MONOTONIC, ...) instead of gettimeofday
+ // because is provides nanosecond resolution (which is noticable at
+ // least for PNaCl modules running on x86 Mac & Linux).
+ // Initialize to always return 0 if clock_gettime fails.
+ struct timespec ts = { 0, 0 };
+ clock_gettime(CLOCK_MONOTONIC, &ts);
+ return static_cast<int64_t>(ts.tv_sec) * 1000000000 + ts.tv_nsec;
#elif defined(__aarch64__)
// System timer of ARMv8 runs at a different frequency than the CPU's.
// The frequency is fixed, typically in the range 1-50MHz. It can be
@@ -108,7 +133,9 @@
asm volatile("mrs %0, cntvct_el0" : "=r"(virtual_timer_value));
return virtual_timer_value;
#elif defined(__ARM_ARCH)
-#if (__ARM_ARCH >= 6) // V6 is the earliest arch that has a standard cyclecount
+ // V6 is the earliest arch that has a standard cyclecount
+ // Native Client validator doesn't allow MRC instructions.
+#if (__ARM_ARCH >= 6)
uint32_t pmccntr;
uint32_t pmuseren;
uint32_t pmcntenset;
diff --git a/src/internal_macros.h b/src/internal_macros.h
index 2b3f32f..ab9dd85 100644
--- a/src/internal_macros.h
+++ b/src/internal_macros.h
@@ -41,6 +41,10 @@
#define BENCHMARK_OS_FREEBSD 1
#elif defined(__linux__)
#define BENCHMARK_OS_LINUX 1
+#elif defined(__native_client__)
+#define BENCHMARK_OS_NACL 1
+#elif defined(EMSCRIPTEN)
+#define BENCHMARK_OS_EMSCRIPTEN 1
#endif
#if !__has_feature(cxx_exceptions) && !defined(__cpp_exceptions) \
diff --git a/src/timers.cc b/src/timers.cc
index fadc08f..8d56e8a 100644
--- a/src/timers.cc
+++ b/src/timers.cc
@@ -35,6 +35,10 @@
#endif
#endif
+#ifdef BENCHMARK_OS_EMSCRIPTEN
+#include <emscripten.h>
+#endif
+
#include <cerrno>
#include <cstdint>
#include <cstdio>
@@ -100,14 +104,7 @@
} // end namespace
double ProcessCPUUsage() {
-// FIXME We want to use clock_gettime, but its not available in MacOS 10.11. See
-// https://github.com/google/benchmark/pull/292
-#if defined(CLOCK_PROCESS_CPUTIME_ID) && !defined(BENCHMARK_OS_MACOSX)
- struct timespec spec;
- if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &spec) == 0)
- return MakeTime(spec);
- DiagnoseAndExit("clock_gettime(CLOCK_PROCESS_CPUTIME_ID, ...) failed");
-#elif defined(BENCHMARK_OS_WINDOWS)
+#if defined(BENCHMARK_OS_WINDOWS)
HANDLE proc = GetCurrentProcess();
FILETIME creation_time;
FILETIME exit_time;
@@ -117,21 +114,28 @@
&user_time))
return MakeTime(kernel_time, user_time);
DiagnoseAndExit("GetProccessTimes() failed");
+#elif defined(BENCHMARK_OS_EMSCRIPTEN)
+ // clock_gettime(CLOCK_PROCESS_CPUTIME_ID, ...) returns 0 on Emscripten.
+ // Use Emscripten-specific API. Reported CPU time would be exactly the
+ // same as total time, but this is ok because there aren't long-latency
+ // syncronous system calls in Emscripten.
+ return emscripten_get_now() * 1e-3;
+#elif defined(CLOCK_PROCESS_CPUTIME_ID) && !defined(BENCHMARK_OS_MACOSX)
+ // FIXME We want to use clock_gettime, but its not available in MacOS 10.11. See
+ // https://github.com/google/benchmark/pull/292
+ struct timespec spec;
+ if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &spec) == 0)
+ return MakeTime(spec);
+ DiagnoseAndExit("clock_gettime(CLOCK_PROCESS_CPUTIME_ID, ...) failed");
#else
struct rusage ru;
if (getrusage(RUSAGE_SELF, &ru) == 0) return MakeTime(ru);
- DiagnoseAndExit("clock_gettime(CLOCK_PROCESS_CPUTIME_ID, ...) failed");
+ DiagnoseAndExit("getrusage(RUSAGE_SELF, ...) failed");
#endif
}
double ThreadCPUUsage() {
-// FIXME We want to use clock_gettime, but its not available in MacOS 10.11. See
-// https://github.com/google/benchmark/pull/292
-#if defined(CLOCK_THREAD_CPUTIME_ID) && !defined(BENCHMARK_OS_MACOSX)
- struct timespec ts;
- if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) == 0) return MakeTime(ts);
- DiagnoseAndExit("clock_gettime(CLOCK_THREAD_CPUTIME_ID, ...) failed");
-#elif defined(BENCHMARK_OS_WINDOWS)
+#if defined(BENCHMARK_OS_WINDOWS)
HANDLE this_thread = GetCurrentThread();
FILETIME creation_time;
FILETIME exit_time;
@@ -141,6 +145,8 @@
&user_time);
return MakeTime(kernel_time, user_time);
#elif defined(BENCHMARK_OS_MACOSX)
+ // FIXME We want to use clock_gettime, but its not available in MacOS 10.11. See
+ // https://github.com/google/benchmark/pull/292
mach_msg_type_number_t count = THREAD_BASIC_INFO_COUNT;
thread_basic_info_data_t info;
mach_port_t thread = pthread_mach_thread_np(pthread_self());
@@ -149,6 +155,13 @@
return MakeTime(info);
}
DiagnoseAndExit("ThreadCPUUsage() failed when evaluating thread_info");
+#elif defined(BENCHMARK_OS_EMSCRIPTEN)
+ // Emscripten doesn't support traditional threads
+ return ProcessCPUUsage();
+#elif defined(CLOCK_THREAD_CPUTIME_ID)
+ struct timespec ts;
+ if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) == 0) return MakeTime(ts);
+ DiagnoseAndExit("clock_gettime(CLOCK_THREAD_CPUTIME_ID, ...) failed");
#else
#error Per-thread timing is not available on your system.
#endif