| /* |
| * Copyright (c) 2024 Raspberry Pi Ltd. |
| * |
| * SPDX-License-Identifier: BSD-3-Clause |
| */ |
| |
| #include <stdio.h> |
| #include <fenv.h> |
| #include <stdbool.h> |
| #include <stdint.h> |
| |
| // xoroshiro256++ pseudorandom number generator. |
| // Adapted from: https://prng.di.unimi.it/xoshiro256plusplus.c |
| // Original copyright notice: |
| |
| /* Written in 2019 by David Blackman and Sebastiano Vigna (vigna@acm.org) |
| |
| To the extent possible under law, the author has dedicated all copyright |
| and related and neighboring rights to this software to the public domain |
| worldwide. This software is distributed without any warranty. |
| |
| See <http://creativecommons.org/publicdomain/zero/1.0/>. */ |
| |
| /* This is xoshiro256++ 1.0, one of our all-purpose, rock-solid generators. |
| It has excellent (sub-ns) speed, a state (256 bits) that is large |
| enough for any parallel application, and it passes all tests we are |
| aware of. |
| |
| For generating just floating-point numbers, xoshiro256+ is even faster. |
| |
| The state must be seeded so that it is not everywhere zero. If you have |
| a 64-bit seed, we suggest to seed a splitmix64 generator and use its |
| output to fill s. */ |
| |
| static inline uint64_t xr256_rotl(const uint64_t x, int k) { |
| return (x << k) | (x >> (64 - k)); |
| } |
| |
| uint64_t xr256_next(uint64_t s[4]) { |
| const uint64_t result = xr256_rotl(s[0] + s[3], 23) + s[0]; |
| |
| const uint64_t t = s[1] << 17; |
| |
| s[2] ^= s[0]; |
| s[3] ^= s[1]; |
| s[1] ^= s[2]; |
| s[0] ^= s[3]; |
| |
| s[2] ^= t; |
| |
| s[3] = xr256_rotl(s[3], 45); |
| |
| return result; |
| } |
| uint32_t bitcast_f2u(float x) { |
| // This is UB but then so is every C program |
| union { |
| float f; |
| uint32_t u; |
| } un; |
| un.f = x; |
| return un.u; |
| } |
| |
| float bitcast_u2f(uint32_t x) { |
| union { |
| float f; |
| uint32_t u; |
| } un; |
| un.u = x; |
| return un.f; |
| } |
| |
| bool is_nan_u(uint32_t x) { |
| return ((x >> 23) & 0xffu) == 0xffu && (x & ~(-1u << 23)); |
| } |
| |
| uint32_t flush_to_zero_u(uint32_t x) { |
| if (!(x & (0xffu << 23))) { |
| x &= -1u << 23; |
| } |
| return x; |
| } |
| |
| uint32_t model_fadd(uint32_t x, uint32_t y) { |
| x = flush_to_zero_u(x); |
| y = flush_to_zero_u(y); |
| // Use local hardware implementation to perform calculation |
| uint32_t result = bitcast_f2u(bitcast_u2f(x) + bitcast_u2f(y)); |
| // Use correct canonical generated nan |
| if (is_nan_u(result)) { |
| result = -1u; |
| } |
| result = flush_to_zero_u(result); |
| return result; |
| } |
| |
| uint32_t model_fmul(uint32_t x, uint32_t y) { |
| x = flush_to_zero_u(x); |
| y = flush_to_zero_u(y); |
| // Use local hardware implementation to perform calculation |
| uint32_t result = bitcast_f2u(bitcast_u2f(x) * bitcast_u2f(y)); |
| // Use correct canonical generated nan |
| if (is_nan_u(result)) { |
| result = -1u; |
| } |
| result = flush_to_zero_u(result); |
| return result; |
| } |
| |
| int main() { |
| // SHA-256 of a rude word |
| uint64_t rand_state[4] = { |
| 0x5891b5b522d5df08u, |
| 0x6d0ff0b110fbd9d2u, |
| 0x1bb4fc7163af34d0u, |
| 0x8286a2e846f6be03u |
| }; |
| for (int i = 0; i < 1000; ++i) { |
| uint32_t x, y; |
| x = xr256_next(rand_state) & 0xffffffffu; |
| y = xr256_next(rand_state) & 0xffffffffu; |
| // Map nan to +-inf (input nans should already be well-covered) |
| if (is_nan_u(x)) { |
| x &= -1u << 23; |
| } |
| if (is_nan_u(y)) { |
| y &= -1u << 23; |
| } |
| #if 1 |
| printf("{0x%08xu, 0x%08xu, 0x%08xu},\n", x, y, model_fadd(x, y)); |
| #else |
| printf("{0x%08xu, 0x%08xu, 0x%08xu},\n", x, y, model_fmul(x, y)); |
| #endif |
| } |
| } |