/* { dg-do run } */ /* { dg-options "-msse2 -O2 -ffast-math" } */ #include #include #include #include __bf16 foo (float a) { return a; } static __bf16 CALC (float *a) { uint32_t bits; memcpy (&bits, a, sizeof (bits)); bits >>= 16; uint16_t bfloat16_bits = (uint16_t) bits; __bf16 bf16; memcpy (&bf16, &bfloat16_bits, sizeof (bf16)); return bf16; } int main (void) { float test_values[] = { 0.0f, -0.0f, 1.0f, -1.0f, 0.5f, -0.5f, 1000.0f, -1000.0f, 3.1415926f, -3.1415926f, 1e-8f, -1e-8f, 1.0e+38f, -1.0e+38f, 1.0e-38f, -1.0e-38f }; size_t num_values = sizeof (test_values) / sizeof (test_values[0]); for (size_t i = 0; i < num_values; ++i) { float original = test_values[i]; __bf16 hw_bf16 = foo (original); __bf16 sw_bf16 = CALC (&original); /* Verify psrld $16, %0 == %0 >> 16 */ if (memcmp (&hw_bf16, &sw_bf16, sizeof (__bf16)) != 0) abort (); /* Reconstruct the float value from the __bf16 bits */ uint16_t bf16_bits; memcpy (&bf16_bits, &hw_bf16, sizeof (bf16_bits)); uint32_t reconstructed_bits = ((uint32_t) bf16_bits) << 16; float converted; memcpy (&converted, &reconstructed_bits, sizeof (converted)); float diff = fabsf (original - converted); /* Expected Maximum Precision Loss */ uint32_t orig_bits; memcpy (&orig_bits, &original, sizeof (orig_bits)); int exponent = ((orig_bits >> 23) & 0xFF) - 127; float expected_loss = (exponent == -127) ? ldexpf (1.0f, -126 - 7) : ldexpf (1.0f, exponent - 7); if (diff > expected_loss) abort (); } return 0; }