#ifndef FP8_HELPER_UNCLUDED
#define FP8_HELPER_UNCLUDED

/* Overlay giving access to the raw 16-bit pattern of a _Float16.  */
typedef union
{
  _Float16 f16;
  unsigned short u16;
} Float16Union;

/* Convert the half-precision value X to an 8-bit encoding made of
   1 sign bit (bit 7), 4 exponent bits (bits 6:3, bias 7) and
   3 mantissa bits (bits 2:0).

   B is a rounding bias.  When it is nonzero, B >> 1 is added just below
   the lowest mantissa bit that is kept, before the remaining low bits
   are dropped; when it is zero the low bits are simply truncated.

   S selects the result on overflow: nonzero yields the largest finite
   magnitude (0x7e), zero yields the all-ones pattern (0x7f).

   NaN and infinity inputs always yield 0x7f.  Inputs whose half-precision
   exponent field is below 5 yield zero.  The sign of X is copied into
   bit 7 in every case.  */
static unsigned char
convert_fp16_to_hf8 (_Float16 x, unsigned char b, int s)
{
  Float16Union ux = { .f16 = x };
  const unsigned short fp16_bias = 15, hf8_bias = 7;
  unsigned short sign = (ux.u16 & 0x8000) >> 8;
  unsigned short e_fp16 = (ux.u16 & 0x7c00) >> 10;
  unsigned short m_fp16 = ux.u16 & 0x03ff;

  /* Add the rounding bias to the raw pattern, so that a carry out of the
     mantissa propagates into the exponent field.  */
  unsigned short x_bias = b ? ux.u16 + (b >> 1) : ux.u16;
  unsigned short e = (x_bias & 0x7c00) >> 10;
  unsigned short m = (x_bias & 0x03ff) >> 7;

  if (e_fp16 == 0x1f)
  {
    /* Special value: NaN or Infinity. */
    return (0xf << 3) | 0x7 | sign;
  }
  else if ((e_fp16 > (fp16_bias - hf8_bias + 15))
          || ((e_fp16 == (fp16_bias - hf8_bias + 15))
          && (m_fp16 > 0x0300)))
  {
    /* Overflow: Return Max or NaN. */
    return (0xf << 3) | (s ? 0x6 : 0x7) | sign;
  }
  else if (e_fp16 < fp16_bias - hf8_bias - 3)
  {
    /* Value too small: Return zero. */
    return sign;
  }
  else if (e_fp16 <= fp16_bias - hf8_bias)
  {
    /* Denormalized result: the 8-bit exponent field is zero and the
       mantissa has no implicit leading one.  Make the leading one of X
       explicit and shift it right by 1..4 positions so that bits 9:7
       line up with the 3-bit mantissa.  */
    unsigned short shift = (fp16_bias - hf8_bias) + 1 - e_fp16;
    /* Set bit 0 when any of the low seven mantissa bits of X is set, so
       bits lost to the shift can still cause a round-up under bias.  */
    unsigned short sticky = ((m_fp16 & 0x007f) + 0x007f) >> 7;
    unsigned short frac = ((m_fp16 | 0x0400) >> shift) | sticky;

    /* Same bias rule as the normal path.  */
    if (b)
      frac += b >> 1;

    /* A carry out of the 3-bit mantissa lands in bit 3, which is exactly
       the encoding of the smallest normal value.  */
    return (frac >> 7) | sign;
  }
  else
  {
    /* Normal value: Adjust exponent and mantissa. */
    e -= (fp16_bias - hf8_bias);
    return (e << 3) | m | sign;
  }
}

/* Convert the half-precision value X to an 8-bit encoding that keeps the
   top byte of the half-precision pattern: sign, 5 exponent bits and the
   2 highest mantissa bits.

   B is a rounding bias: when nonzero it is added to the 16-bit pattern
   before the low byte is dropped; when zero the value is rounded to
   nearest, ties to even.
   S, when nonzero, replaces a result whose magnitude bits are 0x7c with
   0x7b, keeping the sign.
   Infinity is passed through as its top byte; NaN additionally has bit 1
   set.  */
static unsigned char
convert_fp16_to_bf8 (_Float16 x, unsigned char b, int s)
{
  Float16Union bits = { .f16 = x };
  unsigned short top_byte = (bits.u16 >> 8) & 0xFF;

  /* Special values bypass rounding altogether.  */
  if (__builtin_isnan (x))
    return top_byte | 0x02;
  if (__builtin_isinf (x))
    return top_byte;

  unsigned short increment;
  if (b)
    increment = b & 0xFF;
  else
    /* 0x7F plus the lowest kept bit gives round-half-to-even.  */
    increment = ((bits.u16 >> 8) & 0x1) + 0x7F;

  unsigned short rounded = bits.u16 + increment;
  unsigned short result = (rounded >> 8) & 0xFF;

  /* Rounding carried up to the 0x7c magnitude: clamp when S is set.  */
  if (s && (result & 0x7F) == 0x7C)
    result = (result & 0x80) | 0x7B;

  return result;
}

/* Convert the half-precision value X to an 8-bit float.  A nonzero Y
   selects convert_fp16_to_bf8, zero selects convert_fp16_to_hf8; B and S
   are forwarded unchanged to the chosen converter.  */
static unsigned char
convert_fp16_to_fp8 (_Float16 x, unsigned char b, int y, int s)
{
  if (y)
    return convert_fp16_to_bf8 (x, b, s);

  return convert_fp16_to_hf8 (x, b, s);
}

static _Float16
convert_bf8_to_fp16(unsigned char x)
{
  Float16Union u = {.u16 = (x << 8) & 0xff00};
  return u.f16;
}

/* Convert the 8-bit value X (1 sign bit, 4 exponent bits with bias 7,
   3 mantissa bits) to half precision.

   Zero keeps its sign.  A denormal input (zero exponent field, nonzero
   mantissa) is renormalised into a normal half-precision number.  The
   pattern with exponent 0xf and mantissa 0x7 becomes a half-precision
   NaN.  All other inputs are rebiased and have their mantissa moved to
   the top of the 10-bit half-precision mantissa.  */
static _Float16
convert_hf8_to_fp16 (unsigned char x)
{
  const unsigned short rebias = 15 - 7;
  unsigned short sign_bit = (x & 0x80) << 8;
  unsigned short exp_field = (x >> 3) & 0xf;
  unsigned short frac = x & 0x07;
  unsigned short exp16 = exp_field + rebias;
  Float16Union out;

  if (exp_field == 0)
  {
    if (frac == 0)
      exp16 = 0;
    else
    {
      /* Number of zero bits above the leading one in the 3-bit mantissa.  */
      unsigned short lead = (frac > 0x3) ? 0 : ((frac > 0x1) ? 1 : 2);

      /* Shift the leading one out (it becomes implicit) and lower the
         exponent by the same amount.  */
      exp16 -= lead;
      frac = (frac << (lead + 1)) & 0x07;
    }
  }
  else if (exp_field == 0xf && frac == 0x7)
  {
    /* All-ones input: all-ones exponent with the top mantissa bit set.  */
    exp16 = 0x1f;
    frac = 0x4;
  }

  out.u16 = sign_bit | (exp16 << 10) | (frac << 7);
  return out.f16;
}

#endif