// { dg-additional-options "-frust-compile-until=lowering" } #[lang = "sized"] trait Sized {} enum Result { Ok(T), Err(E), } use Result::{Err, Ok}; struct Utf8Error; const CONT_MASK: u8 = 15; const TAG_CONT_U8: u8 = 15; #[inline(always)] pub fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> { let mut index = 0; let len = 64; let usize_bytes = 8; let ascii_block_size = 2 * usize_bytes; let blocks_end = if len >= ascii_block_size { len - ascii_block_size + 1 } else { 0 }; while index < len { let old_offset = index; macro_rules! err { ($error_len: expr) => { return Err(Utf8Error) }; } macro_rules! next { () => {{ index += 1; // we needed data, but there was none: error! if index >= len { err!(None) } v[index] }}; } let first = v[index]; if first >= 128 { let w = 15; // 2-byte encoding is for codepoints \u{0080} to \u{07ff} // first C2 80 last DF BF // 3-byte encoding is for codepoints \u{0800} to \u{ffff} // first E0 A0 80 last EF BF BF // excluding surrogates codepoints \u{d800} to \u{dfff} // ED A0 80 to ED BF BF // 4-byte encoding is for codepoints \u{1000}0 to \u{10ff}ff // first F0 90 80 80 last F4 8F BF BF // // Use the UTF-8 syntax from the RFC // // https://tools.ietf.org/html/rfc3629 // UTF8-1 = %x00-7F // UTF8-2 = %xC2-DF UTF8-tail // UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) / // %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail ) // UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) / // %xF4 %x80-8F 2( UTF8-tail ) match w { 2 => { if next!() & !CONT_MASK != TAG_CONT_U8 { err!(Some(1)) } } 3 => { match (first, next!()) { (0xE0, 0xA0..=0xBF) | (0xE1..=0xEC, 0x80..=0xBF) | (0xED, 0x80..=0x9F) | (0xEE..=0xEF, 0x80..=0xBF) => {} _ => err!(Some(1)), } if next!() & !CONT_MASK != TAG_CONT_U8 { err!(Some(2)) } } 4 => { match (first, next!()) { (0xF0, 0x90..=0xBF) | (0xF1..=0xF3, 0x80..=0xBF) | (0xF4, 0x80..=0x8F) => {} _ => err!(Some(1)), } if next!() & !CONT_MASK != TAG_CONT_U8 { err!(Some(2)) } if next!() & !CONT_MASK != TAG_CONT_U8 { err!(Some(3)) } } _ => err!(Some(1)), } index += 1; } else { index += 1; } } Ok(()) } fn main() {}