diff options
Diffstat (limited to 'gcc/testsuite/rust/compile/issue-3898.rs')
| -rw-r--r-- | gcc/testsuite/rust/compile/issue-3898.rs | 112 |
1 files changed, 112 insertions, 0 deletions
diff --git a/gcc/testsuite/rust/compile/issue-3898.rs b/gcc/testsuite/rust/compile/issue-3898.rs new file mode 100644 index 0000000..114370c --- /dev/null +++ b/gcc/testsuite/rust/compile/issue-3898.rs @@ -0,0 +1,112 @@ +// { dg-additional-options "-frust-compile-until=lowering" } + +#[lang = "sized"] +trait Sized {} + +enum Result<T, E> { + Ok(T), + Err(E), +} + +use Result::{Err, Ok}; + +struct Utf8Error; + +const CONT_MASK: u8 = 15; +const TAG_CONT_U8: u8 = 15; + +#[inline(always)] +pub fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> { + let mut index = 0; + let len = 64; + + let usize_bytes = 8; + let ascii_block_size = 2 * usize_bytes; + let blocks_end = if len >= ascii_block_size { + len - ascii_block_size + 1 + } else { + 0 + }; + + while index < len { + let old_offset = index; + macro_rules! err { + ($error_len: expr) => { + return Err(Utf8Error) + }; + } + + macro_rules! next { + () => {{ + index += 1; + // we needed data, but there was none: error! + if index >= len { + err!(None) + } + v[index] + }}; + } + + let first = v[index]; + if first >= 128 { + let w = 15; + // 2-byte encoding is for codepoints \u{0080} to \u{07ff} + // first C2 80 last DF BF + // 3-byte encoding is for codepoints \u{0800} to \u{ffff} + // first E0 A0 80 last EF BF BF + // excluding surrogates codepoints \u{d800} to \u{dfff} + // ED A0 80 to ED BF BF + // 4-byte encoding is for codepoints \u{1000}0 to \u{10ff}ff + // first F0 90 80 80 last F4 8F BF BF + // + // Use the UTF-8 syntax from the RFC + // + // https://tools.ietf.org/html/rfc3629 + // UTF8-1 = %x00-7F + // UTF8-2 = %xC2-DF UTF8-tail + // UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) / + // %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail ) + // UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) / + // %xF4 %x80-8F 2( UTF8-tail ) + match w { + 2 => { + if next!() & !CONT_MASK != TAG_CONT_U8 { + err!(Some(1)) + } + } + 3 => { + match (first, next!()) { + (0xE0, 0xA0..=0xBF) + | (0xE1..=0xEC, 0x80..=0xBF) + | (0xED, 0x80..=0x9F) + | (0xEE..=0xEF, 0x80..=0xBF) => {} + _ => err!(Some(1)), + } + if next!() & !CONT_MASK != TAG_CONT_U8 { + err!(Some(2)) + } + } + 4 => { + match (first, next!()) { + (0xF0, 0x90..=0xBF) | (0xF1..=0xF3, 0x80..=0xBF) | (0xF4, 0x80..=0x8F) => {} + _ => err!(Some(1)), + } + if next!() & !CONT_MASK != TAG_CONT_U8 { + err!(Some(2)) + } + if next!() & !CONT_MASK != TAG_CONT_U8 { + err!(Some(3)) + } + } + _ => err!(Some(1)), + } + index += 1; + } else { + index += 1; + } + } + + Ok(()) +} + +fn main() {} |
