diff options
Diffstat (limited to 'libgrust/rustc-lib/stdarch/crates/stdarch-test')
4 files changed, 432 insertions, 0 deletions
diff --git a/libgrust/rustc-lib/stdarch/crates/stdarch-test/Cargo.toml b/libgrust/rustc-lib/stdarch/crates/stdarch-test/Cargo.toml new file mode 100644 index 0000000..9eb6b64 --- /dev/null +++ b/libgrust/rustc-lib/stdarch/crates/stdarch-test/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "stdarch-test" +version = "0.1.0" +authors = ["Alex Crichton <alex@alexcrichton.com>"] + +[dependencies] +assert-instr-macro = { path = "../assert-instr-macro" } +simd-test-macro = { path = "../simd-test-macro" } +cc = "1.0" +lazy_static = "1.0" +rustc-demangle = "0.1.8" +cfg-if = "0.1" + +# We use a crates.io dependency to disassemble wasm binaries to look for +# instructions for `#[assert_instr]`. Note that we use an `=` dependency here +# instead of a floating dependency because the text format for wasm changes over +# time, and we want to make updates to this explicit rather than automatically +# picking up updates which might break CI with new instruction names. +[target.'cfg(target_arch = "wasm32")'.dependencies] +wasmprinter = "=0.2.6" + +[features] +default = [] diff --git a/libgrust/rustc-lib/stdarch/crates/stdarch-test/src/disassembly.rs b/libgrust/rustc-lib/stdarch/crates/stdarch-test/src/disassembly.rs new file mode 100644 index 0000000..d82b07d --- /dev/null +++ b/libgrust/rustc-lib/stdarch/crates/stdarch-test/src/disassembly.rs @@ -0,0 +1,183 @@ +//! Disassembly calling function for most targets. + +use crate::Function; +use std::{collections::HashSet, env, process::Command, str}; + +// Extracts the "shim" name from the `symbol`. 
+fn normalize(mut symbol: &str) -> String { +    // Remove trailing colon: +    if symbol.ends_with(':') { +        symbol = &symbol[..symbol.len() - 1]; +    } +    if symbol.ends_with('>') { +        symbol = &symbol[..symbol.len() - 1]; +    } +    if let Some(idx) = symbol.find('<') { +        symbol = &symbol[idx + 1..]; +    } + +    let mut symbol = rustc_demangle::demangle(symbol).to_string(); +    symbol = match symbol.rfind("::h") { +        Some(i) => symbol[..i].to_string(), +        None => symbol.to_string(), +    }; + +    // Remove Rust paths +    if let Some(last_colon) = symbol.rfind(':') { +        symbol = (&symbol[last_colon + 1..]).to_string(); +    } + +    // Normalize to no leading underscore to handle platforms that may +    // inject extra ones in symbol names. +    while symbol.starts_with('_') { +        symbol.remove(0); +    } +    symbol +} + +pub(crate) fn disassemble_myself() -> HashSet<Function> { +    let me = env::current_exe().expect("failed to get current exe"); + +    let disassembly = +        if cfg!(target_arch = "x86_64") && cfg!(target_os = "windows") && cfg!(target_env = "msvc") +        { +            let mut cmd = cc::windows_registry::find("x86_64-pc-windows-msvc", "dumpbin.exe") +                .expect("failed to find `dumpbin` tool"); +            let output = cmd +                .arg("/DISASM") +                .arg(&me) +                .output() +                .expect("failed to execute dumpbin"); +            println!( +                "{}\n{}", +                output.status, +                String::from_utf8_lossy(&output.stderr) +            ); +            assert!(output.status.success()); +            // Windows does not return valid UTF-8 output: +            String::from_utf8_lossy(Vec::leak(output.stdout)) +        } else if cfg!(target_os = "windows") { +            panic!("disassembly unimplemented") +        } else if cfg!(target_os = "macos") { +            let output = Command::new("otool") +  
              .arg("-vt") +                .arg(&me) +                .output() +                .expect("failed to execute otool"); +            println!( +                "{}\n{}", +                output.status, +                String::from_utf8_lossy(&output.stderr) +            ); +            assert!(output.status.success()); + +            String::from_utf8_lossy(Vec::leak(output.stdout)) +        } else { +            let objdump = env::var("OBJDUMP").unwrap_or_else(|_| "objdump".to_string()); +            let output = Command::new(objdump.clone()) +                .arg("--disassemble") +                .arg(&me) +                .output() +                .unwrap_or_else(|_| panic!("failed to execute objdump. OBJDUMP={}", objdump)); +            println!( +                "{}\n{}", +                output.status, +                String::from_utf8_lossy(&output.stderr) +            ); +            assert!(output.status.success()); + +            String::from_utf8_lossy(Vec::leak(output.stdout)) +        }; + +    parse(&disassembly) +} + +fn parse(output: &str) -> HashSet<Function> { +    let mut lines = output.lines(); + +    println!( +        "First 100 lines of the disassembly input containing {} lines:", +        lines.clone().count() +    ); +    for line in output.lines().take(100) { +        println!("{}", line); +    } + +    let mut functions = HashSet::new(); +    let mut cached_header = None; +    while let Some(header) = cached_header.take().or_else(|| lines.next()) { +        if !header.ends_with(':') || !header.contains("stdarch_test_shim") { +            continue; +        } +        eprintln!("header: {}", header); +        let symbol = normalize(header); +        eprintln!("normalized symbol: {}", symbol); +        let mut instructions = Vec::new(); +        while let Some(instruction) = lines.next() { +            if instruction.ends_with(':') { +                cached_header = Some(instruction); +                break; +            } + 
           if instruction.is_empty() { +                cached_header = None; +                break; +            } +            let parts = if cfg!(target_os = "macos") { +                // Each line of instructions should look like: +                // +                //      $addr    $instruction... +                instruction +                    .split_whitespace() +                    .skip(1) +                    .map(std::string::ToString::to_string) +                    .collect::<Vec<String>>() +            } else if cfg!(target_env = "msvc") { +                // Each line looks like: +                // +                // >  $addr: ab cd ef     $instr.. +                // >         00 12          # this line os optional +                if instruction.starts_with("       ") { +                    continue; +                } +                instruction +                    .split_whitespace() +                    .skip(1) +                    .skip_while(|s| s.len() == 2 && usize::from_str_radix(s, 16).is_ok()) +                    .map(std::string::ToString::to_string) +                    .skip_while(|s| *s == "lock") // skip x86-specific prefix +                    .collect::<Vec<String>>() +            } else { +                // objdump +                // Each line of instructions should look like: +                // +                //      $rel_offset: ab cd ef 00    $instruction... 
+                let expected_len = if cfg!(target_arch = "arm") || cfg!(target_arch = "aarch64") { +                    8 +                } else { +                    2 +                }; + +                instruction +                    .split_whitespace() +                    .skip(1) +                    .skip_while(|s| s.len() == expected_len && usize::from_str_radix(s, 16).is_ok()) +                    .skip_while(|s| *s == "lock") // skip x86-specific prefix +                    .map(std::string::ToString::to_string) +                    .collect::<Vec<String>>() +            }; +            instructions.push(parts.join(" ")); +        } +        let function = Function { +            name: symbol, +            instrs: instructions, +        }; +        assert!(functions.insert(function)); +    } + +    eprintln!("all found functions dump:"); +    for k in &functions { +        eprintln!("  f: {}", k.name); +    } + +    functions +} diff --git a/libgrust/rustc-lib/stdarch/crates/stdarch-test/src/lib.rs b/libgrust/rustc-lib/stdarch/crates/stdarch-test/src/lib.rs new file mode 100644 index 0000000..03711e9 --- /dev/null +++ b/libgrust/rustc-lib/stdarch/crates/stdarch-test/src/lib.rs @@ -0,0 +1,171 @@ +//! Runtime support needed for testing the stdarch crate. +//! +//! This basically just disassembles the current executable and then parses the +//! output once globally and then provides the `assert` function which makes +//! assertions about the disassembly of a function. +#![allow(clippy::missing_docs_in_private_items, clippy::print_stdout)] + +extern crate assert_instr_macro; +extern crate cc; +#[macro_use] +extern crate lazy_static; +extern crate rustc_demangle; +extern crate simd_test_macro; +#[macro_use] +extern crate cfg_if; + +pub use assert_instr_macro::*; +pub use simd_test_macro::*; +use std::{cmp, collections::HashSet, env, hash, str, sync::atomic::AtomicPtr}; + +cfg_if! 
{
    if #[cfg(target_arch = "wasm32")] {
        pub mod wasm;
        use wasm::disassemble_myself;
    } else {
        mod disassembly;
        use disassembly::disassemble_myself;
    }
}

lazy_static! {
    /// Disassembly of the running executable, computed once on first use.
    static ref DISASSEMBLY: HashSet<Function> = disassemble_myself();
}

/// A disassembled function: its normalized name and one string per instruction.
///
/// Equality and hashing look at `name` only, so a `HashSet<Function>` can be
/// queried with a name-only probe built by [`Function::new`].
#[derive(Debug)]
struct Function {
    name: String,
    instrs: Vec<String>,
}
impl Function {
    /// Builds a lookup key: a function named `n` with no instructions.
    fn new(n: &str) -> Self {
        Self {
            name: n.to_string(),
            instrs: Vec::new(),
        }
    }
}

impl cmp::PartialEq for Function {
    fn eq(&self, other: &Self) -> bool {
        self.name == other.name
    }
}
impl cmp::Eq for Function {}

impl hash::Hash for Function {
    fn hash<H: hash::Hasher>(&self, state: &mut H) {
        self.name.hash(state)
    }
}

/// Main entry point for this crate, called by the `#[assert_instr]` macro.
///
/// This asserts that the disassembly of the shim named `fnname` contains the
/// instruction `expected`, stays below an instruction-count limit, and shows
/// no sign of failed inlining. `_fnptr` is unused.
///
/// The limit defaults to a per-instruction table and can be overridden with
/// the `STDARCH_ASSERT_INSTR_LIMIT` environment variable.
///
/// # Panics
///
/// Panics if `fnname` is not present in the disassembly, if
/// `STDARCH_ASSERT_INSTR_LIMIT` is set but is not a valid `usize`, or if any of
/// the three checks above fails (after printing the disassembly).
pub fn assert(_fnptr: usize, fnname: &str, expected: &str) {
    //eprintln!("shim name: {}", fnname);
    let function = &DISASSEMBLY
        .get(&Function::new(fnname))
        .unwrap_or_else(|| panic!("function \"{}\" not found in the disassembly", fnname));
    //eprintln!("  function: {:?}", function);

    // Ignore trailing `nop` padding.
    let mut instrs = &function.instrs[..];
    while instrs.last().map_or(false, |s| s == "nop") {
        instrs = &instrs[..instrs.len() - 1];
    }

    // If the expected intrinsic is a nop it is compiled away so we
    // can't check for it - aka the intrinsic is not generating any code
    if expected == "nop" {
        return;
    }

    // Look for `expected` as the first part of any instruction in this
    // function, e.g., tzcntl in tzcntl %rax,%rax.
    let found = instrs.iter().any(|s| s.starts_with(expected));

    // Look for `call` instructions in the disassembly to detect whether
    // inlining failed: all intrinsics are `#[inline(always)]`, so
    // calling one intrinsic from another should not generate `call`
    // instructions.
    //
    // NOTE(review): `windows(2)` only inspects instructions that have a
    // successor, so a `call` in the very last position is not examined.
    let inlining_failed = instrs.windows(2).any(|s| {
        // On 32-bit x86 position independent code will call itself and be
        // immediately followed by a `pop` to learn about the current address.
        // Let's not take that into account when considering whether a function
        // failed inlining something: a `call` counts unless it is that
        // `call`/`pop` pair on x86.
        s[0].contains("call") && !(cfg!(target_arch = "x86") && s[1].contains("pop"))
    });

    let instruction_limit = std::env::var("STDARCH_ASSERT_INSTR_LIMIT")
        .ok()
        .map_or_else(
            || match expected {
                // `cpuid` returns a pretty big aggregate structure, so exempt
                // it from the slightly more restrictive 22 instructions below.
                "cpuid" => 30,

                // Apparently, on Windows, LLVM generates a bunch of
                // saves/restores of xmm registers around these instructions,
                // which exceeds the limit of 20 below. As it seems dictated by
                // Windows's ABI (I believe?), we probably can't do much
                // about it.
                "vzeroall" | "vzeroupper" if cfg!(windows) => 30,

                // Intrinsics using `cvtpi2ps` are typically "composites" and
                // in some cases exceed the limit.
                "cvtpi2ps" => 25,

                // core_arch/src/acle/simd32
                "usad8" => 27,
                "qadd8" | "qsub8" | "sadd8" | "sel" | "shadd8" | "shsub8" | "usub8" | "ssub8" => 29,

                // Original limit was 20 instructions, but ARM DSP Intrinsics
                // are exactly 20 instructions long. So, bump the limit to 22
                // instead of adding here a long list of exceptions.
                _ => 22,
            },
            |v| {
                v.parse()
                    .expect("STDARCH_ASSERT_INSTR_LIMIT must be a non-negative integer")
            },
        );
    let probably_only_one_instruction = instrs.len() < instruction_limit;

    if found && probably_only_one_instruction && !inlining_failed {
        return;
    }

    // Help debug by printing out the found disassembly, and then panic as we
    // didn't find the instruction.
    println!("disassembly for {}: ", fnname,);
    for (i, instr) in instrs.iter().enumerate() {
        println!("\t{:2}: {}", i, instr);
    }

    if !found {
        panic!(
            "failed to find instruction `{}` in the disassembly",
            expected
        );
    } else if !probably_only_one_instruction {
        panic!(
            "instruction found, but the disassembly contains too many \
             instructions: #instructions = {} >= {} (limit)",
            instrs.len(),
            instruction_limit
        );
    } else if inlining_failed {
        panic!(
            "instruction found, but the disassembly contains `call` \
             instructions, which hint that inlining failed"
        );
    }
}

/// Panics for a skipped test named `name` when the `STDARCH_TEST_EVERYTHING`
/// environment variable is set; otherwise does nothing.
pub fn assert_skip_test_ok(name: &str) {
    if env::var("STDARCH_TEST_EVERYTHING").is_err() {
        return;
    }
    panic!("skipped test `{}` when it shouldn't be skipped", name);
}

// See comment in `assert-instr-macro` crate for why this exists
pub static _DONT_DEDUP: AtomicPtr<u8> = AtomicPtr::new(b"".as_ptr() as *mut _);

diff --git a/libgrust/rustc-lib/stdarch/crates/stdarch-test/src/wasm.rs b/libgrust/rustc-lib/stdarch/crates/stdarch-test/src/wasm.rs new file mode 100644 index 0000000..bf411c1 --- /dev/null +++ b/libgrust/rustc-lib/stdarch/crates/stdarch-test/src/wasm.rs @@ -0,0 +1,55 @@
//! Disassembly calling function for `wasm32` targets.
+ +use crate::Function; +use std::collections::HashSet; + +pub(crate) fn disassemble_myself() -> HashSet<Function> { +    // Use `std::env::args` to find the path to our executable. Assume the +    // environment is configured such that we can read that file. Read it and +    // use the `wasmprinter` crate to transform the binary to text, then search +    // the text for appropriately named functions. +    let me = std::env::args() +        .next() +        .expect("failed to find current wasm file"); +    let output = wasmprinter::print_file(&me).unwrap(); + +    let mut ret: HashSet<Function> = HashSet::new(); +    let mut lines = output.lines().map(|s| s.trim()); +    while let Some(line) = lines.next() { +        // If this isn't a function, we don't care about it. +        if !line.starts_with("(func ") { +            continue; +        } + +        let mut function = Function { +            name: String::new(), +            instrs: Vec::new(), +        }; + +        // Empty functions will end in `))` so there's nothing to do, otherwise +        // we'll have a bunch of following lines which are instructions. +        // +        // Lines that have an imbalanced `)` mark the end of a function. 
+        if !line.ends_with("))") { +            while let Some(line) = lines.next() { +                function.instrs.push(line.to_string()); +                if !line.starts_with("(") && line.ends_with(")") { +                    break; +                } +            } +        } +        // The second element here split on whitespace should be the name of +        // the function, skipping the type/params/results +        function.name = line.split_whitespace().nth(1).unwrap().to_string(); +        if function.name.starts_with("$") { +            function.name = function.name[1..].to_string() +        } + +        if !function.name.contains("stdarch_test_shim") { +            continue; +        } + +        assert!(ret.insert(function)); +    } +    return ret; +}  | 
