diff options
Diffstat (limited to 'libgrust/rustc-lib/stdarch/crates/stdarch-gen')
4 files changed, 1239 insertions, 0 deletions
diff --git a/libgrust/rustc-lib/stdarch/crates/stdarch-gen/Cargo.toml b/libgrust/rustc-lib/stdarch/crates/stdarch-gen/Cargo.toml new file mode 100644 index 0000000..b339672 --- /dev/null +++ b/libgrust/rustc-lib/stdarch/crates/stdarch-gen/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "stdarch-gen" +version = "0.1.0" +authors = ["Heinz Gies <heinz@licenser.net>"] +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] diff --git a/libgrust/rustc-lib/stdarch/crates/stdarch-gen/README.md b/libgrust/rustc-lib/stdarch/crates/stdarch-gen/README.md new file mode 100644 index 0000000..54b602c --- /dev/null +++ b/libgrust/rustc-lib/stdarch/crates/stdarch-gen/README.md @@ -0,0 +1,11 @@ +# Neon intrinsic code generator + +A small tool that allows to quickly generate intrinsics for the NEON architecture. + +The specification for the intrinsics can be found in `neon.spec`. + +To run and re-generate the code run the following from the root of the `stdarch` crate. + +``` +OUT_DIR=`pwd`/crates/core_arch cargo run -p stdarch-gen -- crates/stdarch-gen/neon.spec +```
\ No newline at end of file diff --git a/libgrust/rustc-lib/stdarch/crates/stdarch-gen/neon.spec b/libgrust/rustc-lib/stdarch/crates/stdarch-gen/neon.spec new file mode 100644 index 0000000..0343a72 --- /dev/null +++ b/libgrust/rustc-lib/stdarch/crates/stdarch-gen/neon.spec @@ -0,0 +1,469 @@ +// ARM Neon intrinsic specification. +// +// This file contains the specification for a number of +// intrinsics that allows us to generate them along with +// their test cases. +// +// To the syntax of the file - it's not very intelligently parsed! +// +// # Comments +// start with AT LEAST two, or four or more slashes so // is a +// comment /////// is too. +// +// # Sections +// Sections start with EXACTLY three slashes followed +// by AT LEAST one space. Sections are used for two things: +// +// 1) they serve as the doc comment for the given intrinsics. +// 2) they reset all variables (name, fn, etc.) +// +// # Variables +// +// name - The prefix of the function, suffixes are auto +// generated by the type they get passed. +// +// fn - The function to call in rust-land. +// +// aarch64 - The intrinsic to check on aarch64 architecture. +// If this is given but no arm intrinsic is provided, +// the function will exclusively be generated for +// aarch64. +// This is used to generate both aarch64 specific and +// shared intrinsics by first only specifying the aarch64 +// variant then the arm variant. +// +// arm - The arm v7 intrinsics used to check for arm code +// generation. All neon functions available in arm are +// also available in aarch64. If no aarch64 intrinsic was +// set they are assumed to be the same. +// Intrinsics ending with a `.` will have a size suffix +// added (such as `i8` or `i64`) that is not sign specific +// Intrinsics ending with a `.s` will have a size suffix +// added (such as `s8` or `u64`) that is sign specific +// +// a - First input for tests, it gets scaled to the size of +// the type.
+// +// b - Second input for tests, it gets scaled to the size of +// the type. +// +// # special values +// +// TRUE - 'true' all bits are set to 1 +// FALSE - 'false' all bits are set to 0 +// FF - same as 'true' +// MIN - minimal value (either 0 or the lowest negative number) +// MAX - maximal value prior to overflow +// +// # validate <values> +// Validates a and b against the expected result of the test. +// The special values 'TRUE' and 'FALSE' can be used to +// represent the correct NEON representation of true or +// false values. It too gets scaled to the type. +// +// Validate needs to be called before generate as it sets +// up the rules for validation that get generated for each +// type. +// # generate <types> +// The generate command generates the intrinsics, it uses the +// Variables set and can be called multiple times while overwriting +// some of the variables. + +/// Vector bitwise and +name = vand +fn = simd_and +arm = vand +aarch64 = and +a = 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00 +b = 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F +validate 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00 +b = 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +validate 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +generate int*_t, uint*_t, int64x*_t, uint64x*_t + +/// Vector bitwise or (immediate, inclusive) +name = vorr +fn = simd_or +arm = vorr +aarch64 = orr +a = 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F +b = 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +validate 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F +generate int*_t, uint*_t, int64x*_t, uint64x*_t + + +///
Vector bitwise exclusive or (vector) +name = veor +fn = simd_xor +arm = veor +aarch64 = eor +a = 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F +b = 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +validate 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F +generate int*_t, uint*_t, int64x*_t, uint64x*_t + +//////////////////// +// equality +//////////////////// + +/// Compare bitwise Equal (vector) +name = vceq +fn = simd_eq +a = MIN, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, MAX +b = MIN, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, MAX +validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE +a = MIN, MIN, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0xCC, 0x0D, 0xEE, MAX +b = MIN, MAX, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08, 0x08, 0x00, 0x0A, 0x0A, 0xCC, 0xD0, 0xEE, MIN +validate TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE + +aarch64 = cmeq +generate uint64x*_t, int64x1_t:uint64x1_t, int64x2_t:uint64x2_t, poly64x1_t:uint64x1_t, poly64x2_t:uint64x2_t + +arm = vceq. +generate uint*_t, int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t + +/// Floating-point compare equal +name = vceq +fn = simd_eq +a = 1.2, 3.4, 5.6, 7.8 +b = 1.2, 3.4, 5.6, 7.8 +validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE + +aarch64 = fcmeq +generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t + +arm = vceq. 
+// we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t +generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t + +//////////////////// +// greater than +//////////////////// + +/// Compare signed greater than +name = vcgt +fn = simd_gt +a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE +aarch64 = cmgt +generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t + +arm = vcgt.s +generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t + +/// Compare unsigned higher +name = vcgt +fn = simd_gt +a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE + +aarch64 = cmhi +generate uint64x*_t + +arm = vcgt.s +generate uint*_t + +/// Floating-point compare greater than +name = vcgt +fn = simd_gt +a = 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8, 8.9 +b = 0.1, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8 +validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE + +aarch64 = fcmgt +generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t + +arm = vcgt.s +// we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t +generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t + +//////////////////// +// less than +//////////////////// + +/// Compare signed less than +name = vclt +fn = simd_lt +a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE +aarch64 = cmgt +generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t + +arm = vcgt.s +generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t,
int32x2_t:uint32x2_t, int32x4_t:uint32x4_t + +/// Compare unsigned less than +name = vclt +fn = simd_lt +a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE + +aarch64 = cmhi +generate uint64x*_t + +arm = vcgt.s +generate uint*_t + +/// Floating-point compare less than +name = vclt +fn = simd_lt +a = 0.1, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8 +b = 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8, 8.9 +validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE + +aarch64 = fcmgt +generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t + +arm = vcgt.s +// we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t +generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t + +//////////////////// +// lesser then equals +//////////////////// + +/// Compare signed less than or equal +name = vcle +fn = simd_le +a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE + +aarch64 = cmge +generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t + +arm = vcge.s +generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t + +/// Compare unsigned less than or equal +name = vcle +fn = simd_le +a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE + +aarch64 = cmhs +generate uint64x*_t + +arm = vcge.s +generate uint*_t + +/// Floating-point compare less than or equal +name = vcle +fn = simd_le +a = 0.1, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8 +b = 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8, 8.9 +validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE +aarch64 = fcmge +generate 
float64x1_t:uint64x1_t, float64x2_t:uint64x2_t + +// we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t +arm = vcge.s +generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t + +//////////////////// +// greater then equals +//////////////////// + +/// Compare signed greater than or equal +name = vcge +fn = simd_ge +a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE + +aarch64 = cmge +generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t + +arm = vcge.s +generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t + +/// Compare unsigned greater than or equal +name = vcge +fn = simd_ge +a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE + +aarch64 = cmhs +generate uint64x*_t + +arm = vcge.s +generate uint*_t + +/// Floating-point compare greater than or equal +name = vcge +fn = simd_ge +a = 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8, 8.9 +b = 0.1, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8 +validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE + +aarch64 = fcmge +generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t + +arm = vcge.s +// we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t +generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t + +/// Saturating subtract +name = vqsub +a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 +b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +validate 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26 + +arm = vqsub.s +aarch64 = uqsub +link-arm = vqsubu._EXT_ +link-aarch64 = uqsub._EXT_ +generate uint*_t + +arm = vqsub.s +aarch64 = sqsub +link-arm = vqsubs._EXT_ +link-aarch64 = 
sqsub._EXT_ +generate int*_t + +/// Halving add +name = vhadd +a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 +b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +validate 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29 + + +arm = vhadd.s +aarch64 = uhadd +link-aarch64 = uhadd._EXT_ +link-arm = vhaddu._EXT_ +generate uint*_t + + +arm = vhadd.s +aarch64 = shadd +link-aarch64 = shadd._EXT_ +link-arm = vhadds._EXT_ +generate int*_t + +/// Rounding halving add +name = vrhadd +a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 +b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +validate 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29 + +arm = vrhadd.s +aarch64 = urhadd +link-arm = vrhaddu._EXT_ +link-aarch64 = urhadd._EXT_ +generate uint*_t + +arm = vrhadd.s +aarch64 = srhadd +link-arm = vrhadds._EXT_ +link-aarch64 = srhadd._EXT_ +generate int*_t + +/// Saturating add +name = vqadd +a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 +b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +validate 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58 + +arm = vqadd.s +aarch64 = uqadd +link-arm = vqaddu._EXT_ +link-aarch64 = uqadd._EXT_ +generate uint*_t + +arm = vqadd.s +aarch64 = sqadd +link-arm = vqadds._EXT_ +link-aarch64 = sqadd._EXT_ +generate int*_t + +// requires 1st and second argument to be different, this is not implemented yet +// /// Signed saturating accumulate of unsigned value +// +// name = vuqadd +// a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 +// b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +// e = 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58 + +// it seems like we don't have those in rustland :( +// aarch64 = suqadd +// link-aarch64 = usqadd._EXT_ +// generate int64x*_t + +// arm = suqadd +// link-arm = vuqadds._EXT_ +// link-aarch64 = suqadd._EXT_ +// generate int*_t + + +/// Multiply +name =
vmul +a = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2 +b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +validate 1, 4, 3, 8, 5, 12, 7, 16, 9, 20, 11, 24, 13, 28, 15, 32 +arm = vmul. +aarch64 = mul +fn = simd_mul +generate int*_t, uint*_t + +/// Multiply +name = vmul +fn = simd_mul +a = 1.0, 2.0, 1.0, 2.0 +b = 2.0, 3.0, 4.0, 5.0 +validate 2.0, 6.0, 4.0, 10.0 + +aarch64 = fmul +generate float64x*_t + +arm = vmul. +generate float*_t + + +/// Subtract +name = vsub +a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +b = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2 +validate 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14 +arm = vsub. +aarch64 = sub +fn = simd_sub +generate int*_t, uint*_t, int64x*_t, uint64x*_t + +/// Subtract +name = vsub +fn = simd_sub +a = 1.0, 4.0, 3.0, 8.0 +b = 1.0, 2.0, 3.0, 4.0 +validate 0.0, 2.0, 0.0, 4.0 + +aarch64 = fsub +generate float64x*_t + +arm = vsub. +generate float*_t + + +/// Signed halving subtract +name = vhsub +a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +b = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2 +validate 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7 + +arm = vhsub.s +aarch64 = uhsub +link-arm = vhsubu._EXT_ +link-aarch64 = uhsub._EXT_ +generate uint*_t + +arm = vhsub.s +aarch64 = shsub +link-arm = vhsubs._EXT_ +link-aarch64 = shsub._EXT_ +generate int*_t diff --git a/libgrust/rustc-lib/stdarch/crates/stdarch-gen/src/main.rs b/libgrust/rustc-lib/stdarch/crates/stdarch-gen/src/main.rs new file mode 100644 index 0000000..8a9d9f2 --- /dev/null +++ b/libgrust/rustc-lib/stdarch/crates/stdarch-gen/src/main.rs @@ -0,0 +1,750 @@ +use std::env; +use std::fs::File; +use std::io::prelude::*; +use std::io::{self, BufReader}; +use std::path::PathBuf; + +const IN: &str = "neon.spec"; +const ARM_OUT: &str = "generated.rs"; +const AARCH64_OUT: &str = "generated.rs"; + +const UINT_TYPES: [&str; 6] = [ + "uint8x8_t", + "uint8x16_t", + "uint16x4_t", + "uint16x8_t", + "uint32x2_t", + "uint32x4_t", 
+]; + +const UINT_TYPES_64: [&str; 2] = ["uint64x1_t", "uint64x2_t"]; + +const INT_TYPES: [&str; 6] = [ + "int8x8_t", + "int8x16_t", + "int16x4_t", + "int16x8_t", + "int32x2_t", + "int32x4_t", +]; + +const INT_TYPES_64: [&str; 2] = ["int64x1_t", "int64x2_t"]; + +const FLOAT_TYPES: [&str; 2] = [ + //"float8x8_t", not supported by rust + //"float8x16_t", not supported by rust + //"float16x4_t", not supported by rust + //"float16x8_t", not supported by rust + "float32x2_t", + "float32x4_t", +]; + +const FLOAT_TYPES_64: [&str; 2] = [ + //"float8x8_t", not supported by rust + //"float8x16_t", not supported by rust + //"float16x4_t", not supported by rust + //"float16x8_t", not supported by rust + "float64x1_t", + "float64x2_t", +]; + +fn type_len(t: &str) -> usize { + match t { + "int8x8_t" => 8, + "int8x16_t" => 16, + "int16x4_t" => 4, + "int16x8_t" => 8, + "int32x2_t" => 2, + "int32x4_t" => 4, + "int64x1_t" => 1, + "int64x2_t" => 2, + "uint8x8_t" => 8, + "uint8x16_t" => 16, + "uint16x4_t" => 4, + "uint16x8_t" => 8, + "uint32x2_t" => 2, + "uint32x4_t" => 4, + "uint64x1_t" => 1, + "uint64x2_t" => 2, + "float16x4_t" => 4, + "float16x8_t" => 8, + "float32x2_t" => 2, + "float32x4_t" => 4, + "float64x1_t" => 1, + "float64x2_t" => 2, + "poly64x1_t" => 1, + "poly64x2_t" => 2, + _ => panic!("unknown type: {}", t), + } +} + +fn type_to_suffix(t: &str) -> &str { + match t { + "int8x8_t" => "_s8", + "int8x16_t" => "q_s8", + "int16x4_t" => "_s16", + "int16x8_t" => "q_s16", + "int32x2_t" => "_s32", + "int32x4_t" => "q_s32", + "int64x1_t" => "_s64", + "int64x2_t" => "q_s64", + "uint8x8_t" => "_u8", + "uint8x16_t" => "q_u8", + "uint16x4_t" => "_u16", + "uint16x8_t" => "q_u16", + "uint32x2_t" => "_u32", + "uint32x4_t" => "q_u32", + "uint64x1_t" => "_u64", + "uint64x2_t" => "q_u64", + "float16x4_t" => "_f16", + "float16x8_t" => "q_f16", + "float32x2_t" => "_f32", + "float32x4_t" => "q_f32", + "float64x1_t" => "_f64", + "float64x2_t" => "q_f64", + "poly64x1_t" => "_p64", + "poly64x2_t" 
=> "q_p64", + _ => panic!("unknown type: {}", t), + } +} + +fn type_to_global_type(t: &str) -> &str { + match t { + "int8x8_t" => "i8x8", + "int8x16_t" => "i8x16", + "int16x4_t" => "i16x4", + "int16x8_t" => "i16x8", + "int32x2_t" => "i32x2", + "int32x4_t" => "i32x4", + "int64x1_t" => "i64x1", + "int64x2_t" => "i64x2", + "uint8x8_t" => "u8x8", + "uint8x16_t" => "u8x16", + "uint16x4_t" => "u16x4", + "uint16x8_t" => "u16x8", + "uint32x2_t" => "u32x2", + "uint32x4_t" => "u32x4", + "uint64x1_t" => "u64x1", + "uint64x2_t" => "u64x2", + "float16x4_t" => "f16x4", + "float16x8_t" => "f16x8", + "float32x2_t" => "f32x2", + "float32x4_t" => "f32x4", + "float64x1_t" => "f64", + "float64x2_t" => "f64x2", + "poly64x1_t" => "i64x1", + "poly64x2_t" => "i64x2", + _ => panic!("unknown type: {}", t), + } +} + +// fn type_to_native_type(t: &str) -> &str { +// match t { +// "int8x8_t" => "i8", +// "int8x16_t" => "i8", +// "int16x4_t" => "i16", +// "int16x8_t" => "i16", +// "int32x2_t" => "i32", +// "int32x4_t" => "i32", +// "int64x1_t" => "i64", +// "int64x2_t" => "i64", +// "uint8x8_t" => "u8", +// "uint8x16_t" => "u8", +// "uint16x4_t" => "u16", +// "uint16x8_t" => "u16", +// "uint32x2_t" => "u32", +// "uint32x4_t" => "u32", +// "uint64x1_t" => "u64", +// "uint64x2_t" => "u64", +// "float16x4_t" => "f16", +// "float16x8_t" => "f16", +// "float32x2_t" => "f32", +// "float32x4_t" => "f32", +// "float64x1_t" => "f64", +// "float64x2_t" => "f64", +// "poly64x1_t" => "i64", +// "poly64x2_t" => "i64", +// _ => panic!("unknown type: {}", t), +// } +// } + +fn type_to_ext(t: &str) -> &str { + match t { + "int8x8_t" => "v8i8", + "int8x16_t" => "v16i8", + "int16x4_t" => "v4i16", + "int16x8_t" => "v8i16", + "int32x2_t" => "v2i32", + "int32x4_t" => "v4i32", + "int64x1_t" => "v1i64", + "int64x2_t" => "v2i64", + "uint8x8_t" => "v8i8", + "uint8x16_t" => "v16i8", + "uint16x4_t" => "v4i16", + "uint16x8_t" => "v8i16", + "uint32x2_t" => "v2i32", + "uint32x4_t" => "v4i32", + "uint64x1_t" => "v1i64", +
"uint64x2_t" => "v2i64", + "float16x4_t" => "v4f16", + "float16x8_t" => "v8f16", + "float32x2_t" => "v2f32", + "float32x4_t" => "v4f32", + "float64x1_t" => "v1f64", + "float64x2_t" => "v2f64", + /* + "poly64x1_t" => "i64x1", + "poly64x2_t" => "i64x2", + */ + _ => panic!("unknown type for extension: {}", t), + } +} + +fn values(t: &str, vs: &[String]) -> String { + if vs.len() == 1 && !t.contains('x') { + format!(": {} = {}", t, vs[0]) + } else if vs.len() == 1 && type_to_global_type(t) == "f64" { + format!(": {} = {}", type_to_global_type(t), vs[0]) + } else { + format!( + ": {} = {}::new({})", + type_to_global_type(t), + type_to_global_type(t), + vs.iter() + .map(|v| map_val(type_to_global_type(t), v)) + //.map(|v| format!("{}{}", v, type_to_native_type(t))) + .collect::<Vec<_>>() + .join(", ") + ) + } +} + +/// Literal for the largest value of the lane type of global type `t` (e.g. `u8x8`); only the first three bytes of `t` are inspected and unknown prefixes panic. +fn max_val(t: &str) -> &'static str { + match &t[..3] { + "u8x" => "0xFF", + "u16" => "0xFF_FF", + "u32" => "0xFF_FF_FF_FF", + "u64" => "0xFF_FF_FF_FF_FF_FF_FF_FF", + "i8x" => "0x7F", + "i16" => "0x7F_FF", + "i32" => "0x7F_FF_FF_FF", + "i64" => "0x7F_FF_FF_FF_FF_FF_FF_FF", + "f32" => "3.40282347e+38", + "f64" => "1.7976931348623157e+308", + _ => panic!("No MAX for type {}", t), + } +} + +/// Literal for the smallest value of the lane type of global type `t` (0 for unsigned lanes); only the first three bytes of `t` are inspected and unknown prefixes panic. 
+fn min_val(t: &str) -> &'static str { + match &t[..3] { + "u8x" => "0", + "u16" => "0", + "u32" => "0", + "u64" => "0", + "i8x" => "-128", + "i16" => "-32768", + "i32" => "-2147483648", + "i64" => "-9223372036854775808", + "f32" => "-3.40282347e+38", + "f64" => "-1.7976931348623157e+308", + _ => panic!("No MIN for type {}", t), + } +} + +fn true_val(t: &str) -> &'static str { + match &t[..3] { + "u8x" => "0xFF", + "u16" => "0xFF_FF", + "u32" => "0xFF_FF_FF_FF", + "u64" => "0xFF_FF_FF_FF_FF_FF_FF_FF", + _ => panic!("No TRUE for type {}", t), + } +} + +fn ff_val(t: &str) -> &'static str { + match &t[..3] { + "u8x" => "0xFF", + "u16" => "0xFF_FF", + "u32" => "0xFF_FF_FF_FF", + "u64" => "0xFF_FF_FF_FF_FF_FF_FF_FF", + "i8x" => "0xFF", + "i16" => "0xFF_FF", + "i32" =>
"0xFF_FF_FF_FF", + "i64" => "0xFF_FF_FF_FF_FF_FF_FF_FF", + _ => panic!("No FF for type {}", t), + } +} + +fn false_val(_t: &str) -> &'static str { + "0" +} +/// Expands the spec placeholders `FALSE`, `TRUE`, `MAX`, `MIN` and `FF` into literals for global type `t`; any other value is passed through unchanged. +fn map_val<'v>(t: &str, v: &'v str) -> &'v str { + match v { + "FALSE" => false_val(t), + "TRUE" => true_val(t), + "MAX" => max_val(t), + "MIN" => min_val(t), + "FF" => ff_val(t), + o => o, + } +} + +/// Builds the source text of the aarch64-only intrinsic `name` and of its test, returned as `(function, test)`; panics unless exactly one of `fn` / `link-aarch64` is set. +#[allow(clippy::too_many_arguments)] +fn gen_aarch64( + current_comment: &str, + current_fn: &Option<String>, + name: &str, + current_aarch64: &Option<String>, + link_aarch64: &Option<String>, + in_t: &str, + out_t: &str, + current_tests: &[(Vec<String>, Vec<String>, Vec<String>)], +) -> (String, String) { + let _global_t = type_to_global_type(in_t); + let _global_ret_t = type_to_global_type(out_t); + let current_fn = if let Some(current_fn) = current_fn.clone() { + if link_aarch64.is_some() { + panic!("[{}] Can't specify link and fn at the same time.", name) + } + current_fn + } else { + if link_aarch64.is_none() { + panic!("[{}] Either fn or link-aarch have to be specified.", name) + } + format!("{}_", name) + }; + let current_aarch64 = current_aarch64.clone().unwrap(); + let ext_c = if let Some(link_aarch64) = link_aarch64.clone() { + let ext = type_to_ext(in_t); + + format!( + r#" + #[allow(improper_ctypes)] + extern "C" {{ + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.{}")] + fn {}(a: {}, b: {}) -> {}; + }} +"#, + link_aarch64.replace("_EXT_", ext), + current_fn, + in_t, + in_t, + out_t + ) + } else { + String::new() + }; + let function = format!( + r#" +{} +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr({}))] +pub unsafe fn {}(a: {}, b: {}) -> {} {{ 
+ {}{}(a, b) +}} +"#, + current_comment, current_aarch64, name, in_t, in_t, out_t, ext_c, current_fn, + ); + + let test = gen_test(name, &in_t, &out_t, current_tests, type_len(in_t)); + (function, test) +} + +fn gen_test( + name: &str, + in_t: &str, + out_t: &str, + current_tests: &[(Vec<String>, Vec<String>,
Vec<String>)], + len: usize, +) -> String { + let mut test = format!( + r#" + #[simd_test(enable = "neon")] + unsafe fn test_{}() {{"#, + name, + ); + for (a, b, e) in current_tests { + let a: Vec<String> = a.iter().take(len).cloned().collect(); + let b: Vec<String> = b.iter().take(len).cloned().collect(); + let e: Vec<String> = e.iter().take(len).cloned().collect(); + let t = format!( + r#" + let a{}; + let b{}; + let e{}; + let r: {} = transmute({}(transmute(a), transmute(b))); + assert_eq!(r, e); +"#, + values(in_t, &a), + values(in_t, &b), + values(out_t, &e), + type_to_global_type(out_t), + name + ); + test.push_str(&t); + } + test.push_str(" }\n"); + test +} + +#[allow(clippy::too_many_arguments)] +fn gen_arm( + current_comment: &str, + current_fn: &Option<String>, + name: &str, + current_arm: &str, + link_arm: &Option<String>, + current_aarch64: &Option<String>, + link_aarch64: &Option<String>, + in_t: &str, + out_t: &str, + current_tests: &[(Vec<String>, Vec<String>, Vec<String>)], +) -> (String, String) { + let _global_t = type_to_global_type(in_t); + let _global_ret_t = type_to_global_type(out_t); + let current_aarch64 = current_aarch64 + .clone() + .unwrap_or_else(|| current_arm.to_string()); + + let current_fn = if let Some(current_fn) = current_fn.clone() { + if link_aarch64.is_some() || link_arm.is_some() { + panic!( + "[{}] Can't specify link and function at the same time. 
{} / {:?} / {:?}", + name, current_fn, link_aarch64, link_arm + ) + } + current_fn + } else { + if link_aarch64.is_none() || link_arm.is_none() { + panic!( + "[{}] Either fn or link-arm and link-aarch have to be specified.", + name + ) + } + format!("{}_", name) + }; + + let ext_c = + if let (Some(link_arm), Some(link_aarch64)) = (link_arm.clone(), link_aarch64.clone()) { + let ext = type_to_ext(in_t); + + format!( + r#"#[allow(improper_ctypes)] + extern "C" {{ + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.{}")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.{}")] + fn {}(a: {}, b: {}) -> {}; + }} +"#, + link_arm.replace("_EXT_", ext), + link_aarch64.replace("_EXT_", ext), + current_fn, + in_t, + in_t, + out_t + ) + } else { + String::new() + }; + + let function = format!( + r#" +{} +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr({}))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr({}))] +pub unsafe fn {}(a: {}, b: {}) -> {} {{ + {}{}(a, b) +}} +"#, + current_comment, + expand_intrinsic(&current_arm, in_t), + expand_intrinsic(&current_aarch64, in_t), + name, + in_t, + in_t, + out_t, + ext_c, + current_fn, + ); + let test = gen_test(name, &in_t, &out_t, current_tests, type_len(in_t)); + + (function, test) +} + +/// Expands a trailing `.` (sign-agnostic element suffix such as `i8`) or `.s` (sign-specific suffix such as `s8`/`u8`) on `intr` for type `t` and returns the quoted instruction name; any other `intr` is returned unchanged. 
+fn expand_intrinsic(intr: &str, t: &str) -> String { + if intr.ends_with(".") { + let ext = match t { + "int8x8_t" => "i8", + "int8x16_t" => "i8", + "int16x4_t" => "i16", + "int16x8_t" => "i16", + "int32x2_t" => "i32", + "int32x4_t" => "i32", + "int64x1_t" => "i64", + "int64x2_t" => "i64", + "uint8x8_t" => "i8", + "uint8x16_t" => "i8", + "uint16x4_t" => "i16", + "uint16x8_t" => "i16", + "uint32x2_t" => "i32", + "uint32x4_t" => "i32", + "uint64x1_t" => "i64", + "uint64x2_t" => "i64", + "float16x4_t" => "f16", + "float16x8_t" => "f16", + "float32x2_t" => "f32", + "float32x4_t" => "f32", + "float64x1_t"
=> "f64", + "float64x2_t" => "f64", + /* + "poly64x1_t" => "i64x1", + "poly64x2_t" => "i64x2", + */ + _ => panic!("unknown type for extension: {}", t), + }; + format!(r#""{}{}""#, intr, ext) + } else if intr.ends_with(".s") { + let ext = match t { + "int8x8_t" => "s8", + "int8x16_t" => "s8", + "int16x4_t" => "s16", + "int16x8_t" => "s16", + "int32x2_t" => "s32", + "int32x4_t" => "s32", + "int64x1_t" => "s64", + "int64x2_t" => "s64", + "uint8x8_t" => "u8", + "uint8x16_t" => "u8", + "uint16x4_t" => "u16", + "uint16x8_t" => "u16", + "uint32x2_t" => "u32", + "uint32x4_t" => "u32", + "uint64x1_t" => "u64", + "uint64x2_t" => "u64", + "float16x4_t" => "f16", + "float16x8_t" => "f16", + "float32x2_t" => "f32", + "float32x4_t" => "f32", + "float64x1_t" => "f64", + "float64x2_t" => "f64", + /* + "poly64x1_t" => "i64x1", + "poly64x2_t" => "i64x2", + */ + _ => panic!("unknown type for extension: {}", t), + }; + format!(r#""{}{}""#, &intr[..intr.len() - 1], ext) + } else { + intr.to_string() + } +} + +fn main() -> io::Result<()> { + let args: Vec<String> = env::args().collect(); + let in_file = args.get(1).cloned().unwrap_or_else(|| IN.to_string()); + + let f = File::open(in_file).expect("Failed to open neon.spec"); + let f = BufReader::new(f); + + let mut current_comment = String::new(); + let mut current_name: Option<String> = None; + let mut current_fn: Option<String> = None; + let mut current_arm: Option<String> = None; + let mut current_aarch64: Option<String> = None; + let mut link_arm: Option<String> = None; + let mut link_aarch64: Option<String> = None; + let mut a: Vec<String> = Vec::new(); + let mut b: Vec<String> = Vec::new(); + let mut current_tests: Vec<(Vec<String>, Vec<String>, Vec<String>)> = Vec::new(); + + // + // THIS FILE IS GENERATED FORM neon.spec DO NOT CHANGE IT MANUALLY + // + let mut out_arm = String::from( + r#"// This code is automatically generated. DO NOT MODIFY. 
+// +// Instead, modify `crates/stdarch-gen/neon.spec` and run the following command to re-generate this file: +// +// ``` +// OUT_DIR=`pwd`/crates/core_arch cargo run -p stdarch-gen -- crates/stdarch-gen/neon.spec +// ``` +use super::*; +#[cfg(test)] +use stdarch_test::assert_instr; +"#, + ); + let mut tests_arm = String::from( + r#" +#[cfg(test)] +#[allow(overflowing_literals)] +mod test { + use super::*; + use crate::core_arch::simd::*; + use std::mem::transmute; + use stdarch_test::simd_test; +"#, + ); + // + // THIS FILE IS GENERATED FORM neon.spec DO NOT CHANGE IT MANUALLY + // + let mut out_aarch64 = String::from( + r#"// This code is automatically generated. DO NOT MODIFY. +// +// Instead, modify `crates/stdarch-gen/neon.spec` and run the following command to re-generate this file: +// +// ``` +// OUT_DIR=`pwd`/crates/core_arch cargo run -p stdarch-gen -- crates/stdarch-gen/neon.spec +// ``` +use super::*; +#[cfg(test)] +use stdarch_test::assert_instr; +"#, + ); + let mut tests_aarch64 = String::from( + r#" +#[cfg(test)] +mod test { + use super::*; + use crate::core_arch::simd::*; + use std::mem::transmute; + use stdarch_test::simd_test; +"#, + ); + + for line in f.lines() { + let line = line.unwrap(); + if line.is_empty() { + continue; + } + if line.starts_with("/// ") { + current_comment = line; + current_name = None; + current_fn = None; + current_arm = None; + current_aarch64 = None; + link_aarch64 = None; + link_arm = None; + current_tests = Vec::new(); + } else if line.starts_with("//") { + } else if line.starts_with("name = ") { + current_name = Some(String::from(&line[7..])); + } else if line.starts_with("fn = ") { + current_fn = Some(String::from(&line[5..])); + } else if line.starts_with("arm = ") { + current_arm = Some(String::from(&line[6..])); + } else if line.starts_with("aarch64 = ") { + current_aarch64 = Some(String::from(&line[10..])); + } else if line.starts_with("a = ") { + a = line[4..].split(',').map(|v| v.trim().to_string()).collect(); 
+ } else if line.starts_with("b = ") { + b = line[4..].split(',').map(|v| v.trim().to_string()).collect(); + } else if line.starts_with("validate ") { + let e = line[9..].split(',').map(|v| v.trim().to_string()).collect(); + current_tests.push((a.clone(), b.clone(), e)); + } else if line.starts_with("link-aarch64 = ") { + link_aarch64 = Some(String::from(&line[15..])); + } else if line.starts_with("link-arm = ") { + link_arm = Some(String::from(&line[11..])); + } else if line.starts_with("generate ") { + let line = &line[9..]; + let types: Vec<String> = line + .split(',') + .map(|v| v.trim().to_string()) + .flat_map(|v| match v.as_str() { + "uint*_t" => UINT_TYPES.iter().map(|v| v.to_string()).collect(), + "uint64x*_t" => UINT_TYPES_64.iter().map(|v| v.to_string()).collect(), + "int*_t" => INT_TYPES.iter().map(|v| v.to_string()).collect(), + "int64x*_t" => INT_TYPES_64.iter().map(|v| v.to_string()).collect(), + "float*_t" => FLOAT_TYPES.iter().map(|v| v.to_string()).collect(), + "float64x*_t" => FLOAT_TYPES_64.iter().map(|v| v.to_string()).collect(), + _ => vec![v], + }) + .collect(); + + for line in types { + let spec: Vec<&str> = line.split(':').map(|e| e.trim()).collect(); + let in_t; + let out_t; + if spec.len() == 1 { + in_t = spec[0]; + out_t = spec[0]; + } else if spec.len() == 2 { + in_t = spec[0]; + out_t = spec[1]; + } else { + panic!("Bad spec: {}", line) + } + let current_name = current_name.clone().unwrap(); + let name = format!("{}{}", current_name, type_to_suffix(in_t),); + + // With an `arm` instruction set the intrinsic goes to the arm output, otherwise to the aarch64-only output. 
+ if let Some(current_arm) = current_arm.clone() { + let (function, test) = gen_arm( + &current_comment, + &current_fn, + &name, + &current_arm, + &link_arm, + &current_aarch64, + &link_aarch64, + &in_t, + &out_t, + &current_tests, + ); + out_arm.push_str(&function); + tests_arm.push_str(&test); + } else { + let (function, test) = gen_aarch64( + &current_comment, + &current_fn, + &name, + &current_aarch64, + &link_aarch64, + &in_t, + &out_t, + &current_tests, + ); + out_aarch64.push_str(&function); + tests_aarch64.push_str(&test); +
} + } + } + } + tests_arm.push('}'); + tests_arm.push('\n'); + tests_aarch64.push('}'); + tests_aarch64.push('\n'); + + let arm_out_path: PathBuf = PathBuf::from(env::var("OUT_DIR").unwrap()) + .join("src") + .join("arm") + .join("neon"); + std::fs::create_dir_all(&arm_out_path)?; + + let mut file_arm = File::create(arm_out_path.join(ARM_OUT))?; + file_arm.write_all(out_arm.as_bytes())?; + file_arm.write_all(tests_arm.as_bytes())?; + + let aarch64_out_path: PathBuf = PathBuf::from(env::var("OUT_DIR").unwrap()) + .join("src") + .join("aarch64") + .join("neon"); + std::fs::create_dir_all(&aarch64_out_path)?; + + let mut file_aarch = File::create(aarch64_out_path.join(AARCH64_OUT))?; + file_aarch.write_all(out_aarch64.as_bytes())?; + file_aarch.write_all(tests_aarch64.as_bytes())?; + /* + if let Err(e) = Command::new("rustfmt") + .arg(&arm_out_path) + .arg(&aarch64_out_path) + .status() { + eprintln!("Could not format `{}`: {}", arm_out_path.to_str().unwrap(), e); + eprintln!("Could not format `{}`: {}", aarch64_out_path.to_str().unwrap(), e); + }; + */ + Ok(()) +} |
