aboutsummaryrefslogtreecommitdiff
path: root/libgrust/rustc-lib/stdarch/crates/stdarch-gen
diff options
context:
space:
mode:
Diffstat (limited to 'libgrust/rustc-lib/stdarch/crates/stdarch-gen')
-rw-r--r--libgrust/rustc-lib/stdarch/crates/stdarch-gen/Cargo.toml9
-rw-r--r--libgrust/rustc-lib/stdarch/crates/stdarch-gen/README.md11
-rw-r--r--libgrust/rustc-lib/stdarch/crates/stdarch-gen/neon.spec469
-rw-r--r--libgrust/rustc-lib/stdarch/crates/stdarch-gen/src/main.rs750
4 files changed, 1239 insertions, 0 deletions
diff --git a/libgrust/rustc-lib/stdarch/crates/stdarch-gen/Cargo.toml b/libgrust/rustc-lib/stdarch/crates/stdarch-gen/Cargo.toml
new file mode 100644
index 0000000..b339672
--- /dev/null
+++ b/libgrust/rustc-lib/stdarch/crates/stdarch-gen/Cargo.toml
@@ -0,0 +1,9 @@
+[package]
+name = "stdarch-gen"
+version = "0.1.0"
+authors = ["Heinz Gies <heinz@licenser.net>"]
+edition = "2018"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
diff --git a/libgrust/rustc-lib/stdarch/crates/stdarch-gen/README.md b/libgrust/rustc-lib/stdarch/crates/stdarch-gen/README.md
new file mode 100644
index 0000000..54b602c
--- /dev/null
+++ b/libgrust/rustc-lib/stdarch/crates/stdarch-gen/README.md
@@ -0,0 +1,11 @@
+# Neon intrinsic code generator
+
+A small tool that allows to quickly generate intrinsics for the NEON architecture.
+
+The specification for the intrinsics can be found in `neon.spec`.
+
+To run and re-generate the code run the following from the root of the `stdarch` crate.
+
+```
+OUT_DIR=`pwd`/crates/core_arch cargo run -p stdarch-gen -- crates/stdarch-gen/neon.spec
+``` \ No newline at end of file
diff --git a/libgrust/rustc-lib/stdarch/crates/stdarch-gen/neon.spec b/libgrust/rustc-lib/stdarch/crates/stdarch-gen/neon.spec
new file mode 100644
index 0000000..0343a72
--- /dev/null
+++ b/libgrust/rustc-lib/stdarch/crates/stdarch-gen/neon.spec
@@ -0,0 +1,469 @@
+// ARM Neon intrinsic specification.
+//
+// This file contains the specification for a number of
+// intrinsics that allows us to generate them along with
+// their test cases.
+//
+// To the syntax of the file - it's not very intelligently parsed!
+//
+// # Comments
+// start with AT LEAST two, or four or more slashes so // is a
+// comment /////// is too.
+//
+// # Sections
+// Sections start with EXACTLY three slashes followed
+// by AT LEAST one space. Sections are used for two things:
+//
+// 1) they serve as the doc comment for the given intrinics.
+// 2) they reset all variables (name, fn, etc.)
+//
+// # Variables
+//
+// name - The prefix of the function, suffixes are auto
+// generated by the type they get passed.
+//
+// fn - The function to call in rust-land.
+//
+// aarch64 - The intrinsic to check on aarch64 architecture.
+// If this is given but no arm intrinsic is provided,
+// the function will exclusively be generated for
+// aarch64.
+// This is used to generate both aarch64 specific and
+// shared intrinics by first only specifying th aarch64
+// variant then the arm variant.
+//
+// arm - The arm v7 intrinics used to checked for arm code
+// generation. All neon functions available in arm are
+// also available in aarch64. If no aarch64 intrinic was
+// set they are assumed to be the same.
+// Intrinics ending with a `.` will have a size suffixes
+// added (such as `i8` or `i64`) that is not sign specific
+// Intrinics ending with a `.s` will have a size suffixes
+// added (such as `s8` or `u64`) that is sign specific
+//
+// a - First input for tests, it gets scaled to the size of
+// the type.
+//
+// b - Second input for tests, it gets scaled to the size of
+// the type.
+//
+// # special values
+//
+// TRUE - 'true' all bits are set to 1
+// FALSE - 'false' all bits are set to 0
+// FF - same as 'true'
+// MIN - minimal value (either 0 or the lowest negative number)
+// MAX - maximal value propr to overflow
+//
+// # validate <values>
+// Validates a and b aginst the expected result of the test.
+// The special values 'TRUE' and 'FALSE' can be used to
+// represent the corect NEON representation of true or
+// false values. It too gets scaled to the type.
+//
+// Validate needs to be called before generate as it sets
+// up the rules for validation that get generated for each
+// type.
+// # generate <types>
+// The generate command generates the intrinsics, it uses the
+// Variables set and can be called multiple times while overwriting
+// some of the variables.
+
+/// Vector bitwise and
+name = vand
+fn = simd_and
+arm = vand
+aarch64 = and
+a = 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00
+b = 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F
+validate 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00
+b = 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+validate 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+generate int*_t, uint*_t, int64x*_t, uint64x*_t
+
+/// Vector bitwise or (immediate, inclusive)
+name = vorr
+fn = simd_or
+arm = vorr
+aarch64 = orr
+a = 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
+b = 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+validate 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
+generate int*_t, uint*_t, int64x*_t, uint64x*_t
+
+
+/// Vector bitwise exclusive or (vector)
+name = veor
+fn = simd_xor
+arm = veor
+aarch64 = eor
+a = 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
+b = 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+validate 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
+generate int*_t, uint*_t, int64x*_t, uint64x*_t
+
+////////////////////
+// equality
+////////////////////
+
+/// Compare bitwise Equal (vector)
+name = vceq
+fn = simd_eq
+a = MIN, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, MAX
+b = MIN, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, MAX
+validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
+a = MIN, MIN, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0xCC, 0x0D, 0xEE, MAX
+b = MIN, MAX, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08, 0x08, 0x00, 0x0A, 0x0A, 0xCC, 0xD0, 0xEE, MIN
+validate TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE
+
+aarch64 = cmeq
+generate uint64x*_t, int64x1_t:uint64x1_t, int64x2_t:uint64x2_t, poly64x1_t:uint64x1_t, poly64x2_t:uint64x2_t
+
+arm = vceq.
+generate uint*_t, int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t
+
+/// Floating-point compare equal
+name = vceq
+fn = simd_eq
+a = 1.2, 3.4, 5.6, 7.8
+b = 1.2, 3.4, 5.6, 7.8
+validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
+
+aarch64 = fcmeq
+generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
+
+arm = vceq.
+// we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t
+generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t
+
+////////////////////
+// greater then
+////////////////////
+
+/// Compare signed greater than
+name = vcgt
+fn = simd_gt
+a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
+aarch64 = cmgt
+generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t
+
+arm = vcgt.s
+generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t
+
+/// Compare unsigned highe
+name = vcgt
+fn = simd_gt
+a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
+
+aarch64 = cmhi
+generate uint64x*_t
+
+arm = vcgt.s
+generate uint*_t
+
+/// Floating-point compare greater than
+name = vcgt
+fn = simd_gt
+a = 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8, 8.9
+b = 0.1, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8
+validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
+
+aarch64 = fcmgt
+generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
+
+arm = vcgt.s
+// we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t
+generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t
+
+////////////////////
+// lesser then
+////////////////////
+
+/// Compare signed less than
+name = vclt
+fn = simd_lt
+a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
+aarch64 = cmgt
+generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t
+
+arm = vcgt.s
+generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t
+
+/// Compare unsigned less than
+name = vclt
+fn = simd_lt
+a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
+
+aarch64 = cmhi
+generate uint64x*_t
+
+arm = vcgt.s
+generate uint*_t
+
+/// Floating-point compare less than
+name = vclt
+fn = simd_lt
+a = 0.1, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8
+b = 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8, 8.9
+validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
+
+aarch64 = fcmgt
+generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
+
+arm = vcgt.s
+// we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t
+generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t
+
+////////////////////
+// lesser then equals
+////////////////////
+
+/// Compare signed less than or equal
+name = vcle
+fn = simd_le
+a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
+
+aarch64 = cmge
+generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t
+
+arm = vcge.s
+generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t
+
+/// Compare unsigned less than or equal
+name = vcle
+fn = simd_le
+a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
+
+aarch64 = cmhs
+generate uint64x*_t
+
+arm = vcge.s
+generate uint*_t
+
+/// Floating-point compare less than or equal
+name = vcle
+fn = simd_le
+a = 0.1, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8
+b = 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8, 8.9
+validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
+aarch64 = fcmge
+generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
+
+// we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t
+arm = vcge.s
+generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t
+
+////////////////////
+// greater then equals
+////////////////////
+
+/// Compare signed greater than or equal
+name = vcge
+fn = simd_ge
+a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
+
+aarch64 = cmge
+generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t
+
+arm = vcge.s
+generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t
+
+/// Compare unsigned greater than or equal
+name = vcge
+fn = simd_ge
+a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
+
+aarch64 = cmhs
+generate uint64x*_t
+
+arm = vcge.s
+generate uint*_t
+
+/// Floating-point compare greater than or equal
+name = vcge
+fn = simd_ge
+a = 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8, 8.9
+b = 0.1, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8
+validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
+
+aarch64 = fcmge
+generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
+
+arm = vcge.s
+// we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t
+generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t
+
+/// Saturating subtract
+name = vqsub
+a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42
+b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+validate 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26
+
+arm = vqsub.s
+aarch64 = uqsub
+link-arm = vqsubu._EXT_
+link-aarch64 = uqsub._EXT_
+generate uint*_t
+
+arm = vqsub.s
+aarch64 = sqsub
+link-arm = vqsubs._EXT_
+link-aarch64 = sqsub._EXT_
+generate int*_t
+
+/// Halving add
+name = vhadd
+a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42
+b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+validate 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29
+
+
+arm = vhadd.s
+aarch64 = uhadd
+link-aarch64 = uhadd._EXT_
+link-arm = vhaddu._EXT_
+generate uint*_t
+
+
+arm = vhadd.s
+aarch64 = shadd
+link-aarch64 = shadd._EXT_
+link-arm = vhadds._EXT_
+generate int*_t
+
+/// Rounding halving add
+name = vrhadd
+a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42
+b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+validate 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29
+
+arm = vrhadd.s
+aarch64 = urhadd
+link-arm = vrhaddu._EXT_
+link-aarch64 = urhadd._EXT_
+generate uint*_t
+
+arm = vrhadd.s
+aarch64 = srhadd
+link-arm = vrhadds._EXT_
+link-aarch64 = srhadd._EXT_
+generate int*_t
+
+/// Saturating add
+name = vqadd
+a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42
+b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+validate 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58
+
+arm = vqadd.s
+aarch64 = uqadd
+link-arm = vqaddu._EXT_
+link-aarch64 = uqadd._EXT_
+generate uint*_t
+
+arm = vqadd.s
+aarch64 = sqadd
+link-arm = vqadds._EXT_
+link-aarch64 = sqadd._EXT_
+generate int*_t
+
+// requires 1st and second argument to be different, this not implemented yet
+// /// Signed saturating accumulate of unsigned value
+//
+// name = vuqadd
+// a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42
+// b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+// e = 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58
+
+// it seems like we don't have those in rustland :(
+// aarch64 = suqadd
+// link-aarch64 = usqadd._EXT_
+// generate int64x*_t
+
+/ arm = suqadd
+// link-arm = vuqadds._EXT_
+// link-aarch64 = suqadd._EXT_
+// generate int*_t
+
+
+/// Multiply
+name = vmul
+a = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2
+b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+validate 1, 4, 3, 8, 5, 12, 7, 16, 9, 20, 11, 24, 13, 28, 15, 32
+arm = vmul.
+aarch64 = mul
+fn = simd_mul
+generate int*_t, uint*_t
+
+/// Multiply
+name = vmul
+fn = simd_mul
+a = 1.0, 2.0, 1.0, 2.0
+b = 2.0, 3.0, 4.0, 5.0
+validate 2.0, 6.0, 4.0, 10.0
+
+aarch64 = fmul
+generate float64x*_t
+
+arm = vmul.
+generate float*_t
+
+
+/// Subtract
+name = vsub
+a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+b = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2
+validate 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14
+arm = vsub.
+aarch64 = sub
+fn = simd_sub
+generate int*_t, uint*_t, int64x*_t, uint64x*_t
+
+/// Subtract
+name = vsub
+fn = simd_sub
+a = 1.0, 4.0, 3.0, 8.0
+b = 1.0, 2.0, 3.0, 4.0
+validate 0.0, 2.0, 0.0, 4.0
+
+aarch64 = fsub
+generate float64x*_t
+
+arm = vsub.
+generate float*_t
+
+
+/// Signed halving subtract
+name = vhsub
+a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+b = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2
+validate 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7
+
+arm = vhsub.s
+aarch64 = uhsub
+link-arm = vhsubu._EXT_
+link-aarch64 = uhsub._EXT_
+generate uint*_t
+
+arm = vhsub.s
+aarch64 = shsub
+link-arm = vhsubs._EXT_
+link-aarch64 = shsub._EXT_
+generate int*_t
diff --git a/libgrust/rustc-lib/stdarch/crates/stdarch-gen/src/main.rs b/libgrust/rustc-lib/stdarch/crates/stdarch-gen/src/main.rs
new file mode 100644
index 0000000..8a9d9f2
--- /dev/null
+++ b/libgrust/rustc-lib/stdarch/crates/stdarch-gen/src/main.rs
@@ -0,0 +1,750 @@
+use std::env;
+use std::fs::File;
+use std::io::prelude::*;
+use std::io::{self, BufReader};
+use std::path::PathBuf;
+
+const IN: &str = "neon.spec";
+const ARM_OUT: &str = "generated.rs";
+const AARCH64_OUT: &str = "generated.rs";
+
+const UINT_TYPES: [&str; 6] = [
+ "uint8x8_t",
+ "uint8x16_t",
+ "uint16x4_t",
+ "uint16x8_t",
+ "uint32x2_t",
+ "uint32x4_t",
+];
+
+const UINT_TYPES_64: [&str; 2] = ["uint64x1_t", "uint64x2_t"];
+
+const INT_TYPES: [&str; 6] = [
+ "int8x8_t",
+ "int8x16_t",
+ "int16x4_t",
+ "int16x8_t",
+ "int32x2_t",
+ "int32x4_t",
+];
+
+const INT_TYPES_64: [&str; 2] = ["int64x1_t", "int64x2_t"];
+
+const FLOAT_TYPES: [&str; 2] = [
+ //"float8x8_t", not supported by rust
+ //"float8x16_t", not supported by rust
+ //"float16x4_t", not supported by rust
+ //"float16x8_t", not supported by rust
+ "float32x2_t",
+ "float32x4_t",
+];
+
+const FLOAT_TYPES_64: [&str; 2] = [
+ //"float8x8_t", not supported by rust
+ //"float8x16_t", not supported by rust
+ //"float16x4_t", not supported by rust
+ //"float16x8_t", not supported by rust
+ "float64x1_t",
+ "float64x2_t",
+];
+
+fn type_len(t: &str) -> usize {
+ match t {
+ "int8x8_t" => 8,
+ "int8x16_t" => 16,
+ "int16x4_t" => 4,
+ "int16x8_t" => 8,
+ "int32x2_t" => 2,
+ "int32x4_t" => 4,
+ "int64x1_t" => 1,
+ "int64x2_t" => 2,
+ "uint8x8_t" => 8,
+ "uint8x16_t" => 16,
+ "uint16x4_t" => 4,
+ "uint16x8_t" => 8,
+ "uint32x2_t" => 2,
+ "uint32x4_t" => 4,
+ "uint64x1_t" => 1,
+ "uint64x2_t" => 2,
+ "float16x4_t" => 4,
+ "float16x8_t" => 8,
+ "float32x2_t" => 2,
+ "float32x4_t" => 4,
+ "float64x1_t" => 1,
+ "float64x2_t" => 2,
+ "poly64x1_t" => 1,
+ "poly64x2_t" => 2,
+ _ => panic!("unknown type: {}", t),
+ }
+}
+
+fn type_to_suffix(t: &str) -> &str {
+ match t {
+ "int8x8_t" => "_s8",
+ "int8x16_t" => "q_s8",
+ "int16x4_t" => "_s16",
+ "int16x8_t" => "q_s16",
+ "int32x2_t" => "_s32",
+ "int32x4_t" => "q_s32",
+ "int64x1_t" => "_s64",
+ "int64x2_t" => "q_s64",
+ "uint8x8_t" => "_u8",
+ "uint8x16_t" => "q_u8",
+ "uint16x4_t" => "_u16",
+ "uint16x8_t" => "q_u16",
+ "uint32x2_t" => "_u32",
+ "uint32x4_t" => "q_u32",
+ "uint64x1_t" => "_u64",
+ "uint64x2_t" => "q_u64",
+ "float16x4_t" => "_f16",
+ "float16x8_t" => "q_f16",
+ "float32x2_t" => "_f32",
+ "float32x4_t" => "q_f32",
+ "float64x1_t" => "_f64",
+ "float64x2_t" => "q_f64",
+ "poly64x1_t" => "_p64",
+ "poly64x2_t" => "q_p64",
+ _ => panic!("unknown type: {}", t),
+ }
+}
+
+fn type_to_global_type(t: &str) -> &str {
+ match t {
+ "int8x8_t" => "i8x8",
+ "int8x16_t" => "i8x16",
+ "int16x4_t" => "i16x4",
+ "int16x8_t" => "i16x8",
+ "int32x2_t" => "i32x2",
+ "int32x4_t" => "i32x4",
+ "int64x1_t" => "i64x1",
+ "int64x2_t" => "i64x2",
+ "uint8x8_t" => "u8x8",
+ "uint8x16_t" => "u8x16",
+ "uint16x4_t" => "u16x4",
+ "uint16x8_t" => "u16x8",
+ "uint32x2_t" => "u32x2",
+ "uint32x4_t" => "u32x4",
+ "uint64x1_t" => "u64x1",
+ "uint64x2_t" => "u64x2",
+ "float16x4_t" => "f16x4",
+ "float16x8_t" => "f16x8",
+ "float32x2_t" => "f32x2",
+ "float32x4_t" => "f32x4",
+ "float64x1_t" => "f64",
+ "float64x2_t" => "f64x2",
+ "poly64x1_t" => "i64x1",
+ "poly64x2_t" => "i64x2",
+ _ => panic!("unknown type: {}", t),
+ }
+}
+
+// fn type_to_native_type(t: &str) -> &str {
+// match t {
+// "int8x8_t" => "i8",
+// "int8x16_t" => "i8",
+// "int16x4_t" => "i16",
+// "int16x8_t" => "i16",
+// "int32x2_t" => "i32",
+// "int32x4_t" => "i32",
+// "int64x1_t" => "i64",
+// "int64x2_t" => "i64",
+// "uint8x8_t" => "u8",
+// "uint8x16_t" => "u8",
+// "uint16x4_t" => "u16",
+// "uint16x8_t" => "u16",
+// "uint32x2_t" => "u32",
+// "uint32x4_t" => "u32",
+// "uint64x1_t" => "u64",
+// "uint64x2_t" => "u64",
+// "float16x4_t" => "f16",
+// "float16x8_t" => "f16",
+// "float32x2_t" => "f32",
+// "float32x4_t" => "f32",
+// "float64x1_t" => "f64",
+// "float64x2_t" => "f64",
+// "poly64x1_t" => "i64",
+// "poly64x2_t" => "i64",
+// _ => panic!("unknown type: {}", t),
+// }
+// }
+
+fn type_to_ext(t: &str) -> &str {
+ match t {
+ "int8x8_t" => "v8i8",
+ "int8x16_t" => "v16i8",
+ "int16x4_t" => "v4i16",
+ "int16x8_t" => "v8i16",
+ "int32x2_t" => "v2i32",
+ "int32x4_t" => "v4i32",
+ "int64x1_t" => "v1i64",
+ "int64x2_t" => "v2i64",
+ "uint8x8_t" => "v8i8",
+ "uint8x16_t" => "v16i8",
+ "uint16x4_t" => "v4i16",
+ "uint16x8_t" => "v8i16",
+ "uint32x2_t" => "v2i32",
+ "uint32x4_t" => "v4i32",
+ "uint64x1_t" => "v1i64",
+ "uint64x2_t" => "v2i64",
+ "float16x4_t" => "v4f16",
+ "float16x8_t" => "v8f16",
+ "float32x2_t" => "v2f32",
+ "float32x4_t" => "v4f32",
+ "float64x1_t" => "v1f64",
+ "float64x2_t" => "v2f64",
+ /*
+ "poly64x1_t" => "i64x1",
+ "poly64x2_t" => "i64x2",
+ */
+ _ => panic!("unknown type for extension: {}", t),
+ }
+}
+
+fn values(t: &str, vs: &[String]) -> String {
+ if vs.len() == 1 && !t.contains('x') {
+ format!(": {} = {}", t, vs[0])
+ } else if vs.len() == 1 && type_to_global_type(t) == "f64" {
+ format!(": {} = {}", type_to_global_type(t), vs[0])
+ } else {
+ format!(
+ ": {} = {}::new({})",
+ type_to_global_type(t),
+ type_to_global_type(t),
+ vs.iter()
+ .map(|v| map_val(type_to_global_type(t), v))
+ //.map(|v| format!("{}{}", v, type_to_native_type(t)))
+ .collect::<Vec<_>>()
+ .join(", ")
+ )
+ }
+}
+
+fn max_val(t: &str) -> &'static str {
+ match &t[..3] {
+ "u8x" => "0xFF",
+ "u16" => "0xFF_FF",
+ "u32" => "0xFF_FF_FF_FF",
+ "u64" => "0xFF_FF_FF_FF_FF_FF_FF_FF",
+ "i8x" => "0x7F",
+ "i16" => "0x7F_FF",
+ "i32" => "0x7F_FF_FF_FF",
+ "i64" => "0x7F_FF_FF_FF_FF_FF_FF_FF",
+ "f32" => "3.40282347e+38",
+ "f64" => "1.7976931348623157e+308",
+ _ => panic!("No TRUE for type {}", t),
+ }
+}
+
+fn min_val(t: &str) -> &'static str {
+ match &t[..3] {
+ "u8x" => "0",
+ "u16" => "0",
+ "u32" => "0",
+ "u64" => "0",
+ "i8x" => "-128",
+ "i16" => "-32768",
+ "i32" => "-2147483648",
+ "i64" => "-9223372036854775808",
+ "f32" => "-3.40282347e+38",
+ "f64" => "-1.7976931348623157e+308",
+ _ => panic!("No TRUE for type {}", t),
+ }
+}
+
+fn true_val(t: &str) -> &'static str {
+ match &t[..3] {
+ "u8x" => "0xFF",
+ "u16" => "0xFF_FF",
+ "u32" => "0xFF_FF_FF_FF",
+ "u64" => "0xFF_FF_FF_FF_FF_FF_FF_FF",
+ _ => panic!("No TRUE for type {}", t),
+ }
+}
+
+fn ff_val(t: &str) -> &'static str {
+ match &t[..3] {
+ "u8x" => "0xFF",
+ "u16" => "0xFF_FF",
+ "u32" => "0xFF_FF_FF_FF",
+ "u64" => "0xFF_FF_FF_FF_FF_FF_FF_FF",
+ "i8x" => "0xFF",
+ "i16" => "0xFF_FF",
+ "i32" => "0xFF_FF_FF_FF",
+ "i64" => "0xFF_FF_FF_FF_FF_FF_FF_FF",
+ _ => panic!("No TRUE for type {}", t),
+ }
+}
+
+fn false_val(_t: &str) -> &'static str {
+ "0"
+}
+fn map_val<'v>(t: &str, v: &'v str) -> &'v str {
+ match v {
+ "FALSE" => false_val(t),
+ "TRUE" => true_val(t),
+ "MAX" => min_val(t),
+ "MIN" => max_val(t),
+ "FF" => ff_val(t),
+ o => o,
+ }
+}
+
+#[allow(clippy::too_many_arguments)]
+fn gen_aarch64(
+ current_comment: &str,
+ current_fn: &Option<String>,
+ name: &str,
+ current_aarch64: &Option<String>,
+ link_aarch64: &Option<String>,
+ in_t: &str,
+ out_t: &str,
+ current_tests: &[(Vec<String>, Vec<String>, Vec<String>)],
+) -> (String, String) {
+ let _global_t = type_to_global_type(in_t);
+ let _global_ret_t = type_to_global_type(out_t);
+ let current_fn = if let Some(current_fn) = current_fn.clone() {
+ if link_aarch64.is_some() {
+ panic!("[{}] Can't specify link and fn at the same time.", name)
+ }
+ current_fn
+ } else {
+ if link_aarch64.is_none() {
+ panic!("[{}] Either fn or link-aarch have to be specified.", name)
+ }
+ format!("{}_", name)
+ };
+ let current_aarch64 = current_aarch64.clone().unwrap();
+ let ext_c = if let Some(link_aarch64) = link_aarch64.clone() {
+ let ext = type_to_ext(in_t);
+
+ format!(
+ r#"
+ #[allow(improper_ctypes)]
+ extern "C" {{
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.{}")]
+ fn {}(a: {}, a: {}) -> {};
+ }}
+"#,
+ link_aarch64.replace("_EXT_", ext),
+ current_fn,
+ in_t,
+ in_t,
+ out_t
+ )
+ } else {
+ String::new()
+ };
+ let function = format!(
+ r#"
+{}
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr({}))]
+pub unsafe fn {}(a: {}, b: {}) -> {} {{
+ {}{}(a, b)
+}}
+"#,
+ current_comment, current_aarch64, name, in_t, in_t, out_t, ext_c, current_fn,
+ );
+
+ let test = gen_test(name, &in_t, &out_t, current_tests, type_len(in_t));
+ (function, test)
+}
+
+fn gen_test(
+ name: &str,
+ in_t: &str,
+ out_t: &str,
+ current_tests: &[(Vec<String>, Vec<String>, Vec<String>)],
+ len: usize,
+) -> String {
+ let mut test = format!(
+ r#"
+ #[simd_test(enable = "neon")]
+ unsafe fn test_{}() {{"#,
+ name,
+ );
+ for (a, b, e) in current_tests {
+ let a: Vec<String> = a.iter().take(len).cloned().collect();
+ let b: Vec<String> = b.iter().take(len).cloned().collect();
+ let e: Vec<String> = e.iter().take(len).cloned().collect();
+ let t = format!(
+ r#"
+ let a{};
+ let b{};
+ let e{};
+ let r: {} = transmute({}(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+"#,
+ values(in_t, &a),
+ values(in_t, &b),
+ values(out_t, &e),
+ type_to_global_type(out_t),
+ name
+ );
+ test.push_str(&t);
+ }
+ test.push_str(" }\n");
+ test
+}
+
+#[allow(clippy::too_many_arguments)]
+fn gen_arm(
+ current_comment: &str,
+ current_fn: &Option<String>,
+ name: &str,
+ current_arm: &str,
+ link_arm: &Option<String>,
+ current_aarch64: &Option<String>,
+ link_aarch64: &Option<String>,
+ in_t: &str,
+ out_t: &str,
+ current_tests: &[(Vec<String>, Vec<String>, Vec<String>)],
+) -> (String, String) {
+ let _global_t = type_to_global_type(in_t);
+ let _global_ret_t = type_to_global_type(out_t);
+ let current_aarch64 = current_aarch64
+ .clone()
+ .unwrap_or_else(|| current_arm.to_string());
+
+ let current_fn = if let Some(current_fn) = current_fn.clone() {
+ if link_aarch64.is_some() || link_arm.is_some() {
+ panic!(
+ "[{}] Can't specify link and function at the same time. {} / {:?} / {:?}",
+ name, current_fn, link_aarch64, link_arm
+ )
+ }
+ current_fn
+ } else {
+ if link_aarch64.is_none() || link_arm.is_none() {
+ panic!(
+ "[{}] Either fn or link-arm and link-aarch have to be specified.",
+ name
+ )
+ }
+ format!("{}_", name)
+ };
+
+ let ext_c =
+ if let (Some(link_arm), Some(link_aarch64)) = (link_arm.clone(), link_aarch64.clone()) {
+ let ext = type_to_ext(in_t);
+
+ format!(
+ r#"#[allow(improper_ctypes)]
+ extern "C" {{
+ #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.{}")]
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.{}")]
+ fn {}(a: {}, b: {}) -> {};
+ }}
+"#,
+ link_arm.replace("_EXT_", ext),
+ link_aarch64.replace("_EXT_", ext),
+ current_fn,
+ in_t,
+ in_t,
+ out_t
+ )
+ } else {
+ String::new()
+ };
+
+ let function = format!(
+ r#"
+{}
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr({}))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr({}))]
+pub unsafe fn {}(a: {}, b: {}) -> {} {{
+ {}{}(a, b)
+}}
+"#,
+ current_comment,
+ expand_intrinsic(&current_arm, in_t),
+ expand_intrinsic(&current_aarch64, in_t),
+ name,
+ in_t,
+ in_t,
+ out_t,
+ ext_c,
+ current_fn,
+ );
+ let test = gen_test(name, &in_t, &out_t, current_tests, type_len(in_t));
+
+ (function, test)
+}
+
+fn expand_intrinsic(intr: &str, t: &str) -> String {
+ if intr.ends_with(".") {
+ let ext = match t {
+ "int8x8_t" => "i8",
+ "int8x16_t" => "i8",
+ "int16x4_t" => "i16",
+ "int16x8_t" => "i16",
+ "int32x2_t" => "i32",
+ "int32x4_t" => "i32",
+ "int64x1_t" => "i64",
+ "int64x2_t" => "i64",
+ "uint8x8_t" => "i8",
+ "uint8x16_t" => "i8",
+ "uint16x4_t" => "i16",
+ "uint16x8_t" => "i16",
+ "uint32x2_t" => "i32",
+ "uint32x4_t" => "i32",
+ "uint64x1_t" => "i64",
+ "uint64x2_t" => "i64",
+ "float16x4_t" => "f16",
+ "float16x8_t" => "f16",
+ "float32x2_t" => "f32",
+ "float32x4_t" => "f32",
+ "float64x1_t" => "f64",
+ "float64x2_t" => "f64",
+ /*
+ "poly64x1_t" => "i64x1",
+ "poly64x2_t" => "i64x2",
+ */
+ _ => panic!("unknown type for extension: {}", t),
+ };
+ format!(r#""{}{}""#, intr, ext)
+ } else if intr.ends_with(".s") {
+ let ext = match t {
+ "int8x8_t" => "s8",
+ "int8x16_t" => "s8",
+ "int16x4_t" => "s16",
+ "int16x8_t" => "s16",
+ "int32x2_t" => "s32",
+ "int32x4_t" => "s32",
+ "int64x1_t" => "s64",
+ "int64x2_t" => "s64",
+ "uint8x8_t" => "u8",
+ "uint8x16_t" => "u8",
+ "uint16x4_t" => "u16",
+ "uint16x8_t" => "u16",
+ "uint32x2_t" => "u32",
+ "uint32x4_t" => "u32",
+ "uint64x1_t" => "u64",
+ "uint64x2_t" => "u64",
+ "float16x4_t" => "f16",
+ "float16x8_t" => "f16",
+ "float32x2_t" => "f32",
+ "float32x4_t" => "f32",
+ "float64x1_t" => "f64",
+ "float64x2_t" => "f64",
+ /*
+ "poly64x1_t" => "i64x1",
+ "poly64x2_t" => "i64x2",
+ */
+ _ => panic!("unknown type for extension: {}", t),
+ };
+ format!(r#""{}{}""#, &intr[..intr.len() - 1], ext)
+ } else {
+ intr.to_string()
+ }
+}
+
+fn main() -> io::Result<()> {
+ let args: Vec<String> = env::args().collect();
+ let in_file = args.get(1).cloned().unwrap_or_else(|| IN.to_string());
+
+ let f = File::open(in_file).expect("Failed to open neon.spec");
+ let f = BufReader::new(f);
+
+ let mut current_comment = String::new();
+ let mut current_name: Option<String> = None;
+ let mut current_fn: Option<String> = None;
+ let mut current_arm: Option<String> = None;
+ let mut current_aarch64: Option<String> = None;
+ let mut link_arm: Option<String> = None;
+ let mut link_aarch64: Option<String> = None;
+ let mut a: Vec<String> = Vec::new();
+ let mut b: Vec<String> = Vec::new();
+ let mut current_tests: Vec<(Vec<String>, Vec<String>, Vec<String>)> = Vec::new();
+
+ //
+ // THIS FILE IS GENERATED FORM neon.spec DO NOT CHANGE IT MANUALLY
+ //
+ let mut out_arm = String::from(
+ r#"// This code is automatically generated. DO NOT MODIFY.
+//
+// Instead, modify `crates/stdarch-gen/neon.spec` and run the following command to re-generate this file:
+//
+// ```
+// OUT_DIR=`pwd`/crates/core_arch cargo run -p stdarch-gen -- crates/stdarch-gen/neon.spec
+// ```
+use super::*;
+#[cfg(test)]
+use stdarch_test::assert_instr;
+"#,
+ );
+ let mut tests_arm = String::from(
+ r#"
+#[cfg(test)]
+#[allow(overflowing_literals)]
+mod test {
+ use super::*;
+ use crate::core_arch::simd::*;
+ use std::mem::transmute;
+ use stdarch_test::simd_test;
+"#,
+ );
+ //
+ // THIS FILE IS GENERATED FORM neon.spec DO NOT CHANGE IT MANUALLY
+ //
+ let mut out_aarch64 = String::from(
+ r#"// This code is automatically generated. DO NOT MODIFY.
+//
+// Instead, modify `crates/stdarch-gen/neon.spec` and run the following command to re-generate this file:
+//
+// ```
+// OUT_DIR=`pwd`/crates/core_arch cargo run -p stdarch-gen -- crates/stdarch-gen/neon.spec
+// ```
+use super::*;
+#[cfg(test)]
+use stdarch_test::assert_instr;
+"#,
+ );
+ let mut tests_aarch64 = String::from(
+ r#"
+#[cfg(test)]
+mod test {
+ use super::*;
+ use crate::core_arch::simd::*;
+ use std::mem::transmute;
+ use stdarch_test::simd_test;
+"#,
+ );
+
+ for line in f.lines() {
+ let line = line.unwrap();
+ if line.is_empty() {
+ continue;
+ }
+ if line.starts_with("/// ") {
+ current_comment = line;
+ current_name = None;
+ current_fn = None;
+ current_arm = None;
+ current_aarch64 = None;
+ link_aarch64 = None;
+ link_arm = None;
+ current_tests = Vec::new();
+ } else if line.starts_with("//") {
+ } else if line.starts_with("name = ") {
+ current_name = Some(String::from(&line[7..]));
+ } else if line.starts_with("fn = ") {
+ current_fn = Some(String::from(&line[5..]));
+ } else if line.starts_with("arm = ") {
+ current_arm = Some(String::from(&line[6..]));
+ } else if line.starts_with("aarch64 = ") {
+ current_aarch64 = Some(String::from(&line[10..]));
+ } else if line.starts_with("a = ") {
+ a = line[4..].split(',').map(|v| v.trim().to_string()).collect();
+ } else if line.starts_with("b = ") {
+ b = line[4..].split(',').map(|v| v.trim().to_string()).collect();
+ } else if line.starts_with("validate ") {
+ let e = line[9..].split(',').map(|v| v.trim().to_string()).collect();
+ current_tests.push((a.clone(), b.clone(), e));
+ } else if line.starts_with("link-aarch64 = ") {
+ link_aarch64 = Some(String::from(&line[15..]));
+ } else if line.starts_with("link-arm = ") {
+ link_arm = Some(String::from(&line[11..]));
+ } else if line.starts_with("generate ") {
+ let line = &line[9..];
+ let types: Vec<String> = line
+ .split(',')
+ .map(|v| v.trim().to_string())
+ .flat_map(|v| match v.as_str() {
+ "uint*_t" => UINT_TYPES.iter().map(|v| v.to_string()).collect(),
+ "uint64x*_t" => UINT_TYPES_64.iter().map(|v| v.to_string()).collect(),
+ "int*_t" => INT_TYPES.iter().map(|v| v.to_string()).collect(),
+ "int64x*_t" => INT_TYPES_64.iter().map(|v| v.to_string()).collect(),
+ "float*_t" => FLOAT_TYPES.iter().map(|v| v.to_string()).collect(),
+ "float64x*_t" => FLOAT_TYPES_64.iter().map(|v| v.to_string()).collect(),
+ _ => vec![v],
+ })
+ .collect();
+
+ for line in types {
+ let spec: Vec<&str> = line.split(':').map(|e| e.trim()).collect();
+ let in_t;
+ let out_t;
+ if spec.len() == 1 {
+ in_t = spec[0];
+ out_t = spec[0];
+ } else if spec.len() == 2 {
+ in_t = spec[0];
+ out_t = spec[1];
+ } else {
+ panic!("Bad spec: {}", line)
+ }
+ let current_name = current_name.clone().unwrap();
+ let name = format!("{}{}", current_name, type_to_suffix(in_t),);
+
+ if let Some(current_arm) = current_arm.clone() {
+ let (function, test) = gen_arm(
+ &current_comment,
+ &current_fn,
+ &name,
+ &current_arm,
+ &link_arm,
+ &current_aarch64,
+ &link_aarch64,
+ &in_t,
+ &out_t,
+ &current_tests,
+ );
+ out_arm.push_str(&function);
+ tests_arm.push_str(&test);
+ } else {
+ let (function, test) = gen_aarch64(
+ &current_comment,
+ &current_fn,
+ &name,
+ &current_aarch64,
+ &link_aarch64,
+ &in_t,
+ &out_t,
+ &current_tests,
+ );
+ out_aarch64.push_str(&function);
+ tests_aarch64.push_str(&test);
+ }
+ }
+ }
+ }
+ tests_arm.push('}');
+ tests_arm.push('\n');
+ tests_aarch64.push('}');
+ tests_aarch64.push('\n');
+
+ let arm_out_path: PathBuf = PathBuf::from(env::var("OUT_DIR").unwrap())
+ .join("src")
+ .join("arm")
+ .join("neon");
+ std::fs::create_dir_all(&arm_out_path)?;
+
+ let mut file_arm = File::create(arm_out_path.join(ARM_OUT))?;
+ file_arm.write_all(out_arm.as_bytes())?;
+ file_arm.write_all(tests_arm.as_bytes())?;
+
+ let aarch64_out_path: PathBuf = PathBuf::from(env::var("OUT_DIR").unwrap())
+ .join("src")
+ .join("aarch64")
+ .join("neon");
+ std::fs::create_dir_all(&aarch64_out_path)?;
+
+ let mut file_aarch = File::create(aarch64_out_path.join(AARCH64_OUT))?;
+ file_aarch.write_all(out_aarch64.as_bytes())?;
+ file_aarch.write_all(tests_aarch64.as_bytes())?;
+ /*
+ if let Err(e) = Command::new("rustfmt")
+ .arg(&arm_out_path)
+ .arg(&aarch64_out_path)
+ .status() {
+ eprintln!("Could not format `{}`: {}", arm_out_path.to_str().unwrap(), e);
+ eprintln!("Could not format `{}`: {}", aarch64_out_path.to_str().unwrap(), e);
+ };
+ */
+ Ok(())
+}