4 files changed, 1239 insertions, 0 deletions
diff --git a/libgrust/rustc-lib/stdarch/crates/stdarch-gen/Cargo.toml b/libgrust/rustc-lib/stdarch/crates/stdarch-gen/Cargo.toml
new file mode 100644
index 0000000..b339672
--- /dev/null
+++ b/libgrust/rustc-lib/stdarch/crates/stdarch-gen/Cargo.toml
@@ -0,0 +1,9 @@
+[package]
+name = "stdarch-gen"
+version = "0.1.0"
+authors = ["Heinz Gies <heinz@licenser.net>"]
+edition = "2018"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
diff --git a/libgrust/rustc-lib/stdarch/crates/stdarch-gen/README.md b/libgrust/rustc-lib/stdarch/crates/stdarch-gen/README.md
new file mode 100644
index 0000000..54b602c
--- /dev/null
+++ b/libgrust/rustc-lib/stdarch/crates/stdarch-gen/README.md
@@ -0,0 +1,11 @@
+# Neon intrinsic code generator
+
+A small tool that lets you quickly generate intrinsics for the NEON architecture.
+
+The specification for the intrinsics can be found in `neon.spec`.
+
+To re-generate the code, run the following from the root of the `stdarch` crate:
+
+```
+OUT_DIR=`pwd`/crates/core_arch cargo run -p stdarch-gen -- crates/stdarch-gen/neon.spec
+```
+\ No newline at end of file
diff --git a/libgrust/rustc-lib/stdarch/crates/stdarch-gen/neon.spec b/libgrust/rustc-lib/stdarch/crates/stdarch-gen/neon.spec
new file mode 100644
index 0000000..0343a72
--- /dev/null
+++ b/libgrust/rustc-lib/stdarch/crates/stdarch-gen/neon.spec
@@ -0,0 +1,469 @@
+// ARM Neon intrinsic specification.
+// 
+// This file contains the specification for a number of 
+// intrinsics that allows us to generate them along with
+// their test cases.
+//
+// A note on the syntax of this file: it is not very intelligently parsed!
+//
+// # Comments
+// start with EXACTLY two, or with four or more, slashes, so // is a
+// comment and /////// is one too.
+//
+// # Sections
+// Sections start with EXACTLY three slashes followed
+// by AT LEAST one space. Sections are used for two things:
+//
+// 1) they serve as the doc comment for the given intrinsics.
+// 2) they reset all variables (name, fn, etc.)
+//
+// # Variables
+//
+// name    - The prefix of the function, suffixes are auto
+//           generated by the type they get passed.
+//
+// fn      - The function to call in rust-land.
+//
+// aarch64 - The intrinsic to check on aarch64 architecture.
+//           If this is given but no arm intrinsic is provided,
+//           the function will exclusively be generated for
+//           aarch64.
+//           This is used to generate both aarch64 specific and
+//           shared intrinsics by first only specifying the aarch64
+//           variant then the arm variant.
+// 
+// arm     - The arm v7 intrinsic used to check arm code
+//           generation. All neon functions available in arm are
+//           also available in aarch64. If no aarch64 intrinsic was
+//           set they are assumed to be the same.
+//           Intrinsics ending with a `.` will have a size suffix
+//           added (such as `i8` or `i64`) that is not sign specific.
+//           Intrinsics ending with a `.s` will have a size suffix
+//           added (such as `s8` or `u64`) that is sign specific.
+//
+// a       - First input for tests, it gets scaled to the size of
+//           the type.
+//
+// b       - Second input for tests, it gets scaled to the size of
+//           the type.
+//
+// # special values
+//
+// TRUE - 'true' all bits are set to 1
+// FALSE - 'false' all bits are set to 0
+// FF - same as 'true'
+// MIN - minimal value (either 0 or the lowest negative number)
+// MAX - maximal value prior to overflow
+//
+// # validate <values>
+// Validates a and b against the expected result of the test.
+// The special values 'TRUE' and 'FALSE' can be used to
+// represent the correct NEON representation of true or
+// false values. It too gets scaled to the type.
+// 
+// Validate needs to be called before generate as it sets
+// up the rules for validation that get generated for each
+// type.
+// # generate <types>
+// The generate command generates the intrinsics, it uses the
+// Variables set and can be called multiple times while overwriting
+// some of the variables.
+
+/// Vector bitwise and
+name = vand
+fn = simd_and
+arm = vand
+aarch64 = and
+a = 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00
+b = 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F
+validate 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00
+b = 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+validate 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+generate int*_t, uint*_t, int64x*_t, uint64x*_t
+
+/// Vector bitwise or (immediate, inclusive)
+name = vorr
+fn = simd_or
+arm = vorr
+aarch64 = orr
+a = 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
+b = 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+validate 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
+generate int*_t, uint*_t, int64x*_t, uint64x*_t
+
+
+/// Vector bitwise exclusive or (vector)
+name = veor
+fn = simd_xor
+arm = veor
+aarch64 = eor
+a = 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
+b = 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+validate 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
+generate int*_t, uint*_t, int64x*_t, uint64x*_t
+
+////////////////////
+// equality
+////////////////////
+
+/// Compare bitwise Equal (vector)
+name = vceq
+fn = simd_eq
+a = MIN, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, MAX
+b = MIN, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, MAX
+validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
+a = MIN, MIN, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0xCC, 0x0D, 0xEE, MAX
+b = MIN, MAX, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08, 0x08, 0x00, 0x0A, 0x0A, 0xCC, 0xD0, 0xEE, MIN
+validate TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE
+
+aarch64 = cmeq
+generate uint64x*_t, int64x1_t:uint64x1_t, int64x2_t:uint64x2_t, poly64x1_t:uint64x1_t, poly64x2_t:uint64x2_t
+
+arm = vceq.
+generate uint*_t, int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t
+
+/// Floating-point compare equal
+name = vceq
+fn = simd_eq
+a = 1.2, 3.4, 5.6, 7.8
+b = 1.2, 3.4, 5.6, 7.8
+validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
+
+aarch64 = fcmeq
+generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
+
+arm = vceq.
+// we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t
+generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t
+
+////////////////////
+// greater than
+////////////////////
+
+/// Compare signed greater than
+name = vcgt
+fn = simd_gt
+a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
+aarch64 = cmgt
+generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t
+
+arm = vcgt.s
+generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t
+
+/// Compare unsigned higher
+name = vcgt
+fn = simd_gt
+a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
+
+aarch64 = cmhi
+generate uint64x*_t
+
+arm = vcgt.s
+generate uint*_t
+
+/// Floating-point compare greater than
+name = vcgt
+fn = simd_gt
+a = 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8, 8.9 
+b = 0.1, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8
+validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
+
+aarch64 = fcmgt
+generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
+
+arm = vcgt.s
+// we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t
+generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t
+
+////////////////////
+// less than
+////////////////////
+
+/// Compare signed less than
+name = vclt
+fn = simd_lt
+a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
+aarch64 = cmgt
+generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t
+
+arm = vcgt.s
+generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t
+
+/// Compare unsigned less than
+name = vclt
+fn = simd_lt
+a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
+
+aarch64 = cmhi
+generate uint64x*_t
+
+arm = vcgt.s
+generate uint*_t
+
+/// Floating-point compare less than
+name = vclt
+fn = simd_lt
+a = 0.1, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8
+b = 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8, 8.9 
+validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
+
+aarch64 = fcmgt
+generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
+
+arm = vcgt.s
+// we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t
+generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t
+
+////////////////////
+// less than or equal
+////////////////////
+
+/// Compare signed less than or equal
+name = vcle
+fn = simd_le
+a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
+
+aarch64 = cmge
+generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t
+
+arm = vcge.s
+generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t
+
+/// Compare unsigned less than or equal
+name = vcle
+fn = simd_le
+a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
+
+aarch64 = cmhs
+generate uint64x*_t
+
+arm = vcge.s
+generate uint*_t
+
+/// Floating-point compare less than or equal
+name = vcle
+fn = simd_le
+a = 0.1, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8
+b = 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8, 8.9 
+validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
+aarch64 = fcmge
+generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
+
+// we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t
+arm = vcge.s
+generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t
+
+////////////////////
+// greater than or equal
+////////////////////
+
+/// Compare signed greater than or equal
+name = vcge
+fn = simd_ge
+a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
+
+aarch64 = cmge
+generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t
+
+arm = vcge.s
+generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t
+
+/// Compare unsigned greater than or equal
+name = vcge
+fn = simd_ge
+a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
+
+aarch64 = cmhs
+generate uint64x*_t
+
+arm = vcge.s
+generate uint*_t
+
+/// Floating-point compare greater than or equal
+name = vcge
+fn = simd_ge
+a = 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8, 8.9 
+b = 0.1, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8
+validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
+
+aarch64 = fcmge
+generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
+
+arm = vcge.s
+// we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t
+generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t
+
+/// Saturating subtract
+name = vqsub
+a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42
+b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+validate 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26
+
+arm = vqsub.s
+aarch64 = uqsub
+link-arm = vqsubu._EXT_
+link-aarch64 = uqsub._EXT_
+generate uint*_t
+
+arm = vqsub.s
+aarch64 = sqsub
+link-arm = vqsubs._EXT_
+link-aarch64 = sqsub._EXT_
+generate int*_t
+
+/// Halving add
+name = vhadd
+a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42
+b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+validate 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29
+
+
+arm = vhadd.s
+aarch64 = uhadd
+link-aarch64 = uhadd._EXT_
+link-arm = vhaddu._EXT_
+generate uint*_t
+
+
+arm = vhadd.s
+aarch64 = shadd
+link-aarch64 = shadd._EXT_
+link-arm = vhadds._EXT_
+generate int*_t
+
+/// Rounding halving add
+name = vrhadd
+a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42
+b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+validate 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29
+
+arm = vrhadd.s
+aarch64 = urhadd
+link-arm = vrhaddu._EXT_
+link-aarch64 = urhadd._EXT_
+generate uint*_t
+
+arm = vrhadd.s
+aarch64 = srhadd
+link-arm = vrhadds._EXT_
+link-aarch64 = srhadd._EXT_
+generate int*_t
+
+/// Saturating add
+name = vqadd
+a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42
+b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+validate 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58
+
+arm = vqadd.s
+aarch64 = uqadd
+link-arm = vqaddu._EXT_
+link-aarch64 = uqadd._EXT_
+generate uint*_t
+
+arm = vqadd.s
+aarch64 = sqadd
+link-arm = vqadds._EXT_
+link-aarch64 = sqadd._EXT_
+generate int*_t
+
+// requires 1st and second argument to be different, this is not implemented yet
+// /// Signed saturating accumulate of unsigned value
+// 
+// name = vuqadd
+// a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42
+// b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+// e = 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58
+
+// it seems like we don't have those in rustland :( 
+// aarch64 = suqadd 
+// link-aarch64 = usqadd._EXT_
+// generate int64x*_t
+
+// arm = suqadd
+// link-arm = vuqadds._EXT_
+// link-aarch64 = suqadd._EXT_
+// generate int*_t
+
+
+/// Multiply
+name = vmul
+a = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2
+b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+validate 1, 4, 3, 8, 5, 12, 7, 16, 9, 20, 11, 24, 13, 28, 15, 32
+arm = vmul.
+aarch64 = mul
+fn = simd_mul
+generate int*_t, uint*_t
+
+/// Multiply
+name = vmul
+fn = simd_mul
+a = 1.0, 2.0, 1.0, 2.0
+b = 2.0, 3.0, 4.0, 5.0
+validate 2.0, 6.0, 4.0, 10.0
+
+aarch64 = fmul
+generate float64x*_t
+
+arm = vmul.
+generate float*_t
+
+
+/// Subtract
+name = vsub
+a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+b = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2
+validate 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14
+arm = vsub.
+aarch64 = sub
+fn = simd_sub
+generate int*_t, uint*_t, int64x*_t, uint64x*_t
+
+/// Subtract
+name = vsub
+fn = simd_sub
+a = 1.0, 4.0, 3.0, 8.0
+b = 1.0, 2.0, 3.0, 4.0
+validate 0.0, 2.0, 0.0, 4.0
+
+aarch64 = fsub
+generate float64x*_t
+
+arm = vsub.
+generate float*_t
+
+
+/// Signed halving subtract
+name = vhsub
+a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+b = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2
+validate 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7
+
+arm = vhsub.s
+aarch64 = uhsub
+link-arm = vhsubu._EXT_
+link-aarch64 = uhsub._EXT_
+generate uint*_t
+
+arm = vhsub.s
+aarch64 = shsub
+link-arm = vhsubs._EXT_
+link-aarch64 = shsub._EXT_
+generate int*_t
diff --git a/libgrust/rustc-lib/stdarch/crates/stdarch-gen/src/main.rs b/libgrust/rustc-lib/stdarch/crates/stdarch-gen/src/main.rs
new file mode 100644
index 0000000..8a9d9f2
--- /dev/null
+++ b/libgrust/rustc-lib/stdarch/crates/stdarch-gen/src/main.rs
@@ -0,0 +1,750 @@
+use std::env;
+use std::fs::File;
+use std::io::prelude::*;
+use std::io::{self, BufReader};
+use std::path::PathBuf;
+
+const IN: &str = "neon.spec"; // default spec path, used when no command-line argument is given
+const ARM_OUT: &str = "generated.rs"; // NOTE(review): same file name as AARCH64_OUT - presumably joined to different directories; confirm at the use site
+const AARCH64_OUT: &str = "generated.rs";
+
+const UINT_TYPES: [&str; 6] = [ // unsigned vector types with 8/16/32-bit lanes
+    "uint8x8_t",
+    "uint8x16_t",
+    "uint16x4_t",
+    "uint16x8_t",
+    "uint32x2_t",
+    "uint32x4_t",
+];
+
+const UINT_TYPES_64: [&str; 2] = ["uint64x1_t", "uint64x2_t"]; // unsigned vector types with 64-bit lanes
+
+const INT_TYPES: [&str; 6] = [ // signed vector types with 8/16/32-bit lanes
+    "int8x8_t",
+    "int8x16_t",
+    "int16x4_t",
+    "int16x8_t",
+    "int32x2_t",
+    "int32x4_t",
+];
+
+const INT_TYPES_64: [&str; 2] = ["int64x1_t", "int64x2_t"]; // signed vector types with 64-bit lanes
+
+const FLOAT_TYPES: [&str; 2] = [ // float vector types with 32-bit lanes
+    //"float8x8_t", not supported by rust
+    //"float8x16_t", not supported by rust
+    //"float16x4_t", not supported by rust
+    //"float16x8_t", not supported by rust
+    "float32x2_t",
+    "float32x4_t",
+];
+
+const FLOAT_TYPES_64: [&str; 2] = [ // float vector types with 64-bit lanes
+    //"float8x8_t", not supported by rust
+    //"float8x16_t", not supported by rust
+    //"float16x4_t", not supported by rust
+    //"float16x8_t", not supported by rust
+    "float64x1_t",
+    "float64x2_t",
+];
+
+fn type_len(t: &str) -> usize { // lane count of the NEON vector type named `t` (the N in `...xN_t`)
+    match t {
+        "int8x8_t" => 8,
+        "int8x16_t" => 16,
+        "int16x4_t" => 4,
+        "int16x8_t" => 8,
+        "int32x2_t" => 2,
+        "int32x4_t" => 4,
+        "int64x1_t" => 1,
+        "int64x2_t" => 2,
+        "uint8x8_t" => 8,
+        "uint8x16_t" => 16,
+        "uint16x4_t" => 4,
+        "uint16x8_t" => 8,
+        "uint32x2_t" => 2,
+        "uint32x4_t" => 4,
+        "uint64x1_t" => 1,
+        "uint64x2_t" => 2,
+        "float16x4_t" => 4,
+        "float16x8_t" => 8,
+        "float32x2_t" => 2,
+        "float32x4_t" => 4,
+        "float64x1_t" => 1,
+        "float64x2_t" => 2,
+        "poly64x1_t" => 1,
+        "poly64x2_t" => 2,
+        _ => panic!("unknown type: {}", t), // any name not listed above aborts generation
+    }
+}
+
+fn type_to_suffix(t: &str) -> &str { // name suffix for vector type `t`; presumably appended to the spec `name` (use site not visible here)
+    match t { // 8-, 4-, 2- and 1-lane entries below get a plain `_` prefix; the wider variant of each pair gets `q_`
+        "int8x8_t" => "_s8",
+        "int8x16_t" => "q_s8",
+        "int16x4_t" => "_s16",
+        "int16x8_t" => "q_s16",
+        "int32x2_t" => "_s32",
+        "int32x4_t" => "q_s32",
+        "int64x1_t" => "_s64",
+        "int64x2_t" => "q_s64",
+        "uint8x8_t" => "_u8",
+        "uint8x16_t" => "q_u8",
+        "uint16x4_t" => "_u16",
+        "uint16x8_t" => "q_u16",
+        "uint32x2_t" => "_u32",
+        "uint32x4_t" => "q_u32",
+        "uint64x1_t" => "_u64",
+        "uint64x2_t" => "q_u64",
+        "float16x4_t" => "_f16",
+        "float16x8_t" => "q_f16",
+        "float32x2_t" => "_f32",
+        "float32x4_t" => "q_f32",
+        "float64x1_t" => "_f64",
+        "float64x2_t" => "q_f64",
+        "poly64x1_t" => "_p64",
+        "poly64x2_t" => "q_p64",
+        _ => panic!("unknown type: {}", t), // any name not listed above aborts generation
+    }
+}
+
+fn type_to_global_type(t: &str) -> &str { // type name used for the typed `let` bindings in generated tests (see `values` and `gen_test`)
+    match t {
+        "int8x8_t" => "i8x8",
+        "int8x16_t" => "i8x16",
+        "int16x4_t" => "i16x4",
+        "int16x8_t" => "i16x8",
+        "int32x2_t" => "i32x2",
+        "int32x4_t" => "i32x4",
+        "int64x1_t" => "i64x1",
+        "int64x2_t" => "i64x2",
+        "uint8x8_t" => "u8x8",
+        "uint8x16_t" => "u8x16",
+        "uint16x4_t" => "u16x4",
+        "uint16x8_t" => "u16x8",
+        "uint32x2_t" => "u32x2",
+        "uint32x4_t" => "u32x4",
+        "uint64x1_t" => "u64x1",
+        "uint64x2_t" => "u64x2",
+        "float16x4_t" => "f16x4",
+        "float16x8_t" => "f16x8",
+        "float32x2_t" => "f32x2",
+        "float32x4_t" => "f32x4",
+        "float64x1_t" => "f64", // plain scalar name; `values` special-cases this entry
+        "float64x2_t" => "f64x2",
+        "poly64x1_t" => "i64x1", // poly types reuse the signed 64-bit names
+        "poly64x2_t" => "i64x2",
+        _ => panic!("unknown type: {}", t), // any name not listed above aborts generation
+    }
+}
+
+// fn type_to_native_type(t: &str) -> &str {
+//     match t {
+//         "int8x8_t" => "i8",
+//         "int8x16_t" => "i8",
+//         "int16x4_t" => "i16",
+//         "int16x8_t" => "i16",
+//         "int32x2_t" => "i32",
+//         "int32x4_t" => "i32",
+//         "int64x1_t" => "i64",
+//         "int64x2_t" => "i64",
+//         "uint8x8_t" => "u8",
+//         "uint8x16_t" => "u8",
+//         "uint16x4_t" => "u16",
+//         "uint16x8_t" => "u16",
+//         "uint32x2_t" => "u32",
+//         "uint32x4_t" => "u32",
+//         "uint64x1_t" => "u64",
+//         "uint64x2_t" => "u64",
+//         "float16x4_t" => "f16",
+//         "float16x8_t" => "f16",
+//         "float32x2_t" => "f32",
+//         "float32x4_t" => "f32",
+//         "float64x1_t" => "f64",
+//         "float64x2_t" => "f64",
+//         "poly64x1_t" => "i64",
+//         "poly64x2_t" => "i64",
+//         _ => panic!("unknown type: {}", t),
+//     }
+// }
+
+fn type_to_ext(t: &str) -> &str { // text substituted for `_EXT_` in `link-arm` / `link-aarch64` names
+    match t {
+        "int8x8_t" => "v8i8",
+        "int8x16_t" => "v16i8",
+        "int16x4_t" => "v4i16",
+        "int16x8_t" => "v8i16",
+        "int32x2_t" => "v2i32",
+        "int32x4_t" => "v4i32",
+        "int64x1_t" => "v1i64",
+        "int64x2_t" => "v2i64",
+        "uint8x8_t" => "v8i8", // unsigned types map to the same strings as their signed counterparts
+        "uint8x16_t" => "v16i8",
+        "uint16x4_t" => "v4i16",
+        "uint16x8_t" => "v8i16",
+        "uint32x2_t" => "v2i32",
+        "uint32x4_t" => "v4i32",
+        "uint64x1_t" => "v1i64",
+        "uint64x2_t" => "v2i64",
+        "float16x4_t" => "v4f16",
+        "float16x8_t" => "v8f16",
+        "float32x2_t" => "v2f32",
+        "float32x4_t" => "v4f32",
+        "float64x1_t" => "v1f64",
+        "float64x2_t" => "v2f64",
+        /*
+        "poly64x1_t" => "i64x1",
+        "poly64x2_t" => "i64x2",
+        */
+        _ => panic!("unknown type for extension: {}", t), // includes the poly types, which are commented out above
+    }
+}
+
+fn values(t: &str, vs: &[String]) -> String { // builds the `: TYPE = VALUE` tail of a `let` binding in a generated test
+    if vs.len() == 1 && !t.contains('x') { // single value and a type name without `x`: emit the value as written
+        format!(": {} = {}", t, vs[0])
+    } else if vs.len() == 1 && type_to_global_type(t) == "f64" { // `float64x1_t` maps to plain `f64`, so no `::new(..)` wrapper
+        format!(": {} = {}", type_to_global_type(t), vs[0])
+    } else { // general case: `GLOBAL::new(v0, v1, ...)` with symbolic tokens expanded by `map_val`
+        format!(
+            ": {} = {}::new({})",
+            type_to_global_type(t),
+            type_to_global_type(t),
+            vs.iter()
+                .map(|v| map_val(type_to_global_type(t), v))
+                //.map(|v| format!("{}{}", v, type_to_native_type(t)))
+                .collect::<Vec<_>>()
+                .join(", ")
+        )
+    }
+}
+
+fn max_val(t: &str) -> &'static str { // literal for the spec's `MAX` token, for global type name `t`
+    match &t[..3] { // keyed on the first three bytes of the name (`u8x`, `i16`, `f32`, ...)
+        "u8x" => "0xFF",
+        "u16" => "0xFF_FF",
+        "u32" => "0xFF_FF_FF_FF",
+        "u64" => "0xFF_FF_FF_FF_FF_FF_FF_FF",
+        "i8x" => "0x7F",
+        "i16" => "0x7F_FF",
+        "i32" => "0x7F_FF_FF_FF",
+        "i64" => "0x7F_FF_FF_FF_FF_FF_FF_FF",
+        "f32" => "3.40282347e+38",
+        "f64" => "1.7976931348623157e+308",
+        _ => panic!("No MAX for type {}", t), // message names the token that failed (was "TRUE")
+    }
+}
+
+fn min_val(t: &str) -> &'static str { // literal for the spec's `MIN` token, for global type name `t`
+    match &t[..3] { // keyed on the first three bytes of the name (`u8x`, `i16`, `f32`, ...)
+        "u8x" => "0",
+        "u16" => "0",
+        "u32" => "0",
+        "u64" => "0",
+        "i8x" => "-128",
+        "i16" => "-32768",
+        "i32" => "-2147483648",
+        "i64" => "-9223372036854775808",
+        "f32" => "-3.40282347e+38",
+        "f64" => "-1.7976931348623157e+308",
+        _ => panic!("No MIN for type {}", t), // message names the token that failed (was "TRUE")
+    }
+}
+
+fn true_val(t: &str) -> &'static str { // all-ones literal for the spec's `TRUE` token
+    match &t[..3] { // keyed on the first three bytes of the global type name
+        "u8x" => "0xFF",
+        "u16" => "0xFF_FF",
+        "u32" => "0xFF_FF_FF_FF",
+        "u64" => "0xFF_FF_FF_FF_FF_FF_FF_FF",
+        _ => panic!("No TRUE for type {}", t), // only unsigned prefixes are listed above
+    }
+}
+
+fn ff_val(t: &str) -> &'static str { // all-ones literal for the spec's `FF` token; unlike `true_val`, signed types are accepted
+    match &t[..3] { // keyed on the first three bytes of the global type name
+        "u8x" => "0xFF",
+        "u16" => "0xFF_FF",
+        "u32" => "0xFF_FF_FF_FF",
+        "u64" => "0xFF_FF_FF_FF_FF_FF_FF_FF",
+        "i8x" => "0xFF",
+        "i16" => "0xFF_FF",
+        "i32" => "0xFF_FF_FF_FF",
+        "i64" => "0xFF_FF_FF_FF_FF_FF_FF_FF",
+        _ => panic!("No FF for type {}", t), // message names the token that failed (was "TRUE")
+    }
+}
+
+fn false_val(_t: &str) -> &'static str { // literal for the spec's `FALSE` token; `_t` is ignored
+    "0"
+}
+fn map_val<'v>(t: &str, v: &'v str) -> &'v str { // expands the spec's symbolic tokens for global type `t`
+    match v {
+        "FALSE" => false_val(t),
+        "TRUE" => true_val(t),
+        "MAX" => max_val(t), // MAX and MIN were previously swapped
+        "MIN" => min_val(t),
+        "FF" => ff_val(t),
+        o => o, // anything else is passed through unchanged
+    }
+}
+
+#[allow(clippy::too_many_arguments)] // one argument per spec variable; returns (intrinsic source, test source)
+fn gen_aarch64(
+    current_comment: &str,
+    current_fn: &Option<String>,
+    name: &str,
+    current_aarch64: &Option<String>,
+    link_aarch64: &Option<String>,
+    in_t: &str,
+    out_t: &str,
+    current_tests: &[(Vec<String>, Vec<String>, Vec<String>)],
+) -> (String, String) {
+    let _global_t = type_to_global_type(in_t); // result unused; the call panics early on an unknown type
+    let _global_ret_t = type_to_global_type(out_t);
+    let current_fn = if let Some(current_fn) = current_fn.clone() {
+        if link_aarch64.is_some() {
+            panic!("[{}] Can't specify link and fn at the same time.", name)
+        }
+        current_fn
+    } else {
+        if link_aarch64.is_none() {
+            panic!("[{}] Either fn or link-aarch have to be specified.", name)
+        }
+        format!("{}_", name) // name given to the extern declaration emitted below
+    };
+    let current_aarch64 = current_aarch64.clone().unwrap(); // panics if no `aarch64 =` value was passed in
+    let ext_c = if let Some(link_aarch64) = link_aarch64.clone() {
+        let ext = type_to_ext(in_t);
+
+        format!(
+            r#"
+    #[allow(improper_ctypes)]
+    extern "C" {{
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.{}")]
+        fn {}(a: {}, b: {}) -> {};
+    }}
+"#,
+            link_aarch64.replace("_EXT_", ext),
+            current_fn,
+            in_t,
+            in_t,
+            out_t
+        )
+    } else {
+        String::new() // `fn` was given, so no extern declaration is needed
+    };
+    let function = format!(
+        r#"
+{}
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr({}))]
+pub unsafe fn {}(a: {}, b: {}) -> {} {{
+    {}{}(a, b)
+}}
+"#,
+        current_comment, current_aarch64, name, in_t, in_t, out_t, ext_c, current_fn,
+    );
+
+    let test = gen_test(name, &in_t, &out_t, current_tests, type_len(in_t));
+    (function, test)
+}
+
+fn gen_test( // returns the source text of one `#[simd_test]` function named `test_<name>`
+    name: &str,
+    in_t: &str,
+    out_t: &str,
+    current_tests: &[(Vec<String>, Vec<String>, Vec<String>)],
+    len: usize,
+) -> String {
+    let mut test = format!(
+        r#"
+    #[simd_test(enable = "neon")]
+    unsafe fn test_{}() {{"#,
+        name,
+    );
+    for (a, b, e) in current_tests { // each tuple is (input a, input b, expected result)
+        let a: Vec<String> = a.iter().take(len).cloned().collect(); // keep only the first `len` values
+        let b: Vec<String> = b.iter().take(len).cloned().collect();
+        let e: Vec<String> = e.iter().take(len).cloned().collect();
+        let t = format!(
+            r#"
+        let a{};
+        let b{};
+        let e{};
+        let r: {} = transmute({}(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+"#,
+            values(in_t, &a),
+            values(in_t, &b),
+            values(out_t, &e),
+            type_to_global_type(out_t),
+            name
+        );
+        test.push_str(&t);
+    }
+    test.push_str("    }\n"); // closes the generated test function
+    test
+}
+
+#[allow(clippy::too_many_arguments)] // one argument per spec variable; returns (intrinsic source, test source)
+fn gen_arm(
+    current_comment: &str,
+    current_fn: &Option<String>,
+    name: &str,
+    current_arm: &str,
+    link_arm: &Option<String>,
+    current_aarch64: &Option<String>,
+    link_aarch64: &Option<String>,
+    in_t: &str,
+    out_t: &str,
+    current_tests: &[(Vec<String>, Vec<String>, Vec<String>)],
+) -> (String, String) {
+    let _global_t = type_to_global_type(in_t); // result unused; the call panics early on an unknown type
+    let _global_ret_t = type_to_global_type(out_t);
+    let current_aarch64 = current_aarch64 // falls back to the arm instruction name when no aarch64 one is given
+        .clone()
+        .unwrap_or_else(|| current_arm.to_string());
+
+    let current_fn = if let Some(current_fn) = current_fn.clone() {
+        if link_aarch64.is_some() || link_arm.is_some() {
+            panic!(
+                "[{}] Can't specify link and function at the same time. {} / {:?} / {:?}",
+                name, current_fn, link_aarch64, link_arm
+            )
+        }
+        current_fn
+    } else {
+        if link_aarch64.is_none() || link_arm.is_none() { // without `fn`, both link names are required
+            panic!(
+                "[{}] Either fn or link-arm and link-aarch have to be specified.",
+                name
+            )
+        }
+        format!("{}_", name) // name given to the extern declaration emitted below
+    };
+
+    let ext_c =
+        if let (Some(link_arm), Some(link_aarch64)) = (link_arm.clone(), link_aarch64.clone()) {
+            let ext = type_to_ext(in_t);
+
+            format!(
+                r#"#[allow(improper_ctypes)]
+    extern "C" {{
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.{}")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.{}")]
+        fn {}(a: {}, b: {}) -> {};
+    }}
+"#,
+                link_arm.replace("_EXT_", ext),
+                link_aarch64.replace("_EXT_", ext),
+                current_fn,
+                in_t,
+                in_t,
+                out_t
+            )
+        } else {
+            String::new() // `fn` was given, so no extern declaration is needed
+        };
+
+    let function = format!(
+        r#"
+{}
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr({}))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr({}))]
+pub unsafe fn {}(a: {}, b: {}) -> {} {{
+    {}{}(a, b)
+}}
+"#,
+        current_comment,
+        expand_intrinsic(&current_arm, in_t), // applies the `.` / `.s` suffix rules to the instruction name
+        expand_intrinsic(&current_aarch64, in_t),
+        name,
+        in_t,
+        in_t,
+        out_t,
+        ext_c,
+        current_fn,
+    );
+    let test = gen_test(name, &in_t, &out_t, current_tests, type_len(in_t));
+
+    (function, test)
+}
+
+fn expand_intrinsic(intr: &str, t: &str) -> String { // instruction text for `assert_instr`, with a size suffix derived from type `t`
+    if intr.ends_with(".") { // trailing `.`: append a sign-agnostic size (`i8`, `i16`, ..., `f32`, `f64`)
+        let ext = match t {
+            "int8x8_t" => "i8",
+            "int8x16_t" => "i8",
+            "int16x4_t" => "i16",
+            "int16x8_t" => "i16",
+            "int32x2_t" => "i32",
+            "int32x4_t" => "i32",
+            "int64x1_t" => "i64",
+            "int64x2_t" => "i64",
+            "uint8x8_t" => "i8",
+            "uint8x16_t" => "i8",
+            "uint16x4_t" => "i16",
+            "uint16x8_t" => "i16",
+            "uint32x2_t" => "i32",
+            "uint32x4_t" => "i32",
+            "uint64x1_t" => "i64",
+            "uint64x2_t" => "i64",
+            "float16x4_t" => "f16",
+            "float16x8_t" => "f16",
+            "float32x2_t" => "f32",
+            "float32x4_t" => "f32",
+            "float64x1_t" => "f64",
+            "float64x2_t" => "f64",
+            /*
+            "poly64x1_t" => "i64x1",
+            "poly64x2_t" => "i64x2",
+            */
+            _ => panic!("unknown type for extension: {}", t),
+        };
+        format!(r#""{}{}""#, intr, ext) // result is wrapped in double quotes
+    } else if intr.ends_with(".s") { // trailing `.s`: replace the `s` with a sign-specific size (`s8`, `u16`, `f32`, ...)
+        let ext = match t {
+            "int8x8_t" => "s8",
+            "int8x16_t" => "s8",
+            "int16x4_t" => "s16",
+            "int16x8_t" => "s16",
+            "int32x2_t" => "s32",
+            "int32x4_t" => "s32",
+            "int64x1_t" => "s64",
+            "int64x2_t" => "s64",
+            "uint8x8_t" => "u8",
+            "uint8x16_t" => "u8",
+            "uint16x4_t" => "u16",
+            "uint16x8_t" => "u16",
+            "uint32x2_t" => "u32",
+            "uint32x4_t" => "u32",
+            "uint64x1_t" => "u64",
+            "uint64x2_t" => "u64",
+            "float16x4_t" => "f16",
+            "float16x8_t" => "f16",
+            "float32x2_t" => "f32",
+            "float32x4_t" => "f32",
+            "float64x1_t" => "f64",
+            "float64x2_t" => "f64",
+            /*
+            "poly64x1_t" => "i64x1",
+            "poly64x2_t" => "i64x2",
+            */
+            _ => panic!("unknown type for extension: {}", t),
+        };
+        format!(r#""{}{}""#, &intr[..intr.len() - 1], ext) // drops the final `s` but keeps the `.`
+    } else {
+        intr.to_string() // no suffix marker: returned as-is, without surrounding quotes
+    }
+}
+
+/// Entry point of the NEON intrinsic generator.
+///
+/// Reads the spec file named by the first command line argument (falling
+/// back to `IN`) line by line. A line starting with `/// ` opens a new
+/// section and resets the per-section state; `key = value` lines fill that
+/// state; every `generate` line emits one intrinsic plus its test per listed
+/// type, through `gen_arm` when the section has an `arm = ` entry and through
+/// `gen_aarch64` otherwise.
+///
+/// The collected code is written to `$OUT_DIR/src/arm/neon/<ARM_OUT>` and
+/// `$OUT_DIR/src/aarch64/neon/<AARCH64_OUT>`.
+///
+/// # Errors
+///
+/// Returns an error when the spec file cannot be opened or read, when
+/// `OUT_DIR` is not set, or when an output directory or file cannot be
+/// created or written.
+///
+/// # Panics
+///
+/// Panics on a malformed spec, e.g. a `generate` entry containing more than
+/// one `:` or a `generate` line in a section that has no `name = ` line.
+fn main() -> io::Result<()> {
+    /// Returns the text following `key` when `line` starts with `key`.
+    fn value_of<'a>(line: &'a str, key: &str) -> Option<&'a str> {
+        if line.starts_with(key) {
+            // Slice by the key's own length so the offset cannot drift
+            // from the literal it belongs to.
+            Some(&line[key.len()..])
+        } else {
+            None
+        }
+    }
+
+    /// Splits a comma separated list into trimmed, owned items.
+    fn split_list(list: &str) -> Vec<String> {
+        list.split(',').map(|v| v.trim().to_string()).collect()
+    }
+
+    // Banner placed at the top of both generated files: the output is
+    // generated from neon.spec and must not be changed manually.
+    const GENERATED_HEADER: &str = r#"// This code is automatically generated. DO NOT MODIFY.
+//
+// Instead, modify `crates/stdarch-gen/neon.spec` and run the following command to re-generate this file:
+//
+// ```
+// OUT_DIR=`pwd`/crates/core_arch cargo run -p stdarch-gen -- crates/stdarch-gen/neon.spec
+// ```
+use super::*;
+#[cfg(test)]
+use stdarch_test::assert_instr;
+"#;
+
+    let in_file = env::args().nth(1).unwrap_or_else(|| IN.to_string());
+
+    // Report the path that was actually requested, not a fixed file name.
+    let f = File::open(&in_file).map_err(|e| {
+        io::Error::new(e.kind(), format!("Failed to open {}: {}", in_file, e))
+    })?;
+    let f = BufReader::new(f);
+
+    // Per-section state, reset whenever a new `/// ` section starts.
+    let mut current_comment = String::new();
+    let mut current_name: Option<String> = None;
+    let mut current_fn: Option<String> = None;
+    let mut current_arm: Option<String> = None;
+    let mut current_aarch64: Option<String> = None;
+    let mut link_arm: Option<String> = None;
+    let mut link_aarch64: Option<String> = None;
+    let mut current_tests: Vec<(Vec<String>, Vec<String>, Vec<String>)> = Vec::new();
+    // Most recent `a = ` / `b = ` inputs; these are NOT reset per section.
+    let mut a: Vec<String> = Vec::new();
+    let mut b: Vec<String> = Vec::new();
+
+    let mut out_arm = String::from(GENERATED_HEADER);
+    let mut tests_arm = String::from(
+        r#"
+#[cfg(test)]
+#[allow(overflowing_literals)]
+mod test {
+    use super::*;
+    use crate::core_arch::simd::*;
+    use std::mem::transmute;
+    use stdarch_test::simd_test;
+"#,
+    );
+    let mut out_aarch64 = String::from(GENERATED_HEADER);
+    let mut tests_aarch64 = String::from(
+        r#"
+#[cfg(test)]
+mod test {
+    use super::*;
+    use crate::core_arch::simd::*;
+    use std::mem::transmute;
+    use stdarch_test::simd_test;
+"#,
+    );
+
+    for line in f.lines() {
+        let line = line?;
+        if line.is_empty() {
+            continue;
+        }
+        if line.starts_with("/// ") {
+            // New section: the line itself becomes the doc comment.
+            current_comment = line;
+            current_name = None;
+            current_fn = None;
+            current_arm = None;
+            current_aarch64 = None;
+            link_aarch64 = None;
+            link_arm = None;
+            current_tests = Vec::new();
+        } else if line.starts_with("//") {
+            // Any other comment line is ignored.
+        } else if let Some(value) = value_of(&line, "name = ") {
+            current_name = Some(value.to_string());
+        } else if let Some(value) = value_of(&line, "fn = ") {
+            current_fn = Some(value.to_string());
+        } else if let Some(value) = value_of(&line, "arm = ") {
+            current_arm = Some(value.to_string());
+        } else if let Some(value) = value_of(&line, "aarch64 = ") {
+            current_aarch64 = Some(value.to_string());
+        } else if let Some(value) = value_of(&line, "a = ") {
+            a = split_list(value);
+        } else if let Some(value) = value_of(&line, "b = ") {
+            b = split_list(value);
+        } else if let Some(value) = value_of(&line, "validate ") {
+            // A test case is the current inputs plus the expected output.
+            current_tests.push((a.clone(), b.clone(), split_list(value)));
+        } else if let Some(value) = value_of(&line, "link-aarch64 = ") {
+            link_aarch64 = Some(value.to_string());
+        } else if let Some(value) = value_of(&line, "link-arm = ") {
+            link_arm = Some(value.to_string());
+        } else if let Some(value) = value_of(&line, "generate ") {
+            // Wildcard entries expand to the matching list of vector types.
+            let types: Vec<String> = split_list(value)
+                .into_iter()
+                .flat_map(|v| match v.as_str() {
+                    "uint*_t" => UINT_TYPES.iter().map(|v| v.to_string()).collect(),
+                    "uint64x*_t" => UINT_TYPES_64.iter().map(|v| v.to_string()).collect(),
+                    "int*_t" => INT_TYPES.iter().map(|v| v.to_string()).collect(),
+                    "int64x*_t" => INT_TYPES_64.iter().map(|v| v.to_string()).collect(),
+                    "float*_t" => FLOAT_TYPES.iter().map(|v| v.to_string()).collect(),
+                    "float64x*_t" => FLOAT_TYPES_64.iter().map(|v| v.to_string()).collect(),
+                    _ => vec![v],
+                })
+                .collect();
+
+            for entry in types {
+                // `in` alone means the output type equals the input type;
+                // `in:out` names both.
+                let spec: Vec<&str> = entry.split(':').map(|e| e.trim()).collect();
+                let (in_t, out_t) = match spec.len() {
+                    1 => (spec[0], spec[0]),
+                    2 => (spec[0], spec[1]),
+                    _ => panic!("Bad spec: {}", entry),
+                };
+                let base_name = current_name
+                    .as_ref()
+                    .expect("`generate` requires a `name = ` line earlier in the section");
+                let name = format!("{}{}", base_name, type_to_suffix(in_t));
+
+                if let Some(arm) = &current_arm {
+                    let (function, test) = gen_arm(
+                        &current_comment,
+                        &current_fn,
+                        &name,
+                        arm,
+                        &link_arm,
+                        &current_aarch64,
+                        &link_aarch64,
+                        &in_t,
+                        &out_t,
+                        &current_tests,
+                    );
+                    out_arm.push_str(&function);
+                    tests_arm.push_str(&test);
+                } else {
+                    let (function, test) = gen_aarch64(
+                        &current_comment,
+                        &current_fn,
+                        &name,
+                        &current_aarch64,
+                        &link_aarch64,
+                        &in_t,
+                        &out_t,
+                        &current_tests,
+                    );
+                    out_aarch64.push_str(&function);
+                    tests_aarch64.push_str(&test);
+                }
+            }
+        }
+    }
+    // Close the `mod test` blocks opened in the test preambles.
+    tests_arm.push_str("}\n");
+    tests_aarch64.push_str("}\n");
+
+    // OUT_DIR is expected to point at the `core_arch` crate directory; a
+    // missing variable is reported as an error rather than a bare panic.
+    let out_dir = env::var("OUT_DIR").map_err(|e| {
+        io::Error::new(
+            io::ErrorKind::InvalidInput,
+            format!("OUT_DIR must be set to the core_arch crate directory: {}", e),
+        )
+    })?;
+    let src_root = PathBuf::from(out_dir).join("src");
+
+    let arm_out_path: PathBuf = src_root.join("arm").join("neon");
+    std::fs::create_dir_all(&arm_out_path)?;
+
+    let mut file_arm = File::create(arm_out_path.join(ARM_OUT))?;
+    file_arm.write_all(out_arm.as_bytes())?;
+    file_arm.write_all(tests_arm.as_bytes())?;
+
+    let aarch64_out_path: PathBuf = src_root.join("aarch64").join("neon");
+    std::fs::create_dir_all(&aarch64_out_path)?;
+
+    let mut file_aarch = File::create(aarch64_out_path.join(AARCH64_OUT))?;
+    file_aarch.write_all(out_aarch64.as_bytes())?;
+    file_aarch.write_all(tests_aarch64.as_bytes())?;
+    // Running rustfmt over the generated files is currently disabled.
+    /*
+    if let Err(e) = Command::new("rustfmt")
+        .arg(&arm_out_path)
+        .arg(&aarch64_out_path)
+        .status() {
+            eprintln!("Could not format `{}`: {}", arm_out_path.to_str().unwrap(), e);
+            eprintln!("Could not format `{}`: {}", aarch64_out_path.to_str().unwrap(), e);
+    };
+    */
+    Ok(())
+}