diff options
Diffstat (limited to 'libgrust/rustc-lib/stdarch/crates/stdarch-gen/neon.spec')
| -rw-r--r-- | libgrust/rustc-lib/stdarch/crates/stdarch-gen/neon.spec | 469 | 
1 files changed, 469 insertions, 0 deletions
diff --git a/libgrust/rustc-lib/stdarch/crates/stdarch-gen/neon.spec b/libgrust/rustc-lib/stdarch/crates/stdarch-gen/neon.spec new file mode 100644 index 0000000..0343a72 --- /dev/null +++ b/libgrust/rustc-lib/stdarch/crates/stdarch-gen/neon.spec @@ -0,0 +1,469 @@ +// ARM Neon intrinsic specification. +//  +// This file contains the specification for a number of  +// intrinsics that allows us to generate them along with +// their test cases. +// +// To the syntax of the file - it's not very intelligently parsed! +// +// # Comments +// start with AT LEAST two, or four or more slashes  so // is a +// comment /////// is too. +// +// # Sections +// Sections start with EXACTLY three slashes followed +// by AT LEAST one space. Sections are used for two things: +// +// 1) they serve as the doc comment for the given intrinsics. +// 2) they reset all variables (name, fn, etc.) +// +// # Variables +// +// name    - The prefix of the function, suffixes are auto +//           generated by the type they get passed. +// +// fn      - The function to call in rust-land. +// +// aarch64 - The intrinsic to check on aarch64 architecture. +//           If this is given but no arm intrinsic is provided, +//           the function will exclusively be generated for +//           aarch64. +//           This is used to generate both aarch64 specific and +//           shared intrinsics by first only specifying the aarch64 +//           variant then the arm variant. +//  +// arm     - The arm v7 intrinsics used to check for arm code +//           generation. All neon functions available in arm are +//           also available in aarch64. If no aarch64 intrinsic was +//           set they are assumed to be the same. 
+//           Intrinsics ending with a `.` will have a size suffix +//           added (such as `i8` or `i64`) that is not sign specific +//           Intrinsics ending with a `.s` will have a size suffix +//           added (such as `s8` or `u64`) that is sign specific +// +// a       - First input for tests, it gets scaled to the size of +//           the type. +// +// b       - Second input for tests, it gets scaled to the size of +//           the type. +// +// # special values +// +// TRUE - 'true' all bits are set to 1 +// FALSE - 'false' all bits are set to 0 +// FF - same as 'true' +// MIN - minimal value (either 0 or the lowest negative number) +// MAX - maximal value prior to overflow +// +// # validate <values> +// Validates a and b against the expected result of the test. +// The special values 'TRUE' and 'FALSE' can be used to +// represent the correct NEON representation of true or +// false values. It too gets scaled to the type. +//  +// Validate needs to be called before generate as it sets +// up the rules for validation that get generated for each +// type. +// # generate <types> +// The generate command generates the intrinsics, it uses the +// Variables set and can be called multiple times while overwriting +// some of the variables. 
+ +/// Vector bitwise and +name = vand +fn = simd_and +arm = vand +aarch64 = and +a = 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00 +b = 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F +validate 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00 +b = 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +validate 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +generate int*_t, uint*_t, int64x*_t, uint64x*_t + +/// Vector bitwise or (immediate, inclusive) +name = vorr +fn = simd_or +arm = vorr +aarch64 = orr +a = 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F +b = 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +validate 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F +generate int*_t, uint*_t, int64x*_t, uint64x*_t + + +/// Vector bitwise exclusive or (vector) +name = veor +fn = simd_xor +arm = veor +aarch64 = eor +a = 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F +b = 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +validate 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F +generate int*_t, uint*_t, int64x*_t, uint64x*_t + +//////////////////// +// equality +//////////////////// + +/// Compare bitwise Equal (vector) +name = vceq +fn = simd_eq +a = MIN, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, MAX +b = MIN, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, MAX +validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE +a = MIN, MIN, 0x02, 
0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0xCC, 0x0D, 0xEE, MAX +b = MIN, MAX, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08, 0x08, 0x00, 0x0A, 0x0A, 0xCC, 0xD0, 0xEE, MIN +validate TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE + +aarch64 = cmeq +generate uint64x*_t, int64x1_t:uint64x1_t, int64x2_t:uint64x2_t, poly64x1_t:uint64x1_t, poly64x2_t:uint64x2_t + +arm = vceq. +generate uint*_t, int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t + +/// Floating-point compare equal +name = vceq +fn = simd_eq +a = 1.2, 3.4, 5.6, 7.8 +b = 1.2, 3.4, 5.6, 7.8 +validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE + +aarch64 = fcmeq +generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t + +arm = vceq. +// we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t +generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t + +//////////////////// +// greater than +//////////////////// + +/// Compare signed greater than +name = vcgt +fn = simd_gt +a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE +aarch64 = cmgt +generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t + +arm = vcgt.s +generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t + +/// Compare unsigned higher +name = vcgt +fn = simd_gt +a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE + +aarch64 = cmhi +generate uint64x*_t + +arm = vcgt.s +generate uint*_t + +/// Floating-point compare greater than +name = vcgt +fn = simd_gt +a = 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8, 8.9  +b = 0.1, 
1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8 +validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE + +aarch64 = fcmgt +generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t + +arm = vcgt.s +// we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t +generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t + +//////////////////// +// lesser then +//////////////////// + +/// Compare signed less than +name = vclt +fn = simd_lt +a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE +aarch64 = cmgt +generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t + +arm = vcgt.s +generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t + +/// Compare unsigned less than +name = vclt +fn = simd_lt +a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE + +aarch64 = cmhi +generate uint64x*_t + +arm = vcgt.s +generate uint*_t + +/// Floating-point compare less than +name = vclt +fn = simd_lt +a = 0.1, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8 +b = 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8, 8.9  +validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE + +aarch64 = fcmgt +generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t + +arm = vcgt.s +// we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t +generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t + +//////////////////// +// lesser then equals +//////////////////// + +/// Compare signed less than or equal +name = vcle +fn = simd_le +a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE + 
+aarch64 = cmge +generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t + +arm = vcge.s +generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t + +/// Compare unsigned less than or equal +name = vcle +fn = simd_le +a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE + +aarch64 = cmhs +generate uint64x*_t + +arm = vcge.s +generate uint*_t + +/// Floating-point compare less than or equal +name = vcle +fn = simd_le +a = 0.1, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8 +b = 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8, 8.9  +validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE +aarch64 = fcmge +generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t + +// we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t +arm = vcge.s +generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t + +//////////////////// +// greater then equals +//////////////////// + +/// Compare signed greater than or equal +name = vcge +fn = simd_ge +a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE + +aarch64 = cmge +generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t + +arm = vcge.s +generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t + +/// Compare unsigned greater than or equal +name = vcge +fn = simd_ge +a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE + +aarch64 = cmhs +generate uint64x*_t + +arm = vcge.s +generate uint*_t + +/// Floating-point compare 
greater than or equal +name = vcge +fn = simd_ge +a = 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8, 8.9  +b = 0.1, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8 +validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE + +aarch64 = fcmge +generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t + +arm = vcge.s +// we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t +generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t + +/// Saturating subtract +name = vqsub +a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 +b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +validate 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26 + +arm = vqsub.s +aarch64 = uqsub +link-arm = vqsubu._EXT_ +link-aarch64 = uqsub._EXT_ +generate uint*_t + +arm = vqsub.s +aarch64 = sqsub +link-arm = vqsubs._EXT_ +link-aarch64 = sqsub._EXT_ +generate int*_t + +/// Halving add +name = vhadd +a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 +b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +validate 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29 + + +arm = vhadd.s +aarch64 = uhadd +link-aarch64 = uhadd._EXT_ +link-arm = vhaddu._EXT_ +generate uint*_t + + +arm = vhadd.s +aarch64 = shadd +link-aarch64 = shadd._EXT_ +link-arm = vhadds._EXT_ +generate int*_t + +/// Rounding halving add +name = vrhadd +a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 +b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +validate 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29 + +arm = vrhadd.s +aarch64 = urhadd +link-arm = vrhaddu._EXT_ +link-aarch64 = urhadd._EXT_ +generate uint*_t + +arm = vrhadd.s +aarch64 = srhadd +link-arm = vrhadds._EXT_ +link-aarch64 = srhadd._EXT_ +generate int*_t + +/// Saturating add +name = vqadd +a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 +b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +validate 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58 + +arm = 
vqadd.s +aarch64 = uqadd +link-arm = vqaddu._EXT_ +link-aarch64 = uqadd._EXT_ +generate uint*_t + +arm = vqadd.s +aarch64 = sqadd +link-arm = vqadds._EXT_ +link-aarch64 = sqadd._EXT_ +generate int*_t + +// requires 1st and second argument to be different, this is not implemented yet +// /// Signed saturating accumulate of unsigned value +//  +// name = vuqadd +// a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 +// b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +// e = 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58 + +// it seems like we don't have those in rustland :(  +// aarch64 = suqadd  +// link-aarch64 = usqadd._EXT_ +// generate int64x*_t + +// arm = suqadd +// link-arm = vuqadds._EXT_ +// link-aarch64 = suqadd._EXT_ +// generate int*_t + + +/// Multiply +name = vmul +a = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2 +b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +validate 1, 4, 3, 8, 5, 12, 7, 16, 9, 20, 11, 24, 13, 28, 15, 32 +arm = vmul. +aarch64 = mul +fn = simd_mul +generate int*_t, uint*_t + +/// Multiply +name = vmul +fn = simd_mul +a = 1.0, 2.0, 1.0, 2.0 +b = 2.0, 3.0, 4.0, 5.0 +validate 2.0, 6.0, 4.0, 10.0 + +aarch64 = fmul +generate float64x*_t + +arm = vmul. +generate float*_t + + +/// Subtract +name = vsub +a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +b = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2 +validate 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14 +arm = vsub. +aarch64 = sub +fn = simd_sub +generate int*_t, uint*_t, int64x*_t, uint64x*_t + +/// Subtract +name = vsub +fn = simd_sub +a = 1.0, 4.0, 3.0, 8.0 +b = 1.0, 2.0, 3.0, 4.0 +validate 0.0, 2.0, 0.0, 4.0 + +aarch64 = fsub +generate float64x*_t + +arm = vsub. 
+generate float*_t + + +/// Signed halving subtract +name = vhsub +a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +b = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2 +validate 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7 + +arm = vhsub.s +aarch64 = uhsub +link-arm = vhsubu._EXT_ +link-aarch64 = uhsub._EXT_ +generate uint*_t + +arm = vhsub.s +aarch64 = shsub +link-arm = vhsubs._EXT_ +link-aarch64 = shsub._EXT_ +generate int*_t  | 
