diff options
| author | David Green <david.green@arm.com> | 2021-09-19 14:25:21 +0100 |
|---|---|---|
| committer | David Green <david.green@arm.com> | 2021-09-19 14:25:21 +0100 |
| commit | 1da52ef2943b67c0ec1ccd3b8e459d0e57e67a6d (patch) | |
| tree | 8359af37ca9c9a84825b9c4089ac7546942a4eec | |
| parent | 9de88fc0eac1bfc719dfd63a32b7eb069489407e (diff) | |
| download | llvm-1da52ef2943b67c0ec1ccd3b8e459d0e57e67a6d.zip llvm-1da52ef2943b67c0ec1ccd3b8e459d0e57e67a6d.tar.gz llvm-1da52ef2943b67c0ec1ccd3b8e459d0e57e67a6d.tar.bz2 | |
[ARM] Add VGETLANEu patterns for v4f16 and v8f16
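There was no selection pattern for a VGETLANEu (ARMvgetlaneu) of a v4f16 or
v8f16 vector, so such a lane extract could not be converted to an i32. Add
the missing patterns, plus the equivalent v4bf16 and v8bf16 patterns, which
follow the same structure.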
| -rw-r--r-- | llvm/lib/Target/ARM/ARMInstrNEON.td | 12 |
| -rw-r--r-- | llvm/test/CodeGen/ARM/bf16-getlane-with-fp16.ll | 52 |
| -rw-r--r-- | llvm/test/CodeGen/ARM/fp16-insert-extract.ll | 73 |
3 files changed, 137 insertions, 0 deletions
diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td index 94e5641..aaf3280 100644 --- a/llvm/lib/Target/ARM/ARMInstrNEON.td +++ b/llvm/lib/Target/ARM/ARMInstrNEON.td @@ -6446,6 +6446,18 @@ def : Pat<(ARMvgetlaneu (v8i16 QPR:$src), imm:$lane), (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src, (DSubReg_i16_reg imm:$lane))), (SubReg_i16_lane imm:$lane))>; +def : Pat<(ARMvgetlaneu (v8f16 QPR:$src), imm:$lane), + (VGETLNu16 (v4f16 (EXTRACT_SUBREG QPR:$src, + (DSubReg_i16_reg imm:$lane))), + (SubReg_i16_lane imm:$lane))>; +def : Pat<(ARMvgetlaneu (v4f16 DPR:$src), imm:$lane), + (VGETLNu16 (v4f16 DPR:$src), imm:$lane)>; +def : Pat<(ARMvgetlaneu (v8bf16 QPR:$src), imm:$lane), + (VGETLNu16 (v4bf16 (EXTRACT_SUBREG QPR:$src, + (DSubReg_i16_reg imm:$lane))), + (SubReg_i16_lane imm:$lane))>; +def : Pat<(ARMvgetlaneu (v4bf16 DPR:$src), imm:$lane), + (VGETLNu16 (v4bf16 DPR:$src), imm:$lane)>; } def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane), (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src, diff --git a/llvm/test/CodeGen/ARM/bf16-getlane-with-fp16.ll b/llvm/test/CodeGen/ARM/bf16-getlane-with-fp16.ll index 55d01de..3416e50 100644 --- a/llvm/test/CodeGen/ARM/bf16-getlane-with-fp16.ll +++ b/llvm/test/CodeGen/ARM/bf16-getlane-with-fp16.ll @@ -43,3 +43,55 @@ entry: %0 = extractelement <4 x bfloat> %v, i32 1 ret bfloat %0 } + +define i16 @bextract_v4i16(<4 x bfloat> %a) { +; CHECK-LABEL: bextract_v4i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov d16, r0, r1 +; CHECK-NEXT: vmov.u16 r0, d16[0] +; CHECK-NEXT: bx lr +entry: + %elt = extractelement <4 x bfloat> %a, i32 0 + %t = bitcast bfloat %elt to i16 + ret i16 %t +} + +define i16 @bextract_v8i16(<8 x bfloat> %a) { +; CHECK-LABEL: bextract_v8i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov d16, r0, r1 +; CHECK-NEXT: vmov.u16 r0, d16[0] +; CHECK-NEXT: bx lr +entry: + %elt = extractelement <8 x bfloat> %a, i32 0 + %t = bitcast bfloat %elt to i16 + ret i16 %t +} + +define i32 @bextract_v4s32(<4 x bfloat> 
%a) { +; CHECK-LABEL: bextract_v4s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov d16, r0, r1 +; CHECK-NEXT: vmov.u16 r0, d16[0] +; CHECK-NEXT: sxth r0, r0 +; CHECK-NEXT: bx lr +entry: + %elt = extractelement <4 x bfloat> %a, i32 0 + %t = bitcast bfloat %elt to i16 + %s = sext i16 %t to i32 + ret i32 %s +} + +define i32 @bextract_v8s32(<8 x bfloat> %a) { +; CHECK-LABEL: bextract_v8s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov d16, r0, r1 +; CHECK-NEXT: vmov.u16 r0, d16[0] +; CHECK-NEXT: sxth r0, r0 +; CHECK-NEXT: bx lr +entry: + %elt = extractelement <8 x bfloat> %a, i32 0 + %t = bitcast bfloat %elt to i16 + %s = sext i16 %t to i32 + ret i32 %s +} diff --git a/llvm/test/CodeGen/ARM/fp16-insert-extract.ll b/llvm/test/CodeGen/ARM/fp16-insert-extract.ll index c53090e..d95d908 100644 --- a/llvm/test/CodeGen/ARM/fp16-insert-extract.ll +++ b/llvm/test/CodeGen/ARM/fp16-insert-extract.ll @@ -295,3 +295,76 @@ entry: ret <8 x half> %r } +define i16 @extract_v4i16(<4 x half> %a) { +; CHECKHARD-LABEL: extract_v4i16: +; CHECKHARD: @ %bb.0: @ %entry +; CHECKHARD-NEXT: vmov.u16 r0, d0[0] +; CHECKHARD-NEXT: bx lr +; +; CHECKSOFT-LABEL: extract_v4i16: +; CHECKSOFT: @ %bb.0: @ %entry +; CHECKSOFT-NEXT: vmov d16, r0, r1 +; CHECKSOFT-NEXT: vmov.u16 r0, d16[0] +; CHECKSOFT-NEXT: bx lr +entry: + %elt = extractelement <4 x half> %a, i32 0 + %t = bitcast half %elt to i16 + ret i16 %t +} + +define i16 @extract_v8i16(<8 x half> %a) { +; CHECKHARD-LABEL: extract_v8i16: +; CHECKHARD: @ %bb.0: @ %entry +; CHECKHARD-NEXT: vmov.u16 r0, d0[0] +; CHECKHARD-NEXT: bx lr +; +; CHECKSOFT-LABEL: extract_v8i16: +; CHECKSOFT: @ %bb.0: @ %entry +; CHECKSOFT-NEXT: vmov d16, r0, r1 +; CHECKSOFT-NEXT: vmov.u16 r0, d16[0] +; CHECKSOFT-NEXT: bx lr +entry: + %elt = extractelement <8 x half> %a, i32 0 + %t = bitcast half %elt to i16 + ret i16 %t +} + +define i32 @extract_v4s32(<4 x half> %a) { +; CHECKHARD-LABEL: extract_v4s32: +; CHECKHARD: @ %bb.0: @ %entry +; CHECKHARD-NEXT: vmov.u16 r0, d0[0] +; 
CHECKHARD-NEXT: sxth r0, r0 +; CHECKHARD-NEXT: bx lr +; +; CHECKSOFT-LABEL: extract_v4s32: +; CHECKSOFT: @ %bb.0: @ %entry +; CHECKSOFT-NEXT: vmov d16, r0, r1 +; CHECKSOFT-NEXT: vmov.u16 r0, d16[0] +; CHECKSOFT-NEXT: sxth r0, r0 +; CHECKSOFT-NEXT: bx lr +entry: + %elt = extractelement <4 x half> %a, i32 0 + %t = bitcast half %elt to i16 + %s = sext i16 %t to i32 + ret i32 %s +} + +define i32 @extract_v8s32(<8 x half> %a) { +; CHECKHARD-LABEL: extract_v8s32: +; CHECKHARD: @ %bb.0: @ %entry +; CHECKHARD-NEXT: vmov.u16 r0, d0[0] +; CHECKHARD-NEXT: sxth r0, r0 +; CHECKHARD-NEXT: bx lr +; +; CHECKSOFT-LABEL: extract_v8s32: +; CHECKSOFT: @ %bb.0: @ %entry +; CHECKSOFT-NEXT: vmov d16, r0, r1 +; CHECKSOFT-NEXT: vmov.u16 r0, d16[0] +; CHECKSOFT-NEXT: sxth r0, r0 +; CHECKSOFT-NEXT: bx lr +entry: + %elt = extractelement <8 x half> %a, i32 0 + %t = bitcast half %elt to i16 + %s = sext i16 %t to i32 + ret i32 %s +} |
