aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Green <david.green@arm.com>2024-03-18 17:04:31 +0000
committerGitHub <noreply@github.com>2024-03-18 10:04:31 -0700
commit9a784303a3666e9d9b93088a1519e1dc2ba4d015 (patch)
treed42fae9e7ab2948dafabc07674c7f2d8d625e5c3
parent705788c84623b4f1dab72a108e039a0de2d53cf6 (diff)
downloadllvm-9a784303a3666e9d9b93088a1519e1dc2ba4d015.zip
llvm-9a784303a3666e9d9b93088a1519e1dc2ba4d015.tar.gz
llvm-9a784303a3666e9d9b93088a1519e1dc2ba4d015.tar.bz2
[AArch64][GlobalISel] Legalize small G_TRUNC (#85625)
This is an alternative to #85610 that uses moreElements on small G_TRUNC vectors to widen them. It needs to disable one of the existing Unmerge(Trunc(..)) combines, and some of the code is not as optimal as it could be. I believe with some extra optimizations it could look better (I was thinking of combining trunc(buildvector) -> buildvector and possibly improving buildvector lowering by generating insert_vector_element earlier).
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h6
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp4
-rw-r--r--llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir8
-rw-r--r--llvm/test/CodeGen/AArch64/GlobalISel/legalize-xtn.mir14
-rw-r--r--llvm/test/CodeGen/AArch64/bitcast.ll74
-rw-r--r--llvm/test/CodeGen/AArch64/itofp.ll16
6 files changed, 80 insertions, 42 deletions
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
index da330b5..ca62f38 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
@@ -432,9 +432,13 @@ public:
DestTy.isVector() ? CastSrcTy.getNumElements() / NumDefs : 1;
LLT UnmergeTy = CastSrcTy.changeElementCount(
ElementCount::getFixed(UnmergeNumElts));
+ LLT SrcWideTy =
+ SrcTy.changeElementCount(ElementCount::getFixed(UnmergeNumElts));
if (isInstUnsupported(
- {TargetOpcode::G_UNMERGE_VALUES, {UnmergeTy, CastSrcTy}}))
+ {TargetOpcode::G_UNMERGE_VALUES, {UnmergeTy, CastSrcTy}}) ||
+ LI.getAction({TargetOpcode::G_TRUNC, {SrcWideTy, UnmergeTy}})
+ .Action == LegalizeActions::MoreElements)
return false;
Builder.setInstr(MI);
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 2ae2923..996abe8 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -628,7 +628,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
return DstTy.isVector() && SrcTy.getSizeInBits() > 128 &&
DstTy.getScalarSizeInBits() * 2 <= SrcTy.getScalarSizeInBits();
})
-
+ .clampMinNumElements(0, s8, 8)
+ .clampMinNumElements(0, s16, 4)
+ .clampMinNumElements(0, s32, 2)
.alwaysLegal();
getActionDefinitionsBuilder(G_SEXT_INREG)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir
index aa152ae..b8328ed 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir
@@ -607,9 +607,11 @@ body: |
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2)
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s16)
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD1]](s16)
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32)
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>)
- ; CHECK-NEXT: $s0 = COPY [[TRUNC]](<2 x s16>)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[DEF]](s32), [[DEF]](s32)
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[TRUNC]](<4 x s16>)
+ ; CHECK-NEXT: $s0 = COPY [[UV]](<2 x s16>)
; CHECK-NEXT: RET_ReallyLR
%0:_(p0) = COPY $x0
%1(<2 x s16>) = G_LOAD %0(p0) :: (load (<2 x s16>))
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-xtn.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-xtn.mir
index 6612651..ed40a2f 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-xtn.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-xtn.mir
@@ -540,9 +540,17 @@ body: |
; CHECK: liveins: $d0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[COPY]](<2 x s32>)
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s8>), [[TRUNC]](<2 x s8>)
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[CONCAT_VECTORS]](<4 x s8>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[DEF]](s32), [[DEF]](s32)
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32)
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s16>), [[TRUNC1]](<4 x s16>)
+ ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<8 x s16>)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s8>), [[UV3:%[0-9]+]]:_(<2 x s8>), [[UV4:%[0-9]+]]:_(<2 x s8>), [[UV5:%[0-9]+]]:_(<2 x s8>) = G_UNMERGE_VALUES [[TRUNC2]](<8 x s8>)
+ ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s8>) = G_CONCAT_VECTORS [[UV2]](<2 x s8>), [[UV2]](<2 x s8>)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[CONCAT_VECTORS1]](<4 x s8>)
; CHECK-NEXT: $d0 = COPY [[ANYEXT]](<4 x s16>)
; CHECK-NEXT: RET_ReallyLR implicit $d0
%0:_(<2 x s32>) = COPY $d0
diff --git a/llvm/test/CodeGen/AArch64/bitcast.ll b/llvm/test/CodeGen/AArch64/bitcast.ll
index 9ebd570..b7c02d6 100644
--- a/llvm/test/CodeGen/AArch64/bitcast.ll
+++ b/llvm/test/CodeGen/AArch64/bitcast.ll
@@ -4,12 +4,10 @@
; PR23065: SCALAR_TO_VECTOR implies the top elements 1 to N-1 of the N-element vector are undefined.
-; CHECK-GI: warning: Instruction selection used fallback path for bitcast_v4i8_i32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_i32_v4i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v2i16_i32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_i32_v2i16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v2i16_v4i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v4i8_v2i16
+; CHECK-GI: warning: Instruction selection used fallback path for bitcast_i32_v4i8
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_i32_v2i16
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v2i16_v4i8
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v4i8_v2i16
define <4 x i16> @foo1(<2 x i32> %a) {
; CHECK-SD-LABEL: foo1:
@@ -54,15 +52,28 @@ define <4 x i16> @foo2(<2 x i32> %a) {
; ===== To and From Scalar Types =====
define i32 @bitcast_v4i8_i32(<4 x i8> %a, <4 x i8> %b){
-; CHECK-LABEL: bitcast_v4i8_i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
-; CHECK-NEXT: uzp1 v0.8b, v0.8b, v0.8b
-; CHECK-NEXT: fmov w0, s0
-; CHECK-NEXT: add sp, sp, #16
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: bitcast_v4i8_i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sub sp, sp, #16
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT: add v0.4h, v0.4h, v1.4h
+; CHECK-SD-NEXT: uzp1 v0.8b, v0.8b, v0.8b
+; CHECK-SD-NEXT: fmov w0, s0
+; CHECK-SD-NEXT: add sp, sp, #16
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: bitcast_v4i8_i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: add v0.4h, v0.4h, v1.4h
+; CHECK-GI-NEXT: mov h1, v0.h[1]
+; CHECK-GI-NEXT: mov h2, v0.h[2]
+; CHECK-GI-NEXT: mov h3, v0.h[3]
+; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-NEXT: mov v0.h[2], v2.h[0]
+; CHECK-GI-NEXT: mov v0.h[3], v3.h[0]
+; CHECK-GI-NEXT: xtn v0.8b, v0.8h
+; CHECK-GI-NEXT: fmov w0, s0
+; CHECK-GI-NEXT: ret
%c = add <4 x i8> %a, %b
%d = bitcast <4 x i8> %c to i32
ret i32 %d
@@ -81,18 +92,27 @@ define <4 x i8> @bitcast_i32_v4i8(i32 %a, i32 %b){
}
define i32 @bitcast_v2i16_i32(<2 x i16> %a, <2 x i16> %b){
-; CHECK-LABEL: bitcast_v2i16_i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: add v0.2s, v0.2s, v1.2s
-; CHECK-NEXT: mov w8, v0.s[1]
-; CHECK-NEXT: fmov w9, s0
-; CHECK-NEXT: strh w9, [sp, #12]
-; CHECK-NEXT: strh w8, [sp, #14]
-; CHECK-NEXT: ldr w0, [sp, #12]
-; CHECK-NEXT: add sp, sp, #16
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: bitcast_v2i16_i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sub sp, sp, #16
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT: add v0.2s, v0.2s, v1.2s
+; CHECK-SD-NEXT: mov w8, v0.s[1]
+; CHECK-SD-NEXT: fmov w9, s0
+; CHECK-SD-NEXT: strh w9, [sp, #12]
+; CHECK-SD-NEXT: strh w8, [sp, #14]
+; CHECK-SD-NEXT: ldr w0, [sp, #12]
+; CHECK-SD-NEXT: add sp, sp, #16
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: bitcast_v2i16_i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s
+; CHECK-GI-NEXT: mov s1, v0.s[1]
+; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-GI-NEXT: xtn v0.4h, v0.4s
+; CHECK-GI-NEXT: fmov w0, s0
+; CHECK-GI-NEXT: ret
%c = add <2 x i16> %a, %b
%d = bitcast <2 x i16> %c to i32
ret i32 %d
diff --git a/llvm/test/CodeGen/AArch64/itofp.ll b/llvm/test/CodeGen/AArch64/itofp.ll
index 2164c2a..ceac37f 100644
--- a/llvm/test/CodeGen/AArch64/itofp.ll
+++ b/llvm/test/CodeGen/AArch64/itofp.ll
@@ -5521,7 +5521,8 @@ define <2 x half> @stofp_v2i8_v2f16(<2 x i8> %a) {
; CHECK-GI-FP16: // %bb.0: // %entry
; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-FP16-NEXT: mov s1, v0.s[1]
-; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-GI-FP16-NEXT: xtn v0.4h, v0.4s
; CHECK-GI-FP16-NEXT: shl v0.4h, v0.4h, #8
; CHECK-GI-FP16-NEXT: sshr v0.4h, v0.4h, #8
; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
@@ -5580,12 +5581,13 @@ define <2 x half> @utofp_v2i8_v2f16(<2 x i8> %a) {
;
; CHECK-GI-FP16-LABEL: utofp_v2i8_v2f16:
; CHECK-GI-FP16: // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT: movi d1, #0x0000ff000000ff
-; CHECK-GI-FP16-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-GI-FP16-NEXT: mov s1, v0.s[1]
-; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
-; CHECK-GI-FP16-NEXT: ucvtf v0.4h, v0.4h
-; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-FP16-NEXT: mov w8, v0.s[1]
+; CHECK-GI-FP16-NEXT: fmov w9, s0
+; CHECK-GI-FP16-NEXT: and w9, w9, #0xff
+; CHECK-GI-FP16-NEXT: and w8, w8, #0xff
+; CHECK-GI-FP16-NEXT: ucvtf h0, w9
+; CHECK-GI-FP16-NEXT: ucvtf h1, w8
; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-FP16-NEXT: ret