diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2020-02-06 20:33:47 -0500 |
---|---|---|
committer | Matt Arsenault <arsenm2@gmail.com> | 2020-02-06 21:43:54 -0500 |
commit | 6a570dc548078af92a3cc0dda0d2ad1f371b0280 (patch) | |
tree | 438f2b93ea8b32f34b9fac0eed0340149a3f6ee6 | |
parent | 02fffbb5fa9f34a16f2c7959364d0668364332a2 (diff) | |
download | llvm-6a570dc548078af92a3cc0dda0d2ad1f371b0280.zip llvm-6a570dc548078af92a3cc0dda0d2ad1f371b0280.tar.gz llvm-6a570dc548078af92a3cc0dda0d2ad1f371b0280.tar.bz2 |
AMDGPU/GlobalISel: Fix non-pow-2 add/sub/mul for 16-bit insts
Scalar types whose bit width is not a power of two and falls between
16 and 32 bits (e.g. s24) would not legalize on targets with 16-bit
instructions.
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 3 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir | 55 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir | 55 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sub.mir | 133 |
4 files changed, 245 insertions, 1 deletion
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 149e871..78eaab4 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -284,7 +284,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL}) .legalFor({S32, S16}) .clampScalar(0, S16, S32) - .scalarize(0); + .scalarize(0) + .widenScalarToNextPow2(0, 32); } else { getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL}) .legalFor({S32}) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir index d6913a9..e43c541 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir @@ -266,3 +266,58 @@ body: | %5:_(s32) = G_ZEXT %4 $vgpr0 = COPY %5 ... + +--- +name: test_add_s24 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: test_add_s24 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY2]], [[COPY3]] + ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ADD]](s32) + ; GFX6: $vgpr0 = COPY [[COPY4]](s32) + ; GFX8-LABEL: name: test_add_s24 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY2]], [[COPY3]] + ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ADD]](s32) + ; GFX8: $vgpr0 = COPY [[COPY4]](s32) + ; GFX9-LABEL: name: test_add_s24 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX9: 
[[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY2]], [[COPY3]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ADD]](s32) + ; GFX9: $vgpr0 = COPY [[COPY4]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s24) = G_TRUNC %0 + %3:_(s24) = G_TRUNC %1 + %4:_(s24) = G_ADD %2, %3 + %5:_(s32) = G_ANYEXT %4 + $vgpr0 = COPY %5 +... + +# FIXME +# --- +# name: test_add_s33 +# body: | +# bb.0: +# liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + +# %0:_(s64) = COPY $vgpr0_vgpr1 +# %1:_(s64) = COPY $vgpr2_vgpr3 +# %2:_(s33) = G_TRUNC %0 +# %3:_(s33) = G_TRUNC %1 +# %4:_(s33) = G_ADD %2, %3 +# %5:_(s64) = G_ANYEXT %4 +# $vgpr0_vgpr1 = COPY %5 +# ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir index 9987f81..bcb9997 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir @@ -320,3 +320,58 @@ body: | %2:_(<2 x s16>) = G_MUL %0, %1 $vgpr0 = COPY %2 ... 
+ +--- +name: test_mul_s24 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: test_mul_s24 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY2]], [[COPY3]] + ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[MUL]](s32) + ; GFX6: $vgpr0 = COPY [[COPY4]](s32) + ; GFX8-LABEL: name: test_mul_s24 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY2]], [[COPY3]] + ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[MUL]](s32) + ; GFX8: $vgpr0 = COPY [[COPY4]](s32) + ; GFX9-LABEL: name: test_mul_s24 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY2]], [[COPY3]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[MUL]](s32) + ; GFX9: $vgpr0 = COPY [[COPY4]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s24) = G_TRUNC %0 + %3:_(s24) = G_TRUNC %1 + %4:_(s24) = G_MUL %2, %3 + %5:_(s32) = G_ANYEXT %4 + $vgpr0 = COPY %5 +... + +# FIXME: +# --- +# name: test_mul_s33 +# body: | +# bb.0: +# liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + +# %0:_(s64) = COPY $vgpr0_vgpr1 +# %1:_(s64) = COPY $vgpr2_vgpr3 +# %2:_(s33) = G_TRUNC %0 +# %3:_(s33) = G_TRUNC %1 +# %4:_(s33) = G_MUL %2, %3 +# %5:_(s64) = G_ANYEXT %4 +# $vgpr0_vgpr1 = COPY %5 +# ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sub.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sub.mir index 9563ace..8da7c0f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sub.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sub.mir @@ -182,3 +182,136 @@ body: | %2:_(<2 x s16>) = G_SUB %0, %1 $vgpr0 = COPY %2 ... + +--- +name: test_sub_s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; GFX6-LABEL: name: test_sub_s64 + ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX6: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] + ; GFX6: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; GFX6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) + ; GFX6: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; GFX8-LABEL: name: test_sub_s64 + ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX8: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] + ; GFX8: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) + ; GFX8: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; GFX9-LABEL: name: test_sub_s64 + ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX9: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] + ; GFX9: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) + ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](s64) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(s64) = G_SUB %0, %1 + $vgpr0_vgpr1 = COPY %2 +... + +--- +name: test_sub_s7 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: test_sub_s7 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY2]], [[COPY3]] + ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) + ; GFX6: $vgpr0 = COPY [[COPY4]](s32) + ; GFX8-LABEL: name: test_sub_s7 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[TRUNC]], [[TRUNC1]] + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SUB]](s16) + ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-LABEL: name: test_sub_s7 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[TRUNC]], [[TRUNC1]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SUB]](s16) + ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s7) = G_TRUNC %0 + %3:_(s7) = G_TRUNC %1 + %4:_(s7) = G_SUB %2, %3 + %5:_(s32) = G_ANYEXT %4 + $vgpr0 = COPY %5 +... 
+ +--- +name: test_sub_s24 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: test_sub_s24 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY2]], [[COPY3]] + ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) + ; GFX6: $vgpr0 = COPY [[COPY4]](s32) + ; GFX8-LABEL: name: test_sub_s24 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY2]], [[COPY3]] + ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) + ; GFX8: $vgpr0 = COPY [[COPY4]](s32) + ; GFX9-LABEL: name: test_sub_s24 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY2]], [[COPY3]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) + ; GFX9: $vgpr0 = COPY [[COPY4]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s24) = G_TRUNC %0 + %3:_(s24) = G_TRUNC %1 + %4:_(s24) = G_SUB %2, %3 + %5:_(s32) = G_ANYEXT %4 + $vgpr0 = COPY %5 +... + +# FIXME +# --- +# name: test_sub_s33 +# body: | +# bb.0: +# liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + +# %0:_(s64) = COPY $vgpr0_vgpr1 +# %1:_(s64) = COPY $vgpr2_vgpr3 +# %2:_(s33) = G_TRUNC %0 +# %3:_(s33) = G_TRUNC %1 +# %4:_(s33) = G_SUB %2, %3 +# %5:_(s64) = G_ANYEXT %4 +# $vgpr0_vgpr1 = COPY %5 +# ... |