Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUCombine.td')
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUCombine.td | 17 +++++++++++++++--
1 file changed, 15 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index e8b211f..7f00ead 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -176,6 +176,19 @@ def binop_s64_with_s32_mask_combines : GICombineGroup<[
combine_or_s64_with_s32_mask, combine_and_s64_with_s32_mask
]>;

+// (or i64:x, (zext i32:y)) -> i64:(merge (or lo_32(x), i32:y), hi_32(x))
+// (or (zext i32:y), i64:x) -> i64:(merge (or lo_32(x), i32:y), hi_32(x))
+def or_s64_zext_s32_frag : GICombinePatFrag<(outs root:$dst), (ins $src_s64, $src_s32),
+  [(pattern (G_OR $dst, i64:$src_s64, i64:$zext_val), (G_ZEXT i64:$zext_val, i32:$src_s32)),
+   (pattern (G_OR $dst, i64:$zext_val, i64:$src_s64), (G_ZEXT i64:$zext_val, i32:$src_s32))]>;
+
+def combine_or_s64_s32 : GICombineRule<
+  (defs root:$dst),
+  (match (or_s64_zext_s32_frag $dst, i64:$x, i32:$y):$dst),
+  (apply (G_UNMERGE_VALUES $x_lo, $x_hi, $x),
+         (G_OR $or, $x_lo, $y),
+         (G_MERGE_VALUES $dst, $or, $x_hi))>;
+
let Predicates = [Has16BitInsts, NotHasMed3_16] in {
// For gfx8, expand f16-fmed3-as-f32 into a min/max f16 sequence. This
// saves one instruction compared to the promotion.
@@ -206,7 +219,7 @@ def AMDGPUPreLegalizerCombiner: GICombiner<
"AMDGPUPreLegalizerCombinerImpl",
[all_combines, combine_fmul_with_select_to_fldexp, clamp_i64_to_i16,
foldable_fneg, combine_shuffle_vector_to_build_vector,
- binop_s64_with_s32_mask_combines]> {
+ binop_s64_with_s32_mask_combines, combine_or_s64_s32]> {
let CombineAllMethodName = "tryCombineAllImpl";
}
@@ -215,7 +228,7 @@ def AMDGPUPostLegalizerCombiner: GICombiner<
[all_combines, gfx6gfx7_combines, gfx8_combines, combine_fmul_with_select_to_fldexp,
uchar_to_float, cvt_f32_ubyteN, remove_fcanonicalize, foldable_fneg,
rcp_sqrt_to_rsq, fdiv_by_sqrt_to_rsq_f16, sign_extension_in_reg, smulu64,
- binop_s64_with_s32_mask_combines]> {
+ binop_s64_with_s32_mask_combines, combine_or_s64_s32]> {
let CombineAllMethodName = "tryCombineAllImpl";
}
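
The new combine_or_s64_s32 rule rewrites a 64-bit G_OR whose other operand is a
zero-extended 32-bit value into a 32-bit G_OR on the low half, reassembling the
64-bit result with the untouched high half; the PatFrag carries two patterns to
cover both operand orders of the commutative G_OR. A minimal hand-written MIR
sketch of the intended rewrite (illustrative only, not taken from the patch;
register names are hypothetical):

    ; before
    %x:_(s64) = COPY $vgpr0_vgpr1
    %y:_(s32) = COPY $vgpr2
    %zext:_(s64) = G_ZEXT %y(s32)
    %dst:_(s64) = G_OR %x, %zext

    ; after combine_or_s64_s32
    %x_lo:_(s32), %x_hi:_(s32) = G_UNMERGE_VALUES %x(s64)
    %or:_(s32) = G_OR %x_lo, %y
    %dst:_(s64) = G_MERGE_VALUES %or(s32), %x_hi(s32)

The rule is registered in both the pre- and post-legalizer combiner rule lists
above, so it can fire in either combiner run.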