author | Drew Kersnar <dkersnar@nvidia.com> | 2025-08-08 12:05:29 -0500
committer | GitHub <noreply@github.com> | 2025-08-08 12:05:29 -0500
commit | 90e8c8e7186616241549e7bc62d95b51467a674b (patch)
tree | 6cf0c144d9b9141f75125db932cbfd92b85354ab /llvm/test
parent | 51bc0c1d6bb9ab2c1c2acb3f37d00ed919202973 (diff)
[InferAlignment] Propagate alignment between loads/stores of the same base pointer (#145733)
We can derive and upgrade the alignment of loads and stores by using
other well-aligned loads and stores of the same base pointer. The
optimization makes a single forward pass through each basic block,
using the alignment and constant offset of every load/store it visits
to derive the best possible alignment for the underlying base pointer,
and caches the result. If it then encounters another load/store based
on that pointer, it tries to upgrade that access's alignment using the
cached fact. The optimization must remain a forward pass within a
single basic block because control flow and exception throwing can
affect alignment guarantees.
---------
Co-authored-by: Nikita Popov <github@npopov.com>
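For illustration, the following is a minimal, self-contained C++ sketch of the forward pass described above. It is not the actual InferAlignment implementation: it assumes every access has already been decomposed into a hypothetical base-pointer id plus a constant byte offset, it models a single basic block only (the real pass starts fresh for each block), and it uses a lowest-set-bit computation where LLVM would use its Align/commonAlignment helpers.

// Simplified, standalone model of the forward-pass idea (NOT the real pass).
// Each access is a (base pointer id, constant byte offset, alignment) triple;
// both the id and the decomposition are assumptions made for this sketch.
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <unordered_map>
#include <vector>

struct Access {
  int Base;        // hypothetical id of the underlying base pointer
  int64_t Offset;  // constant byte offset from that base
  uint64_t Align;  // alignment currently attached to the access (power of two)
};

// Largest power of two dividing |X|; for X == 0 there is no constraint from
// the offset, so return the maximum value and let std::min clamp it.
static uint64_t largestPowerOfTwoDividing(int64_t X) {
  if (X == 0)
    return UINT64_MAX;
  uint64_t U = X < 0 ? 0 - static_cast<uint64_t>(X) : static_cast<uint64_t>(X);
  return U & (~U + 1); // isolate the lowest set bit
}

// Single forward walk: cache the best (alignment, offset) fact proven for each
// base pointer so far, and use it to upgrade later accesses to the same base.
static void inferAlignments(std::vector<Access> &Accesses) {
  struct Fact {
    uint64_t Align;
    int64_t Offset;
  };
  std::unordered_map<int, Fact> Best;
  for (Access &A : Accesses) {
    auto It = Best.find(A.Base);
    if (It != Best.end()) {
      // Base + CachedOffset is known CachedAlign-aligned, so Base + A.Offset
      // is aligned to min(CachedAlign, 2^ctz(A.Offset - CachedOffset)).
      uint64_t Derived =
          std::min(It->second.Align,
                   largestPowerOfTwoDividing(A.Offset - It->second.Offset));
      A.Align = std::max(A.Align, Derived);
    }
    // Remember the strongest fact seen so far for this base pointer.
    if (It == Best.end() || A.Align > It->second.Align)
      Best[A.Base] = {A.Align, A.Offset};
  }
}

int main() {
  // Reduced version of @prop_align below: an align-16 access at offset 0
  // lets the align-4 access at offset 16 be upgraded; offset 4 stays at 4.
  std::vector<Access> Accesses = {{0, 0, 16}, {0, 4, 4}, {0, 16, 4}};
  inferAlignments(Accesses);
  for (const Access &A : Accesses)
    std::printf("base %d + %lld -> align %llu\n", A.Base,
                static_cast<long long>(A.Offset),
                static_cast<unsigned long long>(A.Align));
  return 0;
}

Compiled as C++11 or later, the sketch reports align 16 for the accesses at offsets 0 and 16 and leaves the offset-4 access at align 4, matching the upgrades checked in @prop_align in the test below.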
Diffstat (limited to 'llvm/test')
-rw-r--r-- | llvm/test/Transforms/InferAlignment/propagate-from-other-load-stores.ll | 194
-rw-r--r-- | llvm/test/Transforms/PhaseOrdering/X86/masked-memory-ops-with-cf.ll | 2
2 files changed, 195 insertions, 1 deletion
diff --git a/llvm/test/Transforms/InferAlignment/propagate-from-other-load-stores.ll b/llvm/test/Transforms/InferAlignment/propagate-from-other-load-stores.ll
new file mode 100644
index 0000000..3fc7c59
--- /dev/null
+++ b/llvm/test/Transforms/InferAlignment/propagate-from-other-load-stores.ll
@@ -0,0 +1,194 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=infer-alignment -S | FileCheck %s
+%struct.S1 = type { %struct.float3, %struct.float3, i32, i32 }
+%struct.float3 = type { float, float, float }
+
+
+; ------------------------------------------------------------------------------
+; Test that we can propagate the align 16 to the load and store that are set to align 4
+; ------------------------------------------------------------------------------
+
+define void @prop_align(ptr %v, ptr %vout) {
+; CHECK-LABEL: define void @prop_align(
+; CHECK-SAME: ptr [[V:%.*]], ptr [[VOUT:%.*]]) {
+; CHECK-NEXT: [[DOTUNPACK_UNPACK:%.*]] = load float, ptr [[V]], align 16
+; CHECK-NEXT: [[DOTUNPACK_ELT7:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 4
+; CHECK-NEXT: [[DOTUNPACK_UNPACK8:%.*]] = load float, ptr [[DOTUNPACK_ELT7]], align 4
+; CHECK-NEXT: [[DOTUNPACK_ELT9:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 8
+; CHECK-NEXT: [[DOTUNPACK_UNPACK10:%.*]] = load float, ptr [[DOTUNPACK_ELT9]], align 8
+; CHECK-NEXT: [[DOTELT1:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 12
+; CHECK-NEXT: [[DOTUNPACK2_UNPACK:%.*]] = load float, ptr [[DOTELT1]], align 4
+; CHECK-NEXT: [[DOTUNPACK2_ELT12:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 16
+; CHECK-NEXT: [[DOTUNPACK2_UNPACK13:%.*]] = load float, ptr [[DOTUNPACK2_ELT12]], align 16
+; CHECK-NEXT: [[DOTUNPACK2_ELT14:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 20
+; CHECK-NEXT: [[DOTUNPACK2_UNPACK15:%.*]] = load float, ptr [[DOTUNPACK2_ELT14]], align 4
+; CHECK-NEXT: [[DOTELT3:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 24
+; CHECK-NEXT: [[DOTUNPACK4:%.*]] = load i32, ptr [[DOTELT3]], align 8
+; CHECK-NEXT: [[DOTELT5:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 28
+; CHECK-NEXT: [[DOTUNPACK6:%.*]] = load i32, ptr [[DOTELT5]], align 4
+; CHECK-NEXT: store float [[DOTUNPACK_UNPACK]], ptr [[VOUT]], align 16
+; CHECK-NEXT: [[VOUT_REPACK23:%.*]] = getelementptr inbounds nuw i8, ptr [[VOUT]], i64 4
+; CHECK-NEXT: store float [[DOTUNPACK_UNPACK8]], ptr [[VOUT_REPACK23]], align 4
+; CHECK-NEXT: [[VOUT_REPACK25:%.*]] = getelementptr inbounds nuw i8, ptr [[VOUT]], i64 8
+; CHECK-NEXT: store float [[DOTUNPACK_UNPACK10]], ptr [[VOUT_REPACK25]], align 8
+; CHECK-NEXT: [[VOUT_REPACK17:%.*]] = getelementptr inbounds nuw i8, ptr [[VOUT]], i64 12
+; CHECK-NEXT: store float [[DOTUNPACK2_UNPACK]], ptr [[VOUT_REPACK17]], align 4
+; CHECK-NEXT: [[VOUT_REPACK17_REPACK27:%.*]] = getelementptr inbounds nuw i8, ptr [[VOUT]], i64 16
+; CHECK-NEXT: store float [[DOTUNPACK2_UNPACK13]], ptr [[VOUT_REPACK17_REPACK27]], align 16
+; CHECK-NEXT: [[VOUT_REPACK17_REPACK29:%.*]] = getelementptr inbounds nuw i8, ptr [[VOUT]], i64 20
+; CHECK-NEXT: store float [[DOTUNPACK2_UNPACK15]], ptr [[VOUT_REPACK17_REPACK29]], align 4
+; CHECK-NEXT: [[VOUT_REPACK19:%.*]] = getelementptr inbounds nuw i8, ptr [[VOUT]], i64 24
+; CHECK-NEXT: store i32 [[DOTUNPACK4]], ptr [[VOUT_REPACK19]], align 8
+; CHECK-NEXT: [[VOUT_REPACK21:%.*]] = getelementptr inbounds nuw i8, ptr [[VOUT]], i64 28
+; CHECK-NEXT: store i32 [[DOTUNPACK6]], ptr [[VOUT_REPACK21]], align 4
+; CHECK-NEXT: ret void
+;
+ %.unpack.unpack = load float, ptr %v, align 16
+ %.unpack.elt7 = getelementptr inbounds nuw i8, ptr %v, i64 4
+ %.unpack.unpack8 = load float, ptr %.unpack.elt7, align 4
+ %.unpack.elt9 = getelementptr inbounds nuw i8, ptr %v, i64 8
+ %.unpack.unpack10 = load float, ptr %.unpack.elt9, align 8
+ %.elt1 = getelementptr inbounds nuw i8, ptr %v, i64 12
+ %.unpack2.unpack = load float, ptr %.elt1, align 4
+ %.unpack2.elt12 = getelementptr inbounds nuw i8, ptr %v, i64 16
+ %.unpack2.unpack13 = load float, ptr %.unpack2.elt12, align 4
+ %.unpack2.elt14 = getelementptr inbounds nuw i8, ptr %v, i64 20
+ %.unpack2.unpack15 = load float, ptr %.unpack2.elt14, align 4
+ %.elt3 = getelementptr inbounds nuw i8, ptr %v, i64 24
+ %.unpack4 = load i32, ptr %.elt3, align 8
+ %.elt5 = getelementptr inbounds nuw i8, ptr %v, i64 28
+ %.unpack6 = load i32, ptr %.elt5, align 4
+ store float %.unpack.unpack, ptr %vout, align 16
+ %vout.repack23 = getelementptr inbounds nuw i8, ptr %vout, i64 4
+ store float %.unpack.unpack8, ptr %vout.repack23, align 4
+ %vout.repack25 = getelementptr inbounds nuw i8, ptr %vout, i64 8
+ store float %.unpack.unpack10, ptr %vout.repack25, align 8
+ %vout.repack17 = getelementptr inbounds nuw i8, ptr %vout, i64 12
+ store float %.unpack2.unpack, ptr %vout.repack17, align 4
+ %vout.repack17.repack27 = getelementptr inbounds nuw i8, ptr %vout, i64 16
+ store float %.unpack2.unpack13, ptr %vout.repack17.repack27, align 4
+ %vout.repack17.repack29 = getelementptr inbounds nuw i8, ptr %vout, i64 20
+ store float %.unpack2.unpack15, ptr %vout.repack17.repack29, align 4
+ %vout.repack19 = getelementptr inbounds nuw i8, ptr %vout, i64 24
+ store i32 %.unpack4, ptr %vout.repack19, align 8
+ %vout.repack21 = getelementptr inbounds nuw i8, ptr %vout, i64 28
+ store i32 %.unpack6, ptr %vout.repack21, align 4
+ ret void
+}
+
+; ------------------------------------------------------------------------------
+; Test that alignment is not propagated from a source that does not dominate the destination
+; ------------------------------------------------------------------------------
+
+define void @no_prop_align(ptr %v, ptr %vout, i1 %cond) {
+; CHECK-LABEL: define void @no_prop_align(
+; CHECK-SAME: ptr [[V:%.*]], ptr [[VOUT:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT: br i1 [[COND]], label %[[BRANCH1:.*]], label %[[BRANCH2:.*]]
+; CHECK: [[BRANCH1]]:
+; CHECK-NEXT: [[DOTUNPACK_UNPACK:%.*]] = load float, ptr [[V]], align 16
+; CHECK-NEXT: [[DOTUNPACK_ELT7:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 4
+; CHECK-NEXT: [[DOTUNPACK_UNPACK8:%.*]] = load float, ptr [[DOTUNPACK_ELT7]], align 4
+; CHECK-NEXT: [[DOTUNPACK_ELT9:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 8
+; CHECK-NEXT: [[DOTUNPACK_UNPACK10:%.*]] = load float, ptr [[DOTUNPACK_ELT9]], align 8
+; CHECK-NEXT: [[DOTELT1:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 12
+; CHECK-NEXT: [[DOTUNPACK2_UNPACK:%.*]] = load float, ptr [[DOTELT1]], align 4
+; CHECK-NEXT: br label %[[END:.*]]
+; CHECK: [[BRANCH2]]:
+; CHECK-NEXT: [[DOTUNPACK2_ELT12:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 16
+; CHECK-NEXT: [[DOTUNPACK2_UNPACK13:%.*]] = load float, ptr [[DOTUNPACK2_ELT12]], align 4
+; CHECK-NEXT: [[DOTUNPACK2_ELT14:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 20
+; CHECK-NEXT: [[DOTUNPACK2_UNPACK15:%.*]] = load float, ptr [[DOTUNPACK2_ELT14]], align 4
+; CHECK-NEXT: [[DOTELT3:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 24
+; CHECK-NEXT: [[DOTUNPACK4:%.*]] = load i32, ptr [[DOTELT3]], align 8
+; CHECK-NEXT: [[DOTELT5:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 28
+; CHECK-NEXT: [[DOTUNPACK6:%.*]] = load i32, ptr [[DOTELT5]], align 4
+; CHECK-NEXT: br label %[[END]]
+; CHECK: [[END]]:
+; CHECK-NEXT: ret void
+;
+ br i1 %cond, label %branch1, label %branch2
+
+branch1:
+ %.unpack.unpack = load float, ptr %v, align 16
+ %.unpack.elt7 = getelementptr inbounds nuw i8, ptr %v, i64 4
+ %.unpack.unpack8 = load float, ptr %.unpack.elt7, align 4
+ %.unpack.elt9 = getelementptr inbounds nuw i8, ptr %v, i64 8
+ %.unpack.unpack10 = load float, ptr %.unpack.elt9, align 8
+ %.elt1 = getelementptr inbounds nuw i8, ptr %v, i64 12
+ %.unpack2.unpack = load float, ptr %.elt1, align 4
+ br label %end
+
+branch2:
+ %.unpack2.elt12 = getelementptr inbounds nuw i8, ptr %v, i64 16
+ %.unpack2.unpack13 = load float, ptr %.unpack2.elt12, align 4
+ %.unpack2.elt14 = getelementptr inbounds nuw i8, ptr %v, i64 20
+ %.unpack2.unpack15 = load float, ptr %.unpack2.elt14, align 4
+ %.elt3 = getelementptr inbounds nuw i8, ptr %v, i64 24
+ %.unpack4 = load i32, ptr %.elt3, align 8
+ %.elt5 = getelementptr inbounds nuw i8, ptr %v, i64 28
+ %.unpack6 = load i32, ptr %.elt5, align 4
+ br label %end
+
+end:
+ ret void
+}
+
+; ------------------------------------------------------------------------------
+; Test that we can propagate to/from negative offset GEPs
+; ------------------------------------------------------------------------------
+
+define void @prop_align_negative_offset(ptr %v) {
+; CHECK-LABEL: define void @prop_align_negative_offset(
+; CHECK-SAME: ptr [[V:%.*]]) {
+; CHECK-NEXT: [[LOADALIGNED:%.*]] = load float, ptr [[V]], align 16
+; CHECK-NEXT: [[GEPNEGATIVE:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 -16
+; CHECK-NEXT: [[LOADUNALIGNED:%.*]] = load float, ptr [[GEPNEGATIVE]], align 16
+; CHECK-NEXT: ret void
+;
+ %loadAligned= load float, ptr %v, align 16
+ %gepNegative = getelementptr inbounds nuw i8, ptr %v, i64 -16
+ %loadUnaligned = load float, ptr %gepNegative, align 4
+ ret void
+}
+
+define void @prop_align_negative_offset_2(ptr %v) {
+; CHECK-LABEL: define void @prop_align_negative_offset_2(
+; CHECK-SAME: ptr [[V:%.*]]) {
+; CHECK-NEXT: [[GEPNEGATIVE:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 -16
+; CHECK-NEXT: [[LOADALIGNED:%.*]] = load float, ptr [[GEPNEGATIVE]], align 16
+; CHECK-NEXT: [[LOADUNALIGNED:%.*]] = load float, ptr [[V]], align 16
+; CHECK-NEXT: ret void
+;
+ %gepNegative = getelementptr inbounds nuw i8, ptr %v, i64 -16
+ %loadAligned = load float, ptr %gepNegative, align 16
+ %loadUnaligned= load float, ptr %v, align 4
+ ret void
+}
+
+define void @prop_align_negative_offset_3(ptr %v) {
+; CHECK-LABEL: define void @prop_align_negative_offset_3(
+; CHECK-SAME: ptr [[V:%.*]]) {
+; CHECK-NEXT: [[LOADALIGNED:%.*]] = load float, ptr [[V]], align 16
+; CHECK-NEXT: [[GEPNEGATIVE:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 -8
+; CHECK-NEXT: [[LOADUNALIGNED:%.*]] = load float, ptr [[GEPNEGATIVE]], align 8
+; CHECK-NEXT: ret void
+;
+ %loadAligned= load float, ptr %v, align 16
+ %gepNegative = getelementptr inbounds nuw i8, ptr %v, i64 -8
+ %loadUnaligned = load float, ptr %gepNegative, align 4
+ ret void
+}
+
+define void @prop_align_negative_offset_4(ptr %v) {
+; CHECK-LABEL: define void @prop_align_negative_offset_4(
+; CHECK-SAME: ptr [[V:%.*]]) {
+; CHECK-NEXT: [[LOADALIGNED:%.*]] = load float, ptr [[V]], align 16
+; CHECK-NEXT: [[GEPNEGATIVE:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 -20
+; CHECK-NEXT: [[LOADUNALIGNED:%.*]] = load float, ptr [[GEPNEGATIVE]], align 4
+; CHECK-NEXT: ret void
+;
+ %loadAligned= load float, ptr %v, align 16
+ %gepNegative = getelementptr inbounds nuw i8, ptr %v, i64 -20
+ %loadUnaligned = load float, ptr %gepNegative, align 4
+ ret void
+}
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/masked-memory-ops-with-cf.ll b/llvm/test/Transforms/PhaseOrdering/X86/masked-memory-ops-with-cf.ll
index 405a26d..c649f29e 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/masked-memory-ops-with-cf.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/masked-memory-ops-with-cf.ll
@@ -13,7 +13,7 @@ define void @basic(i1 %cond, ptr %b, ptr %p, ptr %q) {
 ; CHECK-NEXT: [[TMP5:%.*]] = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr [[B:%.*]], i32 8, <1 x i1> [[TMP0]], <1 x i64> poison)
 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to i64
 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[TMP2]] to <1 x i16>
-; CHECK-NEXT: call void @llvm.masked.store.v1i16.p0(<1 x i16> [[TMP7]], ptr [[B]], i32 2, <1 x i1> [[TMP0]])
+; CHECK-NEXT: call void @llvm.masked.store.v1i16.p0(<1 x i16> [[TMP7]], ptr [[B]], i32 8, <1 x i1> [[TMP0]])
 ; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP4]] to <1 x i32>
 ; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP8]], ptr [[P]], i32 4, <1 x i1> [[TMP0]])
 ; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[TMP6]] to <1 x i64>