; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -passes=aggressive-instcombine -S | FileCheck %s

; The LIT tests rely on i32, i16 and i8 being valid machine types.
; The bounds checking tests also require i64 and i128.
target datalayout = "n8:16:32:64:128"

; This LIT test checks whether the TruncInstCombine pass correctly recognizes
; the constraints from a signed min-max clamp. The clamp is a sequence of smin
; and smax instructions limiting a variable to a range,
; lower_limit <= y <= upper_limit.
;
; Each LIT test (except the last ones) has two versions depending on the order
; of smin and smax:
; a) y = smax(smin(x, upper_limit), lower_limit)
; b) y = smin(smax(x, lower_limit), upper_limit)
;
; The clamp is used in the TruncInstCombine.cpp pass (as part of
; aggressive-instcombine) to optimize extensions and truncations of lshr. This
; is what is tested here. The pass also optimizes extensions and truncations of
; other binary operators, but in such cases the smin-smax clamp may not be used.

; Clamp %x to [0, 31] with smin applied first, then sext to i32, lshr by 2 and
; trunc to i8. The expected output truncates the clamped i16 value to i8 and
; performs the lshr in i8.
define i8 @test_0a(i16 %x) {
; CHECK-LABEL: define i8 @test_0a(
; CHECK-SAME: i16 [[X:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.smin.i16(i16 [[X]], i16 31)
; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smax.i16(i16 [[TMP1]], i16 0)
; CHECK-NEXT: [[A:%.*]] = trunc i16 [[TMP2]] to i8
; CHECK-NEXT: [[B:%.*]] = lshr i8 [[A]], 2
; CHECK-NEXT: ret i8 [[B]]
;
  %1 = tail call i16 @llvm.smin.i16(i16 %x, i16 31)
  %2 = tail call i16 @llvm.smax.i16(i16 %1, i16 0)
  %a = sext i16 %2 to i32
  %b = lshr i32 %a, 2
  %b.trunc = trunc i32 %b to i8
  ret i8 %b.trunc
}

; Same clamp to [0, 31] as test_0a, but with smax applied first. The expected
; output has the same shape: trunc to i8 followed by an i8 lshr.
define i8 @test_0b(i16 %x) {
; CHECK-LABEL: define i8 @test_0b(
; CHECK-SAME: i16 [[X:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.smax.i16(i16 [[X]], i16 0)
; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smin.i16(i16 [[TMP1]], i16 31)
; CHECK-NEXT: [[A:%.*]] = trunc i16 [[TMP2]] to i8
; CHECK-NEXT: [[B:%.*]] = lshr i8 [[A]], 2
; CHECK-NEXT: ret i8 [[B]]
;
  %1 = tail call i16 @llvm.smax.i16(i16 %x, i16 0)
  %2 = tail call i16 @llvm.smin.i16(i16 %1, i16 31)
  %a = sext i16 %2 to i32
  %b = lshr i32 %a, 2
  %b.trunc = trunc i32 %b to i8
  ret i8 %b.trunc
}

; The following two tests contain add instead of lshr.
; The optimization works here as well.

; Clamp %x to [0, 31] with smin applied first, then sext to i32, add 2 and
; trunc to i8. The expected output truncates to i8 and performs the add in i8.
define i8 @test_1a(i16 %x) {
; CHECK-LABEL: define i8 @test_1a(
; CHECK-SAME: i16 [[X:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.smin.i16(i16 [[X]], i16 31)
; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smax.i16(i16 [[TMP1]], i16 0)
; CHECK-NEXT: [[A:%.*]] = trunc i16 [[TMP2]] to i8
; CHECK-NEXT: [[B:%.*]] = add i8 [[A]], 2
; CHECK-NEXT: ret i8 [[B]]
;
  %1 = tail call i16 @llvm.smin.i16(i16 %x, i16 31)
  %2 = tail call i16 @llvm.smax.i16(i16 %1, i16 0)
  %a = sext i16 %2 to i32
  %b = add i32 %a, 2
  %b.trunc = trunc i32 %b to i8
  ret i8 %b.trunc
}

; Same as test_1a, but with smax applied before smin.
define i8 @test_1b(i16 %x) {
; CHECK-LABEL: define i8 @test_1b(
; CHECK-SAME: i16 [[X:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.smax.i16(i16 [[X]], i16 0)
; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smin.i16(i16 [[TMP1]], i16 31)
; CHECK-NEXT: [[A:%.*]] = trunc i16 [[TMP2]] to i8
; CHECK-NEXT: [[B:%.*]] = add i8 [[A]], 2
; CHECK-NEXT: ret i8 [[B]]
;
  %1 = tail call i16 @llvm.smax.i16(i16 %x, i16 0)
  %2 = tail call i16 @llvm.smin.i16(i16 %1, i16 31)
  %a = sext i16 %2 to i32
  %b = add i32 %a, 2
  %b.trunc = trunc i32 %b to i8
  ret i8 %b.trunc
}

; Tests for clamping with negative min and max.
; With sext no optimization occurs.

; Clamp %x to [-31, -1] with smin applied first, then sext to i32, lshr by 2
; and trunc to i8. The expected output keeps the i32 sext/lshr/trunc chain
; exactly as written in the input.
define i8 @test_2a(i16 %x) {
; CHECK-LABEL: define i8 @test_2a(
; CHECK-SAME: i16 [[X:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.smin.i16(i16 [[X]], i16 -1)
; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smax.i16(i16 [[TMP1]], i16 -31)
; CHECK-NEXT: [[A:%.*]] = sext i16 [[TMP2]] to i32
; CHECK-NEXT: [[B:%.*]] = lshr i32 [[A]], 2
; CHECK-NEXT: [[B_TRUNC:%.*]] = trunc i32 [[B]] to i8
; CHECK-NEXT: ret i8 [[B_TRUNC]]
;
  %1 = tail call i16 @llvm.smin.i16(i16 %x, i16 -1)
  %2 = tail call i16 @llvm.smax.i16(i16 %1, i16 -31)
  %a = sext i16 %2 to i32
  %b = lshr i32 %a, 2
  %b.trunc = trunc i32 %b to i8
  ret i8 %b.trunc
}

; Same as test_2a, but with smax applied before smin. The expected output is
; again unchanged from the input.
define i8 @test_2b(i16 %x) {
; CHECK-LABEL: define i8 @test_2b(
; CHECK-SAME: i16 [[X:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.smax.i16(i16 [[X]], i16 -31)
; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smin.i16(i16 [[TMP1]], i16 -1)
; CHECK-NEXT: [[A:%.*]] = sext i16 [[TMP2]] to i32
; CHECK-NEXT: [[B:%.*]] = lshr i32 [[A]], 2
; CHECK-NEXT: [[B_TRUNC:%.*]] = trunc i32 [[B]] to i8
; CHECK-NEXT: ret i8 [[B_TRUNC]]
;
  %1 = tail call i16 @llvm.smax.i16(i16 %x, i16 -31)
  %2 = tail call i16 @llvm.smin.i16(i16 %1, i16 -1)
  %a = sext i16 %2 to i32
  %b = lshr i32 %a, 2
  %b.trunc = trunc i32 %b to i8
  ret i8 %b.trunc
}

; With zext the optimization occurs.

; Clamp %x to [-31, -1] with smin applied first, then zext to i32, lshr by 2
; and trunc to i8. The expected output drops the zext, performs the lshr in
; i16 and truncates the i16 result to i8.
define i8 @test_2c(i16 %x) {
; CHECK-LABEL: define i8 @test_2c(
; CHECK-SAME: i16 [[X:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.smin.i16(i16 [[X]], i16 -1)
; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smax.i16(i16 [[TMP1]], i16 -31)
; CHECK-NEXT: [[B:%.*]] = lshr i16 [[TMP2]], 2
; CHECK-NEXT: [[B_TRUNC:%.*]] = trunc i16 [[B]] to i8
; CHECK-NEXT: ret i8 [[B_TRUNC]]
;
  %1 = tail call i16 @llvm.smin.i16(i16 %x, i16 -1)
  %2 = tail call i16 @llvm.smax.i16(i16 %1, i16 -31)
  %a = zext i16 %2 to i32
  %b = lshr i32 %a, 2
  %b.trunc = trunc i32 %b to i8
  ret i8 %b.trunc
}

; Same as test_2c, but with smax applied before smin.
define i8 @test_2d(i16 %x) {
; CHECK-LABEL: define i8 @test_2d(
; CHECK-SAME: i16 [[X:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.smax.i16(i16 [[X]], i16 -31)
; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smin.i16(i16 [[TMP1]], i16 -1)
; CHECK-NEXT: [[B:%.*]] = lshr i16 [[TMP2]], 2
; CHECK-NEXT: [[B_TRUNC:%.*]] = trunc i16 [[B]] to i8
; CHECK-NEXT: ret i8 [[B_TRUNC]]
;
  %1 = tail call i16 @llvm.smax.i16(i16 %x, i16 -31)
  %2 = tail call i16 @llvm.smin.i16(i16 %1, i16 -1)
  %a = zext i16 %2 to i32
  %b = lshr i32 %a, 2
  %b.trunc = trunc i32 %b to i8
  ret i8 %b.trunc
}

; Tests for clamping with mixed-signed min and max.
; With zext the optimization occurs.

; Clamp %x to [-31, 31] with smin applied first, then zext to i32, lshr by 2
; and trunc to i8. The expected output drops the zext, performs the lshr in
; i16 and truncates the i16 result to i8.
define i8 @test_3a(i16 %x) {
; CHECK-LABEL: define i8 @test_3a(
; CHECK-SAME: i16 [[X:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.smin.i16(i16 [[X]], i16 31)
; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smax.i16(i16 [[TMP1]], i16 -31)
; CHECK-NEXT: [[B:%.*]] = lshr i16 [[TMP2]], 2
; CHECK-NEXT: [[B_TRUNC:%.*]] = trunc i16 [[B]] to i8
; CHECK-NEXT: ret i8 [[B_TRUNC]]
;
  %1 = tail call i16 @llvm.smin.i16(i16 %x, i16 31)
  %2 = tail call i16 @llvm.smax.i16(i16 %1, i16 -31)
  %a = zext i16 %2 to i32
  %b = lshr i32 %a, 2
  %b.trunc = trunc i32 %b to i8
  ret i8 %b.trunc
}

; Same as test_3a, but with smax applied before smin.
define i8 @test_3b(i16 %x) {
; CHECK-LABEL: define i8 @test_3b(
; CHECK-SAME: i16 [[X:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.smax.i16(i16 [[X]], i16 -31)
; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smin.i16(i16 [[TMP1]], i16 31)
; CHECK-NEXT: [[B:%.*]] = lshr i16 [[TMP2]], 2
; CHECK-NEXT: [[B_TRUNC:%.*]] = trunc i16 [[B]] to i8
; CHECK-NEXT: ret i8 [[B_TRUNC]]
;
  %1 = tail call i16 @llvm.smax.i16(i16 %x, i16 -31)
  %2 = tail call i16 @llvm.smin.i16(i16 %1, i16 31)
  %a = zext i16 %2 to i32
  %b = lshr i32 %a, 2
  %b.trunc = trunc i32 %b to i8
  ret i8 %b.trunc
}

; Optimizations with vector types.

; Vector variant: every lane of the <16 x i16> input is clamped to [0, 127]
; with smin applied first, then sext to i32 lanes, lshr by 2 and trunc to i8
; lanes. The expected output truncates to <16 x i8> and shifts in i8 lanes.
define <16 x i8> @test_vec_1a(<16 x i16> %x) {
; CHECK-LABEL: define <16 x i8> @test_vec_1a(
; CHECK-SAME: <16 x i16> [[X:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.smin.v16i16(<16 x i16> [[X]], <16 x i16> splat (i16 127))
; CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.smax.v16i16(<16 x i16> [[TMP1]], <16 x i16> zeroinitializer)
; CHECK-NEXT: [[A:%.*]] = trunc <16 x i16> [[TMP2]] to <16 x i8>
; CHECK-NEXT: [[B:%.*]] = lshr <16 x i8> [[A]], splat (i8 2)
; CHECK-NEXT: ret <16 x i8> [[B]]
;
  %1 = tail call <16 x i16> @llvm.smin.v16i16(<16 x i16> %x, <16 x i16> splat (i16 127))
  %2 = tail call <16 x i16> @llvm.smax.v16i16(<16 x i16> %1, <16 x i16> zeroinitializer)
  %a = sext <16 x i16> %2 to <16 x i32>
  %b = lshr <16 x i32> %a, splat (i32 2)
  %b.trunc = trunc <16 x i32> %b to <16 x i8>
  ret <16 x i8> %b.trunc
}

; Same as test_vec_1a, but with smax applied before smin.
define <16 x i8> @test_vec_1b(<16 x i16> %x) {
; CHECK-LABEL: define <16 x i8> @test_vec_1b(
; CHECK-SAME: <16 x i16> [[X:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.smax.v16i16(<16 x i16> [[X]], <16 x i16> zeroinitializer)
; CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.smin.v16i16(<16 x i16> [[TMP1]], <16 x i16> splat (i16 127))
; CHECK-NEXT: [[A:%.*]] = trunc <16 x i16> [[TMP2]] to <16 x i8>
; CHECK-NEXT: [[B:%.*]] = lshr <16 x i8> [[A]], splat (i8 2)
; CHECK-NEXT: ret <16 x i8> [[B]]
;
  %1 = tail call <16 x i16> @llvm.smax.v16i16(<16 x i16> %x, <16 x i16> zeroinitializer)
  %2 = tail call <16 x i16> @llvm.smin.v16i16(<16 x i16> %1, <16 x i16> splat (i16 127))
  %a = sext <16 x i16> %2 to <16 x i32>
  %b = lshr <16 x i32> %a, splat (i32 2)
  %b.trunc = trunc <16 x i32> %b to <16 x i8>
  ret <16 x i8> %b.trunc
}

; A longer test that was the original motivation for the smin-smax clamping.

; Both inputs are clamped to [0, 127] (%x with smin first, %y with smax first),
; zero-extended to i32, multiplied, shifted right by 7 and truncated to i8.
; The expected output performs the mul and the lshr in i16 and truncates the
; i16 result to i8; the zext instructions are gone.
define i8 @test_final(i16 %x, i16 %y) {
; CHECK-LABEL: define i8 @test_final(
; CHECK-SAME: i16 [[X:%.*]], i16 [[Y:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.smin.i16(i16 [[X]], i16 127)
; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smax.i16(i16 [[TMP1]], i16 0)
; CHECK-NEXT: [[TMP3:%.*]] = tail call i16 @llvm.smax.i16(i16 [[Y]], i16 0)
; CHECK-NEXT: [[TMP4:%.*]] = tail call i16 @llvm.smin.i16(i16 [[TMP3]], i16 127)
; CHECK-NEXT: [[MUL:%.*]] = mul i16 [[TMP2]], [[TMP4]]
; CHECK-NEXT: [[SHR:%.*]] = lshr i16 [[MUL]], 7
; CHECK-NEXT: [[TRUNC:%.*]] = trunc i16 [[SHR]] to i8
; CHECK-NEXT: ret i8 [[TRUNC]]
;
  %1 = tail call i16 @llvm.smin.i16(i16 %x, i16 127)
  %2 = tail call i16 @llvm.smax.i16(i16 %1, i16 0)
  %x.clamp = zext nneg i16 %2 to i32
  %3 = tail call i16 @llvm.smax.i16(i16 %y, i16 0)
  %4 = tail call i16 @llvm.smin.i16(i16 %3, i16 127)
  %y.clamp = zext nneg i16 %4 to i32
  %mul = mul nuw nsw i32 %x.clamp, %y.clamp
  %shr = lshr i32 %mul, 7
  %trunc= trunc nuw nsw i32 %shr to i8
  ret i8 %trunc
}

; Range tests below check if the bounds are dealt with correctly.

; This gets optimized.
; Clamp to [0, 127], sext to i32, lshr by 7, trunc to i8. The expected output
; truncates to i8 and performs the lshr in i8.
define i8 @test_bounds_1(i16 %x) {
; CHECK-LABEL: define i8 @test_bounds_1(
; CHECK-SAME: i16 [[X:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.smin.i16(i16 [[X]], i16 127)
; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smax.i16(i16 [[TMP1]], i16 0)
; CHECK-NEXT: [[A:%.*]] = trunc i16 [[TMP2]] to i8
; CHECK-NEXT: [[B:%.*]] = lshr i8 [[A]], 7
; CHECK-NEXT: ret i8 [[B]]
;
  %1 = tail call i16 @llvm.smin.i16(i16 %x, i16 127)
  %2 = tail call i16 @llvm.smax.i16(i16 %1, i16 0)
  %a = sext i16 %2 to i32
  %b = lshr i32 %a, 7
  %b.trunc = trunc i32 %b to i8
  ret i8 %b.trunc
}

; The next test is the same as test_bounds_1 with the upper limit raised to 128.
; NOTE(review): this comment used to read "While this does not [get
; optimized]", but the autogenerated assertions of test_bounds_2 show the same
; narrowing to i8 as test_bounds_1. Either the old comment was stale or the
; assertions need regenerating -- confirm against the current pass output.

; Clamp to [0, 128], sext to i32, lshr by 7, trunc to i8. The expected output
; below truncates the clamped value to i8 and performs the lshr in i8, i.e. it
; has the same shape as the expected output of test_bounds_1.
define i8 @test_bounds_2(i16 %x) {
; CHECK-LABEL: define i8 @test_bounds_2(
; CHECK-SAME: i16 [[X:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.smin.i16(i16 [[X]], i16 128)
; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smax.i16(i16 [[TMP1]], i16 0)
; CHECK-NEXT: [[A:%.*]] = trunc i16 [[TMP2]] to i8
; CHECK-NEXT: [[B:%.*]] = lshr i8 [[A]], 7
; CHECK-NEXT: ret i8 [[B]]
;
  %1 = tail call i16 @llvm.smin.i16(i16 %x, i16 128)
  %2 = tail call i16 @llvm.smax.i16(i16 %1, i16 0)
  %a = sext i16 %2 to i32
  %b = lshr i32 %a, 7
  %b.trunc = trunc i32 %b to i8
  ret i8 %b.trunc
}

; This should get optimized. We test here if the optimization works correctly
; if the upper limit is signed max int.
; Clamp to [32752, 32767]; the expected output drops the sext and performs the
; lshr in i16 before truncating to i8.
define i8 @test_bounds_3(i16 %x) {
; CHECK-LABEL: define i8 @test_bounds_3(
; CHECK-SAME: i16 [[X:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.smin.i16(i16 [[X]], i16 32767)
; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smax.i16(i16 [[TMP1]], i16 32752)
; CHECK-NEXT: [[B:%.*]] = lshr i16 [[TMP2]], 2
; CHECK-NEXT: [[B_TRUNC:%.*]] = trunc i16 [[B]] to i8
; CHECK-NEXT: ret i8 [[B_TRUNC]]
;
  %1 = tail call i16 @llvm.smin.i16(i16 %x, i16 32767)
  %2 = tail call i16 @llvm.smax.i16(i16 %1, i16 32752)
  %a = sext i16 %2 to i32
  %b = lshr i32 %a, 2
  %b.trunc = trunc i32 %b to i8
  ret i8 %b.trunc
}

; Here the lower limit (128, applied by smax) is greater than the upper limit
; (0, applied by smin).

; smin with 0 followed by smax with 128, then sext to i32, lshr by 2 and trunc
; to i8. The expected output drops the sext and performs the lshr in i16.
define i8 @test_bounds_4(i16 %x) {
; CHECK-LABEL: define i8 @test_bounds_4(
; CHECK-SAME: i16 [[X:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.smin.i16(i16 [[X]], i16 0)
; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smax.i16(i16 [[TMP1]], i16 128)
; CHECK-NEXT: [[B:%.*]] = lshr i16 [[TMP2]], 2
; CHECK-NEXT: [[B_TRUNC:%.*]] = trunc i16 [[B]] to i8
; CHECK-NEXT: ret i8 [[B_TRUNC]]
;
  %1 = tail call i16 @llvm.smin.i16(i16 %x, i16 0)
  %2 = tail call i16 @llvm.smax.i16(i16 %1, i16 128)
  %a = sext i16 %2 to i32
  %b = lshr i32 %a, 2
  %b.trunc = trunc i32 %b to i8
  ret i8 %b.trunc
}

; The following 3 tests check the situation where min and max are minimal and
; maximal signed values. No transformations should occur here.
; NOTE(review): the expected outputs below do drop the zext and perform the
; lshr in the source integer width; presumably "no transformations" means no
; narrowing below the source width -- confirm.

; i16 source: limits are -32768 and 32767, zext to i32.
define i8 @test_bounds_5(i16 %x) {
; CHECK-LABEL: define i8 @test_bounds_5(
; CHECK-SAME: i16 [[X:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.smin.i16(i16 [[X]], i16 32767)
; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smax.i16(i16 [[TMP1]], i16 -32768)
; CHECK-NEXT: [[B:%.*]] = lshr i16 [[TMP2]], 2
; CHECK-NEXT: [[B_TRUNC:%.*]] = trunc i16 [[B]] to i8
; CHECK-NEXT: ret i8 [[B_TRUNC]]
;
  %1 = tail call i16 @llvm.smin.i16(i16 %x, i16 32767)
  %2 = tail call i16 @llvm.smax.i16(i16 %1, i16 -32768)
  %a = zext i16 %2 to i32
  %b = lshr i32 %a, 2
  %b.trunc = trunc i32 %b to i8
  ret i8 %b.trunc
}

; i32 source: limits are -2147483648 and 2147483647, zext to i64.
define i8 @test_bounds_6(i32 %x) {
; CHECK-LABEL: define i8 @test_bounds_6(
; CHECK-SAME: i32 [[X:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.smin.i32(i32 [[X]], i32 2147483647)
; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.smax.i32(i32 [[TMP1]], i32 -2147483648)
; CHECK-NEXT: [[B:%.*]] = lshr i32 [[TMP2]], 2
; CHECK-NEXT: [[B_TRUNC:%.*]] = trunc i32 [[B]] to i8
; CHECK-NEXT: ret i8 [[B_TRUNC]]
;
  %1 = tail call i32 @llvm.smin.i32(i32 %x, i32 2147483647)
  %2 = tail call i32 @llvm.smax.i32(i32 %1, i32 -2147483648)
  %a = zext i32 %2 to i64
  %b = lshr i64 %a, 2
  %b.trunc = trunc i64 %b to i8
  ret i8 %b.trunc
}

; i64 source: limits are -9223372036854775808 and 9223372036854775807, zext to
; i128.
define i8 @test_bounds_7(i64 %x) {
; CHECK-LABEL: define i8 @test_bounds_7(
; CHECK-SAME: i64 [[X:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.smin.i64(i64 [[X]], i64 9223372036854775807)
; CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.smax.i64(i64 [[TMP1]], i64 -9223372036854775808)
; CHECK-NEXT: [[B:%.*]] = lshr i64 [[TMP2]], 2
; CHECK-NEXT: [[B_TRUNC:%.*]] = trunc i64 [[B]] to i8
; CHECK-NEXT: ret i8 [[B_TRUNC]]
;
  %1 = tail call i64 @llvm.smin.i64(i64 %x, i64 9223372036854775807)
  %2 = tail call i64 @llvm.smax.i64(i64 %1, i64 -9223372036854775808)
  %a = zext i64 %2 to i128
  %b = lshr i128 %a, 2
  %b.trunc = trunc i128 %b to i8
  ret i8 %b.trunc
}