aboutsummaryrefslogtreecommitdiff
path: root/llvm
diff options
context:
space:
mode:
Diffstat (limited to 'llvm')
-rw-r--r--llvm/benchmarks/SpecialCaseListBM.cpp29
-rw-r--r--llvm/include/llvm/ADT/Bitfields.h9
-rw-r--r--llvm/lib/Target/AMDGPU/R600ISelLowering.cpp3
-rw-r--r--llvm/lib/Transforms/Utils/SCCPSolver.cpp96
-rw-r--r--llvm/test/CodeGen/AMDGPU/abs_i32.ll92
-rw-r--r--llvm/test/Transforms/SCCP/constant-range-struct.ll65
6 files changed, 245 insertions, 49 deletions
diff --git a/llvm/benchmarks/SpecialCaseListBM.cpp b/llvm/benchmarks/SpecialCaseListBM.cpp
index 00aa3cd..b5d8268 100644
--- a/llvm/benchmarks/SpecialCaseListBM.cpp
+++ b/llvm/benchmarks/SpecialCaseListBM.cpp
@@ -110,6 +110,26 @@ std::string genGlobAtBothSides(const std::vector<std::string> &Files) {
return S;
}
+std::string genGlobAtBothSidesAndMid(const std::vector<std::string> &Files) {
+ std::string S;
+ std::minstd_rand Rng(RNG_SEED);
+ for (std::string F : Files) {
+ std::uniform_int_distribution<> PosDistrib(0, F.size() - 1);
+ F[PosDistrib(Rng)] = '*';
+
+ std::uniform_int_distribution<> Ends(0, 1);
+ if (Ends(Rng)) {
+ F.back() = '*';
+ F.front() = '*';
+ }
+
+ S += "src:";
+ S += F;
+ S += "\n";
+ }
+ return S;
+}
+
void BM_Make_(
benchmark::State &state,
std::string (*GenerateCaseList)(const std::vector<std::string> &Files)) {
@@ -171,6 +191,9 @@ BENCHMARK_CAPTURE(BM_Make_, Mid__, genGlobInMid)
BENCHMARK_CAPTURE(BM_Make_, Both_, genGlobAtBothSides)
->RangeMultiplier(MAX_LIST_MUL)
->Range(MAX_LIST_MIN, MAX_LIST_MAX);
+BENCHMARK_CAPTURE(BM_Make_, Mix__, genGlobAtBothSidesAndMid)
+ ->RangeMultiplier(MAX_LIST_MUL)
+ ->Range(MAX_LIST_MIN, MAX_LIST_MAX);
BENCHMARK_CAPTURE(BM_True_, None_, genGlobNone)
->RangeMultiplier(MAX_LIST_MUL)
@@ -187,6 +210,9 @@ BENCHMARK_CAPTURE(BM_True_, Mid__, genGlobInMid)
BENCHMARK_CAPTURE(BM_True_, Both_, genGlobAtBothSides)
->RangeMultiplier(MAX_LIST_MUL)
->Range(MAX_LIST_MIN, MAX_LIST_MAX);
+BENCHMARK_CAPTURE(BM_True_, Mix__, genGlobAtBothSidesAndMid)
+ ->RangeMultiplier(MAX_LIST_MUL)
+ ->Range(MAX_LIST_MIN, MAX_LIST_MAX);
BENCHMARK_CAPTURE(BM_False, None_, genGlobNone)
->RangeMultiplier(MAX_LIST_MUL)
@@ -203,5 +229,8 @@ BENCHMARK_CAPTURE(BM_False, Mid__, genGlobInMid)
BENCHMARK_CAPTURE(BM_False, Both_, genGlobAtBothSides)
->RangeMultiplier(MAX_LIST_MUL)
->Range(MAX_LIST_MIN, MAX_LIST_MAX);
+BENCHMARK_CAPTURE(BM_False, Mix__, genGlobAtBothSidesAndMid)
+ ->RangeMultiplier(MAX_LIST_MUL)
+ ->Range(MAX_LIST_MIN, MAX_LIST_MAX);
BENCHMARK_MAIN();
diff --git a/llvm/include/llvm/ADT/Bitfields.h b/llvm/include/llvm/ADT/Bitfields.h
index 1af2761..1fbc41c 100644
--- a/llvm/include/llvm/ADT/Bitfields.h
+++ b/llvm/include/llvm/ADT/Bitfields.h
@@ -154,12 +154,9 @@ struct ResolveUnderlyingType {
using type = std::underlying_type_t<T>;
};
template <typename T> struct ResolveUnderlyingType<T, false> {
- using type = T;
-};
-template <> struct ResolveUnderlyingType<bool, false> {
- /// In case sizeof(bool) != 1, replace `void` by an additionnal
- /// std::conditional.
- using type = std::conditional_t<sizeof(bool) == 1, uint8_t, void>;
+ static_assert(!std::is_same_v<T, bool> || sizeof(bool) == 1,
+ "T being bool requires sizeof(bool) == 1.");
+ using type = std::conditional_t<std::is_same_v<T, bool>, uint8_t, T>;
};
} // namespace bitfields_details
diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index 2aa54c9..09ef6ac 100644
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -45,6 +45,9 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
// Legalize loads and stores to the private address space.
setOperationAction(ISD::LOAD, {MVT::i32, MVT::v2i32, MVT::v4i32}, Custom);
+ // 32-bit ABS is legal for AMDGPU except for R600
+ setOperationAction(ISD::ABS, MVT::i32, Expand);
+
// EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
// spaces, so it is custom lowered to handle those where it isn't.
for (auto Op : {ISD::SEXTLOAD, ISD::ZEXTLOAD, ISD::EXTLOAD})
diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
index b80c3c9..4947d03 100644
--- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp
+++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
@@ -20,6 +20,7 @@
#include "llvm/Analysis/ValueLatticeUtils.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/ConstantRange.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instructions.h"
@@ -760,6 +761,7 @@ private:
void handleCallArguments(CallBase &CB);
void handleExtractOfWithOverflow(ExtractValueInst &EVI,
const WithOverflowInst *WO, unsigned Idx);
+ bool isInstFullyOverDefined(Instruction &Inst);
private:
friend class InstVisitor<SCCPInstVisitor>;
@@ -1374,49 +1376,66 @@ bool SCCPInstVisitor::isEdgeFeasible(BasicBlock *From, BasicBlock *To) const {
// 7. If a conditional branch has a value that is overdefined, make all
// successors executable.
void SCCPInstVisitor::visitPHINode(PHINode &PN) {
- // If this PN returns a struct, just mark the result overdefined.
- // TODO: We could do a lot better than this if code actually uses this.
- if (PN.getType()->isStructTy())
- return (void)markOverdefined(&PN);
-
- if (getValueState(&PN).isOverdefined())
- return; // Quick exit
-
// Super-extra-high-degree PHI nodes are unlikely to ever be marked constant,
// and slow us down a lot. Just mark them overdefined.
if (PN.getNumIncomingValues() > 64)
return (void)markOverdefined(&PN);
- unsigned NumActiveIncoming = 0;
+ if (isInstFullyOverDefined(PN))
+ return;
+ SmallVector<unsigned> FeasibleIncomingIndices;
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
+ if (!isEdgeFeasible(PN.getIncomingBlock(i), PN.getParent()))
+ continue;
+ FeasibleIncomingIndices.push_back(i);
+ }
// Look at all of the executable operands of the PHI node. If any of them
// are overdefined, the PHI becomes overdefined as well. If they are all
// constant, and they agree with each other, the PHI becomes the identical
// constant. If they are constant and don't agree, the PHI is a constant
// range. If there are no executable operands, the PHI remains unknown.
- ValueLatticeElement PhiState = getValueState(&PN);
- for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
- if (!isEdgeFeasible(PN.getIncomingBlock(i), PN.getParent()))
- continue;
-
- const ValueLatticeElement &IV = getValueState(PN.getIncomingValue(i));
- PhiState.mergeIn(IV);
- NumActiveIncoming++;
- if (PhiState.isOverdefined())
- break;
+ if (StructType *STy = dyn_cast<StructType>(PN.getType())) {
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ ValueLatticeElement PhiState = getStructValueState(&PN, i);
+ if (PhiState.isOverdefined())
+ continue;
+ for (unsigned j : FeasibleIncomingIndices) {
+ const ValueLatticeElement &IV =
+ getStructValueState(PN.getIncomingValue(j), i);
+ PhiState.mergeIn(IV);
+ if (PhiState.isOverdefined())
+ break;
+ }
+ ValueLatticeElement &PhiStateRef = getStructValueState(&PN, i);
+ mergeInValue(PhiStateRef, &PN, PhiState,
+ ValueLatticeElement::MergeOptions().setMaxWidenSteps(
+ FeasibleIncomingIndices.size() + 1));
+ PhiStateRef.setNumRangeExtensions(
+ std::max((unsigned)FeasibleIncomingIndices.size(),
+ PhiStateRef.getNumRangeExtensions()));
+ }
+ } else {
+ ValueLatticeElement PhiState = getValueState(&PN);
+ for (unsigned i : FeasibleIncomingIndices) {
+ const ValueLatticeElement &IV = getValueState(PN.getIncomingValue(i));
+ PhiState.mergeIn(IV);
+ if (PhiState.isOverdefined())
+ break;
+ }
+ // We allow up to 1 range extension per active incoming value and one
+ // additional extension. Note that we manually adjust the number of range
+ // extensions to match the number of active incoming values. This helps to
+ // limit multiple extensions caused by the same incoming value, if other
+ // incoming values are equal.
+ ValueLatticeElement &PhiStateRef = ValueState[&PN];
+ mergeInValue(PhiStateRef, &PN, PhiState,
+ ValueLatticeElement::MergeOptions().setMaxWidenSteps(
+ FeasibleIncomingIndices.size() + 1));
+ PhiStateRef.setNumRangeExtensions(
+ std::max((unsigned)FeasibleIncomingIndices.size(),
+ PhiStateRef.getNumRangeExtensions()));
}
-
- // We allow up to 1 range extension per active incoming value and one
- // additional extension. Note that we manually adjust the number of range
- // extensions to match the number of active incoming values. This helps to
- // limit multiple extensions caused by the same incoming value, if other
- // incoming values are equal.
- ValueLatticeElement &PhiStateRef = ValueState[&PN];
- mergeInValue(PhiStateRef, &PN, PhiState,
- ValueLatticeElement::MergeOptions().setMaxWidenSteps(
- NumActiveIncoming + 1));
- PhiStateRef.setNumRangeExtensions(
- std::max(NumActiveIncoming, PhiStateRef.getNumRangeExtensions()));
}
void SCCPInstVisitor::visitReturnInst(ReturnInst &I) {
@@ -2127,6 +2146,21 @@ void SCCPInstVisitor::handleCallResult(CallBase &CB) {
}
}
+bool SCCPInstVisitor::isInstFullyOverDefined(Instruction &Inst) {
+ // For structure Type, we handle each member separately.
+ // A structure object won't be considered as overdefined when
+ // there is at least one member that is not overdefined.
+ if (StructType *STy = dyn_cast<StructType>(Inst.getType())) {
+ for (unsigned i = 0, e = STy->getNumElements(); i < e; ++i) {
+ if (!getStructValueState(&Inst, i).isOverdefined())
+ return false;
+ }
+ return true;
+ }
+
+ return getValueState(&Inst).isOverdefined();
+}
+
void SCCPInstVisitor::solve() {
// Process the work lists until they are empty!
while (!BBWorkList.empty() || !InstWorkList.empty()) {
diff --git a/llvm/test/CodeGen/AMDGPU/abs_i32.ll b/llvm/test/CodeGen/AMDGPU/abs_i32.ll
new file mode 100644
index 0000000..b53047f
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/abs_i32.ll
@@ -0,0 +1,92 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s
+; RUN: llc -mtriple=r600 -mcpu=cypress < %s | FileCheck -check-prefixes=R600 %s
+
+define amdgpu_kernel void @abs_v1(ptr addrspace(1) %out, i32 %arg) {
+; GFX9-LABEL: abs_v1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dword s2, s[4:5], 0x8
+; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX9-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_abs_i32 s2, s2
+; GFX9-NEXT: v_mov_b32_e32 v1, s2
+; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT: s_endpgm
+;
+; R600-LABEL: abs_v1:
+; R600: ; %bb.0:
+; R600-NEXT: ALU 4, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; R600-NEXT: CF_END
+; R600-NEXT: PAD
+; R600-NEXT: ALU clause starting at 4:
+; R600-NEXT: MOV * T0.W, KC0[2].Z,
+; R600-NEXT: SUB_INT * T1.W, 0.0, PV.W,
+; R600-NEXT: MAX_INT T0.X, T0.W, PV.W,
+; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+ %res = call i32 @llvm.abs.i32(i32 %arg, i1 false)
+ store i32 %res, ptr addrspace(1) %out, align 4
+ ret void
+}
+
+define amdgpu_kernel void @abs_v2(ptr addrspace(1) %out, i32 %arg) {
+; GFX9-LABEL: abs_v2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dword s2, s[4:5], 0x8
+; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX9-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_abs_i32 s2, s2
+; GFX9-NEXT: v_mov_b32_e32 v1, s2
+; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT: s_endpgm
+;
+; R600-LABEL: abs_v2:
+; R600: ; %bb.0:
+; R600-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; R600-NEXT: CF_END
+; R600-NEXT: PAD
+; R600-NEXT: ALU clause starting at 4:
+; R600-NEXT: SUB_INT * T0.W, 0.0, KC0[2].Z,
+; R600-NEXT: MAX_INT T0.X, KC0[2].Z, PV.W,
+; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+ %neg = sub i32 0, %arg
+ %cond = icmp sgt i32 %arg, %neg
+ %res = select i1 %cond, i32 %arg, i32 %neg
+ store i32 %res, ptr addrspace(1) %out, align 4
+ ret void
+}
+
+define amdgpu_kernel void @abs_v3(ptr addrspace(1) %out, i32 %arg) {
+; GFX9-LABEL: abs_v3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dword s2, s[4:5], 0x8
+; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX9-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_abs_i32 s2, s2
+; GFX9-NEXT: v_mov_b32_e32 v1, s2
+; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT: s_endpgm
+;
+; R600-LABEL: abs_v3:
+; R600: ; %bb.0:
+; R600-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; R600-NEXT: CF_END
+; R600-NEXT: PAD
+; R600-NEXT: ALU clause starting at 4:
+; R600-NEXT: SUB_INT * T0.W, 0.0, KC0[2].Z,
+; R600-NEXT: MAX_INT T0.X, PV.W, KC0[2].Z,
+; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+ %neg = sub i32 0, %arg
+ %cond = icmp sgt i32 %neg, %arg
+ %res = select i1 %cond, i32 %neg, i32 %arg
+ store i32 %res, ptr addrspace(1) %out, align 4
+ ret void
+}
diff --git a/llvm/test/Transforms/SCCP/constant-range-struct.ll b/llvm/test/Transforms/SCCP/constant-range-struct.ll
index 7a399df..0f45b38 100644
--- a/llvm/test/Transforms/SCCP/constant-range-struct.ll
+++ b/llvm/test/Transforms/SCCP/constant-range-struct.ll
@@ -25,7 +25,7 @@ true:
br label %exit
false:
- %s.3 = insertvalue {i64, i64} undef, i64 30, 0
+ %s.3 = insertvalue {i64, i64} poison, i64 30, 0
%s.4 = insertvalue {i64, i64} %s.3, i64 300, 1
br label %exit
@@ -39,14 +39,14 @@ define void @struct1_caller() {
; CHECK-NEXT: [[S:%.*]] = call { i64, i64 } @struct1()
; CHECK-NEXT: [[V1:%.*]] = extractvalue { i64, i64 } [[S]], 0
; CHECK-NEXT: [[V2:%.*]] = extractvalue { i64, i64 } [[S]], 1
-; CHECK-NEXT: [[T_1:%.*]] = icmp ne i64 [[V1]], 10
-; CHECK-NEXT: call void @use(i1 [[T_1]])
-; CHECK-NEXT: [[T_2:%.*]] = icmp ult i64 [[V1]], 100
-; CHECK-NEXT: call void @use(i1 [[T_2]])
-; CHECK-NEXT: [[T_3:%.*]] = icmp ne i64 [[V2]], 0
+; CHECK-NEXT: call void @use(i1 true)
+; CHECK-NEXT: call void @use(i1 true)
+; CHECK-NEXT: [[T_3:%.*]] = icmp eq i64 [[V1]], 20
; CHECK-NEXT: call void @use(i1 [[T_3]])
-; CHECK-NEXT: [[T_4:%.*]] = icmp ult i64 [[V2]], 301
-; CHECK-NEXT: call void @use(i1 [[T_4]])
+; CHECK-NEXT: call void @use(i1 true)
+; CHECK-NEXT: call void @use(i1 true)
+; CHECK-NEXT: [[T_6:%.*]] = icmp eq i64 [[V2]], 300
+; CHECK-NEXT: call void @use(i1 [[T_6]])
; CHECK-NEXT: ret void
;
%s = call {i64, i64} @struct1()
@@ -57,10 +57,14 @@ define void @struct1_caller() {
call void @use(i1 %t.1)
%t.2 = icmp ult i64 %v1, 100
call void @use(i1 %t.2)
- %t.3 = icmp ne i64 %v2, 0
+ %t.3 = icmp eq i64 %v1, 20
call void @use(i1 %t.3)
- %t.4 = icmp ult i64 %v2, 301
+ %t.4 = icmp ne i64 %v2, 0
call void @use(i1 %t.4)
+ %t.5 = icmp ult i64 %v2, 301
+ call void @use(i1 %t.5)
+ %t.6 = icmp eq i64 %v2, 300
+ call void @use(i1 %t.6)
ret void
}
@@ -76,7 +80,7 @@ define internal {i64, i64} @struct2() {
; CHECK: exit:
; CHECK-NEXT: [[V1:%.*]] = phi i64 [ 20, [[TRUE]] ], [ 30, [[FALSE]] ]
; CHECK-NEXT: [[V2:%.*]] = phi i64 [ 200, [[TRUE]] ], [ 300, [[FALSE]] ]
-; CHECK-NEXT: [[S_1:%.*]] = insertvalue { i64, i64 } undef, i64 [[V1]], 0
+; CHECK-NEXT: [[S_1:%.*]] = insertvalue { i64, i64 } poison, i64 [[V1]], 0
; CHECK-NEXT: [[S_2:%.*]] = insertvalue { i64, i64 } [[S_1]], i64 [[V2]], 1
; CHECK-NEXT: ret { i64, i64 } [[S_2]]
;
@@ -92,7 +96,7 @@ false:
exit:
%v1 = phi i64 [ 20, %true ], [ 30, %false ]
%v2 = phi i64 [ 200, %true ], [ 300, %false ]
- %s.1 = insertvalue {i64, i64} undef, i64 %v1, 0
+ %s.1 = insertvalue {i64, i64} poison, i64 %v1, 0
%s.2 = insertvalue {i64, i64} %s.1, i64 %v2, 1
ret {i64, i64} %s.2
}
@@ -153,3 +157,40 @@ define void @struct2_caller() {
ret void
}
+
+%"phi_type" = type {i64, i64}
+
+define internal %"phi_type" @test(i32 %input) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT: br label [[COND_TRUE_I:%.*]]
+; CHECK: cond.true.i:
+; CHECK-NEXT: br label [[COND_END_I:%.*]]
+; CHECK: cond.end.i:
+; CHECK-NEXT: ret [[PHI_TYPE:%.*]] poison
+;
+ %cmp.cond = icmp eq i32 %input, 1
+ br i1 %cmp.cond, label %cond.true.i, label %cond.false.i
+
+cond.true.i:
+ %r1.tmp = insertvalue %"phi_type" poison, i64 1, 0
+ %r1.tmp.2 = insertvalue %"phi_type" %r1.tmp, i64 2, 1
+ br label %cond.end.i
+
+cond.false.i:
+ %r2.tmp = insertvalue %"phi_type" poison, i64 3, 0
+ %r2.tmp.2 = insertvalue %"phi_type" %r2.tmp, i64 4, 1
+ br label %cond.end.i
+
+cond.end.i:
+ %retval = phi %"phi_type" [ %r1.tmp.2, %cond.true.i ], [ %r2.tmp.2, %cond.false.i ]
+ ret %"phi_type" %retval
+}
+
+define %"phi_type" @test2() {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[CALL_1:%.*]] = tail call fastcc [[PHI_TYPE:%.*]] @[[TEST:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 noundef 1)
+; CHECK-NEXT: ret [[PHI_TYPE]] { i64 1, i64 2 }
+;
+ %call.1 = tail call fastcc noundef %"phi_type" @test(i32 noundef 1)
+ ret %"phi_type" %call.1
+}