aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author4ast <alexei.starovoitov@gmail.com>2024-03-12 17:27:25 -0700
committerGitHub <noreply@github.com>2024-03-13 02:27:25 +0200
commit2aacb56e8361213c1bd69c2ceafdea3aa0ca9125 (patch)
treef36cadc807511d082e48c1ca0401ee63f282f7ab
parentf467cc9caf37fcf6b3523271f977585c39372d55 (diff)
downloadllvm-2aacb56e8361213c1bd69c2ceafdea3aa0ca9125.zip
llvm-2aacb56e8361213c1bd69c2ceafdea3aa0ca9125.tar.gz
llvm-2aacb56e8361213c1bd69c2ceafdea3aa0ca9125.tar.bz2
BPF address space insn (#84410)
This commit aims to support BPF arena kernel side [feature](https://lore.kernel.org/bpf/20240209040608.98927-1-alexei.starovoitov@gmail.com/): - arena is a memory region accessible from both BPF program and userspace; - base pointers for this memory region differ between kernel and user spaces; - `dst_reg = addr_space_cast(src_reg, dst_addr_space, src_addr_space)` translates src_reg, a pointer in src_addr_space to dst_reg, equivalent pointer in dst_addr_space, {src,dst}_addr_space are immediate constants; - number 0 is assigned to kernel address space; - number 1 is assigned to user address space. On the LLVM side, the goal is to make load and store operations on arena pointers "transparent" for BPF programs: - assume that pointers with non-zero address space are pointers to arena memory; - assume that arena is identified by address space number; - assume that address space zero corresponds to kernel address space; - assume that every BPF-side load or store from arena is done via pointer in user address space, thus convert base pointers using `addr_space_cast(src_reg, 0, 1)`; Only load, store, cmpxchg and atomicrmw IR instructions are handled by this transformation. For example, the following C code: ```c #define __as __attribute__((address_space(1))) void copy(int __as *from, int __as *to) { *to = *from; } ``` Compiled to the following IR: ```llvm define void @copy(ptr addrspace(1) %from, ptr addrspace(1) %to) { entry: %0 = load i32, ptr addrspace(1) %from, align 4 store i32 %0, ptr addrspace(1) %to, align 4 ret void } ``` Is transformed to: ```llvm %to2 = addrspacecast ptr addrspace(1) %to to ptr ;; ! %from1 = addrspacecast ptr addrspace(1) %from to ptr ;; ! %0 = load i32, ptr %from1, align 4, !tbaa !3 store i32 %0, ptr %to2, align 4, !tbaa !3 ret void ``` And compiled as: ```asm r2 = addr_space_cast(r2, 0, 1) r1 = addr_space_cast(r1, 0, 1) r1 = *(u32 *)(r1 + 0) *(u32 *)(r2 + 0) = r1 exit ``` Co-authored-by: Eduard Zingerman <eddyz87@gmail.com>
-rw-r--r--clang/lib/Basic/Targets/BPF.cpp3
-rw-r--r--clang/test/Preprocessor/bpf-predefined-macros.c8
-rw-r--r--llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp1
-rw-r--r--llvm/lib/Target/BPF/BPF.h8
-rw-r--r--llvm/lib/Target/BPF/BPFASpaceCastSimplifyPass.cpp92
-rw-r--r--llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp116
-rw-r--r--llvm/lib/Target/BPF/BPFInstrInfo.td29
-rw-r--r--llvm/lib/Target/BPF/BPFTargetMachine.cpp5
-rw-r--r--llvm/lib/Target/BPF/CMakeLists.txt1
-rw-r--r--llvm/test/CodeGen/BPF/addr-space-auto-casts.ll78
-rw-r--r--llvm/test/CodeGen/BPF/addr-space-cast.ll22
-rw-r--r--llvm/test/CodeGen/BPF/addr-space-gep-chain.ll25
-rw-r--r--llvm/test/CodeGen/BPF/addr-space-globals.ll30
-rw-r--r--llvm/test/CodeGen/BPF/addr-space-globals2.ll25
-rw-r--r--llvm/test/CodeGen/BPF/addr-space-phi.ll53
-rw-r--r--llvm/test/CodeGen/BPF/addr-space-simplify-1.ll19
-rw-r--r--llvm/test/CodeGen/BPF/addr-space-simplify-2.ll21
-rw-r--r--llvm/test/CodeGen/BPF/addr-space-simplify-3.ll26
-rw-r--r--llvm/test/CodeGen/BPF/addr-space-simplify-4.ll21
-rw-r--r--llvm/test/CodeGen/BPF/addr-space-simplify-5.ll25
-rw-r--r--llvm/test/CodeGen/BPF/assembler-disassembler.s7
21 files changed, 615 insertions, 0 deletions
diff --git a/clang/lib/Basic/Targets/BPF.cpp b/clang/lib/Basic/Targets/BPF.cpp
index e3fbbb7..26a54f6 100644
--- a/clang/lib/Basic/Targets/BPF.cpp
+++ b/clang/lib/Basic/Targets/BPF.cpp
@@ -35,6 +35,9 @@ void BPFTargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__BPF_CPU_VERSION__", "0");
return;
}
+
+ Builder.defineMacro("__BPF_FEATURE_ARENA_CAST");
+
if (CPU.empty() || CPU == "generic" || CPU == "v1") {
Builder.defineMacro("__BPF_CPU_VERSION__", "1");
return;
diff --git a/clang/test/Preprocessor/bpf-predefined-macros.c b/clang/test/Preprocessor/bpf-predefined-macros.c
index ff4d00a..fea24d1 100644
--- a/clang/test/Preprocessor/bpf-predefined-macros.c
+++ b/clang/test/Preprocessor/bpf-predefined-macros.c
@@ -61,6 +61,9 @@ int r;
#ifdef __BPF_FEATURE_ST
int s;
#endif
+#ifdef __BPF_FEATURE_ARENA_CAST
+int t;
+#endif
// CHECK: int b;
// CHECK: int c;
@@ -90,6 +93,11 @@ int s;
// CPU_V4: int r;
// CPU_V4: int s;
+// CPU_V1: int t;
+// CPU_V2: int t;
+// CPU_V3: int t;
+// CPU_V4: int t;
+
// CPU_GENERIC: int g;
// CPU_PROBE: int f;
diff --git a/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp b/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
index 0d1eef6..3145bc3 100644
--- a/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
+++ b/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
@@ -271,6 +271,7 @@ public:
.Case("xchg32_32", true)
.Case("cmpxchg_64", true)
.Case("cmpxchg32_32", true)
+ .Case("addr_space_cast", true)
.Default(false);
}
};
diff --git a/llvm/lib/Target/BPF/BPF.h b/llvm/lib/Target/BPF/BPF.h
index 5c77d18..bbdbdbb 100644
--- a/llvm/lib/Target/BPF/BPF.h
+++ b/llvm/lib/Target/BPF/BPF.h
@@ -66,6 +66,14 @@ public:
static bool isRequired() { return true; }
};
+class BPFASpaceCastSimplifyPass
+ : public PassInfoMixin<BPFASpaceCastSimplifyPass> {
+public:
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
+ static bool isRequired() { return true; }
+};
+
class BPFAdjustOptPass : public PassInfoMixin<BPFAdjustOptPass> {
public:
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
diff --git a/llvm/lib/Target/BPF/BPFASpaceCastSimplifyPass.cpp b/llvm/lib/Target/BPF/BPFASpaceCastSimplifyPass.cpp
new file mode 100644
index 0000000..f87b299
--- /dev/null
+++ b/llvm/lib/Target/BPF/BPFASpaceCastSimplifyPass.cpp
@@ -0,0 +1,92 @@
+//===-- BPFASpaceCastSimplifyPass.cpp - BPF addrspacecast simplications --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "BPF.h"
+#include <optional>
+
+#define DEBUG_TYPE "bpf-aspace-simplify"
+
+using namespace llvm;
+
+namespace {
+
+struct CastGEPCast {
+ AddrSpaceCastInst *OuterCast;
+
+ // Match chain of instructions:
+ // %inner = addrspacecast N->M
+ // %gep = getelementptr %inner, ...
+ // %outer = addrspacecast M->N %gep
+ // Where I is %outer.
+ static std::optional<CastGEPCast> match(Value *I) {
+ auto *OuterCast = dyn_cast<AddrSpaceCastInst>(I);
+ if (!OuterCast)
+ return std::nullopt;
+ auto *GEP = dyn_cast<GetElementPtrInst>(OuterCast->getPointerOperand());
+ if (!GEP)
+ return std::nullopt;
+ auto *InnerCast = dyn_cast<AddrSpaceCastInst>(GEP->getPointerOperand());
+ if (!InnerCast)
+ return std::nullopt;
+ if (InnerCast->getSrcAddressSpace() != OuterCast->getDestAddressSpace())
+ return std::nullopt;
+ if (InnerCast->getDestAddressSpace() != OuterCast->getSrcAddressSpace())
+ return std::nullopt;
+ return CastGEPCast{OuterCast};
+ }
+
+ static PointerType *changeAddressSpace(PointerType *Ty, unsigned AS) {
+ return Ty->get(Ty->getContext(), AS);
+ }
+
+ // Assuming match(this->OuterCast) is true, convert:
+ // (addrspacecast M->N (getelementptr (addrspacecast N->M ptr) ...))
+ // To:
+ // (getelementptr ptr ...)
+ GetElementPtrInst *rewrite() {
+ auto *GEP = cast<GetElementPtrInst>(OuterCast->getPointerOperand());
+ auto *InnerCast = cast<AddrSpaceCastInst>(GEP->getPointerOperand());
+ unsigned AS = OuterCast->getDestAddressSpace();
+ auto *NewGEP = cast<GetElementPtrInst>(GEP->clone());
+ NewGEP->setName(GEP->getName());
+ NewGEP->insertAfter(OuterCast);
+ NewGEP->setOperand(0, InnerCast->getPointerOperand());
+ auto *GEPTy = cast<PointerType>(GEP->getType());
+ NewGEP->mutateType(changeAddressSpace(GEPTy, AS));
+ OuterCast->replaceAllUsesWith(NewGEP);
+ OuterCast->eraseFromParent();
+ if (GEP->use_empty())
+ GEP->eraseFromParent();
+ if (InnerCast->use_empty())
+ InnerCast->eraseFromParent();
+ return NewGEP;
+ }
+};
+
+} // anonymous namespace
+
+PreservedAnalyses BPFASpaceCastSimplifyPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ SmallVector<CastGEPCast, 16> WorkList;
+ bool Changed = false;
+ for (BasicBlock &BB : F) {
+ for (Instruction &I : BB)
+ if (auto It = CastGEPCast::match(&I))
+ WorkList.push_back(It.value());
+ Changed |= !WorkList.empty();
+
+ while (!WorkList.empty()) {
+ CastGEPCast InsnChain = WorkList.pop_back_val();
+ GetElementPtrInst *NewGEP = InsnChain.rewrite();
+ for (User *U : NewGEP->users())
+ if (auto It = CastGEPCast::match(U))
+ WorkList.push_back(It.value());
+ }
+ }
+ return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
diff --git a/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp b/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp
index 81effc9..edd59aa 100644
--- a/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp
+++ b/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp
@@ -14,6 +14,8 @@
// optimizations are done and those builtins can be removed.
// - remove llvm.bpf.getelementptr.and.load builtins.
// - remove llvm.bpf.getelementptr.and.store builtins.
+// - for loads and stores with base addresses from non-zero address space
+// cast base address to zero address space (support for BPF arenas).
//
//===----------------------------------------------------------------------===//
@@ -55,6 +57,7 @@ private:
bool removeCompareBuiltin(Module &M);
bool sinkMinMax(Module &M);
bool removeGEPBuiltins(Module &M);
+ bool insertASpaceCasts(Module &M);
};
} // End anonymous namespace
@@ -416,11 +419,124 @@ bool BPFCheckAndAdjustIR::removeGEPBuiltins(Module &M) {
return Changed;
}
+// Wrap ToWrap with cast to address space zero:
+// - if ToWrap is a getelementptr,
+// wrap it's base pointer instead and return a copy;
+// - if ToWrap is Instruction, insert address space cast
+// immediately after ToWrap;
+// - if ToWrap is not an Instruction (function parameter
+// or a global value), insert address space cast at the
+// beginning of the Function F;
+// - use Cache to avoid inserting too many casts;
+static Value *aspaceWrapValue(DenseMap<Value *, Value *> &Cache, Function *F,
+ Value *ToWrap) {
+ auto It = Cache.find(ToWrap);
+ if (It != Cache.end())
+ return It->getSecond();
+
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(ToWrap)) {
+ Value *Ptr = GEP->getPointerOperand();
+ Value *WrappedPtr = aspaceWrapValue(Cache, F, Ptr);
+ auto *GEPTy = cast<PointerType>(GEP->getType());
+ auto *NewGEP = GEP->clone();
+ NewGEP->insertAfter(GEP);
+ NewGEP->mutateType(GEPTy->getPointerTo(0));
+ NewGEP->setOperand(GEP->getPointerOperandIndex(), WrappedPtr);
+ NewGEP->setName(GEP->getName());
+ Cache[ToWrap] = NewGEP;
+ return NewGEP;
+ }
+
+ IRBuilder IB(F->getContext());
+ if (Instruction *InsnPtr = dyn_cast<Instruction>(ToWrap))
+ IB.SetInsertPoint(*InsnPtr->getInsertionPointAfterDef());
+ else
+ IB.SetInsertPoint(F->getEntryBlock().getFirstInsertionPt());
+ auto *PtrTy = cast<PointerType>(ToWrap->getType());
+ auto *ASZeroPtrTy = PtrTy->getPointerTo(0);
+ auto *ACast = IB.CreateAddrSpaceCast(ToWrap, ASZeroPtrTy, ToWrap->getName());
+ Cache[ToWrap] = ACast;
+ return ACast;
+}
+
+// Wrap a pointer operand OpNum of instruction I
+// with cast to address space zero
+static void aspaceWrapOperand(DenseMap<Value *, Value *> &Cache, Instruction *I,
+ unsigned OpNum) {
+ Value *OldOp = I->getOperand(OpNum);
+ if (OldOp->getType()->getPointerAddressSpace() == 0)
+ return;
+
+ Value *NewOp = aspaceWrapValue(Cache, I->getFunction(), OldOp);
+ I->setOperand(OpNum, NewOp);
+ // Check if there are any remaining users of old GEP,
+ // delete those w/o users
+ for (;;) {
+ auto *OldGEP = dyn_cast<GetElementPtrInst>(OldOp);
+ if (!OldGEP)
+ break;
+ if (!OldGEP->use_empty())
+ break;
+ OldOp = OldGEP->getPointerOperand();
+ OldGEP->eraseFromParent();
+ }
+}
+
+// Support for BPF arenas:
+// - for each function in the module M, update pointer operand of
+// each memory access instruction (load/store/cmpxchg/atomicrmw)
+// by casting it from non-zero address space to zero address space, e.g:
+//
+// (load (ptr addrspace (N) %p) ...)
+// -> (load (addrspacecast ptr addrspace (N) %p to ptr))
+//
+// - assign section with name .arena.N for globals defined in
+// non-zero address space N
+bool BPFCheckAndAdjustIR::insertASpaceCasts(Module &M) {
+ bool Changed = false;
+ for (Function &F : M) {
+ DenseMap<Value *, Value *> CastsCache;
+ for (BasicBlock &BB : F) {
+ for (Instruction &I : BB) {
+ unsigned PtrOpNum;
+
+ if (auto *LD = dyn_cast<LoadInst>(&I))
+ PtrOpNum = LD->getPointerOperandIndex();
+ else if (auto *ST = dyn_cast<StoreInst>(&I))
+ PtrOpNum = ST->getPointerOperandIndex();
+ else if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(&I))
+ PtrOpNum = CmpXchg->getPointerOperandIndex();
+ else if (auto *RMW = dyn_cast<AtomicRMWInst>(&I))
+ PtrOpNum = RMW->getPointerOperandIndex();
+ else
+ continue;
+
+ aspaceWrapOperand(CastsCache, &I, PtrOpNum);
+ }
+ }
+ Changed |= !CastsCache.empty();
+ }
+ // Merge all globals within same address space into single
+ // .arena.<addr space no> section
+ for (GlobalVariable &G : M.globals()) {
+ if (G.getAddressSpace() == 0 || G.hasSection())
+ continue;
+ SmallString<16> SecName;
+ raw_svector_ostream OS(SecName);
+ OS << ".arena." << G.getAddressSpace();
+ G.setSection(SecName);
+ // Prevent having separate section for constants
+ G.setConstant(false);
+ }
+ return Changed;
+}
+
bool BPFCheckAndAdjustIR::adjustIR(Module &M) {
bool Changed = removePassThroughBuiltin(M);
Changed = removeCompareBuiltin(M) || Changed;
Changed = sinkMinMax(M) || Changed;
Changed = removeGEPBuiltins(M) || Changed;
+ Changed = insertASpaceCasts(M) || Changed;
return Changed;
}
diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.td b/llvm/lib/Target/BPF/BPFInstrInfo.td
index 82d3470..7198e94 100644
--- a/llvm/lib/Target/BPF/BPFInstrInfo.td
+++ b/llvm/lib/Target/BPF/BPFInstrInfo.td
@@ -420,6 +420,35 @@ let Predicates = [BPFHasMovsx] in {
}
}
+def ADDR_SPACE_CAST
+ : ALU_RR<BPF_ALU64, BPF_MOV, 64,
+ (outs GPR:$dst),
+ (ins GPR:$src, i64imm:$dst_as, i64imm:$src_as),
+ "$dst = addr_space_cast($src, $dst_as, $src_as)",
+ []> {
+ bits<64> dst_as;
+ bits<64> src_as;
+
+ let Inst{47-32} = 1;
+ let Inst{31-16} = dst_as{15-0};
+ let Inst{15-0} = src_as{15-0};
+}
+
+def SrcAddrSpace : SDNodeXForm<addrspacecast, [{
+ return CurDAG->getTargetConstant(
+ cast<AddrSpaceCastSDNode>(N)->getSrcAddressSpace(),
+ SDLoc(N), MVT::i64);
+}]>;
+
+def DstAddrSpace : SDNodeXForm<addrspacecast, [{
+ return CurDAG->getTargetConstant(
+ cast<AddrSpaceCastSDNode>(N)->getDestAddressSpace(),
+ SDLoc(N), MVT::i64);
+}]>;
+
+def : Pat<(addrspacecast:$this GPR:$src),
+ (ADDR_SPACE_CAST $src, (DstAddrSpace $this), (SrcAddrSpace $this))>;
+
def FI_ri
: TYPE_LD_ST<BPF_IMM.Value, BPF_DW.Value,
(outs GPR:$dst),
diff --git a/llvm/lib/Target/BPF/BPFTargetMachine.cpp b/llvm/lib/Target/BPF/BPFTargetMachine.cpp
index 08ac4b2..5f26bec 100644
--- a/llvm/lib/Target/BPF/BPFTargetMachine.cpp
+++ b/llvm/lib/Target/BPF/BPFTargetMachine.cpp
@@ -121,6 +121,10 @@ void BPFTargetMachine::registerPassBuilderCallbacks(
FPM.addPass(BPFPreserveStaticOffsetPass(false));
return true;
}
+ if (PassName == "bpf-aspace-simplify") {
+ FPM.addPass(BPFASpaceCastSimplifyPass());
+ return true;
+ }
return false;
});
PB.registerPipelineStartEPCallback(
@@ -135,6 +139,7 @@ void BPFTargetMachine::registerPassBuilderCallbacks(
PB.registerPeepholeEPCallback([=](FunctionPassManager &FPM,
OptimizationLevel Level) {
FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().hoistCommonInsts(true)));
+ FPM.addPass(BPFASpaceCastSimplifyPass());
});
PB.registerScalarOptimizerLateEPCallback(
[=](FunctionPassManager &FPM, OptimizationLevel Level) {
diff --git a/llvm/lib/Target/BPF/CMakeLists.txt b/llvm/lib/Target/BPF/CMakeLists.txt
index d88e7ad..cb21ed0 100644
--- a/llvm/lib/Target/BPF/CMakeLists.txt
+++ b/llvm/lib/Target/BPF/CMakeLists.txt
@@ -24,6 +24,7 @@ add_llvm_target(BPFCodeGen
BPFAbstractMemberAccess.cpp
BPFAdjustOpt.cpp
BPFAsmPrinter.cpp
+ BPFASpaceCastSimplifyPass.cpp
BPFCheckAndAdjustIR.cpp
BPFFrameLowering.cpp
BPFInstrInfo.cpp
diff --git a/llvm/test/CodeGen/BPF/addr-space-auto-casts.ll b/llvm/test/CodeGen/BPF/addr-space-auto-casts.ll
new file mode 100644
index 0000000..08e11e8
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/addr-space-auto-casts.ll
@@ -0,0 +1,78 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt --bpf-check-and-opt-ir -S -mtriple=bpf-pc-linux < %s | FileCheck %s
+
+define void @simple_store(ptr addrspace(272) %foo) {
+; CHECK-LABEL: define void @simple_store(
+; CHECK-SAME: ptr addrspace(272) [[FOO:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[FOO1:%.*]] = addrspacecast ptr addrspace(272) [[FOO]] to ptr
+; CHECK-NEXT: [[ADD_PTR2:%.*]] = getelementptr inbounds i8, ptr [[FOO1]], i64 16
+; CHECK-NEXT: store volatile i32 57005, ptr [[ADD_PTR2]], align 4
+; CHECK-NEXT: [[ADD_PTR13:%.*]] = getelementptr inbounds i8, ptr [[FOO1]], i64 12
+; CHECK-NEXT: store volatile i32 48879, ptr [[ADD_PTR13]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %add.ptr = getelementptr inbounds i8, ptr addrspace(272) %foo, i64 16
+ store volatile i32 57005, ptr addrspace(272) %add.ptr, align 4
+ %add.ptr1 = getelementptr inbounds i8, ptr addrspace(272) %foo, i64 12
+ store volatile i32 48879, ptr addrspace(272) %add.ptr1, align 4
+ ret void
+}
+
+define void @separate_addr_store(ptr addrspace(272) %foo, ptr addrspace(272) %bar) {
+; CHECK-LABEL: define void @separate_addr_store(
+; CHECK-SAME: ptr addrspace(272) [[FOO:%.*]], ptr addrspace(272) [[BAR:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[BAR3:%.*]] = addrspacecast ptr addrspace(272) [[BAR]] to ptr
+; CHECK-NEXT: [[FOO1:%.*]] = addrspacecast ptr addrspace(272) [[FOO]] to ptr
+; CHECK-NEXT: [[ADD_PTR2:%.*]] = getelementptr inbounds i8, ptr [[FOO1]], i64 16
+; CHECK-NEXT: store volatile i32 57005, ptr [[ADD_PTR2]], align 4
+; CHECK-NEXT: [[ADD_PTR14:%.*]] = getelementptr inbounds i8, ptr [[BAR3]], i64 12
+; CHECK-NEXT: store volatile i32 48879, ptr [[ADD_PTR14]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %add.ptr = getelementptr inbounds i8, ptr addrspace(272) %foo, i64 16
+ store volatile i32 57005, ptr addrspace(272) %add.ptr, align 4
+ %add.ptr1 = getelementptr inbounds i8, ptr addrspace(272) %bar, i64 12
+ store volatile i32 48879, ptr addrspace(272) %add.ptr1, align 4
+ ret void
+}
+
+define i32 @simple_load(ptr addrspace(272) %foo) {
+; CHECK-LABEL: define i32 @simple_load(
+; CHECK-SAME: ptr addrspace(272) [[FOO:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[FOO1:%.*]] = addrspacecast ptr addrspace(272) [[FOO]] to ptr
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[FOO1]], align 4
+; CHECK-NEXT: ret i32 [[TMP0]]
+;
+entry:
+ %0 = load i32, ptr addrspace(272) %foo, align 4
+ ret i32 %0
+}
+
+define { i32, i1 } @simple_cmpxchg(ptr addrspace(1) %i) {
+; CHECK-LABEL: define { i32, i1 } @simple_cmpxchg(
+; CHECK-SAME: ptr addrspace(1) [[I:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[I1:%.*]] = addrspacecast ptr addrspace(1) [[I]] to ptr
+; CHECK-NEXT: [[A:%.*]] = cmpxchg ptr [[I1]], i32 7, i32 42 monotonic monotonic, align 4
+; CHECK-NEXT: ret { i32, i1 } [[A]]
+;
+entry:
+ %a = cmpxchg ptr addrspace(1) %i, i32 7, i32 42 monotonic monotonic, align 4
+ ret { i32, i1 } %a
+}
+
+define void @simple_atomicrmw(ptr addrspace(1) %p) {
+; CHECK-LABEL: define void @simple_atomicrmw(
+; CHECK-SAME: ptr addrspace(1) [[P:%.*]]) {
+; CHECK-NEXT: [[P1:%.*]] = addrspacecast ptr addrspace(1) [[P]] to ptr
+; CHECK-NEXT: [[A:%.*]] = atomicrmw add ptr [[P1]], i64 42 monotonic, align 8
+; CHECK-NEXT: ret void
+;
+ %a = atomicrmw add ptr addrspace(1) %p, i64 42 monotonic, align 8
+ ret void
+}
diff --git a/llvm/test/CodeGen/BPF/addr-space-cast.ll b/llvm/test/CodeGen/BPF/addr-space-cast.ll
new file mode 100644
index 0000000..ad2860d
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/addr-space-cast.ll
@@ -0,0 +1,22 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -march=bpfel -mcpu=v4 -filetype=asm -show-mc-encoding < %s | FileCheck %s
+
+define ptr addrspace(1) @foo(ptr %p) {
+; CHECK-LABEL: foo:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: r0 = addr_space_cast(r1, 1, 0) # encoding: [0xbf,0x10,0x01,0x00,0x00,0x00,0x01,0x00]
+; CHECK-NEXT: exit # encoding: [0x95,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
+entry:
+ %0 = addrspacecast ptr %p to ptr addrspace(1)
+ ret ptr addrspace(1) %0
+}
+
+define ptr @bar(ptr addrspace(1) %p) {
+; CHECK-LABEL: bar:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: r0 = addr_space_cast(r1, 0, 1) # encoding: [0xbf,0x10,0x01,0x00,0x01,0x00,0x00,0x00]
+; CHECK-NEXT: exit # encoding: [0x95,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
+entry:
+ %0 = addrspacecast ptr addrspace(1) %p to ptr
+ ret ptr %0
+}
diff --git a/llvm/test/CodeGen/BPF/addr-space-gep-chain.ll b/llvm/test/CodeGen/BPF/addr-space-gep-chain.ll
new file mode 100644
index 0000000..3ac85fb
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/addr-space-gep-chain.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt --bpf-check-and-opt-ir -S -mtriple=bpf-pc-linux < %s | FileCheck %s
+
+define void @test(ptr addrspace(1) %p) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: ptr addrspace(1) [[P:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = addrspacecast ptr addrspace(1) [[P]] to ptr
+; CHECK-NEXT: [[A2:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 8
+; CHECK-NEXT: [[B3:%.*]] = getelementptr inbounds i8, ptr [[A2]], i64 16
+; CHECK-NEXT: [[C4:%.*]] = getelementptr inbounds i8, ptr [[B3]], i64 24
+; CHECK-NEXT: [[D5:%.*]] = getelementptr inbounds i8, ptr [[C4]], i64 32
+; CHECK-NEXT: store i64 11, ptr [[C4]], align 8
+; CHECK-NEXT: store i64 22, ptr [[D5]], align 8
+; CHECK-NEXT: ret void
+;
+entry:
+ %a = getelementptr inbounds i8, ptr addrspace(1) %p, i64 8
+ %b = getelementptr inbounds i8, ptr addrspace(1) %a, i64 16
+ %c = getelementptr inbounds i8, ptr addrspace(1) %b, i64 24
+ %d = getelementptr inbounds i8, ptr addrspace(1) %c, i64 32
+ store i64 11, ptr addrspace(1) %c, align 8
+ store i64 22, ptr addrspace(1) %d, align 8
+ ret void
+}
diff --git a/llvm/test/CodeGen/BPF/addr-space-globals.ll b/llvm/test/CodeGen/BPF/addr-space-globals.ll
new file mode 100644
index 0000000..878ba0d
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/addr-space-globals.ll
@@ -0,0 +1,30 @@
+; RUN: llc -march=bpfel -mcpu=v4 < %s | FileCheck %s
+
+; Generated from the following C code:
+;
+; #define __as __attribute__((address_space(272)))
+; __as const char a[2] = {1,2};
+; __as char b[2] = {3,4};
+; __as char c[2];
+;
+; Using the following command:
+;
+; clang --target=bpf -O2 -S -emit-llvm -o t.ll t.c
+
+
+@a = dso_local local_unnamed_addr addrspace(272) constant [2 x i8] [i8 1, i8 2], align 1
+@b = dso_local local_unnamed_addr addrspace(272) global [2 x i8] [i8 3, i8 4], align 1
+@c = dso_local local_unnamed_addr addrspace(272) global [2 x i8] zeroinitializer, align 1
+
+; Verify that a,b,c reside in the same section
+
+; CHECK: .section .arena.272,"aw",@progbits
+; CHECK-NOT: .section
+; CHECK: .globl a
+; CHECK: .ascii "\001\002"
+; CHECK-NOT: .section
+; CHECK: .globl b
+; CHECK: .ascii "\003\004"
+; CHECK-NOT: .section
+; CHECK: .globl c
+; CHECK: .zero 2
diff --git a/llvm/test/CodeGen/BPF/addr-space-globals2.ll b/llvm/test/CodeGen/BPF/addr-space-globals2.ll
new file mode 100644
index 0000000..d1e2318
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/addr-space-globals2.ll
@@ -0,0 +1,25 @@
+; RUN: llc -march=bpfel -mcpu=v4 < %s | FileCheck %s
+
+; Generated from the following C code:
+;
+; __attribute__((address_space(1))) char a[2] = {1,2};
+; __attribute__((address_space(2))) char b[2] = {3,4};
+;
+; Using the following command:
+;
+; clang --target=bpf -O2 -S -emit-llvm -o t.ll t.c
+
+@a = dso_local local_unnamed_addr addrspace(1) global [2 x i8] [i8 1, i8 2], align 1
+@b = dso_local local_unnamed_addr addrspace(2) global [2 x i8] [i8 3, i8 4], align 1
+
+; Verify that a,b reside in separate sections
+
+; CHECK: .section .arena.1,"aw",@progbits
+; CHECK-NOT: .section
+; CHECK: .globl a
+; CHECK: .ascii "\001\002"
+
+; CHECK: .section .arena.2,"aw",@progbits
+; CHECK-NOT: .section
+; CHECK: .globl b
+; CHECK: .ascii "\003\004"
diff --git a/llvm/test/CodeGen/BPF/addr-space-phi.ll b/llvm/test/CodeGen/BPF/addr-space-phi.ll
new file mode 100644
index 0000000..6d28b07
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/addr-space-phi.ll
@@ -0,0 +1,53 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt --bpf-check-and-opt-ir -S -mtriple=bpf-pc-linux < %s | FileCheck %s
+
+; Generated from the following C code:
+;
+; #define __uptr __attribute__((address_space(1)))
+;
+; extern int __uptr *magic1();
+; extern int __uptr *magic2();
+;
+; void test(long i) {
+; int __uptr *a;
+;
+; if (i > 42)
+; a = magic1();
+; else
+; a = magic2();
+; a[5] = 7;
+; }
+;
+; Using the following command:
+;
+; clang --target=bpf -O2 -S -emit-llvm -o t.ll t.c
+
+define void @test(i64 noundef %i) {
+; CHECK: if.end:
+; CHECK-NEXT: [[A_0:%.*]] = phi ptr addrspace(1)
+; CHECK-NEXT: [[A_01:%.*]] = addrspacecast ptr addrspace(1) [[A_0]] to ptr
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[A_01]], i64 5
+; CHECK-NEXT: store i32 7, ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %cmp = icmp sgt i64 %i, 42
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+ %call = tail call ptr addrspace(1) @magic1()
+ br label %if.end
+
+if.else: ; preds = %entry
+ %call1 = tail call ptr addrspace(1) @magic2()
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+ %a.0 = phi ptr addrspace(1) [ %call, %if.then ], [ %call1, %if.else ]
+ %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %a.0, i64 5
+ store i32 7, ptr addrspace(1) %arrayidx, align 4
+ ret void
+}
+
+declare ptr addrspace(1) @magic1(...)
+declare ptr addrspace(1) @magic2(...)
diff --git a/llvm/test/CodeGen/BPF/addr-space-simplify-1.ll b/llvm/test/CodeGen/BPF/addr-space-simplify-1.ll
new file mode 100644
index 0000000..32d6728
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/addr-space-simplify-1.ll
@@ -0,0 +1,19 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -passes=bpf-aspace-simplify -mtriple=bpf-pc-linux -S < %s | FileCheck %s
+
+; Check that bpf-aspace-simplify pass removes unnecessary (for BPF)
+; address space casts for cast M->N -> GEP -> cast N->M chain.
+
+define dso_local ptr addrspace(1) @test (ptr addrspace(1) %p) {
+; CHECK-LABEL: define dso_local ptr addrspace(1) @test(
+; CHECK-SAME: ptr addrspace(1) [[P:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[B1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[P]], i64 8
+; CHECK-NEXT: ret ptr addrspace(1) [[B1]]
+;
+ entry:
+ %a = addrspacecast ptr addrspace(1) %p to ptr
+ %b = getelementptr inbounds i8, ptr %a, i64 8
+ %c = addrspacecast ptr %b to ptr addrspace(1)
+ ret ptr addrspace(1) %c
+}
diff --git a/llvm/test/CodeGen/BPF/addr-space-simplify-2.ll b/llvm/test/CodeGen/BPF/addr-space-simplify-2.ll
new file mode 100644
index 0000000..a296555
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/addr-space-simplify-2.ll
@@ -0,0 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -passes=bpf-aspace-simplify -mtriple=bpf-pc-linux -S < %s | FileCheck %s
+
+; Check that bpf-aspace-simplify pass does not change
+; chain 'cast M->N -> GEP -> cast N->K'.
+
+define dso_local ptr addrspace(2) @test (ptr addrspace(1) %p) {
+; CHECK-LABEL: define dso_local ptr addrspace(2) @test(
+; CHECK-SAME: ptr addrspace(1) [[P:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A:%.*]] = addrspacecast ptr addrspace(1) [[P]] to ptr
+; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 8
+; CHECK-NEXT: [[C:%.*]] = addrspacecast ptr [[B]] to ptr addrspace(2)
+; CHECK-NEXT: ret ptr addrspace(2) [[C]]
+;
+ entry:
+ %a = addrspacecast ptr addrspace(1) %p to ptr
+ %b = getelementptr inbounds i8, ptr %a, i64 8
+ %c = addrspacecast ptr %b to ptr addrspace(2)
+ ret ptr addrspace(2) %c
+}
diff --git a/llvm/test/CodeGen/BPF/addr-space-simplify-3.ll b/llvm/test/CodeGen/BPF/addr-space-simplify-3.ll
new file mode 100644
index 0000000..a7736c4
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/addr-space-simplify-3.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -passes=bpf-aspace-simplify -mtriple=bpf-pc-linux -S < %s | FileCheck %s
+
+; Check that when bpf-aspace-simplify pass modifies chain
+; 'cast M->N -> GEP -> cast N->M' it does not remove GEP,
+; when that GEP is used by some other instruction.
+
+define dso_local ptr addrspace(1) @test (ptr addrspace(1) %p) {
+; CHECK-LABEL: define dso_local ptr addrspace(1) @test(
+; CHECK-SAME: ptr addrspace(1) [[P:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A:%.*]] = addrspacecast ptr addrspace(1) [[P]] to ptr
+; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 8
+; CHECK-NEXT: [[B1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[P]], i64 8
+; CHECK-NEXT: call void @sink(ptr [[B]])
+; CHECK-NEXT: ret ptr addrspace(1) [[B1]]
+;
+ entry:
+ %a = addrspacecast ptr addrspace(1) %p to ptr
+ %b = getelementptr inbounds i8, ptr %a, i64 8
+ %c = addrspacecast ptr %b to ptr addrspace(1)
+ call void @sink(ptr %b)
+ ret ptr addrspace(1) %c
+}
+
+declare dso_local void @sink(ptr)
diff --git a/llvm/test/CodeGen/BPF/addr-space-simplify-4.ll b/llvm/test/CodeGen/BPF/addr-space-simplify-4.ll
new file mode 100644
index 0000000..b2c384b
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/addr-space-simplify-4.ll
@@ -0,0 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -passes=bpf-aspace-simplify -mtriple=bpf-pc-linux -S < %s | FileCheck %s
+
+; Check that bpf-aspace-simplify pass simplifies chain
+; 'cast K->M -> cast M->N -> GEP -> cast N->M -> cast M->K'.
+
+define dso_local ptr addrspace(2) @test (ptr addrspace(2) %p) {
+; CHECK-LABEL: define dso_local ptr addrspace(2) @test(
+; CHECK-SAME: ptr addrspace(2) [[P:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[C12:%.*]] = getelementptr inbounds i8, ptr addrspace(2) [[P]], i64 8
+; CHECK-NEXT: ret ptr addrspace(2) [[C12]]
+;
+ entry:
+ %a = addrspacecast ptr addrspace(2) %p to ptr addrspace(1)
+ %b = addrspacecast ptr addrspace(1) %a to ptr
+ %c = getelementptr inbounds i8, ptr %b, i64 8
+ %d = addrspacecast ptr %c to ptr addrspace(1)
+ %e = addrspacecast ptr addrspace (1) %d to ptr addrspace(2)
+ ret ptr addrspace(2) %e
+}
diff --git a/llvm/test/CodeGen/BPF/addr-space-simplify-5.ll b/llvm/test/CodeGen/BPF/addr-space-simplify-5.ll
new file mode 100644
index 0000000..b62d253
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/addr-space-simplify-5.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -passes=bpf-aspace-simplify -mtriple=bpf-pc-linux -S < %s | FileCheck %s
+
+; Check that bpf-aspace-simplify pass removes unnecessary (for BPF)
+; address space casts for cast M->N -> GEP -> cast N->M chain,
+; where chain is split between several BBs.
+
+define dso_local ptr addrspace(1) @test (ptr addrspace(1) %p) {
+; CHECK-LABEL: define dso_local ptr addrspace(1) @test(
+; CHECK-SAME: ptr addrspace(1) [[P:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: [[B1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[P]], i64 8
+; CHECK-NEXT: ret ptr addrspace(1) [[B1]]
+;
+entry:
+ %a = addrspacecast ptr addrspace(1) %p to ptr
+ %b = getelementptr inbounds i8, ptr %a, i64 8
+ br label %exit
+
+exit:
+ %c = addrspacecast ptr %b to ptr addrspace(1)
+ ret ptr addrspace(1) %c
+}
diff --git a/llvm/test/CodeGen/BPF/assembler-disassembler.s b/llvm/test/CodeGen/BPF/assembler-disassembler.s
index 2bc7421..991d6ed 100644
--- a/llvm/test/CodeGen/BPF/assembler-disassembler.s
+++ b/llvm/test/CodeGen/BPF/assembler-disassembler.s
@@ -289,3 +289,10 @@ r0 = *(u32*)skb[42]
r0 = *(u8*)skb[r1]
r0 = *(u16*)skb[r1]
r0 = *(u32*)skb[r1]
+
+// CHECK: bf 10 01 00 01 00 00 00 r0 = addr_space_cast(r1, 0x0, 0x1)
+// CHECK: bf 21 01 00 00 00 01 00 r1 = addr_space_cast(r2, 0x1, 0x0)
+// CHECK: bf 43 01 00 2a 00 07 00 r3 = addr_space_cast(r4, 0x7, 0x2a)
+r0 = addr_space_cast(r1, 0, 1)
+r1 = addr_space_cast(r2, 1, 0)
+r3 = addr_space_cast(r4, 7, 42)