aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel Henrique Barboza <dbarboza@ventanamicro.com>2025-08-06 13:08:25 -0300
committerGitHub <noreply@github.com>2025-08-06 09:08:25 -0700
commit8e57689c34f0b0af70f9aaf009c3be0e85d90dda (patch)
tree5c5aa7160f367ba997c11eb86944ee969818baed
parent3686e5b52f2a02c1c19050479d1dd0fd9d1dd4f8 (diff)
downloadllvm-8e57689c34f0b0af70f9aaf009c3be0e85d90dda.zip
llvm-8e57689c34f0b0af70f9aaf009c3be0e85d90dda.tar.gz
llvm-8e57689c34f0b0af70f9aaf009c3be0e85d90dda.tar.bz2
[RISCV] add load/store misched/PostRA subtarget features (#149409)
Some processors benefit more from store clustering than from load clustering, and vice versa, depending on factors that are specific to each one (e.g. which macrofusions are implemented). Likewise, certain optimizations benefit more from misched clustering than from PostRA clustering. Macrofusions are again an example: on a processor with store-pair macrofusions, like the veyron-v1, it is observed that misched clustering increases the number of macrofusions more than PostRA clustering does. This of course isn't necessarily true for other processors, but it shows that processors can benefit from more fine-grained control of the clustering mutations, and that each one may want to configure them differently. Add 4 new subtarget features that replace the existing (now removed) riscv-misched-load-store-clustering and riscv-postmisched-load-store-clustering options: - disable-misched-load-clustering and disable-misched-store-clustering: disable load/store clustering during misched; - disable-postmisched-load-clustering and disable-postmisched-store-clustering: disable load/store clustering during PostRA. Note that the new subtarget features disable specific stages of the default clustering settings. The default itself (load and store clustering for both misched and PostRA) is left untouched. Disable all clustering except misched store clustering for the veyron-v1 processor using the new features.
-rw-r--r--llvm/lib/Target/RISCV/RISCVFeatures.td12
-rw-r--r--llvm/lib/Target/RISCV/RISCVProcessors.td3
-rw-r--r--llvm/lib/Target/RISCV/RISCVTargetMachine.cpp25
-rw-r--r--llvm/test/CodeGen/RISCV/features-info.ll4
-rw-r--r--llvm/test/CodeGen/RISCV/misched-load-clustering.ll47
-rw-r--r--llvm/test/CodeGen/RISCV/misched-mem-clustering.mir6
-rw-r--r--llvm/test/CodeGen/RISCV/misched-store-clustering.ll83
7 files changed, 160 insertions, 20 deletions
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index 171940e..a7329d2 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -1700,6 +1700,18 @@ def TuneNLogNVRGather
def TunePostRAScheduler : SubtargetFeature<"use-postra-scheduler",
"UsePostRAScheduler", "true", "Schedule again after register allocation">;
+def TuneDisableMISchedLoadClustering : SubtargetFeature<"disable-misched-load-clustering",
+ "EnableMISchedLoadClustering", "false", "Disable load clustering in the machine scheduler">;
+
+def TuneDisableMISchedStoreClustering : SubtargetFeature<"disable-misched-store-clustering",
+ "EnableMISchedStoreClustering", "false", "Disable store clustering in the machine scheduler">;
+
+def TuneDisablePostMISchedLoadClustering : SubtargetFeature<"disable-postmisched-load-clustering",
+ "EnablePostMISchedLoadClustering", "false", "Disable PostRA load clustering in the machine scheduler">;
+
+def TuneDisablePostMISchedStoreClustering : SubtargetFeature<"disable-postmisched-store-clustering",
+ "EnablePostMISchedStoreClustering", "false", "Disable PostRA store clustering in the machine scheduler">;
+
def TuneDisableLatencySchedHeuristic
: SubtargetFeature<"disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true",
"Disable latency scheduling heuristic">;
diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td
index 838edf6..8445730 100644
--- a/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -590,6 +590,9 @@ def VENTANA_VEYRON_V1 : RISCVProcessorModel<"veyron-v1",
FeatureStdExtZicboz,
FeatureVendorXVentanaCondOps],
[TuneVentanaVeyron,
+ TuneDisableMISchedLoadClustering,
+ TuneDisablePostMISchedLoadClustering,
+ TuneDisablePostMISchedStoreClustering,
TuneLUIADDIFusion,
TuneAUIPCADDIFusion,
TuneZExtHFusion,
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 3f2a83f..66ce134 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -94,16 +94,6 @@ static cl::opt<bool>
cl::desc("Enable the loop data prefetch pass"),
cl::init(true));
-static cl::opt<bool> EnableMISchedLoadStoreClustering(
- "riscv-misched-load-store-clustering", cl::Hidden,
- cl::desc("Enable load and store clustering in the machine scheduler"),
- cl::init(true));
-
-static cl::opt<bool> EnablePostMISchedLoadStoreClustering(
- "riscv-postmisched-load-store-clustering", cl::Hidden,
- cl::desc("Enable PostRA load and store clustering in the machine scheduler"),
- cl::init(true));
-
static cl::opt<bool> DisableVectorMaskMutation(
"riscv-disable-vector-mask-mutation",
cl::desc("Disable the vector mask scheduling mutation"), cl::init(false),
@@ -294,15 +284,17 @@ bool RISCVTargetMachine::isNoopAddrSpaceCast(unsigned SrcAS,
ScheduleDAGInstrs *
RISCVTargetMachine::createMachineScheduler(MachineSchedContext *C) const {
+ const RISCVSubtarget &ST = C->MF->getSubtarget<RISCVSubtarget>();
ScheduleDAGMILive *DAG = createSchedLive(C);
- if (EnableMISchedLoadStoreClustering) {
+
+ if (ST.enableMISchedLoadClustering())
DAG->addMutation(createLoadClusterDAGMutation(
DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));
+
+ if (ST.enableMISchedStoreClustering())
DAG->addMutation(createStoreClusterDAGMutation(
DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));
- }
- const RISCVSubtarget &ST = C->MF->getSubtarget<RISCVSubtarget>();
if (!DisableVectorMaskMutation && ST.hasVInstructions())
DAG->addMutation(createRISCVVectorMaskDAGMutation(DAG->TRI));
@@ -311,13 +303,16 @@ RISCVTargetMachine::createMachineScheduler(MachineSchedContext *C) const {
ScheduleDAGInstrs *
RISCVTargetMachine::createPostMachineScheduler(MachineSchedContext *C) const {
+ const RISCVSubtarget &ST = C->MF->getSubtarget<RISCVSubtarget>();
ScheduleDAGMI *DAG = createSchedPostRA(C);
- if (EnablePostMISchedLoadStoreClustering) {
+
+ if (ST.enablePostMISchedLoadClustering())
DAG->addMutation(createLoadClusterDAGMutation(
DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));
+
+ if (ST.enablePostMISchedStoreClustering())
DAG->addMutation(createStoreClusterDAGMutation(
DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));
- }
return DAG;
}
diff --git a/llvm/test/CodeGen/RISCV/features-info.ll b/llvm/test/CodeGen/RISCV/features-info.ll
index b94665b..a5ee412 100644
--- a/llvm/test/CodeGen/RISCV/features-info.ll
+++ b/llvm/test/CodeGen/RISCV/features-info.ll
@@ -13,6 +13,10 @@
; CHECK-NEXT: conditional-cmv-fusion - Enable branch+c.mv fusion.
; CHECK-NEXT: d - 'D' (Double-Precision Floating-Point).
; CHECK-NEXT: disable-latency-sched-heuristic - Disable latency scheduling heuristic.
+; CHECK-NEXT: disable-misched-load-clustering - Disable load clustering in the machine scheduler.
+; CHECK-NEXT: disable-misched-store-clustering - Disable store clustering in the machine scheduler.
+; CHECK-NEXT: disable-postmisched-load-clustering - Disable PostRA load clustering in the machine scheduler.
+; CHECK-NEXT: disable-postmisched-store-clustering - Disable PostRA store clustering in the machine scheduler.
; CHECK-NEXT: dlen-factor-2 - Vector unit DLEN(data path width) is half of VLEN.
; CHECK-NEXT: e - 'E' (Embedded Instruction Set with 16 GPRs).
; CHECK-NEXT: exact-asm - Enable Exact Assembly (Disables Compression and Relaxation).
diff --git a/llvm/test/CodeGen/RISCV/misched-load-clustering.ll b/llvm/test/CodeGen/RISCV/misched-load-clustering.ll
index 160f0ae..abdc1ba 100644
--- a/llvm/test/CodeGen/RISCV/misched-load-clustering.ll
+++ b/llvm/test/CodeGen/RISCV/misched-load-clustering.ll
@@ -1,17 +1,42 @@
; REQUIRES: asserts
-; RUN: llc -mtriple=riscv32 -verify-misched -riscv-misched-load-store-clustering=false \
+;
+; Disable all misched clustering
+; RUN: llc -mtriple=riscv32 -verify-misched \
+; RUN: -mattr=+disable-misched-load-clustering,+disable-misched-store-clustering \
; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
; RUN: | FileCheck -check-prefix=NOCLUSTER %s
-; RUN: llc -mtriple=riscv64 -verify-misched -riscv-misched-load-store-clustering=false \
+; RUN: llc -mtriple=riscv64 -verify-misched \
+; RUN: -mattr=+disable-misched-load-clustering,+disable-misched-store-clustering \
; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
; RUN: | FileCheck -check-prefix=NOCLUSTER %s
+;
+; ST misched clustering only
+; RUN: llc -mtriple=riscv32 -verify-misched \
+; RUN: -mattr=+disable-misched-load-clustering \
+; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
+; RUN: | FileCheck -check-prefix=STCLUSTER %s
+; RUN: llc -mtriple=riscv64 -verify-misched \
+; RUN: -mattr=+disable-misched-load-clustering \
+; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
+; RUN: | FileCheck -check-prefix=STCLUSTER %s
+;
+; LD misched clustering only
; RUN: llc -mtriple=riscv32 -verify-misched \
+; RUN: -mattr=+disable-misched-store-clustering \
; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
; RUN: | FileCheck -check-prefix=LDCLUSTER %s
; RUN: llc -mtriple=riscv64 -verify-misched \
+; RUN: -mattr=+disable-misched-store-clustering \
; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
; RUN: | FileCheck -check-prefix=LDCLUSTER %s
-
+;
+; Default misched cluster settings (i.e. both LD and ST clustering)
+; RUN: llc -mtriple=riscv32 -verify-misched \
+; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
+; RUN: | FileCheck -check-prefix=DEFAULTCLUSTER %s
+; RUN: llc -mtriple=riscv64 -verify-misched \
+; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
+; RUN: | FileCheck -check-prefix=DEFAULTCLUSTER %s
define i32 @load_clustering_1(ptr nocapture %p) {
; NOCLUSTER: ********** MI Scheduling **********
@@ -22,6 +47,14 @@ define i32 @load_clustering_1(ptr nocapture %p) {
; NOCLUSTER: SU(4): %4:gpr = LW %0:gpr, 4
; NOCLUSTER: SU(5): %6:gpr = LW %0:gpr, 16
;
+; STCLUSTER: ********** MI Scheduling **********
+; STCLUSTER-LABEL: load_clustering_1:%bb.0
+; STCLUSTER: *** Final schedule for %bb.0 ***
+; STCLUSTER: SU(1): %1:gpr = LW %0:gpr, 12
+; STCLUSTER: SU(2): %2:gpr = LW %0:gpr, 8
+; STCLUSTER: SU(4): %4:gpr = LW %0:gpr, 4
+; STCLUSTER: SU(5): %6:gpr = LW %0:gpr, 16
+;
; LDCLUSTER: ********** MI Scheduling **********
; LDCLUSTER-LABEL: load_clustering_1:%bb.0
; LDCLUSTER: *** Final schedule for %bb.0 ***
@@ -29,6 +62,14 @@ define i32 @load_clustering_1(ptr nocapture %p) {
; LDCLUSTER: SU(2): %2:gpr = LW %0:gpr, 8
; LDCLUSTER: SU(1): %1:gpr = LW %0:gpr, 12
; LDCLUSTER: SU(5): %6:gpr = LW %0:gpr, 16
+;
+; DEFAULTCLUSTER: ********** MI Scheduling **********
+; DEFAULTCLUSTER-LABEL: load_clustering_1:%bb.0
+; DEFAULTCLUSTER: *** Final schedule for %bb.0 ***
+; DEFAULTCLUSTER: SU(4): %4:gpr = LW %0:gpr, 4
+; DEFAULTCLUSTER: SU(2): %2:gpr = LW %0:gpr, 8
+; DEFAULTCLUSTER: SU(1): %1:gpr = LW %0:gpr, 12
+; DEFAULTCLUSTER: SU(5): %6:gpr = LW %0:gpr, 16
entry:
%arrayidx0 = getelementptr inbounds i32, ptr %p, i32 3
%val0 = load i32, ptr %arrayidx0
diff --git a/llvm/test/CodeGen/RISCV/misched-mem-clustering.mir b/llvm/test/CodeGen/RISCV/misched-mem-clustering.mir
index 21398d3..01960f9 100644
--- a/llvm/test/CodeGen/RISCV/misched-mem-clustering.mir
+++ b/llvm/test/CodeGen/RISCV/misched-mem-clustering.mir
@@ -1,10 +1,12 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=riscv64 -x mir -mcpu=sifive-p470 -verify-misched -enable-post-misched=false \
-# RUN: -riscv-postmisched-load-store-clustering=false -debug-only=machine-scheduler \
+# RUN: -mattr=+disable-postmisched-load-clustering \
+# RUN: -mattr=+disable-postmisched-store-clustering -debug-only=machine-scheduler \
# RUN: -start-before=machine-scheduler -stop-after=postmisched -misched-regpressure=false -o - 2>&1 < %s \
# RUN: | FileCheck -check-prefix=NOPOSTMISCHED %s
# RUN: llc -mtriple=riscv64 -x mir -mcpu=sifive-p470 -mattr=+use-postra-scheduler -verify-misched -enable-post-misched=true \
-# RUN: -riscv-postmisched-load-store-clustering=false -debug-only=machine-scheduler \
+# RUN: -mattr=+disable-postmisched-load-clustering \
+# RUN: -mattr=+disable-postmisched-store-clustering -debug-only=machine-scheduler \
# RUN: -start-before=machine-scheduler -stop-after=postmisched -misched-regpressure=false -o - 2>&1 < %s \
# RUN: | FileCheck -check-prefix=NOCLUSTER %s
# RUN: llc -mtriple=riscv64 -x mir -mcpu=sifive-p470 -mattr=+use-postra-scheduler -verify-misched -enable-post-misched=true \
diff --git a/llvm/test/CodeGen/RISCV/misched-store-clustering.ll b/llvm/test/CodeGen/RISCV/misched-store-clustering.ll
new file mode 100644
index 0000000..02e853d
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/misched-store-clustering.ll
@@ -0,0 +1,83 @@
+; REQUIRES: asserts
+;
+; Disable all misched clustering
+; RUN: llc -mtriple=riscv32 -verify-misched \
+; RUN: -mattr=+disable-misched-load-clustering,+disable-misched-store-clustering \
+; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
+; RUN: | FileCheck -check-prefix=NOCLUSTER %s
+; RUN: llc -mtriple=riscv64 -verify-misched \
+; RUN: -mattr=+disable-misched-load-clustering,+disable-misched-store-clustering \
+; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
+; RUN: | FileCheck -check-prefix=NOCLUSTER %s
+;
+; ST misched clustering only
+; RUN: llc -mtriple=riscv32 -verify-misched \
+; RUN: -mattr=+disable-misched-load-clustering \
+; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
+; RUN: | FileCheck -check-prefix=STCLUSTER %s
+; RUN: llc -mtriple=riscv64 -verify-misched \
+; RUN: -mattr=+disable-misched-load-clustering \
+; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
+; RUN: | FileCheck -check-prefix=STCLUSTER %s
+;
+; LD misched clustering only
+; RUN: llc -mtriple=riscv32 -verify-misched \
+; RUN: -mattr=+disable-misched-store-clustering \
+; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
+; RUN: | FileCheck -check-prefix=LDCLUSTER %s
+; RUN: llc -mtriple=riscv64 -verify-misched \
+; RUN: -mattr=+disable-misched-store-clustering \
+; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
+; RUN: | FileCheck -check-prefix=LDCLUSTER %s
+;
+; Default misched cluster settings (i.e. both LD and ST clustering)
+; RUN: llc -mtriple=riscv32 -verify-misched \
+; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
+; RUN: | FileCheck -check-prefix=DEFAULTCLUSTER %s
+; RUN: llc -mtriple=riscv64 -verify-misched \
+; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
+; RUN: | FileCheck -check-prefix=DEFAULTCLUSTER %s
+
+define i32 @store_clustering_1(ptr nocapture %p, i32 %v) {
+; NOCLUSTER: ********** MI Scheduling **********
+; NOCLUSTER-LABEL: store_clustering_1:%bb.0
+; NOCLUSTER: *** Final schedule for %bb.0 ***
+; NOCLUSTER: SU(2): SW %1:gpr, %0:gpr, 12 :: (store (s32) into %ir.arrayidx0)
+; NOCLUSTER: SU(3): SW %1:gpr, %0:gpr, 8 :: (store (s32) into %ir.arrayidx1)
+; NOCLUSTER: SU(4): SW %1:gpr, %0:gpr, 4 :: (store (s32) into %ir.arrayidx2)
+; NOCLUSTER: SU(5): SW %1:gpr, %0:gpr, 16 :: (store (s32) into %ir.arrayidx3)
+;
+; STCLUSTER: ********** MI Scheduling **********
+; STCLUSTER-LABEL: store_clustering_1:%bb.0
+; STCLUSTER: *** Final schedule for %bb.0 ***
+; STCLUSTER: SU(4): SW %1:gpr, %0:gpr, 4 :: (store (s32) into %ir.arrayidx2)
+; STCLUSTER: SU(3): SW %1:gpr, %0:gpr, 8 :: (store (s32) into %ir.arrayidx1)
+; STCLUSTER: SU(2): SW %1:gpr, %0:gpr, 12 :: (store (s32) into %ir.arrayidx0)
+; STCLUSTER: SU(5): SW %1:gpr, %0:gpr, 16 :: (store (s32) into %ir.arrayidx3)
+;
+; LDCLUSTER: ********** MI Scheduling **********
+; LDCLUSTER-LABEL: store_clustering_1:%bb.0
+; LDCLUSTER: *** Final schedule for %bb.0 ***
+; LDCLUSTER: SU(2): SW %1:gpr, %0:gpr, 12 :: (store (s32) into %ir.arrayidx0)
+; LDCLUSTER: SU(3): SW %1:gpr, %0:gpr, 8 :: (store (s32) into %ir.arrayidx1)
+; LDCLUSTER: SU(4): SW %1:gpr, %0:gpr, 4 :: (store (s32) into %ir.arrayidx2)
+; LDCLUSTER: SU(5): SW %1:gpr, %0:gpr, 16 :: (store (s32) into %ir.arrayidx3)
+;
+; DEFAULTCLUSTER: ********** MI Scheduling **********
+; DEFAULTCLUSTER-LABEL: store_clustering_1:%bb.0
+; DEFAULTCLUSTER: *** Final schedule for %bb.0 ***
+; DEFAULTCLUSTER: SU(4): SW %1:gpr, %0:gpr, 4 :: (store (s32) into %ir.arrayidx2)
+; DEFAULTCLUSTER: SU(3): SW %1:gpr, %0:gpr, 8 :: (store (s32) into %ir.arrayidx1)
+; DEFAULTCLUSTER: SU(2): SW %1:gpr, %0:gpr, 12 :: (store (s32) into %ir.arrayidx0)
+; DEFAULTCLUSTER: SU(5): SW %1:gpr, %0:gpr, 16 :: (store (s32) into %ir.arrayidx3)
+entry:
+ %arrayidx0 = getelementptr inbounds i32, ptr %p, i32 3
+ store i32 %v, ptr %arrayidx0
+ %arrayidx1 = getelementptr inbounds i32, ptr %p, i32 2
+ store i32 %v, ptr %arrayidx1
+ %arrayidx2 = getelementptr inbounds i32, ptr %p, i32 1
+ store i32 %v, ptr %arrayidx2
+ %arrayidx3 = getelementptr inbounds i32, ptr %p, i32 4
+ store i32 %v, ptr %arrayidx3
+ ret i32 %v
+}