From ecb34599bdadfb74ee22529ad150e7500dd22641 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 4 Apr 2024 15:20:16 +0100 Subject: [X86] Add missing immediate qualifier to the (V)ROUND instructions (#87636) Makes it easier to algorithmically recreate the instruction name in various analysis scripts I'm working on --- llvm/lib/Target/X86/X86InstrInfo.cpp | 32 +++--- llvm/lib/Target/X86/X86InstrSSE.td | 116 ++++++++++----------- llvm/lib/Target/X86/X86SchedSapphireRapids.td | 14 +-- llvm/test/TableGen/x86-fold-tables.inc | 28 ++--- .../utils/TableGen/X86ManualCompressEVEXTables.def | 16 +-- 5 files changed, 103 insertions(+), 103 deletions(-) (limited to 'llvm') diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index f243343..a5b2e48 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -6276,10 +6276,10 @@ static bool hasPartialRegUpdate(unsigned Opcode, const X86Subtarget &Subtarget, case X86::RCPSSm: case X86::RCPSSr_Int: case X86::RCPSSm_Int: - case X86::ROUNDSDr: - case X86::ROUNDSDm: - case X86::ROUNDSSr: - case X86::ROUNDSSm: + case X86::ROUNDSDri: + case X86::ROUNDSDmi: + case X86::ROUNDSSri: + case X86::ROUNDSSmi: case X86::RSQRTSSr: case X86::RSQRTSSm: case X86::RSQRTSSr_Int: @@ -6778,14 +6778,14 @@ static bool hasUndefRegUpdate(unsigned Opcode, unsigned OpNum, case X86::VRCPSSr_Int: case X86::VRCPSSm: case X86::VRCPSSm_Int: - case X86::VROUNDSDr: - case X86::VROUNDSDm: - case X86::VROUNDSDr_Int: - case X86::VROUNDSDm_Int: - case X86::VROUNDSSr: - case X86::VROUNDSSm: - case X86::VROUNDSSr_Int: - case X86::VROUNDSSm_Int: + case X86::VROUNDSDri: + case X86::VROUNDSDmi: + case X86::VROUNDSDri_Int: + case X86::VROUNDSDmi_Int: + case X86::VROUNDSSri: + case X86::VROUNDSSmi: + case X86::VROUNDSSri_Int: + case X86::VROUNDSSmi_Int: case X86::VRSQRTSSr: case X86::VRSQRTSSr_Int: case X86::VRSQRTSSm: @@ -7516,8 +7516,8 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI, case X86::VRCPSSr_Int: case X86::RSQRTSSr_Int: case X86::VRSQRTSSr_Int: - case X86::ROUNDSSr_Int: - case X86::VROUNDSSr_Int: + case X86::ROUNDSSri_Int: + case X86::VROUNDSSri_Int: case X86::COMISSrr_Int: case X86::VCOMISSrr_Int: case X86::VCOMISSZrr_Int: @@ -7685,8 +7685,8 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI, case X86::VCVTSD2USI64Zrr_Int: case X86::VCVTTSD2USIZrr_Int: case X86::VCVTTSD2USI64Zrr_Int: - case X86::ROUNDSDr_Int: - case X86::VROUNDSDr_Int: + case X86::ROUNDSDri_Int: + case X86::VROUNDSDri_Int: case X86::COMISDrr_Int: case X86::VCOMISDrr_Int: case X86::VCOMISDZrr_Int: diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 69d4536..2b391b6 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -5475,35 +5475,35 @@ multiclass sse41_fp_unop_p opc, string OpcodeStr, // Intrinsic operation, reg. // Vector intrinsic operation, reg let Uses = [MXCSR], mayRaiseFPException = 1 in { - def r : SS4AIi8, - Sched<[sched]>; + def ri : SS4AIi8, + Sched<[sched]>; // Vector intrinsic operation, mem - def m : SS4AIi8, - Sched<[sched.Folded]>; + def mi : SS4AIi8, + Sched<[sched.Folded]>; } } multiclass avx_fp_unop_rm opcss, bits<8> opcsd, string OpcodeStr, X86FoldableSchedWrite sched> { let ExeDomain = SSEPackedSingle, hasSideEffects = 0, isCodeGenOnly = 1 in { - def SSr : SS4AIi8, Sched<[sched]>; let mayLoad = 1 in - def SSm : SS4AIi8, Sched<[sched]>; let mayLoad = 1 in - def SDm : SS4AIi8 opcss, bits<8> opcsd, string OpcodeStr, X86FoldableSchedWrite sched> { let Uses = [MXCSR], mayRaiseFPException = 1 in { let ExeDomain = SSEPackedSingle, hasSideEffects = 0, isCodeGenOnly = 1 in { - def SSr : SS4AIi8, Sched<[sched]>; + def SSri : SS4AIi8, Sched<[sched]>; let mayLoad = 1 in - def SSm : SS4AIi8, Sched<[sched.Folded, sched.ReadAfterFold]>; + def SSmi : SS4AIi8, Sched<[sched.Folded, sched.ReadAfterFold]>; } // ExeDomain = SSEPackedSingle, hasSideEffects = 0 let ExeDomain = SSEPackedDouble, hasSideEffects = 0, isCodeGenOnly = 1 in { - def SDr : SS4AIi8, Sched<[sched]>; + def SDri : SS4AIi8, Sched<[sched]>; let mayLoad = 1 in - def SDm : SS4AIi8, Sched<[sched.Folded, sched.ReadAfterFold]>; + def SDmi : SS4AIi8, Sched<[sched.Folded, sched.ReadAfterFold]>; } // ExeDomain = SSEPackedDouble, hasSideEffects = 0 } } -multiclass sse41_fp_binop_s opcss, bits<8> opcsd, - string OpcodeStr, X86FoldableSchedWrite sched, - ValueType VT32, ValueType VT64, - SDNode OpNode, bit Is2Addr = 1> { +multiclass sse41_fp_unop_s_int opcss, bits<8> opcsd, + string OpcodeStr, X86FoldableSchedWrite sched, + ValueType VT32, ValueType VT64, + SDNode OpNode, bit Is2Addr = 1> { let Uses = [MXCSR], mayRaiseFPException = 1 in { let ExeDomain = SSEPackedSingle in { - def SSr_Int : SS4AIi8, Sched<[sched]>; - def SSm_Int : SS4AIi8, Sched<[sched]>; - def SDm_Int : SS4AIi8, - VEX, VVVV, VEX_LIG, WIG, SIMD_EXC; + defm VROUND : sse41_fp_unop_s_int<0x0A, 0x0B, "vround", SchedWriteFRnd.Scl, + v4f32, v2f64, X86RndScales, 0>, + VEX, VVVV, VEX_LIG, WIG, SIMD_EXC; defm VROUND : avx_fp_unop_rm<0x0A, 0x0B, "vround", SchedWriteFRnd.Scl>, VEX, VVVV, VEX_LIG, WIG, SIMD_EXC; } let Predicates = [UseAVX] in { def : Pat<(X86any_VRndScale FR32:$src1, timm:$src2), - (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src1, timm:$src2)>; + (VROUNDSSri (f32 (IMPLICIT_DEF)), FR32:$src1, timm:$src2)>; def : Pat<(X86any_VRndScale FR64:$src1, timm:$src2), - (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src1, timm:$src2)>; + (VROUNDSDri (f64 (IMPLICIT_DEF)), FR64:$src1, timm:$src2)>; } let Predicates = [UseAVX, OptForSize] in { def : Pat<(X86any_VRndScale (loadf32 addr:$src1), timm:$src2), - (VROUNDSSm (f32 (IMPLICIT_DEF)), addr:$src1, timm:$src2)>; + (VROUNDSSmi (f32 (IMPLICIT_DEF)), addr:$src1, timm:$src2)>; def : Pat<(X86any_VRndScale (loadf64 addr:$src1), timm:$src2), - (VROUNDSDm (f64 (IMPLICIT_DEF)), addr:$src1, timm:$src2)>; + (VROUNDSDmi (f64 (IMPLICIT_DEF)), addr:$src1, timm:$src2)>; } let ExeDomain = SSEPackedSingle in @@ -5667,21 +5667,21 @@ defm ROUNDPD : sse41_fp_unop_p<0x09, "roundpd", f128mem, VR128, v2f64, defm ROUND : sse41_fp_unop_s<0x0A, 0x0B, "round", SchedWriteFRnd.Scl>; let Constraints = "$src1 = $dst" in -defm ROUND : sse41_fp_binop_s<0x0A, 0x0B, "round", SchedWriteFRnd.Scl, - v4f32, v2f64, X86RndScales>; +defm ROUND : sse41_fp_unop_s_int<0x0A, 0x0B, "round", SchedWriteFRnd.Scl, + v4f32, v2f64, X86RndScales>; let Predicates = [UseSSE41] in { def : Pat<(X86any_VRndScale FR32:$src1, timm:$src2), - (ROUNDSSr FR32:$src1, timm:$src2)>; + (ROUNDSSri FR32:$src1, timm:$src2)>; def : Pat<(X86any_VRndScale FR64:$src1, timm:$src2), - (ROUNDSDr FR64:$src1, timm:$src2)>; + (ROUNDSDri FR64:$src1, timm:$src2)>; } let Predicates = [UseSSE41, OptForSize] in { def : Pat<(X86any_VRndScale (loadf32 addr:$src1), timm:$src2), - (ROUNDSSm addr:$src1, timm:$src2)>; + (ROUNDSSmi addr:$src1, timm:$src2)>; def : Pat<(X86any_VRndScale (loadf64 addr:$src1), timm:$src2), - (ROUNDSDm addr:$src1, timm:$src2)>; + (ROUNDSDmi addr:$src1, timm:$src2)>; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/X86/X86SchedSapphireRapids.td b/llvm/lib/Target/X86/X86SchedSapphireRapids.td index 88bb9ad..ff3fe32 100644 --- a/llvm/lib/Target/X86/X86SchedSapphireRapids.td +++ b/llvm/lib/Target/X86/X86SchedSapphireRapids.td @@ -2290,8 +2290,8 @@ def SPRWriteResGroup218 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11]> { let Latency = 15; let NumMicroOps = 3; } -def : InstRW<[SPRWriteResGroup218], (instregex "^(V?)ROUNDP(D|S)m$")>; -def : InstRW<[SPRWriteResGroup218, ReadAfterVecXLd], (instregex "^(V?)ROUNDS(D|S)m((_Int)?)$", +def : InstRW<[SPRWriteResGroup218], (instregex "^(V?)ROUNDP(D|S)mi$")>; +def : InstRW<[SPRWriteResGroup218, ReadAfterVecXLd], (instregex "^(V?)ROUNDS(D|S)mi((_Int)?)$", "^VRNDSCALEP(D|S)Z128rm(bi|ik)$", "^VRNDSCALEP(D|S)Z128rmbik(z?)$", "^VRNDSCALEP(D|S)Z128rmi((kz)?)$", @@ -2303,13 +2303,13 @@ def SPRWriteResGroup219 : SchedWriteRes<[SPRPort00_01]> { let Latency = 8; let NumMicroOps = 2; } -def : InstRW<[SPRWriteResGroup219], (instregex "^(V?)ROUND(PD|SS)r$", - "^(V?)ROUND(PS|SD)r$", - "^(V?)ROUNDS(D|S)r_Int$", +def : InstRW<[SPRWriteResGroup219], (instregex "^(V?)ROUND(PD|SS)ri$", + "^(V?)ROUND(PS|SD)ri$", + "^(V?)ROUNDS(D|S)ri_Int$", "^VRNDSCALEP(D|S)Z(128|256)rri((k|kz)?)$", "^VRNDSCALES(D|S)Zr$", "^VRNDSCALES(D|S)Zr(b?)_Int((k|kz)?)$", - "^VROUNDP(D|S)Yr$")>; + "^VROUNDP(D|S)Yri$")>; def SPRWriteResGroup220 : SchedWriteRes<[SPRPort00_06]> { let ReleaseAtCycles = [2]; @@ -3737,7 +3737,7 @@ def SPRWriteResGroup390 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11]> { let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup390], (instregex "^VF(C?)MADDCPHZ(128|256)m(b?)$", - "^VROUNDP(D|S)Ym$")>; + "^VROUNDP(D|S)Ymi$")>; def : InstRW<[SPRWriteResGroup390, ReadAfterVecXLd], (instregex "^VF(C?)MADDCSHZm$", "^VF(C?)MULCPHZ128rm(b?)$", "^VF(C?)MULCSHZrm$", diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc index 4ab5567..493350d 100644 --- a/llvm/test/TableGen/x86-fold-tables.inc +++ b/llvm/test/TableGen/x86-fold-tables.inc @@ -984,10 +984,10 @@ static const X86FoldTableEntry Table1[] = { {X86::RORX32ri_EVEX, X86::RORX32mi_EVEX, 0}, {X86::RORX64ri, X86::RORX64mi, 0}, {X86::RORX64ri_EVEX, X86::RORX64mi_EVEX, 0}, - {X86::ROUNDPDr, X86::ROUNDPDm, TB_ALIGN_16}, - {X86::ROUNDPSr, X86::ROUNDPSm, TB_ALIGN_16}, - {X86::ROUNDSDr, X86::ROUNDSDm, 0}, - {X86::ROUNDSSr, X86::ROUNDSSm, 0}, + {X86::ROUNDPDri, X86::ROUNDPDmi, TB_ALIGN_16}, + {X86::ROUNDPSri, X86::ROUNDPSmi, TB_ALIGN_16}, + {X86::ROUNDSDri, X86::ROUNDSDmi, 0}, + {X86::ROUNDSSri, X86::ROUNDSSmi, 0}, {X86::RSQRTPSr, X86::RSQRTPSm, TB_ALIGN_16}, {X86::RSQRTSSr, X86::RSQRTSSm, 0}, {X86::SAR16r1_ND, X86::SAR16m1_ND, 0}, @@ -1791,10 +1791,10 @@ static const X86FoldTableEntry Table1[] = { {X86::VRNDSCALEPSZ128rri, X86::VRNDSCALEPSZ128rmi, 0}, {X86::VRNDSCALEPSZ256rri, X86::VRNDSCALEPSZ256rmi, 0}, {X86::VRNDSCALEPSZrri, X86::VRNDSCALEPSZrmi, 0}, - {X86::VROUNDPDYr, X86::VROUNDPDYm, 0}, - {X86::VROUNDPDr, X86::VROUNDPDm, 0}, - {X86::VROUNDPSYr, X86::VROUNDPSYm, 0}, - {X86::VROUNDPSr, X86::VROUNDPSm, 0}, + {X86::VROUNDPDYri, X86::VROUNDPDYmi, 0}, + {X86::VROUNDPDri, X86::VROUNDPDmi, 0}, + {X86::VROUNDPSYri, X86::VROUNDPSYmi, 0}, + {X86::VROUNDPSri, X86::VROUNDPSmi, 0}, {X86::VRSQRT14PDZ128r, X86::VRSQRT14PDZ128m, 0}, {X86::VRSQRT14PDZ256r, X86::VRSQRT14PDZ256m, 0}, {X86::VRSQRT14PDZr, X86::VRSQRT14PDZm, 0}, @@ -2234,8 +2234,8 @@ static const X86FoldTableEntry Table2[] = { {X86::PUNPCKLWDrr, X86::PUNPCKLWDrm, TB_ALIGN_16}, {X86::PXORrr, X86::PXORrm, TB_ALIGN_16}, {X86::RCPSSr_Int, X86::RCPSSm_Int, TB_NO_REVERSE}, - {X86::ROUNDSDr_Int, X86::ROUNDSDm_Int, TB_NO_REVERSE}, - {X86::ROUNDSSr_Int, X86::ROUNDSSm_Int, TB_NO_REVERSE}, + {X86::ROUNDSDri_Int, X86::ROUNDSDmi_Int, TB_NO_REVERSE}, + {X86::ROUNDSSri_Int, X86::ROUNDSSmi_Int, TB_NO_REVERSE}, {X86::RSQRTSSr_Int, X86::RSQRTSSm_Int, TB_NO_REVERSE}, {X86::SBB16rr, X86::SBB16rm, 0}, {X86::SBB16rr_ND, X86::SBB16rm_ND, 0}, @@ -3778,10 +3778,10 @@ static const X86FoldTableEntry Table2[] = { {X86::VRNDSCALESHZr_Int, X86::VRNDSCALESHZm_Int, TB_NO_REVERSE}, {X86::VRNDSCALESSZr, X86::VRNDSCALESSZm, 0}, {X86::VRNDSCALESSZr_Int, X86::VRNDSCALESSZm_Int, TB_NO_REVERSE}, - {X86::VROUNDSDr, X86::VROUNDSDm, 0}, - {X86::VROUNDSDr_Int, X86::VROUNDSDm_Int, TB_NO_REVERSE}, - {X86::VROUNDSSr, X86::VROUNDSSm, 0}, - {X86::VROUNDSSr_Int, X86::VROUNDSSm_Int, TB_NO_REVERSE}, + {X86::VROUNDSDri, X86::VROUNDSDmi, 0}, + {X86::VROUNDSDri_Int, X86::VROUNDSDmi_Int, TB_NO_REVERSE}, + {X86::VROUNDSSri, X86::VROUNDSSmi, 0}, + {X86::VROUNDSSri_Int, X86::VROUNDSSmi_Int, TB_NO_REVERSE}, {X86::VRSQRT14PDZ128rkz, X86::VRSQRT14PDZ128mkz, 0}, {X86::VRSQRT14PDZ256rkz, X86::VRSQRT14PDZ256mkz, 0}, {X86::VRSQRT14PDZrkz, X86::VRSQRT14PDZmkz, 0}, diff --git a/llvm/utils/TableGen/X86ManualCompressEVEXTables.def b/llvm/utils/TableGen/X86ManualCompressEVEXTables.def index 77cf65b..665a394 100644 --- a/llvm/utils/TableGen/X86ManualCompressEVEXTables.def +++ b/llvm/utils/TableGen/X86ManualCompressEVEXTables.def @@ -197,12 +197,12 @@ ENTRY(VPUNPCKLQDQZ128rm, VPUNPCKLQDQrm) ENTRY(VPUNPCKLQDQZ128rr, VPUNPCKLQDQrr) ENTRY(VPXORQZ128rm, VPXORrm) ENTRY(VPXORQZ128rr, VPXORrr) -ENTRY(VRNDSCALEPDZ128rmi, VROUNDPDm) -ENTRY(VRNDSCALEPDZ128rri, VROUNDPDr) -ENTRY(VRNDSCALESDZm, VROUNDSDm) -ENTRY(VRNDSCALESDZm_Int, VROUNDSDm_Int) -ENTRY(VRNDSCALESDZr, VROUNDSDr) -ENTRY(VRNDSCALESDZr_Int, VROUNDSDr_Int) +ENTRY(VRNDSCALEPDZ128rmi, VROUNDPDmi) +ENTRY(VRNDSCALEPDZ128rri, VROUNDPDri) +ENTRY(VRNDSCALESDZm, VROUNDSDmi) +ENTRY(VRNDSCALESDZm_Int, VROUNDSDmi_Int) +ENTRY(VRNDSCALESDZr, VROUNDSDri) +ENTRY(VRNDSCALESDZr_Int, VROUNDSDri_Int) ENTRY(VSHUFPDZ128rmi, VSHUFPDrmi) ENTRY(VSHUFPDZ128rri, VSHUFPDrri) ENTRY(VSQRTPDZ128m, VSQRTPDm) @@ -306,8 +306,8 @@ ENTRY(VPUNPCKLQDQZ256rm, VPUNPCKLQDQYrm) ENTRY(VPUNPCKLQDQZ256rr, VPUNPCKLQDQYrr) ENTRY(VPXORQZ256rm, VPXORYrm) ENTRY(VPXORQZ256rr, VPXORYrr) -ENTRY(VRNDSCALEPDZ256rmi, VROUNDPDYm) -ENTRY(VRNDSCALEPDZ256rri, VROUNDPDYr) +ENTRY(VRNDSCALEPDZ256rmi, VROUNDPDYmi) +ENTRY(VRNDSCALEPDZ256rri, VROUNDPDYri) ENTRY(VSHUFPDZ256rmi, VSHUFPDYrmi) ENTRY(VSHUFPDZ256rri, VSHUFPDYrri) ENTRY(VSQRTPDZ256m, VSQRTPDYm) -- cgit v1.1