diff options
Diffstat (limited to 'llvm/lib/Target')
20 files changed, 346 insertions, 173 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64SystemOperands.td b/llvm/lib/Target/AArch64/AArch64SystemOperands.td index 65b752e..9438917 100644 --- a/llvm/lib/Target/AArch64/AArch64SystemOperands.td +++ b/llvm/lib/Target/AArch64/AArch64SystemOperands.td @@ -816,8 +816,8 @@ def : BTI<"jc", 0b110>; // TLBI (translation lookaside buffer invalidate) instruction options. //===----------------------------------------------------------------------===// -class TLBIEntry<string name, bits<3> op1, bits<4> crn, bits<4> crm, - bits<3> op2, bit needsreg> { +class TLBICommon<string name, bits<3> op1, bits<4> crn, bits<4> crm, + bits<3> op2, bit needsreg> { string Name = name; bits<14> Encoding; let Encoding{13-11} = op1; @@ -830,131 +830,150 @@ class TLBIEntry<string name, bits<3> op1, bits<4> crn, bits<4> crm, code RequiresStr = [{ { }] # !interleave(Requires # ExtraRequires, [{, }]) # [{ } }]; } -def TLBITable : GenericTable { - let FilterClass = "TLBIEntry"; - let CppTypeName = "TLBI"; - let Fields = ["Name", "Encoding", "NeedsReg", "RequiresStr"]; - - let PrimaryKey = ["Encoding"]; - let PrimaryKeyName = "lookupTLBIByEncoding"; +class TLBIEntry<string name, bits<3> op1, bits<4> crn, bits<4> crm, + bits<3> op2, bit needsreg> + : TLBICommon<name, op1, crn, crm, op2, needsreg>; + +class TLBIPEntry<string name, bits<3> op1, bits<4> crn, bits<4> crm, + bits<3> op2, bit needsreg> + : TLBICommon<name, op1, crn, crm, op2, needsreg>; + +multiclass TLBITableBase { + def NAME # Table : GenericTable { + let FilterClass = NAME # "Entry"; + let CppTypeName = NAME; + let Fields = ["Name", "Encoding", "NeedsReg", "RequiresStr"]; + let PrimaryKey = ["Encoding"]; + let PrimaryKeyName = "lookup" # NAME # "ByEncoding"; + } + def lookup # NAME # ByName : SearchIndex { + let Table = !cast<GenericTable>(NAME # "Table"); + let Key = ["Name"]; + } } -def lookupTLBIByName : SearchIndex { - let Table = TLBITable; - let Key = ["Name"]; -} +defm TLBI : TLBITableBase; +defm TLBIP : TLBITableBase; -multiclass TLBI<string name, bits<3> op1, bits<4> crn, bits<4> crm, +multiclass TLBI<string name, bit hasTLBIP, bits<3> op1, bits<4> crn, bits<4> crm, bits<3> op2, bit needsreg = 1> { def : TLBIEntry<name, op1, crn, crm, op2, needsreg>; def : TLBIEntry<!strconcat(name, "nXS"), op1, crn, crm, op2, needsreg> { let Encoding{7} = 1; let ExtraRequires = ["AArch64::FeatureXS"]; } + if !eq(hasTLBIP, true) then { + def : TLBIPEntry<name, op1, crn, crm, op2, needsreg>; + def : TLBIPEntry<!strconcat(name, "nXS"), op1, crn, crm, op2, needsreg> { + let Encoding{7} = 1; + let ExtraRequires = ["AArch64::FeatureXS"]; + } + } } -defm : TLBI<"IPAS2E1IS", 0b100, 0b1000, 0b0000, 0b001>; -defm : TLBI<"IPAS2LE1IS", 0b100, 0b1000, 0b0000, 0b101>; -defm : TLBI<"VMALLE1IS", 0b000, 0b1000, 0b0011, 0b000, 0>; -defm : TLBI<"ALLE2IS", 0b100, 0b1000, 0b0011, 0b000, 0>; -defm : TLBI<"ALLE3IS", 0b110, 0b1000, 0b0011, 0b000, 0>; -defm : TLBI<"VAE1IS", 0b000, 0b1000, 0b0011, 0b001>; -defm : TLBI<"VAE2IS", 0b100, 0b1000, 0b0011, 0b001>; -defm : TLBI<"VAE3IS", 0b110, 0b1000, 0b0011, 0b001>; -defm : TLBI<"ASIDE1IS", 0b000, 0b1000, 0b0011, 0b010>; -defm : TLBI<"VAAE1IS", 0b000, 0b1000, 0b0011, 0b011>; -defm : TLBI<"ALLE1IS", 0b100, 0b1000, 0b0011, 0b100, 0>; -defm : TLBI<"VALE1IS", 0b000, 0b1000, 0b0011, 0b101>; -defm : TLBI<"VALE2IS", 0b100, 0b1000, 0b0011, 0b101>; -defm : TLBI<"VALE3IS", 0b110, 0b1000, 0b0011, 0b101>; -defm : TLBI<"VMALLS12E1IS", 0b100, 0b1000, 0b0011, 0b110, 0>; -defm : TLBI<"VAALE1IS", 0b000, 0b1000, 0b0011, 0b111>; -defm : TLBI<"IPAS2E1", 0b100, 0b1000, 0b0100, 0b001>; -defm : TLBI<"IPAS2LE1", 0b100, 0b1000, 0b0100, 0b101>; -defm : TLBI<"VMALLE1", 0b000, 0b1000, 0b0111, 0b000, 0>; -defm : TLBI<"ALLE2", 0b100, 0b1000, 0b0111, 0b000, 0>; -defm : TLBI<"ALLE3", 0b110, 0b1000, 0b0111, 0b000, 0>; -defm : TLBI<"VAE1", 0b000, 0b1000, 0b0111, 0b001>; -defm : TLBI<"VAE2", 0b100, 0b1000, 0b0111, 0b001>; -defm : TLBI<"VAE3", 0b110, 0b1000, 0b0111, 0b001>; -defm : TLBI<"ASIDE1", 0b000, 0b1000, 0b0111, 0b010>; -defm : TLBI<"VAAE1", 0b000, 0b1000, 0b0111, 0b011>; -defm : TLBI<"ALLE1", 0b100, 0b1000, 0b0111, 0b100, 0>; -defm : TLBI<"VALE1", 0b000, 0b1000, 0b0111, 0b101>; -defm : TLBI<"VALE2", 0b100, 0b1000, 0b0111, 0b101>; -defm : TLBI<"VALE3", 0b110, 0b1000, 0b0111, 0b101>; -defm : TLBI<"VMALLS12E1", 0b100, 0b1000, 0b0111, 0b110, 0>; -defm : TLBI<"VAALE1", 0b000, 0b1000, 0b0111, 0b111>; +// hasTLBIP op1 CRn CRm op2 needsreg +defm : TLBI<"IPAS2E1IS", 1, 0b100, 0b1000, 0b0000, 0b001>; +defm : TLBI<"IPAS2LE1IS", 1, 0b100, 0b1000, 0b0000, 0b101>; +defm : TLBI<"VMALLE1IS", 0, 0b000, 0b1000, 0b0011, 0b000, 0>; +defm : TLBI<"ALLE2IS", 0, 0b100, 0b1000, 0b0011, 0b000, 0>; +defm : TLBI<"ALLE3IS", 0, 0b110, 0b1000, 0b0011, 0b000, 0>; +defm : TLBI<"VAE1IS", 1, 0b000, 0b1000, 0b0011, 0b001>; +defm : TLBI<"VAE2IS", 1, 0b100, 0b1000, 0b0011, 0b001>; +defm : TLBI<"VAE3IS", 1, 0b110, 0b1000, 0b0011, 0b001>; +defm : TLBI<"ASIDE1IS", 0, 0b000, 0b1000, 0b0011, 0b010>; +defm : TLBI<"VAAE1IS", 1, 0b000, 0b1000, 0b0011, 0b011>; +defm : TLBI<"ALLE1IS", 0, 0b100, 0b1000, 0b0011, 0b100, 0>; +defm : TLBI<"VALE1IS", 1, 0b000, 0b1000, 0b0011, 0b101>; +defm : TLBI<"VALE2IS", 1, 0b100, 0b1000, 0b0011, 0b101>; +defm : TLBI<"VALE3IS", 1, 0b110, 0b1000, 0b0011, 0b101>; +defm : TLBI<"VMALLS12E1IS", 0, 0b100, 0b1000, 0b0011, 0b110, 0>; +defm : TLBI<"VAALE1IS", 1, 0b000, 0b1000, 0b0011, 0b111>; +defm : TLBI<"IPAS2E1", 1, 0b100, 0b1000, 0b0100, 0b001>; +defm : TLBI<"IPAS2LE1", 1, 0b100, 0b1000, 0b0100, 0b101>; +defm : TLBI<"VMALLE1", 0, 0b000, 0b1000, 0b0111, 0b000, 0>; +defm : TLBI<"ALLE2", 0, 0b100, 0b1000, 0b0111, 0b000, 0>; +defm : TLBI<"ALLE3", 0, 0b110, 0b1000, 0b0111, 0b000, 0>; +defm : TLBI<"VAE1", 1, 0b000, 0b1000, 0b0111, 0b001>; +defm : TLBI<"VAE2", 1, 0b100, 0b1000, 0b0111, 0b001>; +defm : TLBI<"VAE3", 1, 0b110, 0b1000, 0b0111, 0b001>; +defm : TLBI<"ASIDE1", 0, 0b000, 0b1000, 0b0111, 0b010>; +defm : TLBI<"VAAE1", 1, 0b000, 0b1000, 0b0111, 0b011>; +defm : TLBI<"ALLE1", 0, 0b100, 0b1000, 0b0111, 0b100, 0>; +defm : TLBI<"VALE1", 1, 0b000, 0b1000, 0b0111, 0b101>; +defm : TLBI<"VALE2", 1, 0b100, 0b1000, 0b0111, 0b101>; +defm : TLBI<"VALE3", 1, 0b110, 0b1000, 0b0111, 0b101>; +defm : TLBI<"VMALLS12E1", 0, 0b100, 0b1000, 0b0111, 0b110, 0>; +defm : TLBI<"VAALE1", 1, 0b000, 0b1000, 0b0111, 0b111>; // Armv8.4-A Translation Lookaside Buffer Instructions (TLBI) let Requires = ["AArch64::FeatureTLB_RMI"] in { // Armv8.4-A Outer Sharable TLB Maintenance instructions: -// op1 CRn CRm op2 -defm : TLBI<"VMALLE1OS", 0b000, 0b1000, 0b0001, 0b000, 0>; -defm : TLBI<"VAE1OS", 0b000, 0b1000, 0b0001, 0b001>; -defm : TLBI<"ASIDE1OS", 0b000, 0b1000, 0b0001, 0b010>; -defm : TLBI<"VAAE1OS", 0b000, 0b1000, 0b0001, 0b011>; -defm : TLBI<"VALE1OS", 0b000, 0b1000, 0b0001, 0b101>; -defm : TLBI<"VAALE1OS", 0b000, 0b1000, 0b0001, 0b111>; -defm : TLBI<"IPAS2E1OS", 0b100, 0b1000, 0b0100, 0b000>; -defm : TLBI<"IPAS2LE1OS", 0b100, 0b1000, 0b0100, 0b100>; -defm : TLBI<"VAE2OS", 0b100, 0b1000, 0b0001, 0b001>; -defm : TLBI<"VALE2OS", 0b100, 0b1000, 0b0001, 0b101>; -defm : TLBI<"VMALLS12E1OS", 0b100, 0b1000, 0b0001, 0b110, 0>; -defm : TLBI<"VAE3OS", 0b110, 0b1000, 0b0001, 0b001>; -defm : TLBI<"VALE3OS", 0b110, 0b1000, 0b0001, 0b101>; -defm : TLBI<"ALLE2OS", 0b100, 0b1000, 0b0001, 0b000, 0>; -defm : TLBI<"ALLE1OS", 0b100, 0b1000, 0b0001, 0b100, 0>; -defm : TLBI<"ALLE3OS", 0b110, 0b1000, 0b0001, 0b000, 0>; +// hasTLBIP op1 CRn CRm op2 needsreg +defm : TLBI<"VMALLE1OS", 0, 0b000, 0b1000, 0b0001, 0b000, 0>; +defm : TLBI<"VAE1OS", 1, 0b000, 0b1000, 0b0001, 0b001>; +defm : TLBI<"ASIDE1OS", 0, 0b000, 0b1000, 0b0001, 0b010>; +defm : TLBI<"VAAE1OS", 1, 0b000, 0b1000, 0b0001, 0b011>; +defm : TLBI<"VALE1OS", 1, 0b000, 0b1000, 0b0001, 0b101>; +defm : TLBI<"VAALE1OS", 1, 0b000, 0b1000, 0b0001, 0b111>; +defm : TLBI<"IPAS2E1OS", 1, 0b100, 0b1000, 0b0100, 0b000>; +defm : TLBI<"IPAS2LE1OS", 1, 0b100, 0b1000, 0b0100, 0b100>; +defm : TLBI<"VAE2OS", 1, 0b100, 0b1000, 0b0001, 0b001>; +defm : TLBI<"VALE2OS", 1, 0b100, 0b1000, 0b0001, 0b101>; +defm : TLBI<"VMALLS12E1OS", 0, 0b100, 0b1000, 0b0001, 0b110, 0>; +defm : TLBI<"VAE3OS", 1, 0b110, 0b1000, 0b0001, 0b001>; +defm : TLBI<"VALE3OS", 1, 0b110, 0b1000, 0b0001, 0b101>; +defm : TLBI<"ALLE2OS", 0, 0b100, 0b1000, 0b0001, 0b000, 0>; +defm : TLBI<"ALLE1OS", 0, 0b100, 0b1000, 0b0001, 0b100, 0>; +defm : TLBI<"ALLE3OS", 0, 0b110, 0b1000, 0b0001, 0b000, 0>; // Armv8.4-A TLB Range Maintenance instructions: -// op1 CRn CRm op2 -defm : TLBI<"RVAE1", 0b000, 0b1000, 0b0110, 0b001>; -defm : TLBI<"RVAAE1", 0b000, 0b1000, 0b0110, 0b011>; -defm : TLBI<"RVALE1", 0b000, 0b1000, 0b0110, 0b101>; -defm : TLBI<"RVAALE1", 0b000, 0b1000, 0b0110, 0b111>; -defm : TLBI<"RVAE1IS", 0b000, 0b1000, 0b0010, 0b001>; -defm : TLBI<"RVAAE1IS", 0b000, 0b1000, 0b0010, 0b011>; -defm : TLBI<"RVALE1IS", 0b000, 0b1000, 0b0010, 0b101>; -defm : TLBI<"RVAALE1IS", 0b000, 0b1000, 0b0010, 0b111>; -defm : TLBI<"RVAE1OS", 0b000, 0b1000, 0b0101, 0b001>; -defm : TLBI<"RVAAE1OS", 0b000, 0b1000, 0b0101, 0b011>; -defm : TLBI<"RVALE1OS", 0b000, 0b1000, 0b0101, 0b101>; -defm : TLBI<"RVAALE1OS", 0b000, 0b1000, 0b0101, 0b111>; -defm : TLBI<"RIPAS2E1IS", 0b100, 0b1000, 0b0000, 0b010>; -defm : TLBI<"RIPAS2LE1IS", 0b100, 0b1000, 0b0000, 0b110>; -defm : TLBI<"RIPAS2E1", 0b100, 0b1000, 0b0100, 0b010>; -defm : TLBI<"RIPAS2LE1", 0b100, 0b1000, 0b0100, 0b110>; -defm : TLBI<"RIPAS2E1OS", 0b100, 0b1000, 0b0100, 0b011>; -defm : TLBI<"RIPAS2LE1OS", 0b100, 0b1000, 0b0100, 0b111>; -defm : TLBI<"RVAE2", 0b100, 0b1000, 0b0110, 0b001>; -defm : TLBI<"RVALE2", 0b100, 0b1000, 0b0110, 0b101>; -defm : TLBI<"RVAE2IS", 0b100, 0b1000, 0b0010, 0b001>; -defm : TLBI<"RVALE2IS", 0b100, 0b1000, 0b0010, 0b101>; -defm : TLBI<"RVAE2OS", 0b100, 0b1000, 0b0101, 0b001>; -defm : TLBI<"RVALE2OS", 0b100, 0b1000, 0b0101, 0b101>; -defm : TLBI<"RVAE3", 0b110, 0b1000, 0b0110, 0b001>; -defm : TLBI<"RVALE3", 0b110, 0b1000, 0b0110, 0b101>; -defm : TLBI<"RVAE3IS", 0b110, 0b1000, 0b0010, 0b001>; -defm : TLBI<"RVALE3IS", 0b110, 0b1000, 0b0010, 0b101>; -defm : TLBI<"RVAE3OS", 0b110, 0b1000, 0b0101, 0b001>; -defm : TLBI<"RVALE3OS", 0b110, 0b1000, 0b0101, 0b101>; +// hasTLBIP op1 CRn CRm op2 needsreg +defm : TLBI<"RVAE1", 1, 0b000, 0b1000, 0b0110, 0b001>; +defm : TLBI<"RVAAE1", 1, 0b000, 0b1000, 0b0110, 0b011>; +defm : TLBI<"RVALE1", 1, 0b000, 0b1000, 0b0110, 0b101>; +defm : TLBI<"RVAALE1", 1, 0b000, 0b1000, 0b0110, 0b111>; +defm : TLBI<"RVAE1IS", 1, 0b000, 0b1000, 0b0010, 0b001>; +defm : TLBI<"RVAAE1IS", 1, 0b000, 0b1000, 0b0010, 0b011>; +defm : TLBI<"RVALE1IS", 1, 0b000, 0b1000, 0b0010, 0b101>; +defm : TLBI<"RVAALE1IS", 1, 0b000, 0b1000, 0b0010, 0b111>; +defm : TLBI<"RVAE1OS", 1, 0b000, 0b1000, 0b0101, 0b001>; +defm : TLBI<"RVAAE1OS", 1, 0b000, 0b1000, 0b0101, 0b011>; +defm : TLBI<"RVALE1OS", 1, 0b000, 0b1000, 0b0101, 0b101>; +defm : TLBI<"RVAALE1OS", 1, 0b000, 0b1000, 0b0101, 0b111>; +defm : TLBI<"RIPAS2E1IS", 1, 0b100, 0b1000, 0b0000, 0b010>; +defm : TLBI<"RIPAS2LE1IS", 1, 0b100, 0b1000, 0b0000, 0b110>; +defm : TLBI<"RIPAS2E1", 1, 0b100, 0b1000, 0b0100, 0b010>; +defm : TLBI<"RIPAS2LE1", 1, 0b100, 0b1000, 0b0100, 0b110>; +defm : TLBI<"RIPAS2E1OS", 1, 0b100, 0b1000, 0b0100, 0b011>; +defm : TLBI<"RIPAS2LE1OS", 1, 0b100, 0b1000, 0b0100, 0b111>; +defm : TLBI<"RVAE2", 1, 0b100, 0b1000, 0b0110, 0b001>; +defm : TLBI<"RVALE2", 1, 0b100, 0b1000, 0b0110, 0b101>; +defm : TLBI<"RVAE2IS", 1, 0b100, 0b1000, 0b0010, 0b001>; +defm : TLBI<"RVALE2IS", 1, 0b100, 0b1000, 0b0010, 0b101>; +defm : TLBI<"RVAE2OS", 1, 0b100, 0b1000, 0b0101, 0b001>; +defm : TLBI<"RVALE2OS", 1, 0b100, 0b1000, 0b0101, 0b101>; +defm : TLBI<"RVAE3", 1, 0b110, 0b1000, 0b0110, 0b001>; +defm : TLBI<"RVALE3", 1, 0b110, 0b1000, 0b0110, 0b101>; +defm : TLBI<"RVAE3IS", 1, 0b110, 0b1000, 0b0010, 0b001>; +defm : TLBI<"RVALE3IS", 1, 0b110, 0b1000, 0b0010, 0b101>; +defm : TLBI<"RVAE3OS", 1, 0b110, 0b1000, 0b0101, 0b001>; +defm : TLBI<"RVALE3OS", 1, 0b110, 0b1000, 0b0101, 0b101>; } //FeatureTLB_RMI // Armv9-A Realm Management Extension TLBI Instructions let Requires = ["AArch64::FeatureRME"] in { -defm : TLBI<"RPAOS", 0b110, 0b1000, 0b0100, 0b011>; -defm : TLBI<"RPALOS", 0b110, 0b1000, 0b0100, 0b111>; -defm : TLBI<"PAALLOS", 0b110, 0b1000, 0b0001, 0b100, 0>; -defm : TLBI<"PAALL", 0b110, 0b1000, 0b0111, 0b100, 0>; +defm : TLBI<"RPAOS", 0, 0b110, 0b1000, 0b0100, 0b011>; +defm : TLBI<"RPALOS", 0, 0b110, 0b1000, 0b0100, 0b111>; +defm : TLBI<"PAALLOS", 0, 0b110, 0b1000, 0b0001, 0b100, 0>; +defm : TLBI<"PAALL", 0, 0b110, 0b1000, 0b0111, 0b100, 0>; } // Armv9.5-A TLBI VMALL for Dirty State let Requires = ["AArch64::FeatureTLBIW"] in { -// op1, CRn, CRm, op2, needsreg -defm : TLBI<"VMALLWS2E1", 0b100, 0b1000, 0b0110, 0b010, 0>; -defm : TLBI<"VMALLWS2E1IS", 0b100, 0b1000, 0b0010, 0b010, 0>; -defm : TLBI<"VMALLWS2E1OS", 0b100, 0b1000, 0b0101, 0b010, 0>; +// op1, CRn, CRm, op2, needsreg +defm : TLBI<"VMALLWS2E1", 0, 0b100, 0b1000, 0b0110, 0b010, 0>; +defm : TLBI<"VMALLWS2E1IS", 0, 0b100, 0b1000, 0b0010, 0b010, 0>; +defm : TLBI<"VMALLWS2E1OS", 0, 0b100, 0b1000, 0b0101, 0b010, 0>; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 3641e22..2c3870c 100644 --- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -4020,23 +4020,23 @@ bool AArch64AsmParser::parseSyspAlias(StringRef Name, SMLoc NameLoc, if (HasnXSQualifier) { Op = Op.drop_back(3); } - const AArch64TLBI::TLBI *TLBIorig = AArch64TLBI::lookupTLBIByName(Op); - if (!TLBIorig) + const AArch64TLBIP::TLBIP *TLBIPorig = AArch64TLBIP::lookupTLBIPByName(Op); + if (!TLBIPorig) return TokError("invalid operand for TLBIP instruction"); - const AArch64TLBI::TLBI TLBI( - TLBIorig->Name, TLBIorig->Encoding | (HasnXSQualifier ? (1 << 7) : 0), - TLBIorig->NeedsReg, + const AArch64TLBIP::TLBIP TLBIP( + TLBIPorig->Name, TLBIPorig->Encoding | (HasnXSQualifier ? (1 << 7) : 0), + TLBIPorig->NeedsReg, HasnXSQualifier - ? TLBIorig->FeaturesRequired | FeatureBitset({AArch64::FeatureXS}) - : TLBIorig->FeaturesRequired); - if (!TLBI.haveFeatures(getSTI().getFeatureBits())) { + ? TLBIPorig->FeaturesRequired | FeatureBitset({AArch64::FeatureXS}) + : TLBIPorig->FeaturesRequired); + if (!TLBIP.haveFeatures(getSTI().getFeatureBits())) { std::string Name = - std::string(TLBI.Name) + (HasnXSQualifier ? "nXS" : ""); + std::string(TLBIP.Name) + (HasnXSQualifier ? "nXS" : ""); std::string Str("TLBIP " + Name + " requires: "); - setRequiredFeatureString(TLBI.getRequiredFeatures(), Str); + setRequiredFeatureString(TLBIP.getRequiredFeatures(), Str); return TokError(Str); } - createSysAlias(TLBI.Encoding, Operands, S); + createSysAlias(TLBIP.Encoding, Operands, S); } Lex(); // Eat operand. diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp index 2552ee3..35bd244 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp @@ -1066,12 +1066,13 @@ bool AArch64InstPrinter::printSyspAlias(const MCInst *MI, Encoding &= ~(1 << 7); } - const AArch64TLBI::TLBI *TLBI = AArch64TLBI::lookupTLBIByEncoding(Encoding); - if (!TLBI || !TLBI->haveFeatures(STI.getFeatureBits())) + const AArch64TLBIP::TLBIP *TLBIP = + AArch64TLBIP::lookupTLBIPByEncoding(Encoding); + if (!TLBIP || !TLBIP->haveFeatures(STI.getFeatureBits())) return false; Ins = "tlbip\t"; - Name = std::string(TLBI->Name); + Name = std::string(TLBIP->Name); if (CnVal == 9) Name += "nXS"; } else diff --git a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp index 7767028..d6cb0e8 100644 --- a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp +++ b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp @@ -186,6 +186,13 @@ namespace llvm { } namespace llvm { +namespace AArch64TLBIP { +#define GET_TLBIPTable_IMPL +#include "AArch64GenSystemOperands.inc" +} // namespace AArch64TLBIP +} // namespace llvm + +namespace llvm { namespace AArch64SVCR { #define GET_SVCRsList_IMPL #include "AArch64GenSystemOperands.inc" diff --git a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h index a4ee963..fea33ef 100644 --- a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h +++ b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -795,6 +795,14 @@ namespace AArch64TLBI { #include "AArch64GenSystemOperands.inc" } +namespace AArch64TLBIP { +struct TLBIP : SysAliasReg { + using SysAliasReg::SysAliasReg; +}; +#define GET_TLBIPTable_DECL +#include "AArch64GenSystemOperands.inc" +} // namespace AArch64TLBIP + namespace AArch64II { /// Target Operand Flag enum. enum TOF { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp index ef58004..9907c88f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -1288,16 +1288,17 @@ static unsigned inlineAsmGetNumRequiredAGPRs(const InlineAsm *IA, return std::min(MaxVirtReg + MaxPhysReg, 256u); } -// TODO: Migrate to range merge of amdgpu-agpr-alloc. -struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> { - using Base = StateWrapper<BooleanState, AbstractAttribute>; - AAAMDGPUNoAGPR(const IRPosition &IRP, Attributor &A) : Base(IRP) {} +struct AAAMDGPUMinAGPRAlloc + : public StateWrapper<DecIntegerState<>, AbstractAttribute> { + using Base = StateWrapper<DecIntegerState<>, AbstractAttribute>; + AAAMDGPUMinAGPRAlloc(const IRPosition &IRP, Attributor &A) : Base(IRP) {} - static AAAMDGPUNoAGPR &createForPosition(const IRPosition &IRP, - Attributor &A) { + static AAAMDGPUMinAGPRAlloc &createForPosition(const IRPosition &IRP, + Attributor &A) { if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION) - return *new (A.Allocator) AAAMDGPUNoAGPR(IRP, A); - llvm_unreachable("AAAMDGPUNoAGPR is only valid for function position"); + return *new (A.Allocator) AAAMDGPUMinAGPRAlloc(IRP, A); + llvm_unreachable( + "AAAMDGPUMinAGPRAlloc is only valid for function position"); } void initialize(Attributor &A) override { @@ -1310,25 +1311,33 @@ struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> { } const std::string getAsStr(Attributor *A) const override { - return getAssumed() ? "amdgpu-no-agpr" : "amdgpu-maybe-agpr"; + std::string Str = "amdgpu-agpr-alloc="; + raw_string_ostream OS(Str); + OS << getAssumed(); + return OS.str(); } void trackStatistics() const override {} ChangeStatus updateImpl(Attributor &A) override { - // TODO: Use AACallEdges, but then we need a way to inspect asm edges. + DecIntegerState<> Maximum; - auto CheckForNoAGPRs = [&](Instruction &I) { + // Check for cases which require allocation of AGPRs. The only cases where + // AGPRs are required are if there are direct references to AGPRs, so inline + // assembly and special intrinsics. + auto CheckForMinAGPRAllocs = [&](Instruction &I) { const auto &CB = cast<CallBase>(I); const Value *CalleeOp = CB.getCalledOperand(); - const Function *Callee = dyn_cast<Function>(CalleeOp); - if (!Callee) { - if (const InlineAsm *IA = dyn_cast<InlineAsm>(CalleeOp)) - return inlineAsmGetNumRequiredAGPRs(IA, CB) == 0; - return false; + + if (const InlineAsm *IA = dyn_cast<InlineAsm>(CalleeOp)) { + // Technically, the inline asm could be invoking a call to an unknown + // external function that requires AGPRs, but ignore that. + unsigned NumRegs = inlineAsmGetNumRequiredAGPRs(IA, CB); + Maximum.takeAssumedMaximum(NumRegs); + return true; } - switch (Callee->getIntrinsicID()) { + switch (CB.getIntrinsicID()) { case Intrinsic::not_intrinsic: break; case Intrinsic::write_register: @@ -1340,7 +1349,10 @@ struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> { ->getOperand(0)); auto [Kind, RegIdx, NumRegs] = AMDGPU::parseAsmPhysRegName(RegName->getString()); - return Kind != 'a'; + if (Kind == 'a') + Maximum.takeAssumedMaximum(std::min(RegIdx + NumRegs, 256u)); + + return true; } default: // Some intrinsics may use AGPRs, but if we have a choice, we are not @@ -1349,32 +1361,50 @@ struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> { } // TODO: Handle callsite attributes - const auto *CalleeInfo = A.getAAFor<AAAMDGPUNoAGPR>( - *this, IRPosition::function(*Callee), DepClassTy::REQUIRED); - return CalleeInfo && CalleeInfo->isValidState() && - CalleeInfo->getAssumed(); + auto *CBEdges = A.getAAFor<AACallEdges>( + *this, IRPosition::callsite_function(CB), DepClassTy::REQUIRED); + if (!CBEdges || CBEdges->hasUnknownCallee()) { + Maximum.indicatePessimisticFixpoint(); + return false; + } + + for (const Function *PossibleCallee : CBEdges->getOptimisticEdges()) { + const auto *CalleeInfo = A.getAAFor<AAAMDGPUMinAGPRAlloc>( + *this, IRPosition::function(*PossibleCallee), DepClassTy::REQUIRED); + if (!CalleeInfo || !CalleeInfo->isValidState()) { + Maximum.indicatePessimisticFixpoint(); + return false; + } + + Maximum.takeAssumedMaximum(CalleeInfo->getAssumed()); + } + + return true; }; bool UsedAssumedInformation = false; - if (!A.checkForAllCallLikeInstructions(CheckForNoAGPRs, *this, + if (!A.checkForAllCallLikeInstructions(CheckForMinAGPRAllocs, *this, UsedAssumedInformation)) return indicatePessimisticFixpoint(); - return ChangeStatus::UNCHANGED; + + return clampStateAndIndicateChange(getState(), Maximum); } ChangeStatus manifest(Attributor &A) override { - if (!getAssumed()) - return ChangeStatus::UNCHANGED; LLVMContext &Ctx = getAssociatedFunction()->getContext(); - return A.manifestAttrs(getIRPosition(), - {Attribute::get(Ctx, "amdgpu-agpr-alloc", "0")}); + SmallString<4> Buffer; + raw_svector_ostream OS(Buffer); + OS << getAssumed(); + + return A.manifestAttrs( + getIRPosition(), {Attribute::get(Ctx, "amdgpu-agpr-alloc", OS.str())}); } - StringRef getName() const override { return "AAAMDGPUNoAGPR"; } + StringRef getName() const override { return "AAAMDGPUMinAGPRAlloc"; } const char *getIdAddr() const override { return &ID; } /// This function should return true if the type of the \p AA is - /// AAAMDGPUNoAGPRs + /// AAAMDGPUMinAGPRAllocs static bool classof(const AbstractAttribute *AA) { return (AA->getIdAddr() == &ID); } @@ -1382,7 +1412,7 @@ struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> { static const char ID; }; -const char AAAMDGPUNoAGPR::ID = 0; +const char AAAMDGPUMinAGPRAlloc::ID = 0; /// An abstract attribute to propagate the function attribute /// "amdgpu-cluster-dims" from kernel entry functions to device functions. @@ -1550,10 +1580,11 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM, DenseSet<const char *> Allowed( {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID, &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID, - &AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID, - &AACallEdges::ID, &AAPointerInfo::ID, &AAPotentialConstantValues::ID, - &AAUnderlyingObjects::ID, &AANoAliasAddrSpace::ID, &AAAddressSpace::ID, - &AAIndirectCallInfo::ID, &AAAMDGPUClusterDims::ID}); + &AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID, + &AAAMDGPUMinAGPRAlloc::ID, &AACallEdges::ID, &AAPointerInfo::ID, + &AAPotentialConstantValues::ID, &AAUnderlyingObjects::ID, + &AANoAliasAddrSpace::ID, &AAAddressSpace::ID, &AAIndirectCallInfo::ID, + &AAAMDGPUClusterDims::ID}); AttributorConfig AC(CGUpdater); AC.IsClosedWorldModule = Options.IsClosedWorld; @@ -1595,7 +1626,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM, A.getOrCreateAAFor<AAAMDGPUClusterDims>(IRPosition::function(*F)); if (ST.hasGFX90AInsts()) - A.getOrCreateAAFor<AAAMDGPUNoAGPR>(IRPosition::function(*F)); + A.getOrCreateAAFor<AAAMDGPUMinAGPRAlloc>(IRPosition::function(*F)); for (auto &I : instructions(F)) { Value *Ptr = nullptr; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp index fedb694..89c16da 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp @@ -482,12 +482,13 @@ void AMDGPURewriteAGPRCopyMFMAImpl::eliminateSpillsOfReassignedVGPRs() const { } sort(StackIntervals, [](const LiveInterval *A, const LiveInterval *B) { + // The ordering has to be strictly weak. /// Sort heaviest intervals first to prioritize their unspilling - if (A->weight() > B->weight()) - return true; + if (A->weight() != B->weight()) + return A->weight() > B->weight(); - if (A->getSize() > B->getSize()) - return true; + if (A->getSize() != B->getSize()) + return A->getSize() > B->getSize(); // Tie breaker by number to avoid need for stable sort return A->reg().stackSlotIndex() < B->reg().stackSlotIndex(); diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp index bc1a3a7..82c43ff 100644 --- a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp +++ b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp @@ -1507,7 +1507,7 @@ void DXILBitcodeWriter::writeDICompileUnit(const DICompileUnit *N, SmallVectorImpl<uint64_t> &Record, unsigned Abbrev) { Record.push_back(N->isDistinct()); - Record.push_back(N->getSourceLanguage()); + Record.push_back(N->getSourceLanguage().getUnversionedName()); Record.push_back(VE.getMetadataOrNullID(N->getFile())); Record.push_back(VE.getMetadataOrNullID(N->getRawProducer())); Record.push_back(N->isOptimized()); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td index 8d9b777..a29b7dd 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td @@ -788,32 +788,32 @@ multiclass ShxAdd_UWPat<int i, Instruction shxadd_uw> { } multiclass Sh1Add_UWPat<Instruction sh1add_uw> { - def : Pat<(i64 (add_like_non_imm12 (and (shl GPR:$rs1, (i64 1)), 0x1FFFFFFFF), - (XLenVT GPR:$rs2))), + def : Pat<(add_like_non_imm12 (and (shl GPR:$rs1, (i64 1)), (i64 0x1FFFFFFFF)), + (XLenVT GPR:$rs2)), (sh1add_uw GPR:$rs1, GPR:$rs2)>; // Use SRLI to clear the LSBs and SHXADD_UW to mask and shift. - def : Pat<(i64 (add_like_non_imm12 (and GPR:$rs1, 0x1FFFFFFFE), - (XLenVT GPR:$rs2))), + def : Pat<(add_like_non_imm12 (and GPR:$rs1, (i64 0x1FFFFFFFE)), + (XLenVT GPR:$rs2)), (sh1add_uw (XLenVT (SRLI GPR:$rs1, 1)), GPR:$rs2)>; } multiclass Sh2Add_UWPat<Instruction sh2add_uw> { - def : Pat<(i64 (add_like_non_imm12 (and (shl GPR:$rs1, (i64 2)), 0x3FFFFFFFF), - (XLenVT GPR:$rs2))), + def : Pat<(add_like_non_imm12 (and (shl GPR:$rs1, (i64 2)), (i64 0x3FFFFFFFF)), + (XLenVT GPR:$rs2)), (sh2add_uw GPR:$rs1, GPR:$rs2)>; // Use SRLI to clear the LSBs and SHXADD_UW to mask and shift. - def : Pat<(i64 (add_like_non_imm12 (and GPR:$rs1, 0x3FFFFFFFC), - (XLenVT GPR:$rs2))), + def : Pat<(add_like_non_imm12 (and GPR:$rs1, (i64 0x3FFFFFFFC)), + (XLenVT GPR:$rs2)), (sh2add_uw (XLenVT (SRLI GPR:$rs1, 2)), GPR:$rs2)>; } multiclass Sh3Add_UWPat<Instruction sh3add_uw> { - def : Pat<(i64 (add_like_non_imm12 (and (shl GPR:$rs1, (i64 3)), 0x7FFFFFFFF), - (XLenVT GPR:$rs2))), + def : Pat<(add_like_non_imm12 (and (shl GPR:$rs1, (i64 3)), (i64 0x7FFFFFFFF)), + (XLenVT GPR:$rs2)), (sh3add_uw GPR:$rs1, GPR:$rs2)>; // Use SRLI to clear the LSBs and SHXADD_UW to mask and shift. - def : Pat<(i64 (add_like_non_imm12 (and GPR:$rs1, 0x7FFFFFFF8), - (XLenVT GPR:$rs2))), + def : Pat<(add_like_non_imm12 (and GPR:$rs1, (i64 0x7FFFFFFF8)), + (XLenVT GPR:$rs2)), (sh3add_uw (XLenVT (SRLI GPR:$rs1, 3)), GPR:$rs2)>; } diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td index 82e768d..6605a5c 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td @@ -238,7 +238,7 @@ class RISCVRegisterClass<list<ValueType> regTypes, int align, dag regList> } class GPRRegisterClass<dag regList> - : RISCVRegisterClass<[XLenVT, XLenFVT, i32, i16], 32, regList> { + : RISCVRegisterClass<[XLenVT, XLenFVT], 32, regList> { let RegInfos = XLenRI; } diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp index 776208b..35a2ee1 100644 --- a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp +++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp @@ -284,6 +284,17 @@ void SPIRVInstPrinter::printInst(const MCInst *MI, uint64_t Address, } break; } + case SPIRV::OpPredicatedLoadINTEL: + case SPIRV::OpPredicatedStoreINTEL: { + const unsigned NumOps = MI->getNumOperands(); + if (NumOps > NumFixedOps) { + OS << ' '; + printSymbolicOperand<OperandCategory::MemoryOperandOperand>( + MI, NumOps - 1, OS); + break; + } + break; + } default: printRemainingVariableOps(MI, NumFixedOps, OS); break; diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp index 0e0c454..dbe8e18 100644 --- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp @@ -2419,6 +2419,27 @@ static bool generatePipeInst(const SPIRV::IncomingCall *Call, return buildPipeInst(Call, Opcode, Scope, MIRBuilder, GR); } +static bool generatePredicatedLoadStoreInst(const SPIRV::IncomingCall *Call, + MachineIRBuilder &MIRBuilder, + SPIRVGlobalRegistry *GR) { + const SPIRV::DemangledBuiltin *Builtin = Call->Builtin; + unsigned Opcode = + SPIRV::lookupNativeBuiltin(Builtin->Name, Builtin->Set)->Opcode; + + bool IsSet = Opcode != SPIRV::OpPredicatedStoreINTEL; + unsigned ArgSz = Call->Arguments.size(); + SmallVector<uint32_t, 1> ImmArgs; + MachineRegisterInfo *MRI = MIRBuilder.getMRI(); + // Memory operand is optional and is literal. + if (ArgSz > 3) + ImmArgs.push_back( + getConstFromIntrinsic(Call->Arguments[/*Literal index*/ 3], MRI)); + + Register TypeReg = GR->getSPIRVTypeID(Call->ReturnType); + return buildOpFromWrapper(MIRBuilder, Opcode, Call, + IsSet ? TypeReg : Register(0), ImmArgs); +} + static bool buildNDRange(const SPIRV::IncomingCall *Call, MachineIRBuilder &MIRBuilder, SPIRVGlobalRegistry *GR) { @@ -3019,6 +3040,8 @@ std::optional<bool> lowerBuiltin(const StringRef DemangledCall, return generate2DBlockIOINTELInst(Call.get(), MIRBuilder, GR); case SPIRV::Pipe: return generatePipeInst(Call.get(), MIRBuilder, GR); + case SPIRV::PredicatedLoadStore: + return generatePredicatedLoadStoreInst(Call.get(), MIRBuilder, GR); } return false; } diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td index 2a8deb6..3b8764a 100644 --- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td +++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td @@ -70,6 +70,7 @@ def BindlessINTEL : BuiltinGroup; def TernaryBitwiseINTEL : BuiltinGroup; def Block2DLoadStore : BuiltinGroup; def Pipe : BuiltinGroup; +def PredicatedLoadStore : BuiltinGroup; //===----------------------------------------------------------------------===// // Class defining a demangled builtin record. The information in the record @@ -752,6 +753,10 @@ defm : DemangledNativeBuiltin<"__spirv_Subgroup2DBlockLoadTransformINTEL", OpenC defm : DemangledNativeBuiltin<"__spirv_Subgroup2DBlockPrefetchINTEL", OpenCL_std, Block2DLoadStore, 9, 9, OpSubgroup2DBlockPrefetchINTEL>; defm : DemangledNativeBuiltin<"__spirv_Subgroup2DBlockStoreINTEL", OpenCL_std, Block2DLoadStore, 10, 10, OpSubgroup2DBlockStoreINTEL>; +// SPV_INTEL_predicated_io builtin records +defm : DemangledNativeBuiltin<"__spirv_PredicatedLoadINTEL", OpenCL_std, PredicatedLoadStore, 3, 4, OpPredicatedLoadINTEL>; +defm : DemangledNativeBuiltin<"__spirv_PredicatedStoreINTEL", OpenCL_std, PredicatedLoadStore, 3, 4, OpPredicatedStoreINTEL>; + //===----------------------------------------------------------------------===// // Class defining a work/sub group builtin that should be translated into a // SPIR-V instruction using the defined properties. diff --git a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp index 85ea9e1..5f3ed86 100644 --- a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp @@ -151,7 +151,9 @@ static const std::map<std::string, SPIRV::Extension::Extension, std::less<>> {"SPV_KHR_bfloat16", SPIRV::Extension::Extension::SPV_KHR_bfloat16}, {"SPV_EXT_relaxed_printf_string_address_space", SPIRV::Extension::Extension:: - SPV_EXT_relaxed_printf_string_address_space}}; + SPV_EXT_relaxed_printf_string_address_space}, + {"SPV_INTEL_predicated_io", + SPIRV::Extension::Extension::SPV_INTEL_predicated_io}}; bool SPIRVExtensionsParser::parse(cl::Option &O, StringRef ArgName, StringRef ArgValue, diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitNonSemanticDI.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitNonSemanticDI.cpp index 275463e..318ef06 100644 --- a/llvm/lib/Target/SPIRV/SPIRVEmitNonSemanticDI.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVEmitNonSemanticDI.cpp @@ -112,7 +112,8 @@ bool SPIRVEmitNonSemanticDI::emitGlobalDI(MachineFunction &MF) { FilePaths.emplace_back(); sys::path::append(FilePaths.back(), File->getDirectory(), File->getFilename()); - LLVMSourceLanguages.push_back(CompileUnit->getSourceLanguage()); + LLVMSourceLanguages.push_back( + CompileUnit->getSourceLanguage().getUnversionedName()); } } const NamedMDNode *ModuleFlags = M->getNamedMetadata("llvm.module.flags"); diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td index 1723bfb..a61351e 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td +++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td @@ -987,3 +987,9 @@ def OpSubgroup2DBlockPrefetchINTEL: Op<6234, (outs), (ins ID:$element_size, ID:$ def OpSubgroup2DBlockStoreINTEL: Op<6235, (outs), (ins ID:$element_size, ID:$block_width, ID:$block_height, ID:$block_count, ID:$src_ptr, ID:$dst_base_ptr, ID:$memory_width, ID:$memory_height, ID:$memory_pitch, ID:$coord), "OpSubgroup2DBlockStoreINTEL $element_size $block_width $block_height $block_count $src_ptr $dst_base_ptr $memory_width $memory_height $memory_pitch $coord">; + +// SPV_INTEL_predicated_io +def OpPredicatedLoadINTEL: Op<6528, (outs ID:$res), (ins TYPE:$resType, ID:$ptr, ID:$predicate, ID:$default_value, variable_ops), + "$res = OpPredicatedLoadINTEL $resType $ptr $predicate $default_value">; +def OpPredicatedStoreINTEL: Op<6529, (outs), (ins ID:$ptr, ID:$object, ID:$predicate, variable_ops), + "OpPredicatedStoreINTEL $ptr $object $predicate">; diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp index dc717a6..5144fb1 100644 --- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp @@ -2035,6 +2035,17 @@ void addInstrRequirements(const MachineInstr &MI, // TODO: Add UntypedPointersKHR when implemented. break; } + case SPIRV::OpPredicatedLoadINTEL: + case SPIRV::OpPredicatedStoreINTEL: { + if (!ST.canUseExtension(SPIRV::Extension::SPV_INTEL_predicated_io)) + report_fatal_error( + "OpPredicated[Load/Store]INTEL instructions require " + "the following SPIR-V extension: SPV_INTEL_predicated_io", + false); + Reqs.addExtension(SPIRV::Extension::SPV_INTEL_predicated_io); + Reqs.addCapability(SPIRV::Capability::PredicatedIOINTEL); + break; + } default: break; diff --git a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td index 6a32dba..2625642 100644 --- a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td +++ b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td @@ -385,6 +385,7 @@ defm SPV_INTEL_int4 : ExtensionOperand<123, [EnvOpenCL]>; defm SPV_KHR_float_controls2 : ExtensionOperand<124, [EnvVulkan, EnvOpenCL]>; defm SPV_INTEL_tensor_float32_conversion : ExtensionOperand<125, [EnvOpenCL]>; defm SPV_KHR_bfloat16 : ExtensionOperand<126, [EnvVulkan, EnvOpenCL]>; +defm SPV_INTEL_predicated_io : ExtensionOperand<127, [EnvOpenCL]>; //===----------------------------------------------------------------------===// // Multiclass used to define Capabilities enum values and at the same time @@ -594,6 +595,7 @@ defm SubgroupMatrixMultiplyAccumulateINTEL : CapabilityOperand<6236, 0, 0, [SPV_ defm Subgroup2DBlockIOINTEL : CapabilityOperand<6228, 0, 0, [SPV_INTEL_2d_block_io], []>; defm Subgroup2DBlockTransformINTEL : CapabilityOperand<6229, 0, 0, [SPV_INTEL_2d_block_io], [Subgroup2DBlockIOINTEL]>; defm Subgroup2DBlockTransposeINTEL : CapabilityOperand<6230, 0, 0, [SPV_INTEL_2d_block_io], [Subgroup2DBlockIOINTEL]>; +defm PredicatedIOINTEL : CapabilityOperand<6257, 0, 0, [SPV_INTEL_predicated_io], []>; defm Int4TypeINTEL : CapabilityOperand<5112, 0, 0, [SPV_INTEL_int4], []>; defm Int4CooperativeMatrixINTEL : CapabilityOperand<5114, 0, 0, [SPV_INTEL_int4], [Int4TypeINTEL, CooperativeMatrixKHR]>; defm TensorFloat32RoundingINTEL : CapabilityOperand<6425, 0, 0, [SPV_INTEL_tensor_float32_conversion], []>; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp index 6bb064a..526420b 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp @@ -441,7 +441,9 @@ void WebAssemblyAsmPrinter::EmitProducerInfo(Module &M) { llvm::SmallSet<StringRef, 4> SeenLanguages; for (size_t I = 0, E = Debug->getNumOperands(); I < E; ++I) { const auto *CU = cast<DICompileUnit>(Debug->getOperand(I)); - StringRef Language = dwarf::LanguageString(CU->getSourceLanguage()); + StringRef Language = + dwarf::LanguageString(CU->getSourceLanguage().getUnversionedName()); + Language.consume_front("DW_LANG_"); if (SeenLanguages.insert(Language).second) Languages.emplace_back(Language.str(), ""); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td index 1306026..49af78b 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -1445,6 +1445,49 @@ def : Pat<(v16i8 (wasm_narrow_u (v8i16 V128:$left), (v8i16 V128:$right))), def : Pat<(v8i16 (wasm_narrow_u (v4i32 V128:$left), (v4i32 V128:$right))), (NARROW_U_I16x8 $left, $right)>; +// Recognize a saturating truncation and convert into the corresponding +// narrow_TYPE_s or narrow_TYPE_u instruction. +multiclass SignedSaturatingTruncate<ValueType input, ValueType output, + Instruction narrow, int minval, + int maxval, int mask> { + def : Pat< + (output (wasm_narrow_u + (and (smin (smax (input V128:$a), (splat_vector (i32 minval))), + (splat_vector (i32 maxval))), (splat_vector (i32 mask))), + (and (smin (smax (input V128:$b), (splat_vector (i32 minval))), + (splat_vector (i32 maxval))), (splat_vector (i32 mask))) + )), + (narrow V128:$a, V128:$b) + >; + + def : Pat< + (output (wasm_narrow_u + (and (smax (smin (input V128:$a), (splat_vector (i32 maxval))), + (splat_vector (i32 minval))), (splat_vector (i32 mask))), + (and (smax (smin (input V128:$b), (splat_vector (i32 maxval))), + (splat_vector (i32 minval))), (splat_vector (i32 mask))) + )), + (narrow V128:$a, V128:$b) + >; +} + +defm : SignedSaturatingTruncate<v8i16, v16i8, NARROW_S_I8x16, -128, 127, 0xFF>; +defm : SignedSaturatingTruncate<v4i32, v8i16, NARROW_S_I16x8, -32768, 32767, 0xFFFF>; + +multiclass UnsignedSaturatingTruncate<ValueType input, ValueType output, + Instruction narrow, int maxval> { + def : Pat< + (output (wasm_narrow_u + (umin (input V128:$a), (splat_vector (i32 maxval))), + (umin (input V128:$b), (splat_vector (i32 maxval))) + )), + (narrow V128:$a, V128:$b) + >; +} + +defm : UnsignedSaturatingTruncate<v8i16, v16i8, NARROW_U_I8x16, 0xFF>; +defm : UnsignedSaturatingTruncate<v4i32, v8i16, NARROW_U_I16x8, 0xFFFF>; + // Bitcasts are nops // Matching bitcast t1 to t1 causes strange errors, so avoid repeating types foreach t1 = AllVecs in |