diff options
-rw-r--r-- | gcc/config/aarch64/aarch64-sve.md | 194 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-sve2.md | 12 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/gather_earlyclobber.c | 96 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve2/gather_earlyclobber.c | 32 |
4 files changed, 263 insertions, 71 deletions
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index 955bbdf..da5534c 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -1429,12 +1429,18 @@ UNSPEC_LD1_GATHER))] "TARGET_SVE" {@ [cons: =0, 1, 2, 3, 4, 5 ] - [w, Z, w, Ui1, Ui1, Upl ] ld1<Vesize>\t%0.s, %5/z, [%2.s] - [w, vgw, w, Ui1, Ui1, Upl ] ld1<Vesize>\t%0.s, %5/z, [%2.s, #%1] - [w, rk, w, Z, Ui1, Upl ] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw] - [w, rk, w, Ui1, Ui1, Upl ] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw] - [w, rk, w, Z, i, Upl ] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4] - [w, rk, w, Ui1, i, Upl ] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4] + [&w, Z, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%2.s] + [?w, Z, 0, Ui1, Ui1, Upl] ^ + [&w, vgw, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%2.s, #%1] + [?w, vgw, 0, Ui1, Ui1, Upl] ^ + [&w, rk, w, Z, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw] + [?w, rk, 0, Z, Ui1, Upl] ^ + [&w, rk, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw] + [?w, rk, 0, Ui1, Ui1, Upl] ^ + [&w, rk, w, Z, i, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4] + [?w, rk, 0, Z, i, Upl] ^ + [&w, rk, w, Ui1, i, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4] + [?w, rk, 0, Ui1, i, Upl] ^ } ) @@ -1451,11 +1457,15 @@ (mem:BLK (scratch))] UNSPEC_LD1_GATHER))] "TARGET_SVE" - {@ [cons: =0, 1, 2, 3, 4, 5 ] - [w, Z, w, i, Ui1, Upl ] ld1<Vesize>\t%0.d, %5/z, [%2.d] - [w, vgd, w, i, Ui1, Upl ] ld1<Vesize>\t%0.d, %5/z, [%2.d, #%1] - [w, rk, w, i, Ui1, Upl ] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d] - [w, rk, w, i, i, Upl ] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4] + {@ [cons: =0, 1, 2, 3, 4, 5] + [&w, Z, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%2.d] + [?w, Z, 0, i, Ui1, Upl] ^ + [&w, vgd, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%2.d, #%1] + [?w, vgd, 0, i, Ui1, Upl] ^ + [&w, rk, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d] + [?w, rk, 0, i, Ui1, Upl] ^ + [&w, rk, w, i, i, Upl] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4] + [?w, rk, 0, i, i, Upl] ^ } ) @@ -1475,9 +1485,11 @@ (mem:BLK (scratch))] UNSPEC_LD1_GATHER))] "TARGET_SVE" - {@ [cons: =0, 1, 2, 3, 4, 5 ] - [w, rk, w, i, Ui1, Upl ] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, <su>xtw] - [w, rk, w, i, i, Upl ] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, <su>xtw %p4] + {@ [cons: =0, 1, 2, 3, 4, 5] + [&w, rk, w, i, Ui1, Upl ] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, <su>xtw] + [?w, rk, 0, i, Ui1, Upl ] ^ + [&w, rk, w, i, i, Upl ] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, <su>xtw %p4] + [?w, rk, 0, i, i, Upl ] ^ } "&& !CONSTANT_P (operands[6])" { @@ -1503,9 +1515,11 @@ (mem:BLK (scratch))] UNSPEC_LD1_GATHER))] "TARGET_SVE" - {@ [cons: =0, 1, 2, 3, 4, 5 ] - [w, rk, w, i, Ui1, Upl ] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw] - [w, rk, w, i, i, Upl ] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4] + {@ [cons: =0, 1, 2, 3, 4, 5] + [&w, rk, w, i, Ui1, Upl ] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw] + [?w, rk, 0, i, Ui1, Upl ] ^ + [&w, rk, w, i, i, Upl ] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4] + [?w, rk, 0, i, i, Upl ] ^ } "&& !CONSTANT_P (operands[6])" { @@ -1528,9 +1542,11 @@ (mem:BLK (scratch))] UNSPEC_LD1_GATHER))] "TARGET_SVE" - {@ [cons: =0, 1, 2, 3, 4, 5 ] - [w, rk, w, i, Ui1, Upl ] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw] - [w, rk, w, i, i, Upl ] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4] + {@ [cons: =0, 1, 2, 3, 4, 5] + [&w, rk, w, i, Ui1, Upl ] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw] + [?w, rk, 0, i, Ui1, Upl ] ^ + [&w, rk, w, i, i, Upl ] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4] + [?w, rk, 0, i, i, Upl ] ^ } ) @@ -1563,13 +1579,19 @@ UNSPEC_LD1_GATHER))] UNSPEC_PRED_X))] "TARGET_SVE && (~<SVE_4HSI:narrower_mask> & <SVE_4BHI:self_mask>) == 0" - {@ [cons: =0, 1, 2, 3, 4, 5, 6 ] - [w, Z, w, Ui1, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%2.s] - [w, vg<SVE_4BHI:Vesize>, w, Ui1, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%2.s, #%1] - [w, rk, w, Z, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw] - [w, rk, w, Ui1, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw] - [w, rk, w, Z, i, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4] - [w, rk, w, Ui1, i, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4] + {@ [cons: =0, 1, 2, 3, 4, 5, 6] + [&w, Z, w, Ui1, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%2.s] + [?w, Z, 0, Ui1, Ui1, Upl, UplDnm] ^ + [&w, vg<SVE_4BHI:Vesize>, w, Ui1, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%2.s, #%1] + [?w, vg<SVE_4BHI:Vesize>, 0, Ui1, Ui1, Upl, UplDnm] ^ + [&w, rk, w, Z, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw] + [?w, rk, 0, Z, Ui1, Upl, UplDnm] ^ + [&w, rk, w, Ui1, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw] + [?w, rk, 0, Ui1, Ui1, Upl, UplDnm] ^ + [&w, rk, w, Z, i, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4] + [?w, rk, 0, Z, i, Upl, UplDnm] ^ + [&w, rk, w, Ui1, i, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4] + [?w, rk, 0, Ui1, i, Upl, UplDnm] ^ } "&& !CONSTANT_P (operands[6])" { @@ -1595,10 +1617,14 @@ UNSPEC_PRED_X))] "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0" {@ [cons: =0, 1, 2, 3, 4, 5, 6] - [w, Z, w, i, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%2.d] - [w, vg<SVE_2BHSI:Vesize>, w, i, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%2.d, #%1] - [w, rk, w, i, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d] - [w, rk, w, i, i, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4] + [&w, Z, w, i, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%2.d] + [?w, Z, 0, i, Ui1, Upl, UplDnm] ^ + [&w, vg<SVE_2BHSI:Vesize>, w, i, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%2.d, #%1] + [?w, vg<SVE_2BHSI:Vesize>, 0, i, Ui1, Upl, UplDnm] ^ + [&w, rk, w, i, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d] + [?w, rk, 0, i, Ui1, Upl, UplDnm] ^ + [&w, rk, w, i, i, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4] + [?w, rk, 0, i, i, Upl, UplDnm] ^ } "&& !CONSTANT_P (operands[6])" { @@ -1627,8 +1653,10 @@ UNSPEC_PRED_X))] "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0" {@ [cons: =0, 1, 2, 3, 4, 5] - [w, rk, w, i, Ui1, Upl ] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, <ANY_EXTEND2:su>xtw] - [w, rk, w, i, i, Upl ] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, <ANY_EXTEND2:su>xtw %p4] + [&w, rk, w, i, Ui1, Upl ] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, <ANY_EXTEND2:su>xtw] + [?w, rk, 0, i, Ui1, Upl ] ^ + [&w, rk, w, i, i, Upl ] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, <ANY_EXTEND2:su>xtw %p4] + [?w, rk, 0, i, i, Upl ] ^ } "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))" { @@ -1660,8 +1688,10 @@ UNSPEC_PRED_X))] "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0" {@ [cons: =0, 1, 2, 3, 4, 5] - [w, rk, w, i, Ui1, Upl ] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw] - [w, rk, w, i, i, Upl ] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4] + [&w, rk, w, i, Ui1, Upl ] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw] + [?w, rk, 0, i, Ui1, Upl ] ^ + [&w, rk, w, i, i, Upl ] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4] + [?w, rk, 0, i, i, Upl ] ^ } "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))" { @@ -1690,8 +1720,10 @@ UNSPEC_PRED_X))] "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0" {@ [cons: =0, 1, 2, 3, 4, 5] - [w, rk, w, i, Ui1, Upl ] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw] - [w, rk, w, i, i, Upl ] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4] + [&w, rk, w, i, Ui1, Upl ] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw] + [?w, rk, 0, i, Ui1, Upl ] ^ + [&w, rk, w, i, i, Upl ] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4] + [?w, rk, 0, i, i, Upl ] ^ } "&& !CONSTANT_P (operands[7])" { @@ -1721,13 +1753,19 @@ (reg:VNx16BI FFRT_REGNUM)] UNSPEC_LDFF1_GATHER))] "TARGET_SVE" - {@ [cons: =0, 1, 2, 3, 4, 5 ] - [w, Z, w, i, Ui1, Upl] ldff1w\t%0.s, %5/z, [%2.s] - [w, vgw, w, i, Ui1, Upl] ldff1w\t%0.s, %5/z, [%2.s, #%1] - [w, rk, w, Z, Ui1, Upl] ldff1w\t%0.s, %5/z, [%1, %2.s, sxtw] - [w, rk, w, Ui1, Ui1, Upl] ldff1w\t%0.s, %5/z, [%1, %2.s, uxtw] - [w, rk, w, Z, i, Upl] ldff1w\t%0.s, %5/z, [%1, %2.s, sxtw %p4] - [w, rk, w, Ui1, i, Upl] ldff1w\t%0.s, %5/z, [%1, %2.s, uxtw %p4] + {@ [cons: =0, 1, 2, 3, 4, 5 ] + [&w, Z, w, i, Ui1, Upl] ldff1w\t%0.s, %5/z, [%2.s] + [?w, Z, 0, i, Ui1, Upl] ^ + [&w, vgw, w, i, Ui1, Upl] ldff1w\t%0.s, %5/z, [%2.s, #%1] + [?w, vgw, 0, i, Ui1, Upl] ^ + [&w, rk, w, Z, Ui1, Upl] ldff1w\t%0.s, %5/z, [%1, %2.s, sxtw] + [?w, rk, 0, Z, Ui1, Upl] ^ + [&w, rk, w, Ui1, Ui1, Upl] ldff1w\t%0.s, %5/z, [%1, %2.s, uxtw] + [?w, rk, 0, Ui1, Ui1, Upl] ^ + [&w, rk, w, Z, i, Upl] ldff1w\t%0.s, %5/z, [%1, %2.s, sxtw %p4] + [?w, rk, 0, Z, i, Upl] ^ + [&w, rk, w, Ui1, i, Upl] ldff1w\t%0.s, %5/z, [%1, %2.s, uxtw %p4] + [?w, rk, 0, Ui1, i, Upl] ^ } ) @@ -1745,11 +1783,15 @@ (reg:VNx16BI FFRT_REGNUM)] UNSPEC_LDFF1_GATHER))] "TARGET_SVE" - {@ [cons: =0, 1, 2, 3, 4, 5] - [w, Z, w, i, Ui1, Upl ] ldff1d\t%0.d, %5/z, [%2.d] - [w, vgd, w, i, Ui1, Upl ] ldff1d\t%0.d, %5/z, [%2.d, #%1] - [w, rk, w, i, Ui1, Upl ] ldff1d\t%0.d, %5/z, [%1, %2.d] - [w, rk, w, i, i, Upl ] ldff1d\t%0.d, %5/z, [%1, %2.d, lsl %p4] + {@ [cons: =0, 1, 2, 3, 4, 5 ] + [&w, Z, w, i, Ui1, Upl ] ldff1d\t%0.d, %5/z, [%2.d] + [?w, Z, 0, i, Ui1, Upl ] ^ + [&w, vgd, w, i, Ui1, Upl ] ldff1d\t%0.d, %5/z, [%2.d, #%1] + [?w, vgd, 0, i, Ui1, Upl ] ^ + [&w, rk, w, i, Ui1, Upl ] ldff1d\t%0.d, %5/z, [%1, %2.d] + [?w, rk, 0, i, Ui1, Upl ] ^ + [&w, rk, w, i, i, Upl ] ldff1d\t%0.d, %5/z, [%1, %2.d, lsl %p4] + [?w, rk, 0, i, i, Upl ] ^ } ) @@ -1772,8 +1814,10 @@ UNSPEC_LDFF1_GATHER))] "TARGET_SVE" {@ [cons: =0, 1, 2, 3, 4, 5] - [w, rk, w, i, Ui1, Upl ] ldff1d\t%0.d, %5/z, [%1, %2.d, sxtw] - [w, rk, w, i, i, Upl ] ldff1d\t%0.d, %5/z, [%1, %2.d, sxtw %p4] + [&w, rk, w, i, Ui1, Upl ] ldff1d\t%0.d, %5/z, [%1, %2.d, sxtw] + [?w, rk, 0, i, Ui1, Upl ] ^ + [&w, rk, w, i, i, Upl ] ldff1d\t%0.d, %5/z, [%1, %2.d, sxtw %p4] + [?w, rk, 0, i, i, Upl ] ^ } "&& !CONSTANT_P (operands[6])" { @@ -1797,8 +1841,10 @@ UNSPEC_LDFF1_GATHER))] "TARGET_SVE" {@ [cons: =0, 1, 2, 3, 4, 5] - [w, rk, w, i, Ui1, Upl ] ldff1d\t%0.d, %5/z, [%1, %2.d, uxtw] - [w, rk, w, i, i, Upl ] ldff1d\t%0.d, %5/z, [%1, %2.d, uxtw %p4] + [&w, rk, w, i, Ui1, Upl ] ldff1d\t%0.d, %5/z, [%1, %2.d, uxtw] + [?w, rk, 0, i, Ui1, Upl ] ^ + [&w, rk, w, i, i, Upl ] ldff1d\t%0.d, %5/z, [%1, %2.d, uxtw %p4] + [?w, rk, 0, i, i, Upl ] ^ } ) @@ -1833,12 +1879,18 @@ UNSPEC_PRED_X))] "TARGET_SVE" {@ [cons: =0, 1, 2, 3, 4, 5, 6] - [w, Z, w, i, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%2.s] - [w, vg<VNx4_NARROW:Vesize>, w, i, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%2.s, #%1] - [w, rk, w, Z, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw] - [w, rk, w, Ui1, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw] - [w, rk, w, Z, i, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4] - [w, rk, w, Ui1, i, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4] + [&w, Z, w, i, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%2.s] + [?w, Z, 0, i, Ui1, Upl, UplDnm] ^ + [&w, vg<VNx4_NARROW:Vesize>, w, i, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%2.s, #%1] + [?w, vg<VNx4_NARROW:Vesize>, 0, i, Ui1, Upl, UplDnm] ^ + [&w, rk, w, Z, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw] + [?w, rk, 0, Z, Ui1, Upl, UplDnm] ^ + [&w, rk, w, Ui1, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw] + [?w, rk, 0, Ui1, Ui1, Upl, UplDnm] ^ + [&w, rk, w, Z, i, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4] + [?w, rk, 0, Z, i, Upl, UplDnm] ^ + [&w, rk, w, Ui1, i, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4] + [?w, rk, 0, Ui1, i, Upl, UplDnm] ^ } "&& !CONSTANT_P (operands[6])" { @@ -1865,10 +1917,14 @@ UNSPEC_PRED_X))] "TARGET_SVE" {@ [cons: =0, 1, 2, 3, 4, 5, 6] - [w, Z, w, i, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%2.d] - [w, vg<VNx2_NARROW:Vesize>, w, i, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%2.d, #%1] - [w, rk, w, i, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d] - [w, rk, w, i, i, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4] + [&w, Z, w, i, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%2.d] + [?w, Z, 0, i, Ui1, Upl, UplDnm] ^ + [&w, vg<VNx2_NARROW:Vesize>, w, i, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%2.d, #%1] + [?w, vg<VNx2_NARROW:Vesize>, 0, i, Ui1, Upl, UplDnm] ^ + [&w, rk, w, i, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d] + [?w, rk, 0, i, Ui1, Upl, UplDnm] ^ + [&w, rk, w, i, i, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4] + [?w, rk, w, i, i, Upl, UplDnm] ^ } "&& !CONSTANT_P (operands[6])" { @@ -1899,8 +1955,10 @@ UNSPEC_PRED_X))] "TARGET_SVE" {@ [cons: =0, 1, 2, 3, 4, 5] - [w, rk, w, i, Ui1, Upl ] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw] - [w, rk, w, i, i, Upl ] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4] + [&w, rk, w, i, Ui1, Upl ] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw] + [?w, rk, 0, i, Ui1, Upl ] ^ + [&w, rk, w, i, i, Upl ] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4] + [?w, rk, 0, i, i, Upl ] ^ } "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))" { @@ -1929,8 +1987,10 @@ UNSPEC_PRED_X))] "TARGET_SVE" {@ [cons: =0, 1, 2, 3, 4, 5] - [w, rk, w, i, Ui1, Upl ] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw] - [w, rk, w, i, i, Upl ] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4] + [&w, rk, w, i, Ui1, Upl ] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw] + [?w, rk, 0, i, Ui1, Upl ] ^ + [&w, rk, w, i, i, Upl ] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4] + [?w, rk, 0, i, i, Upl ] ^ } "&& !CONSTANT_P (operands[7])" { diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md index f84ff74..7a77e9b 100644 --- a/gcc/config/aarch64/aarch64-sve2.md +++ b/gcc/config/aarch64/aarch64-sve2.md @@ -111,8 +111,10 @@ UNSPEC_LDNT1_GATHER))] "TARGET_SVE2" {@ [cons: =0, 1, 2, 3] - [w, Upl, Z, w ] ldnt1<Vesize>\t%0.<Vetype>, %1/z, [%3.<Vetype>] - [w, Upl, r, w ] ldnt1<Vesize>\t%0.<Vetype>, %1/z, [%3.<Vetype>, %2] + [&w, Upl, Z, w ] ldnt1<Vesize>\t%0.<Vetype>, %1/z, [%3.<Vetype>] + [?w, Upl, Z, 0 ] ^ + [&w, Upl, r, w ] ldnt1<Vesize>\t%0.<Vetype>, %1/z, [%3.<Vetype>, %2] + [?w, Upl, r, 0 ] ^ } ) @@ -132,8 +134,10 @@ "TARGET_SVE2 && (~<SVE_FULL_SDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0" {@ [cons: =0, 1, 2, 3, 4] - [w, Upl, Z, w, UplDnm] ldnt1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_SDI:Vetype>, %1/z, [%3.<SVE_FULL_SDI:Vetype>] - [w, Upl, r, w, UplDnm] ldnt1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_SDI:Vetype>, %1/z, [%3.<SVE_FULL_SDI:Vetype>, %2] + [&w, Upl, Z, w, UplDnm] ldnt1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_SDI:Vetype>, %1/z, [%3.<SVE_FULL_SDI:Vetype>] + [?w, Upl, Z, 0, UplDnm] ^ + [&w, Upl, r, w, UplDnm] ldnt1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_SDI:Vetype>, %1/z, [%3.<SVE_FULL_SDI:Vetype>, %2] + [?w, Upl, r, 0, UplDnm] ^ } "&& !CONSTANT_P (operands[4])" { diff --git a/gcc/testsuite/gcc.target/aarch64/sve/gather_earlyclobber.c b/gcc/testsuite/gcc.target/aarch64/sve/gather_earlyclobber.c new file mode 100644 index 0000000..db4a1db --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/gather_earlyclobber.c @@ -0,0 +1,96 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-O2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include <arm_sve.h> + +/* +** food: +** ld1d (?:z[1-9][0-9]*)\.d, p0/z, \[x0, z0\.d\] +** ... +** ret +*/ + +svuint64_t +food (svbool_t p, uint64_t *in, svint64_t offsets, svuint64_t a) +{ + return svadd_u64_x (p, a, svld1_gather_offset(p, in, offsets)); +} + +/* +** foodb: +** ld1d (?:z[1-9][0-9]*)\.d, p0/z, \[z0\.d\] +** ... +** ret +*/ + +svuint64_t +foodb (svbool_t p, svuint64_t bases, svuint64_t a) +{ + return svadd_u64_x (p, a, svld1_gather_u64 (p, bases)); +} + +/* +** foodff: +** ldff1d (?:z[1-9][0-9]*)\.d, p0/z, \[z0\.d\] +** ... +** ret +*/ + +svuint64_t +foodff (svbool_t p, svuint64_t bases, svuint64_t a) +{ + return svadd_u64_x (p, a, svldff1_gather_u64 (p, bases)); +} + +/* +** foow: +** ld1w (?:z[1-9][0-9]*)\.s, p0/z, \[z0\.s\] +** ... +** ret +*/ + +svuint32_t +foow (svbool_t p, svuint32_t bases, svuint32_t a) +{ + return svadd_u32_x (p, a, svld1_gather_u32 (p, bases)); +} + +/* +** foowff: +** ldff1w (?:z[1-9][0-9]*)\.s, p0/z, \[z0\.s\] +** ... +** ret +*/ + +svuint32_t +foowff (svbool_t p, svuint32_t bases, svuint32_t a) +{ + return svadd_u32_x (p, a, svldff1_gather_u32 (p, bases)); +} + +/* +** fooubd: +** ld1b (?:z[1-9][0-9]*)\.d, p0/z, \[x0, z0\.d\] +** ... +** ret +*/ + +svuint64_t +fooubd (svbool_t p, uint8_t *base, svuint64_t offsets, svuint64_t a) +{ + return svadd_u64_x (p, a, svld1ub_gather_offset_u64 (p, base, offsets)); +} + +/* +** foosbd: +** ld1sb (?:z[1-9][0-9]*)\.d, p0/z, \[x0, z0\.d\] +** ... +** ret +*/ +svint64_t +foosbd (svbool_t p, int8_t *base, svint64_t offsets, svint64_t a) +{ + return svadd_s64_x (p, a, svld1sb_gather_offset_s64 (p, base, offsets)); +} + diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/gather_earlyclobber.c b/gcc/testsuite/gcc.target/aarch64/sve2/gather_earlyclobber.c new file mode 100644 index 0000000..a6003cc --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/gather_earlyclobber.c @@ -0,0 +1,32 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-O2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include <arm_sve.h> + +/* +** foownt: +** ldnt1w (?:z[1-9][0-9]*)\.s, p0/z, \[z0\.s\] +** ... +** ret +*/ + +svuint32_t +foownt (svbool_t p, svuint32_t bases, svuint32_t a) +{ + return svadd_u32_x (p, a, svldnt1_gather_u32 (p, bases)); +} + +/* +** foodbnt: +** ldnt1d (?:z[1-9][0-9]*)\.d, p0/z, \[z0\.d\] +** ... +** ret +*/ + +svuint64_t +foodbnt (svbool_t p, svuint64_t bases, svuint64_t a) +{ + return svadd_u64_x (p, a, svldnt1_gather_u64 (p, bases)); +} + |