aboutsummaryrefslogtreecommitdiff
path: root/gcc/config
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/config')
-rw-r--r--gcc/config/aarch64/aarch64-simd.md30
-rw-r--r--gcc/config/i386/driver-i386.cc2
-rw-r--r--gcc/config/i386/i386.cc3
-rw-r--r--gcc/config/i386/i386.md8
-rw-r--r--gcc/config/riscv/riscv-v.cc2
5 files changed, 39 insertions, 6 deletions
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index e771def..af574d5 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -5046,6 +5046,36 @@
DONE;
})
+;; convert (truncate)(~x >> imm) into (truncate)(((u16)-1 - x) >> imm)
+;; because it will result in the 'not' being replaced with a constant load
+;; which allows for better loop optimization.
+;; We limit this to truncations that take the upper half and shift it to the
+;; lower half as we use subhn (patterns that would have generated an shrn
+;; otherwise).
+;; On some implementations the use of subhn also result in better throughput.
+(define_insn_and_split "*shrn_to_subhn_<mode>"
+ [(set (match_operand:<VNARROWQ> 0 "register_operand" "=&w")
+ (truncate:<VNARROWQ>
+ (lshiftrt:VQN
+ (not:VQN (match_operand:VQN 1 "register_operand" "w"))
+ (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_exact_top"))))]
+ "TARGET_SIMD"
+ "#"
+ "&& true"
+ [(const_int 0)]
+{
+ rtx tmp;
+ if (can_create_pseudo_p ())
+ tmp = gen_reg_rtx (<MODE>mode);
+ else
+ tmp = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
+ emit_move_insn (tmp, CONSTM1_RTX (<MODE>mode));
+ emit_insn (gen_aarch64_subhn<mode>_insn (operands[0], tmp,
+ operands[1], operands[2]));
+ DONE;
+})
+
+
;; pmul.
(define_insn "aarch64_pmul<mode>"
diff --git a/gcc/config/i386/driver-i386.cc b/gcc/config/i386/driver-i386.cc
index 63c7d79..fe71f55 100644
--- a/gcc/config/i386/driver-i386.cc
+++ b/gcc/config/i386/driver-i386.cc
@@ -600,7 +600,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
if (has_feature (FEATURE_AVX512F))
{
/* Assume Diamond Rapids. */
- if (has_feature (FEATURE_AMX_TRANSPOSE))
+ if (has_feature (FEATURE_AMX_FP8))
cpu = "diamondrapids";
/* Assume Granite Rapids D. */
else if (has_feature (FEATURE_AMX_COMPLEX))
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 44763c8..9657c6a 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -12562,11 +12562,12 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
if (TARGET_64BIT)
{
rtx rax = gen_rtx_REG (Pmode, AX_REG);
+ rtx rdi = gen_rtx_REG (Pmode, DI_REG);
rtx_insn *insns;
start_sequence ();
emit_call_insn
- (gen_tls_global_dynamic_64 (Pmode, rax, x, caddr));
+ (gen_tls_global_dynamic_64 (Pmode, rax, x, caddr, rdi));
insns = end_sequence ();
if (GET_MODE (x) != Pmode)
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index adff2af..370e79b 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -23201,7 +23201,8 @@
(match_operand 3)))
(unspec:P [(match_operand 1 "tls_symbolic_operand")
(reg:P SP_REG)]
- UNSPEC_TLS_GD)]
+ UNSPEC_TLS_GD)
+ (clobber (match_operand:P 4 "register_operand" "=D"))]
"TARGET_64BIT"
{
if (!TARGET_X32)
@@ -23218,7 +23219,7 @@
Use data16 prefix instead, which doesn't have this problem. */
fputs ("\tdata16", asm_out_file);
output_asm_insn
- ("lea{q}\t{%E1@tlsgd(%%rip), %%rdi|rdi, %E1@tlsgd[rip]}", operands);
+ ("lea{q}\t{%E1@tlsgd(%%rip), %q4|%q4, %E1@tlsgd[rip]}", operands);
if (TARGET_SUN_TLS || flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT)
fputs (ASM_SHORT "0x6666\n", asm_out_file);
else
@@ -23265,7 +23266,8 @@
(const_int 0)))
(unspec:P [(match_operand 1 "tls_symbolic_operand")
(reg:P SP_REG)]
- UNSPEC_TLS_GD)])]
+ UNSPEC_TLS_GD)
+ (clobber (match_operand:P 3 "register_operand"))])]
"TARGET_64BIT"
"ix86_tls_descriptor_calls_expanded_in_cfun = true;")
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 69f2a1ec..ce1633c 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1598,7 +1598,7 @@ expand_const_vector_interleaved_stepped_npatterns (rtx target, rtx src,
shifted_vid = gen_reg_rtx (mode);
rtx shift = gen_int_mode (1, Xmode);
rtx shift_ops[] = {shifted_vid, vid, shift};
- emit_vlmax_insn (code_for_pred_scalar (ASHIFT, mode), BINARY_OP,
+ emit_vlmax_insn (code_for_pred_scalar (LSHIFTRT, mode), BINARY_OP,
shift_ops);
}
else