diff options
Diffstat (limited to 'gcc/config')
-rw-r--r-- | gcc/config/aarch64/aarch64-simd.md | 30 | ||||
-rw-r--r-- | gcc/config/i386/driver-i386.cc | 2 | ||||
-rw-r--r-- | gcc/config/i386/i386.cc | 3 | ||||
-rw-r--r-- | gcc/config/i386/i386.md | 8 | ||||
-rw-r--r-- | gcc/config/riscv/riscv-v.cc | 2 |
5 files changed, 39 insertions, 6 deletions
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index e771def..af574d5 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -5046,6 +5046,36 @@ DONE; }) +;; convert (truncate)(~x >> imm) into (truncate)(((u16)-1 - x) >> imm) +;; because it will result in the 'not' being replaced with a constant load +;; which allows for better loop optimization. +;; We limit this to truncations that take the upper half and shift it to the +;; lower half as we use subhn (patterns that would have generated an shrn +;; otherwise). +;; On some implementations the use of subhn also results in better throughput. +(define_insn_and_split "*shrn_to_subhn_<mode>" + [(set (match_operand:<VNARROWQ> 0 "register_operand" "=&w") + (truncate:<VNARROWQ> + (lshiftrt:VQN + (not:VQN (match_operand:VQN 1 "register_operand" "w")) + (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_exact_top"))))] + "TARGET_SIMD" + "#" + "&& true" + [(const_int 0)] +{ + rtx tmp; + if (can_create_pseudo_p ()) + tmp = gen_reg_rtx (<MODE>mode); + else + tmp = gen_rtx_REG (<MODE>mode, REGNO (operands[0])); + emit_move_insn (tmp, CONSTM1_RTX (<MODE>mode)); + emit_insn (gen_aarch64_subhn<mode>_insn (operands[0], tmp, + operands[1], operands[2])); + DONE; +}) + + ;; pmul. (define_insn "aarch64_pmul<mode>" diff --git a/gcc/config/i386/driver-i386.cc b/gcc/config/i386/driver-i386.cc index 63c7d79..fe71f55 100644 --- a/gcc/config/i386/driver-i386.cc +++ b/gcc/config/i386/driver-i386.cc @@ -600,7 +600,7 @@ const char *host_detect_local_cpu (int argc, const char **argv) if (has_feature (FEATURE_AVX512F)) { /* Assume Diamond Rapids. */ - if (has_feature (FEATURE_AMX_TRANSPOSE)) + if (has_feature (FEATURE_AMX_FP8)) cpu = "diamondrapids"; /* Assume Granite Rapids D. 
*/ else if (has_feature (FEATURE_AMX_COMPLEX)) diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 44763c8..9657c6a 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -12562,11 +12562,12 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov) if (TARGET_64BIT) { rtx rax = gen_rtx_REG (Pmode, AX_REG); + rtx rdi = gen_rtx_REG (Pmode, DI_REG); rtx_insn *insns; start_sequence (); emit_call_insn - (gen_tls_global_dynamic_64 (Pmode, rax, x, caddr)); + (gen_tls_global_dynamic_64 (Pmode, rax, x, caddr, rdi)); insns = end_sequence (); if (GET_MODE (x) != Pmode) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index adff2af..370e79b 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -23201,7 +23201,8 @@ (match_operand 3))) (unspec:P [(match_operand 1 "tls_symbolic_operand") (reg:P SP_REG)] - UNSPEC_TLS_GD)] + UNSPEC_TLS_GD) + (clobber (match_operand:P 4 "register_operand" "=D"))] "TARGET_64BIT" { if (!TARGET_X32) @@ -23218,7 +23219,7 @@ Use data16 prefix instead, which doesn't have this problem. 
*/ fputs ("\tdata16", asm_out_file); output_asm_insn - ("lea{q}\t{%E1@tlsgd(%%rip), %%rdi|rdi, %E1@tlsgd[rip]}", operands); + ("lea{q}\t{%E1@tlsgd(%%rip), %q4|%q4, %E1@tlsgd[rip]}", operands); if (TARGET_SUN_TLS || flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT) fputs (ASM_SHORT "0x6666\n", asm_out_file); else @@ -23265,7 +23266,8 @@ (const_int 0))) (unspec:P [(match_operand 1 "tls_symbolic_operand") (reg:P SP_REG)] - UNSPEC_TLS_GD)])] + UNSPEC_TLS_GD) + (clobber (match_operand:P 3 "register_operand"))])] "TARGET_64BIT" "ix86_tls_descriptor_calls_expanded_in_cfun = true;") diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 69f2a1ec..ce1633c 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -1598,7 +1598,7 @@ expand_const_vector_interleaved_stepped_npatterns (rtx target, rtx src, shifted_vid = gen_reg_rtx (mode); rtx shift = gen_int_mode (1, Xmode); rtx shift_ops[] = {shifted_vid, vid, shift}; - emit_vlmax_insn (code_for_pred_scalar (ASHIFT, mode), BINARY_OP, + emit_vlmax_insn (code_for_pred_scalar (LSHIFTRT, mode), BINARY_OP, shift_ops); } else |