diff options
author | Uros Bizjak <ubizjak@gmail.com> | 2024-05-28 20:25:14 +0200 |
---|---|---|
committer | Uros Bizjak <ubizjak@gmail.com> | 2024-05-28 22:59:12 +0200 |
commit | 91d79053f2b416cb9e97d9c0c3fb5b73075289e6 (patch) | |
tree | 8ebbcc1acc93086a73d8386388587fb2b3afe729 /gcc | |
parent | 21fc89bac61983a869c066f7377f8280a6adca49 (diff) | |
download | gcc-91d79053f2b416cb9e97d9c0c3fb5b73075289e6.zip gcc-91d79053f2b416cb9e97d9c0c3fb5b73075289e6.tar.gz gcc-91d79053f2b416cb9e97d9c0c3fb5b73075289e6.tar.bz2 |
i386: Improve access to _Atomic DImode location via XMM regs for SSE4.1 x86_32 targets

Use MOVD/PEXTRD and MOVD/PINSRD insn sequences to move DImode value
between XMM and GPR register sets for SSE4.1 x86_32 targets in order
to avoid spilling the value to stack.
The load from _Atomic location a improves from:

    movq    a, %xmm0
    movq    %xmm0, (%esp)
    movl    (%esp), %eax
    movl    4(%esp), %edx

to:

    movq    a, %xmm0
    movd    %xmm0, %eax
    pextrd  $1, %xmm0, %edx
The store to _Atomic location b improves from:

    movl    %eax, (%esp)
    movl    %edx, 4(%esp)
    movq    (%esp), %xmm0
    movq    %xmm0, b

to:

    movd    %eax, %xmm0
    pinsrd  $1, %edx, %xmm0
    movq    %xmm0, b
gcc/ChangeLog:

    * config/i386/sync.md (atomic_loaddi_fpu): Use movd/pextrd
    to move DImode value from XMM to GPR for TARGET_SSE4_1.
    (atomic_storedi_fpu): Use movd/pinsrd to move DImode value
    from GPR to XMM for TARGET_SSE4_1.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/i386/sync.md | 36 |
1 files changed, 28 insertions, 8 deletions
```diff
diff --git a/gcc/config/i386/sync.md b/gcc/config/i386/sync.md
index 8317581..f2b3ba0 100644
--- a/gcc/config/i386/sync.md
+++ b/gcc/config/i386/sync.md
@@ -215,8 +215,18 @@
             }
           else
             {
+              rtx tmpdi = gen_lowpart (DImode, tmp);
+
               emit_insn (gen_loaddi_via_sse (tmp, src));
-              emit_insn (gen_storedi_via_sse (mem, tmp));
+
+              if (GENERAL_REG_P (dst)
+                  && TARGET_SSE4_1 && TARGET_INTER_UNIT_MOVES_FROM_VEC)
+                {
+                  emit_move_insn (dst, tmpdi);
+                  DONE;
+                }
+              else
+                emit_move_insn (mem, tmpdi);
             }
 
           if (mem != dst)
@@ -294,20 +304,30 @@
         emit_move_insn (dst, src);
       else
         {
-          if (REG_P (src))
-            {
-              emit_move_insn (mem, src);
-              src = mem;
-            }
-
           if (STACK_REG_P (tmp))
             {
+              if (GENERAL_REG_P (src))
+                {
+                  emit_move_insn (mem, src);
+                  src = mem;
+                }
+
               emit_insn (gen_loaddi_via_fpu (tmp, src));
               emit_insn (gen_storedi_via_fpu (dst, tmp));
             }
           else
             {
-              emit_insn (gen_loaddi_via_sse (tmp, src));
+              rtx tmpdi = gen_lowpart (DImode, tmp);
+
+              if (GENERAL_REG_P (src)
+                  && !(TARGET_SSE4_1 && TARGET_INTER_UNIT_MOVES_TO_VEC))
+                {
+                  emit_move_insn (mem, src);
+                  src = mem;
+                }
+
+              emit_move_insn (tmpdi, src);
+
               emit_insn (gen_storedi_via_sse (dst, tmp));
             }
         }
```