diff options
author | Claudiu Zissulescu <claziss@synopsys.com> | 2016-01-25 12:15:58 +0100 |
---|---|---|
committer | Claudiu Zissulescu <claziss@gcc.gnu.org> | 2016-01-25 12:15:58 +0100 |
commit | d34a0fdc03bf55b287d884a9138e3965387af4b1 (patch) | |
tree | a518e7e353c9e58626c676f4a558352d9ee4df96 /gcc | |
parent | 02ef53f28854628ee15784bf860e7db24427a18f (diff) | |
download | gcc-d34a0fdc03bf55b287d884a9138e3965387af4b1.zip gcc-d34a0fdc03bf55b287d884a9138e3965387af4b1.tar.gz gcc-d34a0fdc03bf55b287d884a9138e3965387af4b1.tar.bz2 |
[ARC] Add basic support for double load and store instructions
gcc/
2016-01-25 Claudiu Zissulescu <claziss@synopsys.com>
* config/arc/arc.c (TARGET_DWARF_REGISTER_SPAN): Define.
(arc_init): Check validity of mll64 option.
(arc_save_restore): Use double load/store instruction.
(arc_expand_movmem): Likewise.
(arc_split_move): Don't split if we have double load/store
instructions. Emit the moves directly and return void.
(arc_process_double_reg_moves): Change function to return boolean
instead of a sequence of instructions.
(arc_dwarf_register_span): New function.
* config/arc/arc-protos.h (arc_split_move): Change prototype.
* config/arc/arc.h (TARGET_CPU_CPP_BUILTINS): Define __ARC_LL64__.
* config/arc/arc.md (*movdi_insn): Emit ldd/std instructions.
(*movdf_insn): Likewise.
* config/arc/arc.opt (mll64): New option.
* config/arc/predicates.md (even_register_operand): New predicate.
* doc/invoke.texi (ARC Options): Add mll64 documentation.
From-SVN: r232788
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 19 | ||||
-rw-r--r-- | gcc/config/arc/arc-protos.h | 2 | ||||
-rw-r--r-- | gcc/config/arc/arc.c | 120 | ||||
-rw-r--r-- | gcc/config/arc/arc.h | 4 | ||||
-rw-r--r-- | gcc/config/arc/arc.md | 109 | ||||
-rw-r--r-- | gcc/config/arc/arc.opt | 4 | ||||
-rw-r--r-- | gcc/config/arc/predicates.md | 13 | ||||
-rw-r--r-- | gcc/doc/invoke.texi | 6 |
8 files changed, 191 insertions, 86 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 8a9798d..03ac502 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,22 @@ +2016-01-25 Claudiu Zissulescu <claziss@synopsys.com> + + * config/arc/arc.c (TARGET_DWARF_REGISTER_SPAN): Define. + (arc_init): Check validity mll64 option. + (arc_save_restore): Use double load/store instruction. + (arc_expand_movmem): Likewise. + (arc_split_move): Don't split if we have double load/store + instructions. Returns a boolean. + (arc_process_double_reg_moves): Change function to return boolean + instead of a sequence of instructions. + (arc_dwarf_register_span): New function. + * config/arc/arc-protos.h (arc_split_move): Change prototype. + * config/arc/arc.h (TARGET_CPU_CPP_BUILTINS): Define __ARC_LL64__. + * config/arc/arc.md (*movdi_insn): Emit ldd/std instructions. + (*movdf_insn): Likewise. + * config/arc/arc.opt (mll64): New option. + * config/arc/predicates.md (even_register_operand): New predicate. + * doc/invoke.texi (ARC Options): Add mll64 documentation. 
+ 2016-01-25 Richard Biener <rguenther@suse.de> PR lto/69393 diff --git a/gcc/config/arc/arc-protos.h b/gcc/config/arc/arc-protos.h index 3f96455..f487291 100644 --- a/gcc/config/arc/arc-protos.h +++ b/gcc/config/arc/arc-protos.h @@ -104,7 +104,7 @@ extern void arc_toggle_unalign (void); extern void split_addsi (rtx *); extern void split_subsi (rtx *); extern void arc_pad_return (void); -extern rtx arc_split_move (rtx *); +extern void arc_split_move (rtx *); extern int arc_verify_short (rtx_insn *insn, int unalign, int); extern const char *arc_short_long (rtx_insn *insn, const char *, const char *); extern rtx arc_regno_use_in (unsigned int, rtx); diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c index f636534..b9799a0 100644 --- a/gcc/config/arc/arc.c +++ b/gcc/config/arc/arc.c @@ -420,6 +420,9 @@ static void arc_finalize_pic (void); #undef TARGET_ASM_ALIGNED_SI_OP #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t" +#undef TARGET_DWARF_REGISTER_SPAN +#define TARGET_DWARF_REGISTER_SPAN arc_dwarf_register_span + /* Try to keep the (mov:DF _, reg) as early as possible so that the d<add/sub/mul>h-lr insns appear together and can use the peephole2 pattern. */ @@ -736,6 +739,10 @@ arc_init (void) if (TARGET_ATOMIC && !(TARGET_ARC700 || TARGET_HS)) error ("-matomic is only supported for ARC700 or ARC HS cores"); + /* ll64 ops only available for HS. */ + if (TARGET_LL64 && !TARGET_HS) + error ("-mll64 is only supported for ARC HS cores"); + arc_init_reg_tables (); /* Initialize array for PRINT_OPERAND_PUNCT_VALID_P. 
*/ @@ -2175,9 +2182,26 @@ arc_save_restore (rtx base_reg, for (regno = 0; regno <= 31; regno++) { - if ((gmask & (1L << regno)) != 0) + enum machine_mode mode = SImode; + bool found = false; + + if (TARGET_LL64 + && (regno % 2 == 0) + && ((gmask & (1L << regno)) != 0) + && ((gmask & (1L << (regno+1))) != 0)) + { + found = true; + mode = DImode; + } + else if ((gmask & (1L << regno)) != 0) { - rtx reg = gen_rtx_REG (SImode, regno); + found = true; + mode = SImode; + } + + if (found) + { + rtx reg = gen_rtx_REG (mode, regno); rtx addr, mem; int cfa_adjust = *first_offset; @@ -2193,7 +2217,7 @@ arc_save_restore (rtx base_reg, gcc_assert (SMALL_INT (offset)); addr = plus_constant (Pmode, base_reg, offset); } - mem = gen_frame_mem (SImode, addr); + mem = gen_frame_mem (mode, addr); if (epilogue_p) { rtx insn = @@ -2212,6 +2236,11 @@ arc_save_restore (rtx base_reg, else frame_move_inc (mem, reg, base_reg, addr); offset += UNITS_PER_WORD; + if (mode == DImode) + { + offset += UNITS_PER_WORD; + ++regno; + } } /* if */ } /* for */ }/* if */ @@ -6986,9 +7015,8 @@ force_offsettable (rtx addr, HOST_WIDE_INT size, bool reuse) return addr; } -/* Like move_by_pieces, but take account of load latency, - and actual offset ranges. - Return true on success. */ +/* Like move_by_pieces, but take account of load latency, and actual + offset ranges. Return true on success. */ bool arc_expand_movmem (rtx *operands) @@ -7009,14 +7037,23 @@ arc_expand_movmem (rtx *operands) size = INTVAL (operands[2]); /* move_by_pieces_ninsns is static, so we can't use it. */ if (align >= 4) - n_pieces = (size + 2) / 4U + (size & 1); + { + if (TARGET_LL64) + n_pieces = (size + 4) / 8U + ((size >> 1) & 1) + (size & 1); + else + n_pieces = (size + 2) / 4U + (size & 1); + } else if (align == 2) n_pieces = (size + 1) / 2U; else n_pieces = size; if (n_pieces >= (unsigned int) (optimize_size ? 
3 : 15)) return false; - if (piece > 4) + /* Force 32 bit aligned and larger datum to use 64 bit transfers, if + possible. */ + if (TARGET_LL64 && (piece >= 4) && (size >= 8)) + piece = 8; + else if (piece > 4) piece = 4; dst_addr = force_offsettable (XEXP (operands[0], 0), size, 0); src_addr = force_offsettable (XEXP (operands[1], 0), size, 0); @@ -7027,8 +7064,8 @@ arc_expand_movmem (rtx *operands) rtx tmp; machine_mode mode; - if (piece > size) - piece = size & -size; + while (piece > size) + piece >>= 1; mode = smallest_mode_for_size (piece * BITS_PER_UNIT, MODE_INT); /* If we don't re-use temporaries, the scheduler gets carried away, and the register pressure gets unnecessarily high. */ @@ -8463,12 +8500,11 @@ split_subsi (rtx *operands) Operand 0: destination register Operand 1: source register */ -static rtx +static bool arc_process_double_reg_moves (rtx *operands) { rtx dest = operands[0]; rtx src = operands[1]; - rtx val; enum usesDxState { none, srcDx, destDx, maxDx }; enum usesDxState state = none; @@ -8483,9 +8519,7 @@ arc_process_double_reg_moves (rtx *operands) } if (state == none) - return NULL_RTX; - - start_sequence (); + return false; if (state == srcDx) { @@ -8532,30 +8566,36 @@ arc_process_double_reg_moves (rtx *operands) else gcc_unreachable (); - val = get_insns (); - end_sequence (); - return val; + return true; } /* operands 0..1 are the operands of a 64 bit move instruction. split it into two moves with operands 2/3 and 4/5. 
*/ -rtx +void arc_split_move (rtx *operands) { machine_mode mode = GET_MODE (operands[0]); int i; int swap = 0; rtx xop[4]; - rtx val; if (TARGET_DPFP) { - val = arc_process_double_reg_moves (operands); - if (val) - return val; + if (arc_process_double_reg_moves (operands)) + return; } + if (TARGET_LL64 + && ((memory_operand (operands[0], mode) + && even_register_operand (operands[1], mode)) + || (memory_operand (operands[1], mode) + && even_register_operand (operands[0], mode)))) + { + emit_move_insn (operands[0], operands[1]); + return; + } + for (i = 0; i < 2; i++) { if (MEM_P (operands[i]) && auto_inc_p (XEXP (operands[i], 0))) @@ -8603,18 +8643,10 @@ arc_split_move (rtx *operands) swap = 2; gcc_assert (!reg_overlap_mentioned_p (xop[2], xop[1])); } - operands[2+swap] = xop[0]; - operands[3+swap] = xop[1]; - operands[4-swap] = xop[2]; - operands[5-swap] = xop[3]; - start_sequence (); - emit_insn (gen_rtx_SET (operands[2], operands[3])); - emit_insn (gen_rtx_SET (operands[4], operands[5])); - val = get_insns (); - end_sequence (); + emit_move_insn (xop[0 + swap], xop[1 + swap]); + emit_move_insn (xop[2 - swap], xop[3 - swap]); - return val; } /* Select between the instruction output templates s_tmpl (for short INSNs) @@ -9329,6 +9361,28 @@ arc_no_speculation_in_delay_slots_p () return true; } +/* Return a parallel of registers to represent where to find the + register pieces if required, otherwise NULL_RTX. 
*/ + +static rtx +arc_dwarf_register_span (rtx rtl) +{ + enum machine_mode mode = GET_MODE (rtl); + unsigned regno; + rtx p; + + if (GET_MODE_SIZE (mode) != 8) + return NULL_RTX; + + p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2)); + regno = REGNO (rtl); + XVECEXP (p, 0, 0) = gen_rtx_REG (SImode, regno); + XVECEXP (p, 0, 1) = gen_rtx_REG (SImode, regno + 1); + + return p; +} + + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-arc.h" diff --git a/gcc/config/arc/arc.h b/gcc/config/arc/arc.h index 70a2b1d..27665b0 100644 --- a/gcc/config/arc/arc.h +++ b/gcc/config/arc/arc.h @@ -97,6 +97,10 @@ along with GCC; see the file COPYING3. If not see builtin_define ("__ARC_NORM__");\ builtin_define ("__Xnorm"); \ } \ + if (TARGET_LL64) \ + { \ + builtin_define ("__ARC_LL64__");\ + } \ if (TARGET_MUL64_SET) \ builtin_define ("__ARC_MUL64__");\ if (TARGET_MULMAC_32BY16_SET) \ diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md index 80f1daa..222a468 100644 --- a/gcc/config/arc/arc.md +++ b/gcc/config/arc/arc.md @@ -984,7 +984,7 @@ }") (define_insn_and_split "*movdi_insn" - [(set (match_operand:DI 0 "move_dest_operand" "=w,w,r,m") + [(set (match_operand:DI 0 "move_dest_operand" "=w, w,r,m") (match_operand:DI 1 "move_double_src_operand" "c,Hi,m,c"))] "register_operand (operands[0], DImode) || register_operand (operands[1], DImode)" @@ -993,50 +993,36 @@ switch (which_alternative) { default: - case 0 : - /* We normally copy the low-numbered register first. However, if - the first register operand 0 is the same as the second register of - operand 1, we must copy in the opposite order. */ - if (REGNO (operands[0]) == REGNO (operands[1]) + 1) - return \"mov%? %R0,%R1\;mov%? %0,%1\"; - else - return \"mov%? %0,%1\;mov%? %R0,%R1\"; - case 1 : - return \"mov%? %L0,%L1\;mov%? %H0,%H1\"; - case 2 : - /* If the low-address word is used in the address, we must load it - last. Otherwise, load it first. 
Note that we cannot have - auto-increment in that case since the address register is known to be - dead. */ - if (refers_to_regno_p (REGNO (operands[0]), operands[1])) - return \"ld%V1 %R0,%R1\;ld%V1 %0,%1\"; - else switch (GET_CODE (XEXP(operands[1], 0))) - { - case POST_MODIFY: case POST_INC: case POST_DEC: - return \"ld%V1 %R0,%R1\;ld%U1%V1 %0,%1\"; - case PRE_MODIFY: case PRE_INC: case PRE_DEC: - return \"ld%U1%V1 %0,%1\;ld%V1 %R0,%R1\"; - default: - return \"ld%U1%V1 %0,%1\;ld%U1%V1 %R0,%R1\"; - } - case 3 : - switch (GET_CODE (XEXP(operands[0], 0))) - { - case POST_MODIFY: case POST_INC: case POST_DEC: - return \"st%V0 %R1,%R0\;st%U0%V0 %1,%0\"; - case PRE_MODIFY: case PRE_INC: case PRE_DEC: - return \"st%U0%V0 %1,%0\;st%V0 %R1,%R0\"; - default: - return \"st%U0%V0 %1,%0\;st%U0%V0 %R1,%R0\"; - } + return \"#\"; + + case 2: + if (TARGET_LL64 + && ((even_register_operand (operands[0], DImode) + && memory_operand (operands[1], DImode)) + || (memory_operand (operands[0], DImode) + && even_register_operand (operands[1], DImode)))) + return \"ldd%U1%V1 %0,%1%&\"; + return \"#\"; + + case 3: + if (TARGET_LL64 + && ((even_register_operand (operands[0], DImode) + && memory_operand (operands[1], DImode)) + || (memory_operand (operands[0], DImode) + && even_register_operand (operands[1], DImode)))) + return \"std%U0%V0 %1,%0\"; + return \"#\"; } }" - "&& reload_completed && optimize" - [(set (match_dup 2) (match_dup 3)) (set (match_dup 4) (match_dup 5))] - "arc_split_move (operands);" + "reload_completed" + [(const_int 0)] + { + arc_split_move (operands); + DONE; + } [(set_attr "type" "move,move,load,store") ;; ??? The ld/st values could be 4 if it's [reg,bignum]. - (set_attr "length" "8,16,16,16")]) + (set_attr "length" "8,16,*,*")]) ;; Floating point move insns. 
@@ -1066,23 +1052,46 @@ "" "if (prepare_move_operands (operands, DFmode)) DONE;") -(define_insn "*movdf_insn" +(define_insn_and_split "*movdf_insn" [(set (match_operand:DF 0 "move_dest_operand" "=D,r,c,c,r,m") (match_operand:DF 1 "move_double_src_operand" "r,D,c,E,m,c"))] "register_operand (operands[0], DFmode) || register_operand (operands[1], DFmode)" - "#" + "* +{ + switch (which_alternative) + { + default: + return \"#\"; + case 4: + if (TARGET_LL64 + && ((even_register_operand (operands[0], DFmode) + && memory_operand (operands[1], DFmode)) + || (memory_operand (operands[0], DFmode) + && even_register_operand (operands[1], DFmode)))) + return \"ldd%U1%V1 %0,%1%&\"; + return \"#\"; + + case 5: + if (TARGET_LL64 + && ((even_register_operand (operands[0], DFmode) + && memory_operand (operands[1], DFmode)) + || (memory_operand (operands[0], DFmode) + && even_register_operand (operands[1], DFmode)))) + return \"std%U0%V0 %1,%0\"; + return \"#\"; + } +}" + "reload_completed" + [(const_int 0)] + { + arc_split_move (operands); + DONE; + } [(set_attr "type" "move,move,move,move,load,store") (set_attr "predicable" "no,no,yes,yes,no,no") ;; ??? The ld/st values could be 16 if it's [reg,bignum]. (set_attr "length" "4,16,8,16,16,16")]) -(define_split - [(set (match_operand:DF 0 "move_dest_operand" "") - (match_operand:DF 1 "move_double_src_operand" ""))] - "reload_completed" - [(match_dup 2)] - "operands[2] = arc_split_move (operands);") - (define_insn_and_split "*movdf_insn_nolrsr" [(set (match_operand:DF 0 "register_operand" "=r") (match_operand:DF 1 "arc_double_register_operand" "D")) diff --git a/gcc/config/arc/arc.opt b/gcc/config/arc/arc.opt index 79113a5..00b98d5 100644 --- a/gcc/config/arc/arc.opt +++ b/gcc/config/arc/arc.opt @@ -409,3 +409,7 @@ Target Joined matomic Target Report Mask(ATOMIC) Enable atomic instructions. + +mll64 +Target Report Mask(LL64) +Enable double load/store instructions for ARC HS. 
diff --git a/gcc/config/arc/predicates.md b/gcc/config/arc/predicates.md index fba878b..52ac2ac 100644 --- a/gcc/config/arc/predicates.md +++ b/gcc/config/arc/predicates.md @@ -783,4 +783,15 @@ (match_code "reg" "0"))) (define_predicate "any_mem_operand" - (match_code "mem"))
\ No newline at end of file + (match_code "mem")) + +; Special predicate to match even-odd double register pair +(define_predicate "even_register_operand" + (match_code "reg") + { + if ((GET_MODE (op) != mode) && (mode != VOIDmode)) + return 0; + + return (REG_P (op) && ((REGNO (op) >= FIRST_PSEUDO_REGISTER) + || ((REGNO (op) & 1) == 0))); + }) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index d281975..ba0b4b2 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -599,7 +599,7 @@ Objective-C and Objective-C++ Dialects}. -mmixed-code -mq-class -mRcq -mRcw -msize-level=@var{level} @gol -mtune=@var{cpu} -mmultcost=@var{num} @gol -munalign-prob-threshold=@var{probability} -mmpy-option=@var{multo} @gol --mdiv-rem -mcode-density} +-mdiv-rem -mcode-density -mll64} @emph{ARM Options} @gccoptlist{-mapcs-frame -mno-apcs-frame @gol @@ -13259,6 +13259,10 @@ Enable DIV/REM instructions for ARCv2 cores. @opindex mcode-density Enable code density instructions for ARC EM, default on for ARC HS. +@item -mll64 +@opindex mll64 +Enable double load/store operations for ARC HS cores. + @item -mmpy-option=@var{multo} @opindex mmpy-option Compile ARCv2 code with a multiplier design option. @samp{wlh1} is |