Diffstat (limited to 'gcc/config/arm/arm.c')
-rw-r--r--  gcc/config/arm/arm.c | 830
1 file changed, 313 insertions(+), 517 deletions(-)
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 6f1eb13..951d65c 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -1,6 +1,6 @@ /* Output routines for GCC for ARM. Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, - 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 + 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl) and Martin Simmons (@harleqn.co.uk). @@ -11730,7 +11730,7 @@ arm_select_cc_mode (enum rtx_code op, rtx x, rtx y) return the rtx for register 0 in the proper mode. FP means this is a floating point compare: I don't think that it is needed on the arm. */ rtx -arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y) +arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch) { enum machine_mode mode; rtx cc_reg; @@ -11755,11 +11755,18 @@ arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y) CC_CZmode is cheaper. */ if (mode == CC_Zmode && y != const0_rtx) { + gcc_assert (!reload_completed); x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN); y = const0_rtx; } + /* A scratch register is required. */ - clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (SImode)); + if (reload_completed) + gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode); + else + scratch = gen_rtx_SCRATCH (SImode); + + clobber = gen_rtx_CLOBBER (VOIDmode, scratch); set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y)); emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber))); } @@ -24417,520 +24424,6 @@ arm_have_conditional_execution (void) return !TARGET_THUMB1; } -/* Legitimize a memory reference for sync primitive implemented using - ldrex / strex. We currently force the form of the reference to be - indirect without offset. We do not yet support the indirect offset - addressing supported by some ARM targets for these - instructions. */ -static rtx -arm_legitimize_sync_memory (rtx memory) -{ - rtx addr = force_reg (Pmode, XEXP (memory, 0)); - rtx legitimate_memory = gen_rtx_MEM (GET_MODE (memory), addr); - - set_mem_alias_set (legitimate_memory, ALIAS_SET_MEMORY_BARRIER); - MEM_VOLATILE_P (legitimate_memory) = MEM_VOLATILE_P (memory); - return legitimate_memory; -} - -/* An instruction emitter. */ -typedef void (* emit_f) (int label, const char *, rtx *); - -/* An instruction emitter that emits via the conventional - output_asm_insn. */ -static void -arm_emit (int label ATTRIBUTE_UNUSED, const char *pattern, rtx *operands) -{ - output_asm_insn (pattern, operands); -} - -/* Count the number of emitted synchronization instructions. */ -static unsigned arm_insn_count; - -/* An emitter that counts emitted instructions but does not actually - emit instruction into the instruction stream. */ -static void -arm_count (int label, - const char *pattern ATTRIBUTE_UNUSED, - rtx *operands ATTRIBUTE_UNUSED) -{ - if (! label) - ++ arm_insn_count; -} - -/* Construct a pattern using conventional output formatting and feed - it to output_asm_insn. Provides a mechanism to construct the - output pattern on the fly. Note the hard limit on the pattern - buffer size. */ -static void ATTRIBUTE_PRINTF_4 -arm_output_asm_insn (emit_f emit, int label, rtx *operands, - const char *pattern, ...) 
-{ - va_list ap; - char buffer[256]; - - va_start (ap, pattern); - vsprintf (buffer, pattern, ap); - va_end (ap); - emit (label, buffer, operands); -} - -/* Emit the memory barrier instruction, if any, provided by this - target to a specified emitter. */ -static void -arm_process_output_memory_barrier (emit_f emit, rtx *operands) -{ - if (TARGET_HAVE_DMB) - { - /* Note we issue a system level barrier. We should consider - issuing a inner shareabilty zone barrier here instead, ie. - "DMB ISH". */ - emit (0, "dmb\tsy", operands); - return; - } - - if (TARGET_HAVE_DMB_MCR) - { - emit (0, "mcr\tp15, 0, r0, c7, c10, 5", operands); - return; - } - - gcc_unreachable (); -} - -/* Emit the memory barrier instruction, if any, provided by this - target. */ -const char * -arm_output_memory_barrier (rtx *operands) -{ - arm_process_output_memory_barrier (arm_emit, operands); - return ""; -} - -/* Helper to figure out the instruction suffix required on ldrex/strex - for operations on an object of the specified mode. */ -static const char * -arm_ldrex_suffix (enum machine_mode mode) -{ - switch (mode) - { - case QImode: return "b"; - case HImode: return "h"; - case SImode: return ""; - case DImode: return "d"; - default: - gcc_unreachable (); - } - return ""; -} - -/* Emit an ldrex{b,h,d, } instruction appropriate for the specified - mode. */ -static void -arm_output_ldrex (emit_f emit, - enum machine_mode mode, - rtx target, - rtx memory) -{ - rtx operands[3]; - - operands[0] = target; - if (mode != DImode) - { - const char *suffix = arm_ldrex_suffix (mode); - operands[1] = memory; - arm_output_asm_insn (emit, 0, operands, "ldrex%s\t%%0, %%C1", suffix); - } - else - { - /* The restrictions on target registers in ARM mode are that the two - registers are consecutive and the first one is even; Thumb is - actually more flexible, but DI should give us this anyway. - Note that the 1st register always gets the lowest word in memory. */ - gcc_assert ((REGNO (target) & 1) == 0); - operands[1] = gen_rtx_REG (SImode, REGNO (target) + 1); - operands[2] = memory; - arm_output_asm_insn (emit, 0, operands, "ldrexd\t%%0, %%1, %%C2"); - } -} - -/* Emit a strex{b,h,d, } instruction appropriate for the specified - mode. */ -static void -arm_output_strex (emit_f emit, - enum machine_mode mode, - const char *cc, - rtx result, - rtx value, - rtx memory) -{ - rtx operands[4]; - - operands[0] = result; - operands[1] = value; - if (mode != DImode) - { - const char *suffix = arm_ldrex_suffix (mode); - operands[2] = memory; - arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2", - suffix, cc); - } - else - { - /* The restrictions on target registers in ARM mode are that the two - registers are consecutive and the first one is even; Thumb is - actually more flexible, but DI should give us this anyway. - Note that the 1st register always gets the lowest word in memory. */ - gcc_assert ((REGNO (value) & 1) == 0 || TARGET_THUMB2); - operands[2] = gen_rtx_REG (SImode, REGNO (value) + 1); - operands[3] = memory; - arm_output_asm_insn (emit, 0, operands, "strexd%s\t%%0, %%1, %%2, %%C3", - cc); - } -} - -/* Helper to emit an it instruction in Thumb2 mode only; although the assembler - will ignore it in ARM mode, emitting it will mess up instruction counts we - sometimes keep 'flags' are the extra t's and e's if it's more than one - instruction that is conditional. */ -static void -arm_output_it (emit_f emit, const char *flags, const char *cond) -{ - rtx operands[1]; /* Don't actually use the operand. 
*/ - if (TARGET_THUMB2) - arm_output_asm_insn (emit, 0, operands, "it%s\t%s", flags, cond); -} - -/* Helper to emit a two operand instruction. */ -static void -arm_output_op2 (emit_f emit, const char *mnemonic, rtx d, rtx s) -{ - rtx operands[2]; - - operands[0] = d; - operands[1] = s; - arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1", mnemonic); -} - -/* Helper to emit a three operand instruction. */ -static void -arm_output_op3 (emit_f emit, const char *mnemonic, rtx d, rtx a, rtx b) -{ - rtx operands[3]; - - operands[0] = d; - operands[1] = a; - operands[2] = b; - arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1, %%2", mnemonic); -} - -/* Emit a load store exclusive synchronization loop. - - do - old_value = [mem] - if old_value != required_value - break; - t1 = sync_op (old_value, new_value) - [mem] = t1, t2 = [0|1] - while ! t2 - - Note: - t1 == t2 is not permitted - t1 == old_value is permitted - - required_value: - - RTX register representing the required old_value for - the modify to continue, if NULL no comparsion is performed. */ -static void -arm_output_sync_loop (emit_f emit, - enum machine_mode mode, - rtx old_value, - rtx memory, - rtx required_value, - rtx new_value, - rtx t1, - rtx t2, - enum attr_sync_op sync_op, - int early_barrier_required) -{ - rtx operands[2]; - /* We'll use the lo for the normal rtx in the none-DI case - as well as the least-sig word in the DI case. */ - rtx old_value_lo, required_value_lo, new_value_lo, t1_lo; - rtx old_value_hi, required_value_hi, new_value_hi, t1_hi; - - bool is_di = mode == DImode; - - gcc_assert (t1 != t2); - - if (early_barrier_required) - arm_process_output_memory_barrier (emit, NULL); - - arm_output_asm_insn (emit, 1, operands, "%sLSYT%%=:", LOCAL_LABEL_PREFIX); - - arm_output_ldrex (emit, mode, old_value, memory); - - if (is_di) - { - old_value_lo = gen_lowpart (SImode, old_value); - old_value_hi = gen_highpart (SImode, old_value); - if (required_value) - { - required_value_lo = gen_lowpart (SImode, required_value); - required_value_hi = gen_highpart (SImode, required_value); - } - else - { - /* Silence false potentially unused warning. */ - required_value_lo = NULL_RTX; - required_value_hi = NULL_RTX; - } - new_value_lo = gen_lowpart (SImode, new_value); - new_value_hi = gen_highpart (SImode, new_value); - t1_lo = gen_lowpart (SImode, t1); - t1_hi = gen_highpart (SImode, t1); - } - else - { - old_value_lo = old_value; - new_value_lo = new_value; - required_value_lo = required_value; - t1_lo = t1; - - /* Silence false potentially unused warning. */ - t1_hi = NULL_RTX; - new_value_hi = NULL_RTX; - required_value_hi = NULL_RTX; - old_value_hi = NULL_RTX; - } - - if (required_value) - { - operands[0] = old_value_lo; - operands[1] = required_value_lo; - - arm_output_asm_insn (emit, 0, operands, "cmp\t%%0, %%1"); - if (is_di) - { - arm_output_it (emit, "", "eq"); - arm_output_op2 (emit, "cmpeq", old_value_hi, required_value_hi); - } - arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYB%%=", LOCAL_LABEL_PREFIX); - } - - switch (sync_op) - { - case SYNC_OP_ADD: - arm_output_op3 (emit, is_di ? "adds" : "add", - t1_lo, old_value_lo, new_value_lo); - if (is_di) - arm_output_op3 (emit, "adc", t1_hi, old_value_hi, new_value_hi); - break; - - case SYNC_OP_SUB: - arm_output_op3 (emit, is_di ? 
"subs" : "sub", - t1_lo, old_value_lo, new_value_lo); - if (is_di) - arm_output_op3 (emit, "sbc", t1_hi, old_value_hi, new_value_hi); - break; - - case SYNC_OP_IOR: - arm_output_op3 (emit, "orr", t1_lo, old_value_lo, new_value_lo); - if (is_di) - arm_output_op3 (emit, "orr", t1_hi, old_value_hi, new_value_hi); - break; - - case SYNC_OP_XOR: - arm_output_op3 (emit, "eor", t1_lo, old_value_lo, new_value_lo); - if (is_di) - arm_output_op3 (emit, "eor", t1_hi, old_value_hi, new_value_hi); - break; - - case SYNC_OP_AND: - arm_output_op3 (emit,"and", t1_lo, old_value_lo, new_value_lo); - if (is_di) - arm_output_op3 (emit, "and", t1_hi, old_value_hi, new_value_hi); - break; - - case SYNC_OP_NAND: - arm_output_op3 (emit, "and", t1_lo, old_value_lo, new_value_lo); - if (is_di) - arm_output_op3 (emit, "and", t1_hi, old_value_hi, new_value_hi); - arm_output_op2 (emit, "mvn", t1_lo, t1_lo); - if (is_di) - arm_output_op2 (emit, "mvn", t1_hi, t1_hi); - break; - - case SYNC_OP_NONE: - t1 = new_value; - t1_lo = new_value_lo; - if (is_di) - t1_hi = new_value_hi; - break; - } - - /* Note that the result of strex is a 0/1 flag that's always 1 register. */ - if (t2) - { - arm_output_strex (emit, mode, "", t2, t1, memory); - operands[0] = t2; - arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0"); - arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=", - LOCAL_LABEL_PREFIX); - } - else - { - /* Use old_value for the return value because for some operations - the old_value can easily be restored. This saves one register. */ - arm_output_strex (emit, mode, "", old_value_lo, t1, memory); - operands[0] = old_value_lo; - arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0"); - arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=", - LOCAL_LABEL_PREFIX); - - /* Note that we only used the _lo half of old_value as a temporary - so in DI we don't have to restore the _hi part. */ - switch (sync_op) - { - case SYNC_OP_ADD: - arm_output_op3 (emit, "sub", old_value_lo, t1_lo, new_value_lo); - break; - - case SYNC_OP_SUB: - arm_output_op3 (emit, "add", old_value_lo, t1_lo, new_value_lo); - break; - - case SYNC_OP_XOR: - arm_output_op3 (emit, "eor", old_value_lo, t1_lo, new_value_lo); - break; - - case SYNC_OP_NONE: - arm_output_op2 (emit, "mov", old_value_lo, required_value_lo); - break; - - default: - gcc_unreachable (); - } - } - - /* Note: label is before barrier so that in cmp failure case we still get - a barrier to stop subsequent loads floating upwards past the ldrex - PR target/48126. */ - arm_output_asm_insn (emit, 1, operands, "%sLSYB%%=:", LOCAL_LABEL_PREFIX); - arm_process_output_memory_barrier (emit, NULL); -} - -static rtx -arm_get_sync_operand (rtx *operands, int index, rtx default_value) -{ - if (index > 0) - default_value = operands[index - 1]; - - return default_value; -} - -#define FETCH_SYNC_OPERAND(NAME, DEFAULT) \ - arm_get_sync_operand (operands, (int) get_attr_sync_##NAME (insn), DEFAULT); - -/* Extract the operands for a synchroniztion instruction from the - instructions attributes and emit the instruction. 
*/ -static void -arm_process_output_sync_insn (emit_f emit, rtx insn, rtx *operands) -{ - rtx result, memory, required_value, new_value, t1, t2; - int early_barrier; - enum machine_mode mode; - enum attr_sync_op sync_op; - - result = FETCH_SYNC_OPERAND(result, 0); - memory = FETCH_SYNC_OPERAND(memory, 0); - required_value = FETCH_SYNC_OPERAND(required_value, 0); - new_value = FETCH_SYNC_OPERAND(new_value, 0); - t1 = FETCH_SYNC_OPERAND(t1, 0); - t2 = FETCH_SYNC_OPERAND(t2, 0); - early_barrier = - get_attr_sync_release_barrier (insn) == SYNC_RELEASE_BARRIER_YES; - sync_op = get_attr_sync_op (insn); - mode = GET_MODE (memory); - - arm_output_sync_loop (emit, mode, result, memory, required_value, - new_value, t1, t2, sync_op, early_barrier); -} - -/* Emit a synchronization instruction loop. */ -const char * -arm_output_sync_insn (rtx insn, rtx *operands) -{ - arm_process_output_sync_insn (arm_emit, insn, operands); - return ""; -} - -/* Count the number of machine instruction that will be emitted for a - synchronization instruction. Note that the emitter used does not - emit instructions, it just counts instructions being carefull not - to count labels. */ -unsigned int -arm_sync_loop_insns (rtx insn, rtx *operands) -{ - arm_insn_count = 0; - arm_process_output_sync_insn (arm_count, insn, operands); - return arm_insn_count; -} - -/* Helper to call a target sync instruction generator, dealing with - the variation in operands required by the different generators. */ -static rtx -arm_call_generator (struct arm_sync_generator *generator, rtx old_value, - rtx memory, rtx required_value, rtx new_value) -{ - switch (generator->op) - { - case arm_sync_generator_omn: - gcc_assert (! required_value); - return generator->u.omn (old_value, memory, new_value); - - case arm_sync_generator_omrn: - gcc_assert (required_value); - return generator->u.omrn (old_value, memory, required_value, new_value); - } - - return NULL; -} - -/* Expand a synchronization loop. The synchronization loop is expanded - as an opaque block of instructions in order to ensure that we do - not subsequently get extraneous memory accesses inserted within the - critical region. The exclusive access property of ldrex/strex is - only guaranteed in there are no intervening memory accesses. */ -void -arm_expand_sync (enum machine_mode mode, - struct arm_sync_generator *generator, - rtx target, rtx memory, rtx required_value, rtx new_value) -{ - if (target == NULL) - target = gen_reg_rtx (mode); - - memory = arm_legitimize_sync_memory (memory); - if (mode != SImode && mode != DImode) - { - rtx load_temp = gen_reg_rtx (SImode); - - if (required_value) - required_value = convert_modes (SImode, mode, required_value, true); - - new_value = convert_modes (SImode, mode, new_value, true); - emit_insn (arm_call_generator (generator, load_temp, memory, - required_value, new_value)); - emit_move_insn (target, gen_lowpart (mode, load_temp)); - } - else - { - emit_insn (arm_call_generator (generator, target, memory, required_value, - new_value)); - } -} - static unsigned int arm_autovectorize_vector_sizes (void) { @@ -25150,5 +24643,308 @@ vfp3_const_double_for_fract_bits (rtx operand) return 0; } +/* Emit a memory barrier around an atomic sequence according to MODEL. 
*/ + +static void +arm_pre_atomic_barrier (enum memmodel model) +{ + switch (model) + { + case MEMMODEL_RELAXED: + case MEMMODEL_CONSUME: + case MEMMODEL_ACQUIRE: + break; + case MEMMODEL_RELEASE: + case MEMMODEL_ACQ_REL: + case MEMMODEL_SEQ_CST: + emit_insn (gen_memory_barrier ()); + break; + default: + gcc_unreachable (); + } +} + +static void +arm_post_atomic_barrier (enum memmodel model) +{ + switch (model) + { + case MEMMODEL_RELAXED: + case MEMMODEL_CONSUME: + case MEMMODEL_RELEASE: + break; + case MEMMODEL_ACQUIRE: + case MEMMODEL_ACQ_REL: + case MEMMODEL_SEQ_CST: + emit_insn (gen_memory_barrier ()); + break; + default: + gcc_unreachable (); + } +} + +/* Emit the load-exclusive and store-exclusive instructions. */ + +static void +arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem) +{ + rtx (*gen) (rtx, rtx); + + switch (mode) + { + case QImode: gen = gen_arm_load_exclusiveqi; break; + case HImode: gen = gen_arm_load_exclusivehi; break; + case SImode: gen = gen_arm_load_exclusivesi; break; + case DImode: gen = gen_arm_load_exclusivedi; break; + default: + gcc_unreachable (); + } + + emit_insn (gen (rval, mem)); +} + +static void +arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval, rtx mem) +{ + rtx (*gen) (rtx, rtx, rtx); + + switch (mode) + { + case QImode: gen = gen_arm_store_exclusiveqi; break; + case HImode: gen = gen_arm_store_exclusivehi; break; + case SImode: gen = gen_arm_store_exclusivesi; break; + case DImode: gen = gen_arm_store_exclusivedi; break; + default: + gcc_unreachable (); + } + + emit_insn (gen (bval, rval, mem)); +} + +/* Mark the previous jump instruction as unlikely. */ + +static void +emit_unlikely_jump (rtx insn) +{ + rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1); + + insn = emit_jump_insn (insn); + add_reg_note (insn, REG_BR_PROB, very_unlikely); +} + +/* Expand a compare and swap pattern. */ + +void +arm_expand_compare_and_swap (rtx operands[]) +{ + rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x; + enum machine_mode mode; + rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx); + + bval = operands[0]; + rval = operands[1]; + mem = operands[2]; + oldval = operands[3]; + newval = operands[4]; + is_weak = operands[5]; + mod_s = operands[6]; + mod_f = operands[7]; + mode = GET_MODE (mem); + + switch (mode) + { + case QImode: + case HImode: + /* For narrow modes, we're going to perform the comparison in SImode, + so do the zero-extension now. */ + rval = gen_reg_rtx (SImode); + oldval = convert_modes (SImode, mode, oldval, true); + /* FALLTHRU */ + + case SImode: + /* Force the value into a register if needed. We waited until after + the zero-extension above to do this properly. */ + if (!arm_add_operand (oldval, mode)) + oldval = force_reg (mode, oldval); + break; + + case DImode: + if (!cmpdi_operand (oldval, mode)) + oldval = force_reg (mode, oldval); + break; + + default: + gcc_unreachable (); + } + + switch (mode) + { + case QImode: gen = gen_atomic_compare_and_swapqi_1; break; + case HImode: gen = gen_atomic_compare_and_swaphi_1; break; + case SImode: gen = gen_atomic_compare_and_swapsi_1; break; + case DImode: gen = gen_atomic_compare_and_swapdi_1; break; + default: + gcc_unreachable (); + } + + emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f)); + + if (mode == QImode || mode == HImode) + emit_move_insn (operands[1], gen_lowpart (mode, rval)); + + /* In all cases, we arrange for success to be signaled by Z set. 
+ This arrangement allows for the boolean result to be used directly + in a subsequent branch, post optimization. */ + x = gen_rtx_REG (CCmode, CC_REGNUM); + x = gen_rtx_EQ (SImode, x, const0_rtx); + emit_insn (gen_rtx_SET (VOIDmode, bval, x)); +} + +/* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether + another memory store between the load-exclusive and store-exclusive can + reset the monitor from Exclusive to Open state. This means we must wait + until after reload to split the pattern, lest we get a register spill in + the middle of the atomic sequence. */ + +void +arm_split_compare_and_swap (rtx operands[]) +{ + rtx rval, mem, oldval, newval, scratch; + enum machine_mode mode; + enum memmodel mod_s, mod_f; + bool is_weak; + rtx label1, label2, x, cond; + + rval = operands[0]; + mem = operands[1]; + oldval = operands[2]; + newval = operands[3]; + is_weak = (operands[4] != const0_rtx); + mod_s = (enum memmodel) INTVAL (operands[5]); + mod_f = (enum memmodel) INTVAL (operands[6]); + scratch = operands[7]; + mode = GET_MODE (mem); + + arm_pre_atomic_barrier (mod_s); + + label1 = NULL_RTX; + if (!is_weak) + { + label1 = gen_label_rtx (); + emit_label (label1); + } + label2 = gen_label_rtx (); + + arm_emit_load_exclusive (mode, rval, mem); + + cond = arm_gen_compare_reg (NE, rval, oldval, scratch); + x = gen_rtx_NE (VOIDmode, cond, const0_rtx); + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, + gen_rtx_LABEL_REF (Pmode, label2), pc_rtx); + emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x)); + + arm_emit_store_exclusive (mode, scratch, mem, newval); + + /* Weak or strong, we want EQ to be true for success, so that we + match the flags that we got from the compare above. */ + cond = gen_rtx_REG (CCmode, CC_REGNUM); + x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx); + emit_insn (gen_rtx_SET (VOIDmode, cond, x)); + + if (!is_weak) + { + x = gen_rtx_NE (VOIDmode, cond, const0_rtx); + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, + gen_rtx_LABEL_REF (Pmode, label1), pc_rtx); + emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x)); + } + + if (mod_f != MEMMODEL_RELAXED) + emit_label (label2); + + arm_post_atomic_barrier (mod_s); + + if (mod_f == MEMMODEL_RELAXED) + emit_label (label2); +} + +void +arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem, + rtx value, rtx model_rtx, rtx cond) +{ + enum memmodel model = (enum memmodel) INTVAL (model_rtx); + enum machine_mode mode = GET_MODE (mem); + enum machine_mode wmode = (mode == DImode ? DImode : SImode); + rtx label, x; + + arm_pre_atomic_barrier (model); + + label = gen_label_rtx (); + emit_label (label); + + if (new_out) + new_out = gen_lowpart (wmode, new_out); + if (old_out) + old_out = gen_lowpart (wmode, old_out); + else + old_out = new_out; + value = simplify_gen_subreg (wmode, value, mode, 0); + + arm_emit_load_exclusive (mode, old_out, mem); + + switch (code) + { + case SET: + new_out = value; + break; + + case NOT: + x = gen_rtx_AND (wmode, old_out, value); + emit_insn (gen_rtx_SET (VOIDmode, new_out, x)); + x = gen_rtx_NOT (wmode, new_out); + emit_insn (gen_rtx_SET (VOIDmode, new_out, x)); + break; + + case MINUS: + if (CONST_INT_P (value)) + { + value = GEN_INT (-INTVAL (value)); + code = PLUS; + } + /* FALLTHRU */ + + case PLUS: + if (mode == DImode) + { + /* DImode plus/minus need to clobber flags. */ + /* The adddi3 and subdi3 patterns are incorrectly written so that + they require matching operands, even when we could easily support + three operands. 
Thankfully, this can be fixed up post-splitting,
+ as the individual add+adc patterns do accept three operands and
+ post-reload cprop can make these moves go away. */
+ emit_move_insn (new_out, old_out);
+ if (code == PLUS)
+ x = gen_adddi3 (new_out, new_out, value);
+ else
+ x = gen_subdi3 (new_out, new_out, value);
+ emit_insn (x);
+ break;
+ }
+ /* FALLTHRU */
+
+ default:
+ x = gen_rtx_fmt_ee (code, wmode, old_out, value);
+ emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
+ break;
+ }
+
+ arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out));
+
+ x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
+ emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
+
+ arm_post_atomic_barrier (model);
+}
+
 #include "gt-arm.h"
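
As a sanity check on the new expansion path, the following C translation unit should reach arm_expand_compare_and_swap and arm_split_compare_and_swap once this patch is applied. This is a minimal sketch, not part of the patch: the function names are illustrative, and the assumed invocation is something like arm-linux-gnueabi-gcc -O2 -march=armv7-a -c cas.c.

/* Sketch: exercise the strong and weak compare-and-swap expanders.  */
#include <stdbool.h>

bool
cas_strong (int *p, int expected, int desired)
{
  /* is_weak == const0_rtx: arm_split_compare_and_swap wraps the strex in
     an outer retry loop (label1).  Success is signaled by Z set, so this
     bool can feed a subsequent branch without a recomparison.  */
  return __atomic_compare_exchange_n (p, &expected, desired, false,
				      __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
}

bool
cas_weak (int *p, int *expected, int desired)
{
  /* is_weak != const0_rtx: no outer loop, so a spurious strex failure
     propagates to the caller, which is expected to retry.  */
  return __atomic_compare_exchange_n (p, expected, desired, true,
				      __ATOMIC_RELAXED, __ATOMIC_RELAXED);
}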
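
The barrier placement chosen by arm_pre_atomic_barrier and arm_post_atomic_barrier can be checked the same way. Per the switches above, a dmb is expected before the exclusive loop only for release-or-stronger models and after it only for acquire-or-stronger ones; the sketch below uses illustrative names under the same assumed compiler and flags.

/* Sketch: one __atomic_fetch_add per memory model of interest.  */
int
add_relaxed (int *p, int v)
{
  /* MEMMODEL_RELAXED: no dmb on either side of the loop.  */
  return __atomic_fetch_add (p, v, __ATOMIC_RELAXED);
}

int
add_acquire (int *p, int v)
{
  /* MEMMODEL_ACQUIRE: dmb after the loop only.  */
  return __atomic_fetch_add (p, v, __ATOMIC_ACQUIRE);
}

int
add_release (int *p, int v)
{
  /* MEMMODEL_RELEASE: dmb before the loop only.  */
  return __atomic_fetch_add (p, v, __ATOMIC_RELEASE);
}

int
add_seq_cst (int *p, int v)
{
  /* MEMMODEL_SEQ_CST: dmb on both sides.  On a TARGET_HAVE_DMB core the
     body emitted by arm_split_atomic_op should resemble (registers and
     scheduling will vary):

	dmb	sy
     1:	ldrex	r3, [r0]
	add	r2, r3, r1
	strex	ip, r2, [r0]
	cmp	ip, #0
	bne	1b
	dmb	sy

     with the value loaded by ldrex returned as the old value.  */
  return __atomic_fetch_add (p, v, __ATOMIC_SEQ_CST);
}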
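
Finally, a 64-bit operand takes the DImode branches, including the adddi3/subdi3 workaround described in the comment above. Again a sketch under the same assumptions:

/* Sketch: DImode atomics use ldrexd/strexd on an even/odd register pair.  */
#include <stdint.h>

uint64_t
add64 (uint64_t *p, uint64_t v)
{
  /* arm_split_atomic_op, PLUS with mode == DImode: new_out is first
     copied from old_out, then gen_adddi3 is emitted; post-reload cprop
     is expected to clean up the extra moves.  */
  return __atomic_fetch_add (p, v, __ATOMIC_SEQ_CST);
}

uint64_t
cas64 (uint64_t *p, uint64_t expected, uint64_t desired)
{
  /* arm_split_compare_and_swap runs after reload, so it must pass its
     SImode scratch operand to arm_gen_compare_reg, per the new assert
     in the first hunk.  */
  __atomic_compare_exchange_n (p, &expected, desired, false,
			       __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
  return expected;
}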
