From a51c146ebce41b5e4326b222f2d9e04bb22d276f Mon Sep 17 00:00:00 2001 From: Stefan Schulze Frielinghaus Date: Mon, 21 Jul 2025 13:05:26 +0200 Subject: Hard register constraints Implement hard register constraints of the form {regname} where regname must be a valid register name for the target. Such constraints may be used in asm statements as a replacement for register asm and in machine descriptions. A more verbose description is given in extend.texi. It is expected and desired that optimizations coalesce multiple pseudos into one whenever possible. However, in case of hard register constraints we may have to undo this and introduce copies since otherwise we would constrain a single pseudo to multiple hard registers. This is done prior to RA during asmcons in match_asm_constraints_2(). While IRA tries to reduce live ranges, it also replaces some register-register moves. That in turn might undo those copies of a pseudo which we just introduced during asmcons. Thus, check in decrease_live_ranges_number() via valid_replacement_for_asm_input_p() whether it is valid to perform a replacement. The remainder of the patch mostly deals with parsing and decoding hard register constraints. The actual work is done by LRA in process_alt_operands() where a register filter, according to the constraint, is installed. For the sake of "reviewability" and in order to show the beauty of LRA, error handling (which gets pretty involved) is spread out into a subsequent patch. Limitation ---------- Currently, a fixed register cannot be used as a hard register constraint. For example, loading the stack pointer on x86_64 via void * foo (void) { void *y; __asm__ ("" : "={rsp}" (y)); return y; } leads to an error. Asm Adjust Hook --------------- The following targets implement TARGET_MD_ASM_ADJUST: - aarch64 - arm - avr - cris - i386 - mn10300 - nds32 - pdp11 - rs6000 - s390 - vax Most of them only add the CC register to the list of clobbered registers. 
However, cris, i386, and s390 need some minor adjustment. gcc/ChangeLog: * config/cris/cris.cc (cris_md_asm_adjust): Deal with hard register constraint. * config/i386/i386.cc (map_egpr_constraints): Ditto. * config/s390/s390.cc (f_constraint_p): Ditto. * doc/extend.texi: Document hard register constraints. * doc/md.texi: Ditto. * function.cc (match_asm_constraints_2): Have a unique pseudo for each operand with a hard register constraint. (pass_match_asm_constraints::execute): Calling into new helper match_asm_constraints_2(). * genoutput.cc (mdep_constraint_len): Return the length of a hard register constraint. * genpreds.cc (write_insn_constraint_len): Support hard register constraints for insn_constraint_len(). * ira.cc (valid_replacement_for_asm_input_p_1): New helper. (valid_replacement_for_asm_input_p): New helper. (decrease_live_ranges_number): Similar to match_asm_constraints_2() ensure that each operand has a unique pseudo if constrained by a hard register. * lra-constraints.cc (process_alt_operands): Install hard register filter according to constraint. * recog.cc (asm_operand_ok): Accept register type for hard register constrained asm operands. (constrain_operands): Validate hard register constraints. * stmt.cc (decode_hard_reg_constraint): Parse a hard register constraint into the corresponding register number or bail out. (parse_output_constraint): Parse hard register constraint and set *ALLOWS_REG. (parse_input_constraint): Ditto. * stmt.h (decode_hard_reg_constraint): Declaration of new function. gcc/testsuite/ChangeLog: * gcc.dg/asm-hard-reg-1.c: New test. * gcc.dg/asm-hard-reg-2.c: New test. * gcc.dg/asm-hard-reg-3.c: New test. * gcc.dg/asm-hard-reg-4.c: New test. * gcc.dg/asm-hard-reg-5.c: New test. * gcc.dg/asm-hard-reg-6.c: New test. * gcc.dg/asm-hard-reg-7.c: New test. * gcc.dg/asm-hard-reg-8.c: New test. * gcc.target/aarch64/asm-hard-reg-1.c: New test. * gcc.target/i386/asm-hard-reg-1.c: New test. * gcc.target/i386/asm-hard-reg-2.c: New test. 
* gcc.target/s390/asm-hard-reg-1.c: New test. * gcc.target/s390/asm-hard-reg-2.c: New test. * gcc.target/s390/asm-hard-reg-3.c: New test. * gcc.target/s390/asm-hard-reg-4.c: New test. * gcc.target/s390/asm-hard-reg-5.c: New test. * gcc.target/s390/asm-hard-reg-6.c: New test. * gcc.target/s390/asm-hard-reg-longdouble.h: New test. --- gcc/function.cc | 116 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) (limited to 'gcc/function.cc') diff --git a/gcc/function.cc b/gcc/function.cc index 48167b0c..2b77bbd 100644 --- a/gcc/function.cc +++ b/gcc/function.cc @@ -7009,6 +7009,115 @@ match_asm_constraints_1 (rtx_insn *insn, rtx *p_sets, int noutputs) df_insn_rescan (insn); } +/* It is expected and desired that optimizations coalesce multiple pseudos into + one whenever possible. However, in case of hard register constraints we may + have to undo this and introduce copies since otherwise we could constrain a + single pseudo to different hard registers. For example, during register + allocation the following insn would be unsatisfiable since pseudo 60 is + constrained to hard register r5 and r6 at the same time. + + (insn 7 5 0 2 (asm_operands/v ("foo") ("") 0 [ + (reg:DI 60) repeated x2 + ] + [ + (asm_input:DI ("{r5}") t.c:4) + (asm_input:DI ("{r6}") t.c:4) + ] + [] t.c:4) "t.c":4:3 -1 + (expr_list:REG_DEAD (reg:DI 60) + (nil))) + + Therefore, introduce a copy of pseudo 60 and transform it into + + (insn 10 5 7 2 (set (reg:DI 62) + (reg:DI 60)) "t.c":4:3 1503 {*movdi_64} + (nil)) + (insn 7 10 11 2 (asm_operands/v ("foo") ("") 0 [ + (reg:DI 60) + (reg:DI 62) + ] + [ + (asm_input:DI ("{r5}") t.c:4) + (asm_input:DI ("{r6}") t.c:4) + ] + [] t.c:4) "t.c":4:3 -1 + (expr_list:REG_DEAD (reg:DI 62) + (expr_list:REG_DEAD (reg:DI 60) + (nil)))) + + Now, LRA can assign pseudo 60 to r5, and pseudo 62 to r6. + + TODO: The current implementation is conservative and we could do a bit + better in case of alternatives. 
For example + + (insn 7 5 0 2 (asm_operands/v ("foo") ("") 0 [ + (reg:DI 60) repeated x2 + ] + [ + (asm_input:DI ("r,{r5}") t.c:4) + (asm_input:DI ("{r6},r") t.c:4) + ] + [] t.c:4) "t.c":4:3 -1 + (expr_list:REG_DEAD (reg:DI 60) + (nil))) + + For this insn we wouldn't need to come up with a copy of pseudo 60 since in + each alternative pseudo 60 is constrained exactly one time. */ + +static void +match_asm_constraints_2 (rtx_insn *insn, rtx pat) +{ + rtx op; /* The ASM_OPERANDS found in PAT. */ + if (GET_CODE (pat) == SET && GET_CODE (SET_SRC (pat)) == ASM_OPERANDS) + op = SET_SRC (pat); + else if (GET_CODE (pat) == ASM_OPERANDS) + op = pat; + else + return; /* PAT is neither form: nothing to do. */ + int ninputs = ASM_OPERANDS_INPUT_LENGTH (op); + rtvec inputs = ASM_OPERANDS_INPUT_VEC (op); + bool changed = false; + auto_bitmap constrained_regs; /* Regnos already used by a constrained input. */ + + for (int i = 0; i < ninputs; ++i) + { + rtx input = RTVEC_ELT (inputs, i); + const char *constraint = ASM_OPERANDS_INPUT_CONSTRAINT (op, i); + if ((!REG_P (input) && !SUBREG_P (input)) + || (REG_P (input) && HARD_REGISTER_P (input)) + || strchr (constraint, '{') == nullptr) + continue; /* Not a REG or SUBREG, a hard REG, or no opening brace. */ + int regno; + if (SUBREG_P (input)) + { + if (REG_P (SUBREG_REG (input))) /* NOTE(review): hard regs are not excluded on this path - confirm. */ + regno = REGNO (SUBREG_REG (input)); + else + continue; /* SUBREG of something other than a REG. */ + } + else + regno = REGNO (input); + /* Keep the first usage of a constrained pseudo as is and only + introduce copies for subsequent usages (tracked by REGNO only). */ + if (! bitmap_bit_p (constrained_regs, regno)) + { + bitmap_set_bit (constrained_regs, regno); + continue; + } + rtx tmp = gen_reg_rtx (GET_MODE (input)); /* New register in the mode of INPUT. */ + start_sequence (); + emit_move_insn (tmp, input); + rtx_insn *insns = get_insns (); + end_sequence (); + emit_insn_before (insns, insn); /* Place the copy right before INSN. */ + RTVEC_ELT (inputs, i) = tmp; /* Input I now reads the copy. */ + changed = true; + } + + if (changed) + df_insn_rescan (insn); /* Inputs of INSN were replaced. */ +} + /* Add the decl D to the local_decls list of FUN. 
*/ void @@ -7065,6 +7174,13 @@ pass_match_asm_constraints::execute (function *fun) continue; pat = PATTERN (insn); + + if (GET_CODE (pat) == PARALLEL) + for (int i = XVECLEN (pat, 0) - 1; i >= 0; --i) + match_asm_constraints_2 (insn, XVECEXP (pat, 0, i)); + else + match_asm_constraints_2 (insn, pat); + if (GET_CODE (pat) == PARALLEL) p_sets = &XVECEXP (pat, 0, 0), noutputs = XVECLEN (pat, 0); else if (GET_CODE (pat) == SET) -- cgit v1.1