diff options
author | Hongyu Wang <hongyu.wang@intel.com> | 2024-04-18 16:53:26 +0800 |
---|---|---|
committer | Hongyu Wang <hongyu.wang@intel.com> | 2024-07-15 14:50:33 +0800 |
commit | 681ff5ccca153864eb86099eed201838d8d98bc2 (patch) | |
tree | 130ab779d12a465e72a96b07ab203c9974274643 /gcc/config/i386/i386-features.cc | |
parent | 60ba989220d9dec07d82009b0dafe684e652577f (diff) | |
download | gcc-681ff5ccca153864eb86099eed201838d8d98bc2.zip gcc-681ff5ccca153864eb86099eed201838d8d98bc2.tar.gz gcc-681ff5ccca153864eb86099eed201838d8d98bc2.tar.bz2 |
[APX NF] Add a pass to convert legacy insn to NF insns
For APX ccmp, current infrastructure will always generate cstore for
the ccmp flag user, like
cmpe %rcx, %r8
ccmpnel %rax, %rbx
seta %dil
add %rcx, %r9
add %r9, %rdx
testb %dil, %dil
je .L2
In such a case, the legacy add clobbers FLAGS_REG, so an extra cstore
is needed to save the flag before it is overwritten. If the
instructions between the flag producer and user are NF insns, the setcc/
test sequence is not required.
Add a pass to convert legacy flag-clobbering insns to their NF counterparts.
The conversion only happens when
1. APX_NF is enabled.
2. Within a BB, a cstore is found, and there are insns between that cstore
and the next insn that explicitly sets FLAGS_REG (test or cmp).
3. All the insns found have an NF counterpart.
The pass is added after rtl-ifcvt, which eliminates some branches when
profitable and can thereby place flag-clobbering insns between the
cstore and the jcc.
gcc/ChangeLog:
* config/i386/i386.md (has_nf): New define_attr, add to all
nf related patterns.
* config/i386/i386-features.cc (apx_nf_convert): New function
to convert Non-NF insns to their NF counterparts.
(class pass_apx_nf_convert): New pass class.
(make_pass_apx_nf_convert): New.
* config/i386/i386-passes.def: Add pass_apx_nf_convert after
rtl_ifcvt.
* config/i386/i386-protos.h (make_pass_apx_nf_convert): Declare.
gcc/testsuite/ChangeLog:
* gcc.target/i386/apx-nf-2.c: New test.
Diffstat (limited to 'gcc/config/i386/i386-features.cc')
-rw-r--r-- | gcc/config/i386/i386-features.cc | 163 |
1 files changed, 163 insertions, 0 deletions
diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc index fc224ed..3da56dd 100644 --- a/gcc/config/i386/i386-features.cc +++ b/gcc/config/i386/i386-features.cc @@ -3259,6 +3259,169 @@ make_pass_remove_partial_avx_dependency (gcc::context *ctxt) return new pass_remove_partial_avx_dependency (ctxt); } +/* Convert legacy instructions that clobber EFLAGS to APX_NF + instructions when there is no flag set between a flag + producer and user. */ + +static unsigned int +ix86_apx_nf_convert (void) +{ + timevar_push (TV_MACH_DEP); + + basic_block bb; + rtx_insn *insn; + hash_map <rtx_insn *, rtx> converting_map; + auto_vec <rtx_insn *> current_convert_list; + + bool converting_seq = false; + rtx cc = gen_rtx_REG (CCmode, FLAGS_REG); + + FOR_EACH_BB_FN (bb, cfun) + { + /* Reset conversion for each bb. */ + converting_seq = false; + FOR_BB_INSNS (bb, insn) + { + if (!NONDEBUG_INSN_P (insn)) + continue; + + if (recog_memoized (insn) < 0) + continue; + + /* Convert candidate insns after cstore, which should + satisfy the two conditions: + 1. Is not flag user or producer, only clobbers + FLAGS_REG. + 2. Have corresponding nf pattern. */ + + rtx pat = PATTERN (insn); + + /* Start conversion at the first cstorecc. */ + rtx set = NULL_RTX; + if (!converting_seq + && (set = single_set (insn)) + && ix86_comparison_operator (SET_SRC (set), VOIDmode) + && reg_overlap_mentioned_p (cc, SET_SRC (set)) + && !reg_overlap_mentioned_p (cc, SET_DEST (set))) + { + converting_seq = true; + current_convert_list.truncate (0); + } + /* Terminate at the next explicit flag set. */ + else if (reg_set_p (cc, pat) + && GET_CODE (set_of (cc, pat)) != CLOBBER) + converting_seq = false; + + if (!converting_seq) + continue; + + if (get_attr_has_nf (insn) + && GET_CODE (pat) == PARALLEL) + { + /* Record the insn to candidate map. 
*/ + current_convert_list.safe_push (insn); + converting_map.put (insn, pat); + } + /* If the insn clobbers flags but has no nf_attr, + revoke all previous candidates. */ + else if (!get_attr_has_nf (insn) + && reg_set_p (cc, pat) + && GET_CODE (set_of (cc, pat)) == CLOBBER) + { + for (auto item : current_convert_list) + converting_map.remove (item); + converting_seq = false; + } + } + } + + if (!converting_map.is_empty ()) + { + for (auto iter = converting_map.begin (); + iter != converting_map.end (); ++iter) + { + rtx_insn *replace = (*iter).first; + rtx pat = (*iter).second; + int i, n = 0, len = XVECLEN (pat, 0); + rtx *new_elems = XALLOCAVEC (rtx, len); + rtx new_pat; + for (i = 0; i < len; i++) + { + rtx temp = XVECEXP (pat, 0, i); + if (! (GET_CODE (temp) == CLOBBER + && reg_overlap_mentioned_p (cc, + XEXP (temp, 0)))) + { + new_elems[n] = temp; + n++; + } + } + + if (n == 1) + new_pat = new_elems[0]; + else + new_pat = + gen_rtx_PARALLEL (VOIDmode, + gen_rtvec_v (n, + new_elems)); + + PATTERN (replace) = new_pat; + INSN_CODE (replace) = -1; + recog_memoized (replace); + df_insn_rescan (replace); + } + } + + timevar_pop (TV_MACH_DEP); + return 0; +} + + +namespace { + +const pass_data pass_data_apx_nf_convert = +{ + RTL_PASS, /* type */ + "apx_nfcvt", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_MACH_DEP, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ +}; + +class pass_apx_nf_convert : public rtl_opt_pass +{ +public: + pass_apx_nf_convert (gcc::context *ctxt) + : rtl_opt_pass (pass_data_apx_nf_convert, ctxt) + {} + + /* opt_pass methods: */ + bool gate (function *) final override + { + return (TARGET_APX_NF + && optimize + && optimize_function_for_speed_p (cfun)); + } + + unsigned int execute (function *) final override + { + return ix86_apx_nf_convert (); + } +}; // class pass_apx_nf_convert + +} // anon namespace + +rtl_opt_pass * 
+make_pass_apx_nf_convert (gcc::context *ctxt) +{ + return new pass_apx_nf_convert (ctxt); +} + + /* This compares the priority of target features in function DECL1 and DECL2. It returns positive value if DECL1 is higher priority, negative value if DECL2 is higher priority and 0 if they are the |