diff options
author | Ju-Zhe Zhong <juzhe.zhong@rivai.ai> | 2023-07-15 07:45:00 +0800 |
---|---|---|
committer | Pan Li <pan2.li@intel.com> | 2023-07-19 21:36:56 +0800 |
commit | ba49332baba622cb9af8e34629636f2586664c7e (patch) | |
tree | 55b32252c1641d98571fb7b2374c9d3e1781d4a3 | |
parent | e029635cb72e6db72f1826b6b43fa4b299b2145f (diff) | |
download | gcc-ba49332baba622cb9af8e34629636f2586664c7e.zip gcc-ba49332baba622cb9af8e34629636f2586664c7e.tar.gz gcc-ba49332baba622cb9af8e34629636f2586664c7e.tar.bz2 |
VECT: Add mask_len_fold_left_plus for in-order floating-point reduction
Hi, Richard and Richi.
This patch adds the mask_len_fold_left_plus pattern to support in-order
floating-point reduction on targets that use length-based loop control.
Consider the following case:
/* Example loop: conditional floating-point accumulation.  a[i] is added to
   init only for iterations where cond[i] is nonzero, and the running sum is
   returned.  The additions happen one after another in index order, which is
   the in-order reduction this patch is concerned with.  */
double
foo2 (double *__restrict a,
double init,
int *__restrict cond,
int n)
{
for (int i = 0; i < n; i++)
if (cond[i]) /* Elements whose condition is zero do not contribute.  */
init += a[i];
return init;
}
ARM SVE:
...
vec_mask_and_60 = loop_mask_54 & mask__23.33_57;
vect__ifc__35.37_64 = .VCOND_MASK (vec_mask_and_60, vect__8.36_61, { 0.0, ... });
_36 = .MASK_FOLD_LEFT_PLUS (init_20, vect__ifc__35.37_64, loop_mask_54);
...
For RVV, we want to see:
...
_36 = .MASK_LEN_FOLD_LEFT_PLUS (init_20, vect__ifc__35.37_64, control_mask, loop_len, bias);
...
gcc/ChangeLog:
* doc/md.texi: Add mask_len_fold_left_plus.
* internal-fn.cc (mask_len_fold_left_direct): Ditto.
(expand_mask_len_fold_left_optab_fn): Ditto.
(direct_mask_len_fold_left_optab_supported_p): Ditto.
* internal-fn.def (MASK_LEN_FOLD_LEFT_PLUS): Ditto.
* optabs.def (OPTAB_D): Ditto.
-rw-r--r-- | gcc/doc/md.texi | 13 | ||||
-rw-r--r-- | gcc/internal-fn.cc | 5 | ||||
-rw-r--r-- | gcc/internal-fn.def | 3 | ||||
-rw-r--r-- | gcc/optabs.def | 1 |
4 files changed, 22 insertions, 0 deletions
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index cbcb992..6f44e66 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -5615,6 +5615,19 @@ no reassociation. Like @samp{fold_left_plus_@var{m}}, but takes an additional mask operand (operand 3) that specifies which elements of the source vector should be added. +@cindex @code{mask_len_fold_left_plus_@var{m}} instruction pattern +@item @code{mask_len_fold_left_plus_@var{m}} +Like @samp{fold_left_plus_@var{m}}, but takes an additional mask operand +(operand 3), len operand (operand 4) and bias operand (operand 5) that +performs following operations strictly in-order (no reassociation): + +@smallexample +operand0 = operand1; +for (i = 0; i < LEN + BIAS; i++) + if (operand3[i]) + operand0 += operand2[i]; +@end smallexample + @cindex @code{sdot_prod@var{m}} instruction pattern @item @samp{sdot_prod@var{m}} diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc index e698f0b..2bf4fc4 100644 --- a/gcc/internal-fn.cc +++ b/gcc/internal-fn.cc @@ -190,6 +190,7 @@ init_internal_fns () #define fold_extract_direct { 2, 2, false } #define fold_left_direct { 1, 1, false } #define mask_fold_left_direct { 1, 1, false } +#define mask_len_fold_left_direct { 1, 1, false } #define check_ptrs_direct { 0, 0, false } const direct_internal_fn_info direct_internal_fn_array[IFN_LAST + 1] = { @@ -3890,6 +3891,9 @@ expand_convert_optab_fn (internal_fn fn, gcall *stmt, convert_optab optab, #define expand_mask_fold_left_optab_fn(FN, STMT, OPTAB) \ expand_direct_optab_fn (FN, STMT, OPTAB, 3) +#define expand_mask_len_fold_left_optab_fn(FN, STMT, OPTAB) \ + expand_direct_optab_fn (FN, STMT, OPTAB, 5) + #define expand_check_ptrs_optab_fn(FN, STMT, OPTAB) \ expand_direct_optab_fn (FN, STMT, OPTAB, 4) @@ -3997,6 +4001,7 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types, #define direct_fold_extract_optab_supported_p direct_optab_supported_p #define direct_fold_left_optab_supported_p direct_optab_supported_p #define 
direct_mask_fold_left_optab_supported_p direct_optab_supported_p +#define direct_mask_len_fold_left_optab_supported_p direct_optab_supported_p #define direct_check_ptrs_optab_supported_p direct_optab_supported_p #define direct_vec_set_optab_supported_p direct_optab_supported_p #define direct_vec_extract_optab_supported_p direct_optab_supported_p diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def index ea750a9..d3aec51 100644 --- a/gcc/internal-fn.def +++ b/gcc/internal-fn.def @@ -319,6 +319,9 @@ DEF_INTERNAL_OPTAB_FN (FOLD_LEFT_PLUS, ECF_CONST | ECF_NOTHROW, DEF_INTERNAL_OPTAB_FN (MASK_FOLD_LEFT_PLUS, ECF_CONST | ECF_NOTHROW, mask_fold_left_plus, mask_fold_left) +DEF_INTERNAL_OPTAB_FN (MASK_LEN_FOLD_LEFT_PLUS, ECF_CONST | ECF_NOTHROW, + mask_len_fold_left_plus, mask_len_fold_left) + /* Unary math functions. */ DEF_INTERNAL_FLT_FN (ACOS, ECF_CONST, acos, unary) DEF_INTERNAL_FLT_FN (ACOSH, ECF_CONST, acosh, unary) diff --git a/gcc/optabs.def b/gcc/optabs.def index 3dae228..7023392 100644 --- a/gcc/optabs.def +++ b/gcc/optabs.def @@ -385,6 +385,7 @@ OPTAB_D (reduc_ior_scal_optab, "reduc_ior_scal_$a") OPTAB_D (reduc_xor_scal_optab, "reduc_xor_scal_$a") OPTAB_D (fold_left_plus_optab, "fold_left_plus_$a") OPTAB_D (mask_fold_left_plus_optab, "mask_fold_left_plus_$a") +OPTAB_D (mask_len_fold_left_plus_optab, "mask_len_fold_left_plus_$a") OPTAB_D (extract_last_optab, "extract_last_$a") OPTAB_D (fold_extract_last_optab, "fold_extract_last_$a") |