aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRichard Sandiford <richard.sandiford@arm.com>2021-11-10 12:38:43 +0000
committerRichard Sandiford <richard.sandiford@arm.com>2021-11-10 12:38:43 +0000
commit6d331688fcb69e9aae84bb94cb7cc54641a90ab6 (patch)
tree1315025fd1850985e49e7e4295bba05d3ca16504
parent0612883d9dc6eebecdbe937893b86597acae237c (diff)
downloadgcc-6d331688fcb69e9aae84bb94cb7cc54641a90ab6.zip
gcc-6d331688fcb69e9aae84bb94cb7cc54641a90ab6.tar.gz
gcc-6d331688fcb69e9aae84bb94cb7cc54641a90ab6.tar.bz2
aarch64: Tweak FMAX/FMIN iterators
There was some duplication between the maxmin_uns (uns for unspec rather than unsigned) int attribute and the optab int attribute. The difficulty for FMAXNM and FMINNM is that the instructions really correspond to two things: the smax/smin optabs for floats (used only for fast-math-like flags) and the fmax/fmin optabs (used for built-in functions). The optab attribute was consistently for the former but maxmin_uns had a mixture of both. This patch renames maxmin_uns to fmaxmin and only uses it for the fmax and fmin optabs. The reductions that previously used the maxmin_uns attribute now use the optab attribute instead. FMAX and FMIN are awkward in that they don't correspond to any optab. It's nevertheless useful to define them alongside the “real” optabs. Previously they were known as “smax_nan” and “smin_nan”, but the problem with those names it that smax and smin are only used for floats if NaNs don't matter. This patch therefore uses fmax_nan and fmin_nan instead. There is still some inconsistency, in that the optab attribute handles UNSPEC_COND_FMAX but the fmaxmin attribute handles UNSPEC_FMAX. This is because the SVE FP instructions, being predicated, have to use unspecs in cases where the Advanced SIMD ones could use rtl codes. At least there are no duplicate entries though, so this seemed like the best compromise for now. gcc/ * config/aarch64/iterators.md (optab): Use fmax_nan instead of smax_nan and fmin_nan instead of smin_nan. (maxmin_uns): Rename to... (fmaxmin): ...this and make the same changes. Remove entries unrelated to fmax* and fmin*. * config/aarch64/aarch64.md (<maxmin_uns><mode>3): Rename to... (<fmaxmin><mode>3): ...this. * config/aarch64/aarch64-simd.md (aarch64_<maxmin_uns>p<mode>): Rename to... (aarch64_<optab>p<mode>): ...this. (<maxmin_uns><mode>3): Rename to... (<fmaxmin><mode>3): ...this. (reduc_<maxmin_uns>_scal_<mode>): Rename to... (reduc_<optab>_scal_<mode>): ...this and update gen* call. (aarch64_reduc_<maxmin_uns>_internal<mode>): Rename to... (aarch64_reduc_<optab>_internal<mode>): ...this. (aarch64_reduc_<maxmin_uns>_internalv2si): Rename to... (aarch64_reduc_<optab>_internalv2si): ...this. * config/aarch64/aarch64-sve.md (<maxmin_uns><mode>3): Rename to... (<fmaxmin><mode>3): ...this. * config/aarch64/aarch64-simd-builtins.def (smax_nan, smin_nan) Rename to... (fmax_nan, fmin_nan): ...this. * config/aarch64/arm_neon.h (vmax_f32, vmax_f64, vmaxq_f32, vmaxq_f64) (vmin_f32, vmin_f64, vminq_f32, vminq_f64, vmax_f16, vmaxq_f16) (vmin_f16, vminq_f16): Update accordingly.
-rw-r--r--gcc/config/aarch64/aarch64-simd-builtins.def12
-rw-r--r--gcc/config/aarch64/aarch64-simd.md24
-rw-r--r--gcc/config/aarch64/aarch64-sve.md2
-rw-r--r--gcc/config/aarch64/aarch64.md2
-rw-r--r--gcc/config/aarch64/arm_neon.h24
-rw-r--r--gcc/config/aarch64/iterators.md26
6 files changed, 39 insertions, 51 deletions
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 4a7e2cf..9b0a6ec 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -502,21 +502,19 @@
BUILTIN_VHSDF (UNOP, reduc_smax_nan_scal_, 10, NONE)
BUILTIN_VHSDF (UNOP, reduc_smin_nan_scal_, 10, NONE)
- /* Implemented by <maxmin_uns><mode>3.
- smax variants map to fmaxnm,
- smax_nan variants map to fmax. */
+ /* Implemented by <optab><mode>3. */
BUILTIN_VDQ_BHSI (BINOP, smax, 3, NONE)
BUILTIN_VDQ_BHSI (BINOP, smin, 3, NONE)
BUILTIN_VDQ_BHSI (BINOP, umax, 3, NONE)
BUILTIN_VDQ_BHSI (BINOP, umin, 3, NONE)
- BUILTIN_VHSDF_DF (BINOP, smax_nan, 3, NONE)
- BUILTIN_VHSDF_DF (BINOP, smin_nan, 3, NONE)
- /* Implemented by <maxmin_uns><mode>3. */
+ /* Implemented by <fmaxmin><mode>3. */
BUILTIN_VHSDF_HSDF (BINOP, fmax, 3, FP)
BUILTIN_VHSDF_HSDF (BINOP, fmin, 3, FP)
+ BUILTIN_VHSDF_DF (BINOP, fmax_nan, 3, FP)
+ BUILTIN_VHSDF_DF (BINOP, fmin_nan, 3, FP)
- /* Implemented by aarch64_<maxmin_uns>p<mode>. */
+ /* Implemented by aarch64_<optab>p<mode>. */
BUILTIN_VDQ_BHSI (BINOP, smaxp, 0, NONE)
BUILTIN_VDQ_BHSI (BINOP, sminp, 0, NONE)
BUILTIN_VDQ_BHSI (BINOP, umaxp, 0, NONE)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index bff76e4..35d55a3 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1553,7 +1553,7 @@
})
;; Pairwise Integer Max/Min operations.
-(define_insn "aarch64_<maxmin_uns>p<mode>"
+(define_insn "aarch64_<optab>p<mode>"
[(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
(unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
(match_operand:VDQ_BHSI 2 "register_operand" "w")]
@@ -1564,7 +1564,7 @@
)
;; Pairwise FP Max/Min operations.
-(define_insn "aarch64_<maxmin_uns>p<mode>"
+(define_insn "aarch64_<optab>p<mode>"
[(set (match_operand:VHSDF 0 "register_operand" "=w")
(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
(match_operand:VHSDF 2 "register_operand" "w")]
@@ -3488,7 +3488,7 @@
;; Vector forms for fmax, fmin, fmaxnm, fminnm.
;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
;; which implement the IEEE fmax ()/fmin () functions.
-(define_insn "<maxmin_uns><mode>3"
+(define_insn "<fmaxmin><mode>3"
[(set (match_operand:VHSDF 0 "register_operand" "=w")
(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
(match_operand:VHSDF 2 "register_operand" "w")]
@@ -3622,7 +3622,7 @@
;; Template for outputting a scalar, so we can create __builtins which can be
;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function. (This is FP smax/smin).
-(define_expand "reduc_<maxmin_uns>_scal_<mode>"
+(define_expand "reduc_<optab>_scal_<mode>"
[(match_operand:<VEL> 0 "register_operand")
(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
FMAXMINV)]
@@ -3630,15 +3630,15 @@
{
rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
rtx scratch = gen_reg_rtx (<MODE>mode);
- emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
- operands[1]));
+ emit_insn (gen_aarch64_reduc_<optab>_internal<mode> (scratch,
+ operands[1]));
emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
DONE;
}
)
;; Likewise for integer cases, signed and unsigned.
-(define_expand "reduc_<maxmin_uns>_scal_<mode>"
+(define_expand "reduc_<optab>_scal_<mode>"
[(match_operand:<VEL> 0 "register_operand")
(unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
MAXMINV)]
@@ -3646,14 +3646,14 @@
{
rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
rtx scratch = gen_reg_rtx (<MODE>mode);
- emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
- operands[1]));
+ emit_insn (gen_aarch64_reduc_<optab>_internal<mode> (scratch,
+ operands[1]));
emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
DONE;
}
)
-(define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
+(define_insn "aarch64_reduc_<optab>_internal<mode>"
[(set (match_operand:VDQV_S 0 "register_operand" "=w")
(unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
MAXMINV))]
@@ -3662,7 +3662,7 @@
[(set_attr "type" "neon_reduc_minmax<q>")]
)
-(define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
+(define_insn "aarch64_reduc_<optab>_internalv2si"
[(set (match_operand:V2SI 0 "register_operand" "=w")
(unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
MAXMINV))]
@@ -3671,7 +3671,7 @@
[(set_attr "type" "neon_reduc_minmax")]
)
-(define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
+(define_insn "aarch64_reduc_<optab>_internal<mode>"
[(set (match_operand:VHSDF 0 "register_operand" "=w")
(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
FMAXMINV))]
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 8fe4c72..5de479e 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -6288,7 +6288,7 @@
;; Unpredicated fmax/fmin (the libm functions). The optabs for the
;; smin/smax rtx codes are handled in the generic section above.
-(define_expand "<maxmin_uns><mode>3"
+(define_expand "<fmaxmin><mode>3"
[(set (match_operand:SVE_FULL_F 0 "register_operand")
(unspec:SVE_FULL_F
[(match_dup 3)
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 4035e06..5297b2d 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -6475,7 +6475,7 @@
;; Scalar forms for fmax, fmin, fmaxnm, fminnm.
;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
;; which implement the IEEE fmax ()/fmin () functions.
-(define_insn "<maxmin_uns><mode>3"
+(define_insn "<fmaxmin><mode>3"
[(set (match_operand:GPF_F16 0 "register_operand" "=w")
(unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")
(match_operand:GPF_F16 2 "register_operand" "w")]
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 398a2e3..2e64f07 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -18264,7 +18264,7 @@ __extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmax_f32 (float32x2_t __a, float32x2_t __b)
{
- return __builtin_aarch64_smax_nanv2sf (__a, __b);
+ return __builtin_aarch64_fmax_nanv2sf (__a, __b);
}
__extension__ extern __inline float64x1_t
@@ -18272,7 +18272,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmax_f64 (float64x1_t __a, float64x1_t __b)
{
return (float64x1_t)
- { __builtin_aarch64_smax_nandf (vget_lane_f64 (__a, 0),
+ { __builtin_aarch64_fmax_nandf (vget_lane_f64 (__a, 0),
vget_lane_f64 (__b, 0)) };
}
@@ -18325,14 +18325,14 @@ __extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmaxq_f32 (float32x4_t __a, float32x4_t __b)
{
- return __builtin_aarch64_smax_nanv4sf (__a, __b);
+ return __builtin_aarch64_fmax_nanv4sf (__a, __b);
}
__extension__ extern __inline float64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmaxq_f64 (float64x2_t __a, float64x2_t __b)
{
- return __builtin_aarch64_smax_nanv2df (__a, __b);
+ return __builtin_aarch64_fmax_nanv2df (__a, __b);
}
__extension__ extern __inline int8x16_t
@@ -19003,7 +19003,7 @@ __extension__ extern __inline float32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmin_f32 (float32x2_t __a, float32x2_t __b)
{
- return __builtin_aarch64_smin_nanv2sf (__a, __b);
+ return __builtin_aarch64_fmin_nanv2sf (__a, __b);
}
__extension__ extern __inline float64x1_t
@@ -19011,7 +19011,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmin_f64 (float64x1_t __a, float64x1_t __b)
{
return (float64x1_t)
- { __builtin_aarch64_smin_nandf (vget_lane_f64 (__a, 0),
+ { __builtin_aarch64_fmin_nandf (vget_lane_f64 (__a, 0),
vget_lane_f64 (__b, 0)) };
}
@@ -19064,14 +19064,14 @@ __extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vminq_f32 (float32x4_t __a, float32x4_t __b)
{
- return __builtin_aarch64_smin_nanv4sf (__a, __b);
+ return __builtin_aarch64_fmin_nanv4sf (__a, __b);
}
__extension__ extern __inline float64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vminq_f64 (float64x2_t __a, float64x2_t __b)
{
- return __builtin_aarch64_smin_nanv2df (__a, __b);
+ return __builtin_aarch64_fmin_nanv2df (__a, __b);
}
__extension__ extern __inline int8x16_t
@@ -29131,14 +29131,14 @@ __extension__ extern __inline float16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmax_f16 (float16x4_t __a, float16x4_t __b)
{
- return __builtin_aarch64_smax_nanv4hf (__a, __b);
+ return __builtin_aarch64_fmax_nanv4hf (__a, __b);
}
__extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmaxq_f16 (float16x8_t __a, float16x8_t __b)
{
- return __builtin_aarch64_smax_nanv8hf (__a, __b);
+ return __builtin_aarch64_fmax_nanv8hf (__a, __b);
}
__extension__ extern __inline float16x4_t
@@ -29159,14 +29159,14 @@ __extension__ extern __inline float16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vmin_f16 (float16x4_t __a, float16x4_t __b)
{
- return __builtin_aarch64_smin_nanv4hf (__a, __b);
+ return __builtin_aarch64_fmin_nanv4hf (__a, __b);
}
__extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vminq_f16 (float16x8_t __a, float16x8_t __b)
{
- return __builtin_aarch64_smin_nanv8hf (__a, __b);
+ return __builtin_aarch64_fmin_nanv8hf (__a, __b);
}
__extension__ extern __inline float16x4_t
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index bdc8ba3..e8eebd8 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -3189,9 +3189,9 @@
(UNSPEC_COND_FCVTZS "fix_trunc")
(UNSPEC_COND_FCVTZU "fixuns_trunc")
(UNSPEC_COND_FDIV "div")
- (UNSPEC_COND_FMAX "smax_nan")
+ (UNSPEC_COND_FMAX "fmax_nan")
(UNSPEC_COND_FMAXNM "smax")
- (UNSPEC_COND_FMIN "smin_nan")
+ (UNSPEC_COND_FMIN "fmin_nan")
(UNSPEC_COND_FMINNM "smin")
(UNSPEC_COND_FMLA "fma")
(UNSPEC_COND_FMLS "fnma")
@@ -3214,22 +3214,12 @@
(UNSPEC_COND_SCVTF "float")
(UNSPEC_COND_UCVTF "floatuns")])
-(define_int_attr maxmin_uns [(UNSPEC_UMAXV "umax")
- (UNSPEC_UMINV "umin")
- (UNSPEC_SMAXV "smax")
- (UNSPEC_SMINV "smin")
- (UNSPEC_FMAX "smax_nan")
- (UNSPEC_FMAXNMV "smax")
- (UNSPEC_FMAXV "smax_nan")
- (UNSPEC_FMIN "smin_nan")
- (UNSPEC_FMINNMV "smin")
- (UNSPEC_FMINV "smin_nan")
- (UNSPEC_FMAXNM "fmax")
- (UNSPEC_FMINNM "fmin")
- (UNSPEC_COND_FMAX "fmax_nan")
- (UNSPEC_COND_FMAXNM "fmax")
- (UNSPEC_COND_FMIN "fmin_nan")
- (UNSPEC_COND_FMINNM "fmin")])
+(define_int_attr fmaxmin [(UNSPEC_FMAX "fmax_nan")
+ (UNSPEC_FMAXNM "fmax")
+ (UNSPEC_FMIN "fmin_nan")
+ (UNSPEC_FMINNM "fmin")
+ (UNSPEC_COND_FMAXNM "fmax")
+ (UNSPEC_COND_FMINNM "fmin")])
(define_int_attr maxmin_uns_op [(UNSPEC_UMAXV "umax")
(UNSPEC_UMINV "umin")