aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author Michael Meissner <meissner@linux.ibm.com> 2021-12-15 00:57:44 -0500
committer Michael Meissner <meissner@linux.ibm.com> 2021-12-15 00:57:44 -0500
commit 8ccd8b12ded1782f4273fd1f381b7d554df61a12 (patch)
tree da03675a2aa6ac752ea46c9b1c242d10e8bb77cb /gcc
parent c6756b3bc1d2af1c8e86f0ad1711e9b9134520ba (diff)
download gcc-8ccd8b12ded1782f4273fd1f381b7d554df61a12.zip
gcc-8ccd8b12ded1782f4273fd1f381b7d554df61a12.tar.gz
gcc-8ccd8b12ded1782f4273fd1f381b7d554df61a12.tar.bz2
Add LXVKQ support.
This patch adds support to generate the LXVKQ instruction to load specific
IEEE-128 floating point constants.

Compared to the last time I submitted this patch, I modified it so that it
uses the bit pattern of the vector to see if it can generate the LXVKQ
instruction. This means on a little endian Power<xxx> system, the following
code will generate a LXVKQ 34,16 instruction:

    vector long long foo (void)
    {
      return (vector long long) { 0x0000000000000000, 0x8000000000000000 };
    }

because that vector pattern is the same bit pattern as -0.0F128.

2021-12-14  Michael Meissner  <meissner@the-meissners.org>

gcc/
	* config/rs6000/constraints.md (eQ): New constraint.
	* config/rs6000/predicates.md (easy_fp_constant): Add support for
	generating the LXVKQ instruction.
	(easy_vector_constant_ieee128): New predicate.
	(easy_vector_constant): Add support for generating the LXVKQ
	instruction.
	* config/rs6000/rs6000-protos.h (constant_generates_lxvkq): New
	declaration.
	* config/rs6000/rs6000.c (output_vec_const_move): Add support for
	generating LXVKQ.
	(constant_generates_lxvkq): New function.
	* config/rs6000/rs6000.opt (-mieee128-constant): New debug option.
	* config/rs6000/vsx.md (vsx_mov<mode>_64bit): Add support for
	generating LXVKQ.
	(vsx_mov<mode>_32bit): Likewise.
	* doc/md.texi (PowerPC and IBM RS6000 constraints): Document the
	eQ constraint.

gcc/testsuite/
	* gcc.target/powerpc/float128-constant.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/config/rs6000/constraints.md 6
-rw-r--r-- gcc/config/rs6000/predicates.md 34
-rw-r--r-- gcc/config/rs6000/rs6000-protos.h 1
-rw-r--r-- gcc/config/rs6000/rs6000.c 62
-rw-r--r-- gcc/config/rs6000/rs6000.opt 4
-rw-r--r-- gcc/config/rs6000/vsx.md 14
-rw-r--r-- gcc/doc/md.texi 4
-rw-r--r-- gcc/testsuite/gcc.target/powerpc/float128-constant.c 160
8 files changed, 285 insertions, 0 deletions
diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index c8cff1a..e72132b 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -213,6 +213,12 @@
"A signed 34-bit integer constant if prefixed instructions are supported."
(match_operand 0 "cint34_operand"))
+;; A TF/KF scalar constant or a vector constant whose bit pattern is one of the
+;; IEEE 128-bit constants that LXVKQ can load into vector registers.
+(define_constraint "eQ"
+ "An IEEE 128-bit constant that can be loaded into VSX registers."
+ (match_operand 0 "easy_vector_constant_ieee128"))
+
;; Floating-point constraints. These two are defined so that insn
;; length attributes can be calculated exactly.
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index f216ffd..be72167 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -601,6 +601,14 @@
if (TARGET_VSX && op == CONST0_RTX (mode))
return 1;
+ /* Constants that can be generated with ISA 3.1 instructions are easy. */
+ vec_const_128bit_type vsx_const;
+ if (TARGET_POWER10 && vec_const_128bit_to_bytes (op, mode, &vsx_const))
+ {
+ if (constant_generates_lxvkq (&vsx_const) != 0)
+ return true;
+ }
+
/* Otherwise consider floating point constants hard, so that the
constant gets pushed to memory during the early RTL phases. This
has the advantage that double precision constants that can be
@@ -609,6 +617,23 @@
return 0;
})
+;; Return 1 if the operand is a special IEEE 128-bit value that can be loaded
+;; via the LXVKQ instruction.
+
+(define_predicate "easy_vector_constant_ieee128"
+ (match_code "const_vector,const_double")
+{
+ vec_const_128bit_type vsx_const;
+
+ /* Bail out early if the target cannot generate the LXVKQ instruction. */
+ if (!TARGET_IEEE128_CONSTANT || !TARGET_FLOAT128_HW || !TARGET_POWER10
+ || !TARGET_VSX)
+ return false;
+
+ return (vec_const_128bit_to_bytes (op, mode, &vsx_const)
+ && constant_generates_lxvkq (&vsx_const) != 0);
+})
+
;; Return 1 if the operand is a constant that can be loaded with a XXSPLTIB
;; instruction and then a VUPKHSB, VEXTSB2W or VEXTSB2D instruction.
@@ -653,6 +678,15 @@
if (zero_constant (op, mode) || all_ones_constant (op, mode))
return true;
+ /* Constants that can be generated with ISA 3.1 instructions are
+ easy. */
+ vec_const_128bit_type vsx_const;
+ if (TARGET_POWER10 && vec_const_128bit_to_bytes (op, mode, &vsx_const))
+ {
+ if (constant_generates_lxvkq (&vsx_const) != 0)
+ return true;
+ }
+
if (TARGET_P9_VECTOR
&& xxspltib_constant_p (op, mode, &num_insns, &value))
return true;
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index 81345d8..4a2e7fa 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -250,6 +250,7 @@ typedef struct {
extern bool vec_const_128bit_to_bytes (rtx, machine_mode,
vec_const_128bit_type *);
+extern unsigned constant_generates_lxvkq (vec_const_128bit_type *);
#endif /* RTX_CODE */
#ifdef TREE_CODE
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 819314d..0bc3844 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -6700,6 +6700,17 @@ output_vec_const_move (rtx *operands)
gcc_unreachable ();
}
+ vec_const_128bit_type vsx_const;
+ if (TARGET_POWER10 && vec_const_128bit_to_bytes (vec, mode, &vsx_const))
+ {
+ unsigned imm = constant_generates_lxvkq (&vsx_const);
+ if (imm)
+ {
+ operands[2] = GEN_INT (imm);
+ return "lxvkq %x0,%2";
+ }
+ }
+
if (TARGET_P9_VECTOR
&& xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
{
@@ -28587,6 +28598,57 @@ vec_const_128bit_to_bytes (rtx op,
return true;
}
+/* Determine if the 128-bit constant described by VSX_CONST can be loaded with
+ LXVKQ. Return zero if the LXVKQ instruction cannot be used; otherwise
+ return the (non-zero) immediate value to be used with the instruction. */
+
+unsigned
+constant_generates_lxvkq (vec_const_128bit_type *vsx_const)
+{
+ /* LXVKQ needs -mieee128-constant, power10 code generation, IEEE 128-bit
+ floating point hardware, and VSX registers; otherwise give up. */
+ if (!TARGET_IEEE128_CONSTANT || !TARGET_FLOAT128_HW || !TARGET_POWER10
+ || !TARGET_VSX)
+ return 0;
+
+ /* Every constant that LXVKQ can generate has its bottom 3 words (words[1]
+ through words[3]) equal to 0, so anything else is rejected here. */
+ if (vsx_const->words[1] != 0
+ || vsx_const->words[2] != 0
+ || vsx_const->words[3] != 0)
+ return 0;
+
+ /* Map the first word to its LXVKQ immediate if it is a known pattern. */
+ switch (vsx_const->words[0])
+ {
+ case 0x3FFF0000U: return 1; /* IEEE 128-bit +1.0. */
+ case 0x40000000U: return 2; /* IEEE 128-bit +2.0. */
+ case 0x40008000U: return 3; /* IEEE 128-bit +3.0. */
+ case 0x40010000U: return 4; /* IEEE 128-bit +4.0. */
+ case 0x40014000U: return 5; /* IEEE 128-bit +5.0. */
+ case 0x40018000U: return 6; /* IEEE 128-bit +6.0. */
+ case 0x4001C000U: return 7; /* IEEE 128-bit +7.0. */
+ case 0x7FFF0000U: return 8; /* IEEE 128-bit +Infinity. */
+ case 0x7FFF8000U: return 9; /* IEEE 128-bit quiet NaN. */
+ case 0x80000000U: return 16; /* IEEE 128-bit -0.0. */
+ case 0xBFFF0000U: return 17; /* IEEE 128-bit -1.0. */
+ case 0xC0000000U: return 18; /* IEEE 128-bit -2.0. */
+ case 0xC0008000U: return 19; /* IEEE 128-bit -3.0. */
+ case 0xC0010000U: return 20; /* IEEE 128-bit -4.0. */
+ case 0xC0014000U: return 21; /* IEEE 128-bit -5.0. */
+ case 0xC0018000U: return 22; /* IEEE 128-bit -6.0. */
+ case 0xC001C000U: return 23; /* IEEE 128-bit -7.0. */
+ case 0xFFFF0000U: return 24; /* IEEE 128-bit -Infinity. */
+
+ /* Anything else cannot be loaded with LXVKQ. */
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-rs6000.h"
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 9d7878f..b7433ec 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -640,6 +640,10 @@ mprivileged
Target Var(rs6000_privileged) Init(0)
Generate code that will run in privileged state.
+mieee128-constant
+Target Var(TARGET_IEEE128_CONSTANT) Init(1) Save
+Generate (do not generate) code that uses the LXVKQ instruction.
+
-param=rs6000-density-pct-threshold=
Target Undocumented Joined UInteger Var(rs6000_density_pct_threshold) Init(85) IntegerRange(0, 100) Param
When costing for loop vectorization, we probably need to penalize the loop body
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 83d6c7b..de04840 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -1192,16 +1192,19 @@
;; VSX store VSX load VSX move VSX->GPR GPR->VSX LQ (GPR)
;; STQ (GPR) GPR load GPR store GPR move XXSPLTIB VSPLTISW
+;; LXVKQ
;; VSX 0/-1 VMX const GPR const LVX (VMX) STVX (VMX)
(define_insn "vsx_mov<mode>_64bit"
[(set (match_operand:VSX_M 0 "nonimmediate_operand"
"=ZwO, wa, wa, r, we, ?wQ,
?&r, ??r, ??Y, <??r>, wa, v,
+ wa,
?wa, v, <??r>, wZ, v")
(match_operand:VSX_M 1 "input_operand"
"wa, ZwO, wa, we, r, r,
wQ, Y, r, r, wE, jwM,
+ eQ,
?jwM, W, <nW>, v, wZ"))]
"TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
@@ -1213,35 +1216,43 @@
[(set_attr "type"
"vecstore, vecload, vecsimple, mtvsr, mfvsr, load,
store, load, store, *, vecsimple, vecsimple,
+ vecperm,
vecsimple, *, *, vecstore, vecload")
(set_attr "num_insns"
"*, *, *, 2, *, 2,
2, 2, 2, 2, *, *,
+ *,
*, 5, 2, *, *")
(set_attr "max_prefixed_insns"
"*, *, *, *, *, 2,
2, 2, 2, 2, *, *,
+ *,
*, *, *, *, *")
(set_attr "length"
"*, *, *, 8, *, 8,
8, 8, 8, 8, *, *,
+ *,
*, 20, 8, *, *")
(set_attr "isa"
"<VSisa>, <VSisa>, <VSisa>, *, *, *,
*, *, *, *, p9v, *,
+ p10,
<VSisa>, *, *, *, *")])
;; VSX store VSX load VSX move GPR load GPR store GPR move
+;; LXVKQ
;; XXSPLTIB VSPLTISW VSX 0/-1 VMX const GPR const
;; LVX (VMX) STVX (VMX)
(define_insn "*vsx_mov<mode>_32bit"
[(set (match_operand:VSX_M 0 "nonimmediate_operand"
"=ZwO, wa, wa, ??r, ??Y, <??r>,
+ wa,
wa, v, ?wa, v, <??r>,
wZ, v")
(match_operand:VSX_M 1 "input_operand"
"wa, ZwO, wa, Y, r, r,
+ eQ,
wE, jwM, ?jwM, W, <nW>,
v, wZ"))]
@@ -1253,14 +1264,17 @@
}
[(set_attr "type"
"vecstore, vecload, vecsimple, load, store, *,
+ vecperm,
vecsimple, vecsimple, vecsimple, *, *,
vecstore, vecload")
(set_attr "length"
"*, *, *, 16, 16, 16,
+ *,
*, *, *, 20, 16,
*, *")
(set_attr "isa"
"<VSisa>, <VSisa>, <VSisa>, *, *, *,
+ p10,
p9v, *, <VSisa>, *, *,
*, *")])
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 8fd0f8d..69cb7e3 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -3336,6 +3336,10 @@ A constant whose negation is a signed 16-bit constant.
@item eI
A signed 34-bit integer constant if prefixed instructions are supported.
+@item eQ
+An IEEE 128-bit constant that can be loaded into a VSX register with
+the @code{lxvkq} instruction.
+
@ifset INTERNALS
@item G
A floating point constant that can be loaded into a register with one
diff --git a/gcc/testsuite/gcc.target/powerpc/float128-constant.c b/gcc/testsuite/gcc.target/powerpc/float128-constant.c
new file mode 100644
index 0000000..e3286a7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/float128-constant.c
@@ -0,0 +1,160 @@
+/* { dg-require-effective-target ppc_float128_hw } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+/* Test whether the LXVKQ instruction is generated to load special IEEE 128-bit
+ constants. */
+
+_Float128
+return_0 (void)
+{
+ return 0.0f128; /* XXSPLTIB 34,0. */
+}
+
+_Float128
+return_1 (void)
+{
+ return 1.0f128; /* LXVKQ 34,1. */
+}
+
+_Float128
+return_2 (void)
+{
+ return 2.0f128; /* LXVKQ 34,2. */
+}
+
+_Float128
+return_3 (void)
+{
+ return 3.0f128; /* LXVKQ 34,3. */
+}
+
+_Float128
+return_4 (void)
+{
+ return 4.0f128; /* LXVKQ 34,4. */
+}
+
+_Float128
+return_5 (void)
+{
+ return 5.0f128; /* LXVKQ 34,5. */
+}
+
+_Float128
+return_6 (void)
+{
+ return 6.0f128; /* LXVKQ 34,6. */
+}
+
+_Float128
+return_7 (void)
+{
+ return 7.0f128; /* LXVKQ 34,7. */
+}
+
+_Float128
+return_m0 (void)
+{
+ return -0.0f128; /* LXVKQ 34,16. */
+}
+
+_Float128
+return_m1 (void)
+{
+ return -1.0f128; /* LXVKQ 34,17. */
+}
+
+_Float128
+return_m2 (void)
+{
+ return -2.0f128; /* LXVKQ 34,18. */
+}
+
+_Float128
+return_m3 (void)
+{
+ return -3.0f128; /* LXVKQ 34,19. */
+}
+
+_Float128
+return_m4 (void)
+{
+ return -4.0f128; /* LXVKQ 34,20. */
+}
+
+_Float128
+return_m5 (void)
+{
+ return -5.0f128; /* LXVKQ 34,21. */
+}
+
+_Float128
+return_m6 (void)
+{
+ return -6.0f128; /* LXVKQ 34,22. */
+}
+
+_Float128
+return_m7 (void)
+{
+ return -7.0f128; /* LXVKQ 34,23. */
+}
+
+_Float128
+return_inf (void)
+{
+ return __builtin_inff128 (); /* LXVKQ 34,8. */
+}
+
+_Float128
+return_minf (void)
+{
+ return - __builtin_inff128 (); /* LXVKQ 34,24. */
+}
+
+_Float128
+return_nan (void)
+{
+ return __builtin_nanf128 (""); /* LXVKQ 34,9. */
+}
+
+/* Note, the following NaNs should not generate a LXVKQ instruction. */
+_Float128
+return_mnan (void)
+{
+ return - __builtin_nanf128 (""); /* PLXV 34,... */
+}
+
+_Float128
+return_nan2 (void)
+{
+ return __builtin_nanf128 ("1"); /* PLXV 34,... */
+}
+
+_Float128
+return_nans (void)
+{
+ return __builtin_nansf128 (""); /* PLXV 34,... */
+}
+
+vector long long
+return_longlong_neg_0 (void)
+{
+ /* This vector is the same pattern as -0.0F128. */
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define FIRST 0x8000000000000000
+#define SECOND 0x0000000000000000
+
+#else
+#define FIRST 0x0000000000000000
+#define SECOND 0x8000000000000000
+#endif
+
+ return (vector long long) { FIRST, SECOND }; /* LXVKQ 34,16. */
+}
+
+/* { dg-final { scan-assembler-times {\mlxvkq\M} 19 } } */
+/* { dg-final { scan-assembler-times {\mplxv\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mxxspltib\M} 1 } } */
+