Add LXVKQ support.

This patch adds support to generate the LXVKQ instruction to load specific
IEEE-128 floating point constants.

Compared to the last time I submitted this patch, I modified it so that it
uses the bit pattern of the vector to see if it can generate the LXVKQ
instruction.  This means on a little endian Power<xxx> system, the following
code will generate a LXVKQ 34,16 instruction:

    vector long long foo (void)
    {
      return (vector long long) { 0x0000000000000000, 0x8000000000000000 };
    }

because that vector pattern is the same bit pattern as -0.0F128.

2021-12-14  Michael Meissner  <meissner@the-meissners.org>

gcc/

	* config/rs6000/constraints.md (eQ): New constraint.
	* config/rs6000/predicates.md (easy_fp_constant): Add support for
	generating the LXVKQ instruction.
	(easy_vector_constant_ieee128): New predicate.
	(easy_vector_constant): Add support for generating the LXVKQ
	instruction.
	* config/rs6000/rs6000-protos.h (constant_generates_lxvkq): New
	declaration.
	* config/rs6000/rs6000.c (output_vec_const_move): Add support for
	generating LXVKQ.
	(constant_generates_lxvkq): New function.
	* config/rs6000/rs6000.opt (-mieee128-constant): New debug option.
	* config/rs6000/vsx.md (vsx_mov<mode>_64bit): Add support for
	generating LXVKQ.
	(vsx_mov<mode>_32bit): Likewise.
	* doc/md.texi (PowerPC and IBM RS6000 constraints): Document the
	eQ constraint.

gcc/testsuite/

	* gcc.target/powerpc/float128-constant.c: New test.
author: Michael Meissner <meissner@linux.ibm.com> 2021-12-15 00:57:44 -0500
committer: Michael Meissner <meissner@linux.ibm.com> 2021-12-15 00:57:44 -0500
commit: 8ccd8b12ded1782f4273fd1f381b7d554df61a12 (patch)
tree: da03675a2aa6ac752ea46c9b1c242d10e8bb77cb /gcc/config
parent: c6756b3bc1d2af1c8e86f0ad1711e9b9134520ba (diff)
download: gcc-8ccd8b12ded1782f4273fd1f381b7d554df61a12.zip
gcc-8ccd8b12ded1782f4273fd1f381b7d554df61a12.tar.gz
gcc-8ccd8b12ded1782f4273fd1f381b7d554df61a12.tar.bz2
6 files changed, 121 insertions, 0 deletions
diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index c8cff1a..e72132b 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -213,6 +213,12 @@
   "A signed 34-bit integer constant if prefixed instructions are supported."
   (match_operand 0 "cint34_operand"))
 
+;; A TF/KF scalar constant or a vector constant that can load certain IEEE
+;; 128-bit constants into vector registers using LXVKQ.
+(define_constraint "eQ"
+  "An IEEE 128-bit constant that can be loaded into VSX registers."
+  (match_operand 0 "easy_vector_constant_ieee128"))
+
 ;; Floating-point constraints.  These two are defined so that insn
 ;; length attributes can be calculated exactly.
 
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index f216ffd..be72167 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -601,6 +601,14 @@
   if (TARGET_VSX && op == CONST0_RTX (mode))
     return 1;
 
+  /* Constants that can be generated with ISA 3.1 instructions are easy.  */
+  vec_const_128bit_type vsx_const;
+  if (TARGET_POWER10 && vec_const_128bit_to_bytes (op, mode, &vsx_const))
+    {
+      if (constant_generates_lxvkq (&vsx_const) != 0)
+	return true;
+    }
+
   /* Otherwise consider floating point constants hard, so that the
      constant gets pushed to memory during the early RTL phases.  This
      has the advantage that double precision constants that can be
@@ -609,6 +617,23 @@
    return 0;
 })
 
+;; Return 1 if the operand is a special IEEE 128-bit value that can be loaded
+;; via the LXVKQ instruction.
+
+(define_predicate "easy_vector_constant_ieee128"
+  (match_code "const_vector,const_double")
+{
+  vec_const_128bit_type vsx_const;
+
+  /* Can we generate the LXVKQ instruction?  */
+  if (!TARGET_IEEE128_CONSTANT || !TARGET_FLOAT128_HW || !TARGET_POWER10
+      || !TARGET_VSX)
+    return false;
+
+  return (vec_const_128bit_to_bytes (op, mode, &vsx_const)
+	  && constant_generates_lxvkq (&vsx_const) != 0);
+})
+
 ;; Return 1 if the operand is a constant that can loaded with a XXSPLTIB
 ;; instruction and then a VUPKHSB, VECSB2W or VECSB2D instruction.
 
@@ -653,6 +678,15 @@
       if (zero_constant (op, mode) || all_ones_constant (op, mode))
 	return true;
 
+      /* Constants that can be generated with ISA 3.1 instructions are
+         easy.  */
+      vec_const_128bit_type vsx_const;
+      if (TARGET_POWER10 && vec_const_128bit_to_bytes (op, mode, &vsx_const))
+	{
+	  if (constant_generates_lxvkq (&vsx_const) != 0)
+	    return true;
+	}
+
       if (TARGET_P9_VECTOR
           && xxspltib_constant_p (op, mode, &num_insns, &value))
 	return true;
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index 81345d8..4a2e7fa 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -250,6 +250,7 @@ typedef struct {
 
 extern bool vec_const_128bit_to_bytes (rtx, machine_mode,
 				       vec_const_128bit_type *);
+extern unsigned constant_generates_lxvkq (vec_const_128bit_type *);
 #endif /* RTX_CODE */
 
 #ifdef TREE_CODE
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 819314d..0bc3844 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -6700,6 +6700,17 @@ output_vec_const_move (rtx *operands)
 	    gcc_unreachable ();
 	}
 
+      vec_const_128bit_type vsx_const;
+      if (TARGET_POWER10 && vec_const_128bit_to_bytes (vec, mode, &vsx_const))
+	{
+	  unsigned imm = constant_generates_lxvkq (&vsx_const);
+	  if (imm)
+	    {
+	      operands[2] = GEN_INT (imm);
+	      return "lxvkq %x0,%2";
+	    }
+	}
+
       if (TARGET_P9_VECTOR
 	  && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
 	{
@@ -28587,6 +28598,57 @@ vec_const_128bit_to_bytes (rtx op,
   return true;
 }
 
+/* Determine if an IEEE 128-bit constant can be loaded with LXVKQ.  Return zero
+   if the LXVKQ instruction cannot be used.  Otherwise return the immediate
+   value to be used with the LXVKQ instruction.  */
+
+unsigned
+constant_generates_lxvkq (vec_const_128bit_type *vsx_const)
+{
+  /* LXVKQ can only be used if -mieee128-constant is enabled and power10 code
+     generation, IEEE 128-bit hardware and VSX registers are all available.  */
+  if (!TARGET_IEEE128_CONSTANT || !TARGET_FLOAT128_HW || !TARGET_POWER10
+      || !TARGET_VSX)
+    return 0;
+
+  /* All of the constants that LXVKQ can generate have zero in the bottom 3
+     words (words[1] through words[3]).  */
+  if (vsx_const->words[1] != 0
+      || vsx_const->words[2] != 0
+      || vsx_const->words[3] != 0)
+      return 0;
+
+  /* See if we have a match for the first word.  */
+  switch (vsx_const->words[0])
+    {
+    case 0x3FFF0000U: return 1;		/* IEEE 128-bit +1.0.  */
+    case 0x40000000U: return 2;		/* IEEE 128-bit +2.0.  */
+    case 0x40008000U: return 3;		/* IEEE 128-bit +3.0.  */
+    case 0x40010000U: return 4;		/* IEEE 128-bit +4.0.  */
+    case 0x40014000U: return 5;		/* IEEE 128-bit +5.0.  */
+    case 0x40018000U: return 6;		/* IEEE 128-bit +6.0.  */
+    case 0x4001C000U: return 7;		/* IEEE 128-bit +7.0.  */
+    case 0x7FFF0000U: return 8;		/* IEEE 128-bit +Infinity.  */
+    case 0x7FFF8000U: return 9;		/* IEEE 128-bit quiet NaN.  */
+    case 0x80000000U: return 16;	/* IEEE 128-bit -0.0.  */
+    case 0xBFFF0000U: return 17;	/* IEEE 128-bit -1.0.  */
+    case 0xC0000000U: return 18;	/* IEEE 128-bit -2.0.  */
+    case 0xC0008000U: return 19;	/* IEEE 128-bit -3.0.  */
+    case 0xC0010000U: return 20;	/* IEEE 128-bit -4.0.  */
+    case 0xC0014000U: return 21;	/* IEEE 128-bit -5.0.  */
+    case 0xC0018000U: return 22;	/* IEEE 128-bit -6.0.  */
+    case 0xC001C000U: return 23;	/* IEEE 128-bit -7.0.  */
+    case 0xFFFF0000U: return 24;	/* IEEE 128-bit -Infinity.  */
+
+      /* Anything else cannot be loaded with LXVKQ.  */
+    default:
+      break;
+    }
+
+  return 0;
+}
+
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-rs6000.h"
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 9d7878f..b7433ec 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -640,6 +640,10 @@ mprivileged
 Target Var(rs6000_privileged) Init(0)
 Generate code that will run in privileged state.
 
+mieee128-constant
+Target Var(TARGET_IEEE128_CONSTANT) Init(1) Save
+Generate (do not generate) code that uses the LXVKQ instruction.
+
 -param=rs6000-density-pct-threshold=
 Target Undocumented Joined UInteger Var(rs6000_density_pct_threshold) Init(85) IntegerRange(0, 100) Param
 When costing for loop vectorization, we probably need to penalize the loop body
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 83d6c7b..de04840 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -1192,16 +1192,19 @@
 
 ;;              VSX store  VSX load   VSX move  VSX->GPR   GPR->VSX    LQ (GPR)
 ;;              STQ (GPR)  GPR load   GPR store GPR move   XXSPLTIB    VSPLTISW
+;;              LXVKQ
 ;;              VSX 0/-1   VMX const  GPR const LVX (VMX)  STVX (VMX)
 (define_insn "vsx_mov<mode>_64bit"
   [(set (match_operand:VSX_M 0 "nonimmediate_operand"
                "=ZwO,      wa,        wa,        r,         we,        ?wQ,
                 ?&r,       ??r,       ??Y,       <??r>,     wa,        v,
+                wa,
                 ?wa,       v,         <??r>,     wZ,        v")
 
 	(match_operand:VSX_M 1 "input_operand" 
                "wa,        ZwO,       wa,        we,        r,         r,
                 wQ,        Y,         r,         r,         wE,        jwM,
+                eQ,
                 ?jwM,      W,         <nW>,      v,         wZ"))]
 
   "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
@@ -1213,35 +1216,43 @@
   [(set_attr "type"
                "vecstore,  vecload,   vecsimple, mtvsr,     mfvsr,     load,
                 store,     load,      store,     *,         vecsimple, vecsimple,
+                vecperm,
                 vecsimple, *,         *,         vecstore,  vecload")
    (set_attr "num_insns"
                "*,         *,         *,         2,         *,         2,
                 2,         2,         2,         2,         *,         *,
+                *,
                 *,         5,         2,         *,         *")
    (set_attr "max_prefixed_insns"
                "*,         *,         *,         *,         *,         2,
                 2,         2,         2,         2,         *,         *,
+                *,
                 *,         *,         *,         *,         *")
    (set_attr "length"
                "*,         *,         *,         8,         *,         8,
                 8,         8,         8,         8,         *,         *,
+                *,
                 *,         20,        8,         *,         *")
    (set_attr "isa"
                "<VSisa>,   <VSisa>,   <VSisa>,   *,         *,         *,
                 *,         *,         *,         *,         p9v,       *,
+                p10,
                 <VSisa>,   *,         *,         *,         *")])
 
 ;;              VSX store  VSX load   VSX move   GPR load   GPR store  GPR move
+;;              LXVKQ
 ;;              XXSPLTIB   VSPLTISW   VSX 0/-1   VMX const  GPR const
 ;;              LVX (VMX)  STVX (VMX)
 (define_insn "*vsx_mov<mode>_32bit"
   [(set (match_operand:VSX_M 0 "nonimmediate_operand"
                "=ZwO,      wa,        wa,        ??r,       ??Y,       <??r>,
+                wa,
                 wa,        v,         ?wa,       v,         <??r>,
                 wZ,        v")
 
 	(match_operand:VSX_M 1 "input_operand" 
                "wa,        ZwO,       wa,        Y,         r,         r,
+                eQ,
                 wE,        jwM,       ?jwM,      W,         <nW>,
                 v,         wZ"))]
 
@@ -1253,14 +1264,17 @@
 }
   [(set_attr "type"
                "vecstore,  vecload,   vecsimple, load,      store,    *,
+                vecperm,
                 vecsimple, vecsimple, vecsimple, *,         *,
                 vecstore,  vecload")
    (set_attr "length"
                "*,         *,         *,         16,        16,        16,
+                *,
                 *,         *,         *,         20,        16,
                 *,         *")
    (set_attr "isa"
                "<VSisa>,   <VSisa>,   <VSisa>,   *,         *,         *,
+                p10,
                 p9v,       *,         <VSisa>,   *,         *,
                 *,         *")])
author	Michael Meissner <meissner@linux.ibm.com>	2021-12-15 00:57:44 -0500
committer	Michael Meissner <meissner@linux.ibm.com>	2021-12-15 00:57:44 -0500
commit	8ccd8b12ded1782f4273fd1f381b7d554df61a12 (patch)
tree	da03675a2aa6ac752ea46c9b1c242d10e8bb77cb /gcc/config
parent	c6756b3bc1d2af1c8e86f0ad1711e9b9134520ba (diff)
download	gcc-8ccd8b12ded1782f4273fd1f381b7d554df61a12.zip gcc-8ccd8b12ded1782f4273fd1f381b7d554df61a12.tar.gz gcc-8ccd8b12ded1782f4273fd1f381b7d554df61a12.tar.bz2