aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJakub Jelinek <jakub@redhat.com>2018-05-29 13:58:24 +0200
committerJakub Jelinek <jakub@gcc.gnu.org>2018-05-29 13:58:24 +0200
commit1bda738bab8193f0fb4551672d3be928d2015cd2 (patch)
treebcf9ea503ca283dec2dd4da451af24384493afe2
parentf8c0baaf31ac987bd1e85a3ba2fa8a2edeff92a8 (diff)
downloadgcc-1bda738bab8193f0fb4551672d3be928d2015cd2.zip
gcc-1bda738bab8193f0fb4551672d3be928d2015cd2.tar.gz
gcc-1bda738bab8193f0fb4551672d3be928d2015cd2.tar.bz2
re PR target/85918 (Conversions to/from [unsigned] long long are not vectorized for AVX512DQ target)
PR target/85918 * tree.def (VEC_UNPACK_FIX_TRUNC_HI_EXPR, VEC_UNPACK_FIX_TRUNC_LO_EXPR, VEC_PACK_FLOAT_EXPR): New tree codes. * tree-pretty-print.c (op_code_prio): Handle VEC_UNPACK_FIX_TRUNC_HI_EXPR and VEC_UNPACK_FIX_TRUNC_LO_EXPR. (dump_generic_node): Handle VEC_UNPACK_FIX_TRUNC_HI_EXPR, VEC_UNPACK_FIX_TRUNC_LO_EXPR and VEC_PACK_FLOAT_EXPR. * tree-inline.c (estimate_operator_cost): Likewise. * gimple-pretty-print.c (dump_binary_rhs): Handle VEC_PACK_FLOAT_EXPR. * fold-const.c (const_binop): Likewise. (const_unop): Handle VEC_UNPACK_FIX_TRUNC_HI_EXPR and VEC_UNPACK_FIX_TRUNC_LO_EXPR. * tree-cfg.c (verify_gimple_assign_unary): Likewise. (verify_gimple_assign_binary): Handle VEC_PACK_FLOAT_EXPR. * cfgexpand.c (expand_debug_expr): Handle VEC_UNPACK_FIX_TRUNC_HI_EXPR, VEC_UNPACK_FIX_TRUNC_LO_EXPR and VEC_PACK_FLOAT_EXPR. * expr.c (expand_expr_real_2): Likewise. * optabs.def (vec_packs_float_optab, vec_packu_float_optab, vec_unpack_sfix_trunc_hi_optab, vec_unpack_sfix_trunc_lo_optab, vec_unpack_ufix_trunc_hi_optab, vec_unpack_ufix_trunc_lo_optab): New optabs. * optabs.c (expand_widen_pattern_expr): For VEC_UNPACK_FIX_TRUNC_HI_EXPR and VEC_UNPACK_FIX_TRUNC_LO_EXPR use sign from result type rather than operand's type. (expand_binop_directly): For vec_packu_float_optab and vec_packs_float_optab allow result type to be different from operand's type. * optabs-tree.c (optab_for_tree_code): Handle VEC_UNPACK_FIX_TRUNC_HI_EXPR, VEC_UNPACK_FIX_TRUNC_LO_EXPR and VEC_PACK_FLOAT_EXPR. Formatting fixes. * tree-vect-generic.c (expand_vector_operations_1): Handle VEC_UNPACK_FIX_TRUNC_HI_EXPR, VEC_UNPACK_FIX_TRUNC_LO_EXPR and VEC_PACK_FLOAT_EXPR. * tree-vect-stmts.c (supportable_widening_operation): Handle FIX_TRUNC_EXPR. (supportable_narrowing_operation): Handle FLOAT_EXPR. * config/i386/i386.md (fixprefix, floatprefix): New code attributes. * config/i386/sse.md (*float<floatunssuffix>v2div2sf2): Rename to ... (float<floatunssuffix>v2div2sf2): ... this. Formatting fix. 
(vpckfloat_concat_mode, vpckfloat_temp_mode, vpckfloat_op_mode): New mode attributes. (vec_pack<floatprefix>_float_<mode>): New expander. (vunpckfixt_mode, vunpckfixt_model, vunpckfixt_extract_mode): New mode attributes. (vec_unpack_<fixprefix>fix_trunc_lo_<mode>, vec_unpack_<fixprefix>fix_trunc_hi_<mode>): New expanders. * doc/md.texi (vec_packs_float_@var{m}, vec_packu_float_@var{m}, vec_unpack_sfix_trunc_hi_@var{m}, vec_unpack_sfix_trunc_lo_@var{m}, vec_unpack_ufix_trunc_hi_@var{m}, vec_unpack_ufix_trunc_lo_@var{m}): Document. * doc/generic.texi (VEC_UNPACK_FLOAT_HI_EXPR, VEC_UNPACK_FLOAT_LO_EXPR): Fix pasto in description. (VEC_UNPACK_FIX_TRUNC_HI_EXPR, VEC_UNPACK_FIX_TRUNC_LO_EXPR, VEC_PACK_FLOAT_EXPR): Document. * gcc.target/i386/avx512dq-pr85918.c: Add -mprefer-vector-width=512 and -fno-vect-cost-model options. Add aligned(64) attribute to the arrays. Add suffix 1 to all functions and use 4 iterations rather than N. Add functions with conversions to and from float. Add new set of functions with 8 iterations and another one with 16 iterations, expect 24 vectorized loops instead of just 4. * gcc.target/i386/avx512dq-pr85918-2.c: New test. From-SVN: r260893
-rw-r--r--gcc/ChangeLog57
-rw-r--r--gcc/cfgexpand.c3
-rw-r--r--gcc/config/i386/i386.md2
-rw-r--r--gcc/config/i386/sse.md81
-rw-r--r--gcc/doc/generic.texi27
-rw-r--r--gcc/doc/md.texi22
-rw-r--r--gcc/expr.c14
-rw-r--r--gcc/fold-const.c15
-rw-r--r--gcc/gimple-pretty-print.c1
-rw-r--r--gcc/optabs-tree.c61
-rw-r--r--gcc/optabs.c15
-rw-r--r--gcc/optabs.def6
-rw-r--r--gcc/testsuite/ChangeLog11
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-pr85918-2.c435
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-pr85918.c187
-rw-r--r--gcc/tree-cfg.c20
-rw-r--r--gcc/tree-inline.c3
-rw-r--r--gcc/tree-pretty-print.c22
-rw-r--r--gcc/tree-vect-generic.c5
-rw-r--r--gcc/tree-vect-stmts.c27
-rw-r--r--gcc/tree.def15
21 files changed, 970 insertions, 59 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index d2d02b2..6a3747c 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,60 @@
+2018-05-29 Jakub Jelinek <jakub@redhat.com>
+
+ PR target/85918
+ * tree.def (VEC_UNPACK_FIX_TRUNC_HI_EXPR, VEC_UNPACK_FIX_TRUNC_LO_EXPR,
+ VEC_PACK_FLOAT_EXPR): New tree codes.
+ * tree-pretty-print.c (op_code_prio): Handle
+ VEC_UNPACK_FIX_TRUNC_HI_EXPR and VEC_UNPACK_FIX_TRUNC_LO_EXPR.
+ (dump_generic_node): Handle VEC_UNPACK_FIX_TRUNC_HI_EXPR,
+ VEC_UNPACK_FIX_TRUNC_LO_EXPR and VEC_PACK_FLOAT_EXPR.
+ * tree-inline.c (estimate_operator_cost): Likewise.
+ * gimple-pretty-print.c (dump_binary_rhs): Handle VEC_PACK_FLOAT_EXPR.
+ * fold-const.c (const_binop): Likewise.
+ (const_unop): Handle VEC_UNPACK_FIX_TRUNC_HI_EXPR and
+ VEC_UNPACK_FIX_TRUNC_LO_EXPR.
+ * tree-cfg.c (verify_gimple_assign_unary): Likewise.
+ (verify_gimple_assign_binary): Handle VEC_PACK_FLOAT_EXPR.
+ * cfgexpand.c (expand_debug_expr): Handle VEC_UNPACK_FIX_TRUNC_HI_EXPR,
+ VEC_UNPACK_FIX_TRUNC_LO_EXPR and VEC_PACK_FLOAT_EXPR.
+ * expr.c (expand_expr_real_2): Likewise.
+ * optabs.def (vec_packs_float_optab, vec_packu_float_optab,
+ vec_unpack_sfix_trunc_hi_optab, vec_unpack_sfix_trunc_lo_optab,
+ vec_unpack_ufix_trunc_hi_optab, vec_unpack_ufix_trunc_lo_optab): New
+ optabs.
+ * optabs.c (expand_widen_pattern_expr): For
+ VEC_UNPACK_FIX_TRUNC_HI_EXPR and VEC_UNPACK_FIX_TRUNC_LO_EXPR use
+ sign from result type rather than operand's type.
+ (expand_binop_directly): For vec_packu_float_optab and
+ vec_packs_float_optab allow result type to be different from operand's
+ type.
+ * optabs-tree.c (optab_for_tree_code): Handle
+ VEC_UNPACK_FIX_TRUNC_HI_EXPR, VEC_UNPACK_FIX_TRUNC_LO_EXPR and
+ VEC_PACK_FLOAT_EXPR. Formatting fixes.
+ * tree-vect-generic.c (expand_vector_operations_1): Handle
+ VEC_UNPACK_FIX_TRUNC_HI_EXPR, VEC_UNPACK_FIX_TRUNC_LO_EXPR and
+ VEC_PACK_FLOAT_EXPR.
+ * tree-vect-stmts.c (supportable_widening_operation): Handle
+ FIX_TRUNC_EXPR.
+ (supportable_narrowing_operation): Handle FLOAT_EXPR.
+ * config/i386/i386.md (fixprefix, floatprefix): New code attributes.
+ * config/i386/sse.md (*float<floatunssuffix>v2div2sf2): Rename to ...
+ (float<floatunssuffix>v2div2sf2): ... this. Formatting fix.
+ (vpckfloat_concat_mode, vpckfloat_temp_mode, vpckfloat_op_mode): New
+ mode attributes.
+ (vec_pack<floatprefix>_float_<mode>): New expander.
+ (vunpckfixt_mode, vunpckfixt_model, vunpckfixt_extract_mode): New mode
+ attributes.
+ (vec_unpack_<fixprefix>fix_trunc_lo_<mode>,
+ vec_unpack_<fixprefix>fix_trunc_hi_<mode>): New expanders.
+ * doc/md.texi (vec_packs_float_@var{m}, vec_packu_float_@var{m},
+ vec_unpack_sfix_trunc_hi_@var{m}, vec_unpack_sfix_trunc_lo_@var{m},
+ vec_unpack_ufix_trunc_hi_@var{m}, vec_unpack_ufix_trunc_lo_@var{m}):
+ Document.
+ * doc/generic.texi (VEC_UNPACK_FLOAT_HI_EXPR,
+ VEC_UNPACK_FLOAT_LO_EXPR): Fix pasto in description.
+ (VEC_UNPACK_FIX_TRUNC_HI_EXPR, VEC_UNPACK_FIX_TRUNC_LO_EXPR,
+ VEC_PACK_FLOAT_EXPR): Document.
+
2018-05-29 Richard Biener <rguenther@suse.de>
* tree-vectorizer.h (struct vec_info): Add stmt_vec_infos
diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index 5c323be..c61104d 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -5101,8 +5101,11 @@ expand_debug_expr (tree exp)
case REALIGN_LOAD_EXPR:
case VEC_COND_EXPR:
case VEC_PACK_FIX_TRUNC_EXPR:
+ case VEC_PACK_FLOAT_EXPR:
case VEC_PACK_SAT_EXPR:
case VEC_PACK_TRUNC_EXPR:
+ case VEC_UNPACK_FIX_TRUNC_HI_EXPR:
+ case VEC_UNPACK_FIX_TRUNC_LO_EXPR:
case VEC_UNPACK_FLOAT_HI_EXPR:
case VEC_UNPACK_FLOAT_LO_EXPR:
case VEC_UNPACK_HI_EXPR:
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 12995be..209bf3f 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -982,11 +982,13 @@
(define_code_iterator any_fix [fix unsigned_fix])
(define_code_attr fixsuffix [(fix "") (unsigned_fix "u")])
(define_code_attr fixunssuffix [(fix "") (unsigned_fix "uns")])
+(define_code_attr fixprefix [(fix "s") (unsigned_fix "u")])
;; Used in signed and unsigned float.
(define_code_iterator any_float [float unsigned_float])
(define_code_attr floatsuffix [(float "") (unsigned_float "u")])
(define_code_attr floatunssuffix [(float "") (unsigned_float "uns")])
+(define_code_attr floatprefix [(float "s") (unsigned_float "u")])
;; All integer modes.
(define_mode_iterator SWI1248x [QI HI SI DI])
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index ed37b98..dd65e57 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -4887,9 +4887,9 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")])
-(define_insn "*float<floatunssuffix>v2div2sf2"
+(define_insn "float<floatunssuffix>v2div2sf2"
[(set (match_operand:V4SF 0 "register_operand" "=v")
- (vec_concat:V4SF
+ (vec_concat:V4SF
(any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
(const_vector:V2SF [(const_int 0) (const_int 0)])))]
"TARGET_AVX512DQ && TARGET_AVX512VL"
@@ -4898,6 +4898,33 @@
(set_attr "prefix" "evex")
(set_attr "mode" "V4SF")])
+(define_mode_attr vpckfloat_concat_mode
+ [(V8DI "v16sf") (V4DI "v8sf") (V2DI "v8sf")])
+(define_mode_attr vpckfloat_temp_mode
+ [(V8DI "V8SF") (V4DI "V4SF") (V2DI "V4SF")])
+(define_mode_attr vpckfloat_op_mode
+ [(V8DI "v8sf") (V4DI "v4sf") (V2DI "v2sf")])
+
+(define_expand "vec_pack<floatprefix>_float_<mode>"
+ [(match_operand:<ssePSmode> 0 "register_operand")
+ (any_float:<ssePSmode>
+ (match_operand:VI8_AVX512VL 1 "register_operand"))
+ (match_operand:VI8_AVX512VL 2 "register_operand")]
+ "TARGET_AVX512DQ"
+{
+ rtx r1 = gen_reg_rtx (<vpckfloat_temp_mode>mode);
+ rtx r2 = gen_reg_rtx (<vpckfloat_temp_mode>mode);
+ rtx (*gen) (rtx, rtx) = gen_float<floatunssuffix><mode><vpckfloat_op_mode>2;
+ emit_insn (gen (r1, operands[1]));
+ emit_insn (gen (r2, operands[2]));
+ if (<MODE>mode == V2DImode)
+ emit_insn (gen_sse_movlhps (operands[0], r1, r2));
+ else
+ emit_insn (gen_avx_vec_concat<vpckfloat_concat_mode> (operands[0],
+ r1, r2));
+ DONE;
+})
+
(define_insn "float<floatunssuffix>v2div2sf2_mask"
[(set (match_operand:V4SF 0 "register_operand" "=v")
(vec_concat:V4SF
@@ -5177,6 +5204,56 @@
(set_attr "prefix" "evex")
(set_attr "mode" "TI")])
+(define_mode_attr vunpckfixt_mode
+ [(V16SF "V8DI") (V8SF "V4DI") (V4SF "V2DI")])
+(define_mode_attr vunpckfixt_model
+ [(V16SF "v8di") (V8SF "v4di") (V4SF "v2di")])
+(define_mode_attr vunpckfixt_extract_mode
+ [(V16SF "v16sf") (V8SF "v8sf") (V4SF "v8sf")])
+
+(define_expand "vec_unpack_<fixprefix>fix_trunc_lo_<mode>"
+ [(match_operand:<vunpckfixt_mode> 0 "register_operand")
+ (any_fix:<vunpckfixt_mode>
+ (match_operand:VF1_AVX512VL 1 "register_operand"))]
+ "TARGET_AVX512DQ"
+{
+ rtx tem = operands[1];
+ if (<MODE>mode != V4SFmode)
+ {
+ tem = gen_reg_rtx (<ssehalfvecmode>mode);
+ emit_insn (gen_vec_extract_lo_<vunpckfixt_extract_mode> (tem,
+ operands[1]));
+ }
+ rtx (*gen) (rtx, rtx)
+ = gen_fix<fixunssuffix>_trunc<ssehalfvecmodelower><vunpckfixt_model>2;
+ emit_insn (gen (operands[0], tem));
+ DONE;
+})
+
+(define_expand "vec_unpack_<fixprefix>fix_trunc_hi_<mode>"
+ [(match_operand:<vunpckfixt_mode> 0 "register_operand")
+ (any_fix:<vunpckfixt_mode>
+ (match_operand:VF1_AVX512VL 1 "register_operand"))]
+ "TARGET_AVX512DQ"
+{
+ rtx tem;
+ if (<MODE>mode != V4SFmode)
+ {
+ tem = gen_reg_rtx (<ssehalfvecmode>mode);
+ emit_insn (gen_vec_extract_hi_<vunpckfixt_extract_mode> (tem,
+ operands[1]));
+ }
+ else
+ {
+ tem = gen_reg_rtx (V4SFmode);
+ emit_insn (gen_avx_vpermilv4sf (tem, operands[1], GEN_INT (0x4e)));
+ }
+ rtx (*gen) (rtx, rtx)
+ = gen_fix<fixunssuffix>_trunc<ssehalfvecmodelower><vunpckfixt_model>2;
+ emit_insn (gen (operands[0], tem));
+ DONE;
+})
+
(define_insn "ufix_trunc<mode><sseintvecmodelower>2<mask_name>"
[(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
(unsigned_fix:<sseintvecmode>
diff --git a/gcc/doc/generic.texi b/gcc/doc/generic.texi
index 653a17c..a4d0af4 100644
--- a/gcc/doc/generic.texi
+++ b/gcc/doc/generic.texi
@@ -1789,9 +1789,12 @@ a value from @code{enum annot_expr_kind}, the third is an @code{INTEGER_CST}.
@tindex VEC_UNPACK_LO_EXPR
@tindex VEC_UNPACK_FLOAT_HI_EXPR
@tindex VEC_UNPACK_FLOAT_LO_EXPR
+@tindex VEC_UNPACK_FIX_TRUNC_HI_EXPR
+@tindex VEC_UNPACK_FIX_TRUNC_LO_EXPR
@tindex VEC_PACK_TRUNC_EXPR
@tindex VEC_PACK_SAT_EXPR
@tindex VEC_PACK_FIX_TRUNC_EXPR
+@tindex VEC_PACK_FLOAT_EXPR
@tindex VEC_COND_EXPR
@tindex SAD_EXPR
@@ -1846,10 +1849,22 @@ where the values are converted from fixed point to floating point. The
single operand is a vector that contains @code{N} elements of the same
integral type. The result is a vector that contains half as many elements
of a floating point type whose size is twice as wide. In the case of
-@code{VEC_UNPACK_HI_EXPR} the high @code{N/2} elements of the vector are
-extracted, converted and widened. In the case of @code{VEC_UNPACK_LO_EXPR}
+@code{VEC_UNPACK_FLOAT_HI_EXPR} the high @code{N/2} elements of the vector are
+extracted, converted and widened. In the case of @code{VEC_UNPACK_FLOAT_LO_EXPR}
the low @code{N/2} elements of the vector are extracted, converted and widened.
+@item VEC_UNPACK_FIX_TRUNC_HI_EXPR
+@itemx VEC_UNPACK_FIX_TRUNC_LO_EXPR
+These nodes represent unpacking of the high and low parts of the input vector,
+where the values are truncated from floating point to fixed point. The
+single operand is a vector that contains @code{N} elements of the same
+floating point type. The result is a vector that contains half as many
+elements of an integral type whose size is twice as wide. In the case of
+@code{VEC_UNPACK_FIX_TRUNC_HI_EXPR} the high @code{N/2} elements of the
+vector are extracted and converted with truncation. In the case of
+@code{VEC_UNPACK_FIX_TRUNC_LO_EXPR} the low @code{N/2} elements of the
+vector are extracted and converted with truncation.
+
@item VEC_PACK_TRUNC_EXPR
This node represents packing of truncated elements of the two input vectors
into the output vector. Input operands are vectors that contain the same
@@ -1875,6 +1890,14 @@ twice as many elements of an integral type whose size is half as wide. The
elements of the two vectors are merged (concatenated) to form the output
vector.
+@item VEC_PACK_FLOAT_EXPR
+This node represents packing of elements of the two input vectors into the
+output vector, where the values are converted from fixed point to floating
+point. Input operands are vectors that contain the same number of elements
+of an integral type. The result is a vector that contains twice as many
+elements of a floating point type whose size is half as wide. The elements of
+the two vectors are merged (concatenated) to form the output vector.
+
@item VEC_COND_EXPR
These nodes represent @code{?:} expressions. The three operands must be
vectors of the same size and number of elements. The second and third
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 02fbfb3..be37619 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -5371,6 +5371,14 @@ of two vectors. Operands 1 and 2 are vectors of the same mode having N
floating point elements of size S@. Operand 0 is the resulting vector
in which 2*N elements of size N/2 are concatenated.
+@cindex @code{vec_packs_float_@var{m}} instruction pattern
+@cindex @code{vec_packu_float_@var{m}} instruction pattern
+@item @samp{vec_packs_float_@var{m}}, @samp{vec_packu_float_@var{m}}
+Narrow, convert to floating point type and merge the elements
+of two vectors. Operands 1 and 2 are vectors of the same mode having N
+signed/unsigned integral elements of size S@. Operand 0 is the resulting vector
+in which 2*N elements of size S/2 are concatenated.
+
@cindex @code{vec_unpacks_hi_@var{m}} instruction pattern
@cindex @code{vec_unpacks_lo_@var{m}} instruction pattern
@item @samp{vec_unpacks_hi_@var{m}}, @samp{vec_unpacks_lo_@var{m}}
@@ -5400,6 +5408,20 @@ has N elements of size S@. Convert the high/low elements of the vector using
floating point conversion and place the resulting N/2 values of size 2*S in
the output vector (operand 0).
+@cindex @code{vec_unpack_sfix_trunc_hi_@var{m}} instruction pattern
+@cindex @code{vec_unpack_sfix_trunc_lo_@var{m}} instruction pattern
+@cindex @code{vec_unpack_ufix_trunc_hi_@var{m}} instruction pattern
+@cindex @code{vec_unpack_ufix_trunc_lo_@var{m}} instruction pattern
+@item @samp{vec_unpack_sfix_trunc_hi_@var{m}}
+@itemx @samp{vec_unpack_sfix_trunc_lo_@var{m}}
+@itemx @samp{vec_unpack_ufix_trunc_hi_@var{m}}
+@itemx @samp{vec_unpack_ufix_trunc_lo_@var{m}}
+Extract, convert to signed/unsigned integer type and widen the high/low part of a
+vector of floating point elements. The input vector (operand 1)
+has N elements of size S@. Convert the high/low elements of the vector
+to integers and place the resulting N/2 values of size 2*S in
+the output vector (operand 0).
+
@cindex @code{vec_widen_umult_hi_@var{m}} instruction pattern
@cindex @code{vec_widen_umult_lo_@var{m}} instruction pattern
@cindex @code{vec_widen_smult_hi_@var{m}} instruction pattern
diff --git a/gcc/expr.c b/gcc/expr.c
index ecc5292..51fbc32 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -9458,6 +9458,8 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
case VEC_UNPACK_HI_EXPR:
case VEC_UNPACK_LO_EXPR:
+ case VEC_UNPACK_FIX_TRUNC_HI_EXPR:
+ case VEC_UNPACK_FIX_TRUNC_LO_EXPR:
{
op0 = expand_normal (treeop0);
temp = expand_widen_pattern_expr (ops, op0, NULL_RTX, NULL_RTX,
@@ -9497,6 +9499,18 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
mode = TYPE_MODE (TREE_TYPE (treeop0));
goto binop;
+ case VEC_PACK_FLOAT_EXPR:
+ mode = TYPE_MODE (TREE_TYPE (treeop0));
+ expand_operands (treeop0, treeop1,
+ subtarget, &op0, &op1, EXPAND_NORMAL);
+ this_optab = optab_for_tree_code (code, TREE_TYPE (treeop0),
+ optab_default);
+ target = expand_binop (mode, this_optab, op0, op1, target,
+ TYPE_UNSIGNED (TREE_TYPE (treeop0)),
+ OPTAB_LIB_WIDEN);
+ gcc_assert (target);
+ return target;
+
case VEC_PERM_EXPR:
{
expand_operands (treeop0, treeop1, target, &op0, &op1, EXPAND_NORMAL);
diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index 0f57f07..3258aad 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -1622,6 +1622,7 @@ const_binop (enum tree_code code, tree type, tree arg1, tree arg2)
case VEC_PACK_TRUNC_EXPR:
case VEC_PACK_FIX_TRUNC_EXPR:
+ case VEC_PACK_FLOAT_EXPR:
{
unsigned int HOST_WIDE_INT out_nelts, in_nelts, i;
@@ -1643,7 +1644,9 @@ const_binop (enum tree_code code, tree type, tree arg1, tree arg2)
? VECTOR_CST_ELT (arg1, i)
: VECTOR_CST_ELT (arg2, i - in_nelts));
elt = fold_convert_const (code == VEC_PACK_TRUNC_EXPR
- ? NOP_EXPR : FIX_TRUNC_EXPR,
+ ? NOP_EXPR
+ : code == VEC_PACK_FLOAT_EXPR
+ ? FLOAT_EXPR : FIX_TRUNC_EXPR,
TREE_TYPE (type), elt);
if (elt == NULL_TREE || !CONSTANT_CLASS_P (elt))
return NULL_TREE;
@@ -1817,6 +1820,8 @@ const_unop (enum tree_code code, tree type, tree arg0)
case VEC_UNPACK_HI_EXPR:
case VEC_UNPACK_FLOAT_LO_EXPR:
case VEC_UNPACK_FLOAT_HI_EXPR:
+ case VEC_UNPACK_FIX_TRUNC_LO_EXPR:
+ case VEC_UNPACK_FIX_TRUNC_HI_EXPR:
{
unsigned HOST_WIDE_INT out_nelts, in_nelts, i;
enum tree_code subcode;
@@ -1831,13 +1836,17 @@ const_unop (enum tree_code code, tree type, tree arg0)
unsigned int offset = 0;
if ((!BYTES_BIG_ENDIAN) ^ (code == VEC_UNPACK_LO_EXPR
- || code == VEC_UNPACK_FLOAT_LO_EXPR))
+ || code == VEC_UNPACK_FLOAT_LO_EXPR
+ || code == VEC_UNPACK_FIX_TRUNC_LO_EXPR))
offset = out_nelts;
if (code == VEC_UNPACK_LO_EXPR || code == VEC_UNPACK_HI_EXPR)
subcode = NOP_EXPR;
- else
+ else if (code == VEC_UNPACK_FLOAT_LO_EXPR
+ || code == VEC_UNPACK_FLOAT_HI_EXPR)
subcode = FLOAT_EXPR;
+ else
+ subcode = FIX_TRUNC_EXPR;
tree_vector_builder elts (type, out_nelts, 1);
for (i = 0; i < out_nelts; i++)
diff --git a/gcc/gimple-pretty-print.c b/gcc/gimple-pretty-print.c
index 49e9e12..c0d6e15 100644
--- a/gcc/gimple-pretty-print.c
+++ b/gcc/gimple-pretty-print.c
@@ -429,6 +429,7 @@ dump_binary_rhs (pretty_printer *buffer, gassign *gs, int spc,
case VEC_PACK_TRUNC_EXPR:
case VEC_PACK_SAT_EXPR:
case VEC_PACK_FIX_TRUNC_EXPR:
+ case VEC_PACK_FLOAT_EXPR:
case VEC_WIDEN_LSHIFT_HI_EXPR:
case VEC_WIDEN_LSHIFT_LO_EXPR:
case VEC_SERIES_EXPR:
diff --git a/gcc/optabs-tree.c b/gcc/optabs-tree.c
index 73e6654..11cea17 100644
--- a/gcc/optabs-tree.c
+++ b/gcc/optabs-tree.c
@@ -144,46 +144,58 @@ optab_for_tree_code (enum tree_code code, const_tree type,
? ssmsub_widen_optab : smsub_widen_optab));
case VEC_WIDEN_MULT_HI_EXPR:
- return TYPE_UNSIGNED (type) ?
- vec_widen_umult_hi_optab : vec_widen_smult_hi_optab;
+ return (TYPE_UNSIGNED (type)
+ ? vec_widen_umult_hi_optab : vec_widen_smult_hi_optab);
case VEC_WIDEN_MULT_LO_EXPR:
- return TYPE_UNSIGNED (type) ?
- vec_widen_umult_lo_optab : vec_widen_smult_lo_optab;
+ return (TYPE_UNSIGNED (type)
+ ? vec_widen_umult_lo_optab : vec_widen_smult_lo_optab);
case VEC_WIDEN_MULT_EVEN_EXPR:
- return TYPE_UNSIGNED (type) ?
- vec_widen_umult_even_optab : vec_widen_smult_even_optab;
+ return (TYPE_UNSIGNED (type)
+ ? vec_widen_umult_even_optab : vec_widen_smult_even_optab);
case VEC_WIDEN_MULT_ODD_EXPR:
- return TYPE_UNSIGNED (type) ?
- vec_widen_umult_odd_optab : vec_widen_smult_odd_optab;
+ return (TYPE_UNSIGNED (type)
+ ? vec_widen_umult_odd_optab : vec_widen_smult_odd_optab);
case VEC_WIDEN_LSHIFT_HI_EXPR:
- return TYPE_UNSIGNED (type) ?
- vec_widen_ushiftl_hi_optab : vec_widen_sshiftl_hi_optab;
+ return (TYPE_UNSIGNED (type)
+ ? vec_widen_ushiftl_hi_optab : vec_widen_sshiftl_hi_optab);
case VEC_WIDEN_LSHIFT_LO_EXPR:
- return TYPE_UNSIGNED (type) ?
- vec_widen_ushiftl_lo_optab : vec_widen_sshiftl_lo_optab;
+ return (TYPE_UNSIGNED (type)
+ ? vec_widen_ushiftl_lo_optab : vec_widen_sshiftl_lo_optab);
case VEC_UNPACK_HI_EXPR:
- return TYPE_UNSIGNED (type) ?
- vec_unpacku_hi_optab : vec_unpacks_hi_optab;
+ return (TYPE_UNSIGNED (type)
+ ? vec_unpacku_hi_optab : vec_unpacks_hi_optab);
case VEC_UNPACK_LO_EXPR:
- return TYPE_UNSIGNED (type) ?
- vec_unpacku_lo_optab : vec_unpacks_lo_optab;
+ return (TYPE_UNSIGNED (type)
+ ? vec_unpacku_lo_optab : vec_unpacks_lo_optab);
case VEC_UNPACK_FLOAT_HI_EXPR:
/* The signedness is determined from input operand. */
- return TYPE_UNSIGNED (type) ?
- vec_unpacku_float_hi_optab : vec_unpacks_float_hi_optab;
+ return (TYPE_UNSIGNED (type)
+ ? vec_unpacku_float_hi_optab : vec_unpacks_float_hi_optab);
case VEC_UNPACK_FLOAT_LO_EXPR:
/* The signedness is determined from input operand. */
- return TYPE_UNSIGNED (type) ?
- vec_unpacku_float_lo_optab : vec_unpacks_float_lo_optab;
+ return (TYPE_UNSIGNED (type)
+ ? vec_unpacku_float_lo_optab : vec_unpacks_float_lo_optab);
+
+ case VEC_UNPACK_FIX_TRUNC_HI_EXPR:
+ /* The signedness is determined from output operand. */
+ return (TYPE_UNSIGNED (type)
+ ? vec_unpack_ufix_trunc_hi_optab
+ : vec_unpack_sfix_trunc_hi_optab);
+
+ case VEC_UNPACK_FIX_TRUNC_LO_EXPR:
+ /* The signedness is determined from output operand. */
+ return (TYPE_UNSIGNED (type)
+ ? vec_unpack_ufix_trunc_lo_optab
+ : vec_unpack_sfix_trunc_lo_optab);
case VEC_PACK_TRUNC_EXPR:
return vec_pack_trunc_optab;
@@ -193,8 +205,13 @@ optab_for_tree_code (enum tree_code code, const_tree type,
case VEC_PACK_FIX_TRUNC_EXPR:
/* The signedness is determined from output operand. */
- return TYPE_UNSIGNED (type) ?
- vec_pack_ufix_trunc_optab : vec_pack_sfix_trunc_optab;
+ return (TYPE_UNSIGNED (type)
+ ? vec_pack_ufix_trunc_optab : vec_pack_sfix_trunc_optab);
+
+ case VEC_PACK_FLOAT_EXPR:
+ /* The signedness is determined from input operand. */
+ return (TYPE_UNSIGNED (type)
+ ? vec_packu_float_optab : vec_packs_float_optab);
case VEC_DUPLICATE_EXPR:
return vec_duplicate_optab;
diff --git a/gcc/optabs.c b/gcc/optabs.c
index 278046a..cadf467 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -259,8 +259,15 @@ expand_widen_pattern_expr (sepops ops, rtx op0, rtx op1, rtx wide_op,
oprnd0 = ops->op0;
tmode0 = TYPE_MODE (TREE_TYPE (oprnd0));
- widen_pattern_optab =
- optab_for_tree_code (ops->code, TREE_TYPE (oprnd0), optab_default);
+ if (ops->code == VEC_UNPACK_FIX_TRUNC_HI_EXPR
+ || ops->code == VEC_UNPACK_FIX_TRUNC_LO_EXPR)
+ /* The sign is from the result type rather than operand's type
+ for these ops. */
+ widen_pattern_optab
+ = optab_for_tree_code (ops->code, ops->type, optab_default);
+ else
+ widen_pattern_optab
+ = optab_for_tree_code (ops->code, TREE_TYPE (oprnd0), optab_default);
if (ops->code == WIDEN_MULT_PLUS_EXPR
|| ops->code == WIDEN_MULT_MINUS_EXPR)
icode = find_widening_optab_handler (widen_pattern_optab,
@@ -1068,7 +1075,9 @@ expand_binop_directly (enum insn_code icode, machine_mode mode, optab binoptab,
|| binoptab == vec_pack_usat_optab
|| binoptab == vec_pack_ssat_optab
|| binoptab == vec_pack_ufix_trunc_optab
- || binoptab == vec_pack_sfix_trunc_optab)
+ || binoptab == vec_pack_sfix_trunc_optab
+ || binoptab == vec_packu_float_optab
+ || binoptab == vec_packs_float_optab)
{
/* The mode of the result is different then the mode of the
arguments. */
diff --git a/gcc/optabs.def b/gcc/optabs.def
index a1ecb75..11af7aa 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -327,10 +327,16 @@ OPTAB_D (vec_pack_ssat_optab, "vec_pack_ssat_$a")
OPTAB_D (vec_pack_trunc_optab, "vec_pack_trunc_$a")
OPTAB_D (vec_pack_ufix_trunc_optab, "vec_pack_ufix_trunc_$a")
OPTAB_D (vec_pack_usat_optab, "vec_pack_usat_$a")
+OPTAB_D (vec_packs_float_optab, "vec_packs_float_$a")
+OPTAB_D (vec_packu_float_optab, "vec_packu_float_$a")
OPTAB_D (vec_perm_optab, "vec_perm$a")
OPTAB_D (vec_realign_load_optab, "vec_realign_load_$a")
OPTAB_D (vec_set_optab, "vec_set$a")
OPTAB_D (vec_shr_optab, "vec_shr_$a")
+OPTAB_D (vec_unpack_sfix_trunc_hi_optab, "vec_unpack_sfix_trunc_hi_$a")
+OPTAB_D (vec_unpack_sfix_trunc_lo_optab, "vec_unpack_sfix_trunc_lo_$a")
+OPTAB_D (vec_unpack_ufix_trunc_hi_optab, "vec_unpack_ufix_trunc_hi_$a")
+OPTAB_D (vec_unpack_ufix_trunc_lo_optab, "vec_unpack_ufix_trunc_lo_$a")
OPTAB_D (vec_unpacks_float_hi_optab, "vec_unpacks_float_hi_$a")
OPTAB_D (vec_unpacks_float_lo_optab, "vec_unpacks_float_lo_$a")
OPTAB_D (vec_unpacks_hi_optab, "vec_unpacks_hi_$a")
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 0305734..28a2844 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,14 @@
+2018-05-29 Jakub Jelinek <jakub@redhat.com>
+
+ PR target/85918
+ * gcc.target/i386/avx512dq-pr85918.c: Add -mprefer-vector-width=512
+ and -fno-vect-cost-model options. Add aligned(64) attribute to the
+ arrays. Add suffix 1 to all functions and use 4 iterations rather
+ than N. Add functions with conversions to and from float.
+ Add new set of functions with 8 iterations and another one
+ with 16 iterations, expect 24 vectorized loops instead of just 4.
+ * gcc.target/i386/avx512dq-pr85918-2.c: New test.
+
2018-05-29 Javier Miranda <miranda@adacore.com>
* gnat.dg/equal2.adb: New testcase.
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-pr85918-2.c b/gcc/testsuite/gcc.target/i386/avx512dq-pr85918-2.c
new file mode 100644
index 0000000..fdf7b15
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-pr85918-2.c
@@ -0,0 +1,435 @@
+/* PR target/85918 */
+/* { dg-do run } */
+/* { dg-require-effective-target avx512dq } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-options "-O3 -mavx512dq -mavx512vl -mprefer-vector-width=512 -fno-vect-cost-model" } */
+
+#define AVX512DQ
+#define AVX512VL
+#define DO_TEST avx512dqvl_test
+
+static void avx512dqvl_test (void);
+
+#include "avx512-check.h"
+
+#define N 16
+
+long long ll[N] __attribute__((aligned (64)));
+unsigned long long ull[N] __attribute__((aligned (64)));
+float f[N] __attribute__((aligned (64)));
+double d[N] __attribute__((aligned (64)));
+
+__attribute__((noipa)) void
+ll2d1 (void)
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ d[i] = ll[i];
+}
+
+__attribute__((noipa)) void
+ull2d1 (void)
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ d[i] = ull[i];
+}
+
+__attribute__((noipa)) void
+d2ll1 (void)
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ ll[i] = d[i];
+}
+
+__attribute__((noipa)) void
+d2ull1 (void)
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ ull[i] = d[i];
+}
+
+__attribute__((noipa)) void
+ll2f1 (void)
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ f[i] = ll[i];
+}
+
+__attribute__((noipa)) void
+ull2f1 (void)
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ f[i] = ull[i];
+}
+
+__attribute__((noipa)) void
+f2ll1 (void)
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ ll[i] = f[i];
+}
+
+__attribute__((noipa)) void
+f2ull1 (void)
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ ull[i] = f[i];
+}
+
+__attribute__((noipa)) void
+ll2d2 (void)
+{
+ int i;
+
+ for (i = 0; i < 8; i++)
+ d[i] = ll[i];
+}
+
+__attribute__((noipa)) void
+ull2d2 (void)
+{
+ int i;
+
+ for (i = 0; i < 8; i++)
+ d[i] = ull[i];
+}
+
+__attribute__((noipa)) void
+d2ll2 (void)
+{
+ int i;
+
+ for (i = 0; i < 8; i++)
+ ll[i] = d[i];
+}
+
+__attribute__((noipa)) void
+d2ull2 (void)
+{
+ int i;
+
+ for (i = 0; i < 8; i++)
+ ull[i] = d[i];
+}
+
+__attribute__((noipa)) void
+ll2f2 (void)
+{
+ int i;
+
+ for (i = 0; i < 8; i++)
+ f[i] = ll[i];
+}
+
+__attribute__((noipa)) void
+ull2f2 (void)
+{
+ int i;
+
+ for (i = 0; i < 8; i++)
+ f[i] = ull[i];
+}
+
+__attribute__((noipa)) void
+f2ll2 (void)
+{
+ int i;
+
+ for (i = 0; i < 8; i++)
+ ll[i] = f[i];
+}
+
+__attribute__((noipa)) void
+f2ull2 (void)
+{
+ int i;
+
+ for (i = 0; i < 8; i++)
+ ull[i] = f[i];
+}
+
+__attribute__((noipa)) void
+ll2d3 (void)
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ d[i] = ll[i];
+}
+
+__attribute__((noipa)) void
+ull2d3 (void)
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ d[i] = ull[i];
+}
+
+__attribute__((noipa)) void
+d2ll3 (void)
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ ll[i] = d[i];
+}
+
+__attribute__((noipa)) void
+d2ull3 (void)
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ ull[i] = d[i];
+}
+
+__attribute__((noipa)) void
+ll2f3 (void)
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ f[i] = ll[i];
+}
+
+__attribute__((noipa)) void
+ull2f3 (void)
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ f[i] = ull[i];
+}
+
+__attribute__((noipa)) void
+f2ll3 (void)
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ ll[i] = f[i];
+}
+
+__attribute__((noipa)) void
+f2ull3 (void)
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ ull[i] = f[i];
+}
+
+unsigned long long ullt[] = {
+ 13835058055282163712ULL, 9223653511831486464ULL, 9218868437227405312ULL,
+ 1ULL, 9305281255077576704ULL, 1191936ULL, 18446462598732840960ULL, 0ULL,
+ 9223372036854775808ULL, 4611686018427387904ULL, 2305843009213693952ULL,
+ 9ULL, 9223653511831486464ULL, 0ULL, 65536ULL, 131071ULL
+};
+float uft[] = {
+ 13835058055282163712.0f, 9223653511831486464.0f, 9218868437227405312.0f,
+ 1.0f, 9305281255077576704.0f, 1191936.0f, 18446462598732840960.0f, 0.0f,
+ 9223372036854775808.0f, 4611686018427387904.0f, 2305843009213693952.0f,
+ 9.0f, 9223653511831486464.0f, 0.0f, 65536.0f, 131071.0f
+};
+long long llt[] = {
+ 9223090561878065152LL, -9223372036854775807LL - 1, -9223090561878065152LL,
+ -4LL, -8074672656898588672LL, 8074672656898588672LL, 29LL, -15LL,
+ 7574773098260463616LL, -7579276697887834112LL, -8615667562136469504LL,
+ 148LL, -255LL, 9151595917793558528LL, -9218868437227405312LL, 9LL
+};
+float ft[] = {
+ 9223090561878065152.0f, -9223372036854775808.0f, -9223090561878065152.0f,
+ -4.0f, -8074672656898588672.0f, 8074672656898588672.0f, 29.0f, -15.0f,
+ 7574773098260463616.0f, -7579276697887834112.0f, -8615667562136469504.0f,
+ 148.0f, -255.0f, 9151595917793558528.0f, -9218868437227405312.0f, 9.0f
+};
+
+static void
+avx512dqvl_test (void)
+{
+ int i;
+ for (i = 0; i < 4; i++)
+ {
+ ll[i] = llt[i];
+ ull[i] = ullt[i];
+ }
+ ll2d1 ();
+ for (i = 0; i < 4; i++)
+ if (d[i] != ft[i])
+ abort ();
+ ull2d1 ();
+ for (i = 0; i < 4; i++)
+ if (d[i] != uft[i])
+ abort ();
+ else
+ d[i] = ft[i + 4];
+ d2ll1 ();
+ for (i = 0; i < 4; i++)
+ if (ll[i] != llt[i + 4])
+ abort ();
+ else
+ d[i] = uft[i + 4];
+ d2ull1 ();
+ for (i = 0; i < 4; i++)
+ if (ull[i] != ullt[i + 4])
+ abort ();
+ else
+ {
+ ll[i] = llt[i + 8];
+ ull[i] = ullt[i + 8];
+ }
+ ll2f1 ();
+ for (i = 0; i < 4; i++)
+ if (f[i] != ft[i + 8])
+ abort ();
+ ull2f1 ();
+ for (i = 0; i < 4; i++)
+ if (f[i] != uft[i + 8])
+ abort ();
+ else
+ f[i] = ft[i + 12];
+ f2ll1 ();
+ for (i = 0; i < 4; i++)
+ if (ll[i] != llt[i + 12])
+ abort ();
+ else
+ f[i] = uft[i + 12];
+ f2ull1 ();
+ for (i = 0; i < 4; i++)
+ if (ull[i] != ullt[i + 12])
+ abort ();
+ for (i = 0; i < 8; i++)
+ {
+ ll[i] = llt[i];
+ ull[i] = ullt[i];
+ }
+ ll2d2 ();
+ for (i = 0; i < 8; i++)
+ if (d[i] != ft[i])
+ abort ();
+ ull2d2 ();
+ for (i = 0; i < 8; i++)
+ if (d[i] != uft[i])
+ abort ();
+ else
+ {
+ d[i] = ft[i];
+ ll[i] = 1234567LL;
+ ull[i] = 7654321ULL;
+ }
+ d2ll2 ();
+ for (i = 0; i < 8; i++)
+ if (ll[i] != llt[i])
+ abort ();
+ else
+ d[i] = uft[i];
+ d2ull2 ();
+ for (i = 0; i < 8; i++)
+ if (ull[i] != ullt[i])
+ abort ();
+ else
+ {
+ ll[i] = llt[i + 8];
+ ull[i] = ullt[i + 8];
+ }
+ ll2f2 ();
+ for (i = 0; i < 8; i++)
+ if (f[i] != ft[i + 8])
+ abort ();
+ ull2f2 ();
+ for (i = 0; i < 8; i++)
+ if (f[i] != uft[i + 8])
+ abort ();
+ else
+ {
+ f[i] = ft[i + 8];
+ ll[i] = 1234567LL;
+ ull[i] = 7654321ULL;
+ }
+ f2ll2 ();
+ for (i = 0; i < 8; i++)
+ if (ll[i] != llt[i + 8])
+ abort ();
+ else
+ f[i] = uft[i + 8];
+ f2ull2 ();
+ for (i = 0; i < 8; i++)
+ if (ull[i] != ullt[i + 8])
+ abort ();
+ for (i = 0; i < 16; i++)
+ {
+ ll[i] = llt[i];
+ ull[i] = ullt[i];
+ }
+ ll2d3 ();
+ for (i = 0; i < 16; i++)
+ if (d[i] != ft[i])
+ abort ();
+ ull2d3 ();
+ for (i = 0; i < 16; i++)
+ if (d[i] != uft[i])
+ abort ();
+ else
+ {
+ d[i] = ft[i];
+ ll[i] = 1234567LL;
+ ull[i] = 7654321ULL;
+ }
+ d2ll3 ();
+ for (i = 0; i < 16; i++)
+ if (ll[i] != llt[i])
+ abort ();
+ else
+ d[i] = uft[i];
+ d2ull3 ();
+ for (i = 0; i < 16; i++)
+ if (ull[i] != ullt[i])
+ abort ();
+ else
+ {
+ ll[i] = llt[i];
+ ull[i] = ullt[i];
+ f[i] = 3.0f;
+ d[i] = 4.0;
+ }
+ ll2f3 ();
+ for (i = 0; i < 16; i++)
+ if (f[i] != ft[i])
+ abort ();
+ ull2f3 ();
+ for (i = 0; i < 16; i++)
+ if (f[i] != uft[i])
+ abort ();
+ else
+ {
+ f[i] = ft[i];
+ ll[i] = 1234567LL;
+ ull[i] = 7654321ULL;
+ }
+ f2ll3 ();
+ for (i = 0; i < 16; i++)
+ if (ll[i] != llt[i])
+ abort ();
+ else
+ f[i] = uft[i];
+ f2ull3 ();
+ for (i = 0; i < 16; i++)
+ if (ull[i] != ullt[i])
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-pr85918.c b/gcc/testsuite/gcc.target/i386/avx512dq-pr85918.c
index 3c69f93..79593f2 100644
--- a/gcc/testsuite/gcc.target/i386/avx512dq-pr85918.c
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-pr85918.c
@@ -1,42 +1,203 @@
/* PR target/85918 */
/* { dg-do compile } */
-/* { dg-options "-O3 -mavx512dq -mavx512vl -fdump-tree-vect-details" } */
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
+/* { dg-options "-O3 -mavx512dq -mavx512vl -mprefer-vector-width=512 -fno-vect-cost-model -fdump-tree-vect-details" } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 24 "vect" } } */
#define N 1024
-long long ll[N];
-unsigned long long ull[N];
-double d[N];
+long long ll[N] __attribute__((aligned (64)));
+unsigned long long ull[N] __attribute__((aligned (64)));
+float f[N] __attribute__((aligned (64)));
+double d[N] __attribute__((aligned (64)));
-void ll2d (void)
+void ll2d1 (void) /* Convert ll[0..3] (long long) to double in d[0..3].  */
{
int i;
- for (i = 0; i < N; i++)
+ for (i = 0; i < 4; i++)
d[i] = ll[i];
}
-void ull2d (void)
+void ull2d1 (void) /* Convert ull[0..3] (unsigned long long) to double in d[0..3].  */
{
int i;
- for (i = 0; i < N; i++)
+ for (i = 0; i < 4; i++)
d[i] = ull[i];
}
-void d2ll (void)
+void d2ll1 (void) /* Convert d[0..3] (double) to long long in ll[0..3].  */
{
int i;
- for (i = 0; i < N; i++)
+ for (i = 0; i < 4; i++)
ll[i] = d[i];
}
-void d2ull (void)
+void d2ull1 (void) /* Convert d[0..3] (double) to unsigned long long in ull[0..3].  */
{
int i;
- for (i = 0; i < N; i++)
+ for (i = 0; i < 4; i++)
ull[i] = d[i];
}
+
+void ll2f1 (void) /* Convert ll[0..3] (long long) to float in f[0..3].  */
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ f[i] = ll[i];
+}
+
+void ull2f1 (void) /* Convert ull[0..3] (unsigned long long) to float in f[0..3].  */
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ f[i] = ull[i];
+}
+
+void f2ll1 (void) /* Convert f[0..3] (float) to long long in ll[0..3].  */
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ ll[i] = f[i];
+}
+
+void f2ull1 (void) /* Convert f[0..3] (float) to unsigned long long in ull[0..3].  */
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ ull[i] = f[i];
+}
+
+void ll2d2 (void) /* Convert ll[0..7] (long long) to double in d[0..7].  */
+{
+ int i;
+
+ for (i = 0; i < 8; i++)
+ d[i] = ll[i];
+}
+
+void ull2d2 (void) /* Convert ull[0..7] (unsigned long long) to double in d[0..7].  */
+{
+ int i;
+
+ for (i = 0; i < 8; i++)
+ d[i] = ull[i];
+}
+
+void d2ll2 (void) /* Convert d[0..7] (double) to long long in ll[0..7].  */
+{
+ int i;
+
+ for (i = 0; i < 8; i++)
+ ll[i] = d[i];
+}
+
+void d2ull2 (void) /* Convert d[0..7] (double) to unsigned long long in ull[0..7].  */
+{
+ int i;
+
+ for (i = 0; i < 8; i++)
+ ull[i] = d[i];
+}
+
+void ll2f2 (void) /* Convert ll[0..7] (long long) to float in f[0..7].  */
+{
+ int i;
+
+ for (i = 0; i < 8; i++)
+ f[i] = ll[i];
+}
+
+void ull2f2 (void) /* Convert ull[0..7] (unsigned long long) to float in f[0..7].  */
+{
+ int i;
+
+ for (i = 0; i < 8; i++)
+ f[i] = ull[i];
+}
+
+void f2ll2 (void) /* Convert f[0..7] (float) to long long in ll[0..7].  */
+{
+ int i;
+
+ for (i = 0; i < 8; i++)
+ ll[i] = f[i];
+}
+
+void f2ull2 (void) /* Convert f[0..7] (float) to unsigned long long in ull[0..7].  */
+{
+ int i;
+
+ for (i = 0; i < 8; i++)
+ ull[i] = f[i];
+}
+
+void ll2d3 (void) /* Convert ll[0..15] (long long) to double in d[0..15].  */
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ d[i] = ll[i];
+}
+
+void ull2d3 (void) /* Convert ull[0..15] (unsigned long long) to double in d[0..15].  */
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ d[i] = ull[i];
+}
+
+void d2ll3 (void) /* Convert d[0..15] (double) to long long in ll[0..15].  */
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ ll[i] = d[i];
+}
+
+void d2ull3 (void) /* Convert d[0..15] (double) to unsigned long long in ull[0..15].  */
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ ull[i] = d[i];
+}
+
+void ll2f3 (void) /* Convert ll[0..15] (long long) to float in f[0..15].  */
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ f[i] = ll[i];
+}
+
+void ull2f3 (void) /* Convert ull[0..15] (unsigned long long) to float in f[0..15].  */
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ f[i] = ull[i];
+}
+
+void f2ll3 (void) /* Convert f[0..15] (float) to long long in ll[0..15].  */
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ ll[i] = f[i];
+}
+
+void f2ull3 (void) /* Convert f[0..15] (float) to unsigned long long in ull[0..15].  */
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ ull[i] = f[i];
+}
diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
index 68f4fd3..ab2feed 100644
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -3676,6 +3676,8 @@ verify_gimple_assign_unary (gassign *stmt)
case VEC_UNPACK_LO_EXPR:
case VEC_UNPACK_FLOAT_HI_EXPR:
case VEC_UNPACK_FLOAT_LO_EXPR:
+ case VEC_UNPACK_FIX_TRUNC_HI_EXPR:
+ case VEC_UNPACK_FIX_TRUNC_LO_EXPR:
/* FIXME. */
return false;
@@ -4003,6 +4005,24 @@ verify_gimple_assign_binary (gassign *stmt)
return false;
}
+ case VEC_PACK_FLOAT_EXPR:
+ if (TREE_CODE (rhs1_type) != VECTOR_TYPE
+ || TREE_CODE (lhs_type) != VECTOR_TYPE
+ || !INTEGRAL_TYPE_P (TREE_TYPE (rhs1_type))
+ || !SCALAR_FLOAT_TYPE_P (TREE_TYPE (lhs_type))
+ || !types_compatible_p (rhs1_type, rhs2_type)
+ || maybe_ne (GET_MODE_SIZE (element_mode (rhs1_type)),
+ 2 * GET_MODE_SIZE (element_mode (lhs_type))))
+ {
+ error ("type mismatch in vector pack expression");
+ debug_generic_expr (lhs_type);
+ debug_generic_expr (rhs1_type);
+ debug_generic_expr (rhs2_type);
+ return true;
+ }
+
+ return false;
+
case MULT_EXPR:
case MULT_HIGHPART_EXPR:
case TRUNC_DIV_EXPR:
diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c
index 7881131..ae36cc7 100644
--- a/gcc/tree-inline.c
+++ b/gcc/tree-inline.c
@@ -3924,9 +3924,12 @@ estimate_operator_cost (enum tree_code code, eni_weights *weights,
case VEC_UNPACK_LO_EXPR:
case VEC_UNPACK_FLOAT_HI_EXPR:
case VEC_UNPACK_FLOAT_LO_EXPR:
+ case VEC_UNPACK_FIX_TRUNC_HI_EXPR:
+ case VEC_UNPACK_FIX_TRUNC_LO_EXPR:
case VEC_PACK_TRUNC_EXPR:
case VEC_PACK_SAT_EXPR:
case VEC_PACK_FIX_TRUNC_EXPR:
+ case VEC_PACK_FLOAT_EXPR:
case VEC_WIDEN_LSHIFT_HI_EXPR:
case VEC_WIDEN_LSHIFT_LO_EXPR:
case VEC_DUPLICATE_EXPR:
diff --git a/gcc/tree-pretty-print.c b/gcc/tree-pretty-print.c
index 5a8c8eb..125507e 100644
--- a/gcc/tree-pretty-print.c
+++ b/gcc/tree-pretty-print.c
@@ -3235,6 +3235,18 @@ dump_generic_node (pretty_printer *pp, tree node, int spc, dump_flags_t flags,
pp_string (pp, " > ");
break;
+ case VEC_UNPACK_FIX_TRUNC_HI_EXPR:
+ pp_string (pp, " VEC_UNPACK_FIX_TRUNC_HI_EXPR < ");
+ dump_generic_node (pp, TREE_OPERAND (node, 0), spc, flags, false);
+ pp_string (pp, " > ");
+ break;
+
+ case VEC_UNPACK_FIX_TRUNC_LO_EXPR:
+ pp_string (pp, " VEC_UNPACK_FIX_TRUNC_LO_EXPR < ");
+ dump_generic_node (pp, TREE_OPERAND (node, 0), spc, flags, false);
+ pp_string (pp, " > ");
+ break;
+
case VEC_PACK_TRUNC_EXPR:
pp_string (pp, " VEC_PACK_TRUNC_EXPR < ");
dump_generic_node (pp, TREE_OPERAND (node, 0), spc, flags, false);
@@ -3259,6 +3271,14 @@ dump_generic_node (pretty_printer *pp, tree node, int spc, dump_flags_t flags,
pp_string (pp, " > ");
break;
+ case VEC_PACK_FLOAT_EXPR:
+ pp_string (pp, " VEC_PACK_FLOAT_EXPR < ");
+ dump_generic_node (pp, TREE_OPERAND (node, 0), spc, flags, false);
+ pp_string (pp, ", ");
+ dump_generic_node (pp, TREE_OPERAND (node, 1), spc, flags, false);
+ pp_string (pp, " > ");
+ break;
+
case BLOCK:
dump_block_node (pp, node, spc, flags);
break;
@@ -3575,6 +3595,8 @@ op_code_prio (enum tree_code code)
case VEC_UNPACK_LO_EXPR:
case VEC_UNPACK_FLOAT_HI_EXPR:
case VEC_UNPACK_FLOAT_LO_EXPR:
+ case VEC_UNPACK_FIX_TRUNC_HI_EXPR:
+ case VEC_UNPACK_FIX_TRUNC_LO_EXPR:
case VEC_PACK_TRUNC_EXPR:
case VEC_PACK_SAT_EXPR:
return 16;
diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c
index 2ade60b..46502c4 100644
--- a/gcc/tree-vect-generic.c
+++ b/gcc/tree-vect-generic.c
@@ -1653,7 +1653,8 @@ expand_vector_operations_1 (gimple_stmt_iterator *gsi)
/* The signedness is determined from input argument. */
if (code == VEC_UNPACK_FLOAT_HI_EXPR
- || code == VEC_UNPACK_FLOAT_LO_EXPR)
+ || code == VEC_UNPACK_FLOAT_LO_EXPR
+ || code == VEC_PACK_FLOAT_EXPR)
{
type = TREE_TYPE (rhs1);
/* We do not know how to scalarize those. */
@@ -1670,6 +1671,8 @@ expand_vector_operations_1 (gimple_stmt_iterator *gsi)
|| code == VEC_WIDEN_MULT_ODD_EXPR
|| code == VEC_UNPACK_HI_EXPR
|| code == VEC_UNPACK_LO_EXPR
+ || code == VEC_UNPACK_FIX_TRUNC_HI_EXPR
+ || code == VEC_UNPACK_FIX_TRUNC_LO_EXPR
|| code == VEC_PACK_TRUNC_EXPR
|| code == VEC_PACK_SAT_EXPR
|| code == VEC_PACK_FIX_TRUNC_EXPR
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 759ea23..caa157f 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -10249,10 +10249,10 @@ vect_is_simple_use (tree operand, vec_info *vinfo,
vector form (i.e., when operating on arguments of type VECTYPE_IN
producing a result of type VECTYPE_OUT).
- Widening operations we currently support are NOP (CONVERT), FLOAT
- and WIDEN_MULT. This function checks if these operations are supported
- by the target platform either directly (via vector tree-codes), or via
- target builtins.
+ Widening operations we currently support are NOP (CONVERT), FLOAT,
+ FIX_TRUNC and WIDEN_MULT. This function checks if these operations
+ are supported by the target platform either directly (via vector
+ tree-codes), or via target builtins.
Output:
- CODE1 and CODE2 are codes of vector operations to be used when
@@ -10382,10 +10382,9 @@ supportable_widening_operation (enum tree_code code, gimple *stmt,
break;
case FIX_TRUNC_EXPR:
- /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
- VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
- computing the operation. */
- return false;
+ c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
+ c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
+ break;
default:
gcc_unreachable ();
@@ -10493,8 +10492,8 @@ supportable_widening_operation (enum tree_code code, gimple *stmt,
vector form (i.e., when operating on arguments of type VECTYPE_IN
and producing a result of type VECTYPE_OUT).
- Narrowing operations we currently support are NOP (CONVERT) and
- FIX_TRUNC. This function checks if these operations are supported by
+ Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
+ and FLOAT. This function checks if these operations are supported by
the target platform directly via vector tree-codes.
Output:
@@ -10535,9 +10534,8 @@ supportable_narrowing_operation (enum tree_code code,
break;
case FLOAT_EXPR:
- /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
- tree code and optabs used for computing the operation. */
- return false;
+ c1 = VEC_PACK_FLOAT_EXPR;
+ break;
default:
gcc_unreachable ();
@@ -10566,6 +10564,9 @@ supportable_narrowing_operation (enum tree_code code,
|| known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
TYPE_VECTOR_SUBPARTS (narrow_vectype)));
+ if (code == FLOAT_EXPR)
+ return false;
+
/* Check if it's a multi-step conversion that can be done using intermediate
types. */
prev_mode = vec_mode;
diff --git a/gcc/tree.def b/gcc/tree.def
index c660b2c..9696fee 100644
--- a/gcc/tree.def
+++ b/gcc/tree.def
@@ -1371,6 +1371,15 @@ DEFTREECODE (VEC_UNPACK_LO_EXPR, "vec_unpack_lo_expr", tcc_unary, 1)
DEFTREECODE (VEC_UNPACK_FLOAT_HI_EXPR, "vec_unpack_float_hi_expr", tcc_unary, 1)
DEFTREECODE (VEC_UNPACK_FLOAT_LO_EXPR, "vec_unpack_float_lo_expr", tcc_unary, 1)
+/* Unpack (extract) the high/low elements of the input vector, convert
+ floating point values to integer and widen elements into the output
+ vector. The input vector has twice as many elements as the output
+ vector, and its elements are half the size of those of the output vector. */
+DEFTREECODE (VEC_UNPACK_FIX_TRUNC_HI_EXPR, "vec_unpack_fix_trunc_hi_expr",
+ tcc_unary, 1)
+DEFTREECODE (VEC_UNPACK_FIX_TRUNC_LO_EXPR, "vec_unpack_fix_trunc_lo_expr",
+ tcc_unary, 1)
+
/* Pack (demote/narrow and merge) the elements of the two input vectors
into the output vector using truncation/saturation.
The elements of the input vectors are twice the size of the elements of the
@@ -1384,6 +1393,12 @@ DEFTREECODE (VEC_PACK_SAT_EXPR, "vec_pack_sat_expr", tcc_binary, 2)
the output vector. */
DEFTREECODE (VEC_PACK_FIX_TRUNC_EXPR, "vec_pack_fix_trunc_expr", tcc_binary, 2)
+/* Convert integer values of the two input vectors to floating point
+ and pack (narrow and merge) the elements into the output vector. The
+ elements of the input vectors are twice the size of the elements of
+ the output vector. */
+DEFTREECODE (VEC_PACK_FLOAT_EXPR, "vec_pack_float_expr", tcc_binary, 2)
+
/* Widening vector shift left in bits.
Operand 0 is a vector to be shifted with N elements of size S.
Operand 1 is an integer shift amount in bits.