From 6785eb595981abd93ad85edcfdf1d2e43c0841f5 Mon Sep 17 00:00:00 2001 From: "prathamesh.kulkarni" Date: Mon, 12 Jul 2021 15:18:21 +0530 Subject: arm/66791: Replace builtins for unsigned and fp vmul_n intrinsics. gcc/ChangeLog: PR target/66791 * config/arm/arm_neon.h (vmul_n_u32): Replace call to builtin with __a * __b. (vmulq_n_u32): Likewise. (vmul_n_f32): Gate __a * __b on __FAST_MATH__. (vmulq_n_f32): Likewise. (vmul_n_f16): Likewise. (vmulq_n_f16): Likewise. gcc/testsuite/ChangeLog: PR target/66791 * gcc.target/arm/armv8_2-fp16-neon-2.c: Adjust. --- gcc/config/arm/arm_neon.h | 24 ++++++++++++++++++---- gcc/testsuite/gcc.target/arm/armv8_2-fp16-neon-2.c | 10 ++++----- 2 files changed, 25 insertions(+), 9 deletions(-) (limited to 'gcc') diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index f42a15f..41b596b 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -8384,21 +8384,25 @@ __extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmul_n_f32 (float32x2_t __a, float32_t __b) { +#ifdef __FAST_MATH__ + return __a * __b; +#else return (float32x2_t)__builtin_neon_vmul_nv2sf (__a, (__builtin_neon_sf) __b); +#endif } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmul_n_u16 (uint16x4_t __a, uint16_t __b) { - return (uint16x4_t)__builtin_neon_vmul_nv4hi ((int16x4_t) __a, (__builtin_neon_hi) __b); + return __a * __b; } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmul_n_u32 (uint32x2_t __a, uint32_t __b) { - return (uint32x2_t)__builtin_neon_vmul_nv2si ((int32x2_t) __a, (__builtin_neon_si) __b); + return __a * __b; } __extension__ extern __inline int16x8_t @@ -8419,21 +8423,25 @@ __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmulq_n_f32 (float32x4_t __a, float32_t __b) { +#ifdef 
__FAST_MATH__ + return __a * __b; +#else return (float32x4_t)__builtin_neon_vmul_nv4sf (__a, (__builtin_neon_sf) __b); +#endif } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmulq_n_u16 (uint16x8_t __a, uint16_t __b) { - return (uint16x8_t)__builtin_neon_vmul_nv8hi ((int16x8_t) __a, (__builtin_neon_hi) __b); + return __a * __b; } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmulq_n_u32 (uint32x4_t __a, uint32_t __b) { - return (uint32x4_t)__builtin_neon_vmul_nv4si ((int32x4_t) __a, (__builtin_neon_si) __b); + return __a * __b; } __extension__ extern __inline int32x4_t @@ -17740,7 +17748,11 @@ __extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmul_n_f16 (float16x4_t __a, float16_t __b) { +#ifdef __FAST_MATH__ + return __a * __b; +#else return __builtin_neon_vmul_nv4hf (__a, __b); +#endif } __extension__ extern __inline float16x8_t @@ -17765,7 +17777,11 @@ __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vmulq_n_f16 (float16x8_t __a, float16_t __b) { +#ifdef __FAST_MATH__ + return __a * __b; +#else return __builtin_neon_vmul_nv8hf (__a, __b); +#endif } __extension__ extern __inline float16x4_t diff --git a/gcc/testsuite/gcc.target/arm/armv8_2-fp16-neon-2.c b/gcc/testsuite/gcc.target/arm/armv8_2-fp16-neon-2.c index 50f6893..6808576 100644 --- a/gcc/testsuite/gcc.target/arm/armv8_2-fp16-neon-2.c +++ b/gcc/testsuite/gcc.target/arm/armv8_2-fp16-neon-2.c @@ -327,13 +327,13 @@ BINOP_TEST (vminnm) BINOP_TEST (vmul) /* { dg-final { scan-assembler-times {vmul\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 3 } } - { dg-final { scan-assembler-times {vmul\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ + { dg-final { scan-assembler-times {vmul\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 2 } } */ BINOP_LANE_TEST (vmul, 2) /* { dg-final { scan-assembler-times 
{vmul\.f16\td[0-9]+, d[0-9]+, d[0-9]+\[2\]} 1 } } { dg-final { scan-assembler-times {vmul\.f16\tq[0-9]+, q[0-9]+, d[0-9]+\[2\]} 1 } } */ BINOP_N_TEST (vmul) -/* { dg-final { scan-assembler-times {vmul\.f16\td[0-9]+, d[0-9]+, d[0-9]+\[0\]} 1 } } - { dg-final { scan-assembler-times {vmul\.f16\tq[0-9]+, q[0-9]+, d[0-9]+\[0\]} 1 } }*/ +/* { dg-final { scan-assembler-times {vmul\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 3 } } + { dg-final { scan-assembler-times {vmul\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 2 } }*/ float16x4_t test_vpadd_16x4 (float16x4_t a, float16x4_t b) @@ -387,7 +387,7 @@ test_vdup_n_f16 (float16_t a) { return vdup_n_f16 (a); } -/* { dg-final { scan-assembler-times {vdup\.16\td[0-9]+, r[0-9]+} 2 } } */ +/* { dg-final { scan-assembler-times {vdup\.16\td[0-9]+, r[0-9]+} 3 } } */ float16x8_t test_vmovq_n_f16 (float16_t a) @@ -400,7 +400,7 @@ test_vdupq_n_f16 (float16_t a) { return vdupq_n_f16 (a); } -/* { dg-final { scan-assembler-times {vdup\.16\tq[0-9]+, r[0-9]+} 2 } } */ +/* { dg-final { scan-assembler-times {vdup\.16\tq[0-9]+, r[0-9]+} 3 } } */ float16x4_t test_vdup_lane_f16 (float16x4_t a) -- cgit v1.1 From 0192c3eedbc7e6fe703abd8b321f400ddb02adf7 Mon Sep 17 00:00:00 2001 From: Roger Sayle Date: Mon, 12 Jul 2021 10:59:08 +0100 Subject: Tweak testcase for PR tree-optimization/101403. Initialize unused variable u in compound expression. Committed as obvious. 2021-07-12 Roger Sayle Jakub Jelinek gcc/testsuite/ChangeLog PR tree-optimization/101403 * gcc.dg/pr101403.c: Avoid (unimportant) uninitialized variable. 
--- gcc/testsuite/gcc.dg/pr101403.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/pr101403.c b/gcc/testsuite/gcc.dg/pr101403.c index ac5fa79..88df112 100644 --- a/gcc/testsuite/gcc.dg/pr101403.c +++ b/gcc/testsuite/gcc.dg/pr101403.c @@ -2,7 +2,7 @@ /* { dg-options "-O2" } */ unsigned int foo (unsigned int a) { - unsigned int u; + unsigned int u = 0; unsigned short b = __builtin_bswap16 (a); return b >> (u, 12); } -- cgit v1.1 From 123d0a597beb6d9865f808bf15b0d67f2a8fd5b2 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Mon, 12 Jul 2021 10:26:25 +0200 Subject: middle-end/101423 - internal calls do not trap This adjusts gimple_could_trap_p to not consider internal function calls to trap compared to indirect calls or calls to weak functions. 2021-07-12 Richard Biener PR middle-end/101423 * gimple.c (gimple_could_trap_p_1): Internal function calls do not trap. * tree-eh.c (tree_could_trap_p): Likewise. --- gcc/gimple.c | 4 +++- gcc/tree-eh.c | 5 ++++- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'gcc') diff --git a/gcc/gimple.c b/gcc/gimple.c index 60a9066..cc46454 100644 --- a/gcc/gimple.c +++ b/gcc/gimple.c @@ -2149,8 +2149,10 @@ gimple_could_trap_p_1 (gimple *s, bool include_mem, bool include_stores) return gimple_asm_volatile_p (as_a <gasm *> (s)); case GIMPLE_CALL: + if (gimple_call_internal_p (s)) + return false; t = gimple_call_fndecl (s); - /* Assume that calls to weak functions may trap. */ + /* Assume that indirect and calls to weak functions may trap. */ if (!t || !DECL_P (t) || DECL_WEAK (t)) return true; return false; diff --git a/gcc/tree-eh.c b/gcc/tree-eh.c index 601285c..57ce8f0 100644 --- a/gcc/tree-eh.c +++ b/gcc/tree-eh.c @@ -2723,8 +2723,11 @@ tree_could_trap_p (tree expr) return TREE_THIS_VOLATILE (expr); case CALL_EXPR: + /* Internal function calls do not trap. 
*/ + if (CALL_EXPR_FN (expr) == NULL_TREE) + return false; t = get_callee_fndecl (expr); - /* Assume that calls to weak functions may trap. */ + /* Assume that indirect and calls to weak functions may trap. */ if (!t || !DECL_P (t)) return true; if (DECL_WEAK (t)) -- cgit v1.1 From 92343e0ba4d47f21ae20ffcb83d736bdbc15dae0 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Mon, 12 Jul 2021 10:49:03 +0200 Subject: tree-optimization/101394 - fix PRE full redundancy wrt abnormals This avoids adding a copy from an abnormal picked up from PHI translation much like we'd avoid inserting the translated expression on pred edges. 2021-07-12 Richard Biener PR tree-optimization/101394 * tree-ssa-pre.c (do_pre_regular_insertion): Avoid inserting copies from abnormals for a full redundancy. * gcc.dg/torture/pr101394.c: New testcase. --- gcc/testsuite/gcc.dg/torture/pr101394.c | 18 ++++++++++++++++++ gcc/tree-ssa-pre.c | 6 +++++- 2 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.dg/torture/pr101394.c (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/torture/pr101394.c b/gcc/testsuite/gcc.dg/torture/pr101394.c new file mode 100644 index 0000000..87fbdad --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr101394.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ + +int a, b, c, d; +void h(); +int e() __attribute__((returns_twice)); +void f() { + int *g = (int *)(__INTPTR_TYPE__)c; + if (b) { + h(); + g--; + if (a) + if (d) + h(); + } + if (g++) + e(); + c = (__INTPTR_TYPE__)g; +} diff --git a/gcc/tree-ssa-pre.c b/gcc/tree-ssa-pre.c index d86fe26..69141c2 100644 --- a/gcc/tree-ssa-pre.c +++ b/gcc/tree-ssa-pre.c @@ -3412,7 +3412,11 @@ do_pre_regular_insertion (basic_block block, basic_block dom, /* If all edges produce the same value and that value is an invariant, then the PHI has the same value on all edges. Note this. 
*/ - else if (!cant_insert && all_same) + else if (!cant_insert + && all_same + && (edoubleprime->kind != NAME + || !SSA_NAME_OCCURS_IN_ABNORMAL_PHI + (PRE_EXPR_NAME (edoubleprime)))) { gcc_assert (edoubleprime->kind == CONSTANT || edoubleprime->kind == NAME); -- cgit v1.1 From c03cae4e066066278c8435c409829a9bf851e49f Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Wed, 7 Jul 2021 11:45:43 +0200 Subject: Display the number of components BB vectorized This amends the optimization message printed when a basic-block part is vectorized to mention the number of SLP graph entries. This helps when debugging vectorization differences and we end up merging SLP instances for costing purposes. 2021-07-07 Richard Biener * tree-vect-slp.c (vect_slp_region): Show the number of SLP graph entries in the optimization message. * g++.dg/vect/slp-pr87105.cc: Adjust. * gcc.dg/vect/bb-slp-pr54400.c: Likewise. --- gcc/testsuite/g++.dg/vect/slp-pr87105.cc | 2 +- gcc/testsuite/gcc.dg/vect/bb-slp-pr54400.c | 2 +- gcc/tree-vect-slp.c | 12 ++++++++---- 3 files changed, 10 insertions(+), 6 deletions(-) (limited to 'gcc') diff --git a/gcc/testsuite/g++.dg/vect/slp-pr87105.cc b/gcc/testsuite/g++.dg/vect/slp-pr87105.cc index d07b1cd..451a117 100644 --- a/gcc/testsuite/g++.dg/vect/slp-pr87105.cc +++ b/gcc/testsuite/g++.dg/vect/slp-pr87105.cc @@ -99,7 +99,7 @@ void quadBoundingBoxA(const Point bez[3], Box& bBox) noexcept { // We should have if-converted everything down to straight-line code // { dg-final { scan-tree-dump-times "<bb \[0-9\]+>" 1 "slp2" } } -// { dg-final { scan-tree-dump-times "basic block part vectorized" 1 "slp2" { xfail { { ! vect_element_align } && { ! vect_hw_misalign } } } } } +// { dg-final { scan-tree-dump-times "optimized: basic block part" 1 "slp2" { xfail { { ! vect_element_align } && { ! 
vect_hw_misalign } } } } } // It's a bit awkward to detect that all stores were vectorized but the // following more or less does the trick // { dg-final { scan-tree-dump "vect_\[^\r\m\]* = MIN" "slp2" { xfail { { ! vect_element_align } && { ! vect_hw_misalign } } } } } diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr54400.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr54400.c index 6b427aa..7c46fa0 100644 --- a/gcc/testsuite/gcc.dg/vect/bb-slp-pr54400.c +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr54400.c @@ -39,5 +39,5 @@ main () } /* We are lacking an effective target for .REDUC_PLUS support. */ -/* { dg-final { scan-tree-dump-times "basic block part vectorized" 3 "slp2" { target x86_64-*-* } } } */ +/* { dg-final { scan-tree-dump-times "optimized: basic block part" 3 "slp2" { target x86_64-*-* } } } */ /* { dg-final { scan-tree-dump-not " = VEC_PERM_EXPR" "slp2" { target x86_64-*-* } } } */ diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 5357cd0..cd002b3 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -5827,12 +5827,16 @@ vect_slp_region (vec<basic_block> bbs, vec<data_reference_p> datarefs, if (GET_MODE_SIZE (bb_vinfo->vector_mode).is_constant (&bytes)) dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location, - "basic block part vectorized using %wu " - "byte vectors\n", bytes); + "basic block part with %u components " + "vectorized using %wu byte vectors\n", + instance->subgraph_entries.length (), + bytes); else dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location, - "basic block part vectorized using " - "variable length vectors\n"); + "basic block part with %u components " + "vectorized using variable length " + "vectors\n", + instance->subgraph_entries.length ()); } } } -- cgit v1.1 From 86b228b87b64ffa6991fce0dc188985d7a9a173a Mon Sep 17 00:00:00 2001 From: Piotr Trojanek Date: Thu, 17 Jun 2021 18:49:11 +0200 Subject: [Ada] Avoid unnecessary work when expanding 'Image into 'Put_Image gcc/ada/ * exp_imgv.adb (Expand_Image_Attribute): Move 
rewriting to attribute 
Put_Image to the beginning of expansion of attribute Image. --- gcc/ada/exp_imgv.adb | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'gcc') diff --git a/gcc/ada/exp_imgv.adb b/gcc/ada/exp_imgv.adb index d2605fb..69b9f2d 100644 --- a/gcc/ada/exp_imgv.adb +++ b/gcc/ada/exp_imgv.adb @@ -1044,6 +1044,15 @@ package body Exp_Imgv is return; end if; + -- If Image should be transformed using Put_Image, then do so. See + -- Exp_Put_Image for details. + + if Exp_Put_Image.Image_Should_Call_Put_Image (N) then + Rewrite (N, Exp_Put_Image.Build_Image_Call (N)); + Analyze_And_Resolve (N, Standard_String, Suppress => All_Checks); + return; + end if; + Ptyp := Underlying_Type (Entity (Pref)); -- Ada 2022 allows 'Image on private types, so fetch the underlying @@ -1063,15 +1072,7 @@ package body Exp_Imgv is Enum_Case := False; - -- If this is a case where Image should be transformed using Put_Image, - -- then do so. See Exp_Put_Image for details. - - if Exp_Put_Image.Image_Should_Call_Put_Image (N) then - Rewrite (N, Exp_Put_Image.Build_Image_Call (N)); - Analyze_And_Resolve (N, Standard_String, Suppress => All_Checks); - return; - - elsif Rtyp = Standard_Boolean then + if Rtyp = Standard_Boolean then -- Use inline expansion if the -gnatd_x switch is not passed to the -- compiler. Otherwise expand into a call to the runtime. -- cgit v1.1 From 9b89dabfd851f0ee0e9f0c6e141f8e3fba08d1d7 Mon Sep 17 00:00:00 2001 From: Bob Duff Date: Wed, 16 Jun 2021 06:47:57 -0400 Subject: [Ada] Duplicate Size/Value_Size clause gcc/ada/ * sem_ch13.adb (Duplicate_Clause): Add a helper routine Check_One_Attr, with a parameter for the attribute_designator we are looking for, and one for the attribute_designator of the current node (which are usually the same). For Size and Value_Size, call it twice, once for each. * errout.ads: Fix a typo. 
--- gcc/ada/errout.ads | 2 +- gcc/ada/sem_ch13.adb | 67 ++++++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 55 insertions(+), 14 deletions(-) (limited to 'gcc') diff --git a/gcc/ada/errout.ads b/gcc/ada/errout.ads index b0cbd82..9b2e08d 100644 --- a/gcc/ada/errout.ads +++ b/gcc/ada/errout.ads @@ -279,7 +279,7 @@ package Errout is -- The character ? appearing anywhere in a message makes the message -- warning instead of a normal error message, and the text of the -- message will be preceded by "warning:" in the normal case. The - -- handling of warnings if further controlled by the Warning_Mode + -- handling of warnings is further controlled by the Warning_Mode -- option (-w switch), see package Opt for further details, and also by -- the current setting from pragma Warnings. This pragma applies only -- to warnings issued from the semantic phase (not the parser), but diff --git a/gcc/ada/sem_ch13.adb b/gcc/ada/sem_ch13.adb index f0962ca0..91d41b4 100644 --- a/gcc/ada/sem_ch13.adb +++ b/gcc/ada/sem_ch13.adb @@ -5181,7 +5181,9 @@ package body Sem_Ch13 is -- This routine checks if the aspect for U_Ent being given by attribute -- definition clause N is for an aspect that has already been specified, -- and if so gives an error message. If there is a duplicate, True is - -- returned, otherwise if there is no error, False is returned. + -- returned, otherwise there is no error, and False is returned. Size + -- and Value_Size are considered to conflict, but for compatibility, + -- this is merely a warning. procedure Check_Indexing_Functions; -- Check that the function in Constant_Indexing or Variable_Indexing @@ -6007,7 +6009,47 @@ package body Sem_Ch13 is ---------------------- function Duplicate_Clause return Boolean is - A : Node_Id; + + function Check_One_Attr (Attr_1, Attr_2 : Name_Id) return Boolean; + -- Check for one attribute; Attr_1 is the attribute_designator we are + -- looking for. Attr_2 is the attribute_designator of the current + -- node. 
Normally, this is called just once by Duplicate_Clause, with + -- Attr_1 = Attr_2. However, it needs to be called twice for Size and + -- Value_Size, because these mean the same thing. For compatibility, + -- we allow specifying both Size and Value_Size, but only if the two + -- sizes are equal. + + -------------------- + -- Check_One_Attr -- + -------------------- + + function Check_One_Attr (Attr_1, Attr_2 : Name_Id) return Boolean is + A : constant Node_Id := + Get_Rep_Item (U_Ent, Attr_1, Check_Parents => False); + begin + if Present (A) then + if Attr_1 = Attr_2 then + Error_Msg_Name_1 := Attr_1; + Error_Msg_Sloc := Sloc (A); + Error_Msg_NE ("aspect% for & previously given#", N, U_Ent); + + else + pragma Assert (Attr_1 in Name_Size | Name_Value_Size); + pragma Assert (Attr_2 in Name_Size | Name_Value_Size); + + Error_Msg_Name_1 := Attr_2; + Error_Msg_Name_2 := Attr_1; + Error_Msg_Sloc := Sloc (A); + Error_Msg_NE ("?% for & conflicts with % #", N, U_Ent); + end if; + + return True; + end if; + + return False; + end Check_One_Attr; + + -- Start of processing for Duplicate_Clause begin -- Nothing to do if this attribute definition clause comes from @@ -6019,21 +6061,20 @@ package body Sem_Ch13 is return False; end if; - -- Otherwise current clause may duplicate previous clause, or a - -- previously given pragma or aspect specification for the same - -- aspect. 
- - A := Get_Rep_Item (U_Ent, Chars (N), Check_Parents => False); + -- Special cases for Size and Value_Size - if Present (A) then - Error_Msg_Name_1 := Chars (N); - Error_Msg_Sloc := Sloc (A); - - Error_Msg_NE ("aspect% for & previously given#", N, U_Ent); + if (Chars (N) = Name_Size + and then Check_One_Attr (Name_Value_Size, Name_Size)) + or else + (Chars (N) = Name_Value_Size + and then Check_One_Attr (Name_Size, Name_Value_Size)) + then return True; end if; - return False; + -- Normal case (including Size and Value_Size) + + return Check_One_Attr (Chars (N), Chars (N)); end Duplicate_Clause; -- Start of processing for Analyze_Attribute_Definition_Clause -- cgit v1.1 From 5cb3843bca9a28c28dbc1fafd88c144a43e141df Mon Sep 17 00:00:00 2001 From: Eric Botcazou Date: Fri, 18 Jun 2021 16:47:48 +0200 Subject: [Ada] Add DWARF 5 support to System.Dwarf_Line gcc/ada/ * libgnat/s-dwalin.ads: Adjust a few comments left and right. (Line_Info_Register): Comment out unused components. (Line_Info_Header): Add DWARF 5 support. (Dwarf_Context): Likewise. Rename "prologue" into "header". * libgnat/s-dwalin.adb: Alphabetize "with" clauses. (DWARF constants): Add DWARF 5 support and reorder. (For_Each_Row): Adjust. (Initialize_Pass): Likewise. (Initialize_State_Machine): Likewise and fix typo. (Open): Add DWARF 5 support. (Parse_Prologue): Rename into... (Parse_Header): ...this and add DWARF 5 support. (Read_And_Execute_Isn): Rename into... (Read_And_Execute_Insn): ...this and adjust. (To_File_Name): Change parameter name and add DWARF 5 support. (Read_Entry_Format_Array): New procedure. (Skip_Form): Add DWARF 5 support and reorder. (Seek_Abbrev): Do not count entries and add DWARF 5 support. (Debug_Info_Lookup): Add DWARF 5 support. (Symbolic_Address.Set_Result): Likewise. (Symbolic_Address): Adjust. 
--- gcc/ada/libgnat/s-dwalin.adb | 842 ++++++++++++++++++++++++++++++------------- gcc/ada/libgnat/s-dwalin.ads | 124 ++++--- 2 files changed, 653 insertions(+), 313 deletions(-) (limited to 'gcc') diff --git a/gcc/ada/libgnat/s-dwalin.adb b/gcc/ada/libgnat/s-dwalin.adb index 55989c5..4a9d538 100644 --- a/gcc/ada/libgnat/s-dwalin.adb +++ b/gcc/ada/libgnat/s-dwalin.adb @@ -30,20 +30,20 @@ ------------------------------------------------------------------------------ with Ada.Characters.Handling; +with Ada.Containers.Generic_Array_Sort; with Ada.Exceptions.Traceback; use Ada.Exceptions.Traceback; with Ada.Unchecked_Deallocation; -with Ada.Containers.Generic_Array_Sort; with Interfaces; use Interfaces; with System; use System; -with System.Storage_Elements; use System.Storage_Elements; with System.Address_Image; +with System.Bounded_Strings; use System.Bounded_Strings; with System.IO; use System.IO; +with System.Mmap; use System.Mmap; with System.Object_Reader; use System.Object_Reader; with System.Traceback_Entries; use System.Traceback_Entries; -with System.Mmap; use System.Mmap; -with System.Bounded_Strings; use System.Bounded_Strings; +with System.Storage_Elements; use System.Storage_Elements; package body System.Dwarf_Lines is @@ -60,13 +60,19 @@ package body System.Dwarf_Lines is (S : in out Mapped_Stream; Len : out Offset; Is64 : out Boolean); - -- Read initial length as specified by Dwarf-4 7.2.2 + -- Read initial length as specified by 7.2.2 procedure Read_Section_Offset (S : in out Mapped_Stream; Len : out Offset; Is64 : Boolean); - -- Read a section offset, as specified by Dwarf-4 7.4 + -- Read a section offset, as specified by 7.4 + + procedure Read_Entry_Format_Array + (S : in out Mapped_Stream; + A : out Entry_Format_Array; + Len : uint8); + -- Read an entry format array, as specified by 6.2.4.1 procedure Read_Aranges_Entry (C : in out Dwarf_Context; @@ -111,24 +117,24 @@ package body System.Dwarf_Lines is -- a compilation unit. 
procedure Initialize_Pass (C : in out Dwarf_Context); - -- Seek to the first byte of the first prologue and prepare to make a pass + -- Seek to the first byte of the first header and prepare to make a pass -- over the line number entries. procedure Initialize_State_Machine (C : in out Dwarf_Context); -- Set all state machine registers to their specified initial values - procedure Parse_Prologue (C : in out Dwarf_Context); - -- Decode a DWARF statement program prologue + procedure Parse_Header (C : in out Dwarf_Context); + -- Decode a DWARF statement program header - procedure Read_And_Execute_Isn + procedure Read_And_Execute_Insn (C : in out Dwarf_Context; Done : out Boolean); -- Read an execute a statement program instruction function To_File_Name (C : in out Dwarf_Context; - Code : uint32) return String; - -- Extract a file name from the prologue + File : uint32) return String; + -- Extract a file name from the header type Callback is not null access procedure (C : in out Dwarf_Context); procedure For_Each_Row (C : in out Dwarf_Context; F : Callback); @@ -158,8 +164,25 @@ package body System.Dwarf_Lines is -- DWARF constants -- ----------------------- + -- 3.1.1 Full and Partial Compilation Unit Entries + + DW_TAG_Compile_Unit : constant := 16#11#; + + DW_AT_Stmt_List : constant := 16#10#; + + -- 6.2.4.1 Standard Content Descriptions (DWARF 5) + + DW_LNCT_path : constant := 1; + DW_LNCT_directory_index : constant := 2; + -- DW_LNCT_timestamp : constant := 3; + -- DW_LNCT_size : constant := 4; + DW_LNCT_MD5 : constant := 5; + DW_LNCT_lo_user : constant := 16#2000#; + DW_LNCT_hi_user : constant := 16#3fff#; + -- 6.2.5.2 Standard Opcodes + DW_LNS_extended_op : constant := 0; DW_LNS_copy : constant := 1; DW_LNS_advance_pc : constant := 2; DW_LNS_advance_line : constant := 3; @@ -175,45 +198,56 @@ package body System.Dwarf_Lines is -- 6.2.5.3 Extended Opcodes - DW_LNE_end_sequence : constant := 1; - DW_LNE_set_address : constant := 2; - DW_LNE_define_file : constant 
:= 3; - - -- From the DWARF version 4 public review draft - + DW_LNE_end_sequence : constant := 1; + DW_LNE_set_address : constant := 2; + DW_LNE_define_file : constant := 3; DW_LNE_set_discriminator : constant := 4; - -- Attribute encodings - - DW_TAG_Compile_Unit : constant := 16#11#; - - DW_AT_Stmt_List : constant := 16#10#; - - DW_FORM_addr : constant := 16#01#; - DW_FORM_block2 : constant := 16#03#; - DW_FORM_block4 : constant := 16#04#; - DW_FORM_data2 : constant := 16#05#; - DW_FORM_data4 : constant := 16#06#; - DW_FORM_data8 : constant := 16#07#; - DW_FORM_string : constant := 16#08#; - DW_FORM_block : constant := 16#09#; - DW_FORM_block1 : constant := 16#0a#; - DW_FORM_data1 : constant := 16#0b#; - DW_FORM_flag : constant := 16#0c#; - DW_FORM_sdata : constant := 16#0d#; - DW_FORM_strp : constant := 16#0e#; - DW_FORM_udata : constant := 16#0f#; - DW_FORM_ref_addr : constant := 16#10#; - DW_FORM_ref1 : constant := 16#11#; - DW_FORM_ref2 : constant := 16#12#; - DW_FORM_ref4 : constant := 16#13#; - DW_FORM_ref8 : constant := 16#14#; - DW_FORM_ref_udata : constant := 16#15#; - DW_FORM_indirect : constant := 16#16#; - DW_FORM_sec_offset : constant := 16#17#; - DW_FORM_exprloc : constant := 16#18#; - DW_FORM_flag_present : constant := 16#19#; - DW_FORM_ref_sig8 : constant := 16#20#; + -- 7.5.5 Classes and Forms + + DW_FORM_addr : constant := 16#01#; + DW_FORM_block2 : constant := 16#03#; + DW_FORM_block4 : constant := 16#04#; + DW_FORM_data2 : constant := 16#05#; + DW_FORM_data4 : constant := 16#06#; + DW_FORM_data8 : constant := 16#07#; + DW_FORM_string : constant := 16#08#; + DW_FORM_block : constant := 16#09#; + DW_FORM_block1 : constant := 16#0a#; + DW_FORM_data1 : constant := 16#0b#; + DW_FORM_flag : constant := 16#0c#; + DW_FORM_sdata : constant := 16#0d#; + DW_FORM_strp : constant := 16#0e#; + DW_FORM_udata : constant := 16#0f#; + DW_FORM_ref_addr : constant := 16#10#; + DW_FORM_ref1 : constant := 16#11#; + DW_FORM_ref2 : constant := 16#12#; + DW_FORM_ref4 
: constant := 16#13#; + DW_FORM_ref8 : constant := 16#14#; + DW_FORM_ref_udata : constant := 16#15#; + DW_FORM_indirect : constant := 16#16#; + DW_FORM_sec_offset : constant := 16#17#; + DW_FORM_exprloc : constant := 16#18#; + DW_FORM_flag_present : constant := 16#19#; + DW_FORM_strx : constant := 16#1a#; + DW_FORM_addrx : constant := 16#1b#; + DW_FORM_ref_sup4 : constant := 16#1c#; + DW_FORM_strp_sup : constant := 16#1d#; + DW_FORM_data16 : constant := 16#1e#; + DW_FORM_line_strp : constant := 16#1f#; + DW_FORM_ref_sig8 : constant := 16#20#; + DW_FORM_implicit_const : constant := 16#21#; + DW_FORM_loclistx : constant := 16#22#; + DW_FORM_rnglistx : constant := 16#23#; + DW_FORM_ref_sup8 : constant := 16#24#; + DW_FORM_strx1 : constant := 16#25#; + DW_FORM_strx2 : constant := 16#26#; + DW_FORM_strx3 : constant := 16#27#; + DW_FORM_strx4 : constant := 16#28#; + DW_FORM_addrx1 : constant := 16#29#; + DW_FORM_addrx2 : constant := 16#2a#; + DW_FORM_addrx3 : constant := 16#2b#; + DW_FORM_addrx4 : constant := 16#2c#; --------- -- "<" -- @@ -235,6 +269,7 @@ package body System.Dwarf_Lines is procedure Unchecked_Deallocation is new Ada.Unchecked_Deallocation (Search_Array, Search_Array_Access); + begin if C.Has_Debug then Close (C.Lines); @@ -265,6 +300,7 @@ package body System.Dwarf_Lines is procedure Dump_Row (C : in out Dwarf_Context) is PC : constant Integer_Address := Integer_Address (C.Registers.Address); Off : Offset; + begin Tell (C.Lines, Off); @@ -286,11 +322,13 @@ package body System.Dwarf_Lines is Cache : constant Search_Array_Access := C.Cache; S : Object_Symbol; Name : String_Ptr_Len; + begin if Cache = null then Put_Line ("No cache"); return; end if; + for I in Cache'Range loop declare E : Search_Entry renames Cache (I); @@ -322,7 +360,7 @@ package body System.Dwarf_Lines is Initialize_Pass (C); loop - Read_And_Execute_Isn (C, Done); + Read_And_Execute_Insn (C, Done); if C.Registers.Is_Row then F.all (C); @@ -339,8 +377,7 @@ package body System.Dwarf_Lines 
is procedure Initialize_Pass (C : in out Dwarf_Context) is begin Seek (C.Lines, 0); - C.Next_Prologue := 0; - + C.Next_Header := 0; Initialize_State_Machine (C); end Initialize_Pass; @@ -350,17 +387,16 @@ package body System.Dwarf_Lines is procedure Initialize_State_Machine (C : in out Dwarf_Context) is begin + -- Table 6.4: Line number program initial state + C.Registers := (Address => 0, File => 1, Line => 1, Column => 0, - Is_Stmt => C.Prologue.Default_Is_Stmt = 0, + Is_Stmt => C.Header.Default_Is_Stmt /= 0, Basic_Block => False, End_Sequence => False, - Prologue_End => False, - Epilogue_Begin => False, - ISA => 0, Is_Row => False); end Initialize_State_Machine; @@ -378,8 +414,7 @@ package body System.Dwarf_Lines is -- Low_Address -- ----------------- - function Low_Address (C : Dwarf_Context) - return System.Address is + function Low_Address (C : Dwarf_Context) return System.Address is begin return C.Load_Address + C.Low; end Low_Address; @@ -393,19 +428,24 @@ package body System.Dwarf_Lines is C : out Dwarf_Context; Success : out Boolean) is - Line_Sec, Info_Sec, Abbrev_Sec, Aranges_Sec : Object_Section; - Hi, Lo : uint64; + Abbrev, Aranges, Lines, Info, Line_Str : Object_Section; + Hi, Lo : uint64; + begin -- Not a success by default Success := False; - -- Open file + -- Open file with In_Exception set so we can control the failure mode - C.Obj := Open (File_Name, C.In_Exception); + C.Obj := Open (File_Name, In_Exception => True); if C.Obj = null then - return; + if C.In_Exception then + return; + else + raise Dwarf_Error with "could not open file"; + end if; end if; Success := True; @@ -420,21 +460,23 @@ package body System.Dwarf_Lines is -- Create a stream for debug sections if Format (C.Obj.all) = XCOFF32 then - Line_Sec := Get_Section (C.Obj.all, ".dwline"); - Abbrev_Sec := Get_Section (C.Obj.all, ".dwabrev"); - Info_Sec := Get_Section (C.Obj.all, ".dwinfo"); - Aranges_Sec := Get_Section (C.Obj.all, ".dwarnge"); + Abbrev := Get_Section (C.Obj.all, 
".dwabrev"); + Aranges := Get_Section (C.Obj.all, ".dwarnge"); + Info := Get_Section (C.Obj.all, ".dwinfo"); + Lines := Get_Section (C.Obj.all, ".dwline"); + Line_Str := Get_Section (C.Obj.all, ".dwlistr"); else - Line_Sec := Get_Section (C.Obj.all, ".debug_line"); - Abbrev_Sec := Get_Section (C.Obj.all, ".debug_abbrev"); - Info_Sec := Get_Section (C.Obj.all, ".debug_info"); - Aranges_Sec := Get_Section (C.Obj.all, ".debug_aranges"); + Abbrev := Get_Section (C.Obj.all, ".debug_abbrev"); + Aranges := Get_Section (C.Obj.all, ".debug_aranges"); + Info := Get_Section (C.Obj.all, ".debug_info"); + Lines := Get_Section (C.Obj.all, ".debug_line"); + Line_Str := Get_Section (C.Obj.all, ".debug_line_str"); end if; - if Line_Sec = Null_Section - or else Abbrev_Sec = Null_Section - or else Info_Sec = Null_Section - or else Aranges_Sec = Null_Section + if Abbrev = Null_Section + or else Aranges = Null_Section + or else Info = Null_Section + or else Lines = Null_Section then pragma Annotate (CodePeer, False_Positive, @@ -444,21 +486,29 @@ package body System.Dwarf_Lines is return; end if; - C.Lines := Create_Stream (C.Obj.all, Line_Sec); - C.Abbrev := Create_Stream (C.Obj.all, Abbrev_Sec); - C.Info := Create_Stream (C.Obj.all, Info_Sec); - C.Aranges := Create_Stream (C.Obj.all, Aranges_Sec); + C.Abbrev := Create_Stream (C.Obj.all, Abbrev); + C.Aranges := Create_Stream (C.Obj.all, Aranges); + C.Info := Create_Stream (C.Obj.all, Info); + C.Lines := Create_Stream (C.Obj.all, Lines); + + -- The .debug_line_str section may be available in DWARF 5 + + if Line_Str /= Null_Section then + C.Line_Str := Create_Stream (C.Obj.all, Line_Str); + end if; -- All operations are successful, context is valid C.Has_Debug := True; end Open; - -------------------- - -- Parse_Prologue -- - -------------------- + ------------------ + -- Parse_Header -- + ------------------ + + procedure Parse_Header (C : in out Dwarf_Context) is + Header : Line_Info_Header renames C.Header; - procedure Parse_Prologue 
(C : in out Dwarf_Context) is Char : uint8; Prev : uint8; -- The most recently read character and the one preceding it @@ -469,94 +519,147 @@ package body System.Dwarf_Lines is Buf : Buffer; Off : Offset; - First_Byte_Of_Prologue : Offset; - Last_Byte_Of_Prologue : Offset; - - Max_Op_Per_Insn : uint8; - pragma Unreferenced (Max_Op_Per_Insn); + First_Byte_Of_Header : Offset; + Last_Byte_Of_Header : Offset; - Prologue : Line_Info_Prologue renames C.Prologue; + Standard_Opcode_Lengths : Opcode_Length_Array; + pragma Unreferenced (Standard_Opcode_Lengths); begin - Tell (C.Lines, First_Byte_Of_Prologue); - Prologue.Unit_Length := Read (C.Lines); + Tell (C.Lines, First_Byte_Of_Header); + + Read_Initial_Length (C.Lines, Header.Unit_Length, Header.Is64); + Tell (C.Lines, Off); - C.Next_Prologue := Off + Offset (Prologue.Unit_Length); + C.Next_Header := Off + Header.Unit_Length; + + Header.Version := Read (C.Lines); + + if Header.Version >= 5 then + Header.Address_Size := Read (C.Lines); + Header.Segment_Selector_Size := Read (C.Lines); + else + Header.Address_Size := 0; + Header.Segment_Selector_Size := 0; + end if; - Prologue.Version := Read (C.Lines); - Prologue.Prologue_Length := Read (C.Lines); - Tell (C.Lines, Last_Byte_Of_Prologue); - Last_Byte_Of_Prologue := - Last_Byte_Of_Prologue + Offset (Prologue.Prologue_Length) - 1; + Header.Header_Length := Read (C.Lines); + Tell (C.Lines, Last_Byte_Of_Header); + Last_Byte_Of_Header := + Last_Byte_Of_Header + Offset (Header.Header_Length) - 1; - Prologue.Min_Isn_Length := Read (C.Lines); + Header.Minimum_Insn_Length := Read (C.Lines); - if Prologue.Version >= 4 then - Max_Op_Per_Insn := Read (C.Lines); + if Header.Version >= 4 then + Header.Maximum_Op_Per_Insn := Read (C.Lines); + else + Header.Maximum_Op_Per_Insn := 0; end if; - Prologue.Default_Is_Stmt := Read (C.Lines); - Prologue.Line_Base := Read (C.Lines); - Prologue.Line_Range := Read (C.Lines); - Prologue.Opcode_Base := Read (C.Lines); + Header.Default_Is_Stmt := Read 
(C.Lines); + Header.Line_Base := Read (C.Lines); + Header.Line_Range := Read (C.Lines); + Header.Opcode_Base := Read (C.Lines); - -- Opcode_Lengths is an array of Opcode_Base bytes specifying the number - -- of LEB128 operands for each of the standard opcodes. + -- Standard_Opcode_Lengths is an array of Opcode_Base bytes specifying + -- the number of LEB128 operands for each of the standard opcodes. - for J in 1 .. uint32 (Prologue.Opcode_Base - 1) loop - Prologue.Opcode_Lengths (J) := Read (C.Lines); + for J in 1 .. Integer (Header.Opcode_Base - 1) loop + Standard_Opcode_Lengths (J) := Read (C.Lines); end loop; - -- The include directories table follows. This is a list of null - -- terminated strings terminated by a double null. We only store - -- its offset for later decoding. + -- The directories table follows. Up to DWARF 4, this is a list of null + -- terminated strings terminated by a null byte. In DWARF 5, this is a + -- sequence of Directories_Count entries encoded as described by the + -- Directory_Entry_Format field. We store its offset for later decoding. - Tell (C.Lines, Prologue.Includes_Offset); - Char := Read (C.Lines); + if Header.Version <= 4 then + Tell (C.Lines, Header.Directories); + Char := Read (C.Lines); - if Char /= 0 then - loop - Prev := Char; - Char := Read (C.Lines); - exit when Char = 0 and Prev = 0; + if Char /= 0 then + loop + Prev := Char; + Char := Read (C.Lines); + exit when Char = 0 and Prev = 0; + end loop; + end if; + + else + Header.Directory_Entry_Format_Count := Read (C.Lines); + Read_Entry_Format_Array (C.Lines, + Header.Directory_Entry_Format, + Header.Directory_Entry_Format_Count); + + Header.Directories_Count := Read_LEB128 (C.Lines); + Tell (C.Lines, Header.Directories); + for J in 1 .. Header.Directories_Count loop + for K in 1 .. 
Integer (Header.Directory_Entry_Format_Count) loop + Skip_Form (C.Lines, + Header.Directory_Entry_Format (K).Form, + Header.Is64, + Header.Address_Size); + end loop; end loop; end if; - -- The file_names table is next. Each record is a null terminated string - -- for the file name, an unsigned LEB128 directory index, an unsigned - -- LEB128 modification time, and an LEB128 file length. The table is - -- terminated by a null byte. + -- The file_names table is next. Up to DWARF 4, this is a list of record + -- containing a null terminated string for the file name, an unsigned + -- LEB128 directory index in the Directories table, an unsigned LEB128 + -- modification time, and an unsigned LEB128 for the file length; the + -- table is terminated by a null byte. In DWARF 5, this is a sequence + -- of File_Names_Count entries encoded as described by the + -- File_Name_Entry_Format field. We store its offset for later decoding. - Tell (C.Lines, Prologue.File_Names_Offset); + if Header.Version <= 4 then + Tell (C.Lines, Header.File_Names); - loop - -- Read the filename + -- Read the file names - Read_C_String (C.Lines, Buf); - exit when Buf (0) = 0; - Dummy := Read_LEB128 (C.Lines); -- Skip the directory index. - Dummy := Read_LEB128 (C.Lines); -- Skip the modification time. - Dummy := Read_LEB128 (C.Lines); -- Skip the file length. - end loop; + loop + Read_C_String (C.Lines, Buf); + exit when Buf (0) = 0; + Dummy := Read_LEB128 (C.Lines); -- Skip the directory index. + Dummy := Read_LEB128 (C.Lines); -- Skip the modification time. + Dummy := Read_LEB128 (C.Lines); -- Skip the file length. + end loop; + + else + Header.File_Name_Entry_Format_Count := Read (C.Lines); + Read_Entry_Format_Array (C.Lines, + Header.File_Name_Entry_Format, + Header.File_Name_Entry_Format_Count); + + Header.File_Names_Count := Read_LEB128 (C.Lines); + Tell (C.Lines, Header.File_Names); + for J in 1 .. Header.File_Names_Count loop + for K in 1 .. 
Integer (Header.File_Name_Entry_Format_Count) loop + Skip_Form (C.Lines, + Header.File_Name_Entry_Format (K).Form, + Header.Is64, + Header.Address_Size); + end loop; + end loop; + end if; -- Check we're where we think we are. This sanity check ensures we think - -- the prologue ends where the prologue says it does. It we aren't then - -- we've probably gotten out of sync somewhere. + -- the header ends where the header says it does. It we aren't, then we + -- have probably gotten out of sync somewhere. Tell (C.Lines, Off); - if Prologue.Unit_Length /= 0 - and then Off /= Last_Byte_Of_Prologue + 1 + if Header.Unit_Length /= 0 + and then Off /= Last_Byte_Of_Header + 1 then - raise Dwarf_Error with "Parse error reading DWARF information"; + raise Dwarf_Error with "parse error reading DWARF information"; end if; - end Parse_Prologue; + end Parse_Header; - -------------------------- - -- Read_And_Execute_Isn -- - -------------------------- + --------------------------- + -- Read_And_Execute_Insn -- + --------------------------- - procedure Read_And_Execute_Isn + procedure Read_And_Execute_Insn (C : in out Dwarf_Context; Done : out Boolean) is @@ -572,7 +675,7 @@ package body System.Dwarf_Lines is Obj : Object_File renames C.Obj.all; Registers : Line_Info_Registers renames C.Registers; - Prologue : Line_Info_Prologue renames C.Prologue; + Header : Line_Info_Header renames C.Header; begin Done := False; @@ -582,8 +685,8 @@ package body System.Dwarf_Lines is Initialize_State_Machine (C); end if; - -- If we have reached the next prologue, read it. Beware of possibly - -- empty blocks. + -- If we have reached the next header, read it. Beware of possibly empty + -- blocks. -- When testing for the end of section, beware of possible zero padding -- at the end. Bail out as soon as there's not even room for at least a @@ -592,9 +695,9 @@ package body System.Dwarf_Lines is -- or Off+3 > Section_Length. 
Tell (C.Lines, Off); - while Off = C.Next_Prologue loop + while Off = C.Next_Header loop Initialize_State_Machine (C); - Parse_Prologue (C); + Parse_Header (C); Tell (C.Lines, Off); exit when Off + 3 > Length (C.Lines); end loop; @@ -606,7 +709,7 @@ package body System.Dwarf_Lines is -- We are finished when we either reach the end of the section, or we -- have reached zero padding at the end of the section. - if Prologue.Unit_Length = 0 or else Off + 3 > Length (C.Lines) then + if Header.Unit_Length = 0 or else Off + 3 > Length (C.Lines) then Done := True; return; end if; @@ -617,7 +720,7 @@ package body System.Dwarf_Lines is -- Extended opcodes - if Opcode = 0 then + if Opcode = DW_LNS_extended_op then Extended_Length := Read_LEB128 (C.Lines); Extended_Opcode := Read (C.Lines); @@ -656,7 +759,7 @@ package body System.Dwarf_Lines is -- Standard opcodes - elsif Opcode < Prologue.Opcode_Base then + elsif Opcode < Header.Opcode_Base then case Opcode is -- Append a row to the line info matrix @@ -671,7 +774,7 @@ package body System.Dwarf_Lines is uint32_Operand := Read_LEB128 (C.Lines); Registers.Address := Registers.Address + - uint64 (uint32_Operand * uint32 (Prologue.Min_Isn_Length)); + uint64 (uint32_Operand * uint32 (Header.Minimum_Insn_Length)); -- Add a signed word to the current source line @@ -708,8 +811,8 @@ package body System.Dwarf_Lines is Registers.Address := Registers.Address + uint64 - (((255 - Prologue.Opcode_Base) / Prologue.Line_Range) * - Prologue.Min_Isn_Length); + (((255 - Header.Opcode_Base) / Header.Line_Range) * + Header.Minimum_Insn_Length); -- Advance the program counter by a constant @@ -744,7 +847,7 @@ package body System.Dwarf_Lines is Line_Increment : int32; begin - Opcode := Opcode - Prologue.Opcode_Base; + Opcode := Opcode - Header.Opcode_Base; -- The adjusted opcode is a uint8 encoding an address increment -- and a signed line increment. 
The upperbound is allowed to be @@ -752,18 +855,16 @@ package body System.Dwarf_Lines is -- prevent overflows. Address_Increment := - int32 (Opcode / Prologue.Line_Range) * - int32 (Prologue.Min_Isn_Length); + int32 (Opcode / Header.Line_Range) * + int32 (Header.Minimum_Insn_Length); Line_Increment := - int32 (Prologue.Line_Base) + - int32 (Opcode mod Prologue.Line_Range); + int32 (Header.Line_Base) + + int32 (Opcode mod Header.Line_Range); Registers.Address := Registers.Address + uint64 (Address_Increment); Registers.Line := uint32 (int32 (Registers.Line) + Line_Increment); Registers.Basic_Block := False; - Registers.Prologue_End := False; - Registers.Epilogue_Begin := False; Registers.Is_Row := True; end; end if; @@ -775,7 +876,7 @@ package body System.Dwarf_Lines is Registers.Is_Row := False; Done := True; - end Read_And_Execute_Isn; + end Read_And_Execute_Insn; ---------------------- -- Set_Load_Address -- @@ -792,10 +893,10 @@ package body System.Dwarf_Lines is function To_File_Name (C : in out Dwarf_Context; - Code : uint32) return String + File : uint32) return String is Buf : Buffer; - J : uint32; + Off : Offset; Dir_Idx : uint32; pragma Unreferenced (Dir_Idx); @@ -806,25 +907,56 @@ package body System.Dwarf_Lines is Length : uint32; pragma Unreferenced (Length); + File_Entry_Format : Entry_Format_Array + renames C.Header.File_Name_Entry_Format; + begin - Seek (C.Lines, C.Prologue.File_Names_Offset); + Seek (C.Lines, C.Header.File_Names); - -- Find the entry + -- Find the entry. Note that, up to DWARF 4, the index is 1-based + -- whereas, in DWARF 5, it is 0-based. - J := 0; - loop - J := J + 1; - Read_C_String (C.Lines, Buf); + if C.Header.Version <= 4 then + for J in 1 .. 
File loop + Read_C_String (C.Lines, Buf); - if Buf (Buf'First) = 0 then - return "???"; - end if; + if Buf (Buf'First) = 0 then + return "???"; + end if; - Dir_Idx := Read_LEB128 (C.Lines); - Mod_Time := Read_LEB128 (C.Lines); - Length := Read_LEB128 (C.Lines); - exit when J = Code; - end loop; + Dir_Idx := Read_LEB128 (C.Lines); + Mod_Time := Read_LEB128 (C.Lines); + Length := Read_LEB128 (C.Lines); + end loop; + + -- DWARF 5 + + else + for J in 0 .. File loop + for K in 1 .. Integer (C.Header.File_Name_Entry_Format_Count) loop + if File_Entry_Format (K).C_Type = DW_LNCT_path then + case File_Entry_Format (K).Form is + when DW_FORM_string => + Read_C_String (C.Lines, Buf); + + when DW_FORM_line_strp => + Read_Section_Offset (C.Lines, Off, C.Header.Is64); + Seek (C.Line_Str, Off); + Read_C_String (C.Line_Str, Buf); + + when others => + raise Dwarf_Error with "DWARF form not implemented"; + end case; + + else + Skip_Form (C.Lines, + File_Entry_Format (K).Form, + C.Header.Is64, + C.Header.Address_Size); + end if; + end loop; + end loop; + end if; return To_String (Buf); end To_File_Name; @@ -840,6 +972,7 @@ package body System.Dwarf_Lines is is Len32 : uint32; Len64 : uint64; + begin Len32 := Read (S); if Len32 < 16#ffff_fff0# then @@ -872,6 +1005,43 @@ package body System.Dwarf_Lines is end if; end Read_Section_Offset; + ----------------------------- + -- Read_Entry_Format_Array -- + ----------------------------- + + procedure Read_Entry_Format_Array + (S : in out Mapped_Stream; + A : out Entry_Format_Array; + Len : uint8) + is + C_Type, Form : uint32; + N : Integer; + + begin + N := A'First; + + for J in 1 .. Len loop + C_Type := Read_LEB128 (S); + Form := Read_LEB128 (S); + + case C_Type is + when DW_LNCT_path .. DW_LNCT_MD5 => + if N not in A'Range then + raise Dwarf_Error with "DWARF duplicate content type"; + end if; + + A (N) := (C_Type, Form); + N := N + 1; + + when DW_LNCT_lo_user .. 
DW_LNCT_hi_user => + null; + + when others => + raise Dwarf_Error with "DWARF content type not implemented"; + end case; + end loop; + end Read_Entry_Format_Array; + -------------------- -- Aranges_Lookup -- -------------------- @@ -921,31 +1091,53 @@ package body System.Dwarf_Lines is Ptr_Sz : uint8) is Skip : Offset; + begin + -- 7.5.5 Classes and Forms + case Form is when DW_FORM_addr => Skip := Offset (Ptr_Sz); + when DW_FORM_addrx => + Skip := Offset (uint32'(Read_LEB128 (S))); + when DW_FORM_block1 => + Skip := Offset (uint8'(Read (S))); when DW_FORM_block2 => Skip := Offset (uint16'(Read (S))); when DW_FORM_block4 => Skip := Offset (uint32'(Read (S))); - when DW_FORM_data2 | DW_FORM_ref2 => - Skip := 2; - when DW_FORM_data4 | DW_FORM_ref4 => - Skip := 4; - when DW_FORM_data8 | DW_FORM_ref8 | DW_FORM_ref_sig8 => - Skip := 8; - when DW_FORM_string => - while uint8'(Read (S)) /= 0 loop - null; - end loop; - return; when DW_FORM_block | DW_FORM_exprloc => Skip := Offset (uint32'(Read_LEB128 (S))); - when DW_FORM_block1 | DW_FORM_ref1 => - Skip := Offset (uint8'(Read (S))); - when DW_FORM_data1 | DW_FORM_flag => + when DW_FORM_addrx1 + | DW_FORM_data1 + | DW_FORM_flag + | DW_FORM_ref1 + | DW_FORM_strx1 + => Skip := 1; + when DW_FORM_addrx2 + | DW_FORM_data2 + | DW_FORM_ref2 + | DW_FORM_strx2 + => + Skip := 2; + when DW_FORM_addrx3 | DW_FORM_strx3 => + Skip := 3; + when DW_FORM_addrx4 + | DW_FORM_data4 + | DW_FORM_ref4 + | DW_FORM_ref_sup4 + | DW_FORM_strx4 + => + Skip := 4; + when DW_FORM_data8 + | DW_FORM_ref8 + | DW_FORM_ref_sup8 + | DW_FORM_ref_sig8 + => + Skip := 8; + when DW_FORM_data16 => + Skip := 16; when DW_FORM_sdata => declare Val : constant int32 := Read_LEB128 (S); @@ -953,9 +1145,12 @@ package body System.Dwarf_Lines is begin return; end; - when DW_FORM_strp | DW_FORM_ref_addr | DW_FORM_sec_offset => - Skip := (if Is64 then 8 else 4); - when DW_FORM_udata | DW_FORM_ref_udata => + when DW_FORM_udata + | DW_FORM_ref_udata + | DW_FORM_loclistx + | 
DW_FORM_rnglistx + | DW_FORM_strx + => declare Val : constant uint32 := Read_LEB128 (S); pragma Unreferenced (Val); @@ -964,11 +1159,24 @@ package body System.Dwarf_Lines is end; when DW_FORM_flag_present => return; - when DW_FORM_indirect => + when DW_FORM_ref_addr + | DW_FORM_sec_offset + | DW_FORM_strp + | DW_FORM_line_strp + | DW_FORM_strp_sup + => + Skip := (if Is64 then 8 else 4); + when DW_FORM_string => + while uint8'(Read (S)) /= 0 loop + null; + end loop; + return; + when DW_FORM_implicit_const | DW_FORM_indirect => raise Constraint_Error; when others => raise Constraint_Error; end case; + Seek (S, Tell (S) + Skip); end Skip_Form; @@ -981,20 +1189,21 @@ package body System.Dwarf_Lines is Abbrev_Offset : Offset; Abbrev_Num : uint32) is - Num : uint32; Abbrev : uint32; Tag : uint32; Has_Child : uint8; - pragma Unreferenced (Abbrev, Tag, Has_Child); + pragma Unreferenced (Tag, Has_Child); + begin Seek (C.Abbrev, Abbrev_Offset); - Num := 1; + -- 7.5.3 Abbreviations Tables loop - exit when Num = Abbrev_Num; + Abbrev := Read_LEB128 (C.Abbrev); + + exit when Abbrev = Abbrev_Num; - Abbrev := Read_LEB128 (C.Abbrev); Tag := Read_LEB128 (C.Abbrev); Has_Child := Read (C.Abbrev); @@ -1002,12 +1211,19 @@ package body System.Dwarf_Lines is declare Name : constant uint32 := Read_LEB128 (C.Abbrev); Form : constant uint32 := Read_LEB128 (C.Abbrev); + Cst : int32; + pragma Unreferenced (Cst); + begin - exit when Name = 0 and Form = 0; + -- DW_FORM_implicit_const takes its value from the table + + if Form = DW_FORM_implicit_const then + Cst := Read_LEB128 (C.Abbrev); + end if; + + exit when Name = 0 and then Form = 0; end; end loop; - - Num := Num + 1; end loop; end Seek_Abbrev; @@ -1029,23 +1245,40 @@ package body System.Dwarf_Lines is Abbrev : uint32; Has_Child : uint8; pragma Unreferenced (Has_Child); + Unit_Type : uint8; + pragma Unreferenced (Unit_Type); + begin Line_Offset := 0; Success := False; Seek (C.Info, Info_Offset); + -- 7.5.1.1 Compilation Unit Header + 
Read_Initial_Length (C.Info, Unit_Length, Is64); Version := Read (C.Info); - if Version not in 2 .. 4 then - return; - end if; - Read_Section_Offset (C.Info, Abbrev_Offset, Is64); + if Version >= 5 then + Unit_Type := Read (C.Info); + + Addr_Sz := Read (C.Info); + if Addr_Sz /= (Address'Size / SSU) then + return; + end if; + + Read_Section_Offset (C.Info, Abbrev_Offset, Is64); + + elsif Version >= 2 then + Read_Section_Offset (C.Info, Abbrev_Offset, Is64); + + Addr_Sz := Read (C.Info); + if Addr_Sz /= (Address'Size / SSU) then + return; + end if; - Addr_Sz := Read (C.Info); - if Addr_Sz /= (Address'Size / SSU) then + else return; end if; @@ -1060,17 +1293,9 @@ package body System.Dwarf_Lines is Seek_Abbrev (C, Abbrev_Offset, Abbrev); - -- First ULEB128 is the abbrev code - - if Read_LEB128 (C.Abbrev) /= Abbrev then - -- Ill formed abbrev table - return; - end if; - -- Then the tag if Read_LEB128 (C.Abbrev) /= uint32'(DW_TAG_Compile_Unit) then - -- Expect compile unit return; end if; @@ -1104,8 +1329,6 @@ package body System.Dwarf_Lines is end if; end; end loop; - - return; end Debug_Info_Lookup; ------------------------- @@ -1121,6 +1344,7 @@ package body System.Dwarf_Lines is Is64 : Boolean; Version : uint16; Sz : uint8; + begin Success := False; Info_Offset := 0; @@ -1149,6 +1373,7 @@ package body System.Dwarf_Lines is end if; -- Handle alignment on twice the address size + declare Cur_Off : constant Offset := Tell (C.Aranges); Align : constant Offset := 2 * Address'Size / SSU; @@ -1173,6 +1398,7 @@ package body System.Dwarf_Lines is is begin -- Read table + if Address'Size = 32 then declare S, L : uint32; @@ -1182,6 +1408,7 @@ package body System.Dwarf_Lines is Start := Storage_Offset (S); Len := Storage_Count (L); end; + elsif Address'Size = 64 then declare S, L : uint64; @@ -1191,6 +1418,7 @@ package body System.Dwarf_Lines is Start := Storage_Offset (S); Len := Storage_Count (L); end; + else raise Constraint_Error; end if; @@ -1202,8 +1430,11 @@ package body 
System.Dwarf_Lines is procedure Enable_Cache (C : in out Dwarf_Context) is Cache : Search_Array_Access; + begin - -- Phase 1: count number of symbols. Phase 2: fill the cache. + -- Phase 1: count number of symbols. + -- Phase 2: fill the cache. + declare S : Object_Symbol; Val : uint64; @@ -1220,6 +1451,7 @@ package body System.Dwarf_Lines is while S /= Null_Symbol loop -- Discard symbols of length 0 or located outside of the -- execution code section outer boundaries. + Sz := uint32 (Size (S)); Val := Value (S); @@ -1227,11 +1459,11 @@ package body System.Dwarf_Lines is and then Val >= Xcode_Low and then Val <= Xcode_High then - Addr := uint32 (Val - Xcode_Low); -- Try to filter symbols at the same address. This is a best -- effort as they might not be consecutive. + if Addr /= Prev_Addr then Nbr_Symbols := Nbr_Symbols + 1; Prev_Addr := Addr; @@ -1251,6 +1483,7 @@ package body System.Dwarf_Lines is if Phase = 1 then -- Allocate the cache + Cache := new Search_Array (1 .. Nbr_Symbols); C.Cache := Cache; end if; @@ -1258,13 +1491,16 @@ package body System.Dwarf_Lines is pragma Assert (Nbr_Symbols = C.Cache'Last); end; - -- Sort the cache. + -- Sort the cache + Sort_Search_Array (C.Cache.all); -- Set line offsets + if not C.Has_Debug then return; end if; + declare Info_Offset : Offset; Line_Offset : Offset; @@ -1285,6 +1521,7 @@ package body System.Dwarf_Lines is exit when not Success; -- Read table + loop Read_Aranges_Entry (C, Ar_Start, Ar_Len); exit when Ar_Start = 0 and Ar_Len = 0; @@ -1293,6 +1530,7 @@ package body System.Dwarf_Lines is Start := uint32 (Ar_Start - C.Low); -- Search START in the array + First := Cache'First; Last := Cache'Last; Mid := First; -- In case of array with one element @@ -1307,9 +1545,10 @@ package body System.Dwarf_Lines is end if; end loop; - -- Fill info. 
+ -- Fill info -- There can be overlapping symbols + while Mid > Cache'First and then Cache (Mid - 1).First <= Start and then Cache (Mid - 1).First + Cache (Mid - 1).Size > Start @@ -1321,9 +1560,11 @@ package body System.Dwarf_Lines is and then Start + Len > Cache (Mid).First then -- MID is within the bounds + Cache (Mid).Line := uint32 (Line_Offset); elsif Start + Len <= Cache (Mid).First then -- Over + exit; end if; Mid := Mid + 1; @@ -1350,7 +1591,7 @@ package body System.Dwarf_Lines is procedure Set_Result (Match : Line_Info_Registers) is Dir_Idx : uint32; - J : uint32; + Off : Offset; Mod_Time : uint32; pragma Unreferenced (Mod_Time); @@ -1358,46 +1599,123 @@ package body System.Dwarf_Lines is Length : uint32; pragma Unreferenced (Length); + Directory_Entry_Format : Entry_Format_Array + renames C.Header.Directory_Entry_Format; + + File_Entry_Format : Entry_Format_Array + renames C.Header.File_Name_Entry_Format; + begin - Seek (C.Lines, C.Prologue.File_Names_Offset); + Seek (C.Lines, C.Header.File_Names); + Dir_Idx := 0; - -- Find the entry + -- Find the entry. Note that, up to DWARF 4, the index is 1-based + -- whereas, in DWARF 5, it is 0-based. - J := 0; - loop - J := J + 1; - File_Name := Read_C_String (C.Lines); + if C.Header.Version <= 4 then + for J in 1 .. Match.File loop + File_Name := Read_C_String (C.Lines); - if File_Name (File_Name'First) = ASCII.NUL then - -- End of file list, so incorrect entry - return; - end if; + if File_Name (File_Name'First) = ASCII.NUL then + -- End of file list, so incorrect entry + return; + end if; - Dir_Idx := Read_LEB128 (C.Lines); - Mod_Time := Read_LEB128 (C.Lines); - Length := Read_LEB128 (C.Lines); - exit when J = Match.File; - end loop; + Dir_Idx := Read_LEB128 (C.Lines); + Mod_Time := Read_LEB128 (C.Lines); + Length := Read_LEB128 (C.Lines); + end loop; + + if Dir_Idx = 0 then + -- No directory + + Dir_Name := null; + + else + Seek (C.Lines, C.Header.Directories); + + for J in 1 .. 
Dir_Idx loop + Dir_Name := Read_C_String (C.Lines); - if Dir_Idx = 0 then - -- No directory - Dir_Name := null; + if Dir_Name (Dir_Name'First) = ASCII.NUL then + -- End of directory list, so ill-formed table + + return; + end if; + end loop; + end if; + + -- DWARF 5 else - Seek (C.Lines, C.Prologue.Includes_Offset); + for J in 0 .. Match.File loop + for K in 1 .. Integer (C.Header.File_Name_Entry_Format_Count) + loop + if File_Entry_Format (K).C_Type = DW_LNCT_path then + case File_Entry_Format (K).Form is + when DW_FORM_string => + File_Name := Read_C_String (C.Lines); - J := 0; - loop - J := J + 1; - Dir_Name := Read_C_String (C.Lines); + when DW_FORM_line_strp => + Read_Section_Offset (C.Lines, Off, C.Header.Is64); + Seek (C.Line_Str, Off); + File_Name := Read_C_String (C.Line_Str); - if Dir_Name (Dir_Name'First) = ASCII.NUL then - -- End of directory list, so ill-formed table - return; - end if; + when others => + raise Dwarf_Error with "DWARF form not implemented"; + end case; + + elsif File_Entry_Format (K).C_Type = DW_LNCT_directory_index + then + case File_Entry_Format (K).Form is + when DW_FORM_data1 => + Dir_Idx := uint32 (uint8'(Read (C.Lines))); + + when DW_FORM_data2 => + Dir_Idx := uint32 (uint16'(Read (C.Lines))); + + when DW_FORM_udata => + Dir_Idx := Read_LEB128 (C.Lines); - exit when J = Dir_Idx; + when others => + raise Dwarf_Error with "invalid DWARF"; + end case; + else + Skip_Form (C.Lines, + File_Entry_Format (K).Form, + C.Header.Is64, + C.Header.Address_Size); + end if; + end loop; + end loop; + + Seek (C.Lines, C.Header.Directories); + + for J in 0 .. Dir_Idx loop + for K in 1 .. 
Integer (C.Header.Directory_Entry_Format_Count) + loop + if Directory_Entry_Format (K).C_Type = DW_LNCT_path then + case Directory_Entry_Format (K).Form is + when DW_FORM_string => + Dir_Name := Read_C_String (C.Lines); + + when DW_FORM_line_strp => + Read_Section_Offset (C.Lines, Off, C.Header.Is64); + Seek (C.Line_Str, Off); + Dir_Name := Read_C_String (C.Line_Str); + + when others => + raise Dwarf_Error with "DWARF form not implemented"; + end case; + + else + Skip_Form (C.Lines, + Directory_Entry_Format (K).Form, + C.Header.Is64, + C.Header.Address_Size); + end if; + end loop; end loop; end if; @@ -1414,13 +1732,15 @@ package body System.Dwarf_Lines is begin -- Initialize result + Dir_Name := null; File_Name := null; Subprg_Name := (null, 0); Line_Num := 0; + -- Look up the symbol in the cache + if C.Cache /= null then - -- Look in the cache declare Addr_Off : constant uint32 := uint32 (Addr - C.Low); First, Last, Mid : Natural; @@ -1447,12 +1767,13 @@ package body System.Dwarf_Lines is S := Read_Symbol (C.Obj.all, Offset (C.Cache (Mid).Sym)); Subprg_Name := Object_Reader.Name (C.Obj.all, S); else - -- Not found return; end if; end; + + -- Search for the symbol in the binary + else - -- Search symbol S := First_Symbol (C.Obj.all); while S /= Null_Symbol loop if Spans (S, Addr_Int) then @@ -1479,15 +1800,15 @@ package body System.Dwarf_Lines is end if; Seek (C.Lines, Line_Offset); - C.Next_Prologue := 0; + C.Next_Header := 0; Initialize_State_Machine (C); - Parse_Prologue (C); + Parse_Header (C); Previous_Row.Line := 0; -- Advance to the first entry loop - Read_And_Execute_Isn (C, Done); + Read_And_Execute_Insn (C, Done); if C.Registers.Is_Row then Previous_Row := C.Registers; @@ -1499,8 +1820,8 @@ package body System.Dwarf_Lines is -- Read the rest of the entries - while Tell (C.Lines) < C.Next_Prologue loop - Read_And_Execute_Isn (C, Done); + while Tell (C.Lines) < C.Next_Header loop + Read_And_Execute_Insn (C, Done); if C.Registers.Is_Row then if not 
Previous_Row.End_Sequence @@ -1533,6 +1854,7 @@ package body System.Dwarf_Lines is return I - Str'First; end if; end loop; + return Str'Last; end String_Length; @@ -1558,6 +1880,7 @@ package body System.Dwarf_Lines is Subprg_Name : String_Ptr_Len; Line_Num : Natural; Off : Natural; + begin if not C.Has_Debug then Symbol_Found := False; @@ -1657,4 +1980,5 @@ package body System.Dwarf_Lines is Append (Res, ASCII.LF); end loop; end Symbolic_Traceback; + end System.Dwarf_Lines; diff --git a/gcc/ada/libgnat/s-dwalin.ads b/gcc/ada/libgnat/s-dwalin.ads index c7bb103..132d3e1 100644 --- a/gcc/ada/libgnat/s-dwalin.ads +++ b/gcc/ada/libgnat/s-dwalin.ads @@ -30,13 +30,10 @@ ------------------------------------------------------------------------------ -- This package provides routines to read DWARF line number information from --- a generic object file with as little overhead as possible. This allows --- conversions from PC addresses to human readable source locations. +-- a binary file with as little overhead as possible. This allows conversions +-- from PC addresses to human-readable source locations. -- --- Objects must be built with debugging information, however only the --- .debug_line section of the object file is referenced. In cases where object --- size is a consideration it's possible to strip all other .debug sections, --- which will decrease the size of the object significantly. +-- Files must be compiled with at least minimal debugging information (-g1). with Ada.Exceptions.Traceback; @@ -50,11 +47,11 @@ package System.Dwarf_Lines is package SOR renames System.Object_Reader; type Dwarf_Context (In_Exception : Boolean := False) is private; - -- Type encapsulation the state of the Dwarf reader. When In_Exception - -- is True we are parsing as part of a exception handler decorator, we do - -- not want an exception to be raised, the parsing is done safely skipping - -- DWARF file that cannot be read or with stripped debug section for - -- example. 
+ -- Type encapsulating the state of the DWARF reader. When In_Exception is + -- True, we are parsing as part of an exception handler decorator so we do + -- not want another exception to be raised and the parsing is done safely, + -- skipping binary files that cannot be read or have been stripped from + -- their debug sections for example. procedure Open (File_Name : String; @@ -65,14 +62,13 @@ package System.Dwarf_Lines is procedure Set_Load_Address (C : in out Dwarf_Context; Addr : Address); -- Set the load address of a file. This is used to rebase PIE (Position - -- Independant Executable) binaries. + -- Independent Executable) binaries. function Is_Inside (C : Dwarf_Context; Addr : Address) return Boolean; pragma Inline (Is_Inside); -- Return true iff a run-time address Addr is within the module - function Low_Address (C : Dwarf_Context) - return System.Address; + function Low_Address (C : Dwarf_Context) return System.Address; pragma Inline (Low_Address); -- Return the lowest address of C, accounting for the module load address @@ -83,7 +79,7 @@ package System.Dwarf_Lines is -- Dump the cache (if present) procedure Enable_Cache (C : in out Dwarf_Context); - -- Read symbols information to speed up Symbolic_Traceback. + -- Read symbol information to speed up Symbolic_Traceback. procedure Symbolic_Traceback (Cin : Dwarf_Context; @@ -102,45 +98,64 @@ package System.Dwarf_Lines is private -- The following section numbers reference - -- "DWARF Debugging Information Format, Version 3" + -- "DWARF Debugging Information Format, Version 5" -- published by the Standards Group, http://freestandards.org. 
-- 6.2.2 State Machine Registers type Line_Info_Registers is record - Address : SOR.uint64; - File : SOR.uint32; - Line : SOR.uint32; - Column : SOR.uint32; - Is_Stmt : Boolean; - Basic_Block : Boolean; - End_Sequence : Boolean; - Prologue_End : Boolean; - Epilogue_Begin : Boolean; - ISA : SOR.uint32; - Is_Row : Boolean; + Address : SOR.uint64; + File : SOR.uint32; + Line : SOR.uint32; + Column : SOR.uint32; + Is_Stmt : Boolean; + Basic_Block : Boolean; + End_Sequence : Boolean; + -- Prologue_End : Boolean; + -- Epilogue_Begin : Boolean; + -- ISA : SOR.uint32; + -- Discriminator : SOR.uint32; -- DWARF 4/5 + Is_Row : Boolean; -- local end record; - -- 6.2.4 The Line Number Program Prologue - - MAX_OPCODE_LENGTHS : constant := 256; - - type Opcodes_Lengths_Array is - array (SOR.uint32 range 1 .. MAX_OPCODE_LENGTHS) of SOR.uint8; - - type Line_Info_Prologue is record - Unit_Length : SOR.uint32; - Version : SOR.uint16; - Prologue_Length : SOR.uint32; - Min_Isn_Length : SOR.uint8; - Default_Is_Stmt : SOR.uint8; - Line_Base : SOR.int8; - Line_Range : SOR.uint8; - Opcode_Base : SOR.uint8; - Opcode_Lengths : Opcodes_Lengths_Array; - Includes_Offset : SOR.Offset; - File_Names_Offset : SOR.Offset; + -- 6.2.4 The Line Number Program Header + + MAX_OPCODE : constant := 256; + + type Opcode_Length_Array is array (1 .. MAX_OPCODE) of SOR.uint8; + + MAX_ENTRY : constant := 5; + + type Entry_Format_Pair is record + C_Type : SOR.uint32; + Form : SOR.uint32; + end record; + + type Entry_Format_Array is array (1 .. 
MAX_ENTRY) of Entry_Format_Pair; + + type Line_Info_Header is record + Unit_Length : SOR.Offset; + Version : SOR.uint16; + Address_Size : SOR.uint8; -- DWARF 5 + Segment_Selector_Size : SOR.uint8; -- DWARF 5 + Header_Length : SOR.uint32; + Minimum_Insn_Length : SOR.uint8; + Maximum_Op_Per_Insn : SOR.uint8; -- DWARF 4/5 + Default_Is_Stmt : SOR.uint8; + Line_Base : SOR.int8; + Line_Range : SOR.uint8; + Opcode_Base : SOR.uint8; + -- Standard_Opcode_Lengths : Opcode_Length_Array; + Directory_Entry_Format_Count : SOR.uint8; -- DWARF 5 + Directory_Entry_Format : Entry_Format_Array; -- DWARF 5 + Directories_Count : SOR.uint32; -- DWARF 5 + Directories : SOR.Offset; + File_Name_Entry_Format_Count : SOR.uint8; -- DWARF 5 + File_Name_Entry_Format : Entry_Format_Array; -- DWARF 5 + File_Names_Count : SOR.uint32; -- DWARF 5 + File_Names : SOR.Offset; + Is64 : Boolean; -- local end record; type Search_Entry is record @@ -175,15 +190,16 @@ private Cache : Search_Array_Access; -- Quick access to symbol and debug info (when present). - Lines : SOR.Mapped_Stream; - Aranges : SOR.Mapped_Stream; - Info : SOR.Mapped_Stream; - Abbrev : SOR.Mapped_Stream; - -- Dwarf line, aranges, info and abbrev sections + Abbrev : SOR.Mapped_Stream; + Aranges : SOR.Mapped_Stream; + Info : SOR.Mapped_Stream; + Lines : SOR.Mapped_Stream; + Line_Str : SOR.Mapped_Stream; -- DWARF 5 + -- DWARF sections - Prologue : Line_Info_Prologue; - Registers : Line_Info_Registers; - Next_Prologue : SOR.Offset; + Header : Line_Info_Header; + Registers : Line_Info_Registers; + Next_Header : SOR.Offset; -- State for lines end record; -- cgit v1.1 From 0c8ff35eb982a49882ed71b1b85e8436675adf88 Mon Sep 17 00:00:00 2001 From: Bob Duff Date: Tue, 15 Jun 2021 09:12:36 -0400 Subject: [Ada] Clean up Uint fields gcc/ada/ * uintp.ads, types.h: New subtypes of Uint: Valid_Uint, Unat, Upos, Nonzero_Uint with predicates. These correspond to new field types in Gen_IL. 
* gen_il-types.ads (Valid_Uint, Unat, Upos, Nonzero_Uint): New field types. * einfo-utils.ads, einfo-utils.adb, fe.h (Known_Alignment, Init_Alignment): Use the initial zero value to represent "unknown". This will ensure that if Alignment is called before Set_Alignment, the compiler will blow up (if assertions are enabled). * atree.ads, atree.adb, atree.h, gen_il-gen.adb (Get_Valid_32_Bit_Field): New generic low-level getter for subtypes of Uint. (Copy_Alignment): New procedure to copy Alignment field even when Unknown. (Init_Object_Size_Align, Init_Size_Align): Do not bypass the Init_ procedures. * exp_pakd.adb, freeze.adb, layout.adb, repinfo.adb, sem_util.adb: Protect calls to Alignment with Known_Alignment. Use Copy_Alignment when it might be unknown. * gen_il-gen-gen_entities.adb (Alignment, String_Literal_Length): Use type Unat instead of Uint, to ensure that the field is always Set_ before we get it, and that it is set to a nonnegative value. (Enumeration_Pos): Unat. (Enumeration_Rep): Valid_Uint. Can be negative, but must be valid before fetching. (Discriminant_Number): Upos. (Renaming_Map): Remove. * gen_il-gen-gen_nodes.adb (Char_Literal_Value, Reason): Unat. (Intval, Corresponding_Integer_Value): Valid_Uint. * gen_il-internals.ads: New functions for dealing with special defaults and new subtypes of Uint. * scans.ads: Correct comments. * scn.adb (Post_Scan): Do not set Intval to No_Uint; that is no longer allowed. * sem_ch13.adb (Analyze_Enumeration_Representation_Clause): Do not set Enumeration_Rep to No_Uint; that is no longer allowed. (Offset_Value): Protect calls to Alignment with Known_Alignment. * sem_prag.adb (Set_Atomic_VFA): Do not use Uint_0 to mean "unknown"; call Init_Alignment instead. * sinfo.ads: Minor comment fix. * treepr.adb: Deal with printing of new field types. * einfo.ads, gen_il-fields.ads (Renaming_Map): Remove. * gcc-interface/decl.c (gnat_to_gnu_entity): Use Known_Alignment before calling Alignment. 
This preserves some probably buggy behavior: if the alignment is not set, it previously defaulted to Uint_0; we now make that explicit. Use Copy_Alignment, because "Set_Alignment (Y, Alignment (X));" no longer works when the Alignment of X has not yet been set. * gcc-interface/trans.c (process_freeze_entity): Use Copy_Alignment. --- gcc/ada/atree.adb | 24 +++++++++++-- gcc/ada/atree.ads | 8 +++++ gcc/ada/atree.h | 9 +++++ gcc/ada/einfo-utils.adb | 25 ++++++++----- gcc/ada/einfo-utils.ads | 7 ++++ gcc/ada/einfo.ads | 12 ------- gcc/ada/exp_pakd.adb | 4 +-- gcc/ada/fe.h | 3 ++ gcc/ada/freeze.adb | 12 ++++--- gcc/ada/gcc-interface/decl.c | 8 +++-- gcc/ada/gcc-interface/trans.c | 2 +- gcc/ada/gen_il-fields.ads | 1 - gcc/ada/gen_il-gen-gen_entities.adb | 27 ++++++-------- gcc/ada/gen_il-gen-gen_nodes.adb | 12 +++---- gcc/ada/gen_il-gen.adb | 72 ++++++++++++++++++------------------- gcc/ada/gen_il-internals.ads | 21 +++++++++++ gcc/ada/gen_il-types.ads | 19 ++++++---- gcc/ada/layout.adb | 2 +- gcc/ada/repinfo.adb | 24 ++++++++----- gcc/ada/scans.ads | 6 ++-- gcc/ada/scn.adb | 9 ++++- gcc/ada/sem_ch13.adb | 26 +++++++++----- gcc/ada/sem_prag.adb | 2 +- gcc/ada/sem_util.adb | 4 +-- gcc/ada/sinfo.ads | 12 +++---- gcc/ada/treepr.adb | 43 ++++++++++++++++++---- gcc/ada/types.h | 4 +++ gcc/ada/uintp.ads | 5 +++ 28 files changed, 268 insertions(+), 135 deletions(-) (limited to 'gcc') diff --git a/gcc/ada/atree.adb b/gcc/ada/atree.adb index 33cde5a..c7e295b 100644 --- a/gcc/ada/atree.adb +++ b/gcc/ada/atree.adb @@ -25,7 +25,7 @@ -- Assertions in this package are too slow, and are mostly needed when working -- on this package itself, or on gen_il, so we disable them. --- To debug low-level bugs in this area, comment out the following pragmas, +-- To debug low-level bugs in this area, comment out the following pragma, -- and run with -gnatd_v.
pragma Assertion_Policy (Ignore); @@ -521,19 +521,37 @@ package body Atree is (N : Node_Or_Entity_Id; Offset : Field_Offset) return Field_Type is function Get_Field is new Get_32_Bit_Field (Field_Type) with Inline; + Result : Field_Type; begin -- If the field has not yet been set, it will be equal to zero. -- That is of the "wrong" type, so we fetch it as a -- Field_Size_32_Bit. if Get_32_Bit_Val (N, Offset) = 0 then - return Default_Val; + Result := Default_Val; else - return Get_Field (N, Offset); + Result := Get_Field (N, Offset); end if; + + return Result; end Get_32_Bit_Field_With_Default; + function Get_Valid_32_Bit_Field + (N : Node_Or_Entity_Id; Offset : Field_Offset) return Field_Type + is + pragma Assert (Get_32_Bit_Val (N, Offset) /= 0); + -- If the field has not yet been set, it will be equal to zero. + -- This asserts that we don't call Get_ before Set_. Note that + -- the predicate on the Val parameter of Set_ checks for the No_... + -- value, so it can't possibly be (for example) No_Uint here. + + function Get_Field is new Get_32_Bit_Field (Field_Type) with Inline; + Result : constant Field_Type := Get_Field (N, Offset); + begin + return Result; + end Get_Valid_32_Bit_Field; + procedure Set_1_Bit_Field (N : Node_Or_Entity_Id; Offset : Field_Offset; Val : Field_Type) is diff --git a/gcc/ada/atree.ads b/gcc/ada/atree.ads index 42df950..6fb5aa6 100644 --- a/gcc/ada/atree.ads +++ b/gcc/ada/atree.ads @@ -764,6 +764,14 @@ package Atree is generic type Field_Type is private; + function Get_Valid_32_Bit_Field + (N : Node_Or_Entity_Id; Offset : Field_Offset) return Field_Type + with Inline; + -- Assert that the field has already been set. This is currently used + -- only for Uints, but could be used more generally. 
+ + generic + type Field_Type is private; procedure Set_1_Bit_Field (N : Node_Or_Entity_Id; Offset : Field_Offset; Val : Field_Type) with Inline; diff --git a/gcc/ada/atree.h b/gcc/ada/atree.h index e4750e1..08b791c 100644 --- a/gcc/ada/atree.h +++ b/gcc/ada/atree.h @@ -79,6 +79,7 @@ INLINE unsigned int Get_8_Bit_Field (Node_Id, Field_Offset); INLINE unsigned int Get_32_Bit_Field (Node_Id, Field_Offset); INLINE unsigned int Get_32_Bit_Field_With_Default (Node_Id, Field_Offset, unsigned int); +INLINE unsigned int Get_Valid_32_Bit_Field (Node_Id, Field_Offset); INLINE unsigned int Get_1_Bit_Field (Node_Id N, Field_Offset Offset) @@ -127,6 +128,14 @@ Get_32_Bit_Field_With_Default (Node_Id N, Field_Offset Offset, return slot == Empty ? Default_Value : slot; } +INLINE unsigned int +Get_Valid_32_Bit_Field (Node_Id N, Field_Offset Offset) +{ + any_slot slot = *(Slots_Ptr + Node_Offsets_Ptr[N] + Offset); + gcc_assert (slot != Empty); + return slot; +} + #ifdef __cplusplus } #endif diff --git a/gcc/ada/einfo-utils.adb b/gcc/ada/einfo-utils.adb index 21d7bfb..4690c8f 100644 --- a/gcc/ada/einfo-utils.adb +++ b/gcc/ada/einfo-utils.adb @@ -364,7 +364,7 @@ package body Einfo.Utils is procedure Init_Alignment (Id : E) is begin - Set_Alignment (Id, Uint_0); + Reinit_Field_To_Zero (Id, F_Alignment); end Init_Alignment; procedure Init_Alignment (Id : E; V : Int) is @@ -452,6 +452,15 @@ package body Einfo.Utils is Set_RM_Size (Id, UI_From_Int (V)); end Init_RM_Size; + procedure Copy_Alignment (To, From : E) is + begin + if Known_Alignment (From) then + Set_Alignment (To, Alignment (From)); + else + Init_Alignment (To); + end if; + end Copy_Alignment; + ----------------------------- -- Init_Component_Location -- ----------------------------- @@ -471,8 +480,8 @@ package body Einfo.Utils is procedure Init_Object_Size_Align (Id : E) is begin - Set_Esize (Id, Uint_0); - Set_Alignment (Id, Uint_0); + Init_Esize (Id); + Init_Alignment (Id); end Init_Object_Size_Align; --------------- @@ 
-499,9 +508,9 @@ package body Einfo.Utils is procedure Init_Size_Align (Id : E) is begin pragma Assert (Ekind (Id) in Type_Kind | E_Void); - Set_Esize (Id, Uint_0); - Set_RM_Size (Id, Uint_0); - Set_Alignment (Id, Uint_0); + Init_Esize (Id); + Init_RM_Size (Id); + Init_Alignment (Id); end Init_Size_Align; ---------------------------------------------- @@ -509,9 +518,9 @@ package body Einfo.Utils is ---------------------------------------------- function Known_Alignment (E : Entity_Id) return B is + Result : constant B := not Field_Is_Initial_Zero (E, F_Alignment); begin - return Alignment (E) /= Uint_0 - and then Alignment (E) /= No_Uint; + return Result; end Known_Alignment; function Known_Component_Bit_Offset (E : Entity_Id) return B is diff --git a/gcc/ada/einfo-utils.ads b/gcc/ada/einfo-utils.ads index dbf3ad6..a6517b9 100644 --- a/gcc/ada/einfo-utils.ads +++ b/gcc/ada/einfo-utils.ads @@ -454,6 +454,13 @@ package Einfo.Utils is procedure Init_Normalized_Position_Max (Id : E); procedure Init_RM_Size (Id : E); + -- The following Copy_xxx procedures copy the value of xxx from From to + -- To. If xxx is set to its initial invalid (zero-bits) value, then it is + -- reset to invalid in To. We only have Copy_Alignment so far, but more are + -- planned. + + procedure Copy_Alignment (To, From : E); + pragma Inline (Init_Alignment); pragma Inline (Init_Component_Bit_Offset); pragma Inline (Init_Component_Size); diff --git a/gcc/ada/einfo.ads b/gcc/ada/einfo.ads index 6a8d493..e87ce4c 100644 --- a/gcc/ada/einfo.ads +++ b/gcc/ada/einfo.ads @@ -4173,15 +4173,6 @@ package Einfo is -- within an accept statement. For all remaining cases (discriminants, -- loop parameters) the field is Empty. --- Renaming_Map --- Defined in generic subprograms, generic packages, and their --- instances. Also defined in the instances of the corresponding --- bodies. 
Denotes the renaming map (generic entities => instance --- entities) used to construct the instance by giving an index into --- the tables used to represent these maps. See Sem_Ch12 for further --- details. The maps for package instances are also used when the --- instance is the actual corresponding to a formal package. - -- Requires_Overriding -- Defined in all subprograms and entries. Set for subprograms that -- require overriding as defined by RM-2005-3.9.3(6/2). Note that this @@ -5474,7 +5465,6 @@ package Einfo is -- E_Function -- E_Generic_Function -- Mechanism (Mechanism_Type) - -- Renaming_Map -- Handler_Records (non-generic case only) -- Protected_Body_Subprogram -- Next_Inlined_Subprogram @@ -5734,7 +5724,6 @@ package Einfo is -- E_Package -- E_Generic_Package -- Dependent_Instances (for an instance) - -- Renaming_Map -- Handler_Records (non-generic case only) -- Generic_Homonym (generic case only) -- Associated_Formal_Package @@ -5832,7 +5821,6 @@ package Einfo is -- E_Procedure -- E_Generic_Procedure -- Associated_Node_For_Itype $$$ E_Procedure - -- Renaming_Map -- Handler_Records (non-generic case only) -- Protected_Body_Subprogram -- Next_Inlined_Subprogram diff --git a/gcc/ada/exp_pakd.adb b/gcc/ada/exp_pakd.adb index 47919fc..88f86f4 100644 --- a/gcc/ada/exp_pakd.adb +++ b/gcc/ada/exp_pakd.adb @@ -613,7 +613,7 @@ package body Exp_Pakd is -- type or component, take it into account. 
if Csize <= 2 or else Csize = 4 or else Csize mod 2 /= 0 - or else Alignment (Typ) = 1 + or else (Known_Alignment (Typ) and then Alignment (Typ) = 1) or else Component_Alignment (Typ) = Calign_Storage_Unit then if Reverse_Storage_Order (Typ) then @@ -623,7 +623,7 @@ package body Exp_Pakd is end if; elsif Csize mod 4 /= 0 - or else Alignment (Typ) = 2 + or else (Known_Alignment (Typ) and then Alignment (Typ) = 2) then if Reverse_Storage_Order (Typ) then PB_Type := RTE (RE_Rev_Packed_Bytes2); diff --git a/gcc/ada/fe.h b/gcc/ada/fe.h index d7ab361b..4517c59 100644 --- a/gcc/ada/fe.h +++ b/gcc/ada/fe.h @@ -636,6 +636,9 @@ B Known_Static_Normalized_Position_Max (Entity_Id E); #define Known_Static_RM_Size einfo__utils__known_static_rm_size B Known_Static_RM_Size (Entity_Id E); +#define Copy_Alignment einfo__utils__copy_alignment +B Copy_Alignment(Entity_Id To, Entity_Id From); + #define Is_Discrete_Or_Fixed_Point_Type einfo__utils__is_discrete_or_fixed_point_type B Is_Discrete_Or_Fixed_Point_Type (E Id); diff --git a/gcc/ada/freeze.adb b/gcc/ada/freeze.adb index 12d10ee..84502d8 100644 --- a/gcc/ada/freeze.adb +++ b/gcc/ada/freeze.adb @@ -3307,7 +3307,7 @@ package body Freeze is -- cases of types whose alignment exceeds their size (the -- padded type cases). - if Csiz /= 0 then + if Csiz /= 0 and then Known_Alignment (Ctyp) then declare A : constant Uint := Alignment_In_Bits (Ctyp); begin @@ -3478,9 +3478,12 @@ package body Freeze is -- Processing that is done only for subtypes else - -- Acquire alignment from base type + -- Acquire alignment from base type. Known_Alignment of the base + -- type is False for Wide_String, for example. 
- if not Known_Alignment (Arr) then + if not Known_Alignment (Arr) + and then Known_Alignment (Base_Type (Arr)) + then Set_Alignment (Arr, Alignment (Base_Type (Arr))); Adjust_Esize_Alignment (Arr); end if; @@ -3642,7 +3645,8 @@ package body Freeze is end if; if not Has_Alignment_Clause (Arr) then - Set_Alignment (Arr, Alignment (Packed_Array_Impl_Type (Arr))); + Copy_Alignment + (To => Arr, From => Packed_Array_Impl_Type (Arr)); end if; end if; diff --git a/gcc/ada/gcc-interface/decl.c b/gcc/ada/gcc-interface/decl.c index b09e20d..83ca31a 100644 --- a/gcc/ada/gcc-interface/decl.c +++ b/gcc/ada/gcc-interface/decl.c @@ -4417,9 +4417,13 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, bool definition) const bool derived_p = Is_Derived_Type (gnat_entity); const Entity_Id gnat_parent = derived_p ? Etype (Base_Type (gnat_entity)) : Empty; + /* The following test for Known_Alignment preserves the old behavior, + but is probably wrong. */ const unsigned int inherited_align = derived_p - ? UI_To_Int (Alignment (gnat_parent)) * BITS_PER_UNIT + ? (Known_Alignment (gnat_parent) + ? UI_To_Int (Alignment (gnat_parent)) * BITS_PER_UNIT + : 0) : POINTER_SIZE; const unsigned int align = MAX (TYPE_ALIGN (gnu_type), inherited_align); @@ -4724,7 +4728,7 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, bool definition) && Present (gnat_annotate_type)) { if (!Known_Alignment (gnat_entity)) - Set_Alignment (gnat_entity, Alignment (gnat_annotate_type)); + Copy_Alignment (gnat_entity, gnat_annotate_type); if (!Known_Esize (gnat_entity)) Set_Esize (gnat_entity, Esize (gnat_annotate_type)); if (!Known_RM_Size (gnat_entity)) diff --git a/gcc/ada/gcc-interface/trans.c b/gcc/ada/gcc-interface/trans.c index 8f8bc70..f61183d 100644 --- a/gcc/ada/gcc-interface/trans.c +++ b/gcc/ada/gcc-interface/trans.c @@ -9274,7 +9274,7 @@ process_freeze_entity (Node_Id gnat_node) /* Propagate back-annotations from full view to partial view. 
*/ if (!Known_Alignment (gnat_entity)) - Set_Alignment (gnat_entity, Alignment (full_view)); + Copy_Alignment (gnat_entity, full_view); if (!Known_Esize (gnat_entity)) Set_Esize (gnat_entity, Esize (full_view)); diff --git a/gcc/ada/gen_il-fields.ads b/gcc/ada/gen_il-fields.ads index e2592ee..0a3046e 100644 --- a/gcc/ada/gen_il-fields.ads +++ b/gcc/ada/gen_il-fields.ads @@ -868,7 +868,6 @@ package Gen_IL.Fields is Relative_Deadline_Variable, Renamed_In_Spec, Renamed_Or_Alias, -- Shared among Alias, Renamed_Entity, Renamed_Object - Renaming_Map, Requires_Overriding, Return_Applies_To, Return_Present, diff --git a/gcc/ada/gen_il-gen-gen_entities.adb b/gcc/ada/gen_il-gen-gen_entities.adb index d5977ad..41dd232 100644 --- a/gcc/ada/gen_il-gen-gen_entities.adb +++ b/gcc/ada/gen_il-gen-gen_entities.adb @@ -246,7 +246,7 @@ begin -- Gen_IL.Gen.Gen_Entities -- dummy type for the return type of a procedure (the reason we create -- this type is to share the circuits for performing overload -- resolution on calls). 
- (Sm (Alignment, Uint), + (Sm (Alignment, Unat), Sm (Contract, Node_Id), Sm (Is_Elaboration_Warnings_OK_Id, Flag), Sm (Original_Record_Component, Node_Id), @@ -272,7 +272,7 @@ begin -- Gen_IL.Gen.Gen_Entities Sm (Debug_Renaming_Link, Node_Id), Sm (Discriminal_Link, Node_Id), Sm (Discriminant_Default_Value, Node_Id), - Sm (Discriminant_Number, Uint), + Sm (Discriminant_Number, Upos), Sm (Enclosing_Scope, Node_Id), Sm (Entry_Bodies_Array, Node_Id, Pre => "Has_Entries (N)"), @@ -293,7 +293,6 @@ begin -- Gen_IL.Gen.Gen_Entities Sm (Last_Entity, Node_Id), Sm (Next_Inlined_Subprogram, Node_Id), Sm (Renamed_Or_Alias, Node_Id), -- See Einfo.Utils - Sm (Renaming_Map, Uint), Sm (Return_Applies_To, Node_Id), Sm (Scalar_Range, Node_Id), Sm (Scale_Value, Uint), @@ -334,7 +333,7 @@ begin -- Gen_IL.Gen.Gen_Entities Ab (Allocatable_Kind, Object_Kind, (Sm (Activation_Record_Component, Node_Id), - Sm (Alignment, Uint), + Sm (Alignment, Unat), Sm (Esize, Uint), Sm (Interface_Name, Node_Id), Sm (Is_Finalized_Transient, Flag), @@ -374,7 +373,7 @@ begin -- Gen_IL.Gen.Gen_Entities Sm (CR_Discriminant, Node_Id), Sm (Discriminal, Node_Id), Sm (Discriminant_Default_Value, Node_Id), - Sm (Discriminant_Number, Uint), + Sm (Discriminant_Number, Upos), Sm (Is_Completely_Hidden, Flag))); Cc (E_Loop_Parameter, Allocatable_Kind); @@ -400,7 +399,7 @@ begin -- Gen_IL.Gen.Gen_Entities -- Formal parameters are also objects (Sm (Activation_Record_Component, Node_Id), Sm (Actual_Subtype, Node_Id), - Sm (Alignment, Uint), + Sm (Alignment, Unat), Sm (Default_Expr_Function, Node_Id), Sm (Default_Value, Node_Id), Sm (Entry_Component, Node_Id), @@ -456,7 +455,7 @@ begin -- Gen_IL.Gen.Gen_Entities -- Named numbers created by a number declaration with a real value Ab (Type_Kind, Void_Or_Type_Kind, - (Sm (Alignment, Uint), + (Sm (Alignment, Unat), Sm (Associated_Node_For_Itype, Node_Id), Sm (Can_Use_Internal_Rep, Flag, Base_Type_Only, Pre => "Ekind (Base_Type (N)) in Access_Subprogram_Kind"), @@ -745,7 +744,7 
@@ begin -- Gen_IL.Gen.Gen_Entities Cc (E_String_Literal_Subtype, Array_Kind, -- A special string subtype, used only to describe the type of a string -- literal (will always be one dimensional, with literal bounds). - (Sm (String_Literal_Length, Uint), + (Sm (String_Literal_Length, Unat), Sm (String_Literal_Low_Bound, Node_Id))); Ab (Class_Wide_Kind, Aggregate_Kind, @@ -970,11 +969,11 @@ begin -- Gen_IL.Gen.Gen_Entities Cc (E_Enumeration_Literal, Overloadable_Kind, -- An enumeration literal, created by the use of the literal in an -- enumeration type definition. - (Sm (Enumeration_Pos, Uint), - Sm (Enumeration_Rep, Uint), + (Sm (Enumeration_Pos, Unat), + Sm (Enumeration_Rep, Valid_Uint), Sm (Enumeration_Rep_Expr, Node_Id), Sm (Esize, Uint), - Sm (Alignment, Uint), + Sm (Alignment, Unat), Sm (Interface_Name, Node_Id))); Ab (Subprogram_Kind, Overloadable_Kind, @@ -1039,7 +1038,6 @@ begin -- Gen_IL.Gen.Gen_Entities Sm (Protected_Subprogram, Node_Id), Sm (Protection_Object, Node_Id), Sm (Related_Expression, Node_Id), - Sm (Renaming_Map, Uint), Sm (Rewritten_For_C, Flag), Sm (Thunk_Entity, Node_Id, Pre => "Is_Thunk (N)"), @@ -1089,7 +1087,6 @@ begin -- Gen_IL.Gen.Gen_Entities Sm (Protected_Subprogram, Node_Id), Sm (Protection_Object, Node_Id), Sm (Receiving_Entry, Node_Id), - Sm (Renaming_Map, Uint), Sm (Static_Initialization, Node_Id, Pre => "not Is_Dispatching_Operation (N)"), Sm (Thunk_Entity, Node_Id, @@ -1184,7 +1181,7 @@ begin -- Gen_IL.Gen.Gen_Entities -- An exception created by an exception declaration. The exception -- itself uses E_Exception for the Ekind, the implicit type that is -- created to represent its type uses the Ekind E_Exception_Type. 
- (Sm (Alignment, Uint), + (Sm (Alignment, Unat), Sm (Esize, Uint), Sm (Interface_Name, Node_Id), Sm (Is_Raised, Flag), @@ -1204,7 +1201,6 @@ begin -- Gen_IL.Gen.Gen_Entities Sm (Is_Elaboration_Warnings_OK_Id, Flag), Sm (Last_Entity, Node_Id), Sm (Renamed_Or_Alias, Node_Id), - Sm (Renaming_Map, Uint), Sm (Scope_Depth_Value, Uint), Sm (SPARK_Pragma, Node_Id), Sm (SPARK_Pragma_Inherited, Flag))); @@ -1299,7 +1295,6 @@ begin -- Gen_IL.Gen.Gen_Entities Sm (Related_Instance, Node_Id), Sm (Renamed_In_Spec, Flag), Sm (Renamed_Or_Alias, Node_Id), - Sm (Renaming_Map, Uint), Sm (Scope_Depth_Value, Uint), Sm (SPARK_Aux_Pragma, Node_Id), Sm (SPARK_Aux_Pragma_Inherited, Flag), diff --git a/gcc/ada/gen_il-gen-gen_nodes.adb b/gcc/ada/gen_il-gen-gen_nodes.adb index 2427a1e..55ba71d 100644 --- a/gcc/ada/gen_il-gen-gen_nodes.adb +++ b/gcc/ada/gen_il-gen-gen_nodes.adb @@ -193,7 +193,7 @@ begin -- Gen_IL.Gen.Gen_Nodes Cc (N_Character_Literal, N_Direct_Name, (Sy (Chars, Name_Id, Default_No_Name), - Sy (Char_Literal_Value, Uint))); + Sy (Char_Literal_Value, Unat))); Ab (N_Op, N_Has_Entity, (Sm (Do_Overflow_Check, Flag), @@ -412,26 +412,26 @@ begin -- Gen_IL.Gen.Gen_Nodes Cc (N_Raise_Constraint_Error, N_Raise_xxx_Error, (Sy (Condition, Node_Id, Default_Empty), - Sy (Reason, Uint))); + Sy (Reason, Unat))); Cc (N_Raise_Program_Error, N_Raise_xxx_Error, (Sy (Condition, Node_Id, Default_Empty), - Sy (Reason, Uint))); + Sy (Reason, Unat))); Cc (N_Raise_Storage_Error, N_Raise_xxx_Error, (Sy (Condition, Node_Id, Default_Empty), - Sy (Reason, Uint))); + Sy (Reason, Unat))); Ab (N_Numeric_Or_String_Literal, N_Subexpr); Cc (N_Integer_Literal, N_Numeric_Or_String_Literal, - (Sy (Intval, Uint), + (Sy (Intval, Valid_Uint), Sm (Original_Entity, Node_Id), Sm (Print_In_Hex, Flag))); Cc (N_Real_Literal, N_Numeric_Or_String_Literal, (Sy (Realval, Ureal), - Sm (Corresponding_Integer_Value, Uint), + Sm (Corresponding_Integer_Value, Valid_Uint), Sm (Is_Machine_Number, Flag), Sm (Original_Entity, Node_Id))); 
diff --git a/gcc/ada/gen_il-gen.adb b/gcc/ada/gen_il-gen.adb index 94f7c9c..a9c7bd7 100644 --- a/gcc/ada/gen_il-gen.adb +++ b/gcc/ada/gen_il-gen.adb @@ -849,6 +849,7 @@ package body Gen_IL.Gen is | Name_Id | String_Id | Uint + | Uint_Subtype | Ureal | Source_Ptr | Union_Id @@ -1562,22 +1563,25 @@ package body Gen_IL.Gen is (S : in out Sink; T : Type_Enum) is begin - -- Special case for types that have defaults; instantiate - -- Get_32_Bit_Field_With_Default and pass in the Default_Val. + -- Special case for subtypes of Uint that have predicates. Use + -- Get_Valid_32_Bit_Field in that case. - if T in Elist_Id | Uint then + if T in Uint_Subtype then pragma Assert (Field_Size (T) = 32); + Put (S, LF & "function " & Low_Level_Getter_Name (T) & + " is new Get_Valid_32_Bit_Field (" & + Get_Set_Id_Image (T) & + ") with " & Inline & ";" & LF); - declare - Default_Val : constant String := - (if T = Elist_Id then "No_Elist" else "Uint_0"); + -- Special case for types that have special defaults; instantiate + -- Get_32_Bit_Field_With_Default and pass in the Default_Val. - begin - Put (S, LF & "function " & Low_Level_Getter_Name (T) & - " is new Get_32_Bit_Field_With_Default (" & - Get_Set_Id_Image (T) & ", " & Default_Val & - ") with " & Inline & ";" & LF); - end; + elsif Field_Has_Special_Default (T) then + pragma Assert (Field_Size (T) = 32); + Put (S, LF & "function " & Low_Level_Getter_Name (T) & + " is new Get_32_Bit_Field_With_Default (" & + Get_Set_Id_Image (T) & ", " & Special_Default (T) & + ") with " & Inline & ";" & LF); -- Otherwise, instantiate the normal getter for the right size in -- bits. 
@@ -1588,16 +1592,16 @@ package body Gen_IL.Gen is Get_Set_Id_Image (T) & ") with " & Inline & ";" & LF); end if; - -- No special case for the setter - if T in Node_Kind_Type | Entity_Kind_Type then Put (S, "pragma Warnings (Off);" & LF); -- Set_Node_Kind_Type and Set_Entity_Kind_Type might not be called end if; + -- No special cases for the setter + Put (S, "procedure " & Low_Level_Setter_Name (T) & " is new Set_" & - Image (Field_Size (T)) & "_Bit_Field (" & Get_Set_Id_Image (T) & - ") with " & Inline & ";" & LF); + Image (Field_Size (T)) & "_Bit_Field (" & Get_Set_Id_Image (T) & + ") with " & Inline & ";" & LF); if T in Node_Kind_Type | Entity_Kind_Type then Put (S, "pragma Warnings (On);" & LF); @@ -1689,11 +1693,9 @@ package body Gen_IL.Gen is procedure Put_Getter_Spec (S : in out Sink; F : Field_Enum) is begin - Put (S, "function " & Image (F) & LF); - Increase_Indent (S, 2); - Put (S, "(N : " & N_Type (F) & ") return " & + Put (S, "function " & Image (F)); + Put (S, " (N : " & N_Type (F) & ") return " & Get_Set_Id_Image (Field_Table (F).Field_Type)); - Decrease_Indent (S, 2); end Put_Getter_Spec; --------------------- @@ -1757,11 +1759,9 @@ package body Gen_IL.Gen is Default : constant String := (if Rec.Field_Type = Flag then " := True" else ""); begin - Put (S, "procedure Set_" & Image (F) & LF); - Increase_Indent (S, 2); - Put (S, "(N : " & N_Type (F) & "; Val : " & + Put (S, "procedure Set_" & Image (F)); + Put (S, " (N : " & N_Type (F) & "; Val : " & Get_Set_Id_Image (Rec.Field_Type) & Default & ")"); - Decrease_Indent (S, 2); end Put_Setter_Spec; --------------------- @@ -2776,7 +2776,8 @@ package body Gen_IL.Gen is Put (S, "-- This package is not used by the compiler." & LF); Put (S, "-- The body contains tables that are intended to be used by humans to" & LF); - Put (S, "-- help understand the layout of various data structures." & LF & LF); + Put (S, "-- help understand the layout of various data structures." 
& LF); + Put (S, "-- Search for ""--"" to find major sections of code." & LF & LF); Put (S, "pragma Elaborate_Body;" & LF); @@ -3001,20 +3002,19 @@ package body Gen_IL.Gen is Increase_Indent (S, 3); - -- Same special case as in Put_Low_Level_Accessor_Instantiations + -- Same special cases for getters as in + -- Put_Low_Level_Accessor_Instantiations. - if T in Elist_Id | Uint then + if T in Uint_Subtype then pragma Assert (Field_Size (T) = 32); + Put (S, "{ return (" & T_Image & + ") Get_Valid_32_Bit_Field(N, Offset); }" & LF & LF); - declare - Default_Val : constant String := - (if T = Elist_Id then "No_Elist" else "Uint_0"); - - begin - Put (S, "{ return (" & T_Image & - ") Get_32_Bit_Field_With_Default(N, Offset, " & - Default_Val & "); }" & LF & LF); - end; + elsif Field_Has_Special_Default (T) then + pragma Assert (Field_Size (T) = 32); + Put (S, "{ return (" & T_Image & + ") Get_32_Bit_Field_With_Default(N, Offset, " & + Special_Default (T) & "); }" & LF & LF); else Put (S, "{ return (" & T_Image & ") Get_" & diff --git a/gcc/ada/gen_il-internals.ads b/gcc/ada/gen_il-internals.ads index b8911ec..ae448de 100644 --- a/gcc/ada/gen_il-internals.ads +++ b/gcc/ada/gen_il-internals.ads @@ -174,6 +174,27 @@ package Gen_IL.Internals is -- Table mapping from enumeration literals representing fields to -- information about the field. + -- Getters for fields of types Elist_Id and Uint need special treatment of + -- defaults. In particular, if the field has its initial 0 value, getters + -- need to return the appropriate default value. Note that these defaults + -- have nothing to do with the defaults mentioned above for Nmake + -- functions. + + function Field_Has_Special_Default + (Field_Type : Type_Enum) return Boolean is + (Field_Type in Elist_Id | Uint); + -- These are the field types that have a default value that is not + -- represented as zero. 
+ + function Special_Default + (Field_Type : Type_Enum) return String is + (if Field_Type = Elist_Id then "No_Elist" else "Uint_0"); + + function Invalid_Val + (Field_Type : Uint_Subtype) return String is + ("No_Uint"); + -- We could generalize this to other than Uint at some point + ---------------- subtype Node_Field is diff --git a/gcc/ada/gen_il-types.ads b/gcc/ada/gen_il-types.ads index 84eb63f..321eec6 100644 --- a/gcc/ada/gen_il-types.ads +++ b/gcc/ada/gen_il-types.ads @@ -55,6 +55,10 @@ package Gen_IL.Types is Name_Id, String_Id, Uint, + Valid_Uint, + Unat, + Upos, + Nonzero_Uint, Ureal, Node_Kind_Type, -- Type of result of Nkind function, i.e. Node_Kind @@ -562,14 +566,17 @@ package Gen_IL.Types is | N_Defining_Operator_Symbol; subtype Opt_Abstract_Type is Opt_Type_Enum with - Predicate => Opt_Abstract_Type = No_Type or - Opt_Abstract_Type in Abstract_Type; + Predicate => Opt_Abstract_Type = No_Type or + Opt_Abstract_Type in Abstract_Type; subtype Type_Boundaries is Type_Enum with - Predicate => Type_Boundaries in - Between_Abstract_Node_And_Abstract_Entity_Types | - Between_Abstract_Entity_And_Concrete_Node_Types | - Between_Concrete_Node_And_Concrete_Entity_Types; + Predicate => Type_Boundaries in + Between_Abstract_Node_And_Abstract_Entity_Types | + Between_Abstract_Entity_And_Concrete_Node_Types | + Between_Concrete_Node_And_Concrete_Entity_Types; -- These are not used, other than to separate the various subranges. 
+ subtype Uint_Subtype is Type_Enum with + Predicate => Uint_Subtype in Valid_Uint | Unat | Upos | Nonzero_Uint; + end Gen_IL.Types; diff --git a/gcc/ada/layout.adb b/gcc/ada/layout.adb index f716488..e69386c 100644 --- a/gcc/ada/layout.adb +++ b/gcc/ada/layout.adb @@ -433,7 +433,7 @@ package body Layout is Set_RM_Size (E, RM_Size (PAT)); end if; - if not Known_Alignment (E) then + if not Known_Alignment (E) and then Known_Alignment (PAT) then Set_Alignment (E, Alignment (PAT)); end if; end; diff --git a/gcc/ada/repinfo.adb b/gcc/ada/repinfo.adb index 25b5237..148de53 100644 --- a/gcc/ada/repinfo.adb +++ b/gcc/ada/repinfo.adb @@ -410,15 +410,23 @@ package body Repinfo is end if; end if; - if List_Representation_Info_To_JSON then - Write_Str (" ""Alignment"": "); - Write_Val (Alignment (Ent)); + if Known_Alignment (Ent) then + if List_Representation_Info_To_JSON then + Write_Str (" ""Alignment"": "); + Write_Val (Alignment (Ent)); + else + Write_Str ("for "); + List_Name (Ent); + Write_Str ("'Alignment use "); + Write_Val (Alignment (Ent)); + Write_Line (";"); + end if; + + -- Alignment is not always set for task and protected types + else - Write_Str ("for "); - List_Name (Ent); - Write_Str ("'Alignment use "); - Write_Val (Alignment (Ent)); - Write_Line (";"); + pragma Assert + (Is_Concurrent_Type (Ent) or else Is_Class_Wide_Type (Ent)); end if; end List_Common_Type_Info; diff --git a/gcc/ada/scans.ads b/gcc/ada/scans.ads index 0e9ccd2..5cbae5a 100644 --- a/gcc/ada/scans.ads +++ b/gcc/ada/scans.ads @@ -441,12 +441,12 @@ package Scans is -- scanned literal. Real_Literal_Value : Ureal; - -- Valid only when Token is Tok_Real_Literal, contains the value of the + -- Valid only when Token is Tok_Real_Literal. Contains the value of the -- scanned literal. Int_Literal_Value : Uint; - -- Valid only when Token = Tok_Integer_Literal, contains the value of the - -- scanned literal. + -- Valid only when Token = Tok_Integer_Literal, and we are not in + -- syntax-only mode. 
Contains the value of the scanned literal. Based_Literal_Uses_Colon : Boolean; -- Valid only when Token = Tok_Integer_Literal or Tok_Real_Literal. Set diff --git a/gcc/ada/scn.adb b/gcc/ada/scn.adb index 7272ad4..ad53279 100644 --- a/gcc/ada/scn.adb +++ b/gcc/ada/scn.adb @@ -155,7 +155,14 @@ package body Scn is when Tok_Integer_Literal => Token_Node := New_Node (N_Integer_Literal, Token_Ptr); - Set_Intval (Token_Node, Int_Literal_Value); + + -- Int_Literal_Value can be No_Uint in some cases in syntax-only + -- mode (see Scng.Scan.Nlit). + + if Int_Literal_Value /= No_Uint then + Set_Intval (Token_Node, Int_Literal_Value); + end if; + Check_Obsolete_Base_Char; when Tok_String_Literal => diff --git a/gcc/ada/sem_ch13.adb b/gcc/ada/sem_ch13.adb index 91d41b4..76859c5 100644 --- a/gcc/ada/sem_ch13.adb +++ b/gcc/ada/sem_ch13.adb @@ -8101,10 +8101,12 @@ package body Sem_Ch13 is elsif Val < Lo or else Hi < Val then Error_Msg_N ("value outside permitted range", Expr); Err := True; + + else + Set_Enumeration_Rep (Elit, Val); + Set_Enumeration_Rep_Expr (Elit, Expr); end if; - Set_Enumeration_Rep (Elit, Val); - Set_Enumeration_Rep_Expr (Elit, Expr); Next (Expr); Next (Elit); end loop; @@ -8178,9 +8180,10 @@ package body Sem_Ch13 is elsif Val < Lo or else Hi < Val then Error_Msg_N ("value outside permitted range", Expr); Err := True; - end if; - Set_Enumeration_Rep (Elit, Val); + else + Set_Enumeration_Rep (Elit, Val); + end if; end if; end if; end if; @@ -8274,9 +8277,10 @@ package body Sem_Ch13 is Set_Enum_Esize (Enumtype); end if; - Set_RM_Size (Base_Type (Enumtype), RM_Size (Enumtype)); - Set_Esize (Base_Type (Enumtype), Esize (Enumtype)); - Set_Alignment (Base_Type (Enumtype), Alignment (Enumtype)); + Set_RM_Size (Base_Type (Enumtype), RM_Size (Enumtype)); + Set_Esize (Base_Type (Enumtype), Esize (Enumtype)); + + Copy_Alignment (To => Base_Type (Enumtype), From => Enumtype); end; end if; @@ -16299,9 +16303,13 @@ package body Sem_Ch13 is X_Offs : Uint; begin - -- Skip 
processing of this entry if warning already posted + -- Skip processing of this entry if warning already posted, or if + -- alignments are not set. - if not Address_Warning_Posted (ACCR.N) then + if not Address_Warning_Posted (ACCR.N) + and then Known_Alignment (ACCR.X) + and then Known_Alignment (ACCR.Y) + then Expr := Original_Node (Expression (ACCR.N)); -- Get alignments, sizes and offset, if any diff --git a/gcc/ada/sem_prag.adb b/gcc/ada/sem_prag.adb index 5705aa7..0ff4e49 100644 --- a/gcc/ada/sem_prag.adb +++ b/gcc/ada/sem_prag.adb @@ -7562,7 +7562,7 @@ package body Sem_Prag is end if; if not Has_Alignment_Clause (Ent) then - Set_Alignment (Ent, Uint_0); + Init_Alignment (Ent); end if; end Set_Atomic_VFA; diff --git a/gcc/ada/sem_util.adb b/gcc/ada/sem_util.adb index 5d0aa49..01a4e2b 100644 --- a/gcc/ada/sem_util.adb +++ b/gcc/ada/sem_util.adb @@ -12079,7 +12079,7 @@ package body Sem_Util is -- do it when there is an address clause since we can do more if the -- alignment is known. - if not Known_Alignment (Obj) then + if not Known_Alignment (Obj) and then Known_Alignment (Etype (Obj)) then Set_Alignment (Obj, Alignment (Etype (Obj))); end if; @@ -28366,7 +28366,7 @@ package body Sem_Util is Set_Is_Unsigned_Type (T1, Is_Unsigned_Type (T2)); end if; - Set_Alignment (T1, Alignment (T2)); + Copy_Alignment (To => T1, From => T2); end Set_Size_Info; ------------------------------ diff --git a/gcc/ada/sinfo.ads b/gcc/ada/sinfo.ads index 71da7fc..20a6125 100644 --- a/gcc/ada/sinfo.ads +++ b/gcc/ada/sinfo.ads @@ -2177,12 +2177,12 @@ package Sinfo is -- Present in an N_Variant node. This has a meaningful value only after -- Gigi has back annotated the tree with representation information. At -- this point, it contains a reference to a gcc expression that depends - -- on the values of one or more discriminants. Give a set of discriminant - -- values, this expression evaluates to False (zero) if variant is not - -- present, and True (non-zero) if it is present. 
See unit Repinfo for - -- further details on gigi back annotation. This field is used during - -- back-annotation processing (for -gnatR -gnatc) to determine if a field - -- is present or not. + -- on the values of one or more discriminants. Given a set of + -- discriminant values, this expression evaluates to False (zero) if + -- variant is not present, and True (non-zero) if it is present. See + -- unit Repinfo for further details on gigi back annotation. This field + -- is used during back-annotation processing (for -gnatR -gnatc) to + -- determine if a field is present or not. -- Prev_Use_Clause -- Present in both N_Use_Package_Clause and N_Use_Type_Clause. Used in diff --git a/gcc/ada/treepr.adb b/gcc/ada/treepr.adb index ff4ff84..054d06c 100644 --- a/gcc/ada/treepr.adb +++ b/gcc/ada/treepr.adb @@ -721,6 +721,12 @@ package body Treepr is function Get_Uint is new Get_32_Bit_Field_With_Default (Uint, Uint_0) with Inline; + function Get_Valid_Uint is new Get_32_Bit_Field + (Uint) with Inline; + -- Used for both Valid_Uint and other subtypes of Uint. Note that we don't + -- instantiate Get_Valid_32_Bit_Field; we don't want to blow up if the + -- value is wrong. + function Get_Ureal is new Get_32_Bit_Field (Ureal) with Inline; @@ -893,13 +899,36 @@ package body Treepr is Val : constant Uint := Get_Uint (N, FD.Offset); function Cast is new Unchecked_Conversion (Uint, Int); begin - if Val /= No_Uint then - Print_Initial; - UI_Write (Val, Format); - Write_Str (" (Uint = "); - Write_Int (Cast (Val)); - Write_Char (')'); - end if; + -- Do this even if Val = No_Uint, because Uint fields default + -- to Uint_0. 
+ + Print_Initial; + UI_Write (Val, Format); + Write_Str (" (Uint = "); + Write_Int (Cast (Val)); + Write_Char (')'); + end; + + when Valid_Uint_Field | Unat_Field | Upos_Field + | Nonzero_Uint_Field => + declare + Val : constant Uint := Get_Valid_Uint (N, FD.Offset); + function Cast is new Unchecked_Conversion (Uint, Int); + begin + Print_Initial; + UI_Write (Val, Format); + + case FD.Kind is + when Valid_Uint_Field => Write_Str (" v"); + when Unat_Field => Write_Str (" n"); + when Upos_Field => Write_Str (" p"); + when Nonzero_Uint_Field => Write_Str (" nz"); + when others => raise Program_Error; + end case; + + Write_Str (" (Uint = "); + Write_Int (Cast (Val)); + Write_Char (')'); end; when Ureal_Field => diff --git a/gcc/ada/types.h b/gcc/ada/types.h index ac30db3..2806e50 100644 --- a/gcc/ada/types.h +++ b/gcc/ada/types.h @@ -261,6 +261,10 @@ typedef Int String_Id; /* Type used for representation of universal integers. */ typedef Int Uint; +typedef Int Valid_Uint; +typedef Int Unat; +typedef Int Upos; +typedef Int Nonzero_Uint; /* Used to indicate missing Uint value. 
*/ #define No_Uint Uint_Low_Bound diff --git a/gcc/ada/uintp.ads b/gcc/ada/uintp.ads index 607e7ef..b2f2315 100644 --- a/gcc/ada/uintp.ads +++ b/gcc/ada/uintp.ads @@ -90,6 +90,11 @@ package Uintp is Uint_Minus_127 : constant Uint; Uint_Minus_128 : constant Uint; + subtype Valid_Uint is Uint with Predicate => Valid_Uint /= No_Uint; + subtype Unat is Valid_Uint with Predicate => Unat >= Uint_0; + subtype Upos is Valid_Uint with Predicate => Upos >= Uint_0; + subtype Nonzero_Uint is Valid_Uint with Predicate => Nonzero_Uint /= Uint_0; + type UI_Vector is array (Pos range <>) of Int; -- Vector containing the integer values of a Uint value -- cgit v1.1 From 3ccd5d7192603e0ed6d0020658291b7c96f5651b Mon Sep 17 00:00:00 2001 From: Eric Botcazou Date: Tue, 27 Apr 2021 21:18:12 +0200 Subject: [Ada] Implement support for unconstrained array types with FLB gcc/ada/ * gcc-interface/decl.c (gnat_to_gnu_entity) : Use a fixed lower bound if the index subtype is marked so, as well as a more efficient formula for the upper bound if the array cannot be superflat. (flb_cannot_be_superflat): New predicate. (cannot_be_superflat): Rename into... (range_cannot_be_superflat): ...this. Minor tweak.
--- gcc/ada/gcc-interface/decl.c | 112 +++++++++++++++++++++++++++++++++---------- 1 file changed, 88 insertions(+), 24 deletions(-) (limited to 'gcc') diff --git a/gcc/ada/gcc-interface/decl.c b/gcc/ada/gcc-interface/decl.c index 83ca31a..8eb1e30 100644 --- a/gcc/ada/gcc-interface/decl.c +++ b/gcc/ada/gcc-interface/decl.c @@ -217,7 +217,8 @@ static void set_reverse_storage_order_on_array_type (tree); static bool same_discriminant_p (Entity_Id, Entity_Id); static bool array_type_has_nonaliased_component (tree, Entity_Id); static bool compile_time_known_address_p (Node_Id); -static bool cannot_be_superflat (Node_Id); +static bool flb_cannot_be_superflat (Node_Id); +static bool range_cannot_be_superflat (Node_Id); static bool constructor_address_p (tree); static bool allocatable_size_p (tree, bool); static bool initial_value_needs_conversion (tree, tree); @@ -2238,13 +2239,15 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, bool definition) index += (convention_fortran_p ? - 1 : 1), gnat_index = Next_Index (gnat_index)) { - char field_name[16]; + const bool is_flb + = Is_Fixed_Lower_Bound_Index_Subtype (Etype (gnat_index)); tree gnu_index_type = get_unpadded_type (Etype (gnat_index)); tree gnu_orig_min = TYPE_MIN_VALUE (gnu_index_type); tree gnu_orig_max = TYPE_MAX_VALUE (gnu_index_type); tree gnu_index_base_type = get_base_type (gnu_index_type); tree gnu_lb_field, gnu_hb_field; tree gnu_min, gnu_max, gnu_high; + char field_name[16]; /* Update the maximum size of the array in elements. */ if (gnu_max_size) @@ -2278,25 +2281,38 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, bool definition) /* We can't use build_component_ref here since the template type isn't complete yet. 
*/ - gnu_orig_min = build3 (COMPONENT_REF, TREE_TYPE (gnu_lb_field), - gnu_template_reference, gnu_lb_field, - NULL_TREE); + if (!is_flb) + { + gnu_orig_min = build3 (COMPONENT_REF, TREE_TYPE (gnu_lb_field), + gnu_template_reference, gnu_lb_field, + NULL_TREE); + TREE_READONLY (gnu_orig_min) = 1; + } + gnu_orig_max = build3 (COMPONENT_REF, TREE_TYPE (gnu_hb_field), gnu_template_reference, gnu_hb_field, NULL_TREE); - TREE_READONLY (gnu_orig_min) = TREE_READONLY (gnu_orig_max) = 1; + TREE_READONLY (gnu_orig_max) = 1; gnu_min = convert (sizetype, gnu_orig_min); gnu_max = convert (sizetype, gnu_orig_max); /* Compute the size of this dimension. See the E_Array_Subtype case below for the rationale. */ - gnu_high - = build3 (COND_EXPR, sizetype, - build2 (GE_EXPR, boolean_type_node, - gnu_orig_max, gnu_orig_min), - gnu_max, - size_binop (MINUS_EXPR, gnu_min, size_one_node)); + if (is_flb + && Nkind (gnat_index) == N_Subtype_Indication + && flb_cannot_be_superflat (gnat_index)) + gnu_high = gnu_max; + + else + gnu_high + = build3 (COND_EXPR, sizetype, + build2 (GE_EXPR, boolean_type_node, + gnu_orig_max, gnu_orig_min), + gnu_max, + TREE_CODE (gnu_min) == INTEGER_CST + ? int_const_binop (MINUS_EXPR, gnu_min, size_one_node) + : size_binop (MINUS_EXPR, gnu_min, size_one_node)); /* Make a range type with the new range in the Ada base type. Then make an index type with the size range in sizetype. */ @@ -2595,7 +2611,7 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, bool definition) this. If we can prove that the array can never be superflat, we can just use the high bound of the index type. */ else if ((Nkind (gnat_index) == N_Range - && cannot_be_superflat (gnat_index)) + && range_cannot_be_superflat (gnat_index)) /* Bit-Packed Array Impl. Types are never superflat. 
*/ || (Is_Packed_Array_Impl_Type (gnat_entity) && Is_Bit_Packed_Array @@ -6414,33 +6430,81 @@ compile_time_known_address_p (Node_Id gnat_address) return Compile_Time_Known_Value (gnat_address); } +/* Return true if GNAT_INDIC, a N_Subtype_Indication node for the index of a + FLB, cannot yield superflat objects, i.e. if the inequality HB >= LB - 1 + is true for these objects. LB and HB are the low and high bounds. */ + +static bool +flb_cannot_be_superflat (Node_Id gnat_indic) +{ + const Entity_Id gnat_type = Entity (Subtype_Mark (gnat_indic)); + const Entity_Id gnat_subtype = Etype (gnat_indic); + Node_Id gnat_scalar_range, gnat_lb, gnat_hb; + tree gnu_lb, gnu_hb, gnu_lb_minus_one; + + /* This is a FLB so LB is fixed. */ + if ((Ekind (gnat_subtype) == E_Signed_Integer_Subtype + || Ekind (gnat_subtype) == E_Modular_Integer_Subtype) + && (gnat_scalar_range = Scalar_Range (gnat_subtype))) + { + gnat_lb = Low_Bound (gnat_scalar_range); + gcc_assert (Nkind (gnat_lb) == N_Integer_Literal); + } + else + return false; + + /* The low bound of the type is a lower bound for HB. */ + if ((Ekind (gnat_type) == E_Signed_Integer_Subtype + || Ekind (gnat_type) == E_Modular_Integer_Subtype) + && (gnat_scalar_range = Scalar_Range (gnat_type))) + { + gnat_hb = Low_Bound (gnat_scalar_range); + gcc_assert (Nkind (gnat_hb) == N_Integer_Literal); + } + else + return false; + + /* We need at least a signed 64-bit type to catch most cases. */ + gnu_lb = UI_To_gnu (Intval (gnat_lb), sbitsizetype); + gnu_hb = UI_To_gnu (Intval (gnat_hb), sbitsizetype); + if (TREE_OVERFLOW (gnu_lb) || TREE_OVERFLOW (gnu_hb)) + return false; + + /* If the low bound is the smallest integer, nothing can be smaller. */ + gnu_lb_minus_one = size_binop (MINUS_EXPR, gnu_lb, sbitsize_one_node); + if (TREE_OVERFLOW (gnu_lb_minus_one)) + return true; + + return !tree_int_cst_lt (gnu_hb, gnu_lb_minus_one); +} + /* Return true if GNAT_RANGE, a N_Range node, cannot be superflat, i.e. 
if the - inequality HB >= LB-1 is true. LB and HB are the low and high bounds. */ + inequality HB >= LB - 1 is true. LB and HB are the low and high bounds. */ static bool -cannot_be_superflat (Node_Id gnat_range) +range_cannot_be_superflat (Node_Id gnat_range) { Node_Id gnat_lb = Low_Bound (gnat_range), gnat_hb = High_Bound (gnat_range); - Node_Id scalar_range; + Node_Id gnat_scalar_range; tree gnu_lb, gnu_hb, gnu_lb_minus_one; /* If the low bound is not constant, try to find an upper bound. */ while (Nkind (gnat_lb) != N_Integer_Literal && (Ekind (Etype (gnat_lb)) == E_Signed_Integer_Subtype || Ekind (Etype (gnat_lb)) == E_Modular_Integer_Subtype) - && (scalar_range = Scalar_Range (Etype (gnat_lb))) - && (Nkind (scalar_range) == N_Signed_Integer_Type_Definition - || Nkind (scalar_range) == N_Range)) - gnat_lb = High_Bound (scalar_range); + && (gnat_scalar_range = Scalar_Range (Etype (gnat_lb))) + && (Nkind (gnat_scalar_range) == N_Signed_Integer_Type_Definition + || Nkind (gnat_scalar_range) == N_Range)) + gnat_lb = High_Bound (gnat_scalar_range); /* If the high bound is not constant, try to find a lower bound. */ while (Nkind (gnat_hb) != N_Integer_Literal && (Ekind (Etype (gnat_hb)) == E_Signed_Integer_Subtype || Ekind (Etype (gnat_hb)) == E_Modular_Integer_Subtype) - && (scalar_range = Scalar_Range (Etype (gnat_hb))) - && (Nkind (scalar_range) == N_Signed_Integer_Type_Definition - || Nkind (scalar_range) == N_Range)) - gnat_hb = Low_Bound (scalar_range); + && (gnat_scalar_range = Scalar_Range (Etype (gnat_hb))) + && (Nkind (gnat_scalar_range) == N_Signed_Integer_Type_Definition + || Nkind (gnat_scalar_range) == N_Range)) + gnat_hb = Low_Bound (gnat_scalar_range); /* If we have failed to find constant bounds, punt. 
*/ if (Nkind (gnat_lb) != N_Integer_Literal -- cgit v1.1 From 58d32c72ca0156b0267a9b36b91b414cab8978f2 Mon Sep 17 00:00:00 2001 From: Eric Botcazou Date: Fri, 4 Jun 2021 18:22:17 +0200 Subject: [Ada] Use GNAT encodings only when -fgnat-encodings=all is specified gcc/ada/ * gcc-interface/decl.c (gnat_to_gnu_entity) : Add a parallel type only when -fgnat-encodings=all is specified. : Use the PAT name and special suffixes only when -fgnat-encodings=all is specified. : Build a special type for debugging purposes only when -fgnat-encodings=all is specified. Add a parallel type or use the PAT name only when -fgnat-encodings=all is specified. : Generate debug info for the inner record types only when -fgnat-encodings=all is specified. : Use a debug type for an artificial subtype only except when -fgnat-encodings=all is specified. (elaborate_expression_1): Reset need_for_debug when possible only except when -fgnat-encodings=all is specified. (components_to_record): Use XV encodings for variable size only when -fgnat-encodings=all is specified. (associate_original_type_to_packed_array): Add a parallel type only when -fgnat-encodings=all is specified. * gcc-interface/misc.c (gnat_get_array_descr_info): Do not return full information only when -fgnat-encodings=all is specified. * gcc-interface/utils.c (make_packable_type): Add a parallel type only when -fgnat-encodings=all is specified. (maybe_pad_type): Make the inner type a debug type only except when -fgnat-encodings=all is specified. Create an XVS type for variable size only when -fgnat-encodings=all is specified. (rest_of_record_type_compilation): Add a parallel type only when -fgnat-encodings=all is specified. 
--- gcc/ada/gcc-interface/decl.c | 54 +++++++++++++++++++++---------------------- gcc/ada/gcc-interface/misc.c | 6 ++--- gcc/ada/gcc-interface/utils.c | 8 +++---- 3 files changed, 33 insertions(+), 35 deletions(-) (limited to 'gcc') diff --git a/gcc/ada/gcc-interface/decl.c b/gcc/ada/gcc-interface/decl.c index 8eb1e30..4b6479b 100644 --- a/gcc/ada/gcc-interface/decl.c +++ b/gcc/ada/gcc-interface/decl.c @@ -1998,10 +1998,10 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, bool definition) so we use an intermediate step for standard DWARF. */ if (debug_info_p) { - if (gnat_encodings == DWARF_GNAT_ENCODINGS_MINIMAL) - SET_TYPE_DEBUG_TYPE (gnu_type, gnu_field_type); - else if (DECL_PARALLEL_TYPE (t)) + if (gnat_encodings == DWARF_GNAT_ENCODINGS_ALL) add_parallel_type (gnu_type, DECL_PARALLEL_TYPE (t)); + else + SET_TYPE_DEBUG_TYPE (gnu_type, gnu_field_type); } } @@ -2210,11 +2210,11 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, bool definition) implementation type. But, in any case, mark it as artificial so the debugger can skip it. */ const Entity_Id gnat_name - = Present (PAT) && gnat_encodings != DWARF_GNAT_ENCODINGS_MINIMAL + = Present (PAT) && gnat_encodings == DWARF_GNAT_ENCODINGS_ALL ? PAT : gnat_entity; tree xup_name - = gnat_encodings != DWARF_GNAT_ENCODINGS_MINIMAL + = gnat_encodings == DWARF_GNAT_ENCODINGS_ALL ? create_concat_name (gnat_name, "XUP") : gnu_entity_name; create_type_decl (xup_name, gnu_fat_type, true, debug_info_p, @@ -2420,11 +2420,9 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, bool definition) template at a negative offset, but this was somewhat of a kludge; we now shift thin pointer values explicitly but only those which have a TYPE_UNCONSTRAINED_ARRAY attached to the designated RECORD_TYPE. - Note that GDB can handle standard DWARF information for them, so we - don't have to name them as a GNAT encoding, except if specifically - asked to. */ + If the GNAT encodings are used, give it a name. 
*/ tree xut_name - = (gnat_encodings != DWARF_GNAT_ENCODINGS_MINIMAL) + = (gnat_encodings == DWARF_GNAT_ENCODINGS_ALL) ? create_concat_name (gnat_name, "XUT") : gnu_entity_name; obj = build_unc_object_type (gnu_template_type, tem, xut_name, @@ -2673,7 +2671,7 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, bool definition) && TREE_CODE (TREE_TYPE (gnu_index_type)) != INTEGER_TYPE) || TYPE_BIASED_REPRESENTATION_P (gnu_index_type)) - && gnat_encodings != DWARF_GNAT_ENCODINGS_MINIMAL) + && gnat_encodings == DWARF_GNAT_ENCODINGS_ALL) need_index_type_struct = true; } @@ -2850,7 +2848,7 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, bool definition) gnu_entity_name = gnu_name; } - else if (gnat_encodings != DWARF_GNAT_ENCODINGS_MINIMAL) + else if (gnat_encodings == DWARF_GNAT_ENCODINGS_ALL) { tree gnu_base_decl = gnat_to_gnu_entity (Etype (gnat_entity), NULL_TREE, @@ -2897,7 +2895,7 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, bool definition) save_gnu_tree (gnat_entity, NULL_TREE, false); /* Set the ___XP suffix for GNAT encodings. */ - if (gnat_encodings != DWARF_GNAT_ENCODINGS_MINIMAL) + if (gnat_encodings == DWARF_GNAT_ENCODINGS_ALL) gnu_entity_name = DECL_NAME (TYPE_NAME (gnu_type)); tree gnu_inner = gnu_type; @@ -3372,14 +3370,14 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, bool definition) = build_subst_list (gnat_entity, gnat_parent_type, definition); /* Set the layout of the type to match that of the parent type, - doing required substitutions. If we are in minimal GNAT - encodings mode, we don't need debug info for the inner record + doing required substitutions. Note that, if we do not use the + GNAT encodings, we don't need debug info for the inner record types, as they will be part of the embedding variant record's debug info. 
*/ copy_and_substitute_in_layout (gnat_entity, gnat_parent_type, gnu_type, gnu_parent_type, gnu_subst_list, - debug_info_p && gnat_encodings != DWARF_GNAT_ENCODINGS_MINIMAL); + debug_info_p && gnat_encodings == DWARF_GNAT_ENCODINGS_ALL); } else { @@ -3518,11 +3516,11 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, bool definition) annotate_rep (gnat_entity, gnu_type); /* If debugging information is being written for the type and if - we are asked to output such encodings, write a record that + we are asked to output GNAT encodings, write a record that shows what we are a subtype of and also make a variable that indicates our size, if still variable. */ if (debug_info_p - && gnat_encodings != DWARF_GNAT_ENCODINGS_MINIMAL) + && gnat_encodings == DWARF_GNAT_ENCODINGS_ALL) { tree gnu_subtype_marker = make_node (RECORD_TYPE); tree gnu_unpad_base_name @@ -3553,11 +3551,11 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, bool definition) true, true, NULL, gnat_entity, false); } - /* Or else, if the subtype is artificial and encodings are not - used, use the base record type as the debug type. */ + /* Or else, if the subtype is artificial and GNAT encodings are + not used, use the base record type as the debug type. */ else if (debug_info_p && artificial_p - && gnat_encodings == DWARF_GNAT_ENCODINGS_MINIMAL) + && gnat_encodings != DWARF_GNAT_ENCODINGS_ALL) SET_TYPE_DEBUG_TYPE (gnu_type, gnu_unpad_base_type); } @@ -6892,7 +6890,7 @@ elaborate_expression_1 (tree gnu_expr, Entity_Id gnat_entity, const char *s, we must be careful because we do not generate debug info for external variables so DECL_IGNORED_P is not stable across units. 
*/ if (need_for_debug - && gnat_encodings == DWARF_GNAT_ENCODINGS_MINIMAL + && gnat_encodings != DWARF_GNAT_ENCODINGS_ALL && (TREE_CONSTANT (gnu_expr) || (!expr_public_p && DECL_P (gnu_expr) @@ -7777,7 +7775,7 @@ components_to_record (Node_Id gnat_component_list, Entity_Id gnat_record_type, tree *p_gnu_rep_list) { const bool needs_xv_encodings - = debug_info && gnat_encodings != DWARF_GNAT_ENCODINGS_MINIMAL; + = debug_info && gnat_encodings == DWARF_GNAT_ENCODINGS_ALL; bool all_rep_and_size = all_rep && TYPE_SIZE (gnu_record_type); bool variants_have_rep = all_rep; bool layout_with_rep = false; @@ -10241,7 +10239,12 @@ associate_original_type_to_packed_array (tree gnu_type, Entity_Id gnat_entity) gcc_assert (TYPE_IMPL_PACKED_ARRAY_P (gnu_type)); - if (gnat_encodings == DWARF_GNAT_ENCODINGS_MINIMAL) + if (gnat_encodings == DWARF_GNAT_ENCODINGS_ALL) + { + add_parallel_type (gnu_type, gnu_original_array_type); + return NULL_TREE; + } + else { SET_TYPE_ORIGINAL_PACKED_ARRAY (gnu_type, gnu_original_array_type); @@ -10250,11 +10253,6 @@ associate_original_type_to_packed_array (tree gnu_type, Entity_Id gnat_entity) original_name = DECL_NAME (original_name); return original_name; } - else - { - add_parallel_type (gnu_type, gnu_original_array_type); - return NULL_TREE; - } } /* Given a type T, a FIELD_DECL F, and a replacement value R, return an diff --git a/gcc/ada/gcc-interface/misc.c b/gcc/ada/gcc-interface/misc.c index 72a2624..186367a 100644 --- a/gcc/ada/gcc-interface/misc.c +++ b/gcc/ada/gcc-interface/misc.c @@ -806,7 +806,7 @@ gnat_get_array_descr_info (const_tree const_type, /* As well as array types embedded in a record type with their bounds. */ else if (TREE_CODE (type) == RECORD_TYPE && TYPE_CONTAINS_TEMPLATE_P (type) - && gnat_encodings == DWARF_GNAT_ENCODINGS_MINIMAL) + && gnat_encodings != DWARF_GNAT_ENCODINGS_ALL) { /* This will be our base object address. 
Note that we assume that pointers to this will actually point to the array field (thin @@ -901,7 +901,7 @@ gnat_get_array_descr_info (const_tree const_type, if (TYPE_CONTEXT (first_dimen) && TREE_CODE (TYPE_CONTEXT (first_dimen)) != RECORD_TYPE && CONTAINS_PLACEHOLDER_P (TYPE_MIN_VALUE (index_type)) - && gnat_encodings != DWARF_GNAT_ENCODINGS_MINIMAL) + && gnat_encodings == DWARF_GNAT_ENCODINGS_ALL) { info->dimen[i].lower_bound = NULL_TREE; info->dimen[i].upper_bound = NULL_TREE; @@ -943,7 +943,7 @@ gnat_get_array_descr_info (const_tree const_type, info->associated = NULL_TREE; info->data_location = NULL_TREE; - if (gnat_encodings == DWARF_GNAT_ENCODINGS_MINIMAL) + if (gnat_encodings != DWARF_GNAT_ENCODINGS_ALL) { /* When arrays contain dynamically-sized elements, we usually wrap them in padding types, or we create constrained types for them. Then, if diff --git a/gcc/ada/gcc-interface/utils.c b/gcc/ada/gcc-interface/utils.c index 535f4ca..846d20a 100644 --- a/gcc/ada/gcc-interface/utils.c +++ b/gcc/ada/gcc-interface/utils.c @@ -1277,7 +1277,7 @@ make_packable_type (tree type, bool in_record, unsigned int max_align) finish_record_type (new_type, nreverse (new_field_list), 2, false); relate_alias_sets (new_type, type, ALIAS_SET_COPY); - if (gnat_encodings == DWARF_GNAT_ENCODINGS_MINIMAL) + if (gnat_encodings != DWARF_GNAT_ENCODINGS_ALL) SET_TYPE_DEBUG_TYPE (new_type, TYPE_DEBUG_TYPE (type)); else if (TYPE_STUB_DECL (type)) SET_DECL_PARALLEL_TYPE (TYPE_STUB_DECL (new_type), @@ -1610,7 +1610,7 @@ maybe_pad_type (tree type, tree size, unsigned int align, } /* Make the inner type the debug type of the padded type. 
*/ - if (gnat_encodings == DWARF_GNAT_ENCODINGS_MINIMAL) + if (gnat_encodings != DWARF_GNAT_ENCODINGS_ALL) SET_TYPE_DEBUG_TYPE (record, maybe_debug_type (type)); /* Unless debugging information isn't being written for the input type, @@ -1645,7 +1645,7 @@ maybe_pad_type (tree type, tree size, unsigned int align, /* There is no need to show what we are a subtype of when outputting as few encodings as possible: regular debugging infomation makes this redundant. */ - if (gnat_encodings != DWARF_GNAT_ENCODINGS_MINIMAL) + if (gnat_encodings == DWARF_GNAT_ENCODINGS_ALL) { tree marker = make_node (RECORD_TYPE); tree orig_name = TYPE_IDENTIFIER (type); @@ -2274,7 +2274,7 @@ rest_of_record_type_compilation (tree record_type) /* If this record type is of variable size, make a parallel record type that will tell the debugger how the former is laid out (see exp_dbug.ads). */ - if (var_size && gnat_encodings != DWARF_GNAT_ENCODINGS_MINIMAL) + if (var_size && gnat_encodings == DWARF_GNAT_ENCODINGS_ALL) { tree new_record_type = make_node (TREE_CODE (record_type) == QUAL_UNION_TYPE -- cgit v1.1 From 6bebd55e12375b397ed187630bb57d58611dfc5f Mon Sep 17 00:00:00 2001 From: Pierre-Marie de Rodat Date: Fri, 25 Jun 2021 09:22:19 +0000 Subject: [Ada] adaint.c minor reformatting gcc/ada/ * adaint.c (__gnat_number_of_cpus): Replace "#ifdef" by "#if defined". 
--- gcc/ada/adaint.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/ada/adaint.c b/gcc/ada/adaint.c index 26be260..06a4895 100644 --- a/gcc/ada/adaint.c +++ b/gcc/ada/adaint.c @@ -2485,7 +2485,7 @@ __gnat_number_of_cpus (void) { int cores = 1; -#ifdef _SC_NPROCESSORS_ONLN +#if defined (_SC_NPROCESSORS_ONLN) cores = (int) sysconf (_SC_NPROCESSORS_ONLN); #elif defined (__QNX__) -- cgit v1.1 From fe610051a803131822bd02a8842a67b573b8e46a Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 12 Jul 2021 16:34:41 +0200 Subject: Change the type of memory classification functions to bool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 2021-07-12 Uroš Bizjak gcc/ * recog.c (memory_address_addr_space_p): Change the type to bool. Return true/false instead of 1/0. (offsettable_memref_p): Ditto. (offsettable_nonstrict_memref_p): Ditto. (offsettable_address_addr_space_p): Ditto. Change the type of addressp indirect function to bool. * recog.h (memory_address_addr_space_p): Change the type to bool. (strict_memory_address_addr_space_p): Ditto. (offsettable_memref_p): Ditto. (offsettable_nonstrict_memref_p): Ditto. (offsettable_address_addr_space_p): Ditto. * reload.c (maybe_memory_address_addr_space_p): Ditto. (strict_memory_address_addr_space_p): Change the type to bool. Return true/false instead of 1/0. (maybe_memory_address_addr_space_p): Change the type to bool. --- gcc/recog.c | 36 +++++++++++++++++------------------- gcc/recog.h | 12 ++++++------ gcc/reload.c | 16 ++++++++-------- 3 files changed, 31 insertions(+), 33 deletions(-) (limited to 'gcc') diff --git a/gcc/recog.c b/gcc/recog.c index 2114df8..5a42c45 100644 --- a/gcc/recog.c +++ b/gcc/recog.c @@ -1776,20 +1776,20 @@ pop_operand (rtx op, machine_mode mode) return XEXP (op, 0) == stack_pointer_rtx; } -/* Return 1 if ADDR is a valid memory address +/* Return true if ADDR is a valid memory address for mode MODE in address space AS. 
*/ -int +bool memory_address_addr_space_p (machine_mode mode ATTRIBUTE_UNUSED, rtx addr, addr_space_t as) { #ifdef GO_IF_LEGITIMATE_ADDRESS gcc_assert (ADDR_SPACE_GENERIC_P (as)); GO_IF_LEGITIMATE_ADDRESS (mode, addr, win); - return 0; + return false; win: - return 1; + return true; #else return targetm.addr_space.legitimate_address_p (mode, addr, 0, as); #endif @@ -2361,18 +2361,16 @@ find_constant_term_loc (rtx *p) return 0; } -/* Return 1 if OP is a memory reference - whose address contains no side effects - and remains valid after the addition - of a positive integer less than the - size of the object being referenced. +/* Return true if OP is a memory reference whose address contains + no side effects and remains valid after the addition of a positive + integer less than the size of the object being referenced. We assume that the original address is valid and do not check it. This uses strict_memory_address_p as a subroutine, so don't use it before reload. */ -int +bool offsettable_memref_p (rtx op) { return ((MEM_P (op)) @@ -2383,7 +2381,7 @@ offsettable_memref_p (rtx op) /* Similar, but don't require a strictly valid mem ref: consider pseudo-regs valid as index or base regs. */ -int +bool offsettable_nonstrict_memref_p (rtx op) { return ((MEM_P (op)) @@ -2391,7 +2389,7 @@ offsettable_nonstrict_memref_p (rtx op) MEM_ADDR_SPACE (op))); } -/* Return 1 if Y is a memory address which contains no side effects +/* Return true if Y is a memory address which contains no side effects and would remain valid for address space AS after the addition of a positive integer less than the size of that mode. @@ -2401,7 +2399,7 @@ offsettable_nonstrict_memref_p (rtx op) If STRICTP is nonzero, we require a strictly valid address, for the sake of use in reload.c. 
*/ -int +bool offsettable_address_addr_space_p (int strictp, machine_mode mode, rtx y, addr_space_t as) { @@ -2409,19 +2407,19 @@ offsettable_address_addr_space_p (int strictp, machine_mode mode, rtx y, rtx z; rtx y1 = y; rtx *y2; - int (*addressp) (machine_mode, rtx, addr_space_t) = + bool (*addressp) (machine_mode, rtx, addr_space_t) = (strictp ? strict_memory_address_addr_space_p : memory_address_addr_space_p); poly_int64 mode_sz = GET_MODE_SIZE (mode); if (CONSTANT_ADDRESS_P (y)) - return 1; + return true; /* Adjusting an offsettable address involves changing to a narrower mode. Make sure that's OK. */ if (mode_dependent_address_p (y, as)) - return 0; + return false; machine_mode address_mode = GET_MODE (y); if (address_mode == VOIDmode) @@ -2442,7 +2440,7 @@ offsettable_address_addr_space_p (int strictp, machine_mode mode, rtx y, if ((ycode == PLUS) && (y2 = find_constant_term_loc (&y1))) { - int good; + bool good; y1 = *y2; *y2 = plus_constant (address_mode, *y2, mode_sz - 1); @@ -2456,7 +2454,7 @@ offsettable_address_addr_space_p (int strictp, machine_mode mode, rtx y, } if (GET_RTX_CLASS (ycode) == RTX_AUTOINC) - return 0; + return false; /* The offset added here is chosen as the maximum offset that any instruction could need to add when operating on something @@ -2486,7 +2484,7 @@ offsettable_address_addr_space_p (int strictp, machine_mode mode, rtx y, return (*addressp) (QImode, z, as); } -/* Return 1 if ADDR is an address-expression whose effect depends +/* Return true if ADDR is an address-expression whose effect depends on the mode of the memory reference it is used in. ADDRSPACE is the address space associated with the address. 
diff --git a/gcc/recog.h b/gcc/recog.h index 653d0b0..1df1a6e 100644 --- a/gcc/recog.h +++ b/gcc/recog.h @@ -200,11 +200,11 @@ extern void temporarily_undo_changes (int); extern void redo_changes (int); extern int constrain_operands (int, alternative_mask); extern int constrain_operands_cached (rtx_insn *, int); -extern int memory_address_addr_space_p (machine_mode, rtx, addr_space_t); +extern bool memory_address_addr_space_p (machine_mode, rtx, addr_space_t); #define memory_address_p(mode,addr) \ memory_address_addr_space_p ((mode), (addr), ADDR_SPACE_GENERIC) -extern int strict_memory_address_addr_space_p (machine_mode, rtx, - addr_space_t); +extern bool strict_memory_address_addr_space_p (machine_mode, rtx, + addr_space_t); #define strict_memory_address_p(mode,addr) \ strict_memory_address_addr_space_p ((mode), (addr), ADDR_SPACE_GENERIC) extern int validate_replace_rtx_subexp (rtx, rtx, rtx_insn *, rtx *); @@ -218,9 +218,9 @@ extern int num_changes_pending (void); extern bool reg_fits_class_p (const_rtx, reg_class_t, int, machine_mode); extern bool valid_insn_p (rtx_insn *); -extern int offsettable_memref_p (rtx); -extern int offsettable_nonstrict_memref_p (rtx); -extern int offsettable_address_addr_space_p (int, machine_mode, rtx, +extern bool offsettable_memref_p (rtx); +extern bool offsettable_nonstrict_memref_p (rtx); +extern bool offsettable_address_addr_space_p (int, machine_mode, rtx, addr_space_t); #define offsettable_address_p(strict,mode,addr) \ offsettable_address_addr_space_p ((strict), (mode), (addr), \ diff --git a/gcc/reload.c b/gcc/reload.c index d21be91..4c55ca5 100644 --- a/gcc/reload.c +++ b/gcc/reload.c @@ -262,8 +262,8 @@ static bool alternative_allows_const_pool_ref (rtx, const char *, int); static rtx find_reloads_toplev (rtx, int, enum reload_type, int, int, rtx_insn *, int *); static rtx make_memloc (rtx, int); -static int maybe_memory_address_addr_space_p (machine_mode, rtx, - addr_space_t, rtx *); +static bool 
maybe_memory_address_addr_space_p (machine_mode, rtx, + addr_space_t, rtx *); static int find_reloads_address (machine_mode, rtx *, rtx, rtx *, int, enum reload_type, int, rtx_insn *); static rtx subst_reg_equivs (rtx, rtx_insn *); @@ -2156,21 +2156,21 @@ hard_reg_set_here_p (unsigned int beg_regno, unsigned int end_regno, rtx x) return 0; } -/* Return 1 if ADDR is a valid memory address for mode MODE +/* Return true if ADDR is a valid memory address for mode MODE in address space AS, and check that each pseudo reg has the proper kind of hard reg. */ -int +bool strict_memory_address_addr_space_p (machine_mode mode ATTRIBUTE_UNUSED, rtx addr, addr_space_t as) { #ifdef GO_IF_LEGITIMATE_ADDRESS gcc_assert (ADDR_SPACE_GENERIC_P (as)); GO_IF_LEGITIMATE_ADDRESS (mode, addr, win); - return 0; + return false; win: - return 1; + return true; #else return targetm.addr_space.legitimate_address_p (mode, addr, 1, as); #endif @@ -4829,11 +4829,11 @@ make_memloc (rtx ad, int regno) to mode MODE in address space AS by reloading the part pointed to by PART into a register. */ -static int +static bool maybe_memory_address_addr_space_p (machine_mode mode, rtx ad, addr_space_t as, rtx *part) { - int retv; + bool retv; rtx tem = *part; rtx reg = gen_rtx_REG (GET_MODE (tem), max_reg_num ()); -- cgit v1.1 From fedcf3c476aff7533741a1c61071200f0a38cf83 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Thu, 8 Jul 2021 09:52:49 +0200 Subject: tree-optimization/101373 - avoid PRE across externally throwing call PRE already tries to avoid hoisting possibly trapping expressions across calls that might not return normally but fails to consider const calls that throw externally. The following fixes that and also plugs the hole of trapping references not pruned in case they are not caught by the actual call clobbering it. At -Os we hit the same issue in RTL PRE and postreload-gcse has even more incomplete checks so the patch adjusts both of those as well.
2021-07-08 Richard Biener PR tree-optimization/101373 * tree-ssa-pre.c (prune_clobbered_mems): Also prune trapping references when the BB may not return. (compute_avail): Pass in the function we're working on and replace cfun references with it. Externally throwing const calls also possibly terminate the function. (pass_pre::execute): Pass down the function we're working on. * gcse.c (compute_hash_table_work): Externally throwing const/pure calls also need record_last_mem_set_info. * postreload-gcse.c (record_opr_changes): Looping or externally throwing const/pure calls also need record_last_mem_set_info. * g++.dg/torture/pr101373.C: New testcase, XFAILed. * gnat.dg/opt95.adb: Likewise. --- gcc/gcse.c | 3 ++- gcc/postreload-gcse.c | 4 +++- gcc/testsuite/g++.dg/torture/pr101373.C | 33 +++++++++++++++++++++++++++ gcc/testsuite/gnat.dg/opt95.adb | 40 +++++++++++++++++++++++++++++++++ gcc/tree-ssa-pre.c | 34 +++++++++++++++++----------- 5 files changed, 99 insertions(+), 15 deletions(-) create mode 100644 gcc/testsuite/g++.dg/torture/pr101373.C create mode 100644 gcc/testsuite/gnat.dg/opt95.adb (limited to 'gcc') diff --git a/gcc/gcse.c b/gcc/gcse.c index ecf7e51..ccd3366 100644 --- a/gcc/gcse.c +++ b/gcc/gcse.c @@ -1537,7 +1537,8 @@ compute_hash_table_work (struct gcse_hash_table_d *table) record_last_reg_set_info (insn, regno); if (! RTL_CONST_OR_PURE_CALL_P (insn) - || RTL_LOOPING_CONST_OR_PURE_CALL_P (insn)) + || RTL_LOOPING_CONST_OR_PURE_CALL_P (insn) + || can_throw_external (insn)) record_last_mem_set_info (insn); } diff --git a/gcc/postreload-gcse.c b/gcc/postreload-gcse.c index 0b28247..6c95d09 100644 --- a/gcc/postreload-gcse.c +++ b/gcc/postreload-gcse.c @@ -779,7 +779,9 @@ record_opr_changes (rtx_insn *insn) EXECUTE_IF_SET_IN_HARD_REG_SET (callee_clobbers, 0, regno, hrsi) record_last_reg_set_info_regno (insn, regno); - if (! RTL_CONST_OR_PURE_CALL_P (insn)) + if (! 
RTL_CONST_OR_PURE_CALL_P (insn) + || RTL_LOOPING_CONST_OR_PURE_CALL_P (insn) + || can_throw_external (insn)) record_last_mem_set_info (insn); } } diff --git a/gcc/testsuite/g++.dg/torture/pr101373.C b/gcc/testsuite/g++.dg/torture/pr101373.C new file mode 100644 index 0000000..f8c8097 --- /dev/null +++ b/gcc/testsuite/g++.dg/torture/pr101373.C @@ -0,0 +1,33 @@ +// { dg-do run } +// { dg-xfail-run-if "PR100409" { *-*-* } } + +int __attribute__((const,noipa)) foo (int j) +{ + if (j != 0) + throw 1; + return 0; +} + +int __attribute__((noipa)) bar (int *p, int n) +{ + int ret = 0; + if (n) + { + foo (n); + ret = *p; + } + ret += *p; + return ret; +} + +int main() +{ + try + { + return bar (nullptr, 1); + } + catch (...) + { + return 0; + } +} diff --git a/gcc/testsuite/gnat.dg/opt95.adb b/gcc/testsuite/gnat.dg/opt95.adb new file mode 100644 index 0000000..2c72582 --- /dev/null +++ b/gcc/testsuite/gnat.dg/opt95.adb @@ -0,0 +1,40 @@ +-- { dg-do run } +-- { dg-options "-O2 -gnatp" } + +procedure Opt95 is + + function Foo (J : Integer) return Integer; + pragma Pure_Function (Foo); + pragma Machine_Attribute (Foo, "noipa"); + + function Foo (J : Integer) return Integer is + begin + if J /= 0 then + raise Constraint_Error; + end if; + return 0; + end; + + function Bar (A : access Integer; N : Integer) return Integer; + pragma Machine_Attribute (Bar, "noipa"); + + function Bar (A : access Integer; N : Integer) return Integer is + Ret : Integer := 0; + Ret2 : Integer := 0; + begin + if N /= 0 then + Ret2 := Foo (N); + Ret := A.all; + end if; + Ret := Ret + A.all; + return Ret + Ret2; + end; + + V : Integer; + pragma Volatile (V); + +begin + V := Bar (null, 1); +exception + when Constraint_Error => null; +end; diff --git a/gcc/tree-ssa-pre.c b/gcc/tree-ssa-pre.c index 69141c2..aa5244e 100644 --- a/gcc/tree-ssa-pre.c +++ b/gcc/tree-ssa-pre.c @@ -2071,6 +2071,13 @@ prune_clobbered_mems (bitmap_set_t set, basic_block block) && value_dies_in_block_x (expr, block)))) to_remove = i; 
} + /* If the REFERENCE may trap make sure the block does not contain + a possible exit point. + ??? This is overly conservative if we translate AVAIL_OUT + as the available expression might be after the exit point. */ + if (BB_MAY_NOTRETURN (block) + && vn_reference_may_trap (ref)) + to_remove = i; } else if (expr->kind == NARY) { @@ -3860,7 +3867,7 @@ insert (void) AVAIL_OUT[BLOCK] = AVAIL_IN[BLOCK] U PHI_GEN[BLOCK] U TMP_GEN[BLOCK]. */ static void -compute_avail (void) +compute_avail (function *fun) { basic_block block, son; @@ -3871,7 +3878,7 @@ compute_avail (void) /* We pretend that default definitions are defined in the entry block. This includes function arguments and the static chain decl. */ - FOR_EACH_SSA_NAME (i, name, cfun) + FOR_EACH_SSA_NAME (i, name, fun) { pre_expr e; if (!SSA_NAME_IS_DEFAULT_DEF (name) @@ -3881,31 +3888,31 @@ compute_avail (void) e = get_or_alloc_expr_for_name (name); add_to_value (get_expr_value_id (e), e); - bitmap_insert_into_set (TMP_GEN (ENTRY_BLOCK_PTR_FOR_FN (cfun)), e); - bitmap_value_insert_into_set (AVAIL_OUT (ENTRY_BLOCK_PTR_FOR_FN (cfun)), + bitmap_insert_into_set (TMP_GEN (ENTRY_BLOCK_PTR_FOR_FN (fun)), e); + bitmap_value_insert_into_set (AVAIL_OUT (ENTRY_BLOCK_PTR_FOR_FN (fun)), e); } if (dump_file && (dump_flags & TDF_DETAILS)) { - print_bitmap_set (dump_file, TMP_GEN (ENTRY_BLOCK_PTR_FOR_FN (cfun)), + print_bitmap_set (dump_file, TMP_GEN (ENTRY_BLOCK_PTR_FOR_FN (fun)), "tmp_gen", ENTRY_BLOCK); - print_bitmap_set (dump_file, AVAIL_OUT (ENTRY_BLOCK_PTR_FOR_FN (cfun)), + print_bitmap_set (dump_file, AVAIL_OUT (ENTRY_BLOCK_PTR_FOR_FN (fun)), "avail_out", ENTRY_BLOCK); } /* Allocate the worklist. */ - worklist = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun)); + worklist = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun)); /* Seed the algorithm by putting the dominator children of the entry block on the worklist. 
*/ - for (son = first_dom_son (CDI_DOMINATORS, ENTRY_BLOCK_PTR_FOR_FN (cfun)); + for (son = first_dom_son (CDI_DOMINATORS, ENTRY_BLOCK_PTR_FOR_FN (fun)); son; son = next_dom_son (CDI_DOMINATORS, son)) worklist[sp++] = son; - BB_LIVE_VOP_ON_EXIT (ENTRY_BLOCK_PTR_FOR_FN (cfun)) - = ssa_default_def (cfun, gimple_vop (cfun)); + BB_LIVE_VOP_ON_EXIT (ENTRY_BLOCK_PTR_FOR_FN (fun)) + = ssa_default_def (fun, gimple_vop (fun)); /* Loop until the worklist is empty. */ while (sp) @@ -3970,7 +3977,8 @@ compute_avail (void) before it. */ int flags = gimple_call_flags (stmt); if (!(flags & ECF_CONST) - || (flags & ECF_LOOPING_CONST_OR_PURE)) + || (flags & ECF_LOOPING_CONST_OR_PURE) + || stmt_can_throw_external (fun, stmt)) BB_MAY_NOTRETURN (block) = 1; } @@ -3987,7 +3995,7 @@ compute_avail (void) BB_LIVE_VOP_ON_EXIT (block) = gimple_vdef (stmt); if (gimple_has_side_effects (stmt) - || stmt_could_throw_p (cfun, stmt) + || stmt_could_throw_p (fun, stmt) || is_gimple_debug (stmt)) continue; @@ -4384,7 +4392,7 @@ pass_pre::execute (function *fun) we require AVAIL. */ if (n_basic_blocks_for_fn (fun) < 4000) { - compute_avail (); + compute_avail (fun); compute_antic (); insert (); } -- cgit v1.1 From 47113773456ade7324c5467511d97f36cced57b4 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Mon, 12 Jul 2021 15:13:17 +0200 Subject: produce simple DOT graphs from SLP trees This adds a dot_slp_tree debug function producing a simple DOT graph from a starting node down the graph. There's no fancy direct invocation of dot but the output is directed to a specified file. It re-uses vect_print_slp_tree, naming nodes as their address. 2021-07-12 Richard Biener * dump-context.h (debug_dump_context::debug_dump_context): Add FILE * parameter defaulted to stderr. * dumpfile.c (debug_dump_context::debug_dump_context): Adjust. * tree-vect-slp.c (dot_slp_tree): New functions. 
--- gcc/dump-context.h | 2 +- gcc/dumpfile.c | 4 ++-- gcc/tree-vect-slp.c | 38 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 41 insertions(+), 3 deletions(-) (limited to 'gcc') diff --git a/gcc/dump-context.h b/gcc/dump-context.h index e8ed374..1a6bf5e 100644 --- a/gcc/dump-context.h +++ b/gcc/dump-context.h @@ -204,7 +204,7 @@ private: class debug_dump_context { public: - debug_dump_context (); + debug_dump_context (FILE *f = stderr); ~debug_dump_context (); private: diff --git a/gcc/dumpfile.c b/gcc/dumpfile.c index 2457df2..8169daf 100644 --- a/gcc/dumpfile.c +++ b/gcc/dumpfile.c @@ -2098,14 +2098,14 @@ enable_rtl_dump_file (void) /* debug_dump_context's ctor. Temporarily override the dump_context (to forcibly enable output to stderr). */ -debug_dump_context::debug_dump_context () +debug_dump_context::debug_dump_context (FILE *f) : m_context (), m_saved (&dump_context::get ()), m_saved_flags (dump_flags), m_saved_pflags (pflags), m_saved_file (dump_file) { - set_dump_file (stderr); + set_dump_file (f); dump_context::s_current = &m_context; pflags = dump_flags = MSG_ALL_KINDS | MSG_ALL_PRIORITIES; dump_context::get ().refresh_dumps_are_enabled (); diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index cd002b3..86fa3c1 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -2552,6 +2552,44 @@ debug (slp_tree node) node); } +/* Recursive helper for the dot producer below. 
*/ + +static void +dot_slp_tree (FILE *f, slp_tree node, hash_set &visited) +{ + if (visited.add (node)) + return; + + fprintf (f, "\"%p\" [label=\"", (void *)node); + vect_print_slp_tree (MSG_NOTE, + dump_location_t::from_location_t (UNKNOWN_LOCATION), + node); + fprintf (f, "\"];\n"); + + + for (slp_tree child : SLP_TREE_CHILDREN (node)) + fprintf (f, "\"%p\" -> \"%p\";", (void *)node, (void *)child); + + for (slp_tree child : SLP_TREE_CHILDREN (node)) + dot_slp_tree (f, child, visited); +} + +DEBUG_FUNCTION void +dot_slp_tree (const char *fname, slp_tree node) +{ + FILE *f = fopen (fname, "w"); + fprintf (f, "digraph {\n"); + fflush (f); + { + debug_dump_context ctx (f); + hash_set visited; + dot_slp_tree (f, node, visited); + } + fflush (f); + fprintf (f, "}\n"); + fclose (f); +} + /* Dump a slp tree NODE using flags specified in DUMP_KIND. */ static void -- cgit v1.1 From 3f2338b4706cdc53ab276b9a5fed7f6927404f07 Mon Sep 17 00:00:00 2001 From: Andrew Pinski Date: Thu, 8 Jul 2021 19:23:35 -0700 Subject: [PHIOPT/MATCH] Remove the statement to move if not used Instead of waiting for DCE to remove the unused statement, and maybe optimize another conditional, it is better if we don't move the statement and have the statement removed. OK? Bootstrapped and tested on x86_64-linux-gnu. Changes from v1: * v2: Change the order of insertion and check to see if the lhs is used rather than see if the lhs was used in the sequence. gcc/ChangeLog: * tree-ssa-phiopt.c (match_simplify_replacement): Move insert of the sequence before the movement of the statement. Check if to see if the statement is used outside of the original phi to see if we should move it. gcc/testsuite/ChangeLog: * gcc.dg/tree-ssa/pr96928-1.c: Update to similar as pr96928.c.
--- gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c | 5 ++++- gcc/tree-ssa-phiopt.c | 13 ++++++++++--- 2 files changed, 14 insertions(+), 4 deletions(-) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c index 2e86620..9e505ac 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c @@ -2,7 +2,10 @@ /* { dg-do compile } */ /* { dg-options "-O2 -fdump-tree-phiopt2 -fdump-tree-optimized" } */ /* { dg-final { scan-tree-dump-times " = a_\[0-9]*\\\(D\\\) >> " 5 "phiopt2" } } */ -/* { dg-final { scan-tree-dump-times " = ~c_\[0-9]*\\\(D\\\);" 1 "phiopt2" } } */ +/* The following check is done at optimized because a ^ (~b) is rewritten as ~(a^b) + and in the case of match.pd optimizing these ?:, the ~ is moved out already + by the time we get to phiopt2. */ +/* { dg-final { scan-tree-dump-times "c_\[0-9]*\\\(D\\\) \\\^" 1 "optimized" } } */ /* { dg-final { scan-tree-dump-times " = ~" 1 "optimized" } } */ /* { dg-final { scan-tree-dump-times " = \[abc_0-9\\\(\\\)D]* \\\^ " 5 "phiopt2" } } */ /* { dg-final { scan-tree-dump-not "a < 0" "phiopt2" } } */ diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c index 7a98b7a..c6adbbd 100644 --- a/gcc/tree-ssa-phiopt.c +++ b/gcc/tree-ssa-phiopt.c @@ -1020,7 +1020,16 @@ match_simplify_replacement (basic_block cond_bb, basic_block middle_bb, return false; gsi = gsi_last_bb (cond_bb); - if (stmt_to_move) + /* Insert the sequence generated from gimple_simplify_phiopt. */ + if (seq) + gsi_insert_seq_before (&gsi, seq, GSI_CONTINUE_LINKING); + + /* If there was a statement to move and the result of the statement + is going to be used, move it to right before the original + conditional. 
*/ + if (stmt_to_move + && (gimple_assign_lhs (stmt_to_move) == result + || !has_single_use (gimple_assign_lhs (stmt_to_move)))) { if (dump_file && (dump_flags & TDF_DETAILS)) { @@ -1032,8 +1041,6 @@ match_simplify_replacement (basic_block cond_bb, basic_block middle_bb, gsi_move_before (&gsi1, &gsi); reset_flow_sensitive_info (gimple_assign_lhs (stmt_to_move)); } - if (seq) - gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT); replace_phi_edge_with_variable (cond_bb, e1, phi, result); -- cgit v1.1 From a1539b797a06e03b08e1f1de28ad0d19a3956616 Mon Sep 17 00:00:00 2001 From: Andrew MacLeod Date: Mon, 12 Jul 2021 11:38:17 -0400 Subject: Do not register a cast as an equivalence. Registering an equivalence between objects of the same size in a cast can cause other relations to be incorrect. gcc/ PR tree-optimization/101335 * range-op.cc (operator_cast::lhs_op1_relation): Delete. gcc/testsuite/ * gcc.dg/tree-ssa/pr101335.c: New. --- gcc/range-op.cc | 25 ------------------------- gcc/testsuite/gcc.dg/tree-ssa/pr101335.c | 17 +++++++++++++++++ 2 files changed, 17 insertions(+), 25 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr101335.c (limited to 'gcc') diff --git a/gcc/range-op.cc b/gcc/range-op.cc index f8e4c6d..0800046 100644 --- a/gcc/range-op.cc +++ b/gcc/range-op.cc @@ -2159,10 +2159,6 @@ public: const irange &lhs, const irange &op2, relation_kind rel = VREL_NONE) const; - virtual enum tree_code lhs_op1_relation (const irange &lhs, - const irange &op1, - const irange &op2) const; - private: bool truncating_cast_p (const irange &inner, const irange &outer) const; bool inside_domain_p (const wide_int &min, const wide_int &max, @@ -2171,27 +2167,6 @@ private: const irange &outer) const; } op_convert; -// Determine if there is a relationship between LHS and OP1. 
- -enum tree_code -operator_cast::lhs_op1_relation (const irange &lhs, - const irange &op1, - const irange &op2 ATTRIBUTE_UNUSED) const -{ - if (op1.undefined_p ()) - return VREL_NONE; - // We can't make larger types equivalent to smaller types because we can - // miss sign extensions in a chain of casts. - // u32 = 0xfffff - // s32 = (s32) u32 - // s64 = (s64) s32 - // we cant simply "convert" s64 = (s64)u32 or we get positive 0xffff - // value instead of sign extended negative value. - if (TYPE_PRECISION (lhs.type ()) == TYPE_PRECISION (op1.type ())) - return EQ_EXPR; - return VREL_NONE; -} - // Return TRUE if casting from INNER to OUTER is a truncating cast. inline bool diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr101335.c b/gcc/testsuite/gcc.dg/tree-ssa/pr101335.c new file mode 100644 index 0000000..921362c --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr101335.c @@ -0,0 +1,17 @@ +/* { dg-do run } */ +/* { dg-options "-O2" } */ + +unsigned a = 0xFFFFFFFF; +int b; +int main() +{ + int c = ~a; + unsigned d = c - 10; + if (d > c) + c = 20; + b = -(c | 0); + if (b > -8) + __builtin_abort (); + return 0; +} + -- cgit v1.1 From 8d980e84240c82502661758fbecd5f456018ea89 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 12 Jul 2021 21:06:32 +0200 Subject: i386: Fix vec_set expanders [PR101424] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit AVX does not support 32-byte integer compares, required by ix86_expand_vector_set_var. The following patch fixes vec_set expanders by introducing new vec_setm_avx2_operand predicate for AVX vector modes. gcc/ 2021-07-12 Uroš Bizjak PR target/101424 * config/i386/predicates.md (vec_setm_sse41_operand): Rename from vec_setm_operand. (vec_setm_avx2_operand): New predicate. * config/i386/sse.md (vec_set): Use V_128 mode iterator. Use vec_setm_sse41_operand as operand 2 predicate. (vec_set PR target/101424 * gcc.target/i386/pr101424.c: New test. 
--- gcc/config/i386/mmx.md | 2 +- gcc/config/i386/predicates.md | 7 ++++++- gcc/config/i386/sse.md | 18 ++++++++++++++++-- gcc/testsuite/gcc.target/i386/pr101424.c | 15 +++++++++++++++ 4 files changed, 38 insertions(+), 4 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr101424.c (limited to 'gcc') diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 986b758..0984f7c 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -3604,7 +3604,7 @@ (define_expand "vec_setv2hi" [(match_operand:V2HI 0 "register_operand") (match_operand:HI 1 "register_operand") - (match_operand 2 "vec_setm_operand")] + (match_operand 2 "vec_setm_sse41_operand")] "TARGET_SSE2" { if (CONST_INT_P (operands[2])) diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index 9488632..6aa1ea3 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -1021,11 +1021,16 @@ }) ;; True for registers, or const_int_operand, used to vec_setm expander. 
-(define_predicate "vec_setm_operand" +(define_predicate "vec_setm_sse41_operand" (ior (and (match_operand 0 "register_operand") (match_test "TARGET_SSE4_1")) (match_code "const_int"))) +(define_predicate "vec_setm_avx2_operand" + (ior (and (match_operand 0 "register_operand") + (match_test "TARGET_AVX2")) + (match_code "const_int"))) + (define_predicate "vec_setm_mmx_operand" (ior (and (match_operand 0 "register_operand") (match_test "TARGET_SSE4_1") diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 17c9e57..ab29999 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -8486,9 +8486,9 @@ (set_attr "mode" "DF")]) (define_expand "vec_set" - [(match_operand:V 0 "register_operand") + [(match_operand:V_128 0 "register_operand") (match_operand: 1 "register_operand") - (match_operand 2 "vec_setm_operand")] + (match_operand 2 "vec_setm_sse41_operand")] "TARGET_SSE" { if (CONST_INT_P (operands[2])) @@ -8499,6 +8499,20 @@ DONE; }) +(define_expand "vec_set" + [(match_operand:V_256_512 0 "register_operand") + (match_operand: 1 "register_operand") + (match_operand 2 "vec_setm_avx2_operand")] + "TARGET_AVX" +{ + if (CONST_INT_P (operands[2])) + ix86_expand_vector_set (false, operands[0], operands[1], + INTVAL (operands[2])); + else + ix86_expand_vector_set_var (operands[0], operands[1], operands[2]); + DONE; +}) + (define_insn_and_split "*vec_extractv4sf_0" [(set (match_operand:SF 0 "nonimmediate_operand" "=v,m,f,r") (vec_select:SF diff --git a/gcc/testsuite/gcc.target/i386/pr101424.c b/gcc/testsuite/gcc.target/i386/pr101424.c new file mode 100644 index 0000000..28bb723 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr101424.c @@ -0,0 +1,15 @@ +/* PR target/101424 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx" } */ + +typedef int v4df __attribute__((vector_size(32))); + +int foo_v4df_b, foo_v4df_c; + +v4df +__attribute__foo_v4df () +{ + v4df a; + a[foo_v4df_c] = foo_v4df_b; + return a; +} -- cgit v1.1 From 
8d75b8830e9dafb4e0c400c723653512adf40295 Mon Sep 17 00:00:00 2001 From: Patrick Palka Date: Mon, 12 Jul 2021 16:35:18 -0400 Subject: c++: permit deduction guides at class scope [PR79501] This adds support for declaring (class-scope) deduction guides for a member class template. Fortunately it seems only a couple of changes are needed in order for the existing CTAD machinery to handle them properly: we need to make sure to give them a FUNCTION_TYPE instead of a METHOD_TYPE, and we need to avoid using a BASELINK when looking them up. PR c++/79501 PR c++/100983 gcc/cp/ChangeLog: * decl.c (grokfndecl): Don't require that deduction guides are declared at namespace scope. Check that class-scope deduction guides have the same access as the member class template. (grokdeclarator): Pretend class-scope deduction guides are static. * search.c (lookup_member): Don't use a BASELINK for (class-scope) deduction guides. gcc/testsuite/ChangeLog: * g++.dg/cpp1z/class-deduction92.C: New test. * g++.dg/cpp1z/class-deduction93.C: New test. * g++.dg/cpp1z/class-deduction94.C: New test. * g++.dg/cpp1z/class-deduction95.C: New test. 
--- gcc/cp/decl.c | 17 +++++++++++------ gcc/cp/search.c | 5 ++++- gcc/testsuite/g++.dg/cpp1z/class-deduction92.C | 17 +++++++++++++++++ gcc/testsuite/g++.dg/cpp1z/class-deduction93.C | 25 +++++++++++++++++++++++++ gcc/testsuite/g++.dg/cpp1z/class-deduction94.C | 19 +++++++++++++++++++ gcc/testsuite/g++.dg/cpp1z/class-deduction95.C | 11 +++++++++++ 6 files changed, 87 insertions(+), 7 deletions(-) create mode 100644 gcc/testsuite/g++.dg/cpp1z/class-deduction92.C create mode 100644 gcc/testsuite/g++.dg/cpp1z/class-deduction93.C create mode 100644 gcc/testsuite/g++.dg/cpp1z/class-deduction94.C create mode 100644 gcc/testsuite/g++.dg/cpp1z/class-deduction95.C (limited to 'gcc') diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c index 0df689b..01d64a1 100644 --- a/gcc/cp/decl.c +++ b/gcc/cp/decl.c @@ -10042,12 +10042,6 @@ grokfndecl (tree ctype, if (deduction_guide_p (decl)) { - if (!DECL_NAMESPACE_SCOPE_P (decl)) - { - error_at (location, "deduction guide %qD must be declared at " - "namespace scope", decl); - return NULL_TREE; - } tree type = TREE_TYPE (DECL_NAME (decl)); if (in_namespace == NULL_TREE && CP_DECL_CONTEXT (decl) != CP_TYPE_CONTEXT (type)) @@ -10057,6 +10051,13 @@ grokfndecl (tree ctype, inform (location_of (type), " declared here"); return NULL_TREE; } + if (DECL_CLASS_SCOPE_P (decl) + && current_access_specifier != declared_access (TYPE_NAME (type))) + { + error_at (location, "deduction guide %qD must have the same access " + "as %qT", decl, type); + inform (location_of (type), " declared here"); + } if (funcdef_flag) error_at (location, "deduction guide %qD must not have a function body", decl); @@ -12037,6 +12038,10 @@ grokdeclarator (const cp_declarator *declarator, storage_class = declspecs->storage_class; if (storage_class == sc_static) staticp = 1 + (decl_context == FIELD); + else if (decl_context == FIELD && sfk == sfk_deduction_guide) + /* Treat class-scope deduction guides as static member functions + so that they get a FUNCTION_TYPE instead of a 
METHOD_TYPE. */ + staticp = 2; if (virtualp) { diff --git a/gcc/cp/search.c b/gcc/cp/search.c index 7b18368..af41bfe 100644 --- a/gcc/cp/search.c +++ b/gcc/cp/search.c @@ -1226,7 +1226,10 @@ lookup_member (tree xbasetype, tree name, int protect, bool want_type, rval = error_mark_node; } - if (rval && is_overloaded_fn (rval)) + if (rval && is_overloaded_fn (rval) + /* Don't use a BASELINK for class-scope deduction guides since + they're not actually member functions. */ + && !dguide_name_p (name)) rval = build_baselink (rval_binfo, basetype_path, rval, (IDENTIFIER_CONV_OP_P (name) ? TREE_TYPE (name): NULL_TREE)); diff --git a/gcc/testsuite/g++.dg/cpp1z/class-deduction92.C b/gcc/testsuite/g++.dg/cpp1z/class-deduction92.C new file mode 100644 index 0000000..4920ca4 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp1z/class-deduction92.C @@ -0,0 +1,17 @@ +// PR c++/79501 +// { dg-do compile { target c++17 } } + +template +struct X { + template + struct B { T t; }; + + template B(T, decltype(V)=V) -> B; + + auto foo() { return B{V}; } +}; + +X<42> x; +using type = decltype(x.foo()); +using type = decltype(decltype(x)::B{42}); +using type = X<42>::B; diff --git a/gcc/testsuite/g++.dg/cpp1z/class-deduction93.C b/gcc/testsuite/g++.dg/cpp1z/class-deduction93.C new file mode 100644 index 0000000..9d2db7a --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp1z/class-deduction93.C @@ -0,0 +1,25 @@ +// PR c++/79501 +// { dg-do compile { target c++17 } } +// A variant of class-deduction78.C where List and its deduction guides are +// defined at class scope. 
+ +using size_t = decltype(sizeof(42)); + +struct A { + template + struct List { + T head; + List tail; + }; + + template + struct List {}; + + template List(T) -> List; + template List(T, List) -> List; +}; + +int main() { + using type = decltype(A::List{0, A::List{1, A::List{2}}}); + using type = A::List; +} diff --git a/gcc/testsuite/g++.dg/cpp1z/class-deduction94.C b/gcc/testsuite/g++.dg/cpp1z/class-deduction94.C new file mode 100644 index 0000000..f29ebd2 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp1z/class-deduction94.C @@ -0,0 +1,19 @@ +// PR c++/79501 +// { dg-do compile { target c++17 } } + +struct X { +protected: + template + struct B { T t; }; + + template B(T) -> B; +}; + +struct Y { +protected: + template + struct B { T t; }; + +private: + template B(T) -> B; // { dg-error "access" } +}; diff --git a/gcc/testsuite/g++.dg/cpp1z/class-deduction95.C b/gcc/testsuite/g++.dg/cpp1z/class-deduction95.C new file mode 100644 index 0000000..05cbb2b --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp1z/class-deduction95.C @@ -0,0 +1,11 @@ +// PR c++/100983 +// { dg-do compile { target c++17 } } + +struct X { + template + struct Y { template Y(Ts...); }; + + template Y(Ts...) -> Y; +}; + +X::Y y{1,2,3}; -- cgit v1.1 From e61afa0c2fb633b037368c067d4744c07349a2eb Mon Sep 17 00:00:00 2001 From: Sergei Trofimovich Date: Mon, 12 Jul 2021 23:45:38 +0100 Subject: docs: fix s/ei_safe_safe/ei_safe_edge/ typo gcc/ChangeLog: * doc/cfg.texi: Fix s/ei_safe_safe/ei_safe_edge/ typo. --- gcc/doc/cfg.texi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/doc/cfg.texi b/gcc/doc/cfg.texi index 7ff2be8..e70ada2 100644 --- a/gcc/doc/cfg.texi +++ b/gcc/doc/cfg.texi @@ -225,7 +225,7 @@ point to the previous edge in the sequence. This function returns the @code{edge} currently pointed to by an @code{edge_iterator}. 
-@item ei_safe_safe +@item ei_safe_edge This function returns the @code{edge} currently pointed to by an @code{edge_iterator}, but returns @code{NULL} if the iterator is pointing at the end of the sequence. This function has been provided -- cgit v1.1 From 9693ecdf7ed5dde9618d06560697ff8ee5e1e6b7 Mon Sep 17 00:00:00 2001 From: Andrew MacLeod Date: Mon, 12 Jul 2021 14:38:42 -0400 Subject: Add relation processing to ubsan builtins. Ubsan builtins call the plus/minus/multiply fold routines, but did not use any relation information between the 2 operands that is available. Query and pass any relations. This resolves gcc.dg/pr97505.c when operating in ranger-only mode. * gimple-range-fold.cc (fold_using_range::range_of_builtin_ubsan_call): Query relation between the 2 operands and use it. --- gcc/gimple-range-fold.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/gimple-range-fold.cc b/gcc/gimple-range-fold.cc index 1fa4ace..eff5d1f 100644 --- a/gcc/gimple-range-fold.cc +++ b/gcc/gimple-range-fold.cc @@ -825,12 +825,14 @@ fold_using_range::range_of_builtin_ubsan_call (irange &r, gcall *call, tree arg1 = gimple_call_arg (call, 1); src.get_operand (ir0, arg0); src.get_operand (ir1, arg1); + // Check for any relation between arg0 and arg1. + relation_kind relation = src.query_relation (arg0, arg1); bool saved_flag_wrapv = flag_wrapv; // Pretend the arithmetic is wrapping. If there is any overflow, // we'll complain, but will actually do wrapping operation. flag_wrapv = 1; - op->fold_range (r, type, ir0, ir1); + op->fold_range (r, type, ir0, ir1, relation); flag_wrapv = saved_flag_wrapv; // If for both arguments vrp_valueize returned non-NULL, this should -- cgit v1.1 From 07bcbf9cc2a031ba5abcff368b452bfc99bf707e Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Tue, 13 Jul 2021 00:16:30 +0000 Subject: Daily bump.
--- gcc/ChangeLog | 121 ++++++++++++++++++++++++++++++++++++++++++ gcc/DATESTAMP | 2 +- gcc/ada/ChangeLog | 138 ++++++++++++++++++++++++++++++++++++++++++++++++ gcc/cp/ChangeLog | 11 ++++ gcc/testsuite/ChangeLog | 59 +++++++++++++++++++++ 5 files changed, 330 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 6e6aaf0..98570fe 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,124 @@ +2021-07-12 Andrew MacLeod + + * gimple-range-fold.cc (fold_using_range::range_of_builtin_ubsan_call): + Query relation between the 2 operands and use it. + +2021-07-12 Sergei Trofimovich + + * doc/cfg.texi: Fix s/ei_safe_safe/ei_safe_edge/ typo. + +2021-07-12 Uroš Bizjak + + PR target/101424 + * config/i386/predicates.md (vec_setm_sse41_operand): + Rename from vec_setm_operand. + (vec_setm_avx2_operand): New predicate. + * config/i386/sse.md (vec_set): Use V_128 mode iterator. + Use vec_setm_sse41_operand as operand 2 predicate. + (vec_set + + PR tree-optimization/101335 + * range-op.cc (operator_cast::lhs_op1_relation): Delete. + +2021-07-12 Andrew Pinski + + * tree-ssa-phiopt.c (match_simplify_replacement): Move + insert of the sequence before the movement of the + statement. Check if to see if the statement is used + outside of the original phi to see if we should move it. + +2021-07-12 Richard Biener + + * dump-context.h (debug_dump_context::debug_dump_context): + Add FILE * parameter defaulted to stderr. + * dumpfile.c (debug_dump_context::debug_dump_context): Adjust. + * tree-vect-slp.c (dot_slp_tree): New functions. + +2021-07-12 Richard Biener + + PR tree-optimization/101373 + * tree-ssa-pre.c (prune_clobbered_mems): Also prune trapping + references when the BB may not return. + (compute_avail): Pass in the function we're working on and + replace cfun references with it. Externally throwing + const calls also possibly terminate the function. + (pass_pre::execute): Pass down the function we're working on. 
+ * gcse.c (compute_hash_table_work): Externally throwing + const/pure calls also need record_last_mem_set_info. + * postreload-gcse.c (record_opr_changes): Looping or externally + throwing const/pure calls also need record_last_mem_set_info. + +2021-07-12 Uroš Bizjak + + * recog.c (memory_address_addr_space_p): Change the type to bool. + Return true/false instead of 1/0. + (offsettable_memref_p): Ditto. + (offsettable_nonstrict_memref_p): Ditto. + (offsettable_address_addr_space_p): Ditto. + Change the type of addressp indirect function to bool. + * recog.h (memory_address_addr_space_p): Change the type to bool. + (strict_memory_address_addr_space_p): Ditto. + (offsettable_memref_p): Ditto. + (offsettable_nonstrict_memref_p): Ditto. + (offsettable_address_addr_space_p): Ditto. + * reload.c (maybe_memory_address_addr_space_p): Ditto. + (strict_memory_address_addr_space_p): Change the type to bool. + Return true/false instead of 1/0. + (maybe_memory_address_addr_space_p): Change the type to bool. + +2021-07-12 Richard Biener + + * tree-vect-slp.c (vect_slp_region): Show the number of + SLP graph entries in the optimization message. + +2021-07-12 Richard Biener + + PR tree-optimization/101394 + * tree-ssa-pre.c (do_pre_regular_insertion): Avoid inserting + copies from abnormals for a full redundancy. + +2021-07-12 Richard Biener + + PR middle-end/101423 + * gimple.c (gimple_could_trap_p_1): Internal function calls + do not trap. + * tree-eh.c (tree_could_trap_p): Likewise. + +2021-07-12 prathamesh.kulkarni + + PR target/66791 + * config/arm/arm_neon.h (vmul_n_u32): Replace call to builtin with + __a * __b. + (vmulq_n_u32): Likewise. + (vmul_n_f32): Gate __a * __b on __FAST_MATH__. + (vmulq_n_f32): Likewise. + (vmul_n_f16): Likewise. + (vmulq_n_f16): Likewise. + +2021-07-12 Martin Liska + + PR sanitizer/101425 + * gcc.c (check_offload_target_name): Call + candidates_list_and_hint only if we have a candidate. 
+ +2021-07-12 prathamesh.kulkarni + + PR target/98435 + * config/arm/neon.md (vec_init): Move to ... + * config/arm/vec-common.md (vec_init): ... here. + Change the pattern's mode to VDQX and gate it on VALID_MVE_MODE. + +2021-07-12 Roger Sayle + + PR tree-optimization/101403 + * match.pd ((T)bswap(X)>>C): Correctly handle cases where + signedness of the shift is not the same as the signedness of + the type extension. + 2021-07-09 Roger Sayle Uroš Bizjak diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index d01e523..12fd5d4 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20210712 +20210713 diff --git a/gcc/ada/ChangeLog b/gcc/ada/ChangeLog index 9b5629c..a60b02a 100644 --- a/gcc/ada/ChangeLog +++ b/gcc/ada/ChangeLog @@ -1,3 +1,141 @@ +2021-07-12 Pierre-Marie de Rodat + + * adaint.c (__gnat_number_of_cpus): Replace "#ifdef" by "#if + defined". + +2021-07-12 Eric Botcazou + + * gcc-interface/decl.c (gnat_to_gnu_entity) : Add a + parallel type only when -fgnat-encodings=all is specified. + : Use the PAT name and special suffixes only when + -fgnat-encodings=all is specified. + : Build a special type for debugging purposes only + when -fgnat-encodings=all is specified. Add a parallel type or use + the PAT name only when -fgnat-encodings=all is specified. + : Generate debug info for the inner record types only + when -fgnat-encodings=all is specified. + : Use a debug type for an artificial subtype only + except when -fgnat-encodings=all is specified. + (elaborate_expression_1): Reset need_for_debug when possible only + except when -fgnat-encodings=all is specified. + (components_to_record): Use XV encodings for variable size only + when -fgnat-encodings=all is specified. + (associate_original_type_to_packed_array): Add a parallel type only + when -fgnat-encodings=all is specified. + * gcc-interface/misc.c (gnat_get_array_descr_info): Do not return + full information only when -fgnat-encodings=all is specified. 
+ * gcc-interface/utils.c (make_packable_type): Add a parallel type + only when -fgnat-encodings=all is specified. + (maybe_pad_type): Make the inner type a debug type only except when + -fgnat-encodings=all is specified. Create an XVS type for variable + size only when -fgnat-encodings=all is specified. + (rest_of_record_type_compilation): Add a parallel type only when + -fgnat-encodings=all is specified. + +2021-07-12 Eric Botcazou + + * gcc-interface/decl.c (gnat_to_gnu_entity) : Use a + fixed lower bound if the index subtype is marked so, as well as a + more efficient formula for the upper bound if the array cannot be + superflat. + (flb_cannot_be_superflat): New predicate. + (cannot_be_superflat): Rename into... + (range_cannot_be_superflat): ...this. Minor tweak. + +2021-07-12 Bob Duff + + * uintp.ads, types.h: New subtypes of Uint: Valid_Uint, Unat, + Upos, Nonzero_Uint with predicates. These correspond to new + field types in Gen_IL. + * gen_il-types.ads (Valid_Uint, Unat, Upos, Nonzero_Uint): New + field types. + * einfo-utils.ads, einfo-utils.adb, fe.h (Known_Alignment, + Init_Alignment): Use the initial zero value to represent + "unknown". This will ensure that if Alignment is called before + Set_Alignment, the compiler will blow up (if assertions are + enabled). + * atree.ads, atree.adb, atree.h, gen_il-gen.adb + (Get_Valid_32_Bit_Field): New generic low-level getter for + subtypes of Uint. + (Copy_Alignment): New procedure to copy Alignment field even + when Unknown. + (Init_Object_Size_Align, Init_Size_Align): Do not bypass the + Init_ procedures. + * exp_pakd.adb, freeze.adb, layout.adb, repinfo.adb, + sem_util.adb: Protect calls to Alignment with Known_Alignment. + Use Copy_Alignment when it might be unknown. + * gen_il-gen-gen_entities.adb (Alignment, + String_Literal_Length): Use type Unat instead of Uint, to ensure + that the field is always Set_ before we get it, and that it is + set to a nonnegative value. + (Enumeration_Pos): Unat.
+ (Enumeration_Rep): Valid_Uint. Can be negative, but must be + valid before fetching. + (Discriminant_Number): Upos. + (Renaming_Map): Remove. + * gen_il-gen-gen_nodes.adb (Char_Literal_Value, Reason): Unat. + (Intval, Corresponding_Integer_Value): Valid_Uint. + * gen_il-internals.ads: New functions for dealing with special + defaults and new subtypes of Uint. + * scans.ads: Correct comments. + * scn.adb (Post_Scan): Do not set Intval to No_Uint; that is no + longer allowed. + * sem_ch13.adb (Analyze_Enumeration_Representation_Clause): Do + not set Enumeration_Rep to No_Uint; that is no longer allowed. + (Offset_Value): Protect calls to Alignment with Known_Alignment. + * sem_prag.adb (Set_Atomic_VFA): Do not use Uint_0 to mean + "unknown"; call Init_Alignment instead. + * sinfo.ads: Minor comment fix. + * treepr.adb: Deal with printing of new field types. + * einfo.ads, gen_il-fields.ads (Renaming_Map): Remove. + * gcc-interface/decl.c (gnat_to_gnu_entity): Use Known_Alignment + before calling Alignment. This preserves some probably buggy + behavior: if the alignment is not set, it previously defaulted + to Uint_0; we now make that explicit. Use Copy_Alignment, + because "Set_Alignment (Y, Alignment (X));" no longer works when + the Alignment of X has not yet been set. + * gcc-interface/trans.c (process_freeze_entity): Use + Copy_Alignment. + +2021-07-12 Eric Botcazou + + * libgnat/s-dwalin.ads: Adjust a few comments left and right. + (Line_Info_Register): Comment out unused components. + (Line_Info_Header): Add DWARF 5 support. + (Dwarf_Context): Likewise. Rename "prologue" into "header". + * libgnat/s-dwalin.adb: Alphabetize "with" clauses. + (DWARF constants): Add DWARF 5 support and reorder. + (For_Each_Row): Adjust. + (Initialize_Pass): Likewise. + (Initialize_State_Machine): Likewise and fix typo. + (Open): Add DWARF 5 support. + (Parse_Prologue): Rename into... + (Parse_Header): ...this and add DWARF 5 support. + (Read_And_Execute_Isn): Rename into...
+ (Read_And_Execute_Insn): ...this and adjust. + (To_File_Name): Change parameter name and add DWARF 5 support. + (Read_Entry_Format_Array): New procedure. + (Skip_Form): Add DWARF 5 support and reorder. + (Seek_Abbrev): Do not count entries and add DWARF 5 support. + (Debug_Info_Lookup): Add DWARF 5 support. + (Symbolic_Address.Set_Result): Likewise. + (Symbolic_Address): Adjust. + +2021-07-12 Bob Duff + + * sem_ch13.adb (Duplicate_Clause): Add a helper routine + Check_One_Attr, with a parameter for the attribute_designator we + are looking for, and one for the attribute_designator of the + current node (which are usually the same). For Size and + Value_Size, call it twice, once for each. + * errout.ads: Fix a typo. + +2021-07-12 Piotr Trojanek + + * exp_imgv.adb (Expand_Image_Attribute): Move rewriting to + attribute Put_Image to the beginning of expansion of attribute + Image. + 2021-07-09 Eric Botcazou * gcc-interface/utils.c (finish_subprog_decl): Remove obsolete line. diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index 39e5ec3..dc57991 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,14 @@ +2021-07-12 Patrick Palka + + PR c++/79501 + PR c++/100983 + * decl.c (grokfndecl): Don't require that deduction guides are + declared at namespace scope. Check that class-scope deduction + guides have the same access as the member class template. + (grokdeclarator): Pretend class-scope deduction guides are static. + * search.c (lookup_member): Don't use a BASELINK for (class-scope) + deduction guides. + 2021-07-10 Patrick Palka PR c++/82110 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index cac4995..6eacfd0 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,62 @@ +2021-07-12 Patrick Palka + + PR c++/79501 + PR c++/100983 + * g++.dg/cpp1z/class-deduction92.C: New test. + * g++.dg/cpp1z/class-deduction93.C: New test. + * g++.dg/cpp1z/class-deduction94.C: New test. + * g++.dg/cpp1z/class-deduction95.C: New test. 
+ +2021-07-12 Uroš Bizjak + + PR target/101424 + * gcc.target/i386/pr101424.c: New test. + +2021-07-12 Andrew MacLeod + + * gcc.dg/tree-ssa/pr101335.c: New. + +2021-07-12 Andrew Pinski + + * gcc.dg/tree-ssa/pr96928-1.c: Update to similar as pr96928.c. + +2021-07-12 Richard Biener + + PR tree-optimization/101373 + * g++.dg/torture/pr101373.C: New testcase, XFAILed. + * gnat.dg/opt95.adb: Likewise. + +2021-07-12 Richard Biener + + * g++.dg/vect/slp-pr87105.cc: Adjust. + * gcc.dg/vect/bb-slp-pr54400.c: Likewise. + +2021-07-12 Richard Biener + + PR tree-optimization/101394 + * gcc.dg/torture/pr101394.c: New testcase. + +2021-07-12 Roger Sayle + Jakub Jelinek + + PR tree-optimization/101403 + * gcc.dg/pr101403.c: Avoid (unimportant) uninitialized variable. + +2021-07-12 prathamesh.kulkarni + + PR target/66791 + * gcc.target/arm/armv8_2-fp16-neon-2.c: Adjust. + +2021-07-12 prathamesh.kulkarni + + PR target/98435 + * gcc.target/arm/simd/pr98435.c: New test. + +2021-07-12 Roger Sayle + + PR tree-optimization/101403 + * gcc.dg/pr101403.c: New test case. + 2021-07-10 John David Anglin * gcc.dg/torture/pr100329.c: Require target lra. -- cgit v1.1 From 7591309696537212a1d0497bc09c09b7abc7e650 Mon Sep 17 00:00:00 2001 From: Michael Meissner Date: Mon, 12 Jul 2021 23:50:38 -0400 Subject: Change rs6000_const_f32_to_i32 return type. The function rs6000_const_f32_to_i32 called REAL_VALUE_TO_TARGET_SINGLE with a long long type and returns it. This patch changes the type to long which is the proper type for REAL_VALUE_TO_TARGET_SINGLE. 2021-07-12 Michael Meissner gcc/ * config/rs6000/altivec.md (xxspltiw_v4sf): Change local variable value to long. * config/rs6000/rs6000-protos.h (rs6000_const_f32_to_i32): Change return type to long. * config/rs6000/rs6000.c (rs6000_const_f32_to_i32): Change return type to long.
--- gcc/config/rs6000/altivec.md | 2 +- gcc/config/rs6000/rs6000-protos.h | 2 +- gcc/config/rs6000/rs6000.c | 6 ++++-- 3 files changed, 6 insertions(+), 4 deletions(-) (limited to 'gcc') diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index dad3a07..a20d6ac 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -863,7 +863,7 @@ UNSPEC_XXSPLTIW))] "TARGET_POWER10" { - long long value = rs6000_const_f32_to_i32 (operands[1]); + long value = rs6000_const_f32_to_i32 (operands[1]); emit_insn (gen_xxspltiw_v4sf_inst (operands[0], GEN_INT (value))); DONE; }) diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 9de294d..94bf961 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -281,7 +281,7 @@ extern void rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label); extern void rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label); -extern long long rs6000_const_f32_to_i32 (rtx operand); +extern long rs6000_const_f32_to_i32 (rtx operand); /* Declare functions in rs6000-c.c */ diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 9a5db63..de11de5 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -27936,10 +27936,12 @@ rs6000_invalid_conversion (const_tree fromtype, const_tree totype) return NULL; } -long long +/* Convert a SFmode constant to the integer bit pattern. */ + +long rs6000_const_f32_to_i32 (rtx operand) { - long long value; + long value; const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (operand); gcc_assert (GET_MODE (operand) == SFmode); -- cgit v1.1 From 31ff034a1eadfd4641ce9c44647e30a759cfbbfb Mon Sep 17 00:00:00 2001 From: Michael Meissner Date: Mon, 12 Jul 2021 23:51:24 -0400 Subject: Fix vec-splati-runnable.c test. I noticed that the vec-splati-runnable.c did not have an abort after one of the tests. 
If the test was run with optimization, the optimizer could delete some of the tests and throw off the count. However, due to the fact that the value being loaded in that test is undefined, I did not check what value was loaded, but I just stored it into a volatile global variable. 2021-07-12 Michael Meissner gcc/testsuite/ * gcc.target/powerpc/vec-splati-runnable.c: Run test with -O2 optimization. Do not check what XXSPLTIDP generates if the value is undefined. --- .../gcc.target/powerpc/vec-splati-runnable.c | 29 +++++++--------------- 1 file changed, 9 insertions(+), 20 deletions(-) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splati-runnable.c b/gcc/testsuite/gcc.target/powerpc/vec-splati-runnable.c index e84ce77..a135279 100644 --- a/gcc/testsuite/gcc.target/powerpc/vec-splati-runnable.c +++ b/gcc/testsuite/gcc.target/powerpc/vec-splati-runnable.c @@ -1,7 +1,7 @@ /* { dg-do run { target { power10_hw } } } */ /* { dg-do link { target { ! power10_hw } } } */ /* { dg-require-effective-target power10_ok } */ -/* { dg-options "-mdejagnu-cpu=power10 -save-temps" } */ +/* { dg-options "-mdejagnu-cpu=power10 -save-temps -O2" } */ #include #define DEBUG 0 @@ -12,6 +12,8 @@ extern void abort (void); +volatile vector double vresult_d_undefined; + int main (int argc, char *argv []) { @@ -85,25 +87,12 @@ main (int argc, char *argv []) #endif } - /* This test will generate a "note" to the user that the argument - is subnormal. It is not an error, but results are not defined. */ - vresult_d = (vector double) { 2.0, 3.0 }; - expected_vresult_d = (vector double) { 6.6E-42f, 6.6E-42f }; - - vresult_d = vec_splatid (6.6E-42f); - - /* Although the instruction says the results are not defined, it does seem - to work, at least on Mambo. But no guarentees! 
*/ - if (!vec_all_eq (vresult_d, expected_vresult_d)) { -#if DEBUG - printf("ERROR, vec_splati (6.6E-42f)\n"); - for(i = 0; i < 2; i++) - printf(" vresult_d[%i] = %e, expected_vresult_d[%i] = %e\n", - i, vresult_d[i], i, expected_vresult_d[i]); -#else - ; -#endif - } + /* This test will generate a "note" to the user that the argument is + subnormal. It is not an error, but results are not defined. Because this + is undefined, we cannot check that any value is correct. Just store it in + a volatile variable so the XXSPLTIDP instruction gets generated and the + warning message printed. */ + vresult_d_undefined = vec_splatid (6.6E-42f); /* Vector splat immediate */ vsrc_a_int = (vector int) { 2, 3, 4, 5 }; -- cgit v1.1 From 063eba7ca73030139a3bf822ed127cf09b2fc226 Mon Sep 17 00:00:00 2001 From: Michael Meissner Date: Tue, 13 Jul 2021 00:36:43 -0400 Subject: Deal with prefixed loads/stores in tests, PR testsuite/100166 This patch updates the various tests in the testsuite to treat plxv and pstxv as being vector loads/stores. This shows up if you run the testsuite with a compiler configured with the option: --with-cpu=power10. 2021-07-13 Michael Meissner gcc/testsuite/ PR testsuite/100166 * gcc.dg/vect/costmodel/ppc/costmodel-bb-slp-9a-pr63175.c: Update insn counts to account for power10 prefixed loads and stores. * gcc.target/powerpc/fold-vec-load-builtin_vec_xl-char.c: Likewise. * gcc.target/powerpc/fold-vec-load-builtin_vec_xl-double.c: Likewise. * gcc.target/powerpc/fold-vec-load-builtin_vec_xl-float.c: Likewise. * gcc.target/powerpc/fold-vec-load-builtin_vec_xl-int.c: Likewise. * gcc.target/powerpc/fold-vec-load-builtin_vec_xl-longlong.c: Likewise. * gcc.target/powerpc/fold-vec-load-builtin_vec_xl-short.c: Likewise. * gcc.target/powerpc/fold-vec-load-vec_vsx_ld-char.c: Likewise. * gcc.target/powerpc/fold-vec-load-vec_vsx_ld-double.c: Likewise. * gcc.target/powerpc/fold-vec-load-vec_vsx_ld-float.c: Likewise. 
* gcc.target/powerpc/fold-vec-load-vec_vsx_ld-int.c: Likewise. * gcc.target/powerpc/fold-vec-load-vec_vsx_ld-longlong.c: Likewise. * gcc.target/powerpc/fold-vec-load-vec_vsx_ld-short.c: Likewise. * gcc.target/powerpc/fold-vec-load-vec_xl-char.c: Likewise. * gcc.target/powerpc/fold-vec-load-vec_xl-double.c: Likewise. * gcc.target/powerpc/fold-vec-load-vec_xl-float.c: Likewise. * gcc.target/powerpc/fold-vec-load-vec_xl-int.c: Likewise. * gcc.target/powerpc/fold-vec-load-vec_xl-longlong.c: Likewise. * gcc.target/powerpc/fold-vec-load-vec_xl-short.c: Likewise. * gcc.target/powerpc/fold-vec-splat-floatdouble.c: Likewise. * gcc.target/powerpc/fold-vec-splat-longlong.c: Likewise. * gcc.target/powerpc/fold-vec-store-builtin_vec_xst-char.c: Likewise. * gcc.target/powerpc/fold-vec-store-builtin_vec_xst-double.c: Likewise. * gcc.target/powerpc/fold-vec-store-builtin_vec_xst-float.c: Likewise. * gcc.target/powerpc/fold-vec-store-builtin_vec_xst-int.c: Likewise. * gcc.target/powerpc/fold-vec-store-builtin_vec_xst-longlong.c: Likewise. * gcc.target/powerpc/fold-vec-store-builtin_vec_xst-short.c: Likewise. * gcc.target/powerpc/fold-vec-store-vec_vsx_st-char.c: Likewise. * gcc.target/powerpc/fold-vec-store-vec_vsx_st-double.c: Likewise. * gcc.target/powerpc/fold-vec-store-vec_vsx_st-float.c: Likewise. * gcc.target/powerpc/fold-vec-store-vec_vsx_st-int.c: Likewise. * gcc.target/powerpc/fold-vec-store-vec_vsx_st-longlong.c: Likewise. * gcc.target/powerpc/fold-vec-store-vec_vsx_st-short.c: Likewise. * gcc.target/powerpc/fold-vec-store-vec_xst-char.c: Likewise. * gcc.target/powerpc/fold-vec-store-vec_xst-double.c: Likewise. * gcc.target/powerpc/fold-vec-store-vec_xst-float.c: Likewise. * gcc.target/powerpc/fold-vec-store-vec_xst-int.c: Likewise. * gcc.target/powerpc/fold-vec-store-vec_xst-longlong.c: Likewise. * gcc.target/powerpc/fold-vec-store-vec_xst-short.c: Likewise. * gcc.target/powerpc/lvsl-lvsr.c: Likewise. * gcc.target/powerpc/pr86731-fwrapv-longlong.c: Likewise. 
--- .../gcc.dg/vect/costmodel/ppc/costmodel-bb-slp-9a-pr63175.c | 2 +- .../gcc.target/powerpc/fold-vec-load-builtin_vec_xl-char.c | 2 +- .../gcc.target/powerpc/fold-vec-load-builtin_vec_xl-double.c | 2 +- .../gcc.target/powerpc/fold-vec-load-builtin_vec_xl-float.c | 2 +- .../gcc.target/powerpc/fold-vec-load-builtin_vec_xl-int.c | 2 +- .../gcc.target/powerpc/fold-vec-load-builtin_vec_xl-longlong.c | 2 +- .../gcc.target/powerpc/fold-vec-load-builtin_vec_xl-short.c | 2 +- gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_vsx_ld-char.c | 2 +- gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_vsx_ld-double.c | 2 +- gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_vsx_ld-float.c | 2 +- gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_vsx_ld-int.c | 2 +- .../gcc.target/powerpc/fold-vec-load-vec_vsx_ld-longlong.c | 2 +- gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_vsx_ld-short.c | 2 +- gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_xl-char.c | 2 +- gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_xl-double.c | 2 +- gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_xl-float.c | 2 +- gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_xl-int.c | 2 +- gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_xl-longlong.c | 2 +- gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_xl-short.c | 2 +- gcc/testsuite/gcc.target/powerpc/fold-vec-splat-floatdouble.c | 7 ++++--- gcc/testsuite/gcc.target/powerpc/fold-vec-splat-longlong.c | 2 +- .../gcc.target/powerpc/fold-vec-store-builtin_vec_xst-char.c | 2 +- .../gcc.target/powerpc/fold-vec-store-builtin_vec_xst-double.c | 2 +- .../gcc.target/powerpc/fold-vec-store-builtin_vec_xst-float.c | 2 +- .../gcc.target/powerpc/fold-vec-store-builtin_vec_xst-int.c | 2 +- .../gcc.target/powerpc/fold-vec-store-builtin_vec_xst-longlong.c | 2 +- .../gcc.target/powerpc/fold-vec-store-builtin_vec_xst-short.c | 2 +- gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_vsx_st-char.c | 2 +- 
.../gcc.target/powerpc/fold-vec-store-vec_vsx_st-double.c | 2 +- gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_vsx_st-float.c | 2 +- gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_vsx_st-int.c | 2 +- .../gcc.target/powerpc/fold-vec-store-vec_vsx_st-longlong.c | 2 +- gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_vsx_st-short.c | 2 +- gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_xst-char.c | 2 +- gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_xst-double.c | 2 +- gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_xst-float.c | 2 +- gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_xst-int.c | 2 +- gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_xst-longlong.c | 2 +- gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_xst-short.c | 2 +- gcc/testsuite/gcc.target/powerpc/lvsl-lvsr.c | 2 +- gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv-longlong.c | 2 +- 41 files changed, 44 insertions(+), 43 deletions(-) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-bb-slp-9a-pr63175.c b/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-bb-slp-9a-pr63175.c index 246f38f..d9f173b 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-bb-slp-9a-pr63175.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-bb-slp-9a-pr63175.c @@ -25,6 +25,6 @@ main1 (void) with no word loads (lw, lwu, lwz, lwzu, or their indexed forms) or word stores (stw, stwu, stwx, stwux, or their indexed forms). */ -/* { dg-final { scan-assembler "\t(lvx|lxv|lvsr|stxv)" } } */ +/* { dg-final { scan-assembler "\t(lvx|lxv|lvsr|stxv|plxv|pstxv)" } } */ /* { dg-final { scan-assembler-not "\tlwz?u?x? " { xfail { powerpc-ibm-aix* } } } } */ /* { dg-final { scan-assembler-not "\tstwu?x? 
" } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-load-builtin_vec_xl-char.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-load-builtin_vec_xl-char.c index 9b199c2..f6eb88f 100644 --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-load-builtin_vec_xl-char.c +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-load-builtin_vec_xl-char.c @@ -36,4 +36,4 @@ BUILD_VAR_TEST( test10, vector unsigned char, signed long long, vector unsigned BUILD_VAR_TEST( test11, vector unsigned char, signed int, vector unsigned char); BUILD_CST_TEST( test12, vector unsigned char, 8, vector unsigned char); -/* { dg-final { scan-assembler-times {\mlxvw4x\M|\mlxvd2x\M|\mlxvx\M|\mlvx\M} 12 } } */ +/* { dg-final { scan-assembler-times {\mlxvw4x\M|\mlxvd2x\M|\mlxvx\M|\mp?lvx\M} 12 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-load-builtin_vec_xl-double.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-load-builtin_vec_xl-double.c index c49dfe8..66d5445 100644 --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-load-builtin_vec_xl-double.c +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-load-builtin_vec_xl-double.c @@ -28,4 +28,4 @@ BUILD_VAR_TEST( test4, vector double, signed long long, vector double); BUILD_VAR_TEST( test5, vector double, signed int, vector double); BUILD_CST_TEST( test6, vector double, 12, vector double); -/* { dg-final { scan-assembler-times {\mlxvd2x\M|\mlxvx\M|\mlvx\M} 6 } } */ +/* { dg-final { scan-assembler-times {\mlxvd2x\M|\mlxvx\M|\mp?lvx\M} 6 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-load-builtin_vec_xl-float.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-load-builtin_vec_xl-float.c index cdded36..7d84c20 100644 --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-load-builtin_vec_xl-float.c +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-load-builtin_vec_xl-float.c @@ -28,4 +28,4 @@ BUILD_VAR_TEST( test4, vector float, signed long long, vector float); BUILD_VAR_TEST( test5, vector float, signed int, vector float); BUILD_CST_TEST( test6, 
vector float, 12, vector float); -/* { dg-final { scan-assembler-times {\mlxvw4x\M|\mlxvd2x\M|\mlxvx\M|\mlvx\M} 6 } } */ +/* { dg-final { scan-assembler-times {\mlxvw4x\M|\mlxvd2x\M|\mlxvx\M|\mp?lvx\M} 6 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-load-builtin_vec_xl-int.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-load-builtin_vec_xl-int.c index bc18beb..c6a8226 100644 --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-load-builtin_vec_xl-int.c +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-load-builtin_vec_xl-int.c @@ -36,4 +36,4 @@ BUILD_VAR_TEST( test10, vector unsigned int, signed long long, vector unsigned i BUILD_VAR_TEST( test11, vector unsigned int, signed int, vector unsigned int); BUILD_CST_TEST( test12, vector unsigned int, 12, vector unsigned int); -/* { dg-final { scan-assembler-times {\mlxvw4x\M|\mlxvd2x\M|\mlxvx\M|\mlvx\M} 12 } } */ +/* { dg-final { scan-assembler-times {\mlxvw4x\M|\mlxvd2x\M|\mlxvx\M|\mp?lvx\M} 12 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-load-builtin_vec_xl-longlong.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-load-builtin_vec_xl-longlong.c index 66e953a..6f0cd73 100644 --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-load-builtin_vec_xl-longlong.c +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-load-builtin_vec_xl-longlong.c @@ -36,4 +36,4 @@ BUILD_VAR_TEST( test10, vector unsigned long long, signed long long, vector uns BUILD_VAR_TEST( test11, vector unsigned long long, signed int, vector unsigned long long); BUILD_CST_TEST( test12, vector unsigned long long, 12, vector unsigned long long); -/* { dg-final { scan-assembler-times {\mlxvd2x\M|\mlxvx\M|\mlvx\M} 12 } } */ +/* { dg-final { scan-assembler-times {\mlxvd2x\M|\mlxvx\M|\mp?lvx\M} 12 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-load-builtin_vec_xl-short.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-load-builtin_vec_xl-short.c index 0ef1c59..6c270a9 100644 --- 
a/gcc/testsuite/gcc.target/powerpc/fold-vec-load-builtin_vec_xl-short.c +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-load-builtin_vec_xl-short.c @@ -36,4 +36,4 @@ BUILD_VAR_TEST( test10, vector unsigned short, signed long long, vector unsigne BUILD_VAR_TEST( test11, vector unsigned short, signed int, vector unsigned short); BUILD_CST_TEST( test12, vector unsigned short, 12, vector unsigned short); -/* { dg-final { scan-assembler-times {\mlxvw4x\M|\mlxvd2x\M|\mlxvx\M|\mlvx\M} 12 } } */ +/* { dg-final { scan-assembler-times {\mlxvw4x\M|\mlxvd2x\M|\mlxvx\M|\mp?lvx\M} 12 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_vsx_ld-char.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_vsx_ld-char.c index 0b76341..6aae435 100644 --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_vsx_ld-char.c +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_vsx_ld-char.c @@ -35,5 +35,5 @@ BUILD_VAR_TEST( test10, vector unsigned char, signed long long, vector unsigned BUILD_VAR_TEST( test11, vector unsigned char, signed int, vector unsigned char); BUILD_CST_TEST( test12, vector unsigned char, 12, vector unsigned char); -/* { dg-final { scan-assembler-times {\mlxvw4x\M|\mlxvd2x\M|\mlxvx\M|\mlvx\M} 12 } } */ +/* { dg-final { scan-assembler-times {\mlxvw4x\M|\mlxvd2x\M|\mlxvx\M|\mlvx\M|\mplxv\M} 12 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_vsx_ld-double.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_vsx_ld-double.c index beb6d03..b3f3b7f 100644 --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_vsx_ld-double.c +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_vsx_ld-double.c @@ -27,4 +27,4 @@ BUILD_VAR_TEST( test4, vector double, int, vector double); BUILD_VAR_TEST( test5, vector double, long long, vector double); BUILD_CST_TEST( test6, vector double, 12, vector double); -/* { dg-final { scan-assembler-times {\mlxvd2x\M|\mlxvx\M|\mlvx\M} 6 } } */ +/* { dg-final { scan-assembler-times 
{\mlxvd2x\M|\mlxvx\M|\mlvx\M|\mplxv\M} 6 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_vsx_ld-float.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_vsx_ld-float.c index 5f9b6d3..56cbe9a 100644 --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_vsx_ld-float.c +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_vsx_ld-float.c @@ -27,4 +27,4 @@ BUILD_VAR_TEST( test5, vector float, signed long long, vector float); BUILD_VAR_TEST( test7, vector float, signed int, vector float); BUILD_CST_TEST( test8, vector float, 12, vector float); -/* { dg-final { scan-assembler-times {\mlxvw4x\M|\mlxvd2x\M|\mlxvx\M|\mlvx\M} 6 } } */ +/* { dg-final { scan-assembler-times {\mlxvw4x\M|\mlxvd2x\M|\mlxvx\M|\mlvx\M|\mplxv\M} 6 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_vsx_ld-int.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_vsx_ld-int.c index a59f52f..2cde9f5 100644 --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_vsx_ld-int.c +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_vsx_ld-int.c @@ -35,4 +35,4 @@ BUILD_VAR_TEST( test10, vector unsigned int, signed long long, vector unsigned BUILD_VAR_TEST( test11, vector unsigned int, signed int, vector unsigned int); BUILD_CST_TEST( test12, vector unsigned int, 12, vector unsigned int); -/* { dg-final { scan-assembler-times {\mlxvw4x\M|\mlxvd2x\M|\mlxvx\M|\mlvx\M} 12 } } */ +/* { dg-final { scan-assembler-times {\mlxvw4x\M|\mlxvd2x\M|\mlxvx\M|\mlvx\M|\mplxv\M} 12 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_vsx_ld-longlong.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_vsx_ld-longlong.c index 5c121fa..cf2b7f9 100644 --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_vsx_ld-longlong.c +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_vsx_ld-longlong.c @@ -35,4 +35,4 @@ BUILD_VAR_TEST( test10, vector unsigned long long, signed long long, vector uns BUILD_VAR_TEST( test11, vector unsigned long long, signed 
int, vector unsigned long long); BUILD_CST_TEST( test12, vector unsigned long long, 12, vector unsigned long long); -/* { dg-final { scan-assembler-times {\mlxvd2x\M|\mlxvx\M|\mlvx\M} 12 } } */ +/* { dg-final { scan-assembler-times {\mlxvd2x\M|\mlxvx\M|\mlvx\M|\mplxv\M} 12 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_vsx_ld-short.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_vsx_ld-short.c index 07154d8..cfc0e30 100644 --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_vsx_ld-short.c +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_vsx_ld-short.c @@ -35,4 +35,4 @@ BUILD_VAR_TEST( test10, vector unsigned short, signed long long, vector unsigne BUILD_VAR_TEST( test11, vector unsigned short, signed int, vector unsigned short); BUILD_CST_TEST( test12, vector unsigned short, 12, vector unsigned short); -/* { dg-final { scan-assembler-times {\mlxvw4x\M|\mlxvd2x\M|\mlxvx\M|\mlvx\M} 12 } } */ +/* { dg-final { scan-assembler-times {\mlxvw4x\M|\mlxvd2x\M|\mlxvx\M|\mlvx\M|\mplxv\M} 12 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_xl-char.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_xl-char.c index 04c4f31..7281b3c 100644 --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_xl-char.c +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_xl-char.c @@ -35,4 +35,4 @@ BUILD_VAR_TEST( test10, vector unsigned char, signed long long, unsigned char); BUILD_VAR_TEST( test11, vector unsigned char, signed int, unsigned char); BUILD_CST_TEST( test12, vector unsigned char, 12, unsigned char); -/* { dg-final { scan-assembler-times {\mlxvw4x\M|\mlxvd2x\M|\mlxvx\M|\mlvx\M} 12 } } */ +/* { dg-final { scan-assembler-times {\mlxvw4x\M|\mlxvd2x\M|\mlxvx\M|\mlvx\M|\mplxv\M} 12 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_xl-double.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_xl-double.c index 1958d65..3f3d985 100644 --- 
a/gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_xl-double.c +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_xl-double.c @@ -28,4 +28,4 @@ BUILD_VAR_TEST( test4, vector double, signed long long, double); BUILD_VAR_TEST( test5, vector double, signed int, double); BUILD_CST_TEST( test6, vector double, 12, double); -/* { dg-final { scan-assembler-times {\mlxvd2x\M|\mlxvx\M|\mlvx\M} 6 } } */ +/* { dg-final { scan-assembler-times {\mlxvd2x\M|\mlxvx\M|\mlvx\M|\mplxv\M} 6 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_xl-float.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_xl-float.c index 5578138..eafe305 100644 --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_xl-float.c +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_xl-float.c @@ -27,4 +27,4 @@ BUILD_VAR_TEST( test4, vector float, signed long long, vector float); BUILD_VAR_TEST( test5, vector float, signed int, vector float); BUILD_CST_TEST( test6, vector float, 12, vector float); -/* { dg-final { scan-assembler-times {\mlxvw4x\M|\mlxvd2x\M|\mlxvx\M|\mlvx\M} 6 } } */ +/* { dg-final { scan-assembler-times {\mlxvw4x\M|\mlxvd2x\M|\mlxvx\M|\mlvx\M|\mplxv\M} 6 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_xl-int.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_xl-int.c index 8ba880e..a226721 100644 --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_xl-int.c +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_xl-int.c @@ -35,4 +35,4 @@ BUILD_VAR_TEST( test10, vector unsigned int, signed long long, vector unsigned BUILD_VAR_TEST( test11, vector unsigned int, signed int, vector unsigned int); BUILD_CST_TEST( test12, vector unsigned int, 12, vector unsigned int); -/* { dg-final { scan-assembler-times {\mlxvw4x\M|\mlxvd2x\M|\mlxvx\M|\mlvx\M} 12 } } */ +/* { dg-final { scan-assembler-times {\mlxvw4x\M|\mlxvd2x\M|\mlxvx\M|\mlvx\M|\mplxv\M} 12 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_xl-longlong.c 
b/gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_xl-longlong.c index 6df3c79..f2e5469 100644 --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_xl-longlong.c +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_xl-longlong.c @@ -35,4 +35,4 @@ BUILD_VAR_TEST( test10, vector unsigned long long, signed long long, vector uns BUILD_VAR_TEST( test11, vector unsigned long long, signed int, vector unsigned long long); BUILD_CST_TEST( test12, vector unsigned long long, 12, vector unsigned long long); -/* { dg-final { scan-assembler-times {\mlxvd2x\M|\mlxvx\M|\mlvx\M} 12 } } */ +/* { dg-final { scan-assembler-times {\mlxvd2x\M|\mlxvx\M|\mlvx\M|\mplxv\M} 12 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_xl-short.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_xl-short.c index c5088ab..2d64b4f 100644 --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_xl-short.c +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-load-vec_xl-short.c @@ -35,4 +35,4 @@ BUILD_VAR_TEST( test10, vector unsigned short, signed long long, vector unsigne BUILD_VAR_TEST( test11, vector unsigned short, signed int, vector unsigned short); BUILD_CST_TEST( test12, vector unsigned short, 12, vector unsigned short); -/* { dg-final { scan-assembler-times {\mlxvw4x\M|\mlxvd2x\M|\mlxvx\M|\mlvx\M} 12 } } */ +/* { dg-final { scan-assembler-times {\mlxvw4x\M|\mlxvd2x\M|\mlxvx\M|\mlvx\M|\mplxv\M} 12 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-splat-floatdouble.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-splat-floatdouble.c index ab39696..7661917 100644 --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-splat-floatdouble.c +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-splat-floatdouble.c @@ -20,11 +20,12 @@ vector double testd_01 (vector double x) { return vec_splat (x, 0b00001); } vector double test_dc () { const vector double y = { 3.0, 5.0 }; return vec_splat (y, 0b00010); } -/* If the source vector is a known constant, we will generate a load. 
*/ -/* { dg-final { scan-assembler-times {\mlvx\M|\mlxvd2x\M|\mlxv\M} 2 } } */ +/* If the source vector is a known constant, we will generate a load or possibly + XXSPLTIW. */ +/* { dg-final { scan-assembler-times {\mlvx\M|\mlxvd2x\M|\mlxv\M|\mplxv\M|\mxxspltiw\M} 2 } } */ /* For float types, we generate a splat. */ -/* { dg-final { scan-assembler-times "vspltw|xxspltw" 3 } } */ +/* { dg-final { scan-assembler-times {\mvspltw\M|\mxxspltw\M} 3 } } */ /* For double types, we will generate xxpermdi instructions. */ /* { dg-final { scan-assembler-times "xxpermdi" 3 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-splat-longlong.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-splat-longlong.c index 4fa06c8..b95b987 100644 --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-splat-longlong.c +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-splat-longlong.c @@ -25,7 +25,7 @@ vector signed long long test_sll () { const vector signed long long y = {34, 45} vector unsigned long long test_ull () { const vector unsigned long long y = {56, 67}; return vec_splat (y, 0b00010); } /* Assorted load instructions for the initialization with known constants. */ -/* { dg-final { scan-assembler-times {\mlvx\M|\mlxvd2x\M|\mlxv\M} 3 } } */ +/* { dg-final { scan-assembler-times {\mlvx\M|\mlxvd2x\M|\mlxv\M|\mplxv\M} 3 } } */ /* xxpermdi for vec_splat of long long vectors. 
At the time of this writing, the number of xxpermdi instructions diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-store-builtin_vec_xst-char.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-store-builtin_vec_xst-char.c index d1100d0..162563c 100644 --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-store-builtin_vec_xst-char.c +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-store-builtin_vec_xst-char.c @@ -36,4 +36,4 @@ BUILD_VAR_TEST( test10, vector unsigned char, signed long long, vector unsigned BUILD_VAR_TEST( test11, vector unsigned char, signed int, vector unsigned char ); BUILD_CST_TEST( test12, vector unsigned char, 12, vector unsigned char ); -/* { dg-final { scan-assembler-times {\mstxvw4x\M|\mstxvd2x\M|\mstxvx\M|\mstvx\M} 12 } } */ +/* { dg-final { scan-assembler-times {\mstxvw4x\M|\mstxvd2x\M|\mstxvx\M|\mstvx\M|\mpstxv\M} 12 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-store-builtin_vec_xst-double.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-store-builtin_vec_xst-double.c index 74e34c3..c42a720 100644 --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-store-builtin_vec_xst-double.c +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-store-builtin_vec_xst-double.c @@ -28,4 +28,4 @@ BUILD_VAR_TEST( test4, vector double, signed long long, double ); BUILD_VAR_TEST( test5, vector double, signed int, double ); BUILD_CST_TEST( test6, vector double, 12, double ); -/* { dg-final { scan-assembler-times {\mstxvd2x\M|\mstxvx\M|\mstvx\M} 6 } } */ +/* { dg-final { scan-assembler-times {\mstxvd2x\M|\mstxvx\M|\mstvx\M|\mpstxv\M} 6 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-store-builtin_vec_xst-float.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-store-builtin_vec_xst-float.c index db6bd33..b200c47 100644 --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-store-builtin_vec_xst-float.c +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-store-builtin_vec_xst-float.c @@ -28,4 +28,4 @@ BUILD_VAR_TEST( test4, vector float, signed long long, vector 
float ); BUILD_VAR_TEST( test5, vector float, signed int, vector float ); BUILD_CST_TEST( test6, vector float, 12, vector float ); -/* { dg-final { scan-assembler-times {\mstxvd2x\M|\mstxvx\M|\mstvx\M} 6 } } */ +/* { dg-final { scan-assembler-times {\mstxvd2x\M|\mstxvx\M|\mstvx\M|\mpstxv\M} 6 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-store-builtin_vec_xst-int.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-store-builtin_vec_xst-int.c index 2a32889..d984882 100644 --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-store-builtin_vec_xst-int.c +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-store-builtin_vec_xst-int.c @@ -36,4 +36,4 @@ BUILD_VAR_TEST( test10, vector unsigned int, signed long long, vector unsigned i BUILD_VAR_TEST( test11, vector unsigned int, signed int, vector unsigned int ); BUILD_CST_TEST( test12, vector unsigned int, 12, vector unsigned int ); -/* { dg-final { scan-assembler-times {\mstxvw4x\M|\mstxvd2x\M|\mstxvx\M|\mstvx\M} 12 } } */ +/* { dg-final { scan-assembler-times {\mstxvw4x\M|\mstxvd2x\M|\mstxvx\M|\mstvx\M|\mpstxv\M} 12 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-store-builtin_vec_xst-longlong.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-store-builtin_vec_xst-longlong.c index a62ca51..bb72d9b 100644 --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-store-builtin_vec_xst-longlong.c +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-store-builtin_vec_xst-longlong.c @@ -36,4 +36,4 @@ BUILD_VAR_TEST( test10, vector unsigned long long, signed long long, vector uns BUILD_VAR_TEST( test11, vector unsigned long long, signed int, vector unsigned long long ); BUILD_CST_TEST( test12, vector unsigned long long, 12, vector unsigned long long ); -/* { dg-final { scan-assembler-times {\mstxvd2x\M|\mstxvx\M|\mstvx\M} 12 } } */ +/* { dg-final { scan-assembler-times {\mstxvd2x\M|\mstxvx\M|\mstvx\M|\mpstxv\M} 12 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-store-builtin_vec_xst-short.c 
b/gcc/testsuite/gcc.target/powerpc/fold-vec-store-builtin_vec_xst-short.c index 2b1e1c0..f4dbb70 100644 --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-store-builtin_vec_xst-short.c +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-store-builtin_vec_xst-short.c @@ -36,4 +36,4 @@ BUILD_VAR_TEST( test10, vector unsigned short, signed long long, vector unsigned BUILD_VAR_TEST( test11, vector unsigned short, signed int, vector unsigned short ); BUILD_CST_TEST( test12, vector unsigned short, 12, vector unsigned short ); -/* { dg-final { scan-assembler-times {\mstxvw4x\M|\mstxvd2x\M|\mstxvx\M|\mstvx\M} 12 } } */ +/* { dg-final { scan-assembler-times {\mstxvw4x\M|\mstxvd2x\M|\mstxvx\M|\mstvx\M|\mpstxv\M} 12 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_vsx_st-char.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_vsx_st-char.c index 82bb891..ae5cf8e 100644 --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_vsx_st-char.c +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_vsx_st-char.c @@ -36,4 +36,4 @@ BUILD_VAR_TEST( test10, vector unsigned char, signed long long, vector unsigned BUILD_VAR_TEST( test11, vector unsigned char, signed int, vector unsigned char ); BUILD_CST_TEST( test12, vector unsigned char, 12, vector unsigned char ); -/* { dg-final { scan-assembler-times {\mstxvw4x\M|\mstxvd2x\M|\mstxvx\M|\mstvx\M} 12 } } */ +/* { dg-final { scan-assembler-times {\mstxvw4x\M|\mstxvd2x\M|\mstxvx\M|\mstvx\M|\mpstxv\M} 12 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_vsx_st-double.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_vsx_st-double.c index 34772cf..1360f4d 100644 --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_vsx_st-double.c +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_vsx_st-double.c @@ -28,4 +28,4 @@ BUILD_VAR_TEST( test7, vector double, signed long long, vector double ); BUILD_VAR_TEST( test8, vector double, signed int, vector double ); BUILD_CST_TEST( test9, 
vector double, 12, vector double ); -/* { dg-final { scan-assembler-times {\mstxvd2x\M|\mstxvx\M|\mstvx\M} 6 } } */ +/* { dg-final { scan-assembler-times {\mstxvd2x\M|\mstxvx\M|\mstvx\M|\mpstxv\M} 6 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_vsx_st-float.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_vsx_st-float.c index cf13f2a..1b70f2a 100644 --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_vsx_st-float.c +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_vsx_st-float.c @@ -28,4 +28,4 @@ BUILD_VAR_TEST( test7, vector float, signed long long, vector float ); BUILD_VAR_TEST( test8, vector float, signed int, vector float ); BUILD_CST_TEST( test9, vector float, 12, vector float ); -/* { dg-final { scan-assembler-times {\mstxvd2x\M|\mstxvx\M|\mstvx\M} 6 } } */ +/* { dg-final { scan-assembler-times {\mstxvd2x\M|\mstxvx\M|\mstvx\M|\mpstxv\M} 6 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_vsx_st-int.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_vsx_st-int.c index a9e189d..4e4a499 100644 --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_vsx_st-int.c +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_vsx_st-int.c @@ -36,4 +36,4 @@ BUILD_VAR_TEST( test10, vector unsigned int, signed long long, vector unsigned i BUILD_VAR_TEST( test11, vector unsigned int, signed int, vector unsigned int ); BUILD_CST_TEST( test12, vector unsigned int, 12, vector unsigned int ); -/* { dg-final { scan-assembler-times {\mstxvw4x\M|\mstxvd2x\M|\mstxvx\M|\mstvx\M} 12 } } */ +/* { dg-final { scan-assembler-times {\mstxvw4x\M|\mstxvd2x\M|\mstxvx\M|\mstvx\M|\mpstxv\M} 12 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_vsx_st-longlong.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_vsx_st-longlong.c index f50e2b9..b57c126 100644 --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_vsx_st-longlong.c +++ 
b/gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_vsx_st-longlong.c @@ -36,4 +36,4 @@ BUILD_VAR_TEST( test10, vector unsigned long long, signed long long, vector unsi BUILD_VAR_TEST( test11, vector unsigned long long, signed int, vector unsigned long long ); BUILD_CST_TEST( test12, vector unsigned long long, 12, vector unsigned long long ); -/* { dg-final { scan-assembler-times {\mstxvd2x\M|\mstxvx\M|\mstvx\M} 12 } } */ +/* { dg-final { scan-assembler-times {\mstxvd2x\M|\mstxvx\M|\mstvx\M|\mpstxv\M} 12 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_vsx_st-short.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_vsx_st-short.c index 0f8a93a..7593f42 100644 --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_vsx_st-short.c +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_vsx_st-short.c @@ -36,4 +36,4 @@ BUILD_VAR_TEST( test10, vector unsigned short, signed long long, vector unsigned BUILD_VAR_TEST( test11, vector unsigned short, signed int, vector unsigned short ); BUILD_CST_TEST( test12, vector unsigned short, 12, vector unsigned short ); -/* { dg-final { scan-assembler-times {\mstxvw4x\M|\mstxvd2x\M|\mstxvx\M|\mstvx\M} 12 } } */ +/* { dg-final { scan-assembler-times {\mstxvw4x\M|\mstxvd2x\M|\mstxvx\M|\mstvx\M|\mpstxv\M} 12 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_xst-char.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_xst-char.c index 4f5930a..fdd2ed5 100644 --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_xst-char.c +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_xst-char.c @@ -35,4 +35,4 @@ BUILD_VAR_TEST( test10, vector unsigned char, signed long long, vector unsigned BUILD_VAR_TEST( test11, vector unsigned char, signed int, vector unsigned char ); BUILD_CST_TEST( test12, vector unsigned char, 12, vector unsigned char ); -/* { dg-final { scan-assembler-times {\mstxvw4x\M|\mstxvd2x\M|\mstxvx\M|\mstvx\M} 12 } } */ +/* { dg-final { scan-assembler-times 
{\mstxvw4x\M|\mstxvd2x\M|\mstxvx\M|\mstvx\M|\mpstxv\M} 12 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_xst-double.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_xst-double.c index 511d5fe..62f8552 100644 --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_xst-double.c +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_xst-double.c @@ -27,4 +27,4 @@ BUILD_VAR_TEST( test7, vector double, signed long long, vector double ); BUILD_VAR_TEST( test8, vector double, signed int, vector double ); BUILD_CST_TEST( test9, vector double, 12, vector double ); -/* { dg-final { scan-assembler-times {\mstxvd2x\M|\mstxvx\M|\mstvx\M} 6 } } */ +/* { dg-final { scan-assembler-times {\mstxvd2x\M|\mstxvx\M|\mstvx\M|\mpstxv\M} 6 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_xst-float.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_xst-float.c index 13e6cb6..ad15a5a 100644 --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_xst-float.c +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_xst-float.c @@ -27,4 +27,4 @@ BUILD_VAR_TEST( test7, vector float, signed long long, vector float ); BUILD_VAR_TEST( test8, vector float, signed int, vector float ); BUILD_CST_TEST( test9, vector float, 12, vector float ); -/* { dg-final { scan-assembler-times {\mstxvd2x\M|\mstxvx\M|\mstvx\M} 6 } } */ +/* { dg-final { scan-assembler-times {\mstxvd2x\M|\mstxvx\M|\mstvx\M|\mpstxv\M} 6 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_xst-int.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_xst-int.c index fd6ff78..abe93df 100644 --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_xst-int.c +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_xst-int.c @@ -35,4 +35,4 @@ BUILD_VAR_TEST( test10, vector unsigned int, signed long long, vector unsigned i BUILD_VAR_TEST( test11, vector unsigned int, signed int, vector unsigned int ); BUILD_CST_TEST( test12, vector unsigned int, 12, 
vector unsigned int ); -/* { dg-final { scan-assembler-times {\mstxvw4x\M|\mstxvd2x\M|\mstxvx\M|\mstvx\M} 12 } } */ +/* { dg-final { scan-assembler-times {\mstxvw4x\M|\mstxvd2x\M|\mstxvx\M|\mstvx\M|\mpstxv\M} 12 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_xst-longlong.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_xst-longlong.c index a669481..6859593 100644 --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_xst-longlong.c +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_xst-longlong.c @@ -35,4 +35,4 @@ BUILD_VAR_TEST( test10, vector unsigned long long, signed long long, vector unsi BUILD_VAR_TEST( test11, vector unsigned long long, signed int, vector unsigned long long ); BUILD_CST_TEST( test12, vector unsigned long long, 12, vector unsigned long long ); -/* { dg-final { scan-assembler-times {\mstxvd2x\M|\mstxvx\M|\mstvx\M} 12 } } */ +/* { dg-final { scan-assembler-times {\mstxvd2x\M|\mstxvx\M|\mstvx\M|\mpstxv\M} 12 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_xst-short.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_xst-short.c index 78eae57..6c54873 100644 --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_xst-short.c +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-store-vec_xst-short.c @@ -35,4 +35,4 @@ BUILD_VAR_TEST( test10, vector unsigned short, signed long long, vector unsigned BUILD_VAR_TEST( test11, vector unsigned short, signed int, vector unsigned short ); BUILD_CST_TEST( test12, vector unsigned short, 12, vector unsigned short ); -/* { dg-final { scan-assembler-times {\mstxvw4x\M|\mstxvd2x\M|\mstxvx\M|\mstvx\M} 12 } } */ +/* { dg-final { scan-assembler-times {\mstxvw4x\M|\mstxvd2x\M|\mstxvx\M|\mstvx\M|\mpstxv\M} 12 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/lvsl-lvsr.c b/gcc/testsuite/gcc.target/powerpc/lvsl-lvsr.c index 93843c0..26aadbc 100644 --- a/gcc/testsuite/gcc.target/powerpc/lvsl-lvsr.c +++ b/gcc/testsuite/gcc.target/powerpc/lvsl-lvsr.c @@ 
-6,7 +6,7 @@ /* { dg-options "-O0 -Wno-deprecated" } */ /* { dg-final { scan-assembler-times "lvsl" 2 } } */ /* { dg-final { scan-assembler-times "lvsr" 2 } } */ -/* { dg-final { scan-assembler-times {\mlxvd2x\M|\mlxv\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mlxvd2x\M|\mp?lxv\M} 2 } } */ /* { dg-final { scan-assembler-times {\m(?:v|xx)permr?\M} 2 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv-longlong.c b/gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv-longlong.c index 1269fe6..bd1502b 100644 --- a/gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv-longlong.c +++ b/gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv-longlong.c @@ -30,5 +30,5 @@ vector signed long long splats4(void) /* { dg-final { scan-assembler-times {\mvspltis[bhw]\M} 0 } } */ /* { dg-final { scan-assembler-times {\mvsl[bhwd]\M} 0 } } */ -/* { dg-final { scan-assembler-times {\mlvx\M|\mlxv\M|\mlxvd2x\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mlvx\M|\mp?lxv\M|\mlxvd2x\M} 2 } } */ -- cgit v1.1 From f546e2b6cc5c610ae18aac274d0d6493f2da3801 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Tue, 13 Jul 2021 08:04:34 +0200 Subject: Revert "Display the number of components BB vectorized" This reverts commit c03cae4e066066278c8435c409829a9bf851e49f. --- gcc/testsuite/g++.dg/vect/slp-pr87105.cc | 2 +- gcc/testsuite/gcc.dg/vect/bb-slp-pr54400.c | 2 +- gcc/tree-vect-slp.c | 12 ++++-------- 3 files changed, 6 insertions(+), 10 deletions(-) (limited to 'gcc') diff --git a/gcc/testsuite/g++.dg/vect/slp-pr87105.cc b/gcc/testsuite/g++.dg/vect/slp-pr87105.cc index 451a117..d07b1cd 100644 --- a/gcc/testsuite/g++.dg/vect/slp-pr87105.cc +++ b/gcc/testsuite/g++.dg/vect/slp-pr87105.cc @@ -99,7 +99,7 @@ void quadBoundingBoxA(const Point bez[3], Box& bBox) noexcept { // We should have if-converted everything down to straight-line code // { dg-final { scan-tree-dump-times "" 1 "slp2" } } -// { dg-final { scan-tree-dump-times "optimized: basic block part" 1 "slp2" { xfail { { ! 
vect_element_align } && { ! vect_hw_misalign } } } } } +// { dg-final { scan-tree-dump-times "basic block part vectorized" 1 "slp2" { xfail { { ! vect_element_align } && { ! vect_hw_misalign } } } } } // It's a bit awkward to detect that all stores were vectorized but the // following more or less does the trick // { dg-final { scan-tree-dump "vect_\[^\r\m\]* = MIN" "slp2" { xfail { { ! vect_element_align } && { ! vect_hw_misalign } } } } } diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr54400.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr54400.c index 7c46fa0..6b427aa 100644 --- a/gcc/testsuite/gcc.dg/vect/bb-slp-pr54400.c +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr54400.c @@ -39,5 +39,5 @@ main () } /* We are lacking an effective target for .REDUC_PLUS support. */ -/* { dg-final { scan-tree-dump-times "optimized: basic block part" 3 "slp2" { target x86_64-*-* } } } */ +/* { dg-final { scan-tree-dump-times "basic block part vectorized" 3 "slp2" { target x86_64-*-* } } } */ /* { dg-final { scan-tree-dump-not " = VEC_PERM_EXPR" "slp2" { target x86_64-*-* } } } */ diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 86fa3c1..97fba6a 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -5865,16 +5865,12 @@ vect_slp_region (vec bbs, vec datarefs, if (GET_MODE_SIZE (bb_vinfo->vector_mode).is_constant (&bytes)) dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location, - "basic block part with %u components " - "vectorized using %wu byte vectors\n", - instance->subgraph_entries.length (), - bytes); + "basic block part vectorized using %wu " + "byte vectors\n", bytes); else dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location, - "basic block part with %u components " - "vectorized using variable length " - "vectors\n", - instance->subgraph_entries.length ()); + "basic block part vectorized using " + "variable length vectors\n"); } } } -- cgit v1.1 From 18a463bb666cc8f3421589e7641ec617acb84741 Mon Sep 17 00:00:00 2001 From: Kito Cheng Date: Fri, 2 Jul 2021 
10:19:30 +0800 Subject: docs: Add 'S' to Machine Constraints for RISC-V It was undocumented before, but it might be used in the Linux kernel to resolve a code model issue, so the LLVM community suggested that we document it, so that it becomes a supported, documented, non-internal machine constraint. gcc/ChangeLog: PR target/101275 * config/riscv/constraints.md ("S"): Update description and remove @internal. * doc/md.texi (Machine Constraints): Document the 'S' constraint for RISC-V. --- gcc/config/riscv/constraints.md | 3 +-- gcc/doc/md.texi | 3 +++ 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'gcc') diff --git a/gcc/config/riscv/constraints.md b/gcc/config/riscv/constraints.md index 8c15c6c..c87d5b7 100644 --- a/gcc/config/riscv/constraints.md +++ b/gcc/config/riscv/constraints.md @@ -67,8 +67,7 @@ (match_test "GET_CODE(XEXP(op,0)) == REG"))) (define_constraint "S" - "@internal - A constant call address." + "A constraint that matches an absolute symbolic address." (match_operand 0 "absolute_symbolic_operand")) (define_constraint "U" diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index 0fe70b7..8225a76 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -3536,6 +3536,9 @@ A 5-bit unsigned immediate for CSR access instructions. @item A An address that is held in a general-purpose register. +@item S +A constraint that matches an absolute symbolic address. + @end table @item RX---@file{config/rx/constraints.md} -- cgit v1.1 From dddb6ffdc5c25264dd75ad82dad8e48a0718d2d9 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Tue, 13 Jul 2021 11:04:22 +0200 Subject: passes: Fix up subobject __bos [PR101419] The following testcase is miscompiled, because VN during cunrolli changes __bos argument from address of a larger field to address of a smaller field and so __builtin_object_size (, 1) then folds into smaller value than the actually available size. 
copy_reference_ops_from_ref has a hack for this, but it was using cfun->after_inlining as a check whether the hack can be ignored, and cunrolli is after_inlining. This patch uses a property to make it exact (set at the end of objsz pass that doesn't do insert_min_max_p) and additionally based on discussions in the PR moves the objsz pass earlier after IPA. 2021-07-13 Jakub Jelinek Richard Biener PR tree-optimization/101419 * tree-pass.h (PROP_objsz): Define. (make_pass_early_object_sizes): Declare. * passes.def (pass_all_early_optimizations): Rename pass_object_sizes there to pass_early_object_sizes, drop parameter. (pass_all_optimizations): Move pass_object_sizes right after pass_ccp, drop parameter, move pass_post_ipa_warn right after that. * tree-object-size.c (pass_object_sizes::execute): Rename to... (object_sizes_execute): ... this. Add insert_min_max_p argument. (pass_data_object_sizes): Move after object_sizes_execute. (pass_object_sizes): Likewise. In execute method call object_sizes_execute, drop set_pass_param method and insert_min_max_p non-static data member and its initializer in the ctor. (pass_data_early_object_sizes, pass_early_object_sizes, make_pass_early_object_sizes): New. * tree-ssa-sccvn.c (copy_reference_ops_from_ref): Use (cfun->curr_properties & PROP_objsz) instead of cfun->after_inlining. * gcc.dg/builtin-object-size-10.c: Pass -fdump-tree-early_objsz-details instead of -fdump-tree-objsz1-details in dg-options and adjust names of dump file in scan-tree-dump. * gcc.dg/pr101419.c: New test. 
--- gcc/passes.def | 6 +- gcc/testsuite/gcc.dg/builtin-object-size-10.c | 6 +- gcc/testsuite/gcc.dg/pr101419.c | 62 ++++++++++++++ gcc/tree-object-size.c | 114 +++++++++++++++++--------- gcc/tree-pass.h | 2 + gcc/tree-ssa-sccvn.c | 6 +- 6 files changed, 145 insertions(+), 51 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/pr101419.c (limited to 'gcc') diff --git a/gcc/passes.def b/gcc/passes.def index 945d2bc..f5d88a6 100644 --- a/gcc/passes.def +++ b/gcc/passes.def @@ -74,7 +74,7 @@ along with GCC; see the file COPYING3. If not see NEXT_PASS (pass_all_early_optimizations); PUSH_INSERT_PASSES_WITHIN (pass_all_early_optimizations) NEXT_PASS (pass_remove_cgraph_callee_edges); - NEXT_PASS (pass_object_sizes, true /* insert_min_max_p */); + NEXT_PASS (pass_early_object_sizes); /* Don't record nonzero bits before IPA to avoid using too much memory. */ NEXT_PASS (pass_ccp, false /* nonzero_p */); @@ -194,14 +194,14 @@ along with GCC; see the file COPYING3. If not see They ensure memory accesses are not indirect wherever possible. */ NEXT_PASS (pass_strip_predict_hints, false /* early_p */); NEXT_PASS (pass_ccp, true /* nonzero_p */); - NEXT_PASS (pass_post_ipa_warn); /* After CCP we rewrite no longer addressed locals into SSA form if possible. */ + NEXT_PASS (pass_object_sizes); + NEXT_PASS (pass_post_ipa_warn); NEXT_PASS (pass_complete_unrolli); NEXT_PASS (pass_backprop); NEXT_PASS (pass_phiprop); NEXT_PASS (pass_forwprop); - NEXT_PASS (pass_object_sizes, false /* insert_min_max_p */); /* pass_build_alias is a dummy pass that ensures that we execute TODO_rebuild_alias at this point. 
*/ NEXT_PASS (pass_build_alias); diff --git a/gcc/testsuite/gcc.dg/builtin-object-size-10.c b/gcc/testsuite/gcc.dg/builtin-object-size-10.c index 2a212fa..bfcdf5c 100644 --- a/gcc/testsuite/gcc.dg/builtin-object-size-10.c +++ b/gcc/testsuite/gcc.dg/builtin-object-size-10.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fdump-tree-objsz1-details" } */ +/* { dg-options "-O2 -fdump-tree-early_objsz-details" } */ // { dg-skip-if "packed attribute missing for drone_source_packet" { "epiphany-*-*" } } typedef struct { @@ -22,5 +22,5 @@ foo(char *x) return dpkt; } -/* { dg-final { scan-tree-dump "maximum object size 21" "objsz1" } } */ -/* { dg-final { scan-tree-dump "maximum subobject size 16" "objsz1" } } */ +/* { dg-final { scan-tree-dump "maximum object size 21" "early_objsz" } } */ +/* { dg-final { scan-tree-dump "maximum subobject size 16" "early_objsz" } } */ diff --git a/gcc/testsuite/gcc.dg/pr101419.c b/gcc/testsuite/gcc.dg/pr101419.c new file mode 100644 index 0000000..2cce383 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr101419.c @@ -0,0 +1,62 @@ +/* PR tree-optimization/101419 */ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +typedef __SIZE_TYPE__ size_t; +void baz (int, int) __attribute__((__warning__("detected overflow"))); + +union U { + int i; + char c; +}; + +static void +foo (union U *u) +{ + if (__builtin_object_size (&u->c, 1) < sizeof (u->c)) + baz (__builtin_object_size (&u->c, 1), sizeof (u->c)); /* { dg-bogus "detected overflow" } */ + __builtin_memset (&u->c, 0, sizeof (u->c)); + + if (__builtin_object_size (&u->i, 1) < sizeof (u->i)) + baz (__builtin_object_size (&u->i, 1), sizeof (u->i)); /* { dg-bogus "detected overflow" } */ + __builtin_memset (&u->i, 0, sizeof (u->i)); +} + +void +bar (union U *u) +{ + int i, j; + for (i = 0; i < 1; i++) + { + foo (u); + for (j = 0; j < 2; j++) + asm volatile (""); + } +} + +static void +qux (void *p, size_t q) +{ + if (__builtin_object_size (p, 1) < q) + baz (__builtin_object_size (p, 
1), q); /* { dg-bogus "detected overflow" } */ + __builtin_memset (p, 0, q); +} + +static void +corge (union U *u) +{ + qux (&u->c, sizeof (u->c)); + qux (&u->i, sizeof (u->i)); +} + +void +garply (union U *u) +{ + int i, j; + for (i = 0; i < 1; i++) + { + corge (u); + for (j = 0; j < 2; j++) + asm volatile (""); + } +} diff --git a/gcc/tree-object-size.c b/gcc/tree-object-size.c index 13be7f4..744748d 100644 --- a/gcc/tree-object-size.c +++ b/gcc/tree-object-size.c @@ -1304,45 +1304,6 @@ fini_object_sizes (void) } } - -/* Simple pass to optimize all __builtin_object_size () builtins. */ - -namespace { - -const pass_data pass_data_object_sizes = -{ - GIMPLE_PASS, /* type */ - "objsz", /* name */ - OPTGROUP_NONE, /* optinfo_flags */ - TV_NONE, /* tv_id */ - ( PROP_cfg | PROP_ssa ), /* properties_required */ - 0, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - 0, /* todo_flags_finish */ -}; - -class pass_object_sizes : public gimple_opt_pass -{ -public: - pass_object_sizes (gcc::context *ctxt) - : gimple_opt_pass (pass_data_object_sizes, ctxt), insert_min_max_p (false) - {} - - /* opt_pass methods: */ - opt_pass * clone () { return new pass_object_sizes (m_ctxt); } - void set_pass_param (unsigned int n, bool param) - { - gcc_assert (n == 0); - insert_min_max_p = param; - } - virtual unsigned int execute (function *); - - private: - /* Determines whether the pass instance creates MIN/MAX_EXPRs. */ - bool insert_min_max_p; -}; // class pass_object_sizes - /* Dummy valueize function. */ static tree @@ -1351,8 +1312,8 @@ do_valueize (tree t) return t; } -unsigned int -pass_object_sizes::execute (function *fun) +static unsigned int +object_sizes_execute (function *fun, bool insert_min_max_p) { basic_block bb; FOR_EACH_BB_FN (bb, fun) @@ -1453,6 +1414,38 @@ pass_object_sizes::execute (function *fun) return 0; } +/* Simple pass to optimize all __builtin_object_size () builtins. 
*/ + +namespace { + +const pass_data pass_data_object_sizes = +{ + GIMPLE_PASS, /* type */ + "objsz", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_NONE, /* tv_id */ + ( PROP_cfg | PROP_ssa ), /* properties_required */ + PROP_objsz, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ +}; + +class pass_object_sizes : public gimple_opt_pass +{ +public: + pass_object_sizes (gcc::context *ctxt) + : gimple_opt_pass (pass_data_object_sizes, ctxt) + {} + + /* opt_pass methods: */ + opt_pass * clone () { return new pass_object_sizes (m_ctxt); } + virtual unsigned int execute (function *fun) + { + return object_sizes_execute (fun, false); + } +}; // class pass_object_sizes + } // anon namespace gimple_opt_pass * @@ -1460,3 +1453,42 @@ make_pass_object_sizes (gcc::context *ctxt) { return new pass_object_sizes (ctxt); } + +/* Early version of pass to optimize all __builtin_object_size () builtins. */ + +namespace { + +const pass_data pass_data_early_object_sizes = +{ + GIMPLE_PASS, /* type */ + "early_objsz", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_NONE, /* tv_id */ + ( PROP_cfg | PROP_ssa ), /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ +}; + +class pass_early_object_sizes : public gimple_opt_pass +{ +public: + pass_early_object_sizes (gcc::context *ctxt) + : gimple_opt_pass (pass_data_early_object_sizes, ctxt) + {} + + /* opt_pass methods: */ + virtual unsigned int execute (function *fun) + { + return object_sizes_execute (fun, true); + } +}; // class pass_early_object_sizes + +} // anon namespace + +gimple_opt_pass * +make_pass_early_object_sizes (gcc::context *ctxt) +{ + return new pass_early_object_sizes (ctxt); +} diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index 15693fe..aa9757a 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -208,6 +208,7 @@ protected: #define PROP_gimple_lcf (1 << 1) 
/* lowered control flow */ #define PROP_gimple_leh (1 << 2) /* lowered eh */ #define PROP_cfg (1 << 3) +#define PROP_objsz (1 << 4) /* object sizes computed */ #define PROP_ssa (1 << 5) #define PROP_no_crit_edges (1 << 6) #define PROP_rtl (1 << 7) @@ -426,6 +427,7 @@ extern gimple_opt_pass *make_pass_omp_target_link (gcc::context *ctxt); extern gimple_opt_pass *make_pass_oacc_device_lower (gcc::context *ctxt); extern gimple_opt_pass *make_pass_omp_device_lower (gcc::context *ctxt); extern gimple_opt_pass *make_pass_object_sizes (gcc::context *ctxt); +extern gimple_opt_pass *make_pass_early_object_sizes (gcc::context *ctxt); extern gimple_opt_pass *make_pass_warn_printf (gcc::context *ctxt); extern gimple_opt_pass *make_pass_strlen (gcc::context *ctxt); extern gimple_opt_pass *make_pass_fold_builtins (gcc::context *ctxt); diff --git a/gcc/tree-ssa-sccvn.c b/gcc/tree-ssa-sccvn.c index 64e3a70..d6aee2e 100644 --- a/gcc/tree-ssa-sccvn.c +++ b/gcc/tree-ssa-sccvn.c @@ -925,12 +925,10 @@ copy_reference_ops_from_ref (tree ref, vec *result) + (wi::to_offset (bit_offset) >> LOG2_BITS_PER_UNIT)); /* Probibit value-numbering zero offset components of addresses the same before the pass folding - __builtin_object_size had a chance to run - (checking cfun->after_inlining does the - trick here). */ + __builtin_object_size had a chance to run. */ if (TREE_CODE (orig) != ADDR_EXPR || maybe_ne (off, 0) - || cfun->after_inlining) + || (cfun->curr_properties & PROP_objsz)) off.to_shwi (&temp.off); } } -- cgit v1.1 From 0ae469e8c0ccb93a26bb1e60db6418d6bcced15e Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Tue, 13 Jul 2021 10:17:34 +0100 Subject: ifcvt: Improve tests for predicated operations -msve-vector-bits=128 causes the AArch64 port to list 128-bit Advanced SIMD as the first-choice mode for vectorisation, with SVE being used for things that Advanced SIMD can't handle as easily. 
However, ifcvt would not then try to use SVE's predicated FP arithmetic, leading to tests like TSVC ControlFlow-flt failing to vectorise. The mask load/store code did try other vector modes, but could also be improved to make sure that SVEness sticks when computing derived modes. (Unlike mode_for_vector, related_vector_mode always returns a vector mode, so there's no need to check VECTOR_MODE_P as well.) gcc/ * internal-fn.c (vectorized_internal_fn_supported_p): Handle vector types first. For scalar types, consider both the preferred vector mode and the alternative vector modes. * optabs-query.c (can_vec_mask_load_store_p): Use the same structure as above, in particular using related_vector_mode for modes provided by autovectorize_vector_modes. gcc/testsuite/ * gcc.target/aarch64/sve/cond_arith_6.c: New test. --- gcc/internal-fn.c | 28 +++++++++++++++++----- gcc/optabs-query.c | 23 ++++++------------ .../gcc.target/aarch64/sve/cond_arith_6.c | 14 +++++++++++ 3 files changed, 43 insertions(+), 22 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_arith_6.c (limited to 'gcc') diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c index fb8b43d..cd5e63f 100644 --- a/gcc/internal-fn.c +++ b/gcc/internal-fn.c @@ -4109,16 +4109,32 @@ expand_internal_call (gcall *stmt) bool vectorized_internal_fn_supported_p (internal_fn ifn, tree type) { + if (VECTOR_MODE_P (TYPE_MODE (type))) + return direct_internal_fn_supported_p (ifn, type, OPTIMIZE_FOR_SPEED); + scalar_mode smode; - if (!VECTOR_TYPE_P (type) && is_a (TYPE_MODE (type), &smode)) + if (!is_a (TYPE_MODE (type), &smode)) + return false; + + machine_mode vmode = targetm.vectorize.preferred_simd_mode (smode); + if (VECTOR_MODE_P (vmode)) { - machine_mode vmode = targetm.vectorize.preferred_simd_mode (smode); - if (VECTOR_MODE_P (vmode)) - type = build_vector_type_for_mode (type, vmode); + tree vectype = build_vector_type_for_mode (type, vmode); + if (direct_internal_fn_supported_p (ifn, vectype, 
OPTIMIZE_FOR_SPEED)) + return true; } - return (VECTOR_MODE_P (TYPE_MODE (type)) - && direct_internal_fn_supported_p (ifn, type, OPTIMIZE_FOR_SPEED)); + auto_vector_modes vector_modes; + targetm.vectorize.autovectorize_vector_modes (&vector_modes, true); + for (machine_mode base_mode : vector_modes) + if (related_vector_mode (base_mode, smode).exists (&vmode)) + { + tree vectype = build_vector_type_for_mode (type, vmode); + if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED)) + return true; + } + + return false; } void diff --git a/gcc/optabs-query.c b/gcc/optabs-query.c index 3248ce2..05ee5f5 100644 --- a/gcc/optabs-query.c +++ b/gcc/optabs-query.c @@ -582,27 +582,18 @@ can_vec_mask_load_store_p (machine_mode mode, return false; vmode = targetm.vectorize.preferred_simd_mode (smode); - if (!VECTOR_MODE_P (vmode)) - return false; - - if (targetm.vectorize.get_mask_mode (vmode).exists (&mask_mode) + if (VECTOR_MODE_P (vmode) + && targetm.vectorize.get_mask_mode (vmode).exists (&mask_mode) && convert_optab_handler (op, vmode, mask_mode) != CODE_FOR_nothing) return true; auto_vector_modes vector_modes; targetm.vectorize.autovectorize_vector_modes (&vector_modes, true); - for (unsigned int i = 0; i < vector_modes.length (); ++i) - { - poly_uint64 cur = GET_MODE_SIZE (vector_modes[i]); - poly_uint64 nunits; - if (!multiple_p (cur, GET_MODE_SIZE (smode), &nunits)) - continue; - if (mode_for_vector (smode, nunits).exists (&vmode) - && VECTOR_MODE_P (vmode) - && targetm.vectorize.get_mask_mode (vmode).exists (&mask_mode) - && convert_optab_handler (op, vmode, mask_mode) != CODE_FOR_nothing) - return true; - } + for (machine_mode base_mode : vector_modes) + if (related_vector_mode (base_mode, smode).exists (&vmode) + && targetm.vectorize.get_mask_mode (vmode).exists (&mask_mode) + && convert_optab_handler (op, vmode, mask_mode) != CODE_FOR_nothing) + return true; return false; } diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_arith_6.c 
b/gcc/testsuite/gcc.target/aarch64/sve/cond_arith_6.c new file mode 100644 index 0000000..4085ab1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_arith_6.c @@ -0,0 +1,14 @@ +/* { dg-options "-O3 -msve-vector-bits=128" } */ + +void +f (float *x) +{ + for (int i = 0; i < 100; ++i) + if (x[i] > 1.0f) + x[i] -= 1.0f; +} + +/* { dg-final { scan-assembler {\tld1w\tz} } } */ +/* { dg-final { scan-assembler {\tfcmgt\tp} } } */ +/* { dg-final { scan-assembler {\tfsub\tz} } } */ +/* { dg-final { scan-assembler {\tst1w\tz} } } */ -- cgit v1.1 From 3658ee4c73955ebf281842d2e637ca556fa8bbfd Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Tue, 13 Jul 2021 10:17:35 +0100 Subject: vect: Simplify epilogue reduction code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vect_create_epilog_for_reduction only handles two cases: single-loop reductions and double reductions. “nested cycles” (i.e. reductions in the inner loop when vectorising an outer loop) are handled elsewhere and don't need a vector->scalar reduction. The function had variables called nested_in_vect_loop and double_reduc and asserted that nested_in_vect_loop implied double_reduc, but it still had code to handle nested_in_vect_loop && !double_reduc. This patch removes that and uses double_reduc everywhere. gcc/ * tree-vect-loop.c (vect_create_epilog_for_reduction): Remove nested_in_vect_loop and use double_reduc everywhere. Remove dead assignment to "loop". 
--- gcc/tree-vect-loop.c | 30 ++++-------------------------- 1 file changed, 4 insertions(+), 26 deletions(-) (limited to 'gcc') diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index bc523d1..7c3e335 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -5005,7 +5005,6 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, imm_use_iterator imm_iter, phi_imm_iter; use_operand_p use_p, phi_use_p; gimple *use_stmt; - bool nested_in_vect_loop = false; auto_vec new_phis; int j, i; auto_vec scalar_results; @@ -5023,10 +5022,8 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, { outer_loop = loop; loop = loop->inner; - nested_in_vect_loop = true; - gcc_assert (!slp_node); + gcc_assert (!slp_node && double_reduc); } - gcc_assert (!nested_in_vect_loop || double_reduc); vectype = STMT_VINFO_REDUC_VECTYPE (reduc_info); gcc_assert (vectype); @@ -5049,8 +5046,6 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, induc_val = STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info); else if (double_reduc) ; - else if (nested_in_vect_loop) - ; else adjustment_def = STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info); } @@ -5923,7 +5918,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, { gcc_assert (!slp_reduc); gimple_seq stmts = NULL; - if (nested_in_vect_loop) + if (double_reduc) { new_phi = new_phis[0]; gcc_assert (VECTOR_TYPE_P (TREE_TYPE (adjustment_def))); @@ -5942,21 +5937,12 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, epilog_stmt = gimple_seq_last_stmt (stmts); gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); - if (nested_in_vect_loop) - { - if (!double_reduc) - scalar_results.quick_push (new_temp); - else - scalar_results[0] = new_temp; - } - else - scalar_results[0] = new_temp; - + scalar_results[0] = new_temp; new_phis[0] = epilog_stmt; } if (double_reduc) - loop = loop->inner; + loop = outer_loop; /* 2.6 Handle the loop-exit phis. 
Replace the uses of scalar loop-exit phis with new adjusted scalar results, i.e., replace use @@ -6017,14 +6003,6 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, scalar_dest = gimple_assign_lhs (scalar_stmt_info->stmt); } - if (nested_in_vect_loop) - { - if (double_reduc) - loop = outer_loop; - else - gcc_unreachable (); - } - phis.create (3); /* Find the loop-closed-use at the loop exit of the original scalar result. (The reduction result is expected to have two immediate uses, -- cgit v1.1 From b68eb70bd6df8c4b846bddb4f0aeae9054b932bc Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Tue, 13 Jul 2021 10:17:36 +0100 Subject: vect: Create array_slice of live-out stmts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch constructs an array_slice of the scalar statements that produce live-out reduction results in the original unvectorised loop. There are three cases: - SLP reduction chains: the final SLP stmt is live-out - full SLP reductions: all SLP stmts are live-out - non-SLP reductions: the single scalar stmt is live-out This is a slight simplification on its own, mostly because it means “group_size” has a consistent meaning throughout the function. The main justification though is that it helps with later patches. gcc/ * tree-vect-loop.c (vect_create_epilog_for_reduction): Truncate scalar_results to group_size elements after reducing down from N*group_size elements. Construct an array_slice of the live-out stmts and assert that there is one stmt per scalar result. 
--- gcc/tree-vect-loop.c | 61 ++++++++++++++++++---------------------------------- 1 file changed, 21 insertions(+), 40 deletions(-) (limited to 'gcc') diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 7c3e335..8390ac8 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -5010,7 +5010,12 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, auto_vec scalar_results; unsigned int group_size = 1, k; auto_vec phis; - bool slp_reduc = false; + /* SLP reduction without reduction chain, e.g., + # a1 = phi + # b1 = phi + a2 = operation (a1) + b2 = operation (b1) */ + bool slp_reduc = (slp_node && !REDUC_GROUP_FIRST_ELEMENT (stmt_info)); bool direct_slp_reduc; tree new_phi_result; tree induction_index = NULL_TREE; @@ -5050,6 +5055,16 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, adjustment_def = STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info); } + stmt_vec_info single_live_out_stmt[] = { stmt_info }; + array_slice live_out_stmts = single_live_out_stmt; + if (slp_reduc) + /* All statements produce live-out values. */ + live_out_stmts = SLP_TREE_SCALAR_STMTS (slp_node); + else if (slp_node) + /* The last statement in the reduction chain produces the live-out + value. */ + single_live_out_stmt[0] = SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1]; + unsigned vec_num; int ncopies; if (slp_node) @@ -5248,13 +5263,6 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, new_scalar_dest = vect_create_destination_var (scalar_dest, NULL); bitsize = TYPE_SIZE (scalar_type); - /* SLP reduction without reduction chain, e.g., - # a1 = phi - # b1 = phi - a2 = operation (a1) - b2 = operation (b1) */ - slp_reduc = (slp_node && !REDUC_GROUP_FIRST_ELEMENT (stmt_info)); - /* True if we should implement SLP_REDUC using native reduction operations instead of scalar operations. 
*/ direct_slp_reduc = (reduc_fn != IFN_LAST @@ -5877,6 +5885,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, first_res, res); scalar_results[j % group_size] = new_res; } + scalar_results.truncate (group_size); for (k = 0; k < group_size; k++) scalar_results[k] = gimple_convert (&stmts, scalar_type, scalar_results[k]); @@ -5969,39 +5978,11 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, use use */ - - /* In SLP reduction chain we reduce vector results into one vector if - necessary, hence we set here REDUC_GROUP_SIZE to 1. SCALAR_DEST is the - LHS of the last stmt in the reduction chain, since we are looking for - the loop exit phi node. */ - if (REDUC_GROUP_FIRST_ELEMENT (stmt_info)) - { - stmt_vec_info dest_stmt_info - = vect_orig_stmt (SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1]); - scalar_dest = gimple_assign_lhs (dest_stmt_info->stmt); - group_size = 1; - } - - /* In SLP we may have several statements in NEW_PHIS and REDUCTION_PHIS (in - case that REDUC_GROUP_SIZE is greater than vectorization factor). - Therefore, we need to match SCALAR_RESULTS with corresponding statements. - The first (REDUC_GROUP_SIZE / number of new vector stmts) scalar results - correspond to the first vector stmt, etc. - (RATIO is equal to (REDUC_GROUP_SIZE / number of new vector stmts)). */ - if (group_size > new_phis.length ()) - gcc_assert (!(group_size % new_phis.length ())); - - for (k = 0; k < group_size; k++) + gcc_assert (live_out_stmts.size () == scalar_results.length ()); + for (k = 0; k < live_out_stmts.size (); k++) { - if (slp_reduc) - { - stmt_vec_info scalar_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[k]; - - orig_stmt_info = STMT_VINFO_RELATED_STMT (scalar_stmt_info); - /* SLP statements can't participate in patterns. 
*/ - gcc_assert (!orig_stmt_info); - scalar_dest = gimple_assign_lhs (scalar_stmt_info->stmt); - } + stmt_vec_info scalar_stmt_info = vect_orig_stmt (live_out_stmts[k]); + scalar_dest = gimple_assign_lhs (scalar_stmt_info->stmt); phis.create (3); /* Find the loop-closed-use at the loop exit of the original scalar -- cgit v1.1 From 81ad6bfc078ca36a42446e2f2295102ffaac9ee1 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Tue, 13 Jul 2021 10:17:37 +0100 Subject: vect: Remove new_phis from vect_create_epilog_for_reduction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vect_create_epilog_for_reduction had a variable called new_phis. It collected the statements that produce the exit block definitions of the vector reduction accumulators. Although those statements are indeed phis initially, they are often replaced with normal statements later, leading to puzzling code like: FOR_EACH_VEC_ELT (new_phis, i, new_phi) { int bit_offset; if (gimple_code (new_phi) == GIMPLE_PHI) vec_temp = PHI_RESULT (new_phi); else vec_temp = gimple_assign_lhs (new_phi); Also, although the array collects statements, in practice all users want the lhs instead. This patch therefore replaces new_phis with a vector of gimple values called “reduc_inputs”. Also, reduction chains and ncopies>1 were handled with identical code (and there was a comment saying so). The patch unites them into a single “if”. gcc/ * tree-vect-loop.c (vect_create_epilog_for_reduction): Replace the new_phis vector with a reduc_inputs vector. Combine handling of reduction chains and ncopies > 1. 
--- gcc/tree-vect-loop.c | 113 +++++++++++++++++++-------------------------------- 1 file changed, 41 insertions(+), 72 deletions(-) (limited to 'gcc') diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 8390ac8..b7f73ca 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -5005,7 +5005,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, imm_use_iterator imm_iter, phi_imm_iter; use_operand_p use_p, phi_use_p; gimple *use_stmt; - auto_vec new_phis; + auto_vec reduc_inputs; int j, i; auto_vec scalar_results; unsigned int group_size = 1, k; @@ -5017,7 +5017,6 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, b2 = operation (b1) */ bool slp_reduc = (slp_node && !REDUC_GROUP_FIRST_ELEMENT (stmt_info)); bool direct_slp_reduc; - tree new_phi_result; tree induction_index = NULL_TREE; if (slp_node) @@ -5215,7 +5214,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, if (double_reduc) loop = outer_loop; exit_bb = single_exit (loop)->dest; - new_phis.create (slp_node ? vec_num : ncopies); + reduc_inputs.create (slp_node ? 
vec_num : ncopies); for (unsigned i = 0; i < vec_num; i++) { if (slp_node) @@ -5223,19 +5222,14 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, else def = gimple_get_lhs (STMT_VINFO_VEC_STMTS (rdef_info)[0]); for (j = 0; j < ncopies; j++) - { + { tree new_def = copy_ssa_name (def); - phi = create_phi_node (new_def, exit_bb); - if (j == 0) - new_phis.quick_push (phi); - else - { - def = gimple_get_lhs (STMT_VINFO_VEC_STMTS (rdef_info)[j]); - new_phis.quick_push (phi); - } - - SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def); - } + phi = create_phi_node (new_def, exit_bb); + if (j) + def = gimple_get_lhs (STMT_VINFO_VEC_STMTS (rdef_info)[j]); + SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def); + reduc_inputs.quick_push (new_def); + } } exit_gsi = gsi_after_labels (exit_bb); @@ -5274,52 +5268,32 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, a2 = operation (a1) a3 = operation (a2), - we may end up with more than one vector result. Here we reduce them to - one vector. */ - if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) || direct_slp_reduc) + we may end up with more than one vector result. Here we reduce them + to one vector. + + The same is true if we couldn't use a single defuse cycle. 
*/ + if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) + || direct_slp_reduc + || ncopies > 1) { gimple_seq stmts = NULL; - tree first_vect = PHI_RESULT (new_phis[0]); - first_vect = gimple_convert (&stmts, vectype, first_vect); - for (k = 1; k < new_phis.length (); k++) + tree first_vect = gimple_convert (&stmts, vectype, reduc_inputs[0]); + for (k = 1; k < reduc_inputs.length (); k++) { - gimple *next_phi = new_phis[k]; - tree second_vect = PHI_RESULT (next_phi); - second_vect = gimple_convert (&stmts, vectype, second_vect); + tree second_vect = gimple_convert (&stmts, vectype, reduc_inputs[k]); first_vect = gimple_build (&stmts, code, vectype, first_vect, second_vect); } gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); - new_phi_result = first_vect; - new_phis.truncate (0); - new_phis.safe_push (SSA_NAME_DEF_STMT (first_vect)); + reduc_inputs.truncate (0); + reduc_inputs.safe_push (first_vect); } - /* Likewise if we couldn't use a single defuse cycle. */ - else if (ncopies > 1) - { - gimple_seq stmts = NULL; - tree first_vect = PHI_RESULT (new_phis[0]); - first_vect = gimple_convert (&stmts, vectype, first_vect); - for (int k = 1; k < ncopies; ++k) - { - tree second_vect = PHI_RESULT (new_phis[k]); - second_vect = gimple_convert (&stmts, vectype, second_vect); - first_vect = gimple_build (&stmts, code, vectype, - first_vect, second_vect); - } - gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); - new_phi_result = first_vect; - new_phis.truncate (0); - new_phis.safe_push (SSA_NAME_DEF_STMT (first_vect)); - } - else - new_phi_result = PHI_RESULT (new_phis[0]); if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION && reduc_fn != IFN_LAST) { - /* For condition reductions, we have a vector (NEW_PHI_RESULT) containing + /* For condition reductions, we have a vector (REDUC_INPUTS 0) containing various data values where the condition matched and another vector (INDUCTION_INDEX) containing all the indexes of those matches. 
We need to extract the last matching index (which will be the index with @@ -5350,7 +5324,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, tree zero_vec = build_zero_cst (vectype); gimple_seq stmts = NULL; - new_phi_result = gimple_convert (&stmts, vectype, new_phi_result); + reduc_inputs[0] = gimple_convert (&stmts, vectype, reduc_inputs[0]); gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); /* Find maximum value from the vector of found indexes. */ @@ -5370,7 +5344,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, /* Next we compare the new vector (MAX_INDEX_VEC) full of max indexes with the vector (INDUCTION_INDEX) of found indexes, choosing values - from the data vector (NEW_PHI_RESULT) for matches, 0 (ZERO_VEC) + from the data vector (REDUC_INPUTS 0) for matches, 0 (ZERO_VEC) otherwise. Only one value should match, resulting in a vector (VEC_COND) with one data value and the rest zeros. In the case where the loop never made any matches, every index will @@ -5389,7 +5363,8 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, zero. 
*/ tree vec_cond = make_ssa_name (vectype); gimple *vec_cond_stmt = gimple_build_assign (vec_cond, VEC_COND_EXPR, - vec_compare, new_phi_result, + vec_compare, + reduc_inputs[0], zero_vec); gsi_insert_before (&exit_gsi, vec_cond_stmt, GSI_SAME_STMT); @@ -5437,7 +5412,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, val = data_reduc[i], idx_val = induction_index[i]; return val; */ - tree data_eltype = TREE_TYPE (TREE_TYPE (new_phi_result)); + tree data_eltype = TREE_TYPE (TREE_TYPE (reduc_inputs[0])); tree idx_eltype = TREE_TYPE (TREE_TYPE (induction_index)); unsigned HOST_WIDE_INT el_size = tree_to_uhwi (TYPE_SIZE (idx_eltype)); poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (TREE_TYPE (induction_index)); @@ -5461,7 +5436,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, epilog_stmt = gimple_build_assign (val, BIT_FIELD_REF, build3 (BIT_FIELD_REF, data_eltype, - new_phi_result, + reduc_inputs[0], bitsize_int (el_size), bitsize_int (off))); gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); @@ -5513,10 +5488,10 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, "Reduce using direct vector reduction.\n"); gimple_seq stmts = NULL; - new_phi_result = gimple_convert (&stmts, vectype, new_phi_result); - vec_elem_type = TREE_TYPE (TREE_TYPE (new_phi_result)); + reduc_inputs[0] = gimple_convert (&stmts, vectype, reduc_inputs[0]); + vec_elem_type = TREE_TYPE (TREE_TYPE (reduc_inputs[0])); new_temp = gimple_build (&stmts, as_combined_fn (reduc_fn), - vec_elem_type, new_phi_result); + vec_elem_type, reduc_inputs[0]); new_temp = gimple_convert (&stmts, scalar_type, new_temp); gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); @@ -5546,7 +5521,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, neutral value. We can then do a normal reduction on each vector. */ /* Enforced by vectorizable_reduction. 
*/ - gcc_assert (new_phis.length () == 1); + gcc_assert (reduc_inputs.length () == 1); gcc_assert (pow2p_hwi (group_size)); slp_tree orig_phis_slp_node = slp_node_instance->reduc_phis; @@ -5602,7 +5577,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, sel[j] = (index[j] == i); - which selects the elements of NEW_PHI_RESULT that should + which selects the elements of REDUC_INPUTS[0] that should be included in the result. */ tree compare_val = build_int_cst (index_elt_type, i); compare_val = build_vector_from_val (index_type, compare_val); @@ -5611,11 +5586,11 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, /* Calculate the equivalent of: - vec = seq ? new_phi_result : vector_identity; + vec = seq ? reduc_inputs[0] : vector_identity; VEC is now suitable for a full vector reduction. */ tree vec = gimple_build (&seq, VEC_COND_EXPR, vectype, - sel, new_phi_result, vector_identity); + sel, reduc_inputs[0], vector_identity); /* Do the reduction and convert it to the appropriate type. 
*/ tree scalar = gimple_build (&seq, as_combined_fn (reduc_fn), @@ -5630,7 +5605,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, bool reduce_with_shift; tree vec_temp; - gcc_assert (slp_reduc || new_phis.length () == 1); + gcc_assert (slp_reduc || reduc_inputs.length () == 1); /* See if the target wants to do the final (shift) reduction in a vector mode of smaller size and first reduce upper/lower @@ -5640,7 +5615,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype).to_constant (); unsigned nunits1 = nunits; if ((mode1 = targetm.vectorize.split_reduction (mode)) != mode - && new_phis.length () == 1) + && reduc_inputs.length () == 1) { nunits1 = GET_MODE_NUNITS (mode1).to_constant (); /* For SLP reductions we have to make sure lanes match up, but @@ -5672,7 +5647,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, /* First reduce the vector to the desired vector size we should do shift reduction on by combining upper and lower halves. 
*/ - new_temp = new_phi_result; + new_temp = reduc_inputs[0]; while (nunits > nunits1) { nunits /= 2; @@ -5751,7 +5726,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, new_temp = make_ssa_name (vectype1); epilog_stmt = gimple_build_assign (new_temp, code, dst1, dst2); gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); - new_phis[0] = epilog_stmt; + reduc_inputs[0] = new_temp; } if (reduce_with_shift && !slp_reduc) @@ -5832,13 +5807,9 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, int element_bitsize = tree_to_uhwi (bitsize); tree compute_type = TREE_TYPE (vectype); gimple_seq stmts = NULL; - FOR_EACH_VEC_ELT (new_phis, i, new_phi) + FOR_EACH_VEC_ELT (reduc_inputs, i, vec_temp) { int bit_offset; - if (gimple_code (new_phi) == GIMPLE_PHI) - vec_temp = PHI_RESULT (new_phi); - else - vec_temp = gimple_assign_lhs (new_phi); new_temp = gimple_build (&stmts, BIT_FIELD_REF, compute_type, vec_temp, bitsize, bitsize_zero_node); @@ -5929,11 +5900,10 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, gimple_seq stmts = NULL; if (double_reduc) { - new_phi = new_phis[0]; gcc_assert (VECTOR_TYPE_P (TREE_TYPE (adjustment_def))); adjustment_def = gimple_convert (&stmts, vectype, adjustment_def); new_temp = gimple_build (&stmts, code, vectype, - PHI_RESULT (new_phi), adjustment_def); + reduc_inputs[0], adjustment_def); } else { @@ -5947,7 +5917,6 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, epilog_stmt = gimple_seq_last_stmt (stmts); gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); scalar_results[0] = new_temp; - new_phis[0] = epilog_stmt; } if (double_reduc) -- cgit v1.1 From 32b8edd5297c9193b81122fdd55316fcbf0959dc Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Tue, 13 Jul 2021 10:17:38 +0100 Subject: vect: Ensure reduc_inputs always have vectype Vector reduction accumulators can differ in signedness from the final scalar result. 
The conversions to handle that case were distributed through vect_create_epilog_for_reduction; this patch does the conversion up-front instead. gcc/ * tree-vect-loop.c (vect_create_epilog_for_reduction): Convert the phi results to vectype after creating them. Remove later conversion code that thus becomes redundant. --- gcc/tree-vect-loop.c | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) (limited to 'gcc') diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index b7f73ca..1bd9a6e 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -5214,9 +5214,11 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, if (double_reduc) loop = outer_loop; exit_bb = single_exit (loop)->dest; + exit_gsi = gsi_after_labels (exit_bb); reduc_inputs.create (slp_node ? vec_num : ncopies); for (unsigned i = 0; i < vec_num; i++) { + gimple_seq stmts = NULL; if (slp_node) def = vect_get_slp_vect_def (slp_node, i); else @@ -5228,12 +5230,12 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, if (j) def = gimple_get_lhs (STMT_VINFO_VEC_STMTS (rdef_info)[j]); SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def); + new_def = gimple_convert (&stmts, vectype, new_def); reduc_inputs.quick_push (new_def); } + gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); } - exit_gsi = gsi_after_labels (exit_bb); - /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3 (i.e. when reduc_fn is not available) and in the final adjustment code (if needed). 
Also get the original scalar reduction variable as @@ -5277,17 +5279,14 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, || ncopies > 1) { gimple_seq stmts = NULL; - tree first_vect = gimple_convert (&stmts, vectype, reduc_inputs[0]); + tree single_input = reduc_inputs[0]; for (k = 1; k < reduc_inputs.length (); k++) - { - tree second_vect = gimple_convert (&stmts, vectype, reduc_inputs[k]); - first_vect = gimple_build (&stmts, code, vectype, - first_vect, second_vect); - } + single_input = gimple_build (&stmts, code, vectype, + single_input, reduc_inputs[k]); gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); reduc_inputs.truncate (0); - reduc_inputs.safe_push (first_vect); + reduc_inputs.safe_push (single_input); } if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION @@ -5323,10 +5322,6 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, /* Vector of {0, 0, 0,...}. */ tree zero_vec = build_zero_cst (vectype); - gimple_seq stmts = NULL; - reduc_inputs[0] = gimple_convert (&stmts, vectype, reduc_inputs[0]); - gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); - /* Find maximum value from the vector of found indexes. */ tree max_index = make_ssa_name (index_scalar_type); gcall *max_index_stmt = gimple_build_call_internal (IFN_REDUC_MAX, @@ -5394,7 +5389,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, /* Convert the reduced value back to the result type and set as the result. 
*/ - stmts = NULL; + gimple_seq stmts = NULL; new_temp = gimple_build (&stmts, VIEW_CONVERT_EXPR, scalar_type, data_reduc); gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); @@ -5412,7 +5407,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, val = data_reduc[i], idx_val = induction_index[i]; return val; */ - tree data_eltype = TREE_TYPE (TREE_TYPE (reduc_inputs[0])); + tree data_eltype = TREE_TYPE (vectype); tree idx_eltype = TREE_TYPE (TREE_TYPE (induction_index)); unsigned HOST_WIDE_INT el_size = tree_to_uhwi (TYPE_SIZE (idx_eltype)); poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (TREE_TYPE (induction_index)); @@ -5488,8 +5483,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, "Reduce using direct vector reduction.\n"); gimple_seq stmts = NULL; - reduc_inputs[0] = gimple_convert (&stmts, vectype, reduc_inputs[0]); - vec_elem_type = TREE_TYPE (TREE_TYPE (reduc_inputs[0])); + vec_elem_type = TREE_TYPE (vectype); new_temp = gimple_build (&stmts, as_combined_fn (reduc_fn), vec_elem_type, reduc_inputs[0]); new_temp = gimple_convert (&stmts, scalar_type, new_temp); -- cgit v1.1 From d592920c89973acd8d9f5b1f6b0526036ce63ccb Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Tue, 13 Jul 2021 10:17:39 +0100 Subject: vect: Add a vect_phi_initial_value helper function This patch adds a helper function called vect_phi_initial_value for returning the incoming value of a given loop phi. The main reason for adding it is to ensure that the right preheader edge is used when vectorising nested loops. (PHI_ARG_DEF_FROM_EDGE itself doesn't assert that the given edge is for the right block, although I guess that would be good to add separately.) gcc/ * tree-vectorizer.h: Include tree-ssa-operands.h. (vect_phi_initial_value): New function. * tree-vect-loop.c (neutral_op_for_slp_reduction): Use it. (get_initial_defs_for_reduction, info_for_reduction): Likewise. (vect_create_epilog_for_reduction, vectorizable_reduction): Likewise. 
(vect_transform_cycle_phi, vectorizable_induction): Likewise. --- gcc/tree-vect-loop.c | 29 +++++++++-------------------- gcc/tree-vectorizer.h | 21 ++++++++++++++++++++- 2 files changed, 29 insertions(+), 21 deletions(-) (limited to 'gcc') diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 1bd9a6e..a31d762 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -3288,8 +3288,7 @@ neutral_op_for_slp_reduction (slp_tree slp_node, tree vector_type, has only a single initial value, so that value is neutral for all statements. */ if (reduc_chain) - return PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt, - loop_preheader_edge (loop)); + return vect_phi_initial_value (stmt_vinfo); return NULL_TREE; default: @@ -4829,13 +4828,13 @@ get_initial_defs_for_reduction (vec_info *vinfo, /* Get the def before the loop. In reduction chain we have only one initial value. Else we have as many as PHIs in the group. */ if (reduc_chain) - op = j != 0 ? neutral_op : PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt, pe); + op = j != 0 ? neutral_op : vect_phi_initial_value (stmt_vinfo); else if (((vec_oprnds->length () + 1) * nunits - number_of_places_left_in_vector >= group_size) && neutral_op) op = neutral_op; else - op = PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt, pe); + op = vect_phi_initial_value (stmt_vinfo); /* Create 'vect_ = {op0,op1,...,opn}'. 
*/ number_of_places_left_in_vector--; @@ -4906,9 +4905,7 @@ info_for_reduction (vec_info *vinfo, stmt_vec_info stmt_info) } else if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle) { - edge pe = loop_preheader_edge (gimple_bb (phi)->loop_father); - stmt_vec_info info - = vinfo->lookup_def (PHI_ARG_DEF_FROM_EDGE (phi, pe)); + stmt_vec_info info = vinfo->lookup_def (vect_phi_initial_value (phi)); if (info && STMT_VINFO_DEF_TYPE (info) == vect_double_reduction_def) stmt_info = info; } @@ -5042,8 +5039,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, { /* Get at the scalar def before the loop, that defines the initial value of the reduction variable. */ - initial_def = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt, - loop_preheader_edge (loop)); + initial_def = vect_phi_initial_value (reduc_def_stmt); /* Optimize: for induction condition reduction, if we can't use zero for induc_val, use initial_def. */ if (STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION) @@ -5558,9 +5554,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, for MIN and MAX reduction, for example. 
*/ if (!neutral_op) { - tree scalar_value - = PHI_ARG_DEF_FROM_EDGE (orig_phis[i]->stmt, - loop_preheader_edge (loop)); + tree scalar_value = vect_phi_initial_value (orig_phis[i]); scalar_value = gimple_convert (&seq, TREE_TYPE (vectype), scalar_value); vector_identity = gimple_build_vector_from_val (&seq, vectype, @@ -6752,10 +6746,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, else if (cond_reduc_dt == vect_constant_def) { enum vect_def_type cond_initial_dt; - tree cond_initial_val - = PHI_ARG_DEF_FROM_EDGE (reduc_def_phi, loop_preheader_edge (loop)); - - gcc_assert (cond_reduc_val != NULL_TREE); + tree cond_initial_val = vect_phi_initial_value (reduc_def_phi); vect_is_simple_use (cond_initial_val, loop_vinfo, &cond_initial_dt); if (cond_initial_dt == vect_constant_def && types_compatible_p (TREE_TYPE (cond_initial_val), @@ -7528,8 +7519,7 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo, { /* Get at the scalar def before the loop, that defines the initial value of the reduction variable. */ - tree initial_def = PHI_ARG_DEF_FROM_EDGE (phi, - loop_preheader_edge (loop)); + tree initial_def = vect_phi_initial_value (phi); /* Optimize: if initial_def is for REDUC_MAX smaller than the base and we can't use zero for induc_val, use initial_def. Similarly for REDUC_MIN and initial_def larger than the base. */ @@ -8175,8 +8165,7 @@ vectorizable_induction (loop_vec_info loop_vinfo, return true; } - init_expr = PHI_ARG_DEF_FROM_EDGE (phi, - loop_preheader_edge (iv_loop)); + init_expr = vect_phi_initial_value (phi); gimple_seq stmts = NULL; if (!nested_in_vect_loop) diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index fa28336..e2fd360 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -27,7 +27,7 @@ typedef class _stmt_vec_info *stmt_vec_info; #include "tree-hash-traits.h" #include "target.h" #include "internal-fn.h" - +#include "tree-ssa-operands.h" /* Used for naming of new temporaries. 
*/ enum vect_var_kind { @@ -1369,6 +1369,25 @@ nested_in_vect_loop_p (class loop *loop, stmt_vec_info stmt_info) && (loop->inner == (gimple_bb (stmt_info->stmt))->loop_father)); } +/* PHI is either a scalar reduction phi or a scalar induction phi. + Return the initial value of the variable on entry to the containing + loop. */ + +static inline tree +vect_phi_initial_value (gphi *phi) +{ + basic_block bb = gimple_bb (phi); + edge pe = loop_preheader_edge (bb->loop_father); + gcc_assert (pe->dest == bb); + return PHI_ARG_DEF_FROM_EDGE (phi, pe); +} + +static inline tree +vect_phi_initial_value (stmt_vec_info stmt_info) +{ + return vect_phi_initial_value (as_a <gphi *> (stmt_info->stmt)); +} + /* Return true if STMT_INFO should produce a vector mask type rather than a normal nonmask type. */ -- cgit v1.1 From 826c452e571884fcabbc73863eef5120e683d034 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Tue, 13 Jul 2021 10:17:39 +0100 Subject: vect: Pass reduc_info to get_initial_defs_for_reduction This patch passes the reduc_info to get_initial_defs_for_reduction, so that the function can get general information from there rather than from the first SLP statement. This isn't a win on its own, but it becomes important with later patches. gcc/ * tree-vect-loop.c (get_initial_defs_for_reduction): Take the reduc_info as an additional parameter. (vect_transform_cycle_phi): Update accordingly. --- gcc/tree-vect-loop.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) (limited to 'gcc') diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index a31d762..565c285 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -4764,32 +4764,28 @@ get_initial_def_for_reduction (loop_vec_info loop_vinfo, return init_def; } -/* Get at the initial defs for the reduction PHIs in SLP_NODE. - NUMBER_OF_VECTORS is the number of vector defs to create. - If NEUTRAL_OP is nonnull, introducing extra elements of that - value will not change the result.
*/ +/* Get at the initial defs for the reduction PHIs for REDUC_INFO, whose + associated SLP node is SLP_NODE. NUMBER_OF_VECTORS is the number of vector + defs to create. If NEUTRAL_OP is nonnull, introducing extra elements of + that value will not change the result. */ static void get_initial_defs_for_reduction (vec_info *vinfo, + stmt_vec_info reduc_info, slp_tree slp_node, vec<tree> *vec_oprnds, unsigned int number_of_vectors, bool reduc_chain, tree neutral_op) { vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node); - stmt_vec_info stmt_vinfo = stmts[0]; unsigned HOST_WIDE_INT nunits; unsigned j, number_of_places_left_in_vector; - tree vector_type; + tree vector_type = STMT_VINFO_VECTYPE (reduc_info); unsigned int group_size = stmts.length (); unsigned int i; class loop *loop; - vector_type = STMT_VINFO_VECTYPE (stmt_vinfo); - - gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def); - - loop = (gimple_bb (stmt_vinfo->stmt))->loop_father; + loop = (gimple_bb (reduc_info->stmt))->loop_father; gcc_assert (loop); edge pe = loop_preheader_edge (loop); @@ -4823,7 +4819,7 @@ get_initial_defs_for_reduction (vec_info *vinfo, { tree op; i = j % group_size; - stmt_vinfo = stmts[i]; + stmt_vec_info stmt_vinfo = stmts[i]; /* Get the def before the loop. In reduction chain we have only one initial value. Else we have as many as PHIs in the group.
*/ @@ -7510,7 +7506,8 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo, = neutral_op_for_slp_reduction (slp_node, vectype_out, STMT_VINFO_REDUC_CODE (reduc_info), first != NULL); - get_initial_defs_for_reduction (loop_vinfo, slp_node_instance->reduc_phis, + get_initial_defs_for_reduction (loop_vinfo, reduc_info, + slp_node_instance->reduc_phis, &vec_initial_defs, vec_num, first != NULL, neutral_op); } -- cgit v1.1 From bd5a69191f023b9bc2a1f83c5f7d5e591c333b9a Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Tue, 13 Jul 2021 10:17:40 +0100 Subject: vect: Pass reduc_info to get_initial_def_for_reduction Similarly to the previous patch, this one passes the reduc_info to get_initial_def_for_reduction, rather than a stmt_vec_info that lacks the metadata. This again becomes useful later. gcc/ * tree-vect-loop.c (get_initial_def_for_reduction): Take the reduc_info instead of the original stmt_vec_info. (vect_transform_cycle_phi): Update accordingly. --- gcc/tree-vect-loop.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'gcc') diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 565c285..a67036f 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -4625,7 +4625,7 @@ vect_model_reduction_cost (loop_vec_info loop_vinfo, /* Function get_initial_def_for_reduction Input: - STMT_VINFO - a stmt that performs a reduction operation in the loop. 
+ REDUC_INFO - the info_for_reduction INIT_VAL - the initial value of the reduction variable Output: @@ -4667,7 +4667,7 @@ vect_model_reduction_cost (loop_vec_info loop_vinfo, static tree get_initial_def_for_reduction (loop_vec_info loop_vinfo, - stmt_vec_info stmt_vinfo, + stmt_vec_info reduc_info, enum tree_code code, tree init_val, tree *adjustment_def) { @@ -4685,8 +4685,8 @@ get_initial_def_for_reduction (loop_vec_info loop_vinfo, gcc_assert (POINTER_TYPE_P (scalar_type) || INTEGRAL_TYPE_P (scalar_type) || SCALAR_FLOAT_TYPE_P (scalar_type)); - gcc_assert (nested_in_vect_loop_p (loop, stmt_vinfo) - || loop == (gimple_bb (stmt_vinfo->stmt))->loop_father); + gcc_assert (nested_in_vect_loop_p (loop, reduc_info) + || loop == (gimple_bb (reduc_info->stmt))->loop_father); /* ADJUSTMENT_DEF is NULL when called from vect_create_epilog_for_reduction to vectorize double reduction. */ @@ -7556,7 +7556,7 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo, if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def) adjustment_defp = NULL; vec_initial_def - = get_initial_def_for_reduction (loop_vinfo, reduc_stmt_info, code, + = get_initial_def_for_reduction (loop_vinfo, reduc_info, code, initial_def, adjustment_defp); STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info) = adjustment_def; vec_initial_defs.create (ncopies); -- cgit v1.1 From 221bdb333b0917c927aec4d367a72e3667087d7a Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Tue, 13 Jul 2021 10:17:41 +0100 Subject: vect: Generalise neutral_op_for_slp_reduction This patch generalises the interface to neutral_op_for_slp_reduction so that it can be used for non-SLP reductions too. This isn't much of a win on its own, but it helps later patches. gcc/ * tree-vect-loop.c (neutral_op_for_slp_reduction): Replace with... (neutral_op_for_reduction): ...this, providing a more general interface. (vect_create_epilog_for_reduction): Update accordingly. (vectorizable_reduction): Likewise. 
(vect_transform_cycle_phi): Likewise. --- gcc/tree-vect-loop.c | 59 +++++++++++++++++++++++----------------------------- 1 file changed, 26 insertions(+), 33 deletions(-) (limited to 'gcc') diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index a67036f..744645d 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -3248,23 +3248,15 @@ reduction_fn_for_scalar_code (enum tree_code code, internal_fn *reduc_fn) } } -/* If there is a neutral value X such that SLP reduction NODE would not - be affected by the introduction of additional X elements, return that X, - otherwise return null. CODE is the code of the reduction and VECTOR_TYPE - is the vector type that would hold element X. REDUC_CHAIN is true if - the SLP statements perform a single reduction, false if each statement - performs an independent reduction. */ +/* If there is a neutral value X such that a reduction would not be affected + by the introduction of additional X elements, return that X, otherwise + return null. CODE is the code of the reduction and SCALAR_TYPE is type + of the scalar elements. If the reduction has just a single initial value + then INITIAL_VALUE is that value, otherwise it is null. */ static tree -neutral_op_for_slp_reduction (slp_tree slp_node, tree vector_type, - tree_code code, bool reduc_chain) +neutral_op_for_reduction (tree scalar_type, tree_code code, tree initial_value) { - vec stmts = SLP_TREE_SCALAR_STMTS (slp_node); - stmt_vec_info stmt_vinfo = stmts[0]; - tree scalar_type = TREE_TYPE (vector_type); - class loop *loop = gimple_bb (stmt_vinfo->stmt)->loop_father; - gcc_assert (loop); - switch (code) { case WIDEN_SUM_EXPR: @@ -3284,12 +3276,7 @@ neutral_op_for_slp_reduction (slp_tree slp_node, tree vector_type, case MAX_EXPR: case MIN_EXPR: - /* For MIN/MAX the initial values are neutral. A reduction chain - has only a single initial value, so that value is neutral for - all statements. 
*/ - if (reduc_chain) - return vect_phi_initial_value (stmt_vinfo); - return NULL_TREE; + return initial_value; default: return NULL_TREE; @@ -5535,10 +5522,11 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, tree neutral_op = NULL_TREE; if (slp_node) { - stmt_vec_info first = REDUC_GROUP_FIRST_ELEMENT (stmt_info); - neutral_op - = neutral_op_for_slp_reduction (slp_node_instance->reduc_phis, - vectype, code, first != NULL); + tree initial_value = NULL_TREE; + if (REDUC_GROUP_FIRST_ELEMENT (stmt_info)) + initial_value = vect_phi_initial_value (orig_phis[0]); + neutral_op = neutral_op_for_reduction (TREE_TYPE (vectype), code, + initial_value); } if (neutral_op) vector_identity = gimple_build_vector_from_val (&seq, vectype, @@ -6935,9 +6923,13 @@ vectorizable_reduction (loop_vec_info loop_vinfo, /* For SLP reductions, see if there is a neutral value we can use. */ tree neutral_op = NULL_TREE; if (slp_node) - neutral_op = neutral_op_for_slp_reduction - (slp_node_instance->reduc_phis, vectype_out, orig_code, - REDUC_GROUP_FIRST_ELEMENT (stmt_info) != NULL); + { + tree initial_value = NULL_TREE; + if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) != NULL) + initial_value = vect_phi_initial_value (reduc_def_phi); + neutral_op = neutral_op_for_reduction (TREE_TYPE (vectype_out), + orig_code, initial_value); + } if (double_reduc && reduction_type == FOLD_LEFT_REDUCTION) { @@ -7501,15 +7493,16 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo, else { gcc_assert (slp_node == slp_node_instance->reduc_phis); - stmt_vec_info first = REDUC_GROUP_FIRST_ELEMENT (reduc_stmt_info); - tree neutral_op - = neutral_op_for_slp_reduction (slp_node, vectype_out, - STMT_VINFO_REDUC_CODE (reduc_info), - first != NULL); + tree initial_value = NULL_TREE; + if (REDUC_GROUP_FIRST_ELEMENT (reduc_stmt_info)) + initial_value = vect_phi_initial_value (phi); + tree_code code = STMT_VINFO_REDUC_CODE (reduc_info); + tree neutral_op = neutral_op_for_reduction (TREE_TYPE (vectype_out), + code, 
initial_value); get_initial_defs_for_reduction (loop_vinfo, reduc_info, slp_node_instance->reduc_phis, &vec_initial_defs, vec_num, - first != NULL, neutral_op); + initial_value != NULL, neutral_op); } } else -- cgit v1.1 From 7670b6633e51afbbc4b3c8a5775accf7f2d887af Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Tue, 13 Jul 2021 10:17:42 +0100 Subject: vect: Simplify get_initial_def_for_reduction After previous patches, we can now easily provide the neutral op as an argument to get_initial_def_for_reduction. This in turn allows the adjustment calculation to be moved outside of get_initial_def_for_reduction, which is the main motivation of the patch. gcc/ * tree-vect-loop.c (get_initial_def_for_reduction): Remove adjustment handling. Take the neutral value as an argument, in place of the code argument. (vect_transform_cycle_phi): Update accordingly. Handle the initial values of cond reductions separately from code reductions. Choose the adjustment here rather than in get_initial_def_for_reduction. Sink the splat of vec_initial_def. --- gcc/tree-vect-loop.c | 177 +++++++++++++++++---------------------------------- 1 file changed, 59 insertions(+), 118 deletions(-) (limited to 'gcc') diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 744645d..fe7e73f 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -4614,57 +4614,26 @@ vect_model_reduction_cost (loop_vec_info loop_vinfo, Input: REDUC_INFO - the info_for_reduction INIT_VAL - the initial value of the reduction variable + NEUTRAL_OP - a value that has no effect on the reduction, as per + neutral_op_for_reduction Output: - ADJUSTMENT_DEF - a tree that holds a value to be added to the final result - of the reduction (used for adjusting the epilog - see below). Return a vector variable, initialized according to the operation that STMT_VINFO performs. This vector will be used as the initial value of the vector of partial results. 
- Option1 (adjust in epilog): Initialize the vector as follows: - add/bit or/xor: [0,0,...,0,0] - mult/bit and: [1,1,...,1,1] - min/max/cond_expr: [init_val,init_val,..,init_val,init_val] - and when necessary (e.g. add/mult case) let the caller know - that it needs to adjust the result by init_val. - - Option2: Initialize the vector as follows: - add/bit or/xor: [init_val,0,0,...,0] - mult/bit and: [init_val,1,1,...,1] - min/max/cond_expr: [init_val,init_val,...,init_val] - and no adjustments are needed. - - For example, for the following code: - - s = init_val; - for (i=0;istmt))->loop_father); - /* ADJUSTMENT_DEF is NULL when called from - vect_create_epilog_for_reduction to vectorize double reduction. */ - if (adjustment_def) - *adjustment_def = NULL; - - switch (code) + if (operand_equal_p (init_val, neutral_op)) { - case WIDEN_SUM_EXPR: - case DOT_PROD_EXPR: - case SAD_EXPR: - case PLUS_EXPR: - case MINUS_EXPR: - case BIT_IOR_EXPR: - case BIT_XOR_EXPR: - case MULT_EXPR: - case BIT_AND_EXPR: - { - if (code == MULT_EXPR) - { - real_init_val = dconst1; - int_init_val = 1; - } - - if (code == BIT_AND_EXPR) - int_init_val = -1; - - if (SCALAR_FLOAT_TYPE_P (scalar_type)) - def_for_init = build_real (scalar_type, real_init_val); - else - def_for_init = build_int_cst (scalar_type, int_init_val); - - if (adjustment_def || operand_equal_p (def_for_init, init_val, 0)) - { - /* Option1: the first element is '0' or '1' as well. */ - if (!operand_equal_p (def_for_init, init_val, 0)) - *adjustment_def = init_val; - init_def = gimple_build_vector_from_val (&stmts, vectype, - def_for_init); - } - else if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()) - { - /* Option2 (variable length): the first element is INIT_VAL. */ - init_def = gimple_build_vector_from_val (&stmts, vectype, - def_for_init); - init_def = gimple_build (&stmts, CFN_VEC_SHL_INSERT, - vectype, init_def, init_val); - } - else - { - /* Option2: the first element is INIT_VAL. 
*/ - tree_vector_builder elts (vectype, 1, 2); - elts.quick_push (init_val); - elts.quick_push (def_for_init); - init_def = gimple_build_vector (&stmts, &elts); - } - } - break; - - case MIN_EXPR: - case MAX_EXPR: - case COND_EXPR: - { - init_val = gimple_convert (&stmts, TREE_TYPE (vectype), init_val); - init_def = gimple_build_vector_from_val (&stmts, vectype, init_val); - } - break; - - default: - gcc_unreachable (); + /* If both elements are equal then the vector described above is + just a splat. */ + neutral_op = gimple_convert (&stmts, TREE_TYPE (vectype), neutral_op); + init_def = gimple_build_vector_from_val (&stmts, vectype, neutral_op); + } + else + { + neutral_op = gimple_convert (&stmts, TREE_TYPE (vectype), neutral_op); + init_val = gimple_convert (&stmts, TREE_TYPE (vectype), init_val); + if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()) + { + /* Construct a splat of NEUTRAL_OP and insert INIT_VAL into + element 0. */ + init_def = gimple_build_vector_from_val (&stmts, vectype, + neutral_op); + init_def = gimple_build (&stmts, CFN_VEC_SHL_INSERT, + vectype, init_def, init_val); + } + else + { + /* Build {INIT_VAL, NEUTRAL_OP, NEUTRAL_OP, ...}. */ + tree_vector_builder elts (vectype, 1, 2); + elts.quick_push (init_val); + elts.quick_push (neutral_op); + init_def = gimple_build_vector (&stmts, &elts); + } } if (stmts) @@ -7479,7 +7407,7 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo, vectype_out); /* Get the loop-entry arguments. 
*/ - tree vec_initial_def; + tree vec_initial_def = NULL_TREE; auto_vec<tree> vec_initial_defs; if (slp_node) { @@ -7529,9 +7457,6 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo, STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info) = NULL_TREE; } vec_initial_def = build_vector_from_val (vectype_out, induc_val); - vec_initial_defs.create (ncopies); - for (i = 0; i < ncopies; ++i) - vec_initial_defs.quick_push (vec_initial_def); } else if (nested_cycle) { @@ -7541,23 +7466,39 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo, ncopies, initial_def, &vec_initial_defs); } + else if (STMT_VINFO_REDUC_TYPE (reduc_info) == CONST_COND_REDUCTION + || STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION) + /* Fill the initial vector with the initial scalar value. */ + vec_initial_def + = get_initial_def_for_reduction (loop_vinfo, reduc_stmt_info, + initial_def, initial_def); else { - tree adjustment_def = NULL_TREE; - tree *adjustment_defp = &adjustment_def; enum tree_code code = STMT_VINFO_REDUC_CODE (reduc_info); - if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def) - adjustment_defp = NULL; + tree neutral_op = neutral_op_for_reduction (TREE_TYPE (initial_def), + code, initial_def); + gcc_assert (neutral_op); + /* Try to simplify the vector initialization by applying an + adjustment after the reduction has been performed.
*/ + if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def + && !operand_equal_p (neutral_op, initial_def)) + { + STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info) = initial_def; + initial_def = neutral_op; + } vec_initial_def - = get_initial_def_for_reduction (loop_vinfo, reduc_info, code, - initial_def, adjustment_defp); - STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info) = adjustment_def; - vec_initial_defs.create (ncopies); - for (i = 0; i < ncopies; ++i) - vec_initial_defs.quick_push (vec_initial_def); + = get_initial_def_for_reduction (loop_vinfo, reduc_info, + initial_def, neutral_op); } } + if (vec_initial_def) + { + vec_initial_defs.create (ncopies); + for (i = 0; i < ncopies; ++i) + vec_initial_defs.quick_push (vec_initial_def); + } + /* Generate the reduction PHIs upfront. */ for (i = 0; i < vec_num; i++) { -- cgit v1.1 From 1583b8bff0be7e41aa721dde79f90ca0763bd4e2 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Tue, 13 Jul 2021 10:17:43 +0100 Subject: vect: Reuse reduction accumulators between loops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds support for reusing a main loop's reduction accumulator in an epilogue loop. This in turn lets the loops share a single piece of vector->scalar reduction code. The patch has the following restrictions: (1) The epilogue reduction can only operate on a single vector (e.g. ncopies must be 1 for non-SLP reductions, and the group size must be <= the element count for SLP reductions). (2) Both loops must use the same vector mode for their accumulators. This means that the patch is restricted to targets that support --param vect-partial-vector-usage=1. (3) The reduction must be a standard “tree code” reduction. However, these restrictions could be lifted in future. 
For example, if the main loop operates on 128-bit vectors and the epilogue loop operates on 64-bit vectors, we could in future reduce the 128-bit vector by one stage and use the 64-bit result as the starting point for the epilogue result. The patch tries to handle chained SLP reductions, unchained SLP reductions and non-SLP reductions. It also handles cases in which the epilogue loop is entered directly (rather than via the main loop) and cases in which the epilogue loop can be skipped. vect_get_main_loop_result is a bit more general than the current patch needs. gcc/ * tree-vectorizer.h (vect_reusable_accumulator): New structure. (_loop_vec_info::main_loop_edge): New field. (_loop_vec_info::skip_main_loop_edge): Likewise. (_loop_vec_info::skip_this_loop_edge): Likewise. (_loop_vec_info::reusable_accumulators): Likewise. (_stmt_vec_info::reduc_scalar_results): Likewise. (_stmt_vec_info::reused_accumulator): Likewise. (vect_get_main_loop_result): Declare. * tree-vectorizer.c (vec_info::new_stmt_vec_info): Initialize reduc_scalar_inputs. (vec_info::free_stmt_vec_info): Free reduc_scalar_inputs. * tree-vect-loop-manip.c (vect_get_main_loop_result): New function. (vect_do_peeling): Fill an epilogue loop's main_loop_edge, skip_main_loop_edge and skip_this_loop_edge fields. * tree-vect-loop.c (INCLUDE_ALGORITHM): Define. (vect_emit_reduction_init_stmts): New function. (get_initial_def_for_reduction): Use it. (get_initial_defs_for_reduction): Likewise. Change the vinfo parameter to a loop_vec_info. (vect_create_epilog_for_reduction): Store the scalar results in the reduc_info. If an epilogue loop is reusing an accumulator from the main loop, and if the epilogue loop can also be skipped, try to place the reduction code in the join block. Record accumulators that could potentially be reused by epilogue loops. (vect_transform_cycle_phi): When vectorizing epilogue loops, try to reuse accumulators from the main loop. 
Record the initial value in reduc_info for non-SLP reductions too. gcc/testsuite/ * gcc.target/aarch64/sve/reduc_9.c: New test. * gcc.target/aarch64/sve/reduc_9_run.c: Likewise. * gcc.target/aarch64/sve/reduc_10.c: Likewise. * gcc.target/aarch64/sve/reduc_10_run.c: Likewise. * gcc.target/aarch64/sve/reduc_11.c: Likewise. * gcc.target/aarch64/sve/reduc_11_run.c: Likewise. * gcc.target/aarch64/sve/reduc_12.c: Likewise. * gcc.target/aarch64/sve/reduc_12_run.c: Likewise. * gcc.target/aarch64/sve/reduc_13.c: Likewise. * gcc.target/aarch64/sve/reduc_13_run.c: Likewise. * gcc.target/aarch64/sve/reduc_14.c: Likewise. * gcc.target/aarch64/sve/reduc_14_run.c: Likewise. * gcc.target/aarch64/sve/reduc_15.c: Likewise. * gcc.target/aarch64/sve/reduc_15_run.c: Likewise. --- gcc/testsuite/gcc.target/aarch64/sve/reduc_10.c | 77 ++++++ .../gcc.target/aarch64/sve/reduc_10_run.c | 49 ++++ gcc/testsuite/gcc.target/aarch64/sve/reduc_11.c | 71 +++++ .../gcc.target/aarch64/sve/reduc_11_run.c | 34 +++ gcc/testsuite/gcc.target/aarch64/sve/reduc_12.c | 71 +++++ .../gcc.target/aarch64/sve/reduc_12_run.c | 66 +++++ gcc/testsuite/gcc.target/aarch64/sve/reduc_13.c | 101 +++++++ .../gcc.target/aarch64/sve/reduc_13_run.c | 61 ++++ gcc/testsuite/gcc.target/aarch64/sve/reduc_14.c | 107 +++++++ .../gcc.target/aarch64/sve/reduc_14_run.c | 187 +++++++++++++ gcc/testsuite/gcc.target/aarch64/sve/reduc_15.c | 16 ++ .../gcc.target/aarch64/sve/reduc_15_run.c | 22 ++ gcc/testsuite/gcc.target/aarch64/sve/reduc_9.c | 77 ++++++ gcc/testsuite/gcc.target/aarch64/sve/reduc_9_run.c | 29 ++ gcc/tree-vect-loop-manip.c | 26 ++ gcc/tree-vect-loop.c | 307 +++++++++++++++++---- gcc/tree-vectorizer.c | 4 + gcc/tree-vectorizer.h | 56 +++- 18 files changed, 1297 insertions(+), 64 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/reduc_10.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/reduc_10_run.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/reduc_11.c create mode 100644 
gcc/testsuite/gcc.target/aarch64/sve/reduc_11_run.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/reduc_12.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/reduc_12_run.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/reduc_13.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/reduc_13_run.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/reduc_14.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/reduc_14_run.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/reduc_15.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/reduc_15_run.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/reduc_9.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/reduc_9_run.c (limited to 'gcc') diff --git a/gcc/testsuite/gcc.target/aarch64/sve/reduc_10.c b/gcc/testsuite/gcc.target/aarch64/sve/reduc_10.c new file mode 100644 index 0000000..fb817b7 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/reduc_10.c @@ -0,0 +1,77 @@ +/* { dg-options "-O3 --param vect-partial-vector-usage=1" } */ + +unsigned short __attribute__((noipa)) +add_loop (unsigned short *x, int n) +{ + unsigned short res = 0; + for (int i = 0; i < n; ++i) + res += x[i]; + return res; +} + +unsigned short __attribute__((noipa)) +min_loop (unsigned short *x, int n) +{ + unsigned short res = ~0; + for (int i = 0; i < n; ++i) + res = res < x[i] ? res : x[i]; + return res; +} + +unsigned short __attribute__((noipa)) +max_loop (unsigned short *x, int n) +{ + unsigned short res = 0; + for (int i = 0; i < n; ++i) + res = res > x[i] ? 
res : x[i]; + return res; +} + +unsigned short __attribute__((noipa)) +and_loop (unsigned short *x, int n) +{ + unsigned short res = ~0; + for (int i = 0; i < n; ++i) + res &= x[i]; + return res; +} + +unsigned short __attribute__((noipa)) +or_loop (unsigned short *x, int n) +{ + unsigned short res = 0; + for (int i = 0; i < n; ++i) + res |= x[i]; + return res; +} + +unsigned short __attribute__((noipa)) +eor_loop (unsigned short *x, int n) +{ + unsigned short res = 0; + for (int i = 0; i < n; ++i) + res ^= x[i]; + return res; +} + +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tuaddv\t} 1 } } */ + +/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tuminv\t} 1 } } */ + +/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tumaxv\t} 1 } } */ + +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tandv\t} 1 } } */ + +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\torv\t} 1 } } */ + +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\teorv\t} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/reduc_10_run.c 
b/gcc/testsuite/gcc.target/aarch64/sve/reduc_10_run.c new file mode 100644 index 0000000..1dd579b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/reduc_10_run.c @@ -0,0 +1,49 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O3 --param vect-partial-vector-usage=1" } */ + +#define N 0x1100 + +#include "reduc_10.c" + +int +main (void) +{ + unsigned short x[N]; + for (int i = 0; i < N; ++i) + x[i] = (i + 1) * (i + 2); + + if (add_loop (x, 0) != 0 + || add_loop (x, 11) != 572 + || add_loop (x, 0x100) != 22016 + || add_loop (x, 0xfff) != 20480 + || max_loop (x, 0) != 0 + || max_loop (x, 11) != 132 + || max_loop (x, 0x100) != 65280 + || max_loop (x, 0xfff) != 65504 + || or_loop (x, 0) != 0 + || or_loop (x, 11) != 0xfe + || or_loop (x, 0x80) != 0x7ffe + || or_loop (x, 0xb4) != 0x7ffe + || or_loop (x, 0xb5) != 0xfffe + || eor_loop (x, 0) != 0 + || eor_loop (x, 11) != 0xe8 + || eor_loop (x, 0x100) != 0xcf00 + || eor_loop (x, 0xfff) != 0xa000) + __builtin_abort (); + + for (int i = 0; i < N; ++i) + x[i] = ~x[i]; + + if (min_loop (x, 0) != 65535 + || min_loop (x, 11) != 65403 + || min_loop (x, 0x100) != 255 + || min_loop (x, 0xfff) != 31 + || and_loop (x, 0) != 0xffff + || and_loop (x, 11) != 0xff01 + || and_loop (x, 0x80) != 0x8001 + || and_loop (x, 0xb4) != 0x8001 + || and_loop (x, 0xb5) != 1) + __builtin_abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/reduc_11.c b/gcc/testsuite/gcc.target/aarch64/sve/reduc_11.c new file mode 100644 index 0000000..f99ef4a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/reduc_11.c @@ -0,0 +1,71 @@ +/* { dg-options "-O3 -msve-vector-bits=256 --param vect-partial-vector-usage=1" } */ + +unsigned short __attribute__((noipa)) +add_loop (unsigned short *x, unsigned short res) +{ + for (int i = 0; i < 0xfff; ++i) + res += x[i]; + return res; +} + +unsigned short __attribute__((noipa)) +min_loop (unsigned short *x, unsigned short res) +{ + for (int i = 0; i < 0xfff; ++i) + res = res 
< x[i] ? res : x[i]; + return res; +} + +unsigned short __attribute__((noipa)) +max_loop (unsigned short *x, unsigned short res) +{ + for (int i = 0; i < 0xfff; ++i) + res = res > x[i] ? res : x[i]; + return res; +} + +unsigned short __attribute__((noipa)) +and_loop (unsigned short *x, unsigned short res) +{ + for (int i = 0; i < 0xfff; ++i) + res &= x[i]; + return res; +} + +unsigned short __attribute__((noipa)) +or_loop (unsigned short *x, unsigned short res) +{ + for (int i = 0; i < 0xfff; ++i) + res |= x[i]; + return res; +} + +unsigned short __attribute__((noipa)) +eor_loop (unsigned short *x, unsigned short res) +{ + for (int i = 0; i < 0xfff; ++i) + res ^= x[i]; + return res; +} + +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tuaddv\t} 1 } } */ + +/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tuminv\t} 1 } } */ + +/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tumaxv\t} 1 } } */ + +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tandv\t} 1 } } */ + +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\torv\t} 1 } } */ + +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { 
dg-final { scan-assembler-times {\teorv\t} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/reduc_11_run.c b/gcc/testsuite/gcc.target/aarch64/sve/reduc_11_run.c new file mode 100644 index 0000000..5b41560 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/reduc_11_run.c @@ -0,0 +1,34 @@ +/* { dg-do run { target aarch64_sve256_hw } } */ +/* { dg-options "-O3 -msve-vector-bits=256 --param vect-partial-vector-usage=1" } */ + +#define N 0x1100 + +#include "reduc_11.c" + +int +main (void) +{ + unsigned short x[N]; + for (int i = 0; i < N; ++i) + x[i] = (i + 1) * (i + 2); + + if (add_loop (x, 42) != 20522 + || max_loop (x, 65503) != 65504 + || max_loop (x, 65505) != 65505 + || or_loop (x, 0) != 0xfffe + || or_loop (x, 1) != 0xffff + || eor_loop (x, 0) != 0xa000 + || eor_loop (x, 0xbfff) != 0x1fff) + __builtin_abort (); + + for (int i = 0; i < N; ++i) + x[i] = ~x[i]; + + if (min_loop (x, 32) != 31 + || min_loop (x, 30) != 30 + || and_loop (x, 0xff) != 1 + || and_loop (x, 0) != 0) + __builtin_abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/reduc_12.c b/gcc/testsuite/gcc.target/aarch64/sve/reduc_12.c new file mode 100644 index 0000000..d32b81a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/reduc_12.c @@ -0,0 +1,71 @@ +/* { dg-options "-O3 --param vect-partial-vector-usage=1" } */ + +unsigned short __attribute__((noipa)) +add_loop (unsigned short *x, int n, unsigned short res) +{ + for (int i = 0; i < n; ++i) + res += x[i]; + return res; +} + +unsigned short __attribute__((noipa)) +min_loop (unsigned short *x, int n, unsigned short res) +{ + for (int i = 0; i < n; ++i) + res = res < x[i] ? res : x[i]; + return res; +} + +unsigned short __attribute__((noipa)) +max_loop (unsigned short *x, int n, unsigned short res) +{ + for (int i = 0; i < n; ++i) + res = res > x[i] ? 
res : x[i]; + return res; +} + +unsigned short __attribute__((noipa)) +and_loop (unsigned short *x, int n, unsigned short res) +{ + for (int i = 0; i < n; ++i) + res &= x[i]; + return res; +} + +unsigned short __attribute__((noipa)) +or_loop (unsigned short *x, int n, unsigned short res) +{ + for (int i = 0; i < n; ++i) + res |= x[i]; + return res; +} + +unsigned short __attribute__((noipa)) +eor_loop (unsigned short *x, int n, unsigned short res) +{ + for (int i = 0; i < n; ++i) + res ^= x[i]; + return res; +} + +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tuaddv\t} 1 } } */ + +/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tuminv\t} 1 } } */ + +/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tumaxv\t} 1 } } */ + +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tandv\t} 1 } } */ + +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\torv\t} 1 } } */ + +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\teorv\t} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/reduc_12_run.c b/gcc/testsuite/gcc.target/aarch64/sve/reduc_12_run.c new file 
mode 100644 index 0000000..929b81a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/reduc_12_run.c @@ -0,0 +1,66 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O3 --param vect-partial-vector-usage=1" } */ + +#define N 0x1100 + +#include "reduc_12.c" + +int +main (void) +{ + unsigned short x[N]; + for (int i = 0; i < N; ++i) + x[i] = (i + 1) * (i + 2); + + if (add_loop (x, 0, 10) != 10 + || add_loop (x, 11, 42) != 614 + || add_loop (x, 0x100, 84) != 22100 + || add_loop (x, 0xfff, 20) != 20500 + || max_loop (x, 0, 10) != 10 + || max_loop (x, 11, 131) != 132 + || max_loop (x, 11, 133) != 133 + || max_loop (x, 0x100, 65279) != 65280 + || max_loop (x, 0x100, 65281) != 65281 + || max_loop (x, 0xfff, 65503) != 65504 + || max_loop (x, 0xfff, 65505) != 65505 + || or_loop (x, 0, 0x71) != 0x71 + || or_loop (x, 11, 0) != 0xfe + || or_loop (x, 11, 0xb3c) != 0xbfe + || or_loop (x, 0x80, 0) != 0x7ffe + || or_loop (x, 0x80, 1) != 0x7fff + || or_loop (x, 0xb4, 0) != 0x7ffe + || or_loop (x, 0xb4, 1) != 0x7fff + || or_loop (x, 0xb5, 0) != 0xfffe + || or_loop (x, 0xb5, 1) != 0xffff + || eor_loop (x, 0, 0x3e) != 0x3e + || eor_loop (x, 11, 0) != 0xe8 + || eor_loop (x, 11, 0x1ff) != 0x117 + || eor_loop (x, 0x100, 0) != 0xcf00 + || eor_loop (x, 0x100, 0xeee) != 0xc1ee + || eor_loop (x, 0xfff, 0) != 0xa000 + || eor_loop (x, 0xfff, 0x8888) != 0x2888) + __builtin_abort (); + + for (int i = 0; i < N; ++i) + x[i] = ~x[i]; + + if (min_loop (x, 0, 10000) != 10000 + || min_loop (x, 11, 65404) != 65403 + || min_loop (x, 11, 65402) != 65402 + || min_loop (x, 0x100, 256) != 255 + || min_loop (x, 0x100, 254) != 254 + || min_loop (x, 0xfff, 32) != 31 + || min_loop (x, 0xfff, 30) != 30 + || and_loop (x, 0, 0x1234) != 0x1234 + || and_loop (x, 11, 0xffff) != 0xff01 + || and_loop (x, 11, 0xcdef) != 0xcd01 + || and_loop (x, 0x80, 0xffff) != 0x8001 + || and_loop (x, 0x80, 0xfffe) != 0x8000 + || and_loop (x, 0xb4, 0xffff) != 0x8001 + || and_loop (x, 0xb4, 0xfffe) != 0x8000 + || 
and_loop (x, 0xb5, 0xffff) != 1 + || and_loop (x, 0xb5, 0xfffe) != 0) + __builtin_abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/reduc_13.c b/gcc/testsuite/gcc.target/aarch64/sve/reduc_13.c new file mode 100644 index 0000000..ce2b8f2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/reduc_13.c @@ -0,0 +1,101 @@ +/* { dg-options "-O3 -msve-vector-bits=256 --param vect-partial-vector-usage=1" } */ + +void __attribute__((noipa)) +add_loop (unsigned int *x, unsigned int *res) +{ + unsigned int res0 = res[0]; + unsigned int res1 = res[1]; + for (int i = 0; i < 0x7ff; ++i) + { + res0 += x[i * 2]; + res1 += x[i * 2 + 1]; + } + res[0] = res0; + res[1] = res1; +} + +void __attribute__((noipa)) +min_loop (unsigned int *x, unsigned int *res) +{ + unsigned int res0 = res[0]; + unsigned int res1 = res[1]; + for (int i = 0; i < 0x7ff; ++i) + { + res0 = res0 < x[i * 2] ? res0 : x[i * 2]; + res1 = res1 < x[i * 2 + 1] ? res1 : x[i * 2 + 1]; + } + res[0] = res0; + res[1] = res1; +} + +void __attribute__((noipa)) +max_loop (unsigned int *x, unsigned int *res) +{ + unsigned int res0 = res[0]; + unsigned int res1 = res[1]; + for (int i = 0; i < 0x7ff; ++i) + { + res0 = res0 > x[i * 2] ? res0 : x[i * 2]; + res1 = res1 > x[i * 2 + 1] ? 
res1 : x[i * 2 + 1]; + } + res[0] = res0; + res[1] = res1; +} + +void __attribute__((noipa)) +and_loop (unsigned int *x, unsigned int *res) +{ + unsigned int res0 = res[0]; + unsigned int res1 = res[1]; + for (int i = 0; i < 0x7ff; ++i) + { + res0 &= x[i * 2]; + res1 &= x[i * 2 + 1]; + } + res[0] = res0; + res[1] = res1; +} + +void __attribute__((noipa)) +or_loop (unsigned int *x, unsigned int *res) +{ + unsigned int res0 = res[0]; + unsigned int res1 = res[1]; + for (int i = 0; i < 0x7ff; ++i) + { + res0 |= x[i * 2]; + res1 |= x[i * 2 + 1]; + } + res[0] = res0; + res[1] = res1; +} + +void __attribute__((noipa)) +eor_loop (unsigned int *x, unsigned int *res) +{ + unsigned int res0 = res[0]; + unsigned int res1 = res[1]; + for (int i = 0; i < 0x7ff; ++i) + { + res0 ^= x[i * 2]; + res1 ^= x[i * 2 + 1]; + } + res[0] = res0; + res[1] = res1; +} + +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/reduc_13_run.c b/gcc/testsuite/gcc.target/aarch64/sve/reduc_13_run.c new file mode 100644 
index 0000000..5514d8d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/reduc_13_run.c @@ -0,0 +1,61 @@ +/* { dg-do run { target aarch64_sve256_hw } } */ +/* { dg-options "-O3 -msve-vector-bits=256 --param vect-partial-vector-usage=1" } */ + +#define N 0x1100 + +#include "reduc_13.c" + +int +main (void) +{ + unsigned int x[N]; + for (int i = 0; i < N; ++i) + x[i] = ((i + 1) * (i + 2)) & 0xfffff; + + unsigned int add_res[2] = { 42, 1111 }; + add_loop (x, add_res); + if (add_res[0] != 968538154 + || add_res[1] != 964340823) + __builtin_abort (); + + unsigned int max_res1[2] = { 0, 0 }; + max_loop (x, max_res1); + if (max_res1[0] != 1048150 + || max_res1[1] != 1045506) + __builtin_abort (); + + unsigned int max_res2[2] = { 1048151, 1045507 }; + max_loop (x, max_res2); + if (max_res2[0] != 1048151 + || max_res2[1] != 1045507) + __builtin_abort (); + + unsigned int or_res[2] = { 0x1000000, 0x2000000 }; + or_loop (x, or_res); + if (or_res[0] != 0x10ffffe + || or_res[1] != 0x20ffffe) + __builtin_abort (); + + unsigned int eor_res[2] = { 0x1000000, 0x2000000 }; + eor_loop (x, eor_res); + if (eor_res[0] != 0x1010000 + || eor_res[1] != 0x20b5000) + __builtin_abort (); + + for (int i = 0; i < N; ++i) + x[i] = ~x[i] & 0xfffff; + + unsigned int min_res1[2] = { 500, 4000 }; + min_loop (x, min_res1); + if (min_res1[0] != 425 + || min_res1[1] != 3069) + __builtin_abort (); + + unsigned int min_res2[2] = { 424, 3068 }; + min_loop (x, min_res2); + if (min_res2[0] != 424 + || min_res2[1] != 3068) + __builtin_abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/reduc_14.c b/gcc/testsuite/gcc.target/aarch64/sve/reduc_14.c new file mode 100644 index 0000000..3be611e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/reduc_14.c @@ -0,0 +1,107 @@ +/* { dg-options "-O3 --param vect-partial-vector-usage=1" } */ + +void __attribute__((noipa)) +add_loop (unsigned int *x, int n, unsigned int *res) +{ + unsigned int res0 = res[0]; + unsigned int res1 = 
res[1]; + for (int i = 0; i < n; ++i) + { + res0 += x[i * 2]; + res1 += x[i * 2 + 1]; + } + res[0] = res0; + res[1] = res1; +} + +void __attribute__((noipa)) +min_loop (unsigned int *x, int n, unsigned int *res) +{ + unsigned int res0 = res[0]; + unsigned int res1 = res[1]; + for (int i = 0; i < n; ++i) + { + res0 = res0 < x[i * 2] ? res0 : x[i * 2]; + res1 = res1 < x[i * 2 + 1] ? res1 : x[i * 2 + 1]; + } + res[0] = res0; + res[1] = res1; +} + +void __attribute__((noipa)) +max_loop (unsigned int *x, int n, unsigned int *res) +{ + unsigned int res0 = res[0]; + unsigned int res1 = res[1]; + for (int i = 0; i < n; ++i) + { + res0 = res0 > x[i * 2] ? res0 : x[i * 2]; + res1 = res1 > x[i * 2 + 1] ? res1 : x[i * 2 + 1]; + } + res[0] = res0; + res[1] = res1; +} + +void __attribute__((noipa)) +and_loop (unsigned int *x, int n, unsigned int *res) +{ + unsigned int res0 = res[0]; + unsigned int res1 = res[1]; + for (int i = 0; i < n; ++i) + { + res0 &= x[i * 2]; + res1 &= x[i * 2 + 1]; + } + res[0] = res0; + res[1] = res1; +} + +void __attribute__((noipa)) +or_loop (unsigned int *x, int n, unsigned int *res) +{ + unsigned int res0 = res[0]; + unsigned int res1 = res[1]; + for (int i = 0; i < n; ++i) + { + res0 |= x[i * 2]; + res1 |= x[i * 2 + 1]; + } + res[0] = res0; + res[1] = res1; +} + +void __attribute__((noipa)) +eor_loop (unsigned int *x, int n, unsigned int *res) +{ + unsigned int res0 = res[0]; + unsigned int res1 = res[1]; + for (int i = 0; i < n; ++i) + { + res0 ^= x[i * 2]; + res1 ^= x[i * 2 + 1]; + } + res[0] = res0; + res[1] = res1; +} + +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tuaddv\t} 2 } } */ + +/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tuminv\t} 2 } } */ + +/* { 
dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tumaxv\t} 2 } } */ + +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tandv\t} 2 } } */ + +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\torv\t} 2 } } */ + +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\teorv\t} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/reduc_14_run.c b/gcc/testsuite/gcc.target/aarch64/sve/reduc_14_run.c new file mode 100644 index 0000000..ccaa770 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/reduc_14_run.c @@ -0,0 +1,187 @@ +/* { dg-do run { target aarch64_sve256_hw } } */ +/* { dg-options "-O3 -msve-vector-bits=256 --param vect-partial-vector-usage=1" } */ + +#define N 0x1100 + +#include "reduc_14.c" + +int +main (void) +{ + unsigned int x[N]; + for (int i = 0; i < N; ++i) + x[i] = ((i + 1) * (i + 2)) & 0xfffff; + + unsigned int add_res1[2] = { 11, 22 }; + add_loop (x, 0, add_res1); + if (add_res1[0] != 11 + || add_res1[1] != 22) + __builtin_abort (); + + unsigned int add_res2[2] = { 10, 20 }; + add_loop (x, 11, add_res2); + if (add_res2[0] != 1902 + || add_res2[1] != 2176) + __builtin_abort (); + + unsigned int add_res3[2] = { 15, 30 }; + add_loop (x, 0x100, add_res3); + if (add_res3[0] != 22435087 + || add_res3[1] != 22566686) + __builtin_abort (); + + unsigned int add_res4[2] = { 100, 200 }; + add_loop (x, 0x11f, add_res4); + 
if (add_res4[0] != 31602244 + || add_res4[1] != 31767656) + __builtin_abort (); + + unsigned int max_res1[2] = { 461, 500 }; + max_loop (x, 11, max_res1); + if (max_res1[0] != 462 + || max_res1[1] != 506) + __builtin_abort (); + + unsigned int max_res2[2] = { 463, 507 }; + max_loop (x, 11, max_res2); + if (max_res2[0] != 463 + || max_res2[1] != 507) + __builtin_abort (); + + unsigned int max_res3[2] = { 1000000, 1000000 }; + max_loop (x, 0x200, max_res3); + if (max_res3[0] != 1047552 + || max_res3[1] != 1045506) + __builtin_abort (); + + unsigned int max_res4[2] = { 1047553, 1045507 }; + max_loop (x, 0x200, max_res4); + if (max_res4[0] != 1047553 + || max_res4[1] != 1045507) + __builtin_abort (); + + unsigned int max_res5[2] = { 300000, 30000 }; + max_loop (x, 0x11f, max_res5); + if (max_res5[0] != 328902 + || max_res5[1] != 330050) + __builtin_abort (); + + unsigned int max_res6[2] = { 328903, 330051 }; + max_loop (x, 0x11f, max_res6); + if (max_res6[0] != 328903 + || max_res6[1] != 330051) + __builtin_abort (); + + unsigned int or_res1[2] = { 11, 22 }; + or_loop (x, 0, or_res1); + if (or_res1[0] != 11 + || or_res1[1] != 22) + __builtin_abort (); + + unsigned int or_res2[2] = { 0x200000, 0xe00000 }; + or_loop (x, 11, or_res2); + if (or_res2[0] != 0x2001fe + || or_res2[1] != 0xe001fe) + __builtin_abort (); + + unsigned int or_res3[2] = { 0x800000, 0x700000 }; + or_loop (x, 0x40, or_res3); + if (or_res3[0] != 0x803ffe + || or_res3[1] != 0x707ffe) + __builtin_abort (); + + unsigned int or_res4[2] = { 0x100001, 0x300000 }; + or_loop (x, 0x4f, or_res4); + if (or_res4[0] != 0x107fff + || or_res4[1] != 0x307ffe) + __builtin_abort (); + + unsigned int eor_res1[2] = { 11, 22 }; + eor_loop (x, 0, eor_res1); + if (eor_res1[0] != 11 + || eor_res1[1] != 22) + __builtin_abort (); + + unsigned int eor_res2[2] = { 0x2000ff, 0xe000ff }; + eor_loop (x, 11, eor_res2); + if (eor_res2[0] != 0x2001cf + || eor_res2[1] != 0xe000b7) + __builtin_abort (); + + unsigned int eor_res3[2] = { 
0x805000, 0x70f000 }; + eor_loop (x, 0x100, eor_res3); + if (eor_res3[0] != 0x824200 + || eor_res3[1] != 0x77dc00) + __builtin_abort (); + + unsigned int eor_res4[2] = { 0x101201, 0x300f00 }; + eor_loop (x, 0x11f, eor_res4); + if (eor_res4[0] != 0x178801 + || eor_res4[1] != 0x337240) + __builtin_abort (); + + for (int i = 0; i < N; ++i) + x[i] = ~x[i] & 0xfffff; + + unsigned int min_res1[2] = { 1048200, 1048100 }; + min_loop (x, 11, min_res1); + if (min_res1[0] != 1048113 + || min_res1[1] != 1048069) + __builtin_abort (); + + unsigned int min_res2[2] = { 1048112, 1048068 }; + min_loop (x, 11, min_res2); + if (min_res2[0] != 1048112 + || min_res2[1] != 1048068) + __builtin_abort (); + + unsigned int min_res3[2] = { 10000, 10000 }; + min_loop (x, 0x200, min_res3); + if (min_res3[0] != 1023 + || min_res3[1] != 3069) + __builtin_abort (); + + unsigned int min_res4[2] = { 1022, 3068 }; + min_loop (x, 0x200, min_res4); + if (min_res4[0] != 1022 + || min_res4[1] != 3068) + __builtin_abort (); + + unsigned int min_res5[2] = { 719680, 718530 }; + min_loop (x, 0x11f, min_res5); + if (min_res5[0] != 719673 + || min_res5[1] != 718525) + __builtin_abort (); + + unsigned int min_res6[2] = { 719672, 718524 }; + min_loop (x, 0x11f, min_res6); + if (min_res6[0] != 719672 + || min_res6[1] != 718524) + __builtin_abort (); + + unsigned int and_res1[2] = { 11, 22 }; + and_loop (x, 0, and_res1); + if (and_res1[0] != 11 + || and_res1[1] != 22) + __builtin_abort (); + + unsigned int and_res2[2] = { 0xf5cff, 0xf78ff }; + and_loop (x, 11, and_res2); + if (and_res2[0] != 0xf5c01 + || and_res2[1] != 0xf7801) + __builtin_abort (); + + unsigned int and_res3[2] = { 0x7efff, 0xecfff }; + and_loop (x, 0x40, and_res3); + if (and_res3[0] != 0x7c001 + || and_res3[1] != 0xe8001) + __builtin_abort (); + + unsigned int and_res4[2] = { 0xffffff, 0xffffff }; + and_loop (x, 0x4f, and_res4); + if (and_res4[0] != 0xf8001 + || and_res4[1] != 0xf8001) + __builtin_abort (); + + return 0; +} diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/reduc_15.c b/gcc/testsuite/gcc.target/aarch64/sve/reduc_15.c new file mode 100644 index 0000000..15b1ade --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/reduc_15.c @@ -0,0 +1,16 @@ +/* { dg-options "-O3 --param vect-partial-vector-usage=1" } */ + +int __attribute__((noipa)) +add_loop (int *x, int n, int res) +{ + for (int i = 0; i < n; ++i) + { + res += x[i * 2]; + res += x[i * 2 + 1]; + } + return res; +} + +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tuaddv\t} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/reduc_15_run.c b/gcc/testsuite/gcc.target/aarch64/sve/reduc_15_run.c new file mode 100644 index 0000000..3207fce --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/reduc_15_run.c @@ -0,0 +1,22 @@ +/* { dg-do run { target aarch64_sve256_hw } } */ +/* { dg-options "-O3 -msve-vector-bits=256 --param vect-partial-vector-usage=1" } */ + +#define N 0x1100 + +#include "reduc_15.c" + +int +main (void) +{ + int x[N]; + for (int i = 0; i < N; ++i) + x[i] = ((i + 1) * (i + 2)) & 0xfffff; + + if (add_loop (x, 0, 33) != 33 + || add_loop (x, 11, 30) != 4078 + || add_loop (x, 0x100, 45) != 45001773 + || add_loop (x, 0x11f, 300) != 63369900) + __builtin_abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/reduc_9.c b/gcc/testsuite/gcc.target/aarch64/sve/reduc_9.c new file mode 100644 index 0000000..b839821 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/reduc_9.c @@ -0,0 +1,77 @@ +/* { dg-options "-O3 -msve-vector-bits=256 --param vect-partial-vector-usage=1" } */ + +unsigned short __attribute__((noipa)) +add_loop (unsigned short *x) +{ + unsigned short res = 0; + for (int i = 0; i < 0xfff; ++i) + res += x[i]; + return res; +} + +unsigned short __attribute__((noipa)) +min_loop (unsigned short 
*x) +{ + unsigned short res = ~0; + for (int i = 0; i < 0xfff; ++i) + res = res < x[i] ? res : x[i]; + return res; +} + +unsigned short __attribute__((noipa)) +max_loop (unsigned short *x) +{ + unsigned short res = 0; + for (int i = 0; i < 0xfff; ++i) + res = res > x[i] ? res : x[i]; + return res; +} + +unsigned short __attribute__((noipa)) +and_loop (unsigned short *x) +{ + unsigned short res = ~0; + for (int i = 0; i < 0xfff; ++i) + res &= x[i]; + return res; +} + +unsigned short __attribute__((noipa)) +or_loop (unsigned short *x) +{ + unsigned short res = 0; + for (int i = 0; i < 0xfff; ++i) + res |= x[i]; + return res; +} + +unsigned short __attribute__((noipa)) +eor_loop (unsigned short *x) +{ + unsigned short res = 0; + for (int i = 0; i < 0xfff; ++i) + res ^= x[i]; + return res; +} + +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tuaddv\t} 1 } } */ + +/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tuminv\t} 1 } } */ + +/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tumaxv\t} 1 } } */ + +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tandv\t} 1 } } */ + +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\torv\t} 1 } } */ + +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ +/* { 
dg-final { scan-assembler-times {\teor\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\teorv\t} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/reduc_9_run.c b/gcc/testsuite/gcc.target/aarch64/sve/reduc_9_run.c new file mode 100644 index 0000000..aa248f5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/reduc_9_run.c @@ -0,0 +1,29 @@ +/* { dg-do run { target aarch64_sve256_hw } } */ +/* { dg-options "-O3 -msve-vector-bits=256 --param vect-partial-vector-usage=1" } */ + +#define N 0x1100 + +#include "reduc_9.c" + +int +main (void) +{ + unsigned short x[N]; + for (int i = 0; i < N; ++i) + x[i] = (i + 1) * (i + 2); + + if (add_loop (x) != 20480 + || max_loop (x) != 65504 + || or_loop (x) != 0xfffe + || eor_loop (x) != 0xa000) + __builtin_abort (); + + for (int i = 0; i < N; ++i) + x[i] = ~x[i]; + + if (min_loop (x) != 31 + || and_loop (x) != 1) + __builtin_abort (); + + return 0; +} diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c index 2909e8a..c29ffb3 100644 --- a/gcc/tree-vect-loop-manip.c +++ b/gcc/tree-vect-loop-manip.c @@ -2457,6 +2457,28 @@ vect_update_epilogue_niters (loop_vec_info epilogue_vinfo, return vect_determine_partial_vectors_and_peeling (epilogue_vinfo, true); } +/* LOOP_VINFO is an epilogue loop whose corresponding main loop can be skipped. + Return a value that equals: + + - MAIN_LOOP_VALUE when LOOP_VINFO is entered from the main loop and + - SKIP_VALUE when the main loop is skipped. 
*/ + +tree +vect_get_main_loop_result (loop_vec_info loop_vinfo, tree main_loop_value, + tree skip_value) +{ + gcc_assert (loop_vinfo->main_loop_edge); + + tree phi_result = make_ssa_name (TREE_TYPE (main_loop_value)); + basic_block bb = loop_vinfo->main_loop_edge->dest; + gphi *new_phi = create_phi_node (phi_result, bb); + add_phi_arg (new_phi, main_loop_value, loop_vinfo->main_loop_edge, + UNKNOWN_LOCATION); + add_phi_arg (new_phi, skip_value, + loop_vinfo->skip_main_loop_edge, UNKNOWN_LOCATION); + return phi_result; +} + /* Function vect_do_peeling. Input: @@ -2986,6 +3008,8 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, skip_vector ? anchor : guard_bb, prob_epilog.invert (), irred_flag); + if (vect_epilogues) + epilogue_vinfo->skip_this_loop_edge = guard_e; slpeel_update_phi_nodes_for_guard2 (loop, epilog, guard_e, single_exit (epilog)); /* Only need to handle basic block before epilog loop if it's not @@ -3057,6 +3081,8 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, add_phi_arg (new_phi, build_zero_cst (TREE_TYPE (niters)), skip_e, UNKNOWN_LOCATION); niters = PHI_RESULT (new_phi); + epilogue_vinfo->main_loop_edge = update_e; + epilogue_vinfo->skip_main_loop_edge = skip_e; } /* Set ADVANCE to the number of iterations performed by the previous diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index fe7e73f..8c27d75 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -19,6 +19,7 @@ You should have received a copy of the GNU General Public License along with GCC; see the file COPYING3. If not see . 
*/ +#define INCLUDE_ALGORITHM #include "config.h" #include "system.h" #include "coretypes.h" @@ -823,6 +824,10 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared) th (0), versioning_threshold (0), vectorization_factor (0), + main_loop_edge (nullptr), + skip_main_loop_edge (nullptr), + skip_this_loop_edge (nullptr), + reusable_accumulators (), max_vectorization_factor (0), mask_skip_niters (NULL_TREE), rgroup_compare_type (NULL_TREE), @@ -4607,7 +4612,32 @@ vect_model_reduction_cost (loop_vec_info loop_vinfo, prologue_cost, epilogue_cost); } +/* SEQ is a sequence of instructions that initialize the reduction + described by REDUC_INFO. Emit them in the appropriate place. */ +static void +vect_emit_reduction_init_stmts (loop_vec_info loop_vinfo, + stmt_vec_info reduc_info, gimple *seq) +{ + if (reduc_info->reused_accumulator) + { + /* When reusing an accumulator from the main loop, we only need + initialization instructions if the main loop can be skipped. + In that case, emit the initialization instructions at the end + of the guard block that does the skip. */ + edge skip_edge = loop_vinfo->skip_main_loop_edge; + gcc_assert (skip_edge); + gimple_stmt_iterator gsi = gsi_last_bb (skip_edge->src); + gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT); + } + else + { + /* The normal case: emit the initialization instructions on the + preheader edge. */ + class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), seq); + } +} /* Function get_initial_def_for_reduction @@ -4675,36 +4705,30 @@ get_initial_def_for_reduction (loop_vec_info loop_vinfo, } if (stmts) - gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts); + vect_emit_reduction_init_stmts (loop_vinfo, reduc_info, stmts); return init_def; } -/* Get at the initial defs for the reduction PHIs for REDUC_INFO, whose - associated SLP node is SLP_NODE. NUMBER_OF_VECTORS is the number of vector - defs to create. 
If NEUTRAL_OP is nonnull, introducing extra elements of - that value will not change the result. */ +/* Get at the initial defs for the reduction PHIs for REDUC_INFO, + which performs a reduction involving GROUP_SIZE scalar statements. + NUMBER_OF_VECTORS is the number of vector defs to create. If NEUTRAL_OP + is nonnull, introducing extra elements of that value will not change the + result. */ static void -get_initial_defs_for_reduction (vec_info *vinfo, +get_initial_defs_for_reduction (loop_vec_info loop_vinfo, stmt_vec_info reduc_info, - slp_tree slp_node, vec *vec_oprnds, unsigned int number_of_vectors, - bool reduc_chain, tree neutral_op) + unsigned int group_size, tree neutral_op) { - vec stmts = SLP_TREE_SCALAR_STMTS (slp_node); + vec &initial_values = reduc_info->reduc_initial_values; unsigned HOST_WIDE_INT nunits; unsigned j, number_of_places_left_in_vector; tree vector_type = STMT_VINFO_VECTYPE (reduc_info); - unsigned int group_size = stmts.length (); unsigned int i; - class loop *loop; - - loop = (gimple_bb (reduc_info->stmt))->loop_father; - gcc_assert (loop); - edge pe = loop_preheader_edge (loop); - gcc_assert (!reduc_chain || neutral_op); + gcc_assert (group_size == initial_values.length () || neutral_op); /* NUMBER_OF_COPIES is the number of times we need to use the same values in created vectors. It is greater than 1 if unrolling is performed. @@ -4734,18 +4758,13 @@ get_initial_defs_for_reduction (vec_info *vinfo, { tree op; i = j % group_size; - stmt_vec_info stmt_vinfo = stmts[i]; /* Get the def before the loop. In reduction chain we have only one initial value. Else we have as many as PHIs in the group. */ - if (reduc_chain) - op = j != 0 ? 
neutral_op : vect_phi_initial_value (stmt_vinfo); - else if (((vec_oprnds->length () + 1) * nunits - - number_of_places_left_in_vector >= group_size) - && neutral_op) + if (i >= initial_values.length () || (j > i && neutral_op)) op = neutral_op; else - op = vect_phi_initial_value (stmt_vinfo); + op = initial_values[i]; /* Create 'vect_ = {op0,op1,...,opn}'. */ number_of_places_left_in_vector--; @@ -4781,8 +4800,8 @@ get_initial_defs_for_reduction (vec_info *vinfo, { /* First time round, duplicate ELTS to fill the required number of vectors. */ - duplicate_and_interleave (vinfo, &ctor_seq, vector_type, elts, - number_of_vectors, *vec_oprnds); + duplicate_and_interleave (loop_vinfo, &ctor_seq, vector_type, + elts, number_of_vectors, *vec_oprnds); break; } vec_oprnds->quick_push (init); @@ -4794,7 +4813,7 @@ get_initial_defs_for_reduction (vec_info *vinfo, } } if (ctor_seq != NULL) - gsi_insert_seq_on_edge_immediate (pe, ctor_seq); + vect_emit_reduction_init_stmts (loop_vinfo, reduc_info, ctor_seq); } /* For a statement STMT_INFO taking part in a reduction operation return @@ -4823,6 +4842,99 @@ info_for_reduction (vec_info *vinfo, stmt_vec_info stmt_info) return stmt_info; } +/* See if LOOP_VINFO is an epilogue loop whose main loop had a reduction that + REDUC_INFO can build on. Adjust REDUC_INFO and return true if so, otherwise + return false. */ + +static bool +vect_find_reusable_accumulator (loop_vec_info loop_vinfo, + stmt_vec_info reduc_info) +{ + loop_vec_info main_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo); + if (!main_loop_vinfo) + return false; + + if (STMT_VINFO_REDUC_TYPE (reduc_info) != TREE_CODE_REDUCTION) + return false; + + unsigned int num_phis = reduc_info->reduc_initial_values.length (); + auto_vec main_loop_results (num_phis); + auto_vec initial_values (num_phis); + if (edge main_loop_edge = loop_vinfo->main_loop_edge) + { + /* The epilogue loop can be entered either from the main loop or + from an earlier guard block. 
*/ + edge skip_edge = loop_vinfo->skip_main_loop_edge; + for (tree incoming_value : reduc_info->reduc_initial_values) + { + /* Look for: + + INCOMING_VALUE = phi. */ + gcc_assert (TREE_CODE (incoming_value) == SSA_NAME); + + gphi *phi = as_a (SSA_NAME_DEF_STMT (incoming_value)); + gcc_assert (gimple_bb (phi) == main_loop_edge->dest); + + tree from_main_loop = PHI_ARG_DEF_FROM_EDGE (phi, main_loop_edge); + tree from_skip = PHI_ARG_DEF_FROM_EDGE (phi, skip_edge); + + main_loop_results.quick_push (from_main_loop); + initial_values.quick_push (from_skip); + } + } + else + /* The main loop dominates the epilogue loop. */ + main_loop_results.splice (reduc_info->reduc_initial_values); + + /* See if the main loop has the kind of accumulator we need. */ + vect_reusable_accumulator *accumulator + = main_loop_vinfo->reusable_accumulators.get (main_loop_results[0]); + if (!accumulator + || num_phis != accumulator->reduc_info->reduc_scalar_results.length () + || !std::equal (main_loop_results.begin (), main_loop_results.end (), + accumulator->reduc_info->reduc_scalar_results.begin ())) + return false; + + /* For now, only handle the case in which both loops are operating on the + same vector types. In future we could reduce wider vectors to narrower + ones as well. */ + tree vectype = STMT_VINFO_VECTYPE (reduc_info); + tree old_vectype = TREE_TYPE (accumulator->reduc_input); + if (!useless_type_conversion_p (old_vectype, vectype)) + return false; + + /* Non-SLP reductions might apply an adjustment after the reduction + operation, in order to simplify the initialization of the accumulator. + If the epilogue loop carries on from where the main loop left off, + it should apply the same adjustment to the final reduction result. + + If the epilogue loop can also be entered directly (rather than via + the main loop), we need to be able to handle that case in the same way, + with the same adjustment. 
(In principle we could add a PHI node + to select the correct adjustment, but in practice that shouldn't be + necessary.) */ + tree main_adjustment + = STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (accumulator->reduc_info); + if (loop_vinfo->main_loop_edge && main_adjustment) + { + gcc_assert (num_phis == 1); + tree initial_value = initial_values[0]; + /* Check that we can use INITIAL_VALUE as the adjustment and + initialize the accumulator with a neutral value instead. */ + if (!operand_equal_p (initial_value, main_adjustment)) + return false; + tree_code code = STMT_VINFO_REDUC_CODE (reduc_info); + initial_values[0] = neutral_op_for_reduction (TREE_TYPE (initial_value), + code, initial_value); + } + STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info) = main_adjustment; + reduc_info->reduc_initial_values.truncate (0); + reduc_info->reduc_initial_values.splice (initial_values); + reduc_info->reused_accumulator = accumulator; + return true; +} + /* Function vect_create_epilog_for_reduction Create code at the loop-epilog to finalize the result of a reduction @@ -4915,7 +5027,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, gimple *use_stmt; auto_vec reduc_inputs; int j, i; - auto_vec scalar_results; + vec &scalar_results = reduc_info->reduc_scalar_results; unsigned int group_size = 1, k; auto_vec phis; /* SLP reduction without reduction chain, e.g., @@ -4941,16 +5053,12 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, gcc_assert (vectype); mode = TYPE_MODE (vectype); - tree initial_def = NULL; tree induc_val = NULL_TREE; tree adjustment_def = NULL; if (slp_node) ; else { - /* Get at the scalar def before the loop, that defines the initial value - of the reduction variable. */ - initial_def = vect_phi_initial_value (reduc_def_stmt); /* Optimize: for induction condition reduction, if we can't use zero for induc_val, use initial_def. 
*/ if (STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION) @@ -5196,6 +5304,37 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, reduc_inputs.safe_push (single_input); } + tree orig_reduc_input = reduc_inputs[0]; + + /* If this loop is an epilogue loop that can be skipped after the + main loop, we can only share a reduction operation between the + main loop and the epilogue if we put it at the target of the + skip edge. + + We can still reuse accumulators if this check fails. Doing so has + the minor(?) benefit of making the epilogue loop's scalar result + independent of the main loop's scalar result. */ + bool unify_with_main_loop_p = false; + if (reduc_info->reused_accumulator + && loop_vinfo->skip_this_loop_edge + && single_succ_p (exit_bb) + && single_succ (exit_bb) == loop_vinfo->skip_this_loop_edge->dest) + { + unify_with_main_loop_p = true; + + basic_block reduc_block = loop_vinfo->skip_this_loop_edge->dest; + reduc_inputs[0] = make_ssa_name (vectype); + gphi *new_phi = create_phi_node (reduc_inputs[0], reduc_block); + add_phi_arg (new_phi, orig_reduc_input, single_succ_edge (exit_bb), + UNKNOWN_LOCATION); + add_phi_arg (new_phi, reduc_info->reused_accumulator->reduc_input, + loop_vinfo->skip_this_loop_edge, UNKNOWN_LOCATION); + exit_gsi = gsi_after_labels (reduc_block); + } + + /* Shouldn't be used beyond this point. */ + exit_bb = nullptr; + if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION && reduc_fn != IFN_LAST) { @@ -5405,6 +5544,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, the same as initial_def already. 
*/ tree zcompare = build2 (EQ_EXPR, boolean_type_node, new_temp, induc_val); + tree initial_def = reduc_info->reduc_initial_values[0]; tmp = make_ssa_name (new_scalar_dest); epilog_stmt = gimple_build_assign (tmp, COND_EXPR, zcompare, @@ -5425,9 +5565,6 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, gcc_assert (reduc_inputs.length () == 1); gcc_assert (pow2p_hwi (group_size)); - slp_tree orig_phis_slp_node = slp_node_instance->reduc_phis; - vec orig_phis - = SLP_TREE_SCALAR_STMTS (orig_phis_slp_node); gimple_seq seq = NULL; /* Build a vector {0, 1, 2, ...}, with the same number of elements @@ -5452,7 +5589,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, { tree initial_value = NULL_TREE; if (REDUC_GROUP_FIRST_ELEMENT (stmt_info)) - initial_value = vect_phi_initial_value (orig_phis[0]); + initial_value = reduc_info->reduc_initial_values[0]; neutral_op = neutral_op_for_reduction (TREE_TYPE (vectype), code, initial_value); } @@ -5466,7 +5603,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, for MIN and MAX reduction, for example. */ if (!neutral_op) { - tree scalar_value = vect_phi_initial_value (orig_phis[i]); + tree scalar_value = reduc_info->reduc_initial_values[i]; scalar_value = gimple_convert (&seq, TREE_TYPE (vectype), scalar_value); vector_identity = gimple_build_vector_from_val (&seq, vectype, @@ -5780,6 +5917,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, the same as initial_def already. */ tree zcompare = build2 (EQ_EXPR, boolean_type_node, new_temp, induc_val); + tree initial_def = reduc_info->reduc_initial_values[0]; tree tmp = make_ssa_name (new_scalar_dest); epilog_stmt = gimple_build_assign (tmp, COND_EXPR, zcompare, @@ -5819,6 +5957,11 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, scalar_results[0] = new_temp; } + /* Record this operation if it could be reused by the epilogue loop. 
*/ + if (STMT_VINFO_REDUC_TYPE (reduc_info) == TREE_CODE_REDUCTION) + loop_vinfo->reusable_accumulators.put (scalar_results[0], + { orig_reduc_input, reduc_info }); + if (double_reduc) loop = outer_loop; @@ -5886,6 +6029,17 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, { /* Replace the uses: */ orig_name = PHI_RESULT (exit_phi); + + /* Look for a single use at the target of the skip edge. */ + if (unify_with_main_loop_p) + { + use_operand_p use_p; + gimple *user; + if (!single_imm_use (orig_name, &use_p, &user)) + gcc_unreachable (); + orig_name = gimple_get_lhs (user); + } + scalar_result = scalar_results[k]; FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name) { @@ -7421,16 +7575,32 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo, else { gcc_assert (slp_node == slp_node_instance->reduc_phis); - tree initial_value = NULL_TREE; + vec &initial_values = reduc_info->reduc_initial_values; + vec &stmts = SLP_TREE_SCALAR_STMTS (slp_node); + + unsigned int num_phis = stmts.length (); if (REDUC_GROUP_FIRST_ELEMENT (reduc_stmt_info)) - initial_value = vect_phi_initial_value (phi); - tree_code code = STMT_VINFO_REDUC_CODE (reduc_info); - tree neutral_op = neutral_op_for_reduction (TREE_TYPE (vectype_out), - code, initial_value); - get_initial_defs_for_reduction (loop_vinfo, reduc_info, - slp_node_instance->reduc_phis, - &vec_initial_defs, vec_num, - initial_value != NULL, neutral_op); + num_phis = 1; + initial_values.reserve (num_phis); + for (unsigned int i = 0; i < num_phis; ++i) + { + gphi *this_phi = as_a (stmts[i]->stmt); + initial_values.quick_push (vect_phi_initial_value (this_phi)); + } + if (vec_num == 1) + vect_find_reusable_accumulator (loop_vinfo, reduc_info); + if (!initial_values.is_empty ()) + { + tree initial_value + = (num_phis == 1 ? 
initial_values[0] : NULL_TREE); + tree_code code = STMT_VINFO_REDUC_CODE (reduc_info); + tree neutral_op + = neutral_op_for_reduction (TREE_TYPE (vectype_out), + code, initial_value); + get_initial_defs_for_reduction (loop_vinfo, reduc_info, + &vec_initial_defs, vec_num, + stmts.length (), neutral_op); + } } } else @@ -7438,6 +7608,7 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo, /* Get at the scalar def before the loop, that defines the initial value of the reduction variable. */ tree initial_def = vect_phi_initial_value (phi); + reduc_info->reduc_initial_values.safe_push (initial_def); /* Optimize: if initial_def is for REDUC_MAX smaller than the base and we can't use zero for induc_val, use initial_def. Similarly for REDUC_MIN and initial_def larger than the base. */ @@ -7474,21 +7645,30 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo, initial_def, initial_def); else { - enum tree_code code = STMT_VINFO_REDUC_CODE (reduc_info); - tree neutral_op = neutral_op_for_reduction (TREE_TYPE (initial_def), - code, initial_def); - gcc_assert (neutral_op); - /* Try to simplify the vector initialization by applying an - adjustment after the reduction has been performed. */ - if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def - && !operand_equal_p (neutral_op, initial_def)) + if (ncopies == 1) + vect_find_reusable_accumulator (loop_vinfo, reduc_info); + if (!reduc_info->reduc_initial_values.is_empty ()) { - STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info) = initial_def; - initial_def = neutral_op; + initial_def = reduc_info->reduc_initial_values[0]; + enum tree_code code = STMT_VINFO_REDUC_CODE (reduc_info); + tree neutral_op + = neutral_op_for_reduction (TREE_TYPE (initial_def), + code, initial_def); + gcc_assert (neutral_op); + /* Try to simplify the vector initialization by applying an + adjustment after the reduction has been performed. 
*/ + if (!reduc_info->reused_accumulator + && STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def + && !operand_equal_p (neutral_op, initial_def)) + { + STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info) + = initial_def; + initial_def = neutral_op; + } + vec_initial_def + = get_initial_def_for_reduction (loop_vinfo, reduc_info, + initial_def, neutral_op); } - vec_initial_def - = get_initial_def_for_reduction (loop_vinfo, reduc_info, - initial_def, neutral_op); } } @@ -7499,6 +7679,17 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo, vec_initial_defs.quick_push (vec_initial_def); } + if (auto *accumulator = reduc_info->reused_accumulator) + { + if (loop_vinfo->main_loop_edge) + vec_initial_defs[0] + = vect_get_main_loop_result (loop_vinfo, accumulator->reduc_input, + vec_initial_defs[0]); + else + vec_initial_defs.safe_push (accumulator->reduc_input); + gcc_assert (vec_initial_defs.length () == 1); + } + /* Generate the reduction PHIs upfront. */ for (i = 0; i < vec_num; i++) { diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c index 9748043..f1035a8 100644 --- a/gcc/tree-vectorizer.c +++ b/gcc/tree-vectorizer.c @@ -694,6 +694,8 @@ vec_info::new_stmt_vec_info (gimple *stmt) STMT_VINFO_SLP_VECT_ONLY (res) = false; STMT_VINFO_SLP_VECT_ONLY_PATTERN (res) = false; STMT_VINFO_VEC_STMTS (res) = vNULL; + res->reduc_initial_values = vNULL; + res->reduc_scalar_results = vNULL; if (is_a (this) && gimple_code (stmt) == GIMPLE_PHI @@ -755,6 +757,8 @@ vec_info::free_stmt_vec_info (stmt_vec_info stmt_info) release_ssa_name (lhs); } + stmt_info->reduc_initial_values.release (); + stmt_info->reduc_scalar_results.release (); STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release (); STMT_VINFO_VEC_STMTS (stmt_info).release (); free (stmt_info); diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index e2fd360..d825b0c 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -551,6 +551,18 @@ typedef auto_vec vec_loop_lens; typedef auto_vec > drs_init_vec; +/* 
Information about a reduction accumulator from the main loop that could + conceivably be reused as the input to a reduction in an epilogue loop. */ +struct vect_reusable_accumulator { + /* The final value of the accumulator, which forms the input to the + reduction operation. */ + tree reduc_input; + + /* The stmt_vec_info that describes the reduction (i.e. the one for + which is_reduc_info is true). */ + stmt_vec_info reduc_info; +}; + /*-----------------------------------------------------------------*/ /* Info on vectorized loops. */ /*-----------------------------------------------------------------*/ @@ -588,6 +600,26 @@ public: /* Unrolling factor */ poly_uint64 vectorization_factor; + /* If this loop is an epilogue loop whose main loop can be skipped, + MAIN_LOOP_EDGE is the edge from the main loop to this loop's + preheader. SKIP_MAIN_LOOP_EDGE is then the edge that skips the + main loop and goes straight to this loop's preheader. + + Both fields are null otherwise. */ + edge main_loop_edge; + edge skip_main_loop_edge; + + /* If this loop is an epilogue loop that might be skipped after executing + the main loop, this edge is the one that skips the epilogue. */ + edge skip_this_loop_edge; + + /* The vectorized form of a standard reduction replaces the original + scalar code's final result (a loop-closed SSA PHI) with the result + of a vector-to-scalar reduction operation. After vectorization, + this variable maps these vector-to-scalar results to information + about the reductions that generated them. */ + hash_map reusable_accumulators; + /* Maximum runtime vectorization factor, or MAX_VECTORIZATION_FACTOR if there is no particular limit. */ unsigned HOST_WIDE_INT max_vectorization_factor; @@ -1186,6 +1218,23 @@ public: /* The vector type for performing the actual reduction. 
*/ tree reduc_vectype; + /* If IS_REDUC_INFO is true and if the vector code is performing + N scalar reductions in parallel, this variable gives the initial + scalar values of those N reductions. */ + vec reduc_initial_values; + + /* If IS_REDUC_INFO is true and if the vector code is performing + N scalar reductions in parallel, this variable gives the vectorized code's + final (scalar) result for each of those N reductions. In other words, + REDUC_SCALAR_RESULTS[I] replaces the original scalar code's loop-closed + SSA PHI for reduction number I. */ + vec reduc_scalar_results; + + /* Only meaningful if IS_REDUC_INFO. If non-null, the reduction is + being performed by an epilogue loop and we have decided to reuse + this accumulator from the main loop. */ + vect_reusable_accumulator *reused_accumulator; + /* Whether we force a single cycle PHI during reduction vectorization. */ bool force_single_cycle; @@ -1382,12 +1431,6 @@ vect_phi_initial_value (gphi *phi) return PHI_ARG_DEF_FROM_EDGE (phi, pe); } -static inline tree -vect_phi_initial_value (stmt_vec_info stmt_info) -{ - return vect_phi_initial_value (as_a (stmt_info->stmt)); -} - /* Return true if STMT_INFO should produce a vector mask type rather than a normal nonmask type. */ @@ -1818,6 +1861,7 @@ class loop *vect_loop_versioning (loop_vec_info, gimple *); extern class loop *vect_do_peeling (loop_vec_info, tree, tree, tree *, tree *, tree *, int, bool, bool, tree *); +extern tree vect_get_main_loop_result (loop_vec_info, tree, tree); extern void vect_prepare_for_masked_peels (loop_vec_info); extern dump_user_location_t find_loop_location (class loop *); extern bool vect_can_advance_ivs_p (loop_vec_info); -- cgit v1.1 From 9aa5001ef466e6162e4acc11c6999747299474f1 Mon Sep 17 00:00:00 2001 From: Roger Sayle Date: Tue, 13 Jul 2021 14:01:41 +0100 Subject: Make gimple_could_trap_p const-safe. 
Allow gimple_could_trap_p (which previously took a non-const gimple) to be called from functions that take a const gimple (such as gimple_has_side_effects), and update its prototypes. Pre-approved as obvious. 2021-07-13 Roger Sayle Richard Biener gcc/ChangeLog * gimple.c (gimple_could_trap_p_1): Make S argument a "const gimple*". Preserve constness in call to gimple_asm_volatile_p. (gimple_could_trap_p): Make S argument a "const gimple*". * gimple.h (gimple_could_trap_p_1, gimple_could_trap_p): Update function prototypes. --- gcc/gimple.c | 6 +++--- gcc/gimple.h | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'gcc') diff --git a/gcc/gimple.c b/gcc/gimple.c index cc46454..0690f94 100644 --- a/gcc/gimple.c +++ b/gcc/gimple.c @@ -2129,7 +2129,7 @@ gimple_has_side_effects (const gimple *s) S is a GIMPLE_ASSIGN, the LHS of the assignment is also checked. */ bool -gimple_could_trap_p_1 (gimple *s, bool include_mem, bool include_stores) +gimple_could_trap_p_1 (const gimple *s, bool include_mem, bool include_stores) { tree t, div = NULL_TREE; enum tree_code op; @@ -2146,7 +2146,7 @@ gimple_could_trap_p_1 (gimple *s, bool include_mem, bool include_stores) switch (gimple_code (s)) { case GIMPLE_ASM: - return gimple_asm_volatile_p (as_a (s)); + return gimple_asm_volatile_p (as_a (s)); case GIMPLE_CALL: if (gimple_call_internal_p (s)) @@ -2194,7 +2194,7 @@ gimple_could_trap_p_1 (gimple *s, bool include_mem, bool include_stores) /* Return true if statement S can trap. 
*/ bool -gimple_could_trap_p (gimple *s) +gimple_could_trap_p (const gimple *s) { return gimple_could_trap_p_1 (s, true, true); } diff --git a/gcc/gimple.h b/gcc/gimple.h index be1155e..acf572b 100644 --- a/gcc/gimple.h +++ b/gcc/gimple.h @@ -1601,8 +1601,8 @@ void gimple_set_lhs (gimple *, tree); gimple *gimple_copy (gimple *); void gimple_move_vops (gimple *, gimple *); bool gimple_has_side_effects (const gimple *); -bool gimple_could_trap_p_1 (gimple *, bool, bool); -bool gimple_could_trap_p (gimple *); +bool gimple_could_trap_p_1 (const gimple *, bool, bool); +bool gimple_could_trap_p (const gimple *); bool gimple_assign_rhs_could_trap_p (gimple *); extern void dump_gimple_statistics (void); unsigned get_gimple_rhs_num_ops (enum tree_code); -- cgit v1.1 From f75560398af6f1f696c820016f437af4e8b4265c Mon Sep 17 00:00:00 2001 From: Andrew MacLeod Date: Tue, 13 Jul 2021 09:41:30 -0400 Subject: Adjust testcase to test the call is removed. Ranger now handles the test. gcc/testsuite PR tree-optimization/93781 * gcc.dg/tree-ssa/pr93781-1.c: Check that call is removed. --- gcc/testsuite/gcc.dg/tree-ssa/pr93781-1.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr93781-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr93781-1.c index 5ebd805..b2505f3 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/pr93781-1.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr93781-1.c @@ -12,7 +12,9 @@ void foo (unsigned int arg) if (a < 0) b = x; - /* In the fullness of time, we will delete this call. */ if (b >= 5) kill ();; } + +/* { dg-final { scan-tree-dump-not "kill" "evrp" } } */ + -- cgit v1.1 From acd4b9103c1a30c833de4eee31fb69c3ff13cd77 Mon Sep 17 00:00:00 2001 From: "Paul A. Clarke" Date: Tue, 29 Jun 2021 09:18:55 -0500 Subject: rs6000: Add support for SSE4.1 "test" intrinsics 2021-07-13 Paul A. 
Clarke gcc * config/rs6000/smmintrin.h (_mm_testz_si128, _mm_testc_si128, _mm_testnzc_si128, _mm_test_all_ones, _mm_test_all_zeros, _mm_test_mix_ones_zeros): New. --- gcc/config/rs6000/smmintrin.h | 56 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) (limited to 'gcc') diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h index bdf6eb3..16fd34d 100644 --- a/gcc/config/rs6000/smmintrin.h +++ b/gcc/config/rs6000/smmintrin.h @@ -116,4 +116,60 @@ _mm_blendv_epi8 (__m128i __A, __m128i __B, __m128i __mask) return (__m128i) vec_sel ((__v16qu) __A, (__v16qu) __B, __lmask); } +__inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_testz_si128 (__m128i __A, __m128i __B) +{ + /* Note: This implementation does NOT set "zero" or "carry" flags. */ + const __v16qu __zero = {0}; + return vec_all_eq (vec_and ((__v16qu) __A, (__v16qu) __B), __zero); +} + +__inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_testc_si128 (__m128i __A, __m128i __B) +{ + /* Note: This implementation does NOT set "zero" or "carry" flags. */ + const __v16qu __zero = {0}; + const __v16qu __notA = vec_nor ((__v16qu) __A, (__v16qu) __A); + return vec_all_eq (vec_and ((__v16qu) __notA, (__v16qu) __B), __zero); +} + +__inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_testnzc_si128 (__m128i __A, __m128i __B) +{ + /* Note: This implementation does NOT set "zero" or "carry" flags. 
*/ + return _mm_testz_si128 (__A, __B) == 0 && _mm_testc_si128 (__A, __B) == 0; +} + +__inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_test_all_zeros (__m128i __A, __m128i __mask) +{ + const __v16qu __zero = {0}; + return vec_all_eq (vec_and ((__v16qu) __A, (__v16qu) __mask), __zero); +} + +__inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_test_all_ones (__m128i __A) +{ + const __v16qu __ones = vec_splats ((unsigned char) 0xff); + return vec_all_eq ((__v16qu) __A, __ones); +} + +__inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_test_mix_ones_zeros (__m128i __A, __m128i __mask) +{ + const __v16qu __zero = {0}; + const __v16qu __Amasked = vec_and ((__v16qu) __A, (__v16qu) __mask); + const int any_ones = vec_any_ne (__Amasked, __zero); + const __v16qu __notA = vec_nor ((__v16qu) __A, (__v16qu) __A); + const __v16qu __notAmasked = vec_and ((__v16qu) __notA, (__v16qu) __mask); + const int any_zeros = vec_any_ne (__notAmasked, __zero); + return any_ones * any_zeros; +} + #endif -- cgit v1.1 From 60aee15bb7ed57d70face854834468b8b9a3ec39 Mon Sep 17 00:00:00 2001 From: "Paul A. Clarke" Date: Tue, 29 Jun 2021 09:23:39 -0500 Subject: rs6000: Add tests for SSE4.1 "test" intrinsics Copy the test for _mm_testz_si128, _mm_testc_si128, _mm_testnzc_si128, _mm_test_all_ones, _mm_test_all_zeros, _mm_test_mix_ones_zeros from gcc/testsuite/gcc.target/i386. 2021-07-13 Paul A. Clarke gcc/testsuite * gcc.target/powerpc/sse4_1-ptest-1.c: Copy from gcc/testsuite/gcc.target/i386. 
--- gcc/testsuite/gcc.target/powerpc/sse4_1-ptest-1.c | 117 ++++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-ptest-1.c (limited to 'gcc') diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-ptest-1.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-ptest-1.c new file mode 100644 index 0000000..69d13d5 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-ptest-1.c @@ -0,0 +1,117 @@ +/* { dg-do run } */ +/* { dg-require-effective-target p8vector_hw } */ +/* { dg-options "-O2 -mpower8-vector -Wno-psabi" } */ + +#ifndef CHECK_H +#define CHECK_H "sse4_1-check.h" +#endif + +#ifndef TEST +#define TEST sse4_1_test +#endif + +#include CHECK_H + +#include + +static int +make_ptestz (__m128i m, __m128i v) +{ + union + { + __m128i x; + unsigned char c[16]; + } val, mask; + int i, z; + + mask.x = m; + val.x = v; + + z = 1; + for (i = 0; i < 16; i++) + if ((mask.c[i] & val.c[i])) + { + z = 0; + break; + } + return z; +} + +static int +make_ptestc (__m128i m, __m128i v) +{ + union + { + __m128i x; + unsigned char c[16]; + } val, mask; + int i, c; + + mask.x = m; + val.x = v; + + c = 1; + for (i = 0; i < 16; i++) + if ((val.c[i] & ~mask.c[i])) + { + c = 0; + break; + } + return c; +} + +static void +TEST (void) +{ + union + { + __m128i x; + unsigned int i[4]; + } val[4]; + int i, j, l; + int res[32]; + + val[0].i[0] = 0x11111111; + val[0].i[1] = 0x00000000; + val[0].i[2] = 0x00000000; + val[0].i[3] = 0x11111111; + + val[1].i[0] = 0x00000000; + val[1].i[1] = 0x11111111; + val[1].i[2] = 0x11111111; + val[1].i[3] = 0x00000000; + + val[2].i[0] = 0; + val[2].i[1] = 0; + val[2].i[2] = 0; + val[2].i[3] = 0; + + val[3].i[0] = 0xffffffff; + val[3].i[1] = 0xffffffff; + val[3].i[2] = 0xffffffff; + val[3].i[3] = 0xffffffff; + + l = 0; + for(i = 0; i < 4; i++) + for(j = 0; j < 4; j++) + { + res[l++] = _mm_testz_si128 (val[j].x, val[i].x); + res[l++] = _mm_testc_si128 (val[j].x, val[i].x); + } + + l = 0; + for(i = 0; i < 4; 
i++) + for(j = 0; j < 4; j++) + { + if (res[l++] != make_ptestz (val[j].x, val[i].x)) + abort (); + if (res[l++] != make_ptestc (val[j].x, val[i].x)) + abort (); + } + + if (res[2] != _mm_testz_si128 (val[1].x, val[0].x)) + abort (); + + if (res[3] != _mm_testc_si128 (val[1].x, val[0].x)) + abort (); +} -- cgit v1.1 From 8695bf78dad1a42636775843ca832a2f4dba4da3 Mon Sep 17 00:00:00 2001 From: Jonathan Wright Date: Wed, 2 Jun 2021 16:55:00 +0100 Subject: gcc: Add vec_select -> subreg RTL simplification Add a new RTL simplification for the case of a VEC_SELECT selecting the low part of a vector. The simplification returns a SUBREG. The primary goal of this patch is to enable better combinations of Neon RTL patterns - specifically allowing generation of 'write-to-high-half' narrowing instructions. Adding this RTL simplification means that the expected results for a number of tests need to be updated: * aarch64 Neon: Update the scan-assembler regex for intrinsics tests to expect a scalar register instead of lane 0 of a vector. * aarch64 SVE: Likewise. * arm MVE: Use lane 1 instead of lane 0 for lane-extraction intrinsics tests (as the move instructions get optimized away for lane 0.) This patch also adds new code generation tests to narrow_high_combine.c to verify the benefit of this RTL simplification. gcc/ChangeLog: 2021-06-08 Jonathan Wright * combine.c (combine_simplify_rtx): Add vec_select -> subreg simplification. * config/aarch64/aarch64.md (*zero_extend2_aarch64): Add Neon to general purpose register case for zero-extend pattern. * config/arm/vfp.md (*arm_movsi_vfp): Remove "*" from *t -> r case to prevent some cases opting to go through memory. * cse.c (fold_rtx): Add vec_select -> subreg simplification. * rtl.c (rtvec_series_p): Define predicate to determine whether a vector contains a linear series of integers. * rtl.h (rtvec_series_p): Define. 
* rtlanal.c (vec_series_lowpart_p): Define predicate to determine if a vector selection is equivalent to the low part of the vector. * rtlanal.h (vec_series_lowpart_p): Define. * simplify-rtx.c (simplify_context::simplify_binary_operation_1): Add vec_select -> subreg simplification. gcc/testsuite/ChangeLog: * gcc.target/aarch64/extract_zero_extend.c: Remove dump scan for RTL pattern match. * gcc.target/aarch64/narrow_high_combine.c: Add new tests. * gcc.target/aarch64/simd/vmulx_laneq_f64_1.c: Update scan-assembler regex to look for a scalar register instead of lane 0 of a vector. * gcc.target/aarch64/simd/vmulxd_laneq_f64_1.c: Likewise. * gcc.target/aarch64/simd/vmulxs_lane_f32_1.c: Likewise. * gcc.target/aarch64/simd/vmulxs_laneq_f32_1.c: Likewise. * gcc.target/aarch64/simd/vqdmlalh_lane_s16.c: Likewise. * gcc.target/aarch64/simd/vqdmlals_lane_s32.c: Likewise. * gcc.target/aarch64/simd/vqdmlslh_lane_s16.c: Likewise. * gcc.target/aarch64/simd/vqdmlsls_lane_s32.c: Likewise. * gcc.target/aarch64/simd/vqdmullh_lane_s16.c: Likewise. * gcc.target/aarch64/simd/vqdmullh_laneq_s16.c: Likewise. * gcc.target/aarch64/simd/vqdmulls_lane_s32.c: Likewise. * gcc.target/aarch64/simd/vqdmulls_laneq_s32.c: Likewise. * gcc.target/aarch64/sve/dup_lane_1.c: Likewise. * gcc.target/aarch64/sve/extract_1.c: Likewise. * gcc.target/aarch64/sve/extract_2.c: Likewise. * gcc.target/aarch64/sve/extract_3.c: Likewise. * gcc.target/aarch64/sve/extract_4.c: Likewise. * gcc.target/aarch64/sve/live_1.c: Update scan-assembler regex cases to look for 'b' and 'h' registers instead of 'w'. * gcc.target/arm/crypto-vsha1cq_u32.c: Update scan-assembler regex to reflect lane 0 vector extractions being simplified to scalar register moves. * gcc.target/arm/crypto-vsha1h_u32.c: Likewise. * gcc.target/arm/crypto-vsha1mq_u32.c: Likewise. * gcc.target/arm/crypto-vsha1pq_u32.c: Likewise. * gcc.target/arm/mve/intrinsics/vgetq_lane_f16.c: Extract lane 1 as the moves for lane 0 now get optimized away. 
* gcc.target/arm/mve/intrinsics/vgetq_lane_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vgetq_lane_s16.c: Likewise. * gcc.target/arm/mve/intrinsics/vgetq_lane_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vgetq_lane_s8.c: Likewise. * gcc.target/arm/mve/intrinsics/vgetq_lane_u16.c: Likewise. * gcc.target/arm/mve/intrinsics/vgetq_lane_u32.c: Likewise. * gcc.target/arm/mve/intrinsics/vgetq_lane_u8.c: Likewise. --- gcc/combine.c | 14 + gcc/config/aarch64/aarch64.md | 11 +- gcc/config/arm/vfp.md | 2 +- gcc/cse.c | 14 + gcc/rtl.c | 15 + gcc/rtl.h | 1 + gcc/rtlanal.c | 19 ++ gcc/rtlanal.h | 3 + gcc/simplify-rtx.c | 10 + .../gcc.target/aarch64/extract_zero_extend.c | 9 - .../gcc.target/aarch64/narrow_high_combine.c | 314 ++++++++++++++------- .../gcc.target/aarch64/simd/vmulx_laneq_f64_1.c | 2 +- .../gcc.target/aarch64/simd/vmulxd_laneq_f64_1.c | 2 +- .../gcc.target/aarch64/simd/vmulxs_lane_f32_1.c | 2 +- .../gcc.target/aarch64/simd/vmulxs_laneq_f32_1.c | 2 +- .../gcc.target/aarch64/simd/vqdmlalh_lane_s16.c | 2 +- .../gcc.target/aarch64/simd/vqdmlals_lane_s32.c | 2 +- .../gcc.target/aarch64/simd/vqdmlslh_lane_s16.c | 2 +- .../gcc.target/aarch64/simd/vqdmlsls_lane_s32.c | 2 +- .../gcc.target/aarch64/simd/vqdmullh_lane_s16.c | 2 +- .../gcc.target/aarch64/simd/vqdmullh_laneq_s16.c | 2 +- .../gcc.target/aarch64/simd/vqdmulls_lane_s32.c | 2 +- .../gcc.target/aarch64/simd/vqdmulls_laneq_s32.c | 2 +- gcc/testsuite/gcc.target/aarch64/sve/dup_lane_1.c | 20 +- gcc/testsuite/gcc.target/aarch64/sve/extract_1.c | 4 +- gcc/testsuite/gcc.target/aarch64/sve/extract_2.c | 4 +- gcc/testsuite/gcc.target/aarch64/sve/extract_3.c | 4 +- gcc/testsuite/gcc.target/aarch64/sve/extract_4.c | 4 +- gcc/testsuite/gcc.target/aarch64/sve/live_1.c | 5 +- gcc/testsuite/gcc.target/arm/crypto-vsha1cq_u32.c | 4 +- gcc/testsuite/gcc.target/arm/crypto-vsha1h_u32.c | 4 +- gcc/testsuite/gcc.target/arm/crypto-vsha1mq_u32.c | 4 +- gcc/testsuite/gcc.target/arm/crypto-vsha1pq_u32.c | 4 +- 
.../gcc.target/arm/mve/intrinsics/vgetq_lane_f16.c | 4 +- .../gcc.target/arm/mve/intrinsics/vgetq_lane_f32.c | 4 +- .../gcc.target/arm/mve/intrinsics/vgetq_lane_s16.c | 4 +- .../gcc.target/arm/mve/intrinsics/vgetq_lane_s32.c | 4 +- .../gcc.target/arm/mve/intrinsics/vgetq_lane_s8.c | 4 +- .../gcc.target/arm/mve/intrinsics/vgetq_lane_u16.c | 4 +- .../gcc.target/arm/mve/intrinsics/vgetq_lane_u32.c | 4 +- .../gcc.target/arm/mve/intrinsics/vgetq_lane_u8.c | 4 +- 41 files changed, 355 insertions(+), 170 deletions(-) (limited to 'gcc') diff --git a/gcc/combine.c b/gcc/combine.c index 6476812..cb5fa40 100644 --- a/gcc/combine.c +++ b/gcc/combine.c @@ -90,6 +90,7 @@ along with GCC; see the file COPYING3. If not see #include "rtl-iter.h" #include "print-rtl.h" #include "function-abi.h" +#include "rtlanal.h" /* Number of attempts to combine instructions in this function. */ @@ -6276,6 +6277,19 @@ combine_simplify_rtx (rtx x, machine_mode op0_mode, int in_dest, - 1, 0)); break; + case VEC_SELECT: + { + rtx trueop0 = XEXP (x, 0); + mode = GET_MODE (trueop0); + rtx trueop1 = XEXP (x, 1); + /* If we select a low-part subreg, return that. 
*/ + if (vec_series_lowpart_p (GET_MODE (x), mode, trueop1)) + { + rtx new_rtx = lowpart_subreg (GET_MODE (x), trueop0, mode); + if (new_rtx != NULL_RTX) + return new_rtx; + } + } default: break; diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index aef6da9..f12a0be 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -1884,15 +1884,16 @@ ) (define_insn "*zero_extend<SHORT:mode><GPI:mode>2_aarch64" - [(set (match_operand:GPI 0 "register_operand" "=r,r,w") - (zero_extend:GPI (match_operand:SHORT 1 "nonimmediate_operand" "r,m,m")))] + [(set (match_operand:GPI 0 "register_operand" "=r,r,w,r") + (zero_extend:GPI (match_operand:SHORT 1 "nonimmediate_operand" "r,m,m,w")))] "" "@ and\t%<GPI:w>0, %<GPI:w>1, <SHORT:short_mask> ldr<SHORT:size>\t%w0, %1 - ldr\t%<SHORT:size>0, %1" - [(set_attr "type" "logic_imm,load_4,f_loads") - (set_attr "arch" "*,*,fp")] + ldr\t%<SHORT:size>0, %1 + umov\t%w0, %1.<SHORT:size>[0]" + [(set_attr "type" "logic_imm,load_4,f_loads,neon_to_gp") + (set_attr "arch" "*,*,fp,fp")] ) (define_expand "<optab>qihi2" diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md index 55b6c1a..93e96369 100644 --- a/gcc/config/arm/vfp.md +++ b/gcc/config/arm/vfp.md @@ -224,7 +224,7 @@ ;; problems because small constants get converted into adds. (define_insn "*arm_movsi_vfp" [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r,rk,m ,*t,r,*t,*t, *Uv") - (match_operand:SI 1 "general_operand" "rk, I,K,j,mi,rk,r,*t,*t,*Uvi,*t"))] + (match_operand:SI 1 "general_operand" "rk, I,K,j,mi,rk,r,t,*t,*Uvi,*t"))] "TARGET_ARM && TARGET_HARD_FLOAT && ( s_register_operand (operands[0], SImode) || s_register_operand (operands[1], SImode))" diff --git a/gcc/cse.c b/gcc/cse.c index 4b7cbdc..330c1e9 100644 --- a/gcc/cse.c +++ b/gcc/cse.c @@ -43,6 +43,7 @@ along with GCC; see the file COPYING3.
If not see #include "rtl-iter.h" #include "regs.h" #include "function-abi.h" +#include "rtlanal.h" /* The basic idea of common subexpression elimination is to go through the code, keeping a record of expressions that would @@ -3171,6 +3172,19 @@ fold_rtx (rtx x, rtx_insn *insn) if (NO_FUNCTION_CSE && CONSTANT_P (XEXP (XEXP (x, 0), 0))) return x; break; + case VEC_SELECT: + { + rtx trueop0 = XEXP (x, 0); + mode = GET_MODE (trueop0); + rtx trueop1 = XEXP (x, 1); + /* If we select a low-part subreg, return that. */ + if (vec_series_lowpart_p (GET_MODE (x), mode, trueop1)) + { + rtx new_rtx = lowpart_subreg (GET_MODE (x), trueop0, mode); + if (new_rtx != NULL_RTX) + return new_rtx; + } + } /* Anything else goes through the loop below. */ default: diff --git a/gcc/rtl.c b/gcc/rtl.c index aaee882..4a30d21 100644 --- a/gcc/rtl.c +++ b/gcc/rtl.c @@ -736,6 +736,21 @@ rtvec_all_equal_p (const_rtvec vec) } } +/* Return true if VEC contains a linear series of integers + { START, START+1, START+2, ... }. */ + +bool +rtvec_series_p (rtvec vec, int start) +{ + for (int i = 0; i < GET_NUM_ELEM (vec); i++) + { + rtx x = RTVEC_ELT (vec, i); + if (!CONST_INT_P (x) || INTVAL (x) != i + start) + return false; + } + return true; +} + /* Return an indication of which type of insn should have X as a body. In generator files, this can be UNKNOWN if the answer is only known at (GCC) runtime. Otherwise the value is CODE_LABEL, INSN, CALL_INSN diff --git a/gcc/rtl.h b/gcc/rtl.h index 5ed0d6d..2dbc433 100644 --- a/gcc/rtl.h +++ b/gcc/rtl.h @@ -2996,6 +2996,7 @@ extern unsigned int rtx_size (const_rtx); extern rtx shallow_copy_rtx (const_rtx CXX_MEM_STAT_INFO); extern int rtx_equal_p (const_rtx, const_rtx); extern bool rtvec_all_equal_p (const_rtvec); +extern bool rtvec_series_p (rtvec, int); /* Return true if X is a vector constant with a duplicated element value. 
*/ diff --git a/gcc/rtlanal.c b/gcc/rtlanal.c index 55c338e..ec7a062 100644 --- a/gcc/rtlanal.c +++ b/gcc/rtlanal.c @@ -6940,3 +6940,22 @@ register_asm_p (const_rtx x) && DECL_ASSEMBLER_NAME_SET_P (REG_EXPR (x)) && DECL_REGISTER (REG_EXPR (x))); } + +/* Return true if, for all OP of mode OP_MODE: + + (vec_select:RESULT_MODE OP SEL) + + is equivalent to the lowpart RESULT_MODE of OP. */ + +bool +vec_series_lowpart_p (machine_mode result_mode, machine_mode op_mode, rtx sel) +{ + int nunits; + if (GET_MODE_NUNITS (op_mode).is_constant (&nunits) + && targetm.can_change_mode_class (op_mode, result_mode, ALL_REGS)) + { + int offset = BYTES_BIG_ENDIAN ? nunits - XVECLEN (sel, 0) : 0; + return rtvec_series_p (XVEC (sel, 0), offset); + } + return false; +} diff --git a/gcc/rtlanal.h b/gcc/rtlanal.h index dedc2d7..e164242 100644 --- a/gcc/rtlanal.h +++ b/gcc/rtlanal.h @@ -331,4 +331,7 @@ inline vec_rtx_properties_base::~vec_rtx_properties_base () collecting the references a second time. */ using vec_rtx_properties = growing_rtx_properties<vec_rtx_properties_base>; +bool +vec_series_lowpart_p (machine_mode result_mode, machine_mode op_mode, rtx sel); + #endif diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c index c82101c..2d169d3 100644 --- a/gcc/simplify-rtx.c +++ b/gcc/simplify-rtx.c @@ -36,6 +36,7 @@ along with GCC; see the file COPYING3. If not see #include "selftest.h" #include "selftest-rtl.h" #include "rtx-vector-builder.h" +#include "rtlanal.h" /* Simplification and canonicalization of RTL. */ @@ -4201,6 +4202,15 @@ simplify_context::simplify_binary_operation_1 (rtx_code code, return trueop0; } + /* If we select a low-part subreg, return that. */ + if (vec_series_lowpart_p (mode, GET_MODE (trueop0), trueop1)) + { + rtx new_rtx = lowpart_subreg (mode, trueop0, + GET_MODE (trueop0)); + if (new_rtx != NULL_RTX) + return new_rtx; + } + /* If we build {a,b} then permute it, build the result directly.
*/ if (XVECLEN (trueop1, 0) == 2 && CONST_INT_P (XVECEXP (trueop1, 0, 0)) diff --git a/gcc/testsuite/gcc.target/aarch64/extract_zero_extend.c b/gcc/testsuite/gcc.target/aarch64/extract_zero_extend.c index 0209305..193b945 100644 --- a/gcc/testsuite/gcc.target/aarch64/extract_zero_extend.c +++ b/gcc/testsuite/gcc.target/aarch64/extract_zero_extend.c @@ -70,12 +70,3 @@ foo_siv4hi (siv4hi a) /* { dg-final { scan-assembler-times "umov\\t" 8 } } */ /* { dg-final { scan-assembler-not "and\\t" } } */ - -/* { dg-final { scan-rtl-dump "aarch64_get_lane_zero_extenddiv16qi" "final" } } */ -/* { dg-final { scan-rtl-dump "aarch64_get_lane_zero_extenddiv8qi" "final" } } */ -/* { dg-final { scan-rtl-dump "aarch64_get_lane_zero_extenddiv8hi" "final" } } */ -/* { dg-final { scan-rtl-dump "aarch64_get_lane_zero_extenddiv4hi" "final" } } */ -/* { dg-final { scan-rtl-dump "aarch64_get_lane_zero_extendsiv16qi" "final" } } */ -/* { dg-final { scan-rtl-dump "aarch64_get_lane_zero_extendsiv8qi" "final" } } */ -/* { dg-final { scan-rtl-dump "aarch64_get_lane_zero_extendsiv8hi" "final" } } */ -/* { dg-final { scan-rtl-dump "aarch64_get_lane_zero_extendsiv4hi" "final" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/narrow_high_combine.c b/gcc/testsuite/gcc.target/aarch64/narrow_high_combine.c index cf649bd..50ecab0 100644 --- a/gcc/testsuite/gcc.target/aarch64/narrow_high_combine.c +++ b/gcc/testsuite/gcc.target/aarch64/narrow_high_combine.c @@ -4,122 +4,228 @@ #include <arm_neon.h> -#define TEST_ARITH(name, rettype, rmwtype, intype, fs, rs) \ - rettype test_ ## name ## _ ## fs ## _high_combine \ +#define TEST_1_ARITH(name, rettype, rmwtype, intype, fs, rs) \ + rettype test_1_ ## name ## _ ## fs ## _high_combine \ (rmwtype a, intype b, intype c) \ { \ return vcombine_ ## rs (a, name ## _ ## fs (b, c)); \ } -TEST_ARITH (vaddhn, int8x16_t, int8x8_t, int16x8_t, s16, s8) -TEST_ARITH (vaddhn, int16x8_t, int16x4_t, int32x4_t, s32, s16) -TEST_ARITH (vaddhn, int32x4_t, int32x2_t, int64x2_t, s64, s32)
-TEST_ARITH (vaddhn, uint8x16_t, uint8x8_t, uint16x8_t, u16, u8) -TEST_ARITH (vaddhn, uint16x8_t, uint16x4_t, uint32x4_t, u32, u16) -TEST_ARITH (vaddhn, uint32x4_t, uint32x2_t, uint64x2_t, u64, u32) - -TEST_ARITH (vraddhn, int8x16_t, int8x8_t, int16x8_t, s16, s8) -TEST_ARITH (vraddhn, int16x8_t, int16x4_t, int32x4_t, s32, s16) -TEST_ARITH (vraddhn, int32x4_t, int32x2_t, int64x2_t, s64, s32) -TEST_ARITH (vraddhn, uint8x16_t, uint8x8_t, uint16x8_t, u16, u8) -TEST_ARITH (vraddhn, uint16x8_t, uint16x4_t, uint32x4_t, u32, u16) -TEST_ARITH (vraddhn, uint32x4_t, uint32x2_t, uint64x2_t, u64, u32) - -TEST_ARITH (vsubhn, int8x16_t, int8x8_t, int16x8_t, s16, s8) -TEST_ARITH (vsubhn, int16x8_t, int16x4_t, int32x4_t, s32, s16) -TEST_ARITH (vsubhn, int32x4_t, int32x2_t, int64x2_t, s64, s32) -TEST_ARITH (vsubhn, uint8x16_t, uint8x8_t, uint16x8_t, u16, u8) -TEST_ARITH (vsubhn, uint16x8_t, uint16x4_t, uint32x4_t, u32, u16) -TEST_ARITH (vsubhn, uint32x4_t, uint32x2_t, uint64x2_t, u64, u32) - -TEST_ARITH (vrsubhn, int8x16_t, int8x8_t, int16x8_t, s16, s8) -TEST_ARITH (vrsubhn, int16x8_t, int16x4_t, int32x4_t, s32, s16) -TEST_ARITH (vrsubhn, int32x4_t, int32x2_t, int64x2_t, s64, s32) -TEST_ARITH (vrsubhn, uint8x16_t, uint8x8_t, uint16x8_t, u16, u8) -TEST_ARITH (vrsubhn, uint16x8_t, uint16x4_t, uint32x4_t, u32, u16) -TEST_ARITH (vrsubhn, uint32x4_t, uint32x2_t, uint64x2_t, u64, u32) - -#define TEST_SHIFT(name, rettype, rmwtype, intype, fs, rs) \ - rettype test_ ## name ## _ ## fs ## _high_combine \ +TEST_1_ARITH (vaddhn, int8x16_t, int8x8_t, int16x8_t, s16, s8) +TEST_1_ARITH (vaddhn, int16x8_t, int16x4_t, int32x4_t, s32, s16) +TEST_1_ARITH (vaddhn, int32x4_t, int32x2_t, int64x2_t, s64, s32) +TEST_1_ARITH (vaddhn, uint8x16_t, uint8x8_t, uint16x8_t, u16, u8) +TEST_1_ARITH (vaddhn, uint16x8_t, uint16x4_t, uint32x4_t, u32, u16) +TEST_1_ARITH (vaddhn, uint32x4_t, uint32x2_t, uint64x2_t, u64, u32) + +TEST_1_ARITH (vraddhn, int8x16_t, int8x8_t, int16x8_t, s16, s8) +TEST_1_ARITH (vraddhn, 
int16x8_t, int16x4_t, int32x4_t, s32, s16) +TEST_1_ARITH (vraddhn, int32x4_t, int32x2_t, int64x2_t, s64, s32) +TEST_1_ARITH (vraddhn, uint8x16_t, uint8x8_t, uint16x8_t, u16, u8) +TEST_1_ARITH (vraddhn, uint16x8_t, uint16x4_t, uint32x4_t, u32, u16) +TEST_1_ARITH (vraddhn, uint32x4_t, uint32x2_t, uint64x2_t, u64, u32) + +TEST_1_ARITH (vsubhn, int8x16_t, int8x8_t, int16x8_t, s16, s8) +TEST_1_ARITH (vsubhn, int16x8_t, int16x4_t, int32x4_t, s32, s16) +TEST_1_ARITH (vsubhn, int32x4_t, int32x2_t, int64x2_t, s64, s32) +TEST_1_ARITH (vsubhn, uint8x16_t, uint8x8_t, uint16x8_t, u16, u8) +TEST_1_ARITH (vsubhn, uint16x8_t, uint16x4_t, uint32x4_t, u32, u16) +TEST_1_ARITH (vsubhn, uint32x4_t, uint32x2_t, uint64x2_t, u64, u32) + +TEST_1_ARITH (vrsubhn, int8x16_t, int8x8_t, int16x8_t, s16, s8) +TEST_1_ARITH (vrsubhn, int16x8_t, int16x4_t, int32x4_t, s32, s16) +TEST_1_ARITH (vrsubhn, int32x4_t, int32x2_t, int64x2_t, s64, s32) +TEST_1_ARITH (vrsubhn, uint8x16_t, uint8x8_t, uint16x8_t, u16, u8) +TEST_1_ARITH (vrsubhn, uint16x8_t, uint16x4_t, uint32x4_t, u32, u16) +TEST_1_ARITH (vrsubhn, uint32x4_t, uint32x2_t, uint64x2_t, u64, u32) + +#define TEST_2_ARITH(name, rettype, intype, fs, rs) \ + rettype test_2_ ## name ## _ ## fs ## _high_combine \ + (intype a, intype b, intype c) \ + { \ + return vcombine_ ## rs (name ## _ ## fs (a, c), \ + name ## _ ## fs (b, c)); \ + } + +TEST_2_ARITH (vaddhn, int8x16_t, int16x8_t, s16, s8) +TEST_2_ARITH (vaddhn, int16x8_t, int32x4_t, s32, s16) +TEST_2_ARITH (vaddhn, int32x4_t, int64x2_t, s64, s32) +TEST_2_ARITH (vaddhn, uint8x16_t, uint16x8_t, u16, u8) +TEST_2_ARITH (vaddhn, uint16x8_t, uint32x4_t, u32, u16) +TEST_2_ARITH (vaddhn, uint32x4_t, uint64x2_t, u64, u32) + +TEST_2_ARITH (vraddhn, int8x16_t, int16x8_t, s16, s8) +TEST_2_ARITH (vraddhn, int16x8_t, int32x4_t, s32, s16) +TEST_2_ARITH (vraddhn, int32x4_t, int64x2_t, s64, s32) +TEST_2_ARITH (vraddhn, uint8x16_t, uint16x8_t, u16, u8) +TEST_2_ARITH (vraddhn, uint16x8_t, uint32x4_t, u32, u16) 
+TEST_2_ARITH (vraddhn, uint32x4_t, uint64x2_t, u64, u32) + +TEST_2_ARITH (vsubhn, int8x16_t, int16x8_t, s16, s8) +TEST_2_ARITH (vsubhn, int16x8_t, int32x4_t, s32, s16) +TEST_2_ARITH (vsubhn, int32x4_t, int64x2_t, s64, s32) +TEST_2_ARITH (vsubhn, uint8x16_t, uint16x8_t, u16, u8) +TEST_2_ARITH (vsubhn, uint16x8_t, uint32x4_t, u32, u16) +TEST_2_ARITH (vsubhn, uint32x4_t, uint64x2_t, u64, u32) + +TEST_2_ARITH (vrsubhn, int8x16_t, int16x8_t, s16, s8) +TEST_2_ARITH (vrsubhn, int16x8_t, int32x4_t, s32, s16) +TEST_2_ARITH (vrsubhn, int32x4_t, int64x2_t, s64, s32) +TEST_2_ARITH (vrsubhn, uint8x16_t, uint16x8_t, u16, u8) +TEST_2_ARITH (vrsubhn, uint16x8_t, uint32x4_t, u32, u16) +TEST_2_ARITH (vrsubhn, uint32x4_t, uint64x2_t, u64, u32) + +#define TEST_1_SHIFT(name, rettype, rmwtype, intype, fs, rs) \ + rettype test_1_ ## name ## _ ## fs ## _high_combine \ (rmwtype a, intype b) \ { \ return vcombine_ ## rs (a, name ## _ ## fs (b, 4)); \ } -TEST_SHIFT (vshrn_n, int8x16_t, int8x8_t, int16x8_t, s16, s8) -TEST_SHIFT (vshrn_n, int16x8_t, int16x4_t, int32x4_t, s32, s16) -TEST_SHIFT (vshrn_n, int32x4_t, int32x2_t, int64x2_t, s64, s32) -TEST_SHIFT (vshrn_n, uint8x16_t, uint8x8_t, uint16x8_t, u16, u8) -TEST_SHIFT (vshrn_n, uint16x8_t, uint16x4_t, uint32x4_t, u32, u16) -TEST_SHIFT (vshrn_n, uint32x4_t, uint32x2_t, uint64x2_t, u64, u32) - -TEST_SHIFT (vrshrn_n, int8x16_t, int8x8_t, int16x8_t, s16, s8) -TEST_SHIFT (vrshrn_n, int16x8_t, int16x4_t, int32x4_t, s32, s16) -TEST_SHIFT (vrshrn_n, int32x4_t, int32x2_t, int64x2_t, s64, s32) -TEST_SHIFT (vrshrn_n, uint8x16_t, uint8x8_t, uint16x8_t, u16, u8) -TEST_SHIFT (vrshrn_n, uint16x8_t, uint16x4_t, uint32x4_t, u32, u16) -TEST_SHIFT (vrshrn_n, uint32x4_t, uint32x2_t, uint64x2_t, u64, u32) - -TEST_SHIFT (vqshrn_n, int8x16_t, int8x8_t, int16x8_t, s16, s8) -TEST_SHIFT (vqshrn_n, int16x8_t, int16x4_t, int32x4_t, s32, s16) -TEST_SHIFT (vqshrn_n, int32x4_t, int32x2_t, int64x2_t, s64, s32) -TEST_SHIFT (vqshrn_n, uint8x16_t, uint8x8_t, uint16x8_t, 
u16, u8) -TEST_SHIFT (vqshrn_n, uint16x8_t, uint16x4_t, uint32x4_t, u32, u16) -TEST_SHIFT (vqshrn_n, uint32x4_t, uint32x2_t, uint64x2_t, u64, u32) - -TEST_SHIFT (vqrshrn_n, int8x16_t, int8x8_t, int16x8_t, s16, s8) -TEST_SHIFT (vqrshrn_n, int16x8_t, int16x4_t, int32x4_t, s32, s16) -TEST_SHIFT (vqrshrn_n, int32x4_t, int32x2_t, int64x2_t, s64, s32) -TEST_SHIFT (vqrshrn_n, uint8x16_t, uint8x8_t, uint16x8_t, u16, u8) -TEST_SHIFT (vqrshrn_n, uint16x8_t, uint16x4_t, uint32x4_t, u32, u16) -TEST_SHIFT (vqrshrn_n, uint32x4_t, uint32x2_t, uint64x2_t, u64, u32) - -TEST_SHIFT (vqshrun_n, uint8x16_t, uint8x8_t, int16x8_t, s16, u8) -TEST_SHIFT (vqshrun_n, uint16x8_t, uint16x4_t, int32x4_t, s32, u16) -TEST_SHIFT (vqshrun_n, uint32x4_t, uint32x2_t, int64x2_t, s64, u32) - -TEST_SHIFT (vqrshrun_n, uint8x16_t, uint8x8_t, int16x8_t, s16, u8) -TEST_SHIFT (vqrshrun_n, uint16x8_t, uint16x4_t, int32x4_t, s32, u16) -TEST_SHIFT (vqrshrun_n, uint32x4_t, uint32x2_t, int64x2_t, s64, u32) - -#define TEST_UNARY(name, rettype, rmwtype, intype, fs, rs) \ - rettype test_ ## name ## _ ## fs ## _high_combine \ +TEST_1_SHIFT (vshrn_n, int8x16_t, int8x8_t, int16x8_t, s16, s8) +TEST_1_SHIFT (vshrn_n, int16x8_t, int16x4_t, int32x4_t, s32, s16) +TEST_1_SHIFT (vshrn_n, int32x4_t, int32x2_t, int64x2_t, s64, s32) +TEST_1_SHIFT (vshrn_n, uint8x16_t, uint8x8_t, uint16x8_t, u16, u8) +TEST_1_SHIFT (vshrn_n, uint16x8_t, uint16x4_t, uint32x4_t, u32, u16) +TEST_1_SHIFT (vshrn_n, uint32x4_t, uint32x2_t, uint64x2_t, u64, u32) + +TEST_1_SHIFT (vrshrn_n, int8x16_t, int8x8_t, int16x8_t, s16, s8) +TEST_1_SHIFT (vrshrn_n, int16x8_t, int16x4_t, int32x4_t, s32, s16) +TEST_1_SHIFT (vrshrn_n, int32x4_t, int32x2_t, int64x2_t, s64, s32) +TEST_1_SHIFT (vrshrn_n, uint8x16_t, uint8x8_t, uint16x8_t, u16, u8) +TEST_1_SHIFT (vrshrn_n, uint16x8_t, uint16x4_t, uint32x4_t, u32, u16) +TEST_1_SHIFT (vrshrn_n, uint32x4_t, uint32x2_t, uint64x2_t, u64, u32) + +TEST_1_SHIFT (vqshrn_n, int8x16_t, int8x8_t, int16x8_t, s16, s8) +TEST_1_SHIFT 
(vqshrn_n, int16x8_t, int16x4_t, int32x4_t, s32, s16) +TEST_1_SHIFT (vqshrn_n, int32x4_t, int32x2_t, int64x2_t, s64, s32) +TEST_1_SHIFT (vqshrn_n, uint8x16_t, uint8x8_t, uint16x8_t, u16, u8) +TEST_1_SHIFT (vqshrn_n, uint16x8_t, uint16x4_t, uint32x4_t, u32, u16) +TEST_1_SHIFT (vqshrn_n, uint32x4_t, uint32x2_t, uint64x2_t, u64, u32) + +TEST_1_SHIFT (vqrshrn_n, int8x16_t, int8x8_t, int16x8_t, s16, s8) +TEST_1_SHIFT (vqrshrn_n, int16x8_t, int16x4_t, int32x4_t, s32, s16) +TEST_1_SHIFT (vqrshrn_n, int32x4_t, int32x2_t, int64x2_t, s64, s32) +TEST_1_SHIFT (vqrshrn_n, uint8x16_t, uint8x8_t, uint16x8_t, u16, u8) +TEST_1_SHIFT (vqrshrn_n, uint16x8_t, uint16x4_t, uint32x4_t, u32, u16) +TEST_1_SHIFT (vqrshrn_n, uint32x4_t, uint32x2_t, uint64x2_t, u64, u32) + +TEST_1_SHIFT (vqshrun_n, uint8x16_t, uint8x8_t, int16x8_t, s16, u8) +TEST_1_SHIFT (vqshrun_n, uint16x8_t, uint16x4_t, int32x4_t, s32, u16) +TEST_1_SHIFT (vqshrun_n, uint32x4_t, uint32x2_t, int64x2_t, s64, u32) + +TEST_1_SHIFT (vqrshrun_n, uint8x16_t, uint8x8_t, int16x8_t, s16, u8) +TEST_1_SHIFT (vqrshrun_n, uint16x8_t, uint16x4_t, int32x4_t, s32, u16) +TEST_1_SHIFT (vqrshrun_n, uint32x4_t, uint32x2_t, int64x2_t, s64, u32) + +#define TEST_2_SHIFT(name, rettype, intype, fs, rs) \ + rettype test_2_ ## name ## _ ## fs ## _high_combine \ + (intype a, intype b) \ + { \ + return vcombine_ ## rs (name ## _ ## fs (a, 4), \ + name ## _ ## fs (b, 4)); \ + } + +TEST_2_SHIFT (vshrn_n, int8x16_t, int16x8_t, s16, s8) +TEST_2_SHIFT (vshrn_n, int16x8_t, int32x4_t, s32, s16) +TEST_2_SHIFT (vshrn_n, int32x4_t, int64x2_t, s64, s32) +TEST_2_SHIFT (vshrn_n, uint8x16_t, uint16x8_t, u16, u8) +TEST_2_SHIFT (vshrn_n, uint16x8_t, uint32x4_t, u32, u16) +TEST_2_SHIFT (vshrn_n, uint32x4_t, uint64x2_t, u64, u32) + +TEST_2_SHIFT (vrshrn_n, int8x16_t, int16x8_t, s16, s8) +TEST_2_SHIFT (vrshrn_n, int16x8_t, int32x4_t, s32, s16) +TEST_2_SHIFT (vrshrn_n, int32x4_t, int64x2_t, s64, s32) +TEST_2_SHIFT (vrshrn_n, uint8x16_t, uint16x8_t, u16, u8) +TEST_2_SHIFT 
(vrshrn_n, uint16x8_t, uint32x4_t, u32, u16) +TEST_2_SHIFT (vrshrn_n, uint32x4_t, uint64x2_t, u64, u32) + +TEST_2_SHIFT (vqshrn_n, int8x16_t, int16x8_t, s16, s8) +TEST_2_SHIFT (vqshrn_n, int16x8_t, int32x4_t, s32, s16) +TEST_2_SHIFT (vqshrn_n, int32x4_t, int64x2_t, s64, s32) +TEST_2_SHIFT (vqshrn_n, uint8x16_t, uint16x8_t, u16, u8) +TEST_2_SHIFT (vqshrn_n, uint16x8_t, uint32x4_t, u32, u16) +TEST_2_SHIFT (vqshrn_n, uint32x4_t, uint64x2_t, u64, u32) + +TEST_2_SHIFT (vqrshrn_n, int8x16_t, int16x8_t, s16, s8) +TEST_2_SHIFT (vqrshrn_n, int16x8_t, int32x4_t, s32, s16) +TEST_2_SHIFT (vqrshrn_n, int32x4_t, int64x2_t, s64, s32) +TEST_2_SHIFT (vqrshrn_n, uint8x16_t, uint16x8_t, u16, u8) +TEST_2_SHIFT (vqrshrn_n, uint16x8_t, uint32x4_t, u32, u16) +TEST_2_SHIFT (vqrshrn_n, uint32x4_t, uint64x2_t, u64, u32) + +TEST_2_SHIFT (vqshrun_n, uint8x16_t, int16x8_t, s16, u8) +TEST_2_SHIFT (vqshrun_n, uint16x8_t, int32x4_t, s32, u16) +TEST_2_SHIFT (vqshrun_n, uint32x4_t, int64x2_t, s64, u32) + +TEST_2_SHIFT (vqrshrun_n, uint8x16_t, int16x8_t, s16, u8) +TEST_2_SHIFT (vqrshrun_n, uint16x8_t, int32x4_t, s32, u16) +TEST_2_SHIFT (vqrshrun_n, uint32x4_t, int64x2_t, s64, u32) + +#define TEST_1_UNARY(name, rettype, rmwtype, intype, fs, rs) \ + rettype test_1_ ## name ## _ ## fs ## _high_combine \ (rmwtype a, intype b) \ { \ return vcombine_ ## rs (a, name ## _ ## fs (b)); \ } -TEST_UNARY (vmovn, int8x16_t, int8x8_t, int16x8_t, s16, s8) -TEST_UNARY (vmovn, int16x8_t, int16x4_t, int32x4_t, s32, s16) -TEST_UNARY (vmovn, int32x4_t, int32x2_t, int64x2_t, s64, s32) -TEST_UNARY (vmovn, uint8x16_t, uint8x8_t, uint16x8_t, u16, u8) -TEST_UNARY (vmovn, uint16x8_t, uint16x4_t, uint32x4_t, u32, u16) -TEST_UNARY (vmovn, uint32x4_t, uint32x2_t, uint64x2_t, u64, u32) - -TEST_UNARY (vqmovn, int8x16_t, int8x8_t, int16x8_t, s16, s8) -TEST_UNARY (vqmovn, int16x8_t, int16x4_t, int32x4_t, s32, s16) -TEST_UNARY (vqmovn, int32x4_t, int32x2_t, int64x2_t, s64, s32) -TEST_UNARY (vqmovn, uint8x16_t, uint8x8_t, uint16x8_t, 
u16, u8) -TEST_UNARY (vqmovn, uint16x8_t, uint16x4_t, uint32x4_t, u32, u16) -TEST_UNARY (vqmovn, uint32x4_t, uint32x2_t, uint64x2_t, u64, u32) - -TEST_UNARY (vqmovun, uint8x16_t, uint8x8_t, int16x8_t, s16, u8) -TEST_UNARY (vqmovun, uint16x8_t, uint16x4_t, int32x4_t, s32, u16) -TEST_UNARY (vqmovun, uint32x4_t, uint32x2_t, int64x2_t, s64, u32) - -/* { dg-final { scan-assembler-times "\\taddhn2\\tv" 6} } */ -/* { dg-final { scan-assembler-times "\\tsubhn2\\tv" 6} } */ -/* { dg-final { scan-assembler-times "\\trsubhn2\\tv" 6} } */ -/* { dg-final { scan-assembler-times "\\traddhn2\\tv" 6} } */ -/* { dg-final { scan-assembler-times "\\trshrn2\\tv" 6} } */ -/* { dg-final { scan-assembler-times "\\tshrn2\\tv" 6} } */ -/* { dg-final { scan-assembler-times "\\tsqshrun2\\tv" 3} } */ -/* { dg-final { scan-assembler-times "\\tsqrshrun2\\tv" 3} } */ -/* { dg-final { scan-assembler-times "\\tsqshrn2\\tv" 3} } */ -/* { dg-final { scan-assembler-times "\\tuqshrn2\\tv" 3} } */ -/* { dg-final { scan-assembler-times "\\tsqrshrn2\\tv" 3} } */ -/* { dg-final { scan-assembler-times "\\tuqrshrn2\\tv" 3} } */ -/* { dg-final { scan-assembler-times "\\txtn2\\tv" 6} } */ -/* { dg-final { scan-assembler-times "\\tuqxtn2\\tv" 3} } */ -/* { dg-final { scan-assembler-times "\\tsqxtn2\\tv" 3} } */ -/* { dg-final { scan-assembler-times "\\tsqxtun2\\tv" 3} } */ +TEST_1_UNARY (vmovn, int8x16_t, int8x8_t, int16x8_t, s16, s8) +TEST_1_UNARY (vmovn, int16x8_t, int16x4_t, int32x4_t, s32, s16) +TEST_1_UNARY (vmovn, int32x4_t, int32x2_t, int64x2_t, s64, s32) +TEST_1_UNARY (vmovn, uint8x16_t, uint8x8_t, uint16x8_t, u16, u8) +TEST_1_UNARY (vmovn, uint16x8_t, uint16x4_t, uint32x4_t, u32, u16) +TEST_1_UNARY (vmovn, uint32x4_t, uint32x2_t, uint64x2_t, u64, u32) + +TEST_1_UNARY (vqmovn, int8x16_t, int8x8_t, int16x8_t, s16, s8) +TEST_1_UNARY (vqmovn, int16x8_t, int16x4_t, int32x4_t, s32, s16) +TEST_1_UNARY (vqmovn, int32x4_t, int32x2_t, int64x2_t, s64, s32) +TEST_1_UNARY (vqmovn, uint8x16_t, uint8x8_t, uint16x8_t, 
u16, u8) +TEST_1_UNARY (vqmovn, uint16x8_t, uint16x4_t, uint32x4_t, u32, u16) +TEST_1_UNARY (vqmovn, uint32x4_t, uint32x2_t, uint64x2_t, u64, u32) + +TEST_1_UNARY (vqmovun, uint8x16_t, uint8x8_t, int16x8_t, s16, u8) +TEST_1_UNARY (vqmovun, uint16x8_t, uint16x4_t, int32x4_t, s32, u16) +TEST_1_UNARY (vqmovun, uint32x4_t, uint32x2_t, int64x2_t, s64, u32) + +#define TEST_2_UNARY(name, rettype, intype, fs, rs) \ + rettype test_2_ ## name ## _ ## fs ## _high_combine \ + (intype a, intype b) \ + { \ + return vcombine_ ## rs (name ## _ ## fs (a), \ + name ## _ ## fs (b)); \ + } + +TEST_2_UNARY (vmovn, int8x16_t, int16x8_t, s16, s8) +TEST_2_UNARY (vmovn, int16x8_t, int32x4_t, s32, s16) +TEST_2_UNARY (vmovn, int32x4_t, int64x2_t, s64, s32) +TEST_2_UNARY (vmovn, uint8x16_t, uint16x8_t, u16, u8) +TEST_2_UNARY (vmovn, uint16x8_t, uint32x4_t, u32, u16) +TEST_2_UNARY (vmovn, uint32x4_t, uint64x2_t, u64, u32) + +TEST_2_UNARY (vqmovn, int8x16_t, int16x8_t, s16, s8) +TEST_2_UNARY (vqmovn, int16x8_t, int32x4_t, s32, s16) +TEST_2_UNARY (vqmovn, int32x4_t, int64x2_t, s64, s32) +TEST_2_UNARY (vqmovn, uint8x16_t, uint16x8_t, u16, u8) +TEST_2_UNARY (vqmovn, uint16x8_t, uint32x4_t, u32, u16) +TEST_2_UNARY (vqmovn, uint32x4_t, uint64x2_t, u64, u32) + +TEST_2_UNARY (vqmovun, uint8x16_t, int16x8_t, s16, u8) +TEST_2_UNARY (vqmovun, uint16x8_t, int32x4_t, s32, u16) +TEST_2_UNARY (vqmovun, uint32x4_t, int64x2_t, s64, u32) + +/* { dg-final { scan-assembler-times "\\taddhn2\\tv" 12} } */ +/* { dg-final { scan-assembler-times "\\tsubhn2\\tv" 12} } */ +/* { dg-final { scan-assembler-times "\\trsubhn2\\tv" 12} } */ +/* { dg-final { scan-assembler-times "\\traddhn2\\tv" 12} } */ +/* { dg-final { scan-assembler-times "\\trshrn2\\tv" 12} } */ +/* { dg-final { scan-assembler-times "\\tshrn2\\tv" 12} } */ +/* { dg-final { scan-assembler-times "\\tsqshrun2\\tv" 6} } */ +/* { dg-final { scan-assembler-times "\\tsqrshrun2\\tv" 6} } */ +/* { dg-final { scan-assembler-times "\\tsqshrn2\\tv" 6} } */ +/* { 
dg-final { scan-assembler-times "\\tuqshrn2\\tv" 6} } */ +/* { dg-final { scan-assembler-times "\\tsqrshrn2\\tv" 6} } */ +/* { dg-final { scan-assembler-times "\\tuqrshrn2\\tv" 6} } */ +/* { dg-final { scan-assembler-times "\\txtn2\\tv" 12} } */ +/* { dg-final { scan-assembler-times "\\tuqxtn2\\tv" 6} } */ +/* { dg-final { scan-assembler-times "\\tsqxtn2\\tv" 6} } */ +/* { dg-final { scan-assembler-times "\\tsqxtun2\\tv" 6} } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmulx_laneq_f64_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vmulx_laneq_f64_1.c index db79d53..9ef001e 100644 --- a/gcc/testsuite/gcc.target/aarch64/simd/vmulx_laneq_f64_1.c +++ b/gcc/testsuite/gcc.target/aarch64/simd/vmulx_laneq_f64_1.c @@ -72,5 +72,5 @@ main (void) set_and_test_case3 (); return 0; } -/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[dD\]\\\[0\\\]\n" 1 } } */ +/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]\n" 1 } } */ /* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[dD\]\\\[1\\\]\n" 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmulxd_laneq_f64_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vmulxd_laneq_f64_1.c index 3f8303c..232ade9 100644 --- a/gcc/testsuite/gcc.target/aarch64/simd/vmulxd_laneq_f64_1.c +++ b/gcc/testsuite/gcc.target/aarch64/simd/vmulxd_laneq_f64_1.c @@ -58,5 +58,5 @@ main (void) set_and_test_case3 (); return 0; } -/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[dD\]\\\[0\\\]\n" 1 } } */ +/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]\n" 1 } } */ /* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[dD\]\\\[1\\\]\n" 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmulxs_lane_f32_1.c 
b/gcc/testsuite/gcc.target/aarch64/simd/vmulxs_lane_f32_1.c index 124dcd8..37aa0ec 100644 --- a/gcc/testsuite/gcc.target/aarch64/simd/vmulxs_lane_f32_1.c +++ b/gcc/testsuite/gcc.target/aarch64/simd/vmulxs_lane_f32_1.c @@ -57,5 +57,5 @@ main (void) set_and_test_case3 (); return 0; } -/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[sS\]\[0-9\]+, ?\[sS\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[sS\]\\\[0\\\]\n" 1 } } */ +/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[sS\]\[0-9\]+, ?\[sS\]\[0-9\]+, ?\[sS\]\[0-9\]\n" 1 } } */ /* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[sS\]\[0-9\]+, ?\[sS\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[sS\]\\\[1\\\]\n" 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmulxs_laneq_f32_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vmulxs_laneq_f32_1.c index 255f096..c9f2484 100644 --- a/gcc/testsuite/gcc.target/aarch64/simd/vmulxs_laneq_f32_1.c +++ b/gcc/testsuite/gcc.target/aarch64/simd/vmulxs_laneq_f32_1.c @@ -79,7 +79,7 @@ main (void) set_and_test_case3 (); return 0; } -/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[sS\]\[0-9\]+, ?\[sS\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[sS\]\\\[0\\\]\n" 1 } } */ +/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[sS\]\[0-9\]+, ?\[sS\]\[0-9\]+, ?\[sS\]\[0-9\]\n" 1 } } */ /* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[sS\]\[0-9\]+, ?\[sS\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[sS\]\\\[1\\\]\n" 1 } } */ /* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[sS\]\[0-9\]+, ?\[sS\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[sS\]\\\[2\\\]\n" 1 } } */ /* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[sS\]\[0-9\]+, ?\[sS\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[sS\]\\\[3\\\]\n" 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vqdmlalh_lane_s16.c b/gcc/testsuite/gcc.target/aarch64/simd/vqdmlalh_lane_s16.c index 21ae724..6b96d1c 100644 --- a/gcc/testsuite/gcc.target/aarch64/simd/vqdmlalh_lane_s16.c +++ b/gcc/testsuite/gcc.target/aarch64/simd/vqdmlalh_lane_s16.c @@ -11,4 +11,4 @@ t_vqdmlalh_lane_s16 (int32_t a, int16_t b, 
int16x4_t c) return vqdmlalh_lane_s16 (a, b, c, 0); } -/* { dg-final { scan-assembler-times "sqdmlal\[ \t\]+\[sS\]\[0-9\]+, ?\[hH\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[hH\]\\\[0\\\]\n" 1 } } */ +/* { dg-final { scan-assembler-times "sqdmlal\[ \t\]+\[sS\]\[0-9\]+, ?\[hH\]\[0-9\]+, ?\[hH\]\[0-9\]\n" 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vqdmlals_lane_s32.c b/gcc/testsuite/gcc.target/aarch64/simd/vqdmlals_lane_s32.c index 79db7b7..a780ddb 100644 --- a/gcc/testsuite/gcc.target/aarch64/simd/vqdmlals_lane_s32.c +++ b/gcc/testsuite/gcc.target/aarch64/simd/vqdmlals_lane_s32.c @@ -11,4 +11,4 @@ t_vqdmlals_lane_s32 (int64_t a, int32_t b, int32x2_t c) return vqdmlals_lane_s32 (a, b, c, 0); } -/* { dg-final { scan-assembler-times "sqdmlal\[ \t\]+\[dD\]\[0-9\]+, ?\[sS\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[sS\]\\\[0\\\]\n" 1 } } */ +/* { dg-final { scan-assembler-times "sqdmlal\[ \t\]+\[dD\]\[0-9\]+, ?\[sS\]\[0-9\]+, ?\[sS\]\[0-9\]\n" 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vqdmlslh_lane_s16.c b/gcc/testsuite/gcc.target/aarch64/simd/vqdmlslh_lane_s16.c index 185507b..8bbac1a 100644 --- a/gcc/testsuite/gcc.target/aarch64/simd/vqdmlslh_lane_s16.c +++ b/gcc/testsuite/gcc.target/aarch64/simd/vqdmlslh_lane_s16.c @@ -11,4 +11,4 @@ t_vqdmlslh_lane_s16 (int32_t a, int16_t b, int16x4_t c) return vqdmlslh_lane_s16 (a, b, c, 0); } -/* { dg-final { scan-assembler-times "sqdmlsl\[ \t\]+\[sS\]\[0-9\]+, ?\[hH\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[hH\]\\\[0\\\]\n" 1 } } */ +/* { dg-final { scan-assembler-times "sqdmlsl\[ \t\]+\[sS\]\[0-9\]+, ?\[hH\]\[0-9\]+, ?\[hH\]\[0-9\]\n" 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vqdmlsls_lane_s32.c b/gcc/testsuite/gcc.target/aarch64/simd/vqdmlsls_lane_s32.c index f692923..069ba91 100644 --- a/gcc/testsuite/gcc.target/aarch64/simd/vqdmlsls_lane_s32.c +++ b/gcc/testsuite/gcc.target/aarch64/simd/vqdmlsls_lane_s32.c @@ -11,4 +11,4 @@ t_vqdmlsls_lane_s32 (int64_t a, int32_t b, int32x2_t c) return vqdmlsls_lane_s32 (a, b, c, 
0); } -/* { dg-final { scan-assembler-times "sqdmlsl\[ \t\]+\[dD\]\[0-9\]+, ?\[sS\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[sS\]\\\[0\\\]\n" 1 } } */ +/* { dg-final { scan-assembler-times "sqdmlsl\[ \t\]+\[dD\]\[0-9\]+, ?\[sS\]\[0-9\]+, ?\[sS\]\[0-9\]\n" 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vqdmullh_lane_s16.c b/gcc/testsuite/gcc.target/aarch64/simd/vqdmullh_lane_s16.c index debf191..fcd496b 100644 --- a/gcc/testsuite/gcc.target/aarch64/simd/vqdmullh_lane_s16.c +++ b/gcc/testsuite/gcc.target/aarch64/simd/vqdmullh_lane_s16.c @@ -11,4 +11,4 @@ t_vqdmullh_lane_s16 (int16_t a, int16x4_t b) return vqdmullh_lane_s16 (a, b, 0); } -/* { dg-final { scan-assembler-times "sqdmull\[ \t\]+\[sS\]\[0-9\]+, ?\[hH\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[hH\]\\\[0\\\]\n" 1 } } */ +/* { dg-final { scan-assembler-times "sqdmull\[ \t\]+\[sS\]\[0-9\]+, ?\[hH\]\[0-9\]+, ?\[hH\]\[0-9\]\n" 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vqdmullh_laneq_s16.c b/gcc/testsuite/gcc.target/aarch64/simd/vqdmullh_laneq_s16.c index e810c47..db77fff 100644 --- a/gcc/testsuite/gcc.target/aarch64/simd/vqdmullh_laneq_s16.c +++ b/gcc/testsuite/gcc.target/aarch64/simd/vqdmullh_laneq_s16.c @@ -11,4 +11,4 @@ t_vqdmullh_laneq_s16 (int16_t a, int16x8_t b) return vqdmullh_laneq_s16 (a, b, 0); } -/* { dg-final { scan-assembler-times "sqdmull\[ \t\]+\[sS\]\[0-9\]+, ?\[hH\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[hH\]\\\[0\\\]\n" 1 } } */ +/* { dg-final { scan-assembler-times "sqdmull\[ \t\]+\[sS\]\[0-9\]+, ?\[hH\]\[0-9\]+, ?\[hH\]\[0-9\]\n" 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vqdmulls_lane_s32.c b/gcc/testsuite/gcc.target/aarch64/simd/vqdmulls_lane_s32.c index a5fe60f..04bbe7f 100644 --- a/gcc/testsuite/gcc.target/aarch64/simd/vqdmulls_lane_s32.c +++ b/gcc/testsuite/gcc.target/aarch64/simd/vqdmulls_lane_s32.c @@ -11,4 +11,4 @@ t_vqdmulls_lane_s32 (int32_t a, int32x2_t b) return vqdmulls_lane_s32 (a, b, 0); } -/* { dg-final { scan-assembler-times "sqdmull\[ \t\]+\[dD\]\[0-9\]+, 
?\[sS\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[sS\]\\\[0\\\]\n" 1 } } */ +/* { dg-final { scan-assembler-times "sqdmull\[ \t\]+\[dD\]\[0-9\]+, ?\[sS\]\[0-9\]+, ?\[sS\]\[0-9\]\n" 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vqdmulls_laneq_s32.c b/gcc/testsuite/gcc.target/aarch64/simd/vqdmulls_laneq_s32.c index bd856d8..e8e2368 100644 --- a/gcc/testsuite/gcc.target/aarch64/simd/vqdmulls_laneq_s32.c +++ b/gcc/testsuite/gcc.target/aarch64/simd/vqdmulls_laneq_s32.c @@ -11,4 +11,4 @@ t_vqdmulls_laneq_s32 (int32_t a, int32x4_t b) return vqdmulls_laneq_s32 (a, b, 0); } -/* { dg-final { scan-assembler-times "sqdmull\[ \t\]+\[dD\]\[0-9\]+, ?\[sS\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[sS\]\\\[0\\\]\n" 1 } } */ +/* { dg-final { scan-assembler-times "sqdmull\[ \t\]+\[dD\]\[0-9\]+, ?\[sS\]\[0-9\]+, ?\[sS\]\[0-9\]\n" 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/dup_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/dup_lane_1.c index 532847b..14c1f5a 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/dup_lane_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/dup_lane_1.c @@ -56,15 +56,27 @@ TEST_ALL (DUP_LANE) /* { dg-final { scan-assembler-not {\ttbl\t} } } */ -/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.d, z[0-9]+\.d\[0\]} 2 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, d[0-9]} 2 { + target { aarch64_little_endian } } } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.d, z[0-9]+\.d\[0\]} 2 { + target { aarch64_big_endian } } } } */ /* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.d, z[0-9]+\.d\[2\]} 2 } } */ /* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.d, z[0-9]+\.d\[3\]} 2 } } */ -/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.s, z[0-9]+\.s\[0\]} 2 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, s[0-9]} 2 { + target { aarch64_little_endian } } } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.s, z[0-9]+\.s\[0\]} 2 { + target { aarch64_big_endian } } } } */ /* { dg-final { 
scan-assembler-times {\tdup\tz[0-9]+\.s, z[0-9]+\.s\[5\]} 2 } } */ /* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.s, z[0-9]+\.s\[7\]} 2 } } */ -/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.h, z[0-9]+\.h\[0\]} 2 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, h[0-9]} 2 { + target { aarch64_little_endian } } } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.h, z[0-9]+\.h\[0\]} 2 { + target { aarch64_big_endian } } } } */ /* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.h, z[0-9]+\.h\[6\]} 2 } } */ /* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.h, z[0-9]+\.h\[15\]} 2 } } */ -/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.b, z[0-9]+\.b\[0\]} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.b, b[0-9]} 1 { + target { aarch64_little_endian } } } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.b, z[0-9]+\.b\[0\]} 1 { + target { aarch64_big_endian } } } } */ /* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.b, z[0-9]+\.b\[19\]} 1 } } */ /* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.b, z[0-9]+\.b\[31\]} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/extract_1.c b/gcc/testsuite/gcc.target/aarch64/sve/extract_1.c index df51ce3..1a926db 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/extract_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/extract_1.c @@ -56,7 +56,7 @@ typedef _Float16 vnx8hf __attribute__((vector_size (32))); TEST_ALL (EXTRACT) -/* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[0\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmov\tx[0-9]+, d[0-9]\n} 2 } } */ /* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[1\]\n} 1 } } */ /* { dg-final { scan-assembler-not {\tdup\td[0-9]+, v[0-9]+\.d\[0\]\n} } } */ /* { dg-final { scan-assembler-times {\tdup\td[0-9]+, v[0-9]+\.d\[1\]\n} 1 } } */ @@ -64,7 +64,7 @@ TEST_ALL (EXTRACT) /* { dg-final { scan-assembler-times {\tlastb\tx[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ /* 
{ dg-final { scan-assembler-times {\tlastb\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[0\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmov\tw[0-9]+, s[0-9]\n} 2 } } */ /* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[1\]\n} 1 } } */ /* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[3\]\n} 1 } } */ /* { dg-final { scan-assembler-not {\tdup\ts[0-9]+, v[0-9]+\.s\[0\]\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/extract_2.c b/gcc/testsuite/gcc.target/aarch64/sve/extract_2.c index 0642604..1c54d10 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/extract_2.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/extract_2.c @@ -56,7 +56,7 @@ typedef _Float16 vnx16hf __attribute__((vector_size (64))); TEST_ALL (EXTRACT) -/* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[0\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmov\tx[0-9]+, d[0-9]\n} 2 } } */ /* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[1\]\n} 1 } } */ /* { dg-final { scan-assembler-not {\tdup\td[0-9]+, v[0-9]+\.d\[0\]\n} } } */ /* { dg-final { scan-assembler-times {\tdup\td[0-9]+, v[0-9]+\.d\[1\]\n} 1 } } */ @@ -64,7 +64,7 @@ TEST_ALL (EXTRACT) /* { dg-final { scan-assembler-times {\tlastb\tx[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ /* { dg-final { scan-assembler-times {\tlastb\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[0\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmov\tw[0-9]+, s[0-9]\n} 2 } } */ /* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[1\]\n} 1 } } */ /* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[3\]\n} 1 } } */ /* { dg-final { scan-assembler-not {\tdup\ts[0-9]+, v[0-9]+\.s\[0\]\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/extract_3.c b/gcc/testsuite/gcc.target/aarch64/sve/extract_3.c index 604f1f6..501b9f3 100644 --- 
a/gcc/testsuite/gcc.target/aarch64/sve/extract_3.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/extract_3.c @@ -77,7 +77,7 @@ typedef _Float16 vnx32hf __attribute__((vector_size (128))); TEST_ALL (EXTRACT) -/* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[0\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmov\tx[0-9]+, d[0-9]\n} 5 } } */ /* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[1\]\n} 1 } } */ /* { dg-final { scan-assembler-not {\tdup\td[0-9]+, v[0-9]+\.d\[0\]\n} } } */ /* { dg-final { scan-assembler-times {\tdup\td[0-9]+, v[0-9]+\.d\[1\]\n} 1 } } */ @@ -86,7 +86,7 @@ TEST_ALL (EXTRACT) /* { dg-final { scan-assembler-times {\tlastb\tx[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ /* { dg-final { scan-assembler-times {\tlastb\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[0\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmov\tw[0-9]+, s[0-9]\n} 5 } } */ /* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[1\]\n} 1 } } */ /* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[3\]\n} 1 } } */ /* { dg-final { scan-assembler-not {\tdup\ts[0-9]+, v[0-9]+\.s\[0\]\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/extract_4.c b/gcc/testsuite/gcc.target/aarch64/sve/extract_4.c index 8b45e317..94d3155 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/extract_4.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/extract_4.c @@ -84,7 +84,7 @@ typedef _Float16 v128hf __attribute__((vector_size (256))); TEST_ALL (EXTRACT) -/* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[0\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmov\tx[0-9]+, d[0-9]\n} 6 } } */ /* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[1\]\n} 1 } } */ /* { dg-final { scan-assembler-not {\tdup\td[0-9]+, v[0-9]+\.d\[0\]\n} } } */ /* { dg-final { scan-assembler-times {\tdup\td[0-9]+, v[0-9]+\.d\[1\]\n} 1 } } */ @@ -93,7 +93,7 @@ TEST_ALL 
(EXTRACT) /* { dg-final { scan-assembler-times {\tlastb\tx[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ /* { dg-final { scan-assembler-times {\tlastb\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[0\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmov\tw[0-9]+, s[0-9]\n} 6 } } */ /* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[1\]\n} 1 } } */ /* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[3\]\n} 1 } } */ /* { dg-final { scan-assembler-not {\tdup\ts[0-9]+, v[0-9]+\.s\[0\]\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/live_1.c b/gcc/testsuite/gcc.target/aarch64/sve/live_1.c index e8d92ec..80ee176 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/live_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/live_1.c @@ -32,10 +32,9 @@ TEST_ALL (EXTRACT_LAST) /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7].s, } 4 } } */ /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7].d, } 4 } } */ -/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlastb\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlastb\th[0-9]+, p[0-7], z[0-9]+\.h\n} 2 } } */ /* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ /* { dg-final { scan-assembler-times {\tlastb\tx[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tlastb\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ /* { dg-final { scan-assembler-times {\tlastb\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ /* { dg-final { scan-assembler-times {\tlastb\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/arm/crypto-vsha1cq_u32.c b/gcc/testsuite/gcc.target/arm/crypto-vsha1cq_u32.c index 41f97a7..0cadd19 100644 --- a/gcc/testsuite/gcc.target/arm/crypto-vsha1cq_u32.c 
+++ b/gcc/testsuite/gcc.target/arm/crypto-vsha1cq_u32.c @@ -31,5 +31,5 @@ uint32_t foo (void) TEST_SHA1C_VEC_SELECT (GET_LANE) /* { dg-final { scan-assembler-times {sha1c.32\tq[0-9]+, q[0-9]+} 5 } } */ -/* { dg-final { scan-assembler-times {vdup.32\tq[0-9]+, r[0-9]+} 3 } } */ -/* { dg-final { scan-assembler-times {vmov.32\tr[0-9]+, d[0-9]+\[[0-9]+\]+} 4 } } */ +/* { dg-final { scan-assembler-times {vdup.32\tq[0-9]+, r[0-9]+} 4 } } */ +/* { dg-final { scan-assembler-times {vmov.32\tr[0-9]+, d[0-9]+\[[0-9]+\]+} 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/crypto-vsha1h_u32.c b/gcc/testsuite/gcc.target/arm/crypto-vsha1h_u32.c index b284667..33af705 100644 --- a/gcc/testsuite/gcc.target/arm/crypto-vsha1h_u32.c +++ b/gcc/testsuite/gcc.target/arm/crypto-vsha1h_u32.c @@ -27,5 +27,5 @@ uint32_t foo (void) TEST_SHA1H_VEC_SELECT (GET_LANE) /* { dg-final { scan-assembler-times {sha1h.32\tq[0-9]+, q[0-9]+} 5 } } */ -/* { dg-final { scan-assembler-times {vdup.32\tq[0-9]+, r[0-9]+} 3 } } */ -/* { dg-final { scan-assembler-times {vmov.32\tr[0-9]+, d[0-9]+\[[0-9]+\]+} 8 } } */ +/* { dg-final { scan-assembler-times {vdup.32\tq[0-9]+, r[0-9]+} 4 } } */ +/* { dg-final { scan-assembler-times {vmov.32\tr[0-9]+, d[0-9]+\[[0-9]+\]+} 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/crypto-vsha1mq_u32.c b/gcc/testsuite/gcc.target/arm/crypto-vsha1mq_u32.c index 676e64c..bdd1c4f 100644 --- a/gcc/testsuite/gcc.target/arm/crypto-vsha1mq_u32.c +++ b/gcc/testsuite/gcc.target/arm/crypto-vsha1mq_u32.c @@ -31,5 +31,5 @@ uint32_t foo (void) TEST_SHA1M_VEC_SELECT (GET_LANE) /* { dg-final { scan-assembler-times {sha1m.32\tq[0-9]+, q[0-9]+} 5 } } */ -/* { dg-final { scan-assembler-times {vdup.32\tq[0-9]+, r[0-9]+} 3 } } */ -/* { dg-final { scan-assembler-times {vmov.32\tr[0-9]+, d[0-9]+\[[0-9]+\]+} 4 } } */ +/* { dg-final { scan-assembler-times {vdup.32\tq[0-9]+, r[0-9]+} 4 } } */ +/* { dg-final { scan-assembler-times {vmov.32\tr[0-9]+, d[0-9]+\[[0-9]+\]+} 3 } } */ diff --git 
a/gcc/testsuite/gcc.target/arm/crypto-vsha1pq_u32.c b/gcc/testsuite/gcc.target/arm/crypto-vsha1pq_u32.c index ed10fe2..d48a07c 100644 --- a/gcc/testsuite/gcc.target/arm/crypto-vsha1pq_u32.c +++ b/gcc/testsuite/gcc.target/arm/crypto-vsha1pq_u32.c @@ -31,5 +31,5 @@ uint32_t foo (void) TEST_SHA1P_VEC_SELECT (GET_LANE) /* { dg-final { scan-assembler-times {sha1p.32\tq[0-9]+, q[0-9]+} 5 } } */ -/* { dg-final { scan-assembler-times {vdup.32\tq[0-9]+, r[0-9]+} 3 } } */ -/* { dg-final { scan-assembler-times {vmov.32\tr[0-9]+, d[0-9]+\[[0-9]+\]+} 4 } } */ +/* { dg-final { scan-assembler-times {vdup.32\tq[0-9]+, r[0-9]+} 4 } } */ +/* { dg-final { scan-assembler-times {vmov.32\tr[0-9]+, d[0-9]+\[[0-9]+\]+} 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vgetq_lane_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vgetq_lane_f16.c index 2a5aa63..a92e1d4 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vgetq_lane_f16.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vgetq_lane_f16.c @@ -8,7 +8,7 @@ float16_t foo (float16x8_t a) { - return vgetq_lane_f16 (a, 0); + return vgetq_lane_f16 (a, 1); } /* { dg-final { scan-assembler "vmov.u16" } } */ @@ -16,7 +16,7 @@ foo (float16x8_t a) float16_t foo1 (float16x8_t a) { - return vgetq_lane (a, 0); + return vgetq_lane (a, 1); } /* { dg-final { scan-assembler "vmov.u16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vgetq_lane_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vgetq_lane_f32.c index f1839cc..98319ef 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vgetq_lane_f32.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vgetq_lane_f32.c @@ -8,7 +8,7 @@ float32_t foo (float32x4_t a) { - return vgetq_lane_f32 (a, 0); + return vgetq_lane_f32 (a, 1); } /* { dg-final { scan-assembler "vmov.32" } } */ @@ -16,7 +16,7 @@ foo (float32x4_t a) float32_t foo1 (float32x4_t a) { - return vgetq_lane (a, 0); + return vgetq_lane (a, 1); } /* { dg-final { scan-assembler "vmov.32" } } */ 
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vgetq_lane_s16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vgetq_lane_s16.c index ed1c217..c9eefeb 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vgetq_lane_s16.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vgetq_lane_s16.c @@ -8,7 +8,7 @@ int16_t foo (int16x8_t a) { - return vgetq_lane_s16 (a, 0); + return vgetq_lane_s16 (a, 1); } /* { dg-final { scan-assembler "vmov.s16" } } */ @@ -16,7 +16,7 @@ foo (int16x8_t a) int16_t foo1 (int16x8_t a) { - return vgetq_lane (a, 0); + return vgetq_lane (a, 1); } /* { dg-final { scan-assembler "vmov.s16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vgetq_lane_s32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vgetq_lane_s32.c index c87ed93..0925a25 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vgetq_lane_s32.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vgetq_lane_s32.c @@ -8,7 +8,7 @@ int32_t foo (int32x4_t a) { - return vgetq_lane_s32 (a, 0); + return vgetq_lane_s32 (a, 1); } /* { dg-final { scan-assembler "vmov.32" } } */ @@ -16,7 +16,7 @@ foo (int32x4_t a) int32_t foo1 (int32x4_t a) { - return vgetq_lane (a, 0); + return vgetq_lane (a, 1); } /* { dg-final { scan-assembler "vmov.32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vgetq_lane_s8.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vgetq_lane_s8.c index 11242ff..5b76e3d 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vgetq_lane_s8.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vgetq_lane_s8.c @@ -8,7 +8,7 @@ int8_t foo (int8x16_t a) { - return vgetq_lane_s8 (a, 0); + return vgetq_lane_s8 (a, 1); } /* { dg-final { scan-assembler "vmov.s8" } } */ @@ -16,7 +16,7 @@ foo (int8x16_t a) int8_t foo1 (int8x16_t a) { - return vgetq_lane (a, 0); + return vgetq_lane (a, 1); } /* { dg-final { scan-assembler "vmov.s8" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vgetq_lane_u16.c 
b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vgetq_lane_u16.c index 2788b58..c4a3fb0 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vgetq_lane_u16.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vgetq_lane_u16.c @@ -8,7 +8,7 @@ uint16_t foo (uint16x8_t a) { - return vgetq_lane_u16 (a, 0); + return vgetq_lane_u16 (a, 1); } /* { dg-final { scan-assembler "vmov.u16" } } */ @@ -16,7 +16,7 @@ foo (uint16x8_t a) uint16_t foo1 (uint16x8_t a) { - return vgetq_lane (a, 0); + return vgetq_lane (a, 1); } /* { dg-final { scan-assembler "vmov.u16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vgetq_lane_u32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vgetq_lane_u32.c index 721c5a5..d798370 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vgetq_lane_u32.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vgetq_lane_u32.c @@ -8,7 +8,7 @@ uint32_t foo (uint32x4_t a) { - return vgetq_lane_u32 (a, 0); + return vgetq_lane_u32 (a, 1); } /* { dg-final { scan-assembler "vmov.32" } } */ @@ -16,7 +16,7 @@ foo (uint32x4_t a) uint32_t foo1 (uint32x4_t a) { - return vgetq_lane (a, 0); + return vgetq_lane (a, 1); } /* { dg-final { scan-assembler "vmov.32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vgetq_lane_u8.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vgetq_lane_u8.c index 2bcaeac..631d995 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vgetq_lane_u8.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vgetq_lane_u8.c @@ -8,7 +8,7 @@ uint8_t foo (uint8x16_t a) { - return vgetq_lane_u8 (a, 0); + return vgetq_lane_u8 (a, 1); } /* { dg-final { scan-assembler "vmov.u8" } } */ @@ -16,7 +16,7 @@ foo (uint8x16_t a) uint8_t foo1 (uint8x16_t a) { - return vgetq_lane (a, 0); + return vgetq_lane (a, 1); } /* { dg-final { scan-assembler "vmov.u8" } } */ -- cgit v1.1 From 0e7754560f694b4e702baebdc481f6b0e82f7b14 Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Wed, 14 Jul 2021 00:16:44 +0000 Subject: Daily bump. 
--- gcc/ChangeLog | 186 ++++++++++++++++++++++++++++++++++++++++++++++++ gcc/DATESTAMP | 2 +- gcc/testsuite/ChangeLog | 157 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 344 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 98570fe..ebeb274 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,189 @@ +2021-07-13 Jonathan Wright + + * combine.c (combine_simplify_rtx): Add vec_select -> subreg + simplification. + * config/aarch64/aarch64.md (*zero_extend2_aarch64): + Add Neon to general purpose register case for zero-extend + pattern. + * config/arm/vfp.md (*arm_movsi_vfp): Remove "*" from *t -> r + case to prevent some cases opting to go through memory. + * cse.c (fold_rtx): Add vec_select -> subreg simplification. + * rtl.c (rtvec_series_p): Define predicate to determine + whether a vector contains a linear series of integers. + * rtl.h (rtvec_series_p): Define. + * rtlanal.c (vec_series_lowpart_p): Define predicate to + determine if a vector selection is equivalent to the low part + of the vector. + * rtlanal.h (vec_series_lowpart_p): Define. + * simplify-rtx.c (simplify_context::simplify_binary_operation_1): + Add vec_select -> subreg simplification. + +2021-07-13 Paul A. Clarke + + * config/rs6000/smmintrin.h (_mm_testz_si128, _mm_testc_si128, + _mm_testnzc_si128, _mm_test_all_ones, _mm_test_all_zeros, + _mm_test_mix_ones_zeros): New. + +2021-07-13 Roger Sayle + Richard Biener + + * gimple.c (gimple_could_trap_p_1): Make S argument a + "const gimple*". Preserve constness in call to + gimple_asm_volatile_p. + (gimple_could_trap_p): Make S argument a "const gimple*". + * gimple.h (gimple_could_trap_p_1, gimple_could_trap_p): + Update function prototypes. + +2021-07-13 Richard Sandiford + + * tree-vectorizer.h (vect_reusable_accumulator): New structure. + (_loop_vec_info::main_loop_edge): New field. + (_loop_vec_info::skip_main_loop_edge): Likewise. + (_loop_vec_info::skip_this_loop_edge): Likewise. 
+ (_loop_vec_info::reusable_accumulators): Likewise. + (_stmt_vec_info::reduc_scalar_results): Likewise. + (_stmt_vec_info::reused_accumulator): Likewise. + (vect_get_main_loop_result): Declare. + * tree-vectorizer.c (vec_info::new_stmt_vec_info): Initialize + reduc_scalar_inputs. + (vec_info::free_stmt_vec_info): Free reduc_scalar_inputs. + * tree-vect-loop-manip.c (vect_get_main_loop_result): New function. + (vect_do_peeling): Fill an epilogue loop's main_loop_edge, + skip_main_loop_edge and skip_this_loop_edge fields. + * tree-vect-loop.c (INCLUDE_ALGORITHM): Define. + (vect_emit_reduction_init_stmts): New function. + (get_initial_def_for_reduction): Use it. + (get_initial_defs_for_reduction): Likewise. Change the vinfo + parameter to a loop_vec_info. + (vect_create_epilog_for_reduction): Store the scalar results + in the reduc_info. If an epilogue loop is reusing an accumulator + from the main loop, and if the epilogue loop can also be skipped, + try to place the reduction code in the join block. Record + accumulators that could potentially be reused by epilogue loops. + (vect_transform_cycle_phi): When vectorizing epilogue loops, + try to reuse accumulators from the main loop. Record the initial + value in reduc_info for non-SLP reductions too. + +2021-07-13 Richard Sandiford + + * tree-vect-loop.c (get_initial_def_for_reduction): Remove + adjustment handling. Take the neutral value as an argument, + in place of the code argument. + (vect_transform_cycle_phi): Update accordingly. Handle the + initial values of cond reductions separately from code reductions. + Choose the adjustment here rather than in + get_initial_def_for_reduction. Sink the splat of vec_initial_def. + +2021-07-13 Richard Sandiford + + * tree-vect-loop.c (neutral_op_for_slp_reduction): Replace with... + (neutral_op_for_reduction): ...this, providing a more general + interface. + (vect_create_epilog_for_reduction): Update accordingly. + (vectorizable_reduction): Likewise. 
+ (vect_transform_cycle_phi): Likewise. + +2021-07-13 Richard Sandiford + + * tree-vect-loop.c (get_initial_def_for_reduction): Take the + reduc_info instead of the original stmt_vec_info. + (vect_transform_cycle_phi): Update accordingly. + +2021-07-13 Richard Sandiford + + * tree-vect-loop.c (get_initial_defs_for_reduction): Take the + reduc_info as an additional parameter. + (vect_transform_cycle_phi): Update accordingly. + +2021-07-13 Richard Sandiford + + * tree-vectorizer.h: Include tree-ssa-operands.h. + (vect_phi_initial_value): New function. + * tree-vect-loop.c (neutral_op_for_slp_reduction): Use it. + (get_initial_defs_for_reduction, info_for_reduction): Likewise. + (vect_create_epilog_for_reduction, vectorizable_reduction): Likewise. + (vect_transform_cycle_phi, vectorizable_induction): Likewise. + +2021-07-13 Richard Sandiford + + * tree-vect-loop.c (vect_create_epilog_for_reduction): Convert + the phi results to vectype after creating them. Remove later + conversion code that thus becomes redundant. + +2021-07-13 Richard Sandiford + + * tree-vect-loop.c (vect_create_epilog_for_reduction): Replace + the new_phis vector with a reduc_inputs vector. Combine handling + of reduction chains and ncopies > 1. + +2021-07-13 Richard Sandiford + + * tree-vect-loop.c (vect_create_epilog_for_reduction): Truncate + scalar_results to group_size elements after reducing down from + N*group_size elements. Construct an array_slice of the live-out + stmts and assert that there is one stmt per scalar result. + +2021-07-13 Richard Sandiford + + * tree-vect-loop.c (vect_create_epilog_for_reduction): Remove + nested_in_vect_loop and use double_reduc everywhere. Remove dead + assignment to "loop". + +2021-07-13 Richard Sandiford + + * internal-fn.c (vectorized_internal_fn_supported_p): Handle + vector types first. For scalar types, consider both the preferred + vector mode and the alternative vector modes. 
+ * optabs-query.c (can_vec_mask_load_store_p): Use the same + structure as above, in particular using related_vector_mode + for modes provided by autovectorize_vector_modes. + +2021-07-13 Jakub Jelinek + Richard Biener + + PR tree-optimization/101419 + * tree-pass.h (PROP_objsz): Define. + (make_pass_early_object_sizes): Declare. + * passes.def (pass_all_early_optimizations): Rename pass_object_sizes + there to pass_early_object_sizes, drop parameter. + (pass_all_optimizations): Move pass_object_sizes right after pass_ccp, + drop parameter, move pass_post_ipa_warn right after that. + * tree-object-size.c (pass_object_sizes::execute): Rename to... + (object_sizes_execute): ... this. Add insert_min_max_p argument. + (pass_data_object_sizes): Move after object_sizes_execute. + (pass_object_sizes): Likewise. In execute method call + object_sizes_execute, drop set_pass_param method and insert_min_max_p + non-static data member and its initializer in the ctor. + (pass_data_early_object_sizes, pass_early_object_sizes, + make_pass_early_object_sizes): New. + * tree-ssa-sccvn.c (copy_reference_ops_from_ref): Use + (cfun->curr_properties & PROP_objsz) instead of cfun->after_inlining. + +2021-07-13 Kito Cheng + + PR target/101275 + * config/riscv/constraints.md ("S"): Update description and remove + @internal. + * doc/md.texi (Machine Constraints): Document the 'S' constraints + for RISC-V. + +2021-07-13 Richard Biener + + Revert: + 2021-07-12 Richard Biener + + * tree-vect-slp.c (vect_slp_region): Show the number of + SLP graph entries in the optimization message. + +2021-07-13 Michael Meissner + + * config/rs6000/altivec.md (xxspltiw_v4sf): Change local variable + value to long. + * config/rs6000/rs6000-protos.h (rs6000_const_f32_to_i32): Change + return type to long. + * config/rs6000/rs6000.c (rs6000_const_f32_to_i32): Change return + type to long.
+ 2021-07-12 Andrew MacLeod * gimple-range-fold.cc (fold_using_range::range_of_builtin_ubsan_call): diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 12fd5d4..4b4dbab 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20210713 +20210714 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 6eacfd0..0ed2e93 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,160 @@ +2021-07-13 Jonathan Wright + + * gcc.target/aarch64/extract_zero_extend.c: Remove dump scan + for RTL pattern match. + * gcc.target/aarch64/narrow_high_combine.c: Add new tests. + * gcc.target/aarch64/simd/vmulx_laneq_f64_1.c: Update + scan-assembler regex to look for a scalar register instead of + lane 0 of a vector. + * gcc.target/aarch64/simd/vmulxd_laneq_f64_1.c: Likewise. + * gcc.target/aarch64/simd/vmulxs_lane_f32_1.c: Likewise. + * gcc.target/aarch64/simd/vmulxs_laneq_f32_1.c: Likewise. + * gcc.target/aarch64/simd/vqdmlalh_lane_s16.c: Likewise. + * gcc.target/aarch64/simd/vqdmlals_lane_s32.c: Likewise. + * gcc.target/aarch64/simd/vqdmlslh_lane_s16.c: Likewise. + * gcc.target/aarch64/simd/vqdmlsls_lane_s32.c: Likewise. + * gcc.target/aarch64/simd/vqdmullh_lane_s16.c: Likewise. + * gcc.target/aarch64/simd/vqdmullh_laneq_s16.c: Likewise. + * gcc.target/aarch64/simd/vqdmulls_lane_s32.c: Likewise. + * gcc.target/aarch64/simd/vqdmulls_laneq_s32.c: Likewise. + * gcc.target/aarch64/sve/dup_lane_1.c: Likewise. + * gcc.target/aarch64/sve/extract_1.c: Likewise. + * gcc.target/aarch64/sve/extract_2.c: Likewise. + * gcc.target/aarch64/sve/extract_3.c: Likewise. + * gcc.target/aarch64/sve/extract_4.c: Likewise. + * gcc.target/aarch64/sve/live_1.c: Update scan-assembler regex + cases to look for 'b' and 'h' registers instead of 'w'. + * gcc.target/arm/crypto-vsha1cq_u32.c: Update scan-assembler + regex to reflect lane 0 vector extractions being simplified + to scalar register moves. + * gcc.target/arm/crypto-vsha1h_u32.c: Likewise. 
+ * gcc.target/arm/crypto-vsha1mq_u32.c: Likewise. + * gcc.target/arm/crypto-vsha1pq_u32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vgetq_lane_f16.c: Extract + lane 1 as the moves for lane 0 now get optimized away. + * gcc.target/arm/mve/intrinsics/vgetq_lane_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vgetq_lane_s16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vgetq_lane_s32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vgetq_lane_s8.c: Likewise. + * gcc.target/arm/mve/intrinsics/vgetq_lane_u16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vgetq_lane_u32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vgetq_lane_u8.c: Likewise. + +2021-07-13 Paul A. Clarke + + * gcc.target/powerpc/sse4_1-ptest-1.c: Copy from + gcc/testsuite/gcc.target/i386. + +2021-07-13 Andrew MacLeod + + PR tree-optimization/93781 + * gcc.dg/tree-ssa/pr93781-1.c: Check that call is removed. + +2021-07-13 Richard Sandiford + + * gcc.target/aarch64/sve/reduc_9.c: New test. + * gcc.target/aarch64/sve/reduc_9_run.c: Likewise. + * gcc.target/aarch64/sve/reduc_10.c: Likewise. + * gcc.target/aarch64/sve/reduc_10_run.c: Likewise. + * gcc.target/aarch64/sve/reduc_11.c: Likewise. + * gcc.target/aarch64/sve/reduc_11_run.c: Likewise. + * gcc.target/aarch64/sve/reduc_12.c: Likewise. + * gcc.target/aarch64/sve/reduc_12_run.c: Likewise. + * gcc.target/aarch64/sve/reduc_13.c: Likewise. + * gcc.target/aarch64/sve/reduc_13_run.c: Likewise. + * gcc.target/aarch64/sve/reduc_14.c: Likewise. + * gcc.target/aarch64/sve/reduc_14_run.c: Likewise. + * gcc.target/aarch64/sve/reduc_15.c: Likewise. + * gcc.target/aarch64/sve/reduc_15_run.c: Likewise. + +2021-07-13 Richard Sandiford + + * gcc.target/aarch64/sve/cond_arith_6.c: New test. + +2021-07-13 Jakub Jelinek + Richard Biener + + PR tree-optimization/101419 + * gcc.dg/builtin-object-size-10.c: Pass -fdump-tree-early_objsz-details + instead of -fdump-tree-objsz1-details in dg-options and adjust names + of dump file in scan-tree-dump. 
+ * gcc.dg/pr101419.c: New test. + +2021-07-13 Richard Biener + + Revert: + 2021-07-13 Richard Biener + + * g++.dg/vect/slp-pr87105.cc: Adjust. + * gcc.dg/vect/bb-slp-pr54400.c: Likewise. + +2021-07-13 Michael Meissner + + PR testsuite/100166 + * gcc.dg/vect/costmodel/ppc/costmodel-bb-slp-9a-pr63175.c: Update + insn counts to account for power10 prefixed loads and stores. + * gcc.target/powerpc/fold-vec-load-builtin_vec_xl-char.c: + Likewise. + * gcc.target/powerpc/fold-vec-load-builtin_vec_xl-double.c: + Likewise. + * gcc.target/powerpc/fold-vec-load-builtin_vec_xl-float.c: + Likewise. + * gcc.target/powerpc/fold-vec-load-builtin_vec_xl-int.c: + Likewise. + * gcc.target/powerpc/fold-vec-load-builtin_vec_xl-longlong.c: + Likewise. + * gcc.target/powerpc/fold-vec-load-builtin_vec_xl-short.c: + Likewise. + * gcc.target/powerpc/fold-vec-load-vec_vsx_ld-char.c: Likewise. + * gcc.target/powerpc/fold-vec-load-vec_vsx_ld-double.c: Likewise. + * gcc.target/powerpc/fold-vec-load-vec_vsx_ld-float.c: Likewise. + * gcc.target/powerpc/fold-vec-load-vec_vsx_ld-int.c: Likewise. + * gcc.target/powerpc/fold-vec-load-vec_vsx_ld-longlong.c: + Likewise. + * gcc.target/powerpc/fold-vec-load-vec_vsx_ld-short.c: Likewise. + * gcc.target/powerpc/fold-vec-load-vec_xl-char.c: Likewise. + * gcc.target/powerpc/fold-vec-load-vec_xl-double.c: Likewise. + * gcc.target/powerpc/fold-vec-load-vec_xl-float.c: Likewise. + * gcc.target/powerpc/fold-vec-load-vec_xl-int.c: Likewise. + * gcc.target/powerpc/fold-vec-load-vec_xl-longlong.c: Likewise. + * gcc.target/powerpc/fold-vec-load-vec_xl-short.c: Likewise. + * gcc.target/powerpc/fold-vec-splat-floatdouble.c: Likewise. + * gcc.target/powerpc/fold-vec-splat-longlong.c: Likewise. + * gcc.target/powerpc/fold-vec-store-builtin_vec_xst-char.c: + Likewise. + * gcc.target/powerpc/fold-vec-store-builtin_vec_xst-double.c: + Likewise. + * gcc.target/powerpc/fold-vec-store-builtin_vec_xst-float.c: + Likewise. 
+ * gcc.target/powerpc/fold-vec-store-builtin_vec_xst-int.c: + Likewise. + * gcc.target/powerpc/fold-vec-store-builtin_vec_xst-longlong.c: + Likewise. + * gcc.target/powerpc/fold-vec-store-builtin_vec_xst-short.c: + Likewise. + * gcc.target/powerpc/fold-vec-store-vec_vsx_st-char.c: Likewise. + * gcc.target/powerpc/fold-vec-store-vec_vsx_st-double.c: + Likewise. + * gcc.target/powerpc/fold-vec-store-vec_vsx_st-float.c: Likewise. + * gcc.target/powerpc/fold-vec-store-vec_vsx_st-int.c: Likewise. + * gcc.target/powerpc/fold-vec-store-vec_vsx_st-longlong.c: + Likewise. + * gcc.target/powerpc/fold-vec-store-vec_vsx_st-short.c: Likewise. + * gcc.target/powerpc/fold-vec-store-vec_xst-char.c: Likewise. + * gcc.target/powerpc/fold-vec-store-vec_xst-double.c: Likewise. + * gcc.target/powerpc/fold-vec-store-vec_xst-float.c: Likewise. + * gcc.target/powerpc/fold-vec-store-vec_xst-int.c: Likewise. + * gcc.target/powerpc/fold-vec-store-vec_xst-longlong.c: Likewise. + * gcc.target/powerpc/fold-vec-store-vec_xst-short.c: Likewise. + * gcc.target/powerpc/lvsl-lvsr.c: Likewise. + * gcc.target/powerpc/pr86731-fwrapv-longlong.c: Likewise. + +2021-07-13 Michael Meissner + + * gcc.target/powerpc/vec-splati-runnable.c: Run test with -O2 + optimization. Do not check what XXSPLTIDP generates if the value + is undefined. + 2021-07-12 Patrick Palka PR c++/79501 -- cgit v1.1 From 66907e739959ff85490f1711cfd06fdf1e946945 Mon Sep 17 00:00:00 2001 From: Alexandre Oliva Date: Tue, 13 Jul 2021 22:25:54 -0300 Subject: adjust landing pads when changing main label If an artificial label created for a landing pad ends up being dropped in favor of a user-supplied label, the user-supplied label inherits the landing pad index, but the post_landing_pad field is not adjusted to point to the new label. This patch fixes the problem, and adds verification that we don't remove a label that's still used as a landing pad. 
The circumstance in which this problem can be hit was unusual: removal of a block with an unreachable label moves the label to some other unrelated block, in case its address is taken. In the case at hand (pr42739.C, complicated by wrappers and cleanups), the chosen block happened to be an EH landing pad. (A followup patch will change that.) for gcc/ChangeLog * tree-cfg.c (cleanup_dead_labels_eh): Update post_landing_pad label upon change of landing pad block's primary label. (cleanup_dead_labels): Check that a removed label is not that of a landing pad. --- gcc/tree-cfg.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c index c73e1cb..1f0f4a2 100644 --- a/gcc/tree-cfg.c +++ b/gcc/tree-cfg.c @@ -1481,6 +1481,7 @@ cleanup_dead_labels_eh (label_record *label_for_bb) if (lab != lp->post_landing_pad) { EH_LANDING_PAD_NR (lp->post_landing_pad) = 0; + lp->post_landing_pad = lab; EH_LANDING_PAD_NR (lab) = lp->index; } } @@ -1707,7 +1708,10 @@ cleanup_dead_labels (void) || FORCED_LABEL (label)) gsi_next (&i); else - gsi_remove (&i, true); + { + gcc_checking_assert (EH_LANDING_PAD_NR (label) == 0); + gsi_remove (&i, true); + } } } -- cgit v1.1 From a7098d6ef4e4e799dab8ef925c62b199d707694b Mon Sep 17 00:00:00 2001 From: Alexandre Oliva Date: Tue, 13 Jul 2021 22:25:56 -0300 Subject: fix typo in attr_fnspec::verify Odd-numbered indices describing argument access sizes in the fnspec string can only hold 't' or a digit, as tested in the beginning of the case. When checking that the size-supplying argument does not have additional information associated with it, the test that excludes the 't' possibility looks for it at the even position in the fnspec string. Oops. This might yield false positives and negatives if a function has a fnspec in which an argument uses a 't' access-size, and ('t' - '1') happens to be the index of an argument described in an fnspec string. 
Assuming ASCII encoding, it would take a function with at least 68 arguments described in fnspec. Still, probably worth fixing. for gcc/ChangeLog * tree-ssa-alias.c (attr_fnspec::verify): Fix index in non-'t'-sized arg check. --- gcc/tree-ssa-alias.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/tree-ssa-alias.c b/gcc/tree-ssa-alias.c index 0421bfa..742a95a 100644 --- a/gcc/tree-ssa-alias.c +++ b/gcc/tree-ssa-alias.c @@ -3895,7 +3895,7 @@ attr_fnspec::verify () && str[idx] != 'w' && str[idx] != 'W' && str[idx] != 'o' && str[idx] != 'O') err = true; - if (str[idx] != 't' + if (str[idx + 1] != 't' /* Size specified is scalar, so it should be described by ". " if specified at all. */ && (arg_specified_p (str[idx + 1] - '1') -- cgit v1.1 From 1dd3f21095858fbfd3e28a149578d5fb67e75f95 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Tue, 13 Jul 2021 13:59:15 +0200 Subject: Support reduction def re-use for epilogue with different vector size The following adds support for re-using the vector reduction def from the main loop in vectorized epilogue loops on architectures which use different vector sizes for the epilogue. That's only x86 as far as I am aware. 2021-07-13 Richard Biener * tree-vect-loop.c (vect_find_reusable_accumulator): Handle vector types where the old vector type has a multiple of the new vector type elements. (vect_create_partial_epilog): New function, split out from... (vect_create_epilog_for_reduction): ... here. (vect_transform_cycle_phi): Reduce the re-used accumulator to the new vector type. * gcc.target/i386/vect-reduc-1.c: New testcase. 
--- gcc/testsuite/gcc.target/i386/vect-reduc-1.c | 17 ++ gcc/tree-vect-loop.c | 227 ++++++++++++++++----------- 2 files changed, 156 insertions(+), 88 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/vect-reduc-1.c (limited to 'gcc') diff --git a/gcc/testsuite/gcc.target/i386/vect-reduc-1.c b/gcc/testsuite/gcc.target/i386/vect-reduc-1.c new file mode 100644 index 0000000..9ee9ba4 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vect-reduc-1.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -mavx2 -mno-avx512f -fdump-tree-vect-details" } */ + +#define N 32 +int foo (int *a, int n) +{ + int sum = 1; + for (int i = 0; i < 8*N + 4; ++i) + sum += a[i]; + return sum; +} + +/* The reduction epilog should be vectorized and the accumulator + re-used. */ +/* { dg-final { scan-tree-dump "LOOP EPILOGUE VECTORIZED" "vect" } } */ +/* { dg-final { scan-assembler-times "psrl" 2 } } */ +/* { dg-final { scan-assembler-times "padd" 5 } } */ diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 8c27d75..e978015 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -4896,12 +4896,11 @@ vect_find_reusable_accumulator (loop_vec_info loop_vinfo, accumulator->reduc_info->reduc_scalar_results.begin ())) return false; - /* For now, only handle the case in which both loops are operating on the - same vector types. In future we could reduce wider vectors to narrower - ones as well. */ + /* Handle the case where we can reduce wider vectors to narrower ones. 
*/ tree vectype = STMT_VINFO_VECTYPE (reduc_info); tree old_vectype = TREE_TYPE (accumulator->reduc_input); - if (!useless_type_conversion_p (old_vectype, vectype)) + if (!constant_multiple_p (TYPE_VECTOR_SUBPARTS (old_vectype), + TYPE_VECTOR_SUBPARTS (vectype))) return false; /* Non-SLP reductions might apply an adjustment after the reduction @@ -4935,6 +4934,101 @@ vect_find_reusable_accumulator (loop_vec_info loop_vinfo, return true; } +/* Reduce the vector VEC_DEF down to VECTYPE with reduction operation + CODE emitting stmts before GSI. Returns a vector def of VECTYPE. */ + +static tree +vect_create_partial_epilog (tree vec_def, tree vectype, enum tree_code code, + gimple_seq *seq) +{ + unsigned nunits = TYPE_VECTOR_SUBPARTS (TREE_TYPE (vec_def)).to_constant (); + unsigned nunits1 = TYPE_VECTOR_SUBPARTS (vectype).to_constant (); + tree stype = TREE_TYPE (vectype); + tree new_temp = vec_def; + while (nunits > nunits1) + { + nunits /= 2; + tree vectype1 = get_related_vectype_for_scalar_type (TYPE_MODE (vectype), + stype, nunits); + unsigned int bitsize = tree_to_uhwi (TYPE_SIZE (vectype1)); + + /* The target has to make sure we support lowpart/highpart + extraction, either via direct vector extract or through + an integer mode punning. */ + tree dst1, dst2; + gimple *epilog_stmt; + if (convert_optab_handler (vec_extract_optab, + TYPE_MODE (TREE_TYPE (new_temp)), + TYPE_MODE (vectype1)) + != CODE_FOR_nothing) + { + /* Extract sub-vectors directly once vec_extract becomes + a conversion optab. 
*/ + dst1 = make_ssa_name (vectype1); + epilog_stmt + = gimple_build_assign (dst1, BIT_FIELD_REF, + build3 (BIT_FIELD_REF, vectype1, + new_temp, TYPE_SIZE (vectype1), + bitsize_int (0))); + gimple_seq_add_stmt_without_update (seq, epilog_stmt); + dst2 = make_ssa_name (vectype1); + epilog_stmt + = gimple_build_assign (dst2, BIT_FIELD_REF, + build3 (BIT_FIELD_REF, vectype1, + new_temp, TYPE_SIZE (vectype1), + bitsize_int (bitsize))); + gimple_seq_add_stmt_without_update (seq, epilog_stmt); + } + else + { + /* Extract via punning to appropriately sized integer mode + vector. */ + tree eltype = build_nonstandard_integer_type (bitsize, 1); + tree etype = build_vector_type (eltype, 2); + gcc_assert (convert_optab_handler (vec_extract_optab, + TYPE_MODE (etype), + TYPE_MODE (eltype)) + != CODE_FOR_nothing); + tree tem = make_ssa_name (etype); + epilog_stmt = gimple_build_assign (tem, VIEW_CONVERT_EXPR, + build1 (VIEW_CONVERT_EXPR, + etype, new_temp)); + gimple_seq_add_stmt_without_update (seq, epilog_stmt); + new_temp = tem; + tem = make_ssa_name (eltype); + epilog_stmt + = gimple_build_assign (tem, BIT_FIELD_REF, + build3 (BIT_FIELD_REF, eltype, + new_temp, TYPE_SIZE (eltype), + bitsize_int (0))); + gimple_seq_add_stmt_without_update (seq, epilog_stmt); + dst1 = make_ssa_name (vectype1); + epilog_stmt = gimple_build_assign (dst1, VIEW_CONVERT_EXPR, + build1 (VIEW_CONVERT_EXPR, + vectype1, tem)); + gimple_seq_add_stmt_without_update (seq, epilog_stmt); + tem = make_ssa_name (eltype); + epilog_stmt + = gimple_build_assign (tem, BIT_FIELD_REF, + build3 (BIT_FIELD_REF, eltype, + new_temp, TYPE_SIZE (eltype), + bitsize_int (bitsize))); + gimple_seq_add_stmt_without_update (seq, epilog_stmt); + dst2 = make_ssa_name (vectype1); + epilog_stmt = gimple_build_assign (dst2, VIEW_CONVERT_EXPR, + build1 (VIEW_CONVERT_EXPR, + vectype1, tem)); + gimple_seq_add_stmt_without_update (seq, epilog_stmt); + } + + new_temp = make_ssa_name (vectype1); + epilog_stmt = gimple_build_assign 
(new_temp, code, dst1, dst2); + gimple_seq_add_stmt_without_update (seq, epilog_stmt); + } + + return new_temp; +} + /* Function vect_create_epilog_for_reduction Create code at the loop-epilog to finalize the result of a reduction @@ -5684,87 +5778,11 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, /* First reduce the vector to the desired vector size we should do shift reduction on by combining upper and lower halves. */ - new_temp = reduc_inputs[0]; - while (nunits > nunits1) - { - nunits /= 2; - vectype1 = get_related_vectype_for_scalar_type (TYPE_MODE (vectype), - stype, nunits); - unsigned int bitsize = tree_to_uhwi (TYPE_SIZE (vectype1)); - - /* The target has to make sure we support lowpart/highpart - extraction, either via direct vector extract or through - an integer mode punning. */ - tree dst1, dst2; - if (convert_optab_handler (vec_extract_optab, - TYPE_MODE (TREE_TYPE (new_temp)), - TYPE_MODE (vectype1)) - != CODE_FOR_nothing) - { - /* Extract sub-vectors directly once vec_extract becomes - a conversion optab. */ - dst1 = make_ssa_name (vectype1); - epilog_stmt - = gimple_build_assign (dst1, BIT_FIELD_REF, - build3 (BIT_FIELD_REF, vectype1, - new_temp, TYPE_SIZE (vectype1), - bitsize_int (0))); - gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); - dst2 = make_ssa_name (vectype1); - epilog_stmt - = gimple_build_assign (dst2, BIT_FIELD_REF, - build3 (BIT_FIELD_REF, vectype1, - new_temp, TYPE_SIZE (vectype1), - bitsize_int (bitsize))); - gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); - } - else - { - /* Extract via punning to appropriately sized integer mode - vector. 
*/ - tree eltype = build_nonstandard_integer_type (bitsize, 1); - tree etype = build_vector_type (eltype, 2); - gcc_assert (convert_optab_handler (vec_extract_optab, - TYPE_MODE (etype), - TYPE_MODE (eltype)) - != CODE_FOR_nothing); - tree tem = make_ssa_name (etype); - epilog_stmt = gimple_build_assign (tem, VIEW_CONVERT_EXPR, - build1 (VIEW_CONVERT_EXPR, - etype, new_temp)); - gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); - new_temp = tem; - tem = make_ssa_name (eltype); - epilog_stmt - = gimple_build_assign (tem, BIT_FIELD_REF, - build3 (BIT_FIELD_REF, eltype, - new_temp, TYPE_SIZE (eltype), - bitsize_int (0))); - gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); - dst1 = make_ssa_name (vectype1); - epilog_stmt = gimple_build_assign (dst1, VIEW_CONVERT_EXPR, - build1 (VIEW_CONVERT_EXPR, - vectype1, tem)); - gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); - tem = make_ssa_name (eltype); - epilog_stmt - = gimple_build_assign (tem, BIT_FIELD_REF, - build3 (BIT_FIELD_REF, eltype, - new_temp, TYPE_SIZE (eltype), - bitsize_int (bitsize))); - gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); - dst2 = make_ssa_name (vectype1); - epilog_stmt = gimple_build_assign (dst2, VIEW_CONVERT_EXPR, - build1 (VIEW_CONVERT_EXPR, - vectype1, tem)); - gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); - } - - new_temp = make_ssa_name (vectype1); - epilog_stmt = gimple_build_assign (new_temp, code, dst1, dst2); - gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); - reduc_inputs[0] = new_temp; - } + gimple_seq stmts = NULL; + new_temp = vect_create_partial_epilog (reduc_inputs[0], vectype1, + code, &stmts); + gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); + reduc_inputs[0] = new_temp; if (reduce_with_shift && !slp_reduc) { @@ -7681,13 +7699,46 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo, if (auto *accumulator = reduc_info->reused_accumulator) { + tree def = accumulator->reduc_input; + unsigned int nreduc; + 
bool res = constant_multiple_p (TYPE_VECTOR_SUBPARTS (TREE_TYPE (def)), + TYPE_VECTOR_SUBPARTS (vectype_out), + &nreduc); + gcc_assert (res); + if (nreduc != 1) + { + /* Reduce the single vector to a smaller one. */ + gimple_seq stmts = NULL; + def = vect_create_partial_epilog (def, vectype_out, + STMT_VINFO_REDUC_CODE (reduc_info), + &stmts); + /* Adjust the input so we pick up the partially reduced value + for the skip edge in vect_create_epilog_for_reduction. */ + accumulator->reduc_input = def; + if (loop_vinfo->main_loop_edge) + { + /* While we'd like to insert on the edge this will split + blocks and disturb bookkeeping, we also will eventually + need this on the skip edge. Rely on sinking to + fixup optimal placement and insert in the pred. */ + gimple_stmt_iterator gsi + = gsi_last_bb (loop_vinfo->main_loop_edge->src); + /* Insert before a cond that eventually skips the + epilogue. */ + if (!gsi_end_p (gsi) && stmt_ends_bb_p (gsi_stmt (gsi))) + gsi_prev (&gsi); + gsi_insert_seq_after (&gsi, stmts, GSI_CONTINUE_LINKING); + } + else + gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), + stmts); + } if (loop_vinfo->main_loop_edge) vec_initial_defs[0] - = vect_get_main_loop_result (loop_vinfo, accumulator->reduc_input, + = vect_get_main_loop_result (loop_vinfo, def, vec_initial_defs[0]); else - vec_initial_defs.safe_push (accumulator->reduc_input); - gcc_assert (vec_initial_defs.length () == 1); + vec_initial_defs.safe_push (def); } /* Generate the reduction PHIs upfront. */ -- cgit v1.1 From 3be762c2ed79e36b9c8faaea2be04725c967a34e Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Wed, 14 Jul 2021 10:22:50 +0200 Subject: godump: Fix -fdump-go-spec= reproduceability issue [PR101407] pot_dummy_types is a hash_set from whose traversal the code prints some type lines. 
hash_set normally uses default_hash_traits which for pointer types (the hash set hashes const char *) uses pointer_hash which hashes the addresses of the pointers except for the least significant 3 bits. With address space randomization, that results in non-determinism in the -fdump-go-spec= generated file, each invocation can have different order of the lines emitted from pot_dummy_types traversal. This patch fixes it by hashing the string contents instead to make the hashes reproducible. 2021-07-14 Jakub Jelinek PR go/101407 * godump.c (godump_str_hash): New type. (godump_container::pot_dummy_types): Use string_hash instead of ptr_hash in the hash_set. --- gcc/godump.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/godump.c b/gcc/godump.c index cf99894..a50aef1 100644 --- a/gcc/godump.c +++ b/gcc/godump.c @@ -56,6 +56,8 @@ static FILE *go_dump_file; static GTY(()) vec *queue; +struct godump_str_hash : string_hash, ggc_remove {}; + /* A hash table of macros we have seen. */ static htab_t macro_hash; @@ -535,7 +537,7 @@ public: /* Types which may potentially have to be defined as dummy types. */ - hash_set pot_dummy_types; + hash_set pot_dummy_types; /* Go keywords. */ htab_t keyword_hash; -- cgit v1.1 From a967a3efd39280fe3f5774e45490e991f8e99059 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Wed, 14 Jul 2021 11:06:58 +0200 Subject: tree-optimization/101445 - fix negative stride SLP vect with gaps The following fixes the IV adjustment for the gap in a negative stride SLP vectorization. The adjustment was in the wrong direction, now fixed as in the patch. 2021-07-14 Richard Biener PR tree-optimization/101445 * tree-vect-stmts.c (vectorizable_load): Do the gap adjustment of the IV in the correct direction for negative stride accesses. * gcc.dg/vect/pr101445.c: New testcase.
--- gcc/testsuite/gcc.dg/vect/pr101445.c | 28 ++++++++++++++++++++++++++++ gcc/tree-vect-stmts.c | 6 ++++++ 2 files changed, 34 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/vect/pr101445.c (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/vect/pr101445.c b/gcc/testsuite/gcc.dg/vect/pr101445.c new file mode 100644 index 0000000..f8a6e9c --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr101445.c @@ -0,0 +1,28 @@ +/* { dg-do run } */ + +#include "tree-vect.h" + +int a[35] = { 1, 1, 3 }; + +void __attribute__((noipa)) +foo () +{ + for (int b = 4; b >= 0; b--) + { + int tem = a[b * 5 + 3 + 1]; + a[b * 5 + 3] = tem; + a[b * 5 + 2] = tem; + a[b * 5 + 1] = tem; + a[b * 5 + 0] = tem; + } +} + +int main() +{ + check_vect (); + foo (); + for (int d = 0; d < 25; d++) + if (a[d] != 0) + __builtin_abort (); + return 0; +} diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index e590f34..3980f09 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -9759,6 +9759,9 @@ vectorizable_load (vec_info *vinfo, poly_wide_int bump_val = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj); + if (tree_int_cst_sgn + (vect_dr_behavior (vinfo, dr_info)->step) == -1) + bump_val = -bump_val; tree bump = wide_int_to_tree (sizetype, bump_val); dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi, stmt_info, bump); @@ -9772,6 +9775,9 @@ vectorizable_load (vec_info *vinfo, poly_wide_int bump_val = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj); + if (tree_int_cst_sgn + (vect_dr_behavior (vinfo, dr_info)->step) == -1) + bump_val = -bump_val; tree bump = wide_int_to_tree (sizetype, bump_val); dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi, stmt_info, bump); -- cgit v1.1 From cc11b924bfe7752edbba052ca71653f46a60887a Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Fri, 9 Jul 2021 09:16:01 -0700 Subject: x86: Don't enable UINTR in 32-bit mode UINTR is available only in 64-bit mode. 
Since the codegen target is unknown when the gcc driver is processing -march=native, to properly handle UINTR for -march=native: 1. Pass "arch [32|64]" and "tune [32|64]" to host_detect_local_cpu to indicate 32-bit and 64-bit codegen. 2. Change ix86_option_override_internal to enable UINTR only in 64-bit mode for -march=CPU when PTA_CPU includes PTA_UINTR. gcc/ PR target/101395 * config/i386/driver-i386.c (host_detect_local_cpu): Check "arch [32|64]" and "tune [32|64]" for 32-bit and 64-bit codegen. Enable UINTR only for 64-bit codegen. * config/i386/i386-options.c (ix86_option_override_internal::DEF_PTA): Skip PTA_UINTR if not in 64-bit mode. * config/i386/i386.h (ARCH_ARG): New. (CC1_CPU_SPEC): Pass "[arch|tune] 32" for 32-bit codegen and "[arch|tune] 64" for 64-bit codegen. gcc/testsuite/ PR target/101395 * gcc.target/i386/pr101395-1.c: New test. * gcc.target/i386/pr101395-2.c: Likewise. * gcc.target/i386/pr101395-3.c: Likewise. --- gcc/config/i386/driver-i386.c | 25 +++++++++++++++++++------ gcc/config/i386/i386-options.c | 1 + gcc/config/i386/i386.h | 7 ++++--- gcc/testsuite/gcc.target/i386/pr101395-1.c | 12 ++++++++++++ gcc/testsuite/gcc.target/i386/pr101395-2.c | 22 ++++++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr101395-3.c | 6 ++++++ 6 files changed, 64 insertions(+), 9 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr101395-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pr101395-2.c create mode 100644 gcc/testsuite/gcc.target/i386/pr101395-3.c (limited to 'gcc') diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c index dd92366..f844a16 100644 --- a/gcc/config/i386/driver-i386.c +++ b/gcc/config/i386/driver-i386.c @@ -370,9 +370,9 @@ detect_caches_intel (bool xeon_mp, unsigned max_level, } /* This will be called by the spec parser in gcc.c when it sees - a %:local_cpu_detect(args) construct.
Currently it will be called - with either "arch" or "tune" as argument depending on if -march=native - or -mtune=native is to be substituted. + a %:local_cpu_detect(args) construct. Currently it will be + called with either "arch [32|64]" or "tune [32|64]" as argument + depending on if -march=native or -mtune=native is to be substituted. It returns a string containing new command line parameters to be put at the place of the above two options, depending on what CPU @@ -401,7 +401,7 @@ const char *host_detect_local_cpu (int argc, const char **argv) unsigned int l2sizekb = 0; - if (argc < 1) + if (argc < 2) return NULL; arch = !strcmp (argv[0], "arch"); @@ -409,6 +409,15 @@ const char *host_detect_local_cpu (int argc, const char **argv) if (!arch && strcmp (argv[0], "tune")) return NULL; + bool codegen_x86_64; + + if (!strcmp (argv[1], "32")) + codegen_x86_64 = false; + else if (!strcmp (argv[1], "64")) + codegen_x86_64 = true; + else + return NULL; + struct __processor_model cpu_model = { }; struct __processor_model2 cpu_model2 = { }; unsigned int cpu_features2[SIZE_OF_CPU_FEATURES] = { }; @@ -804,8 +813,12 @@ const char *host_detect_local_cpu (int argc, const char **argv) if (isa_names_table[i].option) { if (has_feature (isa_names_table[i].feature)) - options = concat (options, " ", - isa_names_table[i].option, NULL); + { + if (codegen_x86_64 + || isa_names_table[i].feature != FEATURE_UINTR) + options = concat (options, " ", + isa_names_table[i].option, NULL); + } else options = concat (options, neg_option, isa_names_table[i].option + 2, NULL); diff --git a/gcc/config/i386/i386-options.c b/gcc/config/i386/i386-options.c index 7a35c46..7cba655 100644 --- a/gcc/config/i386/i386-options.c +++ b/gcc/config/i386/i386-options.c @@ -2109,6 +2109,7 @@ ix86_option_override_internal (bool main_args_p, #define DEF_PTA(NAME) \ if (((processor_alias_table[i].flags & PTA_ ## NAME) != 0) \ && PTA_ ## NAME != PTA_64BIT \ + && (TARGET_64BIT || PTA_ ## NAME != PTA_UINTR) \ && 
!TARGET_EXPLICIT_ ## NAME ## _P (opts)) \ SET_TARGET_ ## NAME (opts); #include "i386-isa.def" diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 8c3eace..324e8a9 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -576,10 +576,11 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); #ifndef HAVE_LOCAL_CPU_DETECT #define CC1_CPU_SPEC CC1_CPU_SPEC_1 #else +#define ARCH_ARG "%{" OPT_ARCH64 ":64;:32}" #define CC1_CPU_SPEC CC1_CPU_SPEC_1 \ -"%{march=native:%>march=native %:local_cpu_detect(arch) \ - %{!mtune=*:%>mtune=native %:local_cpu_detect(tune)}} \ -%{mtune=native:%>mtune=native %:local_cpu_detect(tune)}" +"%{march=native:%>march=native %:local_cpu_detect(arch " ARCH_ARG ") \ + %{!mtune=*:%>mtune=native %:local_cpu_detect(tune " ARCH_ARG ")}} \ +%{mtune=native:%>mtune=native %:local_cpu_detect(tune " ARCH_ARG ")}" #endif #endif diff --git a/gcc/testsuite/gcc.target/i386/pr101395-1.c b/gcc/testsuite/gcc.target/i386/pr101395-1.c new file mode 100644 index 0000000..74c8bfe --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr101395-1.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=sapphirerapids" } */ + +#ifdef __x86_64__ +# ifndef __UINTR__ +# error UINTR is not enabled for Sapphirerapids +# endif +#else +# ifdef __UINTR__ +# error UINTR is not usable in 32-bit mode +# endif +#endif diff --git a/gcc/testsuite/gcc.target/i386/pr101395-2.c b/gcc/testsuite/gcc.target/i386/pr101395-2.c new file mode 100644 index 0000000..f2b677f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr101395-2.c @@ -0,0 +1,22 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -march=native" } */ + +int +main () +{ + if (__builtin_cpu_supports ("uintr")) + { +#ifdef __x86_64__ +# ifndef __UINTR__ + __builtin_abort (); +# endif +#else +# ifdef __UINTR__ + __builtin_abort (); +# endif +#endif + return 0; + } + + return 0; +} diff --git a/gcc/testsuite/gcc.target/i386/pr101395-3.c b/gcc/testsuite/gcc.target/i386/pr101395-3.c 
new file mode 100644 index 0000000..bc6ab42 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr101395-3.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=native -mno-uintr" } */ + +#ifdef __UINTR__ +# error UINTR should be disabled +#endif -- cgit v1.1 From ab0a6b213abf6843b59cdea6399030e828109551 Mon Sep 17 00:00:00 2001 From: Tamar Christina Date: Wed, 14 Jul 2021 14:54:26 +0100 Subject: Vect: Add support for dot-product where the sign for the multiplicand changes. This patch adds support for a dot product where the sign of the multiplication arguments differs, i.e. one is signed and one is unsigned but the precisions are the same. #define N 480 #define SIGNEDNESS_1 unsigned #define SIGNEDNESS_2 signed #define SIGNEDNESS_3 signed #define SIGNEDNESS_4 unsigned SIGNEDNESS_1 int __attribute__ ((noipa)) f (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict a, SIGNEDNESS_4 char *restrict b) { for (__INTPTR_TYPE__ i = 0; i < N; ++i) { int av = a[i]; int bv = b[i]; SIGNEDNESS_2 short mult = av * bv; res += mult; } return res; } The operations are performed as if the operands were extended to a 32-bit value. As such this operation isn't valid if there is an intermediate conversion to an unsigned value, i.e. if SIGNEDNESS_2 is unsigned. Moreover, if the signs of SIGNEDNESS_3 and SIGNEDNESS_4 are flipped the same optab is used but the operands are flipped in the optab expansion. To support this the patch extends the dot-product detection to optionally ignore operands with different signs and stores this information in the optab subtype which is now made a bitfield. The subtype can now additionally control which optab an EXPR can expand to. gcc/ChangeLog: * optabs.def (usdot_prod_optab): New. * doc/md.texi: Document it and clarify other dot prod optabs. * optabs-tree.h (enum optab_subtype): Add optab_vector_mixed_sign. * optabs-tree.c (optab_for_tree_code): Support usdot_prod_optab. * optabs.c (expand_widen_pattern_expr): Likewise.
* tree-cfg.c (verify_gimple_assign_ternary): Likewise. * tree-vect-loop.c (vectorizable_reduction): Query dot-product kind. * tree-vect-patterns.c (vect_supportable_direct_optab_p): Take optional optab subtype. (vect_widened_op_tree): Optionally ignore mismatch types. (vect_recog_dot_prod_pattern): Support usdot_prod_optab. --- gcc/doc/md.texi | 52 +++++++++++++++++++++++++---- gcc/optabs-tree.c | 7 +++- gcc/optabs-tree.h | 3 +- gcc/optabs.c | 32 +++++++++++++++--- gcc/optabs.def | 1 + gcc/tree-cfg.c | 3 +- gcc/tree-vect-loop.c | 8 ++++- gcc/tree-vect-patterns.c | 86 ++++++++++++++++++++++++++++++++++++------------ 8 files changed, 156 insertions(+), 36 deletions(-) (limited to 'gcc') diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index 8225a76..07681e2 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -5449,13 +5449,53 @@ Like @samp{fold_left_plus_@var{m}}, but takes an additional mask operand @cindex @code{sdot_prod@var{m}} instruction pattern @item @samp{sdot_prod@var{m}} + +Compute the sum of the products of two signed elements. +Operand 1 and operand 2 are of the same mode. Their +product, which is of a wider mode, is computed and added to operand 3. +Operand 3 is of a mode equal or wider than the mode of the product. The +result is placed in operand 0, which is of the same mode as operand 3. + +Semantically the expressions perform the multiplication in the following signs + +@smallexample +sdot == + op0 = sign-ext (op1) * sign-ext (op2) + op3 +@dots{} +@end smallexample + @cindex @code{udot_prod@var{m}} instruction pattern -@itemx @samp{udot_prod@var{m}} -Compute the sum of the products of two signed/unsigned elements. -Operand 1 and operand 2 are of the same mode. Their product, which is of a -wider mode, is computed and added to operand 3. Operand 3 is of a mode equal or -wider than the mode of the product. The result is placed in operand 0, which -is of the same mode as operand 3. 
+@item @samp{udot_prod@var{m}} + +Compute the sum of the products of two unsigned elements. +Operand 1 and operand 2 are of the same mode. Their +product, which is of a wider mode, is computed and added to operand 3. +Operand 3 is of a mode equal or wider than the mode of the product. The +result is placed in operand 0, which is of the same mode as operand 3. + +Semantically the expressions perform the multiplication in the following signs + +@smallexample +udot == + op0 = zero-ext (op1) * zero-ext (op2) + op3 +@dots{} +@end smallexample + +@cindex @code{usdot_prod@var{m}} instruction pattern +@item @samp{usdot_prod@var{m}} +Compute the sum of the products of elements of different signs. +Operand 1 must be unsigned and operand 2 signed. Their +product, which is of a wider mode, is computed and added to operand 3. +Operand 3 is of a mode equal or wider than the mode of the product. The +result is placed in operand 0, which is of the same mode as operand 3. + +Semantically the expressions perform the multiplication in the following signs + +@smallexample +usdot == + op0 = ((signed-conv) zero-ext (op1)) * sign-ext (op2) + op3 +@dots{} +@end smallexample @cindex @code{ssad@var{m}} instruction pattern @item @samp{ssad@var{m}} diff --git a/gcc/optabs-tree.c b/gcc/optabs-tree.c index 95ffe39..eeb5aee 100644 --- a/gcc/optabs-tree.c +++ b/gcc/optabs-tree.c @@ -127,7 +127,12 @@ optab_for_tree_code (enum tree_code code, const_tree type, return TYPE_UNSIGNED (type) ? usum_widen_optab : ssum_widen_optab; case DOT_PROD_EXPR: - return TYPE_UNSIGNED (type) ? udot_prod_optab : sdot_prod_optab; + { + if (subtype == optab_vector_mixed_sign) + return usdot_prod_optab; + + return (TYPE_UNSIGNED (type) ? udot_prod_optab : sdot_prod_optab); + } case SAD_EXPR: return TYPE_UNSIGNED (type) ? 
usad_optab : ssad_optab; diff --git a/gcc/optabs-tree.h b/gcc/optabs-tree.h index c3aaa1a..fbd2b06 100644 --- a/gcc/optabs-tree.h +++ b/gcc/optabs-tree.h @@ -29,7 +29,8 @@ enum optab_subtype { optab_default, optab_scalar, - optab_vector + optab_vector, + optab_vector_mixed_sign }; /* Return the optab used for computing the given operation on the type given by diff --git a/gcc/optabs.c b/gcc/optabs.c index 62a6bdb..14d8ad2 100644 --- a/gcc/optabs.c +++ b/gcc/optabs.c @@ -262,6 +262,11 @@ expand_widen_pattern_expr (sepops ops, rtx op0, rtx op1, rtx wide_op, bool sbool = false; oprnd0 = ops->op0; + if (nops >= 2) + oprnd1 = ops->op1; + if (nops >= 3) + oprnd2 = ops->op2; + tmode0 = TYPE_MODE (TREE_TYPE (oprnd0)); if (ops->code == VEC_UNPACK_FIX_TRUNC_HI_EXPR || ops->code == VEC_UNPACK_FIX_TRUNC_LO_EXPR) @@ -285,6 +290,27 @@ expand_widen_pattern_expr (sepops ops, rtx op0, rtx op1, rtx wide_op, ? vec_unpacks_sbool_hi_optab : vec_unpacks_sbool_lo_optab); sbool = true; } + else if (ops->code == DOT_PROD_EXPR) + { + enum optab_subtype subtype = optab_default; + signop sign1 = TYPE_SIGN (TREE_TYPE (oprnd0)); + signop sign2 = TYPE_SIGN (TREE_TYPE (oprnd1)); + if (sign1 == sign2) + ; + else if (sign1 == SIGNED && sign2 == UNSIGNED) + { + subtype = optab_vector_mixed_sign; + /* Same as optab_vector_mixed_sign but flip the operands. 
*/ + std::swap (op0, op1); + } + else if (sign1 == UNSIGNED && sign2 == SIGNED) + subtype = optab_vector_mixed_sign; + else + gcc_unreachable (); + + widen_pattern_optab + = optab_for_tree_code (ops->code, TREE_TYPE (oprnd0), subtype); + } else widen_pattern_optab = optab_for_tree_code (ops->code, TREE_TYPE (oprnd0), optab_default); @@ -298,10 +324,7 @@ expand_widen_pattern_expr (sepops ops, rtx op0, rtx op1, rtx wide_op, gcc_assert (icode != CODE_FOR_nothing); if (nops >= 2) - { - oprnd1 = ops->op1; - tmode1 = TYPE_MODE (TREE_TYPE (oprnd1)); - } + tmode1 = TYPE_MODE (TREE_TYPE (oprnd1)); else if (sbool) { nops = 2; @@ -316,7 +339,6 @@ expand_widen_pattern_expr (sepops ops, rtx op0, rtx op1, rtx wide_op, { gcc_assert (tmode1 == tmode0); gcc_assert (op1); - oprnd2 = ops->op2; wmode = TYPE_MODE (TREE_TYPE (oprnd2)); } diff --git a/gcc/optabs.def b/gcc/optabs.def index 51acc1b..201b8aa 100644 --- a/gcc/optabs.def +++ b/gcc/optabs.def @@ -352,6 +352,7 @@ OPTAB_D (uavg_ceil_optab, "uavg$a3_ceil") OPTAB_D (sdot_prod_optab, "sdot_prod$I$a") OPTAB_D (ssum_widen_optab, "widen_ssum$I$a3") OPTAB_D (udot_prod_optab, "udot_prod$I$a") +OPTAB_D (usdot_prod_optab, "usdot_prod$I$a") OPTAB_D (usum_widen_optab, "widen_usum$I$a3") OPTAB_D (usad_optab, "usad$I$a") OPTAB_D (ssad_optab, "ssad$I$a") diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c index 1f0f4a2..2820847 100644 --- a/gcc/tree-cfg.c +++ b/gcc/tree-cfg.c @@ -4438,7 +4438,8 @@ verify_gimple_assign_ternary (gassign *stmt) && !SCALAR_FLOAT_TYPE_P (rhs1_type)) || (!INTEGRAL_TYPE_P (lhs_type) && !SCALAR_FLOAT_TYPE_P (lhs_type)))) - || !types_compatible_p (rhs1_type, rhs2_type) + /* rhs1_type and rhs2_type may differ in sign. 
*/ + || !tree_nop_conversion_p (rhs1_type, rhs2_type) || !useless_type_conversion_p (lhs_type, rhs3_type) || maybe_lt (GET_MODE_SIZE (element_mode (rhs3_type)), 2 * GET_MODE_SIZE (element_mode (rhs1_type)))) diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index e978015..fc3dab0 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -6661,6 +6661,12 @@ vectorizable_reduction (loop_vec_info loop_vinfo, bool lane_reduc_code_p = (code == DOT_PROD_EXPR || code == WIDEN_SUM_EXPR || code == SAD_EXPR); int op_type = TREE_CODE_LENGTH (code); + enum optab_subtype optab_query_kind = optab_vector; + if (code == DOT_PROD_EXPR + && TYPE_SIGN (TREE_TYPE (gimple_assign_rhs1 (stmt))) + != TYPE_SIGN (TREE_TYPE (gimple_assign_rhs2 (stmt)))) + optab_query_kind = optab_vector_mixed_sign; + scalar_dest = gimple_assign_lhs (stmt); scalar_type = TREE_TYPE (scalar_dest); @@ -7189,7 +7195,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, bool ok = true; /* 4.1. check support for the operation in the loop */ - optab optab = optab_for_tree_code (code, vectype_in, optab_vector); + optab optab = optab_for_tree_code (code, vectype_in, optab_query_kind); if (!optab) { if (dump_enabled_p ()) diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c index b2e7fc2..71533e6 100644 --- a/gcc/tree-vect-patterns.c +++ b/gcc/tree-vect-patterns.c @@ -191,9 +191,9 @@ vect_get_external_def_edge (vec_info *vinfo, tree var) } /* Return true if the target supports a vector version of CODE, - where CODE is known to map to a direct optab. ITYPE specifies - the type of (some of) the scalar inputs and OTYPE specifies the - type of the scalar result. + where CODE is known to map to a direct optab with the given SUBTYPE. + ITYPE specifies the type of (some of) the scalar inputs and OTYPE + specifies the type of the scalar result. 
If CODE allows the inputs and outputs to have different type (such as for WIDEN_SUM_EXPR), it is the input mode rather @@ -208,7 +208,8 @@ vect_get_external_def_edge (vec_info *vinfo, tree var) static bool vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code, tree itype, tree *vecotype_out, - tree *vecitype_out = NULL) + tree *vecitype_out = NULL, + enum optab_subtype subtype = optab_default) { tree vecitype = get_vectype_for_scalar_type (vinfo, itype); if (!vecitype) @@ -218,7 +219,7 @@ vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code, if (!vecotype) return false; - optab optab = optab_for_tree_code (code, vecitype, optab_default); + optab optab = optab_for_tree_code (code, vecitype, subtype); if (!optab) return false; @@ -521,6 +522,7 @@ vect_joust_widened_type (tree type, tree new_type, tree *common_type) unsigned int precision = MAX (TYPE_PRECISION (*common_type), TYPE_PRECISION (new_type)); precision *= 2; + if (precision * 2 > TYPE_PRECISION (type)) return false; @@ -539,6 +541,10 @@ vect_joust_widened_type (tree type, tree new_type, tree *common_type) to a type that (a) is narrower than the result of STMT_INFO and (b) can hold all leaf operand values. + If SUBTYPE then allow that the signs of the operands + may differ in signs but not in precision. SUBTYPE is updated to reflect + this. + Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE exists. */ @@ -546,7 +552,8 @@ static unsigned int vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code, tree_code widened_code, bool shift_p, unsigned int max_nops, - vect_unpromoted_value *unprom, tree *common_type) + vect_unpromoted_value *unprom, tree *common_type, + enum optab_subtype *subtype = NULL) { /* Check for an integer operation with the right code. 
*/ gassign *assign = dyn_cast (stmt_info->stmt); @@ -607,7 +614,8 @@ vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code, = vinfo->lookup_def (this_unprom->op); nops = vect_widened_op_tree (vinfo, def_stmt_info, code, widened_code, shift_p, max_nops, - this_unprom, common_type); + this_unprom, common_type, + subtype); if (nops == 0) return 0; @@ -625,7 +633,18 @@ vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code, *common_type = this_unprom->type; else if (!vect_joust_widened_type (type, this_unprom->type, common_type)) - return 0; + { + if (subtype) + { + /* See if we can sign extend the smaller type. */ + if (TYPE_PRECISION (this_unprom->type) + > TYPE_PRECISION (*common_type)) + *common_type = this_unprom->type; + *subtype = optab_vector_mixed_sign; + } + else + return 0; + } } } next_op += nops; @@ -725,12 +744,22 @@ vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs, /* Convert UNPROM to TYPE and return the result, adding new statements to STMT_INFO's pattern definition statements if no better way is - available. VECTYPE is the vector form of TYPE. */ + available. VECTYPE is the vector form of TYPE. + + If SUBTYPE then convert the type based on the subtype. */ static tree vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type, - vect_unpromoted_value *unprom, tree vectype) + vect_unpromoted_value *unprom, tree vectype, + enum optab_subtype subtype = optab_default) { + + /* Update the type if the signs differ. */ + if (subtype == optab_vector_mixed_sign + && TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (unprom->op))) + type = build_nonstandard_integer_type (TYPE_PRECISION (type), + TYPE_SIGN (unprom->type)); + /* Check for a no-op conversion. 
*/ if (types_compatible_p (type, TREE_TYPE (unprom->op))) return unprom->op; @@ -806,12 +835,14 @@ vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type, } /* Invoke vect_convert_input for N elements of UNPROM and store the - result in the corresponding elements of RESULT. */ + result in the corresponding elements of RESULT. + + If SUBTYPE then convert the type based on the subtype. */ static void vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n, tree *result, tree type, vect_unpromoted_value *unprom, - tree vectype) + tree vectype, enum optab_subtype subtype = optab_default) { for (unsigned int i = 0; i < n; ++i) { @@ -819,11 +850,12 @@ vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n, for (j = 0; j < i; ++j) if (unprom[j].op == unprom[i].op) break; + if (j < i) result[i] = result[j]; else result[i] = vect_convert_input (vinfo, stmt_info, - type, &unprom[i], vectype); + type, &unprom[i], vectype, subtype); } } @@ -895,7 +927,8 @@ vect_reassociating_reduction_p (vec_info *vinfo, Try to find the following pattern: - type x_t, y_t; + type1a x_t + type1b y_t; TYPE1 prod; TYPE2 sum = init; loop: @@ -908,9 +941,9 @@ vect_reassociating_reduction_p (vec_info *vinfo, [S6 prod = (TYPE2) prod; #optional] S7 sum_1 = prod + sum_0; - where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the - same size of 'TYPE1' or bigger. This is a special case of a reduction - computation. + where 'TYPE1' is exactly double the size of type 'type1a' and 'type1b', + the sign of 'TYPE1' must be one of 'type1a' or 'type1b' but the sign of + 'type1a' and 'type1b' can differ. Input: @@ -953,7 +986,8 @@ vect_recog_dot_prod_pattern (vec_info *vinfo, In which - DX is double the size of X - DY is double the size of Y - - DX, DY, DPROD all have the same type + - DX, DY, DPROD all have the same type but the sign + between X, Y and DPROD can differ. 
- sum is the same size of DPROD or bigger - sum has been recognized as a reduction variable. @@ -991,8 +1025,18 @@ vect_recog_dot_prod_pattern (vec_info *vinfo, /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi inside the loop (in case we are analyzing an outer-loop). */ vect_unpromoted_value unprom0[2]; + enum optab_subtype subtype = optab_vector; if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR, - false, 2, unprom0, &half_type)) + false, 2, unprom0, &half_type, &subtype)) + return NULL; + + /* If there are two widening operations, make sure they agree on the sign + of the extension. The result of an optab_vector_mixed_sign operation + is signed; otherwise, the result has the same sign as the operands. */ + if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type) + && (subtype == optab_vector_mixed_sign + ? TYPE_UNSIGNED (unprom_mult.type) + : TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type))) return NULL; /* If there are two widening operations, make sure they agree on @@ -1005,13 +1049,13 @@ vect_recog_dot_prod_pattern (vec_info *vinfo, tree half_vectype; if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type, - type_out, &half_vectype)) + type_out, &half_vectype, subtype)) return NULL; /* Get the inputs in the appropriate types. */ tree mult_oprnd[2]; vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type, - unprom0, half_vectype); + unprom0, half_vectype, subtype); var = vect_recog_temp_ssa_var (type, NULL); pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR, -- cgit v1.1 From 752045ed1eea0eddc48923df78999dab7f2827ba Mon Sep 17 00:00:00 2001 From: Tamar Christina Date: Wed, 14 Jul 2021 15:19:32 +0100 Subject: AArch64: Add support for sign differing dot-product usdot for NEON and SVE. Hi All, This adds optabs implementing usdot_prod. 
The following testcase: #define N 480 #define SIGNEDNESS_1 unsigned #define SIGNEDNESS_2 signed #define SIGNEDNESS_3 signed #define SIGNEDNESS_4 unsigned SIGNEDNESS_1 int __attribute__ ((noipa)) f (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict a, SIGNEDNESS_4 char *restrict b) { for (__INTPTR_TYPE__ i = 0; i < N; ++i) { int av = a[i]; int bv = b[i]; SIGNEDNESS_2 short mult = av * bv; res += mult; } return res; } Generates for NEON f: movi v0.4s, 0 mov x3, 0 .p2align 3,,7 .L2: ldr q1, [x2, x3] ldr q2, [x1, x3] usdot v0.4s, v1.16b, v2.16b add x3, x3, 16 cmp x3, 480 bne .L2 addv s0, v0.4s fmov w1, s0 add w0, w0, w1 ret and for SVE f: mov x3, 0 cntb x5 mov w4, 480 mov z1.b, #0 whilelo p0.b, wzr, w4 mov z3.b, #0 ptrue p1.b, all .p2align 3,,7 .L2: ld1b z2.b, p0/z, [x1, x3] ld1b z0.b, p0/z, [x2, x3] add x3, x3, x5 sel z0.b, p0, z0.b, z3.b whilelo p0.b, w3, w4 usdot z1.s, z0.b, z2.b b.any .L2 uaddv d0, p1, z1.s fmov x1, d0 add w0, w0, w1 ret instead of f: movi v0.4s, 0 mov x3, 0 .p2align 3,,7 .L2: ldr q2, [x1, x3] ldr q1, [x2, x3] add x3, x3, 16 sxtl v4.8h, v2.8b sxtl2 v3.8h, v2.16b uxtl v2.8h, v1.8b uxtl2 v1.8h, v1.16b mul v2.8h, v2.8h, v4.8h mul v1.8h, v1.8h, v3.8h saddw v0.4s, v0.4s, v2.4h saddw2 v0.4s, v0.4s, v2.8h saddw v0.4s, v0.4s, v1.4h saddw2 v0.4s, v0.4s, v1.8h cmp x3, 480 bne .L2 addv s0, v0.4s fmov w1, s0 add w0, w0, w1 ret and f: mov x3, 0 cnth x5 mov w4, 480 mov z1.b, #0 whilelo p0.h, wzr, w4 ptrue p2.b, all .p2align 3,,7 .L2: ld1sb z2.h, p0/z, [x1, x3] punpklo p1.h, p0.b ld1b z0.h, p0/z, [x2, x3] add x3, x3, x5 mul z0.h, p2/m, z0.h, z2.h sunpklo z2.s, z0.h sunpkhi z0.s, z0.h add z1.s, p1/m, z1.s, z2.s punpkhi p1.h, p0.b whilelo p0.h, w3, w4 add z1.s, p1/m, z1.s, z0.s b.any .L2 uaddv d0, p2, z1.s fmov x1, d0 add w0, w0, w1 ret gcc/ChangeLog: * config/aarch64/aarch64-simd.md (aarch64_usdot): Rename to... (usdot_prod): ... This. * config/aarch64/aarch64-simd-builtins.def (usdot): Rename to... (usdot_prod): ...This. 
* config/aarch64/arm_neon.h (vusdot_s32, vusdotq_s32): Likewise. * config/aarch64/aarch64-sve.md (@aarch64_dot_prod): Rename to... (@dot_prod): ...This. * config/aarch64/aarch64-sve-builtins-base.cc (svusdot_impl::expand): Use it. gcc/testsuite/ChangeLog: * gcc.target/aarch64/simd/vusdot-autovec.c: New test. * gcc.target/aarch64/sve/vusdot-autovec.c: New test. --- gcc/config/aarch64/aarch64-simd-builtins.def | 5 +-- gcc/config/aarch64/aarch64-simd.md | 2 +- gcc/config/aarch64/aarch64-sve-builtins-base.cc | 2 +- gcc/config/aarch64/aarch64-sve.md | 2 +- gcc/config/aarch64/arm_neon.h | 4 +-- .../gcc.target/aarch64/simd/vusdot-autovec.c | 38 ++++++++++++++++++++++ .../gcc.target/aarch64/sve/vusdot-autovec.c | 38 ++++++++++++++++++++++ 7 files changed, 84 insertions(+), 7 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vusdot-autovec.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/vusdot-autovec.c (limited to 'gcc') diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index ac5d4fc..063f503 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -374,10 +374,11 @@ BUILTIN_VSDQ_I_DI (BINOP, srshl, 0, NONE) BUILTIN_VSDQ_I_DI (BINOP_UUS, urshl, 0, NONE) - /* Implemented by aarch64_{_lane}{q}. */ + /* Implemented by _prod. */ BUILTIN_VB (TERNOP, sdot, 0, NONE) BUILTIN_VB (TERNOPU, udot, 0, NONE) - BUILTIN_VB (TERNOP_SSUS, usdot, 0, NONE) + BUILTIN_VB (TERNOP_SSUS, usdot_prod, 10, NONE) + /* Implemented by aarch64__lane{q}. 
*/ BUILTIN_VB (QUADOP_LANE, sdot_lane, 0, NONE) BUILTIN_VB (QUADOPU_LANE, udot_lane, 0, NONE) BUILTIN_VB (QUADOP_LANE, sdot_laneq, 0, NONE) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 540244c..7489098 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -601,7 +601,7 @@ ;; These instructions map to the __builtins for the armv8.6a I8MM usdot ;; (vector) Dot Product operation. -(define_insn "aarch64_usdot" +(define_insn "usdot_prod" [(set (match_operand:VS 0 "register_operand" "=w") (plus:VS (unspec:VS [(match_operand: 2 "register_operand" "w") diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc index 8fd6d3f..02e42a7 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc @@ -2366,7 +2366,7 @@ public: Hence we do the same rotation on arguments as svdot_impl does. */ e.rotate_inputs_left (0, 3); machine_mode mode = e.vector_mode (0); - insn_code icode = code_for_aarch64_dot_prod (UNSPEC_USDOT, mode); + insn_code icode = code_for_dot_prod (UNSPEC_USDOT, mode); return e.use_exact_insn (icode); } diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index 9e48c0e..359fe0e 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -6870,7 +6870,7 @@ [(set_attr "movprfx" "*,yes")] ) -(define_insn "@aarch64_dot_prod" +(define_insn "@dot_prod" [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w") (plus:VNx4SI_ONLY (unspec:VNx4SI_ONLY diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 17e059e..00d76ea 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -34039,14 +34039,14 @@ __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vusdot_s32 (int32x2_t __r, uint8x8_t __a, int8x8_t __b) { - return 
__builtin_aarch64_usdotv8qi_ssus (__r, __a, __b); + return __builtin_aarch64_usdot_prodv8qi_ssus (__r, __a, __b); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vusdotq_s32 (int32x4_t __r, uint8x16_t __a, int8x16_t __b) { - return __builtin_aarch64_usdotv16qi_ssus (__r, __a, __b); + return __builtin_aarch64_usdot_prodv16qi_ssus (__r, __a, __b); } __extension__ extern __inline int32x2_t diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vusdot-autovec.c b/gcc/testsuite/gcc.target/aarch64/simd/vusdot-autovec.c new file mode 100644 index 0000000..b99a945 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vusdot-autovec.c @@ -0,0 +1,38 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=armv8.2-a+i8mm" } */ + +#define N 480 +#define SIGNEDNESS_1 unsigned +#define SIGNEDNESS_2 signed +#define SIGNEDNESS_3 signed +#define SIGNEDNESS_4 unsigned + +SIGNEDNESS_1 int __attribute__ ((noipa)) +f (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict a, + SIGNEDNESS_4 char *restrict b) +{ + for (__INTPTR_TYPE__ i = 0; i < N; ++i) + { + int av = a[i]; + int bv = b[i]; + SIGNEDNESS_2 short mult = av * bv; + res += mult; + } + return res; +} + +SIGNEDNESS_1 int __attribute__ ((noipa)) +g (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict b, + SIGNEDNESS_4 char *restrict a) +{ + for (__INTPTR_TYPE__ i = 0; i < N; ++i) + { + int av = a[i]; + int bv = b[i]; + SIGNEDNESS_2 short mult = av * bv; + res += mult; + } + return res; +} + +/* { dg-final { scan-assembler-times {\tusdot\t} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vusdot-autovec.c b/gcc/testsuite/gcc.target/aarch64/sve/vusdot-autovec.c new file mode 100644 index 0000000..094dd51 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/vusdot-autovec.c @@ -0,0 +1,38 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=armv8.2-a+i8mm+sve" } */ + +#define N 480 +#define SIGNEDNESS_1 unsigned +#define SIGNEDNESS_2 signed +#define 
SIGNEDNESS_3 signed +#define SIGNEDNESS_4 unsigned + +SIGNEDNESS_1 int __attribute__ ((noipa)) +f (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict a, + SIGNEDNESS_4 char *restrict b) +{ + for (__INTPTR_TYPE__ i = 0; i < N; ++i) + { + int av = a[i]; + int bv = b[i]; + SIGNEDNESS_2 short mult = av * bv; + res += mult; + } + return res; +} + +SIGNEDNESS_1 int __attribute__ ((noipa)) +g (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict b, + SIGNEDNESS_4 char *restrict a) +{ + for (__INTPTR_TYPE__ i = 0; i < N; ++i) + { + int av = a[i]; + int bv = b[i]; + SIGNEDNESS_2 short mult = av * bv; + res += mult; + } + return res; +} + +/* { dg-final { scan-assembler-times {\tusdot\t} 2 } } */ -- cgit v1.1 From 6412c58c781f64b60e7353e762cd5cec62a863e7 Mon Sep 17 00:00:00 2001 From: Tamar Christina Date: Wed, 14 Jul 2021 15:20:45 +0100 Subject: AArch32: Add support for sign differing dot-product usdot for NEON. This adds optabs implementing usdot_prod. The following testcase: #define N 480 #define SIGNEDNESS_1 unsigned #define SIGNEDNESS_2 signed #define SIGNEDNESS_3 signed #define SIGNEDNESS_4 unsigned SIGNEDNESS_1 int __attribute__ ((noipa)) f (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict a, SIGNEDNESS_4 char *restrict b) { for (__INTPTR_TYPE__ i = 0; i < N; ++i) { int av = a[i]; int bv = b[i]; SIGNEDNESS_2 short mult = av * bv; res += mult; } return res; } Generates f: vmov.i32 q8, #0 @ v4si add r3, r2, #480 .L2: vld1.8 {q10}, [r2]! vld1.8 {q9}, [r1]! vusdot.s8 q8, q9, q10 cmp r3, r2 bne .L2 vadd.i32 d16, d16, d17 vpadd.i32 d16, d16, d16 vmov.32 r3, d16[0] add r0, r0, r3 bx lr instead of f: vmov.i32 q8, #0 @ v4si add r3, r2, #480 .L2: vld1.8 {q9}, [r2]! vld1.8 {q11}, [r1]! cmp r3, r2 vmull.s8 q10, d18, d22 vmull.s8 q9, d19, d23 vaddw.s16 q8, q8, d20 vaddw.s16 q8, q8, d21 vaddw.s16 q8, q8, d18 vaddw.s16 q8, q8, d19 bne .L2 vadd.i32 d16, d16, d17 vpadd.i32 d16, d16, d16 vmov.32 r3, d16[0] add r0, r0, r3 bx lr For NEON. 
I couldn't figure out if the MVE instruction vmlaldav.s16 could be used to emulate this. Because it would require additional widening to work, I left MVE out of this patch set, but perhaps someone should take a look. gcc/ChangeLog: * config/arm/neon.md (usdot_prod): New. gcc/testsuite/ChangeLog: * gcc.target/arm/simd/vusdot-autovec.c: New test. --- gcc/config/arm/neon.md | 12 +++++++ gcc/testsuite/gcc.target/arm/simd/vusdot-autovec.c | 38 ++++++++++++++++++++++ 2 files changed, 50 insertions(+) create mode 100644 gcc/testsuite/gcc.target/arm/simd/vusdot-autovec.c (limited to 'gcc') diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 64365e0..8b0a396 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -2969,6 +2969,18 @@ DONE; }) +;; Auto-vectorizer pattern for usdot +(define_expand "usdot_prod" + [(set (match_operand:VCVTI 0 "register_operand") + (plus:VCVTI (unspec:VCVTI [(match_operand: 1 + "register_operand") + (match_operand: 2 + "register_operand")] + UNSPEC_DOT_US) + (match_operand:VCVTI 3 "register_operand")))] + "TARGET_I8MM" +) + (define_expand "neon_copysignf" [(match_operand:VCVTF 0 "register_operand") (match_operand:VCVTF 1 "register_operand") diff --git a/gcc/testsuite/gcc.target/arm/simd/vusdot-autovec.c b/gcc/testsuite/gcc.target/arm/simd/vusdot-autovec.c new file mode 100644 index 0000000..7cc56f6 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vusdot-autovec.c @@ -0,0 +1,38 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=armv8.2-a+i8mm" } */ + +#define N 480 +#define SIGNEDNESS_1 unsigned +#define SIGNEDNESS_2 signed +#define SIGNEDNESS_3 signed +#define SIGNEDNESS_4 unsigned + +SIGNEDNESS_1 int __attribute__ ((noipa)) +f (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict a, + SIGNEDNESS_4 char *restrict b) +{ + for (__INTPTR_TYPE__ i = 0; i < N; ++i) + { + int av = a[i]; + int bv = b[i]; + SIGNEDNESS_2 short mult = av * bv; + res += mult; + } + return res; +} + +SIGNEDNESS_1 int __attribute__ ((noipa))
+g (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict b, + SIGNEDNESS_4 char *restrict a) +{ + for (__INTPTR_TYPE__ i = 0; i < N; ++i) + { + int av = a[i]; + int bv = b[i]; + SIGNEDNESS_2 short mult = av * bv; + res += mult; + } + return res; +} + +/* { dg-final { scan-assembler-times {vusdot.s8} 2 { target { arm-*-*-gnueabihf } } } } */ -- cgit v1.1 From 1e0ab1c4ba6159ad7ce71c6cddd5e04d2a636742 Mon Sep 17 00:00:00 2001 From: Tamar Christina Date: Wed, 14 Jul 2021 15:21:40 +0100 Subject: middle-end: Add generic middle-end tests for sign differing dot-product. This adds testcases to test for auto-vect detection of the new sign differing dot product. gcc/ChangeLog: * doc/sourcebuild.texi (arm_v8_2a_i8mm_neon_hw): Document. gcc/testsuite/ChangeLog: * lib/target-supports.exp (check_effective_target_arm_v8_2a_imm8_neon_ok_nocache, check_effective_target_arm_v8_2a_i8mm_neon_hw, check_effective_target_vect_usdot_qi): New. * gcc.dg/vect/vect-reduc-dot-9.c: New test. * gcc.dg/vect/vect-reduc-dot-10.c: New test. * gcc.dg/vect/vect-reduc-dot-11.c: New test. * gcc.dg/vect/vect-reduc-dot-12.c: New test. * gcc.dg/vect/vect-reduc-dot-13.c: New test. * gcc.dg/vect/vect-reduc-dot-14.c: New test. * gcc.dg/vect/vect-reduc-dot-15.c: New test. * gcc.dg/vect/vect-reduc-dot-16.c: New test. * gcc.dg/vect/vect-reduc-dot-17.c: New test. * gcc.dg/vect/vect-reduc-dot-18.c: New test. * gcc.dg/vect/vect-reduc-dot-19.c: New test. * gcc.dg/vect/vect-reduc-dot-20.c: New test. * gcc.dg/vect/vect-reduc-dot-21.c: New test. * gcc.dg/vect/vect-reduc-dot-22.c: New test.
--- gcc/doc/sourcebuild.texi | 9 +++ gcc/testsuite/gcc.dg/vect/vect-reduc-dot-10.c | 13 +++++ gcc/testsuite/gcc.dg/vect/vect-reduc-dot-11.c | 13 +++++ gcc/testsuite/gcc.dg/vect/vect-reduc-dot-12.c | 13 +++++ gcc/testsuite/gcc.dg/vect/vect-reduc-dot-13.c | 13 +++++ gcc/testsuite/gcc.dg/vect/vect-reduc-dot-14.c | 13 +++++ gcc/testsuite/gcc.dg/vect/vect-reduc-dot-15.c | 13 +++++ gcc/testsuite/gcc.dg/vect/vect-reduc-dot-16.c | 13 +++++ gcc/testsuite/gcc.dg/vect/vect-reduc-dot-17.c | 52 +++++++++++++++++ gcc/testsuite/gcc.dg/vect/vect-reduc-dot-18.c | 52 +++++++++++++++++ gcc/testsuite/gcc.dg/vect/vect-reduc-dot-19.c | 52 +++++++++++++++++ gcc/testsuite/gcc.dg/vect/vect-reduc-dot-20.c | 52 +++++++++++++++++ gcc/testsuite/gcc.dg/vect/vect-reduc-dot-21.c | 52 +++++++++++++++++ gcc/testsuite/gcc.dg/vect/vect-reduc-dot-22.c | 52 +++++++++++++++++ gcc/testsuite/gcc.dg/vect/vect-reduc-dot-9.c | 52 +++++++++++++++++ gcc/testsuite/lib/target-supports.exp | 80 +++++++++++++++++++++++++++ 16 files changed, 544 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/vect/vect-reduc-dot-10.c create mode 100644 gcc/testsuite/gcc.dg/vect/vect-reduc-dot-11.c create mode 100644 gcc/testsuite/gcc.dg/vect/vect-reduc-dot-12.c create mode 100644 gcc/testsuite/gcc.dg/vect/vect-reduc-dot-13.c create mode 100644 gcc/testsuite/gcc.dg/vect/vect-reduc-dot-14.c create mode 100644 gcc/testsuite/gcc.dg/vect/vect-reduc-dot-15.c create mode 100644 gcc/testsuite/gcc.dg/vect/vect-reduc-dot-16.c create mode 100644 gcc/testsuite/gcc.dg/vect/vect-reduc-dot-17.c create mode 100644 gcc/testsuite/gcc.dg/vect/vect-reduc-dot-18.c create mode 100644 gcc/testsuite/gcc.dg/vect/vect-reduc-dot-19.c create mode 100644 gcc/testsuite/gcc.dg/vect/vect-reduc-dot-20.c create mode 100644 gcc/testsuite/gcc.dg/vect/vect-reduc-dot-21.c create mode 100644 gcc/testsuite/gcc.dg/vect/vect-reduc-dot-22.c create mode 100644 gcc/testsuite/gcc.dg/vect/vect-reduc-dot-9.c (limited to 'gcc') diff --git a/gcc/doc/sourcebuild.texi 
b/gcc/doc/sourcebuild.texi index 16c6a3b..b1fffd5 100644 --- a/gcc/doc/sourcebuild.texi +++ b/gcc/doc/sourcebuild.texi @@ -1780,6 +1780,10 @@ Target supports a vector dot-product of @code{signed char}. @item vect_udot_qi Target supports a vector dot-product of @code{unsigned char}. +@item vect_usdot_qi +Target supports a vector dot-product where one operand of the multiply is +@code{signed char} and the other of @code{unsigned char}. + @item vect_sdot_hi Target supports a vector dot-product of @code{signed short}. @@ -2055,6 +2059,11 @@ ARM target supports executing instructions from ARMv8.2-A with the Dot Product extension. Some multilibs may be incompatible with these options. Implies arm_v8_2a_dotprod_neon_ok. +@item arm_v8_2a_i8mm_neon_hw +ARM target supports executing instructions from ARMv8.2-A with the 8-bit +Matrix Multiply extension. Some multilibs may be incompatible with these +options. Implies arm_v8_2a_i8mm_ok. + @item arm_fp16fml_neon_ok @anchor{arm_fp16fml_neon_ok} ARM target supports extensions to generate the @code{VFMAL} and @code{VFMLS} diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-10.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-10.c new file mode 100644 index 0000000..7ce8696 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-10.c @@ -0,0 +1,13 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target arm_v8_2a_i8mm_neon_hw { target { aarch64*-*-* || arm*-*-* } } } */ +/* { dg-add-options arm_v8_2a_i8mm } */ + +#define SIGNEDNESS_1 unsigned +#define SIGNEDNESS_2 unsigned +#define SIGNEDNESS_3 unsigned +#define SIGNEDNESS_4 signed + +#include "vect-reduc-dot-9.c" + +/* { dg-final { scan-tree-dump-not "vect_recog_dot_prod_pattern: detected" "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_usdot_qi } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-11.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-11.c new file mode 100644 index 0000000..5e3cfc9 --- 
/dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-11.c @@ -0,0 +1,13 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target arm_v8_2a_i8mm_neon_hw { target { aarch64*-*-* || arm*-*-* } } } */ +/* { dg-add-options arm_v8_2a_i8mm } */ + +#define SIGNEDNESS_1 unsigned +#define SIGNEDNESS_2 signed +#define SIGNEDNESS_3 unsigned +#define SIGNEDNESS_4 signed + +#include "vect-reduc-dot-9.c" + +/* { dg-final { scan-tree-dump-not "vect_recog_dot_prod_pattern: detected" "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_usdot_qi } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-12.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-12.c new file mode 100644 index 0000000..0841261 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-12.c @@ -0,0 +1,13 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target arm_v8_2a_i8mm_neon_hw { target { aarch64*-*-* || arm*-*-* } } } */ +/* { dg-add-options arm_v8_2a_i8mm } */ + +#define SIGNEDNESS_1 unsigned +#define SIGNEDNESS_2 signed +#define SIGNEDNESS_3 signed +#define SIGNEDNESS_4 unsigned + +#include "vect-reduc-dot-9.c" + +/* { dg-final { scan-tree-dump "vect_recog_dot_prod_pattern: detected" "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_usdot_qi } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-13.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-13.c new file mode 100644 index 0000000..7ee0f45 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-13.c @@ -0,0 +1,13 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target arm_v8_2a_i8mm_neon_hw { target { aarch64*-*-* || arm*-*-* } } } */ +/* { dg-add-options arm_v8_2a_i8mm } */ + +#define SIGNEDNESS_1 signed +#define SIGNEDNESS_2 unsigned +#define SIGNEDNESS_3 signed +#define SIGNEDNESS_4 unsigned + +#include "vect-reduc-dot-9.c" + +/* { dg-final { 
scan-tree-dump-not "vect_recog_dot_prod_pattern: detected" "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_usdot_qi } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-14.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-14.c new file mode 100644 index 0000000..2de1434 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-14.c @@ -0,0 +1,13 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target arm_v8_2a_i8mm_neon_hw { target { aarch64*-*-* || arm*-*-* } } } */ +/* { dg-add-options arm_v8_2a_i8mm } */ + +#define SIGNEDNESS_1 signed +#define SIGNEDNESS_2 unsigned +#define SIGNEDNESS_3 unsigned +#define SIGNEDNESS_4 signed + +#include "vect-reduc-dot-9.c" + +/* { dg-final { scan-tree-dump-not "vect_recog_dot_prod_pattern: detected" "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_usdot_qi } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-15.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-15.c new file mode 100644 index 0000000..5a6fd19 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-15.c @@ -0,0 +1,13 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target arm_v8_2a_i8mm_neon_hw { target { aarch64*-*-* || arm*-*-* } } } */ +/* { dg-add-options arm_v8_2a_i8mm } */ + +#define SIGNEDNESS_1 signed +#define SIGNEDNESS_2 signed +#define SIGNEDNESS_3 unsigned +#define SIGNEDNESS_4 signed + +#include "vect-reduc-dot-9.c" + +/* { dg-final { scan-tree-dump-not "vect_recog_dot_prod_pattern: detected" "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_usdot_qi } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-16.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-16.c new file mode 100644 index 0000000..aec6287 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-16.c @@ -0,0 +1,13 @@ +/* { dg-require-effective-target vect_int } */ +/* { 
dg-require-effective-target arm_v8_2a_i8mm_neon_hw { target { aarch64*-*-* || arm*-*-* } } } */ +/* { dg-add-options arm_v8_2a_i8mm } */ + +#define SIGNEDNESS_1 signed +#define SIGNEDNESS_2 signed +#define SIGNEDNESS_3 signed +#define SIGNEDNESS_4 unsigned + +#include "vect-reduc-dot-9.c" + +/* { dg-final { scan-tree-dump "vect_recog_dot_prod_pattern: detected" "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_usdot_qi } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-17.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-17.c new file mode 100644 index 0000000..aa269c4 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-17.c @@ -0,0 +1,52 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target arm_v8_2a_i8mm_neon_hw { target { aarch64*-*-* || arm*-*-* } } } */ +/* { dg-add-options arm_v8_2a_i8mm } */ + +#include "tree-vect.h" + +#define N 50 + +#ifndef SIGNEDNESS_1 +#define SIGNEDNESS_1 unsigned +#define SIGNEDNESS_2 unsigned +#define SIGNEDNESS_3 signed +#define SIGNEDNESS_4 unsigned +#endif + +SIGNEDNESS_1 int __attribute__ ((noipa)) +f (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict a, + SIGNEDNESS_4 char *restrict b) +{ + for (__INTPTR_TYPE__ i = 0; i < N; ++i) + { + int av = a[i]; + int bv = b[i]; + SIGNEDNESS_2 int mult = av * bv; + res += mult; + } + return res; +} + +#define BASE ((SIGNEDNESS_3 int) -1 < 0 ? 
-126 : 4) +#define OFFSET 20 + +int +main (void) +{ + check_vect (); + + SIGNEDNESS_3 char a[N], b[N]; + int expected = 0x12345; + for (int i = 0; i < N; ++i) + { + a[i] = BASE + i * 5; + b[i] = BASE + OFFSET + i * 4; + asm volatile ("" ::: "memory"); + expected += (SIGNEDNESS_2 int) (a[i] * b[i]); + } + if (f (0x12345, a, b) != expected) + __builtin_abort (); +} + +/* { dg-final { scan-tree-dump "vect_recog_dot_prod_pattern: detected" "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_usdot_qi } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-18.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-18.c new file mode 100644 index 0000000..2b1cc04 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-18.c @@ -0,0 +1,52 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target arm_v8_2a_i8mm_neon_hw { target { aarch64*-*-* || arm*-*-* } } } */ +/* { dg-add-options arm_v8_2a_i8mm } */ + +#include "tree-vect.h" + +#define N 50 + +#ifndef SIGNEDNESS_1 +#define SIGNEDNESS_1 unsigned +#define SIGNEDNESS_2 signed +#define SIGNEDNESS_3 signed +#define SIGNEDNESS_4 unsigned +#endif + +SIGNEDNESS_1 int __attribute__ ((noipa)) +f (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict a, + SIGNEDNESS_4 char *restrict b) +{ + for (__INTPTR_TYPE__ i = 0; i < N; ++i) + { + int av = a[i]; + int bv = b[i]; + SIGNEDNESS_2 int mult = av * bv; + res += mult; + } + return res; +} + +#define BASE ((SIGNEDNESS_3 int) -1 < 0 ? 
-126 : 4) +#define OFFSET 20 + +int +main (void) +{ + check_vect (); + + SIGNEDNESS_3 char a[N], b[N]; + int expected = 0x12345; + for (int i = 0; i < N; ++i) + { + a[i] = BASE + i * 5; + b[i] = BASE + OFFSET + i * 4; + asm volatile ("" ::: "memory"); + expected += (SIGNEDNESS_2 int) (a[i] * b[i]); + } + if (f (0x12345, a, b) != expected) + __builtin_abort (); +} + +/* { dg-final { scan-tree-dump "vect_recog_dot_prod_pattern: detected" "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_usdot_qi } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-19.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-19.c new file mode 100644 index 0000000..962b24e --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-19.c @@ -0,0 +1,52 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target arm_v8_2a_i8mm_neon_hw { target { aarch64*-*-* || arm*-*-* } } } */ +/* { dg-add-options arm_v8_2a_i8mm } */ + +#include "tree-vect.h" + +#define N 50 + +#ifndef SIGNEDNESS_1 +#define SIGNEDNESS_1 unsigned +#define SIGNEDNESS_2 signed +#define SIGNEDNESS_3 signed +#define SIGNEDNESS_4 unsigned +#endif + +SIGNEDNESS_1 long __attribute__ ((noipa)) +f (SIGNEDNESS_1 long res, SIGNEDNESS_3 char *restrict a, + SIGNEDNESS_4 short *restrict b) +{ + for (__INTPTR_TYPE__ i = 0; i < N; ++i) + { + int av = a[i]; + int bv = b[i]; + SIGNEDNESS_2 long mult = av * bv; + res += mult; + } + return res; +} + +#define BASE ((SIGNEDNESS_3 int) -1 < 0 ? 
-126 : 4) +#define OFFSET 20 + +int +main (void) +{ + check_vect (); + + SIGNEDNESS_3 char a[N]; + SIGNEDNESS_4 short b[N]; + int expected = 0x12345; + for (int i = 0; i < N; ++i) + { + a[i] = BASE + i * 5; + b[i] = BASE + OFFSET + i * 4; + asm volatile ("" ::: "memory"); + expected += (SIGNEDNESS_2 int) (a[i] * b[i]); + } + if (f (0x12345, a, b) != expected) + __builtin_abort (); +} + +/* { dg-final { scan-tree-dump-not "vect_recog_dot_prod_pattern: detected" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-20.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-20.c new file mode 100644 index 0000000..d757fb1 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-20.c @@ -0,0 +1,52 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target arm_v8_2a_i8mm_neon_hw { target { aarch64*-*-* || arm*-*-* } } } */ +/* { dg-add-options arm_v8_2a_i8mm } */ + +#include "tree-vect.h" + +#define N 50 + +#ifndef SIGNEDNESS_1 +#define SIGNEDNESS_1 unsigned +#define SIGNEDNESS_2 signed +#define SIGNEDNESS_3 signed +#define SIGNEDNESS_4 unsigned +#endif + +SIGNEDNESS_1 long __attribute__ ((noipa)) +f (SIGNEDNESS_1 long res, SIGNEDNESS_3 short *restrict a, + SIGNEDNESS_4 char *restrict b) +{ + for (__INTPTR_TYPE__ i = 0; i < N; ++i) + { + int av = a[i]; + int bv = b[i]; + SIGNEDNESS_2 long mult = av * bv; + res += mult; + } + return res; +} + +#define BASE ((SIGNEDNESS_3 int) -1 < 0 ? 
-126 : 4) +#define OFFSET 20 + +int +main (void) +{ + check_vect (); + + SIGNEDNESS_3 short a[N]; + SIGNEDNESS_4 char b[N]; + int expected = 0x12345; + for (int i = 0; i < N; ++i) + { + a[i] = BASE + i * 5; + b[i] = BASE + OFFSET + i * 4; + asm volatile ("" ::: "memory"); + expected += (SIGNEDNESS_2 int) (a[i] * b[i]); + } + if (f (0x12345, a, b) != expected) + __builtin_abort (); +} + +/* { dg-final { scan-tree-dump "vect_recog_dot_prod_pattern: detected" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-21.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-21.c new file mode 100644 index 0000000..b5754bf --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-21.c @@ -0,0 +1,52 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target arm_v8_2a_i8mm_neon_hw { target { aarch64*-*-* || arm*-*-* } } } */ +/* { dg-add-options arm_v8_2a_i8mm } */ + +#include "tree-vect.h" + +#define N 50 + +#ifndef SIGNEDNESS_1 +#define SIGNEDNESS_1 unsigned +#define SIGNEDNESS_2 signed +#define SIGNEDNESS_3 signed +#define SIGNEDNESS_4 unsigned +#endif + +SIGNEDNESS_1 long __attribute__ ((noipa)) +f (SIGNEDNESS_1 long res, SIGNEDNESS_3 char *restrict a, + SIGNEDNESS_4 short *restrict b) +{ + for (__INTPTR_TYPE__ i = 0; i < N; ++i) + { + int av = a[i]; + int bv = b[i]; + SIGNEDNESS_2 int mult = av * bv; + res += mult; + } + return res; +} + +#define BASE ((SIGNEDNESS_3 int) -1 < 0 ? 
-126 : 4) +#define OFFSET 20 + +int +main (void) +{ + check_vect (); + + SIGNEDNESS_3 char a[N]; + SIGNEDNESS_4 short b[N]; + int expected = 0x12345; + for (int i = 0; i < N; ++i) + { + a[i] = BASE + i * 5; + b[i] = BASE + OFFSET + i * 4; + asm volatile ("" ::: "memory"); + expected += (SIGNEDNESS_2 int) (a[i] * b[i]); + } + if (f (0x12345, a, b) != expected) + __builtin_abort (); +} + +/* { dg-final { scan-tree-dump-not "vect_recog_dot_prod_pattern: detected" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-22.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-22.c new file mode 100644 index 0000000..febeb19 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-22.c @@ -0,0 +1,52 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target arm_v8_2a_i8mm_neon_hw { target { aarch64*-*-* || arm*-*-* } } } */ +/* { dg-add-options arm_v8_2a_i8mm } */ + +#include "tree-vect.h" + +#define N 50 + +#ifndef SIGNEDNESS_1 +#define SIGNEDNESS_1 unsigned +#define SIGNEDNESS_2 unsigned +#define SIGNEDNESS_3 signed +#define SIGNEDNESS_4 unsigned +#endif + +SIGNEDNESS_1 long __attribute__ ((noipa)) +f (SIGNEDNESS_1 long res, SIGNEDNESS_3 char *restrict a, + SIGNEDNESS_4 short *restrict b) +{ + for (__INTPTR_TYPE__ i = 0; i < N; ++i) + { + int av = a[i]; + int bv = b[i]; + SIGNEDNESS_2 int mult = av * bv; + res += mult; + } + return res; +} + +#define BASE ((SIGNEDNESS_3 int) -1 < 0 ? 
-126 : 4) +#define OFFSET 20 + +int +main (void) +{ + check_vect (); + + SIGNEDNESS_3 char a[N]; + SIGNEDNESS_4 short b[N]; + int expected = 0x12345; + for (int i = 0; i < N; ++i) + { + a[i] = BASE + i * 5; + b[i] = BASE + OFFSET + i * 4; + asm volatile ("" ::: "memory"); + expected += (SIGNEDNESS_2 int) (a[i] * b[i]); + } + if (f (0x12345, a, b) != expected) + __builtin_abort (); +} + +/* { dg-final { scan-tree-dump-not "vect_recog_dot_prod_pattern: detected" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-9.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-9.c new file mode 100644 index 0000000..cbbeede --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-9.c @@ -0,0 +1,52 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target arm_v8_2a_i8mm_neon_hw { target { aarch64*-*-* || arm*-*-* } } } */ +/* { dg-add-options arm_v8_2a_i8mm } */ + +#include "tree-vect.h" + +#define N 50 + +#ifndef SIGNEDNESS_1 +#define SIGNEDNESS_1 unsigned +#define SIGNEDNESS_2 unsigned +#define SIGNEDNESS_3 signed +#define SIGNEDNESS_4 unsigned +#endif + +SIGNEDNESS_1 int __attribute__ ((noipa)) +f (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict a, + SIGNEDNESS_4 char *restrict b) +{ + for (__INTPTR_TYPE__ i = 0; i < N; ++i) + { + int av = a[i]; + int bv = b[i]; + SIGNEDNESS_2 short mult = av * bv; + res += mult; + } + return res; +} + +#define BASE ((SIGNEDNESS_3 int) -1 < 0 ? 
-126 : 4) +#define OFFSET 20 + +int +main (void) +{ + check_vect (); + + SIGNEDNESS_3 char a[N], b[N]; + int expected = 0x12345; + for (int i = 0; i < N; ++i) + { + a[i] = BASE + i * 5; + b[i] = BASE + OFFSET + i * 4; + asm volatile ("" ::: "memory"); + expected += (SIGNEDNESS_2 short) (a[i] * b[i]); + } + if (f (0x12345, a, b) != expected) + __builtin_abort (); +} + +/* { dg-final { scan-tree-dump-not "vect_recog_dot_prod_pattern: detected" "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_usdot_qi } } } */ diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 789723f..12df869 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -5267,6 +5267,36 @@ proc check_effective_target_arm_v8_2a_dotprod_neon_ok_nocache { } { return 0; } +# Return 1 if the target supports ARMv8.2 Adv.SIMD imm8 +# instructions, 0 otherwise. The test is valid for ARM and for AArch64. +# Record the command line options needed. + +proc check_effective_target_arm_v8_2a_imm8_neon_ok_nocache { } { + global et_arm_v8_2a_imm8_neon_flags + set et_arm_v8_2a_imm8_neon_flags "" + + if { ![istarget arm*-*-*] && ![istarget aarch64*-*-*] } { + return 0; + } + + # Iterate through sets of options to find the compiler flags that + # need to be added to the -march option. + foreach flags {"" "-mfloat-abi=softfp -mfpu=neon-fp-armv8" "-mfloat-abi=hard -mfpu=neon-fp-armv8"} { + if { [check_no_compiler_messages_nocache \ + arm_v8_2a_imm8_neon_ok object { + #include + #if !defined (__ARM_FEATURE_MATMUL_INT8) + #error "__ARM_FEATURE_MATMUL_INT8 not defined" + #endif + } "$flags -march=armv8.2-a+imm8"] } { + set et_arm_v8_2a_imm8_neon_flags "$flags -march=armv8.2-a+imm8" + return 1 + } + } + + return 0; +} + # Return 1 if the target supports ARMv8.1-M MVE # instructions, 0 otherwise. The test is valid for ARM. # Record the command line options needed. 
@@ -5694,6 +5724,43 @@ proc check_effective_target_arm_v8_2a_dotprod_neon_hw { } { } [add_options_for_arm_v8_2a_dotprod_neon ""]] } +# Return 1 if the target supports executing AdvSIMD instructions from ARMv8.2 +# with the i8mm extension, 0 otherwise. The test is valid for ARM and for +# AArch64. + +proc check_effective_target_arm_v8_2a_i8mm_neon_hw { } { + if { ![check_effective_target_arm_v8_2a_i8mm_ok] } { + return 0; + } + return [check_runtime arm_v8_2a_i8mm_neon_hw_available { + #include "arm_neon.h" + int + main (void) + { + + uint32x2_t results = {0,0}; + uint8x8_t a = {1,1,1,1,2,2,2,2}; + int8x8_t b = {2,2,2,2,3,3,3,3}; + + #ifdef __ARM_ARCH_ISA_A64 + asm ("usdot %0.2s, %1.8b, %2.8b" + : "=w"(results) + : "w"(a), "w"(b) + : /* No clobbers. */); + + #else + asm ("vusdot.u8 %P0, %P1, %P2" + : "=w"(results) + : "w"(a), "w"(b) + : /* No clobbers. */); + #endif + + return (vget_lane_u32 (results, 0) == 8 + && vget_lane_u32 (results, 1) == 24) ? 1 : 0; + } + } [add_options_for_arm_v8_2a_i8mm ""]] +} + # Return 1 if this is a ARM target with NEON enabled. proc check_effective_target_arm_neon { } { @@ -7060,6 +7127,19 @@ proc check_effective_target_vect_udot_qi { } { } # Return 1 if the target plus current options supports a vector +# dot-product where one operand of the multiply is signed char +# and the other unsigned chars, 0 otherwise. +# +# This won't change for different subtargets so cache the result. + +proc check_effective_target_vect_usdot_qi { } { + return [check_cached_effective_target_indexed vect_usdot_qi { + expr { [istarget aarch64*-*-*] + || [istarget arm*-*-*] }}] +} + + +# Return 1 if the target plus current options supports a vector # dot-product of signed shorts, 0 otherwise. # # This won't change for different subtargets so cache the result. 
-- cgit v1.1 From c9165e2d58bb037793c1c93e1b5633a61f88db30 Mon Sep 17 00:00:00 2001 From: Tamar Christina Date: Wed, 14 Jul 2021 15:22:37 +0100 Subject: AArch32: Correct sdot RTL on aarch32 The RTL Generated from dot_prod is invalid as operand3 cannot be written to, it's a normal input. For the expand it's just another operand but the caller does not expect it to be written to. gcc/ChangeLog: * config/arm/neon.md (dot_prod): Drop statements. --- gcc/config/arm/neon.md | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'gcc') diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 8b0a396..7645121 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -2961,13 +2961,7 @@ DOTPROD) (match_operand:VCVTI 3 "register_operand")))] "TARGET_DOTPROD" -{ - emit_insn ( - gen_neon_dot (operands[3], operands[3], operands[1], - operands[2])); - emit_insn (gen_rtx_SET (operands[0], operands[3])); - DONE; -}) +) ;; Auto-vectorizer pattern for usdot (define_expand "usdot_prod" -- cgit v1.1 From 6d1cdb27828d2ef1ae1ab0209836646a269b9610 Mon Sep 17 00:00:00 2001 From: Tamar Christina Date: Wed, 14 Jul 2021 15:23:23 +0100 Subject: AArch64: Correct dot-product auto-vect optab RTL The current RTL for the vectorizer patterns for dot-product are incorrect. Operand3 isn't an output parameter so we can't write to it. This fixes this issue and reduces the number of RTL. gcc/ChangeLog: * config/aarch64/aarch64-simd-builtins.def (udot, sdot): Rename to... (sdot_prod, udot_prod): ...These. * config/aarch64/aarch64-simd.md (dot_prod): Remove. (aarch64_dot): Rename to... (dot_prod): ...This. * config/aarch64/arm_neon.h (vdot_u32, vdotq_u32, vdot_s32, vdotq_s32): Update builtins. 
--- gcc/config/aarch64/aarch64-simd-builtins.def | 4 +- gcc/config/aarch64/aarch64-simd.md | 62 +++++++++++----------------- gcc/config/aarch64/arm_neon.h | 8 ++-- 3 files changed, 29 insertions(+), 45 deletions(-) (limited to 'gcc') diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index 063f503..99e7348 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -375,8 +375,8 @@ BUILTIN_VSDQ_I_DI (BINOP_UUS, urshl, 0, NONE) /* Implemented by _prod. */ - BUILTIN_VB (TERNOP, sdot, 0, NONE) - BUILTIN_VB (TERNOPU, udot, 0, NONE) + BUILTIN_VB (TERNOP, sdot_prod, 10, NONE) + BUILTIN_VB (TERNOPU, udot_prod, 10, NONE) BUILTIN_VB (TERNOP_SSUS, usdot_prod, 10, NONE) /* Implemented by aarch64__lane{q}. */ BUILTIN_VB (QUADOP_LANE, sdot_lane, 0, NONE) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 7489098..88fa5ba 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -587,8 +587,28 @@ DONE; }) -;; These instructions map to the __builtins for the Dot Product operations. -(define_insn "aarch64_dot" +;; These expands map to the Dot Product optab the vectorizer checks for +;; and to the intrinsics pattern. +;; The auto-vectorizer expects a dot product builtin that also does an +;; accumulation into the provided register. +;; Given the following pattern +;; +;; for (i=0; idot_prod" [(set (match_operand:VS 0 "register_operand" "=w") (plus:VS (match_operand:VS 1 "register_operand" "0") (unspec:VS [(match_operand: 2 "register_operand" "w") @@ -613,41 +633,6 @@ [(set_attr "type" "neon_dot")] ) -;; These expands map to the Dot Product optab the vectorizer checks for. -;; The auto-vectorizer expects a dot product builtin that also does an -;; accumulation into the provided register. 
-;; Given the following pattern -;; -;; for (i=0; idot_prod" - [(set (match_operand:VS 0 "register_operand") - (plus:VS (unspec:VS [(match_operand: 1 "register_operand") - (match_operand: 2 "register_operand")] - DOTPROD) - (match_operand:VS 3 "register_operand")))] - "TARGET_DOTPROD" -{ - emit_insn ( - gen_aarch64_dot (operands[3], operands[3], operands[1], - operands[2])); - emit_insn (gen_rtx_SET (operands[0], operands[3])); - DONE; -}) - ;; These instructions map to the __builtins for the Dot Product ;; indexed operations. (define_insn "aarch64_dot_lane" @@ -944,8 +929,7 @@ rtx ones = force_reg (V16QImode, CONST1_RTX (V16QImode)); rtx abd = gen_reg_rtx (V16QImode); emit_insn (gen_aarch64_abdv16qi (abd, operands[1], operands[2])); - emit_insn (gen_aarch64_udotv16qi (operands[0], operands[3], - abd, ones)); + emit_insn (gen_udot_prodv16qi (operands[0], operands[3], abd, ones)); DONE; } rtx reduc = gen_reg_rtx (V8HImode); diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 00d76ea..597f44c 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -31767,28 +31767,28 @@ __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vdot_u32 (uint32x2_t __r, uint8x8_t __a, uint8x8_t __b) { - return __builtin_aarch64_udotv8qi_uuuu (__r, __a, __b); + return __builtin_aarch64_udot_prodv8qi_uuuu (__r, __a, __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vdotq_u32 (uint32x4_t __r, uint8x16_t __a, uint8x16_t __b) { - return __builtin_aarch64_udotv16qi_uuuu (__r, __a, __b); + return __builtin_aarch64_udot_prodv16qi_uuuu (__r, __a, __b); } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vdot_s32 (int32x2_t __r, int8x8_t __a, int8x8_t __b) { - return __builtin_aarch64_sdotv8qi (__r, __a, __b); + return __builtin_aarch64_sdot_prodv8qi (__r, __a, __b); 
} __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vdotq_s32 (int32x4_t __r, int8x16_t __a, int8x16_t __b) { - return __builtin_aarch64_sdotv16qi (__r, __a, __b); + return __builtin_aarch64_sdot_prodv16qi (__r, __a, __b); } __extension__ extern __inline uint32x2_t -- cgit v1.1 From 269ca408e2839d7f3554a91515d73d4d95352f68 Mon Sep 17 00:00:00 2001 From: Harald Anlauf Date: Wed, 14 Jul 2021 17:25:29 +0200 Subject: Fortran - ICE in gfc_conv_expr_present initializing non-dummy class variable gcc/fortran/ChangeLog: PR fortran/100949 * trans-expr.c (gfc_trans_class_init_assign): Call gfc_conv_expr_present only for dummy variables. gcc/testsuite/ChangeLog: PR fortran/100949 * gfortran.dg/pr100949.f90: New test. --- gcc/fortran/trans-expr.c | 5 +++-- gcc/testsuite/gfortran.dg/pr100949.f90 | 10 ++++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gfortran.dg/pr100949.f90 (limited to 'gcc') diff --git a/gcc/fortran/trans-expr.c b/gcc/fortran/trans-expr.c index de406ad..9e0dcde 100644 --- a/gcc/fortran/trans-expr.c +++ b/gcc/fortran/trans-expr.c @@ -1741,8 +1741,9 @@ gfc_trans_class_init_assign (gfc_code *code) } } - if (code->expr1->symtree->n.sym->attr.optional - || code->expr1->symtree->n.sym->ns->proc_name->attr.entry_master) + if (code->expr1->symtree->n.sym->attr.dummy + && (code->expr1->symtree->n.sym->attr.optional + || code->expr1->symtree->n.sym->ns->proc_name->attr.entry_master)) { tree present = gfc_conv_expr_present (code->expr1->symtree->n.sym); tmp = build3_loc (input_location, COND_EXPR, TREE_TYPE (tmp), diff --git a/gcc/testsuite/gfortran.dg/pr100949.f90 b/gcc/testsuite/gfortran.dg/pr100949.f90 new file mode 100644 index 0000000..6c736fd --- /dev/null +++ b/gcc/testsuite/gfortran.dg/pr100949.f90 @@ -0,0 +1,10 @@ +! { dg-do compile } +! 
PR fortran/100949 - ICE in gfc_conv_expr_present, at fortran/trans-expr.c:1975 + +subroutine s +entry f + type t + end type + class(t), allocatable :: y, z + allocate (z, mold=y) +end -- cgit v1.1 From a42f8120442cf3ba25d621bed857b5be19019d0c Mon Sep 17 00:00:00 2001 From: Marek Polacek Date: Tue, 13 Jul 2021 17:16:54 -0400 Subject: c++: constexpr array reference and value-initialization [PR101371] This PR gave me a hard time: I saw multiple issues starting with different revisions. But ultimately the root cause seems to be the following, and the attached patch fixes all issues I've found here. In cxx_eval_array_reference we create a new constexpr context for the CP_AGGREGATE_TYPE_P case, but we also have to create it for the non-aggregate case. In this test, we are evaluating ((B *)this)->a = rhs->a which means that we set ctx.object to ((B *)this)->a. Then we proceed to evaluate the initializer, rhs->a. For *rhs, we eval rhs, a PARM_DECL, for which we have (const B &) &c.arr[0] in the hash table. Then cxx_fold_indirect_ref gives us c.arr[0]. c is evaluated to {.arr={}} so c.arr is {}. Now we want c.arr[0], so we end up in cxx_eval_array_reference and since we're initializing from {}, we call build_value_init which gives us an AGGR_INIT_EXPR that calls 'constexpr B::B()'. Then we evaluate this AGGR_INIT_EXPR and since its first argument is dummy, we take ctx.object instead. But that is the wrong object, we're not initializing ((B *)this)->a here. And so we wound up with an initializer for A, and then crash in cxx_eval_component_reference: gcc_assert (DECL_CONTEXT (part) == TYPE_MAIN_VARIANT (TREE_TYPE (whole))); where DECL_CONTEXT (part) is B (as it should be) but the type of whole was A. So create a new object, if there already was one, and the element type is not a scalar. PR c++/101371 gcc/cp/ChangeLog: * constexpr.c (cxx_eval_array_reference): Create a new .object and .ctor for the non-aggregate non-scalar case too when value-initializing. 
gcc/testsuite/ChangeLog: * g++.dg/cpp1y/constexpr-101371-2.C: New test. * g++.dg/cpp1y/constexpr-101371.C: New test. --- gcc/cp/constexpr.c | 15 +++++++++---- gcc/testsuite/g++.dg/cpp1y/constexpr-101371-2.C | 23 ++++++++++++++++++++ gcc/testsuite/g++.dg/cpp1y/constexpr-101371.C | 29 +++++++++++++++++++++++++ 3 files changed, 63 insertions(+), 4 deletions(-) create mode 100644 gcc/testsuite/g++.dg/cpp1y/constexpr-101371-2.C create mode 100644 gcc/testsuite/g++.dg/cpp1y/constexpr-101371.C (limited to 'gcc') diff --git a/gcc/cp/constexpr.c b/gcc/cp/constexpr.c index 39787f3..31fa5b6 100644 --- a/gcc/cp/constexpr.c +++ b/gcc/cp/constexpr.c @@ -3851,16 +3851,23 @@ cxx_eval_array_reference (const constexpr_ctx *ctx, tree t, { tree empty_ctor = build_constructor (init_list_type_node, NULL); val = digest_init (elem_type, empty_ctor, tf_warning_or_error); + } + else + val = build_value_init (elem_type, tf_warning_or_error); + + if (!SCALAR_TYPE_P (elem_type)) + { new_ctx = *ctx; - new_ctx.object = t; + if (ctx->object) + /* If there was no object, don't add one: it could confuse us + into thinking we're modifying a const object. 
*/ + new_ctx.object = t; new_ctx.ctor = build_constructor (elem_type, NULL); ctx = &new_ctx; } - else - val = build_value_init (elem_type, tf_warning_or_error); t = cxx_eval_constant_expression (ctx, val, lval, non_constant_p, overflow_p); - if (CP_AGGREGATE_TYPE_P (elem_type) && t != ctx->ctor) + if (!SCALAR_TYPE_P (elem_type) && t != ctx->ctor) free_constructor (ctx->ctor); return t; } diff --git a/gcc/testsuite/g++.dg/cpp1y/constexpr-101371-2.C b/gcc/testsuite/g++.dg/cpp1y/constexpr-101371-2.C new file mode 100644 index 0000000..fb67b67 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp1y/constexpr-101371-2.C @@ -0,0 +1,23 @@ +// PR c++/101371 +// { dg-do compile { target c++14 } } + +struct A { + int i; +}; +struct B { + A a{}; + constexpr B() : a() {} + constexpr B(const B &rhs) : a(rhs.a) {} +}; +struct C { + B arr[1]; +}; + +constexpr C +fn () +{ + C c{}; + return c; +} + +constexpr C c = fn(); diff --git a/gcc/testsuite/g++.dg/cpp1y/constexpr-101371.C b/gcc/testsuite/g++.dg/cpp1y/constexpr-101371.C new file mode 100644 index 0000000..b6351b8 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp1y/constexpr-101371.C @@ -0,0 +1,29 @@ +// PR c++/101371 +// { dg-do compile { target c++14 } } + +struct A { + int i; +}; +struct B { + A a{}; + constexpr B() : a() {} + constexpr B(const B &rhs) : a(rhs.a) {} +}; +struct C { + B arr[1]; +}; + +struct X { + constexpr C fn () const + { + C c{}; + return c; + } +}; + +void +g () +{ + X x; + constexpr auto z = x.fn(); +} -- cgit v1.1 From 398572c1544d8b7541862401b985ae7e855cb8fb Mon Sep 17 00:00:00 2001 From: Andrew MacLeod Date: Wed, 14 Jul 2021 12:47:10 -0400 Subject: Turn hybrid mode off, default to ranger-only mode for EVRP. Change the default EVRP mode to ranger-only. gcc/ * params.opt (param_evrp_mode): Change default. gcc/testsuite/ * gcc.dg/pr80776-1.c: Remove xfail. 
--- gcc/params.opt | 2 +- gcc/testsuite/gcc.dg/pr80776-1.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'gcc') diff --git a/gcc/params.opt b/gcc/params.opt index 577cd42..92b003e 100644 --- a/gcc/params.opt +++ b/gcc/params.opt @@ -131,7 +131,7 @@ Common Joined UInteger Var(param_evrp_sparse_threshold) Init(800) Optimization P Maximum number of basic blocks before EVRP uses a sparse cache. -param=evrp-mode= -Common Joined Var(param_evrp_mode) Enum(evrp_mode) Init(EVRP_MODE_EVRP_FIRST) Param Optimization +Common Joined Var(param_evrp_mode) Enum(evrp_mode) Init(EVRP_MODE_RVRP_ONLY) Param Optimization --param=evrp-mode=[legacy|ranger|legacy-first|ranger-first|ranger-trace|ranger-debug|trace|debug] Specifies the mode Early VRP should operate in. Enum diff --git a/gcc/testsuite/gcc.dg/pr80776-1.c b/gcc/testsuite/gcc.dg/pr80776-1.c index eca5e80..b9bce62 100644 --- a/gcc/testsuite/gcc.dg/pr80776-1.c +++ b/gcc/testsuite/gcc.dg/pr80776-1.c @@ -27,5 +27,5 @@ Foo (void) Setting these ranges at the definition site, causes VRP to remove the unreachable code altogether, leaving the following sprintf unguarded. This causes the bogus warning below. */ - sprintf (number, "%d", i); /* { dg-bogus "writing" "" { xfail *-*-* } } */ + sprintf (number, "%d", i); /* { dg-bogus "writing" "" } */ } -- cgit v1.1 From 4940166a15193d6583b320f2957af8720745b76c Mon Sep 17 00:00:00 2001 From: Tamar Christina Date: Wed, 14 Jul 2021 19:00:59 +0100 Subject: Vect: correct rebase issue The lines being removed have been updated and merged into a new condition. But when resolving some conflicts I accidentally reintroduced them causing some test failures. This removes them. Committed as the changes were previously approved in https://gcc.gnu.org/pipermail/gcc-patches/2021-July/574977.html but the hunk was misapplied during a rebase. gcc/ChangeLog: * tree-vect-patterns.c (vect_recog_dot_prod_pattern): Remove erroneous line. 
gcc/testsuite/ChangeLog: * gcc.dg/vect/vect-reduc-dot-11.c: Expect pass. * gcc.dg/vect/vect-reduc-dot-15.c: Likewise. * gcc.dg/vect/vect-reduc-dot-19.c: Likewise. * gcc.dg/vect/vect-reduc-dot-21.c: Likewise. --- gcc/testsuite/gcc.dg/vect/vect-reduc-dot-11.c | 2 +- gcc/testsuite/gcc.dg/vect/vect-reduc-dot-15.c | 2 +- gcc/testsuite/gcc.dg/vect/vect-reduc-dot-19.c | 2 +- gcc/testsuite/gcc.dg/vect/vect-reduc-dot-21.c | 2 +- gcc/tree-vect-patterns.c | 6 ------ 5 files changed, 4 insertions(+), 10 deletions(-) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-11.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-11.c index 5e3cfc9..0f7cbbb 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-11.c +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-11.c @@ -9,5 +9,5 @@ #include "vect-reduc-dot-9.c" -/* { dg-final { scan-tree-dump-not "vect_recog_dot_prod_pattern: detected" "vect" } } */ +/* { dg-final { scan-tree-dump "vect_recog_dot_prod_pattern: detected" "vect" } } */ /* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_usdot_qi } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-15.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-15.c index 5a6fd19..dc48f95 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-15.c +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-15.c @@ -9,5 +9,5 @@ #include "vect-reduc-dot-9.c" -/* { dg-final { scan-tree-dump-not "vect_recog_dot_prod_pattern: detected" "vect" } } */ +/* { dg-final { scan-tree-dump "vect_recog_dot_prod_pattern: detected" "vect" } } */ /* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_usdot_qi } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-19.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-19.c index 962b24e..dbeaaec 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-19.c +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-19.c @@ -49,4 +49,4 @@ main (void) __builtin_abort (); } -/* { dg-final { scan-tree-dump-not 
"vect_recog_dot_prod_pattern: detected" "vect" } } */ +/* { dg-final { scan-tree-dump "vect_recog_dot_prod_pattern: detected" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-21.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-21.c index b5754bf..6d08bf4 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-21.c +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-21.c @@ -49,4 +49,4 @@ main (void) __builtin_abort (); } -/* { dg-final { scan-tree-dump-not "vect_recog_dot_prod_pattern: detected" "vect" } } */ +/* { dg-final { scan-tree-dump "vect_recog_dot_prod_pattern: detected" "vect" } } */ diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c index 71533e6..53ced5d 100644 --- a/gcc/tree-vect-patterns.c +++ b/gcc/tree-vect-patterns.c @@ -1039,12 +1039,6 @@ vect_recog_dot_prod_pattern (vec_info *vinfo, : TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type))) return NULL; - /* If there are two widening operations, make sure they agree on - the sign of the extension. */ - if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type) - && TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type)) - return NULL; - vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt); tree half_vectype; -- cgit v1.1 From b15e301748f0e042379909e32b3ade439dd8f8f9 Mon Sep 17 00:00:00 2001 From: Jason Merrill Date: Fri, 9 Jul 2021 05:45:03 -0400 Subject: c++: enable -fdelete-dead-exceptions by default As I was discussing with richi, I don't think it makes sense to protect calls to pure/const functions from DCE just because they aren't explicitly declared noexcept. PR100382 indicates that there are different considerations for Go, which has non-call exceptions. But still turn the flag off for that specific testcase. gcc/c-family/ChangeLog: * c-opts.c (c_common_post_options): Set -fdelete-dead-exceptions. gcc/ChangeLog: * doc/invoke.texi: -fdelete-dead-exceptions is on by default for C++. 
gcc/testsuite/ChangeLog: * g++.dg/torture/pr100382.C: Pass -fno-delete-dead-exceptions. --- gcc/c-family/c-opts.c | 4 ++++ gcc/doc/invoke.texi | 6 ++++-- gcc/testsuite/g++.dg/torture/pr100382.C | 1 + 3 files changed, 9 insertions(+), 2 deletions(-) (limited to 'gcc') diff --git a/gcc/c-family/c-opts.c b/gcc/c-family/c-opts.c index 60b5802..1c4e832c 100644 --- a/gcc/c-family/c-opts.c +++ b/gcc/c-family/c-opts.c @@ -1015,6 +1015,10 @@ c_common_post_options (const char **pfilename) SET_OPTION_IF_UNSET (&global_options, &global_options_set, flag_finite_loops, optimize >= 2 && cxx_dialect >= cxx11); + /* It's OK to discard calls to pure/const functions that might throw. */ + SET_OPTION_IF_UNSET (&global_options, &global_options_set, + flag_delete_dead_exceptions, true); + if (cxx_dialect >= cxx11) { /* If we're allowing C++0x constructs, don't warn about C++98 diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index e67d47a..ea88124 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -16335,8 +16335,10 @@ arbitrary signal handlers such as @code{SIGALRM}. @opindex fdelete-dead-exceptions Consider that instructions that may throw exceptions but don't otherwise contribute to the execution of the program can be optimized away. -This option is enabled by default for the Ada compiler, as permitted by -the Ada language specification. +This does not affect calls to functions except those with the +@code{pure} or @code{const} attributes. +This option is enabled by default for the Ada and C++ compilers, as permitted by +the language specifications. Optimization passes that cause dead exceptions to be removed are enabled independently at different optimization levels. 
@item -funwind-tables diff --git a/gcc/testsuite/g++.dg/torture/pr100382.C b/gcc/testsuite/g++.dg/torture/pr100382.C index ffc4182..eac5743 100644 --- a/gcc/testsuite/g++.dg/torture/pr100382.C +++ b/gcc/testsuite/g++.dg/torture/pr100382.C @@ -1,4 +1,5 @@ // { dg-do run } +// { dg-additional-options -fno-delete-dead-exceptions } int x, y; int __attribute__((pure,noinline)) foo () { if (x) throw 1; return y; } -- cgit v1.1 From 91bb571d200e551f427e337e00494e0b4f229876 Mon Sep 17 00:00:00 2001 From: Jason Merrill Date: Tue, 13 Jul 2021 14:42:09 -0400 Subject: vec: use auto_vec in a few more places The uses of vec in get_all_loop_exits and process_conditional were memory leaks, as .release() was never called for them. The other changes are some cases that did have proper release handling, but it's simpler to leave releasing to the auto_vec destructor. gcc/ChangeLog: * sel-sched-ir.h (get_all_loop_exits): Use auto_vec. gcc/cp/ChangeLog: * class.c (struct find_final_overrider_data): Use auto_vec. (find_final_overrider): Remove explicit release. * coroutines.cc (process_conditional): Use auto_vec. * cp-gimplify.c (struct cp_genericize_data): Use auto_vec. (cp_genericize_tree): Remove explicit release. * parser.c (cp_parser_objc_at_property_declaration): Use auto_delete_vec. * semantics.c (omp_reduction_lookup): Use auto_vec. --- gcc/cp/class.c | 4 +--- gcc/cp/coroutines.cc | 2 +- gcc/cp/cp-gimplify.c | 3 +-- gcc/cp/parser.c | 6 +----- gcc/cp/semantics.c | 3 +-- gcc/sel-sched-ir.h | 2 +- 6 files changed, 6 insertions(+), 14 deletions(-) (limited to 'gcc') diff --git a/gcc/cp/class.c b/gcc/cp/class.c index 33093e1..14db066 100644 --- a/gcc/cp/class.c +++ b/gcc/cp/class.c @@ -2391,7 +2391,7 @@ struct find_final_overrider_data { /* The candidate overriders. */ tree candidates; /* Path to most derived. */ - vec path; + auto_vec path; }; /* Add the overrider along the current path to FFOD->CANDIDATES. 
@@ -2504,8 +2504,6 @@ find_final_overrider (tree derived, tree binfo, tree fn) dfs_walk_all (derived, dfs_find_final_overrider_pre, dfs_find_final_overrider_post, &ffod); - ffod.path.release (); - /* If there was no winner, issue an error message. */ if (!ffod.candidates || TREE_CHAIN (ffod.candidates)) return error_mark_node; diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc index 54ffdc8..712a5c0 100644 --- a/gcc/cp/coroutines.cc +++ b/gcc/cp/coroutines.cc @@ -3081,7 +3081,7 @@ process_conditional (var_nest_node *n, tree& vlist) { tree init = n->init; hash_map var_flags; - vec var_list = vNULL; + auto_vec var_list; tree new_then = push_stmt_list (); handle_nested_conditionals (n->then_cl, var_list, var_flags); new_then = pop_stmt_list (new_then); diff --git a/gcc/cp/cp-gimplify.c b/gcc/cp/cp-gimplify.c index 00b7772..de37f2c 100644 --- a/gcc/cp/cp-gimplify.c +++ b/gcc/cp/cp-gimplify.c @@ -807,7 +807,7 @@ omp_cxx_notice_variable (struct cp_genericize_omp_taskreg *omp_ctx, tree decl) struct cp_genericize_data { hash_set *p_set; - vec bind_expr_stack; + auto_vec bind_expr_stack; struct cp_genericize_omp_taskreg *omp_ctx; tree try_block; bool no_sanitize_p; @@ -1582,7 +1582,6 @@ cp_genericize_tree (tree* t_p, bool handle_invisiref_parm_p) wtd.handle_invisiref_parm_p = handle_invisiref_parm_p; cp_walk_tree (t_p, cp_genericize_r, &wtd, NULL); delete wtd.p_set; - wtd.bind_expr_stack.release (); if (sanitize_flags_p (SANITIZE_VPTR)) cp_ubsan_instrument_member_accesses (t_p); } diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c index 93698aa..821ce17 100644 --- a/gcc/cp/parser.c +++ b/gcc/cp/parser.c @@ -35247,7 +35247,7 @@ cp_parser_objc_at_property_declaration (cp_parser *parser) /* Parse the optional attribute list. A list of parsed, but not verified, attributes. */ - vec prop_attr_list = vNULL; + auto_delete_vec prop_attr_list; location_t loc = cp_lexer_peek_token (parser->lexer)->location; cp_lexer_consume_token (parser->lexer); /* Eat '@property'. 
*/ @@ -35423,10 +35423,6 @@ cp_parser_objc_at_property_declaration (cp_parser *parser) } cp_parser_consume_semicolon_at_end_of_statement (parser); - - while (!prop_attr_list.is_empty()) - delete prop_attr_list.pop (); - prop_attr_list.release (); } /* Parse an Objective-C++ @synthesize declaration. The syntax is: diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c index b080259..b97dc1f 100644 --- a/gcc/cp/semantics.c +++ b/gcc/cp/semantics.c @@ -5774,7 +5774,7 @@ omp_reduction_lookup (location_t loc, tree id, tree type, tree *baselinkp, if (!id && CLASS_TYPE_P (type) && TYPE_BINFO (type)) { - vec ambiguous = vNULL; + auto_vec ambiguous; tree binfo = TYPE_BINFO (type), base_binfo, ret = NULL_TREE; unsigned int ix; if (ambiguousp == NULL) @@ -5811,7 +5811,6 @@ omp_reduction_lookup (location_t loc, tree id, tree type, tree *baselinkp, if (idx == 0) str = get_spaces (str); } - ambiguous.release (); ret = error_mark_node; baselink = NULL_TREE; } diff --git a/gcc/sel-sched-ir.h b/gcc/sel-sched-ir.h index 78b2566..8ee0529 100644 --- a/gcc/sel-sched-ir.h +++ b/gcc/sel-sched-ir.h @@ -1166,7 +1166,7 @@ get_all_loop_exits (basic_block bb) || (inner_loop_header_p (e->dest))) && loop_depth (e->dest->loop_father) >= this_depth) { - vec next_exits = get_all_loop_exits (e->dest); + auto_vec next_exits = get_all_loop_exits (e->dest); if (next_exits.exists ()) { -- cgit v1.1 From bebd8e9da838c51a7f911985083d5a2b2498a23a Mon Sep 17 00:00:00 2001 From: Patrick Palka Date: Wed, 14 Jul 2021 15:37:30 -0400 Subject: c++: CTAD and forwarding references [PR88252] Here during CTAD we're incorrectly treating T&& as a forwarding reference even though T is a template parameter of the class template. This happens because the template parameter T in the out-of-line definition of the constructor doesn't have the flag TEMPLATE_TYPE_PARM_FOR_CLASS set, and during duplicate_decls the redeclaration (which is in terms of this unflagged T) prevails.
To fix this, we could perhaps be more consistent about setting the flag, but it appears we don't really need this flag to make the determination. Since the template parameters of a synthesized guide consist of the template parameters of the class template followed by those of the constructor (if any), it should suffice to look at the index of the template parameter to determine whether it comes from the class template or the constructor (template). This patch replaces the TEMPLATE_TYPE_PARM_FOR_CLASS flag with this approach. PR c++/88252 gcc/cp/ChangeLog: * cp-tree.h (TEMPLATE_TYPE_PARM_FOR_CLASS): Remove. * pt.c (push_template_decl): Remove TEMPLATE_TYPE_PARM_FOR_CLASS handling. (redeclare_class_template): Likewise. (forwarding_reference_p): Define. (maybe_adjust_types_for_deduction): Use it instead. Add 'tparms' parameter. (unify_one_argument): Pass tparms to maybe_adjust_types_for_deduction. (try_one_overload): Likewise. (unify): Likewise. (rewrite_template_parm): Remove TEMPLATE_TYPE_PARM_FOR_CLASS handling. gcc/testsuite/ChangeLog: * g++.dg/cpp1z/class-deduction96.C: New test.
--- gcc/cp/cp-tree.h | 6 -- gcc/cp/pt.c | 90 ++++++++++++++------------ gcc/testsuite/g++.dg/cpp1z/class-deduction96.C | 34 ++++++++++ 3 files changed, 84 insertions(+), 46 deletions(-) create mode 100644 gcc/testsuite/g++.dg/cpp1z/class-deduction96.C (limited to 'gcc') diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index b1cf44e..f4bcab5 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -443,7 +443,6 @@ extern GTY(()) tree cp_global_trees[CPTI_MAX]; BLOCK_OUTER_CURLY_BRACE_P (in BLOCK) FOLD_EXPR_MODOP_P (*_FOLD_EXPR) IF_STMT_CONSTEXPR_P (IF_STMT) - TEMPLATE_TYPE_PARM_FOR_CLASS (TEMPLATE_TYPE_PARM) DECL_NAMESPACE_INLINE_P (in NAMESPACE_DECL) SWITCH_STMT_ALL_CASES_P (in SWITCH_STMT) REINTERPRET_CAST_P (in NOP_EXPR) @@ -5863,11 +5862,6 @@ enum auto_deduction_context adc_decomp_type /* Decomposition declaration initializer deduction */ }; -/* True if this type-parameter belongs to a class template, used by C++17 - class template argument deduction. */ -#define TEMPLATE_TYPE_PARM_FOR_CLASS(NODE) \ - (TREE_LANG_FLAG_0 (TEMPLATE_TYPE_PARM_CHECK (NODE))) - /* True iff this TEMPLATE_TYPE_PARM represents decltype(auto). 
*/ #define AUTO_IS_DECLTYPE(NODE) \ (TYPE_LANG_FLAG_5 (TEMPLATE_TYPE_PARM_CHECK (NODE))) diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c index cf0ce77..c7bf7d4 100644 --- a/gcc/cp/pt.c +++ b/gcc/cp/pt.c @@ -154,8 +154,8 @@ static void tsubst_enum (tree, tree, tree); static bool check_instantiated_args (tree, tree, tsubst_flags_t); static int check_non_deducible_conversion (tree, tree, int, int, struct conversion **, bool); -static int maybe_adjust_types_for_deduction (unification_kind_t, tree*, tree*, - tree); +static int maybe_adjust_types_for_deduction (tree, unification_kind_t, + tree*, tree*, tree); static int type_unification_real (tree, tree, tree, const tree *, unsigned int, int, unification_kind_t, vec **, @@ -5801,18 +5801,7 @@ push_template_decl (tree decl, bool is_friend) } else if (DECL_IMPLICIT_TYPEDEF_P (decl) && CLASS_TYPE_P (TREE_TYPE (decl))) - { - /* Class template, set TEMPLATE_TYPE_PARM_FOR_CLASS. */ - tree parms = INNERMOST_TEMPLATE_PARMS (current_template_parms); - for (int i = 0; i < TREE_VEC_LENGTH (parms); ++i) - { - tree t = TREE_VALUE (TREE_VEC_ELT (parms, i)); - if (TREE_CODE (t) == TYPE_DECL) - t = TREE_TYPE (t); - if (TREE_CODE (t) == TEMPLATE_TYPE_PARM) - TEMPLATE_TYPE_PARM_FOR_CLASS (t) = true; - } - } + /* Class template. */; else if (TREE_CODE (decl) == TYPE_DECL && TYPE_DECL_ALIAS_P (decl)) /* alias-declaration */ @@ -6292,9 +6281,6 @@ redeclare_class_template (tree type, tree parms, tree cons) gcc_assert (DECL_CONTEXT (parm) == NULL_TREE); DECL_CONTEXT (parm) = tmpl; } - - if (TREE_CODE (parm) == TYPE_DECL) - TEMPLATE_TYPE_PARM_FOR_CLASS (TREE_TYPE (parm)) = true; } tree ci = get_constraints (tmpl); @@ -21709,6 +21695,41 @@ fn_type_unification (tree fn, return r; } +/* Returns true iff PARM is a forwarding reference in the context of + template argument deduction for TMPL. 
*/ + +static bool +forwarding_reference_p (tree parm, tree tmpl) +{ + /* [temp.deduct.call], "A forwarding reference is an rvalue reference to a + cv-unqualified template parameter ..." */ + if (TYPE_REF_P (parm) + && TYPE_REF_IS_RVALUE (parm) + && TREE_CODE (TREE_TYPE (parm)) == TEMPLATE_TYPE_PARM + && cp_type_quals (TREE_TYPE (parm)) == TYPE_UNQUALIFIED) + { + parm = TREE_TYPE (parm); + /* [temp.deduct.call], "... that does not represent a template parameter + of a class template (during class template argument deduction)." */ + if (tmpl + && deduction_guide_p (tmpl) + && DECL_ARTIFICIAL (tmpl)) + { + /* Since the template parameters of a synthesized guide consist of + the template parameters of the class template followed by those of + the constructor (if any), we can tell if PARM represents a template + parameter of the class template by comparing its index with the + arity of the class template. */ + tree ctmpl = CLASSTYPE_TI_TEMPLATE (TREE_TYPE (TREE_TYPE (tmpl))); + if (TEMPLATE_TYPE_IDX (parm) + < TREE_VEC_LENGTH (DECL_INNERMOST_TEMPLATE_PARMS (ctmpl))) + return false; + } + return true; + } + return false; +} + /* Adjust types before performing type deduction, as described in [temp.deduct.call] and [temp.deduct.conv]. The rules in these two sections are symmetric. PARM is the type of a function parameter @@ -21718,7 +21739,8 @@ fn_type_unification (tree fn, ARG_EXPR is the original argument expression, which may be null. */ static int -maybe_adjust_types_for_deduction (unification_kind_t strict, +maybe_adjust_types_for_deduction (tree tparms, + unification_kind_t strict, tree* parm, tree* arg, tree arg_expr) @@ -21741,10 +21763,7 @@ maybe_adjust_types_for_deduction (unification_kind_t strict, /* Core issue #873: Do the DR606 thing (see below) for these cases, too, but here handle it by stripping the reference from PARM rather than by adding it to ARG. 
*/ - if (TYPE_REF_P (*parm) - && TYPE_REF_IS_RVALUE (*parm) - && TREE_CODE (TREE_TYPE (*parm)) == TEMPLATE_TYPE_PARM - && cp_type_quals (TREE_TYPE (*parm)) == TYPE_UNQUALIFIED + if (forwarding_reference_p (*parm, TPARMS_PRIMARY_TEMPLATE (tparms)) && TYPE_REF_P (*arg) && !TYPE_REF_IS_RVALUE (*arg)) *parm = TREE_TYPE (*parm); @@ -21781,17 +21800,10 @@ maybe_adjust_types_for_deduction (unification_kind_t strict, *arg = TYPE_MAIN_VARIANT (*arg); } - /* [14.8.2.1/3 temp.deduct.call], "A forwarding reference is an rvalue - reference to a cv-unqualified template parameter that does not represent a - template parameter of a class template (during class template argument - deduction (13.3.1.8)). If P is a forwarding reference and the argument is - an lvalue, the type "lvalue reference to A" is used in place of A for type - deduction. */ - if (TYPE_REF_P (*parm) - && TYPE_REF_IS_RVALUE (*parm) - && TREE_CODE (TREE_TYPE (*parm)) == TEMPLATE_TYPE_PARM - && !TEMPLATE_TYPE_PARM_FOR_CLASS (TREE_TYPE (*parm)) - && cp_type_quals (TREE_TYPE (*parm)) == TYPE_UNQUALIFIED + /* [temp.deduct.call], "If P is a forwarding reference and the argument is + an lvalue, the type 'lvalue reference to A' is used in place of A for + type deduction." */ + if (forwarding_reference_p (*parm, TPARMS_PRIMARY_TEMPLATE (tparms)) && (arg_expr ? lvalue_p (arg_expr) /* try_one_overload doesn't provide an arg_expr, but functions are always lvalues. 
*/ @@ -22080,8 +22092,8 @@ unify_one_argument (tree tparms, tree targs, tree parm, tree arg, return unify_invalid (explain_p); } - arg_strict |= - maybe_adjust_types_for_deduction (strict, &parm, &arg, arg_expr); + arg_strict |= maybe_adjust_types_for_deduction (tparms, strict, + &parm, &arg, arg_expr); } else if ((TYPE_P (parm) || TREE_CODE (parm) == TEMPLATE_DECL) @@ -22750,7 +22762,8 @@ try_one_overload (tree tparms, else if (addr_p) arg = build_pointer_type (arg); - sub_strict |= maybe_adjust_types_for_deduction (strict, &parm, &arg, NULL); + sub_strict |= maybe_adjust_types_for_deduction (tparms, strict, + &parm, &arg, NULL_TREE); /* We don't copy orig_targs for this because if we have already deduced some template args from previous args, unify would complain when we @@ -23449,7 +23462,7 @@ unify (tree tparms, tree targs, tree parm, tree arg, int strict, /* It should only be possible to get here for a call. */ gcc_assert (elt_strict & UNIFY_ALLOW_OUTER_LEVEL); elt_strict |= maybe_adjust_types_for_deduction - (DEDUCE_CALL, &elttype, &type, elt); + (tparms, DEDUCE_CALL, &elttype, &type, elt); elt = type; } @@ -28495,9 +28508,6 @@ rewrite_template_parm (tree olddecl, unsigned index, unsigned level, tree oldtype = TREE_TYPE (olddecl); newtype = cxx_make_type (TREE_CODE (oldtype)); TYPE_MAIN_VARIANT (newtype) = newtype; - if (TREE_CODE (oldtype) == TEMPLATE_TYPE_PARM) - TEMPLATE_TYPE_PARM_FOR_CLASS (newtype) - = TEMPLATE_TYPE_PARM_FOR_CLASS (oldtype); } else { diff --git a/gcc/testsuite/g++.dg/cpp1z/class-deduction96.C b/gcc/testsuite/g++.dg/cpp1z/class-deduction96.C new file mode 100644 index 0000000..7fa8400 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp1z/class-deduction96.C @@ -0,0 +1,34 @@ +// PR c++/88252 +// { dg-do compile { target c++17 } } + +template +struct A { + A(T&&); + template A(T&&, U&&); + template struct B; +}; + +template +A::A(T&&) { } + +template +template +A::A(T&&, U&&) { } + +template +template +struct A::B { + B(U&&); + template B(U&&, 
V&&); +}; + +int i; + +int main() { + A{i}; // { dg-error "deduction|no match|rvalue reference" } + A{i, 0}; // { dg-error "deduction|no match|rvalue reference" } + A{0, i}; + A::B{i}; // { dg-error "deduction|no match|rvalue reference" } + A::B{i, 0}; // { dg-error "deduction|no match|rvalue reference" } + A::B{0, i}; +} -- cgit v1.1 From 7d914777fc6c6151f430d798fc97bae927a430f7 Mon Sep 17 00:00:00 2001 From: Peter Bergner Date: Wed, 14 Jul 2021 18:23:31 -0500 Subject: rs6000: Move rs6000_split_multireg_move to later in file An upcoming change to rs6000_split_multireg_move requires it to be moved later in the file to fix a declaration issue. 2021-07-14 Peter Bergner gcc/ * config/rs6000/rs6000.c (rs6000_split_multireg_move): Move to later in the file. --- gcc/config/rs6000/rs6000.c | 1845 ++++++++++++++++++++++---------------------- 1 file changed, 922 insertions(+), 923 deletions(-) (limited to 'gcc') diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index de11de5..1d27bb8 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -16690,533 +16690,157 @@ rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val, emit_move_insn (orig_after, after); } -/* Emit instructions to move SRC to DST. Called by splitters for - multi-register moves. It will emit at most one instruction for - each register that is accessed; that is, it won't emit li/lis pairs - (or equivalent for 64-bit code). One of SRC or DST must be a hard - register. */ +static GTY(()) alias_set_type TOC_alias_set = -1; -void -rs6000_split_multireg_move (rtx dst, rtx src) +alias_set_type +get_TOC_alias_set (void) { - /* The register number of the first register being moved. */ - int reg; - /* The mode that is to be moved. */ - machine_mode mode; - /* The mode that the move is being done in, and its size. */ - machine_mode reg_mode; - int reg_mode_size; - /* The number of registers that will be moved. 
*/ - int nregs; + if (TOC_alias_set == -1) + TOC_alias_set = new_alias_set (); + return TOC_alias_set; +} - reg = REG_P (dst) ? REGNO (dst) : REGNO (src); - mode = GET_MODE (dst); - nregs = hard_regno_nregs (reg, mode); +/* The mode the ABI uses for a word. This is not the same as word_mode + for -m32 -mpowerpc64. This is used to implement various target hooks. */ - /* If we have a vector quad register for MMA, and this is a load or store, - see if we can use vector paired load/stores. */ - if (mode == XOmode && TARGET_MMA - && (MEM_P (dst) || MEM_P (src))) - { - reg_mode = OOmode; - nregs /= 2; - } - /* If we have a vector pair/quad mode, split it into two/four separate - vectors. */ - else if (mode == OOmode || mode == XOmode) - reg_mode = V1TImode; - else if (FP_REGNO_P (reg)) - reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : - (TARGET_HARD_FLOAT ? DFmode : SFmode); - else if (ALTIVEC_REGNO_P (reg)) - reg_mode = V16QImode; +static scalar_int_mode +rs6000_abi_word_mode (void) +{ + return TARGET_32BIT ? SImode : DImode; +} + +/* Implement the TARGET_OFFLOAD_OPTIONS hook. */ +static char * +rs6000_offload_options (void) +{ + if (TARGET_64BIT) + return xstrdup ("-foffload-abi=lp64"); else - reg_mode = word_mode; - reg_mode_size = GET_MODE_SIZE (reg_mode); + return xstrdup ("-foffload-abi=ilp32"); +} - gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode)); + +/* A quick summary of the various types of 'constant-pool tables' + under PowerPC: - /* TDmode residing in FP registers is special, since the ISA requires that - the lower-numbered word of a register pair is always the most significant - word, even in little-endian mode. This does not match the usual subreg - semantics, so we cannnot use simplify_gen_subreg in those cases. Access - the appropriate constituent registers "by hand" in little-endian mode. 
+ Target Flags Name One table per + AIX (none) AIX TOC object file + AIX -mfull-toc AIX TOC object file + AIX -mminimal-toc AIX minimal TOC translation unit + SVR4/EABI (none) SVR4 SDATA object file + SVR4/EABI -fpic SVR4 pic object file + SVR4/EABI -fPIC SVR4 PIC translation unit + SVR4/EABI -mrelocatable EABI TOC function + SVR4/EABI -maix AIX TOC object file + SVR4/EABI -maix -mminimal-toc + AIX minimal TOC translation unit - Note we do not need to check for destructive overlap here since TDmode - can only reside in even/odd register pairs. */ - if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN) - { - rtx p_src, p_dst; - int i; + Name Reg. Set by entries contains: + made by addrs? fp? sum? - for (i = 0; i < nregs; i++) - { - if (REG_P (src) && FP_REGNO_P (REGNO (src))) - p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i); - else - p_src = simplify_gen_subreg (reg_mode, src, mode, - i * reg_mode_size); + AIX TOC 2 crt0 as Y option option + AIX minimal TOC 30 prolog gcc Y Y option + SVR4 SDATA 13 crt0 gcc N Y N + SVR4 pic 30 prolog ld Y not yet N + SVR4 PIC 30 prolog gcc Y option option + EABI TOC 30 prolog gcc Y option option - if (REG_P (dst) && FP_REGNO_P (REGNO (dst))) - p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i); - else - p_dst = simplify_gen_subreg (reg_mode, dst, mode, - i * reg_mode_size); +*/ - emit_insn (gen_rtx_SET (p_dst, p_src)); - } +/* Hash functions for the hash table. */ - return; - } +static unsigned +rs6000_hash_constant (rtx k) +{ + enum rtx_code code = GET_CODE (k); + machine_mode mode = GET_MODE (k); + unsigned result = (code << 3) ^ mode; + const char *format; + int flen, fidx; - /* The __vector_pair and __vector_quad modes are multi-register - modes, so if we have to load or store the registers, we have to be - careful to properly swap them if we're in little endian mode - below. This means the last register gets the first memory - location. 
We also need to be careful of using the right register - numbers if we are splitting XO to OO. */ - if (mode == OOmode || mode == XOmode) + format = GET_RTX_FORMAT (code); + flen = strlen (format); + fidx = 0; + + switch (code) { - nregs = hard_regno_nregs (reg, mode); - int reg_mode_nregs = hard_regno_nregs (reg, reg_mode); - if (MEM_P (dst)) - { - unsigned offset = 0; - unsigned size = GET_MODE_SIZE (reg_mode); + case LABEL_REF: + return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0)); - /* If we are reading an accumulator register, we have to - deprime it before we can access it. */ - if (TARGET_MMA - && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src))) - emit_insn (gen_mma_xxmfacc (src, src)); + case CONST_WIDE_INT: + { + int i; + flen = CONST_WIDE_INT_NUNITS (k); + for (i = 0; i < flen; i++) + result = result * 613 + CONST_WIDE_INT_ELT (k, i); + return result; + } - for (int i = 0; i < nregs; i += reg_mode_nregs) - { - unsigned subreg = - (WORDS_BIG_ENDIAN) ? i : (nregs - reg_mode_nregs - i); - rtx dst2 = adjust_address (dst, reg_mode, offset); - rtx src2 = gen_rtx_REG (reg_mode, reg + subreg); - offset += size; - emit_insn (gen_rtx_SET (dst2, src2)); - } + case CONST_DOUBLE: + return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result; - return; - } + case CODE_LABEL: + fidx = 3; + break; - if (MEM_P (src)) + default: + break; + } + + for (; fidx < flen; fidx++) + switch (format[fidx]) + { + case 's': { - unsigned offset = 0; - unsigned size = GET_MODE_SIZE (reg_mode); + unsigned i, len; + const char *str = XSTR (k, fidx); + len = strlen (str); + result = result * 613 + len; + for (i = 0; i < len; i++) + result = result * 613 + (unsigned) str[i]; + break; + } + case 'u': + case 'e': + result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx)); + break; + case 'i': + case 'n': + result = result * 613 + (unsigned) XINT (k, fidx); + break; + case 'w': + if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT)) + result = result * 613 + (unsigned) XWINT (k, fidx); 
+ else + { + size_t i; + for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++) + result = result * 613 + (unsigned) (XWINT (k, fidx) + >> CHAR_BIT * i); + } + break; + case '0': + break; + default: + gcc_unreachable (); + } - for (int i = 0; i < nregs; i += reg_mode_nregs) - { - unsigned subreg = - (WORDS_BIG_ENDIAN) ? i : (nregs - reg_mode_nregs - i); - rtx dst2 = gen_rtx_REG (reg_mode, reg + subreg); - rtx src2 = adjust_address (src, reg_mode, offset); - offset += size; - emit_insn (gen_rtx_SET (dst2, src2)); - } + return result; +} - /* If we are writing an accumulator register, we have to - prime it after we've written it. */ - if (TARGET_MMA - && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst))) - emit_insn (gen_mma_xxmtacc (dst, dst)); +hashval_t +toc_hasher::hash (toc_hash_struct *thc) +{ + return rs6000_hash_constant (thc->key) ^ thc->key_mode; +} - return; - } +/* Compare H1 and H2 for equivalence. */ - if (GET_CODE (src) == UNSPEC) - { - gcc_assert (XINT (src, 1) == UNSPEC_MMA_ASSEMBLE); - gcc_assert (REG_P (dst)); - if (GET_MODE (src) == XOmode) - gcc_assert (FP_REGNO_P (REGNO (dst))); - if (GET_MODE (src) == OOmode) - gcc_assert (VSX_REGNO_P (REGNO (dst))); - - reg_mode = GET_MODE (XVECEXP (src, 0, 0)); - int nvecs = XVECLEN (src, 0); - for (int i = 0; i < nvecs; i++) - { - int index = WORDS_BIG_ENDIAN ? i : nvecs - 1 - i; - rtx dst_i = gen_rtx_REG (reg_mode, reg + index); - emit_insn (gen_rtx_SET (dst_i, XVECEXP (src, 0, i))); - } - - /* We are writing an accumulator register, so we have to - prime it after we've written it. */ - if (GET_MODE (src) == XOmode) - emit_insn (gen_mma_xxmtacc (dst, dst)); - - return; - } - - /* Register -> register moves can use common code. */ - } - - if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst))) - { - /* If we are reading an accumulator register, we have to - deprime it before we can access it. 
*/ - if (TARGET_MMA - && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src))) - emit_insn (gen_mma_xxmfacc (src, src)); - - /* Move register range backwards, if we might have destructive - overlap. */ - int i; - /* XO/OO are opaque so cannot use subregs. */ - if (mode == OOmode || mode == XOmode ) - { - for (i = nregs - 1; i >= 0; i--) - { - rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + i); - rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + i); - emit_insn (gen_rtx_SET (dst_i, src_i)); - } - } - else - { - for (i = nregs - 1; i >= 0; i--) - emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode, - i * reg_mode_size), - simplify_gen_subreg (reg_mode, src, mode, - i * reg_mode_size))); - } - - /* If we are writing an accumulator register, we have to - prime it after we've written it. */ - if (TARGET_MMA - && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst))) - emit_insn (gen_mma_xxmtacc (dst, dst)); - } - else - { - int i; - int j = -1; - bool used_update = false; - rtx restore_basereg = NULL_RTX; - - if (MEM_P (src) && INT_REGNO_P (reg)) - { - rtx breg; - - if (GET_CODE (XEXP (src, 0)) == PRE_INC - || GET_CODE (XEXP (src, 0)) == PRE_DEC) - { - rtx delta_rtx; - breg = XEXP (XEXP (src, 0), 0); - delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC - ? GEN_INT (GET_MODE_SIZE (GET_MODE (src))) - : GEN_INT (-GET_MODE_SIZE (GET_MODE (src)))); - emit_insn (gen_add3_insn (breg, breg, delta_rtx)); - src = replace_equiv_address (src, breg); - } - else if (! 
rs6000_offsettable_memref_p (src, reg_mode, true)) - { - if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY) - { - rtx basereg = XEXP (XEXP (src, 0), 0); - if (TARGET_UPDATE) - { - rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0); - emit_insn (gen_rtx_SET (ndst, - gen_rtx_MEM (reg_mode, - XEXP (src, 0)))); - used_update = true; - } - else - emit_insn (gen_rtx_SET (basereg, - XEXP (XEXP (src, 0), 1))); - src = replace_equiv_address (src, basereg); - } - else - { - rtx basereg = gen_rtx_REG (Pmode, reg); - emit_insn (gen_rtx_SET (basereg, XEXP (src, 0))); - src = replace_equiv_address (src, basereg); - } - } - - breg = XEXP (src, 0); - if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM) - breg = XEXP (breg, 0); - - /* If the base register we are using to address memory is - also a destination reg, then change that register last. */ - if (REG_P (breg) - && REGNO (breg) >= REGNO (dst) - && REGNO (breg) < REGNO (dst) + nregs) - j = REGNO (breg) - REGNO (dst); - } - else if (MEM_P (dst) && INT_REGNO_P (reg)) - { - rtx breg; - - if (GET_CODE (XEXP (dst, 0)) == PRE_INC - || GET_CODE (XEXP (dst, 0)) == PRE_DEC) - { - rtx delta_rtx; - breg = XEXP (XEXP (dst, 0), 0); - delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC - ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst))) - : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst)))); - - /* We have to update the breg before doing the store. - Use store with update, if available. */ - - if (TARGET_UPDATE) - { - rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0); - emit_insn (TARGET_32BIT - ? (TARGET_POWERPC64 - ? 
gen_movdi_si_update (breg, breg, delta_rtx, nsrc) - : gen_movsi_si_update (breg, breg, delta_rtx, nsrc)) - : gen_movdi_di_update (breg, breg, delta_rtx, nsrc)); - used_update = true; - } - else - emit_insn (gen_add3_insn (breg, breg, delta_rtx)); - dst = replace_equiv_address (dst, breg); - } - else if (!rs6000_offsettable_memref_p (dst, reg_mode, true) - && GET_CODE (XEXP (dst, 0)) != LO_SUM) - { - if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY) - { - rtx basereg = XEXP (XEXP (dst, 0), 0); - if (TARGET_UPDATE) - { - rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0); - emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode, - XEXP (dst, 0)), - nsrc)); - used_update = true; - } - else - emit_insn (gen_rtx_SET (basereg, - XEXP (XEXP (dst, 0), 1))); - dst = replace_equiv_address (dst, basereg); - } - else - { - rtx basereg = XEXP (XEXP (dst, 0), 0); - rtx offsetreg = XEXP (XEXP (dst, 0), 1); - gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS - && REG_P (basereg) - && REG_P (offsetreg) - && REGNO (basereg) != REGNO (offsetreg)); - if (REGNO (basereg) == 0) - { - rtx tmp = offsetreg; - offsetreg = basereg; - basereg = tmp; - } - emit_insn (gen_add3_insn (basereg, basereg, offsetreg)); - restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg); - dst = replace_equiv_address (dst, basereg); - } - } - else if (GET_CODE (XEXP (dst, 0)) != LO_SUM) - gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true)); - } - - /* If we are reading an accumulator register, we have to - deprime it before we can access it. */ - if (TARGET_MMA && REG_P (src) - && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src))) - emit_insn (gen_mma_xxmfacc (src, src)); - - for (i = 0; i < nregs; i++) - { - /* Calculate index to next subword. */ - ++j; - if (j == nregs) - j = 0; - - /* If compiler already emitted move of first word by - store with update, no need to do anything. */ - if (j == 0 && used_update) - continue; - - /* XO/OO are opaque so cannot use subregs. 
*/ - if (mode == OOmode || mode == XOmode ) - { - rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + j); - rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + j); - emit_insn (gen_rtx_SET (dst_i, src_i)); - } - else - emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode, - j * reg_mode_size), - simplify_gen_subreg (reg_mode, src, mode, - j * reg_mode_size))); - } - - /* If we are writing an accumulator register, we have to - prime it after we've written it. */ - if (TARGET_MMA && REG_P (dst) - && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst))) - emit_insn (gen_mma_xxmtacc (dst, dst)); - - if (restore_basereg != NULL_RTX) - emit_insn (restore_basereg); - } -} - -static GTY(()) alias_set_type TOC_alias_set = -1; - -alias_set_type -get_TOC_alias_set (void) -{ - if (TOC_alias_set == -1) - TOC_alias_set = new_alias_set (); - return TOC_alias_set; -} - -/* The mode the ABI uses for a word. This is not the same as word_mode - for -m32 -mpowerpc64. This is used to implement various target hooks. */ - -static scalar_int_mode -rs6000_abi_word_mode (void) -{ - return TARGET_32BIT ? SImode : DImode; -} - -/* Implement the TARGET_OFFLOAD_OPTIONS hook. */ -static char * -rs6000_offload_options (void) -{ - if (TARGET_64BIT) - return xstrdup ("-foffload-abi=lp64"); - else - return xstrdup ("-foffload-abi=ilp32"); -} - - -/* A quick summary of the various types of 'constant-pool tables' - under PowerPC: - - Target Flags Name One table per - AIX (none) AIX TOC object file - AIX -mfull-toc AIX TOC object file - AIX -mminimal-toc AIX minimal TOC translation unit - SVR4/EABI (none) SVR4 SDATA object file - SVR4/EABI -fpic SVR4 pic object file - SVR4/EABI -fPIC SVR4 PIC translation unit - SVR4/EABI -mrelocatable EABI TOC function - SVR4/EABI -maix AIX TOC object file - SVR4/EABI -maix -mminimal-toc - AIX minimal TOC translation unit - - Name Reg. Set by entries contains: - made by addrs? fp? sum? 
- - AIX TOC 2 crt0 as Y option option - AIX minimal TOC 30 prolog gcc Y Y option - SVR4 SDATA 13 crt0 gcc N Y N - SVR4 pic 30 prolog ld Y not yet N - SVR4 PIC 30 prolog gcc Y option option - EABI TOC 30 prolog gcc Y option option - -*/ - -/* Hash functions for the hash table. */ - -static unsigned -rs6000_hash_constant (rtx k) -{ - enum rtx_code code = GET_CODE (k); - machine_mode mode = GET_MODE (k); - unsigned result = (code << 3) ^ mode; - const char *format; - int flen, fidx; - - format = GET_RTX_FORMAT (code); - flen = strlen (format); - fidx = 0; - - switch (code) - { - case LABEL_REF: - return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0)); - - case CONST_WIDE_INT: - { - int i; - flen = CONST_WIDE_INT_NUNITS (k); - for (i = 0; i < flen; i++) - result = result * 613 + CONST_WIDE_INT_ELT (k, i); - return result; - } - - case CONST_DOUBLE: - return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result; - - case CODE_LABEL: - fidx = 3; - break; - - default: - break; - } - - for (; fidx < flen; fidx++) - switch (format[fidx]) - { - case 's': - { - unsigned i, len; - const char *str = XSTR (k, fidx); - len = strlen (str); - result = result * 613 + len; - for (i = 0; i < len; i++) - result = result * 613 + (unsigned) str[i]; - break; - } - case 'u': - case 'e': - result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx)); - break; - case 'i': - case 'n': - result = result * 613 + (unsigned) XINT (k, fidx); - break; - case 'w': - if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT)) - result = result * 613 + (unsigned) XWINT (k, fidx); - else - { - size_t i; - for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++) - result = result * 613 + (unsigned) (XWINT (k, fidx) - >> CHAR_BIT * i); - } - break; - case '0': - break; - default: - gcc_unreachable (); - } - - return result; -} - -hashval_t -toc_hasher::hash (toc_hash_struct *thc) -{ - return rs6000_hash_constant (thc->key) ^ thc->key_mode; -} - -/* Compare H1 and H2 for equivalence. 
*/ - -bool -toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2) -{ - rtx r1 = h1->key; - rtx r2 = h2->key; +bool +toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2) +{ + rtx r1 = h1->key; + rtx r2 = h2->key; if (h1->key_mode != h2->key_mode) return 0; @@ -26450,538 +26074,913 @@ prefixed_load_p (rtx_insn *insn) if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn)) return address_is_prefixed (XEXP (mem, 0), mem_mode, NON_PREFIXED_DEFAULT); else - return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed); + return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed); +} + +/* Whether a store instruction is a prefixed instruction. This is called from + the prefixed attribute processing. */ + +bool +prefixed_store_p (rtx_insn *insn) +{ + /* Validate the insn to make sure it is a normal store insn. */ + extract_insn_cached (insn); + if (recog_data.n_operands < 2) + return false; + + rtx mem = recog_data.operand[0]; + rtx reg = recog_data.operand[1]; + + if (!REG_P (reg) && !SUBREG_P (reg)) + return false; + + if (!MEM_P (mem)) + return false; + + /* Prefixed store instructions do not support update or indexed forms. */ + if (get_attr_indexed (insn) == INDEXED_YES + || get_attr_update (insn) == UPDATE_YES) + return false; + + machine_mode mem_mode = GET_MODE (mem); + rtx addr = XEXP (mem, 0); + enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mem_mode); + + /* Need to make sure we aren't looking at a stfs which doesn't look + like the other things reg_to_non_prefixed/address_is_prefixed + looks for. */ + if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn)) + return address_is_prefixed (addr, mem_mode, NON_PREFIXED_DEFAULT); + else + return address_is_prefixed (addr, mem_mode, non_prefixed); +} + +/* Whether a load immediate or add instruction is a prefixed instruction. This + is called from the prefixed attribute processing. 
*/ + +bool +prefixed_paddi_p (rtx_insn *insn) +{ + rtx set = single_set (insn); + if (!set) + return false; + + rtx dest = SET_DEST (set); + rtx src = SET_SRC (set); + + if (!REG_P (dest) && !SUBREG_P (dest)) + return false; + + /* Is this a load immediate that can't be done with a simple ADDI or + ADDIS? */ + if (CONST_INT_P (src)) + return (satisfies_constraint_eI (src) + && !satisfies_constraint_I (src) + && !satisfies_constraint_L (src)); + + /* Is this a PADDI instruction that can't be done with a simple ADDI or + ADDIS? */ + if (GET_CODE (src) == PLUS) + { + rtx op1 = XEXP (src, 1); + + return (CONST_INT_P (op1) + && satisfies_constraint_eI (op1) + && !satisfies_constraint_I (op1) + && !satisfies_constraint_L (op1)); + } + + /* If not, is it a load of a PC-relative address? */ + if (!TARGET_PCREL || GET_MODE (dest) != Pmode) + return false; + + if (!SYMBOL_REF_P (src) && !LABEL_REF_P (src) && GET_CODE (src) != CONST) + return false; + + enum insn_form iform = address_to_insn_form (src, Pmode, + NON_PREFIXED_DEFAULT); + + return (iform == INSN_FORM_PCREL_EXTERNAL || iform == INSN_FORM_PCREL_LOCAL); +} + +/* Whether the next instruction needs a 'p' prefix issued before the + instruction is printed out. */ +static bool prepend_p_to_next_insn; + +/* Define FINAL_PRESCAN_INSN if some processing needs to be done before + outputting the assembler code. On the PowerPC, we remember if the current + insn is a prefixed insn where we need to emit a 'p' before the insn. + + In addition, if the insn is part of a PC-relative reference to an external + label optimization, this is recorded also. */ +void +rs6000_final_prescan_insn (rtx_insn *insn, rtx [], int) +{ + prepend_p_to_next_insn = (get_attr_maybe_prefixed (insn) + == MAYBE_PREFIXED_YES + && get_attr_prefixed (insn) == PREFIXED_YES); + return; +} + +/* Define ASM_OUTPUT_OPCODE to do anything special before emitting an opcode. + We use it to emit a 'p' for prefixed insns that is set in + FINAL_PRESCAN_INSN. 
*/ +void +rs6000_asm_output_opcode (FILE *stream) +{ + if (prepend_p_to_next_insn) + { + fprintf (stream, "p"); + + /* Reset the flag in the case where there are separate insn lines in the + sequence, so the 'p' is only emitted for the first line. This shows up + when we are doing the PCREL_OPT optimization, in that the label created + with %r would have a leading 'p' printed. */ + prepend_p_to_next_insn = false; + } + + return; +} + +/* Emit the relocation to tie the next instruction to a previous instruction + that loads up an external address. This is used to do the PCREL_OPT + optimization. Note, the label is generated after the PLD of the got + pc-relative address to allow for the assembler to insert NOPs before the PLD + instruction. The operand is a constant integer that is the label + number. */ + +void +output_pcrel_opt_reloc (rtx label_num) +{ + rtx operands[1] = { label_num }; + output_asm_insn (".reloc .Lpcrel%0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel%0-8)", + operands); +} + +/* Adjust the length of an INSN. LENGTH is the currently-computed length and + should be adjusted to reflect any required changes. This macro is used when + there is some systematic length adjustment required that would be difficult + to express in the length attribute. + + In the PowerPC, we use this to adjust the length of an instruction if one or + more prefixed instructions are generated, using the attribute + num_prefixed_insns. A prefixed instruction is 8 bytes instead of 4, but the + hardware requires that a prefied instruciton does not cross a 64-byte + boundary. This means the compiler has to assume the length of the first + prefixed instruction is 12 bytes instead of 8 bytes. Since the length is + already set for the non-prefixed instruction, we just need to udpate for the + difference. 
*/ + +int +rs6000_adjust_insn_length (rtx_insn *insn, int length) +{ + if (TARGET_PREFIXED && NONJUMP_INSN_P (insn)) + { + rtx pattern = PATTERN (insn); + if (GET_CODE (pattern) != USE && GET_CODE (pattern) != CLOBBER + && get_attr_prefixed (insn) == PREFIXED_YES) + { + int num_prefixed = get_attr_max_prefixed_insns (insn); + length += 4 * (num_prefixed + 1); + } + } + + return length; +} + + +#ifdef HAVE_GAS_HIDDEN +# define USE_HIDDEN_LINKONCE 1 +#else +# define USE_HIDDEN_LINKONCE 0 +#endif + +/* Fills in the label name that should be used for a 476 link stack thunk. */ + +void +get_ppc476_thunk_name (char name[32]) +{ + gcc_assert (TARGET_LINK_STACK); + + if (USE_HIDDEN_LINKONCE) + sprintf (name, "__ppc476.get_thunk"); + else + ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0); } -/* Whether a store instruction is a prefixed instruction. This is called from - the prefixed attribute processing. */ +/* This function emits the simple thunk routine that is used to preserve + the link stack on the 476 cpu. */ -bool -prefixed_store_p (rtx_insn *insn) +static void rs6000_code_end (void) ATTRIBUTE_UNUSED; +static void +rs6000_code_end (void) { - /* Validate the insn to make sure it is a normal store insn. */ - extract_insn_cached (insn); - if (recog_data.n_operands < 2) - return false; + char name[32]; + tree decl; - rtx mem = recog_data.operand[0]; - rtx reg = recog_data.operand[1]; + if (!TARGET_LINK_STACK) + return; - if (!REG_P (reg) && !SUBREG_P (reg)) - return false; + get_ppc476_thunk_name (name); - if (!MEM_P (mem)) - return false; + decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name), + build_function_type_list (void_type_node, NULL_TREE)); + DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL, + NULL_TREE, void_type_node); + TREE_PUBLIC (decl) = 1; + TREE_STATIC (decl) = 1; - /* Prefixed store instructions do not support update or indexed forms. 
*/ - if (get_attr_indexed (insn) == INDEXED_YES - || get_attr_update (insn) == UPDATE_YES) - return false; +#if RS6000_WEAK + if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF) + { + cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl)); + targetm.asm_out.unique_section (decl, 0); + switch_to_section (get_named_section (decl, NULL, 0)); + DECL_WEAK (decl) = 1; + ASM_WEAKEN_DECL (asm_out_file, decl, name, 0); + targetm.asm_out.globalize_label (asm_out_file, name); + targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN); + ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl); + } + else +#endif + { + switch_to_section (text_section); + ASM_OUTPUT_LABEL (asm_out_file, name); + } - machine_mode mem_mode = GET_MODE (mem); - rtx addr = XEXP (mem, 0); - enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mem_mode); + DECL_INITIAL (decl) = make_node (BLOCK); + current_function_decl = decl; + allocate_struct_function (decl, false); + init_function_start (decl); + first_function_block_is_cold = false; + /* Make sure unwind info is emitted for the thunk if needed. */ + final_start_function (emit_barrier (), asm_out_file, 1); - /* Need to make sure we aren't looking at a stfs which doesn't look - like the other things reg_to_non_prefixed/address_is_prefixed - looks for. */ - if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn)) - return address_is_prefixed (addr, mem_mode, NON_PREFIXED_DEFAULT); - else - return address_is_prefixed (addr, mem_mode, non_prefixed); + fputs ("\tblr\n", asm_out_file); + + final_end_function (); + init_insn_lengths (); + free_after_compilation (cfun); + set_cfun (NULL); + current_function_decl = NULL; } -/* Whether a load immediate or add instruction is a prefixed instruction. This - is called from the prefixed attribute processing. */ +/* Add r30 to hard reg set if the prologue sets it up and it is not + pic_offset_table_rtx. 
*/ -bool -prefixed_paddi_p (rtx_insn *insn) +static void +rs6000_set_up_by_prologue (struct hard_reg_set_container *set) { - rtx set = single_set (insn); - if (!set) - return false; + if (!TARGET_SINGLE_PIC_BASE + && TARGET_TOC + && TARGET_MINIMAL_TOC + && !constant_pool_empty_p ()) + add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM); + if (cfun->machine->split_stack_argp_used) + add_to_hard_reg_set (&set->set, Pmode, 12); - rtx dest = SET_DEST (set); - rtx src = SET_SRC (set); + /* Make sure the hard reg set doesn't include r2, which was possibly added + via PIC_OFFSET_TABLE_REGNUM. */ + if (TARGET_TOC) + remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM); +} - if (!REG_P (dest) && !SUBREG_P (dest)) - return false; + +/* Helper function for rs6000_split_logical to emit a logical instruction after + spliting the operation to single GPR registers. - /* Is this a load immediate that can't be done with a simple ADDI or - ADDIS? */ - if (CONST_INT_P (src)) - return (satisfies_constraint_eI (src) - && !satisfies_constraint_I (src) - && !satisfies_constraint_L (src)); + DEST is the destination register. + OP1 and OP2 are the input source registers. + CODE is the base operation (AND, IOR, XOR, NOT). + MODE is the machine mode. + If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT. + If COMPLEMENT_OP1_P is true, wrap operand1 with NOT. + If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */ - /* Is this a PADDI instruction that can't be done with a simple ADDI or - ADDIS? */ - if (GET_CODE (src) == PLUS) +static void +rs6000_split_logical_inner (rtx dest, + rtx op1, + rtx op2, + enum rtx_code code, + machine_mode mode, + bool complement_final_p, + bool complement_op1_p, + bool complement_op2_p) +{ + rtx bool_rtx; + + /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. 
*/ + if (op2 && CONST_INT_P (op2) + && (mode == SImode || (mode == DImode && TARGET_POWERPC64)) + && !complement_final_p && !complement_op1_p && !complement_op2_p) { - rtx op1 = XEXP (src, 1); + HOST_WIDE_INT mask = GET_MODE_MASK (mode); + HOST_WIDE_INT value = INTVAL (op2) & mask; - return (CONST_INT_P (op1) - && satisfies_constraint_eI (op1) - && !satisfies_constraint_I (op1) - && !satisfies_constraint_L (op1)); + /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */ + if (code == AND) + { + if (value == 0) + { + emit_insn (gen_rtx_SET (dest, const0_rtx)); + return; + } + + else if (value == mask) + { + if (!rtx_equal_p (dest, op1)) + emit_insn (gen_rtx_SET (dest, op1)); + return; + } + } + + /* Optimize IOR/XOR of 0 to be a simple move. Split large operations + into separate ORI/ORIS or XORI/XORIS instrucitons. */ + else if (code == IOR || code == XOR) + { + if (value == 0) + { + if (!rtx_equal_p (dest, op1)) + emit_insn (gen_rtx_SET (dest, op1)); + return; + } + } } - /* If not, is it a load of a PC-relative address? */ - if (!TARGET_PCREL || GET_MODE (dest) != Pmode) - return false; + if (code == AND && mode == SImode + && !complement_final_p && !complement_op1_p && !complement_op2_p) + { + emit_insn (gen_andsi3 (dest, op1, op2)); + return; + } - if (!SYMBOL_REF_P (src) && !LABEL_REF_P (src) && GET_CODE (src) != CONST) - return false; + if (complement_op1_p) + op1 = gen_rtx_NOT (mode, op1); - enum insn_form iform = address_to_insn_form (src, Pmode, - NON_PREFIXED_DEFAULT); + if (complement_op2_p) + op2 = gen_rtx_NOT (mode, op2); - return (iform == INSN_FORM_PCREL_EXTERNAL || iform == INSN_FORM_PCREL_LOCAL); -} + /* For canonical RTL, if only one arm is inverted it is the first. */ + if (!complement_op1_p && complement_op2_p) + std::swap (op1, op2); -/* Whether the next instruction needs a 'p' prefix issued before the - instruction is printed out. */ -static bool prepend_p_to_next_insn; + bool_rtx = ((code == NOT) + ? 
gen_rtx_NOT (mode, op1) + : gen_rtx_fmt_ee (code, mode, op1, op2)); -/* Define FINAL_PRESCAN_INSN if some processing needs to be done before - outputting the assembler code. On the PowerPC, we remember if the current - insn is a prefixed insn where we need to emit a 'p' before the insn. + if (complement_final_p) + bool_rtx = gen_rtx_NOT (mode, bool_rtx); - In addition, if the insn is part of a PC-relative reference to an external - label optimization, this is recorded also. */ -void -rs6000_final_prescan_insn (rtx_insn *insn, rtx [], int) -{ - prepend_p_to_next_insn = (get_attr_maybe_prefixed (insn) - == MAYBE_PREFIXED_YES - && get_attr_prefixed (insn) == PREFIXED_YES); - return; + emit_insn (gen_rtx_SET (dest, bool_rtx)); } -/* Define ASM_OUTPUT_OPCODE to do anything special before emitting an opcode. - We use it to emit a 'p' for prefixed insns that is set in - FINAL_PRESCAN_INSN. */ -void -rs6000_asm_output_opcode (FILE *stream) +/* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These + operations are split immediately during RTL generation to allow for more + optimizations of the AND/IOR/XOR. + + OPERANDS is an array containing the destination and two input operands. + CODE is the base operation (AND, IOR, XOR, NOT). + MODE is the machine mode. + If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT. + If COMPLEMENT_OP1_P is true, wrap operand1 with NOT. + If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. + CLOBBER_REG is either NULL or a scratch register of type CC to allow + formation of the AND instructions. 
*/ + +static void +rs6000_split_logical_di (rtx operands[3], + enum rtx_code code, + bool complement_final_p, + bool complement_op1_p, + bool complement_op2_p) { - if (prepend_p_to_next_insn) + const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff); + const HOST_WIDE_INT upper_32bits = ~ lower_32bits; + const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000); + enum hi_lo { hi = 0, lo = 1 }; + rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2]; + size_t i; + + op0_hi_lo[hi] = gen_highpart (SImode, operands[0]); + op1_hi_lo[hi] = gen_highpart (SImode, operands[1]); + op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]); + op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]); + + if (code == NOT) + op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX; + else { - fprintf (stream, "p"); + if (!CONST_INT_P (operands[2])) + { + op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]); + op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]); + } + else + { + HOST_WIDE_INT value = INTVAL (operands[2]); + HOST_WIDE_INT value_hi_lo[2]; - /* Reset the flag in the case where there are separate insn lines in the - sequence, so the 'p' is only emitted for the first line. This shows up - when we are doing the PCREL_OPT optimization, in that the label created - with %r would have a leading 'p' printed. */ - prepend_p_to_next_insn = false; - } + gcc_assert (!complement_final_p); + gcc_assert (!complement_op1_p); + gcc_assert (!complement_op2_p); - return; -} + value_hi_lo[hi] = value >> 32; + value_hi_lo[lo] = value & lower_32bits; -/* Emit the relocation to tie the next instruction to a previous instruction - that loads up an external address. This is used to do the PCREL_OPT - optimization. Note, the label is generated after the PLD of the got - pc-relative address to allow for the assembler to insert NOPs before the PLD - instruction. The operand is a constant integer that is the label - number. 
*/ + for (i = 0; i < 2; i++) + { + HOST_WIDE_INT sub_value = value_hi_lo[i]; -void -output_pcrel_opt_reloc (rtx label_num) -{ - rtx operands[1] = { label_num }; - output_asm_insn (".reloc .Lpcrel%0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel%0-8)", - operands); -} + if (sub_value & sign_bit) + sub_value |= upper_32bits; -/* Adjust the length of an INSN. LENGTH is the currently-computed length and - should be adjusted to reflect any required changes. This macro is used when - there is some systematic length adjustment required that would be difficult - to express in the length attribute. + op2_hi_lo[i] = GEN_INT (sub_value); - In the PowerPC, we use this to adjust the length of an instruction if one or - more prefixed instructions are generated, using the attribute - num_prefixed_insns. A prefixed instruction is 8 bytes instead of 4, but the - hardware requires that a prefied instruciton does not cross a 64-byte - boundary. This means the compiler has to assume the length of the first - prefixed instruction is 12 bytes instead of 8 bytes. Since the length is - already set for the non-prefixed instruction, we just need to udpate for the - difference. */ + /* If this is an AND instruction, check to see if we need to load + the value in a register. */ + if (code == AND && sub_value != -1 && sub_value != 0 + && !and_operand (op2_hi_lo[i], SImode)) + op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]); + } + } + } -int -rs6000_adjust_insn_length (rtx_insn *insn, int length) -{ - if (TARGET_PREFIXED && NONJUMP_INSN_P (insn)) + for (i = 0; i < 2; i++) { - rtx pattern = PATTERN (insn); - if (GET_CODE (pattern) != USE && GET_CODE (pattern) != CLOBBER - && get_attr_prefixed (insn) == PREFIXED_YES) + /* Split large IOR/XOR operations. 
*/ + if ((code == IOR || code == XOR) + && CONST_INT_P (op2_hi_lo[i]) + && !complement_final_p + && !complement_op1_p + && !complement_op2_p + && !logical_const_operand (op2_hi_lo[i], SImode)) { - int num_prefixed = get_attr_max_prefixed_insns (insn); - length += 4 * (num_prefixed + 1); + HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]); + HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000); + HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff); + rtx tmp = gen_reg_rtx (SImode); + + /* Make sure the constant is sign extended. */ + if ((hi_16bits & sign_bit) != 0) + hi_16bits |= upper_32bits; + + rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits), + code, SImode, false, false, false); + + rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits), + code, SImode, false, false, false); } + else + rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i], + code, SImode, complement_final_p, + complement_op1_p, complement_op2_p); } - return length; + return; } - -#ifdef HAVE_GAS_HIDDEN -# define USE_HIDDEN_LINKONCE 1 -#else -# define USE_HIDDEN_LINKONCE 0 -#endif +/* Split the insns that make up boolean operations operating on multiple GPR + registers. The boolean MD patterns ensure that the inputs either are + exactly the same as the output registers, or there is no overlap. -/* Fills in the label name that should be used for a 476 link stack thunk. */ + OPERANDS is an array containing the destination and two input operands. + CODE is the base operation (AND, IOR, XOR, NOT). + If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT. + If COMPLEMENT_OP1_P is true, wrap operand1 with NOT. + If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. 
*/ void -get_ppc476_thunk_name (char name[32]) +rs6000_split_logical (rtx operands[3], + enum rtx_code code, + bool complement_final_p, + bool complement_op1_p, + bool complement_op2_p) { - gcc_assert (TARGET_LINK_STACK); - - if (USE_HIDDEN_LINKONCE) - sprintf (name, "__ppc476.get_thunk"); - else - ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0); -} + machine_mode mode = GET_MODE (operands[0]); + machine_mode sub_mode; + rtx op0, op1, op2; + int sub_size, regno0, regno1, nregs, i; -/* This function emits the simple thunk routine that is used to preserve - the link stack on the 476 cpu. */ + /* If this is DImode, use the specialized version that can run before + register allocation. */ + if (mode == DImode && !TARGET_POWERPC64) + { + rs6000_split_logical_di (operands, code, complement_final_p, + complement_op1_p, complement_op2_p); + return; + } -static void rs6000_code_end (void) ATTRIBUTE_UNUSED; -static void -rs6000_code_end (void) -{ - char name[32]; - tree decl; + op0 = operands[0]; + op1 = operands[1]; + op2 = (code == NOT) ? NULL_RTX : operands[2]; + sub_mode = (TARGET_POWERPC64) ? 
DImode : SImode; + sub_size = GET_MODE_SIZE (sub_mode); + regno0 = REGNO (op0); + regno1 = REGNO (op1); - if (!TARGET_LINK_STACK) - return; + gcc_assert (reload_completed); + gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO)); + gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO)); - get_ppc476_thunk_name (name); + nregs = rs6000_hard_regno_nregs[(int)mode][regno0]; + gcc_assert (nregs > 1); - decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name), - build_function_type_list (void_type_node, NULL_TREE)); - DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL, - NULL_TREE, void_type_node); - TREE_PUBLIC (decl) = 1; - TREE_STATIC (decl) = 1; + if (op2 && REG_P (op2)) + gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO)); -#if RS6000_WEAK - if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF) - { - cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl)); - targetm.asm_out.unique_section (decl, 0); - switch_to_section (get_named_section (decl, NULL, 0)); - DECL_WEAK (decl) = 1; - ASM_WEAKEN_DECL (asm_out_file, decl, name, 0); - targetm.asm_out.globalize_label (asm_out_file, name); - targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN); - ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl); - } - else -#endif + for (i = 0; i < nregs; i++) { - switch_to_section (text_section); - ASM_OUTPUT_LABEL (asm_out_file, name); - } - - DECL_INITIAL (decl) = make_node (BLOCK); - current_function_decl = decl; - allocate_struct_function (decl, false); - init_function_start (decl); - first_function_block_is_cold = false; - /* Make sure unwind info is emitted for the thunk if needed. */ - final_start_function (emit_barrier (), asm_out_file, 1); + int offset = i * sub_size; + rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset); + rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset); + rtx sub_op2 = ((code == NOT) + ? 
NULL_RTX + : simplify_subreg (sub_mode, op2, mode, offset)); - fputs ("\tblr\n", asm_out_file); + rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode, + complement_final_p, complement_op1_p, + complement_op2_p); + } - final_end_function (); - init_insn_lengths (); - free_after_compilation (cfun); - set_cfun (NULL); - current_function_decl = NULL; + return; } -/* Add r30 to hard reg set if the prologue sets it up and it is not - pic_offset_table_rtx. */ +/* Emit instructions to move SRC to DST. Called by splitters for + multi-register moves. It will emit at most one instruction for + each register that is accessed; that is, it won't emit li/lis pairs + (or equivalent for 64-bit code). One of SRC or DST must be a hard + register. */ -static void -rs6000_set_up_by_prologue (struct hard_reg_set_container *set) +void +rs6000_split_multireg_move (rtx dst, rtx src) { - if (!TARGET_SINGLE_PIC_BASE - && TARGET_TOC - && TARGET_MINIMAL_TOC - && !constant_pool_empty_p ()) - add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM); - if (cfun->machine->split_stack_argp_used) - add_to_hard_reg_set (&set->set, Pmode, 12); + /* The register number of the first register being moved. */ + int reg; + /* The mode that is to be moved. */ + machine_mode mode; + /* The mode that the move is being done in, and its size. */ + machine_mode reg_mode; + int reg_mode_size; + /* The number of registers that will be moved. */ + int nregs; - /* Make sure the hard reg set doesn't include r2, which was possibly added - via PIC_OFFSET_TABLE_REGNUM. */ - if (TARGET_TOC) - remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM); -} + reg = REG_P (dst) ? REGNO (dst) : REGNO (src); + mode = GET_MODE (dst); + nregs = hard_regno_nregs (reg, mode); - -/* Helper function for rs6000_split_logical to emit a logical instruction after - spliting the operation to single GPR registers. 
+ /* If we have a vector quad register for MMA, and this is a load or store, + see if we can use vector paired load/stores. */ + if (mode == XOmode && TARGET_MMA + && (MEM_P (dst) || MEM_P (src))) + { + reg_mode = OOmode; + nregs /= 2; + } + /* If we have a vector pair/quad mode, split it into two/four separate + vectors. */ + else if (mode == OOmode || mode == XOmode) + reg_mode = V1TImode; + else if (FP_REGNO_P (reg)) + reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : + (TARGET_HARD_FLOAT ? DFmode : SFmode); + else if (ALTIVEC_REGNO_P (reg)) + reg_mode = V16QImode; + else + reg_mode = word_mode; + reg_mode_size = GET_MODE_SIZE (reg_mode); - DEST is the destination register. - OP1 and OP2 are the input source registers. - CODE is the base operation (AND, IOR, XOR, NOT). - MODE is the machine mode. - If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT. - If COMPLEMENT_OP1_P is true, wrap operand1 with NOT. - If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */ + gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode)); -static void -rs6000_split_logical_inner (rtx dest, - rtx op1, - rtx op2, - enum rtx_code code, - machine_mode mode, - bool complement_final_p, - bool complement_op1_p, - bool complement_op2_p) -{ - rtx bool_rtx; + /* TDmode residing in FP registers is special, since the ISA requires that + the lower-numbered word of a register pair is always the most significant + word, even in little-endian mode. This does not match the usual subreg + semantics, so we cannnot use simplify_gen_subreg in those cases. Access + the appropriate constituent registers "by hand" in little-endian mode. - /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */ - if (op2 && CONST_INT_P (op2) - && (mode == SImode || (mode == DImode && TARGET_POWERPC64)) - && !complement_final_p && !complement_op1_p && !complement_op2_p) + Note we do not need to check for destructive overlap here since TDmode + can only reside in even/odd register pairs. 
*/ + if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN) { - HOST_WIDE_INT mask = GET_MODE_MASK (mode); - HOST_WIDE_INT value = INTVAL (op2) & mask; + rtx p_src, p_dst; + int i; - /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */ - if (code == AND) + for (i = 0; i < nregs; i++) { - if (value == 0) - { - emit_insn (gen_rtx_SET (dest, const0_rtx)); - return; - } + if (REG_P (src) && FP_REGNO_P (REGNO (src))) + p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i); + else + p_src = simplify_gen_subreg (reg_mode, src, mode, + i * reg_mode_size); - else if (value == mask) - { - if (!rtx_equal_p (dest, op1)) - emit_insn (gen_rtx_SET (dest, op1)); - return; - } - } + if (REG_P (dst) && FP_REGNO_P (REGNO (dst))) + p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i); + else + p_dst = simplify_gen_subreg (reg_mode, dst, mode, + i * reg_mode_size); - /* Optimize IOR/XOR of 0 to be a simple move. Split large operations - into separate ORI/ORIS or XORI/XORIS instrucitons. */ - else if (code == IOR || code == XOR) - { - if (value == 0) - { - if (!rtx_equal_p (dest, op1)) - emit_insn (gen_rtx_SET (dest, op1)); - return; - } + emit_insn (gen_rtx_SET (p_dst, p_src)); } - } - if (code == AND && mode == SImode - && !complement_final_p && !complement_op1_p && !complement_op2_p) - { - emit_insn (gen_andsi3 (dest, op1, op2)); return; } - if (complement_op1_p) - op1 = gen_rtx_NOT (mode, op1); - - if (complement_op2_p) - op2 = gen_rtx_NOT (mode, op2); - - /* For canonical RTL, if only one arm is inverted it is the first. */ - if (!complement_op1_p && complement_op2_p) - std::swap (op1, op2); - - bool_rtx = ((code == NOT) - ? gen_rtx_NOT (mode, op1) - : gen_rtx_fmt_ee (code, mode, op1, op2)); + /* The __vector_pair and __vector_quad modes are multi-register + modes, so if we have to load or store the registers, we have to be + careful to properly swap them if we're in little endian mode + below. 
This means the last register gets the first memory + location. We also need to be careful of using the right register + numbers if we are splitting XO to OO. */ + if (mode == OOmode || mode == XOmode) + { + nregs = hard_regno_nregs (reg, mode); + int reg_mode_nregs = hard_regno_nregs (reg, reg_mode); + if (MEM_P (dst)) + { + unsigned offset = 0; + unsigned size = GET_MODE_SIZE (reg_mode); - if (complement_final_p) - bool_rtx = gen_rtx_NOT (mode, bool_rtx); + /* If we are reading an accumulator register, we have to + deprime it before we can access it. */ + if (TARGET_MMA + && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src))) + emit_insn (gen_mma_xxmfacc (src, src)); - emit_insn (gen_rtx_SET (dest, bool_rtx)); -} + for (int i = 0; i < nregs; i += reg_mode_nregs) + { + unsigned subreg = + (WORDS_BIG_ENDIAN) ? i : (nregs - reg_mode_nregs - i); + rtx dst2 = adjust_address (dst, reg_mode, offset); + rtx src2 = gen_rtx_REG (reg_mode, reg + subreg); + offset += size; + emit_insn (gen_rtx_SET (dst2, src2)); + } -/* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These - operations are split immediately during RTL generation to allow for more - optimizations of the AND/IOR/XOR. + return; + } - OPERANDS is an array containing the destination and two input operands. - CODE is the base operation (AND, IOR, XOR, NOT). - MODE is the machine mode. - If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT. - If COMPLEMENT_OP1_P is true, wrap operand1 with NOT. - If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. - CLOBBER_REG is either NULL or a scratch register of type CC to allow - formation of the AND instructions. 
*/ + if (MEM_P (src)) + { + unsigned offset = 0; + unsigned size = GET_MODE_SIZE (reg_mode); -static void -rs6000_split_logical_di (rtx operands[3], - enum rtx_code code, - bool complement_final_p, - bool complement_op1_p, - bool complement_op2_p) -{ - const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff); - const HOST_WIDE_INT upper_32bits = ~ lower_32bits; - const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000); - enum hi_lo { hi = 0, lo = 1 }; - rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2]; - size_t i; + for (int i = 0; i < nregs; i += reg_mode_nregs) + { + unsigned subreg = + (WORDS_BIG_ENDIAN) ? i : (nregs - reg_mode_nregs - i); + rtx dst2 = gen_rtx_REG (reg_mode, reg + subreg); + rtx src2 = adjust_address (src, reg_mode, offset); + offset += size; + emit_insn (gen_rtx_SET (dst2, src2)); + } - op0_hi_lo[hi] = gen_highpart (SImode, operands[0]); - op1_hi_lo[hi] = gen_highpart (SImode, operands[1]); - op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]); - op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]); + /* If we are writing an accumulator register, we have to + prime it after we've written it. 
*/ + if (TARGET_MMA + && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst))) + emit_insn (gen_mma_xxmtacc (dst, dst)); - if (code == NOT) - op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX; - else - { - if (!CONST_INT_P (operands[2])) - { - op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]); - op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]); + return; } - else + + if (GET_CODE (src) == UNSPEC) { - HOST_WIDE_INT value = INTVAL (operands[2]); - HOST_WIDE_INT value_hi_lo[2]; + gcc_assert (XINT (src, 1) == UNSPEC_MMA_ASSEMBLE); + gcc_assert (REG_P (dst)); + if (GET_MODE (src) == XOmode) + gcc_assert (FP_REGNO_P (REGNO (dst))); + if (GET_MODE (src) == OOmode) + gcc_assert (VSX_REGNO_P (REGNO (dst))); - gcc_assert (!complement_final_p); - gcc_assert (!complement_op1_p); - gcc_assert (!complement_op2_p); + reg_mode = GET_MODE (XVECEXP (src, 0, 0)); + int nvecs = XVECLEN (src, 0); + for (int i = 0; i < nvecs; i++) + { + int index = WORDS_BIG_ENDIAN ? i : nvecs - 1 - i; + rtx dst_i = gen_rtx_REG (reg_mode, reg + index); + emit_insn (gen_rtx_SET (dst_i, XVECEXP (src, 0, i))); + } - value_hi_lo[hi] = value >> 32; - value_hi_lo[lo] = value & lower_32bits; + /* We are writing an accumulator register, so we have to + prime it after we've written it. */ + if (GET_MODE (src) == XOmode) + emit_insn (gen_mma_xxmtacc (dst, dst)); - for (i = 0; i < 2; i++) - { - HOST_WIDE_INT sub_value = value_hi_lo[i]; + return; + } - if (sub_value & sign_bit) - sub_value |= upper_32bits; + /* Register -> register moves can use common code. */ + } - op2_hi_lo[i] = GEN_INT (sub_value); + if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst))) + { + /* If we are reading an accumulator register, we have to + deprime it before we can access it. */ + if (TARGET_MMA + && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src))) + emit_insn (gen_mma_xxmfacc (src, src)); - /* If this is an AND instruction, check to see if we need to load - the value in a register. 
*/ - if (code == AND && sub_value != -1 && sub_value != 0 - && !and_operand (op2_hi_lo[i], SImode)) - op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]); + /* Move register range backwards, if we might have destructive + overlap. */ + int i; + /* XO/OO are opaque so cannot use subregs. */ + if (mode == OOmode || mode == XOmode ) + { + for (i = nregs - 1; i >= 0; i--) + { + rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + i); + rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + i); + emit_insn (gen_rtx_SET (dst_i, src_i)); } } - } + else + { + for (i = nregs - 1; i >= 0; i--) + emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode, + i * reg_mode_size), + simplify_gen_subreg (reg_mode, src, mode, + i * reg_mode_size))); + } - for (i = 0; i < 2; i++) + /* If we are writing an accumulator register, we have to + prime it after we've written it. */ + if (TARGET_MMA + && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst))) + emit_insn (gen_mma_xxmtacc (dst, dst)); + } + else { - /* Split large IOR/XOR operations. */ - if ((code == IOR || code == XOR) - && CONST_INT_P (op2_hi_lo[i]) - && !complement_final_p - && !complement_op1_p - && !complement_op2_p - && !logical_const_operand (op2_hi_lo[i], SImode)) + int i; + int j = -1; + bool used_update = false; + rtx restore_basereg = NULL_RTX; + + if (MEM_P (src) && INT_REGNO_P (reg)) { - HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]); - HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000); - HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff); - rtx tmp = gen_reg_rtx (SImode); + rtx breg; - /* Make sure the constant is sign extended. */ - if ((hi_16bits & sign_bit) != 0) - hi_16bits |= upper_32bits; + if (GET_CODE (XEXP (src, 0)) == PRE_INC + || GET_CODE (XEXP (src, 0)) == PRE_DEC) + { + rtx delta_rtx; + breg = XEXP (XEXP (src, 0), 0); + delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC + ? 
GEN_INT (GET_MODE_SIZE (GET_MODE (src))) + : GEN_INT (-GET_MODE_SIZE (GET_MODE (src)))); + emit_insn (gen_add3_insn (breg, breg, delta_rtx)); + src = replace_equiv_address (src, breg); + } + else if (! rs6000_offsettable_memref_p (src, reg_mode, true)) + { + if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY) + { + rtx basereg = XEXP (XEXP (src, 0), 0); + if (TARGET_UPDATE) + { + rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0); + emit_insn (gen_rtx_SET (ndst, + gen_rtx_MEM (reg_mode, + XEXP (src, 0)))); + used_update = true; + } + else + emit_insn (gen_rtx_SET (basereg, + XEXP (XEXP (src, 0), 1))); + src = replace_equiv_address (src, basereg); + } + else + { + rtx basereg = gen_rtx_REG (Pmode, reg); + emit_insn (gen_rtx_SET (basereg, XEXP (src, 0))); + src = replace_equiv_address (src, basereg); + } + } - rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits), - code, SImode, false, false, false); + breg = XEXP (src, 0); + if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM) + breg = XEXP (breg, 0); - rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits), - code, SImode, false, false, false); + /* If the base register we are using to address memory is + also a destination reg, then change that register last. */ + if (REG_P (breg) + && REGNO (breg) >= REGNO (dst) + && REGNO (breg) < REGNO (dst) + nregs) + j = REGNO (breg) - REGNO (dst); } - else - rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i], - code, SImode, complement_final_p, - complement_op1_p, complement_op2_p); - } - - return; -} - -/* Split the insns that make up boolean operations operating on multiple GPR - registers. The boolean MD patterns ensure that the inputs either are - exactly the same as the output registers, or there is no overlap. + else if (MEM_P (dst) && INT_REGNO_P (reg)) + { + rtx breg; - OPERANDS is an array containing the destination and two input operands. - CODE is the base operation (AND, IOR, XOR, NOT). 
- If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT. - If COMPLEMENT_OP1_P is true, wrap operand1 with NOT. - If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */ + if (GET_CODE (XEXP (dst, 0)) == PRE_INC + || GET_CODE (XEXP (dst, 0)) == PRE_DEC) + { + rtx delta_rtx; + breg = XEXP (XEXP (dst, 0), 0); + delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC + ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst))) + : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst)))); -void -rs6000_split_logical (rtx operands[3], - enum rtx_code code, - bool complement_final_p, - bool complement_op1_p, - bool complement_op2_p) -{ - machine_mode mode = GET_MODE (operands[0]); - machine_mode sub_mode; - rtx op0, op1, op2; - int sub_size, regno0, regno1, nregs, i; + /* We have to update the breg before doing the store. + Use store with update, if available. */ - /* If this is DImode, use the specialized version that can run before - register allocation. */ - if (mode == DImode && !TARGET_POWERPC64) - { - rs6000_split_logical_di (operands, code, complement_final_p, - complement_op1_p, complement_op2_p); - return; - } + if (TARGET_UPDATE) + { + rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0); + emit_insn (TARGET_32BIT + ? (TARGET_POWERPC64 + ? 
gen_movdi_si_update (breg, breg, delta_rtx, nsrc) + : gen_movsi_si_update (breg, breg, delta_rtx, nsrc)) + : gen_movdi_di_update (breg, breg, delta_rtx, nsrc)); + used_update = true; + } + else + emit_insn (gen_add3_insn (breg, breg, delta_rtx)); + dst = replace_equiv_address (dst, breg); + } + else if (!rs6000_offsettable_memref_p (dst, reg_mode, true) + && GET_CODE (XEXP (dst, 0)) != LO_SUM) + { + if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY) + { + rtx basereg = XEXP (XEXP (dst, 0), 0); + if (TARGET_UPDATE) + { + rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0); + emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode, + XEXP (dst, 0)), + nsrc)); + used_update = true; + } + else + emit_insn (gen_rtx_SET (basereg, + XEXP (XEXP (dst, 0), 1))); + dst = replace_equiv_address (dst, basereg); + } + else + { + rtx basereg = XEXP (XEXP (dst, 0), 0); + rtx offsetreg = XEXP (XEXP (dst, 0), 1); + gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS + && REG_P (basereg) + && REG_P (offsetreg) + && REGNO (basereg) != REGNO (offsetreg)); + if (REGNO (basereg) == 0) + { + rtx tmp = offsetreg; + offsetreg = basereg; + basereg = tmp; + } + emit_insn (gen_add3_insn (basereg, basereg, offsetreg)); + restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg); + dst = replace_equiv_address (dst, basereg); + } + } + else if (GET_CODE (XEXP (dst, 0)) != LO_SUM) + gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true)); + } - op0 = operands[0]; - op1 = operands[1]; - op2 = (code == NOT) ? NULL_RTX : operands[2]; - sub_mode = (TARGET_POWERPC64) ? DImode : SImode; - sub_size = GET_MODE_SIZE (sub_mode); - regno0 = REGNO (op0); - regno1 = REGNO (op1); + /* If we are reading an accumulator register, we have to + deprime it before we can access it. 
*/ + if (TARGET_MMA && REG_P (src) + && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src))) + emit_insn (gen_mma_xxmfacc (src, src)); - gcc_assert (reload_completed); - gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO)); - gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO)); + for (i = 0; i < nregs; i++) + { + /* Calculate index to next subword. */ + ++j; + if (j == nregs) + j = 0; - nregs = rs6000_hard_regno_nregs[(int)mode][regno0]; - gcc_assert (nregs > 1); + /* If compiler already emitted move of first word by + store with update, no need to do anything. */ + if (j == 0 && used_update) + continue; - if (op2 && REG_P (op2)) - gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO)); + /* XO/OO are opaque so cannot use subregs. */ + if (mode == OOmode || mode == XOmode ) + { + rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + j); + rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + j); + emit_insn (gen_rtx_SET (dst_i, src_i)); + } + else + emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode, + j * reg_mode_size), + simplify_gen_subreg (reg_mode, src, mode, + j * reg_mode_size))); + } - for (i = 0; i < nregs; i++) - { - int offset = i * sub_size; - rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset); - rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset); - rtx sub_op2 = ((code == NOT) - ? NULL_RTX - : simplify_subreg (sub_mode, op2, mode, offset)); + /* If we are writing an accumulator register, we have to + prime it after we've written it. 
*/ + if (TARGET_MMA && REG_P (dst) + && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst))) + emit_insn (gen_mma_xxmtacc (dst, dst)); - rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode, - complement_final_p, complement_op1_p, - complement_op2_p); + if (restore_basereg != NULL_RTX) + emit_insn (restore_basereg); } - - return; } - /* Return true if the peephole2 can combine a load involving a combination of an addis instruction and a load with an offset that can be fused together on -- cgit v1.1 From 69feb7601e86274fa9abbfb420b00c8adf947e7b Mon Sep 17 00:00:00 2001 From: Peter Bergner Date: Wed, 14 Jul 2021 18:27:02 -0500 Subject: rs6000: Generate an lxvp instead of two adjacent lxv instructions The MMA build built-ins currently use individual lxv instructions to load up the registers of a __vector_pair or __vector_quad. If the memory addresses of the built-in operands are to adjacent locations, then we can use an lxvp in some cases to load up two registers at once. The patch below adds support for checking whether memory addresses are adjacent and emitting an lxvp instead of two lxv instructions. 2021-07-14 Peter Bergner gcc/ * config/rs6000/rs6000.c (adjacent_mem_locations): Return the lower addressed memory rtx, if any. (rs6000_split_multireg_move): Fix code formatting. Handle MMA build built-ins with operands in adjacent memory locations. gcc/testsuite/ * gcc.target/powerpc/mma-builtin-9.c: New test. 
--- gcc/config/rs6000/rs6000.c | 82 ++++++++++++++++++------ gcc/testsuite/gcc.target/powerpc/mma-builtin-9.c | 28 ++++++++ 2 files changed, 92 insertions(+), 18 deletions(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/mma-builtin-9.c (limited to 'gcc') diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 1d27bb8..ce29b37 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -18051,23 +18051,29 @@ get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset, return true; } -/* The function returns true if the target storage location of - mem1 is adjacent to the target storage location of mem2 */ -/* Return 1 if memory locations are adjacent. */ +/* If the target storage locations of arguments MEM1 and MEM2 are + adjacent, then return the argument that has the lower address. + Otherwise, return NULL_RTX. */ -static bool +static rtx adjacent_mem_locations (rtx mem1, rtx mem2) { rtx reg1, reg2; HOST_WIDE_INT off1, size1, off2, size2; - if (get_memref_parts (mem1, &reg1, &off1, &size1) - && get_memref_parts (mem2, &reg2, &off2, &size2)) - return ((REGNO (reg1) == REGNO (reg2)) - && ((off1 + size1 == off2) - || (off2 + size2 == off1))); + if (MEM_P (mem1) + && MEM_P (mem2) + && get_memref_parts (mem1, &reg1, &off1, &size1) + && get_memref_parts (mem2, &reg2, &off2, &size2) + && REGNO (reg1) == REGNO (reg2)) + { + if (off1 + size1 == off2) + return mem1; + else if (off2 + size2 == off1) + return mem2; + } - return false; + return NULL_RTX; } /* This function returns true if it can be determined that the two MEM @@ -26708,8 +26714,8 @@ rs6000_split_multireg_move (rtx dst, rtx src) for (int i = 0; i < nregs; i += reg_mode_nregs) { - unsigned subreg = - (WORDS_BIG_ENDIAN) ? i : (nregs - reg_mode_nregs - i); + unsigned subreg + = WORDS_BIG_ENDIAN ?
i : (nregs - reg_mode_nregs - i); rtx dst2 = adjust_address (dst, reg_mode, offset); rtx src2 = gen_rtx_REG (reg_mode, reg + subreg); offset += size; @@ -26726,8 +26732,8 @@ rs6000_split_multireg_move (rtx dst, rtx src) for (int i = 0; i < nregs; i += reg_mode_nregs) { - unsigned subreg = - (WORDS_BIG_ENDIAN) ? i : (nregs - reg_mode_nregs - i); + unsigned subreg + = WORDS_BIG_ENDIAN ? i : (nregs - reg_mode_nregs - i); rtx dst2 = gen_rtx_REG (reg_mode, reg + subreg); rtx src2 = adjust_address (src, reg_mode, offset); offset += size; @@ -26752,13 +26758,53 @@ rs6000_split_multireg_move (rtx dst, rtx src) if (GET_MODE (src) == OOmode) gcc_assert (VSX_REGNO_P (REGNO (dst))); - reg_mode = GET_MODE (XVECEXP (src, 0, 0)); int nvecs = XVECLEN (src, 0); for (int i = 0; i < nvecs; i++) { - int index = WORDS_BIG_ENDIAN ? i : nvecs - 1 - i; - rtx dst_i = gen_rtx_REG (reg_mode, reg + index); - emit_insn (gen_rtx_SET (dst_i, XVECEXP (src, 0, i))); + rtx op; + int regno = reg + i; + + if (WORDS_BIG_ENDIAN) + { + op = XVECEXP (src, 0, i); + + /* If we are loading an even VSX register and the memory location + is adjacent to the next register's memory location (if any), + then we can load them both with one LXVP instruction. */ + if ((regno & 1) == 0) + { + rtx op2 = XVECEXP (src, 0, i + 1); + if (adjacent_mem_locations (op, op2) == op) + { + op = adjust_address (op, OOmode, 0); + /* Skip the next register, since we're going to + load it together with this register. */ + i++; + } + } + } + else + { + op = XVECEXP (src, 0, nvecs - i - 1); + + /* If we are loading an even VSX register and the memory location + is adjacent to the next register's memory location (if any), + then we can load them both with one LXVP instruction. */ + if ((regno & 1) == 0) + { + rtx op2 = XVECEXP (src, 0, nvecs - i - 2); + if (adjacent_mem_locations (op2, op) == op2) + { + op = adjust_address (op2, OOmode, 0); + /* Skip the next register, since we're going to + load it together with this register. 
*/ + i++; + } + } + } + + rtx dst_i = gen_rtx_REG (GET_MODE (op), regno); + emit_insn (gen_rtx_SET (dst_i, op)); } /* We are writing an accumulator register, so we have to diff --git a/gcc/testsuite/gcc.target/powerpc/mma-builtin-9.c b/gcc/testsuite/gcc.target/powerpc/mma-builtin-9.c new file mode 100644 index 0000000..397d0f1 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/mma-builtin-9.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +typedef unsigned char vec_t __attribute__((vector_size(16))); + +void +foo (__vector_pair *dst, vec_t *src) +{ + __vector_pair pair; + /* Adjacent loads should be combined into one lxvp instruction. */ + __builtin_vsx_build_pair (&pair, src[0], src[1]); + *dst = pair; +} + +void +bar (__vector_quad *dst, vec_t *src) +{ + __vector_quad quad; + /* Adjacent loads should be combined into two lxvp instructions. */ + __builtin_mma_build_acc (&quad, src[0], src[1], src[2], src[3]); + *dst = quad; +} + +/* { dg-final { scan-assembler-not {\mlxv\M} } } */ +/* { dg-final { scan-assembler-not {\mstxv\M} } } */ +/* { dg-final { scan-assembler-times {\mlxvp\M} 3 } } */ +/* { dg-final { scan-assembler-times {\mstxvp\M} 3 } } */ -- cgit v1.1 From c4fee1c646d52a9001a53fa0d4072db86b9be791 Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Thu, 15 Jul 2021 00:16:54 +0000 Subject: Daily bump. 
--- gcc/ChangeLog | 130 ++++++++++++++++++++++++++++++++++++++++++++++++ gcc/DATESTAMP | 2 +- gcc/c-family/ChangeLog | 4 ++ gcc/cp/ChangeLog | 35 +++++++++++++ gcc/fortran/ChangeLog | 6 +++ gcc/testsuite/ChangeLog | 81 ++++++++++++++++++++++++++++++ 6 files changed, 257 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index ebeb274..e6c88f2 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,133 @@ +2021-07-14 Peter Bergner + + * config/rs6000/rs6000.c (adjacent_mem_locations): Return the lower + addressed memory rtx, if any. + (rs6000_split_multireg_move): Fix code formatting. + Handle MMA build built-ins with operands in adjacent memory locations. + +2021-07-14 Peter Bergner + + * config/rs6000/rs6000.c (rs6000_split_multireg_move): Move to later + in the file. + +2021-07-14 Jason Merrill + + * sel-sched-ir.h (get_all_loop_exits): Use auto_vec. + +2021-07-14 Jason Merrill + + * doc/invoke.texi: -fdelete-dead-exceptions is on by default for + C++. + +2021-07-14 Tamar Christina + + * tree-vect-patterns.c (vect_recog_dot_prod_pattern): + Remove erroneous line. + +2021-07-14 Andrew MacLeod + + * params.opt (param_evrp_mode): Change default. + +2021-07-14 Tamar Christina + + * config/aarch64/aarch64-simd-builtins.def (udot, sdot): Rename to... + (sdot_prod, udot_prod): ...These. + * config/aarch64/aarch64-simd.md (dot_prod): Remove. + (aarch64_dot): Rename to... + (dot_prod): ...This. + * config/aarch64/arm_neon.h (vdot_u32, vdotq_u32, vdot_s32, vdotq_s32): + Update builtins. + +2021-07-14 Tamar Christina + + * config/arm/neon.md (dot_prod): Drop statements. + +2021-07-14 Tamar Christina + + * doc/sourcebuild.texi (arm_v8_2a_i8mm_neon_hw): Document. + +2021-07-14 Tamar Christina + + * config/arm/neon.md (usdot_prod): New. + +2021-07-14 Tamar Christina + + * config/aarch64/aarch64-simd.md (aarch64_usdot): Rename to... + (usdot_prod): ... This. + * config/aarch64/aarch64-simd-builtins.def (usdot): Rename to... 
+ (usdot_prod): ...This. + * config/aarch64/arm_neon.h (vusdot_s32, vusdotq_s32): Likewise. + * config/aarch64/aarch64-sve.md (@aarch64_dot_prod): + Rename to... + (@dot_prod): ...This. + * config/aarch64/aarch64-sve-builtins-base.cc + (svusdot_impl::expand): Use it. + +2021-07-14 Tamar Christina + + * optabs.def (usdot_prod_optab): New. + * doc/md.texi: Document it and clarify other dot prod optabs. + * optabs-tree.h (enum optab_subtype): Add optab_vector_mixed_sign. + * optabs-tree.c (optab_for_tree_code): Support usdot_prod_optab. + * optabs.c (expand_widen_pattern_expr): Likewise. + * tree-cfg.c (verify_gimple_assign_ternary): Likewise. + * tree-vect-loop.c (vectorizable_reduction): Query dot-product kind. + * tree-vect-patterns.c (vect_supportable_direct_optab_p): Take optional + optab subtype. + (vect_widened_op_tree): Optionally ignore + mismatch types. + (vect_recog_dot_prod_pattern): Support usdot_prod_optab. + +2021-07-14 H.J. Lu + + PR target/101395 + * config/i386/driver-i386.c (host_detect_local_cpu): Check + "arch [32|64]" and "tune [32|64]" for 32-bit and 64-bit codegen. + Enable UINTR only for 64-bit codegen. + * config/i386/i386-options.c + (ix86_option_override_internal::DEF_PTA): Skip PTA_UINTR if not + in 64-bit mode. + * config/i386/i386.h (ARCH_ARG): New. + (CC1_CPU_SPEC): Pass "[arch|tune] 32" for 32-bit codegen and + "[arch|tune] 64" for 64-bit codegen. + +2021-07-14 Richard Biener + + PR tree-optimization/101445 + * tree-vect-stmts.c (vectorizable_load): Do the gap adjustment + of the IV in the correct direction for negative stride + accesses. + +2021-07-14 Jakub Jelinek + + PR go/101407 + * godump.c (godump_str_hash): New type. + (godump_container::pot_dummy_types): Use string_hash instead of + ptr_hash in the hash_set. + +2021-07-14 Richard Biener + + * tree-vect-loop.c (vect_find_reusable_accumulator): Handle + vector types where the old vector type has a multiple of + the new vector type elements. 
+ (vect_create_partial_epilog): New function, split out from... + (vect_create_epilog_for_reduction): ... here. + (vect_transform_cycle_phi): Reduce the re-used accumulator + to the new vector type. + +2021-07-14 Alexandre Oliva + + * tree-ssa-alias.c (attr_fnspec::verify): Fix index in + non-'t'-sized arg check. + +2021-07-14 Alexandre Oliva + + * tree-cfg.c (cleanup_dead_labels_eh): Update + post_landing_pad label upon change of landing pad block's + primary label. + (cleanup_dead_labels): Check that a removed label is not that + of a landing pad. + 2021-07-13 Jonathan Wright * combine.c (combine_simplify_rtx): Add vec_select -> subreg diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 4b4dbab..9070a2a 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20210714 +20210715 diff --git a/gcc/c-family/ChangeLog b/gcc/c-family/ChangeLog index 0f1b45d..f98bf2b 100644 --- a/gcc/c-family/ChangeLog +++ b/gcc/c-family/ChangeLog @@ -1,3 +1,7 @@ +2021-07-14 Jason Merrill + + * c-opts.c (c_common_post_options): Set -fdelete-dead-exceptions. + 2021-07-06 Martin Sebor * c-format.c (gcc_tdiag_char_table): Remove support for %G and %K. diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index dc57991..a80d236 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,38 @@ +2021-07-14 Patrick Palka + + PR c++/88252 + * cp-tree.h (TEMPLATE_TYPE_PARM_FOR_CLASS): Remove. + * pt.c (push_template_decl): Remove TEMPLATE_TYPE_PARM_FOR_CLASS + handling. + (redeclare_class_template): Likewise. + (forwarding_reference_p): Define. + (maybe_adjust_types_for_deduction): Use it instead. Add 'tparms' + parameter. + (unify_one_argument): Pass tparms to + maybe_adjust_types_for_deduction. + (try_one_overload): Likewise. + (unify): Likewise. + (rewrite_template_parm): Remove TEMPLATE_TYPE_PARM_FOR_CLASS + handling. + +2021-07-14 Jason Merrill + + * class.c (struct find_final_overrider_data): Use auto_vec. + (find_final_overrider): Remove explicit release. 
+ * coroutines.cc (process_conditional): Use auto_vec. + * cp-gimplify.c (struct cp_genericize_data): Use auto_vec. + (cp_genericize_tree): Remove explicit release. + * parser.c (cp_parser_objc_at_property_declaration): Use + auto_delete_vec. + * semantics.c (omp_reduction_lookup): Use auto_vec. + +2021-07-14 Marek Polacek + + PR c++/101371 + * constexpr.c (cxx_eval_array_reference): Create a new .object + and .ctor for the non-aggregate non-scalar case too when + value-initializing. + 2021-07-12 Patrick Palka PR c++/79501 diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index 3cf3e7d..5406c53 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,9 @@ +2021-07-14 Harald Anlauf + + PR fortran/100949 + * trans-expr.c (gfc_trans_class_init_assign): Call + gfc_conv_expr_present only for dummy variables. + 2021-07-06 Thomas Koenig PR fortran/100227 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 0ed2e93..944639a 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,84 @@ +2021-07-14 Peter Bergner + + * gcc.target/powerpc/mma-builtin-9.c: New test. + +2021-07-14 Patrick Palka + + PR c++/88252 + * g++.dg/cpp1z/class-deduction96.C: New test. + +2021-07-14 Jason Merrill + + * g++.dg/torture/pr100382.C: Pass -fno-delete-dead-exceptions. + +2021-07-14 Tamar Christina + + * gcc.dg/vect/vect-reduc-dot-11.c: Expect pass. + * gcc.dg/vect/vect-reduc-dot-15.c: Likewise. + * gcc.dg/vect/vect-reduc-dot-19.c: Likewise. + * gcc.dg/vect/vect-reduc-dot-21.c: Likewise. + +2021-07-14 Andrew MacLeod + + * gcc.dg/pr80776-1.c: Remove xfail. + +2021-07-14 Marek Polacek + + PR c++/101371 + * g++.dg/cpp1y/constexpr-101371-2.C: New test. + * g++.dg/cpp1y/constexpr-101371.C: New test. + +2021-07-14 Harald Anlauf + + PR fortran/100949 + * gfortran.dg/pr100949.f90: New test. 
+ +2021-07-14 Tamar Christina + + * lib/target-supports.exp + (check_effective_target_arm_v8_2a_imm8_neon_ok_nocache, + check_effective_target_arm_v8_2a_i8mm_neon_hw, + check_effective_target_vect_usdot_qi): New. + * gcc.dg/vect/vect-reduc-dot-9.c: New test. + * gcc.dg/vect/vect-reduc-dot-10.c: New test. + * gcc.dg/vect/vect-reduc-dot-11.c: New test. + * gcc.dg/vect/vect-reduc-dot-12.c: New test. + * gcc.dg/vect/vect-reduc-dot-13.c: New test. + * gcc.dg/vect/vect-reduc-dot-14.c: New test. + * gcc.dg/vect/vect-reduc-dot-15.c: New test. + * gcc.dg/vect/vect-reduc-dot-16.c: New test. + * gcc.dg/vect/vect-reduc-dot-17.c: New test. + * gcc.dg/vect/vect-reduc-dot-18.c: New test. + * gcc.dg/vect/vect-reduc-dot-19.c: New test. + * gcc.dg/vect/vect-reduc-dot-20.c: New test. + * gcc.dg/vect/vect-reduc-dot-21.c: New test. + * gcc.dg/vect/vect-reduc-dot-22.c: New test. + +2021-07-14 Tamar Christina + + * gcc.target/arm/simd/vusdot-autovec.c: New test. + +2021-07-14 Tamar Christina + + * gcc.target/aarch64/simd/vusdot-autovec.c: New test. + * gcc.target/aarch64/sve/vusdot-autovec.c: New test. + +2021-07-14 H.J. Lu + + PR target/101395 + * gcc.target/i386/pr101395-1.c: New test. + * gcc.target/i386/pr101395-2.c: Likewise. + * gcc.target/i386/pr101395-3.c: Likewise. + +2021-07-14 Richard Biener + + PR tree-optimization/101445 + * gcc.dg/vect/pr101445.c: New testcase. + +2021-07-14 Richard Biener + + * gcc.target/i386/vect-reduc-1.c: New testcase. + 2021-07-13 Jonathan Wright * gcc.target/aarch64/extract_zero_extend.c: Remove dump scan -- cgit v1.1 From 0b7a11874d4eb428c18a91f38786032ce0e77a96 Mon Sep 17 00:00:00 2001 From: Jason Merrill Date: Wed, 14 Jul 2021 17:10:49 -0400 Subject: c++: fix tree_contains_struct for C++ types [PR101095] Many of the types from cp-tree.def were only marked as having tree_common, when actually most of them have type_non_common. 
This broke g++.dg/modules/xtreme-header-2, as the modules code relies on tree_contains_struct to know what bits it needs to stream. We don't seem to use type_non_common for TYPE_ARGUMENT_PACK, so I bumped it down to TS_TYPE_COMMON. I tried doing the same in cp_tree_size, but that breaks without more extensive changes to tree_node_structure. Why do we need the init_ts function anyway? It seems redundant with tree_node_structure. PR c++/101095 gcc/cp/ChangeLog: * cp-objcp-common.c (cp_common_init_ts): Mark types as types. (cp_tree_size): Remove redundant entries. --- gcc/cp/cp-objcp-common.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'gcc') diff --git a/gcc/cp/cp-objcp-common.c b/gcc/cp/cp-objcp-common.c index 46b2248..ee25573 100644 --- a/gcc/cp/cp-objcp-common.c +++ b/gcc/cp/cp-objcp-common.c @@ -72,10 +72,13 @@ cp_tree_size (enum tree_code code) case DEFERRED_NOEXCEPT: return sizeof (tree_deferred_noexcept); case OVERLOAD: return sizeof (tree_overload); case STATIC_ASSERT: return sizeof (tree_static_assert); - case TYPE_ARGUMENT_PACK: - case TYPE_PACK_EXPANSION: return sizeof (tree_type_non_common); - case NONTYPE_ARGUMENT_PACK: - case EXPR_PACK_EXPANSION: return sizeof (tree_exp); +#if 0 + /* This would match cp_common_init_ts, but breaks GC because + tree_node_structure_for_code returns TS_TYPE_NON_COMMON for all + types. */ + case UNBOUND_CLASS_TEMPLATE: + case TYPE_ARGUMENT_PACK: return sizeof (tree_type_common); +#endif case ARGUMENT_PACK_SELECT: return sizeof (tree_argument_pack_select); case TRAIT_EXPR: return sizeof (tree_trait_expr); case LAMBDA_EXPR: return sizeof (tree_lambda_expr); @@ -456,13 +459,8 @@ cp_common_init_ts (void) /* Random new trees. 
*/ MARK_TS_COMMON (BASELINK); - MARK_TS_COMMON (DECLTYPE_TYPE); MARK_TS_COMMON (OVERLOAD); MARK_TS_COMMON (TEMPLATE_PARM_INDEX); - MARK_TS_COMMON (TYPENAME_TYPE); - MARK_TS_COMMON (TYPEOF_TYPE); - MARK_TS_COMMON (UNBOUND_CLASS_TEMPLATE); - MARK_TS_COMMON (UNDERLYING_TYPE); /* New decls. */ MARK_TS_DECL_COMMON (TEMPLATE_DECL); @@ -472,10 +470,16 @@ cp_common_init_ts (void) MARK_TS_DECL_NON_COMMON (USING_DECL); /* New Types. */ + MARK_TS_TYPE_COMMON (UNBOUND_CLASS_TEMPLATE); + MARK_TS_TYPE_COMMON (TYPE_ARGUMENT_PACK); + + MARK_TS_TYPE_NON_COMMON (DECLTYPE_TYPE); + MARK_TS_TYPE_NON_COMMON (TYPENAME_TYPE); + MARK_TS_TYPE_NON_COMMON (TYPEOF_TYPE); + MARK_TS_TYPE_NON_COMMON (UNDERLYING_TYPE); MARK_TS_TYPE_NON_COMMON (BOUND_TEMPLATE_TEMPLATE_PARM); MARK_TS_TYPE_NON_COMMON (TEMPLATE_TEMPLATE_PARM); MARK_TS_TYPE_NON_COMMON (TEMPLATE_TYPE_PARM); - MARK_TS_TYPE_NON_COMMON (TYPE_ARGUMENT_PACK); MARK_TS_TYPE_NON_COMMON (TYPE_PACK_EXPANSION); /* Statements. */ -- cgit v1.1 From 28ca8446417d1af3ea00912e76ff46244b2a4075 Mon Sep 17 00:00:00 2001 From: Trevor Saunders Date: Wed, 7 Jul 2021 21:25:30 -0400 Subject: use error_at and warning_at in cfgexpand.c gcc/ChangeLog: * cfgexpand.c (tree_conflicts_with_clobbers_p): Pass location to diagnostics. (expand_asm_stmt): Likewise. Signed-off-by: Trevor Saunders --- gcc/cfgexpand.c | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) (limited to 'gcc') diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c index 3edd53c..7dd1225 100644 --- a/gcc/cfgexpand.c +++ b/gcc/cfgexpand.c @@ -2954,7 +2954,8 @@ check_operand_nalternatives (const vec &constraints) variable definition for error, NULL_TREE for ok. */ static bool -tree_conflicts_with_clobbers_p (tree t, HARD_REG_SET *clobbered_regs) +tree_conflicts_with_clobbers_p (tree t, HARD_REG_SET *clobbered_regs, + location_t loc) { /* Conflicts between asm-declared register variables and the clobber list are not allowed. 
*/ @@ -2962,9 +2963,8 @@ tree_conflicts_with_clobbers_p (tree t, HARD_REG_SET *clobbered_regs) if (overlap) { - error ("%<asm%> specifier for variable %qE conflicts with " - "%<asm%> clobber list", - DECL_NAME (overlap)); + error_at (loc, "%<asm%> specifier for variable %qE conflicts with " + "%<asm%> clobber list", DECL_NAME (overlap)); /* Reset registerness to stop multiple errors emitted for a single variable. */ @@ -3087,7 +3087,7 @@ expand_asm_stmt (gasm *stmt) /* ??? Diagnose during gimplification? */ if (ninputs + noutputs + nlabels > MAX_RECOG_OPERANDS) { - error ("more than %d operands in %<asm%>", MAX_RECOG_OPERANDS); + error_at (locus, "more than %d operands in %<asm%>", MAX_RECOG_OPERANDS); return; } @@ -3140,7 +3140,8 @@ expand_asm_stmt (gasm *stmt) if (j == -2) { /* ??? Diagnose during gimplification? */ - error ("unknown register name %qs in %<asm%>", regname); + error_at (locus, "unknown register name %qs in %<asm%>", + regname); error_seen = true; } else if (j == -4) @@ -3205,7 +3206,8 @@ expand_asm_stmt (gasm *stmt) && HARD_REGISTER_P (DECL_RTL (output_tvec[j])) && output_hregno == REGNO (DECL_RTL (output_tvec[j]))) { - error ("invalid hard register usage between output operands"); + error_at (locus, "invalid hard register usage between output " + "operands"); error_seen = true; } @@ -3231,16 +3233,16 @@ expand_asm_stmt (gasm *stmt) if (i == match && output_hregno != input_hregno) { - error ("invalid hard register usage between output " - "operand and matching constraint operand"); + error_at (locus, "invalid hard register usage between " + "output operand and matching constraint operand"); error_seen = true; } else if (early_clobber_p && i != match && output_hregno == input_hregno) { - error ("invalid hard register usage between " - "earlyclobber operand and input operand"); + error_at (locus, "invalid hard register usage between " + "earlyclobber operand and input operand"); error_seen = true; } } @@ -3319,7 +3321,7 @@ expand_asm_stmt (gasm *stmt) if (!
allows_reg && !MEM_P (op)) { - error ("output number %d not directly addressable", i); + error_at (locus, "output number %d not directly addressable", i); error_seen = true; } if ((! allows_mem && MEM_P (op) && GET_MODE (op) != BLKmode) @@ -3415,9 +3417,8 @@ expand_asm_stmt (gasm *stmt) if (allows_reg && TYPE_MODE (type) != BLKmode) op = force_reg (TYPE_MODE (type), op); else if (!allows_mem) - warning (0, "%<asm%> operand %d probably does not match " - "constraints", - i + noutputs); + warning_at (locus, 0, "%<asm%> operand %d probably does not match " + "constraints", i + noutputs); else if (MEM_P (op)) { /* We won't recognize either volatile memory or memory @@ -3471,10 +3472,10 @@ expand_asm_stmt (gasm *stmt) bool clobber_conflict_found = 0; for (i = 0; i < noutputs; ++i) - if (tree_conflicts_with_clobbers_p (output_tvec[i], &clobbered_regs)) + if (tree_conflicts_with_clobbers_p (output_tvec[i], &clobbered_regs, locus)) clobber_conflict_found = 1; for (i = 0; i < ninputs - ninout; ++i) - if (tree_conflicts_with_clobbers_p (input_tvec[i], &clobbered_regs)) + if (tree_conflicts_with_clobbers_p (input_tvec[i], &clobbered_regs, locus)) clobber_conflict_found = 1; /* Make vectors for the expression-rtx, constraint strings, -- cgit v1.1 From 329769b72001dcca18a120ab751f58de1b716ff8 Mon Sep 17 00:00:00 2001 From: Trevor Saunders Date: Wed, 7 Jul 2021 21:30:03 -0400 Subject: use diagnostic location in diagnostic_report_current_function It appears that input_location was used here before the diagnostic's location was available, and never updated, when the other part of the header was added that uses it, so this makes it consistent. gcc/ChangeLog: * tree-diagnostic.c (diagnostic_report_current_function): Use the diagnostic's location, not input_location.
Signed-off-by: Trevor Saunders --- gcc/tree-diagnostic.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'gcc') diff --git a/gcc/tree-diagnostic.c b/gcc/tree-diagnostic.c index 8bb214b..705da94 100644 --- a/gcc/tree-diagnostic.c +++ b/gcc/tree-diagnostic.c @@ -36,9 +36,9 @@ void diagnostic_report_current_function (diagnostic_context *context, diagnostic_info *diagnostic) { - diagnostic_report_current_module (context, diagnostic_location (diagnostic)); - lang_hooks.print_error_function (context, LOCATION_FILE (input_location), - diagnostic); + location_t loc = diagnostic_location (diagnostic); + diagnostic_report_current_module (context, loc); + lang_hooks.print_error_function (context, LOCATION_FILE (loc), diagnostic); } static void -- cgit v1.1 From 8d76ff99220c7aa428516e93998457dbe299f037 Mon Sep 17 00:00:00 2001 From: Trevor Saunders Date: Mon, 12 Jul 2021 02:55:17 -0400 Subject: pass location to md_asm_adjust So the hook can use it as the location of diagnostics. gcc/ChangeLog: * cfgexpand.c (expand_asm_loc): Adjust. (expand_asm_stmt): Likewise. * config/arm/aarch-common-protos.h (arm_md_asm_adjust): Likewise. * config/arm/aarch-common.c (arm_md_asm_adjust): Likewise. * config/arm/arm.c (thumb1_md_asm_adjust): Likewise. * config/avr/avr.c (avr_md_asm_adjust): Likewise. * config/cris/cris.c (cris_md_asm_adjust): Likewise. * config/i386/i386.c (ix86_md_asm_adjust): Likewise. * config/mn10300/mn10300.c (mn10300_md_asm_adjust): Likewise. * config/nds32/nds32.c (nds32_md_asm_adjust): Likewise. * config/pdp11/pdp11.c (pdp11_md_asm_adjust): Likewise. * config/rs6000/rs6000.c (rs6000_md_asm_adjust): Likewise. * config/s390/s390.c (s390_md_asm_adjust): Likewise. * config/vax/vax.c (vax_md_asm_adjust): Likewise. * config/visium/visium.c (visium_md_asm_adjust): Likewise. * doc/tm.texi: Regenerate. * target.def: Add location argument to md_asm_adjust. 
Signed-off-by: Trevor Saunders --- gcc/cfgexpand.c | 9 +++++---- gcc/config/arm/aarch-common-protos.h | 3 ++- gcc/config/arm/aarch-common.c | 8 ++++---- gcc/config/arm/arm.c | 4 ++-- gcc/config/avr/avr.c | 3 ++- gcc/config/cris/cris.c | 4 ++-- gcc/config/i386/i386.c | 8 ++++---- gcc/config/mn10300/mn10300.c | 2 +- gcc/config/nds32/nds32.c | 3 ++- gcc/config/pdp11/pdp11.c | 4 ++-- gcc/config/rs6000/rs6000.c | 2 +- gcc/config/s390/s390.c | 2 +- gcc/config/vax/vax.c | 5 +++-- gcc/config/visium/visium.c | 4 ++-- gcc/doc/tm.texi | 5 +++-- gcc/target.def | 5 +++-- 16 files changed, 39 insertions(+), 32 deletions(-) (limited to 'gcc') diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c index 7dd1225..8183280 100644 --- a/gcc/cfgexpand.c +++ b/gcc/cfgexpand.c @@ -2897,7 +2897,8 @@ expand_asm_loc (tree string, int vol, location_t locus) if (targetm.md_asm_adjust) targetm.md_asm_adjust (output_rvec, input_rvec, input_mode, - constraints, clobber_rvec, clobbered_regs); + constraints, clobber_rvec, clobbered_regs, + locus); asm_op = body; nclobbers = clobber_rvec.length (); @@ -3074,8 +3075,7 @@ expand_asm_stmt (gasm *stmt) return; } - /* There are some legacy diagnostics in here, and also avoids an extra - parameter to targetm.md_asm_adjust. */ + /* There are some legacy diagnostics in here. */ save_input_location s_i_l(locus); unsigned noutputs = gimple_asm_noutputs (stmt); @@ -3456,7 +3456,8 @@ expand_asm_stmt (gasm *stmt) if (targetm.md_asm_adjust) after_md_seq = targetm.md_asm_adjust (output_rvec, input_rvec, input_mode, - constraints, clobber_rvec, clobbered_regs); + constraints, clobber_rvec, clobbered_regs, + locus); /* Do not allow the hook to change the output and input count, lest it mess up the operand numbering. 
*/ diff --git a/gcc/config/arm/aarch-common-protos.h b/gcc/config/arm/aarch-common-protos.h index b6171e8..6be5fb1 100644 --- a/gcc/config/arm/aarch-common-protos.h +++ b/gcc/config/arm/aarch-common-protos.h @@ -147,6 +147,7 @@ struct cpu_cost_table rtx_insn *arm_md_asm_adjust (vec &outputs, vec & /*inputs*/, vec & /*input_modes*/, vec &constraints, - vec &clobbers, HARD_REG_SET &clobbered_regs); + vec &clobbers, HARD_REG_SET &clobbered_regs, + location_t loc); #endif /* GCC_AARCH_COMMON_PROTOS_H */ diff --git a/gcc/config/arm/aarch-common.c b/gcc/config/arm/aarch-common.c index 0dbdc56..67343fe 100644 --- a/gcc/config/arm/aarch-common.c +++ b/gcc/config/arm/aarch-common.c @@ -534,7 +534,7 @@ rtx_insn * arm_md_asm_adjust (vec &outputs, vec & /*inputs*/, vec & /*input_modes*/, vec &constraints, vec & /*clobbers*/, - HARD_REG_SET & /*clobbered_regs*/) + HARD_REG_SET & /*clobbered_regs*/, location_t loc) { bool saw_asm_flag = false; @@ -547,7 +547,7 @@ arm_md_asm_adjust (vec &outputs, vec & /*inputs*/, con += 4; if (strchr (con, ',') != NULL) { - error ("alternatives not allowed in % flag output"); + error_at (loc, "alternatives not allowed in % flag output"); continue; } @@ -608,7 +608,7 @@ arm_md_asm_adjust (vec &outputs, vec & /*inputs*/, mode = CC_Vmode, code = NE; break; default: - error ("unknown % flag output %qs", constraints[i]); + error_at (loc, "unknown % flag output %qs", constraints[i]); continue; } @@ -618,7 +618,7 @@ arm_md_asm_adjust (vec &outputs, vec & /*inputs*/, machine_mode dest_mode = GET_MODE (dest); if (!SCALAR_INT_MODE_P (dest_mode)) { - error ("invalid type for % flag output"); + error_at (loc, "invalid type for % flag output"); continue; } diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index de37c90..6d781e2 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -333,7 +333,7 @@ static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT); static rtx_insn *thumb1_md_asm_adjust (vec &, vec &, vec &, vec &, vec &, 
- HARD_REG_SET &); + HARD_REG_SET &, location_t); /* Table of machine attributes. */ static const struct attribute_spec arm_attribute_table[] = @@ -34105,7 +34105,7 @@ rtx_insn * thumb1_md_asm_adjust (vec &outputs, vec & /*inputs*/, vec & /*input_modes*/, vec &constraints, vec & /*clobbers*/, - HARD_REG_SET & /*clobbered_regs*/) + HARD_REG_SET & /*clobbered_regs*/, location_t /*loc*/) { for (unsigned i = 0, n = outputs.length (); i < n; ++i) if (startswith (constraints[i], "=@cc")) diff --git a/gcc/config/avr/avr.c b/gcc/config/avr/avr.c index c95c436..200701a 100644 --- a/gcc/config/avr/avr.c +++ b/gcc/config/avr/avr.c @@ -14498,7 +14498,8 @@ static rtx_insn * avr_md_asm_adjust (vec &/*outputs*/, vec &/*inputs*/, vec & /*input_modes*/, vec &/*constraints*/, - vec &clobbers, HARD_REG_SET &clobbered_regs) + vec &clobbers, HARD_REG_SET &clobbered_regs, + location_t /*loc*/) { clobbers.safe_push (cc_reg_rtx); SET_HARD_REG_BIT (clobbered_regs, REG_CC); diff --git a/gcc/config/cris/cris.c b/gcc/config/cris/cris.c index d9213d7..f458ea0 100644 --- a/gcc/config/cris/cris.c +++ b/gcc/config/cris/cris.c @@ -151,7 +151,7 @@ static void cris_function_arg_advance (cumulative_args_t, const function_arg_info &); static rtx_insn *cris_md_asm_adjust (vec &, vec &, vec &, vec &, - vec &, HARD_REG_SET &); + vec &, HARD_REG_SET &, location_t); static void cris_option_override (void); @@ -3507,7 +3507,7 @@ static rtx_insn * cris_md_asm_adjust (vec &outputs, vec &inputs, vec & /*input_modes*/, vec &constraints, vec &clobbers, - HARD_REG_SET &clobbered_regs) + HARD_REG_SET &clobbered_regs, location_t /*loc*/) { /* For the time being, all asms clobber condition codes. 
Revisit when there's a reasonable use for inputs/outputs diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index cff2690..530d357 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -21596,7 +21596,7 @@ static rtx_insn * ix86_md_asm_adjust (vec &outputs, vec & /*inputs*/, vec & /*input_modes*/, vec &constraints, vec &clobbers, - HARD_REG_SET &clobbered_regs) + HARD_REG_SET &clobbered_regs, location_t loc) { bool saw_asm_flag = false; @@ -21609,7 +21609,7 @@ ix86_md_asm_adjust (vec &outputs, vec & /*inputs*/, con += 4; if (strchr (con, ',') != NULL) { - error ("alternatives not allowed in % flag output"); + error_at (loc, "alternatives not allowed in % flag output"); continue; } @@ -21673,7 +21673,7 @@ ix86_md_asm_adjust (vec &outputs, vec & /*inputs*/, } if (code == UNKNOWN) { - error ("unknown % flag output %qs", constraints[i]); + error_at (loc, "unknown % flag output %qs", constraints[i]); continue; } if (invert) @@ -21702,7 +21702,7 @@ ix86_md_asm_adjust (vec &outputs, vec & /*inputs*/, machine_mode dest_mode = GET_MODE (dest); if (!SCALAR_INT_MODE_P (dest_mode)) { - error ("invalid type for % flag output"); + error_at (loc, "invalid type for % flag output"); continue; } diff --git a/gcc/config/mn10300/mn10300.c b/gcc/config/mn10300/mn10300.c index c1c2e6e..6f842a3 100644 --- a/gcc/config/mn10300/mn10300.c +++ b/gcc/config/mn10300/mn10300.c @@ -2850,7 +2850,7 @@ static rtx_insn * mn10300_md_asm_adjust (vec & /*outputs*/, vec & /*inputs*/, vec & /*input_modes*/, vec & /*constraints*/, vec &clobbers, - HARD_REG_SET &clobbered_regs) + HARD_REG_SET &clobbered_regs, location_t /*loc*/) { clobbers.safe_push (gen_rtx_REG (CCmode, CC_REG)); SET_HARD_REG_BIT (clobbered_regs, CC_REG); diff --git a/gcc/config/nds32/nds32.c b/gcc/config/nds32/nds32.c index 7217d78..2c9cfcf 100644 --- a/gcc/config/nds32/nds32.c +++ b/gcc/config/nds32/nds32.c @@ -4199,7 +4199,8 @@ nds32_md_asm_adjust (vec &outputs ATTRIBUTE_UNUSED, vec &inputs ATTRIBUTE_UNUSED, vec 
&input_modes ATTRIBUTE_UNUSED, vec &constraints ATTRIBUTE_UNUSED, - vec &clobbers, HARD_REG_SET &clobbered_regs) + vec &clobbers, HARD_REG_SET &clobbered_regs, + location_t /*loc*/) { if (!flag_inline_asm_r15) { diff --git a/gcc/config/pdp11/pdp11.c b/gcc/config/pdp11/pdp11.c index 4cab3ae..ced6531 100644 --- a/gcc/config/pdp11/pdp11.c +++ b/gcc/config/pdp11/pdp11.c @@ -156,7 +156,7 @@ static int pdp11_addr_cost (rtx, machine_mode, addr_space_t, bool); static int pdp11_insn_cost (rtx_insn *insn, bool speed); static rtx_insn *pdp11_md_asm_adjust (vec &, vec &, vec &, vec &, - vec &, HARD_REG_SET &); + vec &, HARD_REG_SET &, location_t); static bool pdp11_return_in_memory (const_tree, const_tree); static rtx pdp11_function_value (const_tree, const_tree, bool); static rtx pdp11_libcall_value (machine_mode, const_rtx); @@ -2139,7 +2139,7 @@ static rtx_insn * pdp11_md_asm_adjust (vec & /*outputs*/, vec & /*inputs*/, vec & /*input_modes*/, vec & /*constraints*/, vec &clobbers, - HARD_REG_SET &clobbered_regs) + HARD_REG_SET &clobbered_regs, location_t /*loc*/) { clobbers.safe_push (gen_rtx_REG (CCmode, CC_REGNUM)); SET_HARD_REG_BIT (clobbered_regs, CC_REGNUM); diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index ce29b37..779de95 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -3444,7 +3444,7 @@ static rtx_insn * rs6000_md_asm_adjust (vec & /*outputs*/, vec & /*inputs*/, vec & /*input_modes*/, vec & /*constraints*/, vec &clobbers, - HARD_REG_SET &clobbered_regs) + HARD_REG_SET &clobbered_regs, location_t /*loc*/) { clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO)); SET_HARD_REG_BIT (clobbered_regs, CA_REGNO); diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c index 590dd8f..800e0ab 100644 --- a/gcc/config/s390/s390.c +++ b/gcc/config/s390/s390.c @@ -16771,7 +16771,7 @@ static rtx_insn * s390_md_asm_adjust (vec &outputs, vec &inputs, vec &input_modes, vec &constraints, vec & /*clobbers*/, - HARD_REG_SET & 
/*clobbered_regs*/) + HARD_REG_SET & /*clobbered_regs*/, location_t /*loc*/) { if (!TARGET_VXE) /* Long doubles are stored in FPR pairs - nothing to do. */ diff --git a/gcc/config/vax/vax.c b/gcc/config/vax/vax.c index 3aacd1e..e26ab3b 100644 --- a/gcc/config/vax/vax.c +++ b/gcc/config/vax/vax.c @@ -57,7 +57,7 @@ static bool vax_rtx_costs (rtx, machine_mode, int, int, int *, bool); static machine_mode vax_cc_modes_compatible (machine_mode, machine_mode); static rtx_insn *vax_md_asm_adjust (vec &, vec &, vec &, vec &, - vec &, HARD_REG_SET &); + vec &, HARD_REG_SET &, location_t); static rtx vax_function_arg (cumulative_args_t, const function_arg_info &); static void vax_function_arg_advance (cumulative_args_t, const function_arg_info &); @@ -1181,7 +1181,8 @@ vax_md_asm_adjust (vec &outputs ATTRIBUTE_UNUSED, vec &inputs ATTRIBUTE_UNUSED, vec &input_modes ATTRIBUTE_UNUSED, vec &constraints ATTRIBUTE_UNUSED, - vec &clobbers, HARD_REG_SET &clobbered_regs) + vec &clobbers, HARD_REG_SET &clobbered_regs, + location_t /*loc*/) { clobbers.safe_push (gen_rtx_REG (CCmode, VAX_PSL_REGNUM)); SET_HARD_REG_BIT (clobbered_regs, VAX_PSL_REGNUM); diff --git a/gcc/config/visium/visium.c b/gcc/config/visium/visium.c index 7eb2248..58e5355 100644 --- a/gcc/config/visium/visium.c +++ b/gcc/config/visium/visium.c @@ -190,7 +190,7 @@ static tree visium_build_builtin_va_list (void); static rtx_insn *visium_md_asm_adjust (vec &, vec &, vec &, vec &, vec &, - HARD_REG_SET &); + HARD_REG_SET &, location_t); static bool visium_legitimate_constant_p (machine_mode, rtx); @@ -795,7 +795,7 @@ static rtx_insn * visium_md_asm_adjust (vec & /*outputs*/, vec & /*inputs*/, vec & /*input_modes*/, vec & /*constraints*/, vec &clobbers, - HARD_REG_SET &clobbered_regs) + HARD_REG_SET &clobbered_regs, location_t /*loc*/) { clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REGNUM)); SET_HARD_REG_BIT (clobbered_regs, FLAGS_REGNUM); diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index 2a41ae5..3ad3944 100644 --- 
a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -11708,11 +11708,12 @@ from shared libraries (DLLs). You need not define this macro if it would always evaluate to zero. @end defmac -@deftypefn {Target Hook} {rtx_insn *} TARGET_MD_ASM_ADJUST (vec& @var{outputs}, vec& @var{inputs}, vec& @var{input_modes}, vec& @var{constraints}, vec& @var{clobbers}, HARD_REG_SET& @var{clobbered_regs}) +@deftypefn {Target Hook} {rtx_insn *} TARGET_MD_ASM_ADJUST (vec& @var{outputs}, vec& @var{inputs}, vec& @var{input_modes}, vec& @var{constraints}, vec& @var{clobbers}, HARD_REG_SET& @var{clobbered_regs}, location_t @var{loc}) This target hook may add @dfn{clobbers} to @var{clobbers} and @var{clobbered_regs} for any hard regs the port wishes to automatically clobber for an asm. The @var{outputs} and @var{inputs} may be inspected -to avoid clobbering a register that is already used by the asm. +to avoid clobbering a register that is already used by the asm. @var{loc} +is the source location of the asm. It may modify the @var{outputs}, @var{inputs}, @var{input_modes}, and @var{constraints} as necessary for other pre-processing. In this case the diff --git a/gcc/target.def b/gcc/target.def index c009671..2e40448 100644 --- a/gcc/target.def +++ b/gcc/target.def @@ -4226,7 +4226,8 @@ DEFHOOK "This target hook may add @dfn{clobbers} to @var{clobbers} and\n\ @var{clobbered_regs} for any hard regs the port wishes to automatically\n\ clobber for an asm. The @var{outputs} and @var{inputs} may be inspected\n\ -to avoid clobbering a register that is already used by the asm.\n\ +to avoid clobbering a register that is already used by the asm. @var{loc}\n\ +is the source location of the asm.\n\ \n\ It may modify the @var{outputs}, @var{inputs}, @var{input_modes}, and\n\ @var{constraints} as necessary for other pre-processing. 
In this case the\n\ @@ -4236,7 +4237,7 @@ to @var{input_modes}.", rtx_insn *, (vec& outputs, vec& inputs, vec& input_modes, vec& constraints, vec& clobbers, - HARD_REG_SET& clobbered_regs), + HARD_REG_SET& clobbered_regs, location_t loc), NULL) /* This target hook allows the backend to specify a calling convention -- cgit v1.1 From 4f3b383cf8825197e714a4a21852eca071f8e67e Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Fri, 9 Jul 2021 11:13:11 +0200 Subject: driver/101383 - handle -gtoggle in driver The driver amends assembler options with, for example, --gdwarf-5 when debugging is enabled but the check for that does not consider the effect of -gtoggle which is not handled in the common option machinery. The following alters debug_info_level according to -gtoggle mimicking what process_options later does in the compiler. This in particular avoids changing of the cc1-checksum with every bootstrap (debug) cycle as we compute that from stage2 where we use -g -gtoggle but with --gdwarf-5 and no debug info from the compiler the assembler will fill the line table with the temporary assembler file names. 2021-07-09 Richard Biener PR driver/101383 * gcc.c (process_command): Process -gtoggle like process_options would after parsing options. --- gcc/gcc.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'gcc') diff --git a/gcc/gcc.c b/gcc/gcc.c index 12b3440..3e98bc7 100644 --- a/gcc/gcc.c +++ b/gcc/gcc.c @@ -4927,6 +4927,16 @@ process_command (unsigned int decoded_options_count, #endif } + /* Handle -gtoggle as it would later in toplev.c:process_options to + make the debug-level-gt spec function work as expected.
*/ + if (flag_gtoggle) + { + if (debug_info_level == DINFO_LEVEL_NONE) + debug_info_level = DINFO_LEVEL_NORMAL; + else + debug_info_level = DINFO_LEVEL_NONE; + } + if (output_file && strcmp (output_file, "-") != 0 && strcmp (output_file, HOST_BIT_BUCKET) != 0) -- cgit v1.1 From f6dde32b9d487dd6e343d0a1e1d1f60783f5e735 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Thu, 15 Jul 2021 10:17:06 +0200 Subject: gimplify: Fix endless recursion on volatile empty type reads/writes [PR101437] Andrew's recent change to optimize away during gimplification not just assignments of zero sized types, but also assignments of empty types, caused infinite recursion in the gimplifier. If such assignment is optimized away, we gimplify separately the to_p and from_p operands and throw away the result. When gimplifying the operand that is volatile, we run into the gimplifier code below, which has different handling for types with non-BLKmode mode, tries to gimplify those as vol.N = expr, and for BLKmode just throws those away. Zero sized types will always have BLKmode and so are fine, but for the non-BLKmode ones like struct S in the testcase, the vol.N = expr gimplification will reach again the gimplify_modify_expr code, see it is assignment of empty type and will gimplify again vol.N separately (non-volatile, so ok) and expr, on which it will recurse again. The following patch breaks that infinite recursion by ignoring bare volatile loads from empty types. If a volatile load or store for aggregates are supposed to be member-wise loads or stores, then there are no non-padding members in the empty types that should be copied and so it is probably ok. 2021-07-15 Jakub Jelinek PR middle-end/101437 * gimplify.c (gimplify_expr): Throw away volatile reads from empty types even if they have non-BLKmode TYPE_MODE. * gcc.c-torture/compile/pr101437.c: New test. 
--- gcc/gimplify.c | 3 ++- gcc/testsuite/gcc.c-torture/compile/pr101437.c | 29 ++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr101437.c (limited to 'gcc') diff --git a/gcc/gimplify.c b/gcc/gimplify.c index 75a4a9d..93a2121 100644 --- a/gcc/gimplify.c +++ b/gcc/gimplify.c @@ -15060,7 +15060,8 @@ gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p, *expr_p = NULL; } else if (COMPLETE_TYPE_P (TREE_TYPE (*expr_p)) - && TYPE_MODE (TREE_TYPE (*expr_p)) != BLKmode) + && TYPE_MODE (TREE_TYPE (*expr_p)) != BLKmode + && !is_empty_type (TREE_TYPE (*expr_p))) { /* Historically, the compiler has treated a bare reference to a non-BLKmode volatile lvalue as forcing a load. */ diff --git a/gcc/testsuite/gcc.c-torture/compile/pr101437.c b/gcc/testsuite/gcc.c-torture/compile/pr101437.c new file mode 100644 index 0000000..96e7df8 --- /dev/null +++ b/gcc/testsuite/gcc.c-torture/compile/pr101437.c @@ -0,0 +1,29 @@ +/* PR middle-end/101437 */ + +struct S { int : 1; }; + +void +foo (volatile struct S *p) +{ + struct S s = {}; + *p = s; +} + +void +bar (volatile struct S *p) +{ + *p; +} + +void +baz (volatile struct S *p) +{ + struct S s; + s = *p; +} + +void +qux (volatile struct S *p, volatile struct S *q) +{ + *p = *q; +} -- cgit v1.1 From 5402023f05e8fc28c2f1cfd7107264403b118a17 Mon Sep 17 00:00:00 2001 From: Tamar Christina Date: Thu, 15 Jul 2021 13:16:00 +0100 Subject: Revert "AArch64: Correct dot-product auto-vect optab RTL" This reverts commit 6d1cdb27828d2ef1ae1ab0209836646a269b9610. 
--- gcc/config/aarch64/aarch64-simd-builtins.def | 4 +- gcc/config/aarch64/aarch64-simd.md | 62 +++++++++++++++++----------- gcc/config/aarch64/arm_neon.h | 8 ++-- 3 files changed, 45 insertions(+), 29 deletions(-) (limited to 'gcc') diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index 99e7348..063f503 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -375,8 +375,8 @@ BUILTIN_VSDQ_I_DI (BINOP_UUS, urshl, 0, NONE) /* Implemented by _prod. */ - BUILTIN_VB (TERNOP, sdot_prod, 10, NONE) - BUILTIN_VB (TERNOPU, udot_prod, 10, NONE) + BUILTIN_VB (TERNOP, sdot, 0, NONE) + BUILTIN_VB (TERNOPU, udot, 0, NONE) BUILTIN_VB (TERNOP_SSUS, usdot_prod, 10, NONE) /* Implemented by aarch64__lane{q}. */ BUILTIN_VB (QUADOP_LANE, sdot_lane, 0, NONE) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 88fa5ba..7489098 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -587,28 +587,8 @@ DONE; }) -;; These expands map to the Dot Product optab the vectorizer checks for -;; and to the intrinsics patttern. -;; The auto-vectorizer expects a dot product builtin that also does an -;; accumulation into the provided register. -;; Given the following pattern -;; -;; for (i=0; idot_prod" +;; These instructions map to the __builtins for the Dot Product operations. +(define_insn "aarch64_dot" [(set (match_operand:VS 0 "register_operand" "=w") (plus:VS (match_operand:VS 1 "register_operand" "0") (unspec:VS [(match_operand: 2 "register_operand" "w") @@ -633,6 +613,41 @@ [(set_attr "type" "neon_dot")] ) +;; These expands map to the Dot Product optab the vectorizer checks for. +;; The auto-vectorizer expects a dot product builtin that also does an +;; accumulation into the provided register. 
+;; Given the following pattern +;; +;; for (i=0; idot_prod" + [(set (match_operand:VS 0 "register_operand") + (plus:VS (unspec:VS [(match_operand: 1 "register_operand") + (match_operand: 2 "register_operand")] + DOTPROD) + (match_operand:VS 3 "register_operand")))] + "TARGET_DOTPROD" +{ + emit_insn ( + gen_aarch64_dot (operands[3], operands[3], operands[1], + operands[2])); + emit_insn (gen_rtx_SET (operands[0], operands[3])); + DONE; +}) + ;; These instructions map to the __builtins for the Dot Product ;; indexed operations. (define_insn "aarch64_dot_lane" @@ -929,7 +944,8 @@ rtx ones = force_reg (V16QImode, CONST1_RTX (V16QImode)); rtx abd = gen_reg_rtx (V16QImode); emit_insn (gen_aarch64_abdv16qi (abd, operands[1], operands[2])); - emit_insn (gen_udot_prodv16qi (operands[0], operands[3], abd, ones)); + emit_insn (gen_aarch64_udotv16qi (operands[0], operands[3], + abd, ones)); DONE; } rtx reduc = gen_reg_rtx (V8HImode); diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 597f44c..00d76ea 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -31767,28 +31767,28 @@ __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vdot_u32 (uint32x2_t __r, uint8x8_t __a, uint8x8_t __b) { - return __builtin_aarch64_udot_prodv8qi_uuuu (__r, __a, __b); + return __builtin_aarch64_udotv8qi_uuuu (__r, __a, __b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vdotq_u32 (uint32x4_t __r, uint8x16_t __a, uint8x16_t __b) { - return __builtin_aarch64_udot_prodv16qi_uuuu (__r, __a, __b); + return __builtin_aarch64_udotv16qi_uuuu (__r, __a, __b); } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vdot_s32 (int32x2_t __r, int8x8_t __a, int8x8_t __b) { - return __builtin_aarch64_sdot_prodv8qi (__r, __a, __b); + return __builtin_aarch64_sdotv8qi (__r, __a, __b); 
} __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vdotq_s32 (int32x4_t __r, int8x16_t __a, int8x16_t __b) { - return __builtin_aarch64_sdot_prodv16qi (__r, __a, __b); + return __builtin_aarch64_sdotv16qi (__r, __a, __b); } __extension__ extern __inline uint32x2_t -- cgit v1.1 From 8e321f2a6383e378f64e556707de1cdae0a8562d Mon Sep 17 00:00:00 2001 From: Tamar Christina Date: Thu, 15 Jul 2021 13:16:15 +0100 Subject: Revert "AArch32: Correct sdot RTL on aarch32" This reverts commit c9165e2d58bb037793c1c93e1b5633a61f88db30. --- gcc/config/arm/neon.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 7645121..8b0a396 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -2961,7 +2961,13 @@ DOTPROD) (match_operand:VCVTI 3 "register_operand")))] "TARGET_DOTPROD" -) +{ + emit_insn ( + gen_neon_dot (operands[3], operands[3], operands[1], + operands[2])); + emit_insn (gen_rtx_SET (operands[0], operands[3])); + DONE; +}) ;; Auto-vectorizer pattern for usdot (define_expand "usdot_prod" -- cgit v1.1 From 79f71ec6fca0e093d27cb238d7c75dccb3a55d65 Mon Sep 17 00:00:00 2001 From: Aldy Hernandez Date: Thu, 15 Jul 2021 12:38:36 +0200 Subject: Abstract out non_null adjustments in ranger. There are 4 exact copies of the non-null range adjusting code in the ranger. This patch abstracts the functionality into a separate method. As a follow-up I would like to remove the varying_p check, since I have seen incoming ranges such as [0, 0xff....ef] which are not varying, but are not-null. Removing the varying restriction catches those. gcc/ChangeLog: * gimple-range-cache.cc (non_null_ref::adjust_range): New. (ranger_cache::range_of_def): Call adjust_range. (ranger_cache::entry_range): Same. * gimple-range-cache.h (non_null_ref::adjust_range): New. * gimple-range.cc (gimple_ranger::range_of_expr): Call adjust_range. 
(gimple_ranger::range_on_entry): Same. --- gcc/gimple-range-cache.cc | 35 ++++++++++++++++++++++++++--------- gcc/gimple-range-cache.h | 2 ++ gcc/gimple-range.cc | 8 ++------ 3 files changed, 30 insertions(+), 15 deletions(-) (limited to 'gcc') diff --git a/gcc/gimple-range-cache.cc b/gcc/gimple-range-cache.cc index 98ecdbb..23597ad 100644 --- a/gcc/gimple-range-cache.cc +++ b/gcc/gimple-range-cache.cc @@ -81,6 +81,29 @@ non_null_ref::non_null_deref_p (tree name, basic_block bb, bool search_dom) return false; } +// If NAME has a non-null dereference in block BB, adjust R with the +// non-zero information from non_null_deref_p, and return TRUE. If +// SEARCH_DOM is true, non_null_deref_p should search the dominator tree. + +bool +non_null_ref::adjust_range (irange &r, tree name, basic_block bb, + bool search_dom) +{ + // Check if pointers have any non-null dereferences. Non-call + // exceptions mean we could throw in the middle of the block, so just + // punt for now on those. + if (!cfun->can_throw_non_call_exceptions + && r.varying_p () + && non_null_deref_p (name, bb, search_dom)) + { + int_range<2> nz; + nz.set_nonzero (TREE_TYPE (name)); + r.intersect (nz); + return true; + } + return false; +} + // Allocate an populate the bitmap for NAME. An ON bit for a block // index indicates there is a non-null reference in that block. In // order to populate the bitmap, a quick run of all the immediate uses @@ -857,9 +880,8 @@ ranger_cache::range_of_def (irange &r, tree name, basic_block bb) r = gimple_range_global (name); } - if (bb && r.varying_p () && m_non_null.non_null_deref_p (name, bb, false) && - !cfun->can_throw_non_call_exceptions) - r = range_nonzero (TREE_TYPE (name)); + if (bb) + m_non_null.adjust_range (r, name, bb, false); } // Get the range of NAME as it occurs on entry to block BB. 
@@ -878,12 +900,7 @@ ranger_cache::entry_range (irange &r, tree name, basic_block bb) if (!m_on_entry.get_bb_range (r, name, bb)) range_of_def (r, name); - // Check if pointers have any non-null dereferences. Non-call - // exceptions mean we could throw in the middle of the block, so just - // punt for now on those. - if (r.varying_p () && m_non_null.non_null_deref_p (name, bb, false) && - !cfun->can_throw_non_call_exceptions) - r = range_nonzero (TREE_TYPE (name)); + m_non_null.adjust_range (r, name, bb, false); } // Get the range of NAME as it occurs on exit from block BB. diff --git a/gcc/gimple-range-cache.h b/gcc/gimple-range-cache.h index ecf63dc..f842e9c 100644 --- a/gcc/gimple-range-cache.h +++ b/gcc/gimple-range-cache.h @@ -34,6 +34,8 @@ public: non_null_ref (); ~non_null_ref (); bool non_null_deref_p (tree name, basic_block bb, bool search_dom = true); + bool adjust_range (irange &r, tree name, basic_block bb, + bool search_dom = true); private: vec m_nn; void process_name (tree name); diff --git a/gcc/gimple-range.cc b/gcc/gimple-range.cc index 1851339..b210787 100644 --- a/gcc/gimple-range.cc +++ b/gcc/gimple-range.cc @@ -69,9 +69,7 @@ gimple_ranger::range_of_expr (irange &r, tree expr, gimple *stmt) if (def_stmt && gimple_bb (def_stmt) == bb) { range_of_stmt (r, def_stmt, expr); - if (!cfun->can_throw_non_call_exceptions && r.varying_p () && - m_cache.m_non_null.non_null_deref_p (expr, bb)) - r = range_nonzero (TREE_TYPE (expr)); + m_cache.m_non_null.adjust_range (r, expr, bb, true); } else // Otherwise OP comes from outside this block, use range on entry. 
@@ -95,9 +93,7 @@ gimple_ranger::range_on_entry (irange &r, basic_block bb, tree name) if (m_cache.block_range (entry_range, bb, name)) r.intersect (entry_range); - if (!cfun->can_throw_non_call_exceptions && r.varying_p () && - m_cache.m_non_null.non_null_deref_p (name, bb)) - r = range_nonzero (TREE_TYPE (name)); + m_cache.m_non_null.adjust_range (r, name, bb, true); } // Calculate the range for NAME at the end of block BB and return it in R. -- cgit v1.1 From 92acae5047e4b8c5be035f067099942a93e55d0c Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Thu, 15 Jul 2021 11:41:12 +0200 Subject: Streamline vect_gen_while This adjusts the vect_gen_while API to match that of vect_gen_while_not allowing further patches to generate more than one stmt for the while case. 2021-07-15 Richard Biener * tree-vectorizer.h (vect_gen_while): Match up with vect_gen_while_not. * tree-vect-stmts.c (vect_gen_while): Adjust API to that of vect_gen_while_not. (vect_gen_while_not): Adjust. * tree-vect-loop-manip.c (vect_set_loop_controls_directly): Likewise. 
--- gcc/tree-vect-loop-manip.c | 14 ++++++-------- gcc/tree-vect-stmts.c | 21 +++++++++++---------- gcc/tree-vectorizer.h | 3 ++- 3 files changed, 19 insertions(+), 19 deletions(-) (limited to 'gcc') diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c index c29ffb3..1f3d661 100644 --- a/gcc/tree-vect-loop-manip.c +++ b/gcc/tree-vect-loop-manip.c @@ -609,11 +609,8 @@ vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo, } if (use_masks_p) - { - init_ctrl = make_temp_ssa_name (ctrl_type, NULL, "max_mask"); - gimple *tmp_stmt = vect_gen_while (init_ctrl, start, end); - gimple_seq_add_stmt (preheader_seq, tmp_stmt); - } + init_ctrl = vect_gen_while (preheader_seq, ctrl_type, + start, end, "max_mask"); else { init_ctrl = make_temp_ssa_name (compare_type, NULL, "max_len"); @@ -652,9 +649,10 @@ vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo, /* Get the control value for the next iteration of the loop. */ if (use_masks_p) { - next_ctrl = make_temp_ssa_name (ctrl_type, NULL, "next_mask"); - gcall *call = vect_gen_while (next_ctrl, test_index, this_test_limit); - gsi_insert_before (test_gsi, call, GSI_SAME_STMT); + gimple_seq stmts = NULL; + next_ctrl = vect_gen_while (&stmts, ctrl_type, test_index, + this_test_limit, "next_mask"); + gsi_insert_seq_before (test_gsi, stmts, GSI_SAME_STMT); } else { diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 3980f09..ec82acb 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -11984,22 +11984,25 @@ supportable_narrowing_operation (enum tree_code code, return false; } -/* Generate and return a statement that sets vector mask MASK such that - MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */ +/* Generate and return a vector mask of MASK_TYPE such that + mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I. + Add the statements to SEQ. 
*/ -gcall * -vect_gen_while (tree mask, tree start_index, tree end_index) +tree +vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index, + tree end_index, const char *name) { tree cmp_type = TREE_TYPE (start_index); - tree mask_type = TREE_TYPE (mask); gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT, cmp_type, mask_type, OPTIMIZE_FOR_SPEED)); gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3, start_index, end_index, build_zero_cst (mask_type)); - gimple_call_set_lhs (call, mask); - return call; + tree tmp = make_temp_ssa_name (mask_type, NULL, name); + gimple_call_set_lhs (call, tmp); + gimple_seq_add_stmt (seq, call); + return tmp; } /* Generate a vector mask of type MASK_TYPE for which index I is false iff @@ -12009,9 +12012,7 @@ tree vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index, tree end_index) { - tree tmp = make_ssa_name (mask_type); - gcall *call = vect_gen_while (tmp, start_index, end_index); - gimple_seq_add_stmt (seq, call); + tree tmp = vect_gen_while (seq, mask_type, start_index, end_index); return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp); } diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index d825b0c..f7c08ca 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -1948,7 +1948,8 @@ extern bool vect_supportable_shift (vec_info *, enum tree_code, tree); extern tree vect_gen_perm_mask_any (tree, const vec_perm_indices &); extern tree vect_gen_perm_mask_checked (tree, const vec_perm_indices &); extern void optimize_mask_stores (class loop*); -extern gcall *vect_gen_while (tree, tree, tree); +extern tree vect_gen_while (gimple_seq *, tree, tree, tree, + const char * = nullptr); extern tree vect_gen_while_not (gimple_seq *, tree, tree, tree); extern opt_result vect_get_vector_types_for_stmt (vec_info *, stmt_vec_info, tree *, -- cgit v1.1 From ad5f8ac1d2f2dc92d43663243b52f9e9eb3cf7c0 Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Thu, 15 Jul 2021 10:16:17 -0500 
Subject: rs6000: Don't let swaps pass break multiply low-part (PR101129) 2021-07-15 Bill Schmidt gcc/ PR target/101129 * config/rs6000/rs6000-p8swap.c (has_part_mult): New. (rs6000_analyze_swaps): Insns containing a subreg of a mult are not swappable. gcc/testsuite/ PR target/101129 * gcc.target/powerpc/pr101129.c: New. --- gcc/config/rs6000/rs6000-p8swap.c | 19 ++++++++++++++++ gcc/testsuite/gcc.target/powerpc/pr101129.c | 35 +++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) create mode 100644 gcc/testsuite/gcc.target/powerpc/pr101129.c (limited to 'gcc') diff --git a/gcc/config/rs6000/rs6000-p8swap.c b/gcc/config/rs6000/rs6000-p8swap.c index 21cbcb2..6b559aa 100644 --- a/gcc/config/rs6000/rs6000-p8swap.c +++ b/gcc/config/rs6000/rs6000-p8swap.c @@ -1523,6 +1523,22 @@ replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i) insn->set_deleted (); } +/* INSN is known to contain a SUBREG, which we can normally handle, + but if the SUBREG itself contains a MULT then we need to leave it alone + to avoid turning a mult_hipart into a mult_lopart, for example. */ +static bool +has_part_mult (rtx_insn *insn) +{ + rtx body = PATTERN (insn); + if (GET_CODE (body) != SET) + return false; + rtx src = SET_SRC (body); + if (GET_CODE (src) != SUBREG) + return false; + rtx inner = XEXP (src, 0); + return (GET_CODE (inner) == MULT); +} + /* Make NEW_MEM_EXP's attributes and flags resemble those of ORIGINAL_MEM_EXP. 
*/ static void @@ -2501,6 +2517,9 @@ rs6000_analyze_swaps (function *fun) insn_entry[uid].is_swappable = 0; else if (special != SH_NONE) insn_entry[uid].special_handling = special; + else if (insn_entry[uid].contains_subreg + && has_part_mult (insn)) + insn_entry[uid].is_swappable = 0; else if (insn_entry[uid].contains_subreg) insn_entry[uid].special_handling = SH_SUBREG; } diff --git a/gcc/testsuite/gcc.target/powerpc/pr101129.c b/gcc/testsuite/gcc.target/powerpc/pr101129.c new file mode 100644 index 0000000..1abc124 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr101129.c @@ -0,0 +1,35 @@ +/* { dg-do run } */ +/* { dg-require-effective-target p8vector_hw } */ +/* { dg-options "-mdejagnu-cpu=power8 -O " } */ + +/* PR101129: The swaps pass was turning a mult-lopart into a mult-hipart. + Make sure we aren't doing that anymore. */ + +typedef unsigned char u8; +typedef unsigned char __attribute__((__vector_size__ (8))) U; +typedef unsigned char __attribute__((__vector_size__ (16))) V; +typedef unsigned int u32; +typedef unsigned long long u64; +typedef __int128 u128; + +u8 g; +U u; + +void +foo0 (u32 u32_0, U *ret) +{ + u128 u128_2 = u32_0 * (u128)((V){ 5 } > (u32_0 & 4)); + u64 u64_r = u128_2 >> 64; + u8 u8_r = u64_r + g; + *ret = u + u8_r; +} + +int +main (void) +{ + U x; + foo0 (7, &x); + for (unsigned i = 0; i < sizeof (x); i++) + if (x[i] != 0) __builtin_abort(); + return 0; +} -- cgit v1.1 From 07bd2703047d222ed7ff189d86350e73c5cc2c9e Mon Sep 17 00:00:00 2001 From: Martin Jambor Date: Thu, 15 Jul 2021 17:26:45 +0200 Subject: Change the type of return value of profile_count::value to uint64_t The field in which profile_count holds the count has 61 bits but the getter method only returns it as a 32 bit number. The getter is (and should be) only used for dumping but even dumps are better when they do not lie. gcc/ChangeLog: 2021-07-13 Martin Jambor * profile-count.h (profile_count::value): Change the return type to uint64_t. 
* gimple-pretty-print.c (dump_gimple_bb_header): Adjust print statement. * tree-cfg.c (dump_function_to_file): Likewise. --- gcc/gimple-pretty-print.c | 2 +- gcc/profile-count.h | 2 +- gcc/tree-cfg.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'gcc') diff --git a/gcc/gimple-pretty-print.c b/gcc/gimple-pretty-print.c index 39c5775..d6e63d6 100644 --- a/gcc/gimple-pretty-print.c +++ b/gcc/gimple-pretty-print.c @@ -2831,7 +2831,7 @@ dump_gimple_bb_header (FILE *outf, basic_block bb, int indent, if (bb->loop_father->header == bb) fprintf (outf, ",loop_header(%d)", bb->loop_father->num); if (bb->count.initialized_p ()) - fprintf (outf, ",%s(%d)", + fprintf (outf, ",%s(%" PRIu64 ")", profile_quality_as_string (bb->count.quality ()), bb->count.value ()); fprintf (outf, "):\n"); diff --git a/gcc/profile-count.h b/gcc/profile-count.h index f2b1e3a..c7a45ac 100644 --- a/gcc/profile-count.h +++ b/gcc/profile-count.h @@ -804,7 +804,7 @@ public: } /* Get the value of the count. */ - uint32_t value () const { return m_val; } + uint64_t value () const { return m_val; } /* Get the quality of the count. */ enum profile_quality quality () const { return m_quality; } diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c index 2820847..c8b0f7b 100644 --- a/gcc/tree-cfg.c +++ b/gcc/tree-cfg.c @@ -8086,7 +8086,7 @@ dump_function_to_file (tree fndecl, FILE *file, dump_flags_t flags) { basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun); if (bb->count.initialized_p ()) - fprintf (file, ",%s(%d)", + fprintf (file, ",%s(%" PRIu64 ")", profile_quality_as_string (bb->count.quality ()), bb->count.value ()); fprintf (file, ")\n%s (", function_name (fun)); -- cgit v1.1 From 98f1f9f38c45218c06200feb1939c9433a2ab6ca Mon Sep 17 00:00:00 2001 From: Martin Sebor Date: Thu, 15 Jul 2021 10:11:23 -0600 Subject: Avoid -Wvla-parameter for nontrivial bounds [PR97548]. 
Resolves: PR c/101289 - bogus -Wvla-parameter warning when using const for vla param PR c/97548 - bogus -Wvla-parameter on a bound expression involving a parameter gcc/c-family/ChangeLog: PR c/101289 PR c/97548 * c-warn.c (warn_parm_array_mismatch): Use OEP_DECL_NAME. gcc/c/ChangeLog: PR c/101289 PR c/97548 * c-decl.c (get_parm_array_spec): Strip nops. gcc/ChangeLog: PR c/101289 PR c/97548 * fold-const.c (operand_compare::operand_equal_p): Handle OEP_DECL_NAME. (operand_compare::verify_hash_value): Same. * tree-core.h (OEP_DECL_NAME): New. gcc/testsuite/ChangeLog: * gcc.dg/Wvla-parameter-12.c: New test. --- gcc/c-family/c-warn.c | 3 ++- gcc/c/c-decl.c | 1 + gcc/fold-const.c | 33 +++++++++++++++++++++-------- gcc/testsuite/gcc.dg/Wvla-parameter-12.c | 36 ++++++++++++++++++++++++++++++++ gcc/tree-core.h | 7 ++++++- 5 files changed, 69 insertions(+), 11 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/Wvla-parameter-12.c (limited to 'gcc') diff --git a/gcc/c-family/c-warn.c b/gcc/c-family/c-warn.c index 3495959..552a29f 100644 --- a/gcc/c-family/c-warn.c +++ b/gcc/c-family/c-warn.c @@ -3646,7 +3646,8 @@ warn_parm_array_mismatch (location_t origloc, tree fndecl, tree newparms) /* The VLA bounds don't refer to other function parameters. Compare them lexicographically to detect gross mismatches such as between T[foo()] and T[bar()]. */ - if (operand_equal_p (newbnd, curbnd, OEP_LEXICOGRAPHIC)) + if (operand_equal_p (newbnd, curbnd, + OEP_DECL_NAME | OEP_LEXICOGRAPHIC)) continue; if (warning_at (newloc, OPT_Wvla_parameter, diff --git a/gcc/c/c-decl.c b/gcc/c/c-decl.c index 983d65e..234ee16 100644 --- a/gcc/c/c-decl.c +++ b/gcc/c/c-decl.c @@ -5865,6 +5865,7 @@ get_parm_array_spec (const struct c_parm *parm, tree attrs) /* Each variable VLA bound is represented by a dollar sign.
*/ spec += "$"; + STRIP_NOPS (nelts); vbchain = tree_cons (NULL_TREE, nelts, vbchain); } diff --git a/gcc/fold-const.c b/gcc/fold-const.c index e0cdb75..7dcecc9 100644 --- a/gcc/fold-const.c +++ b/gcc/fold-const.c @@ -3499,11 +3499,26 @@ operand_compare::operand_equal_p (const_tree arg0, const_tree arg1, case tcc_declaration: /* Consider __builtin_sqrt equal to sqrt. */ - return (TREE_CODE (arg0) == FUNCTION_DECL - && fndecl_built_in_p (arg0) && fndecl_built_in_p (arg1) - && DECL_BUILT_IN_CLASS (arg0) == DECL_BUILT_IN_CLASS (arg1) - && (DECL_UNCHECKED_FUNCTION_CODE (arg0) - == DECL_UNCHECKED_FUNCTION_CODE (arg1))); + if (TREE_CODE (arg0) == FUNCTION_DECL) + return (fndecl_built_in_p (arg0) && fndecl_built_in_p (arg1) + && DECL_BUILT_IN_CLASS (arg0) == DECL_BUILT_IN_CLASS (arg1) + && (DECL_UNCHECKED_FUNCTION_CODE (arg0) + == DECL_UNCHECKED_FUNCTION_CODE (arg1))); + + if (DECL_P (arg0) + && (flags & OEP_DECL_NAME) + && (flags & OEP_LEXICOGRAPHIC)) + { + /* Consider decls with the same name equal. The caller needs + to make sure they refer to the same entity (such as a function + formal parameter). */ + tree a0name = DECL_NAME (arg0); + tree a1name = DECL_NAME (arg1); + const char *a0ns = a0name ? IDENTIFIER_POINTER (a0name) : NULL; + const char *a1ns = a1name ? IDENTIFIER_POINTER (a1name) : NULL; + return a0ns && a1ns && strcmp (a0ns, a1ns) == 0; + } + return false; case tcc_exceptional: if (TREE_CODE (arg0) == CONSTRUCTOR) @@ -3914,14 +3929,14 @@ bool operand_compare::verify_hash_value (const_tree arg0, const_tree arg1, unsigned int flags, bool *ret) { - /* When checking, verify at the outermost operand_equal_p call that - if operand_equal_p returns non-zero then ARG0 and ARG1 has the same - hash value. */ + /* When checking and unless comparing DECL names, verify that if + the outermost operand_equal_p call returns non-zero then ARG0 + and ARG1 have the same hash value. 
*/ if (flag_checking && !(flags & OEP_NO_HASH_CHECK)) { if (operand_equal_p (arg0, arg1, flags | OEP_NO_HASH_CHECK)) { - if (arg0 != arg1) + if (arg0 != arg1 && !(flags & OEP_DECL_NAME)) { inchash::hash hstate0 (0), hstate1 (0); hash_operand (arg0, hstate0, flags | OEP_HASH_CHECK); diff --git a/gcc/testsuite/gcc.dg/Wvla-parameter-12.c b/gcc/testsuite/gcc.dg/Wvla-parameter-12.c new file mode 100644 index 0000000..1be5e48 --- /dev/null +++ b/gcc/testsuite/gcc.dg/Wvla-parameter-12.c @@ -0,0 +1,36 @@ +/* PR c/101289 - bogus -Wvla-parameter warning when using const bound + { dg-do compile } + { dg-options "-Wall" } */ + +void f1ci_can (const int n, char a[n]); +void f1ci_can (const int n, char a[n]); // { dg-bogus "-Wvla-parameter" } + +void f2ci_can (const int m, char a[m]); +void f2ci_can (int n, char a[n]); // { dg-bogus "-Wvla-parameter" } + +void f3i_can (int n, char a[n]); +void f3i_can (const int n, char a[n]); // { dg-bogus "-Wvla-parameter" } + +void f4i_can (int n, char a[n]); +void f4i_can (const int n, char a[(int)n]); // { dg-bogus "-Wvla-parameter" } + +void f5i_can (int n, char a[(char)n]); +void f5i_can (const int n, char a[(char)n]); // { dg-bogus "-Wvla-parameter" } + +void f6i_can (int m, char a[(char)m]); +void f6i_can (const int n, char a[(char)n]); // { dg-bogus "-Wvla-parameter" "" { xfail *-*-* } } + + +/* PR c/97548 - bogus -Wvla-parameter on a bound expression involving + a parameter */ + +int n; + +void f7ianp1 (int, int[n + 1]); +void f7ianp1 (int, int[n + 1]); +void f7ianp1 (int, int[n + 2]); // { dg-warning "-Wvla-parameter" } + +void f8iakp1 (int k, int [k + 1]); +void f8iakp1 (int k, int [k + 1]); // { dg-bogus "-Wvla-parameter" } +void f8iakp1 (int k, int [1 + k]); // { dg-bogus "-Wvla-parameter" } +void f8iakp1 (int k, int [k + 2]); // { dg-warning "-Wvla-parameter" } diff --git a/gcc/tree-core.h b/gcc/tree-core.h index e15e6c6..23cd289 100644 --- a/gcc/tree-core.h +++ b/gcc/tree-core.h @@ -888,6 +888,7 @@ enum size_type_kind { 
stk_type_kind_last }; +/* Flags controlling operand_equal_p() behavior. */ enum operand_equal_flag { OEP_ONLY_CONST = 1, OEP_PURE_SAME = 2, @@ -902,7 +903,11 @@ enum operand_equal_flag { OEP_BITWISE = 128, /* For OEP_ADDRESS_OF of COMPONENT_REFs, only consider same fields as equivalent rather than also different fields with the same offset. */ - OEP_ADDRESS_OF_SAME_FIELD = 256 + OEP_ADDRESS_OF_SAME_FIELD = 256, + /* In conjunction with OEP_LEXICOGRAPHIC considers names of declarations + of the same kind. Used to compare VLA bounds involving parameters + across redeclarations of the same function. */ + OEP_DECL_NAME = 512 }; /* Enum and arrays used for tree allocation stats. -- cgit v1.1 From b25edf6e6feeadc6a5aa337b8c725786227162dd Mon Sep 17 00:00:00 2001 From: Tamar Christina Date: Thu, 15 Jul 2021 17:42:10 +0100 Subject: testsuite: Fix testisms in scalar tests PR101457 These testcases accidentally contain the wrong signs for the expected values for the scalar code. The vector code however is correct. Committed as a trivial fix. gcc/testsuite/ChangeLog: PR middle-end/101457 * gcc.dg/vect/vect-reduc-dot-17.c: Fix signs of scalar code. * gcc.dg/vect/vect-reduc-dot-18.c: Likewise. * gcc.dg/vect/vect-reduc-dot-22.c: Likewise. * gcc.dg/vect/vect-reduc-dot-9.c: Likewise. 
--- gcc/testsuite/gcc.dg/vect/vect-reduc-dot-17.c | 5 +++-- gcc/testsuite/gcc.dg/vect/vect-reduc-dot-18.c | 5 +++-- gcc/testsuite/gcc.dg/vect/vect-reduc-dot-22.c | 2 +- gcc/testsuite/gcc.dg/vect/vect-reduc-dot-9.c | 5 +++-- 4 files changed, 10 insertions(+), 7 deletions(-) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-17.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-17.c index aa269c4..38f86fe 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-17.c +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-17.c @@ -35,8 +35,9 @@ main (void) { check_vect (); - SIGNEDNESS_3 char a[N], b[N]; - int expected = 0x12345; + SIGNEDNESS_3 char a[N]; + SIGNEDNESS_4 char b[N]; + SIGNEDNESS_1 int expected = 0x12345; for (int i = 0; i < N; ++i) { a[i] = BASE + i * 5; diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-18.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-18.c index 2b1cc04..2e86ebe 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-18.c +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-18.c @@ -35,8 +35,9 @@ main (void) { check_vect (); - SIGNEDNESS_3 char a[N], b[N]; - int expected = 0x12345; + SIGNEDNESS_3 char a[N]; + SIGNEDNESS_4 char b[N]; + SIGNEDNESS_1 int expected = 0x12345; for (int i = 0; i < N; ++i) { a[i] = BASE + i * 5; diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-22.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-22.c index febeb19..0bde43a 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-22.c +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-22.c @@ -37,7 +37,7 @@ main (void) SIGNEDNESS_3 char a[N]; SIGNEDNESS_4 short b[N]; - int expected = 0x12345; + SIGNEDNESS_1 long expected = 0x12345; for (int i = 0; i < N; ++i) { a[i] = BASE + i * 5; diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-9.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-9.c index cbbeede..d1049c9 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-9.c +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-9.c @@ -35,8 +35,9 @@ main (void) { check_vect (); - 
SIGNEDNESS_3 char a[N], b[N]; - int expected = 0x12345; + SIGNEDNESS_3 char a[N]; + SIGNEDNESS_4 char b[N]; + SIGNEDNESS_1 int expected = 0x12345; for (int i = 0; i < N; ++i) { a[i] = BASE + i * 5; -- cgit v1.1 From 7094a69bd62a14dfa311eaa2fea468f221c7c9f3 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Thu, 15 Jul 2021 18:53:20 +0200 Subject: c++: Optimize away NULLPTR_TYPE comparisons [PR101443] Comparisons of NULLPTR_TYPE operands cause all kinds of problems in the middle-end and in fold-const.c, various optimizations assume that if they see e.g. a non-equality comparison with one of the operands being INTEGER_CST and it is not INTEGRAL_TYPE_P (which has TYPE_{MIN,MAX}_VALUE), they can build_int_cst (type, 1) to find a successor. The following patch fixes it by making sure they don't appear in the IL, optimize them away at cp_fold time as all can be folded. Though, I've just noticed that clang++ rejects the non-equality comparisons instead, foo () > 0 with invalid operands to binary expression ('decltype(nullptr)' (aka 'nullptr_t') and 'int') and foo () > nullptr with invalid operands to binary expression ('decltype(nullptr)' (aka 'nullptr_t') and 'nullptr_t') Shall we reject those too, in addition or instead of parts of this patch? If so, wouldn't this patch be still useful for backports, I bet we don't want to start reject it on the release branches when we used to accept it. 2021-07-15 Jakub Jelinek PR c++/101443 * cp-gimplify.c (cp_fold): For comparisons with NULLPTR_TYPE operands, fold them right away to true or false. * g++.dg/cpp0x/nullptr46.C: New test. 
--- gcc/cp/cp-gimplify.c | 26 ++++++++++++++++++++++++++ gcc/testsuite/g++.dg/cpp0x/nullptr46.C | 11 +++++++++++ 2 files changed, 37 insertions(+) create mode 100644 gcc/testsuite/g++.dg/cpp0x/nullptr46.C (limited to 'gcc') diff --git a/gcc/cp/cp-gimplify.c b/gcc/cp/cp-gimplify.c index de37f2c..ff0bff7 100644 --- a/gcc/cp/cp-gimplify.c +++ b/gcc/cp/cp-gimplify.c @@ -2423,6 +2423,32 @@ cp_fold (tree x) op0 = cp_fold_maybe_rvalue (TREE_OPERAND (x, 0), rval_ops); op1 = cp_fold_rvalue (TREE_OPERAND (x, 1)); + /* decltype(nullptr) has only one value, so optimize away all comparisons + with that type right away, keeping them in the IL causes troubles for + various optimizations. */ + if (COMPARISON_CLASS_P (org_x) + && TREE_CODE (TREE_TYPE (op0)) == NULLPTR_TYPE + && TREE_CODE (TREE_TYPE (op1)) == NULLPTR_TYPE) + { + switch (code) + { + case EQ_EXPR: + case LE_EXPR: + case GE_EXPR: + x = constant_boolean_node (true, TREE_TYPE (x)); + break; + case NE_EXPR: + case LT_EXPR: + case GT_EXPR: + x = constant_boolean_node (false, TREE_TYPE (x)); + break; + default: + gcc_unreachable (); + } + return omit_two_operands_loc (loc, TREE_TYPE (x), x, + op0, op1); + } + if (op0 != TREE_OPERAND (x, 0) || op1 != TREE_OPERAND (x, 1)) { if (op0 == error_mark_node || op1 == error_mark_node) diff --git a/gcc/testsuite/g++.dg/cpp0x/nullptr46.C b/gcc/testsuite/g++.dg/cpp0x/nullptr46.C new file mode 100644 index 0000000..1514cee --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp0x/nullptr46.C @@ -0,0 +1,11 @@ +// PR c++/101443 +// { dg-do compile { target c++11 } } +// { dg-options "-O2" } + +decltype(nullptr) foo (); + +bool +bar () +{ + return foo () > nullptr || foo () < nullptr; +} -- cgit v1.1 From 7a9c9a3265f85bd8c660df08e56bbab0b416df7e Mon Sep 17 00:00:00 2001 From: Christophe Lyon Date: Thu, 15 Jul 2021 15:22:19 +0000 Subject: testsuite: [arm] Add missing effective-target to vusdot-autovec.c This test fails when forcing an -mcpu option incompatible with -march=armv8.2-a+i8mm. 
This patch adds the missing arm_v8_2a_i8mm_ok effective-target, as well as the associated dg-add-options arm_v8_2a_i8mm. 2021-07-15 Christophe Lyon gcc/testsuite/ * gcc.target/arm/simd/vusdot-autovec.c: Use arm_v8_2a_i8mm_ok effective-target. --- gcc/testsuite/gcc.target/arm/simd/vusdot-autovec.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.target/arm/simd/vusdot-autovec.c b/gcc/testsuite/gcc.target/arm/simd/vusdot-autovec.c index 7cc56f6..e7af895 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vusdot-autovec.c +++ b/gcc/testsuite/gcc.target/arm/simd/vusdot-autovec.c @@ -1,5 +1,7 @@ /* { dg-do compile } */ -/* { dg-options "-O3 -march=armv8.2-a+i8mm" } */ +/* { dg-require-effective-target arm_v8_2a_i8mm_ok } */ +/* { dg-options "-O3" } */ +/* { dg-add-options arm_v8_2a_i8mm } */ #define N 480 #define SIGNEDNESS_1 unsigned -- cgit v1.1 From 797358f42fab5ee58a893b68ed18f6ea05eff634 Mon Sep 17 00:00:00 2001 From: Christophe Lyon Date: Thu, 15 Jul 2021 15:27:46 +0000 Subject: testsuite: [arm] Remove arm_v8_2a_imm8_neon_ok_nocache This patch removes this recently-introduced effective-target, as it looks like a typo and duplicate for arm_v8_2a_i8mm_ok (imm8 vs i8mm), and it is not used. 2021-07-15 Christophe Lyon gcc/testsuite/ * lib/target-supports.exp (arm_v8_2a_imm8_neon_ok_nocache): Delete. --- gcc/testsuite/lib/target-supports.exp | 30 ------------------------------ 1 file changed, 30 deletions(-) (limited to 'gcc') diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 12df869..42ac9d0 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -5267,36 +5267,6 @@ proc check_effective_target_arm_v8_2a_dotprod_neon_ok_nocache { } { return 0; } -# Return 1 if the target supports ARMv8.2 Adv.SIMD imm8 -# instructions, 0 otherwise. The test is valid for ARM and for AArch64. -# Record the command line options needed. 
- -proc check_effective_target_arm_v8_2a_imm8_neon_ok_nocache { } { - global et_arm_v8_2a_imm8_neon_flags - set et_arm_v8_2a_imm8_neon_flags "" - - if { ![istarget arm*-*-*] && ![istarget aarch64*-*-*] } { - return 0; - } - - # Iterate through sets of options to find the compiler flags that - # need to be added to the -march option. - foreach flags {"" "-mfloat-abi=softfp -mfpu=neon-fp-armv8" "-mfloat-abi=hard -mfpu=neon-fp-armv8"} { - if { [check_no_compiler_messages_nocache \ - arm_v8_2a_imm8_neon_ok object { - #include - #if !defined (__ARM_FEATURE_MATMUL_INT8) - #error "__ARM_FEATURE_MATMUL_INT8 not defined" - #endif - } "$flags -march=armv8.2-a+imm8"] } { - set et_arm_v8_2a_imm8_neon_flags "$flags -march=armv8.2-a+imm8" - return 1 - } - } - - return 0; -} - # Return 1 if the target supports ARMv8.1-M MVE # instructions, 0 otherwise. The test is valid for ARM. # Record the command line options needed. -- cgit v1.1 From a9241df96e1950c630550ada9371c0b4a03496cf Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Thu, 15 Jul 2021 15:01:57 -0400 Subject: analyzer: handle self-referential phis gcc/analyzer/ChangeLog: * state-purge.cc (self_referential_phi_p): New. (state_purge_per_ssa_name::process_point): Don't purge an SSA name at its def-stmt if the def-stmt is self-referential. gcc/testsuite/ChangeLog: * gcc.dg/analyzer/phi-1.c: New test. Signed-off-by: David Malcolm --- gcc/analyzer/state-purge.cc | 37 ++++++++++++++++++++++++++++++++--- gcc/testsuite/gcc.dg/analyzer/phi-1.c | 24 +++++++++++++++++++++++ 2 files changed, 58 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/analyzer/phi-1.c (limited to 'gcc') diff --git a/gcc/analyzer/state-purge.cc b/gcc/analyzer/state-purge.cc index 70a09ed..e82ea87 100644 --- a/gcc/analyzer/state-purge.cc +++ b/gcc/analyzer/state-purge.cc @@ -288,6 +288,20 @@ state_purge_per_ssa_name::add_to_worklist (const function_point &point, } } +/* Does this phi depend on itself? + e.g. 
in: + added_2 = PHI + the middle defn (from edge 3) requires added_2 itself. */ + +static bool +self_referential_phi_p (const gphi *phi) +{ + for (unsigned i = 0; i < gimple_phi_num_args (phi); i++) + if (gimple_phi_arg_def (phi, i) == gimple_phi_result (phi)) + return true; + return false; +} + /* Process POINT, popped from WORKLIST. Iterate over predecessors of POINT, adding to WORKLIST. */ @@ -326,11 +340,28 @@ state_purge_per_ssa_name::process_point (const function_point &point, !gsi_end_p (gpi); gsi_next (&gpi)) { gphi *phi = gpi.phi (); + /* Are we at the def-stmt for m_name? */ if (phi == def_stmt) { - if (logger) - logger->log ("def stmt within phis; terminating"); - return; + /* Does this phi depend on itself? + e.g. in: + added_2 = PHI + the middle defn (from edge 3) requires added_2 itself + so we can't purge it here. */ + if (self_referential_phi_p (phi)) + { + if (logger) + logger->log ("self-referential def stmt within phis;" + " continuing"); + } + else + { + /* Otherwise, we can stop here, so that m_name + can be purged. 
*/ + if (logger) + logger->log ("def stmt within phis; terminating"); + return; + } } } diff --git a/gcc/testsuite/gcc.dg/analyzer/phi-1.c b/gcc/testsuite/gcc.dg/analyzer/phi-1.c new file mode 100644 index 0000000..0926003 --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/phi-1.c @@ -0,0 +1,24 @@ +/* { dg-do "compile" } */ + +typedef __SIZE_TYPE__ size_t; +#define NULL ((void *) 0) + +extern const char *foo (void); +extern size_t bar (void); + +void +_nl_expand_alias (const char *locale_alias_path) +{ + size_t added; + do + { + added = 0; + while (added == 0 && locale_alias_path[0] != '\0') + { + const char *start = foo (); + if (start < locale_alias_path) + added = bar (); + } + } + while (added != 0); +} -- cgit v1.1 From e9711fe482b4abef0e7572809d3593631991276e Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Thu, 15 Jul 2021 15:02:42 -0400 Subject: analyzer: use DECL_DEBUG_EXPR on SSA names for artificial vars gcc/analyzer/ChangeLog: * analyzer.cc (fixup_tree_for_diagnostic_1): Use DECL_DEBUG_EXPR if it's available. * engine.cc (readability): Likewise. 
Signed-off-by: David Malcolm --- gcc/analyzer/analyzer.cc | 9 +++++++-- gcc/analyzer/engine.cc | 19 ++++++++++++++++--- 2 files changed, 23 insertions(+), 5 deletions(-) (limited to 'gcc') diff --git a/gcc/analyzer/analyzer.cc b/gcc/analyzer/analyzer.cc index 12c03f6..a8ee1a1 100644 --- a/gcc/analyzer/analyzer.cc +++ b/gcc/analyzer/analyzer.cc @@ -165,8 +165,13 @@ fixup_tree_for_diagnostic_1 (tree expr, hash_set *visited) && TREE_CODE (expr) == SSA_NAME && (SSA_NAME_VAR (expr) == NULL_TREE || DECL_ARTIFICIAL (SSA_NAME_VAR (expr)))) - if (tree expr2 = maybe_reconstruct_from_def_stmt (expr, visited)) - return expr2; + { + if (tree var = SSA_NAME_VAR (expr)) + if (VAR_P (var) && DECL_HAS_DEBUG_EXPR_P (var)) + return DECL_DEBUG_EXPR (var); + if (tree expr2 = maybe_reconstruct_from_def_stmt (expr, visited)) + return expr2; + } return expr; } diff --git a/gcc/analyzer/engine.cc b/gcc/analyzer/engine.cc index 01b83a4..8f3e7f7 100644 --- a/gcc/analyzer/engine.cc +++ b/gcc/analyzer/engine.cc @@ -527,9 +527,22 @@ readability (const_tree expr) case SSA_NAME: { if (tree var = SSA_NAME_VAR (expr)) - /* Slightly favor the underlying var over the SSA name to - avoid having them compare equal. */ - return readability (var) - 1; + { + if (DECL_ARTIFICIAL (var)) + { + /* If we have an SSA name for an artificial var, + only use it if it has a debug expr associated with + it that fixup_tree_for_diagnostic can use. */ + if (VAR_P (var) && DECL_HAS_DEBUG_EXPR_P (var)) + return readability (DECL_DEBUG_EXPR (var)) - 1; + } + else + { + /* Slightly favor the underlying var over the SSA name to + avoid having them compare equal. */ + return readability (var) - 1; + } + } /* Avoid printing '' for SSA names for temporaries. 
*/ return -1; } -- cgit v1.1 From 98cd4d123aa14598b1f0d54c22663c8200a96d9c Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Thu, 15 Jul 2021 15:04:07 -0400 Subject: analyzer: add -fdump-analyzer-exploded-paths gcc/analyzer/ChangeLog: * analyzer.opt (fdump-analyzer-exploded-paths): New. * diagnostic-manager.cc (diagnostic_manager::emit_saved_diagnostic): Implement it. * engine.cc (exploded_path::dump_to_pp): Add ext_state param and use it to dump states if non-NULL. (exploded_path::dump): Likewise. (exploded_path::dump_to_file): New. * exploded-graph.h (exploded_path::dump_to_pp): Add ext_state param. (exploded_path::dump): Likewise. (exploded_path::dump): Likewise. (exploded_path::dump_to_file): New. gcc/ChangeLog: * doc/invoke.texi (-fdump-analyzer-exploded-paths): New. Signed-off-by: David Malcolm --- gcc/analyzer/analyzer.opt | 4 ++++ gcc/analyzer/diagnostic-manager.cc | 11 +++++++++++ gcc/analyzer/engine.cc | 34 ++++++++++++++++++++++++++++------ gcc/analyzer/exploded-graph.h | 9 ++++++--- gcc/doc/invoke.texi | 6 ++++++ 5 files changed, 55 insertions(+), 9 deletions(-) (limited to 'gcc') diff --git a/gcc/analyzer/analyzer.opt b/gcc/analyzer/analyzer.opt index dd34495..7b77ae8 100644 --- a/gcc/analyzer/analyzer.opt +++ b/gcc/analyzer/analyzer.opt @@ -210,6 +210,10 @@ fdump-analyzer-exploded-nodes-3 Common RejectNegative Var(flag_dump_analyzer_exploded_nodes_3) Dump a textual representation of the exploded graph to SRCFILE.eg-ID.txt. +fdump-analyzer-exploded-paths +Common RejectNegative Var(flag_dump_analyzer_exploded_paths) +Dump a textual representation of each diagnostic's exploded path to SRCFILE.IDX.KIND.epath.txt. + fdump-analyzer-feasibility Common RejectNegative Var(flag_dump_analyzer_feasibility) Dump various analyzer internals to SRCFILE.*.fg.dot and SRCFILE.*.tg.dot. 
diff --git a/gcc/analyzer/diagnostic-manager.cc b/gcc/analyzer/diagnostic-manager.cc index b7d263b..d005fac 100644 --- a/gcc/analyzer/diagnostic-manager.cc +++ b/gcc/analyzer/diagnostic-manager.cc @@ -1164,6 +1164,17 @@ diagnostic_manager::emit_saved_diagnostic (const exploded_graph &eg, inform_n (loc, num_dupes, "%i duplicate", "%i duplicates", num_dupes); + if (flag_dump_analyzer_exploded_paths) + { + auto_timevar tv (TV_ANALYZER_DUMP); + pretty_printer pp; + pp_printf (&pp, "%s.%i.%s.epath.txt", + dump_base_name, sd.get_index (), sd.m_d->get_kind ()); + char *filename = xstrdup (pp_formatted_text (&pp)); + epath->dump_to_file (filename, eg.get_ext_state ()); + inform (loc, "exploded path written to %qs", filename); + free (filename); + } } delete pp; } diff --git a/gcc/analyzer/engine.cc b/gcc/analyzer/engine.cc index 8f3e7f7..dc07a79 100644 --- a/gcc/analyzer/engine.cc +++ b/gcc/analyzer/engine.cc @@ -3630,10 +3630,12 @@ exploded_path::feasible_p (logger *logger, feasibility_problem **out, return true; } -/* Dump this path in multiline form to PP. */ +/* Dump this path in multiline form to PP. + If EXT_STATE is non-NULL, then show the nodes. */ void -exploded_path::dump_to_pp (pretty_printer *pp) const +exploded_path::dump_to_pp (pretty_printer *pp, + const extrinsic_state *ext_state) const { for (unsigned i = 0; i < m_edges.length (); i++) { @@ -3643,28 +3645,48 @@ exploded_path::dump_to_pp (pretty_printer *pp) const eedge->m_src->m_index, eedge->m_dest->m_index); pp_newline (pp); + + if (ext_state) + eedge->m_dest->dump_to_pp (pp, *ext_state); } } /* Dump this path in multiline form to FP. 
*/ void -exploded_path::dump (FILE *fp) const +exploded_path::dump (FILE *fp, const extrinsic_state *ext_state) const { pretty_printer pp; pp_format_decoder (&pp) = default_tree_printer; pp_show_color (&pp) = pp_show_color (global_dc->printer); pp.buffer->stream = fp; - dump_to_pp (&pp); + dump_to_pp (&pp, ext_state); pp_flush (&pp); } /* Dump this path in multiline form to stderr. */ DEBUG_FUNCTION void -exploded_path::dump () const +exploded_path::dump (const extrinsic_state *ext_state) const { - dump (stderr); + dump (stderr, ext_state); +} + +/* Dump this path verbosely to FILENAME. */ + +void +exploded_path::dump_to_file (const char *filename, + const extrinsic_state &ext_state) const +{ + FILE *fp = fopen (filename, "w"); + if (!fp) + return; + pretty_printer pp; + pp_format_decoder (&pp) = default_tree_printer; + pp.buffer->stream = fp; + dump_to_pp (&pp, &ext_state); + pp_flush (&pp); + fclose (fp); } /* class feasibility_problem. */ diff --git a/gcc/analyzer/exploded-graph.h b/gcc/analyzer/exploded-graph.h index 2d25e5e..1d8b73d 100644 --- a/gcc/analyzer/exploded-graph.h +++ b/gcc/analyzer/exploded-graph.h @@ -895,9 +895,12 @@ public: exploded_node *get_final_enode () const; - void dump_to_pp (pretty_printer *pp) const; - void dump (FILE *fp) const; - void dump () const; + void dump_to_pp (pretty_printer *pp, + const extrinsic_state *ext_state) const; + void dump (FILE *fp, const extrinsic_state *ext_state) const; + void dump (const extrinsic_state *ext_state = NULL) const; + void dump_to_file (const char *filename, + const extrinsic_state &ext_state) const; bool feasible_p (logger *logger, feasibility_problem **out, engine *eng, const exploded_graph *eg) const; diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index ea88124..62e165f 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -428,6 +428,7 @@ Objective-C and Objective-C++ Dialects}. 
-fdump-analyzer-exploded-nodes @gol -fdump-analyzer-exploded-nodes-2 @gol -fdump-analyzer-exploded-nodes-3 @gol +-fdump-analyzer-exploded-paths @gol -fdump-analyzer-feasibility @gol -fdump-analyzer-json @gol -fdump-analyzer-state-purge @gol @@ -9651,6 +9652,11 @@ Dump a textual representation of the ``exploded graph'' to one dump file per node, to @file{@var{file}.eg-@var{id}.txt}. This is typically a large number of dump files. +@item -fdump-analyzer-exploded-paths +@opindex fdump-analyzer-exploded-paths +Dump a textual representation of the ``exploded path'' for each +diagnostic to @file{@var{file}.@var{idx}.@var{kind}.epath.txt}. + @item -fdump-analyzer-feasibility @opindex dump-analyzer-feasibility Dump internal details about the analyzer's search for feasible paths. -- cgit v1.1 From 33255ad3ac14e3953750fe0f2d82b901c2852ff6 Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Thu, 15 Jul 2021 15:07:07 -0400 Subject: analyzer: reimplement -Wanalyzer-use-of-uninitialized-value [PR95006 et al] The initial gcc 10 era commit of the analyzer (in 757bf1dff5e8cee34c0a75d06140ca972bfecfa7) had an implementation of -Wanalyzer-use-of-uninitialized-value, but was sufficiently buggy that I removed it in 78b9783774bfd3540f38f5b1e3c7fc9f719653d7 before the release of gcc 10.1 This patch reintroduces the warning, heavily rewritten, with (I hope) a less buggy implementation this time, for GCC 12. gcc/analyzer/ChangeLog: PR analyzer/95006 PR analyzer/94713 PR analyzer/94714 * analyzer.cc (maybe_reconstruct_from_def_stmt): Split out GIMPLE_ASSIGN case into... (get_diagnostic_tree_for_gassign_1): New. (get_diagnostic_tree_for_gassign): New. * analyzer.h (get_diagnostic_tree_for_gassign): New decl. * analyzer.opt (Wanalyzer-write-to-string-literal): New. * constraint-manager.cc (class svalue_purger): New. (constraint_manager::purge_state_involving): New. * constraint-manager.h (constraint_manager::purge_state_involving): New. 
* diagnostic-manager.cc (saved_diagnostic::supercedes_p): New. (dedupe_winners::handle_interactions): New. (diagnostic_manager::emit_saved_diagnostics): Call it. * diagnostic-manager.h (saved_diagnostic::supercedes_p): New decl. * engine.cc (impl_region_model_context::warn): Convert return type to bool. Return false if the diagnostic isn't saved. (impl_region_model_context::purge_state_involving): New. (impl_sm_context::get_state): Use NULL ctxt when querying old rvalue. (impl_sm_context::set_next_state): Use new sval when querying old state. (class dump_path_diagnostic): Move to region-model.cc (exploded_node::on_stmt): Move to on_stmt_pre and on_stmt_post. Remove call to purge_state_involving. (exploded_node::on_stmt_pre): New, based on the above. Move most of it to region_model::on_stmt_pre. (exploded_node::on_stmt_post): Likewise, moving to region_model::on_stmt_post. (class stale_jmp_buf): Fix parent class to use curiously recurring template pattern. (feasibility_state::maybe_update_for_edge): Call on_call_pre and on_call_post on gcalls. * exploded-graph.h (impl_region_model_context::warn): Return bool. (impl_region_model_context::purge_state_involving): New decl. (exploded_node::on_stmt_pre): New decl. (exploded_node::on_stmt_post): New decl. * pending-diagnostic.h (pending_diagnostic::use_of_uninit_p): New. (pending_diagnostic::supercedes_p): New. * program-state.cc (sm_state_map::get_state): Inherit state for conjured_svalue as well as initial_svalue. (sm_state_map::purge_state_involving): Also support SK_CONJURED. * region-model-impl-calls.cc (call_details::get_uncertainty): Handle m_ctxt being NULL. (call_details::get_or_create_conjured_svalue): New. (region_model::impl_call_fgets): New. (region_model::impl_call_fread): New. * region-model-manager.cc (region_model_manager::get_or_create_initial_value): Return an uninitialized poisoned value for regions that can't have initial values. 
* region-model-reachability.cc (reachable_regions::mark_escaped_clusters): Handle ctxt being NULL. * region-model.cc (region_to_value_map::purge_state_involving): New. (poisoned_value_diagnostic::use_of_uninit_p): New. (poisoned_value_diagnostic::emit): Handle POISON_KIND_UNINIT. (poisoned_value_diagnostic::describe_final_event): Likewise. (region_model::check_for_poison): New. (region_model::on_assignment): Call it. (class dump_path_diagnostic): Move here from engine.cc. (region_model::on_stmt_pre): New, based on exploded_node::on_stmt. (region_model::on_call_pre): Move the setting of the LHS to a conjured svalue to before the checks for specific functions. Handle "fgets", "fgets_unlocked", and "fread". (region_model::purge_state_involving): New. (region_model::handle_unrecognized_call): Handle ctxt being NULL. (region_model::get_rvalue): Call check_for_poison. (selftest::test_stack_frames): Use NULL for context when getting uninitialized rvalue. (selftest::test_alloca): Likewise. * region-model.h (region_to_value_map::purge_state_involving): New decl. (call_details::get_or_create_conjured_svalue): New decl. (region_model::on_stmt_pre): New decl. (region_model::purge_state_involving): New decl. (region_model::impl_call_fgets): New decl. (region_model::impl_call_fread): New decl. (region_model::check_for_poison): New decl. (region_model_context::warn): Return bool. (region_model_context::purge_state_involving): New. (noop_region_model_context::warn): Return bool. (noop_region_model_context::purge_state_involving): New. (test_region_model_context:: warn): Return bool. * region.cc (region::get_memory_space): New. (region::can_have_initial_svalue_p): New. (region::involves_p): New. * region.h (enum memory_space): New. (region::get_memory_space): New decl. (region::can_have_initial_svalue_p): New decl. (region::involves_p): New decl. * sm-malloc.cc (use_after_free::supercedes_p): New. * store.cc (binding_cluster::purge_state_involving): New. 
(store::purge_state_involving): New. * store.h (class symbolic_binding): New forward decl. (binding_key::dyn_cast_symbolic_binding): New. (symbolic_binding::dyn_cast_symbolic_binding): New. (binding_cluster::purge_state_involving): New. (store::purge_state_involving): New. * svalue.cc (svalue::can_merge_p): Reject attempts to merge poisoned svalues with other svalues, so that we identify paths in which a variable is conditionally uninitialized. (involvement_visitor::visit_conjured_svalue): New. (svalue::involves_p): Also handle SK_CONJURED. (poison_kind_to_str): Handle POISON_KIND_UNINIT. (poisoned_svalue::maybe_fold_bits_within): New. * svalue.h (enum poison_kind): Add POISON_KIND_UNINIT. (poisoned_svalue::maybe_fold_bits_within): New decl. gcc/ChangeLog: PR analyzer/95006 PR analyzer/94713 PR analyzer/94714 * doc/invoke.texi: Add -Wanalyzer-use-of-uninitialized-value. gcc/testsuite/ChangeLog: PR analyzer/95006 PR analyzer/94713 PR analyzer/94714 * g++.dg/analyzer/pr93212.C: Update location of warning. * g++.dg/analyzer/pr94011.C: Add -Wno-analyzer-use-of-uninitialized-value. * g++.dg/analyzer/pr94503.C: Likewise. * gcc.dg/analyzer/clobbers-1.c: Convert "f" from a local to a param to avoid uninitialized warning. * gcc.dg/analyzer/data-model-1.c (test_12): Add test for uninitialized value on result of alloca. (test_12a): Add expected warning. (test_12c): Likewise. (test_19): Likewise. (test_29b): Likewise. (test_29c): Likewise. (test_37): Remove xfail. (test_37a): Likewise. * gcc.dg/analyzer/data-model-20.c: Add warning about leak. * gcc.dg/analyzer/explode-2.c: Remove params; add -Wno-analyzer-too-complex, -Wno-analyzer-malloc-leak, and xfails. Initialize the locals. * gcc.dg/analyzer/explode-2a.c: Initialize the locals. Add expected leak. * gcc.dg/analyzer/fgets-1.c: New test. * gcc.dg/analyzer/fread-1.c: New test. * gcc.dg/analyzer/malloc-1.c (test_16): Add expected warning. (test_40): Likewise. 
* gcc.dg/analyzer/memset-CVE-2017-18549-1.c: Check for uninitialized padding. * gcc.dg/analyzer/pr93355-localealias-feasibility.c (fread): New decl. (read_alias_file): Call it. * gcc.dg/analyzer/pr94047.c: Add expected warnings. * gcc.dg/analyzer/pr94851-2.c: Likewise. * gcc.dg/analyzer/pr96841.c: Convert local to a param. * gcc.dg/analyzer/pr98628.c: Likewise. * gcc.dg/analyzer/pr99042.c: Updated expected location of leak diagnostics. * gcc.dg/analyzer/symbolic-1.c: Add expected warnings. * gcc.dg/analyzer/symbolic-7.c: Likewise. * gcc.dg/analyzer/torture/pr93649.c: Add expected warning. Skip with -fno-fat-lto-objects. * gcc.dg/analyzer/uninit-1.c: New test. * gcc.dg/analyzer/uninit-2.c: New test. * gcc.dg/analyzer/uninit-3.c: New test. * gcc.dg/analyzer/uninit-4.c: New test. * gcc.dg/analyzer/uninit-pr94713.c: New test. * gcc.dg/analyzer/uninit-pr94714.c: New test. * gcc.dg/analyzer/use-after-free-2.c: New test. * gcc.dg/analyzer/use-after-free-3.c: New test. * gcc.dg/analyzer/zlib-3.c: Add expected warning. * gcc.dg/analyzer/zlib-6.c: Convert locals to params to avoid uninitialized warnings. Remove xfail. * gcc.dg/analyzer/zlib-6a.c: New test, based on the old version of the above. * gfortran.dg/analyzer/pr97668.f: Add -Wno-analyzer-use-of-uninitialized-value and -Wno-analyzer-too-complex. 
Signed-off-by: David Malcolm --- gcc/analyzer/analyzer.cc | 95 +++++--- gcc/analyzer/analyzer.h | 1 + gcc/analyzer/analyzer.opt | 4 + gcc/analyzer/constraint-manager.cc | 23 ++ gcc/analyzer/constraint-manager.h | 1 + gcc/analyzer/diagnostic-manager.cc | 46 ++++ gcc/analyzer/diagnostic-manager.h | 2 + gcc/analyzer/engine.cc | 250 ++++++++------------ gcc/analyzer/exploded-graph.h | 15 +- gcc/analyzer/pending-diagnostic.h | 13 + gcc/analyzer/program-state.cc | 43 ++-- gcc/analyzer/region-model-impl-calls.cc | 50 +++- gcc/analyzer/region-model-manager.cc | 4 + gcc/analyzer/region-model-reachability.cc | 16 +- gcc/analyzer/region-model.cc | 261 +++++++++++++++++++-- gcc/analyzer/region-model.h | 32 ++- gcc/analyzer/region.cc | 117 +++++++++ gcc/analyzer/region.h | 16 ++ gcc/analyzer/sm-malloc.cc | 19 ++ gcc/analyzer/store.cc | 55 +++++ gcc/analyzer/store.h | 10 + gcc/analyzer/svalue.cc | 32 ++- gcc/analyzer/svalue.h | 8 + gcc/doc/invoke.texi | 10 + gcc/testsuite/g++.dg/analyzer/pr93212.C | 4 +- gcc/testsuite/g++.dg/analyzer/pr94011.C | 2 +- gcc/testsuite/g++.dg/analyzer/pr94503.C | 2 + gcc/testsuite/gcc.dg/analyzer/clobbers-1.c | 3 +- gcc/testsuite/gcc.dg/analyzer/data-model-1.c | 32 ++- gcc/testsuite/gcc.dg/analyzer/data-model-20.c | 2 +- gcc/testsuite/gcc.dg/analyzer/explode-2.c | 16 +- gcc/testsuite/gcc.dg/analyzer/explode-2a.c | 4 +- gcc/testsuite/gcc.dg/analyzer/fgets-1.c | 31 +++ gcc/testsuite/gcc.dg/analyzer/fread-1.c | 13 + gcc/testsuite/gcc.dg/analyzer/malloc-1.c | 7 +- .../gcc.dg/analyzer/memset-CVE-2017-18549-1.c | 8 +- .../analyzer/pr93355-localealias-feasibility.c | 7 + gcc/testsuite/gcc.dg/analyzer/pr94047.c | 2 +- gcc/testsuite/gcc.dg/analyzer/pr94851-2.c | 2 +- gcc/testsuite/gcc.dg/analyzer/pr96841.c | 4 +- gcc/testsuite/gcc.dg/analyzer/pr98628.c | 3 +- gcc/testsuite/gcc.dg/analyzer/pr99042.c | 8 +- gcc/testsuite/gcc.dg/analyzer/symbolic-1.c | 6 +- gcc/testsuite/gcc.dg/analyzer/symbolic-7.c | 6 +- gcc/testsuite/gcc.dg/analyzer/torture/pr93649.c | 3 +- 
gcc/testsuite/gcc.dg/analyzer/uninit-1.c | 44 ++++ gcc/testsuite/gcc.dg/analyzer/uninit-2.c | 14 ++ gcc/testsuite/gcc.dg/analyzer/uninit-3.c | 36 +++ gcc/testsuite/gcc.dg/analyzer/uninit-4.c | 39 +++ gcc/testsuite/gcc.dg/analyzer/uninit-pr94713.c | 11 + gcc/testsuite/gcc.dg/analyzer/uninit-pr94714.c | 12 + gcc/testsuite/gcc.dg/analyzer/use-after-free-2.c | 8 + gcc/testsuite/gcc.dg/analyzer/use-after-free-3.c | 12 + gcc/testsuite/gcc.dg/analyzer/zlib-3.c | 2 +- gcc/testsuite/gcc.dg/analyzer/zlib-6.c | 13 +- gcc/testsuite/gcc.dg/analyzer/zlib-6a.c | 47 ++++ gcc/testsuite/gfortran.dg/analyzer/pr97668.f | 2 +- 57 files changed, 1232 insertions(+), 296 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/analyzer/fgets-1.c create mode 100644 gcc/testsuite/gcc.dg/analyzer/fread-1.c create mode 100644 gcc/testsuite/gcc.dg/analyzer/uninit-1.c create mode 100644 gcc/testsuite/gcc.dg/analyzer/uninit-2.c create mode 100644 gcc/testsuite/gcc.dg/analyzer/uninit-3.c create mode 100644 gcc/testsuite/gcc.dg/analyzer/uninit-4.c create mode 100644 gcc/testsuite/gcc.dg/analyzer/uninit-pr94713.c create mode 100644 gcc/testsuite/gcc.dg/analyzer/uninit-pr94714.c create mode 100644 gcc/testsuite/gcc.dg/analyzer/use-after-free-2.c create mode 100644 gcc/testsuite/gcc.dg/analyzer/use-after-free-3.c create mode 100644 gcc/testsuite/gcc.dg/analyzer/zlib-6a.c (limited to 'gcc') diff --git a/gcc/analyzer/analyzer.cc b/gcc/analyzer/analyzer.cc index a8ee1a1..ddace9a 100644 --- a/gcc/analyzer/analyzer.cc +++ b/gcc/analyzer/analyzer.cc @@ -63,6 +63,51 @@ get_stmt_location (const gimple *stmt, function *fun) static tree fixup_tree_for_diagnostic_1 (tree expr, hash_set *visited); +/* Attemp to generate a tree for the LHS of ASSIGN_STMT. + VISITED must be non-NULL; it is used to ensure termination. 
*/ + +static tree +get_diagnostic_tree_for_gassign_1 (const gassign *assign_stmt, + hash_set *visited) +{ + enum tree_code code = gimple_assign_rhs_code (assign_stmt); + + /* Reverse the effect of extract_ops_from_tree during + gimplification. */ + switch (get_gimple_rhs_class (code)) + { + default: + case GIMPLE_INVALID_RHS: + gcc_unreachable (); + case GIMPLE_TERNARY_RHS: + case GIMPLE_BINARY_RHS: + case GIMPLE_UNARY_RHS: + { + tree t = make_node (code); + TREE_TYPE (t) = TREE_TYPE (gimple_assign_lhs (assign_stmt)); + unsigned num_rhs_args = gimple_num_ops (assign_stmt) - 1; + for (unsigned i = 0; i < num_rhs_args; i++) + { + tree op = gimple_op (assign_stmt, i + 1); + if (op) + { + op = fixup_tree_for_diagnostic_1 (op, visited); + if (op == NULL_TREE) + return NULL_TREE; + } + TREE_OPERAND (t, i) = op; + } + return t; + } + case GIMPLE_SINGLE_RHS: + { + tree op = gimple_op (assign_stmt, 1); + op = fixup_tree_for_diagnostic_1 (op, visited); + return op; + } + } +} + /* Subroutine of fixup_tree_for_diagnostic_1, called on SSA names. Attempt to reconstruct a a tree expression for SSA_NAME based on its def-stmt. @@ -91,45 +136,8 @@ maybe_reconstruct_from_def_stmt (tree ssa_name, /* Can't handle these. */ return NULL_TREE; case GIMPLE_ASSIGN: - { - enum tree_code code = gimple_assign_rhs_code (def_stmt); - - /* Reverse the effect of extract_ops_from_tree during - gimplification. 
*/ - switch (get_gimple_rhs_class (code)) - { - default: - case GIMPLE_INVALID_RHS: - gcc_unreachable (); - case GIMPLE_TERNARY_RHS: - case GIMPLE_BINARY_RHS: - case GIMPLE_UNARY_RHS: - { - tree t = make_node (code); - TREE_TYPE (t) = TREE_TYPE (ssa_name); - unsigned num_rhs_args = gimple_num_ops (def_stmt) - 1; - for (unsigned i = 0; i < num_rhs_args; i++) - { - tree op = gimple_op (def_stmt, i + 1); - if (op) - { - op = fixup_tree_for_diagnostic_1 (op, visited); - if (op == NULL_TREE) - return NULL_TREE; - } - TREE_OPERAND (t, i) = op; - } - return t; - } - case GIMPLE_SINGLE_RHS: - { - tree op = gimple_op (def_stmt, 1); - op = fixup_tree_for_diagnostic_1 (op, visited); - return op; - } - } - } - break; + return get_diagnostic_tree_for_gassign_1 + (as_a (def_stmt), visited); case GIMPLE_CALL: { gcall *call_stmt = as_a (def_stmt); @@ -193,6 +201,15 @@ fixup_tree_for_diagnostic (tree expr) return fixup_tree_for_diagnostic_1 (expr, &visited); } +/* Attempt to generate a tree for the LHS of ASSIGN_STMT. */ + +tree +get_diagnostic_tree_for_gassign (const gassign *assign_stmt) +{ + hash_set visited; + return get_diagnostic_tree_for_gassign_1 (assign_stmt, &visited); +} + } // namespace ana /* Helper function for checkers. 
Is the CALL to the given function name, diff --git a/gcc/analyzer/analyzer.h b/gcc/analyzer/analyzer.h index 02830e4..d42bee7 100644 --- a/gcc/analyzer/analyzer.h +++ b/gcc/analyzer/analyzer.h @@ -112,6 +112,7 @@ extern void print_quoted_type (pretty_printer *pp, tree t); extern int readability_comparator (const void *p1, const void *p2); extern int tree_cmp (const void *p1, const void *p2); extern tree fixup_tree_for_diagnostic (tree); +extern tree get_diagnostic_tree_for_gassign (const gassign *); /* A tree, extended with stack frame information for locals, so that we can distinguish between different values of locals within a potentially diff --git a/gcc/analyzer/analyzer.opt b/gcc/analyzer/analyzer.opt index 7b77ae8..6ddb6e3 100644 --- a/gcc/analyzer/analyzer.opt +++ b/gcc/analyzer/analyzer.opt @@ -134,6 +134,10 @@ Wanalyzer-write-to-string-literal Common Var(warn_analyzer_write_to_string_literal) Init(1) Warning Warn about code paths which attempt to write to a string literal. +Wanalyzer-use-of-uninitialized-value +Common Var(warn_analyzer_use_of_uninitialized_value) Init(1) Warning +Warn about code paths in which an uninitialized value is used. + Wanalyzer-too-complex Common Var(warn_analyzer_too_complex) Init(0) Warning Warn if the code is too complicated for the analyzer to fully explore. diff --git a/gcc/analyzer/constraint-manager.cc b/gcc/analyzer/constraint-manager.cc index 51cf522..5b5a9de 100644 --- a/gcc/analyzer/constraint-manager.cc +++ b/gcc/analyzer/constraint-manager.cc @@ -1653,6 +1653,29 @@ on_liveness_change (const svalue_set &live_svalues, purge (p, NULL); } +class svalue_purger +{ +public: + svalue_purger (const svalue *sval) : m_sval (sval) {} + + bool should_purge_p (const svalue *sval) const + { + return sval->involves_p (m_sval); + } + +private: + const svalue *m_sval; +}; + +/* Purge any state involving SVAL. 
*/ + +void +constraint_manager::purge_state_involving (const svalue *sval) +{ + svalue_purger p (sval); + purge (p, NULL); +} + /* Comparator for use by constraint_manager::canonicalize. Sort a pair of equiv_class instances, using the representative svalue as a sort key. */ diff --git a/gcc/analyzer/constraint-manager.h b/gcc/analyzer/constraint-manager.h index 3173610..2bb3215 100644 --- a/gcc/analyzer/constraint-manager.h +++ b/gcc/analyzer/constraint-manager.h @@ -269,6 +269,7 @@ public: void on_liveness_change (const svalue_set &live_svalues, const region_model *model); + void purge_state_involving (const svalue *sval); void canonicalize (); diff --git a/gcc/analyzer/diagnostic-manager.cc b/gcc/analyzer/diagnostic-manager.cc index d005fac..631fef6 100644 --- a/gcc/analyzer/diagnostic-manager.cc +++ b/gcc/analyzer/diagnostic-manager.cc @@ -722,6 +722,18 @@ saved_diagnostic::add_duplicate (saved_diagnostic *other) m_duplicates.safe_push (other); } +/* Return true if this diagnostic supercedes OTHER, and that OTHER should + therefore not be emitted. */ + +bool +saved_diagnostic::supercedes_p (const saved_diagnostic &other) const +{ + /* They should be at the same stmt. */ + if (m_stmt != other.m_stmt) + return false; + return m_d->supercedes_p (*other.m_d); +} + /* State for building a checker_path from a particular exploded_path. In particular, this precomputes reachability information: the set of source enodes for which a path be found to the diagnostic enode. */ @@ -1021,6 +1033,38 @@ public: } } + /* Handle interactions between the dedupe winners, so that some + diagnostics can supercede others (of different kinds). + + We want use-after-free to supercede use-of-unitialized-value, + so that if we have these at the same stmt, we don't emit + a use-of-uninitialized, just the use-after-free. 
*/ + + void handle_interactions (diagnostic_manager *dm) + { + LOG_SCOPE (dm->get_logger ()); + auto_vec superceded; + for (auto outer : m_map) + { + const saved_diagnostic *outer_sd = outer.second; + for (auto inner : m_map) + { + const saved_diagnostic *inner_sd = inner.second; + if (inner_sd->supercedes_p (*outer_sd)) + { + superceded.safe_push (outer.first); + if (dm->get_logger ()) + dm->log ("sd[%i] \"%s\" superceded by sd[%i] \"%s\"", + outer_sd->get_index (), outer_sd->m_d->get_kind (), + inner_sd->get_index (), inner_sd->m_d->get_kind ()); + break; + } + } + } + for (auto iter : superceded) + m_map.remove (iter); + } + /* Emit the simplest diagnostic within each set. */ void emit_best (diagnostic_manager *dm, @@ -1095,6 +1139,8 @@ diagnostic_manager::emit_saved_diagnostics (const exploded_graph &eg) FOR_EACH_VEC_ELT (m_saved_diagnostics, i, sd) best_candidates.add (get_logger (), &pf, sd); + best_candidates.handle_interactions (this); + /* For each dedupe-key, call emit_saved_diagnostic on the "best" saved_diagnostic. 
*/ best_candidates.emit_best (this, eg); diff --git a/gcc/analyzer/diagnostic-manager.h b/gcc/analyzer/diagnostic-manager.h index fc8ac26..ad2eb4d 100644 --- a/gcc/analyzer/diagnostic-manager.h +++ b/gcc/analyzer/diagnostic-manager.h @@ -58,6 +58,8 @@ public: unsigned get_index () const { return m_idx; } + bool supercedes_p (const saved_diagnostic &other) const; + //private: const state_machine *m_sm; const exploded_node *m_enode; diff --git a/gcc/analyzer/engine.cc b/gcc/analyzer/engine.cc index dc07a79..7662a7f 100644 --- a/gcc/analyzer/engine.cc +++ b/gcc/analyzer/engine.cc @@ -108,14 +108,29 @@ impl_region_model_context (program_state *state, { } -void +bool impl_region_model_context::warn (pending_diagnostic *d) { LOG_FUNC (get_logger ()); + if (m_stmt == NULL && m_stmt_finder == NULL) + { + if (get_logger ()) + get_logger ()->log ("rejecting diagnostic: no stmt"); + delete d; + return false; + } if (m_eg) - m_eg->get_diagnostic_manager ().add_diagnostic - (m_enode_for_diag, m_enode_for_diag->get_supernode (), - m_stmt, m_stmt_finder, d); + { + m_eg->get_diagnostic_manager ().add_diagnostic + (m_enode_for_diag, m_enode_for_diag->get_supernode (), + m_stmt, m_stmt_finder, d); + return true; + } + else + { + delete d; + return false; + } } void @@ -155,6 +170,19 @@ impl_region_model_context::get_uncertainty () return m_uncertainty; } +/* Purge state involving SVAL. The region_model has already been purged, + so we only need to purge other state in the program_state: + the sm-state. */ + +void +impl_region_model_context::purge_state_involving (const svalue *sval) +{ + int i; + sm_state_map *smap; + FOR_EACH_VEC_ELT (m_new_state->m_checker_states, i, smap) + smap->purge_state_involving (sval, m_ext_state); +} + /* struct setjmp_record. 
*/ int @@ -230,16 +258,15 @@ public: return model->get_fndecl_for_call (call, &old_ctxt); } - state_machine::state_t get_state (const gimple *stmt, + state_machine::state_t get_state (const gimple *stmt ATTRIBUTE_UNUSED, tree var) { logger * const logger = get_logger (); LOG_FUNC (logger); - impl_region_model_context old_ctxt - (m_eg, m_enode_for_diag, NULL, NULL/*m_enode->get_state ()*/, - NULL, stmt); + /* Use NULL ctxt on this get_rvalue call to avoid triggering + uninitialized value warnings. */ const svalue *var_old_sval - = m_old_state->m_region_model->get_rvalue (var, &old_ctxt); + = m_old_state->m_region_model->get_rvalue (var, NULL); state_machine::state_t current = m_old_smap->get_state (var_old_sval, m_eg.get_ext_state ()); @@ -263,12 +290,6 @@ public: { logger * const logger = get_logger (); LOG_FUNC (logger); - impl_region_model_context old_ctxt - (m_eg, m_enode_for_diag, NULL, NULL/*m_enode->get_state ()*/, - NULL, stmt); - const svalue *var_old_sval - = m_old_state->m_region_model->get_rvalue (var, &old_ctxt); - impl_region_model_context new_ctxt (m_eg, m_enode_for_diag, m_old_state, m_new_state, NULL, @@ -278,8 +299,9 @@ public: const svalue *origin_new_sval = m_new_state->m_region_model->get_rvalue (origin, &new_ctxt); + /* We use the new sval here to avoid issues with uninitialized values. */ state_machine::state_t current - = m_old_smap->get_state (var_old_sval, m_eg.get_ext_state ()); + = m_old_smap->get_state (var_new_sval, m_eg.get_ext_state ()); if (logger) logger->log ("%s: state transition of %qE: %s -> %s", m_sm.get_name (), @@ -1160,26 +1182,6 @@ fndecl_has_gimple_body_p (tree fndecl) namespace ana { -/* A pending_diagnostic subclass for implementing "__analyzer_dump_path". 
*/ - -class dump_path_diagnostic - : public pending_diagnostic_subclass -{ -public: - bool emit (rich_location *richloc) FINAL OVERRIDE - { - inform (richloc, "path"); - return true; - } - - const char *get_kind () const FINAL OVERRIDE { return "dump_path_diagnostic"; } - - bool operator== (const dump_path_diagnostic &) const - { - return true; - } -}; - /* Modify STATE in place, applying the effects of the stmt at this node's point. */ @@ -1218,89 +1220,8 @@ exploded_node::on_stmt (exploded_graph &eg, bool unknown_side_effects = false; bool terminate_path = false; - switch (gimple_code (stmt)) - { - default: - /* No-op for now. */ - break; - - case GIMPLE_ASSIGN: - { - const gassign *assign = as_a (stmt); - state->m_region_model->on_assignment (assign, &ctxt); - } - break; - - case GIMPLE_ASM: - /* No-op for now. */ - break; - - case GIMPLE_CALL: - { - /* Track whether we have a gcall to a function that's not recognized by - anything, for which we don't have a function body, or for which we - don't know the fndecl. */ - const gcall *call = as_a (stmt); - - /* Debugging/test support. */ - if (is_special_named_call_p (call, "__analyzer_describe", 2)) - state->m_region_model->impl_call_analyzer_describe (call, &ctxt); - else if (is_special_named_call_p (call, "__analyzer_dump", 0)) - { - /* Handle the builtin "__analyzer_dump" by dumping state - to stderr. */ - state->dump (eg.get_ext_state (), true); - } - else if (is_special_named_call_p (call, "__analyzer_dump_capacity", 1)) - state->m_region_model->impl_call_analyzer_dump_capacity (call, &ctxt); - else if (is_special_named_call_p (call, "__analyzer_dump_path", 0)) - { - /* Handle the builtin "__analyzer_dump_path" by queuing a - diagnostic at this exploded_node. */ - ctxt.warn (new dump_path_diagnostic ()); - } - else if (is_special_named_call_p (call, "__analyzer_dump_region_model", - 0)) - { - /* Handle the builtin "__analyzer_dump_region_model" by dumping - the region model's state to stderr. 
*/ - state->m_region_model->dump (false); - } - else if (is_special_named_call_p (call, "__analyzer_eval", 1)) - state->m_region_model->impl_call_analyzer_eval (call, &ctxt); - else if (is_special_named_call_p (call, "__analyzer_break", 0)) - { - /* Handle the builtin "__analyzer_break" by triggering a - breakpoint. */ - /* TODO: is there a good cross-platform way to do this? */ - raise (SIGINT); - } - else if (is_special_named_call_p (call, - "__analyzer_dump_exploded_nodes", - 1)) - { - /* This is handled elsewhere. */ - } - else if (is_setjmp_call_p (call)) - state->m_region_model->on_setjmp (call, this, &ctxt); - else if (is_longjmp_call_p (call)) - { - on_longjmp (eg, call, state, &ctxt); - return on_stmt_flags::terminate_path (); - } - else - unknown_side_effects - = state->m_region_model->on_call_pre (call, &ctxt, &terminate_path); - } - break; - - case GIMPLE_RETURN: - { - const greturn *return_ = as_a (stmt); - state->m_region_model->on_return (return_, &ctxt); - } - break; - } + on_stmt_pre (eg, stmt, state, &terminate_path, + &unknown_side_effects, &ctxt); if (terminate_path) return on_stmt_flags::terminate_path (); @@ -1316,41 +1237,71 @@ exploded_node::on_stmt (exploded_graph &eg, impl_sm_context sm_ctxt (eg, sm_idx, sm, this, &old_state, state, old_smap, new_smap); - /* If we're at the def-stmt of an SSA name, then potentially purge - any sm-state for svalues that involve that SSA name. This avoids - false positives in loops, since a symbolic value referring to the - SSA name will be referring to the previous value of that SSA name. 
- For example, in: - while ((e = hashmap_iter_next(&iter))) { - struct oid2strbuf *e_strbuf = (struct oid2strbuf *)e; - free (e_strbuf->value); - } - at the def-stmt of e_8: - e_8 = hashmap_iter_next (&iter); - we should purge the "freed" state of: - INIT_VAL(CAST_REG(‘struct oid2strbuf’, (*INIT_VAL(e_8))).value) - which is the "e_strbuf->value" value from the previous iteration, - or we will erroneously report a double-free - the "e_8" within it - refers to the previous value. */ - if (tree lhs = gimple_get_lhs (stmt)) - if (TREE_CODE (lhs) == SSA_NAME) - { - const svalue *sval - = old_state.m_region_model->get_rvalue (lhs, &ctxt); - new_smap->purge_state_involving (sval, eg.get_ext_state ()); - } - /* Allow the state_machine to handle the stmt. */ if (sm.on_stmt (&sm_ctxt, snode, stmt)) unknown_side_effects = false; } - if (const gcall *call = dyn_cast (stmt)) - state->m_region_model->on_call_post (call, unknown_side_effects, &ctxt); + on_stmt_post (stmt, state, unknown_side_effects, &ctxt); return on_stmt_flags (); } +/* Handle the pre-sm-state part of STMT, modifying STATE in-place. + Write true to *OUT_TERMINATE_PATH if the path should be terminated. + Write true to *OUT_UNKNOWN_SIDE_EFFECTS if the stmt has unknown + side effects. */ + +void +exploded_node::on_stmt_pre (exploded_graph &eg, + const gimple *stmt, + program_state *state, + bool *out_terminate_path, + bool *out_unknown_side_effects, + region_model_context *ctxt) +{ + /* Handle special-case calls that require the full program_state. */ + if (const gcall *call = dyn_cast (stmt)) + { + if (is_special_named_call_p (call, "__analyzer_dump", 0)) + { + /* Handle the builtin "__analyzer_dump" by dumping state + to stderr. 
*/ + state->dump (eg.get_ext_state (), true); + return; + } + else if (is_setjmp_call_p (call)) + { + state->m_region_model->on_setjmp (call, this, ctxt); + return; + } + else if (is_longjmp_call_p (call)) + { + on_longjmp (eg, call, state, ctxt); + *out_terminate_path = true; + return; + } + } + + /* Otherwise, defer to m_region_model. */ + state->m_region_model->on_stmt_pre (stmt, + out_terminate_path, + out_unknown_side_effects, + ctxt); +} + +/* Handle the post-sm-state part of STMT, modifying STATE in-place. */ + +void +exploded_node::on_stmt_post (const gimple *stmt, + program_state *state, + bool unknown_side_effects, + region_model_context *ctxt) +{ + if (const gcall *call = dyn_cast (stmt)) + state->m_region_model->on_call_post (call, unknown_side_effects, ctxt); +} + /* Consider the effect of following superedge SUCC from this node. Return true if it's feasible to follow the edge, or false @@ -1415,7 +1366,7 @@ valid_longjmp_stack_p (const program_point &longjmp_point, where the enclosing function of the "setjmp" has returned (and thus the stack frame no longer exists). 
*/ -class stale_jmp_buf : public pending_diagnostic_subclass +class stale_jmp_buf : public pending_diagnostic_subclass { public: stale_jmp_buf (const gcall *setjmp_call, const gcall *longjmp_call, @@ -3763,6 +3714,13 @@ feasibility_state::maybe_update_for_edge (logger *logger, if (const gassign *assign = dyn_cast (stmt)) m_model.on_assignment (assign, NULL); + else if (const gcall *call = dyn_cast (stmt)) + { + bool terminate_path; + bool unknown_side_effects + = m_model.on_call_pre (call, NULL, &terminate_path); + m_model.on_call_post (call, unknown_side_effects, NULL); + } else if (const greturn *return_ = dyn_cast (stmt)) m_model.on_return (return_, NULL); } diff --git a/gcc/analyzer/exploded-graph.h b/gcc/analyzer/exploded-graph.h index 1d8b73d..8f48d8a 100644 --- a/gcc/analyzer/exploded-graph.h +++ b/gcc/analyzer/exploded-graph.h @@ -46,7 +46,7 @@ class impl_region_model_context : public region_model_context uncertainty_t *uncertainty, logger *logger = NULL); - void warn (pending_diagnostic *d) FINAL OVERRIDE; + bool warn (pending_diagnostic *d) FINAL OVERRIDE; void on_svalue_leak (const svalue *) OVERRIDE; void on_liveness_change (const svalue_set &live_svalues, const region_model *model) FINAL OVERRIDE; @@ -74,6 +74,8 @@ class impl_region_model_context : public region_model_context uncertainty_t *get_uncertainty () FINAL OVERRIDE; + void purge_state_involving (const svalue *sval) FINAL OVERRIDE; + exploded_graph *m_eg; log_user m_logger; exploded_node *m_enode_for_diag; @@ -223,6 +225,17 @@ class exploded_node : public dnode const gimple *stmt, program_state *state, uncertainty_t *uncertainty); + void on_stmt_pre (exploded_graph &eg, + const gimple *stmt, + program_state *state, + bool *out_terminate_path, + bool *out_unknown_side_effects, + region_model_context *ctxt); + void on_stmt_post (const gimple *stmt, + program_state *state, + bool unknown_side_effects, + region_model_context *ctxt); + bool on_edge (exploded_graph &eg, const superedge *succ, 
program_point *next_point, diff --git a/gcc/analyzer/pending-diagnostic.h b/gcc/analyzer/pending-diagnostic.h index 571fc1b..48e2b3e 100644 --- a/gcc/analyzer/pending-diagnostic.h +++ b/gcc/analyzer/pending-diagnostic.h @@ -154,6 +154,9 @@ class pending_diagnostic /* Hand-coded RTTI: get an ID for the subclass. */ virtual const char *get_kind () const = 0; + /* A vfunc for identifying "use of uninitialized value". */ + virtual bool use_of_uninit_p () const { return false; } + /* Compare for equality with OTHER, which might be of a different subclass. */ @@ -269,6 +272,16 @@ class pending_diagnostic { return false; } + + /* Vfunc for determining that this pending_diagnostic supercedes OTHER, + and that OTHER should therefore not be emitted. + They have already been tested for being at the same stmt. */ + + virtual bool + supercedes_p (const pending_diagnostic &other ATTRIBUTE_UNUSED) const + { + return false; + } }; /* A template to make it easier to make subclasses of pending_diagnostic. diff --git a/gcc/analyzer/program-state.cc b/gcc/analyzer/program-state.cc index 6d60c04..23cfcb0 100644 --- a/gcc/analyzer/program-state.cc +++ b/gcc/analyzer/program-state.cc @@ -372,21 +372,31 @@ sm_state_map::get_state (const svalue *sval, INIT_VAL(foo). */ if (m_sm.inherited_state_p ()) if (region_model_manager *mgr = ext_state.get_model_manager ()) - if (const initial_svalue *init_sval = sval->dyn_cast_initial_svalue ()) - { - const region *reg = init_sval->get_region (); - /* Try recursing upwards (up to the base region for the cluster). 
*/ - if (!reg->base_region_p ()) - if (const region *parent_reg = reg->get_parent_region ()) - { - const svalue *parent_init_sval - = mgr->get_or_create_initial_value (parent_reg); - state_machine::state_t parent_state - = get_state (parent_init_sval, ext_state); - if (parent_state) - return parent_state; - } - } + { + if (const initial_svalue *init_sval = sval->dyn_cast_initial_svalue ()) + { + const region *reg = init_sval->get_region (); + /* Try recursing upwards (up to the base region for the + cluster). */ + if (!reg->base_region_p ()) + if (const region *parent_reg = reg->get_parent_region ()) + { + const svalue *parent_init_sval + = mgr->get_or_create_initial_value (parent_reg); + state_machine::state_t parent_state + = get_state (parent_init_sval, ext_state); + if (parent_state) + return parent_state; + } + } + else if (const sub_svalue *sub_sval = sval->dyn_cast_sub_svalue ()) + { + const svalue *parent_sval = sub_sval->get_parent (); + if (state_machine::state_t parent_state + = get_state (parent_sval, ext_state)) + return parent_state; + } + } return m_sm.get_default_state (sval); } @@ -596,7 +606,8 @@ sm_state_map::purge_state_involving (const svalue *sval, const extrinsic_state &ext_state) { /* Currently svalue::involves_p requires this. 
*/ - if (sval->get_kind () != SK_INITIAL) + if (!(sval->get_kind () == SK_INITIAL + || sval->get_kind () == SK_CONJURED)) return; svalue_set svals_to_unset; diff --git a/gcc/analyzer/region-model-impl-calls.cc b/gcc/analyzer/region-model-impl-calls.cc index 466d397..4be6550 100644 --- a/gcc/analyzer/region-model-impl-calls.cc +++ b/gcc/analyzer/region-model-impl-calls.cc @@ -84,7 +84,10 @@ call_details::call_details (const gcall *call, region_model *model, uncertainty_t * call_details::get_uncertainty () const { - return m_ctxt->get_uncertainty (); + if (m_ctxt) + return m_ctxt->get_uncertainty (); + else + return NULL; } /* If the callsite has a left-hand-side region, set it to RESULT @@ -173,6 +176,15 @@ call_details::dump (bool simple) const pp_flush (&pp); } +/* Get a conjured_svalue for this call for REG. */ + +const svalue * +call_details::get_or_create_conjured_svalue (const region *reg) const +{ + region_model_manager *mgr = m_model->get_manager (); + return mgr->get_or_create_conjured_svalue (reg->get_type (), m_call, reg); +} + /* Implementations of specific functions. */ /* Handle the on_call_pre part of "alloca". */ @@ -305,6 +317,42 @@ region_model::impl_call_error (const call_details &cd, unsigned min_args, return true; } +/* Handle the on_call_pre part of "fgets" and "fgets_unlocked". */ + +void +region_model::impl_call_fgets (const call_details &cd) +{ + /* Ideally we would bifurcate state here between the + error vs no error cases. */ + const svalue *ptr_sval = cd.get_arg_svalue (0); + if (const region_svalue *ptr_to_region_sval + = ptr_sval->dyn_cast_region_svalue ()) + { + const region *reg = ptr_to_region_sval->get_pointee (); + const region *base_reg = reg->get_base_region (); + const svalue *new_sval = cd.get_or_create_conjured_svalue (base_reg); + purge_state_involving (new_sval, cd.get_ctxt ()); + set_value (base_reg, new_sval, cd.get_ctxt ()); + } +} + +/* Handle the on_call_pre part of "fread". 
*/ + +void +region_model::impl_call_fread (const call_details &cd) +{ + const svalue *ptr_sval = cd.get_arg_svalue (0); + if (const region_svalue *ptr_to_region_sval + = ptr_sval->dyn_cast_region_svalue ()) + { + const region *reg = ptr_to_region_sval->get_pointee (); + const region *base_reg = reg->get_base_region (); + const svalue *new_sval = cd.get_or_create_conjured_svalue (base_reg); + purge_state_involving (new_sval, cd.get_ctxt ()); + set_value (base_reg, new_sval, cd.get_ctxt ()); + } +} + /* Handle the on_call_post part of "free", after sm-handling. If the ptr points to an underlying heap region, delete the region, diff --git a/gcc/analyzer/region-model-manager.cc b/gcc/analyzer/region-model-manager.cc index 55acb90..7a52a64 100644 --- a/gcc/analyzer/region-model-manager.cc +++ b/gcc/analyzer/region-model-manager.cc @@ -252,6 +252,10 @@ region_model_manager::get_or_create_unknown_svalue (tree type) const svalue * region_model_manager::get_or_create_initial_value (const region *reg) { + if (!reg->can_have_initial_svalue_p ()) + return get_or_create_poisoned_svalue (POISON_KIND_UNINIT, + reg->get_type ()); + /* The initial value of a cast is a cast of the initial value. 
*/ if (const cast_region *cast_reg = reg->dyn_cast_cast_region ()) { diff --git a/gcc/analyzer/region-model-reachability.cc b/gcc/analyzer/region-model-reachability.cc index e165cda..1f65307 100644 --- a/gcc/analyzer/region-model-reachability.cc +++ b/gcc/analyzer/region-model-reachability.cc @@ -267,7 +267,6 @@ reachable_regions::handle_parm (const svalue *sval, tree param_type) void reachable_regions::mark_escaped_clusters (region_model_context *ctxt) { - gcc_assert (ctxt); auto_vec escaped_fn_regs (m_mutable_base_regs.elements ()); for (hash_set::iterator iter = m_mutable_base_regs.begin (); @@ -281,12 +280,15 @@ reachable_regions::mark_escaped_clusters (region_model_context *ctxt) if (const function_region *fn_reg = base_reg->dyn_cast_function_region ()) escaped_fn_regs.quick_push (fn_reg); } - /* Sort to ensure deterministic results. */ - escaped_fn_regs.qsort (region::cmp_ptr_ptr); - unsigned i; - const function_region *fn_reg; - FOR_EACH_VEC_ELT (escaped_fn_regs, i, fn_reg) - ctxt->on_escaped_function (fn_reg->get_fndecl ()); + if (ctxt) + { + /* Sort to ensure deterministic results. */ + escaped_fn_regs.qsort (region::cmp_ptr_ptr); + unsigned i; + const function_region *fn_reg; + FOR_EACH_VEC_ELT (escaped_fn_regs, i, fn_reg) + ctxt->on_escaped_function (fn_reg->get_fndecl ()); + } } /* Dump SET to PP, sorting it to avoid churn when comparing dumps. */ diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc index acbbd11..3fe2cce 100644 --- a/gcc/analyzer/region-model.cc +++ b/gcc/analyzer/region-model.cc @@ -221,6 +221,23 @@ region_to_value_map::can_merge_with_p (const region_to_value_map &other, return true; } +/* Purge any state involving SVAL. 
*/ + +void +region_to_value_map::purge_state_involving (const svalue *sval) +{ + auto_vec to_purge; + for (auto iter : *this) + { + const region *iter_reg = iter.first; + const svalue *iter_sval = iter.second; + if (iter_reg->involves_p (sval) || iter_sval->involves_p (sval)) + to_purge.safe_push (iter_reg); + } + for (auto iter : to_purge) + m_hash_map.remove (iter); +} + /* class region_model. */ /* Ctor for region_model: construct an "empty" model. */ @@ -442,6 +459,11 @@ public: const char *get_kind () const FINAL OVERRIDE { return "poisoned_value_diagnostic"; } + bool use_of_uninit_p () const FINAL OVERRIDE + { + return m_pkind == POISON_KIND_UNINIT; + } + bool operator== (const poisoned_value_diagnostic &other) const { return m_expr == other.m_expr; @@ -453,6 +475,16 @@ public: { default: gcc_unreachable (); + case POISON_KIND_UNINIT: + { + diagnostic_metadata m; + m.add_cwe (457); /* "CWE-457: Use of Uninitialized Variable". */ + return warning_meta (rich_loc, m, + OPT_Wanalyzer_use_of_uninitialized_value, + "use of uninitialized value %qE", + m_expr); + } + break; case POISON_KIND_FREED: { diagnostic_metadata m; @@ -482,6 +514,9 @@ public: { default: gcc_unreachable (); + case POISON_KIND_UNINIT: + return ev.formatted_print ("use of uninitialized value %qE here", + m_expr); case POISON_KIND_FREED: return ev.formatted_print ("use after % of %qE here", m_expr); @@ -782,6 +817,41 @@ region_model::get_gassign_result (const gassign *assign, } } +/* Check for SVAL being poisoned, adding a warning to CTXT. + Return SVAL, or, if a warning is added, another value, to avoid + repeatedly complaining about the same poisoned value in followup code. */ + +const svalue * +region_model::check_for_poison (const svalue *sval, + tree expr, + region_model_context *ctxt) const +{ + if (!ctxt) + return sval; + + if (const poisoned_svalue *poisoned_sval = sval->dyn_cast_poisoned_svalue ()) + { + /* If we have an SSA name for a temporary, we don't want to print + ''. 
+ Poisoned values are shared by type, and so we can't reconstruct + the tree other than via the def stmts, using + fixup_tree_for_diagnostic. */ + tree diag_arg = fixup_tree_for_diagnostic (expr); + enum poison_kind pkind = poisoned_sval->get_poison_kind (); + if (ctxt->warn (new poisoned_value_diagnostic (diag_arg, pkind))) + { + /* We only want to report use of a poisoned value at the first + place it gets used; return an unknown value to avoid generating + a chain of followup warnings. */ + sval = m_mgr->get_or_create_unknown_svalue (sval->get_type ()); + } + + return sval; + } + + return sval; +} + /* Update this model for the ASSIGN stmt, using CTXT to report any diagnostics. */ @@ -798,6 +868,8 @@ region_model::on_assignment (const gassign *assign, region_model_context *ctxt) for some SVALUE. */ if (const svalue *sval = get_gassign_result (assign, ctxt)) { + tree expr = get_diagnostic_tree_for_gassign (assign); + check_for_poison (sval, expr, ctxt); set_value (lhs_reg, sval, ctxt); return; } @@ -863,6 +935,109 @@ region_model::on_assignment (const gassign *assign, region_model_context *ctxt) } } +/* A pending_diagnostic subclass for implementing "__analyzer_dump_path". */ + +class dump_path_diagnostic + : public pending_diagnostic_subclass +{ +public: + bool emit (rich_location *richloc) FINAL OVERRIDE + { + inform (richloc, "path"); + return true; + } + + const char *get_kind () const FINAL OVERRIDE { return "dump_path_diagnostic"; } + + bool operator== (const dump_path_diagnostic &) const + { + return true; + } +}; + +/* Handle the pre-sm-state part of STMT, modifying this object in-place. + Write true to *OUT_TERMINATE_PATH if the path should be terminated. + Write true to *OUT_UNKNOWN_SIDE_EFFECTS if the stmt has unknown + side effects. */ + +void +region_model::on_stmt_pre (const gimple *stmt, + bool *out_terminate_path, + bool *out_unknown_side_effects, + region_model_context *ctxt) +{ + switch (gimple_code (stmt)) + { + default: + /* No-op for now. 
*/ + break; + + case GIMPLE_ASSIGN: + { + const gassign *assign = as_a (stmt); + on_assignment (assign, ctxt); + } + break; + + case GIMPLE_ASM: + /* No-op for now. */ + break; + + case GIMPLE_CALL: + { + /* Track whether we have a gcall to a function that's not recognized by + anything, for which we don't have a function body, or for which we + don't know the fndecl. */ + const gcall *call = as_a (stmt); + + /* Debugging/test support. */ + if (is_special_named_call_p (call, "__analyzer_describe", 2)) + impl_call_analyzer_describe (call, ctxt); + else if (is_special_named_call_p (call, "__analyzer_dump_capacity", 1)) + impl_call_analyzer_dump_capacity (call, ctxt); + else if (is_special_named_call_p (call, "__analyzer_dump_path", 0)) + { + /* Handle the builtin "__analyzer_dump_path" by queuing a + diagnostic at this exploded_node. */ + ctxt->warn (new dump_path_diagnostic ()); + } + else if (is_special_named_call_p (call, "__analyzer_dump_region_model", + 0)) + { + /* Handle the builtin "__analyzer_dump_region_model" by dumping + the region model's state to stderr. */ + dump (false); + } + else if (is_special_named_call_p (call, "__analyzer_eval", 1)) + impl_call_analyzer_eval (call, ctxt); + else if (is_special_named_call_p (call, "__analyzer_break", 0)) + { + /* Handle the builtin "__analyzer_break" by triggering a + breakpoint. */ + /* TODO: is there a good cross-platform way to do this? */ + raise (SIGINT); + } + else if (is_special_named_call_p (call, + "__analyzer_dump_exploded_nodes", + 1)) + { + /* This is handled elsewhere. */ + } + else + *out_unknown_side_effects = on_call_pre (call, ctxt, + out_terminate_path); + } + break; + + case GIMPLE_RETURN: + { + const greturn *return_ = as_a (stmt); + on_return (return_, ctxt); + } + break; + } +} + /* Update this model for the CALL stmt, using CTXT to report any diagnostics - the first half. 
@@ -885,6 +1060,22 @@ region_model::on_call_pre (const gcall *call, region_model_context *ctxt, bool unknown_side_effects = false; + /* Some of the cases below update the lhs of the call based on the + return value, but not all. Provide a default value, which may + get overwritten below. */ + if (tree lhs = gimple_call_lhs (call)) + { + const region *lhs_region = get_lvalue (lhs, ctxt); + if (TREE_CODE (lhs) == SSA_NAME) + { + const svalue *sval + = m_mgr->get_or_create_conjured_svalue (TREE_TYPE (lhs), call, + lhs_region); + purge_state_involving (sval, ctxt); + set_value (lhs_region, sval, ctxt); + } + } + if (gimple_call_internal_p (call)) { switch (gimple_call_internal_fn (call)) @@ -994,6 +1185,17 @@ region_model::on_call_pre (const gcall *call, region_model_context *ctxt, else unknown_side_effects = true; } + else if (is_named_call_p (callee_fndecl, "fgets", call, 3) + || is_named_call_p (callee_fndecl, "fgets_unlocked", call, 3)) + { + impl_call_fgets (cd); + return false; + } + else if (is_named_call_p (callee_fndecl, "fread", call, 4)) + { + impl_call_fread (cd); + return false; + } else if (is_named_call_p (callee_fndecl, "getchar", call, 0)) { /* No side-effects (tracking stream state is out-of-scope @@ -1029,19 +1231,6 @@ region_model::on_call_pre (const gcall *call, region_model_context *ctxt, else unknown_side_effects = true; - /* Some of the above cases update the lhs of the call based on the - return value. If we get here, it hasn't been done yet, so do that - now. 
*/ - if (tree lhs = gimple_call_lhs (call)) - { - const region *lhs_region = get_lvalue (lhs, ctxt); - if (TREE_CODE (lhs) == SSA_NAME) - { - const svalue *sval = m_mgr->get_or_create_initial_value (lhs_region); - set_value (lhs_region, sval, ctxt); - } - } - return unknown_side_effects; } @@ -1090,6 +1279,38 @@ region_model::on_call_post (const gcall *call, handle_unrecognized_call (call, ctxt); } +/* Purge state involving SVAL from this region_model, using CTXT + (if non-NULL) to purge other state in a program_state. + + For example, if we're at the def-stmt of an SSA name, then we need to + purge any state for svalues that involve that SSA name. This avoids + false positives in loops, since a symbolic value referring to the + SSA name will be referring to the previous value of that SSA name. + + For example, in: + while ((e = hashmap_iter_next(&iter))) { + struct oid2strbuf *e_strbuf = (struct oid2strbuf *)e; + free (e_strbuf->value); + } + at the def-stmt of e_8: + e_8 = hashmap_iter_next (&iter); + we should purge the "freed" state of: + INIT_VAL(CAST_REG(‘struct oid2strbuf’, (*INIT_VAL(e_8))).value) + which is the "e_strbuf->value" value from the previous iteration, + or we will erroneously report a double-free - the "e_8" within it + refers to the previous value. */ + +void +region_model::purge_state_involving (const svalue *sval, + region_model_context *ctxt) +{ + m_store.purge_state_involving (sval, m_mgr); + m_constraints->purge_state_involving (sval); + m_dynamic_extents.purge_state_involving (sval); + if (ctxt) + ctxt->purge_state_involving (sval); +} + /* Handle a call CALL to a function with unknown behavior. Traverse the regions in this model, determining what regions are @@ -1135,7 +1356,7 @@ region_model::handle_unrecognized_call (const gcall *call, } } - uncertainty_t *uncertainty = ctxt->get_uncertainty (); + uncertainty_t *uncertainty = ctxt ? 
ctxt->get_uncertainty () : NULL; /* Purge sm-state for the svalues that were reachable, both in non-mutable and mutable form. */ @@ -1144,14 +1365,16 @@ region_model::handle_unrecognized_call (const gcall *call, iter != reachable_regs.end_reachable_svals (); ++iter) { const svalue *sval = (*iter); - ctxt->on_unknown_change (sval, false); + if (ctxt) + ctxt->on_unknown_change (sval, false); } for (svalue_set::iterator iter = reachable_regs.begin_mutable_svals (); iter != reachable_regs.end_mutable_svals (); ++iter) { const svalue *sval = (*iter); - ctxt->on_unknown_change (sval, true); + if (ctxt) + ctxt->on_unknown_change (sval, true); if (uncertainty) uncertainty->on_mutable_sval_at_unknown_call (sval); } @@ -1603,6 +1826,8 @@ region_model::get_rvalue (path_var pv, region_model_context *ctxt) const assert_compat_types (result_sval->get_type (), TREE_TYPE (pv.m_tree)); + result_sval = check_for_poison (result_sval, pv.m_tree, ctxt); + return result_sval; } @@ -4307,7 +4532,7 @@ test_stack_frames () /* Verify that p (which was pointing at the local "x" in the popped frame) has been poisoned. */ - const svalue *new_p_sval = model.get_rvalue (p, &ctxt); + const svalue *new_p_sval = model.get_rvalue (p, NULL); ASSERT_EQ (new_p_sval->get_kind (), SK_POISONED); ASSERT_EQ (new_p_sval->dyn_cast_poisoned_svalue ()->get_poison_kind (), POISON_KIND_POPPED_STACK); @@ -5397,7 +5622,7 @@ test_alloca () /* Verify that the pointers to the alloca region are replaced by poisoned values when the frame is popped. */ model.pop_frame (NULL, NULL, &ctxt); - ASSERT_EQ (model.get_rvalue (p, &ctxt)->get_kind (), SK_POISONED); + ASSERT_EQ (model.get_rvalue (p, NULL)->get_kind (), SK_POISONED); } /* Verify that svalue::involves_p works. 
*/ diff --git a/gcc/analyzer/region-model.h b/gcc/analyzer/region-model.h index cf5232d..71f6b3e 100644 --- a/gcc/analyzer/region-model.h +++ b/gcc/analyzer/region-model.h @@ -171,6 +171,8 @@ public: bool can_merge_with_p (const region_to_value_map &other, region_to_value_map *out) const; + void purge_state_involving (const svalue *sval); + private: hash_map_t m_hash_map; }; @@ -470,6 +472,8 @@ public: void dump_to_pp (pretty_printer *pp, bool simple) const; void dump (bool simple) const; + const svalue *get_or_create_conjured_svalue (const region *) const; + private: const gcall *m_call; region_model *m_model; @@ -518,6 +522,12 @@ class region_model void canonicalize (); bool canonicalized_p () const; + void + on_stmt_pre (const gimple *stmt, + bool *out_terminate_path, + bool *out_unknown_side_effects, + region_model_context *ctxt); + void on_assignment (const gassign *stmt, region_model_context *ctxt); const svalue *get_gassign_result (const gassign *assign, region_model_context *ctxt); @@ -527,6 +537,8 @@ class region_model bool unknown_side_effects, region_model_context *ctxt); + void purge_state_involving (const svalue *sval, region_model_context *ctxt); + /* Specific handling for on_call_pre. 
*/ bool impl_call_alloca (const call_details &cd); void impl_call_analyzer_describe (const gcall *call, @@ -539,6 +551,8 @@ class region_model bool impl_call_calloc (const call_details &cd); bool impl_call_error (const call_details &cd, unsigned min_args, bool *out_terminate_path); + void impl_call_fgets (const call_details &cd); + void impl_call_fread (const call_details &cd); void impl_call_free (const call_details &cd); bool impl_call_malloc (const call_details &cd); void impl_call_memcpy (const call_details &cd); @@ -727,6 +741,10 @@ class region_model bool called_from_main_p () const; const svalue *get_initial_value_for_global (const region *reg) const; + const svalue *check_for_poison (const svalue *sval, + tree expr, + region_model_context *ctxt) const; + void check_for_writable_region (const region* dest_reg, region_model_context *ctxt) const; @@ -757,7 +775,9 @@ class region_model class region_model_context { public: - virtual void warn (pending_diagnostic *d) = 0; + /* Hook for clients to store pending diagnostics. + Return true if the diagnostic was stored, or false if it was deleted. */ + virtual bool warn (pending_diagnostic *d) = 0; /* Hook for clients to be notified when an SVAL that was reachable in a previous state is no longer live, so that clients can emit warnings @@ -799,6 +819,9 @@ class region_model_context virtual void on_escaped_function (tree fndecl) = 0; virtual uncertainty_t *get_uncertainty () = 0; + + /* Hook for clients to purge state involving SVAL. */ + virtual void purge_state_involving (const svalue *sval) = 0; }; /* A "do nothing" subclass of region_model_context. 
*/ @@ -806,7 +829,7 @@ class region_model_context class noop_region_model_context : public region_model_context { public: - void warn (pending_diagnostic *) OVERRIDE {} + bool warn (pending_diagnostic *) OVERRIDE { return false; } void on_svalue_leak (const svalue *) OVERRIDE {} void on_liveness_change (const svalue_set &, const region_model *) OVERRIDE {} @@ -829,6 +852,8 @@ public: void on_escaped_function (tree) OVERRIDE {} uncertainty_t *get_uncertainty () OVERRIDE { return NULL; } + + void purge_state_involving (const svalue *sval ATTRIBUTE_UNUSED) OVERRIDE {} }; /* A subclass of region_model_context for determining if operations fail @@ -931,9 +956,10 @@ using namespace ::selftest; class test_region_model_context : public noop_region_model_context { public: - void warn (pending_diagnostic *d) FINAL OVERRIDE + bool warn (pending_diagnostic *d) FINAL OVERRIDE { m_diagnostics.safe_push (d); + return true; } unsigned get_num_diagnostics () const { return m_diagnostics.length (); } diff --git a/gcc/analyzer/region.cc b/gcc/analyzer/region.cc index 4633717..6cccb0f 100644 --- a/gcc/analyzer/region.cc +++ b/gcc/analyzer/region.cc @@ -168,6 +168,109 @@ region::maybe_get_frame_region () const return NULL; } +/* Get the memory space of this region. 
*/ + +enum memory_space +region::get_memory_space () const +{ + const region *iter = this; + while (iter) + { + switch (iter->get_kind ()) + { + default: + break; + case RK_GLOBALS: + return MEMSPACE_GLOBALS; + case RK_CODE: + case RK_FUNCTION: + case RK_LABEL: + return MEMSPACE_CODE; + case RK_FRAME: + case RK_STACK: + case RK_ALLOCA: + return MEMSPACE_STACK; + case RK_HEAP: + case RK_HEAP_ALLOCATED: + return MEMSPACE_HEAP; + case RK_STRING: + return MEMSPACE_READONLY_DATA; + } + if (iter->get_kind () == RK_CAST) + iter = iter->dyn_cast_cast_region ()->get_original_region (); + else + iter = iter->get_parent_region (); + } + return MEMSPACE_UNKNOWN; +} + +/* Subroutine for use by region_model_manager::get_or_create_initial_value. + Return true if this region has an initial_svalue. + Return false if attempting to use INIT_VAL(this_region) should give + the "UNINITIALIZED" poison value. */ + +bool +region::can_have_initial_svalue_p () const +{ + const region *base_reg = get_base_region (); + + /* Check for memory spaces that are uninitialized by default. */ + enum memory_space mem_space = base_reg->get_memory_space (); + switch (mem_space) + { + default: + gcc_unreachable (); + case MEMSPACE_UNKNOWN: + case MEMSPACE_CODE: + case MEMSPACE_GLOBALS: + case MEMSPACE_READONLY_DATA: + /* Such regions have initial_svalues. */ + return true; + + case MEMSPACE_HEAP: + /* Heap allocations are uninitialized by default. */ + return false; + + case MEMSPACE_STACK: + if (tree decl = base_reg->maybe_get_decl ()) + { + /* See the assertion in frame_region::get_region_for_local for the + tree codes we need to handle here. */ + switch (TREE_CODE (decl)) + { + default: + gcc_unreachable (); + + case PARM_DECL: + /* Parameters have initial values. */ + return true; + + case VAR_DECL: + case RESULT_DECL: + /* Function locals don't have initial values. 
*/ + return false; + + case SSA_NAME: + { + tree ssa_name = decl; + /* SSA names that are the default defn of a PARM_DECL + have initial_svalues; other SSA names don't. */ + if (SSA_NAME_IS_DEFAULT_DEF (ssa_name) + && SSA_NAME_VAR (ssa_name) + && TREE_CODE (SSA_NAME_VAR (ssa_name)) == PARM_DECL) + return true; + else + return false; + } + } + } + + /* If we have an on-stack region that isn't associated with a decl + or SSA name, then we have VLA/alloca, which is uninitialized. */ + return false; + } +} + /* If this region is a decl_region, return the decl. Otherwise return NULL. */ @@ -584,6 +687,20 @@ region::non_null_p () const } } +/* Return true iff this region is defined in terms of SVAL. */ + +bool +region::involves_p (const svalue *sval) const +{ + if (const symbolic_region *symbolic_reg = dyn_cast_symbolic_region ()) + { + if (symbolic_reg->get_pointer ()->involves_p (sval)) + return true; + } + + return false; +} + /* Comparator for trees to impose a deterministic ordering on T1 and T2. */ diff --git a/gcc/analyzer/region.h b/gcc/analyzer/region.h index 353d5c4..a17e73c 100644 --- a/gcc/analyzer/region.h +++ b/gcc/analyzer/region.h @@ -25,6 +25,18 @@ along with GCC; see the file COPYING3. If not see namespace ana { +/* An enum for identifying different spaces within memory. */ + +enum memory_space +{ + MEMSPACE_UNKNOWN, + MEMSPACE_CODE, + MEMSPACE_GLOBALS, + MEMSPACE_STACK, + MEMSPACE_HEAP, + MEMSPACE_READONLY_DATA +}; + /* An enum for discriminating between the different concrete subclasses of region. 
*/ @@ -123,6 +135,8 @@ public: bool base_region_p () const; bool descendent_of_p (const region *elder) const; const frame_region *maybe_get_frame_region () const; + enum memory_space get_memory_space () const; + bool can_have_initial_svalue_p () const; tree maybe_get_decl () const; @@ -141,6 +155,8 @@ public: static int cmp_ptr_ptr (const void *, const void *); + bool involves_p (const svalue *sval) const; + region_offset get_offset () const; /* Attempt to get the size of this region as a concrete number of bytes. diff --git a/gcc/analyzer/sm-malloc.cc b/gcc/analyzer/sm-malloc.cc index 40e64b3..9707a68 100644 --- a/gcc/analyzer/sm-malloc.cc +++ b/gcc/analyzer/sm-malloc.cc @@ -1198,6 +1198,25 @@ public: funcname, ev.m_expr); } + /* Implementation of pending_diagnostic::supercedes_p for + use_after_free. + + We want use-after-free to supercede use-of-unitialized-value, + so that if we have these at the same stmt, we don't emit + a use-of-uninitialized, just the use-after-free. + (this is because we fully purge information about freed + buffers when we free them to avoid state explosions, so + that if they are accessed after the free, it looks like + they are uninitialized). */ + + bool supercedes_p (const pending_diagnostic &other) const FINAL OVERRIDE + { + if (other.use_of_uninit_p ()) + return true; + + return false; + } + private: diagnostic_event_id_t m_free_event; const deallocator *m_deallocator; diff --git a/gcc/analyzer/store.cc b/gcc/analyzer/store.cc index a65c741..0042a20 100644 --- a/gcc/analyzer/store.cc +++ b/gcc/analyzer/store.cc @@ -1316,6 +1316,38 @@ binding_cluster::mark_region_as_unknown (store_manager *mgr, bind (mgr, reg, sval); } +/* Purge state involving SVAL. 
*/ + +void +binding_cluster::purge_state_involving (const svalue *sval, + region_model_manager *sval_mgr) +{ + auto_vec to_remove; + for (auto iter : m_map) + { + const binding_key *iter_key = iter.first; + if (const symbolic_binding *symbolic_key + = iter_key->dyn_cast_symbolic_binding ()) + { + const region *reg = symbolic_key->get_region (); + if (reg->involves_p (sval)) + to_remove.safe_push (iter_key); + } + const svalue *iter_sval = iter.second; + if (iter_sval->involves_p (sval)) + { + const svalue *new_sval + = sval_mgr->get_or_create_unknown_svalue (iter_sval->get_type ()); + m_map.put (iter_key, new_sval); + } + } + for (auto iter : to_remove) + { + m_map.remove (iter); + m_touched = true; + } +} + /* Get any SVAL bound to REG within this cluster via kind KIND, without checking parent regions of REG. */ @@ -2447,6 +2479,29 @@ store::mark_region_as_unknown (store_manager *mgr, const region *reg, cluster->mark_region_as_unknown (mgr, reg, uncertainty); } +/* Purge state involving SVAL. */ + +void +store::purge_state_involving (const svalue *sval, + region_model_manager *sval_mgr) +{ + auto_vec base_regs_to_purge; + for (auto iter : m_cluster_map) + { + const region *base_reg = iter.first; + if (base_reg->involves_p (sval)) + base_regs_to_purge.safe_push (base_reg); + else + { + binding_cluster *cluster = iter.second; + cluster->purge_state_involving (sval, sval_mgr); + } + } + + for (auto iter : base_regs_to_purge) + purge_cluster (iter); +} + /* Get the cluster for BASE_REG, or NULL (const version). */ const binding_cluster * diff --git a/gcc/analyzer/store.h b/gcc/analyzer/store.h index 2ac2923..bc58694 100644 --- a/gcc/analyzer/store.h +++ b/gcc/analyzer/store.h @@ -198,6 +198,7 @@ private: class byte_range; class concrete_binding; +class symbolic_binding; /* Abstract base class for describing ranges of bits within a binding_map that can have svalues bound to them. 
*/ @@ -220,6 +221,8 @@ public: virtual const concrete_binding *dyn_cast_concrete_binding () const { return NULL; } + virtual const symbolic_binding *dyn_cast_symbolic_binding () const + { return NULL; } }; /* A concrete range of bits. */ @@ -420,6 +423,9 @@ public: void dump_to_pp (pretty_printer *pp, bool simple) const FINAL OVERRIDE; + const symbolic_binding *dyn_cast_symbolic_binding () const FINAL OVERRIDE + { return this; } + const region *get_region () const { return m_region; } static int cmp_ptr_ptr (const void *, const void *); @@ -563,6 +569,8 @@ public: void zero_fill_region (store_manager *mgr, const region *reg); void mark_region_as_unknown (store_manager *mgr, const region *reg, uncertainty_t *uncertainty); + void purge_state_involving (const svalue *sval, + region_model_manager *sval_mgr); const svalue *get_binding (store_manager *mgr, const region *reg) const; const svalue *get_binding_recursive (store_manager *mgr, @@ -697,6 +705,8 @@ public: void zero_fill_region (store_manager *mgr, const region *reg); void mark_region_as_unknown (store_manager *mgr, const region *reg, uncertainty_t *uncertainty); + void purge_state_involving (const svalue *sval, + region_model_manager *sval_mgr); const binding_cluster *get_cluster (const region *base_reg) const; binding_cluster *get_cluster (const region *base_reg); diff --git a/gcc/analyzer/svalue.cc b/gcc/analyzer/svalue.cc index 70c23f0..22da769 100644 --- a/gcc/analyzer/svalue.cc +++ b/gcc/analyzer/svalue.cc @@ -158,6 +158,13 @@ svalue::can_merge_p (const svalue *other, || (other->get_kind () == SK_UNMERGEABLE)) return NULL; + /* Reject attempts to merge poisoned svalues with other svalues + (either non-poisoned, or other kinds of poison), so that e.g. + we identify paths in which a variable is conditionally uninitialized. */ + if (get_kind () == SK_POISONED + || other->get_kind () == SK_POISONED) + return NULL; + /* Reject attempts to merge NULL pointers with not-NULL-pointers. 
*/ if (POINTER_TYPE_P (get_type ())) { @@ -516,6 +523,12 @@ public: m_found = true; } + void visit_conjured_svalue (const conjured_svalue *candidate) + { + if (candidate == m_needle) + m_found = true; + } + bool found_p () const { return m_found; } private: @@ -528,8 +541,9 @@ private: bool svalue::involves_p (const svalue *other) const { - /* Currently only implemented for initial_svalue. */ - gcc_assert (other->get_kind () == SK_INITIAL); + /* Currently only implemented for these kinds. */ + gcc_assert (other->get_kind () == SK_INITIAL + || other->get_kind () == SK_CONJURED); involvement_visitor v (other); accept (&v); @@ -811,6 +825,8 @@ poison_kind_to_str (enum poison_kind kind) { default: gcc_unreachable (); + case POISON_KIND_UNINIT: + return "uninit"; case POISON_KIND_FREED: return "freed"; case POISON_KIND_POPPED_STACK: @@ -847,6 +863,18 @@ poisoned_svalue::accept (visitor *v) const v->visit_poisoned_svalue (this); } +/* Implementation of svalue::maybe_fold_bits_within vfunc + for poisoned_svalue. */ + +const svalue * +poisoned_svalue::maybe_fold_bits_within (tree type, + const bit_range &, + region_model_manager *mgr) const +{ + /* Bits within a poisoned value are also poisoned. */ + return mgr->get_or_create_poisoned_svalue (m_kind, type); +} + /* class setjmp_svalue's implementation is in engine.cc, so that it can use the declaration of exploded_node. */ diff --git a/gcc/analyzer/svalue.h b/gcc/analyzer/svalue.h index 5552fcf..54b97f8 100644 --- a/gcc/analyzer/svalue.h +++ b/gcc/analyzer/svalue.h @@ -324,6 +324,9 @@ public: enum poison_kind { + /* For use to describe uninitialized memory. */ + POISON_KIND_UNINIT, + /* For use to describe freed memory. 
*/ POISON_KIND_FREED, @@ -378,6 +381,11 @@ public: void dump_to_pp (pretty_printer *pp, bool simple) const FINAL OVERRIDE; void accept (visitor *v) const FINAL OVERRIDE; + const svalue * + maybe_fold_bits_within (tree type, + const bit_range &subrange, + region_model_manager *mgr) const FINAL OVERRIDE; + enum poison_kind get_poison_kind () const { return m_kind; } private: diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 62e165f..b16176e 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -9234,6 +9234,7 @@ Enabling this option effectively enables the following warnings: -Wanalyzer-tainted-array-index @gol -Wanalyzer-unsafe-call-within-signal-handler @gol -Wanalyzer-use-after-free @gol +-Wanalyzer-use-of-uninitialized-value @gol -Wanalyzer-use-of-pointer-in-stale-stack-frame @gol -Wanalyzer-write-to-const @gol -Wanalyzer-write-to-string-literal @gol @@ -9478,6 +9479,15 @@ detects an attempt to write through a pointer to a string literal. However, the analyzer does not prioritize detection of such paths, so false negatives are more likely relative to other warnings. +@item -Wno-analyzer-use-of-uninitialized-value +@opindex Wanalyzer-use-of-uninitialized-value +@opindex Wno-analyzer-use-of-uninitialized-value +This warning requires @option{-fanalyzer}, which enables it; use +@option{-Wno-analyzer-use-of-uninitialized-value} to disable it. + +This diagnostic warns for paths through the code in which an uninitialized +value is used. 
+ @end table Pertinent parameters for controlling the exploration are: diff --git a/gcc/testsuite/g++.dg/analyzer/pr93212.C b/gcc/testsuite/g++.dg/analyzer/pr93212.C index 1029e8d..41507e2 100644 --- a/gcc/testsuite/g++.dg/analyzer/pr93212.C +++ b/gcc/testsuite/g++.dg/analyzer/pr93212.C @@ -4,8 +4,8 @@ auto lol() { int aha = 3; - return [&aha] { - return aha; // { dg-warning "dereferencing pointer '.*' to within stale stack frame" } + return [&aha] { // { dg-warning "dereferencing pointer '.*' to within stale stack frame" } + return aha; }; /* TODO: may be worth special-casing the reporting of dangling references from lambdas, to highlight the declaration, and maybe fix diff --git a/gcc/testsuite/g++.dg/analyzer/pr94011.C b/gcc/testsuite/g++.dg/analyzer/pr94011.C index 2642aa4..81c0acd 100644 --- a/gcc/testsuite/g++.dg/analyzer/pr94011.C +++ b/gcc/testsuite/g++.dg/analyzer/pr94011.C @@ -1,5 +1,5 @@ // { dg-do compile { target c++11 } } -// { dg-additional-options "-O1" } +// { dg-additional-options "-O1 -Wno-analyzer-use-of-uninitialized-value" } template DV vu (DV j4) diff --git a/gcc/testsuite/g++.dg/analyzer/pr94503.C b/gcc/testsuite/g++.dg/analyzer/pr94503.C index 9432ac4..ecf7121 100644 --- a/gcc/testsuite/g++.dg/analyzer/pr94503.C +++ b/gcc/testsuite/g++.dg/analyzer/pr94503.C @@ -1,3 +1,5 @@ +// { dg-additional-options "-Wno-analyzer-use-of-uninitialized-value" } + template class allocator { public: allocator(const allocator &); diff --git a/gcc/testsuite/gcc.dg/analyzer/clobbers-1.c b/gcc/testsuite/gcc.dg/analyzer/clobbers-1.c index 824dbd4..6400f84 100644 --- a/gcc/testsuite/gcc.dg/analyzer/clobbers-1.c +++ b/gcc/testsuite/gcc.dg/analyzer/clobbers-1.c @@ -25,9 +25,8 @@ void test_1 (void) __analyzer_dump_exploded_nodes (0); /* { dg-warning "1 processed enode" } */ } -void test_2 (void) +void test_2 (struct foo f) { - struct foo f; f.i = 42; if (f.j) __analyzer_eval (f.j); /* { dg-warning "TRUE" } */ diff --git a/gcc/testsuite/gcc.dg/analyzer/data-model-1.c 
b/gcc/testsuite/gcc.dg/analyzer/data-model-1.c index 34932da..908d999 100644 --- a/gcc/testsuite/gcc.dg/analyzer/data-model-1.c +++ b/gcc/testsuite/gcc.dg/analyzer/data-model-1.c @@ -137,7 +137,7 @@ void test_11 (void) /* alloca. */ -void test_12 (void) +int test_12 (void) { void *p = __builtin_alloca (256); void *q = __builtin_alloca (256); @@ -145,14 +145,14 @@ void test_12 (void) /* alloca results should be unique. */ __analyzer_eval (p == q); /* { dg-warning "FALSE" } */ - // FIXME: complain about uses of poisoned values + return *(int *)p; /* { dg-warning "use of uninitialized value '\\*\\(int \\*\\)p" } */ } /* Use of uninit value. */ int test_12a (void) { int i; - return i; // FIXME: do we see the return stmt? + return i; /* { dg-warning "use of uninitialized value 'i'" } */ } void test_12b (void *p, void *q) @@ -165,9 +165,11 @@ int test_12c (void) int i; int j; - j = i; // FIXME: should complain about this + j = i; /* { dg-warning "use of uninitialized value 'i'" } */ - return j; + /* We should not emit followup warnings after the first warning about + an uninitialized value. */ + return j; /* { dg-bogus "use of uninitialized value" } */ } struct coord @@ -348,7 +350,9 @@ void test_19 (void) { int i, j; /* Compare two uninitialized locals. 
*/ - __analyzer_eval (i == j); /* { dg-warning "UNKNOWN" } */ + __analyzer_eval (i == j); /* { dg-warning "UNKNOWN" "unknown " } */ + /* { dg-warning "use of uninitialized value 'i'" "uninit i" { target *-*-* } .-1 } */ + /* { dg-warning "use of uninitialized value 'j'" "uninit j" { target *-*-* } .-2 } */ } void test_20 (int i, int j) @@ -649,8 +653,10 @@ void test_29b (void) __analyzer_eval (p[9].x == 109024); /* { dg-warning "TRUE" } */ __analyzer_eval (p[9].y == 109025); /* { dg-warning "TRUE" } */ - __analyzer_eval (p[10].x == 0); /* { dg-warning "UNKNOWN" } */ - __analyzer_eval (p[10].y == 0); /* { dg-warning "UNKNOWN" } */ + __analyzer_eval (p[10].x == 0); /* { dg-warning "UNKNOWN" "unknown" } */ + /* { dg-warning "use of uninitialized value 'p\\\[10\\\].x'" "uninit" { target *-*-* } .-1 } */ + __analyzer_eval (p[10].y == 0); /* { dg-warning "UNKNOWN" "unknown" } */ + /* { dg-warning "use of uninitialized value 'p\\\[10\\\].y'" "uninit" { target *-*-* } .-1 } */ q = &p[7]; @@ -698,8 +704,10 @@ void test_29c (int len) __analyzer_eval (p[9].x == 109024); /* { dg-warning "TRUE" } */ __analyzer_eval (p[9].y == 109025); /* { dg-warning "TRUE" } */ - __analyzer_eval (p[10].x == 0); /* { dg-warning "UNKNOWN" } */ - __analyzer_eval (p[10].y == 0); /* { dg-warning "UNKNOWN" } */ + __analyzer_eval (p[10].x == 0); /* { dg-warning "UNKNOWN" "unknown" } */ + /* { dg-warning "use of uninitialized value '\\*p\\\[10\\\].x'" "uninit" { target *-*-* } .-1 } */ + __analyzer_eval (p[10].y == 0); /* { dg-warning "UNKNOWN" "unknown" } */ + /* { dg-warning "use of uninitialized value '\\*p\\\[10\\\].y'" "uninit" { target *-*-* } .-1 } */ q = &p[7]; @@ -811,7 +819,7 @@ void test_36 (int i) int test_37 (void) { int *ptr; - return *ptr; /* { dg-warning "use of uninitialized value 'ptr'" "uninit-warning-removed" { xfail *-*-* } } */ + return *ptr; /* { dg-warning "use of uninitialized value 'ptr'" } */ } /* Write through uninitialized pointer. 
*/ @@ -819,7 +827,7 @@ int test_37 (void) void test_37a (int i) { int *ptr; - *ptr = i; /* { dg-warning "use of uninitialized value 'ptr'" "uninit-warning-removed" { xfail *-*-* } } */ + *ptr = i; /* { dg-warning "use of uninitialized value 'ptr'" } */ } // TODO: the various other ptr deref poisonings diff --git a/gcc/testsuite/gcc.dg/analyzer/data-model-20.c b/gcc/testsuite/gcc.dg/analyzer/data-model-20.c index 8fdbb6b..ff65883 100644 --- a/gcc/testsuite/gcc.dg/analyzer/data-model-20.c +++ b/gcc/testsuite/gcc.dg/analyzer/data-model-20.c @@ -17,7 +17,7 @@ test (int n) { for (; i >= 0; i++) { free(arr[i]); /* { dg-bogus "double-'free'" } */ } - free(arr); + free(arr); /* { dg-warning "leak" } */ return NULL; } } diff --git a/gcc/testsuite/gcc.dg/analyzer/explode-2.c b/gcc/testsuite/gcc.dg/analyzer/explode-2.c index 70d8fec..3b987e1 100644 --- a/gcc/testsuite/gcc.dg/analyzer/explode-2.c +++ b/gcc/testsuite/gcc.dg/analyzer/explode-2.c @@ -2,9 +2,11 @@ independently, so the total combined number of states at any program point within the loop is NUM_VARS * NUM_STATES. - Set the limits high enough that we can fully explore this. */ + However, due to the way the analyzer represents heap-allocated regions + this never terminates, eventually hitting the complexity limit + (PR analyzer/93695). 
*/ -/* { dg-additional-options "--param analyzer-max-enodes-per-program-point=200 --param analyzer-bb-explosion-factor=50" } */ +/* { dg-additional-options "-Wno-analyzer-too-complex -Wno-analyzer-malloc-leak" } */ #include @@ -12,35 +14,35 @@ extern int get (void); void test (void) { - void *p0, *p1, *p2, *p3; + void *p0 = NULL, *p1 = NULL, *p2 = NULL, *p3 = NULL; while (get ()) { switch (get ()) { default: case 0: - p0 = malloc (16); /* { dg-warning "leak" } */ + p0 = malloc (16); /* { dg-warning "leak" "" { xfail *-*-* } } */ break; case 1: free (p0); /* { dg-warning "double-'free' of 'p0'" "" { xfail *-*-* } } */ break; case 2: - p1 = malloc (16); /* { dg-warning "leak" } */ + p1 = malloc (16); /* { dg-warning "leak" "" { xfail *-*-* } } */ break; case 3: free (p1); /* { dg-warning "double-'free' of 'p1'" "" { xfail *-*-* } } */ break; case 4: - p2 = malloc (16); /* { dg-warning "leak" } */ + p2 = malloc (16); /* { dg-warning "leak" "" { xfail *-*-* } } */ break; case 5: free (p2); /* { dg-warning "double-'free' of 'p2'" "" { xfail *-*-* } } */ break; case 6: - p3 = malloc (16); /* { dg-warning "leak" } */ + p3 = malloc (16); /* { dg-warning "leak" "" { xfail *-*-* } } */ break; case 7: free (p3); /* { dg-warning "double-'free' of 'p3'" "" { xfail *-*-* } } */ diff --git a/gcc/testsuite/gcc.dg/analyzer/explode-2a.c b/gcc/testsuite/gcc.dg/analyzer/explode-2a.c index 126407f..f60354c 100644 --- a/gcc/testsuite/gcc.dg/analyzer/explode-2a.c +++ b/gcc/testsuite/gcc.dg/analyzer/explode-2a.c @@ -8,13 +8,13 @@ extern int get (void); void test (void) { - void *p0, *p1, *p2, *p3; + void *p0 = NULL, *p1 = NULL, *p2 = NULL, *p3 = NULL; /* Due to not purging constraints on SSA names within loops (PR analyzer/101068), the analyzer effectively treats the original explode-2.c as this code. 
*/ int a = get (); int b = get (); - while (a) + while (a) /* { dg-warning "leak" } */ { switch (b) { diff --git a/gcc/testsuite/gcc.dg/analyzer/fgets-1.c b/gcc/testsuite/gcc.dg/analyzer/fgets-1.c new file mode 100644 index 0000000..e93d24c --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/fgets-1.c @@ -0,0 +1,31 @@ +/* { dg-do "compile" } */ + +#define NULL ((void *) 0) +typedef struct _IO_FILE FILE; + +extern char *fgets(char *__restrict __s, int __n, + FILE *__restrict __stream); +extern char *fgets_unlocked(char *__restrict __s, int __n, + FILE *__restrict __stream); + +char +test_1 (FILE *fp) +{ + char buf[400]; + + if (fgets (buf, sizeof buf, fp) == NULL) + return 0; + + return buf[0]; +} + +char +test_2 (FILE *fp) +{ + char buf[400]; + + if (fgets_unlocked (buf, sizeof buf, fp) == NULL) + return 0; + + return buf[0]; +} diff --git a/gcc/testsuite/gcc.dg/analyzer/fread-1.c b/gcc/testsuite/gcc.dg/analyzer/fread-1.c new file mode 100644 index 0000000..593cb7f --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/fread-1.c @@ -0,0 +1,13 @@ +/* { dg-additional-options "-fanalyzer-checker=taint" } */ + +typedef __SIZE_TYPE__ size_t; + +extern size_t fread (void *, size_t, size_t, void *); + +int +test_1 (void *fp) +{ + int i; + fread (&i, sizeof (i), 1, fp); + return i; +} diff --git a/gcc/testsuite/gcc.dg/analyzer/malloc-1.c b/gcc/testsuite/gcc.dg/analyzer/malloc-1.c index 448b8558..df2fc9c 100644 --- a/gcc/testsuite/gcc.dg/analyzer/malloc-1.c +++ b/gcc/testsuite/gcc.dg/analyzer/malloc-1.c @@ -204,8 +204,7 @@ void test_16 (void) bar (); fail: - free (q); /* { dg-warning "free of uninitialized 'q'" "" { xfail *-*-* } } */ - /* TODO(xfail): implement uninitialized detection. 
*/ + free (q); /* { dg-warning "use of uninitialized value 'q'" } */ free (p); } @@ -459,8 +458,8 @@ int * test_40 (int i) { int *p = (int*)malloc(sizeof(int*)); - i = *p; /* { dg-warning "dereference of possibly-NULL 'p' \\\[CWE-690\\\]" } */ - /* TODO: (it's also uninitialized) */ + i = *p; /* { dg-warning "dereference of possibly-NULL 'p' \\\[CWE-690\\\]" "possibly-null" } */ + /* { dg-warning "use of uninitialized value '\\*p'" "uninit" { target *-*-*} .-1 } */ return p; } diff --git a/gcc/testsuite/gcc.dg/analyzer/memset-CVE-2017-18549-1.c b/gcc/testsuite/gcc.dg/analyzer/memset-CVE-2017-18549-1.c index 9dd1139..de9b5e3 100644 --- a/gcc/testsuite/gcc.dg/analyzer/memset-CVE-2017-18549-1.c +++ b/gcc/testsuite/gcc.dg/analyzer/memset-CVE-2017-18549-1.c @@ -37,6 +37,8 @@ struct aac_srb_reply #define ST_OK 0 #define SRB_STATUS_SUCCESS 0x01 +extern void check_uninit (u8 v); + /* Adapted from drivers/scsi/aacraid/commctrl.c */ static int aac_send_raw_srb(/* [...snip...] */) @@ -66,10 +68,8 @@ static int aac_send_raw_srb(/* [...snip...] */) __analyzer_eval (reply.sense_data_size == 0); /* { dg-warning "TRUE" } */ __analyzer_eval (reply.sense_data[0] == 0); /* { dg-warning "TRUE" } */ __analyzer_eval (reply.sense_data[AAC_SENSE_BUFFERSIZE - 1] == 0); /* { dg-warning "TRUE" } */ - /* TODO: the following should be detected as uninitialized, when - that diagnostic is reimplemented. */ - __analyzer_eval (reply.padding[0] == 0); /* { dg-warning "UNKNOWN" } */ - __analyzer_eval (reply.padding[1] == 0); /* { dg-warning "UNKNOWN" } */ + check_uninit (reply.padding[0]); /* { dg-warning "uninitialized value" } */ + check_uninit (reply.padding[1]); /* { dg-warning "uninitialized value" } */ } static int aac_send_raw_srb_fixed(/* [...snip...] 
*/) diff --git a/gcc/testsuite/gcc.dg/analyzer/pr93355-localealias-feasibility.c b/gcc/testsuite/gcc.dg/analyzer/pr93355-localealias-feasibility.c index 1a34d05..c7b49d2 100644 --- a/gcc/testsuite/gcc.dg/analyzer/pr93355-localealias-feasibility.c +++ b/gcc/testsuite/gcc.dg/analyzer/pr93355-localealias-feasibility.c @@ -30,6 +30,7 @@ typedef __SIZE_TYPE__ size_t; typedef struct _IO_FILE FILE; extern FILE *fopen (const char *__restrict __filename, const char *__restrict __modes); +extern size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream); extern int fclose (FILE *__stream); extern int isspace (int) __attribute__((__nothrow__, __leaf__)); @@ -50,6 +51,12 @@ read_alias_file (const char *fname, int fname_len) if (fp == NULL) return 0; + if (fread (buf, sizeof buf, 1, fp) != 1) + { + fclose (fp); + return 0; + } + cp = buf; /* Ignore leading white space. */ diff --git a/gcc/testsuite/gcc.dg/analyzer/pr94047.c b/gcc/testsuite/gcc.dg/analyzer/pr94047.c index 5107ec0..d13da3e 100644 --- a/gcc/testsuite/gcc.dg/analyzer/pr94047.c +++ b/gcc/testsuite/gcc.dg/analyzer/pr94047.c @@ -13,7 +13,7 @@ void foo (void) { struct list l; - tlist t = l; + tlist t = l; /* { dg-warning "use of uninitialized value 'l'" } */ for (;;) bar (&t); } diff --git a/gcc/testsuite/gcc.dg/analyzer/pr94851-2.c b/gcc/testsuite/gcc.dg/analyzer/pr94851-2.c index 6094721..b837451 100644 --- a/gcc/testsuite/gcc.dg/analyzer/pr94851-2.c +++ b/gcc/testsuite/gcc.dg/analyzer/pr94851-2.c @@ -45,7 +45,7 @@ int pamark(void) { if (curbp->b_amark == (AMARK *)NULL) curbp->b_amark = p; else - last->m_next = p; + last->m_next = p; /* { dg-warning "dereference of NULL 'last'" } */ } p->m_name = (char)c; /* { dg-bogus "leak of 'p'" "bogus leak" } */ diff --git a/gcc/testsuite/gcc.dg/analyzer/pr96841.c b/gcc/testsuite/gcc.dg/analyzer/pr96841.c index 8546661..c766582 100644 --- a/gcc/testsuite/gcc.dg/analyzer/pr96841.c +++ b/gcc/testsuite/gcc.dg/analyzer/pr96841.c @@ -10,10 +10,8 @@ void th (int *); void -bv 
(__SIZE_TYPE__ ny) +bv (__SIZE_TYPE__ ny, int ***mf) { - int ***mf; - while (l8 ()) { *mf = 0; diff --git a/gcc/testsuite/gcc.dg/analyzer/pr98628.c b/gcc/testsuite/gcc.dg/analyzer/pr98628.c index e2fa778..fa0ca96 100644 --- a/gcc/testsuite/gcc.dg/analyzer/pr98628.c +++ b/gcc/testsuite/gcc.dg/analyzer/pr98628.c @@ -7,8 +7,7 @@ struct chanset_t { struct chanset_t *next; char dname[]; }; -void help_subst() { - char *writeidx; +void help_subst(char *writeidx) { for (;; help_subst_chan = *help_subst_chan_0_0) { foo(help_subst_chan.next->dname); if (help_subst_chan_0_0) { diff --git a/gcc/testsuite/gcc.dg/analyzer/pr99042.c b/gcc/testsuite/gcc.dg/analyzer/pr99042.c index c3d124f..f28a9de 100644 --- a/gcc/testsuite/gcc.dg/analyzer/pr99042.c +++ b/gcc/testsuite/gcc.dg/analyzer/pr99042.c @@ -29,8 +29,8 @@ int test_3 (void) if ((p->file = fopen("test.txt", "w")) == NULL) return 1; unknown_fn (); - return 0; /* { dg-warning "leak" } */ -} + return 0; +} /* { dg-warning "leak" } */ int test_4 (void) { @@ -38,8 +38,8 @@ int test_4 (void) struct foo *p = &f; if ((p->file = fopen("test.txt", "w")) == NULL) return 1; - return 0; /* { dg-warning "leak" } */ -} + return 0; +} /* { dg-warning "leak" } */ int test_5 (void) { diff --git a/gcc/testsuite/gcc.dg/analyzer/symbolic-1.c b/gcc/testsuite/gcc.dg/analyzer/symbolic-1.c index feab9ce..0eba646 100644 --- a/gcc/testsuite/gcc.dg/analyzer/symbolic-1.c +++ b/gcc/testsuite/gcc.dg/analyzer/symbolic-1.c @@ -11,14 +11,16 @@ void test_1 (char a, char b, char c, char d, char e, char f, __analyzer_eval (arr[2] == a); /* { dg-warning "TRUE" } */ __analyzer_eval (arr[3] == b); /* { dg-warning "TRUE" } */ - __analyzer_eval (arr[4]); /* { dg-warning "UNKNOWN" } */ // TODO: report uninit + __analyzer_eval (arr[4]); /* { dg-warning "UNKNOWN" "unknown" } */ + /* { dg-warning "use of uninitialized value 'arr\\\[4\\\]'" "uninit" { target *-*-* } .-1 } */ /* Replace one concrete binding's value with a different value. 
*/ arr[3] = c; /* (3) */ __analyzer_eval (arr[2] == a); /* { dg-warning "TRUE" } */ __analyzer_eval (arr[3] == c); /* { dg-warning "TRUE" } */ __analyzer_eval (arr[3] == b); /* { dg-warning "UNKNOWN" } */ - __analyzer_eval (arr[4]); /* { dg-warning "UNKNOWN" } */ // TODO: report uninit + __analyzer_eval (arr[4]); /* { dg-warning "UNKNOWN" "unknown" } */ + /* { dg-warning "use of uninitialized value 'arr\\\[4\\\]'" "uninit" { target *-*-* } .-1 } */ /* Symbolic binding. */ arr[i] = d; /* (4) */ diff --git a/gcc/testsuite/gcc.dg/analyzer/symbolic-7.c b/gcc/testsuite/gcc.dg/analyzer/symbolic-7.c index 4f01367..665e0b6 100644 --- a/gcc/testsuite/gcc.dg/analyzer/symbolic-7.c +++ b/gcc/testsuite/gcc.dg/analyzer/symbolic-7.c @@ -37,8 +37,10 @@ void test_3 (int i) int arr[2]; /* Concrete reads. */ - __analyzer_eval (arr[0] == 42); /* { dg-warning "UNKNOWN" } */ + __analyzer_eval (arr[0] == 42); /* { dg-warning "UNKNOWN" "unknown" } */ + /* { dg-warning "use of uninitialized value 'arr\\\[0\\\]'" "uninit" { target *-*-* } .-1 } */ /* Symbolic read. */ - __analyzer_eval (arr[i] == 42); /* { dg-warning "UNKNOWN" } */ + __analyzer_eval (arr[i] == 42); /* { dg-warning "UNKNOWN" "unknown" } */ + /* { dg-warning "use of uninitialized value 'arr\\\[i\\\]'" "uninit" { target *-*-* } .-1 } */ } diff --git a/gcc/testsuite/gcc.dg/analyzer/torture/pr93649.c b/gcc/testsuite/gcc.dg/analyzer/torture/pr93649.c index 9d92939..314c8f3 100644 --- a/gcc/testsuite/gcc.dg/analyzer/torture/pr93649.c +++ b/gcc/testsuite/gcc.dg/analyzer/torture/pr93649.c @@ -1,3 +1,4 @@ +/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } { "" } } */ /* { dg-additional-options "-Wno-incompatible-pointer-types -Wno-analyzer-too-complex" } */ /* TODO: ideally we shouldn't have -Wno-analyzer-too-complex above; it appears to be needed due to the recursion. 
*/ @@ -57,7 +58,7 @@ ts (struct dz *cx) { struct dz nt; - if (nt.r5) + if (nt.r5) /* { dg-warning "use of uninitialized value 'nt.r5'" } */ { m6 (cx); h5 (cx); diff --git a/gcc/testsuite/gcc.dg/analyzer/uninit-1.c b/gcc/testsuite/gcc.dg/analyzer/uninit-1.c new file mode 100644 index 0000000..8fcdcd6 --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/uninit-1.c @@ -0,0 +1,44 @@ +#include "analyzer-decls.h" + +int test_1 (void) +{ + int i; + return i; /* { dg-warning "use of uninitialized value 'i'" } */ +} + +int test_2 (void) +{ + int i; + return i * 2; /* { dg-warning "use of uninitialized value 'i'" } */ +} + +int test_3 (void) +{ + static int i; + return i; +} + +int test_4 (void) +{ + int *p; + return *p; /* { dg-warning "use of uninitialized value 'p'" } */ +} + +int test_5 (int flag, int *q) +{ + int *p; + if (flag) /* { dg-message "following 'false' branch" } */ + p = q; + + /* There should be two enodes here, + i.e. not merging the init vs non-init states. */ + __analyzer_dump_exploded_nodes (0); /* { dg-warning "2 processed enodes" } */ + + return *p; /* { dg-warning "use of uninitialized value 'p'" } */ +} + +int test_6 (int i) +{ + int arr[10]; + return arr[i]; /* { dg-warning "use of uninitialized value 'arr\\\[i\\\]'" } */ +} diff --git a/gcc/testsuite/gcc.dg/analyzer/uninit-2.c b/gcc/testsuite/gcc.dg/analyzer/uninit-2.c new file mode 100644 index 0000000..0b0b8b6 --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/uninit-2.c @@ -0,0 +1,14 @@ +typedef __SIZE_TYPE__ size_t; + +extern size_t strlen (const char *__s) + __attribute__ ((__nothrow__ , __leaf__)) + __attribute__ ((__pure__)) + __attribute__ ((__nonnull__ (1))); + +extern char *read_file (const char *file); + +size_t test_1 (const char *file) +{ + char *str = read_file (file); + return strlen (str); +} diff --git a/gcc/testsuite/gcc.dg/analyzer/uninit-3.c b/gcc/testsuite/gcc.dg/analyzer/uninit-3.c new file mode 100644 index 0000000..fa33e0a --- /dev/null +++ 
b/gcc/testsuite/gcc.dg/analyzer/uninit-3.c @@ -0,0 +1,36 @@ +/* Reduced from linux 5.3.11: drivers/net/wireless/ath/ath10k/usb.c */ + +/* The original file has this licence header. */ + +// SPDX-License-Identifier: ISC +/* + * Copyright (c) 2007-2011 Atheros Communications Inc. + * Copyright (c) 2011-2012,2017 Qualcomm Atheros, Inc. + * Copyright (c) 2016-2017 Erik Stromdahl + */ + +/* Adapted from include/linux/compiler_attributes.h. */ +#define __printf(a, b) __attribute__((__format__(printf, a, b))) + +/* From drivers/net/wireless/ath/ath10k/core.h. */ + +struct ath10k; + +/* From drivers/net/wireless/ath/ath10k/debug.h. */ + +enum ath10k_debug_mask { + /* [...other values removed...] */ + ATH10K_DBG_USB_BULK = 0x00080000, +}; + +extern unsigned int ath10k_debug_mask; + +__printf(3, 4) void __ath10k_dbg(struct ath10k *ar, + enum ath10k_debug_mask mask, + const char *fmt, ...); + +static void ath10k_usb_hif_tx_sg(struct ath10k *ar) +{ + if (ath10k_debug_mask & ATH10K_DBG_USB_BULK) + __ath10k_dbg(ar, ATH10K_DBG_USB_BULK, "usb bulk transmit failed: %d\n", 42); +} diff --git a/gcc/testsuite/gcc.dg/analyzer/uninit-4.c b/gcc/testsuite/gcc.dg/analyzer/uninit-4.c new file mode 100644 index 0000000..791b111 --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/uninit-4.c @@ -0,0 +1,39 @@ +/* Example of interprocedural detection of an uninitialized field + in a heap-allocated struct. 
*/ + +#include +#include "analyzer-decls.h" + +struct foo +{ + int i; + int j; + int k; +}; + +struct foo *__attribute__((noinline)) +alloc_foo (int a, int b) +{ + struct foo *p = malloc (sizeof (struct foo)); + if (!p) + return NULL; + p->i = a; + p->k = b; + return p; +} + +void test (int x, int y, int z) +{ + struct foo *p = alloc_foo (x, z); + if (!p) + return; + + __analyzer_eval (p->i == x); /* { dg-warning "TRUE" } */ + + __analyzer_eval (p->j == y); /* { dg-warning "UNKNOWN" "unknown" } */ + /* { dg-warning "use of uninitialized value '\\*p\\.j'" "uninit" { target *-*-* } .-1 } */ + + __analyzer_eval (p->k == z); /* { dg-warning "TRUE" } */ + + free (p); +} diff --git a/gcc/testsuite/gcc.dg/analyzer/uninit-pr94713.c b/gcc/testsuite/gcc.dg/analyzer/uninit-pr94713.c new file mode 100644 index 0000000..cc337dc --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/uninit-pr94713.c @@ -0,0 +1,11 @@ +void f1 (int *); +void f2 (int); + +int foo (void) +{ + int *p; + + f1 (p); /* { dg-warning "use of uninitialized value 'p'" } */ + f2 (p[0]); /* { dg-warning "use of uninitialized value 'p'" } */ + return 0; +} diff --git a/gcc/testsuite/gcc.dg/analyzer/uninit-pr94714.c b/gcc/testsuite/gcc.dg/analyzer/uninit-pr94714.c new file mode 100644 index 0000000..df07f98 --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/uninit-pr94714.c @@ -0,0 +1,12 @@ +#include + +int main (void) +{ + int *p; + int i; + + p = &i; /* { dg-bogus "uninitialized" } */ + printf ("%d\n", p[0]); /* { dg-warning "use of uninitialized value '\\*p'" } */ + + return 0; +} diff --git a/gcc/testsuite/gcc.dg/analyzer/use-after-free-2.c b/gcc/testsuite/gcc.dg/analyzer/use-after-free-2.c new file mode 100644 index 0000000..fc138ee --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/use-after-free-2.c @@ -0,0 +1,8 @@ +int test (void) +{ + int *ptr = (int *)__builtin_malloc (sizeof (int)); + *ptr = 42; /* { dg-warning "dereference of possibly-NULL 'ptr'" } */ + __builtin_free (ptr); + + return *ptr; /* { 
dg-warning "use after 'free' of 'ptr'" "use-after-free" } */ +} diff --git a/gcc/testsuite/gcc.dg/analyzer/use-after-free-3.c b/gcc/testsuite/gcc.dg/analyzer/use-after-free-3.c new file mode 100644 index 0000000..b19fd3d --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/use-after-free-3.c @@ -0,0 +1,12 @@ +#include + +void test_1 (int x, int y, int *out) +{ + int *ptr = (int *)malloc (sizeof (int)); + if (!ptr) + return; + *ptr = 19; + + free (ptr); + *out = *ptr; /* { dg-warning "use after 'free' of 'ptr'" } */ +} diff --git a/gcc/testsuite/gcc.dg/analyzer/zlib-3.c b/gcc/testsuite/gcc.dg/analyzer/zlib-3.c index 5faada1..57f5dcd 100644 --- a/gcc/testsuite/gcc.dg/analyzer/zlib-3.c +++ b/gcc/testsuite/gcc.dg/analyzer/zlib-3.c @@ -179,7 +179,7 @@ static int huft_build(uInt *b, uInt n, uInt s, const uInt *d, const uInt *e, f = 1 << (k - w); for (j = i >> w; j < z; j += f) - q[j] = r; + q[j] = r; /* { dg-warning "use of uninitialized value 'r.base'" } */ mask = (1 << w) - 1; while ((i & mask) != x[h]) { diff --git a/gcc/testsuite/gcc.dg/analyzer/zlib-6.c b/gcc/testsuite/gcc.dg/analyzer/zlib-6.c index 0d814c0..c8e06c6 100644 --- a/gcc/testsuite/gcc.dg/analyzer/zlib-6.c +++ b/gcc/testsuite/gcc.dg/analyzer/zlib-6.c @@ -16,15 +16,8 @@ typedef struct inflate_blocks_state { extern int inflate_flush(inflate_blocks_statef *, z_stream *, int); -int inflate_blocks(inflate_blocks_statef *s, z_stream *z, int r) { - uInt t; - uLong b; - uInt k; - Byte *p; - uInt n; - Byte *q; - uInt m; - +int inflate_blocks(inflate_blocks_statef *s, z_stream *z, int r, + uLong b, uInt k, Byte *p, uInt n, Byte *q, uInt m) { while (k < (3)) { { if (n) @@ -41,7 +34,7 @@ int inflate_blocks(inflate_blocks_statef *s, z_stream *z, int r) { return inflate_flush(s, z, r); } }; - b |= ((uLong)(n--, *p++)) << k; /* { dg-warning "use of uninitialized value" "uninit-warning-removed" { xfail *-*-* } } */ + b |= ((uLong)(n--, *p++)) << k; k += 8; } } diff --git a/gcc/testsuite/gcc.dg/analyzer/zlib-6a.c 
b/gcc/testsuite/gcc.dg/analyzer/zlib-6a.c new file mode 100644 index 0000000..9676e0b --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/zlib-6a.c @@ -0,0 +1,47 @@ +typedef unsigned char Byte; +typedef unsigned int uInt; +typedef unsigned long uLong; + +typedef struct z_stream_s { + Byte *next_in; + uInt avail_in; + uLong total_in; +} z_stream; + +typedef struct inflate_blocks_state { + uInt bitk; + uLong bitb; + Byte *write; +} inflate_blocks_statef; + +extern int inflate_flush(inflate_blocks_statef *, z_stream *, int); + +int inflate_blocks(inflate_blocks_statef *s, z_stream *z, int r) { + uInt t; + uLong b; + uInt k; + Byte *p; + uInt n; + Byte *q; + uInt m; + + while (k < (3)) { /* { dg-warning "use of uninitialized value 'k'" } */ + { + if (n) /* { dg-warning "use of uninitialized value 'n'" } */ + r = 0; + else { + { + s->bitb = b; /* { dg-warning "use of uninitialized value 'b'" } */ + s->bitk = k; /* { dg-warning "use of uninitialized value 'k'" } */ + z->avail_in = n; /* { dg-warning "use of uninitialized value 'n'" } */ + z->total_in += p - z->next_in; /* { dg-warning "use of uninitialized value 'p'" } */ + z->next_in = p; /* { dg-warning "use of uninitialized value 'p'" } */ + s->write = q; /* { dg-warning "use of uninitialized value 'q'" } */ + } + return inflate_flush(s, z, r); + } + }; + b |= ((uLong)(n--, *p++)) << k; /* { dg-warning "use of uninitialized value" } */ + k += 8; /* { dg-warning "use of uninitialized value 'k'" } */ + } +} diff --git a/gcc/testsuite/gfortran.dg/analyzer/pr97668.f b/gcc/testsuite/gfortran.dg/analyzer/pr97668.f index 568c891..abb6bb2 100644 --- a/gcc/testsuite/gfortran.dg/analyzer/pr97668.f +++ b/gcc/testsuite/gfortran.dg/analyzer/pr97668.f @@ -1,4 +1,4 @@ -c { dg-additional-options "-std=legacy" } +c { dg-additional-options "-std=legacy -Wno-analyzer-use-of-uninitialized-value -Wno-analyzer-too-complex" } SUBROUTINE PPADD (A, C, BH) -- cgit v1.1 From f0500db3692276f60e0562c17c87a0cb03e34398 Mon Sep 17 00:00:00 2001 From: 
Martin Sebor Date: Thu, 15 Jul 2021 13:15:03 -0600 Subject: Detect buffer overflow by aggregate and vector stores [PR97027]. Resolves: PR middle-end/97027 - missing warning on buffer overflow storing a larger scalar into a smaller array gcc/ChangeLog: PR middle-end/97027 * tree-ssa-strlen.c (handle_assign): New function. (maybe_warn_overflow): Add argument. (nonzero_bytes_for_type): New function. (count_nonzero_bytes): Handle more tree types. Call nonzero_bytes_for_type. (count_nonzero_bytes): Handle types. (handle_store): Handle stores from function calls. (strlen_check_and_optimize_call): Move code to handle_assign. Call it for assignments from function calls. gcc/testsuite/ChangeLog: PR middle-end/97027 * gcc.dg/Wstringop-overflow-15.c: Remove an xfail. * gcc.dg/Wstringop-overflow-47.c: Adjust xfails. * gcc.dg/torture/pr69170.c: Avoid valid warnings. * gcc.dg/torture/pr70025.c: Prune out a false positive. * gcc.dg/vect/pr97769.c: Initialize a loop control variable. * gcc.target/i386/pr92658-avx512bw-trunc.c: Increase buffer size to avoid overflow. * gcc.target/i386/pr92658-avx512f.c: Same. * gcc.dg/Wstringop-overflow-68.c: New test. * gcc.dg/Wstringop-overflow-69.c: New test. * gcc.dg/Wstringop-overflow-70.c: New test. * gcc.dg/Wstringop-overflow-71.c: New test. * gcc.dg/strlenopt-95.c: New test. 
--- gcc/testsuite/gcc.dg/Wstringop-overflow-15.c | 2 +- gcc/testsuite/gcc.dg/Wstringop-overflow-47.c | 17 +- gcc/testsuite/gcc.dg/Wstringop-overflow-68.c | 104 +++++++++ gcc/testsuite/gcc.dg/Wstringop-overflow-69.c | 84 +++++++ gcc/testsuite/gcc.dg/Wstringop-overflow-70.c | 21 ++ gcc/testsuite/gcc.dg/Wstringop-overflow-71.c | 105 +++++++++ gcc/testsuite/gcc.dg/strlenopt-95.c | 65 ++++++ gcc/testsuite/gcc.dg/torture/pr69170.c | 2 +- gcc/testsuite/gcc.dg/torture/pr70025.c | 5 + gcc/testsuite/gcc.dg/vect/pr97769.c | 2 +- .../gcc.target/i386/pr92658-avx512bw-trunc.c | 4 +- gcc/testsuite/gcc.target/i386/pr92658-avx512f.c | 4 +- gcc/tree-ssa-strlen.c | 255 +++++++++++++++------ 13 files changed, 588 insertions(+), 82 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/Wstringop-overflow-68.c create mode 100644 gcc/testsuite/gcc.dg/Wstringop-overflow-69.c create mode 100644 gcc/testsuite/gcc.dg/Wstringop-overflow-70.c create mode 100644 gcc/testsuite/gcc.dg/Wstringop-overflow-71.c create mode 100644 gcc/testsuite/gcc.dg/strlenopt-95.c (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/Wstringop-overflow-15.c b/gcc/testsuite/gcc.dg/Wstringop-overflow-15.c index 1907bac..87f8462 100644 --- a/gcc/testsuite/gcc.dg/Wstringop-overflow-15.c +++ b/gcc/testsuite/gcc.dg/Wstringop-overflow-15.c @@ -30,7 +30,7 @@ void vla_bounded (int n) a[0] = 0; a[1] = 1; a[n] = n; // { dg-warning "\\\[-Wstringop-overflow" "pr82608" { xfail *-*-* } } - a[69] = n; // { dg-warning "\\\[-Wstringop-overflow" "pr82608" { xfail *-*-* } } + a[69] = n; // { dg-warning "\\\[-Wstringop-overflow" "pr82608" } sink (&a); } diff --git a/gcc/testsuite/gcc.dg/Wstringop-overflow-47.c b/gcc/testsuite/gcc.dg/Wstringop-overflow-47.c index 6412874..968f6ee 100644 --- a/gcc/testsuite/gcc.dg/Wstringop-overflow-47.c +++ b/gcc/testsuite/gcc.dg/Wstringop-overflow-47.c @@ -31,15 +31,15 @@ void nowarn_c32 (char c) void warn_c32 (char c) { - extern char warn_a32[32]; // { dg-message "at offset 32 into destination object 
'warn_a32' of size 32" "pr97027" } + extern char warn_a32[32]; // { dg-message "at offset (32|1) into destination object 'warn_a32' of size 32" "pr97027" } void *p = warn_a32 + 1; - *(C32*)p = (C32){ c }; // { dg-warning "writing 1 byte into a region of size 0" "pr97027" } + *(C32*)p = (C32){ c }; // { dg-warning "writing (1 byte|32 bytes) into a region of size (0|31)" "pr97027" } /* Verify a local variable too. */ char a32[32]; p = a32 + 1; - *(C32*)p = (C32){ c }; // { dg-warning "writing 1 byte into a region of size 0" "pr97027" } + *(C32*)p = (C32){ c }; // { dg-warning "writing (1 byte|32 bytes) into a region of size (0|31)" "pr97027" } sink (p); } @@ -60,15 +60,20 @@ void nowarn_i16_64 (int16_t i) void warn_i16_64 (int16_t i) { - extern char warn_a64[64]; // { dg-message "at offset 128 to object 'warn_a64' with size 64" "pr97027" { xfail *-*-* } } +/* The IL below that's visible to the warning changes from one target to + another. On some like aarch64 it's a single vector store, on others + like x86_64 it's a series of BIT_FIELD_REFs. The overflow by + the former is detected but the latter is not yet. */ + + extern char warn_a64[64]; // { dg-message "at offset (1|128) into destination object 'warn_a64' of size (63|64)" "pr97027 note" { xfail { ! aarch64-*-* } } } void *p = warn_a64 + 1; I16_64 *q = (I16_64*)p; - *q = (I16_64){ i }; // { dg-warning "writing 1 byte into a region of size 0" "pr97027" { xfail *-*-* } } + *q = (I16_64){ i }; // { dg-warning "writing (1 byte|64 bytes) into a region of size (0|63)" "pr97027" { xfail { ! aarch64-*-* } } } char a64[64]; p = a64 + 1; q = (I16_64*)p; - *q = (I16_64){ i }; // { dg-warning "writing 1 byte into a region of size 0" "pr97027" { xfail *-*-* } } + *q = (I16_64){ i }; // { dg-warning "writing (1 byte|64 bytes) into a region of size (0|63)" "pr97027" { xfail { ! 
aarch64-*-* } } } sink (p); } diff --git a/gcc/testsuite/gcc.dg/Wstringop-overflow-68.c b/gcc/testsuite/gcc.dg/Wstringop-overflow-68.c new file mode 100644 index 0000000..d2d3ae5 --- /dev/null +++ b/gcc/testsuite/gcc.dg/Wstringop-overflow-68.c @@ -0,0 +1,104 @@ +/* PR tree-optimization/97027 - missing warning on buffer overflow storing + a larger scalar into a smaller array + Verify overflow by aggregate stores. + { dg-do compile } + { dg-options "-O2" } */ + +#define A(N) (A ## N) +#define Ac1 (AC1){ 0 } +#define Ac2 (AC2){ 0, 1 } +#define Ac4 (AC4){ 0, 1, 2, 3 } +#define Ac8 (AC8){ 0, 1, 2, 3, 4, 5, 6, 7 } +#define Ac16 (AC16){ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } + +typedef struct AC1 { char a[1]; } AC1; +typedef struct AC2 { char a[2]; } AC2; +typedef struct AC3 { char a[3]; } AC3; +typedef struct AC4 { char a[4]; } AC4; +typedef struct AC5 { char a[5]; } AC5; +typedef struct AC8 { char a[8]; } AC8; +typedef struct AC16 { char a[16]; } AC16; + +extern char a1[1], a2[2], a3[3], a4[4], a5[5], a6[6], a7[7], a8[8], a15[15]; + +extern AC1 ac1; +extern AC2 ac2; +extern AC4 ac4; +extern AC8 ac8; +extern AC16 ac16; + +extern AC1 fac1 (void); +extern AC2 fac2 (void); +extern AC4 fac4 (void); +extern AC8 fac8 (void); +extern AC16 fac16 (void); + +void nowarn (void) +{ + *(AC1*)a1 = Ac1; + *(AC2*)a2 = Ac2; + *(AC4*)a4 = Ac4; + *(AC4*)a5 = Ac4; + *(AC4*)a6 = Ac4; + *(AC4*)a7 = Ac4; + *(AC8*)a8 = Ac8; + *(AC8*)a15 = Ac8; +} + +void warn_comp_lit_zero (void) +{ + *(AC2*)a1 = (AC2){ }; // { dg-warning "writing 2 bytes into a region of size 1" } + *(AC4*)a2 = (AC4){ }; // { dg-warning "writing 4 bytes into a region of size 2" } + *(AC4*)a3 = (AC4){ }; // { dg-warning "writing 4 bytes into a region of size 3" } + *(AC8*)a4 = (AC8){ }; // { dg-warning "writing 8 bytes into a region of size 4" } + *(AC8*)a7 = (AC8){ }; // { dg-warning "writing 8 bytes into a region of size 7" } + *(AC16*)a15 = (AC16){ };// { dg-warning "writing 16 bytes into a region of size 15" 
} +} + +void warn_comp_lit (void) +{ + *(AC2*)a1 = Ac2; // { dg-warning "writing 2 bytes into a region of size 1" "pr??????" { xfail *-*-* } } + *(AC4*)a2 = Ac4; // { dg-warning "writing 4 bytes into a region of size 2" "pr??????" { xfail *-*-* } } + *(AC4*)a3 = Ac4; // { dg-warning "writing 4 bytes into a region of size 3" "pr??????" { xfail *-*-* } } + *(AC8*)a4 = Ac8; // { dg-warning "writing 8 bytes into a region of size 4" "pr??????" { xfail *-*-* } } + *(AC8*)a7 = Ac8; // { dg-warning "writing 8 bytes into a region of size 7" "pr??????" { xfail *-*-* } } + *(AC16*)a15 = Ac16; // { dg-warning "writing 16 bytes into a region of size 15" "pr??????" { xfail *-*-* } } +} + +void warn_aggr_decl (void) +{ + *(AC2*)a1 = ac2; // { dg-warning "writing 2 bytes into a region of size 1" } + *(AC4*)a2 = ac4; // { dg-warning "writing 4 bytes into a region of size 2" } + *(AC4*)a3 = ac4; // { dg-warning "writing 4 bytes into a region of size 3" } + *(AC8*)a4 = ac8; // { dg-warning "writing 8 bytes into a region of size 4" } + *(AC8*)a7 = ac8; // { dg-warning "writing 8 bytes into a region of size 7" } + *(AC16*)a15 = ac16; // { dg-warning "writing 16 bytes into a region of size 15" } +} + +void warn_aggr_parm (AC2 pc2, AC4 pc4, AC8 pc8, AC16 pc16) +{ + *(AC2*)a1 = pc2; // { dg-warning "writing 2 bytes into a region of size 1" } + *(AC4*)a2 = pc4; // { dg-warning "writing 4 bytes into a region of size 2" } + *(AC4*)a3 = pc4; // { dg-warning "writing 4 bytes into a region of size 3" } + *(AC8*)a4 = pc8; // { dg-warning "writing 8 bytes into a region of size 4" } + *(AC8*)a7 = pc8; // { dg-warning "writing 8 bytes into a region of size 7" } + *(AC16*)a15 = pc16; // { dg-warning "writing 16 bytes into a region of size 15" } +} + +void warn_aggr_func (void) +{ + *(AC2*)a1 = fac2 (); // { dg-warning "writing 2 bytes into a region of size 1" } + *(AC4*)a2 = fac4 (); // { dg-warning "writing 4 bytes into a region of size 2" } + *(AC4*)a3 = fac4 (); // { dg-warning "writing 4 bytes 
into a region of size 3" } + *(AC8*)a4 = fac8 (); // { dg-warning "writing 8 bytes into a region of size 4" } + *(AC8*)a7 = fac8 (); // { dg-warning "writing 8 bytes into a region of size 7" } + *(AC16*)a15 = fac16 ();// { dg-warning "writing 16 bytes into a region of size 15" } + + extern AC2 fac2_x (); + + *(AC2*)a1 = fac2_x (); // { dg-warning "writing 2 bytes into a region of size 1" } + + extern AC2 fac2_p (char*); + + *(AC2*)a1 = fac2_p (0); // { dg-warning "writing 2 bytes into a region of size 1" } +} diff --git a/gcc/testsuite/gcc.dg/Wstringop-overflow-69.c b/gcc/testsuite/gcc.dg/Wstringop-overflow-69.c new file mode 100644 index 0000000..754b481 --- /dev/null +++ b/gcc/testsuite/gcc.dg/Wstringop-overflow-69.c @@ -0,0 +1,84 @@ +/* PR tree-optimization/97027 - missing warning on buffer overflow storing + a larger scalar into a smaller array + Verify overflow by vector stores. + { dg-do compile } + { dg-options "-O2" } */ + +#define V(N) __attribute__ ((vector_size (N))) +#define C1 (VC1){ 0 } +#define C2 (VC2){ 0, 1 } +#define C4 (VC4){ 0, 1, 2, 3 } +#define C8 (VC8){ 0, 1, 2, 3, 4, 5, 6, 7 } +#define C16 (VC16){ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } + +typedef V (1) char VC1; +typedef V (2) char VC2; +typedef V (4) char VC4; +typedef V (8) char VC8; +typedef V (16) char VC16; + +extern char a1[1], a2[2], a3[3], a4[4], a5[5], a6[6], a7[7], a8[8], a15[15]; + +extern VC1 c1; +extern VC2 c2; +extern VC4 c4; +extern VC8 c8; +extern VC16 c16; + +extern VC1 fc1 (void); +extern VC2 fc2 (void); +extern VC4 fc4 (void); +extern VC8 fc8 (void); +extern VC16 fc16 (void); + +void nowarn (void) +{ + *(VC1*)a1 = C1; + *(VC2*)a2 = C2; + *(VC4*)a4 = C4; + *(VC4*)a5 = C4; + *(VC4*)a6 = C4; + *(VC4*)a7 = C4; + *(VC8*)a8 = C8; + *(VC8*)a15 = C8; +} + +void warn_vec_lit (void) +{ + *(VC2*)a1 = C2; // { dg-warning "writing 2 bytes into a region of size 1" } + *(VC4*)a2 = C4; // { dg-warning "writing 4 bytes into a region of size 2" } + *(VC4*)a3 = C4; // { 
dg-warning "writing 4 bytes into a region of size 3" } + *(VC8*)a4 = C8; // { dg-warning "writing 8 bytes into a region of size 4" } + *(VC8*)a7 = C8; // { dg-warning "writing 8 bytes into a region of size 7" } + *(VC16*)a15 = C16; // { dg-warning "writing 16 bytes into a region of size 15" } +} + +void warn_vec_decl (void) +{ + *(VC2*)a1 = c2; // { dg-warning "writing 2 bytes into a region of size 1" } + *(VC4*)a2 = c4; // { dg-warning "writing 4 bytes into a region of size 2" } + *(VC4*)a3 = c4; // { dg-warning "writing 4 bytes into a region of size 3" } + *(VC8*)a4 = c8; // { dg-warning "writing 8 bytes into a region of size 4" } + *(VC8*)a7 = c8; // { dg-warning "writing 8 bytes into a region of size 7" } + *(VC16*)a15 = c16; // { dg-warning "writing 16 bytes into a region of size 15" } +} + +void warn_vec_parm (VC2 pc2, VC4 pc4, VC8 pc8, VC16 pc16) +{ + *(VC2*)a1 = pc2; // { dg-warning "writing 2 bytes into a region of size 1" } + *(VC4*)a2 = pc4; // { dg-warning "writing 4 bytes into a region of size 2" } + *(VC4*)a3 = pc4; // { dg-warning "writing 4 bytes into a region of size 3" } + *(VC8*)a4 = pc8; // { dg-warning "writing 8 bytes into a region of size 4" } + *(VC8*)a7 = pc8; // { dg-warning "writing 8 bytes into a region of size 7" } + *(VC16*)a15 = pc16; // { dg-warning "writing 16 bytes into a region of size 15" } +} + +void warn_vec_func (void) +{ + *(VC2*)a1 = fc2 (); // { dg-warning "writing 2 bytes into a region of size 1" } + *(VC4*)a2 = fc4 (); // { dg-warning "writing 4 bytes into a region of size 2" } + *(VC4*)a3 = fc4 (); // { dg-warning "writing 4 bytes into a region of size 3" } + *(VC8*)a4 = fc8 (); // { dg-warning "writing 8 bytes into a region of size 4" } + *(VC8*)a7 = fc8 (); // { dg-warning "writing 8 bytes into a region of size 7" } + *(VC16*)a15 = fc16 ();// { dg-warning "writing 16 bytes into a region of size 15" } +} diff --git a/gcc/testsuite/gcc.dg/Wstringop-overflow-70.c b/gcc/testsuite/gcc.dg/Wstringop-overflow-70.c new file 
mode 100644 index 0000000..5d8bfa9 --- /dev/null +++ b/gcc/testsuite/gcc.dg/Wstringop-overflow-70.c @@ -0,0 +1,21 @@ +/* PR tree-optimization/97027 - missing warning on buffer overflow storing + a larger scalar into a smaller array + Verify overflow by vector stores. + { dg-do compile } + { dg-options "-O3" } */ + +void* nowarn_loop (void) +{ + char *p = __builtin_malloc (16); + for (int i = 0; i != 16; ++i) + p[i] = i; + return p; +} + +void* warn_loop (void) +{ + char *p = __builtin_malloc (15); + for (int i = 0; i != 16; ++i) + p[i] = i; // { dg-warning "writing 16 bytes into a region of size 15" } + return p; +} diff --git a/gcc/testsuite/gcc.dg/Wstringop-overflow-71.c b/gcc/testsuite/gcc.dg/Wstringop-overflow-71.c new file mode 100644 index 0000000..dccee35 --- /dev/null +++ b/gcc/testsuite/gcc.dg/Wstringop-overflow-71.c @@ -0,0 +1,105 @@ +/* PR tree-optimization/97027 - missing warning on buffer overflow storing + a larger scalar into a smaller array + Verify warnings for overflow by stores of results of built-in functions. 
+ { dg-do compile } + { dg-options "-O2" } */ + +typedef __INT16_TYPE__ int16_t; +typedef __SIZE_TYPE__ size_t; + +extern int abs (int); + +extern void* alloca (size_t); + +extern double nan (const char *); +_Decimal32 nand32 (const char *); + +extern size_t strlen (const char *); +extern char* strcpy (char *, const char *); + + +extern unsigned char ax[], a1[1], a2[2], a8[8]; + + +void nowarn_abs (int i) +{ + *(int *)ax = abs (i); + *(char *)a1 = abs (i); +} + +void warn_abs (int i) +{ + *(int *)a1 = abs (i); // { dg-warning "\\\[-Wstringop-overflow" } +} + + +void nowarn_alloca (size_t n) +{ + *(void **)ax = alloca (n); +} + +void warn_alloca (size_t n) +{ + *(void **)a1 = alloca (n); // { dg-warning "\\\[-Wstringop-overflow" } +} + + +void nowarn_complex (double x, double i) +{ + *(_Complex double *)ax = __builtin_complex (x, i); +} + +void warn_complex (double x, double i) +{ + _Complex double *p = (_Complex double *)a1; + *p = __builtin_complex (x, i); // { dg-warning "\\\[-Wstringop-overflow" "pr101455" { xfail *-*-* } } +} + + +void nowarn_nan (const char *s) +{ + *(double *)ax = nan (s); +} + +void warn_nan (const char *s) +{ + *(double *)a1 = nan (s); // { dg-warning "\\\[-Wstringop-overflow" } +} + + +void nowarn_nand32 (const char *s) +{ + *(_Decimal32 *)ax = nand32 (s); +} + +void warn_nand32 (const char *s) +{ + *(_Decimal32 *)a1 = nand32 (s); // { dg-warning "\\\[-Wstringop-overflow" } +} + + +void nowarn_strlen (const char *s1, const char *s2, const char *s3) +{ + *(char *)ax = strlen (s1); + *(char *)a1 = strlen (s2); + *(size_t *)a8 = strlen (s3); +} + +void warn_strlen (const char *s1, const char *s2) +{ + *(int16_t *)a1 = strlen (s1); // { dg-warning "\\\[-Wstringop-overflow" } + *(size_t *)a2 = strlen (s2); // { dg-warning "\\\[-Wstringop-overflow" } +} + + +void nowarn_strcpy (char *s1, char *s2, const char *s3) +{ + *(char **)ax = strcpy (s1, s2); + *(char **)a8 = strcpy (s2, s3); +} + +void warn_strcpy (char *s1, char *s2, const char *s3) +{ 
+ *(char **)a1 = strcpy (s1, s2); // { dg-warning "\\\[-Wstringop-overflow" } + *(char **)a2 = strcpy (s2, s3); // { dg-warning "\\\[-Wstringop-overflow" } +} diff --git a/gcc/testsuite/gcc.dg/strlenopt-95.c b/gcc/testsuite/gcc.dg/strlenopt-95.c new file mode 100644 index 0000000..505bc99 --- /dev/null +++ b/gcc/testsuite/gcc.dg/strlenopt-95.c @@ -0,0 +1,65 @@ +/* Verify strlen results of vector assignments. + { dg-do compile } + { dg-options "-O2 -Wall" } */ + +#include "strlenopt.h" + +#define V(N) __attribute__ ((vector_size (N))) + +typedef V (1) char VC1; +typedef V (2) char VC2; +typedef V (4) char VC4; +typedef V (8) char VC8; +typedef V (16) char VC16; + +extern char a[]; + +#define A(expr) ((expr) ? (void)0 : abort ()) + +void test_fold (int i) +{ + *(VC4*)a = (VC4){ }; + A (strlen (a) == 0); + A (!a[1] && !a[2] && !a[3]); + + *(VC4*)a = (VC4){ 0, 1 }; + A (strlen (a) == 0); + A (a[1] == 1 && !a[2] && !a[3]); + + *(VC4*)a = (VC4){ 1 }; + A (strlen (a) == 1); + A (!a[1] && !a[2] && !a[3]); + + *(VC4*)a = (VC4){ 1, 0, 3 }; + A (strlen (a) == 1); + A (!a[1] && a[2] == 3 && !a[3]); + + *(VC4*)a = (VC4){ 1, 2 }; + A (strlen (a) == 2); + A (!a[2] && !a[3]); + + *(VC4*)a = (VC4){ 1, 2, 0, 4 }; + A (strlen (a) == 2); + A (!a[2] && a[3] == 4); + + *(VC4*)a = (VC4){ 1, 2, 3 }; + A (strlen (a) == 3); + A (!a[3]); + + *(VC8*)a = (VC8){ 1, 2, 3, 0, 5 }; + A (strlen (a) == 3); + + *(VC8*)a = (VC8){ 1, 2, 3, 0, 5, 6 }; + A (strlen (a) == 3); + + *(VC8*)a = (VC8){ 1, 2, 3, 0, 5, 6, 7 }; + A (strlen (a) == 3); + A (strlen (a + 1) == 2); + A (strlen (a + 2) == 1); + A (strlen (a + 3) == 0); + + A (a[4] == 5 && a[5] == 6 && a[6] == 7 && a[7] == 8); +} + +/* { dg-final { scan-tree-dump-not "abort \\(" "strlen1" } } + { dg-final { scan-tree-dump-not "strlen \\(" "strlen1" } } */ diff --git a/gcc/testsuite/gcc.dg/torture/pr69170.c b/gcc/testsuite/gcc.dg/torture/pr69170.c index 2af0bde..a39125a 100644 --- a/gcc/testsuite/gcc.dg/torture/pr69170.c +++ 
b/gcc/testsuite/gcc.dg/torture/pr69170.c @@ -6,7 +6,7 @@ typedef struct { char buf[]; } hash_state; int a; -hash_state b; +extern hash_state b; void fn1() { a = 0; diff --git a/gcc/testsuite/gcc.dg/torture/pr70025.c b/gcc/testsuite/gcc.dg/torture/pr70025.c index 6c43a0a..7cf28c4 100644 --- a/gcc/testsuite/gcc.dg/torture/pr70025.c +++ b/gcc/testsuite/gcc.dg/torture/pr70025.c @@ -80,3 +80,8 @@ main () __builtin_abort (); return 0; } + +/* At -O3 the loop in bar() is vectorized and results in a (possibly + unreachable) out-of-bounds store to p.d7[8]: + _22(D)->d7[8] = _122; + { dg-prune-output "-Wstringop-overflow" } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr97769.c b/gcc/testsuite/gcc.dg/vect/pr97769.c index 127f91a..59e0b46 100644 --- a/gcc/testsuite/gcc.dg/vect/pr97769.c +++ b/gcc/testsuite/gcc.dg/vect/pr97769.c @@ -25,7 +25,7 @@ fn2(tmp *p1) { char *d = (char *)p1->d1; int *b = p1->h1; - for (int a; a; a++, d += 4) + for (int a = 0; a; a++, d += 4) fn1(d, *b++); } diff --git a/gcc/testsuite/gcc.target/i386/pr92658-avx512bw-trunc.c b/gcc/testsuite/gcc.target/i386/pr92658-avx512bw-trunc.c index c1982f9..fa6d36d 100644 --- a/gcc/testsuite/gcc.target/i386/pr92658-avx512bw-trunc.c +++ b/gcc/testsuite/gcc.target/i386/pr92658-avx512bw-trunc.c @@ -13,7 +13,7 @@ typedef unsigned short v32hi __attribute__((vector_size (64))); void truncwb_512 (v32qi * dst, v32hi * __restrict src) { - unsigned char tem[8]; + unsigned char tem[32]; tem[0] = (*src)[0]; tem[1] = (*src)[1]; tem[2] = (*src)[2]; @@ -52,7 +52,7 @@ truncwb_512 (v32qi * dst, v32hi * __restrict src) void truncwb_256 (v16qi * dst, v16hi * __restrict src) { - unsigned char tem[8]; + unsigned char tem[16]; tem[0] = (*src)[0]; tem[1] = (*src)[1]; tem[2] = (*src)[2]; diff --git a/gcc/testsuite/gcc.target/i386/pr92658-avx512f.c b/gcc/testsuite/gcc.target/i386/pr92658-avx512f.c index e9ee3d2..e26b06e 100644 --- a/gcc/testsuite/gcc.target/i386/pr92658-avx512f.c +++ b/gcc/testsuite/gcc.target/i386/pr92658-avx512f.c @@ -54,7 
+54,7 @@ truncqb (v8qi * dst, v8di * __restrict src) void truncdw (v16hi * dst, v16si * __restrict src) { - unsigned short tem[8]; + unsigned short tem[16]; tem[0] = (*src)[0]; tem[1] = (*src)[1]; tem[2] = (*src)[2]; @@ -78,7 +78,7 @@ truncdw (v16hi * dst, v16si * __restrict src) void truncdb (v16qi * dst, v16si * __restrict src) { - unsigned char tem[8]; + unsigned char tem[16]; tem[0] = (*src)[0]; tem[1] = (*src)[1]; tem[2] = (*src)[2]; diff --git a/gcc/tree-ssa-strlen.c b/gcc/tree-ssa-strlen.c index 94257df..799c21f 100644 --- a/gcc/tree-ssa-strlen.c +++ b/gcc/tree-ssa-strlen.c @@ -192,6 +192,8 @@ struct laststmt_struct static int get_stridx_plus_constant (strinfo *, unsigned HOST_WIDE_INT, tree); static void handle_builtin_stxncpy_strncat (bool, gimple_stmt_iterator *); +static bool handle_assign (gimple_stmt_iterator *, tree, bool *, + pointer_query &); /* Sets MINMAX to either the constant value or the range VAL is in and returns either the constant value or VAL on success or null @@ -1929,12 +1931,15 @@ maybe_set_strlen_range (tree lhs, tree src, tree bound) /* Diagnose buffer overflow by a STMT writing LEN + PLUS_ONE bytes, either into a region allocated for the object SI when non-null, or into an object designated by the LHS of STMT otherwise. + For a call STMT, when CALL_LHS is set use its left hand side + as the destination, otherwise use argument zero. When nonnull uses RVALS to determine range information. RAWMEM may be set by memcpy and other raw memory functions to allow accesses across subobject boundaries. */ static void -maybe_warn_overflow (gimple *stmt, tree len, pointer_query &ptr_qry, +maybe_warn_overflow (gimple *stmt, bool call_lhs, tree len, + pointer_query &ptr_qry, strinfo *si = NULL, bool plus_one = false, bool rawmem = false) { @@ -1944,14 +1949,23 @@ maybe_warn_overflow (gimple *stmt, tree len, pointer_query &ptr_qry, /* The DECL of the function performing the write if it is done by one. 
*/ tree writefn = NULL_TREE; - /* The destination expression involved in the store STMT. */ + /* The destination expression involved in the store or call STMT. */ tree dest = NULL_TREE; if (is_gimple_assign (stmt)) dest = gimple_assign_lhs (stmt); else if (is_gimple_call (stmt)) { - dest = gimple_call_arg (stmt, 0); + if (call_lhs) + dest = gimple_call_lhs (stmt); + else + { + gcc_assert (gimple_call_builtin_p (stmt, BUILT_IN_NORMAL)); + dest = gimple_call_arg (stmt, 0); + } + + if (!dest) + return; writefn = gimple_call_fndecl (stmt); } else @@ -2108,12 +2122,12 @@ maybe_warn_overflow (gimple *stmt, tree len, pointer_query &ptr_qry, /* Convenience wrapper for the above. */ static inline void -maybe_warn_overflow (gimple *stmt, unsigned HOST_WIDE_INT len, +maybe_warn_overflow (gimple *stmt, bool call_lhs, unsigned HOST_WIDE_INT len, pointer_query &ptr_qry, strinfo *si = NULL, bool plus_one = false, bool rawmem = false) { - maybe_warn_overflow (stmt, build_int_cst (size_type_node, len), ptr_qry, - si, plus_one, rawmem); + tree tlen = build_int_cst (size_type_node, len); + maybe_warn_overflow (stmt, call_lhs, tlen, ptr_qry, si, plus_one, rawmem); } /* Handle a strlen call. 
If strlen of the argument is known, replace @@ -2443,7 +2457,7 @@ handle_builtin_strcpy (enum built_in_function bcode, gimple_stmt_iterator *gsi, else if (idx < 0) srclen = build_int_cst (size_type_node, ~idx); - maybe_warn_overflow (stmt, srclen, ptr_qry, olddsi, true); + maybe_warn_overflow (stmt, false, srclen, ptr_qry, olddsi, true); if (olddsi != NULL) adjust_last_stmt (olddsi, stmt, false, ptr_qry); @@ -3248,7 +3262,7 @@ handle_builtin_memcpy (enum built_in_function bcode, gimple_stmt_iterator *gsi, if (olddsi != NULL && !integer_zerop (len)) { - maybe_warn_overflow (stmt, len, ptr_qry, olddsi, false, true); + maybe_warn_overflow (stmt, false, len, ptr_qry, olddsi, false, true); adjust_last_stmt (olddsi, stmt, false, ptr_qry); } @@ -3713,7 +3727,8 @@ handle_builtin_memset (gimple_stmt_iterator *gsi, bool *zero_write, tree memset_size = gimple_call_arg (memset_stmt, 2); /* Check for overflow. */ - maybe_warn_overflow (memset_stmt, memset_size, ptr_qry, NULL, false, true); + maybe_warn_overflow (memset_stmt, false, memset_size, ptr_qry, NULL, + false, true); /* Bail when there is no statement associated with the destination (the statement may be null even when SI1->ALLOC is not). */ @@ -4374,19 +4389,49 @@ handle_pointer_plus (gimple_stmt_iterator *gsi) } } +/* Set LENRANGE to the number of nonzero bytes for a store of TYPE and + clear all flags. Return true on success and false on failure. */ + +static bool +nonzero_bytes_for_type (tree type, unsigned lenrange[3], + bool *nulterm, bool *allnul, bool *allnonnul) +{ + /* Use the size of the type of the expression as the size of the store, + and set the upper bound of the length range to that of the size. + Nothing is known about the contents so clear all flags. 
*/ + tree typesize = TYPE_SIZE_UNIT (type); + if (!type) + return false; + + if (!tree_fits_uhwi_p (typesize)) + return false; + + unsigned HOST_WIDE_INT sz = tree_to_uhwi (typesize); + if (sz > UINT_MAX) + return false; + + lenrange[2] = sz; + lenrange[1] = lenrange[2] ? lenrange[2] - 1 : 0; + lenrange[0] = 0; + *nulterm = false; + *allnul = false; + *allnonnul = false; + return true; +} + static bool count_nonzero_bytes_addr (tree, unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT, unsigned [3], bool *, bool *, bool *, range_query *, ssa_name_limit_t &); -/* Determines the minimum and maximum number of leading non-zero bytes - in the representation of EXP and set LENRANGE[0] and LENRANGE[1] +/* Recursively determine the minimum and maximum number of leading nonzero + bytes in the representation of EXP and set LENRANGE[0] and LENRANGE[1] to each. Sets LENRANGE[2] to the total size of the access (which may be less than LENRANGE[1] when what's being referenced by EXP is a pointer rather than an array). - Sets *NULTERM if the representation contains a zero byte, and sets - *ALLNUL if all the bytes are zero. + Sets *NULTERM if the representation contains a zero byte, sets *ALLNUL + if all the bytes are zero, and *ALLNONNUL is all are nonzero. OFFSET and NBYTES are the offset into the representation and the size of the access to it determined from an ADDR_EXPR (i.e., a pointer) or MEM_REF or zero for other expressions. @@ -4422,9 +4467,11 @@ count_nonzero_bytes (tree exp, unsigned HOST_WIDE_INT offset, if (gimple_assign_single_p (stmt)) { exp = gimple_assign_rhs1 (stmt); - if (TREE_CODE (exp) != MEM_REF) + if (!DECL_P (exp) + && TREE_CODE (exp) != CONSTRUCTOR + && TREE_CODE (exp) != MEM_REF) return false; - /* Handle MEM_REF below. */ + /* Handle DECLs, CONSTRUCTOR and MEM_REF below. 
*/ } else if (gimple_code (stmt) == GIMPLE_PHI) { @@ -4448,6 +4495,25 @@ count_nonzero_bytes (tree exp, unsigned HOST_WIDE_INT offset, } } + if (TREE_CODE (exp) == CONSTRUCTOR) + { + if (nbytes) + /* If NBYTES has already been determined by an outer MEM_REF + fail rather than overwriting it (this shouldn't happen). */ + return false; + + tree type = TREE_TYPE (exp); + tree size = TYPE_SIZE_UNIT (type); + if (!size || !tree_fits_uhwi_p (size)) + return false; + + unsigned HOST_WIDE_INT byte_size = tree_to_uhwi (size); + if (byte_size < offset) + return false; + + nbytes = byte_size - offset; + } + if (TREE_CODE (exp) == MEM_REF) { if (nbytes) @@ -4483,9 +4549,11 @@ count_nonzero_bytes (tree exp, unsigned HOST_WIDE_INT offset, if (VAR_P (exp) || TREE_CODE (exp) == CONST_DECL) { - exp = ctor_for_folding (exp); - if (!exp) - return false; + /* If EXP can be folded into a constant use the result. Otherwise + proceed to use EXP to determine a range of the result. */ + if (tree fold_exp = ctor_for_folding (exp)) + if (fold_exp != error_mark_node) + exp = fold_exp; } const char *prep = NULL; @@ -4533,7 +4601,8 @@ count_nonzero_bytes (tree exp, unsigned HOST_WIDE_INT offset, } if (!nbytes) - return false; + return nonzero_bytes_for_type (TREE_TYPE (exp), lenrange, + nulterm, allnul, allnonnul); /* Compute the number of leading nonzero bytes in the representation and update the minimum and maximum. */ @@ -4696,14 +4765,19 @@ count_nonzero_bytes_addr (tree exp, unsigned HOST_WIDE_INT offset, return true; } -/* Same as above except with an implicit SSA_NAME limit. RVALS is used - to determine ranges of dynamically computed string lengths (the results - of strlen). */ +/* Same as above except with an implicit SSA_NAME limit. When EXPR_OR_TYPE + is a type rather than an expression use its size to compute the range. + RVALS is used to determine ranges of dynamically computed string lengths + (the results of strlen). 
*/ static bool -count_nonzero_bytes (tree exp, unsigned lenrange[3], bool *nulterm, +count_nonzero_bytes (tree expr_or_type, unsigned lenrange[3], bool *nulterm, bool *allnul, bool *allnonnul, range_query *rvals) { + if (TYPE_P (expr_or_type)) + return nonzero_bytes_for_type (expr_or_type, lenrange, + nulterm, allnul, allnonnul); + /* Set to optimistic values so the caller doesn't have to worry about initializing these and to what. On success, the function will clear these if it determines their values are different but being recursive @@ -4714,7 +4788,8 @@ count_nonzero_bytes (tree exp, unsigned lenrange[3], bool *nulterm, *allnonnul = true; ssa_name_limit_t snlim; - return count_nonzero_bytes (exp, 0, 0, lenrange, nulterm, allnul, allnonnul, + tree expr = expr_or_type; + return count_nonzero_bytes (expr, 0, 0, lenrange, nulterm, allnul, allnonnul, rvals, snlim); } @@ -4728,11 +4803,29 @@ static bool handle_store (gimple_stmt_iterator *gsi, bool *zero_write, pointer_query &ptr_qry) { - int idx = -1; - strinfo *si = NULL; gimple *stmt = gsi_stmt (*gsi); - tree ssaname = NULL_TREE, lhs = gimple_assign_lhs (stmt); - tree rhs = gimple_assign_rhs1 (stmt); + /* The LHS and RHS of the store. The RHS is null if STMT is a function + call. STORETYPE is the type of the store (determined from either + the RHS of the assignment statement or the LHS of a function call. 
*/ + tree lhs, rhs, storetype; + if (is_gimple_assign (stmt)) + { + lhs = gimple_assign_lhs (stmt); + rhs = gimple_assign_rhs1 (stmt); + storetype = TREE_TYPE (rhs); + } + else if (is_gimple_call (stmt)) + { + lhs = gimple_call_lhs (stmt); + rhs = NULL_TREE; + storetype = TREE_TYPE (lhs); + } + else + return true; + + tree ssaname = NULL_TREE; + strinfo *si = NULL; + int idx = -1; range_query *const rvals = ptr_qry.rvals; @@ -4756,13 +4849,13 @@ handle_store (gimple_stmt_iterator *gsi, bool *zero_write, ssaname = TREE_OPERAND (lhs, 0); else if (si == NULL || compare_nonzero_chars (si, offset, rvals) < 0) { - *zero_write = initializer_zerop (rhs); + *zero_write = rhs ? initializer_zerop (rhs) : false; bool dummy; unsigned lenrange[] = { UINT_MAX, 0, 0 }; - if (count_nonzero_bytes (rhs, lenrange, &dummy, &dummy, &dummy, - rvals)) - maybe_warn_overflow (stmt, lenrange[2], ptr_qry); + if (count_nonzero_bytes (rhs ? rhs : storetype, lenrange, + &dummy, &dummy, &dummy, rvals)) + maybe_warn_overflow (stmt, true, lenrange[2], ptr_qry); return true; } @@ -4793,16 +4886,17 @@ handle_store (gimple_stmt_iterator *gsi, bool *zero_write, bool full_string_p; const bool ranges_valid - = count_nonzero_bytes (rhs, lenrange, &full_string_p, + = count_nonzero_bytes (rhs ? 
rhs : storetype, lenrange, &full_string_p, &storing_all_zeros_p, &storing_all_nonzero_p, rvals); + if (ranges_valid) { rhs_minlen = lenrange[0]; storing_nonzero_p = lenrange[1] > 0; *zero_write = storing_all_zeros_p; - maybe_warn_overflow (stmt, lenrange[2], ptr_qry); + maybe_warn_overflow (stmt, true, lenrange[2], ptr_qry); } else { @@ -4864,7 +4958,7 @@ handle_store (gimple_stmt_iterator *gsi, bool *zero_write, && storing_nonzero_p && lenrange[0] == lenrange[1] && lenrange[0] == lenrange[2] - && TREE_CODE (TREE_TYPE (rhs)) == INTEGER_TYPE) + && TREE_CODE (storetype) == INTEGER_TYPE) { /* Handle a store of one or more non-nul characters that ends before the terminating nul of the destination and so does @@ -5145,8 +5239,19 @@ strlen_check_and_optimize_call (gimple_stmt_iterator *gsi, bool *zero_write, if (!gimple_call_builtin_p (stmt, BUILT_IN_NORMAL)) { tree fntype = gimple_call_fntype (stmt); - if (fntype && lookup_attribute ("alloc_size", TYPE_ATTRIBUTES (fntype))) - handle_alloc_call (BUILT_IN_NONE, gsi); + if (!fntype) + return true; + + if (lookup_attribute ("alloc_size", TYPE_ATTRIBUTES (fntype))) + { + handle_alloc_call (BUILT_IN_NONE, gsi); + return true; + } + + if (tree lhs = gimple_call_lhs (stmt)) + handle_assign (gsi, lhs, zero_write, ptr_qry); + + /* Proceed to handle user-defined formatting functions. */ } /* When not optimizing we must be checking printf calls which @@ -5362,6 +5467,48 @@ handle_integral_assign (gimple_stmt_iterator *gsi, bool *cleanup_eh, } } +/* Handle assignment statement at *GSI to LHS. Set *ZERO_WRITE if + the assignent stores all zero bytes.. 
*/ + +static bool +handle_assign (gimple_stmt_iterator *gsi, tree lhs, bool *zero_write, + pointer_query &ptr_qry) +{ + tree type = TREE_TYPE (lhs); + if (TREE_CODE (type) == ARRAY_TYPE) + type = TREE_TYPE (type); + + bool is_char_store = is_char_type (type); + if (!is_char_store && TREE_CODE (lhs) == MEM_REF) + { + /* To consider stores into char objects via integer types other + than char but not those to non-character objects, determine + the type of the destination rather than just the type of + the access. */ + for (int i = 0; i != 2; ++i) + { + tree ref = TREE_OPERAND (lhs, i); + type = TREE_TYPE (ref); + if (TREE_CODE (type) == POINTER_TYPE) + type = TREE_TYPE (type); + if (TREE_CODE (type) == ARRAY_TYPE) + type = TREE_TYPE (type); + if (is_char_type (type)) + { + is_char_store = true; + break; + } + } + } + + /* Handle a single or multibyte assignment. */ + if (is_char_store && !handle_store (gsi, zero_write, ptr_qry)) + return false; + + return true; +} + + /* Attempt to check for validity of the performed access a single statement at *GSI using string length knowledge, and to optimize it. If the given basic block needs clean-up of EH, CLEANUP_EH is set to @@ -5407,38 +5554,8 @@ check_and_optimize_stmt (gimple_stmt_iterator *gsi, bool *cleanup_eh, /* Handle assignment to a character. */ handle_integral_assign (gsi, cleanup_eh, ptr_qry.rvals); else if (TREE_CODE (lhs) != SSA_NAME && !TREE_SIDE_EFFECTS (lhs)) - { - tree type = TREE_TYPE (lhs); - if (TREE_CODE (type) == ARRAY_TYPE) - type = TREE_TYPE (type); - - bool is_char_store = is_char_type (type); - if (!is_char_store && TREE_CODE (lhs) == MEM_REF) - { - /* To consider stores into char objects via integer types - other than char but not those to non-character objects, - determine the type of the destination rather than just - the type of the access. 
*/ - for (int i = 0; i != 2; ++i) - { - tree ref = TREE_OPERAND (lhs, i); - type = TREE_TYPE (ref); - if (TREE_CODE (type) == POINTER_TYPE) - type = TREE_TYPE (type); - if (TREE_CODE (type) == ARRAY_TYPE) - type = TREE_TYPE (type); - if (is_char_type (type)) - { - is_char_store = true; - break; - } - } - } - - /* Handle a single or multibyte assignment. */ - if (is_char_store && !handle_store (gsi, &zero_write, ptr_qry)) - return false; - } + if (!handle_assign (gsi, lhs, &zero_write, ptr_qry)) + return false; } else if (gcond *cond = dyn_cast (stmt)) { -- cgit v1.1 From 478cc962ad174bfc64c573152a0658935651fce3 Mon Sep 17 00:00:00 2001 From: Andrew MacLeod Date: Thu, 15 Jul 2021 11:07:12 -0400 Subject: Add gimple_range_type for statements. The existing mechanisms for picking up the type of a statement are inconsistent with the needs of ranger. Encapsulate all the bits required to pick up the return type of a statement in one place, and check whether the type is supported. * gimple-range-fold.cc (adjust_pointer_diff_expr): Use gimple_range_type. (fold_using_range::fold_stmt): Ditto. (fold_using_range::range_of_range_op): Ditto. (fold_using_range::range_of_phi): Ditto. (fold_using_range::range_of_call): Ditto. (fold_using_range::range_of_builtin_ubsan_call): Ditto. (fold_using_range::range_of_builtin_call): Ditto. (fold_using_range::range_of_cond_expr): Ditto. * gimple-range-fold.h (gimple_range_type): New. 
--- gcc/gimple-range-fold.cc | 44 +++++++++++++++++++------------------------- gcc/gimple-range-fold.h | 30 ++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 25 deletions(-) (limited to 'gcc') diff --git a/gcc/gimple-range-fold.cc b/gcc/gimple-range-fold.cc index eff5d1f..f8578c0 100644 --- a/gcc/gimple-range-fold.cc +++ b/gcc/gimple-range-fold.cc @@ -362,7 +362,7 @@ adjust_pointer_diff_expr (irange &res, const gimple *diff_stmt) { tree max = vrp_val_max (ptrdiff_type_node); wide_int wmax = wi::to_wide (max, TYPE_PRECISION (TREE_TYPE (max))); - tree expr_type = gimple_expr_type (diff_stmt); + tree expr_type = gimple_range_type (diff_stmt); tree range_min = build_zero_cst (expr_type); tree range_max = wide_int_to_tree (expr_type, wmax - 1); int_range<2> r (range_min, range_max); @@ -522,16 +522,8 @@ fold_using_range::fold_stmt (irange &r, gimple *s, fur_source &src, tree name) if (!res) { - // If no name is specified, try the expression kind. - if (!name) - { - tree t = gimple_expr_type (s); - if (!irange::supports_type_p (t)) - return false; - r.set_varying (t); - return true; - } - if (!gimple_range_ssa_p (name)) + // If no name specified or range is unsupported, bail. + if (!name || !gimple_range_ssa_p (name)) return false; // We don't understand the stmt, so return the global range. 
r = gimple_range_global (name); @@ -558,10 +550,11 @@ bool fold_using_range::range_of_range_op (irange &r, gimple *s, fur_source &src) { int_range_max range1, range2; - tree type = gimple_expr_type (s); + tree type = gimple_range_type (s); + if (!type) + return false; range_operator *handler = gimple_range_handler (s); gcc_checking_assert (handler); - gcc_checking_assert (irange::supports_type_p (type)); tree lhs = gimple_get_lhs (s); tree op1 = gimple_range_operand1 (s); @@ -719,11 +712,11 @@ bool fold_using_range::range_of_phi (irange &r, gphi *phi, fur_source &src) { tree phi_def = gimple_phi_result (phi); - tree type = TREE_TYPE (phi_def); + tree type = gimple_range_type (phi); int_range_max arg_range; unsigned x; - if (!irange::supports_type_p (type)) + if (!type) return false; // Start with an empty range, unioning in each argument's range. @@ -780,13 +773,13 @@ fold_using_range::range_of_phi (irange &r, gphi *phi, fur_source &src) bool fold_using_range::range_of_call (irange &r, gcall *call, fur_source &src) { - tree type = gimple_call_return_type (call); + tree type = gimple_range_type (call); + if (!type) + return false; + tree lhs = gimple_call_lhs (call); bool strict_overflow_p; - if (!irange::supports_type_p (type)) - return false; - if (range_of_builtin_call (r, call, src)) ; else if (gimple_stmt_nonnegative_warnv_p (call, &strict_overflow_p)) @@ -817,7 +810,7 @@ fold_using_range::range_of_builtin_ubsan_call (irange &r, gcall *call, { gcc_checking_assert (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR); - tree type = gimple_call_return_type (call); + tree type = gimple_range_type (call); range_operator *op = range_op_handler (code, type); gcc_checking_assert (op); int_range_max ir0, ir1; @@ -853,7 +846,7 @@ fold_using_range::range_of_builtin_call (irange &r, gcall *call, if (func == CFN_LAST) return false; - tree type = gimple_call_return_type (call); + tree type = gimple_range_type (call); tree arg; int mini, maxi, zerov = 0, prec; 
scalar_int_mode mode; @@ -1094,12 +1087,12 @@ fold_using_range::range_of_cond_expr (irange &r, gassign *s, fur_source &src) tree op1 = gimple_assign_rhs2 (s); tree op2 = gimple_assign_rhs3 (s); - gcc_checking_assert (gimple_assign_rhs_code (s) == COND_EXPR); - gcc_checking_assert (useless_type_conversion_p (TREE_TYPE (op1), - TREE_TYPE (op2))); - if (!irange::supports_type_p (TREE_TYPE (op1))) + tree type = gimple_range_type (s); + if (!type) return false; + gcc_checking_assert (gimple_assign_rhs_code (s) == COND_EXPR); + gcc_checking_assert (range_compatible_p (TREE_TYPE (op1), TREE_TYPE (op2))); src.get_operand (cond_range, cond); src.get_operand (range1, op1); src.get_operand (range2, op2); @@ -1118,6 +1111,7 @@ fold_using_range::range_of_cond_expr (irange &r, gassign *s, fur_source &src) r = range1; r.union_ (range2); } + gcc_checking_assert (range_compatible_p (r.type (), type)); return true; } diff --git a/gcc/gimple-range-fold.h b/gcc/gimple-range-fold.h index dc1b28f..ceed7ba 100644 --- a/gcc/gimple-range-fold.h +++ b/gcc/gimple-range-fold.h @@ -56,6 +56,36 @@ gimple_range_handler (const gimple *s) return NULL; } +// Return the type of range which statement S calculates. If the type is +// unsupported or no type can be determined, return NULL_TREE. + +static inline tree +gimple_range_type (const gimple *s) +{ + tree lhs = gimple_get_lhs (s); + tree type = NULL_TREE; + if (lhs) + type = TREE_TYPE (lhs); + else + { + enum gimple_code code = gimple_code (s); + if (code == GIMPLE_COND) + type = boolean_type_node; + else if (code == GIMPLE_PHI) + type = TREE_TYPE (gimple_phi_result (s)); + else if (code == GIMPLE_CALL) + { + type = gimple_call_fntype (s); + // If it has a type, get the return type. + if (type) + type = TREE_TYPE (type); + } + } + if (irange::supports_type_p (type)) + return type; + return NULL_TREE; +} + // Return EXP if it is an SSA_NAME with a type supported by gimple ranges. 
static inline tree -- cgit v1.1 From f364cdffa47af574f90f671b2dcf5afa91442741 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Thu, 15 Jul 2021 22:34:25 +0200 Subject: i386: Fix ix86_hard_regno_mode_ok for TDmode on 32bit targets [PR101346] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit General regs on 32bit targets do not support 128bit modes, including TDmode. gcc/ 2021-07-15 Uroš Bizjak PR target/101346 * config/i386/i386.h (VALID_SSE_REG_MODE): Add TDmode. (VALID_INT_MODE_P): Add SDmode and DDmode. Add TDmode for TARGET_64BIT. (VALID_DFP_MODE_P): Remove. * config/i386/i386.c (ix86_hard_regno_mode_ok): Do not use VALID_DFP_MODE_P. gcc/testsuite/ 2021-07-15 Uroš Bizjak PR target/101346 * gcc.target/i386/pr101346.c: New test. --- gcc/config/i386/i386.c | 7 ++----- gcc/config/i386/i386.h | 8 +++----- gcc/testsuite/gcc.target/i386/pr101346.c | 10 ++++++++++ 3 files changed, 15 insertions(+), 10 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr101346.c (limited to 'gcc') diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 530d357..9d74b7a 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -19535,11 +19535,8 @@ ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode) return !can_create_pseudo_p (); } /* We handle both integer and floats in the general purpose registers. */ - else if (VALID_INT_MODE_P (mode)) - return true; - else if (VALID_FP_MODE_P (mode)) - return true; - else if (VALID_DFP_MODE_P (mode)) + else if (VALID_INT_MODE_P (mode) + || VALID_FP_MODE_P (mode)) return true; /* Lots of MMX code casts 8 byte vector modes to DImode. 
If we then go on to use that value in smaller contexts, this can easily force a diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 324e8a9..0c2c93d 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -1023,7 +1023,7 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); #define VALID_SSE_REG_MODE(MODE) \ ((MODE) == V1TImode || (MODE) == TImode \ || (MODE) == V4SFmode || (MODE) == V4SImode \ - || (MODE) == SFmode || (MODE) == TFmode) + || (MODE) == SFmode || (MODE) == TFmode || (MODE) == TDmode) #define VALID_MMX_REG_MODE_3DNOW(MODE) \ ((MODE) == V2SFmode || (MODE) == SFmode) @@ -1037,9 +1037,6 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); #define VALID_MASK_AVX512BW_MODE(MODE) ((MODE) == SImode || (MODE) == DImode) -#define VALID_DFP_MODE_P(MODE) \ - ((MODE) == SDmode || (MODE) == DDmode || (MODE) == TDmode) - #define VALID_FP_MODE_P(MODE) \ ((MODE) == SFmode || (MODE) == DFmode || (MODE) == XFmode \ || (MODE) == SCmode || (MODE) == DCmode || (MODE) == XCmode) \ @@ -1049,12 +1046,13 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); || (MODE) == SImode || (MODE) == DImode \ || (MODE) == CQImode || (MODE) == CHImode \ || (MODE) == CSImode || (MODE) == CDImode \ + || (MODE) == SDmode || (MODE) == DDmode \ || (MODE) == V4QImode || (MODE) == V2HImode || (MODE) == V1SImode \ || (TARGET_64BIT \ && ((MODE) == TImode || (MODE) == CTImode \ || (MODE) == TFmode || (MODE) == TCmode \ || (MODE) == V8QImode || (MODE) == V4HImode \ - || (MODE) == V2SImode))) + || (MODE) == V2SImode || (MODE) == TDmode))) /* Return true for modes passed in SSE registers. 
*/ #define SSE_REG_MODE_P(MODE) \ diff --git a/gcc/testsuite/gcc.target/i386/pr101346.c b/gcc/testsuite/gcc.target/i386/pr101346.c new file mode 100644 index 0000000..fefabaf --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr101346.c @@ -0,0 +1,10 @@ +/* PR target/101346 */ +/* { dg-do compile } */ +/* { dg-options "-O0 -fprofile-generate -msse" } */ +/* { dg-require-profiling "-fprofile-generate" } */ + +_Decimal128 +foo (_Decimal128 x) +{ + return - __builtin_fabsd128 (x); +} -- cgit v1.1 From c031ea2782a1873eee5ba82fb114cd87ff831412 Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Thu, 15 Jul 2021 19:33:07 -0400 Subject: analyzer: fix const-correctness of various is_a_helper gcc/analyzer/ChangeLog: * svalue.h (is_a_helper ::test): Make param and template param const. (is_a_helper ::test): Likewise. (is_a_helper ::test): Likewise. (is_a_helper ::test): Likewise. Signed-off-by: David Malcolm --- gcc/analyzer/svalue.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'gcc') diff --git a/gcc/analyzer/svalue.h b/gcc/analyzer/svalue.h index 54b97f8..20d7cf8 100644 --- a/gcc/analyzer/svalue.h +++ b/gcc/analyzer/svalue.h @@ -1063,7 +1063,7 @@ public: template <> template <> inline bool -is_a_helper ::test (svalue *sval) +is_a_helper ::test (const svalue *sval) { return sval->get_kind () == SK_PLACEHOLDER; } @@ -1165,7 +1165,7 @@ public: template <> template <> inline bool -is_a_helper ::test (svalue *sval) +is_a_helper ::test (const svalue *sval) { return sval->get_kind () == SK_WIDENING; } @@ -1266,7 +1266,7 @@ public: template <> template <> inline bool -is_a_helper ::test (svalue *sval) +is_a_helper ::test (const svalue *sval) { return sval->get_kind () == SK_COMPOUND; } @@ -1366,7 +1366,7 @@ public: template <> template <> inline bool -is_a_helper ::test (svalue *sval) +is_a_helper ::test (const svalue *sval) { return sval->get_kind () == SK_CONJURED; } -- cgit v1.1 From d97d71a1989e9ee8e1b8563b351c42b7732da108 Mon Sep 17 00:00:00 2001 
From: GCC Administrator Date: Fri, 16 Jul 2021 00:16:25 +0000 Subject: Daily bump. --- gcc/ChangeLog | 152 ++++++++++++++++++++++++++++++++++++++++++++++++ gcc/DATESTAMP | 2 +- gcc/analyzer/ChangeLog | 148 ++++++++++++++++++++++++++++++++++++++++++++++ gcc/c-family/ChangeLog | 6 ++ gcc/c/ChangeLog | 6 ++ gcc/cp/ChangeLog | 12 ++++ gcc/testsuite/ChangeLog | 125 +++++++++++++++++++++++++++++++++++++++ 7 files changed, 450 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index e6c88f2..47772d9 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,155 @@ +2021-07-15 Uroš Bizjak + + PR target/101346 + * config/i386/i386.h (VALID_SSE_REG_MODE): Add TDmode. + (VALID_INT_MODE_P): Add SDmode and DDmode. + Add TDmode for TARGET_64BIT. + (VALID_DFP_MODE_P): Remove. + * config/i386/i386.c (ix86_hard_regno_mode_ok): + Do not use VALID_DFP_MODE_P. + +2021-07-15 Andrew MacLeod + + * gimple-range-fold.cc (adjust_pointer_diff_expr): Use + gimple_range_type. + (fold_using_range::fold_stmt): Ditto. + (fold_using_range::range_of_range_op): Ditto. + (fold_using_range::range_of_phi): Ditto. + (fold_using_range::range_of_call): Ditto. + (fold_using_range::range_of_builtin_ubsan_call): Ditto. + (fold_using_range::range_of_builtin_call): Ditto. + (fold_using_range::range_of_cond_expr): Ditto. + * gimple-range-fold.h (gimple_range_type): New. + +2021-07-15 Martin Sebor + + PR middle-end/97027 + * tree-ssa-strlen.c (handle_assign): New function. + (maybe_warn_overflow): Add argument. + (nonzero_bytes_for_type): New function. + (count_nonzero_bytes): Handle more tree types. Call + nonzero_bytes_for_tye. + (count_nonzero_bytes): Handle types. + (handle_store): Handle stores from function calls. + (strlen_check_and_optimize_call): Move code to handle_assign. Call + it for assignments from function calls. 
+ +2021-07-15 David Malcolm + + PR analyzer/95006 + PR analyzer/94713 + PR analyzer/94714 + * doc/invoke.texi: Add -Wanalyzer-use-of-uninitialized-value. + +2021-07-15 David Malcolm + + * doc/invoke.texi (-fdump-analyzer-exploded-paths): New. + +2021-07-15 Martin Sebor + + PR c/101289 + PR c/97548 + * fold-const.c (operand_compare::operand_equal_p): Handle OEP_DECL_NAME. + (operand_compare::verify_hash_value): Same. + * tree-core.h (OEP_DECL_NAME): New. + +2021-07-15 Martin Jambor + + * profile-count.h (profile_count::value): Change the return type to + uint64_t. + * gimple-pretty-print.c (dump_gimple_bb_header): Adjust print + statement. + * tree-cfg.c (dump_function_to_file): Likewise. + +2021-07-15 Bill Schmidt + + PR target/101129 + * config/rs6000/rs6000-p8swap.c (has_part_mult): New. + (rs6000_analyze_swaps): Insns containing a subreg of a mult are + not swappable. + +2021-07-15 Richard Biener + + * tree-vectorizer.h (vect_gen_while): Match up with + vect_gen_while_not. + * tree-vect-stmts.c (vect_gen_while): Adjust API to that + of vect_gen_while_not. + (vect_gen_while_not): Adjust. + * tree-vect-loop-manip.c (vect_set_loop_controls_directly): Likewise. + +2021-07-15 Aldy Hernandez + + * gimple-range-cache.cc (non_null_ref::adjust_range): New. + (ranger_cache::range_of_def): Call adjust_range. + (ranger_cache::entry_range): Same. + * gimple-range-cache.h (non_null_ref::adjust_range): New. + * gimple-range.cc (gimple_ranger::range_of_expr): Call + adjust_range. + (gimple_ranger::range_on_entry): Same. + +2021-07-15 Tamar Christina + + Revert: + 2021-07-14 Tamar Christina + + * config/arm/neon.md (dot_prod): Drop statements. + +2021-07-15 Tamar Christina + + Revert: + 2021-07-14 Tamar Christina + + * config/aarch64/aarch64-simd-builtins.def (udot, sdot): Rename to... + (sdot_prod, udot_prod): ...These. + * config/aarch64/aarch64-simd.md (dot_prod): Remove. + (aarch64_dot): Rename to... + (dot_prod): ...This. 
+ * config/aarch64/arm_neon.h (vdot_u32, vdotq_u32, vdot_s32, vdotq_s32): + Update builtins. + +2021-07-15 Jakub Jelinek + + PR middle-end/101437 + * gimplify.c (gimplify_expr): Throw away volatile reads from empty + types even if they have non-BLKmode TYPE_MODE. + +2021-07-15 Richard Biener + + PR driver/101383 + * gcc.c (process_command): Process -gtoggle like process_options + would after parsing options. + +2021-07-15 Trevor Saunders + + * cfgexpand.c (expand_asm_loc): Adjust. + (expand_asm_stmt): Likewise. + * config/arm/aarch-common-protos.h (arm_md_asm_adjust): Likewise. + * config/arm/aarch-common.c (arm_md_asm_adjust): Likewise. + * config/arm/arm.c (thumb1_md_asm_adjust): Likewise. + * config/avr/avr.c (avr_md_asm_adjust): Likewise. + * config/cris/cris.c (cris_md_asm_adjust): Likewise. + * config/i386/i386.c (ix86_md_asm_adjust): Likewise. + * config/mn10300/mn10300.c (mn10300_md_asm_adjust): Likewise. + * config/nds32/nds32.c (nds32_md_asm_adjust): Likewise. + * config/pdp11/pdp11.c (pdp11_md_asm_adjust): Likewise. + * config/rs6000/rs6000.c (rs6000_md_asm_adjust): Likewise. + * config/s390/s390.c (s390_md_asm_adjust): Likewise. + * config/vax/vax.c (vax_md_asm_adjust): Likewise. + * config/visium/visium.c (visium_md_asm_adjust): Likewise. + * doc/tm.texi: Regenerate. + * target.def: Add location argument to md_asm_adjust. + +2021-07-15 Trevor Saunders + + * tree-diagnostic.c (diagnostic_report_current_function): Use the + diagnostic's location, not input_location. + +2021-07-15 Trevor Saunders + + * cfgexpand.c (tree_conflicts_with_clobbers_p): Pass location to + diagnostics. + (expand_asm_stmt): Likewise. 
+ 2021-07-14 Peter Bergner * config/rs6000/rs6000.c (adjacent_mem_locations): Return the lower diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 9070a2a..16244c6 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20210715 +20210716 diff --git a/gcc/analyzer/ChangeLog b/gcc/analyzer/ChangeLog index f0b2d96..e6bd95c 100644 --- a/gcc/analyzer/ChangeLog +++ b/gcc/analyzer/ChangeLog @@ -1,3 +1,151 @@ +2021-07-15 David Malcolm + + * svalue.h (is_a_helper ::test): Make + param and template param const. + (is_a_helper ::test): Likewise. + (is_a_helper ::test): Likewise. + (is_a_helper ::test): Likewise. + +2021-07-15 David Malcolm + + PR analyzer/95006 + PR analyzer/94713 + PR analyzer/94714 + * analyzer.cc (maybe_reconstruct_from_def_stmt): Split out + GIMPLE_ASSIGN case into... + (get_diagnostic_tree_for_gassign_1): New. + (get_diagnostic_tree_for_gassign): New. + * analyzer.h (get_diagnostic_tree_for_gassign): New decl. + * analyzer.opt (Wanalyzer-write-to-string-literal): New. + * constraint-manager.cc (class svalue_purger): New. + (constraint_manager::purge_state_involving): New. + * constraint-manager.h + (constraint_manager::purge_state_involving): New. + * diagnostic-manager.cc (saved_diagnostic::supercedes_p): New. + (dedupe_winners::handle_interactions): New. + (diagnostic_manager::emit_saved_diagnostics): Call it. + * diagnostic-manager.h (saved_diagnostic::supercedes_p): New decl. + * engine.cc (impl_region_model_context::warn): Convert return type + to bool. Return false if the diagnostic isn't saved. + (impl_region_model_context::purge_state_involving): New. + (impl_sm_context::get_state): Use NULL ctxt when querying old + rvalue. + (impl_sm_context::set_next_state): Use new sval when querying old + state. + (class dump_path_diagnostic): Move to region-model.cc + (exploded_node::on_stmt): Move to on_stmt_pre and on_stmt_post. + Remove call to purge_state_involving. + (exploded_node::on_stmt_pre): New, based on the above. 
Move most + of it to region_model::on_stmt_pre. + (exploded_node::on_stmt_post): Likewise, moving to + region_model::on_stmt_post. + (class stale_jmp_buf): Fix parent class to use curiously recurring + template pattern. + (feasibility_state::maybe_update_for_edge): Call on_call_pre and + on_call_post on gcalls. + * exploded-graph.h (impl_region_model_context::warn): Return bool. + (impl_region_model_context::purge_state_involving): New decl. + (exploded_node::on_stmt_pre): New decl. + (exploded_node::on_stmt_post): New decl. + * pending-diagnostic.h (pending_diagnostic::use_of_uninit_p): New. + (pending_diagnostic::supercedes_p): New. + * program-state.cc (sm_state_map::get_state): Inherit state for + conjured_svalue as well as initial_svalue. + (sm_state_map::purge_state_involving): Also support SK_CONJURED. + * region-model-impl-calls.cc (call_details::get_uncertainty): + Handle m_ctxt being NULL. + (call_details::get_or_create_conjured_svalue): New. + (region_model::impl_call_fgets): New. + (region_model::impl_call_fread): New. + * region-model-manager.cc + (region_model_manager::get_or_create_initial_value): Return an + uninitialized poisoned value for regions that can't have initial + values. + * region-model-reachability.cc + (reachable_regions::mark_escaped_clusters): Handle ctxt being + NULL. + * region-model.cc (region_to_value_map::purge_state_involving): New. + (poisoned_value_diagnostic::use_of_uninit_p): New. + (poisoned_value_diagnostic::emit): Handle POISON_KIND_UNINIT. + (poisoned_value_diagnostic::describe_final_event): Likewise. + (region_model::check_for_poison): New. + (region_model::on_assignment): Call it. + (class dump_path_diagnostic): Move here from engine.cc. + (region_model::on_stmt_pre): New, based on exploded_node::on_stmt. + (region_model::on_call_pre): Move the setting of the LHS to a + conjured svalue to before the checks for specific functions. + Handle "fgets", "fgets_unlocked", and "fread". 
+ (region_model::purge_state_involving): New. + (region_model::handle_unrecognized_call): Handle ctxt being NULL. + (region_model::get_rvalue): Call check_for_poison. + (selftest::test_stack_frames): Use NULL for context when getting + uninitialized rvalue. + (selftest::test_alloca): Likewise. + * region-model.h (region_to_value_map::purge_state_involving): New + decl. + (call_details::get_or_create_conjured_svalue): New decl. + (region_model::on_stmt_pre): New decl. + (region_model::purge_state_involving): New decl. + (region_model::impl_call_fgets): New decl. + (region_model::impl_call_fread): New decl. + (region_model::check_for_poison): New decl. + (region_model_context::warn): Return bool. + (region_model_context::purge_state_involving): New. + (noop_region_model_context::warn): Return bool. + (noop_region_model_context::purge_state_involving): New. + (test_region_model_context:: warn): Return bool. + * region.cc (region::get_memory_space): New. + (region::can_have_initial_svalue_p): New. + (region::involves_p): New. + * region.h (enum memory_space): New. + (region::get_memory_space): New decl. + (region::can_have_initial_svalue_p): New decl. + (region::involves_p): New decl. + * sm-malloc.cc (use_after_free::supercedes_p): New. + * store.cc (binding_cluster::purge_state_involving): New. + (store::purge_state_involving): New. + * store.h (class symbolic_binding): New forward decl. + (binding_key::dyn_cast_symbolic_binding): New. + (symbolic_binding::dyn_cast_symbolic_binding): New. + (binding_cluster::purge_state_involving): New. + (store::purge_state_involving): New. + * svalue.cc (svalue::can_merge_p): Reject attempts to merge + poisoned svalues with other svalues, so that we identify + paths in which a variable is conditionally uninitialized. + (involvement_visitor::visit_conjured_svalue): New. + (svalue::involves_p): Also handle SK_CONJURED. + (poison_kind_to_str): Handle POISON_KIND_UNINIT. + (poisoned_svalue::maybe_fold_bits_within): New. 
+ * svalue.h (enum poison_kind): Add POISON_KIND_UNINIT. + (poisoned_svalue::maybe_fold_bits_within): New decl. + +2021-07-15 David Malcolm + + * analyzer.opt (fdump-analyzer-exploded-paths): New. + * diagnostic-manager.cc + (diagnostic_manager::emit_saved_diagnostic): Implement it. + * engine.cc (exploded_path::dump_to_pp): Add ext_state param and + use it to dump states if non-NULL. + (exploded_path::dump): Likewise. + (exploded_path::dump_to_file): New. + * exploded-graph.h (exploded_path::dump_to_pp): Add ext_state + param. + (exploded_path::dump): Likewise. + (exploded_path::dump): Likewise. + (exploded_path::dump_to_file): New. + +2021-07-15 David Malcolm + + * analyzer.cc (fixup_tree_for_diagnostic_1): Use DECL_DEBUG_EXPR + if it's available. + * engine.cc (readability): Likewise. + +2021-07-15 David Malcolm + + * state-purge.cc (self_referential_phi_p): New. + (state_purge_per_ssa_name::process_point): Don't purge an SSA name + at its def-stmt if the def-stmt is self-referential. + 2021-07-07 David Malcolm * diagnostic-manager.cc (null_assignment_sm_context::get_state): diff --git a/gcc/c-family/ChangeLog b/gcc/c-family/ChangeLog index f98bf2b..817f4c4 100644 --- a/gcc/c-family/ChangeLog +++ b/gcc/c-family/ChangeLog @@ -1,3 +1,9 @@ +2021-07-15 Martin Sebor + + PR c/101289 + PR c/97548 + * c-warn.c (warn_parm_array_mismatch): Use OEP_DECL_NAME. + 2021-07-14 Jason Merrill * c-opts.c (c_common_post_options): Set -fdelete-dead-exceptions. diff --git a/gcc/c/ChangeLog b/gcc/c/ChangeLog index ef6f4ad..a190806 100644 --- a/gcc/c/ChangeLog +++ b/gcc/c/ChangeLog @@ -1,3 +1,9 @@ +2021-07-15 Martin Sebor + + PR c/101289 + PR c/97548 + * c-decl.c (get_parm_array_spec): Strip nops. + 2021-07-06 Martin Sebor * c-objc-common.c (c_tree_printer): Remove support for %G and %K. 
diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index a80d236..0e2139a 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,15 @@ +2021-07-15 Jakub Jelinek + + PR c++/101443 + * cp-gimplify.c (cp_fold): For comparisons with NULLPTR_TYPE + operands, fold them right away to true or false. + +2021-07-15 Jason Merrill + + PR c++/101095 + * cp-objcp-common.c (cp_common_init_ts): Mark types as types. + (cp_tree_size): Remove redundant entries. + 2021-07-14 Patrick Palka PR c++/88252 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 944639a..7a9a29e 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,128 @@ +2021-07-15 Uroš Bizjak + + PR target/101346 + * gcc.target/i386/pr101346.c: New test. + +2021-07-15 Martin Sebor + + PR middle-end/97027 + * gcc.dg/Wstringop-overflow-15.c: Remove an xfail. + * gcc.dg/Wstringop-overflow-47.c: Adjust xfails. + * gcc.dg/torture/pr69170.c: Avoid valid warnings. + * gcc.dg/torture/pr70025.c: Prune out a false positive. + * gcc.dg/vect/pr97769.c: Initialize a loop control variable. + * gcc.target/i386/pr92658-avx512bw-trunc.c: Increase buffer size + to avoid overflow. + * gcc.target/i386/pr92658-avx512f.c: Same. + * gcc.dg/Wstringop-overflow-68.c: New test. + * gcc.dg/Wstringop-overflow-69.c: New test. + * gcc.dg/Wstringop-overflow-70.c: New test. + * gcc.dg/Wstringop-overflow-71.c: New test. + * gcc.dg/strlenopt-95.c: New test. + +2021-07-15 David Malcolm + + PR analyzer/95006 + PR analyzer/94713 + PR analyzer/94714 + * g++.dg/analyzer/pr93212.C: Update location of warning. + * g++.dg/analyzer/pr94011.C: Add + -Wno-analyzer-use-of-uninitialized-value. + * g++.dg/analyzer/pr94503.C: Likewise. + * gcc.dg/analyzer/clobbers-1.c: Convert "f" from a local to a + param to avoid uninitialized warning. + * gcc.dg/analyzer/data-model-1.c (test_12): Add test for + uninitialized value on result of alloca. + (test_12a): Add expected warning. + (test_12c): Likewise. + (test_19): Likewise. 
+ (test_29b): Likewise. + (test_29c): Likewise. + (test_37): Remove xfail. + (test_37a): Likewise. + * gcc.dg/analyzer/data-model-20.c: Add warning about leak. + * gcc.dg/analyzer/explode-2.c: Remove params; add + -Wno-analyzer-too-complex, -Wno-analyzer-malloc-leak, and xfails. + Initialize the locals. + * gcc.dg/analyzer/explode-2a.c: Initialize the locals. Add + expected leak. + * gcc.dg/analyzer/fgets-1.c: New test. + * gcc.dg/analyzer/fread-1.c: New test. + * gcc.dg/analyzer/malloc-1.c (test_16): Add expected warning. + (test_40): Likewise. + * gcc.dg/analyzer/memset-CVE-2017-18549-1.c: Check for + uninitialized padding. + * gcc.dg/analyzer/pr93355-localealias-feasibility.c (fread): New + decl. + (read_alias_file): Call it. + * gcc.dg/analyzer/pr94047.c: Add expected warnings. + * gcc.dg/analyzer/pr94851-2.c: Likewise. + * gcc.dg/analyzer/pr96841.c: Convert local to a param. + * gcc.dg/analyzer/pr98628.c: Likewise. + * gcc.dg/analyzer/pr99042.c: Updated expected location of leak + diagnostics. + * gcc.dg/analyzer/symbolic-1.c: Add expected warnings. + * gcc.dg/analyzer/symbolic-7.c: Likewise. + * gcc.dg/analyzer/torture/pr93649.c: Add expected warning. Skip + with -fno-fat-lto-objects. + * gcc.dg/analyzer/uninit-1.c: New test. + * gcc.dg/analyzer/uninit-2.c: New test. + * gcc.dg/analyzer/uninit-3.c: New test. + * gcc.dg/analyzer/uninit-4.c: New test. + * gcc.dg/analyzer/uninit-pr94713.c: New test. + * gcc.dg/analyzer/uninit-pr94714.c: New test. + * gcc.dg/analyzer/use-after-free-2.c: New test. + * gcc.dg/analyzer/use-after-free-3.c: New test. + * gcc.dg/analyzer/zlib-3.c: Add expected warning. + * gcc.dg/analyzer/zlib-6.c: Convert locals to params to avoid + uninitialized warnings. Remove xfail. + * gcc.dg/analyzer/zlib-6a.c: New test, based on the old version + of the above. + * gfortran.dg/analyzer/pr97668.f: Add + -Wno-analyzer-use-of-uninitialized-value and + -Wno-analyzer-too-complex. + +2021-07-15 David Malcolm + + * gcc.dg/analyzer/phi-1.c: New test. 
+ +2021-07-15 Christophe Lyon + + * lib/target-supports.exp (arm_v8_2a_imm8_neon_ok_nocache): + Delete. + +2021-07-15 Christophe Lyon + + * gcc.target/arm/simd/vusdot-autovec.c: Use arm_v8_2a_i8mm_ok + effective-target. + +2021-07-15 Jakub Jelinek + + PR c++/101443 + * g++.dg/cpp0x/nullptr46.C: New test. + +2021-07-15 Tamar Christina + + PR middle-end/101457 + * gcc.dg/vect/vect-reduc-dot-17.c: Fix signs of scalar code. + * gcc.dg/vect/vect-reduc-dot-18.c: Likewise. + * gcc.dg/vect/vect-reduc-dot-22.c: Likewise. + * gcc.dg/vect/vect-reduc-dot-9.c: Likewise. + +2021-07-15 Martin Sebor + + * gcc.dg/Wvla-parameter-12.c: New test. + +2021-07-15 Bill Schmidt + + PR target/101129 + * gcc.target/powerpc/pr101129.c: New. + +2021-07-15 Jakub Jelinek + + PR middle-end/101437 + * gcc.c-torture/compile/pr101437.c: New test. + 2021-07-14 Peter Bergner * gcc.target/powerpc/mma-builtin-9.c: New test. -- cgit v1.1 From a314d50336db752f2ae2c50262956ce4490567ac Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Thu, 15 Jul 2021 14:30:48 +0200 Subject: Disable --param vect-partial-vector-usage by default on x86 The following defaults --param vect-partial-vector-usage to zero for x86_64 matching existing behavior where support for this is not present. 2021-07-15 Richard Biener * config/i386/i386-options.c (ix86_option_override_internal): Set param_vect_partial_vector_usage to zero if not set. --- gcc/config/i386/i386-options.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'gcc') diff --git a/gcc/config/i386/i386-options.c b/gcc/config/i386/i386-options.c index 7cba655..3416a4f 100644 --- a/gcc/config/i386/i386-options.c +++ b/gcc/config/i386/i386-options.c @@ -2834,6 +2834,11 @@ ix86_option_override_internal (bool main_args_p, SET_OPTION_IF_UNSET (opts, opts_set, param_ira_consider_dup_in_all_alts, 0); + /* Fully masking the main or the epilogue vectorized loop is not + profitable generally so leave it disabled until we get more + fine grained control & costing. 
*/ + SET_OPTION_IF_UNSET (opts, opts_set, param_vect_partial_vector_usage, 0); + return true; } -- cgit v1.1 From d6aa28bb93c6fc9042ee87ff7addac60647dbddb Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Fri, 16 Jul 2021 08:50:40 +0200 Subject: tree-optimization/101462 - fix signedness of reused reduction vector This fixes the partial reduction of the reused reduction vector to be carried out in the correct sign and the correctly signed vector to be recorded for the skip edge use. 2021-07-16 Richard Biener * tree-vect-loop.c (vect_transform_cycle_phi): Correct sign conversion issues with the partial reduction of the reused vector accumulator. --- gcc/tree-vect-loop.c | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) (limited to 'gcc') diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index fc3dab0..00a57b2 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -7706,21 +7706,35 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo, if (auto *accumulator = reduc_info->reused_accumulator) { tree def = accumulator->reduc_input; - unsigned int nreduc; - bool res = constant_multiple_p (TYPE_VECTOR_SUBPARTS (TREE_TYPE (def)), - TYPE_VECTOR_SUBPARTS (vectype_out), - &nreduc); - gcc_assert (res); - if (nreduc != 1) - { - /* Reduce the single vector to a smaller one. */ + if (!useless_type_conversion_p (vectype_out, TREE_TYPE (def))) + { + unsigned int nreduc; + bool res = constant_multiple_p (TYPE_VECTOR_SUBPARTS + (TREE_TYPE (def)), + TYPE_VECTOR_SUBPARTS (vectype_out), + &nreduc); + gcc_assert (res); gimple_seq stmts = NULL; - def = vect_create_partial_epilog (def, vectype_out, - STMT_VINFO_REDUC_CODE (reduc_info), - &stmts); + /* Reduce the single vector to a smaller one. */ + if (nreduc != 1) + { + /* Perform the reduction in the appropriate type.
*/ + tree rvectype = vectype_out; + if (!useless_type_conversion_p (TREE_TYPE (vectype_out), + TREE_TYPE (TREE_TYPE (def)))) + rvectype = build_vector_type (TREE_TYPE (TREE_TYPE (def)), + TYPE_VECTOR_SUBPARTS + (vectype_out)); + def = vect_create_partial_epilog (def, rvectype, + STMT_VINFO_REDUC_CODE + (reduc_info), + &stmts); + } /* Adjust the input so we pick up the partially reduced value for the skip edge in vect_create_epilog_for_reduction. */ accumulator->reduc_input = def; + if (!useless_type_conversion_p (vectype_out, TREE_TYPE (def))) + def = gimple_convert (&stmts, vectype_out, def); if (loop_vinfo->main_loop_edge) { /* While we'd like to insert on the edge this will split -- cgit v1.1 From 2f11ca2a3a3bea38a7c5bd63e777620a4887e649 Mon Sep 17 00:00:00 2001 From: Cooper Qu Date: Fri, 16 Jul 2021 16:05:39 +0800 Subject: C-SKY: Use the common way to define MULTILIB_DIRNAMES. C-SKY previously used a forked print-sysroot-suffix.sh and defined CSKY_MULTILIB_DIRNAMES to specify OS multilib directories. This patch deletes the forked print-sysroot-suffix.sh and defines MULTILIB_DIRNAMES to generate the same directories. gcc/ * config.gcc: Don't use forked print-sysroot-suffix.sh and t-sysroot-suffix for C-SKY. * config/csky/print-sysroot-suffix.sh: Delete. * config/csky/t-csky-linux: Define MULTILIB_DIRNAMES instead of CSKY_MULTILIB_DIRNAMES. * config/csky/t-sysroot-suffix: Delete.
--- gcc/config.gcc | 5 -- gcc/config/csky/print-sysroot-suffix.sh | 147 -------------------------------- gcc/config/csky/t-csky-linux | 2 +- gcc/config/csky/t-sysroot-suffix | 28 ------ 4 files changed, 1 insertion(+), 181 deletions(-) delete mode 100644 gcc/config/csky/print-sysroot-suffix.sh delete mode 100644 gcc/config/csky/t-sysroot-suffix (limited to 'gcc') diff --git a/gcc/config.gcc b/gcc/config.gcc index f3e94f7..93e2b32 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -1568,11 +1568,6 @@ csky-*-*) tm_file="dbxelf.h elfos.h gnu-user.h linux.h glibc-stdint.h ${tm_file} csky/csky-linux-elf.h" tmake_file="${tmake_file} csky/t-csky csky/t-csky-linux" - if test "x${enable_multilib}" = xyes ; then - tm_file="$tm_file ./sysroot-suffix.h" - tmake_file="${tmake_file} csky/t-sysroot-suffix" - fi - case ${target} in csky-*-linux-gnu*) tm_defines="$tm_defines DEFAULT_LIBC=LIBC_GLIBC" diff --git a/gcc/config/csky/print-sysroot-suffix.sh b/gcc/config/csky/print-sysroot-suffix.sh deleted file mode 100644 index 4840bc6..0000000 --- a/gcc/config/csky/print-sysroot-suffix.sh +++ /dev/null @@ -1,147 +0,0 @@ -#! /bin/sh -# Script to generate SYSROOT_SUFFIX_SPEC equivalent to MULTILIB_OSDIRNAMES -# Arguments are MULTILIB_OSDIRNAMES, MULTILIB_OPTIONS and MULTILIB_MATCHES. - -# Copyright (C) 2018-2021 Free Software Foundation, Inc. -# Contributed by C-SKY Microsystems and Mentor Graphics. - -# This file is part of GCC. - -# GCC is free software; you can redistribute it and/or modify it under -# the terms of the GNU General Public License as published by the Free -# Software Foundation; either version 3, or (at your option) any later -# version. - -# GCC is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -# for more details. 
- -# You should have received a copy of the GNU General Public License -# along with GCC; see the file COPYING3. If not see -# . - -# This shell script produces a header file fragment that defines -# SYSROOT_SUFFIX_SPEC. It assumes that the sysroots will have the same -# structure and names used by the multilibs. - -# Invocation: -# print-sysroot-suffix.sh \ -# MULTILIB_OSDIRNAMES \ -# MULTILIB_OPTIONS \ -# MULTILIB_MATCHES \ -# > t-sysroot-suffix.h - -# The three options exactly correspond to the variables of the same -# names defined in the tmake_file fragments. - -# Example: -# sh ./gcc/config/print-sysroot-suffix.sh "a=A" "a b/c/d" "" -# => -# #undef SYSROOT_SUFFIX_SPEC -# #define SYSROOT_SUFFIX_SPEC "" \ -# "%{a:" \ -# "%{b:A/b/;" \ -# "c:A/c/;" \ -# "d:A/d/;" \ -# ":A/};" \ -# ":}" - -# The script uses temporary subscripts in order to permit a recursive -# algorithm without the use of functions. - -set -e - -dirnames="$1" -options="$2" -matches="$3" - -cat > print-sysroot-suffix3.sh <<\EOF -#! /bin/sh -# Print all the multilib matches for this option -result="$1" -EOF -for x in $matches; do - l=`echo $x | sed -e 's/=.*$//' -e 's/?/=/g'` - r=`echo $x | sed -e 's/^.*=//' -e 's/?/=/g'` - echo "[ \"\$1\" = \"$l\" ] && result=\"\$result|$r\"" >> print-sysroot-suffix3.sh -done -echo 'echo $result' >> print-sysroot-suffix3.sh -chmod +x print-sysroot-suffix3.sh - -cat > print-sysroot-suffix2.sh <<\EOF -#! /bin/sh -# Recursive script to enumerate all multilib combinations, match against -# multilib directories and output a spec string of the result. -# Will fold identical trees. 
- -padding="$1" -optstring="$2" -shift 2 -n="\" \\ -$padding\"" -if [ $# = 0 ]; then -EOF - -pat= -for x in $dirnames; do -# p=`echo $x | sed -e 's,=!,/$=/,'` - p=`echo $x | sed -e 's/=//g'` -# pat="$pat -e 's=^//$p='" - pat="$pat -e 's/$p/g'" -done -echo ' optstring=`echo "/$optstring" | sed '"$pat\`" >> print-sysroot-suffix2.sh -cat >> print-sysroot-suffix2.sh <<\EOF - case $optstring in - //*) - ;; - *) - echo "$optstring" - ;; - esac -else - thisopt="$1" - shift - bit= - lastcond= - result= - for x in `echo "$thisopt" | sed -e 's,/, ,g'`; do - case $x in -EOF -for x in `echo "$options" | sed -e 's,/, ,g'`; do - match=`./print-sysroot-suffix3.sh "$x"` - echo "$x) optmatch=\"$match\" ;;" >> print-sysroot-suffix2.sh -done -cat >> print-sysroot-suffix2.sh <<\EOF - esac - bit=`"$0" "$padding " "$optstring$x/" "$@"` - if [ -z "$lastopt" ]; then - lastopt="$optmatch" - else - if [ "$lastbit" = "$bit" ]; then - lastopt="$lastopt|$optmatch" - else - result="$result$lastopt:$lastbit;$n" - lastopt="$optmatch" - fi - fi - lastbit="$bit" - done - bit=`"$0" "$padding " "$optstring" "$@"` - if [ "$bit" = "$lastbit" ]; then - if [ -z "$result" ]; then - echo "$bit" - else - echo "$n%{$result:$bit}" - fi - else - echo "$n%{$result$lastopt:$lastbit;$n:$bit}" - fi -fi -EOF -chmod +x ./print-sysroot-suffix2.sh -result=`./print-sysroot-suffix2.sh \"\" \"\" $options` -echo "#undef SYSROOT_SUFFIX_SPEC" -echo "#define SYSROOT_SUFFIX_SPEC \"$result\"" -rm print-sysroot-suffix2.sh -rm print-sysroot-suffix3.sh diff --git a/gcc/config/csky/t-csky-linux b/gcc/config/csky/t-csky-linux index 0730c3a..9139040 100644 --- a/gcc/config/csky/t-csky-linux +++ b/gcc/config/csky/t-csky-linux @@ -21,7 +21,7 @@ MULTILIB_EXCEPTIONS = -CSKY_MULTILIB_OSDIRNAMES = mfloat-abi.softfp=/soft-fp mfloat-abi.hard=/hard-fp mfloat-abi.soft=/. mcpu.ck810f=/. mcpu.ck807f=/ck807 mcpu.ck860f=/ck860 +MULTILIB_OSDIRNAMES = ./ ./ck807 ./ck860 ./ ./soft-fp ./hard-fp # Arch variants. 
MULTILIB_OPTIONS += mcpu=ck810f/mcpu=ck807f/mcpu=ck860f diff --git a/gcc/config/csky/t-sysroot-suffix b/gcc/config/csky/t-sysroot-suffix deleted file mode 100644 index d891f69..0000000 --- a/gcc/config/csky/t-sysroot-suffix +++ /dev/null @@ -1,28 +0,0 @@ -# Makefile fragment for C-SKY sysroot suffix. -# -# Copyright (C) 2018-2021 Free Software Foundation, Inc. -# Contributed by C-SKY Microsystems and Mentor Graphics. -# -# This file is part of GCC. -# -# GCC is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3, or (at your option) -# any later version. -# -# GCC is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with GCC; see the file COPYING3. If not see -# . - -# Generate SYSROOT_SUFFIX_SPEC from MULTILIB_OSDIRNAMES. - -sysroot-suffix.h: $(srcdir)/config/csky/print-sysroot-suffix.sh - $(SHELL) $(srcdir)/config/csky/print-sysroot-suffix.sh \ - "$(CSKY_MULTILIB_OSDIRNAMES)" "$(MULTILIB_OPTIONS)" \ - "$(MULTILIB_MATCHES)" > tmp-sysroot-suffix.h - mv tmp-sysroot-suffix.h $@ -- cgit v1.1 From fc58c49ed92d499e7d1d11ecac8f17a3461b20a9 Mon Sep 17 00:00:00 2001 From: Jonathan Wright Date: Thu, 15 Jul 2021 13:27:04 +0100 Subject: testsuite: aarch64: Fix failing SVE tests on big endian A recent change "gcc: Add vec_select -> subreg RTL simplification" updated the expected test results for SVE extraction tests. The new result should only have been changed for little endian. This patch restores the old expected result for big endian. 
gcc/testsuite/ChangeLog: 2021-07-15 Jonathan Wright * gcc.target/aarch64/sve/extract_1.c: Split expected results by big/little endian targets, restoring the old expected result for big endian. * gcc.target/aarch64/sve/extract_2.c: Likewise. * gcc.target/aarch64/sve/extract_3.c: Likewise. * gcc.target/aarch64/sve/extract_4.c: Likewise. --- gcc/testsuite/gcc.target/aarch64/sve/extract_1.c | 10 ++++++++-- gcc/testsuite/gcc.target/aarch64/sve/extract_2.c | 10 ++++++++-- gcc/testsuite/gcc.target/aarch64/sve/extract_3.c | 10 ++++++++-- gcc/testsuite/gcc.target/aarch64/sve/extract_4.c | 10 ++++++++-- 4 files changed, 32 insertions(+), 8 deletions(-) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.target/aarch64/sve/extract_1.c b/gcc/testsuite/gcc.target/aarch64/sve/extract_1.c index 1a926db..7d76c98 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/extract_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/extract_1.c @@ -56,7 +56,10 @@ typedef _Float16 vnx8hf __attribute__((vector_size (32))); TEST_ALL (EXTRACT) -/* { dg-final { scan-assembler-times {\tfmov\tx[0-9]+, d[0-9]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfmov\tx[0-9]+, d[0-9]\n} 2 { + target { aarch64_little_endian } } } } */ +/* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[0\]\n} 1 { + target { aarch64_big_endian } } } } */ /* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[1\]\n} 1 } } */ /* { dg-final { scan-assembler-not {\tdup\td[0-9]+, v[0-9]+\.d\[0\]\n} } } */ /* { dg-final { scan-assembler-times {\tdup\td[0-9]+, v[0-9]+\.d\[1\]\n} 1 } } */ @@ -64,7 +67,10 @@ TEST_ALL (EXTRACT) /* { dg-final { scan-assembler-times {\tlastb\tx[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ /* { dg-final { scan-assembler-times {\tlastb\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tfmov\tw[0-9]+, s[0-9]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfmov\tw[0-9]+, s[0-9]\n} 2 { + target { aarch64_little_endian } } } } */ +/* { dg-final {
scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[0\]\n} 1 { + target { aarch64_big_endian } } } } */ /* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[1\]\n} 1 } } */ /* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[3\]\n} 1 } } */ /* { dg-final { scan-assembler-not {\tdup\ts[0-9]+, v[0-9]+\.s\[0\]\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/extract_2.c b/gcc/testsuite/gcc.target/aarch64/sve/extract_2.c index 1c54d10..a2644ce 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/extract_2.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/extract_2.c @@ -56,7 +56,10 @@ typedef _Float16 vnx16hf __attribute__((vector_size (64))); TEST_ALL (EXTRACT) -/* { dg-final { scan-assembler-times {\tfmov\tx[0-9]+, d[0-9]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfmov\tx[0-9]+, d[0-9]\n} 2 { + target { aarch64_little_endian } } } } */ +/* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[0\]\n} 1 { + target { aarch64_big_endian } } } } */ /* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[1\]\n} 1 } } */ /* { dg-final { scan-assembler-not {\tdup\td[0-9]+, v[0-9]+\.d\[0\]\n} } } */ /* { dg-final { scan-assembler-times {\tdup\td[0-9]+, v[0-9]+\.d\[1\]\n} 1 } } */ @@ -64,7 +67,10 @@ TEST_ALL (EXTRACT) /* { dg-final { scan-assembler-times {\tlastb\tx[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ /* { dg-final { scan-assembler-times {\tlastb\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tfmov\tw[0-9]+, s[0-9]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfmov\tw[0-9]+, s[0-9]\n} 2 { + target { aarch64_little_endian } } } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[0\]\n} 1 { + target { aarch64_big_endian } } } } */ /* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[1\]\n} 1 } } */ /* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[3\]\n} 1 } } */ /* { dg-final { scan-assembler-not {\tdup\ts[0-9]+,
v[0-9]+\.s\[0\]\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/extract_3.c b/gcc/testsuite/gcc.target/aarch64/sve/extract_3.c index 501b9f3..baa5459 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/extract_3.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/extract_3.c @@ -77,7 +77,10 @@ typedef _Float16 vnx32hf __attribute__((vector_size (128))); TEST_ALL (EXTRACT) -/* { dg-final { scan-assembler-times {\tfmov\tx[0-9]+, d[0-9]\n} 5 } } */ +/* { dg-final { scan-assembler-times {\tfmov\tx[0-9]+, d[0-9]\n} 5 { + target { aarch64_little_endian } } } } */ +/* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[0\]\n} 1 { + target { aarch64_big_endian } } } } */ /* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[1\]\n} 1 } } */ /* { dg-final { scan-assembler-not {\tdup\td[0-9]+, v[0-9]+\.d\[0\]\n} } } */ /* { dg-final { scan-assembler-times {\tdup\td[0-9]+, v[0-9]+\.d\[1\]\n} 1 } } */ @@ -86,7 +89,10 @@ TEST_ALL (EXTRACT) /* { dg-final { scan-assembler-times {\tlastb\tx[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ /* { dg-final { scan-assembler-times {\tlastb\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tfmov\tw[0-9]+, s[0-9]\n} 5 } } */ +/* { dg-final { scan-assembler-times {\tfmov\tw[0-9]+, s[0-9]\n} 5 { + target { aarch64_little_endian } } } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[0\]\n} 1 { + target { aarch64_big_endian } } } } */ /* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[1\]\n} 1 } } */ /* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[3\]\n} 1 } } */ /* { dg-final { scan-assembler-not {\tdup\ts[0-9]+, v[0-9]+\.s\[0\]\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/extract_4.c b/gcc/testsuite/gcc.target/aarch64/sve/extract_4.c index 94d3155..aa6fe48 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/extract_4.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/extract_4.c @@ -84,7 +84,10 @@ typedef _Float16 v128hf
__attribute__((vector_size (256))); TEST_ALL (EXTRACT) -/* { dg-final { scan-assembler-times {\tfmov\tx[0-9]+, d[0-9]\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tfmov\tx[0-9]+, d[0-9]\n} 6 { + target { aarch64_little_endian } } } } */ +/* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[0\]\n} 1 { + target { aarch64_big_endian } } } } */ /* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[1\]\n} 1 } } */ /* { dg-final { scan-assembler-not {\tdup\td[0-9]+, v[0-9]+\.d\[0\]\n} } } */ /* { dg-final { scan-assembler-times {\tdup\td[0-9]+, v[0-9]+\.d\[1\]\n} 1 } } */ @@ -93,7 +96,10 @@ TEST_ALL (EXTRACT) /* { dg-final { scan-assembler-times {\tlastb\tx[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ /* { dg-final { scan-assembler-times {\tlastb\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tfmov\tw[0-9]+, s[0-9]\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tfmov\tw[0-9]+, s[0-9]\n} 6 { + target { aarch64_little_endian } } } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[0\]\n} 1 { + target { aarch64_big_endian } } } } */ /* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[1\]\n} 1 } } */ /* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[3\]\n} 1 } } */ /* { dg-final { scan-assembler-not {\tdup\ts[0-9]+, v[0-9]+\.s\[0\]\n} } } */ -- cgit v1.1 From df0d7486ec9bca8a77ca106d9fbb60f819dd9cec Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Fri, 16 Jul 2021 11:17:37 +0200 Subject: tree-optimization/101467 - fix make_temp_ssa_name usage My previous change to vect_gen_while introduced paths which call make_temp_ssa_name with a NULL name which isn't supported. The following fixes that. 2021-07-16 Richard Biener PR tree-optimization/101467 * tree-vect-stmts.c (vect_gen_while): Properly guard make_temp_ssa_name usage.
--- gcc/tree-vect-stmts.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index ec82acb..0ef4696 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -11999,7 +11999,11 @@ vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index, gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3, start_index, end_index, build_zero_cst (mask_type)); - tree tmp = make_temp_ssa_name (mask_type, NULL, name); + tree tmp; + if (name) + tmp = make_temp_ssa_name (mask_type, NULL, name); + else + tmp = make_ssa_name (mask_type); gimple_call_set_lhs (call, tmp); gimple_seq_add_stmt (seq, call); return tmp; -- cgit v1.1 From 0990d93dd8a4268bff5bbe48aa26748cf63201c7 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Mon, 7 Jun 2021 13:44:15 +0200 Subject: IBM Z: Use @PLT symbols for local functions in 64-bit mode This helps with generating code for kernel hotpatches, which contain individual functions and are loaded more than 2G away from vmlinux. This should not create performance regressions for the normal use cases, because for local functions ld replaces @PLT calls with direct calls. gcc/ChangeLog: * config/s390/predicates.md (bras_sym_operand): Accept all functions in 64-bit mode, use UNSPEC_PLT31. (larl_operand): Use UNSPEC_PLT31. * config/s390/s390.c (s390_loadrelative_operand_p): Likewise. (legitimize_pic_address): Likewise. (s390_emit_tls_call_insn): Mark __tls_get_offset as function, use UNSPEC_PLT31. (s390_delegitimize_address): Use UNSPEC_PLT31. (s390_output_addr_const_extra): Likewise. (print_operand): Add @PLT to TLS calls, handle %K. (s390_function_profiler): Mark __fentry__/_mcount as function, use %K, use UNSPEC_PLT31. (s390_output_mi_thunk): Use only UNSPEC_GOT, use %K. (s390_emit_call): Use UNSPEC_PLT31. (s390_emit_tpf_eh_return): Mark __tpf_eh_return as function. * config/s390/s390.md (UNSPEC_PLT31): Rename from UNSPEC_PLT. (*movdi_64): Use %K. 
(reload_base_64): Likewise. (*sibcall_brc): Likewise. (*sibcall_brcl): Likewise. (*sibcall_value_brc): Likewise. (*sibcall_value_brcl): Likewise. (*bras): Likewise. (*brasl): Likewise. (*bras_r): Likewise. (*brasl_r): Likewise. (*bras_tls): Likewise. (*brasl_tls): Likewise. (main_base_64): Likewise. (reload_base_64): Likewise. (@split_stack_call): Likewise. gcc/testsuite/ChangeLog: * g++.dg/ext/visibility/noPLT.C: Skip on s390x. * g++.target/s390/mi-thunk.C: New test. * gcc.target/s390/nodatarel-1.c: Move foostatic to the new tests. * gcc.target/s390/pr80080-4.c: Allow @PLT suffix. * gcc.target/s390/risbg-ll-3.c: Likewise. * gcc.target/s390/call.h: Common code for the new tests. * gcc.target/s390/call-z10-pic-nodatarel.c: New test. * gcc.target/s390/call-z10-pic.c: New test. * gcc.target/s390/call-z10.c: New test. * gcc.target/s390/call-z9-pic-nodatarel.c: New test. * gcc.target/s390/call-z9-pic.c: New test. * gcc.target/s390/call-z9.c: New test. * gcc.target/s390/mfentry-m64-pic.c: New test. * gcc.target/s390/tls.h: Common code for the new TLS tests. * gcc.target/s390/tls-pic.c: New test. * gcc.target/s390/tls.c: New test. 
--- gcc/config/s390/predicates.md | 9 ++- gcc/config/s390/s390.c | 81 +++++++++++++++------- gcc/config/s390/s390.md | 32 ++++----- gcc/testsuite/g++.dg/ext/visibility/noPLT.C | 2 +- gcc/testsuite/g++.target/s390/mi-thunk.C | 23 ++++++ .../gcc.target/s390/call-z10-pic-nodatarel.c | 20 ++++++ gcc/testsuite/gcc.target/s390/call-z10-pic.c | 20 ++++++ gcc/testsuite/gcc.target/s390/call-z10.c | 20 ++++++ .../gcc.target/s390/call-z9-pic-nodatarel.c | 18 +++++ gcc/testsuite/gcc.target/s390/call-z9-pic.c | 18 +++++ gcc/testsuite/gcc.target/s390/call-z9.c | 20 ++++++ gcc/testsuite/gcc.target/s390/call.h | 40 +++++++++++ gcc/testsuite/gcc.target/s390/mfentry-m64-pic.c | 9 +++ gcc/testsuite/gcc.target/s390/nodatarel-1.c | 26 +------ gcc/testsuite/gcc.target/s390/pr80080-4.c | 2 +- gcc/testsuite/gcc.target/s390/risbg-ll-3.c | 6 +- gcc/testsuite/gcc.target/s390/tls-pic.c | 14 ++++ gcc/testsuite/gcc.target/s390/tls.c | 10 +++ gcc/testsuite/gcc.target/s390/tls.h | 23 ++++++ 19 files changed, 320 insertions(+), 73 deletions(-) create mode 100644 gcc/testsuite/g++.target/s390/mi-thunk.C create mode 100644 gcc/testsuite/gcc.target/s390/call-z10-pic-nodatarel.c create mode 100644 gcc/testsuite/gcc.target/s390/call-z10-pic.c create mode 100644 gcc/testsuite/gcc.target/s390/call-z10.c create mode 100644 gcc/testsuite/gcc.target/s390/call-z9-pic-nodatarel.c create mode 100644 gcc/testsuite/gcc.target/s390/call-z9-pic.c create mode 100644 gcc/testsuite/gcc.target/s390/call-z9.c create mode 100644 gcc/testsuite/gcc.target/s390/call.h create mode 100644 gcc/testsuite/gcc.target/s390/mfentry-m64-pic.c create mode 100644 gcc/testsuite/gcc.target/s390/tls-pic.c create mode 100644 gcc/testsuite/gcc.target/s390/tls.c create mode 100644 gcc/testsuite/gcc.target/s390/tls.h (limited to 'gcc') diff --git a/gcc/config/s390/predicates.md b/gcc/config/s390/predicates.md index 15093cb..99c343a 100644 --- a/gcc/config/s390/predicates.md +++ b/gcc/config/s390/predicates.md @@ -101,10 +101,13 @@ 
(define_special_predicate "bras_sym_operand" (ior (and (match_code "symbol_ref") - (match_test "!flag_pic || SYMBOL_REF_LOCAL_P (op)")) + (ior (match_test "!flag_pic") + (match_test "SYMBOL_REF_LOCAL_P (op)") + (and (match_test "TARGET_64BIT") + (match_test "SYMBOL_REF_FUNCTION_P (op)")))) (and (match_code "const") (and (match_test "GET_CODE (XEXP (op, 0)) == UNSPEC") - (match_test "XINT (XEXP (op, 0), 1) == UNSPEC_PLT"))))) + (match_test "XINT (XEXP (op, 0), 1) == UNSPEC_PLT31"))))) ;; Return true if OP is a PLUS that is not a legitimate ;; operand for the LA instruction. @@ -197,7 +200,7 @@ && XINT (op, 1) == UNSPEC_GOTENT) return true; if (GET_CODE (op) == UNSPEC - && XINT (op, 1) == UNSPEC_PLT) + && XINT (op, 1) == UNSPEC_PLT31) return true; if (GET_CODE (op) == UNSPEC && XINT (op, 1) == UNSPEC_INDNTPOFF) diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c index 800e0ab..b1d3b99 100644 --- a/gcc/config/s390/s390.c +++ b/gcc/config/s390/s390.c @@ -3291,7 +3291,7 @@ s390_loadrelative_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend) if (GET_CODE (addr) == SYMBOL_REF || (GET_CODE (addr) == UNSPEC && (XINT (addr, 1) == UNSPEC_GOTENT - || XINT (addr, 1) == UNSPEC_PLT))) + || XINT (addr, 1) == UNSPEC_PLT31))) { if (symref) *symref = addr; @@ -4964,7 +4964,7 @@ legitimize_pic_address (rtx orig, rtx reg) || (SYMBOL_REF_P (addr) && s390_rel_address_ok_p (addr)) || (GET_CODE (addr) == UNSPEC && (XINT (addr, 1) == UNSPEC_GOTENT - || XINT (addr, 1) == UNSPEC_PLT))) + || XINT (addr, 1) == UNSPEC_PLT31))) && GET_CODE (addend) == CONST_INT) { /* This can be locally addressed. */ @@ -5125,7 +5125,7 @@ legitimize_pic_address (rtx orig, rtx reg) /* For @PLT larl is used. This is handled like local symbol refs. 
*/ - case UNSPEC_PLT: + case UNSPEC_PLT31: gcc_unreachable (); break; @@ -5191,7 +5191,10 @@ s390_emit_tls_call_insn (rtx result_reg, rtx tls_call) emit_insn (s390_load_got ()); if (!s390_tls_symbol) - s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset"); + { + s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset"); + SYMBOL_REF_FLAGS (s390_tls_symbol) |= SYMBOL_FLAG_FUNCTION; + } insn = s390_emit_call (s390_tls_symbol, tls_call, result_reg, gen_rtx_REG (Pmode, RETURN_REGNUM)); @@ -7596,7 +7599,7 @@ s390_delegitimize_address (rtx orig_x) y = XEXP (x, 0); if (GET_CODE (y) == UNSPEC && (XINT (y, 1) == UNSPEC_GOTENT - || XINT (y, 1) == UNSPEC_PLT)) + || XINT (y, 1) == UNSPEC_PLT31)) y = XVECEXP (y, 0, 0); else return orig_x; @@ -7849,7 +7852,7 @@ s390_output_addr_const_extra (FILE *file, rtx x) output_addr_const (file, XVECEXP (x, 0, 0)); fprintf (file, "@GOTOFF"); return true; - case UNSPEC_PLT: + case UNSPEC_PLT31: output_addr_const (file, XVECEXP (x, 0, 0)); fprintf (file, "@PLT"); return true; @@ -7943,6 +7946,7 @@ print_operand_address (FILE *file, rtx addr) 'E': print opcode suffix for branch on index instruction. 'G': print the size of the operand in bytes. 'J': print tls_load/tls_gdcall/tls_ldcall suffix + 'K': print @PLT suffix for call targets and load address values. 'M': print the second word of a TImode operand. 'N': print the second word of a DImode operand. 'O': print only the displacement of a memory reference or address. @@ -8129,6 +8133,29 @@ print_operand (FILE *file, rtx x, int code) case 'Y': print_shift_count_operand (file, x); return; + + case 'K': + /* Append @PLT to both local and non-local symbols in order to support + Linux Kernel livepatching: patches contain individual functions and + are loaded further than 2G away from vmlinux, and therefore they must + call even static functions via PLT. ld will optimize @PLT away for + normal code, and keep it for patches. 
+ + Do not indiscriminately add @PLT in 31-bit mode due to the %r12 + restriction, use UNSPEC_PLT31 instead. + + @PLT only makes sense for functions, data is taken care of by + -mno-pic-data-is-text-relative. + + Adding @PLT interferes with handling of weak symbols in non-PIC code, + since their addresses are loaded with larl, which then always produces + a non-NULL result, so skip them here as well. */ + if (TARGET_64BIT + && GET_CODE (x) == SYMBOL_REF + && SYMBOL_REF_FUNCTION_P (x) + && !(SYMBOL_REF_WEAK (x) && !flag_pic)) + fprintf (file, "@PLT"); + return; } switch (GET_CODE (x)) @@ -13125,9 +13152,10 @@ s390_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED) op[3] = GEN_INT (UNITS_PER_LONG); op[2] = gen_rtx_SYMBOL_REF (Pmode, flag_fentry ? "__fentry__" : "_mcount"); - if (flag_pic) + SYMBOL_REF_FLAGS (op[2]) |= SYMBOL_FLAG_FUNCTION; + if (flag_pic && !TARGET_64BIT) { - op[2] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[2]), UNSPEC_PLT); + op[2] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[2]), UNSPEC_PLT31); op[2] = gen_rtx_CONST (Pmode, op[2]); } @@ -13142,7 +13170,7 @@ s390_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED) warning (OPT_Wcannot_profile, "nested functions cannot be profiled " "with %<-mfentry%> on s390"); else - output_asm_insn ("brasl\t0,%2", op); + output_asm_insn ("brasl\t0,%2%K2", op); } else if (TARGET_64BIT) { @@ -13154,7 +13182,7 @@ s390_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED) output_asm_insn ("stg\t%0,%1", op); if (flag_dwarf2_cfi_asm) output_asm_insn (".cfi_rel_offset\t%0,%3", op); - output_asm_insn ("brasl\t%0,%2", op); + output_asm_insn ("brasl\t%0,%2%K2", op); output_asm_insn ("lg\t%0,%1", op); if (flag_dwarf2_cfi_asm) output_asm_insn (".cfi_restore\t%0", op); @@ -13170,7 +13198,7 @@ s390_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED) output_asm_insn ("st\t%0,%1", op); if (flag_dwarf2_cfi_asm) output_asm_insn (".cfi_rel_offset\t%0,%3", op); - output_asm_insn ("brasl\t%0,%2", op); + 
output_asm_insn ("brasl\t%0,%2%K2", op); output_asm_insn ("l\t%0,%1", op); if (flag_dwarf2_cfi_asm) output_asm_insn (".cfi_restore\t%0", op); @@ -13246,9 +13274,11 @@ s390_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, if (flag_pic && !SYMBOL_REF_LOCAL_P (op[0])) { nonlocal = 1; - op[0] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[0]), - TARGET_64BIT ? UNSPEC_PLT : UNSPEC_GOT); - op[0] = gen_rtx_CONST (Pmode, op[0]); + if (!TARGET_64BIT) + { + op[0] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[0]), UNSPEC_GOT); + op[0] = gen_rtx_CONST (Pmode, op[0]); + } } /* Operand 1 is the 'this' pointer. */ @@ -13338,7 +13368,7 @@ s390_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, } /* Jump to target. */ - output_asm_insn ("jg\t%0", op); + output_asm_insn ("jg\t%0%K0", op); /* Output literal pool if required. */ if (op[5]) @@ -13729,7 +13759,7 @@ rtx_insn * s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg, rtx retaddr_reg) { - bool plt_call = false; + bool plt31_call_p = false; rtx_insn *insn; rtx vec[4] = { NULL_RTX }; int elts = 0; @@ -13744,15 +13774,15 @@ s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg, { /* When calling a global routine in PIC mode, we must replace the symbol itself with the PLT stub. */ - if (flag_pic && !SYMBOL_REF_LOCAL_P (addr_location)) + if (flag_pic && !SYMBOL_REF_LOCAL_P (addr_location) && !TARGET_64BIT) { - if (TARGET_64BIT || retaddr_reg != NULL_RTX) + if (retaddr_reg != NULL_RTX) { addr_location = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr_location), - UNSPEC_PLT); + UNSPEC_PLT31); addr_location = gen_rtx_CONST (Pmode, addr_location); - plt_call = true; + plt31_call_p = true; } else /* For -fpic code the PLT entries might use r12 which is @@ -13773,7 +13803,7 @@ s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg, register 1. 
*/ if (retaddr_reg == NULL_RTX && GET_CODE (addr_location) != SYMBOL_REF - && !plt_call) + && !plt31_call_p) { emit_move_insn (gen_rtx_REG (Pmode, SIBCALL_REGNUM), addr_location); addr_location = gen_rtx_REG (Pmode, SIBCALL_REGNUM); @@ -13781,7 +13811,7 @@ s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg, if (TARGET_INDIRECT_BRANCH_NOBP_CALL && GET_CODE (addr_location) != SYMBOL_REF - && !plt_call) + && !plt31_call_p) { /* Indirect branch thunks require the target to be a single GPR. */ addr_location = force_reg (Pmode, addr_location); @@ -13833,7 +13863,7 @@ s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg, insn = emit_call_insn (*call); /* 31-bit PLT stubs and tls calls use the GOT register implicitly. */ - if ((!TARGET_64BIT && plt_call) || tls_call != NULL_RTX) + if (plt31_call_p || tls_call != NULL_RTX) { /* s390_function_ok_for_sibcall should have denied sibcalls in this case. */ @@ -13889,7 +13919,10 @@ s390_emit_tpf_eh_return (rtx target) rtx reg, orig_ra; if (!s390_tpf_eh_return_symbol) - s390_tpf_eh_return_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tpf_eh_return"); + { + s390_tpf_eh_return_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tpf_eh_return"); + SYMBOL_REF_FLAGS (s390_tpf_eh_return_symbol) |= SYMBOL_FLAG_FUNCTION; + } reg = gen_rtx_REG (Pmode, 2); orig_ra = gen_rtx_REG (Pmode, 3); diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md index 0c5b4dc..8ad21b0 100644 --- a/gcc/config/s390/s390.md +++ b/gcc/config/s390/s390.md @@ -79,7 +79,7 @@ UNSPEC_GOTENT UNSPEC_GOT UNSPEC_GOTOFF - UNSPEC_PLT + UNSPEC_PLT31 UNSPEC_PLTOFF ; Literal pool @@ -1906,7 +1906,7 @@ vlgvg\t%0,%v1,0 vleg\t%v0,%1,0 vsteg\t%v1,%0,0 - larl\t%0,%1" + larl\t%0,%1%K1" [(set_attr "op_type" "RI,RI,RI,RI,RI,RIL,RIL,RIL,RRE,RRE,RRE,RXY,RIL,RRE,RXY, RXY,RR,RX,RXY,RX,RXY,RIL,SIL,*,*,RS,RS,VRI,VRR,VRS,VRS, VRX,VRX,RIL") @@ -2180,7 +2180,7 @@ (match_operand:SI 1 "larl_operand" "X"))] "!TARGET_64BIT && !FP_REG_P (operands[0])" - "larl\t%0,%1" + "larl\t%0,%1%K1" 
[(set_attr "op_type" "RIL") (set_attr "type" "larl") (set_attr "z10prop" "z10_fwd_A1") @@ -10373,7 +10373,7 @@ [(call (mem:QI (match_operand 0 "bras_sym_operand" "X")) (match_operand 1 "const_int_operand" "n"))] "SIBLING_CALL_P (insn) && TARGET_SMALL_EXEC" - "j\t%0" + "j\t%0%K0" [(set_attr "op_type" "RI") (set_attr "type" "branch")]) @@ -10381,7 +10381,7 @@ [(call (mem:QI (match_operand 0 "bras_sym_operand" "X")) (match_operand 1 "const_int_operand" "n"))] "SIBLING_CALL_P (insn)" - "jg\t%0" + "jg\t%0%K0" [(set_attr "op_type" "RIL") (set_attr "type" "branch")]) @@ -10434,7 +10434,7 @@ (call (mem:QI (match_operand 1 "bras_sym_operand" "X")) (match_operand 2 "const_int_operand" "n")))] "SIBLING_CALL_P (insn) && TARGET_SMALL_EXEC" - "j\t%1" + "j\t%1%K1" [(set_attr "op_type" "RI") (set_attr "type" "branch")]) @@ -10443,7 +10443,7 @@ (call (mem:QI (match_operand 1 "bras_sym_operand" "X")) (match_operand 2 "const_int_operand" "n")))] "SIBLING_CALL_P (insn)" - "jg\t%1" + "jg\t%1%K1" [(set_attr "op_type" "RIL") (set_attr "type" "branch")]) @@ -10470,7 +10470,7 @@ "!SIBLING_CALL_P (insn) && TARGET_SMALL_EXEC && GET_MODE (operands[2]) == Pmode" - "bras\t%2,%0" + "bras\t%2,%0%K0" [(set_attr "op_type" "RI") (set_attr "type" "jsr") (set_attr "z196prop" "z196_cracked")]) @@ -10482,7 +10482,7 @@ "!SIBLING_CALL_P (insn) && GET_MODE (operands[2]) == Pmode" - "brasl\t%2,%0" + "brasl\t%2,%0%K0" [(set_attr "op_type" "RIL") (set_attr "type" "jsr") (set_attr "z196prop" "z196_cracked") @@ -10576,7 +10576,7 @@ "!SIBLING_CALL_P (insn) && TARGET_SMALL_EXEC && GET_MODE (operands[3]) == Pmode" - "bras\t%3,%1" + "bras\t%3,%1%K1" [(set_attr "op_type" "RI") (set_attr "type" "jsr") (set_attr "z196prop" "z196_cracked")]) @@ -10589,7 +10589,7 @@ "!SIBLING_CALL_P (insn) && GET_MODE (operands[3]) == Pmode" - "brasl\t%3,%1" + "brasl\t%3,%1%K1" [(set_attr "op_type" "RIL") (set_attr "type" "jsr") (set_attr "z196prop" "z196_cracked") @@ -10720,7 +10720,7 @@ "!SIBLING_CALL_P (insn) && TARGET_SMALL_EXEC && 
GET_MODE (operands[3]) == Pmode" - "bras\t%3,%1%J4" + "bras\t%3,%1%K1%J4" [(set_attr "op_type" "RI") (set_attr "type" "jsr") (set_attr "z196prop" "z196_cracked")]) @@ -10734,7 +10734,7 @@ "!SIBLING_CALL_P (insn) && GET_MODE (operands[3]) == Pmode" - "brasl\t%3,%1%J4" + "brasl\t%3,%1%K1%J4" [(set_attr "op_type" "RIL") (set_attr "type" "jsr") (set_attr "z196prop" "z196_cracked") @@ -11343,7 +11343,7 @@ [(set (match_operand 0 "register_operand" "=a") (unspec [(label_ref (match_operand 1 "" ""))] UNSPEC_MAIN_BASE))] "GET_MODE (operands[0]) == Pmode" - "larl\t%0,%1" + "larl\t%0,%1%K1" [(set_attr "op_type" "RIL") (set_attr "type" "larl") (set_attr "z10prop" "z10_fwd_A1") @@ -11363,7 +11363,7 @@ [(set (match_operand 0 "register_operand" "=a") (unspec [(label_ref (match_operand 1 "" ""))] UNSPEC_RELOAD_BASE))] "GET_MODE (operands[0]) == Pmode" - "larl\t%0,%1" + "larl\t%0,%1%K1" [(set_attr "op_type" "RIL") (set_attr "type" "larl") (set_attr "z10prop" "z10_fwd_A1")]) @@ -12220,7 +12220,7 @@ "" { s390_output_split_stack_data (operands[1], operands[2], operands[3], operands[4]); - return "jg\t%0"; + return "jg\t%0%K0"; } [(set_attr "op_type" "RIL") (set_attr "type" "branch")]) diff --git a/gcc/testsuite/g++.dg/ext/visibility/noPLT.C b/gcc/testsuite/g++.dg/ext/visibility/noPLT.C index 38af05f..b888303 100644 --- a/gcc/testsuite/g++.dg/ext/visibility/noPLT.C +++ b/gcc/testsuite/g++.dg/ext/visibility/noPLT.C @@ -1,5 +1,5 @@ /* Test that -fvisibility=hidden prevents PLT. */ -/* { dg-do compile { target fpic } } */ +/* { dg-do compile { target { fpic && { ! 
s390x-*-* } } } } */ /* { dg-require-visibility "" } */ /* { dg-options "-fPIC -fvisibility=hidden" } */ /* { dg-final { scan-assembler-not "methodEv@PLT|indirect_symbol.*methodEv" } } */ diff --git a/gcc/testsuite/g++.target/s390/mi-thunk.C b/gcc/testsuite/g++.target/s390/mi-thunk.C new file mode 100644 index 0000000..7c25dc0 --- /dev/null +++ b/gcc/testsuite/g++.target/s390/mi-thunk.C @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-options "-O0 -fPIC" } */ + +class A { +public: + virtual int a (void); +}; + +class B { +public: + virtual int b (void); +}; + +class C : public B, public A { +public: + virtual int a (void); +}; + +int C::a (void) { return b(); } + +/* { dg-final { scan-assembler {\n_ZThn8_N1C1aEv:\n} { target lp64 } } } */ +/* { dg-final { scan-assembler {\n_ZThn4_N1C1aEv:\n} { target { ! lp64 } } } } */ +/* { dg-final { scan-assembler {\n\tjg\t.LTHUNK0@PLT\n} { target lp64 } } } */ diff --git a/gcc/testsuite/gcc.target/s390/call-z10-pic-nodatarel.c b/gcc/testsuite/gcc.target/s390/call-z10-pic-nodatarel.c new file mode 100644 index 0000000..4998461 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/call-z10-pic-nodatarel.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=z10 -mzarch -fPIC -mno-pic-data-is-text-relative" } */ + +#include "call.h" + +/* { dg-final { scan-assembler {brasl\t%r\d+,foo@PLT\n} } } */ +/* { dg-final { scan-assembler {lgrl\t%r2,foo@GOTENT\n} { target lp64 } } } */ +/* { dg-final { scan-assembler {lrl\t%r2,foo@GOTENT\n} { target { ! lp64 } } } } */ + +/* { dg-final { scan-assembler {brasl\t%r\d+,foostatic@PLT\n} { target lp64 } } } */ +/* { dg-final { scan-assembler {brasl\t%r\d+,foostatic\n} { target { ! lp64 } } } } */ +/* { dg-final { scan-assembler {larl\t%r2,foostatic@PLT\n} { target lp64 } } } */ +/* { dg-final { scan-assembler {larl\t%r2,foostatic\n} { target { ! 
lp64 } } } } */ + +/* { dg-final { scan-assembler {brasl\t%r\d+,fooweak@PLT\n} } } */ +/* { dg-final { scan-assembler {lgrl\t%r2,fooweak@GOTENT\n} { target lp64 } } } */ +/* { dg-final { scan-assembler {lrl\t%r2,fooweak@GOTENT\n} { target { ! lp64 } } } } */ + +/* { dg-final { scan-assembler {foos:\n\t.quad\tfoo\n\t.quad\tfoostatic\n\t.quad\tfooweak\n} { target lp64 } } } */ +/* { dg-final { scan-assembler {foos:\n\t.long\tfoo\n\t.long\tfoostatic\n\t.long\tfooweak\n} { target { ! lp64 } } } } */ diff --git a/gcc/testsuite/gcc.target/s390/call-z10-pic.c b/gcc/testsuite/gcc.target/s390/call-z10-pic.c new file mode 100644 index 0000000..210c56c --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/call-z10-pic.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=z10 -mzarch -fPIC" } */ + +#include "call.h" + +/* { dg-final { scan-assembler {brasl\t%r\d+,foo@PLT\n} } } */ +/* { dg-final { scan-assembler {lgrl\t%r2,foo@GOTENT\n} { target lp64 } } } */ +/* { dg-final { scan-assembler {lrl\t%r2,foo@GOTENT\n} { target { ! lp64 } } } } */ + +/* { dg-final { scan-assembler {brasl\t%r\d+,foostatic@PLT\n} { target lp64 } } } */ +/* { dg-final { scan-assembler {brasl\t%r\d+,foostatic\n} { target { ! lp64 } } } } */ +/* { dg-final { scan-assembler {larl\t%r2,foostatic@PLT\n} { target lp64 } } } */ +/* { dg-final { scan-assembler {larl\t%r2,foostatic\n} { target { ! lp64 } } } } */ + +/* { dg-final { scan-assembler {brasl\t%r\d+,fooweak@PLT\n} } } */ +/* { dg-final { scan-assembler {lgrl\t%r2,fooweak@GOTENT\n} { target lp64 } } } */ +/* { dg-final { scan-assembler {lrl\t%r2,fooweak@GOTENT\n} { target { ! lp64 } } } } */ + +/* { dg-final { scan-assembler {foos:\n\t.quad\tfoo\n\t.quad\tfoostatic\n\t.quad\tfooweak\n} { target lp64 } } } */ +/* { dg-final { scan-assembler {foos:\n\t.long\tfoo\n\t.long\tfoostatic\n\t.long\tfooweak\n} { target { ! 
lp64 } } } } */ diff --git a/gcc/testsuite/gcc.target/s390/call-z10.c b/gcc/testsuite/gcc.target/s390/call-z10.c new file mode 100644 index 0000000..2d7dc73 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/call-z10.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=z10 -mzarch" } */ + +#include "call.h" + +/* { dg-final { scan-assembler {brasl\t%r\d+,foo@PLT\n} { target lp64 } } } */ +/* { dg-final { scan-assembler {brasl\t%r\d+,foo\n} { target { ! lp64 } } } } */ +/* { dg-final { scan-assembler {larl\t%r2,foo@PLT\n} { target lp64 } } } */ +/* { dg-final { scan-assembler {larl\t%r2,foo\n} { target { ! lp64 } } } } */ + +/* { dg-final { scan-assembler {brasl\t%r\d+,foostatic@PLT\n} { target lp64 } } } */ +/* { dg-final { scan-assembler {brasl\t%r\d+,foostatic\n} { target { ! lp64 } } } } */ +/* { dg-final { scan-assembler {larl\t%r2,foostatic@PLT\n} { target lp64 } } } */ +/* { dg-final { scan-assembler {larl\t%r2,foostatic\n} { target { ! lp64 } } } } */ + +/* { dg-final { scan-assembler {brasl\t%r\d+,fooweak\n} } } */ +/* { dg-final { scan-assembler {larl\t%r2,fooweak\n} } } */ + +/* { dg-final { scan-assembler {foos:\n\t.quad\tfoo\n\t.quad\tfoostatic\n\t.quad\tfooweak\n} { target lp64 } } } */ +/* { dg-final { scan-assembler {foos:\n\t.long\tfoo\n\t.long\tfoostatic\n\t.long\tfooweak\n} { target { ! 
lp64 } } } } */ diff --git a/gcc/testsuite/gcc.target/s390/call-z9-pic-nodatarel.c b/gcc/testsuite/gcc.target/s390/call-z9-pic-nodatarel.c new file mode 100644 index 0000000..b1ae318 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/call-z9-pic-nodatarel.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=z9-ec -fPIC -mno-pic-data-is-text-relative" } */ + +#include "call.h" + +/* { dg-final { scan-assembler {brasl\t%r\d+,foo@PLT\n} } } */ +/* { dg-final { scan-assembler {larl\t%r\d+,foo@GOTENT\n} } } */ + +/* { dg-final { scan-assembler {brasl\t%r\d+,foostatic@PLT\n} { target lp64 } } } */ +/* { dg-final { scan-assembler {brasl\t%r\d+,foostatic\n} { target { ! lp64 } } } } */ +/* { dg-final { scan-assembler {larl\t%r2,foostatic@PLT\n} { target lp64 } } } */ +/* { dg-final { scan-assembler {larl\t%r2,foostatic\n} { target { ! lp64 } } } } */ + +/* { dg-final { scan-assembler {brasl\t%r\d+,fooweak@PLT\n} } } */ +/* { dg-final { scan-assembler {larl\t%r\d+,fooweak@GOTENT\n} } } */ + +/* { dg-final { scan-assembler {foos:\n\t.quad\tfoo\n\t.quad\tfoostatic\n\t.quad\tfooweak\n} { target lp64 } } } */ +/* { dg-final { scan-assembler {foos:\n\t.long\tfoo\n\t.long\tfoostatic\n\t.long\tfooweak\n} { target { ! lp64 } } } } */ diff --git a/gcc/testsuite/gcc.target/s390/call-z9-pic.c b/gcc/testsuite/gcc.target/s390/call-z9-pic.c new file mode 100644 index 0000000..a89e78d --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/call-z9-pic.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=z9-ec -fPIC" } */ + +#include "call.h" + +/* { dg-final { scan-assembler {brasl\t%r\d+,foo@PLT\n} } } */ +/* { dg-final { scan-assembler {larl\t%r\d+,foo@GOTENT\n} } } */ + +/* { dg-final { scan-assembler {brasl\t%r\d+,foostatic@PLT\n} { target lp64 } } } */ +/* { dg-final { scan-assembler {brasl\t%r\d+,foostatic\n} { target { ! 
lp64 } } } } */ +/* { dg-final { scan-assembler {larl\t%r2,foostatic@PLT\n} { target lp64 } } } */ +/* { dg-final { scan-assembler {larl\t%r2,foostatic\n} { target { ! lp64 } } } } */ + +/* { dg-final { scan-assembler {brasl\t%r\d+,fooweak@PLT\n} } } */ +/* { dg-final { scan-assembler {larl\t%r\d+,fooweak@GOTENT\n} } } */ + +/* { dg-final { scan-assembler {foos:\n\t.quad\tfoo\n\t.quad\tfoostatic\n\t.quad\tfooweak\n} { target lp64 } } } */ +/* { dg-final { scan-assembler {foos:\n\t.long\tfoo\n\t.long\tfoostatic\n\t.long\tfooweak\n} { target { ! lp64 } } } } */ diff --git a/gcc/testsuite/gcc.target/s390/call-z9.c b/gcc/testsuite/gcc.target/s390/call-z9.c new file mode 100644 index 0000000..21d035a --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/call-z9.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=z9-ec" } */ + +#include "call.h" + +/* { dg-final { scan-assembler {brasl\t%r\d+,foo@PLT\n} { target lp64 } } } */ +/* { dg-final { scan-assembler {brasl\t%r\d+,foo\n} { target { ! lp64 } } } } */ +/* { dg-final { scan-assembler {larl\t%r2,foo@PLT\n} { target lp64 } } } */ +/* { dg-final { scan-assembler {larl\t%r2,foo\n} { target { ! lp64 } } } } */ + +/* { dg-final { scan-assembler {brasl\t%r\d+,foostatic@PLT\n} { target lp64 } } } */ +/* { dg-final { scan-assembler {brasl\t%r\d+,foostatic\n} { target { ! lp64 } } } } */ +/* { dg-final { scan-assembler {larl\t%r2,foostatic@PLT\n} { target lp64 } } } */ +/* { dg-final { scan-assembler {larl\t%r2,foostatic\n} { target { ! lp64 } } } } */ + +/* { dg-final { scan-assembler {brasl\t%r\d+,fooweak\n} } } */ +/* { dg-final { scan-assembler {larl\t%r2,fooweak\n} } } */ + +/* { dg-final { scan-assembler {foos:\n\t.quad\tfoo\n\t.quad\tfoostatic\n\t.quad\tfooweak\n} { target lp64 } } } */ +/* { dg-final { scan-assembler {foos:\n\t.long\tfoo\n\t.long\tfoostatic\n\t.long\tfooweak\n} { target { ! 
lp64 } } } } */ diff --git a/gcc/testsuite/gcc.target/s390/call.h b/gcc/testsuite/gcc.target/s390/call.h new file mode 100644 index 0000000..059a672 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/call.h @@ -0,0 +1,40 @@ +/* Common code for testing the function call code generation. */ + +__attribute__ ((noipa)) void +foo (void) +{ + return; +} + +void * +usefoo (void) +{ + foo (); + return foo; +} + +__attribute__ ((noipa)) static void +foostatic (void) +{ + return; +} + +void * +usefoostatic (void) +{ + foostatic (); + return foostatic; +} + +__attribute__ ((weak)) void fooweak (void); + +void * +usefooweak (void) +{ + fooweak (); + return fooweak; +} + +__attribute__ ((__used__, section (".foos"), aligned (sizeof (void *)))) +static void +*foos[] = { foo, foostatic, fooweak }; diff --git a/gcc/testsuite/gcc.target/s390/mfentry-m64-pic.c b/gcc/testsuite/gcc.target/s390/mfentry-m64-pic.c new file mode 100644 index 0000000..32d55ce --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/mfentry-m64-pic.c @@ -0,0 +1,9 @@ +/* { dg-do compile { target { lp64 } } } */ +/* { dg-options "-pg -mfentry -fPIC" } */ + +void +profileme (void) +{ + /* __fentry__ must be referenced through PLT. */ + /* { dg-final { scan-assembler "brasl\t0,__fentry__@PLT\n" } } */ +} diff --git a/gcc/testsuite/gcc.target/s390/nodatarel-1.c b/gcc/testsuite/gcc.target/s390/nodatarel-1.c index f53332f..02e64c3 100644 --- a/gcc/testsuite/gcc.target/s390/nodatarel-1.c +++ b/gcc/testsuite/gcc.target/s390/nodatarel-1.c @@ -15,12 +15,6 @@ foo () return a; } -static int __attribute__((noinline,noclone)) -foostatic (void) -{ - return a; -} - /* Just to make a potentially modified. */ void @@ -29,7 +23,7 @@ bar (int b) a = b; } -/* { dg-final { scan-assembler-times "\\.LANCHOR\\d+@GOTENT" 3 } } */ +/* { dg-final { scan-assembler-times "\\.LANCHOR\\d+@GOTENT" 2 } } */ /* The exrl target is a label_ref which should not be affected at all. 
*/ @@ -63,21 +57,3 @@ fooptr () } /* { dg-final { scan-assembler-times "foo@GOTENT" 1 } } */ - - -/* A static function can be addressed relatively. */ - -int -callfoostatic () -{ - return foostatic (); -} - -void * -foostaticptr () -{ - return &foostatic; -} - - -/* { dg-final { scan-assembler-not "foostatic@" } } */ diff --git a/gcc/testsuite/gcc.target/s390/pr80080-4.c b/gcc/testsuite/gcc.target/s390/pr80080-4.c index 5fc6a55..3f16a19 100644 --- a/gcc/testsuite/gcc.target/s390/pr80080-4.c +++ b/gcc/testsuite/gcc.target/s390/pr80080-4.c @@ -13,4 +13,4 @@ void foo4(int *mem) } } -/* { dg-final { scan-assembler {(?n)\n\tlt\t.*\n\tjne\t(\.L\d+)\n(.*\n)*\tcs\t.*\n\tber\t%r14\n\1:\n\tjg\tbar\n} } } */ +/* { dg-final { scan-assembler {(?n)\n\tlt\t.*\n\tjne\t(\.L\d+)\n(.*\n)*\tcs\t.*\n\tber\t%r14\n\1:\n\tjg\tbar(@PLT)?\n} } } */ diff --git a/gcc/testsuite/gcc.target/s390/risbg-ll-3.c b/gcc/testsuite/gcc.target/s390/risbg-ll-3.c index 864b0d6..02f6e04 100644 --- a/gcc/testsuite/gcc.target/s390/risbg-ll-3.c +++ b/gcc/testsuite/gcc.target/s390/risbg-ll-3.c @@ -23,7 +23,7 @@ i64 f1 (i64 v_a, i64 v_b) extern i64 f2_foo(); i64 f2 (i64 v_a, i64 v_b) { -/* { dg-final { scan-assembler "f2:\n\trisbg\t%r2,%r3,60,62,0\n\tbner\t%r14\n\tjg\tf2_foo\n" { target { lp64 } } } } */ +/* { dg-final { scan-assembler "f2:\n\trisbg\t%r2,%r3,60,62,0\n\tbner\t%r14\n\tjg\tf2_foo.*\n" { target { lp64 } } } } */ /* { dg-final { scan-assembler "f2:\n\trisbgn\t%r3,%r2,0,0\\\+32-1,64-0-32\n\trisbg\t%r3,%r5,60,62,0" { target { ! lp64 } } } } */ i64 v_anda = v_a & -15; i64 v_andb = v_b & 14; @@ -37,8 +37,8 @@ i64 f2 (i64 v_a, i64 v_b) void f2_bar (); void f2_cconly (i64 v_a, i64 v_b) { -/* { dg-final { scan-assembler "f2_cconly:\n\trisbg\t%r2,%r3,60,62,0\n\tber\t%r14\n\tjg\tf2_bar\n" { target { lp64 } } } } */ -/* { dg-final { scan-assembler "f2_cconly:\n\trisbgn\t%r3,%r2,0,0\\\+32-1,64-0-32\n\trisbg\t%r3,%r5,60,62,0\n\tber\t%r14\n\tjg\tf2_bar\n" { target { ! 
lp64 } } } } */ +/* { dg-final { scan-assembler "f2_cconly:\n\trisbg\t%r2,%r3,60,62,0\n\tber\t%r14\n\tjg\tf2_bar(@PLT)?\n" { target { lp64 } } } } */ +/* { dg-final { scan-assembler "f2_cconly:\n\trisbgn\t%r3,%r2,0,0\\\+32-1,64-0-32\n\trisbg\t%r3,%r5,60,62,0\n\tber\t%r14\n\tjg\tf2_bar(@PLT)?\n" { target { ! lp64 } } } } */ if ((v_a & -15) | (v_b & 14)) f2_bar(); } diff --git a/gcc/testsuite/gcc.target/s390/tls-pic.c b/gcc/testsuite/gcc.target/s390/tls-pic.c new file mode 100644 index 0000000..17c5726 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/tls-pic.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fPIC" } */ + +#include "tls.h" + +/* foo must use the global dynamic model. + __tls_get_offset must be referenced through PLT. */ + +/* { dg-final { scan-assembler-times {\tbrasl\t%r14,__tls_get_offset@PLT:tls_gdcall:foo\n} 1 } } */ + +/* foostatic must use the local dynamic model. + __tls_get_offset must be referenced through PLT. */ + +/* { dg-final { scan-assembler-times {\tbrasl\t%r14,__tls_get_offset@PLT:tls_ldcall} 1 } } */ diff --git a/gcc/testsuite/gcc.target/s390/tls.c b/gcc/testsuite/gcc.target/s390/tls.c new file mode 100644 index 0000000..3d479d8 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/tls.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-O3" } */ + +#include "tls.h" + +/* foo must use the initial-exec model, foostatic must use the local-exec + model. */ + +/* { dg-final { scan-assembler-times {\tear} 4 { target lp64 } } } */ +/* { dg-final { scan-assembler-times {\tear} 2 { target { ! lp64 } } } } */ diff --git a/gcc/testsuite/gcc.target/s390/tls.h b/gcc/testsuite/gcc.target/s390/tls.h new file mode 100644 index 0000000..f639bd3 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/tls.h @@ -0,0 +1,23 @@ +/* Common code for testing the TLS code generation. 
*/ + +__thread int +foo; + +int +setfoo (int x) +{ + int result = foo; + foo = x; + return result; +} + +static __thread int +foostatic; + +int +setfoostatic (int x) +{ + int result = foostatic; + foostatic = x; + return result; +} -- cgit v1.1 From ce777eaedfdc55850b429f97b2093f2de5da09de Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Fri, 16 Jul 2021 12:02:25 +0200 Subject: Get rid of some gimple_expr_type uses This gets rid of a few gimple_expr_type uses. 2021-07-16 Richard Biener * gimple-fold.c (gimple_fold_stmt_to_constant_1): Use the type of the LHS. (gimple_assign_nonnegative_warnv_p): Likewise. (gimple_call_nonnegative_warnv_p): Likewise. Return false if the call has no LHS. * gimple.c (gimple_could_trap_p_1): Use the type of the LHS. * tree-eh.c (stmt_could_throw_1_p): Likewise. * tree-inline.c (insert_init_stmt): Likewise. * tree-ssa-loop-niter.c (get_val_for): Likewise. * tree-outof-ssa.c (ssa_is_replaceable_p): Use the type of the def. * tree-ssa-sccvn.c (init_vn_nary_op_from_stmt): Take a gassign *. Use the type of the lhs. (vn_nary_op_lookup_stmt): Adjust. (vn_nary_op_insert_stmt): Likewise. 
--- gcc/gimple-fold.c | 20 +++++++++++--------- gcc/gimple.c | 4 ++-- gcc/tree-eh.c | 2 +- gcc/tree-inline.c | 2 +- gcc/tree-outof-ssa.c | 2 +- gcc/tree-ssa-loop-niter.c | 4 ++-- gcc/tree-ssa-sccvn.c | 12 ++++++------ 7 files changed, 24 insertions(+), 22 deletions(-) (limited to 'gcc') diff --git a/gcc/gimple-fold.c b/gcc/gimple-fold.c index 1401092..a3afe87 100644 --- a/gcc/gimple-fold.c +++ b/gcc/gimple-fold.c @@ -7507,7 +7507,8 @@ gimple_fold_stmt_to_constant_1 (gimple *stmt, tree (*valueize) (tree), tree op1 = (*valueize) (gimple_assign_rhs2 (stmt)); tree op2 = (*valueize) (gimple_assign_rhs3 (stmt)); return fold_ternary_loc (loc, subcode, - gimple_expr_type (stmt), op0, op1, op2); + TREE_TYPE (gimple_assign_lhs (stmt)), + op0, op1, op2); } default: @@ -8901,16 +8902,17 @@ gimple_assign_nonnegative_warnv_p (gimple *stmt, bool *strict_overflow_p, int depth) { enum tree_code code = gimple_assign_rhs_code (stmt); + tree type = TREE_TYPE (gimple_assign_lhs (stmt)); switch (get_gimple_rhs_class (code)) { case GIMPLE_UNARY_RHS: return tree_unary_nonnegative_warnv_p (gimple_assign_rhs_code (stmt), - gimple_expr_type (stmt), + type, gimple_assign_rhs1 (stmt), strict_overflow_p, depth); case GIMPLE_BINARY_RHS: return tree_binary_nonnegative_warnv_p (gimple_assign_rhs_code (stmt), - gimple_expr_type (stmt), + type, gimple_assign_rhs1 (stmt), gimple_assign_rhs2 (stmt), strict_overflow_p, depth); @@ -8938,12 +8940,12 @@ gimple_call_nonnegative_warnv_p (gimple *stmt, bool *strict_overflow_p, gimple_call_arg (stmt, 0) : NULL_TREE; tree arg1 = gimple_call_num_args (stmt) > 1 ? 
gimple_call_arg (stmt, 1) : NULL_TREE; - - return tree_call_nonnegative_warnv_p (gimple_expr_type (stmt), - gimple_call_combined_fn (stmt), - arg0, - arg1, - strict_overflow_p, depth); + tree lhs = gimple_call_lhs (stmt); + return (lhs + && tree_call_nonnegative_warnv_p (TREE_TYPE (lhs), + gimple_call_combined_fn (stmt), + arg0, arg1, + strict_overflow_p, depth)); } /* Return true if return value of call STMT is known to be non-negative. diff --git a/gcc/gimple.c b/gcc/gimple.c index 0690f94..863bc0d 100644 --- a/gcc/gimple.c +++ b/gcc/gimple.c @@ -2164,12 +2164,12 @@ gimple_could_trap_p_1 (const gimple *s, bool include_mem, bool include_stores) if (op == COND_EXPR) return tree_could_trap_p (gimple_assign_rhs1 (s)); - /* For comparisons we need to check rhs operand types instead of rhs type + /* For comparisons we need to check rhs operand types instead of lhs type (which is BOOLEAN_TYPE). */ if (TREE_CODE_CLASS (op) == tcc_comparison) t = TREE_TYPE (gimple_assign_rhs1 (s)); else - t = gimple_expr_type (s); + t = TREE_TYPE (gimple_assign_lhs (s)); if (get_gimple_rhs_class (op) == GIMPLE_BINARY_RHS) div = gimple_assign_rhs2 (s); diff --git a/gcc/tree-eh.c b/gcc/tree-eh.c index 57ce8f0..3a09de9 100644 --- a/gcc/tree-eh.c +++ b/gcc/tree-eh.c @@ -2856,7 +2856,7 @@ stmt_could_throw_1_p (gassign *stmt) if (TREE_CODE_CLASS (code) == tcc_comparison) t = TREE_TYPE (gimple_assign_rhs1 (stmt)); else - t = gimple_expr_type (stmt); + t = TREE_TYPE (gimple_assign_lhs (stmt)); fp_operation = FLOAT_TYPE_P (t); if (fp_operation) { diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c index f605e76..8e6cdd3 100644 --- a/gcc/tree-inline.c +++ b/gcc/tree-inline.c @@ -3398,7 +3398,7 @@ insert_init_stmt (copy_body_data *id, basic_block bb, gimple *init_stmt) && gimple_assign_rhs_class (init_stmt) == GIMPLE_UNARY_RHS) { tree rhs = build1 (gimple_assign_rhs_code (init_stmt), - gimple_expr_type (init_stmt), + TREE_TYPE (gimple_assign_lhs (init_stmt)), gimple_assign_rhs1 (init_stmt)); rhs = 
force_gimple_operand_gsi (&si, rhs, true, NULL_TREE, false, GSI_NEW_STMT); diff --git a/gcc/tree-outof-ssa.c b/gcc/tree-outof-ssa.c index b730196..1a133a0 100644 --- a/gcc/tree-outof-ssa.c +++ b/gcc/tree-outof-ssa.c @@ -87,7 +87,7 @@ ssa_is_replaceable_p (gimple *stmt) /* Float expressions must go through memory if float-store is on. */ if (flag_float_store - && FLOAT_TYPE_P (gimple_expr_type (stmt))) + && FLOAT_TYPE_P (TREE_TYPE (def))) return false; /* An assignment with a register variable on the RHS is not diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c index b5add82..6fabf10 100644 --- a/gcc/tree-ssa-loop-niter.c +++ b/gcc/tree-ssa-loop-niter.c @@ -3018,7 +3018,7 @@ get_val_for (tree x, tree base) else if (gimple_assign_rhs_class (stmt) == GIMPLE_UNARY_RHS && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME) return fold_build1 (gimple_assign_rhs_code (stmt), - gimple_expr_type (stmt), + TREE_TYPE (gimple_assign_lhs (stmt)), get_val_for (gimple_assign_rhs1 (stmt), base)); else if (gimple_assign_rhs_class (stmt) == GIMPLE_BINARY_RHS) { @@ -3031,7 +3031,7 @@ get_val_for (tree x, tree base) else gcc_unreachable (); return fold_build2 (gimple_assign_rhs_code (stmt), - gimple_expr_type (stmt), rhs1, rhs2); + TREE_TYPE (gimple_assign_lhs (stmt)), rhs1, rhs2); } else gcc_unreachable (); diff --git a/gcc/tree-ssa-sccvn.c b/gcc/tree-ssa-sccvn.c index d6aee2e..7900df9 100644 --- a/gcc/tree-ssa-sccvn.c +++ b/gcc/tree-ssa-sccvn.c @@ -423,7 +423,7 @@ static unsigned int vn_nary_length_from_stmt (gimple *); static vn_nary_op_t alloc_vn_nary_op_noinit (unsigned int, obstack *); static vn_nary_op_t vn_nary_op_insert_into (vn_nary_op_t, vn_nary_op_table_type *, bool); -static void init_vn_nary_op_from_stmt (vn_nary_op_t, gimple *); +static void init_vn_nary_op_from_stmt (vn_nary_op_t, gassign *); static void init_vn_nary_op_from_pieces (vn_nary_op_t, unsigned int, enum tree_code, tree, tree *); static tree vn_lookup_simplify_result (gimple_match_op *); @@ 
-2377,7 +2377,7 @@ vn_nary_build_or_lookup_1 (gimple_match_op *res_op, bool insert) vno1->length = length; vno1->predicated_values = 0; vno1->u.result = result; - init_vn_nary_op_from_stmt (vno1, new_stmt); + init_vn_nary_op_from_stmt (vno1, as_a (new_stmt)); vn_nary_op_insert_into (vno1, valid_info->nary, true); /* Also do not link it into the undo chain. */ last_inserted_nary = vno1->next; @@ -3882,12 +3882,12 @@ vn_nary_length_from_stmt (gimple *stmt) /* Initialize VNO from STMT. */ static void -init_vn_nary_op_from_stmt (vn_nary_op_t vno, gimple *stmt) +init_vn_nary_op_from_stmt (vn_nary_op_t vno, gassign *stmt) { unsigned i; vno->opcode = gimple_assign_rhs_code (stmt); - vno->type = gimple_expr_type (stmt); + vno->type = TREE_TYPE (gimple_assign_lhs (stmt)); switch (vno->opcode) { case REALPART_EXPR: @@ -3968,7 +3968,7 @@ vn_nary_op_lookup_stmt (gimple *stmt, vn_nary_op_t *vnresult) vn_nary_op_t vno1 = XALLOCAVAR (struct vn_nary_op_s, sizeof_vn_nary_op (vn_nary_length_from_stmt (stmt))); - init_vn_nary_op_from_stmt (vno1, stmt); + init_vn_nary_op_from_stmt (vno1, as_a (stmt)); return vn_nary_op_lookup_1 (vno1, vnresult); } @@ -4221,7 +4221,7 @@ vn_nary_op_insert_stmt (gimple *stmt, tree result) vn_nary_op_t vno1 = alloc_vn_nary_op (vn_nary_length_from_stmt (stmt), result, VN_INFO (result)->value_id); - init_vn_nary_op_from_stmt (vno1, stmt); + init_vn_nary_op_from_stmt (vno1, as_a (stmt)); return vn_nary_op_insert_into (vno1, valid_info->nary, true); } -- cgit v1.1 From 8da8ed435e9f01b37bf4ee57fa62509d44121c7d Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Fri, 16 Jul 2021 14:21:29 +0200 Subject: Implement EAF_NOT_RETURNED tracking in ipa-modref 2021-07-16 Jan Hubicka * ipa-modref.c (struct escape_entry): Use eaf_flags_t. (dump_eaf_flags): Dump EAF_NOT_RETURNED. (eaf_flags_useful_p): Use eaf_flags_t; handle const functions and EAF_NOT_RETURNED. (modref_summary::useful_p): Likewise. (modref_summary_lto::useful_p): Likewise.
(struct modref_summary_lto): Use eaf_flags_t. (deref_flags): Handle EAF_NOT_RETURNED. (struct escape_point): Use min_flags. (modref_lattice::init): Add EAF_NOT_RETURNED. (merge_call_lhs_flags): Ignore EAF_NOT_RETURNED functions. (analyze_ssa_name_flags): Clear EAF_NOT_RETURNED on return; handle call flags. (analyze_parms): Also analyze const functions; update condition on flags usefulness. (modref_write): Update streaming. (read_section): Update streaming. (remap_arg_flags): Use eaf_flags_t. (modref_merge_call_site_flags): Handle EAF_NOT_RETURNED. * ipa-modref.h: (eaf_flags_t): New typedef. (struct modref_summary): Use eaf_flags_t. * tree-core.h (EAF_NOT_RETURNED): New constant. --- gcc/ipa-modref.c | 71 ++++++++++++++++++++++++++++++++++++++------------------ gcc/ipa-modref.h | 3 ++- gcc/tree-core.h | 3 +++ 3 files changed, 54 insertions(+), 23 deletions(-) (limited to 'gcc') diff --git a/gcc/ipa-modref.c b/gcc/ipa-modref.c index d5a8332..7b849c1 100644 --- a/gcc/ipa-modref.c +++ b/gcc/ipa-modref.c @@ -86,6 +86,7 @@ along with GCC; see the file COPYING3. If not see #include "stringpool.h" #include "tree-ssanames.h" + namespace { /* We record fnspec specifiers for call edges since they depends on actual @@ -135,7 +136,7 @@ struct escape_entry /* Argument it escapes to. */ unsigned int arg; /* Minimal flags known about the argument. */ - char min_flags; + eaf_flags_t min_flags; /* Does it escape directly or indirectly? */ bool direct; }; @@ -155,6 +156,8 @@ dump_eaf_flags (FILE *out, int flags, bool newline = true) fprintf (out, " nodirectescape"); if (flags & EAF_UNUSED) fprintf (out, " unused"); + if (flags & EAF_NOT_RETURNED) + fprintf (out, " not_returned"); if (newline) fprintf (out, "\n"); } @@ -278,12 +281,17 @@ modref_summary::~modref_summary () /* Return true if FLAGS holds some useful information.
*/ static bool -eaf_flags_useful_p (vec &flags, int ecf_flags) +eaf_flags_useful_p (vec &flags, int ecf_flags) { for (unsigned i = 0; i < flags.length (); i++) - if (ecf_flags & ECF_PURE) + if (ecf_flags & ECF_CONST) { - if (flags[i] & (EAF_UNUSED | EAF_DIRECT)) + if (flags[i] & (EAF_UNUSED | EAF_NOT_RETURNED)) + return true; + } + else if (ecf_flags & ECF_PURE) + { + if (flags[i] & (EAF_UNUSED | EAF_DIRECT | EAF_NOT_RETURNED)) return true; } else @@ -300,13 +308,15 @@ eaf_flags_useful_p (vec &flags, int ecf_flags) bool modref_summary::useful_p (int ecf_flags, bool check_flags) { - if (ecf_flags & (ECF_CONST | ECF_NOVOPS)) + if (ecf_flags & ECF_NOVOPS) return false; if (arg_flags.length () && !check_flags) return true; if (check_flags && eaf_flags_useful_p (arg_flags, ecf_flags)) return true; arg_flags.release (); + if (ecf_flags & ECF_CONST) + return false; if (loads && !loads->every_base) return true; if (ecf_flags & ECF_PURE) @@ -325,7 +335,7 @@ struct GTY(()) modref_summary_lto more verbose and thus more likely to hit the limits. 
*/ modref_records_lto *loads; modref_records_lto *stores; - auto_vec GTY((skip)) arg_flags; + auto_vec GTY((skip)) arg_flags; bool writes_errno; modref_summary_lto (); @@ -356,13 +366,15 @@ modref_summary_lto::~modref_summary_lto () bool modref_summary_lto::useful_p (int ecf_flags, bool check_flags) { - if (ecf_flags & (ECF_CONST | ECF_NOVOPS)) + if (ecf_flags & ECF_NOVOPS) return false; if (arg_flags.length () && !check_flags) return true; if (check_flags && eaf_flags_useful_p (arg_flags, ecf_flags)) return true; arg_flags.release (); + if (ecf_flags & ECF_CONST) + return false; if (loads && !loads->every_base) return true; if (ecf_flags & ECF_PURE) @@ -1317,6 +1329,8 @@ deref_flags (int flags, bool ignore_stores) if ((flags & EAF_NOESCAPE) || ignore_stores) ret |= EAF_NOESCAPE; } + if (flags & EAF_NOT_RETURNED) + ret |= EAF_NOT_RETURNED; return ret; } @@ -1332,7 +1346,7 @@ struct escape_point int arg; /* Flags already known about the argument (this can save us from recording esape points if local analysis did good job already). */ - char min_flags; + eaf_flags_t min_flags; /* Does value escape directly or indiretly? */ bool direct; }; @@ -1366,7 +1380,7 @@ void modref_lattice::init () { flags = EAF_DIRECT | EAF_NOCLOBBER | EAF_NOESCAPE | EAF_UNUSED - | EAF_NODIRECTESCAPE; + | EAF_NODIRECTESCAPE | EAF_NOT_RETURNED; open = true; known = false; } @@ -1539,6 +1553,9 @@ merge_call_lhs_flags (gcall *call, int arg, int index, bool deref, && (flags & ERF_RETURN_ARG_MASK) != arg) return; + if (gimple_call_arg_flags (call, arg) & (EAF_NOT_RETURNED | EAF_UNUSED)) + return; + /* If return value is SSA name determine its flags. 
*/ if (TREE_CODE (gimple_call_lhs (call)) == SSA_NAME) { @@ -1613,9 +1630,12 @@ analyze_ssa_name_flags (tree name, vec &lattice, int depth, if (greturn *ret = dyn_cast (use_stmt)) { if (gimple_return_retval (ret) == name) - lattice[index].merge (~EAF_UNUSED); + lattice[index].merge (~(EAF_UNUSED | EAF_NOT_RETURNED)); else if (memory_access_to (gimple_return_retval (ret), name)) - lattice[index].merge_direct_load (); + { + lattice[index].merge_direct_load (); + lattice[index].merge (~EAF_NOT_RETURNED); + } } /* Account for LHS store, arg loads and flags from callee function. */ else if (gcall *call = dyn_cast (use_stmt)) @@ -1666,7 +1686,8 @@ analyze_ssa_name_flags (tree name, vec &lattice, int depth, { if (!(ecf_flags & (ECF_CONST | ECF_NOVOPS))) { - int call_flags = gimple_call_arg_flags (call, i); + int call_flags = gimple_call_arg_flags (call, i) + | EAF_NOT_RETURNED; if (ignore_stores) call_flags |= EAF_NOCLOBBER | EAF_NOESCAPE | EAF_NODIRECTESCAPE; @@ -1689,7 +1710,8 @@ analyze_ssa_name_flags (tree name, vec &lattice, int depth, else { int call_flags = deref_flags - (gimple_call_arg_flags (call, i), ignore_stores); + (gimple_call_arg_flags (call, i) + | EAF_NOT_RETURNED, ignore_stores); if (!record_ipa) lattice[index].merge (call_flags); else @@ -1819,8 +1841,8 @@ analyze_parms (modref_summary *summary, modref_summary_lto *summary_lto, unsigned int count = 0; int ecf_flags = flags_from_decl_or_type (current_function_decl); - /* For const functions we have nothing to gain by EAF flags. */ - if (ecf_flags & (ECF_CONST | ECF_NOVOPS)) + /* For novops functions we have nothing to gain by EAF flags. */ + if (ecf_flags & ECF_NOVOPS) return; for (tree parm = DECL_ARGUMENTS (current_function_decl); parm; @@ -1863,7 +1885,11 @@ analyze_parms (modref_summary *summary, modref_summary_lto *summary_lto, /* For pure functions we have implicit NOCLOBBER and NOESCAPE. 
*/ if (ecf_flags & ECF_PURE) - flags &= ~(EAF_NOCLOBBER | EAF_NOESCAPE | EAF_NODIRECTESCAPE); + flags &= (EAF_UNUSED | EAF_DIRECT | EAF_NOT_RETURNED); + /* Only useful flags for const function are EAF_NOT_RETURNED and + EAF_UNUSED. */ + if (ecf_flags & ECF_CONST) + flags &= (EAF_UNUSED | EAF_NOT_RETURNED); if (flags) { @@ -2518,7 +2544,7 @@ modref_write () streamer_write_uhwi (ob, r->arg_flags.length ()); for (unsigned int i = 0; i < r->arg_flags.length (); i++) - streamer_write_char_stream (ob->main_stream, r->arg_flags[i]); + streamer_write_uhwi (ob, r->arg_flags[i]); write_modref_records (r->loads, ob); write_modref_records (r->stores, ob); @@ -2609,7 +2635,7 @@ read_section (struct lto_file_decl_data *file_data, const char *data, modref_sum_lto->arg_flags.reserve_exact (args); for (unsigned int i = 0; i < args; i++) { - unsigned char flags = streamer_read_uchar (&ib); + eaf_flags_t flags = streamer_read_uhwi (&ib); if (modref_sum) modref_sum->arg_flags.quick_push (flags); if (modref_sum_lto) @@ -2713,9 +2739,9 @@ modref_read (void) /* Recompute arg_flags for param adjustments in INFO. */ static void -remap_arg_flags (auto_vec &arg_flags, clone_info *info) +remap_arg_flags (auto_vec &arg_flags, clone_info *info) { - auto_vec old = arg_flags.copy (); + auto_vec old = arg_flags.copy (); int max = -1; size_t i; ipa_adjusted_param *p; @@ -3665,8 +3691,9 @@ modref_merge_call_site_flags (escape_summary *sum, flags |= EAF_NOESCAPE | EAF_NOCLOBBER | EAF_NODIRECTESCAPE; flags_lto |= EAF_NOESCAPE | EAF_NOCLOBBER | EAF_NODIRECTESCAPE; } - flags |= ee->min_flags; - flags_lto |= ee->min_flags; + /* Returning the value is already accounted to at local propagation. 
*/ + flags |= ee->min_flags | EAF_NOT_RETURNED; + flags_lto |= ee->min_flags | EAF_NOT_RETURNED; if (!(flags & EAF_UNUSED) && cur_summary && ee->parm_index < cur_summary->arg_flags.length ()) { diff --git a/gcc/ipa-modref.h b/gcc/ipa-modref.h index 8af62b3..498cc24 100644 --- a/gcc/ipa-modref.h +++ b/gcc/ipa-modref.h @@ -21,6 +21,7 @@ along with GCC; see the file COPYING3. If not see #define IPA_MODREF_H typedef modref_tree modref_records; +typedef unsigned short eaf_flags_t; /* Single function summary. */ @@ -29,7 +30,7 @@ struct GTY(()) modref_summary /* Load and stores in function (transitively closed to all callees) */ modref_records *loads; modref_records *stores; - auto_vec GTY((skip)) arg_flags; + auto_vec GTY((skip)) arg_flags; bool writes_errno; modref_summary (); diff --git a/gcc/tree-core.h b/gcc/tree-core.h index 23cd289..9391609 100644 --- a/gcc/tree-core.h +++ b/gcc/tree-core.h @@ -114,6 +114,9 @@ struct die_struct; referenced by it can escape. */ #define EAF_NODIRECTESCAPE (1 << 4) +/* Nonzero if the argument does not escape to return value. */ +#define EAF_NOT_RETURNED (1 << 8) + /* Call return flags. */ /* Mask for the argument number that is returned. Lower two bits of the return flags, encodes argument slots zero to three. */ -- cgit v1.1 From 650c70a9fe7198394d3bbe4c0b1a7a73dc0bdd4a Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Fri, 16 Jul 2021 13:26:20 +0200 Subject: Remove more gimple_expr_type uses This removes a few more uses. 2021-07-16 Richard Biener * gimple-ssa-store-merging.c (verify_symbolic_number_p): Use the type of the LHS. (find_bswap_or_nop_1): Likewise. (find_bswap_or_nop): Likewise. * tree-vectorizer.h (vect_get_smallest_scalar_type): Adjust prototype. * tree-vect-data-refs.c (vect_get_smallest_scalar_type): Remove unused parameters, pass in the scalar type. Fix internal store function handling. * tree-vect-stmts.c (vect_analyze_stmt): Remove assert. 
(vect_get_vector_types_for_stmt): Move down check for existing vector stmt after we've determined a scalar type. Pass down the used scalar type to vect_get_smallest_scalar_type. * tree-vect-generic.c (expand_vector_condition): Use the type of the LHS. (expand_vector_scalar_condition): Likewise. (expand_vector_operations_1): Likewise. * tree-vect-patterns.c (vect_widened_op_tree): Likewise. (vect_recog_dot_prod_pattern): Likewise. (vect_recog_sad_pattern): Likewise. (vect_recog_widen_op_pattern): Likewise. (vect_recog_widen_sum_pattern): Likewise. (vect_recog_mixed_size_cond_pattern): Likewise. --- gcc/gimple-ssa-store-merging.c | 6 +++--- gcc/tree-vect-data-refs.c | 38 +++++++++++++++++++++----------------- gcc/tree-vect-generic.c | 8 ++++---- gcc/tree-vect-patterns.c | 12 ++++++------ gcc/tree-vect-stmts.c | 17 ++++++++--------- gcc/tree-vectorizer.h | 3 +-- 6 files changed, 43 insertions(+), 41 deletions(-) (limited to 'gcc') diff --git a/gcc/gimple-ssa-store-merging.c b/gcc/gimple-ssa-store-merging.c index 20959ac..ce54c78 100644 --- a/gcc/gimple-ssa-store-merging.c +++ b/gcc/gimple-ssa-store-merging.c @@ -313,7 +313,7 @@ verify_symbolic_number_p (struct symbolic_number *n, gimple *stmt) { tree lhs_type; - lhs_type = gimple_expr_type (stmt); + lhs_type = TREE_TYPE (gimple_get_lhs (stmt)); if (TREE_CODE (lhs_type) != INTEGER_TYPE && TREE_CODE (lhs_type) != ENUMERAL_TYPE) @@ -702,7 +702,7 @@ find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit) int i, type_size, old_type_size; tree type; - type = gimple_expr_type (stmt); + type = TREE_TYPE (gimple_assign_lhs (stmt)); type_size = TYPE_PRECISION (type); if (type_size % BITS_PER_UNIT != 0) return NULL; @@ -851,7 +851,7 @@ find_bswap_or_nop_finalize (struct symbolic_number *n, uint64_t *cmpxchg, gimple * find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap) { - tree type_size = TYPE_SIZE_UNIT (gimple_expr_type (stmt)); + tree type_size = TYPE_SIZE_UNIT (TREE_TYPE (gimple_get_lhs 
(stmt))); if (!tree_fits_uhwi_p (type_size)) return NULL; diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c index 579149d..6995efb 100644 --- a/gcc/tree-vect-data-refs.c +++ b/gcc/tree-vect-data-refs.c @@ -116,11 +116,8 @@ vect_lanes_optab_supported_p (const char *name, convert_optab optab, types. */ tree -vect_get_smallest_scalar_type (stmt_vec_info stmt_info, - HOST_WIDE_INT *lhs_size_unit, - HOST_WIDE_INT *rhs_size_unit) +vect_get_smallest_scalar_type (stmt_vec_info stmt_info, tree scalar_type) { - tree scalar_type = gimple_expr_type (stmt_info->stmt); HOST_WIDE_INT lhs, rhs; /* During the analysis phase, this function is called on arbitrary @@ -131,21 +128,24 @@ vect_get_smallest_scalar_type (stmt_vec_info stmt_info, lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type)); gassign *assign = dyn_cast (stmt_info->stmt); - if (assign - && (gimple_assign_cast_p (assign) + if (assign) + { + scalar_type = TREE_TYPE (gimple_assign_lhs (assign)); + if (gimple_assign_cast_p (assign) || gimple_assign_rhs_code (assign) == DOT_PROD_EXPR || gimple_assign_rhs_code (assign) == WIDEN_SUM_EXPR || gimple_assign_rhs_code (assign) == WIDEN_MULT_EXPR || gimple_assign_rhs_code (assign) == WIDEN_LSHIFT_EXPR || gimple_assign_rhs_code (assign) == WIDEN_PLUS_EXPR || gimple_assign_rhs_code (assign) == WIDEN_MINUS_EXPR - || gimple_assign_rhs_code (assign) == FLOAT_EXPR)) - { - tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (assign)); + || gimple_assign_rhs_code (assign) == FLOAT_EXPR) + { + tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (assign)); - rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type)); - if (rhs < lhs) - scalar_type = rhs_type; + rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type)); + if (rhs < lhs) + scalar_type = rhs_type; + } } else if (gcall *call = dyn_cast (stmt_info->stmt)) { @@ -153,10 +153,16 @@ vect_get_smallest_scalar_type (stmt_vec_info stmt_info, if (gimple_call_internal_p (call)) { internal_fn ifn = gimple_call_internal_fn (call); - if 
(internal_load_fn_p (ifn) || internal_store_fn_p (ifn)) - /* gimple_expr_type already picked the type of the loaded - or stored data. */ + if (internal_load_fn_p (ifn)) + /* For loads the LHS type does the trick. */ i = ~0U; + else if (internal_store_fn_p (ifn)) + { + /* For stores use the tyep of the stored value. */ + i = internal_fn_stored_value_index (ifn); + scalar_type = TREE_TYPE (gimple_call_arg (call, i)); + i = ~0U; + } else if (internal_fn_mask_index (ifn) == 0) i = 1; } @@ -172,8 +178,6 @@ vect_get_smallest_scalar_type (stmt_vec_info stmt_info, } } - *lhs_size_unit = lhs; - *rhs_size_unit = rhs; return scalar_type; } diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c index 5f3f9fa..a1257db 100644 --- a/gcc/tree-vect-generic.c +++ b/gcc/tree-vect-generic.c @@ -1017,7 +1017,7 @@ static bool expand_vector_condition (gimple_stmt_iterator *gsi, bitmap dce_ssa_names) { gassign *stmt = as_a (gsi_stmt (*gsi)); - tree type = gimple_expr_type (stmt); + tree type = TREE_TYPE (gimple_assign_lhs (stmt)); tree a = gimple_assign_rhs1 (stmt); tree a1 = a; tree a2 = NULL_TREE; @@ -1745,11 +1745,11 @@ static void expand_vector_scalar_condition (gimple_stmt_iterator *gsi) { gassign *stmt = as_a (gsi_stmt (*gsi)); - tree type = gimple_expr_type (stmt); + tree lhs = gimple_assign_lhs (stmt); + tree type = TREE_TYPE (lhs); tree compute_type = get_compute_type (COND_EXPR, mov_optab, type); machine_mode compute_mode = TYPE_MODE (compute_type); gcc_assert (compute_mode != BLKmode); - tree lhs = gimple_assign_lhs (stmt); tree rhs2 = gimple_assign_rhs2 (stmt); tree rhs3 = gimple_assign_rhs3 (stmt); tree new_rhs; @@ -2129,10 +2129,10 @@ expand_vector_operations_1 (gimple_stmt_iterator *gsi, return; rhs1 = gimple_assign_rhs1 (stmt); - type = gimple_expr_type (stmt); if (rhs_class == GIMPLE_BINARY_RHS) rhs2 = gimple_assign_rhs2 (stmt); + type = TREE_TYPE (lhs); if (!VECTOR_TYPE_P (type) || !VECTOR_TYPE_P (TREE_TYPE (rhs1))) return; diff --git a/gcc/tree-vect-patterns.c 
b/gcc/tree-vect-patterns.c index 53ced5d..c249444 100644 --- a/gcc/tree-vect-patterns.c +++ b/gcc/tree-vect-patterns.c @@ -564,7 +564,7 @@ vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code, if (rhs_code != code && rhs_code != widened_code) return 0; - tree type = gimple_expr_type (assign); + tree type = TREE_TYPE (gimple_assign_lhs (assign)); if (!INTEGRAL_TYPE_P (type)) return 0; @@ -1006,7 +1006,7 @@ vect_recog_dot_prod_pattern (vec_info *vinfo, &oprnd0, &oprnd1)) return NULL; - type = gimple_expr_type (last_stmt); + type = TREE_TYPE (gimple_get_lhs (last_stmt)); vect_unpromoted_value unprom_mult; oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult); @@ -1135,7 +1135,7 @@ vect_recog_sad_pattern (vec_info *vinfo, &plus_oprnd0, &plus_oprnd1)) return NULL; - tree sum_type = gimple_expr_type (last_stmt); + tree sum_type = TREE_TYPE (gimple_get_lhs (last_stmt)); /* Any non-truncating sequence of conversions is OK here, since with a successful match, the result of the ABS(U) is known to fit @@ -1258,7 +1258,7 @@ vect_recog_widen_op_pattern (vec_info *vinfo, /* Pattern detected. */ vect_pattern_detected (name, last_stmt); - tree type = gimple_expr_type (last_stmt); + tree type = TREE_TYPE (gimple_get_lhs (last_stmt)); tree itype = type; if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2 || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type)) @@ -1653,7 +1653,7 @@ vect_recog_widen_sum_pattern (vec_info *vinfo, &oprnd0, &oprnd1)) return NULL; - type = gimple_expr_type (last_stmt); + type = TREE_TYPE (gimple_get_lhs (last_stmt)); /* So far so good. 
Since last_stmt was detected as a (summation) reduction, we know that oprnd1 is the reduction variable (defined by a loop-header @@ -3716,7 +3716,7 @@ vect_recog_mixed_size_cond_pattern (vec_info *vinfo, if (comp_vectype == NULL_TREE) return NULL; - type = gimple_expr_type (last_stmt); + type = TREE_TYPE (gimple_assign_lhs (last_stmt)); if (types_compatible_p (type, comp_scalar_type) || ((TREE_CODE (then_clause) != INTEGER_CST || TREE_CODE (else_clause) != INTEGER_CST) diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 0ef4696..d71552296 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -10802,8 +10802,6 @@ vect_analyze_stmt (vec_info *vinfo, if (STMT_VINFO_RELEVANT_P (stmt_info)) { - tree type = gimple_expr_type (stmt_info->stmt); - gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type))); gcall *call = dyn_cast (stmt_info->stmt); gcc_assert (STMT_VINFO_VECTYPE (stmt_info) || (call && gimple_call_lhs (call) == NULL_TREE)); @@ -12076,11 +12074,6 @@ vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info, "not vectorized: irregular stmt.%G", stmt); } - if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt)))) - return opt_result::failure_at (stmt, - "not vectorized: vector stmt in loop:%G", - stmt); - tree vectype; tree scalar_type = NULL_TREE; if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info)) @@ -12130,6 +12123,12 @@ vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info, if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype); } + + if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type))) + return opt_result::failure_at (stmt, + "not vectorized: vector stmt in loop:%G", + stmt); + *stmt_vectype_out = vectype; /* Don't try to compute scalar types if the stmt produces a boolean @@ -12140,8 +12139,8 @@ vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info, /* The number of units is set according to the smallest scalar type (or the largest vector size, 
but we only support one vector size per vectorization). */ - HOST_WIDE_INT dummy; - scalar_type = vect_get_smallest_scalar_type (stmt_info, &dummy, &dummy); + scalar_type = vect_get_smallest_scalar_type (stmt_info, + TREE_TYPE (vectype)); if (scalar_type != TREE_TYPE (vectype)) { if (dump_enabled_p ()) diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index f7c08ca..d9f0195 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -1960,8 +1960,7 @@ extern opt_tree vect_get_mask_type_for_stmt (stmt_vec_info, unsigned int = 0); extern bool vect_can_force_dr_alignment_p (const_tree, poly_uint64); extern enum dr_alignment_support vect_supportable_dr_alignment (vec_info *, dr_vec_info *, bool); -extern tree vect_get_smallest_scalar_type (stmt_vec_info, HOST_WIDE_INT *, - HOST_WIDE_INT *); +extern tree vect_get_smallest_scalar_type (stmt_vec_info, tree); extern opt_result vect_analyze_data_ref_dependences (loop_vec_info, unsigned int *); extern bool vect_slp_analyze_instance_dependence (vec_info *, slp_instance); extern opt_result vect_enhance_data_refs_alignment (loop_vec_info); -- cgit v1.1 From e32234536f361796e7cad8ed69a1c0bb46ee55de Mon Sep 17 00:00:00 2001 From: Marek Polacek Date: Thu, 15 Jul 2021 18:45:49 -0400 Subject: c++: Don't hide narrowing errors in system headers Jonathan pointed me at this issue where constexpr unsigned f() { constexpr int n = -1; return unsigned{n}; } is accepted in system headers, despite the narrowing conversion from a constant. I suspect that whereas narrowing warnings should be disabled, ill-formed narrowing of constants should be a hard error (which can still be disabled by -Wno-narrowing). gcc/cp/ChangeLog: * typeck2.c (check_narrowing): Don't suppress the pedantic error in system headers. libstdc++-v3/ChangeLog: * testsuite/20_util/ratio/operations/ops_overflow_neg.cc: Add dg-error. gcc/testsuite/ChangeLog: * g++.dg/cpp1y/Wnarrowing2.C: New test. * g++.dg/cpp1y/Wnarrowing2.h: New test. 
--- gcc/cp/typeck2.c | 1 + gcc/testsuite/g++.dg/cpp1y/Wnarrowing2.C | 4 ++++ gcc/testsuite/g++.dg/cpp1y/Wnarrowing2.h | 2 ++ 3 files changed, 7 insertions(+) create mode 100644 gcc/testsuite/g++.dg/cpp1y/Wnarrowing2.C create mode 100644 gcc/testsuite/g++.dg/cpp1y/Wnarrowing2.h (limited to 'gcc') diff --git a/gcc/cp/typeck2.c b/gcc/cp/typeck2.c index 6679e24..dcfdff2 100644 --- a/gcc/cp/typeck2.c +++ b/gcc/cp/typeck2.c @@ -986,6 +986,7 @@ check_narrowing (tree type, tree init, tsubst_flags_t complain, { int savederrorcount = errorcount; global_dc->pedantic_errors = 1; + auto s = make_temp_override (global_dc->dc_warn_system_headers, true); pedwarn (loc, OPT_Wnarrowing, "narrowing conversion of %qE from %qH to %qI", init, ftype, type); diff --git a/gcc/testsuite/g++.dg/cpp1y/Wnarrowing2.C b/gcc/testsuite/g++.dg/cpp1y/Wnarrowing2.C new file mode 100644 index 0000000..048d484 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp1y/Wnarrowing2.C @@ -0,0 +1,4 @@ +// { dg-do compile { target c++14 } } + +#include "Wnarrowing2.h" +// { dg-error "narrowing conversion" "" { target *-*-* } 0 } diff --git a/gcc/testsuite/g++.dg/cpp1y/Wnarrowing2.h b/gcc/testsuite/g++.dg/cpp1y/Wnarrowing2.h new file mode 100644 index 0000000..7dafa51 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp1y/Wnarrowing2.h @@ -0,0 +1,2 @@ +#pragma GCC system_header +constexpr unsigned f() { constexpr int n = -1; return unsigned{n}; } -- cgit v1.1 From 3bf6e1f89dbd2c67c419d3d4e94085208611262f Mon Sep 17 00:00:00 2001 From: Tamar Christina Date: Fri, 16 Jul 2021 15:56:35 +0100 Subject: testsuite: fix IL32 issues with usdot tests. Fix tests when int == long by using long long instead. gcc/testsuite/ChangeLog: PR middle-end/101457 * gcc.dg/vect/vect-reduc-dot-19.c: Use long long. * gcc.dg/vect/vect-reduc-dot-20.c: Likewise. * gcc.dg/vect/vect-reduc-dot-21.c: Likewise. * gcc.dg/vect/vect-reduc-dot-22.c: Likewise. 
--- gcc/testsuite/gcc.dg/vect/vect-reduc-dot-19.c | 8 ++++---- gcc/testsuite/gcc.dg/vect/vect-reduc-dot-20.c | 8 ++++---- gcc/testsuite/gcc.dg/vect/vect-reduc-dot-21.c | 6 +++--- gcc/testsuite/gcc.dg/vect/vect-reduc-dot-22.c | 6 +++--- 4 files changed, 14 insertions(+), 14 deletions(-) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-19.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-19.c index dbeaaec..d00f24a 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-19.c +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-19.c @@ -13,15 +13,15 @@ #define SIGNEDNESS_4 unsigned #endif -SIGNEDNESS_1 long __attribute__ ((noipa)) -f (SIGNEDNESS_1 long res, SIGNEDNESS_3 char *restrict a, +SIGNEDNESS_1 long long __attribute__ ((noipa)) +f (SIGNEDNESS_1 long long res, SIGNEDNESS_3 char *restrict a, SIGNEDNESS_4 short *restrict b) { for (__INTPTR_TYPE__ i = 0; i < N; ++i) { int av = a[i]; int bv = b[i]; - SIGNEDNESS_2 long mult = av * bv; + SIGNEDNESS_2 long long mult = av * bv; res += mult; } return res; @@ -37,7 +37,7 @@ main (void) SIGNEDNESS_3 char a[N]; SIGNEDNESS_4 short b[N]; - int expected = 0x12345; + SIGNEDNESS_1 long long expected = 0x12345; for (int i = 0; i < N; ++i) { a[i] = BASE + i * 5; diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-20.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-20.c index d757fb1..17adbca8 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-20.c +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-20.c @@ -13,15 +13,15 @@ #define SIGNEDNESS_4 unsigned #endif -SIGNEDNESS_1 long __attribute__ ((noipa)) -f (SIGNEDNESS_1 long res, SIGNEDNESS_3 short *restrict a, +SIGNEDNESS_1 long long __attribute__ ((noipa)) +f (SIGNEDNESS_1 long long res, SIGNEDNESS_3 short *restrict a, SIGNEDNESS_4 char *restrict b) { for (__INTPTR_TYPE__ i = 0; i < N; ++i) { int av = a[i]; int bv = b[i]; - SIGNEDNESS_2 long mult = av * bv; + SIGNEDNESS_2 long long mult = av * bv; res += mult; } return res; @@ -37,7 +37,7 @@ main (void) SIGNEDNESS_3 short 
a[N]; SIGNEDNESS_4 char b[N]; - int expected = 0x12345; + SIGNEDNESS_1 long long expected = 0x12345; for (int i = 0; i < N; ++i) { a[i] = BASE + i * 5; diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-21.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-21.c index 6d08bf4..6cc6a4f 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-21.c +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-21.c @@ -13,8 +13,8 @@ #define SIGNEDNESS_4 unsigned #endif -SIGNEDNESS_1 long __attribute__ ((noipa)) -f (SIGNEDNESS_1 long res, SIGNEDNESS_3 char *restrict a, +SIGNEDNESS_1 long long __attribute__ ((noipa)) +f (SIGNEDNESS_1 long long res, SIGNEDNESS_3 char *restrict a, SIGNEDNESS_4 short *restrict b) { for (__INTPTR_TYPE__ i = 0; i < N; ++i) @@ -37,7 +37,7 @@ main (void) SIGNEDNESS_3 char a[N]; SIGNEDNESS_4 short b[N]; - int expected = 0x12345; + SIGNEDNESS_1 long long expected = 0x12345; for (int i = 0; i < N; ++i) { a[i] = BASE + i * 5; diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-22.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-22.c index 0bde43a..e13d3d5 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-22.c +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-22.c @@ -13,8 +13,8 @@ #define SIGNEDNESS_4 unsigned #endif -SIGNEDNESS_1 long __attribute__ ((noipa)) -f (SIGNEDNESS_1 long res, SIGNEDNESS_3 char *restrict a, +SIGNEDNESS_1 long long __attribute__ ((noipa)) +f (SIGNEDNESS_1 long long res, SIGNEDNESS_3 char *restrict a, SIGNEDNESS_4 short *restrict b) { for (__INTPTR_TYPE__ i = 0; i < N; ++i) @@ -37,7 +37,7 @@ main (void) SIGNEDNESS_3 char a[N]; SIGNEDNESS_4 short b[N]; - SIGNEDNESS_1 long expected = 0x12345; + SIGNEDNESS_1 long long expected = 0x12345; for (int i = 0; i < N; ++i) { a[i] = BASE + i * 5; -- cgit v1.1 From 27d16cb56972279fb1ca4e5c5a79d4c5b0b86610 Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Tue, 15 Jun 2021 08:07:39 -0500 Subject: Support scanning of build-time GC roots in gengtype Currently gengtype supports scanning target-specific files for GC 
roots, but those files must exist in the source tree. This patch extends the support to include header files generated into the build directory. It also allows targets to specify build dependencies for s-gtype to ensure the built headers are up to date prior to running gengtype. 2021-06-15 Bill Schmidt gcc/ * Makefile.in (EXTRA_GTYPE_DEPS): New variable. (s-gtype): Depend on EXTRA_GTYPE_DEPS. * gengtype-state.c (state_writer::write_state_file_list): Add a parameter to the fileslist expression for the number of build headers to scan. (read_state_files_list): Detect build headers and strip the initial "./" or ".\" from their names. * gengtype.c (build_headers): New global variable. (num_build_headers): Likewise. (open_base_files): Emit #include for each build header. (main): Detect and count build headers. * gengtype.h (build_headers): New extern variable. (num_build_headers): Likewise. --- gcc/Makefile.in | 5 +++-- gcc/gengtype-state.c | 32 ++++++++++++++++++++++++++------ gcc/gengtype.c | 22 +++++++++++++++++++--- gcc/gengtype.h | 5 +++++ 4 files changed, 53 insertions(+), 11 deletions(-) (limited to 'gcc') diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 934b2a0..1666ef8 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -560,6 +560,7 @@ out_file=$(srcdir)/config/@out_file@ out_object_file=@out_object_file@ common_out_file=$(srcdir)/common/config/@common_out_file@ common_out_object_file=@common_out_object_file@ +EXTRA_GTYPE_DEPS= md_file=$(srcdir)/common.md $(srcdir)/config/@md_file@ tm_file_list=@tm_file_list@ tm_include_list=@tm_include_list@ @@ -2746,8 +2747,8 @@ s-gtyp-input: Makefile $(SHELL) $(srcdir)/../move-if-change tmp-gi.list gtyp-input.list $(STAMP) s-gtyp-input -s-gtype: build/gengtype$(build_exeext) $(filter-out [%], $(GTFILES)) \ - gtyp-input.list +s-gtype: $(EXTRA_GTYPE_DEPS) build/gengtype$(build_exeext) \ + $(filter-out [%], $(GTFILES)) gtyp-input.list # First, parse all files and save a state file. 
$(RUN_GEN) build/gengtype$(build_exeext) $(GENGTYPE_FLAGS) \ -S $(srcdir) -I gtyp-input.list -w tmp-gtype.state diff --git a/gcc/gengtype-state.c b/gcc/gengtype-state.c index e9775ed..ac9d536 100644 --- a/gcc/gengtype-state.c +++ b/gcc/gengtype-state.c @@ -1269,7 +1269,7 @@ state_writer::write_state_files_list (void) int i = 0; /* Write the list of files with their lang_bitmap. */ begin_s_expr ("fileslist"); - fprintf (state_file, "%d", (int) num_gt_files); + fprintf (state_file, "%d %d", (int) num_gt_files, (int) num_build_headers); for (i = 0; i < (int) num_gt_files; i++) { const char *cursrcrelpath = NULL; @@ -2456,16 +2456,20 @@ read_state_files_list (void) struct state_token_st *t0 = peek_state_token (0); struct state_token_st *t1 = peek_state_token (1); struct state_token_st *t2 = peek_state_token (2); + struct state_token_st *t3 = peek_state_token (3); if (state_token_kind (t0) == STOK_LEFTPAR && state_token_is_name (t1, "!fileslist") - && state_token_kind (t2) == STOK_INTEGER) + && state_token_kind (t2) == STOK_INTEGER + && state_token_kind (t3) == STOK_INTEGER) { - int i = 0; + int i = 0, j = 0; num_gt_files = t2->stok_un.stok_num; - next_state_tokens (3); - t0 = t1 = t2 = NULL; + num_build_headers = t3->stok_un.stok_num; + next_state_tokens (4); + t0 = t1 = t2 = t3 = NULL; gt_files = XCNEWVEC (const input_file *, num_gt_files); + build_headers = XCNEWVEC (const char *, num_build_headers); for (i = 0; i < (int) num_gt_files; i++) { bool issrcfile = FALSE; @@ -2498,7 +2502,23 @@ read_state_files_list (void) free (fullpath); } else - curgt = input_file_by_name (fnam); + { + curgt = input_file_by_name (fnam); + /* Look for a header file created during the build, + which looks like "./.h". */ + int len = strlen (fnam); + if (len >= 5 + && fnam[0] == '.' + && IS_DIR_SEPARATOR (fnam[1]) + && fnam[len-2] == '.' + && fnam[len-1] == 'h') + { + char *buf = (char *) xmalloc (len - 1); + /* Strip the leading "./" from the filename. 
*/ + strcpy (buf, &fnam[2]); + build_headers[j++] = buf; + } + } set_lang_bitmap (curgt, bmap); gt_files[i] = curgt; next_state_tokens (2); diff --git a/gcc/gengtype.c b/gcc/gengtype.c index d105c9b..31d4bf4 100644 --- a/gcc/gengtype.c +++ b/gcc/gengtype.c @@ -143,6 +143,11 @@ get_ultimate_base_class (type_p s) const input_file **gt_files; size_t num_gt_files; +/* Table of headers to be included in gtype-desc.c that are generated + during the build. These are identified as "./.h". */ +const char **build_headers; +size_t num_build_headers; + /* A number of places use the name of this "gengtype.c" file for a location for things that we can't rely on the source to define. Make sure we can still use pointer comparison on filenames. */ @@ -1736,6 +1741,8 @@ open_base_files (void) gtype_desc_c = create_file ("GCC", "gtype-desc.c"); for (ifp = ifiles; *ifp; ifp++) oprintf (gtype_desc_c, "#include \"%s\"\n", *ifp); + for (int j = 0; j < (int) num_build_headers; j++) + oprintf (gtype_desc_c, "#include \"%s\"\n", build_headers[j]); /* Make sure we handle "cfun" specially. */ oprintf (gtype_desc_c, "\n/* See definition in function.h. */\n"); @@ -5216,11 +5223,20 @@ main (int argc, char **argv) &pos)); #undef POS_HERE read_input_list (inputlist); + num_build_headers = 0; for (i = 0; i < num_gt_files; i++) { - parse_file (get_input_file_name (gt_files[i])); - DBGPRINTF ("parsed file #%d %s", - (int) i, get_input_file_name (gt_files[i])); + const char *fname = get_input_file_name (gt_files[i]); + parse_file (fname); + DBGPRINTF ("parsed file #%d %s", (int) i, fname); + /* Check if this is a header file generated during the build. */ + int len = strlen (fname); + if (len >= 5 + && fname[0] == '.' + && IS_DIR_SEPARATOR (fname[1]) + && fname[len-2] == '.' 
+ && fname[len-1] == 'h') + num_build_headers++; } if (verbosity_level >= 1) printf ("%s parsed %d files with %d GTY types\n", diff --git a/gcc/gengtype.h b/gcc/gengtype.h index 4fe8f0f..8a7a549 100644 --- a/gcc/gengtype.h +++ b/gcc/gengtype.h @@ -55,6 +55,11 @@ struct fileloc extern const input_file** gt_files; extern size_t num_gt_files; +/* Table of headers to be included in gtype-desc.c that are generated + during the build. These are identified as "./.h". */ +extern const char **build_headers; +extern size_t num_build_headers; + /* A number of places use the name of this "gengtype.c" file for a location for things that we can't rely on the source to define. We also need to refer to the "system.h" file specifically. These two -- cgit v1.1 From bd5b625228d545d5ecb35df24f9f094edc95e3fa Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Tue, 15 Jun 2021 09:35:34 -0500 Subject: rs6000: Initial create of rs6000-gen-builtins.c 2021-04-02 Bill Schmidt gcc/ * config/rs6000/rs6000-gen-builtins.c: New. --- gcc/config/rs6000/rs6000-gen-builtins.c | 165 ++++++++++++++++++++++++++++++++ 1 file changed, 165 insertions(+) create mode 100644 gcc/config/rs6000/rs6000-gen-builtins.c (limited to 'gcc') diff --git a/gcc/config/rs6000/rs6000-gen-builtins.c b/gcc/config/rs6000/rs6000-gen-builtins.c new file mode 100644 index 0000000..6ab7d7b --- /dev/null +++ b/gcc/config/rs6000/rs6000-gen-builtins.c @@ -0,0 +1,165 @@ +/* Generate built-in function initialization and recognition for Power. + Copyright (C) 2020-21 Free Software Foundation, Inc. + Contributed by Bill Schmidt, IBM + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. 
+ +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +/* This program generates built-in function initialization and + recognition code for Power targets, based on text files that + describe the built-in functions and vector overloads: + + rs6000-builtin-new.def Table of built-in functions + rs6000-overload.def Table of overload functions + + Both files group similar functions together in "stanzas," as + described below. + + Each stanza in the built-in function file starts with a line + identifying the circumstances in which the group of functions is + permitted, with the gating predicate in square brackets. For + example, this could be + + [altivec] + + or it could be + + [power9] + + The bracketed gating predicate is the only information allowed on + the stanza header line, other than whitespace. + + Following the stanza header are two lines for each function: the + prototype line and the attributes line. The prototype line has + this format, where the square brackets indicate optional + information and angle brackets indicate required information: + + [kind] <return-type> <bif-name> (<argument-list>); + + Here [kind] can be one of "const", "pure", or "fpmath"; + <return-type> is a legal type for a built-in function result; + <bif-name> is the name by which the function can be called; + and <argument-list> is a comma-separated list of legal types + for built-in function arguments. The argument list may be + empty, but the parentheses and semicolon are required.
+ + The attributes line looks like this: + + <bif-id> <bif-pattern> {<attribute-list>} + + Here <bif-id> is a unique internal identifier for the built-in + function that will be used as part of an enumeration of all + built-in functions; <bif-pattern> is the define_expand or + define_insn that will be invoked when the call is expanded; + and <attribute-list> is a comma-separated list of special + conditions that apply to the built-in function. The attribute + list may be empty, but the braces are required. + + Attributes are strings, such as these: + + init Process as a vec_init function + set Process as a vec_set function + extract Process as a vec_extract function + nosoft Not valid with -msoft-float + ldvec Needs special handling for vec_ld semantics + stvec Needs special handling for vec_st semantics + reve Needs special handling for element reversal + pred Needs special handling for comparison predicates + htm Needs special handling for transactional memory + htmspr HTM function using an SPR + htmcr HTM function using a CR + mma Needs special handling for MMA instructions + quad MMA instruction using a register quad as an input operand + pair MMA instruction using a register pair as an input operand + no32bit Not valid for TARGET_32BIT + 32bit Requires different handling for TARGET_32BIT + cpu This is a "cpu_is" or "cpu_supports" builtin + ldstmask Altivec mask for load or store + lxvrse Needs special handling for load-rightmost, sign-extended + lxvrze Needs special handling for load-rightmost, zero-extended + endian Needs special handling for endianness + + An example stanza might look like this: + +[altivec] + const vsc __builtin_altivec_abs_v16qi (vsc); + ABS_V16QI absv16qi2 {} + const vss __builtin_altivec_abs_v8hi (vss); + ABS_V8HI absv8hi2 {} + + Here "vsc" and "vss" are shorthand for "vector signed char" and + "vector signed short" to shorten line lengths and improve readability. + Note the use of indentation, which is recommended but not required. + + The overload file has more complex stanza headers.
Here the stanza + represents all functions with the same overloaded function name: + + [<overload-id>, <abi-name>, <builtin-name>[[, <ifdef>]] ] + + Here the single square brackets are part of the syntax, <overload-id> + is a unique internal identifier for the overload that will be used as + part of an enumeration of all overloaded functions; <abi-name> is the + name that will appear as a #define in rs6000-vecdefines.h; + <builtin-name> is the name that is overloaded in the back end; and + <ifdef> is an optional token used to guard the #define with an #ifdef + in rs6000-vecdefines.h. + + Each function entry again has two lines. The first line is again a + prototype line (this time without [kind]): + + <return-type> <internal-name> (<argument-list>); + + The second line contains the <bif-name> that this particular instance of + the overloaded function maps to. It must match a token that appears in + rs6000-builtin-new.def. Optionally, a second token may appear. If only + one token is on the line, it is also used to build the unique identifier + for the overloaded function. If a second token is present, the second + token is used instead for this purpose. This is necessary in cases + where a built-in function accepts more than one type signature. It is + common to have a built-in function that, for example, specifies a + "vector signed char" argument, but accepts "vector unsigned char" and + "vector bool char" as well because only the mode matters. Note that + the overload resolution mechanism has always handled these cases by + performing fold_convert on vector arguments to hide type mismatches, + and it will continue to do so. + + As a concrete example, __builtin_altivec_mtvscr uses an opaque argument + type for the source operand. Its built-in function id is MTVSCR. The + overloaded function __builtin_vec_mtvscr takes a variety of specific + types, but not all vector types. 
Each of these maps to the same + __builtin_altivec_mtvscr built-in function, but the overload ID must + be unique, so we must specify the second token as shown here. 
+ + [VEC_MTVSCR, vec_mtvscr, __builtin_vec_mtvscr] + void __builtin_vec_mtvscr (vbc); + MTVSCR MTVSCR_VBC + void __builtin_vec_mtvscr (vsc); + MTVSCR MTVSCR_VSC + ... + + Blank lines may be used as desired in these files between the lines as + defined above; that is, you can introduce as many extra newlines as you + like after a required newline, but nowhere else. Lines beginning with + a semicolon are also treated as blank lines. */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdarg.h> +#include <ctype.h> +#include <string.h> +#include <assert.h> +#include <unistd.h> +#include <stdint.h> -- cgit v1.1 From 4a720a9547320699aceda7d2e0b08de5ab40132f Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Fri, 2 Apr 2021 16:23:13 -0500 Subject: rs6000: Add initial input files This patch adds a tiny subset of the built-in and overload descriptions. 2021-04-02 Bill Schmidt gcc/ * config/rs6000/rs6000-builtin-new.def: New. * config/rs6000/rs6000-overload.def: New. --- gcc/config/rs6000/rs6000-builtin-new.def | 199 +++++++++++++++++++++++++++++++ gcc/config/rs6000/rs6000-overload.def | 82 +++++++++++++ 2 files changed, 281 insertions(+) create mode 100644 gcc/config/rs6000/rs6000-builtin-new.def create mode 100644 gcc/config/rs6000/rs6000-overload.def (limited to 'gcc') diff --git a/gcc/config/rs6000/rs6000-builtin-new.def b/gcc/config/rs6000/rs6000-builtin-new.def new file mode 100644 index 0000000..a84a3de --- /dev/null +++ b/gcc/config/rs6000/rs6000-builtin-new.def @@ -0,0 +1,199 @@ +; Built-in functions for PowerPC. +; Copyright (C) 2020-21 Free Software Foundation, Inc. +; Contributed by Bill Schmidt, IBM +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. 
+; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; <http://www.gnu.org/licenses/>. + + +; Built-in functions in this file are organized into "stanzas", where +; all built-ins in a given stanza are enabled together. Each stanza +; starts with a line identifying the circumstances in which the group of +; functions is permitted, with the gating predicate in square brackets. +; For example, this could be +; +; [altivec] +; +; or it could be +; +; [power9] +; +; The bracketed gating predicate is the only information allowed on +; the stanza header line, other than whitespace. +; +; Following the stanza header are two lines for each function: the +; prototype line and the attributes line. The prototype line has +; this format, where the square brackets indicate optional +; information and angle brackets indicate required information: +; +; [kind] <return-type> <bif-name> (<argument-list>); +; +; Here [kind] can be one of "const", "pure", or "fpmath"; +; <return-type> is a legal type for a built-in function result; +; <bif-name> is the name by which the function can be called; +; and <argument-list> is a comma-separated list of legal types +; for built-in function arguments. The argument list may be +; empty, but the parentheses and semicolon are required. +; +; A legal type is of the form: +; +; [const] [[signed|unsigned] <basetype> | <vectype>] [*] +; +; where "const" applies only to a <basetype> of "int". 
Legal values +; of <basetype> are (for now): +; +; char +; short +; int +; long +; long double +; long long +; float +; double +; __int128 +; _Float128 +; bool +; string +; _Decimal32 +; _Decimal64 +; _Decimal128 +; __ibm128 +; +; Legal values of <vectype> are as follows, and are shorthand for +; the associated meaning: +; +; vsc vector signed char +; vuc vector unsigned char +; vbc vector bool char +; vss vector signed short +; vus vector unsigned short +; vbs vector bool short +; vsi vector signed int +; vui vector unsigned int +; vbi vector bool int +; vsll vector signed long long +; vull vector unsigned long long +; vbll vector bool long long +; vsq vector signed __int128 +; vuq vector unsigned __int128 +; vbq vector bool __int128 +; vp vector pixel +; vf vector float +; vd vector double +; v256 __vector_pair +; v512 __vector_quad +; +; For simplicity, We don't support "short int" and "long long int". +; We don't currently support a <basetype> of "_Float16". "signed" +; and "unsigned" only apply to integral base types. The optional * +; indicates a pointer type. +; +; The attributes line looks like this: +; +; <bif-id> <bif-pattern> {<attribute-list>} +; +; Here <bif-id> is a unique internal identifier for the built-in +; function that will be used as part of an enumeration of all +; built-in functions; <bif-pattern> is the define_expand or +; define_insn that will be invoked when the call is expanded; +; and <attribute-list> is a comma-separated list of special +; conditions that apply to the built-in function. The attribute +; list may be empty, but the braces are required. +; +; Attributes are strings, and the allowed ones are listed below. 
+; +; init Process as a vec_init function +; set Process as a vec_set function +; extract Process as a vec_extract function +; nosoft Not valid with -msoft-float +; ldvec Needs special handling for vec_ld semantics +; stvec Needs special handling for vec_st semantics +; reve Needs special handling for element reversal +; pred Needs special handling for comparison predicates +; htm Needs special handling for transactional memory +; htmspr HTM function using an SPR +; htmcr HTM function using a CR +; mma Needs special handling for MMA +; quad MMA instruction using a register quad as an input operand +; pair MMA instruction using a register pair as an input operand +; no32bit Not valid for TARGET_32BIT +; 32bit Requires different handling for TARGET_32BIT +; cpu This is a "cpu_is" or "cpu_supports" builtin +; ldstmask Altivec mask for load or store +; lxvrse Needs special handling for load-rightmost, sign-extended +; lxvrze Needs special handling for load-rightmost, zero-extended +; endian Needs special handling for endianness +; +; Each attribute corresponds to extra processing required when +; the built-in is expanded. All such special processing should +; be controlled by an attribute from now on. +; +; It is important to note that each entry's <bif-name> must be +; unique. The code generated from this file will call def_builtin +; for each entry, and this can only happen once per name. +; +; The type signature for the builtin must match the modes of the RTL +; pattern <bif-pattern>. When a builtin is used only as a basis for +; overloading, you can use an arbitrary type for each mode (for example, +; for V8HImode, you could use vp, vss, vus, or vbs). The overloading +; machinery takes care of adding appropriate casts between vectors to +; satisfy impedance matching. The overloaded prototypes are the ones +; that must match what users expect. 
Thus you will often have a small +; number of entries in this file that correspond to a much greater +; number of entries in rs6000-overload.def. 
+; +; However, builtins in this file that are expected to be directly called +; by users must have one version for each expected type combination. +; +; Eventually we want to automatically generate built-in documentation +; from the entries in this file. Documenting of built-ins with more +; than one acceptable prototype can be done by cross-referencing +; against rs6000-overload.def and picking up the allowable prototypes +; from there. +; +; Blank lines may be used as desired in this file between the lines as +; defined above; that is, you can introduce as many extra newlines as you +; like after a required newline, but nowhere else. Lines beginning with +; a semicolon are also treated as blank lines. +; +; A const int argument may be restricted to certain values. This is +; indicated by one of the following occurring after the "int" token: +; +; <x> restricts the constant to x bits, interpreted as unsigned +; <x,y> restricts the constant to the inclusive range [x,y] +; [x,y] restricts the constant to the inclusive range [x,y], +; but only applies if the argument is constant. +; {x,y} restricts the constant to one of two values, x or y. +; +; Here x and y are integer tokens. Note that the "const" token is a +; lie when the restriction is [x,y], but this simplifies the parsing +; significantly and is hopefully forgivable. + + + +; AltiVec builtins. 
+[altivec] + const vsc __builtin_altivec_abs_v16qi (vsc); + ABS_V16QI absv16qi2 {} + + const vf __builtin_altivec_abs_v4sf (vf); + ABS_V4SF absv4sf2 {} + + const vsi __builtin_altivec_abs_v4si (vsi); + ABS_V4SI absv4si2 {} + + const vss __builtin_altivec_abs_v8hi (vss); + ABS_V8HI absv8hi2 {} diff --git a/gcc/config/rs6000/rs6000-overload.def b/gcc/config/rs6000/rs6000-overload.def new file mode 100644 index 0000000..d8028c9 --- /dev/null +++ b/gcc/config/rs6000/rs6000-overload.def @@ -0,0 +1,82 @@ +; Overloaded built-in functions for PowerPC. +; Copyright (C) 2020-21 Free Software Foundation, Inc. 
+; Contributed by Bill Schmidt, IBM +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; <http://www.gnu.org/licenses/>. + + +; Overloaded built-in functions in this file are organized into "stanzas", +; where all built-ins in a given stanza have the same overloaded function +; name: +; +; [<overload-id>, <abi-name>, <builtin-name>[[, <ifdef>]] ] +; +; Here the single square brackets are part of the syntax; <overload-id> +; is a unique internal identifier for the overload that will be used as +; part of an enumeration of all overloaded functions; <abi-name> is the +; name that will appear as a #define in rs6000-vecdefines.h; +; <builtin-name> is the name that is overloaded in the back end; and +; <ifdef> is an optional token used to guard the #define with an #ifdef +; in rs6000-vecdefines.h. If no #define is desired, the <abi-name> should +; be replaced with the token SKIP. +; +; Each function entry has two lines. The first line is a prototype line. +; See rs6000-builtin-new.def for a description of the prototype line. +; A prototype line in this file differs in that it doesn't have an +; optional [kind] token: +; +; <return-type> <internal-name> (<argument-list>); +; +; The second line contains the <bif-name> that this particular instance of +; the overloaded function maps to. It must match a token that appears in +; rs6000-builtin-new.def. Optionally, a second token may appear. 
If only +; one token is on the line, it is also used to build the unique identifier +; for the overloaded function. If a second token is present, the second +; token is used instead for this purpose. 
This is necessary in cases +; where a built-in function accepts more than one type signature. It is +; common to have a built-in function that, for example, specifies a +; "vector signed char" argument, but accepts "vector unsigned char" and +; "vector bool char" as well because only the mode matters. Note that +; the overload resolution mechanism has always handled these cases by +; performing fold_convert on vector arguments to hide type mismatches, +; and it will continue to do so. +; +; As a concrete example, __builtin_altivec_mtvscr uses an opaque argument +; type for the source operand. Its built-in function id is MTVSCR. The +; overloaded function __builtin_vec_mtvscr takes a variety of specific +; types, but not all vector types. Each of these maps to the same +; __builtin_altivec_mtvscr built-in function, but the overload ID must +; be unique, so we must specify the second token as shown here. +; +;[VEC_MTVSCR, vec_mtvscr, __builtin_vec_mtvscr] +; void __builtin_vec_mtvscr (vbc); +; MTVSCR MTVSCR_VBC +; void __builtin_vec_mtvscr (vsc); +; MTVSCR MTVSCR_VSC +; ... +; +; Blank lines may be used as desired in this file between the lines as +; defined above; that is, you can introduce as many extra newlines as you +; like after a required newline, but nowhere else. Lines beginning with +; a semicolon are also treated as blank lines. + + +[VEC_ABS, vec_abs, __builtin_vec_abs] + vsc __builtin_vec_abs (vsc); + ABS_V16QI + vss __builtin_vec_abs (vss); + ABS_V8HI -- cgit v1.1 From 43fa306f1d723d9d6c0884e38b102b954d3a4c30 Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Mon, 7 Jun 2021 11:20:56 -0500 Subject: rs6000: Add file support and functions for diagnostic support 2021-06-07 Bill Schmidt gcc/ * config/rs6000/rs6000-gen-builtins.c (bif_file): New variable. (ovld_file): Likewise. (header_file): Likewise. (init_file): Likewise. (defines_file): Likewise. (pgm_path): Likewise. (bif_path): Likewise. (ovld_path): Likewise. (header_path): Likewise. 
(init_path): Likewise. (defines_path): Likewise. (LINELEN): New macro. (linebuf): New variable. (line): Likewise. (pos): Likewise. (diag): Likewise. (bif_diag): New function. (ovld_diag): Likewise. --- gcc/config/rs6000/rs6000-gen-builtins.c | 47 +++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) (limited to 'gcc') diff --git a/gcc/config/rs6000/rs6000-gen-builtins.c b/gcc/config/rs6000/rs6000-gen-builtins.c index 6ab7d7b..3c53c34 100644 --- a/gcc/config/rs6000/rs6000-gen-builtins.c +++ b/gcc/config/rs6000/rs6000-gen-builtins.c @@ -163,3 +163,50 @@ along with GCC; see the file COPYING3. If not see #include #include #include + +/* Input and output file descriptors and pathnames. */ +static FILE *bif_file; +static FILE *ovld_file; +static FILE *header_file; +static FILE *init_file; +static FILE *defines_file; + +static const char *pgm_path; +static const char *bif_path; +static const char *ovld_path; +static const char *header_path; +static const char *init_path; +static const char *defines_path; + +/* Position information. Note that "pos" is zero-indexed, but users + expect one-indexed column information, so representations of "pos" + as columns in diagnostic messages must be adjusted. */ +#define LINELEN 1024 +static char linebuf[LINELEN]; +static int line; +static int pos; + +/* Pointer to a diagnostic function. */ +static void (*diag) (const char *, ...) + __attribute__ ((format (printf, 1, 2))); + +/* Custom diagnostics. */ +static void __attribute__ ((format (printf, 1, 2))) +bif_diag (const char * fmt, ...) +{ + va_list args; + fprintf (stderr, "%s:%d: ", bif_path, line); + va_start (args, fmt); + vfprintf (stderr, fmt, args); + va_end (args); +} + +static void __attribute__ ((format (printf, 1, 2))) +ovld_diag (const char * fmt, ...) 
+{ + va_list args; + fprintf (stderr, "%s:%d: ", ovld_path, line); + va_start (args, fmt); + vfprintf (stderr, fmt, args); + va_end (args); +} -- cgit v1.1 From 9abd2ac5a9b694bcdd871165d109f94866032534 Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Fri, 16 Jul 2021 12:21:08 -0400 Subject: rs6000: Add helper functions for parsing 2021-07-16 Bill Schmidt gcc/ * config/rs6000/rs6000-gen-builtins.c (consume_whitespace): New function. (advance_line): Likewise. (safe_inc_pos): Likewise. (match_identifier): Likewise. (match_integer): Likewise. (match_to_right_bracket): Likewise. --- gcc/config/rs6000/rs6000-gen-builtins.c | 110 ++++++++++++++++++++++++++++++++ 1 file changed, 110 insertions(+) (limited to 'gcc') diff --git a/gcc/config/rs6000/rs6000-gen-builtins.c b/gcc/config/rs6000/rs6000-gen-builtins.c index 3c53c34..7923cc4 100644 --- a/gcc/config/rs6000/rs6000-gen-builtins.c +++ b/gcc/config/rs6000/rs6000-gen-builtins.c @@ -210,3 +210,113 @@ ovld_diag (const char * fmt, ...) vfprintf (stderr, fmt, args); va_end (args); } + +/* Pass over whitespace (other than a newline, which terminates the scan). */ +static void +consume_whitespace (void) +{ + while (pos < LINELEN && isspace(linebuf[pos]) && linebuf[pos] != '\n') + pos++; + return; +} + +/* Get the next nonblank, noncomment line, returning 0 on EOF, 1 otherwise. */ +static int +advance_line (FILE *file) +{ + while (1) + { + /* Read ahead one line and check for EOF. */ + if (!fgets (linebuf, sizeof linebuf, file)) + return 0; + line++; + size_t len = strlen (linebuf); + if (linebuf[len - 1] != '\n') + (*diag) ("line doesn't terminate with newline\n"); + pos = 0; + consume_whitespace (); + if (linebuf[pos] != '\n' && linebuf[pos] != ';') + return 1; + } +} + +static inline void +safe_inc_pos (void) +{ + if (pos++ >= LINELEN) + { + (*diag) ("line length overrun.\n"); + exit (1); + } +} + +/* Match an identifier, returning NULL on failure, else a pointer to a + buffer containing the identifier. 
*/ +static char * +match_identifier (void) +{ + int lastpos = pos - 1; + while (isalnum (linebuf[lastpos + 1]) || linebuf[lastpos + 1] == '_') + ++lastpos; + + if (lastpos < pos) + return 0; + + char *buf = (char *) malloc (lastpos - pos + 2); + memcpy (buf, &linebuf[pos], lastpos - pos + 1); + buf[lastpos - pos + 1] = '\0'; + + pos = lastpos + 1; + return buf; +} + +/* Match an integer and return the string representing its value, + or a null string on failure. */ +static char * +match_integer (void) +{ + int startpos = pos; + if (linebuf[pos] == '-') + safe_inc_pos (); + + int lastpos = pos - 1; + while (isdigit (linebuf[lastpos + 1])) + ++lastpos; + + if (lastpos < pos) + return NULL; + + pos = lastpos + 1; + char *buf = (char *) malloc (lastpos - startpos + 2); + memcpy (buf, &linebuf[startpos], lastpos - startpos + 1); + buf[lastpos - startpos + 1] = '\0'; + return buf; +} + +/* Match a string up to but not including a ']', and return its value, + or zero if there is nothing before the ']'. Error if we don't find + such a character. */ +static const char * +match_to_right_bracket (void) +{ + int lastpos = pos - 1; + while (linebuf[lastpos + 1] != ']') + { + if (linebuf[lastpos + 1] == '\n') + { + (*diag) ("no ']' found before end of line.\n"); + exit (1); + } + ++lastpos; + } + + if (lastpos < pos) + return 0; + + char *buf = (char *) malloc (lastpos - pos + 2); + memcpy (buf, &linebuf[pos], lastpos - pos + 1); + buf[lastpos - pos + 1] = '\0'; + + pos = lastpos + 1; + return buf; +} -- cgit v1.1 From 0d685dfbb603b631c0e1d121dd73e73d33573ec5 Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Mon, 7 Jun 2021 11:49:56 -0500 Subject: rs6000: Add functions for matching types, part 1 of 3 2021-06-07 Bill Schmidt gcc/ * config/rs6000/rs6000-gen-builtins.c (void_status): New enum. (basetype): Likewise. (typeinfo): Likewise. (handle_pointer): New function. (match_basetype): New stub function. (match_const_restriction): Likewise. (match_type): New function. 
--- gcc/config/rs6000/rs6000-gen-builtins.c | 367 ++++++++++++++++++++++++++++++++ 1 file changed, 367 insertions(+) (limited to 'gcc') diff --git a/gcc/config/rs6000/rs6000-gen-builtins.c b/gcc/config/rs6000/rs6000-gen-builtins.c index 7923cc4..3845071 100644 --- a/gcc/config/rs6000/rs6000-gen-builtins.c +++ b/gcc/config/rs6000/rs6000-gen-builtins.c @@ -186,6 +186,52 @@ static char linebuf[LINELEN]; static int line; static int pos; +/* Used to determine whether a type can be void (only return types). */ +enum void_status +{ + VOID_NOTOK, + VOID_OK +}; + +/* Legal base types for an argument or return type. */ +enum basetype +{ + BT_CHAR, + BT_SHORT, + BT_INT, + BT_LONG, + BT_LONGLONG, + BT_FLOAT, + BT_DOUBLE, + BT_LONGDOUBLE, + BT_INT128, + BT_FLOAT128, + BT_BOOL, + BT_STRING, + BT_DECIMAL32, + BT_DECIMAL64, + BT_DECIMAL128, + BT_IBM128, + BT_VPAIR, + BT_VQUAD +}; + +/* Type modifiers for an argument or return type. */ +struct typeinfo +{ + char isvoid; + char isconst; + char isvector; + char issigned; + char isunsigned; + char isbool; + char ispixel; + char ispointer; + basetype base; + char *val1; + char *val2; +}; + /* Pointer to a diagnostic function. */ static void (*diag) (const char *, ...) __attribute__ ((format (printf, 1, 2))); @@ -320,3 +366,324 @@ match_to_right_bracket (void) pos = lastpos + 1; return buf; } + +static inline void +handle_pointer (typeinfo *typedata) +{ + consume_whitespace (); + if (linebuf[pos] == '*') + { + typedata->ispointer = 1; + safe_inc_pos (); + } +} + +/* Match one of the allowable base types. Consumes one token unless the + token is "long", which must be paired with a second "long". Optionally + consumes a following '*' token for pointers. Return 1 for success, + 0 for failure. */ +static int +match_basetype (typeinfo *typedata) +{ + return 1; +} + +/* A const int argument may be restricted to certain values. 
This is + indicated by one of the following occurring after the "int' token: + + restricts the constant to x bits, interpreted as unsigned + restricts the constant to the inclusive range [x,y] + [x,y] restricts the constant to the inclusive range [x,y], + but only applies if the argument is constant. + {x,y} restricts the constant to one of two values, x or y. + + Here x and y are integer tokens. Note that the "const" token is a + lie when the restriction is [x,y], but this simplifies the parsing + significantly and is hopefully forgivable. + + Return 1 for success, else 0. */ +static int +match_const_restriction (typeinfo *typedata) +{ + return 1; +} + +/* Look for a type, which can be terminated by a token that is not part of + a type, a comma, or a closing parenthesis. Place information about the + type in TYPEDATA. Return 1 for success, 0 for failure. */ +static int +match_type (typeinfo *typedata, int voidok) +{ + /* A legal type is of the form: + + [const] [[signed|unsigned] | ] [*] + + Legal values of are (for now): + + char + short + int + long + long double + long long + float + double + __int128 + _Float128 + bool + string + _Decimal32 + _Decimal64 + _Decimal128 + __ibm128 + + Legal values of are as follows, and are shorthand for + the associated meaning: + + vsc vector signed char + vuc vector unsigned char + vbc vector bool char + vss vector signed short + vus vector unsigned short + vbs vector bool short + vsi vector signed int + vui vector unsigned int + vbi vector bool int + vsll vector signed long long + vull vector unsigned long long + vbll vector bool long long + vsq vector signed __int128 + vuq vector unsigned __int128 + vbq vector bool __int128 + vp vector pixel + vf vector float + vd vector double + v256 __vector_pair + v512 __vector_quad + + For simplicity, We don't support "short int" and "long long int". + We don't currently support a of "_Float16". "signed" + and "unsigned" only apply to integral base types. 
The optional * + indicates a pointer type. */ + + consume_whitespace (); + memset (typedata, 0, sizeof *typedata); + int oldpos = pos; + + char *token = match_identifier (); + if (!token) + return 0; + + if (!strcmp (token, "const")) + { + typedata->isconst = 1; + consume_whitespace (); + oldpos = pos; + token = match_identifier (); + } + + if (!strcmp (token, "void")) + typedata->isvoid = 1; + + if (!strcmp (token, "vsc")) + { + typedata->isvector = 1; + typedata->issigned = 1; + typedata->base = BT_CHAR; + handle_pointer (typedata); + return 1; + } + else if (!strcmp (token, "vuc")) + { + typedata->isvector = 1; + typedata->isunsigned = 1; + typedata->base = BT_CHAR; + handle_pointer (typedata); + return 1; + } + else if (!strcmp (token, "vbc")) + { + typedata->isvector = 1; + typedata->isbool = 1; + typedata->base = BT_CHAR; + handle_pointer (typedata); + return 1; + } + else if (!strcmp (token, "vss")) + { + typedata->isvector = 1; + typedata->issigned = 1; + typedata->base = BT_SHORT; + handle_pointer (typedata); + return 1; + } + else if (!strcmp (token, "vus")) + { + typedata->isvector = 1; + typedata->isunsigned = 1; + typedata->base = BT_SHORT; + handle_pointer (typedata); + return 1; + } + else if (!strcmp (token, "vbs")) + { + typedata->isvector = 1; + typedata->isbool = 1; + typedata->base = BT_SHORT; + handle_pointer (typedata); + return 1; + } + else if (!strcmp (token, "vsi")) + { + typedata->isvector = 1; + typedata->issigned = 1; + typedata->base = BT_INT; + handle_pointer (typedata); + return 1; + } + else if (!strcmp (token, "vui")) + { + typedata->isvector = 1; + typedata->isunsigned = 1; + typedata->base = BT_INT; + handle_pointer (typedata); + return 1; + } + else if (!strcmp (token, "vbi")) + { + typedata->isvector = 1; + typedata->isbool = 1; + typedata->base = BT_INT; + handle_pointer (typedata); + return 1; + } + else if (!strcmp (token, "vsll")) + { + typedata->isvector = 1; + typedata->issigned = 1; + typedata->base = BT_LONGLONG; + 
handle_pointer (typedata); + return 1; + } + else if (!strcmp (token, "vull")) + { + typedata->isvector = 1; + typedata->isunsigned = 1; + typedata->base = BT_LONGLONG; + handle_pointer (typedata); + return 1; + } + else if (!strcmp (token, "vbll")) + { + typedata->isvector = 1; + typedata->isbool = 1; + typedata->base = BT_LONGLONG; + handle_pointer (typedata); + return 1; + } + else if (!strcmp (token, "vsq")) + { + typedata->isvector = 1; + typedata->issigned = 1; + typedata->base = BT_INT128; + handle_pointer (typedata); + return 1; + } + else if (!strcmp (token, "vuq")) + { + typedata->isvector = 1; + typedata->isunsigned = 1; + typedata->base = BT_INT128; + handle_pointer (typedata); + return 1; + } + else if (!strcmp (token, "vbq")) + { + typedata->isvector = 1; + typedata->isbool = 1; + typedata->base = BT_INT128; + handle_pointer (typedata); + return 1; + } + else if (!strcmp (token, "vp")) + { + typedata->isvector = 1; + typedata->ispixel = 1; + typedata->base = BT_SHORT; + handle_pointer (typedata); + return 1; + } + else if (!strcmp (token, "vf")) + { + typedata->isvector = 1; + typedata->base = BT_FLOAT; + handle_pointer (typedata); + return 1; + } + else if (!strcmp (token, "vd")) + { + typedata->isvector = 1; + typedata->base = BT_DOUBLE; + handle_pointer (typedata); + return 1; + } + else if (!strcmp (token, "v256")) + { + typedata->isvector = 1; + typedata->base = BT_VPAIR; + handle_pointer (typedata); + return 1; + } + else if (!strcmp (token, "v512")) + { + typedata->isvector = 1; + typedata->base = BT_VQUAD; + handle_pointer (typedata); + return 1; + } + else if (!strcmp (token, "signed")) + typedata->issigned = 1; + else if (!strcmp (token, "unsigned")) + typedata->isunsigned = 1; + else if (!typedata->isvoid && !typedata->isconst) + { + /* Push back token. 
*/ + pos = oldpos; + return match_basetype (typedata); + } + + if (typedata->isvoid) + { + consume_whitespace (); + if (linebuf[pos] == '*') + { + typedata->ispointer = 1; + safe_inc_pos (); + } + else if (!voidok) + return 0; + return 1; + } + + if (!typedata->issigned && !typedata->isunsigned) + pos = oldpos; + if (!match_basetype (typedata)) + return 0; + + if (typedata->isconst) + { + if (typedata->ispointer) + return 1; + if (typedata->base != BT_INT) + { + (*diag)("'const' at %d requires pointer or integer type", + oldpos + 1); + return 0; + } + consume_whitespace (); + if (linebuf[pos] == '<' || linebuf[pos] == '{' || linebuf[pos] == '[') + return match_const_restriction (typedata); + } + + return 1; +} -- cgit v1.1 From 81736934dd7928f67dacb6f0a5656b8622b0ff46 Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Fri, 2 Apr 2021 16:31:38 -0500 Subject: rs6000: Add functions for matching types, part 2 of 3 2021-04-02 Bill Schmidt gcc/ * config/rs6000/rs6000-gen-builtins.c (match_basetype): Implement. 
--- gcc/config/rs6000/rs6000-gen-builtins.c | 64 +++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) (limited to 'gcc') diff --git a/gcc/config/rs6000/rs6000-gen-builtins.c b/gcc/config/rs6000/rs6000-gen-builtins.c index 3845071..44d6e8a 100644 --- a/gcc/config/rs6000/rs6000-gen-builtins.c +++ b/gcc/config/rs6000/rs6000-gen-builtins.c @@ -385,6 +385,70 @@ handle_pointer (typeinfo *typedata) static int match_basetype (typeinfo *typedata) { + consume_whitespace (); + int oldpos = pos; + char *token = match_identifier (); + if (!token) + { + (*diag) ("missing base type in return type at column %d\n", pos + 1); + return 0; + } + + if (!strcmp (token, "char")) + typedata->base = BT_CHAR; + else if (!strcmp (token, "short")) + typedata->base = BT_SHORT; + else if (!strcmp (token, "int")) + typedata->base = BT_INT; + else if (!strcmp (token, "long")) + { + consume_whitespace (); + oldpos = pos; + char *mustbelongordbl = match_identifier (); + if (!mustbelongordbl) + typedata->base = BT_LONG; + else if (!strcmp (mustbelongordbl, "long")) + typedata->base = BT_LONGLONG; + else if (!strcmp (mustbelongordbl, "double")) + typedata->base = BT_LONGDOUBLE; + else + /* Speculatively accept "long" here and push back the token. + This occurs when "long" is a return type and the next token + is the function name. */ + { + typedata->base = BT_LONG; + pos = oldpos; + } + } + else if (!strcmp (token, "float")) + typedata->base = BT_FLOAT; + else if (!strcmp (token, "double")) + typedata->base = BT_DOUBLE; + else if (!strcmp (token, "__int128")) + typedata->base = BT_INT128; + else if (!strcmp (token, "_Float128")) + typedata->base = BT_FLOAT128; + else if (!strcmp (token, "bool")) + typedata->base = BT_BOOL; + /* A "string" is a special "const char *" -- we need it because it + cannot match either signed or unsigned char *. 
*/ + else if (!strcmp (token, "string")) + typedata->base = BT_STRING; + else if (!strcmp (token, "_Decimal32")) + typedata->base = BT_DECIMAL32; + else if (!strcmp (token, "_Decimal64")) + typedata->base = BT_DECIMAL64; + else if (!strcmp (token, "_Decimal128")) + typedata->base = BT_DECIMAL128; + else if (!strcmp (token, "__ibm128")) + typedata->base = BT_IBM128; + else + { + (*diag) ("unrecognized base type at column %d\n", oldpos + 1); + return 0; + } + + handle_pointer (typedata); return 1; } -- cgit v1.1 From e11b02f17206af70f72a82ff1167a2676d5b18c9 Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Mon, 7 Jun 2021 12:03:49 -0500 Subject: rs6000: Add functions for matching types, part 3 of 3 2021-06-07 Bill Schmidt gcc/ * config/rs6000/rs6000-gen-builtins.c (restriction): New enum. (typeinfo): Add restr field. (match_bracketed_pair): New function. (match_const_restriction): Implement. --- gcc/config/rs6000/rs6000-gen-builtins.c | 115 +++++++++++++++++++++++++++++++- 1 file changed, 114 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/config/rs6000/rs6000-gen-builtins.c b/gcc/config/rs6000/rs6000-gen-builtins.c index 44d6e8a..34566fc 100644 --- a/gcc/config/rs6000/rs6000-gen-builtins.c +++ b/gcc/config/rs6000/rs6000-gen-builtins.c @@ -216,6 +216,22 @@ enum basetype BT_VQUAD }; +/* Ways in which a const int value can be restricted. RES_BITS indicates + that the integer is restricted to val1 bits, interpreted as an unsigned + number. RES_RANGE indicates that the integer is restricted to values + between val1 and val2, inclusive. RES_VAR_RANGE is like RES_RANGE, but + the argument may be variable, so it can only be checked if it is constant. + RES_VALUES indicates that the integer must have one of the values val1 + or val2. */ +enum restriction +{ + RES_NONE, + RES_BITS, + RES_RANGE, + RES_VAR_RANGE, + RES_VALUES +}; + /* Type modifiers for an argument or return type. 
*/ struct typeinfo { @@ -228,6 +244,7 @@ struct typeinfo char ispixel; char ispointer; basetype base; + restriction restr; char *val1; char *val2; }; @@ -452,6 +469,53 @@ match_basetype (typeinfo *typedata) return 1; } +/* Helper routine for match_const_restriction. */ +static int +match_bracketed_pair (typeinfo *typedata, char open, char close, + restriction restr) +{ + if (linebuf[pos] == open) + { + safe_inc_pos (); + int oldpos = pos; + char *x = match_integer (); + if (x == NULL) + { + (*diag) ("malformed integer at column %d.\n", oldpos + 1); + return 0; + } + consume_whitespace (); + if (linebuf[pos] != ',') + { + (*diag) ("missing comma at column %d.\n", pos + 1); + return 0; + } + safe_inc_pos (); + consume_whitespace (); + oldpos = pos; + char *y = match_integer (); + if (y == NULL) + { + (*diag) ("malformed integer at column %d.\n", oldpos + 1); + return 0; + } + typedata->restr = restr; + typedata->val1 = x; + typedata->val2 = y; + + consume_whitespace (); + if (linebuf[pos] != close) + { + (*diag) ("malformed restriction at column %d.\n", pos + 1); + return 0; + } + safe_inc_pos (); + return 1; + } + + return 0; +} + /* A const int argument may be restricted to certain values. 
This is indicated by one of the following occurring after the "int' token: @@ -469,7 +533,56 @@ match_basetype (typeinfo *typedata) static int match_const_restriction (typeinfo *typedata) { - return 1; + int oldpos = pos; + if (linebuf[pos] == '<') + { + safe_inc_pos (); + oldpos = pos; + char *x = match_integer (); + if (x == NULL) + { + (*diag) ("malformed integer at column %d.\n", oldpos + 1); + return 0; + } + consume_whitespace (); + if (linebuf[pos] == '>') + { + typedata->restr = RES_BITS; + typedata->val1 = x; + safe_inc_pos (); + return 1; + } + else if (linebuf[pos] != ',') + { + (*diag) ("malformed restriction at column %d.\n", pos + 1); + return 0; + } + safe_inc_pos (); + oldpos = pos; + char *y = match_integer (); + if (y == NULL) + { + (*diag) ("malformed integer at column %d.\n", oldpos + 1); + return 0; + } + typedata->restr = RES_RANGE; + typedata->val1 = x; + typedata->val2 = y; + + consume_whitespace (); + if (linebuf[pos] != '>') + { + (*diag) ("malformed restriction at column %d.\n", pos + 1); + return 0; + } + safe_inc_pos (); + return 1; + } + else if (match_bracketed_pair (typedata, '{', '}', RES_VALUES) + || match_bracketed_pair (typedata, '[', ']', RES_VAR_RANGE)) + return 1; + + return 0; } /* Look for a type, which can be terminated by a token that is not part of -- cgit v1.1 From fa5f8b49e55caf5bb341f5eb6b5ab828b9286425 Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Tue, 8 Jun 2021 09:33:40 -0500 Subject: rs6000: Red-black tree implementation for balanced tree search 2021-06-08 Bill Schmidt gcc/ * config/rs6000/rbtree.c: New file. * config/rs6000/rbtree.h: New file. 
--- gcc/config/rs6000/rbtree.c | 242 +++++++++++++++++++++++++++++++++++++++++++++ gcc/config/rs6000/rbtree.h | 52 ++++++++++ 2 files changed, 294 insertions(+) create mode 100644 gcc/config/rs6000/rbtree.c create mode 100644 gcc/config/rs6000/rbtree.h (limited to 'gcc') diff --git a/gcc/config/rs6000/rbtree.c b/gcc/config/rs6000/rbtree.c new file mode 100644 index 0000000..37a559c --- /dev/null +++ b/gcc/config/rs6000/rbtree.c @@ -0,0 +1,242 @@ +/* Partial red-black tree implementation for rs6000-gen-builtins.c. + Copyright (C) 2020-21 Free Software Foundation, Inc. + Contributed by Bill Schmidt, IBM + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include +#include +#include +#include +#include "rbtree.h" + +/* Initialize a red-black tree. */ +void +rbt_new (struct rbt_strings *t) +{ + t->rbt_nil = (rbt_string_node *) malloc (sizeof (rbt_string_node)); + t->rbt_nil->color = RBT_BLACK; + t->rbt_root = t->rbt_nil; +} + +/* Create a new node to be inserted into the red-black tree. An inserted + node starts out red. 
*/ +static struct rbt_string_node * +rbt_create_node (struct rbt_strings *t, char *str) +{ + struct rbt_string_node *nodeptr + = (struct rbt_string_node *) malloc (sizeof (rbt_string_node)); + nodeptr->str = str; + nodeptr->left = t->rbt_nil; + nodeptr->right = t->rbt_nil; + nodeptr->par = NULL; + nodeptr->color = RBT_RED; + return nodeptr; +} + +/* Perform a left-rotate operation on NODE in the red-black tree. */ +static void +rbt_left_rotate (struct rbt_strings *t, struct rbt_string_node *node) +{ + struct rbt_string_node *right = node->right; + assert (right); + + /* Turn RIGHT's left subtree into NODE's right subtree. */ + node->right = right->left; + if (right->left != t->rbt_nil) + right->left->par = node; + + /* Link NODE's parent to RIGHT. */ + right->par = node->par; + + if (node->par == t->rbt_nil) + t->rbt_root = right; + else if (node == node->par->left) + node->par->left = right; + else + node->par->right = right; + + /* Put NODE on RIGHT's left. */ + right->left = node; + node->par = right; +} + +/* Perform a right-rotate operation on NODE in the red-black tree. */ +static void +rbt_right_rotate (struct rbt_strings *t, struct rbt_string_node *node) +{ + struct rbt_string_node *left = node->left; + assert (left); + + /* Turn LEFT's right subtree into NODE's left subtree. */ + node->left = left->right; + if (left->right != t->rbt_nil) + left->right->par = node; + + /* Link NODE's parent to LEFT. */ + left->par = node->par; + + if (node->par == t->rbt_nil) + t->rbt_root = left; + else if (node == node->par->right) + node->par->right = left; + else + node->par->left = left; + + /* Put NODE on LEFT's right. */ + left->right = node; + node->par = left; +} + +/* Insert STR into the tree, returning 1 for success and 0 if STR already + appears in the tree. 
*/ +int +rbt_insert (struct rbt_strings *t, char *str) +{ + struct rbt_string_node *curr = t->rbt_root; + struct rbt_string_node *trail = t->rbt_nil; + + while (curr != t->rbt_nil) + { + trail = curr; + int cmp = strcmp (str, curr->str); + if (cmp < 0) + curr = curr->left; + else if (cmp > 0) + curr = curr->right; + else + return 0; + } + + struct rbt_string_node *fresh = rbt_create_node (t, str); + fresh->par = trail; + + if (trail == t->rbt_nil) + t->rbt_root = fresh; + else if (strcmp (fresh->str, trail->str) < 0) + trail->left = fresh; + else + trail->right = fresh; + + fresh->left = t->rbt_nil; + fresh->right = t->rbt_nil; + + /* FRESH has now been inserted as a red leaf. If we have invalidated + one of the following preconditions, we must fix things up: + (a) If a node is red, both of its children are black. + (b) The root must be black. + Note that only (a) or (b) applies at any given time during the + process. This algorithm works up the tree from NEW looking + for a red child with a red parent, and cleaning that up. If the + root ends up red, it gets turned black at the end. */ + curr = fresh; + while (curr->par->color == RBT_RED) + if (curr->par == curr->par->par->left) + { + struct rbt_string_node *uncle = curr->par->par->right; + if (uncle->color == RBT_RED) + { + curr->par->color = RBT_BLACK; + uncle->color = RBT_BLACK; + curr->par->par->color = RBT_RED; + curr = curr->par->par; + } + else if (curr == curr->par->right) + { + curr = curr->par; + rbt_left_rotate (t, curr); + } + else + { + curr->par->color = RBT_BLACK; + curr->par->par->color = RBT_RED; + rbt_right_rotate (t, curr->par->par); + } + } + else /* curr->par == curr->par->par->right */ + { + /* Gender-neutral formations are awkward, so let's be fair. ;-) + ("Parent-sibling" is just awful.) 
*/ + struct rbt_string_node *aunt = curr->par->par->left; + if (aunt->color == RBT_RED) + { + curr->par->color = RBT_BLACK; + aunt->color = RBT_BLACK; + curr->par->par->color = RBT_RED; + curr = curr->par->par; + } + else if (curr == curr->par->left) + { + curr = curr->par; + rbt_right_rotate (t, curr); + } + else + { + curr->par->color = RBT_BLACK; + curr->par->par->color = RBT_RED; + rbt_left_rotate (t, curr->par->par); + } + } + + t->rbt_root->color = RBT_BLACK; + return 1; +} + +/* Return 1 if STR is in the red-black tree, else 0. */ +int +rbt_find (struct rbt_strings *t, char *str) +{ + struct rbt_string_node *curr = t->rbt_root; + + while (curr != t->rbt_nil) + { + int cmp = strcmp (str, curr->str); + if (cmp < 0) + curr = curr->left; + else if (cmp > 0) + curr = curr->right; + else + return 1; + } + + return 0; +} + +/* Inorder dump of the binary search tree. */ +void +rbt_dump (struct rbt_strings *t, struct rbt_string_node *subtree) +{ + if (subtree != t->rbt_nil) + { + rbt_dump (t, subtree->left); + fprintf (stderr, "%s\n", subtree->str); + rbt_dump (t, subtree->right); + } +} + +/* Inorder call-back for iteration over the tree. */ +void +rbt_inorder_callback (struct rbt_strings *t, struct rbt_string_node *subtree, + void (*fn) (char *)) +{ + if (subtree != t->rbt_nil) + { + rbt_inorder_callback (t, subtree->left, fn); + (*fn) (subtree->str); + rbt_inorder_callback (t, subtree->right, fn); + } +} diff --git a/gcc/config/rs6000/rbtree.h b/gcc/config/rs6000/rbtree.h new file mode 100644 index 0000000..fab0001 --- /dev/null +++ b/gcc/config/rs6000/rbtree.h @@ -0,0 +1,52 @@ +/* Partial red-black tree implementation for rs6000-gen-builtins.c. + Copyright (C) 2020-21 Free Software Foundation, Inc. + Contributed by Bill Schmidt, IBM + +This file is part of GCC. 
+ +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* Red-black binary search tree on strings. Presently we don't support + deletes; only insert/find operations are implemented. */ +enum rbt_color + { + RBT_BLACK, + RBT_RED + }; + +struct rbt_string_node { + char *str; + struct rbt_string_node *left; + struct rbt_string_node *right; + struct rbt_string_node *par; + enum rbt_color color; +}; + +/* Root and sentinel nodes of a red-black tree. + rbt_nil points to a sentinel node, which is the parent of root + and the child of every node without a "real" left or right child. + rbt_root points to the root of the tree, if it exists yet. The + root and sentinel nodes are always black. */ +struct rbt_strings { + struct rbt_string_node *rbt_nil; + struct rbt_string_node *rbt_root; +}; + +void rbt_new (struct rbt_strings *); +int rbt_insert (struct rbt_strings *, char *); +int rbt_find (struct rbt_strings *, char *); +void rbt_dump (struct rbt_strings *, struct rbt_string_node *); +void rbt_inorder_callback (struct rbt_strings *, struct rbt_string_node *, + void (*) (char *)); -- cgit v1.1 From 94ba897be8b59ef5926eed4c77fd53812fb20add Mon Sep 17 00:00:00 2001 From: Martin Sebor Date: Fri, 16 Jul 2021 11:08:58 -0600 Subject: PR testsuite/101468 - Wstringop-overflow tests failures gcc/testsuite/ChangeLog: PR testsuite/101468 * gcc.dg/Wstringop-overflow-41.c: Adjust to avoid target-specific failures. * gcc.dg/Wstringop-overflow-42.c: Same. 
* gcc.dg/Wstringop-overflow-68.c: Same. * gcc.dg/Wstringop-overflow-70.c: Same. * gcc.dg/Wstringop-overflow-71.c: Same. * gcc.dg/strlenopt-95.c: Fix typos. --- gcc/testsuite/gcc.dg/Wstringop-overflow-41.c | 3 ++- gcc/testsuite/gcc.dg/Wstringop-overflow-42.c | 12 ++++++------ gcc/testsuite/gcc.dg/Wstringop-overflow-68.c | 12 ++++++------ gcc/testsuite/gcc.dg/Wstringop-overflow-70.c | 5 ++++- gcc/testsuite/gcc.dg/Wstringop-overflow-71.c | 22 +++++++++++++++------- gcc/testsuite/gcc.dg/strlenopt-95.c | 8 ++++---- 6 files changed, 37 insertions(+), 25 deletions(-) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/Wstringop-overflow-41.c b/gcc/testsuite/gcc.dg/Wstringop-overflow-41.c index 9b2d2cb..e255e67 100644 --- a/gcc/testsuite/gcc.dg/Wstringop-overflow-41.c +++ b/gcc/testsuite/gcc.dg/Wstringop-overflow-41.c @@ -29,7 +29,8 @@ void char_array_cst_off_cst_size (void) sink (p); ++idx; - memset (p + idx, 0, 3); // { dg-warning "writing 3 bytes into a region of size 1" "pr?????" { xfail ilp32 } } + memset (p + idx, 0, 3); // { dg-warning "writing 3 bytes into a region of size 1" } + sink (p); ++idx; memset (p + idx, 0, 3); // { dg-warning "writing 3 bytes into a region of size 0" } diff --git a/gcc/testsuite/gcc.dg/Wstringop-overflow-42.c b/gcc/testsuite/gcc.dg/Wstringop-overflow-42.c index 4bb22f2..8527eea 100644 --- a/gcc/testsuite/gcc.dg/Wstringop-overflow-42.c +++ b/gcc/testsuite/gcc.dg/Wstringop-overflow-42.c @@ -23,21 +23,21 @@ void cpy_ui_1_max (unsigned i, const char *s) { if (i < 1) i = 1; d = strcpy (a + i, s); // { dg-warning "writing 1 or more bytes into a region of size 0" } - d = strcpy (a + i + 1, s); // { dg-warning "writing 1 or more bytes into a region of size 0" "" { xfail ilp32 } } + d = strcpy (a + i + 1, s); // { dg-warning "writing 1 or more bytes into a region of size 0" "" { xfail { ! 
lp64 } } } } void cpy_sl_1_max (long i, const char *s) { if (i < 1) i = 1; - d = strcpy (a + i, s); // { dg-warning "writing 1 or more bytes into a region of size 0" } - d = strcpy (a + i + 1, s); // { dg-warning "writing 1 or more bytes into a region of size 0" } + d = strcpy (a + i, s); // { dg-warning "writing 1 or more bytes into a region of size 0" "" { target { ! ptr_eq_short } } } + d = strcpy (a + i + 1, s); // { dg-warning "writing 1 or more bytes into a region of size 0" "" { target { ! ptr_eq_short } } } } void cpy_ul_1_max (unsigned long i, const char *s) { if (i < 1) i = 1; - d = strcpy (a + i, s); // { dg-warning "writing 1 or more bytes into a region of size 0" } + d = strcpy (a + i, s); // { dg-warning "writing 1 or more bytes into a region of size 0" "" { target { ! ptr_eq_short } } } /* Because of integer wraparound the offset's range is [1, 0] so the overflow isn't diagnosed (yet). */ @@ -56,7 +56,7 @@ void cpy_si_min_m1 (int i, const char *s) void cpy_sl_min_m1 (long i, const char *s) { if (i > -1) i = -1; - d = strcpy (a + i - 1, s); // { dg-warning "writing 1 or more bytes into a region of size 0" } - d = strcpy (a + i, s); // { dg-warning "writing 1 or more bytes into a region of size 0" } + d = strcpy (a + i - 1, s); // { dg-warning "writing 1 or more bytes into a region of size 0" "" { target { ! ptr_eq_short } } } + d = strcpy (a + i, s); // { dg-warning "writing 1 or more bytes into a region of size 0" "" { target { ! ptr_eq_short } } } d = strcpy (a + i + 2, s); } diff --git a/gcc/testsuite/gcc.dg/Wstringop-overflow-68.c b/gcc/testsuite/gcc.dg/Wstringop-overflow-68.c index d2d3ae5..6bcba27 100644 --- a/gcc/testsuite/gcc.dg/Wstringop-overflow-68.c +++ b/gcc/testsuite/gcc.dg/Wstringop-overflow-68.c @@ -57,12 +57,12 @@ void warn_comp_lit_zero (void) void warn_comp_lit (void) { - *(AC2*)a1 = Ac2; // { dg-warning "writing 2 bytes into a region of size 1" "pr??????" 
{ xfail *-*-* } } - *(AC4*)a2 = Ac4; // { dg-warning "writing 4 bytes into a region of size 2" "pr??????" { xfail *-*-* } } - *(AC4*)a3 = Ac4; // { dg-warning "writing 4 bytes into a region of size 3" "pr??????" { xfail *-*-* } } - *(AC8*)a4 = Ac8; // { dg-warning "writing 8 bytes into a region of size 4" "pr??????" { xfail *-*-* } } - *(AC8*)a7 = Ac8; // { dg-warning "writing 8 bytes into a region of size 7" "pr??????" { xfail *-*-* } } - *(AC16*)a15 = Ac16; // { dg-warning "writing 16 bytes into a region of size 15" "pr??????" { xfail *-*-* } } + *(AC2*)a1 = Ac2; // { dg-warning "writing 2 bytes into a region of size 1" "pr101475" { xfail *-*-* } } + *(AC4*)a2 = Ac4; // { dg-warning "writing 4 bytes into a region of size 2" "pr101475" { xfail *-*-* } } + *(AC4*)a3 = Ac4; // { dg-warning "writing 4 bytes into a region of size 3" "pr101475" { xfail *-*-* } } + *(AC8*)a4 = Ac8; // { dg-warning "writing 8 bytes into a region of size 4" "pr101475" { xfail *-*-* } } + *(AC8*)a7 = Ac8; // { dg-warning "writing 8 bytes into a region of size 7" "pr101475" { xfail *-*-* } } + *(AC16*)a15 = Ac16; // { dg-warning "writing 16 bytes into a region of size 15" "pr101475" { xfail *-*-* } } } void warn_aggr_decl (void) diff --git a/gcc/testsuite/gcc.dg/Wstringop-overflow-70.c b/gcc/testsuite/gcc.dg/Wstringop-overflow-70.c index 5d8bfa9..82c4d9f 100644 --- a/gcc/testsuite/gcc.dg/Wstringop-overflow-70.c +++ b/gcc/testsuite/gcc.dg/Wstringop-overflow-70.c @@ -16,6 +16,9 @@ void* warn_loop (void) { char *p = __builtin_malloc (15); for (int i = 0; i != 16; ++i) - p[i] = i; // { dg-warning "writing 16 bytes into a region of size 15" } + /* The size of the write below depends on the target. When vectorized + the vector size may be 4 or 16, otherwise it may be a series of byte + assignments. */ + p[i] = i; // { dg-warning "writing (1|2|4|16) bytes? 
into a region of size (0|1|3|15)" } return p; } diff --git a/gcc/testsuite/gcc.dg/Wstringop-overflow-71.c b/gcc/testsuite/gcc.dg/Wstringop-overflow-71.c index dccee35..f56a005 100644 --- a/gcc/testsuite/gcc.dg/Wstringop-overflow-71.c +++ b/gcc/testsuite/gcc.dg/Wstringop-overflow-71.c @@ -12,7 +12,15 @@ extern int abs (int); extern void* alloca (size_t); extern double nan (const char *); -_Decimal32 nand32 (const char *); + +#ifdef __DEC32_MAX__ + _Decimal32 nand32 (const char *); +#else +/* _Decimal32 is supported only conditionally and not available on all + targets. */ +# define _Decimal32 double +# define nand32(s) nan (s) +#endif extern size_t strlen (const char *); extern char* strcpy (char *, const char *); @@ -56,23 +64,23 @@ void warn_complex (double x, double i) } -void nowarn_nan (const char *s) +__attribute__ ((noipa)) void nowarn_nan (const char *s) { *(double *)ax = nan (s); } -void warn_nan (const char *s) +__attribute__ ((noipa)) void warn_nan (const char *s) { *(double *)a1 = nan (s); // { dg-warning "\\\[-Wstringop-overflow" } } -void nowarn_nand32 (const char *s) +__attribute__ ((noipa)) void nowarn_nand32 (const char *s) { *(_Decimal32 *)ax = nand32 (s); } -void warn_nand32 (const char *s) +__attribute__ ((noipa)) void warn_nand32 (const char *s) { *(_Decimal32 *)a1 = nand32 (s); // { dg-warning "\\\[-Wstringop-overflow" } } @@ -88,7 +96,7 @@ void nowarn_strlen (const char *s1, const char *s2, const char *s3) void warn_strlen (const char *s1, const char *s2) { *(int16_t *)a1 = strlen (s1); // { dg-warning "\\\[-Wstringop-overflow" } - *(size_t *)a2 = strlen (s2); // { dg-warning "\\\[-Wstringop-overflow" } + *(size_t *)a2 = strlen (s2); // { dg-warning "\\\[-Wstringop-overflow" "!ptr_eq_short" { target { ! 
ptr_eq_short } } } } @@ -101,5 +109,5 @@ void nowarn_strcpy (char *s1, char *s2, const char *s3) void warn_strcpy (char *s1, char *s2, const char *s3) { *(char **)a1 = strcpy (s1, s2); // { dg-warning "\\\[-Wstringop-overflow" } - *(char **)a2 = strcpy (s2, s3); // { dg-warning "\\\[-Wstringop-overflow" } + *(char **)a2 = strcpy (s2, s3); // { dg-warning "\\\[-Wstringop-overflow" "!ptr_eq_short" { target { ! ptr_eq_short } } } } diff --git a/gcc/testsuite/gcc.dg/strlenopt-95.c b/gcc/testsuite/gcc.dg/strlenopt-95.c index 505bc99..6e0a79d 100644 --- a/gcc/testsuite/gcc.dg/strlenopt-95.c +++ b/gcc/testsuite/gcc.dg/strlenopt-95.c @@ -1,6 +1,6 @@ /* Verify strlen results of vector assignments. { dg-do compile } - { dg-options "-O2 -Wall" } */ + { dg-options "-O2 -Wall -fdump-tree-optimized" } */ #include "strlenopt.h" @@ -52,7 +52,7 @@ void test_fold (int i) *(VC8*)a = (VC8){ 1, 2, 3, 0, 5, 6 }; A (strlen (a) == 3); - *(VC8*)a = (VC8){ 1, 2, 3, 0, 5, 6, 7 }; + *(VC8*)a = (VC8){ 1, 2, 3, 0, 5, 6, 7, 8 }; A (strlen (a) == 3); A (strlen (a + 1) == 2); A (strlen (a + 2) == 1); @@ -61,5 +61,5 @@ void test_fold (int i) A (a[4] == 5 && a[5] == 6 && a[6] == 7 && a[7] == 8); } -/* { dg-final { scan-tree-dump-not "abort \\(" "strlen1" } } - { dg-final { scan-tree-dump-not "strlen \\(" "strlen1" } } */ +/* { dg-final { scan-tree-dump-not "abort \\(" "optimized" } } + { dg-final { scan-tree-dump-not "strlen \\(" "optimized" } } */ -- cgit v1.1 From e06b1c5ac00b1bd0339739d3d9377c90852a83c9 Mon Sep 17 00:00:00 2001 From: Andrew Pinski Date: Thu, 15 Jul 2021 18:07:09 -0700 Subject: Fix PR 101453: ICE with optimize and large integer constant The problem is the buffer is too small to hold "-O" and the integer. This fixes the problem by using the correct size instead. Changes since v1: * v2: Use HOST_BITS_PER_LONG and just divide by 3 instead of 3.32. OK? Bootstrapped and tested on x86_64-linux with no regressions.
gcc/c-family/ChangeLog: PR c/101453 * c-common.c (parse_optimize_options): Use the correct size for buffer. --- gcc/c-family/c-common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c index 681fcc9..fe3657b 100644 --- a/gcc/c-family/c-common.c +++ b/gcc/c-family/c-common.c @@ -5798,7 +5798,7 @@ parse_optimize_options (tree args, bool attr_p) if (TREE_CODE (value) == INTEGER_CST) { - char buffer[20]; + char buffer[HOST_BITS_PER_LONG / 3 + 4]; sprintf (buffer, "-O%ld", (long) TREE_INT_CST_LOW (value)); vec_safe_push (optimize_args, ggc_strdup (buffer)); } -- cgit v1.1 From 5932dd35eaa816e8d9b6406c6c433395ff5b6162 Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Fri, 16 Jul 2021 15:45:33 -0400 Subject: analyzer: add svalue::maybe_get_region gcc/analyzer/ChangeLog: * program-state.cc (program_state::detect_leaks): Simplify using svalue::maybe_get_region. * region-model-impl-calls.cc (region_model::impl_call_fgets): Likewise. (region_model::impl_call_fread): Likewise. (region_model::impl_call_free): Likewise. (region_model::impl_call_operator_delete): Likewise. * region-model.cc (selftest::test_stack_frames): Likewise. (selftest::test_state_merging): Likewise. * svalue.cc (svalue::maybe_get_region): New. * svalue.h (svalue::maybe_get_region): New decl. Signed-off-by: David Malcolm --- gcc/analyzer/program-state.cc | 9 +++------ gcc/analyzer/region-model-impl-calls.cc | 16 ++++------------ gcc/analyzer/region-model.cc | 5 ++--- gcc/analyzer/svalue.cc | 12 ++++++++++++ gcc/analyzer/svalue.h | 1 + 5 files changed, 22 insertions(+), 21 deletions(-) (limited to 'gcc') diff --git a/gcc/analyzer/program-state.cc b/gcc/analyzer/program-state.cc index 23cfcb0..cc53aef 100644 --- a/gcc/analyzer/program-state.cc +++ b/gcc/analyzer/program-state.cc @@ -1285,12 +1285,9 @@ program_state::detect_leaks (const program_state &src_state, /* Purge dead heap-allocated regions from dynamic extents. 
*/ for (const svalue *sval : dead_svals) - if (const region_svalue *region_sval = sval->dyn_cast_region_svalue ()) - { - const region *reg = region_sval->get_pointee (); - if (reg->get_kind () == RK_HEAP_ALLOCATED) - dest_state.m_region_model->unset_dynamic_extents (reg); - } + if (const region *reg = sval->maybe_get_region ()) + if (reg->get_kind () == RK_HEAP_ALLOCATED) + dest_state.m_region_model->unset_dynamic_extents (reg); } #if CHECKING_P diff --git a/gcc/analyzer/region-model-impl-calls.cc b/gcc/analyzer/region-model-impl-calls.cc index 4be6550..efb0fc8 100644 --- a/gcc/analyzer/region-model-impl-calls.cc +++ b/gcc/analyzer/region-model-impl-calls.cc @@ -325,10 +325,8 @@ region_model::impl_call_fgets (const call_details &cd) /* Ideally we would bifurcate state here between the error vs no error cases. */ const svalue *ptr_sval = cd.get_arg_svalue (0); - if (const region_svalue *ptr_to_region_sval - = ptr_sval->dyn_cast_region_svalue ()) + if (const region *reg = ptr_sval->maybe_get_region ()) { - const region *reg = ptr_to_region_sval->get_pointee (); const region *base_reg = reg->get_base_region (); const svalue *new_sval = cd.get_or_create_conjured_svalue (base_reg); purge_state_involving (new_sval, cd.get_ctxt ()); @@ -342,10 +340,8 @@ void region_model::impl_call_fread (const call_details &cd) { const svalue *ptr_sval = cd.get_arg_svalue (0); - if (const region_svalue *ptr_to_region_sval - = ptr_sval->dyn_cast_region_svalue ()) + if (const region *reg = ptr_sval->maybe_get_region ()) { - const region *reg = ptr_to_region_sval->get_pointee (); const region *base_reg = reg->get_base_region (); const svalue *new_sval = cd.get_or_create_conjured_svalue (base_reg); purge_state_involving (new_sval, cd.get_ctxt ()); @@ -372,12 +368,10 @@ void region_model::impl_call_free (const call_details &cd) { const svalue *ptr_sval = cd.get_arg_svalue (0); - if (const region_svalue *ptr_to_region_sval - = ptr_sval->dyn_cast_region_svalue ()) + if (const region *freed_reg 
= ptr_sval->maybe_get_region ()) { /* If the ptr points to an underlying heap region, delete it, poisoning pointers. */ - const region *freed_reg = ptr_to_region_sval->get_pointee (); unbind_region_and_descendents (freed_reg, POISON_KIND_FREED); m_dynamic_extents.remove (freed_reg); } @@ -472,12 +466,10 @@ bool region_model::impl_call_operator_delete (const call_details &cd) { const svalue *ptr_sval = cd.get_arg_svalue (0); - if (const region_svalue *ptr_to_region_sval - = ptr_sval->dyn_cast_region_svalue ()) + if (const region *freed_reg = ptr_sval->maybe_get_region ()) { /* If the ptr points to an underlying heap region, delete it, poisoning pointers. */ - const region *freed_reg = ptr_to_region_sval->get_pointee (); unbind_region_and_descendents (freed_reg, POISON_KIND_FREED); } return false; diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc index 3fe2cce..190c852 100644 --- a/gcc/analyzer/region-model.cc +++ b/gcc/analyzer/region-model.cc @@ -4541,7 +4541,7 @@ test_stack_frames () renumbering. */ const svalue *new_q_sval = model.get_rvalue (q, &ctxt); ASSERT_EQ (new_q_sval->get_kind (), SK_REGION); - ASSERT_EQ (new_q_sval->dyn_cast_region_svalue ()->get_pointee (), + ASSERT_EQ (new_q_sval->maybe_get_region (), model.get_lvalue (p, &ctxt)); /* Verify that top of stack has been updated. */ @@ -5070,8 +5070,7 @@ test_state_merging () model0.set_value (q_in_first_frame, sval_ptr, NULL); /* Verify that it's pointing at the newer frame. 
*/ - const region *reg_pointee - = sval_ptr->dyn_cast_region_svalue ()->get_pointee (); + const region *reg_pointee = sval_ptr->maybe_get_region (); ASSERT_EQ (reg_pointee->get_parent_region (), reg_2nd_frame); model0.canonicalize (); diff --git a/gcc/analyzer/svalue.cc b/gcc/analyzer/svalue.cc index 22da769..fa9a862 100644 --- a/gcc/analyzer/svalue.cc +++ b/gcc/analyzer/svalue.cc @@ -111,6 +111,18 @@ svalue::maybe_get_constant () const return NULL_TREE; } +/* If this svalue is a region_svalue, return the region it points to. + Otherwise return NULL. */ + +const region * +svalue::maybe_get_region () const +{ + if (const region_svalue *region_sval = dyn_cast_region_svalue ()) + return region_sval->get_pointee (); + else + return NULL; +} + /* If this svalue is a cast (i.e a unaryop NOP_EXPR or VIEW_CONVERT_EXPR), return the underlying svalue. Otherwise return NULL. */ diff --git a/gcc/analyzer/svalue.h b/gcc/analyzer/svalue.h index 20d7cf8..1519889 100644 --- a/gcc/analyzer/svalue.h +++ b/gcc/analyzer/svalue.h @@ -126,6 +126,7 @@ public: dyn_cast_conjured_svalue () const { return NULL; } tree maybe_get_constant () const; + const region *maybe_get_region () const; const svalue *maybe_undo_cast () const; const svalue *unwrap_any_unmergeable () const; -- cgit v1.1 From 9ea10c480565fa42b1804fb436f7e26ca77b71a3 Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Fri, 16 Jul 2021 15:47:06 -0400 Subject: analyzer: add __analyzer_dump_state gcc/analyzer/ChangeLog: * engine.cc (exploded_node::on_stmt_pre): Handle __analyzer_dump_state. * program-state.cc (extrinsic_state::get_sm_idx_by_name): New. (program_state::impl_call_analyzer_dump_state): New. * program-state.h (extrinsic_state::get_sm_idx_by_name): New decl. (program_state::impl_call_analyzer_dump_state): New decl. * region-model-impl-calls.cc (call_details::get_arg_string_literal): New. * region-model.h (call_details::get_arg_string_literal): New decl. gcc/ChangeLog: * doc/analyzer.texi: Add __analyzer_dump_state. 
gcc/testsuite/ChangeLog: * gcc.dg/analyzer/analyzer-decls.h (__analyzer_dump_state): New. * gcc.dg/analyzer/dump-state.c: New test. Signed-off-by: David Malcolm --- gcc/analyzer/engine.cc | 3 ++ gcc/analyzer/program-state.cc | 49 ++++++++++++++++++++++++++ gcc/analyzer/program-state.h | 6 ++++ gcc/analyzer/region-model-impl-calls.cc | 18 ++++++++++ gcc/analyzer/region-model.h | 1 + gcc/doc/analyzer.texi | 9 +++++ gcc/testsuite/gcc.dg/analyzer/analyzer-decls.h | 5 +++ gcc/testsuite/gcc.dg/analyzer/dump-state.c | 14 ++++++++ 8 files changed, 105 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/analyzer/dump-state.c (limited to 'gcc') diff --git a/gcc/analyzer/engine.cc b/gcc/analyzer/engine.cc index 7662a7f..f9fc581 100644 --- a/gcc/analyzer/engine.cc +++ b/gcc/analyzer/engine.cc @@ -1270,6 +1270,9 @@ exploded_node::on_stmt_pre (exploded_graph &eg, state->dump (eg.get_ext_state (), true); return; } + else if (is_special_named_call_p (call, "__analyzer_dump_state", 2)) + state->impl_call_analyzer_dump_state (call, eg.get_ext_state (), + ctxt); else if (is_setjmp_call_p (call)) { state->m_region_model->on_setjmp (call, this, ctxt); diff --git a/gcc/analyzer/program-state.cc b/gcc/analyzer/program-state.cc index cc53aef..3081217 100644 --- a/gcc/analyzer/program-state.cc +++ b/gcc/analyzer/program-state.cc @@ -131,6 +131,27 @@ extrinsic_state::get_model_manager () const return NULL; /* for selftests. */ } +/* Try to find a state machine named NAME. + If found, return true and write its index to *OUT. + Otherwise return false. */ + +bool +extrinsic_state::get_sm_idx_by_name (const char *name, unsigned *out) const +{ + unsigned i; + state_machine *sm; + FOR_EACH_VEC_ELT (m_checkers, i, sm) + if (0 == strcmp (name, sm->get_name ())) + { + /* Found NAME. */ + *out = i; + return true; + } + + /* NAME not found. */ + return false; +} + /* struct sm_state_map::entry_t. 
*/ int @@ -1290,6 +1311,34 @@ program_state::detect_leaks (const program_state &src_state, dest_state.m_region_model->unset_dynamic_extents (reg); } +/* Handle calls to "__analyzer_dump_state". */ + +void +program_state::impl_call_analyzer_dump_state (const gcall *call, + const extrinsic_state &ext_state, + region_model_context *ctxt) +{ + call_details cd (call, m_region_model, ctxt); + const char *sm_name = cd.get_arg_string_literal (0); + if (!sm_name) + { + error_at (call->location, "cannot determine state machine"); + return; + } + unsigned sm_idx; + if (!ext_state.get_sm_idx_by_name (sm_name, &sm_idx)) + { + error_at (call->location, "unrecognized state machine %qs", sm_name); + return; + } + const sm_state_map *smap = m_checker_states[sm_idx]; + + const svalue *sval = cd.get_arg_svalue (1); + + state_machine::state_t state = smap->get_state (sval, ext_state); + warning_at (call->location, 0, "state: %qs", state->get_name ()); +} + #if CHECKING_P namespace selftest { diff --git a/gcc/analyzer/program-state.h b/gcc/analyzer/program-state.h index f16fe6b..8dee930 100644 --- a/gcc/analyzer/program-state.h +++ b/gcc/analyzer/program-state.h @@ -58,6 +58,8 @@ public: engine *get_engine () const { return m_engine; } region_model_manager *get_model_manager () const; + bool get_sm_idx_by_name (const char *name, unsigned *out) const; + private: /* The state machines. */ auto_delete_vec &m_checkers; @@ -256,6 +258,10 @@ public: const extrinsic_state &ext_state, region_model_context *ctxt); + void impl_call_analyzer_dump_state (const gcall *call, + const extrinsic_state &ext_state, + region_model_context *ctxt); + /* TODO: lose the pointer here (const-correctness issues?). 
*/ region_model *m_region_model; auto_delete_vec m_checker_states; diff --git a/gcc/analyzer/region-model-impl-calls.cc b/gcc/analyzer/region-model-impl-calls.cc index efb0fc8..545634b 100644 --- a/gcc/analyzer/region-model-impl-calls.cc +++ b/gcc/analyzer/region-model-impl-calls.cc @@ -140,6 +140,24 @@ call_details::get_arg_svalue (unsigned idx) const return m_model->get_rvalue (arg, m_ctxt); } +/* Attempt to get the string literal for argument IDX, or return NULL + otherwise. + For use when implementing "__analyzer_*" functions that take + string literals. */ + +const char * +call_details::get_arg_string_literal (unsigned idx) const +{ + const svalue *str_arg = get_arg_svalue (idx); + if (const region *pointee = str_arg->maybe_get_region ()) + if (const string_region *string_reg = pointee->dyn_cast_string_region ()) + { + tree string_cst = string_reg->get_string_cst (); + return TREE_STRING_POINTER (string_cst); + } + return NULL; +} + /* Dump a multiline representation of this call to PP. */ void diff --git a/gcc/analyzer/region-model.h b/gcc/analyzer/region-model.h index 71f6b3e..f07a287 100644 --- a/gcc/analyzer/region-model.h +++ b/gcc/analyzer/region-model.h @@ -468,6 +468,7 @@ public: tree get_arg_tree (unsigned idx) const; tree get_arg_type (unsigned idx) const; const svalue *get_arg_svalue (unsigned idx) const; + const char *get_arg_string_literal (unsigned idx) const; void dump_to_pp (pretty_printer *pp, bool simple) const; void dump (bool simple) const; diff --git a/gcc/doc/analyzer.texi b/gcc/doc/analyzer.texi index 2ca4bf6..aadb0de 100644 --- a/gcc/doc/analyzer.texi +++ b/gcc/doc/analyzer.texi @@ -522,6 +522,15 @@ it will also dump all of the states within the ``processed'' nodes. will dump the region_model's state to stderr. 
@smallexample +__analyzer_dump_state ("malloc", ptr); +@end smallexample + +will emit a warning describing the state of the 2nd argument +(which can be of any type) with respect to the state machine with +a name matching the 1st argument (which must be a string literal). +This is for use when debugging, and may be of use in DejaGnu tests. + +@smallexample __analyzer_eval (expr); @end smallexample will emit a warning with text "TRUE", FALSE" or "UNKNOWN" based on the diff --git a/gcc/testsuite/gcc.dg/analyzer/analyzer-decls.h b/gcc/testsuite/gcc.dg/analyzer/analyzer-decls.h index 2446693..e8745c0 100644 --- a/gcc/testsuite/gcc.dg/analyzer/analyzer-decls.h +++ b/gcc/testsuite/gcc.dg/analyzer/analyzer-decls.h @@ -35,6 +35,11 @@ extern void __analyzer_dump_path (void); /* Dump the region_model's state to stderr. */ extern void __analyzer_dump_region_model (void); +/* Emit a warning describing the state of the 2nd argument + (which can be of any type) with respect to NAME. + This is for use when debugging, and may be of use in DejaGnu tests. */ +extern void __analyzer_dump_state (const char *name, ...); + /* Emit a warning with text "TRUE", FALSE" or "UNKNOWN" based on the truthfulness of the argument. */ extern void __analyzer_eval (int); diff --git a/gcc/testsuite/gcc.dg/analyzer/dump-state.c b/gcc/testsuite/gcc.dg/analyzer/dump-state.c new file mode 100644 index 0000000..618a5a9 --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/dump-state.c @@ -0,0 +1,14 @@ +/* Verify that __analyzer_dump_state works as expected. 
*/ + +#include +#include "analyzer-decls.h" + +void test_1 (void) +{ + void *p = malloc (1024); + __analyzer_dump_state ("malloc", p); /* { dg-warning "state: 'unchecked'" } */ + free (p); + __analyzer_dump_state ("malloc", p); /* { dg-warning "state: 'freed'" } */ + __analyzer_dump_state (NULL, p); /* { dg-error "cannot determine state machine" } */ + __analyzer_dump_state ("not a state machine", p); /* { dg-error "unrecognized state machine 'not a state machine'" } */ +} -- cgit v1.1 From 9faf8348621ae6ab583af593d67ac424300a2bad Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Fri, 16 Jul 2021 15:49:17 -0400 Subject: analyzer: add region_model::check_region_access I've been experimenting with various new diagnostics that require a common place for the analyzer to check the validity of reads or writes to memory (e.g. buffer overflow). As preliminary work, this patch adds new region_model::check_region_for_{read|write} functions which are called anywhere that the analyzer "sees" memory being read from or written to (via region_model::get_store_value and region_model::set_value). This takes over the hardcoded calls to check_for_writable_region (allowing for other kinds of checks on writes); checking reads is currently a no-op. gcc/analyzer/ChangeLog: * analyzer.h (enum access_direction): New. * engine.cc (exploded_node::on_longjmp): Update for new param of get_store_value. * program-state.cc (program_state::prune_for_point): Likewise. * region-model-impl-calls.cc (region_model::impl_call_memcpy): Replace call to check_for_writable_region with call to check_region_for_write. (region_model::impl_call_memset): Likewise. (region_model::impl_call_strcpy): Likewise. * region-model-reachability.cc (reachable_regions::add): Update for new param of get_store_value. * region-model.cc (region_model::get_rvalue_1): Likewise, also for get_rvalue_for_bits. (region_model::get_store_value): Add ctxt param and use it to call check_region_for_read. 
(region_model::get_rvalue_for_bits): Add ctxt param and use it to call get_store_value. (region_model::check_region_access): New. (region_model::check_region_for_write): New. (region_model::check_region_for_read): New. (region_model::set_value): Update comment. Replace call to check_for_writable_region with call to check_region_for_write. * region-model.h (region_model::get_rvalue_for_bits): Add ctxt param. (region_model::get_store_value): Add ctxt param. (region_model::check_region_access): New decl. (region_model::check_region_for_write): New decl. (region_model::check_region_for_read): New decl. * region.cc (region_model::copy_region): Update call to get_store_value. * svalue.cc (initial_svalue::implicitly_live_p): Likewise. Signed-off-by: David Malcolm --- gcc/analyzer/analyzer.h | 8 ++++ gcc/analyzer/engine.cc | 3 +- gcc/analyzer/program-state.cc | 2 +- gcc/analyzer/region-model-impl-calls.cc | 6 +-- gcc/analyzer/region-model-reachability.cc | 2 +- gcc/analyzer/region-model.cc | 70 ++++++++++++++++++++++++++----- gcc/analyzer/region-model.h | 13 +++++- gcc/analyzer/region.cc | 2 +- gcc/analyzer/svalue.cc | 2 +- 9 files changed, 88 insertions(+), 20 deletions(-) (limited to 'gcc') diff --git a/gcc/analyzer/analyzer.h b/gcc/analyzer/analyzer.h index d42bee7..90143d9 100644 --- a/gcc/analyzer/analyzer.h +++ b/gcc/analyzer/analyzer.h @@ -208,6 +208,14 @@ public: virtual logger *get_logger () const = 0; }; +/* An enum for describing the direction of an access to memory. 
*/ + +enum access_direction +{ + DIR_READ, + DIR_WRITE +}; + } // namespace ana extern bool is_special_named_call_p (const gcall *call, const char *funcname, diff --git a/gcc/analyzer/engine.cc b/gcc/analyzer/engine.cc index f9fc581..ee625fb 100644 --- a/gcc/analyzer/engine.cc +++ b/gcc/analyzer/engine.cc @@ -1468,7 +1468,8 @@ exploded_node::on_longjmp (exploded_graph &eg, const region *buf = new_region_model->deref_rvalue (buf_ptr_sval, buf_ptr, ctxt); - const svalue *buf_content_sval = new_region_model->get_store_value (buf); + const svalue *buf_content_sval + = new_region_model->get_store_value (buf, ctxt); const setjmp_svalue *setjmp_sval = buf_content_sval->dyn_cast_setjmp_svalue (); if (!setjmp_sval) diff --git a/gcc/analyzer/program-state.cc b/gcc/analyzer/program-state.cc index 3081217..ccfe7b0 100644 --- a/gcc/analyzer/program-state.cc +++ b/gcc/analyzer/program-state.cc @@ -1082,7 +1082,7 @@ program_state::prune_for_point (exploded_graph &eg, temporaries keep the value reachable until the frame is popped. */ const svalue *sval - = new_state.m_region_model->get_store_value (reg); + = new_state.m_region_model->get_store_value (reg, NULL); if (!new_state.can_purge_p (eg.get_ext_state (), sval) && SSA_NAME_VAR (ssa_name)) { diff --git a/gcc/analyzer/region-model-impl-calls.cc b/gcc/analyzer/region-model-impl-calls.cc index 545634b..eff8caa 100644 --- a/gcc/analyzer/region-model-impl-calls.cc +++ b/gcc/analyzer/region-model-impl-calls.cc @@ -431,7 +431,7 @@ region_model::impl_call_memcpy (const call_details &cd) return; } - check_for_writable_region (dest_reg, cd.get_ctxt ()); + check_region_for_write (dest_reg, cd.get_ctxt ()); /* Otherwise, mark region's contents as unknown. 
*/ mark_region_as_unknown (dest_reg, cd.get_uncertainty ()); @@ -455,7 +455,7 @@ region_model::impl_call_memset (const call_details &cd) const region *sized_dest_reg = m_mgr->get_sized_region (dest_reg, NULL_TREE, num_bytes_sval); - check_for_writable_region (sized_dest_reg, cd.get_ctxt ()); + check_region_for_write (sized_dest_reg, cd.get_ctxt ()); fill_region (sized_dest_reg, fill_value_u8); return true; } @@ -515,7 +515,7 @@ region_model::impl_call_strcpy (const call_details &cd) cd.maybe_set_lhs (dest_sval); - check_for_writable_region (dest_reg, cd.get_ctxt ()); + check_region_for_write (dest_reg, cd.get_ctxt ()); /* For now, just mark region's contents as unknown. */ mark_region_as_unknown (dest_reg, cd.get_uncertainty ()); diff --git a/gcc/analyzer/region-model-reachability.cc b/gcc/analyzer/region-model-reachability.cc index 1f65307..b5ae787 100644 --- a/gcc/analyzer/region-model-reachability.cc +++ b/gcc/analyzer/region-model-reachability.cc @@ -154,7 +154,7 @@ reachable_regions::add (const region *reg, bool is_mutable) if (binding_cluster *bind_cluster = m_store->get_cluster (base_reg)) bind_cluster->for_each_value (handle_sval_cb, this); else - handle_sval (m_model->get_store_value (reg)); + handle_sval (m_model->get_store_value (reg, NULL)); } void diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc index 190c852..4fab1ef 100644 --- a/gcc/analyzer/region-model.cc +++ b/gcc/analyzer/region-model.cc @@ -1743,7 +1743,7 @@ region_model::get_rvalue_1 (path_var pv, region_model_context *ctxt) const gcc_assert (TREE_CODE (first_bit_offset) == INTEGER_CST); bit_range bits (TREE_INT_CST_LOW (first_bit_offset), TREE_INT_CST_LOW (num_bits)); - return get_rvalue_for_bits (TREE_TYPE (expr), reg, bits); + return get_rvalue_for_bits (TREE_TYPE (expr), reg, bits, ctxt); } case SSA_NAME: @@ -1753,7 +1753,7 @@ region_model::get_rvalue_1 (path_var pv, region_model_context *ctxt) const case ARRAY_REF: { const region *reg = get_lvalue (pv, ctxt); - 
return get_store_value (reg); + return get_store_value (reg, ctxt); } case REALPART_EXPR: @@ -1808,7 +1808,7 @@ region_model::get_rvalue_1 (path_var pv, region_model_context *ctxt) const case MEM_REF: { const region *ref_reg = get_lvalue (pv, ctxt); - return get_store_value (ref_reg); + return get_store_value (ref_reg, ctxt); } } } @@ -1913,11 +1913,15 @@ region_model::get_initial_value_for_global (const region *reg) const } /* Get a value for REG, looking it up in the store, or otherwise falling - back to "initial" or "unknown" values. */ + back to "initial" or "unknown" values. + Use CTXT to report any warnings associated with reading from REG. */ const svalue * -region_model::get_store_value (const region *reg) const +region_model::get_store_value (const region *reg, + region_model_context *ctxt) const { + check_region_for_read (reg, ctxt); + /* Special-case: handle var_decls in the constant pool. */ if (const decl_region *decl_reg = reg->dyn_cast_decl_region ()) if (const svalue *sval = decl_reg->maybe_get_constant_value (m_mgr)) @@ -2077,14 +2081,16 @@ region_model::deref_rvalue (const svalue *ptr_sval, tree ptr_tree, /* Attempt to get BITS within any value of REG, as TYPE. In particular, extract values from compound_svalues for the case where there's a concrete binding at BITS. - Return an unknown svalue if we can't handle the given case. */ + Return an unknown svalue if we can't handle the given case. + Use CTXT to report any warnings associated with reading from REG. 
*/ const svalue * region_model::get_rvalue_for_bits (tree type, const region *reg, - const bit_range &bits) const + const bit_range &bits, + region_model_context *ctxt) const { - const svalue *sval = get_store_value (reg); + const svalue *sval = get_store_value (reg, ctxt); return m_mgr->get_or_create_bits_within (type, bits, sval); } @@ -2240,8 +2246,52 @@ region_model::get_capacity (const region *reg) const return m_mgr->get_or_create_unknown_svalue (sizetype); } +/* If CTXT is non-NULL, use it to warn about any problems accessing REG, + using DIR to determine if this access is a read or write. */ + +void +region_model::check_region_access (const region *reg, + enum access_direction dir, + region_model_context *ctxt) const +{ + /* Fail gracefully if CTXT is NULL. */ + if (!ctxt) + return; + + switch (dir) + { + default: + gcc_unreachable (); + case DIR_READ: + /* Currently a no-op. */ + break; + case DIR_WRITE: + check_for_writable_region (reg, ctxt); + break; + } +} + +/* If CTXT is non-NULL, use it to warn about any problems writing to REG. */ + +void +region_model::check_region_for_write (const region *dest_reg, + region_model_context *ctxt) const +{ + check_region_access (dest_reg, DIR_WRITE, ctxt); +} + +/* If CTXT is non-NULL, use it to warn about any problems reading from REG. */ + +void +region_model::check_region_for_read (const region *src_reg, + region_model_context *ctxt) const +{ + check_region_access (src_reg, DIR_READ, ctxt); +} + /* Set the value of the region given by LHS_REG to the value given - by RHS_SVAL. */ + by RHS_SVAL. + Use CTXT to report any warnings associated with writing to LHS_REG. 
*/ void region_model::set_value (const region *lhs_reg, const svalue *rhs_sval, @@ -2250,7 +2300,7 @@ region_model::set_value (const region *lhs_reg, const svalue *rhs_sval, gcc_assert (lhs_reg); gcc_assert (rhs_sval); - check_for_writable_region (lhs_reg, ctxt); + check_region_for_write (lhs_reg, ctxt); m_store.set_value (m_mgr->get_store_manager(), lhs_reg, rhs_sval, ctxt ? ctxt->get_uncertainty () : NULL); diff --git a/gcc/analyzer/region-model.h b/gcc/analyzer/region-model.h index f07a287..734ec60 100644 --- a/gcc/analyzer/region-model.h +++ b/gcc/analyzer/region-model.h @@ -609,7 +609,8 @@ class region_model const svalue *get_rvalue_for_bits (tree type, const region *reg, - const bit_range &bits) const; + const bit_range &bits, + region_model_context *ctxt) const; void set_value (const region *lhs_reg, const svalue *rhs_sval, region_model_context *ctxt); @@ -687,7 +688,8 @@ class region_model static void append_ssa_names_cb (const region *base_reg, struct append_ssa_names_cb_data *data); - const svalue *get_store_value (const region *reg) const; + const svalue *get_store_value (const region *reg, + region_model_context *ctxt) const; bool region_exists_p (const region *reg) const; @@ -748,6 +750,13 @@ class region_model void check_for_writable_region (const region* dest_reg, region_model_context *ctxt) const; + void check_region_access (const region *reg, + enum access_direction dir, + region_model_context *ctxt) const; + void check_region_for_write (const region *dest_reg, + region_model_context *ctxt) const; + void check_region_for_read (const region *src_reg, + region_model_context *ctxt) const; /* Storing this here to avoid passing it around everywhere. 
*/ region_model_manager *const m_mgr; diff --git a/gcc/analyzer/region.cc b/gcc/analyzer/region.cc index 6cccb0f..fa187fd 100644 --- a/gcc/analyzer/region.cc +++ b/gcc/analyzer/region.cc @@ -573,7 +573,7 @@ region_model::copy_region (const region *dst_reg, const region *src_reg, if (dst_reg == src_reg) return; - const svalue *sval = get_store_value (src_reg); + const svalue *sval = get_store_value (src_reg, ctxt); set_value (dst_reg, sval, ctxt); } diff --git a/gcc/analyzer/svalue.cc b/gcc/analyzer/svalue.cc index fa9a862..323df80 100644 --- a/gcc/analyzer/svalue.cc +++ b/gcc/analyzer/svalue.cc @@ -936,7 +936,7 @@ initial_svalue::implicitly_live_p (const svalue_set *, a popped stack frame. */ if (model->region_exists_p (m_reg)) { - const svalue *reg_sval = model->get_store_value (m_reg); + const svalue *reg_sval = model->get_store_value (m_reg, NULL); if (reg_sval == this) return true; } -- cgit v1.1 From d04b0c75794545f1f7a942764285e21eaf2915a1 Mon Sep 17 00:00:00 2001 From: Patrick Palka Date: Fri, 16 Jul 2021 16:21:10 -0400 Subject: c++: covariant reference return types [PR99664] This implements the wording changes of CWG 960 which clarifies that two reference types are covariant only if they're both lvalue references or both rvalue references. DR 960 PR c++/99664 gcc/cp/ChangeLog: * search.c (check_final_overrider): Compare TYPE_REF_IS_RVALUE when the return types are references. gcc/testsuite/ChangeLog: * g++.dg/inherit/covariant23.C: New test. 
--- gcc/cp/search.c | 8 +++++++- gcc/testsuite/g++.dg/inherit/covariant23.C | 14 ++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/g++.dg/inherit/covariant23.C (limited to 'gcc') diff --git a/gcc/cp/search.c b/gcc/cp/search.c index af41bfe..943671a 100644 --- a/gcc/cp/search.c +++ b/gcc/cp/search.c @@ -1948,7 +1948,13 @@ check_final_overrider (tree overrider, tree basefn) fail = !INDIRECT_TYPE_P (base_return); if (!fail) { - fail = cp_type_quals (base_return) != cp_type_quals (over_return); + if (cp_type_quals (base_return) != cp_type_quals (over_return)) + fail = 1; + + if (TYPE_REF_P (base_return) + && (TYPE_REF_IS_RVALUE (base_return) + != TYPE_REF_IS_RVALUE (over_return))) + fail = 1; base_return = TREE_TYPE (base_return); over_return = TREE_TYPE (over_return); diff --git a/gcc/testsuite/g++.dg/inherit/covariant23.C b/gcc/testsuite/g++.dg/inherit/covariant23.C new file mode 100644 index 0000000..b27be15 --- /dev/null +++ b/gcc/testsuite/g++.dg/inherit/covariant23.C @@ -0,0 +1,14 @@ +// PR c++/99664 +// { dg-do compile { target c++11 } } + +struct Res { }; + +struct A { + virtual Res &&f(); + virtual Res &g(); +}; + +struct B : A { + Res &f() override; // { dg-error "return type" } + Res &&g() override; // { dg-error "return type" } +}; -- cgit v1.1 From a8b3861496bffae8b813ea196c1c5b27f79fbe69 Mon Sep 17 00:00:00 2001 From: Patrick Palka Date: Fri, 16 Jul 2021 16:21:13 -0400 Subject: c++: alias CTAD in unevaluated context [PR101233] This is the alias CTAD version of the CTAD bug PR93248, and the fix is the same: clear cp_unevaluated_operand so that the entire chain of DECL_ARGUMENTS gets substituted. PR c++/101233 gcc/cp/ChangeLog: * pt.c (alias_ctad_tweaks): Clear cp_unevaluated_operand for substituting DECL_ARGUMENTS. gcc/testsuite/ChangeLog: * g++.dg/cpp2a/class-deduction-alias10.C: New test. 
--- gcc/cp/pt.c | 8 +++++++- gcc/testsuite/g++.dg/cpp2a/class-deduction-alias10.C | 10 ++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/g++.dg/cpp2a/class-deduction-alias10.C (limited to 'gcc') diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c index c7bf7d4..94ca3bc 100644 --- a/gcc/cp/pt.c +++ b/gcc/cp/pt.c @@ -29097,7 +29097,13 @@ alias_ctad_tweaks (tree tmpl, tree uguides) /* Substitute the deduced arguments plus the rewritten template parameters into f to get g. This covers the type, copyness, guideness, and explicit-specifier. */ - tree g = tsubst_decl (DECL_TEMPLATE_RESULT (f), targs, complain); + tree g; + { + /* Parms are to have DECL_CHAIN tsubsted, which would be skipped + if cp_unevaluated_operand. */ + cp_evaluated ev; + g = tsubst_decl (DECL_TEMPLATE_RESULT (f), targs, complain); + } if (g == error_mark_node) continue; DECL_USE_TEMPLATE (g) = 0; diff --git a/gcc/testsuite/g++.dg/cpp2a/class-deduction-alias10.C b/gcc/testsuite/g++.dg/cpp2a/class-deduction-alias10.C new file mode 100644 index 0000000..a473fff --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp2a/class-deduction-alias10.C @@ -0,0 +1,10 @@ +// PR c++/101233 +// { dg-do compile { target c++20 } } + +template +struct A { A(T, U); }; + +template +using B = A; + +using type = decltype(B{0, 0}); -- cgit v1.1 From 87277b6a04486b606761b86dbcfbc9a4b6871f4c Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Sat, 17 Jul 2021 00:16:31 +0000 Subject: Daily bump. --- gcc/ChangeLog | 215 ++++++++++++++++++++++++++++++++++++++++++++++++ gcc/DATESTAMP | 2 +- gcc/analyzer/ChangeLog | 59 +++++++++++++ gcc/c-family/ChangeLog | 6 ++ gcc/cp/ChangeLog | 18 ++++ gcc/testsuite/ChangeLog | 69 ++++++++++++++++ 6 files changed, 368 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 47772d9..b18ee85 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,218 @@ +2021-07-16 David Malcolm + + * doc/analyzer.texi: Add __analyzer_dump_state. 
+ +2021-07-16 Bill Schmidt + + * config/rs6000/rbtree.c: New file. + * config/rs6000/rbtree.h: New file. + +2021-07-16 Bill Schmidt + + * config/rs6000/rs6000-gen-builtins.c (restriction): New enum. + (typeinfo): Add restr field. + (match_bracketed_pair): New function. + (match_const_restriction): Implement. + +2021-07-16 Bill Schmidt + + * config/rs6000/rs6000-gen-builtins.c (match_basetype): Implement. + +2021-07-16 Bill Schmidt + + * config/rs6000/rs6000-gen-builtins.c (void_status): New enum. + (basetype): Likewise. + (typeinfo): Likewise. + (handle_pointer): New function. + (match_basetype): New stub function. + (match_const_restriction): Likewise. + (match_type): New function. + +2021-07-16 Bill Schmidt + + * config/rs6000/rs6000-gen-builtins.c (consume_whitespace): New + function. + (advance_line): Likewise. + (safe_inc_pos): Likewise. + (match_identifier): Likewise. + (match_integer): Likewise. + (match_to_right_bracket): Likewise. + +2021-07-16 Bill Schmidt + + * config/rs6000/rs6000-gen-builtins.c (bif_file): New variable. + (ovld_file): Likewise. + (header_file): Likewise. + (init_file): Likewise. + (defines_file): Likewise. + (pgm_path): Likewise. + (bif_path): Likewise. + (ovld_path): Likewise. + (header_path): Likewise. + (init_path): Likewise. + (defines_path): Likewise. + (LINELEN): New macro. + (linebuf): New variable. + (line): Likewise. + (pos): Likewise. + (diag): Likewise. + (bif_diag): New function. + (ovld_diag): Likewise. + +2021-07-16 Bill Schmidt + + * config/rs6000/rs6000-builtin-new.def: New. + * config/rs6000/rs6000-overload.def: New. + +2021-07-16 Bill Schmidt + + * config/rs6000/rs6000-gen-builtins.c: New. + +2021-07-16 Bill Schmidt + + * Makefile.in (EXTRA_GTYPE_DEPS): New variable. + (s-gtype): Depend on EXTRA_GTYPE_DEPS. + * gengtype-state.c (state_writer::write_state_file_list): Add a + parameter to the fileslist expression for the number of build + headers to scan. 
+ (read_state_files_list): Detect build headers and strip the + initial "./" or ".\" from their names. + (build_headers): New global variable. + (num_build_headers): Likewise. + (open_base_files): Emit #include for each build header. + (main): Detect and count build headers. + * gengtype.h (build_headers): New extern variable. + (num_build_headers): Likewise. + +2021-07-16 Richard Biener + + * gimple-ssa-store-merging.c (verify_symbolic_number_p): Use + the type of the LHS. + (find_bswap_or_nop_1): Likewise. + (find_bswap_or_nop): Likewise. + * tree-vectorizer.h (vect_get_smallest_scalar_type): Adjust + prototype. + * tree-vect-data-refs.c (vect_get_smallest_scalar_type): + Remove unused parameters, pass in the scalar type. Fix + internal store function handling. + * tree-vect-stmts.c (vect_analyze_stmt): Remove assert. + (vect_get_vector_types_for_stmt): Move down check for + existing vector stmt after we've determined a scalar type. + Pass down the used scalar type to vect_get_smallest_scalar_type. + * tree-vect-generic.c (expand_vector_condition): Use + the type of the LHS. + (expand_vector_scalar_condition): Likewise. + (expand_vector_operations_1): Likewise. + * tree-vect-patterns.c (vect_widened_op_tree): Likewise. + (vect_recog_dot_prod_pattern): Likewise. + (vect_recog_sad_pattern): Likewise. + (vect_recog_widen_op_pattern): Likewise. + (vect_recog_widen_sum_pattern): Likewise. + (vect_recog_mixed_size_cond_pattern): Likewise. + +2021-07-16 Jan Hubicka + + * ipa-modref.c (struct escape_entry): Use eaf_flags_t. + (dump_eaf_flags): Dump EAF_NOT_RETURNED. + (eaf_flags_useful_p): Use eaf_flags_t; handle const functions + and EAF_NOT_RETURNED. + (modref_summary::useful_p): Likewise. + (modref_summary_lto::useful_p): Likewise. + (struct) modref_summary_lto: Use eaf_flags_t. + (deref_flags): Handle EAF_NOT_RETURNED. + (struct escape_point): Use min_flags. + (modref_lattice::init): Add EAF_NOT_RETURNED. 
+ (merge_call_lhs_flags): Ignore EAF_NOT_RETURNED functions. + (analyze_ssa_name_flags): Clear EAF_NOT_RETURNED on return; + handle call flags. + (analyze_parms): Also analyze const functions; update condition on + flags usefulness. + (modref_write): Update streaming. + (read_section): Update streaming. + (remap_arg_flags): Use eaf_flags_t. + (modref_merge_call_site_flags): Handle EAF_NOT_RETURNED. + * ipa-modref.h: (eaf_flags_t): New typedef. + (struct modref_summary): Use eaf_flags_t. + * tree-core.h (EAF_NOT_RETURNED): New constant. + +2021-07-16 Richard Biener + + * gimple-fold.c (gimple_fold_stmt_to_constant_1): Use + the type of the LHS. + (gimple_assign_nonnegative_warnv_p): Likewise. + (gimple_call_nonnegative_warnv_p): Likewise. Return false + if the call has no LHS. + * gimple.c (gimple_could_trap_p_1): Use the type of the LHS. + * tree-eh.c (stmt_could_throw_1_p): Likewise. + * tree-inline.c (insert_init_stmt): Likewise. + * tree-ssa-loop-niter.c (get_val_for): Likewise. + * tree-outof-ssa.c (ssa_is_replaceable_p): Use the type of + the def. + * tree-ssa-sccvn.c (init_vn_nary_op_from_stmt): Take a + gassign *. Use the type of the lhs. + (vn_nary_op_lookup_stmt): Adjust. + (vn_nary_op_insert_stmt): Likewise. + +2021-07-16 Ilya Leoshkevich + + * config/s390/predicates.md (bras_sym_operand): Accept all + functions in 64-bit mode, use UNSPEC_PLT31. + (larl_operand): Use UNSPEC_PLT31. + * config/s390/s390.c (s390_loadrelative_operand_p): Likewise. + (legitimize_pic_address): Likewise. + (s390_emit_tls_call_insn): Mark __tls_get_offset as function, + use UNSPEC_PLT31. + (s390_delegitimize_address): Use UNSPEC_PLT31. + (s390_output_addr_const_extra): Likewise. + (print_operand): Add @PLT to TLS calls, handle %K. + (s390_function_profiler): Mark __fentry__/_mcount as function, + use %K, use UNSPEC_PLT31. + (s390_output_mi_thunk): Use only UNSPEC_GOT, use %K. + (s390_emit_call): Use UNSPEC_PLT31. + (s390_emit_tpf_eh_return): Mark __tpf_eh_return as function. 
+ * config/s390/s390.md (UNSPEC_PLT31): Rename from UNSPEC_PLT. + (*movdi_64): Use %K. + (reload_base_64): Likewise. + (*sibcall_brc): Likewise. + (*sibcall_brcl): Likewise. + (*sibcall_value_brc): Likewise. + (*sibcall_value_brcl): Likewise. + (*bras): Likewise. + (*brasl): Likewise. + (*bras_r): Likewise. + (*brasl_r): Likewise. + (*bras_tls): Likewise. + (*brasl_tls): Likewise. + (main_base_64): Likewise. + (reload_base_64): Likewise. + (@split_stack_call): Likewise. + +2021-07-16 Richard Biener + + PR tree-optimization/101467 + * tree-vect-stmts.c (vect_gen_while): Properly guard + make_temp_ssa_name usage. + +2021-07-16 Cooper Qu + + * config.gcc: Don't use forked print-sysroot-suffix.sh and + t-sysroot-suffix for C-SKY. + * config/csky/print-sysroot-suffix.sh: Delete. + * config/csky/t-csky-linux: Delete. + * config/csky/t-sysroot-suffix: Define MULTILIB_DIRNAMES + instead of CSKY_MULTILIB_DIRNAMES. + +2021-07-16 Richard Biener + + * tree-vect-loop.c (vect_transform_cycle_phi): Correct sign + conversion issues with the partial reduction of the reused + vector accumulator. + +2021-07-16 Richard Biener + + * config/i386/i386-options.c (ix86_option_override_internal): Set + param_vect_partial_vector_usage to zero if not set. + 2021-07-15 Uroš Bizjak PR target/101346 diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 16244c6..4a19022 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20210716 +20210717 diff --git a/gcc/analyzer/ChangeLog b/gcc/analyzer/ChangeLog index e6bd95c..7b63636 100644 --- a/gcc/analyzer/ChangeLog +++ b/gcc/analyzer/ChangeLog @@ -1,3 +1,62 @@ +2021-07-16 David Malcolm + + * analyzer.h (enum access_direction): New. + * engine.cc (exploded_node::on_longjmp): Update for new param of + get_store_value. + * program-state.cc (program_state::prune_for_point): Likewise. + * region-model-impl-calls.cc (region_model::impl_call_memcpy): + Replace call to check_for_writable_region with call to + check_region_for_write. 
+ (region_model::impl_call_memset): Likewise. + (region_model::impl_call_strcpy): Likewise. + * region-model-reachability.cc (reachable_regions::add): Update + for new param of get_store_value. + * region-model.cc (region_model::get_rvalue_1): Likewise, also for + get_rvalue_for_bits. + (region_model::get_store_value): Add ctxt param and use it to call + check_region_for_read. + (region_model::get_rvalue_for_bits): Add ctxt param and use it to + call get_store_value. + (region_model::check_region_access): New. + (region_model::check_region_for_write): New. + (region_model::check_region_for_read): New. + (region_model::set_value): Update comment. Replace call to + check_for_writable_region with call to check_region_for_write. + * region-model.h (region_model::get_rvalue_for_bits): Add ctxt + param. + (region_model::get_store_value): Add ctxt param. + (region_model::check_region_access): New decl. + (region_model::check_region_for_write): New decl. + (region_model::check_region_for_read): New decl. + * region.cc (region_model::copy_region): Update call to + get_store_value. + * svalue.cc (initial_svalue::implicitly_live_p): Likewise. + +2021-07-16 David Malcolm + + * engine.cc (exploded_node::on_stmt_pre): Handle + __analyzer_dump_state. + * program-state.cc (extrinsic_state::get_sm_idx_by_name): New. + (program_state::impl_call_analyzer_dump_state): New. + * program-state.h (extrinsic_state::get_sm_idx_by_name): New decl. + (program_state::impl_call_analyzer_dump_state): New decl. + * region-model-impl-calls.cc + (call_details::get_arg_string_literal): New. + * region-model.h (call_details::get_arg_string_literal): New decl. + +2021-07-16 David Malcolm + + * program-state.cc (program_state::detect_leaks): Simplify using + svalue::maybe_get_region. + * region-model-impl-calls.cc (region_model::impl_call_fgets): Likewise. + (region_model::impl_call_fread): Likewise. + (region_model::impl_call_free): Likewise. + (region_model::impl_call_operator_delete): Likewise. 
+ * region-model.cc (selftest::test_stack_frames): Likewise. + (selftest::test_state_merging): Likewise. + * svalue.cc (svalue::maybe_get_region): New. + * svalue.h (svalue::maybe_get_region): New decl. + 2021-07-15 David Malcolm * svalue.h (is_a_helper ::test): Make diff --git a/gcc/c-family/ChangeLog b/gcc/c-family/ChangeLog index 817f4c4..a0dea4c 100644 --- a/gcc/c-family/ChangeLog +++ b/gcc/c-family/ChangeLog @@ -1,3 +1,9 @@ +2021-07-16 Andrew Pinski + + PR c/101453 + * c-common.c (parse_optimize_options): Use the correct + size for buffer. + 2021-07-15 Martin Sebor PR c/101289 diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index 0e2139a..0c4734f 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,21 @@ +2021-07-16 Patrick Palka + + PR c++/101233 + * pt.c (alias_ctad_tweaks): Clear cp_unevaluated_operand for + substituting DECL_ARGUMENTS. + +2021-07-16 Patrick Palka + + DR 960 + PR c++/99664 + * search.c (check_final_overrider): Compare TYPE_REF_IS_RVALUE + when the return types are references. + +2021-07-16 Marek Polacek + + * typeck2.c (check_narrowing): Don't suppress the pedantic error + in system headers. + 2021-07-15 Jakub Jelinek PR c++/101443 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 7a9a29e..b53ce62 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,72 @@ +2021-07-16 Patrick Palka + + PR c++/101233 + * g++.dg/cpp2a/class-deduction-alias10.C: New test. + +2021-07-16 Patrick Palka + + DR 960 + PR c++/99664 + * g++.dg/inherit/covariant23.C: New test. + +2021-07-16 David Malcolm + + * gcc.dg/analyzer/analyzer-decls.h (__analyzer_dump_state): New. + * gcc.dg/analyzer/dump-state.c: New test. + +2021-07-16 Martin Sebor + + PR testsuite/101468 + * gcc.dg/Wstringop-overflow-41.c: Adjust to avoid target-specific + failures. + * gcc.dg/Wstringop-overflow-42.c: Same. + * gcc.dg/Wstringop-overflow-68.c: Same. + * gcc.dg/Wstringop-overflow-70.c: Same. + * gcc.dg/Wstringop-overflow-71.c: Same. 
+ * gcc.dg/strlenopt-95.c: Fix typos. + +2021-07-16 Tamar Christina + + PR middle-end/101457 + * gcc.dg/vect/vect-reduc-dot-19.c: Use long long. + * gcc.dg/vect/vect-reduc-dot-20.c: Likewise. + * gcc.dg/vect/vect-reduc-dot-21.c: Likewise. + * gcc.dg/vect/vect-reduc-dot-22.c: Likewise. + +2021-07-16 Marek Polacek + + * g++.dg/cpp1y/Wnarrowing2.C: New test. + * g++.dg/cpp1y/Wnarrowing2.h: New test. + +2021-07-16 Ilya Leoshkevich + + * g++.dg/ext/visibility/noPLT.C: Skip on s390x. + * g++.target/s390/mi-thunk.C: New test. + * gcc.target/s390/nodatarel-1.c: Move foostatic to the new + tests. + * gcc.target/s390/pr80080-4.c: Allow @PLT suffix. + * gcc.target/s390/risbg-ll-3.c: Likewise. + * gcc.target/s390/call.h: Common code for the new tests. + * gcc.target/s390/call-z10-pic-nodatarel.c: New test. + * gcc.target/s390/call-z10-pic.c: New test. + * gcc.target/s390/call-z10.c: New test. + * gcc.target/s390/call-z9-pic-nodatarel.c: New test. + * gcc.target/s390/call-z9-pic.c: New test. + * gcc.target/s390/call-z9.c: New test. + * gcc.target/s390/mfentry-m64-pic.c: New test. + * gcc.target/s390/tls.h: Common code for the new TLS tests. + * gcc.target/s390/tls-pic.c: New test. + * gcc.target/s390/tls.c: New test. + +2021-07-16 Jonathan Wright + + * gcc.target/aarch64/sve/extract_1.c: Split expected results + by big/little endian targets, restoring the old expected + result for big endian. + * gcc.target/aarch64/sve/extract_2.c: Likewise. + * gcc.target/aarch64/sve/extract_3.c: Likewise. + * gcc.target/aarch64/sve/extract_4.c: Likewise. + 2021-07-15 Uroš Bizjak PR target/101346 -- cgit v1.1 From 704e8a825c78b9a8424c291509413bbb48e602c7 Mon Sep 17 00:00:00 2001 From: Andrew MacLeod Date: Fri, 16 Jul 2021 11:42:14 -0400 Subject: Add wi_fold_in_parts. range-ops uses wi_fold to individually fold subranges one at a time and then combines them. 
This patch first calls wi_fold_in_parts which checks if one of the subranges is small, and if so, further splits that subrange into constants. gcc/ PR tree-optimization/96542 * range-op.cc (range_operator::wi_fold_in_parts): New. (range_operator::fold_range): Call wi_fold_in_parts. (operator_lshift::wi_fold): Fix broken lshift by [0,0]. * range-op.h (wi_fold_in_parts): Add prototype. gcc/testsuite * gcc.dg/pr96542.c: New. --- gcc/range-op.cc | 75 +++++++++++++++++++++++++++++++++++++++--- gcc/range-op.h | 6 ++++ gcc/testsuite/gcc.dg/pr96542.c | 27 +++++++++++++++ 3 files changed, 103 insertions(+), 5 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/pr96542.c (limited to 'gcc') diff --git a/gcc/range-op.cc b/gcc/range-op.cc index 0800046..e0be51d 100644 --- a/gcc/range-op.cc +++ b/gcc/range-op.cc @@ -133,6 +133,65 @@ range_operator::wi_fold (irange &r, tree type, r.set_varying (type); } +// Call wi_fold, except further split small subranges into constants. +// This can provide better precision. For something 8 >> [0,1] +// Instead of [8, 16], we will produce [8,8][16,16] + +void +range_operator::wi_fold_in_parts (irange &r, tree type, + const wide_int &lh_lb, + const wide_int &lh_ub, + const wide_int &rh_lb, + const wide_int &rh_ub) const +{ + wi::overflow_type ov_rh, ov_lh; + int_range_max tmp; + wide_int rh_range = wi::sub (rh_ub, rh_lb, TYPE_SIGN (type), &ov_rh); + wide_int lh_range = wi::sub (lh_ub, lh_lb, TYPE_SIGN (type), &ov_lh); + signop sign = TYPE_SIGN (type);; + // If there are 2, 3, or 4 values in the RH range, do them separately. + // Call wi_fold_in_parts to check the RH side. 
+ if (wi::gt_p (rh_range, 0, sign) && wi::lt_p (rh_range, 4, sign) + && ov_rh == wi::OVF_NONE) + { + wi_fold_in_parts (r, type, lh_lb, lh_ub, rh_lb, rh_lb); + if (wi::gt_p (rh_range, 1, sign)) + { + wi_fold_in_parts (tmp, type, lh_lb, lh_ub, rh_lb + 1, rh_lb + 1); + r.union_ (tmp); + if (wi::eq_p (rh_range, 3)) + { + wi_fold_in_parts (tmp, type, lh_lb, lh_ub, rh_lb + 2, rh_lb + 2); + r.union_ (tmp); + } + } + wi_fold_in_parts (tmp, type, lh_lb, lh_ub, rh_ub, rh_ub); + r.union_ (tmp); + } + // Otherwise check for 2, 3, or 4 values in the LH range and split them up. + // The RH side has been checked, so no recursion needed. + else if (wi::gt_p (lh_range, 0, sign) && wi::lt_p (lh_range, 4, sign) + && ov_lh == wi::OVF_NONE) + { + wi_fold (r, type, lh_lb, lh_lb, rh_lb, rh_ub); + if (wi::gt_p (lh_range, 1, sign)) + { + wi_fold (tmp, type, lh_lb + 1, lh_lb + 1, rh_lb, rh_ub); + r.union_ (tmp); + if (wi::eq_p (lh_range, 3)) + { + wi_fold (tmp, type, lh_lb + 2, lh_lb + 2, rh_lb, rh_ub); + r.union_ (tmp); + } + } + wi_fold (tmp, type, lh_ub, lh_ub, rh_lb, rh_ub); + r.union_ (tmp); + } + // Otherwise just call wi_fold. + else + wi_fold (r, type, lh_lb, lh_ub, rh_lb, rh_ub); +} + // The default for fold is to break all ranges into sub-ranges and // invoke the wi_fold method on each sub-range pair. @@ -152,8 +211,8 @@ range_operator::fold_range (irange &r, tree type, // If both ranges are single pairs, fold directly into the result range. 
if (num_lh == 1 && num_rh == 1) { - wi_fold (r, type, lh.lower_bound (0), lh.upper_bound (0), - rh.lower_bound (0), rh.upper_bound (0)); + wi_fold_in_parts (r, type, lh.lower_bound (0), lh.upper_bound (0), + rh.lower_bound (0), rh.upper_bound (0)); op1_op2_relation_effect (r, type, lh, rh, rel); return true; } @@ -167,7 +226,7 @@ range_operator::fold_range (irange &r, tree type, wide_int lh_ub = lh.upper_bound (x); wide_int rh_lb = rh.lower_bound (y); wide_int rh_ub = rh.upper_bound (y); - wi_fold (tmp, type, lh_lb, lh_ub, rh_lb, rh_ub); + wi_fold_in_parts (tmp, type, lh_lb, lh_ub, rh_lb, rh_ub); r.union_ (tmp); if (r.varying_p ()) { @@ -1915,8 +1974,14 @@ operator_lshift::wi_fold (irange &r, tree type, int bound_shift = overflow_pos - rh_ub.to_shwi (); // If bound_shift == HOST_BITS_PER_WIDE_INT, the llshift can // overflow. However, for that to happen, rh.max needs to be zero, - // which means rh is a singleton range of zero, which means it - // should be handled by the lshift fold_range above. + // which means rh is a singleton range of zero, which means we simply return + // [lh_lb, lh_ub] as the range. + if (wi::eq_p (rh_ub, rh_lb) && wi::eq_p (rh_ub, 0)) + { + r = int_range<2> (type, lh_lb, lh_ub); + return; + } + wide_int bound = wi::set_bit_in_zero (bound_shift, prec); wide_int complement = ~(bound - 1); wide_int low_bound, high_bound; diff --git a/gcc/range-op.h b/gcc/range-op.h index 2b5db64..17be9e0 100644 --- a/gcc/range-op.h +++ b/gcc/range-op.h @@ -97,6 +97,12 @@ protected: const irange &op1_range, const irange &op2_range, relation_kind rel) const; + // Called by fold range to split small subranges into parts. 
+ void wi_fold_in_parts (irange &r, tree type, + const wide_int &lh_lb, + const wide_int &lh_ub, + const wide_int &rh_lb, + const wide_int &rh_ub) const; }; extern range_operator *range_op_handler (enum tree_code code, tree type); diff --git a/gcc/testsuite/gcc.dg/pr96542.c b/gcc/testsuite/gcc.dg/pr96542.c new file mode 100644 index 0000000..5014f2a --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr96542.c @@ -0,0 +1,27 @@ +/* { dg-do compile} */ +/* { dg-options "-O2 -fdump-tree-evrp" } */ + + +unsigned char +foo (unsigned int x) +{ + _Bool y = x; + return (((unsigned char) ~0) >> y) * 2; +} + +unsigned char +bar (unsigned int x) +{ + return (((unsigned char) ~0) >> (_Bool) x) * 2; +} + +unsigned +baz (unsigned int x) +{ + if (x >= 4) return 32; + return (-1U >> x) * 16; +} + +/* { dg-final { scan-tree-dump-times "254" 2 "evrp" } } */ +/* { dg-final { scan-tree-dump "= PHI <32.*, 4294967280" "evrp" } } */ + -- cgit v1.1 From 0103d18dfc9a29bea69e5122fbdf3477d3e1384f Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Sat, 17 Jul 2021 14:24:19 +0200 Subject: Support EAF_NOT_RETURNED in tree-ssa-structalias gcc/ChangeLog: 2021-07-17 Jan Hubicka * tree-ssa-structalias.c (handle_rhs_call): Support EAF_NOT_RETURNED. (handle_const_call): Likewise. (handle_pure_call): Likewise. gcc/testsuite/ChangeLog: 2021-07-17 Jan Hubicka * gcc.dg/tree-ssa/modref-6.c: New test. 
--- gcc/testsuite/gcc.dg/tree-ssa/modref-6.c | 37 ++++++++++++++++++++++++++++++++ gcc/tree-ssa-structalias.c | 30 +++++++++++++++++++------- 2 files changed, 59 insertions(+), 8 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/modref-6.c (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/tree-ssa/modref-6.c b/gcc/testsuite/gcc.dg/tree-ssa/modref-6.c new file mode 100644 index 0000000..8db9a1d --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/modref-6.c @@ -0,0 +1,37 @@ +/* { dg-options "-O2 -fdump-tree-modref1 -fdump-tree-optimized" } */ +/* { dg-do compile } */ +int c; +__attribute__ ((noinline)) +int *test (int *b) +{ + c++; + return *b ? &c : 0; +} +__attribute__ ((noinline, pure)) +int *pure_test (int *b) +{ + return *b && c ? &c : 0; +} +__attribute__ ((noinline, const)) +int *const_test (int *b) +{ + return b ? &c : 0; +} +void escape (int *); + +int test2() +{ + int a = 42; + escape (test (&a)); + escape (pure_test (&a)); + escape (const_test (&a)); + return a; +} +/* Flags for normal call. */ +/* { dg-final { scan-tree-dump "parm 0 flags: direct noclobber noescape nodirectescape not_returned" "modref1" } } */ +/* Flags for pure call. */ +/* { dg-final { scan-tree-dump "parm 0 flags: direct not_returned" "modref1" } } */ +/* Flags for const call. */ +/* { dg-final { scan-tree-dump "parm 0 flags: unused not_returned" "modref1" } } */ +/* Overall we want to make "int a" non escaping. */ +/* { dg-final { scan-tree-dump "return 42" "optimized" } } */ diff --git a/gcc/tree-ssa-structalias.c b/gcc/tree-ssa-structalias.c index 7163438..71894b3 100644 --- a/gcc/tree-ssa-structalias.c +++ b/gcc/tree-ssa-structalias.c @@ -4082,9 +4082,12 @@ handle_rhs_call (gcall *stmt, vec *results) if (!(flags & EAF_DIRECT)) make_transitive_closure_constraints (tem); make_copy_constraint (uses, tem->id); + /* TODO: This is overly conservative when some parameters are + returned while others are not. 
*/ + if (!(flags & EAF_NOT_RETURNED)) + returns_uses = true; if (!(flags & (EAF_NOESCAPE | EAF_DIRECT))) make_indirect_escape_constraint (tem); - returns_uses = true; } else if (flags & (EAF_NOESCAPE | EAF_NODIRECTESCAPE)) { @@ -4098,6 +4101,8 @@ handle_rhs_call (gcall *stmt, vec *results) if (!(flags & EAF_DIRECT)) make_transitive_closure_constraints (tem); make_copy_constraint (uses, tem->id); + if (!(flags & EAF_NOT_RETURNED)) + returns_uses = true; make_copy_constraint (clobbers, tem->id); /* Add *tem = nonlocal, do not add *tem = callused as EAF_NOESCAPE parameters do not escape to other parameters @@ -4111,7 +4116,6 @@ handle_rhs_call (gcall *stmt, vec *results) process_constraint (new_constraint (lhs, rhs)); if (!(flags & (EAF_NOESCAPE | EAF_DIRECT))) make_indirect_escape_constraint (tem); - returns_uses = true; } else make_escape_constraint (arg); @@ -4261,13 +4265,18 @@ handle_const_call (gcall *stmt, vec *results) /* May return offsetted arguments. */ varinfo_t tem = NULL; - if (gimple_call_num_args (stmt) != 0) - { - tem = new_var_info (NULL_TREE, "callarg", true); - tem->is_reg_var = true; - } for (k = 0; k < gimple_call_num_args (stmt); ++k) { + int flags = gimple_call_arg_flags (stmt, k); + + /* If the argument is not used or not returned we can ignore it. */ + if (flags & (EAF_UNUSED | EAF_NOT_RETURNED)) + continue; + if (!tem) + { + tem = new_var_info (NULL_TREE, "callarg", true); + tem->is_reg_var = true; + } tree arg = gimple_call_arg (stmt, k); auto_vec argc; get_constraint_for_rhs (arg, &argc); @@ -4298,6 +4307,7 @@ handle_pure_call (gcall *stmt, vec *results) struct constraint_expr rhsc; unsigned i; varinfo_t uses = NULL; + bool record_uses = false; /* Memory reached from pointer arguments is call-used. 
*/ for (i = 0; i < gimple_call_num_args (stmt); ++i) @@ -4315,6 +4325,8 @@ handle_pure_call (gcall *stmt, vec *results) make_transitive_closure_constraints (uses); } make_constraint_to (uses->id, arg); + if (!(flags & EAF_NOT_RETURNED)) + record_uses = true; } /* The static chain is used as well. */ @@ -4327,6 +4339,7 @@ handle_pure_call (gcall *stmt, vec *results) make_transitive_closure_constraints (uses); } make_constraint_to (uses->id, gimple_call_chain (stmt)); + record_uses = true; } /* And if we applied NRV the address of the return slot. */ @@ -4343,10 +4356,11 @@ handle_pure_call (gcall *stmt, vec *results) auto_vec tmpc; get_constraint_for_address_of (gimple_call_lhs (stmt), &tmpc); make_constraints_to (uses->id, tmpc); + record_uses = true; } /* Pure functions may return call-used and nonlocal memory. */ - if (uses) + if (record_uses) { rhsc.var = uses->id; rhsc.offset = 0; -- cgit v1.1 From 853921378bfa149353b4e1c7dde5c02f80072ad7 Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Sun, 18 Jul 2021 00:16:23 +0000 Subject: Daily bump. --- gcc/ChangeLog | 14 ++++++++++++++ gcc/DATESTAMP | 2 +- gcc/testsuite/ChangeLog | 8 ++++++++ 3 files changed, 23 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b18ee85..c4ac83f 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,17 @@ +2021-07-17 Jan Hubicka + + * tree-ssa-structalias.c (handle_rhs_call): Support EAF_NOT_RETURNED. + (handle_const_call): Likewise. + (handle_pure_call): Likewise. + +2021-07-17 Andrew MacLeod + + PR tree-optimization/96542 + * range-op.cc (range_operator::wi_fold_in_parts): New. + (range_operator::fold_range): Call wi_fold_in_parts. + (operator_lshift::wi_fold): Fix broken lshift by [0,0]. + * range-op.h (wi_fold_in_parts): Add prototype. + 2021-07-16 David Malcolm * doc/analyzer.texi: Add __analyzer_dump_state. 
diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 4a19022..fe69eb2 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20210717 +20210718 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index b53ce62..87ee36d 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,11 @@ +2021-07-17 Jan Hubicka + + * gcc.dg/tree-ssa/modref-6.c: New test. + +2021-07-17 Andrew MacLeod + + * gcc.dg/pr96542.c: New. + 2021-07-16 Patrick Palka PR c++/101233 -- cgit v1.1 From 5cca4131e4aabf70a18e362620ad65a3cebf227a Mon Sep 17 00:00:00 2001 From: Antoni Boucher Date: Sun, 5 Jul 2020 19:07:30 -0400 Subject: libgccjit: Handle truncation and extension for casts [PR95498] 2021-07-18 Antoni Boucher gcc/jit/ PR target/95498 * jit-playback.c (convert): Add support to handle truncation and extension in the convert function. gcc/testsuite/ PR target/95498 * jit.dg/all-non-failing-tests.h: New test. * jit.dg/test-cast.c: New test. Signed-off-by: Antoni Boucher --- gcc/jit/jit-playback.c | 32 +++++++++----- gcc/testsuite/jit.dg/all-non-failing-tests.h | 10 +++++ gcc/testsuite/jit.dg/test-cast.c | 66 ++++++++++++++++++++++++++++ 3 files changed, 97 insertions(+), 11 deletions(-) create mode 100644 gcc/testsuite/jit.dg/test-cast.c (limited to 'gcc') diff --git a/gcc/jit/jit-playback.c b/gcc/jit/jit-playback.c index c613630..79ac525 100644 --- a/gcc/jit/jit-playback.c +++ b/gcc/jit/jit-playback.c @@ -62,22 +62,32 @@ along with GCC; see the file COPYING3. If not see /* gcc::jit::playback::context::build_cast uses the convert.h API, which in turn requires the frontend to provide a "convert" - function, apparently as a fallback. - - Hence we provide this dummy one, with the requirement that any casts - are handled before reaching this. */ + function, apparently as a fallback for casts that can be simplified + (truncation, extension). 
*/ extern tree convert (tree type, tree expr); tree convert (tree dst_type, tree expr) { - gcc_assert (gcc::jit::active_playback_ctxt); - gcc::jit::active_playback_ctxt->add_error (NULL, "unhandled conversion"); - fprintf (stderr, "input expression:\n"); - debug_tree (expr); - fprintf (stderr, "requested type:\n"); - debug_tree (dst_type); - return error_mark_node; + tree t_ret = NULL; + t_ret = targetm.convert_to_type (dst_type, expr); + if (t_ret) + return t_ret; + switch (TREE_CODE (dst_type)) + { + case INTEGER_TYPE: + case ENUMERAL_TYPE: + return fold (convert_to_integer (dst_type, expr)); + + default: + gcc_assert (gcc::jit::active_playback_ctxt); + gcc::jit::active_playback_ctxt->add_error (NULL, "unhandled conversion"); + fprintf (stderr, "input expression:\n"); + debug_tree (expr); + fprintf (stderr, "requested type:\n"); + debug_tree (dst_type); + return error_mark_node; + } } namespace gcc { diff --git a/gcc/testsuite/jit.dg/all-non-failing-tests.h b/gcc/testsuite/jit.dg/all-non-failing-tests.h index 4202eb7..84ef54a 100644 --- a/gcc/testsuite/jit.dg/all-non-failing-tests.h +++ b/gcc/testsuite/jit.dg/all-non-failing-tests.h @@ -98,6 +98,13 @@ #undef create_code #undef verify_code +/* test-cast.c */ +#define create_code create_code_cast +#define verify_code verify_code_cast +#include "test-cast.c" +#undef create_code +#undef verify_code + /* test-compound-assignment.c */ #define create_code create_code_compound_assignment #define verify_code verify_code_compound_assignment @@ -361,6 +368,9 @@ const struct testcase testcases[] = { {"calling_internal_function", create_code_calling_internal_function, verify_code_calling_internal_function}, + {"cast", + create_code_cast, + verify_code_cast}, {"compound_assignment", create_code_compound_assignment, verify_code_compound_assignment}, diff --git a/gcc/testsuite/jit.dg/test-cast.c b/gcc/testsuite/jit.dg/test-cast.c new file mode 100644 index 0000000..2b1e385 --- /dev/null +++ b/gcc/testsuite/jit.dg/test-cast.c @@ 
-0,0 +1,66 @@ +#include +#include +#include + +#include "libgccjit.h" + +#include "harness.h" + +void +create_code (gcc_jit_context *ctxt, void *user_data) +{ + /* Let's try to inject the equivalent of: +char +my_casts (int x) +{ + return (char)(long) x; +} + */ + gcc_jit_type *int_type = + gcc_jit_context_get_type (ctxt, GCC_JIT_TYPE_INT); + gcc_jit_type *long_type = + gcc_jit_context_get_type (ctxt, GCC_JIT_TYPE_LONG); + gcc_jit_type *return_type = + gcc_jit_context_get_type (ctxt, GCC_JIT_TYPE_CHAR); + + gcc_jit_param *x = + gcc_jit_context_new_param ( + ctxt, + NULL, + int_type, "x"); + gcc_jit_param *params[1] = {x}; + gcc_jit_function *func = + gcc_jit_context_new_function (ctxt, + NULL, + GCC_JIT_FUNCTION_EXPORTED, + return_type, + "my_casts", + 1, params, 0); + + gcc_jit_block *initial = + gcc_jit_function_new_block (func, "initial"); + + gcc_jit_block_end_with_return(initial, NULL, + gcc_jit_context_new_cast(ctxt, + NULL, + gcc_jit_context_new_cast(ctxt, + NULL, + gcc_jit_param_as_rvalue(x), + long_type + ), + return_type + )); +} + +void +verify_code (gcc_jit_context *ctxt, gcc_jit_result *result) +{ + typedef int (*my_casts_fn_type) (int); + CHECK_NON_NULL (result); + my_casts_fn_type my_casts = + (my_casts_fn_type)gcc_jit_result_get_code (result, "my_casts"); + CHECK_NON_NULL (my_casts); + char val = my_casts (10); + note ("my_casts returned: %d", val); + CHECK_VALUE (val, 10); +} -- cgit v1.1 From 5586e7e85de381f3df843091494889f8ad8e3e1f Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Sun, 18 Jul 2021 06:12:29 -0700 Subject: x86: Don't issue vzeroupper if callee returns AVX register Don't issue vzeroupper before function call if callee returns AVX register since callee must be compiled with AVX. gcc/ PR target/101495 * config/i386/i386.c (ix86_check_avx_upper_stores): Moved before ix86_avx_u128_mode_needed. (ix86_avx_u128_mode_needed): Return AVX_U128_DIRTY if callee returns AVX register. 
gcc/testsuite/ PR target/101495 * gcc.target/i386/avx-vzeroupper-28.c: New test. --- gcc/config/i386/i386.c | 32 ++++++++++++++--------- gcc/testsuite/gcc.target/i386/avx-vzeroupper-28.c | 17 ++++++++++++ 2 files changed, 37 insertions(+), 12 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/avx-vzeroupper-28.c (limited to 'gcc') diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 9d74b7a..8481693 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -14093,6 +14093,18 @@ ix86_check_avx_upper_register (const_rtx exp) && GET_MODE_BITSIZE (GET_MODE (exp)) > 128); } +/* Check if a 256bit or 512bit AVX register is referenced in stores. */ + +static void +ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data) + { + if (ix86_check_avx_upper_register (dest)) + { + bool *used = (bool *) data; + *used = true; + } + } + /* Return needed mode for entity in optimize_mode_switching pass. */ static int @@ -14117,6 +14129,14 @@ ix86_avx_u128_mode_needed (rtx_insn *insn) } } + /* Needed mode is set to AVX_U128_CLEAN if there are no 256bit + nor 512bit registers used in the function return register. */ + bool avx_upper_reg_found = false; + note_stores (insn, ix86_check_avx_upper_stores, + &avx_upper_reg_found); + if (avx_upper_reg_found) + return AVX_U128_DIRTY; + /* If the function is known to preserve some SSE registers, RA and previous passes can legitimately rely on that for modes wider than 256 bits. It's only safe to issue a @@ -14217,18 +14237,6 @@ ix86_mode_needed (int entity, rtx_insn *insn) return 0; } -/* Check if a 256bit or 512bit AVX register is referenced in stores. */ - -static void -ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data) - { - if (ix86_check_avx_upper_register (dest)) - { - bool *used = (bool *) data; - *used = true; - } - } - /* Calculate mode of upper 128bit AVX registers after the insn. 
*/ static int diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-28.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-28.c new file mode 100644 index 0000000..381ee9a7 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-28.c @@ -0,0 +1,17 @@ +/* PR target/101495 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx2 -mtune=generic -dp" } */ + +#include + +extern __m256 x, y; +extern __m256 bar (void); + +__m256 +foo () +{ + x = y; + return bar (); +} + +/* { dg-final { scan-assembler-not "avx_vzeroupper" } } */ -- cgit v1.1 From f527b8233498b40c8a2c616b82265f2e58aba42a Mon Sep 17 00:00:00 2001 From: Harald Anlauf Date: Sun, 18 Jul 2021 21:35:53 +0200 Subject: Fortran: reject FORMAT tag of unknown type. gcc/fortran/ChangeLog: PR fortran/101084 * io.c (resolve_tag_format): Extend FORMAT check to unknown type. gcc/testsuite/ChangeLog: PR fortran/101084 * gfortran.dg/fmt_nonchar_3.f90: New test. --- gcc/fortran/io.c | 2 +- gcc/testsuite/gfortran.dg/fmt_nonchar_3.f90 | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gfortran.dg/fmt_nonchar_3.f90 (limited to 'gcc') diff --git a/gcc/fortran/io.c b/gcc/fortran/io.c index 40cd76e..fc97df7 100644 --- a/gcc/fortran/io.c +++ b/gcc/fortran/io.c @@ -1763,7 +1763,7 @@ resolve_tag_format (gfc_expr *e) if (e->ts.type != BT_CHARACTER) { if (e->ts.type == BT_DERIVED || e->ts.type == BT_CLASS - || e->ts.type == BT_VOID) + || e->ts.type == BT_VOID || e->ts.type == BT_UNKNOWN) { gfc_error ("Non-character non-Hollerith in FORMAT tag at %L", &e->where); diff --git a/gcc/testsuite/gfortran.dg/fmt_nonchar_3.f90 b/gcc/testsuite/gfortran.dg/fmt_nonchar_3.f90 new file mode 100644 index 0000000..3b3c260 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/fmt_nonchar_3.f90 @@ -0,0 +1,7 @@ +! { dg-do compile } +! PR fortran/101084 + +program p + integer, parameter :: a(0) = 1 + print int(a) ! 
{ dg-error "Non-character non-Hollerith in FORMAT tag" } +end -- cgit v1.1 From 6ae8aac19cdbdbd96d90f86e4d8505fe121bdf06 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Sat, 17 Jul 2021 14:38:39 -0700 Subject: x86: Enable the GPR only instructions for -mgeneral-regs-only For -mgeneral-regs-only, enable the GPR only instructions which are enabled implicitly by SSE ISAs unless they have been disabled explicitly. gcc/ PR target/101492 * common/config/i386/i386-common.c (ix86_handle_option): For -mgeneral-regs-only, enable the GPR only instructions which are enabled implicitly by SSE ISAs unless they have been disabled explicitly. gcc/testsuite/ PR target/101492 * gcc.target/i386/pr101492-1.c: New test. * gcc.target/i386/pr101492-2.c: Likewise. * gcc.target/i386/pr101492-3.c: Likewise. * gcc.target/i386/pr101492-4.c: Likewise. --- gcc/common/config/i386/i386-common.c | 27 +++++++++++++++++++++++++-- gcc/testsuite/gcc.target/i386/pr101492-1.c | 10 ++++++++++ gcc/testsuite/gcc.target/i386/pr101492-2.c | 10 ++++++++++ gcc/testsuite/gcc.target/i386/pr101492-3.c | 10 ++++++++++ gcc/testsuite/gcc.target/i386/pr101492-4.c | 12 ++++++++++++ 5 files changed, 67 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr101492-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pr101492-2.c create mode 100644 gcc/testsuite/gcc.target/i386/pr101492-3.c create mode 100644 gcc/testsuite/gcc.target/i386/pr101492-4.c (limited to 'gcc') diff --git a/gcc/common/config/i386/i386-common.c b/gcc/common/config/i386/i386-common.c index e156cc3..76ab1a1 100644 --- a/gcc/common/config/i386/i386-common.c +++ b/gcc/common/config/i386/i386-common.c @@ -354,16 +354,39 @@ ix86_handle_option (struct gcc_options *opts, case OPT_mgeneral_regs_only: if (value) { + HOST_WIDE_INT general_regs_only_flags = 0; + HOST_WIDE_INT general_regs_only_flags2 = 0; + + /* NB: Enable the GPR only instructions which are enabled + implicitly by SSE ISAs unless they have been disabled + 
explicitly. */ + if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)) + { + if (!TARGET_EXPLICIT_CRC32_P (opts)) + general_regs_only_flags |= OPTION_MASK_ISA_CRC32; + if (!TARGET_EXPLICIT_POPCNT_P (opts)) + general_regs_only_flags |= OPTION_MASK_ISA_POPCNT; + } + if (TARGET_SSE3_P (opts->x_ix86_isa_flags)) + { + if (!TARGET_EXPLICIT_MWAIT_P (opts)) + general_regs_only_flags2 |= OPTION_MASK_ISA2_MWAIT; + } + /* Disable MMX, SSE and x87 instructions if only general registers are allowed. */ opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_GENERAL_REGS_ONLY_UNSET; opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_GENERAL_REGS_ONLY_UNSET; + opts->x_ix86_isa_flags |= general_regs_only_flags; + opts->x_ix86_isa_flags2 |= general_regs_only_flags2; opts->x_ix86_isa_flags_explicit - |= OPTION_MASK_ISA_GENERAL_REGS_ONLY_UNSET; + |= (OPTION_MASK_ISA_GENERAL_REGS_ONLY_UNSET + | general_regs_only_flags); opts->x_ix86_isa_flags2_explicit - |= OPTION_MASK_ISA2_GENERAL_REGS_ONLY_UNSET; + |= (OPTION_MASK_ISA2_GENERAL_REGS_ONLY_UNSET + | general_regs_only_flags2); opts->x_target_flags &= ~MASK_80387; } diff --git a/gcc/testsuite/gcc.target/i386/pr101492-1.c b/gcc/testsuite/gcc.target/i386/pr101492-1.c new file mode 100644 index 0000000..4100257 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr101492-1.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse4.2 -mgeneral-regs-only" } */ + +#include + +unsigned int +foo1 (unsigned int x, unsigned int y) +{ + return __crc32d (x, y); +} diff --git a/gcc/testsuite/gcc.target/i386/pr101492-2.c b/gcc/testsuite/gcc.target/i386/pr101492-2.c new file mode 100644 index 0000000..c7d24f4 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr101492-2.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse4.2 -mgeneral-regs-only" } */ + +#include + +unsigned int +foo1 (unsigned int x) +{ + return _mm_popcnt_u32 (x); +} diff --git a/gcc/testsuite/gcc.target/i386/pr101492-3.c b/gcc/testsuite/gcc.target/i386/pr101492-3.c new file mode 
100644 index 0000000..37e2071 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr101492-3.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse3 -mgeneral-regs-only" } */ + +#include + +void +foo1 (unsigned int x, unsigned int y) +{ + _mm_mwait (x, y); +} diff --git a/gcc/testsuite/gcc.target/i386/pr101492-4.c b/gcc/testsuite/gcc.target/i386/pr101492-4.c new file mode 100644 index 0000000..c5a4f0a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr101492-4.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mno-mwait -msse3 -mgeneral-regs-only" } */ + +#include + +void +foo1 (unsigned int x, unsigned int y) +{ + _mm_mwait (x, y); +} + +/* { dg-error "target specific option mismatch" "" { target *-*-* } 0 } */ -- cgit v1.1 From bdea84c4b5773723fa3ac7fa01f33542093864d5 Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Mon, 19 Jul 2021 00:16:24 +0000 Subject: Daily bump. --- gcc/ChangeLog | 16 ++++++++++++++++ gcc/DATESTAMP | 2 +- gcc/fortran/ChangeLog | 5 +++++ gcc/jit/ChangeLog | 6 ++++++ gcc/testsuite/ChangeLog | 24 ++++++++++++++++++++++++ 5 files changed, 52 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c4ac83f..c7053c0 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,19 @@ +2021-07-18 H.J. Lu + + PR target/101492 + * common/config/i386/i386-common.c (ix86_handle_option): For + -mgeneral-regs-only, enable the GPR only instructions which are + enabled implicitly by SSE ISAs unless they have been disabled + explicitly. + +2021-07-18 H.J. Lu + + PR target/101495 + * config/i386/i386.c (ix86_check_avx_upper_stores): Moved before + ix86_avx_u128_mode_needed. + (ix86_avx_u128_mode_needed): Return AVX_U128_DIRTY if callee + returns AVX register. + 2021-07-17 Jan Hubicka * tree-ssa-structalias.c (handle_rhs_call): Support EAF_NOT_RETURNED. 
diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index fe69eb2..844ac74 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20210718 +20210719 diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index 5406c53..f1fe435 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,8 @@ +2021-07-18 Harald Anlauf + + PR fortran/101084 + * io.c (resolve_tag_format): Extend FORMAT check to unknown type. + 2021-07-14 Harald Anlauf PR fortran/100949 diff --git a/gcc/jit/ChangeLog b/gcc/jit/ChangeLog index 819aa97..2a2675a 100644 --- a/gcc/jit/ChangeLog +++ b/gcc/jit/ChangeLog @@ -1,3 +1,9 @@ +2021-07-18 Antoni Boucher + + PR target/95498 + * jit-playback.c (convert): Add support to handle truncation and + extension in the convert function. + 2021-05-19 Martin Liska PR testsuite/100658 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 87ee36d..d726e73 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,27 @@ +2021-07-18 H.J. Lu + + PR target/101492 + * gcc.target/i386/pr101492-1.c: New test. + * gcc.target/i386/pr101492-2.c: Likewise. + * gcc.target/i386/pr101492-3.c: Likewise. + * gcc.target/i386/pr101492-4.c: Likewise. + +2021-07-18 Harald Anlauf + + PR fortran/101084 + * gfortran.dg/fmt_nonchar_3.f90: New test. + +2021-07-18 H.J. Lu + + PR target/101495 + * gcc.target/i386/avx-vzeroupper-28.c: New test. + +2021-07-18 Antoni Boucher + + PR target/95498 + * jit.dg/all-non-failing-tests.h: New test. + * jit.dg/test-cast.c: New test. + 2021-07-17 Jan Hubicka * gcc.dg/tree-ssa/modref-6.c: New test. -- cgit v1.1 From 4a21a8c34a707e88f450375e3c7d593be75162f4 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Fri, 16 Jul 2021 14:53:42 +0200 Subject: Remove gimple_expr_type uses from value-range code This removes the last uses from value-range code. 2021-07-16 Richard Biener * tree-vrp.c (register_edge_assert_for_2): Use the type from the LHS. (vrp_folder::fold_predicate_in): Likewise. 
* vr-values.c (gimple_assign_nonzero_p): Likewise. (vr_values::extract_range_from_comparison): Likewise. (vr_values::extract_range_from_ubsan_builtin): Use the type of the first operand. (vr_values::extract_range_basic): Push down type computation, use the appropriate LHS. (vr_values::extract_range_from_assignment): Use the type of the LHS. --- gcc/tree-vrp.c | 14 +++++++------- gcc/vr-values.c | 28 ++++++++++++++++------------ 2 files changed, 23 insertions(+), 19 deletions(-) (limited to 'gcc') diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c index 0565c9b..a9c31bc 100644 --- a/gcc/tree-vrp.c +++ b/gcc/tree-vrp.c @@ -1484,13 +1484,13 @@ register_edge_assert_for_2 (tree name, edge e, } /* Extract NAME2 from the (optional) sign-changing cast. */ - if (gimple_assign_cast_p (def_stmt)) + if (gassign *ass = dyn_cast (def_stmt)) { - if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)) - && ! TYPE_UNSIGNED (TREE_TYPE (gimple_assign_rhs1 (def_stmt))) - && (TYPE_PRECISION (gimple_expr_type (def_stmt)) - == TYPE_PRECISION (TREE_TYPE (gimple_assign_rhs1 (def_stmt))))) - name3 = gimple_assign_rhs1 (def_stmt); + if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass)) + && ! TYPE_UNSIGNED (TREE_TYPE (gimple_assign_rhs1 (ass))) + && (TYPE_PRECISION (TREE_TYPE (gimple_assign_lhs (ass))) + == TYPE_PRECISION (TREE_TYPE (gimple_assign_rhs1 (ass))))) + name3 = gimple_assign_rhs1 (ass); } /* If name3 is used later, create an ASSERT_EXPR for it. 
*/ @@ -4119,7 +4119,7 @@ vrp_folder::fold_predicate_in (gimple_stmt_iterator *si) if (val) { if (assignment_p) - val = fold_convert (gimple_expr_type (stmt), val); + val = fold_convert (TREE_TYPE (gimple_assign_lhs (stmt)), val); if (dump_file) { diff --git a/gcc/vr-values.c b/gcc/vr-values.c index 190676d..1b3ec38 100644 --- a/gcc/vr-values.c +++ b/gcc/vr-values.c @@ -338,16 +338,17 @@ gimple_assign_nonzero_p (gimple *stmt) { enum tree_code code = gimple_assign_rhs_code (stmt); bool strict_overflow_p; + tree type = TREE_TYPE (gimple_assign_lhs (stmt)); switch (get_gimple_rhs_class (code)) { case GIMPLE_UNARY_RHS: return tree_unary_nonzero_warnv_p (gimple_assign_rhs_code (stmt), - gimple_expr_type (stmt), + type, gimple_assign_rhs1 (stmt), &strict_overflow_p); case GIMPLE_BINARY_RHS: return tree_binary_nonzero_warnv_p (gimple_assign_rhs_code (stmt), - gimple_expr_type (stmt), + type, gimple_assign_rhs1 (stmt), gimple_assign_rhs2 (stmt), &strict_overflow_p); @@ -1025,7 +1026,7 @@ vr_values::extract_range_from_comparison (value_range_equiv *vr, gimple *stmt) { enum tree_code code = gimple_assign_rhs_code (stmt); - tree type = gimple_expr_type (stmt); + tree type = TREE_TYPE (gimple_assign_lhs (stmt)); tree op0 = gimple_assign_rhs1 (stmt); tree op1 = gimple_assign_rhs2 (stmt); bool sop; @@ -1164,7 +1165,6 @@ bool vr_values::extract_range_from_ubsan_builtin (value_range_equiv *vr, gimple *stmt) { gcc_assert (is_gimple_call (stmt)); - tree type = gimple_expr_type (stmt); enum tree_code subcode = ERROR_MARK; combined_fn cfn = gimple_call_combined_fn (stmt); scalar_int_mode mode; @@ -1190,7 +1190,8 @@ vr_values::extract_range_from_ubsan_builtin (value_range_equiv *vr, gimple *stmt any overflow, we'll complain, but will actually do wrapping operation. 
*/ flag_wrapv = 1; - extract_range_from_binary_expr (vr, subcode, type, + extract_range_from_binary_expr (vr, subcode, + TREE_TYPE (gimple_call_arg (stmt, 0)), gimple_call_arg (stmt, 0), gimple_call_arg (stmt, 1)); flag_wrapv = saved_flag_wrapv; @@ -1217,7 +1218,6 @@ void vr_values::extract_range_basic (value_range_equiv *vr, gimple *stmt) { bool sop; - tree type = gimple_expr_type (stmt); if (is_gimple_call (stmt)) { @@ -1244,13 +1244,14 @@ vr_values::extract_range_basic (value_range_equiv *vr, gimple *stmt) /* Handle extraction of the two results (result of arithmetics and a flag whether arithmetics overflowed) from {ADD,SUB,MUL}_OVERFLOW internal function. Similarly from ATOMIC_COMPARE_EXCHANGE. */ - else if (is_gimple_assign (stmt) - && (gimple_assign_rhs_code (stmt) == REALPART_EXPR - || gimple_assign_rhs_code (stmt) == IMAGPART_EXPR) - && INTEGRAL_TYPE_P (type)) + if (is_gimple_assign (stmt) + && (gimple_assign_rhs_code (stmt) == REALPART_EXPR + || gimple_assign_rhs_code (stmt) == IMAGPART_EXPR) + && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (stmt)))) { enum tree_code code = gimple_assign_rhs_code (stmt); tree op = gimple_assign_rhs1 (stmt); + tree type = TREE_TYPE (gimple_assign_lhs (stmt)); if (TREE_CODE (op) == code && TREE_CODE (TREE_OPERAND (op, 0)) == SSA_NAME) { gimple *g = SSA_NAME_DEF_STMT (TREE_OPERAND (op, 0)); @@ -1328,6 +1329,9 @@ vr_values::extract_range_basic (value_range_equiv *vr, gimple *stmt) } } } + /* None of the below should need a 'type', but we are only called + for assignments and calls with a LHS. 
*/ + tree type = TREE_TYPE (gimple_get_lhs (stmt)); if (INTEGRAL_TYPE_P (type) && gimple_stmt_nonnegative_warnv_p (stmt, &sop)) set_value_range_to_nonnegative (vr, type); @@ -1355,12 +1359,12 @@ vr_values::extract_range_from_assignment (value_range_equiv *vr, gassign *stmt) extract_range_from_ssa_name (vr, gimple_assign_rhs1 (stmt)); else if (TREE_CODE_CLASS (code) == tcc_binary) extract_range_from_binary_expr (vr, gimple_assign_rhs_code (stmt), - gimple_expr_type (stmt), + TREE_TYPE (gimple_assign_lhs (stmt)), gimple_assign_rhs1 (stmt), gimple_assign_rhs2 (stmt)); else if (TREE_CODE_CLASS (code) == tcc_unary) extract_range_from_unary_expr (vr, gimple_assign_rhs_code (stmt), - gimple_expr_type (stmt), + TREE_TYPE (gimple_assign_lhs (stmt)), gimple_assign_rhs1 (stmt)); else if (code == COND_EXPR) extract_range_from_cond_expr (vr, stmt); -- cgit v1.1 From 3ce20b6be543824a07c3367253188d754b58667e Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Fri, 16 Jul 2021 13:26:20 +0200 Subject: Remove last gimple_expr_type uses This removes the last uses of gimple_expr_type. 2021-07-16 Richard Biener * tree-ssa-sccvn.c (vn_reference_eq): Handle NULL vr->type. (ao_ref_init_from_vn_reference): Likewise. (fully_constant_reference): Likewise. (vn_reference_lookup_call): Do not set vr->type to random values. * tree-ssa-pre.c (compute_avail): Do not try to PRE calls without a value. * tree-vect-generic.c (expand_vector_piecewise): Pass in whether we expanded parallel. (expand_vector_parallel): Adjust. (expand_vector_addition): Likewise. (expand_vector_comparison): Likewise. (expand_vector_operation): Likewise. (expand_vector_scalar_condition): Likewise. (expand_vector_conversion): Likewise. 
--- gcc/tree-ssa-pre.c | 3 ++- gcc/tree-ssa-sccvn.c | 28 ++++++++++++++++++++-------- gcc/tree-vect-generic.c | 25 ++++++++++++------------- 3 files changed, 34 insertions(+), 22 deletions(-) (limited to 'gcc') diff --git a/gcc/tree-ssa-pre.c b/gcc/tree-ssa-pre.c index aa5244e..d2a7395 100644 --- a/gcc/tree-ssa-pre.c +++ b/gcc/tree-ssa-pre.c @@ -4023,7 +4023,8 @@ compute_avail (function *fun) continue; vn_reference_lookup_call (as_a (stmt), &ref, &ref1); - if (!ref) + /* There is no point to PRE a call without a value. */ + if (!ref || !ref->result) continue; /* If the value of the call is not invalidated in diff --git a/gcc/tree-ssa-sccvn.c b/gcc/tree-ssa-sccvn.c index 7900df9..a174c31 100644 --- a/gcc/tree-ssa-sccvn.c +++ b/gcc/tree-ssa-sccvn.c @@ -764,14 +764,18 @@ vn_reference_eq (const_vn_reference_t const vr1, const_vn_reference_t const vr2) if (vr1->operands == vr2->operands) return true; - if (COMPLETE_TYPE_P (vr1->type) != COMPLETE_TYPE_P (vr2->type) - || (COMPLETE_TYPE_P (vr1->type) - && !expressions_equal_p (TYPE_SIZE (vr1->type), - TYPE_SIZE (vr2->type)))) + if (!vr1->type || !vr2->type) + { + if (vr1->type != vr2->type) + return false; + } + else if (COMPLETE_TYPE_P (vr1->type) != COMPLETE_TYPE_P (vr2->type) + || (COMPLETE_TYPE_P (vr1->type) + && !expressions_equal_p (TYPE_SIZE (vr1->type), + TYPE_SIZE (vr2->type)))) return false; - - if (INTEGRAL_TYPE_P (vr1->type) - && INTEGRAL_TYPE_P (vr2->type)) + else if (INTEGRAL_TYPE_P (vr1->type) + && INTEGRAL_TYPE_P (vr2->type)) { if (TYPE_PRECISION (vr1->type) != TYPE_PRECISION (vr2->type)) return false; @@ -1049,6 +1053,10 @@ ao_ref_init_from_vn_reference (ao_ref *ref, poly_offset_int size = -1; tree size_tree = NULL_TREE; + /* We don't handle calls. */ + if (!type) + return false; + machine_mode mode = TYPE_MODE (type); if (mode == BLKmode) size_tree = TYPE_SIZE (type); @@ -1478,6 +1486,7 @@ fully_constant_vn_reference_p (vn_reference_t ref) /* Simplify reads from constants or constant initializers. 
*/ else if (BITS_PER_UNIT == 8 + && ref->type && COMPLETE_TYPE_P (ref->type) && is_gimple_reg_type (ref->type)) { @@ -3671,7 +3680,10 @@ vn_reference_lookup_call (gcall *call, vn_reference_t *vnresult, vr->vuse = vuse ? SSA_VAL (vuse) : NULL_TREE; vr->operands = valueize_shared_reference_ops_from_call (call); - vr->type = gimple_expr_type (call); + tree lhs = gimple_call_lhs (call); + /* For non-SSA return values the referece ops contain the LHS. */ + vr->type = ((lhs && TREE_CODE (lhs) == SSA_NAME) + ? TREE_TYPE (lhs) : NULL_TREE); vr->punned = false; vr->set = 0; vr->base_set = 0; diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c index a1257db..2e00b3e 100644 --- a/gcc/tree-vect-generic.c +++ b/gcc/tree-vect-generic.c @@ -307,7 +307,7 @@ static tree expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f, tree type, tree inner_type, tree a, tree b, enum tree_code code, - tree ret_type = NULL_TREE) + bool parallel_p, tree ret_type = NULL_TREE) { vec *v; tree part_width = TYPE_SIZE (inner_type); @@ -317,8 +317,7 @@ expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f, int i; location_t loc = gimple_location (gsi_stmt (*gsi)); - if (ret_type - || types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type)) + if (ret_type || !parallel_p) warning_at (loc, OPT_Wvector_operation_performance, "vector operation will be expanded piecewise"); else @@ -364,13 +363,13 @@ expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type, if (TYPE_MODE (TREE_TYPE (type)) == word_mode) return expand_vector_piecewise (gsi, f, type, TREE_TYPE (type), - a, b, code); + a, b, code, true); else if (n_words > 1) { tree word_type = build_word_mode_vector_type (n_words); result = expand_vector_piecewise (gsi, f, word_type, TREE_TYPE (word_type), - a, b, code); + a, b, code, true); result = force_gimple_operand_gsi (gsi, result, true, NULL, true, GSI_SAME_STMT); } @@ -410,7 +409,7 @@ expand_vector_addition (gimple_stmt_iterator *gsi, else 
return expand_vector_piecewise (gsi, f, type, TREE_TYPE (type), - a, b, code); + a, b, code, false); } static bool @@ -501,7 +500,7 @@ expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0, else t = expand_vector_piecewise (gsi, do_compare, type, TREE_TYPE (TREE_TYPE (op0)), op0, op1, - code); + code, false); } else t = NULL_TREE; @@ -1248,11 +1247,11 @@ expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type if (TREE_CODE_CLASS (code) == tcc_unary) return expand_vector_piecewise (gsi, do_unop, type, compute_type, gimple_assign_rhs1 (assign), - NULL_TREE, code); + NULL_TREE, code, false); else return expand_vector_piecewise (gsi, do_binop, type, compute_type, gimple_assign_rhs1 (assign), - gimple_assign_rhs2 (assign), code); + gimple_assign_rhs2 (assign), code, false); } /* Try to optimize @@ -1762,7 +1761,7 @@ expand_vector_scalar_condition (gimple_stmt_iterator *gsi) COND_EXPR); else new_rhs = expand_vector_piecewise (gsi, do_cond, type, compute_type, - rhs2, rhs3, COND_EXPR); + rhs2, rhs3, COND_EXPR, false); if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs))) new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs), new_rhs); @@ -1885,7 +1884,7 @@ expand_vector_conversion (gimple_stmt_iterator *gsi) { new_rhs = expand_vector_piecewise (gsi, do_vec_conversion, ret_type, arg1_type, arg, - NULL_TREE, code1); + NULL_TREE, code1, false); g = gimple_build_assign (lhs, new_rhs); gsi_replace (gsi, g, false); return; @@ -1953,7 +1952,7 @@ expand_vector_conversion (gimple_stmt_iterator *gsi) do_vec_narrow_conversion, arg_type, dcompute_type, arg, NULL_TREE, code1, - ret_type); + false, ret_type); g = gimple_build_assign (lhs, new_rhs); gsi_replace (gsi, g, false); return; @@ -2065,7 +2064,7 @@ expand_vector_conversion (gimple_stmt_iterator *gsi) new_rhs = expand_vector_piecewise (gsi, do_vec_conversion, arg_type, TREE_TYPE (arg_type), arg, - NULL_TREE, code, ret_type); + NULL_TREE, code, false, 
ret_type); g = gimple_build_assign (lhs, new_rhs); gsi_replace (gsi, g, false); } -- cgit v1.1 From 3d7686a30fcf0e44133f28673b722b5f57e430c5 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Fri, 16 Jul 2021 14:56:38 +0200 Subject: Remove gimple_expr_type This removes the transitional gimple_expr_type API. 2021-07-16 Richard Biener * gimple.h (gimple_expr_type): Remove. * doc/gimple.texi: Remove gimple_expr_type documentation. --- gcc/doc/gimple.texi | 8 -------- gcc/gimple.h | 42 ------------------------------------------ 2 files changed, 50 deletions(-) (limited to 'gcc') diff --git a/gcc/doc/gimple.texi b/gcc/doc/gimple.texi index 4b3d7d7..5d89dbc 100644 --- a/gcc/doc/gimple.texi +++ b/gcc/doc/gimple.texi @@ -868,14 +868,6 @@ Return the basic block to which statement @code{G} belongs to. Return the lexical scope block holding statement @code{G}. @end deftypefn -@deftypefn {GIMPLE function} tree gimple_expr_type (gimple stmt) -Return the type of the main expression computed by @code{STMT}. Return -@code{void_type_node} if @code{STMT} computes nothing. This will only return -something meaningful for @code{GIMPLE_ASSIGN}, @code{GIMPLE_COND} and -@code{GIMPLE_CALL}. For all other tuple codes, it will return -@code{void_type_node}. -@end deftypefn - @deftypefn {GIMPLE function} {enum tree_code} gimple_expr_code (gimple stmt) Return the tree code for the expression computed by @code{STMT}. This is only meaningful for @code{GIMPLE_CALL}, @code{GIMPLE_ASSIGN} and diff --git a/gcc/gimple.h b/gcc/gimple.h index acf572b..29da919 100644 --- a/gcc/gimple.h +++ b/gcc/gimple.h @@ -6608,48 +6608,6 @@ is_gimple_resx (const gimple *gs) return gimple_code (gs) == GIMPLE_RESX; } -/* Return the type of the main expression computed by STMT. Return - void_type_node if the statement computes nothing. 
*/ - -static inline tree -gimple_expr_type (const gimple *stmt) -{ - enum gimple_code code = gimple_code (stmt); - /* In general we want to pass out a type that can be substituted - for both the RHS and the LHS types if there is a possibly - useless conversion involved. That means returning the - original RHS type as far as we can reconstruct it. */ - if (code == GIMPLE_CALL) - { - const gcall *call_stmt = as_a <const gcall *> (stmt); - if (gimple_call_internal_p (call_stmt)) - switch (gimple_call_internal_fn (call_stmt)) - { - case IFN_MASK_STORE: - case IFN_SCATTER_STORE: - return TREE_TYPE (gimple_call_arg (call_stmt, 3)); - case IFN_MASK_SCATTER_STORE: - return TREE_TYPE (gimple_call_arg (call_stmt, 4)); - default: - break; - } - return gimple_call_return_type (call_stmt); - } - else if (code == GIMPLE_ASSIGN) - { - if (gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR) - return TREE_TYPE (gimple_assign_rhs1 (stmt)); - else - /* As fallback use the type of the LHS. */ - return TREE_TYPE (gimple_get_lhs (stmt)); - } - else if (code == GIMPLE_COND) - return boolean_type_node; - else if (code == GIMPLE_PHI) - return TREE_TYPE (gimple_phi_result (stmt)); - else - return void_type_node; -} /* Enum and arrays used for allocation stats. Keep in sync with gimple.c:gimple_alloc_kind_names. */ -- cgit v1.1 From 8df3ee8f7d85d0708f3c3ca96b55c9230c2ae9f0 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Mon, 19 Jul 2021 13:29:16 +0200 Subject: tree-optimization/101505 - properly determine stmt precision for PHIs Loop vectorization pattern recog fails to walk PHIs when determining stmt precisions. This fails to recognize non-mask uses for bools in PHIs and outer loop vectorization. 2021-07-19 Richard Biener PR tree-optimization/101505 * tree-vect-patterns.c (vect_determine_precisions): Walk PHIs also for loop vectorization. * gcc.dg/vect/pr101505.c: New testcase.
--- gcc/testsuite/gcc.dg/vect/pr101505.c | 16 ++++++++++++++++ gcc/tree-vect-patterns.c | 14 ++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/vect/pr101505.c (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/vect/pr101505.c b/gcc/testsuite/gcc.dg/vect/pr101505.c new file mode 100644 index 0000000..e2b8945 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr101505.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-O1" } */ + +int n2; + +__attribute__ ((simd)) char +w7 (void) +{ + short int xb = n2; + int qp; + + for (qp = 0; qp < 2; ++qp) + xb = xb < 1; + + return xb; +} diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c index c249444..44f6c9b 100644 --- a/gcc/tree-vect-patterns.c +++ b/gcc/tree-vect-patterns.c @@ -5355,6 +5355,13 @@ vect_determine_precisions (vec_info *vinfo) for (unsigned int i = 0; i < nbbs; i++) { basic_block bb = bbs[i]; + for (auto gsi = gsi_start_phis (bb); + !gsi_end_p (gsi); gsi_next (&gsi)) + { + stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ()); + if (stmt_info) + vect_determine_mask_precision (vinfo, stmt_info); + } for (auto si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) if (!is_gimple_debug (gsi_stmt (si))) vect_determine_mask_precision @@ -5368,6 +5375,13 @@ vect_determine_precisions (vec_info *vinfo) if (!is_gimple_debug (gsi_stmt (si))) vect_determine_stmt_precisions (vinfo, vinfo->lookup_stmt (gsi_stmt (si))); + for (auto gsi = gsi_start_phis (bb); + !gsi_end_p (gsi); gsi_next (&gsi)) + { + stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ()); + if (stmt_info) + vect_determine_stmt_precisions (vinfo, stmt_info); + } } } else -- cgit v1.1 From 3522fe2a115a2118553dad9aa98e524674fd8114 Mon Sep 17 00:00:00 2001 From: Marek Polacek Date: Mon, 19 Jul 2021 10:16:42 -0400 Subject: c++: Add test for DR 2126 DR 2126 gcc/testsuite/ChangeLog: * g++.dg/cpp0x/constexpr-temp2.C: New test. 
--- gcc/testsuite/g++.dg/cpp0x/constexpr-temp2.C | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 gcc/testsuite/g++.dg/cpp0x/constexpr-temp2.C (limited to 'gcc') diff --git a/gcc/testsuite/g++.dg/cpp0x/constexpr-temp2.C b/gcc/testsuite/g++.dg/cpp0x/constexpr-temp2.C new file mode 100644 index 0000000..28ffd2c --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp0x/constexpr-temp2.C @@ -0,0 +1,6 @@ +// DR 2126 +// { dg-do compile { target c++11 } } + +typedef const int CI[3]; +constexpr CI &ci = CI{11, 22, 33}; +static_assert(ci[1] == 22, ""); -- cgit v1.1 From db95ac7745b284d1fd667ee6262b4afc778fe074 Mon Sep 17 00:00:00 2001 From: Andrew Pinski Date: Sat, 17 Jul 2021 01:16:28 -0700 Subject: [AARCH64] Fix PR 101205: csinv does not have an zero_extend version So the problem is even though there was a csneg with a zero_extend in the front, there was not one for csinv. This fixes it by extending that pattern. OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions. gcc/ChangeLog: PR target/101205 * config/aarch64/aarch64.md (csneg3_uxtw_insn): Rename to ... (*cs<neg_not_cs>3_uxtw_insn4): and extend to NEG_NOT. gcc/testsuite/ChangeLog: PR target/101205 * gcc.target/aarch64/csinv-neg-1.c: New test.
--- gcc/config/aarch64/aarch64.md | 6 +- gcc/testsuite/gcc.target/aarch64/csinv-neg-1.c | 112 +++++++++++++++++++++++++ 2 files changed, 115 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/csinv-neg-1.c (limited to 'gcc') diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index f12a0be..8cd259f 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -4203,15 +4203,15 @@ [(set_attr "type" "csel")] ) -(define_insn "csneg3_uxtw_insn" +(define_insn "*cs<neg_not_cs>3_uxtw_insn4" [(set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (if_then_else:SI (match_operand 1 "aarch64_comparison_operation" "") - (neg:SI (match_operand:SI 2 "register_operand" "r")) + (NEG_NOT:SI (match_operand:SI 2 "register_operand" "r")) (match_operand:SI 3 "aarch64_reg_or_zero" "rZ"))))] "" - "csneg\\t%w0, %w3, %w2, %M1" + "cs<neg_not_cs>\\t%w0, %w3, %w2, %M1" [(set_attr "type" "csel")] ) diff --git a/gcc/testsuite/gcc.target/aarch64/csinv-neg-1.c b/gcc/testsuite/gcc.target/aarch64/csinv-neg-1.c new file mode 100644 index 0000000..e528883 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/csinv-neg-1.c @@ -0,0 +1,112 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +/* +** inv1: +** cmp w0, 0 +** csinv w0, w1, w2, ne +** ret +*/ +unsigned long long +inv1(unsigned a, unsigned b, unsigned c) +{ + unsigned t = a ? b : ~c; + return t; +} + +/* +** inv1_local: +** cmp w0, 0 +** csinv w0, w1, w2, ne +** ret +*/ +unsigned long long +inv1_local(unsigned a, unsigned b, unsigned c) +{ + unsigned d = ~c; + unsigned t = a ? b : d; + return t; +} + +/* +** inv_zero1: +** cmp w0, 0 +** csinv w0, wzr, w1, ne +** ret +*/ +unsigned long long +inv_zero1(unsigned a, unsigned b) +{ + unsigned t = a ? 0 : ~b; + return t; +} + +/* +** inv_zero2: +** cmp w0, 0 +** csinv w0, wzr, w1, eq +** ret +*/ +unsigned long long +inv_zero2(unsigned a, unsigned b) +{ + unsigned t = a ?
~b : 0; + return t; +} + + +/* +** inv2: +** cmp w0, 0 +** csinv w0, w2, w1, eq +** ret +*/ +unsigned long long +inv2(unsigned a, unsigned b, unsigned c) +{ + unsigned t = a ? ~b : c; + return t; +} + +/* +** inv2_local: +** cmp w0, 0 +** csinv w0, w2, w1, eq +** ret +*/ +unsigned long long +inv2_local(unsigned a, unsigned b, unsigned c) +{ + unsigned d = ~b; + unsigned t = a ? d : c; + return t; +} + +/* +** neg1: +** cmp w0, 0 +** csneg w0, w1, w2, ne +** ret +*/ +unsigned long long +neg1(unsigned a, unsigned b, unsigned c) +{ + unsigned t = a ? b : -c; + return t; +} + + +/* +** neg2: +** cmp w0, 0 +** csneg w0, w2, w1, eq +** ret +*/ +unsigned long long +neg2(unsigned a, unsigned b, unsigned c) +{ + unsigned t = a ? -b : c; + return t; +} + +/* { dg-final { check-function-bodies "**" "" "" } } */ -- cgit v1.1 From aad32a00b7d2b64ae158b2b167768a9ae3e20f6e Mon Sep 17 00:00:00 2001 From: Andrew Stubbs Date: Thu, 8 Jul 2021 15:47:53 +0100 Subject: amdgcn: Add -mxnack and -msram-ecc [PR 100208] gcc/ChangeLog: PR target/100208 * config/gcn/gcn-hsa.h (DRIVER_SELF_SPECS): New. (ASM_SPEC): Set -mattr for xnack and sram-ecc. * config/gcn/gcn-opts.h (enum sram_ecc_type): New. * config/gcn/gcn-valu.md: Add a warning comment. * config/gcn/gcn.c (gcn_option_override): Add "sorry" for -mxnack. (output_file_start): Add xnack and sram-ecc state to ".amdgcn_target". * config/gcn/gcn.md: Add a warning comment. * config/gcn/gcn.opt: Add -mxnack and -msram-ecc. * config/gcn/mkoffload.c (EF_AMDGPU_MACH_AMDGCN_GFX908): Remove SRAM-ECC flag. (EF_AMDGPU_XNACK): New. (EF_AMDGPU_SRAM_ECC): New. (elf_flags): New. (copy_early_debug_info): Use elf_flags. (main): Handle -mxnack and -msram-ecc options. * doc/invoke.texi: Document -mxnack and -msram-ecc. gcc/testsuite/ChangeLog: PR target/100208 * gcc.target/gcn/sram-ecc-1.c: New test. * gcc.target/gcn/sram-ecc-2.c: New test. * gcc.target/gcn/sram-ecc-3.c: New test. * gcc.target/gcn/sram-ecc-4.c: New test. 
* gcc.target/gcn/sram-ecc-5.c: New test. * gcc.target/gcn/sram-ecc-6.c: New test. * gcc.target/gcn/sram-ecc-7.c: New test. * gcc.target/gcn/sram-ecc-8.c: New test. --- gcc/config/gcn/gcn-hsa.h | 6 ++++ gcc/config/gcn/gcn-opts.h | 7 ++++ gcc/config/gcn/gcn-valu.md | 2 ++ gcc/config/gcn/gcn.c | 13 +++++-- gcc/config/gcn/gcn.md | 1 + gcc/config/gcn/gcn.opt | 21 +++++++++++ gcc/config/gcn/mkoffload.c | 58 +++++++++++++++++++++++++++++-- gcc/doc/invoke.texi | 17 +++++++++ gcc/testsuite/gcc.target/gcn/sram-ecc-1.c | 17 +++++++++ gcc/testsuite/gcc.target/gcn/sram-ecc-2.c | 17 +++++++++ gcc/testsuite/gcc.target/gcn/sram-ecc-3.c | 21 +++++++++++ gcc/testsuite/gcc.target/gcn/sram-ecc-4.c | 21 +++++++++++ gcc/testsuite/gcc.target/gcn/sram-ecc-5.c | 17 +++++++++ gcc/testsuite/gcc.target/gcn/sram-ecc-6.c | 17 +++++++++ gcc/testsuite/gcc.target/gcn/sram-ecc-7.c | 21 +++++++++++ gcc/testsuite/gcc.target/gcn/sram-ecc-8.c | 21 +++++++++++ 16 files changed, 273 insertions(+), 4 deletions(-) create mode 100644 gcc/testsuite/gcc.target/gcn/sram-ecc-1.c create mode 100644 gcc/testsuite/gcc.target/gcn/sram-ecc-2.c create mode 100644 gcc/testsuite/gcc.target/gcn/sram-ecc-3.c create mode 100644 gcc/testsuite/gcc.target/gcn/sram-ecc-4.c create mode 100644 gcc/testsuite/gcc.target/gcn/sram-ecc-5.c create mode 100644 gcc/testsuite/gcc.target/gcn/sram-ecc-6.c create mode 100644 gcc/testsuite/gcc.target/gcn/sram-ecc-7.c create mode 100644 gcc/testsuite/gcc.target/gcn/sram-ecc-8.c (limited to 'gcc') diff --git a/gcc/config/gcn/gcn-hsa.h b/gcc/config/gcn/gcn-hsa.h index 61cdb31..724e9a3 100644 --- a/gcc/config/gcn/gcn-hsa.h +++ b/gcc/config/gcn/gcn-hsa.h @@ -75,9 +75,15 @@ extern unsigned int gcn_local_sym_hash (const char *name); supported for gcn. */ #define GOMP_SELF_SPECS "" +#define DRIVER_SELF_SPECS \ + "%{march=fiji|march=gfx900|march=gfx906:%{!msram-ecc=*:-msram-ecc=off}}" + /* Use LLVM assembler and linker options. 
*/ #define ASM_SPEC "-triple=amdgcn--amdhsa " \ "%:last_arg(%{march=*:-mcpu=%*}) " \ + "-mattr=%{mxnack:+xnack;:-xnack} " \ + /* FIXME: support "any" when we move to HSACOv4. */ \ + "-mattr=%{!msram-ecc=off:+sram-ecc;:-sram-ecc} " \ "-filetype=obj" #define LINK_SPEC "--pie --export-dynamic" #define LIB_SPEC "-lc" diff --git a/gcc/config/gcn/gcn-opts.h b/gcc/config/gcn/gcn-opts.h index ed67d01..b255160 100644 --- a/gcc/config/gcn/gcn-opts.h +++ b/gcc/config/gcn/gcn-opts.h @@ -34,4 +34,11 @@ extern int gcn_isa; #define TARGET_GCN5 (gcn_isa == 5) #define TARGET_GCN5_PLUS (gcn_isa >= 5) +enum sram_ecc_type +{ + SRAM_ECC_OFF, + SRAM_ECC_ON, + SRAM_ECC_ANY +}; + #endif diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md index beefcf7..84ff675 100644 --- a/gcc/config/gcn/gcn-valu.md +++ b/gcc/config/gcn/gcn-valu.md @@ -703,6 +703,8 @@ ;; - The address space and glc (volatile) fields are there to replace the ;; fields normally found in a MEM. ;; - Multiple forms of address expression are supported, below. +;; +;; TODO: implement combined gather and zero_extend, but only for -msram-ecc=on (define_expand "gather_load" [(match_operand:V_ALL 0 "register_operand") diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c index 6d02a4a..385b90c 100644 --- a/gcc/config/gcn/gcn.c +++ b/gcc/config/gcn/gcn.c @@ -144,6 +144,10 @@ gcn_option_override (void) /* 1MB total. */ stack_size_opt = 1048576; } + + /* The xnack option is a placeholder, for now. */ + if (flag_xnack) + sorry ("XNACK support"); } /* }}} */ @@ -5182,11 +5186,16 @@ output_file_start (void) case PROCESSOR_FIJI: cpu = "gfx803"; break; case PROCESSOR_VEGA10: cpu = "gfx900"; break; case PROCESSOR_VEGA20: cpu = "gfx906"; break; - case PROCESSOR_GFX908: cpu = "gfx908+sram-ecc"; break; + case PROCESSOR_GFX908: cpu = "gfx908"; break; default: gcc_unreachable (); } - fprintf(asm_out_file, "\t.amdgcn_target \"amdgcn-unknown-amdhsa--%s\"\n", cpu); + const char *xnack = (flag_xnack ? 
"+xnack" : ""); + /* FIXME: support "any" when we move to HSACOv4. */ + const char *sram_ecc = (flag_sram_ecc ? "+sram-ecc" : ""); + + fprintf(asm_out_file, "\t.amdgcn_target \"amdgcn-unknown-amdhsa--%s%s%s\"\n", + cpu, xnack, sram_ecc); } /* Implement ASM_DECLARE_FUNCTION_NAME via gcn-hsa.h. diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md index ae7249a..8ffa43c 100644 --- a/gcc/config/gcn/gcn.md +++ b/gcc/config/gcn/gcn.md @@ -569,6 +569,7 @@ (set_attr "length" "4,4,8,12,12,12,12,4,8,8,12,12,8,12,12,8,12,12")]) ; 8/16bit move pattern +; TODO: implement combined load and zero_extend, but *only* for -msram-ecc=on (define_insn "*mov_insn" [(set (match_operand:QIHI 0 "nonimmediate_operand" diff --git a/gcc/config/gcn/gcn.opt b/gcc/config/gcn/gcn.opt index 767d458..b2b10b0 100644 --- a/gcc/config/gcn/gcn.opt +++ b/gcc/config/gcn/gcn.opt @@ -76,3 +76,24 @@ Target RejectNegative Joined UInteger Var(stack_size_opt) Init(-1) Wopenacc-dims Target Var(warn_openacc_dims) Warning Warn about invalid OpenACC dimensions. + +mxnack +Target Var(flag_xnack) Init(0) +Compile for devices requiring XNACK enabled. Default off. + +Enum +Name(sram_ecc_type) Type(enum sram_ecc_type) +SRAM-ECC modes: + +EnumValue +Enum(sram_ecc_type) String(off) Value(SRAM_ECC_OFF) + +EnumValue +Enum(sram_ecc_type) String(on) Value(SRAM_ECC_ON) + +EnumValue +Enum(sram_ecc_type) String(any) Value(SRAM_ECC_ANY) + +msram-ecc= +Target RejectNegative Joined ToLower Enum(sram_ecc_type) Var(flag_sram_ecc) Init(SRAM_ECC_ANY) +Compile for devices with the SRAM ECC feature enabled, or not. Default \"any\". diff --git a/gcc/config/gcn/mkoffload.c b/gcc/config/gcn/mkoffload.c index 1469a68..804cc26 100644 --- a/gcc/config/gcn/mkoffload.c +++ b/gcc/config/gcn/mkoffload.c @@ -52,7 +52,10 @@ #undef EF_AMDGPU_MACH_AMDGCN_GFX906 #define EF_AMDGPU_MACH_AMDGCN_GFX906 0x2f #undef EF_AMDGPU_MACH_AMDGCN_GFX908 -#define EF_AMDGPU_MACH_AMDGCN_GFX908 0x230 // Assume SRAM-ECC enabled. 
+#define EF_AMDGPU_MACH_AMDGCN_GFX908 0x30 + +#define EF_AMDGPU_XNACK 0x100 +#define EF_AMDGPU_SRAM_ECC 0x200 #ifndef R_AMDGPU_NONE #define R_AMDGPU_NONE 0 @@ -77,6 +80,7 @@ static struct obstack files_to_cleanup; enum offload_abi offload_abi = OFFLOAD_ABI_UNSET; uint32_t elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX803; // Default GPU architecture. +uint32_t elf_flags = 0; /* Delete tempfiles. */ @@ -298,7 +302,7 @@ copy_early_debug_info (const char *infile, const char *outfile) ehdr.e_ident[8] = ELFABIVERSION_AMDGPU_HSA; ehdr.e_type = ET_REL; ehdr.e_machine = EM_AMDGPU; - ehdr.e_flags = elf_arch; + ehdr.e_flags = elf_arch | elf_flags; /* Load the section headers so we can walk them later. */ Elf64_Shdr *sections = (Elf64_Shdr *)xmalloc (sizeof (Elf64_Shdr) @@ -823,6 +827,7 @@ main (int argc, char **argv) bool fopenacc = false; bool fPIC = false; bool fpic = false; + bool sram_seen = false; for (int i = 1; i < argc; i++) { #define STR "-foffload-abi=" @@ -845,6 +850,26 @@ main (int argc, char **argv) fPIC = true; else if (strcmp (argv[i], "-fpic") == 0) fpic = true; + else if (strcmp (argv[i], "-mxnack") == 0) + elf_flags |= EF_AMDGPU_XNACK; + else if (strcmp (argv[i], "-mno-xnack") == 0) + elf_flags &= ~EF_AMDGPU_XNACK; + else if (strcmp (argv[i], "-msram-ecc=on") == 0) + { + elf_flags |= EF_AMDGPU_SRAM_ECC; + sram_seen = true; + } + else if (strcmp (argv[i], "-msram-ecc=any") == 0) + { + /* FIXME: change this when we move to HSACOv4. */ + elf_flags |= EF_AMDGPU_SRAM_ECC; + sram_seen = true; + } + else if (strcmp (argv[i], "-msram-ecc=off") == 0) + { + elf_flags &= ~EF_AMDGPU_SRAM_ECC; + sram_seen = true; + } else if (strcmp (argv[i], "-save-temps") == 0) save_temps = true; else if (strcmp (argv[i], "-v") == 0) @@ -865,6 +890,21 @@ main (int argc, char **argv) if (!(fopenacc ^ fopenmp)) fatal_error (input_location, "either -fopenacc or -fopenmp must be set"); + /* The SRAM-ECC feature defaults to "any" on GPUs where the feature is + available. 
*/ + if (!sram_seen) + switch (elf_arch) + { + case EF_AMDGPU_MACH_AMDGCN_GFX803: + case EF_AMDGPU_MACH_AMDGCN_GFX900: + case EF_AMDGPU_MACH_AMDGCN_GFX906: + break; + default: + /* FIXME: change this when we move to HSACOv4. */ + elf_flags |= EF_AMDGPU_SRAM_ECC; + break; + } + const char *abi; switch (offload_abi) { @@ -892,6 +932,12 @@ main (int argc, char **argv) obstack_ptr_grow (&cc_argv_obstack, "-xlto"); if (fopenmp) obstack_ptr_grow (&cc_argv_obstack, "-mgomp"); + obstack_ptr_grow (&cc_argv_obstack, + (elf_flags & EF_AMDGPU_XNACK + ? "-mxnack" : "-mno-xnack")); + obstack_ptr_grow (&cc_argv_obstack, + (elf_flags & EF_AMDGPU_SRAM_ECC + ? "-msram-ecc=on" : "-msram-ecc=off")); for (int ix = 1; ix != argc; ix++) { @@ -993,6 +1039,14 @@ main (int argc, char **argv) } obstack_ptr_grow (&ld_argv_obstack, gcn_s2_name); obstack_ptr_grow (&ld_argv_obstack, "-lgomp"); + obstack_ptr_grow (&ld_argv_obstack, + (elf_flags & EF_AMDGPU_XNACK + ? "-mxnack" : "-mno-xnack")); + obstack_ptr_grow (&ld_argv_obstack, + (elf_flags & EF_AMDGPU_SRAM_ECC + ? "-msram-ecc=on" : "-msram-ecc=off")); + if (verbose) + obstack_ptr_grow (&ld_argv_obstack, "-v"); for (int i = 1; i < argc; i++) if (startswith (argv[i], "-l") diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index b16176e..32697e6 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -18847,6 +18847,15 @@ Compile for GCN5 Vega 20 devices (gfx906). @end table +@item -msram-ecc=on +@itemx -msram-ecc=off +@itemx -msram-ecc=any +@opindex msram-ecc +Compile binaries suitable for devices with the SRAM-ECC feature enabled, +disabled, or either mode. This feature can be enabled per-process on some +devices. The compiled code must match the device mode. The default is +@samp{any}, for devices that support it. + @item -mstack-size=@var{bytes} @opindex mstack-size Specify how many @var{bytes} of stack space will be requested for each GPU @@ -18855,6 +18864,14 @@ available. 
The size of the stack allocation may also have an impact on run-time performance. The default is 32KB when using OpenACC or OpenMP, and 1MB otherwise. +@item -mxnack +@opindex mxnack +Compile binaries suitable for devices with the XNACK feature enabled. Some +devices always require XNACK and some allow the user to configure XNACK. The +compiled code must match the device mode. The default is @samp{-mno-xnack}. +At present this option is a placeholder for support that is not yet +implemented. + @end table @node ARC Options diff --git a/gcc/testsuite/gcc.target/gcn/sram-ecc-1.c b/gcc/testsuite/gcc.target/gcn/sram-ecc-1.c new file mode 100644 index 0000000..d46c302 --- /dev/null +++ b/gcc/testsuite/gcc.target/gcn/sram-ecc-1.c @@ -0,0 +1,17 @@ +/* Ensure that explicit zero-entend instructions are present when compiling + for targets without sram-ecc enabled (in which sub-dword loads do not + zero the high bits of the target register). */ + +/* { dg-do compile } */ +/* { dg-options "-O2 -msram-ecc=off" } */ + +extern unsigned char c; + +unsigned int +f () +{ + return c; +} + +/* { dg-final { scan-assembler "lshl.* 24" } } */ +/* { dg-final { scan-assembler "lshr.* 24" } } */ diff --git a/gcc/testsuite/gcc.target/gcn/sram-ecc-2.c b/gcc/testsuite/gcc.target/gcn/sram-ecc-2.c new file mode 100644 index 0000000..351d43c --- /dev/null +++ b/gcc/testsuite/gcc.target/gcn/sram-ecc-2.c @@ -0,0 +1,17 @@ +/* Ensure that explicit zero-entend instructions are present when compiling + for targets without sram-ecc enabled (in which sub-dword loads do not + zero the high bits of the target register). 
*/ + +/* { dg-do compile } */ +/* { dg-options "-O2 -msram-ecc=off" } */ + +extern unsigned short s; + +unsigned short +f () +{ + return s; +} + +/* { dg-final { scan-assembler "lshl.* 16" } } */ +/* { dg-final { scan-assembler "lshr.* 16" } } */ diff --git a/gcc/testsuite/gcc.target/gcn/sram-ecc-3.c b/gcc/testsuite/gcc.target/gcn/sram-ecc-3.c new file mode 100644 index 0000000..692d457 --- /dev/null +++ b/gcc/testsuite/gcc.target/gcn/sram-ecc-3.c @@ -0,0 +1,21 @@ +/* Ensure that explicit zero-entend instructions are present when compiling + for targets without sram-ecc enabled (in which sub-dword loads do not + zero the high bits of the target register). */ + +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -dp -msram-ecc=off" } */ + +typedef unsigned int v64si __attribute__ ((vector_size (64*4))); +typedef unsigned char v64qi __attribute__ ((vector_size (64*1))); + +extern v64si a; +extern v64qi b; + +void +f () +{ + for (int n = 0; n < 64; n++) + a[n] = b[n]; +} + +/* { dg-final { scan-assembler "zero_extendv64qiv64si2" } } */ diff --git a/gcc/testsuite/gcc.target/gcn/sram-ecc-4.c b/gcc/testsuite/gcc.target/gcn/sram-ecc-4.c new file mode 100644 index 0000000..61b8d55 --- /dev/null +++ b/gcc/testsuite/gcc.target/gcn/sram-ecc-4.c @@ -0,0 +1,21 @@ +/* Ensure that explicit zero-entend instructions are present when compiling + for targets without sram-ecc enabled (in which sub-dword loads do not + zero the high bits of the target register). 
*/ + +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -dp -msram-ecc=off" } */ + +typedef unsigned int v64si __attribute__ ((vector_size (64*4))); +typedef unsigned short v64hi __attribute__ ((vector_size (64*2))); + +extern v64si a; +extern v64hi b; + +void +f () +{ + for (int n = 0; n < 64; n++) + a[n] = b[n]; +} + +/* { dg-final { scan-assembler "zero_extendv64hiv64si2" } } */ diff --git a/gcc/testsuite/gcc.target/gcn/sram-ecc-5.c b/gcc/testsuite/gcc.target/gcn/sram-ecc-5.c new file mode 100644 index 0000000..4f0543b --- /dev/null +++ b/gcc/testsuite/gcc.target/gcn/sram-ecc-5.c @@ -0,0 +1,17 @@ +/* Ensure that explicit zero-extend instructions are present when compiling + for targets that may not have sram-ecc enabled (in which sub-dword loads do + not zero the high bits of the target register). */ + +/* { dg-do compile } */ +/* { dg-options "-O2 -msram-ecc=any" } */ + +extern unsigned char c; + +unsigned int +f () +{ + return c; +} + +/* { dg-final { scan-assembler "lshl.* 24" } } */ +/* { dg-final { scan-assembler "lshr.* 24" } } */ diff --git a/gcc/testsuite/gcc.target/gcn/sram-ecc-6.c b/gcc/testsuite/gcc.target/gcn/sram-ecc-6.c new file mode 100644 index 0000000..9dfceaf --- /dev/null +++ b/gcc/testsuite/gcc.target/gcn/sram-ecc-6.c @@ -0,0 +1,17 @@ +/* Ensure that explicit zero-extend instructions are present when compiling + for targets that may not have sram-ecc enabled (in which sub-dword loads do + not zero the high bits of the target register).
*/ + +/* { dg-do compile } */ +/* { dg-options "-O2 -msram-ecc=any" } */ + +extern unsigned short s; + +unsigned short +f () +{ + return s; +} + +/* { dg-final { scan-assembler "lshl.* 16" } } */ +/* { dg-final { scan-assembler "lshr.* 16" } } */ diff --git a/gcc/testsuite/gcc.target/gcn/sram-ecc-7.c b/gcc/testsuite/gcc.target/gcn/sram-ecc-7.c new file mode 100644 index 0000000..9d0ce6f --- /dev/null +++ b/gcc/testsuite/gcc.target/gcn/sram-ecc-7.c @@ -0,0 +1,21 @@ +/* Ensure that explicit zero-extend instructions are present when compiling + for targets that may not have sram-ecc enabled (in which sub-dword loads do + not zero the high bits of the target register). */ + +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -dp -msram-ecc=any" } */ + +typedef unsigned int v64si __attribute__ ((vector_size (64*4))); +typedef unsigned char v64qi __attribute__ ((vector_size (64*1))); + +extern v64si a; +extern v64qi b; + +void +f () +{ + for (int n = 0; n < 64; n++) + a[n] = b[n]; +} + +/* { dg-final { scan-assembler "zero_extendv64qiv64si2" } } */ diff --git a/gcc/testsuite/gcc.target/gcn/sram-ecc-8.c b/gcc/testsuite/gcc.target/gcn/sram-ecc-8.c new file mode 100644 index 0000000..76e0288 --- /dev/null +++ b/gcc/testsuite/gcc.target/gcn/sram-ecc-8.c @@ -0,0 +1,21 @@ +/* Ensure that explicit zero-extend instructions are present when compiling + for targets that may not have sram-ecc enabled (in which sub-dword loads do + not zero the high bits of the target register).
*/ + +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -dp -msram-ecc=any" } */ + +typedef unsigned int v64si __attribute__ ((vector_size (64*4))); +typedef unsigned short v64hi __attribute__ ((vector_size (64*2))); + +extern v64si a; +extern v64hi b; + +void +f () +{ + for (int n = 0; n < 64; n++) + a[n] = b[n]; +} + +/* { dg-final { scan-assembler "zero_extendv64hiv64si2" } } */ -- cgit v1.1 From 483dd64546f1c7aa6ad8986eee570849d25f15e3 Mon Sep 17 00:00:00 2001 From: Indu Bhagat Date: Mon, 19 Jul 2021 10:23:27 -0700 Subject: debug: Add new function ctf_debuginfo_p gcc/ * flags.h (ctf_debuginfo_p): New function declaration. * opts.c (ctf_debuginfo_p): New function definition. --- gcc/flags.h | 4 ++++ gcc/opts.c | 8 ++++++++ 2 files changed, 12 insertions(+) (limited to 'gcc') diff --git a/gcc/flags.h b/gcc/flags.h index 85fd228..afedef0 100644 --- a/gcc/flags.h +++ b/gcc/flags.h @@ -44,6 +44,10 @@ const char * debug_set_names (uint32_t w_symbols); extern bool btf_debuginfo_p (); +/* Return true iff CTF debug info is enabled. */ + +extern bool ctf_debuginfo_p (); + /* Return true iff DWARF2 debug info is enabled. */ extern bool dwarf_debuginfo_p (); diff --git a/gcc/opts.c b/gcc/opts.c index 25282f7..93366e6 100644 --- a/gcc/opts.c +++ b/gcc/opts.c @@ -135,6 +135,14 @@ btf_debuginfo_p () return (write_symbols & BTF_DEBUG); } +/* Return TRUE iff CTF debug info is enabled. */ + +bool +ctf_debuginfo_p () +{ + return (write_symbols & CTF_DEBUG); +} + /* Return TRUE iff dwarf2 debug info is enabled. */ bool -- cgit v1.1 From f007a638a86e4b59bef0a0d8efa5bb8c5e5b200a Mon Sep 17 00:00:00 2001 From: Indu Bhagat Date: Mon, 19 Jul 2021 10:24:59 -0700 Subject: debug: Allow means for targets to opt out of CTF/BTF support CTF/BTF debug formats can be safely enabled for all ELF-based targets by default in GCC. CTF/BTF debug formats now adopt a similar approach as taken for DWARF debug format via the DWARF2_DEBUGGING_INFO. 
- By default, CTF/BTF formats can be enabled for all ELF-based targets. - By default, CTF/BTF formats can be disabled for all non ELF-based targets. - If the user passed a -gctf but CTF is not enabled for the target, GCC issues an error to the user (as is done currently with other debug formats) - "target system does not support the 'ctf' debug format". Analogous behavior for -gbtf command line option. A previous commit disabled the CTF and BTF testcases on the AIX platform. This is not necessary now that CTF and BTF debug formats are disabled by default on all non-ELF targets. GCC emits an error message when -gctf/-gbtf is used on such platforms and these tests will be skipped. gcc/ * config/elfos.h (CTF_DEBUGGING_INFO): New definition. (BTF_DEBUGGING_INFO): Likewise. * doc/tm.texi.in: Document the new macros. * doc/tm.texi: Regenerated. * toplev.c: Guard initialization of debug hooks. gcc/testsuite/ * gcc.dg/debug/btf/btf.exp: Do not run BTF testsuite if target does not support BTF format. Remove redundant check for AIX. * gcc.dg/debug/ctf/ctf.exp: Do not run CTF testsuite if target does not support CTF format. Remove redundant check for AIX. * lib/gcc-dg.exp: Remove redundant check for AIX. --- gcc/config/elfos.h | 8 ++++++++ gcc/doc/tm.texi | 26 ++++++++++++++++++++++++++ gcc/doc/tm.texi.in | 26 ++++++++++++++++++++++++++ gcc/testsuite/gcc.dg/debug/btf/btf.exp | 16 +++++++++------- gcc/testsuite/gcc.dg/debug/ctf/ctf.exp | 16 +++++++++------- gcc/testsuite/lib/gcc-dg.exp | 1 - gcc/toplev.c | 11 +++++++++-- 7 files changed, 87 insertions(+), 17 deletions(-) (limited to 'gcc') diff --git a/gcc/config/elfos.h b/gcc/config/elfos.h index 7a736cc..e5cb487 100644 --- a/gcc/config/elfos.h +++ b/gcc/config/elfos.h @@ -68,6 +68,14 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #define DWARF2_DEBUGGING_INFO 1 +/* All ELF targets can support CTF. */ + +#define CTF_DEBUGGING_INFO 1 + +/* All ELF targets can support BTF. 
*/ + +#define BTF_DEBUGGING_INFO 1 + /* The GNU tools operate better with dwarf2, and it is required by some psABI's. Since we don't have any native tools to be compatible with, default to dwarf2. */ diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index 3ad3944..c8f4abe 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -9947,6 +9947,8 @@ This describes how to specify debugging information. * File Names and DBX:: Macros controlling output of file names in DBX format. * DWARF:: Macros for DWARF format. * VMS Debug:: Macros for VMS debug format. +* CTF Debug:: Macros for CTF debug format. +* BTF Debug:: Macros for BTF debug format. @end menu @node All Debuggers @@ -10374,6 +10376,30 @@ behavior is controlled by @code{TARGET_OPTION_OPTIMIZATION} and @code{TARGET_OPTION_OVERRIDE}. @end defmac +@need 2000 +@node CTF Debug +@subsection Macros for CTF Debug Format + +@c prevent bad page break with this line +Here are macros for CTF debug format. + +@defmac CTF_DEBUGGING_INFO +Define this macro if GCC should produce debugging output in CTF debug +format in response to the @option{-gctf} option. +@end defmac + +@need 2000 +@node BTF Debug +@subsection Macros for BTF Debug Format + +@c prevent bad page break with this line +Here are macros for BTF debug format. + +@defmac BTF_DEBUGGING_INFO +Define this macro if GCC should produce debugging output in BTF debug +format in response to the @option{-gbtf} option. +@end defmac + @node Floating Point @section Cross Compilation and Floating Point @cindex cross compilation and floating point diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in index f881cda..9c4b501 100644 --- a/gcc/doc/tm.texi.in +++ b/gcc/doc/tm.texi.in @@ -6613,6 +6613,8 @@ This describes how to specify debugging information. * File Names and DBX:: Macros controlling output of file names in DBX format. * DWARF:: Macros for DWARF format. * VMS Debug:: Macros for VMS debug format. +* CTF Debug:: Macros for CTF debug format. 
+* BTF Debug:: Macros for BTF debug format. @end menu @node All Debuggers @@ -6994,6 +6996,30 @@ behavior is controlled by @code{TARGET_OPTION_OPTIMIZATION} and @code{TARGET_OPTION_OVERRIDE}. @end defmac +@need 2000 +@node CTF Debug +@subsection Macros for CTF Debug Format + +@c prevent bad page break with this line +Here are macros for CTF debug format. + +@defmac CTF_DEBUGGING_INFO +Define this macro if GCC should produce debugging output in CTF debug +format in response to the @option{-gctf} option. +@end defmac + +@need 2000 +@node BTF Debug +@subsection Macros for BTF Debug Format + +@c prevent bad page break with this line +Here are macros for BTF debug format. + +@defmac BTF_DEBUGGING_INFO +Define this macro if GCC should produce debugging output in BTF debug +format in response to the @option{-gbtf} option. +@end defmac + @node Floating Point @section Cross Compilation and Floating Point @cindex cross compilation and floating point diff --git a/gcc/testsuite/gcc.dg/debug/btf/btf.exp b/gcc/testsuite/gcc.dg/debug/btf/btf.exp index e173515..15593fd 100644 --- a/gcc/testsuite/gcc.dg/debug/btf/btf.exp +++ b/gcc/testsuite/gcc.dg/debug/btf/btf.exp @@ -24,11 +24,6 @@ if { [istarget nvptx-*-*] } { return } -if { [istarget "powerpc-ibm-aix*"] } { - set torture_execute_xfail "powerpc-ibm-aix*" - return -} - # If a testcase doesn't have special options, use these. global DEFAULT_CFLAGS if ![info exists DEFAULT_CFLAGS] then { @@ -39,8 +34,15 @@ if ![info exists DEFAULT_CFLAGS] then { dg-init # Main loop. -dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cS\] ]] \ - "" $DEFAULT_CFLAGS +set comp_output [gcc_target_compile \ + "$srcdir/$subdir/../trivial.c" "trivial.S" assembly \ + "additional_flags=-gbtf"] +if { ! [string match "*: target system does not support the * debug format*" \ + $comp_output] } { + remove-build-file "trivial.S" + dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cS\] ]] \ + "" $DEFAULT_CFLAGS +} # All done. 
dg-finish diff --git a/gcc/testsuite/gcc.dg/debug/ctf/ctf.exp b/gcc/testsuite/gcc.dg/debug/ctf/ctf.exp index 0b650ed..7ad6723 100644 --- a/gcc/testsuite/gcc.dg/debug/ctf/ctf.exp +++ b/gcc/testsuite/gcc.dg/debug/ctf/ctf.exp @@ -24,11 +24,6 @@ if { [istarget nvptx-*-*] } { return } -if { [istarget "powerpc-ibm-aix*"] } { - set torture_execute_xfail "powerpc-ibm-aix*" - return -} - # If a testcase doesn't have special options, use these. global DEFAULT_CFLAGS if ![info exists DEFAULT_CFLAGS] then { @@ -39,8 +34,15 @@ if ![info exists DEFAULT_CFLAGS] then { dg-init # Main loop. -dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cS\] ]] \ - "" $DEFAULT_CFLAGS +set comp_output [gcc_target_compile \ + "$srcdir/$subdir/../trivial.c" "trivial.S" assembly \ + "additional_flags=-gctf"] +if { ! [string match "*: target system does not support the * debug format*" \ + $comp_output] } { + remove-build-file "trivial.S" + dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cS\] ]] \ + "" $DEFAULT_CFLAGS +} # All done. 
dg-finish diff --git a/gcc/testsuite/lib/gcc-dg.exp b/gcc/testsuite/lib/gcc-dg.exp index 81f4bb2..7edd070 100644 --- a/gcc/testsuite/lib/gcc-dg.exp +++ b/gcc/testsuite/lib/gcc-dg.exp @@ -641,7 +641,6 @@ proc gcc-dg-frontend-supports-ctf { target_compile trivial } { proc gcc-dg-target-supports-debug-format { target_compile trivial type } { global srcdir subdir - if {$type == "-gctf" && [istarget *-*-aix*]} { return 0 } set comp_output [$target_compile \ "$srcdir/$subdir/$trivial" "trivial.S" assembly \ "additional_flags=$type"] diff --git a/gcc/toplev.c b/gcc/toplev.c index 43f1f7d..14d1335 100644 --- a/gcc/toplev.c +++ b/gcc/toplev.c @@ -1463,8 +1463,15 @@ process_options (void) debug_hooks = &xcoff_debug_hooks; #endif #ifdef DWARF2_DEBUGGING_INFO - else if (dwarf_debuginfo_p () - || dwarf_based_debuginfo_p ()) + else if (dwarf_debuginfo_p ()) + debug_hooks = &dwarf2_debug_hooks; +#endif +#ifdef CTF_DEBUGGING_INFO + else if (ctf_debuginfo_p ()) + debug_hooks = &dwarf2_debug_hooks; +#endif +#ifdef BTF_DEBUGGING_INFO + else if (btf_debuginfo_p ()) debug_hooks = &dwarf2_debug_hooks; #endif #ifdef VMS_DEBUGGING_INFO -- cgit v1.1 From a113b14398f2a4ad2742e6e9c87e25cac60f263e Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Mon, 19 Jul 2021 15:44:02 -0400 Subject: analyzer: add svalue::can_have_associated_state_p [PR101503] PR analyzer/101503 reports an assertion failure due to an unexpected "UNKNOWN" value (due to using --param analyzer-max-svalue-depth=0). This patch fixes this by rejecting attempts to purge state involving unknown/poisoned svalues (in region_model::purge_state_involving), as these svalues should not have state associated with them - they are singletons w.r.t each type. To be more systematic about this, the patch also introduces a new svalue::can_have_associated_state_p which returns false for unknown/poisoned svalues, so that we can reject adding constraints or sm-state on them, or building various kinds of svalue in terms of them (e.g. 
unary ops, binary ops, etc). gcc/analyzer/ChangeLog: PR analyzer/101503 * constraint-manager.cc (constraint_manager::add_constraint): Use can_have_associated_state_p rather than testing for unknown. (constraint_manager::get_or_add_equiv_class): Likewise. * program-state.cc (sm_state_map::set_state): Likewise. (sm_state_map::impl_set_state): Add assertion. * region-model-manager.cc (region_model_manager::maybe_fold_unaryop): Handle poisoned values. (region_model_manager::maybe_fold_binop): Move handling of unknown values... (region_model_manager::get_or_create_binop): ...to here, and generalize to use can_have_associated_state_p. (region_model_manager::maybe_fold_sub_svalue): Use can_have_associated_state_p rather than testing for unknown. (region_model_manager::maybe_fold_repeated_svalue): Use unknown when the size or repeated value is "unknown"/"poisoned". * region-model.cc (region_model::purge_state_involving): Reject attempts to purge unknown/poisoned svalues, as these svalues should not have state associated with them. * svalue.cc (sub_svalue::sub_svalue): Assert that we're building on top of an svalue with can_have_associated_state_p. (repeated_svalue::repeated_svalue): Likewise. (bits_within_svalue::bits_within_svalue): Likewise. * svalue.h (svalue::can_have_associated_state_p): New. (unknown_svalue::can_have_associated_state_p): New. (poisoned_svalue::can_have_associated_state_p): New. (unaryop_svalue::unaryop_svalue): Assert that we're building on top of an svalue with can_have_associated_state_p. (binop_svalue::binop_svalue): Likewise. (widening_svalue::widening_svalue): Likewise. gcc/testsuite/ChangeLog: PR analyzer/101503 * gcc.dg/analyzer/pr101503.c: New test. 
Signed-off-by: David Malcolm --- gcc/analyzer/constraint-manager.cc | 8 ++++---- gcc/analyzer/program-state.cc | 6 ++++-- gcc/analyzer/region-model-manager.cc | 28 ++++++++++++++++++++-------- gcc/analyzer/region-model.cc | 2 ++ gcc/analyzer/svalue.cc | 4 ++++ gcc/analyzer/svalue.h | 16 ++++++++++++++++ gcc/testsuite/gcc.dg/analyzer/pr101503.c | 11 +++++++++++ 7 files changed, 61 insertions(+), 14 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/analyzer/pr101503.c (limited to 'gcc') diff --git a/gcc/analyzer/constraint-manager.cc b/gcc/analyzer/constraint-manager.cc index 5b5a9de..f59929a 100644 --- a/gcc/analyzer/constraint-manager.cc +++ b/gcc/analyzer/constraint-manager.cc @@ -833,9 +833,9 @@ constraint_manager::add_constraint (const svalue *lhs, lhs = lhs->unwrap_any_unmergeable (); rhs = rhs->unwrap_any_unmergeable (); - /* Nothing can be known about unknown values. */ - if (lhs->get_kind () == SK_UNKNOWN - || rhs->get_kind () == SK_UNKNOWN) + /* Nothing can be known about unknown/poisoned values. */ + if (!lhs->can_have_associated_state_p () + || !rhs->can_have_associated_state_p ()) /* Not a contradiction. */ return true; @@ -1175,7 +1175,7 @@ constraint_manager::get_or_add_equiv_class (const svalue *sval) { equiv_class_id result (-1); - gcc_assert (sval->get_kind () != SK_UNKNOWN); + gcc_assert (sval->can_have_associated_state_p ()); /* Convert all NULL pointers to (void *) to avoid state explosions involving all of the various (foo *)NULL vs (bar *)NULL. */ diff --git a/gcc/analyzer/program-state.cc b/gcc/analyzer/program-state.cc index ccfe7b0..5bb8676 100644 --- a/gcc/analyzer/program-state.cc +++ b/gcc/analyzer/program-state.cc @@ -453,8 +453,8 @@ sm_state_map::set_state (region_model *model, if (model == NULL) return; - /* Reject attempts to set state on UNKNOWN. */ - if (sval->get_kind () == SK_UNKNOWN) + /* Reject attempts to set state on UNKNOWN/POISONED. 
*/ + if (!sval->can_have_associated_state_p ()) return; equiv_class &ec = model->get_constraints ()->get_equiv_class (sval); @@ -492,6 +492,8 @@ sm_state_map::impl_set_state (const svalue *sval, if (get_state (sval, ext_state) == state) return false; + gcc_assert (sval->can_have_associated_state_p ()); + /* Special-case state 0 as the default value. */ if (state == 0) { diff --git a/gcc/analyzer/region-model-manager.cc b/gcc/analyzer/region-model-manager.cc index 7a52a64..fccb93e 100644 --- a/gcc/analyzer/region-model-manager.cc +++ b/gcc/analyzer/region-model-manager.cc @@ -340,6 +340,13 @@ region_model_manager::maybe_fold_unaryop (tree type, enum tree_code op, /* Ops on "unknown" are also unknown. */ if (arg->get_kind () == SK_UNKNOWN) return get_or_create_unknown_svalue (type); + /* Likewise for "poisoned". */ + else if (const poisoned_svalue *poisoned_sval + = arg->dyn_cast_poisoned_svalue ()) + return get_or_create_poisoned_svalue (poisoned_sval->get_poison_kind (), + type); + + gcc_assert (arg->can_have_associated_state_p ()); switch (op) { @@ -615,12 +622,6 @@ region_model_manager::maybe_fold_binop (tree type, enum tree_code op, get_or_create_binop (size_type_node, op, binop->get_arg1 (), arg1)); - /* Ops on "unknown" are also unknown (unless we can use one of the - identities above). */ - if (arg0->get_kind () == SK_UNKNOWN - || arg1->get_kind () == SK_UNKNOWN) - return get_or_create_unknown_svalue (type); - /* etc. */ return NULL; @@ -641,6 +642,12 @@ region_model_manager::get_or_create_binop (tree type, enum tree_code op, if (const svalue *folded = maybe_fold_binop (type, op, arg0, arg1)) return folded; + /* Ops on "unknown"/"poisoned" are unknown (unless we were able to fold + it via an identity in maybe_fold_binop). 
*/ + if (!arg0->can_have_associated_state_p () + || !arg1->can_have_associated_state_p ()) + return get_or_create_unknown_svalue (type); + binop_svalue::key_t key (type, op, arg0, arg1); if (binop_svalue **slot = m_binop_values_map.get (key)) return *slot; @@ -658,8 +665,8 @@ region_model_manager::maybe_fold_sub_svalue (tree type, const svalue *parent_svalue, const region *subregion) { - /* Subvalues of "unknown" are unknown. */ - if (parent_svalue->get_kind () == SK_UNKNOWN) + /* Subvalues of "unknown"/"poisoned" are unknown. */ + if (!parent_svalue->can_have_associated_state_p ()) return get_or_create_unknown_svalue (type); /* If we have a subregion of a zero-fill, it's zero. */ @@ -755,6 +762,11 @@ region_model_manager::maybe_fold_repeated_svalue (tree type, const svalue *outer_size, const svalue *inner_svalue) { + /* Repeated "unknown"/"poisoned" is unknown. */ + if (!outer_size->can_have_associated_state_p () + || !inner_svalue->can_have_associated_state_p ()) + return get_or_create_unknown_svalue (type); + /* If INNER_SVALUE is the same size as OUTER_SIZE, turn into simply a cast. 
*/ if (tree cst_outer_num_bytes = outer_size->maybe_get_constant ()) diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc index 4fab1ef..6d02c60 100644 --- a/gcc/analyzer/region-model.cc +++ b/gcc/analyzer/region-model.cc @@ -1304,6 +1304,8 @@ void region_model::purge_state_involving (const svalue *sval, region_model_context *ctxt) { + if (!sval->can_have_associated_state_p ()) + return; m_store.purge_state_involving (sval, m_mgr); m_constraints->purge_state_involving (sval); m_dynamic_extents.purge_state_involving (sval); diff --git a/gcc/analyzer/svalue.cc b/gcc/analyzer/svalue.cc index 323df80..094c725 100644 --- a/gcc/analyzer/svalue.cc +++ b/gcc/analyzer/svalue.cc @@ -1109,6 +1109,7 @@ sub_svalue::sub_svalue (tree type, const svalue *parent_svalue, type), m_parent_svalue (parent_svalue), m_subregion (subregion) { + gcc_assert (parent_svalue->can_have_associated_state_p ()); } /* Implementation of svalue::dump_to_pp vfunc for sub_svalue. */ @@ -1165,6 +1166,8 @@ repeated_svalue::repeated_svalue (tree type, m_outer_size (outer_size), m_inner_svalue (inner_svalue) { + gcc_assert (outer_size->can_have_associated_state_p ()); + gcc_assert (inner_svalue->can_have_associated_state_p ()); } /* Implementation of svalue::dump_to_pp vfunc for repeated_svalue. */ @@ -1290,6 +1293,7 @@ bits_within_svalue::bits_within_svalue (tree type, m_bits (bits), m_inner_svalue (inner_svalue) { + gcc_assert (inner_svalue->can_have_associated_state_p ()); } /* Implementation of svalue::dump_to_pp vfunc for bits_within_svalue. */ diff --git a/gcc/analyzer/svalue.h b/gcc/analyzer/svalue.h index 1519889..debe439 100644 --- a/gcc/analyzer/svalue.h +++ b/gcc/analyzer/svalue.h @@ -160,6 +160,11 @@ public: virtual bool all_zeroes_p () const; + /* Can this svalue be involved in constraints and sm-state? + Most can, but UNKNOWN and POISONED svalues are singletons + per-type and thus it's meaningless for them to "have state". 
*/ + virtual bool can_have_associated_state_p () const { return true; } + protected: svalue (complexity c, tree type) : m_complexity (c), m_type (type) @@ -319,6 +324,9 @@ public: maybe_fold_bits_within (tree type, const bit_range &subrange, region_model_manager *mgr) const FINAL OVERRIDE; + + /* Unknown values are singletons per-type, so can't have state. */ + bool can_have_associated_state_p () const FINAL OVERRIDE { return false; } }; /* An enum describing a particular kind of "poisoned" value. */ @@ -389,6 +397,9 @@ public: enum poison_kind get_poison_kind () const { return m_kind; } + /* Poisoned svalues are singletons per-type, so can't have state. */ + bool can_have_associated_state_p () const FINAL OVERRIDE { return false; } + private: enum poison_kind m_kind; }; @@ -602,6 +613,7 @@ public: unaryop_svalue (tree type, enum tree_code op, const svalue *arg) : svalue (complexity (arg), type), m_op (op), m_arg (arg) { + gcc_assert (arg->can_have_associated_state_p ()); } enum svalue_kind get_kind () const FINAL OVERRIDE { return SK_UNARYOP; } @@ -694,6 +706,8 @@ public: type), m_op (op), m_arg0 (arg0), m_arg1 (arg1) { + gcc_assert (arg0->can_have_associated_state_p ()); + gcc_assert (arg1->can_have_associated_state_p ()); } enum svalue_kind get_kind () const FINAL OVERRIDE { return SK_BINOP; } @@ -1135,6 +1149,8 @@ public: m_point (point.get_function_point ()), m_base_sval (base_sval), m_iter_sval (iter_sval) { + gcc_assert (base_sval->can_have_associated_state_p ()); + gcc_assert (iter_sval->can_have_associated_state_p ()); } enum svalue_kind get_kind () const FINAL OVERRIDE { return SK_WIDENING; } diff --git a/gcc/testsuite/gcc.dg/analyzer/pr101503.c b/gcc/testsuite/gcc.dg/analyzer/pr101503.c new file mode 100644 index 0000000..16faf6e --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/pr101503.c @@ -0,0 +1,11 @@ +/* { dg-additional-options "--param analyzer-max-svalue-depth=0" } */ + +int val; + +int +fn (void) +{ + val = fn (); + + return 0; +} -- cgit v1.1 
From 237ab3ee49e2f3110accfcc03b6c0df8b4889f15 Mon Sep 17 00:00:00 2001 From: Iain Sandoe Date: Thu, 8 Jul 2021 09:42:49 +0100 Subject: coroutines: Adjust outlined function names [PR95520]. The mechanism used to date for uniquing the coroutine helper functions (actor, destroy) was over-complicating things and leading to the noted PR and also difficulties in setting breakpoints on these functions (so this will help PR99215 as well). This implementation delegates the adjustment to the mangling to write_encoding() which necessitates some book-keeping so that it is possible to determine which of the coroutine helper names is to be mangled. Signed-off-by: Iain Sandoe PR c++/95520 - [coroutines] __builtin_FUNCTION() returns mangled .actor instead of original function name PR c++/95520 gcc/cp/ChangeLog: * coroutines.cc (struct coroutine_info): Add fields for actor and destroy function decls. (to_ramp): New. (coro_get_ramp_function): New. (coro_get_actor_function): New. (coro_get_destroy_function): New. (act_des_fn): Set up mapping between ramp, actor and destroy functions. (morph_fn_to_coro): Adjust interface to the builder for helper function decls. * cp-tree.h (DECL_ACTOR_FN, DECL_DESTROY_FN, DECL_RAMP_FN, JOIN_STR): New. * mangle.c (write_encoding): Handle coroutine helpers. (write_unqualified_name): Handle lambda coroutine helpers. gcc/testsuite/ChangeLog: * g++.dg/coroutines/pr95520.C: New test. 
--- gcc/cp/coroutines.cc | 87 ++++++++++++++++++++++++++----- gcc/cp/cp-tree.h | 22 ++++++++ gcc/cp/mangle.c | 19 ++++++- gcc/testsuite/g++.dg/coroutines/pr95520.C | 29 +++++++++++ 4 files changed, 143 insertions(+), 14 deletions(-) create mode 100644 gcc/testsuite/g++.dg/coroutines/pr95520.C (limited to 'gcc') diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc index 712a5c0..47c79e5 100644 --- a/gcc/cp/coroutines.cc +++ b/gcc/cp/coroutines.cc @@ -82,11 +82,13 @@ static bool coro_promise_type_found_p (tree, location_t); struct GTY((for_user)) coroutine_info { tree function_decl; /* The original function decl. */ - tree promise_type; /* The cached promise type for this function. */ - tree handle_type; /* The cached coroutine handle for this function. */ - tree self_h_proxy; /* A handle instance that is used as the proxy for the - one that will eventually be allocated in the coroutine - frame. */ + tree actor_decl; /* The synthesized actor function. */ + tree destroy_decl; /* The synthesized destroy function. */ + tree promise_type; /* The cached promise type for this function. */ + tree handle_type; /* The cached coroutine handle for this function. */ + tree self_h_proxy; /* A handle instance that is used as the proxy for the + one that will eventually be allocated in the coroutine + frame. */ tree promise_proxy; /* Likewise, a proxy promise instance. */ tree return_void; /* The expression for p.return_void() if it exists. */ location_t first_coro_keyword; /* The location of the keyword that made this @@ -526,6 +528,46 @@ coro_promise_type_found_p (tree fndecl, location_t loc) return true; } +/* Map from actor or destroyer to ramp. */ +static GTY(()) hash_map<tree, tree> *to_ramp; + +/* Given a tree that is an actor or destroy, find the ramp function.
*/ + +tree +coro_get_ramp_function (tree decl) +{ + if (!to_ramp) + return NULL_TREE; + tree *p = to_ramp->get (decl); + if (p) + return *p; + return NULL_TREE; +} + +/* Given the DECL for a ramp function (the user's original declaration) return + the actor function if it has been defined. */ + +tree +coro_get_actor_function (tree decl) +{ + if (coroutine_info *info = get_coroutine_info (decl)) + return info->actor_decl; + + return NULL_TREE; +} + +/* Given the DECL for a ramp function (the user's original declaration) return + the destroy function if it has been defined. */ + +tree +coro_get_destroy_function (tree decl) +{ + if (coroutine_info *info = get_coroutine_info (decl)) + return info->destroy_decl; + + return NULL_TREE; +} + /* These functions assumes that the caller has verified that the state for the decl has been initialized, we try to minimize work here. */ @@ -3979,15 +4021,23 @@ register_local_var_uses (tree *stmt, int *do_subtree, void *d) return NULL_TREE; } -/* Build, return FUNCTION_DECL node with its coroutine frame pointer argument - for either actor or destroy functions. */ +/* Build, return FUNCTION_DECL node based on ORIG with a type FN_TYPE which has + a single argument of type CORO_FRAME_PTR. Build the actor function if + ACTOR_P is true, otherwise the destroy. */ static tree -act_des_fn (tree orig, tree fn_type, tree coro_frame_ptr, const char* name) +coro_build_actor_or_destroy_function (tree orig, tree fn_type, + tree coro_frame_ptr, bool actor_p) { - tree fn_name = get_fn_local_identifier (orig, name); location_t loc = DECL_SOURCE_LOCATION (orig); - tree fn = build_lang_decl (FUNCTION_DECL, fn_name, fn_type); + tree fn + = build_lang_decl (FUNCTION_DECL, copy_node (DECL_NAME (orig)), fn_type); + + /* Allow for locating the ramp (original) function from this one. 
*/ + if (!to_ramp) + to_ramp = hash_map<tree, tree>::create_ggc (10); + to_ramp->put (fn, orig); + DECL_CONTEXT (fn) = DECL_CONTEXT (orig); DECL_SOURCE_LOCATION (fn) = loc; DECL_ARTIFICIAL (fn) = true; @@ -4021,6 +4071,17 @@ act_des_fn (tree orig, tree fn_type, tree coro_frame_ptr, const char* name) /* This is a coroutine component. */ DECL_COROUTINE_P (fn) = 1; + /* Set up a means to find out if a decl is one of the helpers and, if so, + which one. */ + if (coroutine_info *info = get_coroutine_info (orig)) + { + gcc_checking_assert ((actor_p && info->actor_decl == NULL_TREE) + || info->destroy_decl == NULL_TREE); + if (actor_p) + info->actor_decl = fn; + else + info->destroy_decl = fn; + } return fn; } @@ -4329,8 +4390,10 @@ morph_fn_to_coro (tree orig, tree *resumer, tree *destroyer) tree act_des_fn_ptr = build_pointer_type (act_des_fn_type); /* Declare the actor and destroyer function. */ - tree actor = act_des_fn (orig, act_des_fn_type, coro_frame_ptr, "actor"); - tree destroy = act_des_fn (orig, act_des_fn_type, coro_frame_ptr, "destroy"); + tree actor = coro_build_actor_or_destroy_function (orig, act_des_fn_type, + coro_frame_ptr, true); + tree destroy = coro_build_actor_or_destroy_function (orig, act_des_fn_type, + coro_frame_ptr, false); /* Construct the wrapped function body; we will analyze this to determine the requirements for the coroutine frame. */ diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index f4bcab5..ddf8f43 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -5166,6 +5166,21 @@ more_aggr_init_expr_args_p (const aggr_init_expr_arg_iterator *iter) #define DECL_COROUTINE_P(NODE) \ (LANG_DECL_FN_CHECK (DECL_COMMON_CHECK (NODE))->coroutine_p) +/* For a FUNCTION_DECL of a coroutine, this holds the ACTOR helper function + decl. */ +#define DECL_ACTOR_FN(NODE) \ + (coro_get_actor_function ((NODE))) + +/* For a FUNCTION_DECL of a coroutine, this holds the DESTROY helper function + decl.
*/ +#define DECL_DESTROY_FN(NODE) \ + (coro_get_destroy_function ((NODE))) + +/* For a FUNCTION_DECL of a coroutine helper (ACTOR or DESTROY), this points + back to the original (ramp) function. */ +#define DECL_RAMP_FN(NODE) \ + (coro_get_ramp_function (NODE)) + /* True for an OMP_ATOMIC that has dependent parameters. These are stored as an expr in operand 1, and integer_zero_node or clauses in operand 0. */ #define OMP_ATOMIC_DEPENDENT_P(NODE) \ @@ -5584,6 +5599,7 @@ extern GTY(()) vec *keyed_classes; #ifndef NO_DOT_IN_LABEL #define JOINER '.' +#define JOIN_STR "." #define AUTO_TEMP_NAME "_.tmp_" #define VFIELD_BASE ".vf" @@ -5595,6 +5611,7 @@ extern GTY(()) vec *keyed_classes; #ifndef NO_DOLLAR_IN_LABEL #define JOINER '$' +#define JOIN_STR "$" #define AUTO_TEMP_NAME "_$tmp_" #define VFIELD_BASE "$vf" @@ -5603,6 +5620,8 @@ extern GTY(()) vec *keyed_classes; #else /* NO_DOLLAR_IN_LABEL */ +#define JOIN_STR "_" + #define VTABLE_NAME "__vt_" #define VTABLE_NAME_P(ID_NODE) \ (!strncmp (IDENTIFIER_POINTER (ID_NODE), VTABLE_NAME, \ @@ -8292,6 +8311,9 @@ extern tree finish_co_yield_expr (location_t, tree); extern tree coro_validate_builtin_call (tree, tsubst_flags_t = tf_warning_or_error); extern bool morph_fn_to_coro (tree, tree *, tree *); +extern tree coro_get_actor_function (tree); +extern tree coro_get_destroy_function (tree); +extern tree coro_get_ramp_function (tree); /* Inline bodies. */ diff --git a/gcc/cp/mangle.c b/gcc/cp/mangle.c index ee14c2d..bf4abba 100644 --- a/gcc/cp/mangle.c +++ b/gcc/cp/mangle.c @@ -832,6 +832,18 @@ write_encoding (const tree decl) write_bare_function_type (fn_type, mangle_return_type_p (decl), d); + + /* If this is a coroutine helper, then append an appropriate string to + identify which. 
*/ + if (tree ramp = DECL_RAMP_FN (decl)) + { + if (DECL_ACTOR_FN (ramp) == decl) + write_string (JOIN_STR "actor"); + else if (DECL_DESTROY_FN (ramp) == decl) + write_string (JOIN_STR "destroy"); + else + gcc_unreachable (); + } } } @@ -1423,9 +1435,12 @@ write_unqualified_name (tree decl) } else if (DECL_OVERLOADED_OPERATOR_P (decl)) { + tree t; + if (!(t = DECL_RAMP_FN (decl))) + t = decl; const char *mangled_name - = (ovl_op_info[DECL_ASSIGNMENT_OPERATOR_P (decl)] - [DECL_OVERLOADED_OPERATOR_CODE_RAW (decl)].mangled_name); + = (ovl_op_info[DECL_ASSIGNMENT_OPERATOR_P (t)] + [DECL_OVERLOADED_OPERATOR_CODE_RAW (t)].mangled_name); write_string (mangled_name); } else if (UDLIT_OPER_P (DECL_NAME (decl))) diff --git a/gcc/testsuite/g++.dg/coroutines/pr95520.C b/gcc/testsuite/g++.dg/coroutines/pr95520.C new file mode 100644 index 0000000..4849b07 --- /dev/null +++ b/gcc/testsuite/g++.dg/coroutines/pr95520.C @@ -0,0 +1,29 @@ +// { dg-do run } +// { dg-output "coroutine name: MyFoo" } +#include +#include + +struct pt +{ + using handle_t = std::coroutine_handle; + auto get_return_object() noexcept { return handle_t::from_promise(*this); } + + std::suspend_never initial_suspend () const noexcept { return {}; } + std::suspend_never final_suspend () const noexcept { return {}; } + void return_void() const noexcept {} + void unhandled_exception() const noexcept {} +}; + +template <> struct std::coroutine_traits + { using promise_type = pt; }; + +static pt::handle_t MyFoo () +{ + printf ("coroutine name: %s\n", __builtin_FUNCTION()); + co_return; +} + +int main() +{ + MyFoo (); +} -- cgit v1.1 From 4e2fa18262660d7753f10f84cc49d85fac1c8798 Mon Sep 17 00:00:00 2001 From: Martin Sebor Date: Mon, 19 Jul 2021 16:45:10 -0600 Subject: PR testsuite 101520 - gcc.target/powerpc/pr93658.c has excess errors gcc/testsuite/ChangeLog: * gcc.target/powerpc/pr93658.c: Suppress valid warnings. 
--- gcc/testsuite/gcc.target/powerpc/pr93658.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.target/powerpc/pr93658.c b/gcc/testsuite/gcc.target/powerpc/pr93658.c index d1010ee..8858ca8 100644 --- a/gcc/testsuite/gcc.target/powerpc/pr93658.c +++ b/gcc/testsuite/gcc.target/powerpc/pr93658.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O3 -fstack-protector-strong -mdejagnu-cpu=power8" } */ +/* { dg-options "-O3 -Wno-stringop-overflow -fstack-protector-strong -mdejagnu-cpu=power8" } */ /* { dg-require-effective-target powerpc_vsx_ok } */ /* PR93658: Failure compiling this test is an infinite loop in LRA's -- cgit v1.1 From 476242fa5ceaa91393562814471c385ebfdd41f3 Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Mon, 19 Jul 2021 11:53:05 -0700 Subject: compiler: avoid aliases in receiver types If a package declares a method on an alias type, the alias would be used in the export data. This would then trigger a compiler assertion on import: we should not be adding methods to aliases. Fix the problem by ensuring that receiver types do not use alias types. This seems preferable to consistently avoiding aliases in export data, as aliases can cross packages. And it's painful to try to patch this while writing the export data, as at that point all the types are known. Test case is https://golang.org/cl/335172. 
Fixes golang/go#47131 Reviewed-on: https://go-review.googlesource.com/c/gofrontend/+/335729 --- gcc/go/gofrontend/MERGE | 2 +- gcc/go/gofrontend/gogo.cc | 40 ++++++++++++++++++++++++++++++++++++ gcc/go/gofrontend/gogo.h | 8 ++++++++ gcc/go/gofrontend/types.cc | 51 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 100 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/go/gofrontend/MERGE b/gcc/go/gofrontend/MERGE index 4d0f44f..5323e18 100644 --- a/gcc/go/gofrontend/MERGE +++ b/gcc/go/gofrontend/MERGE @@ -1,4 +1,4 @@ -adcf10890833026437a94da54934ce50c0018309 +920549b6382a2623538d31001271941f0e9e5a51 The first line of this file holds the git revision number of the last merge done from the gofrontend repository. diff --git a/gcc/go/gofrontend/gogo.cc b/gcc/go/gofrontend/gogo.cc index 2872d2e..95b76bd 100644 --- a/gcc/go/gofrontend/gogo.cc +++ b/gcc/go/gofrontend/gogo.cc @@ -5763,6 +5763,26 @@ Function::check_labels() const } } +// Set the receiver type. This is used to remove aliases. + +void +Function::set_receiver_type(Type* rtype) +{ + Function_type* oft = this->type_; + Typed_identifier* rec = new Typed_identifier(oft->receiver()->name(), + rtype, + oft->receiver()->location()); + Typed_identifier_list* parameters = NULL; + if (oft->parameters() != NULL) + parameters = oft->parameters()->copy(); + Typed_identifier_list* results = NULL; + if (oft->results() != NULL) + results = oft->results()->copy(); + Function_type* nft = Type::make_function_type(rec, parameters, results, + oft->location()); + this->type_ = nft; +} + // Swap one function with another. This is used when building the // thunk we use to call a function which calls recover. It may not // work for any other case. @@ -7285,6 +7305,26 @@ Function_declaration::set_nointerface() this->pragmas_ |= GOPRAGMA_NOINTERFACE; } +// Set the receiver type. This is used to remove aliases. 
+ +void +Function_declaration::set_receiver_type(Type* rtype) +{ + Function_type* oft = this->fntype_; + Typed_identifier* rec = new Typed_identifier(oft->receiver()->name(), + rtype, + oft->receiver()->location()); + Typed_identifier_list* parameters = NULL; + if (oft->parameters() != NULL) + parameters = oft->parameters()->copy(); + Typed_identifier_list* results = NULL; + if (oft->results() != NULL) + results = oft->results()->copy(); + Function_type* nft = Type::make_function_type(rec, parameters, results, + oft->location()); + this->fntype_ = nft; +} + // Import an inlinable function. This is used for an inlinable // function whose body is recorded in the export data. Parse the // export data into a Block and create a regular function using that diff --git a/gcc/go/gofrontend/gogo.h b/gcc/go/gofrontend/gogo.h index f4155a2..c49bc92 100644 --- a/gcc/go/gofrontend/gogo.h +++ b/gcc/go/gofrontend/gogo.h @@ -1724,6 +1724,10 @@ class Function set_is_referenced_by_inline() { this->is_referenced_by_inline_ = true; } + // Set the receiver type. This is used to remove aliases. + void + set_receiver_type(Type* rtype); + // Swap with another function. Used only for the thunk which calls // recover. void @@ -1990,6 +1994,10 @@ class Function_declaration set_is_on_inlinable_list() { this->is_on_inlinable_list_ = true; } + // Set the receiver type. This is used to remove aliases. + void + set_receiver_type(Type* rtype); + // Import the function body, creating a function. void import_function_body(Gogo*, Named_object*); diff --git a/gcc/go/gofrontend/types.cc b/gcc/go/gofrontend/types.cc index d08cbc9..ab7166b 100644 --- a/gcc/go/gofrontend/types.cc +++ b/gcc/go/gofrontend/types.cc @@ -10416,6 +10416,57 @@ Named_type::finalize_methods(Gogo* gogo) return; } + // Remove any aliases in the local method receiver types. 
+ Bindings* methods = this->local_methods_; + if (methods != NULL) + { + for (Bindings::const_declarations_iterator p = + methods->begin_declarations(); + p != methods->end_declarations(); + ++p) + { + Named_object* no = p->second; + Function_type* fntype; + if (no->is_function()) + fntype = no->func_value()->type(); + else if (no->is_function_declaration()) + fntype = no->func_declaration_value()->type(); + else + { + go_assert(saw_errors()); + continue; + } + + Type* rtype = fntype->receiver()->type(); + bool is_pointer = false; + Type* pt = rtype->points_to(); + if (pt != NULL) + { + rtype = pt; + is_pointer = true; + } + if (rtype->named_type() != this) + { + if (rtype->unalias() != this) + { + go_assert(saw_errors()); + continue; + } + + rtype = this; + if (is_pointer) + rtype = Type::make_pointer_type(rtype); + + if (no->is_function()) + no->func_value()->set_receiver_type(rtype); + else if (no->is_function_declaration()) + no->func_declaration_value()->set_receiver_type(rtype); + else + go_unreachable(); + } + } + } + Type::finalize_methods(gogo, this, this->location_, &this->all_methods_); } -- cgit v1.1 From 21ea2f9320d31d3d925031a8ba189d9b19e52bc1 Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Tue, 20 Jul 2021 00:16:38 +0000 Subject: Daily bump. --- gcc/ChangeLog | 82 +++++++++++++++++++++++++++++++++++++++++++++++++ gcc/DATESTAMP | 2 +- gcc/analyzer/ChangeLog | 34 ++++++++++++++++++++ gcc/cp/ChangeLog | 18 +++++++++++ gcc/testsuite/ChangeLog | 49 +++++++++++++++++++++++++++++ 5 files changed, 184 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c7053c0..0d931bf 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,85 @@ +2021-07-19 Indu Bhagat + + * config/elfos.h (CTF_DEBUGGING_INFO): New definition. + (BTF_DEBUGGING_INFO): Likewise. + * doc/tm.texi.in: Document the new macros. + * doc/tm.texi: Regenerated. + * toplev.c: Guard initialization of debug hooks. 
+ +2021-07-19 Indu Bhagat + + * flags.h (ctf_debuginfo_p): New function declaration. + * opts.c (ctf_debuginfo_p): New function definition. + +2021-07-19 Andrew Stubbs + + PR target/100208 + * config/gcn/gcn-hsa.h (DRIVER_SELF_SPECS): New. + (ASM_SPEC): Set -mattr for xnack and sram-ecc. + * config/gcn/gcn-opts.h (enum sram_ecc_type): New. + * config/gcn/gcn-valu.md: Add a warning comment. + * config/gcn/gcn.c (gcn_option_override): Add "sorry" for -mxnack. + (output_file_start): Add xnack and sram-ecc state to ".amdgcn_target". + * config/gcn/gcn.md: Add a warning comment. + * config/gcn/gcn.opt: Add -mxnack and -msram-ecc. + * config/gcn/mkoffload.c (EF_AMDGPU_MACH_AMDGCN_GFX908): Remove + SRAM-ECC flag. + (EF_AMDGPU_XNACK): New. + (EF_AMDGPU_SRAM_ECC): New. + (elf_flags): New. + (copy_early_debug_info): Use elf_flags. + (main): Handle -mxnack and -msram-ecc options. + * doc/invoke.texi: Document -mxnack and -msram-ecc. + +2021-07-19 Andrew Pinski + + PR target/101205 + * config/aarch64/aarch64.md (csneg3_uxtw_insn): Rename to ... + (*cs3_uxtw_insn4): and extend to NEG_NOT. + +2021-07-19 Richard Biener + + PR tree-optimization/101505 + * tree-vect-patterns.c (vect_determine_precisions): Walk + PHIs also for loop vectorization. + +2021-07-19 Richard Biener + + * gimple.h (gimple_expr_type): Remove. + * doc/gimple.texi: Remove gimple_expr_type documentation. + +2021-07-19 Richard Biener + + * tree-ssa-sccvn.c (vn_reference_eq): Handle NULL vr->type. + (ao_ref_init_from_vn_reference): Likewise. + (fully_constant_reference): Likewise. + (vn_reference_lookup_call): Do not set vr->type to random + values. + * tree-ssa-pre.c (compute_avail): Do not try to PRE calls + without a value. + * tree-vect-generic.c (expand_vector_piecewise): Pass in + whether we expanded parallel. + (expand_vector_parallel): Adjust. + (expand_vector_addition): Likewise. + (expand_vector_comparison): Likewise. + (expand_vector_operation): Likewise. + (expand_vector_scalar_condition): Likewise. 
+ (expand_vector_conversion): Likewise. + +2021-07-19 Richard Biener + + * tree-vrp.c (register_edge_assert_for_2): Use the + type from the LHS. + (vrp_folder::fold_predicate_in): Likewise. + * vr-values.c (gimple_assign_nonzero_p): Likewise. + (vr_values::extract_range_from_comparison): Likewise. + (vr_values::extract_range_from_ubsan_builtin): Use the + type of the first operand. + (vr_values::extract_range_basic): Push down type + computation, use the appropriate LHS. + (vr_values::extract_range_from_assignment): Use the + type of the LHS. + 2021-07-18 H.J. Lu PR target/101492 diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 844ac74..4d9def3 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20210719 +20210720 diff --git a/gcc/analyzer/ChangeLog b/gcc/analyzer/ChangeLog index 7b63636..f32fe08 100644 --- a/gcc/analyzer/ChangeLog +++ b/gcc/analyzer/ChangeLog @@ -1,3 +1,37 @@ +2021-07-19 David Malcolm + + PR analyzer/101503 + * constraint-manager.cc (constraint_manager::add_constraint): Use + can_have_associated_state_p rather than testing for unknown. + (constraint_manager::get_or_add_equiv_class): Likewise. + * program-state.cc (sm_state_map::set_state): Likewise. + (sm_state_map::impl_set_state): Add assertion. + * region-model-manager.cc + (region_model_manager::maybe_fold_unaryop): Handle poisoned + values. + (region_model_manager::maybe_fold_binop): Move handling of unknown + values... + (region_model_manager::get_or_create_binop): ...to here, and + generalize to use can_have_associated_state_p. + (region_model_manager::maybe_fold_sub_svalue): Use + can_have_associated_state_p rather than testing for unknown. + (region_model_manager::maybe_fold_repeated_svalue): Use unknown + when the size or repeated value is "unknown"/"poisoned". + * region-model.cc (region_model::purge_state_involving): Reject + attempts to purge unknown/poisoned svalues, as these svalues + should not have state associated with them. 
+ * svalue.cc (sub_svalue::sub_svalue): Assert that we're building + on top of an svalue with can_have_associated_state_p. + (repeated_svalue::repeated_svalue): Likewise. + (bits_within_svalue::bits_within_svalue): Likewise. + * svalue.h (svalue::can_have_associated_state_p): New. + (unknown_svalue::can_have_associated_state_p): New. + (poisoned_svalue::can_have_associated_state_p): New. + (unaryop_svalue::unaryop_svalue): Assert that we're building on + top of an svalue with can_have_associated_state_p. + (binop_svalue::binop_svalue): Likewise. + (widening_svalue::widening_svalue): Likewise. + 2021-07-16 David Malcolm * analyzer.h (enum access_direction): New. diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index 0c4734f..bb46293 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,21 @@ +2021-07-19 Iain Sandoe + + PR c++/95520 + * coroutines.cc (struct coroutine_info): Add fields for + actor and destroy function decls. + (to_ramp): New. + (coro_get_ramp_function): New. + (coro_get_actor_function): New. + (coro_get_destroy_function): New. + (act_des_fn): Set up mapping between ramp, actor and + destroy functions. + (morph_fn_to_coro): Adjust interface to the builder for + helper function decls. + * cp-tree.h (DECL_ACTOR_FN, DECL_DESTROY_FN, DECL_RAMP_FN, + JOIN_STR): New. + * mangle.c (write_encoding): Handle coroutine helpers. + (write_unqualified_name): Handle lambda coroutine helpers. + 2021-07-16 Patrick Palka PR c++/101233 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index d726e73..501f5ab 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,52 @@ +2021-07-19 Martin Sebor + + * gcc.target/powerpc/pr93658.c: Suppress valid warnings. + +2021-07-19 Iain Sandoe + + PR c++/95520 + * g++.dg/coroutines/pr95520.C: New test. + +2021-07-19 David Malcolm + + PR analyzer/101503 + * gcc.dg/analyzer/pr101503.c: New test. 
+ +2021-07-19 Indu Bhagat + + * gcc.dg/debug/btf/btf.exp: Do not run BTF testsuite if target does not + support BTF format. Remove redundant check for AIX. + * gcc.dg/debug/ctf/ctf.exp: Do not run CTF testsuite if target does not + support CTF format. Remove redundant check for AIX. + * lib/gcc-dg.exp: Remove redundant check for AIX. + +2021-07-19 Andrew Stubbs + + PR target/100208 + * gcc.target/gcn/sram-ecc-1.c: New test. + * gcc.target/gcn/sram-ecc-2.c: New test. + * gcc.target/gcn/sram-ecc-3.c: New test. + * gcc.target/gcn/sram-ecc-4.c: New test. + * gcc.target/gcn/sram-ecc-5.c: New test. + * gcc.target/gcn/sram-ecc-6.c: New test. + * gcc.target/gcn/sram-ecc-7.c: New test. + * gcc.target/gcn/sram-ecc-8.c: New test. + +2021-07-19 Andrew Pinski + + PR target/101205 + * gcc.target/aarch64/csinv-neg-1.c: New test. + +2021-07-19 Marek Polacek + + DR 2126 + * g++.dg/cpp0x/constexpr-temp2.C: New test. + +2021-07-19 Richard Biener + + PR tree-optimization/101505 + * gcc.dg/vect/pr101505.c: New testcase. + 2021-07-18 H.J. Lu PR target/101492 -- cgit v1.1 From a1d27560770818c514ad1ad6683f89e1e1bcd0ec Mon Sep 17 00:00:00 2001 From: Kewen Lin Date: Mon, 19 Jul 2021 20:49:17 -0500 Subject: vect: Recog mul_highpart pattern [PR100696] This patch is to extend the existing pattern mulhs handlings to cover normal multiply highpart pattern recognization, it introduces one new internal function IFN_MULH for 1:1 map to [su]mul_highpart optab. Since it covers MULT_HIGHPART_EXPR with optab support, i386 part change is to ensure it follows the consistent costing path. Bootstrapped & regtested on powerpc64le-linux-gnu P9, x86_64-redhat-linux and aarch64-linux-gnu. gcc/ChangeLog: PR tree-optimization/100696 * internal-fn.c (first_commutative_argument): Add info for IFN_MULH. * internal-fn.def (IFN_MULH): New internal function. * tree-vect-patterns.c (vect_recog_mulhs_pattern): Add support to recog normal multiply highpart as IFN_MULH. 
* config/i386/i386.c (ix86_add_stmt_cost): Adjust for combined function CFN_MULH. gcc/testsuite/ChangeLog: PR tree-optimization/100696 * gcc.target/i386/pr100637-3w.c: Adjust for mul_highpart recog. --- gcc/config/i386/i386.c | 3 +++ gcc/internal-fn.c | 1 + gcc/internal-fn.def | 2 ++ gcc/testsuite/gcc.target/i386/pr100637-3w.c | 6 ++--- gcc/tree-vect-patterns.c | 38 +++++++++++++++++++++-------- 5 files changed, 37 insertions(+), 13 deletions(-) (limited to 'gcc') diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 8481693..ff96134 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -22568,6 +22568,9 @@ ix86_add_stmt_cost (class vec_info *vinfo, void *data, int count, mode == SFmode ? ix86_cost->fmass : ix86_cost->fmasd); break; + case CFN_MULH: + stmt_cost = ix86_multiplication_cost (ix86_cost, mode); + break; default: break; } diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c index cd5e63f..1360a00 100644 --- a/gcc/internal-fn.c +++ b/gcc/internal-fn.c @@ -3703,6 +3703,7 @@ first_commutative_argument (internal_fn fn) case IFN_FNMS: case IFN_AVG_FLOOR: case IFN_AVG_CEIL: + case IFN_MULH: case IFN_MULHS: case IFN_MULHRS: case IFN_FMIN: diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def index a7003d5..3ac9ae6 100644 --- a/gcc/internal-fn.def +++ b/gcc/internal-fn.def @@ -169,6 +169,8 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_FLOOR, ECF_CONST | ECF_NOTHROW, first, DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_CEIL, ECF_CONST | ECF_NOTHROW, first, savg_ceil, uavg_ceil, binary) +DEF_INTERNAL_SIGNED_OPTAB_FN (MULH, ECF_CONST | ECF_NOTHROW, first, + smul_highpart, umul_highpart, binary) DEF_INTERNAL_SIGNED_OPTAB_FN (MULHS, ECF_CONST | ECF_NOTHROW, first, smulhs, umulhs, binary) DEF_INTERNAL_SIGNED_OPTAB_FN (MULHRS, ECF_CONST | ECF_NOTHROW, first, diff --git a/gcc/testsuite/gcc.target/i386/pr100637-3w.c b/gcc/testsuite/gcc.target/i386/pr100637-3w.c index b951f30..4ea467b 100644 --- a/gcc/testsuite/gcc.target/i386/pr100637-3w.c +++ 
b/gcc/testsuite/gcc.target/i386/pr100637-3w.c @@ -1,6 +1,6 @@ /* PR target/100637 */ /* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -msse4" } */ +/* { dg-options "-O2 -ftree-vectorize -msse4 -fno-vect-cost-model" } */ short r[2], a[2], b[2]; unsigned short ur[2], ua[2], ub[2]; @@ -13,7 +13,7 @@ void mulh (void) r[i] = ((int) a[i] * b[i]) >> 16; } -/* { dg-final { scan-assembler "pmulhw" { xfail *-*-* } } } */ +/* { dg-final { scan-assembler "pmulhw" } } */ void mulhu (void) { @@ -23,7 +23,7 @@ void mulhu (void) ur[i] = ((unsigned int) ua[i] * ub[i]) >> 16; } -/* { dg-final { scan-assembler "pmulhuw" { xfail *-*-* } } } */ +/* { dg-final { scan-assembler "pmulhuw" } } */ void mulhrs (void) { diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c index 44f6c9b..70bb751 100644 --- a/gcc/tree-vect-patterns.c +++ b/gcc/tree-vect-patterns.c @@ -1934,8 +1934,15 @@ vect_recog_over_widening_pattern (vec_info *vinfo, 1) Multiply high with scaling TYPE res = ((TYPE) a * (TYPE) b) >> c; + Here, c is bitsize (TYPE) / 2 - 1. + 2) ... or also with rounding TYPE res = (((TYPE) a * (TYPE) b) >> d + 1) >> 1; + Here, d is bitsize (TYPE) / 2 - 2. + + 3) Normal multiply high + TYPE res = ((TYPE) a * (TYPE) b) >> e; + Here, e is bitsize (TYPE) / 2. where only the bottom half of res is used. */ @@ -1980,7 +1987,6 @@ vect_recog_mulhs_pattern (vec_info *vinfo, stmt_vec_info mulh_stmt_info; tree scale_term; internal_fn ifn; - unsigned int expect_offset; /* Check for the presence of the rounding term. */ if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR) @@ -2029,25 +2035,37 @@ vect_recog_mulhs_pattern (vec_info *vinfo, /* Get the scaling term. */ scale_term = gimple_assign_rhs2 (plus_input_stmt); + /* Check that the scaling factor is correct. */ + if (TREE_CODE (scale_term) != INTEGER_CST) + return NULL; + + /* Check pattern 2). 
*/ + if (wi::to_widest (scale_term) + target_precision + 2 + != TYPE_PRECISION (lhs_type)) + return NULL; - expect_offset = target_precision + 2; ifn = IFN_MULHRS; } else { mulh_stmt_info = rshift_input_stmt_info; scale_term = gimple_assign_rhs2 (last_stmt); + /* Check that the scaling factor is correct. */ + if (TREE_CODE (scale_term) != INTEGER_CST) + return NULL; - expect_offset = target_precision + 1; - ifn = IFN_MULHS; + /* Check for pattern 1). */ + if (wi::to_widest (scale_term) + target_precision + 1 + == TYPE_PRECISION (lhs_type)) + ifn = IFN_MULHS; + /* Check for pattern 3). */ + else if (wi::to_widest (scale_term) + target_precision + == TYPE_PRECISION (lhs_type)) + ifn = IFN_MULH; + else + return NULL; } - /* Check that the scaling factor is correct. */ - if (TREE_CODE (scale_term) != INTEGER_CST - || wi::to_widest (scale_term) + expect_offset - != TYPE_PRECISION (lhs_type)) - return NULL; - /* Check whether the scaling input term can be seen as two widened inputs multiplied together. */ vect_unpromoted_value unprom_mult[2]; -- cgit v1.1 From 1c0d49b9ce9ab011fa77d4eac689fa1a038123ef Mon Sep 17 00:00:00 2001 From: Kewen Lin Date: Mon, 19 Jul 2021 20:50:13 -0500 Subject: rs6000: Support [u]mul3_highpart for vector This patch is to make Power10 newly introduced vector multiply high (part) instructions exploited in vectorized loops, it renames existing define_insns as standard pattern names. It depends on that patch which enables vectorizer to recog mul_highpart. gcc/ChangeLog: * config/rs6000/vsx.md (mulhs_): Rename to... (smul3_highpart): ... this. (mulhu_): Rename to... (umul3_highpart): ... this. * config/rs6000/rs6000-builtin.def (MULHS_V2DI, MULHS_V4SI, MULHU_V2DI, MULHU_V4SI): Adjust. gcc/testsuite/ChangeLog: * gcc.target/powerpc/mul-vectorize-3.c: New test. * gcc.target/powerpc/mul-vectorize-4.c: New test. 
--- gcc/config/rs6000/rs6000-builtin.def | 8 +++--- gcc/config/rs6000/vsx.md | 4 +-- gcc/testsuite/gcc.target/powerpc/mul-vectorize-3.c | 32 +++++++++++++++++++++ gcc/testsuite/gcc.target/powerpc/mul-vectorize-4.c | 33 ++++++++++++++++++++++ 4 files changed, 71 insertions(+), 6 deletions(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/mul-vectorize-3.c create mode 100644 gcc/testsuite/gcc.target/powerpc/mul-vectorize-4.c (limited to 'gcc') diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def index a0dfefc..9dbf16f 100644 --- a/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc/config/rs6000/rs6000-builtin.def @@ -3035,10 +3035,10 @@ BU_P10V_AV_2 (MODS_V2DI, "vmodsd", CONST, modv2di3) BU_P10V_AV_2 (MODS_V4SI, "vmodsw", CONST, modv4si3) BU_P10V_AV_2 (MODU_V2DI, "vmodud", CONST, umodv2di3) BU_P10V_AV_2 (MODU_V4SI, "vmoduw", CONST, umodv4si3) -BU_P10V_AV_2 (MULHS_V2DI, "vmulhsd", CONST, mulhs_v2di) -BU_P10V_AV_2 (MULHS_V4SI, "vmulhsw", CONST, mulhs_v4si) -BU_P10V_AV_2 (MULHU_V2DI, "vmulhud", CONST, mulhu_v2di) -BU_P10V_AV_2 (MULHU_V4SI, "vmulhuw", CONST, mulhu_v4si) +BU_P10V_AV_2 (MULHS_V2DI, "vmulhsd", CONST, smulv2di3_highpart) +BU_P10V_AV_2 (MULHS_V4SI, "vmulhsw", CONST, smulv4si3_highpart) +BU_P10V_AV_2 (MULHU_V2DI, "vmulhud", CONST, umulv2di3_highpart) +BU_P10V_AV_2 (MULHU_V4SI, "vmulhuw", CONST, umulv4si3_highpart) BU_P10V_AV_2 (MULLD_V2DI, "vmulld", CONST, mulv2di3) BU_P10V_VSX_1 (VXXSPLTIW_V4SI, "vxxspltiw_v4si", CONST, xxspltiw_v4si) diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index f622873..6f6fc0b 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -6351,7 +6351,7 @@ [(set_attr "type" "vecdiv") (set_attr "size" "")]) -(define_insn "mulhs_" +(define_insn "smul3_highpart" [(set (match_operand:VIlong 0 "vsx_register_operand" "=v") (mult:VIlong (ashiftrt (match_operand:VIlong 1 "vsx_register_operand" "v") @@ -6363,7 +6363,7 @@ "vmulhs %0,%1,%2" [(set_attr "type" "veccomplex")]) 
-(define_insn "mulhu_" +(define_insn "umul3_highpart" [(set (match_operand:VIlong 0 "vsx_register_operand" "=v") (us_mult:VIlong (ashiftrt (match_operand:VIlong 1 "vsx_register_operand" "v") diff --git a/gcc/testsuite/gcc.target/powerpc/mul-vectorize-3.c b/gcc/testsuite/gcc.target/powerpc/mul-vectorize-3.c new file mode 100644 index 0000000..2c89c0f --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/mul-vectorize-3.c @@ -0,0 +1,32 @@ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fdump-tree-vect-details" } */ + +/* Test vectorizer can exploit ISA 3.1 instructions Vector Multiply + High Signed/Unsigned Word for both signed and unsigned int high part + multiplication. */ + +#define N 128 + +extern signed int si_a[N], si_b[N], si_c[N]; +extern unsigned int ui_a[N], ui_b[N], ui_c[N]; + +typedef signed long long sLL; +typedef unsigned long long uLL; + +__attribute__ ((noipa)) void +test_si () +{ + for (int i = 0; i < N; i++) + si_c[i] = ((sLL) si_a[i] * (sLL) si_b[i]) >> 32; +} + +__attribute__ ((noipa)) void +test_ui () +{ + for (int i = 0; i < N; i++) + ui_c[i] = ((uLL) ui_a[i] * (uLL) ui_b[i]) >> 32; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */ +/* { dg-final { scan-assembler-times {\mvmulhsw\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mvmulhuw\M} 1 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/mul-vectorize-4.c b/gcc/testsuite/gcc.target/powerpc/mul-vectorize-4.c new file mode 100644 index 0000000..265e758 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/mul-vectorize-4.c @@ -0,0 +1,33 @@ +/* { dg-require-effective-target power10_ok } */ +/* { dg-require-effective-target int128 } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fdump-tree-vect-details" } */ + +/* Test vectorizer can exploit ISA 3.1 instructions Vector Multiply + High Signed/Unsigned 
Doubleword for both signed and unsigned long + long high part multiplication. */ + +#define N 128 + +extern signed long long sll_a[N], sll_b[N], sll_c[N]; +extern unsigned long long ull_a[N], ull_b[N], ull_c[N]; + +typedef signed __int128 s128; +typedef unsigned __int128 u128; + +__attribute__ ((noipa)) void +test_sll () +{ + for (int i = 0; i < N; i++) + sll_c[i] = ((s128) sll_a[i] * (s128) sll_b[i]) >> 64; +} + +__attribute__ ((noipa)) void +test_ull () +{ + for (int i = 0; i < N; i++) + ull_c[i] = ((u128) ull_a[i] * (u128) ull_b[i]) >> 64; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */ +/* { dg-final { scan-assembler-times {\mvmulhsd\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mvmulhud\M} 1 } } */ -- cgit v1.1 From e695f0101a8cacbc29353c5a000731e50b2627e6 Mon Sep 17 00:00:00 2001 From: Kito Cheng Date: Tue, 20 Jul 2021 11:02:27 +0800 Subject: RISC-V: Detect python and pick best one for calling multilib-generator gcc/ * config.gcc (riscv*-*-*): Detect which python is available. 
--- gcc/config.gcc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/config.gcc b/gcc/config.gcc index 93e2b32..3df9b52 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -4730,9 +4730,10 @@ case "${target}" in echo "--with-multilib-list= can't used with --with-multilib-generator= at same time" 1>&2 exit 1 fi + PYTHON=`which python || which python3 || which python2` case "${target}" in riscv*-*-elf*) - if ${srcdir}/config/riscv/multilib-generator \ + if ${PYTHON} ${srcdir}/config/riscv/multilib-generator \ `echo ${with_multilib_generator} | sed 's/;/ /g'`\ > t-multilib-config; then -- cgit v1.1 From 6d4da4aeef5b20f7f9693ddc27d26740d0dbe36c Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Tue, 20 Jul 2021 06:15:16 +0200 Subject: i386: Remove atomic_storedi_fpu and atomic_loaddi_fpu peepholes [PR100182] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These patterns result in non-atomic sequence. 2021-07-21 Uroš Bizjak gcc/ PR target/100182 * config/i386/sync.md (define_peephole2 atomic_storedi_fpu): Remove. (define_peephole2 atomic_loaddi_fpu): Ditto. gcc/testsuite/ PR target/100182 * gcc.target/i386/pr71245-1.c: Remove. * gcc.target/i386/pr71245-2.c: Ditto. 
--- gcc/config/i386/sync.md | 152 ------------------------------ gcc/testsuite/gcc.target/i386/pr71245-1.c | 22 ----- gcc/testsuite/gcc.target/i386/pr71245-2.c | 22 ----- 3 files changed, 196 deletions(-) delete mode 100644 gcc/testsuite/gcc.target/i386/pr71245-1.c delete mode 100644 gcc/testsuite/gcc.target/i386/pr71245-2.c (limited to 'gcc') diff --git a/gcc/config/i386/sync.md b/gcc/config/i386/sync.md index 7913b91..05a8352 100644 --- a/gcc/config/i386/sync.md +++ b/gcc/config/i386/sync.md @@ -219,82 +219,6 @@ DONE; }) -(define_peephole2 - [(set (match_operand:DF 0 "fp_register_operand") - (unspec:DF [(match_operand:DI 1 "memory_operand")] - UNSPEC_FILD_ATOMIC)) - (set (match_operand:DI 2 "memory_operand") - (unspec:DI [(match_dup 0)] - UNSPEC_FIST_ATOMIC)) - (set (match_operand:DF 3 "sse_reg_operand") - (match_operand:DF 4 "memory_operand"))] - "!TARGET_64BIT - && peep2_reg_dead_p (2, operands[0]) - && rtx_equal_p (XEXP (operands[4], 0), XEXP (operands[2], 0))" - [(set (match_dup 3) (match_dup 5)) - (set (match_dup 4) (match_dup 3))] - "operands[5] = gen_lowpart (DFmode, operands[1]);") - -(define_peephole2 - [(set (match_operand:DF 0 "fp_register_operand") - (unspec:DF [(match_operand:DI 1 "memory_operand")] - UNSPEC_FILD_ATOMIC)) - (set (match_operand:DI 2 "memory_operand") - (unspec:DI [(match_dup 0)] - UNSPEC_FIST_ATOMIC)) - (set (mem:BLK (scratch:SI)) - (unspec:BLK [(mem:BLK (scratch:SI))] UNSPEC_MEMORY_BLOCKAGE)) - (set (match_operand:DF 3 "sse_reg_operand") - (match_operand:DF 4 "memory_operand"))] - "!TARGET_64BIT - && peep2_reg_dead_p (2, operands[0]) - && rtx_equal_p (XEXP (operands[4], 0), XEXP (operands[2], 0))" - [(const_int 0)] -{ - emit_move_insn (operands[3], gen_lowpart (DFmode, operands[1])); - emit_move_insn (operands[4], operands[3]); - emit_insn (gen_memory_blockage ()); - DONE; -}) - -(define_peephole2 - [(set (match_operand:DF 0 "sse_reg_operand") - (unspec:DF [(match_operand:DI 1 "memory_operand")] - UNSPEC_LDX_ATOMIC)) - (set 
(match_operand:DI 2 "memory_operand") - (unspec:DI [(match_dup 0)] - UNSPEC_STX_ATOMIC)) - (set (match_operand:DF 3 "sse_reg_operand") - (match_operand:DF 4 "memory_operand"))] - "!TARGET_64BIT - && peep2_reg_dead_p (2, operands[0]) - && rtx_equal_p (XEXP (operands[4], 0), XEXP (operands[2], 0))" - [(set (match_dup 3) (match_dup 5)) - (set (match_dup 4) (match_dup 3))] - "operands[5] = gen_lowpart (DFmode, operands[1]);") - -(define_peephole2 - [(set (match_operand:DF 0 "sse_reg_operand") - (unspec:DF [(match_operand:DI 1 "memory_operand")] - UNSPEC_LDX_ATOMIC)) - (set (match_operand:DI 2 "memory_operand") - (unspec:DI [(match_dup 0)] - UNSPEC_STX_ATOMIC)) - (set (mem:BLK (scratch:SI)) - (unspec:BLK [(mem:BLK (scratch:SI))] UNSPEC_MEMORY_BLOCKAGE)) - (set (match_operand:DF 3 "sse_reg_operand") - (match_operand:DF 4 "memory_operand"))] - "!TARGET_64BIT - && peep2_reg_dead_p (2, operands[0]) - && rtx_equal_p (XEXP (operands[4], 0), XEXP (operands[2], 0))" - [(const_int 0)] -{ - emit_move_insn (operands[3], gen_lowpart (DFmode, operands[1])); - emit_move_insn (operands[4], operands[3]); - emit_insn (gen_memory_blockage ()); - DONE; -}) - (define_expand "atomic_store" [(set (match_operand:ATOMIC 0 "memory_operand") (unspec:ATOMIC [(match_operand:ATOMIC 1 "nonimmediate_operand") @@ -384,82 +308,6 @@ DONE; }) -(define_peephole2 - [(set (match_operand:DF 0 "memory_operand") - (match_operand:DF 1 "any_fp_register_operand")) - (set (match_operand:DF 2 "fp_register_operand") - (unspec:DF [(match_operand:DI 3 "memory_operand")] - UNSPEC_FILD_ATOMIC)) - (set (match_operand:DI 4 "memory_operand") - (unspec:DI [(match_dup 2)] - UNSPEC_FIST_ATOMIC))] - "!TARGET_64BIT - && peep2_reg_dead_p (3, operands[2]) - && rtx_equal_p (XEXP (operands[0], 0), XEXP (operands[3], 0))" - [(set (match_dup 0) (match_dup 1)) - (set (match_dup 5) (match_dup 1))] - "operands[5] = gen_lowpart (DFmode, operands[4]);") - -(define_peephole2 - [(set (match_operand:DF 0 "memory_operand") - (match_operand:DF 
1 "any_fp_register_operand")) - (set (mem:BLK (scratch:SI)) - (unspec:BLK [(mem:BLK (scratch:SI))] UNSPEC_MEMORY_BLOCKAGE)) - (set (match_operand:DF 2 "fp_register_operand") - (unspec:DF [(match_operand:DI 3 "memory_operand")] - UNSPEC_FILD_ATOMIC)) - (set (match_operand:DI 4 "memory_operand") - (unspec:DI [(match_dup 2)] - UNSPEC_FIST_ATOMIC))] - "!TARGET_64BIT - && peep2_reg_dead_p (4, operands[2]) - && rtx_equal_p (XEXP (operands[0], 0), XEXP (operands[3], 0))" - [(const_int 0)] -{ - emit_move_insn (operands[0], operands[1]); - emit_insn (gen_memory_blockage ()); - emit_move_insn (gen_lowpart (DFmode, operands[4]), operands[1]); - DONE; -}) - -(define_peephole2 - [(set (match_operand:DF 0 "memory_operand") - (match_operand:DF 1 "any_fp_register_operand")) - (set (match_operand:DF 2 "sse_reg_operand") - (unspec:DF [(match_operand:DI 3 "memory_operand")] - UNSPEC_LDX_ATOMIC)) - (set (match_operand:DI 4 "memory_operand") - (unspec:DI [(match_dup 2)] - UNSPEC_STX_ATOMIC))] - "!TARGET_64BIT - && peep2_reg_dead_p (3, operands[2]) - && rtx_equal_p (XEXP (operands[0], 0), XEXP (operands[3], 0))" - [(set (match_dup 0) (match_dup 1)) - (set (match_dup 5) (match_dup 1))] - "operands[5] = gen_lowpart (DFmode, operands[4]);") - -(define_peephole2 - [(set (match_operand:DF 0 "memory_operand") - (match_operand:DF 1 "any_fp_register_operand")) - (set (mem:BLK (scratch:SI)) - (unspec:BLK [(mem:BLK (scratch:SI))] UNSPEC_MEMORY_BLOCKAGE)) - (set (match_operand:DF 2 "sse_reg_operand") - (unspec:DF [(match_operand:DI 3 "memory_operand")] - UNSPEC_LDX_ATOMIC)) - (set (match_operand:DI 4 "memory_operand") - (unspec:DI [(match_dup 2)] - UNSPEC_STX_ATOMIC))] - "!TARGET_64BIT - && peep2_reg_dead_p (4, operands[2]) - && rtx_equal_p (XEXP (operands[0], 0), XEXP (operands[3], 0))" - [(const_int 0)] -{ - emit_move_insn (operands[0], operands[1]); - emit_insn (gen_memory_blockage ()); - emit_move_insn (gen_lowpart (DFmode, operands[4]), operands[1]); - DONE; -}) - ;; ??? 
You'd think that we'd be able to perform this via FLOAT + FIX_TRUNC ;; operations. But the fix_trunc patterns want way more setup than we want ;; to provide. Note that the scratch is DFmode instead of XFmode in order diff --git a/gcc/testsuite/gcc.target/i386/pr71245-1.c b/gcc/testsuite/gcc.target/i386/pr71245-1.c deleted file mode 100644 index 02c0dcb..0000000 --- a/gcc/testsuite/gcc.target/i386/pr71245-1.c +++ /dev/null @@ -1,22 +0,0 @@ -/* PR target/71245 */ -/* { dg-do compile { target ia32 } } */ -/* { dg-options "-O2 -march=pentium -mno-sse -mfpmath=387" } */ - -typedef union -{ - unsigned long long ll; - double d; -} u_t; - -u_t d = { .d = 5.0 }; - -void foo_d (void) -{ - u_t tmp; - - tmp.ll = __atomic_load_n (&d.ll, __ATOMIC_SEQ_CST); - tmp.d += 1.0; - __atomic_store_n (&d.ll, tmp.ll, __ATOMIC_SEQ_CST); -} - -/* { dg-final { scan-assembler-not "(fistp|fild)" { xfail *-*-* } } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr71245-2.c b/gcc/testsuite/gcc.target/i386/pr71245-2.c deleted file mode 100644 index bf37a8c..0000000 --- a/gcc/testsuite/gcc.target/i386/pr71245-2.c +++ /dev/null @@ -1,22 +0,0 @@ -/* PR target/71245 */ -/* { dg-do compile { target ia32 } } */ -/* { dg-options "-O2 -march=pentium -msse -mno-sse2 -mfpmath=387" } */ - -typedef union -{ - unsigned long long ll; - double d; -} u_t; - -u_t d = { .d = 5.0 }; - -void foo_d (void) -{ - u_t tmp; - - tmp.ll = __atomic_load_n (&d.ll, __ATOMIC_SEQ_CST); - tmp.d += 1.0; - __atomic_store_n (&d.ll, tmp.ll, __ATOMIC_SEQ_CST); -} - -/* { dg-final { scan-assembler-not "movlps" { xfail *-*-* } } } */ -- cgit v1.1 From b7e450c97340789687b65ab013dbe25e012b0b6c Mon Sep 17 00:00:00 2001 From: Jonathan Wright Date: Fri, 9 Jul 2021 10:12:19 +0100 Subject: aarch64: Refactor TBL/TBX RTL patterns Rename two-source-register TBL/TBX RTL patterns so that their names better reflect what they do, rather than confusing them with tbl3 or tbx4 patterns. Also use the correct "neon_tbl2" type attribute for both patterns. 
Rename single-source-register TBL/TBX patterns for consistency. gcc/ChangeLog: 2021-07-08 Jonathan Wright * config/aarch64/aarch64-simd-builtins.def: Use two variant generators for all TBL/TBX intrinsics and rename to consistent forms: qtbl[1234] or qtbx[1234]. * config/aarch64/aarch64-simd.md (aarch64_tbl1): Rename to... (aarch64_qtbl1): This. (aarch64_tbx1): Rename to... (aarch64_qtbx1): This. (aarch64_tbl2v16qi): Delete. (aarch64_tbl3): Rename to... (aarch64_qtbl2): This. (aarch64_tbx4): Rename to... (aarch64_qtbx2): This. * config/aarch64/aarch64.c (aarch64_expand_vec_perm_1): Use renamed qtbl1 and qtbl2 RTL patterns. * config/aarch64/arm_neon.h (vqtbl1_p8): Use renamed qtbl1 RTL pattern. (vqtbl1_s8): Likewise. (vqtbl1_u8): Likewise. (vqtbl1q_p8): Likewise. (vqtbl1q_s8): Likewise. (vqtbl1q_u8): Likewise. (vqtbx1_s8): Use renamed qtbx1 RTL pattern. (vqtbx1_u8): Likewise. (vqtbx1_p8): Likewise. (vqtbx1q_s8): Likewise. (vqtbx1q_u8): Likewise. (vqtbx1q_p8): Likewise. (vtbl1_s8): Use renamed qtbl1 RTL pattern. (vtbl1_u8): Likewise. (vtbl1_p8): Likewise. (vtbl2_s8): Likewise. (vtbl2_u8): Likewise. (vtbl2_p8): Likewise. (vtbl3_s8): Use renamed qtbl2 RTL pattern. (vtbl3_u8): Likewise. (vtbl3_p8): Likewise. (vtbl4_s8): Likewise. (vtbl4_u8): Likewise. (vtbl4_p8): Likewise. (vtbx2_s8): Use renamed qtbx2 RTL pattern. (vtbx2_u8): Likewise. (vtbx2_p8): Likewise. (vqtbl2_s8): Use renamed qtbl2 RTL pattern. (vqtbl2_u8): Likewise. (vqtbl2_p8): Likewise. (vqtbl2q_s8): Likewise. (vqtbl2q_u8): Likewise. (vqtbl2q_p8): Likewise. (vqtbx2_s8): Use renamed qtbx2 RTL pattern. (vqtbx2_u8): Likewise. (vqtbx2_p8): Likewise. (vqtbx2q_s8): Likewise. (vqtbx2q_u8): Likewise. (vqtbx2q_p8): Likewise. (vtbx4_s8): Likewise. (vtbx4_u8): Likewise. (vtbx4_p8): Likewise.
--- gcc/config/aarch64/aarch64-simd-builtins.def | 34 +++---- gcc/config/aarch64/aarch64-simd.md | 24 ++--- gcc/config/aarch64/aarch64.c | 8 +- gcc/config/aarch64/arm_neon.h | 132 ++++++++++++--------------- 4 files changed, 82 insertions(+), 116 deletions(-) (limited to 'gcc') diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index 063f503..b7f1237 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -718,37 +718,31 @@ VAR1 (BINOPP, crypto_pmull, 0, NONE, di) VAR1 (BINOPP, crypto_pmull, 0, NONE, v2di) - /* Implemented by aarch64_tbl3. */ - VAR1 (BINOP, tbl3, 0, NONE, v8qi) - VAR1 (BINOP, tbl3, 0, NONE, v16qi) + /* Implemented by aarch64_qtbl1. */ + VAR2 (BINOP, qtbl1, 0, NONE, v8qi, v16qi) + VAR2 (BINOPU, qtbl1, 0, NONE, v8qi, v16qi) - /* Implemented by aarch64_tbl1. */ - VAR2 (BINOP, tbl1, 0, NONE, v8qi, v16qi) - VAR2 (BINOPU, tbl1, 0, NONE, v8qi, v16qi) + /* Implemented by aarch64_qtbl2. */ + VAR2 (BINOP, qtbl2, 0, NONE, v8qi, v16qi) /* Implemented by aarch64_qtbl3. */ - VAR1 (BINOP, qtbl3, 0, NONE, v8qi) - VAR1 (BINOP, qtbl3, 0, NONE, v16qi) + VAR2 (BINOP, qtbl3, 0, NONE, v8qi, v16qi) /* Implemented by aarch64_qtbl4. */ - VAR1 (BINOP, qtbl4, 0, NONE, v8qi) - VAR1 (BINOP, qtbl4, 0, NONE, v16qi) + VAR2 (BINOP, qtbl4, 0, NONE, v8qi, v16qi) - /* Implemented by aarch64_tbx1. */ - VAR2 (TERNOP, tbx1, 0, NONE, v8qi, v16qi) - VAR2 (TERNOPU, tbx1, 0, NONE, v8qi, v16qi) + /* Implemented by aarch64_qtbx1. */ + VAR2 (TERNOP, qtbx1, 0, NONE, v8qi, v16qi) + VAR2 (TERNOPU, qtbx1, 0, NONE, v8qi, v16qi) - /* Implemented by aarch64_tbx4. */ - VAR1 (TERNOP, tbx4, 0, NONE, v8qi) - VAR1 (TERNOP, tbx4, 0, NONE, v16qi) + /* Implemented by aarch64_qtbx2. */ + VAR2 (TERNOP, qtbx2, 0, NONE, v8qi, v16qi) /* Implemented by aarch64_qtbx3. 
*/ - VAR1 (TERNOP, qtbx3, 0, NONE, v8qi) - VAR1 (TERNOP, qtbx3, 0, NONE, v16qi) + VAR2 (TERNOP, qtbx3, 0, NONE, v8qi, v16qi) /* Implemented by aarch64_qtbx4. */ - VAR1 (TERNOP, qtbx4, 0, NONE, v8qi) - VAR1 (TERNOP, qtbx4, 0, NONE, v16qi) + VAR2 (TERNOP, qtbx4, 0, NONE, v8qi, v16qi) /* Builtins for ARMv8.1-A Adv.SIMD instructions. */ diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 7489098..7332a73 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -6948,7 +6948,7 @@ { rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i); rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i); - emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2])); + emit_insn (gen_aarch64_qtbl1v16qi (op0, op1, operands[2])); } DONE; } @@ -7425,7 +7425,7 @@ DONE; }) -(define_insn "aarch64_tbl1" +(define_insn "aarch64_qtbl1" [(set (match_operand:VB 0 "register_operand" "=w") (unspec:VB [(match_operand:V16QI 1 "register_operand" "w") (match_operand:VB 2 "register_operand" "w")] @@ -7435,7 +7435,7 @@ [(set_attr "type" "neon_tbl1")] ) -(define_insn "aarch64_tbx1" +(define_insn "aarch64_qtbx1" [(set (match_operand:VB 0 "register_operand" "=w") (unspec:VB [(match_operand:VB 1 "register_operand" "0") (match_operand:V16QI 2 "register_operand" "w") @@ -7448,27 +7448,17 @@ ;; Two source registers. -(define_insn "aarch64_tbl2v16qi" - [(set (match_operand:V16QI 0 "register_operand" "=w") - (unspec:V16QI [(match_operand:OI 1 "register_operand" "w") - (match_operand:V16QI 2 "register_operand" "w")] - UNSPEC_TBL))] - "TARGET_SIMD" - "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b" - [(set_attr "type" "neon_tbl2_q")] -) - -(define_insn "aarch64_tbl3" +(define_insn "aarch64_qtbl2" [(set (match_operand:VB 0 "register_operand" "=w") (unspec:VB [(match_operand:OI 1 "register_operand" "w") (match_operand:VB 2 "register_operand" "w")] UNSPEC_TBL))] "TARGET_SIMD" "tbl\\t%S0., {%S1.16b - %T1.16b}, %S2." 
- [(set_attr "type" "neon_tbl3")] + [(set_attr "type" "neon_tbl2")] ) -(define_insn "aarch64_tbx4" +(define_insn "aarch64_qtbx2" [(set (match_operand:VB 0 "register_operand" "=w") (unspec:VB [(match_operand:VB 1 "register_operand" "0") (match_operand:OI 2 "register_operand" "w") @@ -7476,7 +7466,7 @@ UNSPEC_TBX))] "TARGET_SIMD" "tbx\\t%S0., {%S2.16b - %T2.16b}, %S3." - [(set_attr "type" "neon_tbl4")] + [(set_attr "type" "neon_tbl2")] ) ;; Three source registers. diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index f5b25a7..3bdf19d 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -22047,11 +22047,11 @@ aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel) /* Expand the argument to a V16QI mode by duplicating it. */ rtx pair = gen_reg_rtx (V16QImode); emit_insn (gen_aarch64_combinev8qi (pair, op0, op0)); - emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel)); + emit_insn (gen_aarch64_qtbl1v8qi (target, pair, sel)); } else { - emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel)); + emit_insn (gen_aarch64_qtbl1v16qi (target, op0, sel)); } } else @@ -22062,13 +22062,13 @@ aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel) { pair = gen_reg_rtx (V16QImode); emit_insn (gen_aarch64_combinev8qi (pair, op0, op1)); - emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel)); + emit_insn (gen_aarch64_qtbl1v8qi (target, pair, sel)); } else { pair = gen_reg_rtx (OImode); emit_insn (gen_aarch64_combinev16qi (pair, op0, op1)); - emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel)); + emit_insn (gen_aarch64_qtbl2v16qi (target, pair, sel)); } } } diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 00d76ea..1048d7c 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -9534,90 +9534,90 @@ __extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbl1_p8 (poly8x16_t __tab, uint8x8_t __idx) { - return 
(poly8x8_t) __builtin_aarch64_tbl1v8qi ((int8x16_t) __tab, - (int8x8_t) __idx); + return (poly8x8_t) __builtin_aarch64_qtbl1v8qi ((int8x16_t) __tab, + (int8x8_t) __idx); } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbl1_s8 (int8x16_t __tab, uint8x8_t __idx) { - return __builtin_aarch64_tbl1v8qi (__tab, (int8x8_t) __idx); + return __builtin_aarch64_qtbl1v8qi (__tab, (int8x8_t) __idx); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbl1_u8 (uint8x16_t __tab, uint8x8_t __idx) { - return __builtin_aarch64_tbl1v8qi_uuu (__tab, __idx); + return __builtin_aarch64_qtbl1v8qi_uuu (__tab, __idx); } __extension__ extern __inline poly8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbl1q_p8 (poly8x16_t __tab, uint8x16_t __idx) { - return (poly8x16_t) __builtin_aarch64_tbl1v16qi ((int8x16_t) __tab, - (int8x16_t) __idx); + return (poly8x16_t) __builtin_aarch64_qtbl1v16qi ((int8x16_t) __tab, + (int8x16_t) __idx); } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbl1q_s8 (int8x16_t __tab, uint8x16_t __idx) { - return __builtin_aarch64_tbl1v16qi (__tab, (int8x16_t) __idx); + return __builtin_aarch64_qtbl1v16qi (__tab, (int8x16_t) __idx); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbl1q_u8 (uint8x16_t __tab, uint8x16_t __idx) { - return __builtin_aarch64_tbl1v16qi_uuu (__tab, __idx); + return __builtin_aarch64_qtbl1v16qi_uuu (__tab, __idx); } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbx1_s8 (int8x8_t __r, int8x16_t __tab, uint8x8_t __idx) { - return __builtin_aarch64_tbx1v8qi (__r, __tab, (int8x8_t) __idx); + return __builtin_aarch64_qtbx1v8qi (__r, __tab, (int8x8_t) __idx); } __extension__ extern __inline uint8x8_t __attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) vqtbx1_u8 (uint8x8_t __r, uint8x16_t __tab, uint8x8_t __idx) { - return __builtin_aarch64_tbx1v8qi_uuuu (__r, __tab, __idx); + return __builtin_aarch64_qtbx1v8qi_uuuu (__r, __tab, __idx); } __extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbx1_p8 (poly8x8_t __r, poly8x16_t __tab, uint8x8_t __idx) { - return (poly8x8_t) __builtin_aarch64_tbx1v8qi ((int8x8_t) __r, - (int8x16_t) __tab, - (int8x8_t) __idx); + return (poly8x8_t) __builtin_aarch64_qtbx1v8qi ((int8x8_t) __r, + (int8x16_t) __tab, + (int8x8_t) __idx); } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbx1q_s8 (int8x16_t __r, int8x16_t __tab, uint8x16_t __idx) { - return __builtin_aarch64_tbx1v16qi (__r, __tab, (int8x16_t) __idx); + return __builtin_aarch64_qtbx1v16qi (__r, __tab, (int8x16_t) __idx); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbx1q_u8 (uint8x16_t __r, uint8x16_t __tab, uint8x16_t __idx) { - return __builtin_aarch64_tbx1v16qi_uuuu (__r, __tab, __idx); + return __builtin_aarch64_qtbx1v16qi_uuuu (__r, __tab, __idx); } __extension__ extern __inline poly8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbx1q_p8 (poly8x16_t __r, poly8x16_t __tab, uint8x16_t __idx) { - return (poly8x16_t) __builtin_aarch64_tbx1v16qi ((int8x16_t) __r, - (int8x16_t) __tab, - (int8x16_t) __idx); + return (poly8x16_t) __builtin_aarch64_qtbx1v16qi ((int8x16_t) __r, + (int8x16_t) __tab, + (int8x16_t) __idx); } /* V7 legacy table intrinsics. 
*/ @@ -9628,7 +9628,7 @@ vtbl1_s8 (int8x8_t __tab, int8x8_t __idx) { int8x16_t __temp = vcombine_s8 (__tab, vcreate_s8 (__AARCH64_UINT64_C (0x0))); - return __builtin_aarch64_tbl1v8qi (__temp, __idx); + return __builtin_aarch64_qtbl1v8qi (__temp, __idx); } __extension__ extern __inline uint8x8_t @@ -9637,7 +9637,7 @@ vtbl1_u8 (uint8x8_t __tab, uint8x8_t __idx) { uint8x16_t __temp = vcombine_u8 (__tab, vcreate_u8 (__AARCH64_UINT64_C (0x0))); - return __builtin_aarch64_tbl1v8qi_uuu (__temp, __idx); + return __builtin_aarch64_qtbl1v8qi_uuu (__temp, __idx); } __extension__ extern __inline poly8x8_t @@ -9646,8 +9646,8 @@ vtbl1_p8 (poly8x8_t __tab, uint8x8_t __idx) { poly8x16_t __temp = vcombine_p8 (__tab, vcreate_p8 (__AARCH64_UINT64_C (0x0))); - return (poly8x8_t) __builtin_aarch64_tbl1v8qi ((int8x16_t) __temp, - (int8x8_t) __idx); + return (poly8x8_t) __builtin_aarch64_qtbl1v8qi ((int8x16_t) __temp, + (int8x8_t) __idx); } __extension__ extern __inline int8x8_t @@ -9655,7 +9655,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtbl2_s8 (int8x8x2_t __tab, int8x8_t __idx) { int8x16_t __temp = vcombine_s8 (__tab.val[0], __tab.val[1]); - return __builtin_aarch64_tbl1v8qi (__temp, __idx); + return __builtin_aarch64_qtbl1v8qi (__temp, __idx); } __extension__ extern __inline uint8x8_t @@ -9663,7 +9663,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtbl2_u8 (uint8x8x2_t __tab, uint8x8_t __idx) { uint8x16_t __temp = vcombine_u8 (__tab.val[0], __tab.val[1]); - return __builtin_aarch64_tbl1v8qi_uuu (__temp, __idx); + return __builtin_aarch64_qtbl1v8qi_uuu (__temp, __idx); } __extension__ extern __inline poly8x8_t @@ -9671,15 +9671,14 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtbl2_p8 (poly8x8x2_t __tab, uint8x8_t __idx) { poly8x16_t __temp = vcombine_p8 (__tab.val[0], __tab.val[1]); - return (poly8x8_t) __builtin_aarch64_tbl1v8qi ((int8x16_t) __temp, - (int8x8_t) __idx); + return (poly8x8_t) 
__builtin_aarch64_qtbl1v8qi ((int8x16_t) __temp, + (int8x8_t) __idx); } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtbl3_s8 (int8x8x3_t __tab, int8x8_t __idx) { - int8x8_t __result; int8x16x2_t __temp; __builtin_aarch64_simd_oi __o; __temp.val[0] = vcombine_s8 (__tab.val[0], __tab.val[1]); @@ -9688,15 +9687,13 @@ vtbl3_s8 (int8x8x3_t __tab, int8x8_t __idx) (int8x16_t) __temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[1], 1); - __result = __builtin_aarch64_tbl3v8qi (__o, __idx); - return __result; + return __builtin_aarch64_qtbl2v8qi (__o, __idx); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtbl3_u8 (uint8x8x3_t __tab, uint8x8_t __idx) { - uint8x8_t __result; uint8x16x2_t __temp; __builtin_aarch64_simd_oi __o; __temp.val[0] = vcombine_u8 (__tab.val[0], __tab.val[1]); @@ -9705,15 +9702,13 @@ vtbl3_u8 (uint8x8x3_t __tab, uint8x8_t __idx) (int8x16_t) __temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[1], 1); - __result = (uint8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx); - return __result; + return (uint8x8_t)__builtin_aarch64_qtbl2v8qi (__o, (int8x8_t)__idx); } __extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtbl3_p8 (poly8x8x3_t __tab, uint8x8_t __idx) { - poly8x8_t __result; poly8x16x2_t __temp; __builtin_aarch64_simd_oi __o; __temp.val[0] = vcombine_p8 (__tab.val[0], __tab.val[1]); @@ -9722,15 +9717,13 @@ vtbl3_p8 (poly8x8x3_t __tab, uint8x8_t __idx) (int8x16_t) __temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[1], 1); - __result = (poly8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx); - return __result; + return (poly8x8_t)__builtin_aarch64_qtbl2v8qi (__o, (int8x8_t)__idx); } __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) vtbl4_s8 (int8x8x4_t __tab, int8x8_t __idx) { - int8x8_t __result; int8x16x2_t __temp; __builtin_aarch64_simd_oi __o; __temp.val[0] = vcombine_s8 (__tab.val[0], __tab.val[1]); @@ -9739,15 +9732,13 @@ vtbl4_s8 (int8x8x4_t __tab, int8x8_t __idx) (int8x16_t) __temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[1], 1); - __result = __builtin_aarch64_tbl3v8qi (__o, __idx); - return __result; + return __builtin_aarch64_qtbl2v8qi (__o, __idx); } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtbl4_u8 (uint8x8x4_t __tab, uint8x8_t __idx) { - uint8x8_t __result; uint8x16x2_t __temp; __builtin_aarch64_simd_oi __o; __temp.val[0] = vcombine_u8 (__tab.val[0], __tab.val[1]); @@ -9756,15 +9747,13 @@ vtbl4_u8 (uint8x8x4_t __tab, uint8x8_t __idx) (int8x16_t) __temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[1], 1); - __result = (uint8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx); - return __result; + return (uint8x8_t)__builtin_aarch64_qtbl2v8qi (__o, (int8x8_t)__idx); } __extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtbl4_p8 (poly8x8x4_t __tab, uint8x8_t __idx) { - poly8x8_t __result; poly8x16x2_t __temp; __builtin_aarch64_simd_oi __o; __temp.val[0] = vcombine_p8 (__tab.val[0], __tab.val[1]); @@ -9773,8 +9762,7 @@ vtbl4_p8 (poly8x8x4_t __tab, uint8x8_t __idx) (int8x16_t) __temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[1], 1); - __result = (poly8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx); - return __result; + return(poly8x8_t)__builtin_aarch64_qtbl2v8qi (__o, (int8x8_t)__idx); } __extension__ extern __inline int8x8_t @@ -9782,7 +9770,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtbx2_s8 (int8x8_t __r, int8x8x2_t __tab, int8x8_t __idx) { int8x16_t __temp = vcombine_s8 
(__tab.val[0], __tab.val[1]); - return __builtin_aarch64_tbx1v8qi (__r, __temp, __idx); + return __builtin_aarch64_qtbx1v8qi (__r, __temp, __idx); } __extension__ extern __inline uint8x8_t @@ -9790,7 +9778,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtbx2_u8 (uint8x8_t __r, uint8x8x2_t __tab, uint8x8_t __idx) { uint8x16_t __temp = vcombine_u8 (__tab.val[0], __tab.val[1]); - return __builtin_aarch64_tbx1v8qi_uuuu (__r, __temp, __idx); + return __builtin_aarch64_qtbx1v8qi_uuuu (__r, __temp, __idx); } __extension__ extern __inline poly8x8_t @@ -9798,9 +9786,9 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtbx2_p8 (poly8x8_t __r, poly8x8x2_t __tab, uint8x8_t __idx) { poly8x16_t __temp = vcombine_p8 (__tab.val[0], __tab.val[1]); - return (poly8x8_t) __builtin_aarch64_tbx1v8qi ((int8x8_t) __r, - (int8x16_t) __temp, - (int8x8_t) __idx); + return (poly8x8_t) __builtin_aarch64_qtbx1v8qi ((int8x8_t) __r, + (int8x16_t) __temp, + (int8x8_t) __idx); } /* End of temporary inline asm. 
*/ @@ -23335,7 +23323,7 @@ vqtbl2_s8 (int8x16x2_t __tab, uint8x8_t __idx) __builtin_aarch64_simd_oi __o; __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[0], 0); __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[1], 1); - return __builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx); + return __builtin_aarch64_qtbl2v8qi (__o, (int8x8_t)__idx); } __extension__ extern __inline uint8x8_t @@ -23345,7 +23333,7 @@ vqtbl2_u8 (uint8x16x2_t __tab, uint8x8_t __idx) __builtin_aarch64_simd_oi __o; __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); - return (uint8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx); + return (uint8x8_t)__builtin_aarch64_qtbl2v8qi (__o, (int8x8_t)__idx); } __extension__ extern __inline poly8x8_t @@ -23355,7 +23343,7 @@ vqtbl2_p8 (poly8x16x2_t __tab, uint8x8_t __idx) __builtin_aarch64_simd_oi __o; __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); - return (poly8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx); + return (poly8x8_t)__builtin_aarch64_qtbl2v8qi (__o, (int8x8_t)__idx); } __extension__ extern __inline int8x16_t @@ -23365,7 +23353,7 @@ vqtbl2q_s8 (int8x16x2_t __tab, uint8x16_t __idx) __builtin_aarch64_simd_oi __o; __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); - return __builtin_aarch64_tbl3v16qi (__o, (int8x16_t)__idx); + return __builtin_aarch64_qtbl2v16qi (__o, (int8x16_t)__idx); } __extension__ extern __inline uint8x16_t @@ -23375,7 +23363,7 @@ vqtbl2q_u8 (uint8x16x2_t __tab, uint8x16_t __idx) __builtin_aarch64_simd_oi __o; __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); - return (uint8x16_t)__builtin_aarch64_tbl3v16qi 
(__o, (int8x16_t)__idx); + return (uint8x16_t)__builtin_aarch64_qtbl2v16qi (__o, (int8x16_t)__idx); } __extension__ extern __inline poly8x16_t @@ -23385,7 +23373,7 @@ vqtbl2q_p8 (poly8x16x2_t __tab, uint8x16_t __idx) __builtin_aarch64_simd_oi __o; __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); - return (poly8x16_t)__builtin_aarch64_tbl3v16qi (__o, (int8x16_t)__idx); + return (poly8x16_t)__builtin_aarch64_qtbl2v16qi (__o, (int8x16_t)__idx); } /* vqtbl3 */ @@ -23539,7 +23527,7 @@ vqtbx2_s8 (int8x8_t __r, int8x16x2_t __tab, uint8x8_t __idx) __builtin_aarch64_simd_oi __o; __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[0], 0); __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[1], 1); - return __builtin_aarch64_tbx4v8qi (__r, __o, (int8x8_t)__idx); + return __builtin_aarch64_qtbx2v8qi (__r, __o, (int8x8_t)__idx); } __extension__ extern __inline uint8x8_t @@ -23549,8 +23537,8 @@ vqtbx2_u8 (uint8x8_t __r, uint8x16x2_t __tab, uint8x8_t __idx) __builtin_aarch64_simd_oi __o; __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); - return (uint8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)__r, __o, - (int8x8_t)__idx); + return (uint8x8_t)__builtin_aarch64_qtbx2v8qi ((int8x8_t)__r, __o, + (int8x8_t)__idx); } __extension__ extern __inline poly8x8_t @@ -23560,8 +23548,8 @@ vqtbx2_p8 (poly8x8_t __r, poly8x16x2_t __tab, uint8x8_t __idx) __builtin_aarch64_simd_oi __o; __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); - return (poly8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)__r, __o, - (int8x8_t)__idx); + return (poly8x8_t)__builtin_aarch64_qtbx2v8qi ((int8x8_t)__r, __o, + (int8x8_t)__idx); } __extension__ extern __inline int8x16_t @@ -23571,7 +23559,7 @@ vqtbx2q_s8 (int8x16_t 
__r, int8x16x2_t __tab, uint8x16_t __idx) __builtin_aarch64_simd_oi __o; __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[0], 0); __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[1], 1); - return __builtin_aarch64_tbx4v16qi (__r, __o, (int8x16_t)__idx); + return __builtin_aarch64_qtbx2v16qi (__r, __o, (int8x16_t)__idx); } __extension__ extern __inline uint8x16_t @@ -23581,7 +23569,7 @@ vqtbx2q_u8 (uint8x16_t __r, uint8x16x2_t __tab, uint8x16_t __idx) __builtin_aarch64_simd_oi __o; __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); - return (uint8x16_t)__builtin_aarch64_tbx4v16qi ((int8x16_t)__r, __o, + return (uint8x16_t)__builtin_aarch64_qtbx2v16qi ((int8x16_t)__r, __o, (int8x16_t)__idx); } @@ -23592,8 +23580,8 @@ vqtbx2q_p8 (poly8x16_t __r, poly8x16x2_t __tab, uint8x16_t __idx) __builtin_aarch64_simd_oi __o; __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); - return (poly8x16_t)__builtin_aarch64_tbx4v16qi ((int8x16_t)__r, __o, - (int8x16_t)__idx); + return (poly8x16_t)__builtin_aarch64_qtbx2v16qi ((int8x16_t)__r, __o, + (int8x16_t)__idx); } /* vqtbx3 */ @@ -28511,7 +28499,6 @@ __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtbx4_s8 (int8x8_t __r, int8x8x4_t __tab, int8x8_t __idx) { - int8x8_t __result; int8x16x2_t __temp; __builtin_aarch64_simd_oi __o; __temp.val[0] = vcombine_s8 (__tab.val[0], __tab.val[1]); @@ -28520,15 +28507,13 @@ vtbx4_s8 (int8x8_t __r, int8x8x4_t __tab, int8x8_t __idx) (int8x16_t) __temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[1], 1); - __result = __builtin_aarch64_tbx4v8qi (__r, __o, __idx); - return __result; + return __builtin_aarch64_qtbx2v8qi (__r, __o, __idx); } __extension__ extern __inline uint8x8_t __attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) vtbx4_u8 (uint8x8_t __r, uint8x8x4_t __tab, uint8x8_t __idx) { - uint8x8_t __result; uint8x16x2_t __temp; __builtin_aarch64_simd_oi __o; __temp.val[0] = vcombine_u8 (__tab.val[0], __tab.val[1]); @@ -28537,16 +28522,14 @@ vtbx4_u8 (uint8x8_t __r, uint8x8x4_t __tab, uint8x8_t __idx) (int8x16_t) __temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[1], 1); - __result = (uint8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)__r, __o, - (int8x8_t)__idx); - return __result; + return (uint8x8_t)__builtin_aarch64_qtbx2v8qi ((int8x8_t)__r, __o, + (int8x8_t)__idx); } __extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtbx4_p8 (poly8x8_t __r, poly8x8x4_t __tab, uint8x8_t __idx) { - poly8x8_t __result; poly8x16x2_t __temp; __builtin_aarch64_simd_oi __o; __temp.val[0] = vcombine_p8 (__tab.val[0], __tab.val[1]); @@ -28555,9 +28538,8 @@ vtbx4_p8 (poly8x8_t __r, poly8x8x4_t __tab, uint8x8_t __idx) (int8x16_t) __temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[1], 1); - __result = (poly8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)__r, __o, - (int8x8_t)__idx); - return __result; + return (poly8x8_t)__builtin_aarch64_qtbx2v8qi ((int8x8_t)__r, __o, + (int8x8_t)__idx); } /* vtrn */ -- cgit v1.1 From 7cc2df084b7977653a9b59cbc34a9ad500ae619c Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Tue, 20 Jul 2021 11:00:33 +0200 Subject: debug/101473 - apply debug prefix maps before checksumming DIEs The following makes sure to apply the debug prefix maps to filenames before checksumming DIEs to create the global symbol for the CU DIE used by LTO to link the late debug to the early debug. This avoids binary differences (in said symbol) when compiling with toolchains installed under a different path and that compensated with appropriate -fdebug-prefix-map options. 
The easiest and most scalable way is to record both the unmapped and the remapped filename in the dwarf_file_data so the remapping process takes place at a single point and only once (otherwise it creates GC garbage at each point doing that). 2021-07-20 Richard Biener PR debug/101473 * dwarf2out.h (dwarf_file_data): Add key member. * dwarf2out.c (dwarf_file_hasher::equal): Compare key. (dwarf_file_hasher::hash): Hash key. (lookup_filename): Remap the filename and store it in the filename member of dwarf_file_data when creating a new dwarf_file_data. (file_name_acquire): Do not remap the filename again. (maybe_emit_file): Likewise. --- gcc/dwarf2out.c | 12 ++++++------ gcc/dwarf2out.h | 1 + 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'gcc') diff --git a/gcc/dwarf2out.c b/gcc/dwarf2out.c index 82783c4..884f1e1 100644 --- a/gcc/dwarf2out.c +++ b/gcc/dwarf2out.c @@ -12424,7 +12424,7 @@ file_name_acquire (dwarf_file_data **slot, file_name_acquire_data *fnad) fi = fnad->files + fnad->used_files++; - f = remap_debug_filename (d->filename); + f = d->filename; /* Skip all leading "./". */ while (f[0] == '.' 
&& IS_DIR_SEPARATOR (f[1])) @@ -27460,13 +27460,13 @@ dwarf2out_ignore_block (const_tree block) bool dwarf_file_hasher::equal (dwarf_file_data *p1, const char *p2) { - return filename_cmp (p1->filename, p2) == 0; + return filename_cmp (p1->key, p2) == 0; } hashval_t dwarf_file_hasher::hash (dwarf_file_data *p) { - return htab_hash_string (p->filename); + return htab_hash_string (p->key); } /* Lookup FILE_NAME (in the list of filenames that we know about here in @@ -27496,7 +27496,8 @@ lookup_filename (const char *file_name) return *slot; created = ggc_alloc (); - created->filename = file_name; + created->key = file_name; + created->filename = remap_debug_filename (file_name); created->emitted_number = 0; *slot = created; return created; @@ -27522,8 +27523,7 @@ maybe_emit_file (struct dwarf_file_data * fd) if (output_asm_line_debug_info ()) { fprintf (asm_out_file, "\t.file %u ", fd->emitted_number); - output_quoted_string (asm_out_file, - remap_debug_filename (fd->filename)); + output_quoted_string (asm_out_file, fd->filename); fputc ('\n', asm_out_file); } } diff --git a/gcc/dwarf2out.h b/gcc/dwarf2out.h index 057afdb..b2152a5 100644 --- a/gcc/dwarf2out.h +++ b/gcc/dwarf2out.h @@ -424,6 +424,7 @@ extern enum dwarf_tag dw_get_die_tag (dw_die_ref); /* Data about a single source file. */ struct GTY((for_user)) dwarf_file_data { + const char * key; const char * filename; int emitted_number; }; -- cgit v1.1 From 1ef9b135793a528c05c4a3e22649744955aa2dfb Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Tue, 20 Jul 2021 15:42:02 +0100 Subject: aarch64: Tweak old vect-* tests to avoid new FAILs I'm not sure what these test were originally designed to test. vaddv and vmaxv seem to be testing for vectorisation, with associated scan-assembler tests. But they use arm_neon.h functions to test the results, which would presumably also trip many of the scans. That was probably what the split into vect-fmax-fmin.c and vect-fmaxv-fminv-compile.c was supposed to avoid. 
Anyway, the tests started failing after the recent change to allow staged reductions for epilogue loops. And epilogues came into play because the reduction loops iterate LANES-1 rather than LANES times. (vmaxv was trying to iterate LANES times, but the gimple optimisers outsmarted it. The other two explicitly had a count of LANES-1.) Just suppressing epilogues causes other issues for vaddv and vmaxv. The easiest fix therefore seemed to be to use an asm to hide the initial value of the vmaxv loop (so that it really does iterate LANES times) and then make the others match that style. gcc/testsuite/ PR testsuite/101506 * gcc.target/aarch64/vect-vmaxv.c: Use an asm to hide the true initial value of the reduction from the vectorizer. * gcc.target/aarch64/vect-vaddv.c: Likewise. Make the vector loop operate on exactly LANES (rather than LANES-1) iterations. * gcc.target/aarch64/vect-fmaxv-fminv.x: Likewise. --- gcc/testsuite/gcc.target/aarch64/vect-fmaxv-fminv.x | 20 ++++++++++++-------- gcc/testsuite/gcc.target/aarch64/vect-vaddv.c | 4 ++-- gcc/testsuite/gcc.target/aarch64/vect-vmaxv.c | 2 +- 3 files changed, 15 insertions(+), 11 deletions(-) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fmaxv-fminv.x b/gcc/testsuite/gcc.target/aarch64/vect-fmaxv-fminv.x index 0bc6ba4..d3ba31c 100644 --- a/gcc/testsuite/gcc.target/aarch64/vect-fmaxv-fminv.x +++ b/gcc/testsuite/gcc.target/aarch64/vect-fmaxv-fminv.x @@ -5,8 +5,9 @@ typedef double *__restrict__ pRF64; float maxv_f32 (pRF32 a) { int i; - float s = a[0]; - for (i=1;i<8;i++) + float s; + asm ("" : "=w" (s) : "0" (a[0])); + for (i=0;i<8;i++) s = (s > a[i] ? s : a[i]); return s; @@ -15,8 +16,9 @@ float maxv_f32 (pRF32 a) float minv_f32 (pRF32 a) { int i; - float s = a[0]; - for (i=1;i<16;i++) + float s; + asm ("" : "=w" (s) : "0" (a[0])); + for (i=0;i<16;i++) s = (s < a[i] ? 
s : a[i]); return s; @@ -25,8 +27,9 @@ float minv_f32 (pRF32 a) double maxv_f64 (pRF64 a) { int i; - double s = a[0]; - for (i=1;i<8;i++) + double s; + asm ("" : "=w" (s) : "0" (a[0])); + for (i=0;i<8;i++) s = (s > a[i] ? s : a[i]); return s; @@ -35,8 +38,9 @@ double maxv_f64 (pRF64 a) double minv_f64 (pRF64 a) { int i; - double s = a[0]; - for (i=1;i<16;i++) + double s; + asm ("" : "=w" (s) : "0" (a[0])); + for (i=0;i<16;i++) s = (s < a[i] ? s : a[i]); return s; diff --git a/gcc/testsuite/gcc.target/aarch64/vect-vaddv.c b/gcc/testsuite/gcc.target/aarch64/vect-vaddv.c index 41e9157..3a12ae9 100644 --- a/gcc/testsuite/gcc.target/aarch64/vect-vaddv.c +++ b/gcc/testsuite/gcc.target/aarch64/vect-vaddv.c @@ -57,8 +57,8 @@ test_vaddv##SUFFIX##_##TYPE##x##LANES##_t (void) \ /* Calculate linearly. */ \ for (i = 0; i < moves; i++) \ { \ - out_l[i] = input_##TYPE[i]; \ - for (j = 1; j < LANES; j++) \ + asm ("" : "=r" (out_l[i]) : "0" (0)); \ + for (j = 0; j < LANES; j++) \ out_l[i] += input_##TYPE[i + j]; \ } \ \ diff --git a/gcc/testsuite/gcc.target/aarch64/vect-vmaxv.c b/gcc/testsuite/gcc.target/aarch64/vect-vmaxv.c index 4280834..1bdea89 100644 --- a/gcc/testsuite/gcc.target/aarch64/vect-vmaxv.c +++ b/gcc/testsuite/gcc.target/aarch64/vect-vmaxv.c @@ -36,7 +36,7 @@ test_v##MAXMIN##v##SUFFIX##_##TYPE##x##LANES##_t (void) \ /* Calculate linearly. */ \ for (i = 0; i < moves; i++) \ { \ - out_l[i] = input_##TYPE[i]; \ + asm ("" : "=r" (out_l[i]) : "0" (input_##TYPE[i])); \ for (j = 0; j < LANES; j++) \ out_l[i] = input_##TYPE[i + j] CMP_OP out_l[i] ? \ input_##TYPE[i + j] : out_l[i]; \ -- cgit v1.1 From e0e82856d535f56c916382f892ed2435dde54d4d Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Tue, 20 Jul 2021 17:26:10 +0200 Subject: rs6000: Fix up easy_vector_constant_msb handling [PR101384] The following gcc.dg/pr101384.c testcase is miscompiled on powerpc64le-linux. 
easy_altivec_constant has code to try construct vector constants with different element sizes, perhaps different from CONST_VECTOR's mode. But as written, that works fine for vspltis[bhw] cases, but not for the vspltisw x,-1; vsl[bhw] x,x,x case, because that creates always a V16QImode, V8HImode or V4SImode constant containing broadcasted constant with just the MSB set. The vspltis_constant function etc. expects the vspltis[bhw] instructions where the small [-16..15] or even [-32..30] constant is sign-extended to the remaining step bytes, but that is not the case for the 0x80...00 constants, with step > 1 we can't handle e.g. { 0x80, 0xff, 0xff, 0xff, 0x80, 0xff, 0xff, 0xff, 0x80, 0xff, 0xff, 0xff, 0x80, 0xff, 0xff, 0xff } vectors but do want to handle e.g. { 0, 0, 0, 0x80, 0, 0, 0, 0x80, 0, 0, 0, 0x80, 0, 0, 0, 0x80 } and similarly with copies > 1 we do want to handle e.g. { 0x80808080, 0x80808080, 0x80808080, 0x80808080 }. 2021-07-20 Jakub Jelinek PR target/101384 * config/rs6000/rs6000-protos.h (easy_altivec_constant): Change return type from bool to int. * config/rs6000/rs6000.c (vspltis_constant): Fix up handling the EASY_VECTOR_MSB case if either step or copies is not 1. (vspltis_shifted): Fix comment typo. (easy_altivec_constant): Change return type from bool to int, instead of returning true return byte size of the element mode that should be used to synthetize the constant. * config/rs6000/predicates.md (easy_vector_constant_msb): Require that vspltis_shifted is 0, handle the case where easy_altivec_constant assumes using different vector mode from CONST_VECTOR's mode. * config/rs6000/altivec.md (easy_vector_constant_msb splitter): Use easy_altivec_constant to determine mode in which -1 >> -1 should be performed, use rs6000_expand_vector_init instead of gen_vec_initv4sisi. * gcc.dg/pr101384.c: New test. * gcc.target/powerpc/pr101384-1.c: New test. * gcc.target/powerpc/pr101384-2.c: New test. 
--- gcc/config/rs6000/altivec.md | 19 +++++-- gcc/config/rs6000/predicates.md | 17 +++++- gcc/config/rs6000/rs6000-protos.h | 2 +- gcc/config/rs6000/rs6000.c | 59 ++++++++++++++------ gcc/testsuite/gcc.dg/pr101384.c | 39 +++++++++++++ gcc/testsuite/gcc.target/powerpc/pr101384-1.c | 79 +++++++++++++++++++++++++++ gcc/testsuite/gcc.target/powerpc/pr101384-2.c | 79 +++++++++++++++++++++++++++ 7 files changed, 268 insertions(+), 26 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/pr101384.c create mode 100644 gcc/testsuite/gcc.target/powerpc/pr101384-1.c create mode 100644 gcc/testsuite/gcc.target/powerpc/pr101384-2.c (limited to 'gcc') diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index a20d6ac..d70c17e 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -317,22 +317,33 @@ [(const_int 0)] { rtx dest = operands[0]; - machine_mode mode = GET_MODE (operands[0]); + machine_mode mode; rtvec v; int i, num_elements; - if (mode == V4SFmode) + switch (easy_altivec_constant (operands[1], <MODE>mode)) { + case 1: + mode = V16QImode; + break; + case 2: + mode = V8HImode; + break; + case 4: mode = V4SImode; - dest = gen_lowpart (V4SImode, dest); + break; + default: + gcc_unreachable (); } + if (mode != <MODE>mode) + dest = gen_lowpart (mode, dest); num_elements = GET_MODE_NUNITS (mode); v = rtvec_alloc (num_elements); for (i = 0; i < num_elements; i++) RTVEC_ELT (v, i) = constm1_rtx; - emit_insn (gen_vec_initv4sisi (dest, gen_rtx_PARALLEL (mode, v))); + rs6000_expand_vector_init (dest, gen_rtx_PARALLEL (mode, v)); emit_insn (gen_rtx_SET (dest, gen_rtx_ASHIFT (mode, dest, dest))); DONE; }) diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 121cbf1..956e42b 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -683,15 +683,26 @@ (define_predicate "easy_vector_constant_msb" (and (match_code "const_vector") (and (match_test "TARGET_ALTIVEC") - (match_test 
"easy_altivec_constant (op, mode)"))) + (match_test "easy_altivec_constant (op, mode)") + (match_test "vspltis_shifted (op) == 0"))) { HOST_WIDE_INT val; - int elt; + int elt, sz = easy_altivec_constant (op, mode); + machine_mode inner = GET_MODE_INNER (mode); + int isz = GET_MODE_SIZE (inner); if (mode == V2DImode || mode == V2DFmode) return 0; elt = BYTES_BIG_ENDIAN ? GET_MODE_NUNITS (mode) - 1 : 0; + if (isz < sz) + { + if (const_vector_elt_as_int (op, elt) != 0) + return 0; + elt += (BYTES_BIG_ENDIAN ? -1 : 1) * (sz - isz) / isz; + } + else if (isz > sz) + inner = smallest_int_mode_for_size (sz * BITS_PER_UNIT); val = const_vector_elt_as_int (op, elt); - return EASY_VECTOR_MSB (val, GET_MODE_INNER (mode)); + return EASY_VECTOR_MSB (val, inner); }) ;; Return true if this is an easy altivec constant that we form diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 94bf961..14f6b31 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -30,7 +30,7 @@ extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, int, int, int, tree, machine_mode); #endif /* TREE_CODE */ -extern bool easy_altivec_constant (rtx, machine_mode); +extern int easy_altivec_constant (rtx, machine_mode); extern bool xxspltib_constant_p (rtx, machine_mode, int *, int *); extern int vspltis_shifted (rtx); extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int); diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 779de95..279f00c 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -6134,6 +6134,27 @@ vspltis_constant (rtx op, unsigned step, unsigned copies) splat_val = val; msb_val = val >= 0 ? 0 : -1; + if (val == 0 && step > 1) + { + /* Special case for loading most significant bit with step > 1. + In that case, match 0s in all but step-1s elements, where match + EASY_VECTOR_MSB. */ + for (i = 1; i < nunits; ++i) + { + unsigned elt = BYTES_BIG_ENDIAN ? 
nunits - 1 - i : i; + HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt); + if ((i & (step - 1)) == step - 1) + { + if (!EASY_VECTOR_MSB (elt_val, inner)) + break; + } + else if (elt_val) + break; + } + if (i == nunits) + return true; + } + /* Construct the value to be splatted, if possible. If not, return 0. */ for (i = 2; i <= copies; i *= 2) { @@ -6146,6 +6167,7 @@ vspltis_constant (rtx op, unsigned step, unsigned copies) | (small_val & mask))) return false; splat_val = small_val; + inner = smallest_int_mode_for_size (bitsize); } /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */ @@ -6160,8 +6182,9 @@ vspltis_constant (rtx op, unsigned step, unsigned copies) ; /* Also check if are loading up the most significant bit which can be done by - loading up -1 and shifting the value left by -1. */ - else if (EASY_VECTOR_MSB (splat_val, inner)) + loading up -1 and shifting the value left by -1. Only do this for + step 1 here, for larger steps it is done earlier. */ + else if (EASY_VECTOR_MSB (splat_val, inner) && step == 1) ; else @@ -6271,15 +6294,15 @@ vspltis_shifted (rtx op) } } - /* If all elements are equal, we don't need to do VLSDOI. */ + /* If all elements are equal, we don't need to do VSLDOI. */ return 0; } -/* Return true if OP is of the given MODE and can be synthesized - with a vspltisb, vspltish or vspltisw. */ +/* Return non-zero (element mode byte size) if OP is of the given MODE + and can be synthesized with a vspltisb, vspltish or vspltisw. */ -bool +int easy_altivec_constant (rtx op, machine_mode mode) { unsigned step, copies; @@ -6287,39 +6310,39 @@ easy_altivec_constant (rtx op, machine_mode mode) if (mode == VOIDmode) mode = GET_MODE (op); else if (mode != GET_MODE (op)) - return false; + return 0; /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy constants. */ if (mode == V2DFmode) - return zero_constant (op, mode); + return zero_constant (op, mode) ? 
8 : 0; else if (mode == V2DImode) { if (!CONST_INT_P (CONST_VECTOR_ELT (op, 0)) || !CONST_INT_P (CONST_VECTOR_ELT (op, 1))) - return false; + return 0; if (zero_constant (op, mode)) - return true; + return 8; if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1) - return true; + return 8; - return false; + return 0; } /* V1TImode is a special container for TImode. Ignore for now. */ else if (mode == V1TImode) - return false; + return 0; /* Start with a vspltisw. */ step = GET_MODE_NUNITS (mode) / 4; copies = 1; if (vspltis_constant (op, step, copies)) - return true; + return 4; /* Then try with a vspltish. */ if (step == 1) @@ -6328,7 +6351,7 @@ easy_altivec_constant (rtx op, machine_mode mode) step >>= 1; if (vspltis_constant (op, step, copies)) - return true; + return 2; /* And finally a vspltisb. */ if (step == 1) @@ -6337,12 +6360,12 @@ easy_altivec_constant (rtx op, machine_mode mode) step >>= 1; if (vspltis_constant (op, step, copies)) - return true; + return 1; if (vspltis_shifted (op) != 0) - return true; + return GET_MODE_SIZE (GET_MODE_INNER (mode)); - return false; + return 0; } /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose diff --git a/gcc/testsuite/gcc.dg/pr101384.c b/gcc/testsuite/gcc.dg/pr101384.c new file mode 100644 index 0000000..7030c0a --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr101384.c @@ -0,0 +1,39 @@ +/* PR target/101384 */ +/* { dg-do run } */ +/* { dg-options "-O2 -Wno-psabi -w" } */ + +typedef unsigned char __attribute__((__vector_size__ (16))) U; +typedef unsigned short __attribute__((__vector_size__ (8 * sizeof (short)))) V; + +U u; +V v; + +__attribute__((noipa)) U +foo (void) +{ + U y = (U) { 0x80, 0xff, 0xff, 0xff, 0x80, 0xff, 0xff, 0xff, + 0x80, 0xff, 0xff, 0xff, 0x80, 0xff, 0xff, 0xff } + u; + return y; +} + +__attribute__((noipa)) V +bar (void) +{ + V y = (V) { 0x8000, 0xffff, 0x8000, 0xffff, + 0x8000, 0xffff, 0x8000, 0xffff } + v; + return y; +} + +int +main () +{ + U x 
= foo (); + for (unsigned i = 0; i < 16; i++) + if (x[i] != ((i & 3) ? 0xff : 0x80)) + __builtin_abort (); + V y = bar (); + for (unsigned i = 0; i < 8; i++) + if (y[i] != ((i & 1) ? 0xffff : 0x8000)) + __builtin_abort (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/powerpc/pr101384-1.c b/gcc/testsuite/gcc.target/powerpc/pr101384-1.c new file mode 100644 index 0000000..627d7d7 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr101384-1.c @@ -0,0 +1,79 @@ +/* PR target/101384 */ +/* { dg-do compile { target le } } */ +/* { dg-options "-O2 -maltivec" } */ +/* { dg-require-effective-target powerpc_altivec_ok } */ +/* { dg-final { scan-assembler-times {\mvspltis[whb] [^\n\r]*,-1\M} 9 } } */ +/* { dg-final { scan-assembler-times {\mvslw\M} 3 } } */ +/* { dg-final { scan-assembler-times {\mvslh\M} 3 } } */ +/* { dg-final { scan-assembler-times {\mvslb\M} 3 } } */ + +typedef unsigned char __attribute__((__vector_size__ (16))) U; +typedef unsigned short __attribute__((__vector_size__ (16))) V; +typedef unsigned int __attribute__((__vector_size__ (16))) W; + +U u; +V v; +W w; + +U +f1 (void) +{ + U y = (U) { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 } + u; + return y; +} + +U +f2 (void) +{ + U y = (U) { 0, 0x80, 0, 0x80, 0, 0x80, 0, 0x80, 0, 0x80, 0, 0x80, 0, 0x80, 0, 0x80 } + u; + return y; +} + +U +f3 (void) +{ + U y = (U) { 0, 0, 0, 0x80, 0, 0, 0, 0x80, 0, 0, 0, 0x80, 0, 0, 0, 0x80 } + u; + return y; +} + +V +f4 (void) +{ + V y = (V) { 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080 } + v; + return y; +} + +V +f5 (void) +{ + V y = (V) { 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000 } + v; + return y; +} + +V +f6 (void) +{ + V y = (V) { 0, 0x8000, 0, 0x8000, 0, 0x8000, 0, 0x8000 } + v; + return y; +} + +W +f7 (void) +{ + W y = (W) { 0x80808080, 0x80808080, 0x80808080, 0x80808080 } + w; + return y; +} + +W +f8 (void) +{ + W y = (W) { 0x80008000, 0x80008000, 0x80008000, 
0x80008000 } + w; + return y; +} + +W +f9 (void) +{ + W y = (W) { 0x80000000, 0x80000000, 0x80000000, 0x80000000 } + w; + return y; +} diff --git a/gcc/testsuite/gcc.target/powerpc/pr101384-2.c b/gcc/testsuite/gcc.target/powerpc/pr101384-2.c new file mode 100644 index 0000000..f395708 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr101384-2.c @@ -0,0 +1,79 @@ +/* PR target/101384 */ +/* { dg-do compile { target be } } */ +/* { dg-options "-O2 -maltivec" } */ +/* { dg-require-effective-target powerpc_altivec_ok } */ +/* { dg-final { scan-assembler-times {\mvspltis[whb] [^\n\r]*,-1\M} 9 } } */ +/* { dg-final { scan-assembler-times {\mvslw\M} 3 } } */ +/* { dg-final { scan-assembler-times {\mvslh\M} 3 } } */ +/* { dg-final { scan-assembler-times {\mvslb\M} 3 } } */ + +typedef unsigned char __attribute__((__vector_size__ (16))) U; +typedef unsigned short __attribute__((__vector_size__ (16))) V; +typedef unsigned int __attribute__((__vector_size__ (16))) W; + +U u; +V v; +W w; + +U +f1 (void) +{ + U y = (U) { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 } + u; + return y; +} + +U +f2 (void) +{ + U y = (U) { 0x80, 0, 0x80, 0, 0x80, 0, 0x80, 0, 0x80, 0, 0x80, 0, 0x80, 0, 0x80, 0 } + u; + return y; +} + +U +f3 (void) +{ + U y = (U) { 0x80, 0, 0, 0, 0x80, 0, 0, 0, 0x80, 0, 0, 0, 0x80, 0, 0, 0 } + u; + return y; +} + +V +f4 (void) +{ + V y = (V) { 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080, 0x8080 } + v; + return y; +} + +V +f5 (void) +{ + V y = (V) { 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000 } + v; + return y; +} + +V +f6 (void) +{ + V y = (V) { 0x8000, 0, 0x8000, 0, 0x8000, 0, 0x8000, 0 } + v; + return y; +} + +W +f7 (void) +{ + W y = (W) { 0x80808080, 0x80808080, 0x80808080, 0x80808080 } + w; + return y; +} + +W +f8 (void) +{ + W y = (W) { 0x80008000, 0x80008000, 0x80008000, 0x80008000 } + w; + return y; +} + +W +f9 (void) +{ + W y = (W) { 0x80000000, 0x80000000, 0x80000000, 
0x80000000 } + w; + return y; +} -- cgit v1.1 From 7fcb33455c9dc9359d98cd6bffe7f32f282ed713 Mon Sep 17 00:00:00 2001 From: Michael Meissner Date: Tue, 20 Jul 2021 12:56:19 -0400 Subject: PR 100167: Fix vector long long multiply/divide tests on power10. This patch updates the vector long long multiply and divide tests to supply the correct code information if power10 code generation is used. 2021-06-18 Michael Meissner gcc/testsuite/ PR testsuite/100167 * gcc.target/powerpc/fold-vec-div-longlong.c: Fix expected code generation on power10. * gcc.target/powerpc/fold-vec-mult-longlong.c: Likewise. --- gcc/testsuite/gcc.target/powerpc/fold-vec-div-longlong.c | 7 +++++-- gcc/testsuite/gcc.target/powerpc/fold-vec-mult-longlong.c | 7 +++++-- 2 files changed, 10 insertions(+), 4 deletions(-) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-div-longlong.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-div-longlong.c index 312e984..f6a9b29 100644 --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-div-longlong.c +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-div-longlong.c @@ -19,5 +19,8 @@ test6 (vector unsigned long long x, vector unsigned long long y) { return vec_div (x, y); } -/* { dg-final { scan-assembler-times {\mdivd\M} 2 } } */ -/* { dg-final { scan-assembler-times {\mdivdu\M} 2 } } */ + +/* { dg-final { scan-assembler-times {\mdivd\M} 2 { target { ! has_arch_pwr10 } } } } */ +/* { dg-final { scan-assembler-times {\mdivdu\M} 2 { target { ! 
has_arch_pwr10 } } } } */ +/* { dg-final { scan-assembler-times {\mvdivsd\M} 1 { target { has_arch_pwr10 } } } } */ +/* { dg-final { scan-assembler-times {\mvdivud\M} 1 { target { has_arch_pwr10 } } } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-mult-longlong.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-mult-longlong.c index 38dba9f..dff073d 100644 --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-mult-longlong.c +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-mult-longlong.c @@ -20,5 +20,8 @@ test6 (vector unsigned long long x, vector unsigned long long y) return vec_mul (x, y); } -/* { dg-final { scan-assembler-times "\[ \t\]mulld " 4 { target lp64 } } } */ - +/* Power10 can generate the vmulld instruction even in 32-bit. Before power10, + we limit the code to lp64, since 32-bit cannot generate the mulld + instruction. */ +/* { dg-final { scan-assembler-times {\mmulld\M} 4 { target { lp64 && { ! has_arch_pwr10 } } } } } */ +/* { dg-final { scan-assembler-times {\mvmulld\M} 2 { target { has_arch_pwr10 } } } } */ -- cgit v1.1 From 00dcc88a0ed7bd148ea86d900b6c93574a2e1f26 Mon Sep 17 00:00:00 2001 From: Martin Sebor Date: Tue, 20 Jul 2021 11:14:19 -0600 Subject: Adjust by-value function vec arguments to by-reference. gcc/c-family/ChangeLog: * c-common.c (c_build_shufflevector): Adjust by-value argument to by-const-reference. * c-common.h (c_build_shufflevector): Same. gcc/c/ChangeLog: * c-tree.h (c_build_function_call_vec): Adjust by-value argument to by-const-reference. * c-typeck.c (c_build_function_call_vec): Same. gcc/ChangeLog: * cfgloop.h (single_likely_exit): Adjust by-value argument to by-const-reference. * cfgloopanal.c (single_likely_exit): Same. * cgraph.h (struct cgraph_node): Same. * cgraphclones.c (cgraph_node::create_virtual_clone): Same. * genautomata.c (merge_states): Same. * genextract.c (VEC_char_to_string): Same. * genmatch.c (dt_node::gen_kids_1): Same. (walk_captures): Adjust by-value argument to by-reference. 
* gimple-ssa-store-merging.c (check_no_overlap): Adjust by-value argument to by-const-reference. * gimple.c (gimple_build_call_vec): Same. (gimple_build_call_internal_vec): Same. (gimple_build_switch): Same. (sort_case_labels): Same. (preprocess_case_label_vec_for_gimple): Adjust by-value argument to by-reference. * gimple.h (gimple_build_call_vec): Adjust by-value argument to by-const-reference. (gimple_build_call_internal_vec): Same. (gimple_build_switch): Same. (sort_case_labels): Same. (preprocess_case_label_vec_for_gimple): Adjust by-value argument to by-reference. * haifa-sched.c (calc_priorities): Adjust by-value argument to by-const-reference. (sched_init_luids): Same. (haifa_init_h_i_d): Same. * ipa-cp.c (ipa_get_indirect_edge_target_1): Same. (adjust_callers_for_value_intersection): Adjust by-value argument to by-reference. (find_more_scalar_values_for_callers_subset): Adjust by-value argument to by-const-reference. (find_more_contexts_for_caller_subset): Same. (find_aggregate_values_for_callers_subset): Same. (copy_useful_known_contexts): Same. * ipa-fnsummary.c (remap_edge_summaries): Same. (remap_freqcounting_predicate): Same. * ipa-inline.c (add_new_edges_to_heap): Adjust by-value argument to by-reference. * ipa-predicate.c (predicate::remap_after_inlining): Adjust by-value argument to by-const-reference. * ipa-predicate.h (predicate::remap_after_inlining): Same. * ipa-prop.c (ipa_find_agg_cst_for_param): Same. * ipa-prop.h (ipa_find_agg_cst_for_param): Same. * ira-build.c (ira_loop_tree_body_rev_postorder): Same. * read-rtl.c (add_overload_instance): Same. * rtl.h (native_decode_rtx): Same. (native_decode_vector_rtx): Same. * sched-int.h (sched_init_luids): Same. (haifa_init_h_i_d): Same. * simplify-rtx.c (native_decode_vector_rtx): Same. (native_decode_rtx): Same. * tree-call-cdce.c (gen_shrink_wrap_conditions): Same. (shrink_wrap_one_built_in_call_with_conds): Same. (shrink_wrap_conditional_dead_built_in_calls): Same. 
* tree-data-ref.c (create_runtime_alias_checks): Same. (compute_all_dependences): Same. * tree-data-ref.h (compute_all_dependences): Same. (create_runtime_alias_checks): Same. (index_in_loop_nest): Same. * tree-if-conv.c (mask_exists): Same. * tree-loop-distribution.c (class loop_distribution): Same. (loop_distribution::create_rdg_vertices): Same. (dump_rdg_partitions): Same. (debug_rdg_partitions): Same. (partition_contains_all_rw): Same. (loop_distribution::distribute_loop): Same. * tree-parloops.c (oacc_entry_exit_ok_1): Same. (oacc_entry_exit_single_gang): Same. * tree-ssa-loop-im.c (hoist_memory_references): Same. (loop_suitable_for_sm): Same. * tree-ssa-loop-niter.c (bound_index): Same. * tree-ssa-reassoc.c (update_ops): Same. (swap_ops_for_binary_stmt): Same. (rewrite_expr_tree): Same. (rewrite_expr_tree_parallel): Same. * tree-ssa-sccvn.c (ao_ref_init_from_vn_reference): Same. * tree-ssa-sccvn.h (ao_ref_init_from_vn_reference): Same. * tree-ssa-structalias.c (process_all_all_constraints): Same. (make_constraints_to): Same. (handle_lhs_call): Same. (find_func_aliases_for_builtin_call): Same. (sort_fieldstack): Same. (check_for_overlaps): Same. * tree-vect-loop-manip.c (vect_create_cond_for_align_checks): Same. (vect_create_cond_for_unequal_addrs): Same. (vect_create_cond_for_lower_bounds): Same. (vect_create_cond_for_alias_checks): Same. * tree-vect-slp-patterns.c (vect_validate_multiplication): Same. * tree-vect-slp.c (vect_analyze_slp_instance): Same. (vect_make_slp_decision): Same. (vect_slp_bbs): Same. (duplicate_and_interleave): Same. (vect_transform_slp_perm_load): Same. (vect_schedule_slp): Same. * tree-vectorizer.h (vect_transform_slp_perm_load): Same. (vect_schedule_slp): Same. (duplicate_and_interleave): Same. * tree.c (build_vector_from_ctor): Same. (build_vector): Same. (check_vector_cst): Same. (check_vector_cst_duplicate): Same. (check_vector_cst_fill): Same. (check_vector_cst_stepped): Same. * tree.h (build_vector_from_ctor): Same. 
--- gcc/c-family/c-common.c | 4 ++-- gcc/c-family/c-common.h | 2 +- gcc/c/c-tree.h | 5 +++-- gcc/c/c-typeck.c | 2 +- gcc/cfgloop.h | 2 +- gcc/cfgloopanal.c | 2 +- gcc/cgraph.h | 2 +- gcc/cgraphclones.c | 2 +- gcc/genautomata.c | 2 +- gcc/genextract.c | 2 +- gcc/genmatch.c | 19 ++++++++++--------- gcc/gimple-ssa-store-merging.c | 3 ++- gcc/gimple.c | 10 +++++----- gcc/gimple.h | 10 +++++----- gcc/haifa-sched.c | 8 ++++---- gcc/ipa-cp.c | 22 +++++++++++----------- gcc/ipa-fnsummary.c | 8 ++++---- gcc/ipa-inline.c | 2 +- gcc/ipa-predicate.c | 4 ++-- gcc/ipa-predicate.h | 2 +- gcc/ipa-prop.c | 2 +- gcc/ipa-prop.h | 2 +- gcc/ira-build.c | 2 +- gcc/read-rtl.c | 2 +- gcc/rtl.h | 4 ++-- gcc/sched-int.h | 4 ++-- gcc/simplify-rtx.c | 4 ++-- gcc/tree-call-cdce.c | 7 ++++--- gcc/tree-data-ref.c | 6 +++--- gcc/tree-data-ref.h | 9 +++++---- gcc/tree-if-conv.c | 2 +- gcc/tree-loop-distribution.c | 19 +++++++++++-------- gcc/tree-parloops.c | 5 +++-- gcc/tree-ssa-loop-im.c | 4 ++-- gcc/tree-ssa-loop-niter.c | 2 +- gcc/tree-ssa-reassoc.c | 9 +++++---- gcc/tree-ssa-sccvn.c | 10 ++++++---- gcc/tree-ssa-sccvn.h | 2 +- gcc/tree-ssa-structalias.c | 15 ++++++++------- gcc/tree-vect-loop-manip.c | 10 ++++++---- gcc/tree-vect-slp-patterns.c | 6 ++++-- gcc/tree-vect-slp.c | 14 ++++++++------ gcc/tree-vectorizer.h | 6 +++--- gcc/tree.c | 12 ++++++------ gcc/tree.h | 2 +- 45 files changed, 146 insertions(+), 127 deletions(-) (limited to 'gcc') diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c index fe3657b..aacdfb4 100644 --- a/gcc/c-family/c-common.c +++ b/gcc/c-family/c-common.c @@ -1115,8 +1115,8 @@ c_build_vec_perm_expr (location_t loc, tree v0, tree v1, tree mask, and have vector types, V0 has the same element type as V1, and the number of elements the result is that of MASK. 
*/ tree -c_build_shufflevector (location_t loc, tree v0, tree v1, vec mask, - bool complain) +c_build_shufflevector (location_t loc, tree v0, tree v1, + const vec &mask, bool complain) { tree ret; bool wrap = true; diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h index 50ca8fb..c4b2789 100644 --- a/gcc/c-family/c-common.h +++ b/gcc/c-family/c-common.h @@ -1049,7 +1049,7 @@ extern bool vector_targets_convertible_p (const_tree t1, const_tree t2); extern bool vector_types_convertible_p (const_tree t1, const_tree t2, bool emit_lax_note); extern tree c_build_vec_perm_expr (location_t, tree, tree, tree, bool = true); extern tree c_build_shufflevector (location_t, tree, tree, - vec, bool = true); + const vec &, bool = true); extern tree c_build_vec_convert (location_t, tree, location_t, tree, bool = true); extern void init_c_lex (void); diff --git a/gcc/c/c-tree.h b/gcc/c/c-tree.h index a671a3e..ab6db38 100644 --- a/gcc/c/c-tree.h +++ b/gcc/c/c-tree.h @@ -759,8 +759,9 @@ extern tree c_finish_omp_clauses (tree, enum c_omp_region_type); extern tree c_build_va_arg (location_t, tree, location_t, tree); extern tree c_finish_transaction (location_t, tree, int); extern bool c_tree_equal (tree, tree); -extern tree c_build_function_call_vec (location_t, vec, tree, - vec *, vec *); +extern tree c_build_function_call_vec (location_t, const vec&, + tree, vec *, + vec *); extern tree c_omp_clause_copy_ctor (tree, tree, tree); /* Set to 0 at beginning of a function definition, set to 1 if diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c index 5349ef1..4f7ed67 100644 --- a/gcc/c/c-typeck.c +++ b/gcc/c/c-typeck.c @@ -3240,7 +3240,7 @@ build_function_call_vec (location_t loc, vec arg_loc, /* Like build_function_call_vec, but call also resolve_overloaded_builtin. 
*/ tree -c_build_function_call_vec (location_t loc, vec arg_loc, +c_build_function_call_vec (location_t loc, const vec &arg_loc, tree function, vec *params, vec *origtypes) { diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h index 5e69927..5c2b98d 100644 --- a/gcc/cfgloop.h +++ b/gcc/cfgloop.h @@ -385,7 +385,7 @@ extern basic_block *get_loop_body_in_custom_order (const class loop *, void *, extern auto_vec get_loop_exit_edges (const class loop *, basic_block * = NULL); extern edge single_exit (const class loop *); -extern edge single_likely_exit (class loop *loop, vec); +extern edge single_likely_exit (class loop *loop, const vec &); extern unsigned num_loop_branches (const class loop *); extern edge loop_preheader_edge (const class loop *); diff --git a/gcc/cfgloopanal.c b/gcc/cfgloopanal.c index 2db46c8..4cd73c2 100644 --- a/gcc/cfgloopanal.c +++ b/gcc/cfgloopanal.c @@ -470,7 +470,7 @@ mark_loop_exit_edges (void) to noreturn call. */ edge -single_likely_exit (class loop *loop, vec exits) +single_likely_exit (class loop *loop, const vec &exits) { edge found = single_exit (loop); unsigned i; diff --git a/gcc/cgraph.h b/gcc/cgraph.h index 9f4338f..8c776d6 100644 --- a/gcc/cgraph.h +++ b/gcc/cgraph.h @@ -949,7 +949,7 @@ struct GTY((tag ("SYMTAB_FUNCTION"))) cgraph_node : public symtab_node /* Create callgraph node clone with new declaration. The actual body will be copied later at compilation stage. The name of the new clone will be constructed from the name of the original node, SUFFIX and NUM_SUFFIX. */ - cgraph_node *create_virtual_clone (vec redirect_callers, + cgraph_node *create_virtual_clone (const vec &redirect_callers, vec *tree_map, ipa_param_adjustments *param_adjustments, const char * suffix, unsigned num_suffix); diff --git a/gcc/cgraphclones.c b/gcc/cgraphclones.c index 7e463ac..fef4a23 100644 --- a/gcc/cgraphclones.c +++ b/gcc/cgraphclones.c @@ -564,7 +564,7 @@ clone_function_name (tree decl, const char *suffix) bitmap interface. 
*/ cgraph_node * -cgraph_node::create_virtual_clone (vec redirect_callers, +cgraph_node::create_virtual_clone (const vec &redirect_callers, vec *tree_map, ipa_param_adjustments *param_adjustments, const char * suffix, unsigned num_suffix) diff --git a/gcc/genautomata.c b/gcc/genautomata.c index 6bbfc68..e488c5f 100644 --- a/gcc/genautomata.c +++ b/gcc/genautomata.c @@ -6137,7 +6137,7 @@ evaluate_equiv_classes (automaton_t automaton, vec *equiv_classes) /* The function merges equivalent states of AUTOMATON. */ static void -merge_states (automaton_t automaton, vec equiv_classes) +merge_states (automaton_t automaton, const vec &equiv_classes) { state_t curr_state; state_t new_state; diff --git a/gcc/genextract.c b/gcc/genextract.c index 6fe4a25..3ed2f68 100644 --- a/gcc/genextract.c +++ b/gcc/genextract.c @@ -214,7 +214,7 @@ VEC_safe_set_locstr (md_rtx_info *info, vec *vp, /* Another helper subroutine of walk_rtx: given a vec, convert it to a NUL-terminated string in malloc memory. */ static char * -VEC_char_to_string (vec v) +VEC_char_to_string (const vec &v) { size_t n = v.length (); char *s = XNEWVEC (char, n + 1); diff --git a/gcc/genmatch.c b/gcc/genmatch.c index 970a2eb..9524845 100644 --- a/gcc/genmatch.c +++ b/gcc/genmatch.c @@ -1632,8 +1632,9 @@ public: void gen_kids (FILE *, int, bool, int); void gen_kids_1 (FILE *, int, bool, int, - vec, vec, vec, - vec, vec, vec); + const vec &, const vec &, + const vec &, const vec &, + const vec &, const vec &); void analyze (sinfo_map_t &); }; @@ -2983,12 +2984,12 @@ dt_node::gen_kids (FILE *f, int indent, bool gimple, int depth) void dt_node::gen_kids_1 (FILE *f, int indent, bool gimple, int depth, - vec gimple_exprs, - vec generic_exprs, - vec fns, - vec generic_fns, - vec preds, - vec others) + const vec &gimple_exprs, + const vec &generic_exprs, + const vec &fns, + const vec &generic_fns, + const vec &preds, + const vec &others) { char buf[128]; char *kid_opname = buf; @@ -5031,7 +5032,7 @@ parser::parse_pattern () 
recursively. */ static void -walk_captures (operand *op, vec > cpts) +walk_captures (operand *op, vec > &cpts) { if (! op) return; diff --git a/gcc/gimple-ssa-store-merging.c b/gcc/gimple-ssa-store-merging.c index ce54c78..5d3094b 100644 --- a/gcc/gimple-ssa-store-merging.c +++ b/gcc/gimple-ssa-store-merging.c @@ -2654,7 +2654,8 @@ gather_bswap_load_refs (vec *refs, tree val) go after the = _5 store and thus change behavior. */ static bool -check_no_overlap (vec m_store_info, unsigned int i, +check_no_overlap (const vec &m_store_info, + unsigned int i, bool all_integer_cst_p, unsigned int first_order, unsigned int last_order, unsigned HOST_WIDE_INT start, unsigned HOST_WIDE_INT end, unsigned int first_earlier, diff --git a/gcc/gimple.c b/gcc/gimple.c index 863bc0d..383da98 100644 --- a/gcc/gimple.c +++ b/gcc/gimple.c @@ -241,7 +241,7 @@ gimple_build_call_1 (tree fn, unsigned nargs) specified in vector ARGS. */ gcall * -gimple_build_call_vec (tree fn, vec args) +gimple_build_call_vec (tree fn, const vec &args) { unsigned i; unsigned nargs = args.length (); @@ -338,7 +338,7 @@ gimple_build_call_internal (enum internal_fn fn, unsigned nargs, ...) specified in vector ARGS. */ gcall * -gimple_build_call_internal_vec (enum internal_fn fn, vec args) +gimple_build_call_internal_vec (enum internal_fn fn, const vec &args) { unsigned i, nargs; gcall *call; @@ -802,7 +802,7 @@ gimple_build_switch_nlabels (unsigned nlabels, tree index, tree default_label) ARGS is a vector of labels excluding the default. */ gswitch * -gimple_build_switch (tree index, tree default_label, vec args) +gimple_build_switch (tree index, tree default_label, const vec &args) { unsigned i, nlabels = args.length (); @@ -3051,7 +3051,7 @@ compare_case_labels (const void *p1, const void *p2) /* Sort the case labels in LABEL_VEC in place in ascending order. 
*/ void -sort_case_labels (vec label_vec) +sort_case_labels (vec &label_vec) { label_vec.qsort (compare_case_labels); } @@ -3076,7 +3076,7 @@ sort_case_labels (vec label_vec) found or not. */ void -preprocess_case_label_vec_for_gimple (vec labels, +preprocess_case_label_vec_for_gimple (vec &labels, tree index_type, tree *default_casep) { diff --git a/gcc/gimple.h b/gcc/gimple.h index 29da919..31d7dd0 100644 --- a/gcc/gimple.h +++ b/gcc/gimple.h @@ -1516,11 +1516,11 @@ void gimple_init (gimple *g, enum gimple_code code, unsigned num_ops); gimple *gimple_alloc (enum gimple_code, unsigned CXX_MEM_STAT_INFO); greturn *gimple_build_return (tree); void gimple_call_reset_alias_info (gcall *); -gcall *gimple_build_call_vec (tree, vec ); +gcall *gimple_build_call_vec (tree, const vec &); gcall *gimple_build_call (tree, unsigned, ...); gcall *gimple_build_call_valist (tree, unsigned, va_list); gcall *gimple_build_call_internal (enum internal_fn, unsigned, ...); -gcall *gimple_build_call_internal_vec (enum internal_fn, vec ); +gcall *gimple_build_call_internal_vec (enum internal_fn, const vec &); gcall *gimple_build_call_from_tree (tree, tree); gassign *gimple_build_assign (tree, tree CXX_MEM_STAT_INFO); gassign *gimple_build_assign (tree, enum tree_code, @@ -1547,7 +1547,7 @@ gtry *gimple_build_try (gimple_seq, gimple_seq, gimple *gimple_build_wce (gimple_seq); gresx *gimple_build_resx (int); gswitch *gimple_build_switch_nlabels (unsigned, tree, tree); -gswitch *gimple_build_switch (tree, tree, vec ); +gswitch *gimple_build_switch (tree, tree, const vec &); geh_dispatch *gimple_build_eh_dispatch (int); gdebug *gimple_build_debug_bind (tree, tree, gimple * CXX_MEM_STAT_INFO); gdebug *gimple_build_debug_source_bind (tree, tree, gimple * CXX_MEM_STAT_INFO); @@ -1626,8 +1626,8 @@ extern bool nonbarrier_call_p (gimple *); extern bool infer_nonnull_range (gimple *, tree); extern bool infer_nonnull_range_by_dereference (gimple *, tree); extern bool infer_nonnull_range_by_attribute 
(gimple *, tree); -extern void sort_case_labels (vec); -extern void preprocess_case_label_vec_for_gimple (vec, tree, tree *); +extern void sort_case_labels (vec &); +extern void preprocess_case_label_vec_for_gimple (vec &, tree, tree *); extern void gimple_seq_set_location (gimple_seq, location_t); extern void gimple_seq_discard (gimple_seq); extern void maybe_remove_unused_call_args (struct function *, gimple *); diff --git a/gcc/haifa-sched.c b/gcc/haifa-sched.c index 9c88765..26d1127 100644 --- a/gcc/haifa-sched.c +++ b/gcc/haifa-sched.c @@ -891,7 +891,7 @@ static void move_block_after_check (rtx_insn *); static void move_succs (vec **, basic_block); static void sched_remove_insn (rtx_insn *); static void clear_priorities (rtx_insn *, rtx_vec_t *); -static void calc_priorities (rtx_vec_t); +static void calc_priorities (const rtx_vec_t &); static void add_jump_dependencies (rtx_insn *, rtx_insn *); #endif /* INSN_SCHEDULING */ @@ -8923,7 +8923,7 @@ clear_priorities (rtx_insn *insn, rtx_vec_t *roots_ptr) changed. ROOTS is a vector of instructions whose priority computation will trigger initialization of all cleared priorities. */ static void -calc_priorities (rtx_vec_t roots) +calc_priorities (const rtx_vec_t &roots) { int i; rtx_insn *insn; @@ -8988,7 +8988,7 @@ sched_init_insn_luid (rtx_insn *insn) The hook common_sched_info->luid_for_non_insn () is used to determine if notes, labels, etc. need luids. */ void -sched_init_luids (bb_vec_t bbs) +sched_init_luids (const bb_vec_t &bbs) { int i; basic_block bb; @@ -9062,7 +9062,7 @@ init_h_i_d (rtx_insn *insn) /* Initialize haifa_insn_data for BBS. 
*/ void -haifa_init_h_i_d (bb_vec_t bbs) +haifa_init_h_i_d (const bb_vec_t &bbs) { int i; basic_block bb; diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c index 57c18af..ce28ada 100644 --- a/gcc/ipa-cp.c +++ b/gcc/ipa-cp.c @@ -2946,9 +2946,9 @@ propagate_constants_across_call (struct cgraph_edge *cs) static tree ipa_get_indirect_edge_target_1 (struct cgraph_edge *ie, - vec known_csts, - vec known_contexts, - vec known_aggs, + const vec &known_csts, + const vec &known_contexts, + const vec &known_aggs, struct ipa_agg_replacement_value *agg_reps, bool *speculative) { @@ -2985,7 +2985,7 @@ ipa_get_indirect_edge_target_1 (struct cgraph_edge *ie, } if (!t) { - struct ipa_agg_value_set *agg; + const ipa_agg_value_set *agg; if (known_aggs.length () > (unsigned int) param_index) agg = &known_aggs[param_index]; else @@ -3045,7 +3045,7 @@ ipa_get_indirect_edge_target_1 (struct cgraph_edge *ie, if (!t && known_aggs.length () > (unsigned int) param_index && !ie->indirect_info->by_ref) { - struct ipa_agg_value_set *agg = &known_aggs[param_index]; + const ipa_agg_value_set *agg = &known_aggs[param_index]; t = ipa_find_agg_cst_for_param (agg, (unsigned) param_index < known_csts.length () @@ -4267,7 +4267,7 @@ get_info_about_necessary_edges (ipcp_value *val, cgraph_node *dest, this kind of adjustment is possible. 
*/ static bool -adjust_callers_for_value_intersection (vec callers, +adjust_callers_for_value_intersection (vec &callers, cgraph_node *node) { for (unsigned i = 0; i < callers.length (); i++) @@ -4725,8 +4725,8 @@ self_recursive_agg_pass_through_p (cgraph_edge *cs, ipa_agg_jf_item *jfunc, static void find_more_scalar_values_for_callers_subset (struct cgraph_node *node, - vec known_csts, - vec callers) + vec &known_csts, + const vec &callers) { ipa_node_params *info = ipa_node_params_sum->get (node); int i, count = ipa_get_param_count (info); @@ -4818,7 +4818,7 @@ static void find_more_contexts_for_caller_subset (cgraph_node *node, vec *known_contexts, - vec callers) + const vec &callers) { ipa_node_params *info = ipa_node_params_sum->get (node); int i, count = ipa_get_param_count (info); @@ -5179,7 +5179,7 @@ intersect_aggregates_with_edge (struct cgraph_edge *cs, int index, static struct ipa_agg_replacement_value * find_aggregate_values_for_callers_subset (struct cgraph_node *node, - vec callers) + const vec &callers) { ipa_node_params *dest_info = ipa_node_params_sum->get (node); struct ipa_agg_replacement_value *res; @@ -5413,7 +5413,7 @@ known_contexts_useful_p (vec known_contexts) /* Return a copy of KNOWN_CSTS if it is not empty, otherwise return vNULL. 
*/ static vec -copy_useful_known_contexts (vec known_contexts) +copy_useful_known_contexts (const vec &known_contexts) { if (known_contexts_useful_p (known_contexts)) return known_contexts.copy (); diff --git a/gcc/ipa-fnsummary.c b/gcc/ipa-fnsummary.c index 95d2875..cf80ce3 100644 --- a/gcc/ipa-fnsummary.c +++ b/gcc/ipa-fnsummary.c @@ -3967,8 +3967,8 @@ remap_edge_summaries (struct cgraph_edge *inlined_edge, class ipa_fn_summary *info, class ipa_node_params *params_summary, class ipa_fn_summary *callee_info, - vec operand_map, - vec offset_map, + const vec &operand_map, + const vec &offset_map, clause_t possible_truths, predicate *toplev_predicate) { @@ -4028,8 +4028,8 @@ remap_freqcounting_predicate (class ipa_fn_summary *info, class ipa_node_params *params_summary, class ipa_fn_summary *callee_info, vec *v, - vec operand_map, - vec offset_map, + const vec &operand_map, + const vec &offset_map, clause_t possible_truths, predicate *toplev_predicate) diff --git a/gcc/ipa-inline.c b/gcc/ipa-inline.c index 9d896be..413446b 100644 --- a/gcc/ipa-inline.c +++ b/gcc/ipa-inline.c @@ -1774,7 +1774,7 @@ compute_max_insns (cgraph_node *node, int insns) /* Compute badness of all edges in NEW_EDGES and add them to the HEAP. 
*/ static void -add_new_edges_to_heap (edge_heap_t *heap, vec new_edges) +add_new_edges_to_heap (edge_heap_t *heap, vec &new_edges) { while (new_edges.length () > 0) { diff --git a/gcc/ipa-predicate.c b/gcc/ipa-predicate.c index 6dd749b..e4b11ec 100644 --- a/gcc/ipa-predicate.c +++ b/gcc/ipa-predicate.c @@ -507,8 +507,8 @@ predicate predicate::remap_after_inlining (class ipa_fn_summary *info, class ipa_node_params *params_summary, class ipa_fn_summary *callee_info, - vec operand_map, - vec offset_map, + const vec &operand_map, + const vec &offset_map, clause_t possible_truths, const predicate &toplev_predicate) { diff --git a/gcc/ipa-predicate.h b/gcc/ipa-predicate.h index 3ed7104..ac52b54 100644 --- a/gcc/ipa-predicate.h +++ b/gcc/ipa-predicate.h @@ -243,7 +243,7 @@ public: predicate remap_after_inlining (class ipa_fn_summary *, class ipa_node_params *params_summary, class ipa_fn_summary *, - vec, vec, + const vec &, const vec &, clause_t, const predicate &); void stream_in (class lto_input_block *); diff --git a/gcc/ipa-prop.c b/gcc/ipa-prop.c index f74d2e1..43f46a5 100644 --- a/gcc/ipa-prop.c +++ b/gcc/ipa-prop.c @@ -3562,7 +3562,7 @@ ipa_find_agg_cst_from_init (tree scalar, HOST_WIDE_INT offset, bool by_ref) initializer of a constant. */ tree -ipa_find_agg_cst_for_param (struct ipa_agg_value_set *agg, tree scalar, +ipa_find_agg_cst_for_param (const ipa_agg_value_set *agg, tree scalar, HOST_WIDE_INT offset, bool by_ref, bool *from_global_constant) { diff --git a/gcc/ipa-prop.h b/gcc/ipa-prop.h index 3d28a6e..2fe220a 100644 --- a/gcc/ipa-prop.h +++ b/gcc/ipa-prop.h @@ -1092,7 +1092,7 @@ ipa_bits *ipa_get_ipa_bits_for_value (const widest_int &value, void ipa_analyze_node (struct cgraph_node *); /* Aggregate jump function related functions. 
*/ -tree ipa_find_agg_cst_for_param (struct ipa_agg_value_set *agg, tree scalar, +tree ipa_find_agg_cst_for_param (const ipa_agg_value_set *agg, tree scalar, HOST_WIDE_INT offset, bool by_ref, bool *from_global_constant = NULL); bool ipa_load_from_parm_agg (struct ipa_func_body_info *fbi, diff --git a/gcc/ira-build.c b/gcc/ira-build.c index 4031ce1..4212065 100644 --- a/gcc/ira-build.c +++ b/gcc/ira-build.c @@ -1672,7 +1672,7 @@ finish_cost_vectors (void) static vec ira_loop_tree_body_rev_postorder (ira_loop_tree_node_t loop_node ATTRIBUTE_UNUSED, - vec loop_preorder) + const vec &loop_preorder) { vec topsort_nodes = vNULL; unsigned int n_loop_preorder; diff --git a/gcc/read-rtl.c b/gcc/read-rtl.c index 9254028..0411666 100644 --- a/gcc/read-rtl.c +++ b/gcc/read-rtl.c @@ -835,7 +835,7 @@ md_reader::handle_overloaded_name (rtx original, vec *iterators) gives the iterator associated with argument I of ONAME. */ static void -add_overload_instance (overloaded_name *oname, vec iterators, rtx x) +add_overload_instance (overloaded_name *oname, const vec &iterators, rtx x) { /* Create the instance. 
*/ overloaded_instance *instance = new overloaded_instance; diff --git a/gcc/rtl.h b/gcc/rtl.h index 2dbc433..5ffe900 100644 --- a/gcc/rtl.h +++ b/gcc/rtl.h @@ -2416,9 +2416,9 @@ extern void get_full_rtx_cost (rtx, machine_mode, enum rtx_code, int, struct full_rtx_costs *); extern bool native_encode_rtx (machine_mode, rtx, vec &, unsigned int, unsigned int); -extern rtx native_decode_rtx (machine_mode, vec, +extern rtx native_decode_rtx (machine_mode, const vec &, unsigned int); -extern rtx native_decode_vector_rtx (machine_mode, vec, +extern rtx native_decode_vector_rtx (machine_mode, const vec &, unsigned int, unsigned int, unsigned int); extern poly_uint64 subreg_lsb (const_rtx); extern poly_uint64 subreg_size_lsb (poly_uint64, poly_uint64, poly_uint64); diff --git a/gcc/sched-int.h b/gcc/sched-int.h index 4727ab2..868f1eb 100644 --- a/gcc/sched-int.h +++ b/gcc/sched-int.h @@ -43,12 +43,12 @@ extern void sched_init_bbs (void); extern void sched_extend_luids (void); extern void sched_init_insn_luid (rtx_insn *); -extern void sched_init_luids (bb_vec_t); +extern void sched_init_luids (const bb_vec_t &); extern void sched_finish_luids (void); extern void sched_extend_target (void); -extern void haifa_init_h_i_d (bb_vec_t); +extern void haifa_init_h_i_d (const bb_vec_t &); extern void haifa_finish_h_i_d (void); /* Hooks that are common to all the schedulers. */ diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c index 2d169d3..fd306bf 100644 --- a/gcc/simplify-rtx.c +++ b/gcc/simplify-rtx.c @@ -6752,7 +6752,7 @@ native_encode_rtx (machine_mode mode, rtx x, vec &bytes, Return the vector on success, otherwise return NULL_RTX. */ rtx -native_decode_vector_rtx (machine_mode mode, vec bytes, +native_decode_vector_rtx (machine_mode mode, const vec &bytes, unsigned int first_byte, unsigned int npatterns, unsigned int nelts_per_pattern) { @@ -6797,7 +6797,7 @@ native_decode_vector_rtx (machine_mode mode, vec bytes, Return the rtx on success, otherwise return NULL_RTX. 
*/ rtx -native_decode_rtx (machine_mode mode, vec bytes, +native_decode_rtx (machine_mode mode, const vec &bytes, unsigned int first_byte) { if (VECTOR_MODE_P (mode)) diff --git a/gcc/tree-call-cdce.c b/gcc/tree-call-cdce.c index 6668397..d9b9b4c 100644 --- a/gcc/tree-call-cdce.c +++ b/gcc/tree-call-cdce.c @@ -761,7 +761,7 @@ get_no_error_domain (enum built_in_function fnc) condition are separated by NULL tree in the vector. */ static void -gen_shrink_wrap_conditions (gcall *bi_call, vec conds, +gen_shrink_wrap_conditions (gcall *bi_call, const vec &conds, unsigned int *nconds) { gcall *call; @@ -797,7 +797,8 @@ gen_shrink_wrap_conditions (gcall *bi_call, vec conds, when it is non-null, it is called while all of the CONDS are true. */ static void -shrink_wrap_one_built_in_call_with_conds (gcall *bi_call, vec conds, +shrink_wrap_one_built_in_call_with_conds (gcall *bi_call, + const vec &conds, unsigned int nconds, gcall *bi_newcall = NULL) { @@ -1132,7 +1133,7 @@ use_internal_fn (gcall *call) wrapping transformation. */ static void -shrink_wrap_conditional_dead_built_in_calls (vec calls) +shrink_wrap_conditional_dead_built_in_calls (const vec &calls) { unsigned i = 0; diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c index b6abd8b..210ac28 100644 --- a/gcc/tree-data-ref.c +++ b/gcc/tree-data-ref.c @@ -2643,7 +2643,7 @@ create_intersect_range_checks (class loop *loop, tree *cond_expr, void create_runtime_alias_checks (class loop *loop, - vec *alias_pairs, + const vec *alias_pairs, tree * cond_expr) { tree part_cond_expr; @@ -5635,9 +5635,9 @@ compute_affine_dependence (struct data_dependence_relation *ddr, is small enough to be handled. 
*/ bool -compute_all_dependences (vec datarefs, +compute_all_dependences (const vec &datarefs, vec *dependence_relations, - vec loop_nest, + const vec &loop_nest, bool compute_self_and_rr) { struct data_dependence_relation *ddr; diff --git a/gcc/tree-data-ref.h b/gcc/tree-data-ref.h index 8001cc5..a0ff2a8 100644 --- a/gcc/tree-data-ref.h +++ b/gcc/tree-data-ref.h @@ -551,9 +551,9 @@ extern struct data_dependence_relation *initialize_data_dependence_relation extern void compute_affine_dependence (struct data_dependence_relation *, loop_p); extern void compute_self_dependence (struct data_dependence_relation *); -extern bool compute_all_dependences (vec , +extern bool compute_all_dependences (const vec &, vec *, - vec, bool); + const vec &, bool); extern tree find_data_references_in_bb (class loop *, basic_block, vec *); extern unsigned int dr_alignment (innermost_loop_behavior *); @@ -578,7 +578,8 @@ extern int data_ref_compare_tree (tree, tree); extern void prune_runtime_alias_test_list (vec *, poly_uint64); extern void create_runtime_alias_checks (class loop *, - vec *, tree*); + const vec *, + tree*); extern tree dr_direction_indicator (struct data_reference *); extern tree dr_zero_step_indicator (struct data_reference *); extern bool dr_known_forward_stride_p (struct data_reference *); @@ -666,7 +667,7 @@ ddr_dependence_level (ddr_p ddr) /* Return the index of the variable VAR in the LOOP_NEST array. */ static inline int -index_in_loop_nest (int var, vec loop_nest) +index_in_loop_nest (int var, const vec &loop_nest) { class loop *loopi; int var_index; diff --git a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c index 345488e..49e89cf 100644 --- a/gcc/tree-if-conv.c +++ b/gcc/tree-if-conv.c @@ -2208,7 +2208,7 @@ insert_gimplified_predicates (loop_p loop) mask if it was created for given SIZE and -1 otherwise. 
*/ static int -mask_exists (int size, vec vec) +mask_exists (int size, const vec &vec) { unsigned int ix; int v; diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c index 65aa1df..a984d21 100644 --- a/gcc/tree-loop-distribution.c +++ b/gcc/tree-loop-distribution.c @@ -527,7 +527,8 @@ class loop_distribution /* Build the vertices of the reduced dependence graph RDG. Return false if that failed. */ - bool create_rdg_vertices (struct graph *rdg, vec stmts, loop_p loop); + bool create_rdg_vertices (struct graph *rdg, const vec &stmts, + loop_p loop); /* Initialize STMTS with all the statements of LOOP. We use topological order to discover all statements. The order is important because @@ -646,7 +647,7 @@ class loop_distribution statements from STMTS into separate loops. Returns the number of distributed loops. Set NB_CALLS to number of generated builtin calls. Set *DESTROY_P to whether LOOP needs to be destroyed. */ - int distribute_loop (class loop *loop, vec stmts, + int distribute_loop (class loop *loop, const vec &stmts, control_dependences *cd, int *nb_calls, bool *destroy_p, bool only_patterns_p); @@ -699,7 +700,8 @@ bb_top_order_cmp_r (const void *x, const void *y, void *loop) } bool -loop_distribution::create_rdg_vertices (struct graph *rdg, vec stmts, +loop_distribution::create_rdg_vertices (struct graph *rdg, + const vec &stmts, loop_p loop) { int i; @@ -1953,7 +1955,7 @@ loop_distribution::rdg_build_partitions (struct graph *rdg, /* Dump to FILE the PARTITIONS. */ static void -dump_rdg_partitions (FILE *file, vec partitions) +dump_rdg_partitions (FILE *file, const vec &partitions) { int i; partition *partition; @@ -1963,10 +1965,10 @@ dump_rdg_partitions (FILE *file, vec partitions) } /* Debug PARTITIONS. 
*/ -extern void debug_rdg_partitions (vec ); +extern void debug_rdg_partitions (const vec &); DEBUG_FUNCTION void -debug_rdg_partitions (vec partitions) +debug_rdg_partitions (const vec &partitions) { dump_rdg_partitions (stderr, partitions); } @@ -2017,7 +2019,7 @@ number_of_rw_in_partition (struct graph *rdg, partition *partition) static bool partition_contains_all_rw (struct graph *rdg, - vec partitions) + const vec &partitions) { int i; partition *partition; @@ -2921,7 +2923,8 @@ loop_distribution::finalize_partitions (class loop *loop, Set *DESTROY_P to whether LOOP needs to be destroyed. */ int -loop_distribution::distribute_loop (class loop *loop, vec stmts, +loop_distribution::distribute_loop (class loop *loop, + const vec &stmts, control_dependences *cd, int *nb_calls, bool *destroy_p, bool only_patterns_p) { diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c index fe1baef..bb54757 100644 --- a/gcc/tree-parloops.c +++ b/gcc/tree-parloops.c @@ -3713,7 +3713,7 @@ ref_conflicts_with_region (gimple_stmt_iterator gsi, ao_ref *ref, reduction results in REDUCTION_STORES. */ static bool -oacc_entry_exit_ok_1 (bitmap in_loop_bbs, vec region_bbs, +oacc_entry_exit_ok_1 (bitmap in_loop_bbs, const vec &region_bbs, reduction_info_table_type *reduction_list, bitmap reduction_stores) { @@ -3828,7 +3828,8 @@ oacc_entry_exit_ok_1 (bitmap in_loop_bbs, vec region_bbs, if any changes were made.
*/ static bool -oacc_entry_exit_single_gang (bitmap in_loop_bbs, vec region_bbs, +oacc_entry_exit_single_gang (bitmap in_loop_bbs, + const vec &region_bbs, bitmap reduction_stores) { tree gang_pos = NULL_TREE; diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c index 81b4ec2..dfb3984 100644 --- a/gcc/tree-ssa-loop-im.c +++ b/gcc/tree-ssa-loop-im.c @@ -2557,7 +2557,7 @@ sm_seq_valid_bb (class loop *loop, basic_block bb, tree vdef, static void hoist_memory_references (class loop *loop, bitmap mem_refs, - vec exits) + const vec &exits) { im_mem_ref *ref; unsigned i; @@ -2970,7 +2970,7 @@ find_refs_for_sm (class loop *loop, bitmap sm_executed, bitmap refs_to_sm) static bool loop_suitable_for_sm (class loop *loop ATTRIBUTE_UNUSED, - vec exits) + const vec &exits) { unsigned i; edge ex; diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c index 6fabf10..1b5605c 100644 --- a/gcc/tree-ssa-loop-niter.c +++ b/gcc/tree-ssa-loop-niter.c @@ -3929,7 +3929,7 @@ wide_int_cmp (const void *p1, const void *p2) Lookup by binary search. */ static int -bound_index (vec bounds, const widest_int &bound) +bound_index (const vec &bounds, const widest_int &bound) { unsigned int end = bounds.length (); unsigned int begin = 0; diff --git a/gcc/tree-ssa-reassoc.c b/gcc/tree-ssa-reassoc.c index 2dd4435..8498cfc 100644 --- a/gcc/tree-ssa-reassoc.c +++ b/gcc/tree-ssa-reassoc.c @@ -4486,7 +4486,7 @@ get_ops (tree var, enum tree_code code, vec *ops, stmts. */ static tree -update_ops (tree var, enum tree_code code, vec ops, +update_ops (tree var, enum tree_code code, const vec &ops, unsigned int *pidx, class loop *loop) { gimple *stmt = SSA_NAME_DEF_STMT (var); @@ -5033,7 +5033,7 @@ remove_visited_stmt_chain (tree var) cases, but it is unlikely to be worth it.
*/ static void -swap_ops_for_binary_stmt (vec ops, +swap_ops_for_binary_stmt (const vec &ops, unsigned int opindex, gimple *stmt) { operand_entry *oe1, *oe2, *oe3; @@ -5104,7 +5104,8 @@ insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert) static tree rewrite_expr_tree (gimple *stmt, enum tree_code rhs_code, unsigned int opindex, - vec ops, bool changed, bool next_changed) + const vec &ops, bool changed, + bool next_changed) { tree rhs1 = gimple_assign_rhs1 (stmt); tree rhs2 = gimple_assign_rhs2 (stmt); @@ -5326,7 +5327,7 @@ get_reassociation_width (int ops_num, enum tree_code opc, static void rewrite_expr_tree_parallel (gassign *stmt, int width, - vec ops) + const vec &ops) { enum tree_code opcode = gimple_assign_rhs_code (stmt); int op_num = ops.length (); diff --git a/gcc/tree-ssa-sccvn.c b/gcc/tree-ssa-sccvn.c index a174c31..f6f900c 100644 --- a/gcc/tree-ssa-sccvn.c +++ b/gcc/tree-ssa-sccvn.c @@ -1042,9 +1042,8 @@ copy_reference_ops_from_ref (tree ref, vec *result) bool ao_ref_init_from_vn_reference (ao_ref *ref, alias_set_type set, alias_set_type base_set, - tree type, vec ops) + tree type, const vec &ops) { - vn_reference_op_t op; unsigned i; tree base = NULL_TREE; tree *op0_p = &base; @@ -1067,7 +1066,10 @@ ao_ref_init_from_vn_reference (ao_ref *ref, size = wi::to_poly_offset (size_tree); /* Lower the final access size from the outermost expression. */ - op = &ops[0]; + const_vn_reference_op_t cst_op = &ops[0]; + /* Cast away constness for the sake of the const-unsafe + FOR_EACH_VEC_ELT(). 
*/ + vn_reference_op_t op = const_cast(cst_op); size_tree = NULL_TREE; if (op->opcode == COMPONENT_REF) size_tree = DECL_SIZE (op->op0); @@ -1098,7 +1100,7 @@ ao_ref_init_from_vn_reference (ao_ref *ref, && op->op0 && DECL_P (TREE_OPERAND (op->op0, 0))) { - vn_reference_op_t pop = &ops[i-1]; + const_vn_reference_op_t pop = &ops[i-1]; base = TREE_OPERAND (op->op0, 0); if (known_eq (pop->off, -1)) { diff --git a/gcc/tree-ssa-sccvn.h b/gcc/tree-ssa-sccvn.h index 6df526c..9610059 100644 --- a/gcc/tree-ssa-sccvn.h +++ b/gcc/tree-ssa-sccvn.h @@ -254,7 +254,7 @@ tree vn_nary_op_lookup_pieces (unsigned int, enum tree_code, vn_nary_op_t vn_nary_op_insert_pieces (unsigned int, enum tree_code, tree, tree *, tree, unsigned int); bool ao_ref_init_from_vn_reference (ao_ref *, alias_set_type, alias_set_type, - tree, vec ); + tree, const vec &); vec vn_reference_operands_for_lookup (tree); tree vn_reference_lookup_pieces (tree, alias_set_type, alias_set_type, tree, vec , diff --git a/gcc/tree-ssa-structalias.c b/gcc/tree-ssa-structalias.c index 71894b3..c694926 100644 --- a/gcc/tree-ssa-structalias.c +++ b/gcc/tree-ssa-structalias.c @@ -3713,8 +3713,8 @@ get_constraint_for_rhs (tree t, vec *results) entries in *LHSC. */ static void -process_all_all_constraints (vec lhsc, - vec rhsc) +process_all_all_constraints (const vec &lhsc, + const vec &rhsc) { struct constraint_expr *lhsp, *rhsp; unsigned i, j; @@ -3814,7 +3814,7 @@ do_structure_copy (tree lhsop, tree rhsop) /* Create constraints ID = { rhsc }. */ static void -make_constraints_to (unsigned id, vec rhsc) +make_constraints_to (unsigned id, const vec &rhsc) { struct constraint_expr *c; struct constraint_expr includes; @@ -4162,7 +4162,7 @@ handle_rhs_call (gcall *stmt, vec *results) the LHS point to global and escaped variables. 
*/ static void -handle_lhs_call (gcall *stmt, tree lhs, int flags, vec rhsc, +handle_lhs_call (gcall *stmt, tree lhs, int flags, vec &rhsc, tree fndecl) { auto_vec lhsc; @@ -4623,9 +4623,10 @@ find_func_aliases_for_builtin_call (struct function *fn, gcall *t) case BUILT_IN_REALLOC: if (gimple_call_lhs (t)) { + auto_vec rhsc; handle_lhs_call (t, gimple_call_lhs (t), gimple_call_return_flags (t) | ERF_NOALIAS, - vNULL, fndecl); + rhsc, fndecl); get_constraint_for_ptr_offset (gimple_call_lhs (t), NULL_TREE, &lhsc); get_constraint_for_ptr_offset (gimple_call_arg (t, 0), @@ -5696,7 +5697,7 @@ fieldoff_compare (const void *pa, const void *pb) /* Sort a fieldstack according to the field offset and sizes. */ static void -sort_fieldstack (vec fieldstack) +sort_fieldstack (vec &fieldstack) { fieldstack.qsort (fieldoff_compare); } @@ -6106,7 +6107,7 @@ create_function_info_for (tree decl, const char *name, bool add_id, FIELDSTACK is assumed to be sorted by offset. */ static bool -check_for_overlaps (vec fieldstack) +check_for_overlaps (const vec &fieldstack) { fieldoff_s *fo = NULL; unsigned int i; diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c index 1f3d661..4988c93 100644 --- a/gcc/tree-vect-loop-manip.c +++ b/gcc/tree-vect-loop-manip.c @@ -3192,7 +3192,7 @@ vect_create_cond_for_align_checks (loop_vec_info loop_vinfo, tree *cond_expr, gimple_seq *cond_expr_stmt_list) { - vec may_misalign_stmts + const vec &may_misalign_stmts = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo); stmt_vec_info stmt_info; int mask = LOOP_VINFO_PTR_MASK (loop_vinfo); @@ -3283,7 +3283,8 @@ vect_create_cond_for_align_checks (loop_vec_info loop_vinfo, static void vect_create_cond_for_unequal_addrs (loop_vec_info loop_vinfo, tree *cond_expr) { - vec pairs = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo); + const vec &pairs + = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo); unsigned int i; vec_object_pair *pair; FOR_EACH_VEC_ELT (pairs, i, pair) @@ -3302,7 +3303,8 @@ 
vect_create_cond_for_unequal_addrs (loop_vec_info loop_vinfo, tree *cond_expr) static void vect_create_cond_for_lower_bounds (loop_vec_info loop_vinfo, tree *cond_expr) { - vec lower_bounds = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo); + const vec &lower_bounds + = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo); for (unsigned int i = 0; i < lower_bounds.length (); ++i) { tree expr = lower_bounds[i].expr; @@ -3344,7 +3346,7 @@ vect_create_cond_for_lower_bounds (loop_vec_info loop_vinfo, tree *cond_expr) void vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr) { - vec comp_alias_ddrs = + const vec &comp_alias_ddrs = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo); if (comp_alias_ddrs.is_empty ()) diff --git a/gcc/tree-vect-slp-patterns.c b/gcc/tree-vect-slp-patterns.c index f774cac..ad209de 100644 --- a/gcc/tree-vect-slp-patterns.c +++ b/gcc/tree-vect-slp-patterns.c @@ -790,7 +790,8 @@ is_eq_or_top (complex_perm_kinds_t perm, complex_perm_kinds_t kind) static inline bool vect_validate_multiplication (slp_tree_to_load_perm_map_t *perm_cache, - vec left_op, vec right_op, + const vec &left_op, + const vec &right_op, bool neg_first, bool *conj_first_operand, bool fms) { @@ -862,7 +863,8 @@ vect_validate_multiplication (slp_tree_to_load_perm_map_t *perm_cache, static inline bool vect_validate_multiplication (slp_tree_to_load_perm_map_t *perm_cache, - vec op, complex_perm_kinds_t permKind) + const vec &op, + complex_perm_kinds_t permKind) { /* The left node is the more common case, test it first. */ if (!is_eq_or_top (linear_loads_p (perm_cache, op[0]), permKind)) diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 97fba6a..b9d88c2 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -3354,7 +3354,8 @@ vect_analyze_slp_instance (vec_info *vinfo, else if (kind == slp_inst_kind_reduc_group) { /* Collect reduction statements. 
*/ - vec reductions = as_a (vinfo)->reductions; + const vec &reductions + = as_a (vinfo)->reductions; scalar_stmts.create (reductions.length ()); for (i = 0; reductions.iterate (i, &next_info); i++) if (STMT_VINFO_RELEVANT_P (next_info) @@ -4172,7 +4173,8 @@ vect_make_slp_decision (loop_vec_info loop_vinfo) { unsigned int i; poly_uint64 unrolling_factor = 1; - vec slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo); + const vec &slp_instances + = LOOP_VINFO_SLP_INSTANCES (loop_vinfo); slp_instance instance; int decided_to_slp = 0; @@ -5939,7 +5941,7 @@ vect_slp_region (vec bbs, vec datarefs, true if anything in the basic-block was vectorized. */ static bool -vect_slp_bbs (vec bbs) +vect_slp_bbs (const vec &bbs) { vec datarefs = vNULL; auto_vec dataref_groups; @@ -6084,7 +6086,7 @@ vect_slp_function (function *fun) void duplicate_and_interleave (vec_info *vinfo, gimple_seq *seq, tree vector_type, - vec elts, unsigned int nresults, + const vec &elts, unsigned int nresults, vec &results) { unsigned int nelts = elts.length (); @@ -6440,7 +6442,7 @@ vect_get_slp_defs (vec_info *, bool vect_transform_slp_perm_load (vec_info *vinfo, - slp_tree node, vec dr_chain, + slp_tree node, const vec &dr_chain, gimple_stmt_iterator *gsi, poly_uint64 vf, bool analyze_only, unsigned *n_perms, unsigned int *n_loads, bool dce_chain) @@ -7469,7 +7471,7 @@ vect_schedule_scc (vec_info *vinfo, slp_tree node, slp_instance instance, /* Generate vector code for SLP_INSTANCES in the loop/basic block. 
*/ void -vect_schedule_slp (vec_info *vinfo, vec slp_instances) +vect_schedule_slp (vec_info *vinfo, const vec &slp_instances) { slp_instance instance; unsigned int i; diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index d9f0195..deb2247 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -2072,12 +2072,12 @@ extern tree cse_and_gimplify_to_preheader (loop_vec_info, tree); extern void vect_slp_init (void); extern void vect_slp_fini (void); extern void vect_free_slp_instance (slp_instance); -extern bool vect_transform_slp_perm_load (vec_info *, slp_tree, vec, +extern bool vect_transform_slp_perm_load (vec_info *, slp_tree, const vec &, gimple_stmt_iterator *, poly_uint64, bool, unsigned *, unsigned * = nullptr, bool = false); extern bool vect_slp_analyze_operations (vec_info *); -extern void vect_schedule_slp (vec_info *, vec); +extern void vect_schedule_slp (vec_info *, const vec &); extern opt_result vect_analyze_slp (vec_info *, unsigned); extern bool vect_make_slp_decision (loop_vec_info); extern void vect_detect_hybrid_slp (loop_vec_info); @@ -2095,7 +2095,7 @@ extern bool can_duplicate_and_interleave_p (vec_info *, unsigned int, tree, unsigned int * = NULL, tree * = NULL, tree * = NULL); extern void duplicate_and_interleave (vec_info *, gimple_seq *, tree, - vec, unsigned int, vec &); + const vec &, unsigned int, vec &); extern int vect_get_place_in_interleaving_chain (stmt_vec_info, stmt_vec_info); extern bool vect_update_shared_vectype (stmt_vec_info, tree); extern slp_tree vect_create_new_slp_node (unsigned, tree_code); diff --git a/gcc/tree.c b/gcc/tree.c index 1aa6e55..bead1ac 100644 --- a/gcc/tree.c +++ b/gcc/tree.c @@ -2047,7 +2047,7 @@ make_vector (unsigned log2_npatterns, are extracted from V, a vector of CONSTRUCTOR_ELT. 
*/ tree -build_vector_from_ctor (tree type, vec *v) +build_vector_from_ctor (tree type, const vec *v) { if (vec_safe_length (v) == 0) return build_zero_cst (type); @@ -14428,7 +14428,7 @@ test_labels () are given by VALS. */ static tree -build_vector (tree type, vec vals MEM_STAT_DECL) +build_vector (tree type, const vec &vals MEM_STAT_DECL) { gcc_assert (known_eq (vals.length (), TYPE_VECTOR_SUBPARTS (type))); tree_vector_builder builder (type, vals.length (), 1); @@ -14439,7 +14439,7 @@ build_vector (tree type, vec vals MEM_STAT_DECL) /* Check that VECTOR_CST ACTUAL contains the elements in EXPECTED. */ static void -check_vector_cst (vec expected, tree actual) +check_vector_cst (const vec &expected, tree actual) { ASSERT_KNOWN_EQ (expected.length (), TYPE_VECTOR_SUBPARTS (TREE_TYPE (actual))); @@ -14452,7 +14452,7 @@ check_vector_cst (vec expected, tree actual) and that its elements match EXPECTED. */ static void -check_vector_cst_duplicate (vec expected, tree actual, +check_vector_cst_duplicate (const vec &expected, tree actual, unsigned int npatterns) { ASSERT_EQ (npatterns, VECTOR_CST_NPATTERNS (actual)); @@ -14468,7 +14468,7 @@ check_vector_cst_duplicate (vec expected, tree actual, EXPECTED. */ static void -check_vector_cst_fill (vec expected, tree actual, +check_vector_cst_fill (const vec &expected, tree actual, unsigned int npatterns) { ASSERT_EQ (npatterns, VECTOR_CST_NPATTERNS (actual)); @@ -14483,7 +14483,7 @@ check_vector_cst_fill (vec expected, tree actual, and that its elements match EXPECTED. 
*/ static void -check_vector_cst_stepped (vec expected, tree actual, +check_vector_cst_stepped (const vec &expected, tree actual, unsigned int npatterns) { ASSERT_EQ (npatterns, VECTOR_CST_NPATTERNS (actual)); diff --git a/gcc/tree.h b/gcc/tree.h index 8bdf16d..13c088c 100644 --- a/gcc/tree.h +++ b/gcc/tree.h @@ -4475,7 +4475,7 @@ extern tree build_int_cst (tree, poly_int64); extern tree build_int_cstu (tree type, poly_uint64); extern tree build_int_cst_type (tree, poly_int64); extern tree make_vector (unsigned, unsigned CXX_MEM_STAT_INFO); -extern tree build_vector_from_ctor (tree, vec *); +extern tree build_vector_from_ctor (tree, const vec *); extern tree build_vector_from_val (tree, tree); extern tree build_uniform_cst (tree, tree); extern tree build_vec_series (tree, tree, tree); -- cgit v1.1 From 818203714e8640ce29c886b5060c91b12ad3a7c4 Mon Sep 17 00:00:00 2001 From: Jeff Law Date: Tue, 20 Jul 2021 14:49:54 -0400 Subject: Attach MEM_EXPR information when flushing BLKmode args to the stack - V2 gcc/ * function.c (assign_parm_setup_block): Use adjust_address instead of change_address to preserve MEM_EXPR and friends. --- gcc/function.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/function.c b/gcc/function.c index 00b2fe7..af3d57b 100644 --- a/gcc/function.c +++ b/gcc/function.c @@ -3036,7 +3036,15 @@ assign_parm_setup_block (struct assign_parm_data_all *all, reg = gen_rtx_REG (word_mode, REGNO (entry_parm)); reg = convert_to_mode (mode, copy_to_reg (reg), 1); } - emit_move_insn (change_address (mem, mode, 0), reg); + + /* We use adjust_address to get a new MEM with the mode + changed. adjust_address is better than change_address + for this purpose because adjust_address does not lose + the MEM_EXPR associated with the MEM. + + If the MEM_EXPR is lost, then optimizations like DSE + assume the MEM escapes and thus is not subject to DSE. 
*/ + emit_move_insn (adjust_address (mem, mode, 0), reg); } #ifdef BLOCK_REG_PADDING -- cgit v1.1 From e07d30fdcaec4906e0dcb948fc4748bf74c15c05 Mon Sep 17 00:00:00 2001 From: Martin Sebor Date: Tue, 20 Jul 2021 13:08:39 -0600 Subject: Handle all UBSAN built-ins in -Wuninitialized [PR101300]. Resolves: PR middle-end/101300 - -fsanitize=undefined suppresses -Wuninitialized for a VLA read at -O0 gcc/ChangeLog: PR middle-end/101300 * tree-ssa-uninit.c (check_defs): Handle UBSAN built-ins. gcc/testsuite/ChangeLog: PR middle-end/101300 * gcc.dg/uninit-pr101300.c: New test. --- gcc/testsuite/gcc.dg/uninit-pr101300.c | 53 ++++++++++++++++++++++++++++++++++ gcc/tree-ssa-uninit.c | 23 +++++++++++++-- 2 files changed, 73 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/uninit-pr101300.c (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/uninit-pr101300.c b/gcc/testsuite/gcc.dg/uninit-pr101300.c new file mode 100644 index 0000000..4392e8ba --- /dev/null +++ b/gcc/testsuite/gcc.dg/uninit-pr101300.c @@ -0,0 +1,53 @@ +/* PR middle-end/101300 - -fsanitize=undefined suppresses -Wuninitialized + for a VLA read at -O0 + { dg-do compile } + { dg-options "-O0 -Wall -fsanitize=undefined" } */ + +int warn_vla_rd0 (int n) +{ + char a[n]; + return a[0]; // { dg-warning "\\\[-Wuninitialized]" } +} + +int warn_vla_rd1 (int n) +{ + char a[n]; + return a[1]; // { dg-warning "\\\[-Wuninitialized]" } +} + +int warn_vla_rdi (int n, int i) +{ + char a[n]; + return a[i]; // { dg-warning "\\\[-Wuninitialized]" } +} + + +int warn_vla_wr0_rd2_1_0 (int n) +{ + char a[n]; + a[0] = __LINE__; + int x = a[2]; // { dg-warning "\\\[-Wuninitialized]" } + int y = a[1]; // { dg-warning "\\\[-Wuninitialized]" } + int z = a[0]; + return x + y + z; +} + +int warn_vla_wr1_rd2_1_0 (int n) +{ + char a[n]; + a[1] = __LINE__; + int x = a[2]; // { dg-warning "\\\[-Wuninitialized]" } + int y = a[1]; + int z = a[0]; // { dg-warning "\\\[-Wuninitialized]" } + return x + y + z; +} + +int 
warn_vla_wr2_rd2_1_0 (int n) +{ + char a[n]; + a[2] = __LINE__; + int x = a[2]; + int y = a[1]; // { dg-warning "\\\[-Wuninitialized]" } + int z = a[0]; // { dg-warning "\\\[-Wuninitialized]" } + return x + y + z; +} diff --git a/gcc/tree-ssa-uninit.c b/gcc/tree-ssa-uninit.c index 24ac031..148f3c2 100644 --- a/gcc/tree-ssa-uninit.c +++ b/gcc/tree-ssa-uninit.c @@ -228,9 +228,26 @@ check_defs (ao_ref *ref, tree vdef, void *data_) gimple *def_stmt = SSA_NAME_DEF_STMT (vdef); /* The ASAN_MARK intrinsic doesn't modify the variable. */ - if (is_gimple_call (def_stmt) - && gimple_call_internal_p (def_stmt, IFN_ASAN_MARK)) - return false; + if (is_gimple_call (def_stmt)) + { + if (gimple_call_internal_p (def_stmt) + && gimple_call_internal_fn (def_stmt) == IFN_ASAN_MARK) + return false; + + if (tree fndecl = gimple_call_fndecl (def_stmt)) + { + /* Some sanitizer calls pass integer arguments to built-ins + that expect pointers. Avoid using gimple_call_builtin_p() + which fails for such calls. */ + if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL) + { + built_in_function fncode = DECL_FUNCTION_CODE (fndecl); + if (fncode > BEGIN_SANITIZER_BUILTINS + && fncode < END_SANITIZER_BUILTINS) + return false; + } + } + } /* End of VLA scope is not a kill. */ if (gimple_call_builtin_p (def_stmt, BUILT_IN_STACK_RESTORE)) -- cgit v1.1 From 8bf5b49ebd2176b8c535147377381dd07fbdd643 Mon Sep 17 00:00:00 2001 From: Martin Sebor Date: Tue, 20 Jul 2021 13:48:20 -0600 Subject: Correct stpcpy offset computation for -Warray-bounds et al. [PR101397]. Resolves: PR middle-end/101397 - spurious warning writing to the result of stpcpy minus 1 gcc/ChangeLog: PR middle-end/101397 * builtins.c (gimple_call_return_array): Add argument. Correct offsets for memchr, mempcpy, stpcpy, and stpncpy. (compute_objsize_r): Adjust offset computation for argument returning built-ins. gcc/testsuite/ChangeLog: PR middle-end/101397 * gcc.dg/Warray-bounds-80.c: New test. * gcc.dg/Warray-bounds-81.c: New test. 
* gcc.dg/Warray-bounds-82.c: New test. * gcc.dg/Warray-bounds-83.c: New test. * gcc.dg/Warray-bounds-84.c: New test. * gcc.dg/Wstringop-overflow-46.c: Adjust expected output. --- gcc/builtins.c | 103 +++++++-- gcc/testsuite/gcc.dg/Warray-bounds-80.c | 96 +++++++++ gcc/testsuite/gcc.dg/Warray-bounds-81.c | 302 +++++++++++++++++++++++++++ gcc/testsuite/gcc.dg/Warray-bounds-82.c | 258 +++++++++++++++++++++++ gcc/testsuite/gcc.dg/Warray-bounds-83.c | 172 +++++++++++++++ gcc/testsuite/gcc.dg/Warray-bounds-84.c | 65 ++++++ gcc/testsuite/gcc.dg/Wstringop-overflow-46.c | 5 +- 7 files changed, 981 insertions(+), 20 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/Warray-bounds-80.c create mode 100644 gcc/testsuite/gcc.dg/Warray-bounds-81.c create mode 100644 gcc/testsuite/gcc.dg/Warray-bounds-82.c create mode 100644 gcc/testsuite/gcc.dg/Warray-bounds-83.c create mode 100644 gcc/testsuite/gcc.dg/Warray-bounds-84.c (limited to 'gcc') diff --git a/gcc/builtins.c b/gcc/builtins.c index 39ab139..170d776 100644 --- a/gcc/builtins.c +++ b/gcc/builtins.c @@ -5200,12 +5200,19 @@ get_offset_range (tree x, gimple *stmt, offset_int r[2], range_query *rvals) /* Return the argument that the call STMT to a built-in function returns or null if it doesn't. On success, set OFFRNG[] to the range of offsets from the argument reflected in the value returned by the built-in if it - can be determined, otherwise to 0 and HWI_M1U respectively. */ + can be determined, otherwise to 0 and HWI_M1U respectively. Set + *PAST_END for functions like mempcpy that might return a past the end + pointer (most functions return a dereferenceable pointer to an existing + element of an array). */ static tree -gimple_call_return_array (gimple *stmt, offset_int offrng[2], +gimple_call_return_array (gimple *stmt, offset_int offrng[2], bool *past_end, range_query *rvals) { + /* Clear and set below for the rare function(s) that might return + a past-the-end pointer. 
*/ + *past_end = false; + { /* Check for attribute fn spec to see if the function returns one of its arguments. */ @@ -5213,6 +5220,7 @@ gimple_call_return_array (gimple *stmt, offset_int offrng[2], unsigned int argno; if (fnspec.returns_arg (&argno)) { + /* Functions return the first argument (not a range). */ offrng[0] = offrng[1] = 0; return gimple_call_arg (stmt, argno); } @@ -5242,6 +5250,7 @@ gimple_call_return_array (gimple *stmt, offset_int offrng[2], if (gimple_call_num_args (stmt) != 2) return NULL_TREE; + /* Allocation functions return a pointer to the beginning. */ offrng[0] = offrng[1] = 0; return gimple_call_arg (stmt, 1); } @@ -5253,10 +5262,6 @@ gimple_call_return_array (gimple *stmt, offset_int offrng[2], case BUILT_IN_MEMMOVE: case BUILT_IN_MEMMOVE_CHK: case BUILT_IN_MEMSET: - case BUILT_IN_STPCPY: - case BUILT_IN_STPCPY_CHK: - case BUILT_IN_STPNCPY: - case BUILT_IN_STPNCPY_CHK: case BUILT_IN_STRCAT: case BUILT_IN_STRCAT_CHK: case BUILT_IN_STRCPY: @@ -5265,18 +5270,34 @@ gimple_call_return_array (gimple *stmt, offset_int offrng[2], case BUILT_IN_STRNCAT_CHK: case BUILT_IN_STRNCPY: case BUILT_IN_STRNCPY_CHK: + /* Functions return the first argument (not a range). */ offrng[0] = offrng[1] = 0; return gimple_call_arg (stmt, 0); case BUILT_IN_MEMPCPY: case BUILT_IN_MEMPCPY_CHK: { + /* The returned pointer is in a range constrained by the smaller + of the upper bound of the size argument and the source object + size. */ + offrng[0] = 0; + offrng[1] = HOST_WIDE_INT_M1U; tree off = gimple_call_arg (stmt, 2); - if (!get_offset_range (off, stmt, offrng, rvals)) + bool off_valid = get_offset_range (off, stmt, offrng, rvals); + if (!off_valid || offrng[0] != offrng[1]) { - offrng[0] = 0; - offrng[1] = HOST_WIDE_INT_M1U; + /* If the offset is either indeterminate or in some range, + try to constrain its upper bound to at most the size + of the source object. 
*/ + access_ref aref; + tree src = gimple_call_arg (stmt, 1); + if (compute_objsize (src, 1, &aref, rvals) + && aref.sizrng[1] < offrng[1]) + offrng[1] = aref.sizrng[1]; } + + /* Mempcpy may return a past-the-end pointer. */ + *past_end = true; return gimple_call_arg (stmt, 0); } @@ -5284,23 +5305,63 @@ gimple_call_return_array (gimple *stmt, offset_int offrng[2], { tree off = gimple_call_arg (stmt, 2); if (get_offset_range (off, stmt, offrng, rvals)) - offrng[0] = 0; + offrng[1] -= 1; else - { - offrng[0] = 0; - offrng[1] = HOST_WIDE_INT_M1U; - } + offrng[1] = HOST_WIDE_INT_M1U; + + offrng[0] = 0; return gimple_call_arg (stmt, 0); } case BUILT_IN_STRCHR: case BUILT_IN_STRRCHR: case BUILT_IN_STRSTR: + offrng[0] = 0; + offrng[1] = HOST_WIDE_INT_M1U; + return gimple_call_arg (stmt, 0); + + case BUILT_IN_STPCPY: + case BUILT_IN_STPCPY_CHK: { + access_ref aref; + tree src = gimple_call_arg (stmt, 1); + if (compute_objsize (src, 1, &aref, rvals)) + offrng[1] = aref.sizrng[1] - 1; + else + offrng[1] = HOST_WIDE_INT_M1U; + offrng[0] = 0; + return gimple_call_arg (stmt, 0); + } + + case BUILT_IN_STPNCPY: + case BUILT_IN_STPNCPY_CHK: + { + /* The returned pointer is in a range between the first argument + and it plus the smaller of the upper bound of the size argument + and the source object size. */ offrng[1] = HOST_WIDE_INT_M1U; + tree off = gimple_call_arg (stmt, 2); + if (!get_offset_range (off, stmt, offrng, rvals) + || offrng[0] != offrng[1]) + { + /* If the offset is either indeterminate or in some range, + try to constrain its upper bound to at most the size + of the source object. */ + access_ref aref; + tree src = gimple_call_arg (stmt, 1); + if (compute_objsize (src, 1, &aref, rvals) + && aref.sizrng[1] < offrng[1]) + offrng[1] = aref.sizrng[1]; + } + + /* When the source is the empty string the returned pointer is + a copy of the argument. Otherwise stpcpy can also return + a past-the-end pointer. 
*/ + offrng[0] = 0; + *past_end = true; + return gimple_call_arg (stmt, 0); } - return gimple_call_arg (stmt, 0); default: break; @@ -5753,9 +5814,12 @@ compute_objsize_r (tree ptr, int ostype, access_ref *pref, /* For functions known to return one of their pointer arguments try to determine what the returned pointer points to, and on success add OFFRNG which was set to the offset added by - the function (e.g., memchr) to the overall offset. */ + the function (e.g., memchr or stpcpy) to the overall offset. + */ + bool past_end; offset_int offrng[2]; - if (tree ret = gimple_call_return_array (stmt, offrng, rvals)) + if (tree ret = gimple_call_return_array (stmt, offrng, + &past_end, rvals)) { if (!compute_objsize_r (ret, ostype, pref, snlim, qry)) return false; @@ -5764,6 +5828,11 @@ compute_objsize_r (tree ptr, int ostype, access_ref *pref, the object. */ offset_int remrng[2]; remrng[1] = pref->size_remaining (remrng); + if (remrng[1] != 0 && !past_end) + /* Decrement the size for functions that never return + a past-the-end pointer. 
*/ + remrng[1] -= 1; + if (remrng[1] < offrng[1]) offrng[1] = remrng[1]; pref->add_offset (offrng[0], offrng[1]); diff --git a/gcc/testsuite/gcc.dg/Warray-bounds-80.c b/gcc/testsuite/gcc.dg/Warray-bounds-80.c new file mode 100644 index 0000000..4ef32fb --- /dev/null +++ b/gcc/testsuite/gcc.dg/Warray-bounds-80.c @@ -0,0 +1,96 @@ +/* PR tree-optimization/101397 - spurious warning writing to the result + of stpcpy minus 1 + { dg-do compile } + { dg-options "-O2 -Wall" } */ + +char* stpcpy (char*, const char*); + +void sink (int, ...); + +extern char ax[], a3[3], a5[5], *s; + +volatile int x; + +void test_stpcpy (int i) +{ + { + char *p = stpcpy (ax, s); + x = p[-9]; // { dg-bogus "\\\[-Warray-bounds" } + x = p[-1]; // { dg-bogus "\\\[-Warray-bounds" } + x = p[ 0]; + x = p[+9]; + } + + { + char *p = stpcpy (a3, s); + x = p[-2]; // { dg-bogus "\\\[-Warray-bounds" } + x = p[-1]; // { dg-bogus "\\\[-Warray-bounds" } + } + + { + char *p = stpcpy (a3, s); + x = p[-3]; // { dg-warning "\\\[-Warray-bounds" } + sink (p[-2], p[-1], p[0], p[1], p[2]); + x = p[ 3]; // { dg-warning "\\\[-Warray-bounds" } + } + + { + /* Stpcpy always returns a pointer to the copied nul (which must + exist) and never a past-the-end pointer. As a result, P below + is in [a5, a5 + 4]. 
*/ + char *p = stpcpy (a5, s); + x = p[-5]; // { dg-warning "\\\[-Warray-bounds" } + sink (p[-4], p[-3], p[-2], p[-1], p[0]); + sink (p[1], p[2], p[3], p[4]); + x = p[ 5]; // { dg-warning "\\\[-Warray-bounds" } + } + + { + char *p = stpcpy (a5 + 1, s); + x = p[-5]; // { dg-warning "\\\[-Warray-bounds" } + sink (p[-4], p[-3], p[-2], p[-1], p[0]); + sink (p[1], p[2], p[3]); + x = p[ 4]; // { dg-warning "\\\[-Warray-bounds" } + } + + { + char *p = stpcpy (a5 + 2, s); + x = p[-5]; // { dg-warning "\\\[-Warray-bounds" } + sink (p[-4], p[-3], p[-2], p[-1], p[0]); + sink (p[1], p[2]); + x = p[ 3]; // { dg-warning "\\\[-Warray-bounds" } + } + + { + char *p = stpcpy (a5 + 3, s); + x = p[-5]; // { dg-warning "\\\[-Warray-bounds" } + sink (p[-4], p[-3], p[-2], p[-1], p[0]); + sink (p[1]); + x = p[ 2]; // { dg-warning "\\\[-Warray-bounds" } + } + + { + /* Because strlen (a3) is at most 2, the stpcpy call must return + a pointer in the range [ax, ax + 2], and so -3 is necessarily + out of bounds. */ + char *p = stpcpy (ax, a3); + p[-3] = 1; // { dg-warning "\\\[-Warray-bounds" } + } + + { + if (i >= 0) + i = -1; + + char *p = stpcpy (a3, s); + x = p[i]; // { dg-bogus "\\\[-Warray-bounds" } + } + + { + if (i >= -3) + i = -3; + + char *p = stpcpy (a3, s); + p[i] = 1; // { dg-warning "\\\[-Warray-bounds" } + } + +} diff --git a/gcc/testsuite/gcc.dg/Warray-bounds-81.c b/gcc/testsuite/gcc.dg/Warray-bounds-81.c new file mode 100644 index 0000000..27e725d --- /dev/null +++ b/gcc/testsuite/gcc.dg/Warray-bounds-81.c @@ -0,0 +1,302 @@ +/* PR tree-optimization/101397 - spurious warning writing to the result + of stpcpy minus 1 + Verify warnings for indexing into a pointer returned from stpncpy. + The call stpncpy(S1, S2, N) returns the address of the copy of + the first NUL is it exists or &S1[N] otherwise. 
+ { dg-do compile } + { dg-options "-O2 -Wall -Wno-stringop-truncation" } */ + +typedef __SIZE_TYPE__ size_t; + +void* calloc (size_t, size_t); +char* stpncpy (char*, const char*, size_t); + +void sink (int, ...); + +extern char ax[], a3[3], a5[5], a7[7], a9[9], *s; + +volatile int x; + +/* Verify warnings for indexing into the result of stpncpy with a source + pointing to an array of unknown bound. */ + +void test_stpncpy_from_ptr (int i, int n) +{ + { + // P is in [ax, ax + 5]. + char *p = stpncpy (ax, s, 5); + x = p[-6]; // { dg-warning "\\\[-Warray-bounds" } + sink (p[-5], p[-1], p[0], p[9]); + } + + { + // P is in [a5, a5 + 3]. + char *p = stpncpy (a5, s, 3); + x = p[-4]; // { dg-warning "\\\[-Warray-bounds" } + sink (p[-3], p[-2], p[-1], p[0]); + sink (p[ 1], p[ 2], p[ 3], p[4]); + x = p[ 5]; // { dg-warning "\\\[-Warray-bounds" } + } + + { + // P is in [ax, ax + 4]. + char *p = stpncpy (a5, s, 4); + x = p[-5]; // { dg-warning "\\\[-Warray-bounds" } + sink (p[-4], p[-3], p[-2], p[-1], p[0]); + sink (p[ 1], p[ 2], p[ 3], p[4]); + x = p[ 5]; // { dg-warning "\\\[-Warray-bounds" } + } + + { + // P is in [ax, ax + 5]. + char *p = stpncpy (a5, s, n); + x = p[-6]; // { dg-warning "\\\[-Warray-bounds" } + sink (p[-5], p[-4], p[-3], p[-2], p[-1], p[0]); + sink (p[ 1], p[ 2], p[ 3], p[4]); + x = p[ 5]; // { dg-warning "\\\[-Warray-bounds" } + } + + { + // P is in [ax, ax + 4]. + char *p = stpncpy (a5, s, 4); + + if (i > -1) i = -1; + x = p[i]; + + if (i > -2) i = -2; + x = p[i]; + + if (i > -3) i = -3; + x = p[i]; + + if (i > -4) i = -4; + x = p[i]; + + if (i > -5) i = -5; + x = p[i]; // { dg-warning "\\\[-Warray-bounds" } + } +} + +/* Verify warnings for indexing into the result of stpncpy with a source + an array of size 5. */ + +void test_stpncpy_from_a5 (int i, int n, int n3_9) +{ + { + // The returned pointer is in [ax, ax + 3]. 
+ char *p = stpncpy (ax, a5, 3); + x = p[-4]; // { dg-warning "\\\[-Warray-bounds" } + sink (p[-3], p[-2], p[-1], p[0], p[1], p[99]); + } + + { + // The returned pointer is in [ax, ax + 5]. + char *p = stpncpy (ax, a5, 5); + x = p[-6]; // { dg-warning "\\\[-Warray-bounds" } + x = p[-5]; + x = p[-1]; + x = p[ 0]; + x = p[ 9]; + } + + { + //The returned pointer is in [ax, ax + 5] even though n is not known. + char *p = stpncpy (ax, a5, n); + x = p[-6]; // { dg-warning "\\\[-Warray-bounds" } + sink (p[-5], p[-4], p[-2], p[-1], p[0]); + sink (p[1], p[2], p[3], p[4], p[9], p[99]); + } + + + { + // The returned pointer is in [a3, a3 + 3]. + char *p = stpncpy (a3, a5, 3); + x = p[-4]; // { dg-warning "\\\[-Warray-bounds" } + sink (p[-3], p[-2], p[-1], p[0]); + sink (p[ 1], p[ 2]); + x = p[ 3]; // { dg-warning "\\\[-Warray-bounds" } + } + + { + // The returned pointer is in [a3, a3 + 3]. + char *p = stpncpy (a3, a5, n); + x = p[-4]; // { dg-warning "\\\[-Warray-bounds" } + sink (p[-3], p[-2], p[-1], p[0]); + sink (p[ 1], p[ 2]); + x = p[ 3]; // { dg-warning "\\\[-Warray-bounds" } + } + + { + if (n3_9 < 3 || 9 < n3_9) + n3_9 = 3; + + // The returned pointer is in [a3, a3 + 3]. + char *p = stpncpy (a3, a5, n3_9); + x = p[-4]; // { dg-warning "\\\[-Warray-bounds" } + sink (p[-3], p[-2], p[-1], p[0]); + sink (p[ 1], p[ 2]); + x = p[ 3]; // { dg-warning "\\\[-Warray-bounds" } + } + + { + char *p = stpncpy (a3, a5, 3); + + if (i > -1) i = -1; + x = p[i]; + + if (i > -2) i = -2; + x = p[i]; + + if (i > -3) i = -3; + x = p[i]; + + if (i > -4) i = -4; + x = p[i]; // { dg-warning "\\\[-Warray-bounds" } + } +} + + +/* Verify warnings for indexing into the result of stpncpy with a source + an array of size 7. */ + +void test_stpncpy_from_a7 (int i, int n, int n3_9) +{ + { + // The returned pointer is ax + 5. 
+ char *p = stpncpy (ax, a7, 5); + x = p[-6]; // { dg-warning "\\\[-Warray-bounds" } + x = p[-5]; + x = p[-1]; + x = p[ 0]; + x = p[ 9]; + } + + { + //The returned pointer is in [ax, ax + 7] even though n is not known. + char *p = stpncpy (ax, a7, n); + x = p[-8]; // { dg-warning "\\\[-Warray-bounds" } + sink (p[-7], p[-6], p[-5], p[-4], p[-3], p[-2], p[-1], p[0]); + sink (p[1], p[2], p[3], p[4], p[5], p[6], p[7], p[8], p[9]); + } + + + { + // The returned pointer is in [a5, a5 + 3]. + char *p = stpncpy (a5, a7, 3); + x = p[-4]; // { dg-warning "\\\[-Warray-bounds" } + sink (p[-3], p[-2], p[-1], p[0]); + sink (p[1], p[2], p[3], p[4]); + x = p[ 5]; // { dg-warning "\\\[-Warray-bounds" } + } + + { + // The returned pointer is a5 + 4. + char *p = stpncpy (a5, a7, 4); + x = p[-5]; // { dg-warning "\\\[-Warray-bounds" } + sink (p[-4], p[-3], p[-2], p[-1], p[0]); + sink (p[1], p[2], p[3], p[4]); + x = p[ 5]; // { dg-warning "\\\[-Warray-bounds" } + } + + { + // The returned pointer is in [a5, a5 + 5]. + char *p = stpncpy (a5, a7, n); + x = p[-6]; // { dg-warning "\\\[-Warray-bounds" } + sink (p[-5], p[-4], p[-3], p[-2], p[-1], p[0]); + sink (p[1], p[2], p[3], p[4]); + x = p[ 5]; // { dg-warning "\\\[-Warray-bounds" } + } + + { + if (n3_9 < 3 || 9 < n3_9) + n3_9 = 3; + + // The returned pointer is in [a5, a5 + 5]. 
+ char *p = stpncpy (a5, a7, n3_9); + x = p[-6]; // { dg-warning "\\\[-Warray-bounds" } + sink (p[-5], p[-4], p[-3], p[-2], p[-1], p[0]); + sink (p[1], p[2], p[3], p[4]); + x = p[ 5]; // { dg-warning "\\\[-Warray-bounds" } + } + + { + char *p = stpncpy (a5, a7, 4); + + if (i > -1) i = -1; + x = p[i]; + + if (i > -2) i = -2; + x = p[i]; + + if (i > -3) i = -3; + x = p[i]; + + if (i > -4) i = -4; + x = p[i]; + + if (i > -5) i = -5; + x = p[i]; // { dg-warning "\\\[-Warray-bounds" } + } +} + + +void test_stpncpy_from_a5_to_allocated (int i, int n, int n5_7, int n3_9) +{ + if (n5_7 < 5 || 7 < n5_7) + n5_7 = 5; + + { + char *d = calloc (n5_7, 1); + char *p = stpncpy (d, s, n); + x = p[-8]; // { dg-warning "\\\[-Warray-bounds" } + sink (p[-7], p[-6], p[-5], p[-4], p[-3], p[-2], p[-1], p[0]); + sink (p[1], p[2], p[3], p[4], p[5], p[6]); + x = p[7]; // { dg-warning "\\\[-Warray-bounds" } + } + + { + char *d = calloc (n5_7, 1); + char *p = stpncpy (d, a3, n); + x = p[-4]; // { dg-warning "\\\[-Warray-bounds" } + sink (p[-3], p[-2], p[-1], p[0]); + sink (p[1], p[2], p[3], p[4], p[5], p[6]); + x = p[7]; // { dg-warning "\\\[-Warray-bounds" } + } + + { + char *d = calloc (n5_7, 1); + char *p = stpncpy (d, a5, n); + x = p[-6]; // { dg-warning "\\\[-Warray-bounds" } + sink (p[-5], p[-4], p[-3], p[-2], p[-1], p[0]); + sink (p[1], p[2], p[3], p[4], p[5], p[6]); + x = p[7]; // { dg-warning "\\\[-Warray-bounds" } + } + + { + char *d = calloc (n5_7, 1); + char *p = stpncpy (d, a9, n); + x = p[-8]; // { dg-warning "\\\[-Warray-bounds" } + sink (p[-7], p[-6], p[-5], p[-3], p[-4], p[-2], p[-1], p[0]); + sink (p[1], p[2], p[3], p[4], p[5], p[6]); + x = p[7]; // { dg-warning "\\\[-Warray-bounds" } + } + + { + char *d = calloc (n5_7, 1); + char *p = stpncpy (d, a3, n3_9); + x = p[-4]; // { dg-warning "\\\[-Warray-bounds" } + sink (p[-3], p[-2], p[-1], p[0]); + sink (p[1], p[2], p[3], p[4], p[5], p[6]); + x = p[7]; // { dg-warning "\\\[-Warray-bounds" } + } + + { + char *d = calloc (n5_7, 
1); + char *p = stpncpy (d, a9, n3_9); + x = p[-8]; // { dg-warning "\\\[-Warray-bounds" } + sink (p[-7], p[-6], p[-5], p[-4], p[-4], p[-2], p[-1], p[0]); + sink (p[1], p[2], p[3], p[4], p[5], p[6]); + x = p[7]; // { dg-warning "\\\[-Warray-bounds" } + } + +} diff --git a/gcc/testsuite/gcc.dg/Warray-bounds-82.c b/gcc/testsuite/gcc.dg/Warray-bounds-82.c new file mode 100644 index 0000000..b5dd919 --- /dev/null +++ b/gcc/testsuite/gcc.dg/Warray-bounds-82.c @@ -0,0 +1,258 @@ +/* PR tree-optimization/101397 - spurious warning writing to the result + of stpcpy minus 1 + Verify warnings for indexing into a pointer returned from mempcpy. + The call mempcpy(S1, S2, N) returns &S1[N]. + { dg-do compile } + { dg-options "-O2 -Wall" } */ + +typedef __SIZE_TYPE__ size_t; + +void* mempcpy (void*, const void*, size_t); + +extern char ax[], a3[3], a5[5], a7[7], *s; + +volatile int x; + +/* Verify warnings for indexing into the result of mempcpy with a source + pointing to an array of unknown bound. */ + +void test_mempcpy_from_ptr (int i) +{ + { + char *p = mempcpy (ax, s, 5); + x = p[-6]; // { dg-warning "\\\[-Warray-bounds" } + x = p[-5]; + x = p[-1]; + x = p[ 0]; + x = p[ 9]; + } + + { + char *p = mempcpy (a5, s, 3); + x = p[-4]; // { dg-warning "\\\[-Warray-bounds" } + x = p[-3]; + x = p[-2]; + x = p[-1]; + x = p[ 0]; + x = p[ 1]; + x = p[ 2]; // { dg-warning "\\\[-Warray-bounds" } + } + + { + char *p = mempcpy (a5, s, 4); + x = p[-5]; // { dg-warning "\\\[-Warray-bounds" } + x = p[-4]; + x = p[-3]; + x = p[-2]; + x = p[-1]; + x = p[ 0]; + x = p[ 1]; // { dg-warning "\\\[-Warray-bounds" } + } + + { + char *p = mempcpy (a5, s, 4); + + if (i > -1) i = -1; + x = p[i]; + + if (i > -2) i = -2; + x = p[i]; + + if (i > -3) i = -3; + x = p[i]; + + if (i > -4) i = -4; + x = p[i]; + + if (i > -5) i = -5; + x = p[i]; // { dg-warning "\\\[-Warray-bounds" } + } +} + +/* Verify warnings for indexing into the result of mempcpy with a source + an array of size 5. 
*/ + +void test_mempcpy_from_a5 (int i, int n, int n3_9) +{ + { + // The returned pointer is ax + 3 as specified by the bound. + char *p = mempcpy (ax, a5, 3); + x = p[-4]; // { dg-warning "\\\[-Warray-bounds" } + x = p[-3]; + x = p[-2]; + x = p[ 0]; + x = p[ 1]; + x = p[ 2]; + } + + { + // The returned pointer is ax + 5. + char *p = mempcpy (ax, a5, 5); + x = p[-6]; // { dg-warning "\\\[-Warray-bounds" } + x = p[-5]; + x = p[-1]; + x = p[ 0]; + x = p[ 9]; + } + + { + //The returned pointer is in [ax, ax + 5] even though n is not known. + char *p = mempcpy (ax, a5, n); + x = p[-6]; // { dg-warning "\\\[-Warray-bounds" } + x = p[-5]; + x = p[-1]; + x = p[ 0]; + x = p[ 9]; + } + + + { + // The returned pointer is a3 + 3. + char *p = mempcpy (a3, a5, 3); + x = p[-4]; // { dg-warning "\\\[-Warray-bounds" } + x = p[-3]; + x = p[-1]; + x = p[ 0]; // { dg-warning "\\\[-Warray-bounds" } + x = p[ 1]; // { dg-warning "\\\[-Warray-bounds" } + } + + { + // The returned pointer is in [a3, a3 + 3]. + char *p = mempcpy (a3, a5, n); + x = p[-4]; // { dg-warning "\\\[-Warray-bounds" } + x = p[-3]; + x = p[-2]; + x = p[-1]; + x = p[ 0]; + x = p[ 2]; + x = p[ 3]; // { dg-warning "\\\[-Warray-bounds" } + } + + { + if (n3_9 < 3 || 9 < n3_9) + n3_9 = 3; + + // The returned pointer is a3. + char *p = mempcpy (a3, a5, n3_9); + x = p[-4]; // { dg-warning "\\\[-Warray-bounds" } + x = p[-3]; + x = p[-2]; + x = p[-1]; + x = p[ 0]; // { dg-warning "\\\[-Warray-bounds" } + } + + { + char *p = mempcpy (a3, a5, 3); + + if (i > -1) i = -1; + x = p[i]; + + if (i > -2) i = -2; + x = p[i]; + + if (i > -3) i = -3; + x = p[i]; + + if (i > -4) i = -4; + x = p[i]; // { dg-warning "\\\[-Warray-bounds" } + } +} + + +/* Verify warnings for indexing into the result of mempcpy with a source + an array of size 7. */ + +void test_mempcpy_from_a7 (int i, int n, int n3_9) +{ + { + // The returned pointer is ax + 5. 
+ char *p = mempcpy (ax, a7, 5); + x = p[-6]; // { dg-warning "\\\[-Warray-bounds" } + x = p[-5]; + x = p[-1]; + x = p[ 0]; + x = p[ 9]; + } + + { + //The returned pointer is in [ax, ax + 7] even though n is not known. + char *p = mempcpy (ax, a7, n); + x = p[-8]; // { dg-warning "\\\[-Warray-bounds" } + x = p[-7]; + x = p[-1]; + x = p[ 0]; + x = p[ 9]; + } + + + { + // The returned pointer is a5 + 3 as specified by the bound. + char *p = mempcpy (a5, a7, 3); + x = p[-4]; // { dg-warning "\\\[-Warray-bounds" } + x = p[-3]; + x = p[-2]; + x = p[ 0]; + x = p[ 1]; + x = p[ 2]; // { dg-warning "\\\[-Warray-bounds" } + } + + { + // The returned pointer is a5 + 4. + char *p = mempcpy (a5, a7, 4); + x = p[-5]; // { dg-warning "\\\[-Warray-bounds" } + x = p[-4]; + x = p[-3]; + x = p[-2]; + x = p[-1]; + x = p[ 0]; + x = p[ 1]; // { dg-warning "\\\[-Warray-bounds" } + } + + { + // The returned pointer is in [a5, a5 + 5]. + char *p = mempcpy (a5, a7, n); + x = p[-6]; // { dg-warning "\\\[-Warray-bounds" } + x = p[-5]; + x = p[-3]; + x = p[-2]; + x = p[-1]; + x = p[ 0]; + x = p[ 4]; + x = p[ 5]; // { dg-warning "\\\[-Warray-bounds" } + } + + { + if (n3_9 < 3 || 9 < n3_9) + n3_9 = 3; + + // The returned pointer is in [a5 + 3, a5 + 5]. 
+ char *p = mempcpy (a5, a7, n3_9); + x = p[-6]; // { dg-warning "\\\[-Warray-bounds" } + x = p[-5]; + x = p[-3]; + x = p[-2]; + x = p[-1]; + x = p[ 0]; + x = p[ 1]; + x = p[ 2]; // { dg-warning "\\\[-Warray-bounds" } + } + + { + char *p = mempcpy (a5, a7, 4); + + if (i > -1) i = -1; + x = p[i]; + + if (i > -2) i = -2; + x = p[i]; + + if (i > -3) i = -3; + x = p[i]; + + if (i > -4) i = -4; + x = p[i]; + + if (i > -5) i = -5; + x = p[i]; // { dg-warning "\\\[-Warray-bounds" } + } +} diff --git a/gcc/testsuite/gcc.dg/Warray-bounds-83.c b/gcc/testsuite/gcc.dg/Warray-bounds-83.c new file mode 100644 index 0000000..b1d02ea --- /dev/null +++ b/gcc/testsuite/gcc.dg/Warray-bounds-83.c @@ -0,0 +1,172 @@ +/* PR tree-optimization/101397 - spurious warning writing to the result + of stpcpy minus 1 + Verify warnings for indexing into a pointer returned from stpncpy. + The call stpncpy(S1, S2, N) returns the address of the copy of + the first NUL is it exists or &S1[N] otherwise. + { dg-do compile } + { dg-options "-O2 -Wall -Wno-stringop-truncation" } */ + +typedef __SIZE_TYPE__ size_t; + +__attribute__ ((alloc_size (1))) const void* alloc (size_t); + +void* memchr (const void*, int, size_t); + +void sink (int, ...); + +extern char ax[], a3[3], a5[5], a7[7], a9[9]; + +volatile int x; + +/* Verify warnings for indexing into the result of memchr. */ + +void test_memchr (int i, int n, int n3_5, int n3_9) +{ + { + /* Because memchr never returns a past-the-end pointer the result + below is in [ax, ax + 4]. */ + const char *p = memchr (ax, x, 5); + x = p[-5]; // { dg-warning "\\\[-Warray-bounds" } + x = p[-4]; + x = p[-1]; + x = p[ 0]; + x = p[ 9]; + } + + { + // The returned pointer is in [ax, ax + n]. + const char *p = memchr (ax, x, n); + sink (p[-99], p[-6], p[-5], p[-4], p[-3], p[-2], p[-1], p[0]); + sink (p[1], p[2], p[3], p[4], p[5], p[6], p[7], p[8], p[99]); + } + + + { + // The returned pointer is in [a5, a5 + 2]. 
+ const char *p = memchr (a5, x, 3); + x = p[-3]; // { dg-warning "\\\[-Warray-bounds" } + sink (p[-2], p[-1], p[0]); + sink (p[1], p[2], p[3], p[4]); + x = p[ 5]; // { dg-warning "\\\[-Warray-bounds" } + } + + { + // The returned pointer is a5 + 4. + const char *p = memchr (a5, x, 4); + x = p[-4]; // { dg-warning "\\\[-Warray-bounds" } + sink (p[-3], p[-2], p[-1], p[0]); + sink (p[1], p[2], p[3], p[4]); + x = p[ 5]; // { dg-warning "\\\[-Warray-bounds" } + } + + { + // The returned pointer is in [a5, a5 + 4]. + const char *p = memchr (a5, x, n); + x = p[-5]; // { dg-warning "\\\[-Warray-bounds" } + sink (p[-4], p[-3], p[-2], p[-1], p[0]); + sink (p[1], p[2], p[3], p[4]); + x = p[ 5]; // { dg-warning "\\\[-Warray-bounds" } + } + + { + if (n3_5 < 3 || 5 < n3_5) + n3_5 = 3; + + // The returned pointer is in [a7, a7 + 4]. + const char *p = memchr (a7, x, n3_5); + x = p[-5]; // { dg-warning "\\\[-Warray-bounds" } + sink (p[-4], p[-3], p[-2], p[-1], p[0]); + sink (p[1], p[2], p[3], p[4], p[5], p[6]); + x = p[7]; // { dg-warning "\\\[-Warray-bounds" } + } + + { + if (n3_9 < 3 || 9 < n3_9) + n3_9 = 3; + + // The returned pointer is in [a5, a5 + 4]. 
+ const char *p = memchr (a5, x, n3_9); + x = p[-5]; // { dg-warning "\\\[-Warray-bounds" } + sink (p[-4], p[-3], p[-2], p[-1], p[0]); + sink (p[1], p[2], p[3], p[4]); + x = p[ 5]; // { dg-warning "\\\[-Warray-bounds" } + } + + { + const char *p = memchr (a5, x, 4); + + if (i > -1) i = -1; + x = p[i]; + + if (i > -2) i = -2; + x = p[i]; + + if (i > -3) i = -3; + x = p[i]; + + if (i > -4) i = -4; + x = p[i]; // { dg-warning "\\\[-Warray-bounds" } + } +} + + +void test_memchr_in_allocated (int i, int n, int n5_7, int n3_9) +{ + if (n5_7 < 5 || 7 < n5_7) + n5_7 = 5; + + { + const char *s = alloc (n5_7); + const char *p = memchr (s, x, n); + x = p[-7]; // { dg-warning "\\\[-Warray-bounds" } + sink (p[-6], p[-6], p[-5], p[-4], p[-3], p[-2], p[-1], p[0]); + sink (p[1], p[2], p[3], p[4], p[5], p[6]); + x = p[7]; // { dg-warning "\\\[-Warray-bounds" } + } + + { + const char *s = alloc (n5_7); + const char *p = memchr (s, x, n); + x = p[-7]; // { dg-warning "\\\[-Warray-bounds" } + sink (p[-6], p[-5], p[-4], p[-3], p[-2], p[-1], p[0]); + sink (p[1], p[2], p[3], p[4], p[5], p[6]); + x = p[7]; // { dg-warning "\\\[-Warray-bounds" } + } + + { + const char *s = alloc (n5_7); + const char *p = memchr (s, x, n); + x = p[-7]; // { dg-warning "\\\[-Warray-bounds" } + sink (p[-6], p[-5], p[-4], p[-3], p[-2], p[-1], p[0]); + sink (p[1], p[2], p[3], p[4], p[5], p[6]); + x = p[7]; // { dg-warning "\\\[-Warray-bounds" } + } + + { + const char *s = alloc (n5_7); + const char *p = memchr (s, x, n); + x = p[-7]; // { dg-warning "\\\[-Warray-bounds" } + sink (p[-6], p[-5], p[-3], p[-4], p[-2], p[-1], p[0]); + sink (p[1], p[2], p[3], p[4], p[5], p[6]); + x = p[7]; // { dg-warning "\\\[-Warray-bounds" } + } + + { + const char *s = alloc (n5_7); + const char *p = memchr (s, x, n3_9); + x = p[-7]; // { dg-warning "\\\[-Warray-bounds" } + sink (p[-6], p[-5], p[-4], p[-3], p[-2], p[-1], p[0]); + sink (p[1], p[2], p[3], p[4], p[5], p[6]); + x = p[7]; // { dg-warning "\\\[-Warray-bounds" } + } + + 
{ + const char *s = alloc (n5_7); + const char *p = memchr (s, x, n3_9); + x = p[-7]; // { dg-warning "\\\[-Warray-bounds" } + sink (p[-6], p[-5], p[-4], p[-4], p[-2], p[-1], p[0]); + sink (p[1], p[2], p[3], p[4], p[5], p[6]); + x = p[7]; // { dg-warning "\\\[-Warray-bounds" } + } + +} diff --git a/gcc/testsuite/gcc.dg/Warray-bounds-84.c b/gcc/testsuite/gcc.dg/Warray-bounds-84.c new file mode 100644 index 0000000..b9350d7 --- /dev/null +++ b/gcc/testsuite/gcc.dg/Warray-bounds-84.c @@ -0,0 +1,65 @@ +/* PR tree-optimization/101397 - spurious warning writing to the result + of stpcpy minus 1 + { dg-do compile } + { dg-options "-O2 -Wall" } */ + +char* strcpy (char*, const char*); + +void sink (int, ...); + +extern char ax[], a3[3], a5[5], *s; + +volatile int x; + +void test_strcpy (int i) +{ + { + char *p = strcpy (ax, s); + x = p[-1]; // { dg-warning "\\\[-Warray-bounds" } + x = p[ 0]; + x = p[+9]; + } + + { + char *p = strcpy (a3, s); + x = p[-1]; // { dg-warning "\\\[-Warray-bounds" } + x = p[0]; + x = p[1]; + x = p[2]; + x = p[3]; // { dg-warning "\\\[-Warray-bounds" } + } + + { + char *p = strcpy (a5, s); + x = p[-1]; // { dg-warning "\\\[-Warray-bounds" } + sink (p[0], p[1], p[2], p[3], p[4]); + x = p[ 5]; // { dg-warning "\\\[-Warray-bounds" } + } + + { + char *p = strcpy (a5 + 1, s); + x = p[-2]; // { dg-warning "\\\[-Warray-bounds" } + sink (p[-1], p[0], p[1], p[2], p[3]); + x = p[4]; // { dg-warning "\\\[-Warray-bounds" } + } + + { + char *p = strcpy (a5 + 2, s); + x = p[-3]; // { dg-warning "\\\[-Warray-bounds" } + sink (p[-2], p[-1], p[0], p[1], p[2]); + x = p[3]; // { dg-warning "\\\[-Warray-bounds" } + } + + { + char *p = strcpy (a5 + 3, s); + x = p[-4]; // { dg-warning "\\\[-Warray-bounds" } + sink (p[-3], p[-2], p[-1], p[0], p[1]); + x = p[2]; // { dg-warning "\\\[-Warray-bounds" } + } + + { + char *p = strcpy (ax, a3); + p[-1] = 1; // { dg-warning "\\\[-Warray-bounds" } + sink (p[0], p[1], p[2], p[9], p[99]); + } +} diff --git 
a/gcc/testsuite/gcc.dg/Wstringop-overflow-46.c b/gcc/testsuite/gcc.dg/Wstringop-overflow-46.c index b126fcb..042c967 100644 --- a/gcc/testsuite/gcc.dg/Wstringop-overflow-46.c +++ b/gcc/testsuite/gcc.dg/Wstringop-overflow-46.c @@ -79,9 +79,8 @@ void warn_memchr_var_memset_range (const void *s, unsigned n) as in the first two notes. The exact value probably isn't too important. */ char *p0 = malloc (UR (5, 7)); - // { dg-message ": destination object of size \\\[5, 7]" "note 1" { target *-*-* } .-1 } - // { dg-message "at offset \\\[1, 7] into destination object of size \\\[5, 7]" "note 2" { target *-*-* } .-2 } - // { dg-message "at offset \\\[2, 7] into destination object of size \\\[5, 7]" "note 3" { target *-*-* } .-3 } + // { dg-message "at offset \\\[\[01\], 6] into destination object of size \\\[5, 7]" "note 2" { target *-*-* } .-1 } + // { dg-message "at offset \\\[2, 7] into destination object of size \\\[5, 7]" "note 3" { target *-*-* } .-2 } sink (p0); char *p1 = memchr (p0, '1', n); -- cgit v1.1 From 92d4550991de7e0970a38939422b31e9dc07dd11 Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Wed, 21 Jul 2021 00:16:54 +0000 Subject: Daily bump. --- gcc/ChangeLog | 248 ++++++++++++++++++++++++++++++++++++++++++++++++ gcc/DATESTAMP | 2 +- gcc/c-family/ChangeLog | 6 ++ gcc/c/ChangeLog | 6 ++ gcc/testsuite/ChangeLog | 54 +++++++++++ 5 files changed, 315 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 0d931bf..5755379 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,251 @@ +2021-07-20 Martin Sebor + + PR middle-end/101397 + * builtins.c (gimple_call_return_array): Add argument. Correct + offsets for memchr, mempcpy, stpcpy, and stpncpy. + (compute_objsize_r): Adjust offset computation for argument returning + built-ins. + +2021-07-20 Martin Sebor + + PR middle-end/101300 + * tree-ssa-uninit.c (check_defs): Handle UBSAN built-ins. 
+ +2021-07-20 Jeff Law + + * function.c (assign_parm_setup_block): Use adjust_address instead + of change_address to preserve MEM_EXPR and friends. + +2021-07-20 Martin Sebor + + * cfgloop.h (single_likely_exit): Adjust by-value argument to + by-const-reference. + * cfgloopanal.c (single_likely_exit): Same. + * cgraph.h (struct cgraph_node): Same. + * cgraphclones.c (cgraph_node::create_virtual_clone): Same. + * genautomata.c (merge_states): Same. + * genextract.c (VEC_char_to_string): Same. + * genmatch.c (dt_node::gen_kids_1): Same. + (walk_captures): Adjust by-value argument to by-reference. + * gimple-ssa-store-merging.c (check_no_overlap): Adjust by-value argument + to by-const-reference. + * gimple.c (gimple_build_call_vec): Same. + (gimple_build_call_internal_vec): Same. + (gimple_build_switch): Same. + (sort_case_labels): Same. + (preprocess_case_label_vec_for_gimple): Adjust by-value argument to + by-reference. + * gimple.h (gimple_build_call_vec): Adjust by-value argument to + by-const-reference. + (gimple_build_call_internal_vec): Same. + (gimple_build_switch): Same. + (sort_case_labels): Same. + (preprocess_case_label_vec_for_gimple): Adjust by-value argument to + by-reference. + * haifa-sched.c (calc_priorities): Adjust by-value argument to + by-const-reference. + (sched_init_luids): Same. + (haifa_init_h_i_d): Same. + * ipa-cp.c (ipa_get_indirect_edge_target_1): Same. + (adjust_callers_for_value_intersection): Adjust by-value argument to + by-reference. + (find_more_scalar_values_for_callers_subset): Adjust by-value argument to + by-const-reference. + (find_more_contexts_for_caller_subset): Same. + (find_aggregate_values_for_callers_subset): Same. + (copy_useful_known_contexts): Same. + * ipa-fnsummary.c (remap_edge_summaries): Same. + (remap_freqcounting_predicate): Same. + * ipa-inline.c (add_new_edges_to_heap): Adjust by-value argument to + by-reference. 
+ * ipa-predicate.c (predicate::remap_after_inlining): Adjust by-value argument + to by-const-reference. + * ipa-predicate.h (predicate::remap_after_inlining): Same. + * ipa-prop.c (ipa_find_agg_cst_for_param): Same. + * ipa-prop.h (ipa_find_agg_cst_for_param): Same. + * ira-build.c (ira_loop_tree_body_rev_postorder): Same. + * read-rtl.c (add_overload_instance): Same. + * rtl.h (native_decode_rtx): Same. + (native_decode_vector_rtx): Same. + * sched-int.h (sched_init_luids): Same. + (haifa_init_h_i_d): Same. + * simplify-rtx.c (native_decode_vector_rtx): Same. + (native_decode_rtx): Same. + * tree-call-cdce.c (gen_shrink_wrap_conditions): Same. + (shrink_wrap_one_built_in_call_with_conds): Same. + (shrink_wrap_conditional_dead_built_in_calls): Same. + * tree-data-ref.c (create_runtime_alias_checks): Same. + (compute_all_dependences): Same. + * tree-data-ref.h (compute_all_dependences): Same. + (create_runtime_alias_checks): Same. + (index_in_loop_nest): Same. + * tree-if-conv.c (mask_exists): Same. + * tree-loop-distribution.c (class loop_distribution): Same. + (loop_distribution::create_rdg_vertices): Same. + (dump_rdg_partitions): Same. + (debug_rdg_partitions): Same. + (partition_contains_all_rw): Same. + (loop_distribution::distribute_loop): Same. + * tree-parloops.c (oacc_entry_exit_ok_1): Same. + (oacc_entry_exit_single_gang): Same. + * tree-ssa-loop-im.c (hoist_memory_references): Same. + (loop_suitable_for_sm): Same. + * tree-ssa-loop-niter.c (bound_index): Same. + * tree-ssa-reassoc.c (update_ops): Same. + (swap_ops_for_binary_stmt): Same. + (rewrite_expr_tree): Same. + (rewrite_expr_tree_parallel): Same. + * tree-ssa-sccvn.c (ao_ref_init_from_vn_reference): Same. + * tree-ssa-sccvn.h (ao_ref_init_from_vn_reference): Same. + * tree-ssa-structalias.c (process_all_all_constraints): Same. + (make_constraints_to): Same. + (handle_lhs_call): Same. + (find_func_aliases_for_builtin_call): Same. + (sort_fieldstack): Same. + (check_for_overlaps): Same. 
+ * tree-vect-loop-manip.c (vect_create_cond_for_align_checks): Same. + (vect_create_cond_for_unequal_addrs): Same. + (vect_create_cond_for_lower_bounds): Same. + (vect_create_cond_for_alias_checks): Same. + * tree-vect-slp-patterns.c (vect_validate_multiplication): Same. + * tree-vect-slp.c (vect_analyze_slp_instance): Same. + (vect_make_slp_decision): Same. + (vect_slp_bbs): Same. + (duplicate_and_interleave): Same. + (vect_transform_slp_perm_load): Same. + (vect_schedule_slp): Same. + * tree-vectorizer.h (vect_transform_slp_perm_load): Same. + (vect_schedule_slp): Same. + (duplicate_and_interleave): Same. + * tree.c (build_vector_from_ctor): Same. + (build_vector): Same. + (check_vector_cst): Same. + (check_vector_cst_duplicate): Same. + (check_vector_cst_fill): Same. + (check_vector_cst_stepped): Same. + * tree.h (build_vector_from_ctor): Same. + +2021-07-20 Jakub Jelinek + + PR target/101384 + * config/rs6000/rs6000-protos.h (easy_altivec_constant): Change return + type from bool to int. + * config/rs6000/rs6000.c (vspltis_constant): Fix up handling the + EASY_VECTOR_MSB case if either step or copies is not 1. + (vspltis_shifted): Fix comment typo. + (easy_altivec_constant): Change return type from bool to int, instead + of returning true return byte size of the element mode that should be + used to synthetize the constant. + * config/rs6000/predicates.md (easy_vector_constant_msb): Require + that vspltis_shifted is 0, handle the case where easy_altivec_constant + assumes using different vector mode from CONST_VECTOR's mode. + * config/rs6000/altivec.md (easy_vector_constant_msb splitter): Use + easy_altivec_constant to determine mode in which -1 >> -1 should be + performed, use rs6000_expand_vector_init instead of gen_vec_initv4sisi. + +2021-07-20 Richard Biener + + PR debug/101473 + * dwarf2out.h (dwarf_file_data): Add key member. + * dwarf2out.c (dwarf_file_hasher::equal): Compare key. + (dwarf_file_hasher::hash): Hash key. 
+ (lookup_filename): Remap the filename and store it in the + filename member of dwarf_file_data when creating a new + dwarf_file_data. + (file_name_acquire): Do not remap the filename again. + (maybe_emit_file): Likewise. + +2021-07-20 Jonathan Wright + + * config/aarch64/aarch64-simd-builtins.def: Use two variant + generators for all TBL/TBX intrinsics and rename to + consistent forms: qtbl[1234] or qtbx[1234]. + * config/aarch64/aarch64-simd.md (aarch64_tbl1): + Rename to... + (aarch64_qtbl1): This. + (aarch64_tbx1): Rename to... + (aarch64_qtbx1): This. + (aarch64_tbl2v16qi): Delete. + (aarch64_tbl3): Rename to... + (aarch64_qtbl2): This. + (aarch64_tbx4): Rename to... + (aarch64_qtbx2): This. + * config/aarch64/aarch64.c (aarch64_expand_vec_perm_1): Use + renamed qtbl1 and qtbl2 RTL patterns. + * config/aarch64/arm_neon.h (vqtbl1_p8): Use renamed qtbl1 + RTL pattern. + (vqtbl1_s8): Likewise. + (vqtbl1_u8): Likewise. + (vqtbl1q_p8): Likewise. + (vqtbl1q_s8): Likewise. + (vqtbl1q_u8): Likewise. + (vqtbx1_s8): Use renamed qtbx1 RTL pattern. + (vqtbx1_u8): Likewise. + (vqtbx1_p8): Likewise. + (vqtbx1q_s8): Likewise. + (vqtbx1q_u8): Likewise. + (vqtbx1q_p8): Likewise. + (vtbl1_s8): Use renamed qtbl1 RTL pattern. + (vtbl1_u8): Likewise. + (vtbl1_p8): Likewise. + (vtbl2_s8): Likewise + (vtbl2_u8): Likewise. + (vtbl2_p8): Likewise. + (vtbl3_s8): Use renamed qtbl2 RTL pattern. + (vtbl3_u8): Likewise. + (vtbl3_p8): Likewise. + (vtbl4_s8): Likewise. + (vtbl4_u8): Likewise. + (vtbl4_p8): Likewise. + (vtbx2_s8): Use renamed qtbx2 RTL pattern. + (vtbx2_u8): Likewise. + (vtbx2_p8): Likewise. + (vqtbl2_s8): Use renamed qtbl2 RTL pattern. + (vqtbl2_u8): Likewise. + (vqtbl2_p8): Likewise. + (vqtbl2q_s8): Likewise. + (vqtbl2q_u8): Likewise. + (vqtbl2q_p8): Likewise. + (vqtbx2_s8): Use renamed qtbx2 RTL pattern. + (vqtbx2_u8): Likewise. + (vqtbx2_p8): Likewise. + (vqtbx2q_s8): Likewise. + (vqtbx2q_u8): Likewise. + (vqtbx2q_p8): Likewise. + (vtbx4_s8): Likewise. 
+ (vtbx4_u8): Likewise. + (vtbx4_p8): Likewise. + +2021-07-20 Uroš Bizjak + + PR target/100182 + * config/i386/sync.md (define_peephole2 atomic_storedi_fpu): + Remove. + (define_peephole2 atomic_loaddi_fpu): Ditto. + +2021-07-20 Kito Cheng + + * config.gcc (riscv*-*-*): Detect which python is available. + +2021-07-20 Kewen Lin + + * config/rs6000/vsx.md (mulhs_): Rename to... + (smul3_highpart): ... this. + (mulhu_): Rename to... + (umul3_highpart): ... this. + * config/rs6000/rs6000-builtin.def (MULHS_V2DI, MULHS_V4SI, + MULHU_V2DI, MULHU_V4SI): Adjust. + +2021-07-20 Kewen Lin + + PR tree-optimization/100696 + * internal-fn.c (first_commutative_argument): Add info for IFN_MULH. + * internal-fn.def (IFN_MULH): New internal function. + * tree-vect-patterns.c (vect_recog_mulhs_pattern): Add support to + recog normal multiply highpart as IFN_MULH. + * config/i386/i386.c (ix86_add_stmt_cost): Adjust for combined + function CFN_MULH. + 2021-07-19 Indu Bhagat * config/elfos.h (CTF_DEBUGGING_INFO): New definition. diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 4d9def3..4bc9d90 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20210720 +20210721 diff --git a/gcc/c-family/ChangeLog b/gcc/c-family/ChangeLog index a0dea4c..87d658a 100644 --- a/gcc/c-family/ChangeLog +++ b/gcc/c-family/ChangeLog @@ -1,3 +1,9 @@ +2021-07-20 Martin Sebor + + * c-common.c (c_build_shufflevector): Adjust by-value argument to + by-const-reference. + * c-common.h (c_build_shufflevector): Same. + 2021-07-16 Andrew Pinski PR c/101453 diff --git a/gcc/c/ChangeLog b/gcc/c/ChangeLog index a190806..9cd1dd6 100644 --- a/gcc/c/ChangeLog +++ b/gcc/c/ChangeLog @@ -1,3 +1,9 @@ +2021-07-20 Martin Sebor + + * c-tree.h (c_build_function_call_vec): Adjust by-value argument to + by-const-reference. + * c-typeck.c (c_build_function_call_vec): Same. 
+ 2021-07-15 Martin Sebor PR c/101289 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 501f5ab..2a1f24b 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,57 @@ +2021-07-20 Martin Sebor + + PR middle-end/101397 + * gcc.dg/Warray-bounds-80.c: New test. + * gcc.dg/Warray-bounds-81.c: New test. + * gcc.dg/Warray-bounds-82.c: New test. + * gcc.dg/Warray-bounds-83.c: New test. + * gcc.dg/Warray-bounds-84.c: New test. + * gcc.dg/Wstringop-overflow-46.c: Adjust expected output. + +2021-07-20 Martin Sebor + + PR middle-end/101300 + * gcc.dg/uninit-pr101300.c: New test. + +2021-07-20 Michael Meissner + + PR testsuite/100167 + * gcc.target/powerpc/fold-vec-div-longlong.c: Fix expected code + generation on power10. + * gcc.target/powerpc/fold-vec-mult-longlong.c: Likewise. + +2021-07-20 Jakub Jelinek + + PR target/101384 + * gcc.dg/pr101384.c: New test. + * gcc.target/powerpc/pr101384-1.c: New test. + * gcc.target/powerpc/pr101384-2.c: New test. + +2021-07-20 Richard Sandiford + + PR testsuite/101506 + * gcc.target/aarch64/vect-vmaxv.c: Use an asm to hide the + true initial value of the reduction from the vectorizer. + * gcc.target/aarch64/vect-vaddv.c: Likewise. Make the vector + loop operate on exactly LANES (rather than LANES-1) iterations. + * gcc.target/aarch64/vect-fmaxv-fminv.x: Likewise. + +2021-07-20 Uroš Bizjak + + PR target/100182 + * gcc.target/i386/pr71245-1.c: Remove. + * gcc.target/i386/pr71245-2.c: Ditto. + +2021-07-20 Kewen Lin + + * gcc.target/powerpc/mul-vectorize-3.c: New test. + * gcc.target/powerpc/mul-vectorize-4.c: New test. + +2021-07-20 Kewen Lin + + PR tree-optimization/100696 + * gcc.target/i386/pr100637-3w.c: Adjust for mul_highpart recog. + 2021-07-19 Martin Sebor * gcc.target/powerpc/pr93658.c: Suppress valid warnings. 
-- cgit v1.1 From 2d9588bac5ac9e2ed778f3c7eae9ebf7bf258b44 Mon Sep 17 00:00:00 2001 From: Kewen Lin Date: Wed, 21 Jul 2021 00:22:05 -0500 Subject: predcom: Refactor more using auto_vec This patch follows Martin's suggestion at the link[1] to do more refactorings by: - Adding m_ prefix for class pcom_worker member variables. - Using auto_vec instead of vec among class pcom_worker, chain, component and comp_ptrs. The changes in tree-data-ref.[ch] are required; without them the destruction of an auto_vec instance could try to double free the memory pointed to by m_vec. Bootstrapped and regtested on powerpc64le-linux-gnu P9, x86_64-redhat-linux and aarch64-linux-gnu, also bootstrapped on ppc64le P9 with bootstrap-O3 config. [1] https://gcc.gnu.org/pipermail/gcc-patches/2021-June/573424.html gcc/ChangeLog: * tree-data-ref.c (free_dependence_relations): Adjust to pass vec by reference. (free_data_refs): Likewise. * tree-data-ref.h (free_dependence_relations): Likewise. (free_data_refs): Likewise. * tree-predcom.c (struct chain): Use auto_vec instead of vec for members. (struct component): Likewise. (pcom_worker::pcom_worker): Adjust for auto_vec and renaming changes. (pcom_worker::~pcom_worker): Likewise. (pcom_worker::release_chain): Adjust as auto_vec changes. (pcom_worker::loop): Rename to ... (pcom_worker::m_loop): ... this. (pcom_worker::datarefs): Rename to ... (pcom_worker::m_datarefs): ... this. Use auto_vec instead of vec. (pcom_worker::dependences): Rename to ... (pcom_worker::m_dependences): ... this. Use auto_vec instead of vec. (pcom_worker::chains): Rename to ... (pcom_worker::m_chains): ... this. Use auto_vec instead of vec. (pcom_worker::looparound_phis): Rename to ... (pcom_worker::m_looparound_phis): ... this. Use auto_vec instead of vec. (pcom_worker::cache): Rename to ... (pcom_worker::m_cache): ... this. Use auto_vec instead of vec. (pcom_worker::release_chain): Adjust for auto_vec changes. 
(pcom_worker::release_chains): Adjust for auto_vec and renaming changes. (release_component): Remove. (release_components): Adjust for release_component removal. (component_of): Adjust to use vec. (merge_comps): Likewise. (pcom_worker::aff_combination_dr_offset): Adjust for renaming changes. (pcom_worker::determine_offset): Likewise. (class comp_ptrs): Remove. (pcom_worker::split_data_refs_to_components): Adjust for renaming changes, for comp_ptrs removal with auto_vec. (pcom_worker::suitable_component_p): Adjust for renaming changes. (pcom_worker::filter_suitable_components): Adjust for release_component removal. (pcom_worker::valid_initializer_p): Adjust for renaming changes. (pcom_worker::find_looparound_phi): Likewise. (pcom_worker::add_looparound_copies): Likewise. (pcom_worker::determine_roots_comp): Likewise. (pcom_worker::single_nonlooparound_use): Likewise. (pcom_worker::execute_pred_commoning_chain): Likewise. (pcom_worker::execute_pred_commoning): Likewise. (pcom_worker::try_combine_chains): Likewise. (pcom_worker::prepare_initializers_chain): Likewise. (pcom_worker::prepare_initializers): Likewise. (pcom_worker::prepare_finalizers_chain): Likewise. (pcom_worker::prepare_finalizers): Likewise. (pcom_worker::tree_predictive_commoning_loop): Likewise. --- gcc/tree-data-ref.c | 4 +- gcc/tree-data-ref.h | 4 +- gcc/tree-predcom.c | 248 ++++++++++++++++++++++------------------------------ 3 files changed, 108 insertions(+), 148 deletions(-) (limited to 'gcc') diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c index 210ac28..b6f7828 100644 --- a/gcc/tree-data-ref.c +++ b/gcc/tree-data-ref.c @@ -6208,7 +6208,7 @@ free_dependence_relation (struct data_dependence_relation *ddr) DEPENDENCE_RELATIONS. 
*/ void -free_dependence_relations (vec dependence_relations) +free_dependence_relations (vec& dependence_relations) { for (data_dependence_relation *ddr : dependence_relations) if (ddr) @@ -6220,7 +6220,7 @@ free_dependence_relations (vec dependence_relations) /* Free the memory used by the data references from DATAREFS. */ void -free_data_refs (vec datarefs) +free_data_refs (vec& datarefs) { for (data_reference *dr : datarefs) free_data_ref (dr); diff --git a/gcc/tree-data-ref.h b/gcc/tree-data-ref.h index a0ff2a8..de45f25 100644 --- a/gcc/tree-data-ref.h +++ b/gcc/tree-data-ref.h @@ -534,9 +534,9 @@ extern void debug (vec &ref); extern void debug (vec *ptr); extern void debug_data_dependence_relations (vec ); extern void free_dependence_relation (struct data_dependence_relation *); -extern void free_dependence_relations (vec ); +extern void free_dependence_relations (vec& ); extern void free_data_ref (data_reference_p); -extern void free_data_refs (vec ); +extern void free_data_refs (vec& ); extern opt_result find_data_references_in_stmt (class loop *, gimple *, vec *); extern bool graphite_find_data_references_in_stmt (edge, loop_p, gimple *, diff --git a/gcc/tree-predcom.c b/gcc/tree-predcom.c index a4ebf22..cf85517 100644 --- a/gcc/tree-predcom.c +++ b/gcc/tree-predcom.c @@ -306,19 +306,19 @@ typedef struct chain struct chain *ch1, *ch2; /* The references in the chain. */ - vec refs; + auto_vec refs; /* The maximum distance of the reference in the chain from the root. */ unsigned length; /* The variables used to copy the value throughout iterations. */ - vec vars; + auto_vec vars; /* Initializers for the variables. */ - vec inits; + auto_vec inits; /* Finalizers for the eliminated stores. */ - vec finis; + auto_vec finis; /* gimple stmts intializing the initial variables of the chain. */ gimple_seq init_seq; @@ -362,7 +362,7 @@ enum ref_step_type struct component { /* The references in the component. 
*/ - vec refs; + auto_vec refs; /* What we know about the step of the references in the component. */ enum ref_step_type comp_step; @@ -381,17 +381,13 @@ struct component class pcom_worker { public: - pcom_worker (loop_p l) : loop (l), chains (vNULL), cache (NULL) - { - dependences.create (10); - datarefs.create (10); - } + pcom_worker (loop_p l) : m_loop (l), m_cache (NULL) {} ~pcom_worker () { - free_data_refs (datarefs); - free_dependence_relations (dependences); - free_affine_expand_cache (&cache); + free_data_refs (m_datarefs); + free_dependence_relations (m_dependences); + free_affine_expand_cache (&m_cache); release_chains (); } @@ -407,23 +403,24 @@ public: private: /* The pointer to the given loop. */ - loop_p loop; + loop_p m_loop; /* All data references. */ - vec datarefs; + auto_vec m_datarefs; /* All data dependences. */ - vec dependences; + auto_vec m_dependences; /* All chains. */ - vec chains; + auto_vec m_chains; /* Bitmap of ssa names defined by looparound phi nodes covered by chains. */ - auto_bitmap looparound_phis; + auto_bitmap m_looparound_phis; typedef hash_map tree_expand_map_t; /* Cache used by tree_to_aff_combination_expand. */ - tree_expand_map_t *cache; + tree_expand_map_t *m_cache; + /* Splits dependence graph to components. */ struct component *split_data_refs_to_components (); @@ -695,13 +692,9 @@ pcom_worker::release_chain (chain_p chain) FOR_EACH_VEC_ELT (chain->refs, i, ref) free (ref); - chain->refs.release (); - chain->vars.release (); - chain->inits.release (); if (chain->init_seq) gimple_seq_discard (chain->init_seq); - chain->finis.release (); if (chain->fini_seq) gimple_seq_discard (chain->fini_seq); @@ -716,18 +709,8 @@ pcom_worker::release_chains () unsigned i; chain_p chain; - FOR_EACH_VEC_ELT (chains, i, chain) + FOR_EACH_VEC_ELT (m_chains, i, chain) release_chain (chain); - chains.release (); -} - -/* Frees a component COMP. 
*/ - -static void -release_component (struct component *comp) -{ - comp->refs.release (); - free (comp); } /* Frees list of components COMPS. */ @@ -740,7 +723,7 @@ release_components (struct component *comps) for (act = comps; act; act = next) { next = act->next; - release_component (act); + XDELETE (act); } } @@ -748,7 +731,7 @@ release_components (struct component *comps) shortening. */ static unsigned -component_of (unsigned fathers[], unsigned a) +component_of (vec &fathers, unsigned a) { unsigned root, n; @@ -768,7 +751,8 @@ component_of (unsigned fathers[], unsigned a) components, A and B are components to merge. */ static void -merge_comps (unsigned fathers[], unsigned sizes[], unsigned a, unsigned b) +merge_comps (vec &fathers, vec &sizes, + unsigned a, unsigned b) { unsigned ca = component_of (fathers, a); unsigned cb = component_of (fathers, b); @@ -822,7 +806,7 @@ pcom_worker::aff_combination_dr_offset (struct data_reference *dr, tree type = TREE_TYPE (DR_OFFSET (dr)); aff_tree delta; - tree_to_aff_combination_expand (DR_OFFSET (dr), type, offset, &cache); + tree_to_aff_combination_expand (DR_OFFSET (dr), type, offset, &m_cache); aff_combination_const (&delta, type, wi::to_poly_widest (DR_INIT (dr))); aff_combination_add (offset, &delta); } @@ -869,7 +853,7 @@ pcom_worker::determine_offset (struct data_reference *a, aff_combination_add (&diff, &baseb); tree_to_aff_combination_expand (DR_STEP (a), TREE_TYPE (DR_STEP (a)), - &step, &cache); + &step, &m_cache); return aff_combination_constant_multiple_p (&diff, &step, off); } @@ -890,50 +874,28 @@ last_always_executed_block (class loop *loop) return last; } -/* RAII class for comp_father and comp_size usage. 
*/ - -class comp_ptrs -{ -public: - unsigned *comp_father; - unsigned *comp_size; - - comp_ptrs (unsigned n) - { - comp_father = XNEWVEC (unsigned, n + 1); - comp_size = XNEWVEC (unsigned, n + 1); - } - - ~comp_ptrs () - { - free (comp_father); - free (comp_size); - } - - comp_ptrs (const comp_ptrs &) = delete; - comp_ptrs &operator= (const comp_ptrs &) = delete; -}; - /* Splits dependence graph on DATAREFS described by DEPENDENCES to components. */ struct component * pcom_worker::split_data_refs_to_components () { - unsigned i, n = datarefs.length (); + unsigned i, n = m_datarefs.length (); unsigned ca, ia, ib, bad; - comp_ptrs ptrs (n); - struct component **comps; struct data_reference *dr, *dra, *drb; struct data_dependence_relation *ddr; struct component *comp_list = NULL, *comp; dref dataref; /* Don't do store elimination if loop has multiple exit edges. */ - bool eliminate_store_p = single_exit (loop) != NULL; - basic_block last_always_executed = last_always_executed_block (loop); + bool eliminate_store_p = single_exit (m_loop) != NULL; + basic_block last_always_executed = last_always_executed_block (m_loop); auto_bitmap no_store_store_comps; + auto_vec comp_father (n + 1); + auto_vec comp_size (n + 1); + comp_father.quick_grow (n + 1); + comp_size.quick_grow (n + 1); - FOR_EACH_VEC_ELT (datarefs, i, dr) + FOR_EACH_VEC_ELT (m_datarefs, i, dr) { if (!DR_REF (dr)) /* A fake reference for call or asm_expr that may clobber memory; @@ -943,26 +905,26 @@ pcom_worker::split_data_refs_to_components () if (is_gimple_call (DR_STMT (dr))) return NULL; dr->aux = (void *) (size_t) i; - ptrs.comp_father[i] = i; - ptrs.comp_size[i] = 1; + comp_father[i] = i; + comp_size[i] = 1; } /* A component reserved for the "bad" data references. 
*/ - ptrs.comp_father[n] = n; - ptrs.comp_size[n] = 1; + comp_father[n] = n; + comp_size[n] = 1; - FOR_EACH_VEC_ELT (datarefs, i, dr) + FOR_EACH_VEC_ELT (m_datarefs, i, dr) { enum ref_step_type dummy; if (!suitable_reference_p (dr, &dummy)) { ia = (unsigned) (size_t) dr->aux; - merge_comps (ptrs.comp_father, ptrs.comp_size, n, ia); + merge_comps (comp_father, comp_size, n, ia); } } - FOR_EACH_VEC_ELT (dependences, i, ddr) + FOR_EACH_VEC_ELT (m_dependences, i, ddr) { poly_widest_int dummy_off; @@ -979,12 +941,12 @@ pcom_worker::split_data_refs_to_components () || DDR_NUM_DIST_VECTS (ddr) == 0)) eliminate_store_p = false; - ia = component_of (ptrs.comp_father, (unsigned) (size_t) dra->aux); - ib = component_of (ptrs.comp_father, (unsigned) (size_t) drb->aux); + ia = component_of (comp_father, (unsigned) (size_t) dra->aux); + ib = component_of (comp_father, (unsigned) (size_t) drb->aux); if (ia == ib) continue; - bad = component_of (ptrs.comp_father, n); + bad = component_of (comp_father, n); /* If both A and B are reads, we may ignore unsuitable dependences. 
*/ if (DR_IS_READ (dra) && DR_IS_READ (drb)) @@ -1008,7 +970,7 @@ pcom_worker::split_data_refs_to_components () else if (!determine_offset (dra, drb, &dummy_off)) { bitmap_set_bit (no_store_store_comps, ib); - merge_comps (ptrs.comp_father, ptrs.comp_size, bad, ia); + merge_comps (comp_father, comp_size, bad, ia); continue; } } @@ -1022,7 +984,7 @@ pcom_worker::split_data_refs_to_components () else if (!determine_offset (dra, drb, &dummy_off)) { bitmap_set_bit (no_store_store_comps, ia); - merge_comps (ptrs.comp_father, ptrs.comp_size, bad, ib); + merge_comps (comp_father, comp_size, bad, ib); continue; } } @@ -1030,17 +992,17 @@ pcom_worker::split_data_refs_to_components () && ia != bad && ib != bad && !determine_offset (dra, drb, &dummy_off)) { - merge_comps (ptrs.comp_father, ptrs.comp_size, bad, ia); - merge_comps (ptrs.comp_father, ptrs.comp_size, bad, ib); + merge_comps (comp_father, comp_size, bad, ia); + merge_comps (comp_father, comp_size, bad, ib); continue; } - merge_comps (ptrs.comp_father, ptrs.comp_size, ia, ib); + merge_comps (comp_father, comp_size, ia, ib); } if (eliminate_store_p) { - tree niters = number_of_latch_executions (loop); + tree niters = number_of_latch_executions (m_loop); /* Don't do store elimination if niters info is unknown because stores in the last iteration can't be eliminated and we need to recover it @@ -1048,12 +1010,13 @@ pcom_worker::split_data_refs_to_components () eliminate_store_p = (niters != NULL_TREE && niters != chrec_dont_know); } - comps = XCNEWVEC (struct component *, n); - bad = component_of (ptrs.comp_father, n); - FOR_EACH_VEC_ELT (datarefs, i, dr) + auto_vec comps; + comps.safe_grow_cleared (n, true); + bad = component_of (comp_father, n); + FOR_EACH_VEC_ELT (m_datarefs, i, dr) { ia = (unsigned) (size_t) dr->aux; - ca = component_of (ptrs.comp_father, ia); + ca = component_of (comp_father, ia); if (ca == bad) continue; @@ -1061,7 +1024,7 @@ pcom_worker::split_data_refs_to_components () if (!comp) { comp = 
XCNEW (struct component); - comp->refs.create (ptrs.comp_size[ca]); + comp->refs.create (comp_size[ca]); comp->eliminate_store_p = eliminate_store_p; comps[ca] = comp; } @@ -1084,7 +1047,7 @@ pcom_worker::split_data_refs_to_components () bitmap_iterator bi; EXECUTE_IF_SET_IN_BITMAP (no_store_store_comps, 0, ia, bi) { - ca = component_of (ptrs.comp_father, ia); + ca = component_of (comp_father, ia); if (ca != bad) comps[ca]->eliminate_store_p = false; } @@ -1099,7 +1062,6 @@ pcom_worker::split_data_refs_to_components () comp_list = comp; } } - free (comps); return comp_list; } @@ -1111,14 +1073,14 @@ pcom_worker::suitable_component_p (struct component *comp) { unsigned i; dref a, first; - basic_block ba, bp = loop->header; + basic_block ba, bp = m_loop->header; bool ok, has_write = false; FOR_EACH_VEC_ELT (comp->refs, i, a) { ba = gimple_bb (a->stmt); - if (!just_once_each_iteration_p (loop, ba)) + if (!just_once_each_iteration_p (m_loop, ba)) return false; gcc_assert (dominated_by_p (CDI_DOMINATORS, ba, bp)); @@ -1180,7 +1142,7 @@ pcom_worker::filter_suitable_components (struct component *comps) *comp = act->next; FOR_EACH_VEC_ELT (act->refs, i, ref) free (ref); - release_component (act); + XDELETE (act); } } @@ -1392,7 +1354,7 @@ pcom_worker::valid_initializer_p (struct data_reference *ref, unsigned distance, aff_combination_add (&diff, &base); tree_to_aff_combination_expand (DR_STEP (root), TREE_TYPE (DR_STEP (root)), - &step, &cache); + &step, &m_cache); if (!aff_combination_constant_multiple_p (&diff, &step, &off)) return false; @@ -1413,7 +1375,7 @@ pcom_worker::find_looparound_phi (dref ref, dref root) tree name, init, init_ref; gphi *phi = NULL; gimple *init_stmt; - edge latch = loop_latch_edge (loop); + edge latch = loop_latch_edge (m_loop); struct data_reference init_dr; gphi_iterator psi; @@ -1429,7 +1391,7 @@ pcom_worker::find_looparound_phi (dref ref, dref root) if (!name) return NULL; - for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); 
gsi_next (&psi)) + for (psi = gsi_start_phis (m_loop->header); !gsi_end_p (psi); gsi_next (&psi)) { phi = psi.phi (); if (PHI_ARG_DEF_FROM_EDGE (phi, latch) == name) @@ -1439,7 +1401,7 @@ pcom_worker::find_looparound_phi (dref ref, dref root) if (gsi_end_p (psi)) return NULL; - init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop)); + init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop)); if (TREE_CODE (init) != SSA_NAME) return NULL; init_stmt = SSA_NAME_DEF_STMT (init); @@ -1457,7 +1419,7 @@ pcom_worker::find_looparound_phi (dref ref, dref root) memset (&init_dr, 0, sizeof (struct data_reference)); DR_REF (&init_dr) = init_ref; DR_STMT (&init_dr) = phi; - if (!dr_analyze_innermost (&DR_INNERMOST (&init_dr), init_ref, loop, + if (!dr_analyze_innermost (&DR_INNERMOST (&init_dr), init_ref, m_loop, init_stmt)) return NULL; @@ -1512,7 +1474,7 @@ pcom_worker::add_looparound_copies (chain_p chain) if (!phi) continue; - bitmap_set_bit (looparound_phis, SSA_NAME_VERSION (PHI_RESULT (phi))); + bitmap_set_bit (m_looparound_phis, SSA_NAME_VERSION (PHI_RESULT (phi))); insert_looparound_copy (chain, ref, phi); } } @@ -1533,7 +1495,7 @@ pcom_worker::determine_roots_comp (struct component *comp) if (comp->comp_step == RS_INVARIANT) { chain = make_invariant_chain (comp); - chains.safe_push (chain); + m_chains.safe_push (chain); return; } @@ -1578,7 +1540,7 @@ pcom_worker::determine_roots_comp (struct component *comp) if (nontrivial_chain_p (chain)) { add_looparound_copies (chain); - chains.safe_push (chain); + m_chains.safe_push (chain); } else release_chain (chain); @@ -1599,7 +1561,7 @@ pcom_worker::determine_roots_comp (struct component *comp) if (nontrivial_chain_p (chain)) { add_looparound_copies (chain); - chains.safe_push (chain); + m_chains.safe_push (chain); } else release_chain (chain); @@ -2196,7 +2158,7 @@ pcom_worker::single_nonlooparound_use (tree name) { /* Ignore uses in looparound phi nodes. 
Uses in other phi nodes could not be processed anyway, so just fail for them. */ - if (bitmap_bit_p (looparound_phis, + if (bitmap_bit_p (m_looparound_phis, SSA_NAME_VERSION (PHI_RESULT (stmt)))) continue; @@ -2305,14 +2267,14 @@ pcom_worker::execute_pred_commoning_chain (chain_p chain, /* If dead stores in this chain store loop variant values, we need to set up the variables by loading from memory before loop and propagating it with PHI nodes. */ - initialize_root_vars_store_elim_2 (loop, chain, tmp_vars); + initialize_root_vars_store_elim_2 (m_loop, chain, tmp_vars); } /* For inter-iteration store elimination chain, stores at each distance in loop's last (chain->length - 1) iterations can't be eliminated, because there is no following killing store. We need to generate these stores after loop. */ - finalize_eliminated_stores (loop, chain); + finalize_eliminated_stores (m_loop, chain); } bool last_store_p = true; @@ -2342,7 +2304,7 @@ pcom_worker::execute_pred_commoning_chain (chain_p chain, else { /* For non-combined chains, set up the variables that hold its value. 
*/ - initialize_root_vars (loop, chain, tmp_vars); + initialize_root_vars (m_loop, chain, tmp_vars); a = get_chain_root (chain); in_lhs = (chain->type == CT_STORE_LOAD || chain->type == CT_COMBINATION); @@ -2411,15 +2373,15 @@ pcom_worker::execute_pred_commoning (bitmap tmp_vars) chain_p chain; unsigned i; - FOR_EACH_VEC_ELT (chains, i, chain) + FOR_EACH_VEC_ELT (m_chains, i, chain) { if (chain->type == CT_INVARIANT) - execute_load_motion (loop, chain, tmp_vars); + execute_load_motion (m_loop, chain, tmp_vars); else execute_pred_commoning_chain (chain, tmp_vars); } - FOR_EACH_VEC_ELT (chains, i, chain) + FOR_EACH_VEC_ELT (m_chains, i, chain) { if (chain->type == CT_INVARIANT) ; @@ -2979,7 +2941,7 @@ pcom_worker::try_combine_chains () auto_vec worklist; bool combined_p = false; - FOR_EACH_VEC_ELT (chains, i, ch1) + FOR_EACH_VEC_ELT (m_chains, i, ch1) if (chain_can_be_combined_p (ch1)) worklist.safe_push (ch1); @@ -2989,7 +2951,7 @@ pcom_worker::try_combine_chains () if (!chain_can_be_combined_p (ch1)) continue; - FOR_EACH_VEC_ELT (chains, j, ch2) + FOR_EACH_VEC_ELT (m_chains, j, ch2) { if (!chain_can_be_combined_p (ch2)) continue; @@ -2998,7 +2960,7 @@ pcom_worker::try_combine_chains () if (cch) { worklist.safe_push (cch); - chains.safe_push (cch); + m_chains.safe_push (cch); combined_p = true; break; } @@ -3008,8 +2970,8 @@ pcom_worker::try_combine_chains () return; /* Setup UID for all statements in dominance order. */ - basic_block *bbs = get_loop_body_in_dom_order (loop); - renumber_gimple_stmt_uids_in_blocks (bbs, loop->num_nodes); + basic_block *bbs = get_loop_body_in_dom_order (m_loop); + renumber_gimple_stmt_uids_in_blocks (bbs, m_loop->num_nodes); free (bbs); /* Re-association in combined chains may generate statements different to @@ -3022,7 +2984,7 @@ pcom_worker::try_combine_chains () We first update position information for all combined chains. 
*/ dref ref; - for (i = 0; chains.iterate (i, &ch1); ++i) + for (i = 0; m_chains.iterate (i, &ch1); ++i) { if (ch1->type != CT_COMBINATION || ch1->combined) continue; @@ -3033,7 +2995,7 @@ pcom_worker::try_combine_chains () update_pos_for_combined_chains (ch1); } /* Then sort references according to newly updated position information. */ - for (i = 0; chains.iterate (i, &ch1); ++i) + for (i = 0; m_chains.iterate (i, &ch1); ++i) { if (ch1->type != CT_COMBINATION && !ch1->combined) continue; @@ -3155,10 +3117,10 @@ pcom_worker::prepare_initializers_chain (chain_p chain) struct data_reference *dr = get_chain_root (chain)->ref; tree init; dref laref; - edge entry = loop_preheader_edge (loop); + edge entry = loop_preheader_edge (m_loop); if (chain->type == CT_STORE_STORE) - return prepare_initializers_chain_store_elim (loop, chain); + return prepare_initializers_chain_store_elim (m_loop, chain); /* Find the initializers for the variables, and check that they cannot trap. */ @@ -3210,15 +3172,15 @@ pcom_worker::prepare_initializers () chain_p chain; unsigned i; - for (i = 0; i < chains.length (); ) + for (i = 0; i < m_chains.length (); ) { - chain = chains[i]; + chain = m_chains[i]; if (prepare_initializers_chain (chain)) i++; else { release_chain (chain); - chains.unordered_remove (i); + m_chains.unordered_remove (i); } } } @@ -3231,7 +3193,7 @@ pcom_worker::prepare_finalizers_chain (chain_p chain) { unsigned i, n = chain->length; struct data_reference *dr = get_chain_root (chain)->ref; - tree fini, niters = number_of_latch_executions (loop); + tree fini, niters = number_of_latch_executions (m_loop); /* For now we can't eliminate stores if some of them are conditional executed. 
*/ @@ -3281,9 +3243,9 @@ pcom_worker::prepare_finalizers () unsigned i; bool loop_closed_ssa = false; - for (i = 0; i < chains.length ();) + for (i = 0; i < m_chains.length ();) { - chain = chains[i]; + chain = m_chains[i]; /* Finalizer is only necessary for inter-iteration store elimination chains. */ @@ -3305,7 +3267,7 @@ pcom_worker::prepare_finalizers () else { release_chain (chain); - chains.unordered_remove (i); + m_chains.unordered_remove (i); } } return loop_closed_ssa; @@ -3341,10 +3303,10 @@ pcom_worker::tree_predictive_commoning_loop (bool allow_unroll_p) bool unroll = false, loop_closed_ssa = false; if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Processing loop %d\n", loop->num); + fprintf (dump_file, "Processing loop %d\n", m_loop->num); /* Nothing for predicitive commoning if loop only iterates 1 time. */ - if (get_max_loop_iterations_int (loop) == 0) + if (get_max_loop_iterations_int (m_loop) == 0) { if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "Loop iterates only 1 time, nothing to do.\n"); @@ -3355,8 +3317,8 @@ pcom_worker::tree_predictive_commoning_loop (bool allow_unroll_p) /* Find the data references and split them into components according to their dependence relations. 
*/ auto_vec loop_nest; - if (!compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs, - &dependences)) + if (!compute_data_dependences_for_loop (m_loop, true, &loop_nest, &m_datarefs, + &m_dependences)) { if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "Cannot analyze data dependencies\n"); @@ -3364,7 +3326,7 @@ pcom_worker::tree_predictive_commoning_loop (bool allow_unroll_p) } if (dump_file && (dump_flags & TDF_DETAILS)) - dump_data_dependence_relations (dump_file, dependences); + dump_data_dependence_relations (dump_file, m_dependences); components = split_data_refs_to_components (); @@ -3385,7 +3347,7 @@ pcom_worker::tree_predictive_commoning_loop (bool allow_unroll_p) determine_roots (components); release_components (components); - if (!chains.exists ()) + if (!m_chains.exists ()) { if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, @@ -3399,21 +3361,21 @@ pcom_worker::tree_predictive_commoning_loop (bool allow_unroll_p) /* Try to combine the chains that are always worked with together. */ try_combine_chains (); - insert_init_seqs (loop, chains); + insert_init_seqs (m_loop, m_chains); if (dump_file && (dump_flags & TDF_DETAILS)) { fprintf (dump_file, "Before commoning:\n\n"); - dump_chains (dump_file, chains); + dump_chains (dump_file, m_chains); } if (allow_unroll_p) /* Determine the unroll factor, and if the loop should be unrolled, ensure that its number of iterations is divisible by the factor. */ - unroll_factor = determine_unroll_factor (chains); + unroll_factor = determine_unroll_factor (m_chains); if (unroll_factor > 1) - unroll = can_unroll_loop_p (loop, unroll_factor, &desc); + unroll = can_unroll_loop_p (m_loop, unroll_factor, &desc); /* Execute the predictive commoning transformations, and possibly unroll the loop. 
*/ @@ -3425,7 +3387,7 @@ pcom_worker::tree_predictive_commoning_loop (bool allow_unroll_p) fprintf (dump_file, "Unrolling %u times.\n", unroll_factor); dta.tmp_vars = tmp_vars; - dta.chains = chains; + dta.chains = m_chains; dta.worker = this; /* Cfg manipulations performed in tree_transform_and_unroll_loop before @@ -3434,12 +3396,12 @@ pcom_worker::tree_predictive_commoning_loop (bool allow_unroll_p) statements. To fix this, we store the ssa names defined by the phi nodes here instead of the phi nodes themselves, and restore the phi nodes in execute_pred_commoning_cbck. A bit hacky. */ - replace_phis_by_defined_names (chains); + replace_phis_by_defined_names (m_chains); - edge exit = single_dom_exit (loop); - tree_transform_and_unroll_loop (loop, unroll_factor, exit, &desc, + edge exit = single_dom_exit (m_loop); + tree_transform_and_unroll_loop (m_loop, unroll_factor, exit, &desc, execute_pred_commoning_cbck, &dta); - eliminate_temp_copies (loop, tmp_vars); + eliminate_temp_copies (m_loop, tmp_vars); } else { @@ -3554,5 +3516,3 @@ make_pass_predcom (gcc::context *ctxt) { return new pass_predcom (ctxt); } - - -- cgit v1.1 From 62acc72a957b561462a436fcb2d6caac5b363190 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Wed, 21 Jul 2021 07:50:20 +0100 Subject: unroll: Avoid unnecessary tail loops for constant niters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit unroll and jam can decide to unroll the outer loop of a nest like: for (int j = 0; j < n; ++j) for (int i = 0; i < n; ++i) x[i] += __builtin_expf (y[j][i]); It then uses a tail loop to handle any left-over iterations. However, the code is structured so that this tail loop is always used. If n is a multiple of the unroll factor UF, the final UF iterations will use the tail loop rather than the unrolled loop. “Fixing” that for variable loop counts would mean introducing another runtime test: a branch around the tail loop if there are no more iterations. 
There's at least an argument that the overhead of doing that test might not pay for itself. But we use this structure even if the iteration count is provably a multiple of UF at compile time. E.g. with s/n/100/ and an unroll factor of 2, the first 98 iterations use the unrolled loop and the final 2 iterations use the original loop. This patch makes the unroller avoid a tail loop in that case. The end result seemed easier to follow if variables were declared at the point of initialisation, so that it's more obvious which ones are meaningful even when there's no tail loop. gcc/ * tree-ssa-loop-manip.c (determine_exit_conditions): Return a null exit condition if no tail loop is needed, and if the original exit condition should therefore be kept as-is. (tree_transform_and_unroll_loop): Handle that case here too. gcc/testsuite/ * gcc.dg/unroll-9.c: New test. --- gcc/testsuite/gcc.dg/unroll-9.c | 12 ++ gcc/tree-ssa-loop-manip.c | 306 +++++++++++++++++++++------------------- 2 files changed, 176 insertions(+), 142 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/unroll-9.c (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/unroll-9.c b/gcc/testsuite/gcc.dg/unroll-9.c new file mode 100644 index 0000000..2d65ec3 --- /dev/null +++ b/gcc/testsuite/gcc.dg/unroll-9.c @@ -0,0 +1,12 @@ +/* { dg-options "-O3 -fdump-tree-unrolljam -fno-math-errno" } */ + +void +f (float *restrict x, float y[100][100]) +{ + for (int j = 0; j < 100; ++j) + for (int i = 0; i < 100; ++i) + x[i] += __builtin_expf (y[j][i]); +} + +/* The loop should be unrolled 2 times, without a tail loop. */ +/* { dg-final { scan-tree-dump-times "__builtin_expf" 2 "unrolljam" } } */ diff --git a/gcc/tree-ssa-loop-manip.c b/gcc/tree-ssa-loop-manip.c index 28ae131..41f9872 100644 --- a/gcc/tree-ssa-loop-manip.c +++ b/gcc/tree-ssa-loop-manip.c @@ -997,8 +997,10 @@ can_unroll_loop_p (class loop *loop, unsigned factor, /* Determines the conditions that control execution of LOOP unrolled FACTOR times.
DESC is number of iterations of LOOP. ENTER_COND is set to condition that must be true if the main loop can be entered. + If the loop does not always iterate an exact multiple of FACTOR times, EXIT_BASE, EXIT_STEP, EXIT_CMP and EXIT_BOUND are set to values describing - how the exit from the unrolled loop should be controlled. */ + how the exit from the unrolled loop should be controlled. Otherwise, + the trees are set to null and EXIT_CMP is set to ERROR_MARK. */ static void determine_exit_conditions (class loop *loop, class tree_niter_desc *desc, @@ -1079,6 +1081,16 @@ determine_exit_conditions (class loop *loop, class tree_niter_desc *desc, assum = fold_build2 (cmp, boolean_type_node, base, bound); cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, assum, cond); + if (integer_nonzerop (cond) + && integer_zerop (desc->may_be_zero)) + { + /* Convert the latch count to an iteration count. */ + tree niter = fold_build2 (PLUS_EXPR, type, desc->niter, + build_one_cst (type)); + if (multiple_of_p (type, niter, bigstep)) + return; + } + cond = force_gimple_operand (unshare_expr (cond), &stmts, false, NULL_TREE); if (stmts) gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts); @@ -1234,137 +1246,138 @@ tree_transform_and_unroll_loop (class loop *loop, unsigned factor, transform_callback transform, void *data) { - gcond *exit_if; - tree ctr_before, ctr_after; - tree enter_main_cond, exit_base, exit_step, exit_bound; - enum tree_code exit_cmp; - gphi *phi_old_loop, *phi_new_loop, *phi_rest; - gphi_iterator psi_old_loop, psi_new_loop; - tree init, next, new_init; - class loop *new_loop; - basic_block rest, exit_bb; - edge old_entry, new_entry, old_latch, precond_edge, new_exit; - edge new_nonexit, e; - gimple_stmt_iterator bsi; - use_operand_p op; - bool ok; - unsigned i; - profile_probability prob, prob_entry, scale_unrolled; - profile_count freq_e, freq_h; gcov_type new_est_niter = niter_for_unrolled_loop (loop, factor); unsigned irr = 
loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP; - auto_vec to_remove; + enum tree_code exit_cmp; + tree enter_main_cond, exit_base, exit_step, exit_bound; determine_exit_conditions (loop, desc, factor, &enter_main_cond, &exit_base, &exit_step, &exit_cmp, &exit_bound); + bool single_loop_p = !exit_base; /* Let us assume that the unrolled loop is quite likely to be entered. */ + profile_probability prob_entry; if (integer_nonzerop (enter_main_cond)) prob_entry = profile_probability::always (); else prob_entry = profile_probability::guessed_always () .apply_scale (PROB_UNROLLED_LOOP_ENTERED, 100); - /* The values for scales should keep profile consistent, and somewhat close - to correct. - - TODO: The current value of SCALE_REST makes it appear that the loop that - is created by splitting the remaining iterations of the unrolled loop is - executed the same number of times as the original loop, and with the same - frequencies, which is obviously wrong. This does not appear to cause - problems, so we do not bother with fixing it for now. To make the profile - correct, we would need to change the probability of the exit edge of the - loop, and recompute the distribution of frequencies in its body because - of this change (scale the frequencies of blocks before and after the exit - by appropriate factors). */ - scale_unrolled = prob_entry; - - new_loop = loop_version (loop, enter_main_cond, NULL, prob_entry, - prob_entry.invert (), scale_unrolled, - profile_probability::guessed_always (), - true); - gcc_assert (new_loop != NULL); - update_ssa (TODO_update_ssa); - - /* Prepare the cfg and update the phi nodes. Move the loop exit to the - loop latch (and make its condition dummy, for the moment). 
*/ - rest = loop_preheader_edge (new_loop)->src; - precond_edge = single_pred_edge (rest); - split_edge (loop_latch_edge (loop)); - exit_bb = single_pred (loop->latch); - - /* Since the exit edge will be removed, the frequency of all the blocks - in the loop that are dominated by it must be scaled by - 1 / (1 - exit->probability). */ - if (exit->probability.initialized_p ()) - scale_dominated_blocks_in_loop (loop, exit->src, - /* We are scaling up here so probability - does not fit. */ - loop->header->count, - loop->header->count - - loop->header->count.apply_probability - (exit->probability)); - - bsi = gsi_last_bb (exit_bb); - exit_if = gimple_build_cond (EQ_EXPR, integer_zero_node, - integer_zero_node, - NULL_TREE, NULL_TREE); - - gsi_insert_after (&bsi, exit_if, GSI_NEW_STMT); - new_exit = make_edge (exit_bb, rest, EDGE_FALSE_VALUE | irr); - rescan_loop_exit (new_exit, true, false); - - /* Set the probability of new exit to the same of the old one. Fix - the frequency of the latch block, by scaling it back by - 1 - exit->probability. 
*/ - new_exit->probability = exit->probability; - new_nonexit = single_pred_edge (loop->latch); - new_nonexit->probability = exit->probability.invert (); - new_nonexit->flags = EDGE_TRUE_VALUE; - if (new_nonexit->probability.initialized_p ()) - scale_bbs_frequencies (&loop->latch, 1, new_nonexit->probability); - - old_entry = loop_preheader_edge (loop); - new_entry = loop_preheader_edge (new_loop); - old_latch = loop_latch_edge (loop); - for (psi_old_loop = gsi_start_phis (loop->header), - psi_new_loop = gsi_start_phis (new_loop->header); - !gsi_end_p (psi_old_loop); - gsi_next (&psi_old_loop), gsi_next (&psi_new_loop)) + gcond *exit_if = nullptr; + class loop *new_loop = nullptr; + basic_block rest; + edge new_exit; + if (!single_loop_p) { - phi_old_loop = psi_old_loop.phi (); - phi_new_loop = psi_new_loop.phi (); - - init = PHI_ARG_DEF_FROM_EDGE (phi_old_loop, old_entry); - op = PHI_ARG_DEF_PTR_FROM_EDGE (phi_new_loop, new_entry); - gcc_assert (operand_equal_for_phi_arg_p (init, USE_FROM_PTR (op))); - next = PHI_ARG_DEF_FROM_EDGE (phi_old_loop, old_latch); - - /* Prefer using original variable as a base for the new ssa name. - This is necessary for virtual ops, and useful in order to avoid - losing debug info for real ops. */ - if (TREE_CODE (next) == SSA_NAME - && useless_type_conversion_p (TREE_TYPE (next), - TREE_TYPE (init))) - new_init = copy_ssa_name (next); - else if (TREE_CODE (init) == SSA_NAME - && useless_type_conversion_p (TREE_TYPE (init), - TREE_TYPE (next))) - new_init = copy_ssa_name (init); - else if (useless_type_conversion_p (TREE_TYPE (next), TREE_TYPE (init))) - new_init = make_temp_ssa_name (TREE_TYPE (next), NULL, "unrinittmp"); - else - new_init = make_temp_ssa_name (TREE_TYPE (init), NULL, "unrinittmp"); + /* The values for scales should keep profile consistent, and somewhat + close to correct. 
+ + TODO: The current value of SCALE_REST makes it appear that the loop + that is created by splitting the remaining iterations of the unrolled + loop is executed the same number of times as the original loop, and + with the same frequencies, which is obviously wrong. This does not + appear to cause problems, so we do not bother with fixing it for now. + To make the profile correct, we would need to change the probability + of the exit edge of the loop, and recompute the distribution of + frequencies in its body because of this change (scale the frequencies + of blocks before and after the exit by appropriate factors). */ + profile_probability scale_unrolled = prob_entry; + new_loop = loop_version (loop, enter_main_cond, NULL, prob_entry, + prob_entry.invert (), scale_unrolled, + profile_probability::guessed_always (), + true); + gcc_assert (new_loop != NULL); + update_ssa (TODO_update_ssa); + + /* Prepare the cfg and update the phi nodes. Move the loop exit to the + loop latch (and make its condition dummy, for the moment). */ + rest = loop_preheader_edge (new_loop)->src; + edge precond_edge = single_pred_edge (rest); + split_edge (loop_latch_edge (loop)); + basic_block exit_bb = single_pred (loop->latch); + + /* Since the exit edge will be removed, the frequency of all the blocks + in the loop that are dominated by it must be scaled by + 1 / (1 - exit->probability). */ + if (exit->probability.initialized_p ()) + scale_dominated_blocks_in_loop (loop, exit->src, + /* We are scaling up here so + probability does not fit. 
*/ + loop->header->count, + loop->header->count + - loop->header->count.apply_probability + (exit->probability)); + + gimple_stmt_iterator bsi = gsi_last_bb (exit_bb); + exit_if = gimple_build_cond (EQ_EXPR, integer_zero_node, + integer_zero_node, + NULL_TREE, NULL_TREE); + + gsi_insert_after (&bsi, exit_if, GSI_NEW_STMT); + new_exit = make_edge (exit_bb, rest, EDGE_FALSE_VALUE | irr); + rescan_loop_exit (new_exit, true, false); + + /* Set the probability of new exit to the same of the old one. Fix + the frequency of the latch block, by scaling it back by + 1 - exit->probability. */ + new_exit->probability = exit->probability; + edge new_nonexit = single_pred_edge (loop->latch); + new_nonexit->probability = exit->probability.invert (); + new_nonexit->flags = EDGE_TRUE_VALUE; + if (new_nonexit->probability.initialized_p ()) + scale_bbs_frequencies (&loop->latch, 1, new_nonexit->probability); + + edge old_entry = loop_preheader_edge (loop); + edge new_entry = loop_preheader_edge (new_loop); + edge old_latch = loop_latch_edge (loop); + for (gphi_iterator psi_old_loop = gsi_start_phis (loop->header), + psi_new_loop = gsi_start_phis (new_loop->header); + !gsi_end_p (psi_old_loop); + gsi_next (&psi_old_loop), gsi_next (&psi_new_loop)) + { + gphi *phi_old_loop = psi_old_loop.phi (); + gphi *phi_new_loop = psi_new_loop.phi (); + + tree init = PHI_ARG_DEF_FROM_EDGE (phi_old_loop, old_entry); + use_operand_p op + = PHI_ARG_DEF_PTR_FROM_EDGE (phi_new_loop, new_entry); + gcc_assert (operand_equal_for_phi_arg_p (init, USE_FROM_PTR (op))); + tree next = PHI_ARG_DEF_FROM_EDGE (phi_old_loop, old_latch); + + /* Prefer using original variable as a base for the new ssa name. + This is necessary for virtual ops, and useful in order to avoid + losing debug info for real ops. 
*/ + tree new_init; + if (TREE_CODE (next) == SSA_NAME + && useless_type_conversion_p (TREE_TYPE (next), + TREE_TYPE (init))) + new_init = copy_ssa_name (next); + else if (TREE_CODE (init) == SSA_NAME + && useless_type_conversion_p (TREE_TYPE (init), + TREE_TYPE (next))) + new_init = copy_ssa_name (init); + else if (useless_type_conversion_p (TREE_TYPE (next), + TREE_TYPE (init))) + new_init = make_temp_ssa_name (TREE_TYPE (next), NULL, + "unrinittmp"); + else + new_init = make_temp_ssa_name (TREE_TYPE (init), NULL, + "unrinittmp"); - phi_rest = create_phi_node (new_init, rest); + gphi *phi_rest = create_phi_node (new_init, rest); + add_phi_arg (phi_rest, init, precond_edge, UNKNOWN_LOCATION); + add_phi_arg (phi_rest, next, new_exit, UNKNOWN_LOCATION); + SET_USE (op, new_init); + } - add_phi_arg (phi_rest, init, precond_edge, UNKNOWN_LOCATION); - add_phi_arg (phi_rest, next, new_exit, UNKNOWN_LOCATION); - SET_USE (op, new_init); + remove_path (exit); + } + else + { + new_exit = exit; + rest = exit->dest; } - - remove_path (exit); /* Transform the loop. */ if (transform) @@ -1376,57 +1389,66 @@ tree_transform_and_unroll_loop (class loop *loop, unsigned factor, bitmap_ones (wont_exit); bitmap_clear_bit (wont_exit, factor - 1); - ok = gimple_duplicate_loop_to_header_edge + auto_vec to_remove; + bool ok = gimple_duplicate_loop_to_header_edge (loop, loop_latch_edge (loop), factor - 1, wont_exit, new_exit, &to_remove, DLTHE_FLAG_UPDATE_FREQ); gcc_assert (ok); - FOR_EACH_VEC_ELT (to_remove, i, e) + for (edge e : to_remove) { ok = remove_path (e); gcc_assert (ok); } update_ssa (TODO_update_ssa); - /* Ensure that the frequencies in the loop match the new estimated - number of iterations, and change the probability of the new - exit edge. */ - - freq_h = loop->header->count; - freq_e = (loop_preheader_edge (loop))->count (); - if (freq_h.nonzero_p ()) + if (!single_loop_p) { - /* Avoid dropping loop body profile counter to 0 because of zero count - in loop's preheader. 
*/ - if (freq_h.nonzero_p () && !(freq_e == profile_count::zero ())) - freq_e = freq_e.force_nonzero (); - scale_loop_frequencies (loop, freq_e.probability_in (freq_h)); + /* Ensure that the frequencies in the loop match the new estimated + number of iterations, and change the probability of the new + exit edge. */ + + profile_count freq_h = loop->header->count; + profile_count freq_e = (loop_preheader_edge (loop))->count (); + if (freq_h.nonzero_p ()) + { + /* Avoid dropping loop body profile counter to 0 because of zero + count in loop's preheader. */ + if (freq_h.nonzero_p () && !(freq_e == profile_count::zero ())) + freq_e = freq_e.force_nonzero (); + scale_loop_frequencies (loop, freq_e.probability_in (freq_h)); + } } - exit_bb = single_pred (loop->latch); + basic_block exit_bb = single_pred (loop->latch); new_exit = find_edge (exit_bb, rest); new_exit->probability = profile_probability::always () .apply_scale (1, new_est_niter + 1); - rest->count += new_exit->count (); + if (!single_loop_p) + rest->count += new_exit->count (); - new_nonexit = single_pred_edge (loop->latch); - prob = new_nonexit->probability; + edge new_nonexit = single_pred_edge (loop->latch); + profile_probability prob = new_nonexit->probability; new_nonexit->probability = new_exit->probability.invert (); prob = new_nonexit->probability / prob; if (prob.initialized_p ()) scale_bbs_frequencies (&loop->latch, 1, prob); - /* Finally create the new counter for number of iterations and add the new - exit instruction. */ - bsi = gsi_last_nondebug_bb (exit_bb); - exit_if = as_a (gsi_stmt (bsi)); - create_iv (exit_base, exit_step, NULL_TREE, loop, - &bsi, false, &ctr_before, &ctr_after); - gimple_cond_set_code (exit_if, exit_cmp); - gimple_cond_set_lhs (exit_if, ctr_after); - gimple_cond_set_rhs (exit_if, exit_bound); - update_stmt (exit_if); + if (!single_loop_p) + { + /* Finally create the new counter for number of iterations and add + the new exit instruction. 
*/ + tree ctr_before, ctr_after; + gimple_stmt_iterator bsi = gsi_last_nondebug_bb (exit_bb); + exit_if = as_a (gsi_stmt (bsi)); + create_iv (exit_base, exit_step, NULL_TREE, loop, + &bsi, false, &ctr_before, &ctr_after); + gimple_cond_set_code (exit_if, exit_cmp); + gimple_cond_set_lhs (exit_if, ctr_after); + gimple_cond_set_rhs (exit_if, exit_bound); + update_stmt (exit_if); + } checking_verify_flow_info (); checking_verify_loop_structure (); -- cgit v1.1 From 957952ce64e067c56e58df5ee36bbb004eecffa1 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Wed, 21 Jul 2021 07:50:20 +0100 Subject: unroll: Run VN on unrolled-and-jammed loops Unroll and jam can sometimes leave redundancies. E.g. for: for (int j = 0; j < 100; ++j) for (int i = 0; i < 100; ++i) x[i] += y[i] * z[j][i]; the new loop will do the equivalent of: for (int j = 0; j < 100; j += 2) for (int i = 0; i < 100; ++i) { x[i] += y[i] * z[j][i]; x[i] += y[i] * z[j + 1][i]; } with two reads of y[i] and with a round trip through memory for x[i]. At the moment these redundancies survive till vectorisation, so if vectorisation succeeds, we're reliant on being able to remove the redundancies from the vector form. This can be hard to do if a vector loop uses predication. E.g. on SVE we end up with: .L3: ld1w z3.s, p0/z, [x3, x0, lsl 2] ld1w z0.s, p0/z, [x5, x0, lsl 2] ld1w z1.s, p0/z, [x2, x0, lsl 2] mad z1.s, p1/m, z0.s, z3.s ld1w z2.s, p0/z, [x4, x0, lsl 2] st1w z1.s, p0, [x3, x0, lsl 2] // store to x[i] ld1w z1.s, p0/z, [x3, x0, lsl 2] // load back from x[i] mad z0.s, p1/m, z2.s, z1.s st1w z0.s, p0, [x3, x0, lsl 2] add x0, x0, x6 whilelo p0.s, w0, w1 b.any .L3 This patch runs a value-numbering pass on loops after a successful unroll-and-jam, which gets rid of the unnecessary load and gives a more accurate idea of vector costs. Unfortunately the redundant store still persists without a pre-vect DSE, but that feels like a separate issue. 
Note that the pass requires the loop to have a single exit, hence the simple calculation of exit_bbs. gcc/ * gimple-loop-jam.c: Include tree-ssa-sccvn.h. (tree_loop_unroll_and_jam): Run value-numbering on a loop that has been successfully unrolled. gcc/testsuite/ * gcc.dg/unroll-10.c: New test. --- gcc/gimple-loop-jam.c | 14 +++++++++----- gcc/testsuite/gcc.dg/unroll-10.c | 13 +++++++++++++ 2 files changed, 22 insertions(+), 5 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/unroll-10.c (limited to 'gcc') diff --git a/gcc/gimple-loop-jam.c b/gcc/gimple-loop-jam.c index 4842f0d..544ad77 100644 --- a/gcc/gimple-loop-jam.c +++ b/gcc/gimple-loop-jam.c @@ -38,6 +38,7 @@ along with GCC; see the file COPYING3. If not see #include "tree-data-ref.h" #include "tree-ssa-loop-ivopts.h" #include "tree-vectorizer.h" +#include "tree-ssa-sccvn.h" /* Unroll and Jam transformation @@ -487,7 +488,7 @@ static unsigned int tree_loop_unroll_and_jam (void) { class loop *loop; - bool changed = false; + unsigned int todo = 0; gcc_assert (scev_initialized_p ()); @@ -591,7 +592,11 @@ tree_loop_unroll_and_jam (void) &desc); free_original_copy_tables (); fuse_loops (outer->inner); - changed = true; + todo |= TODO_cleanup_cfg; + + auto_bitmap exit_bbs; + bitmap_set_bit (exit_bbs, single_dom_exit (outer)->dest->index); + todo |= do_rpo_vn (cfun, loop_preheader_edge (outer), exit_bbs); } loop_nest.release (); @@ -599,13 +604,12 @@ tree_loop_unroll_and_jam (void) free_data_refs (datarefs); } - if (changed) + if (todo) { scev_reset (); free_dominance_info (CDI_DOMINATORS); - return TODO_cleanup_cfg; } - return 0; + return todo; } /* Pass boilerplate */ diff --git a/gcc/testsuite/gcc.dg/unroll-10.c b/gcc/testsuite/gcc.dg/unroll-10.c new file mode 100644 index 0000000..0559915 --- /dev/null +++ b/gcc/testsuite/gcc.dg/unroll-10.c @@ -0,0 +1,13 @@ +/* { dg-options "-O3 -fdump-tree-unrolljam" } */ + +void +f (int *restrict x, int *restrict y, int z[restrict 100][100]) +{ + for (int j = 0; j < 100; 
++j) + for (int i = 0; i < 100; ++i) + x[i] += y[i] * z[j][i]; +} + +/* The loop should be unrolled 2 times, leaving one load from x, + one load from y and 2 loads from z. */ +/* { dg-final { scan-tree-dump-times { = \(*\*} 4 "unrolljam" } } */ -- cgit v1.1 From b3d4011ba10275fbd5d6ec5a16d5aaebbdfb5d3c Mon Sep 17 00:00:00 2001 From: Tobias Burnus Date: Wed, 21 Jul 2021 09:36:48 +0200 Subject: Fortran: Fix bind(C) character length checks gcc/fortran/ChangeLog: * decl.c (gfc_verify_c_interop_param): Update for F2008 + F2018 changes; reject unsupported bits with 'Error: Sorry,'. * trans-expr.c (gfc_conv_procedure_call): Fix condition for using CFI descriptor with characters. gcc/testsuite/ChangeLog: * gfortran.dg/iso_c_binding_char_1.f90: Update dg-error. * gfortran.dg/pr32599.f03: Use -std=f2003 + update comment. * gfortran.dg/bind_c_char_10.f90: New test. * gfortran.dg/bind_c_char_6.f90: New test. * gfortran.dg/bind_c_char_7.f90: New test. * gfortran.dg/bind_c_char_8.f90: New test. * gfortran.dg/bind_c_char_9.f90: New test.
--- gcc/fortran/decl.c | 113 ++++- gcc/fortran/trans-expr.c | 18 +- gcc/testsuite/gfortran.dg/bind_c_char_10.f90 | 480 +++++++++++++++++++++ gcc/testsuite/gfortran.dg/bind_c_char_6.f90 | 262 +++++++++++ gcc/testsuite/gfortran.dg/bind_c_char_7.f90 | 261 +++++++++++ gcc/testsuite/gfortran.dg/bind_c_char_8.f90 | 249 +++++++++++ gcc/testsuite/gfortran.dg/bind_c_char_9.f90 | 188 ++++++++ gcc/testsuite/gfortran.dg/iso_c_binding_char_1.f90 | 2 +- gcc/testsuite/gfortran.dg/pr32599.f03 | 8 +- 9 files changed, 1557 insertions(+), 24 deletions(-) create mode 100644 gcc/testsuite/gfortran.dg/bind_c_char_10.f90 create mode 100644 gcc/testsuite/gfortran.dg/bind_c_char_6.f90 create mode 100644 gcc/testsuite/gfortran.dg/bind_c_char_7.f90 create mode 100644 gcc/testsuite/gfortran.dg/bind_c_char_8.f90 create mode 100644 gcc/testsuite/gfortran.dg/bind_c_char_9.f90 (limited to 'gcc') diff --git a/gcc/fortran/decl.c b/gcc/fortran/decl.c index 413c7a7..05081c4 100644 --- a/gcc/fortran/decl.c +++ b/gcc/fortran/decl.c @@ -1552,20 +1552,115 @@ gfc_verify_c_interop_param (gfc_symbol *sym) } /* Character strings are only C interoperable if they have a - length of 1. */ - if (sym->ts.type == BT_CHARACTER && !sym->attr.dimension) + length of 1. However, as argument they are either iteroperable + when passed as descriptor (which requires len=: or len=*) or + when having a constant length or are always passed by + descriptor. */ + if (sym->ts.type == BT_CHARACTER) { gfc_charlen *cl = sym->ts.u.cl; - if (!cl || !cl->length || cl->length->expr_type != EXPR_CONSTANT - || mpz_cmp_si (cl->length->value.integer, 1) != 0) + + if (sym->attr.allocatable || sym->attr.pointer) { - gfc_error ("Character argument %qs at %L " - "must be length 1 because " - "procedure %qs is BIND(C)", - sym->name, &sym->declared_at, - sym->ns->proc_name->name); + /* F2018, 18.3.6 (6). 
*/ + if (!sym->ts.deferred) + { + if (sym->attr.allocatable) + gfc_error ("Allocatable character dummy argument %qs " + "at %L must have deferred length as " + "procedure %qs is BIND(C)", sym->name, + &sym->declared_at, sym->ns->proc_name->name); + else + gfc_error ("Pointer character dummy argument %qs at %L " + "must have deferred length as procedure %qs " + "is BIND(C)", sym->name, &sym->declared_at, + sym->ns->proc_name->name); + retval = false; + } + else if (!gfc_notify_std (GFC_STD_F2018, + "Deferred-length character dummy " + "argument %qs at %L of procedure " + "%qs with BIND(C) attribute", + sym->name, &sym->declared_at, + sym->ns->proc_name->name)) + retval = false; + else if (!sym->attr.dimension) + { + /* FIXME: Use CFI array descriptor for scalars. */ + gfc_error ("Sorry, deferred-length scalar character dummy " + "argument %qs at %L of procedure %qs with " + "BIND(C) not yet supported", sym->name, + &sym->declared_at, sym->ns->proc_name->name); + retval = false; + } + } + else if (sym->attr.value + && (!cl || !cl->length + || cl->length->expr_type != EXPR_CONSTANT + || mpz_cmp_si (cl->length->value.integer, 1) != 0)) + { + gfc_error ("Character dummy argument %qs at %L must be " + "of length 1 as it has the VALUE attribute", + sym->name, &sym->declared_at); retval = false; } + else if (!cl || !cl->length) + { + /* Assumed length; F2018, 18.3.6 (5)(2). + Uses the CFI array descriptor. */ + if (!gfc_notify_std (GFC_STD_F2018, + "Assumed-length character dummy argument " + "%qs at %L of procedure %qs with BIND(C) " + "attribute", sym->name, &sym->declared_at, + sym->ns->proc_name->name)) + retval = false; + else if (!sym->attr.dimension + || sym->as->type == AS_ASSUMED_SIZE + || sym->as->type == AS_EXPLICIT) + { + /* FIXME: Valid - should use the CFI array descriptor, but + not yet handled for scalars and assumed-/explicit-size + arrays. 
*/ + gfc_error ("Sorry, character dummy argument %qs at %L " + "with assumed length is not yet supported for " + "procedure %qs with BIND(C) attribute", + sym->name, &sym->declared_at, + sym->ns->proc_name->name); + retval = false; + } + } + else if (cl->length->expr_type != EXPR_CONSTANT) + { + /* F2018, 18.3.6, (5), item 4. */ + if (!sym->attr.dimension + || sym->as->type == AS_ASSUMED_SIZE + || sym->as->type == AS_EXPLICIT) + { + gfc_error ("Character dummy argument %qs at %L must be " + "of constant length or assumed length, " + "unless it has assumed shape or assumed rank, " + "as procedure %qs has the BIND(C) attribute", + sym->name, &sym->declared_at, + sym->ns->proc_name->name); + retval = false; + } + else if (!gfc_notify_std (GFC_STD_F2018, + "Character dummy argument %qs at " + "%L with nonconstant length as " + "procedure %qs is BIND(C)", + sym->name, &sym->declared_at, + sym->ns->proc_name->name)) + retval = false; + } + else if (mpz_cmp_si (cl->length->value.integer, 1) != 0 + && !gfc_notify_std (GFC_STD_F2008, + "Character dummy argument %qs at %L " + "with length greater than 1 for " + "procedure %qs with BIND(C) " + "attribute", + sym->name, &sym->declared_at, + sym->ns->proc_name->name)) + retval = false; } /* We have to make sure that any param to a bind(c) routine does diff --git a/gcc/fortran/trans-expr.c b/gcc/fortran/trans-expr.c index 9e0dcde..b18a9ec 100644 --- a/gcc/fortran/trans-expr.c +++ b/gcc/fortran/trans-expr.c @@ -5757,18 +5757,16 @@ gfc_conv_procedure_call (gfc_se * se, gfc_symbol * sym, arg = arg->next, formal = formal ? formal->next : NULL, ++argc) { bool finalized = false; - bool non_unity_length_string = false; + bool assumed_length_string = false; tree derived_array = NULL_TREE; e = arg->expr; fsym = formal ? 
formal->sym : NULL; parm_kind = MISSING; - if (fsym && fsym->ts.type == BT_CHARACTER && fsym->ts.u.cl - && (!fsym->ts.u.cl->length - || fsym->ts.u.cl->length->expr_type != EXPR_CONSTANT - || mpz_cmp_si (fsym->ts.u.cl->length->value.integer, 1) != 0)) - non_unity_length_string = true; + if (fsym && fsym->ts.type == BT_CHARACTER + && (!fsym->ts.u.cl || !fsym->ts.u.cl->length)) + assumed_length_string = true; /* If the procedure requires an explicit interface, the actual argument is passed according to the corresponding formal @@ -6002,8 +6000,8 @@ gfc_conv_procedure_call (gfc_se * se, gfc_symbol * sym, else if (sym->attr.is_bind_c && e && (is_CFI_desc (fsym, NULL) - || non_unity_length_string)) - /* Implement F2018, C.12.6.1: paragraph (2). */ + || assumed_length_string)) + /* Implement F2018, 18.3.6, list item (5), bullet point 2. */ gfc_conv_gfc_desc_to_cfi_desc (&parmse, e, fsym); else if (fsym && fsym->attr.value) @@ -6447,8 +6445,8 @@ gfc_conv_procedure_call (gfc_se * se, gfc_symbol * sym, } if (sym->attr.is_bind_c && e - && (is_CFI_desc (fsym, NULL) || non_unity_length_string)) - /* Implement F2018, C.12.6.1: paragraph (2). */ + && (is_CFI_desc (fsym, NULL) || assumed_length_string)) + /* Implement F2018, 18.3.6, list item (5), bullet point 2. */ gfc_conv_gfc_desc_to_cfi_desc (&parmse, e, fsym); else if (e->expr_type == EXPR_VARIABLE diff --git a/gcc/testsuite/gfortran.dg/bind_c_char_10.f90 b/gcc/testsuite/gfortran.dg/bind_c_char_10.f90 new file mode 100644 index 0000000..3595851 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/bind_c_char_10.f90 @@ -0,0 +1,480 @@ +! { dg-do run } +! { dg-additional-options "-fdump-tree-original" } + +! F2018 - examples with array descriptor + +module m + use iso_c_binding, only: c_char + implicit none (type, external) + +contains + +! 
Assumed-shape array, nonallocatable/nonpointer + +subroutine as1 (x1) bind(C) + character(kind=c_char, len=1) :: x1(:) + if (size(x1) /= 6) stop + if (len(x1) /= 1) stop + if (any (x1 /= ['g', & + 'd', & + 'f', & + 's', & + '3', & + '5'])) stop 1 + x1 = ['1', & + 'h', & + 'f', & + '3', & + '4', & + 'h'] +end + +subroutine as2 (x2) bind(C) + character(kind=c_char, len=2) :: x2(:) + if (size(x2) /= 6) stop + if (len(x2) /= 2) stop + if (any (x2 /= ['ab', & + 'fd', & + 'D4', & + '54', & + 'ga', & + 'hg'])) stop + x2 = ['ab', & + 'hd', & + 'fj', & + 'a4', & + '4a', & + 'hf'] +end + +subroutine as3 (xn, n) bind(C) + integer :: n + character(kind=c_char, len=n) :: xn(:) + if (size(xn) /= 6) stop + if (len(xn) /= 5) stop + if (any (xn /= ['DDGhf', & + 'hdrh$', & + 'fDGSl', & + 'DFHs3', & + '43grG', & + 'hFG$k'])) stop + xn = ['FDGhf', & + 'hdrhg', & + 'fDgFl', & + 'DFHs3', & + '4a54G', & + 'hSs6k'] +end + +subroutine as4 (xstar) bind(C) + character(kind=c_char, len=*) :: xstar(:) + if (size(xstar) /= 6) stop + if (len(xstar) /= 5) stop + if (any (xstar /= ['DDGhf', & + 'hdrh$', & + 'fDGSl', & + 'DFHs3', & + '43grG', & + 'hFG$k'])) stop + xstar = ['FDGhf', & + 'hdrhg', & + 'fDgFl', & + 'DFHs3', & + '4a54G', & + 'hSs6k'] +end + +! Assumed-rank array, nonallocatable/nonpointer + +subroutine ar1 (x1) bind(C) + character(kind=c_char, len=1) :: x1(..) + if (size(x1) /= 6) stop + if (len(x1) /= 1) stop + select rank(x1) + rank(1) + if (any (x1 /= ['g', & + 'd', & + 'f', & + 's', & + '3', & + '5'])) stop + x1 = ['1', & + 'h', & + 'f', & + '3', & + '4', & + 'h'] + rank default + stop + end select +end + +subroutine ar2 (x2) bind(C) + character(kind=c_char, len=2) :: x2(..) 
+ if (size(x2) /= 6) stop + if (len(x2) /= 2) stop + select rank(x2) + rank(1) + if (any (x2 /= ['ab', & + 'fd', & + 'D4', & + '54', & + 'ga', & + 'hg'])) stop + x2 = ['ab', & + 'hd', & + 'fj', & + 'a4', & + '4a', & + 'hf'] + rank default + stop + end select +end + +subroutine ar3 (xn, n) bind(C) + integer :: n + character(len=n) :: xn(..) + if (size(xn) /= 6) stop + if (len(xn) /= 5) stop + select rank(xn) + rank(1) + if (any (xn /= ['DDGhf', & + 'hdrh$', & + 'fDGSl', & + 'DFHs3', & + '43grG', & + 'hFG$k'])) stop + xn = ['FDGhf', & + 'hdrhg', & + 'fDgFl', & + 'DFHs3', & + '4a54G', & + 'hSs6k'] + rank default + stop + end select +end + +subroutine ar4 (xstar) bind(C) + character(kind=c_char, len=*) :: xstar(..) + if (size(xstar) /= 6) stop + if (len(xstar) /= 5) stop + select rank(xstar) + rank(1) + if (any (xstar /= ['DDGhf', & + 'hdrh$', & + 'fDGSl', & + 'DFHs3', & + '43grG', & + 'hFG$k'])) stop + xstar = ['FDGhf', & + 'hdrhg', & + 'fDgFl', & + 'DFHs3', & + '4a54G', & + 'hSs6k'] + rank default + stop + end select +end + +! ALLOCATABLE + +! Assumed-shape array, allocatable + +subroutine a5a (xcolon) bind(C) + character(kind=c_char, len=:), allocatable :: xcolon(:) + if (.not. allocated (xcolon)) stop + if (size(xcolon) /= 6) stop + if (len(xcolon) /= 5) stop + if (any (xcolon /= ['DDGhf', & + 'hdrh$', & + 'fDGSl', & + 'DFHs3', & + '43grG', & + 'hFG$k'])) stop + xcolon = ['FDGhf', & + 'hdrhg', & + 'fDgFl', & + 'DFHs3', & + '4a54G', & + 'hSs6k'] +end + +! Assumed-rank array, allocatable + +subroutine a5ar (xcolon) bind(C) + character(kind=c_char, len=:), allocatable :: xcolon(..) + if (.not. allocated (xcolon)) stop + if (size(xcolon) /= 6) stop + if (len(xcolon) /= 5) stop + select rank(xcolon) + rank(1) + if (any (xcolon /= ['DDGhf', & + 'hdrh$', & + 'fDGSl', & + 'DFHs3', & + '43grG', & + 'hFG$k'])) stop + xcolon = ['FDGhf', & + 'hdrhg', & + 'fDgFl', & + 'DFHs3', & + '4a54G', & + 'hSs6k'] + rank default + stop + end select +end + +! POINTER +! 
Assumed-shape array, pointer + +subroutine a5p (xcolon) bind(C) + character(kind=c_char, len=:), pointer :: xcolon(:) + if (.not. associated (xcolon)) stop + if (size(xcolon) /= 6) stop + if (len(xcolon) /= 5) stop + if (any (xcolon /= ['DDGhf', & + 'hdrh$', & + 'fDGSl', & + 'DFHs3', & + '43grG', & + 'hFG$k'])) stop + xcolon = ['FDGhf', & + 'hdrhg', & + 'fDgFl', & + 'DFHs3', & + '4a54G', & + 'hSs6k'] +end + +! Assumed-rank array, pointer + +subroutine a5pr (xcolon) bind(C) + character(kind=c_char, len=:), pointer :: xcolon(..) + if (.not. associated (xcolon)) stop + if (size(xcolon) /= 6) stop + if (len(xcolon) /= 5) stop + select rank(xcolon) + rank(1) + if (any (xcolon /= ['DDGhf', & + 'hdrh$', & + 'fDGSl', & + 'DFHs3', & + '43grG', & + 'hFG$k'])) stop + xcolon = ['FDGhf', & + 'hdrhg', & + 'fDgFl', & + 'DFHs3', & + '4a54G', & + 'hSs6k'] + rank default + stop + end select +end +end module m + +program main + use m + implicit none (type, external) + character(kind=c_char, len=1) :: str1a6(6) + character(kind=c_char, len=2) :: str2a6(6) + character(kind=c_char, len=5) :: str5a6(6) + + character(kind=c_char, len=:), allocatable :: astr5a6(:) + character(kind=c_char, len=:), pointer :: pstr5a6(:) + + allocate (character(kind=c_char, len=5) :: astr5a6(6), pstr5a6(6)) + + ! 
assumed shape - with array descriptor + + str1a6 = ['g', & + 'd', & + 'f', & + 's', & + '3', & + '5'] + call as1 (str1a6) + if (any (str1a6 /= ['1', & + 'h', & + 'f', & + '3', & + '4', & + 'h'])) stop + str2a6 = ['ab', & + 'fd', & + 'D4', & + '54', & + 'ga', & + 'hg'] + call as2 (str2a6) + if (any (str2a6 /= ['ab', & + 'hd', & + 'fj', & + 'a4', & + '4a', & + 'hf'])) stop + + str5a6 = ['DDGhf', & + 'hdrh$', & + 'fDGSl', & + 'DFHs3', & + '43grG', & + 'hFG$k'] + call as3 (str5a6, 5) + if (any (str5a6 /= ['FDGhf', & + 'hdrhg', & + 'fDgFl', & + 'DFHs3', & + '4a54G', & + 'hSs6k'])) stop + + str5a6 = ['DDGhf', & + 'hdrh$', & + 'fDGSl', & + 'DFHs3', & + '43grG', & + 'hFG$k'] + call as4 (str5a6) + if (any (str5a6 /= ['FDGhf', & + 'hdrhg', & + 'fDgFl', & + 'DFHs3', & + '4a54G', & + 'hSs6k'])) stop + + ! assumed rank - with array descriptor + + str1a6 = ['g', & + 'd', & + 'f', & + 's', & + '3', & + '5'] + call ar1 (str1a6) + if (any (str1a6 /= ['1', & + 'h', & + 'f', & + '3', & + '4', & + 'h'])) stop + str2a6 = ['ab', & + 'fd', & + 'D4', & + '54', & + 'ga', & + 'hg'] + call ar2 (str2a6) + if (any (str2a6 /= ['ab', & + 'hd', & + 'fj', & + 'a4', & + '4a', & + 'hf'])) stop + + str5a6 = ['DDGhf', & + 'hdrh$', & + 'fDGSl', & + 'DFHs3', & + '43grG', & + 'hFG$k'] + call ar3 (str5a6, 5) + if (any (str5a6 /= ['FDGhf', & + 'hdrhg', & + 'fDgFl', & + 'DFHs3', & + '4a54G', & + 'hSs6k'])) stop + + + str5a6 = ['DDGhf', & + 'hdrh$', & + 'fDGSl', & + 'DFHs3', & + '43grG', & + 'hFG$k'] + call ar4 (str5a6) + if (any (str5a6 /= ['FDGhf', & + 'hdrhg', & + 'fDgFl', & + 'DFHs3', & + '4a54G', & + 'hSs6k'])) stop + + ! 
allocatable - with array descriptor + astr5a6(:) = ['DDGhf', & + 'hdrh$', & + 'fDGSl', & + 'DFHs3', & + '43grG', & + 'hFG$k'] + call a5a (astr5a6) + if (any (astr5a6 /= ['FDGhf', & + 'hdrhg', & + 'fDgFl', & + 'DFHs3', & + '4a54G', & + 'hSs6k'])) stop + + astr5a6(:) = ['DDGhf', & + 'hdrh$', & + 'fDGSl', & + 'DFHs3', & + '43grG', & + 'hFG$k'] + call a5ar (astr5a6) + if (any (astr5a6 /= ['FDGhf', & + 'hdrhg', & + 'fDgFl', & + 'DFHs3', & + '4a54G', & + 'hSs6k'])) stop + + + ! pointer - with array descriptor + pstr5a6 = ['DDGhf', & + 'hdrh$', & + 'fDGSl', & + 'DFHs3', & + '43grG', & + 'hFG$k'] + call a5p (pstr5a6) + if (any (pstr5a6 /= ['FDGhf', & + 'hdrhg', & + 'fDgFl', & + 'DFHs3', & + '4a54G', & + 'hSs6k'])) stop + + pstr5a6 = ['DDGhf', & + 'hdrh$', & + 'fDGSl', & + 'DFHs3', & + '43grG', & + 'hFG$k'] + call a5pr (pstr5a6) + if (any (pstr5a6 /= ['FDGhf', & + 'hdrhg', & + 'fDgFl', & + 'DFHs3', & + '4a54G', & + 'hSs6k'])) stop + deallocate (astr5a6, pstr5a6) +end + +! All arguments shall use array descriptors +! { dg-final { scan-tree-dump-times "void as1 \\(struct array01_character\\(kind=1\\) & restrict x1\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "void as2 \\(struct array01_character\\(kind=1\\) & restrict x2\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "void as4 \\(struct array01_character\\(kind=1\\) & restrict xstar\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "void as3 \\(struct array01_character\\(kind=1\\) & restrict xn, integer(kind=4) & restrict n) +! { dg-final { scan-tree-dump-times "void ar1 \\(struct array15_character\\(kind=1\\) & restrict x1\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "void ar2 \\(struct array15_character\\(kind=1\\) & restrict x2\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "void ar3 \\(struct array15_character\\(kind=1\\) & restrict xn, integer(kind=4) & restrict n) +! 
{ dg-final { scan-tree-dump-times "void ar4 \\(struct array15_character\\(kind=1\\) & restrict xstar\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "void a5a \\(struct array01_character\\(kind=1\\) & restrict xcolon\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "void a5ar \\(struct array15_character\\(kind=1\\) & restrict xcolon\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "void a5p \\(struct array01_character\\(kind=1\\) & xcolon\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "void a5pr \\(struct array15_character\\(kind=1\\) & xcolon\\)" 1 "original" } } diff --git a/gcc/testsuite/gfortran.dg/bind_c_char_6.f90 b/gcc/testsuite/gfortran.dg/bind_c_char_6.f90 new file mode 100644 index 0000000..23e1d92 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/bind_c_char_6.f90 @@ -0,0 +1,262 @@ +! { dg-do compile } +! { dg-additional-options "-std=f2003 -fimplicit-none" } + +! F2003 only permits length=1 character dummy args + +! Scalar, nonallocatable/nonpointer + +subroutine s1 (x1) bind(C) + character(len=1) :: x1 +end + +subroutine s2 (x2) bind(C) ! { dg-error "Fortran 2008: Character dummy argument 'x2' at .1. with length greater than 1 for procedure 's2' with BIND\\(C\\) attribute" } + character(len=2) :: x2 +end + +subroutine s3 (xn, n) bind(C) ! { dg-error "Character dummy argument 'xn' at .1. must be of constant length or assumed length, unless it has assumed shape or assumed rank, as procedure 's3' has the BIND\\(C\\) attribute" } + integer :: n + character(len=n) :: xn +end + +subroutine s4 (xstar) bind(C) ! { dg-error "Fortran 2018: Assumed-length character dummy argument 'xstar' at .1. of procedure 's4' with BIND\\(C\\) attribute" } + character(len=*) :: xstar +end + +! Assumed-shape array, nonallocatable/nonpointer + +subroutine as1 (x1) bind(C) ! { dg-error "Fortran 2018: Assumed-shape array 'x1' at .1. as dummy argument to the BIND\\(C\\) procedure 'as1' at .2." 
} + character(len=1) :: x1(:) +end + +subroutine as2 (x2) bind(C) ! { dg-error "Fortran 2008: Character dummy argument 'x2' at .1. with length greater than 1 for procedure 'as2' with BIND\\(C\\) attribute" } + ! { dg-error "Fortran 2018: Assumed-shape array 'x2' at .1. as dummy argument to the BIND\\(C\\) procedure 'as2' at .2." "" { target *-*-* } .-1 } + character(len=2) :: x2(:,:) +end + +subroutine as3 (xn, n) bind(C) ! { dg-error "Fortran 2018: Character dummy argument 'xn' at .1. with nonconstant length as procedure 'as3' is BIND\\(C\\)" } + ! { dg-error "Fortran 2018: Assumed-shape array 'xn' at .1. as dummy argument to the BIND\\(C\\) procedure 'as3' at .2." "" { target *-*-* } .-1 } + integer :: n + character(len=n) :: xn(:,:,:) +end + +subroutine as4 (xstar) bind(C) ! { dg-error "Fortran 2018: Assumed-length character dummy argument 'xstar' at .1. of procedure 'as4' with BIND\\(C\\) attribute" } + ! { dg-error "Fortran 2018: Assumed-shape array 'xstar' at .1. as dummy argument to the BIND\\(C\\) procedure 'as4' at .2." "" { target *-*-* } .-1 } + character(len=*) :: xstar(:,:,:,:) +end + +! Assumed-rank array, nonallocatable/nonpointer + +subroutine ar1 (x1) bind(C) ! { dg-error "Symbol 'x1' at .1. has no IMPLICIT type" } + character(len=1) :: x1(..) ! { dg-error "Fortran 2018: Assumed-rank array at .1." } +end + +subroutine ar2 (x2) bind(C) ! { dg-error "Symbol 'x2' at .1. has no IMPLICIT type" } + character(len=2) :: x2(..) ! { dg-error "Fortran 2018: Assumed-rank array at .1." } +end + +subroutine ar3 (xn, n) bind(C) ! { dg-error "Symbol 'xn' at .1. has no IMPLICIT type" } + integer :: n + character(len=n) :: xn(..) ! { dg-error "Fortran 2018: Assumed-rank array at .1." } +end + +subroutine ar4 (xstar) bind(C) ! { dg-error "Symbol 'xstar' at .1. has no IMPLICIT type" } + character(len=*) :: xstar(..) ! { dg-error "Fortran 2018: Assumed-rank array at .1." } +end + +! 
Assumed-size array, nonallocatable/nonpointer + +subroutine az1 (x1) bind(C) + character(len=1) :: x1(*) +end + +subroutine az2 (x2) bind(C) ! { dg-error "Fortran 2008: Character dummy argument 'x2' at .1. with length greater than 1 for procedure 'az2' with BIND\\(C\\) attribute" } + character(len=2) :: x2(*) +end + +subroutine az3 (xn, n) bind(C) ! { dg-error "Character dummy argument 'xn' at .1. must be of constant length or assumed length, unless it has assumed shape or assumed rank, as procedure 'az3' has the BIND\\(C\\) attribute" } + integer :: n + character(len=n) :: xn(*) +end + +subroutine az4 (xstar) bind(C) ! { dg-error "Fortran 2018: Assumed-length character dummy argument 'xstar' at .1. of procedure 'az4' with BIND\\(C\\) attribute" } + character(len=*) :: xstar(*) +end + +! Explicit-size array, nonallocatable/nonpointer + +subroutine ae1 (x1) bind(C) + character(len=1) :: x1(5) +end + +subroutine ae2 (x2) bind(C) ! { dg-error "Fortran 2008: Character dummy argument 'x2' at .1. with length greater than 1 for procedure 'ae2' with BIND\\(C\\) attribute" } + character(len=2) :: x2(7) +end + +subroutine ae3 (xn, n) bind(C) ! { dg-error "Character dummy argument 'xn' at .1. must be of constant length or assumed length, unless it has assumed shape or assumed rank, as procedure 'ae3' has the BIND\\(C\\) attribute" } + integer :: n + character(len=n) :: xn(9) +end + +subroutine ae4 (xstar) bind(C) ! { dg-error "Fortran 2018: Assumed-length character dummy argument 'xstar' at .1. of procedure 'ae4' with BIND\\(C\\) attribute" } + character(len=*) :: xstar(3) +end + +! ALLOCATABLE +! Scalar, allocatable + +subroutine s1a (x1) bind(C) ! { dg-error "Allocatable character dummy argument 'x1' at .1. must have deferred length as procedure 's1a' is BIND\\(C\\)" } + ! { dg-error "Fortran 2018: Variable 'x1' at .1. 
with ALLOCATABLE attribute in procedure 's1a' with BIND\\(C\\)" "" { target *-*-* } .-1 } + character(len=1), allocatable :: x1 +end + +subroutine s2a (x2) bind(C) ! { dg-error "Allocatable character dummy argument 'x2' at .1. must have deferred length as procedure 's2a' is BIND\\(C\\)" } + ! { dg-error "Fortran 2018: Variable 'x2' at .1. with ALLOCATABLE attribute in procedure 's2a' with BIND\\(C\\)" "" { target *-*-* } .-1 } + character(len=2), allocatable :: x2 +end + +subroutine s3a (xn, n) bind(C) ! { dg-error "Allocatable character dummy argument 'xn' at .1. must have deferred length as procedure 's3a' is BIND\\(C\\)" } + ! { dg-error "Fortran 2018: Variable 'xn' at .1. with ALLOCATABLE attribute in procedure 's3a' with BIND\\(C\\)" "" { target *-*-* } .-1 } + integer :: n + character(len=n), allocatable :: xn +end + +subroutine s4a (xstar) bind(C) ! { dg-error "Allocatable character dummy argument 'xstar' at .1. must have deferred length as procedure 's4a' is BIND\\(C\\)" } + ! { dg-error "Fortran 2018: Variable 'xstar' at .1. with ALLOCATABLE attribute in procedure 's4a' with BIND\\(C\\)" "" { target *-*-* } .-1 } + character(len=*), allocatable :: xstar +end + +subroutine s5a (xcolon) bind(C) ! { dg-error "Fortran 2018: Deferred-length character dummy argument 'xcolon' at .1. of procedure 's5a' with BIND\\(C\\) attribute" } + ! { dg-error "Fortran 2018: Variable 'xcolon' at .1. with ALLOCATABLE attribute in procedure 's5a' with BIND\\(C\\)" "" { target *-*-* } .-1 } + character(len=:), allocatable :: xcolon +end + +! Assumed-shape array, allocatable + +subroutine a1a (x1) bind(C) ! { dg-error "Allocatable character dummy argument 'x1' at .1. must have deferred length as procedure 'a1a' is BIND\\(C\\)" } + ! { dg-error "Fortran 2018: Variable 'x1' at .1. with ALLOCATABLE attribute in procedure 'a1a' with BIND\\(C\\)" "" { target *-*-* } .-1 } + character(len=1), allocatable :: x1(:) +end + +subroutine a2a (x2) bind(C) ! 
{ dg-error "Allocatable character dummy argument 'x2' at .1. must have deferred length as procedure 'a2a' is BIND\\(C\\)" } + ! { dg-error "Fortran 2018: Variable 'x2' at .1. with ALLOCATABLE attribute in procedure 'a2a' with BIND\\(C\\)" "" { target *-*-* } .-1 } + character(len=2), allocatable :: x2(:,:) +end + +subroutine a3a (xn, n) bind(C) ! { dg-error "Allocatable character dummy argument 'xn' at .1. must have deferred length as procedure 'a3a' is BIND\\(C\\)" } + ! { dg-error "Fortran 2018: Variable 'xn' at .1. with ALLOCATABLE attribute in procedure 'a3a' with BIND\\(C\\)" "" { target *-*-* } .-1 } + integer :: n + character(len=n), allocatable :: xn(:,:,:) +end + +subroutine a4a (xstar) bind(C) ! { dg-error "Allocatable character dummy argument 'xstar' at .1. must have deferred length as procedure 'a4a' is BIND\\(C\\)" } + ! { dg-error "Fortran 2018: Variable 'xstar' at .1. with ALLOCATABLE attribute in procedure 'a4a' with BIND\\(C\\)" "" { target *-*-* } .-1 } + character(len=*), allocatable :: xstar(:,:,:,:) +end + +subroutine a5a (xcolon) bind(C) ! { dg-error "Fortran 2018: Deferred-length character dummy argument 'xcolon' at .1. of procedure 'a5a' with BIND\\(C\\) attribute" } + ! { dg-error "Fortran 2018: Variable 'xcolon' at .1. with ALLOCATABLE attribute in procedure 'a5a' with BIND\\(C\\)" "" { target *-*-* } .-1 } + character(len=:), allocatable :: xcolon(:) +end + +! Assumed-rank array, allocatable + +subroutine a1ar (x1) bind(C) ! { dg-error "Symbol 'x1' at .1. has no IMPLICIT type" } + character(len=1), allocatable :: x1(..) ! { dg-error "Fortran 2018: Assumed-rank array at .1." } +end + +subroutine a2ar (x2) bind(C) ! { dg-error "Symbol 'x2' at .1. has no IMPLICIT type" } + character(len=2), allocatable :: x2(..) ! { dg-error "Fortran 2018: Assumed-rank array at .1." } +end + +subroutine a3ar (xn, n) bind(C) ! { dg-error "Symbol 'xn' at .1. has no IMPLICIT type" } + integer :: n + character(len=n), allocatable :: xn(..) ! 
{ dg-error "Fortran 2018: Assumed-rank array at .1." } +end + +subroutine a4ar (xstar) bind(C) ! { dg-error "Symbol 'xstar' at .1. has no IMPLICIT type" } + character(len=*), allocatable :: xstar(..) ! { dg-error "Fortran 2018: Assumed-rank array at .1." } +end + +subroutine a5ar (xcolon) bind(C) ! { dg-error "Symbol 'xcolon' at .1. has no IMPLICIT type" } + character(len=:), allocatable :: xcolon(..) ! { dg-error "Fortran 2018: Assumed-rank array at .1." } +end + +! POINTER +! Scalar, pointer + +subroutine s1p (x1) bind(C) ! { dg-error "Pointer character dummy argument 'x1' at .1. must have deferred length as procedure 's1p' is BIND\\(C\\)" } + ! { dg-error "Fortran 2018: Variable 'x1' at .1. with POINTER attribute in procedure 's1p' with BIND\\(C\\)" "" { target *-*-* } .-1 } + character(len=1), pointer :: x1 +end + +subroutine s2p (x2) bind(C) ! { dg-error "Pointer character dummy argument 'x2' at .1. must have deferred length as procedure 's2p' is BIND\\(C\\)" } + ! { dg-error "Fortran 2018: Variable 'x2' at .1. with POINTER attribute in procedure 's2p' with BIND\\(C\\)" "" { target *-*-* } .-1 } + character(len=2), pointer :: x2 +end + +subroutine s3p (xn, n) bind(C) ! { dg-error "Pointer character dummy argument 'xn' at .1. must have deferred length as procedure 's3p' is BIND\\(C\\)" } + ! { dg-error "Fortran 2018: Variable 'xn' at .1. with POINTER attribute in procedure 's3p' with BIND\\(C\\)" "" { target *-*-* } .-1 } + integer :: n + character(len=n), pointer :: xn +end + +subroutine s4p (xstar) bind(C) ! { dg-error "Pointer character dummy argument 'xstar' at .1. must have deferred length as procedure 's4p' is BIND\\(C\\)" } + ! { dg-error "Fortran 2018: Variable 'xstar' at .1. with POINTER attribute in procedure 's4p' with BIND\\(C\\)" "" { target *-*-* } .-1 } + character(len=*), pointer :: xstar +end + +subroutine s5p (xcolon) bind(C) ! { dg-error "Fortran 2018: Deferred-length character dummy argument 'xcolon' at .1. 
of procedure 's5p' with BIND\\(C\\) attribute" } + ! { dg-error "Fortran 2018: Variable 'xcolon' at .1. with POINTER attribute in procedure 's5p' with BIND\\(C\\)" "" { target *-*-* } .-1 } + character(len=:), pointer :: xcolon +end + +! Assumed-shape array, pointer + +subroutine a1p (x1) bind(C) ! { dg-error "Pointer character dummy argument 'x1' at .1. must have deferred length as procedure 'a1p' is BIND\\(C\\)" } + ! { dg-error "Fortran 2018: Variable 'x1' at .1. with POINTER attribute in procedure 'a1p' with BIND\\(C\\)" "" { target *-*-* } .-1 } + character(len=1), pointer :: x1(:) +end + +subroutine a2p (x2) bind(C) ! { dg-error "Pointer character dummy argument 'x2' at .1. must have deferred length as procedure 'a2p' is BIND\\(C\\)" } + ! { dg-error "Fortran 2018: Variable 'x2' at .1. with POINTER attribute in procedure 'a2p' with BIND\\(C\\)" "" { target *-*-* } .-1 } + character(len=2), pointer :: x2(:,:) +end + +subroutine a3p (xn, n) bind(C) ! { dg-error "Pointer character dummy argument 'xn' at .1. must have deferred length as procedure 'a3p' is BIND\\(C\\)" } + ! { dg-error "Fortran 2018: Variable 'xn' at .1. with POINTER attribute in procedure 'a3p' with BIND\\(C\\)" "" { target *-*-* } .-1 } + integer :: n + character(len=n), pointer :: xn(:,:,:) +end + +subroutine a4p (xstar) bind(C) ! { dg-error "Pointer character dummy argument 'xstar' at .1. must have deferred length as procedure 'a4p' is BIND\\(C\\)" } + ! { dg-error "Fortran 2018: Variable 'xstar' at .1. with POINTER attribute in procedure 'a4p' with BIND\\(C\\)" "" { target *-*-* } .-1 } + character(len=*), pointer :: xstar(:,:,:,:) +end + +subroutine a5p (xcolon) bind(C) ! { dg-error "Fortran 2018: Deferred-length character dummy argument 'xcolon' at .1. of procedure 'a5p' with BIND\\(C\\) attribute" } + ! { dg-error "Fortran 2018: Variable 'xcolon' at .1. with POINTER attribute in procedure 'a5p' with BIND\\(C\\)" "" { target *-*-* } .-1 } + character(len=:), pointer :: xcolon(:) +end + +! 
Assumed-rank array, pointer + +subroutine a1pr (x1) bind(C) ! { dg-error "Symbol 'x1' at .1. has no IMPLICIT type" } + character(len=1), pointer :: x1(..) ! { dg-error "Fortran 2018: Assumed-rank array at .1." } +end + +subroutine a2pr (x2) bind(C) ! { dg-error "Symbol 'x2' at .1. has no IMPLICIT type" } + character(len=2), pointer :: x2(..) ! { dg-error "Fortran 2018: Assumed-rank array at .1." } +end + +subroutine a3pr (xn, n) bind(C) ! { dg-error "Symbol 'xn' at .1. has no IMPLICIT type" } + integer :: n + character(len=n), pointer :: xn(..) ! { dg-error "Fortran 2018: Assumed-rank array at .1." } +end + +subroutine a4pr (xstar) bind(C) ! { dg-error "Symbol 'xstar' at .1. has no IMPLICIT type" } + character(len=*), pointer :: xstar(..) ! { dg-error "Fortran 2018: Assumed-rank array at .1." } +end + +subroutine a5pr (xcolon) bind(C) ! { dg-error "Symbol 'xcolon' at .1. has no IMPLICIT type" } + character(len=:), pointer :: xcolon(..) ! { dg-error "Fortran 2018: Assumed-rank array at .1." } +end diff --git a/gcc/testsuite/gfortran.dg/bind_c_char_7.f90 b/gcc/testsuite/gfortran.dg/bind_c_char_7.f90 new file mode 100644 index 0000000..a9b8c3b --- /dev/null +++ b/gcc/testsuite/gfortran.dg/bind_c_char_7.f90 @@ -0,0 +1,261 @@ +! { dg-do compile } +! { dg-additional-options "-std=f2008 -fimplicit-none" } + +! F2008 permits constant character lengths for dummy arguments + +! Scalar, nonallocatable/nonpointer + +subroutine s1 (x1) bind(C) + character(len=1) :: x1 +end + +subroutine s2 (x2) bind(C) + character(len=2) :: x2 +end + +subroutine s3 (xn, n) bind(C) ! { dg-error "Character dummy argument 'xn' at .1. must be of constant length or assumed length, unless it has assumed shape or assumed rank, as procedure 's3' has the BIND\\(C\\) attribute" } + integer :: n + character(len=n) :: xn +end + +subroutine s4 (xstar) bind(C) ! { dg-error "Fortran 2018: Assumed-length character dummy argument 'xstar' at .1. 
of procedure 's4' with BIND\\(C\\) attribute" } + character(len=*) :: xstar +end + +! Assumed-shape array, nonallocatable/nonpointer + +subroutine as1 (x1) bind(C) ! { dg-error "Fortran 2018: Assumed-shape array 'x1' at .1. as dummy argument to the BIND\\(C\\) procedure 'as1' at .2." } + character(len=1) :: x1(:) +end + +subroutine as2 (x2) bind(C) ! { dg-error "Fortran 2018: Assumed-shape array 'x2' at .1. as dummy argument to the BIND\\(C\\) procedure 'as2' at .2." } + character(len=2) :: x2(:,:) +end + +subroutine as3 (xn, n) bind(C) ! { dg-error "Fortran 2018: Character dummy argument 'xn' at .1. with nonconstant length as procedure 'as3' is BIND\\(C\\)" } + ! { dg-error "Fortran 2018: Assumed-shape array 'xn' at .1. as dummy argument to the BIND\\(C\\) procedure 'as3' at .2." "" { target *-*-* } .-1 } + integer :: n + character(len=n) :: xn(:,:,:) +end + +subroutine as4 (xstar) bind(C) ! { dg-error "Fortran 2018: Assumed-length character dummy argument 'xstar' at .1. of procedure 'as4' with BIND\\(C\\) attribute" } + ! { dg-error "Fortran 2018: Assumed-shape array 'xstar' at .1. as dummy argument to the BIND\\(C\\) procedure 'as4' at .2." "" { target *-*-* } .-1 } + character(len=*) :: xstar(:,:,:,:) +end + +! Assumed-rank array, nonallocatable/nonpointer + +subroutine ar1 (x1) bind(C) ! { dg-error "Symbol 'x1' at .1. has no IMPLICIT type" } + character(len=1) :: x1(..) ! { dg-error "Fortran 2018: Assumed-rank array at .1." } +end + +subroutine ar2 (x2) bind(C) ! { dg-error "Symbol 'x2' at .1. has no IMPLICIT type" } + character(len=2) :: x2(..) ! { dg-error "Fortran 2018: Assumed-rank array at .1." } +end + +subroutine ar3 (xn, n) bind(C) ! { dg-error "Symbol 'xn' at .1. has no IMPLICIT type" } + integer :: n + character(len=n) :: xn(..) ! { dg-error "Fortran 2018: Assumed-rank array at .1." } +end + +subroutine ar4 (xstar) bind(C) ! { dg-error "Symbol 'xstar' at .1. has no IMPLICIT type" } + character(len=*) :: xstar(..) ! 
{ dg-error "Fortran 2018: Assumed-rank array at .1." } +end + +! Assumed-size array, nonallocatable/nonpointer + +subroutine az1 (x1) bind(C) + character(len=1) :: x1(*) +end + +subroutine az2 (x2) bind(C) + character(len=2) :: x2(*) +end + +subroutine az3 (xn, n) bind(C) ! { dg-error "Character dummy argument 'xn' at .1. must be of constant length or assumed length, unless it has assumed shape or assumed rank, as procedure 'az3' has the BIND\\(C\\) attribute" } + integer :: n + character(len=n) :: xn(*) +end + +subroutine az4 (xstar) bind(C) ! { dg-error "Fortran 2018: Assumed-length character dummy argument 'xstar' at .1. of procedure 'az4' with BIND\\(C\\) attribute" } + character(len=*) :: xstar(*) +end + +! Explicit-size array, nonallocatable/nonpointer + +subroutine ae1 (x1) bind(C) + character(len=1) :: x1(5) +end + +subroutine ae2 (x2) bind(C) + character(len=2) :: x2(7) +end + +subroutine ae3 (xn, n) bind(C) ! { dg-error "Character dummy argument 'xn' at .1. must be of constant length or assumed length, unless it has assumed shape or assumed rank, as procedure 'ae3' has the BIND\\(C\\) attribute" } + integer :: n + character(len=n) :: xn(9) +end + +subroutine ae4 (xstar) bind(C) ! { dg-error "Fortran 2018: Assumed-length character dummy argument 'xstar' at .1. of procedure 'ae4' with BIND\\(C\\) attribute" } + character(len=*) :: xstar(3) +end + +! ALLOCATABLE +! Scalar, allocatable + +subroutine s1a (x1) bind(C) ! { dg-error "Allocatable character dummy argument 'x1' at .1. must have deferred length as procedure 's1a' is BIND\\(C\\)" } + ! { dg-error "Fortran 2018: Variable 'x1' at .1. with ALLOCATABLE attribute in procedure 's1a' with BIND\\(C\\)" "" { target *-*-* } .-1 } + character(len=1), allocatable :: x1 +end + +subroutine s2a (x2) bind(C) ! { dg-error "Allocatable character dummy argument 'x2' at .1. must have deferred length as procedure 's2a' is BIND\\(C\\)" } + ! { dg-error "Fortran 2018: Variable 'x2' at .1. 
with ALLOCATABLE attribute in procedure 's2a' with BIND\\(C\\)" "" { target *-*-* } .-1 } + character(len=2), allocatable :: x2 +end + +subroutine s3a (xn, n) bind(C) ! { dg-error "Allocatable character dummy argument 'xn' at .1. must have deferred length as procedure 's3a' is BIND\\(C\\)" } + ! { dg-error "Fortran 2018: Variable 'xn' at .1. with ALLOCATABLE attribute in procedure 's3a' with BIND\\(C\\)" "" { target *-*-* } .-1 } + integer :: n + character(len=n), allocatable :: xn +end + +subroutine s4a (xstar) bind(C) ! { dg-error "Allocatable character dummy argument 'xstar' at .1. must have deferred length as procedure 's4a' is BIND\\(C\\)" } + ! { dg-error "Fortran 2018: Variable 'xstar' at .1. with ALLOCATABLE attribute in procedure 's4a' with BIND\\(C\\)" "" { target *-*-* } .-1 } + character(len=*), allocatable :: xstar +end + +subroutine s5a (xcolon) bind(C) ! { dg-error "Fortran 2018: Deferred-length character dummy argument 'xcolon' at .1. of procedure 's5a' with BIND\\(C\\) attribute" } + ! { dg-error "Fortran 2018: Variable 'xcolon' at .1. with ALLOCATABLE attribute in procedure 's5a' with BIND\\(C\\)" "" { target *-*-* } .-1 } + character(len=:), allocatable :: xcolon +end + +! Assumed-shape array, allocatable + +subroutine a1a (x1) bind(C) ! { dg-error "Allocatable character dummy argument 'x1' at .1. must have deferred length as procedure 'a1a' is BIND\\(C\\)" } + ! { dg-error "Fortran 2018: Variable 'x1' at .1. with ALLOCATABLE attribute in procedure 'a1a' with BIND\\(C\\)" "" { target *-*-* } .-1 } + character(len=1), allocatable :: x1(:) +end + +subroutine a2a (x2) bind(C) ! { dg-error "Allocatable character dummy argument 'x2' at .1. must have deferred length as procedure 'a2a' is BIND\\(C\\)" } + ! { dg-error "Fortran 2018: Variable 'x2' at .1. with ALLOCATABLE attribute in procedure 'a2a' with BIND\\(C\\)" "" { target *-*-* } .-1 } + character(len=2), allocatable :: x2(:,:) +end + +subroutine a3a (xn, n) bind(C) ! 
{ dg-error "Allocatable character dummy argument 'xn' at .1. must have deferred length as procedure 'a3a' is BIND\\(C\\)" } + ! { dg-error "Fortran 2018: Variable 'xn' at .1. with ALLOCATABLE attribute in procedure 'a3a' with BIND\\(C\\)" "" { target *-*-* } .-1 } + integer :: n + character(len=n), allocatable :: xn(:,:,:) +end + +subroutine a4a (xstar) bind(C) ! { dg-error "Allocatable character dummy argument 'xstar' at .1. must have deferred length as procedure 'a4a' is BIND\\(C\\)" } + ! { dg-error "Fortran 2018: Variable 'xstar' at .1. with ALLOCATABLE attribute in procedure 'a4a' with BIND\\(C\\)" "" { target *-*-* } .-1 } + character(len=*), allocatable :: xstar(:,:,:,:) +end + +subroutine a5a (xcolon) bind(C) ! { dg-error "Fortran 2018: Deferred-length character dummy argument 'xcolon' at .1. of procedure 'a5a' with BIND\\(C\\) attribute" } + ! { dg-error "Fortran 2018: Variable 'xcolon' at .1. with ALLOCATABLE attribute in procedure 'a5a' with BIND\\(C\\)" "" { target *-*-* } .-1 } + character(len=:), allocatable :: xcolon(:) +end + +! Assumed-rank array, allocatable + +subroutine a1ar (x1) bind(C) ! { dg-error "Symbol 'x1' at .1. has no IMPLICIT type" } + character(len=1), allocatable :: x1(..) ! { dg-error "Fortran 2018: Assumed-rank array at .1." } +end + +subroutine a2ar (x2) bind(C) ! { dg-error "Symbol 'x2' at .1. has no IMPLICIT type" } + character(len=2), allocatable :: x2(..) ! { dg-error "Fortran 2018: Assumed-rank array at .1." } +end + +subroutine a3ar (xn, n) bind(C) ! { dg-error "Symbol 'xn' at .1. has no IMPLICIT type" } + integer :: n + character(len=n), allocatable :: xn(..) ! { dg-error "Fortran 2018: Assumed-rank array at .1." } +end + +subroutine a4ar (xstar) bind(C) ! { dg-error "Symbol 'xstar' at .1. has no IMPLICIT type" } + character(len=*), allocatable :: xstar(..) ! { dg-error "Fortran 2018: Assumed-rank array at .1." } +end + +subroutine a5ar (xcolon) bind(C) ! { dg-error "Symbol 'xcolon' at .1. 
has no IMPLICIT type" } + character(len=:), allocatable :: xcolon(..) ! { dg-error "Fortran 2018: Assumed-rank array at .1." } +end + +! POINTER +! Scalar, pointer + +subroutine s1p (x1) bind(C) ! { dg-error "Pointer character dummy argument 'x1' at .1. must have deferred length as procedure 's1p' is BIND\\(C\\)" } + ! { dg-error "Fortran 2018: Variable 'x1' at .1. with POINTER attribute in procedure 's1p' with BIND\\(C\\)" "" { target *-*-* } .-1 } + character(len=1), pointer :: x1 +end + +subroutine s2p (x2) bind(C) ! { dg-error "Pointer character dummy argument 'x2' at .1. must have deferred length as procedure 's2p' is BIND\\(C\\)" } + ! { dg-error "Fortran 2018: Variable 'x2' at .1. with POINTER attribute in procedure 's2p' with BIND\\(C\\)" "" { target *-*-* } .-1 } + character(len=2), pointer :: x2 +end + +subroutine s3p (xn, n) bind(C) ! { dg-error "Pointer character dummy argument 'xn' at .1. must have deferred length as procedure 's3p' is BIND\\(C\\)" } + ! { dg-error "Fortran 2018: Variable 'xn' at .1. with POINTER attribute in procedure 's3p' with BIND\\(C\\)" "" { target *-*-* } .-1 } + integer :: n + character(len=n), pointer :: xn +end + +subroutine s4p (xstar) bind(C) ! { dg-error "Pointer character dummy argument 'xstar' at .1. must have deferred length as procedure 's4p' is BIND\\(C\\)" } + ! { dg-error "Fortran 2018: Variable 'xstar' at .1. with POINTER attribute in procedure 's4p' with BIND\\(C\\)" "" { target *-*-* } .-1 } + character(len=*), pointer :: xstar +end + +subroutine s5p (xcolon) bind(C) ! { dg-error "Fortran 2018: Deferred-length character dummy argument 'xcolon' at .1. of procedure 's5p' with BIND\\(C\\) attribute" } + ! { dg-error "Fortran 2018: Variable 'xcolon' at .1. with POINTER attribute in procedure 's5p' with BIND\\(C\\)" "" { target *-*-* } .-1 } + character(len=:), pointer :: xcolon +end + +! Assumed-shape array, pointer + +subroutine a1p (x1) bind(C) ! { dg-error "Pointer character dummy argument 'x1' at .1. 
must have deferred length as procedure 'a1p' is BIND\\(C\\)" } + ! { dg-error "Fortran 2018: Variable 'x1' at .1. with POINTER attribute in procedure 'a1p' with BIND\\(C\\)" "" { target *-*-* } .-1 } + character(len=1), pointer :: x1(:) +end + +subroutine a2p (x2) bind(C) ! { dg-error "Pointer character dummy argument 'x2' at .1. must have deferred length as procedure 'a2p' is BIND\\(C\\)" } + ! { dg-error "Fortran 2018: Variable 'x2' at .1. with POINTER attribute in procedure 'a2p' with BIND\\(C\\)" "" { target *-*-* } .-1 } + character(len=2), pointer :: x2(:,:) +end + +subroutine a3p (xn, n) bind(C) ! { dg-error "Pointer character dummy argument 'xn' at .1. must have deferred length as procedure 'a3p' is BIND\\(C\\)" } + ! { dg-error "Fortran 2018: Variable 'xn' at .1. with POINTER attribute in procedure 'a3p' with BIND\\(C\\)" "" { target *-*-* } .-1 } + integer :: n + character(len=n), pointer :: xn(:,:,:) +end + +subroutine a4p (xstar) bind(C) ! { dg-error "Pointer character dummy argument 'xstar' at .1. must have deferred length as procedure 'a4p' is BIND\\(C\\)" } + ! { dg-error "Fortran 2018: Variable 'xstar' at .1. with POINTER attribute in procedure 'a4p' with BIND\\(C\\)" "" { target *-*-* } .-1 } + character(len=*), pointer :: xstar(:,:,:,:) +end + +subroutine a5p (xcolon) bind(C) ! { dg-error "Fortran 2018: Deferred-length character dummy argument 'xcolon' at .1. of procedure 'a5p' with BIND\\(C\\) attribute" } + ! { dg-error "Fortran 2018: Variable 'xcolon' at .1. with POINTER attribute in procedure 'a5p' with BIND\\(C\\)" "" { target *-*-* } .-1 } + character(len=:), pointer :: xcolon(:) +end + +! Assumed-rank array, pointer + +subroutine a1pr (x1) bind(C) ! { dg-error "Symbol 'x1' at .1. has no IMPLICIT type" } + character(len=1), pointer :: x1(..) ! { dg-error "Fortran 2018: Assumed-rank array at .1." } +end + +subroutine a2pr (x2) bind(C) ! { dg-error "Symbol 'x2' at .1. has no IMPLICIT type" } + character(len=2), pointer :: x2(..) ! 
{ dg-error "Fortran 2018: Assumed-rank array at .1." } +end + +subroutine a3pr (xn, n) bind(C) ! { dg-error "Symbol 'xn' at .1. has no IMPLICIT type" } + integer :: n + character(len=n), pointer :: xn(..) ! { dg-error "Fortran 2018: Assumed-rank array at .1." } +end + +subroutine a4pr (xstar) bind(C) ! { dg-error "Symbol 'xstar' at .1. has no IMPLICIT type" } + character(len=*), pointer :: xstar(..) ! { dg-error "Fortran 2018: Assumed-rank array at .1." } +end + +subroutine a5pr (xcolon) bind(C) ! { dg-error "Symbol 'xcolon' at .1. has no IMPLICIT type" } + character(len=:), pointer :: xcolon(..) ! { dg-error "Fortran 2018: Assumed-rank array at .1." } +end diff --git a/gcc/testsuite/gfortran.dg/bind_c_char_8.f90 b/gcc/testsuite/gfortran.dg/bind_c_char_8.f90 new file mode 100644 index 0000000..1d566c0 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/bind_c_char_8.f90 @@ -0,0 +1,249 @@ +! { dg-do compile } +! { dg-additional-options "-fimplicit-none" } + +! F2018 only permits len=*, len=: or len=<const> as dummy argument +! but not len=<non-const-expr> +! Additionally, for allocatable/pointer, len=: is required. + +! Scalar, nonallocatable/nonpointer + +subroutine val_s1(x1) bind(C) + character(len=1), value :: x1 +end + +subroutine val_s2(x2) bind(C) ! { dg-error "Character dummy argument 'x2' at .1. must be of length 1 as it has the VALUE attribute" } + character(len=2), value :: x2 +end + +subroutine s1 (x1) bind(C) + character(len=1) :: x1 +end + +subroutine s2 (x2) bind(C) + character(len=2) :: x2 +end + +subroutine s3 (xn, n) bind(C) ! { dg-error "Character dummy argument 'xn' at .1. must be of constant length or assumed length, unless it has assumed shape or assumed rank, as procedure 's3' has the BIND\\(C\\) attribute" } + integer :: n + character(len=n) :: xn +end + +subroutine s4 (xstar) bind(C) ! { dg-error "Sorry, character dummy argument 'xstar' at .1. 
with assumed length is not yet supported for procedure 's4' with BIND\\(C\\) attribute" } + character(len=*) :: xstar +end + +! Assumed-shape array, nonallocatable/nonpointer + +subroutine as1 (x1) bind(C) + character(len=1) :: x1(:) +end + +subroutine as2 (x2) bind(C) + character(len=2) :: x2(:,:) +end + +subroutine as3 (xn, n) bind(C) + integer :: n + character(len=n) :: xn(:,:,:) +end + +subroutine as4 (xstar) bind(C) + character(len=*) :: xstar(:,:,:,:) +end + +! Assumed-rank array, nonallocatable/nonpointer + +subroutine ar1 (x1) bind(C) + character(len=1) :: x1(..) +end + +subroutine ar2 (x2) bind(C) + character(len=2) :: x2(..) +end + +subroutine ar3 (xn, n) bind(C) + integer :: n + character(len=n) :: xn(..) +end + +subroutine ar4 (xstar) bind(C) + character(len=*) :: xstar(..) +end + +! Assumed-size array, nonallocatable/nonpointer + +subroutine az1 (x1) bind(C) + character(len=1) :: x1(*) +end + +subroutine az2 (x2) bind(C) + character(len=2) :: x2(*) +end + +subroutine az3 (xn, n) bind(C) ! { dg-error "Character dummy argument 'xn' at .1. must be of constant length or assumed length, unless it has assumed shape or assumed rank, as procedure 'az3' has the BIND\\(C\\) attribute" } + integer :: n + character(len=n) :: xn(*) +end + +subroutine az4 (xstar) bind(C) ! { dg-error "Sorry, character dummy argument 'xstar' at .1. with assumed length is not yet supported for procedure 'az4' with BIND\\(C\\) attribute" } + character(len=*) :: xstar(*) +end + +! Explicit-size array, nonallocatable/nonpointer + +subroutine ae1 (x1) bind(C) + character(len=1) :: x1(5) +end + +subroutine ae2 (x2) bind(C) + character(len=2) :: x2(7) +end + +subroutine ae3 (xn, n) bind(C) ! { dg-error "Character dummy argument 'xn' at .1. must be of constant length or assumed length, unless it has assumed shape or assumed rank, as procedure 'ae3' has the BIND\\(C\\) attribute" } + integer :: n + character(len=n) :: xn(9) +end + +subroutine ae4 (xstar) bind(C) ! 
{ dg-error "Sorry, character dummy argument 'xstar' at .1. with assumed length is not yet supported for procedure 'ae4' with BIND\\(C\\) attribute" } + character(len=*) :: xstar(3) +end + +! ALLOCATABLE +! Scalar, allocatable + +subroutine s1a (x1) bind(C) ! { dg-error "Allocatable character dummy argument 'x1' at .1. must have deferred length as procedure 's1a' is BIND\\(C\\)" } + character(len=1), allocatable :: x1 +end + +subroutine s2a (x2) bind(C) ! { dg-error "Allocatable character dummy argument 'x2' at .1. must have deferred length as procedure 's2a' is BIND\\(C\\)" } + character(len=2), allocatable :: x2 +end + +subroutine s3a (xn, n) bind(C) ! { dg-error "Allocatable character dummy argument 'xn' at .1. must have deferred length as procedure 's3a' is BIND\\(C\\)" } + integer :: n + character(len=n), allocatable :: xn +end + +subroutine s4a (xstar) bind(C) ! { dg-error "Allocatable character dummy argument 'xstar' at .1. must have deferred length as procedure 's4a' is BIND\\(C\\)" } + character(len=*), allocatable :: xstar +end + +subroutine s5a (xcolon) bind(C) ! { dg-error "Sorry, deferred-length scalar character dummy argument 'xcolon' at .1. of procedure 's5a' with BIND\\(C\\) not yet supported" } + character(len=:), allocatable :: xcolon +end + +! Assumed-shape array, allocatable + +subroutine a1a (x1) bind(C) ! { dg-error "Allocatable character dummy argument 'x1' at .1. must have deferred length as procedure 'a1a' is BIND\\(C\\)" } + character(len=1), allocatable :: x1(:) +end + +subroutine a2a (x2) bind(C) ! { dg-error "Allocatable character dummy argument 'x2' at .1. must have deferred length as procedure 'a2a' is BIND\\(C\\)" } + character(len=2), allocatable :: x2(:,:) +end + +subroutine a3a (xn, n) bind(C) ! { dg-error "Allocatable character dummy argument 'xn' at .1. must have deferred length as procedure 'a3a' is BIND\\(C\\)" } + integer :: n + character(len=n), allocatable :: xn(:,:,:) +end + +subroutine a4a (xstar) bind(C) ! 
{ dg-error "Allocatable character dummy argument 'xstar' at .1. must have deferred length as procedure 'a4a' is BIND\\(C\\)" } + character(len=*), allocatable :: xstar(:,:,:,:) +end + +subroutine a5a (xcolon) bind(C) + character(len=:), allocatable :: xcolon(:) +end + +! Assumed-rank array, allocatable + +subroutine a1ar (x1) bind(C) ! { dg-error "Allocatable character dummy argument 'x1' at .1. must have deferred length as procedure 'a1ar' is BIND\\(C\\)" } + character(len=1), allocatable :: x1(..) +end + +subroutine a2ar (x2) bind(C) ! { dg-error "Allocatable character dummy argument 'x2' at .1. must have deferred length as procedure 'a2ar' is BIND\\(C\\)" } + character(len=2), allocatable :: x2(..) +end + +subroutine a3ar (xn, n) bind(C) ! { dg-error "Allocatable character dummy argument 'xn' at .1. must have deferred length as procedure 'a3ar' is BIND\\(C\\)" } + integer :: n + character(len=n), allocatable :: xn(..) +end + +subroutine a4ar (xstar) bind(C) ! { dg-error "Allocatable character dummy argument 'xstar' at .1. must have deferred length as procedure 'a4ar' is BIND\\(C\\)" } + character(len=*), allocatable :: xstar(..) +end + +subroutine a5ar (xcolon) bind(C) + character(len=:), allocatable :: xcolon(..) +end + +! POINTER +! Scalar, pointer + +subroutine s1p (x1) bind(C) ! { dg-error "Pointer character dummy argument 'x1' at .1. must have deferred length as procedure 's1p' is BIND\\(C\\)" } + character(len=1), pointer :: x1 +end + +subroutine s2p (x2) bind(C) ! { dg-error "Pointer character dummy argument 'x2' at .1. must have deferred length as procedure 's2p' is BIND\\(C\\)" } + character(len=2), pointer :: x2 +end + +subroutine s3p (xn, n) bind(C) ! { dg-error "Pointer character dummy argument 'xn' at .1. must have deferred length as procedure 's3p' is BIND\\(C\\)" } + integer :: n + character(len=n), pointer :: xn +end + +subroutine s4p (xstar) bind(C) ! { dg-error "Pointer character dummy argument 'xstar' at .1. 
must have deferred length as procedure 's4p' is BIND\\(C\\)" } + character(len=*), pointer :: xstar +end + +subroutine s5p (xcolon) bind(C) ! { dg-error "Sorry, deferred-length scalar character dummy argument 'xcolon' at .1. of procedure 's5p' with BIND\\(C\\) not yet supported" } + character(len=:), pointer :: xcolon +end + +! Assumed-shape array, pointer + +subroutine a1p (x1) bind(C) ! { dg-error "Pointer character dummy argument 'x1' at .1. must have deferred length as procedure 'a1p' is BIND\\(C\\)" } + character(len=1), pointer :: x1(:) +end + +subroutine a2p (x2) bind(C) ! { dg-error "Pointer character dummy argument 'x2' at .1. must have deferred length as procedure 'a2p' is BIND\\(C\\)" } + character(len=2), pointer :: x2(:,:) +end + +subroutine a3p (xn, n) bind(C) ! { dg-error "Pointer character dummy argument 'xn' at .1. must have deferred length as procedure 'a3p' is BIND\\(C\\)" } + integer :: n + character(len=n), pointer :: xn(:,:,:) +end + +subroutine a4p (xstar) bind(C) ! { dg-error "Pointer character dummy argument 'xstar' at .1. must have deferred length as procedure 'a4p' is BIND\\(C\\)" } + character(len=*), pointer :: xstar(:,:,:,:) +end + +subroutine a5p (xcolon) bind(C) + character(len=:), pointer :: xcolon(:) +end + +! Assumed-rank array, pointer + +subroutine a1pr (x1) bind(C) ! { dg-error "Pointer character dummy argument 'x1' at .1. must have deferred length as procedure 'a1pr' is BIND\\(C\\)" } + character(len=1), pointer :: x1(..) +end + +subroutine a2pr (x2) bind(C) ! { dg-error "Pointer character dummy argument 'x2' at .1. must have deferred length as procedure 'a2pr' is BIND\\(C\\)" } + character(len=2), pointer :: x2(..) +end + +subroutine a3pr (xn, n) bind(C) ! { dg-error "Pointer character dummy argument 'xn' at .1. must have deferred length as procedure 'a3pr' is BIND\\(C\\)" } + integer :: n + character(len=n), pointer :: xn(..) +end + +subroutine a4pr (xstar) bind(C) ! 
{ dg-error "Pointer character dummy argument 'xstar' at .1. must have deferred length as procedure 'a4pr' is BIND\\(C\\)" } + character(len=*), pointer :: xstar(..) +end + +subroutine a5pr (xcolon) bind(C) + character(len=:), pointer :: xcolon(..) +end diff --git a/gcc/testsuite/gfortran.dg/bind_c_char_9.f90 b/gcc/testsuite/gfortran.dg/bind_c_char_9.f90 new file mode 100644 index 0000000..d31862c --- /dev/null +++ b/gcc/testsuite/gfortran.dg/bind_c_char_9.f90 @@ -0,0 +1,188 @@ +! { dg-do run } +! { dg-additional-options "-fdump-tree-original" } + +! F2018 - examples without array descriptor + + +module m + use iso_c_binding, only: c_char + implicit none (type, external) + +contains + +! Scalar, nonallocatable/nonpointer +subroutine s1 (x1) bind(C) + character(kind=c_char, len=1) :: x1 + if (len (x1) /= 1) stop + if (x1 /= 'Z') stop + x1 = 'A' +end + +subroutine s2 (x2) bind(C) + character(kind=c_char, len=2) :: x2 + if (len (x2) /= 2) stop + if (x2 /= '42') stop + x2 = '64' +end + +! Assumed-size array, nonallocatable/nonpointer + +subroutine az1 (x1) bind(C) + character(kind=c_char, len=1) :: x1(*) + if (len(x1) /= 1) stop + if (any (x1(:6) /= ['g', & + 'd', & + 'f', & + 's', & + '3', & + '5'])) stop 1 + x1(:6) = ['1', & + 'h', & + 'f', & + '3', & + '4', & + 'h'] +end + +subroutine az2 (x2) bind(C) + character(kind=c_char, len=2) :: x2(*) + if (len(x2) /= 2) stop + if (any (x2(:6) /= ['ab', & + 'fd', & + 'D4', & + '54', & + 'ga', & + 'hg'])) stop + x2(:6) = ['ab', & + 'hd', & + 'fj', & + 'a4', & + '4a', & + 'hf'] +end + +! 
Explicit-size array, nonallocatable/nonpointer + +subroutine ae1 (x1) bind(C) + character(kind=c_char, len=1) :: x1(6) + if (size(x1) /= 6) stop + if (len(x1) /= 1) stop + if (any (x1 /= ['g', & + 'd', & + 'f', & + 's', & + '3', & + '5'])) stop 1 + x1 = ['1', & + 'h', & + 'f', & + '3', & + '4', & + 'h'] +end + +subroutine ae2 (x2) bind(C) + character(kind=c_char, len=2) :: x2(6) + if (size(x2) /= 6) stop + if (len(x2) /= 2) stop + if (any (x2 /= ['ab', & + 'fd', & + 'D4', & + '54', & + 'ga', & + 'hg'])) stop + x2 = ['ab', & + 'hd', & + 'fj', & + 'a4', & + '4a', & + 'hf'] +end + +end module m + +program main + use m + implicit none (type, external) + character(kind=c_char, len=1) :: str1 + character(kind=c_char, len=2) :: str2 + + character(kind=c_char, len=1) :: str1a6(6) + character(kind=c_char, len=2) :: str2a6(6) + + ! Scalar - no array descriptor + + str1 = 'Z' + call s1 (str1) + if (str1 /= 'A') stop + + str2 = '42' + call s2 (str2) + if (str2 /= '64') stop + + ! assumed size - without array descriptor + + str1a6 = ['g', & + 'd', & + 'f', & + 's', & + '3', & + '5'] + call az1 (str1a6) + if (any (str1a6 /= ['1', & + 'h', & + 'f', & + '3', & + '4', & + 'h'])) stop + str2a6 = ['ab', & + 'fd', & + 'D4', & + '54', & + 'ga', & + 'hg'] + call az2 (str2a6) + if (any (str2a6 /= ['ab', & + 'hd', & + 'fj', & + 'a4', & + '4a', & + 'hf'])) stop + ! explicit size - without array descriptor + + str1a6 = ['g', & + 'd', & + 'f', & + 's', & + '3', & + '5'] + call ae1 (str1a6) + if (any (str1a6 /= ['1', & + 'h', & + 'f', & + '3', & + '4', & + 'h'])) stop + str2a6 = ['ab', & + 'fd', & + 'D4', & + '54', & + 'ga', & + 'hg'] + call ae2 (str2a6) + if (any (str2a6 /= ['ab', & + 'hd', & + 'fj', & + 'a4', & + '4a', & + 'hf'])) stop +end + +! All arguments shall be passed without descriptor +! { dg-final { scan-tree-dump-not "dtype" "original" } } +! { dg-final { scan-tree-dump-times "void s1 \\(character\\(kind=1\\)\\\[1:1\\\] & restrict x1\\)" 1 "original" } } +! 
{ dg-final { scan-tree-dump-times "void s2 \\(character\\(kind=1\\)\\\[1:2\\\] & restrict x2\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "void az1 \\(character\\(kind=1\\)\\\[0:\\\]\\\[1:1\\\] \\* restrict x1\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "void az2 \\(character\\(kind=1\\)\\\[0:\\\]\\\[1:2\\\] \\* restrict x2\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "void ae1 \\(character\\(kind=1\\)\\\[6\\\]\\\[1:1\\\] \\* restrict x1\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "void ae2 \\(character\\(kind=1\\)\\\[6\\\]\\\[1:2\\\] \\* restrict x2\\)" 1 "original" } } diff --git a/gcc/testsuite/gfortran.dg/iso_c_binding_char_1.f90 b/gcc/testsuite/gfortran.dg/iso_c_binding_char_1.f90 index ebf9a24..abe5cb7 100644 --- a/gcc/testsuite/gfortran.dg/iso_c_binding_char_1.f90 +++ b/gcc/testsuite/gfortran.dg/iso_c_binding_char_1.f90 @@ -4,7 +4,7 @@ ! ! Contributed by Thomas Koenig ! -subroutine bar(c,d) BIND(C) ! { dg-error "must be length 1" } +subroutine bar(c,d) BIND(C) ! { dg-error "character dummy argument 'c' at .1. with assumed length is not yet supported for procedure 'bar' with BIND\\(C\\) attribute" } character (len=*) c character (len=2) d end diff --git a/gcc/testsuite/gfortran.dg/pr32599.f03 b/gcc/testsuite/gfortran.dg/pr32599.f03 index 297b75a..bf9bd8c 100644 --- a/gcc/testsuite/gfortran.dg/pr32599.f03 +++ b/gcc/testsuite/gfortran.dg/pr32599.f03 @@ -1,20 +1,20 @@ ! { dg-do compile } -! { dg-options "-std=f2008" } +! { dg-options "-std=f2003" } ! ! PR fortran/32599 ! Verifies that character string arguments to a bind(c) procedure have length -! 1, or no len is specified. Note that the C interop extensions in F2018 allow +! 1, or no len is specified. Note that the C interop extensions in F2008 allow ! string arguments of length greater than one to be passed to a C descriptor. ! module pr32599 interface - subroutine destroy(path) BIND(C) ! { dg-error "must be length 1" } + subroutine destroy(path) BIND(C) ! 
{ dg-error "Fortran 2018: Assumed-length character dummy argument 'path' at .1. of procedure .destroy. with BIND\\(C\\) attribute" } use iso_c_binding implicit none character(len=*,kind=c_char), intent(IN) :: path end subroutine destroy - subroutine create(path) BIND(C) ! { dg-error "must be length 1" } + subroutine create(path) BIND(C) ! { dg-error "Fortran 2008: Character dummy argument 'path' at .1. with length greater than 1 for procedure 'create' with BIND\\(C\\) attribute" } use iso_c_binding implicit none character(len=5,kind=c_char), intent(IN) :: path -- cgit v1.1 From aea199f96cf116ba4c81426207acde371556610c Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Wed, 21 Jul 2021 09:38:59 +0200 Subject: c++: Ensure OpenMP reduction with reference type references complete type [PR101516] The following testcase ICEs because we haven't verified if reduction decl has reference type that TREE_TYPE of the reference is a complete type, require_complete_type on the decl doesn't ensure that. 2021-07-21 Jakub Jelinek PR c++/101516 * semantics.c (finish_omp_reduction_clause): Also call complete_type_or_else and return true if it fails. * g++.dg/gomp/pr101516.C: New test. 
--- gcc/cp/semantics.c | 3 ++- gcc/testsuite/g++.dg/gomp/pr101516.C | 8 ++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/g++.dg/gomp/pr101516.C (limited to 'gcc') diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c index b97dc1f..331daf8 100644 --- a/gcc/cp/semantics.c +++ b/gcc/cp/semantics.c @@ -6070,7 +6070,8 @@ finish_omp_reduction_clause (tree c, bool *need_default_ctor, bool *need_dtor) if (!processing_template_decl) { t = require_complete_type (t); - if (t == error_mark_node) + if (t == error_mark_node + || !complete_type_or_else (oatype, NULL_TREE)) return true; tree size = size_binop (EXACT_DIV_EXPR, TYPE_SIZE_UNIT (oatype), TYPE_SIZE_UNIT (type)); diff --git a/gcc/testsuite/g++.dg/gomp/pr101516.C b/gcc/testsuite/g++.dg/gomp/pr101516.C new file mode 100644 index 0000000..48f60de --- /dev/null +++ b/gcc/testsuite/g++.dg/gomp/pr101516.C @@ -0,0 +1,8 @@ +// PR c++/101516 + +void +foo (int (&v) []) +{ + #pragma omp parallel reduction (+:v) // { dg-error "invalid use of array with unspecified bounds" } + ; +} -- cgit v1.1 From b136b7a78774107943fe94051c42b5a968a3ad3f Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Wed, 21 Jul 2021 09:45:02 +0200 Subject: openmp: Fix up omp_check_private [PR101535] The target data construct shouldn't affect omp_check_private, unless the decl there is privatized (use_device_* clauses). The routine had some code for that, but it just did continue; in a loop that looped only if the region type is one of selected 4 kinds, so effectively resulted in return false; instead of looping again. And not diagnosing lastprivate (or reduction etc.) on a variable that is private to the containing parallel results in ICEs later on, as there is no original list item in which to store the last result. 
The target construct is unclear as it has an implicit parallel region and it is not obvious if the data privatization clauses on the construct shall be treated as data privatization on the implicit parallel or just on the target. For now treat those as privatization on the implicit parallel, but treat map clauses as shared on the implicit parallel. 2021-07-21 Jakub Jelinek PR middle-end/101535 * gimplify.c (omp_check_private): Properly skip ORT_TARGET_DATA contexts in which decl isn't privatized and for ORT_TARGET return false if decl is mapped. * c-c++-common/gomp/pr101535-1.c: New test. * c-c++-common/gomp/pr101535-2.c: New test. --- gcc/gimplify.c | 21 ++++++++++++++----- gcc/testsuite/c-c++-common/gomp/pr101535-1.c | 31 ++++++++++++++++++++++++++++ gcc/testsuite/c-c++-common/gomp/pr101535-2.c | 11 ++++++++++ 3 files changed, 58 insertions(+), 5 deletions(-) create mode 100644 gcc/testsuite/c-c++-common/gomp/pr101535-1.c create mode 100644 gcc/testsuite/c-c++-common/gomp/pr101535-2.c (limited to 'gcc') diff --git a/gcc/gimplify.c b/gcc/gimplify.c index 93a2121..5d43f76 100644 --- a/gcc/gimplify.c +++ b/gcc/gimplify.c @@ -7798,7 +7798,13 @@ omp_check_private (struct gimplify_omp_ctx *ctx, tree decl, bool copyprivate) if ((ctx->region_type & (ORT_TARGET | ORT_TARGET_DATA)) != 0 && (n == NULL || (n->value & GOVD_DATA_SHARE_CLASS) == 0)) - continue; + { + if ((ctx->region_type & ORT_TARGET_DATA) != 0 + || n == NULL + || (n->value & GOVD_MAP) == 0) + continue; + return false; + } if (n != NULL) { @@ -7807,11 +7813,16 @@ omp_check_private (struct gimplify_omp_ctx *ctx, tree decl, bool copyprivate) return false; return (n->value & GOVD_SHARED) == 0; } + + if (ctx->region_type == ORT_WORKSHARE + || ctx->region_type == ORT_TASKGROUP + || ctx->region_type == ORT_SIMD + || ctx->region_type == ORT_ACC) + continue; + + break; } - while (ctx->region_type == ORT_WORKSHARE - || ctx->region_type == ORT_TASKGROUP - || ctx->region_type == ORT_SIMD - || ctx->region_type == 
ORT_ACC); + while (1); return false; } diff --git a/gcc/testsuite/c-c++-common/gomp/pr101535-1.c b/gcc/testsuite/c-c++-common/gomp/pr101535-1.c new file mode 100644 index 0000000..8285ce0 --- /dev/null +++ b/gcc/testsuite/c-c++-common/gomp/pr101535-1.c @@ -0,0 +1,31 @@ +/* PR middle-end/101535 */ + +void +foo (void) +{ + int a = 1, i; + #pragma omp target data map(to:a) + #pragma omp for lastprivate(i) /* { dg-error "lastprivate variable 'i' is private in outer context" } */ + for (i = 1; i < 2; i++) + ; +} + +void +bar (void) +{ + int a = 1, i; + #pragma omp target private(i) + #pragma omp for lastprivate(i) /* { dg-error "lastprivate variable 'i' is private in outer context" } */ + for (i = 1; i < 2; i++) + ; +} + +void +baz (void) +{ + int a = 1, i; + #pragma omp target firstprivate(i) + #pragma omp for lastprivate(i) /* { dg-error "lastprivate variable 'i' is private in outer context" } */ + for (i = 1; i < 2; i++) + ; +} diff --git a/gcc/testsuite/c-c++-common/gomp/pr101535-2.c b/gcc/testsuite/c-c++-common/gomp/pr101535-2.c new file mode 100644 index 0000000..23c84af --- /dev/null +++ b/gcc/testsuite/c-c++-common/gomp/pr101535-2.c @@ -0,0 +1,11 @@ +/* PR middle-end/101535 */ + +void +foo (void) +{ + int a = 1, i; + #pragma omp target map(tofrom:i) + #pragma omp for lastprivate(i) + for (i = 1; i < 2; i++) + ; +} -- cgit v1.1 From e0335bb7d1fc7dd05e91bcdd1f65b2bcf8ec1a09 Mon Sep 17 00:00:00 2001 From: Kito Cheng Date: Wed, 21 Jul 2021 16:31:10 +0800 Subject: Revert "RISC-V: Detect python and pick best one for calling multilib-generator" This reverts commit e695f0101a8cacbc29353c5a000731e50b2627e6. 
--- gcc/config.gcc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'gcc') diff --git a/gcc/config.gcc b/gcc/config.gcc index 3df9b52..93e2b32 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -4730,10 +4730,9 @@ case "${target}" in echo "--with-multilib-list= can't used with --with-multilib-generator= at same time" 1>&2 exit 1 fi - PYTHON=`which python || which python3 || which python2` case "${target}" in riscv*-*-elf*) - if ${PYTHON} ${srcdir}/config/riscv/multilib-generator \ + if ${srcdir}/config/riscv/multilib-generator \ `echo ${with_multilib_generator} | sed 's/;/ /g'`\ > t-multilib-config; then -- cgit v1.1 From f8884b9c51faea329196bf5914bcd2d700622c38 Mon Sep 17 00:00:00 2001 From: Sebastian Huber Date: Wed, 14 Jul 2021 09:40:11 +0200 Subject: gcov: Fix use of profile info section If the -fprofile-info-section is used, then the gcov information is registered in a linker set. This is done by build_gcov_info_var_registration(). The compiler generated object placed in the section was not marked as referenced, so once optimization was enabled, this object was optimized away. Mark it as referenced. gcc/ * coverage.c (build_gcov_info_var_registration): Mark the object placed in the linker set as referenced so that it does not get optimized away. --- gcc/coverage.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'gcc') diff --git a/gcc/coverage.c b/gcc/coverage.c index dfc8108..ac9a9fd 100644 --- a/gcc/coverage.c +++ b/gcc/coverage.c @@ -50,6 +50,7 @@ along with GCC; see the file COPYING3. 
If not see #include "auto-profile.h" #include "profile.h" #include "diagnostic.h" +#include "varasm.h" #include "gcov-io.c" @@ -1121,6 +1122,7 @@ build_gcov_info_var_registration (tree gcov_info_type) DECL_NAME (var) = get_identifier (name_buf); get_section (profile_info_section, SECTION_UNNAMED, NULL); set_decl_section_name (var, profile_info_section); + mark_decl_referenced (var); DECL_INITIAL (var) = build_fold_addr_expr (gcov_info_var); varpool_node::finalize_decl (var); } -- cgit v1.1 From c2b15fe27e6a0e42b108111d51acce69628593b4 Mon Sep 17 00:00:00 2001 From: Harald Anlauf Date: Wed, 21 Jul 2021 18:54:00 +0200 Subject: Fortran: ICE, OOM while calculating sizes of derived type array components gcc/fortran/ChangeLog: PR fortran/101514 * target-memory.c (gfc_interpret_derived): Size of array component of derived type can only be computed here for explicit shape. * trans-types.c (gfc_get_nodesc_array_type): Do not dereference NULL pointers. gcc/testsuite/ChangeLog: PR fortran/101514 * gfortran.dg/pr101514.f90: New test. 
--- gcc/fortran/target-memory.c | 3 +++ gcc/fortran/trans-types.c | 4 ++-- gcc/testsuite/gfortran.dg/pr101514.f90 | 35 ++++++++++++++++++++++++++++++++++ 3 files changed, 40 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gfortran.dg/pr101514.f90 (limited to 'gcc') diff --git a/gcc/fortran/target-memory.c b/gcc/fortran/target-memory.c index cfa8402..7b21a9e 100644 --- a/gcc/fortran/target-memory.c +++ b/gcc/fortran/target-memory.c @@ -534,6 +534,9 @@ gfc_interpret_derived (unsigned char *buffer, size_t buffer_size, gfc_expr *resu { int n; + if (cmp->as->type != AS_EXPLICIT) + return 0; + e->expr_type = EXPR_ARRAY; e->rank = cmp->as->rank; diff --git a/gcc/fortran/trans-types.c b/gcc/fortran/trans-types.c index d715838a..50fda43 100644 --- a/gcc/fortran/trans-types.c +++ b/gcc/fortran/trans-types.c @@ -1644,7 +1644,7 @@ gfc_get_nodesc_array_type (tree etype, gfc_array_spec * as, gfc_packed packed, GFC_TYPE_ARRAY_STRIDE (type, n) = tmp; expr = as->lower[n]; - if (expr->expr_type == EXPR_CONSTANT) + if (expr && expr->expr_type == EXPR_CONSTANT) { tmp = gfc_conv_mpz_to_tree (expr->value.integer, gfc_index_integer_kind); @@ -1694,7 +1694,7 @@ gfc_get_nodesc_array_type (tree etype, gfc_array_spec * as, gfc_packed packed, for (n = as->rank; n < as->rank + as->corank; n++) { expr = as->lower[n]; - if (expr->expr_type == EXPR_CONSTANT) + if (expr && expr->expr_type == EXPR_CONSTANT) tmp = gfc_conv_mpz_to_tree (expr->value.integer, gfc_index_integer_kind); else diff --git a/gcc/testsuite/gfortran.dg/pr101514.f90 b/gcc/testsuite/gfortran.dg/pr101514.f90 new file mode 100644 index 0000000..51fbf8a --- /dev/null +++ b/gcc/testsuite/gfortran.dg/pr101514.f90 @@ -0,0 +1,35 @@ +! { dg-do compile } +! PR fortran/101514 - ICE: out of memory allocating ... bytes + +subroutine s + type t1 + integer :: a(..) ! { dg-error "must have an explicit shape" } + end type + type t2 + integer :: a(*) ! 
{ dg-error "must have an explicit shape" } + end type + type t3 + integer :: a(:) ! { dg-error "must have an explicit shape" } + end type + type t4 + integer :: a(0:) ! { dg-error "must have an explicit shape" } + end type + type t5 + integer, allocatable :: a(:) + end type + type t6 + integer, pointer :: a(:) + end type + type(t1) :: a1 + type(t2) :: a2 + type(t3) :: a3 + type(t4) :: a4 + type(t5) :: a5 + type(t6) :: a6 + a1 = transfer(1, a1) + a2 = transfer(1, a2) + a3 = transfer(1, a3) + a4 = transfer(1, a4) + a5 = transfer(1, a5) + a6 = transfer(1, a6) +end -- cgit v1.1 From 7aa28dbc371cf3c09c05c68672b00d9006391595 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Wed, 21 Jul 2021 05:15:55 -0700 Subject: x86: Remove OPTION_MASK_ISA_SSE4_2 from CRC32 _builtin functions Since commit 39671f87b2df6a1894cc11a161e4a7949d1ddccd Author: H.J. Lu Date: Thu Apr 15 05:59:48 2021 -0700 x86: Use crc32 target option for CRC32 intrinsics enabled OPTION_MASK_ISA_CRC32 for -msse4 and removed TARGET_SSE4_2 check in sse4_2_crc32 patterns, remove OPTION_MASK_ISA_SSE4_2 from CRC32 _builtin functions. gcc/ PR target/101549 * config/i386/i386-builtin.def: Remove OPTION_MASK_ISA_SSE4_2 from CRC32 _builtin functions. gcc/testsuite/ PR target/101549 * gcc.target/i386/crc32-6.c: New test. 
--- gcc/config/i386/i386-builtin.def | 8 ++++---- gcc/testsuite/gcc.target/i386/crc32-6.c | 13 +++++++++++++ 2 files changed, 17 insertions(+), 4 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/crc32-6.c (limited to 'gcc') diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index 1cc0cc6..4b1ae0e 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -970,10 +970,10 @@ BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_ptestv2di, "__builtin_ia32_pte /* SSE4.2 */ BDESC (OPTION_MASK_ISA_SSE4_2, 0, CODE_FOR_nothing, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI) -BDESC (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, 0, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR) -BDESC (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, 0, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT) -BDESC (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, 0, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT) -BDESC (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64) +BDESC (OPTION_MASK_ISA_CRC32, 0, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR) +BDESC (OPTION_MASK_ISA_CRC32, 0, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT) +BDESC (OPTION_MASK_ISA_CRC32, 0, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT) +BDESC (OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64) /* 
SSE4A */ BDESC (OPTION_MASK_ISA_SSE4A, 0, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT) diff --git a/gcc/testsuite/gcc.target/i386/crc32-6.c b/gcc/testsuite/gcc.target/i386/crc32-6.c new file mode 100644 index 0000000..464e344 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/crc32-6.c @@ -0,0 +1,13 @@ +/* PR target/101549 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse4 -mno-crc32" } */ + +#include + +unsigned int +test_mm_crc32_u8 (unsigned int CRC, unsigned char V) +{ + return _mm_crc32_u8 (CRC, V); +} + +/* { dg-error "needs isa option -mcrc32" "" { target *-*-* } 0 } */ -- cgit v1.1 From 005054e48e2d6c4d9b0aac7fda2e4a324886307c Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Tue, 20 Jul 2021 18:25:53 -0400 Subject: rs6000: Main function with stubs for parsing and output 2021-07-20 Bill Schmidt gcc/ * config/rs6000/rs6000-gen-builtins.c (rbtree.h): New #include. (num_bifs): New variable. (num_ovld_stanzas): Likewise. (num_ovlds): Likewise. (parse_codes): New enum. (bif_rbt): New variable. (ovld_rbt): Likewise. (fntype_rbt): Likewise. (bifo_rbt): Likewise. (parse_bif): New stub function. (create_bif_order): Likewise. (parse_ovld): Likewise. (write_header_file): Likewise. (write_init_file): Likewise. (write_defines_file): Likewise. (delete_output_files): New function. (main): Likewise. --- gcc/config/rs6000/rs6000-gen-builtins.c | 215 ++++++++++++++++++++++++++++++++ 1 file changed, 215 insertions(+) (limited to 'gcc') diff --git a/gcc/config/rs6000/rs6000-gen-builtins.c b/gcc/config/rs6000/rs6000-gen-builtins.c index 34566fc..08aa76b 100644 --- a/gcc/config/rs6000/rs6000-gen-builtins.c +++ b/gcc/config/rs6000/rs6000-gen-builtins.c @@ -163,6 +163,7 @@ along with GCC; see the file COPYING3. If not see #include #include #include +#include "rbtree.h" /* Input and output file descriptors and pathnames. 
*/ static FILE *bif_file; @@ -249,6 +250,29 @@ struct typeinfo char *val2; }; +static int num_bifs; +static int num_ovld_stanzas; +static int num_ovlds; + +/* Return codes for parsing routines. */ +enum parse_codes +{ + PC_OK, + PC_EOFILE, + PC_EOSTANZA, + PC_PARSEFAIL +}; + +/* The red-black trees for built-in function identifiers, built-in + overload identifiers, and function type descriptors. */ +static rbt_strings bif_rbt; +static rbt_strings ovld_rbt; +static rbt_strings fntype_rbt; + +/* Another red-black tree containing a mapping from built-in function + identifiers to the order in which they were encountered. */ +static rbt_strings bifo_rbt; + /* Pointer to a diagnostic function. */ static void (*diag) (const char *, ...) __attribute__ ((format (printf, 1, 2))); @@ -864,3 +888,194 @@ match_type (typeinfo *typedata, int voidok) return 1; } + +/* Parse the built-in file. */ +static parse_codes +parse_bif (void) +{ + return PC_OK; +} + +/* Create a mapping from function IDs in their final order to the order + they appear in the built-in function file. */ +static void +create_bif_order (void) +{ +} + +/* Parse the overload file. */ +static parse_codes +parse_ovld (void) +{ + return PC_OK; +} + +/* Write everything to the header file (rs6000-builtins.h). Return + 1 if successful, 0 otherwise. */ +static int +write_header_file (void) +{ + return 1; +} + +/* Write everything to the initialization file (rs6000-builtins.c). + Return 1 if successful, 0 otherwise. */ +static int +write_init_file (void) +{ + return 1; +} + +/* Write everything to the include file (rs6000-vecdefines.h). + Return 1 if successful, 0 otherwise. */ +static int +write_defines_file (void) +{ + return 1; +} + +/* Close and delete output files after any failure, so that subsequent + build dependencies will fail. */ +static void +delete_output_files (void) +{ + /* Depending on whence we're called, some of these may already be + closed. Don't check for errors. 
*/ + fclose (header_file); + fclose (init_file); + fclose (defines_file); + + remove (header_path); + remove (init_path); + remove (defines_path); +} + +/* Main program to convert flat files into built-in initialization code. */ +int +main (int argc, const char **argv) +{ + if (argc != 6) + { + fprintf (stderr, + "Five arguments required: two input files and three output " + "files.\n"); + exit (1); + } + + pgm_path = argv[0]; + bif_path = argv[1]; + ovld_path = argv[2]; + header_path = argv[3]; + init_path = argv[4]; + defines_path = argv[5]; + + bif_file = fopen (bif_path, "r"); + if (!bif_file) + { + fprintf (stderr, "Cannot open input built-in file '%s'.\n", bif_path); + exit (1); + } + ovld_file = fopen (ovld_path, "r"); + if (!ovld_file) + { + fprintf (stderr, "Cannot open input overload file '%s'.\n", ovld_path); + exit (1); + } + header_file = fopen (header_path, "w"); + if (!header_file) + { + fprintf (stderr, "Cannot open header file '%s' for output.\n", + header_path); + exit (1); + } + init_file = fopen (init_path, "w"); + if (!init_file) + { + fprintf (stderr, "Cannot open init file '%s' for output.\n", init_path); + exit (1); + } + defines_file = fopen (defines_path, "w"); + if (!defines_file) + { + fprintf (stderr, "Cannot open defines file '%s' for output.\n", + defines_path); + exit (1); + } + + /* Initialize the balanced trees containing built-in function ids, + overload function ids, and function type declaration ids. */ + rbt_new (&bif_rbt); + rbt_new (&ovld_rbt); + rbt_new (&fntype_rbt); + + /* Initialize another balanced tree that contains a map from built-in + function ids to the order in which they were encountered. */ + rbt_new (&bifo_rbt); + + /* Parse the built-in function file. 
*/ + num_bifs = 0; + line = 0; + if (parse_bif () == PC_PARSEFAIL) + { + fprintf (stderr, "Parsing of '%s' failed, aborting.\n", bif_path); + delete_output_files (); + exit (1); + } + fclose (bif_file); + + /* Create a mapping from function IDs in their final order to + the order they appear in the built-in function file. */ + create_bif_order (); + +#ifdef DEBUG + fprintf (stderr, "\nFunction ID list:\n"); + rbt_dump (&bif_rbt, bif_rbt.rbt_root); + fprintf (stderr, "\n"); +#endif + + /* Parse the overload file. */ + num_ovld_stanzas = 0; + num_ovlds = 0; + line = 0; + if (parse_ovld () == PC_PARSEFAIL) + { + fprintf (stderr, "Parsing of '%s' failed, aborting.\n", ovld_path); + delete_output_files (); + exit (1); + } + fclose (ovld_file); + +#ifdef DEBUG + fprintf (stderr, "\nFunction type decl list:\n"); + rbt_dump (&fntype_rbt, fntype_rbt.rbt_root); + fprintf (stderr, "\n"); +#endif + + /* Write the header file and the file containing initialization code. */ + if (!write_header_file ()) + { + fprintf (stderr, "Output to '%s' failed, aborting.\n", header_path); + delete_output_files (); + exit (1); + } + if (!write_init_file ()) + { + fprintf (stderr, "Output to '%s' failed, aborting.\n", init_path); + delete_output_files (); + exit (1); + } + + /* Write the defines file to be included into altivec.h. */ + if (!write_defines_file ()) + { + fprintf (stderr, "Output to '%s' failed, aborting.\n", defines_path); + delete_output_files (); + exit (1); + } + + fclose (header_file); + fclose (init_file); + fclose (defines_file); + + return 0; +} -- cgit v1.1 From c2d777d6f3a17ac07f78bc4c7dc4d1e0ddd566ae Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Tue, 20 Jul 2021 18:45:57 -0400 Subject: rs6000: Parsing built-in input file, part 1 of 3 2021-07-20 Bill Schmidt gcc/ * config/rs6000/rs6000-gen-builtins.c (bif_stanza): New enum. (curr_bif_stanza): New variable. (stanza_entry): New struct. (stanza_map): New initialized variable. (enable_string): Likewise. 
(fnkinds): New enum. (typelist): New struct. (attrinfo): Likewise. (MAXRESTROPNDS): New macro. (prototype): New struct. (MAXBIFS): New macro. (bifdata): New struct. (bifs): New variable. (curr_bif): Likewise. (bif_order): Likewise. (bif_index): Likewise. (fatal): New function. (stanza_name_to_stanza): Likewise. (parse_bif_attrs): New stub function. (parse_prototype): Likewise. (parse_bif_entry): New function. (parse_bif_stanza): Likewise. (parse_bif): Implement. (set_bif_order): New function. (create_bif_order): Implement. --- gcc/config/rs6000/rs6000-gen-builtins.c | 379 +++++++++++++++++++++++++++++++- 1 file changed, 378 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/config/rs6000/rs6000-gen-builtins.c b/gcc/config/rs6000/rs6000-gen-builtins.c index 08aa76b..b066ece 100644 --- a/gcc/config/rs6000/rs6000-gen-builtins.c +++ b/gcc/config/rs6000/rs6000-gen-builtins.c @@ -194,6 +194,101 @@ enum void_status VOID_OK }; +/* Stanzas are groupings of built-in functions and overloads by some + common feature/attribute. These definitions are for built-in function + stanzas. 
*/ +enum bif_stanza +{ + BSTZ_ALWAYS, + BSTZ_P5, + BSTZ_P6, + BSTZ_ALTIVEC, + BSTZ_CELL, + BSTZ_VSX, + BSTZ_P7, + BSTZ_P7_64, + BSTZ_P8, + BSTZ_P8V, + BSTZ_P9, + BSTZ_P9_64, + BSTZ_P9V, + BSTZ_IEEE128_HW, + BSTZ_DFP, + BSTZ_CRYPTO, + BSTZ_HTM, + BSTZ_P10, + BSTZ_P10_64, + BSTZ_MMA, + NUMBIFSTANZAS +}; + +static bif_stanza curr_bif_stanza; + +struct stanza_entry +{ + const char *stanza_name; + bif_stanza stanza; +}; + +static stanza_entry stanza_map[NUMBIFSTANZAS] = + { + { "always", BSTZ_ALWAYS }, + { "power5", BSTZ_P5 }, + { "power6", BSTZ_P6 }, + { "altivec", BSTZ_ALTIVEC }, + { "cell", BSTZ_CELL }, + { "vsx", BSTZ_VSX }, + { "power7", BSTZ_P7 }, + { "power7-64", BSTZ_P7_64 }, + { "power8", BSTZ_P8 }, + { "power8-vector", BSTZ_P8V }, + { "power9", BSTZ_P9 }, + { "power9-64", BSTZ_P9_64 }, + { "power9-vector", BSTZ_P9V }, + { "ieee128-hw", BSTZ_IEEE128_HW }, + { "dfp", BSTZ_DFP }, + { "crypto", BSTZ_CRYPTO }, + { "htm", BSTZ_HTM }, + { "power10", BSTZ_P10 }, + { "power10-64", BSTZ_P10_64 }, + { "mma", BSTZ_MMA } + }; + +static const char *enable_string[NUMBIFSTANZAS] = + { + "ENB_ALWAYS", + "ENB_P5", + "ENB_P6", + "ENB_ALTIVEC", + "ENB_CELL", + "ENB_VSX", + "ENB_P7", + "ENB_P7_64", + "ENB_P8", + "ENB_P8V", + "ENB_P9", + "ENB_P9_64", + "ENB_P9V", + "ENB_IEEE128_HW", + "ENB_DFP", + "ENB_CRYPTO", + "ENB_HTM", + "ENB_P10", + "ENB_P10_64", + "ENB_MMA" + }; + +/* Function modifiers provide special handling for const, pure, and fpmath + functions. These are mutually exclusive, and therefore kept separate + from other bif attributes. */ +enum fnkinds +{ + FNK_NONE, + FNK_CONST, + FNK_PURE, + FNK_FPMATH +}; + /* Legal base types for an argument or return type. */ enum basetype { @@ -250,7 +345,76 @@ struct typeinfo char *val2; }; +/* A list of argument types. */ +struct typelist +{ + typeinfo info; + typelist *next; +}; + +/* Attributes of a builtin function. 
*/ +struct attrinfo +{ + bool isinit; + bool isset; + bool isextract; + bool isnosoft; + bool isldvec; + bool isstvec; + bool isreve; + bool ispred; + bool ishtm; + bool ishtmspr; + bool ishtmcr; + bool ismma; + bool isquad; + bool ispair; + bool isno32bit; + bool is32bit; + bool iscpu; + bool isldstmask; + bool islxvrse; + bool islxvrze; + bool isendian; +}; + +/* Fields associated with a function prototype (bif or overload). */ +#define MAXRESTROPNDS 3 +struct prototype +{ + typeinfo rettype; + char *bifname; + int nargs; + typelist *args; + int restr_opnd[MAXRESTROPNDS]; + restriction restr[MAXRESTROPNDS]; + char *restr_val1[MAXRESTROPNDS]; + char *restr_val2[MAXRESTROPNDS]; +}; + +/* Data associated with a builtin function, and a table of such data. */ +#define MAXBIFS 16384 +struct bifdata +{ + int stanza; + fnkinds kind; + prototype proto; + char *idname; + char *patname; + attrinfo attrs; + char *fndecl; +}; + +static bifdata bifs[MAXBIFS]; static int num_bifs; +static int curr_bif; + +/* Array used to track the order in which built-ins appeared in the + built-in file. We reorder them alphabetically but sometimes need + this information. */ +static int *bif_order; +static int bif_index = 0; + static int num_ovld_stanzas; static int num_ovlds; @@ -419,6 +583,25 @@ handle_pointer (typeinfo *typedata) } } +/* Produce a fatal error message. */ +static void +fatal (const char *msg) +{ + fprintf (stderr, "FATAL: %s\n", msg); + abort (); +} + +static bif_stanza +stanza_name_to_stanza (const char *stanza_name) +{ + for (int i = 0; i < NUMBIFSTANZAS; i++) + if (!strcmp (stanza_name, stanza_map[i].stanza_name)) + return stanza_map[i].stanza; + fatal ("Stanza mapping is inconsistent."); + /* Unreachable. */ + return BSTZ_ALWAYS; +} + /* Match one of the allowable base types. Consumes one token unless the token is "long", which must be paired with a second "long". Optionally consumes a following '*' token for pointers. 
Return 1 for success, @@ -889,11 +1072,203 @@ match_type (typeinfo *typedata, int voidok) return 1; } +/* Parse the attribute list. */ +static parse_codes +parse_bif_attrs (attrinfo *attrptr) +{ + return PC_OK; +} + +/* Parse a function prototype. This code is shared by the bif and overload + file processing. */ +static parse_codes +parse_prototype (prototype *protoptr) +{ + return PC_OK; +} + +/* Parse a two-line entry for a built-in function. */ +static parse_codes +parse_bif_entry (void) +{ + /* Check for end of stanza. */ + pos = 0; + consume_whitespace (); + if (linebuf[pos] == '[') + return PC_EOSTANZA; + + /* Allocate an entry in the bif table. */ + if (num_bifs >= MAXBIFS - 1) + { + (*diag) ("too many built-in functions.\n"); + return PC_PARSEFAIL; + } + + curr_bif = num_bifs++; + bifs[curr_bif].stanza = curr_bif_stanza; + + /* Read the first token and see if it is a function modifier. */ + consume_whitespace (); + int oldpos = pos; + char *token = match_identifier (); + if (!token) + { + (*diag) ("malformed entry at column %d\n", oldpos + 1); + return PC_PARSEFAIL; + } + + if (!strcmp (token, "const")) + bifs[curr_bif].kind = FNK_CONST; + else if (!strcmp (token, "pure")) + bifs[curr_bif].kind = FNK_PURE; + else if (!strcmp (token, "fpmath")) + bifs[curr_bif].kind = FNK_FPMATH; + else + { + /* No function modifier, so push the token back. */ + pos = oldpos; + bifs[curr_bif].kind = FNK_NONE; + } + + if (parse_prototype (&bifs[curr_bif].proto) == PC_PARSEFAIL) + return PC_PARSEFAIL; + + /* Now process line 2. First up is the builtin id. 
*/ + if (!advance_line (bif_file)) + { + (*diag) ("unexpected EOF.\n"); + return PC_PARSEFAIL; + } + + pos = 0; + consume_whitespace (); + oldpos = pos; + bifs[curr_bif].idname = match_identifier (); + if (!bifs[curr_bif].idname) + { + (*diag) ("missing builtin id at column %d.\n", pos + 1); + return PC_PARSEFAIL; + } + +#ifdef DEBUG + (*diag) ("ID name is '%s'.\n", bifs[curr_bif].idname); +#endif + + /* Save the ID in a lookup structure. */ + if (!rbt_insert (&bif_rbt, bifs[curr_bif].idname)) + { + (*diag) ("duplicate function ID '%s' at column %d.\n", + bifs[curr_bif].idname, oldpos + 1); + return PC_PARSEFAIL; + } + + /* Append a number representing the order in which this function + was encountered to its name, and save in another lookup + structure. */ + char *buf; + asprintf (&buf, "%s:%05d", bifs[curr_bif].idname, curr_bif); + + if (!rbt_insert (&bifo_rbt, buf)) + { + (*diag) ("internal error inserting '%s' in bifo_rbt\n", buf); + return PC_PARSEFAIL; + } + + /* Now the pattern name. */ + consume_whitespace (); + bifs[curr_bif].patname = match_identifier (); + if (!bifs[curr_bif].patname) + { + (*diag) ("missing pattern name at column %d.\n", pos + 1); + return PC_PARSEFAIL; + } + +#ifdef DEBUG + (*diag) ("pattern name is '%s'.\n", bifs[curr_bif].patname); +#endif + + /* Process attributes. */ + return parse_bif_attrs (&bifs[curr_bif].attrs); +} + +/* Parse one stanza of the input BIF file. linebuf already contains the + first line to parse. */ +static parse_codes +parse_bif_stanza (void) +{ + /* Parse the stanza header. 
*/ + pos = 0; + consume_whitespace (); + + if (linebuf[pos] != '[') + { + (*diag) ("ill-formed stanza header at column %d.\n", pos + 1); + return PC_PARSEFAIL; + } + safe_inc_pos (); + + const char *stanza_name = match_to_right_bracket (); + if (!stanza_name) + { + (*diag) ("no expression found in stanza header.\n"); + return PC_PARSEFAIL; + } + + curr_bif_stanza = stanza_name_to_stanza (stanza_name); + + if (linebuf[pos] != ']') + { + (*diag) ("ill-formed stanza header at column %d.\n", pos + 1); + return PC_PARSEFAIL; + } + safe_inc_pos (); + + consume_whitespace (); + if (linebuf[pos] != '\n' && pos != LINELEN - 1) + { + (*diag) ("garbage after stanza header.\n"); + return PC_PARSEFAIL; + } + + parse_codes result = PC_OK; + + while (result != PC_EOSTANZA) + { + if (!advance_line (bif_file)) + return PC_EOFILE; + result = parse_bif_entry (); + if (result == PC_PARSEFAIL) + return PC_PARSEFAIL; + } + + return PC_OK; +} + /* Parse the built-in file. */ static parse_codes parse_bif (void) { - return PC_OK; + parse_codes result; + diag = &bif_diag; + if (!advance_line (bif_file)) + return PC_OK; + + do + result = parse_bif_stanza (); + while (result == PC_OK); + + if (result == PC_EOFILE) + return PC_OK; + return result; +} + +/* Callback function for create_bif_order. */ +void set_bif_order (char *str) +{ + int num = 0; + char *colon = strchr (str, ':'); + sscanf (++colon, "%d", &num); + bif_order[bif_index++] = num; } /* Create a mapping from function IDs in their final order to the order @@ -901,6 +1276,8 @@ parse_bif (void) static void create_bif_order (void) { + bif_order = (int *) malloc ((curr_bif + 1) * sizeof (int)); + rbt_inorder_callback (&bifo_rbt, bifo_rbt.rbt_root, set_bif_order); } /* Parse the overload file. 
*/ -- cgit v1.1 From 3c51b62bb8f052a610c3de2a8a0892ee3c3945ad Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Wed, 21 Jul 2021 08:32:32 -0400 Subject: rs6000: Parsing built-in input file, part 2 of 3 2021-07-21 Bill Schmidt gcc/ * config/rs6000/rs6000-gen-builtins.c (parse_args): New function. (parse_prototype): Implement. --- gcc/config/rs6000/rs6000-gen-builtins.c | 145 ++++++++++++++++++++++++++++++++ 1 file changed, 145 insertions(+) (limited to 'gcc') diff --git a/gcc/config/rs6000/rs6000-gen-builtins.c b/gcc/config/rs6000/rs6000-gen-builtins.c index b066ece..ee32a0d 100644 --- a/gcc/config/rs6000/rs6000-gen-builtins.c +++ b/gcc/config/rs6000/rs6000-gen-builtins.c @@ -1072,6 +1072,93 @@ match_type (typeinfo *typedata, int voidok) return 1; } +/* Parse the argument list. */ +static parse_codes +parse_args (prototype *protoptr) +{ + typelist **argptr = &protoptr->args; + int *nargs = &protoptr->nargs; + int *restr_opnd = protoptr->restr_opnd; + restriction *restr = protoptr->restr; + char **val1 = protoptr->restr_val1; + char **val2 = protoptr->restr_val2; + int restr_cnt = 0; + + int success; + *nargs = 0; + + /* Start the argument list. 
*/ + consume_whitespace (); + if (linebuf[pos] != '(') + { + (*diag) ("missing '(' at column %d.\n", pos + 1); + return PC_PARSEFAIL; + } + safe_inc_pos (); + + do { + consume_whitespace (); + int oldpos = pos; + typelist *argentry = (typelist *) malloc (sizeof (typelist)); + memset (argentry, 0, sizeof *argentry); + typeinfo *argtype = &argentry->info; + success = match_type (argtype, VOID_NOTOK); + if (success) + { + if (argtype->restr) + { + if (restr_cnt >= MAXRESTROPNDS) + { + (*diag) ("More than two %d operands\n", MAXRESTROPNDS); + return PC_PARSEFAIL; + } + restr_opnd[restr_cnt] = *nargs + 1; + restr[restr_cnt] = argtype->restr; + val1[restr_cnt] = argtype->val1; + val2[restr_cnt] = argtype->val2; + restr_cnt++; + } + (*nargs)++; + *argptr = argentry; + argptr = &argentry->next; + consume_whitespace (); + if (linebuf[pos] == ',') + safe_inc_pos (); + else if (linebuf[pos] != ')') + { + (*diag) ("arg not followed by ',' or ')' at column %d.\n", + pos + 1); + return PC_PARSEFAIL; + } + +#ifdef DEBUG + (*diag) ("argument type: isvoid = %d, isconst = %d, isvector = %d, " + "issigned = %d, isunsigned = %d, isbool = %d, ispixel = %d, " + "ispointer = %d, base = %d, restr = %d, val1 = \"%s\", " + "val2 = \"%s\", pos = %d.\n", + argtype->isvoid, argtype->isconst, argtype->isvector, + argtype->issigned, argtype->isunsigned, argtype->isbool, + argtype->ispixel, argtype->ispointer, argtype->base, + argtype->restr, argtype->val1, argtype->val2, pos + 1); +#endif + } + else + { + free (argentry); + *argptr = NULL; + pos = oldpos; + if (linebuf[pos] != ')') + { + (*diag) ("badly terminated arg list at column %d.\n", pos + 1); + return PC_PARSEFAIL; + } + safe_inc_pos (); + } + } while (success); + + return PC_OK; +} + /* Parse the attribute list. 
*/ static parse_codes parse_bif_attrs (attrinfo *attrptr) @@ -1084,6 +1171,64 @@ parse_bif_attrs (attrinfo *attrptr) static parse_codes parse_prototype (prototype *protoptr) { + typeinfo *ret_type = &protoptr->rettype; + char **bifname = &protoptr->bifname; + + /* Get the return type. */ + consume_whitespace (); + int oldpos = pos; + int success = match_type (ret_type, VOID_OK); + if (!success) + { + (*diag) ("missing or badly formed return type at column %d.\n", + oldpos + 1); + return PC_PARSEFAIL; + } + +#ifdef DEBUG + (*diag) ("return type: isvoid = %d, isconst = %d, isvector = %d, " + "issigned = %d, isunsigned = %d, isbool = %d, ispixel = %d, " + "ispointer = %d, base = %d, restr = %d, val1 = \"%s\", " + "val2 = \"%s\", pos = %d.\n", + ret_type->isvoid, ret_type->isconst, ret_type->isvector, + ret_type->issigned, ret_type->isunsigned, ret_type->isbool, + ret_type->ispixel, ret_type->ispointer, ret_type->base, + ret_type->restr, ret_type->val1, ret_type->val2, pos + 1); +#endif + + /* Get the bif name. */ + consume_whitespace (); + oldpos = pos; + *bifname = match_identifier (); + if (!*bifname) + { + (*diag) ("missing function name at column %d.\n", oldpos + 1); + return PC_PARSEFAIL; + } + +#ifdef DEBUG + (*diag) ("function name is '%s'.\n", *bifname); +#endif + + /* Process arguments. */ + if (parse_args (protoptr) == PC_PARSEFAIL) + return PC_PARSEFAIL; + + /* Process terminating semicolon. 
*/ + consume_whitespace (); + if (linebuf[pos] != ';') + { + (*diag) ("missing semicolon at column %d.\n", pos + 1); + return PC_PARSEFAIL; + } + safe_inc_pos (); + consume_whitespace (); + if (linebuf[pos] != '\n') + { + (*diag) ("garbage at end of line at column %d.\n", pos + 1); + return PC_PARSEFAIL; + } + return PC_OK; } -- cgit v1.1 From 63c334f286e764349f2961c6a4c2492ec0394d01 Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Wed, 21 Jul 2021 08:36:25 -0400 Subject: rs6000: Parsing built-in input file, part 3 of 3 2021-07-21 Bill Schmidt gcc/ * config/rs6000/rs6000-gen-builtins.c (parse_bif_attrs): Implement. --- gcc/config/rs6000/rs6000-gen-builtins.c | 102 ++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) (limited to 'gcc') diff --git a/gcc/config/rs6000/rs6000-gen-builtins.c b/gcc/config/rs6000/rs6000-gen-builtins.c index ee32a0d..6030e13 100644 --- a/gcc/config/rs6000/rs6000-gen-builtins.c +++ b/gcc/config/rs6000/rs6000-gen-builtins.c @@ -1163,6 +1163,108 @@ parse_args (prototype *protoptr) static parse_codes parse_bif_attrs (attrinfo *attrptr) { + consume_whitespace (); + if (linebuf[pos] != '{') + { + (*diag) ("missing attribute set at column %d.\n", pos + 1); + return PC_PARSEFAIL; + } + safe_inc_pos (); + + memset (attrptr, 0, sizeof *attrptr); + char *attrname = NULL; + + do { + consume_whitespace (); + int oldpos = pos; + attrname = match_identifier (); + if (attrname) + { + if (!strcmp (attrname, "init")) + attrptr->isinit = 1; + else if (!strcmp (attrname, "set")) + attrptr->isset = 1; + else if (!strcmp (attrname, "extract")) + attrptr->isextract = 1; + else if (!strcmp (attrname, "nosoft")) + attrptr->isnosoft = 1; + else if (!strcmp (attrname, "ldvec")) + attrptr->isldvec = 1; + else if (!strcmp (attrname, "stvec")) + attrptr->isstvec = 1; + else if (!strcmp (attrname, "reve")) + attrptr->isreve = 1; + else if (!strcmp (attrname, "pred")) + attrptr->ispred = 1; + else if (!strcmp (attrname, "htm")) + attrptr->ishtm = 1; + else 
if (!strcmp (attrname, "htmspr")) + attrptr->ishtmspr = 1; + else if (!strcmp (attrname, "htmcr")) + attrptr->ishtmcr = 1; + else if (!strcmp (attrname, "mma")) + attrptr->ismma = 1; + else if (!strcmp (attrname, "quad")) + attrptr->isquad = 1; + else if (!strcmp (attrname, "pair")) + attrptr->ispair = 1; + else if (!strcmp (attrname, "no32bit")) + attrptr->isno32bit = 1; + else if (!strcmp (attrname, "32bit")) + attrptr->is32bit = 1; + else if (!strcmp (attrname, "cpu")) + attrptr->iscpu = 1; + else if (!strcmp (attrname, "ldstmask")) + attrptr->isldstmask = 1; + else if (!strcmp (attrname, "lxvrse")) + attrptr->islxvrse = 1; + else if (!strcmp (attrname, "lxvrze")) + attrptr->islxvrze = 1; + else if (!strcmp (attrname, "endian")) + attrptr->isendian = 1; + else + { + (*diag) ("unknown attribute at column %d.\n", oldpos + 1); + return PC_PARSEFAIL; + } + + consume_whitespace (); + if (linebuf[pos] == ',') + safe_inc_pos (); + else if (linebuf[pos] != '}') + { + (*diag) ("arg not followed by ',' or '}' at column %d.\n", + pos + 1); + return PC_PARSEFAIL; + } + } + else + { + pos = oldpos; + if (linebuf[pos] != '}') + { + (*diag) ("badly terminated attr set at column %d.\n", pos + 1); + return PC_PARSEFAIL; + } + safe_inc_pos (); + } + } while (attrname); + +#ifdef DEBUG + (*diag) ("attribute set: init = %d, set = %d, extract = %d, nosoft = %d, " + "ldvec = %d, stvec = %d, reve = %d, pred = %d, htm = %d, " + "htmspr = %d, htmcr = %d, mma = %d, quad = %d, pair = %d, " + "no32bit = %d, 32bit = %d, cpu = %d, ldstmask = %d, lxvrse = %d, " + "lxvrze = %d, endian = %d.\n", + attrptr->isinit, attrptr->isset, attrptr->isextract, + attrptr->isnosoft, attrptr->isldvec, attrptr->isstvec, + attrptr->isreve, attrptr->ispred, attrptr->ishtm, attrptr->ishtmspr, + attrptr->ishtmcr, attrptr->ismma, attrptr->isquad, attrptr->ispair, + attrptr->isno32bit, attrptr->is32bit, attrptr->iscpu, + attrptr->isldstmask, attrptr->islxvrse, attrptr->islxvrze, + attrptr->isendian); +#endif + 
return PC_OK; } -- cgit v1.1 From 582b56dfd023077e9210a3adce478dd73d96d340 Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Wed, 21 Jul 2021 08:39:37 -0400 Subject: rs6000: Parsing of overload input file 2021-06-07 Bill Schmidt gcc/ * config/rs6000/rs6000-gen-builtins.c (ovld_stanza): New struct. (MAXOVLDSTANZAS): New macro. (ovld_stanzas): New variable. (curr_ovld_stanza): Likewise. (MAXOVLDS): New macro. (ovlddata): New struct. (ovlds): New variable. (curr_ovld): Likewise. (max_ovld_args): Likewise. (parse_ovld_entry): New function. (parse_ovld_stanza): Likewise. (parse_ovld): Implement. --- gcc/config/rs6000/rs6000-gen-builtins.c | 235 +++++++++++++++++++++++++++++++- 1 file changed, 234 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/config/rs6000/rs6000-gen-builtins.c b/gcc/config/rs6000/rs6000-gen-builtins.c index 6030e13..ddeb39e 100644 --- a/gcc/config/rs6000/rs6000-gen-builtins.c +++ b/gcc/config/rs6000/rs6000-gen-builtins.c @@ -415,8 +415,35 @@ static int curr_bif; static int *bif_order; static int bif_index = 0; +/* Stanzas are groupings of built-in functions and overloads by some + common feature/attribute. These definitions are for overload stanzas. */ +struct ovld_stanza +{ + char *stanza_id; + char *extern_name; + char *intern_name; + char *ifdef; +}; + +#define MAXOVLDSTANZAS 512 +static ovld_stanza ovld_stanzas[MAXOVLDSTANZAS]; static int num_ovld_stanzas; +static int curr_ovld_stanza; + +#define MAXOVLDS 16384 +struct ovlddata +{ + int stanza; + prototype proto; + char *bif_id_name; + char *ovld_id_name; + char *fndecl; +}; + +static ovlddata ovlds[MAXOVLDS]; static int num_ovlds; +static int curr_ovld; +static int max_ovld_args = 0; /* Return codes for parsing routines. */ enum parse_codes @@ -1527,11 +1554,217 @@ create_bif_order (void) rbt_inorder_callback (&bifo_rbt, bifo_rbt.rbt_root, set_bif_order); } +/* Parse one two-line entry in the overload file. 
*/ +static parse_codes +parse_ovld_entry (void) +{ + /* Check for end of stanza. */ + pos = 0; + consume_whitespace (); + if (linebuf[pos] == '[') + return PC_EOSTANZA; + + /* Allocate an entry in the overload table. */ + if (num_ovlds >= MAXOVLDS - 1) + { + (*diag) ("too many overloads.\n"); + return PC_PARSEFAIL; + } + + curr_ovld = num_ovlds++; + ovlds[curr_ovld].stanza = curr_ovld_stanza; + + if (parse_prototype (&ovlds[curr_ovld].proto) == PC_PARSEFAIL) + return PC_PARSEFAIL; + + if (ovlds[curr_ovld].proto.nargs > max_ovld_args) + max_ovld_args = ovlds[curr_ovld].proto.nargs; + + /* Now process line 2, which just contains the builtin id and an + optional overload id. */ + if (!advance_line (ovld_file)) + { + (*diag) ("unexpected EOF.\n"); + return PC_EOFILE; + } + + pos = 0; + consume_whitespace (); + int oldpos = pos; + char *id = match_identifier (); + ovlds[curr_ovld].bif_id_name = id; + ovlds[curr_ovld].ovld_id_name = id; + if (!id) + { + (*diag) ("missing overload id at column %d.\n", pos + 1); + return PC_PARSEFAIL; + } + +#ifdef DEBUG + (*diag) ("ID name is '%s'.\n", id); +#endif + + /* The builtin id has to match one from the bif file. */ + if (!rbt_find (&bif_rbt, id)) + { + (*diag) ("builtin ID '%s' not found in bif file.\n", id); + return PC_PARSEFAIL; + } + + /* Check for an optional overload id. Usually we use the builtin + function id for that purpose, but sometimes we need multiple + overload entries for the same builtin id, and it needs to be unique. */ + consume_whitespace (); + if (linebuf[pos] != '\n') + { + id = match_identifier (); + ovlds[curr_ovld].ovld_id_name = id; + consume_whitespace (); + } + + /* Save the overload ID in a lookup structure. 
*/ + if (!rbt_insert (&ovld_rbt, id)) + { + (*diag) ("duplicate overload ID '%s' at column %d.\n", id, oldpos + 1); + return PC_PARSEFAIL; + } + + if (linebuf[pos] != '\n') + { + (*diag) ("garbage at end of line at column %d.\n", pos + 1); + return PC_PARSEFAIL; + } + return PC_OK; +} + +/* Parse one stanza of the input overload file. linebuf already contains the + first line to parse. */ +static parse_codes +parse_ovld_stanza (void) +{ + /* Parse the stanza header. */ + pos = 0; + consume_whitespace (); + + if (linebuf[pos] != '[') + { + (*diag) ("ill-formed stanza header at column %d.\n", pos + 1); + return PC_PARSEFAIL; + } + safe_inc_pos (); + + char *stanza_name = match_identifier (); + if (!stanza_name) + { + (*diag) ("no identifier found in stanza header.\n"); + return PC_PARSEFAIL; + } + + /* Add the identifier to a table and set the number to be recorded + with subsequent overload entries. */ + if (num_ovld_stanzas >= MAXOVLDSTANZAS) + { + (*diag) ("too many stanza headers.\n"); + return PC_PARSEFAIL; + } + + curr_ovld_stanza = num_ovld_stanzas++; + ovld_stanza *stanza = &ovld_stanzas[curr_ovld_stanza]; + stanza->stanza_id = stanza_name; + + consume_whitespace (); + if (linebuf[pos] != ',') + { + (*diag) ("missing comma at column %d.\n", pos + 1); + return PC_PARSEFAIL; + } + safe_inc_pos (); + + consume_whitespace (); + stanza->extern_name = match_identifier (); + if (!stanza->extern_name) + { + (*diag) ("missing external name at column %d.\n", pos + 1); + return PC_PARSEFAIL; + } + + consume_whitespace (); + if (linebuf[pos] != ',') + { + (*diag) ("missing comma at column %d.\n", pos + 1); + return PC_PARSEFAIL; + } + safe_inc_pos (); + + consume_whitespace (); + stanza->intern_name = match_identifier (); + if (!stanza->intern_name) + { + (*diag) ("missing internal name at column %d.\n", pos + 1); + return PC_PARSEFAIL; + } + + consume_whitespace (); + if (linebuf[pos] == ',') + { + safe_inc_pos (); + consume_whitespace (); + stanza->ifdef = 
match_identifier (); + if (!stanza->ifdef) + { + (*diag) ("missing ifdef token at column %d.\n", pos + 1); + return PC_PARSEFAIL; + } + consume_whitespace (); + } + else + stanza->ifdef = 0; + + if (linebuf[pos] != ']') + { + (*diag) ("ill-formed stanza header at column %d.\n", pos + 1); + return PC_PARSEFAIL; + } + safe_inc_pos (); + + consume_whitespace (); + if (linebuf[pos] != '\n' && pos != LINELEN - 1) + { + (*diag) ("garbage after stanza header.\n"); + return PC_PARSEFAIL; + } + + parse_codes result = PC_OK; + + while (result != PC_EOSTANZA) + { + if (!advance_line (ovld_file)) + return PC_EOFILE; + + result = parse_ovld_entry (); + if (result == PC_EOFILE || result == PC_PARSEFAIL) + return result; + } + + return PC_OK; +} + /* Parse the overload file. */ static parse_codes parse_ovld (void) { - return PC_OK; + parse_codes result = PC_OK; + diag = &ovld_diag; + + if (!advance_line (ovld_file)) + return PC_OK; + + while (result == PC_OK) + result = parse_ovld_stanza (); + + if (result == PC_EOFILE) + return PC_OK; + return result; } /* Write everything to the header file (rs6000-builtins.h). Return -- cgit v1.1 From 04ef43c7b35e006559781f758a81c207e6d54a15 Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Wed, 21 Jul 2021 08:45:36 -0400 Subject: rs6000: Build and store function type identifiers 2021-07-21 Bill Schmidt gcc/ * config/rs6000/rs6000-gen-builtins.c (complete_vector_type): New function. (complete_base_type): Likewise. (construct_fntype_id): Likewise. (parse_bif_entry): Call construct_fntype_id. (parse_ovld_entry): Likewise.
--- gcc/config/rs6000/rs6000-gen-builtins.c | 231 ++++++++++++++++++++++++++++++++ 1 file changed, 231 insertions(+) (limited to 'gcc') diff --git a/gcc/config/rs6000/rs6000-gen-builtins.c b/gcc/config/rs6000/rs6000-gen-builtins.c index ddeb39e..2ddebcb 100644 --- a/gcc/config/rs6000/rs6000-gen-builtins.c +++ b/gcc/config/rs6000/rs6000-gen-builtins.c @@ -1295,6 +1295,229 @@ parse_bif_attrs (attrinfo *attrptr) return PC_OK; } +/* Convert a vector type into a mode string. */ +static void +complete_vector_type (typeinfo *typeptr, char *buf, int *bufi) +{ + if (typeptr->isbool) + buf[(*bufi)++] = 'b'; + buf[(*bufi)++] = 'v'; + if (typeptr->ispixel) + { + memcpy (&buf[*bufi], "p8hi", 4); + *bufi += 4; + return; + } + switch (typeptr->base) + { + case BT_CHAR: + memcpy (&buf[*bufi], "16qi", 4); + *bufi += 4; + break; + case BT_SHORT: + memcpy (&buf[*bufi], "8hi", 3); + *bufi += 3; + break; + case BT_INT: + memcpy (&buf[*bufi], "4si", 3); + *bufi += 3; + break; + case BT_LONGLONG: + memcpy (&buf[*bufi], "2di", 3); + *bufi += 3; + break; + case BT_FLOAT: + memcpy (&buf[*bufi], "4sf", 3); + *bufi += 3; + break; + case BT_DOUBLE: + memcpy (&buf[*bufi], "2df", 3); + *bufi += 3; + break; + case BT_INT128: + memcpy (&buf[*bufi], "1ti", 3); + *bufi += 3; + break; + case BT_FLOAT128: + memcpy (&buf[*bufi], "1tf", 3); + *bufi += 3; + break; + case BT_VPAIR: + memcpy (&buf[*bufi], "1poi", 4); + *bufi += 4; + break; + case BT_VQUAD: + memcpy (&buf[*bufi], "1pxi", 4); + *bufi += 4; + break; + default: + (*diag) ("unhandled basetype %d.\n", typeptr->base); + exit (1); + } +} + +/* Convert a base type into a mode string. 
*/ +static void +complete_base_type (typeinfo *typeptr, char *buf, int *bufi) +{ + switch (typeptr->base) + { + case BT_CHAR: + memcpy (&buf[*bufi], "qi", 2); + break; + case BT_SHORT: + memcpy (&buf[*bufi], "hi", 2); + break; + case BT_INT: + memcpy (&buf[*bufi], "si", 2); + break; + case BT_LONG: + memcpy (&buf[*bufi], "lg", 2); + break; + case BT_LONGLONG: + memcpy (&buf[*bufi], "di", 2); + break; + case BT_FLOAT: + memcpy (&buf[*bufi], "sf", 2); + break; + case BT_DOUBLE: + memcpy (&buf[*bufi], "df", 2); + break; + case BT_LONGDOUBLE: + memcpy (&buf[*bufi], "ld", 2); + break; + case BT_INT128: + memcpy (&buf[*bufi], "ti", 2); + break; + case BT_FLOAT128: + memcpy (&buf[*bufi], "tf", 2); + break; + case BT_BOOL: + memcpy (&buf[*bufi], "bi", 2); + break; + case BT_STRING: + memcpy (&buf[*bufi], "st", 2); + break; + case BT_DECIMAL32: + memcpy (&buf[*bufi], "sd", 2); + break; + case BT_DECIMAL64: + memcpy (&buf[*bufi], "dd", 2); + break; + case BT_DECIMAL128: + memcpy (&buf[*bufi], "td", 2); + break; + case BT_IBM128: + memcpy (&buf[*bufi], "if", 2); + break; + default: + (*diag) ("unhandled basetype %d.\n", typeptr->base); + exit (1); + } + + *bufi += 2; +} + +/* Build a function type descriptor identifier from the return type + and argument types described by PROTOPTR, and store it if it does + not already exist. Return the identifier. */ +static char * +construct_fntype_id (prototype *protoptr) +{ + /* Determine the maximum space for a function type descriptor id. + Each type requires at most 9 characters (6 for the mode*, 1 for + the optional 'u' preceding the mode, 1 for the optional 'p' + preceding the mode, and 1 for an underscore following the mode). + We also need 5 characters for the string "ftype" that separates + the return mode from the argument modes. The last argument doesn't + need a trailing underscore, but we count that as the one trailing + "ftype" instead. 
For the special case of zero arguments, we need 9 + for the return type and 7 for "ftype_v". Finally, we need one + character for the terminating null. Thus for a function with N + arguments, we need at most 9N+15 characters for N>0, otherwise 17. + ---- + *Worst case is bv16qi for "vector bool char". */ + int len = protoptr->nargs ? (protoptr->nargs + 1) * 9 + 6 : 17; + char *buf = (char *) malloc (len); + int bufi = 0; + + if (protoptr->rettype.ispointer) + buf[bufi++] = 'p'; + + if (protoptr->rettype.isvoid) + buf[bufi++] = 'v'; + else + { + if (protoptr->rettype.isunsigned) + buf[bufi++] = 'u'; + if (protoptr->rettype.isvector) + complete_vector_type (&protoptr->rettype, buf, &bufi); + else + complete_base_type (&protoptr->rettype, buf, &bufi); + } + + memcpy (&buf[bufi], "_ftype", 6); + bufi += 6; + + if (!protoptr->nargs) + { + memcpy (&buf[bufi], "_v", 2); + bufi += 2; + } + else + { + typelist *argptr = protoptr->args; + for (int i = 0; i < protoptr->nargs; i++, argptr = argptr->next) + { + assert (argptr); + buf[bufi++] = '_'; + if (argptr->info.isconst + && argptr->info.base == BT_INT + && !argptr->info.ispointer) + { + buf[bufi++] = 'c'; + buf[bufi++] = 'i'; + continue; + } + if (argptr->info.ispointer) + { + if (argptr->info.isvoid) + { + if (argptr->info.isconst) + { + memcpy (&buf[bufi], "pcvoid", 6); + bufi += 6; + continue; + } + else + { + buf[bufi++] = 'p'; + buf[bufi++] = 'v'; + continue; + } + } + else + buf[bufi++] = 'p'; + } + + if (argptr->info.isunsigned) + buf[bufi++] = 'u'; + if (argptr->info.isvector) + complete_vector_type (&argptr->info, buf, &bufi); + else + complete_base_type (&argptr->info, buf, &bufi); + } + assert (!argptr); + } + + buf[bufi] = '\0'; + + /* Ignore return value, as duplicates are fine and expected here. */ + rbt_insert (&fntype_rbt, buf); + + return buf; +} + /* Parse a function prototype. This code is shared by the bif and overload file processing. 
*/ static parse_codes @@ -1407,6 +1630,10 @@ parse_bif_entry (void) if (parse_prototype (&bifs[curr_bif].proto) == PC_PARSEFAIL) return PC_PARSEFAIL; + /* Build a function type descriptor identifier from the return type + and argument types, and store it if it does not already exist. */ + bifs[curr_bif].fndecl = construct_fntype_id (&bifs[curr_bif].proto); + /* Now process line 2. First up is the builtin id. */ if (!advance_line (bif_file)) { @@ -1580,6 +1807,10 @@ parse_ovld_entry (void) if (ovlds[curr_ovld].proto.nargs > max_ovld_args) max_ovld_args = ovlds[curr_ovld].proto.nargs; + /* Build a function type descriptor identifier from the return type + and argument types, and store it if it does not already exist. */ + ovlds[curr_ovld].fndecl = construct_fntype_id (&ovlds[curr_ovld].proto); + /* Now process line 2, which just contains the builtin id and an optional overload id. */ if (!advance_line (ovld_file)) -- cgit v1.1 From 86e5e4c93716b84a49a2aba2b52649b366a77b95 Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Wed, 21 Jul 2021 08:47:49 -0400 Subject: rs6000: Write output to the builtin definition include file 2021-06-07 Bill Schmidt gcc/ * config/rs6000/rs6000-gen-builtins.c (write_defines_file): Implement. 
--- gcc/config/rs6000/rs6000-gen-builtins.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'gcc') diff --git a/gcc/config/rs6000/rs6000-gen-builtins.c b/gcc/config/rs6000/rs6000-gen-builtins.c index 2ddebcb..0f6fd0c 100644 --- a/gcc/config/rs6000/rs6000-gen-builtins.c +++ b/gcc/config/rs6000/rs6000-gen-builtins.c @@ -2019,6 +2019,23 @@ write_init_file (void) static int write_defines_file (void) { + fprintf (defines_file, "#ifndef _RS6000_VECDEFINES_H\n"); + fprintf (defines_file, "#define _RS6000_VECDEFINES_H 1\n\n"); + fprintf (defines_file, "#if defined(_ARCH_PPC64) && defined (_ARCH_PWR9)\n"); + fprintf (defines_file, " #define _ARCH_PPC64_PWR9 1\n"); + fprintf (defines_file, "#endif\n\n"); + for (int i = 0; i < num_ovld_stanzas; i++) + if (strcmp (ovld_stanzas[i].extern_name, "SKIP")) + { + if (ovld_stanzas[i].ifdef) + fprintf (defines_file, "#ifdef %s\n", ovld_stanzas[i].ifdef); + fprintf (defines_file, "#define %s %s\n", + ovld_stanzas[i].extern_name, + ovld_stanzas[i].intern_name); + if (ovld_stanzas[i].ifdef) + fprintf (defines_file, "#endif\n"); + } + fprintf (defines_file, "\n#endif\n"); return 1; } -- cgit v1.1 From 89c0330163f94043b65a0be4d8b29b2f695efc8a Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Wed, 21 Jul 2021 08:57:06 -0400 Subject: rs6000: Write output to the builtins header file 2021-07-21 Bill Schmidt gcc/ * config/rs6000/rs6000-gen-builtins.c (write_autogenerated_header): New function. (write_decls): Likewise. (write_extern_fntype): New callback function. (write_header_file): Implement. 
--- gcc/config/rs6000/rs6000-gen-builtins.c | 228 ++++++++++++++++++++++++++++++++ 1 file changed, 228 insertions(+) (limited to 'gcc') diff --git a/gcc/config/rs6000/rs6000-gen-builtins.c b/gcc/config/rs6000/rs6000-gen-builtins.c index 0f6fd0c..1b3a114 100644 --- a/gcc/config/rs6000/rs6000-gen-builtins.c +++ b/gcc/config/rs6000/rs6000-gen-builtins.c @@ -1998,11 +1998,239 @@ parse_ovld (void) return result; } +/* Write a comment at the top of FILE about how the code was generated. */ +static void +write_autogenerated_header (FILE *file) +{ + fprintf (file, "/* Automatically generated by the program '%s'\n", + pgm_path); + fprintf (file, " from the files '%s' and '%s'. */\n\n", + bif_path, ovld_path); +} + +/* Write declarations into the header file. */ +static void +write_decls (void) +{ + fprintf (header_file, "enum rs6000_gen_builtins\n{\n RS6000_BIF_NONE,\n"); + for (int i = 0; i <= curr_bif; i++) + fprintf (header_file, " RS6000_BIF_%s,\n", bifs[bif_order[i]].idname); + fprintf (header_file, " RS6000_BIF_MAX,\n"); + fprintf (header_file, " RS6000_OVLD_NONE,\n"); + for (int i = 0; i < num_ovld_stanzas; i++) + fprintf (header_file, " RS6000_OVLD_%s,\n", ovld_stanzas[i].stanza_id); + fprintf (header_file, " RS6000_OVLD_MAX\n};\n\n"); + + fprintf (header_file, + "extern GTY(()) tree rs6000_builtin_decls_x[RS6000_OVLD_MAX];\n\n"); + + fprintf (header_file, + "enum rs6000_ovld_instances\n{\n RS6000_INST_NONE,\n"); + for (int i = 0; i <= curr_ovld; i++) + fprintf (header_file, " RS6000_INST_%s,\n", ovlds[i].ovld_id_name); + fprintf (header_file, " RS6000_INST_MAX\n};\n\n"); + + fprintf (header_file, "#define MAX_OVLD_ARGS %d\n", max_ovld_args); + + fprintf (header_file, "enum restriction {\n"); + fprintf (header_file, " RES_NONE,\n"); + fprintf (header_file, " RES_BITS,\n"); + fprintf (header_file, " RES_RANGE,\n"); + fprintf (header_file, " RES_VAR_RANGE,\n"); + fprintf (header_file, " RES_VALUES\n"); + fprintf (header_file, "};\n\n"); + + fprintf (header_file, "enum 
bif_enable {\n"); + fprintf (header_file, " ENB_ALWAYS,\n"); + fprintf (header_file, " ENB_P5,\n"); + fprintf (header_file, " ENB_P6,\n"); + fprintf (header_file, " ENB_ALTIVEC,\n"); + fprintf (header_file, " ENB_CELL,\n"); + fprintf (header_file, " ENB_VSX,\n"); + fprintf (header_file, " ENB_P7,\n"); + fprintf (header_file, " ENB_P7_64,\n"); + fprintf (header_file, " ENB_P8,\n"); + fprintf (header_file, " ENB_P8V,\n"); + fprintf (header_file, " ENB_P9,\n"); + fprintf (header_file, " ENB_P9_64,\n"); + fprintf (header_file, " ENB_P9V,\n"); + fprintf (header_file, " ENB_IEEE128_HW,\n"); + fprintf (header_file, " ENB_DFP,\n"); + fprintf (header_file, " ENB_CRYPTO,\n"); + fprintf (header_file, " ENB_HTM,\n"); + fprintf (header_file, " ENB_P10,\n"); + fprintf (header_file, " ENB_P10_64,\n"); + fprintf (header_file, " ENB_MMA\n"); + fprintf (header_file, "};\n\n"); + + fprintf (header_file, "#define PPC_MAXRESTROPNDS 3\n"); + fprintf (header_file, "struct GTY((user)) bifdata\n"); + fprintf (header_file, "{\n"); + fprintf (header_file, " const char *bifname;\n"); + fprintf (header_file, " bif_enable enable;\n"); + fprintf (header_file, " tree fntype;\n"); + fprintf (header_file, " insn_code icode;\n"); + fprintf (header_file, " int nargs;\n"); + fprintf (header_file, " int bifattrs;\n"); + fprintf (header_file, " int restr_opnd[PPC_MAXRESTROPNDS];\n"); + fprintf (header_file, " restriction restr[PPC_MAXRESTROPNDS];\n"); + fprintf (header_file, " int restr_val1[PPC_MAXRESTROPNDS];\n"); + fprintf (header_file, " int restr_val2[PPC_MAXRESTROPNDS];\n"); + fprintf (header_file, " const char *attr_string;\n"); + fprintf (header_file, " rs6000_gen_builtins assoc_bif;\n"); + fprintf (header_file, "};\n\n"); + + fprintf (header_file, "#define bif_init_bit\t\t(0x00000001)\n"); + fprintf (header_file, "#define bif_set_bit\t\t(0x00000002)\n"); + fprintf (header_file, "#define bif_extract_bit\t\t(0x00000004)\n"); + fprintf (header_file, "#define bif_nosoft_bit\t\t(0x00000008)\n"); + 
fprintf (header_file, "#define bif_ldvec_bit\t\t(0x00000010)\n"); + fprintf (header_file, "#define bif_stvec_bit\t\t(0x00000020)\n"); + fprintf (header_file, "#define bif_reve_bit\t\t(0x00000040)\n"); + fprintf (header_file, "#define bif_pred_bit\t\t(0x00000080)\n"); + fprintf (header_file, "#define bif_htm_bit\t\t(0x00000100)\n"); + fprintf (header_file, "#define bif_htmspr_bit\t\t(0x00000200)\n"); + fprintf (header_file, "#define bif_htmcr_bit\t\t(0x00000400)\n"); + fprintf (header_file, "#define bif_mma_bit\t\t(0x00000800)\n"); + fprintf (header_file, "#define bif_quad_bit\t\t(0x00001000)\n"); + fprintf (header_file, "#define bif_pair_bit\t\t(0x00002000)\n"); + fprintf (header_file, "#define bif_no32bit_bit\t\t(0x00004000)\n"); + fprintf (header_file, "#define bif_32bit_bit\t\t(0x00008000)\n"); + fprintf (header_file, "#define bif_cpu_bit\t\t(0x00010000)\n"); + fprintf (header_file, "#define bif_ldstmask_bit\t(0x00020000)\n"); + fprintf (header_file, "#define bif_lxvrse_bit\t\t(0x00040000)\n"); + fprintf (header_file, "#define bif_lxvrze_bit\t\t(0x00080000)\n"); + fprintf (header_file, "#define bif_endian_bit\t\t(0x00100000)\n"); + fprintf (header_file, "\n"); + fprintf (header_file, + "#define bif_is_init(x)\t\t((x).bifattrs & bif_init_bit)\n"); + fprintf (header_file, + "#define bif_is_set(x)\t\t((x).bifattrs & bif_set_bit)\n"); + fprintf (header_file, + "#define bif_is_extract(x)\t((x).bifattrs & bif_extract_bit)\n"); + fprintf (header_file, + "#define bif_is_nosoft(x)\t((x).bifattrs & bif_nosoft_bit)\n"); + fprintf (header_file, + "#define bif_is_ldvec(x)\t\t((x).bifattrs & bif_ldvec_bit)\n"); + fprintf (header_file, + "#define bif_is_stvec(x)\t\t((x).bifattrs & bif_stvec_bit)\n"); + fprintf (header_file, + "#define bif_is_reve(x)\t\t((x).bifattrs & bif_reve_bit)\n"); + fprintf (header_file, + "#define bif_is_predicate(x)\t((x).bifattrs & bif_pred_bit)\n"); + fprintf (header_file, + "#define bif_is_htm(x)\t\t((x).bifattrs & bif_htm_bit)\n"); + fprintf 
(header_file, + "#define bif_is_htmspr(x)\t((x).bifattrs & bif_htmspr_bit)\n"); + fprintf (header_file, + "#define bif_is_htmcr(x)\t\t((x).bifattrs & bif_htmcr_bit)\n"); + fprintf (header_file, + "#define bif_is_mma(x)\t\t((x).bifattrs & bif_mma_bit)\n"); + fprintf (header_file, + "#define bif_is_quad(x)\t\t((x).bifattrs & bif_quad_bit)\n"); + fprintf (header_file, + "#define bif_is_pair(x)\t\t((x).bifattrs & bif_pair_bit)\n"); + fprintf (header_file, + "#define bif_is_no32bit(x)\t((x).bifattrs & bif_no32bit_bit)\n"); + fprintf (header_file, + "#define bif_is_32bit(x)\t((x).bifattrs & bif_32bit_bit)\n"); + fprintf (header_file, + "#define bif_is_cpu(x)\t\t((x).bifattrs & bif_cpu_bit)\n"); + fprintf (header_file, + "#define bif_is_ldstmask(x)\t((x).bifattrs & bif_ldstmask_bit)\n"); + fprintf (header_file, + "#define bif_is_lxvrse(x)\t((x).bifattrs & bif_lxvrse_bit)\n"); + fprintf (header_file, + "#define bif_is_lxvrze(x)\t((x).bifattrs & bif_lxvrze_bit)\n"); + fprintf (header_file, + "#define bif_is_endian(x)\t((x).bifattrs & bif_endian_bit)\n"); + fprintf (header_file, "\n"); + + /* #### Note that the _x is added for now to avoid conflict with + the existing rs6000_builtin_info[] file while testing. It will + be removed as we progress. */ + /* #### Cannot mark this as a GC root because only pointer types can + be marked as GTY((user)) and be GC roots. All trees in here are + kept alive by other globals, so not a big deal. Alternatively, + we could change the enum fields to ints and cast them in and out + to avoid requiring a GTY((user)) designation, but that seems + unnecessarily gross. 
*/ + fprintf (header_file, + "extern bifdata rs6000_builtin_info_x[RS6000_BIF_MAX];\n\n"); + + fprintf (header_file, "struct GTY((user)) ovlddata\n"); + fprintf (header_file, "{\n"); + fprintf (header_file, " const char *bifname;\n"); + fprintf (header_file, " rs6000_gen_builtins bifid;\n"); + fprintf (header_file, " tree fntype;\n"); + fprintf (header_file, " ovlddata *next;\n"); + fprintf (header_file, "};\n\n"); + + fprintf (header_file, "struct ovldrecord\n"); + fprintf (header_file, "{\n"); + fprintf (header_file, " const char *ovld_name;\n"); + fprintf (header_file, " ovlddata *first_instance;\n"); + fprintf (header_file, "};\n\n"); + + fprintf (header_file, + "/* #### Cannot mark this as a GC root because only pointer\n" + " types can be marked as GTY((user)) and be GC roots. All\n" + " trees in here are kept alive by other globals, so not a big\n" + " deal. Alternatively, we could change the enum fields to ints\n" + " and cast them in and out to avoid requiring a GTY((user))\n" + " designation, but that seems unnecessarily gross. 
*/\n"); + fprintf (header_file, + "extern ovlddata rs6000_instance_info[RS6000_INST_MAX];\n"); + fprintf (header_file, "extern ovldrecord rs6000_overload_info[];\n\n"); + + fprintf (header_file, "extern void rs6000_autoinit_builtins ();\n\n"); + fprintf (header_file, + "extern bool rs6000_new_builtin_is_supported_p " + "(rs6000_gen_builtins);\n"); + fprintf (header_file, + "extern tree rs6000_builtin_decl (unsigned, " + "bool ATTRIBUTE_UNUSED);\n\n"); + fprintf (header_file, + "extern void gt_ggc_mx (bifdata *bd);\n"); + fprintf (header_file, + "extern void gt_pch_nx (bifdata *bd);\n"); + fprintf (header_file, + "extern void gt_pch_nx (bifdata *bd, gt_pointer_operator op, " + "void *cookie);\n"); + fprintf (header_file, + "extern void gt_ggc_mx (ovlddata *od);\n"); + fprintf (header_file, + "extern void gt_pch_nx (ovlddata *od);\n"); + fprintf (header_file, + "extern void gt_pch_nx (ovlddata *od, gt_pointer_operator op, " + "void *cookie);\n"); +} + +/* Callback functions used for generating trees for function types. */ +void +write_extern_fntype (char *str) +{ + fprintf (header_file, "extern GTY(()) tree %s;\n", str); +} + /* Write everything to the header file (rs6000-builtins.h). Return 1 if successful, 0 otherwise. */ static int write_header_file (void) { + write_autogenerated_header (header_file); + + fprintf (header_file, "#ifndef _RS6000_BUILTINS_H\n"); + fprintf (header_file, "#define _RS6000_BUILTINS_H 1\n\n"); + fprintf (header_file, "extern int new_builtins_are_live;\n\n"); + + write_decls (); + + /* Write function type list declarators to the header file. 
*/ + rbt_inorder_callback (&fntype_rbt, fntype_rbt.rbt_root, write_extern_fntype); + fprintf (header_file, "\n"); + fprintf (header_file, "\n#endif\n"); + return 1; } -- cgit v1.1 From ef9af12d24108340f9eafa572a5c3aebfff6df88 Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Wed, 21 Jul 2021 09:16:55 -0400 Subject: rs6000: Write output to the builtins init file, part 1 of 3 2021-07-21 Bill Schmidt gcc/ * config/rs6000/rs6000-gen-builtins.c (write_fntype): New callback function. (write_fntype_init): New stub function. (write_init_bif_table): Likewise. (write_init_ovld_table): New function. (write_init_file): Implement. --- gcc/config/rs6000/rs6000-gen-builtins.c | 164 ++++++++++++++++++++++++++++++++ 1 file changed, 164 insertions(+) (limited to 'gcc') diff --git a/gcc/config/rs6000/rs6000-gen-builtins.c b/gcc/config/rs6000/rs6000-gen-builtins.c index 1b3a114..dd24369 100644 --- a/gcc/config/rs6000/rs6000-gen-builtins.c +++ b/gcc/config/rs6000/rs6000-gen-builtins.c @@ -2213,6 +2213,18 @@ write_extern_fntype (char *str) fprintf (header_file, "extern GTY(()) tree %s;\n", str); } +void +write_fntype (char *str) +{ + fprintf (init_file, "tree %s;\n", str); +} + +/* Write an initializer for a function type identified by STR. */ +void +write_fntype_init (char *str) +{ +} + /* Write everything to the header file (rs6000-builtins.h). Return 1 if successful, 0 otherwise. */ static int @@ -2234,11 +2246,163 @@ write_header_file (void) return 1; } +/* Write code to initialize the built-in function table. */ +static void +write_init_bif_table (void) +{ +} + +/* Write code to initialize the overload table. */ +static void +write_init_ovld_table (void) +{ + fprintf (init_file, " int base = RS6000_OVLD_NONE;\n\n"); + fprintf (init_file, + " /* The fndecl for an overload is arbitrarily the first one\n" + " for the overload. We sort out the real types when\n" + " processing the overload in the gcc front end. 
*/\n"); + + for (int i = 0; i <= curr_ovld; i++) + { + fprintf (init_file, + " rs6000_instance_info[RS6000_INST_%s].fntype" + "\n = %s;\n", + ovlds[i].ovld_id_name, ovlds[i].fndecl); + + if (i == 0 || ovlds[i].stanza != ovlds[i-1].stanza) + { + ovld_stanza *stanza = &ovld_stanzas[ovlds[i].stanza]; + fprintf (init_file, "\n"); + + /* Check whether we have a "tf" token in this string, representing + a float128_type_node. It's possible that float128_type_node is + undefined (occurs for -maltivec -mno-vsx, for example), so we + must guard against that. */ + int tf_found = strstr (ovlds[i].fndecl, "tf") != NULL; + + /* Similarly, look for decimal float tokens. */ + int dfp_found = (strstr (ovlds[i].fndecl, "sd") != NULL + || strstr (ovlds[i].fndecl, "dd") != NULL + || strstr (ovlds[i].fndecl, "td") != NULL); + + fprintf (init_file, + " if (new_builtins_are_live)\n"); + fprintf (init_file, " {\n"); + + if (tf_found) + { + fprintf (init_file, " if (float128_type_node)\n"); + fprintf (init_file, " {\n"); + } + else if (dfp_found) + { + fprintf (init_file, " if (dfloat64_type_node)\n"); + fprintf (init_file, " {\n"); + } + + fprintf (init_file, + " rs6000_builtin_decls_x[(int)RS6000_OVLD_%s] = t\n", + stanza->stanza_id); + fprintf (init_file, + " = add_builtin_function (\"%s\",\n", + stanza->intern_name); + fprintf (init_file, + " %s,\n", + ovlds[i].fndecl); + fprintf (init_file, + " (int)RS6000_OVLD_%s," + " BUILT_IN_MD,\n", + stanza->stanza_id); + fprintf (init_file, + " NULL, NULL_TREE);\n"); + + if (tf_found || dfp_found) + fprintf (init_file, " }\n"); + + fprintf (init_file, " }\n\n"); + + fprintf (init_file, + " rs6000_overload_info[RS6000_OVLD_%s - base]" + ".first_instance\n", + stanza->stanza_id); + fprintf (init_file, + " = &rs6000_instance_info[RS6000_INST_%s];\n\n", + ovlds[i].ovld_id_name); + } + } +} + /* Write everything to the initialization file (rs6000-builtins.c). Return 1 if successful, 0 otherwise. 
*/ static int write_init_file (void) { + write_autogenerated_header (init_file); + + fprintf (init_file, "#include \"config.h\"\n"); + fprintf (init_file, "#include \"system.h\"\n"); + fprintf (init_file, "#include \"coretypes.h\"\n"); + fprintf (init_file, "#include \"backend.h\"\n"); + fprintf (init_file, "#include \"rtl.h\"\n"); + fprintf (init_file, "#include \"tree.h\"\n"); + fprintf (init_file, "#include \"langhooks.h\"\n"); + fprintf (init_file, "#include \"insn-codes.h\"\n"); + fprintf (init_file, "#include \"rs6000-builtins.h\"\n"); + fprintf (init_file, "\n"); + + fprintf (init_file, "int new_builtins_are_live = 0;\n\n"); + + fprintf (init_file, "tree rs6000_builtin_decls_x[RS6000_OVLD_MAX];\n\n"); + + rbt_inorder_callback (&fntype_rbt, fntype_rbt.rbt_root, write_fntype); + fprintf (init_file, "\n"); + + fprintf (init_file, "void\n"); + fprintf (init_file, "rs6000_autoinit_builtins ()\n"); + fprintf (init_file, "{\n"); + fprintf (init_file, " tree t;\n"); + rbt_inorder_callback (&fntype_rbt, fntype_rbt.rbt_root, write_fntype_init); + fprintf (init_file, "\n"); + + fprintf (init_file, + " rs6000_builtin_decls_x[RS6000_BIF_NONE] = NULL_TREE;\n"); + fprintf (init_file, + " rs6000_builtin_decls_x[RS6000_BIF_MAX] = NULL_TREE;\n"); + fprintf (init_file, + " rs6000_builtin_decls_x[RS6000_OVLD_NONE] = NULL_TREE;\n\n"); + + write_init_bif_table (); + write_init_ovld_table (); + + fprintf (init_file, "}\n\n"); + + fprintf (init_file, + "void gt_ggc_mx (bifdata *bd)\n"); + fprintf (init_file, + "{\n gt_ggc_mx (bd->fntype);\n}\n\n"); + fprintf (init_file, + "void gt_pch_nx (bifdata *bd)\n"); + fprintf (init_file, + "{\n gt_pch_nx (bd->fntype);\n}\n\n"); + fprintf (init_file, + "void gt_pch_nx (bifdata *bd, gt_pointer_operator op, " + "void *cookie)\n"); + fprintf (init_file, + "{\n op(&(bd->fntype), cookie);\n}\n\n"); + fprintf (init_file, + "void gt_ggc_mx (ovlddata *od)\n"); + fprintf (init_file, + "{\n gt_ggc_mx (od->fntype);\n}\n\n"); + fprintf (init_file, + 
"void gt_pch_nx (ovlddata *od)\n"); + fprintf (init_file, + "{\n gt_pch_nx (od->fntype);\n}\n\n"); + fprintf (init_file, + "void gt_pch_nx (ovlddata *od, gt_pointer_operator op, " + "void *cookie)\n"); + fprintf (init_file, + "{\n op(&(od->fntype), cookie);\n}\n"); + return 1; } -- cgit v1.1 From d3f5a1418b423ec8c12f05025997c4d5b6e417de Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Wed, 21 Jul 2021 09:19:46 -0400 Subject: rs6000: Write output to the builtins init file, part 2 of 3 2021-07-21 Bill Schmidt gcc/ * config/rs6000/rs6000-gen-builtins.c (write_init_bif_table): Implement. --- gcc/config/rs6000/rs6000-gen-builtins.c | 81 +++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) (limited to 'gcc') diff --git a/gcc/config/rs6000/rs6000-gen-builtins.c b/gcc/config/rs6000/rs6000-gen-builtins.c index dd24369..da0d14e 100644 --- a/gcc/config/rs6000/rs6000-gen-builtins.c +++ b/gcc/config/rs6000/rs6000-gen-builtins.c @@ -2250,6 +2250,87 @@ write_header_file (void) static void write_init_bif_table (void) { + for (int i = 0; i <= curr_bif; i++) + { + fprintf (init_file, + " rs6000_builtin_info_x[RS6000_BIF_%s].fntype" + "\n = %s;\n", + bifs[i].idname, bifs[i].fndecl); + + /* Check whether we have a "tf" token in this string, representing + a float128_type_node. It's possible that float128_type_node is + undefined (occurs for -maltivec -mno-vsx, for example), so we + must guard against that. */ + int tf_found = strstr (bifs[i].fndecl, "tf") != NULL; + + /* Similarly, look for decimal float tokens. 
*/ + int dfp_found = (strstr (bifs[i].fndecl, "sd") != NULL + || strstr (bifs[i].fndecl, "dd") != NULL + || strstr (bifs[i].fndecl, "td") != NULL); + + fprintf (init_file, + " if (new_builtins_are_live)\n"); + fprintf (init_file, " {\n"); + + if (tf_found) + { + fprintf (init_file, " if (float128_type_node)\n"); + fprintf (init_file, " {\n"); + } + else if (dfp_found) + { + fprintf (init_file, " if (dfloat64_type_node)\n"); + fprintf (init_file, " {\n"); + } + + fprintf (init_file, + " rs6000_builtin_decls_x[(int)RS6000_BIF_%s] = t\n", + bifs[i].idname); + fprintf (init_file, + " = add_builtin_function (\"%s\",\n", + bifs[i].proto.bifname); + fprintf (init_file, + " %s,\n", + bifs[i].fndecl); + fprintf (init_file, + " (int)RS6000_BIF_%s," + " BUILT_IN_MD,\n", + bifs[i].idname); + fprintf (init_file, + " NULL, NULL_TREE);\n"); + if (bifs[i].kind == FNK_CONST) + { + fprintf (init_file, " TREE_READONLY (t) = 1;\n"); + fprintf (init_file, " TREE_NOTHROW (t) = 1;\n"); + } + else if (bifs[i].kind == FNK_PURE) + { + fprintf (init_file, " DECL_PURE_P (t) = 1;\n"); + fprintf (init_file, " TREE_NOTHROW (t) = 1;\n"); + } + else if (bifs[i].kind == FNK_FPMATH) + { + fprintf (init_file, " TREE_NOTHROW (t) = 1;\n"); + fprintf (init_file, " if (flag_rounding_math)\n"); + fprintf (init_file, " {\n"); + fprintf (init_file, " DECL_PURE_P (t) = 1;\n"); + fprintf (init_file, " DECL_IS_NOVOPS (t) = 1;\n"); + fprintf (init_file, " }\n"); + fprintf (init_file, " else\n"); + fprintf (init_file, " TREE_READONLY (t) = 1;\n"); + } + + if (tf_found || dfp_found) + { + fprintf (init_file, " }\n"); + fprintf (init_file, " else\n"); + fprintf (init_file, " {\n"); + fprintf (init_file, " rs6000_builtin_decls_x" + "[(int)RS6000_BIF_%s] = NULL_TREE;\n", bifs[i].idname); + fprintf (init_file, " }\n"); + } + fprintf (init_file, " }\n\n"); + } } /* Write code to initialize the overload table. 
*/ -- cgit v1.1 From 133aa7e54f77fdc15c311ecb52decfb3f52e179c Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Wed, 21 Jul 2021 09:23:45 -0500 Subject: rs6000: Add int128 target check to pr101129.c (PR101531) 2021-07-21 Bill Schmidt gcc/testsuite/ PR target/101531 * gcc.target/powerpc/pr101129.c: Adjust. --- gcc/testsuite/gcc.target/powerpc/pr101129.c | 1 + 1 file changed, 1 insertion(+) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.target/powerpc/pr101129.c b/gcc/testsuite/gcc.target/powerpc/pr101129.c index 1abc124..6b8e5a9 100644 --- a/gcc/testsuite/gcc.target/powerpc/pr101129.c +++ b/gcc/testsuite/gcc.target/powerpc/pr101129.c @@ -1,5 +1,6 @@ /* { dg-do run } */ /* { dg-require-effective-target p8vector_hw } */ +/* { dg-require-effective-target int128 } */ /* { dg-options "-mdejagnu-cpu=power8 -O " } */ /* PR101129: The swaps pass was turning a mult-lopart into a mult-hipart. -- cgit v1.1 From e279e32243ca2989a46fc933764286f07fc933ef Mon Sep 17 00:00:00 2001 From: Martin Sebor Date: Wed, 21 Jul 2021 14:34:28 -0600 Subject: Fix typos in a comment. gcc/ChangeLog: * tree-ssa-alias.c (walk_aliased_vdefs_1): Fix typos in a comment. --- gcc/tree-ssa-alias.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'gcc') diff --git a/gcc/tree-ssa-alias.c b/gcc/tree-ssa-alias.c index 742a95a..cbd51ac 100644 --- a/gcc/tree-ssa-alias.c +++ b/gcc/tree-ssa-alias.c @@ -3745,7 +3745,7 @@ walk_non_aliased_vuses (ao_ref *ref, tree vuse, bool tbaa_p, } -/* Based on the memory reference REF call WALKER for each vdef which +/* Based on the memory reference REF call WALKER for each vdef whose defining statement may clobber REF, starting with VDEF. If REF is NULL_TREE, each defining statement is visited. @@ -3755,8 +3755,8 @@ walk_non_aliased_vuses (ao_ref *ref, tree vuse, bool tbaa_p, If function entry is reached, FUNCTION_ENTRY_REACHED is set to true. The pointer may be NULL and then we do not track this information. 
- At PHI nodes walk_aliased_vdefs forks into one walk for reach - PHI argument (but only one walk continues on merge points), the + At PHI nodes walk_aliased_vdefs forks into one walk for each + PHI argument (but only one walk continues at merge points), the return value is true if any of the walks was successful. The function returns the number of statements walked or -1 if -- cgit v1.1 From dcdf6bb24e5f113f2bb9298588105a071bddf50f Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Wed, 21 Jul 2021 17:19:31 -0400 Subject: analyzer: tweak dumping of min_expr/max_expr gcc/analyzer/ChangeLog: * svalue.cc (infix_p): New. (binop_svalue::dump_to_pp): Use it to print MIN_EXPR and MAX_EXPR in prefix form, rather than infix. Signed-off-by: David Malcolm --- gcc/analyzer/svalue.cc | 39 ++++++++++++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 5 deletions(-) (limited to 'gcc') diff --git a/gcc/analyzer/svalue.cc b/gcc/analyzer/svalue.cc index 094c725..a1e6f50 100644 --- a/gcc/analyzer/svalue.cc +++ b/gcc/analyzer/svalue.cc @@ -1053,6 +1053,21 @@ unaryop_svalue::maybe_fold_bits_within (tree type, /* class binop_svalue : public svalue. */ +/* Return whether OP be printed as an infix operator. */ + +static bool +infix_p (enum tree_code op) +{ + switch (op) + { + default: + return true; + case MAX_EXPR: + case MIN_EXPR: + return false; + } +} + /* Implementation of svalue::dump_to_pp vfunc for binop_svalue. */ void @@ -1060,11 +1075,25 @@ binop_svalue::dump_to_pp (pretty_printer *pp, bool simple) const { if (simple) { - pp_character (pp, '('); - m_arg0->dump_to_pp (pp, simple); - pp_string (pp, op_symbol_code (m_op)); - m_arg1->dump_to_pp (pp, simple); - pp_character (pp, ')'); + if (infix_p (m_op)) + { + /* Print "(A OP B)". */ + pp_character (pp, '('); + m_arg0->dump_to_pp (pp, simple); + pp_string (pp, op_symbol_code (m_op)); + m_arg1->dump_to_pp (pp, simple); + pp_character (pp, ')'); + } + else + { + /* Print "OP(A, B)". 
*/ + pp_string (pp, op_symbol_code (m_op)); + pp_character (pp, '('); + m_arg0->dump_to_pp (pp, simple); + pp_string (pp, ", "); + m_arg1->dump_to_pp (pp, simple); + pp_character (pp, ')'); + } } else { -- cgit v1.1 From 81703584769707c34533e78c7a2bc229b0e14b2d Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Wed, 21 Jul 2021 17:21:22 -0400 Subject: analyzer: show BB index in BEFORE_SUPERNODE's in-edge This is useful for debugging how the analyzer handles phi nodes. gcc/analyzer/ChangeLog: * program-point.cc (function_point::print): Show src BB index at BEFORE_SUPERNODE. Signed-off-by: David Malcolm --- gcc/analyzer/program-point.cc | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'gcc') diff --git a/gcc/analyzer/program-point.cc b/gcc/analyzer/program-point.cc index d8cfc61..d73b621 100644 --- a/gcc/analyzer/program-point.cc +++ b/gcc/analyzer/program-point.cc @@ -119,8 +119,15 @@ function_point::print (pretty_printer *pp, const format &f) const case PK_BEFORE_SUPERNODE: { if (m_from_edge) - pp_printf (pp, "before SN: %i (from SN: %i)", - m_supernode->m_index, m_from_edge->m_src->m_index); + { + if (basic_block bb = m_from_edge->m_src->m_bb) + pp_printf (pp, "before SN: %i (from SN: %i (bb: %i))", + m_supernode->m_index, m_from_edge->m_src->m_index, + bb->index); + else + pp_printf (pp, "before SN: %i (from SN: %i)", + m_supernode->m_index, m_from_edge->m_src->m_index); + } else pp_printf (pp, "before SN: %i (NULL from-edge)", m_supernode->m_index); -- cgit v1.1 From 6bbad96cd44774bc199b256dbf4260b25b87c7db Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Wed, 21 Jul 2021 17:22:45 -0400 Subject: analyzer: fixes to -fdump-analyzer-state-purge for phi nodes gcc/analyzer/ChangeLog: * state-purge.cc (state_purge_annotator::add_node_annotations): Rather than erroneously always using the NULL in-edge, determine each relevant in-edge, and print the appropriate data for each in-edge. 
Use print_needed to print the data as comma-separated lists of SSA names. (print_vec_of_names): Add "within_table" param and use it. (state_purge_annotator::add_stmt_annotations): Factor out collation and printing code into... (state_purge_annotator::print_needed): ...this new function. * state-purge.h (state_purge_annotator::print_needed): New decl. Signed-off-by: David Malcolm --- gcc/analyzer/state-purge.cc | 66 ++++++++++++++++++++++++++------------------- gcc/analyzer/state-purge.h | 4 +++ 2 files changed, 43 insertions(+), 27 deletions(-) (limited to 'gcc') diff --git a/gcc/analyzer/state-purge.cc b/gcc/analyzer/state-purge.cc index e82ea87..3c3b775 100644 --- a/gcc/analyzer/state-purge.cc +++ b/gcc/analyzer/state-purge.cc @@ -477,23 +477,20 @@ state_purge_annotator::add_node_annotations (graphviz_out *gv, "lightblue"); pp_write_text_to_stream (pp); - // FIXME: passing in a NULL in-edge means we get no hits - function_point before_supernode - (function_point::before_supernode (&n, NULL)); - - for (state_purge_map::iterator iter = m_map->begin (); - iter != m_map->end (); - ++iter) + /* Different in-edges mean different names need purging. + Determine which points to dump. 
*/ + auto_vec points; + if (n.entry_p ()) + points.safe_push (function_point::before_supernode (&n, NULL)); + else + for (auto inedge : n.m_preds) + points.safe_push (function_point::before_supernode (&n, inedge)); + + for (auto & point : points) { - tree name = (*iter).first; - state_purge_per_ssa_name *per_name_data = (*iter).second; - if (per_name_data->get_function () == n.m_fun) - { - if (per_name_data->needed_at_point_p (before_supernode)) - pp_printf (pp, "%qE needed here", name); - else - pp_printf (pp, "%qE not needed here", name); - } + point.print (pp, format (true)); + pp_newline (pp); + print_needed (gv, point, false); pp_newline (pp); } @@ -502,19 +499,20 @@ state_purge_annotator::add_node_annotations (graphviz_out *gv, return false; } -/* Print V to GV as a comma-separated list in braces within a , - titling it with TITLE. +/* Print V to GV as a comma-separated list in braces, titling it with TITLE. + If WITHIN_TABLE is true, print it within a - Subroutine of state_purge_annotator::add_stmt_annotations. */ + Subroutine of state_purge_annotator::print_needed. 
*/ static void print_vec_of_names (graphviz_out *gv, const char *title, - const auto_vec &v) + const auto_vec &v, bool within_table) { pretty_printer *pp = gv->get_pp (); tree name; unsigned i; - gv->begin_trtd (); + if (within_table) + gv->begin_trtd (); pp_printf (pp, "%s: {", title); FOR_EACH_VEC_ELT (v, i, name) { @@ -523,8 +521,11 @@ print_vec_of_names (graphviz_out *gv, const char *title, pp_printf (pp, "%qE", name); } pp_printf (pp, "}"); - pp_write_text_as_html_like_dot_to_stream (pp); - gv->end_tdtr (); + if (within_table) + { + pp_write_text_as_html_like_dot_to_stream (pp); + gv->end_tdtr (); + } pp_newline (pp); } @@ -556,6 +557,17 @@ state_purge_annotator::add_stmt_annotations (graphviz_out *gv, function_point before_stmt (function_point::before_stmt (supernode, stmt_idx)); + print_needed (gv, before_stmt, true); +} + +/* Get the ssa names needed and not-needed at POINT, and print them to GV. + If WITHIN_TABLE is true, print them within elements. */ + +void +state_purge_annotator::print_needed (graphviz_out *gv, + const function_point &point, + bool within_table) const +{ auto_vec needed; auto_vec not_needed; for (state_purge_map::iterator iter = m_map->begin (); @@ -564,17 +576,17 @@ state_purge_annotator::add_stmt_annotations (graphviz_out *gv, { tree name = (*iter).first; state_purge_per_ssa_name *per_name_data = (*iter).second; - if (per_name_data->get_function () == supernode->m_fun) + if (per_name_data->get_function () == point.get_function ()) { - if (per_name_data->needed_at_point_p (before_stmt)) + if (per_name_data->needed_at_point_p (point)) needed.safe_push (name); else not_needed.safe_push (name); } } - print_vec_of_names (gv, "needed here", needed); - print_vec_of_names (gv, "not needed here", not_needed); + print_vec_of_names (gv, "needed here", needed, within_table); + print_vec_of_names (gv, "not needed here", not_needed, within_table); } #endif /* #if ENABLE_ANALYZER */ diff --git a/gcc/analyzer/state-purge.h 
b/gcc/analyzer/state-purge.h index 879013d..409490e 100644 --- a/gcc/analyzer/state-purge.h +++ b/gcc/analyzer/state-purge.h @@ -159,6 +159,10 @@ public: const FINAL OVERRIDE; private: + void print_needed (graphviz_out *gv, + const function_point &point, + bool within_table) const; + const state_purge_map *m_map; }; -- cgit v1.1 From e0a7a6752dad7848eb4b29b826a551c0992256ec Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Wed, 21 Jul 2021 17:24:08 -0400 Subject: analyzer: fix issues with phi handling The analyzer's state purging code was overzealously purging state for ssa names that might be used within phi nodes, leading to false positives from -Wanalyzer-use-of-uninitialized-value. This patch updates phi handling in the analyzer to fix these issues. gcc/analyzer/ChangeLog: * region-model.cc (region_model::handle_phi): Add "old_state" param and use it. (region_model::update_for_phis): Update so that all of the phi stmts are effectively handled simultaneously, rather than in order. * region-model.h (region_model::handle_phi): Add "old_state" param. * state-purge.cc (self_referential_phi_p): Replace with... (name_used_by_phis_p): ...this new function. (state_purge_per_ssa_name::process_point): Update to use the above, so that all phi stmts at a basic block are effectively considered simultaneously, and only consider the phi arguments for the pertinent in-edge. * supergraph.cc (cfg_superedge::get_phi_arg_idx): New. (cfg_superedge::get_phi_arg): Use the above. * supergraph.h (cfg_superedge::get_phi_arg_idx): New decl. gcc/testsuite/ChangeLog: * gcc.dg/analyzer/explode-2.c: Remove xfail. * gcc.dg/analyzer/explode-2a.c: Remove expected leak warning on while stmt. * gcc.dg/analyzer/phi-2.c: New test. 
Signed-off-by: David Malcolm --- gcc/analyzer/region-model.cc | 18 +++++++++---- gcc/analyzer/region-model.h | 1 + gcc/analyzer/state-purge.cc | 42 +++++++++++++++++------------- gcc/analyzer/supergraph.cc | 11 +++++++- gcc/analyzer/supergraph.h | 1 + gcc/testsuite/gcc.dg/analyzer/explode-2.c | 2 +- gcc/testsuite/gcc.dg/analyzer/explode-2a.c | 2 +- gcc/testsuite/gcc.dg/analyzer/phi-2.c | 27 +++++++++++++++++++ 8 files changed, 78 insertions(+), 26 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/analyzer/phi-2.c (limited to 'gcc') diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc index 6d02c60..c029759 100644 --- a/gcc/analyzer/region-model.cc +++ b/gcc/analyzer/region-model.cc @@ -1553,11 +1553,14 @@ region_model::on_longjmp (const gcall *longjmp_call, const gcall *setjmp_call, /* Update this region_model for a phi stmt of the form LHS = PHI <...RHS...>. - where RHS is for the appropriate edge. */ + where RHS is for the appropriate edge. + Get state from OLD_STATE so that all of the phi stmts for a basic block + are effectively handled simultaneously. */ void region_model::handle_phi (const gphi *phi, tree lhs, tree rhs, + const region_model &old_state, region_model_context *ctxt) { /* For now, don't bother tracking the .MEM SSA names. */ @@ -1566,9 +1569,10 @@ region_model::handle_phi (const gphi *phi, if (VAR_DECL_IS_VIRTUAL_OPERAND (var)) return; - const svalue *rhs_sval = get_rvalue (rhs, ctxt); + const svalue *src_sval = old_state.get_rvalue (rhs, ctxt); + const region *dst_reg = old_state.get_lvalue (lhs, ctxt); - set_value (get_lvalue (lhs, ctxt), rhs_sval, ctxt); + set_value (dst_reg, src_sval, ctxt); if (ctxt) ctxt->on_phi (phi, rhs); @@ -3036,6 +3040,10 @@ region_model::update_for_phis (const supernode *snode, { gcc_assert (last_cfg_superedge); + /* Copy this state and pass it to handle_phi so that all of the phi stmts + are effectively handled simultaneously. 
*/ + const region_model old_state (*this); + for (gphi_iterator gpi = const_cast(snode)->start_phis (); !gsi_end_p (gpi); gsi_next (&gpi)) { @@ -3044,8 +3052,8 @@ region_model::update_for_phis (const supernode *snode, tree src = last_cfg_superedge->get_phi_arg (phi); tree lhs = gimple_phi_result (phi); - /* Update next_state based on phi. */ - handle_phi (phi, lhs, src, ctxt); + /* Update next_state based on phi and old_state. */ + handle_phi (phi, lhs, src, old_state, ctxt); } } diff --git a/gcc/analyzer/region-model.h b/gcc/analyzer/region-model.h index 734ec60..cc39929 100644 --- a/gcc/analyzer/region-model.h +++ b/gcc/analyzer/region-model.h @@ -582,6 +582,7 @@ class region_model region_model_context *ctxt); void handle_phi (const gphi *phi, tree lhs, tree rhs, + const region_model &old_state, region_model_context *ctxt); bool maybe_update_for_edge (const superedge &edge, diff --git a/gcc/analyzer/state-purge.cc b/gcc/analyzer/state-purge.cc index 3c3b775..bfa48a9 100644 --- a/gcc/analyzer/state-purge.cc +++ b/gcc/analyzer/state-purge.cc @@ -288,17 +288,23 @@ state_purge_per_ssa_name::add_to_worklist (const function_point &point, } } -/* Does this phi depend on itself? - e.g. in: - added_2 = PHI - the middle defn (from edge 3) requires added_2 itself. */ +/* Return true iff NAME is used by any of the phi nodes in SNODE + when processing the in-edge with PHI_ARG_IDX. 
*/ static bool -self_referential_phi_p (const gphi *phi) +name_used_by_phis_p (tree name, const supernode *snode, + size_t phi_arg_idx) { - for (unsigned i = 0; i < gimple_phi_num_args (phi); i++) - if (gimple_phi_arg_def (phi, i) == gimple_phi_result (phi)) - return true; + gcc_assert (TREE_CODE (name) == SSA_NAME); + + for (gphi_iterator gpi + = const_cast (snode)->start_phis (); + !gsi_end_p (gpi); gsi_next (&gpi)) + { + gphi *phi = gpi.phi (); + if (gimple_phi_arg_def (phi, phi_arg_idx) == name) + return true; + } return false; } @@ -339,27 +345,27 @@ state_purge_per_ssa_name::process_point (const function_point &point, = const_cast (snode)->start_phis (); !gsi_end_p (gpi); gsi_next (&gpi)) { + gcc_assert (point.get_from_edge ()); + const cfg_superedge *cfg_sedge + = point.get_from_edge ()->dyn_cast_cfg_superedge (); + gcc_assert (cfg_sedge); + gphi *phi = gpi.phi (); /* Are we at the def-stmt for m_name? */ if (phi == def_stmt) { - /* Does this phi depend on itself? - e.g. in: - added_2 = PHI - the middle defn (from edge 3) requires added_2 itself - so we can't purge it here. */ - if (self_referential_phi_p (phi)) + if (name_used_by_phis_p (m_name, snode, + cfg_sedge->get_phi_arg_idx ())) { if (logger) - logger->log ("self-referential def stmt within phis;" + logger->log ("name in def stmt used within phis;" " continuing"); } else { - /* Otherwise, we can stop here, so that m_name - can be purged. */ if (logger) - logger->log ("def stmt within phis; terminating"); + logger->log ("name in def stmt not used within phis;" + " terminating"); return; } } diff --git a/gcc/analyzer/supergraph.cc b/gcc/analyzer/supergraph.cc index 8611d0f..1eb2543 100644 --- a/gcc/analyzer/supergraph.cc +++ b/gcc/analyzer/supergraph.cc @@ -1032,12 +1032,21 @@ cfg_superedge::dump_label_to_pp (pretty_printer *pp, /* Otherwise, no label. */ } +/* Get the index number for this edge for use in phi stmts + in its destination. 
*/ + +size_t +cfg_superedge::get_phi_arg_idx () const +{ + return m_cfg_edge->dest_idx; +} + /* Get the phi argument for PHI for this CFG edge. */ tree cfg_superedge::get_phi_arg (const gphi *phi) const { - size_t index = m_cfg_edge->dest_idx; + size_t index = get_phi_arg_idx (); return gimple_phi_arg_def (phi, index); } diff --git a/gcc/analyzer/supergraph.h b/gcc/analyzer/supergraph.h index f4090fd..877958f 100644 --- a/gcc/analyzer/supergraph.h +++ b/gcc/analyzer/supergraph.h @@ -514,6 +514,7 @@ class cfg_superedge : public superedge int false_value_p () const { return get_flags () & EDGE_FALSE_VALUE; } int back_edge_p () const { return get_flags () & EDGE_DFS_BACK; } + size_t get_phi_arg_idx () const; tree get_phi_arg (const gphi *phi) const; private: diff --git a/gcc/testsuite/gcc.dg/analyzer/explode-2.c b/gcc/testsuite/gcc.dg/analyzer/explode-2.c index 3b987e1..c16982f 100644 --- a/gcc/testsuite/gcc.dg/analyzer/explode-2.c +++ b/gcc/testsuite/gcc.dg/analyzer/explode-2.c @@ -24,7 +24,7 @@ void test (void) p0 = malloc (16); /* { dg-warning "leak" "" { xfail *-*-* } } */ break; case 1: - free (p0); /* { dg-warning "double-'free' of 'p0'" "" { xfail *-*-* } } */ + free (p0); /* { dg-warning "double-'free' of 'p0'" } */ break; case 2: diff --git a/gcc/testsuite/gcc.dg/analyzer/explode-2a.c b/gcc/testsuite/gcc.dg/analyzer/explode-2a.c index f60354c..32c71ca 100644 --- a/gcc/testsuite/gcc.dg/analyzer/explode-2a.c +++ b/gcc/testsuite/gcc.dg/analyzer/explode-2a.c @@ -14,7 +14,7 @@ void test (void) explode-2.c as this code. */ int a = get (); int b = get (); - while (a) /* { dg-warning "leak" } */ + while (a) { switch (b) { diff --git a/gcc/testsuite/gcc.dg/analyzer/phi-2.c b/gcc/testsuite/gcc.dg/analyzer/phi-2.c new file mode 100644 index 0000000..2ab8344 --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/phi-2.c @@ -0,0 +1,27 @@ +/* { dg-additional-options "-O1" } */ + +struct list_head { + struct list_head *next, *prev; +}; + +struct mbochs_dmabuf { + /* [...snip...] 
*/ + struct dma_buf *buf; + /* [...snip...] */ + struct list_head next; + /* [...snip...] */ +}; + +void mbochs_close(struct list_head *dmabufs, + struct mbochs_dmabuf *dmabuf, + struct mbochs_dmabuf *tmp) +{ + /* [...snip...] */ + while (&dmabuf->next != dmabufs) + { + dmabuf = tmp; + tmp = ((struct mbochs_dmabuf *)((void *)(tmp->next.next) - __builtin_offsetof(struct mbochs_dmabuf, next))); + } + + /* [...snip...] */ +} -- cgit v1.1 From 6099b9cc8ce70d2ec7f2fc9f71da95fbb66d335f Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Wed, 21 Jul 2021 08:20:18 +0200 Subject: [OpenACC] Fix '#pragma atomic update' typo in 'g++.dg/goacc/template.C' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [...]/g++.dg/goacc/template.C:58: warning: ignoring ‘#pragma atomic update’ [-Wunknown-pragmas] 58 | #pragma atomic update | Small fix-up for r229832 (commit 7a5e4956cc026cba54159d5c764486ac4151db85) "[openacc] tile, independent, default, private and firstprivate support in c/++". gcc/testsuite/ * g++.dg/goacc/template.C: Fix '#pragma atomic update' typo. --- gcc/testsuite/g++.dg/goacc/template.C | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/testsuite/g++.dg/goacc/template.C b/gcc/testsuite/g++.dg/goacc/template.C index 8bcd2a1..51a3f54 100644 --- a/gcc/testsuite/g++.dg/goacc/template.C +++ b/gcc/testsuite/g++.dg/goacc/template.C @@ -55,7 +55,7 @@ oacc_parallel_copy (T a) #pragma acc atomic capture c = b++; -#pragma atomic update +#pragma acc atomic update c++; #pragma acc atomic read -- cgit v1.1 From a61f6afbee370785cf091fe46e2e022748528307 Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Wed, 21 Jul 2021 18:30:00 +0200 Subject: OpenACC 'nohost' clause Do not "compile a version of this procedure for the host". gcc/ * tree-core.h (omp_clause_code): Add 'OMP_CLAUSE_NOHOST'. * tree.c (omp_clause_num_ops, omp_clause_code_name, walk_tree_1): Handle it. 
* tree-pretty-print.c (dump_omp_clause): Likewise. * omp-general.c (oacc_verify_routine_clauses): Likewise. * gimplify.c (gimplify_scan_omp_clauses) (gimplify_adjust_omp_clauses): Likewise. * tree-nested.c (convert_nonlocal_omp_clauses) (convert_local_omp_clauses): Likewise. * omp-low.c (scan_sharing_clauses): Likewise. * omp-offload.c (execute_oacc_device_lower): Update. gcc/c-family/ * c-pragma.h (pragma_omp_clause): Add 'PRAGMA_OACC_CLAUSE_NOHOST'. gcc/c/ * c-parser.c (c_parser_omp_clause_name): Handle 'nohost'. (c_parser_oacc_all_clauses): Handle 'PRAGMA_OACC_CLAUSE_NOHOST'. (OACC_ROUTINE_CLAUSE_MASK): Add 'PRAGMA_OACC_CLAUSE_NOHOST'. * c-typeck.c (c_finish_omp_clauses): Handle 'OMP_CLAUSE_NOHOST'. gcc/cp/ * parser.c (cp_parser_omp_clause_name): Handle 'nohost'. (cp_parser_oacc_all_clauses): Handle 'PRAGMA_OACC_CLAUSE_NOHOST'. (OACC_ROUTINE_CLAUSE_MASK): Add 'PRAGMA_OACC_CLAUSE_NOHOST'. * pt.c (tsubst_omp_clauses): Handle 'OMP_CLAUSE_NOHOST'. * semantics.c (finish_omp_clauses): Likewise. gcc/fortran/ * dump-parse-tree.c (show_attr): Update. * gfortran.h (symbol_attribute): Add 'oacc_routine_nohost' member. (gfc_omp_clauses): Add 'nohost' member. * module.c (ab_attribute): Add 'AB_OACC_ROUTINE_NOHOST'. (attr_bits, mio_symbol_attribute): Update. * openmp.c (omp_mask2): Add 'OMP_CLAUSE_NOHOST'. (gfc_match_omp_clauses): Handle 'OMP_CLAUSE_NOHOST'. (OACC_ROUTINE_CLAUSES): Add 'OMP_CLAUSE_NOHOST'. (gfc_match_oacc_routine): Update. * trans-decl.c (add_attributes_to_decl): Update. * trans-openmp.c (gfc_trans_omp_clauses): Likewise. gcc/testsuite/ * c-c++-common/goacc/classify-routine-nohost.c: New file. * c-c++-common/goacc/classify-routine.c: Update. * c-c++-common/goacc/routine-2.c: Likewise. * c-c++-common/goacc/routine-nohost-1.c: New file. * c-c++-common/goacc/routine-nohost-2.c: Likewise. * g++.dg/goacc/template.C: Update. * gfortran.dg/goacc/classify-routine-nohost.f95: New file. * gfortran.dg/goacc/classify-routine.f95: Update. 
* gfortran.dg/goacc/pure-elemental-procedures-2.f90: Likewise. * gfortran.dg/goacc/routine-6.f90: Likewise. * gfortran.dg/goacc/routine-intrinsic-2.f: Likewise. * gfortran.dg/goacc/routine-module-1.f90: Likewise. * gfortran.dg/goacc/routine-module-2.f90: Likewise. * gfortran.dg/goacc/routine-module-3.f90: Likewise. * gfortran.dg/goacc/routine-module-mod-1.f90: Likewise. * gfortran.dg/goacc/routine-multiple-directives-1.f90: Likewise. * gfortran.dg/goacc/routine-multiple-directives-2.f90: Likewise. libgomp/ * testsuite/libgomp.oacc-c-c++-common/routine-nohost-1.c: New file. * testsuite/libgomp.oacc-c-c++-common/routine-nohost-2.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/routine-nohost-2_2.c: Likewise. * testsuite/libgomp.oacc-fortran/routine-nohost-1.f90: Likewise. Co-Authored-By: Joseph Myers Co-Authored-By: Cesar Philippidis --- gcc/c-family/c-pragma.h | 1 + gcc/c/c-parser.c | 10 +- gcc/c/c-typeck.c | 1 + gcc/cp/parser.c | 11 +- gcc/cp/pt.c | 1 + gcc/cp/semantics.c | 1 + gcc/fortran/dump-parse-tree.c | 2 + gcc/fortran/gfortran.h | 2 + gcc/fortran/module.c | 7 + gcc/fortran/openmp.c | 30 ++++- gcc/fortran/trans-decl.c | 8 ++ gcc/fortran/trans-openmp.c | 2 + gcc/gimplify.c | 2 + gcc/omp-general.c | 17 +++ gcc/omp-low.c | 2 + gcc/omp-offload.c | 36 +++++ .../c-c++-common/goacc/classify-routine-nohost.c | 41 ++++++ .../c-c++-common/goacc/classify-routine.c | 10 +- gcc/testsuite/c-c++-common/goacc/routine-2.c | 4 + .../c-c++-common/goacc/routine-nohost-1.c | 50 +++++++ .../c-c++-common/goacc/routine-nohost-2.c | 96 ++++++++++++++ gcc/testsuite/g++.dg/goacc/template.C | 15 ++- .../gfortran.dg/goacc/classify-routine-nohost.f95 | 39 ++++++ .../gfortran.dg/goacc/classify-routine.f95 | 7 + .../goacc/pure-elemental-procedures-2.f90 | 24 ++++ gcc/testsuite/gfortran.dg/goacc/routine-6.f90 | 10 ++ .../gfortran.dg/goacc/routine-intrinsic-2.f | 10 ++ .../gfortran.dg/goacc/routine-module-1.f90 | 14 ++ .../gfortran.dg/goacc/routine-module-2.f90 | 6 + 
.../gfortran.dg/goacc/routine-module-3.f90 | 43 +++++- .../gfortran.dg/goacc/routine-module-mod-1.f90 | 60 +++++++++ .../goacc/routine-multiple-directives-1.f90 | 64 +++++++++ .../goacc/routine-multiple-directives-2.f90 | 147 +++++++++++++++++++++ gcc/tree-core.h | 5 +- gcc/tree-nested.c | 6 + gcc/tree-pretty-print.c | 3 + gcc/tree.c | 3 + 37 files changed, 779 insertions(+), 11 deletions(-) create mode 100644 gcc/testsuite/c-c++-common/goacc/classify-routine-nohost.c create mode 100644 gcc/testsuite/c-c++-common/goacc/routine-nohost-1.c create mode 100644 gcc/testsuite/c-c++-common/goacc/routine-nohost-2.c create mode 100644 gcc/testsuite/gfortran.dg/goacc/classify-routine-nohost.f95 (limited to 'gcc') diff --git a/gcc/c-family/c-pragma.h b/gcc/c-family/c-pragma.h index e4fd3c9..c5d11ce 100644 --- a/gcc/c-family/c-pragma.h +++ b/gcc/c-family/c-pragma.h @@ -160,6 +160,7 @@ enum pragma_omp_clause { PRAGMA_OACC_CLAUSE_HOST, PRAGMA_OACC_CLAUSE_INDEPENDENT, PRAGMA_OACC_CLAUSE_NO_CREATE, + PRAGMA_OACC_CLAUSE_NOHOST, PRAGMA_OACC_CLAUSE_NUM_GANGS, PRAGMA_OACC_CLAUSE_NUM_WORKERS, PRAGMA_OACC_CLAUSE_PRESENT, diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c index 9a56e0c..92d22d1 100644 --- a/gcc/c/c-parser.c +++ b/gcc/c/c-parser.c @@ -12744,6 +12744,8 @@ c_parser_omp_clause_name (c_parser *parser) result = PRAGMA_OACC_CLAUSE_NO_CREATE; else if (!strcmp ("nogroup", p)) result = PRAGMA_OMP_CLAUSE_NOGROUP; + else if (!strcmp ("nohost", p)) + result = PRAGMA_OACC_CLAUSE_NOHOST; else if (!strcmp ("nontemporal", p)) result = PRAGMA_OMP_CLAUSE_NONTEMPORAL; else if (!strcmp ("notinbranch", p)) @@ -16248,6 +16250,11 @@ c_parser_oacc_all_clauses (c_parser *parser, omp_clause_mask mask, clauses = c_parser_oacc_data_clause (parser, c_kind, clauses); c_name = "no_create"; break; + case PRAGMA_OACC_CLAUSE_NOHOST: + clauses = c_parser_oacc_simple_clause (here, OMP_CLAUSE_NOHOST, + clauses); + c_name = "nohost"; + break; case PRAGMA_OACC_CLAUSE_NUM_GANGS: clauses = 
c_parser_oacc_single_int_clause (parser, OMP_CLAUSE_NUM_GANGS, @@ -17179,7 +17186,8 @@ c_parser_oacc_compute (location_t loc, c_parser *parser, ( (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_GANG) \ | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_WORKER) \ | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_VECTOR) \ - | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_SEQ) ) + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_SEQ) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_NOHOST) ) /* Parse an OpenACC routine directive. For named directives, we apply immediately to the named function. For unnamed ones we then parse diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c index 4f7ed67..5d6565b 100644 --- a/gcc/c/c-typeck.c +++ b/gcc/c/c-typeck.c @@ -15168,6 +15168,7 @@ c_finish_omp_clauses (tree clauses, enum c_omp_region_type ort) case OMP_CLAUSE_TILE: case OMP_CLAUSE_IF_PRESENT: case OMP_CLAUSE_FINALIZE: + case OMP_CLAUSE_NOHOST: pc = &OMP_CLAUSE_CHAIN (c); continue; diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c index 821ce17..45216f0 100644 --- a/gcc/cp/parser.c +++ b/gcc/cp/parser.c @@ -35656,6 +35656,8 @@ cp_parser_omp_clause_name (cp_parser *parser) result = PRAGMA_OACC_CLAUSE_NO_CREATE; else if (!strcmp ("nogroup", p)) result = PRAGMA_OMP_CLAUSE_NOGROUP; + else if (!strcmp ("nohost", p)) + result = PRAGMA_OACC_CLAUSE_NOHOST; else if (!strcmp ("nontemporal", p)) result = PRAGMA_OMP_CLAUSE_NONTEMPORAL; else if (!strcmp ("notinbranch", p)) @@ -38879,6 +38881,11 @@ cp_parser_oacc_all_clauses (cp_parser *parser, omp_clause_mask mask, clauses = cp_parser_oacc_data_clause (parser, c_kind, clauses); c_name = "no_create"; break; + case PRAGMA_OACC_CLAUSE_NOHOST: + clauses = cp_parser_oacc_simple_clause (here, OMP_CLAUSE_NOHOST, + clauses); + c_name = "nohost"; + break; case PRAGMA_OACC_CLAUSE_NUM_GANGS: code = OMP_CLAUSE_NUM_GANGS; c_name = "num_gangs"; @@ -44866,8 +44873,8 @@ cp_parser_omp_taskloop (cp_parser *parser, cp_token *pragma_tok, ( (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_GANG) \ | 
(OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_WORKER) \ | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_VECTOR) \ - | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_SEQ)) - + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_SEQ) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_NOHOST) ) /* Parse the OpenACC routine pragma. This has an optional '( name )' component, which must resolve to a declared namespace-scope diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c index 94ca3bc..b396ddd 100644 --- a/gcc/cp/pt.c +++ b/gcc/cp/pt.c @@ -17479,6 +17479,7 @@ tsubst_omp_clauses (tree clauses, enum c_omp_region_type ort, case OMP_CLAUSE_SEQ: case OMP_CLAUSE_IF_PRESENT: case OMP_CLAUSE_FINALIZE: + case OMP_CLAUSE_NOHOST: break; default: gcc_unreachable (); diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c index 331daf8..f64b084 100644 --- a/gcc/cp/semantics.c +++ b/gcc/cp/semantics.c @@ -8267,6 +8267,7 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort) case OMP_CLAUSE_SEQ: case OMP_CLAUSE_IF_PRESENT: case OMP_CLAUSE_FINALIZE: + case OMP_CLAUSE_NOHOST: break; case OMP_CLAUSE_MERGEABLE: diff --git a/gcc/fortran/dump-parse-tree.c b/gcc/fortran/dump-parse-tree.c index 26841ee..8e4a101 100644 --- a/gcc/fortran/dump-parse-tree.c +++ b/gcc/fortran/dump-parse-tree.c @@ -926,6 +926,8 @@ show_attr (symbol_attribute *attr, const char * module) fputs (" ALWAYS-EXPLICIT", dumpfile); if (attr->is_main_program) fputs (" IS-MAIN-PROGRAM", dumpfile); + if (attr->oacc_routine_nohost) + fputs (" OACC-ROUTINE-NOHOST", dumpfile); /* FIXME: Still missing are oacc_routine_lop and ext_attr. */ fputc (')', dumpfile); diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h index f4a50d7..921aed9 100644 --- a/gcc/fortran/gfortran.h +++ b/gcc/fortran/gfortran.h @@ -947,6 +947,7 @@ typedef struct /* OpenACC 'routine' directive's level of parallelism. */ ENUM_BITFIELD (oacc_routine_lop) oacc_routine_lop:3; + unsigned oacc_routine_nohost:1; /* Attributes set by compiler extensions (!GCC$ ATTRIBUTES). 
*/ unsigned ext_attr:EXT_ATTR_NUM; @@ -1488,6 +1489,7 @@ typedef struct gfc_omp_clauses unsigned async:1, gang:1, worker:1, vector:1, seq:1, independent:1; unsigned par_auto:1, gang_static:1; unsigned if_present:1, finalize:1; + unsigned nohost:1; locus loc; } gfc_omp_clauses; diff --git a/gcc/fortran/module.c b/gcc/fortran/module.c index 321d3256..1804066 100644 --- a/gcc/fortran/module.c +++ b/gcc/fortran/module.c @@ -2088,6 +2088,7 @@ enum ab_attribute AB_PDT_TEMPLATE, AB_PDT_ARRAY, AB_PDT_STRING, AB_OACC_ROUTINE_LOP_GANG, AB_OACC_ROUTINE_LOP_WORKER, AB_OACC_ROUTINE_LOP_VECTOR, AB_OACC_ROUTINE_LOP_SEQ, + AB_OACC_ROUTINE_NOHOST, AB_OMP_REQ_REVERSE_OFFLOAD, AB_OMP_REQ_UNIFIED_ADDRESS, AB_OMP_REQ_UNIFIED_SHARED_MEMORY, AB_OMP_REQ_DYNAMIC_ALLOCATORS, AB_OMP_REQ_MEM_ORDER_SEQ_CST, AB_OMP_REQ_MEM_ORDER_ACQ_REL, @@ -2166,6 +2167,7 @@ static const mstring attr_bits[] = minit ("OACC_ROUTINE_LOP_WORKER", AB_OACC_ROUTINE_LOP_WORKER), minit ("OACC_ROUTINE_LOP_VECTOR", AB_OACC_ROUTINE_LOP_VECTOR), minit ("OACC_ROUTINE_LOP_SEQ", AB_OACC_ROUTINE_LOP_SEQ), + minit ("OACC_ROUTINE_NOHOST", AB_OACC_ROUTINE_NOHOST), minit ("OMP_REQ_REVERSE_OFFLOAD", AB_OMP_REQ_REVERSE_OFFLOAD), minit ("OMP_REQ_UNIFIED_ADDRESS", AB_OMP_REQ_UNIFIED_ADDRESS), minit ("OMP_REQ_UNIFIED_SHARED_MEMORY", AB_OMP_REQ_UNIFIED_SHARED_MEMORY), @@ -2420,6 +2422,8 @@ mio_symbol_attribute (symbol_attribute *attr) default: gcc_unreachable (); } + if (attr->oacc_routine_nohost) + MIO_NAME (ab_attribute) (AB_OACC_ROUTINE_NOHOST, attr_bits); if (attr->flavor == FL_MODULE && gfc_current_ns->omp_requires) { @@ -2682,6 +2686,9 @@ mio_symbol_attribute (symbol_attribute *attr) verify_OACC_ROUTINE_LOP_NONE (attr->oacc_routine_lop); attr->oacc_routine_lop = OACC_ROUTINE_LOP_SEQ; break; + case AB_OACC_ROUTINE_NOHOST: + attr->oacc_routine_nohost = 1; + break; case AB_OMP_REQ_REVERSE_OFFLOAD: gfc_omp_requires_add_clause (OMP_REQ_REVERSE_OFFLOAD, "reverse_offload", diff --git a/gcc/fortran/openmp.c b/gcc/fortran/openmp.c index 
357a1e1..520a435 100644 --- a/gcc/fortran/openmp.c +++ b/gcc/fortran/openmp.c @@ -880,6 +880,7 @@ enum omp_mask2 OMP_CLAUSE_IF_PRESENT, OMP_CLAUSE_FINALIZE, OMP_CLAUSE_ATTACH, + OMP_CLAUSE_NOHOST, /* This must come last. */ OMP_MASK2_LAST }; @@ -2083,6 +2084,13 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, const omp_mask mask, c->nogroup = needs_space = true; continue; } + if ((mask & OMP_CLAUSE_NOHOST) + && !c->nohost + && gfc_match ("nohost") == MATCH_YES) + { + c->nohost = needs_space = true; + continue; + } if ((mask & OMP_CLAUSE_NOTEMPORAL) && gfc_match_omp_variable_list ("nontemporal (", &c->lists[OMP_LIST_NONTEMPORAL], @@ -2607,7 +2615,8 @@ end: omp_mask (OMP_CLAUSE_ASYNC) #define OACC_ROUTINE_CLAUSES \ (omp_mask (OMP_CLAUSE_GANG) | OMP_CLAUSE_WORKER | OMP_CLAUSE_VECTOR \ - | OMP_CLAUSE_SEQ) + | OMP_CLAUSE_SEQ \ + | OMP_CLAUSE_NOHOST) static match @@ -2936,6 +2945,7 @@ gfc_match_oacc_routine (void) gfc_omp_clauses *c = NULL; gfc_oacc_routine_name *n = NULL; oacc_routine_lop lop = OACC_ROUTINE_LOP_NONE; + bool nohost; old_loc = gfc_current_locus; @@ -3012,6 +3022,7 @@ gfc_match_oacc_routine (void) gfc_error ("Multiple loop axes specified for routine at %C"); goto cleanup; } + nohost = c ? c->nohost : false; if (isym != NULL) { @@ -3024,6 +3035,13 @@ gfc_match_oacc_routine (void) " clause"); goto cleanup; } + /* ..., and no 'nohost' clause. */ + if (nohost) + { + gfc_error ("Intrinsic symbol specified in !$ACC ROUTINE ( NAME )" + " at %C marked with incompatible NOHOST clause"); + goto cleanup; + } } else if (sym != NULL) { @@ -3037,7 +3055,9 @@ gfc_match_oacc_routine (void) if (n_p->sym == sym) { add = false; - if (lop != gfc_oacc_routine_lop (n_p->clauses)) + bool nohost_p = n_p->clauses ? 
n_p->clauses->nohost : false; + if (lop != gfc_oacc_routine_lop (n_p->clauses) + || nohost != nohost_p) { gfc_error ("!$ACC ROUTINE already applied at %C"); goto cleanup; @@ -3047,6 +3067,7 @@ gfc_match_oacc_routine (void) if (add) { sym->attr.oacc_routine_lop = lop; + sym->attr.oacc_routine_nohost = nohost; n = gfc_get_oacc_routine_name (); n->sym = sym; @@ -3061,8 +3082,10 @@ gfc_match_oacc_routine (void) /* For a repeated OpenACC 'routine' directive, diagnose if it doesn't match the first one. */ oacc_routine_lop lop_p = gfc_current_ns->proc_name->attr.oacc_routine_lop; + bool nohost_p = gfc_current_ns->proc_name->attr.oacc_routine_nohost; if (lop_p != OACC_ROUTINE_LOP_NONE - && lop != lop_p) + && (lop != lop_p + || nohost != nohost_p)) { gfc_error ("!$ACC ROUTINE already applied at %C"); goto cleanup; @@ -3073,6 +3096,7 @@ gfc_match_oacc_routine (void) &old_loc)) goto cleanup; gfc_current_ns->proc_name->attr.oacc_routine_lop = lop; + gfc_current_ns->proc_name->attr.oacc_routine_nohost = nohost; } else /* Something has gone wrong, possibly a syntax error. 
*/ diff --git a/gcc/fortran/trans-decl.c b/gcc/fortran/trans-decl.c index a73ce8a..bf8783a 100644 --- a/gcc/fortran/trans-decl.c +++ b/gcc/fortran/trans-decl.c @@ -1473,6 +1473,14 @@ add_attributes_to_decl (symbol_attribute sym_attr, tree list) tree dims = oacc_build_routine_dims (clauses); list = oacc_replace_fn_attrib_attr (list, dims); } + + if (sym_attr.oacc_routine_nohost) + { + tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_NOHOST); + OMP_CLAUSE_CHAIN (c) = clauses; + clauses = c; + } + if (sym_attr.omp_device_type != OMP_DEVICE_TYPE_UNSET) { tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_DEVICE_TYPE); diff --git a/gcc/fortran/trans-openmp.c b/gcc/fortran/trans-openmp.c index ace4faf..ac3f5f3 100644 --- a/gcc/fortran/trans-openmp.c +++ b/gcc/fortran/trans-openmp.c @@ -4297,6 +4297,8 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, gcc_unreachable (); } } + /* OpenACC 'nohost' clauses cannot appear here. */ + gcc_checking_assert (!clauses->nohost); return nreverse (omp_clauses); } diff --git a/gcc/gimplify.c b/gcc/gimplify.c index 5d43f76..21ff32e 100644 --- a/gcc/gimplify.c +++ b/gcc/gimplify.c @@ -10310,6 +10310,7 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, } break; + case OMP_CLAUSE_NOHOST: default: gcc_unreachable (); } @@ -11247,6 +11248,7 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, gimple_seq body, tree *list_p, case OMP_CLAUSE_EXCLUSIVE: break; + case OMP_CLAUSE_NOHOST: default: gcc_unreachable (); } diff --git a/gcc/omp-general.c b/gcc/omp-general.c index a1bb9d8..b46a537 100644 --- a/gcc/omp-general.c +++ b/gcc/omp-general.c @@ -2576,6 +2576,7 @@ oacc_verify_routine_clauses (tree fndecl, tree *clauses, location_t loc, const char *routine_str) { tree c_level = NULL_TREE; + tree c_nohost = NULL_TREE; tree c_p = NULL_TREE; for (tree c = *clauses; c; c_p = c, c = OMP_CLAUSE_CHAIN (c)) switch (OMP_CLAUSE_CODE (c)) @@ -2608,6 +2609,10 @@ oacc_verify_routine_clauses (tree fndecl, tree 
*clauses, location_t loc, c = c_p; } break; + case OMP_CLAUSE_NOHOST: + /* Don't worry about duplicate clauses here. */ + c_nohost = c; + break; default: gcc_unreachable (); } @@ -2642,6 +2647,7 @@ oacc_verify_routine_clauses (tree fndecl, tree *clauses, location_t loc, this one for compatibility. */ /* Collect previous directive's clauses. */ tree c_level_p = NULL_TREE; + tree c_nohost_p = NULL_TREE; for (tree c = TREE_VALUE (attr); c; c = OMP_CLAUSE_CHAIN (c)) switch (OMP_CLAUSE_CODE (c)) { @@ -2652,6 +2658,10 @@ oacc_verify_routine_clauses (tree fndecl, tree *clauses, location_t loc, gcc_checking_assert (c_level_p == NULL_TREE); c_level_p = c; break; + case OMP_CLAUSE_NOHOST: + gcc_checking_assert (c_nohost_p == NULL_TREE); + c_nohost_p = c; + break; default: gcc_unreachable (); } @@ -2667,6 +2677,13 @@ oacc_verify_routine_clauses (tree fndecl, tree *clauses, location_t loc, c_diag_p = c_level_p; goto incompatible; } + /* Matching 'nohost' clauses? */ + if ((c_nohost == NULL_TREE) != (c_nohost_p == NULL_TREE)) + { + c_diag = c_nohost; + c_diag_p = c_nohost_p; + goto incompatible; + } /* Compatible. 
*/ return 1; diff --git a/gcc/omp-low.c b/gcc/omp-low.c index e7049c8..2f735bc 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -1683,6 +1683,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) break; case OMP_CLAUSE__CACHE_: + case OMP_CLAUSE_NOHOST: default: gcc_unreachable (); } @@ -1869,6 +1870,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) break; case OMP_CLAUSE__CACHE_: + case OMP_CLAUSE_NOHOST: default: gcc_unreachable (); } diff --git a/gcc/omp-offload.c b/gcc/omp-offload.c index 0320ea6..bfbb011 100644 --- a/gcc/omp-offload.c +++ b/gcc/omp-offload.c @@ -1981,6 +1981,42 @@ execute_oacc_device_lower () gcc_unreachable (); } + if (is_oacc_routine) + { + tree attr = lookup_attribute ("omp declare target", + DECL_ATTRIBUTES (current_function_decl)); + gcc_checking_assert (attr); + tree clauses = TREE_VALUE (attr); + gcc_checking_assert (clauses); + + /* Should this OpenACC routine be discarded? */ + bool discard = false; + + tree clause_nohost = omp_find_clause (clauses, OMP_CLAUSE_NOHOST); + if (dump_file) + fprintf (dump_file, + "OpenACC routine '%s' %s '%s' clause.\n", + lang_hooks.decl_printable_name (current_function_decl, 2), + clause_nohost ? "has" : "doesn't have", + omp_clause_code_name[OMP_CLAUSE_NOHOST]); + /* Host compiler, 'nohost' clause? */ +#ifndef ACCEL_COMPILER + if (clause_nohost) + discard = true; +#endif + + if (dump_file) + fprintf (dump_file, + "OpenACC routine '%s' %sdiscarded.\n", + lang_hooks.decl_printable_name (current_function_decl, 2), + discard ? "" : "not "); + if (discard) + { + TREE_ASM_WRITTEN (current_function_decl) = 1; + return TODO_discard_function; + } + } + /* Unparallelized OpenACC kernels constructs must get launched as 1 x 1 x 1 kernels, so remove the parallelism dimensions function attributes potentially set earlier on. 
*/ diff --git a/gcc/testsuite/c-c++-common/goacc/classify-routine-nohost.c b/gcc/testsuite/c-c++-common/goacc/classify-routine-nohost.c new file mode 100644 index 0000000..a58482f --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/classify-routine-nohost.c @@ -0,0 +1,41 @@ +/* Check offloaded function's attributes and classification for OpenACC + routine with 'nohost' clause. */ + +/* { dg-additional-options "-O2" } + { dg-additional-options "-fopt-info-optimized-omp" } + { dg-additional-options "-fdump-tree-ompexp" } + { dg-additional-options "-fdump-tree-oaccdevlow" } */ + +/* { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting + aspects of that functionality. */ + +#define N 1024 + +extern unsigned int *__restrict a; +extern unsigned int *__restrict b; +extern unsigned int *__restrict c; +#pragma acc declare copyin (a, b) create (c) + +#pragma acc routine nohost worker +void ROUTINE () +{ +#pragma acc loop /* { dg-bogus "assigned OpenACC .* loop parallelism" } */ + for (unsigned int i = 0; i < N; i++) + c[i] = a[i] + b[i]; +} + +/* Check the offloaded function's attributes. + { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp declare target \\(nohost worker\\), oacc function \\(0 1, 1 0, 1 0\\)\\)\\)" 1 "ompexp" } } */ + +/* Check the offloaded function's classification. + { dg-final { scan-tree-dump-times "(?n)Function is OpenACC routine level 1" 1 "oaccdevlow" } } + { dg-final { scan-tree-dump-times "(?n)OpenACC routine 'ROUTINE' has 'nohost' clause" 1 "oaccdevlow" { target c } } } + { dg-final { scan-tree-dump-times "(?n)OpenACC routine 'void ROUTINE\\(\\)' has 'nohost' clause" 1 "oaccdevlow" { target { c++ && { ! 
offloading_enabled } } } } } + { dg-final { scan-tree-dump-times "(?n)OpenACC routine 'ROUTINE\\(\\)' has 'nohost' clause" 1 "oaccdevlow" { target { c++ && offloading_enabled } } } } + { dg-final { scan-tree-dump-times "(?n)OpenACC routine 'ROUTINE' discarded" 1 "oaccdevlow" { target c } } } + { dg-final { scan-tree-dump-times "(?n)OpenACC routine 'void ROUTINE\\(\\)' discarded" 1 "oaccdevlow" { target { c++ && { ! offloading_enabled } } } } } + { dg-final { scan-tree-dump-times "(?n)OpenACC routine 'ROUTINE\\(\\)' discarded" 1 "oaccdevlow" { target { c++ && offloading_enabled } } } } + TODO See PR101551 for 'offloading_enabled' differences. + { dg-final { scan-tree-dump-not "(?n)Compute dimensions" "oaccdevlow" } } + { dg-final { scan-tree-dump-not "(?n)__attribute__\\(.*omp declare target \\(nohost" "oaccdevlow" } } + { dg-final { scan-tree-dump-not "(?n)void ROUTINE \\(\\)" "oaccdevlow" } } */ diff --git a/gcc/testsuite/c-c++-common/goacc/classify-routine.c b/gcc/testsuite/c-c++-common/goacc/classify-routine.c index 81fe369..cc0ba2b 100644 --- a/gcc/testsuite/c-c++-common/goacc/classify-routine.c +++ b/gcc/testsuite/c-c++-common/goacc/classify-routine.c @@ -30,5 +30,13 @@ void ROUTINE () /* Check the offloaded function's classification and compute dimensions (will always be 1 x 1 x 1 for non-offloading compilation). { dg-final { scan-tree-dump-times "(?n)Function is OpenACC routine level 1" 1 "oaccdevlow" } } + { dg-final { scan-tree-dump-times "(?n)OpenACC routine 'ROUTINE' doesn't have 'nohost' clause" 1 "oaccdevlow" { target c } } } + { dg-final { scan-tree-dump-times "(?n)OpenACC routine 'void ROUTINE\\(\\)' doesn't have 'nohost' clause" 1 "oaccdevlow" { target { c++ && { ! 
offloading_enabled } } } } } + { dg-final { scan-tree-dump-times "(?n)OpenACC routine 'ROUTINE\\(\\)' doesn't have 'nohost' clause" 1 "oaccdevlow" { target { c++ && offloading_enabled } } } } + { dg-final { scan-tree-dump-times "(?n)OpenACC routine 'ROUTINE' not discarded" 1 "oaccdevlow" { target c } } } + { dg-final { scan-tree-dump-times "(?n)OpenACC routine 'void ROUTINE\\(\\)' not discarded" 1 "oaccdevlow" { target { c++ && { ! offloading_enabled } } } } } + { dg-final { scan-tree-dump-times "(?n)OpenACC routine 'ROUTINE\\(\\)' not discarded" 1 "oaccdevlow" { target { c++ && offloading_enabled } } } } + TODO See PR101551 for 'offloading_enabled' differences. { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } } - { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(0 1, 1 1, 1 1\\), omp declare target \\(worker\\), oacc function \\(0 1, 1 0, 1 0\\)\\)\\)" 1 "oaccdevlow" } } */ + { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(0 1, 1 1, 1 1\\), omp declare target \\(worker\\), oacc function \\(0 1, 1 0, 1 0\\)\\)\\)" 1 "oaccdevlow" } } + { dg-final { scan-tree-dump-times "(?n)void ROUTINE \\(\\)" 1 "oaccdevlow" } } */ diff --git a/gcc/testsuite/c-c++-common/goacc/routine-2.c b/gcc/testsuite/c-c++-common/goacc/routine-2.c index be1510a..3bf33e8 100644 --- a/gcc/testsuite/c-c++-common/goacc/routine-2.c +++ b/gcc/testsuite/c-c++-common/goacc/routine-2.c @@ -1,3 +1,7 @@ /* Test invalid use of the OpenACC 'routine' directive. 
*/ #pragma acc routine (nothing) gang /* { dg-error "not been declared" } */ + + +#pragma acc routine nohost nohost /* { dg-error "too many 'nohost' clauses" } */ +extern void nohost (void); diff --git a/gcc/testsuite/c-c++-common/goacc/routine-nohost-1.c b/gcc/testsuite/c-c++-common/goacc/routine-nohost-1.c new file mode 100644 index 0000000..c892741 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/routine-nohost-1.c @@ -0,0 +1,50 @@ +/* Test OpenACC 'routine' with 'nohost' clause, valid use. */ + +/* { dg-additional-options "-fdump-tree-oaccdevlow" } */ + +#pragma acc routine nohost +int THREE(void) +{ + return 3; +} + +#pragma acc routine (THREE) nohost + +#pragma acc routine nohost +extern int THREE(void); + +/* { dg-final { scan-tree-dump-times {(?n)^OpenACC routine '[^']*THREE[^']*' has 'nohost' clause\.$} 1 oaccdevlow } } */ + + +#pragma acc routine nohost +extern void NOTHING(void); + +#pragma acc routine (NOTHING) nohost + +void NOTHING(void) +{ +} + +#pragma acc routine nohost +extern void NOTHING(void); + +#pragma acc routine (NOTHING) nohost + +/* { dg-final { scan-tree-dump-times {(?n)^OpenACC routine '[^']*NOTHING[^']*' has 'nohost' clause\.$} 1 oaccdevlow } } */ + + +extern float ADD(float, float); + +#pragma acc routine (ADD) nohost + +float ADD(float x, float y) +{ + return x + y; +} + +#pragma acc routine nohost +extern float ADD(float, float); + +#pragma acc routine (ADD) nohost + +/* { dg-final { scan-tree-dump-times {(?n)^OpenACC routine '[^']*ADD[^']*' has 'nohost' clause\.$} 1 oaccdevlow } } */ diff --git a/gcc/testsuite/c-c++-common/goacc/routine-nohost-2.c b/gcc/testsuite/c-c++-common/goacc/routine-nohost-2.c new file mode 100644 index 0000000..d9acb80 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/routine-nohost-2.c @@ -0,0 +1,96 @@ +/* Test OpenACC 'routine' with 'nohost' clause, invalid use. */ + +#pragma acc routine /* { dg-note {\.\.\. 
without 'nohost' clause near to here} } */ +int THREE_1(void) +{ + return 3; +} + +#pragma acc routine (THREE_1) \ + nohost /* { dg-error {incompatible 'nohost' clause when applying '#pragma acc routine' to '[^']*THREE_1[^']*', which has already been marked with an OpenACC 'routine' directive} } */ + +#pragma acc routine \ + nohost /* { dg-error {incompatible 'nohost' clause when applying '#pragma acc routine' to '[^']*THREE_1[^']*', which has already been marked with an OpenACC 'routine' directive} } */ +extern int THREE_1(void); + + +#pragma acc routine /* { dg-note {\.\.\. without 'nohost' clause near to here} } */ +extern void NOTHING_1(void); + +#pragma acc routine (NOTHING_1) \ + nohost /* { dg-error {incompatible 'nohost' clause when applying '#pragma acc routine' to '[^']*NOTHING_1[^']*', which has already been marked with an OpenACC 'routine' directive} } */ + +void NOTHING_1(void) +{ +} + +#pragma acc routine \ + nohost /* { dg-error {incompatible 'nohost' clause when applying '#pragma acc routine' to '[^']*NOTHING_1[^']*', which has already been marked with an OpenACC 'routine' directive} } */ +extern void NOTHING_1(void); + +#pragma acc routine (NOTHING_1) \ + nohost /* { dg-error {incompatible 'nohost' clause when applying '#pragma acc routine' to '[^']*NOTHING_1[^']*', which has already been marked with an OpenACC 'routine' directive} } */ + + +extern float ADD_1(float, float); + +#pragma acc routine (ADD_1) /* { dg-note {\.\.\. 
without 'nohost' clause near to here} } */ + +float ADD_1(float x, float y) +{ + return x + y; +} + +#pragma acc routine \ + nohost /* { dg-error {incompatible 'nohost' clause when applying '#pragma acc routine' to '[^']*ADD_1[^']*', which has already been marked with an OpenACC 'routine' directive} } */ +extern float ADD_1(float, float); + +#pragma acc routine (ADD_1) \ + nohost /* { dg-error {incompatible 'nohost' clause when applying '#pragma acc routine' to '[^']*ADD_1[^']*', which has already been marked with an OpenACC 'routine' directive} } */ + + +/* The same again, but with/without nohost reversed. */ + +#pragma acc routine \ + nohost /* { dg-note {\.\.\. with 'nohost' clause here} } */ +int THREE_2(void) +{ + return 3; +} + +#pragma acc routine (THREE_2) /* { dg-error {missing 'nohost' clause when applying '#pragma acc routine' to '[^']*THREE_2[^']*', which has already been marked with an OpenACC 'routine' directive} } */ + +#pragma acc routine /* { dg-error {missing 'nohost' clause when applying '#pragma acc routine' to '[^']*THREE_2[^']*', which has already been marked with an OpenACC 'routine' directive} } */ +extern int THREE_2(void); + + +#pragma acc routine \ + nohost /* { dg-note {\.\.\. 
with 'nohost' clause here} } */ +extern void NOTHING_2(void); + +#pragma acc routine (NOTHING_2) /* { dg-error {missing 'nohost' clause when applying '#pragma acc routine' to '[^']*NOTHING_2[^']*', which has already been marked with an OpenACC 'routine' directive} } */ + +void NOTHING_2(void) +{ +} + +#pragma acc routine /* { dg-error {missing 'nohost' clause when applying '#pragma acc routine' to '[^']*NOTHING_2[^']*', which has already been marked with an OpenACC 'routine' directive} } */ +extern void NOTHING_2(void); + +#pragma acc routine (NOTHING_2) /* { dg-error {missing 'nohost' clause when applying '#pragma acc routine' to '[^']*NOTHING_2[^']*', which has already been marked with an OpenACC 'routine' directive} } */ + + +extern float ADD_2(float, float); + +#pragma acc routine (ADD_2) \ + nohost /* { dg-note {\.\.\. with 'nohost' clause here} } */ + +float ADD_2(float x, float y) +{ + return x + y; +} + +#pragma acc routine /* { dg-error {missing 'nohost' clause when applying '#pragma acc routine' to '[^']*ADD_2[^']*', which has already been marked with an OpenACC 'routine' directive} } */ +extern float ADD_2(float, float); + +#pragma acc routine (ADD_2) /* { dg-error {missing 'nohost' clause when applying '#pragma acc routine' to '[^']*ADD_2[^']*', which has already been marked with an OpenACC 'routine' directive} } */ diff --git a/gcc/testsuite/g++.dg/goacc/template.C b/gcc/testsuite/g++.dg/goacc/template.C index 51a3f54..f34fcfe 100644 --- a/gcc/testsuite/g++.dg/goacc/template.C +++ b/gcc/testsuite/g++.dg/goacc/template.C @@ -1,4 +1,6 @@ -#pragma acc routine +/* { dg-additional-options "-fdump-tree-oaccdevlow" } */ + +#pragma acc routine nohost template T accDouble(int val) { @@ -153,3 +155,14 @@ main () return b + c; } + +/* { dg-final { scan-tree-dump-times {(?n)^OpenACC routine '[^']+' has 'nohost' clause\.$} 4 oaccdevlow } } + { dg-final { scan-tree-dump-times {(?n)^OpenACC routine 'T accDouble\(int\) \[with T = char\]' has 'nohost' clause\.$} 1 
oaccdevlow { target { ! offloading_enabled } } } } + { dg-final { scan-tree-dump-times {(?n)^OpenACC routine 'accDouble\(int\)char' has 'nohost' clause\.$} 1 oaccdevlow { target offloading_enabled } } } + { dg-final { scan-tree-dump-times {(?n)^OpenACC routine 'T accDouble\(int\) \[with T = int\]' has 'nohost' clause\.$} 1 oaccdevlow { target { ! offloading_enabled } } } } + { dg-final { scan-tree-dump-times {(?n)^OpenACC routine 'accDouble\(int\)int' has 'nohost' clause\.$} 1 oaccdevlow { target offloading_enabled } } } + { dg-final { scan-tree-dump-times {(?n)^OpenACC routine 'T accDouble\(int\) \[with T = float\]' has 'nohost' clause\.$} 1 oaccdevlow { target { ! offloading_enabled } } } } + { dg-final { scan-tree-dump-times {(?n)^OpenACC routine 'accDouble\(int\)float' has 'nohost' clause\.$} 1 oaccdevlow { target offloading_enabled } } } + { dg-final { scan-tree-dump-times {(?n)^OpenACC routine 'T accDouble\(int\) \[with T = double\]' has 'nohost' clause\.$} 1 oaccdevlow { target { ! offloading_enabled } } } } + { dg-final { scan-tree-dump-times {(?n)^OpenACC routine 'accDouble\(int\)double' has 'nohost' clause\.$} 1 oaccdevlow { target offloading_enabled } } } + TODO See PR101551 for 'offloading_enabled' differences. */ diff --git a/gcc/testsuite/gfortran.dg/goacc/classify-routine-nohost.f95 b/gcc/testsuite/gfortran.dg/goacc/classify-routine-nohost.f95 new file mode 100644 index 0000000..0e06fb9 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/classify-routine-nohost.f95 @@ -0,0 +1,39 @@ +! Check offloaded function's attributes and classification for OpenACC +! routine with 'nohost' clause. + +! { dg-additional-options "-O2" } +! { dg-additional-options "-fopt-info-optimized-omp" } +! { dg-additional-options "-fdump-tree-ompexp" } +! { dg-additional-options "-fdump-tree-oaccdevlow" } + +! { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting +! aspects of that functionality. 
+ +subroutine ROUTINE + !$acc routine nohost worker + integer, parameter :: n = 1024 + integer, dimension (0:n-1) :: a, b, c + integer :: i + + call setup(a, b) + + !$acc loop ! { dg-bogus "assigned OpenACC .* loop parallelism" } + do i = 0, n - 1 + c(i) = a(i) + b(i) + end do +end subroutine ROUTINE + +! Check the offloaded function's attributes. +! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(0 1, 1 0, 1 0\\), omp declare target \\(nohost worker\\)\\)\\)" 1 "ompexp" } } + +! Check the offloaded function's classification. +! { dg-final { scan-tree-dump-times "(?n)Function is OpenACC routine level 1" 1 "oaccdevlow" } } +! { dg-final { scan-tree-dump-times "(?n)OpenACC routine 'routine' has 'nohost' clause" 1 "oaccdevlow" { target { ! offloading_enabled } } } } +! { dg-final { scan-tree-dump-times "(?n)OpenACC routine 'routine_' has 'nohost' clause" 1 "oaccdevlow" { target offloading_enabled } } } +! { dg-final { scan-tree-dump-times "(?n)OpenACC routine 'routine' discarded" 1 "oaccdevlow" { target { ! offloading_enabled } } } } +! { dg-final { scan-tree-dump-times "(?n)OpenACC routine 'routine_' discarded" 1 "oaccdevlow" { target offloading_enabled } } } +! { dg-final { scan-tree-dump-not "(?n)Compute dimensions" "oaccdevlow" } } +! { dg-final { scan-tree-dump-not "(?n)__attribute__\\(.*omp declare target \\(nohost" "oaccdevlow" } } +! { dg-final { scan-tree-dump-not "(?n)void routine \\(\\)" "oaccdevlow" { target { ! offloading_enabled } } } } +! { dg-final { scan-tree-dump-not "(?n)void routine_ \\(\\)" "oaccdevlow" { target offloading_enabled } } } +!TODO See PR101551 for 'offloading_enabled' differences. diff --git a/gcc/testsuite/gfortran.dg/goacc/classify-routine.f95 b/gcc/testsuite/gfortran.dg/goacc/classify-routine.f95 index 52cc870..92d3243 100644 --- a/gcc/testsuite/gfortran.dg/goacc/classify-routine.f95 +++ b/gcc/testsuite/gfortran.dg/goacc/classify-routine.f95 @@ -29,5 +29,12 @@ end subroutine ROUTINE ! 
Check the offloaded function's classification and compute dimensions (will ! always be 1 x 1 x 1 for non-offloading compilation). ! { dg-final { scan-tree-dump-times "(?n)Function is OpenACC routine level 1" 1 "oaccdevlow" } } +! { dg-final { scan-tree-dump-times "(?n)OpenACC routine 'routine' doesn't have 'nohost' clause" 1 "oaccdevlow" { target { ! offloading_enabled } } } } +! { dg-final { scan-tree-dump-times "(?n)OpenACC routine 'routine_' doesn't have 'nohost' clause" 1 "oaccdevlow" { target offloading_enabled } } } +! { dg-final { scan-tree-dump-times "(?n)OpenACC routine 'routine' not discarded" 1 "oaccdevlow" { target { ! offloading_enabled } } } } +! { dg-final { scan-tree-dump-times "(?n)OpenACC routine 'routine_' not discarded" 1 "oaccdevlow" { target offloading_enabled } } } ! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } } ! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(0 1, 1 1, 1 1\\), omp declare target \\(worker\\)\\)\\)" 1 "oaccdevlow" } } +! { dg-final { scan-tree-dump-times "(?n)void routine \\(\\)" 1 "oaccdevlow" { target { ! offloading_enabled } } } } +! { dg-final { scan-tree-dump-times "(?n)void routine_ \\(\\)" 1 "oaccdevlow" { target offloading_enabled } } } +!TODO See PR101551 for 'offloading_enabled' differences. diff --git a/gcc/testsuite/gfortran.dg/goacc/pure-elemental-procedures-2.f90 b/gcc/testsuite/gfortran.dg/goacc/pure-elemental-procedures-2.f90 index 97d92c3..31233b3 100644 --- a/gcc/testsuite/gfortran.dg/goacc/pure-elemental-procedures-2.f90 +++ b/gcc/testsuite/gfortran.dg/goacc/pure-elemental-procedures-2.f90 @@ -2,6 +2,10 @@ pure elemental subroutine foo() !$acc routine vector ! { dg-error "ROUTINE with GANG, WORKER, or VECTOR clause is not permitted in PURE procedure" } end +pure elemental subroutine foo_nh() +!$acc routine nohost vector ! 
{ dg-error "ROUTINE with GANG, WORKER, or VECTOR clause is not permitted in PURE procedure" } +end + elemental subroutine foo2() !$acc routine (myfoo2) gang ! { dg-error "Invalid NAME 'myfoo2' in" } end @@ -10,18 +14,38 @@ elemental subroutine foo2a() !$acc routine gang ! { dg-error "ROUTINE with GANG, WORKER, or VECTOR clause is not permitted in PURE procedure" } end +elemental subroutine foo2a_nh() +!$acc routine nohost gang ! { dg-error "ROUTINE with GANG, WORKER, or VECTOR clause is not permitted in PURE procedure" } +end + pure subroutine foo3() !$acc routine vector ! { dg-error "ROUTINE with GANG, WORKER, or VECTOR clause is not permitted in PURE procedure" } end +pure subroutine foo3_nh() +!$acc routine nohost vector ! { dg-error "ROUTINE with GANG, WORKER, or VECTOR clause is not permitted in PURE procedure" } +end + elemental impure subroutine foo4() !$acc routine vector ! OK: impure end +elemental impure subroutine foo4_nh() +!$acc routine nohost vector ! OK: impure +end + pure subroutine foo5() !$acc routine seq ! OK: seq end +pure subroutine foo5_nh() +!$acc routine nohost seq ! OK: seq +end + pure subroutine foo6() !$acc routine ! OK (implied 'seq') end + +pure subroutine foo6_nh() +!$acc routine nohost ! OK (implied 'seq') +end diff --git a/gcc/testsuite/gfortran.dg/goacc/routine-6.f90 b/gcc/testsuite/gfortran.dg/goacc/routine-6.f90 index f1e2aa3..3cd543e 100644 --- a/gcc/testsuite/gfortran.dg/goacc/routine-6.f90 +++ b/gcc/testsuite/gfortran.dg/goacc/routine-6.f90 @@ -116,3 +116,13 @@ subroutine subr10 (x) x = x * x - 1 end if end subroutine subr10 + +subroutine subr20 (x) + !$acc routine (subr20) nohost nohost ! 
{ dg-error "Failed to match clause" } + integer, intent(inout) :: x + if (x < 1) then + x = 1 + else + x = x * x - 1 + end if +end subroutine subr20 diff --git a/gcc/testsuite/gfortran.dg/goacc/routine-intrinsic-2.f b/gcc/testsuite/gfortran.dg/goacc/routine-intrinsic-2.f index 22524cc..0372e78 100644 --- a/gcc/testsuite/gfortran.dg/goacc/routine-intrinsic-2.f +++ b/gcc/testsuite/gfortran.dg/goacc/routine-intrinsic-2.f @@ -7,6 +7,11 @@ !$ACC ROUTINE (ABORT) GANG ! { dg-error "Intrinsic symbol specified in \\!\\\$ACC ROUTINE \\( NAME \\) at \\(1\\) marked with incompatible GANG, WORKER, or VECTOR clause" } !$ACC ROUTINE (ABORT) VECTOR ! { dg-error "Intrinsic symbol specified in \\!\\\$ACC ROUTINE \\( NAME \\) at \\(1\\) marked with incompatible GANG, WORKER, or VECTOR clause" } +!$ACC ROUTINE (ABORT) NOHOST ! { dg-error "Intrinsic symbol specified in \\!\\\$ACC ROUTINE \\( NAME \\) at \\(1\\) marked with incompatible NOHOST clause" } + +!$ACC ROUTINE (ABORT) WORKER NOHOST ! { dg-error "Intrinsic symbol specified in \\!\\\$ACC ROUTINE \\( NAME \\) at \\(1\\) marked with incompatible GANG, WORKER, or VECTOR clause" } +!$ACC ROUTINE (ABORT) NOHOST GANG ! { dg-error "Intrinsic symbol specified in \\!\\\$ACC ROUTINE \\( NAME \\) at \\(1\\) marked with incompatible GANG, WORKER, or VECTOR clause" } + CALL ABORT END SUBROUTINE sub_1 @@ -16,6 +21,11 @@ !$ACC ROUTINE (ABORT) WORKER ! { dg-error "Intrinsic symbol specified in \\!\\\$ACC ROUTINE \\( NAME \\) at \\(1\\) marked with incompatible GANG, WORKER, or VECTOR clause" } !$ACC ROUTINE (ABORT) GANG ! { dg-error "Intrinsic symbol specified in \\!\\\$ACC ROUTINE \\( NAME \\) at \\(1\\) marked with incompatible GANG, WORKER, or VECTOR clause" } +!$ACC ROUTINE (ABORT) NOHOST ! { dg-error "Intrinsic symbol specified in \\!\\\$ACC ROUTINE \\( NAME \\) at \\(1\\) marked with incompatible NOHOST clause" } + +!$ACC ROUTINE (ABORT) VECTOR NOHOST ! 
{ dg-error "Intrinsic symbol specified in \\!\\\$ACC ROUTINE \\( NAME \\) at \\(1\\) marked with incompatible GANG, WORKER, or VECTOR clause" } +!$ACC ROUTINE (ABORT) NOHOST WORKER ! { dg-error "Intrinsic symbol specified in \\!\\\$ACC ROUTINE \\( NAME \\) at \\(1\\) marked with incompatible GANG, WORKER, or VECTOR clause" } + CONTAINS SUBROUTINE sub_2 CALL ABORT diff --git a/gcc/testsuite/gfortran.dg/goacc/routine-module-1.f90 b/gcc/testsuite/gfortran.dg/goacc/routine-module-1.f90 index 4e81f11..46eec3d 100644 --- a/gcc/testsuite/gfortran.dg/goacc/routine-module-1.f90 +++ b/gcc/testsuite/gfortran.dg/goacc/routine-module-1.f90 @@ -14,34 +14,48 @@ program main !$acc parallel loop seq ! { dg-message "optimized: assigned OpenACC seq loop parallelism" } do i = 1, 10 call s_1 ! { dg-message "optimized: assigned OpenACC seq loop parallelism" } + call s_1_nh ! { dg-message "optimized: assigned OpenACC seq loop parallelism" } call s_2 ! { dg-message "optimized: assigned OpenACC seq loop parallelism" } + call s_2_nh ! { dg-message "optimized: assigned OpenACC seq loop parallelism" } call g_1 ! { dg-message "optimized: assigned OpenACC gang worker vector loop parallelism" } + call g_1_nh ! { dg-message "optimized: assigned OpenACC gang worker vector loop parallelism" } call w_1 ! { dg-message "optimized: assigned OpenACC worker vector loop parallelism" } + call w_1_nh ! { dg-message "optimized: assigned OpenACC worker vector loop parallelism" } call v_1 ! { dg-message "optimized: assigned OpenACC vector loop parallelism" } + call v_1_nh ! { dg-message "optimized: assigned OpenACC vector loop parallelism" } end do !$acc end parallel loop !$acc parallel loop gang ! { dg-message "optimized: assigned OpenACC gang loop parallelism" } do i = 1, 10 call s_1 ! { dg-message "optimized: assigned OpenACC seq loop parallelism" } + call s_1_nh ! { dg-message "optimized: assigned OpenACC seq loop parallelism" } call s_2 ! 
{ dg-message "optimized: assigned OpenACC seq loop parallelism" } + call s_2_nh ! { dg-message "optimized: assigned OpenACC seq loop parallelism" } call w_1 ! { dg-message "optimized: assigned OpenACC worker vector loop parallelism" } + call w_1_nh ! { dg-message "optimized: assigned OpenACC worker vector loop parallelism" } call v_1 ! { dg-message "optimized: assigned OpenACC vector loop parallelism" } + call v_1_nh ! { dg-message "optimized: assigned OpenACC vector loop parallelism" } end do !$acc end parallel loop !$acc parallel loop worker ! { dg-message "optimized: assigned OpenACC worker loop parallelism" } do i = 1, 10 call s_1 ! { dg-message "optimized: assigned OpenACC seq loop parallelism" } + call s_1_nh ! { dg-message "optimized: assigned OpenACC seq loop parallelism" } call s_2 ! { dg-message "optimized: assigned OpenACC seq loop parallelism" } + call s_2_nh ! { dg-message "optimized: assigned OpenACC seq loop parallelism" } call v_1 ! { dg-message "optimized: assigned OpenACC vector loop parallelism" } + call v_1_nh ! { dg-message "optimized: assigned OpenACC vector loop parallelism" } end do !$acc end parallel loop !$acc parallel loop vector ! { dg-message "optimized: assigned OpenACC vector loop parallelism" } do i = 1, 10 call s_1 ! { dg-message "optimized: assigned OpenACC seq loop parallelism" } + call s_1_nh ! { dg-message "optimized: assigned OpenACC seq loop parallelism" } call s_2 ! { dg-message "optimized: assigned OpenACC seq loop parallelism" } + call s_2_nh ! { dg-message "optimized: assigned OpenACC seq loop parallelism" } end do !$acc end parallel loop end program main diff --git a/gcc/testsuite/gfortran.dg/goacc/routine-module-2.f90 b/gcc/testsuite/gfortran.dg/goacc/routine-module-2.f90 index eae0807..e796c1d 100644 --- a/gcc/testsuite/gfortran.dg/goacc/routine-module-2.f90 +++ b/gcc/testsuite/gfortran.dg/goacc/routine-module-2.f90 @@ -11,21 +11,27 @@ program main !$acc parallel loop gang do i = 1, 10 call g_1 ! 
{ dg-error "routine call uses same OpenACC parallelism as containing loop" } + call g_1_nh ! { dg-error "routine call uses same OpenACC parallelism as containing loop" } end do !$acc end parallel loop !$acc parallel loop worker do i = 1, 10 call g_1 ! { dg-error "routine call uses same OpenACC parallelism as containing loop" } + call g_1_nh ! { dg-error "routine call uses same OpenACC parallelism as containing loop" } call w_1 ! { dg-error "routine call uses same OpenACC parallelism as containing loop" } + call w_1_nh ! { dg-error "routine call uses same OpenACC parallelism as containing loop" } end do !$acc end parallel loop !$acc parallel loop vector do i = 1, 10 call g_1 ! { dg-error "routine call uses same OpenACC parallelism as containing loop" } + call g_1_nh ! { dg-error "routine call uses same OpenACC parallelism as containing loop" } call w_1 ! { dg-error "routine call uses same OpenACC parallelism as containing loop" } + call w_1_nh ! { dg-error "routine call uses same OpenACC parallelism as containing loop" } call v_1 ! { dg-error "routine call uses same OpenACC parallelism as containing loop" } + call v_1_nh ! { dg-error "routine call uses same OpenACC parallelism as containing loop" } end do !$acc end parallel loop end program main diff --git a/gcc/testsuite/gfortran.dg/goacc/routine-module-3.f90 b/gcc/testsuite/gfortran.dg/goacc/routine-module-3.f90 index a4ff549..80fe07a 100644 --- a/gcc/testsuite/gfortran.dg/goacc/routine-module-3.f90 +++ b/gcc/testsuite/gfortran.dg/goacc/routine-module-3.f90 @@ -2,15 +2,54 @@ ! { dg-compile-aux-modules "routine-module-mod-1.f90" } -program main +subroutine sr_1 use routine_module_mod_1 implicit none + !$acc routine (s_1) seq ! { dg-error "Cannot change attributes of USE-associated symbol s_1" } ! { dg-error "NAME 's_1' invalid in \\!\\\$ACC ROUTINE \\( NAME \\)" "" { target *-*-* } .-1 } + !$acc routine (s_1_nh) seq nohost ! { dg-error "Cannot change attributes of USE-associated symbol s_1_nh" } + ! 
{ dg-error "NAME 's_1_nh' invalid in \\!\\\$ACC ROUTINE \\( NAME \\)" "" { target *-*-* } .-1 } !$acc routine (s_2) seq ! { dg-error "Cannot change attributes of USE-associated symbol s_2" } ! { dg-error "NAME 's_2' invalid in \\!\\\$ACC ROUTINE \\( NAME \\)" "" { target *-*-* } .-1 } + !$acc routine (s_2_nh) seq nohost ! { dg-error "Cannot change attributes of USE-associated symbol s_2_nh" } + ! { dg-error "NAME 's_2_nh' invalid in \\!\\\$ACC ROUTINE \\( NAME \\)" "" { target *-*-* } .-1 } !$acc routine (v_1) seq ! { dg-error "Cannot change attributes of USE-associated symbol v_1" } ! { dg-error "NAME 'v_1' invalid in \\!\\\$ACC ROUTINE \\( NAME \\)" "" { target *-*-* } .-1 } + !$acc routine (v_1_nh) seq nohost ! { dg-error "Cannot change attributes of USE-associated symbol v_1_nh" } + ! { dg-error "NAME 'v_1_nh' invalid in \\!\\\$ACC ROUTINE \\( NAME \\)" "" { target *-*-* } .-1 } !$acc routine (w_1) gang ! { dg-error "Cannot change attributes of USE-associated symbol w_1" } ! { dg-error "NAME 'w_1' invalid in \\!\\\$ACC ROUTINE \\( NAME \\)" "" { target *-*-* } .-1 } -end program main + !$acc routine (w_1_nh) gang nohost ! { dg-error "Cannot change attributes of USE-associated symbol w_1_nh" } + ! { dg-error "NAME 'w_1_nh' invalid in \\!\\\$ACC ROUTINE \\( NAME \\)" "" { target *-*-* } .-1 } + !$acc routine (g_1) gang ! { dg-error "Cannot change attributes of USE-associated symbol g_1" } + ! { dg-error "NAME 'g_1' invalid in \\!\\\$ACC ROUTINE \\( NAME \\)" "" { target *-*-* } .-1 } + !$acc routine (g_1_nh) gang nohost ! { dg-error "Cannot change attributes of USE-associated symbol g_1_nh" } + ! { dg-error "NAME 'g_1_nh' invalid in \\!\\\$ACC ROUTINE \\( NAME \\)" "" { target *-*-* } .-1 } +end subroutine sr_1 + +subroutine sr_2 + use routine_module_mod_1 + implicit none + + !$acc routine (s_1) seq nohost ! { dg-error "Cannot change attributes of USE-associated symbol s_1" } + ! 
{ dg-error "NAME 's_1' invalid in \\!\\\$ACC ROUTINE \\( NAME \\)" "" { target *-*-* } .-1 } + !$acc routine (s_1_nh) seq ! { dg-error "Cannot change attributes of USE-associated symbol s_1_nh" } + ! { dg-error "NAME 's_1_nh' invalid in \\!\\\$ACC ROUTINE \\( NAME \\)" "" { target *-*-* } .-1 } + !$acc routine (s_2) seq nohost ! { dg-error "Cannot change attributes of USE-associated symbol s_2" } + ! { dg-error "NAME 's_2' invalid in \\!\\\$ACC ROUTINE \\( NAME \\)" "" { target *-*-* } .-1 } + !$acc routine (s_2_nh) seq ! { dg-error "Cannot change attributes of USE-associated symbol s_2_nh" } + ! { dg-error "NAME 's_2_nh' invalid in \\!\\\$ACC ROUTINE \\( NAME \\)" "" { target *-*-* } .-1 } + !$acc routine (v_1) vector nohost ! { dg-error "Cannot change attributes of USE-associated symbol v_1" } + ! { dg-error "NAME 'v_1' invalid in \\!\\\$ACC ROUTINE \\( NAME \\)" "" { target *-*-* } .-1 } + !$acc routine (v_1_nh) vector ! { dg-error "Cannot change attributes of USE-associated symbol v_1_nh" } + ! { dg-error "NAME 'v_1_nh' invalid in \\!\\\$ACC ROUTINE \\( NAME \\)" "" { target *-*-* } .-1 } + !$acc routine (w_1) worker nohost ! { dg-error "Cannot change attributes of USE-associated symbol w_1" } + ! { dg-error "NAME 'w_1' invalid in \\!\\\$ACC ROUTINE \\( NAME \\)" "" { target *-*-* } .-1 } + !$acc routine (w_1_nh) worker ! { dg-error "Cannot change attributes of USE-associated symbol w_1_nh" } + ! { dg-error "NAME 'w_1_nh' invalid in \\!\\\$ACC ROUTINE \\( NAME \\)" "" { target *-*-* } .-1 } + !$acc routine (g_1) worker nohost ! { dg-error "Cannot change attributes of USE-associated symbol g_1" } + ! { dg-error "NAME 'g_1' invalid in \\!\\\$ACC ROUTINE \\( NAME \\)" "" { target *-*-* } .-1 } + !$acc routine (g_1_nh) worker ! { dg-error "Cannot change attributes of USE-associated symbol g_1_nh" } + ! 
{ dg-error "NAME 'g_1_nh' invalid in \\!\\\$ACC ROUTINE \\( NAME \\)" "" { target *-*-* } .-1 } +end subroutine sr_2 diff --git a/gcc/testsuite/gfortran.dg/goacc/routine-module-mod-1.f90 b/gcc/testsuite/gfortran.dg/goacc/routine-module-mod-1.f90 index 835619c..10e1096 100644 --- a/gcc/testsuite/gfortran.dg/goacc/routine-module-mod-1.f90 +++ b/gcc/testsuite/gfortran.dg/goacc/routine-module-mod-1.f90 @@ -19,6 +19,17 @@ contains end do end subroutine s_1 + subroutine s_1_nh + implicit none + !$acc routine nohost + + integer :: i + + !$acc loop ! { dg-bogus "assigned OpenACC .* loop parallelism" } + do i = 1, 3 + end do + end subroutine s_1_nh + subroutine s_2 implicit none !$acc routine (s_2) seq @@ -31,6 +42,17 @@ contains end do end subroutine s_2 + subroutine s_2_nh + implicit none + !$acc routine (s_2_nh) seq nohost + + integer :: i + + !$acc loop ! { dg-bogus "assigned OpenACC .* loop parallelism" } + do i = 1, 3 + end do + end subroutine s_2_nh + subroutine v_1 implicit none !$acc routine vector @@ -42,6 +64,17 @@ contains end do end subroutine v_1 + subroutine v_1_nh + implicit none + !$acc routine vector nohost + + integer :: i + + !$acc loop ! { dg-bogus "assigned OpenACC .* loop parallelism" } + do i = 1, 3 + end do + end subroutine v_1_nh + subroutine w_1 implicit none !$acc routine (w_1) worker @@ -53,6 +86,17 @@ contains end do end subroutine w_1 + subroutine w_1_nh + implicit none + !$acc routine (w_1_nh) worker nohost + + integer :: i + + !$acc loop ! { dg-bogus "assigned OpenACC .* loop parallelism" } + do i = 1, 3 + end do + end subroutine w_1_nh + subroutine g_1 implicit none !$acc routine gang @@ -65,6 +109,17 @@ contains end do end subroutine g_1 + subroutine g_1_nh + implicit none + !$acc routine gang nohost + + integer :: i + + !$acc loop ! { dg-bogus "assigned OpenACC .* loop parallelism" } + do i = 1, 3 + end do + end subroutine g_1_nh + subroutine pl_1 implicit none @@ -74,10 +129,15 @@ contains ! 
{ dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } .-1 } do i = 1, 3 call s_1 ! { dg-message "optimized: assigned OpenACC seq loop parallelism" } + call s_1_nh ! { dg-message "optimized: assigned OpenACC seq loop parallelism" } call s_2 ! { dg-message "optimized: assigned OpenACC seq loop parallelism" } + call s_2_nh ! { dg-message "optimized: assigned OpenACC seq loop parallelism" } call v_1 ! { dg-message "optimized: assigned OpenACC vector loop parallelism" } + call v_1_nh ! { dg-message "optimized: assigned OpenACC vector loop parallelism" } call w_1 ! { dg-message "optimized: assigned OpenACC worker vector loop parallelism" } + call w_1_nh ! { dg-message "optimized: assigned OpenACC worker vector loop parallelism" } call g_1 ! { dg-message "optimized: assigned OpenACC gang worker vector loop parallelism" } + call g_1_nh ! { dg-message "optimized: assigned OpenACC gang worker vector loop parallelism" } end do end subroutine pl_1 end module routine_module_mod_1 diff --git a/gcc/testsuite/gfortran.dg/goacc/routine-multiple-directives-1.f90 b/gcc/testsuite/gfortran.dg/goacc/routine-multiple-directives-1.f90 index 622a9d9..44ef453 100644 --- a/gcc/testsuite/gfortran.dg/goacc/routine-multiple-directives-1.f90 +++ b/gcc/testsuite/gfortran.dg/goacc/routine-multiple-directives-1.f90 @@ -1,5 +1,8 @@ ! Check for valid cases of multiple OpenACC 'routine' directives. +! { dg-additional-options "-fdump-tree-oaccdevlow" } +!TODO See PR101551 for 'offloading_enabled' differences. + ! { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting ! aspects of that functionality. @@ -8,12 +11,32 @@ !$ACC ROUTINE(s_1) SEQ !$ACC ROUTINE SEQ END SUBROUTINE s_1 + ! { dg-final { scan-tree-dump-times "(?n)OpenACC routine 's_1' doesn't have 'nohost' clause" 1 "oaccdevlow" { target { ! offloading_enabled } } } } + ! 
{ dg-final { scan-tree-dump-times "(?n)OpenACC routine 's_1_' doesn't have 'nohost' clause" 1 "oaccdevlow" { target offloading_enabled } } } + + SUBROUTINE s_1_nh +!$ACC ROUTINE(s_1_nh) NOHOST +!$ACC ROUTINE(s_1_nh) SEQ NOHOST +!$ACC ROUTINE NOHOST SEQ + END SUBROUTINE s_1_nh + ! { dg-final { scan-tree-dump-times "(?n)OpenACC routine 's_1_nh' has 'nohost' clause" 1 "oaccdevlow" { target { ! offloading_enabled } } } } + ! { dg-final { scan-tree-dump-times "(?n)OpenACC routine 's_1_nh_' has 'nohost' clause" 1 "oaccdevlow" { target offloading_enabled } } } SUBROUTINE s_2 !$ACC ROUTINE !$ACC ROUTINE SEQ !$ACC ROUTINE(s_2) END SUBROUTINE s_2 + ! { dg-final { scan-tree-dump-times "(?n)OpenACC routine 's_2' doesn't have 'nohost' clause" 1 "oaccdevlow" { target { ! offloading_enabled } } } } + ! { dg-final { scan-tree-dump-times "(?n)OpenACC routine 's_2_' doesn't have 'nohost' clause" 1 "oaccdevlow" { target offloading_enabled } } } + + SUBROUTINE s_2_nh +!$ACC ROUTINE NOHOST +!$ACC ROUTINE NOHOST SEQ +!$ACC ROUTINE(s_2_nh) NOHOST + END SUBROUTINE s_2_nh + ! { dg-final { scan-tree-dump-times "(?n)OpenACC routine 's_2_nh' has 'nohost' clause" 1 "oaccdevlow" { target { ! offloading_enabled } } } } + ! { dg-final { scan-tree-dump-times "(?n)OpenACC routine 's_2_nh_' has 'nohost' clause" 1 "oaccdevlow" { target offloading_enabled } } } SUBROUTINE v_1 !$ACC ROUTINE VECTOR @@ -22,6 +45,18 @@ !$ACC ROUTINE VECTOR ! { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-5 } END SUBROUTINE v_1 + ! { dg-final { scan-tree-dump-times "(?n)OpenACC routine 'v_1' doesn't have 'nohost' clause" 1 "oaccdevlow" { target { ! offloading_enabled } } } } + ! 
{ dg-final { scan-tree-dump-times "(?n)OpenACC routine 'v_1_' doesn't have 'nohost' clause" 1 "oaccdevlow" { target offloading_enabled } } } + + SUBROUTINE v_1_nh +!$ACC ROUTINE NOHOST VECTOR +!$ACC ROUTINE VECTOR NOHOST +!$ACC ROUTINE(v_1_nh) NOHOST VECTOR +!$ACC ROUTINE VECTOR NOHOST +! { dg-bogus "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-5 } + END SUBROUTINE v_1_nh + ! { dg-final { scan-tree-dump-times "(?n)OpenACC routine 'v_1_nh' has 'nohost' clause" 1 "oaccdevlow" { target { ! offloading_enabled } } } } + ! { dg-final { scan-tree-dump-times "(?n)OpenACC routine 'v_1_nh_' has 'nohost' clause" 1 "oaccdevlow" { target offloading_enabled } } } SUBROUTINE v_2 !$ACC ROUTINE(v_2) VECTOR @@ -29,6 +64,17 @@ !$ACC ROUTINE(v_2) VECTOR ! { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-4 } END SUBROUTINE v_2 + ! { dg-final { scan-tree-dump-times "(?n)OpenACC routine 'v_2' doesn't have 'nohost' clause" 1 "oaccdevlow" { target { ! offloading_enabled } } } } + ! { dg-final { scan-tree-dump-times "(?n)OpenACC routine 'v_2_' doesn't have 'nohost' clause" 1 "oaccdevlow" { target offloading_enabled } } } + + SUBROUTINE v_2_nh +!$ACC ROUTINE(v_2_nh) VECTOR NOHOST +!$ACC ROUTINE VECTOR NOHOST +!$ACC ROUTINE(v_2_nh) NOHOST VECTOR +! { dg-bogus "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-4 } + END SUBROUTINE v_2_nh + ! { dg-final { scan-tree-dump-times "(?n)OpenACC routine 'v_2_nh' has 'nohost' clause" 1 "oaccdevlow" { target { ! offloading_enabled } } } } + ! 
{ dg-final { scan-tree-dump-times "(?n)OpenACC routine 'v_2_nh_' has 'nohost' clause" 1 "oaccdevlow" { target offloading_enabled } } } SUBROUTINE sub_1 IMPLICIT NONE @@ -36,12 +82,22 @@ !$ACC ROUTINE (g_1) GANG !$ACC ROUTINE (g_1) GANG !$ACC ROUTINE (g_1) GANG + EXTERNAL :: g_1_nh +!$ACC ROUTINE (g_1_nh) GANG NOHOST +!$ACC ROUTINE (g_1_nh) NOHOST GANG +!$ACC ROUTINE (g_1_nh) NOHOST GANG +!$ACC ROUTINE (g_1_nh) GANG NOHOST CALL s_1 + CALL s_1_nh CALL s_2 + CALL s_2_nh CALL v_1 + CALL v_1_nh CALL v_2 + CALL v_2_nh CALL g_1 + CALL g_1_nh CALL ABORT END SUBROUTINE sub_1 @@ -50,14 +106,22 @@ EXTERNAL :: w_1 !$ACC ROUTINE (w_1) WORKER !$ACC ROUTINE (w_1) WORKER + EXTERNAL :: w_1_nh +!$ACC ROUTINE (w_1_nh) NOHOST WORKER +!$ACC ROUTINE (w_1_nh) WORKER NOHOST CONTAINS SUBROUTINE sub_2 CALL s_1 + CALL s_1_nh CALL s_2 + CALL s_2_nh CALL v_1 + CALL v_1_nh CALL v_2 + CALL v_2_nh CALL w_1 + CALL w_1_nh CALL ABORT END SUBROUTINE sub_2 END MODULE m_w_1 diff --git a/gcc/testsuite/gfortran.dg/goacc/routine-multiple-directives-2.f90 b/gcc/testsuite/gfortran.dg/goacc/routine-multiple-directives-2.f90 index 54365ae..f332ed5 100644 --- a/gcc/testsuite/gfortran.dg/goacc/routine-multiple-directives-2.f90 +++ b/gcc/testsuite/gfortran.dg/goacc/routine-multiple-directives-2.f90 @@ -9,8 +9,32 @@ !$ACC ROUTINE !$ACC ROUTINE(s_1) WORKER ! { dg-error "\\!\\\$ACC ROUTINE already applied" } !$ACC ROUTINE GANG VECTOR ! { dg-error "Multiple loop axes specified for routine" } +!$ACC ROUTINE VECTOR NOHOST WORKER ! { dg-error "Multiple loop axes specified for routine" } +!$ACC ROUTINE(s_1) NOHOST ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE NOHOST GANG ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE(s_1) SEQ NOHOST ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE NOHOST ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE(s_1) NOHOST WORKER ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE GANG NOHOST VECTOR ! 
{ dg-error "Multiple loop axes specified for routine" } END SUBROUTINE s_1 + SUBROUTINE s_1_nh +!$ACC ROUTINE NOHOST VECTOR WORKER ! { dg-error "Multiple loop axes specified for routine" } +!$ACC ROUTINE(s_1_nh) NOHOST +!$ACC ROUTINE NOHOST GANG ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE(s_1_nh) NOHOST SEQ +!$ACC ROUTINE NOHOST +!$ACC ROUTINE(s_1_nh) WORKER NOHOST ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE GANG NOHOST VECTOR ! { dg-error "Multiple loop axes specified for routine" } +!$ACC ROUTINE VECTOR WORKER ! { dg-error "Multiple loop axes specified for routine" } +!$ACC ROUTINE(s_1_nh) ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE GANG ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE(s_1_nh) SEQ ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE(s_1_nh) WORKER ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE GANG VECTOR ! { dg-error "Multiple loop axes specified for routine" } + END SUBROUTINE s_1_nh + SUBROUTINE s_2 !$ACC ROUTINE(s_2) VECTOR WORKER ! { dg-error "Multiple loop axes specified for routine" } !$ACC ROUTINE @@ -19,8 +43,32 @@ !$ACC ROUTINE(s_2) !$ACC ROUTINE WORKER ! { dg-error "\\!\\\$ACC ROUTINE already applied" } !$ACC ROUTINE(s_2) GANG VECTOR ! { dg-error "Multiple loop axes specified for routine" } +!$ACC ROUTINE(s_2) VECTOR NOHOST WORKER ! { dg-error "Multiple loop axes specified for routine" } +!$ACC ROUTINE NOHOST ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE(s_2) GANG NOHOST ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE SEQ NOHOST ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE(s_2) NOHOST ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE NOHOST WORKER ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE(s_2) NOHOST GANG VECTOR ! 
{ dg-error "Multiple loop axes specified for routine" } END SUBROUTINE s_2 + SUBROUTINE s_2_nh +!$ACC ROUTINE(s_2_nh) NOHOST VECTOR WORKER ! { dg-error "Multiple loop axes specified for routine" } +!$ACC ROUTINE NOHOST +!$ACC ROUTINE(s_2_nh) GANG NOHOST ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE SEQ NOHOST +!$ACC ROUTINE(s_2_nh) NOHOST +!$ACC ROUTINE NOHOST WORKER ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE(s_2_nh) NOHOST GANG VECTOR ! { dg-error "Multiple loop axes specified for routine" } +!$ACC ROUTINE(s_2_nh) VECTOR WORKER ! { dg-error "Multiple loop axes specified for routine" } +!$ACC ROUTINE ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE(s_2_nh) GANG ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE SEQ ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE(s_2_nh) ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE WORKER ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE(s_2_nh) GANG VECTOR ! { dg-error "Multiple loop axes specified for routine" } + END SUBROUTINE s_2_nh + SUBROUTINE v_1 !$ACC ROUTINE VECTOR WORKER ! { dg-error "Multiple loop axes specified for routine" } !$ACC ROUTINE VECTOR @@ -30,16 +78,61 @@ !$ACC ROUTINE(v_1) VECTOR !$ACC ROUTINE WORKER ! { dg-error "\\!\\\$ACC ROUTINE already applied" } !$ACC ROUTINE GANG VECTOR ! { dg-error "Multiple loop axes specified for routine" } +!$ACC ROUTINE NOHOST VECTOR WORKER ! { dg-error "Multiple loop axes specified for routine" } +!$ACC ROUTINE NOHOST VECTOR ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE GANG NOHOST ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE NOHOST SEQ ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE NOHOST ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE(v_1) VECTOR NOHOST ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE WORKER NOHOST ! 
{ dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE GANG VECTOR NOHOST ! { dg-error "Multiple loop axes specified for routine" } END SUBROUTINE v_1 + SUBROUTINE v_1_nh +!$ACC ROUTINE VECTOR WORKER NOHOST ! { dg-error "Multiple loop axes specified for routine" } +!$ACC ROUTINE VECTOR NOHOST +!$ACC ROUTINE GANG NOHOST ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE NOHOST SEQ ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE NOHOST ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE(v_1_nh) VECTOR NOHOST +!$ACC ROUTINE WORKER NOHOST ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE GANG NOHOST VECTOR ! { dg-error "Multiple loop axes specified for routine" } +!$ACC ROUTINE VECTOR WORKER ! { dg-error "Multiple loop axes specified for routine" } +!$ACC ROUTINE VECTOR ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE GANG ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE SEQ ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE(v_1_nh) VECTOR ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE WORKER ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE GANG VECTOR ! { dg-error "Multiple loop axes specified for routine" } + END SUBROUTINE v_1_nh + SUBROUTINE v_2 !$ACC ROUTINE(v_2) VECTOR !$ACC ROUTINE(v_2) VECTOR WORKER ! { dg-error "Multiple loop axes specified for routine" } !$ACC ROUTINE(v_2) ! { dg-error "\\!\\\$ACC ROUTINE already applied" } !$ACC ROUTINE VECTOR !$ACC ROUTINE(v_2) GANG VECTOR ! { dg-error "Multiple loop axes specified for routine" } +!$ACC ROUTINE(v_2) VECTOR NOHOST ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE(v_2) VECTOR NOHOST WORKER ! { dg-error "Multiple loop axes specified for routine" } +!$ACC ROUTINE(v_2) NOHOST ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE VECTOR NOHOST ! 
{ dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE(v_2) NOHOST GANG VECTOR ! { dg-error "Multiple loop axes specified for routine" } END SUBROUTINE v_2 + SUBROUTINE v_2_nh +!$ACC ROUTINE(v_2_nh) VECTOR NOHOST +!$ACC ROUTINE(v_2_nh) VECTOR WORKER NOHOST ! { dg-error "Multiple loop axes specified for routine" } +!$ACC ROUTINE(v_2_nh) NOHOST ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE VECTOR NOHOST +!$ACC ROUTINE(v_2_nh) GANG NOHOST VECTOR ! { dg-error "Multiple loop axes specified for routine" } +!$ACC ROUTINE(v_2_nh) VECTOR ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE(v_2_nh) VECTOR WORKER ! { dg-error "Multiple loop axes specified for routine" } +!$ACC ROUTINE(v_2_nh) ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE VECTOR ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE(v_2_nh) GANG VECTOR ! { dg-error "Multiple loop axes specified for routine" } + END SUBROUTINE v_2_nh + SUBROUTINE sub_1 IMPLICIT NONE EXTERNAL :: g_1 @@ -50,12 +143,39 @@ !$ACC ROUTINE (g_1) ! { dg-error "\\!\\\$ACC ROUTINE already applied" } !$ACC ROUTINE (g_1) GANG !$ACC ROUTINE (g_1) ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE (g_1) NOHOST GANG ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE (g_1) GANG WORKER NOHOST ! { dg-error "Multiple loop axes specified for routine" } +!$ACC ROUTINE (g_1) NOHOST VECTOR ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE (g_1) NOHOST SEQ ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE (g_1) NOHOST ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE (g_1) GANG NOHOST ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE (g_1) NOHOST ! { dg-error "\\!\\\$ACC ROUTINE already applied" } + EXTERNAL :: g_1_nh +!$ACC ROUTINE (g_1_nh) NOHOST GANG +!$ACC ROUTINE (g_1_nh) GANG NOHOST WORKER ! 
{ dg-error "Multiple loop axes specified for routine" } +!$ACC ROUTINE (g_1_nh) NOHOST VECTOR ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE (g_1_nh) SEQ NOHOST ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE (g_1_nh) NOHOST ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE (g_1_nh) GANG NOHOST +!$ACC ROUTINE (g_1_nh) NOHOST ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE (g_1_nh) GANG ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE (g_1_nh) GANG WORKER ! { dg-error "Multiple loop axes specified for routine" } +!$ACC ROUTINE (g_1_nh) VECTOR ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE (g_1_nh) SEQ ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE (g_1_nh) ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE (g_1_nh) GANG ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE (g_1_nh) ! { dg-error "\\!\\\$ACC ROUTINE already applied" } CALL s_1 + CALL s_1_nh CALL s_2 + CALL s_2_nh CALL v_1 + CALL v_1_nh CALL v_2 + CALL v_2_nh CALL g_1 + CALL g_1_nh CALL ABORT END SUBROUTINE sub_1 @@ -69,14 +189,41 @@ !$ACC ROUTINE (w_1) SEQ ! { dg-error "\\!\\\$ACC ROUTINE already applied" } !$ACC ROUTINE (w_1) ! { dg-error "\\!\\\$ACC ROUTINE already applied" } !$ACC ROUTINE (w_1) VECTOR ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE (w_1) WORKER NOHOST ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE (w_1) WORKER NOHOST SEQ ! { dg-error "Multiple loop axes specified for routine" } +!$ACC ROUTINE (w_1) NOHOST ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE (w_1) NOHOST WORKER ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE (w_1) SEQ NOHOST ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE (w_1) NOHOST ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE (w_1) VECTOR NOHOST ! 
{ dg-error "\\!\\\$ACC ROUTINE already applied" } + EXTERNAL :: w_1_nh +!$ACC ROUTINE (w_1_nh) WORKER NOHOST +!$ACC ROUTINE (w_1_nh) WORKER NOHOST SEQ ! { dg-error "Multiple loop axes specified for routine" } +!$ACC ROUTINE (w_1_nh) NOHOST ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE (w_1_nh) NOHOST WORKER +!$ACC ROUTINE (w_1_nh) NOHOST SEQ ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE (w_1_nh) NOHOST ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE (w_1_nh) VECTOR NOHOST ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE (w_1_nh) WORKER ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE (w_1_nh) WORKER SEQ ! { dg-error "Multiple loop axes specified for routine" } +!$ACC ROUTINE (w_1_nh) ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE (w_1_nh) WORKER ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE (w_1_nh) SEQ ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE (w_1_nh) ! { dg-error "\\!\\\$ACC ROUTINE already applied" } +!$ACC ROUTINE (w_1_nh) VECTOR ! { dg-error "\\!\\\$ACC ROUTINE already applied" } CONTAINS SUBROUTINE sub_2 CALL s_1 + CALL s_1_nh CALL s_2 + CALL s_2_nh CALL v_1 + CALL v_1_nh CALL v_2 + CALL v_2_nh CALL w_1 + CALL w_1_nh CALL ABORT END SUBROUTINE sub_2 END MODULE m_w_1 diff --git a/gcc/tree-core.h b/gcc/tree-core.h index 9391609..bfab988 100644 --- a/gcc/tree-core.h +++ b/gcc/tree-core.h @@ -508,7 +508,10 @@ enum omp_clause_code { OMP_CLAUSE_IF_PRESENT, /* OpenACC clause: finalize. */ - OMP_CLAUSE_FINALIZE + OMP_CLAUSE_FINALIZE, + + /* OpenACC clause: nohost. 
*/ + OMP_CLAUSE_NOHOST, }; #undef DEFTREESTRUCT diff --git a/gcc/tree-nested.c b/gcc/tree-nested.c index 9edd922..0c3fb02 100644 --- a/gcc/tree-nested.c +++ b/gcc/tree-nested.c @@ -1510,6 +1510,9 @@ convert_nonlocal_omp_clauses (tree *pclauses, struct walk_stmt_info *wi) case OMP_CLAUSE__REDUCTEMP_: case OMP_CLAUSE__SIMDUID_: case OMP_CLAUSE__SIMT_: + /* The following clauses are only allowed on OpenACC 'routine' + directives, not seen here. */ + case OMP_CLAUSE_NOHOST: /* Anything else. */ default: gcc_unreachable (); @@ -2291,6 +2294,9 @@ convert_local_omp_clauses (tree *pclauses, struct walk_stmt_info *wi) case OMP_CLAUSE__REDUCTEMP_: case OMP_CLAUSE__SIMDUID_: case OMP_CLAUSE__SIMT_: + /* The following clauses are only allowed on OpenACC 'routine' + directives, not seen here. */ + case OMP_CLAUSE_NOHOST: /* Anything else. */ default: gcc_unreachable (); diff --git a/gcc/tree-pretty-print.c b/gcc/tree-pretty-print.c index fde07df..7201bd7 100644 --- a/gcc/tree-pretty-print.c +++ b/gcc/tree-pretty-print.c @@ -1303,6 +1303,9 @@ dump_omp_clause (pretty_printer *pp, tree clause, int spc, dump_flags_t flags) case OMP_CLAUSE_FINALIZE: pp_string (pp, "finalize"); break; + case OMP_CLAUSE_NOHOST: + pp_string (pp, "nohost"); + break; case OMP_CLAUSE_DETACH: pp_string (pp, "detach("); dump_generic_node (pp, OMP_CLAUSE_DECL (clause), spc, flags, diff --git a/gcc/tree.c b/gcc/tree.c index bead1ac..c621f87 100644 --- a/gcc/tree.c +++ b/gcc/tree.c @@ -361,6 +361,7 @@ unsigned const char omp_clause_num_ops[] = 3, /* OMP_CLAUSE_TILE */ 0, /* OMP_CLAUSE_IF_PRESENT */ 0, /* OMP_CLAUSE_FINALIZE */ + 0, /* OMP_CLAUSE_NOHOST */ }; const char * const omp_clause_code_name[] = @@ -448,6 +449,7 @@ const char * const omp_clause_code_name[] = "tile", "if_present", "finalize", + "nohost", }; @@ -11165,6 +11167,7 @@ walk_tree_1 (tree *tp, walk_tree_fn func, void *data, case OMP_CLAUSE__SIMT_: case OMP_CLAUSE_IF_PRESENT: case OMP_CLAUSE_FINALIZE: + case OMP_CLAUSE_NOHOST: WALK_SUBTREE_TAIL 
(OMP_CLAUSE_CHAIN (*tp)); case OMP_CLAUSE_LASTPRIVATE: -- cgit v1.1 From 87bd75cd49aac68e90bd9b6b5e14582d6e0ccafa Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Wed, 21 Jul 2021 19:16:08 -0400 Subject: analyzer: fix ICE in binding_cluster::purge_state_involving [PR101522] gcc/analyzer/ChangeLog: PR analyzer/101522 * store.cc (binding_cluster::purge_state_involving): Don't change m_map whilst iterating through it. gcc/testsuite/ChangeLog: PR analyzer/101522 * g++.dg/analyzer/pr101522.C: New test. Signed-off-by: David Malcolm --- gcc/analyzer/store.cc | 14 +++++++++----- gcc/testsuite/g++.dg/analyzer/pr101522.C | 31 +++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 5 deletions(-) create mode 100644 gcc/testsuite/g++.dg/analyzer/pr101522.C (limited to 'gcc') diff --git a/gcc/analyzer/store.cc b/gcc/analyzer/store.cc index 0042a20..8ee414d 100644 --- a/gcc/analyzer/store.cc +++ b/gcc/analyzer/store.cc @@ -1323,6 +1323,7 @@ binding_cluster::purge_state_involving (const svalue *sval, region_model_manager *sval_mgr) { auto_vec<const binding_key *> to_remove; + auto_vec<std::pair<const binding_key *, tree> > to_make_unknown; for (auto iter : m_map) { const binding_key *iter_key = iter.first; @@ -1335,17 +1336,20 @@ binding_cluster::purge_state_involving (const svalue *sval, } const svalue *iter_sval = iter.second; if (iter_sval->involves_p (sval)) - { - const svalue *new_sval - = sval_mgr->get_or_create_unknown_svalue (iter_sval->get_type ()); - m_map.put (iter_key, new_sval); - } + to_make_unknown.safe_push (std::make_pair(iter_key, + iter_sval->get_type ())); } for (auto iter : to_remove) { m_map.remove (iter); m_touched = true; } + for (auto iter : to_make_unknown) + { + const svalue *new_sval + = sval_mgr->get_or_create_unknown_svalue (iter.second); + m_map.put (iter.first, new_sval); + } } /* Get any SVAL bound to REG within this cluster via kind KIND, diff --git a/gcc/testsuite/g++.dg/analyzer/pr101522.C b/gcc/testsuite/g++.dg/analyzer/pr101522.C new file mode 100644 index 0000000..634a2ac --- /dev/null
+++ b/gcc/testsuite/g++.dg/analyzer/pr101522.C @@ -0,0 +1,31 @@ +// { dg-do compile { target c++11 } } + +double +sqrt (); + +namespace std { + class gamma_distribution { + public: + gamma_distribution () : _M_param () {} + + private: + struct param_type { + param_type () : _M_beta () { _M_a2 = 1 / ::sqrt (); } + double _M_beta, _M_a2; + }; + param_type _M_param; + int _M_saved_available, _M_saved = 0, _M_param0 = 0; + }; + + struct fisher_f_distribution { + gamma_distribution _M_gd_x, _M_gd_y; + }; +} + +int +main () +{ + std::fisher_f_distribution d; + + return 0; +} -- cgit v1.1 From 893b12cc12877aca1c9df6272123b26eddf12722 Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Wed, 21 Jul 2021 19:19:31 -0400 Subject: analyzer: bulletproof -Wanalyzer-file-leak [PR101547] gcc/analyzer/ChangeLog: PR analyzer/101547 * sm-file.cc (file_leak::emit): Handle m_arg being NULL. (file_leak::describe_final_event): Handle ev.m_expr being NULL. gcc/testsuite/ChangeLog: PR analyzer/101547 * gcc.dg/analyzer/pr101547.c: New test. Signed-off-by: David Malcolm --- gcc/analyzer/sm-file.cc | 27 +++++++++++++++++++++------ gcc/testsuite/gcc.dg/analyzer/pr101547.c | 11 +++++++++++ 2 files changed, 32 insertions(+), 6 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/analyzer/pr101547.c (limited to 'gcc') diff --git a/gcc/analyzer/sm-file.cc b/gcc/analyzer/sm-file.cc index b40a9a1..6a17019 100644 --- a/gcc/analyzer/sm-file.cc +++ b/gcc/analyzer/sm-file.cc @@ -193,9 +193,13 @@ public: /* CWE-775: "Missing Release of File Descriptor or Handle after Effective Lifetime". 
*/ m.add_cwe (775); - return warning_meta (rich_loc, m, OPT_Wanalyzer_file_leak, - "leak of FILE %qE", - m_arg); + if (m_arg) + return warning_meta (rich_loc, m, OPT_Wanalyzer_file_leak, + "leak of FILE %qE", + m_arg); + else + return warning_meta (rich_loc, m, OPT_Wanalyzer_file_leak, + "leak of FILE"); } label_text describe_state_change (const evdesc::state_change &change) @@ -212,10 +216,21 @@ public: label_text describe_final_event (const evdesc::final_event &ev) FINAL OVERRIDE { if (m_fopen_event.known_p ()) - return ev.formatted_print ("%qE leaks here; was opened at %@", - ev.m_expr, &m_fopen_event); + { + if (ev.m_expr) + return ev.formatted_print ("%qE leaks here; was opened at %@", + ev.m_expr, &m_fopen_event); + else + return ev.formatted_print ("leaks here; was opened at %@", + &m_fopen_event); + } else - return ev.formatted_print ("%qE leaks here", ev.m_expr); + { + if (ev.m_expr) + return ev.formatted_print ("%qE leaks here", ev.m_expr); + else + return ev.formatted_print ("leaks here"); + } } private: diff --git a/gcc/testsuite/gcc.dg/analyzer/pr101547.c b/gcc/testsuite/gcc.dg/analyzer/pr101547.c new file mode 100644 index 0000000..8791cff --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/pr101547.c @@ -0,0 +1,11 @@ +char * +fopen (const char *restrict, const char *restrict); + +void +k2 (void) +{ + char *setfiles[1]; + int i; + + setfiles[i] = fopen ("", ""); /* { dg-warning "use of uninitialized value 'i'" } */ +} /* { dg-warning "leak of FILE" } */ -- cgit v1.1 From 419c6c68e60adc8801b44dab72ebcd680cfe1d97 Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Thu, 22 Jul 2021 00:16:46 +0000 Subject: Daily bump. 
--- gcc/ChangeLog | 222 ++++++++++++++++++++++++++++++++++++++++++++++++ gcc/DATESTAMP | 2 +- gcc/analyzer/ChangeLog | 55 ++++++++++++ gcc/c-family/ChangeLog | 6 ++ gcc/c/ChangeLog | 9 ++ gcc/cp/ChangeLog | 16 ++++ gcc/fortran/ChangeLog | 31 +++++++ gcc/testsuite/ChangeLog | 87 +++++++++++++++++++ 8 files changed, 427 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 5755379..d450c6b 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,225 @@ +2021-07-21 Thomas Schwinge + Joseph Myers + Cesar Philippidis + + * tree-core.h (omp_clause_code): Add 'OMP_CLAUSE_NOHOST'. + * tree.c (omp_clause_num_ops, omp_clause_code_name, walk_tree_1): + Handle it. + * tree-pretty-print.c (dump_omp_clause): Likewise. + * omp-general.c (oacc_verify_routine_clauses): Likewise. + * gimplify.c (gimplify_scan_omp_clauses) + (gimplify_adjust_omp_clauses): Likewise. + * tree-nested.c (convert_nonlocal_omp_clauses) + (convert_local_omp_clauses): Likewise. + * omp-low.c (scan_sharing_clauses): Likewise. + * omp-offload.c (execute_oacc_device_lower): Update. + +2021-07-21 Martin Sebor + + * tree-ssa-alias.c (walk_aliased_vdefs_1): Fix typos in a comment. + +2021-07-21 Bill Schmidt + + * config/rs6000/rs6000-gen-builtins.c (write_init_bif_table): + Implement. + +2021-07-21 Bill Schmidt + + * config/rs6000/rs6000-gen-builtins.c (write_fntype): New + callback function. + (write_fntype_init): New stub function. + (write_init_bif_table): Likewise. + (write_init_ovld_table): New function. + (write_init_file): Implement. + +2021-07-21 Bill Schmidt + + * config/rs6000/rs6000-gen-builtins.c + (write_autogenerated_header): New function. + (write_decls): Likewise. + (write_extern_fntype): New callback function. + (write_header_file): Implement. + +2021-07-21 Bill Schmidt + + * config/rs6000/rs6000-gen-builtins.c (write_defines_file): + Implement. + +2021-07-21 Bill Schmidt + + * config/rs6000/rs6000-gen-builtins.c (complete_vector_type): New + function. 
+ (complete_base_type): Likewise. + (construct_fntype_id): Likewise. + (parse_bif_entry): Call contruct_fntype_id. + (parse_ovld_entry): Likewise. + +2021-07-21 Bill Schmidt + + * config/rs6000/rs6000-gen-builtins.c (ovld_stanza): New struct. + (MAXOVLDSTANZAS): New macro. + (ovld_stanzas): New variable. + (curr_ovld_stanza): Likewise. + (MAXOVLDS): New macro. + (ovlddata): New struct. + (ovlds): New variable. + (curr_ovld): Likewise. + (max_ovld_args): Likewise. + (parse_ovld_entry): New function. + (parse_ovld_stanza): Likewise. + (parse_ovld): Implement. + +2021-07-21 Bill Schmidt + + * config/rs6000/rs6000-gen-builtins.c (parse_bif_attrs): + Implement. + +2021-07-21 Bill Schmidt + + * config/rs6000/rs6000-gen-builtins.c (parse_args): New function. + (parse_prototype): Implement. + +2021-07-21 Bill Schmidt + + * config/rs6000/rs6000-gen-builtins.c (bif_stanza): New enum. + (curr_bif_stanza): New variable. + (stanza_entry): New struct. + (stanza_map): New initialized variable. + (enable_string): Likewise. + (fnkinds): New enum. + (typelist): New struct. + (attrinfo): Likewise. + (MAXRESTROPNDS): New macro. + (prototype): New struct. + (MAXBIFS): New macro. + (bifdata): New struct. + (bifs): New variable. + (curr_bif): Likewise. + (bif_order): Likewise. + (bif_index): Likewise. + (fatal): New function. + (stanza_name_to_stanza): Likewise. + (parse_bif_attrs): New stub function. + (parse_prototype): Likewise. + (parse_bif_entry): New function. + (parse_bif_stanza): Likewise. + (parse_bif): Implement. + (set_bif_order): New function. + (create_bif_order): Implement. + +2021-07-21 Bill Schmidt + + * config/rs6000/rs6000-gen-builtins.c (rbtree.h): New #include. + (num_bifs): New variable. + (num_ovld_stanzas): Likewise. + (num_ovlds): Likewise. + (parse_codes): New enum. + (bif_rbt): New variable. + (ovld_rbt): Likewise. + (fntype_rbt): Likewise. + (bifo_rbt): Likewise. + (parse_bif): New stub function. + (create_bif_order): Likewise. + (parse_ovld): Likewise. 
+ (write_header_file): Likewise. + (write_init_file): Likewise. + (write_defines_file): Likewise. + (delete_output_files): New function. + (main): Likewise. + +2021-07-21 H.J. Lu + + PR target/101549 + * config/i386/i386-builtin.def: Remove OPTION_MASK_ISA_SSE4_2 + from CRC32 _builtin functions. + +2021-07-21 Sebastian Huber + + * coverage.c (build_gcov_info_var_registration): Mark the object placed + in the linker set as referenced so that it does not get optimized away. + +2021-07-21 Kito Cheng + + Revert: + 2021-07-20 Kito Cheng + + * config.gcc (riscv*-*-*): Detect which python is available. + +2021-07-21 Jakub Jelinek + + PR middle-end/101535 + * gimplify.c (omp_check_private): Properly skip ORT_TARGET_DATA + contexts in which decl isn't privatized and for ORT_TARGET return + false if decl is mapped. + +2021-07-21 Richard Sandiford + + * gimple-loop-jam.c: Include tree-ssa-sccvn.h. + (tree_loop_unroll_and_jam): Run value-numbering on a loop that + has been successfully unrolled. + +2021-07-21 Richard Sandiford + + * tree-ssa-loop-manip.c (determine_exit_conditions): Return a null + exit condition if no tail loop is needed, and if the original exit + condition should therefore be kept as-is. + (tree_transform_and_unroll_loop): Handle that case here too. + +2021-07-21 Kewen Lin + + * tree-data-ref.c (free_dependence_relations): Adjust to pass vec + by reference. + (free_data_refs): Likewise. + * tree-data-ref.h (free_dependence_relations): Likewise. + (free_data_refs): Likewise. + * tree-predcom.c (struct chain): Use auto_vec instead of vec for + members. + (struct component): Likewise. + (pcom_worker::pcom_worker): Adjust for auto_vec and renaming changes. + (pcom_worker::~pcom_worker): Likewise. + (pcom_worker::release_chain): Adjust as auto_vec changes. + (pcom_worker::loop): Rename to ... + (pcom_worker::m_loop): ... this. + (pcom_worker::datarefs): Rename to ... + (pcom_worker::m_datarefs): ... this. Use auto_vec instead of vec. 
+ (pcom_worker::dependences): Rename to ... + (pcom_worker::m_dependences): ... this. Use auto_vec instead of vec. + (pcom_worker::chains): Rename to ... + (pcom_worker::m_chains): ... this. Use auto_vec instead of vec. + (pcom_worker::looparound_phis): Rename to ... + (pcom_worker::m_looparound_phis): ... this. Use auto_vec instead of + vec. + (pcom_worker::cache): Rename to ... + (pcom_worker::m_cache): ... this. Use auto_vec instead of vec. + (pcom_worker::release_chain): Adjust for auto_vec changes. + (pcom_worker::release_chains): Adjust for auto_vec and renaming + changes. + (release_component): Remove. + (release_components): Adjust for release_component removal. + (component_of): Adjust to use vec. + (merge_comps): Likewise. + (pcom_worker::aff_combination_dr_offset): Adjust for renaming changes. + (pcom_worker::determine_offset): Likewise. + (class comp_ptrs): Remove. + (pcom_worker::split_data_refs_to_components): Adjust for renaming + changes, for comp_ptrs removal with auto_vec. + (pcom_worker::suitable_component_p): Adjust for renaming changes. + (pcom_worker::filter_suitable_components): Adjust for release_component + removal. + (pcom_worker::valid_initializer_p): Adjust for renaming changes. + (pcom_worker::find_looparound_phi): Likewise. + (pcom_worker::add_looparound_copies): Likewise. + (pcom_worker::determine_roots_comp): Likewise. + (pcom_worker::single_nonlooparound_use): Likewise. + (pcom_worker::execute_pred_commoning_chain): Likewise. + (pcom_worker::execute_pred_commoning): Likewise. + (pcom_worker::try_combine_chains): Likewise. + (pcom_worker::prepare_initializers_chain): Likewise. + (pcom_worker::prepare_initializers): Likewise. + (pcom_worker::prepare_finalizers_chain): Likewise. + (pcom_worker::prepare_finalizers): Likewise. + (pcom_worker::tree_predictive_commoning_loop): Likewise. 
+ 2021-07-20 Martin Sebor PR middle-end/101397 diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 4bc9d90..9f48762 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20210721 +20210722 diff --git a/gcc/analyzer/ChangeLog b/gcc/analyzer/ChangeLog index f32fe08..272bf15 100644 --- a/gcc/analyzer/ChangeLog +++ b/gcc/analyzer/ChangeLog @@ -1,3 +1,58 @@ +2021-07-21 David Malcolm + + PR analyzer/101547 + * sm-file.cc (file_leak::emit): Handle m_arg being NULL. + (file_leak::describe_final_event): Handle ev.m_expr being NULL. + +2021-07-21 David Malcolm + + PR analyzer/101522 + * store.cc (binding_cluster::purge_state_involving): Don't change + m_map whilst iterating through it. + +2021-07-21 David Malcolm + + * region-model.cc (region_model::handle_phi): Add "old_state" + param and use it. + (region_model::update_for_phis): Update so that all of the phi + stmts are effectively handled simultaneously, rather than in + order. + * region-model.h (region_model::handle_phi): Add "old_state" + param. + * state-purge.cc (self_referential_phi_p): Replace with... + (name_used_by_phis_p): ...this new function. + (state_purge_per_ssa_name::process_point): Update to use the + above, so that all phi stmts at a basic block are effectively + considered simultaneously, and only consider the phi arguments for + the pertinent in-edge. + * supergraph.cc (cfg_superedge::get_phi_arg_idx): New. + (cfg_superedge::get_phi_arg): Use the above. + * supergraph.h (cfg_superedge::get_phi_arg_idx): New decl. + +2021-07-21 David Malcolm + + * state-purge.cc (state_purge_annotator::add_node_annotations): + Rather than erroneously always using the NULL in-edge, determine + each relevant in-edge, and print the appropriate data for each + in-edge. Use print_needed to print the data as comma-separated + lists of SSA names. + (print_vec_of_names): Add "within_table" param and use it. + (state_purge_annotator::add_stmt_annotations): Factor out + collation and printing code into... 
+ (state_purge_annotator::print_needed): ...this new function. + * state-purge.h (state_purge_annotator::print_needed): New decl. + +2021-07-21 David Malcolm + + * program-point.cc (function_point::print): Show src BB index at + BEFORE_SUPERNODE. + +2021-07-21 David Malcolm + + * svalue.cc (infix_p): New. + (binop_svalue::dump_to_pp): Use it to print MIN_EXPR and MAX_EXPR + in prefix form, rather than infix. + 2021-07-19 David Malcolm PR analyzer/101503 diff --git a/gcc/c-family/ChangeLog b/gcc/c-family/ChangeLog index 87d658a..55f18d9 100644 --- a/gcc/c-family/ChangeLog +++ b/gcc/c-family/ChangeLog @@ -1,3 +1,9 @@ +2021-07-21 Thomas Schwinge + Joseph Myers + Cesar Philippidis + + * c-pragma.h (pragma_omp_clause): Add 'PRAGMA_OACC_CLAUSE_NOHOST'. + 2021-07-20 Martin Sebor * c-common.c (c_build_shufflevector): Adjust by-value argument to diff --git a/gcc/c/ChangeLog b/gcc/c/ChangeLog index 9cd1dd6..367c9c7 100644 --- a/gcc/c/ChangeLog +++ b/gcc/c/ChangeLog @@ -1,3 +1,12 @@ +2021-07-21 Thomas Schwinge + Joseph Myers + Cesar Philippidis + + * c-parser.c (c_parser_omp_clause_name): Handle 'nohost'. + (c_parser_oacc_all_clauses): Handle 'PRAGMA_OACC_CLAUSE_NOHOST'. + (OACC_ROUTINE_CLAUSE_MASK): Add 'PRAGMA_OACC_CLAUSE_NOHOST'. + * c-typeck.c (c_finish_omp_clauses): Handle 'OMP_CLAUSE_NOHOST'. + 2021-07-20 Martin Sebor * c-tree.h (c_build_function_call_vec): Adjust by-value argument to diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index bb46293..37ea7f5 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,19 @@ +2021-07-21 Thomas Schwinge + Joseph Myers + Cesar Philippidis + + * parser.c (cp_parser_omp_clause_name): Handle 'nohost'. + (cp_parser_oacc_all_clauses): Handle 'PRAGMA_OACC_CLAUSE_NOHOST'. + (OACC_ROUTINE_CLAUSE_MASK): Add 'PRAGMA_OACC_CLAUSE_NOHOST'. + * pt.c (tsubst_omp_clauses): Handle 'OMP_CLAUSE_NOHOST'. + * semantics.c (finish_omp_clauses): Likewise. 
+ +2021-07-21 Jakub Jelinek + + PR c++/101516 + * semantics.c (finish_omp_reduction_clause): Also call + complete_type_or_else and return true if it fails. + 2021-07-19 Iain Sandoe PR c++/95520 diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index f1fe435..1c6aa03 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,34 @@ +2021-07-21 Thomas Schwinge + Joseph Myers + Cesar Philippidis + + * dump-parse-tree.c (show_attr): Update. + * gfortran.h (symbol_attribute): Add 'oacc_routine_nohost' member. + (gfc_omp_clauses): Add 'nohost' member. + * module.c (ab_attribute): Add 'AB_OACC_ROUTINE_NOHOST'. + (attr_bits, mio_symbol_attribute): Update. + * openmp.c (omp_mask2): Add 'OMP_CLAUSE_NOHOST'. + (gfc_match_omp_clauses): Handle 'OMP_CLAUSE_NOHOST'. + (OACC_ROUTINE_CLAUSES): Add 'OMP_CLAUSE_NOHOST'. + (gfc_match_oacc_routine): Update. + * trans-decl.c (add_attributes_to_decl): Update. + * trans-openmp.c (gfc_trans_omp_clauses): Likewise. + +2021-07-21 Harald Anlauf + + PR fortran/101514 + * target-memory.c (gfc_interpret_derived): Size of array component + of derived type can only be computed here for explicit shape. + * trans-types.c (gfc_get_nodesc_array_type): Do not dereference + NULL pointers. + +2021-07-21 Tobias Burnus + + * decl.c (gfc_verify_c_interop_param): Update for F2008 + F2018 + changes; reject unsupported bits with 'Error: Sorry,'. + * trans-expr.c (gfc_conv_procedure_call): Fix condition to + For using CFI descriptor with characters. + 2021-07-18 Harald Anlauf PR fortran/101084 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 2a1f24b..141e948 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,90 @@ +2021-07-21 David Malcolm + + PR analyzer/101547 + * gcc.dg/analyzer/pr101547.c: New test. + +2021-07-21 David Malcolm + + PR analyzer/101522 + * g++.dg/analyzer/pr101522.C: New test. 
+ +2021-07-21 Thomas Schwinge + Joseph Myers + Cesar Philippidis + + * c-c++-common/goacc/classify-routine-nohost.c: New file. + * c-c++-common/goacc/classify-routine.c: Update. + * c-c++-common/goacc/routine-2.c: Likewise. + * c-c++-common/goacc/routine-nohost-1.c: New file. + * c-c++-common/goacc/routine-nohost-2.c: Likewise. + * g++.dg/goacc/template.C: Update. + * gfortran.dg/goacc/classify-routine-nohost.f95: New file. + * gfortran.dg/goacc/classify-routine.f95: Update. + * gfortran.dg/goacc/pure-elemental-procedures-2.f90: Likewise. + * gfortran.dg/goacc/routine-6.f90: Likewise. + * gfortran.dg/goacc/routine-intrinsic-2.f: Likewise. + * gfortran.dg/goacc/routine-module-1.f90: Likewise. + * gfortran.dg/goacc/routine-module-2.f90: Likewise. + * gfortran.dg/goacc/routine-module-3.f90: Likewise. + * gfortran.dg/goacc/routine-module-mod-1.f90: Likewise. + * gfortran.dg/goacc/routine-multiple-directives-1.f90: Likewise. + * gfortran.dg/goacc/routine-multiple-directives-2.f90: Likewise. + +2021-07-21 Thomas Schwinge + + * g++.dg/goacc/template.C: Fix '#pragma atomic update' typo. + +2021-07-21 David Malcolm + + * gcc.dg/analyzer/explode-2.c: Remove xfail. + * gcc.dg/analyzer/explode-2a.c: Remove expected leak warning on + while stmt. + * gcc.dg/analyzer/phi-2.c: New test. + +2021-07-21 Bill Schmidt + + PR target/101531 + * gcc.target/powerpc/pr101129.c: Adjust. + +2021-07-21 H.J. Lu + + PR target/101549 + * gcc.target/i386/crc32-6.c: New test. + +2021-07-21 Harald Anlauf + + PR fortran/101514 + * gfortran.dg/pr101514.f90: New test. + +2021-07-21 Jakub Jelinek + + PR middle-end/101535 + * c-c++-common/gomp/pr101535-1.c: New test. + * c-c++-common/gomp/pr101535-2.c: New test. + +2021-07-21 Jakub Jelinek + + PR c++/101516 + * g++.dg/gomp/pr101516.C: New test. + +2021-07-21 Tobias Burnus + + * gfortran.dg/iso_c_binding_char_1.f90: Update dg-error. + * gfortran.dg/pr32599.f03: Use -std=-f2003 + update comment. + * gfortran.dg/bind_c_char_10.f90: New test. 
+ * gfortran.dg/bind_c_char_6.f90: New test. + * gfortran.dg/bind_c_char_7.f90: New test. + * gfortran.dg/bind_c_char_8.f90: New test. + * gfortran.dg/bind_c_char_9.f90: New test. + +2021-07-21 Richard Sandiford + + * gcc.dg/unroll-10.c: New test. + +2021-07-21 Richard Sandiford + + * gcc.dg/unroll-9.c: New test/ + 2021-07-20 Martin Sebor PR middle-end/101397 -- cgit v1.1 From a56c251898ea70b46798d7893a871bcfe318529b Mon Sep 17 00:00:00 2001 From: liuhongt Date: Tue, 20 Jul 2021 18:32:35 +0800 Subject: Support logic shift left/right for avx512 mask type. gcc/ChangeLog: * config/i386/constraints.md (Wb): New constraint. (Ww): Ditto. * config/i386/i386.md (*ashlhi3_1): Extend to avx512 mask shift. (*ashlqi3_1): Ditto. (*3_1): Split to .. (*ashr3_1): this, ... (*lshr3_1): and this, also extend this pattern to avx512 mask registers. (*3_1): Split to .. (*ashr3_1): this, ... (*lshrqi3_1): and this, also extend this pattern to avx512 mask registers. (*lshrhi3_1): And this, also extend this pattern to avx512 mask registers. * config/i386/sse.md (k): New define_split after it to convert generic shift pattern to mask shift ones. gcc/testsuite/ChangeLog: * gcc.target/i386/mask-shift.c: New test. --- gcc/config/i386/constraints.md | 10 ++ gcc/config/i386/i386.md | 162 ++++++++++++++++++++++++----- gcc/config/i386/sse.md | 14 +++ gcc/testsuite/gcc.target/i386/mask-shift.c | 83 +++++++++++++++ 4 files changed, 245 insertions(+), 24 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/mask-shift.c (limited to 'gcc') diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md index 485e3f5..4aa28a5 100644 --- a/gcc/config/i386/constraints.md +++ b/gcc/config/i386/constraints.md @@ -222,6 +222,16 @@ (match_operand 0 "vector_all_ones_operand")))) ;; Integer constant constraints. +(define_constraint "Wb" + "Integer constant in the range 0 @dots{} 7, for 8-bit shifts." 
+ (and (match_code "const_int") + (match_test "IN_RANGE (ival, 0, 7)"))) + +(define_constraint "Ww" + "Integer constant in the range 0 @dots{} 15, for 16-bit shifts." + (and (match_code "const_int") + (match_test "IN_RANGE (ival, 0, 15)"))) + (define_constraint "I" "Integer constant in the range 0 @dots{} 31, for 32-bit shifts." (and (match_code "const_int") diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 8b809c4..44ae18e 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -1136,6 +1136,7 @@ ;; Immediate operand constraint for shifts. (define_mode_attr S [(QI "I") (HI "I") (SI "I") (DI "J") (TI "O")]) +(define_mode_attr KS [(QI "Wb") (HI "Ww") (SI "I") (DI "J")]) ;; Print register name in the specified mode. (define_mode_attr k [(QI "b") (HI "w") (SI "k") (DI "q")]) @@ -11088,9 +11089,9 @@ (set_attr "mode" "")]) (define_insn "*ashl3_1" - [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r") - (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm") - (match_operand:QI 2 "nonmemory_operand" "c,M,r"))) + [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,?k") + (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm,k") + (match_operand:QI 2 "nonmemory_operand" "c,M,r,"))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (ASHIFT, mode, operands)" { @@ -11098,6 +11099,7 @@ { case TYPE_LEA: case TYPE_ISHIFTX: + case TYPE_MSKLOG: return "#"; case TYPE_ALU: @@ -11113,7 +11115,7 @@ return "sal{}\t{%2, %0|%0, %2}"; } } - [(set_attr "isa" "*,*,bmi2") + [(set_attr "isa" "*,*,bmi2,avx512bw") (set (attr "type") (cond [(eq_attr "alternative" "1") (const_string "lea") @@ -11123,6 +11125,8 @@ (match_operand 0 "register_operand")) (match_operand 2 "const1_operand")) (const_string "alu") + (eq_attr "alternative" "3") + (const_string "msklog") ] (const_string "ishift"))) (set (attr "length_immediate") @@ -11218,15 +11222,16 @@ "operands[2] = gen_lowpart (SImode, operands[2]);") (define_insn 
"*ashlhi3_1" - [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,Yp") - (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l") - (match_operand:QI 2 "nonmemory_operand" "cI,M"))) + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,Yp,?k") + (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l,k") + (match_operand:QI 2 "nonmemory_operand" "cI,M,Ww"))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (ASHIFT, HImode, operands)" { switch (get_attr_type (insn)) { case TYPE_LEA: + case TYPE_MSKLOG: return "#"; case TYPE_ALU: @@ -11241,9 +11246,12 @@ return "sal{w}\t{%2, %0|%0, %2}"; } } - [(set (attr "type") + [(set_attr "isa" "*,*,avx512f") + (set (attr "type") (cond [(eq_attr "alternative" "1") (const_string "lea") + (eq_attr "alternative" "2") + (const_string "msklog") (and (and (match_test "TARGET_DOUBLE_WITH_ADD") (match_operand 0 "register_operand")) (match_operand 2 "const1_operand")) @@ -11259,18 +11267,19 @@ (match_test "optimize_function_for_size_p (cfun)"))))) (const_string "0") (const_string "*"))) - (set_attr "mode" "HI,SI")]) + (set_attr "mode" "HI,SI,HI")]) (define_insn "*ashlqi3_1" - [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yp") - (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l") - (match_operand:QI 2 "nonmemory_operand" "cI,cI,M"))) + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yp,?k") + (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l,k") + (match_operand:QI 2 "nonmemory_operand" "cI,cI,M,Wb"))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (ASHIFT, QImode, operands)" { switch (get_attr_type (insn)) { case TYPE_LEA: + case TYPE_MSKLOG: return "#"; case TYPE_ALU: @@ -11298,9 +11307,12 @@ } } } - [(set (attr "type") + [(set_attr "isa" "*,*,*,avx512dq") + (set (attr "type") (cond [(eq_attr "alternative" "2") (const_string "lea") + (eq_attr "alternative" "3") + (const_string "msklog") (and (and (match_test "TARGET_DOUBLE_WITH_ADD") (match_operand 0 "register_operand")) 
(match_operand 2 "const1_operand")) @@ -11316,7 +11328,7 @@ (match_test "optimize_function_for_size_p (cfun)"))))) (const_string "0") (const_string "*"))) - (set_attr "mode" "QI,SI,SI") + (set_attr "mode" "QI,SI,SI,QI") ;; Potential partial reg stall on alternative 1. (set (attr "preferred_for_speed") (cond [(eq_attr "alternative" "1") @@ -11818,13 +11830,13 @@ [(set_attr "type" "ishiftx") (set_attr "mode" "")]) -(define_insn "*3_1" +(define_insn "*ashr3_1" [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r") - (any_shiftrt:SWI48 + (ashiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,rm") (match_operand:QI 2 "nonmemory_operand" "c,r"))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (, mode, operands)" + "ix86_binary_operator_ok (ASHIFTRT, mode, operands)" { switch (get_attr_type (insn)) { @@ -11834,9 +11846,9 @@ default: if (operands[2] == const1_rtx && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) - return "{}\t%0"; + return "sar{}\t%0"; else - return "{}\t{%2, %0|%0, %2}"; + return "sar{}\t{%2, %0|%0, %2}"; } } [(set_attr "isa" "*,bmi2") @@ -11850,6 +11862,40 @@ (const_string "*"))) (set_attr "mode" "")]) +(define_insn "*lshr3_1" + [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,?k") + (lshiftrt:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,k") + (match_operand:QI 2 "nonmemory_operand" "c,r,"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (LSHIFTRT, mode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_ISHIFTX: + case TYPE_MSKLOG: + return "#"; + + default: + if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + return "shr{}\t%0"; + else + return "shr{}\t{%2, %0|%0, %2}"; + } +} + [(set_attr "isa" "*,bmi2,avx512bw") + (set_attr "type" "ishift,ishiftx,msklog") + (set (attr "length_immediate") + (if_then_else + (and (and (match_operand 2 "const1_operand") + (eq_attr "alternative" "0")) + (ior (match_test "TARGET_SHIFT1") + 
(match_test "optimize_function_for_size_p (cfun)"))) + (const_string "0") + (const_string "*"))) + (set_attr "mode" "")]) + ;; Convert shift to the shiftx pattern to avoid flags dependency. (define_split [(set (match_operand:SWI48 0 "register_operand") @@ -11915,19 +11961,19 @@ (zero_extend:DI (any_shiftrt:SI (match_dup 1) (match_dup 2))))] "operands[2] = gen_lowpart (SImode, operands[2]);") -(define_insn "*3_1" +(define_insn "*ashr3_1" [(set (match_operand:SWI12 0 "nonimmediate_operand" "=m") - (any_shiftrt:SWI12 + (ashiftrt:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "0") (match_operand:QI 2 "nonmemory_operand" "c"))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (, mode, operands)" + "ix86_binary_operator_ok (ASHIFTRT, mode, operands)" { if (operands[2] == const1_rtx && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) - return "{}\t%0"; + return "sar{}\t%0"; else - return "{}\t{%2, %0|%0, %2}"; + return "sar{}\t{%2, %0|%0, %2}"; } [(set_attr "type" "ishift") (set (attr "length_immediate") @@ -11939,6 +11985,74 @@ (const_string "*"))) (set_attr "mode" "")]) +(define_insn "*lshrqi3_1" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,?k") + (lshiftrt:QI + (match_operand:QI 1 "nonimmediate_operand" "0, k") + (match_operand:QI 2 "nonmemory_operand" "cI,Wb"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (LSHIFTRT, QImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_ISHIFT: + if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + return "shr{b}\t%0"; + else + return "shr{b}\t{%2, %0|%0, %2}"; + case TYPE_MSKLOG: + return "#"; + default: + gcc_unreachable (); + } +} + [(set_attr "isa" "*,avx512dq") + (set_attr "type" "ishift,msklog") + (set (attr "length_immediate") + (if_then_else + (and (and (match_operand 2 "const1_operand") + (eq_attr "alternative" "0")) + (ior (match_test "TARGET_SHIFT1") + (match_test "optimize_function_for_size_p (cfun)"))) + (const_string "0") 
+ (const_string "*"))) + (set_attr "mode" "QI")]) + +(define_insn "*lshrhi3_1" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm, ?k") + (lshiftrt:HI + (match_operand:HI 1 "nonimmediate_operand" "0, k") + (match_operand:QI 2 "nonmemory_operand" "cI, Ww"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_ISHIFT: + if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + return "shr{w}\t%0"; + else + return "shr{w}\t{%2, %0|%0, %2}"; + case TYPE_MSKLOG: + return "#"; + default: + gcc_unreachable (); + } +} + [(set_attr "isa" "*, avx512f") + (set_attr "type" "ishift,msklog") + (set (attr "length_immediate") + (if_then_else + (and (and (match_operand 2 "const1_operand") + (eq_attr "alternative" "0")) + (ior (match_test "TARGET_SHIFT1") + (match_test "optimize_function_for_size_p (cfun)"))) + (const_string "0") + (const_string "*"))) + (set_attr "mode" "HI")]) + (define_insn "*3_1_slp" [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+")) (any_shiftrt:SWI12 (match_operand:SWI12 1 "register_operand" "0") diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index ab29999..f8759e4 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -1755,6 +1755,20 @@ (set_attr "prefix" "vex") (set_attr "mode" "")]) +(define_split + [(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand") + (any_lshift:SWI1248_AVX512BW + (match_operand:SWI1248_AVX512BW 1 "mask_reg_operand") + (match_operand 2 "const_int_operand"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_AVX512F && reload_completed" + [(parallel + [(set (match_dup 0) + (any_lshift:SWI1248_AVX512BW + (match_dup 1) + (match_dup 2))) + (unspec [(const_int 0)] UNSPEC_MASKOP)])]) + (define_insn "ktest" [(set (reg:CC FLAGS_REG) (unspec:CC diff --git a/gcc/testsuite/gcc.target/i386/mask-shift.c b/gcc/testsuite/gcc.target/i386/mask-shift.c new file mode 100644 index 
0000000..4cb6ef3 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/mask-shift.c @@ -0,0 +1,83 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512bw -mavx512dq -O2" } */ + +#include +void +fooq (__m512i a, __m512i b, void* p) +{ + __mmask8 m1 = _mm512_cmpeq_epi64_mask (a, b); + m1 >>= 4; + _mm512_mask_storeu_epi64 (p, m1, a); +} + +/* { dg-final { scan-assembler-times {(?n)kshiftrb} "1" } } */ + +void +food (__m512i a, __m512i b, void* p) +{ + __mmask16 m1 = _mm512_cmpeq_epi32_mask (a, b); + m1 >>= 8; + _mm512_mask_storeu_epi32 (p, m1, a); +} + +/* { dg-final { scan-assembler-times {(?n)kshiftrw} "1" } } */ + +void +foow (__m512i a, __m512i b, void* p) +{ + __mmask32 m1 = _mm512_cmpeq_epi16_mask (a, b); + m1 >>= 16; + _mm512_mask_storeu_epi16 (p, m1, a); +} + +/* { dg-final { scan-assembler-times {(?n)kshiftrd} "1" } } */ + +void +foob (__m512i a, __m512i b, void* p) +{ + __mmask64 m1 = _mm512_cmpeq_epi8_mask (a, b); + m1 >>= 32; + _mm512_mask_storeu_epi8 (p, m1, a); +} + +/* { dg-final { scan-assembler-times {(?n)kshiftrq} "1" { target { ! ia32 } } } } */ + +void +fooq1 (__m512i a, __m512i b, void* p) +{ + __mmask8 m1 = _mm512_cmpeq_epi64_mask (a, b); + m1 <<= 4; + _mm512_mask_storeu_epi64 (p, m1, a); +} + +/* { dg-final { scan-assembler-times {(?n)kshiftlb} "1" } } */ + +void +food1 (__m512i a, __m512i b, void* p) +{ + __mmask16 m1 = _mm512_cmpeq_epi32_mask (a, b); + m1 <<= 8; + _mm512_mask_storeu_epi32 (p, m1, a); +} + +/* { dg-final { scan-assembler-times {(?n)kshiftlw} "1" } } */ + +void +foow1 (__m512i a, __m512i b, void* p) +{ + __mmask32 m1 = _mm512_cmpeq_epi16_mask (a, b); + m1 <<= 16; + _mm512_mask_storeu_epi16 (p, m1, a); +} + +/* { dg-final { scan-assembler-times {(?n)kshiftld} "1" } } */ + +void +foob1 (__m512i a, __m512i b, void* p) +{ + __mmask64 m1 = _mm512_cmpeq_epi8_mask (a, b); + m1 <<= 32; + _mm512_mask_storeu_epi8 (p, m1, a); +} + +/* { dg-final { scan-assembler-times {(?n)kshiftlq} "1" { target { ! 
ia32 } } } } */ -- cgit v1.1 From a6291d88d5b6c17d41950e21d7d452f7f0f73020 Mon Sep 17 00:00:00 2001 From: liuhongt Date: Tue, 13 Jul 2021 18:22:03 +0800 Subject: Remove pass_cpb which is related to enable avx512 embedded broadcast from constant pool. By optimizing vector movement to broadcast in ix86_expand_vector_move during pass_expand, pass_reload/LRA can automatically generate an avx512 embedded broadcast, pass_cpb is not needed. Considering that in the absence of avx512f, broadcast from memory is still slightly faster than loading the entire memory, so always enable broadcast. benchmark: https://gitlab.com/x86-benchmarks/microbenchmark/-/tree/vaddps/broadcast The performance diff strategy : cycles memory : 1046611188 memory : 1255420817 memory : 1044720793 memory : 1253414145 average : 1097868397 broadcast : 1044430688 broadcast : 1044477630 broadcast : 1253554603 broadcast : 1044561934 average : 1096756213 But however broadcast has larger size. the size diff size broadcast.o text data bss dec hex filename 137 0 0 137 89 broadcast.o size memory.o text data bss dec hex filename 115 0 0 115 73 memory.o gcc/ChangeLog: * config/i386/i386-expand.c (ix86_broadcast_from_integer_constant): Rename to .. (ix86_broadcast_from_constant): .. this, and extend it to handle float mode. (ix86_expand_vector_move): Extend to float mode. * config/i386/i386-features.c (replace_constant_pool_with_broadcast): Remove. (remove_partial_avx_dependency_gate): Ditto. (constant_pool_broadcast): Ditto. (class pass_constant_pool_broadcast): Ditto. (make_pass_constant_pool_broadcast): Ditto. (remove_partial_avx_dependency): Adjust gate. * config/i386/i386-passes.def: Remove pass_constant_pool_broadcast. * config/i386/i386-protos.h (make_pass_constant_pool_broadcast): Remove. gcc/testsuite/ChangeLog: * gcc.target/i386/fuse-caller-save-xmm.c: Adjust testcase. 
--- gcc/config/i386/i386-expand.c | 36 +++-- gcc/config/i386/i386-features.c | 157 +-------------------- gcc/config/i386/i386-passes.def | 1 - gcc/config/i386/i386-protos.h | 1 - .../gcc.target/i386/fuse-caller-save-xmm.c | 2 +- 5 files changed, 34 insertions(+), 163 deletions(-) (limited to 'gcc') diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c index 69ea79e..896bd68 100644 --- a/gcc/config/i386/i386-expand.c +++ b/gcc/config/i386/i386-expand.c @@ -453,8 +453,10 @@ ix86_expand_move (machine_mode mode, rtx operands[]) emit_insn (gen_rtx_SET (op0, op1)); } +/* OP is a memref of CONST_VECTOR, return scalar constant mem + if CONST_VECTOR is a vec_duplicate, else return NULL. */ static rtx -ix86_broadcast_from_integer_constant (machine_mode mode, rtx op) +ix86_broadcast_from_constant (machine_mode mode, rtx op) { int nunits = GET_MODE_NUNITS (mode); if (nunits < 2) @@ -462,7 +464,8 @@ ix86_broadcast_from_integer_constant (machine_mode mode, rtx op) /* Don't use integer vector broadcast if we can't move from GPR to SSE register directly. */ - if (!TARGET_INTER_UNIT_MOVES_TO_VEC) + if (!TARGET_INTER_UNIT_MOVES_TO_VEC + && INTEGRAL_MODE_P (mode)) return nullptr; /* Convert CONST_VECTOR to a non-standard SSE constant integer @@ -470,12 +473,17 @@ ix86_broadcast_from_integer_constant (machine_mode mode, rtx op) if (!(TARGET_AVX2 || (TARGET_AVX && (GET_MODE_INNER (mode) == SImode - || GET_MODE_INNER (mode) == DImode))) + || GET_MODE_INNER (mode) == DImode)) + || FLOAT_MODE_P (mode)) || standard_sse_constant_p (op, mode)) return nullptr; - /* Don't broadcast from a 64-bit integer constant in 32-bit mode. */ - if (GET_MODE_INNER (mode) == DImode && !TARGET_64BIT) + /* Don't broadcast from a 64-bit integer constant in 32-bit mode. + We can still put 64-bit integer constant in memory when + avx512 embed broadcast is available. 
*/ + if (GET_MODE_INNER (mode) == DImode && !TARGET_64BIT + && (!TARGET_AVX512F + || (GET_MODE_SIZE (mode) < 64 && !TARGET_AVX512VL))) return nullptr; if (GET_MODE_INNER (mode) == TImode) @@ -561,17 +569,29 @@ ix86_expand_vector_move (machine_mode mode, rtx operands[]) if (can_create_pseudo_p () && GET_MODE_SIZE (mode) >= 16 - && GET_MODE_CLASS (mode) == MODE_VECTOR_INT + && VECTOR_MODE_P (mode) && (MEM_P (op1) && SYMBOL_REF_P (XEXP (op1, 0)) && CONSTANT_POOL_ADDRESS_P (XEXP (op1, 0)))) { - rtx first = ix86_broadcast_from_integer_constant (mode, op1); + rtx first = ix86_broadcast_from_constant (mode, op1); if (first != nullptr) { /* Broadcast to XMM/YMM/ZMM register from an integer - constant. */ + constant or scalar mem. */ + /* Hard registers are used for 2 purposes: + 1. Prevent stack realignment when the original code + doesn't use vector registers, which is the same for + memcpy and memset. + 2. Prevent combine to convert constant broadcast to + load from constant pool. */ op1 = ix86_gen_scratch_sse_rtx (mode); + if (FLOAT_MODE_P (mode) + || (!TARGET_64BIT && GET_MODE_INNER (mode) == DImode)) + { + first = force_const_mem (GET_MODE_INNER (mode), first); + op1 = gen_reg_rtx (mode); + } bool ok = ix86_expand_vector_init_duplicate (false, mode, op1, first); gcc_assert (ok); diff --git a/gcc/config/i386/i386-features.c b/gcc/config/i386/i386-features.c index cbd430a..d9c6652 100644 --- a/gcc/config/i386/i386-features.c +++ b/gcc/config/i386/i386-features.c @@ -2136,81 +2136,6 @@ make_pass_insert_endbr_and_patchable_area (gcc::context *ctxt) return new pass_insert_endbr_and_patchable_area (ctxt); } -/* Replace all one-value const vector that are referenced by SYMBOL_REFs in x - with embedded broadcast. 
i.e.transform - - vpaddq .LC0(%rip), %zmm0, %zmm0 - ret - .LC0: - .quad 3 - .quad 3 - .quad 3 - .quad 3 - .quad 3 - .quad 3 - .quad 3 - .quad 3 - - to - - vpaddq .LC0(%rip){1to8}, %zmm0, %zmm0 - ret - .LC0: - .quad 3 */ -static void -replace_constant_pool_with_broadcast (rtx_insn *insn) -{ - subrtx_ptr_iterator::array_type array; - FOR_EACH_SUBRTX_PTR (iter, array, &PATTERN (insn), ALL) - { - rtx *loc = *iter; - rtx x = *loc; - rtx broadcast_mem, vec_dup, constant, first; - machine_mode mode; - - /* Constant pool. */ - if (!MEM_P (x) - || !SYMBOL_REF_P (XEXP (x, 0)) - || !CONSTANT_POOL_ADDRESS_P (XEXP (x, 0))) - continue; - - /* Const vector. */ - mode = GET_MODE (x); - if (!VECTOR_MODE_P (mode)) - return; - constant = get_pool_constant (XEXP (x, 0)); - if (GET_CODE (constant) != CONST_VECTOR) - return; - - /* There could be some rtx like - (mem/u/c:V16QI (symbol_ref/u:DI ("*.LC1"))) - but with "*.LC1" refer to V2DI constant vector. */ - if (GET_MODE (constant) != mode) - { - constant = simplify_subreg (mode, constant, GET_MODE (constant), 0); - if (constant == NULL_RTX || GET_CODE (constant) != CONST_VECTOR) - return; - } - first = XVECEXP (constant, 0, 0); - - for (int i = 1; i < GET_MODE_NUNITS (mode); ++i) - { - rtx tmp = XVECEXP (constant, 0, i); - /* Vector duplicate value. */ - if (!rtx_equal_p (tmp, first)) - return; - } - - /* Replace with embedded broadcast. */ - broadcast_mem = force_const_mem (GET_MODE_INNER (mode), first); - vec_dup = gen_rtx_VEC_DUPLICATE (mode, broadcast_mem); - validate_change (insn, loc, vec_dup, 0); - - /* At most 1 memory_operand in an insn. */ - return; - } -} - /* At entry of the nearest common dominator for basic blocks with conversions, generate a single vxorps %xmmN, %xmmN, %xmmN @@ -2249,10 +2174,6 @@ remove_partial_avx_dependency (void) if (!NONDEBUG_INSN_P (insn)) continue; - /* Handle AVX512 embedded broadcast here to save compile time. 
*/ - if (TARGET_AVX512F) - replace_constant_pool_with_broadcast (insn); - set = single_set (insn); if (!set) continue; @@ -2384,16 +2305,6 @@ remove_partial_avx_dependency (void) return 0; } -static bool -remove_partial_avx_dependency_gate () -{ - return (TARGET_AVX - && TARGET_SSE_PARTIAL_REG_DEPENDENCY - && TARGET_SSE_MATH - && optimize - && optimize_function_for_speed_p (cfun)); -} - namespace { const pass_data pass_data_remove_partial_avx_dependency = @@ -2419,7 +2330,11 @@ public: /* opt_pass methods: */ virtual bool gate (function *) { - return remove_partial_avx_dependency_gate (); + return (TARGET_AVX + && TARGET_SSE_PARTIAL_REG_DEPENDENCY + && TARGET_SSE_MATH + && optimize + && optimize_function_for_speed_p (cfun)); } virtual unsigned int execute (function *) @@ -2436,68 +2351,6 @@ make_pass_remove_partial_avx_dependency (gcc::context *ctxt) return new pass_remove_partial_avx_dependency (ctxt); } -/* For const vector having one duplicated value, there's no need to put - whole vector in the constant pool when target supports embedded broadcast. 
*/ -static unsigned int -constant_pool_broadcast (void) -{ - timevar_push (TV_MACH_DEP); - rtx_insn *insn; - - for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) - { - if (INSN_P (insn)) - replace_constant_pool_with_broadcast (insn); - } - timevar_pop (TV_MACH_DEP); - return 0; -} - -namespace { - -const pass_data pass_data_constant_pool_broadcast = -{ - RTL_PASS, /* type */ - "cpb", /* name */ - OPTGROUP_NONE, /* optinfo_flags */ - TV_MACH_DEP, /* tv_id */ - 0, /* properties_required */ - 0, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - TODO_df_finish, /* todo_flags_finish */ -}; - -class pass_constant_pool_broadcast : public rtl_opt_pass -{ -public: - pass_constant_pool_broadcast (gcc::context *ctxt) - : rtl_opt_pass (pass_data_constant_pool_broadcast, ctxt) - {} - - /* opt_pass methods: */ - virtual bool gate (function *) - { - /* Return false if rpad pass gate is true. - replace_constant_pool_with_broadcast is called - from both this pass and rpad pass. */ - return (TARGET_AVX512F && !remove_partial_avx_dependency_gate ()); - } - - virtual unsigned int execute (function *) - { - return constant_pool_broadcast (); - } -}; // class pass_cpb - -} // anon namespace - -rtl_opt_pass * -make_pass_constant_pool_broadcast (gcc::context *ctxt) -{ - return new pass_constant_pool_broadcast (ctxt); -} - /* This compares the priority of target features in function DECL1 and DECL2. It returns positive value if DECL1 is higher priority, negative value if DECL2 is higher priority and 0 if they are the diff --git a/gcc/config/i386/i386-passes.def b/gcc/config/i386/i386-passes.def index 44df00e..29baf8a 100644 --- a/gcc/config/i386/i386-passes.def +++ b/gcc/config/i386/i386-passes.def @@ -33,4 +33,3 @@ along with GCC; see the file COPYING3. 
If not see INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_insert_endbr_and_patchable_area); INSERT_PASS_AFTER (pass_combine, 1, pass_remove_partial_avx_dependency); - INSERT_PASS_AFTER (pass_combine, 1, pass_constant_pool_broadcast); diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index 51376fc..07ac02a 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -395,4 +395,3 @@ extern rtl_opt_pass *make_pass_insert_endbr_and_patchable_area (gcc::context *); extern rtl_opt_pass *make_pass_remove_partial_avx_dependency (gcc::context *); -extern rtl_opt_pass *make_pass_constant_pool_broadcast (gcc::context *); diff --git a/gcc/testsuite/gcc.target/i386/fuse-caller-save-xmm.c b/gcc/testsuite/gcc.target/i386/fuse-caller-save-xmm.c index 4deff93..b0d3dc3 100644 --- a/gcc/testsuite/gcc.target/i386/fuse-caller-save-xmm.c +++ b/gcc/testsuite/gcc.target/i386/fuse-caller-save-xmm.c @@ -6,7 +6,7 @@ typedef double v2df __attribute__((vector_size (16))); static v2df __attribute__((noinline)) bar (v2df a) { - return a + (v2df){ 3.0, 3.0 }; + return a + (v2df){ 3.0, 4.0 }; } v2df __attribute__((noinline)) -- cgit v1.1 From e58093276a6e319c2a6d9f02e343fbf8400dab60 Mon Sep 17 00:00:00 2001 From: Andrew MacLeod Date: Mon, 19 Jul 2021 14:02:57 -0400 Subject: Only call vrp_visit_cond_stmt if range_of_stmt doesn't resolve to a const. Eventually all functionality will be subsumed. Until then, call it only if needed. gcc/ PR tree-optimization/101496 * vr-values.c (simplify_using_ranges::fold_cond): Call range_of_stmt first, then vrp_visit_cond_Stmt. gcc/testsuite * gcc.dg/pr101496.c: New.
--- gcc/testsuite/gcc.dg/pr101496.c | 22 ++++++++++++++++++++++ gcc/vr-values.c | 30 +++++++++++++++++------------- 2 files changed, 39 insertions(+), 13 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/pr101496.c (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/pr101496.c b/gcc/testsuite/gcc.dg/pr101496.c new file mode 100644 index 0000000..091d4ad --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr101496.c @@ -0,0 +1,22 @@ +/* PR tree-optimization/101496 */ +/* { dg-do compile } */ +/* { dg-options "-O2 " } */ + +int c_1, li_2, us_3, func_14_s_5; + +void func_14() { + { + unsigned uli_8 = 0; + lbl1806324B: + if (uli_8 /= us_3 |= func_14_s_5 < 0 | func_14_s_5 != c_1) { + uli_8 += c_1 >= us_3; + if (uli_8) + ; + else + li_2 &&func_14_s_5 <= c_1 ?: 0; + unsigned *ptr_9 = &uli_8; + } + } + goto lbl1806324B; +} + diff --git a/gcc/vr-values.c b/gcc/vr-values.c index 1b3ec38..c999ca8 100644 --- a/gcc/vr-values.c +++ b/gcc/vr-values.c @@ -3460,11 +3460,6 @@ range_fits_type_p (const value_range *vr, bool simplify_using_ranges::fold_cond (gcond *cond) { - /* ?? vrp_folder::fold_predicate_in() is a superset of this. At - some point we should merge all variants of this code. 
*/ - edge taken_edge; - vrp_visit_cond_stmt (cond, &taken_edge); - int_range_max r; if (query->range_of_stmt (r, cond) && r.singleton_p ()) { @@ -3475,17 +3470,13 @@ simplify_using_ranges::fold_cond (gcond *cond) if (r.zero_p ()) { - gcc_checking_assert (!taken_edge - || taken_edge->flags & EDGE_FALSE_VALUE); - if (dump_file && (dump_flags & TDF_DETAILS) && !taken_edge) + if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "\nPredicate evaluates to: 0\n"); gimple_cond_make_false (cond); } else { - gcc_checking_assert (!taken_edge - || taken_edge->flags & EDGE_TRUE_VALUE); - if (dump_file && (dump_flags & TDF_DETAILS) && !taken_edge) + if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "\nPredicate evaluates to: 1\n"); gimple_cond_make_true (cond); } @@ -3493,12 +3484,25 @@ simplify_using_ranges::fold_cond (gcond *cond) return true; } + /* ?? vrp_folder::fold_predicate_in() is a superset of this. At + some point we should merge all variants of this code. */ + edge taken_edge; + vrp_visit_cond_stmt (cond, &taken_edge); + if (taken_edge) { if (taken_edge->flags & EDGE_TRUE_VALUE) - gimple_cond_make_true (cond); + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "\nVRP Predicate evaluates to: 1\n"); + gimple_cond_make_true (cond); + } else if (taken_edge->flags & EDGE_FALSE_VALUE) - gimple_cond_make_false (cond); + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "\nVRP Predicate evaluates to: 0\n"); + gimple_cond_make_false (cond); + } else gcc_unreachable (); update_stmt (cond); -- cgit v1.1 From ea789238b2c24eedf70b56257235adf3d33c5a0a Mon Sep 17 00:00:00 2001 From: Andrew MacLeod Date: Mon, 19 Jul 2021 15:16:25 -0400 Subject: Check for undefined on COND_EXPR before querying type. gcc/ PR tree-optimization/101497 * gimple-range-fold.cc (fold_using_range::range_of_cond_expr): Check for undefined. gcc/testsuite * gcc.dg/pr101497.c: New. 
--- gcc/gimple-range-fold.cc | 3 ++- gcc/testsuite/gcc.dg/pr101497.c | 13 +++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.dg/pr101497.c (limited to 'gcc') diff --git a/gcc/gimple-range-fold.cc b/gcc/gimple-range-fold.cc index f8578c0..f95af3d 100644 --- a/gcc/gimple-range-fold.cc +++ b/gcc/gimple-range-fold.cc @@ -1111,7 +1111,8 @@ fold_using_range::range_of_cond_expr (irange &r, gassign *s, fur_source &src) r = range1; r.union_ (range2); } - gcc_checking_assert (range_compatible_p (r.type (), type)); + gcc_checking_assert (r.undefined_p () + || range_compatible_p (r.type (), type)); return true; } diff --git a/gcc/testsuite/gcc.dg/pr101497.c b/gcc/testsuite/gcc.dg/pr101497.c new file mode 100644 index 0000000..fcfe059 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr101497.c @@ -0,0 +1,13 @@ +/* PR tree-optimization/101497 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -Wno-div-by-zero" } */ + +char uc_1; +int i_4, func_12_uli_6; +void func_12() { + int *ptr_8 = &func_12_uli_6; + *ptr_8 = 0 >= 211 - uc_1 <= 0; + i_4 %= 0; + i_4 *= *ptr_8; +} + -- cgit v1.1 From d3fa77472b78c5ddada03a1052b229bea11cb76f Mon Sep 17 00:00:00 2001 From: Andrew MacLeod Date: Mon, 19 Jul 2021 15:54:57 -0400 Subject: Allow non-symmetrical equivalences. Don't trap if equivalences are processed out of DOM order, and aren't completely symmetrical. We will eventually resolve this, but it's OK for now. gcc/ PR tree-optimization/101511 * value-relation.cc (relation_oracle::query_relation): Check if ssa1 is in ssa2's equiv set, and don't trap if so. gcc/testsuite/ * g++.dg/pr101511.C: New.
--- gcc/testsuite/g++.dg/pr101511.C | 22 ++++++++++++++++++++++ gcc/value-relation.cc | 8 ++++++-- 2 files changed, 28 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/g++.dg/pr101511.C (limited to 'gcc') diff --git a/gcc/testsuite/g++.dg/pr101511.C b/gcc/testsuite/g++.dg/pr101511.C new file mode 100644 index 0000000..ee2c7fd --- /dev/null +++ b/gcc/testsuite/g++.dg/pr101511.C @@ -0,0 +1,22 @@ +// { dg-do compile } +// { dg-options "-O2 -Wno-div-by-zero" } + +void __assert_fail(const char *, const char *, int, const char *) + __attribute__((__noreturn__)); +template void test_uint() { + long __trans_tmp_3, __trans_tmp_1; + int Error; + for (;;) { + { + unsigned long Tmp = -1; + __trans_tmp_3 = Tmp - Tmp % 0; + } + Error += 0 == __trans_tmp_3 ? 0 : 1; + !Error ? void() : __assert_fail("", "", 3, __PRETTY_FUNCTION__); + T Tmp = -1; + __trans_tmp_1 = Tmp - Tmp % 0; + Error += 0 == __trans_tmp_1 ? 0 : 1; + !Error ? void() : __assert_fail("", "", 7, __PRETTY_FUNCTION__); + } +} +void test() { test_uint(); } diff --git a/gcc/value-relation.cc b/gcc/value-relation.cc index 43fcab7..bcfe388 100644 --- a/gcc/value-relation.cc +++ b/gcc/value-relation.cc @@ -873,11 +873,15 @@ relation_oracle::query_relation (basic_block bb, tree ssa1, tree ssa2) if (kind != VREL_NONE) return kind; - // If one is not found, see if there is a relationship between equivalences. // If v2 isn't in v1s equiv set, then v1 shouldn't be in v2's set either. + // It is possible for out-of-order dominator processing to have an out of + // sync set of equivalences.. Down the road, when we do full updates, + // change this to an assert to ensure everything is in sync. const_bitmap equiv2 = equiv_set (ssa2, bb); - gcc_checking_assert (!equiv2 || !bitmap_bit_p (equiv2, v1)); + if (equiv2 && bitmap_bit_p (equiv2, v1)) + return EQ_EXPR; + // If not equal, see if there is a relationship between equivalences. 
if (!equiv1 && !equiv2) kind = VREL_NONE; else if (!equiv1) -- cgit v1.1 From 4048d8a08621820dd6cc6035e13de3c3c82af4a5 Mon Sep 17 00:00:00 2001 From: Aldy Hernandez Date: Thu, 22 Jul 2021 16:03:53 +0200 Subject: Allow non-null adjustments for pointers even when there is a known range. Fix non_null_ref::adjust_range so it always adjusts ranges, not just varying ranges. This will allow pointers that have a range, but are not necessarily non-null, to be adjusted. gcc/ChangeLog: * gimple-range-cache.cc (non_null_ref::adjust_range): Replace varying_p check for null/non-null check. --- gcc/gimple-range-cache.cc | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'gcc') diff --git a/gcc/gimple-range-cache.cc b/gcc/gimple-range-cache.cc index 23597ad..265a64b 100644 --- a/gcc/gimple-range-cache.cc +++ b/gcc/gimple-range-cache.cc @@ -89,12 +89,17 @@ bool non_null_ref::adjust_range (irange &r, tree name, basic_block bb, bool search_dom) { - // Check if pointers have any non-null dereferences. Non-call - // exceptions mean we could throw in the middle of the block, so just - // punt for now on those. - if (!cfun->can_throw_non_call_exceptions - && r.varying_p () - && non_null_deref_p (name, bb, search_dom)) + // Non-call exceptions mean we could throw in the middle of the + // block, so just punt on those for now. + if (cfun->can_throw_non_call_exceptions) + return false; + + // We only care about the null / non-null property of pointers. + if (!POINTER_TYPE_P (TREE_TYPE (name)) || r.zero_p () || r.nonzero_p ()) + return false; + + // Check if pointers have any non-null dereferences. + if (non_null_deref_p (name, bb, search_dom)) { int_range<2> nz; nz.set_nonzero (TREE_TYPE (name)); -- cgit v1.1 From 8819419ba1d397c0444d89079ec16657a09914fb Mon Sep 17 00:00:00 2001 From: Andrew Pinski Date: Tue, 20 Jul 2021 11:25:43 -0700 Subject: Fix PR 10153: tail recursion for vector types.
The problem here is we try to an initialized value from a scalar constant. For vectors we need to do a vect_dup instead. This fixes that issue by using build_{one,zero}_cst instead of integer_{one,zero}_node when calling create_tailcall_accumulator. Changes from v1: * v2: Use build_{one,zero}_cst and get the correct type before. OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions. gcc/ChangeLog: PR tree-optimization/10153 * tree-tailcall.c (create_tailcall_accumulator): Don't call fold_convert as the type should be correct already. (tree_optimize_tail_calls_1): Use build_{one,zero}_cst instead of integer_{one,zero}_node for the call of create_tailcall_accumulator. gcc/testsuite/ChangeLog: PR tree-optimization/10153 * gcc.c-torture/compile/pr10153-1.c: New test. * gcc.c-torture/compile/pr10153-2.c: New test. --- gcc/testsuite/gcc.c-torture/compile/pr10153-1.c | 7 +++++++ gcc/testsuite/gcc.c-torture/compile/pr10153-2.c | 9 +++++++++ gcc/tree-tailcall.c | 10 ++++++---- 3 files changed, 22 insertions(+), 4 deletions(-) create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr10153-1.c create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr10153-2.c (limited to 'gcc') diff --git a/gcc/testsuite/gcc.c-torture/compile/pr10153-1.c b/gcc/testsuite/gcc.c-torture/compile/pr10153-1.c new file mode 100644 index 0000000..3f2040f --- /dev/null +++ b/gcc/testsuite/gcc.c-torture/compile/pr10153-1.c @@ -0,0 +1,7 @@ +typedef int V __attribute__ ((vector_size (2 * sizeof (int)))); +V +foo (void) +{ + V v = { }; + return v - foo(); +} diff --git a/gcc/testsuite/gcc.c-torture/compile/pr10153-2.c b/gcc/testsuite/gcc.c-torture/compile/pr10153-2.c new file mode 100644 index 0000000..1af4c8e --- /dev/null +++ b/gcc/testsuite/gcc.c-torture/compile/pr10153-2.c @@ -0,0 +1,9 @@ +typedef int V __attribute__ ((vector_size (2 * sizeof (int)))); +V +foo (int t) +{ + if (t < 10) + return (V){1, 1}; + V v = { }; + return v - foo(t - 1); +} diff --git a/gcc/tree-tailcall.c 
b/gcc/tree-tailcall.c index a4d31c9..f2833d2 100644 --- a/gcc/tree-tailcall.c +++ b/gcc/tree-tailcall.c @@ -1079,8 +1079,7 @@ create_tailcall_accumulator (const char *label, basic_block bb, tree init) gphi *phi; phi = create_phi_node (tmp, bb); - /* RET_TYPE can be a float when -ffast-maths is enabled. */ - add_phi_arg (phi, fold_convert (ret_type, init), single_pred_edge (bb), + add_phi_arg (phi, init, single_pred_edge (bb), UNKNOWN_LOCATION); return PHI_RESULT (phi); } @@ -1157,14 +1156,17 @@ tree_optimize_tail_calls_1 (bool opt_tailcalls) } phis_constructed = true; } + tree ret_type = TREE_TYPE (DECL_RESULT (current_function_decl)); + if (POINTER_TYPE_P (ret_type)) + ret_type = sizetype; if (act->add && !a_acc) a_acc = create_tailcall_accumulator ("add_acc", first, - integer_zero_node); + build_zero_cst (ret_type)); if (act->mult && !m_acc) m_acc = create_tailcall_accumulator ("mult_acc", first, - integer_one_node); + build_one_cst (ret_type)); } if (a_acc || m_acc) -- cgit v1.1 From 50f3ac1beb0cb400484dd2621dd0e3e530583e69 Mon Sep 17 00:00:00 2001 From: Martin Sebor Date: Thu, 22 Jul 2021 10:23:11 -0600 Subject: Remove an invalid definition [PR101568]. Resolves: PR testsuite/101568 - g++.dg/ipa/pr82352.C fails gcc/testsuite/ChangeLog: PR testsuite/101568 * g++.dg/ipa/pr82352.C --- gcc/testsuite/g++.dg/ipa/pr82352.C | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/testsuite/g++.dg/ipa/pr82352.C b/gcc/testsuite/g++.dg/ipa/pr82352.C index 08516da..7c8d0eb 100644 --- a/gcc/testsuite/g++.dg/ipa/pr82352.C +++ b/gcc/testsuite/g++.dg/ipa/pr82352.C @@ -17,7 +17,7 @@ private : class B { public : - void *operator new(size_t t) { return (void*)(42); }; + void *operator new(size_t); }; class C -- cgit v1.1 From b362d7947b37059fdb6de62145fa5146258dd58f Mon Sep 17 00:00:00 2001 From: Martin Sebor Date: Thu, 22 Jul 2021 11:49:33 -0600 Subject: Add new test for PR65178.
gcc/testsuite/ChangeLog: PR tree-optimization/65178 * gcc.dg/uninit-pr65178.c: New test. --- gcc/testsuite/gcc.dg/uninit-pr65178.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/uninit-pr65178.c (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/uninit-pr65178.c b/gcc/testsuite/gcc.dg/uninit-pr65178.c new file mode 100644 index 0000000..21eb354 --- /dev/null +++ b/gcc/testsuite/gcc.dg/uninit-pr65178.c @@ -0,0 +1,21 @@ +/* PR tree-optimizatiom/65178 - incorrect -Wmaybe-uninitialized when using + nested loops + { dg-do compile } + { dg-options "-O2 -Wall" } */ + +void *bar (int); + +char *foo (void) +{ + char *c = "bla"; + char *buf; + for (int a = 1;; a = 0) + { + for (char *s = c; *s; ++s) + { + } + if (!a) break; + buf = (char *) bar (1); + } + return buf; // { dg-bogus "\\\[-Wmaybe-uninitialized" } +} -- cgit v1.1 From 01ac2f08b0105a1c85425d0e59216eb528c6d2ab Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Fri, 23 Jul 2021 00:16:31 +0000 Subject: Daily bump. --- gcc/ChangeLog | 69 +++++++++++++++++++++++++++++++++++++++++++++++++ gcc/DATESTAMP | 2 +- gcc/testsuite/ChangeLog | 36 ++++++++++++++++++++++++++ 3 files changed, 106 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index d450c6b..b70e99c 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,72 @@ +2021-07-22 Andrew Pinski + + PR tree-optimization/10153 + * tree-tailcall.c (create_tailcall_accumulator): + Don't call fold_convert as the type should be correct already. + (tree_optimize_tail_calls_1): Use build_{one,zero}_cst instead + of integer_{one,zero}_node for the call of create_tailcall_accumulator. + +2021-07-22 Aldy Hernandez + + * gimple-range-cache.cc (non_null_ref::adjust_range): Replace + varying_p check for null/non-null check. 
+ +2021-07-22 Andrew MacLeod + + PR tree-optimization/101511 + * value-relation.cc (relation_oracle::query_relation): Check if ssa1 + is in ssa2's equiv set, and don't trap if so. + +2021-07-22 Andrew MacLeod + + PR tree-optimization/101497 + * gimple-range-fold.cc (fold_using_range::range_of_cond_expr): Check + for undefined. + +2021-07-22 Andrew MacLeod + + PR tree-optimization/101496 + * vr-values.c (simplify_using_ranges::fold_cond): Call range_of_stmt + first, then vrp_visit_cond_Stmt. + +2021-07-22 liuhongt + + * config/i386/i386-expand.c + (ix86_broadcast_from_integer_constant): Rename to .. + (ix86_broadcast_from_constant): .. this, and extend it to + handle float mode. + (ix86_expand_vector_move): Extend to float mode. + * config/i386/i386-features.c + (replace_constant_pool_with_broadcast): Remove. + (remove_partial_avx_dependency_gate): Ditto. + (constant_pool_broadcast): Ditto. + (class pass_constant_pool_broadcast): Ditto. + (make_pass_constant_pool_broadcast): Ditto. + (remove_partial_avx_dependency): Adjust gate. + * config/i386/i386-passes.def: Remove pass_constant_pool_broadcast. + * config/i386/i386-protos.h + (make_pass_constant_pool_broadcast): Remove. + +2021-07-22 liuhongt + + * config/i386/constraints.md (Wb): New constraint. + (Ww): Ditto. + * config/i386/i386.md (*ashlhi3_1): Extend to avx512 mask + shift. + (*ashlqi3_1): Ditto. + (*3_1): Split to .. + (*ashr3_1): this, ... + (*lshr3_1): and this, also extend this pattern to avx512 + mask registers. + (*3_1): Split to .. + (*ashr3_1): this, ... + (*lshrqi3_1): and this, also extend this pattern to avx512 + mask registers. + (*lshrhi3_1): And this, also extend this pattern to avx512 + mask registers. + * config/i386/sse.md (k): New define_split after + it to convert generic shift pattern to mask shift ones. 
+ 2021-07-21 Thomas Schwinge Joseph Myers Cesar Philippidis diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 9f48762..dd60122 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20210722 +20210723 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 141e948..22ff279 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,39 @@ +2021-07-22 Martin Sebor + + PR tree-optimization/65178 + * gcc.dg/uninit-pr65178.c: New test. + +2021-07-22 Martin Sebor + + PR testsuite/101568 + * g++.dg/ipa/pr82352.C + +2021-07-22 Andrew Pinski + + PR tree-optimization/10153 + * gcc.c-torture/compile/pr10153-1.c: New test. + * gcc.c-torture/compile/pr10153-2.c: New test. + +2021-07-22 Andrew MacLeod + + * g++.dg/pr101511.C: New. + +2021-07-22 Andrew MacLeod + + * gcc.dg/pr101497.c: New. + +2021-07-22 Andrew MacLeod + + * gcc.dg/pr101496.c: New. + +2021-07-22 liuhongt + + * gcc.target/i386/fuse-caller-save-xmm.c: Adjust testcase. + +2021-07-22 liuhongt + + * gcc.target/i386/mask-shift.c: New test. + 2021-07-21 David Malcolm PR analyzer/101547 -- cgit v1.1 From 3382846558e02044598556e66e5ea1cb3115429d Mon Sep 17 00:00:00 2001 From: Haochen Gui Date: Fri, 23 Jul 2021 09:47:23 +0800 Subject: Fix execution failure of parity_1.f90 on P10 [PR100952] gcc/ PR target/100952 * config/rs6000/rs6000.md (cstore4): Fix wrong fall through. --- gcc/config/rs6000/rs6000.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 2368153..a84438f 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -11831,7 +11831,10 @@ { /* Everything is best done with setbc[r] if available. 
*/ if (TARGET_POWER10 && TARGET_ISEL) - rs6000_emit_int_cmove (operands[0], operands[1], const1_rtx, const0_rtx); + { + rs6000_emit_int_cmove (operands[0], operands[1], const1_rtx, const0_rtx); + DONE; + } /* Expanding EQ and NE directly to some machine instructions does not help but does hurt combine. So don't. */ -- cgit v1.1 From 60933a148ab33c82915b40690b3ced6abc32a1bf Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Thu, 22 Jul 2021 22:36:05 -0400 Subject: analyzer: fix feasibility false +ve with overly complex svalues gcc/analyzer/ChangeLog: * diagnostic-manager.cc (class auto_disable_complexity_checks): New. (epath_finder::explore_feasible_paths): Use it to disable complexity checks whilst processing the worklist. * region-model-manager.cc (region_model_manager::region_model_manager): Initialize m_check_complexity. (region_model_manager::reject_if_too_complex): Bail if m_check_complexity is false. * region-model.h (region_model_manager::enable_complexity_check): New. (region_model_manager::disable_complexity_check): New. (region_model_manager::m_check_complexity): New. gcc/testsuite/ChangeLog: * gcc.dg/analyzer/feasibility-3.c: New test. Signed-off-by: David Malcolm --- gcc/analyzer/diagnostic-manager.cc | 47 +++++++-- gcc/analyzer/region-model-manager.cc | 4 + gcc/analyzer/region-model.h | 5 + gcc/testsuite/gcc.dg/analyzer/feasibility-3.c | 133 ++++++++++++++++++++++++++ 4 files changed, 182 insertions(+), 7 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/analyzer/feasibility-3.c (limited to 'gcc') diff --git a/gcc/analyzer/diagnostic-manager.cc b/gcc/analyzer/diagnostic-manager.cc index 631fef6..ef3df32 100644 --- a/gcc/analyzer/diagnostic-manager.cc +++ b/gcc/analyzer/diagnostic-manager.cc @@ -292,6 +292,34 @@ private: const shortest_paths &m_sep; }; +/* When we're building the exploded graph we want to simplify + overly-complicated symbolic values down to "UNKNOWN" to try to avoid + state explosions and unbounded chains of exploration. 
+ + However, when we're building the feasibility graph for a diagnostic + (actually a tree), we don't want UNKNOWN values, as conditions on them + are also unknown: we don't want to have a contradiction such as a path + where (VAL != 0) and then (VAL == 0) along the same path. + + Hence this is an RAII class for temporarily disabling complexity-checking + in the region_model_manager, for use within + epath_finder::explore_feasible_paths. */ + +class auto_disable_complexity_checks +{ +public: + auto_disable_complexity_checks (region_model_manager *mgr) : m_mgr (mgr) + { + m_mgr->disable_complexity_check (); + } + ~auto_disable_complexity_checks () + { + m_mgr->enable_complexity_check (); + } +private: + region_model_manager *m_mgr; +}; + /* Attempt to find the shortest feasible path from the origin to TARGET_ENODE by iteratively building a feasible_graph, in which every path to a feasible_node is feasible by construction. @@ -344,6 +372,8 @@ epath_finder::explore_feasible_paths (const exploded_node *target_enode, logger *logger = get_logger (); LOG_SCOPE (logger); + region_model_manager *mgr = m_eg.get_engine ()->get_model_manager (); + /* Determine the shortest path to TARGET_ENODE from each node in the exploded graph. */ shortest_paths sep @@ -363,8 +393,7 @@ epath_finder::explore_feasible_paths (const exploded_node *target_enode, /* Populate the worklist with the origin node. */ { - feasibility_state init_state (m_eg.get_engine ()->get_model_manager (), - m_eg.get_supergraph ()); + feasibility_state init_state (mgr, m_eg.get_supergraph ()); feasible_node *origin = fg.add_node (m_eg.get_origin (), init_state, 0); worklist.add_node (origin); } @@ -376,11 +405,15 @@ epath_finder::explore_feasible_paths (const exploded_node *target_enode, /* Set this if we find a feasible path to TARGET_ENODE. 
*/ exploded_path *best_path = NULL; - while (process_worklist_item (&worklist, tg, &fg, target_enode, diag_idx, - &best_path)) - { - /* Empty; the work is done within process_worklist_item. */ - } + { + auto_disable_complexity_checks sentinel (mgr); + + while (process_worklist_item (&worklist, tg, &fg, target_enode, diag_idx, + &best_path)) + { + /* Empty; the work is done within process_worklist_item. */ + } + } if (logger) { diff --git a/gcc/analyzer/region-model-manager.cc b/gcc/analyzer/region-model-manager.cc index fccb93e..14c57d8 100644 --- a/gcc/analyzer/region-model-manager.cc +++ b/gcc/analyzer/region-model-manager.cc @@ -71,6 +71,7 @@ region_model_manager::region_model_manager () m_stack_region (alloc_region_id (), &m_root_region), m_heap_region (alloc_region_id (), &m_root_region), m_unknown_NULL (NULL), + m_check_complexity (true), m_max_complexity (0, 0), m_code_region (alloc_region_id (), &m_root_region), m_fndecls_map (), m_labels_map (), @@ -160,6 +161,9 @@ region_model_manager::too_complex_p (const complexity &c) const bool region_model_manager::reject_if_too_complex (svalue *sval) { + if (!m_check_complexity) + return false; + const complexity &c = sval->get_complexity (); if (!too_complex_p (c)) { diff --git a/gcc/analyzer/region-model.h b/gcc/analyzer/region-model.h index cc39929..1c7a386 100644 --- a/gcc/analyzer/region-model.h +++ b/gcc/analyzer/region-model.h @@ -323,6 +323,9 @@ public: void log_stats (logger *logger, bool show_objs) const; + void enable_complexity_check (void) { m_check_complexity = true; } + void disable_complexity_check (void) { m_check_complexity = false; } + private: bool too_complex_p (const complexity &c) const; bool reject_if_too_complex (svalue *sval); @@ -407,6 +410,8 @@ private: conjured_svalue *> conjured_values_map_t; conjured_values_map_t m_conjured_values_map; + bool m_check_complexity; + /* Maximum complexity of svalues that weren't rejected. 
*/ complexity m_max_complexity; diff --git a/gcc/testsuite/gcc.dg/analyzer/feasibility-3.c b/gcc/testsuite/gcc.dg/analyzer/feasibility-3.c new file mode 100644 index 0000000..0c0bd14 --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/feasibility-3.c @@ -0,0 +1,133 @@ +/* Reduced and adapted from Linux: fs/proc/inode.c: proc_reg_open + (GPL v2.0). */ + +/* Types. */ + +typedef unsigned char u8; +typedef _Bool bool; +typedef unsigned int gfp_t; + +struct file; +struct kmem_cache; +struct proc_dir_entry; + +struct inode { /* [...snip...] */ }; + +enum { + PROC_ENTRY_PERMANENT = 1U << 0, +}; + +struct proc_ops { + /* [...snip...] */ + int (*proc_open)(struct inode *, struct file *); + /* [...snip...] */ + int (*proc_release)(struct inode *, struct file *); + /* [...snip...] */ +}; + +struct proc_dir_entry { + /* [...snip...] */ + struct completion *pde_unload_completion; + /* [...snip...] */ + union { + const struct proc_ops *proc_ops; + const struct file_operations *proc_dir_ops; + }; + /* [...snip...] */ + u8 flags; + /* [...snip...] */ +}; + +struct pde_opener { + /* [...snip...] */ + struct file *file; + /* [...snip...] */ +}; + +struct proc_inode { + /* [...snip...] */ + struct proc_dir_entry *pde; + /* [...snip...] */ + struct inode vfs_inode; +}; + +/* Data. */ + +static struct kmem_cache *pde_opener_cache __attribute__((__section__(".data..ro_after_init"))); + +/* Functions. 
*/ + +void *kmem_cache_alloc(struct kmem_cache *, gfp_t flags) __attribute__((__malloc__)); +void kmem_cache_free(struct kmem_cache *, void *); + +static inline bool pde_is_permanent(const struct proc_dir_entry *pde) +{ + return pde->flags & PROC_ENTRY_PERMANENT; +} + +static inline struct proc_inode *PROC_I(const struct inode *inode) +{ + void *__mptr = (void *)(inode); + return ((struct proc_inode *)(__mptr - __builtin_offsetof(struct proc_inode, vfs_inode))); +} + +static inline struct proc_dir_entry *PDE(const struct inode *inode) +{ + return PROC_I(inode)->pde; +} + +/* We don't want to emit bogus use of uninitialized value 'pdeo' + warnings from -Wanalyzer-use-of-uninitialized-value in this function; + these would require following infeasible paths in which "release" is + first NULL (to avoid the initialization of "pdeo") and then is non-NULL + (to access "pdeo"). + + "release" is sufficiently complicated in this function to hit the + complexity limit for symbolic values during enode exploration. */ + +static int proc_reg_open(struct inode *inode, struct file *file) +{ + struct proc_dir_entry *pde = PDE(inode); + int rv = 0; + typeof(((struct proc_ops*)0)->proc_open) open; + typeof(((struct proc_ops*)0)->proc_release) release; + struct pde_opener *pdeo; + + if (pde_is_permanent(pde)) { + open = pde->proc_ops->proc_open; + if (open) + rv = open(inode, file); + return rv; + } + + /* [...snip...] */ + + release = pde->proc_ops->proc_release; + if (release) { + pdeo = kmem_cache_alloc(pde_opener_cache, + ((( gfp_t)(0x400u|0x800u)) + | (( gfp_t)0x40u) + | (( gfp_t)0x80u))); + if (!pdeo) { + rv = -12; + goto out_unuse; + } + } + + open = pde->proc_ops->proc_open; + if (open) + rv = open(inode, file); + + if (release) { + if (rv == 0) { + + pdeo->file = file; /* { dg-bogus "uninit" } */ + /* [...snip...] */ + } else + kmem_cache_free(pde_opener_cache, pdeo); /* { dg-bogus "uninit" } */ + } + +out_unuse: + /* [...snip...] 
*/ + return rv; +} -- cgit v1.1 From 863737b8de745292909b46217774f4e58019e28a Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Fri, 23 Jul 2021 13:54:11 +0800 Subject: Revert "testsuite: mips: use noinline attribute instead of -fno-inline" This reverts commit 3b33b1136d5ba1903a56fa601a848accc3db46ef. --- gcc/testsuite/gcc.target/mips/cfgcleanup-jalr2.c | 11 +++-------- gcc/testsuite/gcc.target/mips/cfgcleanup-jalr3.c | 6 +++--- 2 files changed, 6 insertions(+), 11 deletions(-) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.target/mips/cfgcleanup-jalr2.c b/gcc/testsuite/gcc.target/mips/cfgcleanup-jalr2.c index 6a9f86a..bf22f06 100644 --- a/gcc/testsuite/gcc.target/mips/cfgcleanup-jalr2.c +++ b/gcc/testsuite/gcc.target/mips/cfgcleanup-jalr2.c @@ -1,15 +1,10 @@ /* { dg-do compile } */ -/* { dg-options "-mabicalls -fpic -mno-mips16 -mno-micromips -fipa-ra -mcompact-branches=never" } */ +/* { dg-options "-mabicalls -fpic -mno-mips16 -mno-micromips -fno-inline -fipa-ra -mcompact-branches=never" } */ /* { dg-skip-if "needs codesize optimization" { *-*-* } { "-O0" "-O1" "-O2" "-O3" } { "" } } */ -static int __attribute__((noinline)) -foo (void* p) -{ - __asm__ (""::"r"(p):"$t0"); - return 0; -} +static int foo (void* p) { __asm__ (""::"r"(p):"$t0"); return 0; } -__attribute__((noinline)) static int bar (void* p) { return 1; } +static int bar (void* p) { return 1; } int test (void* p) diff --git a/gcc/testsuite/gcc.target/mips/cfgcleanup-jalr3.c b/gcc/testsuite/gcc.target/mips/cfgcleanup-jalr3.c index 5093741..805b31a 100644 --- a/gcc/testsuite/gcc.target/mips/cfgcleanup-jalr3.c +++ b/gcc/testsuite/gcc.target/mips/cfgcleanup-jalr3.c @@ -1,10 +1,10 @@ /* { dg-do compile } */ -/* { dg-options "-mabicalls -fpic -mno-mips16 -mno-micromips -fipa-ra -mcompact-branches=never" } */ +/* { dg-options "-mabicalls -fpic -mno-mips16 -mno-micromips -fno-inline -fipa-ra -mcompact-branches=never" } */ /* { dg-skip-if "needs codesize optimization" { *-*-* } { "-O0" "-O1" "-O2" "-O3" } { "" 
} } */ -__attribute__((noinline)) static int foo (void* p) { return 0; } +static int foo (void* p) { return 0; } -__attribute__((noinline)) static int bar (void* p) { return 1; } +static int bar (void* p) { return 1; } int test (void* p) -- cgit v1.1 From 19e05058799ffd611f4946d1871e747bae7a0046 Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Fri, 23 Jul 2021 13:55:56 +0800 Subject: testsuite: mips: pass -finline/-fno-inline through gcc/testsuite/ * gcc.target/mips/mips.exp (mips_option_groups): add -finline and -fno-inline. --- gcc/testsuite/gcc.target/mips/mips.exp | 1 + 1 file changed, 1 insertion(+) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.target/mips/mips.exp b/gcc/testsuite/gcc.target/mips/mips.exp index 0129231..580e7c0 100644 --- a/gcc/testsuite/gcc.target/mips/mips.exp +++ b/gcc/testsuite/gcc.target/mips/mips.exp @@ -325,6 +325,7 @@ foreach option { finite-math-only fixed-hi fixed-lo + inline lax-vector-conversions omit-frame-pointer optimize-sibling-calls -- cgit v1.1 From 2c5d803d03209478b4f060785c6f6ba2f0de88ad Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Fri, 23 Jul 2021 09:37:36 +0200 Subject: openmp: Diagnose invalid mixing of the attribute and pragma syntax directives The OpenMP 5.1 spec says that the attribute and pragma syntax directives should not be mixed on the same statement. The following patch adds a diagnostic for that, [[omp::directive (...)]] #pragma omp ... is always an error and for the other order #pragma omp ... [[omp::directive (...)]] it depends on whether the pragma directive is an OpenMP construct (then it is an error because it needs a structured block or loop or statement as body) or e.g. a standalone directive (then it is fine). Only block scope is handled for now though, namespace scope and class scope still need implementing even the basic support. 2021-07-23 Jakub Jelinek gcc/c-family/ * c-pragma.h (enum pragma_kind): Add PRAGMA_OMP__START_ and PRAGMA_OMP__LAST_ enumerators.
gcc/cp/ * parser.h (struct cp_parser): Add omp_attrs_forbidden_p member. * parser.c (cp_parser_handle_statement_omp_attributes): Diagnose mixing of attribute and pragma syntax directives when seeing omp::directive if parser->omp_attrs_forbidden_p or if attribute syntax directives are followed by OpenMP pragma. (cp_parser_statement): Clear parser->omp_attrs_forbidden_p after the cp_parser_handle_statement_omp_attributes call. (cp_parser_omp_structured_block): Add disallow_omp_attrs argument, if true, set parser->omp_attrs_forbidden_p. (cp_parser_omp_scan_loop_body, cp_parser_omp_sections_scope): Pass false as disallow_omp_attrs to cp_parser_omp_structured_block. (cp_parser_omp_parallel, cp_parser_omp_task): Set parser->omp_attrs_forbidden_p. gcc/testsuite/ * g++.dg/gomp/attrs-4.C: New test. * g++.dg/gomp/attrs-5.C: New test. --- gcc/c-family/c-pragma.h | 4 +++ gcc/cp/parser.c | 40 +++++++++++++++++++++--- gcc/cp/parser.h | 3 ++ gcc/testsuite/g++.dg/gomp/attrs-4.C | 61 +++++++++++++++++++++++++++++++++++++ gcc/testsuite/g++.dg/gomp/attrs-5.C | 46 ++++++++++++++++++++++++++++ 5 files changed, 149 insertions(+), 5 deletions(-) create mode 100644 gcc/testsuite/g++.dg/gomp/attrs-4.C create mode 100644 gcc/testsuite/g++.dg/gomp/attrs-5.C (limited to 'gcc') diff --git a/gcc/c-family/c-pragma.h b/gcc/c-family/c-pragma.h index c5d11ce..abd6667 100644 --- a/gcc/c-family/c-pragma.h +++ b/gcc/c-family/c-pragma.h @@ -42,7 +42,9 @@ enum pragma_kind { PRAGMA_OACC_UPDATE, PRAGMA_OACC_WAIT, + /* PRAGMA_OMP__START_ should be equal to the first PRAGMA_OMP_* code. */ PRAGMA_OMP_ALLOCATE, + PRAGMA_OMP__START_ = PRAGMA_OMP_ALLOCATE, PRAGMA_OMP_ATOMIC, PRAGMA_OMP_BARRIER, PRAGMA_OMP_CANCEL, @@ -72,6 +74,8 @@ enum pragma_kind { PRAGMA_OMP_TASKYIELD, PRAGMA_OMP_THREADPRIVATE, PRAGMA_OMP_TEAMS, + /* PRAGMA_OMP__LAST_ should be equal to the last PRAGMA_OMP_* code. 
*/ + PRAGMA_OMP__LAST_ = PRAGMA_OMP_TEAMS, PRAGMA_GCC_PCH_PREPROCESS, PRAGMA_IVDEP, diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c index 45216f0..18905cf 100644 --- a/gcc/cp/parser.c +++ b/gcc/cp/parser.c @@ -11665,6 +11665,7 @@ cp_parser_handle_statement_omp_attributes (cp_parser *parser, tree attrs) auto_vec vec; int cnt = 0; int tokens = 0; + bool bad = false; for (tree *pa = &attrs; *pa; ) if (get_attribute_namespace (*pa) == omp_identifier && is_attribute_p ("directive", get_attribute_name (*pa))) @@ -11676,6 +11677,14 @@ cp_parser_handle_statement_omp_attributes (cp_parser *parser, tree attrs) gcc_assert (TREE_CODE (d) == DEFERRED_PARSE); cp_token *first = DEFPARSE_TOKENS (d)->first; cp_token *last = DEFPARSE_TOKENS (d)->last; + if (parser->omp_attrs_forbidden_p) + { + error_at (first->location, + "mixing OpenMP directives with attribute and pragma " + "syntax on the same statement"); + parser->omp_attrs_forbidden_p = false; + bad = true; + } const char *directive[3] = {}; for (int i = 0; i < 3; i++) { @@ -11731,6 +11740,9 @@ cp_parser_handle_statement_omp_attributes (cp_parser *parser, tree attrs) else pa = &TREE_CHAIN (*pa); + if (bad) + return attrs; + unsigned int i; cp_omp_attribute_data *v; cp_omp_attribute_data *construct_seen = nullptr; @@ -11780,6 +11792,18 @@ cp_parser_handle_statement_omp_attributes (cp_parser *parser, tree attrs) " can only appear on an empty statement"); return attrs; } + if (cnt && cp_lexer_next_token_is (parser->lexer, CPP_PRAGMA)) + { + cp_token *token = cp_lexer_peek_token (parser->lexer); + enum pragma_kind kind = cp_parser_pragma_kind (token); + if (kind >= PRAGMA_OMP__START_ && kind <= PRAGMA_OMP__LAST_) + { + error_at (token->location, + "mixing OpenMP directives with attribute and pragma " + "syntax on the same statement"); + return attrs; + } + } if (!tokens) return attrs; @@ -11904,6 +11928,7 @@ cp_parser_statement (cp_parser* parser, tree in_statement_expr, if (std_attrs && (flag_openmp || flag_openmp_simd)) std_attrs 
= cp_parser_handle_statement_omp_attributes (parser, std_attrs); + parser->omp_attrs_forbidden_p = false; /* Peek at the next token. */ token = cp_lexer_peek_token (parser->lexer); @@ -39391,11 +39416,14 @@ cp_parser_end_omp_structured_block (cp_parser *parser, unsigned save) } static tree -cp_parser_omp_structured_block (cp_parser *parser, bool *if_p) +cp_parser_omp_structured_block (cp_parser *parser, bool *if_p, + bool disallow_omp_attrs = true) { tree stmt = begin_omp_structured_block (); unsigned int save = cp_parser_begin_omp_structured_block (parser); + if (disallow_omp_attrs) + parser->omp_attrs_forbidden_p = true; cp_parser_statement (parser, NULL_TREE, false, if_p); cp_parser_end_omp_structured_block (parser, save); @@ -40761,7 +40789,7 @@ cp_parser_omp_scan_loop_body (cp_parser *parser) if (!braces.require_open (parser)) return; - substmt = cp_parser_omp_structured_block (parser, NULL); + substmt = cp_parser_omp_structured_block (parser, NULL, false); substmt = build2 (OMP_SCAN, void_type_node, substmt, NULL_TREE); add_stmt (substmt); @@ -40796,7 +40824,7 @@ cp_parser_omp_scan_loop_body (cp_parser *parser) error ("expected %<#pragma omp scan%>"); clauses = finish_omp_clauses (clauses, C_ORT_OMP); - substmt = cp_parser_omp_structured_block (parser, NULL); + substmt = cp_parser_omp_structured_block (parser, NULL, false); substmt = build2_loc (tok->location, OMP_SCAN, void_type_node, substmt, clauses); add_stmt (substmt); @@ -41597,7 +41625,7 @@ cp_parser_omp_sections_scope (cp_parser *parser) if (cp_parser_pragma_kind (cp_lexer_peek_token (parser->lexer)) != PRAGMA_OMP_SECTION) { - substmt = cp_parser_omp_structured_block (parser, NULL); + substmt = cp_parser_omp_structured_block (parser, NULL, false); substmt = build1 (OMP_SECTION, void_type_node, substmt); add_stmt (substmt); } @@ -41622,7 +41650,7 @@ cp_parser_omp_sections_scope (cp_parser *parser) error_suppress = true; } - substmt = cp_parser_omp_structured_block (parser, NULL); + substmt = 
cp_parser_omp_structured_block (parser, NULL, false); substmt = build1 (OMP_SECTION, void_type_node, substmt); add_stmt (substmt); } @@ -41842,6 +41870,7 @@ cp_parser_omp_parallel (cp_parser *parser, cp_token *pragma_tok, block = begin_omp_parallel (); save = cp_parser_begin_omp_structured_block (parser); + parser->omp_attrs_forbidden_p = true; cp_parser_statement (parser, NULL_TREE, false, if_p); cp_parser_end_omp_structured_block (parser, save); stmt = finish_omp_parallel (clauses, block); @@ -41904,6 +41933,7 @@ cp_parser_omp_task (cp_parser *parser, cp_token *pragma_tok, bool *if_p) "#pragma omp task", pragma_tok); block = begin_omp_task (); save = cp_parser_begin_omp_structured_block (parser); + parser->omp_attrs_forbidden_p = true; cp_parser_statement (parser, NULL_TREE, false, if_p); cp_parser_end_omp_structured_block (parser, save); return finish_omp_task (clauses, block); diff --git a/gcc/cp/parser.h b/gcc/cp/parser.h index 5ef7047..6fdd214 100644 --- a/gcc/cp/parser.h +++ b/gcc/cp/parser.h @@ -398,6 +398,9 @@ struct GTY(()) cp_parser { identifiers) rather than an explicit template parameter list. */ bool fully_implicit_function_template_p; + /* TRUE if omp::directive or omp::sequence attributes may not appear. */ + bool omp_attrs_forbidden_p; + /* Tracks the function's template parameter list when declaring a function using generic type parameters. 
This is either a new chain in the case of a fully implicit function template or an extension of the function's existing diff --git a/gcc/testsuite/g++.dg/gomp/attrs-4.C b/gcc/testsuite/g++.dg/gomp/attrs-4.C new file mode 100644 index 0000000..005add82 --- /dev/null +++ b/gcc/testsuite/g++.dg/gomp/attrs-4.C @@ -0,0 +1,61 @@ +// { dg-do compile { target c++11 } } + +void +foo (int x) +{ + [[omp::directive (parallel)]] + #pragma omp for // { dg-error "mixing OpenMP directives with attribute and pragma syntax on the same statement" } + for (int i = 0; i < 16; i++) + ; + [[omp::directive (barrier)]] // { dg-error "standalone OpenMP directives in 'omp::directive' attribute can only appear on an empty statement" } + #pragma omp flush + ; + #pragma omp parallel + [[omp::directive (master)]] // { dg-error "mixing OpenMP directives with attribute and pragma syntax on the same statement" } + ; + #pragma omp teams + [[omp::sequence (directive (parallel), directive (master))]] // { dg-error "mixing OpenMP directives with attribute and pragma syntax on the same statement" } + ; + #pragma omp task + [[omp::directive (flush)]] // { dg-error "mixing OpenMP directives with attribute and pragma syntax on the same statement" } + ; + #pragma omp master + [[omp::directive (flush)]] // { dg-error "mixing OpenMP directives with attribute and pragma syntax on the same statement" } + ; + #pragma omp for ordered + for (int i = 0; i < 16; i++) + #pragma omp ordered + [[omp::directive (flush)]] // { dg-error "mixing OpenMP directives with attribute and pragma syntax on the same statement" } + ; + #pragma omp single + [[omp::directive (flush)]] // { dg-error "mixing OpenMP directives with attribute and pragma syntax on the same statement" } + ; + #pragma omp taskgroup + [[omp::directive (taskyield)]] // { dg-error "mixing OpenMP directives with attribute and pragma syntax on the same statement" } + ; + #pragma omp target data map (tofrom: x) + [[omp::directive (flush)]] // { dg-error "mixing 
OpenMP directives with attribute and pragma syntax on the same statement" } + ; + #pragma omp target + [[omp::directive (teams)]] // { dg-error "mixing OpenMP directives with attribute and pragma syntax on the same statement" } + ; + [[omp::directive (parallel)]] + #pragma omp master // { dg-error "mixing OpenMP directives with attribute and pragma syntax on the same statement" } + [[omp::sequence (omp::directive (taskloop))]] // { dg-error "mixing OpenMP directives with attribute and pragma syntax on the same statement" } + for (int i = 0; i < 16; i++) + ; + #pragma omp parallel + [[omp::directive (for)]] // { dg-error "mixing OpenMP directives with attribute and pragma syntax on the same statement" } + for (int i = 0; i < 16; i++) + ; + #pragma omp for + [[omp::directive (master)]] // { dg-error "for statement expected before '\\\[' token" } + ; + #pragma omp target teams + [[omp::directive (parallel)]] // { dg-error "mixing OpenMP directives with attribute and pragma syntax on the same statement" } + ; + #pragma omp parallel master + [[omp::directive (taskloop)]] // { dg-error "mixing OpenMP directives with attribute and pragma syntax on the same statement" } + for (int i = 0; i < 16; i++) + ; +} diff --git a/gcc/testsuite/g++.dg/gomp/attrs-5.C b/gcc/testsuite/g++.dg/gomp/attrs-5.C new file mode 100644 index 0000000..f6d24b9 --- /dev/null +++ b/gcc/testsuite/g++.dg/gomp/attrs-5.C @@ -0,0 +1,46 @@ +// { dg-do compile { target c++11 } } + +typedef struct __attribute__((__aligned__ (sizeof (void *)))) omp_depend_t { + char __omp_depend_t__[2 * sizeof (void *)]; +} omp_depend_t; + +void +foo (int x) +{ + #pragma omp barrier + [[omp::directive (barrier)]]; + #pragma omp parallel + { + #pragma omp cancel parallel + [[omp::directive (cancellation point, parallel)]]; + } + #pragma omp parallel + { + #pragma omp cancellation point parallel + [[omp::directive (cancel parallel)]]; + } + #pragma omp parallel + { + [[omp::directive (cancel, parallel)]]; + #pragma omp 
cancellation point parallel + } + omp_depend_t depobj; + #pragma omp depobj(depobj) update(inout) + [[omp::directive (depobj(depobj), destroy)]]; + #pragma omp flush + [[omp::directive (flush)]]; + #pragma omp target enter data map (to: x) + [[omp::directive (target exit data, map (from: x))]]; + [[omp::directive (target enter data, map (to: x))]]; + #pragma omp target exit data map (from: x) + [[omp::directive (flush)]]; + #pragma omp target update to (x) + [[omp::directive (flush)]]; + #pragma omp taskwait + [[omp::directive (flush)]]; + #pragma omp taskyield + [[omp::directive (flush)]]; + extern int t; + #pragma omp threadprivate (t) + [[omp::directive (flush)]]; +} -- cgit v1.1 From 7f7364108f7441e6bd6f6f79a2d991e4e0f71b28 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Fri, 23 Jul 2021 09:50:15 +0200 Subject: openmp: Add support for __has_attribute(omp::directive) and __has_attribute(omp::sequence) Now that the C++ FE supports these attributes, but not through registering them in the attributes tables (they work quite differently from other attributes), this teaches c_common_has_attribute about those. 2021-07-23 Jakub Jelinek * c-lex.c (c_common_has_attribute): Call canonicalize_attr_name also on attr_id. Return 1 for omp::directive or omp::sequence in C++11 and later. * c-c++-common/gomp/attrs-1.c: New test. * c-c++-common/gomp/attrs-2.c: New test. * c-c++-common/gomp/attrs-3.c: New test.
--- gcc/c-family/c-lex.c | 15 ++- gcc/testsuite/c-c++-common/gomp/attrs-1.c | 146 ++++++++++++++++++++++++++++++ gcc/testsuite/c-c++-common/gomp/attrs-2.c | 146 ++++++++++++++++++++++++++++++ gcc/testsuite/c-c++-common/gomp/attrs-3.c | 74 +++++++++++++++ 4 files changed, 380 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/c-c++-common/gomp/attrs-1.c create mode 100644 gcc/testsuite/c-c++-common/gomp/attrs-2.c create mode 100644 gcc/testsuite/c-c++-common/gomp/attrs-3.c (limited to 'gcc') diff --git a/gcc/c-family/c-lex.c b/gcc/c-family/c-lex.c index c44e7a1..4b04e71 100644 --- a/gcc/c-family/c-lex.c +++ b/gcc/c-family/c-lex.c @@ -338,7 +338,20 @@ c_common_has_attribute (cpp_reader *pfile, bool std_syntax) tree attr_id = get_identifier ((const char *) cpp_token_as_text (pfile, nxt_token)); - attr_name = build_tree_list (attr_ns, attr_id); + attr_id = canonicalize_attr_name (attr_id); + if (c_dialect_cxx ()) + { + /* OpenMP attributes need special handling. */ + if ((flag_openmp || flag_openmp_simd) + && is_attribute_p ("omp", attr_ns) + && (is_attribute_p ("directive", attr_id) + || is_attribute_p ("sequence", attr_id))) + result = 1; + } + if (result) + attr_name = NULL_TREE; + else + attr_name = build_tree_list (attr_ns, attr_id); } else { diff --git a/gcc/testsuite/c-c++-common/gomp/attrs-1.c b/gcc/testsuite/c-c++-common/gomp/attrs-1.c new file mode 100644 index 0000000..e3c0fa6 --- /dev/null +++ b/gcc/testsuite/c-c++-common/gomp/attrs-1.c @@ -0,0 +1,146 @@ +/* { dg-do compile } */ +/* { dg-options "-fopenmp" } */ + +#if __has_attribute(omp::directive) +#ifndef __cplusplus +#error omp::directive supported in C +#endif +#else +#ifdef __cplusplus +#error omp::directive not supported in C++ +#endif +#endif + +#if __has_attribute(omp::sequence) +#ifndef __cplusplus +#error omp::sequence supported in C +#endif +#else +#ifdef __cplusplus +#error omp::sequence not supported in C++ +#endif +#endif + +#if __has_attribute(omp::unknown) +#error omp::unknown 
supported +#endif + +#if __has_cpp_attribute(omp::directive) +#ifndef __cplusplus +#error omp::directive supported in C +#endif +#else +#ifdef __cplusplus +#error omp::directive not supported in C++ +#endif +#endif + +#if __has_cpp_attribute(omp::sequence) +#ifndef __cplusplus +#error omp::sequence supported in C +#endif +#else +#ifdef __cplusplus +#error omp::sequence not supported in C++ +#endif +#endif + +#if __has_cpp_attribute(omp::unknown) +#error omp::unknown supported +#endif + +#if __has_attribute(__omp__::__directive__) +#ifndef __cplusplus +#error __omp__::__directive__ supported in C +#endif +#else +#ifdef __cplusplus +#error __omp__::__directive__ not supported in C++ +#endif +#endif + +#if __has_attribute(__omp__::__sequence__) +#ifndef __cplusplus +#error __omp__::__sequence__ supported in C +#endif +#else +#ifdef __cplusplus +#error __omp__::__sequence__ not supported in C++ +#endif +#endif + +#if __has_attribute(__omp__::__unknown__) +#error __omp__::__unknown__ supported +#endif + +#if __has_cpp_attribute(__omp__::__directive__) +#ifndef __cplusplus +#error __omp__::__directive__ supported in C +#endif +#else +#ifdef __cplusplus +#error __omp__::__directive__ not supported in C++ +#endif +#endif + +#if __has_cpp_attribute(__omp__::__sequence__) +#ifndef __cplusplus +#error __omp__::__sequence__ supported in C +#endif +#else +#ifdef __cplusplus +#error __omp__::__sequence__ not supported in C++ +#endif +#endif + +#if __has_cpp_attribute(__omp__::__unknown__) +#error __omp__::__unknown__ supported +#endif + +#if __has_attribute(omp::__directive__) +#ifndef __cplusplus +#error omp::__directive__ supported in C +#endif +#else +#ifdef __cplusplus +#error omp::__directive__ not supported in C++ +#endif +#endif + +#if __has_attribute(__omp__::sequence) +#ifndef __cplusplus +#error __omp__::sequence supported in C +#endif +#else +#ifdef __cplusplus +#error __omp__::sequence not supported in C++ +#endif +#endif + +#if __has_attribute(omp::__unknown__) 
+#error omp::__unknown__ supported +#endif + +#if __has_cpp_attribute(__omp__::directive) +#ifndef __cplusplus +#error __omp__::directive supported in C +#endif +#else +#ifdef __cplusplus +#error __omp__::directive not supported in C++ +#endif +#endif + +#if __has_cpp_attribute(omp::__sequence__) +#ifndef __cplusplus +#error omp::__sequence__ supported in C +#endif +#else +#ifdef __cplusplus +#error omp::__sequence__ not supported in C++ +#endif +#endif + +#if __has_cpp_attribute(__omp__::unknown) +#error __omp__::unknown supported +#endif diff --git a/gcc/testsuite/c-c++-common/gomp/attrs-2.c b/gcc/testsuite/c-c++-common/gomp/attrs-2.c new file mode 100644 index 0000000..21abcdd --- /dev/null +++ b/gcc/testsuite/c-c++-common/gomp/attrs-2.c @@ -0,0 +1,146 @@ +/* { dg-do compile } */ +/* { dg-options "-fno-openmp -fopenmp-simd" } */ + +#if __has_attribute(omp::directive) +#ifndef __cplusplus +#error omp::directive supported in C +#endif +#else +#ifdef __cplusplus +#error omp::directive not supported in C++ +#endif +#endif + +#if __has_attribute(omp::sequence) +#ifndef __cplusplus +#error omp::sequence supported in C +#endif +#else +#ifdef __cplusplus +#error omp::sequence not supported in C++ +#endif +#endif + +#if __has_attribute(omp::unknown) +#error omp::unknown supported +#endif + +#if __has_cpp_attribute(omp::directive) +#ifndef __cplusplus +#error omp::directive supported in C +#endif +#else +#ifdef __cplusplus +#error omp::directive not supported in C++ +#endif +#endif + +#if __has_cpp_attribute(omp::sequence) +#ifndef __cplusplus +#error omp::sequence supported in C +#endif +#else +#ifdef __cplusplus +#error omp::sequence not supported in C++ +#endif +#endif + +#if __has_cpp_attribute(omp::unknown) +#error omp::unknown supported +#endif + +#if __has_attribute(__omp__::__directive__) +#ifndef __cplusplus +#error __omp__::__directive__ supported in C +#endif +#else +#ifdef __cplusplus +#error __omp__::__directive__ not supported in C++ +#endif +#endif + +#if 
__has_attribute(__omp__::__sequence__) +#ifndef __cplusplus +#error __omp__::__sequence__ supported in C +#endif +#else +#ifdef __cplusplus +#error __omp__::__sequence__ not supported in C++ +#endif +#endif + +#if __has_attribute(__omp__::__unknown__) +#error __omp__::__unknown__ supported +#endif + +#if __has_cpp_attribute(__omp__::__directive__) +#ifndef __cplusplus +#error __omp__::__directive__ supported in C +#endif +#else +#ifdef __cplusplus +#error __omp__::__directive__ not supported in C++ +#endif +#endif + +#if __has_cpp_attribute(__omp__::__sequence__) +#ifndef __cplusplus +#error __omp__::__sequence__ supported in C +#endif +#else +#ifdef __cplusplus +#error __omp__::__sequence__ not supported in C++ +#endif +#endif + +#if __has_cpp_attribute(__omp__::__unknown__) +#error __omp__::__unknown__ supported +#endif + +#if __has_attribute(omp::__directive__) +#ifndef __cplusplus +#error omp::__directive__ supported in C +#endif +#else +#ifdef __cplusplus +#error omp::__directive__ not supported in C++ +#endif +#endif + +#if __has_attribute(__omp__::sequence) +#ifndef __cplusplus +#error __omp__::sequence supported in C +#endif +#else +#ifdef __cplusplus +#error __omp__::sequence not supported in C++ +#endif +#endif + +#if __has_attribute(omp::__unknown__) +#error omp::__unknown__ supported +#endif + +#if __has_cpp_attribute(__omp__::directive) +#ifndef __cplusplus +#error __omp__::directive supported in C +#endif +#else +#ifdef __cplusplus +#error __omp__::directive not supported in C++ +#endif +#endif + +#if __has_cpp_attribute(omp::__sequence__) +#ifndef __cplusplus +#error omp::__sequence__ supported in C +#endif +#else +#ifdef __cplusplus +#error omp::__sequence__ not supported in C++ +#endif +#endif + +#if __has_cpp_attribute(__omp__::unknown) +#error __omp__::unknown supported +#endif diff --git a/gcc/testsuite/c-c++-common/gomp/attrs-3.c b/gcc/testsuite/c-c++-common/gomp/attrs-3.c new file mode 100644 index 0000000..5900244 --- /dev/null +++ 
b/gcc/testsuite/c-c++-common/gomp/attrs-3.c @@ -0,0 +1,74 @@ +/* { dg-do compile } */ +/* { dg-options "-fno-openmp -fno-openmp-simd" } */ + +#if __has_attribute(omp::directive) +#error omp::directive supported even when -fno-openmp{,-simd} +#endif + +#if __has_attribute(omp::sequence) +#error omp::sequence supported even when -fno-openmp{,-simd} +#endif + +#if __has_attribute(omp::unknown) +#error omp::unknown supported +#endif + +#if __has_cpp_attribute(omp::directive) +#error omp::directive supported even when -fno-openmp{,-simd} +#endif + +#if __has_cpp_attribute(omp::sequence) +#error omp::sequence supported even when -fno-openmp{,-simd} +#endif + +#if __has_cpp_attribute(omp::unknown) +#error omp::unknown supported +#endif + +#if __has_attribute(__omp__::__directive__) +#error __omp__::__directive__ supported even when -fno-openmp{,-simd} +#endif + +#if __has_attribute(__omp__::__sequence__) +#error __omp__::__sequence__ supported even when -fno-openmp{,-simd} +#endif + +#if __has_attribute(__omp__::__unknown__) +#error __omp__::__unknown__ supported +#endif + +#if __has_cpp_attribute(__omp__::__directive__) +#error __omp__::__directive__ supported even when -fno-openmp{,-simd} +#endif + +#if __has_cpp_attribute(__omp__::__sequence__) +#error __omp__::__sequence__ supported even when -fno-openmp{,-simd} +#endif + +#if __has_cpp_attribute(__omp__::__unknown__) +#error __omp__::__unknown__ supported +#endif + +#if __has_attribute(omp::__directive__) +#error omp::__directive__ supported even when -fno-openmp{,-simd} +#endif + +#if __has_attribute(__omp__::sequence) +#error __omp__::sequence supported even when -fno-openmp{,-simd} +#endif + +#if __has_attribute(omp::__unknown__) +#error omp::__unknown__ supported +#endif + +#if __has_cpp_attribute(__omp__::directive) +#error __omp__::directive supported even when -fno-openmp{,-simd} +#endif + +#if __has_cpp_attribute(omp::__sequence__) +#error omp::__sequence__ supported even when -fno-openmp{,-simd} +#endif + 
+#if __has_cpp_attribute(__omp__::unknown) +#error __omp__::unknown supported +#endif -- cgit v1.1 From 484acfa4cfe9385d7b78919ca9eb2047ded8f078 Mon Sep 17 00:00:00 2001 From: Jonathan Wright Date: Tue, 6 Jul 2021 16:20:02 +0100 Subject: aarch64: Use memcpy to copy vector tables in vqtbl[234] intrinsics Use __builtin_memcpy to copy vector structures instead of building a new opaque structure one vector at a time in each of the vqtbl[234] Neon intrinsics in arm_neon.h. This simplifies the header file and also improves code generation - superfluous move instructions were emitted for every register extraction/set in this additional structure. Add new code generation tests to verify that superfluous move instructions are no longer generated for the vqtbl[234] intrinsics. gcc/ChangeLog: 2021-07-08 Jonathan Wright * config/aarch64/arm_neon.h (vqtbl2_s8): Use __builtin_memcpy instead of constructing __builtin_aarch64_simd_oi one vector at a time. (vqtbl2_u8): Likewise. (vqtbl2_p8): Likewise. (vqtbl2q_s8): Likewise. (vqtbl2q_u8): Likewise. (vqtbl2q_p8): Likewise. (vqtbl3_s8): Use __builtin_memcpy instead of constructing __builtin_aarch64_simd_ci one vector at a time. (vqtbl3_u8): Likewise. (vqtbl3_p8): Likewise. (vqtbl3q_s8): Likewise. (vqtbl3q_u8): Likewise. (vqtbl3q_p8): Likewise. (vqtbl4_s8): Use __builtin_memcpy instead of constructing __builtin_aarch64_simd_xi one vector at a time. (vqtbl4_u8): Likewise. (vqtbl4_p8): Likewise. (vqtbl4q_s8): Likewise. (vqtbl4q_u8): Likewise. (vqtbl4q_p8): Likewise. gcc/testsuite/ChangeLog: * gcc.target/aarch64/vector_structure_intrinsics.c: New test. 
--- gcc/config/aarch64/arm_neon.h | 72 ++++++---------------- .../aarch64/vector_structure_intrinsics.c | 44 +++++++++++++ 2 files changed, 62 insertions(+), 54 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/vector_structure_intrinsics.c (limited to 'gcc') diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 1048d7c..31ae86e 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -23321,8 +23321,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbl2_s8 (int8x16x2_t __tab, uint8x8_t __idx) { __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[1], 1); + __builtin_memcpy (&__o, &__tab, sizeof (__tab)); return __builtin_aarch64_qtbl2v8qi (__o, (int8x8_t)__idx); } @@ -23331,8 +23330,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbl2_u8 (uint8x16x2_t __tab, uint8x8_t __idx) { __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); + __builtin_memcpy (&__o, &__tab, sizeof (__tab)); return (uint8x8_t)__builtin_aarch64_qtbl2v8qi (__o, (int8x8_t)__idx); } @@ -23341,8 +23339,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbl2_p8 (poly8x16x2_t __tab, uint8x8_t __idx) { __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); + __builtin_memcpy (&__o, &__tab, sizeof (__tab)); return (poly8x8_t)__builtin_aarch64_qtbl2v8qi (__o, (int8x8_t)__idx); } @@ -23351,8 +23348,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbl2q_s8 (int8x16x2_t __tab, uint8x16_t __idx) { __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv16qi (__o, 
(int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); + __builtin_memcpy (&__o, &__tab, sizeof (__tab)); return __builtin_aarch64_qtbl2v16qi (__o, (int8x16_t)__idx); } @@ -23361,8 +23357,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbl2q_u8 (uint8x16x2_t __tab, uint8x16_t __idx) { __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); + __builtin_memcpy (&__o, &__tab, sizeof (__tab)); return (uint8x16_t)__builtin_aarch64_qtbl2v16qi (__o, (int8x16_t)__idx); } @@ -23371,8 +23366,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbl2q_p8 (poly8x16x2_t __tab, uint8x16_t __idx) { __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); + __builtin_memcpy (&__o, &__tab, sizeof (__tab)); return (poly8x16_t)__builtin_aarch64_qtbl2v16qi (__o, (int8x16_t)__idx); } @@ -23383,9 +23377,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbl3_s8 (int8x16x3_t __tab, uint8x8_t __idx) { __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2); + __builtin_memcpy (&__o, &__tab, sizeof (__tab)); return __builtin_aarch64_qtbl3v8qi (__o, (int8x8_t)__idx); } @@ -23394,9 +23386,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbl3_u8 (uint8x16x3_t __tab, uint8x8_t __idx) { __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1); - __o = 
__builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2); + __builtin_memcpy (&__o, &__tab, sizeof (__tab)); return (uint8x8_t)__builtin_aarch64_qtbl3v8qi (__o, (int8x8_t)__idx); } @@ -23405,9 +23395,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbl3_p8 (poly8x16x3_t __tab, uint8x8_t __idx) { __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2); + __builtin_memcpy (&__o, &__tab, sizeof (__tab)); return (poly8x8_t)__builtin_aarch64_qtbl3v8qi (__o, (int8x8_t)__idx); } @@ -23416,9 +23404,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbl3q_s8 (int8x16x3_t __tab, uint8x16_t __idx) { __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2); + __builtin_memcpy (&__o, &__tab, sizeof (__tab)); return __builtin_aarch64_qtbl3v16qi (__o, (int8x16_t)__idx); } @@ -23427,9 +23413,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbl3q_u8 (uint8x16x3_t __tab, uint8x16_t __idx) { __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2); + __builtin_memcpy (&__o, &__tab, sizeof (__tab)); return (uint8x16_t)__builtin_aarch64_qtbl3v16qi (__o, (int8x16_t)__idx); } @@ -23438,9 +23422,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbl3q_p8 (poly8x16x3_t __tab, uint8x16_t __idx) { __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv16qi (__o, 
(int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2); + __builtin_memcpy (&__o, &__tab, sizeof (__tab)); return (poly8x16_t)__builtin_aarch64_qtbl3v16qi (__o, (int8x16_t)__idx); } @@ -23451,10 +23433,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbl4_s8 (int8x16x4_t __tab, uint8x8_t __idx) { __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3); + __builtin_memcpy (&__o, &__tab, sizeof (__tab)); return __builtin_aarch64_qtbl4v8qi (__o, (int8x8_t)__idx); } @@ -23463,10 +23442,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbl4_u8 (uint8x16x4_t __tab, uint8x8_t __idx) { __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3); + __builtin_memcpy (&__o, &__tab, sizeof (__tab)); return (uint8x8_t)__builtin_aarch64_qtbl4v8qi (__o, (int8x8_t)__idx); } @@ -23475,10 +23451,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbl4_p8 (poly8x16x4_t __tab, uint8x8_t __idx) { __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3); + 
__builtin_memcpy (&__o, &__tab, sizeof (__tab)); return (poly8x8_t)__builtin_aarch64_qtbl4v8qi (__o, (int8x8_t)__idx); } @@ -23487,10 +23460,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbl4q_s8 (int8x16x4_t __tab, uint8x16_t __idx) { __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3); + __builtin_memcpy (&__o, &__tab, sizeof (__tab)); return __builtin_aarch64_qtbl4v16qi (__o, (int8x16_t)__idx); } @@ -23499,10 +23469,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbl4q_u8 (uint8x16x4_t __tab, uint8x16_t __idx) { __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3); + __builtin_memcpy (&__o, &__tab, sizeof (__tab)); return (uint8x16_t)__builtin_aarch64_qtbl4v16qi (__o, (int8x16_t)__idx); } @@ -23511,10 +23478,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbl4q_p8 (poly8x16x4_t __tab, uint8x16_t __idx) { __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3); + __builtin_memcpy (&__o, &__tab, sizeof (__tab)); return (poly8x16_t)__builtin_aarch64_qtbl4v16qi (__o, (int8x16_t)__idx); } diff --git 
a/gcc/testsuite/gcc.target/aarch64/vector_structure_intrinsics.c b/gcc/testsuite/gcc.target/aarch64/vector_structure_intrinsics.c new file mode 100644 index 0000000..0b07e9e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/vector_structure_intrinsics.c @@ -0,0 +1,44 @@ +/* { dg-do compile } */ +/* { dg-options "-O3" } */ + +#include <arm_neon.h> + +#define TEST_TBL(name, rettype, tbltype, idxtype, ts) \ + rettype test_ ## name ## _ ## ts (tbltype a, idxtype b) \ + { \ + return name ## _ ## ts (a, b); \ + } + +TEST_TBL (vqtbl2, int8x8_t, int8x16x2_t, uint8x8_t, s8) +TEST_TBL (vqtbl2, uint8x8_t, uint8x16x2_t, uint8x8_t, u8) +TEST_TBL (vqtbl2, poly8x8_t, poly8x16x2_t, uint8x8_t, p8) + +TEST_TBL (vqtbl2q, int8x16_t, int8x16x2_t, uint8x16_t, s8) +TEST_TBL (vqtbl2q, uint8x16_t, uint8x16x2_t, uint8x16_t, u8) +TEST_TBL (vqtbl2q, poly8x16_t, poly8x16x2_t, uint8x16_t, p8) + +TEST_TBL (vqtbl4, int8x8_t, int8x16x4_t, uint8x8_t, s8) +TEST_TBL (vqtbl4, uint8x8_t, uint8x16x4_t, uint8x8_t, u8) +TEST_TBL (vqtbl4, poly8x8_t, poly8x16x4_t, uint8x8_t, p8) + +TEST_TBL (vqtbl4q, int8x16_t, int8x16x4_t, uint8x16_t, s8) +TEST_TBL (vqtbl4q, uint8x16_t, uint8x16x4_t, uint8x16_t, u8) +TEST_TBL (vqtbl4q, poly8x16_t, poly8x16x4_t, uint8x16_t, p8) + +#define TEST_TBL3(name, rettype, tbltype, idxtype, ts) \ + rettype test_ ## name ## _ ## ts (idxtype a, tbltype b) \ + { \ + return name ## _ ## ts (b, a); \ + } + +TEST_TBL3 (vqtbl3, int8x8_t, int8x16x3_t, uint8x8_t, s8) +TEST_TBL3 (vqtbl3, uint8x8_t, uint8x16x3_t, uint8x8_t, u8) +TEST_TBL3 (vqtbl3, poly8x8_t, poly8x16x3_t, uint8x8_t, p8) + +TEST_TBL3 (vqtbl3q, int8x16_t, int8x16x3_t, uint8x16_t, s8) +TEST_TBL3 (vqtbl3q, uint8x16_t, uint8x16x3_t, uint8x16_t, u8) +TEST_TBL3 (vqtbl3q, poly8x16_t, poly8x16x3_t, uint8x16_t, p8) + +/* { dg-final { scan-assembler-not "mov\\t" } } */ + +/* { dg-final { scan-assembler-times "tbl\\t" 18} } */ -- cgit v1.1 From 5f65676eba16f38e5e22122e6885c0bd8e504276 Mon Sep 17 00:00:00 2001 From: Jonathan Wright Date: Thu, 8 Jul
2021 12:32:45 +0100 Subject: aarch64: Use memcpy to copy vector tables in vqtbx[234] intrinsics Use __builtin_memcpy to copy vector structures instead of building a new opaque structure one vector at a time in each of the vqtbx[234] Neon intrinsics in arm_neon.h. This simplifies the header file and also improves code generation - superfluous move instructions were emitted for every register extraction/set in this additional structure. Add new code generation tests to verify that superfluous move instructions are no longer generated for the vqtbx[234] intrinsics. gcc/ChangeLog: 2021-07-08 Jonathan Wright * config/aarch64/arm_neon.h (vqtbx2_s8): Use __builtin_memcpy instead of constructing __builtin_aarch64_simd_oi one vector at a time. (vqtbx2_u8): Likewise. (vqtbx2_p8): Likewise. (vqtbx2q_s8): Likewise. (vqtbx2q_u8): Likewise. (vqtbx2q_p8): Likewise. (vqtbx3_s8): Use __builtin_memcpy instead of constructing __builtin_aarch64_simd_ci one vector at a time. (vqtbx3_u8): Likewise. (vqtbx3_p8): Likewise. (vqtbx3q_s8): Likewise. (vqtbx3q_u8): Likewise. (vqtbx3q_p8): Likewise. (vqtbx4_s8): Use __builtin_memcpy instead of constructing __builtin_aarch64_simd_xi one vector at a time. (vqtbx4_u8): Likewise. (vqtbx4_p8): Likewise. (vqtbx4q_s8): Likewise. (vqtbx4q_u8): Likewise. (vqtbx4q_p8): Likewise. gcc/testsuite/ChangeLog: * gcc.target/aarch64/vector_structure_intrinsics.c: New tests. 
--- gcc/config/aarch64/arm_neon.h | 77 ++++++---------------- .../aarch64/vector_structure_intrinsics.c | 44 +++++++++++++ 2 files changed, 65 insertions(+), 56 deletions(-) (limited to 'gcc') diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 31ae86e..a7b8449 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -23482,15 +23482,14 @@ vqtbl4q_p8 (poly8x16x4_t __tab, uint8x16_t __idx) return (poly8x16_t)__builtin_aarch64_qtbl4v16qi (__o, (int8x16_t)__idx); } - /* vqtbx2 */ + __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbx2_s8 (int8x8_t __r, int8x16x2_t __tab, uint8x8_t __idx) { __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[1], 1); + __builtin_memcpy (&__o, &__tab, sizeof (__tab)); return __builtin_aarch64_qtbx2v8qi (__r, __o, (int8x8_t)__idx); } @@ -23499,8 +23498,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbx2_u8 (uint8x8_t __r, uint8x16x2_t __tab, uint8x8_t __idx) { __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); + __builtin_memcpy (&__o, &__tab, sizeof (__tab)); return (uint8x8_t)__builtin_aarch64_qtbx2v8qi ((int8x8_t)__r, __o, (int8x8_t)__idx); } @@ -23510,8 +23508,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbx2_p8 (poly8x8_t __r, poly8x16x2_t __tab, uint8x8_t __idx) { __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); + __builtin_memcpy (&__o, &__tab, sizeof (__tab)); return (poly8x8_t)__builtin_aarch64_qtbx2v8qi ((int8x8_t)__r, __o, (int8x8_t)__idx); } @@ -23521,8 +23518,7 @@ __attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) vqtbx2q_s8 (int8x16_t __r, int8x16x2_t __tab, uint8x16_t __idx) { __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[1], 1); + __builtin_memcpy (&__o, &__tab, sizeof (__tab)); return __builtin_aarch64_qtbx2v16qi (__r, __o, (int8x16_t)__idx); } @@ -23531,10 +23527,9 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbx2q_u8 (uint8x16_t __r, uint8x16x2_t __tab, uint8x16_t __idx) { __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); + __builtin_memcpy (&__o, &__tab, sizeof (__tab)); return (uint8x16_t)__builtin_aarch64_qtbx2v16qi ((int8x16_t)__r, __o, - (int8x16_t)__idx); + (int8x16_t)__idx); } __extension__ extern __inline poly8x16_t @@ -23542,21 +23537,19 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbx2q_p8 (poly8x16_t __r, poly8x16x2_t __tab, uint8x16_t __idx) { __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); + __builtin_memcpy (&__o, &__tab, sizeof (__tab)); return (poly8x16_t)__builtin_aarch64_qtbx2v16qi ((int8x16_t)__r, __o, (int8x16_t)__idx); } /* vqtbx3 */ + __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbx3_s8 (int8x8_t __r, int8x16x3_t __tab, uint8x8_t __idx) { __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv16qi (__o, __tab.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, __tab.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, __tab.val[2], 2); + __builtin_memcpy (&__o, &__tab, sizeof (__tab)); return __builtin_aarch64_qtbx3v8qi (__r, __o, (int8x8_t)__idx); } @@ -23565,9 +23558,7 @@ 
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbx3_u8 (uint8x8_t __r, uint8x16x3_t __tab, uint8x8_t __idx) { __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2); + __builtin_memcpy (&__o, &__tab, sizeof (__tab)); return (uint8x8_t)__builtin_aarch64_qtbx3v8qi ((int8x8_t)__r, __o, (int8x8_t)__idx); } @@ -23577,9 +23568,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbx3_p8 (poly8x8_t __r, poly8x16x3_t __tab, uint8x8_t __idx) { __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2); + __builtin_memcpy (&__o, &__tab, sizeof (__tab)); return (poly8x8_t)__builtin_aarch64_qtbx3v8qi ((int8x8_t)__r, __o, (int8x8_t)__idx); } @@ -23589,9 +23578,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbx3q_s8 (int8x16_t __r, int8x16x3_t __tab, uint8x16_t __idx) { __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv16qi (__o, __tab.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, __tab.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, __tab.val[2], 2); + __builtin_memcpy (&__o, &__tab, sizeof (__tab)); return __builtin_aarch64_qtbx3v16qi (__r, __o, (int8x16_t)__idx); } @@ -23600,9 +23587,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbx3q_u8 (uint8x16_t __r, uint8x16x3_t __tab, uint8x16_t __idx) { __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, 
(int8x16_t)__tab.val[2], 2); + __builtin_memcpy (&__o, &__tab, sizeof (__tab)); return (uint8x16_t)__builtin_aarch64_qtbx3v16qi ((int8x16_t)__r, __o, (int8x16_t)__idx); } @@ -23612,9 +23597,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbx3q_p8 (poly8x16_t __r, poly8x16x3_t __tab, uint8x16_t __idx) { __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2); + __builtin_memcpy (&__o, &__tab, sizeof (__tab)); return (poly8x16_t)__builtin_aarch64_qtbx3v16qi ((int8x16_t)__r, __o, (int8x16_t)__idx); } @@ -23626,10 +23609,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbx4_s8 (int8x8_t __r, int8x16x4_t __tab, uint8x8_t __idx) { __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv16qi (__o, __tab.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, __tab.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, __tab.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, __tab.val[3], 3); + __builtin_memcpy (&__o, &__tab, sizeof (__tab)); return __builtin_aarch64_qtbx4v8qi (__r, __o, (int8x8_t)__idx); } @@ -23638,10 +23618,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbx4_u8 (uint8x8_t __r, uint8x16x4_t __tab, uint8x8_t __idx) { __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3); + __builtin_memcpy (&__o, &__tab, sizeof (__tab)); return (uint8x8_t)__builtin_aarch64_qtbx4v8qi ((int8x8_t)__r, __o, (int8x8_t)__idx); } @@ -23651,10 +23628,7 @@ __attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) vqtbx4_p8 (poly8x8_t __r, poly8x16x4_t __tab, uint8x8_t __idx) { __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3); + __builtin_memcpy (&__o, &__tab, sizeof (__tab)); return (poly8x8_t)__builtin_aarch64_qtbx4v8qi ((int8x8_t)__r, __o, (int8x8_t)__idx); } @@ -23664,10 +23638,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbx4q_s8 (int8x16_t __r, int8x16x4_t __tab, uint8x16_t __idx) { __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv16qi (__o, __tab.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, __tab.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, __tab.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, __tab.val[3], 3); + __builtin_memcpy (&__o, &__tab, sizeof (__tab)); return __builtin_aarch64_qtbx4v16qi (__r, __o, (int8x16_t)__idx); } @@ -23676,10 +23647,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbx4q_u8 (uint8x16_t __r, uint8x16x4_t __tab, uint8x16_t __idx) { __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3); + __builtin_memcpy (&__o, &__tab, sizeof (__tab)); return (uint8x16_t)__builtin_aarch64_qtbx4v16qi ((int8x16_t)__r, __o, (int8x16_t)__idx); } @@ -23689,10 +23657,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqtbx4q_p8 (poly8x16_t __r, poly8x16x4_t __tab, uint8x16_t __idx) { __builtin_aarch64_simd_xi __o; - __o = 
__builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3); + __builtin_memcpy (&__o, &__tab, sizeof (__tab)); return (poly8x16_t)__builtin_aarch64_qtbx4v16qi ((int8x16_t)__r, __o, (int8x16_t)__idx); } diff --git a/gcc/testsuite/gcc.target/aarch64/vector_structure_intrinsics.c b/gcc/testsuite/gcc.target/aarch64/vector_structure_intrinsics.c index 0b07e9e..b60e80e 100644 --- a/gcc/testsuite/gcc.target/aarch64/vector_structure_intrinsics.c +++ b/gcc/testsuite/gcc.target/aarch64/vector_structure_intrinsics.c @@ -39,6 +39,50 @@ TEST_TBL3 (vqtbl3q, int8x16_t, int8x16x3_t, uint8x16_t, s8) TEST_TBL3 (vqtbl3q, uint8x16_t, uint8x16x3_t, uint8x16_t, u8) TEST_TBL3 (vqtbl3q, poly8x16_t, poly8x16x3_t, uint8x16_t, p8) +#define TEST_TBX2(name, rettype, tbltype, idxtype, ts) \ + rettype test_ ## name ## _ ## ts (rettype a, idxtype b, tbltype c) \ + { \ + return name ## _ ## ts (a, c, b); \ + } + +TEST_TBX2 (vqtbx2, int8x8_t, int8x16x2_t, uint8x8_t, s8) +TEST_TBX2 (vqtbx2, uint8x8_t, uint8x16x2_t, uint8x8_t, u8) +TEST_TBX2 (vqtbx2, poly8x8_t, poly8x16x2_t, uint8x8_t, p8) + +TEST_TBX2 (vqtbx2q, int8x16_t, int8x16x2_t, uint8x16_t, s8) +TEST_TBX2 (vqtbx2q, uint8x16_t, uint8x16x2_t, uint8x16_t, u8) +TEST_TBX2 (vqtbx2q, poly8x16_t, poly8x16x2_t, uint8x16_t, p8) + +#define TEST_TBX3(name, rettype, tbltype, idxtype, ts) \ + rettype test_ ## name ## _ ## ts (rettype a, tbltype b, idxtype c) \ + { \ + return name ## _ ## ts (a, b, c); \ + } + +TEST_TBX3 (vqtbx3, int8x8_t, int8x16x3_t, uint8x8_t, s8) +TEST_TBX3 (vqtbx3, uint8x8_t, uint8x16x3_t, uint8x8_t, u8) +TEST_TBX3 (vqtbx3, poly8x8_t, poly8x16x3_t, uint8x8_t, p8) + +TEST_TBX3 (vqtbx3q, int8x16_t, int8x16x3_t, uint8x16_t, s8) +TEST_TBX3 (vqtbx3q, uint8x16_t, uint8x16x3_t, uint8x16_t, u8) +TEST_TBX3 
(vqtbx3q, poly8x16_t, poly8x16x3_t, uint8x16_t, p8) + +#define TEST_TBX4(name, rettype, tbltype, idxtype, ts) \ + rettype test_ ## name ## _ ## ts (rettype a, idxtype b, idxtype dummy1, \ + idxtype dummy2, tbltype c) \ + { \ + return name ## _ ## ts (a, c, b); \ + } + +TEST_TBX4 (vqtbx4, int8x8_t, int8x16x4_t, uint8x8_t, s8) +TEST_TBX4 (vqtbx4, uint8x8_t, uint8x16x4_t, uint8x8_t, u8) +TEST_TBX4 (vqtbx4, poly8x8_t, poly8x16x4_t, uint8x8_t, p8) + +TEST_TBX4 (vqtbx4q, int8x16_t, int8x16x4_t, uint8x16_t, s8) +TEST_TBX4 (vqtbx4q, uint8x16_t, uint8x16x4_t, uint8x16_t, u8) +TEST_TBX4 (vqtbx4q, poly8x16_t, poly8x16x4_t, uint8x16_t, p8) + /* { dg-final { scan-assembler-not "mov\\t" } } */ /* { dg-final { scan-assembler-times "tbl\\t" 18} } */ +/* { dg-final { scan-assembler-times "tbx\\t" 18} } */ -- cgit v1.1 From f2f04d8b9d1f5d4fc8c3a17c7fa5ac518574f2df Mon Sep 17 00:00:00 2001 From: Jonathan Wright Date: Thu, 8 Jul 2021 23:27:54 +0100 Subject: aarch64: Use memcpy to copy vector tables in vtbl[34] intrinsics Use __builtin_memcpy to copy vector structures instead of building a new opaque structure one vector at a time in each of the vtbl[34] Neon intrinsics in arm_neon.h. This simplifies the header file and also improves code generation - superfluous move instructions were emitted for every register extraction/set in this additional structure. gcc/ChangeLog: 2021-07-08 Jonathan Wright * config/aarch64/arm_neon.h (vtbl3_s8): Use __builtin_memcpy instead of constructing __builtin_aarch64_simd_oi one vector at a time. (vtbl3_u8): Likewise. (vtbl3_p8): Likewise. (vtbl4_s8): Likewise. (vtbl4_u8): Likewise. (vtbl4_p8): Likewise. 
--- gcc/config/aarch64/arm_neon.h | 39 ++++++++++++--------------------------- 1 file changed, 12 insertions(+), 27 deletions(-) (limited to 'gcc') diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index a7b8449..0ec46ef 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -9682,11 +9682,9 @@ vtbl3_s8 (int8x8x3_t __tab, int8x8_t __idx) int8x16x2_t __temp; __builtin_aarch64_simd_oi __o; __temp.val[0] = vcombine_s8 (__tab.val[0], __tab.val[1]); - __temp.val[1] = vcombine_s8 (__tab.val[2], vcreate_s8 (__AARCH64_UINT64_C (0x0))); - __o = __builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) __temp.val[1], 1); + __temp.val[1] = vcombine_s8 (__tab.val[2], + vcreate_s8 (__AARCH64_UINT64_C (0x0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); return __builtin_aarch64_qtbl2v8qi (__o, __idx); } @@ -9697,11 +9695,9 @@ vtbl3_u8 (uint8x8x3_t __tab, uint8x8_t __idx) uint8x16x2_t __temp; __builtin_aarch64_simd_oi __o; __temp.val[0] = vcombine_u8 (__tab.val[0], __tab.val[1]); - __temp.val[1] = vcombine_u8 (__tab.val[2], vcreate_u8 (__AARCH64_UINT64_C (0x0))); - __o = __builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) __temp.val[1], 1); + __temp.val[1] = vcombine_u8 (__tab.val[2], + vcreate_u8 (__AARCH64_UINT64_C (0x0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); return (uint8x8_t)__builtin_aarch64_qtbl2v8qi (__o, (int8x8_t)__idx); } @@ -9712,11 +9708,9 @@ vtbl3_p8 (poly8x8x3_t __tab, uint8x8_t __idx) poly8x16x2_t __temp; __builtin_aarch64_simd_oi __o; __temp.val[0] = vcombine_p8 (__tab.val[0], __tab.val[1]); - __temp.val[1] = vcombine_p8 (__tab.val[2], vcreate_p8 (__AARCH64_UINT64_C (0x0))); - __o = __builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) __temp.val[1], 
1); + __temp.val[1] = vcombine_p8 (__tab.val[2], + vcreate_p8 (__AARCH64_UINT64_C (0x0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); return (poly8x8_t)__builtin_aarch64_qtbl2v8qi (__o, (int8x8_t)__idx); } @@ -9728,10 +9722,7 @@ vtbl4_s8 (int8x8x4_t __tab, int8x8_t __idx) __builtin_aarch64_simd_oi __o; __temp.val[0] = vcombine_s8 (__tab.val[0], __tab.val[1]); __temp.val[1] = vcombine_s8 (__tab.val[2], __tab.val[3]); - __o = __builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) __temp.val[1], 1); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); return __builtin_aarch64_qtbl2v8qi (__o, __idx); } @@ -9743,10 +9734,7 @@ vtbl4_u8 (uint8x8x4_t __tab, uint8x8_t __idx) __builtin_aarch64_simd_oi __o; __temp.val[0] = vcombine_u8 (__tab.val[0], __tab.val[1]); __temp.val[1] = vcombine_u8 (__tab.val[2], __tab.val[3]); - __o = __builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) __temp.val[1], 1); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); return (uint8x8_t)__builtin_aarch64_qtbl2v8qi (__o, (int8x8_t)__idx); } @@ -9758,10 +9746,7 @@ vtbl4_p8 (poly8x8x4_t __tab, uint8x8_t __idx) __builtin_aarch64_simd_oi __o; __temp.val[0] = vcombine_p8 (__tab.val[0], __tab.val[1]); __temp.val[1] = vcombine_p8 (__tab.val[2], __tab.val[3]); - __o = __builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) __temp.val[1], 1); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); return(poly8x8_t)__builtin_aarch64_qtbl2v8qi (__o, (int8x8_t)__idx); } -- cgit v1.1 From 4848e283ccaed451ddcc38edcb9f5ce9e9f2d7eb Mon Sep 17 00:00:00 2001 From: Jonathan Wright Date: Thu, 8 Jul 2021 23:27:54 +0100 Subject: aarch64: Use memcpy to copy vector tables in vtbx4 intrinsics Use __builtin_memcpy to copy vector structures instead of building a new opaque 
structure one vector at a time in each of the vtbx4 Neon intrinsics in arm_neon.h. This simplifies the header file and also improves code generation - superfluous move instructions were emitted for every register extraction/set in this additional structure. gcc/ChangeLog: 2021-07-19 Jonathan Wright * config/aarch64/arm_neon.h (vtbx4_s8): Use __builtin_memcpy instead of constructing __builtin_aarch64_simd_oi one vector at a time. (vtbx4_u8): Likewise. (vtbx4_p8): Likewise. --- gcc/config/aarch64/arm_neon.h | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) (limited to 'gcc') diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 0ec46ef..d383af3 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -28417,10 +28417,7 @@ vtbx4_s8 (int8x8_t __r, int8x8x4_t __tab, int8x8_t __idx) __builtin_aarch64_simd_oi __o; __temp.val[0] = vcombine_s8 (__tab.val[0], __tab.val[1]); __temp.val[1] = vcombine_s8 (__tab.val[2], __tab.val[3]); - __o = __builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) __temp.val[1], 1); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); return __builtin_aarch64_qtbx2v8qi (__r, __o, __idx); } @@ -28432,10 +28429,7 @@ vtbx4_u8 (uint8x8_t __r, uint8x8x4_t __tab, uint8x8_t __idx) __builtin_aarch64_simd_oi __o; __temp.val[0] = vcombine_u8 (__tab.val[0], __tab.val[1]); __temp.val[1] = vcombine_u8 (__tab.val[2], __tab.val[3]); - __o = __builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) __temp.val[1], 1); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); return (uint8x8_t)__builtin_aarch64_qtbx2v8qi ((int8x8_t)__r, __o, (int8x8_t)__idx); } @@ -28448,10 +28442,7 @@ vtbx4_p8 (poly8x8_t __r, poly8x8x4_t __tab, uint8x8_t __idx) __builtin_aarch64_simd_oi __o; __temp.val[0] = vcombine_p8 (__tab.val[0], __tab.val[1]); 
__temp.val[1] = vcombine_p8 (__tab.val[2], __tab.val[3]); - __o = __builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, - (int8x16_t) __temp.val[1], 1); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); return (poly8x8_t)__builtin_aarch64_qtbx2v8qi ((int8x8_t)__r, __o, (int8x8_t)__idx); } -- cgit v1.1 From e8de7edde6c5c3cc60f15c78422b85b4ccdc08bf Mon Sep 17 00:00:00 2001 From: Jonathan Wright Date: Tue, 20 Jul 2021 10:28:34 +0100 Subject: aarch64: Use memcpy to copy vector tables in vst4[q] intrinsics Use __builtin_memcpy to copy vector structures instead of building a new opaque structure one vector at a time in each of the vst4[q] Neon intrinsics in arm_neon.h. This simplifies the header file and also improves code generation - superfluous move instructions were emitted for every register extraction/set in this additional structure. Add new code generation tests to verify that superfluous move instructions are no longer generated for the vst4q intrinsics. gcc/ChangeLog: 2021-07-20 Jonathan Wright * config/aarch64/arm_neon.h (vst4_s64): Use __builtin_memcpy instead of constructing __builtin_aarch64_simd_xi one vector at a time. (vst4_u64): Likewise. (vst4_f64): Likewise. (vst4_s8): Likewise. (vst4_p8): Likewise. (vst4_s16): Likewise. (vst4_p16): Likewise. (vst4_s32): Likewise. (vst4_u8): Likewise. (vst4_u16): Likewise. (vst4_u32): Likewise. (vst4_f16): Likewise. (vst4_f32): Likewise. (vst4_p64): Likewise. (vst4q_s8): Likewise. (vst4q_p8): Likewise. (vst4q_s16): Likewise. (vst4q_p16): Likewise. (vst4q_s32): Likewise. (vst4q_s64): Likewise. (vst4q_u8): Likewise. (vst4q_u16): Likewise. (vst4q_u32): Likewise. (vst4q_u64): Likewise. (vst4q_f16): Likewise. (vst4q_f32): Likewise. (vst4q_f64): Likewise. (vst4q_p64): Likewise. gcc/testsuite/ChangeLog: * gcc.target/aarch64/vector_structure_intrinsics.c: Add new tests. 
--- gcc/config/aarch64/arm_neon.h | 148 ++++----------------- .../aarch64/vector_structure_intrinsics.c | 22 +++ 2 files changed, 50 insertions(+), 120 deletions(-) (limited to 'gcc') diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index d383af3..ae3ce8c 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -27914,10 +27914,7 @@ vst4_s64 (int64_t * __a, int64x1x4_t __val) __temp.val[1] = vcombine_s64 (__val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); __temp.val[2] = vcombine_s64 (__val.val[2], vcreate_s64 (__AARCH64_INT64_C (0))); __temp.val[3] = vcombine_s64 (__val.val[3], vcreate_s64 (__AARCH64_INT64_C (0))); - __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[2], 2); - __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[3], 3); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o); } @@ -27931,10 +27928,7 @@ vst4_u64 (uint64_t * __a, uint64x1x4_t __val) __temp.val[1] = vcombine_u64 (__val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); __temp.val[2] = vcombine_u64 (__val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0))); __temp.val[3] = vcombine_u64 (__val.val[3], vcreate_u64 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[2], 2); - __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[3], 3); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o); } @@ -27948,10 +27942,7 @@ vst4_f64 (float64_t * __a, float64x1x4_t __val) __temp.val[1] = vcombine_f64 (__val.val[1], vcreate_f64 
(__AARCH64_UINT64_C (0))); __temp.val[2] = vcombine_f64 (__val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0))); __temp.val[3] = vcombine_f64 (__val.val[3], vcreate_f64 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __temp.val[2], 2); - __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __temp.val[3], 3); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st4df ((__builtin_aarch64_simd_df *) __a, __o); } @@ -27965,10 +27956,7 @@ vst4_s8 (int8_t * __a, int8x8x4_t __val) __temp.val[1] = vcombine_s8 (__val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); __temp.val[2] = vcombine_s8 (__val.val[2], vcreate_s8 (__AARCH64_INT64_C (0))); __temp.val[3] = vcombine_s8 (__val.val[3], vcreate_s8 (__AARCH64_INT64_C (0))); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[3], 3); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o); } @@ -27982,10 +27970,7 @@ vst4_p8 (poly8_t * __a, poly8x8x4_t __val) __temp.val[1] = vcombine_p8 (__val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); __temp.val[2] = vcombine_p8 (__val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0))); __temp.val[3] = vcombine_p8 (__val.val[3], vcreate_p8 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) 
__temp.val[3], 3); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o); } @@ -27999,10 +27984,7 @@ vst4_s16 (int16_t * __a, int16x4x4_t __val) __temp.val[1] = vcombine_s16 (__val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); __temp.val[2] = vcombine_s16 (__val.val[2], vcreate_s16 (__AARCH64_INT64_C (0))); __temp.val[3] = vcombine_s16 (__val.val[3], vcreate_s16 (__AARCH64_INT64_C (0))); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[2], 2); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[3], 3); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o); } @@ -28016,10 +27998,7 @@ vst4_p16 (poly16_t * __a, poly16x4x4_t __val) __temp.val[1] = vcombine_p16 (__val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); __temp.val[2] = vcombine_p16 (__val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0))); __temp.val[3] = vcombine_p16 (__val.val[3], vcreate_p16 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[2], 2); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[3], 3); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o); } @@ -28033,10 +28012,7 @@ vst4_s32 (int32_t * __a, int32x2x4_t __val) __temp.val[1] = vcombine_s32 (__val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); __temp.val[2] = vcombine_s32 (__val.val[2], vcreate_s32 (__AARCH64_INT64_C (0))); __temp.val[3] = vcombine_s32 (__val.val[3], vcreate_s32 (__AARCH64_INT64_C (0))); - __o = 
__builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[2], 2); - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[3], 3); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o); } @@ -28050,10 +28026,7 @@ vst4_u8 (uint8_t * __a, uint8x8x4_t __val) __temp.val[1] = vcombine_u8 (__val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); __temp.val[2] = vcombine_u8 (__val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0))); __temp.val[3] = vcombine_u8 (__val.val[3], vcreate_u8 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[3], 3); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o); } @@ -28067,10 +28040,7 @@ vst4_u16 (uint16_t * __a, uint16x4x4_t __val) __temp.val[1] = vcombine_u16 (__val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); __temp.val[2] = vcombine_u16 (__val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0))); __temp.val[3] = vcombine_u16 (__val.val[3], vcreate_u16 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[2], 2); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[3], 3); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o); } @@ -28084,10 +28054,7 @@ vst4_u32 (uint32_t * __a, uint32x2x4_t 
__val) __temp.val[1] = vcombine_u32 (__val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); __temp.val[2] = vcombine_u32 (__val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0))); __temp.val[3] = vcombine_u32 (__val.val[3], vcreate_u32 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[2], 2); - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[3], 3); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o); } @@ -28101,10 +28068,7 @@ vst4_f16 (float16_t * __a, float16x4x4_t __val) __temp.val[1] = vcombine_f16 (__val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0))); __temp.val[2] = vcombine_f16 (__val.val[2], vcreate_f16 (__AARCH64_UINT64_C (0))); __temp.val[3] = vcombine_f16 (__val.val[3], vcreate_f16 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __temp.val[2], 2); - __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __temp.val[3], 3); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st4v4hf ((__builtin_aarch64_simd_hf *) __a, __o); } @@ -28118,10 +28082,7 @@ vst4_f32 (float32_t * __a, float32x2x4_t __val) __temp.val[1] = vcombine_f32 (__val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); __temp.val[2] = vcombine_f32 (__val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0))); __temp.val[3] = vcombine_f32 (__val.val[3], vcreate_f32 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) 
__temp.val[2], 2); - __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __temp.val[3], 3); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st4v2sf ((__builtin_aarch64_simd_sf *) __a, __o); } @@ -28135,14 +28096,7 @@ vst4_p64 (poly64_t * __a, poly64x1x4_t __val) __temp.val[1] = vcombine_p64 (__val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0))); __temp.val[2] = vcombine_p64 (__val.val[2], vcreate_p64 (__AARCH64_UINT64_C (0))); __temp.val[3] = vcombine_p64 (__val.val[3], vcreate_p64 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, - (poly64x2_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, - (poly64x2_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, - (poly64x2_t) __temp.val[2], 2); - __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, - (poly64x2_t) __temp.val[3], 3); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o); } @@ -28151,10 +28105,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst4q_s8 (int8_t * __a, int8x16x4_t __val) { __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[3], 3); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o); } @@ -28163,10 +28114,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst4q_p8 (poly8_t * __a, poly8x16x4_t __val) { __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) 
__val.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[3], 3); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o); } @@ -28175,10 +28123,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst4q_s16 (int16_t * __a, int16x8x4_t __val) { __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[2], 2); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[3], 3); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o); } @@ -28187,10 +28132,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst4q_p16 (poly16_t * __a, poly16x8x4_t __val) { __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[2], 2); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[3], 3); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o); } @@ -28199,10 +28141,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst4q_s32 (int32_t * __a, int32x4x4_t __val) { __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __val.val[2], 2); - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __val.val[3], 3); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st4v4si 
((__builtin_aarch64_simd_si *) __a, __o); } @@ -28211,10 +28150,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst4q_s64 (int64_t * __a, int64x2x4_t __val) { __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __val.val[2], 2); - __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __val.val[3], 3); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o); } @@ -28223,10 +28159,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst4q_u8 (uint8_t * __a, uint8x16x4_t __val) { __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[2], 2); - __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[3], 3); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o); } @@ -28235,10 +28168,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst4q_u16 (uint16_t * __a, uint16x8x4_t __val) { __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[2], 2); - __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[3], 3); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o); } @@ -28247,10 +28177,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst4q_u32 (uint32_t * __a, uint32x4x4_t __val) { 
__builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __val.val[2], 2); - __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __val.val[3], 3); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o); } @@ -28259,10 +28186,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst4q_u64 (uint64_t * __a, uint64x2x4_t __val) { __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __val.val[2], 2); - __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __val.val[3], 3); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o); } @@ -28271,10 +28195,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst4q_f16 (float16_t * __a, float16x8x4_t __val) { __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __val.val[2], 2); - __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __val.val[3], 3); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st4v8hf ((__builtin_aarch64_simd_hf *) __a, __o); } @@ -28283,10 +28204,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst4q_f32 (float32_t * __a, float32x4x4_t __val) { __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) 
__val.val[1], 1); - __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __val.val[2], 2); - __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __val.val[3], 3); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st4v4sf ((__builtin_aarch64_simd_sf *) __a, __o); } @@ -28295,10 +28213,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst4q_f64 (float64_t * __a, float64x2x4_t __val) { __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __val.val[2], 2); - __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __val.val[3], 3); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st4v2df ((__builtin_aarch64_simd_df *) __a, __o); } @@ -28307,14 +28222,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst4q_p64 (poly64_t * __a, poly64x2x4_t __val) { __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, - (poly64x2_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, - (poly64x2_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, - (poly64x2_t) __val.val[2], 2); - __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, - (poly64x2_t) __val.val[3], 3); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o); } diff --git a/gcc/testsuite/gcc.target/aarch64/vector_structure_intrinsics.c b/gcc/testsuite/gcc.target/aarch64/vector_structure_intrinsics.c index b60e80e..9061070 100644 --- a/gcc/testsuite/gcc.target/aarch64/vector_structure_intrinsics.c +++ b/gcc/testsuite/gcc.target/aarch64/vector_structure_intrinsics.c @@ -82,7 +82,29 @@ TEST_TBX4 (vqtbx4q, int8x16_t, int8x16x4_t, uint8x16_t, s8) TEST_TBX4 (vqtbx4q, uint8x16_t, uint8x16x4_t, uint8x16_t, u8) 
TEST_TBX4 (vqtbx4q, poly8x16_t, poly8x16x4_t, uint8x16_t, p8) +#define TEST_STX(name, tbltype, ptrtype, ts) \ + void test_ ## name ## _ ## ts (ptrtype a, tbltype b) \ + { \ + name ## _ ## ts (a, b); \ + } + +TEST_STX (vst4q, int8x16x4_t, int8_t*, s8); +TEST_STX (vst4q, uint8x16x4_t, uint8_t*, u8); +TEST_STX (vst4q, poly8x16x4_t, poly8_t*, p8); +TEST_STX (vst4q, int16x8x4_t, int16_t*, s16); +TEST_STX (vst4q, uint16x8x4_t, uint16_t*, u16); +TEST_STX (vst4q, poly16x8x4_t, poly16_t*, p16); +TEST_STX (vst4q, float16x8x4_t, float16_t*, f16); +TEST_STX (vst4q, int32x4x4_t, int32_t*, s32); +TEST_STX (vst4q, uint32x4x4_t, uint32_t*, u32); +TEST_STX (vst4q, float32x4x4_t, float32_t*, f32); +TEST_STX (vst4q, int64x2x4_t, int64_t*, s64); +TEST_STX (vst4q, uint64x2x4_t, uint64_t*, u64); +TEST_STX (vst4q, float64x2x4_t, float64_t*, f64); +TEST_STX (vst4q, poly64x2x4_t, poly64_t*, p64); + /* { dg-final { scan-assembler-not "mov\\t" } } */ /* { dg-final { scan-assembler-times "tbl\\t" 18} } */ /* { dg-final { scan-assembler-times "tbx\\t" 18} } */ +/* { dg-final { scan-assembler-times "st4\\t" 14} } */ -- cgit v1.1 From 95509ee2c135c5338b0bb69bdce63e3b20420bd3 Mon Sep 17 00:00:00 2001 From: Jonathan Wright Date: Wed, 21 Jul 2021 10:55:00 +0100 Subject: aarch64: Use memcpy to copy vector tables in vst3[q] intrinsics Use __builtin_memcpy to copy vector structures instead of building a new opaque structure one vector at a time in each of the vst3[q] Neon intrinsics in arm_neon.h. This simplifies the header file and also improves code generation - superfluous move instructions were emitted for every register extraction/set in this additional structure. Add new code generation tests to verify that superfluous move instructions are no longer generated for the vst3q intrinsics. gcc/ChangeLog: 2021-07-21 Jonathan Wright * config/aarch64/arm_neon.h (vst3_s64): Use __builtin_memcpy instead of constructing __builtin_aarch64_simd_ci one vector at a time. (vst3_u64): Likewise. 
(vst3_f64): Likewise. (vst3_s8): Likewise. (vst3_p8): Likewise. (vst3_s16): Likewise. (vst3_p16): Likewise. (vst3_s32): Likewise. (vst3_u8): Likewise. (vst3_u16): Likewise. (vst3_u32): Likewise. (vst3_f16): Likewise. (vst3_f32): Likewise. (vst3_p64): Likewise. (vst3q_s8): Likewise. (vst3q_p8): Likewise. (vst3q_s16): Likewise. (vst3q_p16): Likewise. (vst3q_s32): Likewise. (vst3q_s64): Likewise. (vst3q_u8): Likewise. (vst3q_u16): Likewise. (vst3q_u32): Likewise. (vst3q_u64): Likewise. (vst3q_f16): Likewise. (vst3q_f32): Likewise. (vst3q_f64): Likewise. (vst3q_p64): Likewise. gcc/testsuite/ChangeLog: * gcc.target/aarch64/vector_structure_intrinsics.c: Add new tests. --- gcc/config/aarch64/arm_neon.h | 118 +++++---------------- .../aarch64/vector_structure_intrinsics.c | 22 ++++ 2 files changed, 50 insertions(+), 90 deletions(-) (limited to 'gcc') diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index ae3ce8c..fde321e 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -27543,9 +27543,7 @@ vst3_s64 (int64_t * __a, int64x1x3_t __val) __temp.val[0] = vcombine_s64 (__val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); __temp.val[1] = vcombine_s64 (__val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); __temp.val[2] = vcombine_s64 (__val.val[2], vcreate_s64 (__AARCH64_INT64_C (0))); - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[2], 2); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o); } @@ -27558,9 +27556,7 @@ vst3_u64 (uint64_t * __a, uint64x1x3_t __val) __temp.val[0] = vcombine_u64 (__val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); __temp.val[1] = vcombine_u64 (__val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); __temp.val[2] = vcombine_u64 (__val.val[2], vcreate_u64 
(__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[2], 2); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o); } @@ -27573,9 +27569,7 @@ vst3_f64 (float64_t * __a, float64x1x3_t __val) __temp.val[0] = vcombine_f64 (__val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); __temp.val[1] = vcombine_f64 (__val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); __temp.val[2] = vcombine_f64 (__val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __temp.val[2], 2); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st3df ((__builtin_aarch64_simd_df *) __a, __o); } @@ -27588,9 +27582,7 @@ vst3_s8 (int8_t * __a, int8x8x3_t __val) __temp.val[0] = vcombine_s8 (__val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); __temp.val[1] = vcombine_s8 (__val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); __temp.val[2] = vcombine_s8 (__val.val[2], vcreate_s8 (__AARCH64_INT64_C (0))); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[2], 2); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o); } @@ -27603,9 +27595,7 @@ vst3_p8 (poly8_t * __a, poly8x8x3_t __val) __temp.val[0] = vcombine_p8 (__val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); __temp.val[1] = vcombine_p8 (__val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); __temp.val[2] = vcombine_p8 
(__val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[2], 2); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o); } @@ -27618,9 +27608,7 @@ vst3_s16 (int16_t * __a, int16x4x3_t __val) __temp.val[0] = vcombine_s16 (__val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); __temp.val[1] = vcombine_s16 (__val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); __temp.val[2] = vcombine_s16 (__val.val[2], vcreate_s16 (__AARCH64_INT64_C (0))); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[2], 2); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o); } @@ -27633,9 +27621,7 @@ vst3_p16 (poly16_t * __a, poly16x4x3_t __val) __temp.val[0] = vcombine_p16 (__val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); __temp.val[1] = vcombine_p16 (__val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); __temp.val[2] = vcombine_p16 (__val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[2], 2); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o); } @@ -27648,9 +27634,7 @@ vst3_s32 (int32_t * __a, int32x2x3_t __val) __temp.val[0] = vcombine_s32 (__val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); __temp.val[1] = vcombine_s32 (__val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); 
__temp.val[2] = vcombine_s32 (__val.val[2], vcreate_s32 (__AARCH64_INT64_C (0))); - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[2], 2); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o); } @@ -27663,9 +27647,7 @@ vst3_u8 (uint8_t * __a, uint8x8x3_t __val) __temp.val[0] = vcombine_u8 (__val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); __temp.val[1] = vcombine_u8 (__val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); __temp.val[2] = vcombine_u8 (__val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[2], 2); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o); } @@ -27678,9 +27660,7 @@ vst3_u16 (uint16_t * __a, uint16x4x3_t __val) __temp.val[0] = vcombine_u16 (__val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); __temp.val[1] = vcombine_u16 (__val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); __temp.val[2] = vcombine_u16 (__val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[2], 2); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o); } @@ -27693,9 +27673,7 @@ vst3_u32 (uint32_t * __a, uint32x2x3_t __val) __temp.val[0] = vcombine_u32 (__val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); __temp.val[1] = vcombine_u32 (__val.val[1], vcreate_u32 
(__AARCH64_UINT64_C (0))); __temp.val[2] = vcombine_u32 (__val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[2], 2); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o); } @@ -27708,9 +27686,7 @@ vst3_f16 (float16_t * __a, float16x4x3_t __val) __temp.val[0] = vcombine_f16 (__val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0))); __temp.val[1] = vcombine_f16 (__val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0))); __temp.val[2] = vcombine_f16 (__val.val[2], vcreate_f16 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __temp.val[2], 2); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st3v4hf ((__builtin_aarch64_simd_hf *) __a, __o); } @@ -27723,9 +27699,7 @@ vst3_f32 (float32_t * __a, float32x2x3_t __val) __temp.val[0] = vcombine_f32 (__val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); __temp.val[1] = vcombine_f32 (__val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); __temp.val[2] = vcombine_f32 (__val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __temp.val[2], 2); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st3v2sf ((__builtin_aarch64_simd_sf *) __a, __o); } @@ -27738,12 +27712,7 @@ vst3_p64 (poly64_t * __a, poly64x1x3_t __val) __temp.val[0] = vcombine_p64 (__val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0))); 
__temp.val[1] = vcombine_p64 (__val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0))); __temp.val[2] = vcombine_p64 (__val.val[2], vcreate_p64 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv2di_ssps (__o, - (poly64x2_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv2di_ssps (__o, - (poly64x2_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv2di_ssps (__o, - (poly64x2_t) __temp.val[2], 2); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o); } @@ -27752,9 +27721,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst3q_s8 (int8_t * __a, int8x16x3_t __val) { __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[2], 2); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o); } @@ -27763,9 +27730,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst3q_p8 (poly8_t * __a, poly8x16x3_t __val) { __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[2], 2); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o); } @@ -27774,9 +27739,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst3q_s16 (int16_t * __a, int16x8x3_t __val) { __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[2], 2); + 
__builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o); } @@ -27785,9 +27748,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst3q_p16 (poly16_t * __a, poly16x8x3_t __val) { __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[2], 2); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o); } @@ -27796,9 +27757,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst3q_s32 (int32_t * __a, int32x4x3_t __val) { __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[2], 2); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o); } @@ -27807,9 +27766,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst3q_s64 (int64_t * __a, int64x2x3_t __val) { __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[2], 2); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o); } @@ -27818,9 +27775,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst3q_u8 (uint8_t * __a, uint8x16x3_t __val) { __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) 
__val.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[2], 2); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o); } @@ -27829,9 +27784,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst3q_u16 (uint16_t * __a, uint16x8x3_t __val) { __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[2], 2); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o); } @@ -27840,9 +27793,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst3q_u32 (uint32_t * __a, uint32x4x3_t __val) { __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[2], 2); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o); } @@ -27851,9 +27802,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst3q_u64 (uint64_t * __a, uint64x2x3_t __val) { __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[2], 2); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o); } @@ -27862,9 +27811,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst3q_f16 (float16_t * __a, float16x8x3_t __val) { __builtin_aarch64_simd_ci __o; - __o = 
__builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __val.val[2], 2); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st3v8hf ((__builtin_aarch64_simd_hf *) __a, __o); } @@ -27873,9 +27820,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst3q_f32 (float32_t * __a, float32x4x3_t __val) { __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __val.val[2], 2); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st3v4sf ((__builtin_aarch64_simd_sf *) __a, __o); } @@ -27884,9 +27829,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst3q_f64 (float64_t * __a, float64x2x3_t __val) { __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __val.val[2], 2); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st3v2df ((__builtin_aarch64_simd_df *) __a, __o); } @@ -27895,12 +27838,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst3q_p64 (poly64_t * __a, poly64x2x3_t __val) { __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv2di_ssps (__o, - (poly64x2_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv2di_ssps (__o, - (poly64x2_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv2di_ssps (__o, - (poly64x2_t) __val.val[2], 2); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o); } diff --git 
a/gcc/testsuite/gcc.target/aarch64/vector_structure_intrinsics.c b/gcc/testsuite/gcc.target/aarch64/vector_structure_intrinsics.c index 9061070..664de1f 100644 --- a/gcc/testsuite/gcc.target/aarch64/vector_structure_intrinsics.c +++ b/gcc/testsuite/gcc.target/aarch64/vector_structure_intrinsics.c @@ -103,8 +103,30 @@ TEST_STX (vst4q, uint64x2x4_t, uint64_t*, u64); TEST_STX (vst4q, float64x2x4_t, float64_t*, f64); TEST_STX (vst4q, poly64x2x4_t, poly64_t*, p64); +#define TEST_ST3(name, tbltype, ptrtype, ts) \ + void test_ ## name ## _ ## ts (ptrtype a, int8x8_t dummy, tbltype b) \ + { \ + name ## _ ## ts (a, b); \ + } + +TEST_ST3 (vst3q, int8x16x3_t, int8_t*, s8); +TEST_ST3 (vst3q, uint8x16x3_t, uint8_t*, u8); +TEST_ST3 (vst3q, poly8x16x3_t, poly8_t*, p8); +TEST_ST3 (vst3q, int16x8x3_t, int16_t*, s16); +TEST_ST3 (vst3q, uint16x8x3_t, uint16_t*, u16); +TEST_ST3 (vst3q, poly16x8x3_t, poly16_t*, p16); +TEST_ST3 (vst3q, float16x8x3_t, float16_t*, f16); +TEST_ST3 (vst3q, int32x4x3_t, int32_t*, s32); +TEST_ST3 (vst3q, uint32x4x3_t, uint32_t*, u32); +TEST_ST3 (vst3q, float32x4x3_t, float32_t*, f32); +TEST_ST3 (vst3q, int64x2x3_t, int64_t*, s64); +TEST_ST3 (vst3q, uint64x2x3_t, uint64_t*, u64); +TEST_ST3 (vst3q, float64x2x3_t, float64_t*, f64); +TEST_ST3 (vst3q, poly64x2x3_t, poly64_t*, p64); + /* { dg-final { scan-assembler-not "mov\\t" } } */ /* { dg-final { scan-assembler-times "tbl\\t" 18} } */ /* { dg-final { scan-assembler-times "tbx\\t" 18} } */ /* { dg-final { scan-assembler-times "st4\\t" 14} } */ +/* { dg-final { scan-assembler-times "st3\\t" 14} } */ -- cgit v1.1 From 03148b8e508ea09ce62259ffb95844182c0b90c6 Mon Sep 17 00:00:00 2001 From: Jonathan Wright Date: Wed, 21 Jul 2021 12:37:01 +0100 Subject: aarch64: Use memcpy to copy vector tables in vst2[q] intrinsics Use __builtin_memcpy to copy vector structures instead of building a new opaque structure one vector at a time in each of the vst2[q] Neon intrinsics in arm_neon.h. 
This simplifies the header file and also improves code generation - superfluous move instructions were emitted for every register extraction/set in this additional structure. Add new code generation tests to verify that superfluous move instructions are no longer generated for the vst2q intrinsics. gcc/ChangeLog: 2021-07-21 Jonathan Wright * config/aarch64/arm_neon.h (vst2_s64): Use __builtin_memcpy instead of constructing __builtin_aarch64_simd_oi one vector at a time. (vst2_u64): Likewise. (vst2_f64): Likewise. (vst2_s8): Likewise. (vst2_p8): Likewise. (vst2_s16): Likewise. (vst2_p16): Likewise. (vst2_s32): Likewise. (vst2_u8): Likewise. (vst2_u16): Likewise. (vst2_u32): Likewise. (vst2_f16): Likewise. (vst2_f32): Likewise. (vst2_p64): Likewise. (vst2q_s8): Likewise. (vst2q_p8): Likewise. (vst2q_s16): Likewise. (vst2q_p16): Likewise. (vst2q_s32): Likewise. (vst2q_s64): Likewise. (vst2q_u8): Likewise. (vst2q_u16): Likewise. (vst2q_u32): Likewise. (vst2q_u64): Likewise. (vst2q_f16): Likewise. (vst2q_f32): Likewise. (vst2q_f64): Likewise. (vst2q_p64): Likewise. gcc/testsuite/ChangeLog: * gcc.target/aarch64/vector_structure_intrinsics.c: Add new tests. 
--- gcc/config/aarch64/arm_neon.h | 88 +++++++--------------- .../aarch64/vector_structure_intrinsics.c | 16 ++++ 2 files changed, 44 insertions(+), 60 deletions(-) (limited to 'gcc') diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index fde321e..0e4ab35 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -27216,8 +27216,7 @@ vst2_s64 (int64_t * __a, int64x1x2_t __val) int64x2x2_t __temp; __temp.val[0] = vcombine_s64 (__val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); __temp.val[1] = vcombine_s64 (__val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); - __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __temp.val[1], 1); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o); } @@ -27229,8 +27228,7 @@ vst2_u64 (uint64_t * __a, uint64x1x2_t __val) uint64x2x2_t __temp; __temp.val[0] = vcombine_u64 (__val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); __temp.val[1] = vcombine_u64 (__val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __temp.val[1], 1); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o); } @@ -27242,8 +27240,7 @@ vst2_f64 (float64_t * __a, float64x1x2_t __val) float64x2x2_t __temp; __temp.val[0] = vcombine_f64 (__val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); __temp.val[1] = vcombine_f64 (__val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) __temp.val[1], 1); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st2df ((__builtin_aarch64_simd_df *) __a, __o); } @@ -27255,8 +27252,7 @@ vst2_s8 
(int8_t * __a, int8x8x2_t __val) int8x16x2_t __temp; __temp.val[0] = vcombine_s8 (__val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); __temp.val[1] = vcombine_s8 (__val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[1], 1); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); } @@ -27268,8 +27264,7 @@ vst2_p8 (poly8_t * __a, poly8x8x2_t __val) poly8x16x2_t __temp; __temp.val[0] = vcombine_p8 (__val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); __temp.val[1] = vcombine_p8 (__val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[1], 1); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); } @@ -27281,8 +27276,7 @@ vst2_s16 (int16_t * __a, int16x4x2_t __val) int16x8x2_t __temp; __temp.val[0] = vcombine_s16 (__val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); __temp.val[1] = vcombine_s16 (__val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[1], 1); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); } @@ -27294,8 +27288,7 @@ vst2_p16 (poly16_t * __a, poly16x4x2_t __val) poly16x8x2_t __temp; __temp.val[0] = vcombine_p16 (__val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); __temp.val[1] = vcombine_p16 (__val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[1], 1); + __builtin_memcpy (&__o, 
&__temp, sizeof (__temp)); __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); } @@ -27307,8 +27300,7 @@ vst2_s32 (int32_t * __a, int32x2x2_t __val) int32x4x2_t __temp; __temp.val[0] = vcombine_s32 (__val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); __temp.val[1] = vcombine_s32 (__val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); - __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __temp.val[1], 1); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o); } @@ -27320,8 +27312,7 @@ vst2_u8 (uint8_t * __a, uint8x8x2_t __val) uint8x16x2_t __temp; __temp.val[0] = vcombine_u8 (__val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); __temp.val[1] = vcombine_u8 (__val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[1], 1); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); } @@ -27333,8 +27324,7 @@ vst2_u16 (uint16_t * __a, uint16x4x2_t __val) uint16x8x2_t __temp; __temp.val[0] = vcombine_u16 (__val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); __temp.val[1] = vcombine_u16 (__val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[1], 1); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); } @@ -27346,8 +27336,7 @@ vst2_u32 (uint32_t * __a, uint32x2x2_t __val) uint32x4x2_t __temp; __temp.val[0] = vcombine_u32 (__val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); __temp.val[1] = vcombine_u32 (__val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv4si (__o, 
(int32x4_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __temp.val[1], 1); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o); } @@ -27359,8 +27348,7 @@ vst2_f16 (float16_t * __a, float16x4x2_t __val) float16x8x2_t __temp; __temp.val[0] = vcombine_f16 (__val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0))); __temp.val[1] = vcombine_f16 (__val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv8hf (__o, __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv8hf (__o, __temp.val[1], 1); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st2v4hf (__a, __o); } @@ -27372,8 +27360,7 @@ vst2_f32 (float32_t * __a, float32x2x2_t __val) float32x4x2_t __temp; __temp.val[0] = vcombine_f32 (__val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); __temp.val[1] = vcombine_f32 (__val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) __temp.val[1], 1); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st2v2sf ((__builtin_aarch64_simd_sf *) __a, __o); } @@ -27385,10 +27372,7 @@ vst2_p64 (poly64_t * __a, poly64x1x2_t __val) poly64x2x2_t __temp; __temp.val[0] = vcombine_p64 (__val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0))); __temp.val[1] = vcombine_p64 (__val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv2di_ssps (__o, - (poly64x2_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv2di_ssps (__o, - (poly64x2_t) __temp.val[1], 1); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o); } @@ -27397,8 +27381,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2q_s8 (int8_t * __a, int8x16x2_t __val) { __builtin_aarch64_simd_oi __o; - __o = 
__builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[1], 1); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o); } @@ -27407,8 +27390,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2q_p8 (poly8_t * __a, poly8x16x2_t __val) { __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[1], 1); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o); } @@ -27417,8 +27399,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2q_s16 (int16_t * __a, int16x8x2_t __val) { __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[1], 1); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o); } @@ -27427,8 +27408,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2q_p16 (poly16_t * __a, poly16x8x2_t __val) { __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[1], 1); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o); } @@ -27437,8 +27417,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2q_s32 (int32_t * __a, int32x4x2_t __val) { __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __val.val[1], 1); + __builtin_memcpy (&__o, &__val, sizeof (__val)); 
__builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o); } @@ -27447,8 +27426,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2q_s64 (int64_t * __a, int64x2x2_t __val) { __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __val.val[1], 1); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o); } @@ -27457,8 +27435,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2q_u8 (uint8_t * __a, uint8x16x2_t __val) { __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[1], 1); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o); } @@ -27467,8 +27444,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2q_u16 (uint16_t * __a, uint16x8x2_t __val) { __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[1], 1); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o); } @@ -27477,8 +27453,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2q_u32 (uint32_t * __a, uint32x4x2_t __val) { __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __val.val[1], 1); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o); } @@ -27487,8 +27462,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2q_u64 (uint64_t * __a, uint64x2x2_t __val) { 
__builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __val.val[1], 1); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o); } @@ -27497,8 +27471,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2q_f16 (float16_t * __a, float16x8x2_t __val) { __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv8hf (__o, __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv8hf (__o, __val.val[1], 1); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st2v8hf (__a, __o); } @@ -27507,8 +27480,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2q_f32 (float32_t * __a, float32x4x2_t __val) { __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) __val.val[1], 1); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st2v4sf ((__builtin_aarch64_simd_sf *) __a, __o); } @@ -27517,8 +27489,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2q_f64 (float64_t * __a, float64x2x2_t __val) { __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) __val.val[1], 1); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st2v2df ((__builtin_aarch64_simd_df *) __a, __o); } @@ -27527,10 +27498,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2q_p64 (poly64_t * __a, poly64x2x2_t __val) { __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv2di_ssps (__o, - (poly64x2_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv2di_ssps (__o, - (poly64x2_t) __val.val[1], 1); + __builtin_memcpy (&__o, &__val, sizeof 
(__val)); __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o); } diff --git a/gcc/testsuite/gcc.target/aarch64/vector_structure_intrinsics.c b/gcc/testsuite/gcc.target/aarch64/vector_structure_intrinsics.c index 664de1f..5a6663a 100644 --- a/gcc/testsuite/gcc.target/aarch64/vector_structure_intrinsics.c +++ b/gcc/testsuite/gcc.target/aarch64/vector_structure_intrinsics.c @@ -103,6 +103,21 @@ TEST_STX (vst4q, uint64x2x4_t, uint64_t*, u64); TEST_STX (vst4q, float64x2x4_t, float64_t*, f64); TEST_STX (vst4q, poly64x2x4_t, poly64_t*, p64); +TEST_STX (vst2q, int8x16x2_t, int8_t*, s8); +TEST_STX (vst2q, uint8x16x2_t, uint8_t*, u8); +TEST_STX (vst2q, poly8x16x2_t, poly8_t*, p8); +TEST_STX (vst2q, int16x8x2_t, int16_t*, s16); +TEST_STX (vst2q, uint16x8x2_t, uint16_t*, u16); +TEST_STX (vst2q, poly16x8x2_t, poly16_t*, p16); +TEST_STX (vst2q, float16x8x2_t, float16_t*, f16); +TEST_STX (vst2q, int32x4x2_t, int32_t*, s32); +TEST_STX (vst2q, uint32x4x2_t, uint32_t*, u32); +TEST_STX (vst2q, float32x4x2_t, float32_t*, f32); +TEST_STX (vst2q, int64x2x2_t, int64_t*, s64); +TEST_STX (vst2q, uint64x2x2_t, uint64_t*, u64); +TEST_STX (vst2q, float64x2x2_t, float64_t*, f64); +TEST_STX (vst2q, poly64x2x2_t, poly64_t*, p64); + #define TEST_ST3(name, tbltype, ptrtype, ts) \ void test_ ## name ## _ ## ts (ptrtype a, int8x8_t dummy, tbltype b) \ { \ @@ -130,3 +145,4 @@ TEST_ST3 (vst3q, poly64x2x3_t, poly64_t*, p64); /* { dg-final { scan-assembler-times "tbx\\t" 18} } */ /* { dg-final { scan-assembler-times "st4\\t" 14} } */ /* { dg-final { scan-assembler-times "st3\\t" 14} } */ +/* { dg-final { scan-assembler-times "st2\\t" 14} } */ -- cgit v1.1 From 1711b045829d281da9da440d70f2bf410127eea4 Mon Sep 17 00:00:00 2001 From: Jonathan Wright Date: Wed, 21 Jul 2021 16:55:01 +0100 Subject: aarch64: Use memcpy to copy vector tables in vst1[q]_x4 intrinsics Use __builtin_memcpy to copy vector structures instead of using a union in each of the vst1[q]_x4 Neon intrinsics in arm_neon.h. 
Add new code generation tests to verify that superfluous move instructions are not generated for the vst1q_x4 intrinsics. gcc/ChangeLog: 2021-07-21 Jonathan Wright * config/aarch64/arm_neon.h (vst1_s8_x4): Use __builtin_memcpy instead of using a union. (vst1q_s8_x4): Likewise. (vst1_s16_x4): Likewise. (vst1q_s16_x4): Likewise. (vst1_s32_x4): Likewise. (vst1q_s32_x4): Likewise. (vst1_u8_x4): Likewise. (vst1q_u8_x4): Likewise. (vst1_u16_x4): Likewise. (vst1q_u16_x4): Likewise. (vst1_u32_x4): Likewise. (vst1q_u32_x4): Likewise. (vst1_f16_x4): Likewise. (vst1q_f16_x4): Likewise. (vst1_f32_x4): Likewise. (vst1q_f32_x4): Likewise. (vst1_p8_x4): Likewise. (vst1q_p8_x4): Likewise. (vst1_p16_x4): Likewise. (vst1q_p16_x4): Likewise. (vst1_s64_x4): Likewise. (vst1_u64_x4): Likewise. (vst1_p64_x4): Likewise. (vst1q_s64_x4): Likewise. (vst1q_u64_x4): Likewise. (vst1q_p64_x4): Likewise. (vst1_f64_x4): Likewise. (vst1q_f64_x4): Likewise. gcc/testsuite/ChangeLog: * gcc.target/aarch64/vector_structure_intrinsics.c: Add new tests. 
--- gcc/config/aarch64/arm_neon.h | 266 ++++++++++++++------- .../aarch64/vector_structure_intrinsics.c | 22 ++ 2 files changed, 204 insertions(+), 84 deletions(-) (limited to 'gcc') diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 0e4ab35..9cf16a8 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -26984,226 +26984,324 @@ vst1q_p64_x3 (poly64_t * __a, poly64x2x3_t __val) __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_s8_x4 (int8_t * __a, int8x8x4_t val) +vst1_s8_x4 (int8_t * __a, int8x8x4_t __val) { - union { int8x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4v8qi ((__builtin_aarch64_simd_qi *) __a, __u.__o); + __builtin_aarch64_simd_xi __o; + int8x16x4_t __temp; + __temp.val[0] = vcombine_s8 (__val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); + __temp.val[1] = vcombine_s8 (__val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); + __temp.val[2] = vcombine_s8 (__val.val[2], vcreate_s8 (__AARCH64_INT64_C (0))); + __temp.val[3] = vcombine_s8 (__val.val[3], vcreate_s8 (__AARCH64_INT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st1x4v8qi ((__builtin_aarch64_simd_qi *) __a, __o); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_s8_x4 (int8_t * __a, int8x16x4_t val) +vst1q_s8_x4 (int8_t * __a, int8x16x4_t __val) { - union { int8x16x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4v16qi ((__builtin_aarch64_simd_qi *) __a, __u.__o); + __builtin_aarch64_simd_xi __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st1x4v16qi ((__builtin_aarch64_simd_qi *) __a, __o); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_s16_x4 (int16_t * __a, int16x4x4_t val) +vst1_s16_x4 (int16_t * __a, int16x4x4_t __val) { - 
union { int16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4v4hi ((__builtin_aarch64_simd_hi *) __a, __u.__o); + __builtin_aarch64_simd_xi __o; + int16x8x4_t __temp; + __temp.val[0] = vcombine_s16 (__val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); + __temp.val[1] = vcombine_s16 (__val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); + __temp.val[2] = vcombine_s16 (__val.val[2], vcreate_s16 (__AARCH64_INT64_C (0))); + __temp.val[3] = vcombine_s16 (__val.val[3], vcreate_s16 (__AARCH64_INT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st1x4v4hi ((__builtin_aarch64_simd_hi *) __a, __o); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_s16_x4 (int16_t * __a, int16x8x4_t val) +vst1q_s16_x4 (int16_t * __a, int16x8x4_t __val) { - union { int16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4v8hi ((__builtin_aarch64_simd_hi *) __a, __u.__o); + __builtin_aarch64_simd_xi __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st1x4v8hi ((__builtin_aarch64_simd_hi *) __a, __o); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_s32_x4 (int32_t * __a, int32x2x4_t val) +vst1_s32_x4 (int32_t * __a, int32x2x4_t __val) { - union { int32x2x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4v2si ((__builtin_aarch64_simd_si *) __a, __u.__o); + __builtin_aarch64_simd_xi __o; + int32x4x4_t __temp; + __temp.val[0] = vcombine_s32 (__val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); + __temp.val[1] = vcombine_s32 (__val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); + __temp.val[2] = vcombine_s32 (__val.val[2], vcreate_s32 (__AARCH64_INT64_C (0))); + __temp.val[3] = vcombine_s32 (__val.val[3], vcreate_s32 (__AARCH64_INT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + 
__builtin_aarch64_st1x4v2si ((__builtin_aarch64_simd_si *) __a, __o); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_s32_x4 (int32_t * __a, int32x4x4_t val) +vst1q_s32_x4 (int32_t * __a, int32x4x4_t __val) { - union { int32x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4v4si ((__builtin_aarch64_simd_si *) __a, __u.__o); + __builtin_aarch64_simd_xi __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st1x4v4si ((__builtin_aarch64_simd_si *) __a, __o); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_u8_x4 (uint8_t * __a, uint8x8x4_t val) +vst1_u8_x4 (uint8_t * __a, uint8x8x4_t __val) { - union { uint8x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4v8qi ((__builtin_aarch64_simd_qi *) __a, __u.__o); + __builtin_aarch64_simd_xi __o; + uint8x16x4_t __temp; + __temp.val[0] = vcombine_u8 (__val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_u8 (__val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_u8 (__val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0))); + __temp.val[3] = vcombine_u8 (__val.val[3], vcreate_u8 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st1x4v8qi ((__builtin_aarch64_simd_qi *) __a, __o); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_u8_x4 (uint8_t * __a, uint8x16x4_t val) +vst1q_u8_x4 (uint8_t * __a, uint8x16x4_t __val) { - union { uint8x16x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4v16qi ((__builtin_aarch64_simd_qi *) __a, __u.__o); + __builtin_aarch64_simd_xi __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st1x4v16qi ((__builtin_aarch64_simd_qi *) __a, __o); } __extension__ extern __inline void 
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_u16_x4 (uint16_t * __a, uint16x4x4_t val) +vst1_u16_x4 (uint16_t * __a, uint16x4x4_t __val) { - union { uint16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4v4hi ((__builtin_aarch64_simd_hi *) __a, __u.__o); + __builtin_aarch64_simd_xi __o; + uint16x8x4_t __temp; + __temp.val[0] = vcombine_u16 (__val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_u16 (__val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_u16 (__val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0))); + __temp.val[3] = vcombine_u16 (__val.val[3], vcreate_u16 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st1x4v4hi ((__builtin_aarch64_simd_hi *) __a, __o); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_u16_x4 (uint16_t * __a, uint16x8x4_t val) +vst1q_u16_x4 (uint16_t * __a, uint16x8x4_t __val) { - union { uint16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4v8hi ((__builtin_aarch64_simd_hi *) __a, __u.__o); + __builtin_aarch64_simd_xi __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st1x4v8hi ((__builtin_aarch64_simd_hi *) __a, __o); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_u32_x4 (uint32_t * __a, uint32x2x4_t val) +vst1_u32_x4 (uint32_t * __a, uint32x2x4_t __val) { - union { uint32x2x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4v2si ((__builtin_aarch64_simd_si *) __a, __u.__o); + __builtin_aarch64_simd_xi __o; + uint32x4x4_t __temp; + __temp.val[0] = vcombine_u32 (__val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_u32 (__val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_u32 (__val.val[2], vcreate_u32 
(__AARCH64_UINT64_C (0))); + __temp.val[3] = vcombine_u32 (__val.val[3], vcreate_u32 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st1x4v2si ((__builtin_aarch64_simd_si *) __a, __o); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_u32_x4 (uint32_t * __a, uint32x4x4_t val) +vst1q_u32_x4 (uint32_t * __a, uint32x4x4_t __val) { - union { uint32x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4v4si ((__builtin_aarch64_simd_si *) __a, __u.__o); + __builtin_aarch64_simd_xi __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st1x4v4si ((__builtin_aarch64_simd_si *) __a, __o); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_f16_x4 (float16_t * __a, float16x4x4_t val) +vst1_f16_x4 (float16_t * __a, float16x4x4_t __val) { - union { float16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4v4hf ((__builtin_aarch64_simd_hf *) __a, __u.__o); + __builtin_aarch64_simd_xi __o; + float16x8x4_t __temp; + __temp.val[0] = vcombine_f16 (__val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_f16 (__val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_f16 (__val.val[2], vcreate_f16 (__AARCH64_UINT64_C (0))); + __temp.val[3] = vcombine_f16 (__val.val[3], vcreate_f16 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st1x4v4hf ((__builtin_aarch64_simd_hf *) __a, __o); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_f16_x4 (float16_t * __a, float16x8x4_t val) +vst1q_f16_x4 (float16_t * __a, float16x8x4_t __val) { - union { float16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4v8hf ((__builtin_aarch64_simd_hf *) __a, __u.__o); 
+ __builtin_aarch64_simd_xi __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st1x4v8hf ((__builtin_aarch64_simd_hf *) __a, __o); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_f32_x4 (float32_t * __a, float32x2x4_t val) +vst1_f32_x4 (float32_t * __a, float32x2x4_t __val) { - union { float32x2x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4v2sf ((__builtin_aarch64_simd_sf *) __a, __u.__o); + __builtin_aarch64_simd_xi __o; + float32x4x4_t __temp; + __temp.val[0] = vcombine_f32 (__val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_f32 (__val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_f32 (__val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0))); + __temp.val[3] = vcombine_f32 (__val.val[3], vcreate_f32 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st1x4v2sf ((__builtin_aarch64_simd_sf *) __a, __o); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_f32_x4 (float32_t * __a, float32x4x4_t val) +vst1q_f32_x4 (float32_t * __a, float32x4x4_t __val) { - union { float32x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4v4sf ((__builtin_aarch64_simd_sf *) __a, __u.__o); + __builtin_aarch64_simd_xi __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st1x4v4sf ((__builtin_aarch64_simd_sf *) __a, __o); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_p8_x4 (poly8_t * __a, poly8x8x4_t val) +vst1_p8_x4 (poly8_t * __a, poly8x8x4_t __val) { - union { poly8x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4v8qi ((__builtin_aarch64_simd_qi *) __a, __u.__o); + __builtin_aarch64_simd_xi __o; + poly8x16x4_t __temp; + __temp.val[0] = vcombine_p8 
(__val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_p8 (__val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_p8 (__val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0))); + __temp.val[3] = vcombine_p8 (__val.val[3], vcreate_p8 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st1x4v8qi ((__builtin_aarch64_simd_qi *) __a, __o); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_p8_x4 (poly8_t * __a, poly8x16x4_t val) +vst1q_p8_x4 (poly8_t * __a, poly8x16x4_t __val) { - union { poly8x16x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4v16qi ((__builtin_aarch64_simd_qi *) __a, __u.__o); + __builtin_aarch64_simd_xi __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st1x4v16qi ((__builtin_aarch64_simd_qi *) __a, __o); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_p16_x4 (poly16_t * __a, poly16x4x4_t val) +vst1_p16_x4 (poly16_t * __a, poly16x4x4_t __val) { - union { poly16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4v4hi ((__builtin_aarch64_simd_hi *) __a, __u.__o); + __builtin_aarch64_simd_xi __o; + poly16x8x4_t __temp; + __temp.val[0] = vcombine_p16 (__val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_p16 (__val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_p16 (__val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0))); + __temp.val[3] = vcombine_p16 (__val.val[3], vcreate_p16 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st1x4v4hi ((__builtin_aarch64_simd_hi *) __a, __o); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_p16_x4 (poly16_t * __a, poly16x8x4_t val) +vst1q_p16_x4 (poly16_t * 
__a, poly16x8x4_t __val) { - union { poly16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4v8hi ((__builtin_aarch64_simd_hi *) __a, __u.__o); + __builtin_aarch64_simd_xi __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st1x4v8hi ((__builtin_aarch64_simd_hi *) __a, __o); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_s64_x4 (int64_t * __a, int64x1x4_t val) +vst1_s64_x4 (int64_t * __a, int64x1x4_t __val) { - union { int64x1x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4di ((__builtin_aarch64_simd_di *) __a, __u.__o); + __builtin_aarch64_simd_xi __o; + int64x2x4_t __temp; + __temp.val[0] = vcombine_s64 (__val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); + __temp.val[1] = vcombine_s64 (__val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); + __temp.val[2] = vcombine_s64 (__val.val[2], vcreate_s64 (__AARCH64_INT64_C (0))); + __temp.val[3] = vcombine_s64 (__val.val[3], vcreate_s64 (__AARCH64_INT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st1x4di ((__builtin_aarch64_simd_di *) __a, __o); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_u64_x4 (uint64_t * __a, uint64x1x4_t val) +vst1_u64_x4 (uint64_t * __a, uint64x1x4_t __val) { - union { uint64x1x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4di ((__builtin_aarch64_simd_di *) __a, __u.__o); + __builtin_aarch64_simd_xi __o; + uint64x2x4_t __temp; + __temp.val[0] = vcombine_u64 (__val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_u64 (__val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_u64 (__val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0))); + __temp.val[3] = vcombine_u64 (__val.val[3], vcreate_u64 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof 
(__temp)); + __builtin_aarch64_st1x4di ((__builtin_aarch64_simd_di *) __a, __o); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_p64_x4 (poly64_t * __a, poly64x1x4_t val) +vst1_p64_x4 (poly64_t * __a, poly64x1x4_t __val) { - union { poly64x1x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4di ((__builtin_aarch64_simd_di *) __a, __u.__o); + __builtin_aarch64_simd_xi __o; + poly64x2x4_t __temp; + __temp.val[0] = vcombine_p64 (__val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_p64 (__val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_p64 (__val.val[2], vcreate_p64 (__AARCH64_UINT64_C (0))); + __temp.val[3] = vcombine_p64 (__val.val[3], vcreate_p64 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st1x4di ((__builtin_aarch64_simd_di *) __a, __o); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_s64_x4 (int64_t * __a, int64x2x4_t val) +vst1q_s64_x4 (int64_t * __a, int64x2x4_t __val) { - union { int64x2x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4v2di ((__builtin_aarch64_simd_di *) __a, __u.__o); + __builtin_aarch64_simd_xi __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st1x4v2di ((__builtin_aarch64_simd_di *) __a, __o); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_u64_x4 (uint64_t * __a, uint64x2x4_t val) +vst1q_u64_x4 (uint64_t * __a, uint64x2x4_t __val) { - union { uint64x2x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4v2di ((__builtin_aarch64_simd_di *) __a, __u.__o); + __builtin_aarch64_simd_xi __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st1x4v2di ((__builtin_aarch64_simd_di *) __a, __o); } __extension__ 
extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_p64_x4 (poly64_t * __a, poly64x2x4_t val) +vst1q_p64_x4 (poly64_t * __a, poly64x2x4_t __val) { - union { poly64x2x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4v2di ((__builtin_aarch64_simd_di *) __a, __u.__o); + __builtin_aarch64_simd_xi __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st1x4v2di ((__builtin_aarch64_simd_di *) __a, __o); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_f64_x4 (float64_t * __a, float64x1x4_t val) +vst1_f64_x4 (float64_t * __a, float64x1x4_t __val) { - union { float64x1x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4df ((__builtin_aarch64_simd_df *) __a, __u.__o); + __builtin_aarch64_simd_xi __o; + float64x2x4_t __temp; + __temp.val[0] = vcombine_f64 (__val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_f64 (__val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_f64 (__val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0))); + __temp.val[3] = vcombine_f64 (__val.val[3], vcreate_f64 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st1x4df ((__builtin_aarch64_simd_df *) __a, __o); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_f64_x4 (float64_t * __a, float64x2x4_t val) +vst1q_f64_x4 (float64_t * __a, float64x2x4_t __val) { - union { float64x2x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4v2df ((__builtin_aarch64_simd_df *) __a, __u.__o); + __builtin_aarch64_simd_xi __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st1x4v2df ((__builtin_aarch64_simd_df *) __a, __o); } /* vstn */ diff --git a/gcc/testsuite/gcc.target/aarch64/vector_structure_intrinsics.c 
b/gcc/testsuite/gcc.target/aarch64/vector_structure_intrinsics.c index 5a6663a..6537f68 100644 --- a/gcc/testsuite/gcc.target/aarch64/vector_structure_intrinsics.c +++ b/gcc/testsuite/gcc.target/aarch64/vector_structure_intrinsics.c @@ -139,6 +139,27 @@ TEST_ST3 (vst3q, uint64x2x3_t, uint64_t*, u64); TEST_ST3 (vst3q, float64x2x3_t, float64_t*, f64); TEST_ST3 (vst3q, poly64x2x3_t, poly64_t*, p64); +#define TEST_ST1xN(name, tbltype, ptrtype, ts, xn) \ + void test_ ## name ## _ ## ts ## _ ## xn (ptrtype a, tbltype b) \ + { \ + name ## _ ## ts ## _ ## xn (a, b); \ + } + +TEST_ST1xN (vst1q, int8x16x4_t, int8_t*, s8, x4); +TEST_ST1xN (vst1q, uint8x16x4_t, uint8_t*, u8, x4); +TEST_ST1xN (vst1q, poly8x16x4_t, poly8_t*, p8, x4); +TEST_ST1xN (vst1q, int16x8x4_t, int16_t*, s16, x4); +TEST_ST1xN (vst1q, uint16x8x4_t, uint16_t*, u16, x4); +TEST_ST1xN (vst1q, poly16x8x4_t, poly16_t*, p16, x4); +TEST_ST1xN (vst1q, float16x8x4_t, float16_t*, f16, x4); +TEST_ST1xN (vst1q, int32x4x4_t, int32_t*, s32, x4); +TEST_ST1xN (vst1q, uint32x4x4_t, uint32_t*, u32, x4); +TEST_ST1xN (vst1q, float32x4x4_t, float32_t*, f32, x4); +TEST_ST1xN (vst1q, int64x2x4_t, int64_t*, s64, x4); +TEST_ST1xN (vst1q, uint64x2x4_t, uint64_t*, u64, x4); +TEST_ST1xN (vst1q, poly64x2x4_t, poly64_t*, p64, x4); +TEST_ST1xN (vst1q, float64x2x4_t, float64_t*, f64, x4); + /* { dg-final { scan-assembler-not "mov\\t" } } */ /* { dg-final { scan-assembler-times "tbl\\t" 18} } */ @@ -146,3 +167,4 @@ TEST_ST3 (vst3q, poly64x2x3_t, poly64_t*, p64); /* { dg-final { scan-assembler-times "st4\\t" 14} } */ /* { dg-final { scan-assembler-times "st3\\t" 14} } */ /* { dg-final { scan-assembler-times "st2\\t" 14} } */ +/* { dg-final { scan-assembler-times "st1\\t" 14} } */ -- cgit v1.1 From 085666673db03c2e53db368d699c47032c6c5f2e Mon Sep 17 00:00:00 2001 From: "H.J. 
Lu" Date: Thu, 22 Jul 2021 05:17:27 -0700 Subject: x86: Don't return hard register when LRA is in progress Don't return hard register in ix86_gen_scratch_sse_rtx when LRA is in progress to avoid ICE when there are no available hard registers for LRA. gcc/ PR target/101504 * config/i386/i386.c (ix86_gen_scratch_sse_rtx): Don't return hard register when LRA is in progress. gcc/testsuite/ PR target/101504 * gcc.target/i386/pr101504.c: New test. --- gcc/config/i386/i386.c | 2 +- gcc/testsuite/gcc.target/i386/pr101504.c | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr101504.c (limited to 'gcc') diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index ff96134..876a19f 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -23180,7 +23180,7 @@ ix86_optab_supported_p (int op, machine_mode mode1, machine_mode, rtx ix86_gen_scratch_sse_rtx (machine_mode mode) { - if (TARGET_SSE) + if (TARGET_SSE && !lra_in_progress) return gen_rtx_REG (mode, (TARGET_64BIT ? 
LAST_REX_SSE_REG : LAST_SSE_REG)); diff --git a/gcc/testsuite/gcc.target/i386/pr101504.c b/gcc/testsuite/gcc.target/i386/pr101504.c new file mode 100644 index 0000000..2ad0405 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr101504.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=skylake" } */ + +typedef unsigned int __attribute__((__vector_size__ (32))) U; +typedef unsigned char __attribute__((__vector_size__ (64))) V; + +V g; + +U +foo (void) +{ + V v = __builtin_shufflevector (g, g, + 0, 1, 2, 0, 5, 1, 0, 1, 3, 2, 3, 0, 4, 3, 1, 2, + 2, 0, 4, 2, 3, 1, 1, 2, 3, 4, 1, 1, 0, 0, 5, 2, + 0, 3, 3, 3, 3, 4, 5, 0, 1, 5, 2, 1, 0, 1, 1, 2, + 3, 2, 0, 5, 4, 5, 1, 0, 1, 4, 4, 3, 4, 5, 2, 0); + v ^= 255; + V w = v + g; + U u = ((union { V a; U b; }) w).b + ((union { V a; U b; }) w).b[1]; + return u; +} + +/* { dg-final { scan-assembler-not "\.byte\[ \t\]+-1\n" } } */ -- cgit v1.1 From ccf6e2c21be84a478bcef4cced49879879a1104c Mon Sep 17 00:00:00 2001 From: Jonathan Wright Date: Fri, 23 Jul 2021 12:41:05 +0100 Subject: aarch64: Use memcpy to copy vector tables in vst1[q]_x3 intrinsics Use __builtin_memcpy to copy vector structures instead of building a new opaque structure one vector at a time in each of the vst1[q]_x3 Neon intrinsics in arm_neon.h. This simplifies the header file and also improves code generation - superfluous move instructions were emitted for every register extraction/set in this additional structure. Add new code generation tests to verify that superfluous move instructions are not generated for the vst1q_x3 intrinsics. gcc/ChangeLog: 2021-07-23 Jonathan Wright * config/aarch64/arm_neon.h (vst1_s64_x3): Use __builtin_memcpy instead of constructing __builtin_aarch64_simd_ci one vector at a time. (vst1_u64_x3): Likewise. (vst1_f64_x3): Likewise. (vst1_s8_x3): Likewise. (vst1_p8_x3): Likewise. (vst1_s16_x3): Likewise. (vst1_p16_x3): Likewise. (vst1_s32_x3): Likewise. (vst1_u8_x3): Likewise. (vst1_u16_x3): Likewise. 
(vst1_u32_x3): Likewise. (vst1_f16_x3): Likewise. (vst1_f32_x3): Likewise. (vst1_p64_x3): Likewise. (vst1q_s8_x3): Likewise. (vst1q_p8_x3): Likewise. (vst1q_s16_x3): Likewise. (vst1q_p16_x3): Likewise. (vst1q_s32_x3): Likewise. (vst1q_s64_x3): Likewise. (vst1q_u8_x3): Likewise. (vst1q_u16_x3): Likewise. (vst1q_u32_x3): Likewise. (vst1q_u64_x3): Likewise. (vst1q_f16_x3): Likewise. (vst1q_f32_x3): Likewise. (vst1q_f64_x3): Likewise. (vst1q_p64_x3): Likewise. gcc/testsuite/ChangeLog: * gcc.target/aarch64/vector_structure_intrinsics.c: Add new tests. --- gcc/config/aarch64/arm_neon.h | 118 +++++---------------- .../aarch64/vector_structure_intrinsics.c | 24 ++++- 2 files changed, 51 insertions(+), 91 deletions(-) (limited to 'gcc') diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 9cf16a8..47bb94c 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -26619,9 +26619,7 @@ vst1_s64_x3 (int64_t * __a, int64x1x3_t __val) __temp.val[0] = vcombine_s64 (__val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); __temp.val[1] = vcombine_s64 (__val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); __temp.val[2] = vcombine_s64 (__val.val[2], vcreate_s64 (__AARCH64_INT64_C (0))); - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[2], 2); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st1x3di ((__builtin_aarch64_simd_di *) __a, __o); } @@ -26634,9 +26632,7 @@ vst1_u64_x3 (uint64_t * __a, uint64x1x3_t __val) __temp.val[0] = vcombine_u64 (__val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); __temp.val[1] = vcombine_u64 (__val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); __temp.val[2] = vcombine_u64 (__val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[0], 0); - __o = 
__builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[2], 2); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st1x3di ((__builtin_aarch64_simd_di *) __a, __o); } @@ -26649,9 +26645,7 @@ vst1_f64_x3 (float64_t * __a, float64x1x3_t __val) __temp.val[0] = vcombine_f64 (__val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); __temp.val[1] = vcombine_f64 (__val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); __temp.val[2] = vcombine_f64 (__val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __temp.val[2], 2); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st1x3df ((__builtin_aarch64_simd_df *) __a, __o); } @@ -26664,9 +26658,7 @@ vst1_s8_x3 (int8_t * __a, int8x8x3_t __val) __temp.val[0] = vcombine_s8 (__val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); __temp.val[1] = vcombine_s8 (__val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); __temp.val[2] = vcombine_s8 (__val.val[2], vcreate_s8 (__AARCH64_INT64_C (0))); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[2], 2); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st1x3v8qi ((__builtin_aarch64_simd_qi *) __a, __o); } @@ -26679,9 +26671,7 @@ vst1_p8_x3 (poly8_t * __a, poly8x8x3_t __val) __temp.val[0] = vcombine_p8 (__val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); __temp.val[1] = vcombine_p8 (__val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); __temp.val[2] = vcombine_p8 (__val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv16qi (__o, 
(int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[2], 2); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st1x3v8qi ((__builtin_aarch64_simd_qi *) __a, __o); } @@ -26694,9 +26684,7 @@ vst1_s16_x3 (int16_t * __a, int16x4x3_t __val) __temp.val[0] = vcombine_s16 (__val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); __temp.val[1] = vcombine_s16 (__val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); __temp.val[2] = vcombine_s16 (__val.val[2], vcreate_s16 (__AARCH64_INT64_C (0))); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[2], 2); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st1x3v4hi ((__builtin_aarch64_simd_hi *) __a, __o); } @@ -26709,9 +26697,7 @@ vst1_p16_x3 (poly16_t * __a, poly16x4x3_t __val) __temp.val[0] = vcombine_p16 (__val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); __temp.val[1] = vcombine_p16 (__val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); __temp.val[2] = vcombine_p16 (__val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[2], 2); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st1x3v4hi ((__builtin_aarch64_simd_hi *) __a, __o); } @@ -26724,9 +26710,7 @@ vst1_s32_x3 (int32_t * __a, int32x2x3_t __val) __temp.val[0] = vcombine_s32 (__val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); __temp.val[1] = vcombine_s32 (__val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); __temp.val[2] = vcombine_s32 (__val.val[2], vcreate_s32 (__AARCH64_INT64_C (0))); - __o = 
__builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[2], 2); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st1x3v2si ((__builtin_aarch64_simd_si *) __a, __o); } @@ -26739,9 +26723,7 @@ vst1_u8_x3 (uint8_t * __a, uint8x8x3_t __val) __temp.val[0] = vcombine_u8 (__val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); __temp.val[1] = vcombine_u8 (__val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); __temp.val[2] = vcombine_u8 (__val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[2], 2); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st1x3v8qi ((__builtin_aarch64_simd_qi *) __a, __o); } @@ -26754,9 +26736,7 @@ vst1_u16_x3 (uint16_t * __a, uint16x4x3_t __val) __temp.val[0] = vcombine_u16 (__val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); __temp.val[1] = vcombine_u16 (__val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); __temp.val[2] = vcombine_u16 (__val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[2], 2); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st1x3v4hi ((__builtin_aarch64_simd_hi *) __a, __o); } @@ -26769,9 +26749,7 @@ vst1_u32_x3 (uint32_t * __a, uint32x2x3_t __val) __temp.val[0] = vcombine_u32 (__val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); __temp.val[1] = vcombine_u32 (__val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); __temp.val[2] = vcombine_u32 (__val.val[2], 
vcreate_u32 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[2], 2); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st1x3v2si ((__builtin_aarch64_simd_si *) __a, __o); } @@ -26784,9 +26762,7 @@ vst1_f16_x3 (float16_t * __a, float16x4x3_t __val) __temp.val[0] = vcombine_f16 (__val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0))); __temp.val[1] = vcombine_f16 (__val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0))); __temp.val[2] = vcombine_f16 (__val.val[2], vcreate_f16 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __temp.val[2], 2); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st1x3v4hf ((__builtin_aarch64_simd_hf *) __a, __o); } @@ -26799,9 +26775,7 @@ vst1_f32_x3 (float32_t * __a, float32x2x3_t __val) __temp.val[0] = vcombine_f32 (__val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); __temp.val[1] = vcombine_f32 (__val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); __temp.val[2] = vcombine_f32 (__val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __temp.val[2], 2); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st1x3v2sf ((__builtin_aarch64_simd_sf *) __a, __o); } @@ -26814,12 +26788,7 @@ vst1_p64_x3 (poly64_t * __a, poly64x1x3_t __val) __temp.val[0] = vcombine_p64 (__val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0))); __temp.val[1] = vcombine_p64 (__val.val[1], vcreate_p64 
(__AARCH64_UINT64_C (0))); __temp.val[2] = vcombine_p64 (__val.val[2], vcreate_p64 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv2di_ssps (__o, - (poly64x2_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv2di_ssps (__o, - (poly64x2_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv2di_ssps (__o, - (poly64x2_t) __temp.val[2], 2); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st1x3di ((__builtin_aarch64_simd_di *) __a, __o); } @@ -26828,9 +26797,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_s8_x3 (int8_t * __a, int8x16x3_t __val) { __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[2], 2); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st1x3v16qi ((__builtin_aarch64_simd_qi *) __a, __o); } @@ -26839,9 +26806,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_p8_x3 (poly8_t * __a, poly8x16x3_t __val) { __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[2], 2); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st1x3v16qi ((__builtin_aarch64_simd_qi *) __a, __o); } @@ -26850,9 +26815,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_s16_x3 (int16_t * __a, int16x8x3_t __val) { __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[2], 2); + __builtin_memcpy (&__o, &__val, sizeof 
(__val)); __builtin_aarch64_st1x3v8hi ((__builtin_aarch64_simd_hi *) __a, __o); } @@ -26861,9 +26824,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_p16_x3 (poly16_t * __a, poly16x8x3_t __val) { __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[2], 2); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st1x3v8hi ((__builtin_aarch64_simd_hi *) __a, __o); } @@ -26872,9 +26833,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_s32_x3 (int32_t * __a, int32x4x3_t __val) { __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[2], 2); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st1x3v4si ((__builtin_aarch64_simd_si *) __a, __o); } @@ -26883,9 +26842,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_s64_x3 (int64_t * __a, int64x2x3_t __val) { __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[2], 2); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st1x3v2di ((__builtin_aarch64_simd_di *) __a, __o); } @@ -26894,9 +26851,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_u8_x3 (uint8_t * __a, uint8x16x3_t __val) { __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[1], 1); - __o 
= __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[2], 2); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st1x3v16qi ((__builtin_aarch64_simd_qi *) __a, __o); } @@ -26905,9 +26860,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_u16_x3 (uint16_t * __a, uint16x8x3_t __val) { __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[2], 2); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st1x3v8hi ((__builtin_aarch64_simd_hi *) __a, __o); } @@ -26916,9 +26869,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_u32_x3 (uint32_t * __a, uint32x4x3_t __val) { __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[2], 2); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st1x3v4si ((__builtin_aarch64_simd_si *) __a, __o); } @@ -26927,9 +26878,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_u64_x3 (uint64_t * __a, uint64x2x3_t __val) { __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[2], 2); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st1x3v2di ((__builtin_aarch64_simd_di *) __a, __o); } @@ -26938,9 +26887,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_f16_x3 (float16_t * __a, float16x8x3_t __val) { __builtin_aarch64_simd_ci __o; - __o = 
__builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __val.val[2], 2); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st1x3v8hf ((__builtin_aarch64_simd_hf *) __a, __o); } @@ -26949,9 +26896,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_f32_x3 (float32_t * __a, float32x4x3_t __val) { __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __val.val[2], 2); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st1x3v4sf ((__builtin_aarch64_simd_sf *) __a, __o); } @@ -26960,9 +26905,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_f64_x3 (float64_t * __a, float64x2x3_t __val) { __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __val.val[2], 2); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st1x3v2df ((__builtin_aarch64_simd_df *) __a, __o); } @@ -26971,12 +26914,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_p64_x3 (poly64_t * __a, poly64x2x3_t __val) { __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv2di_ssps (__o, - (poly64x2_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv2di_ssps (__o, - (poly64x2_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv2di_ssps (__o, - (poly64x2_t) __val.val[2], 2); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st1x3v2di ((__builtin_aarch64_simd_di *) __a, __o); } diff --git 
a/gcc/testsuite/gcc.target/aarch64/vector_structure_intrinsics.c b/gcc/testsuite/gcc.target/aarch64/vector_structure_intrinsics.c index 6537f68..9dff6d8 100644 --- a/gcc/testsuite/gcc.target/aarch64/vector_structure_intrinsics.c +++ b/gcc/testsuite/gcc.target/aarch64/vector_structure_intrinsics.c @@ -160,6 +160,28 @@ TEST_ST1xN (vst1q, uint64x2x4_t, uint64_t*, u64, x4); TEST_ST1xN (vst1q, poly64x2x4_t, poly64_t*, p64, x4); TEST_ST1xN (vst1q, float64x2x4_t, float64_t*, f64, x4); +#define TEST_ST1x3(name, tbltype, ptrtype, ts, xn) \ + void test_ ## name ## _ ## ts ## _ ## xn (ptrtype a, int8x8_t dummy, \ + tbltype b) \ + { \ + name ## _ ## ts ## _ ## xn (a, b); \ + } + +TEST_ST1x3 (vst1q, int8x16x3_t, int8_t*, s8, x3); +TEST_ST1x3 (vst1q, uint8x16x3_t, uint8_t*, u8, x3); +TEST_ST1x3 (vst1q, poly8x16x3_t, poly8_t*, p8, x3); +TEST_ST1x3 (vst1q, int16x8x3_t, int16_t*, s16, x3); +TEST_ST1x3 (vst1q, uint16x8x3_t, uint16_t*, u16, x3); +TEST_ST1x3 (vst1q, poly16x8x3_t, poly16_t*, p16, x3); +TEST_ST1x3 (vst1q, float16x8x3_t, float16_t*, f16, x3); +TEST_ST1x3 (vst1q, int32x4x3_t, int32_t*, s32, x3); +TEST_ST1x3 (vst1q, uint32x4x3_t, uint32_t*, u32, x3); +TEST_ST1x3 (vst1q, float32x4x3_t, float32_t*, f32, x3); +TEST_ST1x3 (vst1q, int64x2x3_t, int64_t*, s64, x3); +TEST_ST1x3 (vst1q, uint64x2x3_t, uint64_t*, u64, x3); +TEST_ST1x3 (vst1q, poly64x2x3_t, poly64_t*, p64, x3); +TEST_ST1x3 (vst1q, float64x2x3_t, float64_t*, f64, x3); + /* { dg-final { scan-assembler-not "mov\\t" } } */ /* { dg-final { scan-assembler-times "tbl\\t" 18} } */ @@ -167,4 +189,4 @@ TEST_ST1xN (vst1q, float64x2x4_t, float64_t*, f64, x4); /* { dg-final { scan-assembler-times "st4\\t" 14} } */ /* { dg-final { scan-assembler-times "st3\\t" 14} } */ /* { dg-final { scan-assembler-times "st2\\t" 14} } */ -/* { dg-final { scan-assembler-times "st1\\t" 14} } */ +/* { dg-final { scan-assembler-times "st1\\t" 28} } */ -- cgit v1.1 From 50752b751fff56e7e2c74024bae659d5e9dea50f Mon Sep 17 00:00:00 2001 From: Jonathan 
Wright Date: Fri, 23 Jul 2021 13:41:39 +0100 Subject: aarch64: Use memcpy to copy vector tables in vst1[q]_x2 intrinsics Use __builtin_memcpy to copy vector structures instead of building a new opaque structure one vector at a time in each of the vst1[q]_x2 Neon intrinsics in arm_neon.h. This simplifies the header file and also improves code generation - superfluous move instructions were emitted for every register extraction/set in this additional structure. Add new code generation tests to verify that superfluous move instructions are not generated for the vst1q_x2 intrinsics. gcc/ChangeLog: 2021-07-23 Jonathan Wright * config/aarch64/arm_neon.h (vst1_s64_x2): Use __builtin_memcpy instead of constructing __builtin_aarch64_simd_oi one vector at a time. (vst1_u64_x2): Likewise. (vst1_f64_x2): Likewise. (vst1_s8_x2): Likewise. (vst1_p8_x2): Likewise. (vst1_s16_x2): Likewise. (vst1_p16_x2): Likewise. (vst1_s32_x2): Likewise. (vst1_u8_x2): Likewise. (vst1_u16_x2): Likewise. (vst1_u32_x2): Likewise. (vst1_f16_x2): Likewise. (vst1_f32_x2): Likewise. (vst1_p64_x2): Likewise. (vst1q_s8_x2): Likewise. (vst1q_p8_x2): Likewise. (vst1q_s16_x2): Likewise. (vst1q_p16_x2): Likewise. (vst1q_s32_x2): Likewise. (vst1q_s64_x2): Likewise. (vst1q_u8_x2): Likewise. (vst1q_u16_x2): Likewise. (vst1q_u32_x2): Likewise. (vst1q_u64_x2): Likewise. (vst1q_f16_x2): Likewise. (vst1q_f32_x2): Likewise. (vst1q_f64_x2): Likewise. (vst1q_p64_x2): Likewise. gcc/testsuite/ChangeLog: * gcc.target/aarch64/vector_structure_intrinsics.c: Add new tests. 
--- gcc/config/aarch64/arm_neon.h | 88 +++++++--------------- .../aarch64/vector_structure_intrinsics.c | 17 ++++- 2 files changed, 44 insertions(+), 61 deletions(-) (limited to 'gcc') diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 47bb94c..7523974 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -26276,8 +26276,7 @@ vst1_s64_x2 (int64_t * __a, int64x1x2_t __val) = vcombine_s64 (__val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); __temp.val[1] = vcombine_s64 (__val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); - __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __temp.val[1], 1); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st1x2di ((__builtin_aarch64_simd_di *) __a, __o); } @@ -26291,8 +26290,7 @@ vst1_u64_x2 (uint64_t * __a, uint64x1x2_t __val) = vcombine_u64 (__val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); __temp.val[1] = vcombine_u64 (__val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __temp.val[1], 1); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st1x2di ((__builtin_aarch64_simd_di *) __a, __o); } @@ -26306,8 +26304,7 @@ vst1_f64_x2 (float64_t * __a, float64x1x2_t __val) = vcombine_f64 (__val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); __temp.val[1] = vcombine_f64 (__val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) __temp.val[1], 1); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st1x2df ((__builtin_aarch64_simd_df *) __a, __o); } @@ -26321,8 +26318,7 @@ vst1_s8_x2 (int8_t * __a, int8x8x2_t __val) = vcombine_s8 (__val.val[0], vcreate_s8 
(__AARCH64_INT64_C (0))); __temp.val[1] = vcombine_s8 (__val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[1], 1); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st1x2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); } @@ -26336,8 +26332,7 @@ vst1_p8_x2 (poly8_t * __a, poly8x8x2_t __val) = vcombine_p8 (__val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); __temp.val[1] = vcombine_p8 (__val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[1], 1); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st1x2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); } @@ -26351,8 +26346,7 @@ vst1_s16_x2 (int16_t * __a, int16x4x2_t __val) = vcombine_s16 (__val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); __temp.val[1] = vcombine_s16 (__val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[1], 1); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st1x2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); } @@ -26366,8 +26360,7 @@ vst1_p16_x2 (poly16_t * __a, poly16x4x2_t __val) = vcombine_p16 (__val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); __temp.val[1] = vcombine_p16 (__val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[1], 1); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st1x2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); } @@ -26381,8 +26374,7 @@ vst1_s32_x2 (int32_t * __a, int32x2x2_t __val) = vcombine_s32 (__val.val[0], 
vcreate_s32 (__AARCH64_INT64_C (0))); __temp.val[1] = vcombine_s32 (__val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); - __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __temp.val[1], 1); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st1x2v2si ((__builtin_aarch64_simd_si *) __a, __o); } @@ -26394,8 +26386,7 @@ vst1_u8_x2 (uint8_t * __a, uint8x8x2_t __val) uint8x16x2_t __temp; __temp.val[0] = vcombine_u8 (__val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); __temp.val[1] = vcombine_u8 (__val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[1], 1); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st1x2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); } @@ -26407,8 +26398,7 @@ vst1_u16_x2 (uint16_t * __a, uint16x4x2_t __val) uint16x8x2_t __temp; __temp.val[0] = vcombine_u16 (__val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); __temp.val[1] = vcombine_u16 (__val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[1], 1); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st1x2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); } @@ -26420,8 +26410,7 @@ vst1_u32_x2 (uint32_t * __a, uint32x2x2_t __val) uint32x4x2_t __temp; __temp.val[0] = vcombine_u32 (__val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); __temp.val[1] = vcombine_u32 (__val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __temp.val[1], 1); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st1x2v2si 
((__builtin_aarch64_simd_si *) __a, __o); } @@ -26433,8 +26422,7 @@ vst1_f16_x2 (float16_t * __a, float16x4x2_t __val) float16x8x2_t __temp; __temp.val[0] = vcombine_f16 (__val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0))); __temp.val[1] = vcombine_f16 (__val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv8hf (__o, __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv8hf (__o, __temp.val[1], 1); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st1x2v4hf (__a, __o); } @@ -26446,8 +26434,7 @@ vst1_f32_x2 (float32_t * __a, float32x2x2_t __val) float32x4x2_t __temp; __temp.val[0] = vcombine_f32 (__val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); __temp.val[1] = vcombine_f32 (__val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) __temp.val[1], 1); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st1x2v2sf ((__builtin_aarch64_simd_sf *) __a, __o); } @@ -26459,10 +26446,7 @@ vst1_p64_x2 (poly64_t * __a, poly64x1x2_t __val) poly64x2x2_t __temp; __temp.val[0] = vcombine_p64 (__val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0))); __temp.val[1] = vcombine_p64 (__val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv2di_ssps (__o, - (poly64x2_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv2di_ssps (__o, - (poly64x2_t) __temp.val[1], 1); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st1x2di ((__builtin_aarch64_simd_di *) __a, __o); } @@ -26471,8 +26455,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_s8_x2 (int8_t * __a, int8x16x2_t __val) { __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[1], 1); + __builtin_memcpy (&__o, &__val, 
sizeof (__val)); __builtin_aarch64_st1x2v16qi ((__builtin_aarch64_simd_qi *) __a, __o); } @@ -26481,8 +26464,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_p8_x2 (poly8_t * __a, poly8x16x2_t __val) { __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[1], 1); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st1x2v16qi ((__builtin_aarch64_simd_qi *) __a, __o); } @@ -26491,8 +26473,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_s16_x2 (int16_t * __a, int16x8x2_t __val) { __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[1], 1); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st1x2v8hi ((__builtin_aarch64_simd_hi *) __a, __o); } @@ -26501,8 +26482,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_p16_x2 (poly16_t * __a, poly16x8x2_t __val) { __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[1], 1); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st1x2v8hi ((__builtin_aarch64_simd_hi *) __a, __o); } @@ -26511,8 +26491,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_s32_x2 (int32_t * __a, int32x4x2_t __val) { __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __val.val[1], 1); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st1x2v4si ((__builtin_aarch64_simd_si *) __a, __o); } @@ -26521,8 +26500,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_s64_x2 
(int64_t * __a, int64x2x2_t __val) { __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __val.val[1], 1); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st1x2v2di ((__builtin_aarch64_simd_di *) __a, __o); } @@ -26531,8 +26509,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_u8_x2 (uint8_t * __a, uint8x16x2_t __val) { __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[1], 1); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st1x2v16qi ((__builtin_aarch64_simd_qi *) __a, __o); } @@ -26541,8 +26518,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_u16_x2 (uint16_t * __a, uint16x8x2_t __val) { __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[1], 1); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st1x2v8hi ((__builtin_aarch64_simd_hi *) __a, __o); } @@ -26551,8 +26527,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_u32_x2 (uint32_t * __a, uint32x4x2_t __val) { __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __val.val[1], 1); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st1x2v4si ((__builtin_aarch64_simd_si *) __a, __o); } @@ -26561,8 +26536,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_u64_x2 (uint64_t * __a, uint64x2x2_t __val) { __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv2di 
(__o, (int64x2_t) __val.val[1], 1); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st1x2v2di ((__builtin_aarch64_simd_di *) __a, __o); } @@ -26571,8 +26545,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_f16_x2 (float16_t * __a, float16x8x2_t __val) { __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv8hf (__o, __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv8hf (__o, __val.val[1], 1); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st1x2v8hf (__a, __o); } @@ -26581,8 +26554,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_f32_x2 (float32_t * __a, float32x4x2_t __val) { __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) __val.val[1], 1); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st1x2v4sf ((__builtin_aarch64_simd_sf *) __a, __o); } @@ -26591,8 +26563,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_f64_x2 (float64_t * __a, float64x2x2_t __val) { __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) __val.val[1], 1); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st1x2v2df ((__builtin_aarch64_simd_df *) __a, __o); } @@ -26601,10 +26572,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_p64_x2 (poly64_t * __a, poly64x2x2_t __val) { __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv2di_ssps (__o, - (poly64x2_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv2di_ssps (__o, - (poly64x2_t) __val.val[1], 1); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st1x2v2di ((__builtin_aarch64_simd_di *) __a, __o); } diff --git 
a/gcc/testsuite/gcc.target/aarch64/vector_structure_intrinsics.c b/gcc/testsuite/gcc.target/aarch64/vector_structure_intrinsics.c index 9dff6d8..60c53bc 100644 --- a/gcc/testsuite/gcc.target/aarch64/vector_structure_intrinsics.c +++ b/gcc/testsuite/gcc.target/aarch64/vector_structure_intrinsics.c @@ -160,6 +160,21 @@ TEST_ST1xN (vst1q, uint64x2x4_t, uint64_t*, u64, x4); TEST_ST1xN (vst1q, poly64x2x4_t, poly64_t*, p64, x4); TEST_ST1xN (vst1q, float64x2x4_t, float64_t*, f64, x4); +TEST_ST1xN (vst1q, int8x16x2_t, int8_t*, s8, x2); +TEST_ST1xN (vst1q, uint8x16x2_t, uint8_t*, u8, x2); +TEST_ST1xN (vst1q, poly8x16x2_t, poly8_t*, p8, x2); +TEST_ST1xN (vst1q, int16x8x2_t, int16_t*, s16, x2); +TEST_ST1xN (vst1q, uint16x8x2_t, uint16_t*, u16, x2); +TEST_ST1xN (vst1q, poly16x8x2_t, poly16_t*, p16, x2); +TEST_ST1xN (vst1q, float16x8x2_t, float16_t*, f16, x2); +TEST_ST1xN (vst1q, int32x4x2_t, int32_t*, s32, x2); +TEST_ST1xN (vst1q, uint32x4x2_t, uint32_t*, u32, x2); +TEST_ST1xN (vst1q, float32x4x2_t, float32_t*, f32, x2); +TEST_ST1xN (vst1q, int64x2x2_t, int64_t*, s64, x2); +TEST_ST1xN (vst1q, uint64x2x2_t, uint64_t*, u64, x2); +TEST_ST1xN (vst1q, poly64x2x2_t, poly64_t*, p64, x2); +TEST_ST1xN (vst1q, float64x2x2_t, float64_t*, f64, x2); + #define TEST_ST1x3(name, tbltype, ptrtype, ts, xn) \ void test_ ## name ## _ ## ts ## _ ## xn (ptrtype a, int8x8_t dummy, \ tbltype b) \ @@ -189,4 +204,4 @@ TEST_ST1x3 (vst1q, float64x2x3_t, float64_t*, f64, x3); /* { dg-final { scan-assembler-times "st4\\t" 14} } */ /* { dg-final { scan-assembler-times "st3\\t" 14} } */ /* { dg-final { scan-assembler-times "st2\\t" 14} } */ -/* { dg-final { scan-assembler-times "st1\\t" 28} } */ +/* { dg-final { scan-assembler-times "st1\\t" 42} } */ -- cgit v1.1 From 435f90187eb4b6ddd66df692320057f303841ed6 Mon Sep 17 00:00:00 2001 From: Aldy Hernandez Date: Fri, 23 Jul 2021 16:19:59 +0200 Subject: Use range_query object in array bounds class. 
Now that all dependencies of array_bounds_checker take a range_query, we can sever the relationship with vr_values. Changing this will allow us to use the array_bounds_checker with VRP, evrp, or the ranger. Tested on x86-64 Linux. gcc/ChangeLog: * gimple-array-bounds.h (class array_bounds_checker): Change ranges type to range_query. --- gcc/gimple-array-bounds.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'gcc') diff --git a/gcc/gimple-array-bounds.h b/gcc/gimple-array-bounds.h index 1bfa2d4..fa64262 100644 --- a/gcc/gimple-array-bounds.h +++ b/gcc/gimple-array-bounds.h @@ -25,7 +25,7 @@ class array_bounds_checker friend class check_array_bounds_dom_walker; public: - array_bounds_checker (struct function *fun, class vr_values *v) + array_bounds_checker (struct function *fun, range_query *v) : fun (fun), ranges (v) { } void check (); @@ -37,7 +37,7 @@ private: const value_range *get_value_range (const_tree op); struct function *fun; - class vr_values *ranges; + range_query *ranges; }; #endif // GCC_GIMPLE_ARRAY_BOUNDS_H -- cgit v1.1 From 8408d34570c9fe9f3d22a25a76df2a4c64f08477 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Fri, 23 Jul 2021 19:55:16 +0200 Subject: expmed: Fix store_integral_bit_field [PR101562] Our documentation says that paradoxical subregs shouldn't appear in strict_low_part: '(strict_low_part (subreg:M (reg:N R) 0))' This expression code is used in only one context: as the destination operand of a 'set' expression. In addition, the operand of this expression must be a non-paradoxical 'subreg' expression. but on the testcase below that triggers UB at runtime store_integral_bit_field emits exactly that. The following patch fixes it by ensuring the requirement is satisfied. 2021-07-23 Jakub Jelinek PR rtl-optimization/101562 * expmed.c (store_integral_bit_field): Only use movstrict_optab if the operand isn't paradoxical. * gcc.c-torture/compile/pr101562.c: New test. 
--- gcc/expmed.c | 5 ++++- gcc/testsuite/gcc.c-torture/compile/pr101562.c | 21 +++++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr101562.c (limited to 'gcc') diff --git a/gcc/expmed.c b/gcc/expmed.c index 1fb6317..3143f38 100644 --- a/gcc/expmed.c +++ b/gcc/expmed.c @@ -921,7 +921,10 @@ store_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode, } subreg_off = bitnum / BITS_PER_UNIT; - if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off)) + if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off) + /* STRICT_LOW_PART must have a non-paradoxical subreg as + operand. */ + && !paradoxical_subreg_p (fieldmode, GET_MODE (arg0))) { arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off); diff --git a/gcc/testsuite/gcc.c-torture/compile/pr101562.c b/gcc/testsuite/gcc.c-torture/compile/pr101562.c new file mode 100644 index 0000000..ea4a5f7 --- /dev/null +++ b/gcc/testsuite/gcc.c-torture/compile/pr101562.c @@ -0,0 +1,21 @@ +/* PR rtl-optimization/101562 */ + +struct S { char c; }; +void baz (struct S a, struct S b); + +void +foo (void) +{ + struct S x[1]; + *(short *)&x[0] = 256; + baz (x[0], x[1]); +} + +void +bar (void) +{ + struct S x[1]; + x[0].c = 0; + x[1].c = 1; + baz (x[0], x[1]); +} -- cgit v1.1 From e314cfc371d8b2405a1d81e51b90f9fb24b9061f Mon Sep 17 00:00:00 2001 From: Harald Anlauf Date: Fri, 23 Jul 2021 21:00:10 +0200 Subject: Fortran: extend check for array arguments and reject CLASS array elements. gcc/fortran/ChangeLog: PR fortran/101536 * check.c (array_check): Adjust check for the case of CLASS arrays. gcc/testsuite/ChangeLog: PR fortran/101536 * gfortran.dg/pr101536.f90: New test. 
--- gcc/fortran/check.c | 3 +-- gcc/testsuite/gfortran.dg/pr101536.f90 | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gfortran.dg/pr101536.f90 (limited to 'gcc') diff --git a/gcc/fortran/check.c b/gcc/fortran/check.c index 27bf3a7..851af1b 100644 --- a/gcc/fortran/check.c +++ b/gcc/fortran/check.c @@ -731,12 +731,11 @@ logical_array_check (gfc_expr *array, int n) static bool array_check (gfc_expr *e, int n) { - if (e->ts.type == BT_CLASS && gfc_expr_attr (e).class_ok + if (e->rank != 0 && e->ts.type == BT_CLASS && gfc_expr_attr (e).class_ok && CLASS_DATA (e)->attr.dimension && CLASS_DATA (e)->as->rank) { gfc_add_class_array_ref (e); - return true; } if (e->rank != 0 && e->ts.type != BT_PROCEDURE) diff --git a/gcc/testsuite/gfortran.dg/pr101536.f90 b/gcc/testsuite/gfortran.dg/pr101536.f90 new file mode 100644 index 0000000..b16af00 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/pr101536.f90 @@ -0,0 +1,33 @@ +! { dg-do compile } +! PR fortran/101536 - ICE in gfc_conv_expr_descriptor + +program p + type s + class(*), allocatable :: c + end type + type t + class(*), allocatable :: c(:) + end type t + type u + integer :: c(2) + end type + type(t) :: x + x%c = [1,2,3,4] +! print *, size (x) + print *, size (x%c) + print *, size (x%c(1)) ! { dg-error "must be an array" } +contains + integer function f(x, y, z) + class(t), allocatable :: x(:) + class(u) :: y(:) + class(s) :: z + f = size (x) + f = size (x(1)) ! { dg-error "must be an array" } + f = size (y) + f = size (y%c(1)) + f = size (y(2)%c) + f = size (y(2)%c(1)) ! { dg-error "must be an array" } + f = size (z) ! { dg-error "must be an array" } + f = size (z% c) ! { dg-error "must be an array" } + end +end -- cgit v1.1 From ead235f60139edc6eb408d8d083cbb15e417b447 Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Sat, 24 Jul 2021 00:16:44 +0000 Subject: Daily bump. 
--- gcc/ChangeLog | 288 ++++++++++++++++++++++++++++++++++++++++++++++++ gcc/DATESTAMP | 2 +- gcc/analyzer/ChangeLog | 16 +++ gcc/c-family/ChangeLog | 11 ++ gcc/cp/ChangeLog | 16 +++ gcc/fortran/ChangeLog | 6 + gcc/testsuite/ChangeLog | 82 ++++++++++++++ 7 files changed, 420 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b70e99c..b86653f 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,291 @@ +2021-07-23 Jakub Jelinek + + PR rtl-optimization/101562 + * expmed.c (store_integral_bit_field): Only use movstrict_optab + if the operand isn't paradoxical. + +2021-07-23 Aldy Hernandez + + * gimple-array-bounds.h (class array_bounds_checker): Change + ranges type to range_query. + +2021-07-23 Jonathan Wright + + * config/aarch64/arm_neon.h (vst1_s64_x2): Use + __builtin_memcpy instead of constructing + __builtin_aarch64_simd_oi one vector at a time. + (vst1_u64_x2): Likewise. + (vst1_f64_x2): Likewise. + (vst1_s8_x2): Likewise. + (vst1_p8_x2): Likewise. + (vst1_s16_x2): Likewise. + (vst1_p16_x2): Likewise. + (vst1_s32_x2): Likewise. + (vst1_u8_x2): Likewise. + (vst1_u16_x2): Likewise. + (vst1_u32_x2): Likewise. + (vst1_f16_x2): Likewise. + (vst1_f32_x2): Likewise. + (vst1_p64_x2): Likewise. + (vst1q_s8_x2): Likewise. + (vst1q_p8_x2): Likewise. + (vst1q_s16_x2): Likewise. + (vst1q_p16_x2): Likewise. + (vst1q_s32_x2): Likewise. + (vst1q_s64_x2): Likewise. + (vst1q_u8_x2): Likewise. + (vst1q_u16_x2): Likewise. + (vst1q_u32_x2): Likewise. + (vst1q_u64_x2): Likewise. + (vst1q_f16_x2): Likewise. + (vst1q_f32_x2): Likewise. + (vst1q_f64_x2): Likewise. + (vst1q_p64_x2): Likewise. + +2021-07-23 Jonathan Wright + + * config/aarch64/arm_neon.h (vst1_s64_x3): Use + __builtin_memcpy instead of constructing + __builtin_aarch64_simd_ci one vector at a time. + (vst1_u64_x3): Likewise. + (vst1_f64_x3): Likewise. + (vst1_s8_x3): Likewise. + (vst1_p8_x3): Likewise. + (vst1_s16_x3): Likewise. + (vst1_p16_x3): Likewise. 
+ (vst1_s32_x3): Likewise. + (vst1_u8_x3): Likewise. + (vst1_u16_x3): Likewise. + (vst1_u32_x3): Likewise. + (vst1_f16_x3): Likewise. + (vst1_f32_x3): Likewise. + (vst1_p64_x3): Likewise. + (vst1q_s8_x3): Likewise. + (vst1q_p8_x3): Likewise. + (vst1q_s16_x3): Likewise. + (vst1q_p16_x3): Likewise. + (vst1q_s32_x3): Likewise. + (vst1q_s64_x3): Likewise. + (vst1q_u8_x3): Likewise. + (vst1q_u16_x3): Likewise. + (vst1q_u32_x3): Likewise. + (vst1q_u64_x3): Likewise. + (vst1q_f16_x3): Likewise. + (vst1q_f32_x3): Likewise. + (vst1q_f64_x3): Likewise. + (vst1q_p64_x3): Likewise. + +2021-07-23 H.J. Lu + + PR target/101504 + * config/i386/i386.c (ix86_gen_scratch_sse_rtx): Don't return + hard register when LRA is in progress. + +2021-07-23 Jonathan Wright + + * config/aarch64/arm_neon.h (vst1_s8_x4): Use + __builtin_memcpy instead of using a union. + (vst1q_s8_x4): Likewise. + (vst1_s16_x4): Likewise. + (vst1q_s16_x4): Likewise. + (vst1_s32_x4): Likewise. + (vst1q_s32_x4): Likewise. + (vst1_u8_x4): Likewise. + (vst1q_u8_x4): Likewise. + (vst1_u16_x4): Likewise. + (vst1q_u16_x4): Likewise. + (vst1_u32_x4): Likewise. + (vst1q_u32_x4): Likewise. + (vst1_f16_x4): Likewise. + (vst1q_f16_x4): Likewise. + (vst1_f32_x4): Likewise. + (vst1q_f32_x4): Likewise. + (vst1_p8_x4): Likewise. + (vst1q_p8_x4): Likewise. + (vst1_p16_x4): Likewise. + (vst1q_p16_x4): Likewise. + (vst1_s64_x4): Likewise. + (vst1_u64_x4): Likewise. + (vst1_p64_x4): Likewise. + (vst1q_s64_x4): Likewise. + (vst1q_u64_x4): Likewise. + (vst1q_p64_x4): Likewise. + (vst1_f64_x4): Likewise. + (vst1q_f64_x4): Likewise. + +2021-07-23 Jonathan Wright + + * config/aarch64/arm_neon.h (vst2_s64): Use __builtin_memcpy + instead of constructing __builtin_aarch64_simd_oi one vector + at a time. + (vst2_u64): Likewise. + (vst2_f64): Likewise. + (vst2_s8): Likewise. + (vst2_p8): Likewise. + (vst2_s16): Likewise. + (vst2_p16): Likewise. + (vst2_s32): Likewise. + (vst2_u8): Likewise. + (vst2_u16): Likewise. + (vst2_u32): Likewise. 
+ (vst2_f16): Likewise. + (vst2_f32): Likewise. + (vst2_p64): Likewise. + (vst2q_s8): Likewise. + (vst2q_p8): Likewise. + (vst2q_s16): Likewise. + (vst2q_p16): Likewise. + (vst2q_s32): Likewise. + (vst2q_s64): Likewise. + (vst2q_u8): Likewise. + (vst2q_u16): Likewise. + (vst2q_u32): Likewise. + (vst2q_u64): Likewise. + (vst2q_f16): Likewise. + (vst2q_f32): Likewise. + (vst2q_f64): Likewise. + (vst2q_p64): Likewise. + +2021-07-23 Jonathan Wright + + * config/aarch64/arm_neon.h (vst3_s64): Use __builtin_memcpy + instead of constructing __builtin_aarch64_simd_ci one vector + at a time. + (vst3_u64): Likewise. + (vst3_f64): Likewise. + (vst3_s8): Likewise. + (vst3_p8): Likewise. + (vst3_s16): Likewise. + (vst3_p16): Likewise. + (vst3_s32): Likewise. + (vst3_u8): Likewise. + (vst3_u16): Likewise. + (vst3_u32): Likewise. + (vst3_f16): Likewise. + (vst3_f32): Likewise. + (vst3_p64): Likewise. + (vst3q_s8): Likewise. + (vst3q_p8): Likewise. + (vst3q_s16): Likewise. + (vst3q_p16): Likewise. + (vst3q_s32): Likewise. + (vst3q_s64): Likewise. + (vst3q_u8): Likewise. + (vst3q_u16): Likewise. + (vst3q_u32): Likewise. + (vst3q_u64): Likewise. + (vst3q_f16): Likewise. + (vst3q_f32): Likewise. + (vst3q_f64): Likewise. + (vst3q_p64): Likewise. + +2021-07-23 Jonathan Wright + + * config/aarch64/arm_neon.h (vst4_s64): Use __builtin_memcpy + instead of constructing __builtin_aarch64_simd_xi one vector + at a time. + (vst4_u64): Likewise. + (vst4_f64): Likewise. + (vst4_s8): Likewise. + (vst4_p8): Likewise. + (vst4_s16): Likewise. + (vst4_p16): Likewise. + (vst4_s32): Likewise. + (vst4_u8): Likewise. + (vst4_u16): Likewise. + (vst4_u32): Likewise. + (vst4_f16): Likewise. + (vst4_f32): Likewise. + (vst4_p64): Likewise. + (vst4q_s8): Likewise. + (vst4q_p8): Likewise. + (vst4q_s16): Likewise. + (vst4q_p16): Likewise. + (vst4q_s32): Likewise. + (vst4q_s64): Likewise. + (vst4q_u8): Likewise. + (vst4q_u16): Likewise. + (vst4q_u32): Likewise. + (vst4q_u64): Likewise. + (vst4q_f16): Likewise. 
+ (vst4q_f32): Likewise. + (vst4q_f64): Likewise. + (vst4q_p64): Likewise. + +2021-07-23 Jonathan Wright + + * config/aarch64/arm_neon.h (vtbx4_s8): Use __builtin_memcpy + instead of constructing __builtin_aarch64_simd_oi one vector + at a time. + (vtbx4_u8): Likewise. + (vtbx4_p8): Likewise. + +2021-07-23 Jonathan Wright + + * config/aarch64/arm_neon.h (vtbl3_s8): Use __builtin_memcpy + instead of constructing __builtin_aarch64_simd_oi one vector + at a time. + (vtbl3_u8): Likewise. + (vtbl3_p8): Likewise. + (vtbl4_s8): Likewise. + (vtbl4_u8): Likewise. + (vtbl4_p8): Likewise. + +2021-07-23 Jonathan Wright + + * config/aarch64/arm_neon.h (vqtbx2_s8): Use __builtin_memcpy + instead of constructing __builtin_aarch64_simd_oi one vector + at a time. + (vqtbx2_u8): Likewise. + (vqtbx2_p8): Likewise. + (vqtbx2q_s8): Likewise. + (vqtbx2q_u8): Likewise. + (vqtbx2q_p8): Likewise. + (vqtbx3_s8): Use __builtin_memcpy instead of constructing + __builtin_aarch64_simd_ci one vector at a time. + (vqtbx3_u8): Likewise. + (vqtbx3_p8): Likewise. + (vqtbx3q_s8): Likewise. + (vqtbx3q_u8): Likewise. + (vqtbx3q_p8): Likewise. + (vqtbx4_s8): Use __builtin_memcpy instead of constructing + __builtin_aarch64_simd_xi one vector at a time. + (vqtbx4_u8): Likewise. + (vqtbx4_p8): Likewise. + (vqtbx4q_s8): Likewise. + (vqtbx4q_u8): Likewise. + (vqtbx4q_p8): Likewise. + +2021-07-23 Jonathan Wright + + * config/aarch64/arm_neon.h (vqtbl2_s8): Use __builtin_memcpy + instead of constructing __builtin_aarch64_simd_oi one vector + at a time. + (vqtbl2_u8): Likewise. + (vqtbl2_p8): Likewise. + (vqtbl2q_s8): Likewise. + (vqtbl2q_u8): Likewise. + (vqtbl2q_p8): Likewise. + (vqtbl3_s8): Use __builtin_memcpy instead of constructing + __builtin_aarch64_simd_ci one vector at a time. + (vqtbl3_u8): Likewise. + (vqtbl3_p8): Likewise. + (vqtbl3q_s8): Likewise. + (vqtbl3q_u8): Likewise. + (vqtbl3q_p8): Likewise. 
+ (vqtbl4_s8): Use __builtin_memcpy instead of constructing + __builtin_aarch64_simd_xi one vector at a time. + (vqtbl4_u8): Likewise. + (vqtbl4_p8): Likewise. + (vqtbl4q_s8): Likewise. + (vqtbl4q_u8): Likewise. + (vqtbl4q_p8): Likewise. + +2021-07-23 Haochen Gui + + PR target/100952 + * config/rs6000/rs6000.md (cstore4): Fix wrong fall through. + 2021-07-22 Andrew Pinski PR tree-optimization/10153 diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index dd60122..3b58862 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20210723 +20210724 diff --git a/gcc/analyzer/ChangeLog b/gcc/analyzer/ChangeLog index 272bf15..fd799e3 100644 --- a/gcc/analyzer/ChangeLog +++ b/gcc/analyzer/ChangeLog @@ -1,3 +1,19 @@ +2021-07-23 David Malcolm + + * diagnostic-manager.cc + (class auto_disable_complexity_checks): New. + (epath_finder::explore_feasible_paths): Use it to disable + complexity checks whilst processing the worklist. + * region-model-manager.cc + (region_model_manager::region_model_manager): Initialize + m_check_complexity. + (region_model_manager::reject_if_too_complex): Bail if + m_check_complexity is false. + * region-model.h + (region_model_manager::enable_complexity_check): New. + (region_model_manager::disable_complexity_check): New. + (region_model_manager::m_check_complexity): New. + 2021-07-21 David Malcolm PR analyzer/101547 diff --git a/gcc/c-family/ChangeLog b/gcc/c-family/ChangeLog index 55f18d9..ce5d70d 100644 --- a/gcc/c-family/ChangeLog +++ b/gcc/c-family/ChangeLog @@ -1,3 +1,14 @@ +2021-07-23 Jakub Jelinek + + * c-lex.c (c_common_has_attribute): Call canonicalize_attr_name also + on attr_id. Return 1 for omp::directive or omp::sequence in C++11 + and later. + +2021-07-23 Jakub Jelinek + + * c-pragma.h (enum pragma_kind): Add PRAGMA_OMP__START_ and + PRAGMA_OMP__LAST_ enumerators. 
+ 2021-07-21 Thomas Schwinge Joseph Myers Cesar Philippidis diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index 37ea7f5..293f620 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,19 @@ +2021-07-23 Jakub Jelinek + + * parser.h (struct cp_parser): Add omp_attrs_forbidden_p member. + * parser.c (cp_parser_handle_statement_omp_attributes): Diagnose + mixing of attribute and pragma syntax directives when seeing + omp::directive if parser->omp_attrs_forbidden_p or if attribute syntax + directives are followed by OpenMP pragma. + (cp_parser_statement): Clear parser->omp_attrs_forbidden_p after + the cp_parser_handle_statement_omp_attributes call. + (cp_parser_omp_structured_block): Add disallow_omp_attrs argument, + if true, set parser->omp_attrs_forbidden_p. + (cp_parser_omp_scan_loop_body, cp_parser_omp_sections_scope): Pass + false as disallow_omp_attrs to cp_parser_omp_structured_block. + (cp_parser_omp_parallel, cp_parser_omp_task): Set + parser->omp_attrs_forbidden_p. + 2021-07-21 Thomas Schwinge Joseph Myers Cesar Philippidis diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index 1c6aa03..e3bf9d6 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,9 @@ +2021-07-23 Harald Anlauf + + PR fortran/101536 + * check.c (array_check): Adjust check for the case of CLASS + arrays. + 2021-07-21 Thomas Schwinge Joseph Myers Cesar Philippidis diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 22ff279..681aefc 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,85 @@ +2021-07-23 Harald Anlauf + + PR fortran/101536 + * gfortran.dg/pr101536.f90: New test. + +2021-07-23 Jakub Jelinek + + PR rtl-optimization/101562 + * gcc.c-torture/compile/pr101562.c: New test. + +2021-07-23 Jonathan Wright + + * gcc.target/aarch64/vector_structure_intrinsics.c: Add new + tests. + +2021-07-23 Jonathan Wright + + * gcc.target/aarch64/vector_structure_intrinsics.c: Add new + tests. + +2021-07-23 H.J. 
Lu + + PR target/101504 + * gcc.target/i386/pr101504.c: New test. + +2021-07-23 Jonathan Wright + + * gcc.target/aarch64/vector_structure_intrinsics.c: Add new + tests. + +2021-07-23 Jonathan Wright + + * gcc.target/aarch64/vector_structure_intrinsics.c: Add new + tests. + +2021-07-23 Jonathan Wright + + * gcc.target/aarch64/vector_structure_intrinsics.c: Add new + tests. + +2021-07-23 Jonathan Wright + + * gcc.target/aarch64/vector_structure_intrinsics.c: Add new + tests. + +2021-07-23 Jonathan Wright + + * gcc.target/aarch64/vector_structure_intrinsics.c: New tests. + +2021-07-23 Jonathan Wright + + * gcc.target/aarch64/vector_structure_intrinsics.c: New test. + +2021-07-23 Jakub Jelinek + + * c-c++-common/gomp/attrs-1.c: New test. + * c-c++-common/gomp/attrs-2.c: New test. + * c-c++-common/gomp/attrs-3.c: New test. + +2021-07-23 Jakub Jelinek + + * g++.dg/gomp/attrs-4.C: New test. + * g++.dg/gomp/attrs-5.C: New test. + +2021-07-23 Xi Ruoyao + + * gcc.target/mips/mips.exp (mips_option_groups): add + -finline and -fno-inline. + +2021-07-23 Xi Ruoyao + + Revert: + 2021-07-09 Xi Ruoyao + + * gcc.target/mips/cfgcleanup-jalr2.c: Remove -fno-inline and add + __attribute__((noinline)). + * gcc.target/mips/cfgcleanup-jalr3.c: Likewise. + +2021-07-23 David Malcolm + + * gcc.dg/analyzer/feasibility-3.c: New test. + 2021-07-22 Martin Sebor PR tree-optimization/65178 -- cgit v1.1 From 5a957cd388493b4f88724700a0fed5f1d98a7bb8 Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Sun, 25 Jul 2021 00:16:22 +0000 Subject: Daily bump. 
--- gcc/DATESTAMP | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 3b58862..16113c0 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20210724 +20210725 -- cgit v1.1 From b454c40956947938c9e274d75cef8a43171f3efa Mon Sep 17 00:00:00 2001 From: Arnaud Charlet Date: Sun, 25 Jul 2021 09:23:44 -0400 Subject: [Ada] Declare time_t uniformly based on a system parameter #2 gcc/ada/ * libgnat/s-osprim__x32.adb: Add missing with clause. --- gcc/ada/libgnat/s-osprim__x32.adb | 2 ++ 1 file changed, 2 insertions(+) (limited to 'gcc') diff --git a/gcc/ada/libgnat/s-osprim__x32.adb b/gcc/ada/libgnat/s-osprim__x32.adb index d3c922c..9dc1ba9 100644 --- a/gcc/ada/libgnat/s-osprim__x32.adb +++ b/gcc/ada/libgnat/s-osprim__x32.adb @@ -31,6 +31,8 @@ -- This version is for Linux/x32 +with System.Parameters; + package body System.OS_Primitives is -- ??? These definitions are duplicated from System.OS_Interface -- cgit v1.1 From 124bb55777c280a85d0c72ec13e293a32917a6b9 Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Mon, 26 Jul 2021 00:16:23 +0000 Subject: Daily bump. --- gcc/DATESTAMP | 2 +- gcc/ada/ChangeLog | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 16113c0..e8d397bf 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20210725 +20210726 diff --git a/gcc/ada/ChangeLog b/gcc/ada/ChangeLog index a60b02a..254b562 100644 --- a/gcc/ada/ChangeLog +++ b/gcc/ada/ChangeLog @@ -1,3 +1,7 @@ +2021-07-25 Arnaud Charlet + + * libgnat/s-osprim__x32.adb: Add missing with clause. 
+ 2021-07-12 Pierre-Marie de Rodat * adaint.c (__gnat_number_of_cpus): Replace "#ifdef" by "#if -- cgit v1.1 From acf9d1fd806fabf62dfe232439b11263c191e32d Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Mon, 26 Jul 2021 09:13:47 +0200 Subject: openmp: Add support for omp attributes section and scan directives This patch adds support for expressing the section and scan directives using the attribute syntax and additionally fixes some bugs in the attribute syntax directive handling. For now it requires that the scan and section directives appear as the only attribute, not combined with other OpenMP or non-OpenMP attributes on the same statement. 2021-07-26 Jakub Jelinek * parser.h (struct cp_lexer): Add orphan_p member. * parser.c (cp_parser_statement): Don't change in_omp_attribute_pragma upon restart from CPP_PRAGMA handling. Fix up condition when a lexer should be destroyed and adjust saved_tokens if it records tokens from the to be destroyed lexer. (cp_parser_omp_section_scan): New function. (cp_parser_omp_scan_loop_body): Use it. If parser->lexer->in_omp_attribute_pragma, allow optional comma after scan. (cp_parser_omp_sections_scope): Use cp_parser_omp_section_scan. * g++.dg/gomp/attrs-1.C: Use attribute syntax even for section and scan directives. * g++.dg/gomp/attrs-2.C: Likewise. * g++.dg/gomp/attrs-6.C: New test. * g++.dg/gomp/attrs-7.C: New test. * g++.dg/gomp/attrs-8.C: New test. 
--- gcc/cp/parser.c | 105 ++++++++++++++++++++++++++++++++++-- gcc/cp/parser.h | 4 ++ gcc/testsuite/g++.dg/gomp/attrs-1.C | 12 ++--- gcc/testsuite/g++.dg/gomp/attrs-2.C | 12 ++--- gcc/testsuite/g++.dg/gomp/attrs-6.C | 50 +++++++++++++++++ gcc/testsuite/g++.dg/gomp/attrs-7.C | 64 ++++++++++++++++++++++ gcc/testsuite/g++.dg/gomp/attrs-8.C | 10 ++++ 7 files changed, 240 insertions(+), 17 deletions(-) create mode 100644 gcc/testsuite/g++.dg/gomp/attrs-6.C create mode 100644 gcc/testsuite/g++.dg/gomp/attrs-7.C create mode 100644 gcc/testsuite/g++.dg/gomp/attrs-8.C (limited to 'gcc') diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c index 18905cf..976e2e7 100644 --- a/gcc/cp/parser.c +++ b/gcc/cp/parser.c @@ -11901,10 +11901,9 @@ cp_parser_statement (cp_parser* parser, tree in_statement_expr, tree statement, std_attrs = NULL_TREE; cp_token *token; location_t statement_location, attrs_loc; - bool in_omp_attribute_pragma; + bool in_omp_attribute_pragma = parser->lexer->in_omp_attribute_pragma; restart: - in_omp_attribute_pragma = parser->lexer->in_omp_attribute_pragma; if (if_p != NULL) *if_p = false; /* There is no statement yet. */ @@ -11951,6 +11950,7 @@ cp_parser_statement (cp_parser* parser, tree in_statement_expr, the statement. 
*/ cp_parser_label_for_labeled_statement (parser, std_attrs); in_compound = false; + in_omp_attribute_pragma = parser->lexer->in_omp_attribute_pragma; goto restart; case RID_IF: @@ -12034,6 +12034,7 @@ cp_parser_statement (cp_parser* parser, tree in_statement_expr, cp_parser_label_for_labeled_statement (parser, std_attrs); in_compound = false; + in_omp_attribute_pragma = parser->lexer->in_omp_attribute_pragma; goto restart; } } @@ -12058,13 +12059,28 @@ cp_parser_statement (cp_parser* parser, tree in_statement_expr, cp_parser_pragma (parser, pragma_compound, if_p); else if (!cp_parser_pragma (parser, pragma_stmt, if_p)) do_restart = true; - if (lexer->in_omp_attribute_pragma && !in_omp_attribute_pragma) + if (parser->lexer != lexer + && lexer->in_omp_attribute_pragma + && (!in_omp_attribute_pragma || lexer->orphan_p)) { - gcc_assert (parser->lexer != lexer); + if (saved_tokens.lexer == lexer) + { + if (saved_tokens.commit) + cp_lexer_commit_tokens (lexer); + gcc_assert (lexer->saved_tokens.length () == saved_tokens.len); + saved_tokens.lexer = parser->lexer; + saved_tokens.commit = false; + saved_tokens.len = parser->lexer->saved_tokens.length (); + } cp_lexer_destroy (lexer); + lexer = parser->lexer; } if (do_restart) goto restart; + if (parser->lexer == lexer + && lexer->in_omp_attribute_pragma + && !in_omp_attribute_pragma) + parser->lexer->orphan_p = true; return; } else if (token->type == CPP_EOF) @@ -40775,6 +40791,77 @@ cp_finish_omp_range_for (tree orig, tree begin) cp_finish_decomp (decl, decomp_first_name, decomp_cnt); } +/* Return true if next tokens contain a standard attribute that contains + omp::directive (DIRECTIVE). 
*/ + +static bool +cp_parser_omp_section_scan (cp_parser *parser, const char *directive, + bool tentative) +{ + size_t n = cp_parser_skip_attributes_opt (parser, 1), i; + if (n < 10) + return false; + for (i = 5; i < n - 4; i++) + if (cp_lexer_nth_token_is (parser->lexer, i, CPP_NAME) + && cp_lexer_nth_token_is (parser->lexer, i + 1, CPP_OPEN_PAREN) + && cp_lexer_nth_token_is (parser->lexer, i + 2, CPP_NAME)) + { + tree first = cp_lexer_peek_nth_token (parser->lexer, i)->u.value; + tree second = cp_lexer_peek_nth_token (parser->lexer, i + 2)->u.value; + if (strcmp (IDENTIFIER_POINTER (first), "directive")) + continue; + if (strcmp (IDENTIFIER_POINTER (second), directive) == 0) + break; + } + if (i == n - 4) + return false; + cp_parser_parse_tentatively (parser); + location_t first_loc = cp_lexer_peek_token (parser->lexer)->location; + location_t last_loc + = cp_lexer_peek_nth_token (parser->lexer, n - 1)->location; + location_t middle_loc = UNKNOWN_LOCATION; + tree std_attrs = cp_parser_std_attribute_spec_seq (parser); + int cnt = 0; + bool seen = false; + for (tree attr = std_attrs; attr; attr = TREE_CHAIN (attr)) + if (get_attribute_namespace (attr) == omp_identifier + && is_attribute_p ("directive", get_attribute_name (attr))) + { + for (tree a = TREE_VALUE (attr); a; a = TREE_CHAIN (a)) + { + tree d = TREE_VALUE (a); + gcc_assert (TREE_CODE (d) == DEFERRED_PARSE); + cp_token *first = DEFPARSE_TOKENS (d)->first; + cnt++; + if (first->type == CPP_NAME + && strcmp (IDENTIFIER_POINTER (first->u.value), + directive) == 0) + { + seen = true; + if (middle_loc == UNKNOWN_LOCATION) + middle_loc = first->location; + } + } + } + if (!seen || tentative) + { + cp_parser_abort_tentative_parse (parser); + return seen; + } + if (cnt != 1 || TREE_CHAIN (std_attrs)) + { + error_at (make_location (first_loc, last_loc, middle_loc), + "%<[[omp::directive(%s)]]%> must be the only specified " + "attribute on a statement", directive); + cp_parser_abort_tentative_parse (parser); + 
return false; + } + if (!cp_parser_parse_definitely (parser)) + return false; + cp_parser_handle_statement_omp_attributes (parser, std_attrs); + return true; +} + /* OpenMP 5.0: scan-loop-body: @@ -40793,6 +40880,7 @@ cp_parser_omp_scan_loop_body (cp_parser *parser) substmt = build2 (OMP_SCAN, void_type_node, substmt, NULL_TREE); add_stmt (substmt); + cp_parser_omp_section_scan (parser, "scan", false); cp_token *tok = cp_lexer_peek_token (parser->lexer); if (cp_parser_pragma_kind (tok) == PRAGMA_OMP_SCAN) { @@ -40800,6 +40888,10 @@ cp_parser_omp_scan_loop_body (cp_parser *parser) cp_lexer_consume_token (parser->lexer); + if (parser->lexer->in_omp_attribute_pragma + && cp_lexer_next_token_is (parser->lexer, CPP_COMMA)) + cp_lexer_consume_token (parser->lexer); + if (cp_lexer_next_token_is (parser->lexer, CPP_NAME)) { tree id = cp_lexer_peek_token (parser->lexer)->u.value; @@ -41623,7 +41715,8 @@ cp_parser_omp_sections_scope (cp_parser *parser) stmt = push_stmt_list (); if (cp_parser_pragma_kind (cp_lexer_peek_token (parser->lexer)) - != PRAGMA_OMP_SECTION) + != PRAGMA_OMP_SECTION + && !cp_parser_omp_section_scan (parser, "section", true)) { substmt = cp_parser_omp_structured_block (parser, NULL, false); substmt = build1 (OMP_SECTION, void_type_node, substmt); @@ -41638,6 +41731,8 @@ cp_parser_omp_sections_scope (cp_parser *parser) if (tok->type == CPP_EOF) break; + if (cp_parser_omp_section_scan (parser, "section", false)) + tok = cp_lexer_peek_token (parser->lexer); if (cp_parser_pragma_kind (tok) == PRAGMA_OMP_SECTION) { cp_lexer_consume_token (parser->lexer); diff --git a/gcc/cp/parser.h b/gcc/cp/parser.h index 6fdd214..e62742d 100644 --- a/gcc/cp/parser.h +++ b/gcc/cp/parser.h @@ -117,6 +117,10 @@ struct GTY (()) cp_lexer { /* True if we're in the context of OpenMP directives written as C++11 attributes turned into pragma. */ bool in_omp_attribute_pragma; + + /* True for in_omp_attribute_pragma lexer that should be destroyed + when it is no longer in use. 
*/ + bool orphan_p; }; diff --git a/gcc/testsuite/g++.dg/gomp/attrs-1.C b/gcc/testsuite/g++.dg/gomp/attrs-1.C index c2734a1..6bbdcac 100644 --- a/gcc/testsuite/g++.dg/gomp/attrs-1.C +++ b/gcc/testsuite/g++.dg/gomp/attrs-1.C @@ -146,17 +146,17 @@ bar (int d, int m, int i1, int i2, int i3, int p, int *idp, int s, private (p) firstprivate (f) if (parallel: i2) default(shared) shared(s) copyin(t) reduction(+:r) num_threads (nth) proc_bind(spread) lastprivate (l) allocate (f))]] { - #pragma omp section + [[omp::directive (section)]] {} - #pragma omp section + [[omp::sequence (omp::directive (section))]] {} } [[omp::directive (sections private (p) firstprivate (f) reduction(+:r) lastprivate (l) allocate (f) nowait)]] { ; - #pragma omp section + [[omp::sequence (sequence (directive (section)))]] ; - #pragma omp section + [[omp::directive (section)]] {} } [[omp::directive (barrier)]]; @@ -539,14 +539,14 @@ garply (int a, int *c, int *d, int *e, int *f) for (i = 0; i < 64; i++) { d[i] = a; - #pragma omp scan exclusive (a) + [[omp::directive (scan exclusive (a))]] a += c[i]; } [[omp::directive (simd reduction (inscan, +: a))]] for (i = 0; i < 64; i++) { a += c[i]; - #pragma omp scan inclusive (a) + [[omp::sequence (omp::sequence (omp::directive (scan inclusive (a))))]] d[i] = a; } return a; diff --git a/gcc/testsuite/g++.dg/gomp/attrs-2.C b/gcc/testsuite/g++.dg/gomp/attrs-2.C index 1eb6263..189dc6b 100644 --- a/gcc/testsuite/g++.dg/gomp/attrs-2.C +++ b/gcc/testsuite/g++.dg/gomp/attrs-2.C @@ -146,17 +146,17 @@ bar (int d, int m, int i1, int i2, int i3, int p, int *idp, int s, private (p),firstprivate (f),if (parallel: i2),default(shared),shared(s),copyin(t),reduction(+:r),num_threads (nth),proc_bind(spread), lastprivate (l),allocate (f))]] { - #pragma omp section + [[using omp:directive (section)]] {} - #pragma omp section + [[omp::sequence (omp::directive (section))]] {} } [[omp::directive (sections, private (p),firstprivate (f),reduction(+:r),lastprivate (l),allocate 
(f),nowait)]] { ; - #pragma omp section + [[omp::sequence (sequence (directive (section)))]] ; - #pragma omp section + [[omp::directive (section)]] {} } [[omp::directive (barrier)]]; @@ -539,14 +539,14 @@ garply (int a, int *c, int *d, int *e, int *f) for (i = 0; i < 64; i++) { d[i] = a; - #pragma omp scan exclusive (a) + [[omp::directive (scan, exclusive (a))]] a += c[i]; } [[omp::directive (simd, reduction (inscan, +: a))]] for (i = 0; i < 64; i++) { a += c[i]; - #pragma omp scan inclusive (a) + [[using omp : sequence (sequence (directive (scan inclusive (a))))]] d[i] = a; } return a; diff --git a/gcc/testsuite/g++.dg/gomp/attrs-6.C b/gcc/testsuite/g++.dg/gomp/attrs-6.C new file mode 100644 index 0000000..30b47e1 --- /dev/null +++ b/gcc/testsuite/g++.dg/gomp/attrs-6.C @@ -0,0 +1,50 @@ +// { dg-do compile { target c++11 } } + +void +foo () +{ + int a[10] = {}; + #pragma omp parallel sections + { + #pragma omp section + a[0]++; + [[omp::directive (section)]] { + a[1]++; + } [[omp::directive (section)]] + a[2]++; + #pragma omp section + { a[3]++; } + } + [[omp::directive (parallel sections)]] + { + #pragma omp section + a[0]++; + [[omp::directive (section)]] { + a[1]++; + } [[omp::directive (section)]] + a[2]++; + #pragma omp section + { a[3]++; } + } +} + +int +bar (int a, int *c, int *d, int *e, int *f) +{ + int i; + #pragma omp simd reduction (inscan, +: a) + for (i = 0; i < 64; i++) + { + d[i] = a; + [[omp::directive (scan, exclusive (a))]] + a += c[i]; + } + [[omp::directive (simd reduction (inscan, +: a))]] + for (i = 0; i < 64; i++) + { + a += c[i]; + #pragma omp scan inclusive (a) + d[i] = a; + } + return a; +} diff --git a/gcc/testsuite/g++.dg/gomp/attrs-7.C b/gcc/testsuite/g++.dg/gomp/attrs-7.C new file mode 100644 index 0000000..598c32a --- /dev/null +++ b/gcc/testsuite/g++.dg/gomp/attrs-7.C @@ -0,0 +1,64 @@ +// { dg-do compile { target c++11 } } + +void +foo () +{ + + [[omp::directive (parallel sections)]] + { + [[omp::directive (parallel)]]; + 
[[omp::sequence (directive (section), directive (flush))]]; // { dg-error "must be the only specified attribute on a statement" } + // { dg-error "#pragma omp section" "" { target *-*-* } .-1 } + // { dg-error "#pragma omp flush" "" { target *-*-* } .-2 } + [[omp::sequence (directive (flush), omp::directive (section))]]; // { dg-error "must be the only specified attribute on a statement" } + // { dg-error "#pragma omp section" "" { target *-*-* } .-1 } + // { dg-error "#pragma omp flush" "" { target *-*-* } .-2 } + [[gnu::cold, omp::directive (section)]]; // { dg-error "must be the only specified attribute on a statement" } + // { dg-error "#pragma omp section" "" { target *-*-* } .-1 } + [[omp::directive (section)]] [[gnu::cold]]; // { dg-error "must be the only specified attribute on a statement" } + // { dg-error "#pragma omp section" "" { target *-*-* } .-1 } + [[omp::directive (section foo)]]; // { dg-error "expected end of line before 'foo'" } + } +} + +int +bar (int a, int *c, int *d, int *e, int *f) +{ + int i; + [[omp::directive (parallel for reduction (inscan, +: a))]] // { dg-error "'a' specified in 'inscan' 'reduction' clause but not in 'scan' directive clause" } + for (i = 0; i < 64; i++) + { + d[i] = a; + [[omp::sequence (omp::directive (parallel), omp::directive (scan, exclusive (a)))]] // { dg-error "must be the only specified attribute on a statement" } + a += c[i]; // { dg-error "#pragma omp scan" "" { target *-*-* } .-1 } + } + [[omp::directive (parallel for reduction (inscan, +: a))]] // { dg-error "'a' specified in 'inscan' 'reduction' clause but not in 'scan' directive clause" } + for (i = 0; i < 64; i++) + { + a += c[i]; + [[omp::sequence (directive (scan inclusive (a)), directive (critical))]] // { dg-error "must be the only specified attribute on a statement" } + d[i] = a; // { dg-error "#pragma omp scan" "" { target *-*-* } .-1 } + } + [[omp::directive (parallel for reduction (inscan, +: a))]] // { dg-error "'a' specified in 'inscan' 
'reduction' clause but not in 'scan' directive clause" } + for (i = 0; i < 64; i++) + { + d[i] = a; + [[gnu::cold]] [[omp::directive (scan, exclusive (a))]] // { dg-error "must be the only specified attribute on a statement" } + a += c[i]; // { dg-error "#pragma omp scan" "" { target *-*-* } .-1 } + } + [[omp::directive (parallel for reduction (inscan, +: a))]] // { dg-error "'a' specified in 'inscan' 'reduction' clause but not in 'scan' directive clause" } + for (i = 0; i < 64; i++) + { + d[i] = a; + [[omp::directive (scan, exclusive (a)), gnu::cold]] // { dg-error "must be the only specified attribute on a statement" } + a += c[i]; // { dg-error "#pragma omp scan" "" { target *-*-* } .-1 } + } + [[omp::directive (parallel for reduction (inscan, +: a))]] // { dg-error "'a' specified in 'inscan' 'reduction' clause but not in 'scan' directive clause" } + for (i = 0; i < 64; i++) + { + d[i] = a; + [[omp::directive (scan)]] // { dg-error "expected 'inclusive' or 'exclusive' clause before end of line" } + a += c[i]; + } + return a; +} diff --git a/gcc/testsuite/g++.dg/gomp/attrs-8.C b/gcc/testsuite/g++.dg/gomp/attrs-8.C new file mode 100644 index 0000000..30cfe99 --- /dev/null +++ b/gcc/testsuite/g++.dg/gomp/attrs-8.C @@ -0,0 +1,10 @@ +// { dg-do compile { target c++11 } } + +void +foo () +{ + // Unsure if this shouldn't be invalid, whether we shouldn't require + // that each standalone directive sits on its own empty statement. + [[omp::sequence (omp::directive (barrier), omp::directive (barrier))]]; + [[omp::sequence (omp::directive (taskyield), omp::directive (taskwait))]]; +} -- cgit v1.1 From 2050ac1a547eebe7de4af98b57429a934e75fff4 Mon Sep 17 00:00:00 2001 From: Tamar Christina Date: Mon, 26 Jul 2021 10:22:23 +0100 Subject: AArch64: correct usdot vectorizer and intrinsics optabs There's a slight mismatch between the vectorizer optabs and the intrinsics patterns for NEON. 
The vectorizer expects operands[3] and operands[0] to be the same but the aarch64 intrinsics expanders expect operands[0] and operands[1] to be the same. This means we need different patterns here. This adds a separate usdot vectorizer pattern which just shuffles around the RTL params. There's also an inconsistency between the usdot and (u|s)dot intrinsics RTL patterns which is not corrected here. gcc/ChangeLog: * config/aarch64/aarch64-builtins.c (TYPES_TERNOP_SUSS, aarch64_types_ternop_suss_qualifiers): New. * config/aarch64/aarch64-simd-builtins.def (usdot_prod): Use it. * config/aarch64/aarch64-simd.md (usdot_prod): Re-organize RTL. * config/aarch64/arm_neon.h (vusdot_s32, vusdotq_s32): Use it. --- gcc/config/aarch64/aarch64-builtins.c | 4 ++++ gcc/config/aarch64/aarch64-simd-builtins.def | 2 +- gcc/config/aarch64/aarch64-simd.md | 28 ++++++++++++++-------------- gcc/config/aarch64/arm_neon.h | 4 ++-- 4 files changed, 21 insertions(+), 17 deletions(-) (limited to 'gcc') diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c index 9ed4b72..f6b41d9 100644 --- a/gcc/config/aarch64/aarch64-builtins.c +++ b/gcc/config/aarch64/aarch64-builtins.c @@ -209,6 +209,10 @@ static enum aarch64_type_qualifiers aarch64_types_ternop_ssus_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_none, qualifier_none, qualifier_unsigned, qualifier_none }; #define TYPES_TERNOP_SSUS (aarch64_types_ternop_ssus_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_ternop_suss_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_unsigned, qualifier_none, qualifier_none }; +#define TYPES_TERNOP_SUSS (aarch64_types_ternop_suss_qualifiers) static enum aarch64_type_qualifiers diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index b7f1237..3bb45a8 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -377,7 +377,7 @@ /* 
Implemented by _prod. */ BUILTIN_VB (TERNOP, sdot, 0, NONE) BUILTIN_VB (TERNOPU, udot, 0, NONE) - BUILTIN_VB (TERNOP_SSUS, usdot_prod, 10, NONE) + BUILTIN_VB (TERNOP_SUSS, usdot_prod, 10, NONE) /* Implemented by aarch64__lane{q}. */ BUILTIN_VB (QUADOP_LANE, sdot_lane, 0, NONE) BUILTIN_VB (QUADOPU_LANE, udot_lane, 0, NONE) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 7332a73..bf667b9 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -599,20 +599,6 @@ [(set_attr "type" "neon_dot")] ) -;; These instructions map to the __builtins for the armv8.6a I8MM usdot -;; (vector) Dot Product operation. -(define_insn "usdot_prod" - [(set (match_operand:VS 0 "register_operand" "=w") - (plus:VS - (unspec:VS [(match_operand: 2 "register_operand" "w") - (match_operand: 3 "register_operand" "w")] - UNSPEC_USDOT) - (match_operand:VS 1 "register_operand" "0")))] - "TARGET_I8MM" - "usdot\\t%0., %2., %3." - [(set_attr "type" "neon_dot")] -) - ;; These expands map to the Dot Product optab the vectorizer checks for. ;; The auto-vectorizer expects a dot product builtin that also does an ;; accumulation into the provided register. @@ -648,6 +634,20 @@ DONE; }) +;; These instructions map to the __builtins for the Armv8.6-a I8MM usdot +;; (vector) Dot Product operation and the vectorized optab. +(define_insn "usdot_prod" + [(set (match_operand:VS 0 "register_operand" "=w") + (plus:VS + (unspec:VS [(match_operand: 1 "register_operand" "w") + (match_operand: 2 "register_operand" "w")] + UNSPEC_USDOT) + (match_operand:VS 3 "register_operand" "0")))] + "TARGET_I8MM" + "usdot\\t%0., %1., %2." + [(set_attr "type" "neon_dot")] +) + ;; These instructions map to the __builtins for the Dot Product ;; indexed operations. 
(define_insn "aarch64_dot_lane" diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 7523974..0f43994 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -33744,14 +33744,14 @@ __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vusdot_s32 (int32x2_t __r, uint8x8_t __a, int8x8_t __b) { - return __builtin_aarch64_usdot_prodv8qi_ssus (__r, __a, __b); + return __builtin_aarch64_usdot_prodv8qi_suss (__a, __b, __r); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vusdotq_s32 (int32x4_t __r, uint8x16_t __a, int8x16_t __b) { - return __builtin_aarch64_usdot_prodv16qi_ssus (__r, __a, __b); + return __builtin_aarch64_usdot_prodv16qi_suss (__a, __b, __r); } __extension__ extern __inline int32x2_t -- cgit v1.1 From 1ab2270036dc0f2a13442ce682267bc7433ffb34 Mon Sep 17 00:00:00 2001 From: Tamar Christina Date: Mon, 26 Jul 2021 10:23:21 +0100 Subject: AArch64: correct dot-product RTL patterns for aarch64. The previous fix for this problem was wrong due to a subtle difference between where NEON expects the RMW values and where intrinsics expects them. The insn pattern is modeled after the intrinsics and so needs an expand for the vectorizer optab to switch the RTL. However operand[3] is not expected to be written to so the current pattern is bogus. Instead I rewrite the RTL to be in canonical ordering and merge them. gcc/ChangeLog: * config/aarch64/aarch64-simd-builtins.def (sdot, udot): Rename to.. (sdot_prod, udot_prod): ... This. * config/aarch64/aarch64-simd.md (aarch64_dot): Merged into... (dot_prod): ... this. (aarch64_dot_lane, aarch64_dot_laneq): Change operands order. (sadv16qi): Use new operands order. * config/aarch64/arm_neon.h (vdot_u32, vdotq_u32, vdot_s32, vdotq_s32): Use new RTL ordering. 
--- gcc/config/aarch64/aarch64-simd-builtins.def | 4 +- gcc/config/aarch64/aarch64-simd.md | 63 +++++++++++----------------- gcc/config/aarch64/arm_neon.h | 8 ++-- 3 files changed, 31 insertions(+), 44 deletions(-) (limited to 'gcc') diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index 3bb45a8..402453a 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -375,8 +375,8 @@ BUILTIN_VSDQ_I_DI (BINOP_UUS, urshl, 0, NONE) /* Implemented by _prod. */ - BUILTIN_VB (TERNOP, sdot, 0, NONE) - BUILTIN_VB (TERNOPU, udot, 0, NONE) + BUILTIN_VB (TERNOP, sdot_prod, 10, NONE) + BUILTIN_VB (TERNOPU, udot_prod, 10, NONE) BUILTIN_VB (TERNOP_SUSS, usdot_prod, 10, NONE) /* Implemented by aarch64__lane{q}. */ BUILTIN_VB (QUADOP_LANE, sdot_lane, 0, NONE) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index bf667b9..13c8698 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -587,19 +587,8 @@ DONE; }) -;; These instructions map to the __builtins for the Dot Product operations. -(define_insn "aarch64_dot" - [(set (match_operand:VS 0 "register_operand" "=w") - (plus:VS (match_operand:VS 1 "register_operand" "0") - (unspec:VS [(match_operand: 2 "register_operand" "w") - (match_operand: 3 "register_operand" "w")] - DOTPROD)))] - "TARGET_DOTPROD" - "dot\\t%0., %2., %3." - [(set_attr "type" "neon_dot")] -) - -;; These expands map to the Dot Product optab the vectorizer checks for. +;; These expands map to the Dot Product optab the vectorizer checks for +;; and to the intrinsics patttern. ;; The auto-vectorizer expects a dot product builtin that also does an ;; accumulation into the provided register. ;; Given the following pattern @@ -619,20 +608,17 @@ ;; ... ;; ;; and so the vectorizer provides r, in which the result has to be accumulated. 
-(define_expand "dot_prod" - [(set (match_operand:VS 0 "register_operand") - (plus:VS (unspec:VS [(match_operand: 1 "register_operand") - (match_operand: 2 "register_operand")] - DOTPROD) - (match_operand:VS 3 "register_operand")))] +(define_insn "dot_prod" + [(set (match_operand:VS 0 "register_operand" "=w") + (plus:VS + (unspec:VS [(match_operand: 1 "register_operand" "w") + (match_operand: 2 "register_operand" "w")] + DOTPROD) + (match_operand:VS 3 "register_operand" "0")))] "TARGET_DOTPROD" -{ - emit_insn ( - gen_aarch64_dot (operands[3], operands[3], operands[1], - operands[2])); - emit_insn (gen_rtx_SET (operands[0], operands[3])); - DONE; -}) + "dot\\t%0., %1., %2." + [(set_attr "type" "neon_dot")] +) ;; These instructions map to the __builtins for the Armv8.6-a I8MM usdot ;; (vector) Dot Product operation and the vectorized optab. @@ -652,11 +638,12 @@ ;; indexed operations. (define_insn "aarch64_dot_lane" [(set (match_operand:VS 0 "register_operand" "=w") - (plus:VS (match_operand:VS 1 "register_operand" "0") - (unspec:VS [(match_operand: 2 "register_operand" "w") - (match_operand:V8QI 3 "register_operand" "") - (match_operand:SI 4 "immediate_operand" "i")] - DOTPROD)))] + (plus:VS + (unspec:VS [(match_operand: 2 "register_operand" "w") + (match_operand:V8QI 3 "register_operand" "") + (match_operand:SI 4 "immediate_operand" "i")] + DOTPROD) + (match_operand:VS 1 "register_operand" "0")))] "TARGET_DOTPROD" { operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4])); @@ -667,11 +654,12 @@ (define_insn "aarch64_dot_laneq" [(set (match_operand:VS 0 "register_operand" "=w") - (plus:VS (match_operand:VS 1 "register_operand" "0") - (unspec:VS [(match_operand: 2 "register_operand" "w") - (match_operand:V16QI 3 "register_operand" "") - (match_operand:SI 4 "immediate_operand" "i")] - DOTPROD)))] + (plus:VS + (unspec:VS [(match_operand: 2 "register_operand" "w") + (match_operand:V16QI 3 "register_operand" "") + (match_operand:SI 4 "immediate_operand" 
"i")] + DOTPROD) + (match_operand:VS 1 "register_operand" "0")))] "TARGET_DOTPROD" { operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4])); @@ -944,8 +932,7 @@ rtx ones = force_reg (V16QImode, CONST1_RTX (V16QImode)); rtx abd = gen_reg_rtx (V16QImode); emit_insn (gen_aarch64_abdv16qi (abd, operands[1], operands[2])); - emit_insn (gen_aarch64_udotv16qi (operands[0], operands[3], - abd, ones)); + emit_insn (gen_udot_prodv16qi (operands[0], abd, ones, operands[3])); DONE; } rtx reduc = gen_reg_rtx (V8HImode); diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 0f43994..313b35f 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -31472,28 +31472,28 @@ __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vdot_u32 (uint32x2_t __r, uint8x8_t __a, uint8x8_t __b) { - return __builtin_aarch64_udotv8qi_uuuu (__r, __a, __b); + return __builtin_aarch64_udot_prodv8qi_uuuu (__a, __b, __r); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vdotq_u32 (uint32x4_t __r, uint8x16_t __a, uint8x16_t __b) { - return __builtin_aarch64_udotv16qi_uuuu (__r, __a, __b); + return __builtin_aarch64_udot_prodv16qi_uuuu (__a, __b, __r); } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vdot_s32 (int32x2_t __r, int8x8_t __a, int8x8_t __b) { - return __builtin_aarch64_sdotv8qi (__r, __a, __b); + return __builtin_aarch64_sdot_prodv8qi (__a, __b, __r); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vdotq_s32 (int32x4_t __r, int8x16_t __a, int8x16_t __b) { - return __builtin_aarch64_sdotv16qi (__r, __a, __b); + return __builtin_aarch64_sdot_prodv16qi (__a, __b, __r); } __extension__ extern __inline uint32x2_t -- cgit v1.1 From dd44445f09bcf92198e9238a28bf026959152be1 Mon Sep 17 00:00:00 2001 
From: Aldy Hernandez Date: Mon, 26 Jul 2021 09:47:42 +0200 Subject: Pass gimple context to array_bounds_checker. I have changed the use of the array_bounds_checker in VRP to use a ranger in my local tree to make sure there are no regressions when using either VRP or the ranger. In doing so I noticed that the checker does not pass context to get_value_range, which causes the ranger to miss a few cases. This patch fixes the oversight. Tested on x86-64 Linux using the array bounds checker both with VRP and the ranger. gcc/ChangeLog: * gimple-array-bounds.cc (array_bounds_checker::get_value_range): Add gimple argument. (array_bounds_checker::check_array_ref): Same. (array_bounds_checker::check_addr_expr): Same. (array_bounds_checker::check_array_bounds): Pass statement to check_array_bounds and check_addr_expr. * gimple-array-bounds.h (check_array_bounds): Add gimple argument. (check_addr_expr): Same. (get_value_range): Same. --- gcc/gimple-array-bounds.cc | 17 +++++++++-------- gcc/gimple-array-bounds.h | 6 +++--- 2 files changed, 12 insertions(+), 11 deletions(-) (limited to 'gcc') diff --git a/gcc/gimple-array-bounds.cc b/gcc/gimple-array-bounds.cc index 8dfd6f9..598c76b 100644 --- a/gcc/gimple-array-bounds.cc +++ b/gcc/gimple-array-bounds.cc @@ -43,9 +43,9 @@ along with GCC; see the file COPYING3. If not see // break the dependency on equivalences for this pass. const value_range * -array_bounds_checker::get_value_range (const_tree op) +array_bounds_checker::get_value_range (const_tree op, gimple *stmt) { - return ranges->get_value_range (op); + return ranges->get_value_range (op, stmt); } /* Try to determine the DECL that REF refers to. 
Return the DECL or @@ -173,7 +173,7 @@ trailing_array (tree arg, tree *pref) bool array_bounds_checker::check_array_ref (location_t location, tree ref, - bool ignore_off_by_one) + gimple *stmt, bool ignore_off_by_one) { if (warning_suppressed_p (ref, OPT_Warray_bounds)) /* Return true to have the caller prevent warnings for enclosing @@ -287,7 +287,7 @@ array_bounds_checker::check_array_ref (location_t location, tree ref, const value_range *vr = NULL; if (TREE_CODE (low_sub) == SSA_NAME) { - vr = get_value_range (low_sub); + vr = get_value_range (low_sub, stmt); if (!vr->undefined_p () && !vr->varying_p ()) { low_sub = vr->kind () == VR_RANGE ? vr->max () : vr->min (); @@ -563,7 +563,8 @@ array_bounds_checker::check_mem_ref (location_t location, tree ref, address of an ARRAY_REF, and call check_array_ref on it. */ void -array_bounds_checker::check_addr_expr (location_t location, tree t) +array_bounds_checker::check_addr_expr (location_t location, tree t, + gimple *stmt) { /* For the most significant subscript only, accept taking the address of the just-past-the-end element. 
*/ @@ -575,7 +576,7 @@ array_bounds_checker::check_addr_expr (location_t location, tree t) bool warned = false; if (TREE_CODE (t) == ARRAY_REF) { - warned = check_array_ref (location, t, ignore_off_by_one); + warned = check_array_ref (location, t, stmt, ignore_off_by_one); ignore_off_by_one = false; } else if (TREE_CODE (t) == MEM_REF) @@ -728,14 +729,14 @@ array_bounds_checker::check_array_bounds (tree *tp, int *walk_subtree, bool warned = false; array_bounds_checker *checker = (array_bounds_checker *) wi->info; if (TREE_CODE (t) == ARRAY_REF) - warned = checker->check_array_ref (location, t, + warned = checker->check_array_ref (location, t, wi->stmt, false/*ignore_off_by_one*/); else if (TREE_CODE (t) == MEM_REF) warned = checker->check_mem_ref (location, t, false /*ignore_off_by_one*/); else if (TREE_CODE (t) == ADDR_EXPR) { - checker->check_addr_expr (location, t); + checker->check_addr_expr (location, t, wi->stmt); *walk_subtree = false; } else if (inbounds_memaccess_p (t)) diff --git a/gcc/gimple-array-bounds.h b/gcc/gimple-array-bounds.h index fa64262..d8f7ff7 100644 --- a/gcc/gimple-array-bounds.h +++ b/gcc/gimple-array-bounds.h @@ -31,10 +31,10 @@ public: private: static tree check_array_bounds (tree *tp, int *walk_subtree, void *data); - bool check_array_ref (location_t, tree, bool ignore_off_by_one); + bool check_array_ref (location_t, tree, gimple *, bool ignore_off_by_one); bool check_mem_ref (location_t, tree, bool ignore_off_by_one); - void check_addr_expr (location_t, tree); - const value_range *get_value_range (const_tree op); + void check_addr_expr (location_t, tree, gimple *); + const value_range *get_value_range (const_tree op, gimple *); struct function *fun; range_query *ranges; -- cgit v1.1 From 32f7506bdc3956762bcc7dc84133fd7c3a00bb7b Mon Sep 17 00:00:00 2001 From: Aldy Hernandez Date: Mon, 26 Jul 2021 11:53:41 +0200 Subject: Abstract out conditional simplification out of execute_vrp. 
VRP simplifies conditionals involving casted values outside of the main folding mechanism, because this optimization inhibits the VRP jump threader from threading through the comparison. As part of replacing VRP with an evrp instance, I am making sure we do everything VRP does. Hence, I am abstracting this functionality out so we can call it from elsewhere. ISTM that when the proposed ranger-based jump threader can handle everything the forward threader does, there will be no need for this optimization to be done outside of the evrp folder. Perhaps we can fold this into the substitute_using_ranges class. But that's further down the line. Also, there is no need to pass a vr_values around, when the base range_query class will do. I fixed this, as it makes it trivial to pass down a ranger or evrp instance. Tested on x86-64 Linux. gcc/ChangeLog: * tree-vrp.c (vrp_simplify_cond_using_ranges): Rename vr_values with range_query. (execute_vrp): Abstract out simplification of conditionals... (simplify_casted_conds): ...here. --- gcc/tree-vrp.c | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) (limited to 'gcc') diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c index a9c31bc..58111f8 100644 --- a/gcc/tree-vrp.c +++ b/gcc/tree-vrp.c @@ -4359,7 +4359,7 @@ vrp_jump_threader::after_dom_children (basic_block bb) subsequent passes. */ static void -vrp_simplify_cond_using_ranges (vr_values *query, gcond *stmt) +vrp_simplify_cond_using_ranges (range_query *query, gcond *stmt) { tree op0 = gimple_cond_lhs (stmt); tree op1 = gimple_cond_rhs (stmt); @@ -4423,6 +4423,27 @@ vrp_simplify_cond_using_ranges (vr_values *query, gcond *stmt) } } +/* A comparison of an SSA_NAME against a constant where the SSA_NAME + was set by a type conversion can often be rewritten to use the RHS + of the type conversion. Do this optimization for all conditionals + in FUN. + + However, doing so inhibits jump threading through the comparison. 
+ So that transformation is not performed until after jump threading + is complete. */ + +static void +simplify_casted_conds (function *fun, range_query *query) +{ + basic_block bb; + FOR_EACH_BB_FN (bb, fun) + { + gimple *last = last_stmt (bb); + if (last && gimple_code (last) == GIMPLE_COND) + vrp_simplify_cond_using_ranges (query, as_a (last)); + } +} + /* Main entry point to VRP (Value Range Propagation). This pass is loosely based on J. R. C. Patterson, ``Accurate Static Branch Prediction by Value Range Propagation,'' in SIGPLAN Conference on @@ -4519,21 +4540,7 @@ execute_vrp (struct function *fun, bool warn_array_bounds_p) vrp_jump_threader threader (fun, &vrp_vr_values); threader.thread_jumps (); - /* A comparison of an SSA_NAME against a constant where the SSA_NAME - was set by a type conversion can often be rewritten to use the - RHS of the type conversion. - - However, doing so inhibits jump threading through the comparison. - So that transformation is not performed until after jump threading - is complete. */ - basic_block bb; - FOR_EACH_BB_FN (bb, fun) - { - gimple *last = last_stmt (bb); - if (last && gimple_code (last) == GIMPLE_COND) - vrp_simplify_cond_using_ranges (&vrp_vr_values, - as_a (last)); - } + simplify_casted_conds (fun, &vrp_vr_values); free_numbers_of_iterations_estimates (fun); -- cgit v1.1 From 0cbf03689e3e7d9d6002b8e5d159ef3716d0404c Mon Sep 17 00:00:00 2001 From: Tobias Burnus Date: Mon, 26 Jul 2021 14:20:46 +0200 Subject: PR fortran/93308/93963/94327/94331/97046 problems raised by descriptor handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fortran: Fix attributes and bounds in ISO_Fortran_binding. 2021-07-26 José Rui Faustino de Sousa Tobias Burnus PR fortran/93308 PR fortran/93963 PR fortran/94327 PR fortran/94331 PR fortran/97046 gcc/fortran/ChangeLog: * trans-decl.c (convert_CFI_desc): Only copy out the descriptor if necessary. 
* trans-expr.c (gfc_conv_gfc_desc_to_cfi_desc): Updated attribute handling which reflected a previous intermediate version of the standard. Only copy out the descriptor if necessary. libgfortran/ChangeLog: * runtime/ISO_Fortran_binding.c (cfi_desc_to_gfc_desc): Add code to verify the descriptor. Correct bounds calculation. (gfc_desc_to_cfi_desc): Add code to verify the descriptor. gcc/testsuite/ChangeLog: * gfortran.dg/ISO_Fortran_binding_1.f90: Add pointer attribute, this test is still erroneous but now it compiles. * gfortran.dg/bind_c_array_params_2.f90: Update regex to match code changes. * gfortran.dg/PR93308.f90: New test. * gfortran.dg/PR93963.f90: New test. * gfortran.dg/PR94327.c: New test. * gfortran.dg/PR94327.f90: New test. * gfortran.dg/PR94331.c: New test. * gfortran.dg/PR94331.f90: New test. * gfortran.dg/PR97046.f90: New test. --- gcc/fortran/trans-decl.c | 32 +-- gcc/fortran/trans-expr.c | 24 +- .../gfortran.dg/ISO_Fortran_binding_1.f90 | 2 +- gcc/testsuite/gfortran.dg/PR93308.f90 | 52 +++++ gcc/testsuite/gfortran.dg/PR93963.f90 | 150 ++++++++++++ gcc/testsuite/gfortran.dg/PR94327.c | 70 ++++++ gcc/testsuite/gfortran.dg/PR94327.f90 | 195 ++++++++++++++++ gcc/testsuite/gfortran.dg/PR94331.c | 73 ++++++ gcc/testsuite/gfortran.dg/PR94331.f90 | 252 +++++++++++++++++++++ gcc/testsuite/gfortran.dg/PR97046.f90 | 58 +++++ .../gfortran.dg/bind_c_array_params_2.f90 | 2 +- 11 files changed, 885 insertions(+), 25 deletions(-) create mode 100644 gcc/testsuite/gfortran.dg/PR93308.f90 create mode 100644 gcc/testsuite/gfortran.dg/PR93963.f90 create mode 100644 gcc/testsuite/gfortran.dg/PR94327.c create mode 100644 gcc/testsuite/gfortran.dg/PR94327.f90 create mode 100644 gcc/testsuite/gfortran.dg/PR94331.c create mode 100644 gcc/testsuite/gfortran.dg/PR94331.f90 create mode 100644 gcc/testsuite/gfortran.dg/PR97046.f90 (limited to 'gcc') diff --git a/gcc/fortran/trans-decl.c b/gcc/fortran/trans-decl.c index bf8783a..784f7b6 100644 --- a/gcc/fortran/trans-decl.c +++ 
b/gcc/fortran/trans-decl.c @@ -4539,22 +4539,28 @@ convert_CFI_desc (gfc_wrapped_block * block, gfc_symbol *sym) gfc_add_expr_to_block (&outer_block, incoming); incoming = gfc_finish_block (&outer_block); - /* Convert the gfc descriptor back to the CFI type before going out of scope, if the CFI type was present at entry. */ - gfc_init_block (&outer_block); - gfc_init_block (&tmpblock); - - tmp = gfc_build_addr_expr (ppvoid_type_node, CFI_desc_ptr); - outgoing = build_call_expr_loc (input_location, - gfor_fndecl_gfc_to_cfi, 2, tmp, gfc_desc_ptr); - gfc_add_expr_to_block (&tmpblock, outgoing); + outgoing = NULL_TREE; + if ((sym->attr.pointer || sym->attr.allocatable) + && !sym->attr.value + && sym->attr.intent != INTENT_IN) + { + gfc_init_block (&outer_block); + gfc_init_block (&tmpblock); - outgoing = build3_v (COND_EXPR, present, - gfc_finish_block (&tmpblock), - build_empty_stmt (input_location)); - gfc_add_expr_to_block (&outer_block, outgoing); - outgoing = gfc_finish_block (&outer_block); + tmp = gfc_build_addr_expr (ppvoid_type_node, CFI_desc_ptr); + outgoing = build_call_expr_loc (input_location, + gfor_fndecl_gfc_to_cfi, 2, + tmp, gfc_desc_ptr); + gfc_add_expr_to_block (&tmpblock, outgoing); + + outgoing = build3_v (COND_EXPR, present, + gfc_finish_block (&tmpblock), + build_empty_stmt (input_location)); + gfc_add_expr_to_block (&outer_block, outgoing); + outgoing = gfc_finish_block (&outer_block); + } /* Add the lot to the procedure init and finally blocks. */ gfc_add_init_cleanup (block, incoming, outgoing); diff --git a/gcc/fortran/trans-expr.c b/gcc/fortran/trans-expr.c index b18a9ec..c4291cc 100644 --- a/gcc/fortran/trans-expr.c +++ b/gcc/fortran/trans-expr.c @@ -5502,13 +5502,12 @@ gfc_conv_gfc_desc_to_cfi_desc (gfc_se *parmse, gfc_expr *e, gfc_symbol *fsym) attribute = 1; } - /* If the formal argument is assumed shape and neither a pointer nor - allocatable, it is unconditionally CFI_attribute_other. 
*/ - if (fsym->as->type == AS_ASSUMED_SHAPE - && !fsym->attr.pointer && !fsym->attr.allocatable) - cfi_attribute = 2; + if (fsym->attr.pointer) + cfi_attribute = 0; + else if (fsym->attr.allocatable) + cfi_attribute = 1; else - cfi_attribute = attribute; + cfi_attribute = 2; if (e->rank != 0) { @@ -5616,10 +5615,15 @@ gfc_conv_gfc_desc_to_cfi_desc (gfc_se *parmse, gfc_expr *e, gfc_symbol *fsym) gfc_prepend_expr_to_block (&parmse->post, tmp); /* Transfer values back to gfc descriptor. */ - tmp = gfc_build_addr_expr (NULL_TREE, parmse->expr); - tmp = build_call_expr_loc (input_location, - gfor_fndecl_cfi_to_gfc, 2, gfc_desc_ptr, tmp); - gfc_prepend_expr_to_block (&parmse->post, tmp); + if (cfi_attribute != 2 /* CFI_attribute_other. */ + && !fsym->attr.value + && fsym->attr.intent != INTENT_IN) + { + tmp = gfc_build_addr_expr (NULL_TREE, parmse->expr); + tmp = build_call_expr_loc (input_location, + gfor_fndecl_cfi_to_gfc, 2, gfc_desc_ptr, tmp); + gfc_prepend_expr_to_block (&parmse->post, tmp); + } /* Deal with an optional dummy being passed to an optional formal arg by finishing the pre and post blocks and making their execution diff --git a/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_1.f90 b/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_1.f90 index 102bc60..0cf3b2c 100644 --- a/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_1.f90 +++ b/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_1.f90 @@ -39,7 +39,7 @@ USE, INTRINSIC :: ISO_C_BINDING import INTEGER(C_INT) :: err - type (T), DIMENSION(..), intent(out) :: a + type (T), pointer, DIMENSION(..), intent(out) :: a END FUNCTION c_establish FUNCTION c_contiguous(a) BIND(C, NAME="contiguous_c") RESULT(err) diff --git a/gcc/testsuite/gfortran.dg/PR93308.f90 b/gcc/testsuite/gfortran.dg/PR93308.f90 new file mode 100644 index 0000000..ee116f9 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/PR93308.f90 @@ -0,0 +1,52 @@ +! { dg-do run } +! +! Test the fix for PR94331 +! +! Contributed by Robin Hogan +! 
+ +program test + + use, intrinsic :: iso_c_binding, only: & + c_int, c_float + + implicit none + + integer :: i + integer, parameter :: n = 11 + real(kind=c_float), parameter :: u(*) = [(real(i, kind=c_float), i=1,n)] + + real(kind=c_float), allocatable :: A(:) + real(kind=c_float) :: E(n) + integer(kind=c_int) :: l1, l2, l3 + + allocate(A, source=u) + l1 = lbound(A, 1) + call routine_bindc(A, l2) ! in gcc-9.2.1 this changes lbound of A... + l3 = lbound(A, 1) + if (l1 /= 1) stop 1 + if (l1 /= l2) stop 2 + if (l1 /= l3) stop 3 + if (any(abs(A(1:n)-u)>0.0_c_float)) stop 4 + deallocate(A) + ! + E = u + l1 = lbound(E, 1) + call routine_bindc(E, l2) ! ...but does not change lbound of E + l3 = lbound(E, 1) + if (l1 /= 1) stop 5 + if (l1 /= l2) stop 6 + if (l1 /= l3) stop 7 + if (any(abs(E(1:n)-u)>0.0_c_float)) stop 8 + +contains + + subroutine routine_bindc(v, l) bind(c) + real(kind=c_float), intent(inout) :: v(:) + integer(kind=c_int), intent(out) :: l + + l = lbound(v, 1) + if (any(abs(v(1:n)-u)>0.0_c_float)) stop 9 + end subroutine routine_bindc + +end program test diff --git a/gcc/testsuite/gfortran.dg/PR93963.f90 b/gcc/testsuite/gfortran.dg/PR93963.f90 new file mode 100644 index 0000000..4e1b06f --- /dev/null +++ b/gcc/testsuite/gfortran.dg/PR93963.f90 @@ -0,0 +1,150 @@ +! { dg-do run } +! +! Test the fix for PR93963 +! + +function rank_p(this) result(rnk) bind(c) + use, intrinsic :: iso_c_binding, only: c_int + + implicit none + + integer(kind=c_int), pointer, intent(in) :: this(..) 
+ integer(kind=c_int) :: rnk + + select rank(this) + rank(0) + rnk = 0 + rank(1) + rnk = 1 + rank(2) + rnk = 2 + rank(3) + rnk = 3 + rank(4) + rnk = 4 + rank(5) + rnk = 5 + rank(6) + rnk = 6 + rank(7) + rnk = 7 + rank(8) + rnk = 8 + rank(9) + rnk = 9 + rank(10) + rnk = 10 + rank(11) + rnk = 11 + rank(12) + rnk = 12 + rank(13) + rnk = 13 + rank(14) + rnk = 14 + rank(15) + rnk = 15 + rank default + rnk = -1000 + end select + return +end function rank_p + +function rank_a(this) result(rnk) bind(c) + use, intrinsic :: iso_c_binding, only: c_int + + implicit none + + integer(kind=c_int), allocatable, intent(in) :: this(..) + integer(kind=c_int) :: rnk + + select rank(this) + rank(0) + rnk = 0 + rank(1) + rnk = 1 + rank(2) + rnk = 2 + rank(3) + rnk = 3 + rank(4) + rnk = 4 + rank(5) + rnk = 5 + rank(6) + rnk = 6 + rank(7) + rnk = 7 + rank(8) + rnk = 8 + rank(9) + rnk = 9 + rank(10) + rnk = 10 + rank(11) + rnk = 11 + rank(12) + rnk = 12 + rank(13) + rnk = 13 + rank(14) + rnk = 14 + rank(15) + rnk = 15 + rank default + rnk = -1000 + end select + return +end function rank_a + +program selr_p + + use, intrinsic :: iso_c_binding, only: c_int + + implicit none + + interface + function rank_p(this) result(rnk) bind(c) + use, intrinsic :: iso_c_binding, only: c_int + integer(kind=c_int), pointer, intent(in) :: this(..) + integer(kind=c_int) :: rnk + end function rank_p + end interface + + interface + function rank_a(this) result(rnk) bind(c) + use, intrinsic :: iso_c_binding, only: c_int + integer(kind=c_int), allocatable, intent(in) :: this(..) + integer(kind=c_int) :: rnk + end function rank_a + end interface + + integer(kind=c_int), parameter :: siz = 7 + integer(kind=c_int), parameter :: rnk = 1 + + integer(kind=c_int), pointer :: intp(:) + integer(kind=c_int), allocatable :: inta(:) + integer(kind=c_int) :: irnk + + nullify(intp) + irnk = rank_p(intp) + if (irnk /= rnk) stop 1 + if (irnk /= rank(intp)) stop 2 + ! 
+ irnk = rank_a(inta) + if (irnk /= rnk) stop 3 + if (irnk /= rank(inta)) stop 4 + ! + allocate(intp(siz)) + irnk = rank_p(intp) + if (irnk /= rnk) stop 5 + if (irnk /= rank(intp)) stop 6 + deallocate(intp) + nullify(intp) + ! + allocate(inta(siz)) + if (irnk /= rnk) stop 7 + if (irnk /= rank(inta)) stop 8 + deallocate(inta) + +end program selr_p diff --git a/gcc/testsuite/gfortran.dg/PR94327.c b/gcc/testsuite/gfortran.dg/PR94327.c new file mode 100644 index 0000000..6791c37 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/PR94327.c @@ -0,0 +1,70 @@ +/* Test the fix for PR94327. */ + +#include +#include +#include + +#include "../../../libgfortran/ISO_Fortran_binding.h" + +bool c_vrfy (const CFI_cdesc_t *restrict); + +char get_attr (const CFI_cdesc_t*restrict, bool); + +bool +c_vrfy (const CFI_cdesc_t *restrict auxp) +{ + CFI_index_t i, lb, ub, ex; + int *ip = NULL; + + assert (auxp); + assert (auxp->base_addr); + lb = auxp->dim[0].lower_bound; + ex = auxp->dim[0].extent; + ub = ex + lb - 1; + ip = (int*)auxp->base_addr; + for (i=0; ielem_len == 4); + assert (auxp->rank == 1); + assert (auxp->type == CFI_type_int); + attr = '\0'; + switch (auxp->attribute) + { + case CFI_attribute_pointer: + if (alloc && !c_vrfy (auxp)) + break; + attr = 'p'; + break; + case CFI_attribute_allocatable: + if (alloc && !c_vrfy (auxp)) + break; + attr = 'a'; + break; + case CFI_attribute_other: + assert (alloc); + if (!c_vrfy (auxp)) + break; + attr = 'o'; + break; + default: + break; + } + return attr; +} + diff --git a/gcc/testsuite/gfortran.dg/PR94327.f90 b/gcc/testsuite/gfortran.dg/PR94327.f90 new file mode 100644 index 0000000..3cb3ac3 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/PR94327.f90 @@ -0,0 +1,195 @@ +! { dg-do run } +! { dg-additional-sources PR94327.c } +! +! Test the fix for PR94327 +! 
+ +program attr_p + + use, intrinsic :: iso_c_binding, only: & + c_int, c_bool, c_char + + implicit none + + integer :: i + integer, parameter :: n = 11 + integer, parameter :: u(*) = [(i, i=1,n)] + + interface + function attr_p_as(a, s) result(c) & + bind(c, name="get_attr") + use, intrinsic :: iso_c_binding, only: & + c_int, c_bool, c_char + implicit none + integer(kind=c_int), pointer, intent(in) :: a(:) + logical(kind=c_bool), value, intent(in) :: s + character(kind=c_char) :: c + end function attr_p_as + function attr_a_as(a, s) result(c) & + bind(c, name="get_attr") + use, intrinsic :: iso_c_binding, only: & + c_int, c_bool, c_char + implicit none + integer(kind=c_int), allocatable, intent(in) :: a(:) + logical(kind=c_bool), value, intent(in) :: s + character(kind=c_char) :: c + end function attr_a_as + function attr_o_as(a, s) result(c) & + bind(c, name="get_attr") + use, intrinsic :: iso_c_binding, only: & + c_int, c_bool, c_char + implicit none + integer(kind=c_int), intent(in) :: a(:) + logical(kind=c_bool), value, intent(in) :: s + character(kind=c_char) :: c + end function attr_o_as + function attr_p_ar(a, s) result(c) & + bind(c, name="get_attr") + use, intrinsic :: iso_c_binding, only: & + c_int, c_bool, c_char + implicit none + integer(kind=c_int), pointer, intent(in) :: a(..) + logical(kind=c_bool), value, intent(in) :: s + character(kind=c_char) :: c + end function attr_p_ar + function attr_a_ar(a, s) result(c) & + bind(c, name="get_attr") + use, intrinsic :: iso_c_binding, only: & + c_int, c_bool, c_char + implicit none + integer(kind=c_int), allocatable, intent(in) :: a(..) + logical(kind=c_bool), value, intent(in) :: s + character(kind=c_char) :: c + end function attr_a_ar + function attr_o_ar(a, s) result(c) & + bind(c, name="get_attr") + use, intrinsic :: iso_c_binding, only: & + c_int, c_bool, c_char + implicit none + integer(kind=c_int), intent(in) :: a(..) 
+ logical(kind=c_bool), value, intent(in) :: s + character(kind=c_char) :: c + end function attr_o_ar + end interface + + integer(kind=c_int), target :: a(n) + integer(kind=c_int), allocatable, target :: b(:) + integer(kind=c_int), pointer :: p(:) + character(kind=c_char) :: c + + a = u + c = attr_p_as(a, .true._c_bool) + if(c/='p') stop 1 + if(any(a/=u)) stop 2 + ! + a = u + c = attr_p_ar(a, .true._c_bool) + if(c/='p') stop 3 + if(any(a/=u)) stop 4 + ! + a = u + c = attr_o_as(a, .true._c_bool) + if(c/='o') stop 5 + if(any(a/=u)) stop 6 + ! + a = u + c = attr_o_ar(a, .true._c_bool) + if(c/='o') stop 7 + if(any(a/=u)) stop 8 + ! + allocate(b, source=u) + c = attr_p_as(b, .true._c_bool) + if(c/='p') stop 9 + if(.not.allocated(b)) stop 10 + if(any(b/=u)) stop 11 + ! + deallocate(b) + allocate(b, source=u) + c = attr_p_ar(b, .true._c_bool) + if(c/='p') stop 12 + if(.not.allocated(b)) stop 13 + if(any(b/=u)) stop 14 + ! + deallocate(b) + allocate(b, source=u) + c = attr_a_as(b, .true._c_bool) + if(c/='a') stop 15 + if(.not.allocated(b)) stop 16 + if(any(b/=u)) stop 17 + ! + deallocate(b) + allocate(b, source=u) + c = attr_a_ar(b, .true._c_bool) + if(c/='a') stop 18 + if(.not.allocated(b)) stop 19 + if(any(b/=u)) stop 20 + ! + deallocate(b) + allocate(b, source=u) + c = attr_o_as(b, .true._c_bool) + if(c/='o') stop 21 + if(.not.allocated(b)) stop 22 + if(any(b/=u)) stop 23 + ! + deallocate(b) + allocate(b, source=u) + c = attr_o_ar(b, .true._c_bool) + if(c/='o') stop 24 + if(.not.allocated(b)) stop 25 + if(any(b/=u)) stop 26 + ! + deallocate(b) + c = attr_a_as(b, .false._c_bool) + if(c/='a') stop 27 + if(allocated(b)) stop 28 + ! + c = attr_a_ar(b, .false._c_bool) + if(c/='a') stop 29 + if(allocated(b)) stop 30 + ! + nullify(p) + p => a + c = attr_p_as(p, .true._c_bool) + if(c/='p') stop 31 + if(.not.associated(p)) stop 32 + if(.not.associated(p, a)) stop 33 + if(any(p/=u)) stop 34 + ! 
+ nullify(p) + p => a + c = attr_p_ar(p, .true._c_bool) + if(c/='p') stop 35 + if(.not.associated(p)) stop 36 + if(.not.associated(p, a)) stop 37 + if(any(p/=u)) stop 38 + ! + nullify(p) + p => a + c = attr_o_as(p, .true._c_bool) + if(c/='o') stop 39 + if(.not.associated(p)) stop 40 + if(.not.associated(p, a)) stop 41 + if(any(p/=u)) stop 42 + ! + nullify(p) + p => a + c = attr_o_ar(p, .true._c_bool) + if(c/='o') stop 43 + if(.not.associated(p)) stop 44 + if(.not.associated(p, a)) stop 45 + if(any(p/=u)) stop 46 + ! + nullify(p) + c = attr_p_as(p, .false._c_bool) + if(c/='p') stop 47 + if(associated(p)) stop 48 + if(associated(p, a)) stop 49 + ! + nullify(p) + c = attr_p_ar(p, .false._c_bool) + if(c/='p') stop 50 + if(associated(p)) stop 51 + if(associated(p, a)) stop 52 + stop + +end program attr_p diff --git a/gcc/testsuite/gfortran.dg/PR94331.c b/gcc/testsuite/gfortran.dg/PR94331.c new file mode 100644 index 0000000..4e13051 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/PR94331.c @@ -0,0 +1,73 @@ +/* Test the fix for PR94331. 
*/ + +#include +#include +#include + +#include "../../../libgfortran/ISO_Fortran_binding.h" + +bool c_vrfy (const CFI_cdesc_t *restrict); + +bool check_bounds(const CFI_cdesc_t*restrict, const int, const int); + +bool +c_vrfy (const CFI_cdesc_t *restrict auxp) +{ + CFI_index_t i, lb, ub, ex; + int *ip = NULL; + + assert (auxp); + assert (auxp->base_addr); + lb = auxp->dim[0].lower_bound; + ex = auxp->dim[0].extent; + ub = ex + lb - 1; + ip = (int*)auxp->base_addr; + for (i=0; ielem_len; + assert (auxp->rank==1); + assert (auxp->type==CFI_type_int); + assert (auxp->dim[0].sm>0); + assert ((size_t)auxp->dim[0].sm==el); + if (auxp->dim[0].extent==ex + && auxp->dim[0].lower_bound==lb) + { + switch(auxp->attribute) + { + case CFI_attribute_pointer: + case CFI_attribute_allocatable: + if (!c_vrfy (auxp)) + break; + is_ok = true; + break; + case CFI_attribute_other: + if (!c_vrfy (auxp)) + break; + is_ok = (lb==0); + break; + default: + assert (false); + break; + } + } + return is_ok; +} + diff --git a/gcc/testsuite/gfortran.dg/PR94331.f90 b/gcc/testsuite/gfortran.dg/PR94331.f90 new file mode 100644 index 0000000..6185031 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/PR94331.f90 @@ -0,0 +1,252 @@ +! { dg-do run } +! { dg-additional-sources PR94331.c } +! +! Test the fix for PR94331 +! 
+ +program main_p + + use, intrinsic :: iso_c_binding, only: & + c_int + + implicit none + + integer :: i + integer, parameter :: ex = 11 + integer, parameter :: lb = 11 + integer, parameter :: ub = ex+lb-1 + integer, parameter :: u(*) = [(i, i=1,ex)] + + interface + function checkb_p_as(a, l, u) result(c) & + bind(c, name="check_bounds") + use, intrinsic :: iso_c_binding, only: & + c_int, c_bool + implicit none + integer(kind=c_int), pointer, intent(in) :: a(:) + integer(kind=c_int), value, intent(in) :: l + integer(kind=c_int), value, intent(in) :: u + logical(kind=c_bool) :: c + end function checkb_p_as + function checkb_a_as(a, l, u) result(c) & + bind(c, name="check_bounds") + use, intrinsic :: iso_c_binding, only: & + c_int, c_bool + implicit none + integer(kind=c_int), allocatable, intent(in) :: a(:) + integer(kind=c_int), value, intent(in) :: l + integer(kind=c_int), value, intent(in) :: u + logical(kind=c_bool) :: c + end function checkb_a_as + function checkb_o_as(a, l, u) result(c) & + bind(c, name="check_bounds") + use, intrinsic :: iso_c_binding, only: & + c_int, c_bool + implicit none + integer(kind=c_int), intent(in) :: a(:) + integer(kind=c_int), value, intent(in) :: l + integer(kind=c_int), value, intent(in) :: u + logical(kind=c_bool) :: c + end function checkb_o_as + function checkb_p_ar(a, l, u) result(c) & + bind(c, name="check_bounds") + use, intrinsic :: iso_c_binding, only: & + c_int, c_bool + implicit none + integer(kind=c_int), pointer, intent(in) :: a(..) + integer(kind=c_int), value, intent(in) :: l + integer(kind=c_int), value, intent(in) :: u + logical(kind=c_bool) :: c + end function checkb_p_ar + function checkb_a_ar(a, l, u) result(c) & + bind(c, name="check_bounds") + use, intrinsic :: iso_c_binding, only: & + c_int, c_bool + implicit none + integer(kind=c_int), allocatable, intent(in) :: a(..) 
+ integer(kind=c_int), value, intent(in) :: l + integer(kind=c_int), value, intent(in) :: u + logical(kind=c_bool) :: c + end function checkb_a_ar + function checkb_o_ar(a, l, u) result(c) & + bind(c, name="check_bounds") + use, intrinsic :: iso_c_binding, only: & + c_int, c_bool + implicit none + integer(kind=c_int), intent(in) :: a(..) + integer(kind=c_int), value, intent(in) :: l + integer(kind=c_int), value, intent(in) :: u + logical(kind=c_bool) :: c + end function checkb_o_ar + end interface + + integer(kind=c_int), target :: a(lb:ub) + integer(kind=c_int), allocatable, target :: b(:) + integer(kind=c_int), pointer :: p(:) + + a = u + if(lbound(a,1)/=lb) stop 1 + if(ubound(a,1)/=ub) stop 2 + if(any(shape(a)/=[ex])) stop 3 + if(.not.checkb_p_as(a, lb, ub)) stop 4 + if(lbound(a,1)/=lb) stop 5 + if(ubound(a,1)/=ub) stop 6 + if(any(shape(a)/=[ex])) stop 7 + if(any(a/=u)) stop 8 + ! + a = u + if(lbound(a,1)/=lb) stop 9 + if(ubound(a,1)/=ub) stop 10 + if(any(shape(a)/=[ex])) stop 11 + if(.not.checkb_p_ar(a, lb, ub)) stop 12 + if(lbound(a,1)/=lb) stop 13 + if(ubound(a,1)/=ub) stop 14 + if(any(shape(a)/=[ex])) stop 15 + if(any(a/=u)) stop 16 + ! + a = u + if(lbound(a,1)/=lb) stop 17 + if(ubound(a,1)/=ub) stop 18 + if(any(shape(a)/=[ex])) stop 19 + if(.not.checkb_o_as(a, 0, ex-1))stop 20 + if(lbound(a,1)/=lb) stop 21 + if(ubound(a,1)/=ub) stop 22 + if(any(shape(a)/=[ex])) stop 23 + if(any(a/=u)) stop 24 + ! + a = u + if(lbound(a,1)/=lb) stop 25 + if(ubound(a,1)/=ub) stop 26 + if(any(shape(a)/=[ex])) stop 27 + if(.not.checkb_o_ar(a, 0, ex-1))stop 28 + if(lbound(a,1)/=lb) stop 29 + if(ubound(a,1)/=ub) stop 30 + if(any(shape(a)/=[ex])) stop 31 + if(any(a/=u)) stop 32 + ! 
+ allocate(b(lb:ub), source=u) + if(lbound(b,1)/=lb) stop 33 + if(ubound(b,1)/=ub) stop 34 + if(any(shape(b)/=[ex])) stop 35 + if(.not.checkb_p_as(b, lb, ub)) stop 36 + if(.not.allocated(b)) stop 37 + if(lbound(b,1)/=lb) stop 38 + if(ubound(b,1)/=ub) stop 39 + if(any(shape(b)/=[ex])) stop 40 + if(any(b/=u)) stop 41 + ! + deallocate(b) + allocate(b(lb:ub), source=u) + if(lbound(b,1)/=lb) stop 42 + if(ubound(b,1)/=ub) stop 43 + if(any(shape(b)/=[ex])) stop 44 + if(.not.checkb_p_ar(b, lb, ub)) stop 45 + if(.not.allocated(b)) stop 46 + if(lbound(b,1)/=lb) stop 47 + if(ubound(b,1)/=ub) stop 48 + if(any(shape(b)/=[ex])) stop 49 + if(any(b/=u)) stop 50 + ! + deallocate(b) + allocate(b(lb:ub), source=u) + if(lbound(b,1)/=lb) stop 51 + if(ubound(b,1)/=ub) stop 52 + if(any(shape(b)/=[ex])) stop 53 + if(.not.checkb_a_as(b, lb, ub)) stop 54 + if(.not.allocated(b)) stop 55 + if(lbound(b,1)/=lb) stop 56 + if(ubound(b,1)/=ub) stop 57 + if(any(shape(b)/=[ex])) stop 58 + if(any(b/=u)) stop 59 + ! + deallocate(b) + allocate(b(lb:ub), source=u) + if(lbound(b,1)/=lb) stop 60 + if(ubound(b,1)/=ub) stop 61 + if(any(shape(b)/=[ex])) stop 62 + if(.not.checkb_a_ar(b, lb, ub)) stop 63 + if(.not.allocated(b)) stop 64 + if(lbound(b,1)/=lb) stop 65 + if(ubound(b,1)/=ub) stop 66 + if(any(shape(b)/=[ex])) stop 67 + if(any(b/=u)) stop 68 + ! + deallocate(b) + allocate(b(lb:ub), source=u) + if(lbound(b,1)/=lb) stop 69 + if(ubound(b,1)/=ub) stop 70 + if(any(shape(b)/=[ex])) stop 71 + if(.not.checkb_o_as(b, 0, ex-1))stop 72 + if(.not.allocated(b)) stop 73 + if(lbound(b,1)/=lb) stop 74 + if(ubound(b,1)/=ub) stop 75 + if(any(shape(b)/=[ex])) stop 76 + if(any(b/=u)) stop 77 + ! 
+ deallocate(b) + allocate(b(lb:ub), source=u) + if(lbound(b,1)/=lb) stop 78 + if(ubound(b,1)/=ub) stop 79 + if(any(shape(b)/=[ex])) stop 80 + if(.not.checkb_o_ar(b, 0, ex-1))stop 81 + if(.not.allocated(b)) stop 82 + if(lbound(b,1)/=lb) stop 83 + if(ubound(b,1)/=ub) stop 84 + if(any(shape(b)/=[ex])) stop 85 + if(any(b/=u)) stop 86 + deallocate(b) + ! + p(lb:ub) => a + if(lbound(p,1)/=lb) stop 87 + if(ubound(p,1)/=ub) stop 88 + if(any(shape(p)/=[ex])) stop 89 + if(.not.checkb_p_as(p, lb, ub)) stop 90 + if(.not.associated(p)) stop 91 + if(.not.associated(p, a)) stop 92 + if(lbound(p,1)/=lb) stop 93 + if(ubound(p,1)/=ub) stop 94 + if(any(shape(p)/=[ex])) stop 95 + if(any(p/=u)) stop 96 + ! + nullify(p) + p(lb:ub) => a + if(lbound(p,1)/=lb) stop 97 + if(ubound(p,1)/=ub) stop 98 + if(any(shape(p)/=[ex])) stop 99 + if(.not.checkb_p_ar(p, lb, ub)) stop 100 + if(.not.associated(p)) stop 101 + if(.not.associated(p, a)) stop 102 + if(lbound(p,1)/=lb) stop 103 + if(ubound(p,1)/=ub) stop 104 + if(any(shape(p)/=[ex])) stop 105 + if(any(p/=u)) stop 106 + ! + nullify(p) + p(lb:ub) => a + if(lbound(p,1)/=lb) stop 107 + if(ubound(p,1)/=ub) stop 108 + if(any(shape(p)/=[ex])) stop 109 + if(.not.checkb_o_as(p, 0, ex-1))stop 110 + if(.not.associated(p)) stop 111 + if(.not.associated(p, a)) stop 112 + if(lbound(p,1)/=lb) stop 113 + if(ubound(p,1)/=ub) stop 114 + if(any(shape(p)/=[ex])) stop 115 + if(any(p/=u)) stop 116 + ! 
+ nullify(p) + p(lb:ub) => a + if(lbound(p,1)/=lb) stop 117 + if(ubound(p,1)/=ub) stop 118 + if(any(shape(p)/=[ex])) stop 119 + if(.not.checkb_o_ar(p, 0, ex-1))stop 120 + if(.not.associated(p)) stop 121 + if(.not.associated(p, a)) stop 122 + if(lbound(p,1)/=lb) stop 123 + if(ubound(p,1)/=ub) stop 124 + if(any(shape(p)/=[ex])) stop 125 + if(any(p/=u)) stop 126 + nullify(p) + stop + +end program main_p diff --git a/gcc/testsuite/gfortran.dg/PR97046.f90 b/gcc/testsuite/gfortran.dg/PR97046.f90 new file mode 100644 index 0000000..7d133a5 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/PR97046.f90 @@ -0,0 +1,58 @@ +! { dg-do run } +! +! Test the fix for PR94331 +! +! Contributed by Igor Gayday +! + +MODULE FOO + + implicit none + + INTEGER, parameter :: n = 11 + +contains + + SUBROUTINE dummyc(x0) BIND(C) + type(*), dimension(..) :: x0 + if(LBOUND(x0,1)/=1) stop 5 + if(UBOUND(x0,1)/=n) stop 6 + if(rank(x0)/=1) stop 7 + END SUBROUTINE dummyc + + SUBROUTINE dummy(x0) + type(*), dimension(..) :: x0 + call dummyc(x0) + END SUBROUTINE dummy + +END MODULE + +PROGRAM main + USE FOO + IMPLICIT NONE + integer :: before(2), after(2) + + DOUBLE PRECISION, ALLOCATABLE :: buf(:) + DOUBLE PRECISION :: buf2(n) + + ALLOCATE(buf(n)) + before(1) = LBOUND(buf,1) + before(2) = UBOUND(buf,1) + CALL dummy (buf) + after(1) = LBOUND(buf,1) + after(2) = UBOUND(buf,1) + deallocate(buf) + + if (before(1) .NE. after(1)) stop 1 + if (before(2) .NE. after(2)) stop 2 + + before(1) = LBOUND(buf2,1) + before(2) = UBOUND(buf2,1) + CALL dummy (buf2) + after(1) = LBOUND(buf2,1) + after(2) = UBOUND(buf2,1) + + if (before(1) .NE. after(1)) stop 3 + if (before(2) .NE. after(2)) stop 4 + +END PROGRAM diff --git a/gcc/testsuite/gfortran.dg/bind_c_array_params_2.f90 b/gcc/testsuite/gfortran.dg/bind_c_array_params_2.f90 index 00628c1..ede6eff 100644 --- a/gcc/testsuite/gfortran.dg/bind_c_array_params_2.f90 +++ b/gcc/testsuite/gfortran.dg/bind_c_array_params_2.f90 @@ -22,4 +22,4 @@ end ! 
{ dg-final { scan-assembler-times "brasl\t%r\[0-9\]*,myBindC" 1 { target { s390*-*-* } } } } ! { dg-final { scan-assembler-times "bl \.myBindC" 1 { target { powerpc-ibm-aix* } } } } ! { dg-final { scan-assembler-times "add_u32\t\[sv\]\[0-9\]*, \[sv\]\[0-9\]*, myBindC@rel32@lo" 1 { target { amdgcn*-*-* } } } } -! { dg-final { scan-tree-dump-times "cfi_desc_to_gfc_desc \\\(&parm\\." 1 "original" } } +! { dg-final { scan-tree-dump-times "gfc_desc_to_cfi_desc \\\(&cfi\\." 1 "original" } } -- cgit v1.1 From bf6d414415e14e13be16abf23375160733567d20 Mon Sep 17 00:00:00 2001 From: Ashimida Date: Mon, 26 Jul 2021 10:38:50 -0400 Subject: Remove legacy external declarations in toplev.h [PR101447] gcc/ PR driver/101447 * toplev.h (min_align_loops_log): Remove declaration. (min_align_jumps_log, min_align_labels_log): Likewise. (min_align_functions_log): Likewise. --- gcc/toplev.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'gcc') diff --git a/gcc/toplev.h b/gcc/toplev.h index 175944c..f543554 100644 --- a/gcc/toplev.h +++ b/gcc/toplev.h @@ -94,11 +94,6 @@ extern bool set_src_pwd (const char *); extern HOST_WIDE_INT get_random_seed (bool); extern void set_random_seed (const char *); -extern unsigned int min_align_loops_log; -extern unsigned int min_align_jumps_log; -extern unsigned int min_align_labels_log; -extern unsigned int min_align_functions_log; - extern void parse_alignment_opts (void); extern void initialize_rtl (void); -- cgit v1.1 From 3cb72ac17181fd38384cd2149993e643fb8df89f Mon Sep 17 00:00:00 2001 From: Aldy Hernandez Date: Mon, 26 Jul 2021 06:06:37 -0500 Subject: Pass relationship to methods calling generic fold_range. Fix a small oversight in methods calling the base class fold_range. gcc/ChangeLog: * range-op.cc (operator_lshift::fold_range): Pass rel to base class fold_range. (operator_rshift::fold_range): Same. 
--- gcc/range-op.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'gcc') diff --git a/gcc/range-op.cc b/gcc/range-op.cc index e0be51d..4bdd14d 100644 --- a/gcc/range-op.cc +++ b/gcc/range-op.cc @@ -1929,7 +1929,7 @@ bool operator_lshift::fold_range (irange &r, tree type, const irange &op1, const irange &op2, - relation_kind rel ATTRIBUTE_UNUSED) const + relation_kind rel) const { int_range_max shift_range; if (!get_shift_range (shift_range, type, op2)) @@ -1960,7 +1960,7 @@ operator_lshift::fold_range (irange &r, tree type, } else // Otherwise, invoke the generic fold routine. - return range_operator::fold_range (r, type, op1, shift_range); + return range_operator::fold_range (r, type, op1, shift_range, rel); } void @@ -2189,7 +2189,7 @@ bool operator_rshift::fold_range (irange &r, tree type, const irange &op1, const irange &op2, - relation_kind rel ATTRIBUTE_UNUSED) const + relation_kind rel) const { int_range_max shift; if (!get_shift_range (shift, type, op2)) @@ -2201,7 +2201,7 @@ operator_rshift::fold_range (irange &r, tree type, return true; } - return range_operator::fold_range (r, type, op1, shift); + return range_operator::fold_range (r, type, op1, shift, rel); } void -- cgit v1.1 From f384e2f551fef6dd73b815675633814fb8ff1e13 Mon Sep 17 00:00:00 2001 From: Aldy Hernandez Date: Mon, 26 Jul 2021 06:08:24 -0500 Subject: Implement operator_bitwise_xor::op1_op2_relation_effect. This patch adjusts XORing of ranges where the operands are known to be equal or not equal. We should probably do the same thing for the op[12]_range methods. gcc/ChangeLog: * range-op.cc (operator_bitwise_xor::op1_op2_relation_effect): New. 
--- gcc/range-op.cc | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'gcc') diff --git a/gcc/range-op.cc b/gcc/range-op.cc index 4bdd14d..b1fb25c 100644 --- a/gcc/range-op.cc +++ b/gcc/range-op.cc @@ -3101,6 +3101,11 @@ public: const irange &lhs, const irange &op1, relation_kind rel = VREL_NONE) const; + virtual bool op1_op2_relation_effect (irange &lhs_range, + tree type, + const irange &op1_range, + const irange &op2_range, + relation_kind rel) const; } op_bitwise_xor; void @@ -3135,6 +3140,34 @@ operator_bitwise_xor::wi_fold (irange &r, tree type, } bool +operator_bitwise_xor::op1_op2_relation_effect (irange &lhs_range, + tree type, + const irange &, + const irange &, + relation_kind rel) const +{ + if (rel == VREL_NONE) + return false; + + int_range<2> rel_range; + + switch (rel) + { + case EQ_EXPR: + rel_range.set_zero (type); + break; + case NE_EXPR: + rel_range.set_nonzero (type); + break; + default: + return false; + } + + lhs_range.intersect (rel_range); + return true; +} + +bool operator_bitwise_xor::op1_range (irange &r, tree type, const irange &lhs, const irange &op2, -- cgit v1.1 From 44e322f4326202799cca46e0cc1b435fcfcb06c5 Mon Sep 17 00:00:00 2001 From: Joseph Myers Date: Mon, 26 Jul 2021 15:27:23 +0000 Subject: Regenerate .pot files. gcc/po/ * gcc.pot: Regenerate. libcpp/po/ * cpplib.pot: Regenerate. 
--- gcc/po/gcc.pot | 21070 ++++++++++++++++++++++++++++--------------------------- 1 file changed, 10865 insertions(+), 10205 deletions(-) (limited to 'gcc') diff --git a/gcc/po/gcc.pot b/gcc/po/gcc.pot index 394fc1b..212c412 100644 --- a/gcc/po/gcc.pot +++ b/gcc/po/gcc.pot @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: PACKAGE VERSION\n" "Report-Msgid-Bugs-To: https://gcc.gnu.org/bugs/\n" -"POT-Creation-Date: 2021-04-20 18:17+0000\n" +"POT-Creation-Date: 2021-07-26 15:26+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" @@ -18,19 +18,19 @@ msgstr "" "Content-Transfer-Encoding: 8bit\n" "Plural-Forms: nplurals=INTEGER; plural=EXPRESSION;\n" -#: cfgrtl.c:2797 +#: cfgrtl.c:2772 msgid "flow control insn inside a basic block" msgstr "" -#: cfgrtl.c:3029 +#: cfgrtl.c:3004 msgid "wrong insn in the fallthru edge" msgstr "" -#: cfgrtl.c:3085 +#: cfgrtl.c:3060 msgid "insn outside basic block" msgstr "" -#: cfgrtl.c:3093 +#: cfgrtl.c:3068 msgid "return not followed by barrier" msgstr "" @@ -77,14 +77,14 @@ msgid "" "write_c_file - output name is %s, prefix is %s\n" msgstr "" -#: collect2.c:2608 +#: collect2.c:2604 #, c-format msgid "" "\n" "ldd output with constructors/destructors.\n" msgstr "" -#: cprop.c:1755 +#: cprop.c:1750 msgid "const/copy propagation disabled" msgstr "" @@ -98,7 +98,7 @@ msgstr "" msgid "%s: some warnings being treated as errors" msgstr "" -#: diagnostic.c:440 input.c:225 input.c:1876 c-family/c-opts.c:1458 +#: diagnostic.c:440 input.c:249 input.c:1947 c-family/c-opts.c:1463 #: fortran/cpp.c:579 fortran/error.c:1049 fortran/error.c:1069 msgid "" msgstr "" @@ -161,59 +161,59 @@ msgstr "" msgid "imported at" msgstr "" -#: diagnostic.c:1220 +#: diagnostic.c:1309 #, c-format msgid "%s:%d: confused by earlier errors, bailing out\n" msgstr "" -#: diagnostic.c:1857 +#: diagnostic.c:1944 #, c-format msgid "Internal compiler error: Error reporting routines re-entered.\n" msgstr "" -#: 
diagnostic.c:1888 diagnostic.c:1907 +#: diagnostic.c:1975 diagnostic.c:1994 #, gcc-internal-format, gfc-internal-format msgid "in %s, at %s:%d" msgstr "" -#: final.c:1127 +#: final.c:1113 msgid "negative insn length" msgstr "" -#: final.c:3092 +#: final.c:2861 msgid "could not split insn" msgstr "" -#: final.c:3620 +#: final.c:3228 msgid "invalid 'asm': " msgstr "" -#: final.c:3753 +#: final.c:3361 #, c-format msgid "nested assembly dialect alternatives" msgstr "" -#: final.c:3781 final.c:3793 +#: final.c:3389 final.c:3401 #, c-format msgid "unterminated assembly dialect alternative" msgstr "" -#: final.c:3935 +#: final.c:3543 #, c-format msgid "operand number missing after %%-letter" msgstr "" -#: final.c:3938 final.c:3979 +#: final.c:3546 final.c:3587 #, c-format msgid "operand number out of range" msgstr "" -#: final.c:3996 +#: final.c:3604 #, c-format msgid "invalid %%-code" msgstr "" -#: final.c:4030 +#: final.c:3638 #, c-format msgid "'%%l' operand isn't a label" msgstr "" @@ -222,13 +222,13 @@ msgstr "" #. PRINT_OPERAND must handle them. #. We can't handle floating point constants; #. TARGET_PRINT_OPERAND must handle them. 
-#: final.c:4166 config/arc/arc.c:6404 config/i386/i386.c:11967 +#: final.c:3774 config/arc/arc.c:6406 config/i386/i386.c:12000 #, c-format msgid "floating constant misused" msgstr "" -#: final.c:4224 config/arc/arc.c:6501 config/i386/i386.c:12058 -#: config/pdp11/pdp11.c:1874 +#: final.c:3832 config/arc/arc.c:6503 config/i386/i386.c:12091 +#: config/pdp11/pdp11.c:1872 #, c-format msgid "invalid expression as operand" msgstr "" @@ -238,277 +238,281 @@ msgstr "" msgid "%s\n" msgstr "" -#: gcc.c:1841 +#: gcc.c:1847 #, c-format msgid "Using built-in specs.\n" msgstr "" -#: gcc.c:2086 +#: gcc.c:2092 #, c-format msgid "" "Setting spec %s to '%s'\n" "\n" msgstr "" -#: gcc.c:2291 +#: gcc.c:2297 #, c-format msgid "Reading specs from %s\n" msgstr "" -#: gcc.c:2423 +#: gcc.c:2429 #, c-format msgid "could not find specs file %s\n" msgstr "" -#: gcc.c:2498 +#: gcc.c:2504 #, c-format msgid "rename spec %s to %s\n" msgstr "" -#: gcc.c:2500 +#: gcc.c:2506 #, c-format msgid "" "spec is '%s'\n" "\n" msgstr "" -#: gcc.c:3347 +#: gcc.c:3358 #, c-format msgid "" "\n" "Go ahead? 
(y or n) " msgstr "" -#: gcc.c:3519 +#: gcc.c:3530 #, c-format msgid "# %s %.2f %.2f\n" msgstr "" -#: gcc.c:3735 +#: gcc.c:3746 #, c-format msgid "Usage: %s [options] file...\n" msgstr "" -#: gcc.c:3736 +#: gcc.c:3747 msgid "Options:\n" msgstr "" -#: gcc.c:3738 +#: gcc.c:3749 msgid " -pass-exit-codes Exit with highest error code from a phase.\n" msgstr "" -#: gcc.c:3739 +#: gcc.c:3750 msgid " --help Display this information.\n" msgstr "" -#: gcc.c:3740 +#: gcc.c:3751 msgid "" " --target-help Display target specific command line options.\n" msgstr "" -#: gcc.c:3741 +#: gcc.c:3752 msgid "" " --help={common|optimizers|params|target|warnings|[^]{joined|separate|" "undocumented}}[,...].\n" msgstr "" -#: gcc.c:3742 +#: gcc.c:3753 msgid "" " Display specific types of command line options.\n" msgstr "" -#: gcc.c:3744 +#: gcc.c:3755 msgid " (Use '-v --help' to display command line options of sub-processes).\n" msgstr "" -#: gcc.c:3745 +#: gcc.c:3756 msgid " --version Display compiler version information.\n" msgstr "" -#: gcc.c:3746 +#: gcc.c:3757 msgid " -dumpspecs Display all of the built in spec strings.\n" msgstr "" -#: gcc.c:3747 +#: gcc.c:3758 msgid " -dumpversion Display the version of the compiler.\n" msgstr "" -#: gcc.c:3748 +#: gcc.c:3759 msgid " -dumpmachine Display the compiler's target processor.\n" msgstr "" -#: gcc.c:3749 +#: gcc.c:3760 +msgid " -foffload= Specify offloading targets.\n" +msgstr "" + +#: gcc.c:3761 msgid "" " -print-search-dirs Display the directories in the compiler's search " "path.\n" msgstr "" -#: gcc.c:3750 +#: gcc.c:3762 msgid "" " -print-libgcc-file-name Display the name of the compiler's companion " "library.\n" msgstr "" -#: gcc.c:3751 +#: gcc.c:3763 msgid " -print-file-name= Display the full path to library .\n" msgstr "" -#: gcc.c:3752 +#: gcc.c:3764 msgid "" " -print-prog-name= Display the full path to compiler component " ".\n" msgstr "" -#: gcc.c:3753 +#: gcc.c:3765 msgid "" " -print-multiarch Display the target's normalized GNU 
triplet, used " "as\n" " a component in the library path.\n" msgstr "" -#: gcc.c:3756 +#: gcc.c:3768 msgid "" " -print-multi-directory Display the root directory for versions of " "libgcc.\n" msgstr "" -#: gcc.c:3757 +#: gcc.c:3769 msgid "" " -print-multi-lib Display the mapping between command line options " "and\n" " multiple library search directories.\n" msgstr "" -#: gcc.c:3760 +#: gcc.c:3772 msgid "" " -print-multi-os-directory Display the relative path to OS libraries.\n" msgstr "" -#: gcc.c:3761 +#: gcc.c:3773 msgid " -print-sysroot Display the target libraries directory.\n" msgstr "" -#: gcc.c:3762 +#: gcc.c:3774 msgid "" " -print-sysroot-headers-suffix Display the sysroot suffix used to find " "headers.\n" msgstr "" -#: gcc.c:3763 +#: gcc.c:3775 msgid "" " -Wa, Pass comma-separated on to the " "assembler.\n" msgstr "" -#: gcc.c:3764 +#: gcc.c:3776 msgid "" " -Wp, Pass comma-separated on to the " "preprocessor.\n" msgstr "" -#: gcc.c:3765 +#: gcc.c:3777 msgid "" " -Wl, Pass comma-separated on to the linker.\n" msgstr "" -#: gcc.c:3766 +#: gcc.c:3778 msgid " -Xassembler Pass on to the assembler.\n" msgstr "" -#: gcc.c:3767 +#: gcc.c:3779 msgid " -Xpreprocessor Pass on to the preprocessor.\n" msgstr "" -#: gcc.c:3768 +#: gcc.c:3780 msgid " -Xlinker Pass on to the linker.\n" msgstr "" -#: gcc.c:3769 +#: gcc.c:3781 msgid " -save-temps Do not delete intermediate files.\n" msgstr "" -#: gcc.c:3770 +#: gcc.c:3782 msgid " -save-temps= Do not delete intermediate files.\n" msgstr "" -#: gcc.c:3771 +#: gcc.c:3783 msgid "" " -no-canonical-prefixes Do not canonicalize paths when building relative\n" " prefixes to other gcc components.\n" msgstr "" -#: gcc.c:3774 +#: gcc.c:3786 msgid " -pipe Use pipes rather than intermediate files.\n" msgstr "" -#: gcc.c:3775 +#: gcc.c:3787 msgid " -time Time the execution of each subprocess.\n" msgstr "" -#: gcc.c:3776 +#: gcc.c:3788 msgid "" " -specs= Override built-in specs with the contents of " ".\n" msgstr "" -#: gcc.c:3777 +#: 
gcc.c:3789 msgid "" " -std= Assume that the input sources are for " ".\n" msgstr "" -#: gcc.c:3778 +#: gcc.c:3790 msgid "" " --sysroot= Use as the root directory for " "headers\n" " and libraries.\n" msgstr "" -#: gcc.c:3781 +#: gcc.c:3793 msgid "" " -B Add to the compiler's search paths.\n" msgstr "" -#: gcc.c:3782 +#: gcc.c:3794 msgid "" " -v Display the programs invoked by the compiler.\n" msgstr "" -#: gcc.c:3783 +#: gcc.c:3795 msgid "" " -### Like -v but options quoted and commands not " "executed.\n" msgstr "" -#: gcc.c:3784 +#: gcc.c:3796 msgid "" " -E Preprocess only; do not compile, assemble or " "link.\n" msgstr "" -#: gcc.c:3785 +#: gcc.c:3797 msgid " -S Compile only; do not assemble or link.\n" msgstr "" -#: gcc.c:3786 +#: gcc.c:3798 msgid " -c Compile and assemble, but do not link.\n" msgstr "" -#: gcc.c:3787 +#: gcc.c:3799 msgid " -o Place the output into .\n" msgstr "" -#: gcc.c:3788 +#: gcc.c:3800 msgid "" " -pie Create a dynamically linked position independent\n" " executable.\n" msgstr "" -#: gcc.c:3790 +#: gcc.c:3802 msgid " -shared Create a shared library.\n" msgstr "" -#: gcc.c:3791 +#: gcc.c:3803 msgid "" " -x Specify the language of the following input " "files.\n" @@ -519,7 +523,7 @@ msgid "" "extension.\n" msgstr "" -#: gcc.c:3798 +#: gcc.c:3810 #, c-format msgid "" "\n" @@ -528,96 +532,96 @@ msgid "" " other options on to these processes the -W options must be used.\n" msgstr "" -#: gcc.c:6668 +#: gcc.c:6770 #, c-format msgid "Processing spec (%s), which is '%s'\n" msgstr "" -#: gcc.c:7429 +#: gcc.c:7531 #, c-format msgid "Target: %s\n" msgstr "" -#: gcc.c:7430 +#: gcc.c:7532 #, c-format msgid "Configured with: %s\n" msgstr "" -#: gcc.c:7444 +#: gcc.c:7546 #, c-format msgid "Thread model: %s\n" msgstr "" -#: gcc.c:7445 +#: gcc.c:7547 #, c-format msgid "Supported LTO compression algorithms: zlib" msgstr "" -#: gcc.c:7447 +#: gcc.c:7549 #, c-format msgid " zstd" msgstr "" -#: gcc.c:7449 gcov.c:1461 gcov.c:1519 gcov.c:1531 gcov.c:2872 +#: 
gcc.c:7551 gcov.c:1461 gcov.c:1519 gcov.c:1531 gcov.c:2872 #, c-format msgid "\n" msgstr "" -#: gcc.c:7460 +#: gcc.c:7562 #, c-format msgid "gcc version %s %s\n" msgstr "" -#: gcc.c:7463 +#: gcc.c:7565 #, c-format msgid "gcc driver version %s %sexecuting gcc version %s\n" msgstr "" -#: gcc.c:7536 gcc.c:7746 +#: gcc.c:7638 gcc.c:7848 #, c-format msgid "" "The bug is not reproducible, so it is likely a hardware or OS problem.\n" msgstr "" -#: gcc.c:7670 +#: gcc.c:7772 #, c-format msgid "" "Preprocessed source stored into %s file, please attach this to your " "bugreport.\n" msgstr "" -#: gcc.c:8520 +#: gcc.c:8624 #, c-format msgid "install: %s%s\n" msgstr "" -#: gcc.c:8523 +#: gcc.c:8627 #, c-format msgid "programs: %s\n" msgstr "" -#: gcc.c:8525 +#: gcc.c:8629 #, c-format msgid "libraries: %s\n" msgstr "" -#: gcc.c:8642 +#: gcc.c:8746 #, c-format msgid "" "\n" "For bug reporting instructions, please see:\n" msgstr "" -#: gcc.c:8658 gcov-tool.c:527 +#: gcc.c:8762 gcov-tool.c:527 #, c-format msgid "%s %s%s\n" msgstr "" -#: gcc.c:8661 gcov-tool.c:529 gcov.c:967 fortran/gfortranspec.c:282 +#: gcc.c:8765 gcov-tool.c:529 gcov.c:967 fortran/gfortranspec.c:282 msgid "(C)" msgstr "" -#: gcc.c:8662 gcov-tool.c:531 gcov.c:969 fortran/gfortranspec.c:283 +#: gcc.c:8766 gcov-tool.c:531 gcov.c:969 fortran/gfortranspec.c:283 #, c-format msgid "" "This is free software; see the source for copying conditions. 
There is NO\n" @@ -625,7 +629,7 @@ msgid "" "\n" msgstr "" -#: gcc.c:9005 +#: gcc.c:9109 #, c-format msgid "" "\n" @@ -634,14 +638,14 @@ msgid "" "\n" msgstr "" -#: gcc.c:9006 +#: gcc.c:9110 #, c-format msgid "" "Use \"-Wl,OPTION\" to pass \"OPTION\" to the linker.\n" "\n" msgstr "" -#: gcc.c:10421 +#: gcc.c:10525 #, c-format msgid "" "Assembler options\n" @@ -649,7 +653,7 @@ msgid "" "\n" msgstr "" -#: gcc.c:10422 +#: gcc.c:10526 #, c-format msgid "" "Use \"-Wa,OPTION\" to pass \"OPTION\" to the assembler.\n" @@ -1168,11 +1172,11 @@ msgstr "" msgid "Cannot open source file %s\n" msgstr "" -#: gcse.c:2596 +#: gcse.c:2578 msgid "PRE disabled" msgstr "" -#: gcse.c:3526 +#: gcse.c:3505 msgid "GCSE disabled" msgstr "" @@ -1220,25 +1224,25 @@ msgstr "" msgid "At top level:" msgstr "" -#: langhooks.c:400 cp/error.c:3564 +#: langhooks.c:400 cp/error.c:3602 #, c-format msgid "In member function %qs" msgstr "" -#: langhooks.c:404 cp/error.c:3567 +#: langhooks.c:404 cp/error.c:3605 #, c-format msgid "In function %qs" msgstr "" -#: langhooks.c:449 cp/error.c:3517 +#: langhooks.c:449 cp/error.c:3555 msgid " inlined from %qs at %r%s:%d:%d%R" msgstr "" -#: langhooks.c:454 cp/error.c:3522 +#: langhooks.c:454 cp/error.c:3560 msgid " inlined from %qs at %r%s:%d%R" msgstr "" -#: langhooks.c:460 cp/error.c:3528 +#: langhooks.c:460 cp/error.c:3566 #, c-format msgid " inlined from %qs" msgstr "" @@ -1251,126 +1255,126 @@ msgstr "" msgid "unable to generate reloads for impossible constraints:" msgstr "" -#: lra-constraints.c:4138 reload.c:3868 +#: lra-constraints.c:4132 reload.c:3840 msgid "unable to generate reloads for:" msgstr "" #. What to print when a switch has no documentation. -#: opts.c:188 +#: opts.c:305 msgid "This option lacks documentation." msgstr "" -#: opts.c:189 +#: opts.c:306 msgid "Uses of this option are diagnosed." msgstr "" -#: opts.c:1372 +#: opts.c:1490 #, c-format msgid "Same as %s%s (or, in negated form, %s%s)." 
msgstr "" -#: opts.c:1377 +#: opts.c:1495 #, c-format msgid "Same as %s%s." msgstr "" -#: opts.c:1382 +#: opts.c:1500 #, c-format msgid "Same as %s." msgstr "" -#: opts.c:1390 +#: opts.c:1508 #, c-format msgid "%s Same as %s." msgstr "" -#: opts.c:1453 +#: opts.c:1571 msgid "[available in " msgstr "" -#: opts.c:1485 +#: opts.c:1603 msgid "[default]" msgstr "" -#: opts.c:1494 +#: opts.c:1612 #, c-format msgid "%llu bytes" msgstr "" -#: opts.c:1531 +#: opts.c:1649 msgid "[enabled]" msgstr "" -#: opts.c:1533 +#: opts.c:1651 msgid "[disabled]" msgstr "" -#: opts.c:1569 +#: opts.c:1687 #, c-format msgid " No options with the desired characteristics were found\n" msgstr "" -#: opts.c:1578 +#: opts.c:1696 #, c-format msgid "" " None found. Use --help=%s to show *all* the options supported by the %s " "front-end.\n" msgstr "" -#: opts.c:1584 +#: opts.c:1702 #, c-format msgid "" " All options with the desired characteristics have already been displayed\n" msgstr "" -#: opts.c:1629 +#: opts.c:1747 #, c-format msgid "" " Known valid arguments for %s option:\n" " " msgstr "" -#: opts.c:1679 +#: opts.c:1797 msgid "The following options are target specific" msgstr "" -#: opts.c:1682 +#: opts.c:1800 msgid "The following options control compiler warning messages" msgstr "" -#: opts.c:1685 +#: opts.c:1803 msgid "The following options control optimizations" msgstr "" -#: opts.c:1688 opts.c:1728 +#: opts.c:1806 opts.c:1846 msgid "The following options are language-independent" msgstr "" -#: opts.c:1691 +#: opts.c:1809 msgid "The following options control parameters" msgstr "" -#: opts.c:1697 +#: opts.c:1815 msgid "The following options are specific to just the language " msgstr "" -#: opts.c:1699 +#: opts.c:1817 msgid "The following options are supported by the language " msgstr "" -#: opts.c:1710 +#: opts.c:1828 msgid "The following options are not documented" msgstr "" -#: opts.c:1712 +#: opts.c:1830 msgid "The following options take separate arguments" msgstr "" -#: opts.c:1714 
+#: opts.c:1832 msgid "The following options take joined arguments" msgstr "" -#: opts.c:1726 +#: opts.c:1844 msgid "The following options are language-related" msgstr "" @@ -1399,21 +1403,21 @@ msgid "" "you can reproduce it without enabling any plugins.\n" msgstr "" -#: postreload-gcse.c:1354 +#: postreload-gcse.c:1355 msgid "using simple load CSE after register allocation" msgstr "" #. It's the compiler's fault. -#: reload1.c:5997 +#: reload1.c:5994 msgid "could not find a spill register" msgstr "" #. It's the compiler's fault. -#: reload1.c:7879 +#: reload1.c:7876 msgid "VOIDmode on an output" msgstr "" -#: reload1.c:8612 +#: reload1.c:8609 msgid "failure trying to reload:" msgstr "" @@ -1425,53 +1429,53 @@ msgstr "" msgid "insn does not satisfy its constraints:" msgstr "" -#: targhooks.c:2089 +#: targhooks.c:2188 #, c-format msgid "created and used with differing settings of '%s'" msgstr "" -#: targhooks.c:2104 +#: targhooks.c:2203 msgid "created and used with different settings of %<-fpic%>" msgstr "" -#: targhooks.c:2106 +#: targhooks.c:2205 msgid "created and used with different settings of %<-fpie%>" msgstr "" -#: toplev.c:323 +#: toplev.c:324 #, c-format msgid "unrecoverable error" msgstr "" -#: toplev.c:613 +#: toplev.c:615 #, c-format msgid "" "%s%s%s %sversion %s (%s)\n" "%s\tcompiled by GNU C version %s, " msgstr "" -#: toplev.c:615 +#: toplev.c:617 #, c-format msgid "%s%s%s %sversion %s (%s) compiled by CC, " msgstr "" -#: toplev.c:619 +#: toplev.c:621 #, c-format msgid "GMP version %s, MPFR version %s, MPC version %s, isl version %s\n" msgstr "" -#: toplev.c:621 +#: toplev.c:623 #, c-format msgid "%s%swarning: %s header version %s differs from library version %s.\n" msgstr "" -#: toplev.c:623 +#: toplev.c:625 #, c-format msgid "" "%s%sGGC heuristics: --param ggc-min-expand=%d --param ggc-min-heapsize=%d\n" msgstr "" -#: tree-diagnostic.c:299 c/c-decl.c:6040 c/c-typeck.c:7840 cp/error.c:1111 +#: tree-diagnostic.c:290 c/c-decl.c:6047 c/c-typeck.c:7857 
cp/error.c:1149 #: tree-diagnostic-path.cc:257 c-family/c-pretty-print.c:424 #, gcc-internal-format msgid "" @@ -1913,87 +1917,87 @@ msgid "length modifier in strfmon format" msgstr "" #. Handle deferred options from command-line. -#: c-family/c-opts.c:1482 fortran/cpp.c:592 +#: c-family/c-opts.c:1487 fortran/cpp.c:592 msgid "" msgstr "" -#: config/aarch64/aarch64.c:10663 +#: config/aarch64/aarch64.c:10661 #, c-format msgid "unsupported operand for code '%c'" msgstr "" -#: config/aarch64/aarch64.c:10672 config/aarch64/aarch64.c:10685 -#: config/aarch64/aarch64.c:10697 config/aarch64/aarch64.c:10708 -#: config/aarch64/aarch64.c:10724 config/aarch64/aarch64.c:10738 -#: config/aarch64/aarch64.c:10758 config/aarch64/aarch64.c:10832 -#: config/aarch64/aarch64.c:10843 config/aarch64/aarch64.c:10857 -#: config/aarch64/aarch64.c:11079 config/aarch64/aarch64.c:11097 +#: config/aarch64/aarch64.c:10670 config/aarch64/aarch64.c:10683 +#: config/aarch64/aarch64.c:10695 config/aarch64/aarch64.c:10706 +#: config/aarch64/aarch64.c:10722 config/aarch64/aarch64.c:10736 +#: config/aarch64/aarch64.c:10756 config/aarch64/aarch64.c:10830 +#: config/aarch64/aarch64.c:10841 config/aarch64/aarch64.c:10855 +#: config/aarch64/aarch64.c:11077 config/aarch64/aarch64.c:11095 #: config/pru/pru.c:1669 config/pru/pru.c:1679 config/pru/pru.c:1710 #: config/pru/pru.c:1721 config/pru/pru.c:1793 #, c-format msgid "invalid operand for '%%%c'" msgstr "" -#: config/aarch64/aarch64.c:10776 config/aarch64/aarch64.c:10787 -#: config/aarch64/aarch64.c:10939 config/aarch64/aarch64.c:10950 +#: config/aarch64/aarch64.c:10774 config/aarch64/aarch64.c:10785 +#: config/aarch64/aarch64.c:10937 config/aarch64/aarch64.c:10948 #, c-format msgid "invalid vector constant" msgstr "" -#: config/aarch64/aarch64.c:10799 config/aarch64/aarch64.c:10811 +#: config/aarch64/aarch64.c:10797 config/aarch64/aarch64.c:10809 #, c-format msgid "incompatible floating point / vector register operand for '%%%c'" msgstr "" -#: 
config/aarch64/aarch64.c:10825 +#: config/aarch64/aarch64.c:10823 #, c-format msgid "incompatible register operand for '%%%c'" msgstr "" -#: config/aarch64/aarch64.c:10891 config/arm/arm.c:24308 +#: config/aarch64/aarch64.c:10889 config/arm/arm.c:24343 #, c-format msgid "missing operand" msgstr "" -#: config/aarch64/aarch64.c:10976 +#: config/aarch64/aarch64.c:10974 #, c-format msgid "invalid constant" msgstr "" -#: config/aarch64/aarch64.c:10979 +#: config/aarch64/aarch64.c:10977 #, c-format msgid "invalid operand" msgstr "" -#: config/aarch64/aarch64.c:11105 config/aarch64/aarch64.c:11110 +#: config/aarch64/aarch64.c:11103 config/aarch64/aarch64.c:11108 #, c-format msgid "invalid operand prefix '%%%c'" msgstr "" -#: config/aarch64/aarch64.c:11130 +#: config/aarch64/aarch64.c:11128 #, c-format msgid "invalid address mode" msgstr "" -#: config/aarch64/aarch64.c:25211 config/arm/arm.c:33660 +#: config/aarch64/aarch64.c:25212 config/arm/arm.c:33830 msgid "invalid conversion from type %" msgstr "" -#: config/aarch64/aarch64.c:25213 config/arm/arm.c:33662 +#: config/aarch64/aarch64.c:25214 config/arm/arm.c:33832 msgid "invalid conversion to type %" msgstr "" -#: config/aarch64/aarch64.c:25228 config/aarch64/aarch64.c:25244 -#: config/arm/arm.c:33677 config/arm/arm.c:33693 +#: config/aarch64/aarch64.c:25229 config/aarch64/aarch64.c:25245 +#: config/arm/arm.c:33847 config/arm/arm.c:33863 msgid "operation not permitted on type %" msgstr "" -#: config/aarch64/aarch64.c:25252 +#: config/aarch64/aarch64.c:25253 msgid "cannot combine GNU and SVE vectors in a binary operation" msgstr "" -#: config/alpha/alpha.c:5076 config/i386/i386.c:13227 -#: config/rs6000/rs6000.c:14147 config/sparc/sparc.c:9323 +#: config/alpha/alpha.c:5076 config/i386/i386.c:13260 +#: config/rs6000/rs6000.c:14163 config/sparc/sparc.c:9260 #, c-format msgid "'%%&' used without any local dynamic TLS references" msgstr "" @@ -2009,18 +2013,18 @@ msgid "invalid %%r value" msgstr "" #: 
config/alpha/alpha.c:5174 config/ia64/ia64.c:5531 -#: config/rs6000/rs6000.c:13841 config/xtensa/xtensa.c:2459 +#: config/rs6000/rs6000.c:13857 config/xtensa/xtensa.c:2459 #, c-format msgid "invalid %%R value" msgstr "" -#: config/alpha/alpha.c:5180 config/rs6000/rs6000.c:13761 +#: config/alpha/alpha.c:5180 config/rs6000/rs6000.c:13777 #: config/xtensa/xtensa.c:2426 #, c-format msgid "invalid %%N value" msgstr "" -#: config/alpha/alpha.c:5188 config/rs6000/rs6000.c:13789 +#: config/alpha/alpha.c:5188 config/rs6000/rs6000.c:13805 #, c-format msgid "invalid %%P value" msgstr "" @@ -2050,7 +2054,7 @@ msgstr "" msgid "invalid %%U value" msgstr "" -#: config/alpha/alpha.c:5274 config/rs6000/rs6000.c:13849 +#: config/alpha/alpha.c:5274 config/rs6000/rs6000.c:13865 #, c-format msgid "invalid %%s value" msgstr "" @@ -2060,7 +2064,7 @@ msgstr "" msgid "invalid %%C value" msgstr "" -#: config/alpha/alpha.c:5322 config/rs6000/rs6000.c:13625 +#: config/alpha/alpha.c:5322 config/rs6000/rs6000.c:13641 #, c-format msgid "invalid %%E value" msgstr "" @@ -2070,10 +2074,10 @@ msgstr "" msgid "unknown relocation unspec" msgstr "" -#: config/alpha/alpha.c:5356 config/cr16/cr16.c:1570 config/gcn/gcn.c:5799 -#: config/gcn/gcn.c:5808 config/gcn/gcn.c:5868 config/gcn/gcn.c:5876 -#: config/gcn/gcn.c:5892 config/gcn/gcn.c:5910 config/gcn/gcn.c:5961 -#: config/gcn/gcn.c:6080 config/gcn/gcn.c:6191 config/rs6000/rs6000.c:14152 +#: config/alpha/alpha.c:5356 config/cr16/cr16.c:1572 config/gcn/gcn.c:5964 +#: config/gcn/gcn.c:5973 config/gcn/gcn.c:6033 config/gcn/gcn.c:6041 +#: config/gcn/gcn.c:6057 config/gcn/gcn.c:6075 config/gcn/gcn.c:6126 +#: config/gcn/gcn.c:6245 config/gcn/gcn.c:6356 config/rs6000/rs6000.c:14168 #, c-format msgid "invalid %%xn code" msgstr "" @@ -2083,107 +2087,107 @@ msgstr "" msgid "invalid operand address" msgstr "" -#: config/arc/arc.c:4519 +#: config/arc/arc.c:4521 #, c-format msgid "invalid operand to %%Z code" msgstr "" -#: config/arc/arc.c:4527 +#: 
config/arc/arc.c:4529 #, c-format msgid "invalid operand to %%z code" msgstr "" -#: config/arc/arc.c:4535 +#: config/arc/arc.c:4537 #, c-format msgid "invalid operands to %%c code" msgstr "" -#: config/arc/arc.c:4543 +#: config/arc/arc.c:4545 #, c-format msgid "invalid operand to %%M code" msgstr "" -#: config/arc/arc.c:4551 config/m32r/m32r.c:2085 +#: config/arc/arc.c:4553 config/m32r/m32r.c:2085 #, c-format msgid "invalid operand to %%p code" msgstr "" -#: config/arc/arc.c:4562 config/m32r/m32r.c:2078 +#: config/arc/arc.c:4564 config/m32r/m32r.c:2078 #, c-format msgid "invalid operand to %%s code" msgstr "" -#: config/arc/arc.c:4710 config/m32r/m32r.c:2111 +#: config/arc/arc.c:4712 config/m32r/m32r.c:2111 #, c-format msgid "invalid operand to %%R code" msgstr "" -#: config/arc/arc.c:4786 config/m32r/m32r.c:2134 +#: config/arc/arc.c:4788 config/m32r/m32r.c:2134 #, c-format msgid "invalid operand to %%H/%%L code" msgstr "" -#: config/arc/arc.c:4854 config/m32r/m32r.c:2205 +#: config/arc/arc.c:4856 config/m32r/m32r.c:2205 #, c-format msgid "invalid operand to %%U code" msgstr "" -#: config/arc/arc.c:4866 +#: config/arc/arc.c:4868 #, c-format msgid "invalid operand to %%V code" msgstr "" -#: config/arc/arc.c:4923 +#: config/arc/arc.c:4925 #, c-format msgid "invalid operand to %%O code" msgstr "" #. Unknown flag. #. Undocumented flag. 
-#: config/arc/arc.c:4949 config/epiphany/epiphany.c:1307 -#: config/m32r/m32r.c:2232 config/nds32/nds32.c:3517 config/sparc/sparc.c:9602 +#: config/arc/arc.c:4951 config/epiphany/epiphany.c:1307 +#: config/m32r/m32r.c:2232 config/nds32/nds32.c:3517 config/sparc/sparc.c:9539 #, c-format msgid "invalid operand output code" msgstr "" -#: config/arc/arc.c:6489 +#: config/arc/arc.c:6491 #, c-format msgid "invalid UNSPEC as operand: %d" msgstr "" -#: config/arc/arc.c:6705 +#: config/arc/arc.c:6707 msgid "unrecognized supposed constant" msgstr "" -#: config/arm/arm.c:20672 config/arm/arm.c:20697 config/arm/arm.c:20707 -#: config/arm/arm.c:20716 config/arm/arm.c:20725 +#: config/arm/arm.c:20707 config/arm/arm.c:20732 config/arm/arm.c:20742 +#: config/arm/arm.c:20751 config/arm/arm.c:20760 #, c-format msgid "invalid shift operand" msgstr "" -#: config/arm/arm.c:23581 config/arm/arm.c:23599 +#: config/arm/arm.c:23616 config/arm/arm.c:23634 #, c-format msgid "predicated Thumb instruction" msgstr "" -#: config/arm/arm.c:23587 +#: config/arm/arm.c:23622 #, c-format msgid "predicated instruction in conditional sequence" msgstr "" -#: config/arm/arm.c:23705 config/arm/arm.c:23718 config/arm/arm.c:23743 +#: config/arm/arm.c:23740 config/arm/arm.c:23753 config/arm/arm.c:23778 #: config/nios2/nios2.c:3080 #, c-format msgid "Unsupported operand for code '%c'" msgstr "" -#: config/arm/arm.c:23820 config/arm/arm.c:23842 config/arm/arm.c:23852 -#: config/arm/arm.c:23862 config/arm/arm.c:23872 config/arm/arm.c:23911 -#: config/arm/arm.c:23929 config/arm/arm.c:23954 config/arm/arm.c:23969 -#: config/arm/arm.c:23996 config/arm/arm.c:24003 config/arm/arm.c:24021 -#: config/arm/arm.c:24028 config/arm/arm.c:24036 config/arm/arm.c:24057 -#: config/arm/arm.c:24064 config/arm/arm.c:24255 config/arm/arm.c:24262 -#: config/arm/arm.c:24289 config/arm/arm.c:24296 config/bfin/bfin.c:1440 +#: config/arm/arm.c:23855 config/arm/arm.c:23877 config/arm/arm.c:23887 +#: config/arm/arm.c:23897 
config/arm/arm.c:23907 config/arm/arm.c:23946 +#: config/arm/arm.c:23964 config/arm/arm.c:23989 config/arm/arm.c:24004 +#: config/arm/arm.c:24031 config/arm/arm.c:24038 config/arm/arm.c:24056 +#: config/arm/arm.c:24063 config/arm/arm.c:24071 config/arm/arm.c:24092 +#: config/arm/arm.c:24099 config/arm/arm.c:24290 config/arm/arm.c:24297 +#: config/arm/arm.c:24324 config/arm/arm.c:24331 config/bfin/bfin.c:1440 #: config/bfin/bfin.c:1447 config/bfin/bfin.c:1454 config/bfin/bfin.c:1461 #: config/bfin/bfin.c:1470 config/bfin/bfin.c:1477 config/bfin/bfin.c:1484 #: config/bfin/bfin.c:1491 config/nds32/nds32.c:3543 @@ -2191,101 +2195,101 @@ msgstr "" msgid "invalid operand for code '%c'" msgstr "" -#: config/arm/arm.c:23924 +#: config/arm/arm.c:23959 #, c-format msgid "instruction never executed" msgstr "" #. Former Maverick support, removed after GCC-4.7. -#: config/arm/arm.c:23945 +#: config/arm/arm.c:23980 #, c-format msgid "obsolete Maverick format code '%c'" msgstr "" -#: config/avr/avr.c:2632 +#: config/avr/avr.c:2642 #, c-format msgid "address operand requires constraint for X, Y, or Z register" msgstr "" -#: config/avr/avr.c:2813 +#: config/avr/avr.c:2825 msgid "operands to %T/%t must be reg + const_int:" msgstr "" -#: config/avr/avr.c:2863 config/avr/avr.c:2930 +#: config/avr/avr.c:2875 config/avr/avr.c:2942 msgid "bad address, not an I/O address:" msgstr "" -#: config/avr/avr.c:2872 +#: config/avr/avr.c:2884 msgid "bad address, not a constant:" msgstr "" -#: config/avr/avr.c:2890 config/avr/avr.c:2897 +#: config/avr/avr.c:2902 config/avr/avr.c:2909 msgid "bad address, not (reg+disp):" msgstr "" -#: config/avr/avr.c:2904 +#: config/avr/avr.c:2916 msgid "bad address, not post_inc or pre_dec:" msgstr "" -#: config/avr/avr.c:2916 +#: config/avr/avr.c:2928 msgid "internal compiler error. 
Bad address:" msgstr "" -#: config/avr/avr.c:2949 +#: config/avr/avr.c:2961 #, c-format msgid "Unsupported code '%c' for fixed-point:" msgstr "" -#: config/avr/avr.c:2957 +#: config/avr/avr.c:2969 msgid "internal compiler error. Unknown mode:" msgstr "" -#: config/avr/avr.c:3999 config/avr/avr.c:4943 config/avr/avr.c:5390 +#: config/avr/avr.c:3866 config/avr/avr.c:4810 config/avr/avr.c:5257 msgid "invalid insn:" msgstr "" -#: config/avr/avr.c:4053 config/avr/avr.c:4165 config/avr/avr.c:4223 -#: config/avr/avr.c:4275 config/avr/avr.c:4294 config/avr/avr.c:4486 -#: config/avr/avr.c:4794 config/avr/avr.c:5079 config/avr/avr.c:5283 -#: config/avr/avr.c:5447 config/avr/avr.c:5540 config/avr/avr.c:5739 +#: config/avr/avr.c:3920 config/avr/avr.c:4032 config/avr/avr.c:4090 +#: config/avr/avr.c:4142 config/avr/avr.c:4161 config/avr/avr.c:4353 +#: config/avr/avr.c:4661 config/avr/avr.c:4946 config/avr/avr.c:5150 +#: config/avr/avr.c:5314 config/avr/avr.c:5407 config/avr/avr.c:5606 msgid "incorrect insn:" msgstr "" -#: config/avr/avr.c:4310 config/avr/avr.c:4585 config/avr/avr.c:4865 -#: config/avr/avr.c:5151 config/avr/avr.c:5329 config/avr/avr.c:5596 -#: config/avr/avr.c:5797 +#: config/avr/avr.c:4177 config/avr/avr.c:4452 config/avr/avr.c:4732 +#: config/avr/avr.c:5018 config/avr/avr.c:5196 config/avr/avr.c:5463 +#: config/avr/avr.c:5664 msgid "unknown move insn:" msgstr "" -#: config/avr/avr.c:6256 +#: config/avr/avr.c:6131 msgid "bad shift insn:" msgstr "" -#: config/avr/avr.c:6364 config/avr/avr.c:6845 config/avr/avr.c:7260 +#: config/avr/avr.c:6239 config/avr/avr.c:6722 config/avr/avr.c:7139 msgid "internal compiler error. 
Incorrect shift:" msgstr "" -#: config/avr/avr.c:8666 +#: config/avr/avr.c:8547 msgid "unsupported fixed-point conversion" msgstr "" -#: config/avr/avr.c:10023 +#: config/avr/avr.c:9916 msgid "variable" msgstr "" -#: config/avr/avr.c:10028 +#: config/avr/avr.c:9921 msgid "function parameter" msgstr "" -#: config/avr/avr.c:10033 +#: config/avr/avr.c:9926 msgid "structure field" msgstr "" -#: config/avr/avr.c:10039 +#: config/avr/avr.c:9932 msgid "return type of function" msgstr "" -#: config/avr/avr.c:10044 +#: config/avr/avr.c:9937 msgid "pointer" msgstr "" @@ -2306,18 +2310,18 @@ msgstr "" msgid "invalid const_double operand" msgstr "" -#: config/bpf/bpf.c:776 +#: config/bpf/bpf.c:772 msgid "invalid address in operand" msgstr "" #. Fallthrough. -#: config/bpf/bpf.c:783 +#: config/bpf/bpf.c:779 msgid "unsupported operand" msgstr "" #: config/cris/cris.c:775 config/ft32/ft32.c:110 config/moxie/moxie.c:108 -#: final.c:3625 final.c:3627 fold-const.c:267 gcc.c:6030 gcc.c:6044 -#: rtl-error.c:101 toplev.c:327 vr-values.c:2370 cp/typeck.c:6939 +#: final.c:3233 final.c:3235 fold-const.c:269 gcc.c:6134 gcc.c:6148 +#: rtl-error.c:101 toplev.c:328 vr-values.c:2383 cp/typeck.c:6984 #: d/dmd/dsymbolsem.c:1451 d/dmd/semantic2.c:75 lto/lto-object.c:184 #: lto/lto-object.c:281 lto/lto-object.c:338 lto/lto-object.c:362 #, gcc-internal-format, gfc-internal-format @@ -2446,206 +2450,206 @@ msgstr "" msgid "fr30_print_operand: unhandled MEM" msgstr "" -#: config/frv/frv.c:2501 +#: config/frv/frv.c:2489 msgid "bad insn to frv_print_operand_address:" msgstr "" -#: config/frv/frv.c:2512 +#: config/frv/frv.c:2500 msgid "bad register to frv_print_operand_memory_reference_reg:" msgstr "" -#: config/frv/frv.c:2551 config/frv/frv.c:2561 config/frv/frv.c:2570 -#: config/frv/frv.c:2591 config/frv/frv.c:2596 +#: config/frv/frv.c:2539 config/frv/frv.c:2549 config/frv/frv.c:2558 +#: config/frv/frv.c:2579 config/frv/frv.c:2584 msgid "bad insn to frv_print_operand_memory_reference:" msgstr "" -#: 
config/frv/frv.c:2681 +#: config/frv/frv.c:2669 #, c-format msgid "bad condition code" msgstr "" -#: config/frv/frv.c:2755 +#: config/frv/frv.c:2743 msgid "bad insn in frv_print_operand, bad const_double" msgstr "" -#: config/frv/frv.c:2816 +#: config/frv/frv.c:2804 msgid "bad insn to frv_print_operand, 'e' modifier:" msgstr "" -#: config/frv/frv.c:2824 +#: config/frv/frv.c:2812 msgid "bad insn to frv_print_operand, 'F' modifier:" msgstr "" -#: config/frv/frv.c:2840 +#: config/frv/frv.c:2828 msgid "bad insn to frv_print_operand, 'f' modifier:" msgstr "" -#: config/frv/frv.c:2854 +#: config/frv/frv.c:2842 msgid "bad insn to frv_print_operand, 'g' modifier:" msgstr "" -#: config/frv/frv.c:2902 +#: config/frv/frv.c:2890 msgid "bad insn to frv_print_operand, 'L' modifier:" msgstr "" -#: config/frv/frv.c:2915 +#: config/frv/frv.c:2903 msgid "bad insn to frv_print_operand, 'M/N' modifier:" msgstr "" -#: config/frv/frv.c:2936 +#: config/frv/frv.c:2924 msgid "bad insn to frv_print_operand, 'O' modifier:" msgstr "" -#: config/frv/frv.c:2954 +#: config/frv/frv.c:2942 msgid "bad insn to frv_print_operand, P modifier:" msgstr "" -#: config/frv/frv.c:2974 +#: config/frv/frv.c:2962 msgid "bad insn in frv_print_operand, z case" msgstr "" -#: config/frv/frv.c:3005 +#: config/frv/frv.c:2993 msgid "bad insn in frv_print_operand, 0 case" msgstr "" -#: config/frv/frv.c:3010 +#: config/frv/frv.c:2998 msgid "frv_print_operand: unknown code" msgstr "" -#: config/frv/frv.c:4384 +#: config/frv/frv.c:4372 msgid "bad output_move_single operand" msgstr "" -#: config/frv/frv.c:4511 +#: config/frv/frv.c:4499 msgid "bad output_move_double operand" msgstr "" -#: config/frv/frv.c:4653 +#: config/frv/frv.c:4641 msgid "bad output_condmove_single operand" msgstr "" -#: config/gcn/gcn.c:5469 config/gcn/gcn.c:5493 config/gcn/gcn.c:5497 -#: config/gcn/gcn.c:5841 config/gcn/gcn.c:5852 config/gcn/gcn.c:5855 +#: config/gcn/gcn.c:5625 config/gcn/gcn.c:5658 config/gcn/gcn.c:5662 +#: config/gcn/gcn.c:6006 
config/gcn/gcn.c:6017 config/gcn/gcn.c:6020 #, c-format msgid "bad ADDR_SPACE_GLOBAL address" msgstr "" -#: config/gcn/gcn.c:5609 config/gcn/gcn.c:5632 config/gcn/gcn.c:5664 -#: config/gcn/gcn.c:5680 config/gcn/gcn.c:5695 config/gcn/gcn.c:5714 -#: config/gcn/gcn.c:5790 config/gcn/gcn.c:5986 config/gcn/gcn.c:6101 +#: config/gcn/gcn.c:5774 config/gcn/gcn.c:5797 config/gcn/gcn.c:5829 +#: config/gcn/gcn.c:5845 config/gcn/gcn.c:5860 config/gcn/gcn.c:5879 +#: config/gcn/gcn.c:5955 config/gcn/gcn.c:6151 config/gcn/gcn.c:6266 #, c-format msgid "invalid operand %%xn code" msgstr "" -#: config/gcn/gcn.c:6089 +#: config/gcn/gcn.c:6254 #, c-format msgid "operand %%xn code invalid for QImode" msgstr "" -#: config/gcn/gcn.c:6171 +#: config/gcn/gcn.c:6336 #, c-format msgid "invalid fp constant" msgstr "" -#: config/h8300/h8300.c:1565 config/h8300/h8300.c:1573 -#: config/h8300/h8300.c:1581 config/h8300/h8300.c:1589 -#: config/h8300/h8300.c:1597 config/h8300/h8300.c:1605 +#: config/h8300/h8300.c:1564 config/h8300/h8300.c:1572 +#: config/h8300/h8300.c:1580 config/h8300/h8300.c:1588 +#: config/h8300/h8300.c:1596 config/h8300/h8300.c:1604 #, c-format msgid "Expected register or constant integer." 
msgstr "" -#: config/i386/i386.c:12052 +#: config/i386/i386.c:12085 #, c-format msgid "invalid UNSPEC as operand" msgstr "" -#: config/i386/i386.c:12591 +#: config/i386/i386.c:12624 #, c-format msgid "invalid use of register '%s'" msgstr "" -#: config/i386/i386.c:12596 +#: config/i386/i386.c:12629 #, c-format msgid "invalid use of asm flag output" msgstr "" -#: config/i386/i386.c:12829 +#: config/i386/i386.c:12862 #, c-format msgid "invalid operand size for operand code 'O'" msgstr "" -#: config/i386/i386.c:12864 +#: config/i386/i386.c:12897 #, c-format msgid "invalid operand size for operand code 'z'" msgstr "" -#: config/i386/i386.c:12933 +#: config/i386/i386.c:12966 #, c-format msgid "invalid operand type used with operand code 'Z'" msgstr "" -#: config/i386/i386.c:12938 +#: config/i386/i386.c:12971 #, c-format msgid "invalid operand size for operand code 'Z'" msgstr "" -#: config/i386/i386.c:13015 +#: config/i386/i386.c:13048 #, c-format msgid "operand is not a condition code, invalid operand code 'Y'" msgstr "" -#: config/i386/i386.c:13094 +#: config/i386/i386.c:13127 #, c-format msgid "operand is not a condition code, invalid operand code 'D'" msgstr "" -#: config/i386/i386.c:13112 +#: config/i386/i386.c:13145 #, c-format msgid "operand is not a condition code, invalid operand code '%c'" msgstr "" -#: config/i386/i386.c:13125 +#: config/i386/i386.c:13158 #, c-format msgid "" "operand is not an offsettable memory reference, invalid operand code 'H'" msgstr "" -#: config/i386/i386.c:13140 +#: config/i386/i386.c:13173 #, c-format msgid "operand is not an integer, invalid operand code 'K'" msgstr "" -#: config/i386/i386.c:13168 +#: config/i386/i386.c:13201 #, c-format msgid "operand is not a specific integer, invalid operand code 'r'" msgstr "" -#: config/i386/i386.c:13186 +#: config/i386/i386.c:13219 #, c-format msgid "operand is not an integer, invalid operand code 'R'" msgstr "" -#: config/i386/i386.c:13209 +#: config/i386/i386.c:13242 #, c-format msgid 
"operand is not a specific integer, invalid operand code 'R'" msgstr "" -#: config/i386/i386.c:13313 +#: config/i386/i386.c:13346 #, c-format msgid "invalid operand code '%c'" msgstr "" -#: config/i386/i386.c:13375 +#: config/i386/i386.c:13408 #, c-format msgid "invalid constraints for operand" msgstr "" -#: config/i386/i386.c:13462 +#: config/i386/i386.c:13495 #, c-format msgid "invalid vector immediate" msgstr "" -#: config/i386/i386.c:16384 +#: config/i386/i386.c:16427 msgid "unknown insn mode" msgstr "" @@ -2682,7 +2686,7 @@ msgstr "" msgid "invalid %%P operand" msgstr "" -#: config/iq2000/iq2000.c:3134 config/rs6000/rs6000.c:13779 +#: config/iq2000/iq2000.c:3134 config/rs6000/rs6000.c:13795 #, c-format msgid "invalid %%p value" msgstr "" @@ -2735,7 +2739,7 @@ msgstr "" msgid "post-increment address is not a register" msgstr "" -#: config/m32r/m32r.c:2335 config/m32r/m32r.c:2350 config/rs6000/rs6000.c:20645 +#: config/m32r/m32r.c:2335 config/m32r/m32r.c:2350 config/rs6000/rs6000.c:20323 msgid "bad address" msgstr "" @@ -2777,18 +2781,18 @@ msgstr "" msgid "letter %c was found & insn was not CONST_INT" msgstr "" -#: config/mips/mips.c:8934 config/mips/mips.c:8961 config/mips/mips.c:9144 +#: config/mips/mips.c:8935 config/mips/mips.c:8962 config/mips/mips.c:9145 #, c-format msgid "'%%%c' is not a valid operand prefix" msgstr "" -#: config/mips/mips.c:9033 config/mips/mips.c:9040 config/mips/mips.c:9047 -#: config/mips/mips.c:9054 config/mips/mips.c:9067 config/mips/mips.c:9074 -#: config/mips/mips.c:9084 config/mips/mips.c:9087 config/mips/mips.c:9099 -#: config/mips/mips.c:9102 config/mips/mips.c:9162 config/mips/mips.c:9169 -#: config/mips/mips.c:9190 config/mips/mips.c:9205 config/mips/mips.c:9224 -#: config/mips/mips.c:9233 config/riscv/riscv.c:3335 config/riscv/riscv.c:3450 -#: config/riscv/riscv.c:3456 config/riscv/riscv.c:3465 +#: config/mips/mips.c:9034 config/mips/mips.c:9041 config/mips/mips.c:9048 +#: config/mips/mips.c:9055 config/mips/mips.c:9068 
config/mips/mips.c:9075 +#: config/mips/mips.c:9085 config/mips/mips.c:9088 config/mips/mips.c:9100 +#: config/mips/mips.c:9103 config/mips/mips.c:9163 config/mips/mips.c:9170 +#: config/mips/mips.c:9191 config/mips/mips.c:9206 config/mips/mips.c:9225 +#: config/mips/mips.c:9234 config/riscv/riscv.c:3354 config/riscv/riscv.c:3469 +#: config/riscv/riscv.c:3475 config/riscv/riscv.c:3484 #, c-format msgid "invalid use of '%%%c'" msgstr "" @@ -2834,19 +2838,19 @@ msgstr "" msgid "MMIX Internal: This is not a constant:" msgstr "" -#: config/msp430/msp430.c:4230 +#: config/msp430/msp430.c:4229 #, c-format msgid "" "%%d, %%e, %%f, %%g operand modifiers are for memory references or constant " "values only" msgstr "" -#: config/msp430/msp430.c:4343 +#: config/msp430/msp430.c:4342 #, c-format msgid "invalid operand prefix" msgstr "" -#: config/msp430/msp430.c:4377 +#: config/msp430/msp430.c:4376 #, c-format msgid "invalid zero extract" msgstr "" @@ -2926,286 +2930,286 @@ msgstr "" msgid "Try running '%s' in the shell to raise its limit.\n" msgstr "" -#: config/rs6000/rs6000.c:3855 +#: config/rs6000/rs6000.c:3860 msgid "%<-mvsx%> requires hardware floating point" msgstr "" -#: config/rs6000/rs6000.c:3863 +#: config/rs6000/rs6000.c:3868 msgid "%<-mvsx%> needs indexed addressing" msgstr "" -#: config/rs6000/rs6000.c:3868 +#: config/rs6000/rs6000.c:3873 msgid "%<-mvsx%> and %<-mno-altivec%> are incompatible" msgstr "" -#: config/rs6000/rs6000.c:3870 +#: config/rs6000/rs6000.c:3875 msgid "%<-mno-altivec%> disables vsx" msgstr "" -#: config/rs6000/rs6000.c:4010 +#: config/rs6000/rs6000.c:4015 msgid "%<-mquad-memory%> requires 64-bit mode" msgstr "" -#: config/rs6000/rs6000.c:4013 +#: config/rs6000/rs6000.c:4018 msgid "%<-mquad-memory-atomic%> requires 64-bit mode" msgstr "" -#: config/rs6000/rs6000.c:4025 +#: config/rs6000/rs6000.c:4030 msgid "%<-mquad-memory%> is not available in little endian mode" msgstr "" -#: config/rs6000/rs6000.c:10845 +#: config/rs6000/rs6000.c:10861 msgid 
"bad move" msgstr "" -#: config/rs6000/rs6000.c:13409 +#: config/rs6000/rs6000.c:13425 msgid "Bad 128-bit move" msgstr "" -#: config/rs6000/rs6000.c:13589 +#: config/rs6000/rs6000.c:13605 #, c-format msgid "invalid %%A value" msgstr "" -#: config/rs6000/rs6000.c:13598 config/xtensa/xtensa.c:2402 +#: config/rs6000/rs6000.c:13614 config/xtensa/xtensa.c:2402 #, c-format msgid "invalid %%D value" msgstr "" -#: config/rs6000/rs6000.c:13613 +#: config/rs6000/rs6000.c:13629 #, c-format msgid "invalid %%e value" msgstr "" -#: config/rs6000/rs6000.c:13634 +#: config/rs6000/rs6000.c:13650 #, c-format msgid "invalid %%f value" msgstr "" -#: config/rs6000/rs6000.c:13643 +#: config/rs6000/rs6000.c:13659 #, c-format msgid "invalid %%F value" msgstr "" -#: config/rs6000/rs6000.c:13652 +#: config/rs6000/rs6000.c:13668 #, c-format msgid "invalid %%G value" msgstr "" -#: config/rs6000/rs6000.c:13687 +#: config/rs6000/rs6000.c:13703 #, c-format msgid "invalid %%j code" msgstr "" -#: config/rs6000/rs6000.c:13697 +#: config/rs6000/rs6000.c:13713 #, c-format msgid "invalid %%J code" msgstr "" -#: config/rs6000/rs6000.c:13707 +#: config/rs6000/rs6000.c:13723 #, c-format msgid "invalid %%k value" msgstr "" -#: config/rs6000/rs6000.c:13722 config/xtensa/xtensa.c:2445 +#: config/rs6000/rs6000.c:13738 config/xtensa/xtensa.c:2445 #, c-format msgid "invalid %%K value" msgstr "" -#: config/rs6000/rs6000.c:13769 +#: config/rs6000/rs6000.c:13785 #, c-format msgid "invalid %%O value" msgstr "" -#: config/rs6000/rs6000.c:13816 +#: config/rs6000/rs6000.c:13832 #, c-format msgid "invalid %%q value" msgstr "" -#: config/rs6000/rs6000.c:13858 +#: config/rs6000/rs6000.c:13874 #, c-format msgid "invalid %%t value" msgstr "" -#: config/rs6000/rs6000.c:13875 +#: config/rs6000/rs6000.c:13891 #, c-format msgid "invalid %%T value" msgstr "" -#: config/rs6000/rs6000.c:13887 +#: config/rs6000/rs6000.c:13903 #, c-format msgid "invalid %%u value" msgstr "" -#: config/rs6000/rs6000.c:13901 
config/xtensa/xtensa.c:2414 +#: config/rs6000/rs6000.c:13917 config/xtensa/xtensa.c:2414 #, c-format msgid "invalid %%v value" msgstr "" -#: config/rs6000/rs6000.c:13951 +#: config/rs6000/rs6000.c:13967 #, c-format msgid "invalid %%V value" msgstr "" -#: config/rs6000/rs6000.c:13968 config/xtensa/xtensa.c:2466 +#: config/rs6000/rs6000.c:13984 config/xtensa/xtensa.c:2466 #, c-format msgid "invalid %%x value" msgstr "" -#: config/rs6000/rs6000.c:14025 +#: config/rs6000/rs6000.c:14041 #, c-format msgid "invalid %%z value" msgstr "" -#: config/rs6000/rs6000.c:14094 +#: config/rs6000/rs6000.c:14110 #, c-format msgid "invalid %%y value, try using the 'Z' constraint" msgstr "" -#: config/rs6000/rs6000.c:14962 +#: config/rs6000/rs6000.c:14978 msgid "Invalid mixing of IEEE 128-bit and IBM 128-bit floating point types" msgstr "" -#: config/rs6000/rs6000.c:23920 +#: config/rs6000/rs6000.c:23687 msgid "AltiVec argument passed to unprototyped function" msgstr "" -#: config/rs6000/rs6000.c:27071 +#: config/rs6000/rs6000.c:27267 msgid "Could not generate addis value for fusion" msgstr "" -#: config/rs6000/rs6000.c:27140 +#: config/rs6000/rs6000.c:27336 msgid "Unable to generate load/store offset for fusion" msgstr "" -#: config/rs6000/rs6000.c:27216 +#: config/rs6000/rs6000.c:27412 msgid "Bad GPR fusion" msgstr "" -#: config/rs6000/rs6000.c:27776 +#: config/rs6000/rs6000.c:27972 msgid "invalid conversion from type %<__vector_quad%>" msgstr "" -#: config/rs6000/rs6000.c:27778 +#: config/rs6000/rs6000.c:27974 msgid "invalid conversion to type %<__vector_quad%>" msgstr "" -#: config/rs6000/rs6000.c:27780 +#: config/rs6000/rs6000.c:27976 msgid "invalid conversion from type %<__vector_pair%>" msgstr "" -#: config/rs6000/rs6000.c:27782 +#: config/rs6000/rs6000.c:27978 msgid "invalid conversion to type %<__vector_pair%>" msgstr "" -#: config/rs6000/rs6000.c:27797 +#: config/rs6000/rs6000.c:27993 msgid "invalid conversion from type %<* __vector_quad%>" msgstr "" -#: 
config/rs6000/rs6000.c:27799 +#: config/rs6000/rs6000.c:27995 msgid "invalid conversion to type %<* __vector_quad%>" msgstr "" -#: config/rs6000/rs6000.c:27801 +#: config/rs6000/rs6000.c:27997 msgid "invalid conversion from type %<* __vector_pair%>" msgstr "" -#: config/rs6000/rs6000.c:27803 +#: config/rs6000/rs6000.c:27999 msgid "invalid conversion to type %<* __vector_pair%>" msgstr "" -#: config/s390/s390.c:7907 +#: config/s390/s390.c:7913 #, c-format msgid "symbolic memory references are only supported on z10 or later" msgstr "" -#: config/s390/s390.c:7918 +#: config/s390/s390.c:7924 #, c-format msgid "cannot decompose address" msgstr "" -#: config/s390/s390.c:7999 +#: config/s390/s390.c:8006 #, c-format msgid "invalid comparison operator for 'E' output modifier" msgstr "" -#: config/s390/s390.c:8022 +#: config/s390/s390.c:8029 #, c-format msgid "invalid reference for 'J' output modifier" msgstr "" -#: config/s390/s390.c:8040 +#: config/s390/s390.c:8047 #, c-format msgid "invalid address for 'O' output modifier" msgstr "" -#: config/s390/s390.c:8062 +#: config/s390/s390.c:8069 #, c-format msgid "invalid address for 'R' output modifier" msgstr "" -#: config/s390/s390.c:8080 +#: config/s390/s390.c:8087 #, c-format msgid "memory reference expected for 'S' output modifier" msgstr "" -#: config/s390/s390.c:8090 +#: config/s390/s390.c:8097 #, c-format msgid "invalid address for 'S' output modifier" msgstr "" -#: config/s390/s390.c:8111 +#: config/s390/s390.c:8118 #, c-format msgid "register or memory expression expected for 'N' output modifier" msgstr "" -#: config/s390/s390.c:8122 +#: config/s390/s390.c:8129 #, c-format msgid "register or memory expression expected for 'M' output modifier" msgstr "" -#: config/s390/s390.c:8208 config/s390/s390.c:8229 +#: config/s390/s390.c:8238 config/s390/s390.c:8259 #, c-format msgid "invalid constant for output modifier '%c'" msgstr "" -#: config/s390/s390.c:8226 +#: config/s390/s390.c:8256 #, c-format msgid "invalid constant - 
try using an output modifier" msgstr "" -#: config/s390/s390.c:8263 +#: config/s390/s390.c:8293 #, c-format msgid "invalid constant vector for output modifier '%c'" msgstr "" -#: config/s390/s390.c:8270 +#: config/s390/s390.c:8300 #, c-format msgid "invalid expression - try using an output modifier" msgstr "" -#: config/s390/s390.c:8273 +#: config/s390/s390.c:8303 #, c-format msgid "invalid expression for output modifier '%c'" msgstr "" -#: config/s390/s390.c:12052 +#: config/s390/s390.c:12082 msgid "vector argument passed to unprototyped function" msgstr "" -#: config/s390/s390.c:16426 +#: config/s390/s390.c:16452 msgid "types differ in signedness" msgstr "" -#: config/s390/s390.c:16436 +#: config/s390/s390.c:16462 msgid "binary operator does not support two vector bool operands" msgstr "" -#: config/s390/s390.c:16439 +#: config/s390/s390.c:16465 msgid "binary operator does not support vector bool operand" msgstr "" -#: config/s390/s390.c:16447 +#: config/s390/s390.c:16473 msgid "" "binary operator does not support mixing vector bool with floating point " "vector operands" @@ -3233,43 +3237,43 @@ msgstr "" msgid "created and used with different endianness" msgstr "" -#: config/sparc/sparc.c:9332 config/sparc/sparc.c:9338 +#: config/sparc/sparc.c:9269 config/sparc/sparc.c:9275 #, c-format msgid "invalid %%Y operand" msgstr "" -#: config/sparc/sparc.c:9425 +#: config/sparc/sparc.c:9362 #, c-format msgid "invalid %%A operand" msgstr "" -#: config/sparc/sparc.c:9445 +#: config/sparc/sparc.c:9382 #, c-format msgid "invalid %%B operand" msgstr "" -#: config/sparc/sparc.c:9525 config/tilegx/tilegx.c:5088 +#: config/sparc/sparc.c:9462 config/tilegx/tilegx.c:5088 #: config/tilepro/tilepro.c:4497 #, c-format msgid "invalid %%C operand" msgstr "" -#: config/sparc/sparc.c:9557 config/tilegx/tilegx.c:5121 +#: config/sparc/sparc.c:9494 config/tilegx/tilegx.c:5121 #, c-format msgid "invalid %%D operand" msgstr "" -#: config/sparc/sparc.c:9576 +#: config/sparc/sparc.c:9513 #, 
c-format msgid "invalid %%f operand" msgstr "" -#: config/sparc/sparc.c:9588 +#: config/sparc/sparc.c:9525 #, c-format msgid "invalid %%s operand" msgstr "" -#: config/sparc/sparc.c:9633 +#: config/sparc/sparc.c:9570 #, c-format msgid "floating-point constant not a valid immediate operand" msgstr "" @@ -3382,22 +3386,22 @@ msgstr "" msgid "output_move_single:" msgstr "" -#: config/vax/vax.c:479 +#: config/vax/vax.c:483 #, c-format msgid "symbol used with both base and indexed registers" msgstr "" -#: config/vax/vax.c:488 +#: config/vax/vax.c:492 #, c-format msgid "symbol with offset used in PIC mode" msgstr "" -#: config/vax/vax.c:576 +#: config/vax/vax.c:580 #, c-format msgid "symbol used as immediate operand" msgstr "" -#: config/vax/vax.c:1668 +#: config/vax/vax.c:1674 msgid "illegal operand detected" msgstr "" @@ -3443,11 +3447,15 @@ msgstr "" msgid "address offset not a constant" msgstr "" -#: c/c-objc-common.c:225 +#: c/c-objc-common.c:190 +msgid "{erroneous}" +msgstr "" + +#: c/c-objc-common.c:231 msgid "aka" msgstr "" -#: c/c-objc-common.c:332 +#: c/c-objc-common.c:323 msgid "({anonymous})" msgstr "" @@ -3462,43 +3470,43 @@ msgstr "" #. Use c_parser_require to get an error with a fix-it hint. 
#: c/c-parser.c:2462 c/c-parser.c:2581 c/c-parser.c:2595 c/c-parser.c:5652 #: c/c-parser.c:6251 c/c-parser.c:6680 c/c-parser.c:6859 c/c-parser.c:6892 -#: c/c-parser.c:7158 c/c-parser.c:10981 c/c-parser.c:11016 c/c-parser.c:11047 -#: c/c-parser.c:11094 c/c-parser.c:11275 c/c-parser.c:12107 c/c-parser.c:12182 -#: c/c-parser.c:12225 c/c-parser.c:17859 c/c-parser.c:17883 c/c-parser.c:17901 -#: c/c-parser.c:18324 c/c-parser.c:18368 c/gimple-parser.c:392 +#: c/c-parser.c:7158 c/c-parser.c:11022 c/c-parser.c:11057 c/c-parser.c:11088 +#: c/c-parser.c:11135 c/c-parser.c:11316 c/c-parser.c:12148 c/c-parser.c:12223 +#: c/c-parser.c:12266 c/c-parser.c:17978 c/c-parser.c:18002 c/c-parser.c:18020 +#: c/c-parser.c:18443 c/c-parser.c:18487 c/gimple-parser.c:392 #: c/gimple-parser.c:433 c/gimple-parser.c:442 c/gimple-parser.c:651 -#: c/gimple-parser.c:2193 c/gimple-parser.c:2230 c/gimple-parser.c:2309 -#: c/gimple-parser.c:2336 c/c-parser.c:3274 c/c-parser.c:3461 c/c-parser.c:3494 -#: c/c-parser.c:11268 c/gimple-parser.c:2027 c/gimple-parser.c:2066 -#: cp/parser.c:14404 cp/parser.c:31070 cp/parser.c:31680 +#: c/gimple-parser.c:2213 c/gimple-parser.c:2250 c/gimple-parser.c:2329 +#: c/gimple-parser.c:2356 c/c-parser.c:3274 c/c-parser.c:3461 c/c-parser.c:3494 +#: c/c-parser.c:11309 c/gimple-parser.c:2039 c/gimple-parser.c:2078 +#: cp/parser.c:14789 cp/parser.c:31615 cp/parser.c:32225 #, gcc-internal-format msgid "expected %<;%>" msgstr "" #: c/c-parser.c:3044 c/c-parser.c:4014 c/c-parser.c:4209 c/c-parser.c:4274 #: c/c-parser.c:4332 c/c-parser.c:4694 c/c-parser.c:4715 c/c-parser.c:4724 -#: c/c-parser.c:4775 c/c-parser.c:4784 c/c-parser.c:8502 c/c-parser.c:8568 -#: c/c-parser.c:9074 c/c-parser.c:9099 c/c-parser.c:9133 c/c-parser.c:9242 -#: c/c-parser.c:10025 c/c-parser.c:11382 c/c-parser.c:13638 c/c-parser.c:14280 -#: c/c-parser.c:14339 c/c-parser.c:14394 c/c-parser.c:15716 c/c-parser.c:15814 -#: c/c-parser.c:17113 c/c-parser.c:17943 c/c-parser.c:18332 c/c-parser.c:21145 -#: 
c/c-parser.c:21223 c/gimple-parser.c:195 c/gimple-parser.c:198 +#: c/c-parser.c:4775 c/c-parser.c:4784 c/c-parser.c:8504 c/c-parser.c:8571 +#: c/c-parser.c:9077 c/c-parser.c:9102 c/c-parser.c:9136 c/c-parser.c:9245 +#: c/c-parser.c:10066 c/c-parser.c:11423 c/c-parser.c:13684 c/c-parser.c:14326 +#: c/c-parser.c:14385 c/c-parser.c:14440 c/c-parser.c:15852 c/c-parser.c:15921 +#: c/c-parser.c:17230 c/c-parser.c:18062 c/c-parser.c:18451 c/c-parser.c:21290 +#: c/c-parser.c:21368 c/gimple-parser.c:195 c/gimple-parser.c:198 #: c/gimple-parser.c:527 c/gimple-parser.c:561 c/gimple-parser.c:566 -#: c/gimple-parser.c:735 c/gimple-parser.c:832 c/gimple-parser.c:1025 -#: c/gimple-parser.c:1051 c/gimple-parser.c:1054 c/gimple-parser.c:1185 -#: c/gimple-parser.c:1312 c/gimple-parser.c:1438 c/gimple-parser.c:1454 -#: c/gimple-parser.c:1470 c/gimple-parser.c:1492 c/gimple-parser.c:1522 -#: c/gimple-parser.c:1548 c/gimple-parser.c:1756 c/gimple-parser.c:1949 -#: c/gimple-parser.c:1969 c/gimple-parser.c:2103 c/gimple-parser.c:2266 -#: c/c-parser.c:7110 cp/parser.c:31728 +#: c/gimple-parser.c:735 c/gimple-parser.c:832 c/gimple-parser.c:1030 +#: c/gimple-parser.c:1056 c/gimple-parser.c:1059 c/gimple-parser.c:1190 +#: c/gimple-parser.c:1317 c/gimple-parser.c:1443 c/gimple-parser.c:1459 +#: c/gimple-parser.c:1475 c/gimple-parser.c:1497 c/gimple-parser.c:1527 +#: c/gimple-parser.c:1553 c/gimple-parser.c:1761 c/gimple-parser.c:1961 +#: c/gimple-parser.c:1981 c/gimple-parser.c:2123 c/gimple-parser.c:2286 +#: c/c-parser.c:7110 cp/parser.c:32273 #, gcc-internal-format msgid "expected %<)%>" msgstr "" #: c/c-parser.c:4103 c/c-parser.c:4835 c/c-parser.c:4976 c/c-parser.c:5002 #: c/c-parser.c:5003 c/c-parser.c:5417 c/c-parser.c:5453 c/c-parser.c:7209 -#: c/c-parser.c:9233 c/c-parser.c:10123 c/c-parser.c:10412 c/c-parser.c:13085 -#: c/gimple-parser.c:1733 cp/parser.c:31692 +#: c/c-parser.c:9236 c/c-parser.c:10164 c/c-parser.c:10453 c/c-parser.c:13131 +#: c/gimple-parser.c:1738 cp/parser.c:32237 #, 
gcc-internal-format msgid "expected %<]%>" msgstr "" @@ -3508,384 +3516,385 @@ msgid "expected %<;%>, %<,%> or %<)%>" msgstr "" #. Look for the two `(' tokens. -#: c/c-parser.c:4744 c/c-parser.c:4749 c/c-parser.c:13621 c/c-parser.c:14369 -#: c/c-parser.c:20506 c/c-parser.c:20953 c/c-parser.c:21166 +#: c/c-parser.c:4744 c/c-parser.c:4749 c/c-parser.c:13667 c/c-parser.c:14415 +#: c/c-parser.c:20651 c/c-parser.c:21098 c/c-parser.c:21311 #: c/gimple-parser.c:180 c/gimple-parser.c:474 c/gimple-parser.c:513 -#: c/gimple-parser.c:545 c/gimple-parser.c:802 c/gimple-parser.c:1019 -#: c/gimple-parser.c:1045 c/gimple-parser.c:1172 c/gimple-parser.c:1307 -#: c/gimple-parser.c:1428 c/gimple-parser.c:1488 c/gimple-parser.c:1506 -#: c/gimple-parser.c:1541 c/gimple-parser.c:1918 c/gimple-parser.c:1929 -#: c/gimple-parser.c:1935 c/gimple-parser.c:2100 c/gimple-parser.c:2263 -#: c/c-parser.c:13443 cp/parser.c:31683 +#: c/gimple-parser.c:545 c/gimple-parser.c:802 c/gimple-parser.c:1024 +#: c/gimple-parser.c:1050 c/gimple-parser.c:1177 c/gimple-parser.c:1312 +#: c/gimple-parser.c:1433 c/gimple-parser.c:1493 c/gimple-parser.c:1511 +#: c/gimple-parser.c:1546 c/gimple-parser.c:1930 c/gimple-parser.c:1941 +#: c/gimple-parser.c:1947 c/gimple-parser.c:2112 c/gimple-parser.c:2283 +#: c/c-parser.c:13489 cp/parser.c:32228 #, gcc-internal-format msgid "expected %<(%>" msgstr "" -#: c/c-parser.c:4972 c/c-parser.c:4974 c/c-parser.c:13013 cp/parser.c:31695 -#: cp/parser.c:35379 +#: c/c-parser.c:4972 c/c-parser.c:4974 c/c-parser.c:13058 cp/parser.c:32240 +#: cp/parser.c:35924 #, gcc-internal-format msgid "expected %<[%>" msgstr "" -#: c/c-parser.c:5588 c/c-parser.c:11611 c/c-parser.c:18127 c/c-parser.c:18941 -#: c/c-parser.c:22005 c/gimple-parser.c:385 c/gimple-parser.c:2269 -#: c/c-parser.c:3262 c/c-parser.c:3484 c/c-parser.c:11163 cp/parser.c:19978 -#: cp/parser.c:31689 +#: c/c-parser.c:5588 c/c-parser.c:11652 c/c-parser.c:18246 c/c-parser.c:19063 +#: c/c-parser.c:22150 c/gimple-parser.c:385 
c/gimple-parser.c:2289 +#: c/c-parser.c:3262 c/c-parser.c:3484 c/c-parser.c:11204 cp/parser.c:20384 +#: cp/parser.c:32234 #, gcc-internal-format msgid "expected %<{%>" msgstr "" -#: c/c-parser.c:5878 c/c-parser.c:5887 c/c-parser.c:7642 c/c-parser.c:8705 -#: c/c-parser.c:11375 c/c-parser.c:11771 c/c-parser.c:11835 c/c-parser.c:13067 -#: c/c-parser.c:13981 c/c-parser.c:14197 c/c-parser.c:14687 c/c-parser.c:14788 -#: c/c-parser.c:15155 c/c-parser.c:15468 c/c-parser.c:15595 c/c-parser.c:20368 -#: c/c-parser.c:21010 c/c-parser.c:21069 c/gimple-parser.c:568 -#: c/gimple-parser.c:872 c/gimple-parser.c:2317 c/gimple-parser.c:2344 -#: c/c-parser.c:7117 c/c-parser.c:13546 c/c-parser.c:14793 cp/parser.c:31722 -#: cp/parser.c:33370 cp/parser.c:36178 cp/parser.c:36962 +#: c/c-parser.c:5878 c/c-parser.c:5887 c/c-parser.c:7642 c/c-parser.c:8708 +#: c/c-parser.c:11416 c/c-parser.c:11812 c/c-parser.c:11876 c/c-parser.c:13113 +#: c/c-parser.c:14027 c/c-parser.c:14243 c/c-parser.c:14733 c/c-parser.c:14834 +#: c/c-parser.c:15201 c/c-parser.c:15514 c/c-parser.c:15590 c/c-parser.c:15702 +#: c/c-parser.c:20513 c/c-parser.c:21155 c/c-parser.c:21214 +#: c/gimple-parser.c:568 c/gimple-parser.c:872 c/gimple-parser.c:2337 +#: c/gimple-parser.c:2364 c/c-parser.c:7117 c/c-parser.c:13592 +#: c/c-parser.c:14839 cp/parser.c:32267 cp/parser.c:33915 cp/parser.c:36724 +#: cp/parser.c:37508 #, gcc-internal-format msgid "expected %<:%>" msgstr "" -#: c/c-parser.c:6667 cp/parser.c:31609 +#: c/c-parser.c:6667 cp/parser.c:32154 #, gcc-internal-format msgid "expected %" msgstr "" -#: c/c-parser.c:8466 c/c-parser.c:8656 c/c-parser.c:9123 c/c-parser.c:9166 -#: c/c-parser.c:9304 c/c-parser.c:10015 c/c-parser.c:14374 c/c-parser.c:15551 -#: c/gimple-parser.c:1022 c/gimple-parser.c:1048 c/gimple-parser.c:1176 -#: c/gimple-parser.c:1179 c/gimple-parser.c:1510 c/gimple-parser.c:1516 -#: cp/parser.c:31068 cp/parser.c:31698 +#: c/c-parser.c:8467 c/c-parser.c:8659 c/c-parser.c:9126 c/c-parser.c:9169 +#: 
c/c-parser.c:9307 c/c-parser.c:10056 c/c-parser.c:14420 c/c-parser.c:15658 +#: c/gimple-parser.c:1027 c/gimple-parser.c:1053 c/gimple-parser.c:1181 +#: c/gimple-parser.c:1184 c/gimple-parser.c:1515 c/gimple-parser.c:1521 +#: cp/parser.c:31613 cp/parser.c:32243 #, gcc-internal-format msgid "expected %<,%>" msgstr "" -#: c/c-parser.c:9020 +#: c/c-parser.c:9023 msgid "expected %<.%>" msgstr "" -#: c/c-parser.c:10834 c/c-parser.c:10866 c/c-parser.c:11106 cp/parser.c:33944 -#: cp/parser.c:33965 +#: c/c-parser.c:10875 c/c-parser.c:10907 c/c-parser.c:11147 cp/parser.c:34489 +#: cp/parser.c:34510 #, gcc-internal-format msgid "expected %<@end%>" msgstr "" -#: c/c-parser.c:11524 c/gimple-parser.c:1346 cp/parser.c:31707 +#: c/c-parser.c:11565 c/gimple-parser.c:1351 cp/parser.c:32252 #, gcc-internal-format msgid "expected %<>%>" msgstr "" -#: c/c-parser.c:14882 c/c-parser.c:15832 cp/parser.c:31731 +#: c/c-parser.c:14928 c/c-parser.c:15939 cp/parser.c:32276 #, gcc-internal-format msgid "expected %<,%> or %<)%>" msgstr "" #. All following cases are statements with LHS. 
-#: c/c-parser.c:15460 c/c-parser.c:17592 c/c-parser.c:17636 c/c-parser.c:17868 -#: c/c-parser.c:18311 c/c-parser.c:20575 c/c-parser.c:21207 -#: c/gimple-parser.c:726 c/c-parser.c:5476 cp/parser.c:31710 +#: c/c-parser.c:15506 c/c-parser.c:17711 c/c-parser.c:17755 c/c-parser.c:17987 +#: c/c-parser.c:18430 c/c-parser.c:20720 c/c-parser.c:21352 +#: c/gimple-parser.c:726 c/c-parser.c:5476 cp/parser.c:32255 #, gcc-internal-format msgid "expected %<=%>" msgstr "" -#: c/c-parser.c:17884 c/c-parser.c:18175 c/gimple-parser.c:1564 -#: c/gimple-parser.c:1596 c/gimple-parser.c:1606 c/gimple-parser.c:2354 -#: cp/parser.c:31686 cp/parser.c:34154 +#: c/c-parser.c:18003 c/c-parser.c:18294 c/gimple-parser.c:1569 +#: c/gimple-parser.c:1601 c/gimple-parser.c:1611 c/gimple-parser.c:2374 +#: cp/parser.c:32231 cp/parser.c:34699 #, gcc-internal-format msgid "expected %<}%>" msgstr "" -#: c/c-parser.c:18984 c/c-parser.c:18974 cp/parser.c:40966 +#: c/c-parser.c:19106 c/c-parser.c:19096 cp/parser.c:41744 #, gcc-internal-format msgid "expected %<#pragma omp section%> or %<}%>" msgstr "" -#: c/c-typeck.c:8430 +#: c/c-typeck.c:8447 msgid "(anonymous)" msgstr "" -#: c/gimple-parser.c:1335 cp/parser.c:17388 cp/parser.c:31704 +#: c/gimple-parser.c:1340 cp/parser.c:17784 cp/parser.c:32249 #, gcc-internal-format msgid "expected %<<%>" msgstr "" -#: c/gimple-parser.c:2313 c/gimple-parser.c:2340 c/gimple-parser.c:2179 -#: c/gimple-parser.c:2216 +#: c/gimple-parser.c:2333 c/gimple-parser.c:2360 c/gimple-parser.c:2199 +#: c/gimple-parser.c:2236 #, gcc-internal-format msgid "expected label" msgstr "" -#: cp/call.c:3907 +#: cp/call.c:3909 msgid "candidate:" msgstr "" -#: cp/call.c:7411 +#: cp/call.c:7452 msgid " after user-defined conversion:" msgstr "" -#: cp/call.c:7549 cp/pt.c:2046 cp/pt.c:25151 +#: cp/call.c:7591 cp/pt.c:2056 cp/pt.c:25103 msgid "candidate is:" msgid_plural "candidates are:" msgstr[0] "" msgstr[1] "" -#: cp/call.c:12113 +#: cp/call.c:12174 msgid "candidate 1:" msgstr "" -#: 
cp/call.c:12114 +#: cp/call.c:12175 msgid "candidate 2:" msgstr "" -#: cp/decl.c:3374 +#: cp/decl.c:3389 msgid "jump to label %qD" msgstr "" -#: cp/decl.c:3375 +#: cp/decl.c:3390 msgid "jump to case label" msgstr "" -#: cp/error.c:414 +#: cp/error.c:452 msgid "" msgstr "" -#: cp/error.c:516 +#: cp/error.c:554 msgid "" msgstr "" -#: cp/error.c:518 +#: cp/error.c:556 msgid "" msgstr "" -#: cp/error.c:688 +#: cp/error.c:726 msgid "" msgstr "" #. A lambda's "type" is essentially its signature. -#: cp/error.c:787 +#: cp/error.c:825 msgid "" msgstr "" -#: cp/error.c:799 +#: cp/error.c:837 #, c-format msgid "" msgstr "" -#: cp/error.c:928 +#: cp/error.c:966 msgid "" msgstr "" -#: cp/error.c:1061 +#: cp/error.c:1099 #, c-format msgid "(static initializers for %s)" msgstr "" -#: cp/error.c:1063 +#: cp/error.c:1101 #, c-format msgid "(static destructors for %s)" msgstr "" -#: cp/error.c:1109 +#: cp/error.c:1147 msgid "" msgstr "" -#: cp/error.c:1215 +#: cp/error.c:1253 msgid "vtable for " msgstr "" -#: cp/error.c:1239 +#: cp/error.c:1277 msgid " " msgstr "" -#: cp/error.c:1254 +#: cp/error.c:1292 msgid "{anonymous}" msgstr "" -#: cp/error.c:1256 +#: cp/error.c:1294 msgid "(anonymous namespace)" msgstr "" -#: cp/error.c:1356 +#: cp/error.c:1394 msgid "