diff options
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 5 | ||||
-rw-r--r-- | gcc/expmed.c | 55 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 9 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/mult-synth_1.c | 11 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/mult-synth_2.c | 11 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/mult-synth_3.c | 11 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/mult-synth_4.c | 11 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/mult-synth_5.c | 11 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/mult-synth_6.c | 11 |
9 files changed, 106 insertions, 29 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 85c2ba3..1e56a37 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,8 @@ +2015-04-21 Kyrylo Tkachov <kyrylo.tkachov@arm.com> + + * expmed.c (synth_mult): Only assume overlapping + shift with previous steps in alg_sub_t_m2 case. + 2015-04-21 Richard Biener <rguenther@suse.de> PR tree-optimization/65650 diff --git a/gcc/expmed.c b/gcc/expmed.c index 6327629..6679f50 100644 --- a/gcc/expmed.c +++ b/gcc/expmed.c @@ -2664,14 +2664,28 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, m = exact_log2 (-orig_t + 1); if (m >= 0 && m < maxm) { - op_cost = shiftsub1_cost (speed, mode, m); + op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m); + /* If the target has a cheap shift-and-subtract insn use + that in preference to a shift insn followed by a sub insn. + Assume that the shift-and-sub is "atomic" with a latency + equal to its cost, otherwise assume that on superscalar + hardware the shift may be executed concurrently with the + earlier steps in the algorithm. */ + if (shiftsub1_cost (speed, mode, m) <= op_cost) + { + op_cost = shiftsub1_cost (speed, mode, m); + op_latency = op_cost; + } + else + op_latency = add_cost (speed, mode); + new_limit.cost = best_cost.cost - op_cost; - new_limit.latency = best_cost.latency - op_cost; + new_limit.latency = best_cost.latency - op_latency; synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m, &new_limit, mode); alg_in->cost.cost += op_cost; - alg_in->cost.latency += op_cost; + alg_in->cost.latency += op_latency; if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) { best_cost = alg_in->cost; @@ -2704,20 +2718,12 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, if (t % d == 0 && t > d && m < maxm && (!cache_hit || cache_alg == alg_add_factor)) { - /* If the target has a cheap shift-and-add instruction use - that in preference to a shift insn followed by an add insn. 
- Assume that the shift-and-add is "atomic" with a latency - equal to its cost, otherwise assume that on superscalar - hardware the shift may be executed concurrently with the - earlier steps in the algorithm. */ op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m); - if (shiftadd_cost (speed, mode, m) < op_cost) - { - op_cost = shiftadd_cost (speed, mode, m); - op_latency = op_cost; - } - else - op_latency = add_cost (speed, mode); + if (shiftadd_cost (speed, mode, m) <= op_cost) + op_cost = shiftadd_cost (speed, mode, m); + + op_latency = op_cost; + new_limit.cost = best_cost.cost - op_cost; new_limit.latency = best_cost.latency - op_latency; @@ -2742,20 +2748,11 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, if (t % d == 0 && t > d && m < maxm && (!cache_hit || cache_alg == alg_sub_factor)) { - /* If the target has a cheap shift-and-subtract insn use - that in preference to a shift insn followed by a sub insn. - Assume that the shift-and-sub is "atomic" with a latency - equal to it's cost, otherwise assume that on superscalar - hardware the shift may be executed concurrently with the - earlier steps in the algorithm. */ op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m); - if (shiftsub0_cost (speed, mode, m) < op_cost) - { - op_cost = shiftsub0_cost (speed, mode, m); - op_latency = op_cost; - } - else - op_latency = add_cost (speed, mode); + if (shiftsub0_cost (speed, mode, m) <= op_cost) + op_cost = shiftsub0_cost (speed, mode, m); + + op_latency = op_cost; new_limit.cost = best_cost.cost - op_cost; new_limit.latency = best_cost.latency - op_latency; diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 5573e42..c031a3a 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,12 @@ +2015-04-21 Kyrylo Tkachov <kyrylo.tkachov@arm.com> + + * gcc.target/aarch64/mult-synth_1.c: New test. + * gcc.target/aarch64/mult-synth_2.c: Likewise. + * gcc.target/aarch64/mult-synth_3.c: Likewise. 
+ * gcc.target/aarch64/mult-synth_4.c: Likewise. + * gcc.target/aarch64/mult-synth_5.c: Likewise. + * gcc.target/aarch64/mult-synth_6.c: Likewise. + 2015-04-21 Richard Biener <rguenther@suse.de> PR tree-optimization/65650 diff --git a/gcc/testsuite/gcc.target/aarch64/mult-synth_1.c b/gcc/testsuite/gcc.target/aarch64/mult-synth_1.c new file mode 100644 index 0000000..37f079d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/mult-synth_1.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */ + +int +foo (int x) +{ + return x * 100; +} + +/* { dg-final { scan-assembler-times "mul\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/mult-synth_2.c b/gcc/testsuite/gcc.target/aarch64/mult-synth_2.c new file mode 100644 index 0000000..4d2e5bf --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/mult-synth_2.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */ + +int +foo (int x) +{ + return x * 25; +} + +/* { dg-final { scan-assembler-times "mul\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/mult-synth_3.c b/gcc/testsuite/gcc.target/aarch64/mult-synth_3.c new file mode 100644 index 0000000..03e83e9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/mult-synth_3.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */ + +int +foo (int x) +{ + return x * 11; +} + +/* { dg-final { scan-assembler-times "mul\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/mult-synth_4.c b/gcc/testsuite/gcc.target/aarch64/mult-synth_4.c new file mode 100644 index 0000000..05a82bd --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/mult-synth_4.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options 
"-O2 -mcpu=cortex-a57 -save-temps" } */ + +long +foo (int x, int y) +{ + return (long)x * 6L; +} + +/* { dg-final { scan-assembler-times "smull\tx\[0-9\]+, w\[0-9\]+, w\[0-9\]+" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/mult-synth_5.c b/gcc/testsuite/gcc.target/aarch64/mult-synth_5.c new file mode 100644 index 0000000..8cf3314 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/mult-synth_5.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */ + +int +foo (int x) +{ + return x * 10; +} + +/* { dg-final { scan-assembler-not "\tw1" } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/mult-synth_6.c b/gcc/testsuite/gcc.target/aarch64/mult-synth_6.c new file mode 100644 index 0000000..e941b72 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/mult-synth_6.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */ + +int +foo (int x) +{ + return x * 20; +} + +/* { dg-final { scan-assembler-not "\tw1" } } */ +/* { dg-final { cleanup-saved-temps } } */ |