diff options
author | Li Wei <liwei@loongson.cn> | 2023-11-28 15:38:37 +0800 |
---|---|---|
committer | Lulu Cheng <chenglulu@loongson.cn> | 2023-12-02 16:49:44 +0800 |
commit | a68ae55883113c10320b164738eebadaf08df5b0 (patch) | |
tree | 52dd9e8eac212b64d630182653e6368d4f076968 /gcc | |
parent | ccc77027576ca7591766557a350b7917a62c8c5f (diff) | |
download | gcc-a68ae55883113c10320b164738eebadaf08df5b0.zip gcc-a68ae55883113c10320b164738eebadaf08df5b0.tar.gz gcc-a68ae55883113c10320b164738eebadaf08df5b0.tar.bz2 |
LoongArch: Accelerate optimization of scalar signed/unsigned popcount.
LoongArch provides vector popcount instructions but has no scalar
equivalent. Currently, scalar popcount is computed with a loop, so any
value that is not a power of two takes several iterations. Use the
vector popcount instruction to implement scalar popcount instead, which
performs better than the loop.
gcc/ChangeLog:
* config/loongarch/loongarch.md (cntmap): New mode attribute, used to
simplify the following template.
(popcount<mode>2): New.
gcc/testsuite/ChangeLog:
* gcc.target/loongarch/popcnt.c: New test.
* gcc.target/loongarch/popcount.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/loongarch/loongarch.md | 27 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/loongarch/popcnt.c | 41 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/loongarch/popcount.c | 17 |
3 files changed, 83 insertions, 2 deletions
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md index 1e01981..7a101dd 100644 --- a/gcc/config/loongarch/loongarch.md +++ b/gcc/config/loongarch/loongarch.md @@ -1512,7 +1512,30 @@ (set_attr "cnv_mode" "D2S") (set_attr "mode" "SF")]) - +;; In vector registers, popcount can be implemented directly through +;; the vector instruction [X]VPCNT. For GP registers, we can implement +;; it through the following method. Compared with loop implementation +;; of popcount, the following method has better performance. + +;; This attribute used for get connection of scalar mode and corresponding +;; vector mode. +(define_mode_attr cntmap [(SI "v4si") (DI "v2di")]) + +(define_expand "popcount<mode>2" + [(set (match_operand:GPR 0 "register_operand") + (popcount:GPR (match_operand:GPR 1 "register_operand")))] + "ISA_HAS_LSX" +{ + rtx in = operands[1]; + rtx out = operands[0]; + rtx vreg = <MODE>mode == SImode ? gen_reg_rtx (V4SImode) : + gen_reg_rtx (V2DImode); + emit_insn (gen_lsx_vinsgr2vr_<size> (vreg, in, vreg, GEN_INT (1))); + emit_insn (gen_popcount<cntmap>2 (vreg, vreg)); + emit_insn (gen_lsx_vpickve2gr_<size> (out, vreg, GEN_INT (0))); + DONE; +}) + ;; ;; .................... 
;; @@ -3879,7 +3902,7 @@ (any_extend:SI (match_dup 3)))])] "") - + (define_mode_iterator QHSD [QI HI SI DI]) diff --git a/gcc/testsuite/gcc.target/loongarch/popcnt.c b/gcc/testsuite/gcc.target/loongarch/popcnt.c new file mode 100644 index 0000000..a10fca4 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/popcnt.c @@ -0,0 +1,41 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mlsx" } */ +/* { dg-final { scan-assembler-not {popcount} } } */ +/* { dg-final { scan-assembler-times "vpcnt.d" 2 { target { loongarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times "vpcnt.w" 4 { target { loongarch64*-*-* } } } } */ + +int +foo (int x) +{ + return __builtin_popcount (x); +} + +long +foo1 (long x) +{ + return __builtin_popcountl (x); +} + +long long +foo2 (long long x) +{ + return __builtin_popcountll (x); +} + +int +foo3 (int *p) +{ + return __builtin_popcount (*p); +} + +unsigned +foo4 (int x) +{ + return __builtin_popcount (x); +} + +unsigned long +foo5 (int x) +{ + return __builtin_popcount (x); +} diff --git a/gcc/testsuite/gcc.target/loongarch/popcount.c b/gcc/testsuite/gcc.target/loongarch/popcount.c new file mode 100644 index 0000000..390ff06 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/popcount.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mlsx -fdump-tree-optimized" } */ +/* { dg-final { scan-tree-dump-times "__builtin_popcount|\\.POPCOUNT" 1 "optimized" } } */ + +int +PopCount (long b) +{ + int c = 0; + + while (b) + { + b &= b - 1; + c++; + } + + return c; +} |