aboutsummaryrefslogtreecommitdiff
path: root/libcxx/benchmarks
diff options
context:
space:
mode:
authorMartijn Vels <mvels@google.com>2023-09-13 18:16:35 -0400
committerLouis Dionne <ldionne.2@gmail.com>2023-10-02 09:12:37 -0400
commit6fe4e033f07d332980e1997c19fe705cff9d07a4 (patch)
tree55b4e099472d7e8a324bbb5b7c966161e878add2 /libcxx/benchmarks
parentb52a5c636064ca7138eb996d74941bbe6f07080c (diff)
downloadllvm-6fe4e033f07d332980e1997c19fe705cff9d07a4.zip
llvm-6fe4e033f07d332980e1997c19fe705cff9d07a4.tar.gz
llvm-6fe4e033f07d332980e1997c19fe705cff9d07a4.tar.bz2
[libc++] Optimize vector push_back to avoid continuous load and store of end pointer
Credits: this change is based on analysis and a proof of concept by gerbens@google.com.

Before, the compiler loses track of end as 'this' and other references possibly escape beyond the compiler's scope. This can be seen in the generated assembly:

 16.28 │200c80: mov %r15d,(%rax)
 60.87 │200c83: add $0x4,%rax
       │200c87: mov %rax,-0x38(%rbp)
  0.03 │200c8b: → jmpq 200d4e
 ... ...
  1.69 │200d4e: cmp %r15d,%r12d
       │200d51: → je 200c40
 16.34 │200d57: inc %r15d
  0.05 │200d5a: mov -0x38(%rbp),%rax
  3.27 │200d5e: mov -0x30(%rbp),%r13
  1.47 │200d62: cmp %r13,%rax
       │200d65: → jne 200c80

We fix this by always explicitly storing the loaded local and pointer back at the end of push_back. This generates some slight source 'noise', but creates nice and compact fast path code, i.e.:

 32.64 │200760: mov %r14d,(%r12)
  9.97 │200764: add $0x4,%r12
  6.97 │200768: mov %r12,-0x38(%rbp)
 32.17 │20076c: add $0x1,%r14d
  2.36 │200770: cmp %r14d,%ebx
       │200773: → je 200730
  8.98 │200775: mov -0x30(%rbp),%r13
  6.75 │200779: cmp %r13,%r12
       │20077c: → jne 200760

Now there is a single store for the push_back value (as before), and a single store for the end without a reload (dependency).

For fully local vectors, (i.e., not referenced elsewhere), the capacity load and store inside the loop could also be removed, but this requires more substantial refactoring inside vector.

Differential Revision: https://reviews.llvm.org/D80588
Diffstat (limited to 'libcxx/benchmarks')
-rw-r--r--libcxx/benchmarks/ContainerBenchmarks.h13
-rw-r--r--libcxx/benchmarks/vector_operations.bench.cpp2
2 files changed, 15 insertions, 0 deletions
diff --git a/libcxx/benchmarks/ContainerBenchmarks.h b/libcxx/benchmarks/ContainerBenchmarks.h
index 071e46c..9a9abfd 100644
--- a/libcxx/benchmarks/ContainerBenchmarks.h
+++ b/libcxx/benchmarks/ContainerBenchmarks.h
@@ -79,6 +79,19 @@ void BM_ConstructFromRange(benchmark::State& st, Container, GenInputs gen) {
}
}
+template <class Container>
+void BM_Pushback(benchmark::State& state, Container c) {
+ int count = state.range(0);
+ c.reserve(count);
+ while (state.KeepRunningBatch(count)) {
+ c.clear();
+ for (int i = 0; i != count; ++i) {
+ c.push_back(i);
+ }
+ benchmark::DoNotOptimize(c.data());
+ }
+}
+
template <class Container, class GenInputs>
void BM_InsertValue(benchmark::State& st, Container c, GenInputs gen) {
auto in = gen(st.range(0));
diff --git a/libcxx/benchmarks/vector_operations.bench.cpp b/libcxx/benchmarks/vector_operations.bench.cpp
index be0bee6..38b14c5 100644
--- a/libcxx/benchmarks/vector_operations.bench.cpp
+++ b/libcxx/benchmarks/vector_operations.bench.cpp
@@ -39,4 +39,6 @@ BENCHMARK_CAPTURE(BM_ConstructFromRange, vector_size_t, std::vector<size_t>{}, g
BENCHMARK_CAPTURE(BM_ConstructFromRange, vector_string, std::vector<std::string>{}, getRandomStringInputs)
->Arg(TestNumInputs);
+BENCHMARK_CAPTURE(BM_Pushback, vector_int, std::vector<int>{})->Arg(TestNumInputs);
+
BENCHMARK_MAIN();