diff options
author | Nikolas Klauser <nikolasklauser@berlin.de> | 2025-09-08 12:00:04 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-09-08 12:00:04 +0200 |
commit | 04518e7357b17e0802445b724377f844511a661f (patch) | |
tree | b439614336a9f8ccf09efdd9dcc94af8ed1a83cd /libcxx/include/__algorithm | |
parent | be7094ed67526ee149ca150b9c9b1e7e724b226e (diff) | |
download | llvm-04518e7357b17e0802445b724377f844511a661f.zip llvm-04518e7357b17e0802445b724377f844511a661f.tar.gz llvm-04518e7357b17e0802445b724377f844511a661f.tar.bz2 |
[libc++] Improve the performance of std::make_heap a bit (#154092)
```
Apple M4:
-----------------------------------------------------------------------------------------------------
Benchmark old new
-----------------------------------------------------------------------------------------------------
BM_MakeHeap_uint32_Random_1 0.285 ns 0.271 ns
BM_MakeHeap_uint32_Random_4 2.09 ns 1.80 ns
BM_MakeHeap_uint32_Random_16 1.85 ns 1.83 ns
BM_MakeHeap_uint32_Random_64 1.92 ns 1.50 ns
BM_MakeHeap_uint32_Random_256 2.10 ns 1.87 ns
BM_MakeHeap_uint32_Random_1024 1.73 ns 1.86 ns
BM_MakeHeap_uint32_Random_16384 2.17 ns 2.05 ns
BM_MakeHeap_uint32_Random_262144 1.77 ns 1.77 ns
BM_MakeHeap_uint32_Ascending_1 0.288 ns 0.277 ns
BM_MakeHeap_uint32_Ascending_4 0.658 ns 0.481 ns
BM_MakeHeap_uint32_Ascending_16 0.636 ns 0.637 ns
BM_MakeHeap_uint32_Ascending_64 0.643 ns 0.601 ns
BM_MakeHeap_uint32_Ascending_256 0.710 ns 0.636 ns
BM_MakeHeap_uint32_Ascending_1024 0.747 ns 0.660 ns
BM_MakeHeap_uint32_Ascending_16384 0.713 ns 0.633 ns
BM_MakeHeap_uint32_Ascending_262144 0.769 ns 0.731 ns
BM_MakeHeap_uint32_Descending_1 0.294 ns 0.280 ns
BM_MakeHeap_uint32_Descending_4 0.379 ns 0.305 ns
BM_MakeHeap_uint32_Descending_16 0.376 ns 0.268 ns
BM_MakeHeap_uint32_Descending_64 0.358 ns 0.271 ns
BM_MakeHeap_uint32_Descending_256 0.377 ns 0.284 ns
BM_MakeHeap_uint32_Descending_1024 0.355 ns 0.267 ns
BM_MakeHeap_uint32_Descending_16384 0.348 ns 0.248 ns
BM_MakeHeap_uint32_Descending_262144 0.349 ns 0.247 ns
BM_MakeHeap_uint32_SingleElement_1 0.292 ns 0.280 ns
BM_MakeHeap_uint32_SingleElement_4 0.570 ns 0.332 ns
BM_MakeHeap_uint32_SingleElement_16 0.635 ns 0.604 ns
BM_MakeHeap_uint32_SingleElement_64 0.653 ns 0.567 ns
BM_MakeHeap_uint32_SingleElement_256 0.703 ns 0.609 ns
BM_MakeHeap_uint32_SingleElement_1024 0.737 ns 0.604 ns
BM_MakeHeap_uint32_SingleElement_16384 0.699 ns 0.574 ns
BM_MakeHeap_uint32_SingleElement_262144 0.803 ns 0.684 ns
BM_MakeHeap_uint32_PipeOrgan_1 0.291 ns 0.284 ns
BM_MakeHeap_uint32_PipeOrgan_4 0.588 ns 0.399 ns
BM_MakeHeap_uint32_PipeOrgan_16 0.648 ns 1.12 ns
BM_MakeHeap_uint32_PipeOrgan_64 0.662 ns 0.771 ns
BM_MakeHeap_uint32_PipeOrgan_256 0.723 ns 0.672 ns
BM_MakeHeap_uint32_PipeOrgan_1024 0.749 ns 0.674 ns
BM_MakeHeap_uint32_PipeOrgan_16384 0.708 ns 0.638 ns
BM_MakeHeap_uint32_PipeOrgan_262144 0.786 ns 0.743 ns
BM_MakeHeap_uint32_Heap_1 0.298 ns 0.282 ns
BM_MakeHeap_uint32_Heap_4 0.396 ns 0.308 ns
BM_MakeHeap_uint32_Heap_16 0.377 ns 0.268 ns
BM_MakeHeap_uint32_Heap_64 0.356 ns 0.271 ns
BM_MakeHeap_uint32_Heap_256 0.378 ns 0.290 ns
BM_MakeHeap_uint32_Heap_1024 0.356 ns 0.275 ns
BM_MakeHeap_uint32_Heap_16384 0.348 ns 0.252 ns
BM_MakeHeap_uint32_Heap_262144 0.347 ns 0.250 ns
BM_MakeHeap_uint32_QuickSortAdversary_1 0.290 ns 0.284 ns
BM_MakeHeap_uint32_QuickSortAdversary_4 0.627 ns 0.409 ns
BM_MakeHeap_uint32_QuickSortAdversary_16 0.640 ns 0.653 ns
BM_MakeHeap_uint32_QuickSortAdversary_64 0.577 ns 0.484 ns
BM_MakeHeap_uint32_QuickSortAdversary_256 0.613 ns 0.521 ns
BM_MakeHeap_uint32_QuickSortAdversary_1024 0.652 ns 0.514 ns
BM_MakeHeap_uint32_QuickSortAdversary_16384 0.428 ns 0.308 ns
BM_MakeHeap_uint32_QuickSortAdversary_262144 0.373 ns 0.261 ns
BM_MakeHeap_uint64_Random_1 0.291 ns 0.281 ns
BM_MakeHeap_uint64_Random_4 2.20 ns 1.97 ns
BM_MakeHeap_uint64_Random_16 1.93 ns 1.70 ns
BM_MakeHeap_uint64_Random_64 1.89 ns 1.48 ns
BM_MakeHeap_uint64_Random_256 2.10 ns 1.99 ns
BM_MakeHeap_uint64_Random_1024 1.41 ns 2.04 ns
BM_MakeHeap_uint64_Random_16384 2.12 ns 1.83 ns
BM_MakeHeap_uint64_Random_262144 1.83 ns 1.63 ns
BM_MakeHeap_uint64_Ascending_1 0.288 ns 0.283 ns
BM_MakeHeap_uint64_Ascending_4 0.624 ns 0.493 ns
BM_MakeHeap_uint64_Ascending_16 0.648 ns 0.688 ns
BM_MakeHeap_uint64_Ascending_64 0.671 ns 0.634 ns
BM_MakeHeap_uint64_Ascending_256 0.739 ns 0.680 ns
BM_MakeHeap_uint64_Ascending_1024 0.761 ns 0.698 ns
BM_MakeHeap_uint64_Ascending_16384 0.740 ns 0.685 ns
BM_MakeHeap_uint64_Ascending_262144 0.849 ns 0.837 ns
BM_MakeHeap_uint64_Descending_1 0.304 ns 0.287 ns
BM_MakeHeap_uint64_Descending_4 0.395 ns 0.312 ns
BM_MakeHeap_uint64_Descending_16 0.374 ns 0.276 ns
BM_MakeHeap_uint64_Descending_64 0.358 ns 0.272 ns
BM_MakeHeap_uint64_Descending_256 0.389 ns 0.303 ns
BM_MakeHeap_uint64_Descending_1024 0.361 ns 0.278 ns
BM_MakeHeap_uint64_Descending_16384 0.352 ns 0.253 ns
BM_MakeHeap_uint64_Descending_262144 0.350 ns 0.251 ns
BM_MakeHeap_uint64_SingleElement_1 0.295 ns 0.285 ns
BM_MakeHeap_uint64_SingleElement_4 0.569 ns 0.358 ns
BM_MakeHeap_uint64_SingleElement_16 0.649 ns 0.652 ns
BM_MakeHeap_uint64_SingleElement_64 0.673 ns 0.565 ns
BM_MakeHeap_uint64_SingleElement_256 0.732 ns 0.651 ns
BM_MakeHeap_uint64_SingleElement_1024 0.759 ns 0.632 ns
BM_MakeHeap_uint64_SingleElement_16384 0.748 ns 0.614 ns
BM_MakeHeap_uint64_SingleElement_262144 0.947 ns 0.797 ns
BM_MakeHeap_uint64_PipeOrgan_1 0.295 ns 0.284 ns
BM_MakeHeap_uint64_PipeOrgan_4 0.601 ns 0.496 ns
BM_MakeHeap_uint64_PipeOrgan_16 0.655 ns 1.18 ns
BM_MakeHeap_uint64_PipeOrgan_64 0.682 ns 0.803 ns
BM_MakeHeap_uint64_PipeOrgan_256 0.759 ns 0.710 ns
BM_MakeHeap_uint64_PipeOrgan_1024 0.759 ns 0.713 ns
BM_MakeHeap_uint64_PipeOrgan_16384 0.739 ns 0.696 ns
BM_MakeHeap_uint64_PipeOrgan_262144 0.870 ns 0.849 ns
BM_MakeHeap_uint64_Heap_1 0.290 ns 0.284 ns
BM_MakeHeap_uint64_Heap_4 0.413 ns 0.314 ns
BM_MakeHeap_uint64_Heap_16 0.378 ns 0.277 ns
BM_MakeHeap_uint64_Heap_64 0.362 ns 0.272 ns
BM_MakeHeap_uint64_Heap_256 0.389 ns 0.303 ns
BM_MakeHeap_uint64_Heap_1024 0.362 ns 0.283 ns
BM_MakeHeap_uint64_Heap_16384 0.352 ns 0.253 ns
BM_MakeHeap_uint64_Heap_262144 0.350 ns 0.251 ns
BM_MakeHeap_uint64_QuickSortAdversary_1 0.293 ns 0.284 ns
BM_MakeHeap_uint64_QuickSortAdversary_4 0.606 ns 0.494 ns
BM_MakeHeap_uint64_QuickSortAdversary_16 0.645 ns 0.691 ns
BM_MakeHeap_uint64_QuickSortAdversary_64 0.607 ns 0.511 ns
BM_MakeHeap_uint64_QuickSortAdversary_256 0.641 ns 0.537 ns
BM_MakeHeap_uint64_QuickSortAdversary_1024 0.657 ns 0.529 ns
BM_MakeHeap_uint64_QuickSortAdversary_16384 0.435 ns 0.316 ns
BM_MakeHeap_uint64_QuickSortAdversary_262144 0.382 ns 0.266 ns
BM_MakeHeap_pair<uint32, uint32>_Random_1 0.297 ns 0.378 ns
BM_MakeHeap_pair<uint32, uint32>_Random_4 2.80 ns 0.765 ns
BM_MakeHeap_pair<uint32, uint32>_Random_16 2.92 ns 2.20 ns
BM_MakeHeap_pair<uint32, uint32>_Random_64 3.17 ns 3.64 ns
BM_MakeHeap_pair<uint32, uint32>_Random_256 3.44 ns 3.20 ns
BM_MakeHeap_pair<uint32, uint32>_Random_1024 3.35 ns 3.64 ns
BM_MakeHeap_pair<uint32, uint32>_Random_16384 3.22 ns 3.50 ns
BM_MakeHeap_pair<uint32, uint32>_Random_262144 3.61 ns 3.46 ns
BM_MakeHeap_pair<uint32, uint32>_Ascending_1 0.291 ns 0.379 ns
BM_MakeHeap_pair<uint32, uint32>_Ascending_4 0.779 ns 0.436 ns
BM_MakeHeap_pair<uint32, uint32>_Ascending_16 0.943 ns 1.01 ns
BM_MakeHeap_pair<uint32, uint32>_Ascending_64 1.17 ns 1.26 ns
BM_MakeHeap_pair<uint32, uint32>_Ascending_256 1.38 ns 1.44 ns
BM_MakeHeap_pair<uint32, uint32>_Ascending_1024 1.37 ns 1.43 ns
BM_MakeHeap_pair<uint32, uint32>_Ascending_16384 1.29 ns 1.31 ns
BM_MakeHeap_pair<uint32, uint32>_Ascending_262144 1.37 ns 1.40 ns
BM_MakeHeap_pair<uint32, uint32>_Descending_1 0.292 ns 0.382 ns
BM_MakeHeap_pair<uint32, uint32>_Descending_4 0.440 ns 0.347 ns
BM_MakeHeap_pair<uint32, uint32>_Descending_16 0.529 ns 0.520 ns
BM_MakeHeap_pair<uint32, uint32>_Descending_64 0.540 ns 0.527 ns
BM_MakeHeap_pair<uint32, uint32>_Descending_256 0.637 ns 0.583 ns
BM_MakeHeap_pair<uint32, uint32>_Descending_1024 0.552 ns 0.513 ns
BM_MakeHeap_pair<uint32, uint32>_Descending_16384 0.522 ns 0.488 ns
BM_MakeHeap_pair<uint32, uint32>_Descending_262144 0.515 ns 0.492 ns
BM_MakeHeap_pair<uint32, uint32>_SingleElement_1 0.299 ns 0.377 ns
BM_MakeHeap_pair<uint32, uint32>_SingleElement_4 0.787 ns 0.474 ns
BM_MakeHeap_pair<uint32, uint32>_SingleElement_16 1.07 ns 0.921 ns
BM_MakeHeap_pair<uint32, uint32>_SingleElement_64 1.20 ns 1.15 ns
BM_MakeHeap_pair<uint32, uint32>_SingleElement_256 1.30 ns 1.27 ns
BM_MakeHeap_pair<uint32, uint32>_SingleElement_1024 1.30 ns 1.31 ns
BM_MakeHeap_pair<uint32, uint32>_SingleElement_16384 1.29 ns 1.28 ns
BM_MakeHeap_pair<uint32, uint32>_SingleElement_262144 1.37 ns 1.38 ns
BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_1 0.293 ns 0.385 ns
BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_4 0.677 ns 0.438 ns
BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_16 1.04 ns 1.00 ns
BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_64 1.20 ns 1.27 ns
BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_256 1.40 ns 1.43 ns
BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_1024 1.36 ns 1.43 ns
BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_16384 1.27 ns 1.31 ns
BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_262144 1.37 ns 1.41 ns
BM_MakeHeap_pair<uint32, uint32>_Heap_1 0.292 ns 0.378 ns
BM_MakeHeap_pair<uint32, uint32>_Heap_4 0.440 ns 0.380 ns
BM_MakeHeap_pair<uint32, uint32>_Heap_16 0.560 ns 0.606 ns
BM_MakeHeap_pair<uint32, uint32>_Heap_64 0.588 ns 0.573 ns
BM_MakeHeap_pair<uint32, uint32>_Heap_256 0.632 ns 0.607 ns
BM_MakeHeap_pair<uint32, uint32>_Heap_1024 0.598 ns 0.580 ns
BM_MakeHeap_pair<uint32, uint32>_Heap_16384 0.576 ns 0.563 ns
BM_MakeHeap_pair<uint32, uint32>_Heap_262144 0.572 ns 0.561 ns
BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_1 0.304 ns 0.379 ns
BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_4 0.823 ns 0.430 ns
BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_16 1.08 ns 1.03 ns
BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_64 1.18 ns 1.23 ns
BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_256 1.39 ns 1.43 ns
BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_1024 1.36 ns 1.42 ns
BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_16384 1.28 ns 1.32 ns
BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_262144 1.34 ns 1.37 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_1 0.276 ns 0.511 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_4 4.25 ns 1.96 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_16 4.84 ns 3.77 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_64 5.53 ns 4.93 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_256 5.30 ns 5.06 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_1024 5.29 ns 5.02 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_16384 5.53 ns 5.31 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_262144 5.49 ns 5.29 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_1 0.275 ns 0.443 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_4 1.22 ns 0.764 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_16 1.39 ns 1.49 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_64 1.66 ns 1.76 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_256 1.85 ns 1.99 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_1024 1.90 ns 2.02 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_16384 2.15 ns 2.27 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_262144 2.25 ns 2.37 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_1 0.274 ns 0.445 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_4 0.939 ns 0.520 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_16 1.02 ns 0.811 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_64 1.01 ns 0.941 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_256 1.03 ns 1.01 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_1024 0.969 ns 0.947 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_16384 0.882 ns 0.876 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_262144 0.893 ns 0.871 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_1 0.276 ns 0.443 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_4 1.34 ns 0.870 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_16 1.60 ns 1.59 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_64 1.91 ns 2.00 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_256 1.91 ns 2.08 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_1024 1.91 ns 2.10 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_16384 2.13 ns 2.35 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_262144 2.25 ns 2.48 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_1 0.275 ns 0.446 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_4 1.07 ns 0.671 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_16 1.36 ns 1.44 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_64 1.70 ns 1.80 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_256 1.88 ns 2.05 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_1024 1.92 ns 2.06 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_16384 2.11 ns 2.26 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_262144 2.18 ns 2.34 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_1 0.274 ns 0.441 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_4 0.938 ns 0.587 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_16 1.01 ns 0.873 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_64 1.08 ns 1.00 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_256 1.21 ns 1.18 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_1024 1.37 ns 1.29 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_16384 1.31 ns 1.26 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_262144 1.29 ns 1.25 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_1 0.275 ns 0.447 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_4 1.22 ns 0.764 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_16 1.35 ns 1.46 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_64 1.63 ns 1.73 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_256 1.83 ns 1.87 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_1024 1.81 ns 1.94 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_16384 2.06 ns 2.19 ns
BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_262144 2.14 ns 2.31 ns
BM_MakeHeap_string_Random_1 0.289 ns 0.446 ns
BM_MakeHeap_string_Random_4 7.43 ns 4.84 ns
BM_MakeHeap_string_Random_16 9.73 ns 8.92 ns
BM_MakeHeap_string_Random_64 11.5 ns 11.5 ns
BM_MakeHeap_string_Random_256 12.6 ns 12.4 ns
BM_MakeHeap_string_Random_1024 13.2 ns 13.2 ns
BM_MakeHeap_string_Random_16384 15.4 ns 15.4 ns
BM_MakeHeap_string_Random_262144 21.4 ns 21.5 ns
BM_MakeHeap_string_Ascending_1 0.287 ns 0.447 ns
BM_MakeHeap_string_Ascending_4 3.22 ns 2.44 ns
BM_MakeHeap_string_Ascending_16 3.77 ns 3.62 ns
BM_MakeHeap_string_Ascending_64 4.84 ns 5.17 ns
BM_MakeHeap_string_Ascending_256 5.79 ns 6.04 ns
BM_MakeHeap_string_Ascending_1024 5.93 ns 6.60 ns
BM_MakeHeap_string_Ascending_16384 6.84 ns 7.25 ns
BM_MakeHeap_string_Ascending_262144 13.5 ns 14.3 ns
BM_MakeHeap_string_Descending_1 0.293 ns 0.447 ns
BM_MakeHeap_string_Descending_4 2.61 ns 1.83 ns
BM_MakeHeap_string_Descending_16 2.64 ns 2.60 ns
BM_MakeHeap_string_Descending_64 2.75 ns 2.74 ns
BM_MakeHeap_string_Descending_256 3.78 ns 3.57 ns
BM_MakeHeap_string_Descending_1024 3.20 ns 3.51 ns
BM_MakeHeap_string_Descending_16384 3.57 ns 3.85 ns
BM_MakeHeap_string_Descending_262144 6.27 ns 6.39 ns
BM_MakeHeap_string_SingleElement_1 0.291 ns 0.448 ns
BM_MakeHeap_string_SingleElement_4 3.88 ns 2.71 ns
BM_MakeHeap_string_SingleElement_16 5.08 ns 4.96 ns
BM_MakeHeap_string_SingleElement_64 6.14 ns 6.29 ns
BM_MakeHeap_string_SingleElement_256 6.13 ns 6.46 ns
BM_MakeHeap_string_SingleElement_1024 5.98 ns 6.60 ns
BM_MakeHeap_string_SingleElement_16384 5.88 ns 6.39 ns
BM_MakeHeap_string_SingleElement_262144 9.95 ns 10.2 ns
BM_MakeHeap_string_PipeOrgan_1 0.292 ns 0.447 ns
BM_MakeHeap_string_PipeOrgan_4 2.97 ns 2.51 ns
BM_MakeHeap_string_PipeOrgan_16 3.76 ns 3.91 ns
BM_MakeHeap_string_PipeOrgan_64 4.89 ns 5.20 ns
BM_MakeHeap_string_PipeOrgan_256 5.77 ns 6.09 ns
BM_MakeHeap_string_PipeOrgan_1024 6.14 ns 6.40 ns
BM_MakeHeap_string_PipeOrgan_16384 6.83 ns 7.32 ns
BM_MakeHeap_string_PipeOrgan_262144 13.8 ns 14.6 ns
BM_MakeHeap_string_Heap_1 0.288 ns 0.515 ns
BM_MakeHeap_string_Heap_4 3.62 ns 4.20 ns
BM_MakeHeap_string_Heap_16 5.36 ns 5.23 ns
BM_MakeHeap_string_Heap_64 5.79 ns 5.38 ns
BM_MakeHeap_string_Heap_256 5.70 ns 5.40 ns
BM_MakeHeap_string_Heap_1024 5.78 ns 5.37 ns
BM_MakeHeap_string_Heap_16384 6.09 ns 5.67 ns
BM_MakeHeap_string_Heap_262144 6.37 ns 5.96 ns
BM_MakeHeap_string_QuickSortAdversary_1 0.282 ns 0.448 ns
BM_MakeHeap_string_QuickSortAdversary_4 7.45 ns 5.60 ns
BM_MakeHeap_string_QuickSortAdversary_16 9.76 ns 8.85 ns
BM_MakeHeap_string_QuickSortAdversary_64 11.5 ns 11.2 ns
BM_MakeHeap_string_QuickSortAdversary_256 12.0 ns 11.8 ns
BM_MakeHeap_string_QuickSortAdversary_1024 12.2 ns 12.0 ns
BM_MakeHeap_string_QuickSortAdversary_16384 13.7 ns 13.6 ns
BM_MakeHeap_string_QuickSortAdversary_262144 14.1 ns 14.8 ns
BM_MakeHeap_float_Random_1 0.287 ns 0.287 ns
BM_MakeHeap_float_Random_4 2.29 ns 2.60 ns
BM_MakeHeap_float_Random_16 4.00 ns 2.48 ns
BM_MakeHeap_float_Random_64 4.41 ns 1.92 ns
BM_MakeHeap_float_Random_256 4.73 ns 2.05 ns
BM_MakeHeap_float_Random_1024 4.90 ns 2.27 ns
BM_MakeHeap_float_Random_16384 4.42 ns 2.27 ns
BM_MakeHeap_float_Random_262144 4.72 ns 1.39 ns
BM_MakeHeap_float_Ascending_1 0.291 ns 0.293 ns
BM_MakeHeap_float_Ascending_4 0.633 ns 0.428 ns
BM_MakeHeap_float_Ascending_16 0.638 ns 0.874 ns
BM_MakeHeap_float_Ascending_64 0.614 ns 0.698 ns
BM_MakeHeap_float_Ascending_256 0.663 ns 0.713 ns
BM_MakeHeap_float_Ascending_1024 0.660 ns 0.761 ns
BM_MakeHeap_float_Ascending_16384 0.628 ns 0.725 ns
BM_MakeHeap_float_Ascending_262144 0.629 ns 0.814 ns
BM_MakeHeap_float_Descending_1 0.290 ns 0.290 ns
BM_MakeHeap_float_Descending_4 0.421 ns 0.316 ns
BM_MakeHeap_float_Descending_16 0.302 ns 0.225 ns
BM_MakeHeap_float_Descending_64 0.293 ns 0.212 ns
BM_MakeHeap_float_Descending_256 0.314 ns 0.246 ns
BM_MakeHeap_float_Descending_1024 0.300 ns 0.231 ns
BM_MakeHeap_float_Descending_16384 0.308 ns 0.205 ns
BM_MakeHeap_float_Descending_262144 0.309 ns 0.203 ns
BM_MakeHeap_float_SingleElement_1 0.289 ns 0.292 ns
BM_MakeHeap_float_SingleElement_4 0.569 ns 0.347 ns
BM_MakeHeap_float_SingleElement_16 0.538 ns 0.825 ns
BM_MakeHeap_float_SingleElement_64 0.585 ns 0.727 ns
BM_MakeHeap_float_SingleElement_256 0.603 ns 0.708 ns
BM_MakeHeap_float_SingleElement_1024 0.618 ns 0.760 ns
BM_MakeHeap_float_SingleElement_16384 0.599 ns 0.726 ns
BM_MakeHeap_float_SingleElement_262144 0.723 ns 0.820 ns
BM_MakeHeap_float_PipeOrgan_1 0.289 ns 0.291 ns
BM_MakeHeap_float_PipeOrgan_4 0.457 ns 0.420 ns
BM_MakeHeap_float_PipeOrgan_16 0.670 ns 1.32 ns
BM_MakeHeap_float_PipeOrgan_64 0.764 ns 0.889 ns
BM_MakeHeap_float_PipeOrgan_256 0.793 ns 0.757 ns
BM_MakeHeap_float_PipeOrgan_1024 0.755 ns 0.764 ns
BM_MakeHeap_float_PipeOrgan_16384 0.723 ns 0.723 ns
BM_MakeHeap_float_PipeOrgan_262144 0.654 ns 0.817 ns
BM_MakeHeap_float_Heap_1 0.291 ns 0.289 ns
BM_MakeHeap_float_Heap_4 0.388 ns 0.316 ns
BM_MakeHeap_float_Heap_16 0.317 ns 0.225 ns
BM_MakeHeap_float_Heap_64 0.353 ns 0.213 ns
BM_MakeHeap_float_Heap_256 0.361 ns 0.246 ns
BM_MakeHeap_float_Heap_1024 0.381 ns 0.233 ns
BM_MakeHeap_float_Heap_16384 0.390 ns 0.205 ns
BM_MakeHeap_float_Heap_262144 0.379 ns 0.202 ns
BM_MakeHeap_float_QuickSortAdversary_1 0.295 ns 0.289 ns
BM_MakeHeap_float_QuickSortAdversary_4 0.640 ns 0.422 ns
BM_MakeHeap_float_QuickSortAdversary_16 0.658 ns 0.871 ns
BM_MakeHeap_float_QuickSortAdversary_64 0.574 ns 0.659 ns
BM_MakeHeap_float_QuickSortAdversary_256 0.631 ns 0.550 ns
BM_MakeHeap_float_QuickSortAdversary_1024 0.617 ns 0.552 ns
BM_MakeHeap_float_QuickSortAdversary_16384 0.424 ns 0.283 ns
BM_MakeHeap_float_QuickSortAdversary_262144 0.386 ns 0.219 ns
```
Fixes #120752
Diffstat (limited to 'libcxx/include/__algorithm')
-rw-r--r-- | libcxx/include/__algorithm/make_heap.h | 20 | ||||
-rw-r--r-- | libcxx/include/__algorithm/partial_sort.h | 2 | ||||
-rw-r--r-- | libcxx/include/__algorithm/partial_sort_copy.h | 2 | ||||
-rw-r--r-- | libcxx/include/__algorithm/sift_down.h | 37 |
4 files changed, 37 insertions, 24 deletions
diff --git a/libcxx/include/__algorithm/make_heap.h b/libcxx/include/__algorithm/make_heap.h index e8f0cdb..8cfeda2 100644 --- a/libcxx/include/__algorithm/make_heap.h +++ b/libcxx/include/__algorithm/make_heap.h @@ -12,9 +12,11 @@ #include <__algorithm/comp.h> #include <__algorithm/comp_ref_type.h> #include <__algorithm/iterator_operations.h> +#include <__algorithm/push_heap.h> #include <__algorithm/sift_down.h> #include <__config> #include <__iterator/iterator_traits.h> +#include <__type_traits/is_arithmetic.h> #include <__utility/move.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -31,13 +33,23 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void __make_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare&& __comp) { __comp_ref_type<_Compare> __comp_ref = __comp; - using difference_type = typename iterator_traits<_RandomAccessIterator>::difference_type; - difference_type __n = __last - __first; + using __diff_t = __iter_diff_t<_RandomAccessIterator>; + const __diff_t __n = __last - __first; + + static const bool __assume_both_children = is_arithmetic<__iter_value_type<_RandomAccessIterator> >::value; + + // While it would be correct to always assume we have both children, in practice we observed this to be a performance + // improvement only for arithmetic types. + const __diff_t __sift_down_n = __assume_both_children ? ((__n & 1) ? __n : __n - 1) : __n; + if (__n > 1) { // start from the first parent, there is no need to consider children - for (difference_type __start = (__n - 2) / 2; __start >= 0; --__start) { - std::__sift_down<_AlgPolicy>(__first, __comp_ref, __n, __first + __start); + + for (__diff_t __start = (__sift_down_n - 2) / 2; __start >= 0; --__start) { + std::__sift_down<_AlgPolicy, __assume_both_children>(__first, __comp_ref, __sift_down_n, __start); } + if _LIBCPP_CONSTEXPR (__assume_both_children) + std::__sift_up<_AlgPolicy>(__first, __last, __comp, __n); } } diff --git a/libcxx/include/__algorithm/partial_sort.h b/libcxx/include/__algorithm/partial_sort.h index 7f8d0c4..4b39ae0 100644 --- a/libcxx/include/__algorithm/partial_sort.h +++ b/libcxx/include/__algorithm/partial_sort.h @@ -45,7 +45,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandomAccessIterator __part for (; __i != __last; ++__i) { if (__comp(*__i, *__first)) { _IterOps<_AlgPolicy>::iter_swap(__i, __first); - std::__sift_down<_AlgPolicy>(__first, __comp, __len, __first); + std::__sift_down<_AlgPolicy, false>(__first, __comp, __len, 0); } } std::__sort_heap<_AlgPolicy>(std::move(__first), std::move(__middle), __comp); diff --git a/libcxx/include/__algorithm/partial_sort_copy.h b/libcxx/include/__algorithm/partial_sort_copy.h index 172f53b..2230dfc 100644 --- a/libcxx/include/__algorithm/partial_sort_copy.h +++ b/libcxx/include/__algorithm/partial_sort_copy.h @@ -60,7 +60,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InputIterator, _Random for (; __first != __last; ++__first) if (std::__invoke(__comp, std::__invoke(__proj1, *__first), std::__invoke(__proj2, *__result_first))) { *__result_first = *__first; - std::__sift_down<_AlgPolicy>(__result_first, __projected_comp, __len, __result_first); + std::__sift_down<_AlgPolicy, false>(__result_first, __projected_comp, __len, 0); } std::__sort_heap<_AlgPolicy>(__result_first, __r, __projected_comp); } diff --git a/libcxx/include/__algorithm/sift_down.h b/libcxx/include/__algorithm/sift_down.h index 42803e3..e01c9b2 100644 --- a/libcxx/include/__algorithm/sift_down.h +++ b/libcxx/include/__algorithm/sift_down.h @@ -24,59 +24,60 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD -template <class _AlgPolicy, class _Compare, class _RandomAccessIterator> +template <class _AlgPolicy, bool __assume_both_children, class _Compare, class _RandomAccessIterator> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void __sift_down(_RandomAccessIterator __first, _Compare&& __comp, - typename iterator_traits<_RandomAccessIterator>::difference_type __len, - _RandomAccessIterator __start) { + __iter_diff_t<_RandomAccessIterator> __len, + __iter_diff_t<_RandomAccessIterator> __start) { using _Ops = _IterOps<_AlgPolicy>; typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type; typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type; // left-child of __start is at 2 * __start + 1 // right-child of __start is at 2 * __start + 2 - difference_type __child = __start - __first; + difference_type __child = __start; if (__len < 2 || (__len - 2) / 2 < __child) return; - __child = 2 * __child + 1; - _RandomAccessIterator __child_i = __first + __child; + __child = 2 * __child + 1; - if ((__child + 1) < __len && __comp(*__child_i, *(__child_i + difference_type(1)))) { + if _LIBCPP_CONSTEXPR (__assume_both_children) { + // right-child exists and is greater than left-child + __child += __comp(__first[__child], __first[__child + 1]); + } else if ((__child + 1) < __len && __comp(__first[__child], __first[__child + 1])) { // right-child exists and is greater than left-child - ++__child_i; ++__child; } // check if we are in heap-order - if (__comp(*__child_i, *__start)) + if (__comp(__first[__child], __first[__start])) // we are, __start is larger than its largest child return; - value_type __top(_Ops::__iter_move(__start)); + value_type __top(_Ops::__iter_move(__first + __start)); do { // we are not in heap-order, swap the parent with its largest child - *__start = _Ops::__iter_move(__child_i); - __start = __child_i; + __first[__start] = _Ops::__iter_move(__first + __child); + __start = __child; if ((__len - 2) / 2 < __child) break; // recompute the child based off of the updated parent - __child = 2 * __child + 1; - __child_i = __first + __child; + __child = 2 * __child + 1; - if ((__child + 1) < __len && __comp(*__child_i, *(__child_i + difference_type(1)))) { + if _LIBCPP_CONSTEXPR (__assume_both_children) { + __child += __comp(__first[__child], __first[__child + 1]); + } else if ((__child + 1) < __len && __comp(__first[__child], __first[__child + 1])) { // right-child exists and is greater than left-child - ++__child_i; ++__child; } // check if we are in heap-order - } while (!__comp(*__child_i, __top)); - *__start = std::move(__top); + } while (!__comp(__first[__child], __top)); + __first[__start] = std::move(__top); } template <class _AlgPolicy, class _Compare, class _RandomAccessIterator> |