aboutsummaryrefslogtreecommitdiff
path: root/libcxx/include/__algorithm
diff options
context:
space:
mode:
authorNikolas Klauser <nikolasklauser@berlin.de>2025-09-08 12:00:04 +0200
committerGitHub <noreply@github.com>2025-09-08 12:00:04 +0200
commit04518e7357b17e0802445b724377f844511a661f (patch)
treeb439614336a9f8ccf09efdd9dcc94af8ed1a83cd /libcxx/include/__algorithm
parentbe7094ed67526ee149ca150b9c9b1e7e724b226e (diff)
downloadllvm-04518e7357b17e0802445b724377f844511a661f.zip
llvm-04518e7357b17e0802445b724377f844511a661f.tar.gz
llvm-04518e7357b17e0802445b724377f844511a661f.tar.bz2
[libc++] Improve the performance of std::make_heap a bit (#154092)
``` Apple M4: ----------------------------------------------------------------------------------------------------- Benchmark old new ----------------------------------------------------------------------------------------------------- BM_MakeHeap_uint32_Random_1 0.285 ns 0.271 ns BM_MakeHeap_uint32_Random_4 2.09 ns 1.80 ns BM_MakeHeap_uint32_Random_16 1.85 ns 1.83 ns BM_MakeHeap_uint32_Random_64 1.92 ns 1.50 ns BM_MakeHeap_uint32_Random_256 2.10 ns 1.87 ns BM_MakeHeap_uint32_Random_1024 1.73 ns 1.86 ns BM_MakeHeap_uint32_Random_16384 2.17 ns 2.05 ns BM_MakeHeap_uint32_Random_262144 1.77 ns 1.77 ns BM_MakeHeap_uint32_Ascending_1 0.288 ns 0.277 ns BM_MakeHeap_uint32_Ascending_4 0.658 ns 0.481 ns BM_MakeHeap_uint32_Ascending_16 0.636 ns 0.637 ns BM_MakeHeap_uint32_Ascending_64 0.643 ns 0.601 ns BM_MakeHeap_uint32_Ascending_256 0.710 ns 0.636 ns BM_MakeHeap_uint32_Ascending_1024 0.747 ns 0.660 ns BM_MakeHeap_uint32_Ascending_16384 0.713 ns 0.633 ns BM_MakeHeap_uint32_Ascending_262144 0.769 ns 0.731 ns BM_MakeHeap_uint32_Descending_1 0.294 ns 0.280 ns BM_MakeHeap_uint32_Descending_4 0.379 ns 0.305 ns BM_MakeHeap_uint32_Descending_16 0.376 ns 0.268 ns BM_MakeHeap_uint32_Descending_64 0.358 ns 0.271 ns BM_MakeHeap_uint32_Descending_256 0.377 ns 0.284 ns BM_MakeHeap_uint32_Descending_1024 0.355 ns 0.267 ns BM_MakeHeap_uint32_Descending_16384 0.348 ns 0.248 ns BM_MakeHeap_uint32_Descending_262144 0.349 ns 0.247 ns BM_MakeHeap_uint32_SingleElement_1 0.292 ns 0.280 ns BM_MakeHeap_uint32_SingleElement_4 0.570 ns 0.332 ns BM_MakeHeap_uint32_SingleElement_16 0.635 ns 0.604 ns BM_MakeHeap_uint32_SingleElement_64 0.653 ns 0.567 ns BM_MakeHeap_uint32_SingleElement_256 0.703 ns 0.609 ns BM_MakeHeap_uint32_SingleElement_1024 0.737 ns 0.604 ns BM_MakeHeap_uint32_SingleElement_16384 0.699 ns 0.574 ns BM_MakeHeap_uint32_SingleElement_262144 0.803 ns 0.684 ns BM_MakeHeap_uint32_PipeOrgan_1 0.291 ns 0.284 ns BM_MakeHeap_uint32_PipeOrgan_4 0.588 ns 0.399 ns BM_MakeHeap_uint32_PipeOrgan_16 0.648 ns 1.12 ns BM_MakeHeap_uint32_PipeOrgan_64 0.662 ns 0.771 ns BM_MakeHeap_uint32_PipeOrgan_256 0.723 ns 0.672 ns BM_MakeHeap_uint32_PipeOrgan_1024 0.749 ns 0.674 ns BM_MakeHeap_uint32_PipeOrgan_16384 0.708 ns 0.638 ns BM_MakeHeap_uint32_PipeOrgan_262144 0.786 ns 0.743 ns BM_MakeHeap_uint32_Heap_1 0.298 ns 0.282 ns BM_MakeHeap_uint32_Heap_4 0.396 ns 0.308 ns BM_MakeHeap_uint32_Heap_16 0.377 ns 0.268 ns BM_MakeHeap_uint32_Heap_64 0.356 ns 0.271 ns BM_MakeHeap_uint32_Heap_256 0.378 ns 0.290 ns BM_MakeHeap_uint32_Heap_1024 0.356 ns 0.275 ns BM_MakeHeap_uint32_Heap_16384 0.348 ns 0.252 ns BM_MakeHeap_uint32_Heap_262144 0.347 ns 0.250 ns BM_MakeHeap_uint32_QuickSortAdversary_1 0.290 ns 0.284 ns BM_MakeHeap_uint32_QuickSortAdversary_4 0.627 ns 0.409 ns BM_MakeHeap_uint32_QuickSortAdversary_16 0.640 ns 0.653 ns BM_MakeHeap_uint32_QuickSortAdversary_64 0.577 ns 0.484 ns BM_MakeHeap_uint32_QuickSortAdversary_256 0.613 ns 0.521 ns BM_MakeHeap_uint32_QuickSortAdversary_1024 0.652 ns 0.514 ns BM_MakeHeap_uint32_QuickSortAdversary_16384 0.428 ns 0.308 ns BM_MakeHeap_uint32_QuickSortAdversary_262144 0.373 ns 0.261 ns BM_MakeHeap_uint64_Random_1 0.291 ns 0.281 ns BM_MakeHeap_uint64_Random_4 2.20 ns 1.97 ns BM_MakeHeap_uint64_Random_16 1.93 ns 1.70 ns BM_MakeHeap_uint64_Random_64 1.89 ns 1.48 ns BM_MakeHeap_uint64_Random_256 2.10 ns 1.99 ns BM_MakeHeap_uint64_Random_1024 1.41 ns 2.04 ns BM_MakeHeap_uint64_Random_16384 2.12 ns 1.83 ns BM_MakeHeap_uint64_Random_262144 1.83 ns 1.63 ns BM_MakeHeap_uint64_Ascending_1 0.288 ns 0.283 ns BM_MakeHeap_uint64_Ascending_4 0.624 ns 0.493 ns BM_MakeHeap_uint64_Ascending_16 0.648 ns 0.688 ns BM_MakeHeap_uint64_Ascending_64 0.671 ns 0.634 ns BM_MakeHeap_uint64_Ascending_256 0.739 ns 0.680 ns BM_MakeHeap_uint64_Ascending_1024 0.761 ns 0.698 ns BM_MakeHeap_uint64_Ascending_16384 0.740 ns 0.685 ns BM_MakeHeap_uint64_Ascending_262144 0.849 ns 0.837 ns BM_MakeHeap_uint64_Descending_1 0.304 ns 0.287 ns BM_MakeHeap_uint64_Descending_4 0.395 ns 0.312 ns BM_MakeHeap_uint64_Descending_16 0.374 ns 0.276 ns BM_MakeHeap_uint64_Descending_64 0.358 ns 0.272 ns BM_MakeHeap_uint64_Descending_256 0.389 ns 0.303 ns BM_MakeHeap_uint64_Descending_1024 0.361 ns 0.278 ns BM_MakeHeap_uint64_Descending_16384 0.352 ns 0.253 ns BM_MakeHeap_uint64_Descending_262144 0.350 ns 0.251 ns BM_MakeHeap_uint64_SingleElement_1 0.295 ns 0.285 ns BM_MakeHeap_uint64_SingleElement_4 0.569 ns 0.358 ns BM_MakeHeap_uint64_SingleElement_16 0.649 ns 0.652 ns BM_MakeHeap_uint64_SingleElement_64 0.673 ns 0.565 ns BM_MakeHeap_uint64_SingleElement_256 0.732 ns 0.651 ns BM_MakeHeap_uint64_SingleElement_1024 0.759 ns 0.632 ns BM_MakeHeap_uint64_SingleElement_16384 0.748 ns 0.614 ns BM_MakeHeap_uint64_SingleElement_262144 0.947 ns 0.797 ns BM_MakeHeap_uint64_PipeOrgan_1 0.295 ns 0.284 ns BM_MakeHeap_uint64_PipeOrgan_4 0.601 ns 0.496 ns BM_MakeHeap_uint64_PipeOrgan_16 0.655 ns 1.18 ns BM_MakeHeap_uint64_PipeOrgan_64 0.682 ns 0.803 ns BM_MakeHeap_uint64_PipeOrgan_256 0.759 ns 0.710 ns BM_MakeHeap_uint64_PipeOrgan_1024 0.759 ns 0.713 ns BM_MakeHeap_uint64_PipeOrgan_16384 0.739 ns 0.696 ns BM_MakeHeap_uint64_PipeOrgan_262144 0.870 ns 0.849 ns BM_MakeHeap_uint64_Heap_1 0.290 ns 0.284 ns BM_MakeHeap_uint64_Heap_4 0.413 ns 0.314 ns BM_MakeHeap_uint64_Heap_16 0.378 ns 0.277 ns BM_MakeHeap_uint64_Heap_64 0.362 ns 0.272 ns BM_MakeHeap_uint64_Heap_256 0.389 ns 0.303 ns BM_MakeHeap_uint64_Heap_1024 0.362 ns 0.283 ns BM_MakeHeap_uint64_Heap_16384 0.352 ns 0.253 ns BM_MakeHeap_uint64_Heap_262144 0.350 ns 0.251 ns BM_MakeHeap_uint64_QuickSortAdversary_1 0.293 ns 0.284 ns BM_MakeHeap_uint64_QuickSortAdversary_4 0.606 ns 0.494 ns BM_MakeHeap_uint64_QuickSortAdversary_16 0.645 ns 0.691 ns BM_MakeHeap_uint64_QuickSortAdversary_64 0.607 ns 0.511 ns BM_MakeHeap_uint64_QuickSortAdversary_256 0.641 ns 0.537 ns BM_MakeHeap_uint64_QuickSortAdversary_1024 0.657 ns 0.529 ns BM_MakeHeap_uint64_QuickSortAdversary_16384 0.435 ns 0.316 ns BM_MakeHeap_uint64_QuickSortAdversary_262144 0.382 ns 0.266 ns BM_MakeHeap_pair<uint32, uint32>_Random_1 0.297 ns 0.378 ns BM_MakeHeap_pair<uint32, uint32>_Random_4 2.80 ns 0.765 ns BM_MakeHeap_pair<uint32, uint32>_Random_16 2.92 ns 2.20 ns BM_MakeHeap_pair<uint32, uint32>_Random_64 3.17 ns 3.64 ns BM_MakeHeap_pair<uint32, uint32>_Random_256 3.44 ns 3.20 ns BM_MakeHeap_pair<uint32, uint32>_Random_1024 3.35 ns 3.64 ns BM_MakeHeap_pair<uint32, uint32>_Random_16384 3.22 ns 3.50 ns BM_MakeHeap_pair<uint32, uint32>_Random_262144 3.61 ns 3.46 ns BM_MakeHeap_pair<uint32, uint32>_Ascending_1 0.291 ns 0.379 ns BM_MakeHeap_pair<uint32, uint32>_Ascending_4 0.779 ns 0.436 ns BM_MakeHeap_pair<uint32, uint32>_Ascending_16 0.943 ns 1.01 ns BM_MakeHeap_pair<uint32, uint32>_Ascending_64 1.17 ns 1.26 ns BM_MakeHeap_pair<uint32, uint32>_Ascending_256 1.38 ns 1.44 ns BM_MakeHeap_pair<uint32, uint32>_Ascending_1024 1.37 ns 1.43 ns BM_MakeHeap_pair<uint32, uint32>_Ascending_16384 1.29 ns 1.31 ns BM_MakeHeap_pair<uint32, uint32>_Ascending_262144 1.37 ns 1.40 ns BM_MakeHeap_pair<uint32, uint32>_Descending_1 0.292 ns 0.382 ns BM_MakeHeap_pair<uint32, uint32>_Descending_4 0.440 ns 0.347 ns BM_MakeHeap_pair<uint32, uint32>_Descending_16 0.529 ns 0.520 ns BM_MakeHeap_pair<uint32, uint32>_Descending_64 0.540 ns 0.527 ns BM_MakeHeap_pair<uint32, uint32>_Descending_256 0.637 ns 0.583 ns BM_MakeHeap_pair<uint32, uint32>_Descending_1024 0.552 ns 0.513 ns BM_MakeHeap_pair<uint32, uint32>_Descending_16384 0.522 ns 0.488 ns BM_MakeHeap_pair<uint32, uint32>_Descending_262144 0.515 ns 0.492 ns BM_MakeHeap_pair<uint32, uint32>_SingleElement_1 0.299 ns 0.377 ns BM_MakeHeap_pair<uint32, uint32>_SingleElement_4 0.787 ns 0.474 ns BM_MakeHeap_pair<uint32, uint32>_SingleElement_16 1.07 ns 0.921 ns BM_MakeHeap_pair<uint32, uint32>_SingleElement_64 1.20 ns 1.15 ns BM_MakeHeap_pair<uint32, uint32>_SingleElement_256 1.30 ns 1.27 ns BM_MakeHeap_pair<uint32, uint32>_SingleElement_1024 1.30 ns 1.31 ns BM_MakeHeap_pair<uint32, uint32>_SingleElement_16384 1.29 ns 1.28 ns BM_MakeHeap_pair<uint32, uint32>_SingleElement_262144 1.37 ns 1.38 ns BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_1 0.293 ns 0.385 ns BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_4 0.677 ns 0.438 ns BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_16 1.04 ns 1.00 ns BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_64 1.20 ns 1.27 ns BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_256 1.40 ns 1.43 ns BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_1024 1.36 ns 1.43 ns BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_16384 1.27 ns 1.31 ns BM_MakeHeap_pair<uint32, uint32>_PipeOrgan_262144 1.37 ns 1.41 ns BM_MakeHeap_pair<uint32, uint32>_Heap_1 0.292 ns 0.378 ns BM_MakeHeap_pair<uint32, uint32>_Heap_4 0.440 ns 0.380 ns BM_MakeHeap_pair<uint32, uint32>_Heap_16 0.560 ns 0.606 ns BM_MakeHeap_pair<uint32, uint32>_Heap_64 0.588 ns 0.573 ns BM_MakeHeap_pair<uint32, uint32>_Heap_256 0.632 ns 0.607 ns BM_MakeHeap_pair<uint32, uint32>_Heap_1024 0.598 ns 0.580 ns BM_MakeHeap_pair<uint32, uint32>_Heap_16384 0.576 ns 0.563 ns BM_MakeHeap_pair<uint32, uint32>_Heap_262144 0.572 ns 0.561 ns BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_1 0.304 ns 0.379 ns BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_4 0.823 ns 0.430 ns BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_16 1.08 ns 1.03 ns BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_64 1.18 ns 1.23 ns BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_256 1.39 ns 1.43 ns BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_1024 1.36 ns 1.42 ns BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_16384 1.28 ns 1.32 ns BM_MakeHeap_pair<uint32, uint32>_QuickSortAdversary_262144 1.34 ns 1.37 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_1 0.276 ns 0.511 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_4 4.25 ns 1.96 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_16 4.84 ns 3.77 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_64 5.53 ns 4.93 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_256 5.30 ns 5.06 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_1024 5.29 ns 5.02 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_16384 5.53 ns 5.31 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_Random_262144 5.49 ns 5.29 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_1 0.275 ns 0.443 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_4 1.22 ns 0.764 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_16 1.39 ns 1.49 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_64 1.66 ns 1.76 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_256 1.85 ns 1.99 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_1024 1.90 ns 2.02 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_16384 2.15 ns 2.27 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_Ascending_262144 2.25 ns 2.37 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_1 0.274 ns 0.445 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_4 0.939 ns 0.520 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_16 1.02 ns 0.811 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_64 1.01 ns 0.941 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_256 1.03 ns 1.01 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_1024 0.969 ns 0.947 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_16384 0.882 ns 0.876 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_Descending_262144 0.893 ns 0.871 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_1 0.276 ns 0.443 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_4 1.34 ns 0.870 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_16 1.60 ns 1.59 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_64 1.91 ns 2.00 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_256 1.91 ns 2.08 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_1024 1.91 ns 2.10 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_16384 2.13 ns 2.35 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_SingleElement_262144 2.25 ns 2.48 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_1 0.275 ns 0.446 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_4 1.07 ns 0.671 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_16 1.36 ns 1.44 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_64 1.70 ns 1.80 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_256 1.88 ns 2.05 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_1024 1.92 ns 2.06 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_16384 2.11 ns 2.26 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_PipeOrgan_262144 2.18 ns 2.34 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_1 0.274 ns 0.441 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_4 0.938 ns 0.587 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_16 1.01 ns 0.873 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_64 1.08 ns 1.00 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_256 1.21 ns 1.18 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_1024 1.37 ns 1.29 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_16384 1.31 ns 1.26 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_Heap_262144 1.29 ns 1.25 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_1 0.275 ns 0.447 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_4 1.22 ns 0.764 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_16 1.35 ns 1.46 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_64 1.63 ns 1.73 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_256 1.83 ns 1.87 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_1024 1.81 ns 1.94 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_16384 2.06 ns 2.19 ns BM_MakeHeap_tuple<uint32, uint64, uint32>_QuickSortAdversary_262144 2.14 ns 2.31 ns BM_MakeHeap_string_Random_1 0.289 ns 0.446 ns BM_MakeHeap_string_Random_4 7.43 ns 4.84 ns BM_MakeHeap_string_Random_16 9.73 ns 8.92 ns BM_MakeHeap_string_Random_64 11.5 ns 11.5 ns BM_MakeHeap_string_Random_256 12.6 ns 12.4 ns BM_MakeHeap_string_Random_1024 13.2 ns 13.2 ns BM_MakeHeap_string_Random_16384 15.4 ns 15.4 ns BM_MakeHeap_string_Random_262144 21.4 ns 21.5 ns BM_MakeHeap_string_Ascending_1 0.287 ns 0.447 ns BM_MakeHeap_string_Ascending_4 3.22 ns 2.44 ns BM_MakeHeap_string_Ascending_16 3.77 ns 3.62 ns BM_MakeHeap_string_Ascending_64 4.84 ns 5.17 ns BM_MakeHeap_string_Ascending_256 5.79 ns 6.04 ns BM_MakeHeap_string_Ascending_1024 5.93 ns 6.60 ns BM_MakeHeap_string_Ascending_16384 6.84 ns 7.25 ns BM_MakeHeap_string_Ascending_262144 13.5 ns 14.3 ns BM_MakeHeap_string_Descending_1 0.293 ns 0.447 ns BM_MakeHeap_string_Descending_4 2.61 ns 1.83 ns BM_MakeHeap_string_Descending_16 2.64 ns 2.60 ns BM_MakeHeap_string_Descending_64 2.75 ns 2.74 ns BM_MakeHeap_string_Descending_256 3.78 ns 3.57 ns BM_MakeHeap_string_Descending_1024 3.20 ns 3.51 ns BM_MakeHeap_string_Descending_16384 3.57 ns 3.85 ns BM_MakeHeap_string_Descending_262144 6.27 ns 6.39 ns BM_MakeHeap_string_SingleElement_1 0.291 ns 0.448 ns BM_MakeHeap_string_SingleElement_4 3.88 ns 2.71 ns BM_MakeHeap_string_SingleElement_16 5.08 ns 4.96 ns BM_MakeHeap_string_SingleElement_64 6.14 ns 6.29 ns BM_MakeHeap_string_SingleElement_256 6.13 ns 6.46 ns BM_MakeHeap_string_SingleElement_1024 5.98 ns 6.60 ns BM_MakeHeap_string_SingleElement_16384 5.88 ns 6.39 ns BM_MakeHeap_string_SingleElement_262144 9.95 ns 10.2 ns BM_MakeHeap_string_PipeOrgan_1 0.292 ns 0.447 ns BM_MakeHeap_string_PipeOrgan_4 2.97 ns 2.51 ns BM_MakeHeap_string_PipeOrgan_16 3.76 ns 3.91 ns BM_MakeHeap_string_PipeOrgan_64 4.89 ns 5.20 ns BM_MakeHeap_string_PipeOrgan_256 5.77 ns 6.09 ns BM_MakeHeap_string_PipeOrgan_1024 6.14 ns 6.40 ns BM_MakeHeap_string_PipeOrgan_16384 6.83 ns 7.32 ns BM_MakeHeap_string_PipeOrgan_262144 13.8 ns 14.6 ns BM_MakeHeap_string_Heap_1 0.288 ns 0.515 ns BM_MakeHeap_string_Heap_4 3.62 ns 4.20 ns BM_MakeHeap_string_Heap_16 5.36 ns 5.23 ns BM_MakeHeap_string_Heap_64 5.79 ns 5.38 ns BM_MakeHeap_string_Heap_256 5.70 ns 5.40 ns BM_MakeHeap_string_Heap_1024 5.78 ns 5.37 ns BM_MakeHeap_string_Heap_16384 6.09 ns 5.67 ns BM_MakeHeap_string_Heap_262144 6.37 ns 5.96 ns BM_MakeHeap_string_QuickSortAdversary_1 0.282 ns 0.448 ns BM_MakeHeap_string_QuickSortAdversary_4 7.45 ns 5.60 ns BM_MakeHeap_string_QuickSortAdversary_16 9.76 ns 8.85 ns BM_MakeHeap_string_QuickSortAdversary_64 11.5 ns 11.2 ns BM_MakeHeap_string_QuickSortAdversary_256 12.0 ns 11.8 ns BM_MakeHeap_string_QuickSortAdversary_1024 12.2 ns 12.0 ns BM_MakeHeap_string_QuickSortAdversary_16384 13.7 ns 13.6 ns BM_MakeHeap_string_QuickSortAdversary_262144 14.1 ns 14.8 ns BM_MakeHeap_float_Random_1 0.287 ns 0.287 ns BM_MakeHeap_float_Random_4 2.29 ns 2.60 ns BM_MakeHeap_float_Random_16 4.00 ns 2.48 ns BM_MakeHeap_float_Random_64 4.41 ns 1.92 ns BM_MakeHeap_float_Random_256 4.73 ns 2.05 ns BM_MakeHeap_float_Random_1024 4.90 ns 2.27 ns BM_MakeHeap_float_Random_16384 4.42 ns 2.27 ns BM_MakeHeap_float_Random_262144 4.72 ns 1.39 ns BM_MakeHeap_float_Ascending_1 0.291 ns 0.293 ns BM_MakeHeap_float_Ascending_4 0.633 ns 0.428 ns BM_MakeHeap_float_Ascending_16 0.638 ns 0.874 ns BM_MakeHeap_float_Ascending_64 0.614 ns 0.698 ns BM_MakeHeap_float_Ascending_256 0.663 ns 0.713 ns BM_MakeHeap_float_Ascending_1024 0.660 ns 0.761 ns BM_MakeHeap_float_Ascending_16384 0.628 ns 0.725 ns BM_MakeHeap_float_Ascending_262144 0.629 ns 0.814 ns BM_MakeHeap_float_Descending_1 0.290 ns 0.290 ns BM_MakeHeap_float_Descending_4 0.421 ns 0.316 ns BM_MakeHeap_float_Descending_16 0.302 ns 0.225 ns BM_MakeHeap_float_Descending_64 0.293 ns 0.212 ns BM_MakeHeap_float_Descending_256 0.314 ns 0.246 ns BM_MakeHeap_float_Descending_1024 0.300 ns 0.231 ns BM_MakeHeap_float_Descending_16384 0.308 ns 0.205 ns BM_MakeHeap_float_Descending_262144 0.309 ns 0.203 ns BM_MakeHeap_float_SingleElement_1 0.289 ns 0.292 ns BM_MakeHeap_float_SingleElement_4 0.569 ns 0.347 ns BM_MakeHeap_float_SingleElement_16 0.538 ns 0.825 ns BM_MakeHeap_float_SingleElement_64 0.585 ns 0.727 ns BM_MakeHeap_float_SingleElement_256 0.603 ns 0.708 ns BM_MakeHeap_float_SingleElement_1024 0.618 ns 0.760 ns BM_MakeHeap_float_SingleElement_16384 0.599 ns 0.726 ns BM_MakeHeap_float_SingleElement_262144 0.723 ns 0.820 ns BM_MakeHeap_float_PipeOrgan_1 0.289 ns 0.291 ns BM_MakeHeap_float_PipeOrgan_4 0.457 ns 0.420 ns BM_MakeHeap_float_PipeOrgan_16 0.670 ns 1.32 ns BM_MakeHeap_float_PipeOrgan_64 0.764 ns 0.889 ns BM_MakeHeap_float_PipeOrgan_256 0.793 ns 0.757 ns BM_MakeHeap_float_PipeOrgan_1024 0.755 ns 0.764 ns BM_MakeHeap_float_PipeOrgan_16384 0.723 ns 0.723 ns BM_MakeHeap_float_PipeOrgan_262144 0.654 ns 0.817 ns BM_MakeHeap_float_Heap_1 0.291 ns 0.289 ns BM_MakeHeap_float_Heap_4 0.388 ns 0.316 ns BM_MakeHeap_float_Heap_16 0.317 ns 0.225 ns BM_MakeHeap_float_Heap_64 0.353 ns 0.213 ns BM_MakeHeap_float_Heap_256 0.361 ns 0.246 ns BM_MakeHeap_float_Heap_1024 0.381 ns 0.233 ns BM_MakeHeap_float_Heap_16384 0.390 ns 0.205 ns BM_MakeHeap_float_Heap_262144 0.379 ns 0.202 ns BM_MakeHeap_float_QuickSortAdversary_1 0.295 ns 0.289 ns BM_MakeHeap_float_QuickSortAdversary_4 0.640 ns 0.422 ns BM_MakeHeap_float_QuickSortAdversary_16 0.658 ns 0.871 ns BM_MakeHeap_float_QuickSortAdversary_64 0.574 ns 0.659 ns BM_MakeHeap_float_QuickSortAdversary_256 0.631 ns 0.550 ns BM_MakeHeap_float_QuickSortAdversary_1024 0.617 ns 0.552 ns BM_MakeHeap_float_QuickSortAdversary_16384 0.424 ns 0.283 ns BM_MakeHeap_float_QuickSortAdversary_262144 0.386 ns 0.219 ns ``` Fixes #120752
Diffstat (limited to 'libcxx/include/__algorithm')
-rw-r--r--libcxx/include/__algorithm/make_heap.h20
-rw-r--r--libcxx/include/__algorithm/partial_sort.h2
-rw-r--r--libcxx/include/__algorithm/partial_sort_copy.h2
-rw-r--r--libcxx/include/__algorithm/sift_down.h37
4 files changed, 37 insertions, 24 deletions
diff --git a/libcxx/include/__algorithm/make_heap.h b/libcxx/include/__algorithm/make_heap.h
index e8f0cdb..8cfeda2 100644
--- a/libcxx/include/__algorithm/make_heap.h
+++ b/libcxx/include/__algorithm/make_heap.h
@@ -12,9 +12,11 @@
#include <__algorithm/comp.h>
#include <__algorithm/comp_ref_type.h>
#include <__algorithm/iterator_operations.h>
+#include <__algorithm/push_heap.h>
#include <__algorithm/sift_down.h>
#include <__config>
#include <__iterator/iterator_traits.h>
+#include <__type_traits/is_arithmetic.h>
#include <__utility/move.h>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -31,13 +33,23 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void
__make_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare&& __comp) {
__comp_ref_type<_Compare> __comp_ref = __comp;
- using difference_type = typename iterator_traits<_RandomAccessIterator>::difference_type;
- difference_type __n = __last - __first;
+ using __diff_t = __iter_diff_t<_RandomAccessIterator>;
+ const __diff_t __n = __last - __first;
+
+ static const bool __assume_both_children = is_arithmetic<__iter_value_type<_RandomAccessIterator> >::value;
+
+ // While it would be correct to always assume we have both children, in practice we observed this to be a performance
+ // improvement only for arithmetic types.
+ const __diff_t __sift_down_n = __assume_both_children ? ((__n & 1) ? __n : __n - 1) : __n;
+
if (__n > 1) {
// start from the first parent, there is no need to consider children
- for (difference_type __start = (__n - 2) / 2; __start >= 0; --__start) {
- std::__sift_down<_AlgPolicy>(__first, __comp_ref, __n, __first + __start);
+
+ for (__diff_t __start = (__sift_down_n - 2) / 2; __start >= 0; --__start) {
+ std::__sift_down<_AlgPolicy, __assume_both_children>(__first, __comp_ref, __sift_down_n, __start);
}
+ if _LIBCPP_CONSTEXPR (__assume_both_children)
+ std::__sift_up<_AlgPolicy>(__first, __last, __comp, __n);
}
}
diff --git a/libcxx/include/__algorithm/partial_sort.h b/libcxx/include/__algorithm/partial_sort.h
index 7f8d0c4..4b39ae0 100644
--- a/libcxx/include/__algorithm/partial_sort.h
+++ b/libcxx/include/__algorithm/partial_sort.h
@@ -45,7 +45,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandomAccessIterator __part
for (; __i != __last; ++__i) {
if (__comp(*__i, *__first)) {
_IterOps<_AlgPolicy>::iter_swap(__i, __first);
- std::__sift_down<_AlgPolicy>(__first, __comp, __len, __first);
+ std::__sift_down<_AlgPolicy, false>(__first, __comp, __len, 0);
}
}
std::__sort_heap<_AlgPolicy>(std::move(__first), std::move(__middle), __comp);
diff --git a/libcxx/include/__algorithm/partial_sort_copy.h b/libcxx/include/__algorithm/partial_sort_copy.h
index 172f53b..2230dfc 100644
--- a/libcxx/include/__algorithm/partial_sort_copy.h
+++ b/libcxx/include/__algorithm/partial_sort_copy.h
@@ -60,7 +60,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InputIterator, _Random
for (; __first != __last; ++__first)
if (std::__invoke(__comp, std::__invoke(__proj1, *__first), std::__invoke(__proj2, *__result_first))) {
*__result_first = *__first;
- std::__sift_down<_AlgPolicy>(__result_first, __projected_comp, __len, __result_first);
+ std::__sift_down<_AlgPolicy, false>(__result_first, __projected_comp, __len, 0);
}
std::__sort_heap<_AlgPolicy>(__result_first, __r, __projected_comp);
}
diff --git a/libcxx/include/__algorithm/sift_down.h b/libcxx/include/__algorithm/sift_down.h
index 42803e3..e01c9b2 100644
--- a/libcxx/include/__algorithm/sift_down.h
+++ b/libcxx/include/__algorithm/sift_down.h
@@ -24,59 +24,60 @@ _LIBCPP_PUSH_MACROS
_LIBCPP_BEGIN_NAMESPACE_STD
-template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
+template <class _AlgPolicy, bool __assume_both_children, class _Compare, class _RandomAccessIterator>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void
__sift_down(_RandomAccessIterator __first,
_Compare&& __comp,
- typename iterator_traits<_RandomAccessIterator>::difference_type __len,
- _RandomAccessIterator __start) {
+ __iter_diff_t<_RandomAccessIterator> __len,
+ __iter_diff_t<_RandomAccessIterator> __start) {
using _Ops = _IterOps<_AlgPolicy>;
typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type;
// left-child of __start is at 2 * __start + 1
// right-child of __start is at 2 * __start + 2
- difference_type __child = __start - __first;
+ difference_type __child = __start;
if (__len < 2 || (__len - 2) / 2 < __child)
return;
- __child = 2 * __child + 1;
- _RandomAccessIterator __child_i = __first + __child;
+ __child = 2 * __child + 1;
- if ((__child + 1) < __len && __comp(*__child_i, *(__child_i + difference_type(1)))) {
+ if _LIBCPP_CONSTEXPR (__assume_both_children) {
+ // right-child exists and is greater than left-child
+ __child += __comp(__first[__child], __first[__child + 1]);
+ } else if ((__child + 1) < __len && __comp(__first[__child], __first[__child + 1])) {
// right-child exists and is greater than left-child
- ++__child_i;
++__child;
}
// check if we are in heap-order
- if (__comp(*__child_i, *__start))
+ if (__comp(__first[__child], __first[__start]))
// we are, __start is larger than its largest child
return;
- value_type __top(_Ops::__iter_move(__start));
+ value_type __top(_Ops::__iter_move(__first + __start));
do {
// we are not in heap-order, swap the parent with its largest child
- *__start = _Ops::__iter_move(__child_i);
- __start = __child_i;
+ __first[__start] = _Ops::__iter_move(__first + __child);
+ __start = __child;
if ((__len - 2) / 2 < __child)
break;
// recompute the child based off of the updated parent
- __child = 2 * __child + 1;
- __child_i = __first + __child;
+ __child = 2 * __child + 1;
- if ((__child + 1) < __len && __comp(*__child_i, *(__child_i + difference_type(1)))) {
+ if _LIBCPP_CONSTEXPR (__assume_both_children) {
+ __child += __comp(__first[__child], __first[__child + 1]);
+ } else if ((__child + 1) < __len && __comp(__first[__child], __first[__child + 1])) {
// right-child exists and is greater than left-child
- ++__child_i;
++__child;
}
// check if we are in heap-order
- } while (!__comp(*__child_i, __top));
- *__start = std::move(__top);
+ } while (!__comp(__first[__child], __top));
+ __first[__start] = std::move(__top);
}
template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>