diff options
author | Roman Lebedev <lebedev.ri@gmail.com> | 2022-12-06 17:55:35 +0300 |
---|---|---|
committer | Roman Lebedev <lebedev.ri@gmail.com> | 2022-12-06 18:05:22 +0300 |
commit | 2ffe225d113031cc211d20d8d2cb82eeaa1a34a2 (patch) | |
tree | c3ff2f103b392a83e732f2c54b525b9e8e3d1cbc /llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp | |
parent | 26e50c4c4df10de7bd8fcad520bf11ab94331eac (diff) | |
download | llvm-2ffe225d113031cc211d20d8d2cb82eeaa1a34a2.zip llvm-2ffe225d113031cc211d20d8d2cb82eeaa1a34a2.tar.gz llvm-2ffe225d113031cc211d20d8d2cb82eeaa1a34a2.tar.bz2 |
[llvm-exegesis] parallel snippet generator: avoid Read-After-Write pitfail for instrs w/ tied variables
As it is being discussed in https://github.com/llvm/llvm-project/issues/59325,
at least for the instructions with tied variables,
when trying to parallelize the instructions,
register selection is rather bad, and may either
use a register which we have used for def,
or vice versa.
That introduces serialization, and leads to
overly pessimistic inverse throughput measurement.
The new implementation avoids that,
New result:
```
$ ninja llvm-exegesis && ./bin/llvm-exegesis --mode=inverse_throughput --opcode-name=VFMADD132PDr --max-configs-per-opcode=9182
ninja: no work to do.
Check generated assembly with: /usr/bin/objdump -d /tmp/snippet-4af034.o
---
mode: inverse_throughput
key:
instructions:
- 'VFMADD132PDr XMM3 XMM3 XMM4 XMM8'
- 'VFMADD132PDr XMM5 XMM5 XMM14 XMM7'
- 'VFMADD132PDr XMM10 XMM10 XMM11 XMM15'
- 'VFMADD132PDr XMM13 XMM13 XMM15 XMM15'
- 'VFMADD132PDr XMM12 XMM12 XMM11 XMM1'
- 'VFMADD132PDr XMM0 XMM0 XMM6 XMM9'
- 'VFMADD132PDr XMM2 XMM2 XMM15 XMM11'
config: ''
register_initial_values:
- 'XMM3=0x0'
- 'XMM4=0x0'
- 'XMM8=0x0'
- 'MXCSR=0x0'
- 'XMM5=0x0'
- 'XMM14=0x0'
- 'XMM7=0x0'
- 'XMM10=0x0'
- 'XMM11=0x0'
- 'XMM15=0x0'
- 'XMM13=0x0'
- 'XMM12=0x0'
- 'XMM1=0x0'
- 'XMM0=0x0'
- 'XMM6=0x0'
- 'XMM9=0x0'
- 'XMM2=0x0'
cpu_name: znver3
llvm_triple: x86_64-unknown-linux-gnu
num_repetitions: 10000
measurements:
- { key: inverse_throughput, value: 0.6403, per_snippet_value: 4.4821 }
error: ''
info: instruction has tied variables, avoiding Read-After-Write issue, picking random def and use registers not aliasing each other, randomizing registers for uses
assembled_snippet: 4883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F1C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F24244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F04244883C4104883EC04C70424801F0000C5F8AE14244883C4044883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F2C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F34244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F3C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F14244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F1C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F3C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F2C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F24244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F0C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F04244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F34244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F0C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F14244883C410C4C2D998D8C4E28998EFC442A198D7C4428198EFC462A198E1C4C2C998C1C4C28198D3C4C2D998D8C4E28998EFC442A198D7C4428198EFC462A198E1C4C2C998C1C4C28198D3C4C2D998D8C4E28998EFC442A198D7C4428198EFC462A198E1C4C2C998C1C4C28198D3C4C2D998D8C4E28998EFC442A198D7C4428198EFC462A198E1C4C2C998C1C4C28198D3C3
...
Check generated assembly with: /usr/bin/objdump -d /tmp/snippet-f05c2f.o
---
mode: inverse_throughput
key:
instructions:
- 'VFMADD132PDr XMM15 XMM15 XMM11 XMM2'
- 'VFMADD132PDr XMM5 XMM5 XMM11 XMM2'
- 'VFMADD132PDr XMM14 XMM14 XMM11 XMM2'
- 'VFMADD132PDr XMM4 XMM4 XMM11 XMM2'
- 'VFMADD132PDr XMM8 XMM8 XMM11 XMM2'
- 'VFMADD132PDr XMM3 XMM3 XMM11 XMM2'
- 'VFMADD132PDr XMM10 XMM10 XMM11 XMM2'
- 'VFMADD132PDr XMM7 XMM7 XMM11 XMM2'
- 'VFMADD132PDr XMM13 XMM13 XMM11 XMM2'
- 'VFMADD132PDr XMM9 XMM9 XMM11 XMM2'
- 'VFMADD132PDr XMM1 XMM1 XMM11 XMM2'
- 'VFMADD132PDr XMM6 XMM6 XMM11 XMM2'
- 'VFMADD132PDr XMM0 XMM0 XMM11 XMM2'
- 'VFMADD132PDr XMM12 XMM12 XMM11 XMM2'
config: ''
register_initial_values:
- 'XMM15=0x0'
- 'XMM11=0x0'
- 'XMM2=0x0'
- 'MXCSR=0x0'
- 'XMM5=0x0'
- 'XMM14=0x0'
- 'XMM4=0x0'
- 'XMM8=0x0'
- 'XMM3=0x0'
- 'XMM10=0x0'
- 'XMM7=0x0'
- 'XMM13=0x0'
- 'XMM9=0x0'
- 'XMM1=0x0'
- 'XMM6=0x0'
- 'XMM0=0x0'
- 'XMM12=0x0'
cpu_name: znver3
llvm_triple: x86_64-unknown-linux-gnu
num_repetitions: 10000
measurements:
- { key: inverse_throughput, value: 0.5312, per_snippet_value: 7.4368 }
error: ''
info: instruction has tied variables, avoiding Read-After-Write issue, picking random def and use registers not aliasing each other, one unique register for each use position
assembled_snippet: 4883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F3C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F1C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F14244883C4104883EC04C70424801F0000C5F8AE14244883C4044883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F2C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F34244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F24244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F04244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F1C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F14244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F3C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F2C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F0C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F0C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F34244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F04244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F24244883C410C462A198FAC4E2A198EAC462A198F2C4E2A198E2C462A198C2C4E2A198DAC462A198D2C4E2A198FAC462A198EAC462A198CAC4E2A198CAC4E2A198F2C4E2A198C2C462A198E2C462A198FAC4E2A198EAC462A198F2C4E2A198E2C462A198C2C4E2A198DAC462A198D2C4E2A198FAC462A198EAC462A198CAC4E2A198CAC4E2A198F2C4E2A198C2C462A198E2C462A198FAC4E2A198EAC462A198F2C4E2A198E2C462A198C2C4E2A198DAC462A198D2C4E2A198FAC462A198EAC462A198CAC4E2A198CAC4E2A198F2C4E2A198C2C462A198E2C462A198FAC4E2A198EAC462A198F2C4E2A198E2C462A198C2C4E2A198DAC462A198D2C4E2A198FAC462A198EAC462A198CAC4E2A198CAC4E2A198F2C4E2A198C2C462A198E2C3
...
Check generated assembly with: /usr/bin/objdump -d /tmp/snippet-c32060.o
---
mode: inverse_throughput
key:
instructions:
- 'VFMADD132PDr XMM10 XMM10 XMM6 XMM6'
- 'VFMADD132PDr XMM8 XMM8 XMM6 XMM6'
- 'VFMADD132PDr XMM12 XMM12 XMM6 XMM6'
- 'VFMADD132PDr XMM9 XMM9 XMM6 XMM6'
- 'VFMADD132PDr XMM7 XMM7 XMM6 XMM6'
- 'VFMADD132PDr XMM1 XMM1 XMM6 XMM6'
- 'VFMADD132PDr XMM0 XMM0 XMM6 XMM6'
- 'VFMADD132PDr XMM5 XMM5 XMM6 XMM6'
- 'VFMADD132PDr XMM11 XMM11 XMM6 XMM6'
- 'VFMADD132PDr XMM2 XMM2 XMM6 XMM6'
- 'VFMADD132PDr XMM15 XMM15 XMM6 XMM6'
- 'VFMADD132PDr XMM3 XMM3 XMM6 XMM6'
- 'VFMADD132PDr XMM14 XMM14 XMM6 XMM6'
- 'VFMADD132PDr XMM4 XMM4 XMM6 XMM6'
- 'VFMADD132PDr XMM13 XMM13 XMM6 XMM6'
config: ''
register_initial_values:
- 'XMM10=0x0'
- 'XMM6=0x0'
- 'MXCSR=0x0'
- 'XMM8=0x0'
- 'XMM12=0x0'
- 'XMM9=0x0'
- 'XMM7=0x0'
- 'XMM1=0x0'
- 'XMM0=0x0'
- 'XMM5=0x0'
- 'XMM11=0x0'
- 'XMM2=0x0'
- 'XMM15=0x0'
- 'XMM3=0x0'
- 'XMM14=0x0'
- 'XMM4=0x0'
- 'XMM13=0x0'
cpu_name: znver3
llvm_triple: x86_64-unknown-linux-gnu
num_repetitions: 10000
measurements:
- { key: inverse_throughput, value: 0.5311, per_snippet_value: 7.9665 }
error: ''
info: instruction has tied variables, avoiding Read-After-Write issue, picking random def and use registers not aliasing each other, reusing the same register for all uses
assembled_snippet: 4883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F14244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F34244883C4104883EC04C70424801F0000C5F8AE14244883C4044883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F04244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F24244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F0C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F3C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F0C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F04244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F2C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F1C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F14244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F3C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F1C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F34244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F24244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F2C244883C410C462C998D6C462C998C6C462C998E6C462C998CEC4E2C998FEC4E2C998CEC4E2C998C6C4E2C998EEC462C998DEC4E2C998D6C462C998FEC4E2C998DEC462C998F6C4E2C998E6C462C998EEC462C998D6C462C998C6C462C998E6C462C998CEC4E2C998FEC4E2C998CEC4E2C998C6C4E2C998EEC462C998DEC4E2C998D6C462C998FEC4E2C998DEC462C998F6C4E2C998E6C462C998EEC462C998D6C462C998C6C462C998E6C462C998CEC4E2C998FEC4E2C998CEC4E2C998C6C4E2C998EEC462C998DEC4E2C998D6C462C998FEC4E2C998DEC462C998F6C4E2C998E6C462C998EEC462C998D6C462C998C6C462C998E6C462C998CEC4E2C998FEC4E2C998CEC4E2C998C6C4E2C998EEC462C998DEC4E2C998D6C462C998FEC4E2C998DEC462C998F6C4E2C998E6C462C998EEC3
...
```
Reviewed By: courbet
Differential Revision: https://reviews.llvm.org/D139283
Diffstat (limited to 'llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp')
0 files changed, 0 insertions, 0 deletions