1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
|
#!/usr/bin/python
# Generate Intel taken branches Linux perf event script for autofdo profiling.
# Copyright (C) 2016 Free Software Foundation, Inc.
#
# GCC is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 3, or (at your option) any later
# version.
#
# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCC; see the file COPYING3. If not see
# <http://www.gnu.org/licenses/>.
# Run it with perf record -b -e EVENT program ...
# The Linux kernel needs to support the PMU of the current CPU, and
# it will likely not work in VMs.
# Add --all to print for all cpus, otherwise for current cpu.
# Add --script to generate shell script to run correct event.
#
# Requires internet (https) access. This may require setting up a proxy
# with export https_proxy=...
#
import urllib2
import sys
import json
import argparse
import collections
# Base URL under which mapfile.csv and the per-CPU event JSON files are fetched.
baseurl = "https://download.01.org/perfmon"

# Event names that are accepted as a "taken branches" event; find_event()
# reports every downloaded event whose EventName is one of these.
target_events = (
    u'BR_INST_RETIRED.NEAR_TAKEN',
    u'BR_INST_EXEC.TAKEN',
    u'BR_INST_RETIRED.TAKEN_JCC',
    u'BR_INST_TYPE_RETIRED.COND_TAKEN',
)

# perf event string -> list of CPU models using it (filled only with --script).
eventmap = collections.defaultdict(list)

# Command line: --all/-a selects every CPU in the map file instead of the
# current one, --script emits a shell script instead of a plain listing.
ap = argparse.ArgumentParser()
ap.add_argument('--all', '-a', action='store_true', help='Print for all CPUs')
ap.add_argument('--script', action='store_true', help='Generate shell script')
args = ap.parse_args()
def get_cpu_str(cpuinfo_path='/proc/cpuinfo'):
    """Identify the first CPU described in a Linux cpuinfo file.

    Scans the file line by line for the ``vendor_id``, ``cpu family`` and
    ``model`` fields and stops as soon as all three have been seen.

    Args:
        cpuinfo_path: File to read; defaults to ``/proc/cpuinfo``.

    Returns:
        A ``(cpu_string, model)`` tuple where ``cpu_string`` is formatted as
        ``"<vendor>-<family>-<MODEL in upper-case hex>"`` and ``model`` is
        the model number as an int, or ``(None, None)`` when the file does
        not provide all three fields.

    Raises:
        IOError/OSError: if the file cannot be opened.
    """
    with open(cpuinfo_path, 'r') as c:
        vendor, fam, model = None, None, None
        for line in c:
            fields = line.split()
            try:
                if len(fields) >= 3 and fields[0] == 'vendor_id':
                    vendor = fields[2]
                elif len(fields) >= 3 and fields[0] == 'model' and fields[1] == ':':
                    model = int(fields[2])
                elif len(fields) >= 4 and fields[0] == 'cpu' and fields[1] == 'family':
                    fam = int(fields[3])
            except ValueError:
                # A non-numeric value (e.g. a free-text "model" line) cannot
                # identify the CPU; ignore the line instead of crashing.
                continue
            # Compare against None explicitly: a family or model of 0 is a
            # valid value and must not be mistaken for "not seen yet".
            if vendor is not None and fam is not None and model is not None:
                return "%s-%d-%X" % (vendor, fam, model), model
    return None, None
def _pebs_level(entry):
    """Return the numeric PEBS value of an event entry, 0 if absent or unparsable."""
    try:
        return int(entry.get(u'PEBS', 0))
    except (TypeError, ValueError):
        return 0


def find_event(eventurl, model):
    """Download one event list and report its taken-branch events.

    Fetches the JSON document at ``eventurl`` and, for every entry whose
    ``EventName`` is in ``target_events``, builds a perf event string of the
    form ``cpu/event=<EventCode>,umask=<UMask>/``, with a trailing ``p``
    when the entry's PEBS value is greater than zero.

    With ``--script`` the event is recorded in the module-level ``eventmap``
    under ``model``; otherwise a line describing it is printed to stdout.

    Args:
        eventurl: URL of the JSON event list.
        model: CPU model number the event list belongs to.

    Returns:
        The number of matching events found.
    """
    sys.stderr.write("Downloading %s\n" % eventurl)
    u = urllib2.urlopen(eventurl)
    try:
        events = json.loads(u.read())
    finally:
        # Release the connection even if reading or decoding fails.
        u.close()

    found = 0
    for j in events:
        if j[u'EventName'] not in target_events:
            continue
        event = "cpu/event=%s,umask=%s/" % (j[u'EventCode'], j[u'UMask'])
        # Compare the numeric value: on Python 2 a str always orders above an
        # int, so a raw `j['PEBS'] > 0` is true for every string, "0" included.
        # NOTE(review): presumably the JSON stores PEBS as a numeric string
        # such as "0"/"1" -- confirm against the downloaded files.
        if _pebs_level(j) > 0:
            event += "p"
        if args.script:
            eventmap[event].append(model)
        else:
            sys.stdout.write("%s event for model %s is %s\n"
                             % (j[u'EventName'], model, event))
        found += 1
    return found
# Main script: find the taken-branch event(s) for the running CPU (or, with
# --all, for every "core" entry of the map file) and either list them or,
# with --script, emit a /bin/sh wrapper around "perf record".
if not args.all:
    cpu, model = get_cpu_str()
    if not cpu:
        sys.exit("Unknown CPU type")

url = baseurl + "/mapfile.csv"
sys.stderr.write("Downloading %s\n" % url)
u = urllib2.urlopen(url)
found = 0
cpufound = 0
try:
    for j in u:
        # Columns used here: n[0] CPU id, n[2] event file path, n[3] event type.
        n = j.rstrip().split(',')
        if len(n) < 4 or n[3] != "core":
            continue
        if not args.all and n[0] != cpu:
            continue
        if args.all:
            # The model is the third dash-separated field of the CPU id, in
            # hex.  Index it instead of unpacking exactly three fields so ids
            # with extra fields do not abort the run.
            # NOTE(review): presumably some map files append a stepping
            # field to the id -- confirm.
            try:
                model = int(n[0].split("-")[2], 16)
            except (IndexError, ValueError):
                sys.stderr.write("Skipping unrecognized CPU id %s\n" % n[0])
                continue
        cpufound += 1
        found += find_event(baseurl + n[2], model)
finally:
    # Close the map-file connection even when a download or parse fails.
    u.close()

# Validate before emitting anything so a failed lookup does not leave a
# script with an empty model table on stdout.
if cpufound == 0 and not args.all:
    sys.exit('CPU %s not found' % cpu)
if found == 0:
    sys.exit('Branch event not found')

if args.script:
    # Raw string: the backslashes in "\s" belong to the egrep patterns.
    sys.stdout.write(r'''#!/bin/sh
# Profile workload for gcc profile feedback (autofdo) using Linux perf.
# Auto generated. To regenerate for new CPUs run
# contrib/gen_autofdo_event.py --script --all in gcc source
# usages:
# gcc-auto-profile program (profile program and children)
# gcc-auto-profile -a sleep X (profile all for X secs, may need root)
# gcc-auto-profile -p PID sleep X (profile PID)
# gcc-auto-profile --kernel -a sleep X (profile kernel)
# gcc-auto-profile --all -a sleep X (profile kernel and user space)
# Identify branches taken event for CPU.
#
FLAGS=u
if [ "$1" = "--kernel" ] ; then
FLAGS=k
shift
fi
if [ "$1" = "--all" ] ; then
FLAGS=uk
shift
fi
if ! grep -q Intel /proc/cpuinfo ; then
echo >&2 "Only Intel CPUs supported"
exit 1
fi
if grep -q hypervisor /proc/cpuinfo ; then
echo >&2 "Warning: branch profiling may not be functional in VMs"
fi
case `egrep -q "^cpu family\s*: 6" /proc/cpuinfo &&
egrep "^model\s*:" /proc/cpuinfo | head -n1` in''' + "\n")
    # One case arm per event: all models sharing the event are joined with
    # "|" and a line continuation, the last one closes the pattern.
    for event, mod in eventmap.items():
        for m in mod[:-1]:
            sys.stdout.write("model*:\\ %s|\\\n" % m)
        sys.stdout.write('model*:\\ %s) E="%s$FLAGS" ;;\n' % (mod[-1], event))
    sys.stdout.write('''*)
echo >&2 "Unknown CPU. Run contrib/gen_autofdo_event.py --all --script to update script."
exit 1 ;;
''')
    sys.stdout.write("esac\n")
    sys.stdout.write('exec perf record -e $E -b "$@"\n')
|