From 5f6caa4f2ba45c8a99c915c09c4d56bd1621a450 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Fri, 7 Oct 2011 09:37:57 +0200 Subject: kvm: Add tool for querying VMX capabilities Taken from original qemu-kvm/kvm/scripts/vmxcap. Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- scripts/kvm/vmxcap | 224 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 224 insertions(+) create mode 100755 scripts/kvm/vmxcap diff --git a/scripts/kvm/vmxcap b/scripts/kvm/vmxcap new file mode 100755 index 0000000..a74ce71 --- /dev/null +++ b/scripts/kvm/vmxcap @@ -0,0 +1,224 @@ +#!/usr/bin/python +# +# tool for querying VMX capabilities +# +# Copyright 2009-2010 Red Hat, Inc. +# +# Authors: +# Avi Kivity +# +# This work is licensed under the terms of the GNU GPL, version 2. See +# the COPYING file in the top-level directory. + +MSR_IA32_VMX_BASIC = 0x480 +MSR_IA32_VMX_PINBASED_CTLS = 0x481 +MSR_IA32_VMX_PROCBASED_CTLS = 0x482 +MSR_IA32_VMX_EXIT_CTLS = 0x483 +MSR_IA32_VMX_ENTRY_CTLS = 0x484 +MSR_IA32_VMX_MISC_CTLS = 0x485 +MSR_IA32_VMX_PROCBASED_CTLS2 = 0x48B +MSR_IA32_VMX_EPT_VPID_CAP = 0x48C +MSR_IA32_VMX_TRUE_PINBASED_CTLS = 0x48D +MSR_IA32_VMX_TRUE_PROCBASED_CTLS = 0x48E +MSR_IA32_VMX_TRUE_EXIT_CTLS = 0x48F +MSR_IA32_VMX_TRUE_ENTRY_CTLS = 0x490 + +class msr(object): + def __init__(self): + try: + self.f = file('/dev/cpu/0/msr') + except: + self.f = file('/dev/msr0') + def read(self, index, default = None): + import struct + self.f.seek(index) + try: + return struct.unpack('Q', self.f.read(8))[0] + except: + return default + +class Control(object): + def __init__(self, name, bits, cap_msr, true_cap_msr = None): + self.name = name + self.bits = bits + self.cap_msr = cap_msr + self.true_cap_msr = true_cap_msr + def read2(self, nr): + m = msr() + val = m.read(nr, 0) + return (val & 0xffffffff, val >> 32) + def show(self): + print self.name + mbz, mb1 = self.read2(self.cap_msr) + tmbz, tmb1 = 0, 0 + if self.true_cap_msr: + tmbz, tmb1 = 
self.read2(self.true_cap_msr) + for bit in sorted(self.bits.keys()): + zero = not (mbz & (1 << bit)) + one = mb1 & (1 << bit) + true_zero = not (tmbz & (1 << bit)) + true_one = tmb1 & (1 << bit) + s= '?' + if (self.true_cap_msr and true_zero and true_one + and one and not zero): + s = 'default' + elif zero and not one: + s = 'no' + elif one and not zero: + s = 'forced' + elif one and zero: + s = 'yes' + print ' %-40s %s' % (self.bits[bit], s) + +class Misc(object): + def __init__(self, name, bits, msr): + self.name = name + self.bits = bits + self.msr = msr + def show(self): + print self.name + value = msr().read(self.msr, 0) + def first_bit(key): + if type(key) is tuple: + return key[0] + else: + return key + for bits in sorted(self.bits.keys(), key = first_bit): + if type(bits) is tuple: + lo, hi = bits + fmt = int + else: + lo = hi = bits + def fmt(x): + return { True: 'yes', False: 'no' }[x] + v = (value >> lo) & ((1 << (hi - lo + 1)) - 1) + print ' %-40s %s' % (self.bits[bits], fmt(v)) + +controls = [ + Control( + name = 'pin-based controls', + bits = { + 0: 'External interrupt exiting', + 3: 'NMI exiting', + 5: 'Virtual NMIs', + 6: 'Activate VMX-preemption timer', + }, + cap_msr = MSR_IA32_VMX_PINBASED_CTLS, + true_cap_msr = MSR_IA32_VMX_TRUE_PINBASED_CTLS, + ), + + Control( + name = 'primary processor-based controls', + bits = { + 2: 'Interrupt window exiting', + 3: 'Use TSC offsetting', + 7: 'HLT exiting', + 9: 'INVLPG exiting', + 10: 'MWAIT exiting', + 11: 'RDPMC exiting', + 12: 'RDTSC exiting', + 15: 'CR3-load exiting', + 16: 'CR3-store exiting', + 19: 'CR8-load exiting', + 20: 'CR8-store exiting', + 21: 'Use TPR shadow', + 22: 'NMI-window exiting', + 23: 'MOV-DR exiting', + 24: 'Unconditional I/O exiting', + 25: 'Use I/O bitmaps', + 27: 'Monitor trap flag', + 28: 'Use MSR bitmaps', + 29: 'MONITOR exiting', + 30: 'PAUSE exiting', + 31: 'Activate secondary control', + }, + cap_msr = MSR_IA32_VMX_PROCBASED_CTLS, + true_cap_msr = 
MSR_IA32_VMX_TRUE_PROCBASED_CTLS, + ), + + Control( + name = 'secondary processor-based controls', + bits = { + 0: 'Virtualize APIC accesses', + 1: 'Enable EPT', + 2: 'Descriptor-table exiting', + 4: 'Virtualize x2APIC mode', + 5: 'Enable VPID', + 6: 'WBINVD exiting', + 7: 'Unrestricted guest', + 10: 'PAUSE-loop exiting', + }, + cap_msr = MSR_IA32_VMX_PROCBASED_CTLS2, + ), + + Control( + name = 'VM-Exit controls', + bits = { + 2: 'Save debug controls', + 9: 'Host address-space size', + 12: 'Load IA32_PERF_GLOBAL_CTRL', + 15: 'Acknowledge interrupt on exit', + 18: 'Save IA32_PAT', + 19: 'Load IA32_PAT', + 20: 'Save IA32_EFER', + 21: 'Load IA32_EFER', + 22: 'Save VMX-preemption timer value', + }, + cap_msr = MSR_IA32_VMX_EXIT_CTLS, + true_cap_msr = MSR_IA32_VMX_TRUE_EXIT_CTLS, + ), + + Control( + name = 'VM-Entry controls', + bits = { + 2: 'Load debug controls', + 9: 'IA-32e mode guest', + 10: 'Entry to SMM', + 11: 'Deactivate dual-monitor treatment', + 13: 'Load IA32_PERF_GLOBAL_CTRL', + 14: 'Load IA32_PAT', + 15: 'Load IA32_EFER', + }, + cap_msr = MSR_IA32_VMX_ENTRY_CTLS, + true_cap_msr = MSR_IA32_VMX_TRUE_ENTRY_CTLS, + ), + + Misc( + name = 'Miscellaneous data', + bits = { + (0,4): 'VMX-preemption timer scale (log2)', + 5: 'Store EFER.LMA into IA-32e mode guest control', + 6: 'HLT activity state', + 7: 'Shutdown activity state', + 8: 'Wait-for-SIPI activity state', + (16,24): 'Number of CR3-target values', + (25,27): 'MSR-load/store count recommendation', + (32,63): 'MSEG revision identifier', + }, + msr = MSR_IA32_VMX_MISC_CTLS, + ), + + Misc( + name = 'VPID and EPT capabilities', + bits = { + 0: 'Execute-only EPT translations', + 6: 'Page-walk length 4', + 8: 'Paging-structure memory type UC', + 14: 'Paging-structure memory type WB', + 16: '2MB EPT pages', + 17: '1GB EPT pages', + 20: 'INVEPT supported', + 25: 'Single-context INVEPT', + 26: 'All-context INVEPT', + 32: 'INVVPID supported', + 40: 'Individual-address INVVPID', + 41: 'Single-context INVVPID', + 42:
'All-context INVVPID', + 43: 'Single-context-retaining-globals INVVPID', + }, + msr = MSR_IA32_VMX_EPT_VPID_CAP, + ), + ] + +for c in controls: + c.show() -- cgit v1.1 From 626c427624ac1d6b5dd245cb37988f046cec5f03 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Fri, 7 Oct 2011 09:37:49 +0200 Subject: kvm: Add top-like kvm statistics script Taken from original qemu-kvm/kvm/kvm_stat. Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- scripts/kvm/kvm_stat | 480 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 480 insertions(+) create mode 100755 scripts/kvm/kvm_stat diff --git a/scripts/kvm/kvm_stat b/scripts/kvm/kvm_stat new file mode 100755 index 0000000..56d2bd7 --- /dev/null +++ b/scripts/kvm/kvm_stat @@ -0,0 +1,480 @@ +#!/usr/bin/python +# +# top-like utility for displaying kvm statistics +# +# Copyright 2006-2008 Qumranet Technologies +# Copyright 2008-2011 Red Hat, Inc. +# +# Authors: +# Avi Kivity +# +# This work is licensed under the terms of the GNU GPL, version 2. See +# the COPYING file in the top-level directory. 
+ +import curses +import sys, os, time, optparse + +class DebugfsProvider(object): + def __init__(self): + self.base = '/sys/kernel/debug/kvm' + self._fields = os.listdir(self.base) + def fields(self): + return self._fields + def select(self, fields): + self._fields = fields + def read(self): + def val(key): + return int(file(self.base + '/' + key).read()) + return dict([(key, val(key)) for key in self._fields]) + +vmx_exit_reasons = { + 0: 'EXCEPTION_NMI', + 1: 'EXTERNAL_INTERRUPT', + 2: 'TRIPLE_FAULT', + 7: 'PENDING_INTERRUPT', + 8: 'NMI_WINDOW', + 9: 'TASK_SWITCH', + 10: 'CPUID', + 12: 'HLT', + 14: 'INVLPG', + 15: 'RDPMC', + 16: 'RDTSC', + 18: 'VMCALL', + 19: 'VMCLEAR', + 20: 'VMLAUNCH', + 21: 'VMPTRLD', + 22: 'VMPTRST', + 23: 'VMREAD', + 24: 'VMRESUME', + 25: 'VMWRITE', + 26: 'VMOFF', + 27: 'VMON', + 28: 'CR_ACCESS', + 29: 'DR_ACCESS', + 30: 'IO_INSTRUCTION', + 31: 'MSR_READ', + 32: 'MSR_WRITE', + 33: 'INVALID_STATE', + 36: 'MWAIT_INSTRUCTION', + 39: 'MONITOR_INSTRUCTION', + 40: 'PAUSE_INSTRUCTION', + 41: 'MCE_DURING_VMENTRY', + 43: 'TPR_BELOW_THRESHOLD', + 44: 'APIC_ACCESS', + 48: 'EPT_VIOLATION', + 49: 'EPT_MISCONFIG', + 54: 'WBINVD', + 55: 'XSETBV', +} + +svm_exit_reasons = { + 0x000: 'READ_CR0', + 0x003: 'READ_CR3', + 0x004: 'READ_CR4', + 0x008: 'READ_CR8', + 0x010: 'WRITE_CR0', + 0x013: 'WRITE_CR3', + 0x014: 'WRITE_CR4', + 0x018: 'WRITE_CR8', + 0x020: 'READ_DR0', + 0x021: 'READ_DR1', + 0x022: 'READ_DR2', + 0x023: 'READ_DR3', + 0x024: 'READ_DR4', + 0x025: 'READ_DR5', + 0x026: 'READ_DR6', + 0x027: 'READ_DR7', + 0x030: 'WRITE_DR0', + 0x031: 'WRITE_DR1', + 0x032: 'WRITE_DR2', + 0x033: 'WRITE_DR3', + 0x034: 'WRITE_DR4', + 0x035: 'WRITE_DR5', + 0x036: 'WRITE_DR6', + 0x037: 'WRITE_DR7', + 0x040: 'EXCP_BASE', + 0x060: 'INTR', + 0x061: 'NMI', + 0x062: 'SMI', + 0x063: 'INIT', + 0x064: 'VINTR', + 0x065: 'CR0_SEL_WRITE', + 0x066: 'IDTR_READ', + 0x067: 'GDTR_READ', + 0x068: 'LDTR_READ', + 0x069: 'TR_READ', + 0x06a: 'IDTR_WRITE', + 0x06b: 'GDTR_WRITE', + 0x06c: 
'LDTR_WRITE', + 0x06d: 'TR_WRITE', + 0x06e: 'RDTSC', + 0x06f: 'RDPMC', + 0x070: 'PUSHF', + 0x071: 'POPF', + 0x072: 'CPUID', + 0x073: 'RSM', + 0x074: 'IRET', + 0x075: 'SWINT', + 0x076: 'INVD', + 0x077: 'PAUSE', + 0x078: 'HLT', + 0x079: 'INVLPG', + 0x07a: 'INVLPGA', + 0x07b: 'IOIO', + 0x07c: 'MSR', + 0x07d: 'TASK_SWITCH', + 0x07e: 'FERR_FREEZE', + 0x07f: 'SHUTDOWN', + 0x080: 'VMRUN', + 0x081: 'VMMCALL', + 0x082: 'VMLOAD', + 0x083: 'VMSAVE', + 0x084: 'STGI', + 0x085: 'CLGI', + 0x086: 'SKINIT', + 0x087: 'RDTSCP', + 0x088: 'ICEBP', + 0x089: 'WBINVD', + 0x08a: 'MONITOR', + 0x08b: 'MWAIT', + 0x08c: 'MWAIT_COND', + 0x400: 'NPF', +} + +vendor_exit_reasons = { + 'vmx': vmx_exit_reasons, + 'svm': svm_exit_reasons, +} + +exit_reasons = None + +for line in file('/proc/cpuinfo').readlines(): + if line.startswith('flags'): + for flag in line.split(): + if flag in vendor_exit_reasons: + exit_reasons = vendor_exit_reasons[flag] + +filters = { + 'kvm_exit': ('exit_reason', exit_reasons) +} + +def invert(d): + return dict((x[1], x[0]) for x in d.iteritems()) + +for f in filters: + filters[f] = (filters[f][0], invert(filters[f][1])) + +import ctypes, struct, array + +libc = ctypes.CDLL('libc.so.6') +syscall = libc.syscall +class perf_event_attr(ctypes.Structure): + _fields_ = [('type', ctypes.c_uint32), + ('size', ctypes.c_uint32), + ('config', ctypes.c_uint64), + ('sample_freq', ctypes.c_uint64), + ('sample_type', ctypes.c_uint64), + ('read_format', ctypes.c_uint64), + ('flags', ctypes.c_uint64), + ('wakeup_events', ctypes.c_uint32), + ('bp_type', ctypes.c_uint32), + ('bp_addr', ctypes.c_uint64), + ('bp_len', ctypes.c_uint64), + ] +def _perf_event_open(attr, pid, cpu, group_fd, flags): + return syscall(298, ctypes.pointer(attr), ctypes.c_int(pid), + ctypes.c_int(cpu), ctypes.c_int(group_fd), + ctypes.c_long(flags)) + +PERF_TYPE_HARDWARE = 0 +PERF_TYPE_SOFTWARE = 1 +PERF_TYPE_TRACEPOINT = 2 +PERF_TYPE_HW_CACHE = 3 +PERF_TYPE_RAW = 4 +PERF_TYPE_BREAKPOINT = 5 + +PERF_SAMPLE_IP = 1 << 0 
+PERF_SAMPLE_TID = 1 << 1 +PERF_SAMPLE_TIME = 1 << 2 +PERF_SAMPLE_ADDR = 1 << 3 +PERF_SAMPLE_READ = 1 << 4 +PERF_SAMPLE_CALLCHAIN = 1 << 5 +PERF_SAMPLE_ID = 1 << 6 +PERF_SAMPLE_CPU = 1 << 7 +PERF_SAMPLE_PERIOD = 1 << 8 +PERF_SAMPLE_STREAM_ID = 1 << 9 +PERF_SAMPLE_RAW = 1 << 10 + +PERF_FORMAT_TOTAL_TIME_ENABLED = 1 << 0 +PERF_FORMAT_TOTAL_TIME_RUNNING = 1 << 1 +PERF_FORMAT_ID = 1 << 2 +PERF_FORMAT_GROUP = 1 << 3 + +import re + +sys_tracing = '/sys/kernel/debug/tracing' + +class Group(object): + def __init__(self, cpu): + self.events = [] + self.group_leader = None + self.cpu = cpu + def add_event(self, name, event_set, tracepoint, filter = None): + self.events.append(Event(group = self, + name = name, event_set = event_set, + tracepoint = tracepoint, filter = filter)) + if len(self.events) == 1: + self.file = os.fdopen(self.events[0].fd) + def read(self): + bytes = 8 * (1 + len(self.events)) + fmt = 'xxxxxxxx' + 'q' * len(self.events) + return dict(zip([event.name for event in self.events], + struct.unpack(fmt, self.file.read(bytes)))) + +class Event(object): + def __init__(self, group, name, event_set, tracepoint, filter = None): + self.name = name + attr = perf_event_attr() + attr.type = PERF_TYPE_TRACEPOINT + attr.size = ctypes.sizeof(attr) + id_path = os.path.join(sys_tracing, 'events', event_set, + tracepoint, 'id') + id = int(file(id_path).read()) + attr.config = id + attr.sample_type = (PERF_SAMPLE_RAW + | PERF_SAMPLE_TIME + | PERF_SAMPLE_CPU) + attr.sample_period = 1 + attr.read_format = PERF_FORMAT_GROUP + group_leader = -1 + if group.events: + group_leader = group.events[0].fd + fd = _perf_event_open(attr, -1, group.cpu, group_leader, 0) + if fd == -1: + raise Exception('perf_event_open failed') + if filter: + import fcntl + fcntl.ioctl(fd, 0x40082406, filter) + self.fd = fd + def enable(self): + import fcntl + fcntl.ioctl(self.fd, 0x00002400, 0) + def disable(self): + import fcntl + fcntl.ioctl(self.fd, 0x00002401, 0) + +class TracepointProvider(object): 
+ def __init__(self): + path = os.path.join(sys_tracing, 'events', 'kvm') + fields = [f + for f in os.listdir(path) + if os.path.isdir(os.path.join(path, f))] + extra = [] + for f in fields: + if f in filters: + subfield, values = filters[f] + for name, number in values.iteritems(): + extra.append(f + '(' + name + ')') + fields += extra + self._setup(fields) + self.select(fields) + def fields(self): + return self._fields + def _setup(self, _fields): + self._fields = _fields + cpure = r'cpu([0-9]+)' + self.cpus = [int(re.match(cpure, x).group(1)) + for x in os.listdir('/sys/devices/system/cpu') + if re.match(cpure, x)] + import resource + nfiles = len(self.cpus) * 1000 + resource.setrlimit(resource.RLIMIT_NOFILE, (nfiles, nfiles)) + events = [] + self.group_leaders = [] + for cpu in self.cpus: + group = Group(cpu) + for name in _fields: + tracepoint = name + filter = None + m = re.match(r'(.*)\((.*)\)', name) + if m: + tracepoint, sub = m.groups() + filter = '%s==%d\0' % (filters[tracepoint][0], + filters[tracepoint][1][sub]) + event = group.add_event(name, event_set = 'kvm', + tracepoint = tracepoint, + filter = filter) + self.group_leaders.append(group) + def select(self, fields): + for group in self.group_leaders: + for event in group.events: + if event.name in fields: + event.enable() + else: + event.disable() + def read(self): + from collections import defaultdict + ret = defaultdict(int) + for group in self.group_leaders: + for name, val in group.read().iteritems(): + ret[name] += val + return ret + +class Stats: + def __init__(self, provider, fields = None): + self.provider = provider + self.fields_filter = fields + self._update() + def _update(self): + def wanted(key): + import re + if not self.fields_filter: + return True + return re.match(self.fields_filter, key) is not None + self.values = dict([(key, None) + for key in self.provider.fields() + if wanted(key)]) + self.provider.select(self.values.keys()) + def set_fields_filter(self, fields_filter): +
self.fields_filter = fields_filter + self._update() + def get(self): + new = self.provider.read() + for key in self.provider.fields(): + oldval = self.values.get(key, (0, 0)) + newval = new[key] + newdelta = None + if oldval is not None: + newdelta = newval - oldval[0] + self.values[key] = (newval, newdelta) + return self.values + +if not os.access('/sys/kernel/debug', os.F_OK): + print 'Please enable CONFIG_DEBUG_FS in your kernel' + sys.exit(1) +if not os.access('/sys/kernel/debug/kvm', os.F_OK): + print "Please mount debugfs ('mount -t debugfs debugfs /sys/kernel/debug')" + print "and ensure the kvm modules are loaded" + sys.exit(1) + +label_width = 40 +number_width = 10 + +def tui(screen, stats): + curses.use_default_colors() + curses.noecho() + drilldown = False + fields_filter = stats.fields_filter + def update_drilldown(): + if not fields_filter: + if drilldown: + stats.set_fields_filter(None) + else: + stats.set_fields_filter(r'^[^\(]*$') + update_drilldown() + def refresh(sleeptime): + screen.erase() + screen.addstr(0, 0, 'kvm statistics') + row = 2 + s = stats.get() + def sortkey(x): + if s[x][1]: + return (-s[x][1], -s[x][0]) + else: + return (0, -s[x][0]) + for key in sorted(s.keys(), key = sortkey): + if row >= screen.getmaxyx()[0]: + break + values = s[key] + if not values[0] and not values[1]: + break + col = 1 + screen.addstr(row, col, key) + col += label_width + screen.addstr(row, col, '%10d' % (values[0],)) + col += number_width + if values[1] is not None: + screen.addstr(row, col, '%8d' % (values[1] / sleeptime,)) + row += 1 + screen.refresh() + + sleeptime = 0.25 + while True: + refresh(sleeptime) + curses.halfdelay(int(sleeptime * 10)) + sleeptime = 3 + try: + c = screen.getkey() + if c == 'x': + drilldown = not drilldown + update_drilldown() + if c == 'q': + break + except KeyboardInterrupt: + break + except curses.error: + continue + +def batch(stats): + s = stats.get() + time.sleep(1) + s = stats.get() + for key in sorted(s.keys()): + values 
= s[key] + print '%-22s%10d%10d' % (key, values[0], values[1]) + +def log(stats): + keys = sorted(stats.get().iterkeys()) + def banner(): + for k in keys: + print '%10s' % k[0:9], + print + def statline(): + s = stats.get() + for k in keys: + print ' %9d' % s[k][1], + print + line = 0 + banner_repeat = 20 + while True: + time.sleep(1) + if line % banner_repeat == 0: + banner() + statline() + line += 1 + +options = optparse.OptionParser() +options.add_option('-1', '--once', '--batch', + action = 'store_true', + default = False, + dest = 'once', + help = 'run in batch mode for one second', + ) +options.add_option('-l', '--log', + action = 'store_true', + default = False, + dest = 'log', + help = 'run in logging mode (like vmstat)', + ) +options.add_option('-f', '--fields', + action = 'store', + default = None, + dest = 'fields', + help = 'fields to display (regex)', + ) +(options, args) = options.parse_args(sys.argv) + +try: + provider = TracepointProvider() +except: + provider = DebugfsProvider() + +stats = Stats(provider, fields = options.fields) + +if options.log: + log(stats) +elif not options.once: + import curses.wrapper + curses.wrapper(tui, stats) +else: + batch(stats) -- cgit v1.1 From 1cae88b9f4121c9af0bf677435c6129e643280fd Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 18 Oct 2011 19:43:12 +0200 Subject: kvm: avoid reentering kvm_flush_coalesced_mmio_buffer() mmio callbacks invoked by kvm_flush_coalesced_mmio_buffer() may themselves indirectly call kvm_flush_coalesced_mmio_buffer(). Prevent reentering the function by checking a flag that indicates we're processing coalesced mmio requests.
Signed-off-by: Avi Kivity --- kvm-all.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/kvm-all.c b/kvm-all.c index e7faf5c..c09ddf7 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -64,6 +64,7 @@ struct KVMState int vmfd; int coalesced_mmio; struct kvm_coalesced_mmio_ring *coalesced_mmio_ring; + bool coalesced_flush_in_progress; int broken_set_mem_region; int migration_log; int vcpu_events; @@ -876,6 +877,13 @@ static int kvm_handle_internal_error(CPUState *env, struct kvm_run *run) void kvm_flush_coalesced_mmio_buffer(void) { KVMState *s = kvm_state; + + if (s->coalesced_flush_in_progress) { + return; + } + + s->coalesced_flush_in_progress = true; + if (s->coalesced_mmio_ring) { struct kvm_coalesced_mmio_ring *ring = s->coalesced_mmio_ring; while (ring->first != ring->last) { @@ -888,6 +896,8 @@ void kvm_flush_coalesced_mmio_buffer(void) ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX; } } + + s->coalesced_flush_in_progress = false; } static void do_kvm_cpu_synchronize_state(void *_env) -- cgit v1.1 From 38d2c27ea68468bd2fdaa19c74d9e6d290f94777 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Mon, 24 Oct 2011 21:27:16 -0200 Subject: Revert "kvm: support TSC deadline MSR" This reverts commit bfc2455ddbb41148494a084d15777e6bed7533c3. New patch with subsections will follow. 
Signed-off-by: Marcelo Tosatti --- target-i386/cpu.h | 4 +--- target-i386/kvm.c | 14 -------------- target-i386/machine.c | 1 - 3 files changed, 1 insertion(+), 18 deletions(-) diff --git a/target-i386/cpu.h b/target-i386/cpu.h index a973f2e..ae36489 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -283,7 +283,6 @@ #define MSR_IA32_APICBASE_BSP (1<<8) #define MSR_IA32_APICBASE_ENABLE (1<<11) #define MSR_IA32_APICBASE_BASE (0xfffff<<12) -#define MSR_IA32_TSCDEADLINE 0x6e0 #define MSR_MTRRcap 0xfe #define MSR_MTRRcap_VCNT 8 @@ -688,7 +687,6 @@ typedef struct CPUX86State { uint64_t async_pf_en_msr; uint64_t tsc; - uint64_t tsc_deadline; uint64_t mcg_status; @@ -949,7 +947,7 @@ uint64_t cpu_get_tsc(CPUX86State *env); #define cpu_list_id x86_cpu_list #define cpudef_setup x86_cpudef_setup -#define CPU_SAVE_VERSION 13 +#define CPU_SAVE_VERSION 12 /* MMU modes definitions */ #define MMU_MODE0_SUFFIX _kernel diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 90a6ffb..b6eef04 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -59,7 +59,6 @@ const KVMCapabilityInfo kvm_arch_required_capabilities[] = { static bool has_msr_star; static bool has_msr_hsave_pa; -static bool has_msr_tsc_deadline; static bool has_msr_async_pf_en; static int lm_capable_kernel; @@ -569,10 +568,6 @@ static int kvm_get_supported_msrs(KVMState *s) has_msr_hsave_pa = true; continue; } - if (kvm_msr_list->indices[i] == MSR_IA32_TSCDEADLINE) { - has_msr_tsc_deadline = true; - continue; - } } } @@ -886,9 +881,6 @@ static int kvm_put_msrs(CPUState *env, int level) if (has_msr_hsave_pa) { kvm_msr_entry_set(&msrs[n++], MSR_VM_HSAVE_PA, env->vm_hsave); } - if (has_msr_tsc_deadline) { - kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSCDEADLINE, env->tsc_deadline); - } #ifdef TARGET_X86_64 if (lm_capable_kernel) { kvm_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar); @@ -1135,9 +1127,6 @@ static int kvm_get_msrs(CPUState *env) if (has_msr_hsave_pa) { msrs[n++].index = MSR_VM_HSAVE_PA; } - if 
(has_msr_tsc_deadline) { - msrs[n++].index = MSR_IA32_TSCDEADLINE; - } if (!env->tsc_valid) { msrs[n++].index = MSR_IA32_TSC; @@ -1206,9 +1195,6 @@ static int kvm_get_msrs(CPUState *env) case MSR_IA32_TSC: env->tsc = msrs[i].data; break; - case MSR_IA32_TSCDEADLINE: - env->tsc_deadline = msrs[i].data; - break; case MSR_VM_HSAVE_PA: env->vm_hsave = msrs[i].data; break; diff --git a/target-i386/machine.c b/target-i386/machine.c index 25fa97d..9aca8e0 100644 --- a/target-i386/machine.c +++ b/target-i386/machine.c @@ -410,7 +410,6 @@ static const VMStateDescription vmstate_cpu = { VMSTATE_UINT64_V(xcr0, CPUState, 12), VMSTATE_UINT64_V(xstate_bv, CPUState, 12), VMSTATE_YMMH_REGS_VARS(ymmh_regs, CPUState, CPU_NB_REGS, 12), - VMSTATE_UINT64_V(tsc_deadline, CPUState, 13), VMSTATE_END_OF_LIST() /* The above list is not sorted /wrt version numbers, watch out! */ }, -- cgit v1.1 From aa82ba549a3a88f7215e65956f3cb4bfd34835cc Mon Sep 17 00:00:00 2001 From: "Liu, Jinsong" Date: Wed, 5 Oct 2011 16:52:32 -0300 Subject: kvm: support TSC deadline MSR with subsection KVM add emulation of lapic tsc deadline timer for guest. This patch is co-operation work at qemu side. Use subsections to save/restore the field (mtosatti). 
Signed-off-by: Liu, Jinsong Signed-off-by: Marcelo Tosatti --- target-i386/cpu.h | 2 ++ target-i386/kvm.c | 14 ++++++++++++++ target-i386/machine.c | 21 +++++++++++++++++++++ 3 files changed, 37 insertions(+) diff --git a/target-i386/cpu.h b/target-i386/cpu.h index ae36489..29412dc 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -283,6 +283,7 @@ #define MSR_IA32_APICBASE_BSP (1<<8) #define MSR_IA32_APICBASE_ENABLE (1<<11) #define MSR_IA32_APICBASE_BASE (0xfffff<<12) +#define MSR_IA32_TSCDEADLINE 0x6e0 #define MSR_MTRRcap 0xfe #define MSR_MTRRcap_VCNT 8 @@ -687,6 +688,7 @@ typedef struct CPUX86State { uint64_t async_pf_en_msr; uint64_t tsc; + uint64_t tsc_deadline; uint64_t mcg_status; diff --git a/target-i386/kvm.c b/target-i386/kvm.c index b6eef04..90a6ffb 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -59,6 +59,7 @@ const KVMCapabilityInfo kvm_arch_required_capabilities[] = { static bool has_msr_star; static bool has_msr_hsave_pa; +static bool has_msr_tsc_deadline; static bool has_msr_async_pf_en; static int lm_capable_kernel; @@ -568,6 +569,10 @@ static int kvm_get_supported_msrs(KVMState *s) has_msr_hsave_pa = true; continue; } + if (kvm_msr_list->indices[i] == MSR_IA32_TSCDEADLINE) { + has_msr_tsc_deadline = true; + continue; + } } } @@ -881,6 +886,9 @@ static int kvm_put_msrs(CPUState *env, int level) if (has_msr_hsave_pa) { kvm_msr_entry_set(&msrs[n++], MSR_VM_HSAVE_PA, env->vm_hsave); } + if (has_msr_tsc_deadline) { + kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSCDEADLINE, env->tsc_deadline); + } #ifdef TARGET_X86_64 if (lm_capable_kernel) { kvm_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar); @@ -1127,6 +1135,9 @@ static int kvm_get_msrs(CPUState *env) if (has_msr_hsave_pa) { msrs[n++].index = MSR_VM_HSAVE_PA; } + if (has_msr_tsc_deadline) { + msrs[n++].index = MSR_IA32_TSCDEADLINE; + } if (!env->tsc_valid) { msrs[n++].index = MSR_IA32_TSC; @@ -1195,6 +1206,9 @@ static int kvm_get_msrs(CPUState *env) case MSR_IA32_TSC: env->tsc = 
msrs[i].data; break; + case MSR_IA32_TSCDEADLINE: + env->tsc_deadline = msrs[i].data; + break; case MSR_VM_HSAVE_PA: env->vm_hsave = msrs[i].data; break; diff --git a/target-i386/machine.c b/target-i386/machine.c index 9aca8e0..176d372 100644 --- a/target-i386/machine.c +++ b/target-i386/machine.c @@ -310,6 +310,24 @@ static const VMStateDescription vmstate_fpop_ip_dp = { } }; +static bool tscdeadline_needed(void *opaque) +{ + CPUState *env = opaque; + + return env->tsc_deadline != 0; +} + +static const VMStateDescription vmstate_msr_tscdeadline = { + .name = "cpu/msr_tscdeadline", + .version_id = 1, + .minimum_version_id = 1, + .minimum_version_id_old = 1, + .fields = (VMStateField []) { + VMSTATE_UINT64(tsc_deadline, CPUState), + VMSTATE_END_OF_LIST() + } +}; + static const VMStateDescription vmstate_cpu = { .name = "cpu", .version_id = CPU_SAVE_VERSION, @@ -420,6 +438,9 @@ static const VMStateDescription vmstate_cpu = { } , { .vmsd = &vmstate_fpop_ip_dp, .needed = fpop_ip_dp_needed, + }, { + .vmsd = &vmstate_msr_tscdeadline, + .needed = tscdeadline_needed, } , { /* empty */ } -- cgit v1.1 From 21e87c4625f290824f4f05d098e576cda40421ce Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 4 Oct 2011 16:26:35 +0200 Subject: i386: wire up MSR_IA32_MISC_ENABLE It's needed for its default value - bit 0 specifies that "rep movs" is good enough for memcpy, and Linux may use a slower memcpy if it is not set, depending on cpu family/model.
Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- target-i386/cpu.h | 5 +++++ target-i386/helper.c | 1 + target-i386/kvm.c | 15 +++++++++++++++ target-i386/machine.c | 21 +++++++++++++++++++++ target-i386/op_helper.c | 6 ++++++ 5 files changed, 48 insertions(+) diff --git a/target-i386/cpu.h b/target-i386/cpu.h index 29412dc..a08ce9d 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -300,6 +300,10 @@ #define MSR_IA32_PERF_STATUS 0x198 +#define MSR_IA32_MISC_ENABLE 0x1a0 +/* Indicates good rep/movs microcode on some processors: */ +#define MSR_IA32_MISC_ENABLE_DEFAULT 1 + #define MSR_MTRRphysBase(reg) (0x200 + 2 * (reg)) #define MSR_MTRRphysMask(reg) (0x200 + 2 * (reg) + 1) @@ -691,6 +695,7 @@ typedef struct CPUX86State { uint64_t tsc_deadline; uint64_t mcg_status; + uint64_t msr_ia32_misc_enable; /* exception/interrupt handling */ int error_code; diff --git a/target-i386/helper.c b/target-i386/helper.c index 5df40d4..6c6a167 100644 --- a/target-i386/helper.c +++ b/target-i386/helper.c @@ -98,6 +98,7 @@ void cpu_reset(CPUX86State *env) env->mxcsr = 0x1f80; env->pat = 0x0007040600070406ULL; + env->msr_ia32_misc_enable = MSR_IA32_MISC_ENABLE_DEFAULT; memset(env->dr, 0, sizeof(env->dr)); env->dr[6] = DR6_FIXED_1; diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 90a6ffb..ddd115c 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -61,6 +61,7 @@ static bool has_msr_star; static bool has_msr_hsave_pa; static bool has_msr_tsc_deadline; static bool has_msr_async_pf_en; +static bool has_msr_misc_enable; static int lm_capable_kernel; static struct kvm_cpuid2 *try_get_cpuid(KVMState *s, int max) @@ -573,6 +574,10 @@ static int kvm_get_supported_msrs(KVMState *s) has_msr_tsc_deadline = true; continue; } + if (kvm_msr_list->indices[i] == MSR_IA32_MISC_ENABLE) { + has_msr_misc_enable = true; + continue; + } } } @@ -889,6 +894,10 @@ static int kvm_put_msrs(CPUState *env, int level) if (has_msr_tsc_deadline) { kvm_msr_entry_set(&msrs[n++], 
MSR_IA32_TSCDEADLINE, env->tsc_deadline); } + if (has_msr_misc_enable) { + kvm_msr_entry_set(&msrs[n++], MSR_IA32_MISC_ENABLE, + env->msr_ia32_misc_enable); + } #ifdef TARGET_X86_64 if (lm_capable_kernel) { kvm_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar); @@ -1138,6 +1147,9 @@ static int kvm_get_msrs(CPUState *env) if (has_msr_tsc_deadline) { msrs[n++].index = MSR_IA32_TSCDEADLINE; } + if (has_msr_misc_enable) { + msrs[n++].index = MSR_IA32_MISC_ENABLE; + } if (!env->tsc_valid) { msrs[n++].index = MSR_IA32_TSC; @@ -1224,6 +1236,9 @@ static int kvm_get_msrs(CPUState *env) case MSR_MCG_CTL: env->mcg_ctl = msrs[i].data; break; + case MSR_IA32_MISC_ENABLE: + env->msr_ia32_misc_enable = msrs[i].data; + break; default: if (msrs[i].index >= MSR_MC0_CTL && msrs[i].index < MSR_MC0_CTL + (env->mcg_cap & 0xff) * 4) { diff --git a/target-i386/machine.c b/target-i386/machine.c index 176d372..d6e98ff 100644 --- a/target-i386/machine.c +++ b/target-i386/machine.c @@ -328,6 +328,24 @@ static const VMStateDescription vmstate_msr_tscdeadline = { } }; +static bool misc_enable_needed(void *opaque) +{ + CPUState *env = opaque; + + return env->msr_ia32_misc_enable != MSR_IA32_MISC_ENABLE_DEFAULT; +} + +static const VMStateDescription vmstate_msr_ia32_misc_enable = { + .name = "cpu/msr_ia32_misc_enable", + .version_id = 1, + .minimum_version_id = 1, + .minimum_version_id_old = 1, + .fields = (VMStateField []) { + VMSTATE_UINT64(msr_ia32_misc_enable, CPUState), + VMSTATE_END_OF_LIST() + } +}; + static const VMStateDescription vmstate_cpu = { .name = "cpu", .version_id = CPU_SAVE_VERSION, @@ -441,6 +459,9 @@ static const VMStateDescription vmstate_cpu = { }, { .vmsd = &vmstate_msr_tscdeadline, .needed = tscdeadline_needed, + }, { + .vmsd = &vmstate_msr_ia32_misc_enable, + .needed = misc_enable_needed, } , { /* empty */ } diff --git a/target-i386/op_helper.c b/target-i386/op_helper.c index 3bb5a91..c89e4a4 100644 --- a/target-i386/op_helper.c +++ b/target-i386/op_helper.c @@ -3280,6 
+3280,9 @@ void helper_wrmsr(void) case MSR_TSC_AUX: env->tsc_aux = val; break; + case MSR_IA32_MISC_ENABLE: + env->msr_ia32_misc_enable = val; + break; default: if ((uint32_t)ECX >= MSR_MC0_CTL && (uint32_t)ECX < MSR_MC0_CTL + (4 * env->mcg_cap & 0xff)) { @@ -3413,6 +3416,9 @@ void helper_rdmsr(void) case MSR_MCG_STATUS: val = env->mcg_status; break; + case MSR_IA32_MISC_ENABLE: + val = env->msr_ia32_misc_enable; + break; default: if ((uint32_t)ECX >= MSR_MC0_CTL && (uint32_t)ECX < MSR_MC0_CTL + (4 * env->mcg_cap & 0xff)) { -- cgit v1.1