From 3bef0451e692966554b31e603424e7d40ba98d3a Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Fri, 29 Jan 2016 17:07:31 +0100 Subject: linux-user: Fix qemu-binfmt-conf.sh to store config across reboot Original qemu-binfmt-conf.sh is only able to write configuration into /proc/sys/fs/binfmt_misc, and the configuration is lost on reboot. This script can configure debian and systemd services to restore configuration on reboot. Moreover, it is able to manage binfmt credential and to configure the path of the interpreter. List of supported CPU is: i386 i486 alpha arm sparc32plus ppc ppc64 ppc64le m68k mips mipsel mipsn32 mipsn32el mips64 mips64el sh4 sh4eb s390x aarch64 Usage: qemu-binfmt-conf.sh [--qemu-path PATH][--debian][--systemd CPU] [--help][--credential yes|no][--exportdir PATH] Configure binfmt_misc to use qemu interpreter --help: display this usage --qemu-path: set path to qemu interpreter (/usr/local/bin) --debian: don't write into /proc, instead generate update-binfmts templates --systemd: don't write into /proc, instead generate file for systemd-binfmt.service for the given CPU --exportdir: define where to write configuration files (default: /etc/binfmt.d or /usr/share/binfmts) --credential: if yes, credential an security tokens are calculated according to the binary to interpret To import templates with update-binfmts, use : sudo update-binfmts --importdir /usr/share/binfmts --import qemu-CPU To remove interpreter, use : sudo update-binfmts --package qemu-CPU --remove qemu-CPU /usr/local/bin With systemd, binfmt files are loaded by systemd-binfmt.service The environment variable HOST_ARCH allows to override 'uname' to generate configuration files for a different architecture than the current one. Signed-off-by: Laurent Vivier Reviewed-by: Alexander Graf Signed-off-by: Riku Voipio --- scripts/qemu-binfmt-conf.sh | 389 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 320 insertions(+), 69 deletions(-) mode change 100644 => 100755 scripts/qemu-binfmt-conf.sh diff --git a/scripts/qemu-binfmt-conf.sh b/scripts/qemu-binfmt-conf.sh old mode 100644 new mode 100755 index 289b1a3..de4d1c1 --- a/scripts/qemu-binfmt-conf.sh +++ b/scripts/qemu-binfmt-conf.sh @@ -1,72 +1,323 @@ #!/bin/sh # enable automatic i386/ARM/M68K/MIPS/SPARC/PPC/s390 program execution by the kernel -# load the binfmt_misc module -if [ ! -d /proc/sys/fs/binfmt_misc ]; then - /sbin/modprobe binfmt_misc -fi -if [ ! -f /proc/sys/fs/binfmt_misc/register ]; then - mount binfmt_misc -t binfmt_misc /proc/sys/fs/binfmt_misc -fi - -# probe cpu type -cpu=`uname -m` -case "$cpu" in - i386|i486|i586|i686|i86pc|BePC|x86_64) - cpu="i386" - ;; - m68k) - cpu="m68k" - ;; - mips*) - cpu="mips" - ;; - "Power Macintosh"|ppc|ppc64) - cpu="ppc" - ;; - armv[4-9]*) - cpu="arm" - ;; -esac - -# register the interpreter for each cpu except for the native one -if [ $cpu != "i386" ] ; then - echo ':i386:M::\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x03\x00:\xff\xff\xff\xff\xff\xfe\xfe\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff:/usr/local/bin/qemu-i386:' > /proc/sys/fs/binfmt_misc/register - echo ':i486:M::\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x06\x00:\xff\xff\xff\xff\xff\xfe\xfe\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff:/usr/local/bin/qemu-i386:' > /proc/sys/fs/binfmt_misc/register -fi -if [ $cpu != "alpha" ] ; then - echo ':alpha:M::\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x26\x90:\xff\xff\xff\xff\xff\xfe\xfe\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff:/usr/local/bin/qemu-alpha:' > /proc/sys/fs/binfmt_misc/register -fi -if [ $cpu != "arm" ] ; then - echo ':arm:M::\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x28\x00:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff:/usr/local/bin/qemu-arm:' > /proc/sys/fs/binfmt_misc/register - echo ':armeb:M::\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x28:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/local/bin/qemu-armeb:' > /proc/sys/fs/binfmt_misc/register -fi -if [ $cpu != "aarch64" ] ; then - echo ':aarch64:M::\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\xb7\x00:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff:/usr/local/bin/qemu-aarch64:' > /proc/sys/fs/binfmt_misc/register -fi -if [ $cpu != "sparc" ] ; then - echo ':sparc:M::\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x02:\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/local/bin/qemu-sparc:' > /proc/sys/fs/binfmt_misc/register -fi -if [ $cpu != "ppc" ] ; then - echo ':ppc:M::\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x14:\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/local/bin/qemu-ppc:' > /proc/sys/fs/binfmt_misc/register -fi -if [ $cpu != "m68k" ] ; then - echo 'Please check cpu value and header information for m68k!' - echo ':m68k:M::\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x04:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/local/bin/qemu-m68k:' > /proc/sys/fs/binfmt_misc/register -fi -if [ $cpu != "mips" ] ; then - # FIXME: We could use the other endianness on a MIPS host. - echo ':mips:M::\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/local/bin/qemu-mips:' > /proc/sys/fs/binfmt_misc/register - echo ':mipsel:M::\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff:/usr/local/bin/qemu-mipsel:' > /proc/sys/fs/binfmt_misc/register - echo ':mipsn32:M::\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/local/bin/qemu-mipsn32:' > /proc/sys/fs/binfmt_misc/register - echo ':mipsn32el:M::\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff:/usr/local/bin/qemu-mipsn32el:' > /proc/sys/fs/binfmt_misc/register - echo ':mips64:M::\x7fELF\x02\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/local/bin/qemu-mips64:' > /proc/sys/fs/binfmt_misc/register - echo ':mips64el:M::\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff:/usr/local/bin/qemu-mips64el:' > /proc/sys/fs/binfmt_misc/register -fi -if [ $cpu != "sh" ] ; then - echo ':sh4:M::\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x2a\x00:\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff:/usr/local/bin/qemu-sh4:' > /proc/sys/fs/binfmt_misc/register - echo ':sh4eb:M::\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x2a:\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/local/bin/qemu-sh4eb:' > /proc/sys/fs/binfmt_misc/register -fi -if [ $cpu != "s390x" ] ; then - echo ':s390x:M::\x7fELF\x02\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x16:\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/local/bin/qemu-s390x:' > /proc/sys/fs/binfmt_misc/register -fi +qemu_target_list="i386 i486 alpha arm sparc32plus ppc ppc64 ppc64le m68k \ +mips mipsel mipsn32 mipsn32el mips64 mips64el \ +sh4 sh4eb s390x aarch64" + +i386_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x03\x00' +i386_mask='\xff\xff\xff\xff\xff\xfe\xfe\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff' +i386_family=i386 + +i486_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x06\x00' +i486_mask='\xff\xff\xff\xff\xff\xfe\xfe\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff' +i486_family=i386 + +alpha_magic='\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x26\x90' +alpha_mask='\xff\xff\xff\xff\xff\xfe\xfe\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff' +alpha_family=alpha + +arm_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x28\x00' +arm_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff' +arm_family=arm + +armeb_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x28' +armeb_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' +armeb_family=arm + +sparc_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x02' +sparc_mask='\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' +sparc_family=sparc + +sparc32plus_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x12' +sparc32plus_mask='\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' +sparc32plus_family=sparc + +ppc_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x14' +ppc_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' +ppc_family=ppc + +ppc64_magic='\x7fELF\x02\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x15' +ppc64_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' +ppc64_family=ppc + +ppc64le_magic='\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x15\x00' +ppc64le_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\x00' +ppc64le_family=ppcle + +m68k_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x04' +m68k_mask='\xff\xff\xff\xff\xff\xff\xfe\xfe\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' +m68k_family=m68k + +# FIXME: We could use the other endianness on a MIPS host. + +mips_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08' +mips_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' +mips_family=mips + +mipsel_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00' +mipsel_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff' +mipsel_family=mips + +mipsn32_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08' +mipsn32_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' +mipsn32_family=mips + +mipsn32el_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00' +mipsn32el_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff' +mipsn32el_family=mips + +mips64_magic='\x7fELF\x02\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08' +mips64_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' +mips64_family=mips + +mips64el_magic='\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00' +mips64el_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff' +mips64el_family=mips + +sh4_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x2a\x00' +sh4_mask='\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff' +sh4_family=sh4 + +sh4eb_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x2a' +sh4eb_mask='\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' +sh4eb_family=sh4 + +s390x_magic='\x7fELF\x02\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x16' +s390x_mask='\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff' +s390x_family=s390x + +aarch64_magic='\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\xb7\x00' +aarch64_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff' +aarch64_family=arm + +qemu_get_family() { + cpu=${HOST_ARCH:-$(uname -m)} + case "$cpu" in + amd64|i386|i486|i586|i686|i86pc|BePC|x86_64) + echo "i386" + ;; + mips*) + echo "mips" + ;; + "Power Macintosh"|ppc64|powerpc|ppc) + echo "ppc" + ;; + ppc64el|ppc64le) + echo "ppcle" + ;; + arm|armel|armhf|arm64|armv[4-9]*) + echo "arm" + ;; + sparc*) + echo "sparc" + ;; + *) + echo "$cpu" + ;; + esac +} + +usage() { + cat <&2 + exit 1 + fi +} + +qemu_check_bintfmt_misc() { + # load the binfmt_misc module + if [ ! -d /proc/sys/fs/binfmt_misc ]; then + if ! /sbin/modprobe binfmt_misc ; then + exit 1 + fi + fi + if [ ! -f /proc/sys/fs/binfmt_misc/register ]; then + if ! mount binfmt_misc -t binfmt_misc /proc/sys/fs/binfmt_misc ; then + exit 1 + fi + fi + + qemu_check_access /proc/sys/fs/binfmt_misc/register +} + +installed_dpkg() { + dpkg --status "$1" > /dev/null 2>&1 +} + +qemu_check_debian() { + if [ ! -e /etc/debian_version ] ; then + echo "WARNING: your system is not a Debian based distro" 1>&2 + elif ! installed_dpkg binfmt-support ; then + echo "WARNING: package binfmt-support is needed" 1>&2 + fi + qemu_check_access "$EXPORTDIR" +} + +qemu_check_systemd() { + if ! systemctl -q is-enabled systemd-binfmt.service ; then + echo "WARNING: systemd-binfmt.service is missing or disabled" 1>&2 + fi + qemu_check_access "$EXPORTDIR" +} + +qemu_generate_register() { + echo ":qemu-$cpu:M::$magic:$mask:$qemu:$FLAGS" +} + +qemu_register_interpreter() { + echo "Setting $qemu as binfmt interpreter for $cpu" + qemu_generate_register > /proc/sys/fs/binfmt_misc/register +} + +qemu_generate_systemd() { + echo "Setting $qemu as binfmt interpreter for $cpu for systemd-binfmt.service" + qemu_generate_register > "$EXPORTDIR/qemu-$cpu.conf" +} + +qemu_generate_debian() { + cat > "$EXPORTDIR/qemu-$cpu" <> "$EXPORTDIR/qemu-$cpu" + fi +} + +qemu_set_binfmts() { + # probe cpu type + host_family=$(qemu_get_family) + + # register the interpreter for each cpu except for the native one + + for cpu in ${qemu_target_list} ; do + magic=$(eval echo \$${cpu}_magic) + mask=$(eval echo \$${cpu}_mask) + family=$(eval echo \$${cpu}_family) + + if [ "$magic" = "" ] || [ "$mask" = "" ] || [ "$family" = "" ] ; then + echo "INTERNAL ERROR: unknown cpu $cpu" 1>&2 + continue + fi + + qemu="$QEMU_PATH/qemu-$cpu" + if [ "$cpu" = "i486" ] ; then + qemu="$QEMU_PATH/qemu-i386" + fi + + if [ "$host_family" != "$family" ] ; then + $BINFMT_SET + fi + done +} + +CHECK=qemu_check_bintfmt_misc +BINFMT_SET=qemu_register_interpreter + +SYSTEMDDIR="/etc/binfmt.d" +DEBIANDIR="/usr/share/binfmts" + +QEMU_PATH=/usr/local/bin +FLAGS="" + +options=$(getopt -o ds:Q:e:hc: -l debian,systemd:,qemu-path:,exportdir:,help,credential: -- "$@") +eval set -- "$options" + +while true ; do + case "$1" in + -d|--debian) + CHECK=qemu_check_debian + BINFMT_SET=qemu_generate_debian + EXPORTDIR=${EXPORTDIR:-$DEBIANDIR} + ;; + -s|--systemd) + CHECK=qemu_check_systemd + BINFMT_SET=qemu_generate_systemd + EXPORTDIR=${EXPORTDIR:-$SYSTEMDDIR} + shift + # check given cpu is in the supported CPU list + for cpu in ${qemu_target_list} ; do + if [ "$cpu" == "$1" ] ; then + break + fi + done + + if [ "$cpu" == "$1" ] ; then + qemu_target_list="$1" + else + echo "ERROR: unknown CPU \"$1\"" 1>&2 + usage + exit 1 + fi + ;; + -Q|--qemu-path) + shift + QEMU_PATH="$1" + ;; + -e|--exportdir) + shift + EXPORTDIR="$1" + ;; + -h|--help) + usage + exit 1 + ;; + -c|--credential) + shift + if [ "$1" = "yes" ] ; then + FLAGS="OC" + else + FLAGS="" + fi + ;; + *) + break + ;; + esac + shift +done + +$CHECK +qemu_set_binfmts -- cgit v1.1 From 6c5b5645ae0b73c052df962e18e48d87bb7385e0 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Sun, 22 May 2016 18:56:19 +0200 Subject: linux-user: add rtnetlink(7) support rtnetlink is needed to use iproute package (ip addr, ip route) and dhcp client. Examples: Without this patch: # ip link Cannot open netlink socket: Address family not supported by protocol # ip addr Cannot open netlink socket: Address family not supported by protocol # ip route Cannot open netlink socket: Address family not supported by protocol # dhclient eth0 Cannot open netlink socket: Address family not supported by protocol Cannot open netlink socket: Address family not supported by protocol With this patch: # ip link 1: lo: mtu 65536 qdisc noqueue state UNKNOWN mode DEFAULT link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00 51: eth0: mtu 1500 qdisc fq_codel state UP mode DEFAULT qlen 1000 link/ether 00:16:3e:89:6b:d7 brd ff:ff:ff:ff:ff:ff # ip addr show eth0 51: eth0: mtu 1500 qdisc fq_codel state UP qlen 1000 link/ether 00:16:3e:89:6b:d7 brd ff:ff:ff:ff:ff:ff inet 192.168.122.197/24 brd 192.168.122.255 scope global eth0 valid_lft forever preferred_lft forever inet6 fe80::216:3eff:fe89:6bd7/64 scope link valid_lft forever preferred_lft forever # ip route default via 192.168.122.1 dev eth0 192.168.122.0/24 dev eth0 proto kernel scope link src 192.168.122.197 # ip addr flush eth0 # ip addr add 192.168.122.10 dev eth0 # ip addr show eth0 51: eth0: mtu 1500 qdisc fq_codel state UP qlen 1000 link/ether 00:16:3e:89:6b:d7 brd ff:ff:ff:ff:ff:ff inet 192.168.122.10/32 scope global eth0 valid_lft forever preferred_lft forever # ip route add 192.168.122.0/24 via 192.168.122.10 # ip route 192.168.122.0/24 via 192.168.122.10 dev eth0 Signed-off-by: Laurent Vivier Signed-off-by: Riku Voipio --- linux-user/syscall.c | 581 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 575 insertions(+), 6 deletions(-) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index df70255..3e4895e 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -101,6 +101,8 @@ int __clone2(int (*fn)(void *), void *child_stack_base, #include #include #include +#include +#include #include "linux_loop.h" #include "uname.h" @@ -304,6 +306,14 @@ static TargetFdTrans **target_fd_trans; static unsigned int target_fd_max; +static TargetFdDataFunc fd_trans_target_to_host_data(int fd) +{ + if (fd >= 0 && fd < target_fd_max && target_fd_trans[fd]) { + return target_fd_trans[fd]->target_to_host_data; + } + return NULL; +} + static TargetFdDataFunc fd_trans_host_to_target_data(int fd) { if (fd >= 0 && fd < target_fd_max && target_fd_trans[fd]) { @@ -1261,7 +1271,13 @@ static inline abi_long target_to_host_sockaddr(int fd, struct sockaddr *addr, memcpy(addr, target_saddr, len); addr->sa_family = sa_family; - if (sa_family == AF_PACKET) { + if (sa_family == AF_NETLINK) { + struct sockaddr_nl *nladdr; + + nladdr = (struct sockaddr_nl *)addr; + nladdr->nl_pid = tswap32(nladdr->nl_pid); + nladdr->nl_groups = tswap32(nladdr->nl_groups); + } else if (sa_family == AF_PACKET) { struct target_sockaddr_ll *lladdr; lladdr = (struct target_sockaddr_ll *)addr; @@ -1284,6 +1300,11 @@ static inline abi_long host_to_target_sockaddr(abi_ulong target_addr, return -TARGET_EFAULT; memcpy(target_saddr, addr, len); target_saddr->sa_family = tswap16(addr->sa_family); + if (addr->sa_family == AF_NETLINK) { + struct sockaddr_nl *target_nl = (struct sockaddr_nl *)target_saddr; + target_nl->nl_pid = tswap32(target_nl->nl_pid); + target_nl->nl_groups = tswap32(target_nl->nl_groups); + } unlock_user(target_saddr, target_addr, len); return 0; @@ -1515,6 +1536,511 @@ static inline abi_long host_to_target_cmsg(struct target_msghdr *target_msgh, return 0; } +static void tswap_nlmsghdr(struct nlmsghdr *nlh) +{ + nlh->nlmsg_len = tswap32(nlh->nlmsg_len); + nlh->nlmsg_type = tswap16(nlh->nlmsg_type); + nlh->nlmsg_flags = tswap16(nlh->nlmsg_flags); + nlh->nlmsg_seq = tswap32(nlh->nlmsg_seq); + nlh->nlmsg_pid = tswap32(nlh->nlmsg_pid); +} + +static abi_long host_to_target_for_each_nlmsg(struct nlmsghdr *nlh, + size_t len, + abi_long (*host_to_target_nlmsg) + (struct nlmsghdr *)) +{ + uint32_t nlmsg_len; + abi_long ret; + + while (len > sizeof(struct nlmsghdr)) { + + nlmsg_len = nlh->nlmsg_len; + if (nlmsg_len < sizeof(struct nlmsghdr) || + nlmsg_len > len) { + break; + } + + switch (nlh->nlmsg_type) { + case NLMSG_DONE: + tswap_nlmsghdr(nlh); + return 0; + case NLMSG_NOOP: + break; + case NLMSG_ERROR: + { + struct nlmsgerr *e = NLMSG_DATA(nlh); + e->error = tswap32(e->error); + tswap_nlmsghdr(&e->msg); + tswap_nlmsghdr(nlh); + return 0; + } + default: + ret = host_to_target_nlmsg(nlh); + if (ret < 0) { + tswap_nlmsghdr(nlh); + return ret; + } + break; + } + tswap_nlmsghdr(nlh); + len -= NLMSG_ALIGN(nlmsg_len); + nlh = (struct nlmsghdr *)(((char*)nlh) + NLMSG_ALIGN(nlmsg_len)); + } + return 0; +} + +static abi_long target_to_host_for_each_nlmsg(struct nlmsghdr *nlh, + size_t len, + abi_long (*target_to_host_nlmsg) + (struct nlmsghdr *)) +{ + int ret; + + while (len > sizeof(struct nlmsghdr)) { + if (tswap32(nlh->nlmsg_len) < sizeof(struct nlmsghdr) || + tswap32(nlh->nlmsg_len) > len) { + break; + } + tswap_nlmsghdr(nlh); + switch (nlh->nlmsg_type) { + case NLMSG_DONE: + return 0; + case NLMSG_NOOP: + break; + case NLMSG_ERROR: + { + struct nlmsgerr *e = NLMSG_DATA(nlh); + e->error = tswap32(e->error); + tswap_nlmsghdr(&e->msg); + } + default: + ret = target_to_host_nlmsg(nlh); + if (ret < 0) { + return ret; + } + } + len -= NLMSG_ALIGN(nlh->nlmsg_len); + nlh = (struct nlmsghdr *)(((char *)nlh) + NLMSG_ALIGN(nlh->nlmsg_len)); + } + return 0; +} + +static abi_long host_to_target_for_each_rtattr(struct rtattr *rtattr, + size_t len, + abi_long (*host_to_target_rtattr) + (struct rtattr *)) +{ + unsigned short rta_len; + abi_long ret; + + while (len > sizeof(struct rtattr)) { + rta_len = rtattr->rta_len; + if (rta_len < sizeof(struct rtattr) || + rta_len > len) { + break; + } + ret = host_to_target_rtattr(rtattr); + rtattr->rta_len = tswap16(rtattr->rta_len); + rtattr->rta_type = tswap16(rtattr->rta_type); + if (ret < 0) { + return ret; + } + len -= RTA_ALIGN(rta_len); + rtattr = (struct rtattr *)(((char *)rtattr) + RTA_ALIGN(rta_len)); + } + return 0; +} + +static abi_long host_to_target_data_link_rtattr(struct rtattr *rtattr) +{ + uint32_t *u32; + struct rtnl_link_stats *st; + struct rtnl_link_stats64 *st64; + struct rtnl_link_ifmap *map; + + switch (rtattr->rta_type) { + /* binary stream */ + case IFLA_ADDRESS: + case IFLA_BROADCAST: + /* string */ + case IFLA_IFNAME: + case IFLA_QDISC: + break; + /* uin8_t */ + case IFLA_OPERSTATE: + case IFLA_LINKMODE: + case IFLA_CARRIER: + case IFLA_PROTO_DOWN: + break; + /* uint32_t */ + case IFLA_MTU: + case IFLA_LINK: + case IFLA_WEIGHT: + case IFLA_TXQLEN: + case IFLA_CARRIER_CHANGES: + case IFLA_NUM_RX_QUEUES: + case IFLA_NUM_TX_QUEUES: + case IFLA_PROMISCUITY: + case IFLA_EXT_MASK: + case IFLA_LINK_NETNSID: + case IFLA_GROUP: + case IFLA_MASTER: + case IFLA_NUM_VF: + u32 = RTA_DATA(rtattr); + *u32 = tswap32(*u32); + break; + /* struct rtnl_link_stats */ + case IFLA_STATS: + st = RTA_DATA(rtattr); + st->rx_packets = tswap32(st->rx_packets); + st->tx_packets = tswap32(st->tx_packets); + st->rx_bytes = tswap32(st->rx_bytes); + st->tx_bytes = tswap32(st->tx_bytes); + st->rx_errors = tswap32(st->rx_errors); + st->tx_errors = tswap32(st->tx_errors); + st->rx_dropped = tswap32(st->rx_dropped); + st->tx_dropped = tswap32(st->tx_dropped); + st->multicast = tswap32(st->multicast); + st->collisions = tswap32(st->collisions); + + /* detailed rx_errors: */ + st->rx_length_errors = tswap32(st->rx_length_errors); + st->rx_over_errors = tswap32(st->rx_over_errors); + st->rx_crc_errors = tswap32(st->rx_crc_errors); + st->rx_frame_errors = tswap32(st->rx_frame_errors); + st->rx_fifo_errors = tswap32(st->rx_fifo_errors); + st->rx_missed_errors = tswap32(st->rx_missed_errors); + + /* detailed tx_errors */ + st->tx_aborted_errors = tswap32(st->tx_aborted_errors); + st->tx_carrier_errors = tswap32(st->tx_carrier_errors); + st->tx_fifo_errors = tswap32(st->tx_fifo_errors); + st->tx_heartbeat_errors = tswap32(st->tx_heartbeat_errors); + st->tx_window_errors = tswap32(st->tx_window_errors); + + /* for cslip etc */ + st->rx_compressed = tswap32(st->rx_compressed); + st->tx_compressed = tswap32(st->tx_compressed); + break; + /* struct rtnl_link_stats64 */ + case IFLA_STATS64: + st64 = RTA_DATA(rtattr); + st64->rx_packets = tswap64(st64->rx_packets); + st64->tx_packets = tswap64(st64->tx_packets); + st64->rx_bytes = tswap64(st64->rx_bytes); + st64->tx_bytes = tswap64(st64->tx_bytes); + st64->rx_errors = tswap64(st64->rx_errors); + st64->tx_errors = tswap64(st64->tx_errors); + st64->rx_dropped = tswap64(st64->rx_dropped); + st64->tx_dropped = tswap64(st64->tx_dropped); + st64->multicast = tswap64(st64->multicast); + st64->collisions = tswap64(st64->collisions); + + /* detailed rx_errors: */ + st64->rx_length_errors = tswap64(st64->rx_length_errors); + st64->rx_over_errors = tswap64(st64->rx_over_errors); + st64->rx_crc_errors = tswap64(st64->rx_crc_errors); + st64->rx_frame_errors = tswap64(st64->rx_frame_errors); + st64->rx_fifo_errors = tswap64(st64->rx_fifo_errors); + st64->rx_missed_errors = tswap64(st64->rx_missed_errors); + + /* detailed tx_errors */ + st64->tx_aborted_errors = tswap64(st64->tx_aborted_errors); + st64->tx_carrier_errors = tswap64(st64->tx_carrier_errors); + st64->tx_fifo_errors = tswap64(st64->tx_fifo_errors); + st64->tx_heartbeat_errors = tswap64(st64->tx_heartbeat_errors); + st64->tx_window_errors = tswap64(st64->tx_window_errors); + + /* for cslip etc */ + st64->rx_compressed = tswap64(st64->rx_compressed); + st64->tx_compressed = tswap64(st64->tx_compressed); + break; + /* struct rtnl_link_ifmap */ + case IFLA_MAP: + map = RTA_DATA(rtattr); + map->mem_start = tswap64(map->mem_start); + map->mem_end = tswap64(map->mem_end); + map->base_addr = tswap64(map->base_addr); + map->irq = tswap16(map->irq); + break; + /* nested */ + case IFLA_AF_SPEC: + case IFLA_LINKINFO: + /* FIXME: implement nested type */ + gemu_log("Unimplemented nested type %d\n", rtattr->rta_type); + break; + default: + gemu_log("Unknown host IFLA type: %d\n", rtattr->rta_type); + break; + } + return 0; +} + +static abi_long host_to_target_data_addr_rtattr(struct rtattr *rtattr) +{ + uint32_t *u32; + struct ifa_cacheinfo *ci; + + switch (rtattr->rta_type) { + /* binary: depends on family type */ + case IFA_ADDRESS: + case IFA_LOCAL: + break; + /* string */ + case IFA_LABEL: + break; + /* u32 */ + case IFA_FLAGS: + case IFA_BROADCAST: + u32 = RTA_DATA(rtattr); + *u32 = tswap32(*u32); + break; + /* struct ifa_cacheinfo */ + case IFA_CACHEINFO: + ci = RTA_DATA(rtattr); + ci->ifa_prefered = tswap32(ci->ifa_prefered); + ci->ifa_valid = tswap32(ci->ifa_valid); + ci->cstamp = tswap32(ci->cstamp); + ci->tstamp = tswap32(ci->tstamp); + break; + default: + gemu_log("Unknown host IFA type: %d\n", rtattr->rta_type); + break; + } + return 0; +} + +static abi_long host_to_target_data_route_rtattr(struct rtattr *rtattr) +{ + uint32_t *u32; + switch (rtattr->rta_type) { + /* binary: depends on family type */ + case RTA_GATEWAY: + case RTA_DST: + case RTA_PREFSRC: + break; + /* u32 */ + case RTA_PRIORITY: + case RTA_TABLE: + case RTA_OIF: + u32 = RTA_DATA(rtattr); + *u32 = tswap32(*u32); + break; + default: + gemu_log("Unknown host RTA type: %d\n", rtattr->rta_type); + break; + } + return 0; +} + +static abi_long host_to_target_link_rtattr(struct rtattr *rtattr, + uint32_t rtattr_len) +{ + return host_to_target_for_each_rtattr(rtattr, rtattr_len, + host_to_target_data_link_rtattr); +} + +static abi_long host_to_target_addr_rtattr(struct rtattr *rtattr, + uint32_t rtattr_len) +{ + return host_to_target_for_each_rtattr(rtattr, rtattr_len, + host_to_target_data_addr_rtattr); +} + +static abi_long host_to_target_route_rtattr(struct rtattr *rtattr, + uint32_t rtattr_len) +{ + return host_to_target_for_each_rtattr(rtattr, rtattr_len, + host_to_target_data_route_rtattr); +} + +static abi_long host_to_target_data_route(struct nlmsghdr *nlh) +{ + uint32_t nlmsg_len; + struct ifinfomsg *ifi; + struct ifaddrmsg *ifa; + struct rtmsg *rtm; + + nlmsg_len = nlh->nlmsg_len; + switch (nlh->nlmsg_type) { + case RTM_NEWLINK: + case RTM_DELLINK: + case RTM_GETLINK: + ifi = NLMSG_DATA(nlh); + ifi->ifi_type = tswap16(ifi->ifi_type); + ifi->ifi_index = tswap32(ifi->ifi_index); + ifi->ifi_flags = tswap32(ifi->ifi_flags); + ifi->ifi_change = tswap32(ifi->ifi_change); + host_to_target_link_rtattr(IFLA_RTA(ifi), + nlmsg_len - NLMSG_LENGTH(sizeof(*ifi))); + break; + case RTM_NEWADDR: + case RTM_DELADDR: + case RTM_GETADDR: + ifa = NLMSG_DATA(nlh); + ifa->ifa_index = tswap32(ifa->ifa_index); + host_to_target_addr_rtattr(IFA_RTA(ifa), + nlmsg_len - NLMSG_LENGTH(sizeof(*ifa))); + break; + case RTM_NEWROUTE: + case RTM_DELROUTE: + case RTM_GETROUTE: + rtm = NLMSG_DATA(nlh); + rtm->rtm_flags = tswap32(rtm->rtm_flags); + host_to_target_route_rtattr(RTM_RTA(rtm), + nlmsg_len - NLMSG_LENGTH(sizeof(*rtm))); + break; + default: + return -TARGET_EINVAL; + } + return 0; +} + +static inline abi_long host_to_target_nlmsg_route(struct nlmsghdr *nlh, + size_t len) +{ + return host_to_target_for_each_nlmsg(nlh, len, host_to_target_data_route); +} + +static abi_long target_to_host_for_each_rtattr(struct rtattr *rtattr, + size_t len, + abi_long (*target_to_host_rtattr) + (struct rtattr *)) +{ + abi_long ret; + + while (len >= sizeof(struct rtattr)) { + if (tswap16(rtattr->rta_len) < sizeof(struct rtattr) || + tswap16(rtattr->rta_len) > len) { + break; + } + rtattr->rta_len = tswap16(rtattr->rta_len); + rtattr->rta_type = tswap16(rtattr->rta_type); + ret = target_to_host_rtattr(rtattr); + if (ret < 0) { + return ret; + } + len -= RTA_ALIGN(rtattr->rta_len); + rtattr = (struct rtattr *)(((char *)rtattr) + + RTA_ALIGN(rtattr->rta_len)); + } + return 0; +} + +static abi_long target_to_host_data_link_rtattr(struct rtattr *rtattr) +{ + switch (rtattr->rta_type) { + default: + gemu_log("Unknown target IFLA type: %d\n", rtattr->rta_type); + break; + } + return 0; +} + +static abi_long target_to_host_data_addr_rtattr(struct rtattr *rtattr) +{ + switch (rtattr->rta_type) { + /* binary: depends on family type */ + case IFA_LOCAL: + case IFA_ADDRESS: + break; + default: + gemu_log("Unknown target IFA type: %d\n", rtattr->rta_type); + break; + } + return 0; +} + +static abi_long target_to_host_data_route_rtattr(struct rtattr *rtattr) +{ + uint32_t *u32; + switch (rtattr->rta_type) { + /* binary: depends on family type */ + case RTA_DST: + case RTA_SRC: + case RTA_GATEWAY: + break; + /* u32 */ + case RTA_OIF: + u32 = RTA_DATA(rtattr); + *u32 = tswap32(*u32); + break; + default: + gemu_log("Unknown target RTA type: %d\n", rtattr->rta_type); + break; + } + return 0; +} + +static void target_to_host_link_rtattr(struct rtattr *rtattr, + uint32_t rtattr_len) +{ + target_to_host_for_each_rtattr(rtattr, rtattr_len, + target_to_host_data_link_rtattr); +} + +static void target_to_host_addr_rtattr(struct rtattr *rtattr, + uint32_t rtattr_len) +{ + target_to_host_for_each_rtattr(rtattr, rtattr_len, + target_to_host_data_addr_rtattr); +} + +static void target_to_host_route_rtattr(struct rtattr *rtattr, + uint32_t rtattr_len) +{ + target_to_host_for_each_rtattr(rtattr, rtattr_len, + target_to_host_data_route_rtattr); +} + +static abi_long target_to_host_data_route(struct nlmsghdr *nlh) +{ + struct ifinfomsg *ifi; + struct ifaddrmsg *ifa; + struct rtmsg *rtm; + + switch (nlh->nlmsg_type) { + case RTM_GETLINK: + break; + case RTM_NEWLINK: + case RTM_DELLINK: + ifi = NLMSG_DATA(nlh); + ifi->ifi_type = tswap16(ifi->ifi_type); + ifi->ifi_index = tswap32(ifi->ifi_index); + ifi->ifi_flags = tswap32(ifi->ifi_flags); + ifi->ifi_change = tswap32(ifi->ifi_change); + target_to_host_link_rtattr(IFLA_RTA(ifi), nlh->nlmsg_len - + NLMSG_LENGTH(sizeof(*ifi))); + break; + case RTM_GETADDR: + case RTM_NEWADDR: + case RTM_DELADDR: + ifa = NLMSG_DATA(nlh); + ifa->ifa_index = tswap32(ifa->ifa_index); + target_to_host_addr_rtattr(IFA_RTA(ifa), nlh->nlmsg_len - + NLMSG_LENGTH(sizeof(*ifa))); + break; + case RTM_GETROUTE: + break; + case RTM_NEWROUTE: + case RTM_DELROUTE: + rtm = NLMSG_DATA(nlh); + rtm->rtm_flags = tswap32(rtm->rtm_flags); + target_to_host_route_rtattr(RTM_RTA(rtm), nlh->nlmsg_len - + NLMSG_LENGTH(sizeof(*rtm))); + break; + default: + return -TARGET_EOPNOTSUPP; + } + return 0; +} + +static abi_long target_to_host_nlmsg_route(struct nlmsghdr *nlh, size_t len) +{ + return target_to_host_for_each_nlmsg(nlh, len, target_to_host_data_route); +} + /* do_setsockopt() Must return target values and target errnos. */ static abi_long do_setsockopt(int sockfd, int level, int optname, abi_ulong optval_addr, socklen_t optlen) @@ -2165,6 +2691,21 @@ static TargetFdTrans target_packet_trans = { .target_to_host_addr = packet_target_to_host_sockaddr, }; +static abi_long netlink_route_target_to_host(void *buf, size_t len) +{ + return target_to_host_nlmsg_route(buf, len); +} + +static abi_long netlink_route_host_to_target(void *buf, size_t len) +{ + return host_to_target_nlmsg_route(buf, len); +} + +static TargetFdTrans target_netlink_route_trans = { + .target_to_host_data = netlink_route_target_to_host, + .host_to_target_data = netlink_route_host_to_target, +}; + /* do_socket() Must return target values and target errnos. */ static abi_long do_socket(int domain, int type, int protocol) { @@ -2176,8 +2717,10 @@ static abi_long do_socket(int domain, int type, int protocol) return ret; } - if (domain == PF_NETLINK) - return -TARGET_EAFNOSUPPORT; + if (domain == PF_NETLINK && + protocol != NETLINK_ROUTE) { + return -EPFNOSUPPORT; + } if (domain == AF_PACKET || (domain == AF_INET && type == SOCK_PACKET)) { @@ -2192,6 +2735,14 @@ static abi_long do_socket(int domain, int type, int protocol) * if socket type is SOCK_PACKET, bind by name */ fd_trans_register(ret, &target_packet_trans); + } else if (domain == PF_NETLINK) { + switch (protocol) { + case NETLINK_ROUTE: + fd_trans_register(ret, &target_netlink_route_trans); + break; + default: + g_assert_not_reached(); + } } } return ret; @@ -2276,14 +2827,25 @@ static abi_long do_sendrecvmsg_locked(int fd, struct target_msghdr *msgp, msg.msg_iov = vec; if (send) { - ret = target_to_host_cmsg(&msg, msgp); - if (ret == 0) + if (fd_trans_target_to_host_data(fd)) { + ret = fd_trans_target_to_host_data(fd)(msg.msg_iov->iov_base, + msg.msg_iov->iov_len); + } else { + ret = target_to_host_cmsg(&msg, msgp); + } + if (ret == 0) { ret = get_errno(sendmsg(fd, &msg, flags)); + } } else { ret = get_errno(recvmsg(fd, &msg, flags)); if (!is_error(ret)) { len = ret; - ret = host_to_target_cmsg(msgp, &msg); + if (fd_trans_host_to_target_data(fd)) { + ret = fd_trans_host_to_target_data(fd)(msg.msg_iov->iov_base, + msg.msg_iov->iov_len); + } else { + ret = host_to_target_cmsg(msgp, &msg); + } if (!is_error(ret)) { msgp->msg_namelen = tswap32(msg.msg_namelen); if (msg.msg_name != NULL) { @@ -2510,6 +3072,13 @@ static abi_long do_sendto(int fd, abi_ulong msg, size_t len, int flags, host_msg = lock_user(VERIFY_READ, msg, len, 1); if (!host_msg) return -TARGET_EFAULT; + if (fd_trans_target_to_host_data(fd)) { + ret = fd_trans_target_to_host_data(fd)(host_msg, len); + if (ret < 0) { + unlock_user(host_msg, msg, 0); + return ret; + } + } if (target_addr) { addr = alloca(addrlen+1); ret = target_to_host_sockaddr(fd, addr, target_addr, addrlen); -- cgit v1.1 From b265620bfbe300528247b31de54bacfd513109e8 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Sun, 22 May 2016 18:56:20 +0200 Subject: linux-user: support netlink protocol NETLINK_KOBJECT_UEVENT This is the protocol used by udevd to manage kernel events. Signed-off-by: Laurent Vivier Signed-off-by: Riku Voipio --- linux-user/syscall.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 3e4895e..584aecc 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -2718,7 +2718,8 @@ static abi_long do_socket(int domain, int type, int protocol) } if (domain == PF_NETLINK && - protocol != NETLINK_ROUTE) { + !(protocol == NETLINK_ROUTE || + protocol == NETLINK_KOBJECT_UEVENT)) { return -EPFNOSUPPORT; } @@ -2740,6 +2741,9 @@ static abi_long do_socket(int domain, int type, int protocol) case NETLINK_ROUTE: fd_trans_register(ret, &target_netlink_route_trans); break; + case NETLINK_KOBJECT_UEVENT: + /* nothing to do: messages are strings */ + break; default: g_assert_not_reached(); } -- cgit v1.1 From 5ce9bb5937aa549efb0f93ee78a06ce8bded0d50 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Sun, 22 May 2016 18:56:21 +0200 Subject: linux-user: add netlink audit This is, for instance, needed to log in a container. Without this, the user cannot be identified and the console login fails with "Login incorrect". Signed-off-by: Laurent Vivier Signed-off-by: Riku Voipio --- linux-user/syscall.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 59 insertions(+), 1 deletion(-) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 584aecc..933c2cd 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -103,6 +103,7 @@ int __clone2(int (*fn)(void *), void *child_stack_base, #include #include #include +#include #include "linux_loop.h" #include "uname.h" @@ -2041,6 +2042,44 @@ static abi_long target_to_host_nlmsg_route(struct nlmsghdr *nlh, size_t len) return target_to_host_for_each_nlmsg(nlh, len, target_to_host_data_route); } +static abi_long host_to_target_data_audit(struct nlmsghdr *nlh) +{ + switch (nlh->nlmsg_type) { + default: + gemu_log("Unknown host audit message type %d\n", + nlh->nlmsg_type); + return -TARGET_EINVAL; + } + return 0; +} + +static inline abi_long host_to_target_nlmsg_audit(struct nlmsghdr *nlh, + size_t len) +{ + return host_to_target_for_each_nlmsg(nlh, len, host_to_target_data_audit); +} + +static abi_long target_to_host_data_audit(struct nlmsghdr *nlh) +{ + switch (nlh->nlmsg_type) { + case AUDIT_USER: + case AUDIT_FIRST_USER_MSG ... AUDIT_LAST_USER_MSG: + case AUDIT_FIRST_USER_MSG2 ... AUDIT_LAST_USER_MSG2: + break; + default: + gemu_log("Unknown target audit message type %d\n", + nlh->nlmsg_type); + return -TARGET_EINVAL; + } + + return 0; +} + +static abi_long target_to_host_nlmsg_audit(struct nlmsghdr *nlh, size_t len) +{ + return target_to_host_for_each_nlmsg(nlh, len, target_to_host_data_audit); +} + /* do_setsockopt() Must return target values and target errnos. */ static abi_long do_setsockopt(int sockfd, int level, int optname, abi_ulong optval_addr, socklen_t optlen) @@ -2706,6 +2745,21 @@ static TargetFdTrans target_netlink_route_trans = { .host_to_target_data = netlink_route_host_to_target, }; +static abi_long netlink_audit_target_to_host(void *buf, size_t len) +{ + return target_to_host_nlmsg_audit(buf, len); +} + +static abi_long netlink_audit_host_to_target(void *buf, size_t len) +{ + return host_to_target_nlmsg_audit(buf, len); +} + +static TargetFdTrans target_netlink_audit_trans = { + .target_to_host_data = netlink_audit_target_to_host, + .host_to_target_data = netlink_audit_host_to_target, +}; + /* do_socket() Must return target values and target errnos. */ static abi_long do_socket(int domain, int type, int protocol) { @@ -2719,7 +2773,8 @@ static abi_long do_socket(int domain, int type, int protocol) if (domain == PF_NETLINK && !(protocol == NETLINK_ROUTE || - protocol == NETLINK_KOBJECT_UEVENT)) { + protocol == NETLINK_KOBJECT_UEVENT || + protocol == NETLINK_AUDIT)) { return -EPFNOSUPPORT; } @@ -2744,6 +2799,9 @@ static abi_long do_socket(int domain, int type, int protocol) case NETLINK_KOBJECT_UEVENT: /* nothing to do: messages are strings */ break; + case NETLINK_AUDIT: + fd_trans_register(ret, &target_netlink_audit_trans); + break; default: g_assert_not_reached(); } -- cgit v1.1 From 575b22b1b7a843f34f52ebc9c00fb1c967258912 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Thu, 2 Jun 2016 22:14:15 +0200 Subject: linux-user: check if NETLINK_ROUTE is available Some IFLA_* symbols can be missing in the host linux/if_link.h, but as they are enums and not "#defines", check in "configure" if last known (IFLA_PROTO_DOWN) is available and if not, disable management of NETLINK_ROUTE protocol. Signed-off-by: Laurent Vivier Signed-off-by: Riku Voipio --- configure | 15 +++++++++++++++ linux-user/syscall.c | 18 ++++++++++++++---- 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/configure b/configure index 6d01d96..d1dbc4b 100755 --- a/configure +++ b/configure @@ -4526,6 +4526,17 @@ if compile_prog "" "" ; then have_fsxattr=yes fi +have_rtnetlink=no +cat > $TMPC << EOF +#include +int main(void) { + return IFLA_PROTO_DOWN; +} +EOF +if compile_prog "" "" ; then + have_rtnetlink=yes +fi + ########################################## # End of CC checks # After here, no more $cc or $ld runs @@ -5461,6 +5472,10 @@ if test "$rdma" = "yes" ; then echo "CONFIG_RDMA=y" >> $config_host_mak fi +if test "$have_rtnetlink" = "yes" ; then + echo "CONFIG_RTNETLINK=y" >> $config_host_mak +fi + # Hold two types of flag: # CONFIG_THREAD_SETNAME_BYTHREAD - we've got a way of setting the name on # a thread we have a handle to diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 933c2cd..9cc9a35 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -102,7 +102,9 @@ int __clone2(int (*fn)(void *), void *child_stack_base, #include #include #include +#ifdef CONFIG_RTNETLINK #include +#endif #include #include "linux_loop.h" #include "uname.h" @@ -1627,6 +1629,7 @@ static abi_long target_to_host_for_each_nlmsg(struct nlmsghdr *nlh, return 0; } +#ifdef CONFIG_RTNETLINK static abi_long host_to_target_for_each_rtattr(struct rtattr *rtattr, size_t len, abi_long (*host_to_target_rtattr) @@ -2041,6 +2044,7 @@ static abi_long target_to_host_nlmsg_route(struct nlmsghdr *nlh, size_t len) { return target_to_host_for_each_nlmsg(nlh, len, target_to_host_data_route); } +#endif /* CONFIG_RTNETLINK */ static abi_long host_to_target_data_audit(struct nlmsghdr *nlh) { @@ -2730,6 +2734,7 @@ static TargetFdTrans target_packet_trans = { .target_to_host_addr = packet_target_to_host_sockaddr, }; +#ifdef CONFIG_RTNETLINK static abi_long netlink_route_target_to_host(void *buf, size_t len) { return target_to_host_nlmsg_route(buf, len); @@ -2744,6 +2749,7 @@ static TargetFdTrans target_netlink_route_trans = { .target_to_host_data = netlink_route_target_to_host, .host_to_target_data = netlink_route_host_to_target, }; +#endif /* CONFIG_RTNETLINK */ static abi_long netlink_audit_target_to_host(void *buf, size_t len) { @@ -2771,10 +2777,12 @@ static abi_long do_socket(int domain, int type, int protocol) return ret; } - if (domain == PF_NETLINK && - !(protocol == NETLINK_ROUTE || - protocol == NETLINK_KOBJECT_UEVENT || - protocol == NETLINK_AUDIT)) { + if (domain == PF_NETLINK && !( +#ifdef CONFIG_RTNETLINK + protocol == NETLINK_ROUTE || +#endif + protocol == NETLINK_KOBJECT_UEVENT || + protocol == NETLINK_AUDIT)) { return -EPFNOSUPPORT; } @@ -2793,9 +2801,11 @@ static abi_long do_socket(int domain, int type, int protocol) fd_trans_register(ret, &target_packet_trans); } else if (domain == PF_NETLINK) { switch (protocol) { +#ifdef CONFIG_RTNETLINK case NETLINK_ROUTE: fd_trans_register(ret, &target_netlink_route_trans); break; +#endif case NETLINK_KOBJECT_UEVENT: /* nothing to do: messages are strings */ break; -- cgit v1.1 From eb5525013ae4eaaa3038607aeb7bc1954fff8656 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Fri, 27 May 2016 15:51:43 +0100 Subject: linux-user: Factor out handle_signal code from process_pending_signals() Factor out the code to handle a single signal from the process_pending_signals() function. The use of goto for flow control is OK currently, but would get significantly uglier if extended to allow running the handle_signal code multiple times. Signed-off-by: Peter Maydell Reviewed-by: Laurent Vivier Signed-off-by: Riku Voipio --- linux-user/signal.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/linux-user/signal.c b/linux-user/signal.c index 8090b4d..a9ac491 100644 --- a/linux-user/signal.c +++ b/linux-user/signal.c @@ -5765,33 +5765,40 @@ long do_rt_sigreturn(CPUArchState *env) #endif +static void handle_pending_signal(CPUArchState *cpu_env, int sig); + void process_pending_signals(CPUArchState *cpu_env) { CPUState *cpu = ENV_GET_CPU(cpu_env); int sig; - abi_ulong handler; - sigset_t set, old_set; - target_sigset_t target_old_set; - struct emulated_sigtable *k; - struct target_sigaction *sa; - struct sigqueue *q; TaskState *ts = cpu->opaque; if (!ts->signal_pending) return; /* FIXME: This is not threadsafe. */ - k = ts->sigtab; for(sig = 1; sig <= TARGET_NSIG; sig++) { - if (k->pending) - goto handle_signal; - k++; + if (ts->sigtab[sig - 1].pending) { + handle_pending_signal(cpu_env, sig); + return; + } } /* if no signal is pending, just return */ ts->signal_pending = 0; return; +} + +static void handle_pending_signal(CPUArchState *cpu_env, int sig) +{ + CPUState *cpu = ENV_GET_CPU(cpu_env); + abi_ulong handler; + sigset_t set, old_set; + target_sigset_t target_old_set; + struct target_sigaction *sa; + struct sigqueue *q; + TaskState *ts = cpu->opaque; + struct emulated_sigtable *k = &ts->sigtab[sig - 1]; - handle_signal: trace_user_handle_signal(cpu_env, sig); /* dequeue signal */ q = k->first; -- cgit v1.1 From e902d588dcaaff98a2832d3a61cba2f058f50dfc Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Fri, 27 May 2016 15:51:44 +0100 Subject: linux-user: Move handle_pending_signal() to avoid need for declaration Move the handle_pending_signal() function above process_pending_signals() to avoid the need for a forward declaration. (Whitespace only change.) Signed-off-by: Peter Maydell Reviewed-by: Laurent Vivier Signed-off-by: Riku Voipio --- linux-user/signal.c | 44 +++++++++++++++++++++----------------------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/linux-user/signal.c b/linux-user/signal.c index a9ac491..5f98c71 100644 --- a/linux-user/signal.c +++ b/linux-user/signal.c @@ -5765,29 +5765,6 @@ long do_rt_sigreturn(CPUArchState *env) #endif -static void handle_pending_signal(CPUArchState *cpu_env, int sig); - -void process_pending_signals(CPUArchState *cpu_env) -{ - CPUState *cpu = ENV_GET_CPU(cpu_env); - int sig; - TaskState *ts = cpu->opaque; - - if (!ts->signal_pending) - return; - - /* FIXME: This is not threadsafe. */ - for(sig = 1; sig <= TARGET_NSIG; sig++) { - if (ts->sigtab[sig - 1].pending) { - handle_pending_signal(cpu_env, sig); - return; - } - } - /* if no signal is pending, just return */ - ts->signal_pending = 0; - return; -} - static void handle_pending_signal(CPUArchState *cpu_env, int sig) { CPUState *cpu = ENV_GET_CPU(cpu_env); @@ -5876,3 +5853,24 @@ static void handle_pending_signal(CPUArchState *cpu_env, int sig) if (q != &k->info) free_sigqueue(cpu_env, q); } + +void process_pending_signals(CPUArchState *cpu_env) +{ + CPUState *cpu = ENV_GET_CPU(cpu_env); + int sig; + TaskState *ts = cpu->opaque; + + if (!ts->signal_pending) + return; + + /* FIXME: This is not threadsafe. */ + for(sig = 1; sig <= TARGET_NSIG; sig++) { + if (ts->sigtab[sig - 1].pending) { + handle_pending_signal(cpu_env, sig); + return; + } + } + /* if no signal is pending, just return */ + ts->signal_pending = 0; + return; +} -- cgit v1.1 From 7ec87e06c7371c6c574de8ce2cb6553a26c3f3de Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Fri, 27 May 2016 15:51:45 +0100 Subject: linux-user: Fix stray tab-indent Fix a stray tab-indented linux in linux-user/signal.c. Signed-off-by: Peter Maydell Reviewed-by: Laurent Vivier Signed-off-by: Riku Voipio --- linux-user/signal.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/linux-user/signal.c b/linux-user/signal.c index 5f98c71..5069c3f 100644 --- a/linux-user/signal.c +++ b/linux-user/signal.c @@ -5847,8 +5847,9 @@ static void handle_pending_signal(CPUArchState *cpu_env, int sig) else setup_frame(sig, sa, &target_old_set, cpu_env); #endif - if (sa->sa_flags & TARGET_SA_RESETHAND) + if (sa->sa_flags & TARGET_SA_RESETHAND) { sa->_sa_handler = TARGET_SIG_DFL; + } } if (q != &k->info) free_sigqueue(cpu_env, q); -- cgit v1.1 From 9eede5b69fbbed46f29c7c586cf9e067f56002a6 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Fri, 27 May 2016 15:51:46 +0100 Subject: linux-user: Factor out uses of do_sigprocmask() from sigreturn code All the architecture specific handlers for sigreturn include calls to do_sigprocmask(SIGSETMASK, &set, NULL) to set the signal mask from the uc_sigmask in the context being restored. Factor these out into calls to a set_sigmask() function. The next patch will want to add code which is not run when setting the signal mask via do_sigreturn, and this change allows us to separate the two cases. Signed-off-by: Peter Maydell Reviewed-by: Laurent Vivier Signed-off-by: Riku Voipio --- linux-user/signal.c | 55 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 32 insertions(+), 23 deletions(-) diff --git a/linux-user/signal.c b/linux-user/signal.c index 5069c3f..1b86a85 100644 --- a/linux-user/signal.c +++ b/linux-user/signal.c @@ -239,6 +239,15 @@ int do_sigprocmask(int how, const sigset_t *set, sigset_t *oldset) return ret; } +#if !defined(TARGET_OPENRISC) && !defined(TARGET_UNICORE32) && \ + !defined(TARGET_X86_64) +/* Just set the guest's signal mask to the specified value */ +static void set_sigmask(const sigset_t *set) +{ + do_sigprocmask(SIG_SETMASK, set, NULL); +} +#endif + /* siginfo conversion */ static inline void host_to_target_siginfo_noswap(target_siginfo_t *tinfo, @@ -1093,7 +1102,7 @@ long do_sigreturn(CPUX86State *env) } target_to_host_sigset_internal(&set, &target_set); - do_sigprocmask(SIG_SETMASK, &set, NULL); + set_sigmask(&set); /* restore registers */ if (restore_sigcontext(env, &frame->sc)) @@ -1118,7 +1127,7 @@ long do_rt_sigreturn(CPUX86State *env) if (!lock_user_struct(VERIFY_READ, frame, frame_addr, 1)) goto badframe; target_to_host_sigset(&set, &frame->uc.tuc_sigmask); - do_sigprocmask(SIG_SETMASK, &set, NULL); + set_sigmask(&set); if (restore_sigcontext(env, &frame->uc.tuc_mcontext)) { goto badframe; @@ -1258,7 +1267,7 @@ static int target_restore_sigframe(CPUARMState *env, uint64_t pstate; target_to_host_sigset(&set, &sf->uc.tuc_sigmask); - do_sigprocmask(SIG_SETMASK, &set, NULL); + set_sigmask(&set); for (i = 0; i < 31; i++) { __get_user(env->xregs[i], &sf->uc.tuc_mcontext.regs[i]); @@ -1900,7 +1909,7 @@ static long do_sigreturn_v1(CPUARMState *env) } target_to_host_sigset_internal(&host_set, &set); - do_sigprocmask(SIG_SETMASK, &host_set, NULL); + set_sigmask(&host_set); if (restore_sigcontext(env, &frame->sc)) { goto badframe; @@ -1981,7 +1990,7 @@ static int do_sigframe_return_v2(CPUARMState *env, target_ulong frame_addr, abi_ulong *regspace; target_to_host_sigset(&host_set, &uc->tuc_sigmask); - do_sigprocmask(SIG_SETMASK, &host_set, NULL); + set_sigmask(&host_set); if (restore_sigcontext(env, &uc->tuc_mcontext)) return 1; @@ -2077,7 +2086,7 @@ static long do_rt_sigreturn_v1(CPUARMState *env) } target_to_host_sigset(&host_set, &frame->uc.tuc_sigmask); - do_sigprocmask(SIG_SETMASK, &host_set, NULL); + set_sigmask(&host_set); if (restore_sigcontext(env, &frame->uc.tuc_mcontext)) { goto badframe; @@ -2453,7 +2462,7 @@ long do_sigreturn(CPUSPARCState *env) } target_to_host_sigset_internal(&host_set, &set); - do_sigprocmask(SIG_SETMASK, &host_set, NULL); + set_sigmask(&host_set); if (err) { goto segv_and_exit; @@ -2576,7 +2585,7 @@ void sparc64_set_context(CPUSPARCState *env) } } target_to_host_sigset_internal(&set, &target_set); - do_sigprocmask(SIG_SETMASK, &set, NULL); + set_sigmask(&set); } env->pc = pc; env->npc = npc; @@ -2993,7 +3002,7 @@ long do_sigreturn(CPUMIPSState *regs) } target_to_host_sigset_internal(&blocked, &target_set); - do_sigprocmask(SIG_SETMASK, &blocked, NULL); + set_sigmask(&blocked); restore_sigcontext(regs, &frame->sf_sc); @@ -3097,7 +3106,7 @@ long do_rt_sigreturn(CPUMIPSState *env) } target_to_host_sigset(&blocked, &frame->rs_uc.tuc_sigmask); - do_sigprocmask(SIG_SETMASK, &blocked, NULL); + set_sigmask(&blocked); restore_sigcontext(env, &frame->rs_uc.tuc_mcontext); @@ -3371,7 +3380,7 @@ long do_sigreturn(CPUSH4State *regs) goto badframe; target_to_host_sigset_internal(&blocked, &target_set); - do_sigprocmask(SIG_SETMASK, &blocked, NULL); + set_sigmask(&blocked); restore_sigcontext(regs, &frame->sc); @@ -3397,7 +3406,7 @@ long do_rt_sigreturn(CPUSH4State *regs) } target_to_host_sigset(&blocked, &frame->uc.tuc_sigmask); - do_sigprocmask(SIG_SETMASK, &blocked, NULL); + set_sigmask(&blocked); restore_sigcontext(regs, &frame->uc.tuc_mcontext); @@ -3621,7 +3630,7 @@ long do_sigreturn(CPUMBState *env) __get_user(target_set.sig[i], &frame->extramask[i - 1]); } target_to_host_sigset_internal(&set, &target_set); - do_sigprocmask(SIG_SETMASK, &set, NULL); + set_sigmask(&set); restore_sigcontext(&frame->uc.tuc_mcontext, env); /* We got here through a sigreturn syscall, our path back is via an @@ -3792,7 +3801,7 @@ long do_sigreturn(CPUCRISState *env) __get_user(target_set.sig[i], &frame->extramask[i - 1]); } target_to_host_sigset_internal(&set, &target_set); - do_sigprocmask(SIG_SETMASK, &set, NULL); + set_sigmask(&set); restore_sigcontext(&frame->sc, env); unlock_user_struct(frame, frame_addr, 0); @@ -4284,7 +4293,7 @@ long do_sigreturn(CPUS390XState *env) __get_user(target_set.sig[0], &frame->sc.oldmask[0]); target_to_host_sigset_internal(&set, &target_set); - do_sigprocmask(SIG_SETMASK, &set, NULL); /* ~_BLOCKABLE? */ + set_sigmask(&set); /* ~_BLOCKABLE? */ if (restore_sigregs(env, &frame->sregs)) { goto badframe; @@ -4310,7 +4319,7 @@ long do_rt_sigreturn(CPUS390XState *env) } target_to_host_sigset(&set, &frame->uc.tuc_sigmask); - do_sigprocmask(SIG_SETMASK, &set, NULL); /* ~_BLOCKABLE? */ + set_sigmask(&set); /* ~_BLOCKABLE? */ if (restore_sigregs(env, &frame->uc.tuc_mcontext)) { goto badframe; @@ -4872,7 +4881,7 @@ long do_sigreturn(CPUPPCState *env) __get_user(set.sig[1], &sc->_unused[3]); #endif target_to_host_sigset_internal(&blocked, &set); - do_sigprocmask(SIG_SETMASK, &blocked, NULL); + set_sigmask(&blocked); __get_user(sr_addr, &sc->regs); if (!lock_user_struct(VERIFY_READ, sr, sr_addr, 1)) @@ -4913,7 +4922,7 @@ static int do_setcontext(struct target_ucontext *ucp, CPUPPCState *env, int sig) return 1; target_to_host_sigset_internal(&blocked, &set); - do_sigprocmask(SIG_SETMASK, &blocked, NULL); + set_sigmask(&blocked); restore_user_regs(env, mcp, sig); unlock_user_struct(mcp, mcp_addr, 1); @@ -5261,7 +5270,7 @@ long do_sigreturn(CPUM68KState *env) } target_to_host_sigset_internal(&set, &target_set); - do_sigprocmask(SIG_SETMASK, &set, NULL); + set_sigmask(&set); /* restore registers */ @@ -5287,7 +5296,7 @@ long do_rt_sigreturn(CPUM68KState *env) goto badframe; target_to_host_sigset_internal(&set, &target_set); - do_sigprocmask(SIG_SETMASK, &set, NULL); + set_sigmask(&set); /* restore registers */ @@ -5530,7 +5539,7 @@ long do_sigreturn(CPUAlphaState *env) __get_user(target_set.sig[0], &sc->sc_mask); target_to_host_sigset_internal(&set, &target_set); - do_sigprocmask(SIG_SETMASK, &set, NULL); + set_sigmask(&set); restore_sigcontext(env, sc); unlock_user_struct(sc, sc_addr, 0); @@ -5551,7 +5560,7 @@ long do_rt_sigreturn(CPUAlphaState *env) goto badframe; } target_to_host_sigset(&set, &frame->uc.tuc_sigmask); - do_sigprocmask(SIG_SETMASK, &set, NULL); + set_sigmask(&set); restore_sigcontext(env, &frame->uc.tuc_mcontext); if (do_sigaltstack(frame_addr + offsetof(struct target_rt_sigframe, @@ -5718,7 +5727,7 @@ long do_rt_sigreturn(CPUTLGState *env) goto badframe; } target_to_host_sigset(&set, &frame->uc.tuc_sigmask); - do_sigprocmask(SIG_SETMASK, &set, NULL); + set_sigmask(&set); restore_sigcontext(env, &frame->uc.tuc_mcontext); if (do_sigaltstack(frame_addr + offsetof(struct target_rt_sigframe, -- cgit v1.1 From b28a1f333a5875e6f48725efd19c76fc3d27d8d1 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Fri, 27 May 2016 15:51:47 +0100 Subject: linux-user: Define macro for size of host kernel sigset_t Some host syscalls take an argument specifying the size of a host kernel's sigset_t (which isn't necessarily the same as that of the host libc's type of that name). Instead of hardcoding _NSIG / 8 where we do this, define and use a SIGSET_T_SIZE macro. Signed-off-by: Peter Maydell Reviewed-by: Laurent Vivier Signed-off-by: Riku Voipio --- linux-user/syscall.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 9cc9a35..d2749a6 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -124,6 +124,10 @@ int __clone2(int (*fn)(void *), void *child_stack_base, #define VFAT_IOCTL_READDIR_BOTH _IOR('r', 1, struct linux_dirent [2]) #define VFAT_IOCTL_READDIR_SHORT _IOR('r', 2, struct linux_dirent [2]) +/* This is the size of the host kernel's sigset_t, needed where we make + * direct system calls that take a sigset_t pointer and a size. + */ +#define SIGSET_T_SIZE (_NSIG / 8) #undef _syscall0 #undef _syscall1 @@ -7862,7 +7866,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, /* Extract the two packed args for the sigset */ if (arg6) { sig_ptr = &sig; - sig.size = _NSIG / 8; + sig.size = SIGSET_T_SIZE; arg7 = lock_user(VERIFY_READ, arg6, sizeof(*arg7) * 2, 1); if (!arg7) { @@ -8916,7 +8920,8 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, set = NULL; } - ret = get_errno(sys_ppoll(pfd, nfds, timeout_ts, set, _NSIG/8)); + ret = get_errno(sys_ppoll(pfd, nfds, timeout_ts, + set, SIGSET_T_SIZE)); if (!is_error(ret) && arg3) { host_to_target_timespec(arg3, timeout_ts); -- cgit v1.1 From 2fe4fba115b5b9f7e6722720c57810e0fc64b9b5 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Fri, 27 May 2016 15:51:48 +0100 Subject: linux-user: Use safe_syscall for sigsuspend syscalls Use the safe_syscall wrapper for sigsuspend syscalls. This means that we will definitely deliver a signal that arrives before we do the sigsuspend call, rather than blocking first and delivering afterwards. Signed-off-by: Peter Maydell Reviewed-by: Laurent Vivier Signed-off-by: Riku Voipio --- linux-user/syscall.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index d2749a6..5bdfe2a 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -716,6 +716,7 @@ safe_syscall6(int, pselect6, int, nfds, fd_set *, readfds, fd_set *, writefds, \ fd_set *, exceptfds, struct timespec *, timeout, void *, sig) safe_syscall6(int,futex,int *,uaddr,int,op,int,val, \ const struct timespec *,timeout,int *,uaddr2,int,val3) +safe_syscall2(int, rt_sigsuspend, sigset_t *, newset, size_t, sigsetsize) static inline int host_to_target_sock_type(int host_type) { @@ -7648,7 +7649,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, target_to_host_old_sigset(&set, p); unlock_user(p, arg1, 0); #endif - ret = get_errno(sigsuspend(&set)); + ret = get_errno(safe_rt_sigsuspend(&set, SIGSET_T_SIZE)); } break; #endif @@ -7659,7 +7660,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, goto efault; target_to_host_sigset(&set, p); unlock_user(p, arg1, 0); - ret = get_errno(sigsuspend(&set)); + ret = get_errno(safe_rt_sigsuspend(&set, SIGSET_T_SIZE)); } break; case TARGET_NR_rt_sigtimedwait: -- cgit v1.1 From 3d3efba020da1de57a715e2087cf761ed0ad0904 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Fri, 27 May 2016 15:51:49 +0100 Subject: linux-user: Fix race between multiple signals If multiple host signals are received in quick succession they would be queued in TaskState then delivered to the guest in spite of signals being supposed to be blocked by the guest signal handler's sa_mask. Fix this by decoupling the guest signal mask from the host signal mask, so we can have protected sections where all host signals are blocked. In particular we block signals from when host_signal_handler() queues a signal from the guest until process_pending_signals() has unqueued it. We also block signals while we are manipulating the guest signal mask in emulation of sigprocmask and similar syscalls. Blocking host signals also ensures the correct behaviour with respect to multiple threads and the overrun count of timer related signals. Alas blocking and queuing in qemu is still needed because of virtual processor exceptions, SIGSEGV and SIGBUS. Blocking signals inside process_pending_signals() protects against concurrency problems that would otherwise happen if host_signal_handler() ran and accessed the signal data structures while process_pending_signals() was manipulating them. Since we now track the guest signal mask separately from that of the host, the sigsuspend system calls must track the signal mask passed to them, because when we process signals as we leave the sigsuspend the guest signal mask in force is that passed to sigsuspend. Signed-off-by: Timothy Edward Baldwin Message-id: 1441497448-32489-19-git-send-email-T.E.Baldwin99@members.leeds.ac.uk [PMM: make signal_pending a simple flag rather than a word with two flag bits; ensure we don't call block_signals() twice in sigreturn codepaths; document and assert() the guarantee that using do_sigprocmask() to get the current mask never fails; use the qemu atomics.h functions rather than raw volatile variable access; add extra commentary and documentation; block SIGSEGV/SIGBUS in block_signals() and in process_pending_signals() because they can't occur synchronously here; check the right do_sigprocmask() call for errors in ssetmask syscall; expand commit message; fixed sigsuspend() hanging] Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell Signed-off-by: Riku Voipio --- linux-user/qemu.h | 50 +++++++++++++++- linux-user/signal.c | 163 +++++++++++++++++++++++++++++++++++---------------- linux-user/syscall.c | 73 ++++++++++++++++------- 3 files changed, 213 insertions(+), 73 deletions(-) diff --git a/linux-user/qemu.h b/linux-user/qemu.h index f09b750..5138289 100644 --- a/linux-user/qemu.h +++ b/linux-user/qemu.h @@ -123,14 +123,33 @@ typedef struct TaskState { #endif uint32_t stack_base; int used; /* non zero if used */ - bool sigsegv_blocked; /* SIGSEGV blocked by guest */ struct image_info *info; struct linux_binprm *bprm; struct emulated_sigtable sigtab[TARGET_NSIG]; struct sigqueue sigqueue_table[MAX_SIGQUEUE_SIZE]; /* siginfo queue */ struct sigqueue *first_free; /* first free siginfo queue entry */ - int signal_pending; /* non zero if a signal may be pending */ + /* This thread's signal mask, as requested by the guest program. + * The actual signal mask of this thread may differ: + * + we don't let SIGSEGV and SIGBUS be blocked while running guest code + * + sometimes we block all signals to avoid races + */ + sigset_t signal_mask; + /* The signal mask imposed by a guest sigsuspend syscall, if we are + * currently in the middle of such a syscall + */ + sigset_t sigsuspend_mask; + /* Nonzero if we're leaving a sigsuspend and sigsuspend_mask is valid. */ + int in_sigsuspend; + + /* Nonzero if process_pending_signals() needs to do something (either + * handle a pending signal or unblock signals). + * This flag is written from a signal handler so should be accessed via + * the atomic_read() and atomic_write() functions. (It is not accessed + * from multiple threads.) + */ + int signal_pending; + } __attribute__((aligned(16))) TaskState; extern char *exec_path; @@ -235,6 +254,12 @@ unsigned long init_guest_space(unsigned long host_start, * It's also OK to implement these with safe_syscall, though it will be * a little less efficient if a signal is delivered at the 'wrong' moment. * + * Some non-interruptible syscalls need to be handled using block_signals() + * to block signals for the duration of the syscall. This mainly applies + * to code which needs to modify the data structures used by the + * host_signal_handler() function and the functions it calls, including + * all syscalls which change the thread's signal mask. + * * (2) Interruptible syscalls * * These are guest syscalls that can be interrupted by signals and @@ -266,6 +291,8 @@ unsigned long init_guest_space(unsigned long host_start, * you make in the implementation returns either -TARGET_ERESTARTSYS or * EINTR though.) * + * block_signals() cannot be used for interruptible syscalls. + * * * How and why the safe_syscall implementation works: * @@ -352,6 +379,25 @@ long do_sigreturn(CPUArchState *env); long do_rt_sigreturn(CPUArchState *env); abi_long do_sigaltstack(abi_ulong uss_addr, abi_ulong uoss_addr, abi_ulong sp); int do_sigprocmask(int how, const sigset_t *set, sigset_t *oldset); +/** + * block_signals: block all signals while handling this guest syscall + * + * Block all signals, and arrange that the signal mask is returned to + * its correct value for the guest before we resume execution of guest code. + * If this function returns non-zero, then the caller should immediately + * return -TARGET_ERESTARTSYS to the main loop, which will take the pending + * signal and restart execution of the syscall. + * If block_signals() returns zero, then the caller can continue with + * emulation of the system call knowing that no signals can be taken + * (and therefore that no race conditions will result). + * This should only be called once, because if it is called a second time + * it will always return non-zero. (Think of it like a mutex that can't + * be recursively locked.) + * Signals will be unblocked again by process_pending_signals(). + * + * Return value: non-zero if there was a pending signal, zero if not. + */ +int block_signals(void); /* Returns non zero if signal pending */ #ifdef TARGET_I386 /* vm86.c */ diff --git a/linux-user/signal.c b/linux-user/signal.c index 1b86a85..a89853d 100644 --- a/linux-user/signal.c +++ b/linux-user/signal.c @@ -190,61 +190,81 @@ void target_to_host_old_sigset(sigset_t *sigset, target_to_host_sigset(sigset, &d); } +int block_signals(void) +{ + TaskState *ts = (TaskState *)thread_cpu->opaque; + sigset_t set; + int pending; + + /* It's OK to block everything including SIGSEGV, because we won't + * run any further guest code before unblocking signals in + * process_pending_signals(). + */ + sigfillset(&set); + sigprocmask(SIG_SETMASK, &set, 0); + + pending = atomic_xchg(&ts->signal_pending, 1); + + return pending; +} + /* Wrapper for sigprocmask function * Emulates a sigprocmask in a safe way for the guest. Note that set and oldset - * are host signal set, not guest ones. This wraps the sigprocmask host calls - * that should be protected (calls originated from guest) + * are host signal set, not guest ones. Returns -TARGET_ERESTARTSYS if + * a signal was already pending and the syscall must be restarted, or + * 0 on success. + * If set is NULL, this is guaranteed not to fail. */ int do_sigprocmask(int how, const sigset_t *set, sigset_t *oldset) { - int ret; - sigset_t val; - sigset_t *temp = NULL; - CPUState *cpu = thread_cpu; - TaskState *ts = (TaskState *)cpu->opaque; - bool segv_was_blocked = ts->sigsegv_blocked; + TaskState *ts = (TaskState *)thread_cpu->opaque; + + if (oldset) { + *oldset = ts->signal_mask; + } if (set) { - bool has_sigsegv = sigismember(set, SIGSEGV); - val = *set; - temp = &val; + int i; - sigdelset(temp, SIGSEGV); + if (block_signals()) { + return -TARGET_ERESTARTSYS; + } switch (how) { case SIG_BLOCK: - if (has_sigsegv) { - ts->sigsegv_blocked = true; - } + sigorset(&ts->signal_mask, &ts->signal_mask, set); break; case SIG_UNBLOCK: - if (has_sigsegv) { - ts->sigsegv_blocked = false; + for (i = 1; i <= NSIG; ++i) { + if (sigismember(set, i)) { + sigdelset(&ts->signal_mask, i); + } } break; case SIG_SETMASK: - ts->sigsegv_blocked = has_sigsegv; + ts->signal_mask = *set; break; default: g_assert_not_reached(); } - } - - ret = sigprocmask(how, temp, oldset); - if (oldset && segv_was_blocked) { - sigaddset(oldset, SIGSEGV); + /* Silently ignore attempts to change blocking status of KILL or STOP */ + sigdelset(&ts->signal_mask, SIGKILL); + sigdelset(&ts->signal_mask, SIGSTOP); } - - return ret; + return 0; } #if !defined(TARGET_OPENRISC) && !defined(TARGET_UNICORE32) && \ !defined(TARGET_X86_64) -/* Just set the guest's signal mask to the specified value */ +/* Just set the guest's signal mask to the specified value; the + * caller is assumed to have called block_signals() already. + */ static void set_sigmask(const sigset_t *set) { - do_sigprocmask(SIG_SETMASK, set, NULL); + TaskState *ts = (TaskState *)thread_cpu->opaque; + + ts->signal_mask = *set; } #endif @@ -376,6 +396,7 @@ static int core_dump_signal(int sig) void signal_init(void) { + TaskState *ts = (TaskState *)thread_cpu->opaque; struct sigaction act; struct sigaction oact; int i, j; @@ -391,6 +412,9 @@ void signal_init(void) target_to_host_signal_table[j] = i; } + /* Set the signal mask from the host mask. */ + sigprocmask(0, 0, &ts->signal_mask); + /* set all host signal handlers. ALL signals are blocked during the handlers to serialize them. */ memset(sigact_table, 0, sizeof(sigact_table)); @@ -509,7 +533,7 @@ int queue_signal(CPUArchState *env, int sig, target_siginfo_t *info) queue = gdb_queuesig (); handler = sigact_table[sig - 1]._sa_handler; - if (ts->sigsegv_blocked && sig == TARGET_SIGSEGV) { + if (sig == TARGET_SIGSEGV && sigismember(&ts->signal_mask, SIGSEGV)) { /* Guest has blocked SIGSEGV but we got one anyway. Assume this * is a forced SIGSEGV (ie one the kernel handles via force_sig_info * because it got a real MMU fault). A blocked SIGSEGV in that @@ -565,7 +589,7 @@ int queue_signal(CPUArchState *env, int sig, target_siginfo_t *info) q->next = NULL; k->pending = 1; /* signal that a new signal is pending */ - ts->signal_pending = 1; + atomic_set(&ts->signal_pending, 1); return 1; /* indicates that the signal was queued */ } } @@ -583,6 +607,7 @@ static void host_signal_handler(int host_signum, siginfo_t *info, CPUArchState *env = thread_cpu->env_ptr; int sig; target_siginfo_t tinfo; + ucontext_t *uc = puc; /* the CPU emulator uses some host signals to detect exceptions, we forward to it some signals */ @@ -602,6 +627,16 @@ static void host_signal_handler(int host_signum, siginfo_t *info, host_to_target_siginfo_noswap(&tinfo, info); if (queue_signal(env, sig, &tinfo) == 1) { + /* Block host signals until target signal handler entered. We + * can't block SIGSEGV or SIGBUS while we're executing guest + * code in case the guest code provokes one in the window between + * now and it getting out to the main loop. Signals will be + * unblocked again in process_pending_signals(). + */ + sigfillset(&uc->uc_sigmask); + sigdelset(&uc->uc_sigmask, SIGSEGV); + sigdelset(&uc->uc_sigmask, SIGBUS); + /* interrupt the virtual CPU as soon as possible */ cpu_exit(thread_cpu); } @@ -2673,9 +2708,13 @@ void sparc64_get_context(CPUSPARCState *env) env->pc = env->npc; env->npc += 4; - err = 0; - - do_sigprocmask(0, NULL, &set); + /* If we're only reading the signal mask then do_sigprocmask() + * is guaranteed not to fail, which is important because we don't + * have any way to signal a failure or restart this operation since + * this is not a normal syscall. + */ + err = do_sigprocmask(0, NULL, &set); + assert(err == 0); host_to_target_sigset_internal(&target_set, &set); if (TARGET_NSIG_WORDS == 1) { __put_user(target_set.sig[0], @@ -5778,7 +5817,7 @@ static void handle_pending_signal(CPUArchState *cpu_env, int sig) { CPUState *cpu = ENV_GET_CPU(cpu_env); abi_ulong handler; - sigset_t set, old_set; + sigset_t set; target_sigset_t target_old_set; struct target_sigaction *sa; struct sigqueue *q; @@ -5801,7 +5840,7 @@ static void handle_pending_signal(CPUArchState *cpu_env, int sig) handler = sa->_sa_handler; } - if (ts->sigsegv_blocked && sig == TARGET_SIGSEGV) { + if (sig == TARGET_SIGSEGV && sigismember(&ts->signal_mask, SIGSEGV)) { /* Guest has blocked SIGSEGV but we got one anyway. Assume this * is a forced SIGSEGV (ie one the kernel handles via force_sig_info * because it got a real MMU fault), and treat as if default handler. @@ -5825,17 +5864,23 @@ static void handle_pending_signal(CPUArchState *cpu_env, int sig) force_sig(sig); } else { /* compute the blocked signals during the handler execution */ + sigset_t *blocked_set; + target_to_host_sigset(&set, &sa->sa_mask); /* SA_NODEFER indicates that the current signal should not be blocked during the handler */ if (!(sa->sa_flags & TARGET_SA_NODEFER)) sigaddset(&set, target_to_host_signal(sig)); - /* block signals in the handler using Linux */ - do_sigprocmask(SIG_BLOCK, &set, &old_set); /* save the previous blocked signal state to restore it at the end of the signal execution (see do_sigreturn) */ - host_to_target_sigset_internal(&target_old_set, &old_set); + host_to_target_sigset_internal(&target_old_set, &ts->signal_mask); + + /* block signals in the handler */ + blocked_set = ts->in_sigsuspend ? + &ts->sigsuspend_mask : &ts->signal_mask; + sigorset(&ts->signal_mask, blocked_set, &set); + ts->in_sigsuspend = 0; /* if the CPU is in VM86 mode, we restore the 32 bit values */ #if defined(TARGET_I386) && !defined(TARGET_X86_64) @@ -5869,18 +5914,38 @@ void process_pending_signals(CPUArchState *cpu_env) CPUState *cpu = ENV_GET_CPU(cpu_env); int sig; TaskState *ts = cpu->opaque; + sigset_t set; + sigset_t *blocked_set; - if (!ts->signal_pending) - return; - - /* FIXME: This is not threadsafe. */ - for(sig = 1; sig <= TARGET_NSIG; sig++) { - if (ts->sigtab[sig - 1].pending) { - handle_pending_signal(cpu_env, sig); - return; + while (atomic_read(&ts->signal_pending)) { + /* FIXME: This is not threadsafe. */ + sigfillset(&set); + sigprocmask(SIG_SETMASK, &set, 0); + + for (sig = 1; sig <= TARGET_NSIG; sig++) { + blocked_set = ts->in_sigsuspend ? + &ts->sigsuspend_mask : &ts->signal_mask; + + if (ts->sigtab[sig - 1].pending && + (!sigismember(blocked_set, + target_to_host_signal_table[sig]) + || sig == TARGET_SIGSEGV)) { + handle_pending_signal(cpu_env, sig); + /* Restart scan from the beginning */ + sig = 1; + } } - } - /* if no signal is pending, just return */ - ts->signal_pending = 0; - return; + + /* if no signal is pending, unblock signals and recheck (the act + * of unblocking might cause us to take another host signal which + * will set signal_pending again). + */ + atomic_set(&ts->signal_pending, 0); + ts->in_sigsuspend = 0; + set = ts->signal_mask; + sigdelset(&set, SIGSEGV); + sigdelset(&set, SIGBUS); + sigprocmask(SIG_SETMASK, &set, 0); + } + ts->in_sigsuspend = 0; } diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 5bdfe2a..639b328 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -5387,6 +5387,7 @@ static int do_fork(CPUArchState *env, unsigned int flags, abi_ulong newsp, new_cpu->opaque = ts; ts->bprm = parent_ts->bprm; ts->info = parent_ts->info; + ts->signal_mask = parent_ts->signal_mask; nptl_flags = flags; flags &= ~CLONE_NPTL_FLAGS2; @@ -7482,9 +7483,11 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, { sigset_t cur_set; abi_ulong target_set; - do_sigprocmask(0, NULL, &cur_set); - host_to_target_old_sigset(&target_set, &cur_set); - ret = target_set; + ret = do_sigprocmask(0, NULL, &cur_set); + if (!ret) { + host_to_target_old_sigset(&target_set, &cur_set); + ret = target_set; + } } break; #endif @@ -7493,12 +7496,20 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, { sigset_t set, oset, cur_set; abi_ulong target_set = arg1; - do_sigprocmask(0, NULL, &cur_set); + /* We only have one word of the new mask so we must read + * the rest of it with do_sigprocmask() and OR in this word. + * We are guaranteed that a do_sigprocmask() that only queries + * the signal mask will not fail. + */ + ret = do_sigprocmask(0, NULL, &cur_set); + assert(!ret); target_to_host_old_sigset(&set, &target_set); sigorset(&set, &set, &cur_set); - do_sigprocmask(SIG_SETMASK, &set, &oset); - host_to_target_old_sigset(&target_set, &oset); - ret = target_set; + ret = do_sigprocmask(SIG_SETMASK, &set, &oset); + if (!ret) { + host_to_target_old_sigset(&target_set, &oset); + ret = target_set; + } } break; #endif @@ -7527,7 +7538,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, mask = arg2; target_to_host_old_sigset(&set, &mask); - ret = get_errno(do_sigprocmask(how, &set, &oldset)); + ret = do_sigprocmask(how, &set, &oldset); if (!is_error(ret)) { host_to_target_old_sigset(&mask, &oldset); ret = mask; @@ -7561,7 +7572,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, how = 0; set_ptr = NULL; } - ret = get_errno(do_sigprocmask(how, set_ptr, &oldset)); + ret = do_sigprocmask(how, set_ptr, &oldset); if (!is_error(ret) && arg3) { if (!(p = lock_user(VERIFY_WRITE, arg3, sizeof(target_sigset_t), 0))) goto efault; @@ -7601,7 +7612,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, how = 0; set_ptr = NULL; } - ret = get_errno(do_sigprocmask(how, set_ptr, &oldset)); + ret = do_sigprocmask(how, set_ptr, &oldset); if (!is_error(ret) && arg3) { if (!(p = lock_user(VERIFY_WRITE, arg3, sizeof(target_sigset_t), 0))) goto efault; @@ -7639,28 +7650,36 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, #ifdef TARGET_NR_sigsuspend case TARGET_NR_sigsuspend: { - sigset_t set; + TaskState *ts = cpu->opaque; #if defined(TARGET_ALPHA) abi_ulong mask = arg1; - target_to_host_old_sigset(&set, &mask); + target_to_host_old_sigset(&ts->sigsuspend_mask, &mask); #else if (!(p = lock_user(VERIFY_READ, arg1, sizeof(target_sigset_t), 1))) goto efault; - target_to_host_old_sigset(&set, p); + target_to_host_old_sigset(&ts->sigsuspend_mask, p); unlock_user(p, arg1, 0); #endif - ret = get_errno(safe_rt_sigsuspend(&set, SIGSET_T_SIZE)); + ret = get_errno(safe_rt_sigsuspend(&ts->sigsuspend_mask, + SIGSET_T_SIZE)); + if (ret != -TARGET_ERESTARTSYS) { + ts->in_sigsuspend = 1; + } } break; #endif case TARGET_NR_rt_sigsuspend: { - sigset_t set; + TaskState *ts = cpu->opaque; if (!(p = lock_user(VERIFY_READ, arg1, sizeof(target_sigset_t), 1))) goto efault; - target_to_host_sigset(&set, p); + target_to_host_sigset(&ts->sigsuspend_mask, p); unlock_user(p, arg1, 0); - ret = get_errno(safe_rt_sigsuspend(&set, SIGSET_T_SIZE)); + ret = get_errno(safe_rt_sigsuspend(&ts->sigsuspend_mask, + SIGSET_T_SIZE)); + if (ret != -TARGET_ERESTARTSYS) { + ts->in_sigsuspend = 1; + } } break; case TARGET_NR_rt_sigtimedwait: @@ -7706,11 +7725,19 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, break; #ifdef TARGET_NR_sigreturn case TARGET_NR_sigreturn: - ret = do_sigreturn(cpu_env); + if (block_signals()) { + ret = -TARGET_ERESTARTSYS; + } else { + ret = do_sigreturn(cpu_env); + } break; #endif case TARGET_NR_rt_sigreturn: - ret = do_rt_sigreturn(cpu_env); + if (block_signals()) { + ret = -TARGET_ERESTARTSYS; + } else { + ret = do_rt_sigreturn(cpu_env); + } break; case TARGET_NR_sethostname: if (!(p = lock_user_string(arg1))) @@ -9764,9 +9791,11 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, } mask = arg2; target_to_host_old_sigset(&set, &mask); - do_sigprocmask(how, &set, &oldset); - host_to_target_old_sigset(&mask, &oldset); - ret = mask; + ret = do_sigprocmask(how, &set, &oldset); + if (!ret) { + host_to_target_old_sigset(&mask, &oldset); + ret = mask; + } } break; #endif -- cgit v1.1 From c19c1578f8a9b894f5e368e35139620a98bf6a69 Mon Sep 17 00:00:00 2001 From: Timothy E Baldwin Date: Fri, 27 May 2016 15:51:50 +0100 Subject: linux-user: Remove redundant default action check in queue_signal() Both queue_signal() and process_pending_signals() did check for default actions of signals, this is redundant and also causes fatal and stopping signals to incorrectly cause guest system calls to be interrupted. The code in queue_signal() is removed. Signed-off-by: Timothy Edward Baldwin Message-id: 1441497448-32489-21-git-send-email-T.E.Baldwin99@members.leeds.ac.uk Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell Signed-off-by: Riku Voipio --- linux-user/signal.c | 37 ------------------------------------- 1 file changed, 37 deletions(-) diff --git a/linux-user/signal.c b/linux-user/signal.c index a89853d..2c6790d 100644 --- a/linux-user/signal.c +++ b/linux-user/signal.c @@ -525,46 +525,10 @@ int queue_signal(CPUArchState *env, int sig, target_siginfo_t *info) TaskState *ts = cpu->opaque; struct emulated_sigtable *k; struct sigqueue *q, **pq; - abi_ulong handler; - int queue; trace_user_queue_signal(env, sig); k = &ts->sigtab[sig - 1]; - queue = gdb_queuesig (); - handler = sigact_table[sig - 1]._sa_handler; - if (sig == TARGET_SIGSEGV && sigismember(&ts->signal_mask, SIGSEGV)) { - /* Guest has blocked SIGSEGV but we got one anyway. Assume this - * is a forced SIGSEGV (ie one the kernel handles via force_sig_info - * because it got a real MMU fault). A blocked SIGSEGV in that - * situation is treated as if using the default handler. This is - * not correct if some other process has randomly sent us a SIGSEGV - * via kill(), but that is not easy to distinguish at this point, - * so we assume it doesn't happen. - */ - handler = TARGET_SIG_DFL; - } - - if (!queue && handler == TARGET_SIG_DFL) { - if (sig == TARGET_SIGTSTP || sig == TARGET_SIGTTIN || sig == TARGET_SIGTTOU) { - kill(getpid(),SIGSTOP); - return 0; - } else - /* default handler : ignore some signal. The other are fatal */ - if (sig != TARGET_SIGCHLD && - sig != TARGET_SIGURG && - sig != TARGET_SIGWINCH && - sig != TARGET_SIGCONT) { - force_sig(sig); - } else { - return 0; /* indicate ignored */ - } - } else if (!queue && handler == TARGET_SIG_IGN) { - /* ignore signal */ - return 0; - } else if (!queue && handler == TARGET_SIG_ERR) { - force_sig(sig); - } else { pq = &k->first; if (sig < TARGET_SIGRTMIN) { /* if non real time signal, we queue exactly one signal */ @@ -591,7 +555,6 @@ int queue_signal(CPUArchState *env, int sig, target_siginfo_t *info) /* signal that a new signal is pending */ atomic_set(&ts->signal_pending, 1); return 1; /* indicates that the signal was queued */ - } } #ifndef HAVE_SAFE_SYSCALL -- cgit v1.1 From 8fdb9fef3d63b5e245a496e4999ebb599b9b9496 Mon Sep 17 00:00:00 2001 From: Timothy E Baldwin Date: Fri, 27 May 2016 15:51:51 +0100 Subject: linux-user: Remove redundant gdb_queuesig() Signed-off-by: Timothy Edward Baldwin Message-id: 1441497448-32489-22-git-send-email-T.E.Baldwin99@members.leeds.ac.uk Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell Signed-off-by: Riku Voipio --- gdbstub.c | 13 ------------- include/exec/gdbstub.h | 1 - 2 files changed, 14 deletions(-) diff --git a/gdbstub.c b/gdbstub.c index 8155eed..b3101bf 100644 --- a/gdbstub.c +++ b/gdbstub.c @@ -1494,19 +1494,6 @@ void gdb_exit(CPUArchState *env, int code) #ifdef CONFIG_USER_ONLY int -gdb_queuesig (void) -{ - GDBState *s; - - s = gdbserver_state; - - if (gdbserver_fd < 0 || s->fd < 0) - return 0; - else - return 1; -} - -int gdb_handlesig(CPUState *cpu, int sig) { GDBState *s; diff --git a/include/exec/gdbstub.h b/include/exec/gdbstub.h index 8e3f8d8..f9708bb 100644 --- a/include/exec/gdbstub.h +++ b/include/exec/gdbstub.h @@ -48,7 +48,6 @@ int use_gdb_syscalls(void); void gdb_set_stop_cpu(CPUState *cpu); void gdb_exit(CPUArchState *, int); #ifdef CONFIG_USER_ONLY -int gdb_queuesig (void); int gdb_handlesig(CPUState *, int); void gdb_signalled(CPUArchState *, int); void gdbserver_fork(CPUState *); -- cgit v1.1 From 907f5fddaa673ac3f6dc955df6eac2870e3603f4 Mon Sep 17 00:00:00 2001 From: Timothy E Baldwin Date: Fri, 27 May 2016 15:51:52 +0100 Subject: linux-user: Remove real-time signal queuing As host signals are now blocked whenever guest signals are blocked, the queue of realtime signals is now in Linux. The QEMU queue is now redundant and can be removed. (We already did not queue non-RT signals, and none of the calls to queue_signal() except the one in host_signal_handler() pass an RT signal number.) Signed-off-by: Timothy Edward Baldwin Message-id: 1441497448-32489-23-git-send-email-T.E.Baldwin99@members.leeds.ac.uk Reviewed-by: Peter Maydell [PMM: minor commit message tweak] Signed-off-by: Peter Maydell Signed-off-by: Riku Voipio --- linux-user/main.c | 7 ------ linux-user/qemu.h | 11 +-------- linux-user/signal.c | 70 ++++++++++------------------------------------------- 3 files changed, 14 insertions(+), 74 deletions(-) diff --git a/linux-user/main.c b/linux-user/main.c index b2bc6ab..b6da0ba 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -3794,14 +3794,7 @@ void stop_all_tasks(void) /* Assumes contents are already zeroed. */ void init_task_state(TaskState *ts) { - int i; - ts->used = 1; - ts->first_free = ts->sigqueue_table; - for (i = 0; i < MAX_SIGQUEUE_SIZE - 1; i++) { - ts->sigqueue_table[i].next = &ts->sigqueue_table[i + 1]; - } - ts->sigqueue_table[i].next = NULL; } CPUArchState *cpu_copy(CPUArchState *env) diff --git a/linux-user/qemu.h b/linux-user/qemu.h index 5138289..b201f90 100644 --- a/linux-user/qemu.h +++ b/linux-user/qemu.h @@ -78,16 +78,9 @@ struct vm86_saved_state { #define MAX_SIGQUEUE_SIZE 1024 -struct sigqueue { - struct sigqueue *next; - target_siginfo_t info; -}; - struct emulated_sigtable { int pending; /* true if signal is pending */ - struct sigqueue *first; - struct sigqueue info; /* in order to always have memory for the - first signal, we put it here */ + target_siginfo_t info; }; /* NOTE: we force a big alignment so that the stack stored after is @@ -127,8 +120,6 @@ typedef struct TaskState { struct linux_binprm *bprm; struct emulated_sigtable sigtab[TARGET_NSIG]; - struct sigqueue sigqueue_table[MAX_SIGQUEUE_SIZE]; /* siginfo queue */ - struct sigqueue *first_free; /* first free siginfo queue entry */ /* This thread's signal mask, as requested by the guest program. * The actual signal mask of this thread may differ: * + we don't let SIGSEGV and SIGBUS be blocked while running guest code diff --git a/linux-user/signal.c b/linux-user/signal.c index 2c6790d..5db1c0b 100644 --- a/linux-user/signal.c +++ b/linux-user/signal.c @@ -441,27 +441,6 @@ void signal_init(void) } } -/* signal queue handling */ - -static inline struct sigqueue *alloc_sigqueue(CPUArchState *env) -{ - CPUState *cpu = ENV_GET_CPU(env); - TaskState *ts = cpu->opaque; - struct sigqueue *q = ts->first_free; - if (!q) - return NULL; - ts->first_free = q->next; - return q; -} - -static inline void free_sigqueue(CPUArchState *env, struct sigqueue *q) -{ - CPUState *cpu = ENV_GET_CPU(env); - TaskState *ts = cpu->opaque; - - q->next = ts->first_free; - ts->first_free = q; -} /* abort execution with signal */ static void QEMU_NORETURN force_sig(int target_sig) @@ -524,37 +503,20 @@ int queue_signal(CPUArchState *env, int sig, target_siginfo_t *info) CPUState *cpu = ENV_GET_CPU(env); TaskState *ts = cpu->opaque; struct emulated_sigtable *k; - struct sigqueue *q, **pq; trace_user_queue_signal(env, sig); k = &ts->sigtab[sig - 1]; - pq = &k->first; - if (sig < TARGET_SIGRTMIN) { - /* if non real time signal, we queue exactly one signal */ - if (!k->pending) - q = &k->info; - else - return 0; - } else { - if (!k->pending) { - /* first signal */ - q = &k->info; - } else { - q = alloc_sigqueue(env); - if (!q) - return -EAGAIN; - while (*pq != NULL) - pq = &(*pq)->next; - } - } - *pq = q; - q->info = *info; - q->next = NULL; - k->pending = 1; - /* signal that a new signal is pending */ - atomic_set(&ts->signal_pending, 1); - return 1; /* indicates that the signal was queued */ + /* we queue exactly one signal */ + if (k->pending) { + return 0; + } + + k->info = *info; + k->pending = 1; + /* signal that a new signal is pending */ + atomic_set(&ts->signal_pending, 1); + return 1; /* indicates that the signal was queued */ } #ifndef HAVE_SAFE_SYSCALL @@ -5783,16 +5745,12 @@ static void handle_pending_signal(CPUArchState *cpu_env, int sig) sigset_t set; target_sigset_t target_old_set; struct target_sigaction *sa; - struct sigqueue *q; TaskState *ts = cpu->opaque; struct emulated_sigtable *k = &ts->sigtab[sig - 1]; trace_user_handle_signal(cpu_env, sig); /* dequeue signal */ - q = k->first; - k->first = q->next; - if (!k->first) - k->pending = 0; + k->pending = 0; sig = gdb_handlesig(cpu, sig); if (!sig) { @@ -5857,10 +5815,10 @@ static void handle_pending_signal(CPUArchState *cpu_env, int sig) #if defined(TARGET_ABI_MIPSN32) || defined(TARGET_ABI_MIPSN64) \ || defined(TARGET_OPENRISC) || defined(TARGET_TILEGX) /* These targets do not have traditional signals. */ - setup_rt_frame(sig, sa, &q->info, &target_old_set, cpu_env); + setup_rt_frame(sig, sa, &k->info, &target_old_set, cpu_env); #else if (sa->sa_flags & TARGET_SA_SIGINFO) - setup_rt_frame(sig, sa, &q->info, &target_old_set, cpu_env); + setup_rt_frame(sig, sa, &k->info, &target_old_set, cpu_env); else setup_frame(sig, sa, &target_old_set, cpu_env); #endif @@ -5868,8 +5826,6 @@ static void handle_pending_signal(CPUArchState *cpu_env, int sig) sa->_sa_handler = TARGET_SIG_DFL; } } - if (q != &k->info) - free_sigqueue(cpu_env, q); } void process_pending_signals(CPUArchState *cpu_env) -- cgit v1.1 From 655ed67c2a248cf0a887229d8492d6ddc0518545 Mon Sep 17 00:00:00 2001 From: Timothy E Baldwin Date: Fri, 27 May 2016 15:51:53 +0100 Subject: linux-user: Queue synchronous signals separately If a synchronous signal and an asynchronous signal arrive near simultaneously, and the signal number of the asynchronous signal is lower than that of the synchronous signal the the handler for the asynchronous would be called first, and then the handler for the synchronous signal would be called within or after the first handler with an incorrect context. This is fixed by queuing synchronous signals separately. Note that this does risk delaying a asynchronous signal until the synchronous signal handler returns rather than handling the signal on another thread, but this seems unlikely to cause problems for real guest programs and is unavoidable unless we could guarantee to roll back and reexecute whatever guest instruction caused the synchronous signal (which would be a bit odd if we've already logged its execution, for instance, and would require careful analysis of all guest CPUs to check it was possible in all cases). Signed-off-by: Timothy Edward Baldwin Message-id: 1441497448-32489-24-git-send-email-T.E.Baldwin99@members.leeds.ac.uk [PMM: added a comment] Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell Signed-off-by: Riku Voipio --- linux-user/qemu.h | 1 + linux-user/signal.c | 74 ++++++++++++++++++++++++++++++----------------------- 2 files changed, 43 insertions(+), 32 deletions(-) diff --git a/linux-user/qemu.h b/linux-user/qemu.h index b201f90..6bd7b32 100644 --- a/linux-user/qemu.h +++ b/linux-user/qemu.h @@ -119,6 +119,7 @@ typedef struct TaskState { struct image_info *info; struct linux_binprm *bprm; + struct emulated_sigtable sync_signal; struct emulated_sigtable sigtab[TARGET_NSIG]; /* This thread's signal mask, as requested by the guest program. * The actual signal mask of this thread may differ: diff --git a/linux-user/signal.c b/linux-user/signal.c index 5db1c0b..f489028 100644 --- a/linux-user/signal.c +++ b/linux-user/signal.c @@ -502,18 +502,11 @@ int queue_signal(CPUArchState *env, int sig, target_siginfo_t *info) { CPUState *cpu = ENV_GET_CPU(env); TaskState *ts = cpu->opaque; - struct emulated_sigtable *k; trace_user_queue_signal(env, sig); - k = &ts->sigtab[sig - 1]; - - /* we queue exactly one signal */ - if (k->pending) { - return 0; - } - k->info = *info; - k->pending = 1; + ts->sync_signal.info = *info; + ts->sync_signal.pending = sig; /* signal that a new signal is pending */ atomic_set(&ts->signal_pending, 1); return 1; /* indicates that the signal was queued */ @@ -530,9 +523,13 @@ static void host_signal_handler(int host_signum, siginfo_t *info, void *puc) { CPUArchState *env = thread_cpu->env_ptr; + CPUState *cpu = ENV_GET_CPU(env); + TaskState *ts = cpu->opaque; + int sig; target_siginfo_t tinfo; ucontext_t *uc = puc; + struct emulated_sigtable *k; /* the CPU emulator uses some host signals to detect exceptions, we forward to it some signals */ @@ -551,20 +548,23 @@ static void host_signal_handler(int host_signum, siginfo_t *info, rewind_if_in_safe_syscall(puc); host_to_target_siginfo_noswap(&tinfo, info); - if (queue_signal(env, sig, &tinfo) == 1) { - /* Block host signals until target signal handler entered. We - * can't block SIGSEGV or SIGBUS while we're executing guest - * code in case the guest code provokes one in the window between - * now and it getting out to the main loop. Signals will be - * unblocked again in process_pending_signals(). - */ - sigfillset(&uc->uc_sigmask); - sigdelset(&uc->uc_sigmask, SIGSEGV); - sigdelset(&uc->uc_sigmask, SIGBUS); + k = &ts->sigtab[sig - 1]; + k->info = tinfo; + k->pending = sig; + ts->signal_pending = 1; + + /* Block host signals until target signal handler entered. We + * can't block SIGSEGV or SIGBUS while we're executing guest + * code in case the guest code provokes one in the window between + * now and it getting out to the main loop. Signals will be + * unblocked again in process_pending_signals(). + */ + sigfillset(&uc->uc_sigmask); + sigdelset(&uc->uc_sigmask, SIGSEGV); + sigdelset(&uc->uc_sigmask, SIGBUS); - /* interrupt the virtual CPU as soon as possible */ - cpu_exit(thread_cpu); - } + /* interrupt the virtual CPU as soon as possible */ + cpu_exit(thread_cpu); } /* do_sigaltstack() returns target values and errnos. */ @@ -5761,14 +5761,6 @@ static void handle_pending_signal(CPUArchState *cpu_env, int sig) handler = sa->_sa_handler; } - if (sig == TARGET_SIGSEGV && sigismember(&ts->signal_mask, SIGSEGV)) { - /* Guest has blocked SIGSEGV but we got one anyway. Assume this - * is a forced SIGSEGV (ie one the kernel handles via force_sig_info - * because it got a real MMU fault), and treat as if default handler. - */ - handler = TARGET_SIG_DFL; - } - if (handler == TARGET_SIG_DFL) { /* default handler : ignore some signal. The other are job control or fatal */ if (sig == TARGET_SIGTSTP || sig == TARGET_SIGTTIN || sig == TARGET_SIGTTOU) { @@ -5841,14 +5833,32 @@ void process_pending_signals(CPUArchState *cpu_env) sigfillset(&set); sigprocmask(SIG_SETMASK, &set, 0); + sig = ts->sync_signal.pending; + if (sig) { + /* Synchronous signals are forced, + * see force_sig_info() and callers in Linux + * Note that not all of our queue_signal() calls in QEMU correspond + * to force_sig_info() calls in Linux (some are send_sig_info()). + * However it seems like a kernel bug to me to allow the process + * to block a synchronous signal since it could then just end up + * looping round and round indefinitely. + */ + if (sigismember(&ts->signal_mask, target_to_host_signal_table[sig]) + || sigact_table[sig - 1]._sa_handler == TARGET_SIG_IGN) { + sigdelset(&ts->signal_mask, target_to_host_signal_table[sig]); + sigact_table[sig - 1]._sa_handler = TARGET_SIG_DFL; + } + + handle_pending_signal(cpu_env, sig); + } + for (sig = 1; sig <= TARGET_NSIG; sig++) { blocked_set = ts->in_sigsuspend ? &ts->sigsuspend_mask : &ts->signal_mask; if (ts->sigtab[sig - 1].pending && (!sigismember(blocked_set, - target_to_host_signal_table[sig]) - || sig == TARGET_SIGSEGV)) { + target_to_host_signal_table[sig]))) { handle_pending_signal(cpu_env, sig); /* Restart scan from the beginning */ sig = 1; -- cgit v1.1 From ef6a778ea2af4ebcf08a84cc9314cfe7cf2a2299 Mon Sep 17 00:00:00 2001 From: Timothy E Baldwin Date: Fri, 27 May 2016 15:51:54 +0100 Subject: linux-user: Block signals during sigaction() handling Block signals while emulating sigaction. This is a non-interruptible syscall, and using block_signals() avoids races where the host signal handler is invoked and tries to examine the signal handler data structures while we are updating them. Signed-off-by: Timothy Edward Baldwin Message-id: 1441497448-32489-29-git-send-email-T.E.Baldwin99@members.leeds.ac.uk [PMM: expanded commit message] Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell Signed-off-by: Riku Voipio --- linux-user/signal.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/linux-user/signal.c b/linux-user/signal.c index f489028..b21d6bf 100644 --- a/linux-user/signal.c +++ b/linux-user/signal.c @@ -640,7 +640,7 @@ out: return ret; } -/* do_sigaction() return host values and errnos */ +/* do_sigaction() return target values and host errnos */ int do_sigaction(int sig, const struct target_sigaction *act, struct target_sigaction *oact) { @@ -649,8 +649,14 @@ int do_sigaction(int sig, const struct target_sigaction *act, int host_sig; int ret = 0; - if (sig < 1 || sig > TARGET_NSIG || sig == TARGET_SIGKILL || sig == TARGET_SIGSTOP) - return -EINVAL; + if (sig < 1 || sig > TARGET_NSIG || sig == TARGET_SIGKILL || sig == TARGET_SIGSTOP) { + return -TARGET_EINVAL; + } + + if (block_signals()) { + return -TARGET_ERESTARTSYS; + } + k = &sigact_table[sig - 1]; if (oact) { __put_user(k->_sa_handler, &oact->_sa_handler); -- cgit v1.1 From f59ec606104ade2443179231fc7a3cb98683ac85 Mon Sep 17 00:00:00 2001 From: Timothy E Baldwin Date: Fri, 27 May 2016 15:51:55 +0100 Subject: linux-user: pause() should not pause if signal pending Fix races between signal handling and the pause syscall by reimplementing it using block_signals() and sigsuspend(). (Using safe_syscall(pause) would also work, except that the pause syscall doesn't exist on all architectures.) Signed-off-by: Timothy Edward Baldwin Message-id: 1441497448-32489-28-git-send-email-T.E.Baldwin99@members.leeds.ac.uk [PMM: tweaked commit message] Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell Signed-off-by: Riku Voipio --- linux-user/syscall.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 639b328..aa5517c 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -7059,7 +7059,10 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, #endif #ifdef TARGET_NR_pause /* not on alpha */ case TARGET_NR_pause: - ret = get_errno(pause()); + if (!block_signals()) { + sigsuspend(&((TaskState *)cpu->opaque)->signal_mask); + } + ret = -TARGET_EINTR; break; #endif #ifdef TARGET_NR_utime -- cgit v1.1 From a0995886e23323258d0612c71f8b0416a02806f2 Mon Sep 17 00:00:00 2001 From: Timothy E Baldwin Date: Fri, 27 May 2016 15:51:56 +0100 Subject: linux-user: Restart exit() if signal pending Without this a signal could vanish on thread exit. Signed-off-by: Timothy Edward Baldwin Message-id: 1441497448-32489-26-git-send-email-T.E.Baldwin99@members.leeds.ac.uk Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell Signed-off-by: Riku Voipio --- linux-user/syscall.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index aa5517c..f3061a9 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -6640,8 +6640,12 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, However in threaded applictions it is used for thread termination, and _exit_group is used for application termination. Do thread termination if we have more then one thread. */ - /* FIXME: This probably breaks if a signal arrives. We should probably - be disabling signals. */ + + if (block_signals()) { + ret = -TARGET_ERESTARTSYS; + break; + } + if (CPU_NEXT(first_cpu)) { TaskState *ts; -- cgit v1.1 From bef653d92e4e1c47c60f5c020ff6de69dba83478 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Fri, 27 May 2016 15:51:57 +0100 Subject: linux-user: Use safe_syscall for kill, tkill and tgkill syscalls Use the safe_syscall wrapper for the kill, tkill and tgkill syscalls. Without this, if a thread sent a SIGKILL to itself it could kill the thread before we had a chance to process a signal that arrived just before the SIGKILL, and that signal would get lost. We drop all the ifdeffery for tkill and tgkill, because every guest architecture we support implements them, and they've been in Linux since 2003 so we can assume the host headers define the __NR_tkill and __NR_tgkill constants. Signed-off-by: Peter Maydell Reviewed-by: Laurent Vivier Signed-off-by: Riku Voipio --- linux-user/syscall.c | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index f3061a9..c0d086c 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -191,8 +191,6 @@ static type name (type1 arg1,type2 arg2,type3 arg3,type4 arg4,type5 arg5, \ #define __NR_sys_getpriority __NR_getpriority #define __NR_sys_rt_sigqueueinfo __NR_rt_sigqueueinfo #define __NR_sys_syslog __NR_syslog -#define __NR_sys_tgkill __NR_tgkill -#define __NR_sys_tkill __NR_tkill #define __NR_sys_futex __NR_futex #define __NR_sys_inotify_init __NR_inotify_init #define __NR_sys_inotify_add_watch __NR_inotify_add_watch @@ -230,12 +228,6 @@ _syscall5(int, _llseek, uint, fd, ulong, hi, ulong, lo, #endif _syscall3(int,sys_rt_sigqueueinfo,int,pid,int,sig,siginfo_t *,uinfo) _syscall3(int,sys_syslog,int,type,char*,bufp,int,len) -#if defined(TARGET_NR_tgkill) && defined(__NR_tgkill) -_syscall3(int,sys_tgkill,int,tgid,int,pid,int,sig) -#endif -#if defined(TARGET_NR_tkill) && defined(__NR_tkill) -_syscall2(int,sys_tkill,int,tid,int,sig) -#endif #ifdef __NR_exit_group _syscall1(int,exit_group,int,error_code) #endif @@ -717,6 +709,9 @@ safe_syscall6(int, pselect6, int, nfds, fd_set *, readfds, fd_set *, writefds, \ safe_syscall6(int,futex,int *,uaddr,int,op,int,val, \ const struct timespec *,timeout,int *,uaddr2,int,val3) safe_syscall2(int, rt_sigsuspend, sigset_t *, newset, size_t, sigsetsize) +safe_syscall2(int, kill, pid_t, pid, int, sig) +safe_syscall2(int, tkill, int, tid, int, sig) +safe_syscall3(int, tgkill, int, tgid, int, pid, int, sig) static inline int host_to_target_sock_type(int host_type) { @@ -7169,7 +7164,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, ret = 0; break; case TARGET_NR_kill: - ret = get_errno(kill(arg1, target_to_host_signal(arg2))); + ret = get_errno(safe_kill(arg1, target_to_host_signal(arg2))); break; #ifdef TARGET_NR_rename case TARGET_NR_rename: @@ -10405,18 +10400,14 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, break; #endif -#if defined(TARGET_NR_tkill) && defined(__NR_tkill) case TARGET_NR_tkill: - ret = get_errno(sys_tkill((int)arg1, target_to_host_signal(arg2))); + ret = get_errno(safe_tkill((int)arg1, target_to_host_signal(arg2))); break; -#endif -#if defined(TARGET_NR_tgkill) && defined(__NR_tgkill) case TARGET_NR_tgkill: - ret = get_errno(sys_tgkill((int)arg1, (int)arg2, + ret = get_errno(safe_tgkill((int)arg1, (int)arg2, target_to_host_signal(arg3))); - break; -#endif + break; #ifdef TARGET_NR_set_robust_list case TARGET_NR_set_robust_list: -- cgit v1.1 From 7d92d34ee4c7988f5ef6c8a5ed23d2c3e0837253 Mon Sep 17 00:00:00 2001 From: Timothy E Baldwin Date: Fri, 27 May 2016 15:51:58 +0100 Subject: linux-user: Restart fork() if signals pending If there is a signal pending during fork() the signal handler will erroneously be called in both the parent and child, so handle any pending signals first. Signed-off-by: Timothy Edward Baldwin Message-id: 1441497448-32489-20-git-send-email-T.E.Baldwin99@members.leeds.ac.uk Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell Signed-off-by: Riku Voipio --- linux-user/syscall.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index c0d086c..a2d591e 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -5437,6 +5437,11 @@ static int do_fork(CPUArchState *env, unsigned int flags, abi_ulong newsp, if ((flags & ~(CSIGNAL | CLONE_NPTL_FLAGS2)) != 0) { return -TARGET_EINVAL; } + + if (block_signals()) { + return -TARGET_ERESTARTSYS; + } + fork_start(); ret = fork(); if (ret == 0) { -- cgit v1.1 From a70dadc7f1a3e96a7179c6c3a6ccd1a0ea65760a Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Fri, 27 May 2016 15:51:59 +0100 Subject: linux-user: Use both si_code and si_signo when converting siginfo_t The siginfo_t struct includes a union. The correct way to identify which fields of the union are relevant is complicated, because we have to use a combination of the si_code and si_signo to figure out which of the union's members are valid. (Within the host kernel it is always possible to tell, but the kernel carefully avoids giving userspace the high 16 bits of si_code, so we don't have the information to do this the easy way...) We therefore make our best guess, bearing in mind that a guest can spoof most of the si_codes via rt_sigqueueinfo() if it likes. Once we have made our guess, we record it in the top 16 bits of the si_code, so that tswap_siginfo() later can use it. tswap_siginfo() then strips these top bits out before writing si_code to the guest (sign-extending the lower bits). This fixes a bug where fields were sometimes wrong; in particular the LTP kill10 test went into an infinite loop because its signal handler got a si_pid value of 0 rather than the pid of the sending process. As part of this change, we switch to using __put_user() in the tswap_siginfo code which writes out the byteswapped values to the target memory, in case the target memory pointer is not sufficiently aligned for the host CPU's requirements. Signed-off-by: Peter Maydell Signed-off-by: Riku Voipio --- linux-user/signal.c | 165 ++++++++++++++++++++++++++++++++-------------- linux-user/syscall_defs.h | 15 +++++ 2 files changed, 131 insertions(+), 49 deletions(-) diff --git a/linux-user/signal.c b/linux-user/signal.c index b21d6bf..8ea0cbf 100644 --- a/linux-user/signal.c +++ b/linux-user/signal.c @@ -17,6 +17,7 @@ * along with this program; if not, see . */ #include "qemu/osdep.h" +#include "qemu/bitops.h" #include #include @@ -274,70 +275,129 @@ static inline void host_to_target_siginfo_noswap(target_siginfo_t *tinfo, const siginfo_t *info) { int sig = host_to_target_signal(info->si_signo); + int si_code = info->si_code; + int si_type; tinfo->si_signo = sig; tinfo->si_errno = 0; tinfo->si_code = info->si_code; - if (sig == TARGET_SIGILL || sig == TARGET_SIGFPE || sig == TARGET_SIGSEGV - || sig == TARGET_SIGBUS || sig == TARGET_SIGTRAP) { - /* Should never come here, but who knows. The information for - the target is irrelevant. */ - tinfo->_sifields._sigfault._addr = 0; - } else if (sig == TARGET_SIGIO) { - tinfo->_sifields._sigpoll._band = info->si_band; - tinfo->_sifields._sigpoll._fd = info->si_fd; - } else if (sig == TARGET_SIGCHLD) { - tinfo->_sifields._sigchld._pid = info->si_pid; - tinfo->_sifields._sigchld._uid = info->si_uid; - tinfo->_sifields._sigchld._status + /* This is awkward, because we have to use a combination of + * the si_code and si_signo to figure out which of the union's + * members are valid. (Within the host kernel it is always possible + * to tell, but the kernel carefully avoids giving userspace the + * high 16 bits of si_code, so we don't have the information to + * do this the easy way...) We therefore make our best guess, + * bearing in mind that a guest can spoof most of the si_codes + * via rt_sigqueueinfo() if it likes. + * + * Once we have made our guess, we record it in the top 16 bits of + * the si_code, so that tswap_siginfo() later can use it. + * tswap_siginfo() will strip these top bits out before writing + * si_code to the guest (sign-extending the lower bits). + */ + + switch (si_code) { + case SI_USER: + case SI_TKILL: + case SI_KERNEL: + /* Sent via kill(), tkill() or tgkill(), or direct from the kernel. + * These are the only unspoofable si_code values. + */ + tinfo->_sifields._kill._pid = info->si_pid; + tinfo->_sifields._kill._uid = info->si_uid; + si_type = QEMU_SI_KILL; + break; + default: + /* Everything else is spoofable. Make best guess based on signal */ + switch (sig) { + case TARGET_SIGCHLD: + tinfo->_sifields._sigchld._pid = info->si_pid; + tinfo->_sifields._sigchld._uid = info->si_uid; + tinfo->_sifields._sigchld._status = host_to_target_waitstatus(info->si_status); - tinfo->_sifields._sigchld._utime = info->si_utime; - tinfo->_sifields._sigchld._stime = info->si_stime; - } else if (sig >= TARGET_SIGRTMIN) { - tinfo->_sifields._rt._pid = info->si_pid; - tinfo->_sifields._rt._uid = info->si_uid; - /* XXX: potential problem if 64 bit */ - tinfo->_sifields._rt._sigval.sival_ptr + tinfo->_sifields._sigchld._utime = info->si_utime; + tinfo->_sifields._sigchld._stime = info->si_stime; + si_type = QEMU_SI_CHLD; + break; + case TARGET_SIGIO: + tinfo->_sifields._sigpoll._band = info->si_band; + tinfo->_sifields._sigpoll._fd = info->si_fd; + si_type = QEMU_SI_POLL; + break; + default: + /* Assume a sigqueue()/mq_notify()/rt_sigqueueinfo() source. */ + tinfo->_sifields._rt._pid = info->si_pid; + tinfo->_sifields._rt._uid = info->si_uid; + /* XXX: potential problem if 64 bit */ + tinfo->_sifields._rt._sigval.sival_ptr = (abi_ulong)(unsigned long)info->si_value.sival_ptr; + si_type = QEMU_SI_RT; + break; + } + break; } + + tinfo->si_code = deposit32(si_code, 16, 16, si_type); } static void tswap_siginfo(target_siginfo_t *tinfo, const target_siginfo_t *info) { - int sig = info->si_signo; - tinfo->si_signo = tswap32(sig); - tinfo->si_errno = tswap32(info->si_errno); - tinfo->si_code = tswap32(info->si_code); - - if (sig == TARGET_SIGILL || sig == TARGET_SIGFPE || sig == TARGET_SIGSEGV - || sig == TARGET_SIGBUS || sig == TARGET_SIGTRAP) { - tinfo->_sifields._sigfault._addr - = tswapal(info->_sifields._sigfault._addr); - } else if (sig == TARGET_SIGIO) { - tinfo->_sifields._sigpoll._band - = tswap32(info->_sifields._sigpoll._band); - tinfo->_sifields._sigpoll._fd = tswap32(info->_sifields._sigpoll._fd); - } else if (sig == TARGET_SIGCHLD) { - tinfo->_sifields._sigchld._pid - = tswap32(info->_sifields._sigchld._pid); - tinfo->_sifields._sigchld._uid - = tswap32(info->_sifields._sigchld._uid); - tinfo->_sifields._sigchld._status - = tswap32(info->_sifields._sigchld._status); - tinfo->_sifields._sigchld._utime - = tswapal(info->_sifields._sigchld._utime); - tinfo->_sifields._sigchld._stime - = tswapal(info->_sifields._sigchld._stime); - } else if (sig >= TARGET_SIGRTMIN) { - tinfo->_sifields._rt._pid = tswap32(info->_sifields._rt._pid); - tinfo->_sifields._rt._uid = tswap32(info->_sifields._rt._uid); - tinfo->_sifields._rt._sigval.sival_ptr - = tswapal(info->_sifields._rt._sigval.sival_ptr); + int si_type = extract32(info->si_code, 16, 16); + int si_code = sextract32(info->si_code, 0, 16); + + __put_user(info->si_signo, &tinfo->si_signo); + __put_user(info->si_errno, &tinfo->si_errno); + __put_user(si_code, &tinfo->si_code); + + /* We can use our internal marker of which fields in the structure + * are valid, rather than duplicating the guesswork of + * host_to_target_siginfo_noswap() here. + */ + switch (si_type) { + case QEMU_SI_KILL: + __put_user(info->_sifields._kill._pid, &tinfo->_sifields._kill._pid); + __put_user(info->_sifields._kill._uid, &tinfo->_sifields._kill._uid); + break; + case QEMU_SI_TIMER: + __put_user(info->_sifields._timer._timer1, + &tinfo->_sifields._timer._timer1); + __put_user(info->_sifields._timer._timer2, + &tinfo->_sifields._timer._timer2); + break; + case QEMU_SI_POLL: + __put_user(info->_sifields._sigpoll._band, + &tinfo->_sifields._sigpoll._band); + __put_user(info->_sifields._sigpoll._fd, + &tinfo->_sifields._sigpoll._fd); + break; + case QEMU_SI_FAULT: + __put_user(info->_sifields._sigfault._addr, + &tinfo->_sifields._sigfault._addr); + break; + case QEMU_SI_CHLD: + __put_user(info->_sifields._sigchld._pid, + &tinfo->_sifields._sigchld._pid); + __put_user(info->_sifields._sigchld._uid, + &tinfo->_sifields._sigchld._uid); + __put_user(info->_sifields._sigchld._status, + &tinfo->_sifields._sigchld._status); + __put_user(info->_sifields._sigchld._utime, + &tinfo->_sifields._sigchld._utime); + __put_user(info->_sifields._sigchld._stime, + &tinfo->_sifields._sigchld._stime); + break; + case QEMU_SI_RT: + __put_user(info->_sifields._rt._pid, &tinfo->_sifields._rt._pid); + __put_user(info->_sifields._rt._uid, &tinfo->_sifields._rt._uid); + __put_user(info->_sifields._rt._sigval.sival_ptr, + &tinfo->_sifields._rt._sigval.sival_ptr); + break; + default: + g_assert_not_reached(); } } - void host_to_target_siginfo(target_siginfo_t *tinfo, const siginfo_t *info) { host_to_target_siginfo_noswap(tinfo, info); @@ -505,6 +565,13 @@ int queue_signal(CPUArchState *env, int sig, target_siginfo_t *info) trace_user_queue_signal(env, sig); + /* Currently all callers define siginfo structures which + * use the _sifields._sigfault union member, so we can + * set the type here. If that changes we should push this + * out so the si_type is passed in by callers. + */ + info->si_code = deposit32(info->si_code, 16, 16, QEMU_SI_FAULT); + ts->sync_signal.info = *info; ts->sync_signal.pending = sig; /* signal that a new signal is pending */ diff --git a/linux-user/syscall_defs.h b/linux-user/syscall_defs.h index 34af15a..124754f 100644 --- a/linux-user/syscall_defs.h +++ b/linux-user/syscall_defs.h @@ -673,6 +673,21 @@ typedef struct { #define TARGET_SI_PAD_SIZE ((TARGET_SI_MAX_SIZE - TARGET_SI_PREAMBLE_SIZE) / sizeof(int)) +/* Within QEMU the top 16 bits of si_code indicate which of the parts of + * the union in target_siginfo is valid. This only applies between + * host_to_target_siginfo_noswap() and tswap_siginfo(); it does not + * appear either within host siginfo_t or in target_siginfo structures + * which we get from the guest userspace program. (The Linux kernel + * does a similar thing with using the top bits for its own internal + * purposes but not letting them be visible to userspace.) + */ +#define QEMU_SI_KILL 0 +#define QEMU_SI_TIMER 1 +#define QEMU_SI_POLL 2 +#define QEMU_SI_FAULT 3 +#define QEMU_SI_CHLD 4 +#define QEMU_SI_RT 5 + typedef struct target_siginfo { #ifdef TARGET_MIPS int si_signo; -- cgit v1.1 From 90c0f080fe6fdd8b18691e6e38c853c8a996ad92 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Fri, 27 May 2016 15:52:01 +0100 Subject: linux-user: Avoid possible misalignment in target_to_host_siginfo() Reimplement target_to_host_siginfo() to use __get_user(), which handles possibly misaligned source guest structures correctly. Reviewed-by: Laurent Vivier Signed-off-by: Peter Maydell Signed-off-by: Riku Voipio --- linux-user/signal.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/linux-user/signal.c b/linux-user/signal.c index 8ea0cbf..61c1145 100644 --- a/linux-user/signal.c +++ b/linux-user/signal.c @@ -408,13 +408,18 @@ void host_to_target_siginfo(target_siginfo_t *tinfo, const siginfo_t *info) /* XXX: find a solution for 64 bit (additional malloced data is needed) */ void target_to_host_siginfo(siginfo_t *info, const target_siginfo_t *tinfo) { - info->si_signo = tswap32(tinfo->si_signo); - info->si_errno = tswap32(tinfo->si_errno); - info->si_code = tswap32(tinfo->si_code); - info->si_pid = tswap32(tinfo->_sifields._rt._pid); - info->si_uid = tswap32(tinfo->_sifields._rt._uid); - info->si_value.sival_ptr = - (void *)(long)tswapal(tinfo->_sifields._rt._sigval.sival_ptr); + /* This conversion is used only for the rt_sigqueueinfo syscall, + * and so we know that the _rt fields are the valid ones. + */ + abi_ulong sival_ptr; + + __get_user(info->si_signo, &tinfo->si_signo); + __get_user(info->si_errno, &tinfo->si_errno); + __get_user(info->si_code, &tinfo->si_code); + __get_user(info->si_pid, &tinfo->_sifields._rt._pid); + __get_user(info->si_uid, &tinfo->_sifields._rt._uid); + __get_user(sival_ptr, &tinfo->_sifields._rt._sigval.sival_ptr); + info->si_value.sival_ptr = (void *)(long)sival_ptr; } static int fatal_signal (int sig) -- cgit v1.1 From 9e024732f53b368abdd578e1795bf3d2779ea88a Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 6 Jun 2016 19:56:54 +0100 Subject: linux-user: provide frame information in x86-64 safe_syscall Use cfi directives in the x86-64 safe_syscall to allow gdb to get backtraces right from within it. (In particular this will be quite a common situation if the user interrupts QEMU while it's in a blocked safe-syscall: at the point of the syscall insn RBP is in use for something else, and so gdb can't find the frame then without assistance.) Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Reviewed-by: Richard Henderson Signed-off-by: Riku Voipio --- linux-user/host/x86_64/safe-syscall.inc.S | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/linux-user/host/x86_64/safe-syscall.inc.S b/linux-user/host/x86_64/safe-syscall.inc.S index dde434c..e09368d 100644 --- a/linux-user/host/x86_64/safe-syscall.inc.S +++ b/linux-user/host/x86_64/safe-syscall.inc.S @@ -24,6 +24,7 @@ * -1-and-errno-set convention is done by the calling wrapper. */ safe_syscall_base: + .cfi_startproc /* This saves a frame pointer and aligns the stack for the syscall. * (It's unclear if the syscall ABI has the same stack alignment * requirements as the userspace function call ABI, but better safe than @@ -31,6 +32,8 @@ safe_syscall_base: * does not list any ABI differences regarding stack alignment.) */ push %rbp + .cfi_adjust_cfa_offset 8 + .cfi_rel_offset rbp, 0 /* The syscall calling convention isn't the same as the * C one: @@ -70,12 +73,19 @@ safe_syscall_start: safe_syscall_end: /* code path for having successfully executed the syscall */ pop %rbp + .cfi_remember_state + .cfi_def_cfa_offset 8 + .cfi_restore rbp ret return_ERESTARTSYS: /* code path when we didn't execute the syscall */ + .cfi_restore_state mov $-TARGET_ERESTARTSYS, %rax pop %rbp + .cfi_def_cfa_offset 8 + .cfi_restore rbp ret + .cfi_endproc .size safe_syscall_base, .-safe_syscall_base -- cgit v1.1 From e0156a9dc43daea13b06b4c0edb755cc8f92dfdf Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 31 May 2016 15:45:09 +0100 Subject: linux-user: Fix handling of arm_fadvise64_64 syscall 32-bit ARM has an odd variant of the fadvise syscall which has rearranged arguments, which we try to implement. Unfortunately we got the rearrangement wrong. This is a six-argument syscall whose arguments are: * fd * advise parameter * offset high half * offset low half * len high half * len low half Stop trying to share code with the standard fadvise syscalls, and just implement the syscall with the correct argument order. Signed-off-by: Peter Maydell Reviewed-by: Laurent Vivier Signed-off-by: Riku Voipio --- linux-user/syscall.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index a2d591e..8c08e7c 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -9970,18 +9970,18 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, #endif #ifdef TARGET_NR_arm_fadvise64_64 case TARGET_NR_arm_fadvise64_64: - { - /* - * arm_fadvise64_64 looks like fadvise64_64 but - * with different argument order - */ - abi_long temp; - temp = arg3; - arg3 = arg4; - arg4 = temp; - } + /* arm_fadvise64_64 looks like fadvise64_64 but + * with different argument order: fd, advice, offset, len + * rather than the usual fd, offset, len, advice. + * Note that offset and len are both 64-bit so appear as + * pairs of 32-bit registers. + */ + ret = posix_fadvise(arg1, target_offset64(arg3, arg4), + target_offset64(arg5, arg6), arg2); + ret = -host_to_target_errno(ret); + break; #endif -#if defined(TARGET_NR_fadvise64_64) || defined(TARGET_NR_arm_fadvise64_64) || defined(TARGET_NR_fadvise64) +#if defined(TARGET_NR_fadvise64_64) || defined(TARGET_NR_fadvise64) #ifdef TARGET_NR_fadvise64_64 case TARGET_NR_fadvise64_64: #endif -- cgit v1.1 From badd3cd8805de3b5f8c76b6a00612ed62a71eff8 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 31 May 2016 15:45:10 +0100 Subject: linux-user: Fix NR_fadvise64 and NR_fadvise64_64 for 32-bit guests Fix errors in the implementation of NR_fadvise64 and NR_fadvise64_64 for 32-bit guests, which pass their off_t values in register pairs. We can't use the 64-bit code path for this, so split out the 32-bit cases, so that we can correctly handle the "only offset is 64-bit" and "both offset and length are 64-bit" syscall flavours, and "uses aligned register pairs" and "does not" flavours of target. Signed-off-by: Peter Maydell Reviewed-by: Laurent Vivier Signed-off-by: Riku Voipio --- linux-user/syscall.c | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 8c08e7c..5cff9f7 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -9981,6 +9981,44 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, ret = -host_to_target_errno(ret); break; #endif + +#if TARGET_ABI_BITS == 32 + +#ifdef TARGET_NR_fadvise64_64 + case TARGET_NR_fadvise64_64: + /* 6 args: fd, offset (high, low), len (high, low), advice */ + if (regpairs_aligned(cpu_env)) { + /* offset is in (3,4), len in (5,6) and advice in 7 */ + arg2 = arg3; + arg3 = arg4; + arg4 = arg5; + arg5 = arg6; + arg6 = arg7; + } + ret = -host_to_target_errno(posix_fadvise(arg1, + target_offset64(arg2, arg3), + target_offset64(arg4, arg5), + arg6)); + break; +#endif + +#ifdef TARGET_NR_fadvise64 + case TARGET_NR_fadvise64: + /* 5 args: fd, offset (high, low), len, advice */ + if (regpairs_aligned(cpu_env)) { + /* offset is in (3,4), len in 5 and advice in 6 */ + arg2 = arg3; + arg3 = arg4; + arg4 = arg5; + arg5 = arg6; + } + ret = -host_to_target_errno(posix_fadvise(arg1, + target_offset64(arg2, arg3), + arg4, arg5)); + break; +#endif + +#else /* not a 32-bit ABI */ #if defined(TARGET_NR_fadvise64_64) || defined(TARGET_NR_fadvise64) #ifdef TARGET_NR_fadvise64_64 case TARGET_NR_fadvise64_64: @@ -10000,6 +10038,8 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, ret = -posix_fadvise(arg1, arg2, arg3, arg4); break; #endif +#endif /* end of 64-bit ABI fadvise handling */ + #ifdef TARGET_NR_madvise case TARGET_NR_madvise: /* A straight passthrough may not be safe because qemu sometimes -- cgit v1.1 From 977d8241c10de4160a9377efd496395c98b7dee9 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 31 May 2016 15:45:11 +0100 Subject: linux-user: Fix error conversion in 64-bit fadvise syscall Fix a missing host-to-target errno conversion in the 64-bit fadvise syscall emulation. Signed-off-by: Peter Maydell Reviewed-by: Laurent Vivier Signed-off-by: Riku Voipio --- linux-user/syscall.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 5cff9f7..8f8d9db 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -10035,8 +10035,8 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, default: break; } #endif - ret = -posix_fadvise(arg1, arg2, arg3, arg4); - break; + ret = -host_to_target_errno(posix_fadvise(arg1, arg2, arg3, arg4)); + break; #endif #endif /* end of 64-bit ABI fadvise handling */ -- cgit v1.1 From 918c03ed9ac583eb9d5c33345a814291e1dd2e87 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 6 Jun 2016 19:58:02 +0100 Subject: linux-user: Use safe_syscall wrapper for readv and writev syscalls Use the safe_syscall wrapper for readv and writev syscalls. Signed-off-by: Peter Maydell Signed-off-by: Riku Voipio --- linux-user/syscall.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 8f8d9db..e0c49cc 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -712,6 +712,8 @@ safe_syscall2(int, rt_sigsuspend, sigset_t *, newset, size_t, sigsetsize) safe_syscall2(int, kill, pid_t, pid, int, sig) safe_syscall2(int, tkill, int, tid, int, sig) safe_syscall3(int, tgkill, int, tgid, int, pid, int, sig) +safe_syscall3(ssize_t, readv, int, fd, const struct iovec *, iov, int, iovcnt) +safe_syscall3(ssize_t, writev, int, fd, const struct iovec *, iov, int, iovcnt) static inline int host_to_target_sock_type(int host_type) { @@ -8986,7 +8988,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, { struct iovec *vec = lock_iovec(VERIFY_WRITE, arg2, arg3, 0); if (vec != NULL) { - ret = get_errno(readv(arg1, vec, arg3)); + ret = get_errno(safe_readv(arg1, vec, arg3)); unlock_iovec(vec, arg2, arg3, 1); } else { ret = -host_to_target_errno(errno); @@ -8997,7 +8999,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, { struct iovec *vec = lock_iovec(VERIFY_READ, arg2, arg3, 1); if (vec != NULL) { - ret = get_errno(writev(arg1, vec, arg3)); + ret = get_errno(safe_writev(arg1, vec, arg3)); unlock_iovec(vec, arg2, arg3, 0); } else { ret = -host_to_target_errno(errno); -- cgit v1.1 From 2a3c7619288af9cfcc09a233dce911bf80849dfb Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 6 Jun 2016 19:58:03 +0100 Subject: linux-user: Use safe_syscall wrapper for connect syscall Use the safe_syscall wrapper for the connect syscall. Signed-off-by: Peter Maydell Signed-off-by: Riku Voipio --- linux-user/syscall.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index e0c49cc..b363944 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -714,6 +714,8 @@ safe_syscall2(int, tkill, int, tid, int, sig) safe_syscall3(int, tgkill, int, tgid, int, pid, int, sig) safe_syscall3(ssize_t, readv, int, fd, const struct iovec *, iov, int, iovcnt) safe_syscall3(ssize_t, writev, int, fd, const struct iovec *, iov, int, iovcnt) +safe_syscall3(int, connect, int, fd, const struct sockaddr *, addr, + socklen_t, addrlen) static inline int host_to_target_sock_type(int host_type) { @@ -2859,7 +2861,7 @@ static abi_long do_connect(int sockfd, abi_ulong target_addr, if (ret) return ret; - return get_errno(connect(sockfd, addr, addrlen)); + return get_errno(safe_connect(sockfd, addr, addrlen)); } /* do_sendrecvmsg_locked() Must return target values and target errnos. */ -- cgit v1.1 From 666875306e03e1f94e1d4c808502585c10abc69a Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 6 Jun 2016 19:58:04 +0100 Subject: linux-user: Use safe_syscall wrapper for send* and recv* syscalls Use the safe_syscall wrapper for the send, sendto, sendmsg, recv, recvfrom and recvmsg syscalls. RV: adjusted to apply Signed-off-by: Peter Maydell Signed-off-by: Riku Voipio --- linux-user/syscall.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index b363944..bcae62d 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -716,6 +716,12 @@ safe_syscall3(ssize_t, readv, int, fd, const struct iovec *, iov, int, iovcnt) safe_syscall3(ssize_t, writev, int, fd, const struct iovec *, iov, int, iovcnt) safe_syscall3(int, connect, int, fd, const struct sockaddr *, addr, socklen_t, addrlen) +safe_syscall6(ssize_t, sendto, int, fd, const void *, buf, size_t, len, + int, flags, const struct sockaddr *, addr, socklen_t, addrlen) +safe_syscall6(ssize_t, recvfrom, int, fd, void *, buf, size_t, len, + int, flags, struct sockaddr *, addr, socklen_t *, addrlen) +safe_syscall3(ssize_t, sendmsg, int, fd, const struct msghdr *, msg, int, flags) +safe_syscall3(ssize_t, recvmsg, int, fd, struct msghdr *, msg, int, flags) static inline int host_to_target_sock_type(int host_type) { @@ -2910,10 +2916,10 @@ static abi_long do_sendrecvmsg_locked(int fd, struct target_msghdr *msgp, ret = target_to_host_cmsg(&msg, msgp); } if (ret == 0) { - ret = get_errno(sendmsg(fd, &msg, flags)); + ret = get_errno(safe_sendmsg(fd, &msg, flags)); } } else { - ret = get_errno(recvmsg(fd, &msg, flags)); + ret = get_errno(safe_recvmsg(fd, &msg, flags)); if (!is_error(ret)) { len = ret; if (fd_trans_host_to_target_data(fd)) { @@ -3162,9 +3168,9 @@ static abi_long do_sendto(int fd, abi_ulong msg, size_t len, int flags, unlock_user(host_msg, msg, 0); return ret; } - ret = get_errno(sendto(fd, host_msg, len, flags, addr, addrlen)); + ret = get_errno(safe_sendto(fd, host_msg, len, flags, addr, addrlen)); } else { - ret = get_errno(send(fd, host_msg, len, flags)); + ret = get_errno(safe_sendto(fd, host_msg, len, flags, NULL, 0)); } unlock_user(host_msg, msg, 0); return ret; @@ -3193,10 +3199,11 @@ static abi_long do_recvfrom(int fd, abi_ulong msg, size_t len, int flags, goto fail; } addr = alloca(addrlen); - ret = get_errno(recvfrom(fd, host_msg, len, flags, addr, &addrlen)); + ret = get_errno(safe_recvfrom(fd, host_msg, len, flags, + addr, &addrlen)); } else { addr = NULL; /* To keep compiler quiet. */ - ret = get_errno(qemu_recv(fd, host_msg, len, flags)); + ret = get_errno(safe_recvfrom(fd, host_msg, len, flags, NULL, 0)); } if (!is_error(ret)) { if (target_addr) { -- cgit v1.1 From 89f9fe4452848386e9d0aacd84ac681944051b78 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 6 Jun 2016 19:58:05 +0100 Subject: linux-user: Use safe_syscall wrapper for msgsnd and msgrcv Use the safe_syscall wrapper for msgsnd and msgrcv syscalls. This is made slightly awkward by some host architectures providing only a single 'ipc' syscall rather than separate syscalls per operation; we provide safe_msgsnd() and safe_msgrcv() as wrappers around safe_ipc() to handle this if needed. Signed-off-by: Peter Maydell Signed-off-by: Riku Voipio --- linux-user/syscall.c | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index bcae62d..b41d269 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -722,6 +722,34 @@ safe_syscall6(ssize_t, recvfrom, int, fd, void *, buf, size_t, len, int, flags, struct sockaddr *, addr, socklen_t *, addrlen) safe_syscall3(ssize_t, sendmsg, int, fd, const struct msghdr *, msg, int, flags) safe_syscall3(ssize_t, recvmsg, int, fd, struct msghdr *, msg, int, flags) +#ifdef __NR_msgsnd +safe_syscall4(int, msgsnd, int, msgid, const void *, msgp, size_t, sz, + int, flags) +safe_syscall5(int, msgrcv, int, msgid, void *, msgp, size_t, sz, + long, msgtype, int, flags) +#else +/* This host kernel architecture uses a single ipc syscall; fake up + * wrappers for the sub-operations to hide this implementation detail. + * Annoyingly we can't include linux/ipc.h to get the constant definitions + * for the call parameter because some structs in there conflict with the + * sys/ipc.h ones. So we just define them here, and rely on them being + * the same for all host architectures. + */ +#define Q_MSGSND 11 +#define Q_MSGRCV 12 +#define Q_IPCCALL(VERSION, OP) ((VERSION) << 16 | (OP)) + +safe_syscall6(int, ipc, int, call, long, first, long, second, long, third, + void *, ptr, long, fifth) +static int safe_msgsnd(int msgid, const void *msgp, size_t sz, int flags) +{ + return safe_ipc(Q_IPCCALL(0, Q_MSGSND), msgid, sz, flags, (void *)msgp, 0); +} +static int safe_msgrcv(int msgid, void *msgp, size_t sz, long type, int flags) +{ + return safe_ipc(Q_IPCCALL(1, Q_MSGRCV), msgid, sz, flags, msgp, type); +} +#endif static inline int host_to_target_sock_type(int host_type) { @@ -3796,7 +3824,7 @@ static inline abi_long do_msgsnd(int msqid, abi_long msgp, } host_mb->mtype = (abi_long) tswapal(target_mb->mtype); memcpy(host_mb->mtext, target_mb->mtext, msgsz); - ret = get_errno(msgsnd(msqid, host_mb, msgsz, msgflg)); + ret = get_errno(safe_msgsnd(msqid, host_mb, msgsz, msgflg)); g_free(host_mb); unlock_user_struct(target_mb, msgp, 0); @@ -3824,7 +3852,7 @@ static inline abi_long do_msgrcv(int msqid, abi_long msgp, ret = -TARGET_ENOMEM; goto end; } - ret = get_errno(msgrcv(msqid, host_mb, msgsz, msgtyp, msgflg)); + ret = get_errno(safe_msgrcv(msqid, host_mb, msgsz, msgtyp, msgflg)); if (ret > 0) { abi_ulong target_mtext_addr = msgp + sizeof(abi_ulong); -- cgit v1.1 From d40ecd66182c9dbcf320c63a359917968cc9b73c Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 6 Jun 2016 19:58:06 +0100 Subject: linux-user: Use safe_syscall wrapper for mq_timedsend and mq_timedreceive Use the safe_syscall wrapper for mq_timedsend and mq_timedreceive syscalls. Signed-off-by: Peter Maydell Signed-off-by: Riku Voipio --- linux-user/syscall.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index b41d269..294e5ee 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -750,6 +750,12 @@ static int safe_msgrcv(int msgid, void *msgp, size_t sz, long type, int flags) return safe_ipc(Q_IPCCALL(1, Q_MSGRCV), msgid, sz, flags, msgp, type); } #endif +#if defined(TARGET_NR_mq_open) && defined(__NR_mq_open) +safe_syscall5(int, mq_timedsend, int, mqdes, const char *, msg_ptr, + size_t, len, unsigned, prio, const struct timespec *, timeout) +safe_syscall5(int, mq_timedreceive, int, mqdes, char *, msg_ptr, + size_t, len, unsigned *, prio, const struct timespec *, timeout) +#endif static inline int host_to_target_sock_type(int host_type) { @@ -10593,11 +10599,11 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, p = lock_user (VERIFY_READ, arg2, arg3, 1); if (arg5 != 0) { target_to_host_timespec(&ts, arg5); - ret = get_errno(mq_timedsend(arg1, p, arg3, arg4, &ts)); + ret = get_errno(safe_mq_timedsend(arg1, p, arg3, arg4, &ts)); host_to_target_timespec(arg5, &ts); + } else { + ret = get_errno(safe_mq_timedsend(arg1, p, arg3, arg4, NULL)); } - else - ret = get_errno(mq_send(arg1, p, arg3, arg4)); unlock_user (p, arg2, arg3); } break; @@ -10610,11 +10616,13 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, p = lock_user (VERIFY_READ, arg2, arg3, 1); if (arg5 != 0) { target_to_host_timespec(&ts, arg5); - ret = get_errno(mq_timedreceive(arg1, p, arg3, &prio, &ts)); + ret = get_errno(safe_mq_timedreceive(arg1, p, arg3, + &prio, &ts)); host_to_target_timespec(arg5, &ts); + } else { + ret = get_errno(safe_mq_timedreceive(arg1, p, arg3, + &prio, NULL)); } - else - ret = get_errno(mq_receive(arg1, p, arg3, &prio)); unlock_user (p, arg2, arg3); if (arg4 != 0) put_user_u32(prio, arg4); -- cgit v1.1 From 2a8459892f83ad563efc8a3b29db766ebe986447 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 6 Jun 2016 19:58:07 +0100 Subject: linux-user: Use safe_syscall wrapper for flock Use the safe_syscall wrapper for the flock syscall. Signed-off-by: Peter Maydell Signed-off-by: Riku Voipio --- linux-user/syscall.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 294e5ee..7d88009 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -722,6 +722,7 @@ safe_syscall6(ssize_t, recvfrom, int, fd, void *, buf, size_t, len, int, flags, struct sockaddr *, addr, socklen_t *, addrlen) safe_syscall3(ssize_t, sendmsg, int, fd, const struct msghdr *, msg, int, flags) safe_syscall3(ssize_t, recvmsg, int, fd, struct msghdr *, msg, int, flags) +safe_syscall2(int, flock, int, fd, int, operation) #ifdef __NR_msgsnd safe_syscall4(int, msgsnd, int, msgid, const void *, msgp, size_t, sz, int, flags) @@ -9025,7 +9026,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, case TARGET_NR_flock: /* NOTE: the flock constant seems to be the same for every Linux platform */ - ret = get_errno(flock(arg1, arg2)); + ret = get_errno(safe_flock(arg1, arg2)); break; case TARGET_NR_readv: { -- cgit v1.1 From b3f823306829a717b072548e630e0bd769706802 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 6 Jun 2016 19:58:08 +0100 Subject: linux-user: Use safe_syscall wrapper for rt_sigtimedwait syscall Use the safe_syscall wrapper for the rt_sigtimedwait syscall. Signed-off-by: Peter Maydell Signed-off-by: Riku Voipio --- linux-user/syscall.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 7d88009..cecd96c 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -723,6 +723,8 @@ safe_syscall6(ssize_t, recvfrom, int, fd, void *, buf, size_t, len, safe_syscall3(ssize_t, sendmsg, int, fd, const struct msghdr *, msg, int, flags) safe_syscall3(ssize_t, recvmsg, int, fd, struct msghdr *, msg, int, flags) safe_syscall2(int, flock, int, fd, int, operation) +safe_syscall4(int, rt_sigtimedwait, const sigset_t *, these, siginfo_t *, uinfo, + const struct timespec *, uts, size_t, sigsetsize) #ifdef __NR_msgsnd safe_syscall4(int, msgsnd, int, msgid, const void *, msgp, size_t, sz, int, flags) @@ -7751,7 +7753,8 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, } else { puts = NULL; } - ret = get_errno(sigtimedwait(&set, &uinfo, puts)); + ret = get_errno(safe_rt_sigtimedwait(&set, &uinfo, puts, + SIGSET_T_SIZE)); if (!is_error(ret)) { if (arg2) { p = lock_user(VERIFY_WRITE, arg2, sizeof(target_siginfo_t), -- cgit v1.1 From 9e518226f431454ce2d4d01051593b32515b3b55 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 6 Jun 2016 19:58:09 +0100 Subject: linux-user: Use safe_syscall wrapper for sleep syscalls Use the safe_syscall wrapper for the clock_nanosleep and nanosleep syscalls. Signed-off-by: Peter Maydell Signed-off-by: Riku Voipio --- linux-user/syscall.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index cecd96c..a931919 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -725,6 +725,12 @@ safe_syscall3(ssize_t, recvmsg, int, fd, struct msghdr *, msg, int, flags) safe_syscall2(int, flock, int, fd, int, operation) safe_syscall4(int, rt_sigtimedwait, const sigset_t *, these, siginfo_t *, uinfo, const struct timespec *, uts, size_t, sigsetsize) +safe_syscall2(int, nanosleep, const struct timespec *, req, + struct timespec *, rem) +#ifdef TARGET_NR_clock_nanosleep +safe_syscall4(int, clock_nanosleep, const clockid_t, clock, int, flags, + const struct timespec *, req, struct timespec *, rem) +#endif #ifdef __NR_msgsnd safe_syscall4(int, msgsnd, int, msgid, const void *, msgp, size_t, sz, int, flags) @@ -9205,7 +9211,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, { struct timespec req, rem; target_to_host_timespec(&req, arg1); - ret = get_errno(nanosleep(&req, &rem)); + ret = get_errno(safe_nanosleep(&req, &rem)); if (is_error(ret) && arg2) { host_to_target_timespec(arg2, &rem); } @@ -10473,14 +10479,15 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, { struct timespec ts; target_to_host_timespec(&ts, arg3); - ret = get_errno(clock_nanosleep(arg1, arg2, &ts, arg4 ? &ts : NULL)); + ret = get_errno(safe_clock_nanosleep(arg1, arg2, + &ts, arg4 ? &ts : NULL)); if (arg4) host_to_target_timespec(arg4, &ts); #if defined(TARGET_PPC) /* clock_nanosleep is odd in that it returns positive errno values. * On PPC, CR0 bit 3 should be set in such a situation. */ - if (ret) { + if (ret && ret != -TARGET_ERESTARTSYS) { ((CPUPPCState *)cpu_env)->crf[0] |= 1; } #endif -- cgit v1.1 From a6130237b85e15463592484155aa905a9b39cc6c Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 6 Jun 2016 19:58:10 +0100 Subject: linux-user: Use safe_syscall wrapper for poll and ppoll syscalls Use the safe_syscall wrapper for the poll and ppoll syscalls. Since not all host architectures will have a poll syscall, we have to rewrite the TARGET_NR_poll handling to use ppoll instead (we can assume everywhere has ppoll by now). We take the opportunity to switch to the code structure already used in the implementation of epoll_wait and epoll_pwait, which uses a switch() to avoid interleaving #if and if (), and to stop using a variable with a leading '_' which is in the implementation's namespace. Signed-off-by: Peter Maydell Signed-off-by: Riku Voipio --- linux-user/syscall.c | 47 +++++++++++++++++++++++++++++++---------------- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index a931919..8bb9adf 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -429,16 +429,6 @@ static int sys_inotify_init1(int flags) #undef TARGET_NR_inotify_rm_watch #endif /* CONFIG_INOTIFY */ -#if defined(TARGET_NR_ppoll) -#ifndef __NR_ppoll -# define __NR_ppoll -1 -#endif -#define __NR_sys_ppoll __NR_ppoll -_syscall5(int, sys_ppoll, struct pollfd *, fds, nfds_t, nfds, - struct timespec *, timeout, const sigset_t *, sigmask, - size_t, sigsetsize) -#endif - #if defined(TARGET_NR_prlimit64) #ifndef __NR_prlimit64 # define __NR_prlimit64 -1 @@ -706,6 +696,9 @@ safe_syscall5(int, waitid, idtype_t, idtype, id_t, id, siginfo_t *, infop, \ safe_syscall3(int, execve, const char *, filename, char **, argv, char **, envp) safe_syscall6(int, pselect6, int, nfds, fd_set *, readfds, fd_set *, writefds, \ fd_set *, exceptfds, struct timespec *, timeout, void *, sig) +safe_syscall5(int, ppoll, struct pollfd *, ufds, unsigned int, nfds, + struct timespec *, tsp, const sigset_t *, sigmask, + size_t, sigsetsize) safe_syscall6(int,futex,int *,uaddr,int,op,int,val, \ const struct timespec *,timeout,int *,uaddr2,int,val3) safe_syscall2(int, rt_sigsuspend, sigset_t *, newset, size_t, sigsetsize) @@ -8964,7 +8957,6 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, { struct target_pollfd *target_pfd; unsigned int nfds = arg2; - int timeout = arg3; struct pollfd *pfd; unsigned int i; @@ -8984,8 +8976,10 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, } } + switch (num) { # ifdef TARGET_NR_ppoll - if (num == TARGET_NR_ppoll) { + case TARGET_NR_ppoll: + { struct timespec _timeout_ts, *timeout_ts = &_timeout_ts; target_sigset_t *target_set; sigset_t _set, *set = &_set; @@ -9010,8 +9004,8 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, set = NULL; } - ret = get_errno(sys_ppoll(pfd, nfds, timeout_ts, - set, SIGSET_T_SIZE)); + ret = get_errno(safe_ppoll(pfd, nfds, timeout_ts, + set, SIGSET_T_SIZE)); if (!is_error(ret) && arg3) { host_to_target_timespec(arg3, timeout_ts); @@ -9019,9 +9013,30 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, if (arg4) { unlock_user(target_set, arg4, 0); } - } else + break; + } +# endif +# ifdef TARGET_NR_poll + case TARGET_NR_poll: + { + struct timespec ts, *pts; + + if (arg3 >= 0) { + /* Convert ms to secs, ns */ + ts.tv_sec = arg3 / 1000; + ts.tv_nsec = (arg3 % 1000) * 1000000LL; + pts = &ts; + } else { + /* -ve poll() timeout means "infinite" */ + pts = NULL; + } + ret = get_errno(safe_ppoll(pfd, nfds, pts, NULL, 0)); + break; + } # endif - ret = get_errno(poll(pfd, nfds, timeout)); + default: + g_assert_not_reached(); + } if (!is_error(ret)) { for(i = 0; i < nfds; i++) { -- cgit v1.1 From 227f02143f269493543faf6908318c17abd725cd Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 6 Jun 2016 19:58:11 +0100 Subject: linux-user: Use safe_syscall wrapper for epoll_wait syscalls Use the safe_syscall wrapper for epoll_wait and epoll_pwait syscalls. Since we now directly use the host epoll_pwait syscall for both epoll_wait and epoll_pwait, we don't need the configure machinery to check whether glibc supports epoll_pwait(). (The kernel has supported the syscall since 2.6.19 so we can assume it's always there.) Signed-off-by: Peter Maydell Signed-off-by: Riku Voipio --- configure | 21 ++------------------- linux-user/syscall.c | 18 ++++++++++-------- 2 files changed, 12 insertions(+), 27 deletions(-) diff --git a/configure b/configure index d1dbc4b..e995ddd 100755 --- a/configure +++ b/configure @@ -3798,8 +3798,8 @@ if compile_prog "" "" ; then epoll=yes fi -# epoll_create1 and epoll_pwait are later additions -# so we must check separately for their presence +# epoll_create1 is a later addition +# so we must check separately for its presence epoll_create1=no cat > $TMPC << EOF #include @@ -3821,20 +3821,6 @@ if compile_prog "" "" ; then epoll_create1=yes fi -epoll_pwait=no -cat > $TMPC << EOF -#include - -int main(void) -{ - epoll_pwait(0, 0, 0, 0, 0); - return 0; -} -EOF -if compile_prog "" "" ; then - epoll_pwait=yes -fi - # check for sendfile support sendfile=no cat > $TMPC << EOF @@ -5125,9 +5111,6 @@ fi if test "$epoll_create1" = "yes" ; then echo "CONFIG_EPOLL_CREATE1=y" >> $config_host_mak fi -if test "$epoll_pwait" = "yes" ; then - echo "CONFIG_EPOLL_PWAIT=y" >> $config_host_mak -fi if test "$sendfile" = "yes" ; then echo "CONFIG_SENDFILE=y" >> $config_host_mak fi diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 8bb9adf..a193849 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -699,6 +699,9 @@ safe_syscall6(int, pselect6, int, nfds, fd_set *, readfds, fd_set *, writefds, \ safe_syscall5(int, ppoll, struct pollfd *, ufds, unsigned int, nfds, struct timespec *, tsp, const sigset_t *, sigmask, size_t, sigsetsize) +safe_syscall6(int, epoll_pwait, int, epfd, struct epoll_event *, events, + int, maxevents, int, timeout, const sigset_t *, sigmask, + size_t, sigsetsize) safe_syscall6(int,futex,int *,uaddr,int,op,int,val, \ const struct timespec *,timeout,int *,uaddr2,int,val3) safe_syscall2(int, rt_sigsuspend, sigset_t *, newset, size_t, sigsetsize) @@ -10835,14 +10838,11 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, } #endif -#if defined(TARGET_NR_epoll_pwait) && defined(CONFIG_EPOLL_PWAIT) -#define IMPLEMENT_EPOLL_PWAIT -#endif -#if defined(TARGET_NR_epoll_wait) || defined(IMPLEMENT_EPOLL_PWAIT) +#if defined(TARGET_NR_epoll_wait) || defined(TARGET_NR_epoll_pwait) #if defined(TARGET_NR_epoll_wait) case TARGET_NR_epoll_wait: #endif -#if defined(IMPLEMENT_EPOLL_PWAIT) +#if defined(TARGET_NR_epoll_pwait) case TARGET_NR_epoll_pwait: #endif { @@ -10861,7 +10861,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, ep = alloca(maxevents * sizeof(struct epoll_event)); switch (num) { -#if defined(IMPLEMENT_EPOLL_PWAIT) +#if defined(TARGET_NR_epoll_pwait) case TARGET_NR_epoll_pwait: { target_sigset_t *target_set; @@ -10880,13 +10880,15 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, set = NULL; } - ret = get_errno(epoll_pwait(epfd, ep, maxevents, timeout, set)); + ret = get_errno(safe_epoll_pwait(epfd, ep, maxevents, timeout, + set, SIGSET_T_SIZE)); break; } #endif #if defined(TARGET_NR_epoll_wait) case TARGET_NR_epoll_wait: - ret = get_errno(epoll_wait(epfd, ep, maxevents, timeout)); + ret = get_errno(safe_epoll_pwait(epfd, ep, maxevents, timeout, + NULL, 0)); break; #endif default: -- cgit v1.1 From ffb7ee796ae83b1e4b3c108f8615d54b53872c68 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 6 Jun 2016 19:58:12 +0100 Subject: linux-user: Use safe_syscall wrapper for semop Use the safe_syscall wrapper for the semop syscall or IPC operation. (We implement via the semtimedop syscall to make it easier to implement the guest semtimedop syscall later.) Signed-off-by: Peter Maydell Signed-off-by: Riku Voipio --- linux-user/syscall.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index a193849..3ccb367 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -732,6 +732,8 @@ safe_syscall4(int, msgsnd, int, msgid, const void *, msgp, size_t, sz, int, flags) safe_syscall5(int, msgrcv, int, msgid, void *, msgp, size_t, sz, long, msgtype, int, flags) +safe_syscall4(int, semtimedop, int, semid, struct sembuf *, tsops, + unsigned, nsops, const struct timespec *, timeout) #else /* This host kernel architecture uses a single ipc syscall; fake up * wrappers for the sub-operations to hide this implementation detail. @@ -740,6 +742,7 @@ safe_syscall5(int, msgrcv, int, msgid, void *, msgp, size_t, sz, * sys/ipc.h ones. So we just define them here, and rely on them being * the same for all host architectures. */ +#define Q_SEMTIMEDOP 4 #define Q_MSGSND 11 #define Q_MSGRCV 12 #define Q_IPCCALL(VERSION, OP) ((VERSION) << 16 | (OP)) @@ -754,6 +757,12 @@ static int safe_msgrcv(int msgid, void *msgp, size_t sz, long type, int flags) { return safe_ipc(Q_IPCCALL(1, Q_MSGRCV), msgid, sz, flags, msgp, type); } +static int safe_semtimedop(int semid, struct sembuf *tsops, unsigned nsops, + const struct timespec *timeout) +{ + return safe_ipc(Q_IPCCALL(0, Q_SEMTIMEDOP), semid, nsops, 0, tsops, + (long)timeout); +} #endif #if defined(TARGET_NR_mq_open) && defined(__NR_mq_open) safe_syscall5(int, mq_timedsend, int, mqdes, const char *, msg_ptr, @@ -3680,7 +3689,7 @@ static inline abi_long do_semop(int semid, abi_long ptr, unsigned nsops) if (target_to_host_sembuf(sops, ptr, nsops)) return -TARGET_EFAULT; - return get_errno(semop(semid, sops, nsops)); + return get_errno(safe_semtimedop(semid, sops, nsops, NULL)); } struct target_msqid_ds -- cgit v1.1 From ff6dc130794bcd5b2033bc50262a7720285a74c7 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 6 Jun 2016 19:58:13 +0100 Subject: linux-user: Use safe_syscall wrapper for accept and accept4 syscalls Use the safe_syscall wrapper for the accept and accept4 syscalls. accept4 has been in the kernel since 2.6.28 so we can assume it is always present. Signed-off-by: Peter Maydell Signed-off-by: Riku Voipio --- linux-user/syscall.c | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 3ccb367..d414dc4 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -721,6 +721,8 @@ safe_syscall3(ssize_t, recvmsg, int, fd, struct msghdr *, msg, int, flags) safe_syscall2(int, flock, int, fd, int, operation) safe_syscall4(int, rt_sigtimedwait, const sigset_t *, these, siginfo_t *, uinfo, const struct timespec *, uts, size_t, sigsetsize) +safe_syscall4(int, accept4, int, fd, struct sockaddr *, addr, socklen_t *, len, + int, flags) safe_syscall2(int, nanosleep, const struct timespec *, req, struct timespec *, rem) #ifdef TARGET_NR_clock_nanosleep @@ -3061,19 +3063,6 @@ static abi_long do_sendrecvmmsg(int fd, abi_ulong target_msgvec, return ret; } -/* If we don't have a system accept4() then just call accept. - * The callsites to do_accept4() will ensure that they don't - * pass a non-zero flags argument in this config. - */ -#ifndef CONFIG_ACCEPT4 -static inline int accept4(int sockfd, struct sockaddr *addr, - socklen_t *addrlen, int flags) -{ - assert(flags == 0); - return accept(sockfd, addr, addrlen); -} -#endif - /* do_accept4() Must return target values and target errnos. */ static abi_long do_accept4(int fd, abi_ulong target_addr, abi_ulong target_addrlen_addr, int flags) @@ -3086,7 +3075,7 @@ static abi_long do_accept4(int fd, abi_ulong target_addr, host_flags = target_to_host_bitmask(flags, fcntl_flags_tbl); if (target_addr == 0) { - return get_errno(accept4(fd, NULL, NULL, host_flags)); + return get_errno(safe_accept4(fd, NULL, NULL, host_flags)); } /* linux returns EINVAL if addrlen pointer is invalid */ @@ -3102,7 +3091,7 @@ static abi_long do_accept4(int fd, abi_ulong target_addr, addr = alloca(addrlen); - ret = get_errno(accept4(fd, addr, &addrlen, host_flags)); + ret = get_errno(safe_accept4(fd, addr, &addrlen, host_flags)); if (!is_error(ret)) { host_to_target_sockaddr(target_addr, addr, addrlen); if (put_user_u32(addrlen, target_addrlen_addr)) @@ -8334,11 +8323,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, #endif #ifdef TARGET_NR_accept4 case TARGET_NR_accept4: -#ifdef CONFIG_ACCEPT4 ret = do_accept4(arg1, arg2, arg3, arg4); -#else - goto unimplemented; -#endif break; #endif #ifdef TARGET_NR_bind -- cgit v1.1 From 49ca6f3e24ee45de514604b8116e541332b23a84 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 6 Jun 2016 19:58:14 +0100 Subject: linux-user: Use safe_syscall wrapper for ioctl Use the safe_syscall wrapper to implement the ioctl syscall. Signed-off-by: Peter Maydell Signed-off-by: Riku Voipio --- linux-user/syscall.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index d414dc4..7b7bae6 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -772,6 +772,11 @@ safe_syscall5(int, mq_timedsend, int, mqdes, const char *, msg_ptr, safe_syscall5(int, mq_timedreceive, int, mqdes, char *, msg_ptr, size_t, len, unsigned *, prio, const struct timespec *, timeout) #endif +/* We do ioctl like this rather than via safe_syscall3 to preserve the + * "third argument might be integer or pointer or not present" behaviour of + * the libc function. + */ +#define safe_ioctl(...) safe_syscall(__NR_ioctl, __VA_ARGS__) static inline int host_to_target_sock_type(int host_type) { @@ -4277,7 +4282,7 @@ static abi_long do_ioctl_fs_ioc_fiemap(const IOCTLEntry *ie, uint8_t *buf_temp, memcpy(fm, buf_temp, sizeof(struct fiemap)); free_fm = 1; } - ret = get_errno(ioctl(fd, ie->host_cmd, fm)); + ret = get_errno(safe_ioctl(fd, ie->host_cmd, fm)); if (!is_error(ret)) { target_size_out = target_size_in; /* An extent_count of 0 means we were only counting the extents @@ -4367,7 +4372,7 @@ static abi_long do_ioctl_ifconf(const IOCTLEntry *ie, uint8_t *buf_temp, host_ifconf->ifc_len = host_ifc_len; host_ifconf->ifc_buf = host_ifc_buf; - ret = get_errno(ioctl(fd, ie->host_cmd, host_ifconf)); + ret = get_errno(safe_ioctl(fd, ie->host_cmd, host_ifconf)); if (!is_error(ret)) { /* convert host ifc_len to target ifc_len */ @@ -4496,7 +4501,7 @@ static abi_long do_ioctl_dm(const IOCTLEntry *ie, uint8_t *buf_temp, int fd, } unlock_user(argptr, guest_data, 0); - ret = get_errno(ioctl(fd, ie->host_cmd, buf_temp)); + ret = get_errno(safe_ioctl(fd, ie->host_cmd, buf_temp)); if (!is_error(ret)) { guest_data = arg + host_dm->data_start; guest_data_size = host_dm->data_size - host_dm->data_start; @@ -4677,7 +4682,7 @@ static abi_long do_ioctl_blkpg(const IOCTLEntry *ie, uint8_t *buf_temp, int fd, /* Swizzle the data pointer to our local copy and call! */ host_blkpg->data = &host_part; - ret = get_errno(ioctl(fd, ie->host_cmd, host_blkpg)); + ret = get_errno(safe_ioctl(fd, ie->host_cmd, host_blkpg)); out: return ret; @@ -4738,7 +4743,7 @@ static abi_long do_ioctl_rt(const IOCTLEntry *ie, uint8_t *buf_temp, } unlock_user(argptr, arg, 0); - ret = get_errno(ioctl(fd, ie->host_cmd, buf_temp)); + ret = get_errno(safe_ioctl(fd, ie->host_cmd, buf_temp)); if (*host_rt_dev_ptr != 0) { unlock_user((void *)*host_rt_dev_ptr, *target_rt_dev_ptr, 0); @@ -4750,7 +4755,7 @@ static abi_long do_ioctl_kdsigaccept(const IOCTLEntry *ie, uint8_t *buf_temp, int fd, int cmd, abi_long arg) { int sig = target_to_host_signal(arg); - return get_errno(ioctl(fd, ie->host_cmd, sig)); + return get_errno(safe_ioctl(fd, ie->host_cmd, sig)); } static IOCTLEntry ioctl_entries[] = { @@ -4794,18 +4799,18 @@ static abi_long do_ioctl(int fd, int cmd, abi_long arg) switch(arg_type[0]) { case TYPE_NULL: /* no argument */ - ret = get_errno(ioctl(fd, ie->host_cmd)); + ret = get_errno(safe_ioctl(fd, ie->host_cmd)); break; case TYPE_PTRVOID: case TYPE_INT: - ret = get_errno(ioctl(fd, ie->host_cmd, arg)); + ret = get_errno(safe_ioctl(fd, ie->host_cmd, arg)); break; case TYPE_PTR: arg_type++; target_size = thunk_type_size(arg_type, 0); switch(ie->access) { case IOC_R: - ret = get_errno(ioctl(fd, ie->host_cmd, buf_temp)); + ret = get_errno(safe_ioctl(fd, ie->host_cmd, buf_temp)); if (!is_error(ret)) { argptr = lock_user(VERIFY_WRITE, arg, target_size, 0); if (!argptr) @@ -4820,7 +4825,7 @@ static abi_long do_ioctl(int fd, int cmd, abi_long arg) return -TARGET_EFAULT; thunk_convert(buf_temp, argptr, arg_type, THUNK_HOST); unlock_user(argptr, arg, 0); - ret = get_errno(ioctl(fd, ie->host_cmd, buf_temp)); + ret = get_errno(safe_ioctl(fd, ie->host_cmd, buf_temp)); break; default: case IOC_RW: @@ -4829,7 +4834,7 @@ static abi_long do_ioctl(int fd, int cmd, abi_long arg) return -TARGET_EFAULT; thunk_convert(buf_temp, argptr, arg_type, THUNK_HOST); unlock_user(argptr, arg, 0); - ret = get_errno(ioctl(fd, ie->host_cmd, buf_temp)); + ret = get_errno(safe_ioctl(fd, ie->host_cmd, buf_temp)); if (!is_error(ret)) { argptr = lock_user(VERIFY_WRITE, arg, target_size, 0); if (!argptr) -- cgit v1.1 From 8efb2ed5ec192f7e83c5e48753e695d5cbc161b2 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 6 Jun 2016 19:58:16 +0100 Subject: linux-user: Correct signedness of target_flock l_start and l_len fields The l_start and l_len fields in the various target_flock structures are supposed to be '__kernel_off_t' or '__kernel_loff_t', which means they should be signed, not unsigned. Correcting the structure definitions means that __get_user() and __put_user() will correctly sign extend them if the guest is using 32 bit offsets and the host is using 64 bit offsets. This fixes failures in the LTP 'fcntl14' tests where it checks that negative seek offsets work correctly. We reindent the structures to drop hard tabs since we're touching 40% of the fields anyway. RV: long long -> abi_llong as suggested by Laurent Vivier Signed-off-by: Peter Maydell Signed-off-by: Riku Voipio --- linux-user/syscall_defs.h | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/linux-user/syscall_defs.h b/linux-user/syscall_defs.h index 124754f..6ee9251 100644 --- a/linux-user/syscall_defs.h +++ b/linux-user/syscall_defs.h @@ -2289,34 +2289,34 @@ struct target_statfs64 { #endif struct target_flock { - short l_type; - short l_whence; - abi_ulong l_start; - abi_ulong l_len; - int l_pid; + short l_type; + short l_whence; + abi_long l_start; + abi_long l_len; + int l_pid; }; struct target_flock64 { - short l_type; - short l_whence; + short l_type; + short l_whence; #if defined(TARGET_PPC) || defined(TARGET_X86_64) || defined(TARGET_MIPS) \ || defined(TARGET_SPARC) || defined(TARGET_HPPA) \ || defined(TARGET_MICROBLAZE) || defined(TARGET_TILEGX) - int __pad; + int __pad; #endif - unsigned long long l_start; - unsigned long long l_len; - int l_pid; + abi_llong l_start; + abi_llong l_len; + int l_pid; } QEMU_PACKED; #ifdef TARGET_ARM struct target_eabi_flock64 { - short l_type; - short l_whence; - int __pad; - unsigned long long l_start; - unsigned long long l_len; - int l_pid; + short l_type; + short l_whence; + int __pad; + abi_llong l_start; + abi_llong l_len; + int l_pid; } QEMU_PACKED; #endif -- cgit v1.1 From 7dcdaeafe07dd87079c4e073bced4cfc8bf5fdf3 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 6 Jun 2016 19:58:18 +0100 Subject: linux-user: Make target_strerror() return 'const char *' Make target_strerror() return 'const char *' rather than just 'char *'; this will allow us to return constant strings from it for some special cases. Signed-off-by: Peter Maydell Signed-off-by: Riku Voipio Reviewed-by: Laurent Vivier --- linux-user/qemu.h | 2 +- linux-user/strace.c | 4 ++-- linux-user/syscall.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/linux-user/qemu.h b/linux-user/qemu.h index 6bd7b32..56f29c3 100644 --- a/linux-user/qemu.h +++ b/linux-user/qemu.h @@ -195,7 +195,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, void gemu_log(const char *fmt, ...) GCC_FMT_ATTR(1, 2); extern THREAD CPUState *thread_cpu; void cpu_loop(CPUArchState *env); -char *target_strerror(int err); +const char *target_strerror(int err); int get_osversion(void); void init_qemu_uname_release(void); void fork_start(void); diff --git a/linux-user/strace.c b/linux-user/strace.c index 0810c85..c5980a1 100644 --- a/linux-user/strace.c +++ b/linux-user/strace.c @@ -281,7 +281,7 @@ print_ipc(const struct syscallname *name, static void print_syscall_ret_addr(const struct syscallname *name, abi_long ret) { - char *errstr = NULL; + const char *errstr = NULL; if (ret < 0) { errstr = target_strerror(-ret); @@ -1594,7 +1594,7 @@ void print_syscall_ret(int num, abi_long ret) { int i; - char *errstr = NULL; + const char *errstr = NULL; for(i=0;i= (abi_ulong)(-4096); } -char *target_strerror(int err) +const char *target_strerror(int err) { if ((err >= ERRNO_TABLE_SIZE) || (err < 0)) { return NULL; -- cgit v1.1 From da2a34f7f9999da09f6c307b40b66eba8cc38283 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 6 Jun 2016 19:58:19 +0100 Subject: linux-user: Special-case ERESTARTSYS in target_strerror() Since TARGET_ERESTARTSYS and TARGET_ESIGRETURN are internal-to-QEMU error numbers, handle them specially in target_strerror(), to avoid confusing strace output like: 9521 rt_sigreturn(14,8,274886297808,8,0,268435456) = -1 errno=513 (Unknown error 513) Signed-off-by: Peter Maydell Signed-off-by: Riku Voipio --- linux-user/syscall.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 262c645..bd8095c 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -632,6 +632,13 @@ static inline int is_error(abi_long ret) const char *target_strerror(int err) { + if (err == TARGET_ERESTARTSYS) { + return "To be restarted"; + } + if (err == TARGET_QEMU_ESIGRETURN) { + return "Successful exit from sigreturn"; + } + if ((err >= ERRNO_TABLE_SIZE) || (err < 0)) { return NULL; } -- cgit v1.1 From 014628a705bdaf31c09915c29e61f4088956564d Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 7 Jun 2016 17:31:04 +0100 Subject: linux-user: In fork_end(), remove correct CPUs from CPU list In fork_end(), we must fix the list of current CPUs to match the fact that the child of the fork has only one thread. Unfortunately we were removing the wrong CPUs from the list, which meant that if the child subsequently did an exclusive operation it would deadlock in start_exclusive() waiting for a sibling CPU which didn't exist. In particular this could cause hangs doing git submodule init operations, as reported in https://bugs.launchpad.net/qemu/+bug/955379 comment #47. Signed-off-by: Peter Maydell Signed-off-by: Riku Voipio --- linux-user/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/linux-user/main.c b/linux-user/main.c index b6da0ba..150a356 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -130,7 +130,7 @@ void fork_end(int child) Discard information about the parent threads. */ CPU_FOREACH_SAFE(cpu, next_cpu) { if (cpu != thread_cpu) { - QTAILQ_REMOVE(&cpus, thread_cpu, node); + QTAILQ_REMOVE(&cpus, cpu, node); } } pending_cpus = 0; -- cgit v1.1