From 3bef0451e692966554b31e603424e7d40ba98d3a Mon Sep 17 00:00:00 2001
From: Laurent Vivier <laurent@vivier.eu>
Date: Fri, 29 Jan 2016 17:07:31 +0100
Subject: [PATCH 01/44] linux-user: Fix qemu-binfmt-conf.sh to store config
 across reboot

Original qemu-binfmt-conf.sh is only able to write configuration
into /proc/sys/fs/binfmt_misc, and the configuration is lost on reboot.

This script can configure debian and systemd services to restore
configuration on reboot. Moreover, it is able to manage binfmt
credential and to configure the path of the interpreter.

List of supported CPU is:

i386 i486 alpha arm sparc32plus ppc ppc64 ppc64le
m68k mips mipsel mipsn32 mipsn32el mips64 mips64el
sh4 sh4eb s390x aarch64

Usage: qemu-binfmt-conf.sh [--qemu-path PATH][--debian][--systemd CPU]
                           [--help][--credential yes|no][--exportdir PATH]

       Configure binfmt_misc to use qemu interpreter

       --help:       display this usage
       --qemu-path:  set path to qemu interpreter (/usr/local/bin)
       --debian:     don't write into /proc,
                     instead generate update-binfmts templates
       --systemd:    don't write into /proc,
                     instead generate file for systemd-binfmt.service
                     for the given CPU
       --exportdir:  define where to write configuration files
                     (default: /etc/binfmt.d or /usr/share/binfmts)
       --credential: if yes, credential an security tokens are
                     calculated according to the binary to interpret

    To import templates with update-binfmts, use :

        sudo update-binfmts --importdir /usr/share/binfmts --import qemu-CPU

    To remove interpreter, use :

        sudo update-binfmts --package qemu-CPU --remove qemu-CPU /usr/local/bin

    With systemd, binfmt files are loaded by systemd-binfmt.service

    The environment variable HOST_ARCH allows to override 'uname' to generate
    configuration files for a different architecture than the current one.

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 scripts/qemu-binfmt-conf.sh | 385 +++++++++++++++++++++++++++++-------
 1 file changed, 318 insertions(+), 67 deletions(-)
 mode change 100644 => 100755 scripts/qemu-binfmt-conf.sh

diff --git a/scripts/qemu-binfmt-conf.sh b/scripts/qemu-binfmt-conf.sh
old mode 100644
new mode 100755
index 289b1a3963..de4d1c13d4
--- a/scripts/qemu-binfmt-conf.sh
+++ b/scripts/qemu-binfmt-conf.sh
@@ -1,72 +1,323 @@
 #!/bin/sh
 # enable automatic i386/ARM/M68K/MIPS/SPARC/PPC/s390 program execution by the kernel
 
-# load the binfmt_misc module
-if [ ! -d /proc/sys/fs/binfmt_misc ]; then
-  /sbin/modprobe binfmt_misc
-fi
-if [ ! -f /proc/sys/fs/binfmt_misc/register ]; then
-  mount binfmt_misc -t binfmt_misc /proc/sys/fs/binfmt_misc
-fi
+qemu_target_list="i386 i486 alpha arm sparc32plus ppc ppc64 ppc64le m68k \
+mips mipsel mipsn32 mipsn32el mips64 mips64el \
+sh4 sh4eb s390x aarch64"
 
-# probe cpu type
-cpu=`uname -m`
-case "$cpu" in
-  i386|i486|i586|i686|i86pc|BePC|x86_64)
-    cpu="i386"
-  ;;
-  m68k)
-    cpu="m68k"
-  ;;
-  mips*)
-    cpu="mips"
-  ;;
-  "Power Macintosh"|ppc|ppc64)
-    cpu="ppc"
-  ;;
-  armv[4-9]*)
-    cpu="arm"
-  ;;
-esac
+i386_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x03\x00'
+i386_mask='\xff\xff\xff\xff\xff\xfe\xfe\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff'
+i386_family=i386
 
-# register the interpreter for each cpu except for the native one
-if [ $cpu != "i386" ] ; then
-    echo ':i386:M::\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x03\x00:\xff\xff\xff\xff\xff\xfe\xfe\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff:/usr/local/bin/qemu-i386:' > /proc/sys/fs/binfmt_misc/register
-    echo ':i486:M::\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x06\x00:\xff\xff\xff\xff\xff\xfe\xfe\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff:/usr/local/bin/qemu-i386:' > /proc/sys/fs/binfmt_misc/register
-fi
-if [ $cpu != "alpha" ] ; then
-    echo ':alpha:M::\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x26\x90:\xff\xff\xff\xff\xff\xfe\xfe\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff:/usr/local/bin/qemu-alpha:' > /proc/sys/fs/binfmt_misc/register
-fi
-if [ $cpu != "arm" ] ; then
-    echo   ':arm:M::\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x28\x00:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff:/usr/local/bin/qemu-arm:' > /proc/sys/fs/binfmt_misc/register
-    echo   ':armeb:M::\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x28:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/local/bin/qemu-armeb:' > /proc/sys/fs/binfmt_misc/register
-fi
-if [ $cpu != "aarch64" ] ; then
-    echo ':aarch64:M::\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\xb7\x00:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff:/usr/local/bin/qemu-aarch64:' > /proc/sys/fs/binfmt_misc/register
-fi
-if [ $cpu != "sparc" ] ; then
-    echo   ':sparc:M::\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x02:\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/local/bin/qemu-sparc:' > /proc/sys/fs/binfmt_misc/register
-fi
-if [ $cpu != "ppc" ] ; then
-    echo   ':ppc:M::\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x14:\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/local/bin/qemu-ppc:' > /proc/sys/fs/binfmt_misc/register
-fi
-if [ $cpu != "m68k" ] ; then
-    echo   'Please check cpu value and header information for m68k!'
-    echo   ':m68k:M::\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x04:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/local/bin/qemu-m68k:' > /proc/sys/fs/binfmt_misc/register
-fi
-if [ $cpu != "mips" ] ; then
-    # FIXME: We could use the other endianness on a MIPS host.
-    echo   ':mips:M::\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/local/bin/qemu-mips:' > /proc/sys/fs/binfmt_misc/register
-    echo   ':mipsel:M::\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff:/usr/local/bin/qemu-mipsel:' > /proc/sys/fs/binfmt_misc/register
-    echo   ':mipsn32:M::\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/local/bin/qemu-mipsn32:' > /proc/sys/fs/binfmt_misc/register
-    echo   ':mipsn32el:M::\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff:/usr/local/bin/qemu-mipsn32el:' > /proc/sys/fs/binfmt_misc/register
-    echo   ':mips64:M::\x7fELF\x02\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/local/bin/qemu-mips64:' > /proc/sys/fs/binfmt_misc/register
-    echo   ':mips64el:M::\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff:/usr/local/bin/qemu-mips64el:' > /proc/sys/fs/binfmt_misc/register
-fi
-if [ $cpu != "sh" ] ; then
-    echo    ':sh4:M::\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x2a\x00:\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff:/usr/local/bin/qemu-sh4:' > /proc/sys/fs/binfmt_misc/register
-    echo    ':sh4eb:M::\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x2a:\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/local/bin/qemu-sh4eb:' > /proc/sys/fs/binfmt_misc/register
-fi
-if [ $cpu != "s390x" ] ; then
-    echo   ':s390x:M::\x7fELF\x02\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x16:\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/local/bin/qemu-s390x:' > /proc/sys/fs/binfmt_misc/register
-fi
+i486_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x06\x00'
+i486_mask='\xff\xff\xff\xff\xff\xfe\xfe\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff'
+i486_family=i386
+
+alpha_magic='\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x26\x90'
+alpha_mask='\xff\xff\xff\xff\xff\xfe\xfe\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff'
+alpha_family=alpha
+
+arm_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x28\x00'
+arm_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff'
+arm_family=arm
+
+armeb_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x28'
+armeb_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff'
+armeb_family=arm
+
+sparc_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x02'
+sparc_mask='\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff'
+sparc_family=sparc
+
+sparc32plus_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x12'
+sparc32plus_mask='\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff'
+sparc32plus_family=sparc
+
+ppc_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x14'
+ppc_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff'
+ppc_family=ppc
+
+ppc64_magic='\x7fELF\x02\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x15'
+ppc64_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff'
+ppc64_family=ppc
+
+ppc64le_magic='\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x15\x00'
+ppc64le_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\x00'
+ppc64le_family=ppcle
+
+m68k_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x04'
+m68k_mask='\xff\xff\xff\xff\xff\xff\xfe\xfe\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff'
+m68k_family=m68k
+
+# FIXME: We could use the other endianness on a MIPS host.
+
+mips_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08'
+mips_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff'
+mips_family=mips
+
+mipsel_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00'
+mipsel_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff'
+mipsel_family=mips
+
+mipsn32_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08'
+mipsn32_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff'
+mipsn32_family=mips
+
+mipsn32el_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00'
+mipsn32el_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff'
+mipsn32el_family=mips
+
+mips64_magic='\x7fELF\x02\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08'
+mips64_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff'
+mips64_family=mips
+
+mips64el_magic='\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00'
+mips64el_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff'
+mips64el_family=mips
+
+sh4_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x2a\x00'
+sh4_mask='\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff'
+sh4_family=sh4
+
+sh4eb_magic='\x7fELF\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x2a'
+sh4eb_mask='\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff'
+sh4eb_family=sh4
+
+s390x_magic='\x7fELF\x02\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x16'
+s390x_mask='\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff'
+s390x_family=s390x
+
+aarch64_magic='\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\xb7\x00'
+aarch64_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff'
+aarch64_family=arm
+
+qemu_get_family() {
+    cpu=${HOST_ARCH:-$(uname -m)}
+    case "$cpu" in
+    amd64|i386|i486|i586|i686|i86pc|BePC|x86_64)
+        echo "i386"
+        ;;
+    mips*)
+        echo "mips"
+        ;;
+    "Power Macintosh"|ppc64|powerpc|ppc)
+        echo "ppc"
+        ;;
+    ppc64el|ppc64le)
+        echo "ppcle"
+        ;;
+    arm|armel|armhf|arm64|armv[4-9]*)
+        echo "arm"
+        ;;
+    sparc*)
+        echo "sparc"
+        ;;
+    *)
+        echo "$cpu"
+        ;;
+    esac
+}
+
+usage() {
+    cat <<EOF
+Usage: qemu-binfmt-conf.sh [--qemu-path PATH][--debian][--systemd CPU]
+                           [--help][--credential yes|no][--exportdir PATH]
+
+       Configure binfmt_misc to use qemu interpreter
+
+       --help:       display this usage
+       --qemu-path:  set path to qemu interpreter ($QEMU_PATH)
+       --debian:     don't write into /proc,
+                     instead generate update-binfmts templates
+       --systemd:    don't write into /proc,
+                     instead generate file for systemd-binfmt.service
+                     for the given CPU
+       --exportdir:  define where to write configuration files
+                     (default: $SYSTEMDDIR or $DEBIANDIR)
+       --credential: if yes, credential and security tokens are
+                     calculated according to the binary to interpret
+
+    To import templates with update-binfmts, use :
+
+        sudo update-binfmts --importdir ${EXPORTDIR:-$DEBIANDIR} --import qemu-CPU
+
+    To remove interpreter, use :
+
+        sudo update-binfmts --package qemu-CPU --remove qemu-CPU $QEMU_PATH
+
+    With systemd, binfmt files are loaded by systemd-binfmt.service
+
+    The environment variable HOST_ARCH allows to override 'uname' to generate
+    configuration files for a different architecture than the current one.
+
+    where CPU is one of:
+
+        $qemu_target_list
+
+EOF
+}
+
+qemu_check_access() {
+    if [ ! -w "$1" ] ; then
+        echo "ERROR: cannot write to $1" 1>&2
+        exit 1
+    fi
+}
+
+qemu_check_bintfmt_misc() {
+    # load the binfmt_misc module
+    if [ ! -d /proc/sys/fs/binfmt_misc ]; then
+      if ! /sbin/modprobe binfmt_misc ; then
+          exit 1
+      fi
+    fi
+    if [ ! -f /proc/sys/fs/binfmt_misc/register ]; then
+      if ! mount binfmt_misc -t binfmt_misc /proc/sys/fs/binfmt_misc ; then
+          exit 1
+      fi
+    fi
+
+    qemu_check_access /proc/sys/fs/binfmt_misc/register
+}
+
+installed_dpkg() {
+    dpkg --status "$1" > /dev/null 2>&1
+}
+
+qemu_check_debian() {
+    if [ ! -e /etc/debian_version ] ; then
+        echo "WARNING: your system is not a Debian based distro" 1>&2
+    elif ! installed_dpkg binfmt-support ; then
+        echo "WARNING: package binfmt-support is needed" 1>&2
+    fi
+    qemu_check_access "$EXPORTDIR"
+}
+
+qemu_check_systemd() {
+    if ! systemctl -q is-enabled systemd-binfmt.service ; then
+        echo "WARNING: systemd-binfmt.service is missing or disabled" 1>&2
+    fi
+    qemu_check_access "$EXPORTDIR"
+}
+
+qemu_generate_register() {
+    echo ":qemu-$cpu:M::$magic:$mask:$qemu:$FLAGS"
+}
+
+qemu_register_interpreter() {
+    echo "Setting $qemu as binfmt interpreter for $cpu"
+    qemu_generate_register > /proc/sys/fs/binfmt_misc/register
+}
+
+qemu_generate_systemd() {
+    echo "Setting $qemu as binfmt interpreter for $cpu for systemd-binfmt.service"
+    qemu_generate_register > "$EXPORTDIR/qemu-$cpu.conf"
+}
+
+qemu_generate_debian() {
+    cat > "$EXPORTDIR/qemu-$cpu" <<EOF
+package qemu-$cpu
+interpreter $qemu
+magic $magic
+mask $mask
+EOF
+    if [ "$FLAGS" = "OC" ] ; then
+        echo "credentials yes" >> "$EXPORTDIR/qemu-$cpu"
+    fi
+}
+
+qemu_set_binfmts() {
+    # probe cpu type
+    host_family=$(qemu_get_family)
+
+    # register the interpreter for each cpu except for the native one
+
+    for cpu in ${qemu_target_list} ; do
+        magic=$(eval echo \$${cpu}_magic)
+        mask=$(eval echo \$${cpu}_mask)
+        family=$(eval echo \$${cpu}_family)
+
+        if [ "$magic" = "" ] || [ "$mask" = "" ] || [ "$family" = "" ] ; then
+            echo "INTERNAL ERROR: unknown cpu $cpu" 1>&2
+            continue
+        fi
+
+        qemu="$QEMU_PATH/qemu-$cpu"
+        if [ "$cpu" = "i486" ] ; then
+            qemu="$QEMU_PATH/qemu-i386"
+        fi
+
+        if [ "$host_family" != "$family" ] ; then
+            $BINFMT_SET
+        fi
+    done
+}
+
+CHECK=qemu_check_bintfmt_misc
+BINFMT_SET=qemu_register_interpreter
+
+SYSTEMDDIR="/etc/binfmt.d"
+DEBIANDIR="/usr/share/binfmts"
+
+QEMU_PATH=/usr/local/bin
+FLAGS=""
+
+options=$(getopt -o ds:Q:e:hc: -l debian,systemd:,qemu-path:,exportdir:,help,credential: -- "$@")
+eval set -- "$options"
+
+while true ; do
+    case "$1" in
+    -d|--debian)
+        CHECK=qemu_check_debian
+        BINFMT_SET=qemu_generate_debian
+        EXPORTDIR=${EXPORTDIR:-$DEBIANDIR}
+        ;;
+    -s|--systemd)
+        CHECK=qemu_check_systemd
+        BINFMT_SET=qemu_generate_systemd
+        EXPORTDIR=${EXPORTDIR:-$SYSTEMDDIR}
+        shift
+        # check given cpu is in the supported CPU list
+        for cpu in ${qemu_target_list} ; do
+            if [ "$cpu" == "$1" ] ; then
+                break
+            fi
+        done
+
+        if [ "$cpu" == "$1" ] ; then
+            qemu_target_list="$1"
+        else
+            echo "ERROR: unknown CPU \"$1\"" 1>&2
+            usage
+            exit 1
+        fi
+        ;;
+    -Q|--qemu-path)
+        shift
+        QEMU_PATH="$1"
+        ;;
+    -e|--exportdir)
+        shift
+        EXPORTDIR="$1"
+        ;;
+    -h|--help)
+        usage
+        exit 1
+        ;;
+    -c|--credential)
+        shift
+        if [ "$1" = "yes" ] ; then
+            FLAGS="OC"
+        else
+            FLAGS=""
+        fi
+        ;;
+    *)
+        break
+        ;;
+    esac
+    shift
+done
+
+$CHECK
+qemu_set_binfmts

From 6c5b5645ae0b73c052df962e18e48d87bb7385e0 Mon Sep 17 00:00:00 2001
From: Laurent Vivier <laurent@vivier.eu>
Date: Sun, 22 May 2016 18:56:19 +0200
Subject: [PATCH 02/44] linux-user: add rtnetlink(7) support

rtnetlink is needed to use iproute package (ip addr, ip route)
and dhcp client.

Examples:

Without this patch:
    # ip link
    Cannot open netlink socket: Address family not supported by protocol
    # ip addr
    Cannot open netlink socket: Address family not supported by protocol
    # ip route
    Cannot open netlink socket: Address family not supported by protocol
    # dhclient eth0
    Cannot open netlink socket: Address family not supported by protocol
    Cannot open netlink socket: Address family not supported by protocol

With this patch:
    # ip link
    1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN mode DEFAULT
        link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
    51: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel state UP mode DEFAULT qlen 1000
        link/ether 00:16:3e:89:6b:d7 brd ff:ff:ff:ff:ff:ff
    # ip addr show eth0
    51: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel state UP qlen 1000
        link/ether 00:16:3e:89:6b:d7 brd ff:ff:ff:ff:ff:ff
        inet 192.168.122.197/24 brd 192.168.122.255 scope global eth0
           valid_lft forever preferred_lft forever
        inet6 fe80::216:3eff:fe89:6bd7/64 scope link
           valid_lft forever preferred_lft forever
    # ip route
    default via 192.168.122.1 dev eth0
    192.168.122.0/24 dev eth0  proto kernel  scope link  src 192.168.122.197
    # ip addr flush eth0
    # ip addr add 192.168.122.10 dev eth0
    # ip addr show eth0
    51: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel state UP qlen 1000
        link/ether 00:16:3e:89:6b:d7 brd ff:ff:ff:ff:ff:ff
        inet 192.168.122.10/32 scope global eth0
           valid_lft forever preferred_lft forever
    # ip route add 192.168.122.0/24 via 192.168.122.10
    # ip route
        192.168.122.0/24 via 192.168.122.10 dev eth0

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 linux-user/syscall.c | 581 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 575 insertions(+), 6 deletions(-)

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index df70255e5f..3e4895e845 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -101,6 +101,8 @@ int __clone2(int (*fn)(void *), void *child_stack_base,
 #include <linux/route.h>
 #include <linux/filter.h>
 #include <linux/blkpg.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
 #include "linux_loop.h"
 #include "uname.h"
 
@@ -304,6 +306,14 @@ static TargetFdTrans **target_fd_trans;
 
 static unsigned int target_fd_max;
 
+static TargetFdDataFunc fd_trans_target_to_host_data(int fd)
+{
+    if (fd >= 0 && fd < target_fd_max && target_fd_trans[fd]) {
+        return target_fd_trans[fd]->target_to_host_data;
+    }
+    return NULL;
+}
+
 static TargetFdDataFunc fd_trans_host_to_target_data(int fd)
 {
     if (fd >= 0 && fd < target_fd_max && target_fd_trans[fd]) {
@@ -1261,7 +1271,13 @@ static inline abi_long target_to_host_sockaddr(int fd, struct sockaddr *addr,
 
     memcpy(addr, target_saddr, len);
     addr->sa_family = sa_family;
-    if (sa_family == AF_PACKET) {
+    if (sa_family == AF_NETLINK) {
+        struct sockaddr_nl *nladdr;
+
+        nladdr = (struct sockaddr_nl *)addr;
+        nladdr->nl_pid = tswap32(nladdr->nl_pid);
+        nladdr->nl_groups = tswap32(nladdr->nl_groups);
+    } else if (sa_family == AF_PACKET) {
 	struct target_sockaddr_ll *lladdr;
 
 	lladdr = (struct target_sockaddr_ll *)addr;
@@ -1284,6 +1300,11 @@ static inline abi_long host_to_target_sockaddr(abi_ulong target_addr,
         return -TARGET_EFAULT;
     memcpy(target_saddr, addr, len);
     target_saddr->sa_family = tswap16(addr->sa_family);
+    if (addr->sa_family == AF_NETLINK) {
+        struct sockaddr_nl *target_nl = (struct sockaddr_nl *)target_saddr;
+        target_nl->nl_pid = tswap32(target_nl->nl_pid);
+        target_nl->nl_groups = tswap32(target_nl->nl_groups);
+    }
     unlock_user(target_saddr, target_addr, len);
 
     return 0;
@@ -1515,6 +1536,511 @@ static inline abi_long host_to_target_cmsg(struct target_msghdr *target_msgh,
     return 0;
 }
 
+static void tswap_nlmsghdr(struct nlmsghdr *nlh)
+{
+    nlh->nlmsg_len = tswap32(nlh->nlmsg_len);
+    nlh->nlmsg_type = tswap16(nlh->nlmsg_type);
+    nlh->nlmsg_flags = tswap16(nlh->nlmsg_flags);
+    nlh->nlmsg_seq = tswap32(nlh->nlmsg_seq);
+    nlh->nlmsg_pid = tswap32(nlh->nlmsg_pid);
+}
+
+static abi_long host_to_target_for_each_nlmsg(struct nlmsghdr *nlh,
+                                              size_t len,
+                                              abi_long (*host_to_target_nlmsg)
+                                                       (struct nlmsghdr *))
+{
+    uint32_t nlmsg_len;
+    abi_long ret;
+
+    while (len > sizeof(struct nlmsghdr)) {
+
+        nlmsg_len = nlh->nlmsg_len;
+        if (nlmsg_len < sizeof(struct nlmsghdr) ||
+            nlmsg_len > len) {
+            break;
+        }
+
+        switch (nlh->nlmsg_type) {
+        case NLMSG_DONE:
+            tswap_nlmsghdr(nlh);
+            return 0;
+        case NLMSG_NOOP:
+            break;
+        case NLMSG_ERROR:
+        {
+            struct nlmsgerr *e = NLMSG_DATA(nlh);
+            e->error = tswap32(e->error);
+            tswap_nlmsghdr(&e->msg);
+            tswap_nlmsghdr(nlh);
+            return 0;
+        }
+        default:
+            ret = host_to_target_nlmsg(nlh);
+            if (ret < 0) {
+                tswap_nlmsghdr(nlh);
+                return ret;
+            }
+            break;
+        }
+        tswap_nlmsghdr(nlh);
+        len -= NLMSG_ALIGN(nlmsg_len);
+        nlh = (struct nlmsghdr *)(((char*)nlh) + NLMSG_ALIGN(nlmsg_len));
+    }
+    return 0;
+}
+
+static abi_long target_to_host_for_each_nlmsg(struct nlmsghdr *nlh,
+                                              size_t len,
+                                              abi_long (*target_to_host_nlmsg)
+                                                       (struct nlmsghdr *))
+{
+    int ret;
+
+    while (len > sizeof(struct nlmsghdr)) {
+        if (tswap32(nlh->nlmsg_len) < sizeof(struct nlmsghdr) ||
+            tswap32(nlh->nlmsg_len) > len) {
+            break;
+        }
+        tswap_nlmsghdr(nlh);
+        switch (nlh->nlmsg_type) {
+        case NLMSG_DONE:
+            return 0;
+        case NLMSG_NOOP:
+            break;
+        case NLMSG_ERROR:
+        {
+            struct nlmsgerr *e = NLMSG_DATA(nlh);
+            e->error = tswap32(e->error);
+            tswap_nlmsghdr(&e->msg);
+        }
+        default:
+            ret = target_to_host_nlmsg(nlh);
+            if (ret < 0) {
+                return ret;
+            }
+        }
+        len -= NLMSG_ALIGN(nlh->nlmsg_len);
+        nlh = (struct nlmsghdr *)(((char *)nlh) + NLMSG_ALIGN(nlh->nlmsg_len));
+    }
+    return 0;
+}
+
+static abi_long host_to_target_for_each_rtattr(struct rtattr *rtattr,
+                                               size_t len,
+                                               abi_long (*host_to_target_rtattr)
+                                                        (struct rtattr *))
+{
+    unsigned short rta_len;
+    abi_long ret;
+
+    while (len > sizeof(struct rtattr)) {
+        rta_len = rtattr->rta_len;
+        if (rta_len < sizeof(struct rtattr) ||
+            rta_len > len) {
+            break;
+        }
+        ret = host_to_target_rtattr(rtattr);
+        rtattr->rta_len = tswap16(rtattr->rta_len);
+        rtattr->rta_type = tswap16(rtattr->rta_type);
+        if (ret < 0) {
+            return ret;
+        }
+        len -= RTA_ALIGN(rta_len);
+        rtattr = (struct rtattr *)(((char *)rtattr) + RTA_ALIGN(rta_len));
+    }
+    return 0;
+}
+
+static abi_long host_to_target_data_link_rtattr(struct rtattr *rtattr)
+{
+    uint32_t *u32;
+    struct rtnl_link_stats *st;
+    struct rtnl_link_stats64 *st64;
+    struct rtnl_link_ifmap *map;
+
+    switch (rtattr->rta_type) {
+    /* binary stream */
+    case IFLA_ADDRESS:
+    case IFLA_BROADCAST:
+    /* string */
+    case IFLA_IFNAME:
+    case IFLA_QDISC:
+        break;
+    /* uin8_t */
+    case IFLA_OPERSTATE:
+    case IFLA_LINKMODE:
+    case IFLA_CARRIER:
+    case IFLA_PROTO_DOWN:
+        break;
+    /* uint32_t */
+    case IFLA_MTU:
+    case IFLA_LINK:
+    case IFLA_WEIGHT:
+    case IFLA_TXQLEN:
+    case IFLA_CARRIER_CHANGES:
+    case IFLA_NUM_RX_QUEUES:
+    case IFLA_NUM_TX_QUEUES:
+    case IFLA_PROMISCUITY:
+    case IFLA_EXT_MASK:
+    case IFLA_LINK_NETNSID:
+    case IFLA_GROUP:
+    case IFLA_MASTER:
+    case IFLA_NUM_VF:
+        u32 = RTA_DATA(rtattr);
+        *u32 = tswap32(*u32);
+        break;
+    /* struct rtnl_link_stats */
+    case IFLA_STATS:
+        st = RTA_DATA(rtattr);
+        st->rx_packets = tswap32(st->rx_packets);
+        st->tx_packets = tswap32(st->tx_packets);
+        st->rx_bytes = tswap32(st->rx_bytes);
+        st->tx_bytes = tswap32(st->tx_bytes);
+        st->rx_errors = tswap32(st->rx_errors);
+        st->tx_errors = tswap32(st->tx_errors);
+        st->rx_dropped = tswap32(st->rx_dropped);
+        st->tx_dropped = tswap32(st->tx_dropped);
+        st->multicast = tswap32(st->multicast);
+        st->collisions = tswap32(st->collisions);
+
+        /* detailed rx_errors: */
+        st->rx_length_errors = tswap32(st->rx_length_errors);
+        st->rx_over_errors = tswap32(st->rx_over_errors);
+        st->rx_crc_errors = tswap32(st->rx_crc_errors);
+        st->rx_frame_errors = tswap32(st->rx_frame_errors);
+        st->rx_fifo_errors = tswap32(st->rx_fifo_errors);
+        st->rx_missed_errors = tswap32(st->rx_missed_errors);
+
+        /* detailed tx_errors */
+        st->tx_aborted_errors = tswap32(st->tx_aborted_errors);
+        st->tx_carrier_errors = tswap32(st->tx_carrier_errors);
+        st->tx_fifo_errors = tswap32(st->tx_fifo_errors);
+        st->tx_heartbeat_errors = tswap32(st->tx_heartbeat_errors);
+        st->tx_window_errors = tswap32(st->tx_window_errors);
+
+        /* for cslip etc */
+        st->rx_compressed = tswap32(st->rx_compressed);
+        st->tx_compressed = tswap32(st->tx_compressed);
+        break;
+    /* struct rtnl_link_stats64 */
+    case IFLA_STATS64:
+        st64 = RTA_DATA(rtattr);
+        st64->rx_packets = tswap64(st64->rx_packets);
+        st64->tx_packets = tswap64(st64->tx_packets);
+        st64->rx_bytes = tswap64(st64->rx_bytes);
+        st64->tx_bytes = tswap64(st64->tx_bytes);
+        st64->rx_errors = tswap64(st64->rx_errors);
+        st64->tx_errors = tswap64(st64->tx_errors);
+        st64->rx_dropped = tswap64(st64->rx_dropped);
+        st64->tx_dropped = tswap64(st64->tx_dropped);
+        st64->multicast = tswap64(st64->multicast);
+        st64->collisions = tswap64(st64->collisions);
+
+        /* detailed rx_errors: */
+        st64->rx_length_errors = tswap64(st64->rx_length_errors);
+        st64->rx_over_errors = tswap64(st64->rx_over_errors);
+        st64->rx_crc_errors = tswap64(st64->rx_crc_errors);
+        st64->rx_frame_errors = tswap64(st64->rx_frame_errors);
+        st64->rx_fifo_errors = tswap64(st64->rx_fifo_errors);
+        st64->rx_missed_errors = tswap64(st64->rx_missed_errors);
+
+        /* detailed tx_errors */
+        st64->tx_aborted_errors = tswap64(st64->tx_aborted_errors);
+        st64->tx_carrier_errors = tswap64(st64->tx_carrier_errors);
+        st64->tx_fifo_errors = tswap64(st64->tx_fifo_errors);
+        st64->tx_heartbeat_errors = tswap64(st64->tx_heartbeat_errors);
+        st64->tx_window_errors = tswap64(st64->tx_window_errors);
+
+        /* for cslip etc */
+        st64->rx_compressed = tswap64(st64->rx_compressed);
+        st64->tx_compressed = tswap64(st64->tx_compressed);
+        break;
+    /* struct rtnl_link_ifmap */
+    case IFLA_MAP:
+        map = RTA_DATA(rtattr);
+        map->mem_start = tswap64(map->mem_start);
+        map->mem_end = tswap64(map->mem_end);
+        map->base_addr = tswap64(map->base_addr);
+        map->irq = tswap16(map->irq);
+        break;
+    /* nested */
+    case IFLA_AF_SPEC:
+    case IFLA_LINKINFO:
+        /* FIXME: implement nested type */
+        gemu_log("Unimplemented nested type %d\n", rtattr->rta_type);
+        break;
+    default:
+        gemu_log("Unknown host IFLA type: %d\n", rtattr->rta_type);
+        break;
+    }
+    return 0;
+}
+
+static abi_long host_to_target_data_addr_rtattr(struct rtattr *rtattr)
+{
+    uint32_t *u32;
+    struct ifa_cacheinfo *ci;
+
+    switch (rtattr->rta_type) {
+    /* binary: depends on family type */
+    case IFA_ADDRESS:
+    case IFA_LOCAL:
+        break;
+    /* string */
+    case IFA_LABEL:
+        break;
+    /* u32 */
+    case IFA_FLAGS:
+    case IFA_BROADCAST:
+        u32 = RTA_DATA(rtattr);
+        *u32 = tswap32(*u32);
+        break;
+    /* struct ifa_cacheinfo */
+    case IFA_CACHEINFO:
+        ci = RTA_DATA(rtattr);
+        ci->ifa_prefered = tswap32(ci->ifa_prefered);
+        ci->ifa_valid = tswap32(ci->ifa_valid);
+        ci->cstamp = tswap32(ci->cstamp);
+        ci->tstamp = tswap32(ci->tstamp);
+        break;
+    default:
+        gemu_log("Unknown host IFA type: %d\n", rtattr->rta_type);
+        break;
+    }
+    return 0;
+}
+
+static abi_long host_to_target_data_route_rtattr(struct rtattr *rtattr)
+{
+    uint32_t *u32;
+    switch (rtattr->rta_type) {
+    /* binary: depends on family type */
+    case RTA_GATEWAY:
+    case RTA_DST:
+    case RTA_PREFSRC:
+        break;
+    /* u32 */
+    case RTA_PRIORITY:
+    case RTA_TABLE:
+    case RTA_OIF:
+        u32 = RTA_DATA(rtattr);
+        *u32 = tswap32(*u32);
+        break;
+    default:
+        gemu_log("Unknown host RTA type: %d\n", rtattr->rta_type);
+        break;
+    }
+    return 0;
+}
+
+static abi_long host_to_target_link_rtattr(struct rtattr *rtattr,
+                                         uint32_t rtattr_len)
+{
+    return host_to_target_for_each_rtattr(rtattr, rtattr_len,
+                                          host_to_target_data_link_rtattr);
+}
+
+static abi_long host_to_target_addr_rtattr(struct rtattr *rtattr,
+                                         uint32_t rtattr_len)
+{
+    return host_to_target_for_each_rtattr(rtattr, rtattr_len,
+                                          host_to_target_data_addr_rtattr);
+}
+
+static abi_long host_to_target_route_rtattr(struct rtattr *rtattr,
+                                         uint32_t rtattr_len)
+{
+    return host_to_target_for_each_rtattr(rtattr, rtattr_len,
+                                          host_to_target_data_route_rtattr);
+}
+
+static abi_long host_to_target_data_route(struct nlmsghdr *nlh)
+{
+    uint32_t nlmsg_len;
+    struct ifinfomsg *ifi;
+    struct ifaddrmsg *ifa;
+    struct rtmsg *rtm;
+
+    nlmsg_len = nlh->nlmsg_len;
+    switch (nlh->nlmsg_type) {
+    case RTM_NEWLINK:
+    case RTM_DELLINK:
+    case RTM_GETLINK:
+        ifi = NLMSG_DATA(nlh);
+        ifi->ifi_type = tswap16(ifi->ifi_type);
+        ifi->ifi_index = tswap32(ifi->ifi_index);
+        ifi->ifi_flags = tswap32(ifi->ifi_flags);
+        ifi->ifi_change = tswap32(ifi->ifi_change);
+        host_to_target_link_rtattr(IFLA_RTA(ifi),
+                                   nlmsg_len - NLMSG_LENGTH(sizeof(*ifi)));
+        break;
+    case RTM_NEWADDR:
+    case RTM_DELADDR:
+    case RTM_GETADDR:
+        ifa = NLMSG_DATA(nlh);
+        ifa->ifa_index = tswap32(ifa->ifa_index);
+        host_to_target_addr_rtattr(IFA_RTA(ifa),
+                                   nlmsg_len - NLMSG_LENGTH(sizeof(*ifa)));
+        break;
+    case RTM_NEWROUTE:
+    case RTM_DELROUTE:
+    case RTM_GETROUTE:
+        rtm = NLMSG_DATA(nlh);
+        rtm->rtm_flags = tswap32(rtm->rtm_flags);
+        host_to_target_route_rtattr(RTM_RTA(rtm),
+                                    nlmsg_len - NLMSG_LENGTH(sizeof(*rtm)));
+        break;
+    default:
+        return -TARGET_EINVAL;
+    }
+    return 0;
+}
+
+static inline abi_long host_to_target_nlmsg_route(struct nlmsghdr *nlh,
+                                                  size_t len)
+{
+    return host_to_target_for_each_nlmsg(nlh, len, host_to_target_data_route);
+}
+
+static abi_long target_to_host_for_each_rtattr(struct rtattr *rtattr,
+                                               size_t len,
+                                               abi_long (*target_to_host_rtattr)
+                                                        (struct rtattr *))
+{
+    abi_long ret;
+
+    while (len >= sizeof(struct rtattr)) {
+        if (tswap16(rtattr->rta_len) < sizeof(struct rtattr) ||
+            tswap16(rtattr->rta_len) > len) {
+            break;
+        }
+        rtattr->rta_len = tswap16(rtattr->rta_len);
+        rtattr->rta_type = tswap16(rtattr->rta_type);
+        ret = target_to_host_rtattr(rtattr);
+        if (ret < 0) {
+            return ret;
+        }
+        len -= RTA_ALIGN(rtattr->rta_len);
+        rtattr = (struct rtattr *)(((char *)rtattr) +
+                 RTA_ALIGN(rtattr->rta_len));
+    }
+    return 0;
+}
+
+static abi_long target_to_host_data_link_rtattr(struct rtattr *rtattr)
+{
+    switch (rtattr->rta_type) {
+    default:
+        gemu_log("Unknown target IFLA type: %d\n", rtattr->rta_type);
+        break;
+    }
+    return 0;
+}
+
+static abi_long target_to_host_data_addr_rtattr(struct rtattr *rtattr)
+{
+    switch (rtattr->rta_type) {
+    /* binary: depends on family type */
+    case IFA_LOCAL:
+    case IFA_ADDRESS:
+        break;
+    default:
+        gemu_log("Unknown target IFA type: %d\n", rtattr->rta_type);
+        break;
+    }
+    return 0;
+}
+
+static abi_long target_to_host_data_route_rtattr(struct rtattr *rtattr)
+{
+    uint32_t *u32;
+    switch (rtattr->rta_type) {
+    /* binary: depends on family type */
+    case RTA_DST:
+    case RTA_SRC:
+    case RTA_GATEWAY:
+        break;
+    /* u32 */
+    case RTA_OIF:
+        u32 = RTA_DATA(rtattr);
+        *u32 = tswap32(*u32);
+        break;
+    default:
+        gemu_log("Unknown target RTA type: %d\n", rtattr->rta_type);
+        break;
+    }
+    return 0;
+}
+
+static void target_to_host_link_rtattr(struct rtattr *rtattr,
+                                       uint32_t rtattr_len)
+{
+    target_to_host_for_each_rtattr(rtattr, rtattr_len,
+                                   target_to_host_data_link_rtattr);
+}
+
+static void target_to_host_addr_rtattr(struct rtattr *rtattr,
+                                     uint32_t rtattr_len)
+{
+    target_to_host_for_each_rtattr(rtattr, rtattr_len,
+                                   target_to_host_data_addr_rtattr);
+}
+
+static void target_to_host_route_rtattr(struct rtattr *rtattr,
+                                     uint32_t rtattr_len)
+{
+    target_to_host_for_each_rtattr(rtattr, rtattr_len,
+                                   target_to_host_data_route_rtattr);
+}
+
+static abi_long target_to_host_data_route(struct nlmsghdr *nlh)
+{
+    struct ifinfomsg *ifi;
+    struct ifaddrmsg *ifa;
+    struct rtmsg *rtm;
+
+    switch (nlh->nlmsg_type) {
+    case RTM_GETLINK:
+        break;
+    case RTM_NEWLINK:
+    case RTM_DELLINK:
+        ifi = NLMSG_DATA(nlh);
+        ifi->ifi_type = tswap16(ifi->ifi_type);
+        ifi->ifi_index = tswap32(ifi->ifi_index);
+        ifi->ifi_flags = tswap32(ifi->ifi_flags);
+        ifi->ifi_change = tswap32(ifi->ifi_change);
+        target_to_host_link_rtattr(IFLA_RTA(ifi), nlh->nlmsg_len -
+                                   NLMSG_LENGTH(sizeof(*ifi)));
+        break;
+    case RTM_GETADDR:
+    case RTM_NEWADDR:
+    case RTM_DELADDR:
+        ifa = NLMSG_DATA(nlh);
+        ifa->ifa_index = tswap32(ifa->ifa_index);
+        target_to_host_addr_rtattr(IFA_RTA(ifa), nlh->nlmsg_len -
+                                   NLMSG_LENGTH(sizeof(*ifa)));
+        break;
+    case RTM_GETROUTE:
+        break;
+    case RTM_NEWROUTE:
+    case RTM_DELROUTE:
+        rtm = NLMSG_DATA(nlh);
+        rtm->rtm_flags = tswap32(rtm->rtm_flags);
+        target_to_host_route_rtattr(RTM_RTA(rtm), nlh->nlmsg_len -
+                                    NLMSG_LENGTH(sizeof(*rtm)));
+        break;
+    default:
+        return -TARGET_EOPNOTSUPP;
+    }
+    return 0;
+}
+
+static abi_long target_to_host_nlmsg_route(struct nlmsghdr *nlh, size_t len)
+{
+    return target_to_host_for_each_nlmsg(nlh, len, target_to_host_data_route);
+}
+
 /* do_setsockopt() Must return target values and target errnos. */
 static abi_long do_setsockopt(int sockfd, int level, int optname,
                               abi_ulong optval_addr, socklen_t optlen)
@@ -2165,6 +2691,21 @@ static TargetFdTrans target_packet_trans = {
     .target_to_host_addr = packet_target_to_host_sockaddr,
 };
 
+static abi_long netlink_route_target_to_host(void *buf, size_t len)
+{
+    return target_to_host_nlmsg_route(buf, len);
+}
+
+static abi_long netlink_route_host_to_target(void *buf, size_t len)
+{
+    return host_to_target_nlmsg_route(buf, len);
+}
+
+static TargetFdTrans target_netlink_route_trans = {
+    .target_to_host_data = netlink_route_target_to_host,
+    .host_to_target_data = netlink_route_host_to_target,
+};
+
 /* do_socket() Must return target values and target errnos. */
 static abi_long do_socket(int domain, int type, int protocol)
 {
@@ -2176,8 +2717,10 @@ static abi_long do_socket(int domain, int type, int protocol)
         return ret;
     }
 
-    if (domain == PF_NETLINK)
-        return -TARGET_EAFNOSUPPORT;
+    if (domain == PF_NETLINK &&
+        protocol != NETLINK_ROUTE) {
+        return -EPFNOSUPPORT;
+    }
 
     if (domain == AF_PACKET ||
         (domain == AF_INET && type == SOCK_PACKET)) {
@@ -2192,6 +2735,14 @@ static abi_long do_socket(int domain, int type, int protocol)
              * if socket type is SOCK_PACKET, bind by name
              */
             fd_trans_register(ret, &target_packet_trans);
+        } else if (domain == PF_NETLINK) {
+            switch (protocol) {
+            case NETLINK_ROUTE:
+                fd_trans_register(ret, &target_netlink_route_trans);
+                break;
+            default:
+                g_assert_not_reached();
+            }
         }
     }
     return ret;
@@ -2276,14 +2827,25 @@ static abi_long do_sendrecvmsg_locked(int fd, struct target_msghdr *msgp,
     msg.msg_iov = vec;
 
     if (send) {
-        ret = target_to_host_cmsg(&msg, msgp);
-        if (ret == 0)
+        if (fd_trans_target_to_host_data(fd)) {
+            ret = fd_trans_target_to_host_data(fd)(msg.msg_iov->iov_base,
+                                                   msg.msg_iov->iov_len);
+        } else {
+            ret = target_to_host_cmsg(&msg, msgp);
+        }
+        if (ret == 0) {
             ret = get_errno(sendmsg(fd, &msg, flags));
+        }
     } else {
         ret = get_errno(recvmsg(fd, &msg, flags));
         if (!is_error(ret)) {
             len = ret;
-            ret = host_to_target_cmsg(msgp, &msg);
+            if (fd_trans_host_to_target_data(fd)) {
+                ret = fd_trans_host_to_target_data(fd)(msg.msg_iov->iov_base,
+                                                       msg.msg_iov->iov_len);
+            } else {
+                ret = host_to_target_cmsg(msgp, &msg);
+            }
             if (!is_error(ret)) {
                 msgp->msg_namelen = tswap32(msg.msg_namelen);
                 if (msg.msg_name != NULL) {
@@ -2510,6 +3072,13 @@ static abi_long do_sendto(int fd, abi_ulong msg, size_t len, int flags,
     host_msg = lock_user(VERIFY_READ, msg, len, 1);
     if (!host_msg)
         return -TARGET_EFAULT;
+    if (fd_trans_target_to_host_data(fd)) {
+        ret = fd_trans_target_to_host_data(fd)(host_msg, len);
+        if (ret < 0) {
+            unlock_user(host_msg, msg, 0);
+            return ret;
+        }
+    }
     if (target_addr) {
         addr = alloca(addrlen+1);
         ret = target_to_host_sockaddr(fd, addr, target_addr, addrlen);

From b265620bfbe300528247b31de54bacfd513109e8 Mon Sep 17 00:00:00 2001
From: Laurent Vivier <laurent@vivier.eu>
Date: Sun, 22 May 2016 18:56:20 +0200
Subject: [PATCH 03/44] linux-user: support netlink protocol
 NETLINK_KOBJECT_UEVENT

This is the protocol used by udevd to manage kernel events.

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 linux-user/syscall.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 3e4895e845..584aeccd87 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -2718,7 +2718,8 @@ static abi_long do_socket(int domain, int type, int protocol)
     }
 
     if (domain == PF_NETLINK &&
-        protocol != NETLINK_ROUTE) {
+        !(protocol == NETLINK_ROUTE ||
+          protocol == NETLINK_KOBJECT_UEVENT)) {
         return -EPFNOSUPPORT;
     }
 
@@ -2740,6 +2741,9 @@ static abi_long do_socket(int domain, int type, int protocol)
             case NETLINK_ROUTE:
                 fd_trans_register(ret, &target_netlink_route_trans);
                 break;
+            case NETLINK_KOBJECT_UEVENT:
+                /* nothing to do: messages are strings */
+                break;
             default:
                 g_assert_not_reached();
             }

From 5ce9bb5937aa549efb0f93ee78a06ce8bded0d50 Mon Sep 17 00:00:00 2001
From: Laurent Vivier <laurent@vivier.eu>
Date: Sun, 22 May 2016 18:56:21 +0200
Subject: [PATCH 04/44] linux-user: add netlink audit

This is, for instance, needed to log in a container.

Without this, the user cannot be identified and the console login
fails with "Login incorrect".

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 linux-user/syscall.c | 60 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 59 insertions(+), 1 deletion(-)

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 584aeccd87..933c2cd9a9 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -103,6 +103,7 @@ int __clone2(int (*fn)(void *), void *child_stack_base,
 #include <linux/blkpg.h>
 #include <linux/netlink.h>
 #include <linux/rtnetlink.h>
+#include <linux/audit.h>
 #include "linux_loop.h"
 #include "uname.h"
 
@@ -2041,6 +2042,44 @@ static abi_long target_to_host_nlmsg_route(struct nlmsghdr *nlh, size_t len)
     return target_to_host_for_each_nlmsg(nlh, len, target_to_host_data_route);
 }
 
+static abi_long host_to_target_data_audit(struct nlmsghdr *nlh)
+{
+    switch (nlh->nlmsg_type) {
+    default:
+        gemu_log("Unknown host audit message type %d\n",
+                 nlh->nlmsg_type);
+        return -TARGET_EINVAL;
+    }
+    return 0;
+}
+
+static inline abi_long host_to_target_nlmsg_audit(struct nlmsghdr *nlh,
+                                                  size_t len)
+{
+    return host_to_target_for_each_nlmsg(nlh, len, host_to_target_data_audit);
+}
+
+static abi_long target_to_host_data_audit(struct nlmsghdr *nlh)
+{
+    switch (nlh->nlmsg_type) {
+    case AUDIT_USER:
+    case AUDIT_FIRST_USER_MSG ... AUDIT_LAST_USER_MSG:
+    case AUDIT_FIRST_USER_MSG2 ... AUDIT_LAST_USER_MSG2:
+        break;
+    default:
+        gemu_log("Unknown target audit message type %d\n",
+                 nlh->nlmsg_type);
+        return -TARGET_EINVAL;
+    }
+
+    return 0;
+}
+
+static abi_long target_to_host_nlmsg_audit(struct nlmsghdr *nlh, size_t len)
+{
+    return target_to_host_for_each_nlmsg(nlh, len, target_to_host_data_audit);
+}
+
 /* do_setsockopt() Must return target values and target errnos. */
 static abi_long do_setsockopt(int sockfd, int level, int optname,
                               abi_ulong optval_addr, socklen_t optlen)
@@ -2706,6 +2745,21 @@ static TargetFdTrans target_netlink_route_trans = {
     .host_to_target_data = netlink_route_host_to_target,
 };
 
+static abi_long netlink_audit_target_to_host(void *buf, size_t len)
+{
+    return target_to_host_nlmsg_audit(buf, len);
+}
+
+static abi_long netlink_audit_host_to_target(void *buf, size_t len)
+{
+    return host_to_target_nlmsg_audit(buf, len);
+}
+
+static TargetFdTrans target_netlink_audit_trans = {
+    .target_to_host_data = netlink_audit_target_to_host,
+    .host_to_target_data = netlink_audit_host_to_target,
+};
+
 /* do_socket() Must return target values and target errnos. */
 static abi_long do_socket(int domain, int type, int protocol)
 {
@@ -2719,7 +2773,8 @@ static abi_long do_socket(int domain, int type, int protocol)
 
     if (domain == PF_NETLINK &&
         !(protocol == NETLINK_ROUTE ||
-          protocol == NETLINK_KOBJECT_UEVENT)) {
+          protocol == NETLINK_KOBJECT_UEVENT ||
+          protocol == NETLINK_AUDIT)) {
         return -EPFNOSUPPORT;
     }
 
@@ -2744,6 +2799,9 @@ static abi_long do_socket(int domain, int type, int protocol)
             case NETLINK_KOBJECT_UEVENT:
                 /* nothing to do: messages are strings */
                 break;
+            case NETLINK_AUDIT:
+                fd_trans_register(ret, &target_netlink_audit_trans);
+                break;
             default:
                 g_assert_not_reached();
             }

From 575b22b1b7a843f34f52ebc9c00fb1c967258912 Mon Sep 17 00:00:00 2001
From: Laurent Vivier <laurent@vivier.eu>
Date: Thu, 2 Jun 2016 22:14:15 +0200
Subject: [PATCH 05/44] linux-user: check if NETLINK_ROUTE is available

Some IFLA_* symbols can be missing in the host linux/if_link.h,
but as they are enums and not "#defines", check in "configure" if
last known  (IFLA_PROTO_DOWN) is available and if not, disable
management of NETLINK_ROUTE protocol.

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 configure            | 15 +++++++++++++++
 linux-user/syscall.c | 18 ++++++++++++++----
 2 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/configure b/configure
index 6d01d968b8..d1dbc4b099 100755
--- a/configure
+++ b/configure
@@ -4526,6 +4526,17 @@ if compile_prog "" "" ; then
     have_fsxattr=yes
 fi
 
+have_rtnetlink=no
+cat > $TMPC << EOF
+#include <linux/rtnetlink.h>
+int main(void) {
+  return IFLA_PROTO_DOWN;
+}
+EOF
+if compile_prog "" "" ; then
+    have_rtnetlink=yes
+fi
+
 ##########################################
 # End of CC checks
 # After here, no more $cc or $ld runs
@@ -5461,6 +5472,10 @@ if test "$rdma" = "yes" ; then
   echo "CONFIG_RDMA=y" >> $config_host_mak
 fi
 
+if test "$have_rtnetlink" = "yes" ; then
+  echo "CONFIG_RTNETLINK=y" >> $config_host_mak
+fi
+
 # Hold two types of flag:
 #   CONFIG_THREAD_SETNAME_BYTHREAD  - we've got a way of setting the name on
 #                                     a thread we have a handle to
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 933c2cd9a9..9cc9a35cf5 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -102,7 +102,9 @@ int __clone2(int (*fn)(void *), void *child_stack_base,
 #include <linux/filter.h>
 #include <linux/blkpg.h>
 #include <linux/netlink.h>
+#ifdef CONFIG_RTNETLINK
 #include <linux/rtnetlink.h>
+#endif
 #include <linux/audit.h>
 #include "linux_loop.h"
 #include "uname.h"
@@ -1627,6 +1629,7 @@ static abi_long target_to_host_for_each_nlmsg(struct nlmsghdr *nlh,
     return 0;
 }
 
+#ifdef CONFIG_RTNETLINK
 static abi_long host_to_target_for_each_rtattr(struct rtattr *rtattr,
                                                size_t len,
                                                abi_long (*host_to_target_rtattr)
@@ -2041,6 +2044,7 @@ static abi_long target_to_host_nlmsg_route(struct nlmsghdr *nlh, size_t len)
 {
     return target_to_host_for_each_nlmsg(nlh, len, target_to_host_data_route);
 }
+#endif /* CONFIG_RTNETLINK */
 
 static abi_long host_to_target_data_audit(struct nlmsghdr *nlh)
 {
@@ -2730,6 +2734,7 @@ static TargetFdTrans target_packet_trans = {
     .target_to_host_addr = packet_target_to_host_sockaddr,
 };
 
+#ifdef CONFIG_RTNETLINK
 static abi_long netlink_route_target_to_host(void *buf, size_t len)
 {
     return target_to_host_nlmsg_route(buf, len);
@@ -2744,6 +2749,7 @@ static TargetFdTrans target_netlink_route_trans = {
     .target_to_host_data = netlink_route_target_to_host,
     .host_to_target_data = netlink_route_host_to_target,
 };
+#endif /* CONFIG_RTNETLINK */
 
 static abi_long netlink_audit_target_to_host(void *buf, size_t len)
 {
@@ -2771,10 +2777,12 @@ static abi_long do_socket(int domain, int type, int protocol)
         return ret;
     }
 
-    if (domain == PF_NETLINK &&
-        !(protocol == NETLINK_ROUTE ||
-          protocol == NETLINK_KOBJECT_UEVENT ||
-          protocol == NETLINK_AUDIT)) {
+    if (domain == PF_NETLINK && !(
+#ifdef CONFIG_RTNETLINK
+         protocol == NETLINK_ROUTE ||
+#endif
+         protocol == NETLINK_KOBJECT_UEVENT ||
+         protocol == NETLINK_AUDIT)) {
         return -EPFNOSUPPORT;
     }
 
@@ -2793,9 +2801,11 @@ static abi_long do_socket(int domain, int type, int protocol)
             fd_trans_register(ret, &target_packet_trans);
         } else if (domain == PF_NETLINK) {
             switch (protocol) {
+#ifdef CONFIG_RTNETLINK
             case NETLINK_ROUTE:
                 fd_trans_register(ret, &target_netlink_route_trans);
                 break;
+#endif
             case NETLINK_KOBJECT_UEVENT:
                 /* nothing to do: messages are strings */
                 break;

From eb5525013ae4eaaa3038607aeb7bc1954fff8656 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Fri, 27 May 2016 15:51:43 +0100
Subject: [PATCH 06/44] linux-user: Factor out handle_signal code from
 process_pending_signals()

Factor out the code to handle a single signal from the
process_pending_signals() function. The use of goto for flow control
is OK currently, but would get significantly uglier if extended to
allow running the handle_signal code multiple times.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 linux-user/signal.c | 29 ++++++++++++++++++-----------
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/linux-user/signal.c b/linux-user/signal.c
index 8090b4de1f..a9ac491af4 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -5765,33 +5765,40 @@ long do_rt_sigreturn(CPUArchState *env)
 
 #endif
 
+static void handle_pending_signal(CPUArchState *cpu_env, int sig);
+
 void process_pending_signals(CPUArchState *cpu_env)
 {
     CPUState *cpu = ENV_GET_CPU(cpu_env);
     int sig;
-    abi_ulong handler;
-    sigset_t set, old_set;
-    target_sigset_t target_old_set;
-    struct emulated_sigtable *k;
-    struct target_sigaction *sa;
-    struct sigqueue *q;
     TaskState *ts = cpu->opaque;
 
     if (!ts->signal_pending)
         return;
 
     /* FIXME: This is not threadsafe.  */
-    k = ts->sigtab;
     for(sig = 1; sig <= TARGET_NSIG; sig++) {
-        if (k->pending)
-            goto handle_signal;
-        k++;
+        if (ts->sigtab[sig - 1].pending) {
+            handle_pending_signal(cpu_env, sig);
+            return;
+        }
     }
     /* if no signal is pending, just return */
     ts->signal_pending = 0;
     return;
+}
+
+static void handle_pending_signal(CPUArchState *cpu_env, int sig)
+{
+    CPUState *cpu = ENV_GET_CPU(cpu_env);
+    abi_ulong handler;
+    sigset_t set, old_set;
+    target_sigset_t target_old_set;
+    struct target_sigaction *sa;
+    struct sigqueue *q;
+    TaskState *ts = cpu->opaque;
+    struct emulated_sigtable *k = &ts->sigtab[sig - 1];
 
- handle_signal:
     trace_user_handle_signal(cpu_env, sig);
     /* dequeue signal */
     q = k->first;

From e902d588dcaaff98a2832d3a61cba2f058f50dfc Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Fri, 27 May 2016 15:51:44 +0100
Subject: [PATCH 07/44] linux-user: Move handle_pending_signal() to avoid need
 for declaration

Move the handle_pending_signal() function above process_pending_signals()
to avoid the need for a forward declaration. (Whitespace only change.)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 linux-user/signal.c | 44 +++++++++++++++++++++-----------------------
 1 file changed, 21 insertions(+), 23 deletions(-)

diff --git a/linux-user/signal.c b/linux-user/signal.c
index a9ac491af4..5f98c71e85 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -5765,29 +5765,6 @@ long do_rt_sigreturn(CPUArchState *env)
 
 #endif
 
-static void handle_pending_signal(CPUArchState *cpu_env, int sig);
-
-void process_pending_signals(CPUArchState *cpu_env)
-{
-    CPUState *cpu = ENV_GET_CPU(cpu_env);
-    int sig;
-    TaskState *ts = cpu->opaque;
-
-    if (!ts->signal_pending)
-        return;
-
-    /* FIXME: This is not threadsafe.  */
-    for(sig = 1; sig <= TARGET_NSIG; sig++) {
-        if (ts->sigtab[sig - 1].pending) {
-            handle_pending_signal(cpu_env, sig);
-            return;
-        }
-    }
-    /* if no signal is pending, just return */
-    ts->signal_pending = 0;
-    return;
-}
-
 static void handle_pending_signal(CPUArchState *cpu_env, int sig)
 {
     CPUState *cpu = ENV_GET_CPU(cpu_env);
@@ -5876,3 +5853,24 @@ static void handle_pending_signal(CPUArchState *cpu_env, int sig)
     if (q != &k->info)
         free_sigqueue(cpu_env, q);
 }
+
+void process_pending_signals(CPUArchState *cpu_env)
+{
+    CPUState *cpu = ENV_GET_CPU(cpu_env);
+    int sig;
+    TaskState *ts = cpu->opaque;
+
+    if (!ts->signal_pending)
+        return;
+
+    /* FIXME: This is not threadsafe.  */
+    for(sig = 1; sig <= TARGET_NSIG; sig++) {
+        if (ts->sigtab[sig - 1].pending) {
+            handle_pending_signal(cpu_env, sig);
+            return;
+        }
+    }
+    /* if no signal is pending, just return */
+    ts->signal_pending = 0;
+    return;
+}

From 7ec87e06c7371c6c574de8ce2cb6553a26c3f3de Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Fri, 27 May 2016 15:51:45 +0100
Subject: [PATCH 08/44] linux-user: Fix stray tab-indent

Fix a stray tab-indented linux in linux-user/signal.c.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 linux-user/signal.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/linux-user/signal.c b/linux-user/signal.c
index 5f98c71e85..5069c3f7c9 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -5847,8 +5847,9 @@ static void handle_pending_signal(CPUArchState *cpu_env, int sig)
         else
             setup_frame(sig, sa, &target_old_set, cpu_env);
 #endif
-	if (sa->sa_flags & TARGET_SA_RESETHAND)
+        if (sa->sa_flags & TARGET_SA_RESETHAND) {
             sa->_sa_handler = TARGET_SIG_DFL;
+        }
     }
     if (q != &k->info)
         free_sigqueue(cpu_env, q);

From 9eede5b69fbbed46f29c7c586cf9e067f56002a6 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Fri, 27 May 2016 15:51:46 +0100
Subject: [PATCH 09/44] linux-user: Factor out uses of do_sigprocmask() from
 sigreturn code

All the architecture specific handlers for sigreturn include calls
to do_sigprocmask(SIGSETMASK, &set, NULL) to set the signal mask
from the uc_sigmask in the context being restored. Factor these
out into calls to a set_sigmask() function. The next patch will
want to add code which is not run when setting the signal mask
via do_sigreturn, and this change allows us to separate the two
cases.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 linux-user/signal.c | 55 ++++++++++++++++++++++++++-------------------
 1 file changed, 32 insertions(+), 23 deletions(-)

diff --git a/linux-user/signal.c b/linux-user/signal.c
index 5069c3f7c9..1b86a8550c 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -239,6 +239,15 @@ int do_sigprocmask(int how, const sigset_t *set, sigset_t *oldset)
     return ret;
 }
 
+#if !defined(TARGET_OPENRISC) && !defined(TARGET_UNICORE32) && \
+    !defined(TARGET_X86_64)
+/* Just set the guest's signal mask to the specified value */
+static void set_sigmask(const sigset_t *set)
+{
+    do_sigprocmask(SIG_SETMASK, set, NULL);
+}
+#endif
+
 /* siginfo conversion */
 
 static inline void host_to_target_siginfo_noswap(target_siginfo_t *tinfo,
@@ -1093,7 +1102,7 @@ long do_sigreturn(CPUX86State *env)
     }
 
     target_to_host_sigset_internal(&set, &target_set);
-    do_sigprocmask(SIG_SETMASK, &set, NULL);
+    set_sigmask(&set);
 
     /* restore registers */
     if (restore_sigcontext(env, &frame->sc))
@@ -1118,7 +1127,7 @@ long do_rt_sigreturn(CPUX86State *env)
     if (!lock_user_struct(VERIFY_READ, frame, frame_addr, 1))
         goto badframe;
     target_to_host_sigset(&set, &frame->uc.tuc_sigmask);
-    do_sigprocmask(SIG_SETMASK, &set, NULL);
+    set_sigmask(&set);
 
     if (restore_sigcontext(env, &frame->uc.tuc_mcontext)) {
         goto badframe;
@@ -1258,7 +1267,7 @@ static int target_restore_sigframe(CPUARMState *env,
     uint64_t pstate;
 
     target_to_host_sigset(&set, &sf->uc.tuc_sigmask);
-    do_sigprocmask(SIG_SETMASK, &set, NULL);
+    set_sigmask(&set);
 
     for (i = 0; i < 31; i++) {
         __get_user(env->xregs[i], &sf->uc.tuc_mcontext.regs[i]);
@@ -1900,7 +1909,7 @@ static long do_sigreturn_v1(CPUARMState *env)
     }
 
     target_to_host_sigset_internal(&host_set, &set);
-    do_sigprocmask(SIG_SETMASK, &host_set, NULL);
+    set_sigmask(&host_set);
 
     if (restore_sigcontext(env, &frame->sc)) {
         goto badframe;
@@ -1981,7 +1990,7 @@ static int do_sigframe_return_v2(CPUARMState *env, target_ulong frame_addr,
     abi_ulong *regspace;
 
     target_to_host_sigset(&host_set, &uc->tuc_sigmask);
-    do_sigprocmask(SIG_SETMASK, &host_set, NULL);
+    set_sigmask(&host_set);
 
     if (restore_sigcontext(env, &uc->tuc_mcontext))
         return 1;
@@ -2077,7 +2086,7 @@ static long do_rt_sigreturn_v1(CPUARMState *env)
     }
 
     target_to_host_sigset(&host_set, &frame->uc.tuc_sigmask);
-    do_sigprocmask(SIG_SETMASK, &host_set, NULL);
+    set_sigmask(&host_set);
 
     if (restore_sigcontext(env, &frame->uc.tuc_mcontext)) {
         goto badframe;
@@ -2453,7 +2462,7 @@ long do_sigreturn(CPUSPARCState *env)
     }
 
     target_to_host_sigset_internal(&host_set, &set);
-    do_sigprocmask(SIG_SETMASK, &host_set, NULL);
+    set_sigmask(&host_set);
 
     if (err) {
         goto segv_and_exit;
@@ -2576,7 +2585,7 @@ void sparc64_set_context(CPUSPARCState *env)
             }
         }
         target_to_host_sigset_internal(&set, &target_set);
-        do_sigprocmask(SIG_SETMASK, &set, NULL);
+        set_sigmask(&set);
     }
     env->pc = pc;
     env->npc = npc;
@@ -2993,7 +3002,7 @@ long do_sigreturn(CPUMIPSState *regs)
     }
 
     target_to_host_sigset_internal(&blocked, &target_set);
-    do_sigprocmask(SIG_SETMASK, &blocked, NULL);
+    set_sigmask(&blocked);
 
     restore_sigcontext(regs, &frame->sf_sc);
 
@@ -3097,7 +3106,7 @@ long do_rt_sigreturn(CPUMIPSState *env)
     }
 
     target_to_host_sigset(&blocked, &frame->rs_uc.tuc_sigmask);
-    do_sigprocmask(SIG_SETMASK, &blocked, NULL);
+    set_sigmask(&blocked);
 
     restore_sigcontext(env, &frame->rs_uc.tuc_mcontext);
 
@@ -3371,7 +3380,7 @@ long do_sigreturn(CPUSH4State *regs)
         goto badframe;
 
     target_to_host_sigset_internal(&blocked, &target_set);
-    do_sigprocmask(SIG_SETMASK, &blocked, NULL);
+    set_sigmask(&blocked);
 
     restore_sigcontext(regs, &frame->sc);
 
@@ -3397,7 +3406,7 @@ long do_rt_sigreturn(CPUSH4State *regs)
     }
 
     target_to_host_sigset(&blocked, &frame->uc.tuc_sigmask);
-    do_sigprocmask(SIG_SETMASK, &blocked, NULL);
+    set_sigmask(&blocked);
 
     restore_sigcontext(regs, &frame->uc.tuc_mcontext);
 
@@ -3621,7 +3630,7 @@ long do_sigreturn(CPUMBState *env)
         __get_user(target_set.sig[i], &frame->extramask[i - 1]);
     }
     target_to_host_sigset_internal(&set, &target_set);
-    do_sigprocmask(SIG_SETMASK, &set, NULL);
+    set_sigmask(&set);
 
     restore_sigcontext(&frame->uc.tuc_mcontext, env);
     /* We got here through a sigreturn syscall, our path back is via an
@@ -3792,7 +3801,7 @@ long do_sigreturn(CPUCRISState *env)
         __get_user(target_set.sig[i], &frame->extramask[i - 1]);
     }
     target_to_host_sigset_internal(&set, &target_set);
-    do_sigprocmask(SIG_SETMASK, &set, NULL);
+    set_sigmask(&set);
 
     restore_sigcontext(&frame->sc, env);
     unlock_user_struct(frame, frame_addr, 0);
@@ -4284,7 +4293,7 @@ long do_sigreturn(CPUS390XState *env)
     __get_user(target_set.sig[0], &frame->sc.oldmask[0]);
 
     target_to_host_sigset_internal(&set, &target_set);
-    do_sigprocmask(SIG_SETMASK, &set, NULL); /* ~_BLOCKABLE? */
+    set_sigmask(&set); /* ~_BLOCKABLE? */
 
     if (restore_sigregs(env, &frame->sregs)) {
         goto badframe;
@@ -4310,7 +4319,7 @@ long do_rt_sigreturn(CPUS390XState *env)
     }
     target_to_host_sigset(&set, &frame->uc.tuc_sigmask);
 
-    do_sigprocmask(SIG_SETMASK, &set, NULL); /* ~_BLOCKABLE? */
+    set_sigmask(&set); /* ~_BLOCKABLE? */
 
     if (restore_sigregs(env, &frame->uc.tuc_mcontext)) {
         goto badframe;
@@ -4872,7 +4881,7 @@ long do_sigreturn(CPUPPCState *env)
     __get_user(set.sig[1], &sc->_unused[3]);
 #endif
     target_to_host_sigset_internal(&blocked, &set);
-    do_sigprocmask(SIG_SETMASK, &blocked, NULL);
+    set_sigmask(&blocked);
 
     __get_user(sr_addr, &sc->regs);
     if (!lock_user_struct(VERIFY_READ, sr, sr_addr, 1))
@@ -4913,7 +4922,7 @@ static int do_setcontext(struct target_ucontext *ucp, CPUPPCState *env, int sig)
         return 1;
 
     target_to_host_sigset_internal(&blocked, &set);
-    do_sigprocmask(SIG_SETMASK, &blocked, NULL);
+    set_sigmask(&blocked);
     restore_user_regs(env, mcp, sig);
 
     unlock_user_struct(mcp, mcp_addr, 1);
@@ -5261,7 +5270,7 @@ long do_sigreturn(CPUM68KState *env)
     }
 
     target_to_host_sigset_internal(&set, &target_set);
-    do_sigprocmask(SIG_SETMASK, &set, NULL);
+    set_sigmask(&set);
 
     /* restore registers */
 
@@ -5287,7 +5296,7 @@ long do_rt_sigreturn(CPUM68KState *env)
         goto badframe;
 
     target_to_host_sigset_internal(&set, &target_set);
-    do_sigprocmask(SIG_SETMASK, &set, NULL);
+    set_sigmask(&set);
 
     /* restore registers */
 
@@ -5530,7 +5539,7 @@ long do_sigreturn(CPUAlphaState *env)
     __get_user(target_set.sig[0], &sc->sc_mask);
 
     target_to_host_sigset_internal(&set, &target_set);
-    do_sigprocmask(SIG_SETMASK, &set, NULL);
+    set_sigmask(&set);
 
     restore_sigcontext(env, sc);
     unlock_user_struct(sc, sc_addr, 0);
@@ -5551,7 +5560,7 @@ long do_rt_sigreturn(CPUAlphaState *env)
         goto badframe;
     }
     target_to_host_sigset(&set, &frame->uc.tuc_sigmask);
-    do_sigprocmask(SIG_SETMASK, &set, NULL);
+    set_sigmask(&set);
 
     restore_sigcontext(env, &frame->uc.tuc_mcontext);
     if (do_sigaltstack(frame_addr + offsetof(struct target_rt_sigframe,
@@ -5718,7 +5727,7 @@ long do_rt_sigreturn(CPUTLGState *env)
         goto badframe;
     }
     target_to_host_sigset(&set, &frame->uc.tuc_sigmask);
-    do_sigprocmask(SIG_SETMASK, &set, NULL);
+    set_sigmask(&set);
 
     restore_sigcontext(env, &frame->uc.tuc_mcontext);
     if (do_sigaltstack(frame_addr + offsetof(struct target_rt_sigframe,

From b28a1f333a5875e6f48725efd19c76fc3d27d8d1 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Fri, 27 May 2016 15:51:47 +0100
Subject: [PATCH 10/44] linux-user: Define macro for size of host kernel
 sigset_t

Some host syscalls take an argument specifying the size of a
host kernel's sigset_t (which isn't necessarily the same as
that of the host libc's type of that name). Instead of hardcoding
_NSIG / 8 where we do this, define and use a SIGSET_T_SIZE macro.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 linux-user/syscall.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 9cc9a35cf5..d2749a6648 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -124,6 +124,10 @@ int __clone2(int (*fn)(void *), void *child_stack_base,
 #define	VFAT_IOCTL_READDIR_BOTH		_IOR('r', 1, struct linux_dirent [2])
 #define	VFAT_IOCTL_READDIR_SHORT	_IOR('r', 2, struct linux_dirent [2])
 
+/* This is the size of the host kernel's sigset_t, needed where we make
+ * direct system calls that take a sigset_t pointer and a size.
+ */
+#define SIGSET_T_SIZE (_NSIG / 8)
 
 #undef _syscall0
 #undef _syscall1
@@ -7862,7 +7866,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
             /* Extract the two packed args for the sigset */
             if (arg6) {
                 sig_ptr = &sig;
-                sig.size = _NSIG / 8;
+                sig.size = SIGSET_T_SIZE;
 
                 arg7 = lock_user(VERIFY_READ, arg6, sizeof(*arg7) * 2, 1);
                 if (!arg7) {
@@ -8916,7 +8920,8 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
                     set = NULL;
                 }
 
-                ret = get_errno(sys_ppoll(pfd, nfds, timeout_ts, set, _NSIG/8));
+                ret = get_errno(sys_ppoll(pfd, nfds, timeout_ts,
+                                          set, SIGSET_T_SIZE));
 
                 if (!is_error(ret) && arg3) {
                     host_to_target_timespec(arg3, timeout_ts);

From 2fe4fba115b5b9f7e6722720c57810e0fc64b9b5 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Fri, 27 May 2016 15:51:48 +0100
Subject: [PATCH 11/44] linux-user: Use safe_syscall for sigsuspend syscalls

Use the safe_syscall wrapper for sigsuspend syscalls. This
means that we will definitely deliver a signal that arrives
before we do the sigsuspend call, rather than blocking first
and delivering afterwards.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 linux-user/syscall.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index d2749a6648..5bdfe2a936 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -716,6 +716,7 @@ safe_syscall6(int, pselect6, int, nfds, fd_set *, readfds, fd_set *, writefds, \
               fd_set *, exceptfds, struct timespec *, timeout, void *, sig)
 safe_syscall6(int,futex,int *,uaddr,int,op,int,val, \
               const struct timespec *,timeout,int *,uaddr2,int,val3)
+safe_syscall2(int, rt_sigsuspend, sigset_t *, newset, size_t, sigsetsize)
 
 static inline int host_to_target_sock_type(int host_type)
 {
@@ -7648,7 +7649,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
             target_to_host_old_sigset(&set, p);
             unlock_user(p, arg1, 0);
 #endif
-            ret = get_errno(sigsuspend(&set));
+            ret = get_errno(safe_rt_sigsuspend(&set, SIGSET_T_SIZE));
         }
         break;
 #endif
@@ -7659,7 +7660,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
                 goto efault;
             target_to_host_sigset(&set, p);
             unlock_user(p, arg1, 0);
-            ret = get_errno(sigsuspend(&set));
+            ret = get_errno(safe_rt_sigsuspend(&set, SIGSET_T_SIZE));
         }
         break;
     case TARGET_NR_rt_sigtimedwait:

From 3d3efba020da1de57a715e2087cf761ed0ad0904 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Fri, 27 May 2016 15:51:49 +0100
Subject: [PATCH 12/44] linux-user: Fix race between multiple signals

If multiple host signals are received in quick succession they would
be queued in TaskState then delivered to the guest in spite of
signals being supposed to be blocked by the guest signal handler's
sa_mask. Fix this by decoupling the guest signal mask from the
host signal mask, so we can have protected sections where all
host signals are blocked. In particular we block signals from
when host_signal_handler() queues a signal from the guest until
process_pending_signals() has unqueued it. We also block signals
while we are manipulating the guest signal mask in emulation of
sigprocmask and similar syscalls.

Blocking host signals also ensures the correct behaviour with respect
to multiple threads and the overrun count of timer related signals.
Alas blocking and queuing in qemu is still needed because of virtual
processor exceptions, SIGSEGV and SIGBUS.

Blocking signals inside process_pending_signals() protects against
concurrency problems that would otherwise happen if host_signal_handler()
ran and accessed the signal data structures while process_pending_signals()
was manipulating them.

Since we now track the guest signal mask separately from that
of the host, the sigsuspend system calls must track the signal
mask passed to them, because when we process signals as we leave
the sigsuspend the guest signal mask in force is that passed to
sigsuspend.

Signed-off-by: Timothy Edward Baldwin <T.E.Baldwin99@members.leeds.ac.uk>
Message-id: 1441497448-32489-19-git-send-email-T.E.Baldwin99@members.leeds.ac.uk
[PMM: make signal_pending a simple flag rather than a word with two flag bits;
 ensure we don't call block_signals() twice in sigreturn codepaths;
 document and assert() the guarantee that using do_sigprocmask() to
 get the current mask never fails;  use the qemu atomics.h functions
 rather than raw volatile variable access; add extra commentary and
 documentation; block SIGSEGV/SIGBUS in block_signals() and in
 process_pending_signals() because they can't occur synchronously here;
 check the right do_sigprocmask() call for errors in ssetmask syscall;
 expand commit message; fixed sigsuspend() hanging]
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 linux-user/qemu.h    |  50 +++++++++++++-
 linux-user/signal.c  | 161 ++++++++++++++++++++++++++++++-------------
 linux-user/syscall.c |  73 ++++++++++++++------
 3 files changed, 212 insertions(+), 72 deletions(-)

diff --git a/linux-user/qemu.h b/linux-user/qemu.h
index f09b750bbf..5138289bfb 100644
--- a/linux-user/qemu.h
+++ b/linux-user/qemu.h
@@ -123,14 +123,33 @@ typedef struct TaskState {
 #endif
     uint32_t stack_base;
     int used; /* non zero if used */
-    bool sigsegv_blocked; /* SIGSEGV blocked by guest */
     struct image_info *info;
     struct linux_binprm *bprm;
 
     struct emulated_sigtable sigtab[TARGET_NSIG];
     struct sigqueue sigqueue_table[MAX_SIGQUEUE_SIZE]; /* siginfo queue */
     struct sigqueue *first_free; /* first free siginfo queue entry */
-    int signal_pending; /* non zero if a signal may be pending */
+    /* This thread's signal mask, as requested by the guest program.
+     * The actual signal mask of this thread may differ:
+     *  + we don't let SIGSEGV and SIGBUS be blocked while running guest code
+     *  + sometimes we block all signals to avoid races
+     */
+    sigset_t signal_mask;
+    /* The signal mask imposed by a guest sigsuspend syscall, if we are
+     * currently in the middle of such a syscall
+     */
+    sigset_t sigsuspend_mask;
+    /* Nonzero if we're leaving a sigsuspend and sigsuspend_mask is valid. */
+    int in_sigsuspend;
+
+    /* Nonzero if process_pending_signals() needs to do something (either
+     * handle a pending signal or unblock signals).
+     * This flag is written from a signal handler so should be accessed via
+     * the atomic_read() and atomic_write() functions. (It is not accessed
+     * from multiple threads.)
+     */
+    int signal_pending;
+
 } __attribute__((aligned(16))) TaskState;
 
 extern char *exec_path;
@@ -235,6 +254,12 @@ unsigned long init_guest_space(unsigned long host_start,
  * It's also OK to implement these with safe_syscall, though it will be
  * a little less efficient if a signal is delivered at the 'wrong' moment.
  *
+ * Some non-interruptible syscalls need to be handled using block_signals()
+ * to block signals for the duration of the syscall. This mainly applies
+ * to code which needs to modify the data structures used by the
+ * host_signal_handler() function and the functions it calls, including
+ * all syscalls which change the thread's signal mask.
+ *
  * (2) Interruptible syscalls
  *
  * These are guest syscalls that can be interrupted by signals and
@@ -266,6 +291,8 @@ unsigned long init_guest_space(unsigned long host_start,
  * you make in the implementation returns either -TARGET_ERESTARTSYS or
  * EINTR though.)
  *
+ * block_signals() cannot be used for interruptible syscalls.
+ *
  *
  * How and why the safe_syscall implementation works:
  *
@@ -352,6 +379,25 @@ long do_sigreturn(CPUArchState *env);
 long do_rt_sigreturn(CPUArchState *env);
 abi_long do_sigaltstack(abi_ulong uss_addr, abi_ulong uoss_addr, abi_ulong sp);
 int do_sigprocmask(int how, const sigset_t *set, sigset_t *oldset);
+/**
+ * block_signals: block all signals while handling this guest syscall
+ *
+ * Block all signals, and arrange that the signal mask is returned to
+ * its correct value for the guest before we resume execution of guest code.
+ * If this function returns non-zero, then the caller should immediately
+ * return -TARGET_ERESTARTSYS to the main loop, which will take the pending
+ * signal and restart execution of the syscall.
+ * If block_signals() returns zero, then the caller can continue with
+ * emulation of the system call knowing that no signals can be taken
+ * (and therefore that no race conditions will result).
+ * This should only be called once, because if it is called a second time
+ * it will always return non-zero. (Think of it like a mutex that can't
+ * be recursively locked.)
+ * Signals will be unblocked again by process_pending_signals().
+ *
+ * Return value: non-zero if there was a pending signal, zero if not.
+ */
+int block_signals(void); /* Returns non zero if signal pending */
 
 #ifdef TARGET_I386
 /* vm86.c */
diff --git a/linux-user/signal.c b/linux-user/signal.c
index 1b86a8550c..a89853d4eb 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -190,61 +190,81 @@ void target_to_host_old_sigset(sigset_t *sigset,
     target_to_host_sigset(sigset, &d);
 }
 
+int block_signals(void)
+{
+    TaskState *ts = (TaskState *)thread_cpu->opaque;
+    sigset_t set;
+    int pending;
+
+    /* It's OK to block everything including SIGSEGV, because we won't
+     * run any further guest code before unblocking signals in
+     * process_pending_signals().
+     */
+    sigfillset(&set);
+    sigprocmask(SIG_SETMASK, &set, 0);
+
+    pending = atomic_xchg(&ts->signal_pending, 1);
+
+    return pending;
+}
+
 /* Wrapper for sigprocmask function
  * Emulates a sigprocmask in a safe way for the guest. Note that set and oldset
- * are host signal set, not guest ones. This wraps the sigprocmask host calls
- * that should be protected (calls originated from guest)
+ * are host signal set, not guest ones. Returns -TARGET_ERESTARTSYS if
+ * a signal was already pending and the syscall must be restarted, or
+ * 0 on success.
+ * If set is NULL, this is guaranteed not to fail.
  */
 int do_sigprocmask(int how, const sigset_t *set, sigset_t *oldset)
 {
-    int ret;
-    sigset_t val;
-    sigset_t *temp = NULL;
-    CPUState *cpu = thread_cpu;
-    TaskState *ts = (TaskState *)cpu->opaque;
-    bool segv_was_blocked = ts->sigsegv_blocked;
+    TaskState *ts = (TaskState *)thread_cpu->opaque;
+
+    if (oldset) {
+        *oldset = ts->signal_mask;
+    }
 
     if (set) {
-        bool has_sigsegv = sigismember(set, SIGSEGV);
-        val = *set;
-        temp = &val;
+        int i;
 
-        sigdelset(temp, SIGSEGV);
+        if (block_signals()) {
+            return -TARGET_ERESTARTSYS;
+        }
 
         switch (how) {
         case SIG_BLOCK:
-            if (has_sigsegv) {
-                ts->sigsegv_blocked = true;
-            }
+            sigorset(&ts->signal_mask, &ts->signal_mask, set);
             break;
         case SIG_UNBLOCK:
-            if (has_sigsegv) {
-                ts->sigsegv_blocked = false;
+            for (i = 1; i <= NSIG; ++i) {
+                if (sigismember(set, i)) {
+                    sigdelset(&ts->signal_mask, i);
+                }
             }
             break;
         case SIG_SETMASK:
-            ts->sigsegv_blocked = has_sigsegv;
+            ts->signal_mask = *set;
             break;
         default:
             g_assert_not_reached();
         }
+
+        /* Silently ignore attempts to change blocking status of KILL or STOP */
+        sigdelset(&ts->signal_mask, SIGKILL);
+        sigdelset(&ts->signal_mask, SIGSTOP);
     }
-
-    ret = sigprocmask(how, temp, oldset);
-
-    if (oldset && segv_was_blocked) {
-        sigaddset(oldset, SIGSEGV);
-    }
-
-    return ret;
+    return 0;
 }
 
 #if !defined(TARGET_OPENRISC) && !defined(TARGET_UNICORE32) && \
     !defined(TARGET_X86_64)
-/* Just set the guest's signal mask to the specified value */
+/* Just set the guest's signal mask to the specified value; the
+ * caller is assumed to have called block_signals() already.
+ */
 static void set_sigmask(const sigset_t *set)
 {
-    do_sigprocmask(SIG_SETMASK, set, NULL);
+    TaskState *ts = (TaskState *)thread_cpu->opaque;
+
+    ts->signal_mask = *set;
 }
 #endif
 
@@ -376,6 +396,7 @@ static int core_dump_signal(int sig)
 
 void signal_init(void)
 {
+    TaskState *ts = (TaskState *)thread_cpu->opaque;
     struct sigaction act;
     struct sigaction oact;
     int i, j;
@@ -391,6 +412,9 @@ void signal_init(void)
         target_to_host_signal_table[j] = i;
     }
 
+    /* Set the signal mask from the host mask. */
+    sigprocmask(0, 0, &ts->signal_mask);
+
     /* set all host signal handlers. ALL signals are blocked during
        the handlers to serialize them. */
     memset(sigact_table, 0, sizeof(sigact_table));
@@ -509,7 +533,7 @@ int queue_signal(CPUArchState *env, int sig, target_siginfo_t *info)
     queue = gdb_queuesig ();
     handler = sigact_table[sig - 1]._sa_handler;
 
-    if (ts->sigsegv_blocked && sig == TARGET_SIGSEGV) {
+    if (sig == TARGET_SIGSEGV && sigismember(&ts->signal_mask, SIGSEGV)) {
         /* Guest has blocked SIGSEGV but we got one anyway. Assume this
          * is a forced SIGSEGV (ie one the kernel handles via force_sig_info
          * because it got a real MMU fault). A blocked SIGSEGV in that
@@ -565,7 +589,7 @@ int queue_signal(CPUArchState *env, int sig, target_siginfo_t *info)
         q->next = NULL;
         k->pending = 1;
         /* signal that a new signal is pending */
-        ts->signal_pending = 1;
+        atomic_set(&ts->signal_pending, 1);
         return 1; /* indicates that the signal was queued */
     }
 }
@@ -583,6 +607,7 @@ static void host_signal_handler(int host_signum, siginfo_t *info,
     CPUArchState *env = thread_cpu->env_ptr;
     int sig;
     target_siginfo_t tinfo;
+    ucontext_t *uc = puc;
 
     /* the CPU emulator uses some host signals to detect exceptions,
        we forward to it some signals */
@@ -602,6 +627,16 @@ static void host_signal_handler(int host_signum, siginfo_t *info,
 
     host_to_target_siginfo_noswap(&tinfo, info);
     if (queue_signal(env, sig, &tinfo) == 1) {
+        /* Block host signals until target signal handler entered. We
+         * can't block SIGSEGV or SIGBUS while we're executing guest
+         * code in case the guest code provokes one in the window between
+         * now and it getting out to the main loop. Signals will be
+         * unblocked again in process_pending_signals().
+         */
+        sigfillset(&uc->uc_sigmask);
+        sigdelset(&uc->uc_sigmask, SIGSEGV);
+        sigdelset(&uc->uc_sigmask, SIGBUS);
+
         /* interrupt the virtual CPU as soon as possible */
         cpu_exit(thread_cpu);
     }
@@ -2673,9 +2708,13 @@ void sparc64_get_context(CPUSPARCState *env)
     env->pc = env->npc;
     env->npc += 4;
 
-    err = 0;
-
-    do_sigprocmask(0, NULL, &set);
+    /* If we're only reading the signal mask then do_sigprocmask()
+     * is guaranteed not to fail, which is important because we don't
+     * have any way to signal a failure or restart this operation since
+     * this is not a normal syscall.
+     */
+    err = do_sigprocmask(0, NULL, &set);
+    assert(err == 0);
     host_to_target_sigset_internal(&target_set, &set);
     if (TARGET_NSIG_WORDS == 1) {
         __put_user(target_set.sig[0],
@@ -5778,7 +5817,7 @@ static void handle_pending_signal(CPUArchState *cpu_env, int sig)
 {
     CPUState *cpu = ENV_GET_CPU(cpu_env);
     abi_ulong handler;
-    sigset_t set, old_set;
+    sigset_t set;
     target_sigset_t target_old_set;
     struct target_sigaction *sa;
     struct sigqueue *q;
@@ -5801,7 +5840,7 @@ static void handle_pending_signal(CPUArchState *cpu_env, int sig)
         handler = sa->_sa_handler;
     }
 
-    if (ts->sigsegv_blocked && sig == TARGET_SIGSEGV) {
+    if (sig == TARGET_SIGSEGV && sigismember(&ts->signal_mask, SIGSEGV)) {
         /* Guest has blocked SIGSEGV but we got one anyway. Assume this
          * is a forced SIGSEGV (ie one the kernel handles via force_sig_info
          * because it got a real MMU fault), and treat as if default handler.
@@ -5825,17 +5864,23 @@ static void handle_pending_signal(CPUArchState *cpu_env, int sig)
         force_sig(sig);
     } else {
         /* compute the blocked signals during the handler execution */
+        sigset_t *blocked_set;
+
         target_to_host_sigset(&set, &sa->sa_mask);
         /* SA_NODEFER indicates that the current signal should not be
            blocked during the handler */
         if (!(sa->sa_flags & TARGET_SA_NODEFER))
             sigaddset(&set, target_to_host_signal(sig));
 
-        /* block signals in the handler using Linux */
-        do_sigprocmask(SIG_BLOCK, &set, &old_set);
         /* save the previous blocked signal state to restore it at the
            end of the signal execution (see do_sigreturn) */
-        host_to_target_sigset_internal(&target_old_set, &old_set);
+        host_to_target_sigset_internal(&target_old_set, &ts->signal_mask);
+
+        /* block signals in the handler */
+        blocked_set = ts->in_sigsuspend ?
+            &ts->sigsuspend_mask : &ts->signal_mask;
+        sigorset(&ts->signal_mask, blocked_set, &set);
+        ts->in_sigsuspend = 0;
 
         /* if the CPU is in VM86 mode, we restore the 32 bit values */
 #if defined(TARGET_I386) && !defined(TARGET_X86_64)
@@ -5869,18 +5914,38 @@ void process_pending_signals(CPUArchState *cpu_env)
     CPUState *cpu = ENV_GET_CPU(cpu_env);
     int sig;
     TaskState *ts = cpu->opaque;
+    sigset_t set;
+    sigset_t *blocked_set;
 
-    if (!ts->signal_pending)
-        return;
+    while (atomic_read(&ts->signal_pending)) {
+        /* FIXME: This is not threadsafe.  */
+        sigfillset(&set);
+        sigprocmask(SIG_SETMASK, &set, 0);
 
-    /* FIXME: This is not threadsafe.  */
-    for(sig = 1; sig <= TARGET_NSIG; sig++) {
-        if (ts->sigtab[sig - 1].pending) {
-            handle_pending_signal(cpu_env, sig);
-            return;
+        for (sig = 1; sig <= TARGET_NSIG; sig++) {
+            blocked_set = ts->in_sigsuspend ?
+                &ts->sigsuspend_mask : &ts->signal_mask;
+
+            if (ts->sigtab[sig - 1].pending &&
+                (!sigismember(blocked_set,
+                              target_to_host_signal_table[sig])
+                 || sig == TARGET_SIGSEGV)) {
+                handle_pending_signal(cpu_env, sig);
+                /* Restart scan from the beginning */
+                sig = 1;
+            }
         }
+
+        /* if no signal is pending, unblock signals and recheck (the act
+         * of unblocking might cause us to take another host signal which
+         * will set signal_pending again).
+         */
+        atomic_set(&ts->signal_pending, 0);
+        ts->in_sigsuspend = 0;
+        set = ts->signal_mask;
+        sigdelset(&set, SIGSEGV);
+        sigdelset(&set, SIGBUS);
+        sigprocmask(SIG_SETMASK, &set, 0);
     }
-    /* if no signal is pending, just return */
-    ts->signal_pending = 0;
-    return;
+    ts->in_sigsuspend = 0;
 }
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 5bdfe2a936..639b328c74 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -5387,6 +5387,7 @@ static int do_fork(CPUArchState *env, unsigned int flags, abi_ulong newsp,
         new_cpu->opaque = ts;
         ts->bprm = parent_ts->bprm;
         ts->info = parent_ts->info;
+        ts->signal_mask = parent_ts->signal_mask;
         nptl_flags = flags;
         flags &= ~CLONE_NPTL_FLAGS2;
 
@@ -7482,9 +7483,11 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
         {
             sigset_t cur_set;
             abi_ulong target_set;
-            do_sigprocmask(0, NULL, &cur_set);
-            host_to_target_old_sigset(&target_set, &cur_set);
-            ret = target_set;
+            ret = do_sigprocmask(0, NULL, &cur_set);
+            if (!ret) {
+                host_to_target_old_sigset(&target_set, &cur_set);
+                ret = target_set;
+            }
         }
         break;
 #endif
@@ -7493,12 +7496,20 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
         {
             sigset_t set, oset, cur_set;
             abi_ulong target_set = arg1;
-            do_sigprocmask(0, NULL, &cur_set);
+            /* We only have one word of the new mask so we must read
+             * the rest of it with do_sigprocmask() and OR in this word.
+             * We are guaranteed that a do_sigprocmask() that only queries
+             * the signal mask will not fail.
+             */
+            ret = do_sigprocmask(0, NULL, &cur_set);
+            assert(!ret);
             target_to_host_old_sigset(&set, &target_set);
             sigorset(&set, &set, &cur_set);
-            do_sigprocmask(SIG_SETMASK, &set, &oset);
-            host_to_target_old_sigset(&target_set, &oset);
-            ret = target_set;
+            ret = do_sigprocmask(SIG_SETMASK, &set, &oset);
+            if (!ret) {
+                host_to_target_old_sigset(&target_set, &oset);
+                ret = target_set;
+            }
         }
         break;
 #endif
@@ -7527,7 +7538,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
             mask = arg2;
             target_to_host_old_sigset(&set, &mask);
 
-            ret = get_errno(do_sigprocmask(how, &set, &oldset));
+            ret = do_sigprocmask(how, &set, &oldset);
             if (!is_error(ret)) {
                 host_to_target_old_sigset(&mask, &oldset);
                 ret = mask;
@@ -7561,7 +7572,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
                 how = 0;
                 set_ptr = NULL;
             }
-            ret = get_errno(do_sigprocmask(how, set_ptr, &oldset));
+            ret = do_sigprocmask(how, set_ptr, &oldset);
             if (!is_error(ret) && arg3) {
                 if (!(p = lock_user(VERIFY_WRITE, arg3, sizeof(target_sigset_t), 0)))
                     goto efault;
@@ -7601,7 +7612,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
                 how = 0;
                 set_ptr = NULL;
             }
-            ret = get_errno(do_sigprocmask(how, set_ptr, &oldset));
+            ret = do_sigprocmask(how, set_ptr, &oldset);
             if (!is_error(ret) && arg3) {
                 if (!(p = lock_user(VERIFY_WRITE, arg3, sizeof(target_sigset_t), 0)))
                     goto efault;
@@ -7639,28 +7650,36 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
 #ifdef TARGET_NR_sigsuspend
     case TARGET_NR_sigsuspend:
         {
-            sigset_t set;
+            TaskState *ts = cpu->opaque;
 #if defined(TARGET_ALPHA)
             abi_ulong mask = arg1;
-            target_to_host_old_sigset(&set, &mask);
+            target_to_host_old_sigset(&ts->sigsuspend_mask, &mask);
 #else
             if (!(p = lock_user(VERIFY_READ, arg1, sizeof(target_sigset_t), 1)))
                 goto efault;
-            target_to_host_old_sigset(&set, p);
+            target_to_host_old_sigset(&ts->sigsuspend_mask, p);
             unlock_user(p, arg1, 0);
 #endif
-            ret = get_errno(safe_rt_sigsuspend(&set, SIGSET_T_SIZE));
+            ret = get_errno(safe_rt_sigsuspend(&ts->sigsuspend_mask,
+                                               SIGSET_T_SIZE));
+            if (ret != -TARGET_ERESTARTSYS) {
+                ts->in_sigsuspend = 1;
+            }
         }
         break;
 #endif
     case TARGET_NR_rt_sigsuspend:
         {
-            sigset_t set;
+            TaskState *ts = cpu->opaque;
             if (!(p = lock_user(VERIFY_READ, arg1, sizeof(target_sigset_t), 1)))
                 goto efault;
-            target_to_host_sigset(&set, p);
+            target_to_host_sigset(&ts->sigsuspend_mask, p);
             unlock_user(p, arg1, 0);
-            ret = get_errno(safe_rt_sigsuspend(&set, SIGSET_T_SIZE));
+            ret = get_errno(safe_rt_sigsuspend(&ts->sigsuspend_mask,
+                                               SIGSET_T_SIZE));
+            if (ret != -TARGET_ERESTARTSYS) {
+                ts->in_sigsuspend = 1;
+            }
         }
         break;
     case TARGET_NR_rt_sigtimedwait:
@@ -7706,11 +7725,19 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
         break;
 #ifdef TARGET_NR_sigreturn
     case TARGET_NR_sigreturn:
-        ret = do_sigreturn(cpu_env);
+        if (block_signals()) {
+            ret = -TARGET_ERESTARTSYS;
+        } else {
+            ret = do_sigreturn(cpu_env);
+        }
         break;
 #endif
     case TARGET_NR_rt_sigreturn:
-        ret = do_rt_sigreturn(cpu_env);
+        if (block_signals()) {
+            ret = -TARGET_ERESTARTSYS;
+        } else {
+            ret = do_rt_sigreturn(cpu_env);
+        }
         break;
     case TARGET_NR_sethostname:
         if (!(p = lock_user_string(arg1)))
@@ -9764,9 +9791,11 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
             }
             mask = arg2;
             target_to_host_old_sigset(&set, &mask);
-            do_sigprocmask(how, &set, &oldset);
-            host_to_target_old_sigset(&mask, &oldset);
-            ret = mask;
+            ret = do_sigprocmask(how, &set, &oldset);
+            if (!ret) {
+                host_to_target_old_sigset(&mask, &oldset);
+                ret = mask;
+            }
         }
         break;
 #endif

From c19c1578f8a9b894f5e368e35139620a98bf6a69 Mon Sep 17 00:00:00 2001
From: Timothy E Baldwin <T.E.Baldwin99@members.leeds.ac.uk>
Date: Fri, 27 May 2016 15:51:50 +0100
Subject: [PATCH 13/44] linux-user: Remove redundant default action check in
 queue_signal()

Both queue_signal() and process_pending_signals() did check for default
actions of signals, this is redundant and also causes fatal and stopping
signals to incorrectly cause guest system calls to be interrupted.

The code in queue_signal() is removed.

Signed-off-by: Timothy Edward Baldwin <T.E.Baldwin99@members.leeds.ac.uk>
Message-id: 1441497448-32489-21-git-send-email-T.E.Baldwin99@members.leeds.ac.uk
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 linux-user/signal.c | 37 -------------------------------------
 1 file changed, 37 deletions(-)

diff --git a/linux-user/signal.c b/linux-user/signal.c
index a89853d4eb..2c6790d872 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -525,46 +525,10 @@ int queue_signal(CPUArchState *env, int sig, target_siginfo_t *info)
     TaskState *ts = cpu->opaque;
     struct emulated_sigtable *k;
     struct sigqueue *q, **pq;
-    abi_ulong handler;
-    int queue;
 
     trace_user_queue_signal(env, sig);
     k = &ts->sigtab[sig - 1];
-    queue = gdb_queuesig ();
-    handler = sigact_table[sig - 1]._sa_handler;
 
-    if (sig == TARGET_SIGSEGV && sigismember(&ts->signal_mask, SIGSEGV)) {
-        /* Guest has blocked SIGSEGV but we got one anyway. Assume this
-         * is a forced SIGSEGV (ie one the kernel handles via force_sig_info
-         * because it got a real MMU fault). A blocked SIGSEGV in that
-         * situation is treated as if using the default handler. This is
-         * not correct if some other process has randomly sent us a SIGSEGV
-         * via kill(), but that is not easy to distinguish at this point,
-         * so we assume it doesn't happen.
-         */
-        handler = TARGET_SIG_DFL;
-    }
-
-    if (!queue && handler == TARGET_SIG_DFL) {
-        if (sig == TARGET_SIGTSTP || sig == TARGET_SIGTTIN || sig == TARGET_SIGTTOU) {
-            kill(getpid(),SIGSTOP);
-            return 0;
-        } else
-        /* default handler : ignore some signal. The other are fatal */
-        if (sig != TARGET_SIGCHLD &&
-            sig != TARGET_SIGURG &&
-            sig != TARGET_SIGWINCH &&
-            sig != TARGET_SIGCONT) {
-            force_sig(sig);
-        } else {
-            return 0; /* indicate ignored */
-        }
-    } else if (!queue && handler == TARGET_SIG_IGN) {
-        /* ignore signal */
-        return 0;
-    } else if (!queue && handler == TARGET_SIG_ERR) {
-        force_sig(sig);
-    } else {
         pq = &k->first;
         if (sig < TARGET_SIGRTMIN) {
             /* if non real time signal, we queue exactly one signal */
@@ -591,7 +555,6 @@ int queue_signal(CPUArchState *env, int sig, target_siginfo_t *info)
         /* signal that a new signal is pending */
         atomic_set(&ts->signal_pending, 1);
         return 1; /* indicates that the signal was queued */
-    }
 }
 
 #ifndef HAVE_SAFE_SYSCALL

From 8fdb9fef3d63b5e245a496e4999ebb599b9b9496 Mon Sep 17 00:00:00 2001
From: Timothy E Baldwin <T.E.Baldwin99@members.leeds.ac.uk>
Date: Fri, 27 May 2016 15:51:51 +0100
Subject: [PATCH 14/44] linux-user: Remove redundant gdb_queuesig()

Signed-off-by: Timothy Edward Baldwin <T.E.Baldwin99@members.leeds.ac.uk>
Message-id: 1441497448-32489-22-git-send-email-T.E.Baldwin99@members.leeds.ac.uk
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 gdbstub.c              | 13 -------------
 include/exec/gdbstub.h |  1 -
 2 files changed, 14 deletions(-)

diff --git a/gdbstub.c b/gdbstub.c
index 8155eedf9c..b3101bf865 100644
--- a/gdbstub.c
+++ b/gdbstub.c
@@ -1493,19 +1493,6 @@ void gdb_exit(CPUArchState *env, int code)
 }
 
 #ifdef CONFIG_USER_ONLY
-int
-gdb_queuesig (void)
-{
-    GDBState *s;
-
-    s = gdbserver_state;
-
-    if (gdbserver_fd < 0 || s->fd < 0)
-        return 0;
-    else
-        return 1;
-}
-
 int
 gdb_handlesig(CPUState *cpu, int sig)
 {
diff --git a/include/exec/gdbstub.h b/include/exec/gdbstub.h
index 8e3f8d8176..f9708bbcd6 100644
--- a/include/exec/gdbstub.h
+++ b/include/exec/gdbstub.h
@@ -48,7 +48,6 @@ int use_gdb_syscalls(void);
 void gdb_set_stop_cpu(CPUState *cpu);
 void gdb_exit(CPUArchState *, int);
 #ifdef CONFIG_USER_ONLY
-int gdb_queuesig (void);
 int gdb_handlesig(CPUState *, int);
 void gdb_signalled(CPUArchState *, int);
 void gdbserver_fork(CPUState *);

From 907f5fddaa673ac3f6dc955df6eac2870e3603f4 Mon Sep 17 00:00:00 2001
From: Timothy E Baldwin <T.E.Baldwin99@members.leeds.ac.uk>
Date: Fri, 27 May 2016 15:51:52 +0100
Subject: [PATCH 15/44] linux-user: Remove real-time signal queuing

As host signals are now blocked whenever guest signals are blocked, the
queue of realtime signals is now in Linux. The QEMU queue is now
redundant and can be removed. (We already did not queue non-RT signals, and
none of the calls to queue_signal() except the one in host_signal_handler()
pass an RT signal number.)

Signed-off-by: Timothy Edward Baldwin <T.E.Baldwin99@members.leeds.ac.uk>
Message-id: 1441497448-32489-23-git-send-email-T.E.Baldwin99@members.leeds.ac.uk
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
[PMM: minor commit message tweak]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 linux-user/main.c   |  7 -----
 linux-user/qemu.h   | 11 +------
 linux-user/signal.c | 70 +++++++++------------------------------------
 3 files changed, 14 insertions(+), 74 deletions(-)

diff --git a/linux-user/main.c b/linux-user/main.c
index b2bc6ab2f7..b6da0ba1e2 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -3794,14 +3794,7 @@ void stop_all_tasks(void)
 /* Assumes contents are already zeroed.  */
 void init_task_state(TaskState *ts)
 {
-    int i;
- 
     ts->used = 1;
-    ts->first_free = ts->sigqueue_table;
-    for (i = 0; i < MAX_SIGQUEUE_SIZE - 1; i++) {
-        ts->sigqueue_table[i].next = &ts->sigqueue_table[i + 1];
-    }
-    ts->sigqueue_table[i].next = NULL;
 }
 
 CPUArchState *cpu_copy(CPUArchState *env)
diff --git a/linux-user/qemu.h b/linux-user/qemu.h
index 5138289bfb..b201f9042c 100644
--- a/linux-user/qemu.h
+++ b/linux-user/qemu.h
@@ -78,16 +78,9 @@ struct vm86_saved_state {
 
 #define MAX_SIGQUEUE_SIZE 1024
 
-struct sigqueue {
-    struct sigqueue *next;
-    target_siginfo_t info;
-};
-
 struct emulated_sigtable {
     int pending; /* true if signal is pending */
-    struct sigqueue *first;
-    struct sigqueue info; /* in order to always have memory for the
-                             first signal, we put it here */
+    target_siginfo_t info;
 };
 
 /* NOTE: we force a big alignment so that the stack stored after is
@@ -127,8 +120,6 @@ typedef struct TaskState {
     struct linux_binprm *bprm;
 
     struct emulated_sigtable sigtab[TARGET_NSIG];
-    struct sigqueue sigqueue_table[MAX_SIGQUEUE_SIZE]; /* siginfo queue */
-    struct sigqueue *first_free; /* first free siginfo queue entry */
     /* This thread's signal mask, as requested by the guest program.
      * The actual signal mask of this thread may differ:
      *  + we don't let SIGSEGV and SIGBUS be blocked while running guest code
diff --git a/linux-user/signal.c b/linux-user/signal.c
index 2c6790d872..5db1c0b87b 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -441,27 +441,6 @@ void signal_init(void)
     }
 }
 
-/* signal queue handling */
-
-static inline struct sigqueue *alloc_sigqueue(CPUArchState *env)
-{
-    CPUState *cpu = ENV_GET_CPU(env);
-    TaskState *ts = cpu->opaque;
-    struct sigqueue *q = ts->first_free;
-    if (!q)
-        return NULL;
-    ts->first_free = q->next;
-    return q;
-}
-
-static inline void free_sigqueue(CPUArchState *env, struct sigqueue *q)
-{
-    CPUState *cpu = ENV_GET_CPU(env);
-    TaskState *ts = cpu->opaque;
-
-    q->next = ts->first_free;
-    ts->first_free = q;
-}
 
 /* abort execution with signal */
 static void QEMU_NORETURN force_sig(int target_sig)
@@ -524,37 +503,20 @@ int queue_signal(CPUArchState *env, int sig, target_siginfo_t *info)
     CPUState *cpu = ENV_GET_CPU(env);
     TaskState *ts = cpu->opaque;
     struct emulated_sigtable *k;
-    struct sigqueue *q, **pq;
 
     trace_user_queue_signal(env, sig);
     k = &ts->sigtab[sig - 1];
 
-        pq = &k->first;
-        if (sig < TARGET_SIGRTMIN) {
-            /* if non real time signal, we queue exactly one signal */
-            if (!k->pending)
-                q = &k->info;
-            else
-                return 0;
-        } else {
-            if (!k->pending) {
-                /* first signal */
-                q = &k->info;
-            } else {
-                q = alloc_sigqueue(env);
-                if (!q)
-                    return -EAGAIN;
-                while (*pq != NULL)
-                    pq = &(*pq)->next;
-            }
-        }
-        *pq = q;
-        q->info = *info;
-        q->next = NULL;
-        k->pending = 1;
-        /* signal that a new signal is pending */
-        atomic_set(&ts->signal_pending, 1);
-        return 1; /* indicates that the signal was queued */
+    /* we queue exactly one signal */
+    if (k->pending) {
+        return 0;
+    }
+
+    k->info = *info;
+    k->pending = 1;
+    /* signal that a new signal is pending */
+    atomic_set(&ts->signal_pending, 1);
+    return 1; /* indicates that the signal was queued */
 }
 
 #ifndef HAVE_SAFE_SYSCALL
@@ -5783,16 +5745,12 @@ static void handle_pending_signal(CPUArchState *cpu_env, int sig)
     sigset_t set;
     target_sigset_t target_old_set;
     struct target_sigaction *sa;
-    struct sigqueue *q;
     TaskState *ts = cpu->opaque;
     struct emulated_sigtable *k = &ts->sigtab[sig - 1];
 
     trace_user_handle_signal(cpu_env, sig);
     /* dequeue signal */
-    q = k->first;
-    k->first = q->next;
-    if (!k->first)
-        k->pending = 0;
+    k->pending = 0;
 
     sig = gdb_handlesig(cpu, sig);
     if (!sig) {
@@ -5857,10 +5815,10 @@ static void handle_pending_signal(CPUArchState *cpu_env, int sig)
 #if defined(TARGET_ABI_MIPSN32) || defined(TARGET_ABI_MIPSN64) \
     || defined(TARGET_OPENRISC) || defined(TARGET_TILEGX)
         /* These targets do not have traditional signals.  */
-        setup_rt_frame(sig, sa, &q->info, &target_old_set, cpu_env);
+        setup_rt_frame(sig, sa, &k->info, &target_old_set, cpu_env);
 #else
         if (sa->sa_flags & TARGET_SA_SIGINFO)
-            setup_rt_frame(sig, sa, &q->info, &target_old_set, cpu_env);
+            setup_rt_frame(sig, sa, &k->info, &target_old_set, cpu_env);
         else
             setup_frame(sig, sa, &target_old_set, cpu_env);
 #endif
@@ -5868,8 +5826,6 @@ static void handle_pending_signal(CPUArchState *cpu_env, int sig)
             sa->_sa_handler = TARGET_SIG_DFL;
         }
     }
-    if (q != &k->info)
-        free_sigqueue(cpu_env, q);
 }
 
 void process_pending_signals(CPUArchState *cpu_env)

From 655ed67c2a248cf0a887229d8492d6ddc0518545 Mon Sep 17 00:00:00 2001
From: Timothy E Baldwin <T.E.Baldwin99@members.leeds.ac.uk>
Date: Fri, 27 May 2016 15:51:53 +0100
Subject: [PATCH 16/44] linux-user: Queue synchronous signals separately

If a synchronous signal and an asynchronous signal arrive near simultaneously,
and the signal number of the asynchronous signal is lower than that of the
synchronous signal the the handler for the asynchronous would be called first,
and then the handler for the synchronous signal would be called within or
after the first handler with an incorrect context.

This is fixed by queuing synchronous signals separately. Note that this does
risk delaying a asynchronous signal until the synchronous signal handler
returns rather than handling the signal on another thread, but this seems
unlikely to cause problems for real guest programs and is unavoidable unless
we could guarantee to roll back and reexecute whatever guest instruction
caused the synchronous signal (which would be a bit odd if we've already
logged its execution, for instance, and would require careful analysis of
all guest CPUs to check it was possible in all cases).

Signed-off-by: Timothy Edward Baldwin <T.E.Baldwin99@members.leeds.ac.uk>
Message-id: 1441497448-32489-24-git-send-email-T.E.Baldwin99@members.leeds.ac.uk
[PMM: added a comment]
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 linux-user/qemu.h   |  1 +
 linux-user/signal.c | 74 +++++++++++++++++++++++++--------------------
 2 files changed, 43 insertions(+), 32 deletions(-)

diff --git a/linux-user/qemu.h b/linux-user/qemu.h
index b201f9042c..6bd7b3223a 100644
--- a/linux-user/qemu.h
+++ b/linux-user/qemu.h
@@ -119,6 +119,7 @@ typedef struct TaskState {
     struct image_info *info;
     struct linux_binprm *bprm;
 
+    struct emulated_sigtable sync_signal;
     struct emulated_sigtable sigtab[TARGET_NSIG];
     /* This thread's signal mask, as requested by the guest program.
      * The actual signal mask of this thread may differ:
diff --git a/linux-user/signal.c b/linux-user/signal.c
index 5db1c0b87b..f48902882d 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -502,18 +502,11 @@ int queue_signal(CPUArchState *env, int sig, target_siginfo_t *info)
 {
     CPUState *cpu = ENV_GET_CPU(env);
     TaskState *ts = cpu->opaque;
-    struct emulated_sigtable *k;
 
     trace_user_queue_signal(env, sig);
-    k = &ts->sigtab[sig - 1];
 
-    /* we queue exactly one signal */
-    if (k->pending) {
-        return 0;
-    }
-
-    k->info = *info;
-    k->pending = 1;
+    ts->sync_signal.info = *info;
+    ts->sync_signal.pending = sig;
     /* signal that a new signal is pending */
     atomic_set(&ts->signal_pending, 1);
     return 1; /* indicates that the signal was queued */
@@ -530,9 +523,13 @@ static void host_signal_handler(int host_signum, siginfo_t *info,
                                 void *puc)
 {
     CPUArchState *env = thread_cpu->env_ptr;
+    CPUState *cpu = ENV_GET_CPU(env);
+    TaskState *ts = cpu->opaque;
+
     int sig;
     target_siginfo_t tinfo;
     ucontext_t *uc = puc;
+    struct emulated_sigtable *k;
 
     /* the CPU emulator uses some host signals to detect exceptions,
        we forward to it some signals */
@@ -551,20 +548,23 @@ static void host_signal_handler(int host_signum, siginfo_t *info,
     rewind_if_in_safe_syscall(puc);
 
     host_to_target_siginfo_noswap(&tinfo, info);
-    if (queue_signal(env, sig, &tinfo) == 1) {
-        /* Block host signals until target signal handler entered. We
-         * can't block SIGSEGV or SIGBUS while we're executing guest
-         * code in case the guest code provokes one in the window between
-         * now and it getting out to the main loop. Signals will be
-         * unblocked again in process_pending_signals().
-         */
-        sigfillset(&uc->uc_sigmask);
-        sigdelset(&uc->uc_sigmask, SIGSEGV);
-        sigdelset(&uc->uc_sigmask, SIGBUS);
+    k = &ts->sigtab[sig - 1];
+    k->info = tinfo;
+    k->pending = sig;
+    ts->signal_pending = 1;
 
-        /* interrupt the virtual CPU as soon as possible */
-        cpu_exit(thread_cpu);
-    }
+    /* Block host signals until target signal handler entered. We
+     * can't block SIGSEGV or SIGBUS while we're executing guest
+     * code in case the guest code provokes one in the window between
+     * now and it getting out to the main loop. Signals will be
+     * unblocked again in process_pending_signals().
+     */
+    sigfillset(&uc->uc_sigmask);
+    sigdelset(&uc->uc_sigmask, SIGSEGV);
+    sigdelset(&uc->uc_sigmask, SIGBUS);
+
+    /* interrupt the virtual CPU as soon as possible */
+    cpu_exit(thread_cpu);
 }
 
 /* do_sigaltstack() returns target values and errnos. */
@@ -5761,14 +5761,6 @@ static void handle_pending_signal(CPUArchState *cpu_env, int sig)
         handler = sa->_sa_handler;
     }
 
-    if (sig == TARGET_SIGSEGV && sigismember(&ts->signal_mask, SIGSEGV)) {
-        /* Guest has blocked SIGSEGV but we got one anyway. Assume this
-         * is a forced SIGSEGV (ie one the kernel handles via force_sig_info
-         * because it got a real MMU fault), and treat as if default handler.
-         */
-        handler = TARGET_SIG_DFL;
-    }
-
     if (handler == TARGET_SIG_DFL) {
         /* default handler : ignore some signal. The other are job control or fatal */
         if (sig == TARGET_SIGTSTP || sig == TARGET_SIGTTIN || sig == TARGET_SIGTTOU) {
@@ -5841,14 +5833,32 @@ void process_pending_signals(CPUArchState *cpu_env)
         sigfillset(&set);
         sigprocmask(SIG_SETMASK, &set, 0);
 
+        sig = ts->sync_signal.pending;
+        if (sig) {
+            /* Synchronous signals are forced,
+             * see force_sig_info() and callers in Linux
+             * Note that not all of our queue_signal() calls in QEMU correspond
+             * to force_sig_info() calls in Linux (some are send_sig_info()).
+             * However it seems like a kernel bug to me to allow the process
+             * to block a synchronous signal since it could then just end up
+             * looping round and round indefinitely.
+             */
+            if (sigismember(&ts->signal_mask, target_to_host_signal_table[sig])
+                || sigact_table[sig - 1]._sa_handler == TARGET_SIG_IGN) {
+                sigdelset(&ts->signal_mask, target_to_host_signal_table[sig]);
+                sigact_table[sig - 1]._sa_handler = TARGET_SIG_DFL;
+            }
+
+            handle_pending_signal(cpu_env, sig);
+        }
+
         for (sig = 1; sig <= TARGET_NSIG; sig++) {
             blocked_set = ts->in_sigsuspend ?
                 &ts->sigsuspend_mask : &ts->signal_mask;
 
             if (ts->sigtab[sig - 1].pending &&
                 (!sigismember(blocked_set,
-                              target_to_host_signal_table[sig])
-                 || sig == TARGET_SIGSEGV)) {
+                              target_to_host_signal_table[sig]))) {
                 handle_pending_signal(cpu_env, sig);
                 /* Restart scan from the beginning */
                 sig = 1;

From ef6a778ea2af4ebcf08a84cc9314cfe7cf2a2299 Mon Sep 17 00:00:00 2001
From: Timothy E Baldwin <T.E.Baldwin99@members.leeds.ac.uk>
Date: Fri, 27 May 2016 15:51:54 +0100
Subject: [PATCH 17/44] linux-user: Block signals during sigaction() handling

Block signals while emulating sigaction. This is a non-interruptible
syscall, and using block_signals() avoids races where the host
signal handler is invoked and tries to examine the signal handler
data structures while we are updating them.

Signed-off-by: Timothy Edward Baldwin <T.E.Baldwin99@members.leeds.ac.uk>
Message-id: 1441497448-32489-29-git-send-email-T.E.Baldwin99@members.leeds.ac.uk
[PMM: expanded commit message]
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 linux-user/signal.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/linux-user/signal.c b/linux-user/signal.c
index f48902882d..b21d6bf625 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -640,7 +640,7 @@ out:
     return ret;
 }
 
-/* do_sigaction() return host values and errnos */
+/* do_sigaction() return target values and host errnos */
 int do_sigaction(int sig, const struct target_sigaction *act,
                  struct target_sigaction *oact)
 {
@@ -649,8 +649,14 @@ int do_sigaction(int sig, const struct target_sigaction *act,
     int host_sig;
     int ret = 0;
 
-    if (sig < 1 || sig > TARGET_NSIG || sig == TARGET_SIGKILL || sig == TARGET_SIGSTOP)
-        return -EINVAL;
+    if (sig < 1 || sig > TARGET_NSIG || sig == TARGET_SIGKILL || sig == TARGET_SIGSTOP) {
+        return -TARGET_EINVAL;
+    }
+
+    if (block_signals()) {
+        return -TARGET_ERESTARTSYS;
+    }
+
     k = &sigact_table[sig - 1];
     if (oact) {
         __put_user(k->_sa_handler, &oact->_sa_handler);

From f59ec606104ade2443179231fc7a3cb98683ac85 Mon Sep 17 00:00:00 2001
From: Timothy E Baldwin <T.E.Baldwin99@members.leeds.ac.uk>
Date: Fri, 27 May 2016 15:51:55 +0100
Subject: [PATCH 18/44] linux-user: pause() should not pause if signal pending

Fix races between signal handling and the pause syscall by
reimplementing it using block_signals() and sigsuspend().
(Using safe_syscall(pause) would also work, except that the
pause syscall doesn't exist on all architectures.)

Signed-off-by: Timothy Edward Baldwin <T.E.Baldwin99@members.leeds.ac.uk>
Message-id: 1441497448-32489-28-git-send-email-T.E.Baldwin99@members.leeds.ac.uk
[PMM: tweaked commit message]
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 linux-user/syscall.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 639b328c74..aa5517c849 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -7059,7 +7059,10 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
 #endif
 #ifdef TARGET_NR_pause /* not on alpha */
     case TARGET_NR_pause:
-        ret = get_errno(pause());
+        if (!block_signals()) {
+            sigsuspend(&((TaskState *)cpu->opaque)->signal_mask);
+        }
+        ret = -TARGET_EINTR;
         break;
 #endif
 #ifdef TARGET_NR_utime

From a0995886e23323258d0612c71f8b0416a02806f2 Mon Sep 17 00:00:00 2001
From: Timothy E Baldwin <T.E.Baldwin99@members.leeds.ac.uk>
Date: Fri, 27 May 2016 15:51:56 +0100
Subject: [PATCH 19/44] linux-user: Restart exit() if signal pending

Without this a signal could vanish on thread exit.

Signed-off-by: Timothy Edward Baldwin <T.E.Baldwin99@members.leeds.ac.uk>
Message-id: 1441497448-32489-26-git-send-email-T.E.Baldwin99@members.leeds.ac.uk
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 linux-user/syscall.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index aa5517c849..f3061a9567 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -6640,8 +6640,12 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
            However in threaded applictions it is used for thread termination,
            and _exit_group is used for application termination.
            Do thread termination if we have more then one thread.  */
-        /* FIXME: This probably breaks if a signal arrives.  We should probably
-           be disabling signals.  */
+
+        if (block_signals()) {
+            ret = -TARGET_ERESTARTSYS;
+            break;
+        }
+
         if (CPU_NEXT(first_cpu)) {
             TaskState *ts;
 

From bef653d92e4e1c47c60f5c020ff6de69dba83478 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Fri, 27 May 2016 15:51:57 +0100
Subject: [PATCH 20/44] linux-user: Use safe_syscall for kill, tkill and tgkill
 syscalls

Use the safe_syscall wrapper for the kill, tkill and tgkill syscalls.
Without this, if a thread sent a SIGKILL to itself it could kill the
thread before we had a chance to process a signal that arrived just
before the SIGKILL, and that signal would get lost.

We drop all the ifdeffery for tkill and tgkill, because every guest
architecture we support implements them, and they've been in Linux
since 2003 so we can assume the host headers define the __NR_tkill
and __NR_tgkill constants.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 linux-user/syscall.c | 23 +++++++----------------
 1 file changed, 7 insertions(+), 16 deletions(-)

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index f3061a9567..c0d086c2fb 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -191,8 +191,6 @@ static type name (type1 arg1,type2 arg2,type3 arg3,type4 arg4,type5 arg5,	\
 #define __NR_sys_getpriority __NR_getpriority
 #define __NR_sys_rt_sigqueueinfo __NR_rt_sigqueueinfo
 #define __NR_sys_syslog __NR_syslog
-#define __NR_sys_tgkill __NR_tgkill
-#define __NR_sys_tkill __NR_tkill
 #define __NR_sys_futex __NR_futex
 #define __NR_sys_inotify_init __NR_inotify_init
 #define __NR_sys_inotify_add_watch __NR_inotify_add_watch
@@ -230,12 +228,6 @@ _syscall5(int, _llseek,  uint,  fd, ulong, hi, ulong, lo,
 #endif
 _syscall3(int,sys_rt_sigqueueinfo,int,pid,int,sig,siginfo_t *,uinfo)
 _syscall3(int,sys_syslog,int,type,char*,bufp,int,len)
-#if defined(TARGET_NR_tgkill) && defined(__NR_tgkill)
-_syscall3(int,sys_tgkill,int,tgid,int,pid,int,sig)
-#endif
-#if defined(TARGET_NR_tkill) && defined(__NR_tkill)
-_syscall2(int,sys_tkill,int,tid,int,sig)
-#endif
 #ifdef __NR_exit_group
 _syscall1(int,exit_group,int,error_code)
 #endif
@@ -717,6 +709,9 @@ safe_syscall6(int, pselect6, int, nfds, fd_set *, readfds, fd_set *, writefds, \
 safe_syscall6(int,futex,int *,uaddr,int,op,int,val, \
               const struct timespec *,timeout,int *,uaddr2,int,val3)
 safe_syscall2(int, rt_sigsuspend, sigset_t *, newset, size_t, sigsetsize)
+safe_syscall2(int, kill, pid_t, pid, int, sig)
+safe_syscall2(int, tkill, int, tid, int, sig)
+safe_syscall3(int, tgkill, int, tgid, int, pid, int, sig)
 
 static inline int host_to_target_sock_type(int host_type)
 {
@@ -7169,7 +7164,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
         ret = 0;
         break;
     case TARGET_NR_kill:
-        ret = get_errno(kill(arg1, target_to_host_signal(arg2)));
+        ret = get_errno(safe_kill(arg1, target_to_host_signal(arg2)));
         break;
 #ifdef TARGET_NR_rename
     case TARGET_NR_rename:
@@ -10405,18 +10400,14 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
         break;
 #endif
 
-#if defined(TARGET_NR_tkill) && defined(__NR_tkill)
     case TARGET_NR_tkill:
-        ret = get_errno(sys_tkill((int)arg1, target_to_host_signal(arg2)));
+        ret = get_errno(safe_tkill((int)arg1, target_to_host_signal(arg2)));
         break;
-#endif
 
-#if defined(TARGET_NR_tgkill) && defined(__NR_tgkill)
     case TARGET_NR_tgkill:
-	ret = get_errno(sys_tgkill((int)arg1, (int)arg2,
+        ret = get_errno(safe_tgkill((int)arg1, (int)arg2,
                         target_to_host_signal(arg3)));
-	break;
-#endif
+        break;
 
 #ifdef TARGET_NR_set_robust_list
     case TARGET_NR_set_robust_list:

From 7d92d34ee4c7988f5ef6c8a5ed23d2c3e0837253 Mon Sep 17 00:00:00 2001
From: Timothy E Baldwin <T.E.Baldwin99@members.leeds.ac.uk>
Date: Fri, 27 May 2016 15:51:58 +0100
Subject: [PATCH 21/44] linux-user: Restart fork() if signals pending

If there is a signal pending during fork() the signal handler will
erroneously be called in both the parent and child, so handle any
pending signals first.

Signed-off-by: Timothy Edward Baldwin <T.E.Baldwin99@members.leeds.ac.uk>
Message-id: 1441497448-32489-20-git-send-email-T.E.Baldwin99@members.leeds.ac.uk
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 linux-user/syscall.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index c0d086c2fb..a2d591ed8a 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -5437,6 +5437,11 @@ static int do_fork(CPUArchState *env, unsigned int flags, abi_ulong newsp,
         if ((flags & ~(CSIGNAL | CLONE_NPTL_FLAGS2)) != 0) {
             return -TARGET_EINVAL;
         }
+
+        if (block_signals()) {
+            return -TARGET_ERESTARTSYS;
+        }
+
         fork_start();
         ret = fork();
         if (ret == 0) {

From a70dadc7f1a3e96a7179c6c3a6ccd1a0ea65760a Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Fri, 27 May 2016 15:51:59 +0100
Subject: [PATCH 22/44] linux-user: Use both si_code and si_signo when
 converting siginfo_t

The siginfo_t struct includes a union. The correct way to identify
which fields of the union are relevant is complicated, because we
have to use a combination of the si_code and si_signo to figure out
which of the union's members are valid.  (Within the host kernel it
is always possible to tell, but the kernel carefully avoids giving
userspace the high 16 bits of si_code, so we don't have the
information to do this the easy way...) We therefore make our best
guess, bearing in mind that a guest can spoof most of the si_codes
via rt_sigqueueinfo() if it likes.  Once we have made our guess, we
record it in the top 16 bits of the si_code, so that tswap_siginfo()
later can use it.  tswap_siginfo() then strips these top bits out
before writing si_code to the guest (sign-extending the lower bits).

This fixes a bug where fields were sometimes wrong; in particular
the LTP kill10 test went into an infinite loop because its signal
handler got a si_pid value of 0 rather than the pid of the sending
process.

As part of this change, we switch to using __put_user() in the
tswap_siginfo code which writes out the byteswapped values to
the target memory, in case the target memory pointer is not
sufficiently aligned for the host CPU's requirements.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 linux-user/signal.c       | 163 +++++++++++++++++++++++++++-----------
 linux-user/syscall_defs.h |  15 ++++
 2 files changed, 130 insertions(+), 48 deletions(-)

diff --git a/linux-user/signal.c b/linux-user/signal.c
index b21d6bf625..8ea0cbf0e6 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -17,6 +17,7 @@
  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 #include "qemu/osdep.h"
+#include "qemu/bitops.h"
 #include <sys/ucontext.h>
 #include <sys/resource.h>
 
@@ -274,70 +275,129 @@ static inline void host_to_target_siginfo_noswap(target_siginfo_t *tinfo,
                                                  const siginfo_t *info)
 {
     int sig = host_to_target_signal(info->si_signo);
+    int si_code = info->si_code;
+    int si_type;
     tinfo->si_signo = sig;
     tinfo->si_errno = 0;
     tinfo->si_code = info->si_code;
 
-    if (sig == TARGET_SIGILL || sig == TARGET_SIGFPE || sig == TARGET_SIGSEGV
-            || sig == TARGET_SIGBUS || sig == TARGET_SIGTRAP) {
-        /* Should never come here, but who knows. The information for
-           the target is irrelevant.  */
-        tinfo->_sifields._sigfault._addr = 0;
-    } else if (sig == TARGET_SIGIO) {
-        tinfo->_sifields._sigpoll._band = info->si_band;
-        tinfo->_sifields._sigpoll._fd = info->si_fd;
-    } else if (sig == TARGET_SIGCHLD) {
-        tinfo->_sifields._sigchld._pid = info->si_pid;
-        tinfo->_sifields._sigchld._uid = info->si_uid;
-        tinfo->_sifields._sigchld._status
+    /* This is awkward, because we have to use a combination of
+     * the si_code and si_signo to figure out which of the union's
+     * members are valid. (Within the host kernel it is always possible
+     * to tell, but the kernel carefully avoids giving userspace the
+     * high 16 bits of si_code, so we don't have the information to
+     * do this the easy way...) We therefore make our best guess,
+     * bearing in mind that a guest can spoof most of the si_codes
+     * via rt_sigqueueinfo() if it likes.
+     *
+     * Once we have made our guess, we record it in the top 16 bits of
+     * the si_code, so that tswap_siginfo() later can use it.
+     * tswap_siginfo() will strip these top bits out before writing
+     * si_code to the guest (sign-extending the lower bits).
+     */
+
+    switch (si_code) {
+    case SI_USER:
+    case SI_TKILL:
+    case SI_KERNEL:
+        /* Sent via kill(), tkill() or tgkill(), or direct from the kernel.
+         * These are the only unspoofable si_code values.
+         */
+        tinfo->_sifields._kill._pid = info->si_pid;
+        tinfo->_sifields._kill._uid = info->si_uid;
+        si_type = QEMU_SI_KILL;
+        break;
+    default:
+        /* Everything else is spoofable. Make best guess based on signal */
+        switch (sig) {
+        case TARGET_SIGCHLD:
+            tinfo->_sifields._sigchld._pid = info->si_pid;
+            tinfo->_sifields._sigchld._uid = info->si_uid;
+            tinfo->_sifields._sigchld._status
                 = host_to_target_waitstatus(info->si_status);
-        tinfo->_sifields._sigchld._utime = info->si_utime;
-        tinfo->_sifields._sigchld._stime = info->si_stime;
-    } else if (sig >= TARGET_SIGRTMIN) {
-        tinfo->_sifields._rt._pid = info->si_pid;
-        tinfo->_sifields._rt._uid = info->si_uid;
-        /* XXX: potential problem if 64 bit */
-        tinfo->_sifields._rt._sigval.sival_ptr
+            tinfo->_sifields._sigchld._utime = info->si_utime;
+            tinfo->_sifields._sigchld._stime = info->si_stime;
+            si_type = QEMU_SI_CHLD;
+            break;
+        case TARGET_SIGIO:
+            tinfo->_sifields._sigpoll._band = info->si_band;
+            tinfo->_sifields._sigpoll._fd = info->si_fd;
+            si_type = QEMU_SI_POLL;
+            break;
+        default:
+            /* Assume a sigqueue()/mq_notify()/rt_sigqueueinfo() source. */
+            tinfo->_sifields._rt._pid = info->si_pid;
+            tinfo->_sifields._rt._uid = info->si_uid;
+            /* XXX: potential problem if 64 bit */
+            tinfo->_sifields._rt._sigval.sival_ptr
                 = (abi_ulong)(unsigned long)info->si_value.sival_ptr;
+            si_type = QEMU_SI_RT;
+            break;
+        }
+        break;
     }
+
+    tinfo->si_code = deposit32(si_code, 16, 16, si_type);
 }
 
 static void tswap_siginfo(target_siginfo_t *tinfo,
                           const target_siginfo_t *info)
 {
-    int sig = info->si_signo;
-    tinfo->si_signo = tswap32(sig);
-    tinfo->si_errno = tswap32(info->si_errno);
-    tinfo->si_code = tswap32(info->si_code);
+    int si_type = extract32(info->si_code, 16, 16);
+    int si_code = sextract32(info->si_code, 0, 16);
 
-    if (sig == TARGET_SIGILL || sig == TARGET_SIGFPE || sig == TARGET_SIGSEGV
-        || sig == TARGET_SIGBUS || sig == TARGET_SIGTRAP) {
-        tinfo->_sifields._sigfault._addr
-            = tswapal(info->_sifields._sigfault._addr);
-    } else if (sig == TARGET_SIGIO) {
-        tinfo->_sifields._sigpoll._band
-            = tswap32(info->_sifields._sigpoll._band);
-        tinfo->_sifields._sigpoll._fd = tswap32(info->_sifields._sigpoll._fd);
-    } else if (sig == TARGET_SIGCHLD) {
-        tinfo->_sifields._sigchld._pid
-            = tswap32(info->_sifields._sigchld._pid);
-        tinfo->_sifields._sigchld._uid
-            = tswap32(info->_sifields._sigchld._uid);
-        tinfo->_sifields._sigchld._status
-            = tswap32(info->_sifields._sigchld._status);
-        tinfo->_sifields._sigchld._utime
-            = tswapal(info->_sifields._sigchld._utime);
-        tinfo->_sifields._sigchld._stime
-            = tswapal(info->_sifields._sigchld._stime);
-    } else if (sig >= TARGET_SIGRTMIN) {
-        tinfo->_sifields._rt._pid = tswap32(info->_sifields._rt._pid);
-        tinfo->_sifields._rt._uid = tswap32(info->_sifields._rt._uid);
-        tinfo->_sifields._rt._sigval.sival_ptr
-            = tswapal(info->_sifields._rt._sigval.sival_ptr);
+    __put_user(info->si_signo, &tinfo->si_signo);
+    __put_user(info->si_errno, &tinfo->si_errno);
+    __put_user(si_code, &tinfo->si_code);
+
+    /* We can use our internal marker of which fields in the structure
+     * are valid, rather than duplicating the guesswork of
+     * host_to_target_siginfo_noswap() here.
+     */
+    switch (si_type) {
+    case QEMU_SI_KILL:
+        __put_user(info->_sifields._kill._pid, &tinfo->_sifields._kill._pid);
+        __put_user(info->_sifields._kill._uid, &tinfo->_sifields._kill._uid);
+        break;
+    case QEMU_SI_TIMER:
+        __put_user(info->_sifields._timer._timer1,
+                   &tinfo->_sifields._timer._timer1);
+        __put_user(info->_sifields._timer._timer2,
+                   &tinfo->_sifields._timer._timer2);
+        break;
+    case QEMU_SI_POLL:
+        __put_user(info->_sifields._sigpoll._band,
+                   &tinfo->_sifields._sigpoll._band);
+        __put_user(info->_sifields._sigpoll._fd,
+                   &tinfo->_sifields._sigpoll._fd);
+        break;
+    case QEMU_SI_FAULT:
+        __put_user(info->_sifields._sigfault._addr,
+                   &tinfo->_sifields._sigfault._addr);
+        break;
+    case QEMU_SI_CHLD:
+        __put_user(info->_sifields._sigchld._pid,
+                   &tinfo->_sifields._sigchld._pid);
+        __put_user(info->_sifields._sigchld._uid,
+                   &tinfo->_sifields._sigchld._uid);
+        __put_user(info->_sifields._sigchld._status,
+                   &tinfo->_sifields._sigchld._status);
+        __put_user(info->_sifields._sigchld._utime,
+                   &tinfo->_sifields._sigchld._utime);
+        __put_user(info->_sifields._sigchld._stime,
+                   &tinfo->_sifields._sigchld._stime);
+        break;
+    case QEMU_SI_RT:
+        __put_user(info->_sifields._rt._pid, &tinfo->_sifields._rt._pid);
+        __put_user(info->_sifields._rt._uid, &tinfo->_sifields._rt._uid);
+        __put_user(info->_sifields._rt._sigval.sival_ptr,
+                   &tinfo->_sifields._rt._sigval.sival_ptr);
+        break;
+    default:
+        g_assert_not_reached();
     }
 }
 
-
 void host_to_target_siginfo(target_siginfo_t *tinfo, const siginfo_t *info)
 {
     host_to_target_siginfo_noswap(tinfo, info);
@@ -505,6 +565,13 @@ int queue_signal(CPUArchState *env, int sig, target_siginfo_t *info)
 
     trace_user_queue_signal(env, sig);
 
+    /* Currently all callers define siginfo structures which
+     * use the _sifields._sigfault union member, so we can
+     * set the type here. If that changes we should push this
+     * out so the si_type is passed in by callers.
+     */
+    info->si_code = deposit32(info->si_code, 16, 16, QEMU_SI_FAULT);
+
     ts->sync_signal.info = *info;
     ts->sync_signal.pending = sig;
     /* signal that a new signal is pending */
diff --git a/linux-user/syscall_defs.h b/linux-user/syscall_defs.h
index 34af15a683..124754f8bd 100644
--- a/linux-user/syscall_defs.h
+++ b/linux-user/syscall_defs.h
@@ -673,6 +673,21 @@ typedef struct {
 
 #define TARGET_SI_PAD_SIZE ((TARGET_SI_MAX_SIZE - TARGET_SI_PREAMBLE_SIZE) / sizeof(int))
 
+/* Within QEMU the top 16 bits of si_code indicate which of the parts of
+ * the union in target_siginfo is valid. This only applies between
+ * host_to_target_siginfo_noswap() and tswap_siginfo(); it does not
+ * appear either within host siginfo_t or in target_siginfo structures
+ * which we get from the guest userspace program. (The Linux kernel
+ * does a similar thing with using the top bits for its own internal
+ * purposes but not letting them be visible to userspace.)
+ */
+#define QEMU_SI_KILL 0
+#define QEMU_SI_TIMER 1
+#define QEMU_SI_POLL 2
+#define QEMU_SI_FAULT 3
+#define QEMU_SI_CHLD 4
+#define QEMU_SI_RT 5
+
 typedef struct target_siginfo {
 #ifdef TARGET_MIPS
 	int si_signo;

From 90c0f080fe6fdd8b18691e6e38c853c8a996ad92 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Fri, 27 May 2016 15:52:01 +0100
Subject: [PATCH 23/44] linux-user: Avoid possible misalignment in
 target_to_host_siginfo()

Reimplement target_to_host_siginfo() to use __get_user(), which
handles possibly misaligned source guest structures correctly.

Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 linux-user/signal.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/linux-user/signal.c b/linux-user/signal.c
index 8ea0cbf0e6..61c1145446 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -408,13 +408,18 @@ void host_to_target_siginfo(target_siginfo_t *tinfo, const siginfo_t *info)
 /* XXX: find a solution for 64 bit (additional malloced data is needed) */
 void target_to_host_siginfo(siginfo_t *info, const target_siginfo_t *tinfo)
 {
-    info->si_signo = tswap32(tinfo->si_signo);
-    info->si_errno = tswap32(tinfo->si_errno);
-    info->si_code = tswap32(tinfo->si_code);
-    info->si_pid = tswap32(tinfo->_sifields._rt._pid);
-    info->si_uid = tswap32(tinfo->_sifields._rt._uid);
-    info->si_value.sival_ptr =
-            (void *)(long)tswapal(tinfo->_sifields._rt._sigval.sival_ptr);
+    /* This conversion is used only for the rt_sigqueueinfo syscall,
+     * and so we know that the _rt fields are the valid ones.
+     */
+    abi_ulong sival_ptr;
+
+    __get_user(info->si_signo, &tinfo->si_signo);
+    __get_user(info->si_errno, &tinfo->si_errno);
+    __get_user(info->si_code, &tinfo->si_code);
+    __get_user(info->si_pid, &tinfo->_sifields._rt._pid);
+    __get_user(info->si_uid, &tinfo->_sifields._rt._uid);
+    __get_user(sival_ptr, &tinfo->_sifields._rt._sigval.sival_ptr);
+    info->si_value.sival_ptr = (void *)(long)sival_ptr;
 }
 
 static int fatal_signal (int sig)

From 9e024732f53b368abdd578e1795bf3d2779ea88a Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Mon, 6 Jun 2016 19:56:54 +0100
Subject: [PATCH 24/44] linux-user: provide frame information in x86-64
 safe_syscall

Use cfi directives in the x86-64 safe_syscall to allow gdb to get
backtraces right from within it. (In particular this will be
quite a common situation if the user interrupts QEMU while it's
in a blocked safe-syscall: at the point of the syscall insn RBP
is in use for something else, and so gdb can't find the frame then
without assistance.)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 linux-user/host/x86_64/safe-syscall.inc.S | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/linux-user/host/x86_64/safe-syscall.inc.S b/linux-user/host/x86_64/safe-syscall.inc.S
index dde434c8d7..e09368d450 100644
--- a/linux-user/host/x86_64/safe-syscall.inc.S
+++ b/linux-user/host/x86_64/safe-syscall.inc.S
@@ -24,6 +24,7 @@
          * -1-and-errno-set convention is done by the calling wrapper.
          */
 safe_syscall_base:
+        .cfi_startproc
         /* This saves a frame pointer and aligns the stack for the syscall.
          * (It's unclear if the syscall ABI has the same stack alignment
          * requirements as the userspace function call ABI, but better safe than
@@ -31,6 +32,8 @@ safe_syscall_base:
          * does not list any ABI differences regarding stack alignment.)
          */
         push    %rbp
+        .cfi_adjust_cfa_offset 8
+        .cfi_rel_offset rbp, 0
 
         /* The syscall calling convention isn't the same as the
          * C one:
@@ -70,12 +73,19 @@ safe_syscall_start:
 safe_syscall_end:
         /* code path for having successfully executed the syscall */
         pop     %rbp
+        .cfi_remember_state
+        .cfi_def_cfa_offset 8
+        .cfi_restore rbp
         ret
 
 return_ERESTARTSYS:
         /* code path when we didn't execute the syscall */
+        .cfi_restore_state
         mov     $-TARGET_ERESTARTSYS, %rax
         pop     %rbp
+        .cfi_def_cfa_offset 8
+        .cfi_restore rbp
         ret
+        .cfi_endproc
 
         .size   safe_syscall_base, .-safe_syscall_base

From e0156a9dc43daea13b06b4c0edb755cc8f92dfdf Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Tue, 31 May 2016 15:45:09 +0100
Subject: [PATCH 25/44] linux-user: Fix handling of arm_fadvise64_64 syscall

32-bit ARM has an odd variant of the fadvise syscall which has
rearranged arguments, which we try to implement. Unfortunately we got
the rearrangement wrong.

This is a six-argument syscall whose arguments are:
 * fd
 * advise parameter
 * offset high half
 * offset low half
 * len high half
 * len low half

Stop trying to share code with the standard fadvise syscalls,
and just implement the syscall with the correct argument order.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 linux-user/syscall.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index a2d591ed8a..8c08e7c189 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -9970,18 +9970,18 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
 #endif
 #ifdef TARGET_NR_arm_fadvise64_64
     case TARGET_NR_arm_fadvise64_64:
-	{
-		/*
-		 * arm_fadvise64_64 looks like fadvise64_64 but
-		 * with different argument order
-		 */
-		abi_long temp;
-		temp = arg3;
-		arg3 = arg4;
-		arg4 = temp;
-	}
+        /* arm_fadvise64_64 looks like fadvise64_64 but
+         * with different argument order: fd, advice, offset, len
+         * rather than the usual fd, offset, len, advice.
+         * Note that offset and len are both 64-bit so appear as
+         * pairs of 32-bit registers.
+         */
+        ret = posix_fadvise(arg1, target_offset64(arg3, arg4),
+                            target_offset64(arg5, arg6), arg2);
+        ret = -host_to_target_errno(ret);
+        break;
 #endif
-#if defined(TARGET_NR_fadvise64_64) || defined(TARGET_NR_arm_fadvise64_64) || defined(TARGET_NR_fadvise64)
+#if defined(TARGET_NR_fadvise64_64) || defined(TARGET_NR_fadvise64)
 #ifdef TARGET_NR_fadvise64_64
     case TARGET_NR_fadvise64_64:
 #endif

From badd3cd8805de3b5f8c76b6a00612ed62a71eff8 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Tue, 31 May 2016 15:45:10 +0100
Subject: [PATCH 26/44] linux-user: Fix NR_fadvise64 and NR_fadvise64_64 for
 32-bit guests

Fix errors in the implementation of NR_fadvise64 and NR_fadvise64_64
for 32-bit guests, which pass their off_t values in register pairs.
We can't use the 64-bit code path for this, so split out the 32-bit
cases, so that we can correctly handle the "only offset is 64-bit"
and "both offset and length are 64-bit" syscall flavours, and
"uses aligned register pairs" and "does not" flavours of target.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 linux-user/syscall.c | 40 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 8c08e7c189..5cff9f7879 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -9981,6 +9981,44 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
         ret = -host_to_target_errno(ret);
         break;
 #endif
+
+#if TARGET_ABI_BITS == 32
+
+#ifdef TARGET_NR_fadvise64_64
+    case TARGET_NR_fadvise64_64:
+        /* 6 args: fd, offset (high, low), len (high, low), advice */
+        if (regpairs_aligned(cpu_env)) {
+            /* offset is in (3,4), len in (5,6) and advice in 7 */
+            arg2 = arg3;
+            arg3 = arg4;
+            arg4 = arg5;
+            arg5 = arg6;
+            arg6 = arg7;
+        }
+        ret = -host_to_target_errno(posix_fadvise(arg1,
+                                                  target_offset64(arg2, arg3),
+                                                  target_offset64(arg4, arg5),
+                                                  arg6));
+        break;
+#endif
+
+#ifdef TARGET_NR_fadvise64
+    case TARGET_NR_fadvise64:
+        /* 5 args: fd, offset (high, low), len, advice */
+        if (regpairs_aligned(cpu_env)) {
+            /* offset is in (3,4), len in 5 and advice in 6 */
+            arg2 = arg3;
+            arg3 = arg4;
+            arg4 = arg5;
+            arg5 = arg6;
+        }
+        ret = -host_to_target_errno(posix_fadvise(arg1,
+                                                  target_offset64(arg2, arg3),
+                                                  arg4, arg5));
+        break;
+#endif
+
+#else /* not a 32-bit ABI */
 #if defined(TARGET_NR_fadvise64_64) || defined(TARGET_NR_fadvise64)
 #ifdef TARGET_NR_fadvise64_64
     case TARGET_NR_fadvise64_64:
@@ -10000,6 +10038,8 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
         ret = -posix_fadvise(arg1, arg2, arg3, arg4);
 	break;
 #endif
+#endif /* end of 64-bit ABI fadvise handling */
+
 #ifdef TARGET_NR_madvise
     case TARGET_NR_madvise:
         /* A straight passthrough may not be safe because qemu sometimes

From 977d8241c10de4160a9377efd496395c98b7dee9 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Tue, 31 May 2016 15:45:11 +0100
Subject: [PATCH 27/44] linux-user: Fix error conversion in 64-bit fadvise
 syscall

Fix a missing host-to-target errno conversion in the 64-bit
fadvise syscall emulation.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 linux-user/syscall.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 5cff9f7879..8f8d9db78c 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -10035,8 +10035,8 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
         default: break;
         }
 #endif
-        ret = -posix_fadvise(arg1, arg2, arg3, arg4);
-	break;
+        ret = -host_to_target_errno(posix_fadvise(arg1, arg2, arg3, arg4));
+        break;
 #endif
 #endif /* end of 64-bit ABI fadvise handling */
 

From 918c03ed9ac583eb9d5c33345a814291e1dd2e87 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Mon, 6 Jun 2016 19:58:02 +0100
Subject: [PATCH 28/44] linux-user: Use safe_syscall wrapper for readv and
 writev syscalls

Use the safe_syscall wrapper for readv and writev syscalls.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 linux-user/syscall.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 8f8d9db78c..e0c49cc11f 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -712,6 +712,8 @@ safe_syscall2(int, rt_sigsuspend, sigset_t *, newset, size_t, sigsetsize)
 safe_syscall2(int, kill, pid_t, pid, int, sig)
 safe_syscall2(int, tkill, int, tid, int, sig)
 safe_syscall3(int, tgkill, int, tgid, int, pid, int, sig)
+safe_syscall3(ssize_t, readv, int, fd, const struct iovec *, iov, int, iovcnt)
+safe_syscall3(ssize_t, writev, int, fd, const struct iovec *, iov, int, iovcnt)
 
 static inline int host_to_target_sock_type(int host_type)
 {
@@ -8986,7 +8988,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
         {
             struct iovec *vec = lock_iovec(VERIFY_WRITE, arg2, arg3, 0);
             if (vec != NULL) {
-                ret = get_errno(readv(arg1, vec, arg3));
+                ret = get_errno(safe_readv(arg1, vec, arg3));
                 unlock_iovec(vec, arg2, arg3, 1);
             } else {
                 ret = -host_to_target_errno(errno);
@@ -8997,7 +8999,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
         {
             struct iovec *vec = lock_iovec(VERIFY_READ, arg2, arg3, 1);
             if (vec != NULL) {
-                ret = get_errno(writev(arg1, vec, arg3));
+                ret = get_errno(safe_writev(arg1, vec, arg3));
                 unlock_iovec(vec, arg2, arg3, 0);
             } else {
                 ret = -host_to_target_errno(errno);

From 2a3c7619288af9cfcc09a233dce911bf80849dfb Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Mon, 6 Jun 2016 19:58:03 +0100
Subject: [PATCH 29/44] linux-user: Use safe_syscall wrapper for connect
 syscall

Use the safe_syscall wrapper for the connect syscall.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 linux-user/syscall.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index e0c49cc11f..b363944e74 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -714,6 +714,8 @@ safe_syscall2(int, tkill, int, tid, int, sig)
 safe_syscall3(int, tgkill, int, tgid, int, pid, int, sig)
 safe_syscall3(ssize_t, readv, int, fd, const struct iovec *, iov, int, iovcnt)
 safe_syscall3(ssize_t, writev, int, fd, const struct iovec *, iov, int, iovcnt)
+safe_syscall3(int, connect, int, fd, const struct sockaddr *, addr,
+              socklen_t, addrlen)
 
 static inline int host_to_target_sock_type(int host_type)
 {
@@ -2859,7 +2861,7 @@ static abi_long do_connect(int sockfd, abi_ulong target_addr,
     if (ret)
         return ret;
 
-    return get_errno(connect(sockfd, addr, addrlen));
+    return get_errno(safe_connect(sockfd, addr, addrlen));
 }
 
 /* do_sendrecvmsg_locked() Must return target values and target errnos. */

From 666875306e03e1f94e1d4c808502585c10abc69a Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Mon, 6 Jun 2016 19:58:04 +0100
Subject: [PATCH 30/44] linux-user: Use safe_syscall wrapper for send* and
 recv* syscalls

Use the safe_syscall wrapper for the send, sendto, sendmsg, recv,
recvfrom and recvmsg syscalls.

RV: adjusted to apply
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 linux-user/syscall.c | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index b363944e74..bcae62d068 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -716,6 +716,12 @@ safe_syscall3(ssize_t, readv, int, fd, const struct iovec *, iov, int, iovcnt)
 safe_syscall3(ssize_t, writev, int, fd, const struct iovec *, iov, int, iovcnt)
 safe_syscall3(int, connect, int, fd, const struct sockaddr *, addr,
               socklen_t, addrlen)
+safe_syscall6(ssize_t, sendto, int, fd, const void *, buf, size_t, len,
+              int, flags, const struct sockaddr *, addr, socklen_t, addrlen)
+safe_syscall6(ssize_t, recvfrom, int, fd, void *, buf, size_t, len,
+              int, flags, struct sockaddr *, addr, socklen_t *, addrlen)
+safe_syscall3(ssize_t, sendmsg, int, fd, const struct msghdr *, msg, int, flags)
+safe_syscall3(ssize_t, recvmsg, int, fd, struct msghdr *, msg, int, flags)
 
 static inline int host_to_target_sock_type(int host_type)
 {
@@ -2910,10 +2916,10 @@ static abi_long do_sendrecvmsg_locked(int fd, struct target_msghdr *msgp,
             ret = target_to_host_cmsg(&msg, msgp);
         }
         if (ret == 0) {
-            ret = get_errno(sendmsg(fd, &msg, flags));
+            ret = get_errno(safe_sendmsg(fd, &msg, flags));
         }
     } else {
-        ret = get_errno(recvmsg(fd, &msg, flags));
+        ret = get_errno(safe_recvmsg(fd, &msg, flags));
         if (!is_error(ret)) {
             len = ret;
             if (fd_trans_host_to_target_data(fd)) {
@@ -3162,9 +3168,9 @@ static abi_long do_sendto(int fd, abi_ulong msg, size_t len, int flags,
             unlock_user(host_msg, msg, 0);
             return ret;
         }
-        ret = get_errno(sendto(fd, host_msg, len, flags, addr, addrlen));
+        ret = get_errno(safe_sendto(fd, host_msg, len, flags, addr, addrlen));
     } else {
-        ret = get_errno(send(fd, host_msg, len, flags));
+        ret = get_errno(safe_sendto(fd, host_msg, len, flags, NULL, 0));
     }
     unlock_user(host_msg, msg, 0);
     return ret;
@@ -3193,10 +3199,11 @@ static abi_long do_recvfrom(int fd, abi_ulong msg, size_t len, int flags,
             goto fail;
         }
         addr = alloca(addrlen);
-        ret = get_errno(recvfrom(fd, host_msg, len, flags, addr, &addrlen));
+        ret = get_errno(safe_recvfrom(fd, host_msg, len, flags,
+                                      addr, &addrlen));
     } else {
         addr = NULL; /* To keep compiler quiet.  */
-        ret = get_errno(qemu_recv(fd, host_msg, len, flags));
+        ret = get_errno(safe_recvfrom(fd, host_msg, len, flags, NULL, 0));
     }
     if (!is_error(ret)) {
         if (target_addr) {

From 89f9fe4452848386e9d0aacd84ac681944051b78 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Mon, 6 Jun 2016 19:58:05 +0100
Subject: [PATCH 31/44] linux-user: Use safe_syscall wrapper for msgsnd and
 msgrcv

Use the safe_syscall wrapper for msgsnd and msgrcv syscalls.
This is made slightly awkward by some host architectures providing
only a single 'ipc' syscall rather than separate syscalls per
operation; we provide safe_msgsnd() and safe_msgrcv() as wrappers
around safe_ipc() to handle this if needed.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 linux-user/syscall.c | 32 ++++++++++++++++++++++++++++++--
 1 file changed, 30 insertions(+), 2 deletions(-)

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index bcae62d068..b41d2699ef 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -722,6 +722,34 @@ safe_syscall6(ssize_t, recvfrom, int, fd, void *, buf, size_t, len,
               int, flags, struct sockaddr *, addr, socklen_t *, addrlen)
 safe_syscall3(ssize_t, sendmsg, int, fd, const struct msghdr *, msg, int, flags)
 safe_syscall3(ssize_t, recvmsg, int, fd, struct msghdr *, msg, int, flags)
+#ifdef __NR_msgsnd
+safe_syscall4(int, msgsnd, int, msgid, const void *, msgp, size_t, sz,
+              int, flags)
+safe_syscall5(int, msgrcv, int, msgid, void *, msgp, size_t, sz,
+              long, msgtype, int, flags)
+#else
+/* This host kernel architecture uses a single ipc syscall; fake up
+ * wrappers for the sub-operations to hide this implementation detail.
+ * Annoyingly we can't include linux/ipc.h to get the constant definitions
+ * for the call parameter because some structs in there conflict with the
+ * sys/ipc.h ones. So we just define them here, and rely on them being
+ * the same for all host architectures.
+ */
+#define Q_MSGSND 11
+#define Q_MSGRCV 12
+#define Q_IPCCALL(VERSION, OP) ((VERSION) << 16 | (OP))
+
+safe_syscall6(int, ipc, int, call, long, first, long, second, long, third,
+              void *, ptr, long, fifth)
+static int safe_msgsnd(int msgid, const void *msgp, size_t sz, int flags)
+{
+    return safe_ipc(Q_IPCCALL(0, Q_MSGSND), msgid, sz, flags, (void *)msgp, 0);
+}
+static int safe_msgrcv(int msgid, void *msgp, size_t sz, long type, int flags)
+{
+    return safe_ipc(Q_IPCCALL(1, Q_MSGRCV), msgid, sz, flags, msgp, type);
+}
+#endif
 
 static inline int host_to_target_sock_type(int host_type)
 {
@@ -3796,7 +3824,7 @@ static inline abi_long do_msgsnd(int msqid, abi_long msgp,
     }
     host_mb->mtype = (abi_long) tswapal(target_mb->mtype);
     memcpy(host_mb->mtext, target_mb->mtext, msgsz);
-    ret = get_errno(msgsnd(msqid, host_mb, msgsz, msgflg));
+    ret = get_errno(safe_msgsnd(msqid, host_mb, msgsz, msgflg));
     g_free(host_mb);
     unlock_user_struct(target_mb, msgp, 0);
 
@@ -3824,7 +3852,7 @@ static inline abi_long do_msgrcv(int msqid, abi_long msgp,
         ret = -TARGET_ENOMEM;
         goto end;
     }
-    ret = get_errno(msgrcv(msqid, host_mb, msgsz, msgtyp, msgflg));
+    ret = get_errno(safe_msgrcv(msqid, host_mb, msgsz, msgtyp, msgflg));
 
     if (ret > 0) {
         abi_ulong target_mtext_addr = msgp + sizeof(abi_ulong);

From d40ecd66182c9dbcf320c63a359917968cc9b73c Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Mon, 6 Jun 2016 19:58:06 +0100
Subject: [PATCH 32/44] linux-user: Use safe_syscall wrapper for mq_timedsend
 and mq_timedreceive

Use the safe_syscall wrapper for mq_timedsend and mq_timedreceive syscalls.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 linux-user/syscall.c | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index b41d2699ef..294e5ee236 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -750,6 +750,12 @@ static int safe_msgrcv(int msgid, void *msgp, size_t sz, long type, int flags)
     return safe_ipc(Q_IPCCALL(1, Q_MSGRCV), msgid, sz, flags, msgp, type);
 }
 #endif
+#if defined(TARGET_NR_mq_open) && defined(__NR_mq_open)
+safe_syscall5(int, mq_timedsend, int, mqdes, const char *, msg_ptr,
+              size_t, len, unsigned, prio, const struct timespec *, timeout)
+safe_syscall5(int, mq_timedreceive, int, mqdes, char *, msg_ptr,
+              size_t, len, unsigned *, prio, const struct timespec *, timeout)
+#endif
 
 static inline int host_to_target_sock_type(int host_type)
 {
@@ -10593,11 +10599,11 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
             p = lock_user (VERIFY_READ, arg2, arg3, 1);
             if (arg5 != 0) {
                 target_to_host_timespec(&ts, arg5);
-                ret = get_errno(mq_timedsend(arg1, p, arg3, arg4, &ts));
+                ret = get_errno(safe_mq_timedsend(arg1, p, arg3, arg4, &ts));
                 host_to_target_timespec(arg5, &ts);
+            } else {
+                ret = get_errno(safe_mq_timedsend(arg1, p, arg3, arg4, NULL));
             }
-            else
-                ret = get_errno(mq_send(arg1, p, arg3, arg4));
             unlock_user (p, arg2, arg3);
         }
         break;
@@ -10610,11 +10616,13 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
             p = lock_user (VERIFY_READ, arg2, arg3, 1);
             if (arg5 != 0) {
                 target_to_host_timespec(&ts, arg5);
-                ret = get_errno(mq_timedreceive(arg1, p, arg3, &prio, &ts));
+                ret = get_errno(safe_mq_timedreceive(arg1, p, arg3,
+                                                     &prio, &ts));
                 host_to_target_timespec(arg5, &ts);
+            } else {
+                ret = get_errno(safe_mq_timedreceive(arg1, p, arg3,
+                                                     &prio, NULL));
             }
-            else
-                ret = get_errno(mq_receive(arg1, p, arg3, &prio));
             unlock_user (p, arg2, arg3);
             if (arg4 != 0)
                 put_user_u32(prio, arg4);

From 2a8459892f83ad563efc8a3b29db766ebe986447 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Mon, 6 Jun 2016 19:58:07 +0100
Subject: [PATCH 33/44] linux-user: Use safe_syscall wrapper for flock

Use the safe_syscall wrapper for the flock syscall.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 linux-user/syscall.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 294e5ee236..7d880092ea 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -722,6 +722,7 @@ safe_syscall6(ssize_t, recvfrom, int, fd, void *, buf, size_t, len,
               int, flags, struct sockaddr *, addr, socklen_t *, addrlen)
 safe_syscall3(ssize_t, sendmsg, int, fd, const struct msghdr *, msg, int, flags)
 safe_syscall3(ssize_t, recvmsg, int, fd, struct msghdr *, msg, int, flags)
+safe_syscall2(int, flock, int, fd, int, operation)
 #ifdef __NR_msgsnd
 safe_syscall4(int, msgsnd, int, msgid, const void *, msgp, size_t, sz,
               int, flags)
@@ -9025,7 +9026,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
     case TARGET_NR_flock:
         /* NOTE: the flock constant seems to be the same for every
            Linux platform */
-        ret = get_errno(flock(arg1, arg2));
+        ret = get_errno(safe_flock(arg1, arg2));
         break;
     case TARGET_NR_readv:
         {

From b3f823306829a717b072548e630e0bd769706802 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Mon, 6 Jun 2016 19:58:08 +0100
Subject: [PATCH 34/44] linux-user: Use safe_syscall wrapper for
 rt_sigtimedwait syscall

Use the safe_syscall wrapper for the rt_sigtimedwait syscall.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 linux-user/syscall.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 7d880092ea..cecd96cc3d 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -723,6 +723,8 @@ safe_syscall6(ssize_t, recvfrom, int, fd, void *, buf, size_t, len,
 safe_syscall3(ssize_t, sendmsg, int, fd, const struct msghdr *, msg, int, flags)
 safe_syscall3(ssize_t, recvmsg, int, fd, struct msghdr *, msg, int, flags)
 safe_syscall2(int, flock, int, fd, int, operation)
+safe_syscall4(int, rt_sigtimedwait, const sigset_t *, these, siginfo_t *, uinfo,
+              const struct timespec *, uts, size_t, sigsetsize)
 #ifdef __NR_msgsnd
 safe_syscall4(int, msgsnd, int, msgid, const void *, msgp, size_t, sz,
               int, flags)
@@ -7751,7 +7753,8 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
             } else {
                 puts = NULL;
             }
-            ret = get_errno(sigtimedwait(&set, &uinfo, puts));
+            ret = get_errno(safe_rt_sigtimedwait(&set, &uinfo, puts,
+                                                 SIGSET_T_SIZE));
             if (!is_error(ret)) {
                 if (arg2) {
                     p = lock_user(VERIFY_WRITE, arg2, sizeof(target_siginfo_t),

From 9e518226f431454ce2d4d01051593b32515b3b55 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Mon, 6 Jun 2016 19:58:09 +0100
Subject: [PATCH 35/44] linux-user: Use safe_syscall wrapper for sleep syscalls

Use the safe_syscall wrapper for the clock_nanosleep and nanosleep
syscalls.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 linux-user/syscall.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index cecd96cc3d..a931919816 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -725,6 +725,12 @@ safe_syscall3(ssize_t, recvmsg, int, fd, struct msghdr *, msg, int, flags)
 safe_syscall2(int, flock, int, fd, int, operation)
 safe_syscall4(int, rt_sigtimedwait, const sigset_t *, these, siginfo_t *, uinfo,
               const struct timespec *, uts, size_t, sigsetsize)
+safe_syscall2(int, nanosleep, const struct timespec *, req,
+              struct timespec *, rem)
+#ifdef TARGET_NR_clock_nanosleep
+safe_syscall4(int, clock_nanosleep, const clockid_t, clock, int, flags,
+              const struct timespec *, req, struct timespec *, rem)
+#endif
 #ifdef __NR_msgsnd
 safe_syscall4(int, msgsnd, int, msgid, const void *, msgp, size_t, sz,
               int, flags)
@@ -9205,7 +9211,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
         {
             struct timespec req, rem;
             target_to_host_timespec(&req, arg1);
-            ret = get_errno(nanosleep(&req, &rem));
+            ret = get_errno(safe_nanosleep(&req, &rem));
             if (is_error(ret) && arg2) {
                 host_to_target_timespec(arg2, &rem);
             }
@@ -10473,14 +10479,15 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
     {
         struct timespec ts;
         target_to_host_timespec(&ts, arg3);
-        ret = get_errno(clock_nanosleep(arg1, arg2, &ts, arg4 ? &ts : NULL));
+        ret = get_errno(safe_clock_nanosleep(arg1, arg2,
+                                             &ts, arg4 ? &ts : NULL));
         if (arg4)
             host_to_target_timespec(arg4, &ts);
 
 #if defined(TARGET_PPC)
         /* clock_nanosleep is odd in that it returns positive errno values.
          * On PPC, CR0 bit 3 should be set in such a situation. */
-        if (ret) {
+        if (ret && ret != -TARGET_ERESTARTSYS) {
             ((CPUPPCState *)cpu_env)->crf[0] |= 1;
         }
 #endif

From a6130237b85e15463592484155aa905a9b39cc6c Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Mon, 6 Jun 2016 19:58:10 +0100
Subject: [PATCH 36/44] linux-user: Use safe_syscall wrapper for poll and ppoll
 syscalls

Use the safe_syscall wrapper for the poll and ppoll syscalls.
Since not all host architectures will have a poll syscall, we
have to rewrite the TARGET_NR_poll handling to use ppoll instead
(we can assume everywhere has ppoll by now).

We take the opportunity to switch to the code structure
already used in the implementation of epoll_wait and epoll_pwait,
which uses a switch() to avoid interleaving #if and if (),
and to stop using a variable with a leading '_' which is in
the implementation's namespace.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 linux-user/syscall.c | 47 +++++++++++++++++++++++++++++---------------
 1 file changed, 31 insertions(+), 16 deletions(-)

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index a931919816..8bb9adff1a 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -429,16 +429,6 @@ static int sys_inotify_init1(int flags)
 #undef TARGET_NR_inotify_rm_watch
 #endif /* CONFIG_INOTIFY  */
 
-#if defined(TARGET_NR_ppoll)
-#ifndef __NR_ppoll
-# define __NR_ppoll -1
-#endif
-#define __NR_sys_ppoll __NR_ppoll
-_syscall5(int, sys_ppoll, struct pollfd *, fds, nfds_t, nfds,
-          struct timespec *, timeout, const sigset_t *, sigmask,
-          size_t, sigsetsize)
-#endif
-
 #if defined(TARGET_NR_prlimit64)
 #ifndef __NR_prlimit64
 # define __NR_prlimit64 -1
@@ -706,6 +696,9 @@ safe_syscall5(int, waitid, idtype_t, idtype, id_t, id, siginfo_t *, infop, \
 safe_syscall3(int, execve, const char *, filename, char **, argv, char **, envp)
 safe_syscall6(int, pselect6, int, nfds, fd_set *, readfds, fd_set *, writefds, \
               fd_set *, exceptfds, struct timespec *, timeout, void *, sig)
+safe_syscall5(int, ppoll, struct pollfd *, ufds, unsigned int, nfds,
+              struct timespec *, tsp, const sigset_t *, sigmask,
+              size_t, sigsetsize)
 safe_syscall6(int,futex,int *,uaddr,int,op,int,val, \
               const struct timespec *,timeout,int *,uaddr2,int,val3)
 safe_syscall2(int, rt_sigsuspend, sigset_t *, newset, size_t, sigsetsize)
@@ -8964,7 +8957,6 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
         {
             struct target_pollfd *target_pfd;
             unsigned int nfds = arg2;
-            int timeout = arg3;
             struct pollfd *pfd;
             unsigned int i;
 
@@ -8984,8 +8976,10 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
                 }
             }
 
+            switch (num) {
 # ifdef TARGET_NR_ppoll
-            if (num == TARGET_NR_ppoll) {
+            case TARGET_NR_ppoll:
+            {
                 struct timespec _timeout_ts, *timeout_ts = &_timeout_ts;
                 target_sigset_t *target_set;
                 sigset_t _set, *set = &_set;
@@ -9010,8 +9004,8 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
                     set = NULL;
                 }
 
-                ret = get_errno(sys_ppoll(pfd, nfds, timeout_ts,
-                                          set, SIGSET_T_SIZE));
+                ret = get_errno(safe_ppoll(pfd, nfds, timeout_ts,
+                                           set, SIGSET_T_SIZE));
 
                 if (!is_error(ret) && arg3) {
                     host_to_target_timespec(arg3, timeout_ts);
@@ -9019,9 +9013,30 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
                 if (arg4) {
                     unlock_user(target_set, arg4, 0);
                 }
-            } else
+                break;
+            }
 # endif
-                ret = get_errno(poll(pfd, nfds, timeout));
+# ifdef TARGET_NR_poll
+            case TARGET_NR_poll:
+            {
+                struct timespec ts, *pts;
+
+                if (arg3 >= 0) {
+                    /* Convert ms to secs, ns */
+                    ts.tv_sec = arg3 / 1000;
+                    ts.tv_nsec = (arg3 % 1000) * 1000000LL;
+                    pts = &ts;
+                } else {
+                    /* -ve poll() timeout means "infinite" */
+                    pts = NULL;
+                }
+                ret = get_errno(safe_ppoll(pfd, nfds, pts, NULL, 0));
+                break;
+            }
+# endif
+            default:
+                g_assert_not_reached();
+            }
 
             if (!is_error(ret)) {
                 for(i = 0; i < nfds; i++) {

From 227f02143f269493543faf6908318c17abd725cd Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Mon, 6 Jun 2016 19:58:11 +0100
Subject: [PATCH 37/44] linux-user: Use safe_syscall wrapper for epoll_wait
 syscalls

Use the safe_syscall wrapper for epoll_wait and epoll_pwait syscalls.

Since we now directly use the host epoll_pwait syscall for both
epoll_wait and epoll_pwait, we don't need the configure machinery
to check whether glibc supports epoll_pwait(). (The kernel has
supported the syscall since 2.6.19 so we can assume it's always there.)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 configure            | 21 ++-------------------
 linux-user/syscall.c | 18 ++++++++++--------
 2 files changed, 12 insertions(+), 27 deletions(-)

diff --git a/configure b/configure
index d1dbc4b099..e995ddd7c9 100755
--- a/configure
+++ b/configure
@@ -3798,8 +3798,8 @@ if compile_prog "" "" ; then
   epoll=yes
 fi
 
-# epoll_create1 and epoll_pwait are later additions
-# so we must check separately for their presence
+# epoll_create1 is a later addition
+# so we must check separately for its presence
 epoll_create1=no
 cat > $TMPC << EOF
 #include <sys/epoll.h>
@@ -3821,20 +3821,6 @@ if compile_prog "" "" ; then
   epoll_create1=yes
 fi
 
-epoll_pwait=no
-cat > $TMPC << EOF
-#include <sys/epoll.h>
-
-int main(void)
-{
-    epoll_pwait(0, 0, 0, 0, 0);
-    return 0;
-}
-EOF
-if compile_prog "" "" ; then
-  epoll_pwait=yes
-fi
-
 # check for sendfile support
 sendfile=no
 cat > $TMPC << EOF
@@ -5125,9 +5111,6 @@ fi
 if test "$epoll_create1" = "yes" ; then
   echo "CONFIG_EPOLL_CREATE1=y" >> $config_host_mak
 fi
-if test "$epoll_pwait" = "yes" ; then
-  echo "CONFIG_EPOLL_PWAIT=y" >> $config_host_mak
-fi
 if test "$sendfile" = "yes" ; then
   echo "CONFIG_SENDFILE=y" >> $config_host_mak
 fi
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 8bb9adff1a..a193849494 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -699,6 +699,9 @@ safe_syscall6(int, pselect6, int, nfds, fd_set *, readfds, fd_set *, writefds, \
 safe_syscall5(int, ppoll, struct pollfd *, ufds, unsigned int, nfds,
               struct timespec *, tsp, const sigset_t *, sigmask,
               size_t, sigsetsize)
+safe_syscall6(int, epoll_pwait, int, epfd, struct epoll_event *, events,
+              int, maxevents, int, timeout, const sigset_t *, sigmask,
+              size_t, sigsetsize)
 safe_syscall6(int,futex,int *,uaddr,int,op,int,val, \
               const struct timespec *,timeout,int *,uaddr2,int,val3)
 safe_syscall2(int, rt_sigsuspend, sigset_t *, newset, size_t, sigsetsize)
@@ -10835,14 +10838,11 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
     }
 #endif
 
-#if defined(TARGET_NR_epoll_pwait) && defined(CONFIG_EPOLL_PWAIT)
-#define IMPLEMENT_EPOLL_PWAIT
-#endif
-#if defined(TARGET_NR_epoll_wait) || defined(IMPLEMENT_EPOLL_PWAIT)
+#if defined(TARGET_NR_epoll_wait) || defined(TARGET_NR_epoll_pwait)
 #if defined(TARGET_NR_epoll_wait)
     case TARGET_NR_epoll_wait:
 #endif
-#if defined(IMPLEMENT_EPOLL_PWAIT)
+#if defined(TARGET_NR_epoll_pwait)
     case TARGET_NR_epoll_pwait:
 #endif
     {
@@ -10861,7 +10861,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
         ep = alloca(maxevents * sizeof(struct epoll_event));
 
         switch (num) {
-#if defined(IMPLEMENT_EPOLL_PWAIT)
+#if defined(TARGET_NR_epoll_pwait)
         case TARGET_NR_epoll_pwait:
         {
             target_sigset_t *target_set;
@@ -10880,13 +10880,15 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
                 set = NULL;
             }
 
-            ret = get_errno(epoll_pwait(epfd, ep, maxevents, timeout, set));
+            ret = get_errno(safe_epoll_pwait(epfd, ep, maxevents, timeout,
+                                             set, SIGSET_T_SIZE));
             break;
         }
 #endif
 #if defined(TARGET_NR_epoll_wait)
         case TARGET_NR_epoll_wait:
-            ret = get_errno(epoll_wait(epfd, ep, maxevents, timeout));
+            ret = get_errno(safe_epoll_pwait(epfd, ep, maxevents, timeout,
+                                             NULL, 0));
             break;
 #endif
         default:

From ffb7ee796ae83b1e4b3c108f8615d54b53872c68 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Mon, 6 Jun 2016 19:58:12 +0100
Subject: [PATCH 38/44] linux-user: Use safe_syscall wrapper for semop

Use the safe_syscall wrapper for the semop syscall or IPC operation.
(We implement via the semtimedop syscall to make it easier to
implement the guest semtimedop syscall later.)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 linux-user/syscall.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index a193849494..3ccb3676d2 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -732,6 +732,8 @@ safe_syscall4(int, msgsnd, int, msgid, const void *, msgp, size_t, sz,
               int, flags)
 safe_syscall5(int, msgrcv, int, msgid, void *, msgp, size_t, sz,
               long, msgtype, int, flags)
+safe_syscall4(int, semtimedop, int, semid, struct sembuf *, tsops,
+              unsigned, nsops, const struct timespec *, timeout)
 #else
 /* This host kernel architecture uses a single ipc syscall; fake up
  * wrappers for the sub-operations to hide this implementation detail.
@@ -740,6 +742,7 @@ safe_syscall5(int, msgrcv, int, msgid, void *, msgp, size_t, sz,
  * sys/ipc.h ones. So we just define them here, and rely on them being
  * the same for all host architectures.
  */
+#define Q_SEMTIMEDOP 4
 #define Q_MSGSND 11
 #define Q_MSGRCV 12
 #define Q_IPCCALL(VERSION, OP) ((VERSION) << 16 | (OP))
@@ -754,6 +757,12 @@ static int safe_msgrcv(int msgid, void *msgp, size_t sz, long type, int flags)
 {
     return safe_ipc(Q_IPCCALL(1, Q_MSGRCV), msgid, sz, flags, msgp, type);
 }
+static int safe_semtimedop(int semid, struct sembuf *tsops, unsigned nsops,
+                           const struct timespec *timeout)
+{
+    return safe_ipc(Q_IPCCALL(0, Q_SEMTIMEDOP), semid, nsops, 0, tsops,
+                    (long)timeout);
+}
 #endif
 #if defined(TARGET_NR_mq_open) && defined(__NR_mq_open)
 safe_syscall5(int, mq_timedsend, int, mqdes, const char *, msg_ptr,
@@ -3680,7 +3689,7 @@ static inline abi_long do_semop(int semid, abi_long ptr, unsigned nsops)
     if (target_to_host_sembuf(sops, ptr, nsops))
         return -TARGET_EFAULT;
 
-    return get_errno(semop(semid, sops, nsops));
+    return get_errno(safe_semtimedop(semid, sops, nsops, NULL));
 }
 
 struct target_msqid_ds

From ff6dc130794bcd5b2033bc50262a7720285a74c7 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Mon, 6 Jun 2016 19:58:13 +0100
Subject: [PATCH 39/44] linux-user: Use safe_syscall wrapper for accept and
 accept4 syscalls

Use the safe_syscall wrapper for the accept and accept4 syscalls.
accept4 has been in the kernel since 2.6.28 so we can assume it
is always present.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 linux-user/syscall.c | 23 ++++-------------------
 1 file changed, 4 insertions(+), 19 deletions(-)

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 3ccb3676d2..d414dc4771 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -721,6 +721,8 @@ safe_syscall3(ssize_t, recvmsg, int, fd, struct msghdr *, msg, int, flags)
 safe_syscall2(int, flock, int, fd, int, operation)
 safe_syscall4(int, rt_sigtimedwait, const sigset_t *, these, siginfo_t *, uinfo,
               const struct timespec *, uts, size_t, sigsetsize)
+safe_syscall4(int, accept4, int, fd, struct sockaddr *, addr, socklen_t *, len,
+              int, flags)
 safe_syscall2(int, nanosleep, const struct timespec *, req,
               struct timespec *, rem)
 #ifdef TARGET_NR_clock_nanosleep
@@ -3061,19 +3063,6 @@ static abi_long do_sendrecvmmsg(int fd, abi_ulong target_msgvec,
     return ret;
 }
 
-/* If we don't have a system accept4() then just call accept.
- * The callsites to do_accept4() will ensure that they don't
- * pass a non-zero flags argument in this config.
- */
-#ifndef CONFIG_ACCEPT4
-static inline int accept4(int sockfd, struct sockaddr *addr,
-                          socklen_t *addrlen, int flags)
-{
-    assert(flags == 0);
-    return accept(sockfd, addr, addrlen);
-}
-#endif
-
 /* do_accept4() Must return target values and target errnos. */
 static abi_long do_accept4(int fd, abi_ulong target_addr,
                            abi_ulong target_addrlen_addr, int flags)
@@ -3086,7 +3075,7 @@ static abi_long do_accept4(int fd, abi_ulong target_addr,
     host_flags = target_to_host_bitmask(flags, fcntl_flags_tbl);
 
     if (target_addr == 0) {
-        return get_errno(accept4(fd, NULL, NULL, host_flags));
+        return get_errno(safe_accept4(fd, NULL, NULL, host_flags));
     }
 
     /* linux returns EINVAL if addrlen pointer is invalid */
@@ -3102,7 +3091,7 @@ static abi_long do_accept4(int fd, abi_ulong target_addr,
 
     addr = alloca(addrlen);
 
-    ret = get_errno(accept4(fd, addr, &addrlen, host_flags));
+    ret = get_errno(safe_accept4(fd, addr, &addrlen, host_flags));
     if (!is_error(ret)) {
         host_to_target_sockaddr(target_addr, addr, addrlen);
         if (put_user_u32(addrlen, target_addrlen_addr))
@@ -8334,11 +8323,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
 #endif
 #ifdef TARGET_NR_accept4
     case TARGET_NR_accept4:
-#ifdef CONFIG_ACCEPT4
         ret = do_accept4(arg1, arg2, arg3, arg4);
-#else
-        goto unimplemented;
-#endif
         break;
 #endif
 #ifdef TARGET_NR_bind

From 49ca6f3e24ee45de514604b8116e541332b23a84 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Mon, 6 Jun 2016 19:58:14 +0100
Subject: [PATCH 40/44] linux-user: Use safe_syscall wrapper for ioctl

Use the safe_syscall wrapper to implement the ioctl syscall.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 linux-user/syscall.c | 27 ++++++++++++++++-----------
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index d414dc4771..7b7bae6fda 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -772,6 +772,11 @@ safe_syscall5(int, mq_timedsend, int, mqdes, const char *, msg_ptr,
 safe_syscall5(int, mq_timedreceive, int, mqdes, char *, msg_ptr,
               size_t, len, unsigned *, prio, const struct timespec *, timeout)
 #endif
+/* We do ioctl like this rather than via safe_syscall3 to preserve the
+ * "third argument might be integer or pointer or not present" behaviour of
+ * the libc function.
+ */
+#define safe_ioctl(...) safe_syscall(__NR_ioctl, __VA_ARGS__)
 
 static inline int host_to_target_sock_type(int host_type)
 {
@@ -4277,7 +4282,7 @@ static abi_long do_ioctl_fs_ioc_fiemap(const IOCTLEntry *ie, uint8_t *buf_temp,
         memcpy(fm, buf_temp, sizeof(struct fiemap));
         free_fm = 1;
     }
-    ret = get_errno(ioctl(fd, ie->host_cmd, fm));
+    ret = get_errno(safe_ioctl(fd, ie->host_cmd, fm));
     if (!is_error(ret)) {
         target_size_out = target_size_in;
         /* An extent_count of 0 means we were only counting the extents
@@ -4367,7 +4372,7 @@ static abi_long do_ioctl_ifconf(const IOCTLEntry *ie, uint8_t *buf_temp,
     host_ifconf->ifc_len = host_ifc_len;
     host_ifconf->ifc_buf = host_ifc_buf;
 
-    ret = get_errno(ioctl(fd, ie->host_cmd, host_ifconf));
+    ret = get_errno(safe_ioctl(fd, ie->host_cmd, host_ifconf));
     if (!is_error(ret)) {
 	/* convert host ifc_len to target ifc_len */
 
@@ -4496,7 +4501,7 @@ static abi_long do_ioctl_dm(const IOCTLEntry *ie, uint8_t *buf_temp, int fd,
     }
     unlock_user(argptr, guest_data, 0);
 
-    ret = get_errno(ioctl(fd, ie->host_cmd, buf_temp));
+    ret = get_errno(safe_ioctl(fd, ie->host_cmd, buf_temp));
     if (!is_error(ret)) {
         guest_data = arg + host_dm->data_start;
         guest_data_size = host_dm->data_size - host_dm->data_start;
@@ -4677,7 +4682,7 @@ static abi_long do_ioctl_blkpg(const IOCTLEntry *ie, uint8_t *buf_temp, int fd,
 
     /* Swizzle the data pointer to our local copy and call! */
     host_blkpg->data = &host_part;
-    ret = get_errno(ioctl(fd, ie->host_cmd, host_blkpg));
+    ret = get_errno(safe_ioctl(fd, ie->host_cmd, host_blkpg));
 
 out:
     return ret;
@@ -4738,7 +4743,7 @@ static abi_long do_ioctl_rt(const IOCTLEntry *ie, uint8_t *buf_temp,
     }
     unlock_user(argptr, arg, 0);
 
-    ret = get_errno(ioctl(fd, ie->host_cmd, buf_temp));
+    ret = get_errno(safe_ioctl(fd, ie->host_cmd, buf_temp));
     if (*host_rt_dev_ptr != 0) {
         unlock_user((void *)*host_rt_dev_ptr,
                     *target_rt_dev_ptr, 0);
@@ -4750,7 +4755,7 @@ static abi_long do_ioctl_kdsigaccept(const IOCTLEntry *ie, uint8_t *buf_temp,
                                      int fd, int cmd, abi_long arg)
 {
     int sig = target_to_host_signal(arg);
-    return get_errno(ioctl(fd, ie->host_cmd, sig));
+    return get_errno(safe_ioctl(fd, ie->host_cmd, sig));
 }
 
 static IOCTLEntry ioctl_entries[] = {
@@ -4794,18 +4799,18 @@ static abi_long do_ioctl(int fd, int cmd, abi_long arg)
     switch(arg_type[0]) {
     case TYPE_NULL:
         /* no argument */
-        ret = get_errno(ioctl(fd, ie->host_cmd));
+        ret = get_errno(safe_ioctl(fd, ie->host_cmd));
         break;
     case TYPE_PTRVOID:
     case TYPE_INT:
-        ret = get_errno(ioctl(fd, ie->host_cmd, arg));
+        ret = get_errno(safe_ioctl(fd, ie->host_cmd, arg));
         break;
     case TYPE_PTR:
         arg_type++;
         target_size = thunk_type_size(arg_type, 0);
         switch(ie->access) {
         case IOC_R:
-            ret = get_errno(ioctl(fd, ie->host_cmd, buf_temp));
+            ret = get_errno(safe_ioctl(fd, ie->host_cmd, buf_temp));
             if (!is_error(ret)) {
                 argptr = lock_user(VERIFY_WRITE, arg, target_size, 0);
                 if (!argptr)
@@ -4820,7 +4825,7 @@ static abi_long do_ioctl(int fd, int cmd, abi_long arg)
                 return -TARGET_EFAULT;
             thunk_convert(buf_temp, argptr, arg_type, THUNK_HOST);
             unlock_user(argptr, arg, 0);
-            ret = get_errno(ioctl(fd, ie->host_cmd, buf_temp));
+            ret = get_errno(safe_ioctl(fd, ie->host_cmd, buf_temp));
             break;
         default:
         case IOC_RW:
@@ -4829,7 +4834,7 @@ static abi_long do_ioctl(int fd, int cmd, abi_long arg)
                 return -TARGET_EFAULT;
             thunk_convert(buf_temp, argptr, arg_type, THUNK_HOST);
             unlock_user(argptr, arg, 0);
-            ret = get_errno(ioctl(fd, ie->host_cmd, buf_temp));
+            ret = get_errno(safe_ioctl(fd, ie->host_cmd, buf_temp));
             if (!is_error(ret)) {
                 argptr = lock_user(VERIFY_WRITE, arg, target_size, 0);
                 if (!argptr)

From 8efb2ed5ec192f7e83c5e48753e695d5cbc161b2 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Mon, 6 Jun 2016 19:58:16 +0100
Subject: [PATCH 41/44] linux-user: Correct signedness of target_flock l_start
 and l_len fields

The l_start and l_len fields in the various target_flock structures are
supposed to be '__kernel_off_t' or '__kernel_loff_t', which means they
should be signed, not unsigned. Correcting the structure definitions means
that __get_user() and __put_user() will correctly sign extend them if
the guest is using 32 bit offsets and the host is using 64 bit offsets.

This fixes failures in the LTP 'fcntl14' tests where it checks that
negative seek offsets work correctly.

We reindent the structures to drop hard tabs since we're touching 40%
of the fields anyway.

RV: long long -> abi_llong as suggested by Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 linux-user/syscall_defs.h | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/linux-user/syscall_defs.h b/linux-user/syscall_defs.h
index 124754f8bd..6ee9251c50 100644
--- a/linux-user/syscall_defs.h
+++ b/linux-user/syscall_defs.h
@@ -2289,34 +2289,34 @@ struct target_statfs64 {
 #endif
 
 struct target_flock {
-	short l_type;
-	short l_whence;
-	abi_ulong l_start;
-	abi_ulong l_len;
-	int l_pid;
+    short l_type;
+    short l_whence;
+    abi_long l_start;
+    abi_long l_len;
+    int l_pid;
 };
 
 struct target_flock64 {
-	short  l_type;
-	short  l_whence;
+    short  l_type;
+    short  l_whence;
 #if defined(TARGET_PPC) || defined(TARGET_X86_64) || defined(TARGET_MIPS) \
     || defined(TARGET_SPARC) || defined(TARGET_HPPA) \
     || defined(TARGET_MICROBLAZE) || defined(TARGET_TILEGX)
-        int __pad;
+    int __pad;
 #endif
-	unsigned long long l_start;
-	unsigned long long l_len;
-	int  l_pid;
+    abi_llong l_start;
+    abi_llong l_len;
+    int  l_pid;
 } QEMU_PACKED;
 
 #ifdef TARGET_ARM
 struct target_eabi_flock64 {
-	short  l_type;
-	short  l_whence;
-        int __pad;
-	unsigned long long l_start;
-	unsigned long long l_len;
-	int  l_pid;
+    short  l_type;
+    short  l_whence;
+    int __pad;
+    abi_llong l_start;
+    abi_llong l_len;
+    int  l_pid;
 } QEMU_PACKED;
 #endif
 

From 7dcdaeafe07dd87079c4e073bced4cfc8bf5fdf3 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Mon, 6 Jun 2016 19:58:18 +0100
Subject: [PATCH 42/44] linux-user: Make target_strerror() return 'const char
 *'

Make target_strerror() return 'const char *' rather than just 'char *';
this will allow us to return constant strings from it for some special
cases.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
---
 linux-user/qemu.h    | 2 +-
 linux-user/strace.c  | 4 ++--
 linux-user/syscall.c | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/linux-user/qemu.h b/linux-user/qemu.h
index 6bd7b3223a..56f29c35b5 100644
--- a/linux-user/qemu.h
+++ b/linux-user/qemu.h
@@ -195,7 +195,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
 void gemu_log(const char *fmt, ...) GCC_FMT_ATTR(1, 2);
 extern THREAD CPUState *thread_cpu;
 void cpu_loop(CPUArchState *env);
-char *target_strerror(int err);
+const char *target_strerror(int err);
 int get_osversion(void);
 void init_qemu_uname_release(void);
 void fork_start(void);
diff --git a/linux-user/strace.c b/linux-user/strace.c
index 0810c85fbd..c5980a128c 100644
--- a/linux-user/strace.c
+++ b/linux-user/strace.c
@@ -281,7 +281,7 @@ print_ipc(const struct syscallname *name,
 static void
 print_syscall_ret_addr(const struct syscallname *name, abi_long ret)
 {
-    char *errstr = NULL;
+    const char *errstr = NULL;
 
     if (ret < 0) {
         errstr = target_strerror(-ret);
@@ -1594,7 +1594,7 @@ void
 print_syscall_ret(int num, abi_long ret)
 {
     int i;
-    char *errstr = NULL;
+    const char *errstr = NULL;
 
     for(i=0;i<nsyscalls;i++)
         if( scnames[i].nr == num ) {
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 7b7bae6fda..262c645d3c 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -630,7 +630,7 @@ static inline int is_error(abi_long ret)
     return (abi_ulong)ret >= (abi_ulong)(-4096);
 }
 
-char *target_strerror(int err)
+const char *target_strerror(int err)
 {
     if ((err >= ERRNO_TABLE_SIZE) || (err < 0)) {
         return NULL;

From da2a34f7f9999da09f6c307b40b66eba8cc38283 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Mon, 6 Jun 2016 19:58:19 +0100
Subject: [PATCH 43/44] linux-user: Special-case ERESTARTSYS in
 target_strerror()

Since TARGET_ERESTARTSYS and TARGET_ESIGRETURN are internal-to-QEMU
error numbers, handle them specially in target_strerror(), to avoid
confusing strace output like:

9521 rt_sigreturn(14,8,274886297808,8,0,268435456) = -1 errno=513 (Unknown error 513)

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 linux-user/syscall.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 262c645d3c..bd8095c819 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -632,6 +632,13 @@ static inline int is_error(abi_long ret)
 
 const char *target_strerror(int err)
 {
+    if (err == TARGET_ERESTARTSYS) {
+        return "To be restarted";
+    }
+    if (err == TARGET_QEMU_ESIGRETURN) {
+        return "Successful exit from sigreturn";
+    }
+
     if ((err >= ERRNO_TABLE_SIZE) || (err < 0)) {
         return NULL;
     }

From 014628a705bdaf31c09915c29e61f4088956564d Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Tue, 7 Jun 2016 17:31:04 +0100
Subject: [PATCH 44/44] linux-user: In fork_end(), remove correct CPUs from CPU
 list

In fork_end(), we must fix the list of current CPUs to match the fact
that the child of the fork has only one thread. Unfortunately we were
removing the wrong CPUs from the list, which meant that if the child
subsequently did an exclusive operation it would deadlock in
start_exclusive() waiting for a sibling CPU which didn't exist.

In particular this could cause hangs doing git submodule init
operations, as reported in https://bugs.launchpad.net/qemu/+bug/955379
comment #47.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
---
 linux-user/main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/linux-user/main.c b/linux-user/main.c
index b6da0ba1e2..150a356e8d 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -130,7 +130,7 @@ void fork_end(int child)
            Discard information about the parent threads.  */
         CPU_FOREACH_SAFE(cpu, next_cpu) {
             if (cpu != thread_cpu) {
-                QTAILQ_REMOVE(&cpus, thread_cpu, node);
+                QTAILQ_REMOVE(&cpus, cpu, node);
             }
         }
         pending_cpus = 0;