diff --git a/.gitignore b/.gitignore
index 9b6a968714..88ec2497b6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -95,6 +95,10 @@
/pc-bios/optionrom/linuxboot.bin
/pc-bios/optionrom/linuxboot.raw
/pc-bios/optionrom/linuxboot.img
+/pc-bios/optionrom/linuxboot_dma.asm
+/pc-bios/optionrom/linuxboot_dma.bin
+/pc-bios/optionrom/linuxboot_dma.raw
+/pc-bios/optionrom/linuxboot_dma.img
/pc-bios/optionrom/multiboot.asm
/pc-bios/optionrom/multiboot.bin
/pc-bios/optionrom/multiboot.raw
diff --git a/Makefile b/Makefile
index 45706375b2..0d7647f796 100644
--- a/Makefile
+++ b/Makefile
@@ -419,7 +419,7 @@ efi-pcnet.rom efi-rtl8139.rom efi-virtio.rom \
efi-e1000e.rom efi-vmxnet3.rom \
qemu-icon.bmp qemu_logo_no_text.svg \
bamboo.dtb petalogix-s3adsp1800.dtb petalogix-ml605.dtb \
-multiboot.bin linuxboot.bin kvmvapic.bin \
+multiboot.bin linuxboot.bin linuxboot_dma.bin kvmvapic.bin \
s390-ccw.img \
spapr-rtas.bin slof.bin \
palcode-clipper \
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index f56e225a99..1b8baa8fee 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -998,8 +998,13 @@ static void load_linux(PCMachineState *pcms,
fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size);
fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size);
- option_rom[nb_option_roms].name = "linuxboot.bin";
- option_rom[nb_option_roms].bootindex = 0;
+ if (fw_cfg_dma_enabled(fw_cfg)) {
+ option_rom[nb_option_roms].name = "linuxboot_dma.bin";
+ option_rom[nb_option_roms].bootindex = 0;
+ } else {
+ option_rom[nb_option_roms].name = "linuxboot.bin";
+ option_rom[nb_option_roms].bootindex = 0;
+ }
nb_option_roms++;
}
@@ -1291,6 +1296,7 @@ void xen_load_linux(PCMachineState *pcms)
load_linux(pcms, fw_cfg);
for (i = 0; i < nb_option_roms; i++) {
assert(!strcmp(option_rom[i].name, "linuxboot.bin") ||
+ !strcmp(option_rom[i].name, "linuxboot_dma.bin") ||
!strcmp(option_rom[i].name, "multiboot.bin"));
rom_add_option(option_rom[i].name, option_rom[i].bootindex);
}
diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c
index 74a0079ca6..2873030ade 100644
--- a/hw/nvram/fw_cfg.c
+++ b/hw/nvram/fw_cfg.c
@@ -552,7 +552,7 @@ static bool is_version_1(void *opaque, int version_id)
return version_id == 1;
}
-static bool fw_cfg_dma_enabled(void *opaque)
+bool fw_cfg_dma_enabled(void *opaque)
{
FWCfgState *s = opaque;
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 21235322fd..e38c95a4da 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -366,6 +366,10 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *);
#define PC_COMPAT_2_6 \
HW_COMPAT_2_6 \
{\
+ .driver = "fw_cfg_io",\
+ .property = "dma_enabled",\
+ .value = "off",\
+ },{\
.driver = TYPE_X86_CPU,\
.property = "cpuid-0xb",\
.value = "off",\
diff --git a/include/hw/nvram/fw_cfg.h b/include/hw/nvram/fw_cfg.h
index d00811258d..5c27a1f0d5 100644
--- a/include/hw/nvram/fw_cfg.h
+++ b/include/hw/nvram/fw_cfg.h
@@ -182,5 +182,6 @@ FWCfgState *fw_cfg_init_mem_wide(hwaddr ctl_addr,
hwaddr dma_addr, AddressSpace *dma_as);
FWCfgState *fw_cfg_find(void);
+bool fw_cfg_dma_enabled(void *opaque);
#endif
diff --git a/pc-bios/linuxboot_dma.bin b/pc-bios/linuxboot_dma.bin
new file mode 100644
index 0000000000..e1f623a124
Binary files /dev/null and b/pc-bios/linuxboot_dma.bin differ
diff --git a/pc-bios/optionrom/Makefile b/pc-bios/optionrom/Makefile
index 2cdda87f0e..d88ce119ce 100644
--- a/pc-bios/optionrom/Makefile
+++ b/pc-bios/optionrom/Makefile
@@ -9,22 +9,46 @@ $(call set-vpath, $(SRC_PATH)/pc-bios/optionrom)
.PHONY : all clean build-all
-CFLAGS := -Wall -Wstrict-prototypes -Werror -fomit-frame-pointer -fno-builtin
-CFLAGS += -I$(SRC_PATH)
-CFLAGS += $(call cc-option, $(CFLAGS), -fno-stack-protector)
-CFLAGS += $(CFLAGS_NOPIE)
-QEMU_CFLAGS = $(CFLAGS)
+# Drop -fstack-protector and the like
+QEMU_CFLAGS := $(filter -W%, $(QEMU_CFLAGS)) $(CFLAGS_NOPIE) -ffreestanding
+QEMU_CFLAGS += $(call cc-option, $(QEMU_CFLAGS), -m16)
+ifeq ($(filter -m16, $(QEMU_CFLAGS)),)
+# Attempt to work around compilers that lack -m16 (GCC <= 4.8, clang <= ??)
+# On GCC we add -fno-toplevel-reorder to keep the order of asm blocks with
+# respect to the rest of the code. clang does not have -fno-toplevel-reorder,
+# but it places all asm blocks at the beginning and we're relying on it for
+# the option ROM header. So just force clang not to use the integrated
+# assembler, which doesn't support .code16gcc.
+QEMU_CFLAGS += $(call cc-option, $(QEMU_CFLAGS), -fno-toplevel-reorder)
+QEMU_CFLAGS += $(call cc-option, $(QEMU_CFLAGS), -no-integrated-as)
+QEMU_CFLAGS += -m32 -include $(SRC_PATH)/pc-bios/optionrom/code16gcc.h
+endif
-build-all: multiboot.bin linuxboot.bin kvmvapic.bin
+# Drop gcov and glib flags
+CFLAGS := $(filter -O% -g%, $(CFLAGS))
+QEMU_INCLUDES += -I$(SRC_PATH)
+
+Wa = -Wa,
+ASFLAGS += -32
+QEMU_CFLAGS += $(call cc-option, $(QEMU_CFLAGS), $(Wa)-32)
+
+build-all: multiboot.bin linuxboot.bin linuxboot_dma.bin kvmvapic.bin
# suppress auto-removal of intermediate files
.SECONDARY:
+
%.o: %.S
- $(call quiet-command,$(CPP) $(QEMU_INCLUDES) $(QEMU_DGFLAGS) $(CFLAGS) -c -o - $< | $(AS) $(ASFLAGS) -o $@," AS $(TARGET_DIR)$@")
+ $(call quiet-command,$(CPP) $(QEMU_INCLUDES) $(QEMU_DGFLAGS) -c -o - $< | $(AS) $(ASFLAGS) -o $@," AS $(TARGET_DIR)$@")
+
+ifdef CONFIG_WIN32
+LD_EMULATION = i386pe
+else
+LD_EMULATION = elf_i386
+endif
%.img: %.o
- $(call quiet-command,$(LD) $(LDFLAGS_NOPIE) -Ttext 0 -e _start -s -o $@ $<," Building $(TARGET_DIR)$@")
+ $(call quiet-command,$(LD) $(LDFLAGS_NOPIE) -m $(LD_EMULATION) -Ttext 0 -e _start -s -o $@ $<," Building $(TARGET_DIR)$@")
%.raw: %.img
$(call quiet-command,$(OBJCOPY) -O binary -j .text $< $@," Building $(TARGET_DIR)$@")
diff --git a/pc-bios/optionrom/code16gcc.h b/pc-bios/optionrom/code16gcc.h
new file mode 100644
index 0000000000..9c8d25d508
--- /dev/null
+++ b/pc-bios/optionrom/code16gcc.h
@@ -0,0 +1,3 @@
+asm( /* Force-included by the optionrom Makefile (-include) when the compiler lacks -m16. */
+".code16gcc\n"
+);
diff --git a/pc-bios/optionrom/linuxboot_dma.c b/pc-bios/optionrom/linuxboot_dma.c
new file mode 100644
index 0000000000..8509b287ba
--- /dev/null
+++ b/pc-bios/optionrom/linuxboot_dma.c
@@ -0,0 +1,294 @@
+/*
+ * Linux Boot Option ROM for fw_cfg DMA
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Copyright (c) 2015-2016 Red Hat Inc.
+ * Authors:
+ * Marc Marí
+ * Richard W.M. Jones
+ */
+
+asm( /* ROM image header, "$PnP" header and the entry stub that jumps to load_kernel. */
+".text\n"
+".global _start\n"
+"_start:\n"
+" .short 0xaa55\n" /* presumably the option ROM signature word -- confirm against the BIOS spec */
+" .byte 0\n" /* size in 512 units, filled in by signrom.py */
+" .byte 0xcb\n" /* far return without prefix */
+" .org 0x18\n"
+" .short 0\n"
+" .short _pnph\n" /* 16-bit offset of the $PnP header that follows */
+"_pnph:\n"
+" .ascii \"$PnP\"\n"
+" .byte 0x01\n"
+" .byte (_pnph_len / 16)\n" /* header length in 16-byte units */
+" .short 0x0000\n"
+" .byte 0x00\n"
+" .byte 0x00\n"
+" .long 0x00000000\n"
+" .short _manufacturer\n" /* offset of the "QEMU" string */
+" .short _product\n" /* offset of the "Linux loader DMA" string */
+" .long 0x00000000\n"
+" .short 0x0000\n"
+" .short 0x0000\n"
+" .short _bev\n" /* offset of the entry stub at the end of this block */
+" .short 0x0000\n"
+" .short 0x0000\n"
+" .equ _pnph_len, . - _pnph\n"
+"_manufacturer:\n"
+" .asciz \"QEMU\"\n"
+"_product:\n"
+" .asciz \"Linux loader DMA\"\n"
+" .align 4, 0\n"
+"_bev:\n"
+" cli\n" /* interrupts off */
+" cld\n" /* direction flag cleared */
+" jmp load_kernel\n" /* hand over to the C entry point */
+);
+
+#include "../../include/hw/nvram/fw_cfg_keys.h"
+
+/* QEMU_CFG_DMA_CONTROL bits */
+#define BIOS_CFG_DMA_CTL_ERROR 0x01 /* masked out of the completion test in bios_cfg_read_entry() */
+#define BIOS_CFG_DMA_CTL_READ 0x02
+#define BIOS_CFG_DMA_CTL_SKIP 0x04 /* not used in this file */
+#define BIOS_CFG_DMA_CTL_SELECT 0x08
+
+#define BIOS_CFG_DMA_ADDR_HIGH 0x514 /* not used in this file */
+#define BIOS_CFG_DMA_ADDR_LOW 0x518 /* I/O port that receives the (byte-swapped) descriptor address */
+
+#define uint64_t unsigned long long /* local stand-ins for the <stdint.h> names */
+#define uint32_t unsigned int
+#define uint16_t unsigned short
+
+#define barrier() asm("" : : : "memory") /* compiler-only barrier: empty asm with a memory clobber */
+
+typedef struct FWCfgDmaAccess { /* DMA descriptor; bios_cfg_read_entry() stores every field big-endian */
+ uint32_t control; /* (key << 16) | BIOS_CFG_DMA_CTL_* bits; polled afterwards for completion */
+ uint32_t length; /* byte count of the transfer */
+ uint64_t address; /* address of the destination buffer */
+} __attribute__((packed)) FWCfgDmaAccess;
+
+static inline void outl(uint32_t value, uint16_t port) /* write the 32-bit value to I/O port 'port' */
+{
+ asm("outl %0, %w1" : : "a"(value), "Nd"(port)); /* value in EAX; port as an 8-bit immediate or in DX */
+}
+
+static inline void set_es(void *addr) /* load ES with the real-mode segment of addr */
+{
+ uint32_t seg = (uint32_t)addr >> 4; /* segment value = linear address / 16 */
+ asm("movl %0, %%es" : : "r"(seg));
+}
+
+#ifdef __clang__ /* ADDR32 is pasted in front of the ES-relative moves in readw_es/readl_es/writel_es */
+#define ADDR32 /* no prefix under clang -- presumably it rejects or does not need one; TODO confirm */
+#else
+#define ADDR32 "addr32 "
+#endif
+
+static inline uint16_t readw_es(uint16_t offset) /* return the 16-bit word at ES:offset */
+{
+ uint16_t val;
+ asm(ADDR32 "movw %%es:(%1), %0" : "=r"(val) : "r"((uint32_t)offset)); /* offset widened so it sits in a 32-bit register */
+ barrier(); /* stop the compiler moving later memory accesses above this point */
+ return val;
+}
+
+static inline uint32_t readl_es(uint16_t offset) /* return the 32-bit value at ES:offset */
+{
+ uint32_t val;
+ asm(ADDR32 "movl %%es:(%1), %0" : "=r"(val) : "r"((uint32_t)offset)); /* offset widened so it sits in a 32-bit register */
+ barrier(); /* stop the compiler moving later memory accesses above this point */
+ return val;
+}
+
+static inline void writel_es(uint16_t offset, uint32_t val) /* store the 32-bit val at ES:offset */
+{
+ barrier(); /* stop the compiler moving earlier memory accesses below the store */
+ asm(ADDR32 "movl %0, %%es:(%1)" : : "r"(val), "r"((uint32_t)offset));
+}
+
+static inline uint32_t bswap32(uint32_t x) /* return x with its four bytes in reverse order */
+{
+ return
+ ((x & 0x000000ffU) << 24) | /* byte 0 -> byte 3 */
+ ((x & 0x0000ff00U) << 8) | /* byte 1 -> byte 2 */
+ ((x & 0x00ff0000U) >> 8) | /* byte 2 -> byte 1 */
+ ((x & 0xff000000U) >> 24); /* byte 3 -> byte 0 */
+}
+
+static inline uint64_t bswap64(uint64_t x) /* return x with its eight bytes in reverse order */
+{
+ return
+ ((x & 0x00000000000000ffULL) << 56) | /* byte 0 -> byte 7 */
+ ((x & 0x000000000000ff00ULL) << 40) |
+ ((x & 0x0000000000ff0000ULL) << 24) |
+ ((x & 0x00000000ff000000ULL) << 8) |
+ ((x & 0x000000ff00000000ULL) >> 8) |
+ ((x & 0x0000ff0000000000ULL) >> 24) |
+ ((x & 0x00ff000000000000ULL) >> 40) |
+ ((x & 0xff00000000000000ULL) >> 56); /* byte 7 -> byte 0 */
+}
+
+static inline uint64_t cpu_to_be64(uint64_t x) /* CPU order -> big-endian; always swaps, i.e. treats the CPU as little-endian */
+{
+ return bswap64(x);
+}
+
+static inline uint32_t cpu_to_be32(uint32_t x) /* CPU order -> big-endian; always swaps, i.e. treats the CPU as little-endian */
+{
+ return bswap32(x);
+}
+
+static inline uint32_t be32_to_cpu(uint32_t x) /* big-endian -> CPU order; always swaps, i.e. treats the CPU as little-endian */
+{
+ return bswap32(x);
+}
+
+static void bios_cfg_read_entry(void *buf, uint16_t entry, uint32_t len) /* DMA-read len bytes of fw_cfg item 'entry' into buf */
+{
+ FWCfgDmaAccess access;
+ uint32_t control = ((uint32_t)entry << 16) | BIOS_CFG_DMA_CTL_SELECT /* widen first: entry promotes to int, so keys >= 0x8000 would overflow the shift */
+ | BIOS_CFG_DMA_CTL_READ;
+
+ access.address = cpu_to_be64((uint64_t)(uint32_t)buf); /* descriptor fields are stored big-endian */
+ access.length = cpu_to_be32(len);
+ access.control = cpu_to_be32(control);
+
+ barrier(); /* descriptor stores must be emitted before the port write */
+
+ outl(cpu_to_be32((uint32_t)&access), BIOS_CFG_DMA_ADDR_LOW); /* hand the descriptor address to the device */
+
+ while (be32_to_cpu(access.control) & ~BIOS_CFG_DMA_CTL_ERROR) { /* wait until only ERROR (if anything) is left set; NOTE(review): an error is not reported */
+ barrier(); /* force access.control to be re-read on every iteration */
+ }
+}
+
+/* Return top of memory, in bytes, using BIOS function E801. */
+static uint32_t get_e801_addr(void)
+{
+ uint16_t ax, bx, cx, dx;
+ uint32_t ret;
+
+ asm("int $0x15\n" /* AX=0xe801 on entry, BX/CX/DX zeroed; NOTE(review): no error indication is checked */
+ : "=a"(ax), "=b"(bx), "=c"(cx), "=d"(dx)
+ : "a"(0xe801), "b"(0), "c"(0), "d"(0));
+
+ /* Not SeaBIOS, but in theory a BIOS could return CX=DX=0 in which
+ * case we need to use the result from AX & BX instead.
+ */
+ if (cx == 0 && dx == 0) {
+ cx = ax;
+ dx = bx;
+ }
+
+ if (dx) {
+ /* DX = extended memory above 16M, in 64K units.
+ * Convert it to bytes and return.
+ */
+ ret = ((uint32_t)dx + 256 /* 16M in 64K units */) << 16;
+ } else {
+ /* This is a fallback path for machines with <= 16MB of RAM,
+ * which probably would never be the case, but deal with it
+ * anyway.
+ *
+ * CX = extended memory between 1M and 16M, in kilobytes
+ * Convert it to bytes and return.
+ */
+ ret = ((uint32_t)cx + 1024 /* 1M in K */) << 10;
+ }
+
+ return ret;
+}
+
+/* Force the asm name without leading underscore, even on Win32. */
+extern void load_kernel(void) asm("load_kernel"); /* the entry stub in the ROM header jumps to this symbol */
+
+void load_kernel(void) /* fetch setup, initrd, kernel and cmdline over fw_cfg DMA, then far-return into the setup code */
+{
+ void *setup_addr;
+ void *initrd_addr;
+ void *kernel_addr;
+ void *cmdline_addr;
+ uint32_t setup_size;
+ uint32_t initrd_size;
+ uint32_t kernel_size;
+ uint32_t cmdline_size;
+ uint32_t initrd_end_page, max_allowed_page;
+ uint32_t segment_addr, stack_addr;
+
+ bios_cfg_read_entry(&setup_addr, FW_CFG_SETUP_ADDR, 4); /* per blob: read its address, its size, then the data to that address */
+ bios_cfg_read_entry(&setup_size, FW_CFG_SETUP_SIZE, 4);
+ bios_cfg_read_entry(setup_addr, FW_CFG_SETUP_DATA, setup_size);
+
+ set_es(setup_addr); /* the *_es accessors below are relative to the setup block */
+
+ /* For protocol < 0x203 we don't have initrd_max ... */
+ if (readw_es(0x206) < 0x203) {
+ /* ... so we assume initrd_max = 0x37ffffff. */
+ writel_es(0x22c, 0x37ffffff);
+ }
+
+ bios_cfg_read_entry(&initrd_addr, FW_CFG_INITRD_ADDR, 4);
+ bios_cfg_read_entry(&initrd_size, FW_CFG_INITRD_SIZE, 4);
+
+ initrd_end_page = ((uint32_t)(initrd_addr + initrd_size) & -4096); /* rounded down to a 4 KiB boundary */
+ max_allowed_page = (readl_es(0x22c) & -4096);
+
+ if (initrd_end_page != 0 && max_allowed_page != 0 &&
+ initrd_end_page != max_allowed_page) {
+ /* Initrd at the end of memory. Compute better initrd address
+ * based on e801 data
+ */
+ initrd_addr = (void *)((get_e801_addr() - initrd_size) & -4096);
+ writel_es(0x218, (uint32_t)initrd_addr); /* record the new address in the setup block (presumably the header's initrd address field -- confirm) */
+
+ }
+
+ bios_cfg_read_entry(initrd_addr, FW_CFG_INITRD_DATA, initrd_size);
+
+ bios_cfg_read_entry(&kernel_addr, FW_CFG_KERNEL_ADDR, 4);
+ bios_cfg_read_entry(&kernel_size, FW_CFG_KERNEL_SIZE, 4);
+ bios_cfg_read_entry(kernel_addr, FW_CFG_KERNEL_DATA, kernel_size);
+
+ bios_cfg_read_entry(&cmdline_addr, FW_CFG_CMDLINE_ADDR, 4);
+ bios_cfg_read_entry(&cmdline_size, FW_CFG_CMDLINE_SIZE, 4);
+ bios_cfg_read_entry(cmdline_addr, FW_CFG_CMDLINE_DATA, cmdline_size);
+
+ /* Boot linux */
+ segment_addr = ((uint32_t)setup_addr >> 4); /* real-mode segment of the setup block */
+ stack_addr = (uint32_t)(cmdline_addr - setup_addr - 16); /* SP as an offset in that segment, 16 bytes below the command line */
+
+ /* As we are changing critical registers, we cannot leave freedom to the
+ * compiler.
+ */
+ asm("movw %%ax, %%ds\n" /* DS, ES, FS, GS and SS all get the setup segment */
+ "movw %%ax, %%es\n"
+ "movw %%ax, %%fs\n"
+ "movw %%ax, %%gs\n"
+ "movw %%ax, %%ss\n"
+ "movl %%ebx, %%esp\n"
+ "addw $0x20, %%ax\n" /* target segment = setup segment + 0x20, i.e. 0x200 bytes past setup_addr */
+ "pushw %%ax\n" /* CS */
+ "pushw $0\n" /* IP */
+ /* Clear registers and jump to Linux */
+ "xor %%ebx, %%ebx\n"
+ "xor %%ecx, %%ecx\n"
+ "xor %%edx, %%edx\n"
+ "xor %%edi, %%edi\n"
+ "xor %%ebp, %%ebp\n"
+ "lretw\n" /* far return pops the IP and CS pushed above */
+ : : "a"(segment_addr), "b"(stack_addr));
+}