Removed uvmm and virtio-net
This commit is contained in:
@@ -1,4 +0,0 @@
|
||||
requires: stdlibs libstdc++ libpthread libio-vbus libfdt l4virtio libloader
|
||||
optional: readline acpica zlib rtc libmbox-bcm2835
|
||||
variants: nofpu[server/src]
|
||||
maintainer: alexander.warg@kernkonzept.com adam@l4re.org
|
||||
@@ -1,328 +0,0 @@
|
||||
config UVMM_SUPPORTED
|
||||
def_bool BUILD_ARCH_arm64 || BUILD_ARCH_arm || BUILD_ARCH_amd64 || BUILD_ARCH_mips || BUILD_ARCH_riscv
|
||||
|
||||
comment "uvmm is not supported on this architecture"
|
||||
depends on !UVMM_SUPPORTED
|
||||
|
||||
comment "uvmm requires L4Re libgcc instead of toolchain version"
|
||||
depends on UVMM_SUPPORTED && COMPILER_RT_USE_TOOLCHAIN_LIBGCC
|
||||
|
||||
menu "uvmm virtual machine monitor"
|
||||
depends on UVMM_SUPPORTED
|
||||
depends on !COMPILER_RT_USE_TOOLCHAIN_LIBGCC
|
||||
|
||||
config UVMM_MONITOR
|
||||
bool "Monitor interface"
|
||||
depends on !RELEASE_MODE
|
||||
default y
|
||||
help
|
||||
Enables the monitoring interface that can be used to query and manipulate
|
||||
the guest state at runtime. When enabled the interface must also be switched
|
||||
on at runtime using the `mon` command line option.
|
||||
|
||||
menu "Supported Loaders"
|
||||
|
||||
config UVMM_LOADER_RAW
|
||||
bool "RAW images"
|
||||
default y
|
||||
help
|
||||
Allows loading raw kernel images to a given address. The format for the file name
|
||||
is 'raw:addr=0xcaffee:filename' where addr is the address the image should
|
||||
be loaded to, relative to the guest ram base.
|
||||
|
||||
config UVMM_LOADER_ELF
|
||||
bool "ELF images"
|
||||
default y
|
||||
help
|
||||
Enable loading of elf images. The file found under the file name passed to
|
||||
uvmm must be a valid ELF file for the loader to pick it up.
|
||||
|
||||
config UVMM_LOADER_PE
|
||||
bool "PE images (error handling only)"
|
||||
default y
|
||||
help
|
||||
Print useful errors when trying to load PE images. If the file found under
|
||||
the file name passed to uvmm is a PE file uvmm will not load it but provides
|
||||
helpful error messages.
|
||||
|
||||
config UVMM_LOADER_ROM
|
||||
bool "ROM images (from guest memory locations)"
|
||||
default y
|
||||
help
|
||||
Enable loading of images from a guest memory location. The format for the
|
||||
kernel file name passed to uvmm is 'rom:addr=0xcaffee' where 0xcaffee is
|
||||
a valid address in the memory of the guest. If the format is
|
||||
'rom:addr=0xcaffee:64bit' then the guest is a 64 bit guest.
|
||||
|
||||
config UVMM_LOADER_LINUX
|
||||
bool "Linux images (non-elf)"
|
||||
depends on BUILD_ARCH_arm64 || BUILD_ARCH_arm || BUILD_ARCH_amd64 || BUILD_ARCH_riscv
|
||||
default y
|
||||
help
|
||||
Loads a Linux image.
|
||||
|
||||
config UVMM_LOADER_OPENBSD
|
||||
bool "OpenBSD images"
|
||||
depends on BUILD_ARCH_amd64
|
||||
default y
|
||||
help
|
||||
Loads an OpenBSD image.
|
||||
|
||||
config UVMM_LOADER_GZIP
|
||||
bool "GZIP/ZLIB loading for Linux images"
|
||||
depends on (BUILD_ARCH_arm64 || BUILD_ARCH_arm || BUILD_ARCH_riscv) && HAVE_BIDPC_ZLIB && UVMM_LOADER_LINUX
|
||||
default y
|
||||
help
|
||||
Allows loading gzip- or zlib-compressed kernel images.
|
||||
|
||||
endmenu
|
||||
|
||||
comment "GZIP/ZLIB compression not available due to missing zlib package"
|
||||
depends on !HAVE_BIDPC_ZLIB
|
||||
|
||||
config UVMM_QEMU_FW_IF
|
||||
bool "Qemu firmware configuration device"
|
||||
default y
|
||||
help
|
||||
The device allows guests to gain access to the configuration of the
|
||||
hypervisor or any kind of data like boot/kernel images in a defined way.
|
||||
Some bootloaders make use of this to setup the platform and start the guest
|
||||
OS.
|
||||
|
||||
For details on the configuration see device/qemu_fw_cfg.cc.
|
||||
|
||||
config UVMM_FAULT_INJECT
|
||||
bool "Fault injection"
|
||||
depends on BUILD_ARCH_arm64 || BUILD_ARCH_arm
|
||||
|
||||
comment "Interfacing with L4 applications or physical hardware"
|
||||
|
||||
config UVMM_VDEV_PSCI
|
||||
bool "PSCI interface support"
|
||||
depends on BUILD_ARCH_arm || BUILD_ARCH_arm64
|
||||
default y
|
||||
help
|
||||
Emulates a PSCI interface for the guest.
|
||||
|
||||
config UVMM_VDEV_SMCCC_PROXY
|
||||
bool "Smc device proxy"
|
||||
depends on BUILD_ARCH_arm || BUILD_ARCH_arm64
|
||||
default y
|
||||
help
|
||||
Proxies SMCCC calls.
|
||||
|
||||
config UVMM_PCI_SUPPORT
|
||||
bool "Support PCI emulation" if BUILD_ARCH_arm64 || BUILD_ARCH_arm
|
||||
default y
|
||||
help
|
||||
Configures if the PCI subsystem (and the corresponding devices using the
|
||||
PCI transport) should be available in uvmm.
|
||||
|
||||
config UVMM_VDEV_OPTEE
|
||||
bool "OPTEE SMC call forwarding"
|
||||
depends on BUILD_ARCH_arm || BUILD_ARCH_arm64
|
||||
default y
|
||||
help
|
||||
Allows forwarding of OP-TEE SMC calls either to a running OP-TEE instance
|
||||
or to a native L4 application implementing the protocol.
|
||||
|
||||
For details on the configuration see device/optee.cc.
|
||||
|
||||
config UVMM_VDEV_VIRQ
|
||||
bool "Forward L4 interrupts"
|
||||
default y
|
||||
help
|
||||
Add a device for relaying L4 IRQs into the guest.
|
||||
|
||||
For details on the configuration see device/virq.cc
|
||||
|
||||
config UVMM_VDEV_MMIO_PROXY
|
||||
bool "Proxy dataspaces and MMIO protocol"
|
||||
default y
|
||||
help
|
||||
Emulate a device that proxies memory accesses to an external dataspace or
|
||||
MMIO space.
|
||||
|
||||
For details on the configuration see device/mmio_proxy.cc
|
||||
|
||||
config UVMM_VDEV_SYSCTL
|
||||
bool "System control device"
|
||||
default y
|
||||
help
|
||||
Mmio-based device for triggering system events (shutdown, reboot).
|
||||
The device can be used with the generic syscon device from Linux.
|
||||
|
||||
For details on the configuration see device/sysctl.cc
|
||||
|
||||
config UVMM_VDEV_ROM
|
||||
bool "Read-only dataspace based devices"
|
||||
default y
|
||||
help
|
||||
A device for adding L4 dataspaces read-only to the guest.
|
||||
|
||||
For details on the configuration see device/rom.cc
|
||||
|
||||
config UVMM_EXTERNAL_RTC
|
||||
bool "l4rtc time source"
|
||||
depends on HAVE_BIDPC_RTC
|
||||
default y
|
||||
help
|
||||
A driver to retrieve wallclock time from an L4Re rtc server.
|
||||
|
||||
For details on the configuration see device/l4rtc.cc
|
||||
|
||||
comment "l4rtc time source not available due to missing rtc package"
|
||||
depends on !HAVE_BIDPC_RTC
|
||||
|
||||
comment "Device Emulation"
|
||||
|
||||
config UVMM_VDEV_8250
|
||||
bool "8250-compatible UART"
|
||||
default y
|
||||
help
|
||||
Emulate an 8250 compatible UART for the guest.
|
||||
|
||||
For details on the configuration see device/uart_8250.cc
|
||||
|
||||
config UVMM_VDEV_PL011
|
||||
bool "PL011 UART"
|
||||
default y
|
||||
help
|
||||
Emulate a PrimeCell pl011-compatible UART for the guest.
|
||||
|
||||
For details on the configuration see device/pl011.cc
|
||||
|
||||
config UVMM_VDEV_PL031
|
||||
bool "PL031 RTC"
|
||||
default y
|
||||
depends on BUILD_ARCH_arm64 || BUILD_ARCH_arm
|
||||
help
|
||||
Emulate a simple PL031 RTC for the guest. This is not a complete device
|
||||
model and does not come with write support.
|
||||
|
||||
For details on the configuration see device/arm/pl031.cc
|
||||
|
||||
config UVMM_VDEV_DEVICE_PCI_HOST_ECAM_GENERIC
|
||||
bool "ECAM PCIe host bridge support"
|
||||
default y
|
||||
depends on (BUILD_ARCH_arm || BUILD_ARCH_arm64) && UVMM_PCI_SUPPORT
|
||||
help
|
||||
Emulates a generic PCIe host bridge for ARM platforms.
|
||||
|
||||
config UVMM_VDEV_DEVICE_FRAMEBUFFER
|
||||
bool "Generic framebuffer device"
|
||||
default y
|
||||
help
|
||||
Emulate a simple generic framebuffer device
|
||||
|
||||
config UVMM_VDEV_CFI_FLASH
|
||||
bool "CFI flash device"
|
||||
default y
|
||||
help
|
||||
Emulate a simple CFI compliant flash device with the Intel command set.
|
||||
|
||||
For details on the configuration see device/cfi.cc
|
||||
|
||||
config UVMM_VDEV_ISA_DEBUG_PORT
|
||||
bool "Bochs debug port"
|
||||
default y
|
||||
depends on BUILD_ARCH_amd64
|
||||
help
|
||||
Emulate the BOCHS debug IO-port (0x402) to enable guests to print on the
|
||||
vcon device.
|
||||
|
||||
For details on the configuration see ARCH-amd64/isa_debugport.cc
|
||||
|
||||
config UVMM_VDEV_BCM2835_MBOX
|
||||
bool "Support for external bcm2835 mailbox service"
|
||||
depends on BUILD_ARCH_arm || BUILD_ARCH_arm64
|
||||
help
|
||||
Emulate the bcm2835 mailbox device as found on Raspberry Pi 4.
|
||||
|
||||
config UVMM_VDEV_GIC_V2
|
||||
def_bool UVMM_VDEV_SEL_GIC_V2 || UVMM_VDEV_SEL_GIC_BOTH
|
||||
|
||||
config UVMM_VDEV_GIC_V3
|
||||
def_bool UVMM_VDEV_SEL_GIC_V3 || UVMM_VDEV_SEL_GIC_BOTH
|
||||
|
||||
choice
|
||||
prompt "GIC Support"
|
||||
depends on BUILD_ARCH_arm || BUILD_ARCH_arm64
|
||||
help
|
||||
Select which GIC emulations uvmm should support for the virtual interrupt
|
||||
controller.
|
||||
|
||||
config UVMM_VDEV_SEL_GIC_BOTH
|
||||
bool "GICv2 + GICv3"
|
||||
|
||||
config UVMM_VDEV_SEL_GIC_V2
|
||||
bool "GICv2"
|
||||
|
||||
config UVMM_VDEV_SEL_GIC_V3
|
||||
bool "GICv3"
|
||||
|
||||
endchoice
|
||||
|
||||
config UVMM_VDEV_GIC_ITS
|
||||
bool "Emulate ITS in GICv3 to support MSIs"
|
||||
depends on UVMM_VDEV_GIC_V3
|
||||
default y
|
||||
|
||||
config UVMM_IRQ_DIRECT_INJECT
|
||||
bool "Utilize direct guest vIRQ injection"
|
||||
depends on BUILD_ARCH_arm || BUILD_ARCH_arm64
|
||||
default y
|
||||
help
|
||||
On Arm platforms, the kernel optionally supports forwarding hardware
|
||||
interrupts directly to a guest. This reduces the interrupt latency
|
||||
but the "pending" and "active" bits in the virtual distributor are
|
||||
not updated any more. This usually does not pose any problem, though.
|
||||
|
||||
It is safe to enable the feature in uvmm even if the kernel does not
|
||||
support it. Uvmm will transparently fall back to regular, IPC based
|
||||
interrupt forwarding.
|
||||
|
||||
menuconfig UVMM_VDEV_VIRTIO
|
||||
bool "Virtio driver/device support"
|
||||
default y
|
||||
|
||||
if UVMM_VDEV_VIRTIO
|
||||
|
||||
config UVMM_VDEV_VIRTIO_CONSOLE
|
||||
bool "Support forwarding vcon as virtio-console to the guest"
|
||||
default y
|
||||
|
||||
config UVMM_VDEV_VIRTIO_POWER
|
||||
bool "Virtio-input based power events"
|
||||
default y
|
||||
help
|
||||
Support sending power events via Virtio-input.
|
||||
|
||||
config UVMM_VDEV_VIRTIO_INPUT
|
||||
bool "Forward L4Re::Events via Virtio-input"
|
||||
default y
|
||||
help
|
||||
Support forwarding input device events via Virtio-input.
|
||||
|
||||
config UVMM_VDEV_VIRTIO_PROXY
|
||||
bool "Support forwarding L4virtio devices to the guest"
|
||||
default y
|
||||
help
|
||||
Support forwarding L4Re virtio devices to the guest. This enables
|
||||
guests to use L4Re drivers e.g., block devices or virtual network
|
||||
bridges.
|
||||
|
||||
If unsure, say Y.
|
||||
|
||||
config UVMM_VDEV_DEVICE_PROXY
|
||||
bool "Virtio device proxy"
|
||||
default y
|
||||
help
|
||||
Proxy for virtio devices implemented in the guest. Can be used to export
|
||||
devices driven by a guest using the virtio protocol.
|
||||
|
||||
For details on the configuration see device/virtio_device_proxy.cc
|
||||
|
||||
endif
|
||||
|
||||
endmenu
|
||||
@@ -1,47 +0,0 @@
|
||||
## This file states the license of this package and possibly its subpackages
|
||||
## in machine and human readable format. The PackageName refers to the package
|
||||
## whose license is defined by PackageLicenseConcluded.
|
||||
## For more information about this file format visit the SPDX website at
|
||||
## https://spdx.org
|
||||
|
||||
SPDXVersion: SPDX-2.3
|
||||
DataLicense: CC0-1.0
|
||||
SPDXID: SPDXRef-DOCUMENT
|
||||
DocumentNamespace: spdx:kernkonzept/uvmm-c9ac5a16-e7f6-11e8-9023-e32cdff45c32
|
||||
DocumentName: uvmm
|
||||
Creator: Organization: Kernkonzept GmbH (info@kernkonzept.com)
|
||||
Created: 2018-11-13T00:00:00Z
|
||||
|
||||
## Package Information
|
||||
PackageName: uvmm
|
||||
SPDXID: SPDXRef-uvmm
|
||||
PackageOriginator: Organization: Kernkonzept GmbH (info@kernkonzept.com)
|
||||
PackageLicenseDeclared: GPL-2.0-only
|
||||
PackageLicenseConcluded: GPL-2.0-only
|
||||
FilesAnalyzed: true
|
||||
PackageCopyrightText: NOASSERTION
|
||||
PackageDownloadLocation: NOASSERTION
|
||||
|
||||
FileName: ./configs/dts/ls1021a-twr.dts
|
||||
SPDXID: SPDXRef-dts-ls1021a-twr-dts
|
||||
LicenseConcluded: GPL-2.0-only
|
||||
LicenseInfoInFile: NOASSERTION
|
||||
FileCopyrightText: NOASSERTION
|
||||
|
||||
FileName: ./configs/dts/ls1021a.dtsi
|
||||
SPDXID: SPDXRef-dts-ls1021a-dtsi
|
||||
LicenseConcluded: GPL-2.0-only
|
||||
LicenseInfoInFile: NOASSERTION
|
||||
FileCopyrightText: NOASSERTION
|
||||
|
||||
FileName: ./configs/dts/include/dt-bindings/interrupt-controller/irq.h
|
||||
SPDXID: SPDXRef-dts-irq-h
|
||||
LicenseConcluded: GPL-2.0-only OR MIT
|
||||
LicenseInfoInFile: NOASSERTION
|
||||
FileCopyrightText: NOASSERTION
|
||||
|
||||
FileName: ./configs/dts/include/dt-bindings/interrupt-controller/arm-gic.h
|
||||
SPDXID: SPDXRef-dts-arm-gic-h
|
||||
LicenseConcluded: GPL-2.0-only OR MIT
|
||||
LicenseInfoInFile: NOASSERTION
|
||||
FileCopyrightText: NOASSERTION
|
||||
@@ -1,6 +0,0 @@
|
||||
PKGDIR = .
|
||||
L4DIR ?= $(PKGDIR)/../..
|
||||
|
||||
TARGET = server configs tools
|
||||
|
||||
include $(L4DIR)/mk/subdir.mk
|
||||
@@ -1,22 +0,0 @@
|
||||
# L4Re uvmm
|
||||
|
||||
uvmm is the virtual machine monitor for the L4Re operating system. It
|
||||
allows configuring and executing guest OSes on top of L4Re. It provides
|
||||
a number of virtual interfaces to the guest, so that it can interact with
|
||||
L4 components in a secure way.
|
||||
|
||||
# Documentation
|
||||
|
||||
This package is part of the L4Re operating system. For documentation and
|
||||
build instructions see the
|
||||
[L4Re wiki](https://kernkonzept.com/L4Re/guides/l4re).
|
||||
|
||||
# Contributions
|
||||
|
||||
We welcome contributions. Please see our contributors guide on
|
||||
[how to contribute](https://kernkonzept.com/L4Re/contributing/l4re).
|
||||
|
||||
# License
|
||||
|
||||
Detailed licensing and copyright information can be found in
|
||||
the [LICENSE](LICENSE.spdx) file.
|
||||
@@ -1,29 +0,0 @@
|
||||
# Security Policy
|
||||
|
||||
This document outlines security procedures for the open-source projects of the
|
||||
L4Re Operating System Framework as found on https://github.com/kernkonzept.
|
||||
|
||||
# Reporting a vulnerability
|
||||
|
||||
Security is very important to us and we take all security vulnerabilities
|
||||
seriously. Thank you for improving the security of our open source software. If
|
||||
you have discovered a security issue, we appreciate your efforts and your
|
||||
responsible disclosure.
|
||||
|
||||
Please report a security vulnerability by sending an encrypted email to our
|
||||
security team using our [public
|
||||
key](https://www.kernkonzept.com/dl/security-at-kernkonzept.pub)
|
||||
to **security@kernkonzept.com**. The fingerprint of our public key is
|
||||
|
||||
````
|
||||
C4DC 2909 A22E D080 C012 5373 4055 CBA2 A4FD 855B
|
||||
````
|
||||
|
||||
Please include the following in your report:
|
||||
|
||||
* A description of the vulnerability
|
||||
* Steps to reproduce the vulnerability
|
||||
|
||||
A member of Kernkonzept's security team will confirm the vulnerability,
|
||||
determine its impact, and develop a fix. The fix will be applied to the master
|
||||
branch, tested, and released.
|
||||
@@ -1,11 +0,0 @@
|
||||
PKGDIR ?= ..
|
||||
L4DIR ?= $(PKGDIR)/../..
|
||||
|
||||
SRC_ASSETS_MODLIST = modules.list
|
||||
SRC_ASSETS_NED = vmm.lua uvmm.ned
|
||||
|
||||
SUBDIRS += dts
|
||||
|
||||
all:: $(SUBDIRS)
|
||||
|
||||
include $(L4DIR)/mk/assets.mk
|
||||
@@ -1,18 +0,0 @@
|
||||
PKGDIR ?= ../..
|
||||
L4DIR ?= $(PKGDIR)/../..
|
||||
|
||||
include $(L4DIR)/mk/Makeconf
|
||||
|
||||
ASSET_TYPE = dtb
|
||||
SRC_DTS = $(subst $(SRC_DIR)/,,$(wildcard $(SRC_DIR)/*.dts $(SRC_DIR)/*.dtso))
|
||||
PRIVATE_INCDIR = $(PKGDIR)/configs/dts/include $(PKGDIR)/configs/dts
|
||||
DTC_FLAGS := $(call checkdtc,-Wno-unit_address_vs_reg) \
|
||||
$(call checkdtc,-Wno-simple_bus_reg) \
|
||||
$(call checkdtc,-Wno-spi_bus_bridge) \
|
||||
$(call checkdtc,-Wno-alias_paths) \
|
||||
$(call checkdtc,-@)
|
||||
|
||||
clean::
|
||||
$(VERBOSE)$(RM) $(wildcard *.dtb *.dtbo)
|
||||
|
||||
include $(L4DIR)/mk/assets.mk
|
||||
@@ -1,21 +0,0 @@
|
||||
/*
|
||||
* GIC for ARM guests.
|
||||
*/
|
||||
|
||||
/ {
|
||||
icsoc {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <1>;
|
||||
compatible = "simple-bus";
|
||||
ranges;
|
||||
|
||||
gic: interrupt-controller {
|
||||
compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic";
|
||||
#interrupt-cells = <3>;
|
||||
#address-cells = <0>;
|
||||
interrupt-controller;
|
||||
reg = <0x1000 0x1000>,
|
||||
<0x2000 0x100>;
|
||||
};
|
||||
};
|
||||
};
|
||||
@@ -1,32 +0,0 @@
|
||||
/**
|
||||
* Basic interrupt controllers for MIPS: core IC and GIC.
|
||||
*/
|
||||
|
||||
|
||||
/ {
|
||||
cpu_intc: cpu_intc {
|
||||
#address-cells = <0>;
|
||||
compatible = "mti,cpu-interrupt-controller";
|
||||
|
||||
interrupt-controller;
|
||||
#interrupt-cells = <1>;
|
||||
};
|
||||
|
||||
soc {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <1>;
|
||||
compatible = "simple-bus";
|
||||
ranges;
|
||||
|
||||
gic: interrupt-controller {
|
||||
compatible = "mti,gic";
|
||||
|
||||
reg = <0x1bdc0000 0x20000>;
|
||||
|
||||
mti,reserved-cpu-vectors = <7>;
|
||||
|
||||
interrupt-controller;
|
||||
#interrupt-cells = <3>;
|
||||
};
|
||||
};
|
||||
};
|
||||
@@ -1,23 +0,0 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
|
||||
/*
|
||||
* This header provides constants for the ARM GIC.
|
||||
*/
|
||||
|
||||
#ifndef _DT_BINDINGS_INTERRUPT_CONTROLLER_ARM_GIC_H
|
||||
#define _DT_BINDINGS_INTERRUPT_CONTROLLER_ARM_GIC_H
|
||||
|
||||
#include <dt-bindings/interrupt-controller/irq.h>
|
||||
|
||||
/* interrupt specifier cell 0 */
|
||||
|
||||
#define GIC_SPI 0
|
||||
#define GIC_PPI 1
|
||||
|
||||
/*
|
||||
* Interrupt specifier cell 2.
|
||||
* The flags in irq.h are valid, plus those below.
|
||||
*/
|
||||
#define GIC_CPU_MASK_RAW(x) ((x) << 8)
|
||||
#define GIC_CPU_MASK_SIMPLE(num) GIC_CPU_MASK_RAW((1 << (num)) - 1)
|
||||
|
||||
#endif
|
||||
@@ -1,20 +0,0 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
|
||||
/*
|
||||
* This header provides constants for most IRQ bindings.
|
||||
*
|
||||
* Most IRQ bindings include a flags cell as part of the IRQ specifier.
|
||||
* In most cases, the format of the flags cell uses the standard values
|
||||
* defined in this header.
|
||||
*/
|
||||
|
||||
#ifndef _DT_BINDINGS_INTERRUPT_CONTROLLER_IRQ_H
|
||||
#define _DT_BINDINGS_INTERRUPT_CONTROLLER_IRQ_H
|
||||
|
||||
#define IRQ_TYPE_NONE 0
|
||||
#define IRQ_TYPE_EDGE_RISING 1
|
||||
#define IRQ_TYPE_EDGE_FALLING 2
|
||||
#define IRQ_TYPE_EDGE_BOTH (IRQ_TYPE_EDGE_FALLING | IRQ_TYPE_EDGE_RISING)
|
||||
#define IRQ_TYPE_LEVEL_HIGH 4
|
||||
#define IRQ_TYPE_LEVEL_LOW 8
|
||||
|
||||
#endif
|
||||
@@ -1,38 +0,0 @@
|
||||
/dts-v1/;
|
||||
/plugin/;
|
||||
|
||||
/**
|
||||
* Example overlay for use of the PL031 real-time clock.
|
||||
* It also adds a node to connect uvmm to the L4Re RTC service, make sure to
|
||||
* provide this cap to uvmm in your ned script.
|
||||
*/
|
||||
|
||||
/ {
|
||||
|
||||
fragment@0 {
|
||||
target-path = "/";
|
||||
__overlay__ {
|
||||
l4vmm {
|
||||
l4rtc {
|
||||
compatible = "l4rtc";
|
||||
l4vmm,rtccap = "rtc";
|
||||
};
|
||||
|
||||
virt_pl031 {
|
||||
compatible = "arm,pl031", "arm,primecell";
|
||||
reg = <0x13000 0x1000>;
|
||||
interrupts = <0x00 0x02 0x04>;
|
||||
clocks = <&apb_dummy_pclk>;
|
||||
clock-names = "apb_pclk";
|
||||
};
|
||||
|
||||
apb_dummy_pclk: dummy_clk {
|
||||
compatible = "fixed-clock";
|
||||
#clock-cells = <0>;
|
||||
clock-frequency = <1000000>;
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
};
|
||||
@@ -1,13 +0,0 @@
|
||||
/*
|
||||
* Skeleton device tree; the bare minimum needed to boot; just include and
|
||||
* add a compatible value. The bootloader will typically populate the memory
|
||||
* node.
|
||||
*/
|
||||
|
||||
/ {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <1>;
|
||||
chosen { };
|
||||
aliases { };
|
||||
cpus { };
|
||||
};
|
||||
@@ -1,13 +0,0 @@
|
||||
/*
|
||||
* Skeleton device tree in the 64 bits version; the bare minimum
|
||||
* needed to boot; just include and add a compatible value. The
|
||||
* bootloader will typically populate the memory node.
|
||||
*/
|
||||
|
||||
/ {
|
||||
#address-cells = <2>;
|
||||
#size-cells = <2>;
|
||||
chosen { };
|
||||
aliases { };
|
||||
cpus { };
|
||||
};
|
||||
@@ -1,33 +0,0 @@
|
||||
/**
|
||||
* L4 uvmm system management console. Provides reboot and poweroff hooks
|
||||
* towards the vmm.
|
||||
*/
|
||||
|
||||
/ {
|
||||
vmm-syscon {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <1>;
|
||||
compatible = "simple-bus";
|
||||
ranges = <0x0 0x30030000 0x4>;
|
||||
|
||||
l4syscon: syscon {
|
||||
compatible = "syscon", "syscon-l4vmm";
|
||||
reg = <0x0 0x4>;
|
||||
little-endian;
|
||||
};
|
||||
|
||||
reboot {
|
||||
compatible = "syscon-reboot";
|
||||
regmap = <&l4syscon>;
|
||||
offset = <0x0>;
|
||||
mask = <0x66>;
|
||||
};
|
||||
|
||||
poweroff {
|
||||
compatible = "syscon-poweroff";
|
||||
regmap = <&l4syscon>;
|
||||
offset = <0x0>;
|
||||
mask = <0x0>;
|
||||
};
|
||||
};
|
||||
};
|
||||
@@ -1,165 +0,0 @@
|
||||
/*
|
||||
* Example device tree for a virtual machine on Arm SBSA compliant systems. It
|
||||
* provides MSIs and PCI hardware pass-through capabilities.
|
||||
*/
|
||||
|
||||
/dts-v1/;
|
||||
/include/ "skeleton64.dtsi"
|
||||
/include/ "vmm-devices-arm.dtsi"
|
||||
|
||||
#define CPU(x, r) cpu##x: cpu@x { \
|
||||
device_type = "cpu"; \
|
||||
compatible = "arm,armv8"; \
|
||||
reg = <r>; \
|
||||
enable-method = "psci"; \
|
||||
}
|
||||
|
||||
/ {
|
||||
model = "L4 VM";
|
||||
compatible = "l4,virt", "linux,dummy-virt";
|
||||
|
||||
interrupt-parent = <&gic>;
|
||||
|
||||
memory@0 {
|
||||
device_type = "memory";
|
||||
// Ram starts at 4GiB to make room for virtual devices. Currently, io
|
||||
// will map the ITS somewhere after 0xf0000000 so there should be no
|
||||
// RAM at this location. Size will be updated by uvmm based on ds size.
|
||||
reg = <0x1 0x00000000 0 0x0>;
|
||||
l4vmm,dscap = "ram";
|
||||
};
|
||||
|
||||
icsoc {
|
||||
compatible = "simple-bus";
|
||||
#address-cells = <2>;
|
||||
#size-cells = <2>;
|
||||
ranges;
|
||||
|
||||
/* Dist/Redist interface for gicv3 (0x10000, 0x20000 * number of CPUs).
|
||||
* The entries provided here support up to 32 CPUs.
|
||||
*/
|
||||
gic: interrupt-controller {
|
||||
compatible = "arm,gic-v3";
|
||||
#interrupt-cells = <3>;
|
||||
#address-cells = <2>;
|
||||
#size-cells = <2>;
|
||||
ranges;
|
||||
interrupt-controller;
|
||||
reg = <0 0x40000 0 0x10000>, // GICD
|
||||
<0 0x50000 0 0x400000>; // GICR
|
||||
};
|
||||
|
||||
its: msi-controller@500000 {
|
||||
#msi-cells = <1>;
|
||||
compatible = "arm,gic-v3-its";
|
||||
reg = <0x0 0x500000 0x0 0x20000>; // GITS
|
||||
msi-controller;
|
||||
};
|
||||
};
|
||||
|
||||
timer {
|
||||
compatible = "arm,armv8-timer";
|
||||
interrupts = <1 13 0xf08>,
|
||||
<1 14 0xf08>,
|
||||
<1 11 0xf08>,
|
||||
<1 10 0xf08>;
|
||||
always-on;
|
||||
};
|
||||
|
||||
cpus {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <0>;
|
||||
|
||||
// Beware: the Aff0 field must be in the [0..15] range!
|
||||
CPU( 0, 0x0000);
|
||||
CPU( 1, 0x0001);
|
||||
CPU( 2, 0x0002);
|
||||
CPU( 3, 0x0003);
|
||||
CPU( 4, 0x0004);
|
||||
CPU( 5, 0x0005);
|
||||
CPU( 6, 0x0006);
|
||||
CPU( 7, 0x0007);
|
||||
CPU( 8, 0x0008);
|
||||
CPU( 9, 0x0009);
|
||||
CPU(10, 0x000a);
|
||||
CPU(11, 0x000b);
|
||||
CPU(12, 0x000c);
|
||||
CPU(13, 0x000d);
|
||||
CPU(14, 0x000e);
|
||||
CPU(15, 0x000f);
|
||||
CPU(16, 0x0100);
|
||||
CPU(17, 0x0101);
|
||||
CPU(18, 0x0102);
|
||||
CPU(19, 0x0103);
|
||||
CPU(20, 0x0104);
|
||||
CPU(21, 0x0105);
|
||||
CPU(22, 0x0106);
|
||||
CPU(23, 0x0107);
|
||||
CPU(24, 0x0108);
|
||||
CPU(25, 0x0109);
|
||||
CPU(26, 0x010a);
|
||||
CPU(27, 0x010b);
|
||||
CPU(28, 0x010c);
|
||||
CPU(29, 0x010d);
|
||||
CPU(30, 0x010e);
|
||||
CPU(31, 0x010f);
|
||||
};
|
||||
|
||||
pcie@10000000 {
|
||||
// Interrupt map for any number of devices. Legacy interrupts are
|
||||
// interwoven based on the two device number LSBs. Uvmm supports only
|
||||
// one bus...
|
||||
interrupt-map-mask = <0x1800 0x00 0x00 0x07>;
|
||||
|
||||
// Interrupt cell (SPI / line / LEVEL triggered) -+
|
||||
// GIC #address-cells (ignored) --+ |
|
||||
// GIC phandle -----------+ | |
|
||||
// PCI interrupt pin -+ | | |
|
||||
// | | | |
|
||||
// PCI address | | | |
|
||||
// ---------------- --+- -+-- ----+---- ----------+---
|
||||
interrupt-map = <
|
||||
0x0000 0x00 0x00 0x01 &gic 0x00 0x00 0x00 0x83 0x04
|
||||
0x0000 0x00 0x00 0x02 &gic 0x00 0x00 0x00 0x84 0x04
|
||||
0x0000 0x00 0x00 0x03 &gic 0x00 0x00 0x00 0x85 0x04
|
||||
0x0000 0x00 0x00 0x04 &gic 0x00 0x00 0x00 0x86 0x04
|
||||
0x0800 0x00 0x00 0x01 &gic 0x00 0x00 0x00 0x84 0x04
|
||||
0x0800 0x00 0x00 0x02 &gic 0x00 0x00 0x00 0x85 0x04
|
||||
0x0800 0x00 0x00 0x03 &gic 0x00 0x00 0x00 0x86 0x04
|
||||
0x0800 0x00 0x00 0x04 &gic 0x00 0x00 0x00 0x83 0x04
|
||||
0x1000 0x00 0x00 0x01 &gic 0x00 0x00 0x00 0x85 0x04
|
||||
0x1000 0x00 0x00 0x02 &gic 0x00 0x00 0x00 0x86 0x04
|
||||
0x1000 0x00 0x00 0x03 &gic 0x00 0x00 0x00 0x83 0x04
|
||||
0x1000 0x00 0x00 0x04 &gic 0x00 0x00 0x00 0x84 0x04
|
||||
0x1800 0x00 0x00 0x01 &gic 0x00 0x00 0x00 0x86 0x04
|
||||
0x1800 0x00 0x00 0x02 &gic 0x00 0x00 0x00 0x83 0x04
|
||||
0x1800 0x00 0x00 0x03 &gic 0x00 0x00 0x00 0x84 0x04
|
||||
0x1800 0x00 0x00 0x04 &gic 0x00 0x00 0x00 0x85 0x04
|
||||
>;
|
||||
#interrupt-cells = <0x01>;
|
||||
|
||||
// The bridge windows (IO, MEM32, MEM64). We still have to officially map
|
||||
// x86 ports because real hardware provides such BARs even though they
|
||||
// won't be used on Arm.
|
||||
ranges = <0x01000000 0x00 0x00000000 0x00 0x00000000 0x00 0x10000
|
||||
0x02000000 0x00 0x40000000 0x00 0x40000000 0x00 0x30000000
|
||||
0x43000000 0xc0 0x00000000 0xc0 0x00000000 0x10 0x00000000>;
|
||||
reg = <0x0 0x10000000 0x00 0x10000000>;
|
||||
msi-map = <0 &its 0 0x10000>;
|
||||
dma-coherent;
|
||||
bus-range = <0x00 0xff>;
|
||||
#address-cells = <0x03>;
|
||||
#size-cells = <0x02>;
|
||||
device_type = "pci";
|
||||
compatible = "pci-host-ecam-generic";
|
||||
};
|
||||
|
||||
l4vmm {
|
||||
ranges = <0x0 0x0 0x10000 0x21000>;
|
||||
};
|
||||
|
||||
psci {
|
||||
compatible = "arm,psci-1.0";
|
||||
method = "hvc";
|
||||
};
|
||||
};
|
||||
@@ -1,96 +0,0 @@
|
||||
/*
|
||||
* Device tree for a virtual machine without any hardware pass-through.
|
||||
*/
|
||||
|
||||
/dts-v1/;
|
||||
/include/ "skeleton.dtsi"
|
||||
/include/ "vmm-devices-arm.dtsi"
|
||||
|
||||
/ {
|
||||
model = "L4 VM";
|
||||
compatible = "l4,virt", "linux,dummy-virt";
|
||||
|
||||
interrupt-parent = <&gic>;
|
||||
|
||||
memory@0 {
|
||||
device_type = "memory";
|
||||
// Ram starts at 128MB
|
||||
// Size will be updated by uvmm based on ds size
|
||||
reg = <0x8000000 0x0>;
|
||||
l4vmm,dscap = "ram";
|
||||
};
|
||||
|
||||
icsoc {
|
||||
compatible = "simple-bus";
|
||||
#address-cells = <1>;
|
||||
#size-cells = <1>;
|
||||
ranges = <0x0 0x40000 0x100000>;
|
||||
|
||||
/* Uvmm will adapt the compatible string depending on the present gic
|
||||
* version. It expects reg entries that provide enough space for the
|
||||
* Cpu/Dist interface for gicv2 (at least 0x1000, 0x1000) or the
|
||||
* Dist/Redist interface for gicv3 (0x10000, 0x20000 * number of CPUs).
|
||||
* The entries provided here support any gicv2 setup or a gicv3 setup
|
||||
* with up to 4 CPUs.
|
||||
*/
|
||||
gic: interrupt-controller {
|
||||
compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic";
|
||||
#interrupt-cells = <3>;
|
||||
#address-cells = <0>;
|
||||
interrupt-controller;
|
||||
reg = <0x10000 0x10000>,
|
||||
<0x20000 0x80000>;
|
||||
};
|
||||
};
|
||||
|
||||
timer {
|
||||
compatible = "arm,armv7-timer";
|
||||
interrupts = <1 13 0xf08>,
|
||||
<1 14 0xf08>,
|
||||
<1 11 0xf08>,
|
||||
<1 10 0xf08>;
|
||||
always-on;
|
||||
};
|
||||
|
||||
cpus {
|
||||
#address-cells = <2>;
|
||||
#size-cells = <0>;
|
||||
|
||||
cpu@0 {
|
||||
device_type = "cpu";
|
||||
compatible = "arm,armv7";
|
||||
reg = <0x0 0x0>;
|
||||
enable-method = "psci";
|
||||
};
|
||||
|
||||
cpu@1 {
|
||||
device_type = "cpu";
|
||||
compatible = "arm,armv7";
|
||||
reg = <0x0 0x1>;
|
||||
enable-method = "psci";
|
||||
};
|
||||
|
||||
cpu@2 {
|
||||
device_type = "cpu";
|
||||
compatible = "arm,armv7";
|
||||
reg = <0x0 0x2>;
|
||||
enable-method = "psci";
|
||||
};
|
||||
|
||||
cpu@3 {
|
||||
device_type = "cpu";
|
||||
compatible = "arm,armv7";
|
||||
reg = <0x0 0x3>;
|
||||
enable-method = "psci";
|
||||
};
|
||||
};
|
||||
|
||||
l4vmm {
|
||||
ranges = <0x0 0x10000 0x21000>;
|
||||
};
|
||||
|
||||
psci {
|
||||
compatible = "arm,psci-1.0";
|
||||
method = "hvc";
|
||||
};
|
||||
};
|
||||
@@ -1,93 +0,0 @@
|
||||
/*
|
||||
* Device tree for a virtual machine without any hardware pass-through.
|
||||
*/
|
||||
|
||||
/dts-v1/;
|
||||
/include/ "skeleton64.dtsi"
|
||||
/include/ "vmm-devices-arm.dtsi"
|
||||
|
||||
/ {
|
||||
model = "L4 VM";
|
||||
compatible = "l4,virt", "linux,dummy-virt";
|
||||
|
||||
interrupt-parent = <&gic>;
|
||||
|
||||
memory@0 {
|
||||
device_type = "memory";
|
||||
// Ram starts at 128MB
|
||||
// Size will be updated by uvmm based on ds size
|
||||
reg = <0 0x8000000 0 0x0>;
|
||||
l4vmm,dscap = "ram";
|
||||
};
|
||||
|
||||
icsoc {
|
||||
compatible = "simple-bus";
|
||||
#address-cells = <2>;
|
||||
#size-cells = <2>;
|
||||
ranges;
|
||||
|
||||
/* Uvmm will adapt the compatible string depending on the present gic
|
||||
* version. It expects reg entries that provide enough space for the
|
||||
* Cpu/Dist interface for gicv2 (at least 0x1000, 0x1000) or the
|
||||
* Dist/Redist interface for gicv3 (0x10000, 0x20000 * number of CPUs).
|
||||
* The entries provided here support any gicv2 setup or a gicv3 setup
|
||||
* with up to 32 CPUs.
|
||||
*/
|
||||
gic: interrupt-controller {
|
||||
compatible = "arm,gic-400", "arm,cortex-a15-gic", "arm,cortex-a9-gic";
|
||||
#interrupt-cells = <3>;
|
||||
#address-cells = <0>;
|
||||
interrupt-controller;
|
||||
reg = <0 0x40000 0 0x10000>,
|
||||
<0 0x50000 0 0x400000>;
|
||||
};
|
||||
};
|
||||
|
||||
timer {
|
||||
compatible = "arm,armv8-timer";
|
||||
interrupts = <1 13 0xf08>,
|
||||
<1 14 0xf08>,
|
||||
<1 11 0xf08>,
|
||||
<1 10 0xf08>;
|
||||
always-on;
|
||||
};
|
||||
|
||||
cpus {
|
||||
#address-cells = <2>;
|
||||
#size-cells = <0>;
|
||||
|
||||
cpu@0 {
|
||||
device_type = "cpu";
|
||||
compatible = "arm,armv8";
|
||||
reg = <0x0 0x0>;
|
||||
enable-method = "psci";
|
||||
};
|
||||
cpu@1 {
|
||||
device_type = "cpu";
|
||||
compatible = "arm,armv8";
|
||||
reg = <0x0 0x1>;
|
||||
enable-method = "psci";
|
||||
};
|
||||
cpu@2 {
|
||||
device_type = "cpu";
|
||||
compatible = "arm,armv8";
|
||||
reg = <0x0 0x2>;
|
||||
enable-method = "psci";
|
||||
};
|
||||
cpu@3 {
|
||||
device_type = "cpu";
|
||||
compatible = "arm,armv8";
|
||||
reg = <0x0 0x3>;
|
||||
enable-method = "psci";
|
||||
};
|
||||
};
|
||||
|
||||
l4vmm {
|
||||
ranges = <0x0 0x0 0x10000 0x21000>;
|
||||
};
|
||||
|
||||
psci {
|
||||
compatible = "arm,psci-1.0";
|
||||
method = "hvc";
|
||||
};
|
||||
};
|
||||
@@ -1,42 +0,0 @@
|
||||
/dts-v1/;
|
||||
/plugin/;
|
||||
|
||||
/ {
|
||||
|
||||
fragment@0 {
|
||||
target-path = "/pcie@10000000";
|
||||
__overlay__ {
|
||||
ranges = <0x1000000 0x00 0x00000000 0x00 0x3eff0000 0x00 0x00010000
|
||||
0x2000000 0x00 0x10000000 0x00 0x10000000 0x00 0x2eff0000
|
||||
0x3000000 0x10 0x00000000 0x10 0x00000000 0x01 0x00000000>;
|
||||
};
|
||||
};
|
||||
|
||||
fragment@1 {
|
||||
target-path = "/";
|
||||
__overlay__ {
|
||||
l4vmm {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <1>;
|
||||
compatible = "simple-bus";
|
||||
ranges = <0x0 0x0 0x30000000 0x21000>;
|
||||
|
||||
virtio_uart@20000 {
|
||||
compatible = "virtio,mmio";
|
||||
reg = <0x20000 0x100>;
|
||||
interrupts = <0 122 4>;
|
||||
l4vmm,vdev = "console";
|
||||
};
|
||||
|
||||
virtio_net@10000 {
|
||||
compatible = "virtio,mmio";
|
||||
reg = <0x10000 0x200>;
|
||||
interrupts = <0 123 4>;
|
||||
l4vmm,vdev = "proxy";
|
||||
l4vmm,virtiocap = "net";
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
};
|
||||
@@ -1,34 +0,0 @@
|
||||
/*
|
||||
* Device tree for a virtual machine without any hardware pass-through.
|
||||
*/
|
||||
|
||||
/dts-v1/;
|
||||
/include/ "skeleton.dtsi"
|
||||
/include/ "syscon.dtsi"
|
||||
/include/ "vmm-devices-arm.dtsi"
|
||||
#include <dt-bindings/interrupt-controller/arm-gic.h>
|
||||
|
||||
/ {
|
||||
model = "L4 VM";
|
||||
compatible = "l4,virt", "linux,dummy-virt";
|
||||
|
||||
interrupt-parent = <&gic>;
|
||||
|
||||
gic: interrupt-controller@1d00000 {
|
||||
compatible = "arm,gic-v3";
|
||||
#interrupt-cells = <3>;
|
||||
interrupt-controller;
|
||||
interrupts = <GIC_PPI 9 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>;
|
||||
reg = <0x1d00000 0x10000>, /* GICD */
|
||||
<0x1d40000 0x40000>; /* GICR */
|
||||
};
|
||||
|
||||
timer {
|
||||
compatible = "arm,cortex-a15-timer", "arm,armv7-timer";
|
||||
interrupts = <1 13 0xf08>,
|
||||
<1 14 0xf08>,
|
||||
<1 11 0xf08>,
|
||||
<1 10 0xf08>;
|
||||
always-on;
|
||||
};
|
||||
};
|
||||
@@ -1,30 +0,0 @@
|
||||
/*
|
||||
* Device tree for a virtual machine without any hardware pass-through.
|
||||
*/
|
||||
|
||||
/dts-v1/;
|
||||
/include/ "skeleton.dtsi"
|
||||
/include/ "syscon.dtsi"
|
||||
/include/ "ic-arm.dtsi"
|
||||
/include/ "vmm-devices-arm.dtsi"
|
||||
|
||||
/ {
|
||||
model = "L4 VM";
|
||||
compatible = "l4,virt", "linux,dummy-virt";
|
||||
|
||||
interrupt-parent = <&gic>;
|
||||
|
||||
icsoc {
|
||||
ranges = <0x0 0x10480000 0x3000>;
|
||||
};
|
||||
|
||||
timer {
|
||||
compatible = "arm,cortex-a15-timer", "arm,armv7-timer";
|
||||
interrupts = <1 13 0xf08>,
|
||||
<1 14 0xf08>,
|
||||
<1 11 0xf08>,
|
||||
<1 10 0xf08>;
|
||||
always-on;
|
||||
};
|
||||
};
|
||||
|
||||
@@ -1,32 +0,0 @@
|
||||
/**
|
||||
* Device tree for purely virtual guests on the MIPS architecture.
|
||||
*/
|
||||
|
||||
/dts-v1/;
|
||||
|
||||
/include/ "skeleton.dtsi"
|
||||
/include/ "ic-mips.dtsi"
|
||||
/include/ "vmm-devices-mips.dtsi"
|
||||
/include/ "syscon.dtsi"
|
||||
|
||||
/ {
|
||||
model = "L4 VM";
|
||||
compatible = "l4,virt", "linux,dummy-virt";
|
||||
|
||||
cpus {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <0>;
|
||||
|
||||
CPU0: cpu@0 {
|
||||
device_type = "cpu";
|
||||
compatible = "mips,p5600";
|
||||
reg = <0x0>;
|
||||
};
|
||||
|
||||
CPU1: cpu@1 {
|
||||
device_type = "cpu";
|
||||
compatible = "mips,p5600";
|
||||
reg = <0x1>;
|
||||
};
|
||||
};
|
||||
};
|
||||
@@ -1,44 +0,0 @@
|
||||
/**
|
||||
* Device tree for purely virtual guests on the MIPS architecture.
|
||||
*/
|
||||
|
||||
/dts-v1/;
|
||||
|
||||
/include/ "skeleton64.dtsi"
|
||||
/include/ "ic-mips.dtsi"
|
||||
/include/ "vmm-devices-mips.dtsi"
|
||||
/include/ "syscon.dtsi"
|
||||
|
||||
/ {
|
||||
model = "L4 VM";
|
||||
compatible = "l4,virt", "linux,dummy-virt";
|
||||
|
||||
cpus {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <0>;
|
||||
|
||||
CPU0: cpu@0 {
|
||||
device_type = "cpu";
|
||||
compatible = "mips,i6400";
|
||||
reg = <0x0>;
|
||||
};
|
||||
|
||||
CPU1: cpu@1 {
|
||||
device_type = "cpu";
|
||||
compatible = "mips,i6400";
|
||||
reg = <0x1>;
|
||||
};
|
||||
};
|
||||
|
||||
l4vmm {
|
||||
ranges = <0x0 0x0 0x30000000 0x21000>;
|
||||
};
|
||||
|
||||
soc {
|
||||
ranges = <0x0 0x0 0x0 0xffffffff>;
|
||||
};
|
||||
|
||||
vmm-syscon {
|
||||
ranges = <0x0 0x0 0x30030000 0x4>;
|
||||
};
|
||||
};
|
||||
@@ -1,31 +0,0 @@
|
||||
/*
|
||||
* Device tree for a virtual machine without any hardware pass-through.
|
||||
*/
|
||||
|
||||
/dts-v1/;
|
||||
/include/ "skeleton.dtsi"
|
||||
/include/ "syscon.dtsi"
|
||||
/include/ "ic-arm.dtsi"
|
||||
/include/ "vmm-devices-arm.dtsi"
|
||||
|
||||
/ {
|
||||
model = "L4 VM";
|
||||
compatible = "l4,virt", "linux,dummy-virt";
|
||||
|
||||
interrupt-parent = <&gic>;
|
||||
|
||||
timer {
|
||||
compatible = "arm,cortex-a15-timer", "arm,armv7-timer";
|
||||
interrupts = <1 13 0xf08>,
|
||||
<1 14 0xf08>,
|
||||
<1 11 0xf08>,
|
||||
<1 10 0xf08>;
|
||||
clock-frequency = <6144000>;
|
||||
always-on;
|
||||
};
|
||||
|
||||
icsoc {
|
||||
ranges = <0x0 0x48210000 0x3000>;
|
||||
};
|
||||
};
|
||||
|
||||
@@ -1,262 +0,0 @@
|
||||
/*
|
||||
* Device tree for a virtual machine without any hardware pass-through.
|
||||
*
|
||||
* Information sources:
|
||||
* https://github.com/devicetree-org/devicetree-specification/releases/tag/v0.3
|
||||
* For PCI reg cell encoding:
|
||||
* [1] https://www.devicetree.org/open-firmware/bindings/pci/pci2_1.pdf
|
||||
* For (E)ISA reg cell encoding:
|
||||
* [2] https://www.devicetree.org/open-firmware/bindings/isa/isa0_4d.ps
|
||||
*/
|
||||
|
||||
/dts-v1/;
|
||||
|
||||
/include/ "skeleton64.dtsi"
|
||||
|
||||
#define CPU(x) cpu##x: cpu@x { \
|
||||
device_type = "cpu"; \
|
||||
compatible = "virt-intel"; \
|
||||
reg = <x>; \
|
||||
}
|
||||
|
||||
/ {
|
||||
model = "L4 VM";
|
||||
compatible = "l4,virt", "linux,dummy-virt";
|
||||
|
||||
memory@0 {
|
||||
device_type = "memory";
|
||||
reg = <0x0 0x00000000 0x0 0x80000000
|
||||
0x1 0x00000000 0xffffffff 0x0>;
|
||||
l4vmm,dscap = "ram";
|
||||
};
|
||||
|
||||
IOAPIC: ioapic {
|
||||
compatible = "intel,ioapic";
|
||||
interrupt-controller;
|
||||
msi-parent = <&msi_ctrl>;
|
||||
#address-cells = <0>;
|
||||
#interrupt-cells = <1>;
|
||||
};
|
||||
|
||||
msi_ctrl: msictrl {
|
||||
compatible = "intel,msi-controller";
|
||||
msi-controller;
|
||||
#msi-cells = <0>;
|
||||
};
|
||||
|
||||
cpus {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <0>;
|
||||
|
||||
CPU(0);
|
||||
CPU(1);
|
||||
CPU(2);
|
||||
CPU(3);
|
||||
CPU(4);
|
||||
CPU(5);
|
||||
CPU(6);
|
||||
CPU(7);
|
||||
CPU(8);
|
||||
CPU(9);
|
||||
CPU(10);
|
||||
CPU(11);
|
||||
CPU(12);
|
||||
CPU(13);
|
||||
CPU(14);
|
||||
CPU(15);
|
||||
CPU(16);
|
||||
CPU(17);
|
||||
CPU(18);
|
||||
CPU(19);
|
||||
CPU(20);
|
||||
CPU(21);
|
||||
CPU(22);
|
||||
CPU(23);
|
||||
CPU(24);
|
||||
CPU(25);
|
||||
CPU(26);
|
||||
CPU(27);
|
||||
CPU(28);
|
||||
CPU(29);
|
||||
CPU(30);
|
||||
CPU(31);
|
||||
};
|
||||
|
||||
pit {
|
||||
compatible = "virt-pit";
|
||||
reg = <0x0 0x0 0x0 0x0>;
|
||||
interrupt-parent = <&IOAPIC>;
|
||||
interrupts = <0>;
|
||||
};
|
||||
|
||||
acpi_platform {
|
||||
compatible = "virt-acpi";
|
||||
interrupt-parent = <&IOAPIC>;
|
||||
interrupts = <9>;
|
||||
l4vmm,pwrinput = "acpi_pwr_input";
|
||||
};
|
||||
|
||||
acpi_timer {
|
||||
compatible = "acpi-timer";
|
||||
};
|
||||
|
||||
rtc {
|
||||
compatible = "virt-rtc";
|
||||
interrupt-parent = <&IOAPIC>;
|
||||
interrupts = <8>;
|
||||
reg = <0x0 0x0 0x0 0x0>;
|
||||
};
|
||||
|
||||
uart8250 {
|
||||
compatible = "ns8250", "uart,8250";
|
||||
reg = <0x0 0x0 0x0 0x0>;
|
||||
interrupt-parent = <&IOAPIC>;
|
||||
interrupts = <4>;
|
||||
/* Redirecting to another vcon channel is possible: */
|
||||
/* l4vmm,vcon_cap = "uart"; */
|
||||
};
|
||||
|
||||
l4rtc {
|
||||
compatible = "l4rtc";
|
||||
l4vmm,rtccap = "rtc";
|
||||
};
|
||||
|
||||
kvm_clock {
|
||||
compatible = "kvm-clock";
|
||||
reg = <0x0 0x0 0x0 0x0>;
|
||||
};
|
||||
|
||||
isa {
|
||||
device_type = "eisa";
|
||||
#address-cells = <2>;
|
||||
#size-cells = <1>;
|
||||
// The first cell of a child node's reg property encodes the
|
||||
// following information. See the ISA bus device-tree binding [2]
|
||||
// for more details:
|
||||
//
|
||||
// [2] 11-bit aliased (IOPORT only)
|
||||
// [1] 10-bit aliased (IOPORT only)
|
||||
// [0] 0=MMIO32, 1=IOPORT
|
||||
//
|
||||
// The standard ranges property defines the translation of child
|
||||
// reg address entries into the parent address space. Effectively
|
||||
// removes the upper word. For the purpose of the ISA translation,
|
||||
// only bit [0] is considered of the first word.
|
||||
ranges = <0x0 0x0 0x0 0x0 0xffffffff
|
||||
0x1 0x0 0x0 0x0 0x1000>;
|
||||
|
||||
// example {
|
||||
// reg = < 0x0 0xA0000 0x20000 // MMIO [0xA0000-0xBFFFF]
|
||||
// 0x1 0x3C0 0x20 > // IO [0x3C0-0x3DF]
|
||||
// }
|
||||
|
||||
isa_debugport {
|
||||
compatible = "l4vmm,isa-debugport";
|
||||
reg = <0x1 0x402 0x1>;
|
||||
l4vmm,vcon_cap = "debug";
|
||||
};
|
||||
|
||||
qemu_fw_if {
|
||||
compatible = "l4vmm,qemu-fw-cfg";
|
||||
l4vmm,kernel = "";
|
||||
l4vmm,ramdisk = "";
|
||||
l4vmm,cmdline = "";
|
||||
reg = <0x1 0x510 0x0c>;
|
||||
};
|
||||
};
|
||||
|
||||
pci0: pci@aa000000 {
|
||||
compatible = "virt-pci-bridge";
|
||||
device_type = "pci";
|
||||
interrupt-parent = <&IOAPIC>;
|
||||
msi-parent = <&msi_ctrl>;
|
||||
bus-range = <0x0 0xff>;
|
||||
#address-cells = <3>;
|
||||
#size-cells = <2>;
|
||||
#interrupt-cells = <1>;
|
||||
|
||||
// The first cell encodes the following information. See the PCI
|
||||
// bus device-tree binding [1] for more details:
|
||||
//
|
||||
// [31] non-relocatable
|
||||
// [30] prefetchable
|
||||
// [29] aliased
|
||||
// [25:24] 0=CFGSPACE, 1=IOPORT, 2=MMIO32, 3=MMIO64
|
||||
// [23:16] bus
|
||||
// [15:11] device
|
||||
// [10:8] function
|
||||
// [7:0] register (used to indicate BAR register, e.g. 0x10)
|
||||
//
|
||||
// The standard ranges property defines the translation of child
|
||||
// reg address entries into the parent address space. Effectively
|
||||
// removes the upper word. For the purpose of the PCI translation,
|
||||
// only bits [25:24] are considered of the first word.
|
||||
//
|
||||
// Attention: the ranges property is parsed by uvmm and by the
|
||||
// firmware to detect the bridge windows!
|
||||
ranges = <0x01000000 0x0 0x00006000 0x0 0x00006000 0x0 0x5000
|
||||
0x02000000 0x0 0xaa000000 0x0 0xaa000000 0x0 0x10000000
|
||||
0x03000000 0x3 0x00000000 0x3 0x00000000 0x1 0x00000000>;
|
||||
|
||||
// ECAM MCFG window
|
||||
reg = <0x00 0xb0000000 0x00 0x10000000>;
|
||||
|
||||
// Every virtual device needs a cfgspace address as first reg
|
||||
// entry. Currently uvmm will ignore the bus/device/function
|
||||
// address, though.
|
||||
|
||||
virtio_uart@0 {
|
||||
compatible = "virtio,pci";
|
||||
// The register property is required to be structured as follows:
|
||||
// reg 0: CFGSPACE address
|
||||
// reg 1: BAR[0] MMIO memory region for the MSIX table: 2 pages.
|
||||
// reg 2: BAR[1] IO port range for the device configuration.
|
||||
// The address of all entries should be 0 because it's defined as
|
||||
// *offset* from the associated BAR register.
|
||||
reg = <0x00000000 0x0 0x0 0x0 0x0000
|
||||
0x02000010 0x0 0x0 0x0 0x2000
|
||||
0x01000014 0x0 0x0 0x0 0x80>;
|
||||
msi-parent = <&msi_ctrl>;
|
||||
l4vmm,vdev = "console";
|
||||
/* Emulated UART is used by default for best bring-up
|
||||
* experience */
|
||||
status = "disabled";
|
||||
};
|
||||
|
||||
virtio_net@1 {
|
||||
compatible = "virtio,pci";
|
||||
// The reg property requirements are described in virtio_uart.
|
||||
reg = <0x00000800 0x0 0x0 0x0 0x0000
|
||||
0x02000810 0x0 0x0 0x0 0x2000
|
||||
0x01000814 0x0 0x0 0x0 0x80>;
|
||||
msi-parent = <&msi_ctrl>;
|
||||
l4vmm,virtiocap = "net";
|
||||
l4vmm,vdev = "proxy";
|
||||
};
|
||||
|
||||
virtio_disk@2 {
|
||||
compatible = "virtio,pci";
|
||||
// The reg property requirements are described in virtio_uart.
|
||||
reg = <0x00001000 0x0 0x0 0x0 0x0000
|
||||
0x02001010 0x0 0x0 0x0 0x2000
|
||||
0x01001014 0x0 0x0 0x0 0x100>;
|
||||
msi-parent = <&msi_ctrl>;
|
||||
l4vmm,virtiocap = "qdrv";
|
||||
l4vmm,vdev = "proxy";
|
||||
};
|
||||
};
|
||||
|
||||
rom@ffc84000 {
|
||||
compatible = "l4vmm,rom";
|
||||
reg = <0x0 0xffc84000 0x0 0x37c000>;
|
||||
l4vmm,dscap = "bios_code";
|
||||
};
|
||||
|
||||
nvm@ffc00000 {
|
||||
compatible = "cfi-flash";
|
||||
reg = <0x0 0xffc00000 0x0 0x84000>;
|
||||
l4vmm,dscap = "bios_vars";
|
||||
erase-size = <4>;
|
||||
bank-width = <4>;
|
||||
};
|
||||
};
|
||||
@@ -1,77 +0,0 @@
|
||||
/**
|
||||
* Device tree for purely virtual guests on the RISC-V architecture.
|
||||
*
|
||||
* Expects the RISCV_ISA macro to be defined when being included.
|
||||
*/
|
||||
|
||||
/include/ "vmm-devices-riscv.dtsi"
|
||||
/include/ "syscon.dtsi"
|
||||
|
||||
#define CPU(x) cpu##x: cpu@x { \
|
||||
device_type = "cpu"; \
|
||||
reg = <x>; \
|
||||
status = "okay"; \
|
||||
compatible = "riscv"; \
|
||||
riscv,isa = RISCV_ISA; \
|
||||
\
|
||||
cpu##x##_intc: interrupt-controller { \
|
||||
#interrupt-cells = <0x01>; \
|
||||
interrupt-controller; \
|
||||
compatible = "riscv,cpu-intc"; \
|
||||
}; \
|
||||
}
|
||||
|
||||
#define EXT_INT_AT_CPU(x) &cpu##x##_intc 0x09
|
||||
|
||||
/ {
|
||||
#address-cells = <2>;
|
||||
#size-cells = <2>;
|
||||
|
||||
model = "L4 VM";
|
||||
compatible = "l4,virt", "linux,dummy-virt";
|
||||
|
||||
chosen {
|
||||
stdout-path = "uart0";
|
||||
};
|
||||
|
||||
cpus {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <0>;
|
||||
|
||||
/* NOTE: If a CPU(n) is added here, a corresponding EXT_INT_AT_CPU(n)
|
||||
must be added to interrupts-extended in the PLIC below. */
|
||||
CPU(0);
|
||||
CPU(1);
|
||||
CPU(2);
|
||||
CPU(3);
|
||||
};
|
||||
|
||||
soc {
|
||||
#address-cells = <0x01>;
|
||||
#size-cells = <0x01>;
|
||||
compatible = "simple-bus";
|
||||
ranges = <0x0 0x0 0x0 0xffffffff>;
|
||||
|
||||
/* Platform-Level Interrupt Controller (PLIC) */
|
||||
gic: interrupt-controller@c000000 {
|
||||
reg = <0xc000000 0x4000000>;
|
||||
/* PLIC triggers external interrupt at interrupt controller of CPUs. */
|
||||
interrupts-extended = <EXT_INT_AT_CPU(0)
|
||||
EXT_INT_AT_CPU(1)
|
||||
EXT_INT_AT_CPU(2)
|
||||
EXT_INT_AT_CPU(3)
|
||||
>;
|
||||
interrupt-controller;
|
||||
compatible = "riscv,plic0";
|
||||
#interrupt-cells = <0x01>;
|
||||
#address-cells = <0x00>;
|
||||
};
|
||||
};
|
||||
|
||||
l4vmm {
|
||||
ranges = <0x0 0x0 0x30000000 0x21000>;
|
||||
};
|
||||
vmm-syscon {
|
||||
ranges = <0x0 0x0 0x30030000 0x4>;
|
||||
};
|
||||
};
|
||||
@@ -1,6 +0,0 @@
|
||||
/dts-v1/;
|
||||
|
||||
/include/ "skeleton.dtsi"
|
||||
|
||||
#define RISCV_ISA "rv32imafd"
|
||||
#include "virt-riscv.dtsi"
|
||||
@@ -1,6 +0,0 @@
|
||||
/dts-v1/;
|
||||
|
||||
/include/ "skeleton64.dtsi"
|
||||
|
||||
#define RISCV_ISA "rv64imafd"
|
||||
#include "virt-riscv.dtsi"
|
||||
@@ -1,50 +0,0 @@
|
||||
/**
|
||||
* Basic set of VMM virtual devices for ARM guests.
|
||||
*
|
||||
* * console device
|
||||
* * network virtio proxy device
|
||||
*/
|
||||
|
||||
/ {
|
||||
l4vmm {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <1>;
|
||||
compatible = "simple-bus";
|
||||
ranges = <0x0 0x30000000 0x21000>;
|
||||
|
||||
apb_dummy_pclk: dummy_clk {
|
||||
compatible = "fixed-clock";
|
||||
#clock-cells = <0>;
|
||||
clock-frequency = <1000000>;
|
||||
};
|
||||
|
||||
uart0: pl011_uart@2000 {
|
||||
compatible = "arm,primecell", "arm,pl011";
|
||||
reg = <0x2000 0x1000>;
|
||||
interrupt-parent = <&gic>;
|
||||
interrupts = <0 121 4>;
|
||||
clocks = <&apb_dummy_pclk>;
|
||||
clock-names = "apb_pclk";
|
||||
status = "okay";
|
||||
};
|
||||
|
||||
virtio_uart@3000 {
|
||||
compatible = "virtio,mmio";
|
||||
reg = <0x3000 0x100>;
|
||||
interrupt-parent = <&gic>;
|
||||
interrupts = <0 122 4>;
|
||||
l4vmm,vdev = "console";
|
||||
/* To be used instead of pl011 UART */
|
||||
status = "disabled";
|
||||
};
|
||||
|
||||
virtio_net@4000 {
|
||||
compatible = "virtio,mmio";
|
||||
reg = <0x4000 0x200>;
|
||||
interrupt-parent = <&gic>;
|
||||
interrupts = <0 123 4>;
|
||||
l4vmm,vdev = "proxy";
|
||||
l4vmm,virtiocap = "net";
|
||||
};
|
||||
};
|
||||
};
|
||||
@@ -1,32 +0,0 @@
|
||||
/**
|
||||
* Basic set of VMM virtual devices for MIPS guests.
|
||||
*
|
||||
* * console device
|
||||
* * network virtio proxy device
|
||||
*/
|
||||
|
||||
/ {
|
||||
l4vmm {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <1>;
|
||||
compatible = "simple-bus";
|
||||
ranges = <0x0 0x30000000 0x21000>;
|
||||
|
||||
virtio_uart@20000 {
|
||||
compatible = "virtio,mmio";
|
||||
reg = <0x20000 0x100>;
|
||||
interrupt-parent = <&gic>;
|
||||
interrupts = <0 3 4>;
|
||||
l4vmm,vdev = "console";
|
||||
};
|
||||
|
||||
virtio_net@10000 {
|
||||
compatible = "virtio,mmio";
|
||||
reg = <0x10000 0x200>;
|
||||
interrupt-parent = <&gic>;
|
||||
interrupts = <0 4 4>;
|
||||
l4vmm,vdev = "proxy";
|
||||
l4vmm,virtiocap = "net";
|
||||
};
|
||||
};
|
||||
};
|
||||
@@ -1,46 +0,0 @@
|
||||
/**
|
||||
* Basic set of VMM virtual devices for RISC-V guests.
|
||||
*
|
||||
* * console device
|
||||
* * network virtio proxy device
|
||||
*/
|
||||
|
||||
/ {
|
||||
l4vmm {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <1>;
|
||||
compatible = "simple-bus";
|
||||
ranges = <0x0 0x30000000 0x21000>;
|
||||
|
||||
uart0: ns16550a_uart@2000 {
|
||||
compatible = "ns16550a";
|
||||
reg = <0x2000 0x100>;
|
||||
interrupt-parent = <&gic>;
|
||||
interrupts = <0x01>;
|
||||
clock-frequency = <1000000>;
|
||||
l4vmm,vdev = "console";
|
||||
/* Redirecting to another vcon channel is possible: */
|
||||
/* l4vmm,vcon_cap = "uart"; */
|
||||
};
|
||||
|
||||
virtio_uart@20000 {
|
||||
compatible = "virtio,mmio";
|
||||
reg = <0x20000 0x100>;
|
||||
interrupt-parent = <&gic>;
|
||||
interrupts = <0x02>;
|
||||
l4vmm,vdev = "console";
|
||||
/* Emulated UART is used by default for best bring-up experience */
|
||||
status = "disabled";
|
||||
};
|
||||
|
||||
virtio_net@10000 {
|
||||
compatible = "virtio,mmio";
|
||||
reg = <0x10000 0x200>;
|
||||
interrupt-parent = <&gic>;
|
||||
interrupts = <0x03>;
|
||||
l4vmm,vdev = "proxy";
|
||||
l4vmm,virtiocap = "net";
|
||||
l4vmm,no-notify = <1>;
|
||||
};
|
||||
};
|
||||
};
|
||||
@@ -1,79 +0,0 @@
|
||||
From 247e6cca1bbfcd1cc8442d3f0e4cc359d9453550 Mon Sep 17 00:00:00 2001
|
||||
From: Philipp Eppelt <philipp.eppelt@kernkonzept.com>
|
||||
Date: Thu, 6 Apr 2017 14:34:45 +0200
|
||||
Subject: [PATCH] L4Re UVMM: early_printk patch & l4re-uvmm-virt.config for
|
||||
amd64
|
||||
|
||||
---
|
||||
arch/x86/configs/l4re-uvmm-virt.config | 21 +++++++++++++++++++++
|
||||
arch/x86/kernel/early_printk.c | 19 +++++++++++++++++++
|
||||
2 files changed, 40 insertions(+)
|
||||
create mode 100644 arch/x86/configs/l4re-uvmm-virt.config
|
||||
|
||||
diff --git a/arch/x86/configs/l4re-uvmm-virt.config b/arch/x86/configs/l4re-uvmm-virt.config
|
||||
new file mode 100644
|
||||
index 0000000..6c2e94f
|
||||
--- /dev/null
|
||||
+++ b/arch/x86/configs/l4re-uvmm-virt.config
|
||||
@@ -0,0 +1,21 @@
|
||||
+CONFIG_KERNEL_XZ=y
|
||||
+CONFIG_BLK_DEV_INITRD=y
|
||||
+CONFIG_CC_STACKPROTECTOR_REGULAR=y
|
||||
+CONFIG_PCI_MSI=y
|
||||
+CONFIG_OF=y
|
||||
+CONFIG_BLK_DEV_RAM=y
|
||||
+CONFIG_BLK_DEV_RAM_SIZE=16384
|
||||
+CONFIG_VIRTIO_BLK=y
|
||||
+CONFIG_SERIO_RAW=y
|
||||
+CONFIG_VT_HW_CONSOLE_BINDING=y
|
||||
+CONFIG_VIRTIO_CONSOLE=y
|
||||
+CONFIG_HW_RANDOM_VIRTIO=y
|
||||
+CONFIG_VIRTIO_PCI=y
|
||||
+# CONFIG_VIRTIO_PCI_LEGACY is not set
|
||||
+CONFIG_VIRTIO_INPUT=y
|
||||
+CONFIG_EXT3_FS=y
|
||||
+CONFIG_TMPFS=y
|
||||
+CONFIG_MESSAGE_LOGLEVEL_DEFAULT=7
|
||||
+CONFIG_DEBUG_FS=y
|
||||
+CONFIG_STACKTRACE=y
|
||||
+CONFIG_MEMTEST=y
|
||||
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
|
||||
index 8a12199..2639abb 100644
|
||||
--- a/arch/x86/kernel/early_printk.c
|
||||
+++ b/arch/x86/kernel/early_printk.c
|
||||
@@ -316,6 +316,21 @@ static struct console early_serial_console = {
|
||||
.index = -1,
|
||||
};
|
||||
|
||||
+static void early_vmcall_write(struct console *con, const char *s, unsigned n)
|
||||
+{
|
||||
+ while (*s && n-- > 0) {
|
||||
+ asm("vmcall" : : "a"(0), "c"(*s));
|
||||
+ s++;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static struct console early_vmcall_console = {
|
||||
+ .name = "earlvmcall",
|
||||
+ .write = early_vmcall_write,
|
||||
+ .flags = CON_PRINTBUFFER,
|
||||
+ .index = -1,
|
||||
+};
|
||||
+
|
||||
static void early_console_register(struct console *con, int keep_early)
|
||||
{
|
||||
if (con->index != -1) {
|
||||
@@ -344,6 +359,10 @@ static int __init setup_early_printk(char *buf)
|
||||
keep = (strstr(buf, "keep") != NULL);
|
||||
|
||||
while (*buf != '\0') {
|
||||
+ if (!strncmp(buf, "vmcall", 6)) {
|
||||
+ buf += 6;
|
||||
+ early_console_register(&early_vmcall_console, keep);
|
||||
+ }
|
||||
if (!strncmp(buf, "serial", 6)) {
|
||||
buf += 6;
|
||||
early_serial_init(buf);
|
||||
--
|
||||
2.9.4
|
||||
|
||||
@@ -1,60 +0,0 @@
|
||||
|
||||
|
||||
Do the following to compile a Linux to be used as a guest.
|
||||
|
||||
Any recent Linux kernel as well as older ones are supposed to work
|
||||
out of the box with vanilla Linux, including 3.16 and also 4.4
|
||||
and more recent versions.
|
||||
|
||||
Select the 'multi_v7_defconfig' config:
|
||||
|
||||
$ make ARCH=arm CROSS_COMPILE=arm-linux- multi_v7_defconfig
|
||||
$ make ARCH=arm CROSS_COMPILE=arm-linux- menuconfig
|
||||
|
||||
Enable at least:
|
||||
CONFIG_VIRTIO_CONSOLE
|
||||
|
||||
and probably some RAM disk support or similar.
|
||||
|
||||
Beginners may also want to enable those:
|
||||
DEBUG_LL
|
||||
DEBUG_ICEDCC
|
||||
EARLY_PRINTK
|
||||
|
||||
|
||||
Build the kernel
|
||||
$ make ARCH=arm CROSS_COMPILE=arm-linux- -j40
|
||||
|
||||
Then use arch/arm/boot/zImage
|
||||
|
||||
|
||||
Use an appropriate device tree file from dts/ directory.
|
||||
|
||||
IO configurations are in bsp/platform
|
||||
|
||||
|
||||
Caveats with at least Linux 3.14.1, 3.15 and 3.16:
|
||||
- With the multiplatform config enabling CONFIG_DEBUG_LL /
|
||||
CONFIG_EARLY_PRINTK selects the PL01X debug UART because of some
|
||||
(unfortunate?) settings in arch/arm/Kconfig.debug. Remove / modify the
|
||||
CONFIG_DEBUG_UART_PL01X option in a way so that CONFIG_DEBUG_LL_INCLUDE
|
||||
is set to debug/icedcc.S and also select CONFIG_DEBUG_ICEDCC
|
||||
accordingly. If this is given, early-printk should work.
|
||||
- The Linux boot code misses an 'ISB' instruction that might hit in your
|
||||
setup. You'll be seeing unresolvable page-faults or other unusual
|
||||
behavior if you hit it. The issue triggers depending on memory layout.
|
||||
|
||||
--- a/arch/arm/boot/compressed/head.S
|
||||
+++ b/arch/arm/boot/compressed/head.S
|
||||
@@ -403,6 +403,7 @@
|
||||
tst r4, #1
|
||||
bleq cache_clean_flush
|
||||
|
||||
+ isb
|
||||
adr r0, BSYM(restart)
|
||||
add r0, r0, r6
|
||||
mov pc, r0
|
||||
|
||||
According to the internet, a patch is floating around for this which
|
||||
hopefully gets merged soon.
|
||||
|
||||
@@ -1,603 +0,0 @@
|
||||
From babd3c04fbb493e1bb65f15fd5ae0cb032f99e52 Mon Sep 17 00:00:00 2001
|
||||
From: Sarah Hoffmann <sarah.hoffmann@kernkonzept.com>
|
||||
Date: Mon, 8 Aug 2016 10:27:28 +0200
|
||||
Subject: [PATCH] MIPS: Add virtual platform
|
||||
|
||||
This adds a virtual platform to use as a guest in VZ-enabled
|
||||
virtualization environments.
|
||||
---
|
||||
arch/mips/Kbuild.platforms | 1 +
|
||||
arch/mips/Kconfig | 28 ++++++
|
||||
arch/mips/configs/mach_virt_defconfig | 11 +++
|
||||
.../include/asm/mach-virt/cpu-feature-overrides.h | 15 +++
|
||||
arch/mips/include/asm/mach-virt/dma-coherence.h | 102 +++++++++++++++++++++
|
||||
arch/mips/include/asm/mach-virt/hypcall.h | 96 +++++++++++++++++++
|
||||
arch/mips/include/asm/timex.h | 6 +-
|
||||
arch/mips/mach-virt/Makefile | 3 +
|
||||
arch/mips/mach-virt/Platform | 9 ++
|
||||
arch/mips/mach-virt/dma.c | 53 +++++++++++
|
||||
arch/mips/mach-virt/early_printk.c | 13 +++
|
||||
arch/mips/mach-virt/irq.c | 17 ++++
|
||||
arch/mips/mach-virt/setup.c | 95 +++++++++++++++++++
|
||||
arch/mips/mm/c-r4k.c | 2 +
|
||||
14 files changed, 448 insertions(+), 3 deletions(-)
|
||||
create mode 100644 arch/mips/configs/mach_virt_defconfig
|
||||
create mode 100644 arch/mips/include/asm/mach-virt/cpu-feature-overrides.h
|
||||
create mode 100644 arch/mips/include/asm/mach-virt/dma-coherence.h
|
||||
create mode 100644 arch/mips/include/asm/mach-virt/hypcall.h
|
||||
create mode 100644 arch/mips/mach-virt/Makefile
|
||||
create mode 100644 arch/mips/mach-virt/Platform
|
||||
create mode 100644 arch/mips/mach-virt/dma.c
|
||||
create mode 100644 arch/mips/mach-virt/early_printk.c
|
||||
create mode 100644 arch/mips/mach-virt/irq.c
|
||||
create mode 100644 arch/mips/mach-virt/setup.c
|
||||
|
||||
diff --git a/arch/mips/Kbuild.platforms b/arch/mips/Kbuild.platforms
|
||||
index a96c81d..f83c5b2 100644
|
||||
--- a/arch/mips/Kbuild.platforms
|
||||
+++ b/arch/mips/Kbuild.platforms
|
||||
@@ -17,6 +17,7 @@ platforms += lantiq
|
||||
platforms += lasat
|
||||
platforms += loongson32
|
||||
platforms += loongson64
|
||||
+platforms += mach-virt
|
||||
platforms += mti-malta
|
||||
platforms += mti-sead3
|
||||
platforms += netlogic
|
||||
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
|
||||
index 71683a8..4677f89 100644
|
||||
--- a/arch/mips/Kconfig
|
||||
+++ b/arch/mips/Kconfig
|
||||
@@ -481,6 +481,34 @@ config MIPS_MALTA
|
||||
This enables support for the MIPS Technologies Malta evaluation
|
||||
board.
|
||||
|
||||
+config MIPS_VIRT
|
||||
+ bool "MIPS virtual platform"
|
||||
+ select HW_HAS_PCI
|
||||
+ select BOOT_ELF32
|
||||
+ select BOOT_RAW
|
||||
+ select CEVT_R4K
|
||||
+ select CSRC_R4K
|
||||
+ select COMMON_CLK
|
||||
+ select IRQ_MIPS_CPU
|
||||
+ select DMA_NONCOHERENT
|
||||
+ select MIPS_GIC
|
||||
+ select MIPS_CPU_SCACHE
|
||||
+ select LIBFDT
|
||||
+ select HW_HAS_PCI
|
||||
+ select SMP_UP if SMP
|
||||
+ select SWAP_IO_SPACE
|
||||
+ select SYS_HAS_CPU_MIPS32_R2
|
||||
+ select SYS_HAS_CPU_MIPS32_R3_5
|
||||
+ select SYS_HAS_CPU_MIPS64_R6
|
||||
+ select SYS_HAS_EARLY_PRINTK
|
||||
+ select SYS_SUPPORTS_32BIT_KERNEL
|
||||
+ select SYS_SUPPORTS_64BIT_KERNEL
|
||||
+ select SYS_SUPPORTS_HIGHMEM
|
||||
+ select SYS_SUPPORTS_LITTLE_ENDIAN
|
||||
+ select SYS_SUPPORTS_MIPS_CPS
|
||||
+ select SYS_SUPPORTS_SMARTMIPS
|
||||
+ select USE_OF
|
||||
+
|
||||
config MIPS_SEAD3
|
||||
bool "MIPS SEAD3 board"
|
||||
select BOOT_ELF32
|
||||
diff --git a/arch/mips/configs/mach_virt_defconfig b/arch/mips/configs/mach_virt_defconfig
|
||||
new file mode 100644
|
||||
index 0000000..20a0353
|
||||
--- /dev/null
|
||||
+++ b/arch/mips/configs/mach_virt_defconfig
|
||||
@@ -0,0 +1,11 @@
|
||||
+CONFIG_MIPS_VIRT=y
|
||||
+CONFIG_BLK_DEV_INITRD=y
|
||||
+CONFIG_BLK_DEV_RAM=y
|
||||
+# CONFIG_KEYBOARD_ATKBD is not set
|
||||
+# CONFIG_MOUSE_PS2 is not set
|
||||
+# CONFIG_SERIO is not set
|
||||
+CONFIG_VIRTIO_CONSOLE=y
|
||||
+CONFIG_VIRTIO_MMIO=y
|
||||
+CONFIG_EXT4_FS=y
|
||||
+CONFIG_EXT4_FS_POSIX_ACL=y
|
||||
+CONFIG_EXT4_FS_SECURITY=y
|
||||
diff --git a/arch/mips/include/asm/mach-virt/cpu-feature-overrides.h b/arch/mips/include/asm/mach-virt/cpu-feature-overrides.h
|
||||
new file mode 100644
|
||||
index 0000000..ded8a1a
|
||||
--- /dev/null
|
||||
+++ b/arch/mips/include/asm/mach-virt/cpu-feature-overrides.h
|
||||
@@ -0,0 +1,14 @@
|
||||
+/*
|
||||
+ * This file is subject to the terms and conditions of the GNU General Public
|
||||
+ * License. See the file "COPYING" in the main directory of this archive
|
||||
+ * for more details.
|
||||
+ */
|
||||
+#ifndef __ASM_MACH_VIRT_CPU_FEATURE_OVERRIDES_H
|
||||
+#define __ASM_MACH_VIRT_CPU_FEATURE_OVERRIDES_H
|
||||
+
|
||||
+#define cpu_has_maar 0
|
||||
+#define cpu_has_htw 0
|
||||
+#define cpu_has_dc_aliases 1
|
||||
+#define cpu_has_nan_legacy 1
|
||||
+
|
||||
+#endif /* __ASM_MACH_VIRT_CPU_FEATURE_OVERRIDES_H */
|
||||
diff --git a/arch/mips/include/asm/mach-virt/dma-coherence.h b/arch/mips/include/asm/mach-virt/dma-coherence.h
|
||||
new file mode 100644
|
||||
index 0000000..a9a3661
|
||||
--- /dev/null
|
||||
+++ b/arch/mips/include/asm/mach-virt/dma-coherence.h
|
||||
@@ -0,0 +1,102 @@
|
||||
+/*
|
||||
+ * This file is subject to the terms and conditions of the GNU General Public
|
||||
+ * License. See the file "COPYING" in the main directory of this archive
|
||||
+ * for more details.
|
||||
+ *
|
||||
+ * Copyright (C) 2016 Kernkonzept GmbH
|
||||
+ */
|
||||
+#pragma once
|
||||
+
|
||||
+#include <linux/mm.h>
|
||||
+#include <linux/err.h>
|
||||
+#include <linux/kernel.h>
|
||||
+#include <linux/bug.h>
|
||||
+#include <linux/io.h>
|
||||
+#include <linux/dma-mapping.h>
|
||||
+
|
||||
+extern unsigned long l4vmm_gpa_start;
|
||||
+extern unsigned long l4vmm_gpa_size;
|
||||
+extern dma_addr_t l4vmm_dma_start;
|
||||
+
|
||||
+struct device;
|
||||
+
|
||||
+static inline dma_addr_t plat_map_gpa_to_dma(unsigned long gpa)
|
||||
+{
|
||||
+ if (likely(l4vmm_gpa_size)) {
|
||||
+ if (likely(l4vmm_gpa_start <= gpa
|
||||
+ && gpa < l4vmm_gpa_start + l4vmm_gpa_size))
|
||||
+ return gpa - l4vmm_gpa_start + l4vmm_dma_start;
|
||||
+ }
|
||||
+
|
||||
+ pr_err("Failed to translate guest-physical 0x%lx to dma-addr\n",
|
||||
+ gpa);
|
||||
+ BUG(); /* What else? If not here we'll go chaos sooner anyway */
|
||||
+}
|
||||
+
|
||||
+static inline dma_addr_t plat_map_dma_mem(struct device *dev, void *addr,
|
||||
+ size_t size)
|
||||
+{
|
||||
+ return plat_map_gpa_to_dma(virt_to_phys(addr));
|
||||
+}
|
||||
+
|
||||
+static inline dma_addr_t plat_map_dma_mem_page(struct device *dev,
|
||||
+ struct page *page)
|
||||
+{
|
||||
+ return plat_map_gpa_to_dma(page_to_phys(page));
|
||||
+}
|
||||
+
|
||||
+static inline unsigned long plat_dma_addr_to_phys(struct device *dev,
|
||||
+ dma_addr_t dma_addr)
|
||||
+{
|
||||
+ if (likely(l4vmm_gpa_size)) {
|
||||
+ if (likely(l4vmm_dma_start <= dma_addr
|
||||
+ && dma_addr < l4vmm_dma_start + l4vmm_gpa_size))
|
||||
+ return dma_addr - l4vmm_dma_start + l4vmm_gpa_start;
|
||||
+ }
|
||||
+
|
||||
+ pr_err("%s: Do not know about dma_addr=%lx\n", __func__,
|
||||
+ (unsigned long) dma_addr);
|
||||
+ BUG();
|
||||
+}
|
||||
+
|
||||
+static inline void plat_unmap_dma_mem(struct device *dev, dma_addr_t dma_addr,
|
||||
+ size_t size, enum dma_data_direction direction)
|
||||
+{
|
||||
+ if (0) pr_warn("%s\n", __func__);
|
||||
+}
|
||||
+
|
||||
+static inline int plat_dma_supported(struct device *dev, u64 mask)
|
||||
+{
|
||||
+ /*
|
||||
+ * we fall back to GFP_DMA when the mask isn't all 1s,
|
||||
+ * so we can't guarantee allocations that must be
|
||||
+ * within a tighter range than GFP_DMA..
|
||||
+ */
|
||||
+ if (mask < DMA_BIT_MASK(24))
|
||||
+ return 0;
|
||||
+
|
||||
+ return 1;
|
||||
+}
|
||||
+
|
||||
+static inline int plat_device_is_coherent(struct device *dev)
|
||||
+{
|
||||
+ return coherentio;
|
||||
+}
|
||||
+
|
||||
+#ifndef plat_post_dma_flush
|
||||
+static inline void plat_post_dma_flush(struct device *dev)
|
||||
+{
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
+#ifdef CONFIG_SWIOTLB
|
||||
+static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
|
||||
+{
|
||||
+ return plat_map_gpa_to_dma(paddr);
|
||||
+}
|
||||
+
|
||||
+static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
|
||||
+{
|
||||
+ return daddr;
|
||||
+}
|
||||
+#endif
|
||||
diff --git a/arch/mips/include/asm/mach-virt/hypcall.h b/arch/mips/include/asm/mach-virt/hypcall.h
|
||||
new file mode 100644
|
||||
index 0000000..8a7e881
|
||||
--- /dev/null
|
||||
+++ b/arch/mips/include/asm/mach-virt/hypcall.h
|
||||
@@ -0,0 +1,96 @@
|
||||
+/*
|
||||
+ * This file is subject to the terms and conditions of the GNU General Public
|
||||
+ * License. See the file "COPYING" in the main directory of this archive
|
||||
+ * for more details.
|
||||
+ *
|
||||
+ * (C) 2016 Kernkonzept GmbH, Adam Lackorzynski <adam@l4re.org>
|
||||
+ */
|
||||
+#pragma once
|
||||
+
|
||||
+enum {
|
||||
+ L4VMM_FUNC_BASE = 0x160,
|
||||
+ L4VMM_FUNC_PRINTCHAR = L4VMM_FUNC_BASE + 0,
|
||||
+};
|
||||
+
|
||||
+static inline unsigned long
|
||||
+l4vmm_hypcall1(unsigned func, unsigned long a0)
|
||||
+{
|
||||
+ register unsigned long _a0 asm ("a0") = a0;
|
||||
+ asm volatile(".set push; .set virt; hypcall %[func]; .set pop"
|
||||
+ : "=r" (_a0)
|
||||
+ : [func] "K" (func),
|
||||
+ "0" (_a0)
|
||||
+ : "cc", "memory");
|
||||
+ return _a0;
|
||||
+}
|
||||
+
|
||||
+static inline unsigned long
|
||||
+l4vmm_hypcall2(unsigned func, unsigned long a0, unsigned long a1)
|
||||
+{
|
||||
+ register unsigned long _a0 asm ("a0") = a0;
|
||||
+ register unsigned long _a1 asm ("a1") = a1;
|
||||
+ asm volatile(".set push; .set virt; hypcall %[func]; .set pop"
|
||||
+ : "=r" (_a0),
|
||||
+ "=r" (_a1)
|
||||
+ : [func] "K" (func),
|
||||
+ "0" (_a0),
|
||||
+ "1" (_a1)
|
||||
+ : "cc", "memory");
|
||||
+ return _a0;
|
||||
+}
|
||||
+
|
||||
+static inline unsigned long
|
||||
+l4vmm_hypcall2_ret(unsigned func, unsigned long a0, unsigned long *a1)
|
||||
+{
|
||||
+ register unsigned long _a0 asm ("a0") = a0;
|
||||
+ register unsigned long _a1 asm ("a1") = *a1;
|
||||
+ asm volatile(".set push; .set virt; hypcall %[func]; .set pop"
|
||||
+ : "=r" (_a0),
|
||||
+ "=r" (_a1)
|
||||
+ : [func] "K" (func),
|
||||
+ "0" (_a0),
|
||||
+ "1" (_a1)
|
||||
+ : "cc", "memory");
|
||||
+ *a1 = _a1;
|
||||
+ return _a0;
|
||||
+}
|
||||
+
|
||||
+static inline unsigned long
|
||||
+l4vmm_hypcall3(unsigned func, unsigned long a0, unsigned long a1,
|
||||
+ unsigned long a2)
|
||||
+{
|
||||
+ register unsigned long _a0 asm ("a0") = a0;
|
||||
+ register unsigned long _a1 asm ("a1") = a1;
|
||||
+ register unsigned long _a2 asm ("a2") = a2;
|
||||
+ asm volatile(".set push; .set virt; hypcall %[func]; .set pop"
|
||||
+ : "=r" (_a0),
|
||||
+ "=r" (_a1),
|
||||
+ "=r" (_a2)
|
||||
+ : [func] "K" (func),
|
||||
+ "0" (_a0),
|
||||
+ "1" (_a1),
|
||||
+ "2" (_a2)
|
||||
+ : "cc", "memory");
|
||||
+ return _a0;
|
||||
+}
|
||||
+
|
||||
+static inline unsigned long
|
||||
+l4vmm_hypcall3_ret(unsigned func, unsigned long a0, unsigned long *a1,
|
||||
+ unsigned long *a2)
|
||||
+{
|
||||
+ register unsigned long _a0 asm ("a0") = a0;
|
||||
+ register unsigned long _a1 asm ("a1") = *a1;
|
||||
+ register unsigned long _a2 asm ("a2") = *a2;
|
||||
+ asm volatile(".set push; .set virt; hypcall %[func]; .set pop"
|
||||
+ : "=r" (_a0),
|
||||
+ "=r" (_a1),
|
||||
+ "=r" (_a2)
|
||||
+ : [func] "K" (func),
|
||||
+ "0" (_a0),
|
||||
+ "1" (_a1),
|
||||
+ "2" (_a2)
|
||||
+ : "cc", "memory");
|
||||
+ *a1 = _a1;
|
||||
+ *a2 = _a2;
|
||||
+ return _a0;
|
||||
+}
|
||||
diff --git a/arch/mips/include/asm/timex.h b/arch/mips/include/asm/timex.h
|
||||
index b05bb70..3222504 100644
|
||||
--- a/arch/mips/include/asm/timex.h
|
||||
+++ b/arch/mips/include/asm/timex.h
|
||||
@@ -71,7 +71,7 @@ static inline int can_use_mips_counter(unsigned int prid)
|
||||
|
||||
static inline cycles_t get_cycles(void)
|
||||
{
|
||||
- if (can_use_mips_counter(read_c0_prid()))
|
||||
+ if (IS_ENABLED(CONFIG_MIPS_VIRT) || can_use_mips_counter(read_c0_prid()))
|
||||
return read_c0_count();
|
||||
else
|
||||
return 0; /* no usable counter */
|
||||
@@ -86,10 +86,10 @@ static inline cycles_t get_cycles(void)
|
||||
*/
|
||||
static inline unsigned long random_get_entropy(void)
|
||||
{
|
||||
- unsigned int prid = read_c0_prid();
|
||||
+ unsigned int prid = IS_ENABLED(CONFIG_MIPS_VIRT) ? 0 : read_c0_prid();
|
||||
unsigned int imp = prid & PRID_IMP_MASK;
|
||||
|
||||
- if (can_use_mips_counter(prid))
|
||||
+ if (IS_ENABLED(CONFIG_MIPS_VIRT) || can_use_mips_counter(prid))
|
||||
return read_c0_count();
|
||||
else if (likely(imp != PRID_IMP_R6000 && imp != PRID_IMP_R6000A))
|
||||
return read_c0_random();
|
||||
diff --git a/arch/mips/mach-virt/Makefile b/arch/mips/mach-virt/Makefile
|
||||
new file mode 100644
|
||||
index 0000000..bb4b020
|
||||
--- /dev/null
|
||||
+++ b/arch/mips/mach-virt/Makefile
|
||||
@@ -0,0 +1,3 @@
|
||||
+obj-y += setup.o irq.o dma.o
|
||||
+
|
||||
+obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
|
||||
diff --git a/arch/mips/mach-virt/Platform b/arch/mips/mach-virt/Platform
|
||||
new file mode 100644
|
||||
index 0000000..52ddca7
|
||||
--- /dev/null
|
||||
+++ b/arch/mips/mach-virt/Platform
|
||||
@@ -0,0 +1,9 @@
|
||||
+#
|
||||
+# Virtual platform.
|
||||
+#
|
||||
+platform-$(CONFIG_MIPS_VIRT) += mach-virt/
|
||||
+cflags-$(CONFIG_MIPS_VIRT) += -I$(srctree)/arch/mips/include/asm/mach-virt
|
||||
+
|
||||
+load-$(CONFIG_MIPS_VIRT) += 0xffffffff80100000
|
||||
+
|
||||
+all-$(CONFIG_MIPS_VIRT) := $(COMPRESSION_FNAME).bin
|
||||
diff --git a/arch/mips/mach-virt/dma.c b/arch/mips/mach-virt/dma.c
|
||||
new file mode 100644
|
||||
index 0000000..9d86a2e
|
||||
--- /dev/null
|
||||
+++ b/arch/mips/mach-virt/dma.c
|
||||
@@ -0,0 +1,53 @@
|
||||
+/*
|
||||
+ * This file is subject to the terms and conditions of the GNU General Public
|
||||
+ * License. See the file "COPYING" in the main directory of this archive
|
||||
+ * for more details.
|
||||
+ *
|
||||
+ * Copyright (C) 2016 Kernkonzept GmbH
|
||||
+ */
|
||||
+
|
||||
+
|
||||
+#include <linux/of.h>
|
||||
+#include <linux/of_fdt.h>
|
||||
+#include <linux/of_address.h>
|
||||
+
|
||||
+#include <asm/mach-virt/dma-coherence.h>
|
||||
+
|
||||
+unsigned long l4vmm_gpa_start;
|
||||
+unsigned long l4vmm_gpa_size;
|
||||
+dma_addr_t l4vmm_dma_start;
|
||||
+
|
||||
+/* For now, we just have a single contiguous physical region in the
|
||||
+ * hypervisor */
|
||||
+static int __init mips_virt_dma_init(void)
|
||||
+{
|
||||
+ struct device_node *np;
|
||||
+ const __be32 *ranges = NULL;
|
||||
+ int naddr, nsize, len;
|
||||
+
|
||||
+ l4vmm_gpa_size = 0;
|
||||
+
|
||||
+ np = of_find_node_by_name(NULL, "memory");
|
||||
+ if (!np)
|
||||
+ return 0;
|
||||
+
|
||||
+ naddr = of_n_addr_cells(np);
|
||||
+ nsize = of_n_size_cells(np);
|
||||
+
|
||||
+ ranges = of_get_property(np, "dma-ranges", &len);
|
||||
+
|
||||
+ if (ranges && len >= (sizeof(*ranges) * (2 * naddr + nsize))) {
|
||||
+ l4vmm_dma_start = of_read_number(ranges, naddr);
|
||||
+ l4vmm_gpa_start = of_read_number(ranges + naddr, naddr);
|
||||
+ l4vmm_gpa_size = of_read_number(ranges + 2 * naddr, nsize);
|
||||
+
|
||||
+ pr_info("DMA range for memory 0x%lx - 0x%lx set @ 0x%lx\n",
|
||||
+ l4vmm_gpa_start,
|
||||
+ l4vmm_gpa_start + l4vmm_gpa_size,
|
||||
+ (unsigned long) l4vmm_dma_start);
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+fs_initcall(mips_virt_dma_init);
|
||||
diff --git a/arch/mips/mach-virt/early_printk.c b/arch/mips/mach-virt/early_printk.c
|
||||
new file mode 100644
|
||||
index 0000000..591ed45
|
||||
--- /dev/null
|
||||
+++ b/arch/mips/mach-virt/early_printk.c
|
||||
@@ -0,0 +1,13 @@
|
||||
+/*
|
||||
+ * This file is subject to the terms and conditions of the GNU General Public
|
||||
+ * License. See the file "COPYING" in the main directory of this archive
|
||||
+ * for more details.
|
||||
+ *
|
||||
+ * Copyright (C) 2016 Kernkonzept GmbH
|
||||
+ */
|
||||
+#include <asm/mach-virt/hypcall.h>
|
||||
+
|
||||
+void prom_putchar(char c)
|
||||
+{
|
||||
+ l4vmm_hypcall1(L4VMM_FUNC_PRINTCHAR, c);
|
||||
+}
|
||||
diff --git a/arch/mips/mach-virt/irq.c b/arch/mips/mach-virt/irq.c
|
||||
new file mode 100644
|
||||
index 0000000..8a4c9ad
|
||||
--- /dev/null
|
||||
+++ b/arch/mips/mach-virt/irq.c
|
||||
@@ -0,0 +1,17 @@
|
||||
+/*
|
||||
+ * This file is subject to the terms and conditions of the GNU General Public
|
||||
+ * License. See the file "COPYING" in the main directory of this archive
|
||||
+ * for more details.
|
||||
+ *
|
||||
+ * Copyright (C) 2016 Kernkonzept GmbH
|
||||
+ */
|
||||
+
|
||||
+#include <linux/init.h>
|
||||
+#include <linux/irqchip.h>
|
||||
+
|
||||
+#include <asm/irq.h>
|
||||
+
|
||||
+void __init arch_init_irq(void)
|
||||
+{
|
||||
+ irqchip_init();
|
||||
+}
|
||||
diff --git a/arch/mips/mach-virt/setup.c b/arch/mips/mach-virt/setup.c
|
||||
new file mode 100644
|
||||
index 0000000..4182221
|
||||
--- /dev/null
|
||||
+++ b/arch/mips/mach-virt/setup.c
|
||||
@@ -0,0 +1,95 @@
|
||||
+/*
|
||||
+ * This file is subject to the terms and conditions of the GNU General Public
|
||||
+ * License. See the file "COPYING" in the main directory of this archive
|
||||
+ * for more details.
|
||||
+ *
|
||||
+ * Copyright (C) 2016 Kernkonzept GmbH
|
||||
+ */
|
||||
+
|
||||
+#include <linux/init.h>
|
||||
+#include <linux/initrd.h>
|
||||
+#include <linux/of.h>
|
||||
+#include <linux/of_fdt.h>
|
||||
+#include <linux/of_platform.h>
|
||||
+#include <linux/clk-provider.h>
|
||||
+#include <linux/clocksource.h>
|
||||
+
|
||||
+#include <asm/bootinfo.h>
|
||||
+#include <asm/cpu-features.h>
|
||||
+#include <asm/irq_cpu.h>
|
||||
+#include <asm/prom.h>
|
||||
+#include <asm/time.h>
|
||||
+
|
||||
+const char *get_system_type(void)
|
||||
+{
|
||||
+ return "MIPS Virtual Platform";
|
||||
+}
|
||||
+
|
||||
+static void __init init_mips_cpu_timer(void)
|
||||
+{
|
||||
+ struct device_node *np;
|
||||
+ u32 freq;
|
||||
+
|
||||
+ mips_hpt_frequency = 0;
|
||||
+
|
||||
+ /* The timer frequency must be defined in the device tree.
|
||||
+ If the definition is missing, we assume that the timer should
|
||||
+ not be used.
|
||||
+ */
|
||||
+ np = of_find_node_by_name(NULL, "cpus");
|
||||
+ if (np && of_property_read_u32(np, "mips-hpt-frequency", &freq) >= 0) {
|
||||
+ mips_hpt_frequency = freq;
|
||||
+
|
||||
+ printk("CPU frequency %d.%02d MHz\n", freq/1000000,
|
||||
+ (freq%1000000)*100/1000000);
|
||||
+ } else
|
||||
+ pr_warn("MIPS CPU core timer not used. %p, %u\n", np, freq);
|
||||
+
|
||||
+ of_node_put(np);
|
||||
+}
|
||||
+
|
||||
+void __init plat_time_init(void)
|
||||
+{
|
||||
+ init_mips_cpu_timer();
|
||||
+}
|
||||
+
|
||||
+void __init prom_init(void)
|
||||
+{
|
||||
+ int i;
|
||||
+ int argc = fw_arg0;
|
||||
+ char **argv = (char **)fw_arg1;
|
||||
+
|
||||
+ for (i = 0; i < argc; i++) {
|
||||
+ strlcat(arcs_cmdline, argv[i], COMMAND_LINE_SIZE);
|
||||
+ if (i < argc - 1)
|
||||
+ strlcat(arcs_cmdline, " ", COMMAND_LINE_SIZE);
|
||||
+ }
|
||||
+
|
||||
+ printk("DT at address %p\n", (void *)fw_arg3);
|
||||
+ __dt_setup_arch((void *)fw_arg3);
|
||||
+}
|
||||
+
|
||||
+void __init plat_mem_setup(void)
|
||||
+{
|
||||
+}
|
||||
+
|
||||
+void __init prom_free_prom_memory(void)
|
||||
+{
|
||||
+}
|
||||
+
|
||||
+void __init device_tree_init(void)
|
||||
+{
|
||||
+ unflatten_and_copy_device_tree();
|
||||
+}
|
||||
+
|
||||
+static int __init publish_devices(void)
|
||||
+{
|
||||
+ if (!of_have_populated_dt())
|
||||
+ return 0;
|
||||
+
|
||||
+ if (of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL))
|
||||
+ panic("Failed to populate DT");
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+device_initcall(publish_devices);
|
||||
diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
|
||||
index 5d3a25e..355e7c6 100644
|
||||
--- a/arch/mips/mm/c-r4k.c
|
||||
+++ b/arch/mips/mm/c-r4k.c
|
||||
@@ -1505,9 +1505,11 @@ static void setup_scache(void)
|
||||
way_string[c->scache.ways], c->scache.linesz);
|
||||
}
|
||||
#else
|
||||
+#ifndef CONFIG_MIPS_VIRT
|
||||
if (!(c->scache.flags & MIPS_CACHE_NOT_PRESENT))
|
||||
panic("Dunno how to handle MIPS32 / MIPS64 second level cache");
|
||||
#endif
|
||||
+#endif
|
||||
return;
|
||||
}
|
||||
sc_present = 0;
|
||||
--
|
||||
2.1.4
|
||||
|
||||
@@ -1,603 +0,0 @@
|
||||
From f7b2513f5b82ee9966c2ba56bb8e35e185e5db57 Mon Sep 17 00:00:00 2001
|
||||
From: Sarah Hoffmann <sarah.hoffmann@kernkonzept.com>
|
||||
Date: Wed, 7 Dec 2016 11:53:15 +0100
|
||||
Subject: [PATCH] MIPS: Add virtual platform
|
||||
|
||||
This adds a virtual platform to use as a guest in VZ-enabled
|
||||
virtualization environments.
|
||||
---
|
||||
arch/mips/Kbuild.platforms | 1 +
|
||||
arch/mips/Kconfig | 28 ++++++
|
||||
arch/mips/configs/mach_virt_defconfig | 11 +++
|
||||
.../include/asm/mach-virt/cpu-feature-overrides.h | 14 +++
|
||||
arch/mips/include/asm/mach-virt/dma-coherence.h | 102 +++++++++++++++++++++
|
||||
arch/mips/include/asm/mach-virt/hypcall.h | 96 +++++++++++++++++++
|
||||
arch/mips/include/asm/timex.h | 6 +-
|
||||
arch/mips/mach-virt/Makefile | 3 +
|
||||
arch/mips/mach-virt/Platform | 9 ++
|
||||
arch/mips/mach-virt/dma.c | 53 +++++++++++
|
||||
arch/mips/mach-virt/early_printk.c | 13 +++
|
||||
arch/mips/mach-virt/irq.c | 17 ++++
|
||||
arch/mips/mach-virt/setup.c | 95 +++++++++++++++++++
|
||||
arch/mips/mm/c-r4k.c | 2 +
|
||||
14 files changed, 447 insertions(+), 3 deletions(-)
|
||||
create mode 100644 arch/mips/configs/mach_virt_defconfig
|
||||
create mode 100644 arch/mips/include/asm/mach-virt/cpu-feature-overrides.h
|
||||
create mode 100644 arch/mips/include/asm/mach-virt/dma-coherence.h
|
||||
create mode 100644 arch/mips/include/asm/mach-virt/hypcall.h
|
||||
create mode 100644 arch/mips/mach-virt/Makefile
|
||||
create mode 100644 arch/mips/mach-virt/Platform
|
||||
create mode 100644 arch/mips/mach-virt/dma.c
|
||||
create mode 100644 arch/mips/mach-virt/early_printk.c
|
||||
create mode 100644 arch/mips/mach-virt/irq.c
|
||||
create mode 100644 arch/mips/mach-virt/setup.c
|
||||
|
||||
diff --git a/arch/mips/Kbuild.platforms b/arch/mips/Kbuild.platforms
|
||||
index c5cd63a..5971326 100644
|
||||
--- a/arch/mips/Kbuild.platforms
|
||||
+++ b/arch/mips/Kbuild.platforms
|
||||
@@ -17,6 +17,7 @@ platforms += lantiq
|
||||
platforms += lasat
|
||||
platforms += loongson32
|
||||
platforms += loongson64
|
||||
+platforms += mach-virt
|
||||
platforms += mti-malta
|
||||
platforms += mti-sead3
|
||||
platforms += netlogic
|
||||
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
|
||||
index 212ff92..0249b11 100644
|
||||
--- a/arch/mips/Kconfig
|
||||
+++ b/arch/mips/Kconfig
|
||||
@@ -494,6 +494,34 @@ config MACH_PIC32
|
||||
Microchip PIC32 is a family of general-purpose 32 bit MIPS core
|
||||
microcontrollers.
|
||||
|
||||
+config MIPS_VIRT
|
||||
+ bool "MIPS virtual platform"
|
||||
+ select HW_HAS_PCI
|
||||
+ select BOOT_ELF32
|
||||
+ select BOOT_RAW
|
||||
+ select CEVT_R4K
|
||||
+ select CSRC_R4K
|
||||
+ select COMMON_CLK
|
||||
+ select IRQ_MIPS_CPU
|
||||
+ select DMA_NONCOHERENT
|
||||
+ select MIPS_GIC
|
||||
+ select MIPS_CPU_SCACHE
|
||||
+ select LIBFDT
|
||||
+ select HW_HAS_PCI
|
||||
+ select SMP_UP if SMP
|
||||
+ select SWAP_IO_SPACE
|
||||
+ select SYS_HAS_CPU_MIPS32_R2
|
||||
+ select SYS_HAS_CPU_MIPS32_R3_5
|
||||
+ select SYS_HAS_CPU_MIPS64_R6
|
||||
+ select SYS_HAS_EARLY_PRINTK
|
||||
+ select SYS_SUPPORTS_32BIT_KERNEL
|
||||
+ select SYS_SUPPORTS_64BIT_KERNEL
|
||||
+ select SYS_SUPPORTS_HIGHMEM
|
||||
+ select SYS_SUPPORTS_LITTLE_ENDIAN
|
||||
+ select SYS_SUPPORTS_MIPS_CPS
|
||||
+ select SYS_SUPPORTS_SMARTMIPS
|
||||
+ select USE_OF
|
||||
+
|
||||
config MIPS_SEAD3
|
||||
bool "MIPS SEAD3 board"
|
||||
select BOOT_ELF32
|
||||
diff --git a/arch/mips/configs/mach_virt_defconfig b/arch/mips/configs/mach_virt_defconfig
|
||||
new file mode 100644
|
||||
index 0000000..20a0353
|
||||
--- /dev/null
|
||||
+++ b/arch/mips/configs/mach_virt_defconfig
|
||||
@@ -0,0 +1,11 @@
|
||||
+CONFIG_MIPS_VIRT=y
|
||||
+CONFIG_BLK_DEV_INITRD=y
|
||||
+CONFIG_BLK_DEV_RAM=y
|
||||
+# CONFIG_KEYBOARD_ATKBD is not set
|
||||
+# CONFIG_MOUSE_PS2 is not set
|
||||
+# CONFIG_SERIO is not set
|
||||
+CONFIG_VIRTIO_CONSOLE=y
|
||||
+CONFIG_VIRTIO_MMIO=y
|
||||
+CONFIG_EXT4_FS=y
|
||||
+CONFIG_EXT4_FS_POSIX_ACL=y
|
||||
+CONFIG_EXT4_FS_SECURITY=y
|
||||
diff --git a/arch/mips/include/asm/mach-virt/cpu-feature-overrides.h b/arch/mips/include/asm/mach-virt/cpu-feature-overrides.h
|
||||
new file mode 100644
|
||||
index 0000000..ded8a1a
|
||||
--- /dev/null
|
||||
+++ b/arch/mips/include/asm/mach-virt/cpu-feature-overrides.h
|
||||
@@ -0,0 +1,14 @@
|
||||
+/*
|
||||
+ * This file is subject to the terms and conditions of the GNU General Public
|
||||
+ * License. See the file "COPYING" in the main directory of this archive
|
||||
+ * for more details.
|
||||
+ */
|
||||
+#ifndef __ASM_MACH_VIRT_CPU_FEATURE_OVERRIDES_H
|
||||
+#define __ASM_MACH_VIRT_CPU_FEATURE_OVERRIDES_H
|
||||
+
|
||||
+#define cpu_has_maar 0
|
||||
+#define cpu_has_htw 0
|
||||
+#define cpu_has_dc_aliases 1
|
||||
+#define cpu_has_nan_legacy 1
|
||||
+
|
||||
+#endif /* __ASM_MACH_VIRT_CPU_FEATURE_OVERRIDES_H */
|
||||
diff --git a/arch/mips/include/asm/mach-virt/dma-coherence.h b/arch/mips/include/asm/mach-virt/dma-coherence.h
|
||||
new file mode 100644
|
||||
index 0000000..a9a3661
|
||||
--- /dev/null
|
||||
+++ b/arch/mips/include/asm/mach-virt/dma-coherence.h
|
||||
@@ -0,0 +1,102 @@
|
||||
+/*
|
||||
+ * This file is subject to the terms and conditions of the GNU General Public
|
||||
+ * License. See the file "COPYING" in the main directory of this archive
|
||||
+ * for more details.
|
||||
+ *
|
||||
+ * Copyright (C) 2016 Kernkonzept GmbH
|
||||
+ */
|
||||
+#pragma once
|
||||
+
|
||||
+#include <linux/mm.h>
|
||||
+#include <linux/err.h>
|
||||
+#include <linux/kernel.h>
|
||||
+#include <linux/bug.h>
|
||||
+#include <linux/io.h>
|
||||
+#include <linux/dma-mapping.h>
|
||||
+
|
||||
+extern unsigned long l4vmm_gpa_start;
|
||||
+extern unsigned long l4vmm_gpa_size;
|
||||
+extern dma_addr_t l4vmm_dma_start;
|
||||
+
|
||||
+struct device;
|
||||
+
|
||||
+static inline dma_addr_t plat_map_gpa_to_dma(unsigned long gpa)
|
||||
+{
|
||||
+ if (likely(l4vmm_gpa_size)) {
|
||||
+ if (likely(l4vmm_gpa_start <= gpa
|
||||
+ && gpa < l4vmm_gpa_start + l4vmm_gpa_size))
|
||||
+ return gpa - l4vmm_gpa_start + l4vmm_dma_start;
|
||||
+ }
|
||||
+
|
||||
+ pr_err("Failed to translate guest-physical 0x%lx to dma-addr\n",
|
||||
+ gpa);
|
||||
+ BUG(); /* What else? If not here we'll go chaos sooner anyway */
|
||||
+}
|
||||
+
|
||||
+static inline dma_addr_t plat_map_dma_mem(struct device *dev, void *addr,
|
||||
+ size_t size)
|
||||
+{
|
||||
+ return plat_map_gpa_to_dma(virt_to_phys(addr));
|
||||
+}
|
||||
+
|
||||
+static inline dma_addr_t plat_map_dma_mem_page(struct device *dev,
|
||||
+ struct page *page)
|
||||
+{
|
||||
+ return plat_map_gpa_to_dma(page_to_phys(page));
|
||||
+}
|
||||
+
|
||||
+static inline unsigned long plat_dma_addr_to_phys(struct device *dev,
|
||||
+ dma_addr_t dma_addr)
|
||||
+{
|
||||
+ if (likely(l4vmm_gpa_size)) {
|
||||
+ if (likely(l4vmm_dma_start <= dma_addr
|
||||
+ && dma_addr < l4vmm_dma_start + l4vmm_gpa_size))
|
||||
+ return dma_addr - l4vmm_dma_start + l4vmm_gpa_start;
|
||||
+ }
|
||||
+
|
||||
+ pr_err("%s: Do not know about dma_addr=%lx\n", __func__,
|
||||
+ (unsigned long) dma_addr);
|
||||
+ BUG();
|
||||
+}
|
||||
+
|
||||
+static inline void plat_unmap_dma_mem(struct device *dev, dma_addr_t dma_addr,
|
||||
+ size_t size, enum dma_data_direction direction)
|
||||
+{
|
||||
+ if (0) pr_warn("%s\n", __func__);
|
||||
+}
|
||||
+
|
||||
+static inline int plat_dma_supported(struct device *dev, u64 mask)
|
||||
+{
|
||||
+ /*
|
||||
+ * we fall back to GFP_DMA when the mask isn't all 1s,
|
||||
+ * so we can't guarantee allocations that must be
|
||||
+ * within a tighter range than GFP_DMA..
|
||||
+ */
|
||||
+ if (mask < DMA_BIT_MASK(24))
|
||||
+ return 0;
|
||||
+
|
||||
+ return 1;
|
||||
+}
|
||||
+
|
||||
+static inline int plat_device_is_coherent(struct device *dev)
|
||||
+{
|
||||
+ return coherentio;
|
||||
+}
|
||||
+
|
||||
+#ifndef plat_post_dma_flush
|
||||
+static inline void plat_post_dma_flush(struct device *dev)
|
||||
+{
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
+#ifdef CONFIG_SWIOTLB
|
||||
+static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
|
||||
+{
|
||||
+ return plat_map_gpa_to_dma(paddr);
|
||||
+}
|
||||
+
|
||||
+static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
|
||||
+{
|
||||
+ return daddr;
|
||||
+}
|
||||
+#endif
|
||||
diff --git a/arch/mips/include/asm/mach-virt/hypcall.h b/arch/mips/include/asm/mach-virt/hypcall.h
|
||||
new file mode 100644
|
||||
index 0000000..8a7e881
|
||||
--- /dev/null
|
||||
+++ b/arch/mips/include/asm/mach-virt/hypcall.h
|
||||
@@ -0,0 +1,96 @@
|
||||
+/*
|
||||
+ * This file is subject to the terms and conditions of the GNU General Public
|
||||
+ * License. See the file "COPYING" in the main directory of this archive
|
||||
+ * for more details.
|
||||
+ *
|
||||
+ * (C) 2016 Kernkonzept GmbH, Adam Lackorzynski <adam@l4re.org>
|
||||
+ */
|
||||
+#pragma once
|
||||
+
|
||||
+enum {
|
||||
+ L4VMM_FUNC_BASE = 0x160,
|
||||
+ L4VMM_FUNC_PRINTCHAR = L4VMM_FUNC_BASE + 0,
|
||||
+};
|
||||
+
|
||||
+static inline unsigned long
|
||||
+l4vmm_hypcall1(unsigned func, unsigned long a0)
|
||||
+{
|
||||
+ register unsigned long _a0 asm ("a0") = a0;
|
||||
+ asm volatile(".set push; .set virt; hypcall %[func]; .set pop"
|
||||
+ : "=r" (_a0)
|
||||
+ : [func] "K" (func),
|
||||
+ "0" (_a0)
|
||||
+ : "cc", "memory");
|
||||
+ return _a0;
|
||||
+}
|
||||
+
|
||||
+static inline unsigned long
|
||||
+l4vmm_hypcall2(unsigned func, unsigned long a0, unsigned long a1)
|
||||
+{
|
||||
+ register unsigned long _a0 asm ("a0") = a0;
|
||||
+ register unsigned long _a1 asm ("a1") = a1;
|
||||
+ asm volatile(".set push; .set virt; hypcall %[func]; .set pop"
|
||||
+ : "=r" (_a0),
|
||||
+ "=r" (_a1)
|
||||
+ : [func] "K" (func),
|
||||
+ "0" (_a0),
|
||||
+ "1" (_a1)
|
||||
+ : "cc", "memory");
|
||||
+ return _a0;
|
||||
+}
|
||||
+
|
||||
+static inline unsigned long
|
||||
+l4vmm_hypcall2_ret(unsigned func, unsigned long a0, unsigned long *a1)
|
||||
+{
|
||||
+ register unsigned long _a0 asm ("a0") = a0;
|
||||
+ register unsigned long _a1 asm ("a1") = *a1;
|
||||
+ asm volatile(".set push; .set virt; hypcall %[func]; .set pop"
|
||||
+ : "=r" (_a0),
|
||||
+ "=r" (_a1)
|
||||
+ : [func] "K" (func),
|
||||
+ "0" (_a0),
|
||||
+ "1" (_a1)
|
||||
+ : "cc", "memory");
|
||||
+ *a1 = _a1;
|
||||
+ return _a0;
|
||||
+}
|
||||
+
|
||||
+static inline unsigned long
|
||||
+l4vmm_hypcall3(unsigned func, unsigned long a0, unsigned long a1,
|
||||
+ unsigned long a2)
|
||||
+{
|
||||
+ register unsigned long _a0 asm ("a0") = a0;
|
||||
+ register unsigned long _a1 asm ("a1") = a1;
|
||||
+ register unsigned long _a2 asm ("a2") = a2;
|
||||
+ asm volatile(".set push; .set virt; hypcall %[func]; .set pop"
|
||||
+ : "=r" (_a0),
|
||||
+ "=r" (_a1),
|
||||
+ "=r" (_a2)
|
||||
+ : [func] "K" (func),
|
||||
+ "0" (_a0),
|
||||
+ "1" (_a1),
|
||||
+ "2" (_a2)
|
||||
+ : "cc", "memory");
|
||||
+ return _a0;
|
||||
+}
|
||||
+
|
||||
+static inline unsigned long
|
||||
+l4vmm_hypcall3_ret(unsigned func, unsigned long a0, unsigned long *a1,
|
||||
+ unsigned long *a2)
|
||||
+{
|
||||
+ register unsigned long _a0 asm ("a0") = a0;
|
||||
+ register unsigned long _a1 asm ("a1") = *a1;
|
||||
+ register unsigned long _a2 asm ("a2") = *a2;
|
||||
+ asm volatile(".set push; .set virt; hypcall %[func]; .set pop"
|
||||
+ : "=r" (_a0),
|
||||
+ "=r" (_a1),
|
||||
+ "=r" (_a2)
|
||||
+ : [func] "K" (func),
|
||||
+ "0" (_a0),
|
||||
+ "1" (_a1),
|
||||
+ "2" (_a2)
|
||||
+ : "cc", "memory");
|
||||
+ *a1 = _a1;
|
||||
+ *a2 = _a2;
|
||||
+ return _a0;
|
||||
+}
|
||||
diff --git a/arch/mips/include/asm/timex.h b/arch/mips/include/asm/timex.h
|
||||
index b05bb70..3222504 100644
|
||||
--- a/arch/mips/include/asm/timex.h
|
||||
+++ b/arch/mips/include/asm/timex.h
|
||||
@@ -71,7 +71,7 @@ static inline int can_use_mips_counter(unsigned int prid)
|
||||
|
||||
static inline cycles_t get_cycles(void)
|
||||
{
|
||||
- if (can_use_mips_counter(read_c0_prid()))
|
||||
+ if (IS_ENABLED(CONFIG_MIPS_VIRT) || can_use_mips_counter(read_c0_prid()))
|
||||
return read_c0_count();
|
||||
else
|
||||
return 0; /* no usable counter */
|
||||
@@ -86,10 +86,10 @@ static inline cycles_t get_cycles(void)
|
||||
*/
|
||||
static inline unsigned long random_get_entropy(void)
|
||||
{
|
||||
- unsigned int prid = read_c0_prid();
|
||||
+ unsigned int prid = IS_ENABLED(CONFIG_MIPS_VIRT) ? 0 : read_c0_prid();
|
||||
unsigned int imp = prid & PRID_IMP_MASK;
|
||||
|
||||
- if (can_use_mips_counter(prid))
|
||||
+ if (IS_ENABLED(CONFIG_MIPS_VIRT) || can_use_mips_counter(prid))
|
||||
return read_c0_count();
|
||||
else if (likely(imp != PRID_IMP_R6000 && imp != PRID_IMP_R6000A))
|
||||
return read_c0_random();
|
||||
diff --git a/arch/mips/mach-virt/Makefile b/arch/mips/mach-virt/Makefile
|
||||
new file mode 100644
|
||||
index 0000000..bb4b020
|
||||
--- /dev/null
|
||||
+++ b/arch/mips/mach-virt/Makefile
|
||||
@@ -0,0 +1,3 @@
|
||||
+obj-y += setup.o irq.o dma.o
|
||||
+
|
||||
+obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
|
||||
diff --git a/arch/mips/mach-virt/Platform b/arch/mips/mach-virt/Platform
|
||||
new file mode 100644
|
||||
index 0000000..52ddca7
|
||||
--- /dev/null
|
||||
+++ b/arch/mips/mach-virt/Platform
|
||||
@@ -0,0 +1,9 @@
|
||||
+#
|
||||
+# Virtual platform.
|
||||
+#
|
||||
+platform-$(CONFIG_MIPS_VIRT) += mach-virt/
|
||||
+cflags-$(CONFIG_MIPS_VIRT) += -I$(srctree)/arch/mips/include/asm/mach-virt
|
||||
+
|
||||
+load-$(CONFIG_MIPS_VIRT) += 0xffffffff80100000
|
||||
+
|
||||
+all-$(CONFIG_MIPS_VIRT) := $(COMPRESSION_FNAME).bin
|
||||
diff --git a/arch/mips/mach-virt/dma.c b/arch/mips/mach-virt/dma.c
|
||||
new file mode 100644
|
||||
index 0000000..9d86a2e
|
||||
--- /dev/null
|
||||
+++ b/arch/mips/mach-virt/dma.c
|
||||
@@ -0,0 +1,53 @@
|
||||
+/*
|
||||
+ * This file is subject to the terms and conditions of the GNU General Public
|
||||
+ * License. See the file "COPYING" in the main directory of this archive
|
||||
+ * for more details.
|
||||
+ *
|
||||
+ * Copyright (C) 2016 Kernkonzept GmbH
|
||||
+ */
|
||||
+
|
||||
+
|
||||
+#include <linux/of.h>
|
||||
+#include <linux/of_fdt.h>
|
||||
+#include <linux/of_address.h>
|
||||
+
|
||||
+#include <asm/mach-virt/dma-coherence.h>
|
||||
+
|
||||
+unsigned long l4vmm_gpa_start;
|
||||
+unsigned long l4vmm_gpa_size;
|
||||
+dma_addr_t l4vmm_dma_start;
|
||||
+
|
||||
+/* For now, we just have a single contiguous physical region in the
|
||||
+ * hypervisor */
|
||||
+static int __init mips_virt_dma_init(void)
|
||||
+{
|
||||
+ struct device_node *np;
|
||||
+ const __be32 *ranges = NULL;
|
||||
+ int naddr, nsize, len;
|
||||
+
|
||||
+ l4vmm_gpa_size = 0;
|
||||
+
|
||||
+ np = of_find_node_by_name(NULL, "memory");
|
||||
+ if (!np)
|
||||
+ return 0;
|
||||
+
|
||||
+ naddr = of_n_addr_cells(np);
|
||||
+ nsize = of_n_size_cells(np);
|
||||
+
|
||||
+ ranges = of_get_property(np, "dma-ranges", &len);
|
||||
+
|
||||
+ if (ranges && len >= (sizeof(*ranges) * (2 * naddr + nsize))) {
|
||||
+ l4vmm_dma_start = of_read_number(ranges, naddr);
|
||||
+ l4vmm_gpa_start = of_read_number(ranges + naddr, naddr);
|
||||
+ l4vmm_gpa_size = of_read_number(ranges + 2 * naddr, nsize);
|
||||
+
|
||||
+ pr_info("DMA range for memory 0x%lx - 0x%lx set @ 0x%lx\n",
|
||||
+ l4vmm_gpa_start,
|
||||
+ l4vmm_gpa_start + l4vmm_gpa_size,
|
||||
+ (unsigned long) l4vmm_dma_start);
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+fs_initcall(mips_virt_dma_init);
|
||||
diff --git a/arch/mips/mach-virt/early_printk.c b/arch/mips/mach-virt/early_printk.c
|
||||
new file mode 100644
|
||||
index 0000000..591ed45
|
||||
--- /dev/null
|
||||
+++ b/arch/mips/mach-virt/early_printk.c
|
||||
@@ -0,0 +1,13 @@
|
||||
+/*
|
||||
+ * This file is subject to the terms and conditions of the GNU General Public
|
||||
+ * License. See the file "COPYING" in the main directory of this archive
|
||||
+ * for more details.
|
||||
+ *
|
||||
+ * Copyright (C) 2016 Kernkonzept GmbH
|
||||
+ */
|
||||
+#include <asm/mach-virt/hypcall.h>
|
||||
+
|
||||
+void prom_putchar(char c)
|
||||
+{
|
||||
+ l4vmm_hypcall1(L4VMM_FUNC_PRINTCHAR, c);
|
||||
+}
|
||||
diff --git a/arch/mips/mach-virt/irq.c b/arch/mips/mach-virt/irq.c
|
||||
new file mode 100644
|
||||
index 0000000..8a4c9ad
|
||||
--- /dev/null
|
||||
+++ b/arch/mips/mach-virt/irq.c
|
||||
@@ -0,0 +1,17 @@
|
||||
+/*
|
||||
+ * This file is subject to the terms and conditions of the GNU General Public
|
||||
+ * License. See the file "COPYING" in the main directory of this archive
|
||||
+ * for more details.
|
||||
+ *
|
||||
+ * Copyright (C) 2016 Kernkonzept GmbH
|
||||
+ */
|
||||
+
|
||||
+#include <linux/init.h>
|
||||
+#include <linux/irqchip.h>
|
||||
+
|
||||
+#include <asm/irq.h>
|
||||
+
|
||||
+void __init arch_init_irq(void)
|
||||
+{
|
||||
+ irqchip_init();
|
||||
+}
|
||||
diff --git a/arch/mips/mach-virt/setup.c b/arch/mips/mach-virt/setup.c
|
||||
new file mode 100644
|
||||
index 0000000..4182221
|
||||
--- /dev/null
|
||||
+++ b/arch/mips/mach-virt/setup.c
|
||||
@@ -0,0 +1,95 @@
|
||||
+/*
|
||||
+ * This file is subject to the terms and conditions of the GNU General Public
|
||||
+ * License. See the file "COPYING" in the main directory of this archive
|
||||
+ * for more details.
|
||||
+ *
|
||||
+ * Copyright (C) 2016 Kernkonzept GmbH
|
||||
+ */
|
||||
+
|
||||
+#include <linux/init.h>
|
||||
+#include <linux/initrd.h>
|
||||
+#include <linux/of.h>
|
||||
+#include <linux/of_fdt.h>
|
||||
+#include <linux/of_platform.h>
|
||||
+#include <linux/clk-provider.h>
|
||||
+#include <linux/clocksource.h>
|
||||
+
|
||||
+#include <asm/bootinfo.h>
|
||||
+#include <asm/cpu-features.h>
|
||||
+#include <asm/irq_cpu.h>
|
||||
+#include <asm/prom.h>
|
||||
+#include <asm/time.h>
|
||||
+
|
||||
+const char *get_system_type(void)
|
||||
+{
|
||||
+ return "MIPS Virtual Platform";
|
||||
+}
|
||||
+
|
||||
+static void __init init_mips_cpu_timer(void)
|
||||
+{
|
||||
+ struct device_node *np;
|
||||
+ u32 freq;
|
||||
+
|
||||
+ mips_hpt_frequency = 0;
|
||||
+
|
||||
+ /* The timer frequency must be defined in the device tree.
|
||||
+ If the definition is missing, we assume that the timer should
|
||||
+ not be used.
|
||||
+ */
|
||||
+ np = of_find_node_by_name(NULL, "cpus");
|
||||
+ if (np && of_property_read_u32(np, "mips-hpt-frequency", &freq) >= 0) {
|
||||
+ mips_hpt_frequency = freq;
|
||||
+
|
||||
+ printk("CPU frequency %d.%02d MHz\n", freq/1000000,
|
||||
+ (freq%1000000)*100/1000000);
|
||||
+ } else
|
||||
+ pr_warn("MIPS CPU core timer not used. %p, %u\n", np, freq);
|
||||
+
|
||||
+ of_node_put(np);
|
||||
+}
|
||||
+
|
||||
+void __init plat_time_init(void)
|
||||
+{
|
||||
+ init_mips_cpu_timer();
|
||||
+}
|
||||
+
|
||||
+void __init prom_init(void)
|
||||
+{
|
||||
+ int i;
|
||||
+ int argc = fw_arg0;
|
||||
+ char **argv = (char **)fw_arg1;
|
||||
+
|
||||
+ for (i = 0; i < argc; i++) {
|
||||
+ strlcat(arcs_cmdline, argv[i], COMMAND_LINE_SIZE);
|
||||
+ if (i < argc - 1)
|
||||
+ strlcat(arcs_cmdline, " ", COMMAND_LINE_SIZE);
|
||||
+ }
|
||||
+
|
||||
+ printk("DT at address %p\n", (void *)fw_arg3);
|
||||
+ __dt_setup_arch((void *)fw_arg3);
|
||||
+}
|
||||
+
|
||||
+void __init plat_mem_setup(void)
|
||||
+{
|
||||
+}
|
||||
+
|
||||
+void __init prom_free_prom_memory(void)
|
||||
+{
|
||||
+}
|
||||
+
|
||||
+void __init device_tree_init(void)
|
||||
+{
|
||||
+ unflatten_and_copy_device_tree();
|
||||
+}
|
||||
+
|
||||
+static int __init publish_devices(void)
|
||||
+{
|
||||
+ if (!of_have_populated_dt())
|
||||
+ return 0;
|
||||
+
|
||||
+ if (of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL))
|
||||
+ panic("Failed to populate DT");
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+device_initcall(publish_devices);
|
||||
diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
|
||||
index fa7d8d3..dbf8775 100644
|
||||
--- a/arch/mips/mm/c-r4k.c
|
||||
+++ b/arch/mips/mm/c-r4k.c
|
||||
@@ -1725,9 +1725,11 @@ static void setup_scache(void)
|
||||
way_string[c->scache.ways], c->scache.linesz);
|
||||
}
|
||||
#else
|
||||
+#ifndef CONFIG_MIPS_VIRT
|
||||
if (!(c->scache.flags & MIPS_CACHE_NOT_PRESENT))
|
||||
panic("Dunno how to handle MIPS32 / MIPS64 second level cache");
|
||||
#endif
|
||||
+#endif
|
||||
return;
|
||||
}
|
||||
sc_present = 0;
|
||||
--
|
||||
2.1.4
|
||||
|
||||
@@ -1,602 +0,0 @@
|
||||
From 998a547c11c9a36a2c3fa21f1d6efe492bf489ea Mon Sep 17 00:00:00 2001
|
||||
From: Adam Lackorzynski <adam@l4re.org>
|
||||
Subject: [PATCH] MIPS: Add virtual platform
|
||||
|
||||
This adds a virtual platform to use as a guest in VZ-enabled
|
||||
virtualization environments.
|
||||
---
|
||||
arch/mips/Kbuild.platforms | 1 +
|
||||
arch/mips/Kconfig | 28 ++++++
|
||||
arch/mips/configs/mach_virt_defconfig | 11 +++
|
||||
.../include/asm/mach-virt/cpu-feature-overrides.h | 14 +++
|
||||
arch/mips/include/asm/mach-virt/dma-coherence.h | 102 +++++++++++++++++++++
|
||||
arch/mips/include/asm/mach-virt/hypcall.h | 96 +++++++++++++++++++
|
||||
arch/mips/include/asm/timex.h | 6 +-
|
||||
arch/mips/mach-virt/Makefile | 3 +
|
||||
arch/mips/mach-virt/Platform | 9 ++
|
||||
arch/mips/mach-virt/dma.c | 53 +++++++++++
|
||||
arch/mips/mach-virt/early_printk.c | 13 +++
|
||||
arch/mips/mach-virt/irq.c | 17 ++++
|
||||
arch/mips/mach-virt/setup.c | 95 +++++++++++++++++++
|
||||
arch/mips/mm/c-r4k.c | 2 +
|
||||
14 files changed, 447 insertions(+), 3 deletions(-)
|
||||
create mode 100644 arch/mips/configs/mach_virt_defconfig
|
||||
create mode 100644 arch/mips/include/asm/mach-virt/cpu-feature-overrides.h
|
||||
create mode 100644 arch/mips/include/asm/mach-virt/dma-coherence.h
|
||||
create mode 100644 arch/mips/include/asm/mach-virt/hypcall.h
|
||||
create mode 100644 arch/mips/mach-virt/Makefile
|
||||
create mode 100644 arch/mips/mach-virt/Platform
|
||||
create mode 100644 arch/mips/mach-virt/dma.c
|
||||
create mode 100644 arch/mips/mach-virt/early_printk.c
|
||||
create mode 100644 arch/mips/mach-virt/irq.c
|
||||
create mode 100644 arch/mips/mach-virt/setup.c
|
||||
|
||||
diff --git a/arch/mips/Kbuild.platforms b/arch/mips/Kbuild.platforms
|
||||
index f5f1bdb292de..74a19cda1f13 100644
|
||||
--- a/arch/mips/Kbuild.platforms
|
||||
+++ b/arch/mips/Kbuild.platforms
|
||||
@@ -18,6 +18,7 @@ platforms += lantiq
|
||||
platforms += lasat
|
||||
platforms += loongson32
|
||||
platforms += loongson64
|
||||
+platforms += mach-virt
|
||||
platforms += mti-malta
|
||||
platforms += netlogic
|
||||
platforms += paravirt
|
||||
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
|
||||
index b3c5bde43d34..6251c9b12455 100644
|
||||
--- a/arch/mips/Kconfig
|
||||
+++ b/arch/mips/Kconfig
|
||||
@@ -546,6 +546,34 @@ config MACH_PIC32
|
||||
Microchip PIC32 is a family of general-purpose 32 bit MIPS core
|
||||
microcontrollers.
|
||||
|
||||
+config MIPS_VIRT
|
||||
+ bool "MIPS virtual platform"
|
||||
+ select HW_HAS_PCI
|
||||
+ select BOOT_ELF32
|
||||
+ select BOOT_RAW
|
||||
+ select CEVT_R4K
|
||||
+ select CSRC_R4K
|
||||
+ select COMMON_CLK
|
||||
+ select IRQ_MIPS_CPU
|
||||
+ select DMA_NONCOHERENT
|
||||
+ select MIPS_GIC
|
||||
+ select MIPS_CPU_SCACHE
|
||||
+ select LIBFDT
|
||||
+ select HW_HAS_PCI
|
||||
+ select SMP_UP if SMP
|
||||
+ select SWAP_IO_SPACE
|
||||
+ select SYS_HAS_CPU_MIPS32_R2
|
||||
+ select SYS_HAS_CPU_MIPS32_R3_5
|
||||
+ select SYS_HAS_CPU_MIPS64_R6
|
||||
+ select SYS_HAS_EARLY_PRINTK
|
||||
+ select SYS_SUPPORTS_32BIT_KERNEL
|
||||
+ select SYS_SUPPORTS_64BIT_KERNEL
|
||||
+ select SYS_SUPPORTS_HIGHMEM
|
||||
+ select SYS_SUPPORTS_LITTLE_ENDIAN
|
||||
+ select SYS_SUPPORTS_MIPS_CPS
|
||||
+ select SYS_SUPPORTS_SMARTMIPS
|
||||
+ select USE_OF
|
||||
+
|
||||
config NEC_MARKEINS
|
||||
bool "NEC EMMA2RH Mark-eins board"
|
||||
select SOC_EMMA2RH
|
||||
diff --git a/arch/mips/configs/mach_virt_defconfig b/arch/mips/configs/mach_virt_defconfig
|
||||
new file mode 100644
|
||||
index 000000000000..20a03530c490
|
||||
--- /dev/null
|
||||
+++ b/arch/mips/configs/mach_virt_defconfig
|
||||
@@ -0,0 +1,11 @@
|
||||
+CONFIG_MIPS_VIRT=y
|
||||
+CONFIG_BLK_DEV_INITRD=y
|
||||
+CONFIG_BLK_DEV_RAM=y
|
||||
+# CONFIG_KEYBOARD_ATKBD is not set
|
||||
+# CONFIG_MOUSE_PS2 is not set
|
||||
+# CONFIG_SERIO is not set
|
||||
+CONFIG_VIRTIO_CONSOLE=y
|
||||
+CONFIG_VIRTIO_MMIO=y
|
||||
+CONFIG_EXT4_FS=y
|
||||
+CONFIG_EXT4_FS_POSIX_ACL=y
|
||||
+CONFIG_EXT4_FS_SECURITY=y
|
||||
diff --git a/arch/mips/include/asm/mach-virt/cpu-feature-overrides.h b/arch/mips/include/asm/mach-virt/cpu-feature-overrides.h
|
||||
new file mode 100644
|
||||
index 000000000000..8994952808e4
|
||||
--- /dev/null
|
||||
+++ b/arch/mips/include/asm/mach-virt/cpu-feature-overrides.h
|
||||
@@ -0,0 +1,14 @@
|
||||
+/*
|
||||
+ * This file is subject to the terms and conditions of the GNU General Public
|
||||
+ * License. See the file "COPYING" in the main directory of this archive
|
||||
+ * for more details.
|
||||
+ */
|
||||
+#ifndef __ASM_MACH_VIRT_CPU_FEATURE_OVERRIDES_H
|
||||
+#define __ASM_MACH_VIRT_CPU_FEATURE_OVERRIDES_H
|
||||
+
|
||||
+#define cpu_has_maar 0
|
||||
+#define cpu_has_htw 0
|
||||
+#define cpu_has_dc_aliases 1
|
||||
+#define cpu_has_nan_legacy 1
|
||||
+
|
||||
+#endif /* __ASM_MACH_VIRT_CPU_FEATURE_OVERRIDES_H */
|
||||
diff --git a/arch/mips/include/asm/mach-virt/dma-coherence.h b/arch/mips/include/asm/mach-virt/dma-coherence.h
|
||||
new file mode 100644
|
||||
index 000000000000..a9a3661b68d3
|
||||
--- /dev/null
|
||||
+++ b/arch/mips/include/asm/mach-virt/dma-coherence.h
|
||||
@@ -0,0 +1,102 @@
|
||||
+/*
|
||||
+ * This file is subject to the terms and conditions of the GNU General Public
|
||||
+ * License. See the file "COPYING" in the main directory of this archive
|
||||
+ * for more details.
|
||||
+ *
|
||||
+ * Copyright (C) 2016 Kernkonzept GmbH
|
||||
+ */
|
||||
+#pragma once
|
||||
+
|
||||
+#include <linux/mm.h>
|
||||
+#include <linux/err.h>
|
||||
+#include <linux/kernel.h>
|
||||
+#include <linux/bug.h>
|
||||
+#include <linux/io.h>
|
||||
+#include <linux/dma-mapping.h>
|
||||
+
|
||||
+extern unsigned long l4vmm_gpa_start;
|
||||
+extern unsigned long l4vmm_gpa_size;
|
||||
+extern dma_addr_t l4vmm_dma_start;
|
||||
+
|
||||
+struct device;
|
||||
+
|
||||
+static inline dma_addr_t plat_map_gpa_to_dma(unsigned long gpa)
|
||||
+{
|
||||
+ if (likely(l4vmm_gpa_size)) {
|
||||
+ if (likely(l4vmm_gpa_start <= gpa
|
||||
+ && gpa < l4vmm_gpa_start + l4vmm_gpa_size))
|
||||
+ return gpa - l4vmm_gpa_start + l4vmm_dma_start;
|
||||
+ }
|
||||
+
|
||||
+ pr_err("Failed to translate guest-physical 0x%lx to dma-addr\n",
|
||||
+ gpa);
|
||||
+ BUG(); /* What else? If not here we'll go chaos sooner anyway */
|
||||
+}
|
||||
+
|
||||
+static inline dma_addr_t plat_map_dma_mem(struct device *dev, void *addr,
|
||||
+ size_t size)
|
||||
+{
|
||||
+ return plat_map_gpa_to_dma(virt_to_phys(addr));
|
||||
+}
|
||||
+
|
||||
+static inline dma_addr_t plat_map_dma_mem_page(struct device *dev,
|
||||
+ struct page *page)
|
||||
+{
|
||||
+ return plat_map_gpa_to_dma(page_to_phys(page));
|
||||
+}
|
||||
+
|
||||
+static inline unsigned long plat_dma_addr_to_phys(struct device *dev,
|
||||
+ dma_addr_t dma_addr)
|
||||
+{
|
||||
+ if (likely(l4vmm_gpa_size)) {
|
||||
+ if (likely(l4vmm_dma_start <= dma_addr
|
||||
+ && dma_addr < l4vmm_dma_start + l4vmm_gpa_size))
|
||||
+ return dma_addr - l4vmm_dma_start + l4vmm_gpa_start;
|
||||
+ }
|
||||
+
|
||||
+ pr_err("%s: Do not know about dma_addr=%lx\n", __func__,
|
||||
+ (unsigned long) dma_addr);
|
||||
+ BUG();
|
||||
+}
|
||||
+
|
||||
+static inline void plat_unmap_dma_mem(struct device *dev, dma_addr_t dma_addr,
|
||||
+ size_t size, enum dma_data_direction direction)
|
||||
+{
|
||||
+ if (0) pr_warn("%s\n", __func__);
|
||||
+}
|
||||
+
|
||||
+static inline int plat_dma_supported(struct device *dev, u64 mask)
|
||||
+{
|
||||
+ /*
|
||||
+ * we fall back to GFP_DMA when the mask isn't all 1s,
|
||||
+ * so we can't guarantee allocations that must be
|
||||
+ * within a tighter range than GFP_DMA..
|
||||
+ */
|
||||
+ if (mask < DMA_BIT_MASK(24))
|
||||
+ return 0;
|
||||
+
|
||||
+ return 1;
|
||||
+}
|
||||
+
|
||||
+static inline int plat_device_is_coherent(struct device *dev)
|
||||
+{
|
||||
+ return coherentio;
|
||||
+}
|
||||
+
|
||||
+#ifndef plat_post_dma_flush
|
||||
+static inline void plat_post_dma_flush(struct device *dev)
|
||||
+{
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
+#ifdef CONFIG_SWIOTLB
|
||||
+static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
|
||||
+{
|
||||
+ return plat_map_gpa_to_dma(paddr);
|
||||
+}
|
||||
+
|
||||
+static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
|
||||
+{
|
||||
+ return daddr;
|
||||
+}
|
||||
+#endif
|
||||
diff --git a/arch/mips/include/asm/mach-virt/hypcall.h b/arch/mips/include/asm/mach-virt/hypcall.h
|
||||
new file mode 100644
|
||||
index 000000000000..8a7e8818fbfb
|
||||
--- /dev/null
|
||||
+++ b/arch/mips/include/asm/mach-virt/hypcall.h
|
||||
@@ -0,0 +1,96 @@
|
||||
+/*
|
||||
+ * This file is subject to the terms and conditions of the GNU General Public
|
||||
+ * License. See the file "COPYING" in the main directory of this archive
|
||||
+ * for more details.
|
||||
+ *
|
||||
+ * (C) 2016 Kernkonzept GmbH, Adam Lackorzynski <adam@l4re.org>
|
||||
+ */
|
||||
+#pragma once
|
||||
+
|
||||
+enum {
|
||||
+ L4VMM_FUNC_BASE = 0x160,
|
||||
+ L4VMM_FUNC_PRINTCHAR = L4VMM_FUNC_BASE + 0,
|
||||
+};
|
||||
+
|
||||
+static inline unsigned long
|
||||
+l4vmm_hypcall1(unsigned func, unsigned long a0)
|
||||
+{
|
||||
+ register unsigned long _a0 asm ("a0") = a0;
|
||||
+ asm volatile(".set push; .set virt; hypcall %[func]; .set pop"
|
||||
+ : "=r" (_a0)
|
||||
+ : [func] "K" (func),
|
||||
+ "0" (_a0)
|
||||
+ : "cc", "memory");
|
||||
+ return _a0;
|
||||
+}
|
||||
+
|
||||
+static inline unsigned long
|
||||
+l4vmm_hypcall2(unsigned func, unsigned long a0, unsigned long a1)
|
||||
+{
|
||||
+ register unsigned long _a0 asm ("a0") = a0;
|
||||
+ register unsigned long _a1 asm ("a1") = a1;
|
||||
+ asm volatile(".set push; .set virt; hypcall %[func]; .set pop"
|
||||
+ : "=r" (_a0),
|
||||
+ "=r" (_a1)
|
||||
+ : [func] "K" (func),
|
||||
+ "0" (_a0),
|
||||
+ "1" (_a1)
|
||||
+ : "cc", "memory");
|
||||
+ return _a0;
|
||||
+}
|
||||
+
|
||||
+static inline unsigned long
|
||||
+l4vmm_hypcall2_ret(unsigned func, unsigned long a0, unsigned long *a1)
|
||||
+{
|
||||
+ register unsigned long _a0 asm ("a0") = a0;
|
||||
+ register unsigned long _a1 asm ("a1") = *a1;
|
||||
+ asm volatile(".set push; .set virt; hypcall %[func]; .set pop"
|
||||
+ : "=r" (_a0),
|
||||
+ "=r" (_a1)
|
||||
+ : [func] "K" (func),
|
||||
+ "0" (_a0),
|
||||
+ "1" (_a1)
|
||||
+ : "cc", "memory");
|
||||
+ *a1 = _a1;
|
||||
+ return _a0;
|
||||
+}
|
||||
+
|
||||
+static inline unsigned long
|
||||
+l4vmm_hypcall3(unsigned func, unsigned long a0, unsigned long a1,
|
||||
+ unsigned long a2)
|
||||
+{
|
||||
+ register unsigned long _a0 asm ("a0") = a0;
|
||||
+ register unsigned long _a1 asm ("a1") = a1;
|
||||
+ register unsigned long _a2 asm ("a2") = a2;
|
||||
+ asm volatile(".set push; .set virt; hypcall %[func]; .set pop"
|
||||
+ : "=r" (_a0),
|
||||
+ "=r" (_a1),
|
||||
+ "=r" (_a2)
|
||||
+ : [func] "K" (func),
|
||||
+ "0" (_a0),
|
||||
+ "1" (_a1),
|
||||
+ "2" (_a2)
|
||||
+ : "cc", "memory");
|
||||
+ return _a0;
|
||||
+}
|
||||
+
|
||||
+static inline unsigned long
|
||||
+l4vmm_hypcall3_ret(unsigned func, unsigned long a0, unsigned long *a1,
|
||||
+ unsigned long *a2)
|
||||
+{
|
||||
+ register unsigned long _a0 asm ("a0") = a0;
|
||||
+ register unsigned long _a1 asm ("a1") = *a1;
|
||||
+ register unsigned long _a2 asm ("a2") = *a2;
|
||||
+ asm volatile(".set push; .set virt; hypcall %[func]; .set pop"
|
||||
+ : "=r" (_a0),
|
||||
+ "=r" (_a1),
|
||||
+ "=r" (_a2)
|
||||
+ : [func] "K" (func),
|
||||
+ "0" (_a0),
|
||||
+ "1" (_a1),
|
||||
+ "2" (_a2)
|
||||
+ : "cc", "memory");
|
||||
+ *a1 = _a1;
|
||||
+ *a2 = _a2;
|
||||
+ return _a0;
|
||||
+}
|
||||
diff --git a/arch/mips/include/asm/timex.h b/arch/mips/include/asm/timex.h
|
||||
index b05bb70a2e46..322250454979 100644
|
||||
--- a/arch/mips/include/asm/timex.h
|
||||
+++ b/arch/mips/include/asm/timex.h
|
||||
@@ -71,7 +71,7 @@ static inline int can_use_mips_counter(unsigned int prid)
|
||||
|
||||
static inline cycles_t get_cycles(void)
|
||||
{
|
||||
- if (can_use_mips_counter(read_c0_prid()))
|
||||
+ if (IS_ENABLED(CONFIG_MIPS_VIRT) || can_use_mips_counter(read_c0_prid()))
|
||||
return read_c0_count();
|
||||
else
|
||||
return 0; /* no usable counter */
|
||||
@@ -86,10 +86,10 @@ static inline cycles_t get_cycles(void)
|
||||
*/
|
||||
static inline unsigned long random_get_entropy(void)
|
||||
{
|
||||
- unsigned int prid = read_c0_prid();
|
||||
+ unsigned int prid = IS_ENABLED(CONFIG_MIPS_VIRT) ? 0 : read_c0_prid();
|
||||
unsigned int imp = prid & PRID_IMP_MASK;
|
||||
|
||||
- if (can_use_mips_counter(prid))
|
||||
+ if (IS_ENABLED(CONFIG_MIPS_VIRT) || can_use_mips_counter(prid))
|
||||
return read_c0_count();
|
||||
else if (likely(imp != PRID_IMP_R6000 && imp != PRID_IMP_R6000A))
|
||||
return read_c0_random();
|
||||
diff --git a/arch/mips/mach-virt/Makefile b/arch/mips/mach-virt/Makefile
|
||||
new file mode 100644
|
||||
index 000000000000..bb4b0207b85c
|
||||
--- /dev/null
|
||||
+++ b/arch/mips/mach-virt/Makefile
|
||||
@@ -0,0 +1,3 @@
|
||||
+obj-y += setup.o irq.o dma.o
|
||||
+
|
||||
+obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
|
||||
diff --git a/arch/mips/mach-virt/Platform b/arch/mips/mach-virt/Platform
|
||||
new file mode 100644
|
||||
index 000000000000..52ddca75c1a2
|
||||
--- /dev/null
|
||||
+++ b/arch/mips/mach-virt/Platform
|
||||
@@ -0,0 +1,9 @@
|
||||
+#
|
||||
+# Virtual platform.
|
||||
+#
|
||||
+platform-$(CONFIG_MIPS_VIRT) += mach-virt/
|
||||
+cflags-$(CONFIG_MIPS_VIRT) += -I$(srctree)/arch/mips/include/asm/mach-virt
|
||||
+
|
||||
+load-$(CONFIG_MIPS_VIRT) += 0xffffffff80100000
|
||||
+
|
||||
+all-$(CONFIG_MIPS_VIRT) := $(COMPRESSION_FNAME).bin
|
||||
diff --git a/arch/mips/mach-virt/dma.c b/arch/mips/mach-virt/dma.c
|
||||
new file mode 100644
|
||||
index 000000000000..9d86a2e6ff0a
|
||||
--- /dev/null
|
||||
+++ b/arch/mips/mach-virt/dma.c
|
||||
@@ -0,0 +1,53 @@
|
||||
+/*
|
||||
+ * This file is subject to the terms and conditions of the GNU General Public
|
||||
+ * License. See the file "COPYING" in the main directory of this archive
|
||||
+ * for more details.
|
||||
+ *
|
||||
+ * Copyright (C) 2016 Kernkonzept GmbH
|
||||
+ */
|
||||
+
|
||||
+
|
||||
+#include <linux/of.h>
|
||||
+#include <linux/of_fdt.h>
|
||||
+#include <linux/of_address.h>
|
||||
+
|
||||
+#include <asm/mach-virt/dma-coherence.h>
|
||||
+
|
||||
+unsigned long l4vmm_gpa_start;
|
||||
+unsigned long l4vmm_gpa_size;
|
||||
+dma_addr_t l4vmm_dma_start;
|
||||
+
|
||||
+/* For now, we just have a single contiguous physical region in the
|
||||
+ * hypervisor */
|
||||
+static int __init mips_virt_dma_init(void)
|
||||
+{
|
||||
+ struct device_node *np;
|
||||
+ const __be32 *ranges = NULL;
|
||||
+ int naddr, nsize, len;
|
||||
+
|
||||
+ l4vmm_gpa_size = 0;
|
||||
+
|
||||
+ np = of_find_node_by_name(NULL, "memory");
|
||||
+ if (!np)
|
||||
+ return 0;
|
||||
+
|
||||
+ naddr = of_n_addr_cells(np);
|
||||
+ nsize = of_n_size_cells(np);
|
||||
+
|
||||
+ ranges = of_get_property(np, "dma-ranges", &len);
|
||||
+
|
||||
+ if (ranges && len >= (sizeof(*ranges) * (2 * naddr + nsize))) {
|
||||
+ l4vmm_dma_start = of_read_number(ranges, naddr);
|
||||
+ l4vmm_gpa_start = of_read_number(ranges + naddr, naddr);
|
||||
+ l4vmm_gpa_size = of_read_number(ranges + 2 * naddr, nsize);
|
||||
+
|
||||
+ pr_info("DMA range for memory 0x%lx - 0x%lx set @ 0x%lx\n",
|
||||
+ l4vmm_gpa_start,
|
||||
+ l4vmm_gpa_start + l4vmm_gpa_size,
|
||||
+ (unsigned long) l4vmm_dma_start);
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+fs_initcall(mips_virt_dma_init);
|
||||
diff --git a/arch/mips/mach-virt/early_printk.c b/arch/mips/mach-virt/early_printk.c
|
||||
new file mode 100644
|
||||
index 000000000000..591ed45f37f8
|
||||
--- /dev/null
|
||||
+++ b/arch/mips/mach-virt/early_printk.c
|
||||
@@ -0,0 +1,13 @@
|
||||
+/*
|
||||
+ * This file is subject to the terms and conditions of the GNU General Public
|
||||
+ * License. See the file "COPYING" in the main directory of this archive
|
||||
+ * for more details.
|
||||
+ *
|
||||
+ * Copyright (C) 2016 Kernkonzept GmbH
|
||||
+ */
|
||||
+#include <asm/mach-virt/hypcall.h>
|
||||
+
|
||||
+void prom_putchar(char c)
|
||||
+{
|
||||
+ l4vmm_hypcall1(L4VMM_FUNC_PRINTCHAR, c);
|
||||
+}
|
||||
diff --git a/arch/mips/mach-virt/irq.c b/arch/mips/mach-virt/irq.c
|
||||
new file mode 100644
|
||||
index 000000000000..8a4c9addf4cc
|
||||
--- /dev/null
|
||||
+++ b/arch/mips/mach-virt/irq.c
|
||||
@@ -0,0 +1,17 @@
|
||||
+/*
|
||||
+ * This file is subject to the terms and conditions of the GNU General Public
|
||||
+ * License. See the file "COPYING" in the main directory of this archive
|
||||
+ * for more details.
|
||||
+ *
|
||||
+ * Copyright (C) 2016 Kernkonzept GmbH
|
||||
+ */
|
||||
+
|
||||
+#include <linux/init.h>
|
||||
+#include <linux/irqchip.h>
|
||||
+
|
||||
+#include <asm/irq.h>
|
||||
+
|
||||
+void __init arch_init_irq(void)
|
||||
+{
|
||||
+ irqchip_init();
|
||||
+}
|
||||
diff --git a/arch/mips/mach-virt/setup.c b/arch/mips/mach-virt/setup.c
|
||||
new file mode 100644
|
||||
index 000000000000..4182221ebbaf
|
||||
--- /dev/null
|
||||
+++ b/arch/mips/mach-virt/setup.c
|
||||
@@ -0,0 +1,95 @@
|
||||
+/*
|
||||
+ * This file is subject to the terms and conditions of the GNU General Public
|
||||
+ * License. See the file "COPYING" in the main directory of this archive
|
||||
+ * for more details.
|
||||
+ *
|
||||
+ * Copyright (C) 2016 Kernkonzept GmbH
|
||||
+ */
|
||||
+
|
||||
+#include <linux/init.h>
|
||||
+#include <linux/initrd.h>
|
||||
+#include <linux/of.h>
|
||||
+#include <linux/of_fdt.h>
|
||||
+#include <linux/of_platform.h>
|
||||
+#include <linux/clk-provider.h>
|
||||
+#include <linux/clocksource.h>
|
||||
+
|
||||
+#include <asm/bootinfo.h>
|
||||
+#include <asm/cpu-features.h>
|
||||
+#include <asm/irq_cpu.h>
|
||||
+#include <asm/prom.h>
|
||||
+#include <asm/time.h>
|
||||
+
|
||||
+const char *get_system_type(void)
|
||||
+{
|
||||
+ return "MIPS Virtual Platform";
|
||||
+}
|
||||
+
|
||||
+static void __init init_mips_cpu_timer(void)
|
||||
+{
|
||||
+ struct device_node *np;
|
||||
+ u32 freq;
|
||||
+
|
||||
+ mips_hpt_frequency = 0;
|
||||
+
|
||||
+ /* The timer frequency must be defined in the device tree.
|
||||
+ If the definition is missing, we assume that the timer should
|
||||
+ not be used.
|
||||
+ */
|
||||
+ np = of_find_node_by_name(NULL, "cpus");
|
||||
+ if (np && of_property_read_u32(np, "mips-hpt-frequency", &freq) >= 0) {
|
||||
+ mips_hpt_frequency = freq;
|
||||
+
|
||||
+ printk("CPU frequency %d.%02d MHz\n", freq/1000000,
|
||||
+ (freq%1000000)*100/1000000);
|
||||
+ } else
|
||||
+ pr_warn("MIPS CPU core timer not used. %p, %u\n", np, freq);
|
||||
+
|
||||
+ of_node_put(np);
|
||||
+}
|
||||
+
|
||||
+void __init plat_time_init(void)
|
||||
+{
|
||||
+ init_mips_cpu_timer();
|
||||
+}
|
||||
+
|
||||
+void __init prom_init(void)
|
||||
+{
|
||||
+ int i;
|
||||
+ int argc = fw_arg0;
|
||||
+ char **argv = (char **)fw_arg1;
|
||||
+
|
||||
+ for (i = 0; i < argc; i++) {
|
||||
+ strlcat(arcs_cmdline, argv[i], COMMAND_LINE_SIZE);
|
||||
+ if (i < argc - 1)
|
||||
+ strlcat(arcs_cmdline, " ", COMMAND_LINE_SIZE);
|
||||
+ }
|
||||
+
|
||||
+ printk("DT at address %p\n", (void *)fw_arg3);
|
||||
+ __dt_setup_arch((void *)fw_arg3);
|
||||
+}
|
||||
+
|
||||
+void __init plat_mem_setup(void)
|
||||
+{
|
||||
+}
|
||||
+
|
||||
+void __init prom_free_prom_memory(void)
|
||||
+{
|
||||
+}
|
||||
+
|
||||
+void __init device_tree_init(void)
|
||||
+{
|
||||
+ unflatten_and_copy_device_tree();
|
||||
+}
|
||||
+
|
||||
+static int __init publish_devices(void)
|
||||
+{
|
||||
+ if (!of_have_populated_dt())
|
||||
+ return 0;
|
||||
+
|
||||
+ if (of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL))
|
||||
+ panic("Failed to populate DT");
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+device_initcall(publish_devices);
|
||||
diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
|
||||
index 88cfaf81c958..cad123f7127f 100644
|
||||
--- a/arch/mips/mm/c-r4k.c
|
||||
+++ b/arch/mips/mm/c-r4k.c
|
||||
@@ -1741,9 +1741,11 @@ static void setup_scache(void)
|
||||
way_string[c->scache.ways], c->scache.linesz);
|
||||
}
|
||||
#else
|
||||
+#ifndef CONFIG_MIPS_VIRT
|
||||
if (!(c->scache.flags & MIPS_CACHE_NOT_PRESENT))
|
||||
panic("Dunno how to handle MIPS32 / MIPS64 second level cache");
|
||||
#endif
|
||||
+#endif
|
||||
return;
|
||||
}
|
||||
sc_present = 0;
|
||||
--
|
||||
2.11.0
|
||||
|
||||
@@ -1,25 +0,0 @@
|
||||
|
||||
How to build a Linux kernel to be used as a guest kernel in uvmm?
|
||||
|
||||
The following brief instructions assume that the reader has
|
||||
basic knowledge of how to build Linux and has the necessary
|
||||
environment and tools installed.
|
||||
|
||||
This directory contains patches against various versions
|
||||
of the Linux kernel. Choose the appropriate patch and apply it with:
|
||||
|
||||
/tmp $ cd linux-4.4
|
||||
/tmp/linux-4.4 $ patch -p1 -i /path/to/pkg/uvmm/configs/guests/Linux/mips/0001-MIPS-Add-virtual-platform-linux-4.4.patch
|
||||
|
||||
Configure the kernel:
|
||||
|
||||
/tmp/linux-4.4 $ make ARCH=mips mach_virt_defconfig
|
||||
|
||||
Further configure the kernel to your needs.
|
||||
|
||||
Build:
|
||||
|
||||
/tmp/linux-4.4 $ make ARCH=mips CROSS_COMPILE=mips-linux-
|
||||
|
||||
The resulting file /tmp/linux-4.4/vmlinux is used as a kernel
|
||||
for uvmm.
|
||||
@@ -1,17 +0,0 @@
|
||||
|
||||
entry uvmm
|
||||
kernel fiasco -serial_esc
|
||||
roottask moe rom/uvmm.ned
|
||||
module uvmm
|
||||
module l4re
|
||||
module ned
|
||||
module cons
|
||||
module io
|
||||
module l4vio_net_p2p
|
||||
module[shell] echo dtb/virt-$PLATFORM_TYPE.dtb
|
||||
module[shell] echo ned/uvmm/vmm.lua
|
||||
module[shell] echo io/plat-$PLATFORM_TYPE/io.cfg
|
||||
module[shell] echo io/plat-$PLATFORM_TYPE/vm_hw.vbus
|
||||
module[shell] echo ned/uvmm/uvmm.ned
|
||||
module[shell,uncompress] echo $ZIMAGE_FILE
|
||||
#module <path_to_ramdisk>/arm-image
|
||||
@@ -1,54 +0,0 @@
|
||||
-- vi:ft=lua
|
||||
|
||||
package.path = "rom/?.lua";
|
||||
|
||||
local L4 = require "L4";
|
||||
local vmm = require "vmm";
|
||||
|
||||
vmm.loader.log_fab = L4.default_loader:new_channel();
|
||||
|
||||
-- start cons server for console multiplexing
|
||||
L4.default_loader:start(
|
||||
{
|
||||
scheduler = vmm.new_sched(0x40);
|
||||
log = L4.Env.log,
|
||||
caps = { cons = vmm.loader.log_fab:svr() }
|
||||
}, "rom/cons -a");
|
||||
|
||||
|
||||
-- virtual IO busses
|
||||
local io_busses = {
|
||||
vm_hw = 1,
|
||||
}
|
||||
|
||||
-- virtio network ports
|
||||
local net_ports = {
|
||||
net0 = 1,
|
||||
net1 = 1,
|
||||
}
|
||||
|
||||
local common_bootargs = "console=ttyAMA0 earlyprintk=1";
|
||||
|
||||
vmm.start_io(io_busses, "rom/io.cfg");
|
||||
vmm.start_virtio_switch(net_ports, 0xa, 0x1);
|
||||
vmm.start_vm{
|
||||
id=1,
|
||||
mem=256,
|
||||
vbus=io_busses.vm_hw,
|
||||
net=net_ports.net0,
|
||||
rd="rom/ramdisk-arm.rd",
|
||||
fdt="rom/virt-omap5.dtb",
|
||||
--bootargs=common_bootargs .. " ip=192.168.1.1:::255.255.255.0:server:eth0 root=/dev/sda1",
|
||||
bootargs=common_bootargs .. " ramdisk_size=9100 root=/dev/ram",
|
||||
prio=nil, cpus=0x1
|
||||
};
|
||||
|
||||
vmm.start_vm{
|
||||
id=2,
|
||||
mem=256,
|
||||
net=net_ports.net1,
|
||||
rd="rom/ramdisk-arm.rd",
|
||||
fdt="rom/virt-omap5.dtb",
|
||||
bootargs=common_bootargs .. " ramdisk_size=9100 root=/dev/ram",
|
||||
prio=nil, cpus=0x1
|
||||
};
|
||||
@@ -1,244 +0,0 @@
|
||||
local L4 = require "L4";
|
||||
|
||||
local l = L4.Loader.new({mem = L4.Env.user_factory});
|
||||
loader = l;
|
||||
|
||||
-- Merges any number of tables into one new table.
--
-- The arguments are processed from left to right, so a key present in
-- several tables ends up with the value from the right-most one. The
-- argument tables themselves are not modified. Processing stops at the
-- first nil argument.
function table_override(...)
  local layers = {...}
  local merged = {}

  local idx = 1
  while layers[idx] ~= nil do
    for key, value in pairs(layers[idx]) do
      merged[key] = value
    end
    idx = idx + 1
  end

  return merged
end
|
||||
|
||||
-- Creates a scheduler object through the user factory.
--
-- `prio` : number; the factory receives `prio + 10` and `prio`, in that order.
-- `cpus` : passed through unchanged (may be nil).
--
-- Returns whatever the factory's create call returns.
function new_sched(prio, cpus)
  local factory = L4.Env.user_factory
  local proto = L4.Proto.Scheduler
  local upper = prio + 10

  return factory:create(proto, upper, prio, cpus)
end
|
||||
|
||||
-- Launches the IO server ("rom/io") and creates one vBus channel per entry
-- of `busses`.
--
-- `busses` : Table keyed by vBus name. For every key <name> a new channel is
--            created, the table entry is replaced by that channel, and the
--            file "rom/<name>.vbus" is appended to the io command line.
-- `cmdline`: io command line parameters; placed in front of the .vbus files.
-- `opts`   : Optional option table for the loader's start function, e.g.
--            scheduler or ext_caps. It is modified in place: `caps` and
--            `log` are filled in. Entries of opts.caps, and after them
--            opts.ext_caps, override the default caps built here.
--
-- Returns the result of the loader's start call.
function start_io(busses, cmdline, opts)
  if opts == nil then
    opts = {}
  end

  if opts.caps ~= nil then
    print("Warning: use opts.ext_caps to pass custom/additional capabilities.")
  end
  if opts.scheduler == nil then
    print("IO started with base priority. Risk of priority related deadlocks! "
          .. "Provide an opts.scheduler entry.")
  end

  local sigma0_factory = L4.cast(L4.Proto.Factory, L4.Env.sigma0)
  local default_caps = {
    sigma0 = sigma0_factory:create(L4.Proto.Sigma0),
    icu    = L4.Env.icu,
    iommu  = L4.Env.iommu,
  }

  -- One " rom/<name>.vbus" argument per bus, in traversal order.
  local vbus_args = {}
  for name in pairs(busses) do
    if default_caps[name] ~= nil then
      print("Warning: overwriting caps." .. name .. " with vbus of same name.")
    end

    local channel = l:new_channel()
    busses[name] = channel
    default_caps[name] = channel:svr()
    vbus_args[#vbus_args + 1] = " rom/" .. name .. ".vbus"
  end

  opts.caps = table_override(default_caps, opts.caps or {}, opts.ext_caps or {})
  if not opts.log then
    opts.log = { "io", "red" }
  end

  return l:start(opts, "rom/io " .. cmdline .. table.concat(vbus_args))
end
|
||||
|
||||
-- Installs a scheduler proxy in `opts.scheduler`, depending on which of
-- `prio` and `cpus` are given:
--
--   neither `prio` nor `cpus` : `opts` is left untouched, no proxy is created.
--   only `prio`               : proxy with only a priority limit.
--   only `cpus`               : proxy with the default priority and a cpus
--                               limit; the priority is passed as 0 to use
--                               the L4Re Default_thread_prio.
--   both                      : proxy with the given limits.
function set_sched(opts, prio, cpus)
  local nothing_requested = (prio == nil) and (cpus == nil)

  if not nothing_requested then
    local base_prio = prio
    if base_prio == nil then
      base_prio = 0
    end

    opts["scheduler"] = new_sched(base_prio, cpus)
  end
end
|
||||
|
||||
-- Starts the virtio network backend and creates one port per entry of
-- `ports`.
--
-- `ports`       : Table keyed by port name; every entry is replaced by the
--                 capability created for that port.
-- `prio`, `cpus`: Handed to set_sched() to select the server's scheduler.
-- `switch_type` : "switch" starts rom/l4vio_switch, sized for the number of
--                 ports, and names each port after its key. Any other value
--                 (including nil) starts rom/l4vio_net_p2p.
-- `ext_caps`    : Optional table of additional capabilities; its entries
--                 override the default `svr` capability.
--
-- Returns the result of the loader's start call for the server.
function start_virtio_switch(ports, prio, cpus, switch_type, ext_caps)
  local switch = l:new_channel();

  local opts = {
    log = { "switch", "Blue" },
    caps = table_override({ svr = switch:svr() }, ext_caps or {});
  };

  set_sched(opts, prio, cpus);

  local is_switch = switch_type == "switch";

  -- Kept local: a plain assignment would create a global `svr` that leaks
  -- into the environment this module returns.
  local svr;
  if is_switch then
    local port_count = 0;
    for _ in pairs(ports) do
      port_count = port_count + 1;
    end
    svr = l:start(opts, "rom/l4vio_switch -v -m -p " .. port_count);
  else
    svr = l:start(opts, "rom/l4vio_net_p2p");
  end

  -- The ports are created only after the server has been started.
  for name in pairs(ports) do
    local factory = L4.cast(L4.Proto.Factory, switch);
    if is_switch then
      ports[name] = factory:create(0, "ds-max=4", "name=" .. name);
    else
      ports[name] = factory:create(0, "ds-max=4");
    end
  end

  return svr;
end
|
||||
|
||||
-- Starts one uvmm instance ("rom/uvmm") for a guest.
--
-- Fields of `options` read by this function:
--   id          : VM number; used for the log tags "vm<id>" / "mon<id>" and,
--                 when set, for the console shortcut "key=<id>".
--   mem         : guest RAM. A capability (userdata) is used directly as RAM
--                 dataspace; a number is the size in MiB of a newly created
--                 dataspace; a numeric string is accepted with a warning;
--                 anything else selects 16 MiB.
--   mem_align   : alignment value passed to the dataspace creation; defaults
--                 to 28 on arm, 21 on arm64 and 10 elsewhere.
--   vbus, net   : capabilities handed to uvmm as `vbus` and `net`.
--   fdt         : device tree name or list of names; each becomes "-d<name>".
--   bootargs    : becomes "-c<bootargs>".
--   rd          : becomes "-r<rd>".
--   kernel      : becomes "-k<kernel>".
--   ram_base    : becomes "-b<ram_base>".
--   nonidentmem : on arm/arm64 "-i" is passed unless this is set.
--   jdb         : when set, L4.Env.jdb is handed over as `jdb` capability.
--   ext_args    : list of additional command line arguments.
--   ext_caps    : additional capabilities; they override the defaults.
--   log         : log for the VM; by default created via l.log_fab.
--   scheduler   : scheduler to use; otherwise set_sched(prio, cpus) decides.
--   prio, cpus  : see set_sched().
--   mon         : a string names a server binary in rom/ that implements the
--                 uvmm CLI interface; `false` disables the monitor
--                 capability; any other value creates a log-based one.
--
-- Returns the result of the loader's startv call.
function start_vm(options)
  local nr = options.id;
  local size_mb = 0;
  local vbus = options.vbus;
  local vnet = options.net;
  local prio = options.prio;
  local cpus = options.cpus;
  local scheduler = options.scheduler;

  local align = 10;
  if L4.Info.arch() == "arm" then
    align = 28;
  elseif L4.Info.arch() == "arm64" then
    align = 21;
  end
  align = options.mem_align or align;

  local cmdline = {};
  if options.fdt then
    if type(options.fdt) ~= "table" then
      options.fdt = { options.fdt }
    end
    for _,v in ipairs(options.fdt) do
      cmdline[#cmdline+1] = "-d" .. v;
    end
  end

  if options.bootargs then
    cmdline[#cmdline+1] = "-c" .. options.bootargs;
  end

  if options.rd then
    cmdline[#cmdline+1] = "-r" .. options.rd;
  end

  if options.kernel then
    cmdline[#cmdline+1] = "-k" .. options.kernel;
  end

  if options.ram_base then
    cmdline[#cmdline+1] = "-b" .. options.ram_base;
  end

  if L4.Info.arch() == "arm" or L4.Info.arch() == "arm64" then
    if not options.nonidentmem then
      cmdline[#cmdline+1] = "-i";
    end
  end

  local keyb_shortcut = nil;
  if nr ~= nil then
    keyb_shortcut = "key=" .. nr;
  end

  local vm_ram;
  if type(options.mem) == "userdata" then
    -- User gave us a cap. Using this as dataspace for guest RAM.
    vm_ram = options.mem
  elseif type(options.mem) == "number" then
    -- User gave us a number. Using this as size for a new Dataspace.
    size_mb = options.mem
  elseif type(options.mem) == "string" then
    print("start_vm: mem parameter '" .. options.mem .. "' is of type string, "
          .. "please use integer.");
    size_mb = tonumber(options.mem)
    if size_mb == nil then
      -- Fail with a clear message instead of a nil comparison error below.
      error("start_vm: mem parameter '" .. options.mem .. "' is not a number.")
    end
  else
    -- User did not give us any valid value.
    size_mb = 16
  end

  if size_mb > 0 then
    local mem_flags = L4.Mem_alloc_flags.Continuous
                      | L4.Mem_alloc_flags.Pinned
                      | L4.Mem_alloc_flags.Super_pages;

    vm_ram = L4.Env.user_factory:create(L4.Proto.Dataspace,
                                        size_mb * 1024 * 1024,
                                        mem_flags, align):m("rw");
  end

  local caps = {
    net  = vnet;
    vbus = vbus;
    ram  = vm_ram;
  };

  if options.jdb then
    caps["jdb"] = L4.Env.jdb
  end

  if options.ext_args then
    for _,v in ipairs(options.ext_args) do
      cmdline[#cmdline+1] = v
    end
  end

  local opts = {
    log  = options.log or l.log_fab:create(L4.Proto.Log, "vm" .. nr, "w",
                                           keyb_shortcut);
    caps = table_override(caps, options.ext_caps or {});
  };

  if scheduler then
    opts["scheduler"] = scheduler;
  else
    set_sched(opts, prio, cpus);
  end

  -- Local on purpose: a plain assignment would create a global `mon`.
  local mon_cap;
  if type(options.mon) == 'string' then
    -- assume 'mon' is the name of a server binary which implements the uvmm
    -- CLI interface
    mon_cap = l:new_channel()

    l:start({
      scheduler = opts.scheduler;
      log = l.log_fab:create(L4.Proto.Log, "mon" .. nr),
      caps = { mon = mon_cap:svr() }
    }, "rom/" .. options.mon)
  elseif options.mon ~= false then
    mon_cap = l.log_fab:create(L4.Proto.Log, "mon" .. nr, "g");
  end

  -- opts.caps is a copy made by table_override(), so the capability has to
  -- be stored there to reach uvmm. A `mon` entry supplied through ext_caps
  -- keeps precedence, as ext_caps overrides the defaults everywhere else.
  if mon_cap ~= nil and opts.caps["mon"] == nil then
    opts.caps["mon"] = mon_cap
  end

  return l:startv(opts, "rom/uvmm", table.unpack(cmdline));
end
|
||||
|
||||
return _ENV
|
||||
@@ -1,3 +0,0 @@
|
||||
INPUT += %PKGDIR%/doc/uvmm.dox
|
||||
INPUT += %PKGDIR%/doc/uvmm-ram.dox
|
||||
INPUT += %PKGDIR%/doc/uvmm_dtg.md
|
||||
@@ -1,82 +0,0 @@
|
||||
// vi:ft=c
|
||||
/**
|
||||
* \page l4re_servers_uvmm_ram_details RAM configuration
|
||||
*
|
||||
* ## RAM configuration for uvmm
|
||||
*
|
||||
* ### Without a memory node in the device tree
|
||||
*
|
||||
* * setup default RAM for guest VM.
|
||||
* * RAM starts either
|
||||
* * at base-address which defaults to 0x0 or the base address value
|
||||
* set via the -b cmdline option or
|
||||
* * in case of identity mapping at the host-physical address of the
|
||||
* dataspace allocated for the RAM
|
||||
*
|
||||
*
|
||||
* ### With a memory node in the device tree
|
||||
*
|
||||
* The memory node needs at least the properties device_type and l4vmm,dscap:
|
||||
*
|
||||
* memory@0 {
|
||||
* device_type = "memory";
|
||||
* l4vmm,dscap = "ram";
|
||||
* }
|
||||
*
|
||||
* Where the given l4vmm,dscap name is accessible in the capability namespace
|
||||
* of the uvmm. If the capability is invalid, the memory node is disabled.
|
||||
*
|
||||
* If memory nodes are given, but none provides valid RAM the configuration is
|
||||
* invalid and uvmm refuses to boot.
|
||||
*
|
||||
* Additional properties of the memory node are `reg` and `dma-ranges`.
|
||||
*
|
||||
* The `reg` property describes the location in the guest's address space that
|
||||
* should be backed by RAM.
|
||||
*
|
||||
* The `dma-ranges` property describes the offset between guest-physical and
|
||||
* host-physical addresses. The guest can evaluate this non-standard property
|
||||
* to derive the correct DMA addresses to program into passed-through devices.
|
||||
* Usage of this property __requires__ modification of guest code.
|
||||
*
|
||||
*
|
||||
* #### Without reg and dma-ranges properties
|
||||
*
|
||||
* The reg property is optional only in case the uvmm maps the guest's RAM into
|
||||
* the VM under the host-physical addresses of the backing memory
|
||||
* (l4vmm,dscap).
|
||||
*
|
||||
* This case can be forced via the cmdline parameter -i and is the default for
|
||||
* platforms without IOMMU, but with DMA capable devices on the configured
|
||||
* vBus.
|
||||
*
|
||||
*
|
||||
* #### Without a reg property, but with a dma-ranges property
|
||||
*
|
||||
* If the -i cmdline parameter is given, identity mapping is forced and the
|
||||
* behavior is the same as in the case above.
|
||||
* Additionally, the dma-ranges property is written.
|
||||
*
|
||||
* In case no -i cmdline parameter is given, the configuration is invalid and
|
||||
* uvmm refuses to boot.
|
||||
*
|
||||
*
|
||||
* #### With a reg property
|
||||
*
|
||||
* uvmm parses the reg property of the memory node and maps the memory into the
|
||||
* VM to the given range(s).
|
||||
*
|
||||
* If the -i cmdline parameter is set, the reg property is ignored and the
|
||||
* memory is mapped into the VM under the corresponding host-physical addresses
|
||||
* of the backing memory (l4vmm,dscap).
|
||||
*
|
||||
*
|
||||
* #### With a reg and dma-ranges property
|
||||
*
|
||||
* uvmm parses the reg property of the memory node and maps the memory into the
|
||||
* VM to the given range(s).
|
||||
*
|
||||
* The dma-ranges property is filled with the corresponding host-physical
|
||||
* addresses of the backing memory (l4vmm,dscap).
|
||||
*
|
||||
*/
|
||||
@@ -1,512 +0,0 @@
|
||||
// vi:ft=c
|
||||
/**
|
||||
* \page l4re_servers L4Re Servers
|
||||
*
|
||||
* - \subpage l4re_servers_uvmm
|
||||
*
|
||||
* \page l4re_servers_uvmm Uvmm, the virtual machine monitor
|
||||
*
|
||||
* Uvmm provides a virtual machine for running an unmodified guest in
|
||||
* non-privileged mode.
|
||||
*
|
||||
* Command Line Options
|
||||
* --------------------
|
||||
*
|
||||
* uvmm provides the following command line options:
|
||||
*
|
||||
* * `-c, --cmdline=<guest command line>`
|
||||
*
|
||||
* Command line that is passed to the guest on boot.
|
||||
*
|
||||
* * `-k, --kernel=<kernel image name>`
|
||||
*
|
||||
* The name of the guest-kernel image file present in the ROM namespace.
|
||||
*
|
||||
* * `-d, --dtb=<DTB overlay>`
|
||||
*
|
||||
* The name of the device tree file present in the ROM namespace.
|
||||
* The device tree will be placed in the upmost region of guest memory.
|
||||
* Optionally, a user may use an additional parameter in the form of
|
||||
* "<DTB overlay>:limit=0xffffffff" to set an upper limit for the device tree
|
||||
* location.
|
||||
*
|
||||
* * `-r, --ramdisk=<RAM disk name>`
|
||||
*
|
||||
* The name of the RAM disk file present in the ROM namespace.
|
||||
*
|
||||
* * `-b, --rambase=<Base address of the guest RAM>`
|
||||
*
|
||||
* Physical start address for the guest RAM. This value is
|
||||
* platform specific.
|
||||
*
|
||||
* * `-D, --debug=[<component>=][level]`
|
||||
*
|
||||
* Control the verbosity level of the uvmm. Possible `level` values are:
|
||||
* quiet, warn, info, trace
|
||||
*
|
||||
* Using the `component` prefix, the verbosity level of each uvmm
|
||||
* component is configurable. The component names are:
|
||||
* core, cpu, mmio, irq, dev, pm, vbus_event
|
||||
*
|
||||
* For example, the following command line sets the verbosity of all uvmm
|
||||
* components to `info` except for IRQ handling, which is set to `trace`.
|
||||
*
|
||||
* uvmm -D info -D irq=trace
|
||||
*
|
||||
* * `-f, --fault-mode`
|
||||
*
|
||||
* Control the handling of guest reads/writes to non-existing memory.
|
||||
* Possible values are:
|
||||
*
|
||||
* * `ignore` - Invalid writes are ignored. Invalid reads either return 0 or
|
||||
* are skipped. The guest may experience undefined behaviour.
|
||||
* * `halt` - Halt the VM on the first invalid memory access.
|
||||
* * `inject` - Try to forward the invalid access to the guest. This is not
|
||||
* supported on all architectures. Falls back to `halt` if the error could
|
||||
* not be forwarded to the guest.
|
||||
*
|
||||
* Defaults to `ignore`.
|
||||
*
|
||||
* * `-q, --quiet`
|
||||
*
|
||||
* Silence all uvmm output.
|
||||
*
|
||||
* * `-v, --verbose`
|
||||
*
|
||||
* Increase the verbosity of the uvmm. Repeating the option increases the
|
||||
* verbosity by another level.
|
||||
*
|
||||
* * `-W, --wakeup-on-system-resume`
|
||||
*
|
||||
* When set, the uvmm resumes when the host system resumes after a
|
||||
* suspend call.
|
||||
*
|
||||
* * `-i`
|
||||
*
|
||||
* When set, the option forces the guest RAM to be mapped to its
|
||||
* corresponding host-physical addresses.
|
||||
*
|
||||
* \note Options `-q, --quiet`, `-v, --verbose` and `-D, --debug` cancel each
|
||||
* other out.
|
||||
*
|
||||
* Setting up guest memory
|
||||
* -----------------------
|
||||
*
|
||||
* In the most simple setup, memory for the guest can be provided via a
|
||||
* simple dataspace. In your ned script, create a new dataspace of the
|
||||
* required size and hand it into uvmm as the `ram` capability:
|
||||
*
|
||||
* local ramds = L4.Env.user_factory:create(L4.Proto.Dataspace, 60 * 1024 * 1024)
|
||||
*
|
||||
* L4.default_loader:startv({caps = {ram = ramds:m("rw")}}, "rom/uvmm")
|
||||
*
|
||||
* The memory will be mapped to the most appropriate place and a memory node
|
||||
* added to the device tree, so that the guest can find the memory.
|
||||
*
|
||||
* For a more complex setup, the memory can be configured via the device tree.
|
||||
* uvmm scans for memory nodes and tries to set up the memory from them. A
|
||||
* memory device node should look like this:
|
||||
*
|
||||
* memory@0 {
|
||||
* device_type = "memory";
|
||||
* reg = <0x00000000 0x00100000
|
||||
* 0x00200000 0xffffffff>;
|
||||
* l4vmm,dscap = "memcap";
|
||||
* dma-ranges = <>;
|
||||
* };
|
||||
*
|
||||
* The `device_type` property is mandatory and needs to be set to `memory`.
|
||||
*
|
||||
* `l4vmm,dscap` contains the name of the capability containing the dataspace
|
||||
* to be used for the RAM. `reg` describes the memory regions to use for the
|
||||
* memory. The regions will be filled up to the size of the supplied dataspace.
|
||||
* If they are larger, then the remaining area will be cut.
|
||||
*
|
||||
* If the optional `dma-ranges` property is given, the host-physical address
|
||||
* ranges for the memory regions will be added here. Note that the property is
|
||||
* not cleared first, so it should be left empty.
|
||||
*
|
||||
* For more details see \subpage l4re_servers_uvmm_ram_details.
|
||||
*
|
||||
* ### Memory layout
|
||||
*
|
||||
* uvmm populates the RAM with the following data:
|
||||
*
|
||||
* * kernel binary
|
||||
* * (optional) ramdisk
|
||||
* * (optional) device tree
|
||||
*
|
||||
* The kernel binary is put at the predefined address. For ELF binaries, this
|
||||
* is an absolute physical address. If the binary supports relative addressing,
|
||||
* the binary is put to the requested offset relative to beginning of the
|
||||
* first 'memory' region defined in the device tree.
|
||||
*
|
||||
* The ramdisk and device tree are placed as far as possible to the end of the
|
||||
* regions defined in the first 'memory' node.
|
||||
*
|
||||
* If there is a part of RAM that must remain empty, then define an extra
|
||||
* memory node for it in the device tree. uvmm only writes to memory in
|
||||
* the first memory node it finds.
|
||||
*
|
||||
* Warning: uvmm does not touch any unpopulated memory. In particular, it does
|
||||
* not ensure that the memory is cleared. It is the responsibility of the provider
|
||||
* of the RAM dataspace to make sure that no data leakage can happen. Normally
|
||||
* this is not an issue because dataspaces are guaranteed to be cleaned when
|
||||
* they are newly created but users should be careful when reusing memory or
|
||||
* dataspaces, for example, when restarting the uvmm.
|
||||
*
|
||||
* Forwarding hardware resources to the guest
|
||||
* ------------------------------------------
|
||||
*
|
||||
* Hardware resources must be specified in two places: the device tree contains
|
||||
* the description of all hardware devices the guest could see and the Vbus
|
||||
* describes which resources are actually available to the uvmm.
|
||||
*
|
||||
* The vbus allows the uvmm access to hardware resources in the same way as
|
||||
* any other L4 application. uvmm expects a capability named 'vbus' where it
|
||||
* can access its hardware resources. It is possible to leave out the capability
|
||||
* for purely virtual guests (Note that this is not actually practical on some
|
||||
* architectures. On ARM, for example, the guest needs hardware access to the
|
||||
* interrupt controller. Without a 'vbus' capability, interrupts will not work.)
|
||||
* For information on how to configure a vbus, see the \ref io "IO documentation".
|
||||
*
|
||||
* The device tree needs to contain the hardware description the guest should
|
||||
* see. For hardware devices this usually means to use a device tree that would
|
||||
* also be used when running the guest directly on hardware.
|
||||
*
|
||||
* On startup, uvmm scans the device tree for any devices that require memory
|
||||
* or interrupt resources and compares the required resources with the ones
|
||||
* available from its vbus. When all resources are available, it sets up the
|
||||
* appropriate forwarding, so that the guest now has direct access to the
|
||||
* hardware. If the resources are not available, the device will be marked
|
||||
* as 'disabled'. This mechanism allows working with a standard device tree
|
||||
* for all guests in the system while handling the actual resource allocation
|
||||
* in a flexible manner via the vbus configuration.
|
||||
*
|
||||
*
|
||||
* The default mechanism assigns all resources 1:1, i.e. with the same memory
|
||||
* address and interrupt number as on hardware. It is also possible to map a
|
||||
* hardware device to a different location. In this case, the assignment
|
||||
* between vbus device and device tree device must be known in advance and
|
||||
* marked in the device tree using the `l4vmm,vbus-dev` property.
|
||||
*
|
||||
* The following device will for example be bound with the vbus device with
|
||||
* the HID 'l4-test,dev':
|
||||
*
|
||||
* test@e0000000 {
|
||||
* compatible = "memdev,bar";
|
||||
* reg = <0 0xe0000000 0 0x50000>,
|
||||
* <0 0xe1000000 0 0x50000>;
|
||||
* l4vmm,vbus-dev = "l4-test,dev";
|
||||
* interrupts-extended = <&gic 0 139 4>;
|
||||
* };
|
||||
*
|
||||
* Resources are then matched by name. Memory resources in the vbus must
|
||||
* be named `reg0` to `reg9` to match against the address ranges in the
|
||||
* device tree `reg` property. Interrupts must be called `irq0` to `irq9`
|
||||
* and will be matched against `interrupts` or `interrupts-extended` entries
|
||||
* in the device tree. The vbus must expose resources for all resources
|
||||
* defined in the device tree entry or the initialisation will fail.
|
||||
*
|
||||
* An appropriate IO entry for the above device would thus be:
|
||||
*
|
||||
* MEM = Io.Hw.Device(function()
|
||||
* Property.hid = "l4-test,dev"
|
||||
* Resource.reg0 = Io.Res.mmio(0x41000000, 0x4104ffff)
|
||||
* Resource.reg1 = Io.Res.mmio(0x42000000, 0x4204ffff)
|
||||
* Resource.irq0 = Io.Res.irq(134);
|
||||
* end)
|
||||
*
|
||||
* Please note that HIDs on the vbus are not necessarily unique. If multiple
|
||||
* devices with the HID given in `l4vmm,vbus-dev` are available on the vbus,
|
||||
* then one device is chosen at random.
|
||||
*
|
||||
* If no vbus device with the given HID is available, the device is disabled.
|
||||
*
|
||||
* How to enable guest suspend/resume
|
||||
* ----------------------------------
|
||||
*
|
||||
* \note Currently only supported on ARM. It should work fine with Linux
|
||||
* version 4.4 or newer.
|
||||
*
|
||||
* Uvmm (partially) implements the power state coordination interface (PSCI),
|
||||
* which is the standard ARM power management interface. To make use of this
|
||||
* interface, you have to announce its availability to the guest operating
|
||||
* system via the device tree like so:
|
||||
*
|
||||
* psci {
|
||||
* compatible = "arm,psci-0.2";
|
||||
* method = "hvc";
|
||||
* };
|
||||
*
|
||||
* The Linux guest must be configured with at least these options:
|
||||
*
|
||||
* CONFIG_SUSPEND=y
|
||||
* CONFIG_ARM_PSCI=y
|
||||
*
|
||||
* How to communicate power management (PM) events
|
||||
* -----------------------------------------------
|
||||
*
|
||||
* Uvmm can be instructed to inform a PM manager of PM events through the
|
||||
* L4::Platform_control interface. To that end, uvmm may be equipped with a
|
||||
* `pfc` cap. On suspend, uvmm will call l4_platform_ctl_system_suspend().
|
||||
*
|
||||
* The `pfc` cap can also be implemented by IO. In that case the guest can
|
||||
* start a machine suspend/shutdown/reboot.
|
||||
*
|
||||
* Ram block device support
|
||||
* ------------------------
|
||||
*
|
||||
* The example ramdisk works by loading a file system into RAM, which needs RAM
|
||||
* block device support to work. In the Linux kernel configuration add:
|
||||
* CONFIG_BLK_DEV_RAM=y
|
||||
*
|
||||
* Framebuffer support for uvmm/amd64 guests
|
||||
* -----------------------------------------
|
||||
* Uvmm can be instructed to pass along a framebuffer to the Linux guest. To
|
||||
* enable this three things need to be done:
|
||||
*
|
||||
* 1. Configure Linux to support a simple framebuffer by enabling
|
||||
* CONFIG_FB_SIMPLE=y
|
||||
* CONFIG_X86_SYSFB=y
|
||||
*
|
||||
* 2. Configure a simple framebuffer device in the device tree (currently only
|
||||
* read by uvmm, linear framebuffer at [0xf0000000 - 0xf1000000])
|
||||
*
|
||||
* simplefb {
|
||||
* compatible = "simple-framebuffer";
|
||||
* reg = <0x0 0xf0000000 0x0 0x1000000>;
|
||||
* l4vmm,fbcap = "fb";
|
||||
* };
|
||||
*
|
||||
* 3. Start a framebuffer instance and connect it to uvmm e.g.
|
||||
* -- Start fb-drv (but only if we need to)
|
||||
* local fbdrv_fb = L4.Env.vesa;
|
||||
* if (not fbdrv_fb) then
|
||||
* fbdrv_fb = l:new_channel();
|
||||
* l:start({
|
||||
* caps = {
|
||||
* vbus = io_busses.fbdrv,
|
||||
* fb = fbdrv_fb:svr(),
|
||||
* },
|
||||
* log = { "fbdrv", "r" },
|
||||
* },
|
||||
* "rom/fb-drv");
|
||||
* end
|
||||
* vmm.start_vm{
|
||||
* ext_caps = { fb = fbdrv_fb },
|
||||
* -- ...
|
||||
*
|
||||
*
|
||||
* Requirements on the Fiasco.OC configuration on amd64
|
||||
* ----------------------------------------------------
|
||||
*
|
||||
* The kernel configuration must feature `CONFIG_SYNC_TSC=y` in order for the
|
||||
* emulated timers to reach a sufficiently high resolution.
|
||||
*
|
||||
*
|
||||
* Recommended Linux configuration options for uvmm/amd64 guests
|
||||
* -------------------------------------------------------------
|
||||
*
|
||||
* The following options are recommended in addition to the amd64 defaults
|
||||
* provided by a `make defconfig`:
|
||||
*
|
||||
* Virtio support is required to access virtual devices provided by uvmm:
|
||||
*
|
||||
* CONFIG_VIRTIO=y
|
||||
* CONFIG_VIRTIO_PCI=y
|
||||
* CONFIG_VIRTIO_BLK=y
|
||||
* CONFIG_BLK_MQ_VIRTIO=y
|
||||
* CONFIG_VIRTIO_CONSOLE=y
|
||||
* CONFIG_VIRTIO_INPUT=y
|
||||
* CONFIG_VIRTIO_NET=y
|
||||
*
|
||||
* It is highly recommended to use the X2APIC, which needs virtualization
|
||||
* awareness to work under uvmm:
|
||||
*
|
||||
* CONFIG_X86_X2APIC=y
|
||||
* CONFIG_PARAVIRT=y
|
||||
* CONFIG_PARAVIRT_SPINLOCKS=y
|
||||
*
|
||||
* KVM clock for uvmm/amd64 guests
|
||||
* -------------------------------
|
||||
*
|
||||
* When executing L4Re + uvmm on QEMU, the PIT as clock source is not reliable.
|
||||
* The paravirtualized KVM clock provides the guest with a stable clock source.
|
||||
*
|
||||
* A KVM clock device is available to the guest, if the device tree contains
|
||||
* the corresponding entry:
|
||||
*
|
||||
* kvm_clock {
|
||||
* compatible = "kvm-clock";
|
||||
* reg = <0x0 0x0 0x0 0x0>;
|
||||
* };
|
||||
*
|
||||
* To make use of this clock, the Linux guest must be built with the following
|
||||
* configuration options:
|
||||
*
|
||||
* CONFIG_HYPERVISOR_GUEST=y
|
||||
* CONFIG_KVM_GUEST=y
|
||||
* CONFIG_PTP_1588_CLOCK_KVM is not set
|
||||
*
|
||||
* Note: KVM calls besides the KVM clock are unhandled and lead to failure
|
||||
* in the uvmm, e.g. vmcall 0x9 for the PTP_1588_CLOCK_KVM.
|
||||
*
|
||||
* This is considered a development feature. The KVM clock is not required when
|
||||
* running on physical hardware as TSC calibration via the PIT works as
|
||||
* expected.
|
||||
*
|
||||
*
|
||||
* Development notes for amd64
|
||||
* ---------------------------
|
||||
*
|
||||
* When you are developing on Linux using QEMU please note that nested
|
||||
* virtualization support is necessary on your host system to run uvmm guests.
|
||||
* Your host Linux version should be 4.12 or greater, **excluding 4.20**.
|
||||
*
|
||||
* Check if your KVM module has nested virtualization enabled via:
|
||||
*
|
||||
* > cat /sys/module/kvm_intel/parameters/nested
|
||||
* Y
|
||||
*
|
||||
* In case it shows `N` instead of `Y` enable nested virtualization support
|
||||
* via:
|
||||
*
|
||||
* modprobe kvm_intel nested=1
|
||||
*
|
||||
* On AMD platforms the module name is `kvm_amd`.
|
||||
*
|
||||
*
|
||||
* QEMU network setup for a uvmm guest on amd64
|
||||
* --------------------------------------------
|
||||
*
|
||||
* qemu-system-x86_64 -M q35 -cpu host -enable-kvm -device intel-iommu
|
||||
* -device e1000e,netdev=net0 -netdev bridge,id=net0,br=virbr0
|
||||
*
|
||||
* where 'virbr0' is the name of the host's bridge device. The line 'allow
|
||||
* virbr0' needs to be present in /etc/qemu/bridge.conf.
|
||||
* The bridge can either be created via the network manager or via the command
|
||||
* line:
|
||||
*
|
||||
* brctl addbr virbr0
|
||||
* ip addr add 192.168.124.1/24 dev virbr0
|
||||
* ip link set up dev virbr0
|
||||
*
|
||||
* In the guest linux with eth0 as network device:
|
||||
*
|
||||
* ip a a 192.168.124.5/24 dev eth0
|
||||
* ip li se up dev eth0
|
||||
*
|
||||
* Now the host and guest can ping each other using their respective IPs.
|
||||
*
|
||||
* Of course, uvmm needs to be connected to Io and Io needs a vbus
|
||||
* configuration for the uvmm client like this:
|
||||
*
|
||||
* Io.add_vbusses
|
||||
* {
|
||||
* vm_pci = Io.Vi.System_bus(function ()
|
||||
* Property.num_msis = 6
|
||||
* PCI = Io.Vi.PCI_bus(function ()
|
||||
* pci_net = wrap(Io.system_bus():match("PCI/CC_0200"))
|
||||
* end)
|
||||
* end)
|
||||
* }
|
||||
*
|
||||
* QEMU emulated VirtIO devices and IO-MMU on amd64
|
||||
* ------------------------------------------------
|
||||
*
|
||||
* QEMU does not route VirtIO devices through the IO-MMU per default. To use
|
||||
* QEMU emulated VirtIO devices add the
|
||||
* `disable-legacy=on,disable-modern=off,iommu_platform=on` flags to the option
|
||||
* list of the device.
|
||||
* The e1000e card in the network example above can be replaced with a
|
||||
* virtio-net-pci card like this:
|
||||
*
|
||||
* -device virtio-net-pci,disable-legacy=on,disable-modern=off,
|
||||
* iommu_platform=on,netdev=net0
|
||||
*
|
||||
* For more information on VirtIO devices and their options see
|
||||
* https://wiki.qemu.org/Features/VT-d.
|
||||
*
|
||||
*
|
||||
*
|
||||
* Using the uvmm monitor interface
|
||||
* --------------------------------
|
||||
*
|
||||
* Uvmm implements an interface with which parts of the guest's state can be
|
||||
* queried and manipulated at runtime. This monitor interface needs to be enabled
|
||||
* during compilation as well as during startup of uvmm. This is described in
|
||||
* detail below.
|
||||
*
|
||||
* ### Compiling uvmm with monitor interface support
|
||||
*
|
||||
* To compile uvmm with monitor interface support, pass the `CONFIG_MONITOR=y`
|
||||
* option during the `make` step (or set it in the Makefile.config). This
|
||||
* option is available on all architectures but note that the set of available
|
||||
* monitor interface features may vary significantly between them. Also note
|
||||
* that the monitor interface will always be disabled in release mode, i.e. if
|
||||
* `CONFIG_RELEASE_MODE=y`.
|
||||
*
|
||||
* ### Enabling the monitor interface at runtime
|
||||
*
|
||||
* When starting a uvmm instance from inside a `ned` script using the
|
||||
* `vmm.start_vm` function, the `mon` argument controls whether the monitor
|
||||
* interface is enabled at runtime. There are three cases to distinguish:
|
||||
*
|
||||
* - `mon=true` (default): The monitor interface is enabled but no server
|
||||
* implementing the client side of the monitor interface
|
||||
* is started. The monitor interface can still be
|
||||
* utilized via `cons` but no readline functionality will
|
||||
* be available.
|
||||
*
|
||||
* - `mon='some_binary'`: If a string is passed as the value of `mon`, the
|
||||
* monitor interface is enabled and the string is
|
||||
* interpreted as the name of a server binary which
|
||||
* implements the client side of the monitor interface.
|
||||
* This server is automatically started and has access to
|
||||
* a vcon capability named `mon` at startup through which
|
||||
* it can make use of the monitor interface. Unless you
|
||||
* have written your own server you should specify
|
||||
* `'uvmm_cli'` which is a server implementing a simple
|
||||
* readline interface.
|
||||
*
|
||||
* - `mon=false`: The monitor interface is disabled at runtime.
|
||||
*
|
||||
* ### Using the monitor interface
|
||||
*
|
||||
* If the monitor interface was enabled you can connect to it via `cons` under
|
||||
* the name `mon<n>` where `<n>` is a unique integer for every uvmm instance
|
||||
* that is started with the monitor interface enabled (numbered starting from
|
||||
* one in order of corresponding `vmm.start_vm` calls). If `mon='uvmm_cli'` was
|
||||
* specified, readline functionality such as command completion and history
|
||||
* will be available. Enter a command followed by enter to run that command.
|
||||
* To obtain a list of all available commands issue the `help` command, to
|
||||
* obtain usage information for a specific command `foo` issue `help foo`.
|
||||
*
|
||||
* \note Some commands will modify the guest's state. Since it should be obvious
|
||||
* to which ones this applies this is usually not specifically
|
||||
* highlighted. Exercise reasonable caution.
|
||||
*
|
||||
* ### Using the guest debugger
|
||||
*
|
||||
* The guest debugger provides monitoring functionality akin to a very
|
||||
* bare-bone GDB interface, e.g. guest RAM and page table dumping,
|
||||
* breakpointing and single stepping. Additional functionality might be added in
|
||||
* the future.
|
||||
*
|
||||
* \note The guest debugger is currently still under development. The guest
|
||||
* debugger may also not be available on all architectures. To check
|
||||
* whether the guest debugger is available check if `help dbg` returns
|
||||
* usage information.
|
||||
*
|
||||
* If the guest debugger is available, you have to manually load it at runtime
|
||||
* using the monitor interface. This saves resources if the guest debugger is
|
||||
* not used. To enable the guest debugger, issue the `dbg on` monitor command.
|
||||
* Once enabled, the guest debugger can not be disabled again.
|
||||
*
|
||||
* To list available guest debugger subcommands, issue `dbg help` after `dbg on`.
|
||||
*
|
||||
* \note When using SMP, most guest debugger subcommands require you to
|
||||
* explicitly specify a guest vcpu using an index starting from zero.
|
||||
*
|
||||
*/
|
||||
@@ -1,81 +0,0 @@
|
||||
# uvmm_dtg The device tree generator for Uvmm {#l4re_servers_uvmm_uvmm_dtg}
|
||||
|
||||
A virtual machine in Uvmm is configured with a device tree that contains
|
||||
information about the VM's resources, memory layout, virtual CPUs and
|
||||
peripheral devices.
|
||||
|
||||
Uvmm_dtg is a tool to generate such a device tree at runtime according to its
|
||||
command line.
|
||||
|
||||
## Usage in L4Re
|
||||
|
||||
Example lua script for Ned:
|
||||
|
||||
-- Create DS holding device tree
|
||||
local dt = L4.Env.user_factory:create(L4.Proto.Dataspace, 4 * 1024):m("rw");
|
||||
|
||||
-- Start the generator
|
||||
L4.default_loader:start(
|
||||
{
|
||||
caps = { dt = dt },
|
||||
}, "rom/uvmm_dtg dt"):wait();
|
||||
|
||||
-- Start uvmm
|
||||
vmm.start_vm
|
||||
{
|
||||
...
|
||||
ext_caps = { dt = dt },
|
||||
fdt = "dt",
|
||||
...
|
||||
}
|
||||
|
||||
Please notice the `:wait()` when starting `uvmm_dtg`. This makes Ned pause
|
||||
until uvmm_dtg has exited and put the device tree into the dataspace such that
|
||||
Uvmm can commence.
|
||||
|
||||
## Usage
|
||||
|
||||
`uvmm_dtg [OPTION]... <file>|--`
|
||||
|
||||
* `--`
|
||||
print to stdout
|
||||
|
||||
* `file`
|
||||
|
||||
On L4Re, the string given as `<file>` is interpreted as a named capability
|
||||
which needs to be backed by a sufficiently large Dataspace. On
|
||||
Linux, a file with the given name is created. In both cases,
|
||||
uvmm_dtg will output into the named file.
|
||||
|
||||
### Options
|
||||
|
||||
* `-h`
|
||||
|
||||
Show help.
|
||||
|
||||
* `--arch <target architecture>`
|
||||
|
||||
Select the target architecture. Valid options are `x86`, `x86_64`, `arm32`,
|
||||
`arm64`, `mips32` and `mips64`.
|
||||
|
||||
* `--format <format>`
|
||||
|
||||
Select the output format. Available formats are:
|
||||
`txt`: The device tree will be printed as plain text (`dts`).
|
||||
`bin`: The device tree will be output as binary (`dtb`).
|
||||
|
||||
* `--mem-base <membase>`
|
||||
|
||||
Configure the start of the memory distribution. `membase` can be defined in
|
||||
both decimal and hex notations. uvmm_dtg rounds the given base up to the
|
||||
platform's page size.
|
||||
|
||||
This value can be overridden by memory devices with fixed addresses.
|
||||
|
||||
* `--device <devicename:[Option1,Option2=value,Option3=value,...]>`
|
||||
|
||||
This configures a device.
|
||||
|
||||
To get a list of supported devices, use `--device help`.
|
||||
|
||||
To get help for a specific device, use `--device devicename:help`.
|
||||
@@ -1,4 +0,0 @@
|
||||
PKGDIR ?= ..
|
||||
L4DIR ?= $(PKGDIR)/../..
|
||||
|
||||
include $(L4DIR)/mk/subdir.mk
|
||||
@@ -1,333 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
extern "C" {
|
||||
#include <libfdt.h>
|
||||
}
|
||||
|
||||
#include <cassert>
|
||||
#include <initializer_list>
|
||||
|
||||
namespace Dtb {
|
||||
|
||||
/**
|
||||
* Cell provides data structures and methods to handle cell based properties
|
||||
*
|
||||
* Device trees contain properties described by cells. The properties are
|
||||
* - stored in big endian
|
||||
* - the number of cells is specified by other properties like
|
||||
* \#address-cells, \#size-cells, \#interrupt-cells
|
||||
* - a property has at most 4 cells
|
||||
*
|
||||
* Cells might be translated from one domain into another. The
|
||||
* translation is done by comparing regions, calculating the offset
|
||||
* relative to a region in the current domain and applying this offset
|
||||
* to a region in another domain. Therefore cells need relational
|
||||
* operation, addition and subtraction.
|
||||
*/
|
||||
|
||||
class Cell
|
||||
{
|
||||
public:
|
||||
enum
|
||||
{
|
||||
Max_size = 4 /**< Maximal number of allowed cells */
|
||||
};
|
||||
|
||||
static Cell make_cell(std::initializer_list<uint32_t> l)
|
||||
{
|
||||
return Cell(l);
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a default invalid cell
|
||||
*
|
||||
* An invalid cell is a tuple of {~0U, ~0U, ~0U, ~0U}.
|
||||
*/
|
||||
Cell()
|
||||
{
|
||||
for (auto &v: _values)
|
||||
v = ~0U;
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a Cell object from a device tree property
|
||||
*
|
||||
* \param values Pointer to the property values
|
||||
* \param size Number of cells in the property; Must not exceed
|
||||
* Max_size.
|
||||
*/
|
||||
Cell(fdt32_t const *values, size_t size)
|
||||
{
|
||||
assert(size <= Max_size);
|
||||
for (auto &v: _values)
|
||||
v = 0;
|
||||
|
||||
for (unsigned i = 0, offs = Max_size - size; i < size; ++i)
|
||||
_values[offs + i] = fdt32_to_cpu(values[i]);
|
||||
}
|
||||
|
||||
uint32_t const &operator [] (size_t idx) const
|
||||
{
|
||||
assert(idx < Max_size);
|
||||
return _values[idx];
|
||||
}
|
||||
|
||||
/**
|
||||
* Check whether a Cell object is valid
|
||||
*
|
||||
* The default constructor sets the cell to {~0U, ~0U, ~0U, ~0U}. If
|
||||
* the cell object contains anything else it is considered to be
|
||||
* valid.
|
||||
*
|
||||
* \return bool true if the cell is different from {~0U, ~0U, ~0U, ~0U}
|
||||
*/
|
||||
bool is_valid() const
|
||||
{
|
||||
for (auto x: _values)
|
||||
if (x != ~0U)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add two Cell objects
|
||||
*
|
||||
* We assume that cells are stored as 32 bit values in big endian
|
||||
* order and can be added by simply adding the individual 32 bit
|
||||
* values and any overflow from a previous addition.
|
||||
*
|
||||
* We do not check whether there is an overflow when adding the
|
||||
* highest 32 bit values.
|
||||
*/
|
||||
Cell operator + (Cell const &other) const
|
||||
{
|
||||
Cell result;
|
||||
uint32_t carry = 0;
|
||||
for (int i = Max_size - 1; i >= 0; --i)
|
||||
{
|
||||
uint64_t a = _values[i];
|
||||
uint64_t b = other._values[i];
|
||||
uint64_t res = a + b + carry;
|
||||
carry = (res >> 32) ? 1 : 0;
|
||||
result._values[i] = static_cast<uint32_t>(res);
|
||||
}
|
||||
// XXX no overflow check yet
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Subtract a Cell object from another
|
||||
*
|
||||
* We assume that cells are stored as 32 bit values in big endian
|
||||
* order and the difference can be calculate by simply subtracting
|
||||
* the invidual 32 bit values and any overflow from a previous
|
||||
* subtraction.
|
||||
*
|
||||
* We do not check whether a is larger than b in (a - b), which
|
||||
* would lead to an overflow.
|
||||
*/
|
||||
Cell operator - (Cell const &other) const
|
||||
{
|
||||
Cell result;
|
||||
uint32_t carry = 0;
|
||||
for (int i = Max_size - 1; i >= 0; --i)
|
||||
{
|
||||
uint64_t a = _values[i];
|
||||
uint64_t b = other._values[i];
|
||||
uint64_t res = a - b - carry;
|
||||
carry = (res >> 32) ? 1 : 0;
|
||||
result._values[i] = static_cast<uint32_t>(res);
|
||||
}
|
||||
// XXX no overflow check yet
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Bitwise AND of the values of two cells, slot by slot.
Cell operator & (Cell const &other) const
{
  Cell masked(*this);
  masked &= other;
  return masked;
}
|
||||
|
||||
/// In-place bitwise AND with the values of `other`, slot by slot.
Cell& operator &= (Cell const &other)
{
  uint32_t const *src = other._values;
  for (uint32_t &dst : _values)
    dst &= *src++;

  return *this;
}
|
||||
|
||||
/**
|
||||
* Relational operator Cell A < Cell B
|
||||
*/
|
||||
bool operator < (Cell const &other) const
|
||||
{ return cmp(other) == -1; }
|
||||
|
||||
/**
|
||||
* Relational operator Cell A <= Cell B
|
||||
*/
|
||||
bool operator <= (Cell const &other) const
|
||||
{ return cmp(other) != 1; }
|
||||
|
||||
/**
|
||||
* Relational operator Cell A == Cell B
|
||||
*/
|
||||
bool operator == (Cell const &other) const
|
||||
{ return cmp(other) == 0; }
|
||||
|
||||
/**
|
||||
* Relational operator Cell A != Cell B
|
||||
*/
|
||||
bool operator != (Cell const &other) const
|
||||
{ return cmp(other) != 0; }
|
||||
|
||||
/**
|
||||
* Relational operator Cell A >= Cell B
|
||||
*/
|
||||
bool operator >= (Cell const &other) const
|
||||
{ return cmp(other) != -1; }
|
||||
|
||||
/**
|
||||
* Relational operator Cell A > Cell B
|
||||
*/
|
||||
bool operator > (Cell const &other) const
|
||||
{ return cmp(other) == 1; }
|
||||
|
||||
/**
|
||||
* Check whether the cell object contains a valid memory address
|
||||
*
|
||||
* We consider any 32bit or 64bit value a valid memory address. If
|
||||
* the cell contains anything other than 0 in the highest order
|
||||
* values, it must be something else and cannot be interpreted as a
|
||||
* memory address.
|
||||
*
|
||||
* \return bool true, if the cell contains a 32bit or 64bit value.
|
||||
*/
|
||||
bool is_uint64() const
|
||||
{ return !_values[0] && !_values[1]; }
|
||||
|
||||
/**
|
||||
* Get the memory address of this cell
|
||||
*
|
||||
* Returns the value of the cell as 64bit value. It asserts, that
|
||||
* the cell actually contains something, that can be interpreted as
|
||||
* memory address.
|
||||
*
|
||||
* \return uint64_t the cell contents as 64bit value
|
||||
*/
|
||||
uint64_t get_uint64() const
|
||||
{
|
||||
assert(is_uint64());
|
||||
return (static_cast<uint64_t>(_values[2]) << 32) + _values[3];
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
Cell(std::initializer_list<uint32_t> l)
|
||||
{
|
||||
assert(l.size() <= Max_size);
|
||||
for (auto &v: _values)
|
||||
v = 0;
|
||||
|
||||
unsigned i = Max_size - l.size();
|
||||
for (uint32_t v : l)
|
||||
_values[i++] = v;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compare two cell objects
|
||||
*
|
||||
* We assume that cells are stored as 32 bit values in big endian
|
||||
* order and that we can compare them starting at the highest order
|
||||
* value.
|
||||
*
|
||||
* \param Cell cell object to compare with
|
||||
* \retval -1 cell is smaller than other cell
|
||||
* \retval 0 cells are equal
|
||||
* \retval 1 cells is larger than other cell
|
||||
*/
|
||||
int cmp(Cell const &other) const
|
||||
{
|
||||
unsigned i;
|
||||
for (i = 0; i < Max_size; ++i)
|
||||
{
|
||||
if (_values[i] < other._values[i])
|
||||
return -1;
|
||||
if (_values[i] > other._values[i])
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t _values[Max_size];
|
||||
};
|
||||
|
||||
/**
|
||||
* Data and methods associated with a range property in a device tree
|
||||
*
|
||||
* Ranges in a device tree describe to translation of regions from one
|
||||
* domain to another.
|
||||
*/
|
||||
class Range
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Translate an address from one domain to another
|
||||
*
|
||||
* This function takes an address cell and a size cell and
|
||||
* translates the address from one domain to another if there is a
|
||||
* matching range.
|
||||
*
|
||||
* \param[inout] address Address cell that shall be translated
|
||||
* \param[in] size Size Size cell associated with the address
|
||||
*/
|
||||
bool translate(Cell *address, Cell const &size)
|
||||
{
|
||||
assert(address);
|
||||
|
||||
if (match(*address, size))
|
||||
{
|
||||
*address = (*address - _child) + _parent;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
Range(Cell const &child, Cell const &parent, Cell const &length)
|
||||
: _child{child}, _parent{parent}, _length{length} {};
|
||||
|
||||
private:
|
||||
// ranges: child, parent, length
|
||||
// child.cells == this->cells
|
||||
// parent.cells == parent.cells
|
||||
Cell _child;
|
||||
Cell _parent;
|
||||
Cell _length;
|
||||
|
||||
// [address, address + size] subset of [child, child + length] ?
|
||||
bool match(Cell const &address, Cell const &size) const
|
||||
{
|
||||
Cell address_max = address + size;
|
||||
Cell child_max = _child + _length;
|
||||
return (_child <= address) && (address_max <= child_max);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Data and methods associated with a reg property in a device tree
|
||||
*/
|
||||
struct Reg
|
||||
{
|
||||
Cell address;
|
||||
Cell size;
|
||||
|
||||
Reg(Cell const &address, Cell const &size) : address{address}, size{size} {};
|
||||
|
||||
bool operator == (Reg const &other) const
|
||||
{ return (address == other.address) && (size == other.size); }
|
||||
|
||||
bool operator != (Reg const &other) const
|
||||
{ return !operator == (other); }
|
||||
};
|
||||
|
||||
} // namespace Dtb
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,851 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2020, 2022-2024 Kernkonzept GmbH.
|
||||
* Author(s): Benjamin Lamowski <benjamin.lamowski@kernkonzept.com>
|
||||
*
|
||||
* License: see LICENSE.spdx (in this directory or the directories above)
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file
|
||||
* Basic ACPI tables.
|
||||
*
|
||||
* Adapted from the ACPI Specification version 6.3.
|
||||
* Currently only implements the ACPI tables necessary to make Linux find local
|
||||
* APICs for SMP.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <l4/cxx/utils>
|
||||
#include <consts.h>
|
||||
#include <array>
|
||||
#include <vector>
|
||||
|
||||
#include "debug.h"
|
||||
#include "guest.h"
|
||||
#include "cpu_dev_array.h"
|
||||
#include "cpu_dev.h"
|
||||
#include "ioapic.h"
|
||||
#include "virt_lapic.h"
|
||||
#include "mem_types.h"
|
||||
|
||||
extern "C" {
|
||||
#include "platform/acenv.h"
|
||||
#include "actypes.h"
|
||||
#include "actbl.h"
|
||||
}
|
||||
|
||||
namespace Acpi
|
||||
{
|
||||
// Debug output objects of the ACPI code, one per Dbg level (Info, Warn,
// Trace). All of them belong to the Dbg::Dev component and use the tag
// "ACPI".
static Dbg info(Dbg::Dev, Dbg::Info, "ACPI");
|
||||
static Dbg warn(Dbg::Dev, Dbg::Warn, "ACPI");
|
||||
static Dbg trace(Dbg::Dev, Dbg::Trace, "ACPI");
|
||||
|
||||
class Tables;
|
||||
class Acpi_device;
|
||||
/**
|
||||
* Registry of devices that need to insert information into ACPI tables.
|
||||
*
|
||||
* Upon Uvmm startup devices will be created from the device tree. These can
|
||||
* register themselves here. The Acpi::Tables class will then call the
|
||||
* Acpi_device functions of these devices to fill the ACPI tables. It will
|
||||
* also delete the Acpi_device_hub after use.
|
||||
*/
|
||||
class Acpi_device_hub
|
||||
{
|
||||
friend class Tables;
|
||||
public:
|
||||
static void register_device(Acpi_device const *dev)
|
||||
{ get()->_devices.push_back(dev); }
|
||||
|
||||
private:
|
||||
static Acpi_device_hub *get()
|
||||
{
|
||||
if (!_hub)
|
||||
_hub = new Acpi_device_hub();
|
||||
return _hub;
|
||||
}
|
||||
|
||||
std::vector<Acpi_device const*> const &devices() const
|
||||
{
|
||||
return _devices;
|
||||
}
|
||||
|
||||
static void destroy()
|
||||
{
|
||||
if (_hub)
|
||||
delete _hub;
|
||||
_hub = nullptr;
|
||||
}
|
||||
|
||||
Acpi_device_hub() = default;
|
||||
~Acpi_device_hub() = default;
|
||||
static Acpi_device_hub *_hub;
|
||||
std::vector<Acpi_device const*> _devices;
|
||||
};
|
||||
|
||||
/**
|
||||
* Devices that must register with ACPI shall implement this interface.
|
||||
*/
|
||||
class Acpi_device
|
||||
{
|
||||
public:
|
||||
explicit Acpi_device()
|
||||
{
|
||||
Acpi_device_hub::register_device(this);
|
||||
}
|
||||
|
||||
virtual void amend_fadt(ACPI_TABLE_FADT *) const {};
|
||||
virtual l4_size_t amend_mcfg(ACPI_MCFG_ALLOCATION *, l4_size_t) const { return 0; };
|
||||
|
||||
/**
|
||||
* Amend the DSDT ACPI table (highest priority).
|
||||
*
|
||||
* This method is executed before all the #amend_dsdt_late methods of all
|
||||
* ACPI devices.
|
||||
*/
|
||||
virtual l4_size_t amend_dsdt(void *, l4_size_t) const { return 0; };
|
||||
|
||||
/**
|
||||
* Amend the DSDT ACPI table (lowest priority).
|
||||
*
|
||||
* This method is executed after all the #amend_dsdt methods of all ACPI
|
||||
* devices. This is especially useful if the amendment refers to a scope
|
||||
* that needs to be already defined before.
|
||||
*/
|
||||
virtual l4_size_t amend_dsdt_late(void *, l4_size_t) const { return 0; };
|
||||
};
|
||||
|
||||
/**
|
||||
* Singleton for access to the FACS table.
|
||||
*
|
||||
* Used by ACPI platform to acquire the wakeup vector and zeropage to reserve
|
||||
* the FACS location in guest memory in the e820 map.
|
||||
*/
|
||||
class Facs_storage
|
||||
{
|
||||
public:
|
||||
static Facs_storage *get()
|
||||
{
|
||||
if (!_facs_storage)
|
||||
_facs_storage = new Facs_storage();
|
||||
return _facs_storage;
|
||||
}
|
||||
|
||||
void set_addr(ACPI_TABLE_FACS *table) { _facs = table; }
|
||||
void set_gaddr(l4_addr_t gaddr) { _gfacs = Vmm::Guest_addr(gaddr); }
|
||||
l4_uint32_t waking_vector() const { return _facs->FirmwareWakingVector; }
|
||||
Vmm::Region mem_region() const
|
||||
{
|
||||
assert(_gfacs.get() != 0);
|
||||
|
||||
return Vmm::Region::ss(_gfacs, sizeof(ACPI_TABLE_FACS),
|
||||
Vmm::Region_type::Ram);
|
||||
}
|
||||
|
||||
private:
|
||||
Facs_storage() = default;
|
||||
~Facs_storage() = default;
|
||||
|
||||
static Facs_storage *_facs_storage;
|
||||
ACPI_TABLE_FACS *_facs;
|
||||
Vmm::Guest_addr _gfacs;
|
||||
};
|
||||
|
||||
// Interrupt override data for MADT table
|
||||
// see ACPI Spec v6.3, 5.2.12.5 Interrupt Source Override Structure
// Tables::write_madt() emits one Interrupt Source Override subtable per
// instance and copies the fields as noted below.
|
||||
struct Madt_int_override
|
||||
{
|
||||
// Copied to the SourceIrq field of the subtable.
l4_uint8_t src_irq;
|
||||
// Copied to the GlobalIrq field of the subtable.
l4_uint32_t gsi;
|
||||
// Copied to the IntiFlags field of the subtable.
l4_uint16_t flags;
|
||||
};
|
||||
|
||||
// Static storage management for interrupt override entries
|
||||
class Madt_int_override_storage
|
||||
{
|
||||
public:
|
||||
static Madt_int_override_storage *get()
|
||||
{
|
||||
if (!_self)
|
||||
_self = new Madt_int_override_storage();
|
||||
return _self;
|
||||
}
|
||||
|
||||
void add_override(Madt_int_override new_override)
|
||||
{ _overrides.push_back(new_override); }
|
||||
|
||||
std::vector<Madt_int_override> const &overrides() const
|
||||
{ return _overrides; }
|
||||
|
||||
private:
|
||||
Madt_int_override_storage() = default;
|
||||
~Madt_int_override_storage() = default;
|
||||
|
||||
static Madt_int_override_storage *_self;
|
||||
std::vector<Madt_int_override> _overrides;
|
||||
};
|
||||
|
||||
/**
|
||||
* ACPI control.
|
||||
*
|
||||
* Manage the creation of ACPI tables in guest memory.
|
||||
*/
|
||||
class Tables
|
||||
{
|
||||
public:
|
||||
/// Destroys the Acpi_device_hub singleton together with the tables object.
~Tables()
{ Acpi_device_hub::destroy(); }
|
||||
|
||||
protected:
|
||||
// Sizes in bytes of the fixed-size ACPI structures, taken from the
// corresponding ACPICA structure definitions.
enum Table_sizes : l4_size_t
|
||||
{
|
||||
// Common header of the system description tables.
Header_size = sizeof(ACPI_TABLE_HEADER),
|
||||
// Complete RSDP structure.
Rsdp_size = sizeof(ACPI_TABLE_RSDP),
|
||||
// Part of the RSDP covered by its first checksum, see write_rsdp().
Rsdp_v1_size = sizeof(ACPI_RSDP_COMMON),
|
||||
// Complete FACS structure.
Facs_size = sizeof(ACPI_TABLE_FACS)
|
||||
};
|
||||
|
||||
// Identifiers of the ACPI tables whose start offset the Writer records.
// The numeric value of an enumerator is used as index into
// Writer::_tables, and Num_values is the size of that array, so it has to
// stay the last enumerator.
enum class Table : unsigned
|
||||
{
|
||||
Rsdt,
|
||||
Xsdt,
|
||||
Fadt,
|
||||
Madt,
|
||||
Mcfg,
|
||||
Facs,
|
||||
Dsdt,
|
||||
Num_values,
|
||||
};
|
||||
|
||||
/**
|
||||
* Helps with generating ACPI structures by providing abstractions for common
|
||||
* operations, table references and checksums.
|
||||
*
|
||||
* Table reference fields and checksum fields are not filled in immediately,
|
||||
* but instead a list of fixups is kept for them. Firstly, this simplifies the
|
||||
* creation of ACPI structures, since the size and layout of the tables no
|
||||
* longer have to be calculated in advance, which is particularly tricky for
|
||||
* dynamically-sized tables. Secondly, this allows a more flexible use of the
|
||||
* generated ACPI structures, since they can now be relocated to arbitrary
|
||||
* memory addresses thanks to the fixups.
|
||||
*/
|
||||
class Writer
|
||||
{
|
||||
public:
|
||||
Writer(l4_addr_t buf_addr, unsigned buf_size)
|
||||
: _buf_addr(buf_addr), _buf_size(buf_size), _pos(0)
|
||||
{}
|
||||
|
||||
/**
|
||||
* Return current write position.
|
||||
*/
|
||||
unsigned pos() const
|
||||
{ return _pos; }
|
||||
|
||||
/**
|
||||
* Return number of unused bytes remaining in the write buffer.
|
||||
*/
|
||||
unsigned remaining_size() const
|
||||
{ return _buf_size - _pos; }
|
||||
|
||||
/**
|
||||
* Register the given ACPI table to start at the current write position, if
|
||||
* necessary adjusted to the tables alignment requirements. Then reserve
|
||||
* memory for the ACPI table.
|
||||
*
|
||||
* \tparam T Type of the table.
|
||||
* \param table Table
|
||||
* \param len Length of memory to reserve for the table.
|
||||
* \param align Alignment required by the table.
|
||||
*/
|
||||
template<typename T>
|
||||
T *start_table(Table table, unsigned len = sizeof(T), unsigned align = 8)
|
||||
{
|
||||
if (_pos % align != 0)
|
||||
reserve<void>(align - (_pos % align));
|
||||
|
||||
_tables[static_cast<unsigned>(table)] = _pos;
|
||||
return reserve<T>(len);
|
||||
}
|
||||
|
||||
/**
|
||||
* Reserve memory.
|
||||
*
|
||||
* \tparam T Type to reserve memory for.
|
||||
* \param len Length of the memory to reserve, defaults to size of T.
|
||||
*/
|
||||
template<typename T = void>
|
||||
T *reserve(unsigned len = sizeof(T))
|
||||
{
|
||||
if (_pos + len > _buf_size)
|
||||
{
|
||||
Err().printf("ACPI table memory allocation exhausted. "
|
||||
"Please configure less ACPI devices "
|
||||
"or raise the ACPI table size limit.\n");
|
||||
L4Re::throw_error(-L4_ENOMEM, "ACPI table memory allocation exhausted.");
|
||||
}
|
||||
|
||||
T *base = as_ptr<T>(_pos);
|
||||
_pos += len;
|
||||
return base;
|
||||
}
|
||||
|
||||
/**
|
||||
* Write an identifier with correct padding.
|
||||
*
|
||||
* \param dest Pointer to the memory destination.
|
||||
* \param value String to write.
|
||||
* \param len Length of the identifier field.
|
||||
*/
|
||||
static void write_identifier(char *dest, char const *value, l4_size_t len)
|
||||
{
|
||||
auto value_length = strlen(value);
|
||||
|
||||
assert(value_length <= len && "Supplied identifier fits into field.");
|
||||
|
||||
memcpy(dest, value, value_length);
|
||||
memset(dest + value_length, ' ', len - value_length);
|
||||
}
|
||||
|
||||
/**
|
||||
* Write a common header for ACPI tables as defined in section 5.2.6 of the
|
||||
* ACPI Specification.
|
||||
*
|
||||
* \param h Table header.
|
||||
* \param sig Signature as described in Table 5-29.
|
||||
* \param rev Revision of the table.
|
||||
* \param len Total length of the table.
|
||||
*/
|
||||
void write_header(ACPI_TABLE_HEADER *h, char const *sig, l4_uint8_t rev,
|
||||
l4_uint32_t len)
|
||||
{
|
||||
memcpy(h->Signature, sig, ACPI_NAMESEG_SIZE);
|
||||
h->Length = len;
|
||||
h->Revision = rev;
|
||||
add_checksum(&h->Checksum, h, len);
|
||||
write_identifier(h->OemId, "L4RE", ACPI_OEM_ID_SIZE);
|
||||
write_identifier(h->OemTableId, "UVMM", ACPI_OEM_TABLE_ID_SIZE);
|
||||
h->OemRevision = 1;
|
||||
memcpy(h->AslCompilerId, "UVMM", ACPI_NAMESEG_SIZE);
|
||||
h->AslCompilerRevision = 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Write header for a table and automatically determine size as delta
|
||||
* between start position of the table and the current position of the
|
||||
* writer.
|
||||
*
|
||||
* Useful for tables with dynamic size.
|
||||
*
|
||||
* \param h Table header, must be at the very beginning of the table.
|
||||
* \param sig Signature as described in Table 5-29.
|
||||
* \param rev Revision of the table.
|
||||
*/
|
||||
void end_table(ACPI_TABLE_HEADER *h, char const *sig, l4_uint8_t rev)
|
||||
{
|
||||
write_header(h, sig, rev, _pos - as_offset(h));
|
||||
}
|
||||
|
||||
/**
|
||||
* Reserve an MADT subtable and write its header.
|
||||
*
|
||||
* \tparam T Type of the MADT subtable.
|
||||
* \param type MADT subtable type.
|
||||
*/
|
||||
template<typename T>
|
||||
T *reserve_madt_subtable(enum AcpiMadtType type)
|
||||
{
|
||||
T *subtable = reserve<T>();
|
||||
subtable->Header.Type = type;
|
||||
subtable->Header.Length = sizeof(T);
|
||||
return subtable;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add fixup for table reference field.
|
||||
*
|
||||
* \tparam T Type of the table reference field.
|
||||
* \param ref Table reference field.
|
||||
* \param table Table that is referenced.
|
||||
*/
|
||||
template<typename T>
|
||||
void add_table_ref(T const *ref, Table table)
|
||||
{
|
||||
_table_refs.emplace_back(Table_ref{as_offset(ref), sizeof(T), table});
|
||||
}
|
||||
|
||||
/**
|
||||
* Add fixup for checksum field.
|
||||
*
|
||||
* \param checksum Checksum field.
|
||||
* \param base Pointer to start of memory area to checksum.
|
||||
* \param len Length of the memory area to checksum.
|
||||
*/
|
||||
void add_checksum(l4_uint8_t *checksum, void *base, unsigned len)
|
||||
{
|
||||
// Although we do not calculate the checksum here, ensure that the
|
||||
// checksum field is zeroed, which is required for checksum computation.
|
||||
*checksum = 0U;
|
||||
_checksums.emplace_back(Checksum{as_offset(checksum),
|
||||
as_offset(base), len});
|
||||
}
|
||||
|
||||
/**
|
||||
* Table reference placeholder.
*
* Records where a field that refers to another table is located, so that
* the field can be filled in once the final table addresses are known.
* Created by add_table_ref().
|
||||
*/
|
||||
struct Table_ref
|
||||
{
|
||||
/// Offset of table reference field in write buffer.
|
||||
unsigned offset;
|
||||
/// Size of table reference field in bytes.
|
||||
unsigned size;
|
||||
/// Table that is referenced.
|
||||
Table table;
|
||||
};
|
||||
|
||||
/**
|
||||
* Checksum placeholder.
*
* Records the location of a checksum field and of the memory area it
* covers, so that the checksum can be calculated later. Created by
* add_checksum().
|
||||
*/
|
||||
struct Checksum
|
||||
{
|
||||
/// Offset of checksum field in write buffer.
|
||||
unsigned field_off;
|
||||
/// Offset of the memory area to checksum in write buffer.
|
||||
unsigned offset;
|
||||
/// Length of the memory area to checksum in bytes.
|
||||
unsigned len;
|
||||
};
|
||||
|
||||
/// Return table reference placeholders.
|
||||
std::vector<Table_ref> const &table_refs() const { return _table_refs; }
|
||||
/// Return checksum placeholders.
|
||||
std::vector<Checksum> const &checksums() const { return _checksums; }
|
||||
|
||||
/**
|
||||
* Return start offset of the given table.
|
||||
*/
|
||||
unsigned table_offset(Table table) const
|
||||
{ return _tables[static_cast<unsigned>(table)]; }
|
||||
|
||||
/**
|
||||
* Convert offset into virtual address.
|
||||
*/
|
||||
l4_addr_t as_addr(unsigned offset) const
|
||||
{
|
||||
assert(offset < _buf_size);
|
||||
return _buf_addr + offset;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert offset into pointer.
|
||||
*/
|
||||
template<typename T = void>
|
||||
T *as_ptr(unsigned offset) const
|
||||
{ return reinterpret_cast<T *>(as_addr(offset)); }
|
||||
|
||||
private:
|
||||
unsigned as_offset(void const *ptr) const
|
||||
{
|
||||
l4_addr_t addr = reinterpret_cast<l4_addr_t>(ptr);
|
||||
assert(addr >= _buf_addr);
|
||||
return addr - _buf_addr;
|
||||
}
|
||||
|
||||
/// Virtual address of the start of the write buffer.
l4_addr_t _buf_addr;
|
||||
/// Size of the write buffer in bytes.
unsigned _buf_size;
|
||||
/// Current write position as offset into the write buffer.
unsigned _pos;
|
||||
/// Start offset of each table, indexed by the numeric value of Table.
std::array<unsigned, static_cast<unsigned>(Table::Num_values)> _tables;
|
||||
/// Table reference fixups registered with add_table_ref().
std::vector<Table_ref> _table_refs;
|
||||
/// Checksum fixups registered with add_checksum().
std::vector<Checksum> _checksums;
|
||||
}; // class Writer
|
||||
|
||||
/**
|
||||
* Write a Root System Description Pointer (RSDP).
|
||||
*
|
||||
* Base ACPI structure as defined in section 5.2.5 of the ACPI Specification.
|
||||
* This class includes the ACPI 2.0+ extensions.
|
||||
*/
|
||||
static void write_rsdp(Writer &wr)
|
||||
{
|
||||
auto *t = wr.reserve<ACPI_TABLE_RSDP>(Rsdp_size);
|
||||
memcpy(t->Signature, ACPI_SIG_RSDP, sizeof(t->Signature));
|
||||
wr.add_checksum(&t->Checksum, t, Rsdp_v1_size);
|
||||
wr.write_identifier(t->OemId, "L4RE", ACPI_OEM_ID_SIZE);
|
||||
if (Vmm::Cpu_dev::get_max_vcpu_id() >= 0xff)
|
||||
t->Revision = 4; // needs Local X2APIC MADT entries: ACPI 4.0+
|
||||
else
|
||||
t->Revision = 2; // ACPI 2.0+
|
||||
wr.add_table_ref(&t->RsdtPhysicalAddress, Table::Rsdt);
|
||||
wr.add_table_ref(&t->XsdtPhysicalAddress, Table::Xsdt);
|
||||
t->Length = Rsdp_size;
|
||||
wr.add_checksum(&t->ExtendedChecksum, t, Rsdp_size);
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes all implemented ACPI tables.
|
||||
*/
|
||||
static void write_all_tables(Writer &wr, Vdev::Device_lookup *devs)
|
||||
{
|
||||
write_rsdt(wr);
|
||||
write_xsdt(wr);
|
||||
write_fadt(wr);
|
||||
write_madt(wr, devs->cpus()->max_cpuid() + 1, devs->cpus(),
|
||||
Madt_int_override_storage::get()->overrides());
|
||||
write_mcfg(wr);
|
||||
write_facs(wr);
|
||||
write_dsdt(wr);
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute ACPI checksum for memory area.
|
||||
*
|
||||
* \param dest Base address of the memory area.
|
||||
* \param len Length of the memory area.
|
||||
*
|
||||
* \return Value so that the sum of all bytes in the memory area modulo 256
|
||||
* is zero.
|
||||
*/
|
||||
static l4_uint8_t compute_checksum(void *dest, unsigned len)
|
||||
{
|
||||
l4_uint8_t *bytes = reinterpret_cast<l4_uint8_t *>(dest);
|
||||
l4_uint8_t sum = 0;
|
||||
for (unsigned i = 0; i < len; i++)
|
||||
sum += bytes[i];
|
||||
|
||||
return -sum;
|
||||
}
|
||||
|
||||
private:
|
||||
/**
|
||||
* Write a Root System Description Table (RSDT) or an Extended System
|
||||
* Description Table (XSDT).
|
||||
*
|
||||
* Table holding pointers to other system description tables as defined in
|
||||
* sections 5.2.7 (RSDT) and 5.2.8 (XSDT) of the ACPI 3.0 Specification.
|
||||
*/
|
||||
template <typename TABLE>
|
||||
static void write_rsdt_xsdt(Writer &wr)
|
||||
{
|
||||
// Tables that RSDT / XSDT refers to.
|
||||
static constexpr std::array<Table, 3> ref_tables = {
|
||||
Table::Madt,
|
||||
Table::Fadt,
|
||||
Table::Mcfg,
|
||||
};
|
||||
|
||||
// RSDT/XSDT table header plus a 32/64-bit word per table pointer.
|
||||
constexpr auto size =
|
||||
Header_size + ref_tables.size() * sizeof(TABLE::TableOffsetEntry[0]);
|
||||
|
||||
constexpr Table table
|
||||
= (std::is_same<TABLE, ACPI_TABLE_RSDT>::value)
|
||||
? Table::Rsdt : Table::Xsdt;
|
||||
auto *t = wr.start_table<TABLE>(table, size);
|
||||
|
||||
// The acpi_table_{rsdt/xsdt} struct defines only one entry, but we simply
|
||||
// use the extra space allocated in the header. Do not forget to update
|
||||
// Num_table_refs when adding or removing a table reference here.
|
||||
for (l4_size_t i = 0; i < ref_tables.size(); i++)
|
||||
wr.add_table_ref(&t->TableOffsetEntry[i], ref_tables[i]);
|
||||
|
||||
constexpr char const *sig
|
||||
= (std::is_same<TABLE, ACPI_TABLE_RSDT>::value)
|
||||
? ACPI_SIG_RSDT : ACPI_SIG_XSDT;
|
||||
wr.end_table(&t->Header, sig, 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Write a Root System Description Table (RSDT).
|
||||
*
|
||||
* Table holding pointers to other system description tables as defined in
|
||||
* section 5.2.7 of the ACPI 3.0 Specification.
|
||||
*/
|
||||
static void write_rsdt(Writer &wr)
|
||||
{
|
||||
write_rsdt_xsdt<ACPI_TABLE_RSDT>(wr);
|
||||
}
|
||||
|
||||
/**
|
||||
* Write an Extended System Description Table (XSDT).
|
||||
*
|
||||
* Table holding pointers to other system description tables as defined in
|
||||
* section 5.2.8 of the ACPI 3.0 Specification.
|
||||
*/
|
||||
static void write_xsdt(Writer &wr)
|
||||
{
|
||||
write_rsdt_xsdt<ACPI_TABLE_XSDT>(wr);
|
||||
}
|
||||
|
||||
/**
|
||||
* Write a Fixed ACPI Description Table (FADT).
|
||||
*
|
||||
* Table providing fixed hardware information as defined in section 5.2.8 of
|
||||
* the ACPI Specification.
|
||||
*/
|
||||
static void write_fadt(Writer &wr)
|
||||
{
|
||||
auto *t = wr.start_table<ACPI_TABLE_FADT>(Table::Fadt);
|
||||
|
||||
// Switching on Hardware-Reduced ACPI has the positive effect of
|
||||
// eliminating a lot of legacy features we do not implement.
|
||||
// However, with that flag on Linux requires the DSDT to be properly set
|
||||
// up for finding PCI devices.
|
||||
// t->Flags = (1 << 20); // HW_REDUCED_ACPI
|
||||
|
||||
wr.add_table_ref(&t->Dsdt, Table::Dsdt);
|
||||
t->XDsdt = 0; // For now we don't implement the extended DSDT.
|
||||
wr.add_table_ref(&t->Facs, Table::Facs);
|
||||
t->XFacs = 0;
|
||||
|
||||
// How to pick the ID?
|
||||
t->HypervisorId = 0;
|
||||
|
||||
for (auto const &d : Acpi_device_hub::get()->devices())
|
||||
d->amend_fadt(t);
|
||||
|
||||
// Emulate ACPI 6.3.
|
||||
wr.end_table(&t->Header, ACPI_SIG_FADT, 6);
|
||||
t->MinorRevision = 3;
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a Multiple APIC Description Table (MADT).
|
||||
*
|
||||
* The MADT lists Advanced Programmable Interrupt Controllers in the system
|
||||
* as defined in section 5.2.12 of the ACPI Specification.
|
||||
*
|
||||
* \param nr_cpus The number of enabled CPUs.
|
||||
* \param cpus Pointer to the CPU container.
|
||||
*/
|
||||
static void
|
||||
write_madt(Writer &wr, unsigned nr_cpus,
|
||||
cxx::Ref_ptr<Vmm::Cpu_dev_array> cpus,
|
||||
std::vector<Madt_int_override> const &madt_int_overrides)
|
||||
{
|
||||
auto *t = wr.start_table<ACPI_TABLE_MADT>(Table::Madt);
|
||||
|
||||
t->Address = Gic::Lapic_access_handler::Mmio_addr;
|
||||
// ACPI 6.3 Specification, Table 5-44:
|
||||
// not a PC-AT-compatible dual-8259 setup
|
||||
t->Flags = 0;
|
||||
|
||||
// I/O APIC Structure.
|
||||
// Provide information about the system's I/O APICs as defined in section
|
||||
// 5.2.12.3 of the ACPI Specification.
|
||||
auto *ioapic = wr.reserve_madt_subtable<ACPI_MADT_IO_APIC>(
|
||||
ACPI_MADT_TYPE_IO_APIC);
|
||||
ioapic->Reserved = 0;
|
||||
ioapic->Id = 0;
|
||||
ioapic->Address = Gic::Io_apic::Mmio_addr;
|
||||
ioapic->GlobalIrqBase = 0;
|
||||
|
||||
// Interrupt Override Structure.
|
||||
// Information about overriding ISA specified interrupt numbers with new
|
||||
// ones.
|
||||
for (auto const &over : madt_int_overrides)
|
||||
{
|
||||
auto *tbl = wr.reserve_madt_subtable<ACPI_MADT_INTERRUPT_OVERRIDE>(
|
||||
ACPI_MADT_TYPE_INTERRUPT_OVERRIDE);
|
||||
tbl->Bus = 0;
|
||||
tbl->SourceIrq = over.src_irq;
|
||||
tbl->GlobalIrq = over.gsi;
|
||||
tbl->IntiFlags = over.flags;
|
||||
}
|
||||
|
||||
// Processor Local APIC Structure.
|
||||
// Structure to be appended to the MADT base table for each local APIC.
|
||||
// Defined in section 5.2.12.2 of the ACPI Specification.
|
||||
for (unsigned i = 0; i < nr_cpus; ++i)
|
||||
{
|
||||
// ACPI spec 4.0 / 5.2.12.12: Processor Local x2APIC Structure: Logical
|
||||
// processors with APIC ID values less than 255 must use the Processor
|
||||
// Local APIC structure to convey their APIC information to OSPM.
|
||||
unsigned vcpu_id = cpus->vcpu(i).get_vcpu_id();
|
||||
if (vcpu_id < 0xff)
|
||||
{
|
||||
auto *lapic = wr.reserve_madt_subtable<ACPI_MADT_LOCAL_APIC>(
|
||||
ACPI_MADT_TYPE_LOCAL_APIC);
|
||||
lapic->ProcessorId = i;
|
||||
lapic->Id = vcpu_id;
|
||||
lapic->LapicFlags = 1; // Enable CPU.
|
||||
}
|
||||
}
|
||||
|
||||
// Processor Local X2APIC Structure.
|
||||
// Structure to be appended to the MADT base table for each local X2APIC.
|
||||
// Defined in section 5.2.12.12 of the ACPI 4.0 Specification.
|
||||
for (unsigned i = 0; i < nr_cpus; ++i)
|
||||
{
|
||||
unsigned vcpu_id = cpus->vcpu(i).get_vcpu_id();
|
||||
if (vcpu_id >= 0xff)
|
||||
{
|
||||
auto *lx2apic = wr.reserve_madt_subtable<ACPI_MADT_LOCAL_X2APIC>(
|
||||
ACPI_MADT_TYPE_LOCAL_X2APIC);
|
||||
lx2apic->LocalApicId = vcpu_id;
|
||||
lx2apic->LapicFlags = 1; // Enable CPU.
|
||||
lx2apic->Uid = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Finally fill the table header.
|
||||
wr.end_table(&t->Header, ACPI_SIG_MADT, 5);
|
||||
}
|
||||
|
||||
/**
|
||||
* Write PCI Express memory mapped configuration space base address
|
||||
* Description Table (MCFG).
|
||||
*/
|
||||
static void write_mcfg(Writer &wr)
|
||||
{
|
||||
auto *t = wr.start_table<ACPI_TABLE_MCFG>(Table::Mcfg);
|
||||
|
||||
for (auto const &d : Acpi_device_hub::get()->devices())
|
||||
{
|
||||
auto *ptr = wr.as_ptr<ACPI_MCFG_ALLOCATION>(wr.pos());
|
||||
auto amend_size = d->amend_mcfg(ptr, wr.remaining_size());
|
||||
wr.reserve(amend_size);
|
||||
}
|
||||
|
||||
wr.end_table(&t->Header, ACPI_SIG_MCFG, 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Write a Firmware ACPI Control Structure (FACS).
|
||||
*/
|
||||
static void write_facs(Writer &wr)
|
||||
{
|
||||
auto *t = wr.start_table<ACPI_TABLE_FACS>(Table::Facs, Facs_size, 64);
|
||||
memcpy(t->Signature, ACPI_SIG_FACS, ACPI_NAMESEG_SIZE);
|
||||
t->Length = Facs_size;
|
||||
t->Version = 2;
|
||||
// other fields written by OSPM or should be zero.
|
||||
}
|
||||
|
||||
/**
|
||||
* Write Differentiated System Description Table (DSDT).
|
||||
*/
|
||||
static void write_dsdt(Writer &wr)
|
||||
{
|
||||
auto *t = wr.start_table<ACPI_TABLE_HEADER>(Table::Dsdt);
|
||||
|
||||
// Collect the highest priority DSDT fragments of ACPI devices.
|
||||
for (auto const &d : Acpi_device_hub::get()->devices())
|
||||
{
|
||||
void *ptr = wr.as_ptr(wr.pos());
|
||||
auto amend_size = d->amend_dsdt(ptr, wr.remaining_size());
|
||||
wr.reserve(amend_size);
|
||||
}
|
||||
|
||||
// Collect the lowest priority DSDT fragments of ACPI devices.
|
||||
for (auto const &d : Acpi_device_hub::get()->devices())
|
||||
{
|
||||
void *ptr = wr.as_ptr(wr.pos());
|
||||
auto amend_size = d->amend_dsdt_late(ptr, wr.remaining_size());
|
||||
wr.reserve(amend_size);
|
||||
}
|
||||
|
||||
// The revision of DSDT controls the integer width of AML code/interpreter.
|
||||
// Values less than two imply 32-bit integers and math, otherwise 64-bit
|
||||
// (see also ComplianceRevision in AML DefinitionBlock)
|
||||
wr.end_table(t, ACPI_SIG_DSDT, 1);
|
||||
}
|
||||
};
|
||||
|
||||
class Bios_tables : public Tables
|
||||
{
|
||||
enum : l4_uint32_t
|
||||
{
|
||||
/**
|
||||
* Physical location of the RSDP according to section 5.2.5.1 of the ACPI
|
||||
* Specification.
|
||||
*/
|
||||
Phys_start_addr = 0x0E0000
|
||||
};
|
||||
|
||||
public:
|
||||
/**
|
||||
* ACPI control structure.
|
||||
*
|
||||
* \param ram Guest RAM.
|
||||
*/
|
||||
Bios_tables(Vdev::Device_lookup *devs)
|
||||
: _devs(devs)
|
||||
{
|
||||
info.printf("Initialize legacy BIOS ACPI tables.\n");
|
||||
_dest_addr = _devs->ram()->guest2host<l4_addr_t>(Vmm::Guest_addr(Phys_start_addr));
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate positions for each table and write them in place.
|
||||
*/
|
||||
void write_to_guest()
|
||||
{
|
||||
// we allow the rsdp and all tables to take up one page
|
||||
l4_size_t max_size = L4_PAGESIZE;
|
||||
|
||||
auto acpi_mem = Vmm::Region::ss(Vmm::Guest_addr(Phys_start_addr), max_size,
|
||||
Vmm::Region_type::Ram);
|
||||
// Throws an exception if the ACPI memory region isn't within guest RAM.
|
||||
_devs->ram()->guest2host<l4_addr_t>(acpi_mem);
|
||||
|
||||
// Clear memory because we do not rely on the DS provider to do this for
|
||||
// us, and we must not have spurious values in ACPI tables.
|
||||
memset(reinterpret_cast<void *>(_dest_addr), 0, max_size);
|
||||
|
||||
Writer wr(_dest_addr, max_size);
|
||||
write_rsdp(wr);
|
||||
write_all_tables(wr, _devs);
|
||||
resolve_table_refs_and_checksums(wr);
|
||||
|
||||
l4_addr_t facs_off = wr.table_offset(Tables::Table::Facs);
|
||||
Facs_storage::get()->set_addr(wr.as_ptr<ACPI_TABLE_FACS>(facs_off));
|
||||
Facs_storage::get()->set_gaddr(
|
||||
acpi_phys_addr<l4_uint32_t>(wr.as_addr(facs_off)));
|
||||
}
|
||||
|
||||
private:
|
||||
void resolve_table_refs_and_checksums(Writer &wr)
|
||||
{
|
||||
for (Writer::Table_ref const &ref : wr.table_refs())
|
||||
{
|
||||
l4_addr_t table_addr = wr.as_addr(wr.table_offset(ref.table));
|
||||
if (ref.size == sizeof(l4_uint32_t))
|
||||
*wr.as_ptr<l4_uint32_t>(ref.offset) =
|
||||
acpi_phys_addr<l4_uint32_t>(table_addr);
|
||||
else if (ref.size == sizeof(l4_uint64_t)) // XSDT
|
||||
*wr.as_ptr<l4_uint64_t>(ref.offset) =
|
||||
acpi_phys_addr<l4_uint64_t>(table_addr);
|
||||
else
|
||||
L4Re::throw_error(-L4_EINVAL, "Unsupported table offset size.");
|
||||
}
|
||||
|
||||
for (Writer::Checksum const &checksum : wr.checksums())
|
||||
{
|
||||
l4_uint8_t *field = wr.as_ptr<l4_uint8_t>(checksum.field_off);
|
||||
// Calculate and write checksum.
|
||||
*field = compute_checksum(wr.as_ptr(checksum.offset), checksum.len);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute guest-physical address of target table.
|
||||
*
|
||||
* \param virt_target_addr Virtual address of the target table.
|
||||
*
|
||||
* \return 32-bit guest-physical address of the target table.
|
||||
*/
|
||||
template <typename T>
|
||||
T acpi_phys_addr(l4_addr_t virt_target_addr) const
|
||||
{
|
||||
return Phys_start_addr + static_cast<T>(virt_target_addr - _dest_addr);
|
||||
}
|
||||
|
||||
Vdev::Device_lookup *_devs;
|
||||
l4_addr_t _dest_addr;
|
||||
};
|
||||
|
||||
|
||||
} // namespace Acpi
|
||||
@@ -1,555 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2024 Kernkonzept GmbH.
|
||||
* Author(s): Steffen Liebergeld <steffen.liebergeld@kernkonzept.com>
|
||||
*
|
||||
* License: see LICENSE.spdx (in this directory or the directories above)
|
||||
*/
|
||||
#include <l4/sys/vcon>
|
||||
|
||||
#include "acpi.h"
|
||||
#include "device_factory.h"
|
||||
#include "irq_dt.h"
|
||||
#include "monitor/virtio_input_power_cmd_handler.h"
|
||||
#include "vbus_event.h"
|
||||
|
||||
#include <l4/re/event_enums.h>
|
||||
#include <l4/vbus/vbus>
|
||||
#include <l4/vbus/vbus_inhibitor.h>
|
||||
|
||||
namespace Acpi {
|
||||
|
||||
/**
|
||||
* \file
|
||||
* Acpi platform support
|
||||
*
|
||||
* This implements minimal Acpi command support. Enough that Linux believes
|
||||
* that Acpi works and that it uses Acpi shutdown.
|
||||
*
|
||||
* This requires a device tree entry like this.
|
||||
*
|
||||
* \code{.dtb}
|
||||
* acpi_platform {
|
||||
* compatible = "virt-acpi";
|
||||
* interrupt-parent = <&PIC>;
|
||||
* interrupts = <9>;
|
||||
* // Optional: Connect vcon to trigger ACPI power events.
|
||||
* // l4vmm,pwrinput = "acpi_pwr_input";
|
||||
* };
|
||||
* \endcode
|
||||
*
|
||||
* You may configure a different interrupt number for the system control
|
||||
* interrupt (SCI), but make sure it does not collide.
|
||||
*
|
||||
* The interrupt parent is mandatory. The SCI is currently only used during
|
||||
* Acpi probing.
|
||||
*/
|
||||
|
||||
template <typename DEV>
|
||||
class Vcon_pwr_input
|
||||
: public L4::Irqep_t<Vcon_pwr_input<DEV> >,
|
||||
public Monitor::Virtio_input_power_cmd_handler<Monitor::Enabled,
|
||||
Vcon_pwr_input<DEV>>
|
||||
{
|
||||
friend Monitor::Virtio_input_power_cmd_handler<Monitor::Enabled,
|
||||
Vcon_pwr_input<DEV>>;
|
||||
|
||||
public:
|
||||
Vcon_pwr_input(L4::Cap<L4::Vcon> con)
|
||||
: _con(con) {}
|
||||
|
||||
~Vcon_pwr_input()
|
||||
{
|
||||
if (_con_irq.is_valid())
|
||||
if (long err = l4_error(_con->unbind(0, _con_irq)) < 0)
|
||||
warn().printf("Unbind notification IRQ from Vcon: %s\n.",
|
||||
l4sys_errtostr(err));
|
||||
}
|
||||
|
||||
void register_obj(L4::Registry_iface *registry)
|
||||
{
|
||||
_con_irq = L4Re::chkcap(registry->register_irq_obj(this),
|
||||
"Register IRQ of Vcon-pwr-input device.");
|
||||
L4Re::chksys(_con->bind(0, _con_irq),
|
||||
"Binding Vcon notification irq failed.\n");
|
||||
}
|
||||
|
||||
void handle_irq();
|
||||
|
||||
bool inject_command(char cmd);
|
||||
|
||||
private:
|
||||
static Dbg warn() { return Dbg(Dbg::Dev, Dbg::Warn, "pwr-input"); }
|
||||
static Dbg trace() { return Dbg(Dbg::Dev, Dbg::Trace, "pwr-input"); }
|
||||
|
||||
DEV *dev() { return static_cast<DEV *>(this); }
|
||||
L4::Cap<L4::Vcon> _con;
|
||||
L4::Cap<L4::Irq> _con_irq;
|
||||
};
|
||||
|
||||
template<typename DEV>
|
||||
void
|
||||
Vcon_pwr_input<DEV>::handle_irq()
|
||||
{
|
||||
while (1)
|
||||
{
|
||||
int r = _con->read(NULL, 0);
|
||||
|
||||
if (r <= 0)
|
||||
break; // empty
|
||||
|
||||
char cmd;
|
||||
r = _con->read(&cmd, sizeof(cmd));
|
||||
|
||||
if (r < 0)
|
||||
{
|
||||
Err().printf("Vcon_pwr_input: read error: %d\n", r);
|
||||
break;
|
||||
}
|
||||
|
||||
inject_command(cmd);
|
||||
trace().printf("Vcon_pwr_input::handle_irq OK\n");
|
||||
_con->write("OK\n", 3);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename DEV>
|
||||
bool
|
||||
Vcon_pwr_input<DEV>::inject_command(char cmd)
|
||||
{
|
||||
bool ret = false;
|
||||
trace().printf("cmd=%c\n", cmd);
|
||||
|
||||
switch(cmd)
|
||||
{
|
||||
case 'a':
|
||||
case 's':
|
||||
case 'l':
|
||||
ret = dev()->inject_slpbtn();
|
||||
break;
|
||||
case 'p':
|
||||
case 'q':
|
||||
ret = dev()->inject_pwrbtn();
|
||||
break;
|
||||
case 'h':
|
||||
{
|
||||
char response[] = "a: apm suspend\ns: suspend\nl: sleep\np: power\n"
|
||||
"q: power2\n";
|
||||
_con->write(response, sizeof(response) - 1);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
warn().printf("Unknown character '%c'\n", cmd);
|
||||
break;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
class Acpi_platform:
|
||||
public Vmm::Io_device,
|
||||
public Vdev::Device,
|
||||
public Acpi_device,
|
||||
public Vcon_pwr_input<Acpi_platform>,
|
||||
public Vbus_stream_id_handler
|
||||
{
|
||||
private:
|
||||
enum Command_values : l4_uint16_t
|
||||
{
|
||||
Acpi_enable = 0xf2,
|
||||
Acpi_disable = 0xf1,
|
||||
Acpi_shutdown = 0x7,
|
||||
Acpi_suspend = 0x6,
|
||||
Reboot = 0x4,
|
||||
};
|
||||
|
||||
public:
|
||||
enum Ports : l4_uint16_t
|
||||
{
|
||||
Ports_start = 0x1800,
|
||||
Smi_command = Ports_start,
|
||||
Pm1a_cmd_block = Smi_command + 1, // 0x1801
|
||||
Pm1a_cmd_length = 2, // 2 ports
|
||||
Pm2_cmd_block = Pm1a_cmd_block + Pm1a_cmd_length, // 0x1803
|
||||
Pm2_cmd_length = 1, // 1 port
|
||||
Pm1a_event_block= Pm2_cmd_block + Pm2_cmd_length, // 0x1804
|
||||
Pm1a_sts = Pm1a_event_block,
|
||||
Pm1a_en = Pm1a_event_block + 2,
|
||||
Pm1_event_length= 4,
|
||||
Reset_register = Pm1a_event_block + Pm1_event_length, // 0x1808
|
||||
Ports_last = Reset_register, // inclusive end
|
||||
};
|
||||
|
||||
enum Events : l4_uint32_t
|
||||
{
|
||||
Pm1a_evt_gbl = 1U << 5,
|
||||
Pm1a_evt_pwrbtn = 1U << 8,
|
||||
Pm1a_evt_slpbtn = 1U << 9,
|
||||
Pm1a_evt_rtc = 1U << 10,
|
||||
|
||||
// PM1 events we implement.
|
||||
Pm1a_evt_supported = Pm1a_evt_gbl | Pm1a_evt_pwrbtn | Pm1a_evt_slpbtn
|
||||
| Pm1a_evt_rtc,
|
||||
};
|
||||
|
||||
Acpi_platform(Vdev::Device_lookup *devs, cxx::Ref_ptr<Gic::Ic> const &ic, int irq,
|
||||
L4::Cap<L4::Vcon> pwr_vcon)
|
||||
: Acpi_device(), Vcon_pwr_input<Acpi_platform>(pwr_vcon),
|
||||
_vmm(devs->vmm()),
|
||||
_sci(ic, irq),
|
||||
_irq(irq),
|
||||
_acpi_enabled(false),
|
||||
_pm1a_sts(0),
|
||||
_pm1a_en(0)
|
||||
{
|
||||
if (!devs->vbus()->available())
|
||||
return;
|
||||
|
||||
auto vbus = devs->vbus()->bus();
|
||||
info().printf("Registering as event handler for vbus->root() = %lx\n",
|
||||
vbus->root().dev_handle());
|
||||
Vbus_event::register_stream_id_handler(vbus->root().dev_handle(), this);
|
||||
}
|
||||
|
||||
char const *dev_name() const override
|
||||
{ return "ACPI platform"; }
|
||||
|
||||
void amend_fadt(ACPI_TABLE_FADT *t) const override
|
||||
{
|
||||
t->SmiCommand = Ports::Smi_command; // 32-bit port address of SMI command port
|
||||
t->SciInterrupt = _irq;
|
||||
t->AcpiEnable = Command_values::Acpi_enable;
|
||||
t->AcpiDisable = Command_values::Acpi_disable;
|
||||
|
||||
// 32-bit port address of Power Mgt 1a Control Reg Block
|
||||
t->Pm1aControlBlock = Ports::Pm1a_cmd_block;
|
||||
// size of block
|
||||
t->Pm1ControlLength = Ports::Pm1a_cmd_length;
|
||||
|
||||
// 32-bit port address of Power Mgt 2 Control Reg Block
|
||||
t->Pm2ControlBlock = Ports::Pm2_cmd_block;
|
||||
// size of block
|
||||
t->Pm2ControlLength = Ports::Pm2_cmd_length;
|
||||
|
||||
t->Pm1aEventBlock = Ports::Pm1a_event_block;
|
||||
t->Pm1EventLength = Ports::Pm1_event_length;
|
||||
|
||||
// Indicate the presence of an i8042 keyboard controller.
|
||||
if (_vmm->i8042_present())
|
||||
t->BootFlags |= ACPI_FADT_8042;
|
||||
|
||||
// set the reset register for ACPI reboot
|
||||
t->Flags |= ACPI_FADT_RESET_REGISTER;
|
||||
t->ResetRegister.Address = Ports::Reset_register;
|
||||
t->ResetRegister.SpaceId = ACPI_ADR_SPACE_SYSTEM_IO;
|
||||
t->ResetRegister.BitWidth = ACPI_RESET_REGISTER_WIDTH;
|
||||
t->ResetValue = Command_values::Reboot;
|
||||
}
|
||||
|
||||
/**
|
||||
* Write an ACPI control object to the DSDT table that allows the guest to
|
||||
* discover shutdown capability.
|
||||
*
|
||||
* This is described in section 7.4.2 of the ACPI specification.
|
||||
*
|
||||
* \param buf The memory are where to put the object.
|
||||
* \param max_size Maximum available size of the designated memory area.
|
||||
*/
|
||||
l4_size_t amend_dsdt(void *buf, l4_size_t max_size) const override
|
||||
{
|
||||
// _S3 == suspend to ram
|
||||
// _S5 == shutdown
|
||||
unsigned char dsdt_S3S5 [] =
|
||||
{
|
||||
0x08, 0x5F, 0x53, '3', 0x5F, 0x12, 0x08, 0x04,
|
||||
0x0A, Command_values::Acpi_suspend,
|
||||
0x0A, Command_values::Acpi_suspend,
|
||||
0x00, 0x00,
|
||||
0x08, 0x5F, 0x53, '5', 0x5F, 0x12, 0x08, 0x04,
|
||||
0x0A, Command_values::Acpi_shutdown,
|
||||
0x0A, Command_values::Acpi_shutdown,
|
||||
0x00, 0x00,
|
||||
};
|
||||
|
||||
l4_size_t size = sizeof(dsdt_S3S5);
|
||||
if (max_size < size)
|
||||
L4Re::throw_error(-L4_ENOMEM,
|
||||
"Not enough space in DSDT");
|
||||
memcpy(buf, reinterpret_cast<void*>(dsdt_S3S5), size);
|
||||
return size;
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle pm1a enable register.
|
||||
*
|
||||
* This handles a subset of the PM1A enable register as described in section
|
||||
* 4.8.3.1 of the ACPI specification. We support GBL_EN, PRWBTN_EN,
|
||||
* SLPBTN_EN and the RTC_EN bits. If both the corresponding status and the
|
||||
* enable bit is set, we inject an SCI.
|
||||
*/
|
||||
void handle_pm1a_en()
|
||||
{
|
||||
if (!_acpi_enabled)
|
||||
return;
|
||||
|
||||
// if sts and en bits are set we issue an SCI
|
||||
if (_pm1a_sts & _pm1a_en & Pm1a_evt_supported)
|
||||
{
|
||||
trace().printf("Injecting SCI\n");
|
||||
_sci.inject();
|
||||
}
|
||||
|
||||
trace().printf("_pm1a_sts = 0x%x _pm1a_en = 0x%x\n", _pm1a_sts, _pm1a_en);
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle a subset of the pm1a control register.
|
||||
*
|
||||
* This function handles the PM1A control register as described in section
|
||||
* 4.8.3.2 of the ACPI specification. We only handle the SLP_EN and SLP_TYPx
|
||||
* bits.
|
||||
*
|
||||
* \param value The value written to the register.
|
||||
*/
|
||||
void handle_pm1a_control(l4_uint32_t value)
|
||||
{
|
||||
enum
|
||||
{
|
||||
Slp_enable = 1 << 13,
|
||||
Slp_type_shutdown = Acpi_shutdown << 10,
|
||||
Slp_type_suspend = Acpi_suspend << 10,
|
||||
Slp_type_mask = 0x7 << 10,
|
||||
};
|
||||
static_assert((Slp_type_shutdown & Slp_type_mask) == Slp_type_shutdown,
|
||||
"ACPI platform: Sleep type shutdown within field bounds");
|
||||
static_assert((Slp_type_suspend & Slp_type_mask) == Slp_type_suspend,
|
||||
"ACPI platform: Sleep type suspend within field bounds");
|
||||
|
||||
if (value & Slp_enable)
|
||||
{
|
||||
if ((value & Slp_type_mask) == Slp_type_shutdown)
|
||||
{
|
||||
trace().printf("Guest requested power off. Bye\n");
|
||||
_vmm->shutdown(Vmm::Guest::Shutdown);
|
||||
}
|
||||
else if ((value & Slp_type_mask) == Slp_type_suspend)
|
||||
{
|
||||
trace().printf("System suspend requested\n");
|
||||
// If Uvmm loaded a guest Linux kernel itself, it emulates
|
||||
// firmware behaviour by resuming the guest directly at the
|
||||
// address the guest specified in the FACS.
|
||||
// Otherwise the VM resumes at the reset vector where firmware
|
||||
// shall take care of guest resume.
|
||||
if (_vmm->guest_type() == Boot::Binary_type::Linux)
|
||||
_vmm->suspend(Facs_storage::get()->waking_vector());
|
||||
else
|
||||
_vmm->suspend(0xffff'fff0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle IO port reads to the device.
|
||||
*
|
||||
* \param port IO port
|
||||
* \param[out] value The value read from the IO port.
|
||||
*/
|
||||
void io_in(unsigned port, Vmm::Mem_access::Width /*width*/,
|
||||
l4_uint32_t *value) override
|
||||
{
|
||||
port += Smi_command;
|
||||
*value = -1U;
|
||||
switch (port)
|
||||
{
|
||||
case Smi_command:
|
||||
*value = 0;
|
||||
break;
|
||||
case Pm1a_cmd_block:
|
||||
if (_acpi_enabled)
|
||||
*value = 1; // SMI_EN == 1
|
||||
else
|
||||
*value = 0;
|
||||
break;
|
||||
case Pm1a_sts:
|
||||
trace().printf("read _pm1a_sts = 0x%x\n", _pm1a_sts);
|
||||
*value = _pm1a_sts;
|
||||
break;
|
||||
case Pm1a_en:
|
||||
trace().printf("read _pm1a_en = 0x%x\n", _pm1a_en);
|
||||
*value = _pm1a_en;
|
||||
break;
|
||||
default:
|
||||
trace().printf("IO IN port=%x value=%x\n", port, *value);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle IO port writes to device IO ports.
|
||||
*
|
||||
* \param port IO Port
|
||||
* \param value The value written to the port.
|
||||
*/
|
||||
void io_out(unsigned port, Vmm::Mem_access::Width /*width*/,
|
||||
l4_uint32_t value) override
|
||||
{
|
||||
port += Smi_command;
|
||||
switch (port)
|
||||
{
|
||||
case Smi_command:
|
||||
if (value == Acpi_enable)
|
||||
{
|
||||
trace().printf("Acpi enabled\n");
|
||||
_acpi_enabled = true;
|
||||
}
|
||||
else if (value == Acpi_disable)
|
||||
{
|
||||
trace().printf("Acpi disabled\n");
|
||||
_acpi_enabled = false;
|
||||
}
|
||||
break;
|
||||
case Pm1a_cmd_block:
|
||||
handle_pm1a_control(value);
|
||||
break;
|
||||
case Pm1a_sts:
|
||||
trace().printf("write _pm1a_sts = 0x%x\n", value);
|
||||
_pm1a_sts &= ~(value & Pm1a_evt_supported);
|
||||
if ((_pm1a_sts & _pm1a_en) == 0U)
|
||||
{
|
||||
trace().printf("SCI ack\n");
|
||||
_sci.ack();
|
||||
}
|
||||
break;
|
||||
case Pm1a_en:
|
||||
trace().printf("write _pm1a_en = 0x%x\n", value);
|
||||
_pm1a_en = value;
|
||||
handle_pm1a_en();
|
||||
break;
|
||||
case Reset_register:
|
||||
if (value == Command_values::Reboot)
|
||||
{
|
||||
trace().printf("Reboot requested. Bye\n");
|
||||
_vmm->shutdown(Vmm::Guest::Reboot);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
trace().printf("IO OUT port=%x value=%x\n", port, value);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
bool inject_slpbtn()
|
||||
{
|
||||
if (!_acpi_enabled || !(_pm1a_en & Pm1a_evt_slpbtn))
|
||||
return false;
|
||||
|
||||
_pm1a_sts |= Pm1a_evt_slpbtn;
|
||||
_sci.inject();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool inject_pwrbtn()
|
||||
{
|
||||
if (!_acpi_enabled || !(_pm1a_en & Pm1a_evt_pwrbtn))
|
||||
return false;
|
||||
|
||||
_pm1a_sts |= Pm1a_evt_pwrbtn;
|
||||
_sci.inject();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void handle_event(L4Re::Event_buffer::Event *e) override
|
||||
{
|
||||
// Here we handle inhibitor signals.
|
||||
//
|
||||
// Iff Uvmm has a vbus, it will grab inhibitor locks for suspend and
|
||||
// shutdown. The rationale is that IO is only allowed to shutdown and/or
|
||||
// suspend the system once all inhibitor locks are free. To that end, IO
|
||||
// will send out inhibitor signals to its vbus clients. The clients shall
|
||||
// suspend/shutdown their devices and free the inhibitor lock.
|
||||
//
|
||||
// Management of the locks itself is done in pm.{cc,h}
|
||||
|
||||
if (e->payload.type != L4RE_EV_PM)
|
||||
{
|
||||
warn().printf("Unexpected event type (0x%x). Ignoring.\n", e->payload.type);
|
||||
return;
|
||||
}
|
||||
|
||||
switch (e->payload.code)
|
||||
{
|
||||
case L4VBUS_INHIBITOR_SUSPEND:
|
||||
info().printf("SUSPEND signal\n");
|
||||
inject_slpbtn();
|
||||
break;
|
||||
case L4VBUS_INHIBITOR_SHUTDOWN:
|
||||
info().printf("SHUTDOWN signal\n");
|
||||
inject_pwrbtn();
|
||||
break;
|
||||
case L4VBUS_INHIBITOR_WAKEUP:
|
||||
// The IPC for this signal will have woken Uvmm up. Nothing to do
|
||||
// here.
|
||||
break;
|
||||
default:
|
||||
warn().printf("Unknown PM event: code 0x%x.\n", e->payload.code);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
static Dbg trace() { return Dbg(Dbg::Dev, Dbg::Trace, "Acpi_platform"); }
|
||||
static Dbg warn() { return Dbg(Dbg::Dev, Dbg::Warn, "Acpi_platform"); }
|
||||
static Dbg info() { return Dbg(Dbg::Dev, Dbg::Info, "Acpi_platform"); }
|
||||
|
||||
Vmm::Guest *_vmm;
|
||||
Vmm::Irq_sink _sci;
|
||||
unsigned const _irq;
|
||||
bool _acpi_enabled;
|
||||
l4_uint32_t _pm1a_sts, _pm1a_en;
|
||||
};
|
||||
|
||||
} // namespace Acpi
|
||||
|
||||
/***********************************************************************/
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
struct F : Vdev::Factory
|
||||
{
|
||||
cxx::Ref_ptr<Vdev::Device> create(Vdev::Device_lookup *devs,
|
||||
Vdev::Dt_node const &node) override
|
||||
{
|
||||
|
||||
Vdev::Irq_dt_iterator it(devs, node);
|
||||
|
||||
if (it.next(devs) < 0)
|
||||
return nullptr;
|
||||
|
||||
if (!it.ic_is_virt())
|
||||
L4Re::throw_error(-L4_EINVAL, "Acpi_platform requires a virtual "
|
||||
"interrupt controller");
|
||||
|
||||
auto pwr_vcon = Vdev::get_cap<L4::Vcon>(node, "l4vmm,pwrinput");
|
||||
auto dev = Vdev::make_device<Acpi::Acpi_platform>(devs, it.ic(),
|
||||
it.irq(), pwr_vcon);
|
||||
if (pwr_vcon)
|
||||
dev->register_obj(devs->vmm()->registry());
|
||||
|
||||
Dbg().printf("Creating Acpi_platform\n");
|
||||
|
||||
auto *vmm = devs->vmm();
|
||||
auto start = Acpi::Acpi_platform::Ports::Ports_start;
|
||||
auto end = Acpi::Acpi_platform::Ports::Ports_last;
|
||||
vmm->add_io_device(Vmm::Io_region(start, end, Vmm::Region_type::Virtual),
|
||||
dev);
|
||||
return dev;
|
||||
}
|
||||
}; // struct F
|
||||
|
||||
static F f;
|
||||
static Vdev::Device_type t = {"virt-acpi", nullptr, &f};
|
||||
|
||||
}
|
||||
@@ -1,106 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2021-2024 Kernkonzept GmbH.
|
||||
* Author(s): Steffen Liebergeld <steffen.liebergeld@kernkonzept.com>
|
||||
*
|
||||
* License: see LICENSE.spdx (in this directory or the directories above)
|
||||
*/
|
||||
|
||||
/**
|
||||
* The ACPI PM TIMER is documented in the ACPI Manual in Chapter 4.8.3.3
|
||||
* "Power Management Timer (PM_TMR)".
|
||||
*
|
||||
* Its IO port is 0xb008 by default.
|
||||
* "This is a 24-bit counter that runs off a 3.579545-MHz clock and counts
|
||||
* while in the S0 working system state."
|
||||
*
|
||||
* The client has to cope with wrap arounds.
|
||||
*
|
||||
* This can be used in linux with cmdline "clocksource=acpi_pm".
|
||||
*
|
||||
* We do not support interrupt generation.
|
||||
*/
|
||||
|
||||
|
||||
#include "device_factory.h"
|
||||
#include "guest.h"
|
||||
#include "device.h"
|
||||
#include "acpi.h"
|
||||
#include "io_device.h"
|
||||
#include <l4/re/env.h>
|
||||
#include <l4/util/rdtsc.h>
|
||||
|
||||
namespace Vdev {
|
||||
|
||||
class Acpi_timer:
|
||||
public Vmm::Io_device,
|
||||
public Vdev::Device,
|
||||
public Acpi::Acpi_device
|
||||
{
|
||||
public:
|
||||
enum
|
||||
{
|
||||
Frequency_hz = 3579545,
|
||||
Port = 0xb008,
|
||||
};
|
||||
|
||||
Acpi_timer()
|
||||
: Acpi_device()
|
||||
{
|
||||
_timebase = l4_rdtsc();
|
||||
}
|
||||
|
||||
char const *dev_name() const override
|
||||
{ return "ACPI Timer"; }
|
||||
|
||||
void amend_fadt(ACPI_TABLE_FADT *t) const override
|
||||
{
|
||||
t->PmTimerBlock = Port;
|
||||
t->PmTimerLength = 4;
|
||||
t->Flags |= ACPI_FADT_32BIT_TIMER;
|
||||
}
|
||||
|
||||
private:
|
||||
/* IO write from the guest to device */
|
||||
void io_out(unsigned, Vmm::Mem_access::Width, l4_uint32_t) override
|
||||
{
|
||||
// this is a read only field, so we can ignore that.
|
||||
return;
|
||||
}
|
||||
|
||||
/* IO read from the guest */
|
||||
void io_in(unsigned, Vmm::Mem_access::Width, l4_uint32_t *value) override
|
||||
{
|
||||
l4_cpu_time_t now = l4_rdtsc();
|
||||
l4_cpu_time_t diff_ns = l4_tsc_to_ns(now - _timebase);
|
||||
l4_cpu_time_t period = 1000UL * 1000 * 1000 / Frequency_hz;
|
||||
*value = diff_ns / period;
|
||||
}
|
||||
|
||||
l4_cpu_time_t _timebase = 0;
|
||||
};
|
||||
|
||||
} // namespace Vdev
|
||||
|
||||
namespace {
|
||||
|
||||
struct F : Vdev::Factory
|
||||
{
|
||||
cxx::Ref_ptr<Vdev::Device> create(Vdev::Device_lookup *devs,
|
||||
Vdev::Dt_node const &) override
|
||||
{
|
||||
auto dev = Vdev::make_device<Vdev::Acpi_timer>();
|
||||
|
||||
Acpi::info.printf("Acpi timer @ 0x%x\n", Vdev::Acpi_timer::Port);
|
||||
auto region = Vmm::Io_region(Vdev::Acpi_timer::Port,
|
||||
Vdev::Acpi_timer::Port,
|
||||
Vmm::Region_type::Virtual);
|
||||
devs->vmm()->add_io_device(region, dev);
|
||||
|
||||
return dev;
|
||||
}
|
||||
}; // struct F
|
||||
|
||||
static F f;
|
||||
static Vdev::Device_type t = {"acpi-timer", nullptr, &f};
|
||||
|
||||
} // namespace
|
||||
@@ -1,62 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2022, 2024 Kernkonzept GmbH.
|
||||
* Author(s): Christian Pötzsch <christian.potzsch@kernkonzept.com>
|
||||
*
|
||||
* License: see LICENSE.spdx (in this directory or the directories above)
|
||||
*/
|
||||
|
||||
#include "binary_loader_linux.h"
|
||||
#include "guest.h"
|
||||
|
||||
namespace Boot {
|
||||
|
||||
enum : unsigned
|
||||
{
|
||||
Linux_kernel_start_addr = 0x100000,
|
||||
};
|
||||
|
||||
int Linux_loader::load(char const * /*bin*/, std::shared_ptr<Binary_ds> image,
|
||||
Vmm::Vm_ram *ram, Vmm::Ram_free_list *free_list,
|
||||
l4_addr_t *entry)
|
||||
{
|
||||
trace().printf("Checking for Linux image...\n");
|
||||
|
||||
if (!image->is_valid())
|
||||
return -L4_EINVAL;
|
||||
|
||||
unsigned char const *h = static_cast<unsigned char const *>(image->get_data());
|
||||
if (!(h[0x1fe] == 0x55 && h[0x1ff] == 0xaa))
|
||||
return -L4_EINVAL;
|
||||
|
||||
info().printf("Linux kernel detected\n");
|
||||
|
||||
_64bit = true;
|
||||
|
||||
l4_uint8_t num_setup_sects = *(h + Vmm::Bp_setup_sects);
|
||||
trace().printf("number of setup sections found: 0x%x\n", num_setup_sects);
|
||||
|
||||
// 512 is the size of a segment
|
||||
l4_addr_t setup_sects_size = (num_setup_sects + 1) * 512;
|
||||
|
||||
if (Linux_kernel_start_addr < setup_sects_size)
|
||||
L4Re::chksys(-L4_EINVAL,
|
||||
"Supplied kernel image contains an invalid number "
|
||||
" of setup sections (zeropage).");
|
||||
|
||||
l4_addr_t start = Linux_kernel_start_addr - setup_sects_size;
|
||||
trace().printf("size of setup sections: 0x%lx\n", setup_sects_size);
|
||||
trace().printf("loading binary at: 0x%lx\n", start);
|
||||
|
||||
// load the binary starting after the boot_params
|
||||
*entry = image->load_as_raw(ram, ram->boot2guest_phys(start), free_list);
|
||||
trace().printf("Loaded kernel image as raw to 0x%lx\n", *entry);
|
||||
trace().printf("load kernel as raw entry to 0x%lx\n",
|
||||
ram->guest_phys2boot(
|
||||
Vmm::Guest_addr(Linux_kernel_start_addr)));
|
||||
|
||||
return L4_EOK;
|
||||
}
|
||||
|
||||
static Linux_loader f __attribute__((init_priority(Boot::Linux)));
|
||||
|
||||
}
|
||||
@@ -1,50 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2023-2024 genua GmbH, 85551 Kirchheim, Germany
|
||||
* All rights reserved. Alle Rechte vorbehalten.
|
||||
*/
|
||||
/*
|
||||
* Copyright (C) 2025 Kernkonzept GmbH.
|
||||
* Author(s): Philipp Eppelt <philipp.eppelt@kernkonzept.com>
|
||||
*
|
||||
* License: see LICENSE.spdx (in this directory or the directories above)
|
||||
*/
|
||||
|
||||
#include "binary_loader_openbsd.h"
|
||||
#include "guest.h"
|
||||
|
||||
namespace Boot {
|
||||
|
||||
bool OpenBSD_loader::is_openbsd(std::shared_ptr<Binary_ds> image) const
|
||||
{
|
||||
bool res = false;
|
||||
image->get_elf()->iterate_phdr([&res](Ldr::Elf_phdr ph)
|
||||
{
|
||||
if (ph.type() == Pt_openbsd_randomize)
|
||||
res = true;
|
||||
});
|
||||
return res;
|
||||
}
|
||||
|
||||
int OpenBSD_loader::load(char const * /*bin*/, std::shared_ptr<Binary_ds> image,
|
||||
Vmm::Vm_ram *ram, Vmm::Ram_free_list *free_list,
|
||||
l4_addr_t *entry)
|
||||
{
|
||||
trace().printf("Checking for OpenBSD image...\n");
|
||||
|
||||
if (!image->is_valid())
|
||||
return -L4_EINVAL;
|
||||
|
||||
if (!image->is_elf_binary() || !image->is_elf64() || !is_openbsd(image))
|
||||
return -L4_EINVAL;
|
||||
|
||||
*entry = image->load_as_elf(ram, free_list);
|
||||
_binsize = image->loaded_size();
|
||||
info().printf("Loaded OpenBSD kernel image to 0x%lx, size 0x%zx\n", *entry,
|
||||
_binsize);
|
||||
|
||||
return L4_EOK;
|
||||
}
|
||||
|
||||
static OpenBSD_loader f __attribute__((init_priority(Boot::OpenBSD)));
|
||||
|
||||
}
|
||||
@@ -1,32 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2023-2024 genua GmbH, 85551 Kirchheim, Germany
|
||||
* All rights reserved. Alle Rechte vorbehalten.
|
||||
*/
|
||||
/*
|
||||
* Copyright (C) 2025 Kernkonzept GmbH.
|
||||
* Author(s): Philipp Eppelt <philipp.eppelt@kernkonzept.com>
|
||||
*
|
||||
* License: see LICENSE.spdx (in this directory or the directories above)
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "binary_loader.h"
|
||||
|
||||
namespace Boot {
|
||||
|
||||
class OpenBSD_loader : public Binary_loader
|
||||
{
|
||||
enum { Pt_openbsd_randomize = 0x65a3dbe6 };
|
||||
|
||||
public:
|
||||
OpenBSD_loader()
|
||||
: Binary_loader(OpenBSD)
|
||||
{}
|
||||
|
||||
bool is_openbsd(std::shared_ptr<Binary_ds> image) const;
|
||||
int load(char const *bin, std::shared_ptr<Binary_ds> image, Vmm::Vm_ram *ram,
|
||||
Vmm::Ram_free_list *free_list, l4_addr_t *entry) override;
|
||||
};
|
||||
|
||||
}
|
||||
@@ -1,24 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2022, 2024 Kernkonzept GmbH.
|
||||
* Author(s): Christian Pötzsch <christian.potzsch@kernkonzept.com>
|
||||
*
|
||||
* License: see LICENSE.spdx (in this directory or the directories above)
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace Boot {
|
||||
|
||||
static int raw_load_image(std::shared_ptr<Binary_ds> image, Vmm::Vm_ram *ram,
|
||||
Vmm::Ram_free_list *free_list, l4_addr_t *entry)
|
||||
{
|
||||
l4_addr_t start = *entry == ~0ul ? 0x0 : *entry;
|
||||
|
||||
// Get the RAM start address.
|
||||
Vmm::Guest_addr ram_base = free_list->first_free_address();
|
||||
*entry = image->load_as_raw(ram, ram_base + start, free_list);
|
||||
|
||||
return L4_EOK;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,8 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2016-2017, 2022, 2024 Kernkonzept GmbH.
|
||||
* Author(s): Philipp Eppelt <philipp.eppelt@kernkonzept.com>
|
||||
*
|
||||
* License: see LICENSE.spdx (in this directory or the directories above)
|
||||
*/
|
||||
|
||||
#include "cpu_dev.h"
|
||||
@@ -1,266 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020, 2022-2024 Kernkonzept GmbH.
|
||||
* Author(s): Philipp Eppelt <philipp.eppelt@kernkonzept.com>
|
||||
*
|
||||
* License: see LICENSE.spdx (in this directory or the directories above)
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
|
||||
#include "debug.h"
|
||||
#include "generic_cpu_dev.h"
|
||||
#include "vcpu_ptr.h"
|
||||
#include "monitor/cpu_dev_cmd_handler.h"
|
||||
|
||||
#include <deque>
|
||||
#include <mutex>
|
||||
|
||||
extern __thread unsigned vmm_current_cpu_id;
|
||||
|
||||
namespace Vmm {
|
||||
|
||||
class Cpu_dev
|
||||
: public Generic_cpu_dev,
|
||||
public Monitor::Cpu_dev_cmd_handler<Monitor::Enabled, Cpu_dev>
|
||||
{
|
||||
public:
|
||||
enum { Max_cpus = 128 };
|
||||
|
||||
enum Cpu_state
|
||||
{
|
||||
Sleeping = 1, // Startup state, Thread created but not running,
|
||||
// needs rescheduling.
|
||||
Stopped, // Waits for INIT signal, no need for rescheduling.
|
||||
Init, // Wait for SIPI to transition to Running.
|
||||
Halted, // Idle state, VMentry only on event.
|
||||
Running
|
||||
};
|
||||
|
||||
private:
|
||||
struct State_change
|
||||
{
|
||||
State_change(Cpu_state s) : target_state(s) {}
|
||||
Cpu_state target_state;
|
||||
};
|
||||
|
||||
struct Ipi_event
|
||||
{
|
||||
Ipi_event(Cpu_dev *c) : cpu(c) {}
|
||||
void act()
|
||||
{
|
||||
cpu->_check_msgq = true;
|
||||
}
|
||||
|
||||
void registration_failure()
|
||||
{
|
||||
Dbg().printf("Failed to register IRQ to for IPI; "
|
||||
"vCPU %u cannot be started.\n", cpu->vcpu().get_vcpu_id());
|
||||
}
|
||||
|
||||
void trigger_failure(long ipc_err)
|
||||
{
|
||||
Dbg().printf("IPI to vCPU %u failed with error %li\n",
|
||||
cpu->vcpu().get_vcpu_id(), ipc_err);
|
||||
}
|
||||
|
||||
Cpu_dev *cpu;
|
||||
};
|
||||
|
||||
public:
|
||||
Cpu_dev(unsigned idx, unsigned phys_id, Vdev::Dt_node const *)
|
||||
: Generic_cpu_dev(idx, phys_id),
|
||||
_ipi(Ipi_event(this))
|
||||
{
|
||||
_cpu_state = (idx == 0) ? Running : Sleeping;
|
||||
}
|
||||
|
||||
~Cpu_dev()
{
  // Disarm the IPI at the registry of this vCPU.
  _ipi.disarm(_vcpu.get_ipc_registry());
}
|
||||
|
||||
void powerup_cpu() override
|
||||
{
|
||||
Generic_cpu_dev::powerup_cpu();
|
||||
_ipi.arm(_vcpu.get_ipc_registry());
|
||||
}
|
||||
|
||||
/// Reset the Cpu_dev including the vCPU; does not return to the caller.
|
||||
void reset() override
|
||||
{
|
||||
vmm_current_cpu_id = _vcpu.get_vcpu_id();
|
||||
info().printf("[%3u] Reset called\n", vmm_current_cpu_id);
|
||||
|
||||
reset_common();
|
||||
wait_until_online();
|
||||
|
||||
info().printf("[%3u] Resetting vCPU.\n", vmm_current_cpu_id);
|
||||
_vcpu.reset(_protected_mode);
|
||||
}
|
||||
|
||||
void hot_reset()
|
||||
{
|
||||
// assumption: Guest::run_vm() already called once.
|
||||
// intention: Do not add leak stack memory.
|
||||
reset_common();
|
||||
|
||||
info().printf("[%3u] Hot resetting vCPU.\n", vmm_current_cpu_id);
|
||||
_vcpu.hot_reset();
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Translate a device tree "reg" value to an internally usable CPU id.
|
||||
*
|
||||
* For most architectures this is NOP, but some architectures like ARM
|
||||
* might encode topology information into this value, which needs to
|
||||
* be translated.
|
||||
*/
|
||||
static unsigned dtid_to_cpuid(l4_int32_t prop_val)
|
||||
{ return prop_val; }
|
||||
|
||||
static bool has_fixed_dt_mapping() { return true; }
|
||||
|
||||
Cpu_state get_cpu_state() const
|
||||
{ return _cpu_state; }
|
||||
|
||||
bool cpu_online() const
|
||||
{
|
||||
Cpu_state s = get_cpu_state();
|
||||
return (s == Cpu_state::Running) || (s == Cpu_state::Halted);
|
||||
}
|
||||
|
||||
void set_cpu_state(Cpu_state state)
|
||||
{ _cpu_state = state; }
|
||||
|
||||
void set_protected_mode()
|
||||
{ _protected_mode = true; }
|
||||
|
||||
/**
|
||||
* Handle the stop event.
|
||||
*
|
||||
* The event is usually emitted cross core, but also used in CPU local
|
||||
* error cases.
|
||||
*/
|
||||
void stop() override
|
||||
{
|
||||
_stop_irq.disarm(_vcpu.get_ipc_registry());
|
||||
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(_message_q_lock);
|
||||
// Clear all pending state changes to ensure the core is stopped ASAP.
|
||||
_message_q.clear();
|
||||
_message_q.emplace_back(Cpu_state::Stopped);
|
||||
}
|
||||
_check_msgq = true;
|
||||
// Do not do anything blocking here, we need to finish the execution of the
|
||||
// IPC dispatching that brought us here or return to our local caller.
|
||||
}
|
||||
|
||||
/// core local request to halt the CPU.
|
||||
void halt_cpu()
|
||||
{
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(_message_q_lock);
|
||||
_message_q.emplace_back(Cpu_state::Halted);
|
||||
}
|
||||
_check_msgq = true;
|
||||
// No IRQ trigger, we are already in VMexit handling
|
||||
}
|
||||
|
||||
/// Send cross-core INIT signal
|
||||
void send_init_ipi()
|
||||
{
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(_message_q_lock);
|
||||
_message_q.emplace_back(Cpu_state::Init);
|
||||
}
|
||||
_ipi.trigger();
|
||||
}
|
||||
|
||||
/// Send cross-core SIPI signal
|
||||
void send_sipi()
|
||||
{
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(_message_q_lock);
|
||||
_message_q.emplace_back(Cpu_state::Running);
|
||||
}
|
||||
_ipi.trigger();
|
||||
}
|
||||
|
||||
/**
 * Fetch the next requested CPU state.
 *
 * \return The target state of the oldest queued state change. If no message
 *         is flagged or the queue turns out to be empty, the current CPU
 *         state is returned instead.
 *
 * Updates `_check_msgq` to reflect whether further messages remain queued.
 */
Cpu_state next_state()
{
  if (has_message())
    {
      std::lock_guard<std::mutex> guard(_message_q_lock);

      if (!_message_q.empty())
        {
          Cpu_state const target = _message_q.front().target_state;
          _message_q.pop_front();
          _check_msgq = !_message_q.empty();

          return target;
        }

      // The flag was set, but there is nothing to consume.
      _check_msgq = false;
      return get_cpu_state();
    }

  return get_cpu_state();
}
|
||||
|
||||
/**
|
||||
* Wait for an IPI, unless there are still items in the message queue.
|
||||
*/
|
||||
void wait_for_ipi()
|
||||
{
|
||||
if (has_message())
|
||||
return;
|
||||
|
||||
_ipi.receive();
|
||||
_check_msgq = true;
|
||||
}
|
||||
|
||||
private:
|
||||
/// Debug channel for informational messages, tagged "Cpu_dev".
static Dbg info()
{
  return Dbg(Dbg::Cpu, Dbg::Info, "Cpu_dev");
}
|
||||
|
||||
/// True if the message queue is flagged for checking.
bool has_message() const
{
  return _check_msgq;
}
|
||||
|
||||
/// Wait until an IPI puts the CPU in online state.
|
||||
void wait_until_online()
|
||||
{
|
||||
while (has_message())
|
||||
set_cpu_state(next_state());
|
||||
|
||||
// wait for the SIPI to sets the `Running` state
|
||||
while (!cpu_online())
|
||||
{
|
||||
wait_for_ipi();
|
||||
|
||||
while (has_message())
|
||||
set_cpu_state(next_state());
|
||||
}
|
||||
}
|
||||
|
||||
/// Functionality performed to reset a vCPU.
|
||||
void reset_common()
|
||||
{
|
||||
_stop_irq.arm(_vcpu.get_ipc_registry());
|
||||
|
||||
_vcpu->state = L4_VCPU_F_FPU_ENABLED;
|
||||
_vcpu->saved_state = L4_VCPU_F_FPU_ENABLED | L4_VCPU_F_USER_MODE;
|
||||
}
|
||||
|
||||
std::atomic<Cpu_state> _cpu_state; // core-local writes; cross-core reads;
|
||||
bool _protected_mode = false;
|
||||
bool _check_msgq = false; // use only in local vCPU thread.
|
||||
|
||||
Cpu_irq<Ipi_event> _ipi;
|
||||
// The mutex is used in IPI cases (INIT, SIPI, STOP) and for the local HALT
|
||||
// event. The IPIs do not happen during normal operation, HALT happens when
|
||||
// the core has nothing to do and reacts only to IRQs. In all other VMexits,
|
||||
// this mutex is unused.
|
||||
std::mutex _message_q_lock;
|
||||
std::deque<State_change> _message_q;
|
||||
}; // class Cpu_dev
|
||||
|
||||
} // namespace Vmm
|
||||
@@ -1,421 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2024 Kernkonzept GmbH.
|
||||
* Author(s): Steffen Liebergeld <steffen.liebergeld@kernkonzept.com>
|
||||
*
|
||||
* License: see LICENSE.spdx (in this directory or the directories above)
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file
|
||||
* This file defines the x86 CPU features that we present to the guest via
|
||||
* our CPUID emulation.
|
||||
*
|
||||
* General rules:
|
||||
* - Whitelist only those CPU features that we know to support.
|
||||
* - We shall support as many features as possible because they might be there
|
||||
* for performance.
|
||||
*/
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
enum Cpuid_1_ecx : l4_uint32_t
|
||||
{
|
||||
Cpuid_1_ecx_sse3 = (1UL << 0),
|
||||
Cpuid_1_ecx_pclmulqdq = (1UL << 1),
|
||||
Cpuid_1_ecx_dtes64 = (1UL << 2),
|
||||
Cpuid_1_ecx_monitor = (1UL << 3),
|
||||
Cpuid_1_ecx_ds_cpl = (1UL << 4),
|
||||
Cpuid_1_ecx_vmx = (1UL << 5),
|
||||
Cpuid_1_ecx_smx = (1UL << 6),
|
||||
Cpuid_1_ecx_speed_step = (1UL << 7),
|
||||
Cpuid_1_ecx_thermal_monitor = (1UL << 8),
|
||||
Cpuid_1_ecx_ssse3 = (1UL << 9),
|
||||
Cpuid_1_ecx_context_id = (1UL << 10),
|
||||
Cpuid_1_ecx_sdbg = (1UL << 11),
|
||||
Cpuid_1_ecx_fma = (1UL << 12),
|
||||
Cpuid_1_ecx_cmpxchg16b = (1UL << 13),
|
||||
Cpuid_1_ecx_xtpr_update = (1UL << 14),
|
||||
Cpuid_1_ecx_pdcm = (1UL << 15),
|
||||
Cpuid_1_ecx_pcid = (1UL << 17),
|
||||
Cpuid_1_ecx_dca = (1UL << 18),
|
||||
Cpuid_1_ecx_sse4_1 = (1UL << 19),
|
||||
Cpuid_1_ecx_sse4_2 = (1UL << 20),
|
||||
Cpuid_1_ecx_x2apic = (1UL << 21),
|
||||
Cpuid_1_ecx_movbe = (1UL << 22),
|
||||
Cpuid_1_ecx_popcnt = (1UL << 23),
|
||||
Cpuid_1_ecx_tsc_deadline = (1UL << 24),
|
||||
Cpuid_1_ecx_aesni = (1UL << 25),
|
||||
Cpuid_1_ecx_xsave = (1UL << 26),
|
||||
Cpuid_1_ecx_osxsave = (1UL << 27),
|
||||
Cpuid_1_ecx_avx = (1UL << 28),
|
||||
Cpuid_1_ecx_f16c = (1UL << 29),
|
||||
Cpuid_1_ecx_rdrand = (1UL << 30),
|
||||
Cpuid_1_ecx_hypervisor = (1UL << 31),
|
||||
};
|
||||
|
||||
enum Cpuid_1_edx : l4_uint32_t
|
||||
{
|
||||
Cpuid_1_edx_fpu = (1UL << 0),
|
||||
Cpuid_1_edx_vme = (1UL << 1),
|
||||
Cpuid_1_edx_de = (1UL << 2),
|
||||
Cpuid_1_edx_pse = (1UL << 3),
|
||||
Cpuid_1_edx_tsc = (1UL << 4),
|
||||
Cpuid_1_edx_msr = (1UL << 5),
|
||||
Cpuid_1_edx_pae = (1UL << 6),
|
||||
Cpuid_1_edx_mce = (1UL << 7),
|
||||
Cpuid_1_edx_cx8 = (1UL << 8),
|
||||
Cpuid_1_edx_apic = (1UL << 9),
|
||||
Cpuid_1_edx_sep = (1UL << 11),
|
||||
Cpuid_1_edx_mtrr = (1UL << 12),
|
||||
Cpuid_1_edx_pge = (1UL << 13),
|
||||
Cpuid_1_edx_mca = (1UL << 14),
|
||||
Cpuid_1_edx_cmov = (1UL << 15),
|
||||
Cpuid_1_edx_pat = (1UL << 16),
|
||||
Cpuid_1_edx_pse_36= (1UL << 17),
|
||||
Cpuid_1_edx_psn = (1UL << 18),
|
||||
Cpuid_1_edx_clfsh = (1UL << 19),
|
||||
Cpuid_1_edx_ds = (1UL << 21),
|
||||
Cpuid_1_edx_acpi = (1UL << 22),
|
||||
Cpuid_1_edx_mmx = (1UL << 23),
|
||||
Cpuid_1_edx_fxsr = (1UL << 24),
|
||||
Cpuid_1_edx_sse = (1UL << 25),
|
||||
Cpuid_1_edx_sse2 = (1UL << 26),
|
||||
Cpuid_1_edx_ss = (1UL << 27),
|
||||
Cpuid_1_edx_htt = (1UL << 28),
|
||||
Cpuid_1_edx_tm = (1UL << 29),
|
||||
Cpuid_1_edx_pbe = (1UL << 31),
|
||||
|
||||
};
|
||||
|
||||
// thermal and power management
|
||||
enum Cpuid_6_eax : l4_uint32_t
|
||||
{
|
||||
Cpuid_6_eax_temp_sens = (1UL << 0),
|
||||
Cpuid_6_eax_turbo_boost = (1UL << 1),
|
||||
Cpuid_6_eax_arat = (1UL << 2),
|
||||
Cpuid_6_eax_pln = (1UL << 4),
|
||||
Cpuid_6_eax_ecmd = (1UL << 5),
|
||||
Cpuid_6_eax_ptm = (1UL << 6),
|
||||
Cpuid_6_eax_hwp = (1UL << 7),
|
||||
Cpuid_6_eax_hwp_notify = (1UL << 8),
|
||||
Cpuid_6_eax_hwp_act_win = (1UL << 9),
|
||||
Cpuid_6_eax_hwp_energy_perf_pref = (1UL << 10),
|
||||
Cpuid_6_eax_hwp_package_level = (1UL << 11),
|
||||
Cpuid_6_eax_hdc = (1UL << 13),
|
||||
Cpuid_6_eax_turbo_boost_max = (1UL << 14),
|
||||
Cpuid_6_eax_hwp_capabilities = (1UL << 15),
|
||||
Cpuid_6_eax_hwp_peci = (1UL << 16),
|
||||
Cpuid_6_eax_hwp_flex = (1UL << 17),
|
||||
Cpuid_6_eax_hwp_request_msr = (1UL << 18),
|
||||
Cpuid_6_eax_hw_feedback = (1UL << 19),
|
||||
Cpuid_6_eax_ignore_idle_cpu_hwp = (1UL << 20),
|
||||
Cpuid_6_eax_hwp_control_msr = (1UL << 22),
|
||||
Cpuid_6_eax_thread_director = (1UL << 23),
|
||||
Cpuid_6_eax_therm_irq_msr = (1UL << 24),
|
||||
};
|
||||
|
||||
enum Cpuid_7_0_ebx : l4_uint32_t
|
||||
{
|
||||
Cpuid_7_0_ebx_fsgsbase = (1UL << 0),
|
||||
Cpuid_7_0_ebx_tsc_adjust_msr = (1UL << 1),
|
||||
Cpuid_7_0_ebx_sgx = (1UL << 2),
|
||||
Cpuid_7_0_ebx_bmi1 = (1UL << 3),
|
||||
Cpuid_7_0_ebx_hle = (1UL << 4),
|
||||
Cpuid_7_0_ebx_avx2 = (1UL << 5),
|
||||
Cpuid_7_0_ebx_fdp_excptn_only= (1UL << 6),
|
||||
Cpuid_7_0_ebx_smep = (1UL << 7),
|
||||
Cpuid_7_0_ebx_bmi2 = (1UL << 8),
|
||||
Cpuid_7_0_ebx_movsb = (1UL << 9),
|
||||
Cpuid_7_0_ebx_invpcid = (1UL << 10),
|
||||
Cpuid_7_0_ebx_rtm = (1UL << 11),
|
||||
Cpuid_7_0_ebx_rdt_m = (1UL << 12),
|
||||
Cpuid_7_0_ebx_fpu_cs = (1UL << 13),
|
||||
Cpuid_7_0_ebx_mpx = (1UL << 14),
|
||||
Cpuid_7_0_ebx_rdt_a = (1UL << 15),
|
||||
Cpuid_7_0_ebx_avx_512_f = (1UL << 16),
|
||||
Cpuid_7_0_ebx_avx_512_dq = (1UL << 17),
|
||||
Cpuid_7_0_ebx_rdseed = (1UL << 18),
|
||||
Cpuid_7_0_ebx_adx = (1UL << 19),
|
||||
Cpuid_7_0_ebx_smap = (1UL << 20),
|
||||
Cpuid_7_0_ebx_avx_512_ifma = (1UL << 21),
|
||||
Cpuid_7_0_ebx_clflushopt = (1UL << 23),
|
||||
Cpuid_7_0_ebx_clwb = (1UL << 24),
|
||||
Cpuid_7_0_ebx_trace = (1UL << 25),
|
||||
Cpuid_7_0_ebx_avx_512_pf = (1UL << 26),
|
||||
Cpuid_7_0_ebx_avx_512_er = (1UL << 27),
|
||||
Cpuid_7_0_ebx_avx_512_cd = (1UL << 28),
|
||||
Cpuid_7_0_ebx_sha = (1UL << 29),
|
||||
Cpuid_7_0_ebx_avx_512_bw = (1UL << 30),
|
||||
Cpuid_7_0_ebx_avx_512_vl = (1UL << 31),
|
||||
};
|
||||
|
||||
enum Cpuid_7_0_ecx : l4_uint32_t
|
||||
{
|
||||
Cpuid_7_0_ecx_prefetchwt1 = (1UL << 0),
|
||||
Cpuid_7_0_ecx_avx_512_vbmi = (1UL << 1),
|
||||
Cpuid_7_0_ecx_umip = (1UL << 2),
|
||||
Cpuid_7_0_ecx_pku = (1UL << 3),
|
||||
Cpuid_7_0_ecx_ospke = (1UL << 4),
|
||||
Cpuid_7_0_ecx_waitpkg = (1UL << 5),
|
||||
Cpuid_7_0_ecx_avx_512_vbmi2 = (1UL << 6),
|
||||
Cpuid_7_0_ecx_cet_ss = (1UL << 7),
|
||||
Cpuid_7_0_ecx_gfni = (1UL << 8),
|
||||
Cpuid_7_0_ecx_vaes = (1UL << 9),
|
||||
Cpuid_7_0_ecx_vpclmulqdq = (1UL << 10),
|
||||
Cpuid_7_0_ecx_avx_512_vnni = (1UL << 11),
|
||||
Cpuid_7_0_ecx_avx_512_bitalg = (1UL << 12),
|
||||
Cpuid_7_0_ecx_tme_en = (1UL << 13),
|
||||
Cpuid_7_0_ecx_avx_512_vpopcntdq= (1UL << 14),
|
||||
Cpuid_7_0_ecx_la57 = (1UL << 16),
|
||||
Cpuid_7_0_ecx_rdpid = (1UL << 22),
|
||||
Cpuid_7_0_ecx_kl = (1UL << 23),
|
||||
Cpuid_7_0_ecx_bus_lock_detect = (1UL << 24),
|
||||
Cpuid_7_0_ecx_cldemote = (1UL << 25),
|
||||
Cpuid_7_0_ecx_movdiri = (1UL << 27),
|
||||
Cpuid_7_0_ecx_movdir64b = (1UL << 28),
|
||||
Cpuid_7_0_ecx_enqcmd = (1UL << 29),
|
||||
Cpuid_7_0_ecx_sgx_lc = (1UL << 30),
|
||||
Cpuid_7_0_ecx_pks = (1UL << 31),
|
||||
};
|
||||
|
||||
enum Cpuid_7_0_edx : l4_uint32_t
|
||||
{
|
||||
Cpuid_7_0_edx_sgx_keys = (1UL << 1),
|
||||
Cpuid_7_0_edx_avx_512_4vnniw = (1UL << 2),
|
||||
Cpuid_7_0_edx_avx_512_4fmaps = (1UL << 3),
|
||||
Cpuid_7_0_edx_repmov = (1UL << 4),
|
||||
Cpuid_7_0_edx_uintr = (1UL << 5),
|
||||
Cpuid_7_0_edx_avx_512_vp2intersect= (1UL << 8),
|
||||
Cpuid_7_0_edx_srbds_ctrl = (1UL << 9),
|
||||
Cpuid_7_0_edx_md_clear = (1UL << 10),
|
||||
Cpuid_7_0_edx_rtm_always_abort = (1UL << 11),
|
||||
Cpuid_7_0_edx_rtm_force_abort = (1UL << 13),
|
||||
Cpuid_7_0_edx_serialize = (1UL << 14),
|
||||
Cpuid_7_0_edx_hybrid = (1UL << 15),
|
||||
Cpuid_7_0_edx_tsxldtrk = (1UL << 16),
|
||||
Cpuid_7_0_edx_pconfig = (1UL << 18),
|
||||
Cpuid_7_0_edx_arch_lbr = (1UL << 19),
|
||||
Cpuid_7_0_edx_cet_ibt = (1UL << 20),
|
||||
Cpuid_7_0_edx_amx_fb16 = (1UL << 22),
|
||||
Cpuid_7_0_edx_avx_512_fp16 = (1UL << 23),
|
||||
Cpuid_7_0_edx_amx_tile = (1UL << 24),
|
||||
Cpuid_7_0_edx_amx_int8 = (1UL << 25),
|
||||
Cpuid_7_0_edx_ibrs = (1UL << 26),
|
||||
Cpuid_7_0_edx_stibp = (1UL << 27),
|
||||
Cpuid_7_0_edx_l1d_flush = (1UL << 28),
|
||||
Cpuid_7_0_edx_arch_cap_msr = (1UL << 29),
|
||||
Cpuid_7_0_edx_core_cap_msr = (1UL << 30),
|
||||
Cpuid_7_0_edx_ssbd = (1UL << 31),
|
||||
};
|
||||
|
||||
enum Cpuid_8000_0001_ecx : l4_uint32_t
|
||||
{
|
||||
// TODO amd has several bits here
|
||||
Cpuid_8000_0001_ecx_lahf = (1UL << 0),
|
||||
Cpuid_8000_0001_ecx_lzcnt = (1UL << 5),
|
||||
Cpuid_8000_0001_ecx_prefetchw = (1UL << 8),
|
||||
};
|
||||
|
||||
enum Cpuid_8000_0001_edx : l4_uint32_t
|
||||
{
|
||||
Cpuid_8000_0001_edx_syscall = (1UL << 11),
|
||||
Cpuid_8000_0001_edx_nx = (1UL << 20),
|
||||
Cpuid_8000_0001_edx_1gb = (1UL << 26),
|
||||
Cpuid_8000_0001_edx_rdtscp = (1UL << 27),
|
||||
Cpuid_8000_0001_edx_ia64 = (1UL << 29),
|
||||
};
|
||||
|
||||
enum Cpuid_8000_0007_edx : l4_uint32_t
|
||||
{
|
||||
Cpuid_8000_0007_edx_invariant_tsc = (1UL << 8),
|
||||
};
|
||||
|
||||
enum Cpuid_8000_0008_ebx : l4_uint32_t
|
||||
{
|
||||
Cpuid_8000_0008_ebx_amd_clzero = (1UL << 0),
|
||||
Cpuid_8000_0008_ebx_amd_instretcnt_msr = (1UL << 1),
|
||||
Cpuid_8000_0008_ebx_amd_rstrfperrptrs = (1UL << 2),
|
||||
Cpuid_8000_0008_ebx_amd_invlpkg = (1UL << 3),
|
||||
Cpuid_8000_0008_ebx_amd_rdpru = (1UL << 4),
|
||||
Cpuid_8000_0008_ebx_amd_mcommit = (1UL << 8),
|
||||
Cpuid_8000_0008_ebx_wbnoinvd = (1UL << 9),
|
||||
// AMD speculation control.
|
||||
// 0x8000'0008 EBX
|
||||
// Whitepaper AMD64 Technology: Indirect Branch Control Extension,
|
||||
// revision 4.10.18
|
||||
Cpuid_8000_0008_ebx_amd_ibpb = (1UL << 12),
|
||||
Cpuid_8000_0008_ebx_amd_ibrs = (1UL << 14),
|
||||
Cpuid_8000_0008_ebx_amd_stibp = (1UL << 15),
|
||||
// Whitepaper AMD64 Technology: Speculative Store Bypass Disable, 5.21.18
|
||||
Cpuid_8000_0008_ebx_amd_ssbd = (1UL << 24),
|
||||
};
|
||||
|
||||
}; // namespace
|
||||
|
||||
namespace Vmm
|
||||
{
|
||||
enum Cpuid_configuration : l4_uint32_t
|
||||
{
|
||||
// general config
|
||||
Cpuid_max_basic_info_leaf = 0x1f,
|
||||
Cpuid_max_ext_info_leaf = 0x8000'0008,
|
||||
|
||||
// leaf config
|
||||
|
||||
// Unsupported:
|
||||
// Cpuid_1_ecx_monitor
|
||||
// Cpuid_1_ecx_vmx
|
||||
// Cpuid_1_ecx_smx
|
||||
// Cpuid_1_ecx_thermal_monitor
|
||||
// Cpuid_1_ecx_speed_step
|
||||
// Cpuid_1_ecx_sdbg
|
||||
// Cpuid_1_ecx_osxsave
|
||||
// Cpuid_1_ecx_xtpr_update
|
||||
// Cpuid_1_ecx_pdcm
|
||||
// Cpuid_1_ecx_context_id
|
||||
// Cpuid_1_ecx_dca
|
||||
// Cpuid_1_ecx_ds_cpl
|
||||
// Cpuid_1_ecx_dtes64
|
||||
|
||||
Cpuid_1_ecx_supported = \
|
||||
Cpuid_1_ecx_sse3 \
|
||||
| Cpuid_1_ecx_pclmulqdq \
|
||||
| Cpuid_1_ecx_ssse3 \
|
||||
| Cpuid_1_ecx_fma \
|
||||
| Cpuid_1_ecx_cmpxchg16b \
|
||||
| Cpuid_1_ecx_sse4_1 \
|
||||
| Cpuid_1_ecx_sse4_2 \
|
||||
| Cpuid_1_ecx_movbe \
|
||||
| Cpuid_1_ecx_popcnt \
|
||||
| Cpuid_1_ecx_tsc_deadline \
|
||||
| Cpuid_1_ecx_aesni \
|
||||
| Cpuid_1_ecx_xsave \
|
||||
| Cpuid_1_ecx_avx \
|
||||
| Cpuid_1_ecx_f16c \
|
||||
| Cpuid_1_ecx_pcid \
|
||||
| Cpuid_1_ecx_rdrand,
|
||||
|
||||
Cpuid_1_ecx_mandatory = \
|
||||
Cpuid_1_ecx_hypervisor
|
||||
// x2apic is emulated even if the host doesn't have it
|
||||
| Cpuid_1_ecx_x2apic,
|
||||
|
||||
// Unsupported flags
|
||||
// Cpuid_1_edx_mca
|
||||
// Cpuid_1_edx_acpi
|
||||
// Cpuid_1_edx_ds
|
||||
// Cpuid_1_edx_tm
|
||||
// Cpuid_1_edx_htt
|
||||
// Cpuid_1_edx_psn
|
||||
// Cpuid_1_edx_pbe
|
||||
Cpuid_1_edx_supported = \
|
||||
Cpuid_1_edx_fpu \
|
||||
| Cpuid_1_edx_vme \
|
||||
| Cpuid_1_edx_de \
|
||||
| Cpuid_1_edx_pse \
|
||||
| Cpuid_1_edx_tsc \
|
||||
| Cpuid_1_edx_msr \
|
||||
| Cpuid_1_edx_pae \
|
||||
| Cpuid_1_edx_mce \
|
||||
| Cpuid_1_edx_cx8 \
|
||||
| Cpuid_1_edx_apic\
|
||||
| Cpuid_1_edx_sep \
|
||||
| Cpuid_1_edx_mtrr\
|
||||
| Cpuid_1_edx_pge \
|
||||
| Cpuid_1_edx_cmov\
|
||||
| Cpuid_1_edx_pat \
|
||||
| Cpuid_1_edx_pse_36 \
|
||||
| Cpuid_1_edx_clfsh \
|
||||
| Cpuid_1_edx_mmx \
|
||||
| Cpuid_1_edx_fxsr \
|
||||
| Cpuid_1_edx_sse \
|
||||
| Cpuid_1_edx_sse2 \
|
||||
| Cpuid_1_edx_ss,
|
||||
|
||||
Cpuid_6_eax_supported = \
|
||||
Cpuid_6_eax_arat,
|
||||
|
||||
Cpuid_7_0_eax_leafs = 1,
|
||||
|
||||
// Unsupported:
|
||||
// Cpuid_7_0_ebx_mpx
|
||||
// Cpuid_7_0_ebx_trace
|
||||
Cpuid_7_0_ebx_supported = \
|
||||
Cpuid_7_0_ebx_fsgsbase \
|
||||
| Cpuid_7_0_ebx_bmi1 \
|
||||
| Cpuid_7_0_ebx_hle \
|
||||
| Cpuid_7_0_ebx_avx2 \
|
||||
| Cpuid_7_0_ebx_fdp_excptn_only \
|
||||
| Cpuid_7_0_ebx_smep \
|
||||
| Cpuid_7_0_ebx_bmi2 \
|
||||
| Cpuid_7_0_ebx_movsb \
|
||||
| Cpuid_7_0_ebx_rtm \
|
||||
| Cpuid_7_0_ebx_fpu_cs \
|
||||
| Cpuid_7_0_ebx_avx_512_f \
|
||||
| Cpuid_7_0_ebx_avx_512_dq \
|
||||
| Cpuid_7_0_ebx_rdseed \
|
||||
| Cpuid_7_0_ebx_adx \
|
||||
| Cpuid_7_0_ebx_smap \
|
||||
| Cpuid_7_0_ebx_avx_512_ifma \
|
||||
| Cpuid_7_0_ebx_clflushopt \
|
||||
| Cpuid_7_0_ebx_clwb \
|
||||
| Cpuid_7_0_ebx_avx_512_pf \
|
||||
| Cpuid_7_0_ebx_avx_512_er \
|
||||
| Cpuid_7_0_ebx_avx_512_cd \
|
||||
| Cpuid_7_0_ebx_sha \
|
||||
| Cpuid_7_0_ebx_invpcid \
|
||||
| Cpuid_7_0_ebx_avx_512_bw \
|
||||
| Cpuid_7_0_ebx_avx_512_vl,
|
||||
|
||||
// Unsupported:
|
||||
// Cpuid_7_0_ecx_ospke
|
||||
// Cpuid_7_0_ecx_waitpkg
|
||||
// Cpuid_7_0_ecx_la57 (ia32e 5 level paging)
|
||||
Cpuid_7_0_ecx_supported = \
|
||||
Cpuid_7_0_ecx_prefetchwt1 \
|
||||
| Cpuid_7_0_ecx_avx_512_vbmi \
|
||||
| Cpuid_7_0_ecx_umip \
|
||||
| Cpuid_7_0_ecx_avx_512_vbmi2 \
|
||||
| Cpuid_7_0_ecx_avx_512_vnni \
|
||||
| Cpuid_7_0_ecx_avx_512_bitalg,
|
||||
|
||||
Cpuid_7_0_edx_supported = \
|
||||
Cpuid_7_0_edx_avx_512_4vnniw \
|
||||
| Cpuid_7_0_edx_avx_512_4fmaps \
|
||||
| Cpuid_7_0_edx_repmov \
|
||||
| Cpuid_7_0_edx_avx_512_vp2intersect \
|
||||
| Cpuid_7_0_edx_avx_512_fp16 \
|
||||
| Cpuid_7_0_edx_uintr \
|
||||
| Cpuid_7_0_edx_md_clear,
|
||||
|
||||
Cpuid_8000_0001_ecx_supported = \
|
||||
Cpuid_8000_0001_ecx_lahf,
|
||||
|
||||
Cpuid_8000_0001_edx_supported = \
|
||||
Cpuid_8000_0001_edx_syscall \
|
||||
| Cpuid_8000_0001_edx_nx \
|
||||
| Cpuid_8000_0001_edx_1gb \
|
||||
| Cpuid_8000_0001_edx_ia64,
|
||||
|
||||
Cpuid_8000_0007_edx_supported = \
|
||||
Cpuid_8000_0007_edx_invariant_tsc,
|
||||
|
||||
// According to the Linux source code at arch/x86/kernel/cpu/common.c,
|
||||
// "[...] a hypervisor might have set the individual AMD bits even on
|
||||
// Intel CPUs, for finer-grained selection of what's available."
|
||||
// Thus filter AMD bits for the case of nested virtualization.
|
||||
Cpuid_8000_0008_ebx_supported = \
|
||||
Cpuid_8000_0008_ebx_wbnoinvd,
|
||||
};
|
||||
|
||||
inline void
|
||||
cpuid_reg_apply(l4_uint32_t *host_register,
|
||||
l4_uint32_t supported_bits,
|
||||
l4_uint32_t mandatory_bits = 0)
|
||||
{
|
||||
*host_register &= supported_bits;
|
||||
*host_register |= mandatory_bits;
|
||||
}
|
||||
|
||||
}; // namespace Vmm
|
||||
@@ -1,40 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2022, 2024 Kernkonzept GmbH.
|
||||
* Author(s): Jakub Jermar <jakub.jermar@kernkonzept.com>
|
||||
*
|
||||
* License: see LICENSE.spdx (in this directory or the directories above)
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <l4/sys/types.h>
|
||||
|
||||
#include "device.h"
|
||||
|
||||
namespace Vmm {
|
||||
|
||||
/**
|
||||
* Interface for devices responding to guest CPUID invocations.
|
||||
*/
|
||||
struct Cpuid_device : virtual Vdev::Dev_ref
|
||||
{
|
||||
virtual ~Cpuid_device() = 0;
|
||||
|
||||
/**
|
||||
* Handle the CPUID instruction.
|
||||
*
|
||||
* \param regs Guest register state.
|
||||
* \param a[out] Output value for RAX.
|
||||
* \param b[out] Output value for RBX.
|
||||
* \param c[out] Output value for RCX.
|
||||
* \param d[out] Output value for RDX.
|
||||
*
|
||||
* \return True if the device handled the CPUID instruction,
|
||||
* false otherwise.
|
||||
*/
|
||||
virtual bool handle_cpuid(l4_vcpu_regs_t const *regs, unsigned *a,
|
||||
unsigned *b, unsigned *c, unsigned *d) const = 0;
|
||||
};
|
||||
|
||||
inline Cpuid_device::~Cpuid_device() = default;
|
||||
|
||||
} // namespace
|
||||
@@ -1,22 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2019, 2024 Kernkonzept GmbH.
|
||||
* Author(s): Timo Nicolai <timo.nicolai@kernkonzept.com>
|
||||
*
|
||||
* License: see LICENSE.spdx (in this directory or the directories above)
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include "debugger/generic_guest_debugger.h"
|
||||
#include "monitor/dbg_cmd_handler.h"
|
||||
|
||||
namespace Monitor {
|
||||
|
||||
class Guest_debugger
|
||||
: public Generic_guest_debugger,
|
||||
public Dbg_cmd_handler<Enabled, Guest_debugger>
|
||||
{
|
||||
public:
|
||||
using Generic_guest_debugger::Generic_guest_debugger;
|
||||
};
|
||||
|
||||
}
|
||||
@@ -1,58 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2023-2024 Kernkonzept GmbH.
|
||||
* Author(s): Philipp Eppelt <philipp.eppelt@kernkonzept.com>
|
||||
*
|
||||
* License: see LICENSE.spdx (in this directory or the directories above)
|
||||
*/
|
||||
#include "event_record.h"
|
||||
#include "event_record_lapic.h"
|
||||
|
||||
namespace Vmm {
|
||||
|
||||
/// Inject the exception as event type 3. An error value is only attached
/// if one was set. Always reports success.
bool Event_exc::inject(Vm_state *vm)
{
  bool const has_error_val = error_val != Invalid_error;

  vm->inject_event(Injection_event(ev_num, 3, has_error_val, error_val));
  return true;
}
|
||||
|
||||
/// Inject the exception as event type 3 without an error value. Always
/// reports success.
bool Real_mode_exc::inject(Vm_state *vm)
{
  Injection_event const event(ev_num, 3, false);

  vm->inject_event(event);
  return true;
}
|
||||
|
||||
/**
 * Inject a pending NMI if the VM state allows it.
 *
 * \retval true   NMI injected; the NMI window is disabled.
 * \retval false  Injection not possible right now; the NMI window is
 *                enabled instead.
 */
bool Event_nmi::inject(Vm_state *vms)
{
  if (!vms->can_inject_nmi())
    {
      vms->enable_nmi_window();
      return false;
    }

  vms->disable_nmi_window();
  lapic->next_pending_nmi();
  vms->inject_event(Injection_event(2, 2, false)); // NMI is vector 2, type 2
  return true;
}
|
||||
|
||||
/**
 * Inject the next pending IRQ of the local APIC if the VM state allows it.
 *
 * \retval true   Event handled. Nothing is injected if the local APIC
 *                reports a negative IRQ number.
 * \retval false  Injection not possible right now; the interrupt window is
 *                enabled instead.
 */
bool Event_irq::inject(Vm_state *vms)
{
  if (!vms->can_inject_interrupt())
    {
      vms->enable_interrupt_window();
      return false;
    }

  vms->disable_interrupt_window();

  int const irq = lapic->next_pending_irq();
  if (irq >= 0)
    vms->inject_event(Injection_event(irq, 0, false)); // IRQ vector, type 0

  return true;
}
|
||||
|
||||
} // namespace Vmm
|
||||
@@ -1,124 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2023-2024 Kernkonzept GmbH.
|
||||
* Author(s): Philipp Eppelt <philipp.eppelt@kernkonzept.com>
|
||||
*
|
||||
* License: see LICENSE.spdx (in this directory or the directories above)
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include "vm_state.h"
|
||||
|
||||
namespace Vmm {
|
||||
|
||||
/**
 * Event priority order.
 *
 * The priority is specified in the Intel SDM 12/2022 Vol 3,
 * Section 6.9 "Prioritization of Concurrent Events".
 *
 * Enumerators are numbered consecutively starting at zero, so their order
 * matters.
 */
enum Event_prio : char
{
  // Events raised on instructions.
  Abort = 0,
  Exception,
  Sw_int1,
  Sw_int3,
  Sw_intO,
  Sw_intN,
  Bound,

  // Potentially concurrent events raised on instruction boundaries.
  Reset,
  Machine_check,
  Trap_task_switch,
  Ext_hw_intervention,
  Trap_dbg_except,
  Nmi,
  Irq,
  Fault_dbg_except,
  Fault_fetch_next_instr,
  Fault_decode_next_instr,

  Prio_max // must be last
};
|
||||
|
||||
/**
|
||||
* Single event record, e.g. for an event raised by hardware.
|
||||
*/
|
||||
struct Event_record
|
||||
{
|
||||
explicit Event_record(Event_prio p) : prio(p) {}
|
||||
|
||||
virtual ~Event_record() = default;
|
||||
|
||||
virtual bool inject(Vm_state *vms) = 0;
|
||||
|
||||
constexpr bool operator < (Event_record const &o) const
|
||||
{ return prio < o.prio; }
|
||||
|
||||
constexpr bool operator > (Event_record const &o) const
|
||||
{ return prio > o.prio; }
|
||||
|
||||
constexpr bool operator == (Event_record const &o) const
|
||||
{ return prio == o.prio; }
|
||||
|
||||
Event_prio const prio; ///< Type of the Event_record
|
||||
};
|
||||
|
||||
/**
|
||||
* Exception event record.
|
||||
*/
|
||||
struct Event_exc : Event_record
|
||||
{
|
||||
enum : unsigned { Invalid_error = ~0U };
|
||||
|
||||
explicit Event_exc(Event_prio p, unsigned ev_num)
|
||||
: Event_record(p), ev_num(ev_num)
|
||||
{}
|
||||
|
||||
Event_exc(Event_prio p, unsigned ev_num, unsigned e_val)
|
||||
: Event_record(p), ev_num(ev_num), error_val(e_val)
|
||||
{}
|
||||
|
||||
bool inject(Vm_state *vm) override;
|
||||
|
||||
unsigned ev_num; ///< Event number to inject
|
||||
unsigned error_val = Invalid_error; ///< Error value to push on the stack
|
||||
};
|
||||
|
||||
struct Real_mode_exc : Event_record
|
||||
{
|
||||
explicit Real_mode_exc(Event_prio p, unsigned ev_num)
|
||||
: Event_record(p), ev_num(ev_num)
|
||||
{}
|
||||
|
||||
bool inject(Vm_state *vm) override;
|
||||
|
||||
unsigned ev_num; ///< Event number to inject
|
||||
};
|
||||
|
||||
/**
|
||||
* Generic software exception/interrupt event to inject into the guest.
|
||||
*
|
||||
* \tparam TYPE Event type to use in injection.
|
||||
*/
|
||||
template <l4_uint8_t TYPE>
|
||||
struct Event_sw_generic : Event_record
|
||||
{
|
||||
Event_sw_generic(Event_prio p, unsigned ev_num, unsigned insn_len)
|
||||
: Event_record(p), ev_num(ev_num), instruction_len(insn_len)
|
||||
{}
|
||||
|
||||
bool inject(Vm_state *vm) override
|
||||
{
|
||||
vm->inject_event(Injection_event(ev_num, TYPE, false));
|
||||
if (vm->type() == Vm_state::Type::Vmx)
|
||||
vm->advance_entry_ip(instruction_len);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
unsigned ev_num; ///< Event number to inject
|
||||
unsigned instruction_len; ///< Bytes to advance IP
|
||||
};
|
||||
|
||||
} // namespace Vmm
|
||||
@@ -1,50 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2023-2024 Kernkonzept GmbH.
|
||||
* Author(s): Philipp Eppelt <philipp.eppelt@kernkonzept.com>
|
||||
*
|
||||
* License: see LICENSE.spdx (in this directory or the directories above)
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include "event_record.h"
|
||||
#include "virt_lapic.h"
|
||||
|
||||
namespace Vmm
|
||||
{
|
||||
|
||||
/**
|
||||
* NMI event record.
|
||||
*/
|
||||
struct Event_nmi : Event_record
|
||||
{
|
||||
explicit Event_nmi(Gic::Virt_lapic *apic)
|
||||
: Event_record(Event_prio::Nmi), lapic(apic)
|
||||
{}
|
||||
|
||||
bool inject(Vm_state *vm) override;
|
||||
|
||||
Gic::Virt_lapic *lapic;
|
||||
};
|
||||
|
||||
/**
|
||||
* IRQ event record.
|
||||
*/
|
||||
struct Event_irq : Event_record
|
||||
{
|
||||
explicit Event_irq(Gic::Virt_lapic *apic)
|
||||
: Event_record(Event_prio::Irq), lapic(apic)
|
||||
{}
|
||||
|
||||
bool inject(Vm_state *vm) override;
|
||||
|
||||
Gic::Virt_lapic *lapic;
|
||||
};
|
||||
|
||||
// These are necessary to correctly compute Event_memory::max_event_size().
|
||||
// The asserts ensure that these event objects don't influence the computation.
|
||||
static_assert(sizeof(Event_irq) <= sizeof(Event_exc),
|
||||
"IRQ event objects are not the largest event object.");
|
||||
static_assert(sizeof(Event_nmi) <= sizeof(Event_exc),
|
||||
"NMI event objects are not the largest event object.");
|
||||
|
||||
} // namespace Vmm
|
||||
@@ -1,114 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2023-2024 Kernkonzept GmbH.
|
||||
* Author(s): Philipp Eppelt <philipp.eppelt@kernkonzept.com>
|
||||
*
|
||||
* License: see LICENSE.spdx (in this directory or the directories above)
|
||||
*/
|
||||
#include "event_recorder.h"
|
||||
#include "debug.h"
|
||||
|
||||
namespace Vmm {
|
||||
|
||||
/**
 * Try to inject the event at the top of the queue.
 *
 * On success the event is removed from the queue, the bookkeeping flags are
 * updated and the event is freed.
 *
 * \retval true   Event injected.
 * \retval false  No event recorded, or the top event could not be injected.
 */
bool Event_recorder::inject(Vm_state *vms)
{
  if (empty())
    return false;

  auto head = _queue.top();
  if (!head->inject(vms))
    return false;

  _queue.pop();

  switch (head->prio)
    {
    case Event_prio::Exception:
      // Several exceptions may be queued; keep the flag while another one
      // is at the top.
      if (_queue.empty() || _queue.top()->prio != Event_prio::Exception)
        _has_exception = false;
      break;

    case Event_prio::Nmi:
      _has_nmi = false;
      break;

    case Event_prio::Irq:
      _has_irq = false;
      break;

    default:
      break;
    }

  // We have ownership. We have to free the memory!
  free_event(head);
  return true;
}
|
||||
|
||||
/**
 * Record an event and take ownership of it.
 *
 * Exceptions are always queued. NMI and IRQ events are queued at most once:
 * if one is already recorded, the new event is redundant. As ownership has
 * moved to the recorder, a redundant event is freed here instead of being
 * dropped, which would leak it.
 *
 * \param event  Event to record. Must not be used by the caller afterwards.
 */
void Event_recorder::add(Event_record *event)
{
  if (event->prio == Event_prio::Exception)
    _has_exception = true;
  else if (event->prio == Event_prio::Nmi)
    {
      if (_has_nmi)
        {
          // Already recorded: release the duplicate we own.
          free_event(event);
          return;
        }

      _has_nmi = true;
    }
  else if (event->prio == Event_prio::Irq)
    {
      if (_has_irq)
        {
          // Already recorded: release the duplicate we own.
          free_event(event);
          return;
        }

      _has_irq = true;
    }

  _queue.push(event);
}
|
||||
|
||||
void Event_recorder::clear()
|
||||
{
|
||||
while (!_queue.empty())
|
||||
{
|
||||
auto top = _queue.top();
|
||||
_queue.pop();
|
||||
// We have ownership. We have to free the memory!
|
||||
free_event(top);
|
||||
}
|
||||
|
||||
_has_exception = false;
|
||||
_has_nmi = false;
|
||||
_has_irq = false;
|
||||
}
|
||||
|
||||
bool Event_recorder::empty() const
|
||||
{ return _queue.empty(); }
|
||||
|
||||
void Event_recorder::dump(unsigned vcpu_id) const
|
||||
{
|
||||
static char const *Event_prio_names[Event_prio::Prio_max] = {
|
||||
"Abort",
|
||||
"Exception",
|
||||
"Sw_int1",
|
||||
"Sw_int3",
|
||||
"Sw_intO",
|
||||
"Sw_intN",
|
||||
"Bound",
|
||||
"Reset",
|
||||
"Machine_check",
|
||||
"Trap_task_switch",
|
||||
"Ext_hw_intervention",
|
||||
"Trap_dbg_except",
|
||||
"Nmi",
|
||||
"Interrupt",
|
||||
"Fault_dbg_except",
|
||||
"Fault_fetch_next_instr",
|
||||
"Fault_decode_next_instr",
|
||||
};
|
||||
|
||||
if (_queue.empty())
|
||||
{
|
||||
Dbg().printf("[%3u] Ev_rec: No event recorded.\n", vcpu_id);
|
||||
return;
|
||||
}
|
||||
|
||||
auto prio = _queue.top()->prio;
|
||||
char const *name = prio < Event_prio::Prio_max ? Event_prio_names[prio]
|
||||
: "Index out of bounds";
|
||||
Dbg().printf("[%3u] Ev_rec: Top event has prio %i (%s); #events: %zu\n",
|
||||
vcpu_id, prio, name, _queue.size());
|
||||
}
|
||||
|
||||
} // namespace Vmm
|
||||
@@ -1,307 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2023-2024 Kernkonzept GmbH.
|
||||
* Author(s): Philipp Eppelt <philipp.eppelt@kernkonzept.com>
|
||||
*
|
||||
* License: see LICENSE.spdx (in this directory or the directories above)
|
||||
*/
|
||||
#pragma once
|
||||
#include "event_record.h"
|
||||
#include "vm_state.h"
|
||||
#include "debug.h"
|
||||
|
||||
#include <l4/re/error_helper>
|
||||
#include <l4/cxx/bitmap>
|
||||
#include <l4/cxx/minmax>
|
||||
#include <vector>
|
||||
#include <queue>
|
||||
#include <cassert>
|
||||
|
||||
|
||||
namespace Vmm {
|
||||
|
||||
/// Recorder of all events for a core.
|
||||
class Event_recorder
|
||||
{
|
||||
public:
|
||||
/// Frees all events that are still recorded.
~Event_recorder()
{
  clear();
}
|
||||
|
||||
/**
|
||||
* Inject highest priority event.
|
||||
*
|
||||
* \retval true Event injected.
|
||||
* \retval false No event to inject or can't inject pending event.
|
||||
*/
|
||||
bool inject(Vm_state *vms);
|
||||
|
||||
/**
|
||||
* Record an event.
|
||||
*
|
||||
* \note Pending interrupts are recorded as placeholder item such that the
|
||||
 *       caller knows to query the local APIC. NMI and IRQs are just
|
||||
* recorded once.
|
||||
*
|
||||
* \post Ownership moves to `Event_recorder`.
|
||||
*/
|
||||
void add(Event_record *event);
|
||||
|
||||
/// Clears all recorded events.
|
||||
void clear();
|
||||
/// True, iff no event recorded.
|
||||
bool empty() const;
|
||||
/// FIXME for MSR interface lacking return value tristate.
|
||||
bool has_exception() const { return _has_exception; }
|
||||
/// true, iff IRQ event already recorded
|
||||
bool has_nmi() const { return _has_nmi; }
|
||||
/// true, iff IRQ event already recorded
|
||||
bool has_irq() const { return _has_irq; }
|
||||
|
||||
/// debugging aid
|
||||
void dump(unsigned vcpu_id) const;
|
||||
|
||||
/// Create an Event instance and record it.
|
||||
template <typename T, typename... ARGS>
|
||||
void make_add_event(ARGS... args)
|
||||
{
|
||||
add(allocate_event<T, ARGS...>(args...));
|
||||
}
|
||||
|
||||
private:
|
||||
/// Debug channel for warnings, tagged "Event recorder".
static Dbg warn()
{
  return Dbg(Dbg::Core, Dbg::Warn, "Event recorder");
}
|
||||
|
||||
/**
|
||||
* Allocate memory for an object of type `T`.
|
||||
*
|
||||
* \tparam T Type derived from `Event_record`.
|
||||
*
|
||||
*/
|
||||
template <typename T, typename... ARGS>
|
||||
Event_record *allocate_event(ARGS... args)
|
||||
{
|
||||
static bool warn_once = true;
|
||||
char *addr = _memory.alloc(sizeof(T));
|
||||
if (addr)
|
||||
return new (addr) T(std::forward<ARGS>(args)...);
|
||||
else
|
||||
{
|
||||
// Print message once, if dynamic allocation is necessary on any core.
|
||||
if (warn_once)
|
||||
{
|
||||
warn_once = false;
|
||||
warn().printf("Usage of the slow path for event allocation. Memory "
|
||||
"preallocation exceeded for the first time.");
|
||||
}
|
||||
return new T(std::forward<ARGS>(args)...);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Destruct object derived from `Event_record` and free the memory.
|
||||
*
|
||||
* \param object Address of the object to destruct and free.
|
||||
*/
|
||||
void free_event(Event_record *object)
|
||||
{
|
||||
if (_memory.in_memory(reinterpret_cast<char *>(object)))
|
||||
{
|
||||
object->~Event_record();
|
||||
_memory.free(reinterpret_cast<char *>(object));
|
||||
}
|
||||
else
|
||||
delete object;
|
||||
}
|
||||
|
||||
/**
|
||||
* Encapsulate all memory management for Event_records within this class.
|
||||
*
|
||||
* We want to avoid dynamic memory allocation during VM exit handling and
|
||||
* thus preallocate the memory and create events within this memory range.
|
||||
* The memory is split into chunks that fit all Event_records object sizes
|
||||
* and returns one such chunk on request.
|
||||
*
|
||||
* It's an open question how to handle OOM situations.
|
||||
*/
|
||||
class Event_memory
|
||||
{
|
||||
struct Bin_if
|
||||
{
|
||||
virtual ~Bin_if() = default;
|
||||
virtual char *alloc() = 0;
|
||||
virtual bool free(char *) = 0;
|
||||
virtual bool managed_addr(char *addr) const = 0;
|
||||
};
|
||||
|
||||
template <unsigned BIN_SIZE, unsigned SLOTS>
|
||||
struct Bin : Bin_if
|
||||
{
|
||||
Bin() { slot_used.clear_all(); }
|
||||
|
||||
~Bin() = default;
|
||||
|
||||
char *alloc() noexcept override
|
||||
{
|
||||
int free_idx = slot_used.scan_zero(0);
|
||||
if (free_idx >= 0)
|
||||
{
|
||||
slot_used[free_idx] = true;
|
||||
return mem + free_idx * BIN_SIZE;
|
||||
}
|
||||
|
||||
warn().printf("no space in bin left to allocate. Bin addr %p, num bins "
|
||||
"%u, bin size %u\n",
|
||||
&mem, SLOTS, BIN_SIZE);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
bool free(char *addr) noexcept override
|
||||
{
|
||||
unsigned bin_idx = (addr - mem) / BIN_SIZE;
|
||||
assert(slot_used[bin_idx] == true);
|
||||
|
||||
slot_used[bin_idx] = false;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool managed_addr(char * addr) const noexcept override
|
||||
{
|
||||
if (addr < mem || addr >= mem + MEM_SIZE)
|
||||
{
|
||||
info().printf("Address %p not in bin-managed range[%p, %p]. Bin "
|
||||
"size: 0x%x\n",
|
||||
addr, mem, mem + MEM_SIZE, BIN_SIZE);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static unsigned constexpr MEM_SIZE = BIN_SIZE * SLOTS;
|
||||
cxx::Bitmap<SLOTS> slot_used;
|
||||
char mem[MEM_SIZE];
|
||||
};
|
||||
|
||||
/**
|
||||
* Compute maximum object size of all events.
|
||||
*
|
||||
* This depends on static_asserts for Event_nmi & Event_irq.
|
||||
*/
|
||||
static unsigned constexpr max_event_size()
|
||||
{
|
||||
// Event types: Event_exc, Real_mode_exc, Event_sw_generic, Event_nmi,
|
||||
// Event_irq
|
||||
|
||||
unsigned constexpr size =
|
||||
cxx::max(sizeof(Event_exc), sizeof(Real_mode_exc),
|
||||
sizeof(Event_sw_generic<0>));
|
||||
|
||||
// round to next power of two to fit to cache lines.
|
||||
return next_pow2(size);
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute the next larger value which is a power of two.
|
||||
*
|
||||
* \param num Number to start from.
|
||||
*/
|
||||
static unsigned constexpr next_pow2(unsigned num)
|
||||
{
|
||||
static_assert(sizeof(unsigned) <= 4,
|
||||
"Next power of 2 algorithm only supports 32-bit numbers.");
|
||||
|
||||
if (num == 0U)
|
||||
return 1;
|
||||
|
||||
--num;
|
||||
num |= num >> 1;
|
||||
num |= num >> 2;
|
||||
num |= num >> 4;
|
||||
num |= num >> 8;
|
||||
num |= num >> 16;
|
||||
|
||||
return ++num;
|
||||
}
|
||||
|
||||
public:
|
||||
Event_memory()
|
||||
{
|
||||
// instead of one preallocated bin per event size, we simplify and
|
||||
// use one bin for all events and accept the additional temporary memory
|
||||
// usage within a bin. Only the bin size affects the total memory usage.
|
||||
unsigned constexpr size = max_event_size();
|
||||
_bin = new Bin<size, 32>();
|
||||
}
|
||||
|
||||
~Event_memory()
|
||||
{
|
||||
if (_bin)
|
||||
delete _bin;
|
||||
}
|
||||
|
||||
char *alloc(l4_size_t /* size */)
|
||||
{
|
||||
char *addr = _bin->alloc();
|
||||
return addr;
|
||||
}
|
||||
|
||||
// pre: in_memory(addr) == true
|
||||
void free(char *addr)
|
||||
{
|
||||
assert(in_memory(addr));
|
||||
|
||||
_bin->free(addr);
|
||||
}
|
||||
|
||||
bool in_memory(char *addr)
|
||||
{
|
||||
return _bin->managed_addr(addr);
|
||||
}
|
||||
|
||||
private:
|
||||
static Dbg warn() { return Dbg(Dbg::Core, Dbg::Warn, "Event memory"); }
|
||||
static Dbg info() { return Dbg(Dbg::Core, Dbg::Info, "Event memory"); }
|
||||
|
||||
Bin_if *_bin;
|
||||
}; // class Event_memory
|
||||
|
||||
using Qtype = Event_record *;
|
||||
|
||||
struct QGreater
|
||||
{
|
||||
bool operator()(Qtype const &item1, Qtype const &item2) const
|
||||
{ return *item1 > *item2; }
|
||||
};
|
||||
|
||||
std::priority_queue<Qtype, std::vector<Qtype>, QGreater> _queue;
|
||||
Event_memory _memory;
|
||||
bool _has_exception = false;
|
||||
bool _has_nmi = false;
|
||||
bool _has_irq = false;
|
||||
};
|
||||
|
||||
/// Interface to get the event recorder for a specific core.
|
||||
struct Event_recorders
|
||||
{
|
||||
virtual Event_recorder *recorder(unsigned num) = 0;
|
||||
};
|
||||
|
||||
/**
|
||||
* Management entity for one `Event_recorder` per core.
|
||||
*/
|
||||
class Event_recorder_array : public Event_recorders
|
||||
{
|
||||
public:
|
||||
virtual ~Event_recorder_array() = default;
|
||||
|
||||
void init(unsigned size)
|
||||
{ _recorders.resize(size); }
|
||||
|
||||
Event_recorder *recorder(unsigned num) override
|
||||
{
|
||||
assert(num < _recorders.size());
|
||||
return &_recorders[num];
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<Event_recorder> _recorders;
|
||||
};
|
||||
|
||||
} // namespace Vmm
|
||||
@@ -1,119 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2021-2022, 2024 Kernkonzept GmbH.
|
||||
* Author(s): Jean Wolter <jean.wolter@kernkonzept.com>
|
||||
*
|
||||
* License: see LICENSE.spdx (in this directory or the directories above)
|
||||
*/
|
||||
|
||||
#include <l4/re/video/goos>
|
||||
|
||||
#include "guest.h"
|
||||
|
||||
static bool fb_present = false;
|
||||
static l4_uint64_t fb_addr, fb_size;
|
||||
static L4Re::Video::View::Info fb_viewinfo;
|
||||
|
||||
namespace Vdev {
|
||||
// Taken from linux/include/uapi/linux/screen_info.h
|
||||
struct screen_info {
|
||||
l4_uint8_t orig_x; /* 0x00 */
|
||||
l4_uint8_t orig_y; /* 0x01 */
|
||||
l4_uint16_t ext_mem_k; /* 0x02 */
|
||||
l4_uint16_t orig_video_page; /* 0x04 */
|
||||
l4_uint8_t orig_video_mode; /* 0x06 */
|
||||
l4_uint8_t orig_video_cols; /* 0x07 */
|
||||
l4_uint8_t flags; /* 0x08 */
|
||||
l4_uint8_t unused2; /* 0x09 */
|
||||
l4_uint16_t orig_video_ega_bx;/* 0x0a */
|
||||
l4_uint16_t unused3; /* 0x0c */
|
||||
l4_uint8_t orig_video_lines; /* 0x0e */
|
||||
l4_uint8_t orig_video_isVGA; /* 0x0f */
|
||||
l4_uint16_t orig_video_points;/* 0x10 */
|
||||
|
||||
/* VESA graphic mode -- linear frame buffer */
|
||||
l4_uint16_t lfb_width; /* 0x12 */
|
||||
l4_uint16_t lfb_height; /* 0x14 */
|
||||
l4_uint16_t lfb_depth; /* 0x16 */
|
||||
l4_uint32_t lfb_base; /* 0x18 */
|
||||
l4_uint32_t lfb_size; /* 0x1c */
|
||||
l4_uint16_t cl_magic, cl_offset; /* 0x20 */
|
||||
l4_uint16_t lfb_linelength; /* 0x24 */
|
||||
l4_uint8_t red_size; /* 0x26 */
|
||||
l4_uint8_t red_pos; /* 0x27 */
|
||||
l4_uint8_t green_size; /* 0x28 */
|
||||
l4_uint8_t green_pos; /* 0x29 */
|
||||
l4_uint8_t blue_size; /* 0x2a */
|
||||
l4_uint8_t blue_pos; /* 0x2b */
|
||||
l4_uint8_t rsvd_size; /* 0x2c */
|
||||
l4_uint8_t rsvd_pos; /* 0x2d */
|
||||
l4_uint16_t vesapm_seg; /* 0x2e */
|
||||
l4_uint16_t vesapm_off; /* 0x30 */
|
||||
l4_uint16_t pages; /* 0x32 */
|
||||
l4_uint16_t vesa_attributes; /* 0x34 */
|
||||
l4_uint32_t capabilities; /* 0x36 */
|
||||
l4_uint32_t ext_lfb_base; /* 0x3a */
|
||||
l4_uint8_t _reserved[2]; /* 0x3e */
|
||||
} __attribute__((packed));
|
||||
|
||||
/// Value written to `screen_info::orig_video_isVGA` for the framebuffer.
enum
{
  Video_type_vlfb = 0x23
};
|
||||
|
||||
/// Bits written to `screen_info::capabilities`.
enum
{
  Video_capability_skip_quirks = 1 << 0,
  /// Frame buffer base is 64-bit
  Video_capability_64bit_base  = 1 << 1
};
|
||||
|
||||
static void configure_framebuffer(void *zeropage)
|
||||
{
|
||||
auto *si = reinterpret_cast<struct screen_info *>(zeropage);
|
||||
|
||||
// define framebuffer type
|
||||
si->orig_video_isVGA = Video_type_vlfb;
|
||||
si->capabilities = Video_capability_skip_quirks | Video_capability_64bit_base;
|
||||
|
||||
// setup address and size of buffer
|
||||
si->lfb_base = fb_addr & 0xffffffff;
|
||||
si->ext_lfb_base = fb_addr >> 32;
|
||||
// framebuffer size is in 64 KiB chunks for VLFB per historical convention
|
||||
si->lfb_size = l4_round_size(fb_size, 16) >> 16;
|
||||
|
||||
// define dimensions
|
||||
si->lfb_width = fb_viewinfo.width;
|
||||
si->lfb_height = fb_viewinfo.height;
|
||||
si->lfb_linelength = fb_viewinfo.bytes_per_line;
|
||||
|
||||
// define color
|
||||
si->lfb_depth = fb_viewinfo.pixel_info.bytes_per_pixel() * 8;
|
||||
si->red_size = fb_viewinfo.pixel_info.r().size();
|
||||
si->red_pos = fb_viewinfo.pixel_info.r().shift();
|
||||
si->green_size = fb_viewinfo.pixel_info.g().size();
|
||||
si->green_pos = fb_viewinfo.pixel_info.g().shift();
|
||||
si->blue_size = fb_viewinfo.pixel_info.b().size();
|
||||
si->blue_pos = fb_viewinfo.pixel_info.b().shift();
|
||||
si->rsvd_size = fb_viewinfo.pixel_info.padding().size();
|
||||
si->rsvd_pos = fb_viewinfo.pixel_info.padding().shift();
|
||||
}
|
||||
} // namespace Vdev
|
||||
|
||||
namespace Vmm {
|
||||
bool
|
||||
Guest::register_framebuffer(l4_uint64_t addr, l4_uint64_t size,
|
||||
const L4Re::Video::View::Info &info)
|
||||
{
|
||||
if (fb_present)
|
||||
{
|
||||
Err().printf("0x%llx: Multiple definitions of framebuffer, only one framebuffer is supported\n",
|
||||
addr);
|
||||
return false;
|
||||
}
|
||||
|
||||
fb_present = true;
|
||||
fb_addr = addr;
|
||||
fb_size = size;
|
||||
fb_viewinfo = info;
|
||||
Vmm::Zeropage::set_screen_callback(Vdev::configure_framebuffer);
|
||||
return true;
|
||||
}
|
||||
} // namespace Vmm
|
||||
@@ -1,340 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2024 Kernkonzept GmbH.
|
||||
* Author(s): Sarah Hoffmann <sarah.hoffmann@kernkonzept.com>
|
||||
* Philipp Eppelt <philipp.eppelt@kernkonzept.com>
|
||||
* Benjamin Lamowski <benjamin.lamowski@kernkonzept.com>
|
||||
* Georg Kotheimer <georg.kotheimer@kernkonzept.com>
|
||||
*
|
||||
* License: see LICENSE.spdx (in this directory or the directories above)
|
||||
*/
|
||||
|
||||
#include "guest.h"
|
||||
#include "debug.h"
|
||||
#include "vm_state_svm.h"
|
||||
|
||||
namespace Vmm {
|
||||
|
||||
/**
|
||||
* Synchronize VMCB.StateSaveArea.RAX with Vcpu_regs.RAX.
|
||||
*/
|
||||
class Rax_guard
|
||||
{
|
||||
public:
|
||||
Rax_guard(Svm_state *vms, l4_vcpu_regs_t *regs) : _vms(vms), _regs(regs)
|
||||
{ _regs->ax = _vms->vmcb()->state_save_area.rax; }
|
||||
|
||||
~Rax_guard()
|
||||
{ _vms->vmcb()->state_save_area.rax = _regs->ax; }
|
||||
|
||||
private:
|
||||
Svm_state *_vms;
|
||||
l4_vcpu_regs_t *_regs;
|
||||
};
|
||||
|
||||
template <>
|
||||
int
|
||||
Guest::handle_exit<Svm_state>(Vmm::Cpu_dev *cpu, Svm_state *vms)
|
||||
{
|
||||
Vmm::Vcpu_ptr vcpu = cpu->vcpu();
|
||||
l4_vcpu_regs_t *regs = &vcpu->r;
|
||||
unsigned vcpu_id = vcpu.get_vcpu_id();
|
||||
|
||||
// Synchronize VMCB.StateSaveArea.RAX with Vcpu_regs.RAX. This is necessary
|
||||
// because the code shared between VMX and SVM uses the RAX in Vcpu_regs,
|
||||
// since in VMX only RSP and RIP are stored in the "guest state save area".
|
||||
Rax_guard rax_guard(vms, regs);
|
||||
|
||||
// Initially all fields are clean
|
||||
vms->mark_all_clean();
|
||||
|
||||
auto *ev_rec = recorder(vcpu.get_vcpu_id());
|
||||
using Exit = Svm_state::Exit;
|
||||
Exit reason = vms->exit_code();
|
||||
|
||||
switch (reason)
|
||||
{
|
||||
// TODO: Lacks handlers for some of the enabled intercepts, which have not
|
||||
// been triggered during development. If one of these interceptions is hit,
|
||||
// first an error message is printed and then the VM is stopped.
|
||||
case Exit::Cpuid: return handle_cpuid(vcpu);
|
||||
|
||||
case Exit::Vmmcall: return handle_vm_call(regs);
|
||||
|
||||
case Exit::Ioio:
|
||||
{
|
||||
Svm_state::Io_info info(vms->exit_info1());
|
||||
bool is_read = info.type() == 1;
|
||||
unsigned port = info.port();
|
||||
|
||||
trace().printf("[%3u]: VM exit: IO port access with exit info 0x%x: "
|
||||
"%s port 0x%x\n",
|
||||
vcpu_id, info.raw, is_read ? "read" : "write", port);
|
||||
|
||||
if (info.str())
|
||||
{
|
||||
warn().printf("[%3u]: String based port access is not supported!\n",
|
||||
vcpu_id);
|
||||
return Jump_instr;
|
||||
}
|
||||
|
||||
// rep prefix is only specified for string port access instructions,
|
||||
// which are not yet supported anyway.
|
||||
if (info.rep())
|
||||
{
|
||||
warn().printf("[%3u]: Repeated port access is not supported!\n",
|
||||
vcpu_id);
|
||||
return Jump_instr;
|
||||
}
|
||||
|
||||
Mem_access::Width wd = Mem_access::Wd32;
|
||||
switch (info.data_size())
|
||||
{
|
||||
case 1: wd = Mem_access::Wd8; break;
|
||||
case 2: wd = Mem_access::Wd16; break;
|
||||
case 4: wd = Mem_access::Wd32; break;
|
||||
}
|
||||
|
||||
return handle_io_access(port, is_read, wd, regs);
|
||||
}
|
||||
|
||||
case Exit::Nested_page_fault:
|
||||
{
|
||||
l4_addr_t guest_phys_addr = vms->exit_info2();
|
||||
Svm_state::Npf_info info(vms->exit_info1());
|
||||
|
||||
trace().printf(
|
||||
"[%3u]: Nested page fault at gp_addr 0x%lx with exit info 0x%llx\n",
|
||||
vcpu_id, guest_phys_addr, info.raw);
|
||||
|
||||
// TODO: Use instruction bytes provided by decode assist
|
||||
switch(handle_mmio(guest_phys_addr, vcpu))
|
||||
{
|
||||
case Retry: return L4_EOK;
|
||||
case Jump_instr:
|
||||
{
|
||||
// TODO: Avoid fetching and decoding the current instruction again
|
||||
// (handle_mmio already did that once).
|
||||
l4_uint64_t opcode;
|
||||
try
|
||||
{
|
||||
// overwrite the virtual IP with the physical OP code
|
||||
opcode = vcpu.get_pt_walker()->walk(vms->cr3(), vms->ip());
|
||||
}
|
||||
catch (L4::Runtime_error &e)
|
||||
{
|
||||
warn().printf("[%3u]: Could not determine opcode for MMIO "
|
||||
"access\n",
|
||||
vcpu_id);
|
||||
return -L4_EINVAL;
|
||||
}
|
||||
|
||||
// TODO: Check inst_buf points to valid memory and figure out its size.
|
||||
unsigned char *inst_buf = reinterpret_cast<unsigned char *>(opcode);
|
||||
unsigned inst_buf_len = 15;
|
||||
|
||||
// The next sequential instruction pointer (nRIP) is not saved for
|
||||
// nested page faults:
|
||||
// > nRIP is saved for instruction intercepts as well as MSR and
|
||||
// > IOIO intercepts and exceptions caused by the INT3, INTO,
|
||||
// > and BOUND instructions.
|
||||
// > For all other intercepts, nRIP is reset to zero.
|
||||
if (vms->determine_next_ip_from_ip(regs, inst_buf, inst_buf_len))
|
||||
return Jump_instr;
|
||||
else
|
||||
{
|
||||
warn().printf("[%3u]: Could not determine next ip for MMIO "
|
||||
"access\n",
|
||||
vcpu_id);
|
||||
return -L4_EINVAL;
|
||||
}
|
||||
}
|
||||
default: break;
|
||||
}
|
||||
|
||||
warn().printf("[%3u]: Unhandled nested page fault @ 0x%lx\n", vcpu_id,
|
||||
vms->ip());
|
||||
warn()
|
||||
.printf("[%3u]: Present: %u, Type: %s, Inst.: %u Phys addr: 0x%lx\n",
|
||||
vcpu_id, info.present().get(),
|
||||
info.write() ? "Write" : "Read", info.inst().get(),
|
||||
guest_phys_addr);
|
||||
return -L4_EINVAL;
|
||||
}
|
||||
|
||||
case Exit::Msr:
|
||||
{
|
||||
bool write = vms->exit_info1() == 1;
|
||||
bool has_already_exception = ev_rec->has_exception();
|
||||
if (!msr_devices_rwmsr(regs, write, vcpu.get_vcpu_id()))
|
||||
{
|
||||
info().printf("[%3u]: %s unsupported MSR 0x%lx\n", vcpu_id,
|
||||
write ? "Writing" : "Reading", regs->cx);
|
||||
ev_rec->make_add_event<Event_exc>(Event_prio::Exception, 13, 0);
|
||||
return Retry;
|
||||
}
|
||||
|
||||
if (!has_already_exception && ev_rec->has_exception())
|
||||
return Retry;
|
||||
else
|
||||
return Jump_instr;
|
||||
}
|
||||
|
||||
case Exit::Hlt:
|
||||
trace().printf("[%3u]: HALT 0x%lx!\n", vcpu_id, vms->ip());
|
||||
vms->halt();
|
||||
cpu->halt_cpu();
|
||||
return Jump_instr;
|
||||
|
||||
case Exit::Cr0_sel_write:
|
||||
return vms->handle_cr0_write(regs);
|
||||
|
||||
case Exit::Xsetbv:
|
||||
return vms->handle_xsetbv(regs);
|
||||
|
||||
case Exit::Vintr:
|
||||
// Used as interrupt window notification, handled in run_vm().
|
||||
return L4_EOK;
|
||||
|
||||
case Exit::Rdpmc:
|
||||
ev_rec->make_add_event<Event_exc>(Event_prio::Exception, 13, 0);
|
||||
return Retry;
|
||||
|
||||
case Exit::Dr0_read:
|
||||
case Exit::Dr1_read:
|
||||
case Exit::Dr2_read:
|
||||
case Exit::Dr3_read:
|
||||
case Exit::Dr4_read:
|
||||
case Exit::Dr5_read:
|
||||
case Exit::Dr6_read:
|
||||
case Exit::Dr7_read:
|
||||
{
|
||||
int i = static_cast<int>(reason) - static_cast<int>(Exit::Dr0_read);
|
||||
if (i == 4 || i == 5)
|
||||
{
|
||||
if (vms->vmcb()->state_save_area.cr4 & (1U << 3)) // CR4.DE set?
|
||||
{
|
||||
ev_rec->make_add_event<Event_exc>(Event_prio::Exception, 6);
|
||||
return Retry;
|
||||
}
|
||||
// else: alias to DR6 & DR7
|
||||
}
|
||||
|
||||
unsigned char gp_reg = vms->vmcb()->control_area.exitinfo1 & 0xf;
|
||||
*(&(regs->ax) - gp_reg) = 0;
|
||||
return Jump_instr;
|
||||
}
|
||||
case Exit::Dr8_read:
|
||||
case Exit::Dr9_read:
|
||||
case Exit::Dr10_read:
|
||||
case Exit::Dr11_read:
|
||||
case Exit::Dr12_read:
|
||||
case Exit::Dr13_read:
|
||||
case Exit::Dr14_read:
|
||||
case Exit::Dr15_read:
|
||||
// AMD APM Vol 2 Chapter 13.1.1.5 "64-Bit-Mode Extended Debug Registers":
|
||||
// DR8-15 are not implemented -> #UD
|
||||
ev_rec->make_add_event<Event_exc>(Event_prio::Exception, 6);
|
||||
return Retry;
|
||||
|
||||
case Exit::Dr0_write:
|
||||
case Exit::Dr1_write:
|
||||
case Exit::Dr2_write:
|
||||
case Exit::Dr3_write:
|
||||
case Exit::Dr4_write:
|
||||
case Exit::Dr5_write:
|
||||
case Exit::Dr6_write:
|
||||
case Exit::Dr7_write:
|
||||
{
|
||||
// Ignore the writes, except to illegal registers.
|
||||
int i = static_cast<int>(reason) - static_cast<int>(Exit::Dr0_read);
|
||||
if (i == 4 || i == 5)
|
||||
{
|
||||
if (vms->vmcb()->state_save_area.cr4 & (1U << 3)) // CR4.DE set?
|
||||
{
|
||||
ev_rec->make_add_event<Event_exc>(Event_prio::Exception, 6);
|
||||
return Retry;
|
||||
}
|
||||
}
|
||||
return Jump_instr;
|
||||
}
|
||||
case Exit::Dr8_write:
|
||||
case Exit::Dr9_write:
|
||||
case Exit::Dr10_write:
|
||||
case Exit::Dr11_write:
|
||||
case Exit::Dr12_write:
|
||||
case Exit::Dr13_write:
|
||||
case Exit::Dr14_write:
|
||||
case Exit::Dr15_write:
|
||||
// AMD APM Vol 2 Chapter 13.1.1.5 "64-Bit-Mode Extended Debug Registers":
|
||||
// DR8-15 are not implemented -> #UD
|
||||
ev_rec->make_add_event<Event_exc>(Event_prio::Exception, 6);
|
||||
return Retry;
|
||||
|
||||
case Exit::Vmrun:
|
||||
case Exit::Vmload:
|
||||
case Exit::Vmsave:
|
||||
case Exit::Stgi:
|
||||
case Exit::Clgi:
|
||||
case Exit::Skinit:
|
||||
case Exit::Rdtscp:
|
||||
// Unsupported instructions, inject undefined opcode exception
|
||||
ev_rec->make_add_event<Event_exc>(Event_prio::Exception, 6);
|
||||
return Retry;
|
||||
|
||||
case Exit::Sw_int:
|
||||
{
|
||||
// exit_info1[7:0] contains vector
|
||||
l4_uint32_t sw_int_num = vms->exit_info1() & 0xff;
|
||||
|
||||
using Event_sw_int = Event_sw_generic<4>;
|
||||
ev_rec->make_add_event<Event_sw_int>(Event_prio::Sw_intN, sw_int_num,
|
||||
0U);
|
||||
|
||||
return Retry;
|
||||
}
|
||||
|
||||
case Exit::Icebp:
|
||||
// Emulating ICEBP this way leads to an additional DPL check, which INT1
|
||||
// does not do normally, but normally, the INT1 is for HW vendors only.
|
||||
ev_rec->make_add_event<Event_exc>(Event_prio::Sw_int1, 1); // #DB
|
||||
|
||||
return Retry;
|
||||
|
||||
case Exit::Shutdown:
|
||||
// Any event that triggeres a shutdown, e.g. triple fault, lands here.
|
||||
info().printf("[%3u]: Shutdown intercept triggered at IP 0x%lx. Core in "
|
||||
"shutdown mode.\n",
|
||||
vcpu_id, vms->ip());
|
||||
vcpu.dump_regs_t(vms->ip(), info());
|
||||
|
||||
// move CPU into stop state
|
||||
cpu->stop();
|
||||
|
||||
return Retry;
|
||||
|
||||
default:
|
||||
if (reason >= Exit::Excp_0 && reason <= Exit::Excp_31)
|
||||
{
|
||||
int exc_num = static_cast<unsigned>(reason)
|
||||
- static_cast<unsigned>(Exit::Excp_0);
|
||||
return vms->handle_hardware_exception(ev_rec, exc_num);
|
||||
}
|
||||
|
||||
warn().printf("[%3u]: Exit at guest IP 0x%lx with 0x%x (Info1: 0x%llx, "
|
||||
"Info2: 0x%llx)\n",
|
||||
vcpu_id, vms->ip(), static_cast<unsigned>(reason),
|
||||
vms->exit_info1(), vms->exit_info2());
|
||||
|
||||
auto str_exit_code = vms->str_exit_code(reason);
|
||||
if (str_exit_code)
|
||||
warn().printf("[%3u]: Unhandled exit reason: %s (%d)\n",
|
||||
vcpu_id, str_exit_code, static_cast<unsigned>(reason));
|
||||
else
|
||||
warn().printf("[%3u]: Unknown exit reason: 0x%x\n",
|
||||
vcpu_id, static_cast<unsigned>(reason));
|
||||
|
||||
return -L4_ENOSYS;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
@@ -1,374 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2024 Kernkonzept GmbH.
|
||||
* Author(s): Sarah Hoffmann <sarah.hoffmann@kernkonzept.com>
|
||||
* Philipp Eppelt <philipp.eppelt@kernkonzept.com>
|
||||
* Benjamin Lamowski <benjamin.lamowski@kernkonzept.com>
|
||||
*
|
||||
* License: see LICENSE.spdx (in this directory or the directories above)
|
||||
*/
|
||||
|
||||
#include "guest.h"
|
||||
#include "debug.h"
|
||||
#include "vm_state_vmx.h"
|
||||
#include "vmx_exit_to_str.h"
|
||||
#include "event_recorder.h"
|
||||
|
||||
namespace Vmm {
|
||||
|
||||
template <>
|
||||
int
|
||||
Guest::handle_io_access_string<Vmx_state>(unsigned port, bool is_in,
|
||||
Mem_access::Width op_width,
|
||||
bool is_rep, l4_vcpu_regs_t *regs,
|
||||
Vmx_state *vms)
|
||||
{
|
||||
auto info
|
||||
= Vmx_state::Vmx_insn_info_field(vms->vmx_read(VMCS_VM_EXIT_INSN_INFO));
|
||||
|
||||
while (1)
|
||||
{
|
||||
if (is_rep)
|
||||
{
|
||||
// REP prefix: Handle loop condition.
|
||||
bool next;
|
||||
int rv = vms->rep_prefix_condition(regs, info, &next);
|
||||
if (rv != Jump_instr)
|
||||
return rv;
|
||||
|
||||
if (!next)
|
||||
break;
|
||||
}
|
||||
|
||||
if (is_in)
|
||||
{
|
||||
l4_uint32_t value = ~0U;
|
||||
bool ret = handle_io_access_ptr(port, true, op_width, &value);
|
||||
if (!ret)
|
||||
{
|
||||
Dbg(Dbg::Dev, Dbg::Trace)
|
||||
.printf("WARNING: Unhandled string IO read port 0x%x/%u\n",
|
||||
port, (1U << op_width) * 8);
|
||||
int rv = vms->store_io_value(regs, _ptw, info, op_width, ~0U);
|
||||
if (rv != Jump_instr)
|
||||
return rv;
|
||||
}
|
||||
else
|
||||
{
|
||||
int rv = vms->store_io_value(regs, _ptw, info, op_width, value);
|
||||
if (rv != Jump_instr)
|
||||
return rv;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
l4_uint32_t value;
|
||||
int rv = vms->load_io_value(regs, _ptw, info, op_width, &value);
|
||||
if (rv != Jump_instr)
|
||||
return rv;
|
||||
|
||||
bool ret = handle_io_access_ptr(port, false, op_width, &value);
|
||||
if (!ret)
|
||||
Dbg(Dbg::Dev, Dbg::Trace)
|
||||
.printf("WARNING: Unhandled string IO write port 0x%x/%u <- "
|
||||
"0x%x\n",
|
||||
port, (1U << op_width) * 8, value);
|
||||
}
|
||||
|
||||
// No REP prefix: Terminate the loop after the first iteration.
|
||||
if (!is_rep)
|
||||
break;
|
||||
}
|
||||
|
||||
return Jump_instr;
|
||||
}
|
||||
|
||||
template <>
|
||||
int
|
||||
Guest::handle_exit<Vmx_state>(Vmm::Cpu_dev *cpu, Vmx_state *vms)
|
||||
{
|
||||
using Exit = Vmx_state::Exit;
|
||||
auto reason = vms->exit_reason();
|
||||
Vmm::Vcpu_ptr vcpu = cpu->vcpu();
|
||||
auto *regs = &vcpu->r;
|
||||
auto *ev_rec = recorder(vcpu.get_vcpu_id());
|
||||
unsigned vcpu_id = vcpu.get_vcpu_id();
|
||||
|
||||
if (reason != Vmx_state::Exit::Exec_vmcall)
|
||||
trace().printf("[%3u]: Exit at guest IP 0x%lx SP 0x%lx with %llu ('%s') (Qual: 0x%llx)\n",
|
||||
vcpu_id, vms->ip(), vms->sp(),
|
||||
vms->vmx_read(VMCS_EXIT_REASON),
|
||||
exit_reason_to_str(vms->vmx_read(VMCS_EXIT_REASON)),
|
||||
vms->vmx_read(VMCS_EXIT_QUALIFICATION));
|
||||
|
||||
switch (reason)
|
||||
{
|
||||
case Exit::Cpuid: return handle_cpuid(vcpu);
|
||||
|
||||
case Exit::Exec_vmcall: return handle_vm_call(regs);
|
||||
|
||||
case Exit::Io_access:
|
||||
{
|
||||
auto qual = vms->vmx_read(VMCS_EXIT_QUALIFICATION);
|
||||
unsigned qwidth = qual & 7;
|
||||
bool is_read = qual & 8;
|
||||
bool is_string = qual & 16;
|
||||
bool is_rep = qual & 32;
|
||||
bool is_imm = qual & 64;
|
||||
unsigned port = (qual >> 16) & 0xFFFFU;
|
||||
|
||||
Dbg(Dbg::Dev, Dbg::Trace)
|
||||
.printf("[%3u]: VM exit @ 0x%lx: IO access with exit qualification "
|
||||
"0x%llx: %s port 0x%x %s%s%s\n",
|
||||
vcpu_id, vms->ip(), qual, is_read ? "read" : "write", port,
|
||||
is_imm ? "immediate" : "in DX", is_string ? " string" : "",
|
||||
is_rep ? " rep" : "");
|
||||
|
||||
if (port == 0xcfb)
|
||||
Dbg(Dbg::Dev, Dbg::Trace)
|
||||
.printf("[%3u]: N.B.: 0xcfb IO port access @ 0x%lx\n", vcpu_id,
|
||||
vms->ip());
|
||||
|
||||
Mem_access::Width op_width;
|
||||
switch (qwidth)
|
||||
{
|
||||
// Only 0, 1, 3 are valid values in the exit qualification.
|
||||
case 0: op_width = Mem_access::Wd8; break;
|
||||
case 1: op_width = Mem_access::Wd16; break;
|
||||
case 3: op_width = Mem_access::Wd32; break;
|
||||
default:
|
||||
warn().printf("[%3u]: Invalid IO access size %u @ 0x%lx\n",
|
||||
vcpu_id, qwidth, vms->ip());
|
||||
return Invalid_opcode;
|
||||
}
|
||||
|
||||
if (is_string)
|
||||
return handle_io_access_string(port, is_read, op_width, is_rep,
|
||||
regs, vms);
|
||||
|
||||
return handle_io_access(port, is_read, op_width, regs);
|
||||
}
|
||||
|
||||
// Ept_violation needs to be checked here, as handle_mmio needs a vCPU ptr,
|
||||
// which cannot be passed to Vm_state/Vmx_state due to dependency reasons.
|
||||
case Exit::Ept_violation:
|
||||
{
|
||||
auto guest_phys_addr =
|
||||
vms->vmx_read(VMCS_GUEST_PHYSICAL_ADDRESS);
|
||||
auto qual = vms->vmx_read(VMCS_EXIT_QUALIFICATION);
|
||||
|
||||
trace().printf("[%3u]: Exit reason due to EPT violation %i; gp_addr "
|
||||
"0x%llx, qualification 0x%llx\n",
|
||||
vcpu_id, static_cast<unsigned>(reason), guest_phys_addr,
|
||||
qual);
|
||||
|
||||
auto ret = handle_mmio(guest_phys_addr, vcpu);
|
||||
|
||||
// XXX Idt_vectoring_info could be valid.
|
||||
|
||||
switch(ret)
|
||||
{
|
||||
case Retry: return L4_EOK;
|
||||
case Jump_instr: return Jump_instr;
|
||||
default: break;
|
||||
}
|
||||
|
||||
warn().printf("[%3u]: Unhandled pagefault @ 0x%lx\n", vcpu_id,
|
||||
vms->ip());
|
||||
warn().printf("[%3u]: Read: %llu, Write: %llu, Inst.: %llu Phys addr: "
|
||||
"0x%llx\n",
|
||||
vcpu_id, qual & 1, qual & 2, qual & 4, guest_phys_addr);
|
||||
|
||||
if (qual & 0x80)
|
||||
warn().printf("[%3u]: Linear address: 0x%llx\n", vcpu_id,
|
||||
vms->vmx_read(VMCS_GUEST_LINEAR_ADDRESS));
|
||||
return -L4_EINVAL;
|
||||
}
|
||||
|
||||
// VMX specific exits
|
||||
case Exit::Exception_or_nmi:
|
||||
{
|
||||
// XXX Idt_vectoring_info could be valid.
|
||||
}
|
||||
// FIXME entry info might be overwritten by exception handling
|
||||
// currently this isn't fully fletched anyways so this works for now.
|
||||
[[fallthrough]];
|
||||
case Exit::External_int:
|
||||
return vms->handle_exception_nmi_ext_int(ev_rec);
|
||||
|
||||
case Exit::Interrupt_window:
|
||||
case Exit::Nmi_window:
|
||||
return Retry;
|
||||
|
||||
case Exit::Exec_halt:
|
||||
if (0)
|
||||
info().printf("[%3u]: HALT @ 0x%llx! Activity state 0x%llx\n",
|
||||
vcpu_id, vms->vmx_read(VMCS_GUEST_RIP),
|
||||
vms->vmx_read(VMCS_GUEST_ACTIVITY_STATE));
|
||||
|
||||
vms->halt();
|
||||
cpu->halt_cpu();
|
||||
return Jump_instr;
|
||||
|
||||
case Exit::Exec_rdpmc:
|
||||
return General_protection;
|
||||
|
||||
case Exit::Cr_access:
|
||||
return vms->handle_cr_access(regs);
|
||||
|
||||
case Exit::Exec_rdmsr:
|
||||
if (!msr_devices_rwmsr(regs, false, vcpu_id))
|
||||
{
|
||||
warn().printf("[%3u]: Reading unsupported MSR 0x%lx\n", vcpu_id,
|
||||
regs->cx);
|
||||
regs->ax = 0;
|
||||
regs->dx = 0;
|
||||
return General_protection;
|
||||
}
|
||||
|
||||
return Jump_instr;
|
||||
|
||||
case Exit::Exec_wrmsr:
|
||||
{
|
||||
bool has_already_exception = ev_rec->has_exception();
|
||||
if (!msr_devices_rwmsr(regs, true, vcpu.get_vcpu_id()))
|
||||
{
|
||||
warn().printf("[%3u]: Writing unsupported MSR 0x%lx\n", vcpu_id,
|
||||
regs->cx);
|
||||
return General_protection;
|
||||
}
|
||||
|
||||
// Writing an MSR e.g. IA32_EFER can lead to injection of a HW exception.
|
||||
// In this case the instruction wasn't emulated, thus don't jump it.
|
||||
if (!has_already_exception && ev_rec->has_exception())
|
||||
return Retry;
|
||||
else
|
||||
return Jump_instr;
|
||||
}
|
||||
case Exit::Virtualized_eoi:
|
||||
Dbg().printf("[%3u]: INFO: EOI virtualized for vector 0x%llx\n",
|
||||
vcpu_id, vms->vmx_read(VMCS_EXIT_QUALIFICATION));
|
||||
// Trap like exit: IP already on next instruction
|
||||
return L4_EOK;
|
||||
|
||||
case Exit::Exec_xsetbv:
|
||||
if (regs->cx == 0)
|
||||
{
|
||||
l4_uint64_t value = (l4_uint64_t(regs->ax) & 0xFFFFFFFF)
|
||||
| (l4_uint64_t(regs->dx) << 32);
|
||||
vms->vmx_write(L4_VM_VMX_VMCS_XCR0, value);
|
||||
trace().printf("[%3u]: Setting xcr0 to 0x%llx\n", vcpu_id, value);
|
||||
return Jump_instr;
|
||||
}
|
||||
Dbg().printf("[%3u]: Writing unknown extended control register %ld\n",
|
||||
vcpu_id, regs->cx);
|
||||
return -L4_EINVAL;
|
||||
|
||||
case Exit::Apic_write:
|
||||
// Trap like exit: IP already on next instruction
|
||||
assert(0); // Not supported
|
||||
return L4_EOK;
|
||||
|
||||
case Exit::Mov_debug_reg:
|
||||
{
|
||||
l4_uint64_t qual = vms->vmx_read(VMCS_EXIT_QUALIFICATION);
|
||||
unsigned char dbg_reg = qual & 0x7;
|
||||
bool read = qual & (1 << 4);
|
||||
unsigned char gp_reg = (qual >> 8) & 0xf;
|
||||
// check CR4.DE
|
||||
if (dbg_reg == 4 || dbg_reg == 5)
|
||||
{
|
||||
if (vms->vmx_read(VMCS_GUEST_CR4) & (1U << 3)) // CR4.DE set?
|
||||
return Invalid_opcode;
|
||||
// else: alias to DR6 & DR7
|
||||
}
|
||||
|
||||
if (read)
|
||||
{
|
||||
if (gp_reg == 0x4)
|
||||
regs->sp = 0UL;
|
||||
else
|
||||
{
|
||||
l4_umword_t *r = &(regs->ax);
|
||||
*(r - gp_reg) = 0UL;
|
||||
}
|
||||
}
|
||||
// else: ignore writes
|
||||
trace().printf("[%3u]: MOV DR exit: %s DR%u %s GP%u. Value: 0x%lx\n",
|
||||
vcpu_id, read ? "read" : "write", dbg_reg,
|
||||
read ? "to" : "from", gp_reg, *(&(regs->ax) - gp_reg));
|
||||
return Jump_instr;
|
||||
}
|
||||
|
||||
case Exit::Exec_vmclear:
|
||||
case Exit::Exec_vmlaunch:
|
||||
case Exit::Exec_vmptrld:
|
||||
case Exit::Exec_vmptrst:
|
||||
case Exit::Exec_vmread:
|
||||
case Exit::Exec_vmresume:
|
||||
case Exit::Exec_vmwrite:
|
||||
case Exit::Exec_vmxoff:
|
||||
case Exit::Exec_vmxon:
|
||||
case Exit::Exec_invept:
|
||||
case Exit::Exec_invvpid:
|
||||
case Exit::Exec_rdtscp:
|
||||
// Unsupported instructions, inject undefined opcode exception
|
||||
return Invalid_opcode;
|
||||
|
||||
case Exit::Triple_fault:
|
||||
// Double-fault experienced exception. Set core into shutdown mode.
|
||||
info().printf("[%3u]: Triple fault exit at IP 0x%lx. Core is in shutdown "
|
||||
"mode.\n",
|
||||
vcpu_id, vms->ip());
|
||||
vcpu.dump_regs_t(vms->ip(), info());
|
||||
|
||||
// move CPU into stop state
|
||||
cpu->stop();
|
||||
return Retry;
|
||||
|
||||
case Exit::Entry_fail_invalid_guest:
|
||||
{
|
||||
auto qual = vms->vmx_read(VMCS_EXIT_QUALIFICATION);
|
||||
auto reason_raw = vms->vmx_read(VMCS_EXIT_REASON);
|
||||
auto ip = vms->ip();
|
||||
auto insn_err = vms->vmx_read(VMCS_VM_INSN_ERROR);
|
||||
auto entry_exc_err = vms->vmx_read(VMCS_VM_ENTRY_EXCEPTION_ERROR);
|
||||
|
||||
Dbg().printf("VM-entry failure due to invalid guest state:\n"
|
||||
"Exit reason raw: 0x%llx\n"
|
||||
"Exit qualification: 0x%llx\n"
|
||||
"IP: 0x%lx\n"
|
||||
"Instruction error: 0x%llx\n"
|
||||
"Entry exception error: 0x%llx\n",
|
||||
reason_raw, qual, ip, insn_err, entry_exc_err
|
||||
);
|
||||
}
|
||||
[[fallthrough]];
|
||||
|
||||
case Exit::Task_switch:
|
||||
case Exit::Apic_access:
|
||||
case Exit::Ept_misconfig:
|
||||
case Exit::Page_mod_log_full:
|
||||
case Exit::Spp_related_event:
|
||||
// These cases need to check IDT-vectoring info for validity!
|
||||
|
||||
default:
|
||||
{
|
||||
Dbg().printf("[%3u]: Exit at guest IP 0x%lx SP 0x%lx with 0x%llx "
|
||||
"(Qual: 0x%llx)\n",
|
||||
vcpu_id, vms->ip(), vms->sp(),
|
||||
vms->vmx_read(VMCS_EXIT_REASON),
|
||||
vms->vmx_read(VMCS_EXIT_QUALIFICATION));
|
||||
|
||||
unsigned reason_u = static_cast<unsigned>(reason);
|
||||
if (reason_u < sizeof(str_exit_reason) / sizeof(*str_exit_reason))
|
||||
Dbg().printf("[%3u]: Unhandled exit reason: %s (%d)\n",
|
||||
vcpu_id, str_exit_reason[reason_u], reason_u);
|
||||
else
|
||||
Dbg().printf("[%3u]: Unknown exit reason: 0x%x\n", vcpu_id, reason_u);
|
||||
|
||||
return -L4_ENOSYS;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,489 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2024 Kernkonzept GmbH.
|
||||
* Author(s): Sarah Hoffmann <sarah.hoffmann@kernkonzept.com>
|
||||
* Philipp Eppelt <philipp.eppelt@kernkonzept.com>
|
||||
*
|
||||
* License: see LICENSE.spdx (in this directory or the directories above)
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <l4/util/cpu.h>
|
||||
#include <l4/vbus/vbus>
|
||||
#include <l4/l4virtio/l4virtio>
|
||||
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
#include <vector>
|
||||
|
||||
#include "cpu_dev_array.h"
|
||||
#include "generic_guest.h"
|
||||
#include "msr_device.h"
|
||||
#include "cpuid_device.h"
|
||||
#include "mem_access.h"
|
||||
#include "vcpu_ptr.h"
|
||||
#include "virt_lapic.h"
|
||||
#include "vmprint.h"
|
||||
#include "zeropage.h"
|
||||
#include "pt_walker.h"
|
||||
#include "vm_ram.h"
|
||||
#include "binary_loader.h"
|
||||
#include "event_recorder.h"
|
||||
#include "pm_device_if.h"
|
||||
|
||||
namespace Vmm {
|
||||
|
||||
class Guest : public Generic_guest
|
||||
{
|
||||
public:
|
||||
enum { Default_rambase = 0, Boot_offset = 0 };
|
||||
|
||||
enum { Has_io_space = true };
|
||||
using Io_mem = std::map<Io_region, cxx::Ref_ptr<Io_device>>;
|
||||
|
||||
Guest()
|
||||
: _apics(Vdev::make_device<Gic::Lapic_array>()),
|
||||
_icr_handler(Vdev::make_device<Gic::Icr_handler>()),
|
||||
_lapic_access_handler(Vdev::make_device<Gic::Lapic_access_handler>(
|
||||
_apics, _icr_handler, get_max_physical_address_bit()))
|
||||
{
|
||||
add_mmio_device(_lapic_access_handler->mmio_region(),
|
||||
_lapic_access_handler);
|
||||
register_msr_device(_lapic_access_handler);
|
||||
|
||||
// Do this once for all TSC-based timers used in uvmm.
|
||||
l4_calibrate_tsc(l4re_kip());
|
||||
}
|
||||
|
||||
static Guest *create_instance();
|
||||
static Guest *get_instance();
|
||||
|
||||
void setup_device_tree(Vdev::Device_tree) {}
|
||||
|
||||
void show_state_interrupts(FILE *, Vcpu_ptr) {}
|
||||
|
||||
void register_io_device(cxx::Ref_ptr<Vmm::Io_device> const &dev,
|
||||
Region_type type,
|
||||
Vdev::Dt_node const &node, size_t index = 0);
|
||||
void add_io_device(Io_region const ®ion,
|
||||
cxx::Ref_ptr<Io_device> const &dev);
|
||||
void del_io_device(Io_region const ®ion);
|
||||
|
||||
/**
|
||||
* Indicate whether the legacy i8042 keyboard controller is present.
|
||||
*
|
||||
* We assume that the legacy i8042 keyboard controller is present if the
|
||||
* I/O ports 0x60 and 0x64 are registered.
|
||||
*
|
||||
* \retval true The legacy i8042 keyboard controller is present.
|
||||
* \retval false The legacy i8042 keyboard controller is absent.
|
||||
*/
|
||||
bool i8042_present();
|
||||
|
||||
bool register_framebuffer(l4_uint64_t addr, l4_uint64_t size,
|
||||
const L4Re::Video::View::Info &info);
|
||||
|
||||
/**
|
||||
* Return IO port map.
|
||||
*
|
||||
* Must only be used before the guest started to run or for debugging. Might
|
||||
* be manipulated concurrently from other vCPUs!
|
||||
*/
|
||||
Io_mem const *iomap()
|
||||
{ return &_iomap; }
|
||||
|
||||
void register_msr_device(cxx::Ref_ptr<Msr_device> const &dev);
|
||||
|
||||
/**
|
||||
* Register a CPUID-handling device in a list.
|
||||
*
|
||||
* \param dev CPUID-handling device to register.
|
||||
*/
|
||||
void register_cpuid_device(cxx::Ref_ptr<Cpuid_device> const &dev);
|
||||
|
||||
l4_addr_t load_binary(Vm_ram *ram, char const *binary,
|
||||
Ram_free_list *free_list);
|
||||
|
||||
void prepare_platform(Vdev::Device_lookup *devs);
|
||||
|
||||
void prepare_binary_run(Vdev::Device_lookup *devs, l4_addr_t entry,
|
||||
char const *binary, char const *cmd_line,
|
||||
l4_addr_t dt_boot_addr);
|
||||
|
||||
void run(cxx::Ref_ptr<Cpu_dev_array> const &cpus);
|
||||
|
||||
/**
 * Suspend the guest and prepare vCPU 0 to continue at `wake_vector`.
 *
 * All registered power-management devices are suspended first. If
 * `_pm->suspend()` reports failure, the devices are resumed again and the
 * function returns without sleeping. Otherwise vCPU 0 blocks in an IPC wait
 * with `L4_IPC_NEVER` as timeout; after that wait returns, `_pm` and the
 * devices are resumed and the VM state of vCPU 0 is re-initialised and set
 * up through `setup_real_mode(wake_vector)`.
 *
 * \param wake_vector  Value handed to `setup_real_mode()` for vCPU 0.
 */
void suspend(l4_addr_t wake_vector)
{
  using Pm_registry = Vdev::Pm_device_registry;

  Pm_registry::suspend_devices();

  bool const suspended = _pm->suspend();
  if (!suspended)
    {
      // Undo the device suspend; the guest continues right away.
      warn().printf("System suspend not possible. Waking up immediately.\n");
      Pm_registry::resume_devices();
      return;
    }

  // Go to sleep: block vCPU 0 until the IPC wait returns.
  auto boot_vcpu = _cpus->cpu(0)->vcpu();
  boot_vcpu.wait_for_ipc(l4_utcb(), L4_IPC_NEVER);

  // Back alive: bring up the platform before the devices.
  _pm->resume();
  Pm_registry::resume_devices();

  boot_vcpu.vm_state()->init_state();
  boot_vcpu.vm_state()->setup_real_mode(wake_vector);
  info().printf("Waking CPU %u on EIP 0x%lx\n", 0, wake_vector);
}
|
||||
|
||||
void sync_all_other_cores_off() const override;
|
||||
// returns the number of running cores
|
||||
unsigned cores_running() const;
|
||||
|
||||
void handle_entry(Vcpu_ptr vcpu);
|
||||
|
||||
Gic::Virt_lapic *lapic(Vcpu_ptr vcpu)
|
||||
{ return _apics->get(vcpu.get_vcpu_id()).get(); }
|
||||
|
||||
cxx::Ref_ptr<Gic::Lapic_array> apic_array() { return _apics; }
|
||||
cxx::Ref_ptr<Gic::Icr_handler> icr_handler() { return _icr_handler; }
|
||||
|
||||
int handle_cpuid(Vcpu_ptr vcpu);
|
||||
int handle_vm_call(l4_vcpu_regs_t *regs);
|
||||
|
||||
/**
|
||||
* Access IO port and load/store the value to RAX.
|
||||
*
|
||||
* In case the given IO port is not handled by any device on read, the value
|
||||
* of all ones is stored to RAX. Write errors are silently ignored.
|
||||
*
|
||||
* \param[in] port IO port to access.
|
||||
* \param[in] is_in True if this is the IN (read) access.
|
||||
* \param[in] op_width Width of the access (1/2/4 bytes).
|
||||
* \param[in,out] regs Register file. The value read/written is
|
||||
* stored/loaded into RAX.
|
||||
*
|
||||
* \retval Jump_instr Success, all errors are silently ignored.
|
||||
*/
|
||||
int handle_io_access(unsigned port, bool is_in, Mem_access::Width op_width,
|
||||
l4_vcpu_regs_t *regs);
|
||||
|
||||
/**
|
||||
* Access IO port (core implementation).
|
||||
*
|
||||
* Core implementation of accessing an IO port. The method looks up the
|
||||
* device that handles the IO port and does the access.
|
||||
*
|
||||
* \param[in] port IO port to access.
|
||||
* \param[in] is_in True if this is the IN (read) access.
|
||||
* \param[in] op_width Width of the access (1/2/4 bytes).
|
||||
* \param[in,out] value Value to read/write.
|
||||
*
|
||||
* \retval true The IO access was successful.
|
||||
* \retval false No device handles the given IO port.
|
||||
*/
|
||||
bool handle_io_access_ptr(unsigned port, bool is_in,
|
||||
Mem_access::Width op_width, l4_uint32_t *value);
|
||||
|
||||
void run_vm(Vcpu_ptr vcpu) L4_NORETURN;
|
||||
|
||||
Boot::Binary_type guest_type() const
|
||||
{ return _guest_t; }
|
||||
|
||||
private:
|
||||
enum : unsigned
|
||||
{
|
||||
Max_phys_addr_bits_mask = 0xff,
|
||||
};
|
||||
|
||||
struct Xsave_state_area
|
||||
{
|
||||
struct Size_off { l4_uint64_t size = 0, offset = 0; };
|
||||
|
||||
enum
|
||||
{
|
||||
// Some indices are valid in xcr0, some in xss.
|
||||
x87 = 0, // XCR0
|
||||
sse, // XCR0
|
||||
avx, // XCR0
|
||||
mpx1, // XCR0
|
||||
mpx2, // XCR0
|
||||
avx512_1, // XCR0
|
||||
avx512_2, // XCR0
|
||||
avx512_3, // XCR0
|
||||
pts, // XSS
|
||||
pkru, // XCR0,
|
||||
pasid, // XSS
|
||||
cetu, // XSS
|
||||
cets, // XSS
|
||||
hdc, // XSS
|
||||
uintr, // XSS
|
||||
lbr, // XSS
|
||||
hwp, // XSS
|
||||
tilecfg, // XCR0
|
||||
tiledata, // XCR0
|
||||
|
||||
Num_fields = 31,
|
||||
};
|
||||
|
||||
bool valid = false;
|
||||
// first two fields are legacy area, so always (size=0, offset=0);
|
||||
Size_off feat[Num_fields];
|
||||
};
|
||||
|
||||
void prepare_openbsd_binary_run(Vdev::Device_lookup *devs, l4_addr_t entry,
|
||||
char const *binary, char const *cmd_line,
|
||||
l4_addr_t dt_boot_addr);
|
||||
void prepare_linux_binary_run(Vdev::Device_lookup *devs, l4_addr_t entry,
|
||||
char const *binary, char const *cmd_line,
|
||||
l4_addr_t dt_boot_addr);
|
||||
|
||||
template<typename VMS>
|
||||
void run_vm_t(Vcpu_ptr vcpu, VMS *vm) L4_NORETURN;
|
||||
|
||||
template <typename VMS>
|
||||
bool event_injection_t(Vcpu_ptr vcpu, VMS *vm);
|
||||
|
||||
template <typename VMS>
|
||||
int handle_exit(Cpu_dev *cpu, VMS *vm);
|
||||
|
||||
/**
|
||||
* Handle IO access VM exit in case of a [REP] INS/OUTS.
|
||||
*
|
||||
* \tparam VMS VM state type.
|
||||
*
|
||||
* \param[in] port IO port to access.
|
||||
* \param[in] is_in True if this is the INS (read) access.
|
||||
* \param[in] op_width Width of the IO access (1/2/4 bytes).
|
||||
* \param[in] is_rep True if there is the REP prefix.
|
||||
* \param[in,out] regs Register file.
|
||||
* \param[in,out] vms VM state.
|
||||
*
|
||||
* \retval Jump_instr [REP] INS/OUTS instruction handled
|
||||
* successfully.
|
||||
* \retval Invalid_opcode Instruction decoding failure or unsupported
|
||||
* CPU mode.
|
||||
* \retval General_protection Segmentation fault.
|
||||
* \retval Stack_fault Segmentation fault in the SS segment.
|
||||
*/
|
||||
template <typename VMS>
|
||||
int handle_io_access_string(unsigned port, bool is_in,
|
||||
Mem_access::Width op_width, bool is_rep,
|
||||
l4_vcpu_regs_t *regs, VMS *vm);
|
||||
|
||||
unsigned get_max_physical_address_bit() const
|
||||
{
|
||||
l4_umword_t ax, bx, cx, dx;
|
||||
|
||||
// Check for highest extended CPUID leaf
|
||||
l4util_cpu_cpuid(0x80000000, &ax, &bx, &cx, &dx);
|
||||
|
||||
if (ax >= 0x80000008)
|
||||
l4util_cpu_cpuid(0x80000008, &ax, &bx, &cx, &dx);
|
||||
else
|
||||
{
|
||||
// Check for highest basic CPUID leaf
|
||||
l4util_cpu_cpuid(0x00, &ax, &bx, &cx, &dx);
|
||||
|
||||
if (ax >= 0x01)
|
||||
{
|
||||
l4util_cpu_cpuid(0x01, &ax, &bx, &cx, &dx);
|
||||
if (dx & (1UL << 6)) // PAE
|
||||
ax = 36;
|
||||
else
|
||||
ax = 32;
|
||||
}
|
||||
else
|
||||
ax = 32; // Minimum if leaf not supported
|
||||
}
|
||||
|
||||
return ax & Max_phys_addr_bits_mask;
|
||||
}
|
||||
|
||||
bool msr_devices_rwmsr(l4_vcpu_regs_t *regs, bool write, unsigned vcpu_no);
|
||||
/**
|
||||
* Attempt to handle the CPUID instruction by consecutively trying handlers
|
||||
* of the CPUID-handling devices registered in the _cpuid_devices list. The
|
||||
* list is traversed from the front to the back.
|
||||
*/
|
||||
bool handle_cpuid_devices(l4_vcpu_regs_t const *regs, unsigned *a,
|
||||
unsigned *b, unsigned *c, unsigned *d);
|
||||
|
||||
Event_recorder *recorder(unsigned num)
|
||||
{ return _event_recorders.recorder(num); }
|
||||
|
||||
/**
|
||||
* Perform actions necessary when changing from one Cpu_dev state to another.
|
||||
*
|
||||
* \tparam VMS SVM or VMX state type
|
||||
* \param current Current CPU state
|
||||
* \param new_state CPU state to transition into
|
||||
* \param lapic local APIC of the current vCPU
|
||||
* \param vm SVM or VMX state
|
||||
* \param cpu current CPU device
|
||||
*/
|
||||
template <typename VMS>
|
||||
bool state_transition_effects(Cpu_dev::Cpu_state const current,
|
||||
Cpu_dev::Cpu_state const new_state,
|
||||
Gic::Virt_lapic *lapic, VMS *vm, Cpu_dev *cpu);
|
||||
|
||||
/**
|
||||
* Perform actions of the state the Cpu_dev just transitioned into.
|
||||
*
|
||||
* \tparam VMS SVM or VMX state type
|
||||
* \param state New CPU state after state transition
|
||||
* \param halt_req true, if `state` is the halt state and events are pending
|
||||
* \param cpu current CPU device
|
||||
* \param vm SVM or VMX state
|
||||
*/
|
||||
template <typename VMS>
|
||||
bool new_state_action(Cpu_dev::Cpu_state state, bool halt_req, Cpu_dev *cpu,
|
||||
VMS *vm);
|
||||
|
||||
/**
 * Print all registered IO port regions with the name of their device.
 *
 * Nothing is printed (and `_iomap_lock` is not taken) when the debug channel
 * for the given verbosity is inactive.
 *
 * \param l  Verbosity of the "vmmap" debug channel to print on.
 */
void iomap_dump(Dbg::Verbosity l)
{
  Dbg out(Dbg::Dev, l, "vmmap");
  if (!out.is_active())
    return;

  out.printf("IOport map:\n");

  // Hold the lock while iterating over the map.
  std::lock_guard<std::mutex> guard(_iomap_lock);
  for (auto const &entry : _iomap)
    {
      auto const &region = entry.first;
      out.printf(" [%4lx:%4lx]: %s\n", region.start, region.end,
                 entry.second->dev_name());
    }
}
|
||||
std::mutex _iomap_lock;
|
||||
Io_mem _iomap;
|
||||
|
||||
std::vector<cxx::Ref_ptr<Msr_device>> _msr_devices;
|
||||
std::vector<cxx::Ref_ptr<Cpuid_device>> _cpuid_devices;
|
||||
|
||||
// devices
|
||||
Guest_print_buffer _hypcall_print;
|
||||
cxx::Ref_ptr<Pt_walker> _ptw;
|
||||
cxx::Ref_ptr<Gic::Lapic_array> _apics;
|
||||
cxx::Ref_ptr<Gic::Icr_handler> _icr_handler;
|
||||
cxx::Ref_ptr<Gic::Lapic_access_handler> _lapic_access_handler;
|
||||
Boot::Binary_type _guest_t;
|
||||
cxx::Ref_ptr<Vmm::Cpu_dev_array> _cpus;
|
||||
Vmm::Event_recorder_array _event_recorders;
|
||||
Xsave_state_area _xsave_layout;
|
||||
l4_addr_t _guest_size;
|
||||
};
|
||||
|
||||
/**
|
||||
* Handler for MSR read/write to a specific vCPU with its corresponding
|
||||
* VM state.
|
||||
*/
|
||||
class Vcpu_msr_handler : public Msr_device
|
||||
{
|
||||
public:
|
||||
Vcpu_msr_handler(Cpu_dev_array *cpus,
|
||||
Vmm::Event_recorders *ev_rec)
|
||||
: _cpus(cpus), _ev_rec(ev_rec)
|
||||
{};
|
||||
|
||||
bool read_msr(unsigned msr, l4_uint64_t *value, unsigned vcpu_no) const override
|
||||
{
|
||||
return _cpus->vcpu(vcpu_no).vm_state()->read_msr(msr, value);
|
||||
}
|
||||
|
||||
bool write_msr(unsigned msr, l4_uint64_t value, unsigned vcpu_no) override
|
||||
{
|
||||
return _cpus->vcpu(vcpu_no)
|
||||
.vm_state()
|
||||
->write_msr(msr, value, _ev_rec->recorder(vcpu_no));
|
||||
}
|
||||
|
||||
private:
|
||||
Cpu_dev_array *_cpus;
|
||||
Event_recorders *_ev_rec;
|
||||
};
|
||||
|
||||
/**
|
||||
* Handler for MSR access to all MTRR registers.
|
||||
*
|
||||
* MTRR are architectural registers and do not differ between AMD and Intel.
|
||||
* MTRRs are core specific and must be kept in sync.
|
||||
* Since all writes are ignored and reads just show the static state, we do
|
||||
* no core specific handling for these registers.
|
||||
*/
|
||||
class Mtrr_msr_handler : public Msr_device
|
||||
{
|
||||
public:
|
||||
Mtrr_msr_handler() = default;
|
||||
|
||||
bool read_msr(unsigned msr, l4_uint64_t *value, unsigned) const override
|
||||
{
|
||||
switch(msr)
|
||||
{
|
||||
case 0xfe: // IA32_MTRRCAP, RO
|
||||
*value = 1U << 10; // WriteCombining support bit.
|
||||
break;
|
||||
case 0x2ff: // IA32_MTRR_DEF_TYPE
|
||||
*value = 1U << 11; // E/MTRR enable bit
|
||||
break;
|
||||
|
||||
// MTRRphysMask/Base[0-9]; only present if IA32_MTRRCAP[7:0] > 0
|
||||
case 0x200: case 0x201: case 0x202: case 0x203: case 0x204: case 0x205:
|
||||
case 0x206: case 0x207: case 0x208: case 0x209: case 0x20a: case 0x20b:
|
||||
case 0x20c: case 0x20d: case 0x20e: case 0x20f: case 0x210: case 0x211:
|
||||
case 0x212: case 0x213:
|
||||
*value = 0;
|
||||
break;
|
||||
|
||||
case 0x250: // MTRRfix64K_0000
|
||||
[[fallthrough]];
|
||||
case 0x258: // MTRRfix16K
|
||||
[[fallthrough]];
|
||||
case 0x259: // MTRRfix16K
|
||||
[[fallthrough]];
|
||||
// MTRRfix_4K_*
|
||||
case 0x268: case 0x269: case 0x26a: case 0x26b: case 0x26c: case 0x26d:
|
||||
case 0x26e: case 0x26f:
|
||||
*value = 0;
|
||||
break;
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool write_msr(unsigned msr, l4_uint64_t, unsigned) override
|
||||
{
|
||||
switch(msr)
|
||||
{
|
||||
case 0x2ff: // MTRRdefType
|
||||
// We report no MTRRs in the MTRRdefType MSR. Thus we ignore writes here.
|
||||
// MTRRs might also be disabled temporarily by the guest.
|
||||
break;
|
||||
|
||||
// Ignore all writes to MTRR registers, we flagged all of them as unsupported
|
||||
// MTRRphysMask/Base[0-9]; only present if MTRRcap[7:0] > 0
|
||||
case 0x200: case 0x201: case 0x202: case 0x203: case 0x204: case 0x205:
|
||||
case 0x206: case 0x207: case 0x208: case 0x209: case 0x20a: case 0x20b:
|
||||
case 0x20c: case 0x20d: case 0x20e: case 0x20f: case 0x210: case 0x211:
|
||||
case 0x212: case 0x213:
|
||||
break;
|
||||
|
||||
case 0x250: // MTRRfix64K_0000
|
||||
[[fallthrough]];
|
||||
case 0x258: // MTRRfix16K
|
||||
[[fallthrough]];
|
||||
case 0x259: // MTRRfix16K
|
||||
[[fallthrough]];
|
||||
// MTRRfix_4K_*
|
||||
case 0x268: case 0x269: case 0x26a: case 0x26b: case 0x26c: case 0x26d:
|
||||
case 0x26e: case 0x26f:
|
||||
break;
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
}; // class Mtrr_msr_handler
|
||||
|
||||
} // namespace Vmm
|
||||
@@ -1,59 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2018, 2024 Kernkonzept GmbH.
|
||||
* Author(s): Philipp Eppelt <philipp.eppelt@kernkonzept.com>
|
||||
*
|
||||
* License: see LICENSE.spdx (in this directory or the directories above)
|
||||
*/
|
||||
|
||||
#include <l4/util/port_io.h>
|
||||
#include "io_port_handler.h"
|
||||
|
||||
namespace Vdev {
|
||||
|
||||
void Io_port_handler::io_in(unsigned p, Mem_access::Width width, l4_uint32_t *value)
|
||||
{
|
||||
l4_uint16_t port = p + _base;
|
||||
|
||||
switch(width)
|
||||
{
|
||||
case Mem_access::Wd8:
|
||||
*value = l4util_in8(port);
|
||||
break;
|
||||
case Mem_access::Wd16:
|
||||
*value = l4util_in16(port);
|
||||
break;
|
||||
case Mem_access::Wd32:
|
||||
*value = l4util_in32(port);
|
||||
break;
|
||||
case Mem_access::Wd64:
|
||||
// architecture does not support 64bit port access
|
||||
*value = -1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void Io_port_handler::io_out(unsigned p, Mem_access::Width width, l4_uint32_t value)
|
||||
{
|
||||
l4_uint16_t port = p + _base;
|
||||
|
||||
switch(width)
|
||||
{
|
||||
case Mem_access::Wd8:
|
||||
l4util_out8(value, port);
|
||||
break;
|
||||
|
||||
case Mem_access::Wd16:
|
||||
l4util_out16(value, port);
|
||||
break;
|
||||
|
||||
case Mem_access::Wd32:
|
||||
l4util_out32(value, port);
|
||||
break;
|
||||
|
||||
case Mem_access::Wd64:
|
||||
// architecture does not support 64bit port access
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Vdev
|
||||
@@ -1,263 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2023-2024 Kernkonzept GmbH.
|
||||
* Author(s): Philipp Eppelt <philipp.eppelt@kernkonzept.com>
|
||||
*
|
||||
* License: see LICENSE.spdx (in this directory or the directories above)
|
||||
*/
|
||||
#include "device_factory.h"
|
||||
#include "guest.h"
|
||||
#include "ioapic.h"
|
||||
|
||||
namespace Gic {
|
||||
|
||||
/**
 * Read the IOAPIC register with index `reg` (as selected via IOREGSEL).
 *
 * ID and arbitration register both return `_id`. The version register
 * combines `Io_apic_ver` with the highest pin number in bits 16 and up.
 * All other indices are interpreted as redirection table registers, two per
 * pin; the internal level-set bit is hidden from the lower half.
 *
 * \return Register content, or all ones for an unimplemented register.
 */
l4_uint64_t Io_apic::read_reg(unsigned reg) const
{
  if (reg == Id_reg || reg == Arbitration_reg)
    return _id;

  if (reg == Version_reg)
    return Io_apic_ver | ((Io_apic_num_pins - 1) << 16);

  // Redirection table: even index = lower half, odd index = upper half.
  unsigned const tbl_index = reg - Redir_tbl_offset_reg;
  unsigned const pin = tbl_index / 2;
  if (pin >= Io_apic_num_pins)
    {
      info().printf("Unimplemented MMIO read from ioregsel "
                    "register 0x%x\n", reg);
      return -1;
    }

  Redir_tbl_entry entry = _redirect_tbl[pin].load();
  if (tbl_index % 2)
    return entry.upper_reg();

  return entry.lower_reg()
         & ~(1UL << Redir_tbl_entry::Nospec_level_set_bit);
}
|
||||
|
||||
void Io_apic::write_reg(unsigned reg, l4_uint64_t value)
|
||||
{
|
||||
if (reg == Id_reg)
|
||||
{
|
||||
_id = value;
|
||||
return;
|
||||
}
|
||||
|
||||
unsigned index = reg - Redir_tbl_offset_reg;
|
||||
unsigned irq = index / 2;
|
||||
if (irq >= Io_apic_num_pins)
|
||||
{
|
||||
info().printf("Unimplemented MMIO write to ioregsel register 0x%x\n",
|
||||
reg);
|
||||
return;
|
||||
}
|
||||
|
||||
Redir_tbl_entry e = _redirect_tbl[irq];
|
||||
Redir_tbl_entry e_new;
|
||||
bool was_pending = e.is_pending();
|
||||
|
||||
do
|
||||
{
|
||||
e_new = e;
|
||||
|
||||
if (index % 2)
|
||||
e_new.upper_reg() = value;
|
||||
else
|
||||
{
|
||||
// ignore writes to RO fields
|
||||
value = (value & ~Redir_tbl_entry::Ro_mask)
|
||||
| e_new.delivery_status().get_unshifted()
|
||||
| e_new.remote_irr().get_unshifted();
|
||||
|
||||
// retain level_set bit, if entry is still masked.
|
||||
if ( value & (1 << Redir_tbl_entry::Masked_bit)
|
||||
&& e_new.is_pending())
|
||||
value |= (1 << Redir_tbl_entry::Nospec_level_set_bit);
|
||||
|
||||
e_new.lower_reg() = value;
|
||||
}
|
||||
}
|
||||
while (!_redirect_tbl[irq].compare_exchange_weak(e, e_new));
|
||||
|
||||
if (!e_new.masked())
|
||||
apic_bind_irq_src_handler(irq, e_new.vector(), e_new.dest_id(),
|
||||
e_new.dest_mode());
|
||||
|
||||
// in case of level-triggerd IRQs deliver IRQ since level is high.
|
||||
if (!e_new.masked() && was_pending)
|
||||
{
|
||||
trace()
|
||||
.printf("IRQ %i not masked anymore. send pending level irq\n",
|
||||
irq);
|
||||
set(irq);
|
||||
}
|
||||
// no need to clear the level_set bit, we didn't write it into the new
|
||||
// entry above.
|
||||
}
|
||||
|
||||
/**
 * MMIO read from the IOAPIC register window.
 *
 * \param reg     Offset of the accessed MMIO register.
 * \param cpu_id  Number of the accessing CPU; only used for logging.
 *
 * \return IOREGSEL content, the register selected by IOREGSEL (for IOWIN),
 *         zero for the EOI register, or all ones for any other offset.
 */
l4_uint64_t Io_apic::read(unsigned reg, char, unsigned cpu_id)
{
  if (reg == Ioregsel)
    return _ioregsel;

  if (reg == Iowin)
    return read_reg(_ioregsel.load());

  if (reg == Eoir)
    return 0UL;

  info().printf("Unimplemented MMIO read from register %d by CPU %d\n",
                reg, cpu_id);
  return -1;
}
|
||||
|
||||
void Io_apic::write(unsigned reg, char, l4_uint64_t value, unsigned cpu_id)
|
||||
{
|
||||
switch (reg)
|
||||
{
|
||||
case Ioregsel:
|
||||
_ioregsel = value & 0xff;
|
||||
break;
|
||||
case Iowin:
|
||||
write_reg(_ioregsel.load(), value);
|
||||
break;
|
||||
case Eoir:
|
||||
clear_all_rirr(value & 0xff);
|
||||
break;
|
||||
default:
|
||||
info().printf("Unimplemented MMIO write to register %d by CPU %d\n",
|
||||
reg, cpu_id);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void Io_apic::apic_bind_irq_src_handler(unsigned entry_num, unsigned vec,
|
||||
unsigned dest, unsigned dest_mod)
|
||||
{
|
||||
Ioapic_irq_src_handler *hdlr = &_apic_irq_src[entry_num];
|
||||
if (hdlr->vector != -1U)
|
||||
{
|
||||
// assumption: hdlr already bound
|
||||
if (hdlr->vector == vec)
|
||||
return;
|
||||
else
|
||||
apic_unbind_irq_src_handler(entry_num);
|
||||
}
|
||||
|
||||
hdlr->vector = vec;
|
||||
hdlr->dest = dest;
|
||||
hdlr->dest_mod = dest_mod;
|
||||
do_apic_bind_irq_src_handler(hdlr, true);
|
||||
};
|
||||
|
||||
void Io_apic::apic_unbind_irq_src_handler(unsigned entry_num)
|
||||
{
|
||||
Ioapic_irq_src_handler *hdlr = &_apic_irq_src[entry_num];
|
||||
if (hdlr->vector == -1U)
|
||||
// don't unbind handler if not bound
|
||||
return;
|
||||
|
||||
do_apic_bind_irq_src_handler(hdlr, false);
|
||||
|
||||
hdlr->vector = -1U;
|
||||
hdlr->dest = -1U;
|
||||
hdlr->dest_mod = 0U;
|
||||
}
|
||||
|
||||
/**
 * Register or unregister `hdlr` for its vector at the destination local
 * APIC(s).
 *
 * \param hdlr  Handler carrying vector, destination and destination mode.
 * \param bind  true: register `hdlr`; false: register `nullptr` instead.
 *
 * In physical mode (`dest_mod == 0`) a missing local APIC for `dest` is
 * ignored silently.
 */
void Io_apic::do_apic_bind_irq_src_handler(Ioapic_irq_src_handler *hdlr,
                                           bool bind)
{
  Ioapic_irq_src_handler *to_register = nullptr;
  if (bind)
    to_register = hdlr;

  bool const logical_mode = hdlr->dest_mod != 0;
  if (logical_mode)
    {
      _lapics->apics_bind_irq_src_handler_logical(hdlr->dest, hdlr->vector,
                                                  to_register);
      return;
    }

  // physical destination mode
  auto lapic = _lapics->get(hdlr->dest);
  if (lapic)
    lapic->bind_irq_src_handler(hdlr->vector, to_register);
}
|
||||
|
||||
void Io_apic::set(unsigned irq)
|
||||
{
|
||||
// send to PIC. (TODO only if line is masked at IOAPIC?)
|
||||
if (irq < 16) // PIC can handle only the first 16 lines
|
||||
_pic->set(irq);
|
||||
|
||||
Redir_tbl_entry entry = redirect(irq);
|
||||
if (entry.masked())
|
||||
{
|
||||
if (entry.is_level_triggered())
|
||||
// We must save the state of the level triggered IRQ, since we get
|
||||
// the softIRQ only once and can't query the current level.
|
||||
// We don't notice, if the actual HW line changes to no-IRQ again,
|
||||
// but that's better than losing an IRQ here.
|
||||
set_level_set(irq);
|
||||
return;
|
||||
}
|
||||
|
||||
if (entry.remote_irr())
|
||||
{
|
||||
// ignore re-triggered level-triggered IRQs that are in-service at
|
||||
// local APIC
|
||||
return;
|
||||
}
|
||||
|
||||
Vdev::Msix::Data_register_format data(entry.vector());
|
||||
data.trigger_mode() = entry.trigger_mode();
|
||||
data.trigger_level() = !entry.pin_polarity(); // it's actually inverted.
|
||||
data.delivery_mode() = entry.delivery_mode();
|
||||
|
||||
Vdev::Msix::Interrupt_request_compat addr(0ULL);
|
||||
addr.dest_id() = entry.dest_id();
|
||||
addr.dest_mode() = entry.dest_mode();
|
||||
addr.fixed() = Vdev::Msix::Address_interrupt_prefix;
|
||||
|
||||
_distr->send(addr.raw, data.raw);
|
||||
|
||||
// update entry if necessary
|
||||
if (entry.is_level_triggered())
|
||||
set_remote_irr(irq);
|
||||
}
|
||||
|
||||
} // namespace Gic
|
||||
|
||||
|
||||
namespace {
|
||||
|
||||
struct F : Vdev::Factory
|
||||
{
|
||||
cxx::Ref_ptr<Vdev::Device> create(Vdev::Device_lookup *devs,
|
||||
Vdev::Dt_node const &node) override
|
||||
{
|
||||
auto msi_distr = devs->get_or_create_mc_dev(node);
|
||||
auto apic_array = devs->vmm()->apic_array();
|
||||
// Create the legacy PIC device here to forward legacy Interrupts.
|
||||
auto pic = Vdev::make_device<Vdev::Legacy_pic>(msi_distr);
|
||||
auto io_apic =
|
||||
Vdev::make_device<Gic::Io_apic>(msi_distr, apic_array, pic);
|
||||
devs->vmm()->add_mmio_device(io_apic->mmio_region(), io_apic);
|
||||
|
||||
// Register legacy PIC IO-ports
|
||||
devs->vmm()->add_io_device(Vmm::Io_region(0x20, 0x21,
|
||||
Vmm::Region_type::Virtual),
|
||||
pic->master());
|
||||
devs->vmm()->add_io_device(Vmm::Io_region(0xA0, 0xA1,
|
||||
Vmm::Region_type::Virtual),
|
||||
pic->slave());
|
||||
return io_apic;
|
||||
}
|
||||
};
|
||||
|
||||
static F f;
|
||||
static Vdev::Device_type d = {"intel,ioapic", nullptr, &f};
|
||||
|
||||
}
|
||||
@@ -1,294 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2023-2024 Kernkonzept GmbH.
|
||||
* Author(s): Philipp Eppelt <philipp.eppelt@kernkonzept.com>
|
||||
*
|
||||
* License: see LICENSE.spdx (in this directory or the directories above)
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include "mmio_device.h"
|
||||
#include "debug.h"
|
||||
#include "irq.h"
|
||||
#include "msi_controller.h"
|
||||
#include "msix.h"
|
||||
#include "msi_arch.h"
|
||||
#include "legacy_pic.h"
|
||||
#include "monitor/ioapic_cmd_handler.h"
|
||||
|
||||
namespace Gic {
|
||||
|
||||
/**
|
||||
* Virtual IOAPIC implementation of a 82093AA.
|
||||
*
|
||||
* The IOAPIC sends legacy IRQs onwards as MSI as programmed into the
|
||||
* redirection table by the guest.
|
||||
*/
|
||||
class Io_apic : public Ic,
|
||||
public Vmm::Mmio_device_t<Io_apic>,
|
||||
public Monitor::Ioapic_cmd_handler<Monitor::Enabled, Io_apic>
|
||||
{
|
||||
enum
|
||||
{
|
||||
Io_apic_id = 0,
|
||||
Io_apic_id_offset = 24,
|
||||
Io_apic_ver = 0x20,
|
||||
Io_apic_num_pins = 120,
|
||||
Io_apic_mem_size = 0x1000,
|
||||
Irq_cells = 1, // keep in sync with virt-pc.dts
|
||||
};
|
||||
|
||||
enum Ioapic_mmio_regs
|
||||
{
|
||||
Ioregsel = 0,
|
||||
Iowin = 0x10,
|
||||
Eoir = 0x40,
|
||||
};
|
||||
|
||||
enum Ioapic_regs
|
||||
{
|
||||
Id_reg = 0,
|
||||
Version_reg = 1,
|
||||
Arbitration_reg = 2,
|
||||
Redir_tbl_offset_reg = 0x10,
|
||||
};
|
||||
|
||||
struct Redir_tbl_entry
|
||||
{
|
||||
enum
|
||||
{
|
||||
Delivery_status_bit = 12,
|
||||
Remote_irr_bit = 14,
|
||||
Masked_bit = 16,
|
||||
Nospec_level_set_bit = 17,
|
||||
Ro_mask = 1U << Nospec_level_set_bit | 1U << Delivery_status_bit
|
||||
| 1U << Remote_irr_bit,
|
||||
};
|
||||
|
||||
Redir_tbl_entry() noexcept = default;
|
||||
// The IOAPIC spec mentions bit 48, which is specified as reserved, bit 16
|
||||
// is the mask bit and I think it's sane to start out with masked vectors.
|
||||
l4_uint64_t raw = 1ULL << 16;
|
||||
|
||||
bool is_level_triggered() const { return trigger_mode(); }
|
||||
bool is_pending() { return is_level_triggered() && level_set(); }
|
||||
|
||||
CXX_BITFIELD_MEMBER_RO(56, 63, dest_id, raw);
|
||||
// use reserved bit for internal state of level triggered input line.
|
||||
// only relevant, if line is masked
|
||||
CXX_BITFIELD_MEMBER(17, 17, level_set, raw);
|
||||
CXX_BITFIELD_MEMBER_RO(16, 16, masked, raw);
|
||||
CXX_BITFIELD_MEMBER_RO(15, 15, trigger_mode, raw);
|
||||
CXX_BITFIELD_MEMBER(14, 14, remote_irr, raw);
|
||||
CXX_BITFIELD_MEMBER_RO(13, 13, pin_polarity, raw);
|
||||
CXX_BITFIELD_MEMBER_RO(12, 12, delivery_status, raw);
|
||||
CXX_BITFIELD_MEMBER_RO(11, 11, dest_mode, raw);
|
||||
CXX_BITFIELD_MEMBER_RO(8, 10, delivery_mode, raw);
|
||||
CXX_BITFIELD_MEMBER_RO(0, 7, vector, raw);
|
||||
|
||||
// Redirection Table entries can only be written as DWORD.
|
||||
CXX_BITFIELD_MEMBER(0, 31, lower_reg, raw);
|
||||
CXX_BITFIELD_MEMBER(32, 63, upper_reg, raw);
|
||||
};
|
||||
|
||||
struct Ioapic_irq_src_handler : public Irq_src_handler
|
||||
{
|
||||
void eoi() override
|
||||
{
|
||||
assert(ioapic != nullptr);
|
||||
|
||||
// clear state in redirection table entry
|
||||
ioapic->entry_eoi(irq_num);
|
||||
|
||||
{
|
||||
// MSI generated from the IRQ can have multiple target cores. If this
|
||||
// IRQ/MSI is level triggered, multiple cores would send an EOI.
|
||||
// Would be insane, but who knows.
|
||||
std::lock_guard<std::mutex> lock(_mtx);
|
||||
|
||||
// get IRQ src handler of input IRQ and forward EOI signal
|
||||
Irq_src_handler *hdlr = ioapic->get_irq_src_handler(irq_num);
|
||||
if (hdlr)
|
||||
hdlr->eoi();
|
||||
}
|
||||
}
|
||||
|
||||
unsigned irq_num = 0;
|
||||
Io_apic *ioapic = nullptr;
|
||||
unsigned vector = -1U;
|
||||
unsigned dest = -1U;
|
||||
unsigned dest_mod = 0; // default: physical
|
||||
private:
|
||||
std::mutex _mtx;
|
||||
};
|
||||
|
||||
public:
|
||||
enum
|
||||
{
|
||||
Mmio_addr = 0xfec00000,
|
||||
};
|
||||
|
||||
Io_apic(cxx::Ref_ptr<Gic::Msix_controller> distr,
|
||||
cxx::Ref_ptr<Gic::Lapic_array> apic_array,
|
||||
cxx::Ref_ptr<Vdev::Legacy_pic> pic)
|
||||
: _distr(distr), _lapics(apic_array),
|
||||
_id(Io_apic_id << Io_apic_id_offset), _ioregsel(0), _iowin(0),
|
||||
_pic(pic)
|
||||
{
|
||||
// initialize IRQ src handler for LAPIC communication
|
||||
for (unsigned i = 0; i < Io_apic_num_pins; ++i)
|
||||
{
|
||||
_apic_irq_src[i].irq_num = i;
|
||||
_apic_irq_src[i].ioapic = this;
|
||||
}
|
||||
}
|
||||
|
||||
// public only for monitor access
|
||||
l4_uint64_t read_reg(unsigned reg) const;
|
||||
|
||||
// Mmio device interface
|
||||
l4_uint64_t read(unsigned reg, char, unsigned cpu_id);
|
||||
void write(unsigned reg, char, l4_uint64_t value, unsigned cpu_id);
|
||||
|
||||
// IC interface
|
||||
void set(unsigned irq) override;
|
||||
void clear(unsigned) override {}
|
||||
|
||||
/**
|
||||
* Bind the IRQ src handler of a level-triggered legacy interrupt.
|
||||
*
|
||||
* This handler is signaled, if the IOAPIC receives an EOI signal from the
|
||||
* local APIC for the corresponding interrupt line.
|
||||
*/
|
||||
void bind_irq_src_handler(unsigned irq, Irq_src_handler *handler) override
|
||||
{
|
||||
if (irq >= Io_apic_num_pins)
|
||||
{
|
||||
warn().printf("Try to bind out-of-range IRQ %u. Ignoring. \n", irq);
|
||||
return;
|
||||
}
|
||||
if (handler && _sources[irq])
|
||||
L4Re::throw_error(-L4_EEXIST, "Bind IRQ src handler at IOAPIC." );
|
||||
_sources[irq] = handler;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get IRQ src handler bound for the given legacy interrupt line or
|
||||
* `nullptr` if no handler is bound.
|
||||
*/
|
||||
Irq_src_handler *get_irq_src_handler(unsigned irq) const override
{
  if (irq < Io_apic_num_pins)
    return _sources[irq];

  // Out-of-range lines never have a handler.
  warn().printf("Try to get out-of-range IRQ %u. Ignoring. \n", irq);
  return nullptr;
}
|
||||
|
||||
/**
 * Decode an interrupt specifier from a device tree property.
 *
 * \param prop    Property cells; the first cell is converted to CPU byte
 *                order and returned.
 * \param propsz  Number of cells available in `prop`.
 * \param read    If not null, receives the number of cells consumed.
 *
 * \return The interrupt number, or -L4_ERANGE if fewer than `Irq_cells`
 *         cells are available.
 */
int dt_get_interrupt(fdt32_t const *prop, int propsz,
                     int *read) const override
{
  if (propsz >= Irq_cells)
    {
      if (read != nullptr)
        *read = Irq_cells;

      return fdt32_to_cpu(prop[0]);
    }

  return -L4_ERANGE;
}
|
||||
|
||||
Vmm::Region mmio_region() const
|
||||
{
|
||||
return Vmm::Region::ss(Vmm::Guest_addr(Mmio_addr), Io_apic_mem_size,
|
||||
Vmm::Region_type::Virtual);
|
||||
}
|
||||
|
||||
char const *dev_name() const override { return "Ioapic"; }
|
||||
|
||||
private:
|
||||
static Dbg trace() { return Dbg(Dbg::Irq, Dbg::Trace, "IOAPIC"); }
|
||||
static Dbg info() { return Dbg(Dbg::Irq, Dbg::Info, "IOAPIC"); }
|
||||
static Dbg warn() { return Dbg(Dbg::Irq, Dbg::Warn, "IOAPIC"); }
|
||||
|
||||
void write_reg(unsigned reg, l4_uint64_t value);
|
||||
|
||||
/// Return the redirection table entry for given `irq`.
|
||||
Redir_tbl_entry redirect(unsigned irq) const
|
||||
{
|
||||
assert(irq < Io_apic_num_pins);
|
||||
return _redirect_tbl[irq];
|
||||
}
|
||||
|
||||
void entry_eoi(unsigned irq)
|
||||
{
|
||||
assert(irq < Io_apic_num_pins);
|
||||
|
||||
// clear remote_irr and for level triggered the level_set bit.
|
||||
Redir_tbl_entry e = _redirect_tbl[irq];
|
||||
Redir_tbl_entry e_new;
|
||||
|
||||
do
|
||||
{
|
||||
e_new = e;
|
||||
e_new.remote_irr() = 0;
|
||||
e_new.level_set() = 0;
|
||||
}
|
||||
while (!_redirect_tbl[irq].compare_exchange_weak(e, e_new));
|
||||
}
|
||||
|
||||
void set_level_set(unsigned irq)
|
||||
{
|
||||
assert(irq < Io_apic_num_pins);
|
||||
|
||||
Redir_tbl_entry e = _redirect_tbl[irq];
|
||||
Redir_tbl_entry e_new;
|
||||
|
||||
do
|
||||
{
|
||||
e_new = e;
|
||||
e_new.level_set() = 1;
|
||||
}
|
||||
while (!_redirect_tbl[irq].compare_exchange_weak(e, e_new));
|
||||
}
|
||||
|
||||
void set_remote_irr(unsigned irq)
|
||||
{
|
||||
assert(irq < Io_apic_num_pins);
|
||||
|
||||
Redir_tbl_entry e = _redirect_tbl[irq];
|
||||
Redir_tbl_entry e_new;
|
||||
|
||||
do
|
||||
{
|
||||
e_new = e;
|
||||
e_new.remote_irr() = 1;
|
||||
}
|
||||
while (!_redirect_tbl[irq].compare_exchange_weak(e, e_new));
|
||||
}
|
||||
|
||||
/// Run entry_eoi() on every redirection entry whose vector equals `vec`.
void clear_all_rirr(l4_uint8_t vec)
{
  for (unsigned pin = 0; pin != Io_apic_num_pins; ++pin)
    {
      if (_redirect_tbl[pin].load().vector() != vec)
        continue;

      entry_eoi(pin);
    }
}
|
||||
|
||||
void apic_bind_irq_src_handler(unsigned entry_num, unsigned vec,
|
||||
unsigned dest, unsigned dest_mod);
|
||||
void apic_unbind_irq_src_handler(unsigned entry_num);
|
||||
void do_apic_bind_irq_src_handler(Ioapic_irq_src_handler *hdlr, bool bind);
|
||||
|
||||
cxx::Ref_ptr<Gic::Msix_controller> _distr;
|
||||
cxx::Ref_ptr<Lapic_array> _lapics;
|
||||
std::atomic<l4_uint32_t> _id;
|
||||
std::atomic<l4_uint32_t> _ioregsel;
|
||||
std::atomic<l4_uint32_t> _iowin;
|
||||
std::atomic<Redir_tbl_entry> _redirect_tbl[Io_apic_num_pins];
|
||||
Gic::Irq_src_handler *_sources[Io_apic_num_pins] = {};
|
||||
cxx::Ref_ptr<Vdev::Legacy_pic> _pic;
|
||||
Ioapic_irq_src_handler _apic_irq_src[Io_apic_num_pins];
|
||||
}; // class Io_apic
|
||||
|
||||
} // namespace Gic
|
||||
@@ -1,119 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2021, 2024 Kernkonzept GmbH.
|
||||
* Author(s): Steffen Liebergeld <steffen.liebergeld@kernkonzept.com>
|
||||
*
|
||||
* License: see LICENSE.spdx (in this directory or the directories above)
|
||||
*/
|
||||
|
||||
/**
|
||||
* This implements a simple debug channel, similar to the ones implemented in
|
||||
* Qemu and Bochs.
|
||||
*
|
||||
* This can be used for low-level debugging of guests.
|
||||
*
|
||||
* Example DT:
|
||||
*
|
||||
* \code{.dtb}
|
||||
* isa {
|
||||
* device_type = "eisa";
|
||||
* #address-cells = <2>;
|
||||
* #size-cells = <1>;
|
||||
* // The first cell of a child nodes reg property encodes the
|
||||
* // following information. See the ISA bus device-tree binding [2]
|
||||
* // for more details:
|
||||
* //
|
||||
* // [2] 11-bit aliased (IOPORT only)
|
||||
* // [1] 10-bit aliased (IOPORT only)
|
||||
* // [0] 0=MMIO32, 1=IOPORT
|
||||
* //
|
||||
* // The standard ranges property defines the translation of child
|
||||
* // reg address entries into the parent address space. Effectively
|
||||
* // removes the upper word. For the purpose of the ISA translation,
|
||||
* // only bit [0] is considered of the first word.
|
||||
* ranges = <0x0 0x0 0x0 0x0 0xffffffff
|
||||
* 0x1 0x0 0x0 0x0 0x1000>;
|
||||
|
||||
* isa_debugport {
|
||||
* compatible = "l4vmm,isa-debugport";
|
||||
* reg = <0x1 0x402 0x1>;
|
||||
* l4vmm,vcon_cap = "debug";
|
||||
* };
|
||||
* };
|
||||
* \endcode
|
||||
*/
|
||||
|
||||
#include "device_factory.h"
|
||||
#include "guest.h"
|
||||
#include "device.h"
|
||||
#include "io_device.h"
|
||||
|
||||
namespace Vdev {
|
||||
|
||||
/**
 * Debug port device: bytes the guest writes are forwarded to a Vcon
 * capability, reads return a fixed magic value.
 */
class Isa_debugport : public Vmm::Io_device, public Vdev::Device
{
  enum { Bochs_debug_port_magic = 0xe9 };

public:
  /**
   * Store the console capability and try to switch it to raw mode.
   *
   * If the current attributes cannot be queried, a warning is printed and
   * the console is left untouched. The result of set_attr() is checked with
   * L4Re::chksys().
   */
  explicit Isa_debugport(L4::Cap<L4::Vcon> con)
  : _con(con)
  {
    l4_vcon_attr_t attr;
    bool const have_attr = l4_error(con->get_attr(&attr)) == L4_EOK;

    if (have_attr)
      {
        attr.set_raw();
        L4Re::chksys(con->set_attr(&attr), "console set_attr");
      }
    else
      Dbg(Dbg::Dev, Dbg::Warn, "cons")
        .printf("WARNING: Cannot set console attributes. "
                "Output may not work as expected.\n");
  }

  char const *dev_name() const override
  { return "ISA Debugport"; }

private:
  /// IO write from the guest: emit the low byte of `value` on the console.
  void io_out(unsigned, Vmm::Mem_access::Width, l4_uint32_t value) override
  {
    char byte = static_cast<char>(value & 0xff);
    _con->write(&byte, 1);
  }

  /// IO read from the guest: always yields the magic constant.
  void io_in(unsigned, Vmm::Mem_access::Width, l4_uint32_t *value) override
  { *value = Bochs_debug_port_magic; }

  L4::Cap<L4::Vcon> _con;
};
|
||||
|
||||
} // namespace Vdev
|
||||
|
||||
namespace {
|
||||
|
||||
struct F : Vdev::Factory
|
||||
{
|
||||
cxx::Ref_ptr<Vdev::Device> create(Vdev::Device_lookup *devs,
|
||||
Vdev::Dt_node const &node) override
|
||||
{
|
||||
L4::Cap<L4::Vcon> cap = Vdev::get_cap<L4::Vcon>(node, "l4vmm,vcon_cap");
|
||||
|
||||
// Do not default to anything. If the cap is not there, there is no
|
||||
// debugport.
|
||||
if (!cap)
|
||||
return nullptr;
|
||||
|
||||
auto dev = Vdev::make_device<Vdev::Isa_debugport>(cap);
|
||||
devs->vmm()->register_io_device(dev, Vmm::Region_type::Virtual, node);
|
||||
|
||||
return dev;
|
||||
}
|
||||
}; // struct F
|
||||
|
||||
static F f;
|
||||
static Vdev::Device_type t = {"l4vmm,isa-debugport", nullptr, &f};
|
||||
|
||||
} // namespace
|
||||
@@ -1,32 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2020, 2022, 2024 Kernkonzept GmbH.
|
||||
* Author(s): Philipp Eppelt <philipp.eppelt@kernkonzept.com>
|
||||
*
|
||||
* License: see LICENSE.spdx (in this directory or the directories above)
|
||||
*/
|
||||
#include "device_factory.h"
|
||||
#include "guest.h"
|
||||
#include "kvm_clock.h"
|
||||
#include "mem_types.h"
|
||||
|
||||
namespace {
|
||||
|
||||
struct F : Vdev::Factory
|
||||
{
|
||||
cxx::Ref_ptr<Vdev::Device> create(Vdev::Device_lookup *devs,
|
||||
Vdev::Dt_node const &) override
|
||||
{
|
||||
auto dev = Vdev::make_device<Vdev::Kvm_clock_ctrl>(devs->ram(),
|
||||
devs->vmm());
|
||||
|
||||
devs->vmm()->register_msr_device(dev);
|
||||
devs->vmm()->register_cpuid_device(dev);
|
||||
|
||||
return dev;
|
||||
}
|
||||
}; // struct F
|
||||
|
||||
static F f;
|
||||
static Vdev::Device_type t = {"kvm-clock", nullptr, &f};
|
||||
|
||||
} // namespace
|
||||
@@ -1,235 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2020, 2022-2024 Kernkonzept GmbH.
|
||||
* Author(s): Sarah Hoffmann <sarah.hoffmann@kernkonzept.com>
|
||||
* Philipp Eppelt <philipp.eppelt@kernkonzept.com>
|
||||
* Benjamin Lamowski <benjamin.lamowski@kernkonzept.com>
|
||||
*
|
||||
* License: see LICENSE.spdx (in this directory or the directories above)
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <l4/sys/types.h>
|
||||
#include <l4/util/rdtsc.h>
|
||||
#include <l4/cxx/ref_ptr>
|
||||
#include <vector>
|
||||
|
||||
#include "debug.h"
|
||||
#include "mem_types.h"
|
||||
#include "msr_device.h"
|
||||
#include "cpuid_device.h"
|
||||
#include "vm_ram.h"
|
||||
#include "ds_mmio_mapper.h"
|
||||
#include "cpu_dev.h"
|
||||
#include "guest.h"
|
||||
|
||||
namespace Vdev {
|
||||
|
||||
struct Vcpu_time_info
|
||||
{
|
||||
l4_uint32_t version;
|
||||
l4_uint32_t pad0;
|
||||
l4_uint64_t tsc_timestamp;
|
||||
l4_uint64_t system_time;
|
||||
l4_uint32_t tsc_to_system_mul;
|
||||
l4_int8_t tsc_shift;
|
||||
// bit 0 is set, if all Vcpu_time_info instances show the same TSC value.
|
||||
l4_uint8_t flags;
|
||||
l4_uint8_t pad[2];
|
||||
};
|
||||
static_assert(sizeof(Vcpu_time_info) == 32,
|
||||
"Vcpu_time_info structure is compact.");
|
||||
|
||||
/**
 * Per-vCPU clock that publishes TSC-based time into a Vcpu_time_info
 * structure.
 */
class Kvm_clock : public Vdev::Timer, public Device
{
public:
  Kvm_clock(Vcpu_time_info *vti, bool enable)
  { configure(vti, enable); }

  /**
   * Attach the clock to `vti` and initialise the static fields of the
   * structure (version, scaling multiplier, shift and flags).
   */
  void configure(Vcpu_time_info *vti, bool enable)
  {
    _vcpu_time_enable = enable;

    Vcpu_time_info &info = *vti;
    info.version = 0;
    info.tsc_to_system_mul = l4_scaler_tsc_to_ns;
    info.tsc_shift = 5;
    info.flags = 0;

    _vcpu_time = vti;
  }

  /**
   * Publish the current TSC value and the derived system time.
   *
   * The version field is incremented once before and once after the
   * time fields are written.
   */
  void tick()
  {
    auto const tsc = l4_rdtsc();
    Vcpu_time_info *info = _vcpu_time;

    cxx::write_now(&(info->version), info->version + 1);
    info->tsc_timestamp = tsc;
    info->system_time = l4_tsc_to_ns(tsc);
    cxx::write_now(&(info->version), info->version + 1);
  }

private:
  Vcpu_time_info *_vcpu_time;
  bool _vcpu_time_enable;
  std::mutex _mutex;
};
|
||||
|
||||
class Kvm_clock_ctrl : public Vmm::Msr_device,
|
||||
public Vmm::Cpuid_device,
|
||||
public Device
|
||||
{
|
||||
struct Wall_clock
|
||||
{
|
||||
l4_uint32_t version;
|
||||
l4_uint32_t sec;
|
||||
l4_uint32_t nsec;
|
||||
};
|
||||
static_assert(sizeof(Wall_clock) == 3 * 4,
|
||||
"KVM Wall_clock struct is compact.");
|
||||
|
||||
enum Kvm_msrs : unsigned
|
||||
{
|
||||
Msr_kvm_wall_clock_new = 0x4b564d00,
|
||||
Msr_kvm_system_time_new = 0x4b564d01,
|
||||
Msr_kvm_async_pf_en = 0x4b564d02,
|
||||
Msr_kvm_steal_time = 0x4b564d03,
|
||||
Msr_kvm_eoi_en = 0x4b564d04,
|
||||
};
|
||||
|
||||
public:
|
||||
Kvm_clock_ctrl(cxx::Ref_ptr<Vmm::Vm_ram> const &memmap,
|
||||
Vmm::Guest *vmm)
|
||||
: _boottime(l4_rdtsc()),
|
||||
_memmap(memmap),
|
||||
_vmm(vmm)
|
||||
{}
|
||||
|
||||
bool read_msr(unsigned, l4_uint64_t *, unsigned) const override
|
||||
{
|
||||
// Nothing to read, above structures are memory mapped in the guest.
|
||||
return false;
|
||||
}
|
||||
|
||||
bool write_msr(unsigned msr, l4_uint64_t addr, unsigned core_no) override
|
||||
{
|
||||
switch (msr)
|
||||
{
|
||||
case Msr_kvm_wall_clock_new:
|
||||
{
|
||||
trace().printf("Msr_kvm_wall_clock_new with addr 0x%llx\n", addr);
|
||||
|
||||
// address must be 4-byte aligned
|
||||
auto gaddr = Vmm::Guest_addr(addr & (-1UL << 2));
|
||||
set_wall_clock(static_cast<Wall_clock *>(host_addr(gaddr)));
|
||||
break;
|
||||
}
|
||||
|
||||
case Msr_kvm_system_time_new:
|
||||
{
|
||||
trace().printf("Msr_kvm_system_time_new to addr 0x%llx\n", addr);
|
||||
|
||||
bool enable = addr & 1;
|
||||
|
||||
// address must be 4-byte aligned
|
||||
auto gaddr = Vmm::Guest_addr(addr & (-1UL << 2));
|
||||
setup_vcpu_time(static_cast<Vcpu_time_info *>(host_addr(gaddr)),
|
||||
enable, core_no);
|
||||
break;
|
||||
}
|
||||
|
||||
// NOTE: below functions are disabled via CPUID leaf 0x4000'0001 and
|
||||
// shouldn't be invoked by a guest.
|
||||
case Msr_kvm_async_pf_en:
|
||||
warn().printf("KVM async pf not implemented.\n");
|
||||
break;
|
||||
case Msr_kvm_steal_time:
|
||||
warn().printf("KVM steal time not implemented.\n");
|
||||
break;
|
||||
case Msr_kvm_eoi_en:
|
||||
warn().printf("KVM EIO not implemented.\n");
|
||||
break;
|
||||
// If the guest Linux is compiled with CONFIG_KVM and no-kvmclock is
|
||||
// set on the command line, Linux will try to write to these MSRs on
|
||||
// shutdown. We ignore that.
|
||||
case 0x11:
|
||||
case 0x12:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Answer the hypervisor CPUID leaves 0x4000'0000 and 0x4000'0001.
 *
 * \param regs        vCPU registers; `ax` selects the leaf.
 * \param a,b,c,d     Output registers, written only for handled leaves.
 *
 * \retval true   The leaf was handled and all four outputs were written.
 * \retval false  The leaf is not handled here; outputs are untouched.
 */
bool handle_cpuid(l4_vcpu_regs_t const *regs, unsigned *a, unsigned *b,
                  unsigned *c, unsigned *d) const override
{
  enum Cpuid_kvm_constants
  {
    Kvm_feature_clocksource = 1UL,       // clock at msr 0x11 & 0x12
    Kvm_feature_clocksource2 = 1UL << 3, // clock at msrs 0x4b564d00 & 01;
    // host communicates synchronized KVM clocks via Vcpu_time_info.flags[0]
    Kvm_feature_clocksource_stable_bit = 1UL << 24,
  };

  if (regs->ax == 0x40000000)
    {
      *a = 0x40000001; // max CPUID leaf in the 0x4000'0000 range.
      *b = 0x4b4d564b; // "KVMK"
      *c = 0x564b4d56; // "VMKV"
      *d = 0x4d;       // "M\0\0\0"
      return true;
    }

  if (regs->ax == 0x40000001)
    {
      *a = Kvm_feature_clocksource2 | Kvm_feature_clocksource_stable_bit;
      *d = 0;
      *c = 0;
      *b = 0;
      return true;
    }

  return false;
}
|
||||
|
||||
private:
|
||||
void set_wall_clock(Wall_clock *cs) const
|
||||
{
|
||||
trace().printf("Set wall clock address: %p \n", cs);
|
||||
|
||||
cxx::write_now(&(cs->version), 1U);
|
||||
l4_tsc_to_s_and_ns(_boottime, &(cs->sec), &(cs->nsec));
|
||||
cxx::write_now(&(cs->version), 0U);
|
||||
}
|
||||
|
||||
/**
 * Attach the time-info structure `vti` to the clock of core `core_no`,
 * creating the clock on first use.
 */
void setup_vcpu_time(Vcpu_time_info *vti, bool enable, unsigned core_no)
{
  trace().printf("set system time address: %p: enable: %i, scaler 0x%x\n",
                 vti, enable, l4_scaler_tsc_to_ns);

  // Grow the per-core table on demand.
  if (_clocks.size() <= core_no)
    _clocks.resize(core_no + 1);

  auto &slot = _clocks[core_no];
  if (!slot)
    {
      // First registration for this core: create the clock and publish an
      // initial timestamp.
      auto clock_dev = Vdev::make_device<Kvm_clock>(vti, enable);
      slot = clock_dev;
      clock_dev->tick();
      return;
    }

  // NOTE(review): a repeated registration only reconfigures the clock; no
  // fresh timestamp is written to the new structure here -- confirm that
  // this is intended.
  slot->configure(vti, enable);
}
|
||||
|
||||
void *host_addr(Vmm::Guest_addr addr) const
|
||||
{
|
||||
return _memmap->guest2host<void *>(addr);
|
||||
}
|
||||
|
||||
static Dbg trace() { return Dbg(Dbg::Dev, Dbg::Trace, "KVMclock"); }
|
||||
static Dbg warn() { return Dbg(Dbg::Dev, Dbg::Warn, "KVMclock"); }
|
||||
|
||||
l4_cpu_time_t _boottime;
|
||||
std::vector<cxx::Ref_ptr<Kvm_clock>> _clocks;
|
||||
cxx::Ref_ptr<Vmm::Vm_ram> _memmap;
|
||||
Vmm::Guest *_vmm;
|
||||
};
|
||||
|
||||
} // namespace Vdev
|
||||
@@ -1,35 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2022, 2024 Kernkonzept GmbH.
|
||||
* Author(s): Philipp Eppelt <philipp.eppelt@kernkonzept.com>
|
||||
*
|
||||
* License: see LICENSE.spdx (in this directory or the directories above)
|
||||
*/
|
||||
#include "legacy_pic.h"
|
||||
#include "device_factory.h"
|
||||
#include "guest.h"
|
||||
|
||||
namespace
|
||||
{
|
||||
struct F : Vdev::Factory
|
||||
{
|
||||
cxx::Ref_ptr<Vdev::Device> create(Vdev::Device_lookup *devs,
|
||||
Vdev::Dt_node const &node) override
|
||||
{
|
||||
auto msi_distr = devs->get_or_create_mc_dev(node);
|
||||
Dbg().printf("PIC found MSI ctrl %p\n", msi_distr.get());
|
||||
|
||||
auto dev = Vdev::make_device<Vdev::Legacy_pic>(msi_distr);
|
||||
|
||||
auto *vmm = devs->vmm();
|
||||
vmm->add_io_device(Vmm::Io_region(0x20, 0x21, Vmm::Region_type::Virtual),
|
||||
dev->master());
|
||||
vmm->add_io_device(Vmm::Io_region(0xA0, 0xA1, Vmm::Region_type::Virtual),
|
||||
dev->slave());
|
||||
|
||||
return dev;
|
||||
}
|
||||
}; // struct F
|
||||
|
||||
static F f;
|
||||
static Vdev::Device_type t = {"virt-i8259-pic", nullptr, &f};
|
||||
} // namespace
|
||||
@@ -1,488 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2024 Kernkonzept GmbH.
|
||||
* Author(s): Philipp Eppelt <philipp.eppelt@kernkonzept.com>
|
||||
*
|
||||
* License: see LICENSE.spdx (in this directory or the directories above)
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include "io_device.h"
|
||||
#include "device.h"
|
||||
#include "irq.h"
|
||||
#include "msi_arch.h"
|
||||
#include "msi_controller.h"
|
||||
|
||||
#include <l4/cxx/bitfield>
|
||||
|
||||
namespace Vdev {
|
||||
|
||||
/**
|
||||
* Emulation of a programmable interrupt controller.
|
||||
*
|
||||
* Example of a device tree entry:
|
||||
*
|
||||
* \code{.dtb}
|
||||
* PIC: pic {
|
||||
* compatible = "virt-i8259-pic";
|
||||
* reg = <0x0 0x0 0x0 0x0>;
|
||||
* msi-parent = <&msi_ctrl>;
|
||||
* interrupt-controller;
|
||||
* #interrupt-cells = <1>;
|
||||
* };
|
||||
* \endcode
|
||||
*
|
||||
* The PIC emulation provides the guest with the ability to assign the legacy
|
||||
* interrupts of the master and slave PIC to a software defined range of two
|
||||
* times eight consecutive interrupt numbers.
|
||||
* The emulation reacts to IO-ports 0x20/0x21 and 0xA0/0xA1 as Command/Data
|
||||
* port combination for the master and slave chips.
|
||||
*/
|
||||
class Legacy_pic : public Gic::Ic
|
||||
{
|
||||
enum Config
|
||||
{
|
||||
Num_irqs = 16 // Number of IRQs supported by PIC
|
||||
};
|
||||
|
||||
enum Ports
|
||||
{
|
||||
Cmd_port = 0,
|
||||
Data_port = 1,
|
||||
};
|
||||
|
||||
enum class Init_words
|
||||
{
|
||||
ICW1 = 0,
|
||||
ICW2,
|
||||
ICW3,
|
||||
ICW4,
|
||||
};
|
||||
|
||||
/**
|
||||
* Single PIC-chip emulation handling IO-port access and interrupt offsets.
|
||||
*/
|
||||
class Chip : public Vmm::Io_device
|
||||
{
|
||||
// Register set
|
||||
// We only support ICW1 == 0x11. (ICW4 | INIT).
|
||||
struct ICW1
|
||||
{
|
||||
l4_uint8_t raw;
|
||||
|
||||
CXX_BITFIELD_MEMBER(0, 0, icw4, raw);
|
||||
CXX_BITFIELD_MEMBER(1, 1, single, raw); // only support 0
|
||||
CXX_BITFIELD_MEMBER(2, 2, address_interval, raw); // only support 0
|
||||
CXX_BITFIELD_MEMBER(3, 3, level_triggered_mode, raw); // ignore
|
||||
CXX_BITFIELD_MEMBER(4, 4, init, raw);
|
||||
};
|
||||
|
||||
struct ICW4
|
||||
{
|
||||
l4_uint8_t raw;
|
||||
|
||||
CXX_BITFIELD_MEMBER(0, 0, upm, raw); // 8086 mode, only one supported
|
||||
/**
|
||||
* Note from 8259a manual:
|
||||
* 8259As with a copyright date of 1985 or later will operate in the AEOI
|
||||
* mode as a master or a slave.
|
||||
* In AEOI mode interrupts are acked on delivery.
|
||||
*/
|
||||
CXX_BITFIELD_MEMBER(1, 1, aeoi, raw);
|
||||
CXX_BITFIELD_MEMBER(2, 2, buffer_master, raw);
|
||||
CXX_BITFIELD_MEMBER(3, 3, buffer_mode, raw);
|
||||
CXX_BITFIELD_MEMBER(3, 3, sfnm, raw); // One iff special fully nested mode.
|
||||
};
|
||||
|
||||
struct OCW2
|
||||
{
|
||||
l4_uint8_t raw;
|
||||
|
||||
CXX_BITFIELD_MEMBER(0, 2, irq, raw);
|
||||
CXX_BITFIELD_MEMBER(5, 5, eoi, raw);
|
||||
CXX_BITFIELD_MEMBER(6, 6, sl, raw);
|
||||
};
|
||||
|
||||
struct OCW3
|
||||
{
|
||||
l4_uint8_t raw;
|
||||
|
||||
CXX_BITFIELD_MEMBER(0, 0, ris, raw);
|
||||
CXX_BITFIELD_MEMBER(1, 1, rr, raw);
|
||||
CXX_BITFIELD_MEMBER(2, 2, poll, raw);
|
||||
CXX_BITFIELD_MEMBER(5, 5, smm, raw);
|
||||
CXX_BITFIELD_MEMBER(6, 6, esmm, raw);
|
||||
};
|
||||
|
||||
// Selected IRR/ISR register by OCW3 for even port reads
|
||||
bool _read_isr = false;
|
||||
// Interrupt service register. Stores the Irq currently being serviced.
|
||||
l4_uint8_t _isr = 0;
|
||||
// Interrupt request register. Stores incoming Irq requesting to be
|
||||
// serviced.
|
||||
l4_uint8_t _irr = 0;
|
||||
// Interrupt mask register. Masks out interrupts.
|
||||
l4_uint8_t _imr = 0;
|
||||
|
||||
// Needed to keep track of initialization sequence
|
||||
Init_words _expect = Init_words::ICW1;
|
||||
|
||||
// Offset of interrupts
|
||||
l4_uint8_t _offset = 0;
|
||||
l4_uint8_t _slave_at = 0;
|
||||
|
||||
struct ICW1 _icw1 {0}; // store to keep track of single mode and icw4
|
||||
struct ICW4 _icw4 {0}; // store to keep track of aeoi mode
|
||||
|
||||
bool _is_master;
|
||||
Legacy_pic *_pic;
|
||||
|
||||
public:
|
||||
Chip(bool master, Legacy_pic *pic) : _is_master(master), _pic(pic)
|
||||
{
|
||||
_icw4.aeoi() = 1;
|
||||
}
|
||||
|
||||
char const *dev_name() const override
|
||||
{ return "PIC"; }
|
||||
|
||||
/// Check interrupt mask/in-service and return the IRQ number with offset.
|
||||
int trigger(unsigned irq)
|
||||
{
|
||||
if (_offset == 0)
|
||||
return -1;
|
||||
|
||||
unsigned irq_bit = 1U << irq;
|
||||
|
||||
if (_isr || _imr & irq_bit)
|
||||
{
|
||||
_irr |= irq_bit;
|
||||
return -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!_icw4.aeoi())
|
||||
_isr |= irq_bit;
|
||||
_irr &= ~irq_bit;
|
||||
return _offset + irq;
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
/// Handle read accesses on the PICs command and data ports.
|
||||
void io_in(unsigned port, Vmm::Mem_access::Width width, l4_uint32_t *value)
|
||||
override
|
||||
{
|
||||
*value = -1U;
|
||||
|
||||
if (width != Vmm::Mem_access::Width::Wd8)
|
||||
return;
|
||||
|
||||
switch (port)
|
||||
{
|
||||
case Cmd_port:
|
||||
*value = _read_isr ? _isr : _irr;
|
||||
break;
|
||||
|
||||
case Data_port:
|
||||
*value = _imr;
|
||||
trace().printf("%s read mask 0x%x\n",
|
||||
_is_master ? "Master:" : "Slave:", _imr);
|
||||
break;
|
||||
}
|
||||
|
||||
trace().printf("%s port in: %s - 0x%x\n",
|
||||
_is_master ? "Master:" : "Slave:",
|
||||
port == 0 ? "cmd" : "data", *value);
|
||||
}
|
||||
|
||||
/// Handle write accesses on the PICs command and data ports.
|
||||
void io_out(unsigned port, Vmm::Mem_access::Width width, l4_uint32_t value)
|
||||
override
|
||||
{
|
||||
if (width != Vmm::Mem_access::Width::Wd8)
|
||||
return;
|
||||
|
||||
trace().printf("%s port out: %s - 0x%x\n",
|
||||
_is_master ? "Master:" : "Slave:",
|
||||
port == 0 ? "cmd" : "data", value);
|
||||
|
||||
switch (port)
|
||||
{
|
||||
case Cmd_port:
|
||||
handle_command_write(value);
|
||||
break;
|
||||
|
||||
case Data_port:
|
||||
handle_data_write(value);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
/// Return the number of the first pending interrupt or -1.
|
||||
int check_pending()
|
||||
{
|
||||
if (_isr || !(_irr & ~_imr))
|
||||
// we cannot issue new interrupts
|
||||
// if an interrupt is currently in service
|
||||
// or if all pending interrupts (in irr) are masked
|
||||
return -1;
|
||||
|
||||
for (int i = 0; _irr >> i; ++i)
|
||||
{
|
||||
l4_uint8_t bit = 1U << i;
|
||||
|
||||
if (_irr & bit)
|
||||
{
|
||||
_irr &= ~bit;
|
||||
_isr |= bit;
|
||||
return i;
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* EOI of last issued interrupt
|
||||
*/
|
||||
void eoi(unsigned irq = 0)
|
||||
{
|
||||
if (!irq)
|
||||
_isr = 0;
|
||||
else
|
||||
_isr &= ~(1U << irq);
|
||||
|
||||
if (_is_master)
|
||||
_pic->eoi(irq);
|
||||
else
|
||||
_pic->eoi(irq + 8);
|
||||
|
||||
issue_next_interrupt();
|
||||
}
|
||||
|
||||
void issue_next_interrupt()
|
||||
{
|
||||
int next_irq = check_pending();
|
||||
if (next_irq != -1)
|
||||
_pic->send_interrupt(next_irq + _offset);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Reset to initial configuration
|
||||
*/
|
||||
void reset()
|
||||
{
|
||||
_irr = _imr = _isr = 0;
|
||||
_expect = Init_words::ICW1;
|
||||
_offset = 0;
|
||||
_slave_at = 0;
|
||||
_icw1 = {0U};
|
||||
_icw4 = {0U};
|
||||
_icw4.aeoi() = 1;
|
||||
}
|
||||
|
||||
/**
 * Handle a write to the command port.
 *
 * A value with bit 4 set is ICW1 and restarts the initialization sequence.
 * While the sequence is in progress other command writes are ignored with
 * a warning. Otherwise the value is OCW3 (bit 3 set; only the register
 * read selection is evaluated) or OCW2 (only EOI commands are evaluated).
 */
void handle_command_write(l4_uint32_t command)
{
  l4_uint8_t cmd = command;

  bool const is_icw1 = cmd & 0x10;
  if (is_icw1)
    {
      // start initialization sequence
      reset();

      _icw1 = {cmd};
      if (_icw1.address_interval() || _icw1.single())
        warn().printf("Unsupported initialization value.\n");

      _expect = Init_words::ICW2;
      return;
    }

  bool const initializing = _expect != Init_words::ICW1;
  if (initializing)
    {
      warn().printf("%s: PIC is in initialization and guest wrote OCW (%x). Ignoring.\n",
                    _is_master ? "Master" : "Slave", cmd);
      return;
    }

  bool const is_ocw3 = cmd & 0x8;
  if (is_ocw3)
    {
      struct OCW3 o{cmd};

      // only the IRR/ISR read selection is supported, ignore the rest
      if (o.rr())
        _read_isr = o.ris();

      return;
    }

  // OCW2: only EOI commands are handled, ignore the rest for now
  struct OCW2 o{cmd};
  if (!o.eoi())
    return;

  if (o.sl())
    eoi(o.irq());
  else
    eoi();
}
|
||||
|
||||
/**
 * Handle a write to the data port.
 *
 * During an initialization sequence the value is consumed as the next
 * expected ICW (2, 3 or 4). Otherwise it is OCW1, the interrupt mask.
 */
void handle_data_write(l4_uint32_t value)
{
  // Not initializing: OCW1
  if (_expect == Init_words::ICW1)
    {
      _imr = value;
      // immediately inject pending irqs
      issue_next_interrupt();
      return;
    }

  switch (_expect)
    {
    case Init_words::ICW1:
      break; // handled above; listed to avoid a compiler warning

    case Init_words::ICW2:
      _offset = value;
      if (!_icw1.single())
        _expect = Init_words::ICW3;
      else
        _expect = _icw1.icw4() ? Init_words::ICW4
                               : Init_words::ICW1; // initialization complete
      warn().printf("%s: Vector offset %u\n",
                    _is_master ? "MASTER" : "SLAVE", _offset);
      break;

    case Init_words::ICW3:
      _slave_at = value;
      if (_icw1.icw4())
        {
          _expect = Init_words::ICW4;
          break;
        }
      _expect = Init_words::ICW1; // initialization complete
      _read_isr = false;
      break;

    case Init_words::ICW4:
      _icw4.raw = value;
      if (!_icw4.upm())
        warn().printf("Guest tries to set MCS-80 mode. Unsupported.\n");
      _expect = Init_words::ICW1; // initialization complete
      _read_isr = false;
      break;
    }
}
|
||||
|
||||
};
|
||||
|
||||
public:
|
||||
/**
|
||||
* Create a legacy PIC consisting of a master and slave chip.
|
||||
*
|
||||
* \param distr MSI-parent to send interrupts to.
|
||||
*/
|
||||
Legacy_pic(cxx::Ref_ptr<Gic::Msix_controller> distr)
|
||||
: _master(Vdev::make_device<Chip>(true, this)),
|
||||
_slave(Vdev::make_device<Chip>(false, this)),
|
||||
_distr(distr)
|
||||
{
|
||||
info().printf("Hello, Legacy_pic\n");
|
||||
}
|
||||
|
||||
/// Issue a legacy interrupt in range [0, 15]
|
||||
void set(unsigned irq) override
|
||||
{
|
||||
assert(irq < Num_irqs);
|
||||
|
||||
int num = irq < 8 ? _master->trigger(irq) : _slave->trigger(irq - 8);
|
||||
// Do we need to set the _master line where the slave is wired to?
|
||||
if (num >= 32)
|
||||
send_interrupt(num);
|
||||
};
|
||||
|
||||
void send_interrupt(int irq)
|
||||
{
|
||||
if (irq >= 32)
|
||||
{
|
||||
using namespace Vdev::Msix;
|
||||
|
||||
Interrupt_request_compat addr(0ULL);
|
||||
// dest_id = 0, redirect_hint = 0, dest_mode = 0;
|
||||
addr.fixed() = Address_interrupt_prefix;
|
||||
|
||||
Data_register_format data(0U);
|
||||
data.vector() = irq;
|
||||
data.delivery_mode() = Dm_extint;
|
||||
|
||||
_distr->send(addr.raw, data.raw);
|
||||
}
|
||||
}
|
||||
|
||||
void clear(unsigned) override {}
|
||||
|
||||
void bind_irq_src_handler(unsigned irq, Gic::Irq_src_handler *handler) override
|
||||
{
|
||||
assert(irq < Num_irqs);
|
||||
if (handler && _sources[irq])
|
||||
throw L4::Runtime_error(-L4_EEXIST);
|
||||
|
||||
_sources[irq] = handler;
|
||||
}
|
||||
|
||||
/**
 * Get the IRQ source handler bound to the given legacy interrupt line.
 *
 * \return The bound handler, or nullptr if none is bound or `irq` is out
 *         of range. The range is checked at runtime because an assert alone
 *         is compiled out with NDEBUG and would then allow a read past the
 *         end of `_sources`.
 */
Gic::Irq_src_handler *get_irq_src_handler(unsigned irq) const override
{
  if (irq >= Num_irqs)
    {
      warn().printf("Try to get out-of-range IRQ %u. Ignoring.\n", irq);
      return nullptr;
    }

  return _sources[irq];
}
|
||||
|
||||
void eoi(unsigned irq)
|
||||
{
|
||||
assert(irq < Num_irqs);
|
||||
|
||||
if (_sources[irq])
|
||||
_sources[irq]->eoi();
|
||||
}
|
||||
|
||||
/**
 * Decode a one-cell interrupt specifier from a device tree property.
 *
 * \param prop    Property cells; the first cell is converted to CPU byte
 *                order and returned.
 * \param propsz  Number of cells available in `prop`.
 * \param read    If not null, receives the number of cells consumed (1).
 *
 * \return The interrupt number, or -L4_ERANGE if `propsz` is less than 1.
 */
int dt_get_interrupt(fdt32_t const *prop, int propsz, int *read) const override
{
  enum { Irq_cells = 1, };

  if (propsz >= Irq_cells)
    {
      if (read != nullptr)
        *read = Irq_cells;

      return fdt32_to_cpu(prop[0]);
    }

  return -L4_ERANGE;
}
|
||||
|
||||
/// Obtain a pointer to the master PIC chip.
|
||||
cxx::Ref_ptr<Chip> master() const { return _master; }
|
||||
/// Obtain a pointer to the slave PIC chip.
|
||||
cxx::Ref_ptr<Chip> slave() const { return _slave; }
|
||||
|
||||
private:
|
||||
static Dbg trace() { return Dbg(Dbg::Irq, Dbg::Trace, "PIC"); }
|
||||
static Dbg info() { return Dbg(Dbg::Irq, Dbg::Info, "PIC"); }
|
||||
static Dbg warn() { return Dbg(Dbg::Irq, Dbg::Warn, "PIC"); }
|
||||
|
||||
cxx::Ref_ptr<Chip> _master;
|
||||
cxx::Ref_ptr<Chip> _slave;
|
||||
cxx::Ref_ptr<Gic::Msix_controller> _distr;
|
||||
Gic::Irq_src_handler *_sources[Num_irqs] = {};
|
||||
};
|
||||
|
||||
} // namespace Vdev
|
||||
@@ -1,651 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2018, 2021, 2023-2024 Kernkonzept GmbH.
|
||||
* Author(s): Adam Lackorzynski <adam@l4re.org>
|
||||
* Philipp Eppelt <philipp.eppelt@kernkonzept.com>
|
||||
* Georg Kotheimer <georg.kotheimer@kernkonzept.com>
|
||||
*
|
||||
* License: see LICENSE.spdx (in this directory or the directories above)
|
||||
*/
|
||||
#include <cstdio>
|
||||
|
||||
#include <l4/cxx/bitfield>
|
||||
#include <l4/cxx/exceptions>
|
||||
#include <l4/re/error_helper>
|
||||
|
||||
#include "mad.h"
|
||||
|
||||
namespace L4mad
|
||||
{
|
||||
|
||||
static const char *reg_names_x86_32[] = {
|
||||
"eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi" };
|
||||
|
||||
static const char *reg_names_x86_16[] = {
|
||||
"ax", "cx", "dx", "bx", "sp", "bp", "si", "di" };
|
||||
|
||||
static const char *reg_names_x86_8l[] = {
|
||||
"al", "cl", "dl", "bl" };
|
||||
|
||||
static const char *reg_names_x86_8h[] = {
|
||||
"ah", "ch", "dh", "bh" };
|
||||
|
||||
#ifdef ARCH_amd64
|
||||
enum Reg_names_amd64 { Reg_rax, Reg_rcx, Reg_rdx, Reg_rbx, Reg_rsp, Reg_rbp,
|
||||
Reg_rsi, Reg_rdi, Reg_r8, Reg_r9, Reg_r10, Reg_r11, Reg_r12,
|
||||
Reg_r13, Reg_r14, Reg_r15,
|
||||
Reg_eax = Reg_rax
|
||||
};
|
||||
|
||||
static const char *reg_names_x86_64[]
|
||||
= { "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi",
|
||||
"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" };
|
||||
|
||||
#elif defined(ARCH_x86)
|
||||
|
||||
enum Reg_names_x86 { Reg_eax, Reg_ecx, Reg_edx, Reg_ebx, Reg_esp, Reg_ebp,
|
||||
Reg_esi, Reg_edi };
|
||||
#endif
|
||||
|
||||
/**
 * Convert an access width to its size in bytes.
 *
 * Throws via L4Re::throw_error(-L4_EINVAL) if `width` is none of the known
 * enumerators.
 */
static unsigned width_in_bytes(Width width)
{
  if (width == Width::Wd8)
    return 1;
  if (width == Width::Wd16)
    return 2;
  if (width == Width::Wd32)
    return 4;
  if (width == Width::Wd64)
    return 8;

  L4Re::throw_error(-L4_EINVAL, "Invalid width to convert to bytes.");
}
|
||||
|
||||
enum Rex
|
||||
{
|
||||
/// Operand size
|
||||
Rex_w = 8,
|
||||
/// ModR/M reg field
|
||||
Rex_r = 4,
|
||||
/// SIB index field
|
||||
Rex_x = 2,
|
||||
/// ModR/M r/m field
|
||||
Rex_b = 1,
|
||||
};
|
||||
|
||||
struct Modrm
|
||||
{
|
||||
unsigned char raw;
|
||||
explicit Modrm(unsigned char val) : raw(val) {}
|
||||
|
||||
/// Register (possibly extended by Rex_b) or an addressing mode combined with
|
||||
/// the mod field.
|
||||
CXX_BITFIELD_MEMBER(0, 2, rm, raw);
|
||||
/// Register (possibly extended by Rex_r) or three additional opcode bits.
|
||||
CXX_BITFIELD_MEMBER(3, 5, reg, raw);
|
||||
/// Controls whether the rm field encodes a register (mod=3) or an addressing
|
||||
/// mode.
|
||||
CXX_BITFIELD_MEMBER(6, 7, mod, raw);
|
||||
};
|
||||
|
||||
/// Special values of the ModR/M rm field checked by decode_modrm().
enum Rm
{
  Rm_sib = 4,  ///< A SIB byte follows
  Rm_ripr = 5, ///< With mod=0: disp32 only (IP-relative in 64-bit mode)
};
|
||||
|
||||
/// Values of the ModR/M mod field.
enum Mod
{
  Mod_indirect = 0,        ///< Memory operand without displacement
  Mod_indirect_disp8 = 1,  ///< Memory operand plus 8-bit displacement
  Mod_indirect_disp32 = 2, ///< Memory operand plus 32-bit displacement
  Mod_direct = 3,          ///< Register operand, no memory access
};
|
||||
|
||||
struct Sib
|
||||
{
|
||||
unsigned char raw;
|
||||
explicit Sib(unsigned char val) : raw(val) {}
|
||||
|
||||
/// Base register (possibly extended by Rex_b).
|
||||
CXX_BITFIELD_MEMBER(0, 2, base, raw);
|
||||
/// Index register (possibly extended by Rex_x).
|
||||
CXX_BITFIELD_MEMBER(3, 5, index, raw);
|
||||
/// Scale factor of index field.
|
||||
CXX_BITFIELD_MEMBER(6, 7, scale, raw);
|
||||
};
|
||||
|
||||
struct Instruction
|
||||
{
|
||||
/// Instruction length, only accurate after decoding of the instruction is
|
||||
/// complete.
|
||||
unsigned char len = 0;
|
||||
|
||||
/// Operand-size override
|
||||
bool op_size_ovr = false;
|
||||
/// REX prefix, if present
|
||||
unsigned char rex = 0;
|
||||
|
||||
/// Operand size is forced to one byte
|
||||
bool op_size_byte = false;
|
||||
|
||||
/// Register operand
|
||||
unsigned char op_reg;
|
||||
/// Shift to apply to register operand, e.g. used for accessing high byte.
|
||||
unsigned char op_reg_shift;
|
||||
/// Address operand
|
||||
l4_addr_t op_addr;
|
||||
/// Address operand is IP relative
|
||||
bool op_addr_ripr;
|
||||
/// Immediate operand
|
||||
l4_umword_t op_imm;
|
||||
|
||||
// Assumption: If in protected mode or long compatibility mode we assume that
|
||||
// we are in a 32-bit code segment (CS.d == 1).
|
||||
Width op_width() const
|
||||
{
|
||||
// Operand-size override prefix and Rex.W have no effect on byte-specific
|
||||
// operations.
|
||||
if (op_size_byte)
|
||||
return Width::Wd8;
|
||||
|
||||
if (rex & Rex_w)
|
||||
return Width::Wd64;
|
||||
|
||||
return op_size_ovr ? Width::Wd16 : Width::Wd32;
|
||||
}
|
||||
|
||||
// Assumption: If in protected mode or long compatibility mode we assume that
|
||||
// we are in a 32-bit code segment (CS.d == 1).
|
||||
Width imm_width() const
|
||||
{
|
||||
// Operand-size override prefix has no effect on byte-specific operations.
|
||||
if (op_size_byte)
|
||||
return Width::Wd8;
|
||||
|
||||
return op_size_ovr ? Width::Wd16 : Width::Wd32;
|
||||
}
|
||||
|
||||
unsigned char rex_reg(unsigned char reg, Rex rex_bit) const
|
||||
{ return rex & rex_bit ? reg + 8 : reg; }
|
||||
};
|
||||
|
||||
/**
|
||||
* Truncate value to specified width.
|
||||
*
|
||||
* \param v Value to truncate
|
||||
* \param width Width in bytes
|
||||
*/
|
||||
static l4_umword_t
|
||||
truncate(l4_umword_t v, Width width)
|
||||
{
|
||||
if (width_in_bytes(width) >= sizeof(l4_umword_t))
|
||||
return v;
|
||||
return v & ((1UL << (width * 8)) - 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sign-extend value from specified width.
|
||||
*
|
||||
* \param v Value to sign-extend
|
||||
* \param from_width Width in bytes
|
||||
*/
|
||||
static l4_umword_t
|
||||
sign_extend(l4_umword_t v, Width from_width)
|
||||
{
|
||||
if (width_in_bytes(from_width) >= sizeof(l4_umword_t))
|
||||
return v;
|
||||
|
||||
l4_umword_t const msb = 1UL << (from_width * 8 - 1);
|
||||
if (v & msb)
|
||||
v |= ~0UL << (from_width * 8);
|
||||
return v;
|
||||
}
|
||||
|
||||
// Store the execution state to decode against. The CPU mode is currently
// fixed at build time: 64-bit long mode on amd64, otherwise 32-bit.
Decoder::Decoder(l4_exc_regs_t const *regs, l4_addr_t ip,
                 unsigned char const *inst_buf, unsigned inst_buf_len)
: _regs(regs),
  _ip(ip),
  _inst_buf(inst_buf),
  _inst_buf_len(inst_buf_len),
  // TODO: Introduce parameter to Decoder or decode(), that signifies whether
  //       CPU is in 64-bit mode or in compatibility/protected mode.
  _long_mode_64(
#ifdef ARCH_amd64
    true
#else
    false
#endif
  )
{}
|
||||
|
||||
/// Full-width value of general-purpose register `regnr` taken from the saved
/// register state; unknown register numbers yield 0.
l4_umword_t
Decoder::regval_arch(unsigned regnr) const
{
  // Cases are listed in register-number order.
  switch (regnr)
    {
#ifdef ARCH_x86
    case Reg_eax: return _regs->eax;
    case Reg_ecx: return _regs->ecx;
    case Reg_edx: return _regs->edx;
    case Reg_ebx: return _regs->ebx;
    case Reg_esp: return _regs->sp;
    case Reg_ebp: return _regs->ebp;
    case Reg_esi: return _regs->esi;
    case Reg_edi: return _regs->edi;
#else
    case Reg_rax: return _regs->rax;
    case Reg_rcx: return _regs->rcx;
    case Reg_rdx: return _regs->rdx;
    case Reg_rbx: return _regs->rbx;
    case Reg_rsp: return _regs->sp;
    case Reg_rbp: return _regs->rbp;
    case Reg_rsi: return _regs->rsi;
    case Reg_rdi: return _regs->rdi;
    case Reg_r8:  return _regs->r8;
    case Reg_r9:  return _regs->r9;
    case Reg_r10: return _regs->r10;
    case Reg_r11: return _regs->r11;
    case Reg_r12: return _regs->r12;
    case Reg_r13: return _regs->r13;
    case Reg_r14: return _regs->r14;
    case Reg_r15: return _regs->r15;
#endif
    default:
      return 0; // cannot happen but gcc complains
    }
}
|
||||
|
||||
/// Value of register `regnr`, shifted right by `shift` bits and truncated to
/// the access width `aw`.
l4_umword_t
Decoder::regval(unsigned regnr, unsigned shift, Width aw) const
{
  l4_umword_t const shifted = regval_arch(regnr) >> shift;
  return truncate(shifted, aw);
}
|
||||
|
||||
char const *
|
||||
Decoder::regname(unsigned regnr, unsigned shift, Width aw) const
|
||||
{
|
||||
#if defined(ARCH_x86) || defined(ARCH_amd64)
|
||||
switch (aw)
|
||||
{
|
||||
case Width::Wd8:
|
||||
return shift == 8 ? reg_names_x86_8h[regnr] : reg_names_x86_8l[regnr];
|
||||
case Width::Wd16:
|
||||
return reg_names_x86_16[regnr];
|
||||
case Width::Wd32:
|
||||
return reg_names_x86_32[regnr];
|
||||
case Width::Wd64:
|
||||
#if defined(ARCH_x86)
|
||||
return 0;
|
||||
#else
|
||||
return reg_names_x86_64[regnr];
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
Decoder::regname_bm_snprintf(char *buf, unsigned buflen, unsigned reglist) const
|
||||
{
|
||||
unsigned w = 0;
|
||||
for (unsigned i = 0; i < Num_registers; ++i)
|
||||
if (reglist & (1 << i))
|
||||
w += snprintf(buf + w, buflen - w, "%s[%lx],",
|
||||
regname(i, 0, Width::Wd32), regval(i, 0, Width::Wd32));
|
||||
if (reglist)
|
||||
buf[w - 1] = 0;
|
||||
}
|
||||
|
||||
char *
|
||||
Decoder::desc_s(char *buf, unsigned buflen, Desc const &d, Width aw) const
|
||||
{
|
||||
switch (d.dtype)
|
||||
{
|
||||
case Desc_mem:
|
||||
snprintf(buf, buflen, "Mem:%08lx", d.val);
|
||||
break;
|
||||
case Desc_reg:
|
||||
snprintf(buf, buflen, "Reg:%s[%08lx] (s:%d,%ld,%d)",
|
||||
regname(d.val, d.shift, aw), regval(d.val, d.shift, aw),
|
||||
d.shift, d.val, aw);
|
||||
break;
|
||||
case Desc_regbitmap:
|
||||
{
|
||||
unsigned w = snprintf(buf, buflen, "Regs:");
|
||||
regname_bm_snprintf(buf + w, buflen - w, d.val);
|
||||
}
|
||||
break;
|
||||
case Desc_imm:
|
||||
snprintf(buf, buflen, "Val:%08lx", d.val);
|
||||
break;
|
||||
}
|
||||
buf[buflen - 1] = 0;
|
||||
return buf;
|
||||
}
|
||||
|
||||
void
|
||||
Decoder::print_insn_info(Op const &op, Desc const &tgt, Desc const &src) const
|
||||
{
|
||||
char buf_s[32], buf_t[32];
|
||||
|
||||
warn()
|
||||
.printf("0x%lx (%d): %s of %u bytes from %s to %s.\n",
|
||||
_ip, op.insn_len, op.atype == Read ? "Read" : "Write",
|
||||
op.access_width,
|
||||
desc_s(buf_s, sizeof(buf_s), src, op.access_width),
|
||||
desc_s(buf_t, sizeof(buf_t), tgt, op.access_width));
|
||||
}
|
||||
|
||||
// Assumption: If in protected mode or long compatibility mode we assume that
|
||||
// we are in a 32-bit code segment (CS.d == 1).
|
||||
Width
|
||||
Decoder::addr_width(Instruction const &) const
|
||||
{
|
||||
// TODO: Add support for address-size override prefix?
|
||||
return _long_mode_64 ? Width::Wd64 : Width::Wd32;
|
||||
}
|
||||
|
||||
/**
 * Read the next bytes of the instruction without consuming them.
 *
 * \param inst  Instruction being decoded; `inst.len` is the read offset
 * \param sz    Number of bytes to read, given as access width
 *
 * \return The bytes at the current offset, interpreted as little-endian
 *         value.
 *
 * Throws -L4_ERANGE if the read would go beyond the instruction buffer or
 * beyond Max_instruction_len, and -L4_EINVAL (from width_in_bytes()) for an
 * invalid width.
 */
l4_umword_t
Decoder::peek_inst_bytes(Instruction const &inst, Width sz) const
{
  unsigned const nbytes = width_in_bytes(sz);
  unsigned const new_inst_len = inst.len + nbytes;
  // An instruction may be exactly Max_instruction_len bytes long, so only a
  // read going beyond that limit is an error.
  if (new_inst_len > _inst_buf_len || new_inst_len > Max_instruction_len)
    L4Re::throw_error(-L4_ERANGE, "Instruction out of bounds.");

  // Assemble the value byte by byte: the buffer carries no alignment
  // guarantee and must not be accessed through a wider integer type.
  unsigned char const *bytes = &_inst_buf[inst.len];
  l4_uint64_t value = 0;
  for (unsigned i = 0; i < nbytes; ++i)
    value |= static_cast<l4_uint64_t>(bytes[i]) << (8 * i);

  return value;
}
|
||||
|
||||
/// Like peek_inst_bytes(), but also advances `inst.len` past the bytes read.
l4_umword_t
Decoder::read_inst_bytes(Instruction &inst, Width sz) const
{
  l4_umword_t const value = peek_inst_bytes(inst, sz);
  // Only advance after the (possibly throwing) read succeeded.
  inst.len += width_in_bytes(sz);
  return value;
}
|
||||
|
||||
void
|
||||
Decoder::decode_legacy_prefixes(Instruction &inst)
|
||||
{
|
||||
for(;;)
|
||||
{
|
||||
switch (peek_inst_bytes(inst, Width::Wd8))
|
||||
{
|
||||
// Group 1
|
||||
// Lock and repeat prefixes
|
||||
case 0xf0: // lock;
|
||||
break;
|
||||
case 0xf2:
|
||||
case 0xf3:
|
||||
trace().printf("Repeat prefix not considered\n");
|
||||
break;
|
||||
// Group 2
|
||||
// Segment-Override Prefixes
|
||||
case 0x26: // ES
|
||||
case 0x36: // SS
|
||||
case 0x64: // FS
|
||||
case 0x65: // GS
|
||||
trace().printf("Segment override not considered\n");
|
||||
break;
|
||||
// Branch hints
|
||||
case 0x2e: // branch hint or CS segment override
|
||||
case 0x3e: // branch hint or DS segment override
|
||||
break;
|
||||
// Group 3
|
||||
// Operand-size override prefix
|
||||
case 0x66:
|
||||
inst.op_size_ovr = true;
|
||||
break;
|
||||
// Group 4
|
||||
// Address-size override prefix
|
||||
case 0x67:
|
||||
trace().printf("Address-size override not considered\n");
|
||||
break;
|
||||
|
||||
default:
|
||||
// Not a prefix, opcode follows.
|
||||
return;
|
||||
};
|
||||
++inst.len;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Decoder::decode_rex_prefix(Instruction &inst)
|
||||
{
|
||||
if (!_long_mode_64)
|
||||
return;
|
||||
|
||||
unsigned char ib = peek_inst_bytes(inst, Width::Wd8);
|
||||
// REX prefix?
|
||||
if ((ib & 0xf0) == 0x40)
|
||||
{
|
||||
inst.rex = ib;
|
||||
++inst.len;
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
Decoder::decode_modrm(Instruction &inst, unsigned char *opcode_ext)
|
||||
{
|
||||
Modrm modrm(read_inst_bytes(inst, Width::Wd8));
|
||||
|
||||
// Writing into or reading from a register cannot raise a page fault,
|
||||
// thus not relevant for our use case.
|
||||
if (modrm.mod() == Mod_direct)
|
||||
return false;
|
||||
|
||||
// Reg field encodes register if the opcode does not expect it to contain
|
||||
// additional opcode bits.
|
||||
if (!opcode_ext)
|
||||
{
|
||||
// Register operand
|
||||
inst.op_reg = inst.rex_reg(modrm.reg(), Rex_r);
|
||||
|
||||
// AH to DH are only accessible if the instruction does not use a REX
|
||||
// prefix. Then instead SPL, BPL, SIL, and DIL, which is the lower
|
||||
// byte of the actually referenced register, would be accessed.
|
||||
if (!inst.rex && inst.op_size_byte && inst.op_reg > 3)
|
||||
{
|
||||
inst.op_reg -= 4;
|
||||
// Access the high byte (AH to DH)
|
||||
inst.op_reg_shift = 8;
|
||||
}
|
||||
}
|
||||
// Reg field encodes additional opcode bits.
|
||||
else
|
||||
*opcode_ext = modrm.reg();
|
||||
|
||||
// Memory address operand
|
||||
if (modrm.rm() == Rm_sib)
|
||||
{
|
||||
inst.op_addr = decode_sib(inst, modrm);
|
||||
}
|
||||
else if (modrm.mod() == Mod_indirect && modrm.rm() == Rm_ripr)
|
||||
{
|
||||
inst.op_addr_ripr = _long_mode_64;
|
||||
// Plus 32-bit displacement
|
||||
inst.op_addr = sign_extend(read_inst_bytes(inst, Width::Wd32), Width::Wd32);
|
||||
}
|
||||
else
|
||||
{
|
||||
inst.op_addr = regval(inst.rex_reg(modrm.rm(), Rex_b), 0,
|
||||
addr_width(inst));
|
||||
}
|
||||
|
||||
// Displacement
|
||||
if (modrm.mod() == Mod_indirect_disp8 || modrm.mod() == Mod_indirect_disp32)
|
||||
{
|
||||
Width sz = modrm.mod() == Mod_indirect_disp8 ? Width::Wd8 : Width::Wd32;
|
||||
inst.op_addr += sign_extend(read_inst_bytes(inst, sz), sz);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Decode the SIB byte and compute base + (index << scale).
 *
 * \param inst   Instruction being decoded
 * \param modrm  Preceding ModR/M byte; its mod field decides whether
 *               base=5 denotes a disp32 instead of a register.
 *
 * \return Address computed from the SIB byte, without any displacement
 *         selected by the ModR/M mod field.
 */
l4_umword_t
Decoder::decode_sib(Instruction &inst, Modrm const &modrm)
{
  Sib const sib(read_inst_bytes(inst, Width::Wd8));
  Width const aw = addr_width(inst);

  l4_umword_t base;
  if (sib.base() == 5 && modrm.mod() == Mod_indirect)
    // No base register, instead a disp32 is specified.
    base = sign_extend(read_inst_bytes(inst, Width::Wd32), Width::Wd32);
  else
    base = regval(inst.rex_reg(sib.base(), Rex_b), 0, aw);

  // Index 4 (after applying Rex_x) means that no index register is used.
  unsigned char const index_reg = inst.rex_reg(sib.index(), Rex_x);
  l4_umword_t const index = index_reg == 4 ? 0 : regval(index_reg, 0, aw);

  return base + (index << sib.scale());
}
|
||||
|
||||
void
|
||||
Decoder::decode_imm(Instruction &inst)
|
||||
{
|
||||
Width imm_len = inst.imm_width();
|
||||
inst.op_imm = read_inst_bytes(inst, imm_len);
|
||||
|
||||
if (_long_mode_64 && !inst.op_size_byte && (inst.rex & Rex_w))
|
||||
// In 64-bit mode all immediates are sign-extended to 64 bits.
|
||||
inst.op_imm = sign_extend(inst.op_imm, imm_len);
|
||||
}
|
||||
|
||||
void
|
||||
Decoder::decode_imm_moffs(Instruction &inst)
|
||||
{
|
||||
inst.op_imm = read_inst_bytes(inst, inst.op_width());
|
||||
}
|
||||
|
||||
// Exception-safe wrapper around decode_unsafe(): runtime errors raised while
// decoding are logged and reported as Result::Invalid.
Decoder::Result
Decoder::decode(Op *op, Desc *tgt, Desc *src)
{
  try
    {
      Result const outcome = decode_unsafe(op, tgt, src);
      if (outcome == Result::Success)
        return outcome;

      warn().printf("Unsupported or invalid instruction at 0x%lx\n", _ip);
      return outcome;
    }
  catch (L4::Runtime_error const &e)
    {
      char const *const extra = e.extra_str() ? e.extra_str() : "";
      warn().printf("Invalid instruction in [0x%lx, 0x%lx]: %s (%ld): %s\n",
                    _ip, _ip + _inst_buf_len, e.str(), e.err_no(), extra);
      return Result::Invalid;
    }
}
|
||||
|
||||
// Decode one instruction into operation and operand descriptions. Only the
// MOV opcodes 0x88-0x8b, 0xa0-0xa3 and 0xc6/0xc7 are handled. Errors from
// reading the instruction bytes propagate as exceptions.
Decoder::Result
Decoder::decode_unsafe(Op *op, Desc *tgt, Desc *src)
{
  Instruction inst{};

  // Layout of an instruction, in this order:
  //  - Legacy prefixes (optional)
  //  - REX prefix (optional)
  //  - Opcode (up to three bytes)
  //  - ModR/M (1 byte, if required)
  //  - SIB (1 byte, if required)
  //  - Displacement (1, 2 or 4 bytes, if required)
  //  - Immediate (1, 2, 4 or 8 bytes, if required)
  decode_legacy_prefixes(inst);
  decode_rex_prefix(inst);

  // First opcode byte
  unsigned char const opcode = read_inst_bytes(inst, Width::Wd8);
  // For all opcodes handled below, the even variant is the byte-sized one.
  bool const byte_sized = (opcode & 1) == 0;

  switch (opcode)
    {
    case 0xc6: // mov $, a
    case 0xc7:
      {
        inst.op_size_byte = byte_sized;

        // The opcode extension is only set if a memory operand was decoded,
        // and it must be zero.
        unsigned char opcode_ext;
        if (!decode_modrm(inst, &opcode_ext) || opcode_ext != 0)
          return Result::Unsupported;

        decode_imm(inst);

        op->set(Write, inst.op_width(), inst.len);
        imm_from_op_imm(src, inst);
        mem_from_op_addr(tgt, inst);
        return Result::Success;
      }

    case 0xa0: // read:  mov a, %al
    case 0xa1: // read:  mov a, %eax
    case 0xa2: // write: mov %al, a
    case 0xa3: // write: mov %eax, a
      {
        inst.op_size_byte = byte_sized;
        bool const is_store = (opcode & 2) != 0;

        decode_imm_moffs(inst);

        Desc *const reg_desc = is_store ? src : tgt;
        Desc *const mem_desc = is_store ? tgt : src;
        op->set(is_store ? Write : Read, inst.op_width(), inst.len);
        reg_desc->set_reg(Reg_eax);
        mem_from_op_imm(mem_desc, inst);
        return Result::Success;
      }

    case 0x88: // write: mov %, a
    case 0x89: // write: mov %, a
    case 0x8a: // read:  mov a, %
    case 0x8b: // read:  mov a, %
      {
        inst.op_size_byte = byte_sized;
        bool const is_store = (opcode & 2) == 0;

        if (!decode_modrm(inst))
          return Result::Unsupported;

        Desc *const reg_desc = is_store ? src : tgt;
        Desc *const mem_desc = is_store ? tgt : src;
        op->set(is_store ? Write : Read, inst.op_width(), inst.len);
        reg_from_op_reg(reg_desc, inst);
        mem_from_op_addr(mem_desc, inst);
        return Result::Success;
      }

    default:
      warn().printf("Unsupported opcode: 0x%x\n", opcode);
      return Result::Unsupported;
    }
}
|
||||
|
||||
void
|
||||
Decoder::reg_from_op_reg(Desc *desc, Instruction const &inst) const
|
||||
{ desc->set_reg(inst.op_reg, inst.op_reg_shift); }
|
||||
|
||||
void
|
||||
Decoder::imm_from_op_imm(Desc *desc, Instruction const &inst) const
|
||||
{ desc->set_imm(inst.op_imm); }
|
||||
|
||||
void
|
||||
Decoder::mem_from_op_imm(Desc *desc, Instruction const &inst) const
|
||||
{ desc->set_mem(inst.op_imm); }
|
||||
|
||||
void
|
||||
Decoder::mem_from_op_addr(Desc *desc, Instruction const &inst) const
|
||||
{
|
||||
l4_addr_t addr = inst.op_addr;
|
||||
if (inst.op_addr_ripr)
|
||||
addr += _ip + inst.len;
|
||||
// Truncate calculated address to current address width.
|
||||
addr = truncate(addr, addr_width(inst));
|
||||
desc->set_mem(addr);
|
||||
}
|
||||
|
||||
} // namespace L4mad
|
||||
@@ -1,156 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2017, 2023-2024 Kernkonzept GmbH.
|
||||
* Author(s): Adam Lackorzynski <adam@l4re.org>
|
||||
* Philipp Eppelt <philipp.eppelt@kernkonzept.com>
|
||||
* Georg Kotheimer <georg.kotheimer@kernkonzept.com>
|
||||
*
|
||||
* License: see LICENSE.spdx (in this directory or the directories above)
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <l4/sys/utcb.h>
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "debug.h"
|
||||
#include "mem_access.h"
|
||||
|
||||
namespace L4mad
|
||||
{
|
||||
|
||||
enum Desc_type { Desc_mem, Desc_imm, Desc_reg, Desc_regbitmap };
|
||||
enum Access_type { Write, Read };
|
||||
|
||||
/// Width in bytes.
|
||||
using Width = Vmm::Mem_access::Width;
|
||||
|
||||
struct Desc
|
||||
{
|
||||
Desc_type dtype;
|
||||
l4_umword_t val;
|
||||
unsigned char shift;
|
||||
|
||||
void set_mem(l4_umword_t v)
|
||||
{ dtype = Desc_mem; val = v; shift = 0; }
|
||||
|
||||
void set_reg(l4_umword_t v, unsigned char s = 0)
|
||||
{ dtype = Desc_reg; val = v; shift = s; }
|
||||
|
||||
void set_regbitmap(l4_umword_t bm)
|
||||
{ dtype = Desc_regbitmap; val = bm; shift = 0; }
|
||||
|
||||
void set_imm(l4_umword_t v)
|
||||
{ dtype = Desc_imm; val = v; shift = 0; }
|
||||
};
|
||||
|
||||
struct Op
|
||||
{
|
||||
Access_type atype;
|
||||
Width access_width;
|
||||
unsigned char insn_len;
|
||||
|
||||
void set(Access_type t, Width aw, unsigned char il)
|
||||
{
|
||||
atype = t;
|
||||
access_width = aw;
|
||||
insn_len = il;
|
||||
}
|
||||
};
|
||||
|
||||
// Number of general-purpose registers of the target architecture.
#if defined(ARCH_amd64)
enum { Num_registers = 16 };
#elif defined(ARCH_x86)
enum { Num_registers = 8 };
#endif

// Decoder internals, only forward-declared here.
struct Modrm;
struct Instruction;
|
||||
|
||||
class Decoder
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Create decoder for the given execution state.
|
||||
*
|
||||
* \param regs General-purpose registers
|
||||
* \param ip Instruction pointer as guest virtual address (required
|
||||
* for RIP-relative addressing)
|
||||
* \param inst_buf Buffer containing instruction bytes
|
||||
* \param inst_buf_len Length of instruction byte buffer
|
||||
*/
|
||||
Decoder(l4_exc_regs_t const *regs, l4_addr_t ip,
|
||||
unsigned char const *inst_buf, unsigned inst_buf_len);
|
||||
|
||||
enum { Max_instruction_len = 15 };
|
||||
|
||||
enum class Result
|
||||
{
|
||||
Success,
|
||||
Unsupported,
|
||||
Invalid,
|
||||
};
|
||||
|
||||
/**
|
||||
* Decode instruction as a read or write operation.
|
||||
*
|
||||
* \param[out] op Operation
|
||||
* \param[out] tgt Target operand description
|
||||
* \param[out] src Source operation description
|
||||
*
|
||||
* \retval Result::Success Instruction was decoded successfully.
|
||||
* \retval Result::Unsupported Instruction decoding failed, because an
|
||||
* unsupported instruction was encountered.
|
||||
* \retval Result::Invalid Instruction decoding failed, because an invalid
|
||||
* or incomplete instruction was encountered, for
|
||||
* example if the the instruction spans more bytes
|
||||
* than available in the decoders instruction
|
||||
* buffer.
|
||||
*
|
||||
* \note The decoder assumes that the CPU is executing in long 64-bit mode or
|
||||
* long compatibility / protected mode in a 32-bit code segment (i.e.
|
||||
* CS.d==1). Otherwise incorrect operand and address widths are
|
||||
* calculated.
|
||||
*/
|
||||
Result decode(Op *op, Desc *tgt, Desc *src);
|
||||
|
||||
/**
|
||||
* Print textual representation of a successfully decoded instruction.
|
||||
*/
|
||||
void print_insn_info(Op const &op, Desc const &tgt, Desc const &src) const;
|
||||
|
||||
private:
|
||||
static Dbg trace() { return Dbg(Dbg::Core, Dbg::Trace, "Mad"); }
|
||||
static Dbg warn() { return Dbg(Dbg::Core, Dbg::Warn, "Mad"); }
|
||||
|
||||
Result decode_unsafe(Op *op, Desc *tgt, Desc *src);
|
||||
void decode_legacy_prefixes(Instruction &inst);
|
||||
void decode_rex_prefix(Instruction &inst);
|
||||
bool decode_modrm(Instruction &inst, unsigned char *opcode_ext = nullptr);
|
||||
l4_umword_t decode_sib(Instruction &inst, Modrm const &modrm);
|
||||
void decode_imm(Instruction &inst);
|
||||
void decode_imm_moffs(Instruction &inst);
|
||||
|
||||
char *desc_s(char *buf, unsigned buflen, Desc const &d, Width aw) const;
|
||||
void regname_bm_snprintf(char *buf, unsigned buflen, unsigned reglist) const;
|
||||
char const *regname(unsigned regnr, unsigned shift, Width aw) const;
|
||||
l4_umword_t regval_arch(unsigned regnr) const;
|
||||
l4_umword_t regval(unsigned regnr, unsigned shift, Width aw) const;
|
||||
|
||||
Width addr_width(Instruction const &inst) const;
|
||||
|
||||
l4_umword_t peek_inst_bytes(Instruction const &inst, Width sz) const;
|
||||
l4_umword_t read_inst_bytes(Instruction &inst, Width sz) const;
|
||||
|
||||
void reg_from_op_reg(Desc *desc, Instruction const &inst) const;
|
||||
void imm_from_op_imm(Desc *desc, Instruction const &inst) const;
|
||||
void mem_from_op_imm(Desc *desc, Instruction const &inst) const;
|
||||
void mem_from_op_addr(Desc *desc, Instruction const &inst) const;
|
||||
|
||||
l4_exc_regs_t const *const _regs;
|
||||
l4_addr_t const _ip;
|
||||
unsigned char const *const _inst_buf;
|
||||
unsigned const _inst_buf_len;
|
||||
bool const _long_mode_64;
|
||||
}; // class Decoder
|
||||
|
||||
} // namespace L4mad
|
||||
@@ -1,218 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2021, 2023-2024 Kernkonzept GmbH.
|
||||
* Author(s): Philipp Eppelt <philipp.eppelt@kernkonzept.com>
|
||||
* Timo Nicolai <timo.nicolai@kernkonzept.com>
|
||||
*
|
||||
* License: see LICENSE.spdx (in this directory or the directories above)
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
|
||||
#include "vmcs.h"
|
||||
#include "vcpu_ptr.h"
|
||||
#include "vm_state_vmx.h"
|
||||
#include "monitor/monitor.h"
|
||||
#include "monitor/monitor_args.h"
|
||||
|
||||
namespace Monitor {
|
||||
|
||||
/// Primary template: an empty class, i.e. no command handler. The
/// `<true, T>` specialization provides the actual handler.
template<bool, typename T>
class Cpu_dev_cmd_handler
{};
|
||||
|
||||
template<typename T>
|
||||
class Cpu_dev_cmd_handler<true, T> : public Cmd
|
||||
{
|
||||
public:
|
||||
char const *help() const override
|
||||
{ return "CPU state"; }
|
||||
|
||||
void usage(FILE *f) const override
|
||||
{
|
||||
fprintf(f, "%s\n"
|
||||
"* 'cpu <i> regs': dump CPU registers\n"
|
||||
"* 'cpu <i> vmx': dump VMX state\n",
|
||||
help());
|
||||
}
|
||||
|
||||
void complete(FILE *f, Completion_request *compl_req) const override
|
||||
{ compl_req->complete(f, {"regs", "vmx"}); }
|
||||
|
||||
void exec(FILE *f, Arglist *args) override
|
||||
{
|
||||
if (*args == "regs")
|
||||
show_regs(f);
|
||||
else if (*args == "vmx")
|
||||
show_vmx(f);
|
||||
else
|
||||
argument_error("Invalid subcommand");
|
||||
}
|
||||
|
||||
void show_regs(FILE *f) const
|
||||
{
|
||||
auto regs = get_vcpu()->r;
|
||||
auto *vms = get_vcpu().vm_state();
|
||||
|
||||
fprintf(f,
|
||||
"RAX %lx\nRBX %lx\nRCX %lx\nRDX %lx\nRSI %lx\nRDI %lx\n"
|
||||
"RSP %lx\nRBP %lx\nR8 %lx\nR9 %lx\nR10 %lx\nR11 %lx\n"
|
||||
"R12 %lx\nR13 %lx\nR14 %lx\nR15 %lx\nRIP %lx\n",
|
||||
regs.ax, regs.bx, regs.cx, regs.dx, regs.si, regs.di,
|
||||
regs.sp, regs.bp, regs.r8, regs.r9, regs.r10, regs.r11,
|
||||
regs.r12, regs.r13, regs.r14, regs.r15, vms->ip());
|
||||
}
|
||||
|
||||
void show_vmx(FILE *f) const
|
||||
{
|
||||
Vmm::Vmx_state *vmx = dynamic_cast<Vmm::Vmx_state *>(get_vcpu().vm_state());
|
||||
|
||||
if (!vmx)
|
||||
{
|
||||
fprintf(f, "Failed to read VMX state\n");
|
||||
return;
|
||||
}
|
||||
|
||||
fprintf(f, "(C) VPID: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_VPID));
|
||||
fprintf(f, "(C) Int notification vector: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_PIR_NOTIFICATION_VECTOR));
|
||||
fprintf(f, "(C) EPTP index: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_EPTP_INDEX));
|
||||
fprintf(f, "(C) EPT pointer: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_EPT_POINTER));
|
||||
fprintf(f, "(C) Pin-based execution control: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_PIN_BASED_VM_EXEC_CTLS));
|
||||
fprintf(f, "(C) Primary execution control: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_PRI_PROC_BASED_VM_EXEC_CTLS));
|
||||
fprintf(f, "(C) Secondary execution control: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_SEC_PROC_BASED_VM_EXEC_CTLS));
|
||||
|
||||
fprintf(f, "(c) basic capabilities: 0x%llx\n",
|
||||
vmx->cap_read(L4_VM_VMX_BASIC_REG));
|
||||
fprintf(f, "(C) Real pin-based execution control: 0x%llx\n",
|
||||
vmx->cap_read(L4_VM_VMX_TRUE_PINBASED_CTLS_REG));
|
||||
fprintf(f, "(C) Real primary execution control: 0x%llx\n",
|
||||
vmx->cap_read(L4_VM_VMX_TRUE_PROCBASED_CTLS_REG));
|
||||
fprintf(f, "(C) Real secondary execution control: 0x%llx\n",
|
||||
vmx->cap_read(L4_VM_VMX_PROCBASED_CTLS2_REG));
|
||||
|
||||
fprintf(f, "(G) ES selector: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_GUEST_ES_SELECTOR));
|
||||
fprintf(f, "(G) CS selector: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_GUEST_CS_SELECTOR));
|
||||
fprintf(f, "(G) SS selector: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_GUEST_SS_SELECTOR));
|
||||
fprintf(f, "(G) DS selector: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_GUEST_DS_SELECTOR));
|
||||
fprintf(f, "(G) FS selector: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_GUEST_FS_SELECTOR));
|
||||
fprintf(f, "(G) GS selector: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_GUEST_GS_SELECTOR));
|
||||
fprintf(f, "(G) GDTR base: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_GUEST_GDTR_BASE));
|
||||
fprintf(f, "(G) IDTR base: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_GUEST_IDTR_BASE));
|
||||
|
||||
fprintf(f, "(G) LDTR selector: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_GUEST_LDTR_SELECTOR));
|
||||
fprintf(f, "(G) TR selector: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_GUEST_TR_SELECTOR));
|
||||
fprintf(f, "(G) interrupt status: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_GUEST_INTERRUPT_STATUS));
|
||||
|
||||
fprintf(f, "(C) IO bitmap A: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_ADDRESS_IO_BITMAP_A));
|
||||
fprintf(f, "(C) IO bitmap B: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_ADDRESS_IO_BITMAP_B));
|
||||
fprintf(f, "(C) MSR bitmaps: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_ADDRESS_MSR_BITMAP));
|
||||
fprintf(f, "(C) Exit MSR store address: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_VM_EXIT_MSR_STORE_ADDRESS));
|
||||
fprintf(f, "(C) Exit MSR load address: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_VM_EXIT_MSR_LOAD_ADDRESS));
|
||||
fprintf(f, "(C) Entry MSR load address: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_VM_ENTRY_MSR_LOAD_ADDRESS));
|
||||
|
||||
fprintf(f, "(C) Entry control: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_VM_ENTRY_CTLS));
|
||||
fprintf(f, "(C) Entry error: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_VM_ENTRY_EXCEPTION_ERROR));
|
||||
fprintf(f, "(C) Entry MSR load cnt: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_VM_ENTRY_MSR_LOAD_COUNT));
|
||||
fprintf(f, "(C) Entry interrupt info: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_VM_ENTRY_INTERRUPT_INFO));
|
||||
fprintf(f, "(C) VM-instruction error: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_VM_INSN_ERROR));
|
||||
fprintf(f, "(C) Exit control: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_VM_EXIT_CTLS));
|
||||
fprintf(f, "(C) Exit reason: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_EXIT_REASON));
|
||||
fprintf(f, "(C) Exit interrupt info: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_VM_EXIT_INTERRUPT_INFO));
|
||||
fprintf(f, "(C) Exit interrupt error: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_VM_EXIT_INTERRUPT_ERROR));
|
||||
fprintf(f, "(C) Guest interruptability: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_GUEST_INTERRUPTIBILITY_STATE));
|
||||
|
||||
fprintf(f, "(G) ES limit: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_GUEST_ES_LIMIT));
|
||||
fprintf(f, "(G) CS limit: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_GUEST_CS_LIMIT));
|
||||
fprintf(f, "(G) SS limit: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_GUEST_SS_LIMIT));
|
||||
fprintf(f, "(G) DS limit: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_GUEST_DS_LIMIT));
|
||||
fprintf(f, "(G) FS limit: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_GUEST_FS_LIMIT));
|
||||
fprintf(f, "(G) GS limit: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_GUEST_GS_LIMIT));
|
||||
fprintf(f, "(G) GDTR limit: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_GUEST_GDTR_LIMIT));
|
||||
fprintf(f, "(G) IDTR limit: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_GUEST_IDTR_LIMIT));
|
||||
fprintf(f, "(G) Activity state: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_GUEST_ACTIVITY_STATE));
|
||||
|
||||
fprintf(f, "(G) sysenter rip: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_GUEST_IA32_SYSENTER_EIP));
|
||||
fprintf(f, "(G) sysenter rsp: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_GUEST_IA32_SYSENTER_ESP));
|
||||
fprintf(f, "(G) exit qualification: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_EXIT_QUALIFICATION));
|
||||
fprintf(f, "(G) guest linear address: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_GUEST_LINEAR_ADDRESS));
|
||||
fprintf(f, "(G) CR0: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_GUEST_CR0));
|
||||
fprintf(f, "(G) CR3: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_GUEST_CR3));
|
||||
fprintf(f, "(G) CR4: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_GUEST_CR4));
|
||||
fprintf(f, "(G) Guest IA32 EFER: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_GUEST_IA32_EFER));
|
||||
fprintf(f, "(G) RFLAGS: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_GUEST_RFLAGS));
|
||||
fprintf(f, "(G) RIP: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_GUEST_RIP));
|
||||
fprintf(f, "(G) RSP: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_GUEST_RSP));
|
||||
fprintf(f, "(G) ES base: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_GUEST_ES_BASE));
|
||||
fprintf(f, "(G) CS base: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_GUEST_CS_BASE));
|
||||
fprintf(f, "(G) SS base: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_GUEST_SS_BASE));
|
||||
fprintf(f, "(G) DS base: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_GUEST_DS_BASE));
|
||||
fprintf(f, "(G) FS base: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_GUEST_FS_BASE));
|
||||
fprintf(f, "(G) GS base: 0x%llx\n",
|
||||
vmx->vmx_read(VMCS_GUEST_GS_BASE));
|
||||
}
|
||||
|
||||
private:
|
||||
Vmm::Vcpu_ptr get_vcpu() const
|
||||
{ return static_cast<T const *>(this)->vcpu(); }
|
||||
};
|
||||
|
||||
}
|
||||
@@ -1,147 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2024 Kernkonzept GmbH.
|
||||
* Author(s): Timo Nicolai <timo.nicolai@kernkonzept.com>
|
||||
* Philipp Eppelt <philipp.eppelt@kernkonzept.com>
|
||||
*
|
||||
* License: see LICENSE.spdx (in this directory or the directories above)
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
|
||||
#include <l4/sys/l4int.h>
|
||||
|
||||
#include "monitor/monitor.h"
|
||||
#include "monitor/monitor_args.h"
|
||||
|
||||
namespace Monitor {
|
||||
|
||||
/// Primary template: an empty class, i.e. no command handler. The
/// `<true, T>` specialization provides the actual handler.
template<bool, typename T>
class Ioapic_cmd_handler
{};
|
||||
|
||||
template<typename T>
|
||||
class Ioapic_cmd_handler<true, T> : public Cmd
|
||||
{
|
||||
enum Ioapic_regs
|
||||
{
|
||||
Id_reg = 0x0,
|
||||
Version_reg = 0x1,
|
||||
Arbitration_reg = 0x2,
|
||||
Redir_tbl_offset_reg = 0x10,
|
||||
Redir_tbl_last_reg = 0x3f,
|
||||
};
|
||||
|
||||
struct Ioapic_reg
|
||||
{
|
||||
char const *name;
|
||||
unsigned addr;
|
||||
unsigned bytes;
|
||||
};
|
||||
|
||||
public:
|
||||
Ioapic_cmd_handler()
|
||||
{ register_toplevel("ioapic"); }
|
||||
|
||||
char const *help() const override
|
||||
{ return "IO APIC registers"; }
|
||||
|
||||
void usage(FILE *f) const override
|
||||
{
|
||||
fprintf(f, "%s\n", help());
|
||||
}
|
||||
|
||||
void exec(FILE *f, Arglist * /*args*/) override
|
||||
{
|
||||
show_ioapic(f);
|
||||
}
|
||||
|
||||
void show_ioapic(FILE *f) const
|
||||
{
|
||||
Ioapic_reg ioapic_regs[] =
|
||||
{
|
||||
{"IOAPIC ID", Id_reg, 4},
|
||||
{"IOAPIC Version", Version_reg, 4},
|
||||
{"IOAPIC Arbitration ID", Arbitration_reg, 4},
|
||||
};
|
||||
|
||||
fprintf(f, "|%-5s |%-5s |%-30s |%-18s |\n",
|
||||
"Reg", "Bytes", "Name", "Value");
|
||||
|
||||
for (auto const ® : ioapic_regs)
|
||||
print_row(f, reg);
|
||||
|
||||
print_redirection_table(f);
|
||||
}
|
||||
|
||||
private:
|
||||
void print_redirection_table(FILE *f) const
|
||||
{
|
||||
for (unsigned reg = Redir_tbl_offset_reg; reg < Redir_tbl_last_reg;
|
||||
reg += 2)
|
||||
print_redir_row(f, reg, "Redirection table ",
|
||||
(reg - Redir_tbl_offset_reg) / 2);
|
||||
}
|
||||
|
||||
void print_redir_row(FILE *f, unsigned addr, std::string name,
|
||||
unsigned idx) const
|
||||
{
|
||||
unsigned bytes = 8;
|
||||
print_location(f, addr, bytes);
|
||||
|
||||
name.append(std::to_string(idx));
|
||||
fprintf(f, "|%-30s ", name.c_str());
|
||||
|
||||
l4_uint64_t lower = ioapic_read(addr);
|
||||
l4_uint64_t upper = ioapic_read(addr + 1);
|
||||
|
||||
fprintf(f,
|
||||
"|0x%0*llx%.*s ",
|
||||
bytes * 2,
|
||||
(upper << 32) | (lower & 0xffff'ffffU),
|
||||
(8 - bytes) * 2,
|
||||
" ");
|
||||
|
||||
fprintf(f,"|\n");
|
||||
}
|
||||
|
||||
void print_row(FILE *f, Ioapic_reg const &r) const
|
||||
{
|
||||
print_location(f, r.addr, r.bytes);
|
||||
|
||||
fprintf(f, "|%-30s ", r.name);
|
||||
|
||||
print_value(f, r.addr, r.bytes);
|
||||
|
||||
fprintf(f,"|\n");
|
||||
}
|
||||
|
||||
void print_location(FILE *f, unsigned reg, unsigned bytes) const
|
||||
{ fprintf(f, "|0x%03x |%-5u ", reg, bytes); }
|
||||
|
||||
void print_value(FILE *f, unsigned reg, unsigned bytes) const
|
||||
{
|
||||
l4_uint64_t value = ioapic_read(reg);
|
||||
if (value == -1ULL)
|
||||
{
|
||||
fprintf(f, "Failed to read IOAPIC register\n");
|
||||
return;
|
||||
}
|
||||
|
||||
fprintf(f,
|
||||
"|0x%0*llx%.*s ",
|
||||
bytes * 2,
|
||||
value,
|
||||
(8 - bytes) * 2,
|
||||
" ");
|
||||
}
|
||||
|
||||
T const *ioapic() const
|
||||
{ return static_cast<T const *>(this); }
|
||||
|
||||
l4_uint64_t ioapic_read(unsigned reg) const
|
||||
{ return ioapic()->read_reg(reg); }
|
||||
};
|
||||
|
||||
}
|
||||
@@ -1,202 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020, 2023-2024 Kernkonzept GmbH.
|
||||
* Author(s): Timo Nicolai <timo.nicolai@kernkonzept.com>
|
||||
*
|
||||
* License: see LICENSE.spdx (in this directory or the directories above)
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
|
||||
#include <l4/sys/l4int.h>
|
||||
|
||||
#include "monitor/monitor.h"
|
||||
#include "monitor/monitor_args.h"
|
||||
|
||||
namespace Monitor {
|
||||
|
||||
template<bool, typename T>
|
||||
class Lapic_cmd_handler {};
|
||||
|
||||
template<typename T>
|
||||
class Lapic_cmd_handler<true, T> : public Cmd
|
||||
{
|
||||
enum { Chunk_size = 4 };
|
||||
|
||||
struct Apic_register
|
||||
{
|
||||
Apic_register(char const *name, unsigned msr, unsigned bytes = 4)
|
||||
: name(name), msr(msr), bytes(bytes)
|
||||
{}
|
||||
|
||||
char const *name;
|
||||
unsigned msr;
|
||||
unsigned bytes;
|
||||
};
|
||||
|
||||
public:
|
||||
Lapic_cmd_handler()
|
||||
{ register_toplevel("lapic"); }
|
||||
|
||||
char const *help() const override
|
||||
{ return "Local APIC registers"; }
|
||||
|
||||
void usage(FILE *f) const override
|
||||
{
|
||||
fprintf(f, "%s\n"
|
||||
"* 'lapic <i>': dump local APIC registers for a specific cpu\n"
|
||||
"* 'lapic all': dump local APIC registers for all cpus\n",
|
||||
help());
|
||||
}
|
||||
|
||||
void exec(FILE *f, Arglist *args) override
|
||||
{
|
||||
if (*args == "all")
|
||||
{
|
||||
unsigned i = 0;
|
||||
while (lapic_check(i))
|
||||
{
|
||||
fprintf(f, "LAPIC %u\n", i);
|
||||
show_lapic(f, i);
|
||||
fprintf(f, "\n");
|
||||
++i;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
unsigned lapic_no =
|
||||
args->pop<unsigned>("Failed to parse local APIC number.");
|
||||
|
||||
if (!lapic_check(lapic_no))
|
||||
argument_error("No such CPU or no local APIC registers found");
|
||||
|
||||
show_lapic(f, lapic_no);
|
||||
}
|
||||
}
|
||||
|
||||
void show_lapic(FILE *f, unsigned lapic_no) const
|
||||
{
|
||||
static Apic_register registers[] = {
|
||||
{ "Local APIC ID", 0x802 },
|
||||
{ "Local APIC Version", 0x803 },
|
||||
{ "Task Priority", 0x808 },
|
||||
{ "Process Priority", 0x80a },
|
||||
{ "Logical Destination", 0x80d },
|
||||
{ "Destination Format", 0x80e },
|
||||
{ "Spurious Vector", 0x80f },
|
||||
{ "In-Service", 0x810, 32 },
|
||||
{ "Trigger Mode", 0x818, 32 },
|
||||
{ "Interrupt Request", 0x820, 32 },
|
||||
{ "Error Status", 0x828 },
|
||||
{ "Corrected Machine Check Error Interrupt", 0x82f },
|
||||
{ "Interrupt Command", 0x830, 8 },
|
||||
{ "LVT Timer", 0x832 },
|
||||
{ "LVT Thermal Sensor", 0x833 },
|
||||
{ "LVT Performance Monitoring Counters", 0x834 },
|
||||
{ "LVT LINT0", 0x835 },
|
||||
{ "LVT LINT1", 0x836 },
|
||||
{ "LVT Error", 0x837 },
|
||||
{ "Initial Count", 0x838 },
|
||||
{ "Current Count", 0x839 },
|
||||
{ "TSC Deadline", 0x6e0 }
|
||||
};
|
||||
|
||||
fprintf(f, "|%-5s |%-5s |%-40s |%-18s |\n",
|
||||
"MSR", "Bytes", "Name", "Value");
|
||||
|
||||
for (auto const &r : registers)
|
||||
{
|
||||
if (r.bytes <= 8)
|
||||
{
|
||||
print_row(f, lapic_no, r);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (unsigned chunk = 0; chunk < r.bytes / Chunk_size; ++chunk)
|
||||
print_row(f, lapic_no, r, chunk);
|
||||
}
|
||||
}
|
||||
print_row(f, "Is NMI pending",
|
||||
static_cast<T const *>(this)->get(lapic_no)->is_nmi_pending());
|
||||
}
|
||||
|
||||
private:
|
||||
void print_row(FILE *f, char const *name, l4_uint64_t value) const
|
||||
{
|
||||
fprintf(f, "|0x%03x |%-5u ", 0, 0);
|
||||
fprintf(f, "|%-40s ", name);
|
||||
|
||||
unsigned bytes = 4;
|
||||
fprintf(f,
|
||||
"|0x%0*llx%.*s ",
|
||||
bytes * 2,
|
||||
value,
|
||||
(8 - bytes) * 2,
|
||||
" ");
|
||||
fprintf(f,"|\n");
|
||||
}
|
||||
|
||||
void print_row(FILE *f, unsigned lapic_no, Apic_register const &r) const
|
||||
{
|
||||
print_location(f, r.msr, r.bytes);
|
||||
|
||||
fprintf(f, "|%-40s ", r.name);
|
||||
|
||||
print_value(f, lapic_no, r.msr, r.bytes);
|
||||
|
||||
fprintf(f,"|\n");
|
||||
}
|
||||
|
||||
void print_row(FILE *f,
|
||||
unsigned lapic_no,
|
||||
Apic_register const &r,
|
||||
unsigned chunk) const
|
||||
{
|
||||
print_location(f, r.msr, Chunk_size);
|
||||
|
||||
fprintf(f,
|
||||
"|[%3u:%3u] %-30s ",
|
||||
chunk * Chunk_size * 8,
|
||||
(chunk + 1) * Chunk_size * 8 - 1,
|
||||
r.name);
|
||||
|
||||
print_value(f, lapic_no, r.msr + chunk, Chunk_size);
|
||||
|
||||
fprintf(f,"|\n");
|
||||
}
|
||||
|
||||
void print_location(FILE *f, unsigned msr, unsigned bytes) const
|
||||
{ fprintf(f, "|0x%03x |%-5u ", msr, bytes); }
|
||||
|
||||
void print_value(FILE *f,
|
||||
unsigned lapic_no,
|
||||
unsigned msr,
|
||||
unsigned bytes) const
|
||||
{
|
||||
l4_uint64_t value;
|
||||
if (!lapic_read_msr(lapic_no, msr, &value))
|
||||
{
|
||||
fprintf(f, "Failed to read Local APIC register\n");
|
||||
return;
|
||||
}
|
||||
|
||||
fprintf(f,
|
||||
"|0x%0*llx%.*s ",
|
||||
bytes * 2,
|
||||
value,
|
||||
(8 - bytes) * 2,
|
||||
" ");
|
||||
}
|
||||
|
||||
bool lapic_check(unsigned lapic_no) const
|
||||
{ return static_cast<T const *>(this)->get(lapic_no) != nullptr; }
|
||||
|
||||
bool lapic_read_msr(unsigned lapic_no, unsigned msr, l4_uint64_t *value) const
|
||||
{
|
||||
return lapic_check(lapic_no)
|
||||
&& static_cast<T const *>(this)->get(lapic_no)->read_msr(msr, value);
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
@@ -1,8 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2016-2017, 2019, 2024 Kernkonzept GmbH.
|
||||
* Author(s): Timo Nicolai <timo.nicolai@kernkonzept.com>
|
||||
*
|
||||
* License: see LICENSE.spdx (in this directory or the directories above)
|
||||
*/
|
||||
/// Compile-time predicate; unconditionally yields true in this file.
constexpr bool has_iomap()
{
  return true;
}
|
||||
@@ -1,61 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020, 2022, 2024 Kernkonzept GmbH.
|
||||
* Author(s): Philipp Eppelt <philipp.eppelt@kernkonzept.com>
|
||||
*
|
||||
* License: see LICENSE.spdx (in this directory or the directories above)
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <l4/cxx/bitfield>
|
||||
|
||||
namespace Vdev { namespace Msix {
|
||||
|
||||
enum Table_entry_const_arch
|
||||
{
|
||||
Data_vector_mask = 0xff,
|
||||
Address_interrupt_prefix = 0xfee,
|
||||
};
|
||||
|
||||
/// MSI-X address: Interrupt request compatibility format (Intel)
|
||||
struct Interrupt_request_compat
|
||||
{
|
||||
l4_uint64_t raw;
|
||||
CXX_BITFIELD_MEMBER(40, 63, dest_id_upper, raw);
|
||||
CXX_BITFIELD_MEMBER(32, 39, reserved0_2, raw);
|
||||
CXX_BITFIELD_MEMBER(20, 31, fixed, raw);
|
||||
CXX_BITFIELD_MEMBER(12, 19, dest_id, raw);
|
||||
CXX_BITFIELD_MEMBER(4, 11, reserved0_1, raw);
|
||||
CXX_BITFIELD_MEMBER(3, 3, redirect_hint, raw);
|
||||
CXX_BITFIELD_MEMBER(2, 2, dest_mode, raw);
|
||||
CXX_BITFIELD_MEMBER(0, 1, reserved_0, raw);
|
||||
|
||||
explicit Interrupt_request_compat(l4_uint64_t addr) : raw(addr)
|
||||
{}
|
||||
};
|
||||
|
||||
enum Delivery_mode : l4_uint8_t
|
||||
{
|
||||
Dm_fixed = 0,
|
||||
Dm_lowest_prio = 1,
|
||||
Dm_smi = 2,
|
||||
Dm_nmi = 4,
|
||||
Dm_init = 5,
|
||||
Dm_startup = 6,
|
||||
Dm_extint = 7,
|
||||
};
|
||||
|
||||
/// MSI-X data format (Intel)
|
||||
struct Data_register_format
|
||||
{
|
||||
// Intel SDM Vol. 3A 10-35, October 2017
|
||||
l4_uint64_t raw;
|
||||
CXX_BITFIELD_MEMBER(15, 15, trigger_mode, raw);
|
||||
CXX_BITFIELD_MEMBER(14, 14, trigger_level, raw);
|
||||
CXX_BITFIELD_MEMBER(8, 10, delivery_mode, raw);
|
||||
CXX_BITFIELD_MEMBER(0, 7, vector, raw);
|
||||
|
||||
explicit Data_register_format(l4_uint64_t data) : raw(data)
|
||||
{}
|
||||
};
|
||||
|
||||
}} // namespace Vdev::Msix
|
||||
@@ -1,51 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020, 2022, 2024 Kernkonzept GmbH.
|
||||
* Author(s): Philipp Eppelt <philipp.eppelt@kernkonzept.com>
|
||||
*
|
||||
* License: see LICENSE.spdx (in this directory or the directories above)
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include "msr_device.h"
|
||||
#include "vcpu_ptr.h"
|
||||
|
||||
namespace Vdev {
|
||||
|
||||
/**
|
||||
* MSR device handling read access to IA32_BIOS_SIGN_ID.
|
||||
*
|
||||
* This MSR provides the currently loaded microcode revision in bit [32:63].
|
||||
* As MSR access is a privileged instruction this data can only be read with
|
||||
* support from the kernel. By default, the kernel provides the relevant 32
|
||||
* bits of IA32_BIOS_SIGN_ID in the last user_data register of the vCPU state.
|
||||
*/
|
||||
class Microcode_revision : public Vmm::Msr_device
|
||||
{
|
||||
enum { Ia32_bios_sign_id = 0x8b };
|
||||
|
||||
public:
|
||||
Microcode_revision(Vmm::Vcpu_ptr vcpu)
|
||||
: _ucode_revision((l4_uint64_t)vcpu.ucode_revision() << 32)
|
||||
{
|
||||
// Fiasco reports just the upper 32-bit aka microcode revision. To recreate
|
||||
// the complete MSR, we need to shift it to the upper 32-bit of the 64-bit
|
||||
// MSR.
|
||||
}
|
||||
|
||||
bool read_msr(unsigned msr, l4_uint64_t *value, unsigned) const override
|
||||
{
|
||||
if (msr != Ia32_bios_sign_id)
|
||||
return false;
|
||||
|
||||
*value = _ucode_revision;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool write_msr(unsigned, l4_uint64_t, unsigned) override
|
||||
{ return false; }
|
||||
|
||||
private:
|
||||
l4_uint64_t const _ucode_revision;
|
||||
}; // Microcode_revision
|
||||
|
||||
} // namespace Vdev
|
||||
@@ -1,159 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2023-2024 genua GmbH, 85551 Kirchheim, Germany
|
||||
* All rights reserved. Alle Rechte vorbehalten.
|
||||
*/
|
||||
/*
|
||||
* Copyright (C) 2025 Kernkonzept GmbH.
|
||||
* Author(s): Philipp Eppelt <philipp.eppelt@kernkonzept.com>
|
||||
*
|
||||
* License: see LICENSE.spdx (in this directory or the directories above)
|
||||
*/
|
||||
|
||||
#include "openbsd_bootparams.h"
|
||||
#include "acpi.h"
|
||||
|
||||
namespace Vmm::Openbsd {
|
||||
|
||||
void Boot_params::dump()
|
||||
{
|
||||
info().printf("OpenBSD Boot Parameters: =============================== \n");
|
||||
info().printf(" howto: 0x%x\n", _params.howto);
|
||||
info().printf(" apiversion: 0x%x\n", _params.apiversion);
|
||||
info().printf(" ac: %d\n", _params.ac);
|
||||
info().printf(" av: 0x%x\n", _params.av);
|
||||
info().printf(" bootdev: 0x%x\n", _params.bootdev);
|
||||
info().printf(" end: 0x%x\n", _params.end);
|
||||
}
|
||||
|
||||
/**
 * Grow the BIOS memory map to `num` entries and fill in the last one.
 *
 * \param[in,out] map   Address of the heap-allocated entry array. Updated to
 *                      the (possibly moved) array on success. On allocation
 *                      failure the old array is released and `*map` is set
 *                      to nullptr before the error is raised.
 * \param         num   New number of entries; must be greater than zero.
 * \param         type  Entry type (one of the `Bios_map_*` values).
 * \param         addr  Start address of the block.
 * \param         size  Size of the block.
 *
 * Raises -L4_ENOMEM via L4Re::throw_error() if the array cannot be grown.
 */
void Boot_params::add_to_memmap(Bios_memmap **map, size_t const num,
                                l4_uint32_t type, l4_uint64_t addr,
                                l4_uint64_t size)
{
  assert(num > 0); // we expect to allocate something and not free everything

  // Never assign realloc()'s result straight to *map: on failure that would
  // drop the only reference to the still-allocated old array.
  void *grown = realloc(*map, num * sizeof(Bios_memmap));
  if (grown == nullptr)
    {
      free(*map);
      *map = nullptr;
      L4Re::throw_error(-L4_ENOMEM, "Failed to setup memmap!");
    }
  *map = static_cast<Bios_memmap *>(grown);

  // Fill allocated map entry
  Bios_memmap &entry = (*map)[num - 1];
  entry.addr = addr;
  entry.size = size;
  entry.type = type;

  char const *typestr;
  switch (type)
    {
    case Bios_map_free: typestr = "Adding free"; break;
    case Bios_map_res: typestr = "Adding reserved"; break;
    case Bios_map_acpi: typestr = "Adding ACPI"; break;
    case Bios_map_nvs: typestr = "Adding ACPI NVS"; break;
    default: typestr = "Adding unknown"; break;
    }

  trace().printf("%s memory to map: addr=0x%llx size=0x%llx\n", typestr,
                 addr, size);
}
|
||||
|
||||
/**
 * Build the BIOS memory map from the guest RAM regions and register it as
 * the `Bootarg_memmap` boot argument.
 *
 * Writable regions are reported as free, all others as reserved. A writable
 * region that straddles `Iom_end` is reported as two free entries split at
 * that address. The FACS region and a terminating `Bios_map_end` entry are
 * appended afterwards.
 */
void Boot_params::setup_memmap(Vm_ram *ram)
{
  Bios_memmap *entries = nullptr;
  size_t count = 0;

  // Describe every RAM region in the map
  ram->foreach_region([&entries, &count, this](Vmm::Ram_ds const &r) mutable {
    auto const start = r.vm_start().get();
    auto const len = r.size();

    if (!r.writable())
      {
        add_to_memmap(&entries, ++count, Bios_map_res, start, len);
        return;
      }

    bool const crosses_iom_end = start < Iom_end && (start + len) > Iom_end;
    if (!crosses_iom_end)
      {
        add_to_memmap(&entries, ++count, Bios_map_free, start, len);
        return;
      }

    // Split conventional and extended memory
    add_to_memmap(&entries, ++count, Bios_map_free, start, Iom_end - start);
    add_to_memmap(&entries, ++count, Bios_map_free, Iom_end,
                  len - Iom_end + start);
  });

  // The FACS region bounds are used as an inclusive range, hence the +1.
  auto facs = Acpi::Facs_storage::get()->mem_region();
  add_to_memmap(&entries, ++count, Bios_map_acpi, facs.start.get(),
                facs.end - facs.start + 1);

  add_to_memmap(&entries, ++count, Bios_map_end, 0, 0);

  if (entries == nullptr)
    return;

  info().printf("Add BIOS memmap at %p.\n", entries);
  // add_bootarg() copies the data, so the temporary array can be released.
  add_bootarg(Bootarg_memmap, count * sizeof(Bios_memmap), entries);
  free(entries);
}
|
||||
|
||||
/**
 * Assemble all boot arguments and copy them, together with the entry stack,
 * into guest memory.
 *
 * The boot argument blob is placed at guest address `Phys_mem_addr * 9`;
 * the entry stack is written to `_gp_addr` after clearing `Phys_mem_addr`
 * bytes there.
 *
 * \param ram  Guest RAM used to translate guest addresses to host pointers.
 */
void Boot_params::write(Vm_ram *ram)
{
  // Prepare BIOS ram regions
  setup_memmap(ram);

  // Add default uart console. The struct is value-initialized because the
  // whole object is copied into guest memory below; fields that are not set
  // explicitly (consfreq, flags, reg_width, reg_shift) must not carry
  // indeterminate stack contents.
  Bios_consdev cons = {};
  cons.consdev = makedev_obsd(8, 0); // com0
  cons.conspeed = 115200;
  cons.consaddr = 0x3f8;
  add_bootarg(Bootarg_consdev, sizeof(cons), &cons);

  // Finalize and write boot arguments to guest memory
  add_bootarg(Bootarg_end, 0, nullptr);
  Vmm::Guest_addr bootargs_pos = Vmm::Guest_addr(Phys_mem_addr * 9);
  memset(ram->guest2host<void *>(bootargs_pos), 0, _bootargs_size);
  memcpy(ram->guest2host<void *>(bootargs_pos), _bootargs, _bootargs_size);
  _params.av = bootargs_pos.get();
  _params.ac = _bootargs_size;

  // Write entry stack
  memset(ram->guest2host<void *>(_gp_addr), 0, Phys_mem_addr);
  memcpy(ram->guest2host<void *>(_gp_addr), &_params,
         sizeof(Openbsd_entry_stack));

  dump();
}
|
||||
|
||||
void Boot_params::add_bootarg(int type, size_t length, void const *data)
|
||||
{
|
||||
// Prepare header
|
||||
Boot_args next;
|
||||
next.ba_type = type;
|
||||
next.ba_size = sizeof(next) - sizeof(next.ba_arg) + length;
|
||||
|
||||
// Extend memory allocation
|
||||
size_t newsize = _bootargs_size + next.ba_size;
|
||||
if (newsize > L4_PAGESIZE)
|
||||
L4Re::throw_error(-L4_EINVAL, "OpenBSD bootargs: Too many arguments!");
|
||||
|
||||
_bootargs = realloc(_bootargs, newsize);
|
||||
if (_bootargs == nullptr)
|
||||
L4Re::throw_error(-L4_ENOMEM, "Failed to add bootarg!");
|
||||
|
||||
auto ptr_byte_add = [](void *ptr, l4_uint8_t param) {
|
||||
return static_cast<void *>(static_cast<l4_uint8_t *>(ptr) + param);
|
||||
};
|
||||
// Paste header and content to memory
|
||||
memcpy(ptr_byte_add(_bootargs, _bootargs_size), &next,
|
||||
sizeof(next) - sizeof(next.ba_arg));
|
||||
_bootargs_size = newsize;
|
||||
|
||||
if (data)
|
||||
memcpy(ptr_byte_add(_bootargs, _bootargs_size - length), data, length);
|
||||
}
|
||||
|
||||
} // namespace Vmm::Openbsd
|
||||
@@ -1,196 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2023-2024 genua GmbH, 85551 Kirchheim, Germany
|
||||
* All rights reserved. Alle Rechte vorbehalten.
|
||||
*/
|
||||
/*
|
||||
* Copyright (C) 2025 Kernkonzept GmbH.
|
||||
* Author(s): Philipp Eppelt <philipp.eppelt@kernkonzept.com>
|
||||
*
|
||||
* License: see LICENSE.spdx (in this directory or the directories above)
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <l4/sys/types.h>
|
||||
|
||||
#include "debug.h"
|
||||
#include "vm_ram.h"
|
||||
|
||||
namespace Vmm::Openbsd {
|
||||
|
||||
// See OpenBSD: sys/stand/boot/bootarg.h
|
||||
enum
|
||||
{
|
||||
Bapiv_ancient = 0x00000000, /* MD old i386 bootblocks */
|
||||
Bapiv_vars = 0x00000001, /* MD structure w/ add info passed */
|
||||
Bapiv_vector = 0x00000002, /* MI vector of MD structures passed */
|
||||
Bapiv_env = 0x00000004, /* MI environment vars vector */
|
||||
Bapiv_bmemmap = 0x00000008, /* MI memory map passed is in bytes */
|
||||
Bootarg_apiver = (Bapiv_vector|Bapiv_env|Bapiv_bmemmap),
|
||||
Bootarg_end = -1,
|
||||
Bootarg_memmap = 0,
|
||||
};
|
||||
|
||||
struct Boot_args
|
||||
{
|
||||
l4_int32_t ba_type;
|
||||
l4_int32_t ba_size;
|
||||
l4_int32_t ba_next;
|
||||
//struct _boot_args *ba_next;
|
||||
char ba_arg[1];
|
||||
} __attribute__((packed));
|
||||
static_assert(sizeof(Boot_args) == 13,
|
||||
"Size of packed Boot_args struct is as expected.");
|
||||
|
||||
// See OpenBSD: sys/arch/amd64/include/biosvar.h
|
||||
enum
|
||||
{
|
||||
Bios_map_end = 0x00, /* End of array XXX - special */
|
||||
Bios_map_free = 0x01, /* Usable memory */
|
||||
Bios_map_res = 0x02, /* Reserved memory */
|
||||
Bios_map_acpi = 0x03, /* ACPI Reclaim memory */
|
||||
Bios_map_nvs = 0x04, /* ACPI NVS memory */
|
||||
};
|
||||
|
||||
struct Bios_memmap
|
||||
{
|
||||
l4_uint64_t addr; /* Beginning of block */
|
||||
l4_uint64_t size; /* Size of block */
|
||||
l4_uint32_t type; /* Type of block */
|
||||
} __attribute__((packed));
|
||||
static_assert(sizeof(Bios_memmap) == 20,
|
||||
"Size of packed Bios_memmap struct is as expected.");
|
||||
|
||||
enum
|
||||
{
|
||||
Bootarg_consdev = 5,
|
||||
};
|
||||
|
||||
struct Bios_consdev
|
||||
{
|
||||
l4_int32_t consdev;
|
||||
l4_int32_t conspeed;
|
||||
l4_uint64_t consaddr;
|
||||
l4_int32_t consfreq;
|
||||
l4_uint32_t flags;
|
||||
l4_int32_t reg_width;
|
||||
l4_int32_t reg_shift;
|
||||
} __attribute__((packed));
|
||||
static_assert(sizeof(Bios_consdev) == 32,
|
||||
"Size of packed Bios_consdev struct is as expected.");
|
||||
|
||||
// See OpenBSD: sys/dev/isa/isareg.h
|
||||
enum
|
||||
{
|
||||
Iom_end = 0x100000 /* End of I/O Memory "hole" */
|
||||
};
|
||||
|
||||
// See OpenBSD: sys/sys/types.h
|
||||
/**
 * Pack two numbers into a single device number.
 *
 * \param x  Its low 8 bits are placed at bits [8:15] of the result.
 * \param y  Its low 8 bits are placed at bits [0:7]; its bits [8:23] are
 *           moved up to bits [16:31].
 */
static constexpr unsigned makedev_obsd(unsigned x, unsigned y)
{
  return ((x & 0xffU) << 8) | (y & 0xffU) | ((y & 0xffff00U) << 8);
}
|
||||
|
||||
// Memory layout for kernel entry function stack with parameters
|
||||
// This assembles the memory stack for the legacy exec call in OpenBSD
|
||||
// file sys/arch/amd64/stand/libsa/exec_i386.c
|
||||
struct Openbsd_entry_stack
|
||||
{
|
||||
l4_uint32_t returnaddr; // unused
|
||||
l4_uint32_t howto; // int
|
||||
l4_uint32_t bootdev; // dev_t
|
||||
l4_uint32_t apiversion; // api version of /boot
|
||||
l4_uint32_t end; // End address of loaded kernel binary
|
||||
l4_uint32_t extmem; // extended memory, unused
|
||||
l4_uint32_t cnvmem; // base memory reported by bios
|
||||
l4_uint32_t ac; // Length of bootargs
|
||||
l4_uint32_t av; // Offset of bootargs
|
||||
} __attribute__((packed));
|
||||
static_assert(sizeof(Openbsd_entry_stack) == 36,
|
||||
"Size of packed Openbsd_entry_stack struct is as expected.");
|
||||
|
||||
class Boot_params
|
||||
{
|
||||
public:
|
||||
enum
|
||||
{
|
||||
Phys_mem_addr = L4_PAGESIZE, ///< Location of the OpenBSD boot parameters
|
||||
};
|
||||
|
||||
Boot_params(Vmm::Guest_addr addr, l4_addr_t kernel,
|
||||
l4_addr_t kernel_size)
|
||||
: _gp_addr(addr), _bootargs(nullptr), _bootargs_size(0)
|
||||
{
|
||||
info().printf("Boot_params @ 0x%lx, Kernel @ 0x%lx (size=%ld)\n",
|
||||
addr.get(), kernel, kernel_size);
|
||||
memset(static_cast<void *>(&_params), 0, sizeof(Openbsd_entry_stack));
|
||||
_params.apiversion = Bootarg_apiver;
|
||||
|
||||
_params.cnvmem = Iom_end;
|
||||
_params.ac = 0;
|
||||
_params.av = 0;
|
||||
|
||||
_params.end = kernel + kernel_size;
|
||||
}
|
||||
|
||||
/**
|
||||
* Print OpenBSD Boot Parameters on console
|
||||
*/
|
||||
void dump();
|
||||
|
||||
/**
|
||||
* Write boot parameters into guest memory
|
||||
*/
|
||||
void write(Vm_ram *ram);
|
||||
|
||||
private:
|
||||
/**
|
||||
* Add memory to memory map
|
||||
*/
|
||||
void add_to_memmap(Bios_memmap **map, size_t const num, l4_uint32_t type,
|
||||
l4_uint64_t addr, l4_uint64_t size);
|
||||
|
||||
/**
|
||||
* Prepare memory map for OpenBSD guest
|
||||
*/
|
||||
void setup_memmap(Vm_ram *ram);
|
||||
|
||||
/**
|
||||
* Get guest physical address
|
||||
*/
|
||||
Vmm::Guest_addr addr() const { return _gp_addr; }
|
||||
|
||||
/**
|
||||
* Add boot argument to linked list.
|
||||
*
|
||||
* \note The data is copied into an internal buffer.
|
||||
* The caller retains ownership of p.
|
||||
*/
|
||||
void add_bootarg(int t, size_t l, void const *p);
|
||||
|
||||
private:
|
||||
static Dbg trace() { return Dbg(Dbg::Core, Dbg::Trace, "OpenBSDBoot"); }
|
||||
static Dbg info() { return Dbg(Dbg::Core, Dbg::Info, "OpenBSDBoot"); }
|
||||
|
||||
/**
|
||||
* Guest physical address of first page
|
||||
*/
|
||||
Vmm::Guest_addr _gp_addr;
|
||||
|
||||
/**
|
||||
* Entry stack
|
||||
*/
|
||||
Openbsd_entry_stack _params;
|
||||
|
||||
/**
|
||||
* Blob containing chained boot argument structs of varying sizes
|
||||
*/
|
||||
void *_bootargs;
|
||||
|
||||
/**
|
||||
* Size of `_bootargs` in bytes
|
||||
*/
|
||||
size_t _bootargs_size;
|
||||
};
|
||||
|
||||
} // namespace Vmm::Openbsd
|
||||
@@ -1,284 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2024 Kernkonzept GmbH.
|
||||
* Author(s): Philipp Eppelt <philipp.eppelt@kernkonzept.com>
|
||||
*
|
||||
* License: see LICENSE.spdx (in this directory or the directories above)
|
||||
*/
|
||||
#include <l4/re/error_helper>
|
||||
|
||||
#include "irq_dt.h"
|
||||
#include "pit.h"
|
||||
#include "acpi.h"
|
||||
|
||||
namespace Vdev {
|
||||
|
||||
/**
 * Create the two emulated channels and the port 0x61 helper device.
 *
 * If `irq` differs from `Pit_isa_irq`, an interrupt source override is
 * registered with the MADT storage.
 *
 * \param ic   Interrupt controller the timer interrupt is bound to.
 * \param irq  Interrupt line used by the timer.
 */
Pit_timer::Pit_timer(cxx::Ref_ptr<Gic::Ic> const &ic, unsigned irq)
: _irq(ic, irq)
{
  _channel[0] = cxx::make_unique_ptr<Channel>(new Channel(this));
  // NOTE(review): the `true` argument presumably marks this slot as PIT
  // channel 2 -- confirm against the Channel constructor.
  _channel[1] = cxx::make_unique_ptr<Channel>(new Channel(this, true));
  _port61 = make_device<Port61>(_channel[1].get());

  if (irq == Pit_isa_irq)
    return;

  info().printf("Timer IRQ configured to be %u, default is %u. Adding an "
                "override in MADT.\n", irq, Pit_isa_irq);
  Acpi::Madt_int_override_storage::get()->add_override(
    {Pit_isa_irq, irq, 0U});
}
|
||||
|
||||
void Pit_timer::io_out(unsigned port, Vmm::Mem_access::Width width,
|
||||
l4_uint32_t value)
|
||||
{
|
||||
if (width != Vmm::Mem_access::Width::Wd8)
|
||||
return;
|
||||
|
||||
std::lock_guard<std::mutex> lock(_mutex);
|
||||
|
||||
switch (port)
|
||||
{
|
||||
case Mode_command: // PIC_MODE
|
||||
{
|
||||
Control_reg control_reg(value);
|
||||
unsigned channel_select = control_reg.channel();
|
||||
if (channel_select == 1)
|
||||
{
|
||||
warn().printf("set mode for channel 1 unsupported\n");
|
||||
break;
|
||||
}
|
||||
// select either channel 0 or 2
|
||||
channel_select = (channel_select >> 1) & 0x1;
|
||||
|
||||
if (control_reg.is_read_back_cmd())
|
||||
{
|
||||
// read-back command
|
||||
if (control_reg.raw & (1U << 1)) // channel 0
|
||||
{
|
||||
if (control_reg.is_latch_status())
|
||||
_channel[0]->latch_status();
|
||||
if (control_reg.is_latch_count())
|
||||
_channel[0]->latch_count();
|
||||
}
|
||||
if (control_reg.raw & (1U << 3)) // channel 2
|
||||
{
|
||||
if (control_reg.is_latch_status())
|
||||
_channel[2]->latch_status();
|
||||
if (control_reg.is_latch_count())
|
||||
_channel[2]->latch_count();
|
||||
}
|
||||
trace().printf("Read-back command: 0x%x\n", control_reg.raw);
|
||||
break;
|
||||
}
|
||||
|
||||
_channel[channel_select]->write_status(control_reg.raw
|
||||
& 0x3f);
|
||||
trace().printf("Mode command on channel %d: 0x%x\n", channel_select,
|
||||
control_reg.raw);
|
||||
break;
|
||||
}
|
||||
case Channel_0_data:
|
||||
case Channel_2_data:
|
||||
{
|
||||
trace().printf("Writing 0x%x for channel %d\n", value, port);
|
||||
|
||||
unsigned channel_select = port2idx(port);
|
||||
_channel[channel_select]->write_count(value & 0xff);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
warn().printf("write to unimplemented channel 1\n");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void Pit_timer::io_in(unsigned port, Vmm::Mem_access::Width width,
|
||||
l4_uint32_t *value)
|
||||
{
|
||||
// *value contains the value returned to the guest. It defaults to -1 from
|
||||
// Guest::handle_io_access(). Therefore we do not set it here in case of an
|
||||
// unhandled path.
|
||||
|
||||
if (width != Vmm::Mem_access::Width::Wd8)
|
||||
return;
|
||||
|
||||
switch (port)
|
||||
{
|
||||
case Mode_command: /* Register is write only. Ignore read. */ break;
|
||||
|
||||
case Channel_0_data:
|
||||
case Channel_2_data:
|
||||
{
|
||||
unsigned ch = port2idx(port);
|
||||
std::lock_guard<std::mutex> lock(_mutex);
|
||||
*value = _channel[ch]->read();
|
||||
break;
|
||||
}
|
||||
default:
|
||||
warn().printf("PIT read from unimplemented channel 1\n");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Process one byte written to the channel's data port.
 *
 * Both latches are reset, mode 0 drives the output low, and the byte is
 * merged into the reload value according to the configured access mode.
 * With an invalid access mode only a warning is issued.
 *
 * \param value  Byte written by the guest.
 */
void Pit_timer::Channel::write_count(l4_uint8_t value)
{
  unsigned const channel_no = _is_channel2 ? 2U : 0U;

  _count_latch.reset();
  _status_latch.reset();

  // when writing a new count, out goes low.
  if (_status.is_mode0())
    set_output(false);

  switch (_status.access())
    {
    case Access_lohi:
      write_lo_hi(value);
      break;

    case Access_hibyte:
      _reload = set_high_byte(_reload, value);
      check_start_counter();
      break;

    case Access_lobyte:
      _reload = set_low_byte(_reload, value);
      check_start_counter();
      break;

    default:
      warn().printf("Invalid access value for write to counter: counter "
                    "%u, status 0x%x\n",
                    channel_no, _status.raw);
      return;
    }

  trace().printf("Written new counter value to channel %i: reload: 0x%x, value "
                 "0x%x\n",
                 channel_no, _reload, value);
}
|
||||
|
||||
void Pit_timer::Channel::check_start_counter()
|
||||
{
|
||||
// Assumption: only called after the full write of a counter
|
||||
if (!_gate)
|
||||
{
|
||||
warn().printf("count written, but gate not high: Counter %i\n",
|
||||
_is_channel2 ? 2 : 0);
|
||||
return;
|
||||
}
|
||||
|
||||
if (_status.is_mode0() || _status.is_mode4())
|
||||
{
|
||||
if (_running)
|
||||
stop_counter();
|
||||
start_counter();
|
||||
}
|
||||
else if (!_running && (_status.is_mode2() || _status.is_mode3()))
|
||||
start_counter();
|
||||
|
||||
// modes 1, 2, 3, 5 do not change their counter value on a new reload value.
|
||||
}
|
||||
|
||||
/**
 * Apply a control word to this channel.
 *
 * A value with bits 4 and 5 both clear is a latch command and only latches
 * the current count. Any other value stops the counter, clears both latches,
 * resets the low/high byte sequencing, stores the new status and sets the
 * initial output level.
 *
 * \param value  Control word bits passed in by the caller.
 */
void Pit_timer::Channel::write_status(l4_uint8_t value)
{
  bool const is_latch_command = (value & 0x30U) == 0;
  if (is_latch_command)
    {
      latch_count();
      return;
    }

  // Spec states: When writing to control word, all control logic resets.
  stop_counter();
  _count_latch.reset();
  _status_latch.reset();
  _read_lo = true;
  _write_lo = true;

  _status.write(value);

  // initial output level depends on the mode. Only mode0 is initially low.
  bool const starts_high = !_status.is_mode0();
  set_output(starts_high);

  trace().printf("New status on channel %i: 0x%x (mode %u)\n",
                 _is_channel2 ? 2 : 0, _status.raw, _status.opmode().get());
}
|
||||
|
||||
/**
 * Produce the next byte for a guest read of this channel's data port.
 *
 * Priority: a latched status byte first, then a latched count, otherwise
 * the current counter value. Latches are invalidated once fully read.
 *
 * \return The byte to hand to the guest; 0 if the access mode is invalid.
 */
l4_uint8_t Pit_timer::Channel::read()
{
  // A latched status is consumed by a single read.
  if (_status_latch.valid)
    {
      _status_latch.valid = false;
      return _status_latch.value & 0xff;
    }

  if (!_count_latch.valid)
    {
      // read counter
      l4_uint16_t const now = current();
      switch (_status.access())
        {
        case Access_lobyte: return low_byte(now);
        case Access_hibyte: return high_byte(now);
        case Access_lohi: return read_lo_hi(&_read_lo, now);
        default: break;
        }

      warn().printf("Read counter with invalid access mode: counter "
                    "%u, status 0x%x\n",
                    _is_channel2 ? 2U : 0U, _status.raw);
      return 0;
    }

  // read latched count
  switch (_status.access())
    {
    case Access_lobyte:
      _count_latch.valid = false;
      return low_byte(_count_latch.value);

    case Access_hibyte:
      _count_latch.valid = false;
      return high_byte(_count_latch.value);

    case Access_lohi:
      // reading 2nd byte invalidates
      if (!_count_latch.read_lo)
        _count_latch.valid = false;
      return read_lo_hi(&_count_latch.read_lo, _count_latch.value);

    default:
      break;
    }

  warn().printf("Read latch with invalid access mode: counter "
                "%u, status 0x%x\n",
                _is_channel2 ? 2U : 0U, _status.raw);
  return 0;
}
|
||||
|
||||
} // namespace Vdev
|
||||
|
||||
#include "device_factory.h"
|
||||
#include "guest.h"
|
||||
|
||||
namespace {
|
||||
|
||||
struct F : Vdev::Factory
|
||||
{
|
||||
cxx::Ref_ptr<Vdev::Device> create(Vdev::Device_lookup *devs,
|
||||
Vdev::Dt_node const &node) override
|
||||
{
|
||||
Vdev::Irq_dt_iterator it(devs, node);
|
||||
|
||||
if (it.next(devs) < 0)
|
||||
return nullptr;
|
||||
|
||||
if (!it.ic_is_virt())
|
||||
L4Re::chksys(-L4_EINVAL, "PIT requires a virtual interrupt controller");
|
||||
|
||||
auto dev = Vdev::make_device<Vdev::Pit_timer>(it.ic(), it.irq());
|
||||
|
||||
auto *vmm = devs->vmm();
|
||||
auto region = Vmm::Io_region(0x40, 0x43, Vmm::Region_type::Virtual);
|
||||
vmm->add_io_device(region, dev);
|
||||
region = Vmm::Io_region(0x61, 0x61, Vmm::Region_type::Virtual);
|
||||
vmm->add_io_device(region, dev->port61());
|
||||
vmm->register_timer_device(dev);
|
||||
|
||||
return dev;
|
||||
}
|
||||
}; // struct F
|
||||
|
||||
static F f;
|
||||
static Vdev::Device_type t = {"virt-pit", nullptr, &f};
|
||||
|
||||
} // namespace
|
||||
@@ -1,432 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020, 2022-2024 Kernkonzept GmbH.
|
||||
* Author(s): Philipp Eppelt <philipp.eppelt@kernkonzept.com>
|
||||
*
|
||||
* License: see LICENSE.spdx (in this directory or the directories above)
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <mutex>
|
||||
#include <l4/cxx/bitfield>
|
||||
#include <l4/cxx/unique_ptr>
|
||||
#include <l4/re/env.h>
|
||||
|
||||
#include "device.h"
|
||||
#include "io_device.h"
|
||||
#include "irq.h"
|
||||
#include "timer.h"
|
||||
|
||||
namespace Vdev {
|
||||
|
||||
/**
|
||||
* Limited implementation of 8254 PROGRAMMABLE INTERVAL TIMER.
|
||||
*
|
||||
* Supports only channel 0 and 2.
|
||||
* After a read-back command with status field, the following bits in the
|
||||
* status field latched are not supported: OUTPUT [7], NULL COUNT [6].
|
||||
*
|
||||
* Modes 0-3 are supported for both counters.
|
||||
* Mode 4 is only useable on counter 0, for the triggered interrupt.
|
||||
* Mode 5 is not supported.
|
||||
*
|
||||
* Modes 4 and 5 are not supported for counter 2, because the single tick
|
||||
* change in output is not emulated and it is questionable whether the emulation
|
||||
* would be precise enough to make it visible to the guest.
|
||||
*
|
||||
* \note This timer model uses the KIP clock as time base. You need to
|
||||
* configure the Microkernel with CONFIG_SYNC_TSC in order to achieve
|
||||
* sufficient granularity.
|
||||
*/
|
||||
class Pit_timer
|
||||
: public Vmm::Io_device,
|
||||
public Vdev::Device,
|
||||
public Vdev::Timer
|
||||
{
|
||||
enum : l4_uint8_t { Pit_isa_irq = 0, };
|
||||
|
||||
enum
|
||||
{
|
||||
Channels = 2,
|
||||
Pit_tick_rate = 1193182, // given in Herz
|
||||
Microseconds_per_second = 1000000ULL,
|
||||
Channel_0_data = 0,
|
||||
Channel_2_data = 2,
|
||||
Mode_command = 3,
|
||||
|
||||
Low_byte_mask = 0xff,
|
||||
High_byte_mask = 0xff00,
|
||||
High_byte_shift = 0x8,
|
||||
Latch_cmd_null_mask = 0x3f,
|
||||
Latch_cmd_channel_mask = 0xc0,
|
||||
|
||||
Access_latch = 0,
|
||||
Access_lobyte = 1,
|
||||
Access_hibyte = 2,
|
||||
Access_lohi = 3,
|
||||
|
||||
Mode_terminal_count = 0,
|
||||
Mode_hw_oneshot = 1,
|
||||
Mode_rate_gen = 2,
|
||||
Mode_rate_gen2 = 6,
|
||||
Mode_square_wave = 3,
|
||||
Mode_square_wave2 = 7,
|
||||
Mode_sw_triggerd_strobe = 4,
|
||||
// mode 5 unsupported.
|
||||
Mode_periodic_mask = 0x2,
|
||||
};
|
||||
|
||||
class Channel: public L4::Ipc_svr::Timeout_queue::Timeout
|
||||
{
|
||||
struct Status
|
||||
{
|
||||
Status() : raw(0) {}
|
||||
Status(l4_uint8_t v) : raw(v) {}
|
||||
|
||||
l4_uint8_t raw = 0;
|
||||
CXX_BITFIELD_MEMBER(7, 7, output, raw);
|
||||
CXX_BITFIELD_MEMBER(6, 6, count, raw);
|
||||
CXX_BITFIELD_MEMBER(4, 5, access, raw);
|
||||
CXX_BITFIELD_MEMBER(1, 3, opmode, raw);
|
||||
CXX_BITFIELD_MEMBER(0, 0, bcd, raw);
|
||||
|
||||
enum
|
||||
{
|
||||
// Bits not changed on mode command
|
||||
Retain_mask = output_bfm_t::Mask | count_bfm_t::Mask
|
||||
};
|
||||
|
||||
void write(l4_uint8_t val)
|
||||
{ raw = (val & ~Retain_mask) | (raw & Retain_mask); }
|
||||
|
||||
bool is_periodic_mode() const { return opmode() > Mode_hw_oneshot; }
|
||||
bool is_one_shot_mode() const { return !is_periodic_mode(); }
|
||||
|
||||
bool is_mode0() const { return opmode() == Mode_terminal_count; }
|
||||
bool is_mode1() const { return opmode() == Mode_hw_oneshot; }
|
||||
bool is_mode2() const
|
||||
{ return opmode() == Mode_rate_gen || opmode() == Mode_rate_gen2; }
|
||||
bool is_mode3() const
|
||||
{ return opmode() == Mode_square_wave || opmode() == Mode_square_wave2; }
|
||||
bool is_mode4() const
|
||||
{ return opmode() == Mode_sw_triggerd_strobe; }
|
||||
};
|
||||
|
||||
struct Latch
|
||||
{
|
||||
void reset()
|
||||
{
|
||||
value = 0;
|
||||
valid = false;
|
||||
read_lo = true;
|
||||
}
|
||||
|
||||
l4_uint16_t value = 0;
|
||||
bool valid = false;
|
||||
bool read_lo = true;
|
||||
};
|
||||
|
||||
public:
|
||||
Channel(Pit_timer *pit, bool is_channel2 = false)
|
||||
: _is_channel2(is_channel2), _gate(!is_channel2), _pit(pit)
|
||||
{}
|
||||
|
||||
// called in the context of the timer thread, be careful with locking!
|
||||
void expired()
|
||||
{
|
||||
// Unimplemented: mode2, 4, 5: output shall be low for one tick
|
||||
// the single-tick output change in modes 2, 4 & 5 is not emulated
|
||||
if (_status.is_mode3())
|
||||
{
|
||||
// Toggle output
|
||||
set_output(!_status.output());
|
||||
}
|
||||
else
|
||||
set_output(true);
|
||||
|
||||
if(!_is_channel2)
|
||||
_pit->_irq.inject();
|
||||
|
||||
if (_status.is_mode2() || _status.is_mode3())
|
||||
{
|
||||
_reload_kip_clock = l4_kip_clock(l4re_kip());
|
||||
if (_reload)
|
||||
_pit->requeue_timeout(this, next_timeout_us());
|
||||
}
|
||||
else
|
||||
{
|
||||
// The timer in the non periodic modes does not stop, but rolls over
|
||||
// and continues counting until gate is low or counter is set to 0.
|
||||
// Mode0 would not fire an interrupt again, since out is high until
|
||||
// reprogrammed. We don't emulate any of this and just stop.
|
||||
_running = false;
|
||||
}
|
||||
}
|
||||
|
||||
void latch_count()
|
||||
{
|
||||
// ignore all but the first latch command
|
||||
if (_count_latch.valid)
|
||||
return;
|
||||
|
||||
_count_latch.value = current();
|
||||
_count_latch.valid = true;
|
||||
_count_latch.read_lo = true;
|
||||
}
|
||||
|
||||
void latch_status()
|
||||
{
|
||||
if (_status_latch.valid)
|
||||
return;
|
||||
|
||||
_status_latch.value = _status.raw;
|
||||
_status_latch.valid = true;
|
||||
}
|
||||
|
||||
void write_count(l4_uint8_t value);
|
||||
void write_status(l4_uint8_t value);
|
||||
l4_uint8_t read();
|
||||
|
||||
bool gate() const { return _gate; }
|
||||
void gate(bool high)
|
||||
{
|
||||
// We know we are on channel 2, as only channel 2's gate can change.
|
||||
trace().printf("Channel 2: set gate to %i from %i\n", high, _gate);
|
||||
|
||||
if (_status.is_mode0())
|
||||
{
|
||||
if (!high && _gate)
|
||||
stop_counter();
|
||||
else if (high && !_gate)
|
||||
start_counter();
|
||||
// XXX this reloads the counter, but it should stop counting and
|
||||
// continue after gate goes high again, unless output is high;
|
||||
}
|
||||
else if (_status.is_mode1())
|
||||
{
|
||||
if (high && !_gate) // retrigger
|
||||
{
|
||||
stop_counter();
|
||||
start_counter();
|
||||
set_output(false);
|
||||
}
|
||||
}
|
||||
else if (_status.is_mode2() || _status.is_mode3())
|
||||
{
|
||||
// the single-tick output change in modes 2, 4 & 5 is not emulated
|
||||
if (high && !_gate)
|
||||
{
|
||||
start_counter();
|
||||
set_output(true);
|
||||
}
|
||||
else if (!high && _gate)
|
||||
stop_counter();
|
||||
}
|
||||
// modes 4 & 5 not supported
|
||||
|
||||
_gate = high;
|
||||
}
|
||||
|
||||
private:
|
||||
static l4_uint8_t low_byte(l4_uint16_t v)
|
||||
{ return v & Low_byte_mask; }
|
||||
|
||||
static l4_uint8_t high_byte(l4_uint16_t v)
|
||||
{ return (v >> High_byte_shift) & Low_byte_mask; }
|
||||
|
||||
static l4_uint16_t set_high_byte(l4_uint16_t reg, l4_uint8_t value)
|
||||
{ return (reg & Low_byte_mask) | (value << High_byte_shift); }
|
||||
|
||||
static l4_uint16_t set_low_byte(l4_uint16_t reg, l4_uint8_t value)
|
||||
{ return (reg & High_byte_mask) | value; }
|
||||
|
||||
/**
 * Deliver one byte of a 16-bit value in low-then-high order.
 *
 * \param[in,out] read_lo  True if the low byte is due; flipped on every call.
 * \param         count    The 16-bit value to read from.
 *
 * \return The low byte if `*read_lo` was true on entry, else the high byte.
 */
static l4_uint8_t read_lo_hi(bool *read_lo, l4_uint16_t count)
{
  bool const want_low = *read_lo;

  // The next access gets the other half.
  *read_lo = !want_low;

  return want_low ? low_byte(count) : high_byte(count);
}
|
||||
|
||||
void write_lo_hi(l4_uint8_t value)
|
||||
{
|
||||
if (_write_lo)
|
||||
_reload = set_low_byte(_reload, value);
|
||||
else
|
||||
{
|
||||
_reload = set_high_byte(_reload, value);
|
||||
check_start_counter();
|
||||
}
|
||||
|
||||
_write_lo = !_write_lo;
|
||||
}
|
||||
|
||||
void set_output(bool out)
|
||||
{
|
||||
_status.output() = out;
|
||||
if (_is_channel2)
|
||||
out ? _pit->_port61->set_out() : _pit->_port61->clear_out();
|
||||
}
|
||||
|
||||
void start_counter()
|
||||
{
|
||||
_reload_kip_clock = l4_kip_clock(l4re_kip());
|
||||
if (_reload)
|
||||
{
|
||||
_pit->enqueue_timeout(this, next_timeout_us());
|
||||
trace().printf("start counter for channel %i (was %s)\n",
|
||||
_is_channel2 ? 2 : 0,
|
||||
_running ? "running" : "not running");
|
||||
_running = true;
|
||||
}
|
||||
}
|
||||
|
||||
void stop_counter()
|
||||
{
|
||||
trace().printf("stop counter for channel %i (was %s), reload: 0x%x\n",
|
||||
_is_channel2 ? 2 : 0, _running ? "running" : "not running",
|
||||
_reload);
|
||||
_pit->dequeue_timeout(this);
|
||||
_running = false;
|
||||
}
|
||||
|
||||
void check_start_counter();
|
||||
|
||||
/**
|
||||
* Next absolute timeout in microseconds.
|
||||
*/
|
||||
inline l4_cpu_time_t next_timeout_us() const
|
||||
{
|
||||
assert(_reload != 0);
|
||||
|
||||
l4_kernel_clock_t kip = l4_kip_clock(l4re_kip());
|
||||
l4_cpu_time_t timeout_us =
|
||||
_reload * Microseconds_per_second / Pit_tick_rate;
|
||||
|
||||
// square wave with half-time toggle
|
||||
if (_status.is_mode3())
|
||||
timeout_us /= 2;
|
||||
|
||||
return kip + timeout_us;
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate the current value of the counter.
|
||||
*
|
||||
* The counters count down from _reload with the fixed Pit_tick_rate.
|
||||
*
|
||||
* Our Pit model does not update the tick value by itself. Instead it only
|
||||
* calculates the tick count when the guest reads the counter register. We
|
||||
* use the TSC as time basis.
|
||||
*
|
||||
* returns the current counter value of this channel
|
||||
*/
|
||||
l4_uint32_t current()
|
||||
{
|
||||
// current time in microseconds
|
||||
l4_kernel_clock_t kip_us = l4_kip_clock(l4re_kip());
|
||||
// time that has gone by since _reload was set
|
||||
l4_cpu_time_t diff_us = kip_us - _reload_kip_clock;
|
||||
// return current counter value
|
||||
l4_uint32_t ticks = diff_us * Pit_tick_rate / Microseconds_per_second;
|
||||
if (_status.is_mode3())
|
||||
{
|
||||
// in mode3 the counter decrements by two on each tick, since we
|
||||
// compare to _reload, we have to double the number of counter
|
||||
// decrements. expired() is called on each half-period, where
|
||||
// _reload_kip_clock is adapted to track only the time since the last
|
||||
// reload.
|
||||
ticks *= 2;
|
||||
}
|
||||
|
||||
if (ticks >= _reload)
|
||||
return 0;
|
||||
return _reload - ticks;
|
||||
}
|
||||
|
||||
l4_uint16_t _reload = 0U;
|
||||
Status _status;
|
||||
bool _is_channel2;
|
||||
bool _gate; //< 0 = low
|
||||
bool _running = false;
|
||||
Latch _count_latch;
|
||||
Latch _status_latch;
|
||||
Pit_timer *_pit;
|
||||
l4_cpu_time_t _reload_kip_clock = 0ULL;
|
||||
bool _read_lo = true;
|
||||
bool _write_lo = true;
|
||||
};
|
||||
|
||||
struct Port61 : public Vmm::Io_device
|
||||
{
|
||||
Port61(Channel *ch2) : _ch2(ch2) {}
|
||||
|
||||
char const *dev_name() const override
|
||||
{ return "PIT port 61"; }
|
||||
|
||||
void io_in(unsigned, Vmm::Mem_access::Width, l4_uint32_t *value) override
|
||||
{
|
||||
*value = val;
|
||||
val &= ~(1 << 5); // destructive read
|
||||
}
|
||||
|
||||
void io_out(unsigned, Vmm::Mem_access::Width, l4_uint32_t value) override
|
||||
{
|
||||
_ch2->gate(value & 0x1);
|
||||
val = value & 0xff;
|
||||
}
|
||||
|
||||
bool channel_2_on() const { return val & 0x1; }
|
||||
void set_out() { val |= (1 << 5); }
|
||||
void clear_out() { val &= ~(1 << 5); }
|
||||
|
||||
l4_uint8_t val = 0;
|
||||
Channel *_ch2;
|
||||
};
|
||||
|
||||
struct Control_reg
|
||||
{
|
||||
Control_reg(l4_uint8_t val) : raw(val) {}
|
||||
|
||||
l4_uint8_t raw;
|
||||
CXX_BITFIELD_MEMBER(6, 7, channel, raw);
|
||||
CXX_BITFIELD_MEMBER(4, 5, access, raw);
|
||||
CXX_BITFIELD_MEMBER(1, 3, opmode, raw);
|
||||
CXX_BITFIELD_MEMBER(0, 0, bcd, raw);
|
||||
|
||||
bool is_read_back_cmd() const { return channel() == 3; }
|
||||
bool is_latch_status() const { return !(raw & (1U << 4)); }
|
||||
bool is_latch_count() const { return !(raw & (1U << 5)); }
|
||||
};
|
||||
|
||||
static constexpr int port2idx(int port) { return port >> 1; }
|
||||
|
||||
static Dbg trace() { return Dbg(Dbg::Irq, Dbg::Trace, "PIT"); }
|
||||
static Dbg info() { return Dbg(Dbg::Irq, Dbg::Info, "PIT"); }
|
||||
static Dbg warn() { return Dbg(Dbg::Irq, Dbg::Warn, "PIT"); }
|
||||
|
||||
public:
|
||||
Pit_timer(cxx::Ref_ptr<Gic::Ic> const &ic, unsigned irq);
|
||||
virtual ~Pit_timer() = default;
|
||||
|
||||
char const *dev_name() const override
|
||||
{ return "PIT"; }
|
||||
|
||||
cxx::Ref_ptr<Vmm::Io_device> const port61() const { return _port61; }
|
||||
|
||||
void io_out(unsigned port, Vmm::Mem_access::Width width,
|
||||
l4_uint32_t value) override;
|
||||
void io_in(unsigned port, Vmm::Mem_access::Width width,
|
||||
l4_uint32_t *value) override;
|
||||
|
||||
private:
|
||||
Vmm::Irq_edge_sink _irq;
|
||||
cxx::unique_ptr<Channel> _channel[Channels];
|
||||
std::mutex _mutex;
|
||||
cxx::Ref_ptr<Port61> _port61;
|
||||
};
|
||||
|
||||
} // namespace Vdev
|
||||
@@ -1,188 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020, 2022-2024 Kernkonzept GmbH.
|
||||
* Author(s): Philipp Eppelt <philipp.eppelt@kernkonzept.com>
|
||||
*
|
||||
* License: see LICENSE.spdx (in this directory or the directories above)
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <l4/sys/types.h>
|
||||
#include <l4/l4virtio/virtqueue>
|
||||
#include <l4/cxx/ref_ptr>
|
||||
|
||||
#include "debug.h"
|
||||
#include "ds_mmio_mapper.h"
|
||||
#include "vcpu_ptr.h"
|
||||
#include "vm_ram.h"
|
||||
|
||||
namespace Vmm {
|
||||
|
||||
class Pt_walker : public cxx::Ref_obj
|
||||
{
|
||||
public:
|
||||
Pt_walker(cxx::Ref_ptr<Vm_ram> mmap, unsigned max_phys_addr_bit)
|
||||
: _mmap(mmap),
|
||||
_levels {{Pml4_shift, Pml4_mask},
|
||||
{Pdpt_shift, Pdpt_mask},
|
||||
{Pd_shift, Pd_mask},
|
||||
{Pt_shift, Pt_mask}
|
||||
},
|
||||
_max_phys_addr_mask((1UL << max_phys_addr_bit) - 1)
|
||||
{
|
||||
trace().printf("PT_walker: MAXPHYSADDR bits %i\n", max_phys_addr_bit);
|
||||
|
||||
_phys_addr_mask_4k = _max_phys_addr_mask & ~((1UL << Phys_addr_4k) - 1);
|
||||
_phys_addr_mask_2m = _max_phys_addr_mask & ~((1UL << Phys_addr_2m) - 1);
|
||||
_phys_addr_mask_1g = _max_phys_addr_mask & ~((1UL << Phys_addr_1g) - 1);
|
||||
}
|
||||
|
||||
l4_uint64_t walk(l4_uint64_t cr3, l4_uint64_t virt_addr)
|
||||
{
|
||||
// mask everything besides the 4K-aligned PML4 table address
|
||||
l4_uint64_t *tbl = translate_to_table_base(cr3 & _phys_addr_mask_4k);
|
||||
l4_uint64_t entry = _levels[0].get_entry(tbl, virt_addr);
|
||||
|
||||
if (0)
|
||||
trace().printf("cr3 0x%llx, entry 0x%llx, vaddr 0x%llx\n", cr3, entry,
|
||||
virt_addr);
|
||||
|
||||
if (!(entry & Present_bit))
|
||||
L4Re::chksys(-L4_EINVAL, "PML4 table is present\n");
|
||||
|
||||
for (unsigned i = 1; i < Pt_levels; ++i)
|
||||
{
|
||||
// PML4Entry: no PAT bit (12) --> mask everything except [M-1:12]
|
||||
tbl = translate_to_table_base(entry & _phys_addr_mask_4k);
|
||||
entry = _levels[i].get_entry(tbl, virt_addr);
|
||||
|
||||
if (!(entry & Present_bit))
|
||||
{
|
||||
char buf[78];
|
||||
snprintf(buf, sizeof(buf),
|
||||
"Found entry is present. Actual: Entry 0x%llx not "
|
||||
"present.\n",
|
||||
entry);
|
||||
|
||||
L4Re::chksys(-L4_EINVAL, buf);
|
||||
}
|
||||
|
||||
// check for PS = 0 in PDPT & PD entries
|
||||
if (i < 3 && entry & Pagesize_bit)
|
||||
{
|
||||
if (i == 1)
|
||||
return add_voffset(translate_to_table_base(entry & _phys_addr_mask_1g),
|
||||
virt_addr & G1_offset_mask);
|
||||
if (i == 2)
|
||||
return add_voffset(translate_to_table_base(entry & _phys_addr_mask_2m),
|
||||
virt_addr & M2_offset_mask);
|
||||
}
|
||||
}
|
||||
|
||||
return add_voffset(translate_to_table_base(entry & _phys_addr_mask_4k),
|
||||
virt_addr & K4_offset_mask);
|
||||
}
|
||||
|
||||
private:
|
||||
l4_uint64_t *translate_to_table_base(l4_uint64_t addr)
|
||||
{
|
||||
auto *ret = _mmap->guest2host<l4_uint64_t *>(Guest_addr(addr));
|
||||
if (0)
|
||||
trace().printf("Ram_addr: addr 0x%llx --> %p\n", addr, ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
l4_uint64_t add_voffset(l4_uint64_t *addr, l4_uint64_t offset)
|
||||
{
|
||||
return reinterpret_cast<l4_uint64_t>(addr) + offset;
|
||||
}
|
||||
|
||||
void dump_level(l4_uint64_t *tbl)
|
||||
{
|
||||
trace().printf("Dumping page table %p\n", tbl);
|
||||
for (int i = 0; i < 512; ++i)
|
||||
if (tbl[i] != 0 && tbl[i] & Present_bit)
|
||||
trace().printf("%i :: 0x%16llx\n", i, tbl[i]);
|
||||
}
|
||||
|
||||
void dump_all_valid_entries(l4_uint64_t base_ptr)
|
||||
{
|
||||
trace().printf(" +++++ Dumping all entries ++++ \n");
|
||||
l4_uint64_t *tbl = reinterpret_cast<l4_uint64_t *>(base_ptr);
|
||||
for (int i = 0; i < 512; ++i)
|
||||
{
|
||||
if (tbl[i] != 0 && tbl[i] & Present_bit)
|
||||
{
|
||||
trace().printf("%i :: 0x%16llx\n", i, tbl[i]);
|
||||
dump_level(translate_to_table_base(tbl[i] & _phys_addr_mask_4k));
|
||||
}
|
||||
}
|
||||
trace().printf(" +++++ Dumped all entries ++++ \n");
|
||||
}
|
||||
|
||||
struct Level
|
||||
{
|
||||
Level(int s, l4_uint64_t m) : shift(s), mask(m) {}
|
||||
|
||||
l4_uint64_t get_entry(l4_uint64_t *tbl, l4_uint64_t vaddr) const
|
||||
{
|
||||
if (0)
|
||||
trace().printf("next level idx: %llu\n", (vaddr & mask) >> shift);
|
||||
return tbl[(vaddr & mask) >> shift];
|
||||
}
|
||||
|
||||
int const shift;
|
||||
l4_uint64_t const mask;
|
||||
};
|
||||
|
||||
static Dbg trace() { return Dbg(Dbg::Mmio, Dbg::Trace, "PTW"); }
|
||||
|
||||
enum
|
||||
{
|
||||
Table_index_size = 9,
|
||||
Table_index_mask = (1UL << Table_index_size) - 1,
|
||||
|
||||
K4_offset_size = 12,
|
||||
K4_offset_mask = (1UL << K4_offset_size) - 1,
|
||||
|
||||
M2_offset_size = 21,
|
||||
M2_offset_mask = (1UL << M2_offset_size) - 1,
|
||||
|
||||
G1_offset_size = 30,
|
||||
G1_offset_mask = (1UL << G1_offset_size) - 1,
|
||||
|
||||
Pt_shift = 12,
|
||||
Pt_mask = Table_index_mask << Pt_shift,
|
||||
|
||||
Pd_shift = 21,
|
||||
Pd_mask = Table_index_mask << Pd_shift,
|
||||
|
||||
Pdpt_shift = 30,
|
||||
Pdpt_mask = Table_index_mask << Pdpt_shift,
|
||||
|
||||
Pml4_shift = 39,
|
||||
Pml4_mask = Table_index_mask << Pml4_shift,
|
||||
|
||||
Present_bit = 1UL,
|
||||
RW_bit = 2UL,
|
||||
US_bit = 4UL,
|
||||
Pagesize_bit = 1UL << 7,
|
||||
|
||||
Phys_addr_4k = 12,
|
||||
Phys_addr_2m = 21,
|
||||
Phys_addr_1g = 30,
|
||||
|
||||
XD_bit_shift = 63,
|
||||
XD_bit = 1UL << XD_bit_shift,
|
||||
|
||||
Pt_levels = 4,
|
||||
};
|
||||
|
||||
cxx::Ref_ptr<Vm_ram> _mmap;
|
||||
Level const _levels[Pt_levels];
|
||||
l4_uint64_t _phys_addr_mask_4k;
|
||||
l4_uint64_t _phys_addr_mask_2m;
|
||||
l4_uint64_t _phys_addr_mask_1g;
|
||||
l4_uint64_t _max_phys_addr_mask;
|
||||
};
|
||||
|
||||
} // namespace Vmm
|
||||
@@ -1,217 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2024 Kernkonzept GmbH.
|
||||
* Author(s): Steffen Liebergeld <steffen.liebergeld@kernkonzept.com>
|
||||
* Jan Klötzke <jan.kloetzke@kernkonzept.com>
|
||||
* Christian Pötzsch <christian.poetzsch@kernkonzept.com>
|
||||
*
|
||||
* License: see LICENSE.spdx (in this directory or the directories above)
|
||||
*/
|
||||
|
||||
#include "acpi.h"
|
||||
#include "device/qemu_fw_cfg.h"
|
||||
|
||||
namespace {
|
||||
|
||||
using namespace Acpi;
|
||||
|
||||
/**
|
||||
* Provide tables via the Qemu_fw_cfg to the guest firmware.
|
||||
*
|
||||
* The details of the interface are documented in the Qemu sources in
|
||||
* hw/acpi/bios-linker-loader.c. It is actively used by firmwares such as
|
||||
* Tianocore, so it can be considered stable.
|
||||
*
|
||||
* Because the final address of the tables is not known here, a more flexible
|
||||
* interface is used. The guest firmware is instructed by the
|
||||
* "etc/table-loader" commands file how to install the tables correctly. It
|
||||
* holds the commands to allocate space for the tables, patch the pointers
|
||||
* between the different tables and how to compute the checksums.
|
||||
*/
|
||||
class Acpi_tables : public Tables
|
||||
{
|
||||
enum
|
||||
{
|
||||
Tables_reservation = 8192,
|
||||
Loader_commands_reservation = 512,
|
||||
};
|
||||
|
||||
enum
|
||||
{
|
||||
// Commands
|
||||
Qemu_loader_allocate = 1,
|
||||
Qemu_loader_add_pointer = 2,
|
||||
Qemu_loader_add_checksum = 3,
|
||||
|
||||
Qemu_loader_zone_high = 1,
|
||||
Qemu_loader_zone_fseg = 2,
|
||||
|
||||
Qemu_loader_file_name_size = Qemu_fw_cfg::File_name_size,
|
||||
};
|
||||
|
||||
struct Qemu_loader_entry
|
||||
{
|
||||
l4_uint32_t type;
|
||||
union
|
||||
{
|
||||
struct Allocate
|
||||
{
|
||||
char file_name[Qemu_loader_file_name_size];
|
||||
l4_uint32_t alignment;
|
||||
l4_uint8_t zone;
|
||||
} allocate;
|
||||
struct Add_pointer
|
||||
{
|
||||
char dst_file_name[Qemu_loader_file_name_size];
|
||||
char src_file_name[Qemu_loader_file_name_size];
|
||||
l4_uint32_t dst_pointer_offset;
|
||||
l4_uint8_t dst_pointer_size;
|
||||
} add_pointer;
|
||||
struct Add_checksum
|
||||
{
|
||||
char file_name[Qemu_loader_file_name_size];
|
||||
l4_uint32_t checksum_offset;
|
||||
l4_uint32_t start;
|
||||
l4_uint32_t size;
|
||||
} add_checksum;
|
||||
|
||||
l4_uint8_t pad[124];
|
||||
} cmd;
|
||||
};
|
||||
|
||||
static_assert(sizeof(Qemu_loader_entry) == 128,
|
||||
"Invalid size of Qemu_loader_entry");
|
||||
|
||||
public:
|
||||
static char const constexpr *Rsdp_file_name = "etc/acpi/rsdp";
|
||||
static char const constexpr *Tables_file_name = "etc/acpi/tables";
|
||||
static char const constexpr *Loader_commands_file_name = "etc/table-loader";
|
||||
static char const constexpr *System_states_file_name = "etc/system-states";
|
||||
|
||||
Acpi_tables(Vdev::Device_lookup *devs)
|
||||
: _system_states_file(6)
|
||||
{
|
||||
info.printf("Initialize Qemu IF ACPI tables.\n");
|
||||
_tables.resize(Tables_reservation);
|
||||
_loader_cmds.reserve(Loader_commands_reservation);
|
||||
|
||||
cmd_add_alloc(Tables_file_name, 64 /* FACS requirement */, false);
|
||||
Writer table_wr(reinterpret_cast<l4_addr_t>(_tables.data()), _tables.size());
|
||||
write_all_tables(table_wr, devs);
|
||||
_tables.resize(table_wr.pos());
|
||||
resolve_table_refs_and_checksums(Tables_file_name, table_wr, table_wr);
|
||||
|
||||
cmd_add_alloc(Rsdp_file_name, 16, true /* EBDA area */);
|
||||
_rsdp.resize(Rsdp_size);
|
||||
Writer rdsp_wr(reinterpret_cast<l4_addr_t>(_rsdp.data()), _rsdp.size());
|
||||
write_rsdp(rdsp_wr);
|
||||
resolve_table_refs_and_checksums(Rsdp_file_name, rdsp_wr, table_wr);
|
||||
|
||||
// This is a qemu <-> EFI Interface. It is "documented" in
|
||||
// edk2/Ovmf/Library/QemuFwCfgS3Lib/QemuFwCfgS3PeiDxe.c
|
||||
// QemuFwCfgS3Enabled()
|
||||
// We only implement the bit needed for EFI to signal S3 support.
|
||||
_system_states_file[3] = (1 << 7); // S3 supported
|
||||
}
|
||||
|
||||
std::vector<char> const &rsdp() const
|
||||
{ return _rsdp; };
|
||||
std::vector<char> const &tables() const
|
||||
{ return _tables; }
|
||||
std::string const & loader_cmds() const
|
||||
{ return _loader_cmds; }
|
||||
std::vector<char> const &system_states_file() const
|
||||
{ return _system_states_file; }
|
||||
|
||||
private:
|
||||
void resolve_table_refs_and_checksums(char const *fn, Writer &wr,
|
||||
Writer &table_wr)
|
||||
{
|
||||
for (Writer::Table_ref const &ref : wr.table_refs())
|
||||
{
|
||||
if (ref.size == 4)
|
||||
*wr.as_ptr<l4_uint32_t>(ref.offset) = table_wr.table_offset(ref.table);
|
||||
else if (ref.size == 8) // XSDT
|
||||
*wr.as_ptr<l4_uint64_t>(ref.offset) = table_wr.table_offset(ref.table);
|
||||
else
|
||||
L4Re::throw_error(-L4_EINVAL, "Unsupported table offset size.");
|
||||
cmd_add_pointer(fn, ref.offset, ref.size, Tables_file_name);
|
||||
}
|
||||
|
||||
for (Writer::Checksum const &checksum : wr.checksums())
|
||||
cmd_add_checksum(fn, checksum.offset, checksum.len, checksum.field_off);
|
||||
}
|
||||
|
||||
void cmd_add_checksum(char const *fn, l4_size_t start, l4_size_t size,
|
||||
l4_size_t checksum)
|
||||
{
|
||||
Qemu_loader_entry e;
|
||||
std::memset(&e, 0, sizeof(e));
|
||||
|
||||
e.type = Qemu_loader_add_checksum;
|
||||
std::strncpy(e.cmd.add_checksum.file_name, fn,
|
||||
sizeof(e.cmd.add_checksum.file_name) - 1U);
|
||||
e.cmd.add_checksum.checksum_offset = checksum;
|
||||
e.cmd.add_checksum.start = start;
|
||||
e.cmd.add_checksum.size = size;
|
||||
|
||||
_loader_cmds.append((char*)&e, sizeof(e));
|
||||
}
|
||||
|
||||
/**
|
||||
* Add the pointer value to `src_fn` in the file `dst_fn` at offset
|
||||
* `dst_off`. The patched pointer size is `dst_size`.
|
||||
*/
|
||||
void cmd_add_pointer(char const *dst_fn, l4_size_t dst_off, l4_size_t dst_size,
|
||||
char const *src_fn)
|
||||
{
|
||||
Qemu_loader_entry e;
|
||||
std::memset(&e, 0, sizeof(e));
|
||||
|
||||
e.type = Qemu_loader_add_pointer;
|
||||
std::strncpy(e.cmd.add_pointer.dst_file_name, dst_fn,
|
||||
sizeof(e.cmd.add_pointer.dst_file_name) - 1U);
|
||||
std::strncpy(e.cmd.add_pointer.src_file_name, src_fn,
|
||||
sizeof(e.cmd.add_pointer.src_file_name) - 1U);
|
||||
e.cmd.add_pointer.dst_pointer_offset = dst_off;
|
||||
e.cmd.add_pointer.dst_pointer_size = dst_size;
|
||||
|
||||
_loader_cmds.append((char*)&e, sizeof(e));
|
||||
}
|
||||
|
||||
void cmd_add_alloc(char const *fn, l4_size_t align, bool fseg_zone)
|
||||
{
|
||||
Qemu_loader_entry e;
|
||||
std::memset(&e, 0, sizeof(e));
|
||||
|
||||
e.type = Qemu_loader_allocate;
|
||||
std::strncpy(e.cmd.allocate.file_name, fn,
|
||||
sizeof(e.cmd.allocate.file_name) - 1U);
|
||||
e.cmd.allocate.alignment = align;
|
||||
e.cmd.allocate.zone = fseg_zone ? Qemu_loader_zone_fseg
|
||||
: Qemu_loader_zone_high;
|
||||
|
||||
_loader_cmds.append((char*)&e, sizeof(e));
|
||||
}
|
||||
|
||||
std::vector<char> _rsdp;
|
||||
std::vector<char> _tables;
|
||||
std::vector<char> _system_states_file;
|
||||
std::string _loader_cmds;
|
||||
};
|
||||
|
||||
struct Qemu_fw_cfg_tables : public Qemu_fw_cfg::Provider
|
||||
{
|
||||
void init_late(Vdev::Device_lookup *devs) override
|
||||
{
|
||||
Acpi_tables tables(devs);
|
||||
Qemu_fw_cfg::put_file(Acpi_tables::Rsdp_file_name, tables.rsdp());
|
||||
Qemu_fw_cfg::put_file(Acpi_tables::Tables_file_name, tables.tables());
|
||||
Qemu_fw_cfg::put_file(Acpi_tables::Loader_commands_file_name, tables.loader_cmds());
|
||||
Qemu_fw_cfg::put_file(Acpi_tables::System_states_file_name,
|
||||
tables.system_states_file());
|
||||
}
|
||||
};
|
||||
|
||||
static Qemu_fw_cfg_tables f;
|
||||
|
||||
}; // namespace
|
||||
@@ -1,167 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2024 Kernkonzept GmbH.
|
||||
* Author(s): Steffen Liebergeld <steffen.liebergeld@kernkonzept.com>
|
||||
* Jan Klötzke <jan.kloetzke@kernkonzept.com>
|
||||
* Christian Pötzsch <christian.poetzsch@kernkonzept.com>
|
||||
*
|
||||
* License: see LICENSE.spdx (in this directory or the directories above)
|
||||
*/
|
||||
|
||||
#include "cpu_dev_array.h"
|
||||
#include "guest.h"
|
||||
#include "device/qemu_fw_cfg.h"
|
||||
|
||||
#include <l4/cxx/unique_ptr>
|
||||
#include <l4/re/util/env_ns>
|
||||
|
||||
namespace {
|
||||
|
||||
/**
|
||||
* Device to forward boot data over the qemu fw configuration interface.
|
||||
*
|
||||
* The qemu_fw_cfg node must have l4vmm,kernel, l4vmm,ramdisk and l4vmm,cmdline
|
||||
* as additional properties. Their value can be an empty string.
|
||||
*
|
||||
* \code{.dtb}
|
||||
* qemu_fw_if {
|
||||
* compatible = "l4vmm,qemu-fw-cfg";
|
||||
* reg = <0x1 0x510 0x0c>;
|
||||
* l4vmm,kernel = "linux";
|
||||
* l4vmm,ramdisk = "ramdisk";
|
||||
* l4vmm,cmdline = "console=TTY0";
|
||||
* };
|
||||
* \endcode
|
||||
*/
|
||||
class Qemu_fw_cfg_boot : public Qemu_fw_cfg::Provider
|
||||
{
|
||||
enum Fw_cfg_item_selectors
|
||||
{
|
||||
// Item selectors defined by Qemu
|
||||
Fw_cfg_cpu_count = 0x05,
|
||||
Fw_cfg_kernel_size = 0x08,
|
||||
Fw_cfg_initrd_size = 0x0b,
|
||||
Fw_cfg_boot_menu = 0x0e,
|
||||
Fw_cfg_kernel_data = 0x11,
|
||||
Fw_cfg_commandline_size = 0x14,
|
||||
Fw_cfg_commandline_data = 0x15,
|
||||
Fw_cfg_kernel_setup_size = 0x17,
|
||||
Fw_cfg_kernel_setup_data = 0x18,
|
||||
Fw_cfg_initrd_data = 0x12,
|
||||
|
||||
// Added by KK
|
||||
Fw_cfg_uvmm_dt = 0xe0,
|
||||
};
|
||||
|
||||
/**
 * Read the boot configuration from the device tree node.
 *
 * Picks up the `l4vmm,kernel`, `l4vmm,ramdisk` and `l4vmm,cmdline`
 * properties and parses every entry of the optional `l4vmm,items` string
 * list. Each item must have the form `[name=]<name>,string=<value>`.
 *
 * \param node  Device tree node of the fw_cfg device.
 *
 * \throws L4::Runtime_error(-L4_EINVAL) if an item is malformed.
 */
void init(Vdev::Device_lookup * /*devs*/, Vdev::Dt_node const &node) override
{
  // Assigning a null pointer to a std::string is undefined behaviour, so a
  // property lookup that yields no value is mapped to the empty string.
  auto prop_or_empty = [&node](char const *prop) -> std::string
    {
      char const *val = node.get_prop<char>(prop, nullptr);
      return val ? std::string(val) : std::string();
    };

  _kernel = prop_or_empty("l4vmm,kernel");
  _ramdisk = prop_or_empty("l4vmm,ramdisk");
  _cmdline = prop_or_empty("l4vmm,cmdline");

  auto c = node.stringlist_count("l4vmm,items");
  for (int i = 0; i < c; i++)
    {
      char const *raw = node.stringlist_get("l4vmm,items", i, NULL);
      if (!raw)
        L4Re::throw_error(-L4_EINVAL, "fw_cfg items needs name");

      std::string arg(raw);
      // Find the comma delimiter between "[name=]name" and "string=string".
      // The name component should not be empty.
      auto pos = arg.find(',');
      if (pos == std::string::npos || pos == 0)
        L4Re::throw_error(-L4_EINVAL, "fw_cfg items needs name");

      // Strip the optional "name=" label from the name component.
      auto name = arg.substr(0, pos);
      if (name.substr(0, 5) == std::string("name="))
        name = name.substr(5);

      // Strip the required "string=" label from the string component.
      auto value = arg.substr(pos);
      if (value.substr(0, 8) != std::string(",string="))
        L4Re::throw_error(-L4_EINVAL, "fw_cfg items only support strings");

      value = value.substr(8);
      _items.push_back(std::make_tuple(name, value));
    }
}
|
||||
|
||||
/**
 * Publish the configured boot data through the fw_cfg interface.
 *
 * Registers kernel, ramdisk and command line if they were configured,
 * followed by the additional string items, the device tree address and
 * the CPU count.
 *
 * \param devs  Device lookup giving access to the VMM and the CPU array.
 *
 * \throws L4::Runtime_error(-L4_EINVAL) if the kernel dataspace is missing
 *         or is an ELF file; an error is also raised if the ramdisk
 *         dataspace cannot be found.
 */
void init_late(Vdev::Device_lookup *devs) override
{
  if (!_kernel.empty())
    {
      _kernel_binary = cxx::make_unique<Boot::Binary_ds>(_kernel.c_str());

      if (!_kernel_binary->is_valid())
        L4Re::throw_error(-L4_EINVAL, "Kernel dataspace not found.");

      if (_kernel_binary->is_elf_binary())
        L4Re::throw_error(-L4_EINVAL, "Elf files not supported for qemu fw.");

      // Read the setup_sects byte of the boot header as an unsigned value.
      l4_uint8_t num_setup_sects =
        *((l4_uint8_t const *)_kernel_binary->get_data()
          + Vmm::Bp_setup_sects);

      // Linux/x86 boot protocol: a setup_sects value of 0 means 4.
      if (num_setup_sects == 0)
        num_setup_sects = 4;

      // The setup area is the boot sector plus setup_sects sectors of
      // 512 bytes each.
      add_kernel(_kernel_binary->ds(), (num_setup_sects + 1) * 512);
    }

  if (!_ramdisk.empty())
    {
      _ramdisk_ds = L4Re::Util::Unique_cap<L4Re::Dataspace>(
        L4Re::chkcap(L4Re::Util::Env_ns().query<L4Re::Dataspace>(
                       _ramdisk.c_str()),
                     "Ramdisk dataspace not found"));
      add_initrd(_ramdisk_ds.get());
    }

  if (!_cmdline.empty())
    add_cmdline(_cmdline.c_str());

  for (auto const &s: _items)
    Qemu_fw_cfg::put_file(std::get<0>(s).c_str(), std::get<1>(s));

  add_dt_addr(devs->vmm()->dt_addr());

  add_cpu_count(devs->cpus()->max_cpuid() + 1);
}
|
||||
|
||||
void add_cmdline(char const *cmdline)
|
||||
{
|
||||
size_t len = strlen(cmdline) + 1U;
|
||||
Qemu_fw_cfg::set_item_u32le(Fw_cfg_commandline_size, len);
|
||||
Qemu_fw_cfg::set_item(Fw_cfg_commandline_data, cmdline, len);
|
||||
}
|
||||
|
||||
/**
 * Register the kernel image, split into setup area and remaining payload.
 *
 * \param kernel      Dataspace holding the complete kernel image.
 * \param setup_size  Size in bytes of the setup area at the start of the
 *                    image.
 *
 * \throws L4::Runtime_error(-L4_EINVAL) if the image is smaller than
 *         `setup_size`.
 */
void add_kernel(L4::Cap<L4Re::Dataspace> kernel, l4_size_t setup_size)
{
  size_t image_size = kernel->size();

  // A truncated or bogus image must not make the size computation below
  // wrap around.
  if (setup_size > image_size)
    L4Re::throw_error(-L4_EINVAL,
                      "Kernel image smaller than its setup area.");

  Qemu_fw_cfg::set_item_u32le(Fw_cfg_kernel_setup_size, setup_size);
  Qemu_fw_cfg::set_item(Fw_cfg_kernel_setup_data, kernel, 0, setup_size);
  Qemu_fw_cfg::set_item_u32le(Fw_cfg_kernel_size, image_size - setup_size);
  Qemu_fw_cfg::set_item(Fw_cfg_kernel_data, kernel, setup_size);
}
|
||||
|
||||
/**
 * Publish the initial ramdisk through fw_cfg.
 *
 * \param initrd  Dataspace holding the ramdisk; its size and the dataspace
 *                itself are exported as two separate items.
 */
void add_initrd(L4::Cap<L4Re::Dataspace> initrd)
{
  Qemu_fw_cfg::set_item_u32le(
    Fw_cfg_initrd_size,
    initrd->size());

  Qemu_fw_cfg::set_item(
    Fw_cfg_initrd_data,
    initrd);
}
|
||||
|
||||
/**
 * Publish the device tree address through the uvmm-specific fw_cfg item.
 *
 * \param addr  Device tree address; exported as a 64-bit little-endian value.
 */
void add_dt_addr(l4_addr_t addr)
{
  l4_uint64_t dt_addr_le = htole64(addr);

  Qemu_fw_cfg::set_item(Fw_cfg_uvmm_dt, &dt_addr_le, sizeof(dt_addr_le));
}
|
||||
|
||||
/**
 * Publish the number of CPUs through fw_cfg as a 16-bit little-endian item.
 *
 * \param num  CPU count to export.
 */
void add_cpu_count(l4_uint16_t num)
{ Qemu_fw_cfg::set_item_u16le(Fw_cfg_cpu_count, num); }
|
||||
|
||||
std::string _kernel;
|
||||
cxx::unique_ptr<Boot::Binary_ds> _kernel_binary;
|
||||
std::string _ramdisk;
|
||||
L4Re::Util::Unique_cap<L4Re::Dataspace> _ramdisk_ds;
|
||||
std::string _cmdline;
|
||||
std::vector<std::tuple<std::string, std::string>> _items;
|
||||
};
|
||||
|
||||
static Qemu_fw_cfg_boot f;
|
||||
|
||||
}; // namespace
|
||||
@@ -1,599 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2019, 2021-2024 Kernkonzept GmbH.
|
||||
* Author(s): Philipp Eppelt <philipp.eppelt@kernkonzept.com>
|
||||
* Steffen Liebergeld <steffen.liebergeld@kernkonzept.com>
|
||||
*
|
||||
* License: see LICENSE.spdx (in this directory or the directories above)
|
||||
*/
|
||||
|
||||
/**
|
||||
* Minimal viable implementation of a CMOS RTC (Motorola MC146818A).
|
||||
*
|
||||
* New time values can be set by the guest via the SET bit of status register B.
|
||||
* We only support 24h mode (it is hard-wired).
|
||||
* We do not support the century byte.
|
||||
*
|
||||
* On amd64 linux will assume the rtc is in BCD mode even when the format is
|
||||
* set to binary.
|
||||
*
|
||||
* Example device tree entry:
|
||||
*
|
||||
* \code{.dtb}
|
||||
* rtc {
|
||||
* compatible = "virt-rtc";
|
||||
* reg = <0x0 0x0 0x0 0x0>;
|
||||
* interrupt-parent = <&IOAPIC>;
|
||||
* interrupts = <8>;
|
||||
* };
|
||||
* \endcode
|
||||
*
|
||||
* Optionally this emulation can use wallclock-time from an external source.
|
||||
*/
|
||||
#include "device_factory.h"
|
||||
#include "guest.h"
|
||||
#include "device.h"
|
||||
#include "io_device.h"
|
||||
#include "timer.h"
|
||||
#include "irq_dt.h"
|
||||
|
||||
#include "../device/rtc-hub.h"
|
||||
|
||||
#include <time.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include <l4/bid_config.h>
|
||||
|
||||
namespace Vdev {
|
||||
|
||||
class Rtc :
|
||||
public Vdev::Timer,
|
||||
public Vdev::Pm_device,
|
||||
public Vmm::Io_device,
|
||||
public Vdev::Device
|
||||
{
|
||||
enum Register : unsigned
|
||||
{
|
||||
Seconds = 0,
|
||||
Seconds_alarm,
|
||||
Minutes,
|
||||
Minutes_alarm,
|
||||
Hours,
|
||||
Hours_alarm,
|
||||
Weekday,
|
||||
Day_of_month,
|
||||
Month,
|
||||
Year,
|
||||
Status_a = 0xa,
|
||||
Status_b = 0xb,
|
||||
Reg_c = 0xc,
|
||||
Reg_d = 0xd,
|
||||
|
||||
// Cmos_ram
|
||||
Ram_start = 0xe,
|
||||
Ram_end = 0x80,
|
||||
Ram_size = Ram_end - Ram_start,
|
||||
};
|
||||
|
||||
enum Status_reg_c : l4_uint8_t
|
||||
{
|
||||
Interrupt_request = 0x80,
|
||||
Periodic_interrupt_flag = 0x40,
|
||||
Alarm_interrupt_flag = 0x20,
|
||||
Update_ended_interrupt_flag = 0x10,
|
||||
};
|
||||
|
||||
enum Status_reg_d : l4_uint8_t
|
||||
{
|
||||
Valid_ram_and_time = 0x80,
|
||||
};
|
||||
|
||||
struct Status_reg_a
|
||||
{
|
||||
l4_uint8_t reg = 0;
|
||||
CXX_BITFIELD_MEMBER(0, 3, rate_selection_bits, reg);
|
||||
CXX_BITFIELD_MEMBER(4, 6, divider_selection_bits, reg);
|
||||
CXX_BITFIELD_MEMBER(7, 7, update_in_progress, reg);
|
||||
};
|
||||
|
||||
struct Status_reg_b
|
||||
{
|
||||
l4_uint8_t reg = 0x2; // mode_24 == 1
|
||||
CXX_BITFIELD_MEMBER(0, 0, daylight_savings_enable, reg);
|
||||
CXX_BITFIELD_MEMBER(1, 1, mode_24, reg);
|
||||
CXX_BITFIELD_MEMBER(2, 2, data_mode, reg);
|
||||
CXX_BITFIELD_MEMBER(3, 3, square_wave_enable, reg);
|
||||
CXX_BITFIELD_MEMBER(4, 4, update_ended_interrupt_enable, reg);
|
||||
CXX_BITFIELD_MEMBER(5, 5, alarm_interrupt_enable, reg);
|
||||
CXX_BITFIELD_MEMBER(6, 6, periodic_interrupt_enable, reg);
|
||||
CXX_BITFIELD_MEMBER(7, 7, set, reg);
|
||||
};
|
||||
|
||||
struct Alarm : public L4::Ipc_svr::Timeout_queue::Timeout
|
||||
{
|
||||
Rtc *_rtc;
|
||||
|
||||
Alarm(Rtc *rtc) : _rtc(rtc) {}
|
||||
|
||||
/**
|
||||
* Handle expired alarms.
|
||||
*
|
||||
* This function is called from the timer thread.
|
||||
*/
|
||||
void expired() override
|
||||
{
|
||||
if (!_rtc->_reg_b.alarm_interrupt_enable())
|
||||
{
|
||||
trace().printf("Alarm interrupt but alarm interrupt enable not set.\n");
|
||||
return;
|
||||
}
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(_rtc->_mutex);
|
||||
|
||||
_rtc->_reg_c |= Alarm_interrupt_flag;
|
||||
_rtc->_reg_c |= Interrupt_request;
|
||||
}
|
||||
trace().printf("RTC Irq due to alarm expired()\n");
|
||||
_rtc->_sink.inject();
|
||||
}
|
||||
}; // struct Alarm
|
||||
|
||||
// allow Alarm access to private Rtc members.
|
||||
friend struct Alarm;
|
||||
|
||||
// convert internal binary representation to BCD if needed
|
||||
l4_uint32_t convert_to_guest(int val)
|
||||
{
|
||||
if (_reg_b.data_mode())
|
||||
return val;
|
||||
|
||||
// See https://de.wikipedia.org/wiki/BCD-Code
|
||||
return (val % 10) + ((val / 10) << 4);
|
||||
}
|
||||
|
||||
// convert what the guest gave us to internal binary representation
|
||||
l4_uint8_t convert_from_guest(l4_uint8_t val)
|
||||
{
|
||||
if (_reg_b.data_mode()) // we are using binary mode
|
||||
return val;
|
||||
|
||||
return (val & 0xf) + ((val & 0xf0) >> 4) * 10;
|
||||
}
|
||||
|
||||
void handle_set_time(Status_reg_b r)
|
||||
{
|
||||
// As long as the set() bit is set, the guest assumes that the clock does
|
||||
// not update. We redirect all writes to shadow registers, and those
|
||||
// never get updated.
|
||||
|
||||
// The strategy for updating is:
|
||||
// - the guest sets the set bit to 1
|
||||
// - the guest writes the new time value to the shadow registers
|
||||
// - the guest sets the set bit to 0
|
||||
// - once the set bit is 0, Uvmm retrieves the new time value from the
|
||||
// shadow registers and updates its internal time.
|
||||
bool old_set_bit = _reg_b.set().get();
|
||||
bool new_set_bit = r.set().get();
|
||||
|
||||
if (!old_set_bit || new_set_bit)
|
||||
return;
|
||||
|
||||
time_t seconds = ns_to_s(L4rtc_hub::ns_since_epoch());
|
||||
struct tm *t = gmtime(&seconds);
|
||||
if (!t)
|
||||
{
|
||||
warn().printf("Could not determine time.\n");
|
||||
return;
|
||||
}
|
||||
t->tm_sec = _shadow_registers[Seconds];
|
||||
t->tm_min = _shadow_registers[Minutes];
|
||||
t->tm_hour = _shadow_registers[Hours];
|
||||
t->tm_mday = _shadow_registers[Day_of_month];
|
||||
t->tm_mon = _shadow_registers[Month] - 1; // months start at '1'
|
||||
int centuries_since_1900 = t->tm_year / 100 * 100;
|
||||
// tm_year is defined as 'years since 1900'. The RTC spec instead
|
||||
// specifies the Year register as 'year in the range of 0-99'. Here we use
|
||||
// the previous centuries since 1900 (as calculated from "seconds since
|
||||
// epoch") and add them to the register value from the guest.
|
||||
t->tm_year = _shadow_registers[Year] + centuries_since_1900;
|
||||
|
||||
_seconds = timegm(t);
|
||||
L4rtc_hub::set_ns_since_epoch(s_to_ns(_seconds));
|
||||
|
||||
trace().printf("set time to %04d-%02d-%02d %02d:%02d:%02d\n",
|
||||
t->tm_year + 1900, t->tm_mon, t->tm_mday,
|
||||
t->tm_hour, t->tm_min, t->tm_sec);
|
||||
}
|
||||
|
||||
// return next timeout in seconds
|
||||
time_t calc_next_alarm()
|
||||
{
|
||||
time_t seconds = ns_to_s(L4rtc_hub::ns_since_epoch());
|
||||
struct tm *alarm_time = gmtime(&seconds);
|
||||
struct tm *current_time = gmtime(&seconds);
|
||||
|
||||
if (dont_care_not_set(_shadow_registers[Seconds_alarm]))
|
||||
alarm_time->tm_sec = _shadow_registers[Seconds_alarm];
|
||||
else
|
||||
{
|
||||
trace().printf("wildcard seconds\n");
|
||||
alarm_time->tm_sec += 1;
|
||||
alarm_time->tm_sec %= 60;
|
||||
}
|
||||
if (dont_care_not_set(_shadow_registers[Minutes_alarm]))
|
||||
alarm_time->tm_min = _shadow_registers[Minutes_alarm];
|
||||
else
|
||||
{
|
||||
trace().printf("wildcard minutes\n");
|
||||
alarm_time->tm_min += 1;
|
||||
alarm_time->tm_min %= 60;
|
||||
}
|
||||
if (dont_care_not_set(_shadow_registers[Hours_alarm]))
|
||||
alarm_time->tm_hour = _shadow_registers[Hours_alarm];
|
||||
else
|
||||
{
|
||||
trace().printf("wildcard hours\n");
|
||||
alarm_time->tm_hour += 1;
|
||||
alarm_time->tm_hour %= 24;
|
||||
}
|
||||
|
||||
time_t alarm_seconds = mktime(alarm_time);
|
||||
if (alarm_seconds == -1)
|
||||
trace().printf("error calculating alarm_seconds. Errno %i\n", errno);
|
||||
|
||||
time_t current_seconds = mktime(current_time);
|
||||
if (current_seconds == -1)
|
||||
trace().printf("error calculating current_seconds. Errno %i\n", errno);
|
||||
|
||||
if (alarm_seconds < current_seconds)
|
||||
{
|
||||
trace().printf("Alarm is in the past\n");
|
||||
return ~0L;
|
||||
}
|
||||
|
||||
trace().printf("alarm_seconds=%ld current_seconds=%ld\n", alarm_seconds,
|
||||
current_seconds);
|
||||
return (alarm_seconds - current_seconds);
|
||||
}
|
||||
|
||||
void handle_alarms(Status_reg_b r)
|
||||
{
|
||||
time_t next_alarm = 0;
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(_mutex);
|
||||
|
||||
if (r.update_ended_interrupt_enable())
|
||||
{
|
||||
trace().printf("Guest wants an update interrupt.\n");
|
||||
l4_cpu_time_t current_second = ns_to_s(l4_tsc_to_ns(l4_rdtsc()));
|
||||
_reg_c |= Update_ended_interrupt_flag;
|
||||
if (current_second > _previous_alarm_second)
|
||||
{
|
||||
_previous_alarm_second = current_second;
|
||||
_reg_c |= Interrupt_request;
|
||||
_sink.inject();
|
||||
trace().printf("Update ended interrupt injected immediately\n");
|
||||
}
|
||||
}
|
||||
|
||||
if (!r.alarm_interrupt_enable())
|
||||
return;
|
||||
|
||||
trace().printf("Guest wants an alarm interrupt.\n");
|
||||
|
||||
next_alarm = calc_next_alarm();
|
||||
if (next_alarm == ~0L) // do not fire for alarms of the past
|
||||
return;
|
||||
|
||||
if (next_alarm == 0) // guest wants an alarm right now
|
||||
{
|
||||
l4_cpu_time_t current_second = ns_to_s(l4_tsc_to_ns(l4_rdtsc()));
|
||||
_reg_c |= Alarm_interrupt_flag;
|
||||
_reg_c |= Interrupt_request;
|
||||
if (current_second > _previous_alarm_second)
|
||||
{
|
||||
_previous_alarm_second = current_second;
|
||||
_sink.inject();
|
||||
trace().printf("Alarm interrupt injected immediately\n");
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// guest alarm is at least 1 second in the future
|
||||
// must not hold the lock when doing the IPC to the timer thread
|
||||
enqueue_timeout(&_alarm_timeout,
|
||||
l4_kip_clock(l4re_kip()) + s_to_us(next_alarm));
|
||||
trace().printf("enqueue timeout %ld\n", next_alarm);
|
||||
}
|
||||
|
||||
void handle_write(l4_uint32_t value)
|
||||
{
|
||||
trace().printf("write reg %d value = 0x%x\n", _reg_sel, value & 0xff);
|
||||
l4_uint8_t val = value & 0xff;
|
||||
switch (_reg_sel)
|
||||
{
|
||||
case Status_a:
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(_mutex);
|
||||
trace().printf("reg a: 0x%x\n", val);
|
||||
_reg_a.reg = val;
|
||||
}
|
||||
break;
|
||||
case Status_b:
|
||||
{
|
||||
trace().printf("reg b: 0x%x\n", val);
|
||||
Status_reg_b r;
|
||||
r.reg = val;
|
||||
|
||||
// set_time() and alarms() handle the lock themselves
|
||||
handle_set_time(r);
|
||||
handle_alarms(r);
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(_mutex);
|
||||
_reg_b.reg = val;
|
||||
// we only allow mode_24
|
||||
_reg_b.mode_24().set(1);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case Reg_c:
|
||||
case Reg_d:
|
||||
warn().printf("Write to RO reg (%u)\n", _reg_sel);
|
||||
break;
|
||||
default:
|
||||
if (_reg_sel <= Year)
|
||||
_shadow_registers[_reg_sel] = convert_from_guest(val);
|
||||
else if (_reg_sel >= Ram_start && _reg_sel < Ram_end)
|
||||
cmos_write(_reg_sel - Ram_start, val);
|
||||
else
|
||||
warn().printf("Register write not handled (%u)\n", _reg_sel);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
l4_uint32_t handle_read()
|
||||
{
|
||||
trace().printf("read reg %d\n", _reg_sel);
|
||||
// these registers need to always work
|
||||
switch (_reg_sel)
|
||||
{
|
||||
case Status_a:
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(_mutex);
|
||||
return _reg_a.reg;
|
||||
}
|
||||
case Status_b:
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(_mutex);
|
||||
return _reg_b.reg;
|
||||
}
|
||||
case Reg_c:
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(_mutex);
|
||||
unsigned ret = _reg_c;
|
||||
trace().printf("reg c: %x\n", _reg_c);
|
||||
// reading clears the status bits
|
||||
_reg_c = 0;
|
||||
_sink.ack();
|
||||
return ret;
|
||||
}
|
||||
case Reg_d:
|
||||
return Valid_ram_and_time;
|
||||
}
|
||||
|
||||
// only update time if guest does not currently try to set a new time
|
||||
if (!_reg_b.set())
|
||||
_seconds = ns_to_s(L4rtc_hub::ns_since_epoch());
|
||||
|
||||
struct tm *t = gmtime(&_seconds);
|
||||
if (!t)
|
||||
{
|
||||
warn().printf("Could not determine time.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
l4_uint32_t ret = 0;
|
||||
switch (_reg_sel)
|
||||
{
|
||||
case Seconds:
|
||||
ret = convert_to_guest(t->tm_sec);
|
||||
break;
|
||||
case Seconds_alarm:
|
||||
ret = convert_to_guest(_shadow_registers[Seconds_alarm]);
|
||||
break;
|
||||
case Minutes:
|
||||
ret = convert_to_guest(t->tm_min);
|
||||
break;
|
||||
case Minutes_alarm:
|
||||
ret = convert_to_guest(_shadow_registers[Minutes_alarm]);
|
||||
break;
|
||||
case Hours:
|
||||
ret = convert_to_guest(t->tm_hour);
|
||||
break;
|
||||
case Hours_alarm:
|
||||
ret = convert_to_guest(_shadow_registers[Hours_alarm]);
|
||||
break;
|
||||
case Weekday:
|
||||
ret = convert_to_guest(t->tm_wday);
|
||||
break;
|
||||
case Day_of_month:
|
||||
ret = convert_to_guest(t->tm_mday);
|
||||
break;
|
||||
case Month:
|
||||
ret = convert_to_guest(t->tm_mon + 1); // gmtime returns months counting from zero
|
||||
break;
|
||||
case Year:
|
||||
ret = convert_to_guest(t->tm_year % 100);
|
||||
break;
|
||||
default:
|
||||
if (Ram_start > _reg_sel || _reg_sel > Ram_end)
|
||||
warn().printf("Unknown register read (%d)\n", _reg_sel);
|
||||
else
|
||||
ret = cmos_read(_reg_sel - Ram_start);
|
||||
break;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
public:
|
||||
Rtc(cxx::Ref_ptr<Gic::Ic> const &ic, int irq)
|
||||
: Pm_device(), _alarm_timeout(this), _sink(ic, irq), _previous_alarm_second(0)
|
||||
{
|
||||
info().printf("Hello from RTC. Irq=%d\n", irq);
|
||||
#if !defined(CONFIG_UVMM_EXTERNAL_RTC) and !(CONFIG_RELEASE_MODE)
|
||||
warn().printf(
|
||||
"No external clock source. Rtc time will not represent wallclock time.\n"
|
||||
"Set CONFIG_UVMM_EXTERNAL_RTC = y if you have an external clock "
|
||||
"source.\n");
|
||||
#endif
|
||||
|
||||
_seconds = ns_to_s(L4rtc_hub::ns_since_epoch());
|
||||
}
|
||||
|
||||
void pm_suspend() override
|
||||
{}
|
||||
|
||||
void pm_resume() override
|
||||
{
|
||||
// tell the guest that the machine has resumed from suspend
|
||||
// use the PS/2 shutdown status byte as expected by firmware
|
||||
cmos_write(1, 0xfe);
|
||||
}
|
||||
|
||||
char const *dev_name() const override
|
||||
{ return "RTC"; }
|
||||
|
||||
/* IO write from the guest to device */
|
||||
void io_out(unsigned port, Vmm::Mem_access::Width, l4_uint32_t value) override
|
||||
{
|
||||
switch (port)
|
||||
{
|
||||
case 0:
|
||||
_reg_sel = value & 0x7f;
|
||||
break;
|
||||
case 1:
|
||||
handle_write(value);
|
||||
break;
|
||||
default:
|
||||
warn().printf("Unknown port written (%u).\n", port);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* IO read from the guest */
|
||||
void io_in(unsigned port, Vmm::Mem_access::Width, l4_uint32_t *value) override
|
||||
{
|
||||
switch (port)
|
||||
{
|
||||
case 0:
|
||||
*value = _reg_sel;
|
||||
break;
|
||||
case 1:
|
||||
*value = handle_read();
|
||||
break;
|
||||
default:
|
||||
warn().printf("Unknown port read (%u).\n", port);
|
||||
break;
|
||||
};
|
||||
}
|
||||
|
||||
~Rtc()
|
||||
{
|
||||
dequeue_timeout(&_alarm_timeout);
|
||||
}
|
||||
|
||||
private:
|
||||
static Dbg info() { return Dbg(Dbg::Dev, Dbg::Info, "RTC"); }
|
||||
static Dbg warn() { return Dbg(Dbg::Dev, Dbg::Warn, "RTC"); }
|
||||
static Dbg trace() { return Dbg(Dbg::Dev, Dbg::Trace, "RTC"); }
|
||||
|
||||
static l4_uint64_t ns_to_s(l4_uint64_t ns) { return ns / 1'000'000'000; }
|
||||
static l4_uint64_t s_to_us(l4_uint64_t s) { return s * 1'000'000; }
|
||||
static l4_uint64_t s_to_ns(l4_uint64_t s) { return s * 1'000'000'000; }
|
||||
|
||||
/// Alarm registers with the highest bits set (0xC0 - 0xFF) are don't care.
|
||||
static bool dont_care_not_set(l4_uint8_t reg)
|
||||
{
|
||||
enum { Dont_care_bits = 0xC0 };
|
||||
return (reg & Dont_care_bits) != Dont_care_bits;
|
||||
}
|
||||
|
||||
void cmos_write(l4_uint8_t regsel, l4_uint16_t value)
|
||||
{
|
||||
assert(regsel < Ram_size);
|
||||
trace().printf("cmos write(%u, 0x%x)\n", regsel, value);
|
||||
_cmos[regsel] = value;
|
||||
}
|
||||
|
||||
l4_uint16_t cmos_read(l4_uint8_t regsel)
|
||||
{
|
||||
assert(regsel < Ram_size);
|
||||
trace().printf("cmos read(%u) = 0x%x\n", regsel, _cmos[regsel]);
|
||||
return _cmos[regsel];
|
||||
}
|
||||
|
||||
l4_uint8_t _reg_sel = 0;
|
||||
Status_reg_a _reg_a;
|
||||
Status_reg_b _reg_b;
|
||||
l4_uint8_t _reg_c = 0;
|
||||
l4_uint8_t _reg_d = 0;
|
||||
|
||||
// These are written to by the guest.
|
||||
l4_uint8_t _shadow_registers[Year + 1];
|
||||
|
||||
// protect members from concurrent access
|
||||
std::mutex _mutex;
|
||||
|
||||
Alarm _alarm_timeout; //< Object handling timeout expired events.
|
||||
|
||||
// seconds since epoch as determined by external clock source
|
||||
time_t _seconds;
|
||||
|
||||
l4_uint16_t _cmos[Ram_size];
|
||||
|
||||
Vmm::Irq_sink _sink;
|
||||
l4_cpu_time_t _previous_alarm_second;
|
||||
}; // class Rtc
|
||||
|
||||
} // namespace Vdev
|
||||
|
||||
namespace {
|
||||
|
||||
struct F : Vdev::Factory
|
||||
{
|
||||
static Dbg info() { return Dbg(Dbg::Dev, Dbg::Info, "RTC"); }
|
||||
|
||||
cxx::Ref_ptr<Vdev::Device> create(Vdev::Device_lookup *devs,
|
||||
Vdev::Dt_node const &node) override
|
||||
{
|
||||
Vdev::Irq_dt_iterator it(devs, node);
|
||||
|
||||
if (it.next(devs) < 0)
|
||||
return nullptr;
|
||||
|
||||
if (!it.ic_is_virt())
|
||||
{
|
||||
info().printf("RTC requires a virtual interrupt controller.");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (it.irq() != 8)
|
||||
{
|
||||
info().printf("DT Node must specify IRQ 8 for the RTC.");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto dev = Vdev::make_device<Vdev::Rtc>(it.ic(), it.irq());
|
||||
|
||||
auto region = Vmm::Io_region(0x70, 0x71, Vmm::Region_type::Virtual);
|
||||
devs->vmm()->add_io_device(region, dev);
|
||||
devs->vmm()->register_timer_device(dev);
|
||||
|
||||
return dev;
|
||||
}
|
||||
}; // struct F
|
||||
|
||||
static F f;
|
||||
static Vdev::Device_type t = {"virt-rtc", nullptr, &f};
|
||||
|
||||
} // namespace
|
||||
@@ -1,213 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2017, 2019, 2021-2024 Kernkonzept GmbH.
|
||||
* Author(s): Philipp Eppelt <philipp.eppelt@kernkonzept.com>
|
||||
* Benjamin Lamowski <benjamin.lamowski@kernkonzept.com>
|
||||
*
|
||||
* License: see LICENSE.spdx (in this directory or the directories above)
|
||||
*/
|
||||
|
||||
#include <l4/util/cpu.h>
|
||||
|
||||
#include "vcpu_ptr.h"
|
||||
#include "vm_state_svm.h"
|
||||
#include "vm_state_vmx.h"
|
||||
#include "pt_walker.h"
|
||||
#include "mad.h"
|
||||
#include "guest.h"
|
||||
|
||||
namespace Vmm {
|
||||
|
||||
void
|
||||
Vcpu_ptr::create_state(Vm_state::Type type)
|
||||
{
|
||||
if (type == Vm_state::Type::Vmx)
|
||||
{
|
||||
auto state = reinterpret_cast<l4_vm_vmx_vcpu_state_t *>(_s);
|
||||
auto vmx = new Vmx_state(state);
|
||||
_s->user_data[Reg_vmm_type] = reinterpret_cast<l4_umword_t>(vmx);
|
||||
}
|
||||
else if (type == Vm_state::Type::Svm)
|
||||
_s->user_data[Reg_vmm_type] =
|
||||
reinterpret_cast<l4_umword_t>(new Svm_state(extended_state()));
|
||||
|
||||
else
|
||||
throw L4::Runtime_error(-L4_ENOSYS, "Unsupported HW virtualization type.");
|
||||
}
|
||||
|
||||
/**
 * Detect the hardware virtualization flavour from the CPUID vendor string.
 *
 * \return Vm_state::Type::Vmx or Vm_state::Type::Svm.
 *
 * Throws if CPUID is unavailable, the vendor is neither of the two
 * recognised ones, or SVM lacks the next_rip save feature.
 */
Vm_state::Type
Vcpu_ptr::determine_vmm_type()
{
  if (!l4util_cpu_has_cpuid())
    throw L4::Runtime_error(-L4_ENOSYS,
                            "Platform does not support CPUID. Aborting!\n");

  l4_umword_t ax, bx, cx, dx;
  l4util_cpu_cpuid(0, &ax, &bx, &cx, &dx);

  // GenuineIntel
  bool const vendor_intel =
    bx == 0x756e6547 && cx == 0x6c65746e && dx == 0x49656e69;
  if (vendor_intel)
    return Vm_state::Type::Vmx;

  // AuthenticAMD
  bool const vendor_amd =
    bx == 0x68747541 && cx == 0x444d4163 && dx == 0x69746e65;
  if (!vendor_amd)
    throw L4::Runtime_error(-L4_ENOSYS, "Platform not supported. Aborting!\n");

  warn().printf(">>> CAUTION: Support for AMD SVM is experimental, use at your own risk! <<<\n");

  // Check if the SVM features we need are present.
  l4util_cpu_cpuid(0x8000000a, &ax, &bx, &cx, &dx);

  if (!(dx & Svm_state::Cpuid_svm_feature_nrips))
    L4Re::throw_error(-L4_ENOSYS,
                      "SVM does not support next_rip save. Aborting!\n");

  // It should be safe to assume that the decode assists feature is
  // present, since all modern AMD CPUs (starting with Bulldozer)
  // implement it. However, QEMU or rather KVM-based nested virtualization
  // does not report that the feature is present (see svm_set_cpu_caps()),
  // but still provides decode assist information, e.g. for writes to CR0.
  if (!(dx & Svm_state::Cpuid_svm_feature_decode_assists))
    warn().printf("Platform does not support SVM decode assists (misreported on QEMU).\n");

  return Vm_state::Type::Svm;
}
|
||||
|
||||
/// Mem_access::Kind::Other symbolises failure to decode.
|
||||
Mem_access
|
||||
Vcpu_ptr::decode_mmio() const
|
||||
{
|
||||
Mem_access m;
|
||||
m.access = Mem_access::Other;
|
||||
|
||||
auto *vms = vm_state();
|
||||
l4_uint64_t opcode;
|
||||
try
|
||||
{
|
||||
// overwrite the virtual IP with the physical OP code
|
||||
opcode = get_pt_walker()->walk(vms->cr3(), vms->ip());
|
||||
}
|
||||
catch (L4::Runtime_error &e)
|
||||
{
|
||||
warn().printf("[%3u] Could not determine opcode for MMIO access. Page table "
|
||||
"walking failed for IP 0x%lx and reports: %s\n",
|
||||
get_vcpu_id(), vms->ip(),
|
||||
e.extra_str() ? e.extra_str() : "");
|
||||
return m;
|
||||
}
|
||||
|
||||
// amd64: vcpu regs == exc_regs
|
||||
l4_exc_regs_t *reg = reinterpret_cast<l4_exc_regs_t *>(&_s->r);
|
||||
using namespace L4mad;
|
||||
unsigned char *inst_buf = reinterpret_cast<unsigned char *>(opcode);
|
||||
// TODO: Limit inst_buf_len to size until the next non-contiguous page
|
||||
// boundary if it is < Decoder::Max_instruction_len.
|
||||
unsigned inst_buf_len = Decoder::Max_instruction_len;
|
||||
Decoder decoder(reg, vms->ip(), inst_buf, inst_buf_len);
|
||||
|
||||
bool decoded = false;
|
||||
Op op;
|
||||
Desc tgt, src;
|
||||
switch (decoder.decode(&op, &tgt, &src))
|
||||
{
|
||||
case Decoder::Result::Success: decoded = true; break;
|
||||
case Decoder::Result::Unsupported: break;
|
||||
case Decoder::Result::Invalid:
|
||||
// TODO: If size of instruction buffer is < Decoder::Max_instruction_len,
|
||||
// because instruction lies on a non-contiguous page boundary,
|
||||
// use a temporary buffer to hold instruction bytes from both pages
|
||||
// and retry decoding from that.
|
||||
break;
|
||||
}
|
||||
|
||||
if (!decoded)
|
||||
{
|
||||
unsigned char const *text = reinterpret_cast<unsigned char *>(opcode);
|
||||
Dbg().printf("[%3u] Decoding failed at 0x%lx: %02x %02x %02x %02x %02x "
|
||||
"%02x %02x <%02x> %02x %02x %02x %02x %02x %02x %02x %02x\n",
|
||||
get_vcpu_id(), vms->ip(),
|
||||
text[-7], text[-6], text[-5], text[-4], text[-3],
|
||||
text[-2], text[-1], text[0], text[1], text[2], text[3],
|
||||
text[4], text[5], text[6], text[7], text[8]);
|
||||
return m;
|
||||
}
|
||||
|
||||
if (0)
|
||||
decoder.print_insn_info(op, tgt, src);
|
||||
|
||||
m.width = op.access_width;
|
||||
|
||||
if (tgt.dtype != L4mad::Desc_reg && tgt.dtype != L4mad::Desc_mem)
|
||||
{
|
||||
Dbg().printf("[%3u] tgt type invalid %i\n", get_vcpu_id(), tgt.dtype);
|
||||
return m;
|
||||
}
|
||||
|
||||
// SRC and TGT.val contain the register number of the MMIO access. In case of
|
||||
// write, this register can be decoded to the value.
|
||||
// In case of read I need to save the register number and write to this
|
||||
// register in writeback_mmio.
|
||||
|
||||
// translate to Mem_access;
|
||||
if (op.atype == L4mad::Read)
|
||||
{
|
||||
m.access = Mem_access::Load;
|
||||
_s->user_data[Reg_mmio_read] = tgt.val >> tgt.shift;
|
||||
}
|
||||
else if (op.atype == L4mad::Write)
|
||||
{
|
||||
m.access = Mem_access::Store;
|
||||
switch (src.dtype)
|
||||
{
|
||||
case L4mad::Desc_reg:
|
||||
// src.val is the register number in MAD order; which is inverse to
|
||||
// register order in l4_vcpu_regs_t.
|
||||
m.value = *decode_reg_ptr(src.val) >> src.shift;
|
||||
break;
|
||||
case L4mad::Desc_imm:
|
||||
m.value = src.val;
|
||||
break;
|
||||
default:
|
||||
assert(false);
|
||||
m.value = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// else unknown; Other already set.
|
||||
|
||||
return m;
|
||||
}
|
||||
|
||||
/**
 * Translate a register number in decoder (MAD) order into a pointer to the
 * corresponding slot of the vCPU register block.
 *
 * \param value  Register number as reported by the instruction decoder.
 *
 * The register block is indexed in reverse: decoder number 0 maps to slot
 * Num_registers - 1.
 */
l4_umword_t *
Vcpu_ptr::decode_reg_ptr(int value) const
{
  l4_umword_t *regs = reinterpret_cast<l4_umword_t *>(&_s->r);
  return regs + (L4mad::Num_registers - 1 - value);
}
|
||||
|
||||
void
|
||||
Vcpu_ptr::reset(bool protected_mode)
|
||||
{
|
||||
vm_state()->init_state();
|
||||
|
||||
// If Uvmm is to boot a Linux kernel directly, it will do so in protected
|
||||
// mode as is required in Linux' boot protocol. Otherwise the Boot and
|
||||
// Application Processors are expected to come up in Real Mode.
|
||||
if (protected_mode)
|
||||
vm_state()->setup_linux_protected_mode(_s->r.ip, _s->r.sp);
|
||||
else
|
||||
vm_state()->setup_real_mode(_s->r.ip);
|
||||
|
||||
Guest::get_instance()->run_vm(*this);
|
||||
}
|
||||
|
||||
void
|
||||
Vcpu_ptr::hot_reset()
|
||||
{
|
||||
// assumption: reset while we already went through the normal reset once.
|
||||
// intention: Do not call Guest::run_vm() again.
|
||||
|
||||
vm_state()->init_state();
|
||||
vm_state()->setup_real_mode(_s->r.ip);
|
||||
}
|
||||
|
||||
} // namespace Vmm
|
||||
@@ -1,94 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020, 2022-2024 Kernkonzept GmbH.
|
||||
* Author(s): Sarah Hoffmann <sarah.hoffmann@kernkonzept.com>
|
||||
* Philipp Eppelt <philipp.eppelt@kernkonzept.com>
|
||||
*
|
||||
* License: see LICENSE.spdx (in this directory or the directories above)
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include "generic_vcpu_ptr.h"
|
||||
#include "mem_access.h"
|
||||
#include "vm_state.h"
|
||||
|
||||
namespace Vmm {
|
||||
|
||||
class Pt_walker;
|
||||
|
||||
class Vcpu_ptr : public Generic_vcpu_ptr
|
||||
{
|
||||
public:
|
||||
enum User_data_regs_arch
|
||||
{
|
||||
Reg_vmm_type = Reg_arch_base,
|
||||
Reg_mmio_read,
|
||||
// <insert further register usage here>
|
||||
Reg_must_be_last_before_ucode,
|
||||
Reg_ucode_rev = 6, // must be in sync with Fiasco
|
||||
};
|
||||
static_assert(Reg_ucode_rev >= Reg_must_be_last_before_ucode,
|
||||
"Last user data register is reserved for microcode revision.");
|
||||
|
||||
explicit Vcpu_ptr(l4_vcpu_state_t *s) : Generic_vcpu_ptr(s)
|
||||
{
|
||||
if (s)
|
||||
create_state(determine_vmm_type());
|
||||
}
|
||||
|
||||
bool pf_write() const
|
||||
{
|
||||
return vm_state()->pf_write();
|
||||
}
|
||||
|
||||
void thread_attach()
|
||||
{
|
||||
control_ext(L4::Cap<L4::Thread>());
|
||||
}
|
||||
|
||||
Vm_state *vm_state() const
|
||||
{ return reinterpret_cast<Vm_state *>(_s->user_data[Reg_vmm_type]);}
|
||||
|
||||
Mem_access decode_mmio() const;
|
||||
|
||||
void writeback_mmio(Mem_access const m)
|
||||
{
|
||||
// used to write read value back to register it is read to.
|
||||
*decode_reg_ptr(_s->user_data[Reg_mmio_read]) = m.value;
|
||||
}
|
||||
|
||||
void reset(bool protected_mode);
|
||||
void hot_reset();
|
||||
|
||||
l4_umword_t ucode_revision() const
|
||||
{ return _s->user_data[Reg_ucode_rev]; }
|
||||
|
||||
template <typename ERR_DBG>
|
||||
void dump_regs_t(l4_addr_t vm_ip, ERR_DBG out) const
|
||||
{
|
||||
unsigned vcpu_id = get_vcpu_id();
|
||||
l4_vcpu_regs_t *regs = &_s->r;
|
||||
|
||||
out.printf("[%3u] RAX 0x%lx\nRBX 0x%lx\nRCX 0x%lx\nRDX 0x%lx\nRSI 0x%lx\n"
|
||||
"RDI 0x%lx\nRSP 0x%lx\nRBP 0x%lx\nR8 0x%lx\nR9 0x%lx\n"
|
||||
"R10 0x%lx\nR11 0x%lx\nR12 0x%lx\nR13 0x%lx\nR14 0x%lx\n"
|
||||
"R15 0x%lx\nRIP 0x%lx\nvCPU RIP 0x%lx\n",
|
||||
vcpu_id, regs->ax, regs->bx, regs->cx, regs->dx, regs->si,
|
||||
regs->di, regs->sp, regs->bp, regs->r8, regs->r9, regs->r10,
|
||||
regs->r11, regs->r12, regs->r13, regs->r14, regs->r15, vm_ip,
|
||||
regs->ip);
|
||||
}
|
||||
|
||||
private:
|
||||
void *extended_state() const
|
||||
{
|
||||
return (void *)(((char *)_s) + L4_VCPU_OFFSET_EXT_STATE);
|
||||
}
|
||||
|
||||
Vm_state::Type determine_vmm_type();
|
||||
void create_state(Vm_state::Type type);
|
||||
l4_umword_t *decode_reg_ptr(int value) const;
|
||||
|
||||
}; // class Vcpu_ptr
|
||||
|
||||
} // namespace Vmm
|
||||
|
||||
@@ -1,431 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2024 Kernkonzept GmbH.
|
||||
* Author(s): Philipp Eppelt <philipp.eppelt@kernkonzept.com>
|
||||
*
|
||||
* License: see LICENSE.spdx (in this directory or the directories above)
|
||||
*/
|
||||
|
||||
#include <l4/re/env>
|
||||
#include <l4/re/error_helper>
|
||||
#include <l4/re/util/cap_alloc>
|
||||
#include <l4/re/util/unique_cap>
|
||||
|
||||
#include <climits>
|
||||
|
||||
#include "debug.h"
|
||||
#include "virt_lapic.h"
|
||||
#include "mad.h"
|
||||
#include "guest.h"
|
||||
|
||||
|
||||
namespace Gic {
|
||||
|
||||
using L4Re::chkcap;
|
||||
using L4Re::chksys;
|
||||
|
||||
/**
 * Create the virtual local APIC of one vCPU.
 *
 * \param id   ID of this local APIC.
 * \param cpu  CPU device this local APIC belongs to.
 *
 * Allocates and creates the notification IRQ, puts the registers into
 * their reset state and creates the APIC timer.
 */
Virt_lapic::Virt_lapic(unsigned id, cxx::Ref_ptr<Vmm::Cpu_dev> cpu)
: _lapic_irq(chkcap(L4Re::Util::make_unique_cap<L4::Irq>(),
                    "Allocate local APIC notification IRQ.")),
  _lapic_x2_id(id),
  _lapic_version(Lapic_version),
  _x2apic_enabled(false),
  _nmi_pending(false),
  _cpu(cpu),
  _registry(cpu->vcpu().get_ipc_registry())
{
  trace().printf("Virt_lapic ctor; ID 0x%x\n", id);

  chksys(L4Re::Env::env()->factory()->create(_lapic_irq.get()),
         "Create APIC IRQ.");

  // Set reset values of the LAPIC registers
  memset(&_regs, 0, sizeof(_regs));
  _regs.dfr = -1U;
  _regs.svr = 0x000000ff;

  // All local vector table entries share the same reset value.
  _regs.perf = 0x00010000;
  _regs.therm = _regs.perf;
  _regs.cmci = _regs.therm;
  _regs.err = 0x00010000;
  _regs.lint[1] = _regs.err;
  _regs.lint[0] = _regs.lint[1];

  _apic_timer = Vdev::make_device<Apic_timer>(this);
}
|
||||
|
||||
void
|
||||
Virt_lapic::set(unsigned irq)
|
||||
{
|
||||
irq_trigger(irq);
|
||||
}
|
||||
|
||||
void
|
||||
Virt_lapic::set(Vdev::Msix::Data_register_format data)
|
||||
{
|
||||
// assumption 1: delivery_mode lowest prio already arbitrated
|
||||
// assumption 2: only called if this APIC is destination
|
||||
using namespace Vdev::Msix;
|
||||
|
||||
switch (data.delivery_mode())
|
||||
{
|
||||
case Dm_fixed: [[fallthrough]];
|
||||
case Dm_lowest_prio:
|
||||
irq_trigger(data.vector(), data.trigger_mode(), true);
|
||||
break;
|
||||
case Dm_smi: info().printf("SMI dropped at LAPIC 0x%x\n", id()); break;
|
||||
case Dm_nmi: nmi(); break;
|
||||
case Dm_init: init_ipi(); break;
|
||||
case Dm_startup: startup_ipi(data); break;
|
||||
case Dm_extint: irq_trigger(data.vector(), false, false); break;
|
||||
default:
|
||||
info().printf("LAPIC 0x%x drops unknown MSI. Delivery mode 0x%x, Vector "
|
||||
"0x%x, data: 0x%llx\n",
|
||||
id(), data.delivery_mode().get(), data.vector().get(),
|
||||
data.raw);
|
||||
break;
|
||||
};
|
||||
}
|
||||
|
||||
void
|
||||
Virt_lapic::init_ipi()
|
||||
{
|
||||
// Only sleeping vCPUs must be rescheduled
|
||||
if (_cpu->get_cpu_state() == Vmm::Cpu_dev::Sleeping)
|
||||
_cpu->reschedule();
|
||||
|
||||
_cpu->send_init_ipi();
|
||||
_sipi_cnt = 0;
|
||||
}
|
||||
|
||||
void
|
||||
Virt_lapic::startup_ipi(Vdev::Msix::Data_register_format data)
|
||||
{
|
||||
// only act on the first SIPI
|
||||
if (_sipi_cnt++)
|
||||
return;
|
||||
|
||||
enum : l4_uint32_t
|
||||
{
|
||||
Icr_startup_page_shift = 12
|
||||
};
|
||||
|
||||
l4_addr_t start_eip = data.vector() << Icr_startup_page_shift;
|
||||
start_cpu(start_eip);
|
||||
_cpu->send_sipi();
|
||||
}
|
||||
|
||||
void
|
||||
Virt_lapic::start_cpu(l4_addr_t entry)
|
||||
{
|
||||
Vmm::Vcpu_ptr vcpu = _cpu->vcpu();
|
||||
vcpu->r.sp = 0;
|
||||
vcpu->r.ip = entry; // r.ip used to communicate entry to Vcpu_ptr.reset()
|
||||
|
||||
info().printf("Starting CPU %u on EIP 0x%lx\n", _lapic_x2_id, entry);
|
||||
}
|
||||
|
||||
void
|
||||
Virt_lapic::bind_irq_src_handler(unsigned irq, Irq_src_handler *handler)
|
||||
{
|
||||
assert (irq < 256); // sources array length
|
||||
if(handler && _sources[irq] && handler != _sources[irq])
|
||||
info().printf("[LAPIC 0x%x] IRQ src handler for IRQ %u already set to "
|
||||
"%p, new %p\n",
|
||||
_lapic_x2_id, irq, _sources[irq], handler);
|
||||
|
||||
_sources[irq] = handler;
|
||||
}
|
||||
|
||||
/**
 * Look up the source handler registered for an interrupt.
 *
 * \param irq  Interrupt number; must be below 256.
 *
 * \return The entry stored for `irq` in the sources array.
 */
Irq_src_handler *
Virt_lapic::get_irq_src_handler(unsigned irq) const
{
  // The sources array has 256 entries.
  assert (irq < 256);

  Irq_src_handler *registered = _sources[irq];
  return registered;
}
|
||||
|
||||
int
|
||||
Virt_lapic::dt_get_interrupt(fdt32_t const *, int, int *) const
|
||||
{ return 1; }
|
||||
|
||||
void
|
||||
Virt_lapic::nmi()
|
||||
{
|
||||
_nmi_pending.store(true, std::memory_order_release);
|
||||
_lapic_irq->trigger();
|
||||
}
|
||||
|
||||
/**
|
||||
* Enqueue an interrupt and trigger an IPC in the vCPU.
|
||||
*
|
||||
* \param irq Interrupt to inject.
|
||||
*/
|
||||
void
|
||||
Virt_lapic::irq_trigger(l4_uint32_t irq, bool level, bool irr)
|
||||
{
|
||||
bool trigger = true;
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(_int_mutex);
|
||||
|
||||
if (irr)
|
||||
{
|
||||
// don't trigger lapic_irq, if the IRR has this IRQ already queued.
|
||||
trigger = !_regs.irr.set_irq(irq);
|
||||
if (level)
|
||||
_regs.tmr.set_irq(irq);
|
||||
else
|
||||
_regs.tmr.clear_irq(irq);
|
||||
}
|
||||
else
|
||||
{
|
||||
// don't trigger lapic_irq again, if an IRQ is already queued.
|
||||
trigger = _non_irr_irqs.empty();
|
||||
_non_irr_irqs.push(irq);
|
||||
}
|
||||
}
|
||||
|
||||
if (trigger)
|
||||
_lapic_irq->trigger();
|
||||
}
|
||||
|
||||
bool
|
||||
Virt_lapic::next_pending_nmi()
|
||||
{
|
||||
bool expected = true;
|
||||
return _nmi_pending.compare_exchange_strong(expected, false,
|
||||
std::memory_order_acquire,
|
||||
std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
bool
|
||||
Virt_lapic::is_nmi_pending()
|
||||
{ return _nmi_pending.load(std::memory_order_relaxed); }
|
||||
|
||||
int
|
||||
Virt_lapic::next_pending_irq()
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(_int_mutex);
|
||||
|
||||
if (!_non_irr_irqs.empty())
|
||||
{
|
||||
unsigned irq = _non_irr_irqs.front();
|
||||
_non_irr_irqs.pop();
|
||||
return irq;
|
||||
}
|
||||
|
||||
auto highest_irr = _regs.irr.get_highest_irq();
|
||||
if (highest_irr >= 0)
|
||||
{
|
||||
auto highest_isr = _regs.isr.get_highest_irq();
|
||||
if (highest_irr > highest_isr)
|
||||
{
|
||||
_regs.isr.set_irq(highest_irr);
|
||||
_regs.irr.clear_irq(highest_irr);
|
||||
return highest_irr;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
bool
|
||||
Virt_lapic::is_irq_pending()
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(_int_mutex);
|
||||
return !_non_irr_irqs.empty() || _regs.irr.has_irq();
|
||||
}
|
||||
|
||||
bool
|
||||
Virt_lapic::read_msr(unsigned msr, l4_uint64_t *value) const
|
||||
{
|
||||
switch (msr)
|
||||
{
|
||||
case Msr_ia32_apic_base: // APIC base, Vol. 3A 10.4.4
|
||||
*value = Lapic_access_handler::Mmio_addr | Apic_base_enabled;
|
||||
|
||||
if (_lapic_x2_id == 0)
|
||||
*value |= Apic_base_bsp_processor;
|
||||
|
||||
if (_x2apic_enabled)
|
||||
*value |= Apic_base_x2_enabled;
|
||||
break;
|
||||
case Msr_ia32_tsc_deadline:
|
||||
*value = _apic_timer->read_tsc_deadline_msr();
|
||||
break;
|
||||
case Msr_ia32_x2apic_apicid:
|
||||
*value = _x2apic_enabled
|
||||
? _lapic_x2_id
|
||||
: (_lapic_x2_id << Xapic_mode_local_apic_id_shift);
|
||||
break;
|
||||
case Msr_ia32_x2apic_version: *value = _lapic_version; break;
|
||||
case Msr_ia32_x2apic_tpr: *value = _regs.tpr; break;
|
||||
case Msr_ia32_x2apic_ppr: *value = _regs.ppr; break;
|
||||
case Msr_ia32_x2apic_ldr: *value = _regs.ldr; break;
|
||||
case Mmio_apic_destination_format_register:
|
||||
// not existent in x2apic mode
|
||||
if (!_x2apic_enabled)
|
||||
*value = _regs.dfr;
|
||||
break;
|
||||
case Msr_ia32_x2apic_sivr: *value = _regs.svr; break;
|
||||
case 0x810:
|
||||
case 0x811:
|
||||
case 0x812:
|
||||
case 0x813:
|
||||
case 0x814:
|
||||
case 0x815:
|
||||
case 0x816:
|
||||
case Msr_ia32_x2apic_isr7:
|
||||
*value = _regs.isr.get_reg(msr - 0x810);
|
||||
break;
|
||||
case 0x818:
|
||||
case 0x819:
|
||||
case 0x81a:
|
||||
case 0x81b:
|
||||
case 0x81c:
|
||||
case 0x81d:
|
||||
case 0x81e:
|
||||
case Msr_ia32_x2apic_tmr7:
|
||||
*value = _regs.tmr.get_reg(msr - 0x818);
|
||||
break;
|
||||
case 0x820:
|
||||
case 0x821:
|
||||
case 0x822:
|
||||
case 0x823:
|
||||
case 0x824:
|
||||
case 0x825:
|
||||
case 0x826:
|
||||
case Msr_ia32_x2apic_irr7:
|
||||
*value = _regs.irr.get_reg(msr - 0x820);
|
||||
break;
|
||||
case Msr_ia32_x2apic_esr: *value = _regs.esr; break;
|
||||
case Msr_ia32_x2apic_lvt_cmci: *value = _regs.cmci; break;
|
||||
// 0x830 handled by Icr_handler
|
||||
case Msr_ia32_x2apic_lvt_timer:
|
||||
*value = _apic_timer->read_lvt_timer_reg();
|
||||
break;
|
||||
case Msr_ia32_x2apic_lvt_thermal: *value = _regs.therm; break;
|
||||
case Msr_ia32_x2apic_lvt_pmi: *value = _regs.perf; break;
|
||||
case Msr_ia32_x2apic_lvt_lint0: *value = _regs.lint[0]; break;
|
||||
case Msr_ia32_x2apic_lvt_lint1: *value = _regs.lint[1]; break;
|
||||
case Msr_ia32_x2apic_lvt_error: *value = _regs.err; break;
|
||||
case Msr_ia32_x2apic_init_count:
|
||||
*value = _apic_timer->read_tmr_init();
|
||||
break;
|
||||
case Msr_ia32_x2apic_cur_count: *value = _apic_timer->read_tmr_cur(); break;
|
||||
case Msr_ia32_x2apic_div_conf:
|
||||
*value = _apic_timer->read_divide_configuration_reg();
|
||||
break;
|
||||
|
||||
default: return false;
|
||||
}
|
||||
|
||||
if (0)
|
||||
Dbg().printf("ReadAPIC MSR 0x%x. Result: 0x%x\n", (unsigned)msr,
|
||||
(unsigned)*value);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
Virt_lapic::write_msr(unsigned msr, l4_uint64_t value)
|
||||
{
|
||||
switch(msr)
|
||||
{
|
||||
case Msr_ia32_apic_base:
|
||||
_x2apic_enabled = value & Apic_base_x2_enabled;
|
||||
if (_x2apic_enabled)
|
||||
{
|
||||
Dbg().printf("------ x2APIC enabled\n");
|
||||
// from Intel SDM (October 2017)
|
||||
// Logical x2APIC ID = [(x2APIC ID[19:4] « 16) | (1 « x2APIC ID[3:0])]
|
||||
_regs.ldr =
|
||||
(_lapic_x2_id & 0xffff0) << 16 | 1U << (_lapic_x2_id & 0xf);
|
||||
}
|
||||
|
||||
// APIC Base field, Vol. 3A 10.4.4
|
||||
if (!((value >> 12) & (Lapic_access_handler::Mmio_addr >> 12)))
|
||||
// Vol. 3A 10.4.5
|
||||
warn().printf(
|
||||
"Relocating the Local APIC Registers is not supported.\n");
|
||||
break;
|
||||
case Msr_ia32_tsc_deadline:
|
||||
_apic_timer->write_tsc_deadline_msr(value);
|
||||
break;
|
||||
case Msr_ia32_x2apic_version: break; // RO register: ignore write
|
||||
case Msr_ia32_x2apic_tpr: _regs.tpr = value; break;
|
||||
case Msr_ia32_x2apic_ldr:
|
||||
// not writable in x2apic mode
|
||||
if (!_x2apic_enabled)
|
||||
_regs.ldr = value;
|
||||
break;
|
||||
case Mmio_apic_destination_format_register:
|
||||
// not existent in x2apic mode; writes by system software only in
|
||||
// disabled APIC state; which currently isn't supported. => write ignored
|
||||
break;
|
||||
case Msr_ia32_x2apic_sivr:
|
||||
_regs.svr = value; break; // TODO react on APIC SW en/disable
|
||||
case Msr_ia32_x2apic_eoi:
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(_int_mutex);
|
||||
int irq_num = _regs.isr.clear_highest_irq();
|
||||
if (irq_num > 0)
|
||||
{
|
||||
Irq_src_handler *hdlr = get_irq_src_handler(irq_num);
|
||||
if (hdlr)
|
||||
hdlr->eoi();
|
||||
}
|
||||
}
|
||||
if (value != 0)
|
||||
{
|
||||
Dbg().printf("WARNING: write to EOI not zero, 0x%llx\n", value);
|
||||
}
|
||||
break;
|
||||
case Msr_ia32_x2apic_esr: _regs.esr = 0; break;
|
||||
case Msr_ia32_x2apic_lvt_cmci: _regs.cmci = value; break;
|
||||
// 0x830 handled by Icr_handler
|
||||
case Msr_ia32_x2apic_lvt_timer:
|
||||
_apic_timer->write_lvt_timer_reg(value);
|
||||
break;
|
||||
case Msr_ia32_x2apic_lvt_thermal: _regs.therm = value; break;
|
||||
case Msr_ia32_x2apic_lvt_pmi: _regs.perf = value; break;
|
||||
case Msr_ia32_x2apic_lvt_lint0: _regs.lint[0] = value; break;
|
||||
case Msr_ia32_x2apic_lvt_lint1: _regs.lint[1] = value; break;
|
||||
case Msr_ia32_x2apic_lvt_error: _regs.err = value; break;
|
||||
case Msr_ia32_x2apic_init_count:
|
||||
_apic_timer->write_tmr_init(value);
|
||||
break;
|
||||
case Msr_ia32_x2apic_div_conf:
|
||||
_apic_timer->write_divide_configuration_reg(value);
|
||||
break;
|
||||
case Msr_ia32_x2apic_self_ipi:
|
||||
if (_x2apic_enabled)
|
||||
irq_trigger(value & 0xff);
|
||||
else
|
||||
// if X2APIC is not enabled, writing IA32_SELF_IPI incurs a #GP
|
||||
return false;
|
||||
break;
|
||||
|
||||
default: return false;
|
||||
}
|
||||
|
||||
if (0 && msr != 0x80b)
|
||||
Dbg().printf("WARNING: APIC write to 0x%x: 0x%llx\n", msr, value);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace Gic
|
||||
|
||||
#include "device_factory.h"
|
||||
#include "guest.h"
|
||||
|
||||
namespace {
|
||||
|
||||
struct F : Vdev::Factory
|
||||
{
|
||||
cxx::Ref_ptr<Vdev::Device> create(Vdev::Device_lookup *devs,
|
||||
Vdev::Dt_node const &) override
|
||||
{
|
||||
auto apics = devs->vmm()->apic_array();
|
||||
auto msix_ctrl = Vdev::make_device<Gic::Msix_control>(apics);
|
||||
devs->vmm()->icr_handler()->register_msix_ctrl(msix_ctrl);
|
||||
return msix_ctrl;
|
||||
}
|
||||
};
|
||||
|
||||
static F f;
|
||||
static Vdev::Device_type e = {"intel,msi-controller", nullptr, &f};
|
||||
|
||||
} // namespace
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,86 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2019, 2022-2024 Kernkonzept GmbH.
|
||||
* Author(s): Philipp Eppelt <philipp.eppelt@kernkonzept.com>
|
||||
*
|
||||
* License: see LICENSE.spdx (in this directory or the directories above)
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <l4/sys/types.h>
|
||||
#include <l4/cxx/bitfield>
|
||||
|
||||
namespace Vmm {
|
||||
|
||||
/// Abstraction of the VMX and SVM event injection format.
|
||||
struct Injection_event
|
||||
{
|
||||
l4_uint64_t raw = 0;
|
||||
CXX_BITFIELD_MEMBER(0, 31, event, raw);
|
||||
CXX_BITFIELD_MEMBER(32, 63, error, raw);
|
||||
// SVM and VMX both use the same bit encoding in the lower 11 bits.
|
||||
CXX_BITFIELD_MEMBER(0, 7, vector, raw);
|
||||
CXX_BITFIELD_MEMBER(8, 10, type, raw);
|
||||
CXX_BITFIELD_MEMBER(11, 11, error_valid, raw);
|
||||
// SVM and VMX both use bit 31 to indicate validity of the value.
|
||||
CXX_BITFIELD_MEMBER(31, 31, valid, raw);
|
||||
|
||||
Injection_event(l4_uint32_t ev, l4_uint32_t err)
|
||||
{
|
||||
event() = ev;
|
||||
error() = err;
|
||||
}
|
||||
|
||||
Injection_event(unsigned char v, unsigned char t, bool err_valid = false,
|
||||
l4_uint32_t err_code = 0)
|
||||
{
|
||||
vector() = v;
|
||||
type() = t;
|
||||
error_valid() = err_valid;
|
||||
error() = err_code;
|
||||
valid() = 1;
|
||||
}
|
||||
|
||||
explicit Injection_event(l4_uint64_t val) : raw(val) {}
|
||||
};
|
||||
|
||||
class Event_recorder;
|
||||
|
||||
class Vm_state
|
||||
{
|
||||
public:
|
||||
enum class Type { Vmx, Svm };
|
||||
|
||||
virtual ~Vm_state() = 0;
|
||||
|
||||
virtual Type type() const = 0;
|
||||
|
||||
virtual void init_state() = 0;
|
||||
virtual void setup_linux_protected_mode(l4_addr_t entry,
|
||||
l4_addr_t stack_addr) = 0;
|
||||
virtual void setup_real_mode(l4_addr_t entry) = 0;
|
||||
|
||||
virtual l4_umword_t ip() const = 0;
|
||||
virtual l4_umword_t sp() const = 0;
|
||||
virtual bool pf_write() const = 0;
|
||||
virtual l4_umword_t cr3() const = 0;
|
||||
virtual l4_uint64_t xcr0() const = 0;
|
||||
|
||||
virtual bool read_msr(unsigned msr, l4_uint64_t *value) const = 0;
|
||||
virtual bool write_msr(unsigned msr, l4_uint64_t value, Event_recorder *ev_rec) = 0;
|
||||
|
||||
virtual Injection_event pending_event_injection() = 0;
|
||||
virtual void inject_event(Injection_event const &ev) = 0;
|
||||
|
||||
virtual bool can_inject_nmi() const = 0;
|
||||
virtual bool can_inject_interrupt() const = 0;
|
||||
virtual void disable_interrupt_window() = 0;
|
||||
virtual void enable_interrupt_window() = 0;
|
||||
virtual void disable_nmi_window() = 0;
|
||||
virtual void enable_nmi_window() = 0;
|
||||
|
||||
// must only be called once per VM entry
|
||||
virtual void advance_entry_ip(unsigned bytes) = 0;
|
||||
};
|
||||
|
||||
} // namespace Vmm
|
||||
|
||||
@@ -1,699 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2021, 2023-2024 Kernkonzept GmbH.
|
||||
* Author(s): Georg Kotheimer <georg.kotheimer@kernkonzept.com>
|
||||
*
|
||||
* License: see LICENSE.spdx (in this directory or the directories above)
|
||||
*/
|
||||
|
||||
#include <l4/re/error_helper>
|
||||
|
||||
#include "vm_state_svm.h"
|
||||
#include "consts.h"
|
||||
#include "mad.h"
|
||||
|
||||
namespace Vmm {
|
||||
|
||||
void
|
||||
Svm_state::init_state()
|
||||
{
|
||||
// Does not matter, Linux overwrites it...
|
||||
_vmcb->state_save_area.ldtr.selector = 0;
|
||||
_vmcb->state_save_area.ldtr.attrib = 0;
|
||||
_vmcb->state_save_area.ldtr.limit = 0;
|
||||
_vmcb->state_save_area.ldtr.base = 0;
|
||||
|
||||
// TODO: Setup GDTR, IDTR? (not done on VMX)
|
||||
|
||||
// Always use nested paging!
|
||||
_vmcb->control_area.np_enable = 1;
|
||||
// Initiated to default values at reset: WB,WT,WC,UC,WB,WT,UC-,UC
|
||||
_vmcb->state_save_area.g_pat = 0x0007040600010406ULL;
|
||||
// Reset value of XCR0
|
||||
_vmcb->state_save_area.xcr0 = 1ULL;
|
||||
|
||||
_vmcb->state_save_area.rflags = 0;
|
||||
_vmcb->state_save_area.cr3 = 0;
|
||||
_vmcb->state_save_area.dr6 = 0;
|
||||
_vmcb->state_save_area.dr7 = 0;
|
||||
|
||||
_vmcb->control_area.eventinj = 0;
|
||||
|
||||
// Enable SVM
|
||||
_vmcb->state_save_area.efer = Efer_svme_enable;
|
||||
|
||||
// Intercept DR accesses.
|
||||
// The kernel enforces 0xff3f, to keep the behavior consistent with VMX, we
|
||||
// intercept all DR accesses.
|
||||
_vmcb->control_area.intercept_rd_drX = 0xffff;
|
||||
_vmcb->control_area.intercept_wr_drX = 0xffff;
|
||||
|
||||
_vmcb->control_area.intercept_exceptions = 0;
|
||||
|
||||
_vmcb->control_area.intercept_instruction0 =
|
||||
Intercept_intr | Intercept_nmi | Intercept_smi | Intercept_init
|
||||
| Intercept_vintr | Intercept_cr0_sel_write | Intercept_rdpmc
|
||||
| Intercept_cpuid | Intercept_invd | Intercept_hlt | Intercept_ioio
|
||||
| Intercept_msr | Intercept_task_switch | Intercept_freeze
|
||||
| Intercept_shutdown;
|
||||
|
||||
// TODO: These are the instructions intercepts that Fiasco enforces. Check
|
||||
// if we intercept too less or too much...
|
||||
_vmcb->control_area.intercept_instruction1 =
|
||||
Intercept_vmrun | Intercept_vmmcall | Intercept_vmload
|
||||
| Intercept_vmsave | Intercept_stgi | Intercept_clgi | Intercept_skinit
|
||||
| Intercept_rdtscp | Intercept_monitor | Intercept_mwait
|
||||
| Intercept_xsetbv;
|
||||
|
||||
mark_all_dirty();
|
||||
}
|
||||
|
||||
void
|
||||
Svm_state::setup_linux_protected_mode(l4_addr_t entry, l4_addr_t stack_addr)
|
||||
{
|
||||
_vmcb->state_save_area.cs.selector = 0x10;
|
||||
_vmcb->state_save_area.cs.attrib = 0xc9a; // TYPE=10=Read/Execute, S, P, DB, G
|
||||
_vmcb->state_save_area.cs.limit = 0xffffffff;
|
||||
_vmcb->state_save_area.cs.base = 0;
|
||||
|
||||
_vmcb->state_save_area.ss.selector = 0x18;
|
||||
_vmcb->state_save_area.ss.attrib = 0xc92; // TYPE=2=Read/Write, S, P, DB, G
|
||||
_vmcb->state_save_area.ss.limit = 0xffffffff;
|
||||
_vmcb->state_save_area.ss.base = 0;
|
||||
|
||||
_vmcb->state_save_area.ds.selector = 0x18;
|
||||
_vmcb->state_save_area.ds.attrib = 0xc92;
|
||||
_vmcb->state_save_area.ds.limit = 0xffffffff;
|
||||
_vmcb->state_save_area.ds.base = 0;
|
||||
|
||||
_vmcb->state_save_area.es.selector = 0x18;
|
||||
_vmcb->state_save_area.es.attrib = 0xc92;
|
||||
_vmcb->state_save_area.es.limit = 0xffffffff;
|
||||
_vmcb->state_save_area.es.base = 0;
|
||||
|
||||
_vmcb->state_save_area.fs.selector = 0x0;
|
||||
_vmcb->state_save_area.fs.attrib = 0xcf3; // Equivalent to VMX
|
||||
_vmcb->state_save_area.fs.limit = 0xffffffff;
|
||||
_vmcb->state_save_area.fs.base = 0;
|
||||
|
||||
_vmcb->state_save_area.gs.selector = 0x0;
|
||||
_vmcb->state_save_area.gs.attrib = 0xcf3;
|
||||
_vmcb->state_save_area.gs.limit = 0xffffffff;
|
||||
_vmcb->state_save_area.gs.base = 0;
|
||||
|
||||
_vmcb->state_save_area.tr.selector = 0x28;
|
||||
_vmcb->state_save_area.tr.attrib = 0x8b; // TYPE=11, P
|
||||
_vmcb->state_save_area.tr.limit = 0x67; // TODO: VMX uses 67 here
|
||||
_vmcb->state_save_area.tr.base = 0;
|
||||
|
||||
_vmcb->state_save_area.rip = entry;
|
||||
_vmcb->state_save_area.rsp = stack_addr;
|
||||
_vmcb->state_save_area.cr0 = 0x10031;
|
||||
_vmcb->state_save_area.cr4 = 0x690;
|
||||
}
|
||||
|
||||
/**
|
||||
* Setup the Real Mode startup procedure for AP startup and BSP resume.
|
||||
*
|
||||
* This follows the hardware reset behavior described in AMD APM "14.1.5
|
||||
* Fetching the first instruction".
|
||||
*/
|
||||
void
|
||||
Svm_state::setup_real_mode(l4_addr_t entry)
|
||||
{
|
||||
if (entry == 0xfffffff0U)
|
||||
{
|
||||
// Bootstrap Processor (BSP) boot
|
||||
_vmcb->state_save_area.cs.selector = 0xf000U;
|
||||
_vmcb->state_save_area.cs.base = 0xffff0000U;
|
||||
_vmcb->state_save_area.rip = 0xfff0U;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Application Processor (AP) boot via Startup IPI (SIPI) or resume
|
||||
// from suspend.
|
||||
// cs_base contains the cached address computed from cs_selector. After
|
||||
// reset cs_base contains what we set until the first cs selector is
|
||||
// loaded. We use the waking vector or SIPI vector directly, because
|
||||
// tianocore cannot handle the CS_BASE + IP split.
|
||||
_vmcb->state_save_area.cs.selector = entry >> 4;
|
||||
_vmcb->state_save_area.cs.base = entry;
|
||||
_vmcb->state_save_area.rip = 0;
|
||||
}
|
||||
|
||||
_vmcb->state_save_area.cs.attrib = 0x9b; // TYPE=11, S, P
|
||||
_vmcb->state_save_area.cs.limit = 0xffff;
|
||||
|
||||
_vmcb->state_save_area.ss.selector = 0x18;
|
||||
_vmcb->state_save_area.ss.attrib = 0x93; // TYPE=3, S, P
|
||||
_vmcb->state_save_area.ss.limit = 0xffff;
|
||||
_vmcb->state_save_area.ss.base = 0;
|
||||
|
||||
_vmcb->state_save_area.ds.selector = 0x18;
|
||||
_vmcb->state_save_area.ds.attrib = 0x93;
|
||||
_vmcb->state_save_area.ds.limit = 0xffff;
|
||||
_vmcb->state_save_area.ds.base = 0;
|
||||
|
||||
_vmcb->state_save_area.es.selector = 0x18;
|
||||
_vmcb->state_save_area.es.attrib = 0x93;
|
||||
_vmcb->state_save_area.es.limit = 0xffff;
|
||||
_vmcb->state_save_area.es.base = 0;
|
||||
|
||||
_vmcb->state_save_area.fs.selector = 0x0;
|
||||
_vmcb->state_save_area.fs.attrib = 0x93;
|
||||
_vmcb->state_save_area.fs.limit = 0xffff;
|
||||
_vmcb->state_save_area.fs.base = 0;
|
||||
|
||||
_vmcb->state_save_area.gs.selector = 0x0;
|
||||
_vmcb->state_save_area.gs.attrib = 0x93;
|
||||
_vmcb->state_save_area.gs.limit = 0xffff;
|
||||
_vmcb->state_save_area.gs.base = 0;
|
||||
|
||||
_vmcb->state_save_area.tr.selector = 0x0;
|
||||
_vmcb->state_save_area.tr.attrib = 0x8b; // TYPE=11, P
|
||||
_vmcb->state_save_area.tr.limit = 0xffff;
|
||||
_vmcb->state_save_area.tr.base = 0;
|
||||
|
||||
_vmcb->state_save_area.rsp = 0;
|
||||
_vmcb->state_save_area.cr0 = 0x10030;
|
||||
_vmcb->state_save_area.cr4 = 0x680;
|
||||
|
||||
// clear in SW state to prevent injection of pending events from before
|
||||
// INIT/STARTUP IPI.
|
||||
_vmcb->control_area.exitintinfo = 0ULL;
|
||||
}
|
||||
|
||||
bool
|
||||
Svm_state::determine_next_ip_from_ip(l4_vcpu_regs_t *regs,
|
||||
unsigned char *inst_buf,
|
||||
unsigned inst_buf_len)
|
||||
{
|
||||
using namespace L4mad;
|
||||
Op op;
|
||||
Desc tgt, src;
|
||||
Decoder decoder(reinterpret_cast<l4_exc_regs_t *>(regs), ip(), inst_buf,
|
||||
inst_buf_len);
|
||||
if (decoder.decode(&op, &tgt, &src) != Decoder::Result::Success)
|
||||
{
|
||||
warn().printf("Could not decode instruction for current ip\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
trace().printf("Advance instruction pointer n_rip = 0x%lx + 0x%x\n",
|
||||
ip(), op.insn_len);
|
||||
|
||||
_vmcb->control_area.n_rip = ip() + op.insn_len;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
Svm_state::read_msr(unsigned msr, l4_uint64_t *value) const
|
||||
{
|
||||
switch (msr)
|
||||
{
|
||||
case 0x8b: // IA32_BIOS_SIGN_ID
|
||||
case 0x1a0: // IA32_MISC_ENABLE
|
||||
*value = 0U;
|
||||
break;
|
||||
case 0x3a: // IA32_FEATURE_CONTROL
|
||||
// Lock register so the guest does not try to enable anything.
|
||||
*value = 1U;
|
||||
break;
|
||||
case 0x277: // PAT
|
||||
*value =_vmcb->state_save_area.g_pat;
|
||||
break;
|
||||
case 0xc0000080: // efer
|
||||
// Hide SVME bit
|
||||
*value = _vmcb->state_save_area.efer & ~Efer_svme_enable;
|
||||
break;
|
||||
case 0xc0010140: // OSVW_ID_Length
|
||||
// TODO: Report errata to the guest? Allow direct read access to OSVW
|
||||
// register in msrpm in Fiasco?
|
||||
*value = 0U;
|
||||
break;
|
||||
|
||||
case 0xc001001f: // MSR_AMD64_NB_CFG
|
||||
// can all be savely ignored
|
||||
*value = 0;
|
||||
break;
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
Svm_state::write_msr(unsigned msr, l4_uint64_t value, Event_recorder *ev_rec)
|
||||
{
|
||||
switch (msr)
|
||||
{
|
||||
case 0x277: // PAT
|
||||
// sanitization of 7 PAT values
|
||||
// 0xF8 are reserved bits
|
||||
// 0x2 and 0x3 are reserved encodings
|
||||
// usage of reserved bits and encodings results in a #GP
|
||||
if (value & 0xF8F8F8F8F8F8F8F8ULL)
|
||||
{
|
||||
ev_rec->make_add_event<Event_exc>(Event_prio::Exception, 13, 0);
|
||||
break;
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < 7; ++i)
|
||||
{
|
||||
l4_uint64_t const PAi_mask = (value & (0x7ULL << i * 8)) >> i * 8;
|
||||
if ((PAi_mask == 0x2ULL) || (PAi_mask == 0x3ULL))
|
||||
{
|
||||
ev_rec->make_add_event<Event_exc>(Event_prio::Exception, 13, 0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
_vmcb->state_save_area.g_pat = value;
|
||||
break;
|
||||
case 0xc0000080: // efer
|
||||
{
|
||||
// Force the SVME bit
|
||||
l4_uint64_t efer = (value & Efer_guest_write_mask) | Efer_svme_enable;
|
||||
l4_uint64_t old_efer = _vmcb->state_save_area.efer;
|
||||
l4_uint64_t cr0 = _vmcb->state_save_area.cr0;
|
||||
|
||||
trace().printf("cr0: 0x%llx old efer 0x%llx new efer 0x%llx\n",
|
||||
cr0, old_efer, efer);
|
||||
|
||||
// There is no going back from enabling long mode.
|
||||
efer |= old_efer & Efer_lme;
|
||||
|
||||
if ((efer & Efer_lme) && (cr0 & Cr0_pg))
|
||||
{
|
||||
// indicate that long mode is active
|
||||
efer |= Efer_lma;
|
||||
}
|
||||
|
||||
trace().printf("efer: 0x%llx\n", efer);
|
||||
_vmcb->state_save_area.efer = efer;
|
||||
mark_dirty(Vmcb_crx);
|
||||
break;
|
||||
}
|
||||
case 0xc001001f: // MSR_AMD64_NB_CFG
|
||||
// can all be savely ignored
|
||||
break;
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
int
|
||||
Svm_state::handle_cr0_write(l4_vcpu_regs_t *regs)
|
||||
{
|
||||
l4_uint64_t info1 = exit_info1();
|
||||
if (!(info1 & Cr_valid))
|
||||
{
|
||||
// No decode assist information was provided for the access:
|
||||
// "If the instruction is LMSW no additional information is provided."
|
||||
Err().printf("LMSW write to CR0 not supported.\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
l4_umword_t newval = read_gpr(regs, info1 & Cr_gpr_mask);
|
||||
|
||||
auto old_cr0 = _vmcb->state_save_area.cr0;
|
||||
trace().printf("Write to cr0: 0x%llx -> 0x%lx\n", old_cr0, newval);
|
||||
// 0x10 => Extension Type; hardcoded to 1 see manual
|
||||
_vmcb->state_save_area.cr0 = newval | 0x10;
|
||||
mark_dirty(Vmcb_crx);
|
||||
|
||||
if ((newval & Cr0_pg)
|
||||
&& (old_cr0 & Cr0_pg) == 0
|
||||
&& (_vmcb->state_save_area.efer & Efer_lme))
|
||||
{
|
||||
// indicate that long mode is active
|
||||
info().printf("Enable long mode\n");
|
||||
_vmcb->state_save_area.efer |= Efer_lma;
|
||||
}
|
||||
|
||||
if ((newval & Cr0_pg) == 0
|
||||
&& (old_cr0 & Cr0_pg))
|
||||
{
|
||||
trace().printf("Disabling paging ...\n");
|
||||
|
||||
if (_vmcb->state_save_area.efer & Efer_lme)
|
||||
_vmcb->state_save_area.efer &= ~Efer_lma;
|
||||
}
|
||||
|
||||
return Jump_instr;
|
||||
}
|
||||
|
||||
int
|
||||
Svm_state::handle_xsetbv(l4_vcpu_regs_t *regs)
|
||||
{
|
||||
// TODO: We have to check that the current privilege level is 0, and inject
|
||||
// a general protection exception into the guest otherwise!
|
||||
if (_vmcb->state_save_area.cpl != 0)
|
||||
{
|
||||
warn().printf(
|
||||
"Ignoring write to extended control register %ld from CPL %d.\n",
|
||||
regs->cx, _vmcb->state_save_area.cpl);
|
||||
return Jump_instr;
|
||||
}
|
||||
|
||||
if (regs->cx == 0)
|
||||
{
|
||||
l4_uint64_t value = (l4_uint64_t(regs->ax) & 0xFFFFFFFF)
|
||||
| (l4_uint64_t(regs->dx) << 32);
|
||||
_vmcb->state_save_area.xcr0 = value;
|
||||
trace().printf("Setting xcr0 to 0x%llx\n", value);
|
||||
return Jump_instr;
|
||||
}
|
||||
|
||||
info().printf("Writing unknown extended control register %ld\n", regs->cx);
|
||||
return -L4_EINVAL;
|
||||
}
|
||||
|
||||
int
|
||||
Svm_state::handle_hardware_exception(Event_recorder *ev_rec, unsigned num)
|
||||
{
|
||||
Err err;
|
||||
|
||||
// Besides #DB and #AC all hardware exceptions are reflected to the guest.
|
||||
// The print statements serve as (paranoid) debug help in case the reflection
|
||||
// does not happen.
|
||||
switch (num)
|
||||
{
|
||||
case 0: err.printf("Hardware exception: Divide error\n"); break;
|
||||
|
||||
case 1: // #DB
|
||||
{
|
||||
ev_rec->make_add_event<Event_exc>(Event_prio::Exception, num);
|
||||
// #DB exceptions are either of fault type or of trap type. We reflect
|
||||
// both to the guest, without changing state, thus don't change the IP.
|
||||
return Retry;
|
||||
}
|
||||
|
||||
case 3: err.printf("Hardware exception: Breakpoint\n"); break;
|
||||
case 4: err.printf("Hardware exception: Overflow\n"); break;
|
||||
case 5: err.printf("Hardware exception: Bound range\n"); break;
|
||||
case 6: err.printf("Hardware exception: Invalid opcode\n"); break;
|
||||
case 7: err.printf("Hardware exception: Device not available\n"); break;
|
||||
case 8: err.printf("Hardware exception: Double fault\n"); break;
|
||||
case 10: err.printf("Hardware exception: Invalid TSS\n"); break;
|
||||
case 11: err.printf("Hardware exception: Segment not present\n"); break;
|
||||
case 12: err.printf("Hardware exception: Stack-segment fault\n"); break;
|
||||
case 13: err.printf("Hardware exception: General protection\n"); break;
|
||||
case 14: err.printf("Hardware exception: Page fault\n"); break;
|
||||
case 16: err.printf("Hardware exception: FPU error\n"); break;
|
||||
|
||||
case 17: // #AC
|
||||
{
|
||||
l4_uint64_t err_code = exit_info1();
|
||||
ev_rec->make_add_event<Event_exc>(Event_prio::Exception, num, err_code);
|
||||
return Retry;
|
||||
}
|
||||
case 18: err.printf("Hardware exception: Machine check\n"); break;
|
||||
case 19: err.printf("Hardware exception: SIMD error\n"); break;
|
||||
default: err.printf("Hardware exception: Unknown exception\n"); break;
|
||||
}
|
||||
|
||||
return -L4_EINVAL;
|
||||
}
|
||||
|
||||
/**
 * Read a general purpose register selected by its number.
 *
 * \param regs  vCPU register state holding all registers except the stack
 *              pointer.
 * \param reg   Register number in the range 0..15.
 *
 * \return Content of the selected register. Any other register number
 *         raises an error with code -L4_EINVAL.
 */
l4_umword_t
Svm_state::read_gpr(l4_vcpu_regs_t *regs, unsigned reg) const
{
  switch(reg)
    {
    case 0:
      return regs->ax;
    case 1:
      return regs->cx;
    case 2:
      return regs->dx;
    case 3:
      return regs->bx;
    case 4:
      // The stack pointer is read from the VMCB, not from the vCPU state.
      return _vmcb->state_save_area.rsp;
    case 5:
      return regs->bp;
    case 6:
      return regs->si;
    case 7:
      return regs->di;
    case 8:
      return regs->r8;
    case 9:
      return regs->r9;
    case 10:
      return regs->r10;
    case 11:
      return regs->r11;
    case 12:
      return regs->r12;
    case 13:
      return regs->r13;
    case 14:
      return regs->r14;
    case 15:
      return regs->r15;
    default:
      L4Re::throw_error(-L4_EINVAL, "Invalid register num.");
    }
}
|
||||
|
||||
const char *
|
||||
Svm_state::str_exit_code(Exit exit)
|
||||
{
|
||||
l4_uint32_t code = static_cast<l4_uint32_t>(exit);
|
||||
|
||||
if (/* code >= 0x00 && */ code <= 0x0f)
|
||||
return "Read of CR 0-15";
|
||||
|
||||
if (code >= 0x10 && code <= 0x1f)
|
||||
return "Write of CR 0-15";
|
||||
|
||||
if (code >= 0x20 && code <= 0x2f)
|
||||
return "Read of DR 0-15";
|
||||
|
||||
if (code >= 0x30 && code <= 0x3f)
|
||||
return "Write of DR 0-15";
|
||||
|
||||
if (code >= 0x40 && code <= 0x5f)
|
||||
return "Exception vector 0-31";
|
||||
|
||||
if (code >= 0x90 && code <= 0x9f)
|
||||
return "Write of CR 0-15 (trap)";
|
||||
|
||||
switch (code)
|
||||
{
|
||||
case 0x60: return "Physical INTR (maskable interrupt)";
|
||||
case 0x61: return "Physical NMI";
|
||||
case 0x62: return "Physical SMI";
|
||||
case 0x63: return "Physical INIT";
|
||||
case 0x64: return "Virtual INTR";
|
||||
case 0x65: return "Write of CR0 that changed any bits other than CR0.TS or CR0.MP";
|
||||
case 0x66: return "Read of IDTR";
|
||||
case 0x67: return "Read of GDTR";
|
||||
case 0x68: return "Read of LDTR";
|
||||
case 0x69: return "Read of TR";
|
||||
case 0x6A: return "Write of IDTR";
|
||||
case 0x6B: return "Write of GDTR";
|
||||
case 0x6C: return "Write of LDTR";
|
||||
case 0x6D: return "Write of TR";
|
||||
case 0x6E: return "RDTSC instruction";
|
||||
case 0x6F: return "RDPMC instruction";
|
||||
case 0x70: return "PUSHF instruction";
|
||||
case 0x71: return "POPF instruction";
|
||||
case 0x72: return "CPUID instruction";
|
||||
case 0x73: return "RSM instruction";
|
||||
case 0x74: return "IRET instruction";
|
||||
case 0x75: return "Software interrupt (INTn instructions)";
|
||||
case 0x76: return "INVD instruction";
|
||||
case 0x77: return "PAUSE instruction";
|
||||
case 0x78: return "HLT instruction";
|
||||
case 0x79: return "INVLPG instructions";
|
||||
case 0x7A: return "INVLPGA instruction";
|
||||
case 0x7B: return "IN or OUT accessing protected port";
|
||||
case 0x7C: return "RDMSR or WRMSR access to protected MSR";
|
||||
case 0x7D: return "Task switch";
|
||||
case 0x7E: return "FP error freeze";
|
||||
case 0x7F: return "Shutdown";
|
||||
case 0x80: return "VMRUN instruction";
|
||||
case 0x81: return "VMMCALL instruction";
|
||||
case 0x82: return "VMLOAD instruction";
|
||||
case 0x83: return "VMSAVE instruction";
|
||||
case 0x84: return "STGI instruction";
|
||||
case 0x85: return "CLGI instruction";
|
||||
case 0x86: return "SKINIT instruction";
|
||||
case 0x87: return "RDTSCP instruction";
|
||||
case 0x88: return "ICEBP instruction";
|
||||
case 0x89: return "WBINVD or WBNOINVD instruction";
|
||||
case 0x8A: return "MONITOR or MONITORX instruction";
|
||||
case 0x8B: return "MWAIT or MWAITX instruction";
|
||||
case 0x8C: return "MWAIT or MWAITX instruction, if monitor hardware is armed.";
|
||||
case 0x8E: return "RDPRU instruction";
|
||||
case 0x8D: return "XSETBV instruction";
|
||||
case 0x8F: return "Write of EFER MSR";
|
||||
case 0xA3: return "MCOMMIT instruction";
|
||||
case 0x400: return "Nested paging host-level page fault";
|
||||
case 0x401: return "AVIC Virtual IPI delivery not completed";
|
||||
case 0x402: return "AVIC Access to unaccelerated vAPIC register";
|
||||
case 0x403: return "VMGEXIT instruction";
|
||||
case -1U: return "Invalid guest state in VMCB";
|
||||
default: return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Svm_state::dump(l4_vcpu_regs_t const *regs) const
|
||||
{
|
||||
warn().printf("Registers:\n");
|
||||
warn().printf("r15=0x%lx\n", regs->r15); /**< r15 register */
|
||||
warn().printf("r14=0x%lx\n", regs->r14); /**< r14 register */
|
||||
warn().printf("r13=0x%lx\n", regs->r13); /**< r13 register */
|
||||
warn().printf("r12=0x%lx\n", regs->r12); /**< r12 register */
|
||||
warn().printf("r11=0x%lx\n", regs->r11); /**< r11 register */
|
||||
warn().printf("r10=0x%lx\n", regs->r10); /**< r10 register */
|
||||
warn().printf("r9=0x%lx\n", regs->r9); /**< r9 register */
|
||||
warn().printf("r8=0x%lx\n", regs->r8); /**< r8 register */
|
||||
|
||||
warn().printf("di=0x%lx\n", regs->di); /**< rdi register */
|
||||
warn().printf("si=0x%lx\n", regs->si); /**< rsi register */
|
||||
warn().printf("bp=0x%lx\n", regs->bp); /**< rbp register */
|
||||
warn().printf("pfa=0x%lx\n", regs->pfa); /**< page fault address */
|
||||
warn().printf("bx=0x%lx\n", regs->bx); /**< rbx register */
|
||||
warn().printf("dx=0x%lx\n", regs->dx); /**< rdx register */
|
||||
warn().printf("cx=0x%lx\n", regs->cx); /**< rcx register */
|
||||
warn().printf("ax=0x%lx\n", regs->ax); /**< rax register */
|
||||
|
||||
warn().printf("trapno=0x%lx\n", regs->trapno); /**< trap number */
|
||||
warn().printf("err=0x%lx\n", regs->err); /**< error code */
|
||||
|
||||
warn().printf("ip=0x%lx\n", regs->ip); /**< instruction pointer */
|
||||
warn().printf("cs=0x%lx\n", regs->cs); /**< dummy \internal */
|
||||
warn().printf("flags=0x%lx\n", regs->flags); /**< eflags */
|
||||
warn().printf("sp=0x%lx\n", regs->sp); /**< stack pointer */
|
||||
warn().printf("ss=0x%lx\n", regs->ss);
|
||||
warn().printf("fs_base=0x%lx\n", regs->fs_base);
|
||||
warn().printf("gs_base=0x%lx\n", regs->gs_base);
|
||||
warn().printf("ds=0x%x\n", regs->ds);
|
||||
warn().printf("es=0x%x\n", regs->es);
|
||||
warn().printf("fs=0x%x\n", regs->fs);
|
||||
warn().printf("gs=0x%x\n", regs->gs);
|
||||
|
||||
|
||||
warn().printf("Control area:\n");
|
||||
warn().printf("intercept_rd_crX=0x%x\n", _vmcb->control_area.intercept_rd_crX);
|
||||
warn().printf("intercept_wr_crX=0x%x\n", _vmcb->control_area.intercept_wr_crX);
|
||||
|
||||
warn().printf("intercept_rd_drX=0x%x\n", _vmcb->control_area.intercept_rd_drX);
|
||||
warn().printf("intercept_wr_drX=0x%x\n", _vmcb->control_area.intercept_wr_drX);
|
||||
|
||||
warn().printf("intercept_exceptions=0x%x\n", _vmcb->control_area.intercept_exceptions);
|
||||
|
||||
warn().printf("intercept_instruction0=0x%x\n", _vmcb->control_area.intercept_instruction0);
|
||||
warn().printf("intercept_instruction1=0x%x\n", _vmcb->control_area.intercept_instruction1);
|
||||
|
||||
|
||||
warn().printf("pause_filter_threshold=0x%x\n", _vmcb->control_area.pause_filter_threshold);
|
||||
warn().printf("pause_filter_count=0x%x\n", _vmcb->control_area.pause_filter_count);
|
||||
|
||||
warn().printf("iopm_base_pa=0x%llx\n", _vmcb->control_area.iopm_base_pa);
|
||||
warn().printf("msrpm_base_pa=0x%llx\n", _vmcb->control_area.msrpm_base_pa);
|
||||
warn().printf("tsc_offset=0x%llx\n", _vmcb->control_area.tsc_offset);
|
||||
warn().printf("guest_asid_tlb_ctl=0x%llx\n", _vmcb->control_area.guest_asid_tlb_ctl);
|
||||
warn().printf("interrupt_ctl=0x%llx\n", _vmcb->control_area.interrupt_ctl);
|
||||
warn().printf("interrupt_shadow=0x%llx\n", _vmcb->control_area.interrupt_shadow);
|
||||
warn().printf("exitcode=0x%llx\n", _vmcb->control_area.exitcode);
|
||||
warn().printf("exitinfo1=0x%llx\n", _vmcb->control_area.exitinfo1);
|
||||
warn().printf("exitinfo2=0x%llx\n", _vmcb->control_area.exitinfo2);
|
||||
warn().printf("exitintinfo=0x%llx\n", _vmcb->control_area.exitintinfo);
|
||||
warn().printf("np_enable=0x%llx\n", _vmcb->control_area.np_enable);
|
||||
|
||||
|
||||
warn().printf("eventinj=0x%llx\n", _vmcb->control_area.eventinj);
|
||||
warn().printf("n_cr3=0x%llx\n", _vmcb->control_area.n_cr3);
|
||||
warn().printf("lbr_virtualization_enable=0x%llx\n", _vmcb->control_area.lbr_virtualization_enable);
|
||||
warn().printf("clean_bits=0x%llx\n", _vmcb->control_area.clean_bits);
|
||||
warn().printf("n_rip=0x%llx\n", _vmcb->control_area.n_rip);
|
||||
|
||||
|
||||
warn().printf("State save area:\n");
|
||||
warn().printf("es: selector=0x%x, attrib=0x%x, limit=0x%x, base=0x%llx)\n",
|
||||
_vmcb->state_save_area.es.selector,
|
||||
_vmcb->state_save_area.es.attrib,
|
||||
_vmcb->state_save_area.es.limit,
|
||||
_vmcb->state_save_area.es.base);
|
||||
warn().printf("cs: selector=0x%x, attrib=0x%x, limit=0x%x, base=0x%llx)\n",
|
||||
_vmcb->state_save_area.cs.selector,
|
||||
_vmcb->state_save_area.cs.attrib,
|
||||
_vmcb->state_save_area.cs.limit,
|
||||
_vmcb->state_save_area.cs.base);
|
||||
warn().printf("ss: selector=0x%x, attrib=0x%x, limit=0x%x, base=0x%llx)\n",
|
||||
_vmcb->state_save_area.ss.selector,
|
||||
_vmcb->state_save_area.ss.attrib,
|
||||
_vmcb->state_save_area.ss.limit,
|
||||
_vmcb->state_save_area.ss.base);
|
||||
warn().printf("ds: selector=0x%x, attrib=0x%x, limit=0x%x, base=0x%llx)\n",
|
||||
_vmcb->state_save_area.ds.selector,
|
||||
_vmcb->state_save_area.ds.attrib,
|
||||
_vmcb->state_save_area.ds.limit,
|
||||
_vmcb->state_save_area.ds.base);
|
||||
warn().printf("fs: selector=0x%x, attrib=0x%x, limit=0x%x, base=0x%llx)\n",
|
||||
_vmcb->state_save_area.fs.selector,
|
||||
_vmcb->state_save_area.fs.attrib,
|
||||
_vmcb->state_save_area.fs.limit,
|
||||
_vmcb->state_save_area.fs.base);
|
||||
warn().printf("gs: selector=0x%x, attrib=0x%x, limit=0x%x, base=0x%llx)\n",
|
||||
_vmcb->state_save_area.gs.selector,
|
||||
_vmcb->state_save_area.gs.attrib,
|
||||
_vmcb->state_save_area.gs.limit,
|
||||
_vmcb->state_save_area.gs.base);
|
||||
warn().printf("gdtr: selector=0x%x, attrib=0x%x, limit=0x%x, base=0x%llx)\n",
|
||||
_vmcb->state_save_area.gdtr.selector,
|
||||
_vmcb->state_save_area.gdtr.attrib,
|
||||
_vmcb->state_save_area.gdtr.limit,
|
||||
_vmcb->state_save_area.gdtr.base);
|
||||
warn().printf("ldtr: selector=0x%x, attrib=0x%x, limit=0x%x, base=0x%llx)\n",
|
||||
_vmcb->state_save_area.ldtr.selector,
|
||||
_vmcb->state_save_area.ldtr.attrib,
|
||||
_vmcb->state_save_area.ldtr.limit,
|
||||
_vmcb->state_save_area.ldtr.base);
|
||||
warn().printf("idtr: selector=0x%x, attrib=0x%x, limit=0x%x, base=0x%llx)\n",
|
||||
_vmcb->state_save_area.idtr.selector,
|
||||
_vmcb->state_save_area.idtr.attrib,
|
||||
_vmcb->state_save_area.idtr.limit,
|
||||
_vmcb->state_save_area.idtr.base);
|
||||
warn().printf("tr: selector=0x%x, attrib=0x%x, limit=0x%x, base=0x%llx)\n",
|
||||
_vmcb->state_save_area.tr.selector,
|
||||
_vmcb->state_save_area.tr.attrib,
|
||||
_vmcb->state_save_area.tr.limit,
|
||||
_vmcb->state_save_area.tr.base);
|
||||
|
||||
|
||||
warn().printf("cpl=0x%x\n", _vmcb->state_save_area.cpl);
|
||||
|
||||
|
||||
warn().printf("efer=0x%llx\n", _vmcb->state_save_area.efer);
|
||||
|
||||
|
||||
warn().printf("cr4=0x%llx\n", _vmcb->state_save_area.cr4);
|
||||
warn().printf("cr3=0x%llx\n", _vmcb->state_save_area.cr3);
|
||||
warn().printf("cr0=0x%llx\n", _vmcb->state_save_area.cr0);
|
||||
warn().printf("dr7=0x%llx\n", _vmcb->state_save_area.dr7);
|
||||
warn().printf("dr6=0x%llx\n", _vmcb->state_save_area.dr6);
|
||||
warn().printf("rflags=0x%llx\n", _vmcb->state_save_area.rflags);
|
||||
warn().printf("rip=0x%llx\n", _vmcb->state_save_area.rip);
|
||||
|
||||
|
||||
warn().printf("rsp=0x%llx\n", _vmcb->state_save_area.rsp);
|
||||
|
||||
|
||||
warn().printf("rax=0x%llx\n", _vmcb->state_save_area.rax);
|
||||
warn().printf("star=0x%llx\n", _vmcb->state_save_area.star);
|
||||
warn().printf("lstar=0x%llx\n", _vmcb->state_save_area.lstar);
|
||||
warn().printf("cstar=0x%llx\n", _vmcb->state_save_area.cstar);
|
||||
warn().printf("sfmask=0x%llx\n", _vmcb->state_save_area.sfmask);
|
||||
warn().printf("kernelgsbase=0x%llx\n", _vmcb->state_save_area.kernelgsbase);
|
||||
warn().printf("sysenter_cs=0x%llx\n", _vmcb->state_save_area.sysenter_cs);
|
||||
warn().printf("sysenter_esp=0x%llx\n", _vmcb->state_save_area.sysenter_esp);
|
||||
warn().printf("sysenter_eip=0x%llx\n", _vmcb->state_save_area.sysenter_eip);
|
||||
warn().printf("cr2=0x%llx\n", _vmcb->state_save_area.cr2);
|
||||
|
||||
|
||||
warn().printf("g_pat=0x%llx\n", _vmcb->state_save_area.g_pat);
|
||||
warn().printf("dbgctl=0x%llx\n", _vmcb->state_save_area.dbgctl);
|
||||
warn().printf("br_from=0x%llx\n", _vmcb->state_save_area.br_from);
|
||||
warn().printf("br_to=0x%llx\n", _vmcb->state_save_area.br_to);
|
||||
warn().printf("lastexcpfrom=0x%llx\n", _vmcb->state_save_area.lastexcpfrom);
|
||||
warn().printf("last_excpto=0x%llx\n", _vmcb->state_save_area.last_excpto);
|
||||
|
||||
// this field is _NOT_ part of the official VMCB specification
|
||||
// a (userlevel) VMM needs this for proper FPU state virtualization
|
||||
warn().printf("xcr0=0x%llx\n", _vmcb->state_save_area.xcr0);
|
||||
}
|
||||
|
||||
} //namespace Vmm
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user