
commit 531b2ca0a940ac9db03f246c8b77c4201de72b00 upstream. According to the data sheet, writing the MODE register should stop the counter (and thus the interrupts). This appears to work on real hardware, at least modern Intel and AMD systems. It should also work on Hyper-V. However, on some buggy virtual machines the mode change doesn't have any effect until the counter is subsequently loaded (or perhaps when the IRQ next fires). So, set MODE 0 and then load the counter, to ensure that those buggy VMs do the right thing and the interrupts stop. And then write MODE 0 *again* to stop the counter on compliant implementations too. Apparently, Hyper-V keeps firing the IRQ *repeatedly* even in mode zero when it should only happen once, but the second MODE write stops that too. Userspace test program (mostly written by tglx): ===== #include <stdio.h> #include <unistd.h> #include <stdlib.h> #include <stdint.h> #include <sys/io.h> static __always_inline void __out##bwl(type value, uint16_t port) \ { \ asm volatile("out" #bwl " %" #bw "0, %w1" \ : : "a"(value), "Nd"(port)); \ } \ \ static __always_inline type __in##bwl(uint16_t port) \ { \ type value; \ asm volatile("in" #bwl " %w1, %" #bw "0" \ : "=a"(value) : "Nd"(port)); \ return value; \ } BUILDIO(b, b, uint8_t) #define inb __inb #define outb __outb #define PIT_MODE 0x43 #define PIT_CH0 0x40 #define PIT_CH2 0x42 static int is8254; static void dump_pit(void) { if (is8254) { // Latch and output counter and status outb(0xC2, PIT_MODE); printf("%02x %02x %02x\n", inb(PIT_CH0), inb(PIT_CH0), inb(PIT_CH0)); } else { // Latch and output counter outb(0x0, PIT_MODE); printf("%02x %02x\n", inb(PIT_CH0), inb(PIT_CH0)); } } int main(int argc, char* argv[]) { int nr_counts = 2; if (argc > 1) nr_counts = atoi(argv[1]); if (argc > 2) is8254 = 1; if (ioperm(0x40, 4, 1) != 0) return 1; dump_pit(); printf("Set oneshot\n"); outb(0x38, PIT_MODE); outb(0x00, PIT_CH0); outb(0x0F, PIT_CH0); dump_pit(); usleep(1000); dump_pit(); printf("Set 
periodic\n"); outb(0x34, PIT_MODE); outb(0x00, PIT_CH0); outb(0x0F, PIT_CH0); dump_pit(); usleep(1000); dump_pit(); dump_pit(); usleep(100000); dump_pit(); usleep(100000); dump_pit(); printf("Set stop (%d counter writes)\n", nr_counts); outb(0x30, PIT_MODE); while (nr_counts--) outb(0xFF, PIT_CH0); dump_pit(); usleep(100000); dump_pit(); usleep(100000); dump_pit(); printf("Set MODE 0\n"); outb(0x30, PIT_MODE); dump_pit(); usleep(100000); dump_pit(); usleep(100000); dump_pit(); return 0; } ===== Suggested-by: Sean Christopherson <seanjc@google.com> Co-developed-by: Li RongQing <lirongqing@baidu.com> Signed-off-by: Li RongQing <lirongqing@baidu.com> Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Tested-by: Michael Kelley <mhkelley@outlook.com> Link: https://lore.kernel.org/all/20240802135555.564941-2-dwmw2@infradead.org Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
551 lines
15 KiB
C
551 lines
15 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/*
|
|
* HyperV Detection code.
|
|
*
|
|
* Copyright (C) 2010, Novell, Inc.
|
|
* Author : K. Y. Srinivasan <ksrinivasan@novell.com>
|
|
*/
|
|
|
|
#include <linux/types.h>
|
|
#include <linux/time.h>
|
|
#include <linux/clocksource.h>
|
|
#include <linux/init.h>
|
|
#include <linux/export.h>
|
|
#include <linux/hardirq.h>
|
|
#include <linux/efi.h>
|
|
#include <linux/interrupt.h>
|
|
#include <linux/irq.h>
|
|
#include <linux/kexec.h>
|
|
#include <linux/random.h>
|
|
#include <linux/swiotlb.h>
|
|
#include <asm/processor.h>
|
|
#include <asm/hypervisor.h>
|
|
#include <asm/hyperv-tlfs.h>
|
|
#include <asm/mshyperv.h>
|
|
#include <asm/desc.h>
|
|
#include <asm/idtentry.h>
|
|
#include <asm/irq_regs.h>
|
|
#include <asm/i8259.h>
|
|
#include <asm/apic.h>
|
|
#include <asm/timer.h>
|
|
#include <asm/reboot.h>
|
|
#include <asm/nmi.h>
|
|
#include <clocksource/hyperv_timer.h>
|
|
#include <asm/numa.h>
|
|
#include <asm/coco.h>
|
|
|
|
/* Is Linux running as the root partition? */
|
|
bool hv_root_partition;
|
|
struct ms_hyperv_info ms_hyperv;
|
|
|
|
#if IS_ENABLED(CONFIG_HYPERV)
|
|
/*
 * Optional callbacks. Each one is installed by the matching
 * hv_setup_*_handler() helper in this file and reset to NULL by the
 * matching hv_remove_*_handler(); callers test for NULL before invoking.
 */
static void (*vmbus_handler)(void);
static void (*hv_stimer0_handler)(void);
static void (*hv_kexec_handler)(void);
static void (*hv_crash_handler)(struct pt_regs *regs);
|
|
|
|
DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_callback)
|
|
{
|
|
struct pt_regs *old_regs = set_irq_regs(regs);
|
|
|
|
inc_irq_stat(irq_hv_callback_count);
|
|
if (vmbus_handler)
|
|
vmbus_handler();
|
|
|
|
if (ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED)
|
|
ack_APIC_irq();
|
|
|
|
set_irq_regs(old_regs);
|
|
}
|
|
|
|
void hv_setup_vmbus_handler(void (*handler)(void))
|
|
{
|
|
vmbus_handler = handler;
|
|
}
|
|
|
|
void hv_remove_vmbus_handler(void)
|
|
{
|
|
/* We have no way to deallocate the interrupt gate */
|
|
vmbus_handler = NULL;
|
|
}
|
|
|
|
/*
|
|
* Routines to do per-architecture handling of stimer0
|
|
* interrupts when in Direct Mode
|
|
*/
|
|
DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_stimer0)
|
|
{
|
|
struct pt_regs *old_regs = set_irq_regs(regs);
|
|
|
|
inc_irq_stat(hyperv_stimer0_count);
|
|
if (hv_stimer0_handler)
|
|
hv_stimer0_handler();
|
|
add_interrupt_randomness(HYPERV_STIMER0_VECTOR);
|
|
ack_APIC_irq();
|
|
|
|
set_irq_regs(old_regs);
|
|
}
|
|
|
|
/* For x86/x64, override weak placeholders in hyperv_timer.c */
|
|
void hv_setup_stimer0_handler(void (*handler)(void))
|
|
{
|
|
hv_stimer0_handler = handler;
|
|
}
|
|
|
|
void hv_remove_stimer0_handler(void)
|
|
{
|
|
/* We have no way to deallocate the interrupt gate */
|
|
hv_stimer0_handler = NULL;
|
|
}
|
|
|
|
void hv_setup_kexec_handler(void (*handler)(void))
|
|
{
|
|
hv_kexec_handler = handler;
|
|
}
|
|
|
|
void hv_remove_kexec_handler(void)
|
|
{
|
|
hv_kexec_handler = NULL;
|
|
}
|
|
|
|
void hv_setup_crash_handler(void (*handler)(struct pt_regs *regs))
|
|
{
|
|
hv_crash_handler = handler;
|
|
}
|
|
|
|
void hv_remove_crash_handler(void)
|
|
{
|
|
hv_crash_handler = NULL;
|
|
}
|
|
|
|
#ifdef CONFIG_KEXEC_CORE
|
|
static void hv_machine_shutdown(void)
|
|
{
|
|
if (kexec_in_progress && hv_kexec_handler)
|
|
hv_kexec_handler();
|
|
|
|
/*
|
|
* Call hv_cpu_die() on all the CPUs, otherwise later the hypervisor
|
|
* corrupts the old VP Assist Pages and can crash the kexec kernel.
|
|
*/
|
|
if (kexec_in_progress && hyperv_init_cpuhp > 0)
|
|
cpuhp_remove_state(hyperv_init_cpuhp);
|
|
|
|
/* The function calls stop_other_cpus(). */
|
|
native_machine_shutdown();
|
|
|
|
/* Disable the hypercall page when there is only 1 active CPU. */
|
|
if (kexec_in_progress)
|
|
hyperv_cleanup();
|
|
}
|
|
|
|
static void hv_machine_crash_shutdown(struct pt_regs *regs)
|
|
{
|
|
if (hv_crash_handler)
|
|
hv_crash_handler(regs);
|
|
|
|
/* The function calls crash_smp_send_stop(). */
|
|
native_machine_crash_shutdown(regs);
|
|
|
|
/* Disable the hypercall page when there is only 1 active CPU. */
|
|
hyperv_cleanup();
|
|
}
|
|
#endif /* CONFIG_KEXEC_CORE */
|
|
|
|
/* Reference counter value recorded by save_hv_clock_tsc_state(). */
static u64 hv_ref_counter_at_suspend;

/*
 * x86_platform sched_clock state hooks that were in place before
 * x86_setup_ops_for_tsc_pg_clock() replaced them; the Hyper-V hooks chain
 * to these.
 */
static void (*old_save_sched_clock_state)(void);
static void (*old_restore_sched_clock_state)(void);
|
|
|
|
/*
|
|
* Hyper-V clock counter resets during hibernation. Save and restore clock
|
|
* offset during suspend/resume, while also considering the time passed
|
|
* before suspend. This is to make sure that sched_clock using hv tsc page
|
|
* based clocksource, proceeds from where it left off during suspend and
|
|
* it shows correct time for the timestamps of kernel messages after resume.
|
|
*/
|
|
static void save_hv_clock_tsc_state(void)
|
|
{
|
|
hv_ref_counter_at_suspend = hv_read_reference_counter();
|
|
}
|
|
|
|
static void restore_hv_clock_tsc_state(void)
|
|
{
|
|
/*
|
|
* Adjust the offsets used by hv tsc clocksource to
|
|
* account for the time spent before hibernation.
|
|
* adjusted value = reference counter (time) at suspend
|
|
* - reference counter (time) now.
|
|
*/
|
|
hv_adj_sched_clock_offset(hv_ref_counter_at_suspend - hv_read_reference_counter());
|
|
}
|
|
|
|
/*
|
|
* Functions to override save_sched_clock_state and restore_sched_clock_state
|
|
* functions of x86_platform. The Hyper-V clock counter is reset during
|
|
* suspend-resume and the offset used to measure time needs to be
|
|
* corrected, post resume.
|
|
*/
|
|
static void hv_save_sched_clock_state(void)
{
	/* Chain to the hook that was installed before ours ... */
	old_save_sched_clock_state();

	/* ... then record the Hyper-V reference counter. */
	save_hv_clock_tsc_state();
}
|
|
|
|
static void hv_restore_sched_clock_state(void)
{
	/* Fix up the Hyper-V clock offset first ... */
	restore_hv_clock_tsc_state();

	/* ... then chain to the hook that was installed before ours. */
	old_restore_sched_clock_state();
}
|
|
|
|
/*
 * Interpose the Hyper-V save/restore hooks on x86_platform's sched_clock
 * state callbacks. Nothing is changed unless the reference TSC MSR feature
 * bit is present.
 */
static void __init x86_setup_ops_for_tsc_pg_clock(void)
{
	if (ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE) {
		/* Remember each previous hook so that ours can chain to it. */
		old_save_sched_clock_state = x86_platform.save_sched_clock_state;
		x86_platform.save_sched_clock_state = hv_save_sched_clock_state;

		old_restore_sched_clock_state = x86_platform.restore_sched_clock_state;
		x86_platform.restore_sched_clock_state = hv_restore_sched_clock_state;
	}
}
|
|
#endif /* CONFIG_HYPERV */
|
|
|
|
/*
 * Detection hook of x86_hyper_ms_hyperv.
 *
 * Returns HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS when the CPU reports a
 * hypervisor, the vendor leaf carries the "Microsoft Hv" signature with a
 * maximum leaf inside [HYPERV_CPUID_MIN, HYPERV_CPUID_MAX], and both the
 * HYPERCALL and VP_INDEX MSRs are available. Returns 0 otherwise.
 */
static uint32_t __init ms_hyperv_platform(void)
{
	u32 signature[3];
	u32 eax;

	if (!boot_cpu_has(X86_FEATURE_HYPERVISOR))
		return 0;

	/* EAX receives the maximum leaf, EBX/ECX/EDX the 12 signature bytes. */
	cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS,
	      &eax, &signature[0], &signature[1], &signature[2]);

	if (eax < HYPERV_CPUID_MIN || eax > HYPERV_CPUID_MAX)
		return 0;

	if (memcmp("Microsoft Hv", signature, 12) != 0)
		return 0;

	/* HYPERCALL and VP_INDEX MSRs are mandatory for all features. */
	eax = cpuid_eax(HYPERV_CPUID_FEATURES);

	if (!(eax & HV_MSR_HYPERCALL_AVAILABLE)) {
		pr_warn("x86/hyperv: HYPERCALL MSR not available.\n");
		return 0;
	}

	if (!(eax & HV_MSR_VP_INDEX_AVAILABLE)) {
		pr_warn("x86/hyperv: VP_INDEX MSR not available.\n");
		return 0;
	}

	return HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS;
}
|
|
|
|
/*
 * x86_platform.get_nmi_reason replacement that ms_hyperv_init_platform()
 * installs on EFI boots: always reports 0 (no reason bits).
 */
static unsigned char hv_get_nmi_reason(void)
{
	return 0;
}
|
|
|
|
#ifdef CONFIG_X86_LOCAL_APIC
|
|
/*
|
|
* Prior to WS2016 Debug-VM sends NMIs to all CPUs which makes
|
|
* it difficult to process CHANNELMSG_UNLOAD in case of crash. Handle
|
|
* unknown NMI on the first CPU which gets it.
|
|
*/
|
|
static int hv_nmi_unknown(unsigned int val, struct pt_regs *regs)
|
|
{
|
|
static atomic_t nmi_cpu = ATOMIC_INIT(-1);
|
|
|
|
if (!unknown_nmi_panic)
|
|
return NMI_DONE;
|
|
|
|
if (atomic_cmpxchg(&nmi_cpu, -1, raw_smp_processor_id()) != -1)
|
|
return NMI_HANDLED;
|
|
|
|
return NMI_DONE;
|
|
}
|
|
#endif
|
|
|
|
static unsigned long hv_get_tsc_khz(void)
|
|
{
|
|
unsigned long freq;
|
|
|
|
rdmsrl(HV_X64_MSR_TSC_FREQUENCY, freq);
|
|
|
|
return freq / 1000;
|
|
}
|
|
|
|
#if defined(CONFIG_SMP) && IS_ENABLED(CONFIG_HYPERV)
|
|
/*
 * smp_ops.smp_prepare_boot_cpu replacement: run the native preparation and,
 * on 64-bit kernels with paravirt spinlocks, set up the Hyper-V spinlocks.
 */
static void __init hv_smp_prepare_boot_cpu(void)
{
	native_smp_prepare_boot_cpu();

#if defined(CONFIG_X86_64) && defined(CONFIG_PARAVIRT_SPINLOCKS)
	hv_init_spinlocks();
#endif
}
|
|
|
|
/*
 * smp_ops.smp_prepare_cpus replacement; ms_hyperv_init_platform() installs
 * it only when running as the root partition.
 *
 * After the native preparation, 64-bit kernels register every present CPU
 * other than CPU 0 with the hypervisor: first all logical processors are
 * added, then a virtual processor is created for each of them. Any failure
 * is fatal (BUG_ON).
 */
static void __init hv_smp_prepare_cpus(unsigned int max_cpus)
{
#ifdef CONFIG_X86_64
	int cpu;
	int err;
#endif

	native_smp_prepare_cpus(max_cpus);

#ifdef CONFIG_X86_64
	/* Pass 1: add the logical processors. CPU 0 is skipped. */
	for_each_present_cpu(cpu) {
		if (cpu != 0) {
			err = hv_call_add_logical_proc(numa_cpu_node(cpu), cpu,
						       cpu_physical_id(cpu));
			BUG_ON(err);
		}
	}

	/* Pass 2: create the virtual processors. CPU 0 is skipped. */
	for_each_present_cpu(cpu) {
		if (cpu != 0) {
			err = hv_call_create_vp(numa_cpu_node(cpu),
						hv_current_partition_id, cpu, cpu);
			BUG_ON(err);
		}
	}
#endif
}
|
|
#endif
|
|
|
|
/*
 * Platform initialization when running on Hyper-V; installed as the
 * .init.init_platform hook of x86_hyper_ms_hyperv.
 *
 * Reads the Hyper-V CPUID leaves into ms_hyperv, logs what was found and
 * installs the Hyper-V specific x86_platform, machine_ops and smp_ops
 * overrides plus the interrupt gates for the Hyper-V vectors.
 */
static void __init ms_hyperv_init_platform(void)
{
	int hv_max_functions_eax;
	/*
	 * The version leaf registers are raw unsigned values that get split
	 * into fields below. Keep them unsigned so that a field with its top
	 * bit set is neither sign-extended by the shift nor printed negative.
	 */
	u32 hv_host_info_eax;
	u32 hv_host_info_ebx;
	u32 hv_host_info_ecx;
	u32 hv_host_info_edx;

#ifdef CONFIG_PARAVIRT
	pv_info.name = "Hyper-V";
#endif

	/*
	 * Extract the features and hints
	 */
	ms_hyperv.features = cpuid_eax(HYPERV_CPUID_FEATURES);
	ms_hyperv.priv_high = cpuid_ebx(HYPERV_CPUID_FEATURES);
	ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES);
	ms_hyperv.hints    = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO);

	hv_max_functions_eax = cpuid_eax(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS);

	pr_info("Hyper-V: privilege flags low 0x%x, high 0x%x, hints 0x%x, misc 0x%x\n",
		ms_hyperv.features, ms_hyperv.priv_high, ms_hyperv.hints,
		ms_hyperv.misc_features);

	ms_hyperv.max_vp_index = cpuid_eax(HYPERV_CPUID_IMPLEMENT_LIMITS);
	ms_hyperv.max_lp_index = cpuid_ebx(HYPERV_CPUID_IMPLEMENT_LIMITS);

	pr_debug("Hyper-V: max %u virtual processors, %u logical processors\n",
		 ms_hyperv.max_vp_index, ms_hyperv.max_lp_index);

	/*
	 * Check CPU management privilege.
	 *
	 * To mirror what Windows does we should extract CPU management
	 * features and use the ReservedIdentityBit to detect if Linux is the
	 * root partition. But that requires negotiating CPU management
	 * interface (a process to be finalized). For now, use the privilege
	 * flag as the indicator for running as root.
	 *
	 * Hyper-V should never specify running as root and as a Confidential
	 * VM. But to protect against a compromised/malicious Hyper-V trying
	 * to exploit root behavior to expose Confidential VM memory, ignore
	 * the root partition setting if also a Confidential VM.
	 */
	if ((ms_hyperv.priv_high & HV_CPU_MANAGEMENT) &&
	    !(ms_hyperv.priv_high & HV_ISOLATION)) {
		hv_root_partition = true;
		pr_info("Hyper-V: running as root partition\n");
	}

	/*
	 * Extract host information.
	 */
	if (hv_max_functions_eax >= HYPERV_CPUID_VERSION) {
		hv_host_info_eax = cpuid_eax(HYPERV_CPUID_VERSION);
		hv_host_info_ebx = cpuid_ebx(HYPERV_CPUID_VERSION);
		hv_host_info_ecx = cpuid_ecx(HYPERV_CPUID_VERSION);
		hv_host_info_edx = cpuid_edx(HYPERV_CPUID_VERSION);

		pr_info("Hyper-V: Host Build %u.%u.%u.%u-%u-%u\n",
			hv_host_info_ebx >> 16, hv_host_info_ebx & 0xFFFF,
			hv_host_info_eax, hv_host_info_edx & 0xFFFFFF,
			hv_host_info_ecx, hv_host_info_edx >> 24);
	}

	/* Take TSC/CPU calibration from the hypervisor when the MSRs exist. */
	if (ms_hyperv.features & HV_ACCESS_FREQUENCY_MSRS &&
	    ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE) {
		x86_platform.calibrate_tsc = hv_get_tsc_khz;
		x86_platform.calibrate_cpu = hv_get_tsc_khz;
		setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
	}

	if (ms_hyperv.priv_high & HV_ISOLATION) {
		ms_hyperv.isolation_config_a = cpuid_eax(HYPERV_CPUID_ISOLATION_CONFIG);
		ms_hyperv.isolation_config_b = cpuid_ebx(HYPERV_CPUID_ISOLATION_CONFIG);
		ms_hyperv.shared_gpa_boundary =
			BIT_ULL(ms_hyperv.shared_gpa_boundary_bits);

		pr_info("Hyper-V: Isolation Config: Group A 0x%x, Group B 0x%x\n",
			ms_hyperv.isolation_config_a, ms_hyperv.isolation_config_b);

		if (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP) {
			static_branch_enable(&isolation_type_snp);
#ifdef CONFIG_SWIOTLB
			swiotlb_unencrypted_base = ms_hyperv.shared_gpa_boundary;
#endif
		}
		/* Isolation VMs are unenlightened SEV-based VMs, thus this check: */
		if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) {
			if (hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE)
				cc_vendor = CC_VENDOR_HYPERV;
		}
	}

	if (hv_max_functions_eax >= HYPERV_CPUID_NESTED_FEATURES) {
		ms_hyperv.nested_features =
			cpuid_eax(HYPERV_CPUID_NESTED_FEATURES);
		pr_info("Hyper-V: Nested features: 0x%x\n",
			ms_hyperv.nested_features);
	}

#ifdef CONFIG_X86_LOCAL_APIC
	if (ms_hyperv.features & HV_ACCESS_FREQUENCY_MSRS &&
	    ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE) {
		/*
		 * Get the APIC frequency.
		 */
		u64	hv_lapic_frequency;

		rdmsrl(HV_X64_MSR_APIC_FREQUENCY, hv_lapic_frequency);
		/* Scale the MSR value by HZ to obtain the timer period. */
		hv_lapic_frequency = div_u64(hv_lapic_frequency, HZ);
		lapic_timer_period = hv_lapic_frequency;
		pr_info("Hyper-V: LAPIC Timer Frequency: %#x\n",
			lapic_timer_period);
	}

	register_nmi_handler(NMI_UNKNOWN, hv_nmi_unknown, NMI_FLAG_FIRST,
			     "hv_nmi_unknown");
#endif

#ifdef CONFIG_X86_IO_APIC
	no_timer_check = 1;
#endif

#if IS_ENABLED(CONFIG_HYPERV) && defined(CONFIG_KEXEC_CORE)
	machine_ops.shutdown = hv_machine_shutdown;
	machine_ops.crash_shutdown = hv_machine_crash_shutdown;
#endif
	if (ms_hyperv.features & HV_ACCESS_TSC_INVARIANT) {
		/*
		 * Writing to synthetic MSR 0x40000118 updates/changes the
		 * guest visible CPUIDs. Setting bit 0 of this MSR  enables
		 * guests to report invariant TSC feature through CPUID
		 * instruction, CPUID 0x800000007/EDX, bit 8. See code in
		 * early_init_intel() where this bit is examined. The
		 * setting of this MSR bit should happen before init_intel()
		 * is called.
		 */
		wrmsrl(HV_X64_MSR_TSC_INVARIANT_CONTROL, 0x1);
		setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
	}

	/*
	 * Generation 2 instances don't support reading the NMI status from
	 * 0x61 port.
	 */
	if (efi_enabled(EFI_BOOT))
		x86_platform.get_nmi_reason = hv_get_nmi_reason;

#if IS_ENABLED(CONFIG_HYPERV)
	/*
	 * Setup the hook to get control post apic initialization.
	 */
	x86_platform.apic_post_init = hyperv_init;
	hyperv_setup_mmu_ops();
	/* Setup the IDT for hypervisor callback */
	alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, asm_sysvec_hyperv_callback);

	/* Setup the IDT for reenlightenment notifications */
	if (ms_hyperv.features & HV_ACCESS_REENLIGHTENMENT) {
		alloc_intr_gate(HYPERV_REENLIGHTENMENT_VECTOR,
				asm_sysvec_hyperv_reenlightenment);
	}

	/* Setup the IDT for stimer0 */
	if (ms_hyperv.misc_features & HV_STIMER_DIRECT_MODE_AVAILABLE) {
		alloc_intr_gate(HYPERV_STIMER0_VECTOR,
				asm_sysvec_hyperv_stimer0);
	}

# ifdef CONFIG_SMP
	smp_ops.smp_prepare_boot_cpu = hv_smp_prepare_boot_cpu;
	if (hv_root_partition)
		smp_ops.smp_prepare_cpus = hv_smp_prepare_cpus;
# endif

	/*
	 * Hyper-V doesn't provide irq remapping for IO-APIC. To enable x2apic,
	 * set x2apic destination mode to physical mode when x2apic is available
	 * and Hyper-V IOMMU driver makes sure cpus assigned with IO-APIC irqs
	 * have 8-bit APIC id.
	 */
# ifdef CONFIG_X86_X2APIC
	if (x2apic_supported())
		x2apic_phys = 1;
# endif

	/* Register Hyper-V specific clocksource */
	hv_init_clocksource();
	x86_setup_ops_for_tsc_pg_clock();
#endif
	/*
	 * TSC should be marked as unstable only after Hyper-V
	 * clocksource has been initialized. This ensures that the
	 * stability of the sched_clock is not altered.
	 */
	if (!(ms_hyperv.features & HV_ACCESS_TSC_INVARIANT))
		mark_tsc_unstable("running on Hyper-V");

	hardlockup_detector_disable();
}
|
|
|
|
/*
 * .init.x2apic_available hook of x86_hyper_ms_hyperv: simply forwards the
 * answer of x2apic_supported().
 */
static bool __init ms_hyperv_x2apic_available(void)
{
	return x2apic_supported();
}
|
|
|
|
/*
|
|
* If ms_hyperv_msi_ext_dest_id() returns true, hyperv_prepare_irq_remapping()
|
|
* returns -ENODEV and the Hyper-V IOMMU driver is not used; instead, the
|
|
* generic support of the 15-bit APIC ID is used: see __irq_msi_compose_msg().
|
|
*
|
|
* Note: for a VM on Hyper-V, the I/O-APIC is the only device which
|
|
* (logically) generates MSIs directly to the system APIC irq domain.
|
|
* There is no HPET, and PCI MSI/MSI-X interrupts are remapped by the
|
|
* pci-hyperv host bridge.
|
|
*/
|
|
static bool __init ms_hyperv_msi_ext_dest_id(void)
|
|
{
|
|
u32 eax;
|
|
|
|
eax = cpuid_eax(HYPERV_CPUID_VIRT_STACK_INTERFACE);
|
|
if (eax != HYPERV_VS_INTERFACE_EAX_SIGNATURE)
|
|
return false;
|
|
|
|
eax = cpuid_eax(HYPERV_CPUID_VIRT_STACK_PROPERTIES);
|
|
return eax & HYPERV_VS_PROPERTIES_EAX_EXTENDED_IOAPIC_RTE;
|
|
}
|
|
|
|
const __initconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
|
|
.name = "Microsoft Hyper-V",
|
|
.detect = ms_hyperv_platform,
|
|
.type = X86_HYPER_MS_HYPERV,
|
|
.init.x2apic_available = ms_hyperv_x2apic_available,
|
|
.init.msi_ext_dest_id = ms_hyperv_msi_ext_dest_id,
|
|
.init.init_platform = ms_hyperv_init_platform,
|
|
};
|