Mailing List Archive

[PATCH V5 2/4] KVM: x86: Make nsec per APIC bus cycle a VM variable
From: Isaku Yamahata <isaku.yamahata@intel.com>

Introduce the VM variable "nanoseconds per APIC bus cycle" in
preparation to make the APIC bus frequency configurable.

The TDX architecture hard-codes the core crystal clock frequency to
25MHz and mandates exposing it via CPUID leaf 0x15. The TDX architecture
does not allow the VMM to override the value.

In addition, per Intel SDM:
"The APIC timer frequency will be the processor’s bus clock or core
crystal clock frequency (when TSC/core crystal clock ratio is
enumerated in CPUID leaf 0x15) divided by the value specified in
the divide configuration register."

The resulting 25MHz APIC bus frequency conflicts with the KVM hardcoded
APIC bus frequency of 1GHz.

Introduce the VM variable "nanoseconds per APIC bus cycle" to prepare
for allowing userspace to tell KVM to use the frequency that TDX mandates
instead of the default 1GHz. Doing so ensures that the guest doesn't have
a conflicting view of the APIC bus frequency.

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Reviewed-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
[reinette: rework changelog]
Signed-off-by: Reinette Chatre <reinette.chatre@intel.com>
---
Changes v5:
- Add Rick's Reviewed-by tag.

Changes v4:
- Reword changelog to address comments related to "bus clock" vs
"core crystal clock" frequency. (Xiaoyao)
- Typo in changelog ("APIC APIC" -> "APIC").
- Change logic "APIC bus cycles per nsec" -> "nanoseconds per
APIC bus cycle".

Changes v3:
- Update commit message.
- Dropped apic_bus_frequency according to Maxim Levitsky.

Changes v2:
- No change.

arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/kvm/hyperv.c | 3 ++-
arch/x86/kvm/lapic.c | 6 ++++--
arch/x86/kvm/lapic.h | 2 +-
arch/x86/kvm/x86.c | 1 +
5 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1d13e3cd1dc5..f2735582c7e0 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1358,6 +1358,7 @@ struct kvm_arch {

u32 default_tsc_khz;
bool user_set_tsc;
+ u64 apic_bus_cycle_ns;

seqcount_raw_spinlock_t pvclock_sc;
bool use_master_clock;
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 1030701db967..5c31e715d2ad 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1737,7 +1737,8 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata,
data = (u64)vcpu->arch.virtual_tsc_khz * 1000;
break;
case HV_X64_MSR_APIC_FREQUENCY:
- data = div64_u64(1000000000ULL, APIC_BUS_CYCLE_NS);
+ data = div64_u64(1000000000ULL,
+ vcpu->kvm->arch.apic_bus_cycle_ns);
break;
default:
kvm_pr_unimpl_rdmsr(vcpu, msr);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index cf37586f0466..3e66a0a95999 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1547,7 +1547,8 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic)
remaining = 0;

ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period);
- return div64_u64(ns, (APIC_BUS_CYCLE_NS * apic->divide_count));
+ return div64_u64(ns, (apic->vcpu->kvm->arch.apic_bus_cycle_ns *
+ apic->divide_count));
}

static void __report_tpr_access(struct kvm_lapic *apic, bool write)
@@ -1965,7 +1966,8 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)

static inline u64 tmict_to_ns(struct kvm_lapic *apic, u32 tmict)
{
- return (u64)tmict * APIC_BUS_CYCLE_NS * (u64)apic->divide_count;
+ return (u64)tmict * apic->vcpu->kvm->arch.apic_bus_cycle_ns *
+ (u64)apic->divide_count;
}

static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_divisor)
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index a20cb006b6c8..51e09f5a7fc5 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -16,7 +16,7 @@
#define APIC_DEST_NOSHORT 0x0
#define APIC_DEST_MASK 0x800

-#define APIC_BUS_CYCLE_NS 1
+#define APIC_BUS_CYCLE_NS_DEFAULT 1

#define APIC_BROADCAST 0xFF
#define X2APIC_BROADCAST 0xFFFFFFFFul
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e9ef1fa4b90b..10e6315103f4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -12629,6 +12629,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);

kvm->arch.default_tsc_khz = max_tsc_khz ? : tsc_khz;
+ kvm->arch.apic_bus_cycle_ns = APIC_BUS_CYCLE_NS_DEFAULT;
kvm->arch.guest_can_read_msr_platform_info = true;
kvm->arch.enable_pmu = enable_pmu;

--
2.34.1
Re: [PATCH V5 2/4] KVM: x86: Make nsec per APIC bus cycle a VM variable [ In reply to ]
On 4/26/2024 6:07 AM, Reinette Chatre wrote:
> From: Isaku Yamahata <isaku.yamahata@intel.com>
>
> Introduce the VM variable "nanoseconds per APIC bus cycle" in
> preparation to make the APIC bus frequency configurable.
>
> The TDX architecture hard-codes the core crystal clock frequency to
> 25MHz and mandates exposing it via CPUID leaf 0x15. The TDX architecture
> does not allow the VMM to override the value.
>
> In addition, per Intel SDM:
> "The APIC timer frequency will be the processor’s bus clock or core
> crystal clock frequency (when TSC/core crystal clock ratio is
> enumerated in CPUID leaf 0x15) divided by the value specified in
> the divide configuration register."
>
> The resulting 25MHz APIC bus frequency conflicts with the KVM hardcoded
> APIC bus frequency of 1GHz.
>
> Introduce the VM variable "nanoseconds per APIC bus cycle" to prepare
> for allowing userspace to tell KVM to use the frequency that TDX mandates
> instead of the default 1GHz. Doing so ensures that the guest doesn't have
> a conflicting view of the APIC bus frequency.

Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>

> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
> Reviewed-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
> [reinette: rework changelog]
> Signed-off-by: Reinette Chatre <reinette.chatre@intel.com>
> ---
> Changes v5:
> - Add Rick's Reviewed-by tag.
>
> Changes v4:
> - Reword changelog to address comments related to "bus clock" vs
> "core crystal clock" frequency. (Xiaoyao)
> - Typo in changelog ("APIC APIC" -> "APIC").
> - Change logic "APIC bus cycles per nsec" -> "nanoseconds per
> APIC bus cycle".
>
> Changes V3:
> - Update commit message.
> - Dropped apic_bus_frequency according to Maxim Levitsky.
>
> Changes v2:
> - No change.
>
> arch/x86/include/asm/kvm_host.h | 1 +
> arch/x86/kvm/hyperv.c | 3 ++-
> arch/x86/kvm/lapic.c | 6 ++++--
> arch/x86/kvm/lapic.h | 2 +-
> arch/x86/kvm/x86.c | 1 +
> 5 files changed, 9 insertions(+), 4 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 1d13e3cd1dc5..f2735582c7e0 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1358,6 +1358,7 @@ struct kvm_arch {
>
> u32 default_tsc_khz;
> bool user_set_tsc;
> + u64 apic_bus_cycle_ns;
>
> seqcount_raw_spinlock_t pvclock_sc;
> bool use_master_clock;
> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> index 1030701db967..5c31e715d2ad 100644
> --- a/arch/x86/kvm/hyperv.c
> +++ b/arch/x86/kvm/hyperv.c
> @@ -1737,7 +1737,8 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata,
> data = (u64)vcpu->arch.virtual_tsc_khz * 1000;
> break;
> case HV_X64_MSR_APIC_FREQUENCY:
> - data = div64_u64(1000000000ULL, APIC_BUS_CYCLE_NS);
> + data = div64_u64(1000000000ULL,
> + vcpu->kvm->arch.apic_bus_cycle_ns);
> break;
> default:
> kvm_pr_unimpl_rdmsr(vcpu, msr);
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index cf37586f0466..3e66a0a95999 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -1547,7 +1547,8 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic)
> remaining = 0;
>
> ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period);
> - return div64_u64(ns, (APIC_BUS_CYCLE_NS * apic->divide_count));
> + return div64_u64(ns, (apic->vcpu->kvm->arch.apic_bus_cycle_ns *
> + apic->divide_count));
> }
>
> static void __report_tpr_access(struct kvm_lapic *apic, bool write)
> @@ -1965,7 +1966,8 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)
>
> static inline u64 tmict_to_ns(struct kvm_lapic *apic, u32 tmict)
> {
> - return (u64)tmict * APIC_BUS_CYCLE_NS * (u64)apic->divide_count;
> + return (u64)tmict * apic->vcpu->kvm->arch.apic_bus_cycle_ns *
> + (u64)apic->divide_count;
> }
>
> static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_divisor)
> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> index a20cb006b6c8..51e09f5a7fc5 100644
> --- a/arch/x86/kvm/lapic.h
> +++ b/arch/x86/kvm/lapic.h
> @@ -16,7 +16,7 @@
> #define APIC_DEST_NOSHORT 0x0
> #define APIC_DEST_MASK 0x800
>
> -#define APIC_BUS_CYCLE_NS 1
> +#define APIC_BUS_CYCLE_NS_DEFAULT 1
>
> #define APIC_BROADCAST 0xFF
> #define X2APIC_BROADCAST 0xFFFFFFFFul
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index e9ef1fa4b90b..10e6315103f4 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -12629,6 +12629,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
> raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
>
> kvm->arch.default_tsc_khz = max_tsc_khz ? : tsc_khz;
> + kvm->arch.apic_bus_cycle_ns = APIC_BUS_CYCLE_NS_DEFAULT;
> kvm->arch.guest_can_read_msr_platform_info = true;
> kvm->arch.enable_pmu = enable_pmu;
>